├── src ├── r │ ├── .gitignore │ ├── dist.r │ ├── intercoder.r │ └── analysis.r ├── clojure │ └── uic │ │ └── nlp │ │ └── todo │ │ ├── .gitignore │ │ ├── resource.clj │ │ ├── core.clj │ │ ├── thaw_db.clj │ │ ├── db.clj │ │ ├── feature.clj │ │ ├── cli.clj │ │ ├── eval.clj │ │ └── corpus.clj ├── bin │ └── run.sh ├── rest │ └── query.rest └── python │ └── retro-intercoder.py ├── resources ├── corpus.xlsx ├── todocorp.conf └── todotask-log4j2.xml ├── .gitmodules ├── results ├── full-evaluation.xls ├── dist.csv ├── kappa.txt ├── predictions.csv ├── intercoder.csv └── agent-data.arff ├── docker-es ├── makefile └── docker-compose.yml ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── test-resources ├── test-log4j2.xml └── log4j2.xml ├── LICENSE ├── test └── uic │ └── nlp │ └── todo │ ├── db_test.clj │ └── eval_test.clj ├── makefile ├── project.clj └── README.md /src/r/.gitignore: -------------------------------------------------------------------------------- 1 | /.Rhistory -------------------------------------------------------------------------------- /src/clojure/uic/nlp/todo/.gitignore: -------------------------------------------------------------------------------- 1 | /version.clj 2 | -------------------------------------------------------------------------------- /resources/corpus.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plandes/todo-task/HEAD/resources/corpus.xlsx -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "zenbuild"] 2 | path = zenbuild 3 | url = https://github.com/plandes/zenbuild 4 | -------------------------------------------------------------------------------- /results/full-evaluation.xls: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/plandes/todo-task/HEAD/results/full-evaluation.xls -------------------------------------------------------------------------------- /resources/todocorp.conf: -------------------------------------------------------------------------------- 1 | # -*-conf-*- 2 | 3 | [default] 4 | annotated_dir=./resources 5 | annotator_main=corpus 6 | annotator1=corpus 7 | -------------------------------------------------------------------------------- /docker-es/makefile: -------------------------------------------------------------------------------- 1 | PROJ_TYPE= docker 2 | DOCKER_IMG_NAME= es 3 | DOCKER_USER= plandes 4 | DOCKER_BUILD_DEPS= 5 | 6 | include ../zenbuild/main.mk 7 | -------------------------------------------------------------------------------- /src/r/dist.r: -------------------------------------------------------------------------------- 1 | # Tabulate how often each value of the first column of the pruned corpus 2 | # occurs and write the frequency table to results/dist.csv. 3 | df <- read.csv('../../results/pruned.csv', header=TRUE) 4 | 5 | # named class_dist so the script does not mask stats::dist 6 | class_dist <- data.frame(table(df[,1])) 7 | 8 | write.csv(class_dist, '../../results/dist.csv') 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /rel 3 | /classes 4 | /checkouts 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | .hgignore 12 | .hg/ 13 | /model 14 | /doc 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: clojure 2 | lein: 2.7.1 3 | before_script: 4 | - mkdir -p target 5 | - git clone http://github.com/plandes/zenbuild 6 | script: ZBHOME=zenbuild make info checkdep 7 | jdk: 8 | - oraclejdk8 9 | -------------------------------------------------------------------------------- /results/dist.csv: -------------------------------------------------------------------------------- 1 | "","Var1","Freq" 2 | "1","buy",52 3 | 
"2","calendar",22 4 | "3","call",19 5 | "4","contact",47 6 | "5","email",12 7 | "6","find-service",27 8 | "7","find-travel",10 9 | "8","pay-bill-online",17 10 | "9","plan-meal",7 11 | "10","postal",11 12 | "11","print",4 13 | "12","school-work",8 14 | "13","self-improve",4 15 | "14","service",46 16 | -------------------------------------------------------------------------------- /docker-es/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | 3 | services: 4 | elasticsearch: 5 | container_name: todoes 6 | image: elasticsearch:2.2.0 7 | ports: 8 | - "10200:9200" 9 | - "10300:9300" 10 | volumes: 11 | - todo_es_data:/usr/share/elasticsearch/data 12 | environment: 13 | ES_HEAP_SIZE: 2g 14 | 15 | volumes: 16 | todo_es_data: 17 | -------------------------------------------------------------------------------- /src/clojure/uic/nlp/todo/resource.clj: -------------------------------------------------------------------------------- 1 | (ns uic.nlp.todo.resource 2 | (:require [clojure.tools.logging :as log]) 3 | (:require [zensols.model.classifier :as c]) 4 | (:require [zensols.actioncli.resource :refer (resource-path) :as res])) 5 | 6 | (defn initialize 7 | [] 8 | (log/debug "initializing") 9 | (c/initialize) 10 | (res/register-resource :todocorp-config-file 11 | :system-property "todocorp-config")) 12 | -------------------------------------------------------------------------------- /src/r/intercoder.r: -------------------------------------------------------------------------------- 1 | #install.packages('psych') 2 | #install.packages('irr') 3 | library('psych') 4 | library('irr') 5 | 6 | # Print Cohen's kappa and Fleiss' kappa for the two annotators. 7 | # df: data frame that has the columns 'annotator1' and 'annotator2'; any 8 | # other columns are dropped before the agreement statistics are computed. 9 | intercoder <- function (df) { 10 | #df <- df[,][,-1] 11 | df <- df[,c('annotator1','annotator2')] 12 | print('cohen:') 13 | print(cohen.kappa(df)) 14 | 15 | print('fleiss:') 16 | print(kappam.fleiss(df)) 17 | } 18 | 19 | df <- read.csv('../../results/intercoder-relabeled.csv', header=TRUE) 20 | intercoder(df) 21 | 
-------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](http://keepachangelog.com/) 5 | and this project adheres to [Semantic Versioning](http://semver.org/). 6 | 7 | 8 | ## [Unreleased] 9 | 10 | ## [0.0.1] - 2018-06-20 11 | ### Added 12 | - Initial version 13 | 14 | [Unreleased]: https://github.com/plandes/todo-task/compare/v0.0.1...HEAD 15 | [0.0.1]: https://github.com/plandes/todo-task/releases/tag/v0.0.1 16 | -------------------------------------------------------------------------------- /results/kappa.txt: -------------------------------------------------------------------------------- 1 | [1] "cohen:" 2 | Call: cohen.kappa1(x = x, w = w, n.obs = n.obs, alpha = alpha, levels = levels) 3 | 4 | Cohen Kappa and Weighted Kappa correlation coefficients and confidence boundaries 5 | lower estimate upper 6 | unweighted kappa 0.42 0.51 0.60 7 | weighted kappa 0.47 0.62 0.77 8 | 9 | Number of subjects = 145 10 | [1] "fleiss:" 11 | Fleiss' Kappa for m Raters 12 | 13 | Subjects = 145 14 | Raters = 2 15 | Kappa = 0.498 16 | 17 | z = 14.1 18 | p-value = 0 19 | -------------------------------------------------------------------------------- /test-resources/test-log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /resources/todotask-log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /test-resources/log4j2.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /src/clojure/uic/nlp/todo/core.clj: -------------------------------------------------------------------------------- 1 | (ns uic.nlp.todo.core 2 | (:require [zensols.actioncli.log4j2 :as lu] 3 | [zensols.actioncli.parse :as p]) 4 | (:require [uic.nlp.todo.version :as ver]) 5 | (:gen-class :main true)) 6 | 7 | (defn- version-info [] 8 | (println (format "%s (%s)" ver/version ver/gitref))) 9 | 10 | (defn- create-action-context [] 11 | (p/multi-action-context 12 | '((:repl zensols.actioncli.repl repl-command) 13 | (:load uic.nlp.todo.cli load-corpora-command) 14 | (:dsprep uic.nlp.todo.cli split-dataset-command) 15 | (:features uic.nlp.todo.cli features-command) 16 | (:print uic.nlp.todo.cli print-evaluate-command) 17 | (:evaluate uic.nlp.todo.cli evaluates-spreadsheet-command) 18 | (:predict uic.nlp.todo.cli predict-spreadsheet-command)) 19 | :version-option (p/version-option version-info))) 20 | 21 | (defn -main [& args] 22 | (lu/configure "todotask-log4j2.xml") 23 | (p/set-program-name "todotask") 24 | (-> (create-action-context) 25 | (p/process-arguments args))) 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Paul Landes 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the 
following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /src/bin/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | INIT_DIR=$(dirname "$0") 4 | RES_DIR=results 5 | LOG_DIR=log 6 | CONF=${INIT_DIR}/todocorp.conf 7 | 8 | mkdir -p $RES_DIR 9 | mkdir -p $LOG_DIR 10 | 11 | # Start a todotask evaluation in the background under nohup. 12 | # $1: run name, used for the log file and the .xls result file 13 | # $2: classifiers to evaluate, $3: meta set 14 | eval_classifiers() { 15 | clname=$1 16 | classifiers=$2 17 | metaset=$3 18 | log=$LOG_DIR/${clname}.log 19 | echo "evaluation set ${clname}, classifiers: ${classifiers}, meta set: ${metaset}, config: $CONF" > $log 20 | nohup ./bin/todotask evaluate -c $CONF -l INFO \ 21 | --metaset $metaset --classifiers $classifiers \ 22 | -o $RES_DIR/${clname}.xls >> $log 2>&1 & 23 | } 24 | 25 | case $1 in 26 | clean) 27 | rm $RES_DIR/* 28 | rm $LOG_DIR/* 29 | ;; 30 | 31 | sanity) 32 | # sanity test 33 | eval_classifiers test-res zeror set-best 34 | ;; 35 | 36 | best) 37 | # single best performing model 38 | eval_classifiers j48 j48 set-best 39 | ;; 40 | 41 | long) 42 | # random forest on the best meta set 43 | eval_classifiers random-forest random-forest set-best 44 | 45 | # long running 46 | for i in fast lazy meta tree slow really-slow ; do 47 | eval_classifiers $i $i set-compare 48 | done 49 | ;; 50 | 51 | *) 52 | # list the sub-commands handled by the case arms above 53 | echo "usage: $0 {clean|sanity|best|long}" 54 | ;; 55 | esac 56 | 
-------------------------------------------------------------------------------- /src/clojure/uic/nlp/todo/thaw_db.clj: -------------------------------------------------------------------------------- 1 | (ns uic.nlp.todo.thaw-db 2 | (:require [zensols.actioncli.dynamic :as dyn] 3 | [zensols.actioncli.util :refer (defnlock)] 4 | [zensols.dataset.thaw :as db :refer (with-connection)])) 5 | 6 | (defnlock connection [] 7 | (db/thaw-connection "todo" "resources/todo-dataset.json")) 8 | 9 | (defn reset-connection [] 10 | (-> (meta #'connection) :init-resource (reset! nil))) 11 | 12 | (dyn/register-purge-fn reset-connection) 13 | 14 | (defn instances-count [] 15 | (with-connection (connection) 16 | (db/instances-count))) 17 | 18 | (defn anon-by-id 19 | [id] 20 | (with-connection (connection) 21 | (db/instance-by-id id))) 22 | 23 | (defn anons 24 | "Return all annotations" 25 | [& opts] 26 | (with-connection (connection) 27 | (apply db/instances opts))) 28 | 29 | (defn distribution 30 | "Return a distribution on class label as list of vectors. The first position 31 | is the label and the second the count for that respective label." 
32 | [] 33 | (with-connection (connection) 34 | (->> (anons) 35 | (map :class-label) 36 | (reduce (fn [res c] 37 | (assoc res c (+ 1 (or (get res c) 0)))) 38 | {}) 39 | (sort #(compare (second %2) (second %1)))))) 40 | -------------------------------------------------------------------------------- /test/uic/nlp/todo/db_test.clj: -------------------------------------------------------------------------------- 1 | (ns ^{:doc "This namespace is REPL prototyping fodder and *not* not real unit 2 | test cases."} uic.nlp.todo.db-test 3 | (:require [clojure.test :refer :all] 4 | [uic.nlp.todo.db :refer :all] 5 | [zensols.actioncli.dynamic :as dyn] 6 | [zensols.dataset.db :as db :refer (with-connection)])) 7 | 8 | (defn- main [& actions] 9 | (->> actions 10 | (map (fn [action] 11 | (case action 12 | -2 (dyn/purge) 13 | -1 (reset-instances) 14 | 0 (load-corpora) 15 | 1 (divide-by-set 0.8) 16 | 2 (do 17 | (dyn/purge) 18 | (load-corpora) 19 | (Thread/sleep (* 2 1000)) 20 | (divide-by-set 0.8) 21 | (with-connection (connection) 22 | (db/write-dataset :instance-fn #(-> % :panon :text)))) 23 | 3 (with-connection (connection) 24 | (db/instance-count)) 25 | 4 (with-connection (connection) 26 | (db/stats)) 27 | 5 (clojure.pprint/pprint (connection)) 28 | 6 (with-connection (connection) 29 | (db/write-dataset :instance-fn #(-> % :panon :text))) 30 | 7 (distribution)))) 31 | doall)) 32 | -------------------------------------------------------------------------------- /src/r/analysis.r: -------------------------------------------------------------------------------- 1 | proprietary <- "agent,desc,count 2 | buy,Assists in buying goods.,480 3 | service,Do It Yourself type tasks,284 4 | self-improve,Self Improvement/Help,183 5 | school-work,Task related to school,158 6 | contact,Email SMS or call,101 7 | call,Makes a phone call via OS,97 8 | email,Emails a contact via OS,60 9 | calendar,Make an appointment,55 10 | pay-bill-online,Online bill pay,54 11 | find-service,Procure services,42 
12 | print,Print out a document,23 13 | postal,Send mail by snail mail,20 14 | plan-meal,Cook or gather ingredients,17 15 | find-travel,Reserve transportation,18 16 | text-sms,Sends SMS text messages,19" 17 | 18 | public <- "agent,desc,count 19 | buy-general,Assists in buying general goods,38 20 | buy-grocery,Assists in buying groceries,2 21 | buy-travel,Assists in buying travel,9 22 | buy-wedding,Assists in buying wedding g/s,44 23 | calendar,Make personal appointment,33 24 | contact,Email SMS or phone call,66 25 | household,Schedule time for personal task in calendar,91 26 | how-to,Identify tutorial video,17 27 | office,Schedule office task in calendar,189 28 | office-calendar,Make work appt,8 29 | office-contact,Email SMS or phone call - work,19 30 | pay-bill-online,Online bill pay,18 31 | search-general,General internet search,17 32 | search-recipe,Internet search recipe,4 33 | sell,Sell or donate an item,7 34 | send,Send item by USPS,15" 35 | 36 | # Print summary statistics for the label counts of a corpus. 37 | # csvstr: CSV text with a header row; the third column holds the counts. 38 | # name: label printed before the statistics. 39 | corp.dist <- function(csvstr, name) { 40 | df <- read.csv(text=csvstr, header=TRUE) 41 | counts <- df[,3] 42 | print(sprintf('%s:', name)) 43 | # values are not auto-printed inside a function, so print the summary explicitly 44 | print(summary(counts)) 45 | print(sprintf('standard deviation: %.2f, variance: %.2f', sd(counts), var(counts))) 46 | } 47 | 48 | corp.dist(proprietary, 'proprietary') 49 | corp.dist(public, 'public') 50 | -------------------------------------------------------------------------------- /src/rest/query.rest: -------------------------------------------------------------------------------- 1 | # get all indexes 2 | GET http://localhost:10200/_cat/indices?v 3 | 4 | # careful! 
5 | #DELETE http://localhost:10200/todo 6 | 7 | # mapping 8 | GET http://localhost:10200/todo/dataset/_mapping 9 | 10 | # count 11 | POST http://localhost:10200/todo/dataset/_search 12 | { 13 | "query": { "match_all": {}}, 14 | "size": 0 15 | } 16 | 17 | # distribution 18 | POST http://localhost:10200/todo/dataset/_search 19 | { 20 | "aggs": { 21 | "act_agg_name": { 22 | "terms": {"field": "class-label", "size": 0} 23 | } 24 | }, 25 | "size": 0 26 | } 27 | 28 | # search 29 | POST http://localhost:10200/todo/dataset/_search 30 | { 31 | "query": { "match_all": {}}, 32 | "size": 1 33 | } 34 | 35 | # act by class 36 | POST http://localhost:10200/todo/dataset/_search 37 | { 38 | "query": { 39 | "term": {"class-label": "self-improve"} 40 | }, 41 | "size": 5 42 | } 43 | 44 | # act by class 45 | POST http://localhost:10200/todo/dataset/_search 46 | { 47 | "query": { 48 | "term": {"class-label": "self-improve"} 49 | }, 50 | "fields": ["class-label"], 51 | "size": 5 52 | } 53 | 54 | # counts 55 | POST http://localhost:10200/todo/dataset/_search 56 | { 57 | "query": { "match_all": {}}, 58 | "size": 0 59 | } 60 | 61 | # just keys 62 | POST http://localhost:10200/todo/dataset/_search 63 | { 64 | "query": { "match_all": {}}, 65 | "fields": [] 66 | } 67 | 68 | # search for text 69 | POST http://localhost:10200/todo/dataset/_search 70 | { 71 | "query": { 72 | "match": {"_all": "office"} 73 | }, 74 | "size": 1 75 | } 76 | 77 | ## stats 78 | GET http://localhost:10200/todo/stats/_search 79 | { 80 | "query": { "match_all": {}}, 81 | "from": 0 82 | } 83 | 84 | 85 | 86 | ## stats (test/train splits) 87 | # stats mapping 88 | GET http://localhost:10200/todo/stats/_mapping 89 | 90 | # stats--fix this 91 | POST http://localhost:10200/todo/stats/_search 92 | { 93 | "query": { "match_all": {}} 94 | } 95 | 96 | # stats 97 | POST http://localhost:10200/todo/stats/_search 98 | { 99 | "query": { "match_all": {}} 100 | } 101 | 
-------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | ## makefile automates the build and deployment for lein projects 2 | 3 | # type of project, currently one of: clojure, python 4 | PROJ_TYPE= clojure 5 | PROJ_MODULES= nlpmodel appassem 6 | # namespace is not templatized 7 | GITUSER= plandes 8 | GITPROJ= todo-task 9 | 10 | # project specific 11 | PAPER_DOC_SRC_DIR= $(abspath $(DOC_SRC_DIR)) 12 | ADD_CLEAN= results.xls $(PAPER_DOC_SRC_DIR) 13 | DIST_PREFIX= $(HOME)/opt/app 14 | 15 | TODO_CONF= resources/todocorp.conf 16 | TODO_OP= -c $(TODO_CONF) 17 | 18 | # make build dependencies 19 | _ := $(shell [ ! -d .git ] && git init ; [ ! -d zenbuild ] && \ 20 | git submodule add https://github.com/plandes/zenbuild && make gitinit ) 21 | 22 | include ./zenbuild/main.mk 23 | 24 | .PHONY: help 25 | help: 26 | $(LEIN) run 27 | 28 | .PHONY: testprep 29 | testprep: 30 | mkdir -p dev-resources 31 | mkdir -p results 32 | make models 33 | 34 | .PHONY: test 35 | test: testprep 36 | $(LEIN) test 37 | 38 | .PHONY: startes 39 | startes: 40 | make -C docker-es up 41 | 42 | .PHONY: stopes 43 | stopes: 44 | make -C docker-es down 45 | 46 | .PHONY: load 47 | load: testprep 48 | $(LEIN) run load -l INFO $(TODO_OP) 49 | 50 | .PHONY: features 51 | features: testprep 52 | $(LEIN) run features -f 500 $(TODO_OP) 53 | 54 | .PHONY: dsprep 55 | dsprep: testprep 56 | $(LEIN) run dsprep -l INFO $(TODO_OP) 57 | 58 | .PHONY: print 59 | print: testprep 60 | $(LEIN) run print -l INFO $(TODO_OP) 61 | 62 | .PHONY: printbest 63 | printbest: testprep 64 | $(LEIN) run print -l INFO --metaset set-best --classifiers j48 $(TODO_OP) 65 | 66 | .PHONY: evaluate 67 | evaluate: testprep 68 | $(LEIN) run evaluate -l INFO $(TODO_OP) 69 | 70 | .PHONY: predict 71 | predict: testprep 72 | $(LEIN) run predict -l INFO $(TODO_OP) 73 | 74 | .PHONY: disttodo 75 | disttodo: dist 76 | cp $(TODO_CONF) 
$(DIST_DIR) 77 | cp src/bin/run.sh $(DIST_DIR) 78 | # needed to silence a deeplearn4j exception 79 | mkdir -p $(DIST_DIR)/dev-resources 80 | mkdir -p $(DIST_DIR)/resources 81 | cp resources/todo-dataset.json $(DIST_DIR)/resources 82 | 83 | .PHONY: alldocs 84 | alldocs: 85 | mkdir -p doc 86 | make FINAL_PDF_DIR=$(PAPER_DOC_SRC_DIR) -C ../../paper clean pdf 87 | make FINAL_PDF_DIR=$(PAPER_DOC_SRC_DIR) -C ../../slides clean pdf 88 | make docs 89 | -------------------------------------------------------------------------------- /test/uic/nlp/todo/eval_test.clj: -------------------------------------------------------------------------------- 1 | (ns ^{:doc "This namespace is REPL prototyping fodder and *not* not real unit 2 | test cases."} 3 | uic.nlp.todo.eval-test 4 | (:require [zensols.actioncli.dynamic :as dyn] 5 | [zensols.model.classifier :as cl] 6 | [zensols.model.execute-classifier :as ex :refer (with-model-conf)] 7 | [zensols.model.eval-classifier :as ec :refer (with-two-pass)] 8 | [uic.nlp.todo.feature :as f :refer (with-feature-context)] 9 | [uic.nlp.todo.db :as edb] 10 | [uic.nlp.todo.eval :refer :all] 11 | [uic.nlp.todo.db :as edb])) 12 | 13 | (defn- main [& actions] 14 | (let [classifiers [:fast :tree :meta :lazy] 15 | meta-set :set-compare] 16 | (binding [ec/*default-set-type* :train-test 17 | cl/*rand-fn* (fn [] (java.util.Random. 1)) 18 | edb/*low-class-count-threshold* 10] 19 | (with-model-conf (create-model-config) 20 | (with-feature-context 21 | (f/create-context :anons-fn f/instance-deref-anons-fn 22 | :set-type :train) 23 | (->> (map (fn [action] 24 | (case action 25 | 0 (dyn/purge) 26 | 1 (do (edb/divide-by-set 0.9) 27 | (edb/stats)) 28 | 2 (ec/print-best-results classifiers meta-set) 29 | 3 (ec/terse-results classifiers meta-set :only-stats? true) 30 | 4 (-> (ec/create-model classifiers meta-set) 31 | (ec/train-model :set-type :train) 32 | ec/write-model) 33 | 5 (-> (ex/read-model) 34 | (ex/print-model-info :results? 
true)) 35 | 6 (->> (ex/read-model) 36 | ex/prime-model 37 | ex/predict 38 | ex/write-predictions) 39 | 7 (-> (ex/read-model) 40 | ex/prime-model 41 | ex/predict) 42 | 8 (ec/eval-and-write classifiers meta-set) 43 | 9 (write-arff) 44 | 10 (-> (ec/create-model classifiers meta-set) 45 | (ec/train-model :set-type :train) 46 | ex/prime-model 47 | ex/predict 48 | ex/write-predictions 49 | ))) 50 | actions) 51 | doall)))))) 52 | -------------------------------------------------------------------------------- /src/python/retro-intercoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import csv,pprint, math, sys 4 | import pandas as pd 5 | 6 | ann1 = '../../../todocorp/annotated/relabeled.xlsx' 7 | ann2 = '../../../todocorp/annotated/annotator2.xlsx' 8 | relabeled_ic = '../../results/intercoder-relabeled.csv' 9 | pruned_file = '../../results/pruned.csv' 10 | 11 | def read_sheet(fname): 12 | ann = pd.ExcelFile(fname) 13 | df = ann.parse('todo_corpus') 14 | dats = [] 15 | for index, row in df.iterrows(): 16 | dats.append(row) 17 | return dats 18 | 19 | def validate(df1, df2): 20 | if len(df1) != len(df2): 21 | raise ValueError('length') 22 | print('lengths validate') 23 | for i in range(len(df1)): 24 | r1, r2 = df1[i], df2[i] 25 | if r1['utterance'] != r2['utterance']: 26 | raise ValueError('alignment: {}, {}'.format(r1, r2)) 27 | print('utterance validate') 28 | 29 | def info(df1, df2): 30 | print('len: {}, {}'.format(len(df1), len(df2))) 31 | 32 | def candidate(val): 33 | return isinstance(val, str) 34 | 35 | def collapse_class(val): 36 | m = {'buy-general': 'buy', 37 | 'buy-grocery': 'buy', 38 | 'buy-travel': 'buy', 39 | 'buy-wedding': 'buy', 40 | 'search-general': 'search', 41 | 'search-recipe': 'search', 42 | 'office-contact': 'contact', 43 | 'office-calendar': 'calendar', 44 | 'office': 'calendar', 45 | 'household': 'calendar', 46 | } 47 | if val in m: 48 | return m[val] 49 | return val 50 | 51 | 
def write_intercoder(df1, df2): 52 | agree = 0 53 | rows = 0 54 | with open(relabeled_ic, 'w') as f: 55 | c_writer = csv.writer(f) 56 | c_writer.writerow(['annotator1', 'annotator2']) 57 | for i in range(len(df1)): 58 | r1, r2 = df1[i], df2[i] 59 | a1, a2 = r1['class'], r2['class'] 60 | if candidate(a1) and candidate(a2): 61 | rows = rows + 1 62 | a2 = collapse_class(a2) 63 | c_writer.writerow([a1, a2, r1['utterance']]) 64 | #c_writer.writerow([a1, a2]) 65 | if a1 == a2: agree = agree + 1 66 | print('agree: %.2f (%s/%s)' % ((agree/rows), agree, rows)) 67 | 68 | def pruned(): 69 | df1 = read_sheet(ann1) 70 | with open(pruned_file, 'w') as f: 71 | c_writer = csv.writer(f) 72 | c_writer.writerow(['class', 'utterance']) 73 | for r in df1: 74 | if candidate(r['class']): 75 | c_writer.writerow([r['class'], r['utterance']]) 76 | 77 | def main(): 78 | df1 = read_sheet(ann1) 79 | df2 = read_sheet(ann2) 80 | validate(df1, df2) 81 | write_intercoder(df1, df2) 82 | pruned() 83 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject edu.uic.nlp/todotask "0.1.0-SNAPSHOT" 2 | :description "Categorize natural language todo list items" 3 | :url "https://github.com/plandes/todo-task" 4 | :license {:name "MIT" 5 | :url "https://opensource.org/licenses/MIT" 6 | :distribution :repo} 7 | :plugins [[lein-codox "0.10.3"] 8 | [lein-javadoc "0.3.0"] 9 | [org.clojars.cvillecsteele/lein-git-version "1.2.7"]] 10 | :codox {:metadata {:doc/format :markdown} 11 | :project {:name "Todo Categorization"} 12 | :output-path "target/doc/codox" 13 | :source-uri "https://github.com/plandes/todo-task/blob/v{version}/{filepath}#L{line}"} 14 | :javadoc-opts {:package-names ["edu.uic.nlp.todo-task"] 15 | :output-dir "target/doc/apidocs"} 16 | :git-version {:root-ns "uic.nlp.todo" 17 | :path "src/clojure/uic/nlp/todo" 18 | :version-cmd "git describe --match v*.* --abbrev=4 
--dirty=-dirty"} 19 | :source-paths ["src/clojure"] 20 | :test-paths ["test" "test-resources"] 21 | :java-source-paths ["src/java"] 22 | :javac-options ["-Xlint:unchecked"] 23 | :jar-exclusions [#".gitignore"] 24 | :exclusions [com.zensols.tools/actioncli 25 | ch.qos.logback/logback-classic 26 | log4j 27 | org.slf4j/slf4j-log4j12 28 | org.yaml/snakeyaml] 29 | :dependencies [[org.clojure/clojure "1.8.0"] 30 | 31 | ;; logging for core 32 | [org.apache.logging.log4j/log4j-1.2-api "2.7"] 33 | [org.apache.logging.log4j/log4j-core "2.7"] 34 | [org.apache.logging.log4j/log4j-jcl "2.7"] 35 | [org.apache.logging.log4j/log4j-jul "2.7"] 36 | [org.apache.logging.log4j/log4j-slf4j-impl "2.7"] 37 | 38 | ;; read ini files 39 | [com.brainbot/iniconfig "0.2.0"] 40 | 41 | ;; nlp/ml 42 | [com.zensols.tools/actioncli "0.0.27"] 43 | [com.zensols.nlp/wordvec "0.0.1" 44 | :exclusions [org.apache.httpcomponents/httpmime 45 | org.apache.httpcomponents/httpclient 46 | org.clojure/tools.macro]] 47 | [com.zensols.ml/model "0.0.18"] 48 | [com.zensols.nlp/parse "0.1.6" 49 | :exclusions [com.zensols.tools/actioncli 50 | org.clojure/tools.macro]] 51 | [com.zensols.ml/dataset "0.0.12" 52 | :exclusions [org.apache.lucene/lucene-analyzers-common 53 | org.apache.lucene/lucene-core 54 | org.apache.lucene/lucene-queries 55 | org.apache.lucene/lucene-queryparser 56 | org.apache.lucene/lucene-sandbox]]] 57 | :pom-plugins [[org.codehaus.mojo/appassembler-maven-plugin "1.6" 58 | {:configuration ([:program 59 | ([:mainClass "uic.nlp.todo.core"] 60 | [:id "todotask"])] 61 | [:environmentSetupFileName "setupenv"])}]] 62 | :profiles {:1.9 {:dependencies [[org.clojure/clojure "1.9.0"]]} 63 | :uberjar {:aot [uic.nlp.todo.core]} 64 | :appassem {:aot :all} 65 | :snapshot {:git-version {:version-cmd "echo -snapshot"}} 66 | :dev 67 | {:exclusions [org.slf4j/slf4j-log4j12 68 | log4j/log4j 69 | ch.qos.logback/logback-classic]} 70 | :test {:jvm-opts ["-Dlog4j.configurationFile=test-resources/test-log4j2.xml" 71 | 
"-Xms4g" "-Xmx30g" "-XX:+UseConcMarkSweepGC"]}} 72 | :main uic.nlp.todo.core) 73 | -------------------------------------------------------------------------------- /src/clojure/uic/nlp/todo/db.clj: -------------------------------------------------------------------------------- 1 | (ns uic.nlp.todo.db 2 | (:require [clojure.java.io :as io] 3 | [clojure.edn :as edn] 4 | [clojure.tools.logging :as log] 5 | [clojure.data.csv :as csv] 6 | [clojure.data.json :as json] 7 | [zensols.actioncli.util :refer (trunc defnlock)] 8 | [zensols.actioncli.dynamic :as dyn] 9 | [zensols.actioncli.resource :as res] 10 | [zensols.nlparse.parse :as p] 11 | [zensols.dataset.db :as db :refer (with-connection)] 12 | [uic.nlp.todo.corpus :as corp])) 13 | 14 | (def ^:dynamic *low-class-count-threshold* 15) 15 | 16 | (defonce ^:private conn-inst (atom nil)) 17 | ;(ns-unmap *ns* 'conn-inst) 18 | 19 | (def ^:dynamic *corpus-read-limit* Integer/MAX_VALUE) 20 | 21 | (defn- read-corpus [add-fn] 22 | (->> (corp/read-anons :limit *corpus-read-limit*) 23 | (map (fn [{:keys [bgid board-id board-name id source short-url 24 | class utterance] :as dmap}] 25 | (log/tracef "corp map: %s, utterance: %s" dmap utterance) 26 | (let [panon (p/parse utterance) 27 | id (str id) 28 | class (get dmap :class) 29 | inst (-> [bgid board-id board-name id source short-url] 30 | (#(select-keys dmap %)) 31 | (assoc :panon panon))] 32 | (log/debugf "adding class: %s, inst: <%s>" class (trunc inst)) 33 | (add-fn id inst class)))) 34 | doall)) 35 | 36 | (defn connection [] 37 | (swap! conn-inst #(or % (db/elasticsearch-connection 38 | "todo" 39 | :url "http://localhost:10200" 40 | :create-instances-fn read-corpus)))) 41 | 42 | (defn reset-instances [] 43 | (reset! 
conn-inst nil)) 44 | 45 | (dyn/register-purge-fn reset-instances) 46 | 47 | (defnlock class-labels-keep [] 48 | (with-connection (connection) 49 | (->> (db/distribution) 50 | (filter (fn [{:keys [count]}] 51 | (> count *low-class-count-threshold*))) 52 | (map :class-label) 53 | set))) 54 | 55 | (defn reset-labels-keep [] 56 | (-> (meta #'class-labels-keep) :init-resource (reset! nil))) 57 | 58 | (dyn/register-purge-fn reset-labels-keep) 59 | 60 | (defn- filter-low-class-counts [id] 61 | (contains? (class-labels-keep) 62 | (:class-label (db/instance-by-id id)))) 63 | 64 | (defn load-corpora 65 | "Load the corpus." 66 | [] 67 | (with-connection (connection) 68 | (db/instances-load))) 69 | 70 | (defn anons 71 | "Return all annotations" 72 | [& opts] 73 | (with-connection (connection) 74 | (apply db/instances opts))) 75 | 76 | (defn anon-by-id 77 | "Return an annotation using its ID." 78 | [& opts] 79 | (with-connection (connection) 80 | (apply db/instance-by-id opts))) 81 | 82 | (defn divide-by-set 83 | "Create a test/train dataset." 84 | [train-ratio] 85 | (with-connection (connection) 86 | (db/divide-by-set train-ratio 87 | :dist-type 'even 88 | :filter-fn filter-low-class-counts))) 89 | 90 | (defn divide-by-fold [& opts] 91 | (with-connection (connection) 92 | (apply db/divide-by-fold opts))) 93 | 94 | (defn set-fold [fold] 95 | (with-connection (connection) 96 | (db/set-fold fold))) 97 | 98 | (defn write-dataset [] 99 | (with-connection (connection) 100 | (db/write-dataset :output-file "resources/todo-dataset.xls"))) 101 | 102 | (defn freeze-dataset [] 103 | (with-connection (connection) 104 | (db/freeze-dataset :output-file "resources/todo-dataset.json"))) 105 | 106 | (defn stats 107 | "Return the result of `db/stats` applied to `opts`." 108 | [& opts] 109 | (with-connection (connection) 110 | (apply db/stats opts))) 111 | 112 | (defn distribution 113 | "Get the label distribution across all todos." 
114 | [] 115 | (->> (anons :set-type :train-test) 116 | (map :class-label) 117 | (reduce (fn [res a] 118 | (assoc res a (+ (or (get res a) 0) 1))) 119 | {}))) 120 | -------------------------------------------------------------------------------- /results/predictions.csv: -------------------------------------------------------------------------------- 1 | pred-label,correct-label,correct?,confidence,similarity-top-label,similarity-score,utterance,elected-verb-id,utterance-length,mention-count,sent-count,token-count,token-average-length,stopword-count,is-question,pos-last-tag,pos-first-tag,pos-tag-ratio-adjective,pos-tag-ratio-adverb,pos-tag-ratio-verb,pos-tag-ratio-noun,pos-tag-ratio-wh,pos-tag-count-adjective,pos-tag-count-adverb,pos-tag-count-verb,pos-tag-count-noun,pos-tag-count-wh,word-count-service,word-count-find-service,word-count-calendar,word-count-buy,word-count-contact,word-count-call,word-count-pay-bill-online,word-count-email,word-count-postal 2 | contact,contact,true,0.35294117647058826,call,0.38545797065870974,Confirm final details with photographer (and videographers),951117504,59,0,1,9,53/9,2,false,-RRB-,VB,1/9,0,1/9,1/3,0,1,0,1,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 3 | service,contact,false,0.3684210526315789,call,0.5630079646305791,Speak to your first restaurant,109641682,30,1,1,5,26/5,1,false,NN,VB,1/5,0,1/5,1/5,0,1,0,1,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 4 | contact,contact,true,0.6666666666666666,call,0.40443043341961105,Reach out to Seattle restaurant coalition orgs,108386675,46,1,1,7,40/7,1,false,NNS,VB,0,0,1/7,4/7,0,0,0,1,4,0,0.07142857142857142,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 5 | contact,contact,true,0.9411764705882353,calendar,0.1337894063649906,Local Restuarant followup,1822752074,25,0,1,3,23/3,0,false,NN,JJ,1/3,0,0,2/3,0,1,0,0,2,0,0.0,0.0,0.0,0.0,0.1666666666666667,0.0,0.0,0.0,0.0 6 | service,service,true,0.7878787878787878,service,0.5161448025277683,clean 
bathroom,1822752074,14,0,1,2,13/2,0,false,NN,JJ,1/2,0,0,1/2,0,1,0,0,1,0,0.1785714285714286,0.0,0.0,0.04347826086956522,0.0,0.0,0.0,0.0,0.0 7 | buy,service,false,0.6666666666666666,buy,0.42847449988450687,household - water plants,1822752074,24,0,1,4,21/4,0,false,NNS,NN,0,0,0,3/4,0,0,0,0,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.03846153846153846,0.0,0.0 8 | service,service,true,0.7878787878787878,service,0.3845594759941811,clean up woodpile,1822752074,17,0,1,3,5,0,false,NN,JJ,1/3,0,0,1/3,0,1,0,0,1,0,0.2857142857142857,0.0,0.0,0.0,0.07142857142857142,0.0,0.0,0.0,0.05263157894736842 9 | buy,service,false,0.6666666666666666,buy,0.3956083180247874,figure out bookmarks,1822752074,20,0,1,3,6,0,false,NNS,NN,0,0,0,2/3,0,0,0,0,2,0,0.07142857142857142,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 10 | buy,buy,true,0.9142857142857143,buy,0.43503205480215995,Go get plants at Tilth Sale,3304,27,1,1,6,11/3,1,false,NNP,VB,0,0,1/3,1/2,0,0,0,2,3,0,0.0,0.0,0.06060606060606061,0.1304347826086957,0.0,0.0,0.0,0.09523809523809525,0.0 11 | service,buy,false,0.3684210526315789,call,0.4596336372196674,choose front door fixture,-1361218025,25,0,1,4,11/2,0,false,NN,VB,1/4,0,1/4,1/2,0,1,0,1,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 12 | buy,buy,true,0.9142857142857143,buy,0.5170707412711952,Buy Scale,97926,9,0,1,2,4,0,false,NNP,VB,0,0,1/2,1/2,0,0,0,1,1,0,0.0,0.0,0.0,0.3695652173913043,0.0,0.0,0.0,0.0,0.0 13 | buy,buy,true,0.9142857142857143,pay-bill-online,0.5405272486834571,Buy purse,1822752074,9,0,1,2,4,0,false,NN,JJ,1/2,0,0,1/2,0,1,0,0,1,0,0.0,0.0,0.0,0.3695652173913043,0.0,0.0,0.0,0.0,0.0 14 | calendar,buy,false,1.0,service,0.28456407406234313,Select cake topper,1822752074,18,1,1,3,16/3,0,false,NN,NNP,0,0,0,1,0,0,0,0,3,0,0.0,0.0,0.09090909090909091,0.0,0.0,0.0,0.0,0.0,0.0 15 | postal,postal,true,0.75,postal,0.5151746175006816,Send invitations,3526536,16,0,1,2,15/2,0,false,NNS,VB,0,0,1/2,1/2,0,0,0,1,1,0,0.0,0.0,0.0,0.0,0.07142857142857142,0.0,0.0,0.09523809523809525,0.3157894736842105 16 | 
email,email,true,0.8333333333333334,call,0.20393406120758872,"write appt emails (BV, UIX, AD mtg, GVSU)",113399775,41,3,1,13,34/13,0,false,-RRB-,VB,1/13,0,1/13,6/13,0,1,0,1,6,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09523809523809525,0.0 17 | find-service,find-service,true,0.3333333333333333,pay-bill-online,0.4027918527523677,Apply to Southxchange,1822752074,21,1,1,3,19/3,1,false,NNP,RB,0,1/3,0,1/3,0,0,1,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 18 | find-service,find-service,true,0.4444444444444444,buy,0.4728407757407378,Fix the CD ROM drive on my computer,101397,35,1,1,8,7/2,2,false,NN,VB,0,0,1/8,1/2,0,0,0,1,4,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 19 | call,call,true,0.9444444444444444,call,0.2983975520212974,call exterminators,1822752074,18,0,1,2,17/2,0,false,NNS,NN,0,0,0,1,0,0,0,0,2,0,0.0,0.0,0.0,0.0,0.0,0.5862068965517241,0.0,0.0,0.0 20 | calendar,calendar,true,1.0,calendar,0.4833775296468626,Schedule appointments with site managers,-697920873,40,0,1,5,36/5,1,false,NNS,VB,0,0,1/5,3/5,0,0,0,1,3,0,0.0,0.0,0.5151515151515152,0.0,0.0,0.0,0.0,0.0,0.0 21 | calendar,calendar,true,1.0,calendar,0.4416694747058279,Schedule sitting for engagement portrait,-697920873,40,0,1,5,36/5,1,false,NN,VB,0,0,2/5,2/5,0,0,0,2,2,0,0.0,0.0,0.303030303030303,0.0,0.0,0.0,0.0,0.0,0.0 22 | pay-bill-online,pay-bill-online,true,1.0,pay-bill-online,0.6746570733734047,pay mortgage,1822752074,12,0,1,2,11/2,0,false,NN,NN,0,0,0,1,0,0,0,0,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.5769230769230769,0.0,0.0 23 | -------------------------------------------------------------------------------- /src/clojure/uic/nlp/todo/feature.clj: -------------------------------------------------------------------------------- 1 | (ns ^{:doc "Feature createion" 2 | :author "Paul Landes"} 3 | uic.nlp.todo.feature 4 | (:require [clojure.tools.logging :as log] 5 | [zensols.actioncli.dynamic :as dyn] 6 | [zensols.actioncli.util :refer (defnlock trunc)] 7 | [zensols.nlparse.parse :as p] 8 | [zensols.nlparse.feature.lang :as fe] 9 | 
[zensols.nlparse.feature.word :as fw] 10 | [zensols.nlparse.feature.word-count :as wc] 11 | [zensols.nlparse.feature.word-similarity :as ws] 12 | [zensols.model.weka :as weka] 13 | [zensols.model.execute-classifier :refer (with-model-conf)] 14 | [zensols.model.eval-classifier :as ec] 15 | [uic.nlp.todo.thaw-db :as tdb] 16 | [uic.nlp.todo.db :as edb])) 17 | 18 | (def id-key :id) 19 | (def class-key :agent) 20 | (def ^{:dynamic true :private true} *context* nil) 21 | (def ^:private wc-config (merge wc/*word-count-config* 22 | {:words-by-label-count 10})) 23 | (def ^:dynamic anons tdb/anons) 24 | (def ^:dynamic anon-by-id tdb/anon-by-id) 25 | 26 | (defnlock classes 27 | [] 28 | (->> (anons :set-type :train-test) 29 | (map :class-label) 30 | distinct 31 | vec)) 32 | 33 | (defn reset [] 34 | (-> (meta #'classes) :init-resource (reset! nil))) 35 | 36 | (dyn/register-purge-fn reset) 37 | 38 | (defmacro with-feature-context 39 | {:style/indent 1} 40 | [context & forms] 41 | `(binding [*context* ~context] 42 | ~@forms)) 43 | 44 | (defn create-features 45 | ([panon] 46 | (create-features panon nil)) 47 | ([panon context] 48 | (log/debugf "creating features (context=<%s>) for <%s>" 49 | (trunc context) (trunc panon)) 50 | (let [{:keys [word-count-stats]} context 51 | tokens (p/tokens panon)] 52 | (binding [wc/*word-count-config* wc-config] 53 | (merge (fe/verb-features (->> panon :sents first)) 54 | (fw/token-features panon tokens) 55 | (fe/pos-tag-features tokens) 56 | (if word-count-stats 57 | (wc/label-count-score-features panon word-count-stats)) 58 | (if word-count-stats 59 | (ws/similarity-features tokens word-count-stats))))))) 60 | 61 | (defn- flatten-keys [adb-keys] 62 | (mapcat #(into [] %) adb-keys)) 63 | 64 | (defn create-feature-sets [& {:keys [context] :as adb-keys}] 65 | (log/debugf "creating features with keys=%s: %s" 66 | adb-keys (trunc adb-keys)) 67 | (let [context (or context *context*) 68 | {:keys [anons-fn]} context 69 | anons (apply anons-fn (->> 
(flatten-keys adb-keys) 70 | (concat [:include-ids? true])))] 71 | (->> anons 72 | (map (fn [{:keys [class-label instance id]}] 73 | (merge {:utterance (:text instance) 74 | id-key id} 75 | {class-key class-label} 76 | (create-features instance context))))))) 77 | 78 | (defn create-context 79 | [& {:keys [anons-fn] :as adb-keys}] 80 | (let [fkeys (flatten-keys adb-keys) 81 | anons (apply anons-fn fkeys)] 82 | (log/debugf "creating context with key=%s anon count: %d" 83 | (trunc adb-keys) (count anons)) 84 | (log/tracef "adb-keys: %s" (pr-str adb-keys)) 85 | (binding [wc/*word-count-config* wc-config] 86 | (let [stats (wc/calculate-feature-stats anons)] 87 | {:anons-fn anons-fn 88 | :word-count-stats stats})))) 89 | 90 | (defn word-count-features [] 91 | (->> (classes) 92 | (map #(->> % (format "word-count-%s") symbol)))) 93 | 94 | (defn feature-metas [& _] 95 | (concat (ws/similarity-feature-metas (classes)) 96 | [[:utterance 'string]] 97 | (fe/verb-feature-metas) 98 | (fw/token-feature-metas) 99 | (fe/pos-tag-feature-metas) 100 | (wc/label-word-count-feature-metas (classes)))) 101 | 102 | (defn- class-feature-meta [] 103 | [class-key (classes)]) 104 | 105 | (defn create-model-config [] 106 | {:name (name class-key) 107 | :context-fn #(:word-count-stats *context*) 108 | :set-context-fn #(array-map :word-count-stats %) 109 | :create-feature-sets-fn create-feature-sets 110 | :create-features-fn create-features 111 | :feature-metas-fn feature-metas 112 | :class-feature-meta-fn class-feature-meta 113 | :create-two-pass-context-fn create-context 114 | :model-return-keys #{:label :distributions :features}}) 115 | 116 | (defn instance-deref-anon-fn [id] 117 | (-> (anon-by-id id) 118 | :instance 119 | :panon)) 120 | 121 | (defn instance-deref-anons-fn [& keys] 122 | (->> (apply anons keys) 123 | (map (fn [{:keys [class-label instance id]}] 124 | {:class-label class-label 125 | :id id 126 | :instance (:panon instance)})))) 127 | 128 | (defn display-features [& {:keys 
[num-features] 129 | :or {num-features 100}}] 130 | (with-feature-context 131 | (create-context :anons-fn instance-deref-anons-fn) 132 | (with-model-conf (create-model-config) 133 | (ec/display-features :max num-features)))) 134 | -------------------------------------------------------------------------------- /src/clojure/uic/nlp/todo/cli.clj: -------------------------------------------------------------------------------- 1 | (ns uic.nlp.todo.cli 2 | (:require [clojure.java.io :as io] 3 | [clojure.string :as s] 4 | [clojure.edn :as edn] 5 | [clojure.tools.logging :as log] 6 | [clojure.pprint :as pp] 7 | [zensols.actioncli.log4j2 :as lu] 8 | [zensols.actioncli.util :refer (trunc)] 9 | [zensols.model.eval-classifier :as ec] 10 | [zensols.model.execute-classifier :as ex] 11 | [uic.nlp.todo.db :as db] 12 | [uic.nlp.todo.feature :as fe] 13 | [uic.nlp.todo.eval :as ev :refer (with-single-pass)] 14 | [clojure.string :as s])) 15 | 16 | (defn config-file-option [] 17 | ["-c" "--config" "the configuration file path" 18 | :default "todocorp.conf" 19 | :required "" 20 | :parse-fn io/file 21 | :validate [(fn [file] 22 | (if (.exists file) 23 | (do (->> (.getAbsolutePath file) 24 | (System/setProperty "zensols.todocorp-config")) 25 | true) 26 | false)) 27 | "Must be an existing file"]]) 28 | 29 | ;; ["-s" "--step" "create a moving train/test split with increment step (0.03 is a good start)" 30 | ;; :required "" 31 | ;; :parse-fn edn/read-string] 32 | 33 | (defn output-file-option [default] 34 | ["-o" "--output" "output file name or '-' to print results" 35 | :default default 36 | :required ""]) 37 | 38 | (defn metaset-option [] 39 | ["-m" "--metaset" "features set as defined in eval.clj" 40 | :default :set-compare 41 | :required "" 42 | :parse-fn keyword]) 43 | 44 | (defn classifiers-option [] 45 | ["-a" "--classifiers" "comma separated classifier list" 46 | :default [:fast :lazy :tree :meta :slow] 47 | :required "" 48 | :parse-fn (fn [classifiers] 49 | (->> classifiers 50 | 
(#(s/split % #"\s*,\s*")) 51 | (map (fn [csym] 52 | (if (s/index-of csym ".") 53 | (.newInstance (Class/forName csym)) 54 | (keyword csym)))) 55 | vec))]) 56 | 57 | (def load-corpora-command 58 | "CLI command to load the corpora into elastic search" 59 | {:description "load corpus data into ElasticSearch" 60 | :options 61 | [(lu/log-level-set-option) 62 | (config-file-option)] 63 | :app (fn [& _] 64 | (db/load-corpora))}) 65 | 66 | (def split-dataset-command 67 | "CLI command to split the dataset" 68 | {:description "split the data into train and test sets and dump the JSON representation to disk" 69 | :options [(lu/log-level-set-option) 70 | (config-file-option) 71 | ["-s" "--split" "number (0-1) to leave for training, remaining will be used for test" 72 | :default 0.9 73 | :required "" 74 | :parse-fn edn/read-string 75 | :validate [#(and (> % 0.0) (< % 1.0)) "Must be a number between (0-1)"]]] 76 | :app (fn [{:keys [split]} & _] 77 | (log/infof "spliting data: %.2f" split) 78 | (db/divide-by-set split) 79 | (println "statistics:") 80 | (->> {:split (db/stats) 81 | :distribution (db/distribution)} 82 | pp/pprint) 83 | (db/freeze-dataset))}) 84 | 85 | (def features-command 86 | "CLI command to show features" 87 | {:description "show features" 88 | :options [(lu/log-level-set-option) 89 | (config-file-option) 90 | ["-f" "--features" "the number of features to display" 91 | :default 100 92 | :required "" 93 | :parse-fn edn/read-string]] 94 | :app (fn [{:keys [features]} & _] 95 | (println "Press CONTROL-C to quit") 96 | (fe/display-features :num-features features))}) 97 | 98 | (def print-evaluate-command 99 | "CLI command to evaluate the model" 100 | {:description "evaluate the model using a cross fold validation across feature sets" 101 | :options [(lu/log-level-set-option) 102 | (config-file-option) 103 | (classifiers-option) 104 | (metaset-option)] 105 | :app (fn [{:keys [metaset classifiers]} & _] 106 | (with-single-pass 107 | (ec/print-best-results classifiers 
metaset)))}) 108 | 109 | (def evaluates-spreadsheet-command 110 | "CLI command to evaluate and output results" 111 | {:description "evaluate the model and output the results to a spreadsheet" 112 | :options [(lu/log-level-set-option) 113 | (output-file-option (io/file "evaluation.xls")) 114 | (config-file-option) 115 | (classifiers-option) 116 | (metaset-option)] 117 | :app (fn [{:keys [output metaset classifiers]} & _] 118 | (with-single-pass 119 | (ec/eval-and-write classifiers metaset output)))}) 120 | 121 | (def predict-spreadsheet-command 122 | "CLI command to predict the model" 123 | {:description "evaluate the model, classify, and output the test set" 124 | :options [(lu/log-level-set-option) 125 | (output-file-option (io/file "predictions.csv")) 126 | (config-file-option) 127 | (classifiers-option) 128 | (metaset-option)] 129 | :app (fn [{:keys [output metaset classifiers]} & _] 130 | (with-single-pass 131 | (try 132 | (-> (ec/create-model classifiers metaset) 133 | (ec/train-model :set-type :train) 134 | ex/prime-model 135 | ex/predict 136 | (#(ex/write-predictions % output))) 137 | (catch Exception e 138 | (println (trunc e))))))}) 139 | -------------------------------------------------------------------------------- /src/clojure/uic/nlp/todo/eval.clj: -------------------------------------------------------------------------------- 1 | (ns ^{:doc "Evaluation of the model using features generated 2 | by [[uic.nlp.todo.feature]]." 
3 | :author "Paul Landes"} 4 | uic.nlp.todo.eval 5 | (:require [clojure.tools.logging :as log] 6 | [clojure.set :refer (union)] 7 | [zensols.actioncli.dynamic :as dyn] 8 | [zensols.model.classifier :as cl] 9 | [zensols.model.execute-classifier :refer (with-model-conf) :as ex] 10 | [zensols.model.eval-classifier :as ec :refer (with-two-pass)] 11 | [uic.nlp.todo.feature :as f :refer (with-feature-context)] 12 | [uic.nlp.todo.db :as adb] 13 | [uic.nlp.todo.eval :as ev])) 14 | 15 | (defonce ^:private cross-fold-instances-inst (atom nil)) 16 | (defonce ^:private train-test-instances-inst (atom nil)) 17 | 18 | (defn feature-sets-set 19 | "Feature sets to use in the various evaluations of the model." 20 | [] 21 | {:set-compare (list (concat (f/word-count-features) 22 | '(elected-verb-id 23 | token-average-length 24 | pos-first-tag 25 | pos-last-tag 26 | similarity-top-label 27 | similarity-score 28 | pos-tag-ratio-noun)) 29 | (concat (f/word-count-features) 30 | '(elected-verb-id 31 | similarity-top-label 32 | similarity-score 33 | pos-tag-ratio-noun)) 34 | (concat (f/word-count-features) 35 | '(elected-verb-id 36 | token-average-length 37 | pos-first-tag 38 | pos-last-tag 39 | pos-tag-ratio-noun)) 40 | (concat (f/word-count-features) 41 | '(elected-verb-id 42 | token-average-length 43 | similarity-top-label 44 | pos-first-tag 45 | pos-last-tag 46 | pos-tag-ratio-noun)) 47 | (concat '(elected-verb-id 48 | token-average-length 49 | pos-first-tag 50 | pos-last-tag 51 | similarity-top-label 52 | similarity-score 53 | pos-tag-ratio-noun)) 54 | '(similarity-top-label 55 | pos-last-tag 56 | word-count-contact 57 | word-count-call 58 | word-count-buy 59 | word-count-calendar 60 | word-count-pay-bill-online 61 | pos-first-tag 62 | word-count-plan-meal 63 | word-count-email 64 | word-count-postal 65 | word-count-school-work 66 | word-count-print)) 67 | :set-sel (list '(similarity-top-label 68 | pos-last-tag 69 | word-count-contact 70 | word-count-call 71 | word-count-buy 72 | 
word-count-calendar 73 | word-count-pay-bill-online 74 | pos-first-tag 75 | word-count-plan-meal 76 | word-count-email 77 | word-count-postal 78 | word-count-school-work 79 | word-count-print)) 80 | :set-1 (list (concat (f/word-count-features) 81 | '(elected-verb-id 82 | token-average-length 83 | pos-first-tag 84 | pos-last-tag 85 | similarity-top-label 86 | similarity-score 87 | pos-tag-ratio-noun))) 88 | :set-2 (list (concat (f/word-count-features) 89 | '(elected-verb-id 90 | token-average-length 91 | similarity-top-label 92 | pos-first-tag 93 | pos-last-tag 94 | pos-tag-ratio-noun))) 95 | :set-3 (list (concat (f/word-count-features) 96 | '(elected-verb-id 97 | similarity-top-label 98 | similarity-score 99 | pos-tag-ratio-noun))) 100 | :set-4 (list (concat (f/word-count-features) 101 | '(similarity-top-label 102 | pos-last-tag 103 | pos-first-tag))) 104 | :set-best '((similarity-top-label 105 | pos-last-tag 106 | pos-first-tag 107 | word-count-contact 108 | word-count-call 109 | word-count-buy 110 | word-count-calendar 111 | word-count-pay-bill-online 112 | word-count-plan-meal 113 | word-count-email 114 | word-count-postal 115 | word-count-school-work 116 | word-count-print))}) 117 | 118 | (defn reset-instances [] 119 | (reset! cross-fold-instances-inst nil) 120 | (reset! train-test-instances-inst nil)) 121 | 122 | (dyn/register-purge-fn reset-instances) 123 | 124 | (defn create-model-config 125 | "Create the model configuration for this evalation." 126 | [] 127 | (letfn [(divide-by-set [divide-ratio] 128 | (adb/divide-by-set divide-ratio :shuffle? false) 129 | (reset! train-test-instances-inst nil))] 130 | (merge (f/create-model-config) 131 | {:cross-fold-instances-inst cross-fold-instances-inst 132 | :train-test-instances-inst train-test-instances-inst 133 | :feature-sets-set (feature-sets-set) 134 | :divide-by-set divide-by-set}))) 135 | 136 | (defmacro with-single-pass 137 | "Create bindings and contexts for a single pass train/test model evaluation." 
138 | {:style/indent 0} 139 | [& body] 140 | `(binding [cl/*rand-fn* (fn [] (java.util.Random. 1)) 141 | ec/*default-set-type* :train-test 142 | adb/*low-class-count-threshold* 10] 143 | (with-model-conf (create-model-config) 144 | (with-feature-context 145 | (f/create-context :anons-fn f/instance-deref-anons-fn 146 | :set-type :train) 147 | (do ~@body))))) 148 | 149 | (defn write-arff 150 | "Write a Weka ARFF file (handy for importing in R/scikit-learn etc)." 151 | [] 152 | (binding [cl/*rand-fn* (fn [] (java.util.Random. 1)) 153 | ec/*default-set-type* :train-test 154 | adb/*low-class-count-threshold* 0 155 | f/anons adb/anons 156 | f/anon-by-id adb/anon-by-id] 157 | (with-model-conf (create-model-config) 158 | (with-feature-context (f/create-context :anons-fn f/instance-deref-anons-fn 159 | :set-type :train) 160 | (dyn/purge) 161 | (ec/write-arff))))) 162 | -------------------------------------------------------------------------------- /src/clojure/uic/nlp/todo/corpus.clj: -------------------------------------------------------------------------------- 1 | (ns ^{:doc "This namespace parses the corpus from an Excel file."} 2 | uic.nlp.todo.corpus 3 | (:require [clojure.string :as s] 4 | [clojure.data.csv :as csv] 5 | [clojure.java.io :as io] 6 | [clojure.tools.logging :as log] 7 | [com.brainbot.iniconfig :as iniconfig] 8 | [zensols.actioncli.resource :as res] 9 | [zensols.util.spreadsheet :as ss :refer (with-read-spreadsheet)] 10 | [uic.nlp.todo.resource :as ur])) 11 | 12 | (ur/initialize) 13 | 14 | (defn annotation-info 15 | "Return information from the `todocorp.conf` configuration file." 
16 | [] 17 | (let [sec (-> (res/resource-path :todocorp-config-file) 18 | iniconfig/read-ini 19 | (#(get % "default")))] 20 | (log/debugf "annotation info from %s" sec) 21 | (->> sec 22 | (filter (fn [[k v]] 23 | (re-matches #"^annotator\d+" k))) 24 | (map second) 25 | (hash-map :annotators) 26 | (merge {:main-annotator (get sec "annotator_main") 27 | :annotated-dir (get sec "annotated_dir") 28 | :results-dir (get sec "results_dir") 29 | :serialized-dir (get sec "serialized_dir")})))) 30 | 31 | (defn annotated-file 32 | "Return the annoatated todo corpus spreadsheet file." 33 | ([] (annotated-file nil)) 34 | ([annotator] 35 | (let [inf (annotation-info) 36 | annotator (or annotator (:main-annotator inf)) 37 | file (-> inf 38 | :annotated-dir 39 | (io/file (format "%s.xlsx" annotator)))] 40 | (if-not (.exists file) 41 | (-> (format "Un-annotated file not found: %s" file) 42 | (ex-info {:file file}) 43 | throw)) 44 | file))) 45 | 46 | (defn read-for-annotator 47 | "Return a list of maps, each with a Todo list data point." 48 | [& {:keys [limit annotator] 49 | :or {limit Integer/MAX_VALUE}}] 50 | (let [file (annotated-file annotator)] 51 | (with-read-spreadsheet [file rows type] 52 | (->> rows 53 | ;(#(ss/rows-to-maps % )) 54 | ss/rows-to-maps 55 | (map (fn [id {:keys [bgid board-id board-name 56 | ;id 57 | source short-url 58 | class utterance]}] 59 | (let [class (and class (->> (s/trim class) s/lower-case)) 60 | utterance (and utterance (str utterance)) 61 | utterance (and utterance (s/trim utterance))] 62 | (when (and class utterance) 63 | {:id (int id) 64 | :bgid bgid 65 | :board-id board-id 66 | :source source 67 | :class (if (> (count class) 0) class) 68 | :utterance utterance 69 | }))) 70 | (range)) 71 | (remove nil?) 72 | (take limit) 73 | doall)))) 74 | 75 | (defn- anons-by-ids 76 | "Create annotations with unique identifiers." 
77 | [annotator & {:keys [limit] :or {limit Integer/MAX_VALUE}}] 78 | (->> (read-for-annotator :annotator annotator) 79 | (map (fn [{:keys [id] :as elt}] 80 | {id (assoc elt ;(select-keys elt [:class :utterance]) 81 | :annotator annotator)})) 82 | (take limit) 83 | (apply merge))) 84 | 85 | (defn coder-agreement 86 | "Create the output file used by R to create intercoder agreement (Cohen's 87 | Kappa)." 88 | [& {:keys [annotators limit] 89 | :or {limit Integer/MAX_VALUE}}] 90 | (let [info (annotation-info) 91 | {:keys [results-dir]} info 92 | annotators (or annotators (:annotators info)) 93 | outfile (io/file results-dir "intercoder.csv") 94 | ;; annotator -> annotation 95 | by-annotator (->> annotators 96 | (map (fn [annotator] 97 | {annotator (anons-by-ids annotator :limit limit)})) 98 | (apply merge)) 99 | ;; ids of utterances annotated by all annotators 100 | shared-ids (->> by-annotator 101 | vals 102 | (map #(-> % keys set)) 103 | (apply clojure.set/intersection))] 104 | (->> shared-ids 105 | ;; lists of maps id -> annotation list of all annotators 106 | (map (fn [id] 107 | (->> annotators 108 | (map (fn [annotator] 109 | (let [by-id (get by-annotator annotator)] 110 | (get by-id id)))) 111 | (hash-map :id id :anon-list)))) 112 | ;; rows of ID and class of each annotator 113 | (map (fn [{:keys [id anon-list]}] 114 | (let [utterances (map :utterance anon-list)] 115 | ;; sanity check 116 | (if (> (count (distinct utterances)) 1) 117 | (-> (format "unaligned utterances for id %s: %s" 118 | id (s/join ", " utterances)) 119 | (ex-info {:id id 120 | :annotation-list anon-list}) 121 | throw 122 | ;(#(log/warnf "unaligned: %s" %)) 123 | ))) 124 | (cons id (map :class anon-list)))) 125 | ;; CSV header (for R colnames later) 126 | (cons (cons "id" annotators)) 127 | ((fn [data] 128 | (with-open [writer (io/writer outfile)] 129 | (csv/write-csv writer data))))) 130 | (log/infof "wrote intercoder agreement file: %s" outfile) 131 | outfile)) 132 | 133 | (defn read-anons 134 | "Read annotations from the Excel
file." 135 | [& {:keys [annotators limit] 136 | :or {limit Integer/MAX_VALUE}}] 137 | (let [info (annotation-info) 138 | {:keys [results-dir]} info 139 | annotators (or annotators (:annotators info)) 140 | ;; annotator -> annotation 141 | by-annotator (->> annotators 142 | (map (fn [annotator] 143 | {annotator (anons-by-ids annotator :limit limit)})) 144 | (apply merge))] 145 | (->> annotators 146 | reverse 147 | (map (fn [annotator] 148 | (anons-by-ids annotator :limit limit))) 149 | (reduce (fn [res n] 150 | (merge res n))) 151 | vals))) 152 | 153 | (defn serialize-annotations 154 | "Write annotations in an intermediate serialization file. 155 | **Note**: this should not be confused with the JSON generation, 156 | which is done by [[uic.nlp.todo.db/freeze-dataset]]." 157 | [] 158 | (let [{:keys [serialized-dir]} (annotation-info) 159 | out-file (io/file serialized-dir "annotations.dat")] 160 | (with-open [writer (io/writer out-file)] 161 | (binding [*out* writer] 162 | (println (pr-str (read-anons))))) 163 | (log/infof "wrote Clojure serialized annotations data to %s" out-file))) 164 | 165 | (defn deserialize-annotation 166 | "Read back the annotations written by [[serialize-annotations]]." 167 | [] 168 | (let [{:keys [serialized-dir]} (annotation-info) 169 | in-file (io/file serialized-dir "annotations.dat")] 170 | (log/infof "reading annotations data from %s" in-file) 171 | (with-open [reader (io/reader in-file)] 172 | (->> reader 173 | slurp 174 | read-string)))) 175 | 176 | (defn ^:deprecated metrics 177 | "Generate somewhat useful metrics (deprecated)."
178 | [] 179 | (letfn [(source-dist [anons] 180 | (->> anons 181 | (map (fn [{:keys [source]}] 182 | (if (re-matches #"^person.*" source) 183 | "volunteer" 184 | source))) 185 | (reduce (fn [res source] 186 | (merge res {source (inc (or (get res source) 0))})) 187 | {})))] 188 | (merge (->> (read-anons :annotators ["annotator1" "annotator2" "annotator3" "annotator4"]) 189 | source-dist 190 | (array-map :annotated)) 191 | (->> (read-anons :annotators ["relabeled"] ;["annotator1" "annotator2"] 192 | ) 193 | source-dist 194 | (array-map :used))))) 195 | 196 | -------------------------------------------------------------------------------- /results/intercoder.csv: -------------------------------------------------------------------------------- 1 | annotator1,annotator2 2 | service,calendar,Taxes for 2015 3 | service,calendar,Clear out small garden bed 4 | buy,calendar,Get more dirt 5 | service,calendar,plant more plants 6 | buy,buy,Go get dirt from lowes 7 | buy,buy,Go get plants at Tilth Sale 8 | email,contact,email daniel about strawberries 9 | service,calendar,clean bathroom 10 | school-work,calendar,Read Modules 1-4 11 | school-work,calendar,syllabus quiz 12 | school-work,calendar,HW 1 13 | school-work,calendar,Quiz 14 | service,calendar,Set up org file for garden 15 | service,calendar,finish mowing 16 | service,calendar,clean up woodpile 17 | service,calendar,household - setup vlc @ term on doctor 18 | service,calendar,household - water plants 19 | buy,calendar,Buy container mix 20 | buy,calendar,paper for the upstairs desk 21 | buy,buy,order a meditation cushion 22 | service,calendar,Install Quicksilver and experiment 23 | service,calendar,Take out the dog 24 | service,calendar,Wash the dishes 25 | service,calendar,Clean the carpet 26 | find-service,calendar,Get the oil change 27 | service,calendar,Go pick up my son from school 28 | self-improve,calendar,Practice Mandarin Chinese 29 | service,calendar,Clean the litter box 30 | buy,calendar,Rent a carpet cleaning 
machine 31 | find-service,calendar,Install my new sink 32 | buy,buy,Tuscon: buy cannister fuel 33 | buy,buy,Tuscon: buy two 1L smartwater bottles 34 | self-improve,calendar,learn guyline knots/technique 35 | service,calendar,set up tent 36 | service,calendar,test sleep system 37 | find-travel,buy,make travel arrangements to Lordsburg 38 | find-travel,buy,make travel arrangements from Albuquerque 39 | plan-meal,search,identify 4-6 dinner recipes 40 | plan-meal,buy,buy ingredients 41 | plan-meal,calendar,test cook recipes 42 | plan-meal,calendar,package food 43 | service,calendar,assemble first aid kit 44 | service,calendar,assemble toiletries kit 45 | service,calendar,assemble repair kit 46 | buy,buy,buy stuff sacks/pack backpack 47 | service,calendar,make windscreen and pot cozy 48 | pay-bill-online,pay-bill-online,pay ATT 49 | pay-bill-online,pay-bill-online,pay comed 50 | pay-bill-online,pay-bill-online,pay mortgage 51 | pay-bill-online,pay-bill-online,pay student loan 52 | call,calendar,update address-ATT 53 | service,calendar,reconcile bank statement 54 | buy,buy,choose front door fixture 55 | call,calendar,call amazon about kindle 56 | find-service,search,find new insurance agent 57 | find-service,calendar,annual physical 58 | service,calendar,clean closet 59 | buy,buy,buy flower girl headbands 60 | buy,buy,buy bridal party gifts 61 | buy,buy,get wedding insurance 62 | find-travel,buy,book honeymoon 63 | find-service,calendar,Please work on an e-visa with the Cambodian embassy in Vietnam. I want to consider making you the admin person in our NGO paperwork. 
64 | service,calendar,Upload photos to gmail 65 | find-service,calendar,Apply to Southxchange 66 | email,contact,"Contact people to look for money in Myanmar (Ben, Thieu)" 67 | buy,calendar,Remember the Milk 68 | email,contact,Email people affected by timezone bug 69 | find-service,calendar,Identify office/place for me to live 70 | call,contact,"Call Phil Smith, Mark Fukuda" 71 | email,contact,Email/Call Dan Strickman 72 | find-service,calendar,fix coffee pot dispenser 73 | buy,search,research cell phone plans 74 | buy,buy,get suit for Boston 75 | postal,calendar,send info to credit bureau 76 | school-work,calendar,write nutrition paper 77 | contact,calendar,book babysitter 78 | calendar,calendar,Schedule appointments with site managers 79 | find-service,buy,Hire wedding consultant 80 | calendar,buy,"Place order for wedding cake (and groom's cake, if desired)" 81 | find-service,buy,Book officiant 82 | calendar,calendar,Set a meeting with officiant to go through and confirm the details 83 | contact,contact,Contact local town clerk's office to arrange marriage license 84 | buy,buy,Get marriage license 85 | calendar,calendar,"Schedule rehearsal with officiant, all family members, bridesmaids, best men, wedding planner (if applicable) and other participants." 
86 | calendar,calendar,Assign maid of honor / one attendant to arrange train and/or veil at altar 87 | contact,contact,Share contact numbers with at 2 people 88 | service,calendar,Assemble emergency supply kit for wedding day and find a place to keep it 89 | buy,buy,Purchase/make pillow for ring bearer 90 | buy,buy,Purchase birdseed/bubbles/rose petals for guests to shower you with as you leave ceremony site (this custom could instead be performed as you depart from the reception) 91 | contact,calendar,"Confirm with your rental company all details, times, and sites where items must be delivered/picked up" 92 | contact,calendar,Ask caterer/coordinator to have top teir of wedding cake packed up for you (to save for first anniversary) 93 | contact,calendar,Ask caterer/coordinator to have cake topper packed up for you. 94 | calendar,calendar,Schedule sitting for engagement portrait 95 | find-travel,buy,Make your hotel reservation for wedding night 96 | contact,contact,"Send hotel and transportation information to out-of-town guests, include directions from local airorts and cities from which many guests will be arriving by car, information (description, location, phone number) and any code or name that must be mentioned to receive discounted rate when making room reservations" 97 | calendar,calendar,Schedule appointments at bridal salons 98 | calendar,calendar,Make appointments for gown fittings 99 | contact,contact,Inquire as to what you need to bring to fittings 100 | contact,contact,Discuss bustle style of train with seamstress 101 | find-service,calendar,Have shoes dyed to match gown 102 | find-service,buy,Find professional dry cleaner who specialized in bridal gown care to clean and pack up dress andaccessories after wedding 103 | buy,buy,Buy garter 104 | buy,buy,Buy earrings 105 | buy,buy,buy necklace 106 | buy,buy,Buy other jewelry 107 | buy,buy,Buy hair accessories 108 | buy,buy,Buy purse 109 | buy,buy,Buy wrap 110 | buy,buy,Buy gloves 111 | buy,buy,Buy 
going-away outfit 112 | buy,buy,Buy rehearsal dinner outfit 113 | buy,buy,Order stationery items 114 | buy,buy,Buy special postage stamps for invitation envelopes and response envelopes 115 | plan-meal,calendar,Plan menu 116 | postal,send,Send invitations 117 | contact,calendar,Tally final guest count with site manager/caterer 118 | service,calendar,organize closets 119 | plan-meal,search,find healthy baking recipes 120 | plan-meal,calendar,start meal planning for the week 121 | calendar,calendar,go over budget with Matt 122 | contact,calendar,request permit for signage 123 | contact,calendar,request permit for aframe 124 | pay-bill-online,calendar,get bills on autopay 125 | service,calendar,fix website 126 | postal,calendar,pick up mail at post office 127 | find-service,calendar,transfer internet 128 | buy,calendar,get keys made 129 | buy,calendar,subscription to safari library online 130 | find-service,buy,sound proofing for office 131 | find-service,buy,repair laundry room floor 132 | contact,calendar,Contact student orgs regarding Storyteller Rotation 133 | contact,calendar,Followup with World Renew 134 | call,contact,Call Greg Sims back 135 | buy,calendar,Business cards 136 | contact,contact,"Contact Stella's staff on interest for ""Stella's Super Mario Party""" 137 | print,calendar,Print Gen. Giveaway 138 | contact,calendar,Reach out to Seattle restaurant coalition orgs 139 | contact,calendar,Followup with National NPOs 140 | call,calendar,"Call a ""townhall meeting"" at GRBC and personally invite all past/current owners to give input." 141 | print,calendar,Print/have fedex cut 300 giveaways 142 | contact,calendar,Followup with Jenny at KFB about the restaurants' promo request form 143 | school-work,calendar,Apply for Grad. Student assistance from UM 144 | contact,calendar,Ask KFB for list of restaurants they have worked with in the past. 
145 | find-service,calendar,Hire content writer 146 | call,contact,Debrief Constantly \u2013 Call Brooke and discuss how HHCT III went 147 | postal,send,"Send Abby her shirt at 247 W 15th St. Holland, MI 49423" 148 | contact,calendar,Ask IC to recommend us to ILG 149 | contact,calendar,Invite restaurants (and NPOs) to Patagonia event next week 150 | contact,contact,Outreach to 10 restaurants 151 | contact,calendar,Speak to your first restaurant 152 | calendar,calendar,Schedule one tabling event 153 | postal,calendar,Send shirts to our Inner Circlers 154 | buy,calendar,Pre-order from NPO in another state 155 | email,calendar,"write appt emails (BV, UIX, AD mtg, GVSU)" 156 | email,contact,coffeeshop emails 157 | contact,calendar,Arrange print run: giveaway cards 158 | contact,calendar,Followup with GR restaurants 159 | contact,calendar,Press Followups 160 | email,contact,Donate/send 5000 email 161 | postal,send,Mail nonprofit checks to Kyle!!!!! 162 | service,calendar,Iron shirts for Flat lander's 163 | contact,contact,Connect with KFB/FAWM/ACT over checks 164 | contact,calendar,Local Restuarant followup 165 | contact,calendar,Contact Mars Hill about HQ and marketing 166 | contact,calendar,Followup with Local Restaurants 167 | print,calendar,print general giveaways for David 168 | service,calendar,Ironing shirts for staff 169 | contact,calendar,Ann Arbor followups 170 | contact,calendar,Followup with restaurant invoices 171 | call,contact,Call John McCain's team 172 | buy,calendar,order new checks 173 | call,contact,call lesko clients 174 | call,contact,call city of somerville and genki tenants 175 | find-service,calendar,seek new investment ideas 176 | find-service,calendar,repair roof 177 | contact,contact,send note to cindy regarding dance studio taxes 178 | call,calendar,call exterminators 179 | pay-bill-online,calendar,pay rent 180 | postal,calendar,send info to appraiser 181 | pay-bill-online,pay-bill-online,pay NY tax bill 182 | contact,calendar,send updated 
financials to Armand 183 | find-travel,search,look at airbnbs in binghamton 184 | pay-bill-online,calendar,pay plumber 185 | pay-bill-online,calendar,pay roofer 186 | buy,buy,order gpa mount 187 | contact,calendar,change address at bank 188 | contact,calendar,cancel renters insurance with AAA 189 | buy,buy,Buy mirror for upstairs bathroom 190 | postal,calendar,Mail change of address at DMV 191 | calendar,calendar,Pick up moving truck at 5:30pm Friday 192 | call,contact,Call to shut off water 193 | call,contact,Call to shut off gas 194 | call,contact,Call to find out about moving truck 195 | service,calendar,Vacuum hardwoods & couch 196 | buy,calendar,Pick up more boxes 197 | contact,calendar,Cancel Comcast Internet 198 | pay-bill-online,pay-bill-online,Pay SF Parking Ticket. :( 199 | service,calendar,Pack kitchen 200 | service,calendar,Pack spare room 201 | pay-bill-online,pay-bill-online,Pay water bill 202 | pay-bill-online,calendar,Pay PG&E 203 | pay-bill-online,calendar,Pay Expression 204 | service,calendar,Wash Mazda 205 | buy,search,Research bathroom mirrors online 206 | pay-bill-online,calendar,Pay DMV for Harley before May 207 | buy,calendar,Sign up for pottery 208 | find-service,search,Home security research 209 | self-improve,calendar,Sign up for Spanish 210 | calendar,calendar,Appointment with capital women's care 211 | contact,contact,Talk to few lawyers 212 | calendar,calendar,Apple appointment 213 | find-travel,buy,Kansas City tickets 214 | find-travel,calendar,Book a car - dec 13-17 215 | find-travel,buy,Buy Nicaragua tickets 216 | find-service,buy,Find a photographer/marriage counselor in Hawaii 217 | contact,contact,talk to Mike 218 | calendar,calendar,schedule DHPP shot 219 | service,calendar,mop floors 220 | service,calendar,clean bathroom 221 | pay-bill-online,pay-bill-online,pay bills 222 | service,calendar,clean kitchen 223 | call,contact,call grandmom 224 | find-travel,buy,get return flight 225 | service,calendar,move the pee pad 226 | 
buy,buy,replace shoes 227 | service,calendar,glue pumpkin 228 | service,calendar,sweep 229 | call,contact,call health care thing 230 | call,contact,call dad re: moving boxes 231 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Supervised Approach Imperative To-Do List Categorization 2 | 3 | This repository contains a corpus and code base to categorize natural language 4 | todo list items as described in our paper [A Supervised Approach To The 5 | Interpretation Of Imperative To-Do Lists]. 6 | 7 | This repository contains: 8 | 9 | * A publicly available [corpus](#corpus). 10 | * A [code base](#code-base) similar to that given as published results in the 11 | [arXiv paper]. 12 | 13 | 14 | 15 | ## Table of Contents 16 | 17 | - [Documents](#documents) 18 | - [Corpus](#corpus) 19 | - [Citation](#citation) 20 | - [Code Base](#code-base) 21 | - [What's Included](#whats-included) 22 | - [Third Party Libraries](#third-party-libraries) 23 | - [Documentation](#documentation) 24 | - [Running the Tests](#running-the-tests) 25 | - [Parsing](#parsing) 26 | - [Test Evaluation](#test-evaluation) 27 | - [Predictions](#predictions) 28 | - [Off-line Tests](#off-line-tests) 29 | - [Sample Results of Test](#sample-results-of-test) 30 | - [Building](#building) 31 | - [Advanced](#advanced) 32 | - [Changelog](#changelog) 33 | - [Special Thanks](#special-thanks) 34 | - [License](#license) 35 | 36 | 37 | 38 | 39 | ## Documents 40 | 41 | * [Corpus] 42 | * [Paper on arXiv] (please [cite](#citation) this paper) 43 | * [Paper](https://plandes.github.io/todo-task/SupervisedInterpretationImperativeToDos.pdf) (please do **not** 44 | cite this paper). 
45 | * [Slides](https://plandes.github.io/todo-task/SupervisedInterpretationImperativeToDosSlides.pdf) 46 | * [Evaluation](results/full-evaluation.xls) (generated using 47 | the [evaluation functionality](#test-evaluation)) 48 | * [Predictions](results/predictions.csv) (generated using 49 | the [predictions functionality](#predictions)) 50 | 51 | 52 | ## Corpus 53 | 54 | The publicly available corpus is available [here](resources/corpus.xlsx) in 55 | Excel format. This corpus is referred to as *Corpus B* in the [arXiv 56 | paper]. The columns in the spreadsheet are: 57 | 58 | | Column Name | Description | Trello Artifact | 59 | |---------------|-------------------------------------------------|-----------------| 60 | | `utterance` | The natural language todo list text. | no | 61 | | `class` | The label if classified, otherwise left blank. | no | 62 | | `board_name` | The name of the board | yes | 63 | | `board_id` | The board ID | yes | 64 | | `short_url` | The URL of the comment on Trello | yes | 65 | | `description` | Additional description information for the task | yes | 66 | 67 | 68 | ## Citation 69 | 70 | Please use the following to cite the [arXiv paper]. 
71 | 72 | ```jflex 73 | @article{landesDiEugenio2018, 74 | title = {A Supervised Approach To The Interpretation Of Imperative To-Do Lists}, 75 | url = {http://arxiv.org/abs/1806.07999}, 76 | note = {arXiv: 1806.07999}, 77 | journal = {arXiv:1806.07999 [cs]}, 78 | author = {Landes, Paul and Di Eugenio, Barbara}, 79 | year = {2018}, 80 | month = {Jun} 81 | } 82 | ``` 83 | 84 | If you use this software in your research, please cite with the following 85 | BibTeX (note that the [third party libraries] also have citations): 86 | 87 | ```jflex 88 | @misc{plandesTodoTask2018, 89 | author = {Paul Landes}, 90 | title = {Supervised Approach Imperative To-Do List Categorization}, 91 | year = {2018}, 92 | publisher = {GitHub}, 93 | journal = {GitHub repository}, 94 | howpublished = {\url{https://github.com/plandes/todo-task}} 95 | } 96 | ``` 97 | 98 | 99 | ## Code Base 100 | 101 | The code base used in this repository is an updated version of the code used on 102 | *Corpus A* (see the [arXiv paper]). It is written in [Clojure] and written to 103 | be accessed mostly via `make` commands. However, it can be compiled into a 104 | command line app if you want to run the long running cross fold validation 105 | tasks. See the [Running the Tests](#running-the-tests) section to compile and run it. 106 | 107 | 108 | ### What's Included 109 | 110 | The functionality included is *agent classification* as described in the [arXiv 111 | paper]. The following is *not* included: 112 | 113 | * Argument classification 114 | * Extending the Named Entity Recognizer (section 4.1) 115 | * The first verb model (section 4.2) 116 | 117 | This functionality is not included as the original code base is proprietary. 118 | This code base was rewritten and [third party libraries] utilized where 119 | possible to speed up the development. 120 | 121 | 122 | ### Third Party Libraries 123 | 124 | Primary libraries used are listed below. 
Their dependencies can be traced from 125 | their respective repo links: 126 | 127 | * [Natural Language Parsing and Feature Generation] 128 | * [Interface for Machine Learning Modeling] 129 | * [Generate, split into folds or train/test and cache a dataset] 130 | * [Natural Language Feature Creation] 131 | * [Word Vector Feature Creation] 132 | 133 | 134 | ### Documentation 135 | 136 | API [documentation](https://plandes.github.io/todo-task/codox/index.html). 137 | 138 | 139 | ## Running the Tests 140 | 141 | This section explains how to run the model against the corpus to reproduce 142 | the results (*similar*) to the [arXiv paper]. These instructions assume either 143 | a UNIX, Linux, macOS operating system or *maybe* Cygwin under Windows. 144 | 145 | Before proceeding, please install all the tools given in 146 | the [building](#building) section. 147 | 148 | 149 | ### Parsing 150 | 151 | This section describes how to parse the corpus and load the corpus. Note that 152 | if you just want to run the tests you can **skip** 153 | to the [test evaluation](#test-evaluation) section. This means you don't need 154 | [ElasticSearch], which is only necessary for parsing the corpus and creating 155 | the file system train/test split. This is already done 156 | and is [in the repo](resources/todo-dataset.json). 157 | 158 | On the other hand, if you **really** want to manually parse and create the 159 | train/test data sets you must first install [ElasticSearch] or [Docker]. The 160 | easiest way to get this up and working is to use [Docker], which is easy enough 161 | to download, install and get running on a container with: 162 | 163 | ```bash 164 | make startes 165 | ``` 166 | 167 | which provides the configuration necessary to download and start an 168 | [ElasticSearch] container ready to store the generated features from the parsed 169 | natural language text. 
170 | 171 | Next, populate [ElasticSearch] with parsed features: 172 | 173 | ```bash 174 | make load 175 | ``` 176 | 177 | This parses the corpus and adds a JSON parse representation of each utterance 178 | to the database. 179 | 180 | Next create train and test datasets by randomly shuffling the corpus. After 181 | the train/test assignment for each data point, export the data set to the JSON 182 | file: 183 | 184 | ```bash 185 | make dsprep 186 | ``` 187 | 188 | 189 | ### Test Evaluation 190 | 191 | Produce the optimal results for the model by evaluating and 192 | printing the results: 193 | 194 | ```bash 195 | make printbest 196 | ``` 197 | 198 | This gives the best (0.76 F1) results. 199 | 200 | 201 | To run all defined feature and classifier combinations run the following: 202 | 203 | ```bash 204 | make print 205 | ``` 206 | 207 | 208 | To run all defined feature and classifier combinations and create a spreadsheet 209 | with all performance metrics, features and classifiers used for those metrics run: 210 | 211 | ```bash 212 | make evaluate 213 | ``` 214 | 215 | This will create an `evaluation.xls` file. The file this process generates 216 | is [here](results/full-evaluation.xls). 217 | 218 | 219 | ### Predictions 220 | 221 | It is possible to generate a CSV file with predictions complete with the 222 | utterance, the correct label, and the predicted label. In addition, the file 223 | also includes all features used to create the prediction. This process includes: 224 | 225 | 1. For each feature set and classifier combination, train the model and test. 226 | 2. The winning combination (by F1) of feature set and classifier is used to 227 | train the model. 228 | 3. Create predictions on the test set. 229 | 4. Generate the spreadsheet with the results. 230 | 231 | To invoke this functionality, use the following: 232 | 233 | ```bash 234 | make predict 235 | ``` 236 | 237 | This will generate a `predictions.csv` file. 
The file this process generates 238 | is [here](results/predictions.csv). 239 | 240 | 241 | ### Off-line Tests 242 | 243 | If you have a slower computer and the tests take too long, they can run in an 244 | offline mode. 245 | 246 | To run the long running offline tests in the background, first download and link to the 247 | models (note the space between `ZMODEL=` and `models` is intentional): 248 | 249 | ```bash 250 | make ZMODEL= models 251 | ``` 252 | 253 | Next, create the application as a standalone and then 254 | execute in the background: 255 | 256 | ```bash 257 | make ZMODEL=`pwd`/model DIST_PREFIX=./inst disttodo 258 | cd ./inst/todotask 259 | ./run.sh sanity 260 | tail -f log/test-res.log 261 | ``` 262 | 263 | Type `CONTROL-C` to break out of `tail` and open `results/test-res.xlsx` 264 | to confirm there is a single line from a simple majority label classifier (it will 265 | have terrible performance). 266 | 267 | If everything works, now run the long running tests: 268 | 269 | ```bash 270 | ./run.sh long 271 | ls results 272 | ``` 273 | 274 | The `results` directory will have the results from each test. 275 | Section [results](#sample-results-of-test) has a summary of each test. 
276 | 277 | 278 | ### Sample Results of Test 279 | 280 | A selection of results using the this code base on [*Corpus B*] are given 281 | below: 282 | 283 | | Classifier | F1 | Precision | Recall | Attributes | 284 | |--------------|----------:|----------:|-------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 285 | | J48 | 0.763 | 7677 | 0.761 | similarity-top-label, pos-last-tag, word-count-contact, word-count-call, word-count-buy, word-count-calendar, word-count-pay-bill-online, pos-first-tag, word-count-plan-meal, word-count-email, word-count-postal, word-count-school-work, word-count-print | 286 | | RandomForest | 0.695 | 7101 | 0.714 | *all word counts*, elected-verb-id, similarity-top-label, similarity-score, pos-tag-ratio-noun | 287 | | RandomTree | 0.656 | 6654 | 0.666 | *all word counts*, elected-verb-id, similarity-top-label, similarity-score, pos-tag-ratio-noun | 288 | | LogitBoost | 0.592 | 6171 | 0.619 | *all word counts*, elected-verb-id, similarity-top-label, similarity-score, pos-tag-ratio-noun | 289 | | NaiveBayes | 0.547 | 5779 | 0.571 | similarity-top-label, pos-last-tag, word-count-contact, word-count-call, word-count-buy, word-count-calendar, word-count-pay-bill-online, pos-first-tag, word-count-plan-meal, word-count-email, word-count-postal, word-count-school-work, word-count-print | 290 | | SVM | 0.273 | 2815 | 0.285 | elected-verb-id, token-average-length, pos-first-tag, pos-last-tag, similarity-top-label, similarity-score, pos-tag-ratio-noun | 291 | | Baseline | 0.091 | 2356 | 0.238 | similarity-top-label, pos-last-tag, word-count-contact, word-count-call, word-count-buy, word-count-calendar, word-count-pay-bill-online, pos-first-tag, word-count-plan-meal, word-count-email, word-count-postal, 
word-count-school-work, word-count-print | 292 | 293 | 294 | ### Building 295 | 296 | To build from source, do the following: 297 | 298 | - Install [Leiningen](http://leiningen.org) (this is just a script) 299 | - Install [GNU make](https://www.gnu.org/software/make/) 300 | - Install [Git](https://git-scm.com) 301 | - Download the source: `git clone --recurse-submodules https://github.com/plandes/todo-task && cd todo-task` 302 | 303 | 304 | ### Advanced 305 | 306 | All the capabilities of the [Interface for Machine Learning Modeling] package, 307 | including creating a usable executable model, are possible. The (not unit test 308 | case) [Clojure] [experimental execution file](test/uic/nlp/todo/eval_test.clj) 309 | demonstrates how to do other things with the model. All you need to do is to 310 | start a [REPL](https://clojure.org/guides/repl/introduction) and call the 311 | `main` function. 312 | 313 | 314 | ## Changelog 315 | 316 | An extensive changelog is available [here](CHANGELOG.md). 317 | 318 | 319 | ## Special Thanks 320 | 321 | Thanks to those that volunteered their To-do tasks that, in part, made 322 | up this publicly available corpus. 323 | 324 | 325 | ## License 326 | 327 | This license applies to the code base and the corpus. 328 | 329 | Copyright (c) 2018 Paul Landes 330 | 331 | Permission is hereby granted, free of charge, to any person obtaining a copy of 332 | this software and associated documentation files (the "Software"), to deal in 333 | the Software without restriction, including without limitation the rights to 334 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 335 | of the Software, and to permit persons to whom the Software is furnished to do 336 | so, subject to the following conditions: 337 | 338 | The above copyright notice and this permission notice shall be included in all 339 | copies or substantial portions of the Software. 
340 | 341 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 342 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 343 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 344 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 345 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 346 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 347 | SOFTWARE. 348 | 349 | 350 | 351 | [A Supervised Approach To The Interpretation Of Imperative To-Do Lists]: https://arxiv.org/pdf/1806.07999 352 | [arXiv paper]: https://arxiv.org/pdf/1806.07999 353 | [Paper on arXiv]: https://arxiv.org/pdf/1806.07999 354 | [*Corpus B*]: resources/corpus.xlsx 355 | [Corpus]: resources/corpus.xlsx 356 | 357 | [ElasticSearch]: https://www.elastic.co 358 | [Docker]: https://www.docker.com 359 | [Clojure]: https://clojure.org 360 | 361 | [Natural Language Parsing and Feature Generation]: https://github.com/plandes/clj-nlp-parse 362 | [Interface for Machine Learning Modeling]: https://github.com/plandes/clj-ml-model 363 | [Generate, split into folds or train/test and cache a dataset]: https://github.com/plandes/clj-ml-dataset 364 | [Natural Language Feature Creation]: https://github.com/plandes/clj-nlp-feature 365 | [Word Vector Feature Creation]: https://github.com/plandes/clj-nlp-wordvec 366 | [third party libraries]: #third-party-libraries 367 | -------------------------------------------------------------------------------- /results/agent-data.arff: -------------------------------------------------------------------------------- 1 | @relation agent-classify 2 | 3 | @attribute word-count-calendar numeric 4 | @attribute pos-last-tag {,-RRB-,$,NNS,IN,NN,RBR,'\'\'',#,DT,VBP,JJR,PRP,RP,MD,',',.,RB,VBG,:,FW,TO,CC,-LRB-,VBD,WP,WRB,WDT,VBZ,LS,RBS,NNPS,EX,CD,VB,WP$,NNP,SYM,PRP$,JJS,UH,POS,VBN,JJ,PDT} 5 | @attribute pos-first-tag 
{,-RRB-,$,NNS,IN,NN,RBR,'\'\'',#,DT,VBP,JJR,PRP,RP,MD,',',.,RB,VBG,:,FW,TO,CC,-LRB-,VBD,WP,WRB,WDT,VBZ,LS,RBS,NNPS,EX,CD,VB,WP$,NNP,SYM,PRP$,JJS,UH,POS,VBN,JJ,PDT} 6 | @attribute word-count-buy numeric 7 | @attribute is-question numeric 8 | @attribute pos-tag-count-adverb numeric 9 | @attribute word-count-find-service numeric 10 | @attribute word-count-email numeric 11 | @attribute similarity-score numeric 12 | @attribute token-count numeric 13 | @attribute pos-tag-count-verb numeric 14 | @attribute pos-tag-count-adjective numeric 15 | @attribute word-count-contact numeric 16 | @attribute pos-tag-ratio-noun numeric 17 | @attribute pos-tag-ratio-adverb numeric 18 | @attribute word-count-self-improve numeric 19 | @attribute word-count-school-work numeric 20 | @attribute word-count-service numeric 21 | @attribute token-average-length numeric 22 | @attribute utterance-length numeric 23 | @attribute pos-tag-count-noun numeric 24 | @attribute word-count-find-travel numeric 25 | @attribute pos-tag-count-wh numeric 26 | @attribute stopword-count numeric 27 | @attribute sent-count numeric 28 | @attribute similarity-top-label {pay-bill-online,calendar,call,find-travel,find-service,email,postal,buy,self-improve,print,service,school-work,contact,plan-meal} 29 | @attribute pos-tag-ratio-verb numeric 30 | @attribute word-count-postal numeric 31 | @attribute word-count-pay-bill-online numeric 32 | @attribute pos-tag-ratio-adjective numeric 33 | @attribute elected-verb-id numeric 34 | @attribute word-count-print numeric 35 | @attribute utterance string 36 | @attribute word-count-call numeric 37 | @attribute mention-count numeric 38 | @attribute pos-tag-ratio-wh numeric 39 | @attribute word-count-plan-meal numeric 40 | @attribute agent {pay-bill-online,calendar,call,find-travel,find-service,email,postal,buy,self-improve,print,service,school-work,contact,plan-meal} 41 | 42 | @data 43 | 
0,NNS,NN,0,0,0,0,0,0.72155,2,0,0,0,1,0,0,0,0,4,9,2,0,0,0,1,pay-bill-online,0,0,0.68,0,1822752074,0,'pay bills',0,0,0,0,pay-bill-online 44 | 0,VBD,NN,0,0,0,0,0,0.394499,2,1,0,0,0.5,0,0,0,0,4,9,1,0,0,0,1,pay-bill-online,0.5,0,0.6,0,1822752074,0,'pay comed',0,0,0,0,pay-bill-online 45 | 0,NN,VB,0,0,0,0,0,0.532605,2,1,0,0,0.5,0,0,0,0,6.5,14,1,0,0,0,1,pay-bill-online,0.5,0,0.64,0,110760,0,'Pay Expression',0,0,0,0,pay-bill-online 46 | 0,-LRB-,VB,0,0,0,0,0,0.454836,7,1,0,0,0.428571,0,0,0,0,3,25,3,0,0,0,2,pay-bill-online,0.142857,0,0.68,0,110760,0,'Pay SF Parking Ticket. :(',0,1,0,0,pay-bill-online 47 | 0,NN,NN,0,0,0,0,0,0.561955,4,0,0,0,1,0,0,0,0,3,15,4,0,0,0,1,pay-bill-online,0,0,0.64,0,1822752074,0,'pay NY tax bill',0,0,0,0,pay-bill-online 48 | 0,NN,NN,0,0,0,0,0,0.700781,2,0,0,0,1,0,0,0,0,5.5,12,2,0,0,0,1,pay-bill-online,0,0,0.64,0,1822752074,0,'pay mortgage',0,0,0,0,pay-bill-online 49 | 0,NN,NN,0,0,0,0,0,0.455481,2,0,0,0,1,0,0,0,0,3,7,2,0,0,0,1,pay-bill-online,0,0,0.6,0,1822752074,0,'pay ATT',0,0,0,0,pay-bill-online 50 | 0,NN,NN,0,0,0,0,0,0.429947,2,0,0,0,1,0,0,0,0,4.5,10,2,0,0,0,1,pay-bill-online,0,0,0.6,0,1822752074,0,'pay roofer',0,0,0,0,pay-bill-online 51 | 0,NNP,VB,0,0,0,0,0,0.392625,2,1,0,0,0.5,0,0,0,0,3.5,8,1,0,0,0,1,pay-bill-online,0.5,0,0.6,0,110760,0,'Pay PG&E',0,0,0,0,pay-bill-online 52 | 0,NNP,VB,0,0,0,0,0,0.383209,2,1,0,0,0.5,0,0,0,0,3,7,1,0,0,0,1,pay-bill-online,0.5,0,0.6,0,110760,0,'Pay BGE',0,0,0,0,pay-bill-online 53 | 0,NN,NN,0,0,0,0,0,0.616068,3,0,0,0,1,0,0,0,0,4.666667,16,3,0,0,0,1,pay-bill-online,0,0,0.64,0,1822752074,0,'pay student loan',0,0,0,0,pay-bill-online 54 | 0,NN,NN,0,0,0,0,0,0.479135,2,0,0,0,1,0,0,0,0,5,11,2,0,0,0,1,pay-bill-online,0,0,0.6,0,1822752074,0,'pay plumber',0,0,0,0,pay-bill-online 55 | 0,NNP,VB,0,0,0,0,0,0.440182,6,1,0,0,0.5,0,0,0,0,4,29,3,0,0,1,1,pay-bill-online,0.166667,0,0.6,0,110760,0,'Pay DMV for Harley before May',0,3,0,0,pay-bill-online 56 | 
0,NN,NN,0,0,0,0,0,0.748226,2,0,0,0,1,0,0,0,0,3.5,8,2,0,0,0,1,pay-bill-online,0,0,0.64,0,1822752074,0,'pay rent',0,0,0,0,pay-bill-online 57 | 0,NN,NN,0,0,0,0,0,0.281828,3,0,0,0,1,0,0,0,0,5.666667,19,3,0,0,0,1,pay-bill-online,0,0,0.64,0,1822752074,0,'pay genki violation',0,1,0,0,pay-bill-online 58 | 0,NN,VB,0.133333,0,0,0,0.095238,0.448574,4,1,0,0,0.5,0,0,0,0,4.25,20,2,0,0,1,1,call,0.25,0,0.08,0,102230,0,'get bills on autopay',0,0,0,0,pay-bill-online 59 | 0.147059,NN,NN,0,0,0,0,0,0.481327,6,0,0,0.069767,0.666667,0,0,0,0,5.5,37,4,0,0,1,1,call,0,0,0,0,1822752074,0,'Appointment with capital women\'s care',0,0,0,0,calendar 60 | 0.441176,JJ,VB,0,0,0,0,0,0.45206,7,1,1,0,0.285714,0,0,0,0,6.285714,49,2,0,0,2,1,call,0.142857,0,0,0.142857,-697920873,0,'Schedule appointments with caterers, if necessary',0,0,0,0,calendar 61 | 0.352941,.,VB,0,0,0,0,0,0.433096,24,1,3,0,0.375,0,0,0,0,4.875,133,9,0,0,3,1,call,0.041667,0,0,0.125,-697920873,0,'Schedule rehearsal with officiant, all family members, bridesmaids, best men, wedding planner (if applicable) and other participants.',0,0,0,0,calendar 62 | 0.294118,-RRB-,VB,0,0,0,0,0,0.366983,8,1,0,0,0.375,0,0,0,0,6.625,58,3,0,0,2,1,call,0.125,0,0,0,-697920873,0,'Schedule interviews with photographers (and videographers)',0,0,0,0,calendar 63 | 0.441176,NN,VB,0,0,0,0,0,0.445552,3,1,0,0,0.666667,0,0,0,0,6.333333,21,2,0,0,0,1,plan-meal,0.333333,0,0,0,-697920873,0,'Schedule cake tasting',0,0,0,0,calendar 64 | 0.088235,NN,NNP,0,0,0,0,0,0.412771,2,0,0,0,1,0,0,0,0,8,17,2,0,0,0,1,calendar,0,0,0,0,1822752074,0,'Apple appointment',0,1,0,0,calendar 65 | 0.058824,NNS,VB,0,0,0,0,0,0.516648,12,3,0,0,0.25,0,0,0,0,4.583333,66,3,0,0,5,1,call,0.25,0,0,0,113762,0,'Set a meeting with officiant to go through and confirm the details',0,0,0,0,calendar 66 | 0.529412,NNS,VB,0,0,0,0,0,0.421055,7,1,0,0,0.571429,0,0,0,0,6.571429,52,4,0,0,2,1,pay-bill-online,0.142857,0,0,0,-697920873,0,'Schedule appointments with site managers or caterers',0,0,0,0,calendar 67 | 
0.294118,NN,VB,0,0,0,0,0,0.431443,4,2,0,0,0.25,0,0,0,0,5.75,26,1,0,0,0,1,calendar,0.5,0,0,0,-697920873,0,'Schedule one tabling event',0,1,0,0,calendar 68 | 0,NN,VB,0,0,0,0,0,0.373705,14,2,0,0,0.428571,0,0,0,0,4.357143,74,6,0,0,3,1,call,0.142857,0,0,0,-1408204561,0,'Assign maid of honor / one attendant to arrange train and/or veil at altar',0,1,0,0,calendar 69 | 0.058824,NNP,VB,0,0,0,0,0,0.531802,5,1,0,0,0.4,0,0,0,0,4,24,2,0,0,1,1,call,0.2,0,0,0,3304,0,'go over budget with Matt',0,1,0,0,calendar 70 | 0.294118,NNS,VB,0.066667,0,0,0,0,0.449412,7,1,1,0,0.428571,0,0,0,0,5.428571,44,3,0,0,2,1,buy,0.142857,0,0,0.142857,-697920873,0,'Schedule delivery and pickup of rental items',0,0,0,0,calendar 71 | 0.294118,-RRB-,NN,0.111111,0,0,0,0,0.450066,14,1,0,0.069767,0.428571,0,0,0,0,3.571429,59,6,0,0,3,1,call,0.071429,0,0,0,1822752074,0,'Place order for wedding cake (and groom\'s cake, if desired)',0,0,0,0,calendar 72 | 0.529412,NNS,VB,0,0,0,0,0,0.483353,5,1,0,0,0.6,0,0,0,0,7.2,40,3,0,0,1,1,calendar,0.2,0,0,0,-697920873,0,'Schedule appointments with site managers',0,0,0,0,calendar 73 | 0.205882,NN,VB,0,0,0,0,0,0.485166,5,1,0,0,0.6,0,0,0,0,5.8,33,3,0.142857,0,1,1,calendar,0.2,0,0,0,3343854,0,'Make appointment for menu tasting',0,0,0,0.083333,calendar 74 | 0.529412,NNS,VB,0,0,0,0,0,0.405918,5,1,0,0,0.6,0,0,0,0,10.2,55,3,0,0,1,1,calendar,0.2,0,0,0,-697920873,0,'Schedule appointments with site coordinators/officiants',0,0,0,0,calendar 75 | 0,NNP,VB,0.044444,0,0,0,0,0.496192,8,2,0,0.069767,0.375,0,0.090909,0,0.115385,3.875,37,3,0,0,1,1,call,0.25,0.105263,0,0,3440673,0,'Pick up moving truck at 5:30pm Friday',0,1,0,0,calendar 76 | 0.294118,NN,NN,0,0,0,0,0,0.29349,3,0,0,0,1,0,0,0,0,5.333333,18,3,0,0,0,1,call,0,0,0,0,1822752074,0,'schedule DHPP shot',0,1,0,0,calendar 77 | 0,NNP,NNP,0,0,0,0,0,0.300045,5,0,0,0,0.8,0,0,0,0,7.2,40,4,0,0,1,1,service,0,0,0,0,1822752074,0,'Meet with Electric Cheetah/Uncle Cheetah',0,2,0,0,calendar 78 | 
0.205882,NNS,VB,0,0,0,0.08,0,0.401357,5,1,0,0,0.6,0,0,0,0,6.2,35,3,0.142857,0,1,1,calendar,0.2,0,0,0,3343854,0,'Make appointments for gown fittings',0,0,0,0,calendar 79 | 0,VBD,NNP,0,0,2,0,0,0.337473,12,3,0,0,0.416667,0.166667,0,0,0,4.75,67,5,0,0,1,1,call,0.25,0,0,0,1822752074,0,'Debrief Constantly \\u2013 Call Brooke and discuss how HHCT III went',0.678571,3,0,0,call 80 | 0,NNP,VB,0,0,0,0,0,0.454765,6,1,0,0,0.666667,0,0,0,0,4,28,4,0,0,0,1,call,0.166667,0,0,0,3045982,0,'Call Phil Smith, Mark Fukuda',0.642857,2,0,0,call 81 | 0,NNP,VB,0,0,0,0,0,0.236103,2,1,0,0,0.5,0,0,0,0,8.5,18,1,0,0,0,1,postal,0.5,0,0,0,-838846263,0,'update address-ATT',0,0,0,0,call 82 | 0,NN,VB,0,0,0,0,0,0.591377,5,2,0,0,0.2,0,0,0,0,3.2,20,1,0,0,1,1,call,0.4,0,0,0,3045982,0,'Call to shut off gas',0.75,0,0,0,call 83 | 0,NN,VB,0,0,0,0,0,0.59916,5,2,0,0,0.2,0,0,0,0,3.6,22,1,0,0,1,1,call,0.4,0,0,0,3045982,0,'Call to shut off water',0.75,0,0,0,call 84 | 0,NN,NN,0,0,0,0,0,0.36377,3,0,0,0,0.666667,0,0,0,0,3.333333,12,2,0,0,1,1,call,0,0,0,0,1822752074,0,'call with tj',0.607143,0,0,0,call 85 | 0,NNS,JJ,0,0,0,0,0,0.329707,7,0,1,0,0.571429,0,0,0,0,5,41,4,0.071429,0,2,1,call,0,0,0,0.142857,1822752074,0,'call city of somerville and genki tenants',0.607143,1,0,0,call 86 | 0,NN,NN,0,0,0,0,0,0.112406,5,0,0,0,1,0,0,0,0,3.8,23,5,0,0,0,1,pay-bill-online,0,0,0.04,0,1822752074,0,'call zacky abt CFB rent',0.607143,1,0,0,call 87 | 0.058824,NN,VB,0,0,0,0,0,0.532414,5,1,0,0.069767,0.6,0,0,0,0,4,23,3,0,0,0,1,call,0.2,0,0,0,3045982,0,'Call John McCain\'s team',0.642857,1,0,0,call 88 | 0,NN,JJ,0,0,0,0,0,0.586275,4,0,1,0,0.75,0,0,0,0,4.75,22,3,0,0,0,1,call,0,0,0,0.25,1822752074,0,'call health care thing',0.642857,0,0,0,call 89 | 0,RB,VB,0,0,1,0,0.095238,0.46121,4,1,0,0,0.5,0.25,0,0,0,4,19,2,0,0,0,1,call,0.25,0,0,0,3045982,0,'Call Greg Sims back',0.642857,1,0,0,call 90 | 0,NNS,NN,0,0,0,0,0,0.558664,5,0,0,0,0.6,0,0,0,0,4.8,28,3,0,0,1,1,call,0,0,0,0,1822752074,0,'call with matt from citizens',0.607143,0,0,0,call 91 | 
0,NN,NN,0,0,0,0,0,0.444191,4,0,0,0,0.75,0,0,0,0,5.25,24,3,0,0,0,1,call,0,0,0,0,1822752074,0,'call amazon about kindle',0.607143,1,0,0,call 92 | 0,NN,VB,0,0,0,0.12,0,0.619827,7,3,0,0,0.142857,0,0,0,0.076923,4.142857,35,1,0,0,1,1,call,0.428571,0,0,0,3045982,0,'Call to find out about moving truck',0.607143,0,0,0.083333,call 93 | 0,NNS,NN,0,0,0,0,0,0.302541,2,0,0,0,1,0,0,0,0,8.5,18,2,0,0,0,1,call,0,0,0,0,1822752074,0,'call exterminators',0.607143,0,0,0,call 94 | 0,.,VB,0,0,1,0,0,0.470281,18,3,1,0,0.277778,0.055556,0,0,0,4.444444,94,5,0,0,4,1,call,0.166667,0,0,0.055556,3045982,0,'Call a \"townhall meeting\" at GRBC and personally invite all past/current owners to give input.',0.607143,1,0,0,call 95 | 0,NN,NN,0,0,0,0,0,0.304961,2,0,0,0,1,0,0,0,0,6,13,2,0,0,0,1,call,0,0,0,0,1822752074,0,'call grandmom',0.607143,0,0,0,call 96 | 0,NNS,NN,0,0,0,0,0,0.363489,3,0,0,0,1,0,0,0,0,5.333333,18,3,0,0,0,1,call,0,0,0,0,1822752074,0,'call lesko clients',0.642857,0,0,0,call 97 | 0,CD,VB,0,0,0,0,0,0.336154,6,1,0,0,0.333333,0,0,0,0,2.833333,22,2,0.142857,0,1,1,call,0.166667,0,0,0,3029737,0,'Book a car - dec 13-17',0,2,0,0,find-travel 98 | 0,NNP,VB,0,0,0,0,0,0.337093,5,1,0,0,0.4,0,0,0,0,5,29,2,0.142857,0,2,1,call,0.2,0,0,0,3327647,0,'look at airbnbs in binghamton',0,0,0,0,find-travel 99 | 0.058824,NNP,VB,0,0,0,0,0,0.450885,5,1,0,0,0.6,0,0,0,0,7.4,41,3,0.142857,0,0,1,call,0.2,0,0,0,3343854,0,'make travel arrangements from Albuquerque',0,2,0,0,find-travel 100 | 0,NNS,VB,0,0,0,0,0,0.414428,8,1,1,0,0.5,0,0,0,0,5.125,48,4,0.142857,0,2,1,pay-bill-online,0.125,0,0,0.125,3208383,0,'Hold block of hotel rooms for out-of-town guests',0,0,0,0,find-travel 101 | 0.117647,NN,VB,0,0,0,0,0,0.509584,7,1,0,0,0.571429,0,0,0,0,5.571429,45,4,0.357143,0,1,1,call,0.142857,0,0,0,3343854,0,'Make your hotel reservation for wedding night',0,1,0,0,find-travel 102 | 0,NN,NN,0,0,0,0,0,0.361882,2,0,0,0,1,0,0,0,0,6.5,14,2,0.214286,0,0,1,calendar,0,0,0,0,1822752074,0,'book honeymoon',0,0,0,0,find-travel 103 | 
0,NNS,NNP,0,0,0,0,0,0.43187,3,0,0,0,1,0,0,0,0,5.666667,19,3,0.214286,0,0,1,pay-bill-online,0,0,0,0,1822752074,0,'Kansas City tickets',0,1,0,0,find-travel 104 | 0,NN,VB,0.133333,0,0,0,0.095238,0.57803,3,2,0,0,0.333333,0,0,0,0,5,17,1,0,0,0,1,call,0.666667,0,0,0,102230,0,'get return flight',0,0,0,0,find-travel 105 | 0,NNS,VB,0.4,0,0,0,0,0.460225,3,1,0,0,0.666667,0,0,0,0,6.333333,21,2,0.214286,0,0,1,pay-bill-online,0.333333,0,0,0,97926,0,'Buy Nicaragua tickets',0,1,0,0.083333,find-travel 106 | 0.058824,NN,VB,0.044444,0,0,0.2,0,0.4248,19,3,4,0.069767,0.315789,0,0.090909,0,0.461538,5.421053,121,6,0,1,3,1,service,0.157895,0.052632,0,0.210526,3143097,0,'Find professional dry cleaner who specialized in bridal gown care to clean and pack up dress andaccessories after wedding',0,0,0.052632,0.083333,find-service 107 | 0,NN,VB,0,0,0,0.48,0,0.461306,5,1,0,0,0.6,0,0,0,0,5.2,30,3,0,0,1,1,pay-bill-online,0.2,0,0,0,3202804,0,'Hire caterer and sign contract',0,0,0,0,find-service 108 | 0,NNS,NN,0,0,0,0,0,0.334378,3,0,1,0,0.666667,0,0,0,0,8.666667,28,2,0,0,0,1,call,0,0,0,0.333333,1822752074,0,'Interview potential caterers',0,0,0,0,find-service 109 | 0,NN,VB,0,0,0,0.08,0,0.463022,8,1,0,0,0.5,0,0,0,0,3.5,35,4,0,0,2,1,call,0.125,0,0,0,101397,0,'Fix the CD ROM drive on my computer',0,1,0,0,find-service 110 | 0,NN,NN,0,0,0,0.08,0,0.449836,2,0,0,0,1,0,0,0,0,5,11,2,0,0,0,1,service,0,0,0,0,1822752074,0,'repair roof',0,0,0,0,find-service 111 | 0,NN,VB,0,0,0,0.4,0,0.388847,9,1,0,0,0.444444,0,0,0,0,5.555556,56,4,0,0,2,1,call,0.111111,0,0,0,3202804,0,'Hire photographers (and videographers) and sign contract',0,0,0,0,find-service 112 | 0,NN,NN,0,0,0,0,0,0.508423,2,0,0,0,1,0,0,0,0,8,17,2,0,0,0,1,pay-bill-online,0,0,0,0,1822752074,0,'transfer internet',0,0,0,0,find-service 113 | 0,NN,NN,0,0,0,0.08,0,0.372092,4,0,0,0,1,0,0,0,0,5.25,24,4,0,0,0,1,service,0,0,0,0,1822752074,0,'fix coffee pot dispenser',0,0,0,0,find-service 114 | 
0,JJ,JJ,0,0,0,0,0,0.417859,2,0,2,0,0,0,0,0,0,7,15,0,0,0,0,1,pay-bill-online,0,0,0,1,1822752074,0,'annual physical',0,1,0,0,find-service 115 | 0,NNS,VB,0,0,0,0,0,0.23797,2,1,0,0,0.5,0,0,0,0,8.5,18,1,0,0,0,1,postal,0.5,0,0,0,1578333328,0,'Notarize documents',0,0,0,0,find-service 116 | 0,NN,VB,0,0,0,0.24,0,0.529937,4,1,1,0,0.5,0,0,0,0,5.25,24,2,0,0,0,1,pay-bill-online,0.25,0,0,0.25,3143097,0,'find new insurance agent',0,1,0,0.083333,find-service 117 | 0,NN,NN,0,0,0,0.08,0,0.515373,4,0,0,0,1,0,0,0,0,5.5,25,4,0,0,0,1,service,0,0,0,0,1822752074,0,'repair laundry room floor',0,0,0,0,find-service 118 | 0,NN,JJ,0,0,0,0.04,0,0.40374,4,0,1,0,0.5,0,0,0,0,5.5,25,2,0,0,1,1,call,0,0,0,0.25,1822752074,0,'sound proofing for office',0,0,0,0,find-service 119 | 0,NNS,VB,0,0,0,0.12,0,0.505132,4,1,1,0,0.5,0,0,0,0,5.5,25,2,0,0,0,1,pay-bill-online,0.25,0,0,0.25,3526264,0,'seek new investment ideas',0,0,0,0,find-service 120 | 0,NNS,VB,0,0,0,0,0,0.50743,4,1,1,0,0.5,0,0,0,0,6.75,30,2,0,0,0,1,pay-bill-online,0.25,0,0.04,0.25,950484197,0,'compare different loan options',0,0,0,0,find-service 121 | 0,NN,VB,0,0,0,0.12,0,0.488852,4,1,1,0,0.25,0,0,0,0,4,19,1,0,0,0,1,call,0.25,0,0,0.25,1957569947,0,'Install my new sink',0,0,0,0,find-service 122 | 0,.,VB,0.066667,0,0,0,0,0.496058,26,5,2,0.069767,0.230769,0,0,0,0,4.192308,134,6,0,0,8,2,call,0.192308,0,0,0.076923,-985656342,0,'Please work on an e-visa with the Cambodian embassy in Vietnam. 
I want to consider making you the admin person in our NGO paperwork.',0,3,0,0,find-service 123 | 0,NN,VBP,0,0,0,0.08,0,0.403974,6,3,0,0.069767,0.333333,0,0,0,0,4,29,2,0,0,1,1,service,0.5,0,0,0,1822752074,0,'Have shoes dyed to match gown',0,0,0,0,find-service 124 | 0,NNP,RB,0,0,1,0,0,0.400803,3,0,0,0,0.333333,0.333333,0,0,0,6.333333,21,1,0,0,1,1,pay-bill-online,0,0,0,0,1822752074,0,'Apply to Southxchange',0,1,0,0,find-service 125 | 0,NN,VB,0,0,0,0.12,0,0.380457,3,1,0,0,0.666667,0,0,0,0,5.666667,19,2,0,0,0,1,call,0.333333,0,0,0,3202804,0,'Hire content writer',0,1,0,0,find-service 126 | 0,NN,NNP,0,0,0,0,0,0.479847,3,0,0,0,1,0,0,0,0,6.666667,22,3,0,0,0,1,call,0,0,0,0,1822752074,0,'Home security research',0,0,0,0,find-service 127 | 0,VB,VB,0,0,0,0,0,0.491032,6,2,0,0,0.166667,0,0,0,0,5.166667,36,1,0,0,2,1,call,0.333333,0,0,0,-135762164,0,'Identify office/place for me to live',0,0,0,0.083333,find-service 128 | 0,NNP,VB,0,0,0,0.12,0,0.383697,6,1,0,0,0.5,0,0,0,0,7.166667,48,3,0,0,2,1,call,0.166667,0,0,0,3143097,0,'Find a photographer/marriage counselor in Hawaii',0,1,0,0.083333,find-service 129 | 0.088235,NN,VB,0,0,0,0.52,0,0.459764,10,1,0,0,0.6,0,0,0,0,4.3,50,6,0.142857,0,1,1,call,0.1,0,0,0,3029737,0,'Book site, caterer, sign contract and sign deposit',0,0,0,0,find-service 130 | 0,NN,VB,0.133333,0,0,0,0.095238,0.557692,4,1,0,0,0.5,0,0,0,0,3.75,18,2,0,0,1,1,call,0.25,0,0,0,102230,0,'Get the oil change',0,0,0,0,find-service 131 | 0,NNS,VB,0.133333,0,1,0,0.333333,0.556835,4,1,0,0,0.25,0.25,0,0,0,3.75,18,1,0,0,1,1,call,0.25,0,0,0,102230,0,'get back to emails',0.035714,0,0,0,email 132 | 0,VB,VB,0.133333,0,1,0,0.52381,0.589134,4,2,0,0,0,0.25,0,0,0,3.5,17,0,0,0,1,1,call,0.5,0,0,0,102230,0,'Get back to email',0.035714,0,0,0,email 133 | 0,NN,VB,0,0,0,0,0.52381,0.341097,7,1,0,0,0.714286,0,0,0,0,4.142857,35,5,0,0,1,1,call,0.142857,0.368421,0,0,3526536,0,'send email re pavan and bucket list',0,1,0,0,email 134 | 
0,NN,NNP,0,0,0,0,0.333333,0.21851,3,0,0,0,0.666667,0,0,0,0,6.666667,22,2,0,0,0,1,email,0,0,0,0,1822752074,0,'Donate/send 5000 email',0,1,0,0,email 135 | 0,NNS,NN,0,0,0,0,0.333333,0.32776,4,0,0,0,0.75,0,0,0,0,7,31,3,0,0,0,1,call,0,0,0,0,1822752074,0,'email daniel about strawberries',0,1,0,0,email 136 | 0,NNS,NN,0,0,0,0,0.142857,0.16568,2,0,0,0,1,0,0,0,0,8,17,2,0,0,0,1,email,0,0,0,0,1822752074,0,'coffeeshop emails',0,0,0,0,email 137 | 0,NN,VB,0,0,0,0,0.428571,0.40711,5,2,0,0,0.6,0,0,0,0,6.2,35,3,0,0,0,1,call,0.4,0.368421,0,0,3526536,0,'send dana email regarding insurance',0,0,0,0,email 138 | 0,NNS,NN,0,0,0,0,0.333333,0.304026,5,0,0,0,0.8,0,0,0,0,4.4,26,4,0,0,0,1,pay-bill-online,0,0,0,0,1822752074,0,'email myer about fdo taxes',0,0,0,0,email 139 | 0,-RRB-,VB,0,0,0,0,0.285714,0.203246,13,1,1,0,0.461538,0,0,0,0,2.615385,41,6,0,0,0,1,call,0.076923,0,0,0.076923,113399775,0,'write appt emails (BV, UIX, AD mtg, GVSU)',0,3,0,0,email 140 | 0,NNP,NNP,0,0,0,0,0,0.129684,3,0,0,0,1,0,0,0,0,7.333333,24,3,0,0,0,1,call,0,0,0,0,1822752074,0,'Email/Call Dan Strickman',0,1,0,0,email 141 | 0,NN,VB,0,0,0,0,0.333333,0.331475,6,2,0,0,0.5,0,0,0,0,5.333333,37,3,0,0,1,1,call,0.333333,0,0,0,96619420,0,'Email people affected by timezone bug',0,0,0,0,email 142 | 0,NNS,VB,0,0,0,0,0.095238,0.52985,2,1,0,0,0.5,0,0,0,0,7.5,16,1,0,0,0,1,postal,0.5,0.368421,0,0,3526536,0,'Send invitations',0,0,0,0,postal 143 | 0,VBP,VB,0,0,0,0,0.095238,0.501019,3,2,0,0,0.333333,0,0,0,0,5.333333,18,1,0,0,0,1,call,0.666667,0.368421,0,0,3526536,0,'Send prayer update',0,1,0,0,postal 144 | 0,NN,VB,0,0,0,0,0.095238,0.521713,5,1,0,0,0.6,0,0,0,0,4.4,26,3,0,0,1,1,postal,0.2,0.578947,0,0,3526536,0,'send info to credit bureau',0,0,0,0,postal 145 | 0,CD,VB,0,0,0,0,0.095238,0.288142,13,1,0,0,0.461538,0,0,0,0,3.384615,70,6,0,0,1,1,call,0.076923,0.421053,0,0,3526536,0,'Send Abby her shirt at 247 W 15th St. 
Holland, MI 49423',0,6,0,0,postal 146 | 0,NN,VB,0,0,0,0,0.095238,0.434096,4,1,0,0,0.5,0,0,0,0,4.75,22,2,0,0,1,1,postal,0.25,0.473684,0,0,3526536,0,'send info to appraiser',0,0,0,0,postal 147 | 0,NNP,VB,0,0,0,0,0.095238,0.42222,6,1,0,0,0.5,0,0,0,0.076923,4.666667,33,3,0,0,1,1,call,0.166667,0.421053,0,0,3526536,0,'Send shirts to our Inner Circlers',0,1,0,0,postal 148 | 0,NN,VB,0.044444,0,0,0,0,0.5651,6,1,0,0.069767,0.5,0,0.090909,0,0.115385,3.666667,27,3,0,0,1,1,call,0.166667,0.210526,0,0,3440673,0,'pick up mail at post office',0,0,0,0,postal 149 | 0,NNP,NN,0,0,0,0,0,0.466686,4,0,0,0,0.75,0,0,0,0,5,23,3,0,0,1,1,call,0,0,0,0,1822752074,0,'resend check to Richard',0,1,0,0,postal 150 | 0,NNP,VB,0,0,0,0,0.095238,0.477605,4,1,0,0,0.5,0,0,0,0,4,19,2,0,0,1,1,call,0.25,0.473684,0,0,3526536,0,'Send checks to Joey',0,1,0,0,postal 151 | 0,NNP,NNP,0,0,0,0,0,0.326321,6,0,1,0,0.666667,0,0.090909,0,0,5,34,4,0,0,1,1,pay-bill-online,0,0.210526,0,0.166667,1822752074,0,'Mail nonprofit checks to Kyle!!!!!',0,1,0,0,postal 152 | 0,NNS,NN,0,0,0,0,0,0.559574,2,0,0,0,1,0,0,0,0,6.5,14,2,0,0,0,1,buy,0,0,0,0,1822752074,0,'Business cards',0,0,0,0,buy 153 | 0,NNS,NNP,0.444444,0,0,0,0,0.21031,7,1,0,0,0.571429,0,0,0,0,4.571429,37,4,0,0,0,1,buy,0.142857,0,0,0,1822752074,0,'Tuscon: buy two 1L smartwater bottles',0,2,0,0.083333,buy 154 | 0,NNP,VB,0.4,0,0,0,0,0.522589,2,1,0,0,0.5,0,0,0,0,4,9,1,0,0,0,1,buy,0.5,0,0,0,97926,0,'Buy Scale',0,0,0,0.083333,buy 155 | 0,NNS,NN,0.111111,0,0,0.12,0,0.568073,3,0,1,0,0.666667,0,0,0,0,4.666667,16,2,0,0,0,1,pay-bill-online,0,0.105263,0,0.333333,1822752074,0,'order new checks',0,0,0,0,buy 156 | 0,NNS,VB,0.4,0,0,0,0,0.501987,2,1,0,0,0.5,0,0,0,0,4.5,10,1,0,0,0,1,buy,0.5,0,0,0,97926,0,'Buy gloves',0,0,0,0.083333,buy 157 | 0,VBP,NN,0.111111,0,0,0,0,0.309961,3,1,0,0,0.666667,0,0,0,0,4.333333,15,2,0,0,0,1,pay-bill-online,0.333333,0,0,0,1822752074,0,'order gpa mount',0,0,0,0,buy 158 | 
0,NN,VB,0,0,0,0,0,0.455352,4,1,1,0,0.5,0,0,0,0,5.5,25,2,0,0,0,1,call,0.25,0,0,0.25,-1361218025,0,'choose front door fixture',0,0,0,0,buy 159 | 0.058824,NNS,VB,0.133333,0,0,0,0.095238,0.389976,5,2,0,0,0.4,0,0,0,0,3.6,22,2,0,0,0,1,buy,0.4,0,0,0,3304,0,'Go get dirt from lowes',0,0,0,0,buy 160 | 0,NN,NNP,0,0,0,0,0,0.48608,5,0,0,0,0.8,0,0,0,0,5.2,30,4,0,0,1,1,service,0,0,0.04,0,1822752074,0,'Rent a carpet cleaning machine',0,0,0,0,buy 161 | 0,NN,NN,0,0,0,0,0,0.265483,5,0,0,0,0.8,0,0,0,0,6.4,36,4,0,0,1,1,pay-bill-online,0,0,0,0,1822752074,0,'Purchase/make pillow for ring bearer',0,0,0,0,buy 162 | 0.058824,NNP,VB,0.133333,0,0,0,0.095238,0.433438,6,2,0,0,0.5,0,0,0,0.076923,3.666667,27,3,0,0,1,1,buy,0.333333,0,0,0,3304,0,'Go get plants at Tilth Sale',0,1,0,0,buy 163 | 0,NN,VB,0.133333,0,0,0,0.095238,0.524743,3,1,0,0,0.666667,0,0,0,0,6,20,2,0,0,0,1,pay-bill-online,0.333333,0,0,0,102230,0,'Get marriage license',0,0,0,0,buy 164 | 0,NN,JJ,0.444444,0,0,0,0,0.449681,5,0,1,0,0.6,0,0,0,0.076923,5.6,32,3,0,0,1,1,service,0,0,0,0.2,1822752074,0,'Buy mirror for upstairs bathroom',0,0,0,0.083333,buy 165 | 0,NNS,NNP,0.111111,0,0,0,0,0.479031,3,0,1,0,0.666667,0,0,0,0,6,20,2,0,0,0,1,pay-bill-online,0,0,0,0.333333,1822752074,0,'Reserve rental items',0,0,0,0,buy 166 | 0,NNP,VB,0.133333,0,0,0,0.095238,0.55012,4,1,0,0,0.5,0,0,0,0,4,19,2,0,0,1,1,call,0.25,0,0,0,102230,0,'get suit for Boston',0,1,0,0,buy 167 | 0,NN,VB,0,0,0,0,0,0.469299,3,1,0,0,0.333333,0,0,0,0,5,17,1,0,0,1,1,call,0.333333,0,0,0,-522328435,0,'Remember the Milk',0,1,0,0,buy 168 | 0,NN,NN,0.111111,0,0,0,0,0.37984,4,0,0,0,0.75,0,0,0,0,5.75,26,3,0,0,1,1,service,0,0,0,0,1822752074,0,'order a meditation cushion',0,0,0,0,buy 169 | 0,NN,JJ,0.4,0,0,0,0,0.55168,2,0,1,0,0.5,0,0,0,0,3.5,8,1,0,0,0,1,buy,0,0,0,0.5,1822752074,0,'Buy wrap',0,0,0,0.083333,buy 170 | 0,NNS,NN,0,0,0,0,0.333333,0.370768,10,2,0,0,0.5,0,0,0,0,6.2,69,5,0,0,1,1,call,0.2,0,0,0,1822752074,0,'Order/create invitations, call invitees, or compose email 
invitations',0.607143,0,0,0,buy 171 | 0,NN,NN,0,0,0,0,0,0.459162,5,0,0,0,0.6,0,0,0.090909,0,4.6,27,3,0,0,2,1,call,0,0,0,0,1822752074,0,'paper for the upstairs desk',0,0,0,0,buy 172 | 0,NN,VB,0.044444,0,0,0.16,0,0.476711,4,1,0,0.069767,0.25,0,0.090909,0,0.115385,4,19,1,0,0,1,1,buy,0.25,0.052632,0,0,3530173,0,'Sign up for pottery',0,0,0,0,buy 173 | 0,NN,VB,0.4,0,0,0,0,0.499446,3,1,0,0,0.666667,0,0,0,0,5,17,2,0,0,0,1,buy,0.333333,0,0,0,97926,0,'Buy container mix',0,0,0,0.083333,buy 174 | 0,NN,VB,0.4,0,0,0,0,0.554054,3,1,1,0,0.333333,0,0,0,0,5,17,1,0,0,0,1,buy,0.333333,0,0,0.333333,97926,0,'Buy other jewelry',0,0,0,0.083333,buy 175 | 0,NNS,VB,0.4,0,0,0,0,0.439946,2,1,0,0,0.5,0,0,0,0,5.5,12,1,0,0,0,1,buy,0.5,0,0,0,97926,0,'Buy earrings',0,0,0,0.083333,buy 176 | 0.088235,NN,NN,0.111111,0,0,0,0,0.463207,3,0,0,0,0.666667,0,0,0,0,3.333333,12,2,0,0,1,1,service,0,0,0,0,1822752074,0,'order a cake',0,0,0,0,buy 177 | 0,NN,JJ,0.4,0,0,0,0,0.552242,2,0,1,0,0.5,0,0,0,0,4,9,1,0,0,0,1,pay-bill-online,0,0,0,0.5,1822752074,0,'Buy purse',0,0,0,0.083333,buy 178 | 0,NN,VB,0,0,0,0.28,0,0.47319,6,1,0,0,0.666667,0,0,0,0,5.5,38,4,0.142857,0,1,1,call,0.166667,0,0,0,3029737,0,'Book reception venue and sign contract',0,0,0,0,buy 179 | 0,NN,NNP,0.088889,0,0,0,0,0.355624,4,1,0,0,0.75,0,0,0,0.076923,7.25,32,3,0,0,0,1,service,0.25,0,0,0,1822752074,0,'Research bathroom mirrors online',0,0,0,0,buy 180 | 0,NN,VB,0.4,0,0,0,0,0.438651,2,1,0,0,0.5,0,0,0,0,5.5,12,1,0,0,0,1,buy,0.5,0,0,0,97926,0,'buy necklace',0,0,0,0.083333,buy 181 | 0,NN,NN,0.044444,0,0,0,0,0.388266,5,0,1,0,0.6,0,0,0,0,6.6,37,3,0,0,1,1,buy,0,0,0,0.2,1822752074,0,'subscription to safari library online',0,0,0,0,buy 182 | 0,NNS,VB,0.4,0,0,0,0,0.433738,10,1,1,0,0.6,0,0,0,0,6.5,74,6,0,0,2,1,postal,0.1,0,0,0.1,97926,0,'Buy special postage stamps for invitation envelopes and response envelopes',0,0,0,0.083333,buy 183 | 0,NNS,VB,0,0,0,0,0,0.475226,2,1,0,0,0.5,0,0,0,0,6,13,1,0,0,0,1,service,0.5,0,0,0,1094496948,0,'replace shoes',0,0,0,0,buy 184 | 
0,NNS,NN,0,0,0,0,0,0.45605,4,0,0,0,1,0,0,0,0,5.5,25,4,0,0,0,1,call,0,0,0,0,1822752074,0,'research cell phone plans',0,0,0,0,buy 185 | 0,NNS,VB,0.044444,0,0,0,0,0.549679,4,1,1,0.069767,0.25,0,0.090909,0,0.115385,3.75,18,1,0,0,0,1,buy,0.25,0.105263,0,0.25,3440673,0,'Pick up more boxes',0,0,0,0,buy 186 | 0,NN,VB,0.4,0,0,0,0,0.364698,4,1,0,0,0.75,0,0,0,0,6.5,29,3,0,0,0,1,buy,0.25,0,0,0,97926,0,'buy stuff sacks/pack backpack',0,0,0,0.083333,buy 187 | 0,NNS,VB,0.4,0,0,0,0,0.338777,4,1,0,0,0.75,0,0,0,0,5.5,25,3,0,0,0,1,service,0.25,0,0,0,97926,0,'buy flower girl headbands',0,1,0,0.083333,buy 188 | 0,VBN,VB,0.133333,0,0,0,0.095238,0.552758,3,2,0,0,0.333333,0,0,0,0,3.666667,13,1,0,0,0,1,call,0.666667,0,0,0,102230,0,'get keys made',0,0,0,0,buy 189 | 0,NNS,VB,0.4,0,0,0,0,0.472995,3,1,0,0,0.666667,0,0,0,0,6,20,2,0,0,0,1,buy,0.333333,0,0,0,97926,0,'Buy hair accessories',0,0,0,0.083333,buy 190 | 0,NN,JJ,0.4,0,0,0,0,0.386432,4,0,1,0,0.75,0,0,0,0,6,27,3,0,0,0,1,buy,0,0,0,0.25,1822752074,0,'Buy rehearsal dinner outfit',0,0,0,0.083333,buy 191 | 0,NNS,NNP,0.177778,0,0,0,0,0.453556,3,0,0,0,1,0,0,0,0,6.666667,22,3,0,0,0,1,buy,0,0,0,0,1822752074,0,'Order stationery items',0,0,0,0,buy 192 | 0,NNS,NNP,0.111111,0,0,0,0,0.521677,5,0,2,0,0.4,0,0,0,0,6,34,2,0,0,0,1,pay-bill-online,0,0,0,0.4,1822752074,0,'Reserve any necessary rental items',0,0,0,0,buy 193 | 0,NN,VB,0.4,0,0,0,0,0.251512,3,1,1,0,0.333333,0,0,0,0,6.333333,21,1,0,0,0,1,buy,0.333333,0,0,0.333333,97926,0,'Buy going-away outfit',0,0,0,0.083333,buy 194 | 0.088235,-RRB-,NN,0.2,0,1,0,0,0.494411,28,5,0,0.209302,0.285714,0.035714,0,0,0,5.107143,168,8,0,0,8,1,call,0.178571,0,0,0,1822752074,0,'Purchase birdseed/bubbles/rose petals for guests to shower you with as you leave ceremony site (this custom could instead be performed as you depart from the reception)',0,0,0,0,buy 195 | 0.058824,NN,VB,0.133333,0,0,0,0.095238,0.554364,3,1,0,0,0.666667,0,0,0,0,6.333333,21,2,0,0,0,1,pay-bill-online,0.333333,0,0,0,102230,0,'get wedding 
insurance',0,0,0,0,buy 196 | 0,NN,JJ,0.4,0,0,0,0,0.376219,2,0,1,0,0.5,0,0,0,0,4.5,10,1,0,0,0,1,buy,0,0,0,0.5,1822752074,0,'Buy garter',0,0,0,0.083333,buy 197 | 0,NN,NNP,0.444444,0,0,0,0,0.287136,5,1,0,0,0.6,0,0,0,0,4.6,26,3,0,0,0,1,buy,0.2,0,0,0,1822752074,0,'Tuscon: buy cannister fuel',0,1,0,0.083333,buy 198 | 0.088235,NN,NNP,0,0,0,0,0,0.341863,3,0,0,0,1,0,0,0,0,5.333333,18,3,0,0,0,1,plan-meal,0,0,0,0,1822752074,0,'Select cake topper',0,1,0,0,buy 199 | 0,NNPS,VB,0,0,0,0,0,0.392175,3,1,0,0,0.666667,0,0.181818,0,0,7.666667,25,2,0,0,0,1,call,0.333333,0,0,0,-1405517509,0,'Practice Mandarin Chinese',0,1,0,0,self-improve 200 | 0,NNP,VB,0.044444,0,0,0.16,0,0.570339,4,1,0,0.069767,0.25,0,0.181818,0,0.115385,4,19,1,0,0,1,1,call,0.25,0.052632,0,0,3530173,0,'Sign up for Spanish',0,1,0,0,self-improve 201 | 0,NNP,VB,0,0,0,0,0,0.523105,8,2,0,0,0.25,0,0.454545,0.090909,0,3.875,36,2,0,0,0,1,call,0.25,0,0,0,102846020,0,'Learn about \"Cold Calling\" from Kyle',0,1,0,0,self-improve 202 | 0,NN,VB,0,0,0,0,0,0.188599,3,1,0,0,0.666667,0,0.363636,0.090909,0,9,29,2,0,0,0,1,call,0.333333,0,0,0,102846020,0,'learn guyline knots/technique',0,0,0,0,self-improve 203 | 0,NNP,NN,0,0,0,0,0,0.383775,5,0,1,0,0.6,0,0,0,0,5.8,33,3,0,0,1,1,pay-bill-online,0,0,0,0.2,1822752074,0.538462,'print general giveaways for David',0,1,0,0,print 204 | 0,NNS,NN,0,0,0,0,0,0.406903,2,0,0,0,1,0,0,0,0,4.5,10,2,0,0,0,1,postal,0,0,0,0,1822752074,0.307692,'print maps',0,0,0,0,print 205 | 0,NNS,NN,0,0,0,0,0,0.338348,5,1,0,0,0.6,0,0,0,0,6,34,3,0,0,0,1,pay-bill-online,0.2,0,0,0,1822752074,0.384615,'Print/have fedex cut 300 giveaways',0,1,0,0,print 206 | 0,NNP,VB,0,0,0,0,0,0.259075,3,1,0,0,0.666667,0,0,0,0,5.666667,19,2,0,0,0,1,print,0.333333,0,0,0,106934957,0.384615,'Print Gen. 
Giveaway',0,0,0,0,print 207 | 0,NN,VB,0.044444,0,0,0,0,0.446648,6,1,0,0.069767,0.5,0,0.090909,0,0.192308,3.5,26,3,0,0,1,1,call,0.166667,0.052632,0,0,113762,0,'Set up org file for garden',0,1,0,0,service 208 | 0,NNS,VB,0,0,1,0,0,0.375183,3,1,0,0,0.333333,0.333333,0,0,0,6.666667,22,1,0,0,0,1,postal,0.333333,0,0,0,-318370553,0,'prepare resupply boxes',0,0,0,0,service 209 | 0,NN,JJ,0,0,0,0,0,0.540102,2,0,1,0,0.5,0,0,0,0.269231,5.5,12,1,0,0,0,1,service,0,0,0,0.5,1822752074,0,'clean closet',0,0,0,0,service 210 | 0,NN,JJ,0,0,0,0,0,0.510607,4,0,1,0,0.5,0,0,0,0.269231,4.25,20,2,0,0,1,1,service,0,0,0,0.25,1822752074,0,'Clean the litter box',0,0,0,0,service 211 | 0,JJ,NN,0,0,1,0,0.095238,0.401124,3,0,1,0,0.333333,0.333333,0,0,0,4.666667,16,1,0,0,0,1,service,0,0,0,0.333333,1822752074,0,'caulk back light',0.035714,0,0,0,service 212 | 0,NN,VB,0,0,0,0,0,0.570174,3,1,1,0,0.333333,0,0,0,0.076923,4.333333,15,1,0,0,0,1,service,0.333333,0,0,0.333333,3432985,0,'Pack spare room',0,0,0,0,service 213 | 0,NN,VB,0,0,0,0,0,0.590655,4,1,0,0,0.25,0,0,0,0.076923,3.25,16,1,0,0,1,1,call,0.25,0,0,0,3552391,0,'Take out the dog',0,0,0,0,service 214 | 0,NN,NN,0,0,0,0,0,0.353338,2,0,0,0,1,0,0,0,0,5.5,12,2,0,0,0,1,service,0,0,0,0,1822752074,0,'glue pumpkin',0,0,0,0,service 215 | 0,NN,JJ,0.044444,0,0,0,0,0.571832,2,0,1,0,0.5,0,0,0,0.346154,6.5,14,1,0,0,0,1,service,0,0,0,0.5,1822752074,0,'clean bathroom',0,0,0,0,service 216 | 0.058824,POS,NNP,0,0,0,0,0,0.387923,6,0,0,0.069767,0.666667,0,0,0,0.076923,4.166667,29,4,0,0,1,1,service,0,0.052632,0,0,1822752074,0,'Iron shirts for Flat lander\'s',0,2,0,0,service 217 | 0,NN,VB,0,0,0,0,0,0.391244,4,1,0,0,0.5,0,0,0,0,3.25,16,2,0,0,1,1,call,0.25,0,0,0,3357649,0,'move the pee pad',0,0,0,0,service 218 | 0,NN,VB,0,0,0,0,0,0.417101,3,1,0,0,0.666667,0,0,0,0,7.333333,24,2,0,0,0,1,call,0.333333,0,0,0,989834062,0,'reconcile bank statement',0,0,0,0,service 219 | 0,NN,VB,0,0,0,0,0,0.319196,4,1,0,0,0.5,0,0,0,0,7.75,34,2,0,0,1,1,service,0.25,0,0,0,1957569947,0,'Install 
Quicksilver and experiment',0,0,0,0,service 220 | 0,NN,NN,0,0,0,0,0,0.287821,4,0,0,0,0.75,0,0,0,0,5.25,24,3,0,0,0,1,service,0,0,0,0,1822752074,0,'Vacuum hardwoods & couch',0,0,0,0,service 221 | 0,NN,JJ,0,0,0,0,0,0.53992,3,0,1,0,0.333333,0,0,0,0.269231,4.666667,16,1,0,0,1,1,service,0,0,0,0.333333,1822752074,0,'Clean the carpet',0,0,0,0,service 222 | 0,NN,VBN,0.044444,0,0,0,0,0.544621,3,1,0,0.069767,0.333333,0,0.090909,0,0.115385,3,11,1,0,0,0,1,service,0.333333,0.052632,0,0,1822752074,0,'set up tent',0,0,0,0,service 223 | 0,CD,NNS,0,0,0,0,0,0.543524,3,0,0,0,0.333333,0,0,0,0,4,14,1,0,0,1,1,pay-bill-online,0,0,0,0,1822752074,0,'Taxes for 2015',0,1,0,0,service 224 | 0,NNP,NNP,0,0,0,0,0,0.300871,2,0,0,0,1,0,0,0,0,4.5,10,2,0,0,0,1,service,0,0,0,0,1822752074,0,'Wash Mazda',0,2,0,0,service 225 | 0,NN,VBG,0,0,0,0,0,0.394523,4,1,0,0,0.5,0,0,0,0.076923,5.25,24,2,0,0,1,1,service,0.25,0.052632,0,0,1822752074,0,'Ironing shirts for staff',0,0,0,0,service 226 | 0,NN,JJ,0,0,0,0,0,0.543383,5,0,2,0,0.4,0,0,0,0.153846,4.4,26,2,0,0,0,1,service,0,0,0,0.4,1822752074,0,'Clear out small garden bed',0,1,0,0,service 227 | 0,NN,JJ,0.044444,0,0,0,0,0.571832,2,0,1,0,0.5,0,0,0,0.346154,6.5,14,1,0,0,0,1,service,0,0,0,0.5,1822752074,0,'clean bathroom',0,0,0,0,service 228 | 0.058824,NN,VB,0.044444,0,0,0,0,0.549412,7,2,0,0.069767,0.285714,0,0.090909,0,0.115385,3.285714,29,2,0,0,0,1,call,0.285714,0.105263,0,0,3304,0,'Go pick up my son from school',0,0,0,0,service 229 | 0,NNS,NN,0,0,0,0,0,0.372406,3,0,0,0,0.666667,0,0,0.090909,0.076923,6,20,2,0,0,0,1,buy,0,0,0,0,1822752074,0,'figure out bookmarks',0,0,0,0,service 230 | 0,NNS,NN,0,0,0,0,0,0.272831,3,0,0,0,1,0,0,0,0,8,26,3,0,0,0,1,school-work,0,0,0,0,1822752074,0,'review orienteering basics',0,0,0,0,service 231 | 0,NN,JJ,0.044444,0,0,0,0,0.413027,3,0,1,0.069767,0.333333,0,0.090909,0,0.384615,5,17,1,0,0,0,1,service,0,0.052632,0,0.333333,1822752074,0,'clean up woodpile',0,0,0,0,service 232 | 
0,NN,VB,0,0,0,0,0,0.563105,2,1,0,0,0.5,0,0,0,0.153846,5.5,12,1,0,0,0,1,service,0.5,0,0,0,3432985,0,'Pack kitchen',0,0,0,0,service 233 | 0,NN,NN,0,0,0,0,0,0.34765,1,0,0,0,1,0,0,0,0,5,5,1,0,0,0,1,call,0,0,0,0,1822752074,0,sweep,0,0,0,0,service 234 | 0,NN,JJ,0,0,0,0,0,0.609302,2,0,1,0,0.5,0,0,0,0.346154,6,13,1,0,0,0,1,service,0,0,0,0.5,1822752074,0,'clean kitchen',0,0,0,0,service 235 | 0,NNS,VB,0,0,0,0,0,0.392565,2,1,0,0,0.5,0,0,0,0,4.5,10,1,0,0,0,1,service,0.5,0,0,0,108302,0,'mop floors',0,0,0,0,service 236 | 0,NN,NN,0,0,0,0,0,0.428761,3,0,0,0,1,0,0,0,0,5,17,3,0,0,0,1,call,0,0,0,0,1822752074,0,'test sleep system',0,0,0,0,service 237 | 0,NNS,NN,0,0,0,0,0,0.456851,4,0,0,0,0.75,0,0,0,0.076923,5.25,24,3,0,0,0,1,service,0,0,0,0,1822752074,0,'household - water plants',0,0,0,0,service 238 | 0,NNS,NNP,0,0,0,0,0,0.472321,3,0,0,0,0.666667,0,0,0,0,4.333333,15,2,0,0,1,1,service,0,0,0,0,1822752074,0,'Wash the dishes',0,1,0,0,service 239 | 0,NNS,VB,0,0,0,0,0,0.357214,2,1,0,0,0.5,0,0,0,0,7.5,16,1,0,0,0,1,service,0.5,0,0,0,1316389283,0,'organize closets',0,0,0,0,service 240 | 0,NN,VB,0,0,0,0,0,0.352519,3,1,0,0,0.666667,0,0,0,0.076923,7,23,2,0,0,0,1,plan-meal,0.333333,0,0,0,-373408302,0,'assemble toiletries kit',0,0,0,0,service 241 | 0,NNS,NN,0,0,1,0,0,0.467128,3,0,0,0,0.666667,0.333333,0,0,0.076923,5,17,2,0,0,0,1,service,0,0,0,0,1822752074,0,'plant more plants',0,0,0,0,service 242 | 0,VB,NN,0,0,0,0,0,0.410962,4,1,0,0,0.5,0,0,0,0,4.75,22,2,0,0,1,1,postal,0.25,0,0,0,1822752074,0,'Upload photos to gmail',0,0,0,0,service 243 | 0,NN,VB,0,0,0,0.08,0,0.406105,3,1,0,0,0.666667,0,0,0,0.076923,5.666667,19,2,0,0,0,1,service,0.333333,0,0,0,-373408302,0,'assemble repair kit',0,0,0,0,service 244 | 0,NN,NN,0,0,0,0,0,0.351172,2,0,0,0,1,0,0,0,0,6,13,2,0,0,0,1,service,0,0,0,0,1822752074,0,'finish mowing',0,0,0,0,service 245 | 0,NN,VB,0,0,0,0,0,0.298758,4,1,0,0,0.75,0,0,0,0,6.5,29,3,0,0,0,1,service,0.25,0,0,0,1094496948,0,'replace humidifier air filter',0,0,0,0,service 246 | 
0,NN,NN,0,0,0,0,0,0.335185,8,0,0,0,0.625,0,0,0,0,3.875,38,5,0,0,1,1,call,0,0,0,0,1822752074,0,'household - setup vlc @ term on doctor',0,1,0,0,service 247 | 0,-RRB-,VB,0,0,0,0,0,0.38532,6,1,1,0,0.166667,0,0,0,0,4.666667,31,1,0,0,1,1,call,0.166667,0,0,0.166667,-734452820,0,'Arrange flowers (if applicable)',0,0,0,0,service 248 | 0,NN,VB,0,0,0,0,0,0.472153,4,1,1,0,0.25,0,0,0,0.076923,5,23,1,0,0,0,1,service,0.25,0,0,0.25,94001407,0,'Break out concrete slab',0,0,0,0,service 249 | 0,'\'\'',JJ,0.133333,0,0,0,0.095238,0.582737,14,4,1,0,0,0,0.181818,0.272727,0,2.785714,47,0,0,0,3,1,call,0.285714,0,0,0.071429,102846020,0,'complete \"Learn by Doing\" and \"Did I get this?\"',0,0,0,0,school-work 250 | 0,NN,VB,0,0,0,0,0,0.450248,3,1,0,0,0.666667,0,0,0.090909,0,6.333333,21,2,0,0,0,1,plan-meal,0.333333,0,0,0,113399775,0,'write nutrition paper',0,0,0,0,school-work 251 | 0,CD,NN,0,0,0,0,0,0.137812,2,0,0,0,0.5,0,0,0.090909,0,1.5,4,1,0,0,0,1,postal,0,0,0,0,1822752074,0,'HW 1',0,2,0,0,school-work 252 | 0,NNS,NN,0,0,0,0,0,0.387932,3,0,0,0,0.666667,0,0,0.181818,0.076923,6,20,2,0,0,0,1,call,0,0,0,0,1822752074,0,'figure out Footnotes',0,0,0,0,school-work 253 | 0,CD,NNP,0,0,0,0,0,0.265165,3,0,0,0,0.666667,0,0,0.181818,0,4.666667,16,2,0,0,0,1,call,0,0,0,0,1822752074,0,'Read Modules 1-4',0,2,0,0,school-work 254 | 0,NN,NN,0,0,0,0,0,0.380167,1,0,0,0,1,0,0,0.181818,0,4,4,1,0,0,0,1,school-work,0,0,0,0,1822752074,0,Quiz,0,0,0,0,school-work 255 | 0,NN,NN,0,0,0,0,0,0.305451,2,0,0,0,1,0,0,0.181818,0,6,13,2,0,0,0,1,school-work,0,0,0,0,1822752074,0,'syllabus quiz',0,0,0,0,school-work 256 | 0,NNP,VB,0,0,0,0,0,0.422694,8,1,0,0,0.5,0,0,0,0,4.5,42,4,0,0,1,2,call,0.125,0,0,0,93029230,0,'Apply for Grad. 
Student assistance from UM',0,1,0,0,school-work 257 | 0.088235,NN,VB,0,0,0,0,0,0.391976,6,1,1,0,0.5,0,0,0,0,6.333333,43,3,0,0,1,1,call,0.166667,0,0,0.166667,-308949343,0,'Provide final headcount to site coordinator',0,0,0,0,contact 258 | 0.117647,'\'\'',NN,0,0,0,0,0,0.421594,14,0,0,0.255814,0.571429,0,0,0,0,4.142857,67,8,0,0,2,1,call,0,0,0,0,1822752074,0,'Contact Stella\'s staff on interest for \"Stella\'s Super Mario Party\"',0,2,0,0,contact 259 | 0,NNP,VB,0,0,0,0,0,0.444832,3,1,0,0,0.666667,0,0,0,0,7,23,2,0,0,0,1,pay-bill-online,0.333333,0,0,0,-1367724422,0,'Cancel Comcast Internet',0,1,0,0,contact 260 | 0,.,VB,0,0,0,0,0.047619,0.500195,14,3,0,0.325581,0.285714,0,0,0,0,3.857143,66,4,0,0,6,1,call,0.214286,0,0,0,96889,0,'Ask KFB for list of restaurants they have worked with in the past.',0,2,0,0,contact 261 | 0,NN,VB,0,0,0,0,0,0.46772,5,1,0,0,0.6,0,0,0,0,5.8,33,3,0,0,1,1,pay-bill-online,0.2,0,0,0,-1367724422,0,'cancel renters insurance with AAA',0,0,0,0,contact 262 | 0,NNS,NN,0,0,0,0,0,0.624689,4,0,1,0,0.5,0,0,0,0,4,19,2,0,0,1,1,call,0,0,0,0.25,1822752074,0,'Talk to few lawyers',0,0,0,0,contact 263 | 0,NNP,VB,0,0,0,0,0,0.377999,7,2,0,0.093023,0.285714,0,0,0,0,3.285714,29,2,0,0,2,1,buy,0.285714,0,0,0,96889,0,'Ask IC to recommend us to ILG',0,0,0,0,contact 264 | 0,NNS,RB,0,0,1,0,0,0.338006,3,0,0,0.348837,0.333333,0.333333,0,0,0,7.666667,25,1,0,0,1,1,plan-meal,0,0,0,0,1822752074,0,'Followup with restaurants',0,0,0,0,contact 265 | 0,NNS,NN,0,0,0,0,0,0.534252,7,0,0,0.116279,0.571429,0,0,0,0,4.571429,38,4,0,0,2,1,call,0,0,0,0,1822752074,0,'Share contact numbers with at 2 people',0,2,0,0,contact 266 | 0,NN,NN,0,0,0,0,0,0.547899,4,0,0,0,0.75,0,0,0,0,4.75,22,3,0,0,1,1,call,0,0,0,0,1822752074,0,'change address at bank',0,0,0,0,contact 267 | 0,NNP,NN,0,0,0,0,0,0.59759,3,0,0,0,0.666667,0,0,0,0,3.333333,12,2,0,0,1,1,call,0,0,0,0,1822752074,0,'talk to Mike',0,1,0,0,contact 268 | 
0,NNS,VB,0,0,1,0,0.095238,0.503746,52,8,4,0.093023,0,0.019231,0,0,0,5.134615,312,0,0.142857,0,11,1,call,0.153846,0.368421,0,0.076923,3526536,0,'Send hotel and transportation information to out-of-town guests, include directions from local airorts and cities from which many guests will be arriving by car, information (description, location, phone number) and any code or name that must be mentioned to receive discounted rate when making room reservations',0,0,0,0,contact 269 | 0,NNS,NNP,0,0,0,0,0,0.007582,3,0,0,0,1,0,0,0,0,5.666667,19,3,0,0,0,1,plan-meal,0,0,0,0,1822752074,0,'Ann Arbor followups',0,1,0,0,contact 270 | 0.147059,-RRB-,VB,0.111111,0,0,0,0,0.451727,20,4,2,0.302326,0.25,0,0.090909,0,0.115385,4.45,106,5,0,0,5,1,call,0.2,0.052632,0,0.1,96889,0,'Ask caterer/coordinator to have top teir of wedding cake packed up for you (to save for first anniversary)',0,1,0,0,contact 271 | 0,NNS,RB,0,0,1,0,0,0.249358,4,0,0,0.348837,0.5,0.25,0,0,0,6.25,28,2,0,0,1,1,plan-meal,0,0,0,0,1822752074,0,'Followup with GR restaurants',0,0,0,0,contact 272 | 0,NN,NN,0,0,0,0,0,0.347091,7,0,0,0,0.857143,0,0,0,0,3.571429,31,6,0,0,1,1,call,0,0.052632,0,0,1822752074,0,'touch base w judy on WF account',0,1,0,0,contact 273 | 0,NNS,NNP,0,0,0,0,0,0.43704,4,0,0,0.162791,0.5,0,0,0,0,5.75,26,2,0,0,1,1,call,0,0,0,0,1822752074,0,'Outreach to 10 restaurants',0,1,0,0,contact 274 | 0.088235,NNS,VB,0,0,0,0,0,0.534035,9,1,0,0,0.666667,0,0,0,0,5.888889,61,6,0,0,1,1,call,0.111111,0,0,0,3173137,0,'Give site coordinator arrival times for all service providers',0,0,0,0,contact 275 | 0,NNP,VB,0,0,0,0,0.095238,0.314994,5,2,0,0,0.4,0,0,0,0,5.8,33,2,0,0,1,1,call,0.4,0.368421,0,0,3526536,0,'send updated financials to Armand',0,1,0,0,contact 276 | 0,NN,RB,0,0,1,0,0,0.362459,12,0,0,0.418605,0.5,0.083333,0,0,0,4.833333,68,6,0,0,3,1,call,0,0,0,0,1822752074,0,'Followup with Jenny at KFB about the restaurants\' promo request form',0,2,0,0,contact 277 | 
0,-RRB-,VB,0,0,0,0,0,0.383837,9,1,1,0,0.333333,0,0,0,0,5.888889,59,3,0,0,2,1,call,0.111111,0,0,0.111111,951117504,0,'Confirm final details with photographer (and videographers)',0,0,0,0,contact 278 | 0,JJ,NN,0,0,0,0,0,0.435008,4,0,1,0.069767,0.5,0,0,0,0,5.5,25,2,0,0,1,1,pay-bill-online,0,0,0,0.25,1822752074,0,'request permit for aframe',0,0,0,0,contact 279 | 0,NNS,RB,0.066667,0,1,0,0,0.555731,10,2,0,0.069767,0.1,0.1,0,0,0,3.9,48,1,0,1,4,1,call,0.2,0,0,0,1822752074,0,'Inquire as to what you need to bring to fittings',0,0,0.1,0,contact 280 | 0,NNS,RB,0,0,1,0,0,0.302937,4,0,0,0.186047,0.5,0.25,0,0,0,7.5,33,2,0,0,1,1,plan-meal,0,0,0,0,1822752074,0,'Followup with restaurant invoices',0,0,0,0,contact 281 | 0,VB,RB,0,0,1,0,0,0.360939,4,1,0,0.186047,0.25,0.25,0,0,0,5.5,25,1,0,0,1,1,call,0.25,0,0,0,108399245,0,'Followup with World Renew',0,1,0,0,contact 282 | 0,NNS,VB,0,0,0,0,0,0.403412,7,1,0,0,0.571429,0,0,0,0.076923,5.714286,46,4,0,0,1,1,call,0.142857,0,0,0,108386675,0,'Reach out to Seattle restaurant coalition orgs',0,1,0,0,contact 283 | 0,NN,NN,0,0,0,0,0,0.616193,2,0,0,0,1,0,0,0,0,6.5,14,2,0,0,0,1,call,0,0,0,0,1822752074,0,'change address',0,0,0,0,contact 284 | 0,NN,NN,0,0,0,0,0,0.408012,7,0,0,0.116279,0.714286,0,0,0,0,4.857143,40,5,0,0,1,1,call,0,0,0,0,1822752074,0,'Contact Mars Hill about HQ and marketing',0,1,0,0,contact 285 | 0,NN,VB,0,0,0,0,0,0.34723,7,1,1,0,0.428571,0,0,0,0,5.571429,45,3,0,0,2,1,call,0.142857,0,0,0.142857,1671386080,0,'Discuss bustle style of train with seamstress',0,0,0,0,contact 286 | 0,NNS,RB,0,0,1,0,0,0.191724,4,0,0,0.186047,0.5,0.25,0,0,0,6,27,2,0,0,1,1,call,0,0,0,0,1822752074,0,'Followup with National NPOs',0,1,0,0,contact 287 | 0.088235,.,VB,0.111111,0,0,0,0,0.456944,11,3,0,0.302326,0.272727,0,0.090909,0,0.115385,4.818182,62,3,0,0,2,1,call,0.272727,0.052632,0,0,96889,0,'Ask caterer/coordinator to have cake topper packed up for you.',0,1,0,0,contact 288 | 
0,NN,VB,0,0,0,0,0,0.373331,11,1,1,0.162791,0.454545,0,0,0,0,4.545455,58,5,0,0,2,1,call,0.090909,0,0,0.090909,-1183699191,0,'Invite restaurants (and NPOs) to Patagonia event next week',0,3,0,0,contact 289 | 0.088235,NN,RB,0,0,1,0,0,0.360547,7,0,1,0,0.571429,0.142857,0,0,0,6.142857,49,4,0,0,1,1,call,0,0,0,0.142857,1822752074,0,'Tally final guest count with site manager/caterer',0,1,0,0,contact 290 | 0,NNS,VB,0,0,0,0,0,0.425146,6,1,0,0,0.666667,0,0,0,0,4.833333,33,4,0,0,0,1,buy,0.166667,0,0,0,-734452820,0.307692,'Arrange print run: giveaway cards',0,0,0,0,contact 291 | 0,NNS,RB,0,0,1,0,0,0.370813,4,0,1,0.44186,0.25,0.25,0,0,0,7,31,1,0,0,1,1,call,0,0,0,0.25,1822752074,0,'Followup with Local Restaurants',0,1,0,0,contact 292 | 0,RP,VB,0.111111,0,1,0,0,0.505248,18,3,1,0.069767,0.277778,0.055556,0.090909,0,0.115385,4.833333,102,5,0,0,3,1,call,0.166667,0.052632,0,0.055556,951117504,0,'Confirm with your rental company all details, times, and sites where items must be delivered/picked up',0,0,0,0,contact 293 | 0,NN,JJ,0,0,0,0,0,0.134104,3,0,1,0.27907,0.666667,0,0,0,0,7.666667,25,2,0,0,0,1,calendar,0,0,0,0.333333,1822752074,0,'Local Restuarant followup',0,0,0,0,contact 294 | 0,NN,NN,0,0,0,0,0,0.468541,4,0,0,0.069767,0.75,0,0,0,0,5.75,26,3,0,0,1,1,pay-bill-online,0,0,0,0,1822752074,0,'request permit for signage',0,0,0,0,contact 295 | 0,NN,NN,0,0,0,0,0,0.265873,2,0,0,0,1,0,0,0,0,7,15,2,0.142857,0,0,1,school-work,0,0,0,0,1822752074,0,'book babysitter',0,0,0,0,contact 296 | 0,NNP,JJ,0,0,0,0,0,0.345547,3,0,1,0,0.333333,0,0,0,0,6.333333,21,1,0,0,1,1,call,0,0,0,0.333333,1822752074,0,'Follow-up with Monica',0,1,0,0,contact 297 | 0,NNP,NN,0,0,0,0,0,0.269166,6,1,0,0.116279,0.833333,0,0,0,0,7.666667,51,5,0,0,0,1,call,0.166667,0,0,0,1822752074,0,'Contact student orgs regarding Storyteller Rotation',0,1,0,0,contact 298 | 0.058824,NN,NN,0,0,0,0,0,0.484375,10,1,1,0.27907,0.6,0,0,0,0,5.3,61,6,0,0,1,1,call,0.1,0,0,0.1,1822752074,0,'Contact local town clerk\'s office to arrange marriage 
license',0,0,0,0,contact 299 | 0,NNS,NNP,0,0,0,0,0,0.13676,2,0,0,0,1,0,0,0,0,7,15,2,0,0,0,1,email,0,0,0,0,1822752074,0,'Press Followups',0,1,0,0,contact 300 | 0,NNS,VB,0,0,0,0.12,0,0.523948,4,2,1,0,0.25,0,0,0,0,6,27,1,0,0,0,1,plan-meal,0.5,0,0,0.25,3143097,0,'find healthy baking recipes',0,0,0,0.5,plan-meal 301 | 0,NN,NN,0,0,0,0,0,0.558769,2,0,0,0,1,0,0,0,0,5.5,12,2,0,0,0,1,plan-meal,0,0,0,0,1822752074,0,'package food',0,0,0,0.166667,plan-meal 302 | 0,NNS,VB,0.4,0,0,0,0,0.546718,2,1,0,0,0.5,0,0,0,0,7,15,1,0,0,0,1,buy,0.5,0,0,0,97926,0,'buy ingredients',0,0,0,0.083333,plan-meal 303 | 0,NN,VB,0,0,0,0,0,0.54952,6,1,0,0,0.5,0,0,0,0,4.5,32,3,0,0,2,1,call,0.166667,0,0,0,109757538,0,'start meal planning for the week',0,1,0,0.083333,plan-meal 304 | 0,NN,NNP,0,0,0,0,0,0.495266,2,0,0,0,1,0,0,0,0,4,9,2,0,0,0,1,plan-meal,0,0,0,0,1822752074,0,'Plan menu',0,0,0,0.083333,plan-meal 305 | 0,NNS,VB,0,0,0,0,0,0.402051,4,1,0,0,0.5,0,0,0,0,6,27,2,0,0,0,1,plan-meal,0.25,0,0,0,-135762164,0,'identify 4-6 dinner recipes',0,1,0,0.333333,plan-meal 306 | 0,NNS,NN,0,0,0,0,0,0.466517,3,0,0,0,1,0,0,0,0,5,17,3,0,0,0,1,plan-meal,0,0,0,0,1822752074,0,'test cook recipes',0,0,0,0.25,plan-meal 307 | 0,NN,VB,0,0,0,0,0,0.595039,3,1,0,0,0.666667,0,0,0,0,4,14,2,0,0,0,1,pay-bill-online,0.333333,0,0.64,0,110760,0,'pay bills',0,0,0,0,pay-bill-online 308 | 0.294118,NN,VB,0,0,0,0,0,0.440004,5,2,0,0,0.4,0,0,0,0,7.2,40,2,0,0,1,1,calendar,0.4,0,0,0,-697920873,0,'pay comed',0,0,0,0,calendar 309 | 0.441176,NNS,VB,0,0,0,0,0,0.390764,5,1,1,0,0.4,0,0,0,0,6.8,38,2,0,0,1,1,calendar,0.2,0,0,0.2,-697920873,0,'Pay Expression',0,0,0,0,calendar 310 | 0,NNS,NN,0,0,0,0,0,0.556596,6,1,0,0,0.666667,0,0,0,0,3.5,25,4,0,0,0,1,call,0.166667,0,0,0,1822752074,0,'Pay SF Parking Ticket. 
:(',0.607143,0,0,0,call 311 | 0.058824,NNP,VB,0,0,0,0,0,0.413226,5,1,0,0,0.6,0,0,0,0,6.6,37,3,0.142857,0,1,1,call,0.2,0,0,0,3343854,0,'pay NY tax bill',0,2,0,0,find-travel 312 | 0.058824,NN,VB,0,0,0,0.12,0,0.407804,3,1,0,0,0.666667,0,0,0,0,7,23,2,0,0,0,1,pay-bill-online,0.333333,0,0,0,3202804,0,'pay mortgage',0,0,0,0,find-service 313 | 0,NN,VB,0,0,0,0,0,0.164861,2,1,0,0,0.5,0,0,0,0,6.5,14,1,0.142857,0,0,1,plan-meal,0.5,0,0,0,3029737,0,'pay ATT',0,0,0,0,find-service 314 | 0,-RRB-,NN,0,0,0,0,0,0.468084,13,1,0,0.116279,0.461538,0,0,0,0,3.615385,56,6,0.071429,0,3,1,call,0.076923,0,0,0,1822752074,0,'pay roofer',0,4,0,0,email 315 | 0,NN,NNP,0,0,0,0,0,0.476222,6,0,0,0,0.666667,0,0,0,0,4,29,4,0,0,2,1,call,0,0.105263,0,0,1822752074,0,'Pay PG&E',0,1,0,0,postal 316 | 0,NN,VB,0.133333,0,1,0,0.095238,0.549757,3,1,0,0,0.333333,0.333333,0,0,0,3.666667,13,1,0,0,0,1,service,0.333333,0,0,0,102230,0,'Pay BGE',0,0,0,0,buy 317 | 0.088235,NN,VB,0,0,0,0.28,0,0.519077,8,1,0,0,0.5,0,0,0,0,4.375,42,4,0.142857,0,3,1,call,0.125,0,0,0,3029737,0,'pay student loan',0,0,0,0,buy 318 | 0,NNS,VB,0.4,0,0,0,0,0.432272,4,1,1,0,0.5,0,0,0,0,4.75,22,2,0,0,0,1,pay-bill-online,0.25,0,0,0.25,97926,0,'pay plumber',0,0,0,0.083333,buy 319 | 0,NN,NN,0,0,0,0,0,0.3462,6,0,0,0,0.5,0,0,0,0,5,35,3,0,0,1,1,pay-bill-online,0,0,0,0,1822752074,0,'Pay DMV for Harley before May',0,1,0,0,buy 320 | 0,NN,VB,0,0,0,0,0,0.419855,25,5,1,0,0.36,0,0,0,0,4.8,141,9,0,0,5,1,call,0.2,0,0,0.04,-1164222250,0,'pay rent',0,1,0,0,buy 321 | 0.058824,JJ,VB,0,0,0,0,0,0.415902,5,1,1,0,0.4,0,0,0,0,4.8,28,2,0.142857,0,1,1,service,0.2,0,0,0.2,3343854,0,'pay genki violation',0,0,0,0,service 322 | 0,NN,NN,0,0,0,0.08,0,0.430786,2,0,0,0,1,0,0,0,0,5,11,2,0,0,0,1,postal,0,0,0,0,1822752074,0,'get bills on autopay',0,0,0,0,service 323 | 0.058824,PRP,VB,0,0,0,0.12,0,0.525956,14,3,0,0,0.428571,0,0,0,0.076923,4.285714,73,6,0,0,5,1,call,0.214286,0,0,0,-373408302,0,'Appointment with capital women\'s care',0,1,0,0.083333,service 324 | 
0,NN,VB,0,0,0,0,0,0.409282,4,1,1,0,0.5,0,0,0,0.076923,4.75,22,2,0,0,0,1,buy,0.25,0,0,0.25,-373408302,0,'Schedule appointments with caterers, if necessary',0,1,0,0,service 325 | 0,NNS,VB,0,0,0,0,0.095238,0.450443,8,3,0,0,0.5,0,0,0,0,5,47,4,0,0,1,1,call,0.375,0.368421,0,0,3526536,0,'Schedule rehearsal with officiant, all family members, bridesmaids, best men, wedding planner (if applicable) and other participants.',0,0,0,0,contact 326 | 0,NN,VB,0,0,0,0,0,0.558463,5,1,1,0,0.2,0,0,0,0,5.2,30,1,0,0,1,1,call,0.2,0,0,0.2,109641682,0,'Schedule interviews with photographers (and videographers)',0,1,0,0,contact 327 | 0,NNS,VB,0,0,0,0,0,0.404537,5,1,0,0,0.4,0,0,0,0,6.6,37,2,0,0,1,1,call,0.2,0.105263,0,0,951351530,0,'Schedule cake tasting',0,0,0,0,contact 328 | 0,CD,VB,0,0,0,0,0,0.474516,7,1,0,0.093023,0.428571,0,0,0,0,3.857143,33,3,0,0,1,1,call,0.142857,0,0,0,96889,0,'Apple appointment',0,3,0,0,contact 329 | --------------------------------------------------------------------------------