├── src
├── r
│ ├── .gitignore
│ ├── dist.r
│ ├── intercoder.r
│ └── analysis.r
├── clojure
│ └── uic
│ │ └── nlp
│ │ └── todo
│ │ ├── .gitignore
│ │ ├── resource.clj
│ │ ├── core.clj
│ │ ├── thaw_db.clj
│ │ ├── db.clj
│ │ ├── feature.clj
│ │ ├── cli.clj
│ │ ├── eval.clj
│ │ └── corpus.clj
├── bin
│ └── run.sh
├── rest
│ └── query.rest
└── python
│ └── retro-intercoder.py
├── resources
├── corpus.xlsx
├── todocorp.conf
└── todotask-log4j2.xml
├── .gitmodules
├── results
├── full-evaluation.xls
├── dist.csv
├── kappa.txt
├── predictions.csv
├── intercoder.csv
└── agent-data.arff
├── docker-es
├── makefile
└── docker-compose.yml
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── test-resources
├── test-log4j2.xml
└── log4j2.xml
├── LICENSE
├── test
└── uic
│ └── nlp
│ └── todo
│ ├── db_test.clj
│ └── eval_test.clj
├── makefile
├── project.clj
└── README.md
/src/r/.gitignore:
--------------------------------------------------------------------------------
1 | /.Rhistory
--------------------------------------------------------------------------------
/src/clojure/uic/nlp/todo/.gitignore:
--------------------------------------------------------------------------------
1 | /version.clj
2 |
--------------------------------------------------------------------------------
/resources/corpus.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plandes/todo-task/HEAD/resources/corpus.xlsx
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "zenbuild"]
2 | path = zenbuild
3 | url = https://github.com/plandes/zenbuild
4 |
--------------------------------------------------------------------------------
/results/full-evaluation.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plandes/todo-task/HEAD/results/full-evaluation.xls
--------------------------------------------------------------------------------
/resources/todocorp.conf:
--------------------------------------------------------------------------------
1 | # -*-conf-*-
2 |
3 | [default]
4 | annotated_dir=./resources
5 | annotator_main=corpus
6 | annotator1=corpus
7 |
--------------------------------------------------------------------------------
/docker-es/makefile:
--------------------------------------------------------------------------------
1 | PROJ_TYPE= docker
2 | DOCKER_IMG_NAME= es
3 | DOCKER_USER= plandes
4 | DOCKER_BUILD_DEPS=
5 |
6 | include ../zenbuild/main.mk
7 |
--------------------------------------------------------------------------------
/src/r/dist.r:
--------------------------------------------------------------------------------
# Read the pruned corpus; its first column holds the class label.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
df <- read.csv("../../results/pruned.csv", header = TRUE)

# Frequency of each distinct value in the first column, as a data frame
# with the columns `Var1` (value) and `Freq` (count).
dist <- data.frame(table(df[, 1]))

# Row names are written as well (the unnamed leading column of dist.csv).
write.csv(dist, "../../results/dist.csv")
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /rel
3 | /classes
4 | /checkouts
5 | pom.xml
6 | pom.xml.asc
7 | *.jar
8 | *.class
9 | /.lein-*
10 | /.nrepl-port
11 | .hgignore
12 | .hg/
13 | /model
14 | /doc
15 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: clojure
2 | lein: 2.7.1
3 | before_script:
4 | - mkdir -p target
5 | - git clone http://github.com/plandes/zenbuild
6 | script: ZBHOME=zenbuild make info checkdep
7 | jdk:
8 | - oraclejdk8
9 |
--------------------------------------------------------------------------------
/results/dist.csv:
--------------------------------------------------------------------------------
1 | "","Var1","Freq"
2 | "1","buy",52
3 | "2","calendar",22
4 | "3","call",19
5 | "4","contact",47
6 | "5","email",12
7 | "6","find-service",27
8 | "7","find-travel",10
9 | "8","pay-bill-online",17
10 | "9","plan-meal",7
11 | "10","postal",11
12 | "11","print",4
13 | "12","school-work",8
14 | "13","self-improve",4
15 | "14","service",46
16 |
--------------------------------------------------------------------------------
/docker-es/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3.4'
2 |
3 | services:
4 | elasticsearch:
5 | container_name: todoes
6 | image: elasticsearch:2.2.0
7 | ports:
8 | - "10200:9200"
9 | - "10300:9300"
10 | volumes:
11 | - todo_es_data:/usr/share/elasticsearch/data
12 | environment:
13 | ES_HEAP_SIZE: 2g
14 |
15 | volumes:
16 | todo_es_data:
17 |
--------------------------------------------------------------------------------
/src/clojure/uic/nlp/todo/resource.clj:
--------------------------------------------------------------------------------
1 | (ns uic.nlp.todo.resource
2 | (:require [clojure.tools.logging :as log])
3 | (:require [zensols.model.classifier :as c])
4 | (:require [zensols.actioncli.resource :refer (resource-path) :as res]))
5 |
6 | (defn initialize
7 | []
8 | (log/debug "initializing")
9 | (c/initialize)
10 | (res/register-resource :todocorp-config-file
11 | :system-property "todocorp-config"))
12 |
--------------------------------------------------------------------------------
/src/r/intercoder.r:
--------------------------------------------------------------------------------
1 | #install.packages('psych')
2 | #install.packages('irr')
3 | library('psych')
4 | library('irr')
5 |
# Print inter-coder agreement statistics for two annotators.
#
# df: data frame that contains at least the columns `annotator1` and
#     `annotator2`; every other column is ignored.
#
# Prints Cohen's kappa (psych::cohen.kappa) followed by Fleiss' kappa
# (irr::kappam.fleiss), each preceded by a label line.
intercoder <- function(df) {
  ratings <- df[, c("annotator1", "annotator2")]

  print("cohen:")
  print(cohen.kappa(ratings))

  print("fleiss:")
  print(kappam.fleiss(ratings))
}
15 |
# Load the relabeled inter-coder file and report the agreement statistics.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
df <- read.csv("../../results/intercoder-relabeled.csv", header = TRUE)
intercoder(df)
18 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 | All notable changes to this project will be documented in this file.
3 |
4 | The format is based on [Keep a Changelog](http://keepachangelog.com/)
5 | and this project adheres to [Semantic Versioning](http://semver.org/).
6 |
7 |
8 | ## [Unreleased]
9 |
10 | ## [0.0.1] - 2018-06-20
11 | ### Added
12 | - Initial version
13 |
14 | [Unreleased]: https://github.com/plandes/todo-task/compare/v0.0.1...HEAD
15 | [0.0.1]: https://github.com/plandes/todo-task/releases/tag/v0.0.1
16 |
--------------------------------------------------------------------------------
/results/kappa.txt:
--------------------------------------------------------------------------------
1 | [1] "cohen:"
2 | Call: cohen.kappa1(x = x, w = w, n.obs = n.obs, alpha = alpha, levels = levels)
3 |
4 | Cohen Kappa and Weighted Kappa correlation coefficients and confidence boundaries
5 | lower estimate upper
6 | unweighted kappa 0.42 0.51 0.60
7 | weighted kappa 0.47 0.62 0.77
8 |
9 | Number of subjects = 145
10 | [1] "fleiss:"
11 | Fleiss' Kappa for m Raters
12 |
13 | Subjects = 145
14 | Raters = 2
15 | Kappa = 0.498
16 |
17 | z = 14.1
18 | p-value = 0
19 |
--------------------------------------------------------------------------------
/test-resources/test-log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/resources/todotask-log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/test-resources/log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/src/clojure/uic/nlp/todo/core.clj:
--------------------------------------------------------------------------------
1 | (ns uic.nlp.todo.core
2 | (:require [zensols.actioncli.log4j2 :as lu]
3 | [zensols.actioncli.parse :as p])
4 | (:require [uic.nlp.todo.version :as ver])
5 | (:gen-class :main true))
6 |
7 | (defn- version-info []
8 | (println (format "%s (%s)" ver/version ver/gitref)))
9 |
10 | (defn- create-action-context []
11 | (p/multi-action-context
12 | '((:repl zensols.actioncli.repl repl-command)
13 | (:load uic.nlp.todo.cli load-corpora-command)
14 | (:dsprep uic.nlp.todo.cli split-dataset-command)
15 | (:features uic.nlp.todo.cli features-command)
16 | (:print uic.nlp.todo.cli print-evaluate-command)
17 | (:evaluate uic.nlp.todo.cli evaluates-spreadsheet-command)
18 | (:predict uic.nlp.todo.cli predict-spreadsheet-command))
19 | :version-option (p/version-option version-info)))
20 |
21 | (defn -main [& args]
22 | (lu/configure "todotask-log4j2.xml")
23 | (p/set-program-name "todotask")
24 | (-> (create-action-context)
25 | (p/process-arguments args)))
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Paul Landes
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 |
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10 |
--------------------------------------------------------------------------------
/src/bin/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
# Directory holding this script; the configuration file is looked up there.
INIT_DIR=$(dirname "$0")
# Output directories, relative to the current working directory.
RES_DIR=results
LOG_DIR=log
CONF="${INIT_DIR}/todocorp.conf"

# Quote the expansions so paths containing whitespace are not word-split.
mkdir -p "$RES_DIR"
mkdir -p "$LOG_DIR"
10 |
# Start a background evaluation run.
#   $1 - run name; used for the log file and the result spreadsheet name
#   $2 - value passed to --classifiers
#   $3 - value passed to --metaset
# The run is detached with nohup; stdout and stderr are appended to the log.
eval_classifiers() {
    clname=$1
    classifiers=$2
    metaset=$3
    log="$LOG_DIR/${clname}.log"
    echo "evaluation set ${clname}, classifiers: ${classifiers}, meta set: ${metaset}, config: $CONF" > "$log"
    # all expansions are quoted so values with whitespace stay one argument
    nohup ./bin/todotask evaluate -c "$CONF" -l INFO \
        --metaset "$metaset" --classifiers "$classifiers" \
        -o "$RES_DIR/${clname}.xls" >> "$log" 2>&1 &
}
21 |
# Dispatch on the first command line argument.
case "$1" in
    clean)
        # -f: do not fail when a directory is already empty
        rm -f "$RES_DIR"/*
        rm -f "$LOG_DIR"/*
        ;;

    sanity)
        # sanity test
        eval_classifiers test-res zeror set-best
        ;;

    best)
        # single best performing model
        eval_classifiers j48 j48 set-best
        ;;

    long)
        # random forest classifier on the set-best meta set
        eval_classifiers random-forest random-forest set-best

        # long running
        for i in fast lazy meta tree slow really-slow ; do
            eval_classifiers "$i" "$i" set-compare
        done
        ;;

    *)
        # unknown or missing action: show the accepted actions and fail
        echo "usage: $0 <clean|sanity|best|long>" >&2
        exit 1
        ;;
esac
52 |
--------------------------------------------------------------------------------
/src/clojure/uic/nlp/todo/thaw_db.clj:
--------------------------------------------------------------------------------
1 | (ns uic.nlp.todo.thaw-db
2 | (:require [zensols.actioncli.dynamic :as dyn]
3 | [zensols.actioncli.util :refer (defnlock)]
4 | [zensols.dataset.thaw :as db :refer (with-connection)]))
5 |
6 | (defnlock connection []
7 | (db/thaw-connection "todo" "resources/todo-dataset.json"))
8 |
9 | (defn reset-connection []
10 | (-> (meta #'connection) :init-resource (reset! nil)))
11 |
12 | (dyn/register-purge-fn reset-connection)
13 |
14 | (defn instances-count []
15 | (with-connection (connection)
16 | (db/instances-count)))
17 |
18 | (defn anon-by-id
19 | [id]
20 | (with-connection (connection)
21 | (db/instance-by-id id)))
22 |
23 | (defn anons
24 | "Return all annotations"
25 | [& opts]
26 | (with-connection (connection)
27 | (apply db/instances opts)))
28 |
(defn distribution
  "Count the instances per class label.  Returns a sequence of `[label count]`
  entries ordered from the most frequent label to the least frequent one."
  []
  (with-connection (connection)
    (->> (anons)
         (map :class-label)
         frequencies
         ;; descending by count
         (sort-by second #(compare %2 %1)))))
40 |
--------------------------------------------------------------------------------
/test/uic/nlp/todo/db_test.clj:
--------------------------------------------------------------------------------
1 | (ns ^{:doc "This namespace is REPL prototyping fodder and *not* not real unit
2 | test cases."} uic.nlp.todo.db-test
3 | (:require [clojure.test :refer :all]
4 | [uic.nlp.todo.db :refer :all]
5 | [zensols.actioncli.dynamic :as dyn]
6 | [zensols.dataset.db :as db :refer (with-connection)]))
7 |
8 | (defn- main [& actions]
9 | (->> actions
10 | (map (fn [action]
11 | (case action
12 | -2 (dyn/purge)
13 | -1 (reset-instances)
14 | 0 (load-corpora)
15 | 1 (divide-by-set 0.8)
16 | 2 (do
17 | (dyn/purge)
18 | (load-corpora)
19 | (Thread/sleep (* 2 1000))
20 | (divide-by-set 0.8)
21 | (with-connection (connection)
22 | (db/write-dataset :instance-fn #(-> % :panon :text))))
23 | 3 (with-connection (connection)
24 | (db/instance-count))
25 | 4 (with-connection (connection)
26 | (db/stats))
27 | 5 (clojure.pprint/pprint (connection))
28 | 6 (with-connection (connection)
29 | (db/write-dataset :instance-fn #(-> % :panon :text)))
30 | 7 (distribution))))
31 | doall))
32 |
--------------------------------------------------------------------------------
/src/r/analysis.r:
--------------------------------------------------------------------------------
1 | proprietary <- "agent,desc,count
2 | buy,Assists in buying goods.,480
3 | service,Do It Yourself type tasks,284
4 | self-improve,Self Improvement/Help,183
5 | school-work,Task related to school,158
6 | contact,Email SMS or call,101
7 | call,Makes a phone call via OS,97
8 | email,Emails a contact via OS,60
9 | calendar,Make an appointment,55
10 | pay-bill-online,Online bill pay,54
11 | find-service,Procure services,42
12 | print,Print out a document,23
13 | postal,Send mail by snail mail,20
14 | plan-meal,Cook or gather ingredients,17
15 | find-travel,Reserve transportation,18
16 | text-sms,Sends SMS text messages,19"
17 |
18 | public <- "agent,desc,count
19 | buy-general,Assists in buying general goods,38
20 | buy-grocery,Assists in buying groceries,2
21 | buy-travel,Assists in buying travel,9
22 | buy-wedding,Assists in buying wedding g/s,44
23 | calendar,Make personal appointment,33
24 | contact,Email SMS or phone call,66
25 | household,Schedule time for personal task in calendar,91
26 | how-to,Identify tutorial video,17
27 | office,Schedule office task in calendar,189
28 | office-calendar,Make work appt,8
29 | office-contact,Email SMS or phone call - work,19
30 | pay-bill-online,Online bill pay,18
31 | search-general,General internet search,17
32 | search-recipe,Internet search recipe,4
33 | sell,Sell or donate an item,7
34 | send,Send item by USPS,15"
35 |
36 | corp.dist <- function(csvstr, name) {
37 | df = read.csv(text=csvstr, header=TRUE)
38 | counts <- df[,3]
39 | print(sprintf('%s:', name))
40 | summary(counts)
41 | print(sprintf('standard deviation: %.2f, varience: %.2f', sd(counts), var(counts)))
42 | }
43 |
44 | corp.dist(proprietary, 'proprietary')
45 | corp.dist(public, 'public')
46 |
--------------------------------------------------------------------------------
/src/rest/query.rest:
--------------------------------------------------------------------------------
1 | # get all indexes
2 | GET http://localhost:10200/_cat/indices?v
3 |
4 | # careful!
5 | #DELETE http://localhost:10200/todo
6 |
7 | # mapping
8 | GET http://localhost:10200/todo/dataset/_mapping
9 |
10 | # count
11 | POST http://localhost:10200/todo/dataset/_search
12 | {
13 | "query": { "match_all": {}},
14 | "size": 0
15 | }
16 |
17 | # distribution
18 | POST http://localhost:10200/todo/dataset/_search
19 | {
20 | "aggs": {
21 | "act_agg_name": {
22 | "terms": {"field": "class-label", "size": 0}
23 | }
24 | },
25 | "size": 0
26 | }
27 |
28 | # search
29 | POST http://localhost:10200/todo/dataset/_search
30 | {
31 | "query": { "match_all": {}},
32 | "size": 1
33 | }
34 |
35 | # act by class
36 | POST http://localhost:10200/todo/dataset/_search
37 | {
38 | "query": {
39 | "term": {"class-label": "self-improve"}
40 | },
41 | "size": 5
42 | }
43 |
44 | # act by class
45 | POST http://localhost:10200/todo/dataset/_search
46 | {
47 | "query": {
48 | "term": {"class-label": "self-improve"}
49 | },
50 | "fields": ["class-label"],
51 | "size": 5
52 | }
53 |
54 | # counts
55 | POST http://localhost:10200/todo/dataset/_search
56 | {
57 | "query": { "match_all": {}},
58 | "size": 0
59 | }
60 |
61 | # just keys
62 | POST http://localhost:10200/todo/dataset/_search
63 | {
64 | "query": { "match_all": {}},
65 | "fields": []
66 | }
67 |
68 | # search for text
69 | POST http://localhost:10200/todo/dataset/_search
70 | {
71 | "query": {
72 | "match": {"_all": "office"}
73 | },
74 | "size": 1
75 | }
76 |
77 | ## stats
78 | GET http://localhost:10200/todo/stats/_search
79 | {
80 | "query": { "match_all": {}},
81 | "from": 0
82 | }
83 |
84 |
85 |
86 | ## stats (test/train splits)
87 | # stats mapping
88 | GET http://localhost:10200/todo/stats/_mapping
89 |
90 | # stats--fix this
91 | POST http://localhost:10200/todo/stats/_search
92 | {
93 | "query": { "match_all": {}}
94 | }
95 |
96 | # stats
97 | POST http://localhost:10200/todo/stats/_search
98 | {
99 | "query": { "match_all": {}}
100 | }
101 |
--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
1 | ## makefile automates the build and deployment for lein projects
2 |
3 | # type of project, currently one of: clojure, python
4 | PROJ_TYPE= clojure
5 | PROJ_MODULES= nlpmodel appassem
6 | # namespace is not templatized
7 | GITUSER= plandes
8 | GITPROJ= todo-task
9 |
10 | # project specific
11 | PAPER_DOC_SRC_DIR= $(abspath $(DOC_SRC_DIR))
12 | ADD_CLEAN= results.xls $(PAPER_DOC_SRC_DIR)
13 | DIST_PREFIX= $(HOME)/opt/app
14 |
15 | TODO_CONF= resources/todocorp.conf
16 | TODO_OP= -c $(TODO_CONF)
17 |
18 | # make build dependencies
19 | _ := $(shell [ ! -d .git ] && git init ; [ ! -d zenbuild ] && \
20 | git submodule add https://github.com/plandes/zenbuild && make gitinit )
21 |
22 | include ./zenbuild/main.mk
23 |
24 | .PHONY: help
25 | help:
26 | $(LEIN) run
27 |
# Create the directories the run targets expect and build the models.
.PHONY: testprep
testprep:
	mkdir -p dev-resources
	mkdir -p results
	# $(MAKE) (not a literal `make`) passes -n/-j and other flags to the sub-make
	$(MAKE) models
33 |
34 | .PHONY: test
35 | test: testprep
36 | $(LEIN) test
37 |
# Bring up the Elasticsearch container defined in docker-es.
.PHONY: startes
startes:
	$(MAKE) -C docker-es up
41 |
# Take down the Elasticsearch container defined in docker-es.
.PHONY: stopes
stopes:
	$(MAKE) -C docker-es down
45 |
46 | .PHONY: load
47 | load: testprep
48 | $(LEIN) run load -l INFO $(TODO_OP)
49 |
50 | .PHONY: features
51 | features: testprep
52 | $(LEIN) run features -f 500 $(TODO_OP)
53 |
54 | .PHONY: dsprep
55 | dsprep: testprep
56 | $(LEIN) run dsprep -l INFO $(TODO_OP)
57 |
58 | .PHONY: print
59 | print: testprep
60 | $(LEIN) run print -l INFO $(TODO_OP)
61 |
62 | .PHONY: printbest
63 | printbest: testprep
64 | $(LEIN) run print -l INFO --metaset set-best --classifiers j48 $(TODO_OP)
65 |
66 | .PHONY: evaluate
67 | evaluate: testprep
68 | $(LEIN) run evaluate -l INFO $(TODO_OP)
69 |
70 | .PHONY: predict
71 | predict: testprep
72 | $(LEIN) run predict -l INFO $(TODO_OP)
73 |
74 | .PHONY: disttodo
75 | disttodo: dist
76 | cp $(TODO_CONF) $(DIST_DIR)
77 | cp src/bin/run.sh $(DIST_DIR)
78 | # needed to silence a deeplearn4j exception
79 | mkdir -p $(DIST_DIR)/dev-resources
80 | mkdir -p $(DIST_DIR)/resources
81 | cp resources/todo-dataset.json $(DIST_DIR)/resources
82 |
# Build the paper and slides PDFs into PAPER_DOC_SRC_DIR, then the project docs.
.PHONY: alldocs
alldocs:
	mkdir -p doc
	# $(MAKE) (not a literal `make`) passes -n/-j and other flags to the sub-makes
	$(MAKE) FINAL_PDF_DIR=$(PAPER_DOC_SRC_DIR) -C ../../paper clean pdf
	$(MAKE) FINAL_PDF_DIR=$(PAPER_DOC_SRC_DIR) -C ../../slides clean pdf
	$(MAKE) docs
89 |
--------------------------------------------------------------------------------
/test/uic/nlp/todo/eval_test.clj:
--------------------------------------------------------------------------------
1 | (ns ^{:doc "This namespace is REPL prototyping fodder and *not* not real unit
2 | test cases."}
3 | uic.nlp.todo.eval-test
4 | (:require [zensols.actioncli.dynamic :as dyn]
5 | [zensols.model.classifier :as cl]
6 | [zensols.model.execute-classifier :as ex :refer (with-model-conf)]
7 | [zensols.model.eval-classifier :as ec :refer (with-two-pass)]
8 | [uic.nlp.todo.feature :as f :refer (with-feature-context)]
9 | [uic.nlp.todo.db :as edb]
10 | [uic.nlp.todo.eval :refer :all]
11 | [uic.nlp.todo.db :as edb]))
12 |
13 | (defn- main [& actions]
14 | (let [classifiers [:fast :tree :meta :lazy]
15 | meta-set :set-compare]
16 | (binding [ec/*default-set-type* :train-test
17 | cl/*rand-fn* (fn [] (java.util.Random. 1))
18 | edb/*low-class-count-threshold* 10]
19 | (with-model-conf (create-model-config)
20 | (with-feature-context
21 | (f/create-context :anons-fn f/instance-deref-anons-fn
22 | :set-type :train)
23 | (->> (map (fn [action]
24 | (case action
25 | 0 (dyn/purge)
26 | 1 (do (edb/divide-by-set 0.9)
27 | (edb/stats))
28 | 2 (ec/print-best-results classifiers meta-set)
29 | 3 (ec/terse-results classifiers meta-set :only-stats? true)
30 | 4 (-> (ec/create-model classifiers meta-set)
31 | (ec/train-model :set-type :train)
32 | ec/write-model)
33 | 5 (-> (ex/read-model)
34 | (ex/print-model-info :results? true))
35 | 6 (->> (ex/read-model)
36 | ex/prime-model
37 | ex/predict
38 | ex/write-predictions)
39 | 7 (-> (ex/read-model)
40 | ex/prime-model
41 | ex/predict)
42 | 8 (ec/eval-and-write classifiers meta-set)
43 | 9 (write-arff)
44 | 10 (-> (ec/create-model classifiers meta-set)
45 | (ec/train-model :set-type :train)
46 | ex/prime-model
47 | ex/predict
48 | ex/write-predictions
49 | )))
50 | actions)
51 | doall))))))
52 |
--------------------------------------------------------------------------------
/src/python/retro-intercoder.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import csv,pprint, math, sys
4 | import pandas as pd
5 |
6 | ann1 = '../../../todocorp/annotated/relabeled.xlsx'
7 | ann2 = '../../../todocorp/annotated/annotator2.xlsx'
8 | relabeled_ic = '../../results/intercoder-relabeled.csv'
9 | pruned_file = '../../results/pruned.csv'
10 |
def read_sheet(fname):
    """Load the ``todo_corpus`` worksheet of an Excel file.

    :param fname: path of the Excel workbook
    :return: list with one pandas row (Series) per worksheet row, in order
    """
    workbook = pd.ExcelFile(fname)
    sheet = workbook.parse('todo_corpus')
    return [record for _, record in sheet.iterrows()]
18 |
def validate(df1, df2):
    """Check that two annotation row lists line up.

    :param df1: rows of the first annotator
    :param df2: rows of the second annotator
    :raises ValueError: ``'length'`` when the lists differ in size, or an
        ``alignment`` message naming the first pair of rows whose
        ``utterance`` values differ
    """
    if len(df1) != len(df2):
        raise ValueError('length')
    print('lengths validate')
    # the lengths are equal here, so pairing with zip visits every row
    for left, right in zip(df1, df2):
        if left['utterance'] != right['utterance']:
            raise ValueError('alignment: {}, {}'.format(left, right))
    print('utterance validate')
28 |
def info(df1, df2):
    """Print the number of rows in each of the two annotation lists."""
    sizes = (len(df1), len(df2))
    print('len: {}, {}'.format(*sizes))
31 |
def candidate(val):
    """Tell whether a cell value is usable as a class label.

    :param val: cell value taken from an annotation row
    :return: ``True`` only when ``val`` is a ``str``
    """
    is_label = isinstance(val, str)
    return is_label
34 |
def collapse_class(val):
    """Map a fine-grained class label onto its coarser label.

    :param val: class label to collapse
    :return: the coarse label when ``val`` has a mapping, otherwise ``val``
        itself unchanged
    """
    coarse_by_fine = {
        'buy-general': 'buy',
        'buy-grocery': 'buy',
        'buy-travel': 'buy',
        'buy-wedding': 'buy',
        'search-general': 'search',
        'search-recipe': 'search',
        'office-contact': 'contact',
        'office-calendar': 'calendar',
        'office': 'calendar',
        'household': 'calendar',
    }
    return coarse_by_fine.get(val, val)
50 |
def write_intercoder(df1, df2):
    """Write the paired annotator labels to ``relabeled_ic`` and print the
    raw agreement ratio.

    Only rows where both annotators supplied a label (see ``candidate``) are
    written.  The second annotator's label is collapsed with
    ``collapse_class`` before it is written and compared.

    :param df1: rows of the first annotator
    :param df2: rows of the second annotator, aligned with ``df1``
    """
    agree = 0
    rows = 0
    # newline='' is what the csv module expects of files it writes to
    with open(relabeled_ic, 'w', newline='') as f:
        c_writer = csv.writer(f)
        # every data row carries three fields, so the header has to name all
        # three; a two-field header leaves the columns misaligned for readers
        c_writer.writerow(['annotator1', 'annotator2', 'utterance'])
        for i in range(len(df1)):
            r1, r2 = df1[i], df2[i]
            a1, a2 = r1['class'], r2['class']
            if candidate(a1) and candidate(a2):
                rows = rows + 1
                a2 = collapse_class(a2)
                c_writer.writerow([a1, a2, r1['utterance']])
                if a1 == a2:
                    agree = agree + 1
    # no qualifying row: report NaN instead of raising ZeroDivisionError
    ratio = (agree / rows) if rows else float('nan')
    print('agree: %.2f (%s/%s)' % (ratio, agree, rows))
67 |
def pruned():
    """Write the class and utterance of every labeled row of the ``ann1``
    sheet to ``pruned_file``.

    Rows whose ``class`` cell is not a string (see ``candidate``) are skipped.
    """
    df1 = read_sheet(ann1)
    # newline='' is what the csv module expects of files it writes to
    with open(pruned_file, 'w', newline='') as f:
        c_writer = csv.writer(f)
        c_writer.writerow(['class', 'utterance'])
        for r in df1:
            if candidate(r['class']):
                c_writer.writerow([r['class'], r['utterance']])
76 |
def main():
    """Read both annotation sheets, check that they align, then write the
    inter-coder CSV and the pruned CSV."""
    df1 = read_sheet(ann1)
    df2 = read_sheet(ann2)
    validate(df1, df2)
    write_intercoder(df1, df2)
    pruned()


# Nothing invoked main() before, so running the script produced no output
# files.  The guard keeps importing the module free of side effects.
if __name__ == '__main__':
    main()
83 |
--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
1 | (defproject edu.uic.nlp/todotask "0.1.0-SNAPSHOT"
2 | :description "Categorize natural language todo list items"
3 | :url "https://github.com/plandes/todo-task"
4 | :license {:name "MIT"
5 | :url "https://opensource.org/licenses/MIT"
6 | :distribution :repo}
7 | :plugins [[lein-codox "0.10.3"]
8 | [lein-javadoc "0.3.0"]
9 | [org.clojars.cvillecsteele/lein-git-version "1.2.7"]]
10 | :codox {:metadata {:doc/format :markdown}
11 | :project {:name "Todo Categorization"}
12 | :output-path "target/doc/codox"
13 | :source-uri "https://github.com/plandes/todo-task/blob/v{version}/{filepath}#L{line}"}
14 | :javadoc-opts {:package-names ["edu.uic.nlp.todo-task"]
15 | :output-dir "target/doc/apidocs"}
16 | :git-version {:root-ns "uic.nlp.todo"
17 | :path "src/clojure/uic/nlp/todo"
18 | :version-cmd "git describe --match v*.* --abbrev=4 --dirty=-dirty"}
19 | :source-paths ["src/clojure"]
20 | :test-paths ["test" "test-resources"]
21 | :java-source-paths ["src/java"]
22 | :javac-options ["-Xlint:unchecked"]
23 | :jar-exclusions [#".gitignore"]
24 | :exclusions [com.zensols.tools/actioncli
25 | ch.qos.logback/logback-classic
26 | log4j
27 | org.slf4j/slf4j-log4j12
28 | org.yaml/snakeyaml]
29 | :dependencies [[org.clojure/clojure "1.8.0"]
30 |
31 | ;; logging for core
32 | [org.apache.logging.log4j/log4j-1.2-api "2.7"]
33 | [org.apache.logging.log4j/log4j-core "2.7"]
34 | [org.apache.logging.log4j/log4j-jcl "2.7"]
35 | [org.apache.logging.log4j/log4j-jul "2.7"]
36 | [org.apache.logging.log4j/log4j-slf4j-impl "2.7"]
37 |
38 | ;; read ini files
39 | [com.brainbot/iniconfig "0.2.0"]
40 |
41 | ;; nlp/ml
42 | [com.zensols.tools/actioncli "0.0.27"]
43 | [com.zensols.nlp/wordvec "0.0.1"
44 | :exclusions [org.apache.httpcomponents/httpmime
45 | org.apache.httpcomponents/httpclient
46 | org.clojure/tools.macro]]
47 | [com.zensols.ml/model "0.0.18"]
48 | [com.zensols.nlp/parse "0.1.6"
49 | :exclusions [com.zensols.tools/actioncli
50 | org.clojure/tools.macro]]
51 | [com.zensols.ml/dataset "0.0.12"
52 | :exclusions [org.apache.lucene/lucene-analyzers-common
53 | org.apache.lucene/lucene-core
54 | org.apache.lucene/lucene-queries
55 | org.apache.lucene/lucene-queryparser
56 | org.apache.lucene/lucene-sandbox]]]
57 | :pom-plugins [[org.codehaus.mojo/appassembler-maven-plugin "1.6"
58 | {:configuration ([:program
59 | ([:mainClass "uic.nlp.todo.core"]
60 | [:id "todotask"])]
61 | [:environmentSetupFileName "setupenv"])}]]
62 | :profiles {:1.9 {:dependencies [[org.clojure/clojure "1.9.0"]]}
63 | :uberjar {:aot [uic.nlp.todo.core]}
64 | :appassem {:aot :all}
65 | :snapshot {:git-version {:version-cmd "echo -snapshot"}}
66 | :dev
67 | {:exclusions [org.slf4j/slf4j-log4j12
68 | log4j/log4j
69 | ch.qos.logback/logback-classic]}
70 | :test {:jvm-opts ["-Dlog4j.configurationFile=test-resources/test-log4j2.xml"
71 | "-Xms4g" "-Xmx30g" "-XX:+UseConcMarkSweepGC"]}}
72 | :main uic.nlp.todo.core)
73 |
--------------------------------------------------------------------------------
/src/clojure/uic/nlp/todo/db.clj:
--------------------------------------------------------------------------------
1 | (ns uic.nlp.todo.db
2 | (:require [clojure.java.io :as io]
3 | [clojure.edn :as edn]
4 | [clojure.tools.logging :as log]
5 | [clojure.data.csv :as csv]
6 | [clojure.data.json :as json]
7 | [zensols.actioncli.util :refer (trunc defnlock)]
8 | [zensols.actioncli.dynamic :as dyn]
9 | [zensols.actioncli.resource :as res]
10 | [zensols.nlparse.parse :as p]
11 | [zensols.dataset.db :as db :refer (with-connection)]
12 | [uic.nlp.todo.corpus :as corp]))
13 |
14 | (def ^:dynamic *low-class-count-threshold* 15)
15 |
16 | (defonce ^:private conn-inst (atom nil))
17 | ;(ns-unmap *ns* 'conn-inst)
18 |
19 | (def ^:dynamic *corpus-read-limit* Integer/MAX_VALUE)
20 |
(defn- read-corpus
  "Read up to `*corpus-read-limit*` corpus annotations, parse each utterance
  and hand the result to **add-fn** as `(add-fn id instance class)`.

  The instance is the corpus entry restricted to the identifying keys plus the
  parsed utterance under `:panon`.  The ID is passed as a string."
  [add-fn]
  (->> (corp/read-anons :limit *corpus-read-limit*)
       (map (fn [{:keys [id class utterance] :as dmap}]
              (log/tracef "corp map: %s, utterance: %s" dmap utterance)
              (let [panon (p/parse utterance)
                    id (str id)
                    ;; select by keyword: passing the destructured *values*
                    ;; to select-keys looks them up as keys and yields an
                    ;; empty map, dropping every corpus field
                    inst (-> (select-keys dmap [:bgid :board-id :board-name
                                                :id :source :short-url])
                             (assoc :panon panon))]
                (log/debugf "adding class: %s, inst: <%s>" class (trunc inst))
                (add-fn id inst class))))
       ;; force the lazy sequence so every instance is added now
       doall))
35 |
36 | (defn connection []
37 | (swap! conn-inst #(or % (db/elasticsearch-connection
38 | "todo"
39 | :url "http://localhost:10200"
40 | :create-instances-fn read-corpus))))
41 |
42 | (defn reset-instances []
43 | (reset! conn-inst nil))
44 |
45 | (dyn/register-purge-fn reset-instances)
46 |
47 | (defnlock class-labels-keep []
48 | (with-connection (connection)
49 | (->> (db/distribution)
50 | (filter (fn [{:keys [count]}]
51 | (> count *low-class-count-threshold*)))
52 | (map :class-label)
53 | set)))
54 |
55 | (defn reset-labels-keep []
56 | (-> (meta #'class-labels-keep) :init-resource (reset! nil)))
57 |
58 | (dyn/register-purge-fn reset-labels-keep)
59 |
60 | (defn- filter-low-class-counts [id]
61 | (contains? (class-labels-keep)
62 | (:class-label (db/instance-by-id id))))
63 |
(defn load-corpora
  "Load the corpus by calling `db/instances-load` on the shared connection."
  []
  (with-connection (connection)
    (db/instances-load)))
69 |
70 | (defn anons
71 | "Return all annotations"
72 | [& opts]
73 | (with-connection (connection)
74 | (apply db/instances opts)))
75 |
76 | (defn anon-by-id
77 | "Return an annotation using its ID."
78 | [& opts]
79 | (with-connection (connection)
80 | (apply db/instance-by-id opts)))
81 |
82 | (defn divide-by-set
83 | "Create a test/train dataset."
84 | [train-ratio]
85 | (with-connection (connection)
86 | (db/divide-by-set train-ratio
87 | :dist-type 'even
88 | :filter-fn filter-low-class-counts)))
89 |
90 | (defn divide-by-fold [& opts]
91 | (with-connection (connection)
92 | (apply db/divide-by-fold opts)))
93 |
94 | (defn set-fold [fold]
95 | (with-connection (connection)
96 | (db/set-fold fold)))
97 |
98 | (defn write-dataset []
99 | (with-connection (connection)
100 | (db/write-dataset :output-file "resources/todo-dataset.xls")))
101 |
102 | (defn freeze-dataset []
103 | (with-connection (connection)
104 | (db/freeze-dataset :output-file "resources/todo-dataset.json")))
105 |
(defn stats
  "Return whatever `db/stats` reports for the shared connection.  **opts** are
  passed through to `db/stats` unchanged."
  [& opts]
  (with-connection (connection)
    (apply db/stats opts)))
111 |
112 | (defn distribution
113 |   "Get the label distribution across all todos.
114 |
115 |   Returns a map from class label to the number of annotations carrying that
116 |   label, counted over the `:train-test` set."
117 |   []
118 |   (->> (anons :set-type :train-test)
119 |        (map :class-label)
120 |        ;; `frequencies` is the core function for this kind of counting
121 |        frequencies))
120 |
--------------------------------------------------------------------------------
/results/predictions.csv:
--------------------------------------------------------------------------------
1 | pred-label,correct-label,correct?,confidence,similarity-top-label,similarity-score,utterance,elected-verb-id,utterance-length,mention-count,sent-count,token-count,token-average-length,stopword-count,is-question,pos-last-tag,pos-first-tag,pos-tag-ratio-adjective,pos-tag-ratio-adverb,pos-tag-ratio-verb,pos-tag-ratio-noun,pos-tag-ratio-wh,pos-tag-count-adjective,pos-tag-count-adverb,pos-tag-count-verb,pos-tag-count-noun,pos-tag-count-wh,word-count-service,word-count-find-service,word-count-calendar,word-count-buy,word-count-contact,word-count-call,word-count-pay-bill-online,word-count-email,word-count-postal
2 | contact,contact,true,0.35294117647058826,call,0.38545797065870974,Confirm final details with photographer (and videographers),951117504,59,0,1,9,53/9,2,false,-RRB-,VB,1/9,0,1/9,1/3,0,1,0,1,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3 | service,contact,false,0.3684210526315789,call,0.5630079646305791,Speak to your first restaurant,109641682,30,1,1,5,26/5,1,false,NN,VB,1/5,0,1/5,1/5,0,1,0,1,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 | contact,contact,true,0.6666666666666666,call,0.40443043341961105,Reach out to Seattle restaurant coalition orgs,108386675,46,1,1,7,40/7,1,false,NNS,VB,0,0,1/7,4/7,0,0,0,1,4,0,0.07142857142857142,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5 | contact,contact,true,0.9411764705882353,calendar,0.1337894063649906,Local Restuarant followup,1822752074,25,0,1,3,23/3,0,false,NN,JJ,1/3,0,0,2/3,0,1,0,0,2,0,0.0,0.0,0.0,0.0,0.1666666666666667,0.0,0.0,0.0,0.0
6 | service,service,true,0.7878787878787878,service,0.5161448025277683,clean bathroom,1822752074,14,0,1,2,13/2,0,false,NN,JJ,1/2,0,0,1/2,0,1,0,0,1,0,0.1785714285714286,0.0,0.0,0.04347826086956522,0.0,0.0,0.0,0.0,0.0
7 | buy,service,false,0.6666666666666666,buy,0.42847449988450687,household - water plants,1822752074,24,0,1,4,21/4,0,false,NNS,NN,0,0,0,3/4,0,0,0,0,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.03846153846153846,0.0,0.0
8 | service,service,true,0.7878787878787878,service,0.3845594759941811,clean up woodpile,1822752074,17,0,1,3,5,0,false,NN,JJ,1/3,0,0,1/3,0,1,0,0,1,0,0.2857142857142857,0.0,0.0,0.0,0.07142857142857142,0.0,0.0,0.0,0.05263157894736842
9 | buy,service,false,0.6666666666666666,buy,0.3956083180247874,figure out bookmarks,1822752074,20,0,1,3,6,0,false,NNS,NN,0,0,0,2/3,0,0,0,0,2,0,0.07142857142857142,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10 | buy,buy,true,0.9142857142857143,buy,0.43503205480215995,Go get plants at Tilth Sale,3304,27,1,1,6,11/3,1,false,NNP,VB,0,0,1/3,1/2,0,0,0,2,3,0,0.0,0.0,0.06060606060606061,0.1304347826086957,0.0,0.0,0.0,0.09523809523809525,0.0
11 | service,buy,false,0.3684210526315789,call,0.4596336372196674,choose front door fixture,-1361218025,25,0,1,4,11/2,0,false,NN,VB,1/4,0,1/4,1/2,0,1,0,1,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12 | buy,buy,true,0.9142857142857143,buy,0.5170707412711952,Buy Scale,97926,9,0,1,2,4,0,false,NNP,VB,0,0,1/2,1/2,0,0,0,1,1,0,0.0,0.0,0.0,0.3695652173913043,0.0,0.0,0.0,0.0,0.0
13 | buy,buy,true,0.9142857142857143,pay-bill-online,0.5405272486834571,Buy purse,1822752074,9,0,1,2,4,0,false,NN,JJ,1/2,0,0,1/2,0,1,0,0,1,0,0.0,0.0,0.0,0.3695652173913043,0.0,0.0,0.0,0.0,0.0
14 | calendar,buy,false,1.0,service,0.28456407406234313,Select cake topper,1822752074,18,1,1,3,16/3,0,false,NN,NNP,0,0,0,1,0,0,0,0,3,0,0.0,0.0,0.09090909090909091,0.0,0.0,0.0,0.0,0.0,0.0
15 | postal,postal,true,0.75,postal,0.5151746175006816,Send invitations,3526536,16,0,1,2,15/2,0,false,NNS,VB,0,0,1/2,1/2,0,0,0,1,1,0,0.0,0.0,0.0,0.0,0.07142857142857142,0.0,0.0,0.09523809523809525,0.3157894736842105
16 | email,email,true,0.8333333333333334,call,0.20393406120758872,"write appt emails (BV, UIX, AD mtg, GVSU)",113399775,41,3,1,13,34/13,0,false,-RRB-,VB,1/13,0,1/13,6/13,0,1,0,1,6,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09523809523809525,0.0
17 | find-service,find-service,true,0.3333333333333333,pay-bill-online,0.4027918527523677,Apply to Southxchange,1822752074,21,1,1,3,19/3,1,false,NNP,RB,0,1/3,0,1/3,0,0,1,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18 | find-service,find-service,true,0.4444444444444444,buy,0.4728407757407378,Fix the CD ROM drive on my computer,101397,35,1,1,8,7/2,2,false,NN,VB,0,0,1/8,1/2,0,0,0,1,4,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19 | call,call,true,0.9444444444444444,call,0.2983975520212974,call exterminators,1822752074,18,0,1,2,17/2,0,false,NNS,NN,0,0,0,1,0,0,0,0,2,0,0.0,0.0,0.0,0.0,0.0,0.5862068965517241,0.0,0.0,0.0
20 | calendar,calendar,true,1.0,calendar,0.4833775296468626,Schedule appointments with site managers,-697920873,40,0,1,5,36/5,1,false,NNS,VB,0,0,1/5,3/5,0,0,0,1,3,0,0.0,0.0,0.5151515151515152,0.0,0.0,0.0,0.0,0.0,0.0
21 | calendar,calendar,true,1.0,calendar,0.4416694747058279,Schedule sitting for engagement portrait,-697920873,40,0,1,5,36/5,1,false,NN,VB,0,0,2/5,2/5,0,0,0,2,2,0,0.0,0.0,0.303030303030303,0.0,0.0,0.0,0.0,0.0,0.0
22 | pay-bill-online,pay-bill-online,true,1.0,pay-bill-online,0.6746570733734047,pay mortgage,1822752074,12,0,1,2,11/2,0,false,NN,NN,0,0,0,1,0,0,0,0,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.5769230769230769,0.0,0.0
23 |
--------------------------------------------------------------------------------
/src/clojure/uic/nlp/todo/feature.clj:
--------------------------------------------------------------------------------
1 | (ns ^{:doc "Feature createion"
2 | :author "Paul Landes"}
3 | uic.nlp.todo.feature
4 | (:require [clojure.tools.logging :as log]
5 | [zensols.actioncli.dynamic :as dyn]
6 | [zensols.actioncli.util :refer (defnlock trunc)]
7 | [zensols.nlparse.parse :as p]
8 | [zensols.nlparse.feature.lang :as fe]
9 | [zensols.nlparse.feature.word :as fw]
10 | [zensols.nlparse.feature.word-count :as wc]
11 | [zensols.nlparse.feature.word-similarity :as ws]
12 | [zensols.model.weka :as weka]
13 | [zensols.model.execute-classifier :refer (with-model-conf)]
14 | [zensols.model.eval-classifier :as ec]
15 | [uic.nlp.todo.thaw-db :as tdb]
16 | [uic.nlp.todo.db :as edb]))
17 |
18 | ;; key under which the annotation identifier is put in a feature map
19 | (def id-key :id)
20 | ;; key of the class feature; its name is also the name of the model
21 | (def class-key :agent)
22 | ;; feature context, bound by `with-feature-context`
23 | (def ^:dynamic ^:private *context* nil)
24 | ;; word count configuration used whenever features or statistics are created
25 | (def ^:private wc-config
26 |   (assoc wc/*word-count-config* :words-by-label-count 10))
27 | ;; annotation accessors; dynamic so that callers can rebind them
28 | (def ^:dynamic anons tdb/anons)
29 | (def ^:dynamic anon-by-id tdb/anon-by-id)
25 |
26 | ;; Vector of the distinct class labels found in the `:train-test` annotations,
27 | ;; in order of first appearance.
28 | (defnlock classes
29 |   []
30 |   (vec (distinct (map :class-label (anons :set-type :train-test)))))
32 |
33 | (defn reset
34 |   "Clear the `:init-resource` atom kept in the metadata of the `classes` var."
35 |   []
36 |   (let [cache (:init-resource (meta #'classes))]
37 |     (reset! cache nil)))
38 |
39 | ;; make the reset one of the purge functions tracked by `dyn`
40 | (dyn/register-purge-fn reset)
37 |
38 | (defmacro with-feature-context
39 |   "Evaluate `body` with the private `*context*` var bound to `context`."
40 |   {:style/indent 1}
41 |   [context & body]
42 |   `(binding [*context* ~context]
43 |      ~@body))
43 |
44 | (defn create-features
45 |   "Create the feature map for a single parsed annotation `panon`.
46 |
47 |   The verb features (of the first sentence), the token features and the POS
48 |   tag features are always generated.  The label word count and the word
49 |   similarity features are merged in only when `context` has a
50 |   `:word-count-stats` entry."
51 |   ([panon]
52 |    (create-features panon nil))
53 |   ([panon context]
54 |    (log/debugf "creating features (context=<%s>) for <%s>"
55 |                (trunc context) (trunc panon))
56 |    (let [stats (:word-count-stats context)
57 |          toks (p/tokens panon)
58 |          first-sent (-> panon :sents first)]
59 |      (binding [wc/*word-count-config* wc-config]
60 |        (merge (fe/verb-features first-sent)
61 |               (fw/token-features panon toks)
62 |               (fe/pos-tag-features toks)
63 |               (when stats
64 |                 (wc/label-count-score-features panon stats))
65 |               (when stats
66 |                 (ws/similarity-features toks stats)))))))
60 |
61 | (defn- flatten-keys
62 |   "Turn the option map `adb-keys` into a flat sequence of alternating keys and
63 |   values, ready to be used with `apply`."
64 |   [adb-keys]
65 |   (for [entry adb-keys
66 |         part entry]
67 |     part))
63 |
64 | (defn create-feature-sets
65 |   "Return a lazy sequence with one feature map per annotation.
66 |
67 |   The annotations come from the `:anons-fn` of the context (the `:context`
68 |   option or, when missing, the bound `*context*`).  That function is called
69 |   with `:include-ids? true` followed by all options given to this function.
70 |   Each resulting map has the `:utterance` text, the id under `id-key`, the
71 |   label under `class-key` and the output of `create-features`."
72 |   [& {:keys [context] :as adb-keys}]
73 |   (log/debugf "creating features with keys=%s: %s"
74 |               adb-keys (trunc adb-keys))
75 |   (let [ctx (or context *context*)
76 |         fetch (:anons-fn ctx)
77 |         args (concat [:include-ids? true] (flatten-keys adb-keys))
78 |         annotations (apply fetch args)]
79 |     (for [{:keys [class-label instance id]} annotations]
80 |       (merge {:utterance (:text instance)
81 |               id-key id}
82 |              {class-key class-label}
83 |              (create-features instance ctx)))))
77 |
78 | (defn create-context
79 |   "Create the feature context map.
80 |
81 |   Calls `anons-fn` with all given options flattened to keyword arguments and
82 |   computes the word count statistics of the returned annotations using
83 |   `wc-config`.  Returns a map with the keys `:anons-fn` and
84 |   `:word-count-stats`."
85 |   [& {:keys [anons-fn] :as adb-keys}]
86 |   (let [annotations (apply anons-fn (flatten-keys adb-keys))]
87 |     (log/debugf "creating context with key=%s anon count: %d"
88 |                 (trunc adb-keys) (count annotations))
89 |     (log/tracef "adb-keys: %s" (pr-str adb-keys))
90 |     (let [stats (binding [wc/*word-count-config* wc-config]
91 |                   (wc/calculate-feature-stats annotations))]
92 |       {:anons-fn anons-fn
93 |        :word-count-stats stats})))
89 |
90 | (defn word-count-features
91 |   "Return one `word-count-<label>` symbol for every label in `(classes)`."
92 |   []
93 |   (for [label (classes)]
94 |     (symbol (format "word-count-%s" label))))
93 |
94 | (defn feature-metas
95 |   "Return the feature metadata of the model: the similarity metas, the
96 |   `:utterance` string, the verb, token and POS tag metas and finally the label
97 |   word count metas.  All arguments are ignored."
98 |   [& _]
99 |   (let [labels (classes)]
100 |     (concat (ws/similarity-feature-metas labels)
101 |             [[:utterance 'string]]
102 |             (fe/verb-feature-metas)
103 |             (fw/token-feature-metas)
104 |             (fe/pos-tag-feature-metas)
105 |             (wc/label-word-count-feature-metas labels))))
101 |
102 | (defn- class-feature-meta
103 |   "Return the pair of `class-key` and the vector of all class labels."
104 |   []
105 |   (vector class-key (classes)))
104 |
105 | (defn create-model-config
106 |   "Return the model configuration map that wires the feature functions of
107 |   this namespace together.  The model is named after `class-key`; the context
108 |   functions read the word count statistics from, and wrap them into, a map
109 |   with the single key `:word-count-stats`."
110 |   []
111 |   {:name (name class-key)
112 |    :context-fn (fn [] (:word-count-stats *context*))
113 |    :set-context-fn (fn [stats] {:word-count-stats stats})
114 |    :create-feature-sets-fn create-feature-sets
115 |    :create-features-fn create-features
116 |    :feature-metas-fn feature-metas
117 |    :class-feature-meta-fn class-feature-meta
118 |    :create-two-pass-context-fn create-context
119 |    :model-return-keys #{:label :distributions :features}})
115 |
116 | (defn instance-deref-anon-fn
117 |   "Return the `:panon` found under the `:instance` of the annotation with
118 |   `id`, or nil when either level is missing."
119 |   [id]
120 |   (get-in (anon-by-id id) [:instance :panon]))
120 |
121 | (defn instance-deref-anons-fn
122 |   "Call `anons` with the given arguments and return, for every annotation, a
123 |   map with `:class-label`, `:id` and an `:instance` that is replaced by its
124 |   `:panon`."
125 |   [& opts]
126 |   (for [{:keys [class-label instance id]} (apply anons opts)]
127 |     {:class-label class-label
128 |      :id id
129 |      :instance (:panon instance)}))
127 |
128 | (defn display-features
129 |   "Display at most `num-features` (default 100) features with
130 |   `ec/display-features`, using a freshly created feature context and the
131 |   model configuration of this namespace."
132 |   [& {:keys [num-features]
133 |       :or {num-features 100}}]
134 |   (let [ctx (create-context :anons-fn instance-deref-anons-fn)]
135 |     (with-feature-context ctx
136 |       (with-model-conf (create-model-config)
137 |         (ec/display-features :max num-features)))))
134 |
--------------------------------------------------------------------------------
/src/clojure/uic/nlp/todo/cli.clj:
--------------------------------------------------------------------------------
1 | (ns uic.nlp.todo.cli
2 | (:require [clojure.java.io :as io]
3 | [clojure.string :as s]
4 | [clojure.edn :as edn]
5 | [clojure.tools.logging :as log]
6 | [clojure.pprint :as pp]
7 | [zensols.actioncli.log4j2 :as lu]
8 | [zensols.actioncli.util :refer (trunc)]
9 | [zensols.model.eval-classifier :as ec]
10 | [zensols.model.execute-classifier :as ex]
11 | [uic.nlp.todo.db :as db]
12 | [uic.nlp.todo.feature :as fe]
13 | [uic.nlp.todo.eval :as ev :refer (with-single-pass)]
14 | [clojure.string :as s]))
15 |
16 | (defn config-file-option
17 |   "Return the option spec of the configuration file (`-c/--config`).
18 |
19 |   The argument is parsed into a file.  Validation succeeds only for an
20 |   existing file and, as a side effect, then stores its absolute path in the
21 |   system property `zensols.todocorp-config`."
22 |   []
23 |   (letfn [(register-config [file]
24 |             (let [found? (.exists file)]
25 |               (when found?
26 |                 (System/setProperty "zensols.todocorp-config"
27 |                                     (.getAbsolutePath file)))
28 |               found?))]
29 |     ["-c" "--config" "the configuration file path"
30 |      :default "todocorp.conf"
31 |      :required ""
32 |      :parse-fn io/file
33 |      :validate [register-config "Must be an existing file"]]))
28 |
29 | ;; ["-s" "--step" "create a moving train/test split with increment step (0.03 is a good start)"
30 | ;; :required ""
31 | ;; :parse-fn edn/read-string]
32 |
33 | (defn output-file-option [default] ; option spec for `-o/--output`; `default` becomes its default value
34 | ["-o" "--output" "output file name or '-' to print results"
35 | :default default
36 | :required ""])
37 |
38 | (defn metaset-option [] ; option spec for `-m/--metaset`
39 | ["-m" "--metaset" "features set as defined in eval.clj"
40 | :default :set-compare
41 | :required ""
42 | :parse-fn keyword]) ; the argument string is turned into a keyword
43 |
44 | (defn classifiers-option
45 |   "Return the option spec of the classifier list (`-a/--classifiers`).
46 |
47 |   The argument is split on commas (surrounding white space is dropped).  An
48 |   element containing a dot is taken as a class name and instantiated with its
49 |   no-argument constructor, any other element becomes a keyword.  The parsed
50 |   value is a vector."
51 |   []
52 |   (letfn [(->classifier [token]
53 |             (if (s/index-of token ".")
54 |               (.newInstance (Class/forName token))
55 |               (keyword token)))
56 |           (parse [arg]
57 |             (mapv ->classifier (s/split arg #"\s*,\s*")))]
58 |     ["-a" "--classifiers" "comma separated classifier list"
59 |      :default [:fast :lazy :tree :meta :slow]
60 |      :required ""
61 |      :parse-fn parse]))
56 |
57 | (def load-corpora-command
58 | "CLI command to load the corpora into ElasticSearch by calling `db/load-corpora`."
59 | {:description "load corpus data into ElasticSearch"
60 | :options
61 | [(lu/log-level-set-option)
62 | (config-file-option)]
63 | :app (fn [& _] ; all arguments handed to the command are ignored
64 | (db/load-corpora))})
65 |
66 | (def split-dataset-command
67 | "CLI command to split the dataset, print its statistics and freeze it to disk."
68 | {:description "split the data into train and test sets and dump the JSON representation to disk"
69 | :options [(lu/log-level-set-option)
70 | (config-file-option)
71 | ["-s" "--split" "number (0-1) to leave for training, remaining will be used for test"
72 | :default 0.9
73 | :required ""
74 | :parse-fn edn/read-string ; the argument is read as EDN, e.g. `0.8`
75 | :validate [#(and (> % 0.0) (< % 1.0)) "Must be a number between (0-1)"]]] ; both bounds are exclusive
76 | :app (fn [{:keys [split]} & _]
77 | (log/infof "spliting data: %.2f" split)
78 | (db/divide-by-set split)
79 | (println "statistics:")
80 | (->> {:split (db/stats) ; statistics are gathered after the split was made
81 | :distribution (db/distribution)}
82 | pp/pprint)
83 | (db/freeze-dataset))})
84 |
85 | (def features-command
86 | "CLI command to show features through `fe/display-features`."
87 | {:description "show features"
88 | :options [(lu/log-level-set-option)
89 | (config-file-option)
90 | ["-f" "--features" "the number of features to display"
91 | :default 100
92 | :required ""
93 | :parse-fn edn/read-string]] ; the argument is read as EDN
94 | :app (fn [{:keys [features]} & _]
95 | (println "Press CONTROL-C to quit")
96 | (fe/display-features :num-features features))})
97 |
98 | (def print-evaluate-command
99 | "CLI command to evaluate the model and print the result of `ec/print-best-results`."
100 | {:description "evaluate the model using a cross fold validation across feature sets"
101 | :options [(lu/log-level-set-option)
102 | (config-file-option)
103 | (classifiers-option)
104 | (metaset-option)]
105 | :app (fn [{:keys [metaset classifiers]} & _]
106 | (with-single-pass ; sets up the bindings and the feature context first
107 | (ec/print-best-results classifiers metaset)))})
108 |
109 | (def evaluates-spreadsheet-command
110 | "CLI command to evaluate and write the results to `output` with `ec/eval-and-write`."
111 | {:description "evaluate the model and output the results to a spreadsheet"
112 | :options [(lu/log-level-set-option)
113 | (output-file-option (io/file "evaluation.xls")) ; default output file
114 | (config-file-option)
115 | (classifiers-option)
116 | (metaset-option)]
117 | :app (fn [{:keys [output metaset classifiers]} & _]
118 | (with-single-pass
119 | (ec/eval-and-write classifiers metaset output)))})
120 |
121 | (def predict-spreadsheet-command
122 |   "CLI command to predict the model.
123 |
124 |   Creates a model for the given classifiers and meta set, trains it on the
125 |   `:train` set, primes it, predicts and writes the predictions to `output`.
126 |   An exception is not propagated: it is logged with its stack trace and a
127 |   truncated form is printed on standard out."
128 |   {:description "evaluate the model, classify, and output the test set"
129 |    :options [(lu/log-level-set-option)
130 |              (output-file-option (io/file "predictions.csv"))
131 |              (config-file-option)
132 |              (classifiers-option)
133 |              (metaset-option)]
134 |    :app (fn [{:keys [output metaset classifiers]} & _]
135 |           (with-single-pass
136 |             (try
137 |               (-> (ec/create-model classifiers metaset)
138 |                   (ec/train-model :set-type :train)
139 |                   ex/prime-model
140 |                   ex/predict
141 |                   (ex/write-predictions output))
142 |               (catch Exception e
143 |                 ;; keep the short console message, but do not lose the stack
144 |                 ;; trace of the failure
145 |                 (log/error e "prediction failed")
146 |                 (println (trunc e))))))})
139 |
--------------------------------------------------------------------------------
/src/clojure/uic/nlp/todo/eval.clj:
--------------------------------------------------------------------------------
1 | (ns ^{:doc "Evaluation of the model using features generated
2 | by [[uic.nlp.todo.feature]]."
3 | :author "Paul Landes"}
4 | uic.nlp.todo.eval
5 | (:require [clojure.tools.logging :as log]
6 | [clojure.set :refer (union)]
7 | [zensols.actioncli.dynamic :as dyn]
8 | [zensols.model.classifier :as cl]
9 | [zensols.model.execute-classifier :refer (with-model-conf) :as ex]
10 | [zensols.model.eval-classifier :as ec :refer (with-two-pass)]
11 | [uic.nlp.todo.feature :as f :refer (with-feature-context)]
12 | [uic.nlp.todo.db :as adb]
13 | [uic.nlp.todo.eval :as ev]))
14 |
15 | (defonce ^:private cross-fold-instances-inst (atom nil)) ; handed to the model config as :cross-fold-instances-inst
16 | (defonce ^:private train-test-instances-inst (atom nil)) ; handed to the model config as :train-test-instances-inst
17 |
18 | (defn feature-sets-set
19 |   "Feature sets to use in the various evaluations of the model.
20 |
21 |   Returns a map from a meta set key (`:set-compare`, `:set-sel`, `:set-1` to
22 |   `:set-4` and `:set-best`) to a list of feature sets, each feature set being
23 |   a sequence of feature name symbols."
24 |   []
25 |   (let [word-counts (f/word-count-features)
26 |         ;; prepend the per label word count features to a feature list
27 |         with-word-counts (fn [feats] (concat word-counts feats))
28 |         all-feats '(elected-verb-id
29 |                     token-average-length
30 |                     pos-first-tag
31 |                     pos-last-tag
32 |                     similarity-top-label
33 |                     similarity-score
34 |                     pos-tag-ratio-noun)
35 |         ;; without the token length and the first/last POS tags
36 |         no-token-feats '(elected-verb-id
37 |                          similarity-top-label
38 |                          similarity-score
39 |                          pos-tag-ratio-noun)
40 |         ;; without any similarity feature
41 |         no-similarity-feats '(elected-verb-id
42 |                               token-average-length
43 |                               pos-first-tag
44 |                               pos-last-tag
45 |                               pos-tag-ratio-noun)
46 |         ;; without the similarity score
47 |         no-score-feats '(elected-verb-id
48 |                          token-average-length
49 |                          similarity-top-label
50 |                          pos-first-tag
51 |                          pos-last-tag
52 |                          pos-tag-ratio-noun)
53 |         tag-feats '(similarity-top-label
54 |                     pos-last-tag
55 |                     pos-first-tag)
56 |         selected-feats '(similarity-top-label
57 |                          pos-last-tag
58 |                          word-count-contact
59 |                          word-count-call
60 |                          word-count-buy
61 |                          word-count-calendar
62 |                          word-count-pay-bill-online
63 |                          pos-first-tag
64 |                          word-count-plan-meal
65 |                          word-count-email
66 |                          word-count-postal
67 |                          word-count-school-work
68 |                          word-count-print)
69 |         best-feats '(similarity-top-label
70 |                      pos-last-tag
71 |                      pos-first-tag
72 |                      word-count-contact
73 |                      word-count-call
74 |                      word-count-buy
75 |                      word-count-calendar
76 |                      word-count-pay-bill-online
77 |                      word-count-plan-meal
78 |                      word-count-email
79 |                      word-count-postal
80 |                      word-count-school-work
81 |                      word-count-print)]
82 |     {:set-compare (list (with-word-counts all-feats)
83 |                         (with-word-counts no-token-feats)
84 |                         (with-word-counts no-similarity-feats)
85 |                         (with-word-counts no-score-feats)
86 |                         (concat all-feats)
87 |                         selected-feats)
88 |      :set-sel (list selected-feats)
89 |      :set-1 (list (with-word-counts all-feats))
90 |      :set-2 (list (with-word-counts no-score-feats))
91 |      :set-3 (list (with-word-counts no-token-feats))
92 |      :set-4 (list (with-word-counts tag-feats))
93 |      :set-best (list best-feats)}))
117 |
118 | (defn reset-instances
119 |   "Empty both instance atoms (cross fold and train/test) of this namespace."
120 |   []
121 |   (doseq [inst [cross-fold-instances-inst train-test-instances-inst]]
122 |     (reset! inst nil)))
123 |
124 | ;; make the reset one of the purge functions tracked by `dyn`
125 | (dyn/register-purge-fn reset-instances)
123 |
124 | (defn create-model-config
125 |   "Create the model configuration for this evaluation.
126 |
127 |   Extends the configuration of the feature namespace with the two instance
128 |   atoms, the feature sets and a `:divide-by-set` function.  That function
129 |   splits the data set without shuffling and then empties the train/test
130 |   instance atom."
131 |   []
132 |   (let [split-fn (fn [divide-ratio]
133 |                    (adb/divide-by-set divide-ratio :shuffle? false)
134 |                    (reset! train-test-instances-inst nil))
135 |         eval-conf {:cross-fold-instances-inst cross-fold-instances-inst
136 |                    :train-test-instances-inst train-test-instances-inst
137 |                    :feature-sets-set (feature-sets-set)
138 |                    :divide-by-set split-fn}]
139 |     (merge (f/create-model-config) eval-conf)))
135 |
136 | (defmacro with-single-pass
137 | "Evaluate `body` for a single pass train/test evaluation: fixed random seed, `:train-test` default set, model config and a feature context built from the `:train` set."
138 | {:style/indent 0}
139 | [& body]
140 | `(binding [cl/*rand-fn* (fn [] (java.util.Random. 1)) ; always seeded with 1
141 | ec/*default-set-type* :train-test
142 | adb/*low-class-count-threshold* 10]
143 | (with-model-conf (create-model-config)
144 | (with-feature-context
145 | (f/create-context :anons-fn f/instance-deref-anons-fn
146 | :set-type :train)
147 | (do ~@body)))))
148 |
149 | (defn write-arff
150 | "Write a Weka ARFF file (handy for importing in R/scikit-learn etc) with `ec/write-arff`."
151 | []
152 | (binding [cl/*rand-fn* (fn [] (java.util.Random. 1)) ; always seeded with 1
153 | ec/*default-set-type* :train-test
154 | adb/*low-class-count-threshold* 0
155 | f/anons adb/anons ; read annotations through `adb` instead of the feature namespace defaults
156 | f/anon-by-id adb/anon-by-id]
157 | (with-model-conf (create-model-config)
158 | (with-feature-context (f/create-context :anons-fn f/instance-deref-anons-fn
159 | :set-type :train)
160 | (dyn/purge) ; runs after the feature context above has been created
161 | (ec/write-arff)))))
162 |
--------------------------------------------------------------------------------
/src/clojure/uic/nlp/todo/corpus.clj:
--------------------------------------------------------------------------------
1 | (ns ^{:doc "This namespace parses the corpus from an Excel file."}
2 | uic.nlp.todo.corpus
3 | (:require [clojure.string :as s]
4 | [clojure.data.csv :as csv]
5 | [clojure.java.io :as io]
6 | [clojure.tools.logging :as log]
7 | [com.brainbot.iniconfig :as iniconfig]
8 | [zensols.actioncli.resource :as res]
9 | [zensols.util.spreadsheet :as ss :refer (with-read-spreadsheet)]
10 | [uic.nlp.todo.resource :as ur]))
11 |
12 | (ur/initialize) ; NOTE(review): presumably registers resources such as :todocorp-config-file used below -- confirm
13 |
14 | (defn annotation-info
15 |   "Return information from the `todocorp.conf` configuration file.
16 |
17 |   Reads the `default` section of the INI file registered as
18 |   `:todocorp-config-file`.  The result has `:annotators` (the values of all
19 |   keys of the form `annotator<digits>`), `:main-annotator`, `:annotated-dir`,
20 |   `:results-dir` and `:serialized-dir`."
21 |   []
22 |   (let [ini (iniconfig/read-ini (res/resource-path :todocorp-config-file))
23 |         sec (get ini "default")]
24 |     (log/debugf "annotation info from %s" sec)
25 |     (let [annotators (for [[k v] sec
26 |                            :when (re-matches #"^annotator\d+" k)]
27 |                        v)]
28 |       {:main-annotator (get sec "annotator_main")
29 |        :annotated-dir (get sec "annotated_dir")
30 |        :results-dir (get sec "results_dir")
31 |        :serialized-dir (get sec "serialized_dir")
32 |        :annotators annotators})))
30 |
31 | (defn annotated-file
32 |   "Return the annotated todo corpus spreadsheet file `<annotator>.xlsx` of the
33 |   annotated directory.  Without an `annotator` the main annotator of the
34 |   configuration is used.  Throws an `ex-info` (data key `:file`) when the
35 |   file does not exist."
36 |   ([] (annotated-file nil))
37 |   ([annotator]
38 |    (let [{:keys [annotated-dir main-annotator]} (annotation-info)
39 |          file-name (format "%s.xlsx" (or annotator main-annotator))
40 |          file (io/file annotated-dir file-name)]
41 |      (when-not (.exists file)
42 |        (throw (ex-info (format "Un-annotated file not found: %s" file)
43 |                        {:file file})))
44 |      file)))
45 |
46 | (defn read-for-annotator
47 |   "Return a list of maps, each with a Todo list data point.
48 |
49 |   Reads the spreadsheet of `annotator` and numbers its rows from zero; that
50 |   number is the `:id` of the data point.  Rows lacking a class or an utterance
51 |   are dropped (their number is not reused).  The class is trimmed and lower
52 |   cased (an empty class becomes nil), the utterance is trimmed.  At most
53 |   `limit` data points are returned."
54 |   [& {:keys [limit annotator]
55 |       :or {limit Integer/MAX_VALUE}}]
56 |   (letfn [(->data-point [idx {:keys [bgid board-id source class utterance]}]
57 |             (let [label (when class
58 |                           (s/lower-case (s/trim class)))
59 |                   text (when utterance
60 |                          (s/trim (str utterance)))]
61 |               (when (and label text)
62 |                 {:id (int idx)
63 |                  :bgid bgid
64 |                  :board-id board-id
65 |                  :source source
66 |                  :class (when (pos? (count label)) label)
67 |                  :utterance text})))]
68 |     (let [file (annotated-file annotator)]
69 |       (with-read-spreadsheet [file rows type]
70 |         (->> (map ->data-point (range) (ss/rows-to-maps rows))
71 |              (remove nil?)
72 |              (take limit)
73 |              doall)))))
74 |
75 | (defn- anons-by-ids
76 |   "Create annotations with unique identifiers.
77 |
78 |   Returns a map from the data point id to the data point of `annotator`, with
79 |   the annotator added under `:annotator`.  Only the first `limit` data points
80 |   are used; a missing or nil `limit` means no limit."
81 |   [annotator & {:keys [limit]}]
82 |   (->> (read-for-annotator :annotator annotator)
83 |        (map (fn [{:keys [id] :as elt}]
84 |               {id (assoc elt :annotator annotator)}))
85 |        ;; `take` fails on nil, so fall back to an effectively unlimited count
86 |        (take (or limit Integer/MAX_VALUE))
87 |        (apply merge)))
84 |
85 | (defn coder-agreement
86 |   "Create the output file used by R to create intercoder agreement (Cohen's
87 |   Kappa).
88 |
89 |   Writes `intercoder.csv` into the results directory and returns that file.
90 |   The first CSV row is `id` followed by the annotator names.  Each further row
91 |   has the id of an utterance annotated by all annotators followed by the class
92 |   every annotator assigned.  `annotators` defaults to the annotators of the
93 |   configuration, `limit` caps the data points read per annotator.  Throws an
94 |   `ex-info` when the annotators do not share the same utterance for an id."
95 |   [& {:keys [annotators limit]
96 |       :or {limit Integer/MAX_VALUE}}]
97 |   (let [info (annotation-info)
98 |         {:keys [results-dir]} info
99 |         annotators (or annotators (:annotators info))
100 |         outfile (io/file results-dir "intercoder.csv")
101 |         ;; annotator -> (id -> annotation)
102 |         by-annotator (->> annotators
103 |                           (map (fn [annotator]
104 |                                  {annotator (anons-by-ids annotator :limit limit)}))
105 |                           (apply merge))
106 |         id-sets (->> by-annotator
107 |                      vals
108 |                      (map #(-> % keys set)))
109 |         ;; ids of utterances annotated by all annotators; without annotators
110 |         ;; there is nothing to intersect
111 |         ;; NOTE(review): `clojure.set` is not in the `:require` list of this
112 |         ;; namespace, so this relies on it having been loaded elsewhere
113 |         shared-ids (if (seq id-sets)
114 |                      (apply clojure.set/intersection id-sets)
115 |                      #{})]
116 |     (->> shared-ids
117 |          ;; maps of the id and the annotations of all annotators for it
118 |          (map (fn [id]
119 |                 (->> annotators
120 |                      (map (fn [annotator]
121 |                             (get-in by-annotator [annotator id])))
122 |                      (hash-map :id id :anon-list))))
123 |          ;; rows of the id and the class of each annotator
124 |          (map (fn [{:keys [id anon-list]}]
125 |                 (let [utterances (map :utterance anon-list)]
126 |                   ;; sanity check: all annotators must have seen the same text
127 |                   (when (> (count (distinct utterances)) 1)
128 |                     (throw (ex-info (format "unaligned utterances for id %s: %s"
129 |                                             id (s/join " | " utterances))
130 |                                     {:id id
131 |                                      :annotation-list anon-list}))))
132 |                 (cons id (map :class anon-list))))
133 |          ;; CSV header (for R colnames later)
134 |          (cons (cons "id" annotators))
135 |          ((fn [data]
136 |             (with-open [writer (io/writer outfile)]
137 |               (csv/write-csv writer data)))))
138 |     (log/infof "wrote intercoder agreement file: %s" outfile)
139 |     outfile))
132 |
133 | (defn read-anons
134 |   "Read annotations from the Excel file.
135 |
136 |   Reads the data points of every annotator (`annotators` defaults to those of
137 |   the configuration, at most `limit` data points each) and merges them by id.
138 |   When several annotators have the same id, the annotator listed first wins.
139 |   Returns the merged annotations, or nil when there are none."
140 |   [& {:keys [annotators limit]
141 |       :or {limit Integer/MAX_VALUE}}]
142 |   (let [annotators (or annotators (:annotators (annotation-info)))]
143 |     (->> annotators
144 |          ;; merged last means winning, hence the reversed order
145 |          reverse
146 |          (map (fn [annotator]
147 |                 (anons-by-ids annotator :limit limit)))
148 |          ;; the init value keeps an empty annotator list from failing
149 |          (reduce merge {})
150 |          vals)))
152 |
153 | (defn serialize-annotations
154 | "Write the annotations as `pr-str` text to the intermediate file `annotations.dat`.
155 | **Note**: this should not be confused with the JSON generation,
156 | which is done by [[uic.nlp.todo.db/freeze-dataset]]."
157 | []
158 | (let [{:keys [serialized-dir]} (annotation-info)
159 | out-file (io/file serialized-dir "annotations.dat")]
160 | (with-open [writer (io/writer out-file)]
161 | (binding [*out* writer] ; makes the `println` below write into the file
162 | (println (pr-str (read-anons)))))
163 | (log/infof "wrote Clojure serialized annotations data to %s" out-file)))
164 |
165 | (defn deserialize-annotation
166 |   "See [[serialize-annotations]].
167 |
168 |   Reads `annotations.dat` from the serialized directory and returns the data
169 |   structure `read-string` parses from its content."
170 |   []
171 |   (let [{:keys [serialized-dir]} (annotation-info)
172 |         in-file (io/file serialized-dir "annotations.dat")]
173 |     ;; the `%s` is needed, otherwise the file never shows up in the log
174 |     (log/infof "reading annotations data from %s" in-file)
175 |     ;; NOTE(review): `read-string` may evaluate code embedded in the data
176 |     ;; (see `*read-eval*`); only read files written by this program
177 |     (with-open [reader (io/reader in-file)]
178 |       (->> reader
179 |            slurp
180 |            read-string))))
175 |
176 | (defn ^:deprecated metrics
177 |   "Generate somewhat useful metrics (deprecated).
178 |
179 |   Returns a map with the source distribution of the annotations of
180 |   annotator1 to annotator4 under `:annotated` and of the `relabeled`
181 |   annotations under `:used`.  A distribution maps a source to its number of
182 |   annotations; all sources starting with `person` count as `volunteer`."
183 |   []
184 |   (let [source-dist (fn [anons]
185 |                       (frequencies
186 |                        (for [{:keys [source]} anons]
187 |                          (if (re-matches #"^person.*" source)
188 |                            "volunteer"
189 |                            source))))
190 |         annotated (source-dist
191 |                    (read-anons :annotators ["annotator1" "annotator2"
192 |                                             "annotator3" "annotator4"]))
193 |         used (source-dist (read-anons :annotators ["relabeled"]))]
194 |     {:annotated annotated
195 |      :used used}))
195 |
196 |
--------------------------------------------------------------------------------
/results/intercoder.csv:
--------------------------------------------------------------------------------
1 | annotator1,annotator2
2 | service,calendar,Taxes for 2015
3 | service,calendar,Clear out small garden bed
4 | buy,calendar,Get more dirt
5 | service,calendar,plant more plants
6 | buy,buy,Go get dirt from lowes
7 | buy,buy,Go get plants at Tilth Sale
8 | email,contact,email daniel about strawberries
9 | service,calendar,clean bathroom
10 | school-work,calendar,Read Modules 1-4
11 | school-work,calendar,syllabus quiz
12 | school-work,calendar,HW 1
13 | school-work,calendar,Quiz
14 | service,calendar,Set up org file for garden
15 | service,calendar,finish mowing
16 | service,calendar,clean up woodpile
17 | service,calendar,household - setup vlc @ term on doctor
18 | service,calendar,household - water plants
19 | buy,calendar,Buy container mix
20 | buy,calendar,paper for the upstairs desk
21 | buy,buy,order a meditation cushion
22 | service,calendar,Install Quicksilver and experiment
23 | service,calendar,Take out the dog
24 | service,calendar,Wash the dishes
25 | service,calendar,Clean the carpet
26 | find-service,calendar,Get the oil change
27 | service,calendar,Go pick up my son from school
28 | self-improve,calendar,Practice Mandarin Chinese
29 | service,calendar,Clean the litter box
30 | buy,calendar,Rent a carpet cleaning machine
31 | find-service,calendar,Install my new sink
32 | buy,buy,Tuscon: buy cannister fuel
33 | buy,buy,Tuscon: buy two 1L smartwater bottles
34 | self-improve,calendar,learn guyline knots/technique
35 | service,calendar,set up tent
36 | service,calendar,test sleep system
37 | find-travel,buy,make travel arrangements to Lordsburg
38 | find-travel,buy,make travel arrangements from Albuquerque
39 | plan-meal,search,identify 4-6 dinner recipes
40 | plan-meal,buy,buy ingredients
41 | plan-meal,calendar,test cook recipes
42 | plan-meal,calendar,package food
43 | service,calendar,assemble first aid kit
44 | service,calendar,assemble toiletries kit
45 | service,calendar,assemble repair kit
46 | buy,buy,buy stuff sacks/pack backpack
47 | service,calendar,make windscreen and pot cozy
48 | pay-bill-online,pay-bill-online,pay ATT
49 | pay-bill-online,pay-bill-online,pay comed
50 | pay-bill-online,pay-bill-online,pay mortgage
51 | pay-bill-online,pay-bill-online,pay student loan
52 | call,calendar,update address-ATT
53 | service,calendar,reconcile bank statement
54 | buy,buy,choose front door fixture
55 | call,calendar,call amazon about kindle
56 | find-service,search,find new insurance agent
57 | find-service,calendar,annual physical
58 | service,calendar,clean closet
59 | buy,buy,buy flower girl headbands
60 | buy,buy,buy bridal party gifts
61 | buy,buy,get wedding insurance
62 | find-travel,buy,book honeymoon
63 | find-service,calendar,Please work on an e-visa with the Cambodian embassy in Vietnam. I want to consider making you the admin person in our NGO paperwork.
64 | service,calendar,Upload photos to gmail
65 | find-service,calendar,Apply to Southxchange
66 | email,contact,"Contact people to look for money in Myanmar (Ben, Thieu)"
67 | buy,calendar,Remember the Milk
68 | email,contact,Email people affected by timezone bug
69 | find-service,calendar,Identify office/place for me to live
70 | call,contact,"Call Phil Smith, Mark Fukuda"
71 | email,contact,Email/Call Dan Strickman
72 | find-service,calendar,fix coffee pot dispenser
73 | buy,search,research cell phone plans
74 | buy,buy,get suit for Boston
75 | postal,calendar,send info to credit bureau
76 | school-work,calendar,write nutrition paper
77 | contact,calendar,book babysitter
78 | calendar,calendar,Schedule appointments with site managers
79 | find-service,buy,Hire wedding consultant
80 | calendar,buy,"Place order for wedding cake (and groom's cake, if desired)"
81 | find-service,buy,Book officiant
82 | calendar,calendar,Set a meeting with officiant to go through and confirm the details
83 | contact,contact,Contact local town clerk's office to arrange marriage license
84 | buy,buy,Get marriage license
85 | calendar,calendar,"Schedule rehearsal with officiant, all family members, bridesmaids, best men, wedding planner (if applicable) and other participants."
86 | calendar,calendar,Assign maid of honor / one attendant to arrange train and/or veil at altar
87 | contact,contact,Share contact numbers with at 2 people
88 | service,calendar,Assemble emergency supply kit for wedding day and find a place to keep it
89 | buy,buy,Purchase/make pillow for ring bearer
90 | buy,buy,Purchase birdseed/bubbles/rose petals for guests to shower you with as you leave ceremony site (this custom could instead be performed as you depart from the reception)
91 | contact,calendar,"Confirm with your rental company all details, times, and sites where items must be delivered/picked up"
92 | contact,calendar,Ask caterer/coordinator to have top teir of wedding cake packed up for you (to save for first anniversary)
93 | contact,calendar,Ask caterer/coordinator to have cake topper packed up for you.
94 | calendar,calendar,Schedule sitting for engagement portrait
95 | find-travel,buy,Make your hotel reservation for wedding night
96 | contact,contact,"Send hotel and transportation information to out-of-town guests, include directions from local airorts and cities from which many guests will be arriving by car, information (description, location, phone number) and any code or name that must be mentioned to receive discounted rate when making room reservations"
97 | calendar,calendar,Schedule appointments at bridal salons
98 | calendar,calendar,Make appointments for gown fittings
99 | contact,contact,Inquire as to what you need to bring to fittings
100 | contact,contact,Discuss bustle style of train with seamstress
101 | find-service,calendar,Have shoes dyed to match gown
102 | find-service,buy,Find professional dry cleaner who specialized in bridal gown care to clean and pack up dress andaccessories after wedding
103 | buy,buy,Buy garter
104 | buy,buy,Buy earrings
105 | buy,buy,buy necklace
106 | buy,buy,Buy other jewelry
107 | buy,buy,Buy hair accessories
108 | buy,buy,Buy purse
109 | buy,buy,Buy wrap
110 | buy,buy,Buy gloves
111 | buy,buy,Buy going-away outfit
112 | buy,buy,Buy rehearsal dinner outfit
113 | buy,buy,Order stationery items
114 | buy,buy,Buy special postage stamps for invitation envelopes and response envelopes
115 | plan-meal,calendar,Plan menu
116 | postal,send,Send invitations
117 | contact,calendar,Tally final guest count with site manager/caterer
118 | service,calendar,organize closets
119 | plan-meal,search,find healthy baking recipes
120 | plan-meal,calendar,start meal planning for the week
121 | calendar,calendar,go over budget with Matt
122 | contact,calendar,request permit for signage
123 | contact,calendar,request permit for aframe
124 | pay-bill-online,calendar,get bills on autopay
125 | service,calendar,fix website
126 | postal,calendar,pick up mail at post office
127 | find-service,calendar,transfer internet
128 | buy,calendar,get keys made
129 | buy,calendar,subscription to safari library online
130 | find-service,buy,sound proofing for office
131 | find-service,buy,repair laundry room floor
132 | contact,calendar,Contact student orgs regarding Storyteller Rotation
133 | contact,calendar,Followup with World Renew
134 | call,contact,Call Greg Sims back
135 | buy,calendar,Business cards
136 | contact,contact,"Contact Stella's staff on interest for ""Stella's Super Mario Party"""
137 | print,calendar,Print Gen. Giveaway
138 | contact,calendar,Reach out to Seattle restaurant coalition orgs
139 | contact,calendar,Followup with National NPOs
140 | call,calendar,"Call a ""townhall meeting"" at GRBC and personally invite all past/current owners to give input."
141 | print,calendar,Print/have fedex cut 300 giveaways
142 | contact,calendar,Followup with Jenny at KFB about the restaurants' promo request form
143 | school-work,calendar,Apply for Grad. Student assistance from UM
144 | contact,calendar,Ask KFB for list of restaurants they have worked with in the past.
145 | find-service,calendar,Hire content writer
146 | call,contact,Debrief Constantly \u2013 Call Brooke and discuss how HHCT III went
147 | postal,send,"Send Abby her shirt at 247 W 15th St. Holland, MI 49423"
148 | contact,calendar,Ask IC to recommend us to ILG
149 | contact,calendar,Invite restaurants (and NPOs) to Patagonia event next week
150 | contact,contact,Outreach to 10 restaurants
151 | contact,calendar,Speak to your first restaurant
152 | calendar,calendar,Schedule one tabling event
153 | postal,calendar,Send shirts to our Inner Circlers
154 | buy,calendar,Pre-order from NPO in another state
155 | email,calendar,"write appt emails (BV, UIX, AD mtg, GVSU)"
156 | email,contact,coffeeshop emails
157 | contact,calendar,Arrange print run: giveaway cards
158 | contact,calendar,Followup with GR restaurants
159 | contact,calendar,Press Followups
160 | email,contact,Donate/send 5000 email
161 | postal,send,Mail nonprofit checks to Kyle!!!!!
162 | service,calendar,Iron shirts for Flat lander's
163 | contact,contact,Connect with KFB/FAWM/ACT over checks
164 | contact,calendar,Local Restuarant followup
165 | contact,calendar,Contact Mars Hill about HQ and marketing
166 | contact,calendar,Followup with Local Restaurants
167 | print,calendar,print general giveaways for David
168 | service,calendar,Ironing shirts for staff
169 | contact,calendar,Ann Arbor followups
170 | contact,calendar,Followup with restaurant invoices
171 | call,contact,Call John McCain's team
172 | buy,calendar,order new checks
173 | call,contact,call lesko clients
174 | call,contact,call city of somerville and genki tenants
175 | find-service,calendar,seek new investment ideas
176 | find-service,calendar,repair roof
177 | contact,contact,send note to cindy regarding dance studio taxes
178 | call,calendar,call exterminators
179 | pay-bill-online,calendar,pay rent
180 | postal,calendar,send info to appraiser
181 | pay-bill-online,pay-bill-online,pay NY tax bill
182 | contact,calendar,send updated financials to Armand
183 | find-travel,search,look at airbnbs in binghamton
184 | pay-bill-online,calendar,pay plumber
185 | pay-bill-online,calendar,pay roofer
186 | buy,buy,order gpa mount
187 | contact,calendar,change address at bank
188 | contact,calendar,cancel renters insurance with AAA
189 | buy,buy,Buy mirror for upstairs bathroom
190 | postal,calendar,Mail change of address at DMV
191 | calendar,calendar,Pick up moving truck at 5:30pm Friday
192 | call,contact,Call to shut off water
193 | call,contact,Call to shut off gas
194 | call,contact,Call to find out about moving truck
195 | service,calendar,Vacuum hardwoods & couch
196 | buy,calendar,Pick up more boxes
197 | contact,calendar,Cancel Comcast Internet
198 | pay-bill-online,pay-bill-online,Pay SF Parking Ticket. :(
199 | service,calendar,Pack kitchen
200 | service,calendar,Pack spare room
201 | pay-bill-online,pay-bill-online,Pay water bill
202 | pay-bill-online,calendar,Pay PG&E
203 | pay-bill-online,calendar,Pay Expression
204 | service,calendar,Wash Mazda
205 | buy,search,Research bathroom mirrors online
206 | pay-bill-online,calendar,Pay DMV for Harley before May
207 | buy,calendar,Sign up for pottery
208 | find-service,search,Home security research
209 | self-improve,calendar,Sign up for Spanish
210 | calendar,calendar,Appointment with capital women's care
211 | contact,contact,Talk to few lawyers
212 | calendar,calendar,Apple appointment
213 | find-travel,buy,Kansas City tickets
214 | find-travel,calendar,Book a car - dec 13-17
215 | find-travel,buy,Buy Nicaragua tickets
216 | find-service,buy,Find a photographer/marriage counselor in Hawaii
217 | contact,contact,talk to Mike
218 | calendar,calendar,schedule DHPP shot
219 | service,calendar,mop floors
220 | service,calendar,clean bathroom
221 | pay-bill-online,pay-bill-online,pay bills
222 | service,calendar,clean kitchen
223 | call,contact,call grandmom
224 | find-travel,buy,get return flight
225 | service,calendar,move the pee pad
226 | buy,buy,replace shoes
227 | service,calendar,glue pumpkin
228 | service,calendar,sweep
229 | call,contact,call health care thing
230 | call,contact,call dad re: moving boxes
231 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Supervised Approach Imperative To-Do List Categorization
2 |
3 | This repository contains a corpus and code base to categorize natural language
4 | todo list items as described in our paper [A Supervised Approach To The
5 | Interpretation Of Imperative To-Do Lists].
6 |
7 | This repository contains:
8 |
9 | * A publicly available [corpus](#corpus).
10 | * A [code base](#code-base) similar to that given as published results in the
11 | [arXiv paper].
12 |
13 |
14 |
15 | ## Table of Contents
16 |
17 | - [Documents](#documents)
18 | - [Corpus](#corpus)
19 | - [Citation](#citation)
20 | - [Code Base](#code-base)
21 | - [What's Included](#whats-included)
22 | - [Third Party Libraries](#third-party-libraries)
23 | - [Documentation](#documentation)
24 | - [Running the Tests](#running-the-tests)
25 | - [Parsing](#parsing)
26 | - [Test Evaluation](#test-evaluation)
27 | - [Predictions](#predictions)
28 | - [Off-line Tests](#off-line-tests)
29 | - [Sample Results of Test](#sample-results-of-test)
30 | - [Building](#building)
31 | - [Advanced](#advanced)
32 | - [Changelog](#changelog)
33 | - [Special Thanks](#special-thanks)
34 | - [License](#license)
35 |
36 |
37 |
38 |
39 | ## Documents
40 |
41 | * [Corpus]
42 | * [Paper on arXiv] (please [cite](#citation) this paper)
43 | * [Paper](https://plandes.github.io/todo-task/SupervisedInterpretationImperativeToDos.pdf) (please do **not**
44 | cite this paper).
45 | * [Slides](https://plandes.github.io/todo-task/SupervisedInterpretationImperativeToDosSlides.pdf)
46 | * [Evaluation](results/full-evaluation.xls) (generated using
47 | the [evaluation functionality](#test-evaluation))
48 | * [Predictions](results/predictions.csv) (generated using
49 | the [predictions functionality](#predictions))
50 |
51 |
52 | ## Corpus
53 |
54 | The publicly available corpus is available [here](resources/corpus.xlsx) in
55 | Excel format. This corpus is referred to as *Corpus B* in the [arXiv
56 | paper]. The columns in the spreadsheet are:
57 |
58 | | Column Name | Description | Trello Artifact |
59 | |---------------|-------------------------------------------------|-----------------|
60 | | `utterance` | The natural language todo list text. | no |
61 | | `class` | The label if classified, otherwise left blank. | no |
62 | | `board_name` | The name of the board | yes |
63 | | `board_id` | The board ID | yes |
64 | | `short_url` | The URL of the comment on Trello | yes |
65 | | `description` | Additional description information for the task | yes |
66 |
67 |
68 | ## Citation
69 |
70 | Please use the following to cite the [arXiv paper].
71 |
72 | ```jflex
73 | @article{landesDiEugenio2018,
74 | title = {A Supervised Approach To The Interpretation Of Imperative To-Do Lists},
75 | url = {http://arxiv.org/abs/1806.07999},
76 | note = {arXiv: 1806.07999},
77 | journal = {arXiv:1806.07999 [cs]},
78 | author = {Landes, Paul and Di Eugenio, Barbara},
79 | year = {2018},
80 | month = {Jun}
81 | }
82 | ```
83 |
84 | If you use this software in your research, please cite with the following
85 | BibTeX (note that the [third party libraries] also have citations):
86 |
87 | ```jflex
88 | @misc{plandesTodoTask2018,
89 | author = {Paul Landes},
90 | title = {Supervised Approach Imperative To-Do List Categorization},
91 | year = {2018},
92 | publisher = {GitHub},
93 | journal = {GitHub repository},
94 | howpublished = {\url{https://github.com/plandes/todo-task}}
95 | }
96 | ```
97 |
98 |
99 | ## Code Base
100 |
101 | The code base used in this repository is an updated version of the code used on
102 | *Corpus A* (see the [arXiv paper]). It is written in [Clojure] and written to
103 | be accessed mostly via `make` commands. However, it can be compiled into a
104 | command line app if you want to run the long running cross fold validation
105 | tasks.  See the [Running the Tests](#running-the-tests) section to compile and run it.
106 |
107 |
108 | ### What's Included
109 |
110 | The functionality included is *agent classification* as described in the [arXiv
111 | paper]. The following is *not* included:
112 |
113 | * Argument classification
114 | * Extending the Named Entity Recognizer (section 4.1)
115 | * The first verb model (section 4.2)
116 |
117 | This functionality is not included as the original code base is proprietary.
118 | This code base was rewritten and [third party libraries] utilized where
119 | possible to speed up the development.
120 |
121 |
122 | ### Third Party Libraries
123 |
124 | Primary libraries used are listed below. Their dependencies can be traced from
125 | their respective repo links:
126 |
127 | * [Natural Language Parsing and Feature Generation]
128 | * [Interface for Machine Learning Modeling]
129 | * [Generate, split into folds or train/test and cache a dataset]
130 | * [Natural Language Feature Creation]
131 | * [Word Vector Feature Creation]
132 |
133 |
134 | ### Documentation
135 |
136 | API [documentation](https://plandes.github.io/todo-task/codox/index.html).
137 |
138 |
139 | ## Running the Tests
140 |
141 | This section explains how to run the model against the corpus to reproduce
142 | results *similar* to those in the [arXiv paper].  These instructions assume either
143 | a UNIX, Linux, macOS operating system or *maybe* Cygwin under Windows.
144 |
145 | Before proceeding, please install all the tools given in
146 | the [building](#building) section.
147 |
148 |
149 | ### Parsing
150 |
151 | This section describes how to parse the corpus and load the corpus. Note that
152 | if you just want to run the tests you can **skip**
153 | to [test evaluation](#test-evaluation) section. This means you don't need
154 | [ElasticSearch], which is only necessary for parsing the corpus and creating
155 | file system train/test split.  This has already been done
156 | and the result is [in the repo](resources/todo-dataset.json).
157 |
158 | On the other hand, if you **really** want to manually parse and create the
159 | train/test data sets you must first install [ElasticSearch] or [Docker]. The
160 | easiest way to get this up and working is to use [Docker], which is easy enough
161 | to download, install and get running on a container with:
162 |
163 | ```bash
164 | make startes
165 | ```
166 |
167 | which provides the configuration necessary to download and start an
168 | [ElasticSearch] container ready to store the generated features from the parsed
169 | natural language text.
170 |
171 | Next, populate [ElasticSearch] with parsed features:
172 |
173 | ```bash
174 | make load
175 | ```
176 |
177 | This parses the corpus and adds a JSON parse representation of each utterance
178 | to the database.
179 |
180 | Next create train and test datasets by randomly shuffling the corpus. After
181 | the train/test assignment for each data point, export the data set to the JSON
182 | file:
183 |
184 | ```bash
185 | make dsprep
186 | ```
187 |
188 |
189 | ### Test Evaluation
190 |
191 | Produce the optimal results for the model by evaluating and
192 | printing the results:
193 |
194 | ```bash
195 | make printbest
196 | ```
197 |
198 | This gives the best (0.76 F1) results.
199 |
200 |
201 | To run all defined feature and classifier combinations run the following:
202 |
203 | ```bash
204 | make print
205 | ```
206 |
207 |
208 | To run all defined feature and classifier combinations and create a spreadsheet
209 | with all performance metrics, features and classifiers used for those metrics run:
210 |
211 | ```bash
212 | make evaluate
213 | ```
214 |
215 | This will create an `evaluation.xls` file. The file this process generates
216 | is [here](results/full-evaluation.xls).
217 |
218 |
219 | ### Predictions
220 |
221 | It is possible to generate a CSV file with predictions complete with the
222 | utterance, the correct label, and the predicted label. In addition, the file
223 | also includes all features used to create the prediction.  This process includes:
224 |
225 | 1. For each feature set and classifier combination, train the model and test.
226 | 2. The winning combination (by F1) of feature set and classifier is used to
227 | train the model.
228 | 3. Create predictions on the test set.
229 | 4. Generate the spreadsheet with the results.
230 |
231 | To invoke this functionality, use the following:
232 |
233 | ```bash
234 | make predict
235 | ```
236 |
237 | This will generate a `predictions.csv` file. The file this process generates
238 | is [here](results/predictions.csv).
239 |
240 |
241 | ### Off-line Tests
242 |
243 | If you have a slower computer and the tests take too long, they can run in an
244 | offline mode.
245 |
246 | To run the long running offline tests in the background, first download and link to the
247 | models (note the space between `ZMODEL=` and `models` is intentional):
248 |
249 | ```bash
250 | make ZMODEL= models
251 | ```
252 |
253 | create the application as a standalone and then
254 | execute in the background:
255 |
256 | ```bash
257 | make ZMODEL=`pwd`/model DIST_PREFIX=./inst disttodo
258 | cd ./inst/todotask
259 | ./run.sh sanity
260 | tail -f log/test-res.log
261 | ```
262 |
263 | Type `CONTROL-C` to break out of `tail`, then open `results/test-res.xlsx`
264 | to confirm it contains a single line from a simple majority label classifier (it will
265 | have terrible performance).
266 |
267 | If everything works, now run the long running tests:
268 |
269 | ```bash
270 | ./run.sh long
271 | ls results
272 | ```
273 |
274 | The `results` directory will have the results from each test.
275 | Section [results](#sample-results-of-test) has a summary of each test.
276 |
277 |
278 | ### Sample Results of Test
279 |
280 | A selection of results using this code base on [*Corpus B*] are given
281 | below:
282 |
283 | | Classifier | F1 | Precision | Recall | Attributes |
284 | |--------------|----------:|----------:|-------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
285 | | J48 | 0.763 | 7677 | 0.761 | similarity-top-label, pos-last-tag, word-count-contact, word-count-call, word-count-buy, word-count-calendar, word-count-pay-bill-online, pos-first-tag, word-count-plan-meal, word-count-email, word-count-postal, word-count-school-work, word-count-print |
286 | | RandomForest | 0.695 | 7101 | 0.714 | *all word counts*, elected-verb-id, similarity-top-label, similarity-score, pos-tag-ratio-noun |
287 | | RandomTree | 0.656 | 6654 | 0.666 | *all word counts*, elected-verb-id, similarity-top-label, similarity-score, pos-tag-ratio-noun |
288 | | LogitBoost | 0.592 | 6171 | 0.619 | *all word counts*, elected-verb-id, similarity-top-label, similarity-score, pos-tag-ratio-noun |
289 | | NaiveBayes | 0.547 | 5779 | 0.571 | similarity-top-label, pos-last-tag, word-count-contact, word-count-call, word-count-buy, word-count-calendar, word-count-pay-bill-online, pos-first-tag, word-count-plan-meal, word-count-email, word-count-postal, word-count-school-work, word-count-print |
290 | | SVM | 0.273 | 2815 | 0.285 | elected-verb-id, token-average-length, pos-first-tag, pos-last-tag, similarity-top-label, similarity-score, pos-tag-ratio-noun |
291 | | Baseline | 0.091 | 2356 | 0.238 | similarity-top-label, pos-last-tag, word-count-contact, word-count-call, word-count-buy, word-count-calendar, word-count-pay-bill-online, pos-first-tag, word-count-plan-meal, word-count-email, word-count-postal, word-count-school-work, word-count-print |
292 |
293 |
294 | ### Building
295 |
296 | To build from source, do the following:
297 |
298 | - Install [Leiningen](http://leiningen.org) (this is just a script)
299 | - Install [GNU make](https://www.gnu.org/software/make/)
300 | - Install [Git](https://git-scm.com)
301 | - Download the source: `git clone --recurse-submodules https://github.com/plandes/todo-task && cd todo-task`
302 |
303 |
304 | ### Advanced
305 |
306 | All the capabilities of the [Interface for Machine Learning Modeling] package,
307 | including creating a usable executable model, are possible. The (not unit test
308 | case) [Clojure] [experimental execution file](test/uic/nlp/todo/eval_test.clj)
309 | demonstrates how to do other things with the model. All you need to do is to
310 | start a [REPL](https://clojure.org/guides/repl/introduction) and call the
311 | `main` function.
312 |
313 |
314 | ## Changelog
315 |
316 | An extensive changelog is available [here](CHANGELOG.md).
317 |
318 |
319 | ## Special Thanks
320 |
321 | Thanks to those that volunteered their To-do tasks that, in part, made
322 | up this publicly available corpus.
323 |
324 |
325 | ## License
326 |
327 | This license applies to the code base and the corpus.
328 |
329 | Copyright (c) 2018 Paul Landes
330 |
331 | Permission is hereby granted, free of charge, to any person obtaining a copy of
332 | this software and associated documentation files (the "Software"), to deal in
333 | the Software without restriction, including without limitation the rights to
334 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
335 | of the Software, and to permit persons to whom the Software is furnished to do
336 | so, subject to the following conditions:
337 |
338 | The above copyright notice and this permission notice shall be included in all
339 | copies or substantial portions of the Software.
340 |
341 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
342 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
343 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
344 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
345 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
346 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
347 | SOFTWARE.
348 |
349 |
350 |
351 | [A Supervised Approach To The Interpretation Of Imperative To-Do Lists]: https://arxiv.org/pdf/1806.07999
352 | [arXiv paper]: https://arxiv.org/pdf/1806.07999
353 | [Paper on arXiv]: https://arxiv.org/pdf/1806.07999
354 | [*Corpus B*]: resources/corpus.xlsx
355 | [Corpus]: resources/corpus.xlsx
356 |
357 | [ElasticSearch]: https://www.elastic.co
358 | [Docker]: https://www.docker.com
359 | [Clojure]: https://clojure.org
360 |
361 | [Natural Language Parsing and Feature Generation]: https://github.com/plandes/clj-nlp-parse
362 | [Interface for Machine Learning Modeling]: https://github.com/plandes/clj-ml-model
363 | [Generate, split into folds or train/test and cache a dataset]: https://github.com/plandes/clj-ml-dataset
364 | [Natural Language Feature Creation]: https://github.com/plandes/clj-nlp-feature
365 | [Word Vector Feature Creation]: https://github.com/plandes/clj-nlp-wordvec
366 | [third party libraries]: #third-party-libraries
367 |
--------------------------------------------------------------------------------
/results/agent-data.arff:
--------------------------------------------------------------------------------
1 | @relation agent-classify
2 |
3 | @attribute word-count-calendar numeric
4 | @attribute pos-last-tag {,-RRB-,$,NNS,IN,NN,RBR,'\'\'',#,DT,VBP,JJR,PRP,RP,MD,',',.,RB,VBG,:,FW,TO,CC,-LRB-,VBD,WP,WRB,WDT,VBZ,LS,RBS,NNPS,EX,CD,VB,WP$,NNP,SYM,PRP$,JJS,UH,POS,VBN,JJ,PDT}
5 | @attribute pos-first-tag {,-RRB-,$,NNS,IN,NN,RBR,'\'\'',#,DT,VBP,JJR,PRP,RP,MD,',',.,RB,VBG,:,FW,TO,CC,-LRB-,VBD,WP,WRB,WDT,VBZ,LS,RBS,NNPS,EX,CD,VB,WP$,NNP,SYM,PRP$,JJS,UH,POS,VBN,JJ,PDT}
6 | @attribute word-count-buy numeric
7 | @attribute is-question numeric
8 | @attribute pos-tag-count-adverb numeric
9 | @attribute word-count-find-service numeric
10 | @attribute word-count-email numeric
11 | @attribute similarity-score numeric
12 | @attribute token-count numeric
13 | @attribute pos-tag-count-verb numeric
14 | @attribute pos-tag-count-adjective numeric
15 | @attribute word-count-contact numeric
16 | @attribute pos-tag-ratio-noun numeric
17 | @attribute pos-tag-ratio-adverb numeric
18 | @attribute word-count-self-improve numeric
19 | @attribute word-count-school-work numeric
20 | @attribute word-count-service numeric
21 | @attribute token-average-length numeric
22 | @attribute utterance-length numeric
23 | @attribute pos-tag-count-noun numeric
24 | @attribute word-count-find-travel numeric
25 | @attribute pos-tag-count-wh numeric
26 | @attribute stopword-count numeric
27 | @attribute sent-count numeric
28 | @attribute similarity-top-label {pay-bill-online,calendar,call,find-travel,find-service,email,postal,buy,self-improve,print,service,school-work,contact,plan-meal}
29 | @attribute pos-tag-ratio-verb numeric
30 | @attribute word-count-postal numeric
31 | @attribute word-count-pay-bill-online numeric
32 | @attribute pos-tag-ratio-adjective numeric
33 | @attribute elected-verb-id numeric
34 | @attribute word-count-print numeric
35 | @attribute utterance string
36 | @attribute word-count-call numeric
37 | @attribute mention-count numeric
38 | @attribute pos-tag-ratio-wh numeric
39 | @attribute word-count-plan-meal numeric
40 | @attribute agent {pay-bill-online,calendar,call,find-travel,find-service,email,postal,buy,self-improve,print,service,school-work,contact,plan-meal}
41 |
42 | @data
43 | 0,NNS,NN,0,0,0,0,0,0.72155,2,0,0,0,1,0,0,0,0,4,9,2,0,0,0,1,pay-bill-online,0,0,0.68,0,1822752074,0,'pay bills',0,0,0,0,pay-bill-online
44 | 0,VBD,NN,0,0,0,0,0,0.394499,2,1,0,0,0.5,0,0,0,0,4,9,1,0,0,0,1,pay-bill-online,0.5,0,0.6,0,1822752074,0,'pay comed',0,0,0,0,pay-bill-online
45 | 0,NN,VB,0,0,0,0,0,0.532605,2,1,0,0,0.5,0,0,0,0,6.5,14,1,0,0,0,1,pay-bill-online,0.5,0,0.64,0,110760,0,'Pay Expression',0,0,0,0,pay-bill-online
46 | 0,-LRB-,VB,0,0,0,0,0,0.454836,7,1,0,0,0.428571,0,0,0,0,3,25,3,0,0,0,2,pay-bill-online,0.142857,0,0.68,0,110760,0,'Pay SF Parking Ticket. :(',0,1,0,0,pay-bill-online
47 | 0,NN,NN,0,0,0,0,0,0.561955,4,0,0,0,1,0,0,0,0,3,15,4,0,0,0,1,pay-bill-online,0,0,0.64,0,1822752074,0,'pay NY tax bill',0,0,0,0,pay-bill-online
48 | 0,NN,NN,0,0,0,0,0,0.700781,2,0,0,0,1,0,0,0,0,5.5,12,2,0,0,0,1,pay-bill-online,0,0,0.64,0,1822752074,0,'pay mortgage',0,0,0,0,pay-bill-online
49 | 0,NN,NN,0,0,0,0,0,0.455481,2,0,0,0,1,0,0,0,0,3,7,2,0,0,0,1,pay-bill-online,0,0,0.6,0,1822752074,0,'pay ATT',0,0,0,0,pay-bill-online
50 | 0,NN,NN,0,0,0,0,0,0.429947,2,0,0,0,1,0,0,0,0,4.5,10,2,0,0,0,1,pay-bill-online,0,0,0.6,0,1822752074,0,'pay roofer',0,0,0,0,pay-bill-online
51 | 0,NNP,VB,0,0,0,0,0,0.392625,2,1,0,0,0.5,0,0,0,0,3.5,8,1,0,0,0,1,pay-bill-online,0.5,0,0.6,0,110760,0,'Pay PG&E',0,0,0,0,pay-bill-online
52 | 0,NNP,VB,0,0,0,0,0,0.383209,2,1,0,0,0.5,0,0,0,0,3,7,1,0,0,0,1,pay-bill-online,0.5,0,0.6,0,110760,0,'Pay BGE',0,0,0,0,pay-bill-online
53 | 0,NN,NN,0,0,0,0,0,0.616068,3,0,0,0,1,0,0,0,0,4.666667,16,3,0,0,0,1,pay-bill-online,0,0,0.64,0,1822752074,0,'pay student loan',0,0,0,0,pay-bill-online
54 | 0,NN,NN,0,0,0,0,0,0.479135,2,0,0,0,1,0,0,0,0,5,11,2,0,0,0,1,pay-bill-online,0,0,0.6,0,1822752074,0,'pay plumber',0,0,0,0,pay-bill-online
55 | 0,NNP,VB,0,0,0,0,0,0.440182,6,1,0,0,0.5,0,0,0,0,4,29,3,0,0,1,1,pay-bill-online,0.166667,0,0.6,0,110760,0,'Pay DMV for Harley before May',0,3,0,0,pay-bill-online
56 | 0,NN,NN,0,0,0,0,0,0.748226,2,0,0,0,1,0,0,0,0,3.5,8,2,0,0,0,1,pay-bill-online,0,0,0.64,0,1822752074,0,'pay rent',0,0,0,0,pay-bill-online
57 | 0,NN,NN,0,0,0,0,0,0.281828,3,0,0,0,1,0,0,0,0,5.666667,19,3,0,0,0,1,pay-bill-online,0,0,0.64,0,1822752074,0,'pay genki violation',0,1,0,0,pay-bill-online
58 | 0,NN,VB,0.133333,0,0,0,0.095238,0.448574,4,1,0,0,0.5,0,0,0,0,4.25,20,2,0,0,1,1,call,0.25,0,0.08,0,102230,0,'get bills on autopay',0,0,0,0,pay-bill-online
59 | 0.147059,NN,NN,0,0,0,0,0,0.481327,6,0,0,0.069767,0.666667,0,0,0,0,5.5,37,4,0,0,1,1,call,0,0,0,0,1822752074,0,'Appointment with capital women\'s care',0,0,0,0,calendar
60 | 0.441176,JJ,VB,0,0,0,0,0,0.45206,7,1,1,0,0.285714,0,0,0,0,6.285714,49,2,0,0,2,1,call,0.142857,0,0,0.142857,-697920873,0,'Schedule appointments with caterers, if necessary',0,0,0,0,calendar
61 | 0.352941,.,VB,0,0,0,0,0,0.433096,24,1,3,0,0.375,0,0,0,0,4.875,133,9,0,0,3,1,call,0.041667,0,0,0.125,-697920873,0,'Schedule rehearsal with officiant, all family members, bridesmaids, best men, wedding planner (if applicable) and other participants.',0,0,0,0,calendar
62 | 0.294118,-RRB-,VB,0,0,0,0,0,0.366983,8,1,0,0,0.375,0,0,0,0,6.625,58,3,0,0,2,1,call,0.125,0,0,0,-697920873,0,'Schedule interviews with photographers (and videographers)',0,0,0,0,calendar
63 | 0.441176,NN,VB,0,0,0,0,0,0.445552,3,1,0,0,0.666667,0,0,0,0,6.333333,21,2,0,0,0,1,plan-meal,0.333333,0,0,0,-697920873,0,'Schedule cake tasting',0,0,0,0,calendar
64 | 0.088235,NN,NNP,0,0,0,0,0,0.412771,2,0,0,0,1,0,0,0,0,8,17,2,0,0,0,1,calendar,0,0,0,0,1822752074,0,'Apple appointment',0,1,0,0,calendar
65 | 0.058824,NNS,VB,0,0,0,0,0,0.516648,12,3,0,0,0.25,0,0,0,0,4.583333,66,3,0,0,5,1,call,0.25,0,0,0,113762,0,'Set a meeting with officiant to go through and confirm the details',0,0,0,0,calendar
66 | 0.529412,NNS,VB,0,0,0,0,0,0.421055,7,1,0,0,0.571429,0,0,0,0,6.571429,52,4,0,0,2,1,pay-bill-online,0.142857,0,0,0,-697920873,0,'Schedule appointments with site managers or caterers',0,0,0,0,calendar
67 | 0.294118,NN,VB,0,0,0,0,0,0.431443,4,2,0,0,0.25,0,0,0,0,5.75,26,1,0,0,0,1,calendar,0.5,0,0,0,-697920873,0,'Schedule one tabling event',0,1,0,0,calendar
68 | 0,NN,VB,0,0,0,0,0,0.373705,14,2,0,0,0.428571,0,0,0,0,4.357143,74,6,0,0,3,1,call,0.142857,0,0,0,-1408204561,0,'Assign maid of honor / one attendant to arrange train and/or veil at altar',0,1,0,0,calendar
69 | 0.058824,NNP,VB,0,0,0,0,0,0.531802,5,1,0,0,0.4,0,0,0,0,4,24,2,0,0,1,1,call,0.2,0,0,0,3304,0,'go over budget with Matt',0,1,0,0,calendar
70 | 0.294118,NNS,VB,0.066667,0,0,0,0,0.449412,7,1,1,0,0.428571,0,0,0,0,5.428571,44,3,0,0,2,1,buy,0.142857,0,0,0.142857,-697920873,0,'Schedule delivery and pickup of rental items',0,0,0,0,calendar
71 | 0.294118,-RRB-,NN,0.111111,0,0,0,0,0.450066,14,1,0,0.069767,0.428571,0,0,0,0,3.571429,59,6,0,0,3,1,call,0.071429,0,0,0,1822752074,0,'Place order for wedding cake (and groom\'s cake, if desired)',0,0,0,0,calendar
72 | 0.529412,NNS,VB,0,0,0,0,0,0.483353,5,1,0,0,0.6,0,0,0,0,7.2,40,3,0,0,1,1,calendar,0.2,0,0,0,-697920873,0,'Schedule appointments with site managers',0,0,0,0,calendar
73 | 0.205882,NN,VB,0,0,0,0,0,0.485166,5,1,0,0,0.6,0,0,0,0,5.8,33,3,0.142857,0,1,1,calendar,0.2,0,0,0,3343854,0,'Make appointment for menu tasting',0,0,0,0.083333,calendar
74 | 0.529412,NNS,VB,0,0,0,0,0,0.405918,5,1,0,0,0.6,0,0,0,0,10.2,55,3,0,0,1,1,calendar,0.2,0,0,0,-697920873,0,'Schedule appointments with site coordinators/officiants',0,0,0,0,calendar
75 | 0,NNP,VB,0.044444,0,0,0,0,0.496192,8,2,0,0.069767,0.375,0,0.090909,0,0.115385,3.875,37,3,0,0,1,1,call,0.25,0.105263,0,0,3440673,0,'Pick up moving truck at 5:30pm Friday',0,1,0,0,calendar
76 | 0.294118,NN,NN,0,0,0,0,0,0.29349,3,0,0,0,1,0,0,0,0,5.333333,18,3,0,0,0,1,call,0,0,0,0,1822752074,0,'schedule DHPP shot',0,1,0,0,calendar
77 | 0,NNP,NNP,0,0,0,0,0,0.300045,5,0,0,0,0.8,0,0,0,0,7.2,40,4,0,0,1,1,service,0,0,0,0,1822752074,0,'Meet with Electric Cheetah/Uncle Cheetah',0,2,0,0,calendar
78 | 0.205882,NNS,VB,0,0,0,0.08,0,0.401357,5,1,0,0,0.6,0,0,0,0,6.2,35,3,0.142857,0,1,1,calendar,0.2,0,0,0,3343854,0,'Make appointments for gown fittings',0,0,0,0,calendar
79 | 0,VBD,NNP,0,0,2,0,0,0.337473,12,3,0,0,0.416667,0.166667,0,0,0,4.75,67,5,0,0,1,1,call,0.25,0,0,0,1822752074,0,'Debrief Constantly \\u2013 Call Brooke and discuss how HHCT III went',0.678571,3,0,0,call
80 | 0,NNP,VB,0,0,0,0,0,0.454765,6,1,0,0,0.666667,0,0,0,0,4,28,4,0,0,0,1,call,0.166667,0,0,0,3045982,0,'Call Phil Smith, Mark Fukuda',0.642857,2,0,0,call
81 | 0,NNP,VB,0,0,0,0,0,0.236103,2,1,0,0,0.5,0,0,0,0,8.5,18,1,0,0,0,1,postal,0.5,0,0,0,-838846263,0,'update address-ATT',0,0,0,0,call
82 | 0,NN,VB,0,0,0,0,0,0.591377,5,2,0,0,0.2,0,0,0,0,3.2,20,1,0,0,1,1,call,0.4,0,0,0,3045982,0,'Call to shut off gas',0.75,0,0,0,call
83 | 0,NN,VB,0,0,0,0,0,0.59916,5,2,0,0,0.2,0,0,0,0,3.6,22,1,0,0,1,1,call,0.4,0,0,0,3045982,0,'Call to shut off water',0.75,0,0,0,call
84 | 0,NN,NN,0,0,0,0,0,0.36377,3,0,0,0,0.666667,0,0,0,0,3.333333,12,2,0,0,1,1,call,0,0,0,0,1822752074,0,'call with tj',0.607143,0,0,0,call
85 | 0,NNS,JJ,0,0,0,0,0,0.329707,7,0,1,0,0.571429,0,0,0,0,5,41,4,0.071429,0,2,1,call,0,0,0,0.142857,1822752074,0,'call city of somerville and genki tenants',0.607143,1,0,0,call
86 | 0,NN,NN,0,0,0,0,0,0.112406,5,0,0,0,1,0,0,0,0,3.8,23,5,0,0,0,1,pay-bill-online,0,0,0.04,0,1822752074,0,'call zacky abt CFB rent',0.607143,1,0,0,call
87 | 0.058824,NN,VB,0,0,0,0,0,0.532414,5,1,0,0.069767,0.6,0,0,0,0,4,23,3,0,0,0,1,call,0.2,0,0,0,3045982,0,'Call John McCain\'s team',0.642857,1,0,0,call
88 | 0,NN,JJ,0,0,0,0,0,0.586275,4,0,1,0,0.75,0,0,0,0,4.75,22,3,0,0,0,1,call,0,0,0,0.25,1822752074,0,'call health care thing',0.642857,0,0,0,call
89 | 0,RB,VB,0,0,1,0,0.095238,0.46121,4,1,0,0,0.5,0.25,0,0,0,4,19,2,0,0,0,1,call,0.25,0,0,0,3045982,0,'Call Greg Sims back',0.642857,1,0,0,call
90 | 0,NNS,NN,0,0,0,0,0,0.558664,5,0,0,0,0.6,0,0,0,0,4.8,28,3,0,0,1,1,call,0,0,0,0,1822752074,0,'call with matt from citizens',0.607143,0,0,0,call
91 | 0,NN,NN,0,0,0,0,0,0.444191,4,0,0,0,0.75,0,0,0,0,5.25,24,3,0,0,0,1,call,0,0,0,0,1822752074,0,'call amazon about kindle',0.607143,1,0,0,call
92 | 0,NN,VB,0,0,0,0.12,0,0.619827,7,3,0,0,0.142857,0,0,0,0.076923,4.142857,35,1,0,0,1,1,call,0.428571,0,0,0,3045982,0,'Call to find out about moving truck',0.607143,0,0,0.083333,call
93 | 0,NNS,NN,0,0,0,0,0,0.302541,2,0,0,0,1,0,0,0,0,8.5,18,2,0,0,0,1,call,0,0,0,0,1822752074,0,'call exterminators',0.607143,0,0,0,call
94 | 0,.,VB,0,0,1,0,0,0.470281,18,3,1,0,0.277778,0.055556,0,0,0,4.444444,94,5,0,0,4,1,call,0.166667,0,0,0.055556,3045982,0,'Call a \"townhall meeting\" at GRBC and personally invite all past/current owners to give input.',0.607143,1,0,0,call
95 | 0,NN,NN,0,0,0,0,0,0.304961,2,0,0,0,1,0,0,0,0,6,13,2,0,0,0,1,call,0,0,0,0,1822752074,0,'call grandmom',0.607143,0,0,0,call
96 | 0,NNS,NN,0,0,0,0,0,0.363489,3,0,0,0,1,0,0,0,0,5.333333,18,3,0,0,0,1,call,0,0,0,0,1822752074,0,'call lesko clients',0.642857,0,0,0,call
97 | 0,CD,VB,0,0,0,0,0,0.336154,6,1,0,0,0.333333,0,0,0,0,2.833333,22,2,0.142857,0,1,1,call,0.166667,0,0,0,3029737,0,'Book a car - dec 13-17',0,2,0,0,find-travel
98 | 0,NNP,VB,0,0,0,0,0,0.337093,5,1,0,0,0.4,0,0,0,0,5,29,2,0.142857,0,2,1,call,0.2,0,0,0,3327647,0,'look at airbnbs in binghamton',0,0,0,0,find-travel
99 | 0.058824,NNP,VB,0,0,0,0,0,0.450885,5,1,0,0,0.6,0,0,0,0,7.4,41,3,0.142857,0,0,1,call,0.2,0,0,0,3343854,0,'make travel arrangements from Albuquerque',0,2,0,0,find-travel
100 | 0,NNS,VB,0,0,0,0,0,0.414428,8,1,1,0,0.5,0,0,0,0,5.125,48,4,0.142857,0,2,1,pay-bill-online,0.125,0,0,0.125,3208383,0,'Hold block of hotel rooms for out-of-town guests',0,0,0,0,find-travel
101 | 0.117647,NN,VB,0,0,0,0,0,0.509584,7,1,0,0,0.571429,0,0,0,0,5.571429,45,4,0.357143,0,1,1,call,0.142857,0,0,0,3343854,0,'Make your hotel reservation for wedding night',0,1,0,0,find-travel
102 | 0,NN,NN,0,0,0,0,0,0.361882,2,0,0,0,1,0,0,0,0,6.5,14,2,0.214286,0,0,1,calendar,0,0,0,0,1822752074,0,'book honeymoon',0,0,0,0,find-travel
103 | 0,NNS,NNP,0,0,0,0,0,0.43187,3,0,0,0,1,0,0,0,0,5.666667,19,3,0.214286,0,0,1,pay-bill-online,0,0,0,0,1822752074,0,'Kansas City tickets',0,1,0,0,find-travel
104 | 0,NN,VB,0.133333,0,0,0,0.095238,0.57803,3,2,0,0,0.333333,0,0,0,0,5,17,1,0,0,0,1,call,0.666667,0,0,0,102230,0,'get return flight',0,0,0,0,find-travel
105 | 0,NNS,VB,0.4,0,0,0,0,0.460225,3,1,0,0,0.666667,0,0,0,0,6.333333,21,2,0.214286,0,0,1,pay-bill-online,0.333333,0,0,0,97926,0,'Buy Nicaragua tickets',0,1,0,0.083333,find-travel
106 | 0.058824,NN,VB,0.044444,0,0,0.2,0,0.4248,19,3,4,0.069767,0.315789,0,0.090909,0,0.461538,5.421053,121,6,0,1,3,1,service,0.157895,0.052632,0,0.210526,3143097,0,'Find professional dry cleaner who specialized in bridal gown care to clean and pack up dress andaccessories after wedding',0,0,0.052632,0.083333,find-service
107 | 0,NN,VB,0,0,0,0.48,0,0.461306,5,1,0,0,0.6,0,0,0,0,5.2,30,3,0,0,1,1,pay-bill-online,0.2,0,0,0,3202804,0,'Hire caterer and sign contract',0,0,0,0,find-service
108 | 0,NNS,NN,0,0,0,0,0,0.334378,3,0,1,0,0.666667,0,0,0,0,8.666667,28,2,0,0,0,1,call,0,0,0,0.333333,1822752074,0,'Interview potential caterers',0,0,0,0,find-service
109 | 0,NN,VB,0,0,0,0.08,0,0.463022,8,1,0,0,0.5,0,0,0,0,3.5,35,4,0,0,2,1,call,0.125,0,0,0,101397,0,'Fix the CD ROM drive on my computer',0,1,0,0,find-service
110 | 0,NN,NN,0,0,0,0.08,0,0.449836,2,0,0,0,1,0,0,0,0,5,11,2,0,0,0,1,service,0,0,0,0,1822752074,0,'repair roof',0,0,0,0,find-service
111 | 0,NN,VB,0,0,0,0.4,0,0.388847,9,1,0,0,0.444444,0,0,0,0,5.555556,56,4,0,0,2,1,call,0.111111,0,0,0,3202804,0,'Hire photographers (and videographers) and sign contract',0,0,0,0,find-service
112 | 0,NN,NN,0,0,0,0,0,0.508423,2,0,0,0,1,0,0,0,0,8,17,2,0,0,0,1,pay-bill-online,0,0,0,0,1822752074,0,'transfer internet',0,0,0,0,find-service
113 | 0,NN,NN,0,0,0,0.08,0,0.372092,4,0,0,0,1,0,0,0,0,5.25,24,4,0,0,0,1,service,0,0,0,0,1822752074,0,'fix coffee pot dispenser',0,0,0,0,find-service
114 | 0,JJ,JJ,0,0,0,0,0,0.417859,2,0,2,0,0,0,0,0,0,7,15,0,0,0,0,1,pay-bill-online,0,0,0,1,1822752074,0,'annual physical',0,1,0,0,find-service
115 | 0,NNS,VB,0,0,0,0,0,0.23797,2,1,0,0,0.5,0,0,0,0,8.5,18,1,0,0,0,1,postal,0.5,0,0,0,1578333328,0,'Notarize documents',0,0,0,0,find-service
116 | 0,NN,VB,0,0,0,0.24,0,0.529937,4,1,1,0,0.5,0,0,0,0,5.25,24,2,0,0,0,1,pay-bill-online,0.25,0,0,0.25,3143097,0,'find new insurance agent',0,1,0,0.083333,find-service
117 | 0,NN,NN,0,0,0,0.08,0,0.515373,4,0,0,0,1,0,0,0,0,5.5,25,4,0,0,0,1,service,0,0,0,0,1822752074,0,'repair laundry room floor',0,0,0,0,find-service
118 | 0,NN,JJ,0,0,0,0.04,0,0.40374,4,0,1,0,0.5,0,0,0,0,5.5,25,2,0,0,1,1,call,0,0,0,0.25,1822752074,0,'sound proofing for office',0,0,0,0,find-service
119 | 0,NNS,VB,0,0,0,0.12,0,0.505132,4,1,1,0,0.5,0,0,0,0,5.5,25,2,0,0,0,1,pay-bill-online,0.25,0,0,0.25,3526264,0,'seek new investment ideas',0,0,0,0,find-service
120 | 0,NNS,VB,0,0,0,0,0,0.50743,4,1,1,0,0.5,0,0,0,0,6.75,30,2,0,0,0,1,pay-bill-online,0.25,0,0.04,0.25,950484197,0,'compare different loan options',0,0,0,0,find-service
121 | 0,NN,VB,0,0,0,0.12,0,0.488852,4,1,1,0,0.25,0,0,0,0,4,19,1,0,0,0,1,call,0.25,0,0,0.25,1957569947,0,'Install my new sink',0,0,0,0,find-service
122 | 0,.,VB,0.066667,0,0,0,0,0.496058,26,5,2,0.069767,0.230769,0,0,0,0,4.192308,134,6,0,0,8,2,call,0.192308,0,0,0.076923,-985656342,0,'Please work on an e-visa with the Cambodian embassy in Vietnam. I want to consider making you the admin person in our NGO paperwork.',0,3,0,0,find-service
123 | 0,NN,VBP,0,0,0,0.08,0,0.403974,6,3,0,0.069767,0.333333,0,0,0,0,4,29,2,0,0,1,1,service,0.5,0,0,0,1822752074,0,'Have shoes dyed to match gown',0,0,0,0,find-service
124 | 0,NNP,RB,0,0,1,0,0,0.400803,3,0,0,0,0.333333,0.333333,0,0,0,6.333333,21,1,0,0,1,1,pay-bill-online,0,0,0,0,1822752074,0,'Apply to Southxchange',0,1,0,0,find-service
125 | 0,NN,VB,0,0,0,0.12,0,0.380457,3,1,0,0,0.666667,0,0,0,0,5.666667,19,2,0,0,0,1,call,0.333333,0,0,0,3202804,0,'Hire content writer',0,1,0,0,find-service
126 | 0,NN,NNP,0,0,0,0,0,0.479847,3,0,0,0,1,0,0,0,0,6.666667,22,3,0,0,0,1,call,0,0,0,0,1822752074,0,'Home security research',0,0,0,0,find-service
127 | 0,VB,VB,0,0,0,0,0,0.491032,6,2,0,0,0.166667,0,0,0,0,5.166667,36,1,0,0,2,1,call,0.333333,0,0,0,-135762164,0,'Identify office/place for me to live',0,0,0,0.083333,find-service
128 | 0,NNP,VB,0,0,0,0.12,0,0.383697,6,1,0,0,0.5,0,0,0,0,7.166667,48,3,0,0,2,1,call,0.166667,0,0,0,3143097,0,'Find a photographer/marriage counselor in Hawaii',0,1,0,0.083333,find-service
129 | 0.088235,NN,VB,0,0,0,0.52,0,0.459764,10,1,0,0,0.6,0,0,0,0,4.3,50,6,0.142857,0,1,1,call,0.1,0,0,0,3029737,0,'Book site, caterer, sign contract and sign deposit',0,0,0,0,find-service
130 | 0,NN,VB,0.133333,0,0,0,0.095238,0.557692,4,1,0,0,0.5,0,0,0,0,3.75,18,2,0,0,1,1,call,0.25,0,0,0,102230,0,'Get the oil change',0,0,0,0,find-service
131 | 0,NNS,VB,0.133333,0,1,0,0.333333,0.556835,4,1,0,0,0.25,0.25,0,0,0,3.75,18,1,0,0,1,1,call,0.25,0,0,0,102230,0,'get back to emails',0.035714,0,0,0,email
132 | 0,VB,VB,0.133333,0,1,0,0.52381,0.589134,4,2,0,0,0,0.25,0,0,0,3.5,17,0,0,0,1,1,call,0.5,0,0,0,102230,0,'Get back to email',0.035714,0,0,0,email
133 | 0,NN,VB,0,0,0,0,0.52381,0.341097,7,1,0,0,0.714286,0,0,0,0,4.142857,35,5,0,0,1,1,call,0.142857,0.368421,0,0,3526536,0,'send email re pavan and bucket list',0,1,0,0,email
134 | 0,NN,NNP,0,0,0,0,0.333333,0.21851,3,0,0,0,0.666667,0,0,0,0,6.666667,22,2,0,0,0,1,email,0,0,0,0,1822752074,0,'Donate/send 5000 email',0,1,0,0,email
135 | 0,NNS,NN,0,0,0,0,0.333333,0.32776,4,0,0,0,0.75,0,0,0,0,7,31,3,0,0,0,1,call,0,0,0,0,1822752074,0,'email daniel about strawberries',0,1,0,0,email
136 | 0,NNS,NN,0,0,0,0,0.142857,0.16568,2,0,0,0,1,0,0,0,0,8,17,2,0,0,0,1,email,0,0,0,0,1822752074,0,'coffeeshop emails',0,0,0,0,email
137 | 0,NN,VB,0,0,0,0,0.428571,0.40711,5,2,0,0,0.6,0,0,0,0,6.2,35,3,0,0,0,1,call,0.4,0.368421,0,0,3526536,0,'send dana email regarding insurance',0,0,0,0,email
138 | 0,NNS,NN,0,0,0,0,0.333333,0.304026,5,0,0,0,0.8,0,0,0,0,4.4,26,4,0,0,0,1,pay-bill-online,0,0,0,0,1822752074,0,'email myer about fdo taxes',0,0,0,0,email
139 | 0,-RRB-,VB,0,0,0,0,0.285714,0.203246,13,1,1,0,0.461538,0,0,0,0,2.615385,41,6,0,0,0,1,call,0.076923,0,0,0.076923,113399775,0,'write appt emails (BV, UIX, AD mtg, GVSU)',0,3,0,0,email
140 | 0,NNP,NNP,0,0,0,0,0,0.129684,3,0,0,0,1,0,0,0,0,7.333333,24,3,0,0,0,1,call,0,0,0,0,1822752074,0,'Email/Call Dan Strickman',0,1,0,0,email
141 | 0,NN,VB,0,0,0,0,0.333333,0.331475,6,2,0,0,0.5,0,0,0,0,5.333333,37,3,0,0,1,1,call,0.333333,0,0,0,96619420,0,'Email people affected by timezone bug',0,0,0,0,email
142 | 0,NNS,VB,0,0,0,0,0.095238,0.52985,2,1,0,0,0.5,0,0,0,0,7.5,16,1,0,0,0,1,postal,0.5,0.368421,0,0,3526536,0,'Send invitations',0,0,0,0,postal
143 | 0,VBP,VB,0,0,0,0,0.095238,0.501019,3,2,0,0,0.333333,0,0,0,0,5.333333,18,1,0,0,0,1,call,0.666667,0.368421,0,0,3526536,0,'Send prayer update',0,1,0,0,postal
144 | 0,NN,VB,0,0,0,0,0.095238,0.521713,5,1,0,0,0.6,0,0,0,0,4.4,26,3,0,0,1,1,postal,0.2,0.578947,0,0,3526536,0,'send info to credit bureau',0,0,0,0,postal
145 | 0,CD,VB,0,0,0,0,0.095238,0.288142,13,1,0,0,0.461538,0,0,0,0,3.384615,70,6,0,0,1,1,call,0.076923,0.421053,0,0,3526536,0,'Send Abby her shirt at 247 W 15th St. Holland, MI 49423',0,6,0,0,postal
146 | 0,NN,VB,0,0,0,0,0.095238,0.434096,4,1,0,0,0.5,0,0,0,0,4.75,22,2,0,0,1,1,postal,0.25,0.473684,0,0,3526536,0,'send info to appraiser',0,0,0,0,postal
147 | 0,NNP,VB,0,0,0,0,0.095238,0.42222,6,1,0,0,0.5,0,0,0,0.076923,4.666667,33,3,0,0,1,1,call,0.166667,0.421053,0,0,3526536,0,'Send shirts to our Inner Circlers',0,1,0,0,postal
148 | 0,NN,VB,0.044444,0,0,0,0,0.5651,6,1,0,0.069767,0.5,0,0.090909,0,0.115385,3.666667,27,3,0,0,1,1,call,0.166667,0.210526,0,0,3440673,0,'pick up mail at post office',0,0,0,0,postal
149 | 0,NNP,NN,0,0,0,0,0,0.466686,4,0,0,0,0.75,0,0,0,0,5,23,3,0,0,1,1,call,0,0,0,0,1822752074,0,'resend check to Richard',0,1,0,0,postal
150 | 0,NNP,VB,0,0,0,0,0.095238,0.477605,4,1,0,0,0.5,0,0,0,0,4,19,2,0,0,1,1,call,0.25,0.473684,0,0,3526536,0,'Send checks to Joey',0,1,0,0,postal
151 | 0,NNP,NNP,0,0,0,0,0,0.326321,6,0,1,0,0.666667,0,0.090909,0,0,5,34,4,0,0,1,1,pay-bill-online,0,0.210526,0,0.166667,1822752074,0,'Mail nonprofit checks to Kyle!!!!!',0,1,0,0,postal
152 | 0,NNS,NN,0,0,0,0,0,0.559574,2,0,0,0,1,0,0,0,0,6.5,14,2,0,0,0,1,buy,0,0,0,0,1822752074,0,'Business cards',0,0,0,0,buy
153 | 0,NNS,NNP,0.444444,0,0,0,0,0.21031,7,1,0,0,0.571429,0,0,0,0,4.571429,37,4,0,0,0,1,buy,0.142857,0,0,0,1822752074,0,'Tuscon: buy two 1L smartwater bottles',0,2,0,0.083333,buy
154 | 0,NNP,VB,0.4,0,0,0,0,0.522589,2,1,0,0,0.5,0,0,0,0,4,9,1,0,0,0,1,buy,0.5,0,0,0,97926,0,'Buy Scale',0,0,0,0.083333,buy
155 | 0,NNS,NN,0.111111,0,0,0.12,0,0.568073,3,0,1,0,0.666667,0,0,0,0,4.666667,16,2,0,0,0,1,pay-bill-online,0,0.105263,0,0.333333,1822752074,0,'order new checks',0,0,0,0,buy
156 | 0,NNS,VB,0.4,0,0,0,0,0.501987,2,1,0,0,0.5,0,0,0,0,4.5,10,1,0,0,0,1,buy,0.5,0,0,0,97926,0,'Buy gloves',0,0,0,0.083333,buy
157 | 0,VBP,NN,0.111111,0,0,0,0,0.309961,3,1,0,0,0.666667,0,0,0,0,4.333333,15,2,0,0,0,1,pay-bill-online,0.333333,0,0,0,1822752074,0,'order gpa mount',0,0,0,0,buy
158 | 0,NN,VB,0,0,0,0,0,0.455352,4,1,1,0,0.5,0,0,0,0,5.5,25,2,0,0,0,1,call,0.25,0,0,0.25,-1361218025,0,'choose front door fixture',0,0,0,0,buy
159 | 0.058824,NNS,VB,0.133333,0,0,0,0.095238,0.389976,5,2,0,0,0.4,0,0,0,0,3.6,22,2,0,0,0,1,buy,0.4,0,0,0,3304,0,'Go get dirt from lowes',0,0,0,0,buy
160 | 0,NN,NNP,0,0,0,0,0,0.48608,5,0,0,0,0.8,0,0,0,0,5.2,30,4,0,0,1,1,service,0,0,0.04,0,1822752074,0,'Rent a carpet cleaning machine',0,0,0,0,buy
161 | 0,NN,NN,0,0,0,0,0,0.265483,5,0,0,0,0.8,0,0,0,0,6.4,36,4,0,0,1,1,pay-bill-online,0,0,0,0,1822752074,0,'Purchase/make pillow for ring bearer',0,0,0,0,buy
162 | 0.058824,NNP,VB,0.133333,0,0,0,0.095238,0.433438,6,2,0,0,0.5,0,0,0,0.076923,3.666667,27,3,0,0,1,1,buy,0.333333,0,0,0,3304,0,'Go get plants at Tilth Sale',0,1,0,0,buy
163 | 0,NN,VB,0.133333,0,0,0,0.095238,0.524743,3,1,0,0,0.666667,0,0,0,0,6,20,2,0,0,0,1,pay-bill-online,0.333333,0,0,0,102230,0,'Get marriage license',0,0,0,0,buy
164 | 0,NN,JJ,0.444444,0,0,0,0,0.449681,5,0,1,0,0.6,0,0,0,0.076923,5.6,32,3,0,0,1,1,service,0,0,0,0.2,1822752074,0,'Buy mirror for upstairs bathroom',0,0,0,0.083333,buy
165 | 0,NNS,NNP,0.111111,0,0,0,0,0.479031,3,0,1,0,0.666667,0,0,0,0,6,20,2,0,0,0,1,pay-bill-online,0,0,0,0.333333,1822752074,0,'Reserve rental items',0,0,0,0,buy
166 | 0,NNP,VB,0.133333,0,0,0,0.095238,0.55012,4,1,0,0,0.5,0,0,0,0,4,19,2,0,0,1,1,call,0.25,0,0,0,102230,0,'get suit for Boston',0,1,0,0,buy
167 | 0,NN,VB,0,0,0,0,0,0.469299,3,1,0,0,0.333333,0,0,0,0,5,17,1,0,0,1,1,call,0.333333,0,0,0,-522328435,0,'Remember the Milk',0,1,0,0,buy
168 | 0,NN,NN,0.111111,0,0,0,0,0.37984,4,0,0,0,0.75,0,0,0,0,5.75,26,3,0,0,1,1,service,0,0,0,0,1822752074,0,'order a meditation cushion',0,0,0,0,buy
169 | 0,NN,JJ,0.4,0,0,0,0,0.55168,2,0,1,0,0.5,0,0,0,0,3.5,8,1,0,0,0,1,buy,0,0,0,0.5,1822752074,0,'Buy wrap',0,0,0,0.083333,buy
170 | 0,NNS,NN,0,0,0,0,0.333333,0.370768,10,2,0,0,0.5,0,0,0,0,6.2,69,5,0,0,1,1,call,0.2,0,0,0,1822752074,0,'Order/create invitations, call invitees, or compose email invitations',0.607143,0,0,0,buy
171 | 0,NN,NN,0,0,0,0,0,0.459162,5,0,0,0,0.6,0,0,0.090909,0,4.6,27,3,0,0,2,1,call,0,0,0,0,1822752074,0,'paper for the upstairs desk',0,0,0,0,buy
172 | 0,NN,VB,0.044444,0,0,0.16,0,0.476711,4,1,0,0.069767,0.25,0,0.090909,0,0.115385,4,19,1,0,0,1,1,buy,0.25,0.052632,0,0,3530173,0,'Sign up for pottery',0,0,0,0,buy
173 | 0,NN,VB,0.4,0,0,0,0,0.499446,3,1,0,0,0.666667,0,0,0,0,5,17,2,0,0,0,1,buy,0.333333,0,0,0,97926,0,'Buy container mix',0,0,0,0.083333,buy
174 | 0,NN,VB,0.4,0,0,0,0,0.554054,3,1,1,0,0.333333,0,0,0,0,5,17,1,0,0,0,1,buy,0.333333,0,0,0.333333,97926,0,'Buy other jewelry',0,0,0,0.083333,buy
175 | 0,NNS,VB,0.4,0,0,0,0,0.439946,2,1,0,0,0.5,0,0,0,0,5.5,12,1,0,0,0,1,buy,0.5,0,0,0,97926,0,'Buy earrings',0,0,0,0.083333,buy
176 | 0.088235,NN,NN,0.111111,0,0,0,0,0.463207,3,0,0,0,0.666667,0,0,0,0,3.333333,12,2,0,0,1,1,service,0,0,0,0,1822752074,0,'order a cake',0,0,0,0,buy
177 | 0,NN,JJ,0.4,0,0,0,0,0.552242,2,0,1,0,0.5,0,0,0,0,4,9,1,0,0,0,1,pay-bill-online,0,0,0,0.5,1822752074,0,'Buy purse',0,0,0,0.083333,buy
178 | 0,NN,VB,0,0,0,0.28,0,0.47319,6,1,0,0,0.666667,0,0,0,0,5.5,38,4,0.142857,0,1,1,call,0.166667,0,0,0,3029737,0,'Book reception venue and sign contract',0,0,0,0,buy
179 | 0,NN,NNP,0.088889,0,0,0,0,0.355624,4,1,0,0,0.75,0,0,0,0.076923,7.25,32,3,0,0,0,1,service,0.25,0,0,0,1822752074,0,'Research bathroom mirrors online',0,0,0,0,buy
180 | 0,NN,VB,0.4,0,0,0,0,0.438651,2,1,0,0,0.5,0,0,0,0,5.5,12,1,0,0,0,1,buy,0.5,0,0,0,97926,0,'buy necklace',0,0,0,0.083333,buy
181 | 0,NN,NN,0.044444,0,0,0,0,0.388266,5,0,1,0,0.6,0,0,0,0,6.6,37,3,0,0,1,1,buy,0,0,0,0.2,1822752074,0,'subscription to safari library online',0,0,0,0,buy
182 | 0,NNS,VB,0.4,0,0,0,0,0.433738,10,1,1,0,0.6,0,0,0,0,6.5,74,6,0,0,2,1,postal,0.1,0,0,0.1,97926,0,'Buy special postage stamps for invitation envelopes and response envelopes',0,0,0,0.083333,buy
183 | 0,NNS,VB,0,0,0,0,0,0.475226,2,1,0,0,0.5,0,0,0,0,6,13,1,0,0,0,1,service,0.5,0,0,0,1094496948,0,'replace shoes',0,0,0,0,buy
184 | 0,NNS,NN,0,0,0,0,0,0.45605,4,0,0,0,1,0,0,0,0,5.5,25,4,0,0,0,1,call,0,0,0,0,1822752074,0,'research cell phone plans',0,0,0,0,buy
185 | 0,NNS,VB,0.044444,0,0,0,0,0.549679,4,1,1,0.069767,0.25,0,0.090909,0,0.115385,3.75,18,1,0,0,0,1,buy,0.25,0.105263,0,0.25,3440673,0,'Pick up more boxes',0,0,0,0,buy
186 | 0,NN,VB,0.4,0,0,0,0,0.364698,4,1,0,0,0.75,0,0,0,0,6.5,29,3,0,0,0,1,buy,0.25,0,0,0,97926,0,'buy stuff sacks/pack backpack',0,0,0,0.083333,buy
187 | 0,NNS,VB,0.4,0,0,0,0,0.338777,4,1,0,0,0.75,0,0,0,0,5.5,25,3,0,0,0,1,service,0.25,0,0,0,97926,0,'buy flower girl headbands',0,1,0,0.083333,buy
188 | 0,VBN,VB,0.133333,0,0,0,0.095238,0.552758,3,2,0,0,0.333333,0,0,0,0,3.666667,13,1,0,0,0,1,call,0.666667,0,0,0,102230,0,'get keys made',0,0,0,0,buy
189 | 0,NNS,VB,0.4,0,0,0,0,0.472995,3,1,0,0,0.666667,0,0,0,0,6,20,2,0,0,0,1,buy,0.333333,0,0,0,97926,0,'Buy hair accessories',0,0,0,0.083333,buy
190 | 0,NN,JJ,0.4,0,0,0,0,0.386432,4,0,1,0,0.75,0,0,0,0,6,27,3,0,0,0,1,buy,0,0,0,0.25,1822752074,0,'Buy rehearsal dinner outfit',0,0,0,0.083333,buy
191 | 0,NNS,NNP,0.177778,0,0,0,0,0.453556,3,0,0,0,1,0,0,0,0,6.666667,22,3,0,0,0,1,buy,0,0,0,0,1822752074,0,'Order stationery items',0,0,0,0,buy
192 | 0,NNS,NNP,0.111111,0,0,0,0,0.521677,5,0,2,0,0.4,0,0,0,0,6,34,2,0,0,0,1,pay-bill-online,0,0,0,0.4,1822752074,0,'Reserve any necessary rental items',0,0,0,0,buy
193 | 0,NN,VB,0.4,0,0,0,0,0.251512,3,1,1,0,0.333333,0,0,0,0,6.333333,21,1,0,0,0,1,buy,0.333333,0,0,0.333333,97926,0,'Buy going-away outfit',0,0,0,0.083333,buy
194 | 0.088235,-RRB-,NN,0.2,0,1,0,0,0.494411,28,5,0,0.209302,0.285714,0.035714,0,0,0,5.107143,168,8,0,0,8,1,call,0.178571,0,0,0,1822752074,0,'Purchase birdseed/bubbles/rose petals for guests to shower you with as you leave ceremony site (this custom could instead be performed as you depart from the reception)',0,0,0,0,buy
195 | 0.058824,NN,VB,0.133333,0,0,0,0.095238,0.554364,3,1,0,0,0.666667,0,0,0,0,6.333333,21,2,0,0,0,1,pay-bill-online,0.333333,0,0,0,102230,0,'get wedding insurance',0,0,0,0,buy
196 | 0,NN,JJ,0.4,0,0,0,0,0.376219,2,0,1,0,0.5,0,0,0,0,4.5,10,1,0,0,0,1,buy,0,0,0,0.5,1822752074,0,'Buy garter',0,0,0,0.083333,buy
197 | 0,NN,NNP,0.444444,0,0,0,0,0.287136,5,1,0,0,0.6,0,0,0,0,4.6,26,3,0,0,0,1,buy,0.2,0,0,0,1822752074,0,'Tuscon: buy cannister fuel',0,1,0,0.083333,buy
198 | 0.088235,NN,NNP,0,0,0,0,0,0.341863,3,0,0,0,1,0,0,0,0,5.333333,18,3,0,0,0,1,plan-meal,0,0,0,0,1822752074,0,'Select cake topper',0,1,0,0,buy
199 | 0,NNPS,VB,0,0,0,0,0,0.392175,3,1,0,0,0.666667,0,0.181818,0,0,7.666667,25,2,0,0,0,1,call,0.333333,0,0,0,-1405517509,0,'Practice Mandarin Chinese',0,1,0,0,self-improve
200 | 0,NNP,VB,0.044444,0,0,0.16,0,0.570339,4,1,0,0.069767,0.25,0,0.181818,0,0.115385,4,19,1,0,0,1,1,call,0.25,0.052632,0,0,3530173,0,'Sign up for Spanish',0,1,0,0,self-improve
201 | 0,NNP,VB,0,0,0,0,0,0.523105,8,2,0,0,0.25,0,0.454545,0.090909,0,3.875,36,2,0,0,0,1,call,0.25,0,0,0,102846020,0,'Learn about \"Cold Calling\" from Kyle',0,1,0,0,self-improve
202 | 0,NN,VB,0,0,0,0,0,0.188599,3,1,0,0,0.666667,0,0.363636,0.090909,0,9,29,2,0,0,0,1,call,0.333333,0,0,0,102846020,0,'learn guyline knots/technique',0,0,0,0,self-improve
203 | 0,NNP,NN,0,0,0,0,0,0.383775,5,0,1,0,0.6,0,0,0,0,5.8,33,3,0,0,1,1,pay-bill-online,0,0,0,0.2,1822752074,0.538462,'print general giveaways for David',0,1,0,0,print
204 | 0,NNS,NN,0,0,0,0,0,0.406903,2,0,0,0,1,0,0,0,0,4.5,10,2,0,0,0,1,postal,0,0,0,0,1822752074,0.307692,'print maps',0,0,0,0,print
205 | 0,NNS,NN,0,0,0,0,0,0.338348,5,1,0,0,0.6,0,0,0,0,6,34,3,0,0,0,1,pay-bill-online,0.2,0,0,0,1822752074,0.384615,'Print/have fedex cut 300 giveaways',0,1,0,0,print
206 | 0,NNP,VB,0,0,0,0,0,0.259075,3,1,0,0,0.666667,0,0,0,0,5.666667,19,2,0,0,0,1,print,0.333333,0,0,0,106934957,0.384615,'Print Gen. Giveaway',0,0,0,0,print
207 | 0,NN,VB,0.044444,0,0,0,0,0.446648,6,1,0,0.069767,0.5,0,0.090909,0,0.192308,3.5,26,3,0,0,1,1,call,0.166667,0.052632,0,0,113762,0,'Set up org file for garden',0,1,0,0,service
208 | 0,NNS,VB,0,0,1,0,0,0.375183,3,1,0,0,0.333333,0.333333,0,0,0,6.666667,22,1,0,0,0,1,postal,0.333333,0,0,0,-318370553,0,'prepare resupply boxes',0,0,0,0,service
209 | 0,NN,JJ,0,0,0,0,0,0.540102,2,0,1,0,0.5,0,0,0,0.269231,5.5,12,1,0,0,0,1,service,0,0,0,0.5,1822752074,0,'clean closet',0,0,0,0,service
210 | 0,NN,JJ,0,0,0,0,0,0.510607,4,0,1,0,0.5,0,0,0,0.269231,4.25,20,2,0,0,1,1,service,0,0,0,0.25,1822752074,0,'Clean the litter box',0,0,0,0,service
211 | 0,JJ,NN,0,0,1,0,0.095238,0.401124,3,0,1,0,0.333333,0.333333,0,0,0,4.666667,16,1,0,0,0,1,service,0,0,0,0.333333,1822752074,0,'caulk back light',0.035714,0,0,0,service
212 | 0,NN,VB,0,0,0,0,0,0.570174,3,1,1,0,0.333333,0,0,0,0.076923,4.333333,15,1,0,0,0,1,service,0.333333,0,0,0.333333,3432985,0,'Pack spare room',0,0,0,0,service
213 | 0,NN,VB,0,0,0,0,0,0.590655,4,1,0,0,0.25,0,0,0,0.076923,3.25,16,1,0,0,1,1,call,0.25,0,0,0,3552391,0,'Take out the dog',0,0,0,0,service
214 | 0,NN,NN,0,0,0,0,0,0.353338,2,0,0,0,1,0,0,0,0,5.5,12,2,0,0,0,1,service,0,0,0,0,1822752074,0,'glue pumpkin',0,0,0,0,service
215 | 0,NN,JJ,0.044444,0,0,0,0,0.571832,2,0,1,0,0.5,0,0,0,0.346154,6.5,14,1,0,0,0,1,service,0,0,0,0.5,1822752074,0,'clean bathroom',0,0,0,0,service
216 | 0.058824,POS,NNP,0,0,0,0,0,0.387923,6,0,0,0.069767,0.666667,0,0,0,0.076923,4.166667,29,4,0,0,1,1,service,0,0.052632,0,0,1822752074,0,'Iron shirts for Flat lander\'s',0,2,0,0,service
217 | 0,NN,VB,0,0,0,0,0,0.391244,4,1,0,0,0.5,0,0,0,0,3.25,16,2,0,0,1,1,call,0.25,0,0,0,3357649,0,'move the pee pad',0,0,0,0,service
218 | 0,NN,VB,0,0,0,0,0,0.417101,3,1,0,0,0.666667,0,0,0,0,7.333333,24,2,0,0,0,1,call,0.333333,0,0,0,989834062,0,'reconcile bank statement',0,0,0,0,service
219 | 0,NN,VB,0,0,0,0,0,0.319196,4,1,0,0,0.5,0,0,0,0,7.75,34,2,0,0,1,1,service,0.25,0,0,0,1957569947,0,'Install Quicksilver and experiment',0,0,0,0,service
220 | 0,NN,NN,0,0,0,0,0,0.287821,4,0,0,0,0.75,0,0,0,0,5.25,24,3,0,0,0,1,service,0,0,0,0,1822752074,0,'Vacuum hardwoods & couch',0,0,0,0,service
221 | 0,NN,JJ,0,0,0,0,0,0.53992,3,0,1,0,0.333333,0,0,0,0.269231,4.666667,16,1,0,0,1,1,service,0,0,0,0.333333,1822752074,0,'Clean the carpet',0,0,0,0,service
222 | 0,NN,VBN,0.044444,0,0,0,0,0.544621,3,1,0,0.069767,0.333333,0,0.090909,0,0.115385,3,11,1,0,0,0,1,service,0.333333,0.052632,0,0,1822752074,0,'set up tent',0,0,0,0,service
223 | 0,CD,NNS,0,0,0,0,0,0.543524,3,0,0,0,0.333333,0,0,0,0,4,14,1,0,0,1,1,pay-bill-online,0,0,0,0,1822752074,0,'Taxes for 2015',0,1,0,0,service
224 | 0,NNP,NNP,0,0,0,0,0,0.300871,2,0,0,0,1,0,0,0,0,4.5,10,2,0,0,0,1,service,0,0,0,0,1822752074,0,'Wash Mazda',0,2,0,0,service
225 | 0,NN,VBG,0,0,0,0,0,0.394523,4,1,0,0,0.5,0,0,0,0.076923,5.25,24,2,0,0,1,1,service,0.25,0.052632,0,0,1822752074,0,'Ironing shirts for staff',0,0,0,0,service
226 | 0,NN,JJ,0,0,0,0,0,0.543383,5,0,2,0,0.4,0,0,0,0.153846,4.4,26,2,0,0,0,1,service,0,0,0,0.4,1822752074,0,'Clear out small garden bed',0,1,0,0,service
227 | 0,NN,JJ,0.044444,0,0,0,0,0.571832,2,0,1,0,0.5,0,0,0,0.346154,6.5,14,1,0,0,0,1,service,0,0,0,0.5,1822752074,0,'clean bathroom',0,0,0,0,service
228 | 0.058824,NN,VB,0.044444,0,0,0,0,0.549412,7,2,0,0.069767,0.285714,0,0.090909,0,0.115385,3.285714,29,2,0,0,0,1,call,0.285714,0.105263,0,0,3304,0,'Go pick up my son from school',0,0,0,0,service
229 | 0,NNS,NN,0,0,0,0,0,0.372406,3,0,0,0,0.666667,0,0,0.090909,0.076923,6,20,2,0,0,0,1,buy,0,0,0,0,1822752074,0,'figure out bookmarks',0,0,0,0,service
230 | 0,NNS,NN,0,0,0,0,0,0.272831,3,0,0,0,1,0,0,0,0,8,26,3,0,0,0,1,school-work,0,0,0,0,1822752074,0,'review orienteering basics',0,0,0,0,service
231 | 0,NN,JJ,0.044444,0,0,0,0,0.413027,3,0,1,0.069767,0.333333,0,0.090909,0,0.384615,5,17,1,0,0,0,1,service,0,0.052632,0,0.333333,1822752074,0,'clean up woodpile',0,0,0,0,service
232 | 0,NN,VB,0,0,0,0,0,0.563105,2,1,0,0,0.5,0,0,0,0.153846,5.5,12,1,0,0,0,1,service,0.5,0,0,0,3432985,0,'Pack kitchen',0,0,0,0,service
233 | 0,NN,NN,0,0,0,0,0,0.34765,1,0,0,0,1,0,0,0,0,5,5,1,0,0,0,1,call,0,0,0,0,1822752074,0,sweep,0,0,0,0,service
234 | 0,NN,JJ,0,0,0,0,0,0.609302,2,0,1,0,0.5,0,0,0,0.346154,6,13,1,0,0,0,1,service,0,0,0,0.5,1822752074,0,'clean kitchen',0,0,0,0,service
235 | 0,NNS,VB,0,0,0,0,0,0.392565,2,1,0,0,0.5,0,0,0,0,4.5,10,1,0,0,0,1,service,0.5,0,0,0,108302,0,'mop floors',0,0,0,0,service
236 | 0,NN,NN,0,0,0,0,0,0.428761,3,0,0,0,1,0,0,0,0,5,17,3,0,0,0,1,call,0,0,0,0,1822752074,0,'test sleep system',0,0,0,0,service
237 | 0,NNS,NN,0,0,0,0,0,0.456851,4,0,0,0,0.75,0,0,0,0.076923,5.25,24,3,0,0,0,1,service,0,0,0,0,1822752074,0,'household - water plants',0,0,0,0,service
238 | 0,NNS,NNP,0,0,0,0,0,0.472321,3,0,0,0,0.666667,0,0,0,0,4.333333,15,2,0,0,1,1,service,0,0,0,0,1822752074,0,'Wash the dishes',0,1,0,0,service
239 | 0,NNS,VB,0,0,0,0,0,0.357214,2,1,0,0,0.5,0,0,0,0,7.5,16,1,0,0,0,1,service,0.5,0,0,0,1316389283,0,'organize closets',0,0,0,0,service
240 | 0,NN,VB,0,0,0,0,0,0.352519,3,1,0,0,0.666667,0,0,0,0.076923,7,23,2,0,0,0,1,plan-meal,0.333333,0,0,0,-373408302,0,'assemble toiletries kit',0,0,0,0,service
241 | 0,NNS,NN,0,0,1,0,0,0.467128,3,0,0,0,0.666667,0.333333,0,0,0.076923,5,17,2,0,0,0,1,service,0,0,0,0,1822752074,0,'plant more plants',0,0,0,0,service
242 | 0,VB,NN,0,0,0,0,0,0.410962,4,1,0,0,0.5,0,0,0,0,4.75,22,2,0,0,1,1,postal,0.25,0,0,0,1822752074,0,'Upload photos to gmail',0,0,0,0,service
243 | 0,NN,VB,0,0,0,0.08,0,0.406105,3,1,0,0,0.666667,0,0,0,0.076923,5.666667,19,2,0,0,0,1,service,0.333333,0,0,0,-373408302,0,'assemble repair kit',0,0,0,0,service
244 | 0,NN,NN,0,0,0,0,0,0.351172,2,0,0,0,1,0,0,0,0,6,13,2,0,0,0,1,service,0,0,0,0,1822752074,0,'finish mowing',0,0,0,0,service
245 | 0,NN,VB,0,0,0,0,0,0.298758,4,1,0,0,0.75,0,0,0,0,6.5,29,3,0,0,0,1,service,0.25,0,0,0,1094496948,0,'replace humidifier air filter',0,0,0,0,service
246 | 0,NN,NN,0,0,0,0,0,0.335185,8,0,0,0,0.625,0,0,0,0,3.875,38,5,0,0,1,1,call,0,0,0,0,1822752074,0,'household - setup vlc @ term on doctor',0,1,0,0,service
247 | 0,-RRB-,VB,0,0,0,0,0,0.38532,6,1,1,0,0.166667,0,0,0,0,4.666667,31,1,0,0,1,1,call,0.166667,0,0,0.166667,-734452820,0,'Arrange flowers (if applicable)',0,0,0,0,service
248 | 0,NN,VB,0,0,0,0,0,0.472153,4,1,1,0,0.25,0,0,0,0.076923,5,23,1,0,0,0,1,service,0.25,0,0,0.25,94001407,0,'Break out concrete slab',0,0,0,0,service
249 | 0,'\'\'',JJ,0.133333,0,0,0,0.095238,0.582737,14,4,1,0,0,0,0.181818,0.272727,0,2.785714,47,0,0,0,3,1,call,0.285714,0,0,0.071429,102846020,0,'complete \"Learn by Doing\" and \"Did I get this?\"',0,0,0,0,school-work
250 | 0,NN,VB,0,0,0,0,0,0.450248,3,1,0,0,0.666667,0,0,0.090909,0,6.333333,21,2,0,0,0,1,plan-meal,0.333333,0,0,0,113399775,0,'write nutrition paper',0,0,0,0,school-work
251 | 0,CD,NN,0,0,0,0,0,0.137812,2,0,0,0,0.5,0,0,0.090909,0,1.5,4,1,0,0,0,1,postal,0,0,0,0,1822752074,0,'HW 1',0,2,0,0,school-work
252 | 0,NNS,NN,0,0,0,0,0,0.387932,3,0,0,0,0.666667,0,0,0.181818,0.076923,6,20,2,0,0,0,1,call,0,0,0,0,1822752074,0,'figure out Footnotes',0,0,0,0,school-work
253 | 0,CD,NNP,0,0,0,0,0,0.265165,3,0,0,0,0.666667,0,0,0.181818,0,4.666667,16,2,0,0,0,1,call,0,0,0,0,1822752074,0,'Read Modules 1-4',0,2,0,0,school-work
254 | 0,NN,NN,0,0,0,0,0,0.380167,1,0,0,0,1,0,0,0.181818,0,4,4,1,0,0,0,1,school-work,0,0,0,0,1822752074,0,Quiz,0,0,0,0,school-work
255 | 0,NN,NN,0,0,0,0,0,0.305451,2,0,0,0,1,0,0,0.181818,0,6,13,2,0,0,0,1,school-work,0,0,0,0,1822752074,0,'syllabus quiz',0,0,0,0,school-work
256 | 0,NNP,VB,0,0,0,0,0,0.422694,8,1,0,0,0.5,0,0,0,0,4.5,42,4,0,0,1,2,call,0.125,0,0,0,93029230,0,'Apply for Grad. Student assistance from UM',0,1,0,0,school-work
257 | 0.088235,NN,VB,0,0,0,0,0,0.391976,6,1,1,0,0.5,0,0,0,0,6.333333,43,3,0,0,1,1,call,0.166667,0,0,0.166667,-308949343,0,'Provide final headcount to site coordinator',0,0,0,0,contact
258 | 0.117647,'\'\'',NN,0,0,0,0,0,0.421594,14,0,0,0.255814,0.571429,0,0,0,0,4.142857,67,8,0,0,2,1,call,0,0,0,0,1822752074,0,'Contact Stella\'s staff on interest for \"Stella\'s Super Mario Party\"',0,2,0,0,contact
259 | 0,NNP,VB,0,0,0,0,0,0.444832,3,1,0,0,0.666667,0,0,0,0,7,23,2,0,0,0,1,pay-bill-online,0.333333,0,0,0,-1367724422,0,'Cancel Comcast Internet',0,1,0,0,contact
260 | 0,.,VB,0,0,0,0,0.047619,0.500195,14,3,0,0.325581,0.285714,0,0,0,0,3.857143,66,4,0,0,6,1,call,0.214286,0,0,0,96889,0,'Ask KFB for list of restaurants they have worked with in the past.',0,2,0,0,contact
261 | 0,NN,VB,0,0,0,0,0,0.46772,5,1,0,0,0.6,0,0,0,0,5.8,33,3,0,0,1,1,pay-bill-online,0.2,0,0,0,-1367724422,0,'cancel renters insurance with AAA',0,0,0,0,contact
262 | 0,NNS,NN,0,0,0,0,0,0.624689,4,0,1,0,0.5,0,0,0,0,4,19,2,0,0,1,1,call,0,0,0,0.25,1822752074,0,'Talk to few lawyers',0,0,0,0,contact
263 | 0,NNP,VB,0,0,0,0,0,0.377999,7,2,0,0.093023,0.285714,0,0,0,0,3.285714,29,2,0,0,2,1,buy,0.285714,0,0,0,96889,0,'Ask IC to recommend us to ILG',0,0,0,0,contact
264 | 0,NNS,RB,0,0,1,0,0,0.338006,3,0,0,0.348837,0.333333,0.333333,0,0,0,7.666667,25,1,0,0,1,1,plan-meal,0,0,0,0,1822752074,0,'Followup with restaurants',0,0,0,0,contact
265 | 0,NNS,NN,0,0,0,0,0,0.534252,7,0,0,0.116279,0.571429,0,0,0,0,4.571429,38,4,0,0,2,1,call,0,0,0,0,1822752074,0,'Share contact numbers with at 2 people',0,2,0,0,contact
266 | 0,NN,NN,0,0,0,0,0,0.547899,4,0,0,0,0.75,0,0,0,0,4.75,22,3,0,0,1,1,call,0,0,0,0,1822752074,0,'change address at bank',0,0,0,0,contact
267 | 0,NNP,NN,0,0,0,0,0,0.59759,3,0,0,0,0.666667,0,0,0,0,3.333333,12,2,0,0,1,1,call,0,0,0,0,1822752074,0,'talk to Mike',0,1,0,0,contact
268 | 0,NNS,VB,0,0,1,0,0.095238,0.503746,52,8,4,0.093023,0,0.019231,0,0,0,5.134615,312,0,0.142857,0,11,1,call,0.153846,0.368421,0,0.076923,3526536,0,'Send hotel and transportation information to out-of-town guests, include directions from local airorts and cities from which many guests will be arriving by car, information (description, location, phone number) and any code or name that must be mentioned to receive discounted rate when making room reservations',0,0,0,0,contact
269 | 0,NNS,NNP,0,0,0,0,0,0.007582,3,0,0,0,1,0,0,0,0,5.666667,19,3,0,0,0,1,plan-meal,0,0,0,0,1822752074,0,'Ann Arbor followups',0,1,0,0,contact
270 | 0.147059,-RRB-,VB,0.111111,0,0,0,0,0.451727,20,4,2,0.302326,0.25,0,0.090909,0,0.115385,4.45,106,5,0,0,5,1,call,0.2,0.052632,0,0.1,96889,0,'Ask caterer/coordinator to have top teir of wedding cake packed up for you (to save for first anniversary)',0,1,0,0,contact
271 | 0,NNS,RB,0,0,1,0,0,0.249358,4,0,0,0.348837,0.5,0.25,0,0,0,6.25,28,2,0,0,1,1,plan-meal,0,0,0,0,1822752074,0,'Followup with GR restaurants',0,0,0,0,contact
272 | 0,NN,NN,0,0,0,0,0,0.347091,7,0,0,0,0.857143,0,0,0,0,3.571429,31,6,0,0,1,1,call,0,0.052632,0,0,1822752074,0,'touch base w judy on WF account',0,1,0,0,contact
273 | 0,NNS,NNP,0,0,0,0,0,0.43704,4,0,0,0.162791,0.5,0,0,0,0,5.75,26,2,0,0,1,1,call,0,0,0,0,1822752074,0,'Outreach to 10 restaurants',0,1,0,0,contact
274 | 0.088235,NNS,VB,0,0,0,0,0,0.534035,9,1,0,0,0.666667,0,0,0,0,5.888889,61,6,0,0,1,1,call,0.111111,0,0,0,3173137,0,'Give site coordinator arrival times for all service providers',0,0,0,0,contact
275 | 0,NNP,VB,0,0,0,0,0.095238,0.314994,5,2,0,0,0.4,0,0,0,0,5.8,33,2,0,0,1,1,call,0.4,0.368421,0,0,3526536,0,'send updated financials to Armand',0,1,0,0,contact
276 | 0,NN,RB,0,0,1,0,0,0.362459,12,0,0,0.418605,0.5,0.083333,0,0,0,4.833333,68,6,0,0,3,1,call,0,0,0,0,1822752074,0,'Followup with Jenny at KFB about the restaurants\' promo request form',0,2,0,0,contact
277 | 0,-RRB-,VB,0,0,0,0,0,0.383837,9,1,1,0,0.333333,0,0,0,0,5.888889,59,3,0,0,2,1,call,0.111111,0,0,0.111111,951117504,0,'Confirm final details with photographer (and videographers)',0,0,0,0,contact
278 | 0,JJ,NN,0,0,0,0,0,0.435008,4,0,1,0.069767,0.5,0,0,0,0,5.5,25,2,0,0,1,1,pay-bill-online,0,0,0,0.25,1822752074,0,'request permit for aframe',0,0,0,0,contact
279 | 0,NNS,RB,0.066667,0,1,0,0,0.555731,10,2,0,0.069767,0.1,0.1,0,0,0,3.9,48,1,0,1,4,1,call,0.2,0,0,0,1822752074,0,'Inquire as to what you need to bring to fittings',0,0,0.1,0,contact
280 | 0,NNS,RB,0,0,1,0,0,0.302937,4,0,0,0.186047,0.5,0.25,0,0,0,7.5,33,2,0,0,1,1,plan-meal,0,0,0,0,1822752074,0,'Followup with restaurant invoices',0,0,0,0,contact
281 | 0,VB,RB,0,0,1,0,0,0.360939,4,1,0,0.186047,0.25,0.25,0,0,0,5.5,25,1,0,0,1,1,call,0.25,0,0,0,108399245,0,'Followup with World Renew',0,1,0,0,contact
282 | 0,NNS,VB,0,0,0,0,0,0.403412,7,1,0,0,0.571429,0,0,0,0.076923,5.714286,46,4,0,0,1,1,call,0.142857,0,0,0,108386675,0,'Reach out to Seattle restaurant coalition orgs',0,1,0,0,contact
283 | 0,NN,NN,0,0,0,0,0,0.616193,2,0,0,0,1,0,0,0,0,6.5,14,2,0,0,0,1,call,0,0,0,0,1822752074,0,'change address',0,0,0,0,contact
284 | 0,NN,NN,0,0,0,0,0,0.408012,7,0,0,0.116279,0.714286,0,0,0,0,4.857143,40,5,0,0,1,1,call,0,0,0,0,1822752074,0,'Contact Mars Hill about HQ and marketing',0,1,0,0,contact
285 | 0,NN,VB,0,0,0,0,0,0.34723,7,1,1,0,0.428571,0,0,0,0,5.571429,45,3,0,0,2,1,call,0.142857,0,0,0.142857,1671386080,0,'Discuss bustle style of train with seamstress',0,0,0,0,contact
286 | 0,NNS,RB,0,0,1,0,0,0.191724,4,0,0,0.186047,0.5,0.25,0,0,0,6,27,2,0,0,1,1,call,0,0,0,0,1822752074,0,'Followup with National NPOs',0,1,0,0,contact
287 | 0.088235,.,VB,0.111111,0,0,0,0,0.456944,11,3,0,0.302326,0.272727,0,0.090909,0,0.115385,4.818182,62,3,0,0,2,1,call,0.272727,0.052632,0,0,96889,0,'Ask caterer/coordinator to have cake topper packed up for you.',0,1,0,0,contact
288 | 0,NN,VB,0,0,0,0,0,0.373331,11,1,1,0.162791,0.454545,0,0,0,0,4.545455,58,5,0,0,2,1,call,0.090909,0,0,0.090909,-1183699191,0,'Invite restaurants (and NPOs) to Patagonia event next week',0,3,0,0,contact
289 | 0.088235,NN,RB,0,0,1,0,0,0.360547,7,0,1,0,0.571429,0.142857,0,0,0,6.142857,49,4,0,0,1,1,call,0,0,0,0.142857,1822752074,0,'Tally final guest count with site manager/caterer',0,1,0,0,contact
290 | 0,NNS,VB,0,0,0,0,0,0.425146,6,1,0,0,0.666667,0,0,0,0,4.833333,33,4,0,0,0,1,buy,0.166667,0,0,0,-734452820,0.307692,'Arrange print run: giveaway cards',0,0,0,0,contact
291 | 0,NNS,RB,0,0,1,0,0,0.370813,4,0,1,0.44186,0.25,0.25,0,0,0,7,31,1,0,0,1,1,call,0,0,0,0.25,1822752074,0,'Followup with Local Restaurants',0,1,0,0,contact
292 | 0,RP,VB,0.111111,0,1,0,0,0.505248,18,3,1,0.069767,0.277778,0.055556,0.090909,0,0.115385,4.833333,102,5,0,0,3,1,call,0.166667,0.052632,0,0.055556,951117504,0,'Confirm with your rental company all details, times, and sites where items must be delivered/picked up',0,0,0,0,contact
293 | 0,NN,JJ,0,0,0,0,0,0.134104,3,0,1,0.27907,0.666667,0,0,0,0,7.666667,25,2,0,0,0,1,calendar,0,0,0,0.333333,1822752074,0,'Local Restuarant followup',0,0,0,0,contact
294 | 0,NN,NN,0,0,0,0,0,0.468541,4,0,0,0.069767,0.75,0,0,0,0,5.75,26,3,0,0,1,1,pay-bill-online,0,0,0,0,1822752074,0,'request permit for signage',0,0,0,0,contact
295 | 0,NN,NN,0,0,0,0,0,0.265873,2,0,0,0,1,0,0,0,0,7,15,2,0.142857,0,0,1,school-work,0,0,0,0,1822752074,0,'book babysitter',0,0,0,0,contact
296 | 0,NNP,JJ,0,0,0,0,0,0.345547,3,0,1,0,0.333333,0,0,0,0,6.333333,21,1,0,0,1,1,call,0,0,0,0.333333,1822752074,0,'Follow-up with Monica',0,1,0,0,contact
297 | 0,NNP,NN,0,0,0,0,0,0.269166,6,1,0,0.116279,0.833333,0,0,0,0,7.666667,51,5,0,0,0,1,call,0.166667,0,0,0,1822752074,0,'Contact student orgs regarding Storyteller Rotation',0,1,0,0,contact
298 | 0.058824,NN,NN,0,0,0,0,0,0.484375,10,1,1,0.27907,0.6,0,0,0,0,5.3,61,6,0,0,1,1,call,0.1,0,0,0.1,1822752074,0,'Contact local town clerk\'s office to arrange marriage license',0,0,0,0,contact
299 | 0,NNS,NNP,0,0,0,0,0,0.13676,2,0,0,0,1,0,0,0,0,7,15,2,0,0,0,1,email,0,0,0,0,1822752074,0,'Press Followups',0,1,0,0,contact
300 | 0,NNS,VB,0,0,0,0.12,0,0.523948,4,2,1,0,0.25,0,0,0,0,6,27,1,0,0,0,1,plan-meal,0.5,0,0,0.25,3143097,0,'find healthy baking recipes',0,0,0,0.5,plan-meal
301 | 0,NN,NN,0,0,0,0,0,0.558769,2,0,0,0,1,0,0,0,0,5.5,12,2,0,0,0,1,plan-meal,0,0,0,0,1822752074,0,'package food',0,0,0,0.166667,plan-meal
302 | 0,NNS,VB,0.4,0,0,0,0,0.546718,2,1,0,0,0.5,0,0,0,0,7,15,1,0,0,0,1,buy,0.5,0,0,0,97926,0,'buy ingredients',0,0,0,0.083333,plan-meal
303 | 0,NN,VB,0,0,0,0,0,0.54952,6,1,0,0,0.5,0,0,0,0,4.5,32,3,0,0,2,1,call,0.166667,0,0,0,109757538,0,'start meal planning for the week',0,1,0,0.083333,plan-meal
304 | 0,NN,NNP,0,0,0,0,0,0.495266,2,0,0,0,1,0,0,0,0,4,9,2,0,0,0,1,plan-meal,0,0,0,0,1822752074,0,'Plan menu',0,0,0,0.083333,plan-meal
305 | 0,NNS,VB,0,0,0,0,0,0.402051,4,1,0,0,0.5,0,0,0,0,6,27,2,0,0,0,1,plan-meal,0.25,0,0,0,-135762164,0,'identify 4-6 dinner recipes',0,1,0,0.333333,plan-meal
306 | 0,NNS,NN,0,0,0,0,0,0.466517,3,0,0,0,1,0,0,0,0,5,17,3,0,0,0,1,plan-meal,0,0,0,0,1822752074,0,'test cook recipes',0,0,0,0.25,plan-meal
307 | 0,NN,VB,0,0,0,0,0,0.595039,3,1,0,0,0.666667,0,0,0,0,4,14,2,0,0,0,1,pay-bill-online,0.333333,0,0.64,0,110760,0,'pay bills',0,0,0,0,pay-bill-online
308 | 0.294118,NN,VB,0,0,0,0,0,0.440004,5,2,0,0,0.4,0,0,0,0,7.2,40,2,0,0,1,1,calendar,0.4,0,0,0,-697920873,0,'pay comed',0,0,0,0,calendar
309 | 0.441176,NNS,VB,0,0,0,0,0,0.390764,5,1,1,0,0.4,0,0,0,0,6.8,38,2,0,0,1,1,calendar,0.2,0,0,0.2,-697920873,0,'Pay Expression',0,0,0,0,calendar
310 | 0,NNS,NN,0,0,0,0,0,0.556596,6,1,0,0,0.666667,0,0,0,0,3.5,25,4,0,0,0,1,call,0.166667,0,0,0,1822752074,0,'Pay SF Parking Ticket. :(',0.607143,0,0,0,call
311 | 0.058824,NNP,VB,0,0,0,0,0,0.413226,5,1,0,0,0.6,0,0,0,0,6.6,37,3,0.142857,0,1,1,call,0.2,0,0,0,3343854,0,'pay NY tax bill',0,2,0,0,find-travel
312 | 0.058824,NN,VB,0,0,0,0.12,0,0.407804,3,1,0,0,0.666667,0,0,0,0,7,23,2,0,0,0,1,pay-bill-online,0.333333,0,0,0,3202804,0,'pay mortgage',0,0,0,0,find-service
313 | 0,NN,VB,0,0,0,0,0,0.164861,2,1,0,0,0.5,0,0,0,0,6.5,14,1,0.142857,0,0,1,plan-meal,0.5,0,0,0,3029737,0,'pay ATT',0,0,0,0,find-service
314 | 0,-RRB-,NN,0,0,0,0,0,0.468084,13,1,0,0.116279,0.461538,0,0,0,0,3.615385,56,6,0.071429,0,3,1,call,0.076923,0,0,0,1822752074,0,'pay roofer',0,4,0,0,email
315 | 0,NN,NNP,0,0,0,0,0,0.476222,6,0,0,0,0.666667,0,0,0,0,4,29,4,0,0,2,1,call,0,0.105263,0,0,1822752074,0,'Pay PG&E',0,1,0,0,postal
316 | 0,NN,VB,0.133333,0,1,0,0.095238,0.549757,3,1,0,0,0.333333,0.333333,0,0,0,3.666667,13,1,0,0,0,1,service,0.333333,0,0,0,102230,0,'Pay BGE',0,0,0,0,buy
317 | 0.088235,NN,VB,0,0,0,0.28,0,0.519077,8,1,0,0,0.5,0,0,0,0,4.375,42,4,0.142857,0,3,1,call,0.125,0,0,0,3029737,0,'pay student loan',0,0,0,0,buy
318 | 0,NNS,VB,0.4,0,0,0,0,0.432272,4,1,1,0,0.5,0,0,0,0,4.75,22,2,0,0,0,1,pay-bill-online,0.25,0,0,0.25,97926,0,'pay plumber',0,0,0,0.083333,buy
319 | 0,NN,NN,0,0,0,0,0,0.3462,6,0,0,0,0.5,0,0,0,0,5,35,3,0,0,1,1,pay-bill-online,0,0,0,0,1822752074,0,'Pay DMV for Harley before May',0,1,0,0,buy
320 | 0,NN,VB,0,0,0,0,0,0.419855,25,5,1,0,0.36,0,0,0,0,4.8,141,9,0,0,5,1,call,0.2,0,0,0.04,-1164222250,0,'pay rent',0,1,0,0,buy
321 | 0.058824,JJ,VB,0,0,0,0,0,0.415902,5,1,1,0,0.4,0,0,0,0,4.8,28,2,0.142857,0,1,1,service,0.2,0,0,0.2,3343854,0,'pay genki violation',0,0,0,0,service
322 | 0,NN,NN,0,0,0,0.08,0,0.430786,2,0,0,0,1,0,0,0,0,5,11,2,0,0,0,1,postal,0,0,0,0,1822752074,0,'get bills on autopay',0,0,0,0,service
323 | 0.058824,PRP,VB,0,0,0,0.12,0,0.525956,14,3,0,0,0.428571,0,0,0,0.076923,4.285714,73,6,0,0,5,1,call,0.214286,0,0,0,-373408302,0,'Appointment with capital women\'s care',0,1,0,0.083333,service
324 | 0,NN,VB,0,0,0,0,0,0.409282,4,1,1,0,0.5,0,0,0,0.076923,4.75,22,2,0,0,0,1,buy,0.25,0,0,0.25,-373408302,0,'Schedule appointments with caterers, if necessary',0,1,0,0,service
325 | 0,NNS,VB,0,0,0,0,0.095238,0.450443,8,3,0,0,0.5,0,0,0,0,5,47,4,0,0,1,1,call,0.375,0.368421,0,0,3526536,0,'Schedule rehearsal with officiant, all family members, bridesmaids, best men, wedding planner (if applicable) and other participants.',0,0,0,0,contact
326 | 0,NN,VB,0,0,0,0,0,0.558463,5,1,1,0,0.2,0,0,0,0,5.2,30,1,0,0,1,1,call,0.2,0,0,0.2,109641682,0,'Schedule interviews with photographers (and videographers)',0,1,0,0,contact
327 | 0,NNS,VB,0,0,0,0,0,0.404537,5,1,0,0,0.4,0,0,0,0,6.6,37,2,0,0,1,1,call,0.2,0.105263,0,0,951351530,0,'Schedule cake tasting',0,0,0,0,contact
328 | 0,CD,VB,0,0,0,0,0,0.474516,7,1,0,0.093023,0.428571,0,0,0,0,3.857143,33,3,0,0,1,1,call,0.142857,0,0,0,96889,0,'Apple appointment',0,3,0,0,contact
329 |
--------------------------------------------------------------------------------