├── .coveragerc ├── .gitignore ├── .scrutinizer.yml ├── .travis.yml ├── LICENSE ├── README.md ├── crowdtruth ├── __init__.py ├── configuration.py ├── crowd_platform.py ├── load.py ├── models │ ├── __init__.py │ ├── job.py │ ├── metrics.py │ ├── unit.py │ └── worker.py └── run.py ├── setup.cfg ├── setup.py ├── test ├── __main__.py ├── test_data │ ├── load │ │ ├── dir │ │ │ ├── file1.csv │ │ │ └── file2.csv │ │ ├── empty_rows.csv │ │ ├── platform_amt1.csv │ │ ├── platform_amt2.csv │ │ ├── platform_amt3.csv │ │ ├── platform_amt4.csv │ │ ├── platform_amt5.csv │ │ ├── platform_cf1.csv │ │ ├── platform_cf2.csv │ │ ├── platform_cf3.csv │ │ ├── platform_cf4.csv │ │ └── platform_cf5.csv │ └── metrics │ │ ├── 10work_agr.csv │ │ ├── 10work_disagr.csv │ │ ├── 10work_outlier.csv │ │ ├── 2vs3work_agr.csv │ │ ├── 2work_agr.csv │ │ ├── 2work_disagr.csv │ │ ├── 3vs4work_agr.csv │ │ ├── 3work_agr.csv │ │ ├── 3work_disagr.csv │ │ ├── 3work_outlier.csv │ │ ├── 4vs5work_agr.csv │ │ ├── 4work_agr.csv │ │ ├── 4work_disagr.csv │ │ ├── 4work_outlier.csv │ │ ├── 5vs6work_agr.csv │ │ ├── 5work_agr.csv │ │ ├── 5work_disagr.csv │ │ ├── 5work_outlier.csv │ │ ├── 6vs7work_agr.csv │ │ ├── 6work_agr.csv │ │ ├── 6work_disagr.csv │ │ ├── 6work_outlier.csv │ │ ├── 7vs8work_agr.csv │ │ ├── 7work_agr.csv │ │ ├── 7work_disagr.csv │ │ ├── 7work_outlier.csv │ │ ├── 8vs9work_agr.csv │ │ ├── 8work_agr.csv │ │ ├── 8work_disagr.csv │ │ ├── 8work_outlier.csv │ │ ├── 9work_agr.csv │ │ ├── 9work_disagr.csv │ │ └── 9work_outlier.csv ├── test_load.py └── test_metrics.py └── tutorial ├── MACE.jar ├── Part III_ CrowdTruth Tutorial.pdf ├── Part II_ CrowdTruth Tutorial.pdf ├── Part IV_ CrowdTruth Tutorial.pdf ├── Part I_ CrowdTruth Tutorial.pdf ├── crowd_vs_expert_performance.py ├── data ├── .DS_Store ├── custom-platform-person-video-multiple-choice.csv ├── event-text-highlight.csv ├── event-text-sparse-multiple-choice.csv ├── mace_rte.standardized.csv ├── mace_temp.standardized.csv ├── person-video-binary-choice.csv 
├── person-video-free-input.csv ├── person-video-highlight.csv ├── person-video-multiple-choice.csv ├── person-video-sparse-multiple-choice-free-input.csv ├── person-video-sparse-multiple-choice.csv ├── person-video-ternary-choice.csv ├── relex-binary-choice.csv ├── relex-multiple-choice.csv ├── relex-sparse-multiple-choice.csv ├── results │ ├── binary-relex-annotations.csv │ ├── binary-relex-units.csv │ ├── binary-relex-workers.csv │ ├── crowdtruth_units_rte.csv │ ├── crowdtruth_units_temp.csv │ ├── crowdtruth_workers_rte.csv │ ├── crowdtruth_workers_temp.csv │ ├── mace_units_rte.csv │ ├── mace_units_temp.csv │ ├── mace_workers_rte.csv │ ├── mace_workers_temp.csv │ ├── majorityvote_units_rte.csv │ ├── multchoice-people-video-annotations.csv │ ├── multchoice-people-video-units.csv │ ├── multchoice-people-video-workers.csv │ ├── multchoice-relex-annotations.csv │ ├── multchoice-relex-units.csv │ ├── multchoice-relex-workers.csv │ ├── openextr-persvid-dimred-units.csv │ ├── openextr-persvid-dimred-workers.csv │ ├── openextr-persvid-units.csv │ ├── openextr-persvid-workers.csv │ ├── sparsemultchoice-relex-annotations.csv │ ├── sparsemultchoice-relex-units.csv │ └── sparsemultchoice-relex-workers.csv ├── rte.standardized.csv ├── temp.standardized.csv └── wsd.standardized.csv ├── experiment_replication_variable_workers.py ├── getting_started.md ├── handout_session_2.md ├── handout_session_3.md ├── img ├── .DS_Store ├── ann-vec │ ├── bin-person-in-vid.pdf │ ├── bin-relex.pdf │ ├── free-person-in-vid.pdf │ ├── mult-person-in-vid.pdf │ ├── mult-relex.pdf │ ├── od-extr-person-in-vid.pdf │ ├── sparse-mult-relex.pdf │ ├── sparse-person-in-vid.pdf │ └── tern-person-in-vid.pdf ├── event-text-highlight.png ├── event-text-sparse-multiple-choice.png ├── person-video-binary.png ├── person-video-free-input.png ├── person-video-highlight.png ├── person-video-multiple-choice.png ├── person-video-sparse-multiple-choice-free-input.png ├── person-video-sparse-multiple-choice.png ├── 
person-video-ternary.png ├── relex-binary.png ├── relex-free.png ├── relex-highlight.png ├── relex-multiple-choice.png └── relex-sparse-multiple-choice.png ├── notebooks ├── .DS_Store ├── .ipynb_checkpoints │ ├── Binary Choice Task - Person Identification in Video-checkpoint.ipynb │ ├── Binary Choice Task - Relation Extraction-checkpoint.ipynb │ ├── Custom Platform - Multiple Choice Task - Person Type Annotation in Video-checkpoint.ipynb │ ├── Dimensionality Reduction - Stopword Removal from Media Unit & Annotation-checkpoint.ipynb │ ├── Free Input Task - Person Annotation in Video-checkpoint.ipynb │ ├── Highlighting Task - Event Extraction-checkpoint.ipynb │ ├── Multiple Choice Task - Person Type Annotation in Video-checkpoint.ipynb │ ├── Multiple Choice Task - Relation Extraction-checkpoint.ipynb │ ├── Recognizing Textual Entailment-checkpoint.ipynb │ ├── Sparse Multiple Choice Task - Event Extraction-checkpoint.ipynb │ ├── Sparse Multiple Choice Task - Person Annotation in Video-checkpoint.ipynb │ ├── Sparse Multiple Choice Task - Relation Extraction-checkpoint.ipynb │ ├── Temporal Event Ordering-checkpoint.ipynb │ └── Ternary Choice Task - Person Identification in Video-checkpoint.ipynb ├── Binary Choice Task - Person Identification in Video.ipynb ├── Binary Choice Task - Relation Extraction.ipynb ├── CrowdTruth vs. MACE vs. Majority Vote for Recognizing Textual Entailment.ipynb ├── CrowdTruth vs. MACE vs. 
Majority Vote for Temporal Event Ordering.ipynb ├── Custom Platform - Multiple Choice Task - Person Type Annotation in Video.ipynb ├── Dimensionality Reduction - Stopword Removal from Media Unit & Annotation.ipynb ├── Free Input Task - Person Annotation in Video.ipynb ├── Highlighting Task - Event Extraction.ipynb ├── Multiple Choice Task - Person Type Annotation in Video.ipynb ├── Multiple Choice Task - Relation Extraction.ipynb ├── Sparse Multiple Choice Task - Event Extraction.ipynb ├── Sparse Multiple Choice Task - Person Annotation in Video.ipynb ├── Sparse Multiple Choice Task - Relation Extraction.ipynb └── Ternary Choice Task - Person Identification in Video.ipynb ├── relex_example.csv ├── relex_example_custom.csv ├── templates ├── .DS_Store ├── Events-Text-Highlight │ ├── template.css │ ├── template.html │ └── template.js ├── Events-Text-Sparse-Multiple-Choice │ ├── template.css │ ├── template.html │ └── template.js ├── People-Video-Binary │ ├── .DS_Store │ ├── template.css │ ├── template.html │ └── template.js ├── People-Video-Free │ ├── template.css │ ├── template.html │ └── template.js ├── People-Video-Highlight │ ├── template.css │ ├── template.html │ └── template.js ├── People-Video-Multiple-Choice │ ├── template.css │ ├── template.html │ └── template.js ├── People-Video-Sparse-Multiple-Choice-and-Free │ ├── template.css │ ├── template.html │ └── template.js ├── People-Video-Sparse-Multiple-Choice │ ├── template.css │ ├── template.html │ └── template.js ├── People-Video-Ternary │ ├── template.css │ ├── template.html │ └── template.js ├── Relex-Binary │ ├── template.css │ ├── template.html │ └── template.js ├── Relex-Free │ ├── template.css │ ├── template.html │ └── template.js ├── Relex-Highlight │ ├── template.css │ ├── template.html │ └── template.js ├── Relex-Multiple-Choice │ ├── template.css │ ├── template.html │ └── template.js └── Relex-Sparse-Multiple-Choice │ ├── template.css │ ├── template.html │ └── template.js └── tutorial.ipynb 
/.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=crowdtruth/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | data/ 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | -------------------------------------------------------------------------------- /.scrutinizer.yml: -------------------------------------------------------------------------------- 1 | checks: 2 | python: 3 | 
code_rating: true 4 | duplicate_code: true 5 | 6 | build: 7 | nodes: 8 | analysis: 9 | tests: 10 | override: 11 | - py-scrutinizer-run 12 | coverage: 13 | tests: 14 | override: 15 | - command: 'coverage run --source=crowdtruth/ setup.py test' 16 | coverage: 17 | file: '.coverage' 18 | config_file: '.coveragerc' 19 | format: 'py-cc' 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.5" 5 | - "3.6" 6 | install: 7 | - pip install . 8 | # command to run tests 9 | script: 10 | - pytest # or py.test for Python versions 3.5 and below 11 | - coverage run --source=crowdtruth/ setup.py test 12 | after_success: 13 | - codecov -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #  2 | 3 | [](https://badge.fury.io/py/CrowdTruth) [](https://travis-ci.org/CrowdTruth/CrowdTruth-core) [](https://codecov.io/gh/CrowdTruth/CrowdTruth-core) [](https://scrutinizer-ci.com/g/CrowdTruth/CrowdTruth-core/?branch=master) 4 | 5 | This library processes crowdsourcing results from Amazon Mechanical Turk and CrowdFlower following the CrowdTruth methodology. A full description of the metrics is available [in this paper](https://arxiv.org/abs/1808.06080). For more information see http://crowdtruth.org. 
6 | 7 | If you use this software in your research, please consider citing: 8 | 9 | ``` 10 | @article{CrowdTruth2, 11 | author = {Anca Dumitrache and Oana Inel and Lora Aroyo and Benjamin Timmermans and Chris Welty}, 12 | title = {CrowdTruth 2.0: Quality Metrics for Crowdsourcing with Disagreement}, 13 | year = {2018}, 14 | url = {https://arxiv.org/abs/1808.06080}, 15 | } 16 | ``` 17 | 18 | Useful links: 19 | 20 | * [Data](http://data.crowdtruth.org/) collected with CrowdTruth 21 | * [Papers](http://crowdtruth.org/papers/) that use CrowdTruth 22 | * Previous version [CrowdTruth v.1.0](https://github.com/CrowdTruth/CrowdTruth) 23 | 24 | 25 | ## Installation 26 | 27 | To install the stable version from PyPI, install *pip* for your OS, then install package using: 28 | ``` 29 | pip install crowdtruth 30 | ``` 31 | 32 | To install the latest version from source, download the library and install it using: 33 | ``` 34 | python setup.py install 35 | ``` 36 | 37 | ## Tutorial 38 | 39 | The following tutorial is a collection of slides, exercises and Jupyter notebooks that explains what is the *CrowdTruth methodology*, and how to use it in practice. If you are already familiar with CrowdTruth, you can skip straight to the [guide on how to run this library](tutorial/getting_started.md). 
class DefaultConfig:
    """Fallback processing configuration, used when no custom one is given.

    Override the attributes below to describe how an input file is processed:

    inputColumns: Columns of the .csv file holding what the workers were shown.
    outputColumns: Columns of the .csv file holding the workers' answers.
    customPlatformColumns: Columns that identify the standard annotation
        fields (judgment id, unit id, worker id, started time, submitted
        time). Only needed for custom input files, i.e. files that do not
        come from AMT or FigureEight.
    open_ended_task: True for an open-ended task, False otherwise.
    annotation_vector: Annotations the crowd can choose from. Only
        applicable to closed tasks.
    units: Units to be used.
    workers: Workers to be used.
    jobs: Jobs to be used.
    csv_file_separator: Column separator of the input csv files.
    annotation_separator: Separator between worker judgments (',' by default).
    processJudgments: Hook defining how the worker judgments are processed.
    """

    # name of the collection
    name = ''

    # which columns of the input file to read
    inputColumns = []
    outputColumns = []
    customPlatformColumns = []

    # task description
    open_ended_task = True
    annotation_vector = []

    # NOTE(review): presumably control how empty rows / empty answers are
    # treated while loading; the consumer is not visible here -- confirm.
    remove_empty_rows = True
    none_token = "NONE"

    # optional selections of units, workers and jobs to use
    units = []
    workers = []
    jobs = []

    # separators used when parsing the input
    csv_file_separator = ','
    annotation_separator = ','

    def processJudgments(self, judgments):
        """
        Hook for custom processing of the worker judgments.

        The default implementation hands the judgments back untouched.
        """
        return judgments
def get_platform(dframe):
    """Return the column-name mapping of the platform that produced ``dframe``.

    The platform is recognised by the name of the first column:
    ``'_unit_id'`` for CrowdFlower and ``'HITId'`` for Amazon Mechanical Turk.

    Returns:
        A dict mapping the platform's column names to the standard field
        names (judgment, unit, worker, started, submitted), or False when
        the first column matches neither platform.
    """
    known_platforms = {
        # CrowdFlower
        '_unit_id': {
            '_id' : 'judgment',
            '_unit_id' : 'unit',
            '_worker_id' : 'worker',
            '_started_at' : 'started',
            '_created_at' : 'submitted'
        },
        # Mturk
        'HITId': {
            'AssignmentId' : 'judgment',
            'HITId' : 'unit',
            'WorkerId' : 'worker',
            'AcceptTime' : 'started',
            'SubmitTime' : 'submitted'
        },
    }
    first_column = dframe.columns.values[0]
    return known_platforms.get(first_column, False)
def configure_with_missing_columns(dframe, config):
    """Guess which columns are inputs and which are outputs.

    Platform/bookkeeping columns are ignored: 'clustering', names starting
    with '_' or 'e_', and names ending in '_gold', '_reason' or 'browser'.
    Every remaining column is classified by how its values vary within the
    rows of one unit (rows grouped by '_unit_id'):

    * at most one distinct value in every unit -> input column
    * more than one distinct value in some unit -> output column

    A guess is only recorded for a side the user did not configure: inputs
    are added to ``config.input`` only when ``config.inputColumns`` is empty,
    outputs to ``config.output`` only when ``config.outputColumns`` is empty.

    Args:
        dframe: Data frame with the judgments; must contain '_unit_id'.
        config: Configuration object whose ``input`` / ``output`` dicts
            are updated in place.

    Returns:
        The same ``config`` object.
    """
    ignored_prefixes = ('_', 'e_')
    ignored_suffixes = ('_gold', '_reason', 'browser')

    units = dframe.groupby('_unit_id')
    candidates = [c for c in dframe.columns.values
                  if c != 'clustering'
                  and not c.startswith(ignored_prefixes)
                  and not c.endswith(ignored_suffixes)]

    for colname in candidates:
        # any() stops at the first unit whose rows disagree, which is the
        # early exit the control-flow exception used to provide
        varies_within_unit = any(unit[colname].nunique() > 1 for _, unit in units)

        if varies_within_unit:
            if not config.outputColumns:
                config.output[colname] = 'output.'+colname
        elif not config.inputColumns:
            config.input[colname] = 'input.'+colname

    return config
class Job:
    """
    Computes summary statistics for the crowdsourcing jobs.
    """

    @staticmethod
    def aggregate(units, judgments, config):
        """
        Summarises every job: number of distinct units, judgments and workers,
        mean judgment duration, runtime and the mean of the unit-level
        annotation counts.

        Args:
            units: Units contained in the job; must provide the columns
                '<output column>.unique_annotations' and
                '<output column>.annotations'.
            judgments: Judgments contained in the job.
            config: Job configuration as provided as input for the metrics.

        Returns:
            A dataframe indexed by job, with its columns sorted by name.
        """
        per_job = judgments.groupby('job').agg({
            'unit' : 'nunique',
            'judgment' : 'nunique',
            'worker' : 'nunique',
            'duration' : 'mean'
        })

        # the runtime runs from the earliest start to the latest submission
        # over all judgments, so every job row receives the same value
        last_submitted = max(judgments['submitted'])
        first_started = min(judgments['started'])
        per_job['runtime'] = last_submitted - first_started
        per_job['runtime.per_unit'] = per_job['runtime'] / per_job['unit']
        per_job['judgments.per.worker'] = per_job['judgment'] / per_job['worker']

        # average the unit-level metrics of every output column
        for suffix in ('unique_annotations', 'annotations'):
            for out_col in config.output.values():
                metric_col = out_col+'.'+suffix
                per_job[metric_col] = units[metric_col].mean()

        return per_job.reindex(sorted(per_job.columns), axis=1)
class Worker:
    """
    Computes summary statistics for the workers in the jobs.
    """

    @staticmethod
    def aggregate(judgments, config):
        """
        Summarises every worker: number of distinct jobs, units and judgments,
        mean judgment duration and, per output column, the mean of the
        '<output column>.count' values.

        Args:
            judgments: Judgments contained in the job.
            config: Job configuration as provided as input for the metrics.

        Returns:
            A dataframe indexed by worker with one column per statistic.
        """
        spec = {
            'job' : 'nunique',
            'unit' : 'nunique',
            'judgment' : 'nunique',
            'duration' : 'mean'
        }
        # every output column contributes the mean of its '.count' column
        spec.update({out_col+'.count': 'mean' for out_col in config.output.values()})

        # aggregate a copy of the judgments, as the original did
        return judgments.copy().groupby('worker').agg(spec)
"), 14 | classifiers=[ 15 | 'Development Status :: 4 - Beta', 16 | 'Intended Audience :: Developers', 17 | 'Intended Audience :: Education', 18 | 'Intended Audience :: Information Technology', 19 | 'Intended Audience :: Science/Research', 20 | 'License :: OSI Approved :: Apache Software License', 21 | 'Operating System :: OS Independent', 22 | 'Programming Language :: Python :: 2.7', 23 | 'Programming Language :: Python :: 3.5', 24 | 'Programming Language :: Python :: 3.6', 25 | 'Topic :: Scientific/Engineering', 26 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 27 | 'Topic :: Scientific/Engineering :: Human Machine Interfaces', 28 | 'Topic :: Scientific/Engineering :: Information Analysis', 29 | # 'Topic :: Scientific/Engineering :: Crowdsourcing', 30 | 'Topic :: Text Processing', 31 | 'Topic :: Text Processing :: Linguistic'], 32 | keywords=['CrowdTruth', 'crowdsourcing', 'disagreement', \ 33 | 'metrics', 'crowdflower', 'amazon mechanical turk'], 34 | author='Vrije Universiteit Amsterdam', 35 | author_email='crowdwatson@gmail.com', 36 | url='http://crowdtruth.org', 37 | license='Apache 2.0', 38 | download_url='https://github.com/CrowdTruth/CrowdTruth-core/archive/2.1.zip', 39 | packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), 40 | include_package_data=True, 41 | zip_safe=False, 42 | test_suite='test', 43 | install_requires=[ 44 | 'pymodm>=0.3.0', 45 | 'pandas>=0.23.1', 46 | 'numpy>=1.13.3', 47 | 'scipy>=1.0.0', 48 | 'chardet>=3.0.4', 49 | 'coverage>=4.5.1', 50 | 'codecov>=2.0.15', 51 | 'dateparser>=0.7.0' 52 | ], 53 | setup_requires=["pytest-runner"], 54 | tests_require=["pytest"], 55 | entry_points=""" 56 | [console_scripts] 57 | CrowdTruth = crowdtruth:CrowdTruth 58 | """, 59 | ) 60 | -------------------------------------------------------------------------------- /test/__main__.py: -------------------------------------------------------------------------------- 1 | ''' Start running tests ''' 2 | 3 | import unittest 4 | 5 | 
if __name__ == '__main__': 6 | unittest.main() 7 | -------------------------------------------------------------------------------- /test/test_data/load/dir/file1.csv: -------------------------------------------------------------------------------- 1 | _unit_id,_id,_worker_id,_started_at,_created_at,input,Answer.output 2 | 1,1321129167,W1,7/12/2014 01:55:25,7/12/2014 01:55:49,-,B 3 | 1,1321137419,W2,7/12/2014 02:15:56,7/12/2014 02:16:11,-,B 4 | 1,1321080237,W3,7/12/2014 00:03:21,7/12/2014 00:03:39,-,C 5 | 1,1321121353,W4,7/12/2014 01:31:26,7/12/2014 01:31:56,-,C 6 | 1,1321267092,W5,7/12/2014 08:21:47,7/12/2014 08:21:55,-,E 7 | 1,1321121859,W6,7/12/2014 01:32:52,7/12/2014 01:33:10,-,B 8 | -------------------------------------------------------------------------------- /test/test_data/load/dir/file2.csv: -------------------------------------------------------------------------------- 1 | _unit_id,_id,_worker_id,_started_at,_created_at,input,Answer.output 2 | 2,1321129167,W2,7/12/2014 01:55:25,7/12/2014 01:55:49,-,B 3 | 2,1321137419,W3,7/12/2014 02:15:56,7/12/2014 02:16:11,-,B 4 | 2,1321080237,W4,7/12/2014 00:03:21,7/12/2014 00:03:39,-,C 5 | 2,1321121353,W5,7/12/2014 01:31:26,7/12/2014 01:31:56,-,C 6 | 2,1321267092,W6,7/12/2014 08:21:47,7/12/2014 08:21:55,-,E 7 | 2,1321121859,W7,7/12/2014 01:32:52,7/12/2014 01:33:10,-,B 8 | -------------------------------------------------------------------------------- /test/test_data/load/empty_rows.csv: -------------------------------------------------------------------------------- 1 | HITId,AssignmentId,WorkerId,AcceptTime,SubmitTime,Input.input,Answer.output 2 | 1,3SLE99ER0OPA0ESHDZK4L6VDRM8BZ0,W12,Wed Jan 24 09:48:10 PST 2018,Wed Jan 24 09:51:16 PST 2018,-,B 3 | 1,3ERET4BTVNLWNQEYFE3Y7LCAZER9K3,W10,Wed Jan 24 10:32:11 PST 2018,Wed Jan 24 10:32:18 PST 2018,-,B 4 | 1,3X4MXAO0BH0TMQ41Y9MNMH6ZS66WR7,W9,Wed Jan 24 09:48:27 PST 2018,Wed Jan 24 10:29:03 PST 2018,-,C 5 | 2,34T446B1C1QW8YEXN537QRY9R0QC0H,W6,Wed Jan 24 09:53:46 PST 
2018,Wed Jan 24 09:54:08 PST 2018,-,C 6 | 2,3LOTDFNYA8BWJ0ZS7EMT5SSGPITWFA,W18,Wed Jan 24 09:37:27 PST 2018,Wed Jan 24 09:38:09 PST 2018,-,E 7 | 2,3OJSZ2ATDT8PRO52TU18R02M7X7751,W8,Wed Jan 24 09:37:55 PST 2018,Wed Jan 24 09:39:57 PST 2018,-,B 8 | 3,3NVC2EB65RB5STEZ2K3FPGOZX0PY33,W1,Wed Jan 24 09:41:10 PST 2018,Wed Jan 24 09:41:19 PST 2018,-,E 9 | 3,3NPI0JQDAPHGIWKSQCDNU2ZLNX1PTP,W3,Wed Jan 24 12:45:40 PST 2018,Wed Jan 24 12:45:57 PST 2018,-,D 10 | 3,354P56DE9LFQXQ3D5QEGKBCELHF7S3,W7,Wed Jan 24 09:38:57 PST 2018,Wed Jan 24 09:39:17 PST 2018,-,A 11 | 4,3Z7EFSHGNAQ45OK8QHSEZ5LK18KCXN,W16,Wed Jan 24 10:56:45 PST 2018,Wed Jan 24 11:02:33 PST 2018,-,C 12 | 4,3H0W84IWBLEZ5QIAD9SH0BZZL20ER8,W6,Wed Jan 24 09:39:34 PST 2018,Wed Jan 24 09:43:51 PST 2018,-, 13 | 4,3E1QT0TDFQL53Q50AJC8U92MN3L8I1,W2,Wed Jan 24 09:47:08 PST 2018,Wed Jan 24 09:47:16 PST 2018,-,A 14 | 5,345LHZDEDY4OBKUUSJ6QMVIM1CEU31,W4,Wed Jan 24 09:39:09 PST 2018,Wed Jan 24 09:43:59 PST 2018,-,C 15 | 5,3H8DHMCCWAN8QGRFRU80THV0Q0OKDF,W19,Wed Jan 24 11:22:10 PST 2018,Wed Jan 24 11:22:23 PST 2018,-,E 16 | 5,3WYGZ5XF3XRES5AXOK3POGH0R1FKSM,W9,Wed Jan 24 09:40:22 PST 2018,Wed Jan 24 09:58:58 PST 2018,-,A 17 | 6,3O7L7BFSHF1MCRFRNMY8MJY8Y7TEII,W13,Wed Jan 24 10:56:10 PST 2018,Wed Jan 24 10:59:45 PST 2018,-,E 18 | 6,34Z02EIMITPDUFG89RYXF1YDFMNT0E,W5,Wed Jan 24 09:41:10 PST 2018,Wed Jan 24 09:41:22 PST 2018,-,D 19 | 6,3OJSZ2ATDT8PRO52TU18R02M7X8570,W20,Wed Jan 24 09:41:43 PST 2018,Wed Jan 24 09:41:50 PST 2018,-, 20 | 7,33L7PJKHCHAVC4LB4VBC8G4VYLJT8N,W3,Wed Jan 24 10:19:50 PST 2018,Wed Jan 24 10:20:30 PST 2018,-,D 21 | 7,33IZTU6J82DKC594N3IEEAP2NBXSXO,W11,Wed Jan 24 10:34:22 PST 2018,Wed Jan 24 10:34:36 PST 2018,-,C 22 | 7,39L1G8WVWR382N3WQVW6BTDPQFH31B,W13,Wed Jan 24 09:42:42 PST 2018,Wed Jan 24 09:42:54 PST 2018,-,E 23 | 8,3X1FV8S5JY3PIBVISONF1B9EJOSVGZ,W13,Wed Jan 24 09:40:55 PST 2018,Wed Jan 24 09:41:16 PST 2018,-,D 24 | 8,3GU1KF0O4JDGMAQBQQM2U8TZHPIPBE,W8,Wed Jan 24 11:22:23 PST 2018,Wed Jan 24 11:22:36 PST 2018,-,D 25 
| 8,35USIKEBNSSADDX3EKS2F32C38RN6E,W20,Wed Jan 24 10:35:21 PST 2018,Wed Jan 24 10:35:39 PST 2018,-,D 26 | 9,3XM0HYN6NLBETD2V0HOSVGZKKKCEPG,W17,Wed Jan 24 09:39:52 PST 2018,Wed Jan 24 09:40:04 PST 2018,-,E 27 | 9,3KGTPGBS6YXZQ2YBOJSL3LDUVYK2U4,W6,Wed Jan 24 10:04:06 PST 2018,Wed Jan 24 10:04:15 PST 2018,-, 28 | 9,3A1COHJ8NK757VUG41UNUX17NRG8HK,W16,Wed Jan 24 10:55:49 PST 2018,Wed Jan 24 10:58:15 PST 2018,-,E 29 | -------------------------------------------------------------------------------- /test/test_data/load/platform_amt1.csv: -------------------------------------------------------------------------------- 1 | "HITId","AssignmentId","WorkerId","AcceptTime","SubmitTime","Input.input","Answer.output" 2 | 1,"3SLE99ER0OPA0ESHDZK4L6VDRM8BZ0","W12","Wed Jan 24 09:48:10 PST 2018","Wed Jan 24 09:51:16 PST 2018","-","B" 3 | 1,"3ERET4BTVNLWNQEYFE3Y7LCAZER9K3","W10","Wed Jan 24 10:32:11 PST 2018","Wed Jan 24 10:32:18 PST 2018","-","B" 4 | 1,"3X4MXAO0BH0TMQ41Y9MNMH6ZS66WR7","W9","Wed Jan 24 09:48:27 PST 2018","Wed Jan 24 10:29:03 PST 2018","-","C" 5 | 2,"34T446B1C1QW8YEXN537QRY9R0QC0H","W6","Wed Jan 24 09:53:46 PST 2018","Wed Jan 24 09:54:08 PST 2018","-","C" 6 | 2,"3LOTDFNYA8BWJ0ZS7EMT5SSGPITWFA","W18","Wed Jan 24 09:37:27 PST 2018","Wed Jan 24 09:38:09 PST 2018","-","E" 7 | 2,"3OJSZ2ATDT8PRO52TU18R02M7X7751","W8","Wed Jan 24 09:37:55 PST 2018","Wed Jan 24 09:39:57 PST 2018","-","B" 8 | 3,"3NVC2EB65RB5STEZ2K3FPGOZX0PY33","W1","Wed Jan 24 09:41:10 PST 2018","Wed Jan 24 09:41:19 PST 2018","-","E" 9 | 3,"3NPI0JQDAPHGIWKSQCDNU2ZLNX1PTP","W3","Wed Jan 24 12:45:40 PST 2018","Wed Jan 24 12:45:57 PST 2018","-","D" 10 | 3,"354P56DE9LFQXQ3D5QEGKBCELHF7S3","W7","Wed Jan 24 09:38:57 PST 2018","Wed Jan 24 09:39:17 PST 2018","-","A" 11 | 4,"3Z7EFSHGNAQ45OK8QHSEZ5LK18KCXN","W16","Wed Jan 24 10:56:45 PST 2018","Wed Jan 24 11:02:33 PST 2018","-","C" 12 | 4,"3H0W84IWBLEZ5QIAD9SH0BZZL20ER8","W6","Wed Jan 24 09:39:34 PST 2018","Wed Jan 24 09:43:51 PST 2018","-","A" 13 | 
4,"3E1QT0TDFQL53Q50AJC8U92MN3L8I1","W2","Wed Jan 24 09:47:08 PST 2018","Wed Jan 24 09:47:16 PST 2018","-","A" 14 | 5,"345LHZDEDY4OBKUUSJ6QMVIM1CEU31","W4","Wed Jan 24 09:39:09 PST 2018","Wed Jan 24 09:43:59 PST 2018","-","C" 15 | 5,"3H8DHMCCWAN8QGRFRU80THV0Q0OKDF","W19","Wed Jan 24 11:22:10 PST 2018","Wed Jan 24 11:22:23 PST 2018","-","E" 16 | 5,"3WYGZ5XF3XRES5AXOK3POGH0R1FKSM","W9","Wed Jan 24 09:40:22 PST 2018","Wed Jan 24 09:58:58 PST 2018","-","A" 17 | 6,"3O7L7BFSHF1MCRFRNMY8MJY8Y7TEII","W13","Wed Jan 24 10:56:10 PST 2018","Wed Jan 24 10:59:45 PST 2018","-","E" 18 | 6,"34Z02EIMITPDUFG89RYXF1YDFMNT0E","W5","Wed Jan 24 09:41:10 PST 2018","Wed Jan 24 09:41:22 PST 2018","-","D" 19 | 6,"3OJSZ2ATDT8PRO52TU18R02M7X8570","W20","Wed Jan 24 09:41:43 PST 2018","Wed Jan 24 09:41:50 PST 2018","-","A" 20 | 7,"33L7PJKHCHAVC4LB4VBC8G4VYLJT8N","W3","Wed Jan 24 10:19:50 PST 2018","Wed Jan 24 10:20:30 PST 2018","-","D" 21 | 7,"33IZTU6J82DKC594N3IEEAP2NBXSXO","W11","Wed Jan 24 10:34:22 PST 2018","Wed Jan 24 10:34:36 PST 2018","-","C" 22 | 7,"39L1G8WVWR382N3WQVW6BTDPQFH31B","W13","Wed Jan 24 09:42:42 PST 2018","Wed Jan 24 09:42:54 PST 2018","-","E" 23 | 8,"3X1FV8S5JY3PIBVISONF1B9EJOSVGZ","W13","Wed Jan 24 09:40:55 PST 2018","Wed Jan 24 09:41:16 PST 2018","-","D" 24 | 8,"3GU1KF0O4JDGMAQBQQM2U8TZHPIPBE","W8","Wed Jan 24 11:22:23 PST 2018","Wed Jan 24 11:22:36 PST 2018","-","D" 25 | 8,"35USIKEBNSSADDX3EKS2F32C38RN6E","W20","Wed Jan 24 10:35:21 PST 2018","Wed Jan 24 10:35:39 PST 2018","-","D" 26 | 9,"3XM0HYN6NLBETD2V0HOSVGZKKKCEPG","W17","Wed Jan 24 09:39:52 PST 2018","Wed Jan 24 09:40:04 PST 2018","-","E" 27 | 9,"3KGTPGBS6YXZQ2YBOJSL3LDUVYK2U4","W6","Wed Jan 24 10:04:06 PST 2018","Wed Jan 24 10:04:15 PST 2018","-","C" 28 | 9,"3A1COHJ8NK757VUG41UNUX17NRG8HK","W16","Wed Jan 24 10:55:49 PST 2018","Wed Jan 24 10:58:15 PST 2018","-","E" 29 | -------------------------------------------------------------------------------- /test/test_data/load/platform_amt2.csv: 
-------------------------------------------------------------------------------- 1 | "HITId","AssignmentId","WorkerId","AcceptTime","SubmitTime","Input.input","Answer.output" 2 | 1,"3ZPPDN2SLW8T1QMOJSSV9QZ5FGWE9N","W10","Wed Jan 24 09:49:16 PST 2018","Wed Jan 24 09:49:26 PST 2018","-","E" 3 | 1,"3IOEN3P9S8V7Z6QE9XU4IWP909V16X","W16","Wed Jan 24 12:55:39 PST 2018","Wed Jan 24 12:56:04 PST 2018","-","B" 4 | 1,"3E4GGUZ1T93LN6TZU5OJXY4WT7R2KC","W3","Wed Jan 24 10:11:00 PST 2018","Wed Jan 24 10:11:12 PST 2018","-","E" 5 | 1,"3YDTZAI2WYSTKJM0Q91PLY5V4GW41I","W14","Wed Jan 24 10:19:11 PST 2018","Wed Jan 24 10:19:18 PST 2018","-","B" 6 | 1,"3TK8OJTYM2XVVOOHV7D2VKN6KFIVP6","W8","Wed Jan 24 09:57:15 PST 2018","Wed Jan 24 09:57:59 PST 2018","-","C" 7 | 1,"36V4Q8R5ZLCX5DVNHSR08TU3YKDQM0","W1","Wed Jan 24 11:27:09 PST 2018","Wed Jan 24 11:27:32 PST 2018","-","D" 8 | 1,"3IGI0VL648W02J37EGEWW3O3O7HONY","W20","Wed Jan 24 09:38:55 PST 2018","Wed Jan 24 09:39:27 PST 2018","-","C" 9 | 1,"3OJSZ2ATDT8PRO52TU18R02M7X7751","W11","Wed Jan 24 09:37:55 PST 2018","Wed Jan 24 09:39:57 PST 2018","-","C" 10 | 1,"34FNN24DCNL86813G3R84DMJBWK5Y4","W12","Wed Jan 24 09:56:50 PST 2018","Wed Jan 24 09:57:07 PST 2018","-","B" 11 | 2,"3Z3ZLGNNSJ6WJRNDBAMLCULXYU4Q3U","W19","Wed Jan 24 12:06:31 PST 2018","Wed Jan 24 12:06:34 PST 2018","-","E" 12 | 2,"3HSYG7LRBKAG4MXPJBUNXUQQA75KK8","W17","Wed Jan 24 09:54:22 PST 2018","Wed Jan 24 09:55:35 PST 2018","-","C" 13 | 2,"35GCEFQ6I605XJO4Y65Y3NX13UWZ3Y","W13","Wed Jan 24 09:58:08 PST 2018","Wed Jan 24 10:05:26 PST 2018","-","A" 14 | 2,"37XITHEISXLK8SYWHIMMANZW0QRCRH","W11","Wed Jan 24 09:58:53 PST 2018","Wed Jan 24 09:59:08 PST 2018","-","B" 15 | 2,"3LBXNTKX0S727QD5EXINXFKCVM8X9G","W20","Wed Jan 24 09:55:54 PST 2018","Wed Jan 24 10:00:33 PST 2018","-","B" 16 | 2,"3J2UYBXQQMO7SH5WDEQ44R6ZVED60H","W10","Wed Jan 24 09:43:12 PST 2018","Wed Jan 24 09:43:20 PST 2018","-","D" 17 | 2,"3HPZF4IVNN5T5TAIVDEGIJST3ETYCU","W2","Wed Jan 24 10:24:55 PST 2018","Wed Jan 24 
10:25:09 PST 2018","-","C" 18 | 2,"3N4BPTXIO94U7I21DPYP3HZOC02KU8","W4","Wed Jan 24 11:39:33 PST 2018","Wed Jan 24 11:39:41 PST 2018","-","B" 19 | 2,"33TIN5LC05MR7V3FJT4PGTRLXU19Y1","W5","Wed Jan 24 11:39:17 PST 2018","Wed Jan 24 11:39:25 PST 2018","-","B" 20 | 3,"3CTOC39K382XYNKNIW52SALRPYHJ7E","W4","Wed Jan 24 10:15:18 PST 2018","Wed Jan 24 10:15:31 PST 2018","-","E" 21 | 3,"323Q6SJS8JSEMA43CF0T4PITAUHFHU","W13","Wed Jan 24 09:40:46 PST 2018","Wed Jan 24 09:41:38 PST 2018","-","B" 22 | 3,"33UKMF931AWEAODZ7TM7ZC0ICZ2TTC","W6","Wed Jan 24 11:21:39 PST 2018","Wed Jan 24 11:21:59 PST 2018","-","B" 23 | 3,"379J5II41PSO2SNX8PVLEZQ0FABLEP","W5","Wed Jan 24 09:41:10 PST 2018","Wed Jan 24 09:41:39 PST 2018","-","C" 24 | 3,"3JZQSN0I3RM0ME9SS9IBJP6SVVMGFG","W8","Wed Jan 24 10:17:34 PST 2018","Wed Jan 24 10:18:43 PST 2018","-","D" 25 | 3,"39OWYR0EPL3085USMF4HQPDW0Q8YFQ","W1","Wed Jan 24 10:14:23 PST 2018","Wed Jan 24 10:14:35 PST 2018","-","C" 26 | 3,"3LOTDFNYA8BWJ0ZS7EMT5SSGPIXWFE","W10","Wed Jan 24 09:57:59 PST 2018","Wed Jan 24 09:59:41 PST 2018","-","C" 27 | 3,"3N2BF7Y2VR6KSKWKY246ZF227WCHMO","W11","Wed Jan 24 10:18:40 PST 2018","Wed Jan 24 10:18:46 PST 2018","-","E" 28 | 3,"3QFUFYSY9ZRKAYG8X8GAA09P5SPF48","W20","Wed Jan 24 12:04:22 PST 2018","Wed Jan 24 12:04:25 PST 2018","-","B" 29 | 4,"37W3JXSD67K2JRGNC4EHUDZWVTLWY9","W1","Wed Jan 24 09:45:34 PST 2018","Wed Jan 24 09:46:28 PST 2018","-","D" 30 | 4,"3MD9PLUKKJQC1NHEGPPDC1V78DANZT","W6","Wed Jan 24 11:28:50 PST 2018","Wed Jan 24 11:29:07 PST 2018","-","D" 31 | 4,"3OB0CAO74I1KM1LYH5SHTAP9G57YHA","W18","Wed Jan 24 09:41:37 PST 2018","Wed Jan 24 09:47:34 PST 2018","-","A" 32 | 4,"3OXV7EAXLF23994TW18N6P46YQ036C","W14","Wed Jan 24 09:56:41 PST 2018","Wed Jan 24 09:57:01 PST 2018","-","D" 33 | 4,"3MYYFCXHJ4JQOYC38SMSSL9164W4GX","W13","Wed Jan 24 09:44:40 PST 2018","Wed Jan 24 09:48:46 PST 2018","-","A" 34 | 4,"3QJOXOW4XK388A5I81W0EC5PFHIEM5","W5","Wed Jan 24 11:17:10 PST 2018","Wed Jan 24 11:17:22 PST 2018","-","C" 35 | 
4,"3LJ7UR74RIPWYQTPVWZNI5IY7GI4N5","W8","Wed Jan 24 09:44:48 PST 2018","Wed Jan 24 09:48:51 PST 2018","-","D" 36 | 4,"3HUTX6F6VVZ4DXEESXGC4FL5DCU2OP","W17","Wed Jan 24 09:59:01 PST 2018","Wed Jan 24 10:01:04 PST 2018","-","E" 37 | 4,"32N49TQG3HUO8KIF6OV55PYBLWBAV4","W3","Wed Jan 24 12:06:22 PST 2018","Wed Jan 24 12:06:25 PST 2018","-","B" 38 | 5,"3X31TUMD7YYPIRTLW1067F76Q2XL1Y","W13","Wed Jan 24 10:23:56 PST 2018","Wed Jan 24 10:24:01 PST 2018","-","D" 39 | 5,"36TFCYNS45MVMWVOC96FAWI0EZDHXN","W16","Wed Jan 24 09:40:23 PST 2018","Wed Jan 24 09:40:46 PST 2018","-","B" 40 | 5,"31UV0MXWNROMG8MYCL55IPSVLDJ5IV","W12","Wed Jan 24 09:42:56 PST 2018","Wed Jan 24 09:43:17 PST 2018","-","E" 41 | 5,"3JPSL1DZ5TBB0F964X9EV84YZQKANS","W19","Wed Jan 24 10:06:15 PST 2018","Wed Jan 24 10:19:05 PST 2018","-","D" 42 | 5,"32RIADZISTGTE3LU3SG9HZS4UFQS45","W14","Wed Jan 24 09:46:36 PST 2018","Wed Jan 24 09:51:34 PST 2018","-","D" 43 | 5,"3URFVVM166UPWD1NDQ1ZQWN1G7GZU6","W10","Wed Jan 24 09:58:40 PST 2018","Wed Jan 24 09:58:57 PST 2018","-","B" 44 | 5,"3TXMY6UCAF0KWRJW43XM5XPR44LCQC","W5","Wed Jan 24 09:39:46 PST 2018","Wed Jan 24 09:39:54 PST 2018","-","E" 45 | 5,"3Q5C1WP23NDBG2Q04EKZQQB94B8513","W1","Wed Jan 24 09:45:12 PST 2018","Wed Jan 24 09:45:37 PST 2018","-","B" 46 | 5,"3WQQ9FUS6B6VGIRU49KR8MY9SESB8O","W6","Wed Jan 24 10:25:43 PST 2018","Wed Jan 24 10:25:48 PST 2018","-","D" 47 | 6,"3QEMNNSB2YBKV1KV80JRPCQEKE7D7H","W17","Wed Jan 24 09:49:10 PST 2018","Wed Jan 24 10:30:45 PST 2018","-","A" 48 | 6,"3EF8EXOTT27JN15LTGOTR0UEMEMJ1Q","W2","Wed Jan 24 11:23:43 PST 2018","Wed Jan 24 11:24:09 PST 2018","-","D" 49 | 6,"3A1COHJ8NK757VUG41UNUX17NQ8H8J","W5","Wed Jan 24 09:59:14 PST 2018","Wed Jan 24 10:06:11 PST 2018","-","E" 50 | 6,"3G5F9DBFOQ93I7QTCU5XMBLFYO7HVM","W19","Wed Jan 24 09:50:11 PST 2018","Wed Jan 24 09:50:42 PST 2018","-","C" 51 | 6,"3JBT3HLQF9EC48TRW425GQ028QLZP7","W14","Wed Jan 24 09:40:52 PST 2018","Wed Jan 24 09:50:49 PST 2018","-","D" 52 | 
6,"3634BBTX0P6EI5FNIXIE41J8AAXIFX","W10","Wed Jan 24 12:44:04 PST 2018","Wed Jan 24 12:45:03 PST 2018","-","A" 53 | 6,"3QRYMNZ7FZTGFB8FAX0VQDUQNFYNTJ","W6","Wed Jan 24 09:40:37 PST 2018","Wed Jan 24 09:40:58 PST 2018","-","E" 54 | 6,"3OWEPKL08AOTHDB8XUXULH28YI7N77","W11","Wed Jan 24 09:39:58 PST 2018","Wed Jan 24 09:58:11 PST 2018","-","A" 55 | 6,"3OLQQLKKNT1I72ZX54J8YKD8BNMEJ0","W4","Wed Jan 24 11:10:09 PST 2018","Wed Jan 24 11:10:45 PST 2018","-","B" 56 | 7,"36WLNQG780MCPJ1LHC6R0WUV3JUBEV","W16","Wed Jan 24 12:10:50 PST 2018","Wed Jan 24 12:11:34 PST 2018","-","B" 57 | 7,"3B2X28YI3XR8C486BUNTO82IMJ1B61","W3","Wed Jan 24 10:24:50 PST 2018","Wed Jan 24 10:25:31 PST 2018","-","D" 58 | 7,"3HMIGG0U4MIRTQK5E1YBGXWADMHY8V","W1","Wed Jan 24 09:41:15 PST 2018","Wed Jan 24 09:43:47 PST 2018","-","E" 59 | 7,"3IXEICO793V88Q2NBGEEO284SRA6TX","W10","Wed Jan 24 11:38:21 PST 2018","Wed Jan 24 11:38:29 PST 2018","-","B" 60 | 7,"3KGTPGBS6YXZQ2YBOJSL3LDUVZ6U2K","W13","Wed Jan 24 10:25:41 PST 2018","Wed Jan 24 10:25:50 PST 2018","-","A" 61 | 7,"34Z02EIMITPDUFG89RYXF1YDFNM0TM","W8","Wed Jan 24 10:30:24 PST 2018","Wed Jan 24 10:30:29 PST 2018","-","B" 62 | 7,"3KYQYYSHYWJRG7CUPM0T0YC9A3PDOF","W18","Wed Jan 24 10:10:03 PST 2018","Wed Jan 24 10:22:22 PST 2018","-","E" 63 | 7,"3QFUFYSY9ZRKAYG8X8GAA09P5PVF48","W6","Wed Jan 24 09:40:05 PST 2018","Wed Jan 24 09:46:21 PST 2018","-","D" 64 | 7,"3DY4FPOOA20G714W8ZHR0FNLLX3RVR","W14","Wed Jan 24 09:39:36 PST 2018","Wed Jan 24 09:39:49 PST 2018","-","D" 65 | -------------------------------------------------------------------------------- /test/test_data/load/platform_amt3.csv: -------------------------------------------------------------------------------- 1 | "HITId","AssignmentId","WorkerId","AcceptTime","SubmitTime","Input.input","Answer.output" 2 | 1,"3LQ8PUHQFM4YW16TDN6E7MVLQM9HIN","W16","Wed Jan 24 09:39:59 PST 2018","Wed Jan 24 09:40:22 PST 2018","-","E" 3 | 1,"3L2IS5HSFBUVFON4A2Q5V9GS6W2UNK","W15","Wed Jan 24 12:15:32 PST 2018","Wed Jan 
24 12:16:27 PST 2018","-","D" 4 | 1,"3FE7TXL1LJZ7Y9RU7A8GTKG5SD8Q2H","W5","Wed Jan 24 09:40:43 PST 2018","Wed Jan 24 09:40:55 PST 2018","-","A" 5 | 1,"3ERET4BTVNLWNQEYFE3Y7LCAZER9K3","W18","Wed Jan 24 10:32:11 PST 2018","Wed Jan 24 10:32:18 PST 2018","-","E" 6 | 1,"3KXIR214I5S0944W9D13QHF0WON24E","W8","Wed Jan 24 12:11:12 PST 2018","Wed Jan 24 12:13:10 PST 2018","-","C" 7 | 1,"3ATTHHXXWB0VIR68IRO0A8JFECSXIW","W19","Wed Jan 24 09:40:27 PST 2018","Wed Jan 24 09:48:53 PST 2018","-","E" 8 | 2,"30IQTZXKALIKBWPSE1ZU44JJSL80X7","W17","Wed Jan 24 12:08:48 PST 2018","Wed Jan 24 12:09:43 PST 2018","-","D" 9 | 2,"30OG32W0SVNEQSQIK2B0RRUPK69ENK","W13","Wed Jan 24 10:29:55 PST 2018","Wed Jan 24 10:35:17 PST 2018","-","D" 10 | 2,"39L1G8WVWR382N3WQVW6BTDPQFH31B","W19","Wed Jan 24 09:42:42 PST 2018","Wed Jan 24 09:42:54 PST 2018","-","A" 11 | 2,"3AZHRG4CU5W8VYIEUCS6XOUFLNO03I","W9","Wed Jan 24 10:13:58 PST 2018","Wed Jan 24 10:14:30 PST 2018","-","D" 12 | 2,"3JJVG1YBEC9CT08S6BDOW2OIQQT5BM","W5","Wed Jan 24 12:05:30 PST 2018","Wed Jan 24 12:05:32 PST 2018","-","E" 13 | 2,"3OVR4I9USQVH1N6HBOR4MZD3CRGQ4P","W14","Wed Jan 24 10:12:05 PST 2018","Wed Jan 24 10:23:03 PST 2018","-","D" 14 | 3,"3LYA37P8IRZFBJTVDYHUYR70NGNBKC","W5","Wed Jan 24 12:04:08 PST 2018","Wed Jan 24 12:04:10 PST 2018","-","C" 15 | 3,"3LYA37P8IRZFBJTVDYHUYR70NEIKBC","W12","Wed Jan 24 10:23:07 PST 2018","Wed Jan 24 10:23:56 PST 2018","-","D" 16 | 3,"3BXQMRHWK0APX624Y1B5IURS16QMUA","W7","Wed Jan 24 12:05:27 PST 2018","Wed Jan 24 12:05:30 PST 2018","-","C" 17 | 3,"3W2LOLRXLCR2RQAK9V3KY6C45ZWKRY","W11","Wed Jan 24 09:40:20 PST 2018","Wed Jan 24 09:45:41 PST 2018","-","C" 18 | 3,"3QECW5O0KIDCPM296X49WWU4WXUT51","W6","Wed Jan 24 12:07:08 PST 2018","Wed Jan 24 12:07:11 PST 2018","-","A" 19 | 3,"3UOUJI6MTEQ0RI18C8DETB7Q87FXU4","W3","Wed Jan 24 09:40:44 PST 2018","Wed Jan 24 09:49:20 PST 2018","-","D" 20 | 4,"3H8DHMCCWAN8QGRFRU80THV0Q0OKDF","W19","Wed Jan 24 11:22:10 PST 2018","Wed Jan 24 11:22:23 PST 2018","-","B" 21 | 
4,"3HRMW88U1729HKQO0UXLTN3LLESM07","W12","Wed Jan 24 09:43:14 PST 2018","Wed Jan 24 09:43:32 PST 2018","-","A" 22 | 4,"3EJPLAJKENS4U2BMH83WAQ9LDLVZ61","W15","Wed Jan 24 09:42:36 PST 2018","Wed Jan 24 09:45:39 PST 2018","-","B" 23 | 4,"3TK8OJTYM2XVVOOHV7D2VKN6KFIVP6","W14","Wed Jan 24 09:57:15 PST 2018","Wed Jan 24 09:57:59 PST 2018","-","B" 24 | 4,"3P529IW9KZXG86NT30FRWHB0RBQFL1","W2","Wed Jan 24 09:41:08 PST 2018","Wed Jan 24 09:41:32 PST 2018","-","E" 25 | 4,"3PS7W85Z80EVQ94L9NHQ8CZ6IQK9T9","W10","Wed Jan 24 09:53:51 PST 2018","Wed Jan 24 09:56:49 PST 2018","-","E" 26 | 5,"3NPFYT4IZDGHM00DS3OXPW6ZZ2WXG6","W8","Wed Jan 24 12:07:05 PST 2018","Wed Jan 24 12:07:08 PST 2018","-","E" 27 | 5,"3M1CVSFP61HW3CMYKP1VZ2Z3426QAW","W1","Wed Jan 24 10:37:19 PST 2018","Wed Jan 24 10:37:41 PST 2018","-","E" 28 | 5,"3H7XDTSHKD32X0VNIYSBNDSSQYYWGG","W18","Wed Jan 24 10:35:16 PST 2018","Wed Jan 24 10:35:21 PST 2018","-","D" 29 | 5,"36W0OB37HXQKRRV3DRU07HYBM0UHZ5","W5","Wed Jan 24 10:18:44 PST 2018","Wed Jan 24 10:19:27 PST 2018","-","C" 30 | 5,"3VZLGYJEYMMHDHVIIVCMA3M5APXXZW","W12","Wed Jan 24 09:56:44 PST 2018","Wed Jan 24 09:57:15 PST 2018","-","D" 31 | 5,"3QEMNNSB2YBKV1KV80JRPCQEKE7D7H","W19","Wed Jan 24 09:49:10 PST 2018","Wed Jan 24 10:30:45 PST 2018","-","E" 32 | -------------------------------------------------------------------------------- /test/test_data/load/platform_amt4.csv: -------------------------------------------------------------------------------- 1 | "HITId","AssignmentId","WorkerId","AcceptTime","SubmitTime","Input.input","Answer.output" 2 | 1,"3S4AW7T80CU6AQQ4JJJ85DE92LH4L2","W11","Wed Jan 24 09:37:51 PST 2018","Wed Jan 24 09:38:11 PST 2018","-","B" 3 | 1,"3IHR8NYAM8DW1B52BBCFMUNT7VMP4P","W8","Wed Jan 24 09:59:09 PST 2018","Wed Jan 24 09:59:27 PST 2018","-","C" 4 | 1,"35K3O9HUACP220D1MESNJMLGIHJFEE","W13","Wed Jan 24 12:04:25 PST 2018","Wed Jan 24 12:04:40 PST 2018","-","D" 5 | 1,"3LRLIPTPERLAS9O37KHY3GNU5V8AKG","W12","Wed Jan 24 09:39:13 PST 2018","Wed Jan 24 
09:39:25 PST 2018","-","A" 6 | 1,"39LNWE0K4V831NCDDMDDQ1KBKUCUI2","W16","Wed Jan 24 09:45:43 PST 2018","Wed Jan 24 09:47:48 PST 2018","-","D" 7 | 1,"36H9ULYP636ADGZT0F3EG2Y8NMOFJ2","W6","Wed Jan 24 09:55:00 PST 2018","Wed Jan 24 09:55:15 PST 2018","-","D" 8 | 1,"3FE2ERCCZYK054FKUVRO0STYOMUOP6","W1","Wed Jan 24 10:12:22 PST 2018","Wed Jan 24 10:23:21 PST 2018","-","C" 9 | 1,"3J2UYBXQQMO7SH5WDEQ44R6ZVED06B","W4","Wed Jan 24 09:41:51 PST 2018","Wed Jan 24 09:42:18 PST 2018","-","C" 10 | 2,"30H4UDGLT3UC51U8HFCB8O4TTMAPMN","W4","Wed Jan 24 10:06:00 PST 2018","Wed Jan 24 10:06:18 PST 2018","-","D" 11 | 2,"33TIN5LC05MR7V3FJT4PGTRLXS69Y2","W12","Wed Jan 24 10:05:57 PST 2018","Wed Jan 24 10:15:55 PST 2018","-","C" 12 | 2,"3VJ40NV2QJZYXW8RBC0852UVDP4OT0","W3","Wed Jan 24 09:45:19 PST 2018","Wed Jan 24 09:49:23 PST 2018","-","B" 13 | 2,"39GHHAVOMG30F024GY7VPY1HYYVJ42","W6","Wed Jan 24 11:22:50 PST 2018","Wed Jan 24 11:23:10 PST 2018","-","B" 14 | 2,"3KMS4QQVK325O05BI03QHHYL2D1KFN","W14","Wed Jan 24 11:11:14 PST 2018","Wed Jan 24 11:12:55 PST 2018","-","C" 15 | 2,"33M4IA01QHD8BQ9R8EI9O0K619UXRL","W16","Wed Jan 24 10:18:54 PST 2018","Wed Jan 24 10:25:26 PST 2018","-","D" 16 | 2,"3P4RDNWND6IUN71J1N9ZJZB03EDIJT","W15","Wed Jan 24 09:41:03 PST 2018","Wed Jan 24 09:43:02 PST 2018","-","B" 17 | 2,"3XM0HYN6NLBETD2V0HOSVGZKKL9PEQ","W1","Wed Jan 24 10:29:18 PST 2018","Wed Jan 24 10:29:35 PST 2018","-","E" 18 | -------------------------------------------------------------------------------- /test/test_data/load/platform_amt5.csv: -------------------------------------------------------------------------------- 1 | "HITId","AssignmentId","WorkerId","AcceptTime","SubmitTime","Input.input","Answer.output" 2 | 1,"3OB0CAO74I1KM1LYH5SHTAP9G57HYT","W15","Wed Jan 24 09:40:27 PST 2018","Wed Jan 24 09:40:50 PST 2018","-","E" 3 | 1,"3TYCR1GOTDVMDNET4C9YZ9RI3GLLZW","W4","Wed Jan 24 09:43:19 PST 2018","Wed Jan 24 09:46:39 PST 2018","-","C" 4 | 2,"3ZOTGHDK5JNXIWVCMCVIVXY9B87SO0","W10","Wed Jan 24 
11:21:19 PST 2018","Wed Jan 24 11:21:25 PST 2018","-","A" 5 | 2,"384PI804XTDCFF3TKRT75W2RD3B0SY","W16","Wed Jan 24 09:46:55 PST 2018","Wed Jan 24 09:51:56 PST 2018","-","D" 6 | 3,"3HFNH7HEMIQXD3Z16PT5VOA62X0GQF","W8","Wed Jan 24 10:21:28 PST 2018","Wed Jan 24 10:28:39 PST 2018","-","A" 7 | 3,"3H7XDTSHKD32X0VNIYSBNDSSQXUWGA","W2","Wed Jan 24 09:42:09 PST 2018","Wed Jan 24 09:42:28 PST 2018","-","D" 8 | 4,"3RXPCZQMQQN9WZFKLA3QUN2YUKMG1W","W15","Wed Jan 24 09:46:24 PST 2018","Wed Jan 24 10:20:20 PST 2018","-","C" 9 | 4,"3MB8LZR5BG5ROSF74B6OCU35Z79KLA","W5","Wed Jan 24 09:50:40 PST 2018","Wed Jan 24 09:53:28 PST 2018","-","D" 10 | 5,"3BF51CHDTWMNP6PD9XH81IVP4SVH0V","W14","Wed Jan 24 09:53:11 PST 2018","Wed Jan 24 09:53:59 PST 2018","-","D" 11 | 5,"3H7XDTSHKD32X0VNIYSBNDSSQYSWGA","W13","Wed Jan 24 10:30:33 PST 2018","Wed Jan 24 10:30:39 PST 2018","-","A" 12 | 6,"3A0EX8ZRN90AVOICHDIL7ZMBLTEBYL","W19","Wed Jan 24 10:56:17 PST 2018","Wed Jan 24 10:59:53 PST 2018","-","B" 13 | 6,"3TK8OJTYM2XVVOOHV7D2VKN6KFIVP6","W5","Wed Jan 24 09:57:15 PST 2018","Wed Jan 24 09:57:59 PST 2018","-","A" 14 | -------------------------------------------------------------------------------- /test/test_data/load/platform_cf1.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_id","_worker_id","_started_at","_created_at","input","Answer.output" 2 | "1",1321129167,"W12","7/12/2014 01:55:25","7/12/2014 01:55:49","-","B" 3 | "1",1321137419,"W10","7/12/2014 02:15:56","7/12/2014 02:16:11","-","B" 4 | "1",1321080237,"W9","7/12/2014 00:03:21","7/12/2014 00:03:39","-","C" 5 | "2",1321121353,"W6","7/12/2014 01:31:26","7/12/2014 01:31:56","-","C" 6 | "2",1321267092,"W18","7/12/2014 08:21:47","7/12/2014 08:21:55","-","E" 7 | "2",1321121859,"W8","7/12/2014 01:32:52","7/12/2014 01:33:10","-","B" 8 | "3",1321168231,"W1","7/12/2014 03:37:07","7/12/2014 03:38:05","-","E" 9 | "3",1321082053,"W3","7/12/2014 00:06:24","7/12/2014 00:06:45","-","D" 10 | 
"3",1321096929,"W7","7/12/2014 00:32:51","7/12/2014 00:33:24","-","A" 11 | "4",1321131984,"W16","7/12/2014 02:02:47","7/12/2014 02:03:28","-","C" 12 | "4",1321202505,"W6","7/12/2014 05:08:47","7/12/2014 05:09:52","-","A" 13 | "4",1321179245,"W2","7/12/2014 04:03:59","7/12/2014 04:04:41","-","A" 14 | "5",1321258930,"W4","7/12/2014 07:54:46","7/12/2014 07:55:17","-","C" 15 | "5",1321194407,"W19","7/12/2014 04:44:52","7/12/2014 04:46:10","-","E" 16 | "5",1321136734,"W9","7/12/2014 02:14:03","7/12/2014 02:14:50","-","A" 17 | "6",1321172593,"W13","7/12/2014 03:48:02","7/12/2014 03:49:24","-","E" 18 | "6",1321166782,"W5","7/12/2014 03:34:12","7/12/2014 03:34:32","-","D" 19 | "6",1321094216,"W20","7/12/2014 00:27:55","7/12/2014 00:28:14","-","A" 20 | "7",1321085431,"W3","7/12/2014 00:12:03","7/12/2014 00:12:20","-","D" 21 | "7",1321166017,"W11","7/12/2014 03:31:19","7/12/2014 03:32:17","-","C" 22 | "7",1321131660,"W13","7/12/2014 02:00:47","7/12/2014 02:02:47","-","E" 23 | "8",1321103230,"W13","7/12/2014 00:46:32","7/12/2014 00:47:34","-","D" 24 | "8",1321122489,"W8","7/12/2014 01:34:04","7/12/2014 01:34:31","-","D" 25 | "8",1321075328,"W20","7/11/2014 23:53:42","7/11/2014 23:54:35","-","D" 26 | "9",1321130962,"W17","7/12/2014 01:59:08","7/12/2014 02:01:02","-","E" 27 | "9",1321167533,"W6","7/12/2014 03:35:43","7/12/2014 03:35:56","-","C" 28 | "9",1321101871,"W16","7/12/2014 00:43:26","7/12/2014 00:44:06","-","E" 29 | -------------------------------------------------------------------------------- /test/test_data/load/platform_cf2.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_id","_worker_id","_started_at","_created_at","input","Answer.output" 2 | "1",1321145296,"W10","7/12/2014 02:34:17","7/12/2014 02:35:41","-","E" 3 | "1",1321089302,"W16","7/12/2014 00:18:37","7/12/2014 00:19:01","-","B" 4 | "1",1321109289,"W3","7/12/2014 01:02:42","7/12/2014 01:03:20","-","E" 5 | "1",1321098090,"W14","7/12/2014 
00:35:22","7/12/2014 00:35:40","-","B" 6 | "1",1321164719,"W8","7/12/2014 03:27:39","7/12/2014 03:27:54","-","C" 7 | "1",1321261178,"W1","7/12/2014 08:00:47","7/12/2014 08:01:54","-","D" 8 | "1",1321176168,"W20","7/12/2014 03:56:00","7/12/2014 03:57:48","-","C" 9 | "1",1321096929,"W11","7/12/2014 00:32:51","7/12/2014 00:33:24","-","C" 10 | "1",1321145225,"W12","7/12/2014 02:34:25","7/12/2014 02:35:23","-","B" 11 | "2",1321100712,"W19","7/12/2014 00:40:24","7/12/2014 00:41:20","-","E" 12 | "2",1321084421,"W17","7/12/2014 00:10:26","7/12/2014 00:10:35","-","C" 13 | "2",1321200351,"W13","7/12/2014 05:03:51","7/12/2014 05:04:34","-","A" 14 | "2",1321109036,"W11","7/12/2014 01:01:58","7/12/2014 01:02:41","-","B" 15 | "2",1321194407,"W20","7/12/2014 04:44:52","7/12/2014 04:46:10","-","B" 16 | "2",1321131807,"W10","7/12/2014 02:01:49","7/12/2014 02:03:06","-","D" 17 | "2",1321182256,"W2","7/12/2014 04:10:14","7/12/2014 04:11:15","-","C" 18 | "2",1321097354,"W4","7/12/2014 00:33:51","7/12/2014 00:34:14","-","B" 19 | "2",1321116145,"W5","7/12/2014 01:19:38","7/12/2014 01:20:15","-","B" 20 | "3",1321199944,"W4","7/12/2014 05:02:37","7/12/2014 05:03:44","-","E" 21 | "3",1321144591,"W13","7/12/2014 02:32:51","7/12/2014 02:33:30","-","B" 22 | "3",1321271857,"W6","7/12/2014 08:35:01","7/12/2014 08:35:29","-","B" 23 | "3",1321091661,"W5","7/12/2014 00:22:18","7/12/2014 00:23:10","-","C" 24 | "3",1321074163,"W8","7/11/2014 23:51:36","7/11/2014 23:52:18","-","D" 25 | "3",1321095794,"W1","7/12/2014 00:30:33","7/12/2014 00:31:10","-","C" 26 | "3",1321259405,"W10","7/12/2014 07:55:18","7/12/2014 07:56:39","-","C" 27 | "3",1321178914,"W11","7/12/2014 04:03:10","7/12/2014 04:03:58","-","E" 28 | "3",1321259582,"W20","7/12/2014 07:56:52","7/12/2014 07:57:16","-","B" 29 | "4",1321084083,"W1","7/12/2014 00:09:41","7/12/2014 00:09:59","-","D" 30 | "4",1321106262,"W6","7/12/2014 00:54:24","7/12/2014 00:55:15","-","D" 31 | "4",1321107201,"W18","7/12/2014 00:56:15","7/12/2014 00:57:38","-","A" 
32 | "4",1321260561,"W14","7/12/2014 07:59:39","7/12/2014 08:00:06","-","D" 33 | "4",1321137423,"W13","7/12/2014 02:15:26","7/12/2014 02:16:12","-","A" 34 | "4",1321230611,"W5","7/12/2014 06:33:03","7/12/2014 06:33:48","-","C" 35 | "4",1321165857,"W8","7/12/2014 03:29:47","7/12/2014 03:31:47","-","D" 36 | "4",1321263386,"W17","7/12/2014 08:08:53","7/12/2014 08:09:19","-","E" 37 | "4",1321298488,"W3","7/12/2014 09:55:51","7/12/2014 09:56:25","-","B" 38 | "5",1321222628,"W13","7/12/2014 06:10:10","7/12/2014 06:10:54","-","D" 39 | "5",1321074998,"W16","7/11/2014 23:51:44","7/11/2014 23:53:41","-","B" 40 | "5",1321075484,"W12","7/11/2014 23:54:02","7/11/2014 23:54:49","-","E" 41 | "5",1321165567,"W19","7/12/2014 03:30:38","7/12/2014 03:30:55","-","D" 42 | "5",1321179846,"W14","7/12/2014 04:05:37","7/12/2014 04:05:59","-","D" 43 | "5",1321081670,"W10","7/12/2014 00:05:43","7/12/2014 00:06:05","-","B" 44 | "5",1321115520,"W5","7/12/2014 01:17:48","7/12/2014 01:18:24","-","E" 45 | "5",1321170440,"W1","7/12/2014 03:43:08","7/12/2014 03:43:48","-","B" 46 | "5",1321104860,"W6","7/12/2014 00:51:22","7/12/2014 00:51:58","-","D" 47 | "6",1321084117,"W17","7/12/2014 00:09:54","7/12/2014 00:10:03","-","A" 48 | "6",1321181323,"W2","7/12/2014 04:08:28","7/12/2014 04:09:24","-","D" 49 | "6",1321120366,"W5","7/12/2014 01:28:36","7/12/2014 01:29:51","-","E" 50 | "6",1321085061,"W19","7/12/2014 00:11:39","7/12/2014 00:11:47","-","C" 51 | "6",1321260395,"W14","7/12/2014 07:59:19","7/12/2014 07:59:38","-","D" 52 | "6",1321099000,"W10","7/12/2014 00:36:43","7/12/2014 00:37:36","-","A" 53 | "6",1321260550,"W6","7/12/2014 07:58:48","7/12/2014 08:00:03","-","E" 54 | "6",1321267367,"W11","7/12/2014 08:22:40","7/12/2014 08:22:46","-","A" 55 | "6",1321084172,"W4","7/12/2014 00:10:05","7/12/2014 00:10:13","-","B" 56 | "7",1321169247,"W16","7/12/2014 03:39:44","7/12/2014 03:40:41","-","B" 57 | "7",1321261850,"W3","7/12/2014 08:03:28","7/12/2014 08:04:04","-","D" 58 | 
"7",1321083864,"W1","7/12/2014 00:09:22","7/12/2014 00:09:39","-","E" 59 | "7",1321271707,"W10","7/12/2014 08:34:30","7/12/2014 08:35:00","-","B" 60 | "7",1321080494,"W13","7/12/2014 00:02:53","7/12/2014 00:04:01","-","A" 61 | "7",1321225450,"W8","7/12/2014 06:17:21","7/12/2014 06:19:02","-","B" 62 | "7",1321135767,"W18","7/12/2014 02:12:00","7/12/2014 02:12:59","-","E" 63 | "7",1321082158,"W6","7/12/2014 00:06:21","7/12/2014 00:06:56","-","D" 64 | "7",1321138046,"W14","7/12/2014 02:16:13","7/12/2014 02:17:38","-","D" 65 | -------------------------------------------------------------------------------- /test/test_data/load/platform_cf3.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_id","_worker_id","_started_at","_created_at","input","Answer.output" 2 | "1",1321271521,"W16","7/12/2014 08:34:16","7/12/2014 08:34:29","-","E" 3 | "1",1321136241,"W15","7/12/2014 02:13:04","7/12/2014 02:13:57","-","D" 4 | "1",1321271857,"W5","7/12/2014 08:35:01","7/12/2014 08:35:29","-","A" 5 | "1",1321097580,"W18","7/12/2014 00:34:15","7/12/2014 00:34:44","-","E" 6 | "1",1321166082,"W8","7/12/2014 03:31:48","7/12/2014 03:32:34","-","C" 7 | "1",1321285795,"W19","7/12/2014 09:18:49","7/12/2014 09:19:31","-","E" 8 | "2",1321258728,"W17","7/12/2014 07:53:14","7/12/2014 07:54:45","-","D" 9 | "2",1321168500,"W13","7/12/2014 03:38:06","7/12/2014 03:38:49","-","D" 10 | "2",1321080955,"W19","7/12/2014 00:04:04","7/12/2014 00:04:44","-","A" 11 | "2",1321180681,"W9","7/12/2014 04:07:29","7/12/2014 04:07:57","-","D" 12 | "2",1321106659,"W5","7/12/2014 00:55:17","7/12/2014 00:56:14","-","E" 13 | "2",1321231022,"W14","7/12/2014 06:34:47","7/12/2014 06:35:20","-","D" 14 | "3",1321102936,"W5","7/12/2014 00:45:56","7/12/2014 00:46:50","-","C" 15 | "3",1321166175,"W12","7/12/2014 03:32:22","7/12/2014 03:32:50","-","D" 16 | "3",1321266695,"W7","7/12/2014 08:20:43","7/12/2014 08:20:49","-","C" 17 | "3",1321129378,"W11","7/12/2014 01:54:13","7/12/2014 
01:56:18","-","C" 18 | "3",1321259197,"W6","7/12/2014 07:55:47","7/12/2014 07:56:08","-","A" 19 | "3",1321132103,"W3","7/12/2014 02:02:41","7/12/2014 02:03:43","-","D" 20 | "4",1321098557,"W19","7/12/2014 00:36:08","7/12/2014 00:36:39","-","B" 21 | "4",1321164021,"W12","7/12/2014 03:25:21","7/12/2014 03:26:05","-","A" 22 | "4",1321075857,"W15","7/11/2014 23:54:36","7/11/2014 23:55:35","-","B" 23 | "4",1321130382,"W14","7/12/2014 01:57:40","7/12/2014 01:59:07","-","B" 24 | "4",1321084675,"W2","7/12/2014 00:10:43","7/12/2014 00:11:08","-","E" 25 | "4",1321080021,"W10","7/12/2014 00:02:55","7/12/2014 00:03:16","-","E" 26 | "5",1321230365,"W8","7/12/2014 06:32:07","7/12/2014 06:33:03","-","E" 27 | "5",1321267713,"W1","7/12/2014 08:23:23","7/12/2014 08:23:54","-","E" 28 | "5",1321138625,"W18","7/12/2014 02:17:38","7/12/2014 02:18:47","-","D" 29 | "5",1321260163,"W5","7/12/2014 07:58:36","7/12/2014 07:59:01","-","C" 30 | "5",1321210108,"W12","7/12/2014 05:29:33","7/12/2014 05:31:35","-","D" 31 | "5",1321099376,"W19","7/12/2014 00:37:39","7/12/2014 00:38:28","-","E" 32 | -------------------------------------------------------------------------------- /test/test_data/load/platform_cf4.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_id","_worker_id","_started_at","_created_at","input","Answer.output" 2 | "1",1321132103,"W11","7/12/2014 02:02:41","7/12/2014 02:03:43","-","B" 3 | "1",1321094336,"W8","7/12/2014 00:27:45","7/12/2014 00:28:28","-","C" 4 | "1",1321168832,"W13","7/12/2014 03:38:50","7/12/2014 03:39:42","-","D" 5 | "1",1321114758,"W12","7/12/2014 01:15:25","7/12/2014 01:16:01","-","A" 6 | "1",1321128865,"W16","7/12/2014 01:53:20","7/12/2014 01:55:02","-","D" 7 | "1",1321166526,"W6","7/12/2014 03:33:26","7/12/2014 03:33:38","-","D" 8 | "1",1321096803,"W1","7/12/2014 00:32:30","7/12/2014 00:33:05","-","C" 9 | "1",1321183813,"W4","7/12/2014 04:13:40","7/12/2014 04:14:24","-","C" 10 | "2",1321262127,"W4","7/12/2014 
08:04:33","7/12/2014 08:05:10","-","D" 11 | "2",1321298274,"W12","7/12/2014 09:54:25","7/12/2014 09:55:49","-","C" 12 | "2",1321271707,"W3","7/12/2014 08:34:30","7/12/2014 08:35:00","-","B" 13 | "2",1321120748,"W6","7/12/2014 01:30:11","7/12/2014 01:30:43","-","B" 14 | "2",1321136186,"W14","7/12/2014 02:12:50","7/12/2014 02:13:49","-","C" 15 | "2",1321167778,"W16","7/12/2014 03:36:21","7/12/2014 03:36:33","-","D" 16 | "2",1321298589,"W15","7/12/2014 09:56:20","7/12/2014 09:56:46","-","B" 17 | "2",1321106701,"W1","7/12/2014 00:54:30","7/12/2014 00:56:20","-","E" 18 | -------------------------------------------------------------------------------- /test/test_data/load/platform_cf5.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_id","_worker_id","_started_at","_created_at","input","Answer.output" 2 | "1",1321176112,"W15","7/12/2014 03:56:29","7/12/2014 03:57:36","-","E" 3 | "1",1321263165,"W4","7/12/2014 08:07:03","7/12/2014 08:08:29","-","C" 4 | "2",1321259197,"W10","7/12/2014 07:55:47","7/12/2014 07:56:08","-","A" 5 | "2",1321117625,"W16","7/12/2014 01:23:25","7/12/2014 01:23:58","-","D" 6 | "3",1321120748,"W8","7/12/2014 01:30:11","7/12/2014 01:30:43","-","A" 7 | "3",1321263295,"W2","7/12/2014 08:08:30","7/12/2014 08:08:52","-","D" 8 | "4",1321267092,"W15","7/12/2014 08:21:47","7/12/2014 08:21:55","-","C" 9 | "4",1321084500,"W5","7/12/2014 00:09:35","7/12/2014 00:10:43","-","D" 10 | "5",1321266657,"W14","7/12/2014 08:20:37","7/12/2014 08:20:42","-","D" 11 | "5",1321298488,"W13","7/12/2014 09:55:51","7/12/2014 09:56:25","-","A" 12 | "6",1321080494,"W19","7/12/2014 00:02:53","7/12/2014 00:04:01","-","B" 13 | "6",1321099670,"W5","7/12/2014 00:36:09","7/12/2014 00:39:04","-","A" 14 | -------------------------------------------------------------------------------- /test/test_data/metrics/10work_agr.csv: -------------------------------------------------------------------------------- 1 | 
"_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 08:21:04",1321266784,"7/12/2014 08:20:58","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 00:09:02",1321083463,"7/12/2014 00:08:49","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 07:56:21",1321259312,"7/12/2014 07:55:00","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/12/2014 00:05:42",1321081420,"7/12/2014 00:05:30","NA","W4","NA","NA","NA","NA","NA","A" 6 | 1,"7/12/2014 00:05:42",1321081421,"7/12/2014 00:05:04","NA","W5","NA","NA","NA","NA","NA","A" 7 | 1,"7/12/2014 03:42:40",1321169989,"7/12/2014 03:41:51","NA","W6","NA","NA","NA","NA","NA","A" 8 | 1,"7/12/2014 00:26:11",1321093237,"7/12/2014 00:25:41","NA","W7","NA","NA","NA","NA","NA","A" 9 | 1,"7/12/2014 00:10:43",1321084500,"7/12/2014 00:09:35","NA","W8","NA","NA","NA","NA","NA","A" 10 | 1,"7/12/2014 02:16:46",1321137674,"7/12/2014 02:16:11","NA","W9","NA","NA","NA","NA","NA","A" 11 | 1,"7/12/2014 00:51:15",1321104588,"7/12/2014 00:50:10","NA","W10","NA","NA","NA","NA","NA","A" 12 | -------------------------------------------------------------------------------- /test/test_data/metrics/10work_disagr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 08:21:04",1321266784,"7/12/2014 08:20:58","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 00:09:02",1321083463,"7/12/2014 00:08:49","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 07:56:21",1321259312,"7/12/2014 07:55:00","NA","W3","NA","NA","NA","NA","NA","C" 5 | 1,"7/12/2014 00:05:42",1321081420,"7/12/2014 00:05:30","NA","W4","NA","NA","NA","NA","NA","D" 6 | 1,"7/12/2014 00:05:42",1321081421,"7/12/2014 00:05:04","NA","W5","NA","NA","NA","NA","NA","E" 7 | 1,"7/12/2014 03:42:40",1321169989,"7/12/2014 
03:41:51","NA","W6","NA","NA","NA","NA","NA","F" 8 | 1,"7/12/2014 00:26:11",1321093237,"7/12/2014 00:25:41","NA","W7","NA","NA","NA","NA","NA","G" 9 | 1,"7/12/2014 00:10:43",1321084500,"7/12/2014 00:09:35","NA","W8","NA","NA","NA","NA","NA","H" 10 | 1,"7/12/2014 02:16:46",1321137674,"7/12/2014 02:16:11","NA","W9","NA","NA","NA","NA","NA","I" 11 | 1,"7/12/2014 00:51:15",1321104588,"7/12/2014 00:50:10","NA","W10","NA","NA","NA","NA","NA","J" 12 | -------------------------------------------------------------------------------- /test/test_data/metrics/10work_outlier.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 02:24:17",1321141190,"7/12/2014 02:23:26","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 04:14:24",1321183813,"7/12/2014 04:13:40","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 00:39:10",1321099697,"7/12/2014 00:38:10","NA","W3","NA","NA","NA","NA","NA","B" 5 | 1,"7/12/2014 01:56:18",1321129378,"7/12/2014 01:54:13","NA","W4","NA","NA","NA","NA","NA","B" 6 | 1,"7/12/2014 03:21:53",1321162331,"7/12/2014 03:21:10","NA","W5","NA","NA","NA","NA","NA","B" 7 | 1,"7/12/2014 07:51:23",1321257542,"7/12/2014 07:50:15","NA","W6","NA","NA","NA","NA","NA","B" 8 | 1,"7/12/2014 01:01:34",1321108709,"7/12/2014 01:00:48","NA","W7","NA","NA","NA","NA","NA","B" 9 | 1,"7/12/2014 00:50:46",1321104387,"7/12/2014 00:49:11","NA","W8","NA","NA","NA","NA","NA","B" 10 | 1,"7/12/2014 01:59:27",1321130494,"7/12/2014 01:58:42","NA","W9","NA","NA","NA","NA","NA","B" 11 | 1,"7/12/2014 00:59:50",1321107973,"7/12/2014 00:59:02","NA","W10","NA","NA","NA","NA","NA","B" 12 | -------------------------------------------------------------------------------- /test/test_data/metrics/2vs3work_agr.csv: -------------------------------------------------------------------------------- 1 | 
"_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 01:54:23",1321128633,"7/12/2014 01:53:20","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 00:54:38",1321106062,"7/12/2014 00:54:13","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 00:09:32",1321083795,"7/12/2014 00:09:07","NA","W3","NA","NA","NA","NA","NA","B" 5 | 2,"7/12/2014 02:33:30",1321144591,"7/12/2014 02:32:51","NA","W4","NA","NA","NA","NA","NA","C" 6 | 2,"7/12/2014 01:35:38",1321122895,"7/12/2014 01:34:03","NA","W5","NA","NA","NA","NA","NA","C" 7 | 2,"7/12/2014 08:20:49",1321266695,"7/12/2014 08:20:43","NA","W6","NA","NA","NA","NA","NA","C" 8 | 2,"7/12/2014 08:35:00",1321271707,"7/12/2014 08:34:30","NA","W7","NA","NA","NA","NA","NA","D" 9 | 3,"7/12/2014 08:22:46",1321267367,"7/12/2014 08:22:40","NA","W1","NA","NA","NA","NA","NA","E" 10 | 3,"7/11/2014 23:54:00",1321075109,"7/11/2014 23:52:41","NA","W2","NA","NA","NA","NA","NA","E" 11 | 3,"7/12/2014 09:54:23",1321297648,"7/12/2014 09:53:03","NA","W3","NA","NA","NA","NA","NA","E" 12 | 3,"7/12/2014 00:12:41",1321085633,"7/12/2014 00:12:21","NA","W4","NA","NA","NA","NA","NA","E" 13 | 3,"7/12/2014 00:57:41",1321107216,"7/12/2014 00:56:59","NA","W5","NA","NA","NA","NA","NA","E" 14 | 3,"7/12/2014 03:31:40",1321165815,"7/12/2014 03:31:23","NA","W6","NA","NA","NA","NA","NA","E" 15 | 3,"7/12/2014 07:54:59",1321258854,"7/12/2014 07:53:41","NA","W7","NA","NA","NA","NA","NA","E" 16 | -------------------------------------------------------------------------------- /test/test_data/metrics/2work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 03:41:10",1321169416,"7/12/2014 03:40:15","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/11/2014 23:54:00",1321075109,"7/11/2014 
23:52:41","NA","W2","NA","NA","NA","NA","NA","A" 4 | -------------------------------------------------------------------------------- /test/test_data/metrics/2work_disagr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 03:41:10",1321169416,"7/12/2014 03:40:15","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/11/2014 23:54:00",1321075109,"7/11/2014 23:52:41","NA","W2","NA","NA","NA","NA","NA","B" 4 | -------------------------------------------------------------------------------- /test/test_data/metrics/3vs4work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 08:20:33",1321266614,"7/12/2014 08:20:08","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 00:59:48",1321107969,"7/12/2014 00:57:42","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 00:05:42",1321081421,"7/12/2014 00:05:04","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/12/2014 00:05:59",1321081629,"7/12/2014 00:05:43","NA","W4","NA","NA","NA","NA","NA","B" 6 | 2,"7/12/2014 02:23:18",1321140803,"7/12/2014 02:21:09","NA","W5","NA","NA","NA","NA","NA","C" 7 | 2,"7/12/2014 00:41:20",1321100712,"7/12/2014 00:40:24","NA","W6","NA","NA","NA","NA","NA","C" 8 | 2,"7/12/2014 06:17:16",1321224805,"7/12/2014 06:16:10","NA","W7","NA","NA","NA","NA","NA","C" 9 | 2,"7/12/2014 01:20:15",1321116145,"7/12/2014 01:19:38","NA","W8","NA","NA","NA","NA","NA","C" 10 | 2,"7/12/2014 00:35:27",1321097976,"7/12/2014 00:35:08","NA","W9","NA","NA","NA","NA","NA","D" 11 | 3,"7/11/2014 23:51:40",1321073901,"7/11/2014 23:50:11","NA","W1","NA","NA","NA","NA","NA","E" 12 | 3,"7/12/2014 00:47:52",1321103304,"7/12/2014 00:46:51","NA","W2","NA","NA","NA","NA","NA","E" 13 
| 3,"7/12/2014 08:24:50",1321268016,"7/12/2014 08:24:21","NA","W3","NA","NA","NA","NA","NA","E" 14 | 3,"7/12/2014 01:20:54",1321116472,"7/12/2014 01:20:16","NA","W4","NA","NA","NA","NA","NA","E" 15 | 3,"7/12/2014 00:56:14",1321106659,"7/12/2014 00:55:17","NA","W5","NA","NA","NA","NA","NA","E" 16 | 3,"7/12/2014 05:10:56",1321202973,"7/12/2014 05:09:53","NA","W6","NA","NA","NA","NA","NA","E" 17 | 3,"7/12/2014 01:00:28",1321108209,"7/12/2014 00:59:49","NA","W7","NA","NA","NA","NA","NA","E" 18 | 3,"7/12/2014 00:05:42",1321081423,"7/12/2014 00:05:13","NA","W8","NA","NA","NA","NA","NA","E" 19 | 3,"7/12/2014 00:10:29",1321084373,"7/12/2014 00:10:14","NA","W9","NA","NA","NA","NA","NA","E" 20 | -------------------------------------------------------------------------------- /test/test_data/metrics/3work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/11/2014 23:54:23",1321075255,"7/11/2014 23:54:03","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 03:40:35",1321169181,"7/12/2014 03:39:37","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 09:20:42",1321286135,"7/12/2014 09:20:22","NA","W3","NA","NA","NA","NA","NA","A" 5 | -------------------------------------------------------------------------------- /test/test_data/metrics/3work_disagr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/11/2014 23:54:23",1321075255,"7/11/2014 23:54:03","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 03:40:35",1321169181,"7/12/2014 03:39:37","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 09:20:42",1321286135,"7/12/2014 09:20:22","NA","W3","NA","NA","NA","NA","NA","C" 5 | 
-------------------------------------------------------------------------------- /test/test_data/metrics/3work_outlier.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 00:44:33",1321102044,"7/12/2014 00:43:56","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 00:41:20",1321100712,"7/12/2014 00:40:24","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 03:34:13",1321166667,"7/12/2014 03:32:34","NA","W3","NA","NA","NA","NA","NA","B" 5 | -------------------------------------------------------------------------------- /test/test_data/metrics/4vs5work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 08:02:52",1321261481,"7/12/2014 08:02:33","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 02:24:52",1321141383,"7/12/2014 02:23:46","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 00:10:09",1321084151,"7/12/2014 00:09:51","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/12/2014 02:33:45",1321144660,"7/12/2014 02:32:35","NA","W4","NA","NA","NA","NA","NA","A" 6 | 1,"7/12/2014 07:45:55",1321255780,"7/12/2014 07:44:35","NA","W5","NA","NA","NA","NA","NA","B" 7 | 2,"7/12/2014 01:20:15",1321116145,"7/12/2014 01:19:38","NA","W6","NA","NA","NA","NA","NA","C" 8 | 2,"7/12/2014 02:11:22",1321135080,"7/12/2014 02:09:51","NA","W7","NA","NA","NA","NA","NA","C" 9 | 2,"7/12/2014 03:38:49",1321168500,"7/12/2014 03:38:06","NA","W8","NA","NA","NA","NA","NA","C" 10 | 2,"7/12/2014 06:19:09",1321225506,"7/12/2014 06:17:53","NA","W9","NA","NA","NA","NA","NA","C" 11 | 2,"7/12/2014 03:29:20",1321165157,"7/12/2014 03:28:00","NA","W10","NA","NA","NA","NA","NA","C" 12 | 2,"7/12/2014 00:45:11",1321102291,"7/12/2014 
00:44:34","NA","W11","NA","NA","NA","NA","NA","D" 13 | 3,"7/12/2014 07:58:35",1321259985,"7/12/2014 07:58:22","NA","W1","NA","NA","NA","NA","NA","E" 14 | 3,"7/12/2014 09:22:09",1321286685,"7/12/2014 09:21:48","NA","W2","NA","NA","NA","NA","NA","E" 15 | 3,"7/12/2014 00:19:53",1321089762,"7/12/2014 00:19:23","NA","W3","NA","NA","NA","NA","NA","E" 16 | 3,"7/12/2014 08:05:53",1321262379,"7/12/2014 08:05:31","NA","W4","NA","NA","NA","NA","NA","E" 17 | 3,"7/12/2014 03:38:05",1321168231,"7/12/2014 03:37:07","NA","W5","NA","NA","NA","NA","NA","E" 18 | 3,"7/12/2014 04:11:15",1321182256,"7/12/2014 04:10:14","NA","W6","NA","NA","NA","NA","NA","E" 19 | 3,"7/12/2014 06:21:57",1321226463,"7/12/2014 06:21:21","NA","W7","NA","NA","NA","NA","NA","E" 20 | 3,"7/12/2014 08:08:21",1321263099,"7/12/2014 08:07:53","NA","W8","NA","NA","NA","NA","NA","E" 21 | 3,"7/12/2014 07:43:36",1321255088,"7/12/2014 07:41:41","NA","W9","NA","NA","NA","NA","NA","E" 22 | 3,"7/12/2014 02:11:33",1321135182,"7/12/2014 02:09:42","NA","W10","NA","NA","NA","NA","NA","E" 23 | 3,"7/12/2014 03:25:20",1321163742,"7/12/2014 03:23:37","NA","W11","NA","NA","NA","NA","NA","E" 24 | -------------------------------------------------------------------------------- /test/test_data/metrics/4work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 08:17:09",1321265443,"7/12/2014 08:16:34","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 01:40:14",1321124448,"7/12/2014 01:39:52","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/11/2014 23:53:41",1321074998,"7/11/2014 23:51:44","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/12/2014 01:36:34",1321123249,"7/12/2014 01:36:08","NA","W4","NA","NA","NA","NA","NA","A" 6 | -------------------------------------------------------------------------------- /test/test_data/metrics/4work_disagr.csv: 
-------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 08:17:09",1321265443,"7/12/2014 08:16:34","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 01:40:14",1321124448,"7/12/2014 01:39:52","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/11/2014 23:53:41",1321074998,"7/11/2014 23:51:44","NA","W3","NA","NA","NA","NA","NA","C" 5 | 1,"7/12/2014 01:36:34",1321123249,"7/12/2014 01:36:08","NA","W4","NA","NA","NA","NA","NA","D" 6 | -------------------------------------------------------------------------------- /test/test_data/metrics/4work_outlier.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 00:09:21",1321083669,"7/12/2014 00:09:04","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 01:55:49",1321129167,"7/12/2014 01:55:25","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 04:58:28",1321198134,"7/12/2014 04:57:42","NA","W3","NA","NA","NA","NA","NA","B" 5 | 1,"7/12/2014 04:41:59",1321192758,"7/12/2014 04:40:10","NA","W4","NA","NA","NA","NA","NA","B" 6 | -------------------------------------------------------------------------------- /test/test_data/metrics/5vs6work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 01:34:31",1321122489,"7/12/2014 01:34:04","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 00:36:39",1321098569,"7/12/2014 00:36:20","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 05:07:52",1321201722,"7/12/2014 05:06:55","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/12/2014 
00:50:07",1321104103,"7/12/2014 00:47:44","NA","W4","NA","NA","NA","NA","NA","A" 6 | 1,"7/12/2014 05:02:14",1321199248,"7/12/2014 05:01:44","NA","W5","NA","NA","NA","NA","NA","A" 7 | 1,"7/12/2014 00:28:56",1321094550,"7/12/2014 00:28:30","NA","W6","NA","NA","NA","NA","NA","B" 8 | 2,"7/12/2014 00:02:22",1321079483,"7/12/2014 00:01:55","NA","W7","NA","NA","NA","NA","NA","C" 9 | 2,"7/12/2014 01:05:17",1321110140,"7/12/2014 01:04:35","NA","W8","NA","NA","NA","NA","NA","C" 10 | 2,"7/11/2014 23:56:17",1321076307,"7/11/2014 23:55:55","NA","W9","NA","NA","NA","NA","NA","C" 11 | 2,"7/12/2014 03:36:15",1321167669,"7/12/2014 03:35:08","NA","W10","NA","NA","NA","NA","NA","C" 12 | 2,"7/12/2014 02:10:48",1321134901,"7/12/2014 02:08:32","NA","W11","NA","NA","NA","NA","NA","C" 13 | 2,"7/12/2014 01:06:06",1321110480,"7/12/2014 01:05:31","NA","W12","NA","NA","NA","NA","NA","C" 14 | 2,"7/12/2014 07:56:08",1321259197,"7/12/2014 07:55:47","NA","W13","NA","NA","NA","NA","NA","D" 15 | 3,"7/12/2014 02:16:12",1321137423,"7/12/2014 02:15:26","NA","W1","NA","NA","NA","NA","NA","E" 16 | 3,"7/11/2014 23:55:35",1321075857,"7/11/2014 23:54:36","NA","W2","NA","NA","NA","NA","NA","E" 17 | 3,"7/12/2014 03:45:48",1321171229,"7/12/2014 03:44:32","NA","W3","NA","NA","NA","NA","NA","E" 18 | 3,"7/11/2014 23:54:00",1321075109,"7/11/2014 23:52:41","NA","W4","NA","NA","NA","NA","NA","E" 19 | 3,"7/12/2014 02:37:10",1321145756,"7/12/2014 02:36:28","NA","W5","NA","NA","NA","NA","NA","E" 20 | 3,"7/12/2014 03:34:11",1321166674,"7/12/2014 03:33:06","NA","W6","NA","NA","NA","NA","NA","E" 21 | 3,"7/12/2014 01:35:38",1321122895,"7/12/2014 01:34:03","NA","W7","NA","NA","NA","NA","NA","E" 22 | 3,"7/12/2014 00:52:41",1321105269,"7/12/2014 00:51:54","NA","W8","NA","NA","NA","NA","NA","E" 23 | 3,"7/12/2014 07:58:46",1321260076,"7/12/2014 07:57:24","NA","W9","NA","NA","NA","NA","NA","E" 24 | 3,"7/12/2014 04:13:05",1321183193,"7/12/2014 04:12:08","NA","W10","NA","NA","NA","NA","NA","E" 25 | 3,"7/12/2014 
00:56:58",1321106959,"7/12/2014 00:56:15","NA","W11","NA","NA","NA","NA","NA","E" 26 | 3,"7/12/2014 01:36:07",1321123048,"7/12/2014 01:35:39","NA","W12","NA","NA","NA","NA","NA","E" 27 | 3,"7/12/2014 06:16:18",1321224474,"7/12/2014 06:15:07","NA","W13","NA","NA","NA","NA","NA","E" 28 | -------------------------------------------------------------------------------- /test/test_data/metrics/5work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 00:09:54",1321083995,"7/12/2014 00:09:16","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 08:37:11",1321272287,"7/12/2014 08:36:34","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 07:55:17",1321258930,"7/12/2014 07:54:46","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/12/2014 01:28:35",1321119801,"7/12/2014 01:27:38","NA","W4","NA","NA","NA","NA","NA","A" 6 | 1,"7/12/2014 02:08:29",1321133916,"7/12/2014 02:07:05","NA","W5","NA","NA","NA","NA","NA","A" 7 | -------------------------------------------------------------------------------- /test/test_data/metrics/5work_disagr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 00:09:54",1321083995,"7/12/2014 00:09:16","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 08:37:11",1321272287,"7/12/2014 08:36:34","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 07:55:17",1321258930,"7/12/2014 07:54:46","NA","W3","NA","NA","NA","NA","NA","C" 5 | 1,"7/12/2014 01:28:35",1321119801,"7/12/2014 01:27:38","NA","W4","NA","NA","NA","NA","NA","D" 6 | 1,"7/12/2014 02:08:29",1321133916,"7/12/2014 02:07:05","NA","W5","NA","NA","NA","NA","NA","E" 7 | 
-------------------------------------------------------------------------------- /test/test_data/metrics/5work_outlier.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 03:30:05",1321165353,"7/12/2014 03:29:43","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 06:12:24",1321223074,"7/12/2014 06:11:34","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 03:29:46",1321165287,"7/12/2014 03:28:54","NA","W3","NA","NA","NA","NA","NA","B" 5 | 1,"7/12/2014 01:56:16",1321129367,"7/12/2014 01:55:03","NA","W4","NA","NA","NA","NA","NA","B" 6 | 1,"7/12/2014 06:35:43",1321231207,"7/12/2014 06:35:22","NA","W5","NA","NA","NA","NA","NA","B" 7 | -------------------------------------------------------------------------------- /test/test_data/metrics/6vs7work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 03:37:09",1321167920,"7/12/2014 03:36:24","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 08:04:59",1321262089,"7/12/2014 08:03:48","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 03:27:38",1321164623,"7/12/2014 03:27:15","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/12/2014 01:00:28",1321108209,"7/12/2014 00:59:49","NA","W4","NA","NA","NA","NA","NA","A" 6 | 1,"7/12/2014 03:34:53",1321166981,"7/12/2014 03:34:28","NA","W5","NA","NA","NA","NA","NA","A" 7 | 1,"7/12/2014 03:05:49",1321155879,"7/12/2014 03:05:12","NA","W6","NA","NA","NA","NA","NA","A" 8 | 1,"7/12/2014 07:58:46",1321260076,"7/12/2014 07:57:24","NA","W7","NA","NA","NA","NA","NA","B" 9 | 2,"7/12/2014 01:23:58",1321117625,"7/12/2014 01:23:25","NA","W8","NA","NA","NA","NA","NA","C" 10 | 2,"7/12/2014 00:10:56",1321084574,"7/12/2014 
00:10:45","NA","W9","NA","NA","NA","NA","NA","C" 11 | 2,"7/12/2014 01:32:21",1321121518,"7/12/2014 01:31:37","NA","W10","NA","NA","NA","NA","NA","C" 12 | 2,"7/12/2014 00:39:10",1321099697,"7/12/2014 00:38:10","NA","W11","NA","NA","NA","NA","NA","C" 13 | 2,"7/12/2014 02:07:23",1321133471,"7/12/2014 02:06:25","NA","W12","NA","NA","NA","NA","NA","C" 14 | 2,"7/12/2014 06:35:20",1321231022,"7/12/2014 06:34:47","NA","W13","NA","NA","NA","NA","NA","C" 15 | 2,"7/12/2014 00:42:48",1321101347,"7/12/2014 00:41:21","NA","W14","NA","NA","NA","NA","NA","C" 16 | 2,"7/12/2014 00:11:39",1321084983,"7/12/2014 00:11:25","NA","W15","NA","NA","NA","NA","NA","D" 17 | 3,"7/12/2014 01:31:37",1321121184,"7/12/2014 01:30:46","NA","W1","NA","NA","NA","NA","NA","E" 18 | 3,"7/12/2014 08:21:19",1321266904,"7/12/2014 08:21:12","NA","W2","NA","NA","NA","NA","NA","E" 19 | 3,"7/12/2014 01:34:02",1321122298,"7/12/2014 01:33:31","NA","W3","NA","NA","NA","NA","NA","E" 20 | 3,"7/12/2014 04:07:28",1321180509,"7/12/2014 04:06:39","NA","W4","NA","NA","NA","NA","NA","E" 21 | 3,"7/12/2014 07:59:01",1321260163,"7/12/2014 07:58:36","NA","W5","NA","NA","NA","NA","NA","E" 22 | 3,"7/12/2014 00:36:39",1321098569,"7/12/2014 00:36:20","NA","W6","NA","NA","NA","NA","NA","E" 23 | 3,"7/12/2014 01:05:17",1321110140,"7/12/2014 01:04:35","NA","W7","NA","NA","NA","NA","NA","E" 24 | 3,"7/12/2014 01:54:23",1321128633,"7/12/2014 01:53:20","NA","W8","NA","NA","NA","NA","NA","E" 25 | 3,"7/12/2014 02:11:55",1321135275,"7/12/2014 02:10:54","NA","W9","NA","NA","NA","NA","NA","E" 26 | 3,"7/12/2014 07:57:16",1321259582,"7/12/2014 07:56:52","NA","W10","NA","NA","NA","NA","NA","E" 27 | 3,"7/12/2014 04:46:52",1321194633,"7/12/2014 04:46:11","NA","W11","NA","NA","NA","NA","NA","E" 28 | 3,"7/12/2014 00:54:38",1321106062,"7/12/2014 00:54:13","NA","W12","NA","NA","NA","NA","NA","E" 29 | 3,"7/12/2014 08:23:22",1321267570,"7/12/2014 08:22:54","NA","W13","NA","NA","NA","NA","NA","E" 30 | 3,"7/12/2014 08:21:11",1321266833,"7/12/2014 
08:21:05","NA","W14","NA","NA","NA","NA","NA","E" 31 | 3,"7/12/2014 03:42:24",1321169855,"7/12/2014 03:41:09","NA","W15","NA","NA","NA","NA","NA","E" 32 | -------------------------------------------------------------------------------- /test/test_data/metrics/6work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 00:12:20",1321085431,"7/12/2014 00:12:03","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 10:19:02",1321305706,"7/12/2014 10:18:32","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 00:31:36",1321096010,"7/12/2014 00:31:12","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/11/2014 23:57:01",1321076680,"7/11/2014 23:56:39","NA","W4","NA","NA","NA","NA","NA","A" 6 | 1,"7/12/2014 01:38:57",1321124104,"7/12/2014 01:37:52","NA","W5","NA","NA","NA","NA","NA","A" 7 | 1,"7/12/2014 03:35:56",1321167533,"7/12/2014 03:35:43","NA","W6","NA","NA","NA","NA","NA","A" 8 | -------------------------------------------------------------------------------- /test/test_data/metrics/6work_disagr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 00:12:20",1321085431,"7/12/2014 00:12:03","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 10:19:02",1321305706,"7/12/2014 10:18:32","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 00:31:36",1321096010,"7/12/2014 00:31:12","NA","W3","NA","NA","NA","NA","NA","C" 5 | 1,"7/11/2014 23:57:01",1321076680,"7/11/2014 23:56:39","NA","W4","NA","NA","NA","NA","NA","D" 6 | 1,"7/12/2014 01:38:57",1321124104,"7/12/2014 01:37:52","NA","W5","NA","NA","NA","NA","NA","E" 7 | 1,"7/12/2014 03:35:56",1321167533,"7/12/2014 03:35:43","NA","W6","NA","NA","NA","NA","NA","F" 8 | 
-------------------------------------------------------------------------------- /test/test_data/metrics/6work_outlier.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 05:07:52",1321201722,"7/12/2014 05:06:55","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 01:39:51",1321124369,"7/12/2014 01:38:57","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 05:00:59",1321198844,"7/12/2014 04:59:46","NA","W3","NA","NA","NA","NA","NA","B" 5 | 1,"7/12/2014 03:36:33",1321167778,"7/12/2014 03:36:21","NA","W4","NA","NA","NA","NA","NA","B" 6 | 1,"7/12/2014 04:50:18",1321195542,"7/12/2014 04:48:46","NA","W5","NA","NA","NA","NA","NA","B" 7 | 1,"7/12/2014 00:11:09",1321084679,"7/12/2014 00:11:01","NA","W6","NA","NA","NA","NA","NA","B" 8 | -------------------------------------------------------------------------------- /test/test_data/metrics/7vs8work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 08:15:31",1321264968,"7/12/2014 08:13:22","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 00:43:18",1321101572,"7/12/2014 00:42:54","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 00:39:19",1321099755,"7/12/2014 00:38:30","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/12/2014 03:58:15",1321176321,"7/12/2014 03:57:37","NA","W4","NA","NA","NA","NA","NA","A" 6 | 1,"7/12/2014 03:56:01",1321175437,"7/12/2014 03:55:11","NA","W5","NA","NA","NA","NA","NA","A" 7 | 1,"7/12/2014 00:57:38",1321107201,"7/12/2014 00:56:15","NA","W6","NA","NA","NA","NA","NA","A" 8 | 1,"7/12/2014 00:06:11",1321081711,"7/12/2014 00:06:00","NA","W7","NA","NA","NA","NA","NA","A" 9 | 1,"7/11/2014 23:52:18",1321074163,"7/11/2014 
23:51:36","NA","W8","NA","NA","NA","NA","NA","B" 10 | 2,"7/12/2014 03:46:26",1321171521,"7/12/2014 03:45:49","NA","W9","NA","NA","NA","NA","NA","C" 11 | 2,"7/12/2014 01:53:19",1321128139,"7/12/2014 01:52:32","NA","W10","NA","NA","NA","NA","NA","C" 12 | 2,"7/12/2014 07:48:29",1321256563,"7/12/2014 07:46:45","NA","W11","NA","NA","NA","NA","NA","C" 13 | 2,"7/12/2014 00:54:38",1321106062,"7/12/2014 00:54:13","NA","W12","NA","NA","NA","NA","NA","C" 14 | 2,"7/12/2014 03:29:46",1321165287,"7/12/2014 03:28:54","NA","W13","NA","NA","NA","NA","NA","C" 15 | 2,"7/12/2014 04:36:19",1321190801,"7/12/2014 04:33:15","NA","W14","NA","NA","NA","NA","NA","C" 16 | 2,"7/12/2014 05:09:52",1321202505,"7/12/2014 05:08:47","NA","W15","NA","NA","NA","NA","NA","C" 17 | 2,"7/12/2014 03:40:14",1321169037,"7/12/2014 03:39:37","NA","W16","NA","NA","NA","NA","NA","C" 18 | 2,"7/12/2014 01:33:30",1321122025,"7/12/2014 01:32:22","NA","W17","NA","NA","NA","NA","NA","D" 19 | 3,"7/12/2014 03:39:42",1321168832,"7/12/2014 03:38:50","NA","W1","NA","NA","NA","NA","NA","E" 20 | 3,"7/12/2014 00:35:21",1321097910,"7/12/2014 00:34:03","NA","W2","NA","NA","NA","NA","NA","E" 21 | 3,"7/12/2014 07:56:31",1321259357,"7/12/2014 07:56:09","NA","W3","NA","NA","NA","NA","NA","E" 22 | 3,"7/12/2014 01:31:56",1321121353,"7/12/2014 01:31:26","NA","W4","NA","NA","NA","NA","NA","E" 23 | 3,"7/12/2014 01:08:42",1321111448,"7/12/2014 01:07:50","NA","W5","NA","NA","NA","NA","NA","E" 24 | 3,"7/12/2014 00:05:42",1321081420,"7/12/2014 00:05:30","NA","W6","NA","NA","NA","NA","NA","E" 25 | 3,"7/12/2014 03:31:47",1321165857,"7/12/2014 03:29:47","NA","W7","NA","NA","NA","NA","NA","E" 26 | 3,"7/12/2014 03:43:48",1321170440,"7/12/2014 03:43:08","NA","W8","NA","NA","NA","NA","NA","E" 27 | 3,"7/12/2014 00:36:08",1321098326,"7/12/2014 00:35:31","NA","W9","NA","NA","NA","NA","NA","E" 28 | 3,"7/12/2014 04:00:14",1321177274,"7/12/2014 03:59:23","NA","W10","NA","NA","NA","NA","NA","E" 29 | 3,"7/12/2014 00:55:15",1321106262,"7/12/2014 
00:54:24","NA","W11","NA","NA","NA","NA","NA","E" 30 | 3,"7/12/2014 02:17:38",1321138046,"7/12/2014 02:16:13","NA","W12","NA","NA","NA","NA","NA","E" 31 | 3,"7/12/2014 01:07:01",1321110823,"7/12/2014 01:06:09","NA","W13","NA","NA","NA","NA","NA","E" 32 | 3,"7/12/2014 04:51:50",1321196063,"7/12/2014 04:50:19","NA","W14","NA","NA","NA","NA","NA","E" 33 | 3,"7/12/2014 00:04:44",1321080955,"7/12/2014 00:04:04","NA","W15","NA","NA","NA","NA","NA","E" 34 | 3,"7/12/2014 00:50:46",1321104387,"7/12/2014 00:49:11","NA","W16","NA","NA","NA","NA","NA","E" 35 | 3,"7/12/2014 00:29:54",1321095040,"7/12/2014 00:28:16","NA","W17","NA","NA","NA","NA","NA","E" 36 | -------------------------------------------------------------------------------- /test/test_data/metrics/7work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 02:08:26",1321133897,"7/12/2014 02:07:28","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 03:55:59",1321175422,"7/12/2014 03:55:17","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 02:17:38",1321138046,"7/12/2014 02:16:13","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/12/2014 00:54:22",1321105936,"7/12/2014 00:53:35","NA","W4","NA","NA","NA","NA","NA","A" 6 | 1,"7/12/2014 00:45:15",1321102299,"7/12/2014 00:44:37","NA","W5","NA","NA","NA","NA","NA","A" 7 | 1,"7/11/2014 23:56:17",1321076307,"7/11/2014 23:55:55","NA","W6","NA","NA","NA","NA","NA","A" 8 | 1,"7/12/2014 03:38:49",1321168500,"7/12/2014 03:38:06","NA","W7","NA","NA","NA","NA","NA","A" 9 | -------------------------------------------------------------------------------- /test/test_data/metrics/7work_disagr.csv: -------------------------------------------------------------------------------- 1 | 
"_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 02:08:26",1321133897,"7/12/2014 02:07:28","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 03:55:59",1321175422,"7/12/2014 03:55:17","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 02:17:38",1321138046,"7/12/2014 02:16:13","NA","W3","NA","NA","NA","NA","NA","C" 5 | 1,"7/12/2014 00:54:22",1321105936,"7/12/2014 00:53:35","NA","W4","NA","NA","NA","NA","NA","D" 6 | 1,"7/12/2014 00:45:15",1321102299,"7/12/2014 00:44:37","NA","W5","NA","NA","NA","NA","NA","E" 7 | 1,"7/11/2014 23:56:17",1321076307,"7/11/2014 23:55:55","NA","W6","NA","NA","NA","NA","NA","F" 8 | 1,"7/12/2014 03:38:49",1321168500,"7/12/2014 03:38:06","NA","W7","NA","NA","NA","NA","NA","G" 9 | -------------------------------------------------------------------------------- /test/test_data/metrics/7work_outlier.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 05:02:58",1321199613,"7/12/2014 05:02:15","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 01:37:17",1321123504,"7/12/2014 01:36:45","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 01:20:15",1321116145,"7/12/2014 01:19:38","NA","W3","NA","NA","NA","NA","NA","B" 5 | 1,"7/12/2014 08:04:32",1321261964,"7/12/2014 08:04:05","NA","W4","NA","NA","NA","NA","NA","B" 6 | 1,"7/12/2014 09:55:49",1321298274,"7/12/2014 09:54:25","NA","W5","NA","NA","NA","NA","NA","B" 7 | 1,"7/12/2014 03:55:10",1321175094,"7/12/2014 03:53:49","NA","W6","NA","NA","NA","NA","NA","B" 8 | 1,"7/12/2014 06:35:20",1321231022,"7/12/2014 06:34:47","NA","W7","NA","NA","NA","NA","NA","B" 9 | -------------------------------------------------------------------------------- /test/test_data/metrics/8vs9work_agr.csv: 
-------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 03:36:33",1321167778,"7/12/2014 03:36:21","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 00:21:03",1321090382,"7/12/2014 00:20:22","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 01:16:01",1321114758,"7/12/2014 01:15:25","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/12/2014 01:33:32",1321122059,"7/12/2014 01:33:11","NA","W4","NA","NA","NA","NA","NA","A" 6 | 1,"7/12/2014 03:37:05",1321167908,"7/12/2014 03:36:17","NA","W5","NA","NA","NA","NA","NA","A" 7 | 1,"7/12/2014 09:20:21",1321286027,"7/12/2014 09:19:57","NA","W6","NA","NA","NA","NA","NA","A" 8 | 1,"7/12/2014 02:00:06",1321130661,"7/12/2014 01:58:55","NA","W7","NA","NA","NA","NA","NA","A" 9 | 1,"7/12/2014 09:54:23",1321297648,"7/12/2014 09:53:03","NA","W8","NA","NA","NA","NA","NA","A" 10 | 1,"7/12/2014 03:30:55",1321165567,"7/12/2014 03:30:38","NA","W9","NA","NA","NA","NA","NA","B" 11 | 2,"7/11/2014 23:56:17",1321076307,"7/11/2014 23:55:55","NA","W10","NA","NA","NA","NA","NA","C" 12 | 2,"7/12/2014 00:10:13",1321084172,"7/12/2014 00:10:05","NA","W11","NA","NA","NA","NA","NA","C" 13 | 2,"7/12/2014 02:24:17",1321141190,"7/12/2014 02:23:26","NA","W12","NA","NA","NA","NA","NA","C" 14 | 2,"7/12/2014 04:44:51",1321193775,"7/12/2014 04:43:43","NA","W13","NA","NA","NA","NA","NA","C" 15 | 2,"7/12/2014 00:17:08",1321088270,"7/12/2014 00:16:20","NA","W14","NA","NA","NA","NA","NA","C" 16 | 2,"7/12/2014 01:36:34",1321123249,"7/12/2014 01:36:08","NA","W15","NA","NA","NA","NA","NA","C" 17 | 2,"7/12/2014 02:19:23",1321138961,"7/12/2014 02:17:57","NA","W16","NA","NA","NA","NA","NA","C" 18 | 2,"7/12/2014 03:38:05",1321168231,"7/12/2014 03:37:07","NA","W17","NA","NA","NA","NA","NA","C" 19 | 2,"7/12/2014 02:37:10",1321145756,"7/12/2014 02:36:28","NA","W18","NA","NA","NA","NA","NA","C" 20 | 
2,"7/12/2014 03:36:17",1321167683,"7/12/2014 03:35:57","NA","W19","NA","NA","NA","NA","NA","D" 21 | 3,"7/12/2014 03:33:38",1321166526,"7/12/2014 03:33:26","NA","W1","NA","NA","NA","NA","NA","E" 22 | 3,"7/12/2014 00:31:10",1321095794,"7/12/2014 00:30:33","NA","W2","NA","NA","NA","NA","NA","E" 23 | 3,"7/12/2014 02:20:03",1321139423,"7/12/2014 02:18:49","NA","W3","NA","NA","NA","NA","NA","E" 24 | 3,"7/12/2014 02:32:07",1321144018,"7/12/2014 02:30:48","NA","W4","NA","NA","NA","NA","NA","E" 25 | 3,"7/12/2014 00:20:21",1321089984,"7/12/2014 00:19:53","NA","W5","NA","NA","NA","NA","NA","E" 26 | 3,"7/12/2014 03:04:31",1321155464,"7/12/2014 03:03:02","NA","W6","NA","NA","NA","NA","NA","E" 27 | 3,"7/12/2014 00:39:10",1321099697,"7/12/2014 00:38:10","NA","W7","NA","NA","NA","NA","NA","E" 28 | 3,"7/12/2014 00:32:15",1321096284,"7/12/2014 00:31:37","NA","W8","NA","NA","NA","NA","NA","E" 29 | 3,"7/12/2014 03:30:04",1321165352,"7/12/2014 03:29:21","NA","W9","NA","NA","NA","NA","NA","E" 30 | 3,"7/11/2014 23:50:09",1321073252,"7/11/2014 23:49:19","NA","W10","NA","NA","NA","NA","NA","E" 31 | 3,"7/12/2014 02:08:26",1321133897,"7/12/2014 02:07:28","NA","W11","NA","NA","NA","NA","NA","E" 32 | 3,"7/12/2014 07:58:35",1321259985,"7/12/2014 07:58:22","NA","W12","NA","NA","NA","NA","NA","E" 33 | 3,"7/12/2014 08:12:13",1321264196,"7/12/2014 08:10:33","NA","W13","NA","NA","NA","NA","NA","E" 34 | 3,"7/12/2014 00:29:54",1321095040,"7/12/2014 00:28:16","NA","W14","NA","NA","NA","NA","NA","E" 35 | 3,"7/12/2014 00:56:13",1321106656,"7/12/2014 00:55:19","NA","W15","NA","NA","NA","NA","NA","E" 36 | 3,"7/12/2014 04:46:10",1321194407,"7/12/2014 04:44:52","NA","W16","NA","NA","NA","NA","NA","E" 37 | 3,"7/12/2014 08:17:37",1321265589,"7/12/2014 08:17:10","NA","W17","NA","NA","NA","NA","NA","E" 38 | 3,"7/12/2014 01:13:47",1321113783,"7/12/2014 01:13:01","NA","W18","NA","NA","NA","NA","NA","E" 39 | 3,"7/12/2014 03:43:24",1321170268,"7/12/2014 03:42:41","NA","W19","NA","NA","NA","NA","NA","E" 40 | 
-------------------------------------------------------------------------------- /test/test_data/metrics/8work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 08:36:33",1321272139,"7/12/2014 08:36:06","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 03:10:06",1321157653,"7/12/2014 03:08:59","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/12/2014 03:34:13",1321166667,"7/12/2014 03:32:34","NA","W3","NA","NA","NA","NA","NA","A" 5 | 1,"7/12/2014 08:05:10",1321262127,"7/12/2014 08:04:33","NA","W4","NA","NA","NA","NA","NA","A" 6 | 1,"7/12/2014 00:04:09",1321080574,"7/12/2014 00:03:59","NA","W5","NA","NA","NA","NA","NA","A" 7 | 1,"7/12/2014 03:55:59",1321175422,"7/12/2014 03:55:17","NA","W6","NA","NA","NA","NA","NA","A" 8 | 1,"7/12/2014 01:29:10",1321120029,"7/12/2014 01:28:36","NA","W7","NA","NA","NA","NA","NA","A" 9 | 1,"7/12/2014 00:11:37",1321084980,"7/12/2014 00:11:30","NA","W8","NA","NA","NA","NA","NA","A" 10 | -------------------------------------------------------------------------------- /test/test_data/metrics/8work_disagr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 08:36:33",1321272139,"7/12/2014 08:36:06","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 03:10:06",1321157653,"7/12/2014 03:08:59","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/12/2014 03:34:13",1321166667,"7/12/2014 03:32:34","NA","W3","NA","NA","NA","NA","NA","C" 5 | 1,"7/12/2014 08:05:10",1321262127,"7/12/2014 08:04:33","NA","W4","NA","NA","NA","NA","NA","D" 6 | 1,"7/12/2014 00:04:09",1321080574,"7/12/2014 00:03:59","NA","W5","NA","NA","NA","NA","NA","E" 7 | 1,"7/12/2014 03:55:59",1321175422,"7/12/2014 
03:55:17","NA","W6","NA","NA","NA","NA","NA","F" 8 | 1,"7/12/2014 01:29:10",1321120029,"7/12/2014 01:28:36","NA","W7","NA","NA","NA","NA","NA","G" 9 | 1,"7/12/2014 00:11:37",1321084980,"7/12/2014 00:11:30","NA","W8","NA","NA","NA","NA","NA","H" 10 | -------------------------------------------------------------------------------- /test/test_data/metrics/8work_outlier.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 00:09:02",1321083463,"7/12/2014 00:08:49","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 01:01:57",1321108808,"7/12/2014 01:01:14","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/11/2014 23:51:41",1321073910,"7/11/2014 23:50:55","NA","W3","NA","NA","NA","NA","NA","B" 5 | 1,"7/12/2014 06:17:19",1321224835,"7/12/2014 06:16:19","NA","W4","NA","NA","NA","NA","NA","B" 6 | 1,"7/12/2014 04:56:12",1321197506,"7/12/2014 04:55:36","NA","W5","NA","NA","NA","NA","NA","B" 7 | 1,"7/12/2014 02:31:50",1321143909,"7/12/2014 02:30:58","NA","W6","NA","NA","NA","NA","NA","B" 8 | 1,"7/12/2014 08:21:04",1321266784,"7/12/2014 08:20:58","NA","W7","NA","NA","NA","NA","NA","B" 9 | 1,"7/11/2014 23:55:35",1321075857,"7/11/2014 23:54:36","NA","W8","NA","NA","NA","NA","NA","B" 10 | -------------------------------------------------------------------------------- /test/test_data/metrics/9work_agr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 01:56:16",1321129367,"7/12/2014 01:55:03","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 00:09:05",1321083508,"7/12/2014 00:08:49","NA","W2","NA","NA","NA","NA","NA","A" 4 | 1,"7/11/2014 23:50:49",1321073513,"7/11/2014 23:50:22","NA","W3","NA","NA","NA","NA","NA","A" 5 | 
1,"7/12/2014 03:25:20",1321163742,"7/12/2014 03:23:37","NA","W4","NA","NA","NA","NA","NA","A" 6 | 1,"7/12/2014 03:38:35",1321168414,"7/12/2014 03:37:49","NA","W5","NA","NA","NA","NA","NA","A" 7 | 1,"7/12/2014 00:12:01",1321085183,"7/12/2014 00:11:29","NA","W6","NA","NA","NA","NA","NA","A" 8 | 1,"7/12/2014 00:04:02",1321080513,"7/12/2014 00:03:40","NA","W7","NA","NA","NA","NA","NA","A" 9 | 1,"7/12/2014 04:14:24",1321183813,"7/12/2014 04:13:40","NA","W8","NA","NA","NA","NA","NA","A" 10 | 1,"7/12/2014 06:17:51",1321225052,"7/12/2014 06:17:17","NA","W9","NA","NA","NA","NA","NA","A" 11 | -------------------------------------------------------------------------------- /test/test_data/metrics/9work_disagr.csv: -------------------------------------------------------------------------------- 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col" 2 | 1,"7/12/2014 01:56:16",1321129367,"7/12/2014 01:55:03","NA","W1","NA","NA","NA","NA","NA","A" 3 | 1,"7/12/2014 00:09:05",1321083508,"7/12/2014 00:08:49","NA","W2","NA","NA","NA","NA","NA","B" 4 | 1,"7/11/2014 23:50:49",1321073513,"7/11/2014 23:50:22","NA","W3","NA","NA","NA","NA","NA","C" 5 | 1,"7/12/2014 03:25:20",1321163742,"7/12/2014 03:23:37","NA","W4","NA","NA","NA","NA","NA","D" 6 | 1,"7/12/2014 03:38:35",1321168414,"7/12/2014 03:37:49","NA","W5","NA","NA","NA","NA","NA","E" 7 | 1,"7/12/2014 00:12:01",1321085183,"7/12/2014 00:11:29","NA","W6","NA","NA","NA","NA","NA","F" 8 | 1,"7/12/2014 00:04:02",1321080513,"7/12/2014 00:03:40","NA","W7","NA","NA","NA","NA","NA","G" 9 | 1,"7/12/2014 04:14:24",1321183813,"7/12/2014 04:13:40","NA","W8","NA","NA","NA","NA","NA","H" 10 | 1,"7/12/2014 06:17:51",1321225052,"7/12/2014 06:17:17","NA","W9","NA","NA","NA","NA","NA","I" 11 | -------------------------------------------------------------------------------- /test/test_data/metrics/9work_outlier.csv: 
# NOTE: the comment lines below are non-Python repository-dump content that
# shares these source lines with test/test_load.py; it is kept verbatim.
# --------------------------------------------------------------------------------
# 1 | "_unit_id","_created_at","_id","_started_at","_channel","_worker_id","_country","_region","_city","_ip","in_col","out_col"
# 2 | 1,"7/12/2014 08:04:04",1321261850,"7/12/2014 08:03:28","NA","W1","NA","NA","NA","NA","NA","A"
# 3 | 1,"7/12/2014 03:39:36",1321168780,"7/12/2014 03:38:02","NA","W2","NA","NA","NA","NA","NA","B"
# 4 | 1,"7/12/2014 03:43:24",1321170268,"7/12/2014 03:42:41","NA","W3","NA","NA","NA","NA","NA","B"
# 5 | 1,"7/12/2014 03:53:48",1321174510,"7/12/2014 03:51:51","NA","W4","NA","NA","NA","NA","NA","B"
# 6 | 1,"7/12/2014 00:47:47",1321103289,"7/12/2014 00:45:28","NA","W5","NA","NA","NA","NA","NA","B"
# 7 | 1,"7/12/2014 01:31:37",1321121184,"7/12/2014 01:30:46","NA","W6","NA","NA","NA","NA","NA","B"
# 8 | 1,"7/12/2014 08:24:50",1321268016,"7/12/2014 08:24:21","NA","W7","NA","NA","NA","NA","NA","B"
# 9 | 1,"7/12/2014 03:05:12",1321155667,"7/12/2014 03:04:31","NA","W8","NA","NA","NA","NA","NA","B"
# 10 | 1,"7/12/2014 05:02:14",1321199248,"7/12/2014 05:01:44","NA","W9","NA","NA","NA","NA","NA","B"
# 11 |
# --------------------------------------------------------------------------------
# /test/test_load.py:
# --------------------------------------------------------------------------------
""" Unit testing module for pre-processing functions """

import unittest
import string
import pandas as pd

import crowdtruth
from crowdtruth.configuration import DefaultConfig

# Directory prefix (relative path) of the CSV fixtures used by the load tests.
TEST_FILE_PREF = "test/test_data/load/"

class TestConfig(DefaultConfig):
    """Configuration shared by the load tests.

    Declares one input column ("input") and one output column
    ("Answer.output"), marks the task as not open-ended, uses a single space
    as annotation separator and the uppercase ASCII letters as annotation
    vector. ``processJudgments`` returns the judgments unchanged.
    """
    inputColumns = ["input"]
    outputColumns = ["Answer.output"]
    open_ended_task = False
    annotation_separator = " "
    annotation_vector = list(string.ascii_uppercase)
    def processJudgments(self, judgments):
        """Return ``judgments`` as received (no pre-processing)."""
        return judgments

class ConfigKeepEmptyRows(TestConfig):
    """Same as ``TestConfig`` but with ``remove_empty_rows`` set to False."""
    remove_empty_rows = False

class ConfigProcessJudg(TestConfig):
    """Same as ``TestConfig`` but lower-cases every output column."""
    def processJudgments(self, judgments):
        """Replace each output column by its lower-cased string form.

        Every value is converted with ``str`` first, so non-string values are
        stringified as well. The (mutated) ``judgments`` object is returned.
        """
        for col in self.outputColumns:
            judgments[col] = judgments[col].apply(lambda x: str(x).lower())
        return judgments

class TestLoad(unittest.TestCase):
    """Tests for ``crowdtruth.load`` on file, directory and data-frame input."""
    # Class-level config instances. Each test takes the instance's class via
    # ``__class__`` and instantiates a fresh config for every load call.
    test_conf_const = TestConfig()
    test_keep_empty_rows = ConfigKeepEmptyRows()
    test_process_judg = ConfigProcessJudg()

    def test_platform(self):
        # For w = 1..5, load platform_amt<w>.csv and platform_cf<w>.csv with
        # the same configuration class and compare the two results.
        for w in range(1, 6):
            test_config_amt = self.test_conf_const.__class__
            data_amt, _ = crowdtruth.load(
                file=TEST_FILE_PREF + "platform_amt" + str(w) + ".csv",
                config=test_config_amt())
            test_config_cf = self.test_conf_const.__class__
            data_cf, _ = crowdtruth.load(
                file=TEST_FILE_PREF + "platform_cf" + str(w) + ".csv",
                config=test_config_cf())
            # Every key of the CF units "duration" must also be a key of the
            # AMT one (one-directional check: CF minus AMT is empty).
            self.assertEqual(
                (set(data_cf["units"]["duration"].keys()) -
                 set(data_amt["units"]["duration"].keys())),
                set([]))
            # Same one-directional key check for the workers "judgment".
            self.assertEqual(
                (set(data_cf["workers"]["judgment"].keys()) -
                 set(data_amt["workers"]["judgment"].keys())),
                set([]))
            # The set of values in the CF - AMT difference must be exactly {0}.
            # NOTE(review): presumably these are pandas Series aligned on the
            # worker id -- confirm against crowdtruth.load.
            self.assertEqual(
                set(data_cf["workers"]["judgment"] - data_amt["workers"]["judgment"]),
                set([0]))

    def test_folder(self):
        # Load the whole "dir/" fixture directory and check the number of rows
        # of the resulting workers (7), units (2) and judgments (12) tables.
        test_config = self.test_conf_const.__class__
        data, _ = crowdtruth.load(
            directory=TEST_FILE_PREF + "dir/",
            config=test_config())
        self.assertEqual(data["workers"].shape[0], 7)
        self.assertEqual(data["units"].shape[0], 2)
        self.assertEqual(data["judgments"].shape[0], 12)

    def test_empty_rows(self):
        # Load empty_rows.csv with three configurations and check the number
        # of judgment rows obtained with each of them.
        # Base TestConfig: 24 judgments expected.
        test_without = self.test_conf_const.__class__
        data_without, _ = crowdtruth.load(
            file=TEST_FILE_PREF + "empty_rows.csv",
            config=test_without())
        self.assertEqual(data_without["judgments"].shape[0], 24)

        # ConfigProcessJudg (lower-casing processJudgments): still 24.
        test_proc_judg = self.test_process_judg.__class__
        data_proc_judg, _ = crowdtruth.load(
            file=TEST_FILE_PREF + "empty_rows.csv",
            config=test_proc_judg())
        self.assertEqual(data_proc_judg["judgments"].shape[0], 24)

        # ConfigKeepEmptyRows (remove_empty_rows = False): 27 judgments.
        test_with = self.test_keep_empty_rows.__class__
        data_with, _ = crowdtruth.load(
            file=TEST_FILE_PREF + "empty_rows.csv",
            config=test_with())
        self.assertEqual(data_with["judgments"].shape[0], 27)

    def test_data_frame(self):
        # For w = 1..5, load platform_cf<w>.csv once through the ``file``
        # argument and once through ``data_frame`` (after pd.read_csv), then
        # compare the two results with the same checks as test_platform.
        for w in range(1, 6):
            test_config_file = self.test_conf_const.__class__
            data_file, _ = crowdtruth.load(
                file=TEST_FILE_PREF + "platform_cf" + str(w) + ".csv",
                config=test_config_file())
            df = pd.read_csv(TEST_FILE_PREF + "platform_cf" + str(w) + ".csv")
            test_config_df = self.test_conf_const.__class__
            data_df, _ = crowdtruth.load(
                data_frame=df,
                config=test_config_df())
            # Keys found via the data-frame path must exist in the file path.
            self.assertEqual(
                (set(data_df["units"]["duration"].keys()) -
                 set(data_file["units"]["duration"].keys())),
                set([]))
            self.assertEqual(
                (set(data_df["workers"]["judgment"].keys()) -
                 set(data_file["workers"]["judgment"].keys())),
                set([]))
            # The set of values in the difference must be exactly {0}.
            self.assertEqual(
                set(data_df["workers"]["judgment"] - data_file["workers"]["judgment"]),
                set([0]))




# NOTE: the comment lines below are non-Python repository-dump content that
# shares these source lines with test/test_load.py; it is kept verbatim.
# --------------------------------------------------------------------------------
# /tutorial/MACE.jar:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/MACE.jar
# --------------------------------------------------------------------------------
# /tutorial/Part III_ CrowdTruth Tutorial.pdf:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/Part III_ CrowdTruth Tutorial.pdf
# --------------------------------------------------------------------------------
# /tutorial/Part II_ CrowdTruth Tutorial.pdf:
# --------------------------------------------------------------------------------
# NOTE: the comment lines below are non-Python repository-dump content that
# shares these source lines with tutorial/crowd_vs_expert_performance.py; it
# is kept verbatim.
# https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/Part II_ CrowdTruth Tutorial.pdf
# --------------------------------------------------------------------------------
# /tutorial/Part IV_ CrowdTruth Tutorial.pdf:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/Part IV_ CrowdTruth Tutorial.pdf
# --------------------------------------------------------------------------------
# /tutorial/Part I_ CrowdTruth Tutorial.pdf:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/Part I_ CrowdTruth Tutorial.pdf
# --------------------------------------------------------------------------------
# /tutorial/crowd_vs_expert_performance.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 26 11:39:03 2018
"""

def compute_precision(true_positive, false_positive):
    """Compute precision, TP / (TP + FP).

    Returns the integer 0 when there are no true positives; this also covers
    the case where both counts are zero and the ratio would be undefined.
    """
    if true_positive == 0:
        return 0
    return float(true_positive) / float(true_positive + false_positive)

def compute_recall(true_positive, false_negative):
    """Compute recall, TP / (TP + FN).

    Returns the integer 0 when there are no true positives; this also covers
    the case where both counts are zero and the ratio would be undefined.
    """
    if true_positive == 0:
        return 0
    return float(true_positive) / float(true_positive + false_negative)

def compute_accuracy(true_positive, true_negative, false_positive, false_negative):
    """Compute accuracy, (TP + TN) / (TP + TN + FP + FN).

    Returns the integer 0 when there is no correct answer at all (TP + TN is
    zero), which also covers the case where all four counts are zero.
    """
    correct = true_positive + true_negative
    if correct == 0:
        return 0
    return float(correct) / float(correct + false_positive + false_negative)

def compute_f1score(precision, recall):
    """Compute the F1 score, 2 * P * R / (P + R).

    Returns the integer 0 when either precision or recall is zero.
    """
    if precision * recall == 0:
        return 0
    return float(2 * precision * recall) / float(precision + recall)

def compute_crowd_performance(df_crowd_results, crowd_score_column, experts_score_column):
    """Evaluate the answers of the crowd at each possible crowd score threshold.

    The thresholds are 0.05, 0.10, ..., 1.00. For each of them the rows of
    ``df_crowd_results`` are counted with ``count_positives_and_negatives``
    and the four metrics are derived from the counts.

    Returns a list of rows: a header row followed by one row per threshold,
    ``[thresh, TP, TN, FP, FN, precision, recall, accuracy, f1score]``.
    """
    rows = [["Thresh", "TP", "TN", "FP", "FN", "Precision", "Recall", "Accuracy", "F1-score"]]

    for i in range(5, 101, 5):
        # Dividing by a float keeps true division on Python 2 as well.
        thresh = i / 100.0

        true_pos, true_neg, false_pos, false_neg = count_positives_and_negatives(
            df_crowd_results, crowd_score_column, experts_score_column, thresh)

        precision = compute_precision(true_pos, false_pos)
        recall = compute_recall(true_pos, false_neg)
        accuracy = compute_accuracy(true_pos, true_neg, false_pos, false_neg)
        f1score = compute_f1score(precision, recall)

        rows.append([thresh, true_pos, true_neg, false_pos, false_neg,
                     precision, recall, accuracy, f1score])

    return rows

def compute_majority_vote(df_crowd_results, crowd_score_column, experts_score_column, no_workers):
    """Evaluate the answers of the crowd using majority vote.

    ``no_workers`` is used as the cut-off passed to
    ``count_positives_and_negatives``: a row counts as a crowd positive when
    its value in ``crowd_score_column`` is greater than or equal to it.

    Returns the tuple
    ``(TP, TN, FP, FN, precision, recall, accuracy, f1score)``.
    """
    true_pos, true_neg, false_pos, false_neg = count_positives_and_negatives(
        df_crowd_results, crowd_score_column, experts_score_column, no_workers)

    precision = compute_precision(true_pos, false_pos)
    recall = compute_recall(true_pos, false_neg)
    accuracy = compute_accuracy(true_pos, true_neg, false_pos, false_neg)
    f1score = compute_f1score(precision, recall)

    return (true_pos, true_neg, false_pos, false_neg,
            precision, recall, accuracy, f1score)

def count_positives_and_negatives(df_crowd_results, crowd_score_col, expert_score_col, crowd_value):
    """Count crowd answers against the expert answers.

    A row is a crowd positive when its ``crowd_score_col`` value is greater
    than or equal to ``crowd_value``, and an expert positive when its
    ``expert_score_col`` value equals 1.

    Returns ``(true_positive, true_negative, false_positive, false_negative)``.
    """
    true_positive = 0
    true_negative = 0
    false_positive = 0
    false_negative = 0

    # Select each column once and walk both in lockstep instead of re-indexing
    # the data frame positionally for every row.
    crowd_scores = df_crowd_results[crowd_score_col]
    expert_scores = df_crowd_results[expert_score_col]

    for crowd_score, expert_score in zip(crowd_scores, expert_scores):
        crowd_positive = crowd_score >= crowd_value
        expert_positive = expert_score == 1
        if crowd_positive and expert_positive:
            true_positive += 1
        elif crowd_positive:
            false_positive += 1
        elif expert_positive:
            false_negative += 1
        else:
            true_negative += 1
    return true_positive, true_negative, false_positive, false_negative

# NOTE: the comment lines below are non-Python repository-dump content that
# shares these source lines with tutorial/crowd_vs_expert_performance.py; it
# is kept verbatim.
# --------------------------------------------------------------------------------
# /tutorial/data/.DS_Store:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/data/.DS_Store
# --------------------------------------------------------------------------------
# /tutorial/data/results/binary-relex-annotations.csv:
# --------------------------------------------------------------------------------
# 1 | ,output.top_member_employee,aqs,aqs_initial
# 2 | false,150,0.960090518014,0.867665418227
# 3 | true,150,0.921697021187,0.795133437991
# 4 |
# --------------------------------------------------------------------------------
# /tutorial/data/results/binary-relex-units.csv:
# --------------------------------------------------------------------------------
# 1 | unit,duration,input.b1,input.b2,input.e1,input.e2,input.sent_id,input.sentence,input.term1,input.term2,job,output.top_member_employee,output.top_member_employee.annotations,output.top_member_employee.unique_annotations,worker,uqs,unit_annotation_score,uqs_initial,unit_annotation_score_initial
# 2 | 897534786,140.8,6,3,8,4,UAD-A-1535,"On Wednesday , Lyon led through Karim Benzema 's 26th goal of the season in the 55th minute , but Carlos Tevez leveled in the 87th .",Karim
Benzema,Lyon,../data/relex-binary-choice,"Counter({'false': 13, 'true': 2})",15,2,15,0.838395921467,"Counter({'false': 0.917969296194994, 'true': 0.08203070380500596})",0.752380952381,"Counter({'false': 0.8666666666666667, 'true': 0.13333333333333333})" 3 | 897534787,48.5333333333,23,30,25,32,UAD-A-2322,"`` We have all this library content , and we 've been surprised at how much interest there is in it , `` Jeff Zucker , the chief executive of NBC Universal , said recently .",Jeff Zucker,NBC Universal,../data/relex-binary-choice,"Counter({'true': 15, 'false': 0})",15,1,15,1.0,"Counter({'true': 1.0, 'false': 0.0})",1.0,"Counter({'true': 1.0, 'false': 0.0})" 4 | 897534788,190.933333333,0,14,2,17,UAD-A-0024,"Addie Wagenknecht ( born Portland , Oregon ) is an American artist living in New York City .",Addie Wagenknecht,New York City,../data/relex-binary-choice,"Counter({'false': 15, 'true': 0})",15,1,15,1.0,"Counter({'false': 1.0, 'true': 0.0})",1.0,"Counter({'false': 1.0, 'true': 0.0})" 5 | 897534789,51.8,2,0,4,1,UAD-A-2211,"Toyota President Katsuaki Watanabe said Thursday that Toyota will speed up delivery of its plug-in hybrid from 2010 to the end of 2009 , while the Volt is due in showrooms in late 2010 .",Katsuaki Watanabe,Toyota,../data/relex-binary-choice,"Counter({'true': 13, 'false': 2})",15,2,15,0.95706901474,"Counter({'true': 0.979734541799039, 'false': 0.02026545820096097})",0.752380952381,"Counter({'true': 0.8666666666666667, 'false': 0.13333333333333333})" 6 | 897534790,128.6,0,23,2,26,UAD-A-0115,"Andrea Bargnani , nicknamed `` Il Mago '' ( translated to `` The Magician '' ) , ( born October 26 1985 in Rome , Italy ) is an Italian professional basketball player with the Toronto Raptors of the National Basketball Association .",Andrea Bargnani,"Rome , Italy",../data/relex-binary-choice,"Counter({'false': 15, 'true': 0})",15,1,15,1.0,"Counter({'false': 1.0, 'true': 0.0})",1.0,"Counter({'false': 1.0, 'true': 0.0})" 7 | 
897534791,84.0,21,10,23,13,UAD-A-0543,"David Martin Blake ( born January 18 , 1970 in Compton , California ) , better known by his stage name DJ Quik , is a rapper and record producer from Compton , California .",DJ Quik,"Compton , California",../data/relex-binary-choice,"Counter({'false': 15, 'true': 0})",15,1,15,1.0,"Counter({'false': 1.0, 'true': 0.0})",1.0,"Counter({'false': 1.0, 'true': 0.0})" 8 | 897534792,49.2666666667,0,4,3,6,UAD-A-0454,"Charles B. Rangel of New York , chairman of the House Ways and Means Committee , the bill would also overhaul corporate taxes by eliminating many major tax breaks and lowering overall tax rates .",Charles B. Rangel,New York,../data/relex-binary-choice,"Counter({'false': 14, 'true': 1})",15,2,15,1.0,"Counter({'false': 1.0, 'true': 0.0})",0.866666666667,"Counter({'false': 0.9333333333333333, 'true': 0.06666666666666667})" 9 | 897534793,73.2,8,16,11,17,UAD-A-0196,"A teary Florida judge said Friday he wanted Anna Nicole Smith to be buried in the Bahamas , ending two weeks of bizarre wranglings over the remains of the former Playboy model and billionaire 's widow .",Anna Nicole Smith,Bahamas,../data/relex-binary-choice,"Counter({'false': 14, 'true': 1})",15,2,15,1.0,"Counter({'false': 1.0, 'true': 0.0})",0.866666666667,"Counter({'false': 0.9333333333333333, 'true': 0.06666666666666667})" 10 | 897534794,75.2666666667,1,34,3,35,UAD-A-0951,"If Barack Obama wins the White House in November , we may well look back on this week as the turning point when he started to ease lingering doubts about his readiness to lead America .",Barack Obama,America,../data/relex-binary-choice,"Counter({'true': 8, 'false': 7})",15,2,15,0.475610130877,"Counter({'true': 0.5768584318701289, 'false': 0.42314156812987114})",0.466666666667,"Counter({'true': 0.5333333333333333, 'false': 0.4666666666666667})" 11 | 897534795,45.4,0,11,2,15,UAD-A-1875,"Steven Chu , a Nobel Laureate in physics who heads the Lawrence Berkeley National Laboratory , and Dan Reicher , 
a former assistant energy secretary in the Clinton administration who now directs the Google.org foundation 's energy and climate change initiatives .",Steven Chu,Lawrence Berkeley National Laboratory,../data/relex-binary-choice,"Counter({'true': 13, 'false': 2})",15,2,15,0.952315105766,"Counter({'true': 0.9774366183305948, 'false': 0.0225633816694051})",0.752380952381,"Counter({'true': 0.8666666666666667, 'false': 0.13333333333333333})" 12 | -------------------------------------------------------------------------------- /tutorial/data/results/binary-relex-workers.csv: -------------------------------------------------------------------------------- 1 | worker,duration,job,judgment,unit,wqs,wwa,wsa,wqs_initial,wwa_initial,wsa_initial 2 | 3587109,25.3333333333,1,3,3,0.514862631315,0.716288803267,0.718791957891,0.477969829108,0.690476190476,0.692232166295 3 | 4316379,30.0,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0 4 | 4688131,136.0,1,1,1,1.0,1.0,1.0,0.925836307966,0.928571428571,0.997054485502 5 | 4711962,35.0,1,1,1,0.000530034180477,0.0225329030776,0.0235226760907,0.00547832134891,0.0714285714286,0.0766964988847 6 | 6336109,122.0,1,1,1,1.0,1.0,1.0,0.925836307966,0.928571428571,0.997054485502 7 | 6344072,86.75,1,4,4,0.979497956209,0.980454598232,0.99902428728,0.942512346851,0.946428571429,0.995862102333 8 | 6744840,93.0,1,1,1,0.977810802712,0.9780669056,0.99973815402,0.845480506142,0.857142857143,0.986393923832 9 | 7051387,66.3333333333,1,3,3,0.858503813619,0.900925496877,0.952913217125,0.705440660838,0.785714285714,0.89783356834 10 | 7385617,41.0,1,1,1,0.977810802712,0.9780669056,0.99973815402,0.845480506142,0.857142857143,0.986393923832 11 | 8108990,100.0,1,4,4,0.979497956209,0.980454598232,0.99902428728,0.942512346851,0.946428571429,0.995862102333 12 | 8715359,63.0,1,2,2,0.989218479803,0.989345376885,0.999871736316,0.922254321779,0.928571428571,0.993196961916 13 | 10959404,86.0,1,1,1,0.977810802712,0.9780669056,0.99973815402,0.845480506142,0.857142857143,0.986393923832 14 | 
11598752,117.5,1,2,2,0.801871620185,0.868111212781,0.923696881666,0.640165042945,0.75,0.853553390593 15 | 13300986,60.0,1,2,2,1.0,1.0,1.0,0.925836307966,0.928571428571,0.997054485502 16 | 13581319,16.0,1,1,1,0.000427103320831,0.0202500662998,0.0210914529616,0.00547832134891,0.0714285714286,0.0766964988847 17 | 16844474,68.0,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0 18 | 16854635,125.333333333,1,3,3,0.972459471683,0.973742789142,0.998682077574,0.948061563121,0.952380952381,0.995464641277 19 | 17950689,159.666666667,1,3,3,0.263534786738,0.501575481749,0.525414013099,0.279449416481,0.5,0.558898832962 20 | 18531561,71.6666666667,1,3,3,0.789864318116,0.869163381206,0.908763916192,0.65686194891,0.761904761905,0.862131307944 21 | 18666417,204.0,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0 22 | 18672993,26.0,1,1,1,0.977810802712,0.9780669056,0.99973815402,0.845480506142,0.857142857143,0.986393923832 23 | 18960682,41.0,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0 24 | 20929875,33.0,1,1,1,0.977810802712,0.9780669056,0.99973815402,0.845480506142,0.857142857143,0.986393923832 25 | 21457135,37.0,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0 26 | 21490235,140.75,1,4,4,0.979497956209,0.980454598232,0.99902428728,0.942512346851,0.946428571429,0.995862102333 27 | 21665495,34.75,1,4,4,0.979497956209,0.980454598232,0.99902428728,0.942512346851,0.946428571429,0.995862102333 28 | 21865639,25.0,1,1,1,0.977810802712,0.9780669056,0.99973815402,0.845480506142,0.857142857143,0.986393923832 29 | 23503585,293.666666667,1,3,3,0.809380910836,0.889111707086,0.910325333011,0.701587301587,0.809523809524,0.866666666667 30 | 24043308,283.0,1,2,2,0.262981971642,0.514503995495,0.511136889012,0.288400847916,0.535714285714,0.538348249442 31 | 24403612,51.0,1,4,4,0.979649262642,0.980606076661,0.999024262604,0.924045314872,0.928571428571,0.995125723709 32 | 24636399,137.5,1,2,2,1.0,1.0,1.0,1.0,1.0,1.0 33 | 25486037,123.0,1,1,1,1.0,1.0,1.0,0.925836307966,0.928571428571,0.997054485502 34 | 
26492726,41.25,1,4,4,0.619911081838,0.784405765179,0.790293887879,0.562067072613,0.732142857143,0.767701367472 35 | 26531841,91.0,1,2,2,1.0,1.0,1.0,0.925836307966,0.928571428571,0.997054485502 36 | 26770238,62.0,1,1,1,1.0,1.0,1.0,0.925836307966,0.928571428571,0.997054485502 37 | 27934334,34.0,1,1,1,0.977810802712,0.9780669056,0.99973815402,0.845480506142,0.857142857143,0.986393923832 38 | 28103910,48.0,1,1,1,0.977810802712,0.9780669056,0.99973815402,0.845480506142,0.857142857143,0.986393923832 39 | 28301350,32.3333333333,1,3,3,0.875739295495,0.918180308839,0.953777038197,0.751974105885,0.833333333333,0.902368927062 40 | 28351359,32.0,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0 41 | 29363385,63.75,1,4,4,0.901432193397,0.932836127365,0.966334993847,0.774975594357,0.839285714286,0.923375176255 42 | 29411108,40.5,1,2,2,0.989386963379,0.989513885706,0.999871732647,0.885468039881,0.892857142857,0.991724204667 43 | 30312592,41.6666666667,1,3,3,0.789864318116,0.869163381206,0.908763916192,0.65686194891,0.761904761905,0.862131307944 44 | 30777913,26.0,1,3,3,0.858503813619,0.900925496877,0.952913217125,0.705440660838,0.785714285714,0.89783356834 45 | 30941227,185.0,1,3,3,0.809380910836,0.889111707086,0.910325333011,0.701587301587,0.809523809524,0.866666666667 46 | 31089452,26.6666666667,1,3,3,0.789864318116,0.869163381206,0.908763916192,0.65686194891,0.761904761905,0.862131307944 47 | 31228275,90.75,1,4,4,0.979649262642,0.980606076661,0.999024262604,0.924045314872,0.928571428571,0.995125723709 48 | 31329809,62.75,1,4,4,0.851569096032,0.911139139449,0.934620256295,0.736491609358,0.821428571429,0.896598480958 49 | 31883685,23.5,1,2,2,0.988118578168,0.988278986456,0.999837689266,0.922254321779,0.928571428571,0.993196961916 50 | 32363453,26.0,1,2,2,0.0,0.0,1e-08,0.0,0.0,0.0 51 | 32695853,15.0,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0 52 | 33057016,143.0,1,1,1,0.975046589374,0.975370393876,0.999668018935,0.845480506142,0.857142857143,0.986393923832 53 | 
33110177,92.0,1,3,3,0.972459471683,0.973742789142,0.998682077574,0.948061563121,0.952380952381,0.995464641277 54 | 33330229,55.5,1,4,4,0.979497956209,0.980454598232,0.99902428728,0.942512346851,0.946428571429,0.995862102333 55 | 33427067,26.25,1,4,4,0.901432193397,0.932836127365,0.966334993847,0.774975594357,0.839285714286,0.923375176255 56 | 33435788,38.5,1,4,4,0.901432193397,0.932836127365,0.966334993847,0.774975594357,0.839285714286,0.923375176255 57 | 33878524,231.0,1,4,4,0.979497956209,0.980454598232,0.99902428728,0.942512346851,0.946428571429,0.995862102333 58 | 34333895,114.0,1,1,1,0.977810802712,0.9780669056,0.99973815402,0.845480506142,0.857142857143,0.986393923832 59 | 34650780,13.0,1,1,1,0.975046589374,0.975370393876,0.999668018935,0.845480506142,0.857142857143,0.986393923832 60 | 35774053,31.0,1,3,3,0.858503813619,0.900925496877,0.952913217125,0.705440660838,0.785714285714,0.89783356834 61 | 35936381,37.0,1,1,1,1.0,1.0,1.0,0.925836307966,0.928571428571,0.997054485502 62 | 36155226,33.0,1,2,2,0.958504204318,0.960454017336,0.997969904875,0.922254321779,0.928571428571,0.993196961916 63 | 36218728,78.5,1,2,2,1.0,1.0,1.0,0.925836307966,0.928571428571,0.997054485502 64 | 36261795,61.5,1,2,2,1.0,1.0,1.0,0.925836307966,0.928571428571,0.997054485502 65 | 36340274,45.0,1,1,1,0.977810802712,0.9780669056,0.99973815402,0.845480506142,0.857142857143,0.986393923832 66 | 36350388,77.0,1,1,1,0.977810802712,0.9780669056,0.99973815402,0.845480506142,0.857142857143,0.986393923832 67 | 36837746,59.75,1,4,4,0.97954296101,0.980499654247,0.999024279883,0.942512346851,0.946428571429,0.995862102333 68 | 36851940,422.5,1,4,4,0.979497956209,0.980454598232,0.99902428728,0.942512346851,0.946428571429,0.995862102333 69 | -------------------------------------------------------------------------------- /tutorial/data/results/mace_workers_rte.csv: -------------------------------------------------------------------------------- 1 | worker,competence 2 | A2K5ICP43ML4PW,0.8042523708 3 | 
A15L6WGIK3VU7N,0.8551975291 4 | AHPSMRLKAEJV,0.6904290914 5 | A25QX7IUS1KI5E,0.4734489819 6 | A2RV3FIO3IAZS,0.3481403433 7 | ATP8SOHDM9QOY,0.7975533777 8 | A18941IO2ZZWW6,0.0079000147 9 | A14JQX7IFAICP0,0.0681179966 10 | A1GD3MQELM1R9M,0.6560087179 11 | A11GX90QFWDLMM,0.0133964741 12 | AXBQF8RALCIGV,0.2094276064 13 | AEX5NCH03LWSG,0.7951642176 14 | A1Q4VUJBMY78YR,0.0043825526 15 | AMO4BPP31P1QA,0.98496779 16 | A34AZLVR1033TZ,0.8804644211 17 | A2VKQM4ESHIS95,0.6912635248 18 | AHFO0JTF5WO8J,0.8027015759 19 | A2MN1MDFIH9CEW,0.7880238583 20 | AKZ9MWLHRE34O,0.7795827357 21 | A1JGQL8LIRM598,0.9703724975 22 | A1WH4J0UT8YAPQ,0.8795514977 23 | A22CRWMZUX7FFR,0.7683437433 24 | A37C370E0K9GDI,0.3015301722 25 | A3ODY8U1EQDB8S,0.9703724975 26 | A1FESKIC2CUD4Y,0.699349425 27 | A1APFXFJHE25EE,0.793737524 28 | A1DCEOFAUIDY58,0.7722998627 29 | A19IBSKBTABMR3,0.6365990543 30 | A17RPF5ZMO75GW,0.6918903862 31 | A3U7T47F498T1P,0.8752486304 32 | A3JEUXPU5NEHXR,0.8354444091 33 | A14WWG6NKBDWGP,0.6876689245 34 | A2CJUR18C55EF4,0.7997058686 35 | AKTL5L2PJ2XCH,0.969744313 36 | A37BYUF88E7RZ0,0.3816363558 37 | A19G248ZRHWWHT,0.7723265387 38 | A2BYHSKILYQWEU,0.8786729388 39 | AD4FDBEA9GGFE,0.6853870205 40 | A19PMUTQXDIPLZ,0.8793982862 41 | A3HR7E5S52PME5,0.9702377189 42 | A366APTH5ECXG1,0.7845899943 43 | A16WH8PG6BUGSR,0.7778293147 44 | A3EX8IW960QJ3I,0.8353706726 45 | A2NCIFVGIZESDF,0.7030594467 46 | A1BLK8SJHCYOQK,0.7839340394 47 | A17HNBZF5A1CWF,0.6217535718 48 | A1L1WI1BXLLZZM,0.7841689274 49 | AS51Z6DBWM9JM,0.4396182038 50 | A2O3686VJEDHBU,0.6889456173 51 | A3TBGB7VGRRYCT,0.7837382877 52 | A32D1CITAL0BAY,0.9702429064 53 | A1UQKF8NB9FVOA,0.7610389949 54 | A5SQMVCBLLR10,0.7343467153 55 | A261BBDBVRB79K,0.7339791687 56 | A3Q9V4M3V0L77A,0.5312458766 57 | A389Z7A60D4L8I,0.6546240703 58 | A2XAB2SZD9816Y,0.9525140975 59 | AJR1HBQLBT2UT,0.9474274079 60 | A2KST2DIWAB8Z4,0.7832733012 61 | AD1BB9AC3IZXQ,0.9703780504 62 | A30DASEJS2ZINX,0.7832941772 63 | A1K0I2MPF4CF13,0.9703780504 64 | 
A34JFTRXZ9U33N,0.7886261988 65 | A39APWH62XBLR9,0.7648590765 66 | AO49JS7KNRQ5A,0.5983889244 67 | AH02ZNR6AGX8J,0.6820052672 68 | A5M1CDOBQXIND,0.6831964742 69 | A1FSNT6DWYDIWA,0.8682643059 70 | A1M0SEWUJYX9K0,0.8372698677 71 | A2G80S3EM9SDBD,0.7685230437 72 | A9YV82C0R0HHU,0.9702173203 73 | AZG7NPU1UZMPM,0.7762293741 74 | AM367N2MAMQ15,0.7887436443 75 | A24R6AHP7B0TLN,0.8776565699 76 | A2HNP1YL1IBFMU,0.496178289 77 | A3FVCQZ8DA0UV9,0.9702599019 78 | A16D3RBFGNRKL8,0.7883612555 79 | A37R966EV2XC6Y,0.9702599019 80 | AHN23PP23PHZK,0.6963968204 81 | A1ZGDXSHUPEQD9,0.927440267 82 | A1RSPO7YXD7OS,0.6972156307 83 | A1NI967I2I3UXN,0.7584760244 84 | A18SO7XEA5K3NR,0.4203796794 85 | A24KZL47I6CH4C,0.6706037695 86 | A1CP0KZJS5LSIF,0.7985335037 87 | A1ZVGUVI9TAZJX,0.7597600983 88 | A1NCNZ96FZP9MC,0.7702653619 89 | A2XRLW6EWPXFZJ,0.4921192948 90 | A1IPO1FAD1A60X,0.6967944692 91 | A1S2LOH6K9Q3Y8,0.7884784688 92 | A15GNVZ1EXQMYR,0.6264526972 93 | A1KS77MYC4GGZ3,0.7787631453 94 | A1ORRI82WFHIKZ,0.8102697767 95 | A28W67ZXNQ3K19,0.970293979 96 | A99F7DB1MDLVK,0.3830327637 97 | A1KFIQBETMB81I,0.9702598188 98 | A38D94OEB20V2S,0.5946844381 99 | A2TFRXLFNLRE10,0.9700396759 100 | A15MN5MDG4D7Q9,0.8901152161 101 | A37XSAE8QVBVGS,0.779293902 102 | A3MWTGQFPYGGW6,0.7046994221 103 | A29CFIMDI0ID2V,0.6111290018 104 | AQ1WI10VDQ7FE,0.6647064573 105 | A5TYBSH7AYMK7,0.806087306 106 | AYZHZZZE0UP0A,0.1033924601 107 | ADMPXNDZ8F6QA,0.6769164943 108 | APOA130JRSXAV,0.3533638434 109 | AJ9MI2GEX5R28,0.9687556659 110 | A2QH0Y8GLQ090K,0.1465465714 111 | A1QL3BQX00Y2OC,0.9703874765 112 | AVIVV0WYFK6EJ,0.8795710643 113 | AQ6JP5VBCTYDN,0.381831636 114 | A3NWTTG6VC0N8S,0.6869583588 115 | A1G42XP7E394WU,0.9703874765 116 | A3B9CRWYGCJ1GJ,0.6595400929 117 | A1XXMVPHRKUTAH,0.2058548883 118 | A3LMXIKF4NPSBN,0.6177989089 119 | A358E8MG0GCC37,0.4833418736 120 | A1LY3NJTYW9TFF,0.5745088758 121 | A1I48Z1CH0458V,0.8685471752 122 | A14Q86RX5HGCN,0.8894567167 123 | A3BMXJJ6MRE5C1,0.7716365408 124 | 
A7B3KMBP39QWF,0.698279414 125 | A1ZXM6L5BLKBW9,0.6985791855 126 | A2ELWPBL46UA0E,0.7890638619 127 | A1IHLPH5UWHAB6,0.9700155026 128 | A32QZGM6C949ZI,0.9700155026 129 | A151VN1BOY29J1,0.6699912248 130 | ATJQTHNPVWH9,0.8619338492 131 | A153JJMSL7V3C9,0.7673204833 132 | A1SM3O307GMDK8,0.0825605931 133 | AZKV0OXDZVDYR,0.2218907252 134 | A1BG7KLBHHCLYP,0.2871852543 135 | A3BY5HXZIM04PN,0.6182504742 136 | A3RW8TMXMQ7I6B,0.1912749308 137 | A2QPX2MS844TYJ,0.0131331017 138 | A3VMJIW0DP7007,0.5422139461 139 | AK7YVYGMIF6T0,0.4991147021 140 | A2PAV7JATODL20,0.6905545696 141 | A2CET0W1FJGMV,0.0204578623 142 | A3KQFLPDA52RE,0.7705330343 143 | A27SZLZECUSPWB,0.4097121782 144 | ADX3CN405XAMF,0.1379445431 145 | A2YRAPAV1WNYWB,0.143316194 146 | A15H21NMKA3TE4,0.8413285705 147 | A1W8YY6KOEOG6,0.8601810211 148 | A1ZIUN0W12X1SP,0.1732256414 149 | AMVOSOXRSRMHD,0.6778123858 150 | AADS3JU8O57J3,0.8660866228 151 | A2LL0VL0D92ZL1,0.7879155109 152 | A3N3ESBU769SMG,0.0365184675 153 | ADAGUJNWMEPT6,0.4863084392 154 | A1SHEWULPSH627,0.7459022305 155 | AX2XQPTZ1H49C,0.8807378856 156 | AFZECAKP08NZA,0.7838482805 157 | A1JPASJWRS7QMK,0.6869591704 158 | A2CGA60V75C34Q,0.9703890993 159 | A1BCDJEVMCR5HD,0.9703890993 160 | A1B0YWANQX8P6S,0.9703890993 161 | A3SJOKB3ZHQDQ0,0.8795705696 162 | AS7QSZPN28VFT,0.706259659 163 | AWK3LFAT0IHP3,0.8498743253 164 | A2OOM1MEGTT1MJ,0.8791336697 165 | A1XUMT8PKEPRR1,0.8790441539 166 | -------------------------------------------------------------------------------- /tutorial/data/results/mace_workers_temp.csv: -------------------------------------------------------------------------------- 1 | worker,competence 2 | A2HTGQE4AACVRV,0.9422128633893267 3 | AYHHOK9GDSWNH,0.9287597990542534 4 | A1QRQZWBL1SVEX,0.9422128633893267 5 | A3G0MGLBT484I1,0.0006669816621639987 6 | A7NC1H5ZK7TO0,0.9422128633893267 7 | A3MPXWFMR0Y2LK,0.9054656560138375 8 | A11GX90QFWDLMM,0.0016343498048797555 9 | A3PIXX8DQGYHGI,0.0007262613141985392 10 | A2KONK3TIL5KVX,0.0020043474623211385 11 | 
A18941IO2ZZWW6,0.05601111919006349 12 | A5C6BAAHFTJFP,0.8947635584844109 13 | A18WNE09USHJSG,0.8835686712120338 14 | A3J0VZOPOGW1QC,0.92579596772172 15 | AUPV5WJK85K4F,0.8354420813343054 16 | A1PBRFFOTO5ZU6,0.5942272499703642 17 | AXTU992Z7V3ZX,0.9317187553796702 18 | A26KXB5XCGERD2,0.7735857852183209 19 | A370556EFVZ9W4,0.7652079307179244 20 | AEX5NCH03LWSG,0.9075030159924901 21 | A2G80S3EM9SDBD,0.9442909864549448 22 | A38UUFVXEND88G,0.7294744145202884 23 | A1EPIK54UEQYQB,0.9036726564515092 24 | A2VKPMR5HCCZF5,0.9269306623117476 25 | A1GR4JB3DODIQ1,0.9650185186666024 26 | AEU1NROWHLIJF,0.4999371984469024 27 | A1UV8TD7KO88IM,0.9879750309444376 28 | A26J29AVWFIMNY,0.7658515078556721 29 | AZE6KLDFEXBM0,0.7910969991350698 30 | A3BD4NONKGONRM,0.8955467162086959 31 | A2BEQQH909NJRN,0.6166726188970204 32 | AA5CLQLMVT5GA,0.6589730613785806 33 | A9K43JFO379YG,0.8806138951222766 34 | A2GLG8R57PVEQU,0.9838161769697732 35 | A16QMNGIR7N53M,0.4880463057584336 36 | AF9RXZRI2WUNQ,0.4880459902804257 37 | A1IYVTS4ZVIGSE,0.9422240126011346 38 | A1VG58HXS2O3JX,0.02425776220055266 39 | A1YG3DEOOKQYTA,0.4960539970266538 40 | A2O6FTKXAJGRSQ,0.8897794577837772 41 | AEPI85FMNTIA8,0.8298294016295922 42 | AK7YVYGMIF6T0,0.7271446029350374 43 | A1123L7ANYUTG0,0.970281601587563 44 | AWXSX0YWT2J50,0.8003624941325318 45 | A2MIF2GUSZLJ0X,0.2779888440921644 46 | A33QAD7EN7F6XX,0.9702819015956724 47 | A37Q72KDR2VIZ8,0.7569445242528774 48 | A32IZC32FQKVRX,0.9241283036187528 49 | A2VH5LBZ5S5IBL,0.4119048071402508 50 | A2B6ZPXXMJB1B1,0.9421942246964944 51 | A3FLN53WRJ0C7W,0.9128926963521516 52 | A3K7B6ZPJH2EVC,0.7625501135668841 53 | A3KKCYI5HAHQ3,0.7718291786092261 54 | A2GKEUW6PGONEC,0.9352630887159878 55 | A1KFIQBETMB81I,0.9388464864112104 56 | A1AML5YZXW4FIS,0.9421680069314979 57 | AM7BYJ804O5TF,0.8738336746935814 58 | A282GBBIPRRVIB,0.9421940248694168 59 | A13PCLSK1JA8QL,0.03209801744074072 60 | A7R1L4ZYYPOZM,0.7666775224030519 61 | A23NXGIV5YVQHJ,0.9421942259092986 62 | 
ATJQTHNPVWH9,0.7607011544513521 63 | A2Q547UYDL9SFU,0.8749396034674204 64 | A1ZMDED6CWMABK,0.9334673947230638 65 | AV05VG3COGLW0,0.7766485460298138 66 | A2DV8HITPQN4AJ,0.2072622652989162 67 | A3OHQRF1MDQ99B,0.7637898087574925 68 | A3JWUNYRV5KTUS,0.9422262406896552 69 | AZ37E749R4SUA,0.9422262406896552 70 | A2DTM58I82M96Y,0.3894915758747558 71 | A17743NDSCO8P5,0.3657212150823284 72 | A1RIILHVSAY5IW,0.7669498708777065 73 | A2OZTBCBOCHF9E,0.0312937905934396 74 | A20BKXSNZS2O3J,0.7321276265038822 75 | A1ER3JLKRM3046,0.5647586134953013 76 | A1DE9XQJ5FSGIB,0.5776551692030749 77 | A3G3JWL19SBHCH,0.7668053646765115 78 | -------------------------------------------------------------------------------- /tutorial/data/results/multchoice-people-video-annotations.csv: -------------------------------------------------------------------------------- 1 | ,output.selected_answer,aqs,aqs_initial 2 | archeologist,1000,1e-08,1e-08 3 | architect,1000,1e-08,1e-08 4 | artist,1000,0.2512851218961939,0.2520942408376964 5 | astronaut,1000,0.9999680492226728,0.9523809523809523 6 | athlete,1000,0.2801649773096277,0.2788104089219331 7 | businessperson,1000,0.06188726908753386,0.09618163054695561 8 | celebrity,1000,0.7459196879955364,0.6066315558481457 9 | chef,1000,1e-08,1e-08 10 | criminal,1000,0.12723840573052922,0.15306122448979592 11 | engineer,1000,0.036086836577669285,0.029914529914529916 12 | farmer,1000,0.0013543982464316537,0.02631578947368421 13 | fictionalcharacter,1000,0.3061545340475326,0.30235457063711935 14 | journalist,1000,0.07588007160469219,0.07522644927536232 15 | judge,1000,1e-08,1e-08 16 | lawyer,1000,1e-08,1e-08 17 | militaryperson,1000,0.8461288182796715,0.6695402298850575 18 | model,1000,0.4571709228250726,0.3812154696132596 19 | monarch,1000,0.31936234307772043,0.3198757763975155 20 | none,1000,0.857716020329372,0.634973554220735 21 | other,1000,0.4495046609996766,0.35614836519519577 22 | philosopher,1000,0.00040181457411572004,0.008620689655172414 23 | 
politician,1000,0.05617034666730035,0.052410901467505246 24 | presenter,1000,0.42955579353065915,0.3083160800552106 25 | producer,1000,0.0006617932876057423,0.009216589861751152 26 | psychologist,1000,1e-08,1e-08 27 | scientist,1000,0.40745494918802566,0.3220470470470471 28 | sportsmanager,1000,1e-08,1e-08 29 | writer,1000,0.05708570799473144,0.08579335793357933 30 | -------------------------------------------------------------------------------- /tutorial/data/results/multchoice-relex-annotations.csv: -------------------------------------------------------------------------------- 1 | ,output.relations,aqs,aqs_initial 2 | alternate_names,150,1e-08,1e-08 3 | cause_of_death,150,1e-08,1e-08 4 | charges,150,1e-08,1e-08 5 | children,150,1e-08,1e-08 6 | employee_or_member_of,150,0.248137168368,0.249240121581 7 | founded_org,150,1e-08,1e-08 8 | none,150,0.838371858268,0.605691056911 9 | origin,150,0.234005885279,0.237179487179 10 | place_of_birth,150,0.694388915554,0.579439252336 11 | place_of_death,150,0.00370611328581,0.107142857143 12 | place_of_headquarters,150,0.158468654582,0.0952380952381 13 | places_of_residence,150,0.778707907488,0.676755447942 14 | schools_attended,150,1e-08,1e-08 15 | spouse,150,1e-08,1e-08 16 | subsidiaries,150,1e-08,1e-08 17 | title,150,1e-08,1e-08 18 | top_member_employee_of_org,150,0.944476329146,0.795133437991 19 | -------------------------------------------------------------------------------- /tutorial/data/results/openextr-persvid-dimred-workers.csv: -------------------------------------------------------------------------------- 1 | worker,duration,job,judgment,unit,wqs,wwa,wsa,wqs_initial,wwa_initial,wsa_initial 2 | 3587109,15.2,1,50,50,0.376816793549,0.526484975622,0.715721836324,0.281532701135,0.426392105194,0.660267152476 3 | 4316379,27.0416666667,1,24,24,0.10284460534,0.30113026848,0.341528621015,0.140515122051,0.340782323323,0.412331017291 4 | 
6367365,41.26,1,50,50,0.281285198363,0.476561213457,0.590239386715,0.2707996311,0.445482164002,0.607879850154 5 | 6377879,29.6111111111,1,18,18,0.0730684595949,0.249474587329,0.292889389566,0.110567389235,0.297331343578,0.371865905236 6 | 6481150,272.965517241,1,29,29,0.532959031384,0.613808252978,0.868282609102,0.409092813258,0.506799915214,0.807207738157 7 | 11131207,66.7,1,50,50,0.178509117408,0.389686192234,0.458084276439,0.191524219731,0.386717625135,0.495255988562 8 | 12999735,72.1052631579,1,19,19,0.219219714268,0.434501377996,0.504531689356,0.232752845483,0.435352526896,0.534630744291 9 | 14054543,21.7407407407,1,27,27,0.380359138129,0.533013231175,0.713601681689,0.286144839855,0.42975915052,0.66582605515 10 | 15298546,25.9,1,50,50,0.448895099879,0.5562306307,0.807030528531,0.319947649301,0.43820096986,0.730139071585 11 | 15439740,24.34,1,50,50,0.367903934111,0.522747040478,0.703789606872,0.265183090641,0.41419771013,0.640233116106 12 | 15965551,34.28,1,50,50,0.15036027154,0.363557512987,0.413580427218,0.160798414197,0.362445884121,0.443648062348 13 | 18972023,57.1333333333,1,15,15,0.312630662436,0.501554318325,0.623323638165,0.289424574958,0.462036299185,0.626410902061 14 | 23503585,105.42,1,50,50,0.416162875068,0.551064364437,0.755198306995,0.323272686,0.458982596695,0.704324495802 15 | 26024737,15.1904761905,1,21,21,0.330786114022,0.452718865871,0.730665627079,0.213892487709,0.339627765265,0.62978504582 16 | 28526462,24.6363636364,1,11,11,0.546327405199,0.618485676787,0.883330731339,0.4401538529,0.525596331105,0.837437072618 17 | 30315390,26.0,1,27,27,0.174031960687,0.379278812189,0.458849677582,0.201783626908,0.39477848323,0.511131268495 18 | 31508822,183.541666667,1,24,24,0.402479442365,0.559748996973,0.719035575841,0.321726210279,0.473961970328,0.678801740267 19 | 31883685,64.08,1,50,50,0.213095051668,0.423918458674,0.502679341528,0.224770343811,0.417705640153,0.538107035683 20 | 
38202325,144.0,1,45,45,0.340123160545,0.509533910969,0.667518202858,0.304589054599,0.456585947817,0.667101245791 21 | 39017656,31.28,1,50,50,0.301293063663,0.471055858444,0.639612178179,0.247948845567,0.404901195529,0.612368766269 22 | 39127197,15.6538461538,1,26,26,0.191290357328,0.394820292472,0.484499811624,0.211489925662,0.40036734961,0.528239692542 23 | 40421145,20.6666666667,1,3,3,0.0914226507651,0.288847660355,0.316508192079,0.12197818709,0.332296207801,0.367076675046 24 | 40925305,16.62,1,50,50,0.231285040136,0.4304272908,0.53733823361,0.204276644123,0.385309576692,0.53016238495 25 | 41746613,14.88,1,50,50,0.341856281258,0.498177633366,0.686213628156,0.245555890083,0.396520756152,0.619276257983 26 | 44453708,52.5172413793,1,29,29,0.418160233361,0.561723947142,0.744423013276,0.313041409563,0.460433343804,0.679884317189 27 | 44637936,20.32,1,50,50,0.359522331113,0.511770065058,0.702507543251,0.263270919369,0.407854874881,0.645501465308 28 | 44803262,13.88,1,50,50,0.469890827946,0.573487565084,0.819356611292,0.328572795949,0.448343907755,0.732858839534 29 | 44809353,57.53125,1,32,32,0.18652941512,0.378702432689,0.492548763934,0.132386554324,0.298489583662,0.443521521589 30 | -------------------------------------------------------------------------------- /tutorial/data/results/openextr-persvid-workers.csv: -------------------------------------------------------------------------------- 1 | worker,duration,job,judgment,unit,wqs,wwa,wsa,wqs_initial,wwa_initial,wsa_initial 2 | 3587109,15.2,1,50,50,0.317575024621,0.482672762196,0.657950995984,0.240806681241,0.39604923488,0.608022084208 3 | 4316379,27.0416666667,1,24,24,0.100412992607,0.299572085929,0.335188081012,0.129994133243,0.331845718442,0.391730632697 4 | 6367365,41.26,1,50,50,0.260924543321,0.456099752937,0.572077800175,0.253243343374,0.431093077091,0.587444700071 5 | 6377879,29.6111111111,1,18,18,0.0867272211444,0.264501999451,0.327888716624,0.12696802719,0.314893793941,0.403209048996 6 | 
6481150,272.965517241,1,29,29,0.439820214264,0.531899908999,0.82688529707,0.341114380229,0.440726734614,0.773981593215 7 | 11131207,66.7,1,50,50,0.182362803483,0.386719500034,0.47156350654,0.187068253152,0.378976441173,0.493614464721 8 | 12999735,72.1052631579,1,19,19,0.198931334956,0.411830859848,0.483041351078,0.209285969685,0.411855508976,0.508153867373 9 | 14054543,21.7407407407,1,27,27,0.347724061829,0.505363066848,0.68806781627,0.271617433679,0.417036981485,0.651302991671 10 | 15298546,25.9,1,50,50,0.386716571369,0.495757752608,0.780051485499,0.287008901411,0.400898499142,0.715914132942 11 | 15439740,24.34,1,50,50,0.302442314571,0.471537458868,0.641396158212,0.233710717393,0.389057823125,0.600709466566 12 | 15965551,34.28,1,50,50,0.131402379443,0.341676337727,0.384581444289,0.140901782871,0.344570385053,0.408920177077 13 | 18972023,57.1333333333,1,15,15,0.288327228755,0.474350768823,0.607835483159,0.265836559654,0.437907833252,0.607060526137 14 | 23503585,105.42,1,50,50,0.382586216826,0.516542568423,0.740667352923,0.307780813101,0.441086169843,0.697779332347 15 | 26024737,15.1904761905,1,21,21,0.322975944663,0.439215517421,0.735347299566,0.222094834701,0.342868544125,0.647755060962 16 | 28526462,24.6363636364,1,11,11,0.44485457221,0.519741734962,0.855914663544,0.356826543543,0.440007375134,0.810955824171 17 | 30315390,26.0,1,27,27,0.177518094006,0.383086070145,0.46338958224,0.186774440484,0.383108006323,0.487524242253 18 | 31508822,183.541666667,1,24,24,0.331623937077,0.508897290788,0.651651999491,0.268678985286,0.436467625291,0.615575977959 19 | 31883685,64.08,1,50,50,0.206989951168,0.416468635599,0.497012100012,0.212917828506,0.408008246011,0.52184687586 20 | 38202325,144.0,1,45,45,0.332117322029,0.50023615301,0.66392107014,0.307400643451,0.460365913782,0.667731111814 21 | 39017656,31.28,1,50,50,0.26775136424,0.439950269194,0.608594613957,0.226319150527,0.385122438675,0.587655061869 22 | 
39127197,15.6538461538,1,26,26,0.203229013645,0.405412760999,0.501289138369,0.209605038865,0.400732404141,0.523054878265 23 | 40421145,20.6666666667,1,3,3,0.0802399030829,0.271623788579,0.295408231741,0.108452839251,0.312994136828,0.346501184813 24 | 40925305,16.62,1,50,50,0.237143244631,0.427997886055,0.554075738123,0.206347980051,0.38258872256,0.539346739417 25 | 41746613,14.88,1,50,50,0.279889646238,0.446597799994,0.626715237383,0.207248891906,0.363334289523,0.570408293084 26 | 44453708,52.5172413793,1,29,29,0.343560537529,0.493002381785,0.696873991329,0.273106707268,0.41753412734,0.654094334775 27 | 44637936,20.32,1,50,50,0.311951930537,0.472262224491,0.660548132709,0.235114459703,0.384156237245,0.612028224216 28 | 44803262,13.88,1,50,50,0.394344496458,0.513460118067,0.768013877967,0.283021943904,0.407997813829,0.693684952003 29 | 44809353,57.53125,1,32,32,0.153032867103,0.340102178177,0.449961443714,0.109274053283,0.270096498274,0.40457412066 30 | -------------------------------------------------------------------------------- /tutorial/data/results/sparsemultchoice-relex-annotations.csv: -------------------------------------------------------------------------------- 1 | ,output.output_relations,aqs,aqs_initial 2 | alternate_names,150,1e-08,1e-08 3 | cause_of_death,150,1e-08,1e-08 4 | charges,150,1e-08,1e-08 5 | children,150,1e-08,1e-08 6 | employee_or_member_of,150,0.248137168368,0.249240121581 7 | founded_org,150,1e-08,1e-08 8 | none,150,0.838371858268,0.605691056911 9 | origin,150,0.234005885279,0.237179487179 10 | place_of_birth,150,0.694388915554,0.579439252336 11 | place_of_death,150,0.00370611328581,0.107142857143 12 | place_of_headquarters,150,0.158468654582,0.0952380952381 13 | places_of_residence,150,0.778707907488,0.676755447942 14 | schools_attended,150,1e-08,1e-08 15 | spouse,150,1e-08,1e-08 16 | subsidiaries,150,1e-08,1e-08 17 | title,150,1e-08,1e-08 18 | top_member_employee_of_org,150,0.944476329146,0.795133437991 19 | 
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Jul  2 14:40:44 2018

Build down-sampled copies of a crowdsourcing results file.

For every worker-count ``k`` from 3 up to a maximum, and for a number of
runs, each unit in the dataset is reduced to a randomly chosen set of ``k``
of its workers and the resulting rows are written to
``<storing_folder>/<k>workers/run_<n>.csv``.
"""
import os
import random
import sys

import itertools as it
import pandas as pd

USAGE = ('Usage: python experiment_replication_variable_workers.py dataset_filename'
         ' max_no_workers max_runs storing_folder unit_id_field worker_id_field')


def get_uniq_unit_ids(dframe, unit_id_field):
    """Return the distinct values of column ``unit_id_field`` of ``dframe``."""
    return dframe[unit_id_field].unique()


def get_no_work_unit_id(dframe, unit_id, unit_id_field):
    """Select the rows of ``dframe`` that belong to one unit.

    Returns a ``(row_count, rows)`` tuple, where ``rows`` is the sub-frame
    whose ``unit_id_field`` column equals ``unit_id``.
    """
    subset_unit_id = dframe[dframe[unit_id_field] == unit_id]
    return (len(subset_unit_id), subset_unit_id)


def count_bits(number, n_bits):
    """Inspect the lowest ``n_bits`` bits of ``number``.

    Returns ``(count, positions)``: how many of those bits are set and the
    ascending list of their zero-based positions.
    """
    bit_pos = [i for i in range(n_bits) if (1 << i) & number]
    return (len(bit_pos), bit_pos)


def gen_all_k_combinations(k, num_size):
    """Return every ``k``-element subset of ``range(num_size)``.

    Each subset is an ascending list of positions. Subsets are ordered as the
    bitmask enumeration used previously ordered them, i.e. by the integer
    whose set bits are the chosen positions, so seeded random picks from the
    result stay reproducible. ``k <= 0`` or ``k > num_size`` yields ``[]``.
    """
    if k <= 0 or k > num_size:
        return []
    # Sorting on the reversed tuple compares the highest chosen position
    # first, which is exactly ascending-bitmask order, without having to
    # scan all 2**num_size masks.
    ordered = sorted(it.combinations(range(num_size), k), key=lambda comb: comb[::-1])
    return [list(comb) for comb in ordered]


def gen_all_worker_combinations(subset_size, count, subset_unit_id, worker_id_field):
    """Return every set of ``subset_size`` workers for one unit.

    ``subset_unit_id`` holds the unit's rows, ``count`` is the number of those
    rows, and ``worker_id_field`` names the worker column. The result is a
    list of lists of worker ids, one inner list per combination of rows.
    """
    workers = subset_unit_id[worker_id_field]
    return [[workers.iloc[pos] for pos in comb]
            for comb in gen_all_k_combinations(subset_size, count)]


def get_all_unit_combinations(unit_dict):
    """Print and return the cross product of the worker sets of all units.

    ``unit_dict`` maps a unit id to its list of worker sets; units are taken
    in sorted key order. Each element of the result is a tuple holding one
    worker set per unit.
    """
    combinations = list(it.product(*(unit_dict[unit_id] for unit_id in sorted(unit_dict))))
    print(combinations)
    return combinations


def my_product(dicts):
    """Expand ``{unit: [worker_set, ...]}`` into a list of ``{unit: worker_set}``.

    One dict is produced for every way of choosing a single worker set per
    unit. An empty mapping has exactly one (empty) assignment.
    """
    if not dicts:
        # zip(*{}.items()) cannot be unpacked into two names.
        return [{}]
    units, comb_of_workers = zip(*dicts.items())
    return [dict(zip(units, choice)) for choice in it.product(*comb_of_workers)]


def pick_random_worker_set(worker_sets):
    """Return one element of ``worker_sets`` chosen uniformly at random.

    Raises ``IndexError`` when ``worker_sets`` is empty.
    """
    return random.choice(worker_sets)


def create_analysis_files(dataset_file, max_no_workers, max_runs,
                          storing_folder, unit_id_field, worker_id_field):
    """Write randomly down-sampled copies of ``dataset_file``.

    For each ``k`` in ``3 .. max_no_workers`` the directory
    ``<storing_folder>/<k>workers`` is created if needed, and for each run in
    ``0 .. max_runs`` (inclusive, so ``max_runs + 1`` files) a file
    ``run_<n>.csv`` is written that keeps, for every unit, only the rows of
    ``k`` randomly chosen workers.

    ``max_no_workers`` and ``max_runs`` must be integers. ``IndexError`` is
    raised if some unit has fewer than ``k`` rows, because there is then no
    worker set to pick from.
    """
    dataset = pd.read_csv(dataset_file)
    unique_unit_ids = get_uniq_unit_ids(dataset, unit_id_field)

    for subset_size in range(3, max_no_workers + 1):
        # os.path.join keeps the output inside storing_folder even when the
        # caller passes the folder without a trailing separator.
        workers_directory = os.path.join(storing_folder, str(subset_size) + "workers")
        if not os.path.exists(workers_directory):
            os.makedirs(workers_directory)

        map_unit_id_combinations = {}
        for unit_id in unique_unit_ids:
            count, subset_unit_id = get_no_work_unit_id(dataset, unit_id, unit_id_field)
            map_unit_id_combinations[unit_id] = gen_all_worker_combinations(
                subset_size, count, subset_unit_id, worker_id_field)

        for run_no in range(0, max_runs + 1):
            frames = []
            for unit_id, worker_sets in map_unit_id_combinations.items():
                worker_set = pick_random_worker_set(worker_sets)
                frames.append(dataset[(dataset[unit_id_field] == unit_id) &
                                      (dataset[worker_id_field].isin(worker_set))])

            # Concatenate once per run; pd.concat rejects an empty list.
            df_subset_size = pd.concat(frames) if frames else pd.DataFrame()
            run_file = os.path.join(workers_directory, "run_" + str(run_no) + ".csv")
            df_subset_size.to_csv(run_file, index=False)


def main(argv=None):
    """Run the script.

    ``argv`` is the list of the six arguments (dataset file, maximum number
    of workers, maximum run index, output folder, unit id column, worker id
    column). When omitted, they are read from the command line, skipping the
    script name. The usage text is printed when arguments are missing or the
    two numeric ones are not integers.
    """
    if argv is None:
        # sys.argv[0] is the script path, not the dataset file.
        argv = sys.argv[1:]

    if len(argv) < 6:
        print(USAGE)
        return

    try:
        # Command-line values arrive as strings; range() needs integers.
        max_no_workers = int(argv[1])
        max_runs = int(argv[2])
    except ValueError:
        print(USAGE)
        return

    create_analysis_files(argv[0], max_no_workers, max_runs, argv[3], argv[4], argv[5])


if __name__ == '__main__':
    main()
`True`, `False` and `None/Other`) 12 | + *e.g.:* [Person identification in videos](img/ann-vec/tern-person-in-vid.pdf) 13 | * **Multiple Choice:** the crowd picks multiple annotations out of a set list of choices that are *the same* for every input unit 14 | + *e.g.:* [Person identification in videos](img/ann-vec/mult-person-in-vid.pdf), [Relation extraction from sentences](img/ann-vec/mult-relex.pdf) 15 | * **Sparse Multiple Choice:** the crowd picks multiple annotations out of a set list of choices that are *different* across input units 16 | + *e.g.:* [Person identification in videos](img/ann-vec/sparse-person-in-vid.pdf), [Relation extraction from sentences](img/ann-vec/sparse-mult-relex.pdf), Event extraction from sentences 17 | 18 | **Open-Ended Tasks:** the crowd dynamically creates the list of annotations, or the set of annotations is too big to compute beforehand 19 | 20 | * **Sparse Multiple Choice:** the crowd picks multiple annotations out of a set list of choices that are *different* across input units 21 | + *e.g.:* Event extraction from sentences 22 | * **Open-ended extraction tasks:** the crowd creates different combinations of annotations based on the input unit 23 | + *e.g.:* [Person identification by highlighting words in text](img/ann-vec/od-extr-person-in-vid.pdf) 24 | * **Free Choice:** the crowd inputs all possible annotations for an input unit 25 | + *e.g.:* [Person identification in videos](img/ann-vec/free-person-in-vid.pdf) 26 | 27 | 28 | 29 | ## Session Exercises 30 | 31 | Take either *one of the annotation tasks* presented, or *a task of your choice*, and transform the annotation vector, by doing one or several of the following: 32 | 33 | * Redesigning the annotation task by: 34 | + Changing the type of the input unit (text vs. image vs. 
video) 35 | + Changing the way the crowd annotations are collected (radio boxes, checkboxes, free text, other) 36 | * Adding/removing components of the annotation vector 37 | * Merging/clustering existing components of the annotation vector 38 | 39 | How will these changes affect the annotation vector of your crowdsourcing task? Describe the outcome in terms of the trade-off between the degree of expressivity in crowd annotations and potential for ambiguity and disagreement. 40 | 41 | Create *slides for a 1 minute presentation* summarizing your new crowdsourcing task, annotation vector, and the possible ways for ambiguity to be expressed in it. 42 | -------------------------------------------------------------------------------- /tutorial/img/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/.DS_Store -------------------------------------------------------------------------------- /tutorial/img/ann-vec/bin-person-in-vid.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/ann-vec/bin-person-in-vid.pdf -------------------------------------------------------------------------------- /tutorial/img/ann-vec/bin-relex.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/ann-vec/bin-relex.pdf -------------------------------------------------------------------------------- /tutorial/img/ann-vec/free-person-in-vid.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/ann-vec/free-person-in-vid.pdf -------------------------------------------------------------------------------- /tutorial/img/ann-vec/mult-person-in-vid.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/ann-vec/mult-person-in-vid.pdf -------------------------------------------------------------------------------- /tutorial/img/ann-vec/mult-relex.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/ann-vec/mult-relex.pdf -------------------------------------------------------------------------------- /tutorial/img/ann-vec/od-extr-person-in-vid.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/ann-vec/od-extr-person-in-vid.pdf -------------------------------------------------------------------------------- /tutorial/img/ann-vec/sparse-mult-relex.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/ann-vec/sparse-mult-relex.pdf -------------------------------------------------------------------------------- /tutorial/img/ann-vec/sparse-person-in-vid.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/ann-vec/sparse-person-in-vid.pdf -------------------------------------------------------------------------------- 
/tutorial/img/ann-vec/tern-person-in-vid.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/ann-vec/tern-person-in-vid.pdf -------------------------------------------------------------------------------- /tutorial/img/event-text-highlight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/event-text-highlight.png -------------------------------------------------------------------------------- /tutorial/img/event-text-sparse-multiple-choice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/event-text-sparse-multiple-choice.png -------------------------------------------------------------------------------- /tutorial/img/person-video-binary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/person-video-binary.png -------------------------------------------------------------------------------- /tutorial/img/person-video-free-input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/person-video-free-input.png -------------------------------------------------------------------------------- /tutorial/img/person-video-highlight.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/person-video-highlight.png -------------------------------------------------------------------------------- /tutorial/img/person-video-multiple-choice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/person-video-multiple-choice.png -------------------------------------------------------------------------------- /tutorial/img/person-video-sparse-multiple-choice-free-input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/person-video-sparse-multiple-choice-free-input.png -------------------------------------------------------------------------------- /tutorial/img/person-video-sparse-multiple-choice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/person-video-sparse-multiple-choice.png -------------------------------------------------------------------------------- /tutorial/img/person-video-ternary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/person-video-ternary.png -------------------------------------------------------------------------------- /tutorial/img/relex-binary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/relex-binary.png 
-------------------------------------------------------------------------------- /tutorial/img/relex-free.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/relex-free.png -------------------------------------------------------------------------------- /tutorial/img/relex-highlight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/relex-highlight.png -------------------------------------------------------------------------------- /tutorial/img/relex-multiple-choice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/relex-multiple-choice.png -------------------------------------------------------------------------------- /tutorial/img/relex-sparse-multiple-choice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/img/relex-sparse-multiple-choice.png -------------------------------------------------------------------------------- /tutorial/notebooks/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/notebooks/.DS_Store -------------------------------------------------------------------------------- /tutorial/templates/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CrowdTruth/CrowdTruth-core/1330d5d275b01d93618513ca0956a0b559dd94a2/tutorial/templates/.DS_Store -------------------------------------------------------------------------------- /tutorial/templates/Events-Text-Highlight/template.html: -------------------------------------------------------------------------------- 1 | 2 |
30 | {{term1}} is/was a member/employee of 31 | {{term2}} 32 |
33 |39 | {{term2}} was founded by 40 | {{term1}} 41 |
42 |49 | {{term1}} is/was a top member/employee of 50 | {{term2}} 51 |
52 |58 | {{term1}} is a person who lives/lived in 59 | {{term2}} 60 |
61 |68 | {{term1}} is a person who is/was born in 69 | {{term2}} 70 |
71 |78 | {{term1}} is a person who died in 79 | {{term2}} 80 |
81 |87 | {{term1}} is a person originating from 88 | {{term2}} 89 |
90 |There is no valid expression above
97 |