├── .gitignore
├── CDB-Assignment
├── CrowdAssign
│ ├── __init__.py
│ ├── settings.py
│ ├── urls.py
│ └── wsgi.py
├── README.md
├── main
│ ├── .gitkeep
│ ├── __init__.py
│ ├── admin.py
│ ├── apps.py
│ ├── models.py
│ ├── property.py
│ ├── task
│ │ ├── __init__.py
│ │ ├── amt.py
│ │ ├── api_key.py
│ │ ├── assign.py
│ │ ├── cc.py
│ │ ├── cf.py
│ │ ├── chinacrowd.py
│ │ ├── crowddb_crowdflower.py
│ │ ├── crowddb_type_collection.py
│ │ ├── crowddb_type_fillin.py
│ │ ├── crowddb_type_multi_label.py
│ │ ├── crowddb_type_one_label.py
│ │ ├── crowddb_type_y_n.py
│ │ ├── infer
│ │ │ ├── EM.py
│ │ │ └── __init__.py
│ │ ├── mtc.py
│ │ └── task_const.py
│ ├── tests.py
│ ├── urls.py
│ └── views.py
└── manage.py
├── CDB-Server
├── README.md
├── build.sh
├── crowdcore
│ ├── pom.xml
│ └── src
│ │ └── main
│ │ └── java
│ │ └── com
│ │ └── tsinghua
│ │ └── dbgroup
│ │ └── crowddb
│ │ └── crowdcore
│ │ ├── configs
│ │ └── GlobalConfigs.java
│ │ └── exceptions
│ │ ├── CrowdDBException.java
│ │ └── ExceptionMap.java
├── crowdexec
│ ├── pom.xml
│ └── src
│ │ ├── main
│ │ ├── java
│ │ │ └── com
│ │ │ │ └── tsinghua
│ │ │ │ └── dbgroup
│ │ │ │ └── crowddb
│ │ │ │ └── crowdexec
│ │ │ │ ├── gmodel
│ │ │ │ ├── Graph.java
│ │ │ │ ├── Main.java
│ │ │ │ ├── Record.java
│ │ │ │ ├── Result.java
│ │ │ │ ├── Similarity_function.java
│ │ │ │ └── Table.java
│ │ │ │ ├── operator
│ │ │ │ ├── BaseOperator.java
│ │ │ │ ├── CollectOperator.java
│ │ │ │ ├── CrowdEQOperator.java
│ │ │ │ ├── CrowdGTOperator.java
│ │ │ │ ├── CrowdGraphOperator.java
│ │ │ │ ├── CrowdInOperator.java
│ │ │ │ ├── CrowdJoinOperator.java
│ │ │ │ ├── CrowdLTOperator.java
│ │ │ │ ├── EQOperator.java
│ │ │ │ ├── FillOperator.java
│ │ │ │ ├── GTOperator.java
│ │ │ │ ├── IOperator.java
│ │ │ │ ├── JoinOperator.java
│ │ │ │ ├── LTOperator.java
│ │ │ │ ├── MultiLabel.java
│ │ │ │ ├── OperatorStatus.java
│ │ │ │ ├── ProjectOperator.java
│ │ │ │ └── SingleLabel.java
│ │ │ │ └── query
│ │ │ │ ├── IQueryExecutor.java
│ │ │ │ ├── IQueryManager.java
│ │ │ │ ├── QueryExecutor.java
│ │ │ │ ├── QueryManager.java
│ │ │ │ ├── RelationOperator.java
│ │ │ │ └── schema
│ │ │ │ ├── HibernateSessionManager.java
│ │ │ │ └── Query.java
│ │ └── resources
│ │ │ └── hibernate
│ │ │ ├── hibernate.cfg.xml
│ │ │ └── query.hbm.xml
│ │ └── test
│ │ └── java
│ │ └── com
│ │ └── tsinghua
│ │ └── dbgroup
│ │ └── crowddb
│ │ └── crowdexec
│ │ ├── operator
│ │ └── CrowdEQOperatorTest.java
│ │ ├── query
│ │ └── schema
│ │ │ └── QueryControllerTest.java
│ │ └── table
│ │ └── TableManagerTest.java
├── crowdplat
│ ├── pom.xml
│ └── src
│ │ ├── main
│ │ └── java
│ │ │ └── com
│ │ │ └── tsinghua
│ │ │ └── dbgroup
│ │ │ └── crowddb
│ │ │ └── crowdplat
│ │ │ ├── TaskManager.java
│ │ │ ├── core
│ │ │ ├── Question.java
│ │ │ ├── QuestionBuilder.java
│ │ │ ├── Task.java
│ │ │ ├── TaskCategory.java
│ │ │ ├── TaskConfigure.java
│ │ │ ├── TaskPair.java
│ │ │ ├── TaskStatus.java
│ │ │ └── TaskType.java
│ │ │ ├── engine
│ │ │ ├── APEngine.java
│ │ │ ├── ChinaCrowdsEngine.java
│ │ │ └── ICrowdEngine.java
│ │ │ ├── http
│ │ │ ├── HttpRequest.java
│ │ │ └── IHttpRequest.java
│ │ │ ├── result
│ │ │ ├── BaseResult.java
│ │ │ └── impl
│ │ │ │ ├── CollectionResult.java
│ │ │ │ ├── ColumnsResult.java
│ │ │ │ ├── JudgementResult.java
│ │ │ │ └── OptionsResult.java
│ │ │ └── schema
│ │ │ ├── BaseSchema.java
│ │ │ ├── ISchema.java
│ │ │ ├── SchemaType.java
│ │ │ └── impl
│ │ │ ├── CollectionSchema.java
│ │ │ ├── FillSchema.java
│ │ │ ├── JudgementSchema.java
│ │ │ ├── LabelSchema.java
│ │ │ └── OptionsSchema.java
│ │ └── test
│ │ └── java
│ │ └── com
│ │ └── tsinghua
│ │ └── dbgroup
│ │ └── crowddb
│ │ └── crowdplat
│ │ ├── TaskManagerTest.java
│ │ ├── engine
│ │ └── APEngineTest.java
│ │ └── http
│ │ └── HttpRequestTest.java
├── crowdscheduler
│ ├── pom.xml
│ └── src
│ │ ├── main
│ │ ├── java
│ │ │ └── com
│ │ │ │ └── tsinghua
│ │ │ │ └── dbgroup
│ │ │ │ └── crowddb
│ │ │ │ └── scheduler
│ │ │ │ ├── Configs.java
│ │ │ │ ├── Server.java
│ │ │ │ ├── dispatcher
│ │ │ │ └── QueryScheduler.java
│ │ │ │ ├── server
│ │ │ │ └── SocketServer.java
│ │ │ │ ├── threads
│ │ │ │ ├── ContinueQueryThread.java
│ │ │ │ ├── FinishQueryThread.java
│ │ │ │ └── NewQueryThread.java
│ │ │ │ └── utils
│ │ │ │ └── Util.java
│ │ └── resources
│ │ │ └── logback.xml
│ │ └── test
│ │ └── java
│ │ └── com
│ │ └── tsinghua
│ │ └── dbgroup
│ │ └── crowddb
│ │ └── scheduler
│ │ ├── dispatcher
│ │ └── QuerySchedulerTest.java
│ │ └── server
│ │ └── SocketServerTest.java
├── crowdsql
│ ├── pom.xml
│ └── src
│ │ ├── main
│ │ └── java
│ │ │ ├── META-INF
│ │ │ └── MANIFEST.MF
│ │ │ └── com
│ │ │ └── tsinghua
│ │ │ └── dbgroup
│ │ │ └── crowddb
│ │ │ └── crowdsql
│ │ │ ├── operator
│ │ │ ├── OperatorHelper.java
│ │ │ └── Operators.java
│ │ │ ├── parser
│ │ │ ├── ISqlParser.java
│ │ │ └── SqlParser.java
│ │ │ ├── query
│ │ │ └── SqlContext.java
│ │ │ ├── tree
│ │ │ ├── NodeType.java
│ │ │ ├── SqlTree.java
│ │ │ ├── SqlTreeBuilder.java
│ │ │ └── SqlTreeNode.java
│ │ │ └── util
│ │ │ └── Utils.java
│ │ └── test
│ │ └── java
│ │ └── com
│ │ └── tsinghua
│ │ └── dbgroup
│ │ └── crowddb
│ │ └── crowdsql
│ │ ├── parser
│ │ └── SqlParserTest.java
│ │ └── query
│ │ └── SqlContextTest.java
├── crowdstorage
│ ├── pom.xml
│ └── src
│ │ └── main
│ │ ├── java
│ │ └── com
│ │ │ └── tsinghua
│ │ │ └── dbgroup
│ │ │ └── crowddb
│ │ │ └── crowdstorage
│ │ │ ├── table
│ │ │ ├── BaseDBStorage.java
│ │ │ ├── ITableManager.java
│ │ │ ├── TableManager.java
│ │ │ └── schema
│ │ │ │ ├── ColumnsSchema.java
│ │ │ │ ├── EqualSchema.java
│ │ │ │ └── JoinSchema.java
│ │ │ └── utils
│ │ │ ├── TableHelper.java
│ │ │ └── Utils.java
│ │ └── resources
│ │ └── jdbc.properties.development
├── pom.xml
├── resources
│ ├── META-INF
│ │ └── MANIFEST.MF
│ ├── logback.xml
│ └── runtime.configs.properties
├── scripts
│ ├── runing_scripts
│ │ ├── init_db.sh
│ │ └── init_db.sql
│ └── test_scripts
│ │ ├── send_request.py
│ │ └── test_server.py
└── start.sh
├── CDB-WEB
├── README.md
├── front
│ ├── .babelrc
│ ├── .editorconfig
│ ├── .gitignore
│ ├── README.md
│ ├── build
│ │ ├── build.js
│ │ ├── check-versions.js
│ │ ├── dev-client.js
│ │ ├── dev-server.js
│ │ ├── utils.js
│ │ ├── webpack.base.conf.js
│ │ ├── webpack.dev.conf.js
│ │ └── webpack.prod.conf.js
│ ├── config
│ │ ├── dev.env.js
│ │ ├── index.js
│ │ └── prod.env.js
│ ├── index.html
│ ├── package.json
│ ├── src
│ │ ├── App.vue
│ │ ├── assets
│ │ │ ├── login.jpg
│ │ │ └── logo.png
│ │ ├── components
│ │ │ ├── CrowdShow.vue
│ │ │ └── Hello.vue
│ │ ├── main.js
│ │ └── store
│ │ │ ├── actions.js
│ │ │ ├── index.js
│ │ │ └── mutation_types.js
│ └── static
│ │ └── .gitkeep
└── service
│ ├── README.md
│ ├── app.py
│ ├── checks.py
│ ├── dbcontrol.py
│ ├── error.py
│ ├── model.py
│ ├── remove_user.py
│ ├── requirements.txt
│ ├── static
│ ├── css
│ │ ├── app.8d53a3b4be94e580d5043f5e3d68dd4f.css
│ │ └── app.8d53a3b4be94e580d5043f5e3d68dd4f.css.map
│ ├── img
│ │ └── login.abdb9ad.jpg
│ └── js
│ │ ├── app.fbf9f7e5f297ee9b64d6.js
│ │ ├── app.fbf9f7e5f297ee9b64d6.js.map
│ │ ├── manifest.dbb9dc97ad79361bbc8e.js
│ │ ├── manifest.dbb9dc97ad79361bbc8e.js.map
│ │ ├── vendor.fe49eeef8e49276803d9.js
│ │ └── vendor.fe49eeef8e49276803d9.js.map
│ ├── templates
│ └── index.html
│ ├── upload
│ └── .gitignore
│ ├── util.py
│ └── web.py
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | *.iml
2 | *.pyc
3 | CDB-Assignment/main/migrations
4 | *.DS_Store
5 | CDB-Assignment/main/task/cf_temp/*
6 | !CDB-Assignment/main/task/cf_temp/.gitkeep
7 | .idea/
8 | CDB-Server/target/
9 | crowddb.iml
10 | *.log
11 | *.iml
12 | *.class
13 | *.jar
14 | CDB-Server/out/
15 |
--------------------------------------------------------------------------------
/CDB-Assignment/CrowdAssign/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TsinghuaDatabaseGroup/CDB/6d949d89247598d8785d59d25d0557726d4e652f/CDB-Assignment/CrowdAssign/__init__.py
--------------------------------------------------------------------------------
/CDB-Assignment/CrowdAssign/urls.py:
--------------------------------------------------------------------------------
1 | """CrowdAssign URL Configuration
2 |
3 | The `urlpatterns` list routes URLs to views. For more information please see:
4 | https://docs.djangoproject.com/en/1.10/topics/http/urls/
5 | Examples:
6 | Function views
7 | 1. Add an import: from my_app import views
8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home')
9 | Class-based views
10 | 1. Add an import: from other_app.views import Home
11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home')
12 | Including another URLconf
13 | 1. Import the include() function: from django.conf.urls import url, include
14 | 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls'))
15 | """
16 | from django.conf.urls import include, url
17 | from django.contrib import admin
18 |
# Project-level routes: everything is delegated to the `main` app's URLconf,
# followed by the Django admin site under `admin/`.
urlpatterns = [
    url(r'^', include('main.urls')),
    url(r'^admin/', admin.site.urls),
]
23 |
--------------------------------------------------------------------------------
/CDB-Assignment/CrowdAssign/wsgi.py:
--------------------------------------------------------------------------------
1 | """
2 | WSGI config for CrowdAssign project.
3 |
4 | It exposes the WSGI callable as a module-level variable named ``application``.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/
8 | """
9 |
10 | import os
11 |
12 | from django.core.wsgi import get_wsgi_application
13 |
# Only sets the settings module when the environment has not already chosen one.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "CrowdAssign.settings")

# Module-level WSGI callable picked up by the WSGI server.
application = get_wsgi_application()
17 |
--------------------------------------------------------------------------------
/CDB-Assignment/README.md:
--------------------------------------------------------------------------------
1 | ## CrowdAssign
2 | ### Introduction
3 | Assignment module of CrowdDB
4 |
5 | This module acts as a server to receive requests with well-formatted crowdsourcing tasks data from CDB-Server and publish the tasks to different platforms including AMT (Amazon Mechanical Turk), CF (CrowdFlower), CC (Chinacrowds).
6 |
7 |
8 | ### Environment Specs
9 | This module has been tested on MySQL 5.6.19, Python 2.7.12 with packages below:
10 |
11 | |Package|Version|
12 | |:-----:|:-----:|
13 | |Django | 1.10.1|
14 | |boto | 2.43.0|
15 | |MySQL-python|1.2.5|
16 | |crowdflower|0.1.4|
17 | |poster|0.8.1|
18 |
19 | ### Setup
20 | #### Software install
21 | First install **mysql** and **python**, then install required packages using commands like below:
22 |
23 | ```
24 | pip install django
25 | pip install boto
26 | pip install mysql-python
27 | pip install crowdflower
28 | pip install poster
29 | ```
30 |
31 | #### Configuration
32 | 1. Configure your MySQL connection parameters in **CrowdAssign/settings.py**, around line 78: in the **DATABASES** dict, fill in your database name, username and password.
33 | 2. Use commands below to create corresponding tables in MySQL:
34 | ```
35 | python manage.py makemigrations main
36 | python manage.py migrate
37 | ```
38 | 3. Fill in your API-KEY or username/password and switch to production/test environment in corresponding file in:
39 |
40 | |Platform| File |
41 | |:------:|:----:|
42 | | AMT |main/task/mtc.py|
43 | |CrowdFlower|main/task/api_key.py|
44 | |Chinacrowds|main/property.py|
45 |
46 |
47 | 4. Run server (listen on localhost:9000) to see if everything goes fine.
48 | ```
49 | python manage.py runserver 9000
50 | ```
51 |
--------------------------------------------------------------------------------
/CDB-Assignment/main/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TsinghuaDatabaseGroup/CDB/6d949d89247598d8785d59d25d0557726d4e652f/CDB-Assignment/main/.gitkeep
--------------------------------------------------------------------------------
/CDB-Assignment/main/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TsinghuaDatabaseGroup/CDB/6d949d89247598d8785d59d25d0557726d4e652f/CDB-Assignment/main/__init__.py
--------------------------------------------------------------------------------
/CDB-Assignment/main/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 |
3 | # Register your models here.
4 |
--------------------------------------------------------------------------------
/CDB-Assignment/main/apps.py:
--------------------------------------------------------------------------------
1 | from __future__ import unicode_literals
2 |
3 | from django.apps import AppConfig
4 |
5 |
class MainConfig(AppConfig):
    """Django application configuration for the `main` app."""

    name = 'main'
8 |
--------------------------------------------------------------------------------
/CDB-Assignment/main/models.py:
--------------------------------------------------------------------------------
1 | ### table models definition, save tasks, as well as questions and answers info
2 | from __future__ import unicode_literals
3 |
4 | from django.db import models
5 |
6 |
class Task(models.Model):
    """A published task: its id, the platform it went to and its question type."""

    # (stored value, display label) choices for `platform`.
    PLATFORMS = (
        ('AMT', 'Amazon Mechanical Turk'),
        ('CF', 'CrowdFlower'),
        ('CC', 'ChinaCrowd'),
    )
    # (stored value, display label) choices for `q_type`.
    Q_TYPES = (
        ('Y_N', 'Yes or No'),
        ('M_TO_O', 'Many to One'),
        ('M_TO_M', 'Many to Many'),
        ('FREE', 'Free text'),
        ('COLLECT', 'Collection'),
    )
    # Caller-supplied identifier, used as the primary key.
    task_id = models.CharField(max_length=20, primary_key=True)
    platform = models.CharField(max_length=10, choices=PLATFORMS)
    q_type = models.CharField(max_length=10, choices=Q_TYPES)
    # Set once, when the row is first created.
    created_time = models.DateTimeField(auto_now_add=True)
24 |
25 |
class Hit(models.Model):
    """A HIT identifier linked to the task it belongs to."""

    hit_id = models.CharField(max_length=50, primary_key=True)
    # CASCADE is what Django 1.10 applies implicitly; spelling it out keeps the
    # model loadable on Django >= 2.0, where `on_delete` is mandatory.
    task = models.ForeignKey(Task, on_delete=models.CASCADE)
29 |
30 |
class Question(models.Model):
    """One question of a task, with its text and its position in the task."""

    id = models.CharField(max_length=50, primary_key=True)
    # CASCADE is what Django 1.10 applies implicitly; spelling it out keeps the
    # model loadable on Django >= 2.0, where `on_delete` is mandatory.
    task = models.ForeignKey(Task, on_delete=models.CASCADE)
    content = models.CharField(max_length=500)
    sequence = models.IntegerField()
36 |
37 |
class Answer(models.Model):
    """A single worker's integer answer to a question."""

    # CASCADE is what Django 1.10 applies implicitly; spelling it out keeps the
    # model loadable on Django >= 2.0, where `on_delete` is mandatory.
    question = models.ForeignKey(Question, on_delete=models.CASCADE)
    worker = models.CharField(max_length=50)
    answer = models.IntegerField()
42 |
43 |
class TaskProject(models.Model):
    """Pairs two task identifiers stored as strings."""

    # NOTE(review): presumably `task_db` is the CrowdDB-side task id and
    # `task_bao` the ChinaCrowd-side one -- confirm against chinacrowd.py.
    task_db = models.CharField(max_length=20)
    task_bao = models.CharField(max_length=200)
47 |
48 |
class QueTask(models.Model):
    """Per-task bookkeeping row: question ids, attribute ids, unit count and a URL flag."""

    task_id = models.IntegerField()
    # NOTE(review): stored as free text; the serialisation format is not visible here.
    que_id = models.TextField()
    attr_id = models.TextField()
    unit_num = models.IntegerField(default=1)
    # String flag that defaults to "no".
    need_url = models.CharField(max_length=20, default="no")
55 |
--------------------------------------------------------------------------------
/CDB-Assignment/main/property.py:
--------------------------------------------------------------------------------
1 | #encoding:utf-8
class ChinaCrowd_Property:
    """Connection settings for the ChinaCrowd API: base URL plus account credentials."""

    # ChinaCrowd API base URL. Swap the two lines below to target production.
    # Api_Crowd = "http://www.crowdbao.com:6789"; # product
    Api_Crowd = "http://166.111.71.172:6789" # test
    # Placeholders: replace with your own account before use.
    name = "Your Chinacrowds username"
    password = "Your Chinacrowds passowrd"
--------------------------------------------------------------------------------
/CDB-Assignment/main/task/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TsinghuaDatabaseGroup/CDB/6d949d89247598d8785d59d25d0557726d4e652f/CDB-Assignment/main/task/__init__.py
--------------------------------------------------------------------------------
/CDB-Assignment/main/task/api_key.py:
--------------------------------------------------------------------------------
1 |
2 |
# Placeholder: replace with your own key before publishing to CrowdFlower.
api_key_string = 'Your api_key' # CrowdFlower API_KEY
--------------------------------------------------------------------------------
/CDB-Assignment/main/task/assign.py:
--------------------------------------------------------------------------------
1 | ### assign tasks to different platforms
2 | from main.models import Task
3 | from infer import EM
4 | from task_const import *
5 |
def get_task_publisher(taskid):
    """Return the platform module registered for the stored task `taskid`.

    The task is loaded by its `task_id`; when its platform has no entry in
    `platforms`, the `amt` module is returned instead.
    """
    platform_name = Task.objects.get(task_id=taskid).platform
    if platform_name in platforms:
        return platforms[platform_name]
    return amt
9 |
10 |
def alter_questions(option, questions):
    """Adjust `questions` in place before publishing.

    Only COLLECT tasks are touched: their first question gets `repeats` set to 2.
    """
    if option['q_type'] != COLLECT:
        return
    first_question = questions[0]
    first_question['repeats'] = 2
14 |
15 |
def publish(option, questions, platform):
    """Record a new task and hand its questions to the chosen platform module.

    `platform` is stripped of surrounding whitespace; a name that is not a key
    of `platforms` is replaced by `default_platform`. The `Task` row is created
    before the platform module's `publish` is called.
    """
    chosen = platform.strip()
    if chosen not in platforms:
        chosen = default_platform
    Task.objects.create(
        task_id=option['task_id'],
        platform=chosen,
        q_type=option['q_type'],
    )
    alter_questions(option, questions)
    platforms.get(chosen, amt).publish(option, questions)
24 |
25 |
def is_complete(taskid):
    """Return whatever the task's platform module reports from `is_complete(taskid)`."""
    publisher = get_task_publisher(taskid)
    return publisher.is_complete(taskid)
28 |
29 |
def get_all_result(taskid, q_type):
    """Return the raw result from the task's platform module for the given question type."""
    publisher = get_task_publisher(taskid)
    return publisher.get_result(taskid, q_type)
32 |
33 |
def get_aggregated_result(taskid):
    """Return the task's answers keyed by question id.

    Each value is a dict `{'answer': ..., 'id': <question id>}`. How `answer`
    is built depends on the task's question type:

    * Y_N     -- `EM.infer` output converted to `int`.
    * M_TO_O  -- `EM.infer` output wrapped in a one-element list.
    * FREE / M_TO_M -- the third field of each raw `[question, worker, answer]`
      line; a later line for the same question replaces an earlier one.
    * COLLECT -- every line's third field gathered into one list under a single
      id, taken from the first line's id with the last `free_sep` suffix removed.
      An empty raw result yields `{}` (as for FREE / M_TO_M) instead of raising
      IndexError.

    For any other question type the raw platform result is returned unchanged.
    """
    task = Task.objects.get(pk=taskid)
    q_type = task.q_type
    result = get_all_result(taskid, q_type)

    if q_type == Y_N:
        inferred = EM.infer(result)
        return dict(
            (qid, {'answer': int(inferred[qid]), 'id': qid}) for qid in inferred
        )

    if q_type == M_TO_O:
        inferred = EM.infer(result)
        return dict(
            (qid, {'answer': [inferred[qid]], 'id': qid}) for qid in inferred
        )

    if q_type in (FREE, M_TO_M):
        # line: [q, w, a]
        return dict(
            (line[0], {'answer': line[2], 'id': line[0]}) for line in result
        )

    if q_type == COLLECT:
        if not result:
            return {}
        qid = result[0][0].rsplit(free_sep, 1)[0]
        answers = [line[2] for line in result]
        return {qid: {'answer': answers, 'id': qid}}

    return result
72 |
73 |
if __name__ == '__main__':
    # Example input for publish():
    # option = {'q_per_hit': 2}
    # questions = [
    #     {
    #         'id': 1,
    #         'content': 'First question'
    #     },
    #     {
    #         'id': 2,
    #         'content': 'Second Question'
    #     },
    #     {
    #         'id': 3,
    #         'content': 'Third with new line----\n\n\n----three times'
    #     }
    # ]
    #
    # publish(option, questions, "AMT")

    # get_all_result() takes the question type as its second argument; the
    # previous one-argument call raised TypeError. Read the type from the task.
    task_id = '1010'
    get_all_result(task_id, Task.objects.get(pk=task_id).q_type)
93 |
94 |
--------------------------------------------------------------------------------
/CDB-Assignment/main/task/cc.py:
--------------------------------------------------------------------------------
1 | ### chinacrowd interface
2 | import chinacrowd
3 |
4 | api = chinacrowd.ChinaBaoApi()
5 |
6 |
def publish(option, questions):
    """Forward the questions, task id, question type and title to `api.TaskCreated`."""
    task_id = option['task_id']
    q_type = option['q_type']
    title = option['title']
    api.TaskCreated(questions, task_id, q_type, title)
9 |
10 |
def is_complete(taskid):
    """Return the value of `api.TaskDetected` for `taskid`."""
    detected = api.TaskDetected(taskid)
    return detected
13 |
14 |
def get_result(taskid, q_type):
    """Return the value of `api.TaskResult` for `taskid` and `q_type`."""
    task_result = api.TaskResult(taskid, q_type)
    return task_result
--------------------------------------------------------------------------------
/CDB-Assignment/main/task/cf.py:
--------------------------------------------------------------------------------
1 | ### crowdflower interface
2 | import crowddb_crowdflower
3 |
4 |
def publish(option, questions):
    """Forward the question type, task id and questions to `crowddb_crowdflower.design_question`."""
    q_type = option['q_type']
    task_id = option['task_id']
    crowddb_crowdflower.design_question(q_type, task_id, questions)
7 |
8 |
def is_complete(taskid):
    """Return the value of `crowddb_crowdflower.query_the_status_of_the_job` for `taskid`."""
    job_status = crowddb_crowdflower.query_the_status_of_the_job(taskid)
    return job_status
11 |
12 |
def get_result(taskid, q_type):
    """Return the value of `crowddb_crowdflower.get_answers` for `taskid` and `q_type`."""
    answers = crowddb_crowdflower.get_answers(taskid, q_type)
    return answers
15 |
--------------------------------------------------------------------------------
/CDB-Assignment/main/task/crowddb_type_one_label.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import sys
4 | sys.path.append("..")
5 | import csv
6 | import crowdflower
7 | import api_key
8 |
9 | def upload_questions_to_crowdflower(title, job_distinguish_tag, question_content, max_judgments_per_worker = 50, units_per_assignment = 3,
10 | judgments_per_unit = 1, payment_cents = 3):
11 |
12 | conn = crowdflower.Connection(api_key = api_key.api_key_string)
13 | job = conn.upload(question_content)
14 | options = question_content[0]['options']
15 | first_line = '''
16 |
17 |
{{content}}
18 |
19 | '''
20 | question_strings = first_line
21 | for i in range(len(options)):
22 | item = ''''''
23 | question_strings = question_strings + item + '\n'
24 | question_strings = question_strings + ''''''
25 |
26 | job.update({
27 | 'title': title,
28 | 'max_judgments_per_worker': max_judgments_per_worker,
29 | 'units_per_assignment': units_per_assignment,
30 | 'judgments_per_unit': judgments_per_unit,
31 | 'payment_cents': payment_cents,
32 | 'instructions':
33 | '''
34 | Tell us the details about the country
35 | ''',
36 | 'cml':
37 | question_strings
38 | ,
39 | 'options': {
40 | 'front_load': 0, # quiz mode = 1; turn off with 0
41 | }
42 | })
43 | job.tags = [job_distinguish_tag]
44 | job.launch(len(question_content), channels = ('on_demand', 'cf_internal'))
45 |
def collect_answers_from_crowdflower(job_distinguish_tag):
    """Download and parse the single-label answers of the job tagged `job_distinguish_tag`.

    The first job whose tags contain `job_distinguish_tag` has its CSV report
    downloaded into the `cf_temp` directory next to this module; later jobs
    are not looked at. The header row locates the `id` and `category` columns;
    every following row contributes one answer.

    Returns:
        A pair `(job_distinguish_tag, answers)` where `answers` is a list of
        `(question id, worker id, answer)` tuples. `answer` is the part of the
        `category` cell after its first underscore.
    """
    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    result = []
    out_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'cf_temp')
    for job in conn.jobs():
        # if job_distinguish_tag in job.tags and job.properties['state'] == 'finished':
        if job_distinguish_tag not in job.tags:
            continue
        csv_path = os.path.join(out_dir, str(job.id) + '.csv')
        job.download_csv(csv_path)
        # 'rb' is what the Python 2 csv module expects; the `with` block closes
        # the handle, which the previous `file(...)` call never did.
        with open(csv_path, 'rb') as csv_file:
            reader = csv.reader(csv_file)
            header = next(reader, None)
            if header is not None:
                for index, item in enumerate(header):
                    if item == 'id':
                        id_index = index
                    if item == 'category':
                        category_index = index
                for line in reader:
                    # NOTE(review): column 7 is presumably the worker id in
                    # CrowdFlower's report layout -- confirm.
                    worker_id = line[7]
                    unique_id = line[id_index]
                    option_single_answer = line[category_index].split('_')[1]
                    result.append((unique_id, worker_id, option_single_answer))
        # NOTE(review): the source's indentation was lost; stopping after the
        # first matching job is the assumed placement of the original `break`.
        break
    return job_distinguish_tag, result
72 |
if __name__ == "__main__":
    # Manual smoke test: uploads three sample questions to CrowdFlower.
    question_content = [
        {'id': '1', 'content': 'Germany', 'url': 'http://farm9.staticflickr.com/8199/8252746471_7cd4cccc3b_n.jpg', 'options':['A', 'B', 'C']},
        {'id': '2', 'content': 'China', 'url':'http://farm8.staticflickr.com/7287/8745135210_b556f8f586_n.jpg', 'options':['A', 'B', 'C']},
        {'id': '3', 'content': 'USA', 'url':'http://farm9.staticflickr.com/8195/8093245580_d7c95a2eca_n.jpg', 'options':['A', 'B', 'C']}
    ]
    job_distinguish_tag = 'test-a-1'
    # collect_answers_from_crowdflower(job_distinguish_tag)
    # The function's first parameter is `title`; the previous two-argument call
    # raised TypeError. The tag doubles as the job title here.
    upload_questions_to_crowdflower(job_distinguish_tag, job_distinguish_tag, question_content)
82 |
83 |
84 |
85 |
--------------------------------------------------------------------------------
/CDB-Assignment/main/task/crowddb_type_y_n.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import csv
4 | import time
5 | import crowdflower
6 | import api_key
7 |
8 | def upload_questions_to_crowdflower(title, job_distinguish_tag, question_content, max_judgments_per_worker = 50, units_per_assignment = 3,
9 | judgments_per_unit = 1, payment_cents = 3):
10 | conn = crowdflower.Connection(api_key = api_key.api_key_string)
11 | job = conn.upload(question_content)
12 | job.update({
13 | 'title': title,
14 | 'max_judgments_per_worker': max_judgments_per_worker,
15 | 'units_per_assignment': units_per_assignment,
16 | 'judgments_per_unit': judgments_per_unit,
17 | 'payment_cents': payment_cents,
18 | 'instructions':
19 | '''
20 | Semantic Discrimination
21 | Judge whether the two paragraphs describe the same object or not.
22 | ''',
23 | 'cml':
24 | '''
25 | {{content}}
26 |
27 |
28 |
29 |
30 | ''',
31 | 'options': {
32 | 'front_load': 0, # quiz mode = 1; turn off with 0
33 | }
34 | })
35 | job.tags = [job_distinguish_tag]
36 | job.launch(len(question_content), channels = ('on_demand', 'cf_internal'))
37 |
def collect_answers_from_crowdflower(job_distinguish_tag):
    """Download and parse the yes/no answers of every job tagged `job_distinguish_tag`.

    Each matching job's CSV report is downloaded into the `cf_temp` directory
    next to this module. The header row locates the `id` and `option` columns.
    An `option` of `same` becomes 1 and `different` becomes 0.

    Changes from the earlier version:
      * the header is scanned once, so a data cell that happens to equal `id`
        or `option` can no longer overwrite the column indexes;
      * rows whose option is neither `same` nor `different` are skipped
        instead of silently reusing the previous row's value;
      * the CSV file is closed after reading;
      * the per-cell and per-answer debug prints are gone.

    Returns:
        A pair `(job_distinguish_tag, answers)` where `answers` is a list of
        `[question id, worker id, 0 or 1]` lists.
    """
    option_values = {'same': 1, 'different': 0}
    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    rating_result = []
    out_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'cf_temp')
    for job in conn.jobs():
        if job_distinguish_tag not in job.tags:
            continue
        csv_path = os.path.join(out_dir, str(job.id) + '.csv')
        job.download_csv(csv_path)
        # 'rb' is what the Python 2 csv module expects.
        with open(csv_path, 'rb') as csv_file:
            reader = csv.reader(csv_file)
            header = next(reader, None)
            if header is None:
                continue
            for index, item in enumerate(header):
                if item == 'id':
                    id_index = index
                if item == 'option':
                    option_index = index
            for line in reader:
                option_result = option_values.get(line[option_index])
                if option_result is None:
                    continue
                task_id = line[id_index]
                # NOTE(review): column 7 is presumably the worker id in
                # CrowdFlower's report layout -- confirm.
                worker_id = line[7]
                rating_result.append([task_id, worker_id, option_result])
    return job_distinguish_tag, rating_result
67 |
68 |
--------------------------------------------------------------------------------
/CDB-Assignment/main/task/infer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TsinghuaDatabaseGroup/CDB/6d949d89247598d8785d59d25d0557726d4e652f/CDB-Assignment/main/task/infer/__init__.py
--------------------------------------------------------------------------------
/CDB-Assignment/main/task/mtc.py:
--------------------------------------------------------------------------------
1 | ### AMT configuration
2 | from django.conf import settings
3 | from boto.mturk.connection import MTurkConnection
4 |
# Endpoint switch: the condition is hard-coded to True, so the sandbox host is
# used. Change it to select the production host.
if True: # SANDBOX:
    HOST="mechanicalturk.sandbox.amazonaws.com"
else: # PRODUCTION
    HOST="mechanicalturk.amazonaws.com"

# Placeholders: replace with your own AWS credentials.
AWS_ACCESS_ID = 'Your AWS_ACCESS_ID'
AWS_SECRET_KEY = 'Your AWS_SECRET_KEY'

# Connection object built once, when this module is imported.
mtc = MTurkConnection(aws_access_key_id=AWS_ACCESS_ID, aws_secret_access_key=AWS_SECRET_KEY, host=HOST)
14 |
--------------------------------------------------------------------------------
/CDB-Assignment/main/task/task_const.py:
--------------------------------------------------------------------------------
# Question-type codes; the values match the `Q_TYPES` choices of `main.models.Task`.
Y_N = 'Y_N'
M_TO_O = 'M_TO_O'
M_TO_M = 'M_TO_M'
FREE = 'FREE'
COLLECT = 'COLLECT'

import amt
import cf
import cc

# Platform code -> module implementing that platform's interface.
platforms = {
    "AMT": amt,
    "CC": cc,
    "CF": cf
}

# Platform used by assign.publish() when the requested one is not in `platforms`.
default_platform = 'AMT'

# Separator that assign.get_aggregated_result() strips (with the suffix after
# it) from COLLECT question ids.
free_sep = '::'
20 |
--------------------------------------------------------------------------------
/CDB-Assignment/main/tests.py:
--------------------------------------------------------------------------------
1 | from django.test import TestCase
2 |
3 | # Create your tests here.
4 |
# Sample question list; not referenced by any test case in this file.
questions = [
    {
        'id': 1,
        'content': 'First question',
        'fields': ['first', 'second'],
    },
    {
        'id': 2,
        'content': 'Second Question',
        'fields': ['first', 'second'],
    },
    {
        'id': 3,
        'content': 'Third with new line----\n\n\n----three times',
        'fields': ['first', 'second'],
    }
]


# Sample COLLECT task: an `option` dict plus a one-element `questions` list.
collect_test = {
    'option': {'q_type': 'COLLECT', 'task_id': 'collect_3', 'title': 'collect_test_1'},
    'questions': [{
        "id": "q-1-op-1-hit-e856e60g",
        "columns": [
            "name",
            "school",
            "birthday"
        ],
        "limit": 3,
        "content": "Please judge whether 450 is equal to 450 ?"
    }]
}
--------------------------------------------------------------------------------
/CDB-Assignment/main/urls.py:
--------------------------------------------------------------------------------
1 | from django.conf.urls import url
2 |
3 | from . import views
4 |
# Routes of the `main` app: a landing page plus the three task endpoints.
urlpatterns = [
    url(r'^$', views.index, name='index'),
    url(r'^upload/$', views.upload),
    url(r'^check/$', views.check),
    url(r'^results/$', views.results),
]
11 |
12 |
--------------------------------------------------------------------------------
/CDB-Assignment/main/views.py:
--------------------------------------------------------------------------------
1 | ### http requests handlers
2 | from django.shortcuts import render
3 | from django.http import HttpResponse, JsonResponse
4 | import random
5 | import json
6 | from task import assign
7 | import traceback
8 |
9 | test_tasks = {}
10 |
11 |
def index(request):
    """Reply with the plain text `works!`."""
    response = HttpResponse('works!')
    return response
14 |
15 |
def upload(request):
    """Publish the task described by the POSTed `data` JSON document.

    `data` must decode to an object with `options` and `questions`. The reply
    is `{"code": 0}` when `assign.publish` returns and `{"code": 1}` when it
    raises; in the latter case the options and the traceback are printed.
    """
    payload = json.loads(request.POST['data'])
    option, questions = payload['options'], payload['questions']
    # Every uploaded task is also kept in the module-level `test_tasks` dict.
    test_tasks[option['task_id']] = {'option': option, 'questions': questions}
    print('%s %s' % (option, questions))
    platform = option['platform']
    try:
        assign.publish(option, questions, platform)
    except Exception:
        print('[[ERROR]] -----upload-----')
        print(option)
        traceback.print_exc()
        code = 1
    else:
        code = 0
    return JsonResponse({'code': code}, safe=False)
34 |
35 |
def check(request):
    """Report the status of the task named by the `task_id` query parameter.

    The reply is `{"code": 0, "status": ...}` when `assign.is_complete`
    returns and `{"code": 1}` when it raises (the traceback is printed).
    """
    task_id = request.GET['task_id']
    try:
        status = assign.is_complete(task_id)
    except Exception:
        print('%s %s' % ('[[ERROR]] -----check-----: ', task_id))
        traceback.print_exc()
        return JsonResponse({'code': 1}, safe=False)
    return JsonResponse({'code': 0, 'status': status}, safe=False)
49 |
50 |
def results(request):
    """Return the aggregated answers of the task named by the `task_id` query parameter.

    The reply is `{"code": 0, "data": ...}` when
    `assign.get_aggregated_result` returns (the data is also printed) and
    `{"code": 1}` when it raises (the traceback is printed).
    """
    task_id = request.GET['task_id']
    try:
        task_results = assign.get_aggregated_result(task_id)
    except Exception:
        print('%s %s' % ('[[ERROR]] -----results-----: ', task_id))
        traceback.print_exc()
        return JsonResponse({'code': 1}, safe=False)
    reply = {'code': 0, 'data': task_results}
    print(task_results)
    return JsonResponse(reply, safe=False)
66 |
--------------------------------------------------------------------------------
/CDB-Assignment/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 |
# Command-line entry point: run the Django management command given in sys.argv.
if __name__ == "__main__":
    # Only sets the settings module when the environment has not already chosen one.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "CrowdAssign.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError:
        # The above import may fail for some other reason. Ensure that the
        # issue is really that Django is missing to avoid masking other
        # exceptions on Python 2.
        try:
            import django
        except ImportError:
            raise ImportError(
                "Couldn't import Django. Are you sure it's installed and "
                "available on your PYTHONPATH environment variable? Did you "
                "forget to activate a virtual environment?"
            )
        # Django itself imports fine, so re-raise the original ImportError.
        raise
    execute_from_command_line(sys.argv)
23 |
--------------------------------------------------------------------------------
/CDB-Server/README.md:
--------------------------------------------------------------------------------
1 | # CDB-Server
2 |
3 | ## Introduction
4 |
5 | CDB-Server is the kernel of the CDB database, and it has 6 parts:
6 | ##### crowdcore
7 | It defines some core data structures and core components.
8 |
9 | ##### crowdexec
10 | crowdexec is responsible for the details of operator execution. It packs operators into questions and interacts with the CDB-Assignment platform.
11 |
12 | ##### crowdplat
13 | crowdplat is an interface between CDB-Server and CDB-platform.
14 |
15 | ##### crowdscheduler
16 | crowdscheduler receives queries from clients and appends them to the scheduling queue. In every cycle, crowdscheduler checks each query to see whether its current
17 | operator has finished. If it has, crowdscheduler stashes the current operator's data and continues to the next operator.
18 |
19 | ##### crowdsql
20 | crowdsql receives an input query and parses it into a syntax tree or graph model.
21 |
22 | ##### crowdstorage
23 | crowdstorage is responsible for table storage and intermediate data stash.
24 |
25 | ## Install
26 |
27 | ### Install Dependencies
28 | CDB-Server runs on Ubuntu and is developed in Java. We use `Maven` as the build tool and `Mysql` as the storage backend.
29 |
30 | Dependencies:
31 | - Java: version 1.8.0
32 | - Mysql: version 14.14
33 | - Apache Maven: version 3.3.9
34 | - Python 2: version 2.7
35 | - Git: version 2.10.1
36 |
37 | Please install these dependencies by following their official documentation.
38 |
39 | ### Config and Init DB
40 | This step initializes the database and creates the necessary databases, users and tables in it.
41 |
42 | Open `CDB-Server/scripts/running_scripts/init_db.sh` and edit `host`, `port`, `user` and `password`.
43 | ```
44 | HOST="127.0.0.1"
45 | PORT="3306"
46 | USER="root"
47 | PASSWORD=""
48 | ```
49 |
50 | execute the following command
51 | ```
52 | sh CDB-Server/scripts/running_scripts/init_db.sh
53 | ```
54 |
55 | ### Edit CDB-Server configs
56 | Open `CDB-Server/resources/runtime.configs.properties` and edit the configs. Some important configs are listed below.
57 |
58 | ```
59 | # assignment server
60 | ASSIGNMENT_SERVER_URL = http://127.0.0.1:9000
61 | # server listening port
62 | PORT = 1234
63 | # The time interval of rolling queries
64 | POLL_INTERVAL = 300
65 |
66 | ### Crowdsourcing Platform Settings
67 | # the default crowdsourcing platform for answering questions, there are three options:
68 | # 1. CC = ChinaCrowds, http://chinacrowds.com/
69 | # 2. CF = Crowdflower, http://www.crowdflower.com/
70 | # 3. AMT = Amazon Mechanical Turk, https://www.mturk.com/mturk/welcome
71 | DEFAULT_PLATFORM = CC
72 | ```
73 |
74 | ### Build Project
75 | Go to the `CDB-Server` directory and run the following command.
76 | ```
77 | sh build.sh
78 | ```
79 |
80 | ### Run server
81 | ```
82 | sh start.sh
83 | ```
84 |
85 |
86 | ### Test
87 |
88 | Edit `CDB-Server/scripts/test_scripts/send_request.py` and set `host` and `port` to those of CDB-Server.
89 | ```
90 | host = "127.0.0.1"
91 | port = 1234
92 | ```
93 |
94 | Run the following command to test the server, then check the logs in `logs/crowddb-[year]-[month].log`.
95 | ```
96 | python send_request.py [query_id]
97 | ```
98 | Please replace `[query_id]` with a real query id.
99 |
100 |
101 | ## Contact
102 | If you have any questions, please feel free to contact Xueping Weng (wxping715@gmail.com).
103 |
--------------------------------------------------------------------------------
/CDB-Server/build.sh:
--------------------------------------------------------------------------------
# Build all CDB-Server modules: wipe previous build output, then install the
# artifacts into the local Maven repository without compiling or running tests.

# Abort on the first failing command so that a failed `mvn clean` is not
# silently followed by an install over stale build output.
set -e

mvn clean
mvn install -Dmaven.test.skip=true
3 |
--------------------------------------------------------------------------------
/CDB-Server/crowdcore/pom.xml:
--------------------------------------------------------------------------------
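1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |     <parent>
4 |         <artifactId>crowddb</artifactId>
5 |         <groupId>com.tsinghua.dbgroup.crowddb</groupId>
6 |         <version>0.01</version>
7 |     </parent>
8 |     <modelVersion>4.0.0</modelVersion>
9 |
10 |     <artifactId>crowdcore</artifactId>
11 |     <packaging>jar</packaging>
12 |
13 |     <name>crowdcore</name>
14 |     <url>http://maven.apache.org</url>
15 |
16 |     <properties>
17 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
18 |     </properties>
19 |
20 |     <dependencies>
21 |         <dependency>
22 |             <groupId>junit</groupId>
23 |             <artifactId>junit</artifactId>
24 |             <version>3.8.1</version>
25 |             <scope>test</scope>
26 |         </dependency>
27 |     </dependencies>
28 | </project>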
29 |
--------------------------------------------------------------------------------
/CDB-Server/crowdcore/src/main/java/com/tsinghua/dbgroup/crowddb/crowdcore/configs/GlobalConfigs.java:
--------------------------------------------------------------------------------
1 | package com.tsinghua.dbgroup.crowddb.crowdcore.configs;
2 |
3 | import java.util.Properties;
4 |
/**
 * Created by talus on 11/19/16.
 *
 * Static holder for a single shared {@link Properties} object.
 * The field has no initializer, so it stays {@code null} until some other
 * code assigns it.
 */
public class GlobalConfigs {

    // Shared configuration properties; never assigned inside this class.
    public static Properties GlobalConfigs;
}
11 |
--------------------------------------------------------------------------------
/CDB-Server/crowdcore/src/main/java/com/tsinghua/dbgroup/crowddb/crowdcore/exceptions/CrowdDBException.java:
--------------------------------------------------------------------------------
1 | package com.tsinghua.dbgroup.crowddb.crowdcore.exceptions;
2 |
3 | /**
4 | * Created by talus on 11/19/16.
5 | */
6 | public class CrowdDBException extends RuntimeException {
7 |
8 | public CrowdDBException(int errorCode) {
9 | super(ExceptionMap.ErrorMap.getOrDefault(errorCode, "unknown error"));
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/CDB-Server/crowdcore/src/main/java/com/tsinghua/dbgroup/crowddb/crowdcore/exceptions/ExceptionMap.java:
--------------------------------------------------------------------------------
1 | package com.tsinghua.dbgroup.crowddb.crowdcore.exceptions;
2 |
3 | import java.util.HashMap;
4 | import java.util.Map;
5 |
/**
 * Created by talus on 11/19/16.
 *
 * Lookup table from numeric CrowdDB error codes to human-readable messages.
 * The table is filled once, when the class is initialized.
 */
public class ExceptionMap {

    // Error code -> message. Typed so that lookups yield a String directly
    // (CrowdDBException passes the result to RuntimeException(String)).
    public static Map<Integer, String> ErrorMap = new HashMap<>();

    static {
        initErrorMap();
    }

    // Registers the known error codes.
    private static void initErrorMap() {
        ErrorMap.put(0x100, "sql parse error");
        ErrorMap.put(0x200, "database error");
        ErrorMap.put(0x300, "crowdsourcing platform error");
        ErrorMap.put(0x400, "operator execution error");
    }
}
24 |
--------------------------------------------------------------------------------
/CDB-Server/crowdexec/pom.xml:
--------------------------------------------------------------------------------
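1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |     <parent>
4 |         <artifactId>crowddb</artifactId>
5 |         <groupId>com.tsinghua.dbgroup.crowddb</groupId>
6 |         <version>0.01</version>
7 |     </parent>
8 |     <modelVersion>4.0.0</modelVersion>
9 |
10 |     <artifactId>crowdexec</artifactId>
11 |     <packaging>jar</packaging>
12 |
13 |     <name>crowdexec</name>
14 |     <url>http://maven.apache.org</url>
15 |
16 |     <properties>
17 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
18 |     </properties>
19 |
20 |     <dependencies>
21 |         <dependency>
22 |             <groupId>junit</groupId>
23 |             <artifactId>junit</artifactId>
24 |             <version>4.12</version>
25 |             <scope>test</scope>
26 |         </dependency>
27 |         <dependency>
28 |             <groupId>com.tsinghua.dbgroup.crowddb</groupId>
29 |             <artifactId>crowdsql</artifactId>
30 |             <version>0.01</version>
31 |         </dependency>
32 |
33 |         <dependency>
34 |             <groupId>commons-dbutils</groupId>
35 |             <artifactId>commons-dbutils</artifactId>
36 |             <version>1.6</version>
37 |         </dependency>
38 |         <dependency>
39 |             <groupId>org.mariadb.jdbc</groupId>
40 |             <artifactId>mariadb-java-client</artifactId>
41 |             <version>1.5.4</version>
42 |         </dependency>
43 |         <dependency>
44 |             <groupId>mysql</groupId>
45 |             <artifactId>mysql-connector-java</artifactId>
46 |             <version>6.0.4</version>
47 |             <type>jar</type>
48 |             <scope>compile</scope>
49 |         </dependency>
50 |         <dependency>
51 |             <groupId>org.hibernate</groupId>
52 |             <artifactId>hibernate-core</artifactId>
53 |             <version>5.2.3.Final</version>
54 |         </dependency>
55 |         <dependency>
56 |             <groupId>com.tsinghua.dbgroup.crowddb</groupId>
57 |             <artifactId>crowdplat</artifactId>
58 |             <version>0.01</version>
59 |         </dependency>
60 |         <dependency>
61 |             <groupId>com.tsinghua.dbgroup.crowddb</groupId>
62 |             <artifactId>crowdstorage</artifactId>
63 |             <version>0.01</version>
64 |         </dependency>
65 |
66 |     </dependencies>
67 | </project>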
68 |
--------------------------------------------------------------------------------
/CDB-Server/crowdexec/src/main/java/com/tsinghua/dbgroup/crowddb/crowdexec/gmodel/Main.java:
--------------------------------------------------------------------------------
1 | package com.tsinghua.dbgroup.crowddb.crowdexec.gmodel;
2 |
3 | public class Main {
4 | // public static void main(String[] args) {
5 | // /**
6 | // String str=System.getProperty("user.dir");
7 | // System.out.print(str);
8 | // File file = new File(str,"hhh.txt");
9 | // try {
10 | // file.createNewFile();
11 | // FileWriter fileWritter = new FileWriter(file.getName(),true);
12 | // fileWritter.write("jdhakjdh");
13 | // fileWritter.flush();
14 | // //BufferedWriter bufferWritter = new BufferedWriter(fileWritter);
15 | // //bufferWritter.write("ejwdkj\nkjd");
16 | // //bufferWritter.close();
17 | // } catch (IOException e) {
18 | //
19 | // }
20 | // */
21 | // /**ArrayList> a=new ArrayList>();
22 | // ArrayList> b=new ArrayList>();
23 | // ArrayList temp=new ArrayList();
24 | // temp.add(9);temp.add(2);a.add(temp);
25 | // ArrayList temp1=new ArrayList();
26 | // temp1.add(a.get(0).get(0));
27 | // b.add(temp1);
28 | // a.get(0).set(0,10);
29 | // System.out.print(b.get(0).get(0));*/
30 | // //String[] sql={"country.school=prof.school","prof.name=paper.name","paper.title=cite.title"};
31 | // //String[] froms={"country","prof","paper","cite"};
32 | // String[] joins_in_sql={"University.Name=Researcher.Affiliation","Researcher.Name=Papers.Author","Papers.Title=Citation.Title"};
33 | // String[] sql_selection={"Papers.Conference=sigmod"};
34 | // String[] froms_temp=new String[10];
35 | // String[] joins_temp=new String[10];
36 | // String[] from_in_sql={"University","Researcher","Papers","Citation"};
37 | //
38 | //
39 | // for(int i=0;i set1 = new HashSet();
13 | Set set2 = new HashSet();
14 | Set set_union = new HashSet();
15 | Set set_insection = new HashSet();
16 | set_union.clear();
17 | set_insection.clear();
18 | String[] str1_split={};
19 | String[] str2_split={};
20 | str1_split=str1.split(" ");
21 | str2_split=str2.split(" ");
22 | for(String token:str1_split){
23 | set1.add(token);
24 | }
25 | for(String token:str2_split){
26 | set2.add(token);
27 | }
28 | set_union.addAll(set1);
29 | set_union.addAll(set2);
30 | set_insection.addAll(set1);
31 | set_insection.retainAll(set2);
32 | //System.out.println(set_insection.size());
33 | //System.out.println(set_union.size());
34 | return (float) set_insection.size()/set_union.size();
35 | }
36 | public float bigram(String str1, String str2)
37 | {
38 | int i;
39 | String str1_append="",str2_append="";
40 | for(i=0;i %s",newTableName,from0,column0,column1);
76 |
77 | try {
78 | tmpTableManager.execSQL(sql);
79 | tmpTableManager.deleteTmpTable(from0);
80 | }catch (SQLException e){
81 | LOG.info(e.getMessage());
82 | e.printStackTrace();
83 | }
84 |
85 | sqlTreeNode.setTableName(newTableName);
86 | return newTableName;
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/CDB-Server/crowdexec/src/main/java/com/tsinghua/dbgroup/crowddb/crowdexec/operator/IOperator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2016-2016 by The Department of Computer Science and
3 | * Technology, Tsinghua University
4 | *
5 | * Redistribution of this file is permitted under the terms of
6 | * the BSD license.
7 | *
8 | * Author : XuepingWeng
9 | * Created : 10/13/16 11:10 AM
10 | * Modified :
11 | * Contact : wxping715@gmail.com
12 | */
13 |
14 | package com.tsinghua.dbgroup.crowddb.crowdexec.operator;
15 |
16 |
/**
 * Two-step contract implemented by the query operators in this package.
 */
public interface IOperator {

    /**
     * Runs the operator's first phase.
     *
     * @return a success flag; the implementations visible alongside this
     *         interface return {@code true} once their inputs are prepared
     */
    boolean process();

    /**
     * Runs the operator's final phase.
     *
     * @return a table name; the implementations visible alongside this
     *         interface return the name of the table holding their result
     */
    String finish();
}
23 |
--------------------------------------------------------------------------------
/CDB-Server/crowdexec/src/main/java/com/tsinghua/dbgroup/crowddb/crowdexec/operator/JoinOperator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2016-2016 by The Department of Computer Science and
3 | * Technology, Tsinghua University
4 | *
5 | * Redistribution of this file is permitted under the terms of
6 | * the BSD license.
7 | *
8 | * Author : XuepingWeng
9 | * Created : 10/13/16 11:58 AM
10 | * Modified :
11 | * Contact : wxping715@gmail.com
12 | */
13 |
14 | package com.tsinghua.dbgroup.crowddb.crowdexec.operator;
15 |
16 | import com.tsinghua.dbgroup.crowddb.crowdstorage.table.TableManager;
17 | import com.tsinghua.dbgroup.crowddb.crowdplat.core.TaskCategory;
18 | import com.tsinghua.dbgroup.crowddb.crowdplat.core.TaskType;
19 | import com.tsinghua.dbgroup.crowddb.crowdsql.tree.SqlTreeNode;
20 | import com.tsinghua.dbgroup.crowddb.crowdstorage.utils.Utils;
21 | import org.apache.commons.lang3.tuple.Pair;
22 | import org.slf4j.Logger;
23 | import org.slf4j.LoggerFactory;
24 |
25 | public class JoinOperator extends BaseOperator implements IOperator {
26 | private static Logger LOG = LoggerFactory.getLogger(JoinOperator.class);
27 |
28 | String from0,from1,column0,column1,DBNAME;
29 |
30 | TableManager tmpTableManager = new TableManager();
31 |
32 |
33 | public JoinOperator(SqlTreeNode sqlTreeNode,String dbName) {
34 | super(sqlTreeNode);
35 | this.taskCategory = TaskCategory.TEXT;
36 | this.taskType = TaskType.CROWD_EQUAL;
37 | this.DBNAME= dbName;
38 | }
39 |
40 |
41 | private void initialFromColumn(){
42 |
43 | String[] part = sqlTreeNode.getWhereClause().split(" ");
44 |
45 | from0= part[0].substring(0,part[0].indexOf("."));
46 |
47 | column0 = part[0].substring(part[0].indexOf('.')+1,part[0].length());
48 |
49 | from1 = part[2].substring(0,part[2].indexOf("."));
50 |
51 | column1 = part[2].substring(part[2].indexOf('.')+1,part[2].length());
52 |
53 | LOG.info(DBNAME);
54 |
55 | if (sqlTreeNode.getLeft()!=null) {
56 |
57 | column0 = Utils.packetColumn(from0, column0);
58 | from0 = sqlTreeNode.getLeft().getTableName();
59 | }
60 | else {
61 | column0 = Utils.packetColumn(from0, column0);
62 |
63 | from0 = Utils.packetTable(DBNAME,from0);
64 | String newTableName = TableManager.TMP_DATABASE +"."+TableManager.generateTableName();
65 | tmpTableManager.packetTable(newTableName,from0);
66 | from0 = newTableName;
67 | }
68 | column1 = Utils.packetColumn(from1,column1);
69 |
70 | if (sqlTreeNode.getRight()!=null) {
71 | from1 = sqlTreeNode.getRight().getTableName();
72 | }
73 | else {
74 | from1 = Utils.packetTable(DBNAME,from1);
75 | String newTableName = TableManager.TMP_DATABASE +"."+TableManager.generateTableName();
76 | tmpTableManager.packetTable(newTableName,from1);
77 | from1 = newTableName;
78 | }
79 | }
80 | @Override
81 | public boolean process() {
82 | initialFromColumn();
83 | return true;
84 | }
85 |
86 | @Override
87 | public String finish() {
88 | String newTableName = TableManager.TMP_DATABASE+"."+TableManager.generateTableName();
89 | tmpTableManager.normalJoinTables(newTableName,from0,from1,Pair.of(column0,column1), true);
90 | this.sqlTreeNode.setTableName(newTableName);
91 | return newTableName;
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/CDB-Server/crowdexec/src/main/java/com/tsinghua/dbgroup/crowddb/crowdexec/operator/LTOperator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2016-2016 by The Department of Computer Science and
3 | * Technology, Tsinghua University
4 | *
5 | * Redistribution of this file is permitted under the terms of
6 | * the BSD license.
7 | *
8 | * Author : XuepingWeng
9 | * Created : 10/13/16 11:58 AM
10 | * Modified :
11 | * Contact : wxping715@gmail.com
12 | */
13 |
14 | package com.tsinghua.dbgroup.crowddb.crowdexec.operator;
15 |
16 | import com.tsinghua.dbgroup.crowddb.crowdstorage.table.TableManager;
17 | import com.tsinghua.dbgroup.crowddb.crowdplat.core.TaskCategory;
18 | import com.tsinghua.dbgroup.crowddb.crowdplat.core.TaskType;
19 | import com.tsinghua.dbgroup.crowddb.crowdsql.tree.SqlTreeNode;
20 | import com.tsinghua.dbgroup.crowddb.crowdstorage.utils.Utils;
21 | import org.slf4j.Logger;
22 | import org.slf4j.LoggerFactory;
23 |
24 | import java.sql.SQLException;
25 |
26 | public class LTOperator extends BaseOperator implements IOperator {
27 | private static Logger LOG = LoggerFactory.getLogger(LTOperator.class);
28 |
29 | String from0,from1,column0,column1,DBNAME;
30 |
31 | TableManager tmpTableManager = new TableManager();
32 |
33 |
34 | public LTOperator(SqlTreeNode sqlTreeNode,String dbName) {
35 | super(sqlTreeNode);
36 | this.taskCategory = TaskCategory.TEXT;
37 | this.taskType = TaskType.CROWD_EQUAL;
38 | this.DBNAME= dbName;
39 | }
40 |
41 |
42 | private void initialFromColumn(){
43 |
44 | String[] part = sqlTreeNode.getWhereClause().split(" ");
45 | LOG.info(sqlTreeNode.getWhereClause());
46 | LOG.info(part[0]);
47 | from0= part[0].substring(0,part[0].indexOf("."));
48 |
49 | column0 = part[0].substring(part[0].indexOf('.')+1,part[0].length());
50 | column1 = part[2];
51 |
52 |
53 | column0 = Utils.packetColumn(from0, column0);
54 | if (sqlTreeNode.getLeft()!=null) {
55 |
56 | from0 = sqlTreeNode.getLeft().getTableName();
57 | }
58 | else {
59 |
60 | from0 = Utils.packetTable(DBNAME ,from0);
61 | String newTableName = TableManager.TMP_DATABASE +"."+TableManager.generateTableName();
62 | tmpTableManager.packetTable(newTableName,from0);
63 | from0 = newTableName;
64 | }
65 | }
66 | @Override
67 | public boolean process() {
68 | initialFromColumn();
69 | return true;
70 | }
71 |
72 | @Override
73 | public String finish() {
74 | String newTableName = Utils.packetTable(TableManager.TMP_DATABASE, TableManager.generateTableName());
75 | String sql=String.format("create table %s select * from %s where `%s` < %s",newTableName,from0,column0,column1);
76 |
77 | try {
78 | tmpTableManager.execSQL(sql);
79 | tmpTableManager.deleteTmpTable(from0);
80 | }catch (SQLException e){
81 | LOG.info(e.getMessage());
82 | e.printStackTrace();
83 | }
84 |
85 | sqlTreeNode.setTableName(newTableName);
86 | return newTableName;
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/CDB-Server/crowdexec/src/main/java/com/tsinghua/dbgroup/crowddb/crowdexec/operator/OperatorStatus.java:
--------------------------------------------------------------------------------
1 | package com.tsinghua.dbgroup.crowddb.crowdexec.operator;
2 |
/**
 * Created by talus on 11/19/16.
 *
 * Lifecycle states of an operator, declared in the order they are reached.
 */
public enum OperatorStatus {

    /** The task has not been sent to the crowd platform yet. */
    INIT,

    /** The questions are being queried on the crowd platform. */
    RUNNING,

    /** Answers have been received from the platform and are being stored into the database. */
    FINISHING,

    /** The operator has finished. */
    FINISHED
}
27 |
--------------------------------------------------------------------------------
/CDB-Server/crowdexec/src/main/java/com/tsinghua/dbgroup/crowddb/crowdexec/operator/ProjectOperator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2016-2016 by The Department of Computer Science and
3 | * Technology, Tsinghua University
4 | *
5 | * Redistribution of this file is permitted under the terms of
6 | * the BSD license.
7 | *
8 | * Author : XuepingWeng
9 | * Created : 10/13/16 11:58 AM
10 | * Modified :
11 | * Contact : wxping715@gmail.com
12 | */
13 |
14 | package com.tsinghua.dbgroup.crowddb.crowdexec.operator;
15 |
16 | import com.tsinghua.dbgroup.crowddb.crowdstorage.table.TableManager;
17 | import com.tsinghua.dbgroup.crowddb.crowdplat.core.TaskCategory;
18 | import com.tsinghua.dbgroup.crowddb.crowdplat.core.TaskType;
19 | import com.tsinghua.dbgroup.crowddb.crowdsql.tree.SqlTreeNode;
20 | import com.tsinghua.dbgroup.crowddb.crowdstorage.utils.Utils;
21 | import org.slf4j.Logger;
22 | import org.slf4j.LoggerFactory;
23 |
24 | import java.sql.SQLException;
25 | import java.util.ArrayList;
26 | import java.util.List;
27 |
28 | public class ProjectOperator extends BaseOperator implements IOperator {
29 | private static Logger LOG = LoggerFactory.getLogger(ProjectOperator.class);
30 |
31 | String from0,from1,column0,column1,DBNAME;
32 |
33 | TableManager tmpTableManager = new TableManager();
34 |
35 |
36 | public ProjectOperator(SqlTreeNode sqlTreeNode,String dbName) {
37 | super(sqlTreeNode);
38 | this.taskCategory = TaskCategory.TEXT;
39 | this.taskType = TaskType.CROWD_EQUAL;
40 | this.DBNAME= dbName;
41 | }
42 |
43 |
44 | private void initialFromColumn(){
45 | // LOG.info(sqlTreeNode.getWhereClause());
46 | from0= DBNAME+sqlTreeNode.getFroms().get(0);
47 |
48 | if (sqlTreeNode.getLeft()!=null) {
49 | from0 = sqlTreeNode.getLeft().getTableName();
50 | }else{
51 | String newTableName = TableManager.TMP_DATABASE +"."+TableManager.generateTableName();
52 | tmpTableManager.packetTable(newTableName,from0);
53 | from0 = newTableName;
54 | }
55 | }
56 | @Override
57 | public boolean process() {
58 | initialFromColumn();
59 | return true;
60 | }
61 |
62 | public String finish() {
63 | String newTableName = TableManager.generateTableName();
64 | newTableName = Utils.packetTable(TableManager.TMP_DATABASE, newTableName);
65 |
66 | List fields = new ArrayList<>();
67 | for (String col: sqlTreeNode.getProjects()) {
68 | fields.add(String.format("`%s`", col));
69 | }
70 | String projects = String.join(", ", fields);
71 |
72 | String SQL=String.format("create table %s select %s from %s", newTableName, projects, from0);
73 | try {
74 | tmpTableManager.execSQL(SQL);
75 | tmpTableManager.deleteTmpTable(from0);
76 | }catch (SQLException e){
77 | LOG.info(e.getMessage());
78 | e.printStackTrace();
79 | }
80 |
81 | sqlTreeNode.setTableName(newTableName);
82 | return newTableName;
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/CDB-Server/crowdexec/src/main/java/com/tsinghua/dbgroup/crowddb/crowdexec/query/IQueryExecutor.java:
--------------------------------------------------------------------------------
1 | package com.tsinghua.dbgroup.crowddb.crowdexec.query;
2 |
/**
 * Created by talus on 16/6/2.
 *
 * Type for query executors. It currently declares no members.
 */
public interface IQueryExecutor {
}
8 |
--------------------------------------------------------------------------------
/CDB-Server/crowdexec/src/main/java/com/tsinghua/dbgroup/crowddb/crowdexec/query/IQueryManager.java:
--------------------------------------------------------------------------------
1 | package com.tsinghua.dbgroup.crowddb.crowdexec.query;
2 |
/**
 * Created by talus on 16/6/2.
 *
 * Type for query managers. It currently declares no members.
 */
public interface IQueryManager {
}
9 |
--------------------------------------------------------------------------------
/CDB-Server/crowdexec/src/main/java/com/tsinghua/dbgroup/crowddb/crowdexec/query/QueryExecutor.java:
--------------------------------------------------------------------------------
1 | package com.tsinghua.dbgroup.crowddb.crowdexec.query;
2 |
3 | import com.tsinghua.dbgroup.crowddb.crowdexec.operator.*;
4 | import com.tsinghua.dbgroup.crowddb.crowdsql.tree.NodeType;
5 | import com.tsinghua.dbgroup.crowddb.crowdsql.tree.SqlTreeNode;
6 | import org.slf4j.Logger;
7 | import org.slf4j.LoggerFactory;
8 |
9 | import java.lang.reflect.Constructor;
10 | import java.lang.reflect.InvocationTargetException;
11 | import java.util.HashMap;
12 |
13 | /**
14 | * Created by talus on 16/6/2.
15 | */
16 |
17 | class OperatorBuilder {
18 |
19 | private static HashMap> operatorMap = new HashMap<>();
20 |
21 | static {
22 | initOperatorMap();
23 | }
24 |
25 | private static void initOperatorMap() {
26 | operatorMap.put(NodeType.CROWD_JOIN, CrowdJoinOperator.class);
27 | operatorMap.put(NodeType.CROWD_EQ, CrowdEQOperator.class);
28 | operatorMap.put(NodeType.CROWD_GT, CrowdGTOperator.class);
29 | operatorMap.put(NodeType.CROWD_LT, CrowdLTOperator.class);
30 | operatorMap.put(NodeType.CROWD_IN, CrowdInOperator.class);
31 |
32 | operatorMap.put(NodeType.JOIN, JoinOperator.class);
33 | operatorMap.put(NodeType.EQ, EQOperator.class);
34 | operatorMap.put(NodeType.GT, GTOperator.class);
35 | operatorMap.put(NodeType.LT, LTOperator.class);
36 | operatorMap.put(NodeType.PROJECT, ProjectOperator.class);
37 |
38 | operatorMap.put(NodeType.COLLECT, CollectOperator.class);
39 | operatorMap.put(NodeType.FILL, FillOperator.class);
40 | operatorMap.put(NodeType.SINGLELABEL, SingleLabel.class);
41 | operatorMap.put(NodeType.MULTILABEL, MultiLabel.class);
42 |
43 | operatorMap.put(NodeType.GRAPH_MODEL, CrowdGraphOperator.class);
44 | }
45 |
46 | public static Class extends BaseOperator> createOperator(NodeType nodeType) {
47 | if (!operatorMap.containsKey(nodeType)) {
48 | return null;
49 | }
50 |
51 | return operatorMap.get(nodeType);
52 | }
53 | }
54 |
55 | public class QueryExecutor implements IQueryExecutor {
56 |
57 | private static String LOG_FORMAT = "##Query Executor##";
58 |
59 | private static Logger LOG = LoggerFactory.getLogger(QueryExecutor.class);
60 |
61 | public BaseOperator buildOperator(SqlTreeNode node, String dbName) {
62 | Class extends BaseOperator> operatorClass = OperatorBuilder.createOperator(node.getNodeType());
63 | if (operatorClass == null) {
64 | LOG.error(String.format("no right operator, nodetype = %s", node.getNodeType()));
65 | return null;
66 | }
67 | try {
68 | Constructor extends BaseOperator> cons = operatorClass.getConstructor(SqlTreeNode.class, String.class);
69 | BaseOperator operator = cons.newInstance(node, dbName);
70 | return operator;
71 | } catch (NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) {
72 | LOG.error(String.format("can not create new instance for operator = %s", operatorClass.getName()));
73 | e.printStackTrace();
74 | }
75 | return null;
76 | }
77 |
78 | public boolean execute(BaseOperator operator) {
79 | if (operator == null) return false;
80 | return operator.process();
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/CDB-Server/crowdexec/src/main/java/com/tsinghua/dbgroup/crowddb/crowdexec/query/RelationOperator.java:
--------------------------------------------------------------------------------
1 | package com.tsinghua.dbgroup.crowddb.crowdexec.query;
2 |
3 | import com.tsinghua.dbgroup.crowddb.crowdstorage.table.TableManager;
4 | import com.tsinghua.dbgroup.crowddb.crowdstorage.utils.TableHelper;
5 | import org.apache.commons.lang3.tuple.Pair;
6 | import org.slf4j.Logger;
7 | import org.slf4j.LoggerFactory;
8 |
9 | import java.sql.SQLException;
10 | import java.util.List;
11 | import java.util.Map;
12 |
13 | /**
14 | * Created by talus on 16/6/16.
15 | */
16 | public class RelationOperator {
17 |
18 | private TableManager tm;
19 |
20 | private static String LOG_FORMAT = "##RelationOperator##";
21 |
22 | private static Logger LOG = LoggerFactory.getLogger(RelationOperator.class);
23 |
24 | public RelationOperator() {
25 | tm = new TableManager();
26 | }
27 |
28 | public void execEquals() {
29 |
30 | }
31 |
32 | public void execGreater() {
33 |
34 | }
35 |
36 | public void execLess() {
37 |
38 | }
39 |
40 | private List