13 |
14 | {% for message in messages %}
15 | {% if message.tags == 'success' %}
16 | {{message}}
17 | {% endif %}
18 | {% if message.tags == 'error' %}
19 | {{message}}
20 | {% endif %}
21 | {% endfor %}
22 |
23 | {% endif %}
24 |
25 |
26 |
27 |
28 |
29 |
54 |
55 | {% endblock %}
56 |
--------------------------------------------------------------------------------
/library/serializers.py:
--------------------------------------------------------------------------------
1 | from models import *
2 | from rest_framework import serializers
3 |
class RepositorySerializer(serializers.ModelSerializer):
    """Flat serialization of a Repository row."""
    # NOTE(review): Meta.fields is omitted -- only valid on old DRF versions
    # that default to "all model fields"; confirm the pinned DRF version.
    class Meta:
        model = Repository
7 |
class RuntimeSerializer(serializers.ModelSerializer):
    """Flat serialization of a Runtime row (all model fields)."""
    class Meta:
        model = Runtime
11 |
class DatabaseSerializer(serializers.ModelSerializer):
    """Flat serialization of a Database row (all model fields)."""
    class Meta:
        model = Database
15 |
class PackageSerializer(serializers.ModelSerializer):
    """Flat serialization of a Package row (all model fields)."""
    class Meta:
        model = Package
19 |
class DependencySerializer(serializers.ModelSerializer):
    """Dependency row with its related Package nested as 'package_info'."""
    # Nested representation of the 'package' foreign key.
    package_info = PackageSerializer(source='package')

    class Meta:
        model = Dependency
        fields = ('id', 'source', 'attempt', 'package_info')
26 |
class FieldSerializer(serializers.ModelSerializer):
    """Flat serialization of a Field row (all model fields)."""
    class Meta:
        model = Field
30 |
class QuerySerializer(serializers.ModelSerializer):
    """Flat serialization of a Query row (all model fields)."""
    class Meta:
        model = Query
34 |
class ActionSerializer(serializers.ModelSerializer):
    """Action row with its related fields and queries nested read-only."""
    # NOTE(review): a serializer field literally named 'fields' is unusual;
    # presumably it maps a reverse relation on Action -- confirm.
    fields = FieldSerializer(many=True, read_only=True)
    queries = QuerySerializer(many=True, read_only=True)
    class Meta:
        model = Action
40 |
41 |
class AttemptSerializer(serializers.ModelSerializer):
    """Full detail of a deployment attempt with nested related objects."""
    repo_info = RepositorySerializer(source='repo')
    runtime_info = RuntimeSerializer(source='runtime')
    database_info = DatabaseSerializer(source='database')
    # Reverse FK: every Dependency row pointing at this attempt.
    dependencies = DependencySerializer(source='dependency_set', many=True)
    actions = ActionSerializer(many=True, read_only = True)

    class Meta:
        model = Attempt
        fields = ('id', 'start_time', 'stop_time', 'repo_info', 'sha', 'size', 'log', 'hostname',
            'runtime_info', 'database_info', 'result', 'register', 'login', 'actions_count', 'queries_count',
            'dependencies', 'actions'
        )
--------------------------------------------------------------------------------
/core/analyzers/baseanalyzer.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
3 |
4 | import logging
5 |
6 | ## =====================================================================
7 | ## LOGGING CONFIGURATION
8 | ## =====================================================================
9 | LOG = logging.getLogger()
10 |
11 | ## =====================================================================
12 | ## BASE ANALYZER
13 | ## =====================================================================
class BaseAnalyzer(object):
    """Base class for database-specific query/schema analyzers.

    Subclasses override analyze_queries() / analyze_database() and fill the
    stats dictionaries created here.
    """

    def __init__(self, deployer):
        # Aggregated metric-name -> value results.
        self.queries_stats = {}
        self.database_stats = {}
        self.database_informations = {}
        # Deployer owning the running application / database connection.
        self.deployer = deployer

    def is_valid_for_explain(self, query):
        """Return True when *query* is worth running through EXPLAIN.

        Empty strings and session/transaction control statements
        (SHOW/BEGIN/END/COMMIT/SET) are rejected.
        """
        if not query:
            return False
        # str.startswith accepts a tuple of prefixes -- one call, no loop.
        return not query.lower().startswith(('show', 'begin', 'end', 'commit', 'set'))

    def count_transaction(self, queries):
        """Count BEGIN/START TRANSACTION ... COMMIT pairs.

        *queries* is an iterable of dicts with a 'content' SQL string.
        """
        in_transaction = False
        transaction_count = 0
        for query in queries:
            # Hoist the repeated upper-casing of the statement text.
            content = query['content'].upper()
            if 'BEGIN' in content or 'START TRANSACTION' in content:
                in_transaction = True
            elif in_transaction and 'COMMIT' in content:
                # for each transaction, count the number of transactions
                transaction_count += 1
                in_transaction = False
        return transaction_count

    def analyze_queries(self, queries):
        """Analyze executed queries; must be overridden by subclasses."""
        # BUG FIX: the original used self.__init__.im_class, which exists only
        # on Python 2; on Python 3 it raised AttributeError and masked the
        # intended NotImplementedError. self.__class__ works on both.
        raise NotImplementedError("Unimplemented %s" % self.__class__)

    def analyze_database(self):
        """Analyze the deployed database schema; must be overridden."""
        raise NotImplementedError("Unimplemented %s" % self.__class__)
--------------------------------------------------------------------------------
/core/drivers/benchmarkdriver.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
3 |
4 | import logging
5 | import requests
6 | import re
7 | import copy
8 | import traceback
9 | import requests
10 | import mechanize
11 |
12 | from library.models import *
13 | from cmudbac.settings import *
14 | import utils
15 | import extract
16 | import submit
17 | import count
18 | from basedriver import BaseDriver
19 |
20 | ## =====================================================================
21 | ## LOGGING CONFIGURATION
22 | ## =====================================================================
23 | LOG = logging.getLogger()
24 |
25 | ## =====================================================================
26 | ## BENCHMARK DRIVER
27 | ## =====================================================================
class BenchmarkDriver(BaseDriver):
    """Replays the forms and URLs discovered by a previous driver run.

    Used for benchmarking: every known form is re-submitted and every known
    URL re-queried, counting the actions attempted.
    """

    def __init__(self, driver):
        # Reuse the deployer and the crawl results of the original driver.
        BaseDriver.__init__(self, driver.deployer)
        self.forms = driver.forms
        self.urls = driver.urls
        self.browser = mechanize.Browser()
        if driver.browser != None:
            # NOTE(review): reaches into mechanize internals (_ua_handlers)
            # to share the original session's cookie jar -- fragile across
            # mechanize versions; confirm before upgrading.
            self.browser.set_cookiejar(driver.browser._ua_handlers['_cookies'].cookiejar)
        self.browser.set_handle_robots(False)

    def submit_actions(self):
        """Submit every known form and fetch every known URL once.

        Returns the number of actions attempted.  Failures are deliberately
        swallowed (best effort); an action counts even when it fails.
        """
        actions_cnt = 0
        for form, browser_index in self.forms:
            try:
                if browser_index == 0:
                    # Only index 0 submits through the shared cookie-carrying
                    # browser; other forms are submitted without one.
                    submit.fill_form_random(self.deployer.base_path, form, self.browser)
                else:
                    submit.fill_form_random(self.deployer.base_path, form, None)
            except:
                pass
            actions_cnt += 1
        for url in self.urls:
            try:
                submit.query_url(url, self.browser)
            except:
                pass
            actions_cnt += 1
        return actions_cnt
57 |
58 |
59 |
--------------------------------------------------------------------------------
/library/templates/admin/add_module.html:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/core/drivers/submit/register.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
3 |
4 | import re
5 | from urlparse import urlparse
6 |
7 | import extract
8 | from patterns import patterns, match_any_pattern
9 | from submit import fill_form
10 |
def get_register_form(forms):
    """Pick the form that most likely handles user registration.

    A candidate must use POST (or declare no method at all) and mention a
    registration keyword in its action URL, page URL, or element id.
    Returns the first match, or None.
    """
    keywords = ['register', 'signup', 'sign-up', 'sign_up']
    for form in forms:
        # Skip forms that explicitly use a non-POST method.
        if form.get('method', 'post') != 'post':
            continue
        candidates = (form['action'], form['url'], form.get('id', ''))
        if any(match_any_pattern(value, keywords) for value in candidates):
            return form
    return None
23 |
def verify_email(deploy_path, form, matched_patterns):
    """Follow the verification link the application wrote to its log file.

    Looks for a '*.log' file under *deploy_path*, extracts the first http://
    URL from it (re-pointed at the host of the registration form), and
    submits every form found on that page.

    Returns (matched_patterns, inputs); *inputs* is None when no email log,
    no URL, or no verification form was found.
    """
    email_file = None
    for log_file in os.listdir(deploy_path):
        if log_file.endswith('.log'):
            email_file = log_file
            break
    if not email_file:
        return matched_patterns, None

    # Close the log file promptly instead of leaking the handle.
    with open(os.path.join(deploy_path, email_file)) as fp:
        email_content = fp.read()
    verify_url = re.search('http://.+', email_content)
    if not verify_url:
        return matched_patterns, None
    # Re-point the link at the host actually serving the app; the email may
    # carry a hostname that is not reachable from here.
    verify_url = urlparse(verify_url.group(0))._replace(netloc = urlparse(form['url']).netloc)
    verify_url = verify_url.geturl()

    # BUG FIX: 'inputs' was unbound when the verification page contained no
    # forms, raising NameError at the return below.
    inputs = None
    verify_forms = extract.extract_forms(verify_url)
    for verify_form in verify_forms:
        verify_form['url'] = verify_url
        matched_patterns, inputs, response, br = fill_form(verify_form, matched_patterns)

    return matched_patterns, inputs
46 |
def register(deploy_path, forms):
    """Try to create a new account through the site's registration form.

    Returns (form, matched_patterns, inputs), or (None, None, None) when no
    registration form could be identified among *forms*.
    """
    register_form = get_register_form(forms)
    print 'Register form: {}'.format(register_form)
    if register_form == None:
        return None, None, None

    matched_patterns, inputs, response, br = fill_form(register_form)

    # Some applications require a follow-up e-mail confirmation step.
    if 'email' in matched_patterns:
        matched_patterns, part_inputs = verify_email(deploy_path, register_form, matched_patterns)
        if part_inputs != None:
            inputs.update(part_inputs)

    return register_form, matched_patterns, inputs
61 |
--------------------------------------------------------------------------------
/core/utils/pip.py:
--------------------------------------------------------------------------------
1 | import os
2 | from os.path import expanduser
3 |
4 | from run import run_command
5 | from file import cd
6 |
# Absolute path of the current user's home directory, resolved once at import.
HOME_DIR = expanduser('~')

def home_path(path):
    """Return *path* anchored under the user's home directory."""
    return os.path.join(HOME_DIR, path)
11 |
def configure_env(path):
    """Create an isolated virtualenv at *path* (no system site-packages)."""
    return run_command('virtualenv --no-site-packages {}'.format(path))
15 |
def to_env(path):
    """Shell snippet that enters *path* and activates its virtualenv."""
    activate = 'source bin/activate'
    return '{} && {}'.format(cd(path), activate)
18 |
def pip_install(path, names, is_file, has_version = True):
    """Install dependencies into the virtualenv at *path*.

    names: a requirements-file path fragment relative to $HOME (is_file True),
    or an iterable of packages -- each either a dict with 'name'/'version'
    keys or an object with .name/.version attributes.
    has_version: when False, attribute-style packages are installed unpinned.
    Returns the output of run_command().
    """
    command = '{} && pip --no-cache-dir install'.format(to_env(path))

    # Forward the http proxy to pip when one is configured.
    proxy = os.environ.get('http_proxy')
    if proxy:
        command = '{} --proxy {} '.format(command, proxy)
    if is_file:
        filename = home_path(names)
        command = '{} -r {}'.format(command, filename)
    else:
        # The exact spacing (including trailing spaces in some formats) is
        # preserved as-is; the assembled string is the shell command.
        for name in names:
            if isinstance(name, dict):
                if name.get('version', ''):
                    command = '{} {}=={} '.format(command, name['name'], name['version'])
                else:
                    command = '{} {}'.format(command, name['name'])
            else:
                if has_version and name.version != None and name.version != '':
                    command = '{} {}=={} '.format(command, name.name, name.version)
                elif name.name == 'django':
                    # Unversioned django is pinned to a known-good release.
                    command = '{} {}==1.8.4'.format(command, name.name)
                else:
                    command = '{} {}'.format(command, name.name)
    out = run_command(command)

    return out
45 |
def pip_install_text(path, name):
    """Install the single requirement spec *name* into the virtualenv at *path*."""
    cmd = '{} && pip --no-cache-dir install'.format(to_env(path))
    # Forward the http proxy to pip when one is configured.
    proxy = os.environ.get('http_proxy')
    if proxy:
        cmd = '{} --proxy {} '.format(cmd, proxy)
    cmd = '{} {} '.format(cmd, name)
    return run_command(cmd)
56 |
def pip_freeze(path):
    """Return `pip freeze` output lines that look like pinned 'pkg==ver' specs."""
    raw = run_command('{} && pip freeze'.format(to_env(path)))
    lines = raw[1].strip().splitlines()
    # Keep only clean pinned specs (no spaces, must contain '==').
    return [line for line in lines if '==' in line and ' ' not in line]
62 |
--------------------------------------------------------------------------------
/library/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 | from models import *
3 |
class DependencyInline(admin.StackedInline):
    """Stacked inline editor for Dependency rows (3 blank extras)."""
    model = Dependency
    extra = 3
7 |
class ProjectTypeAdmin(admin.ModelAdmin):
    """Changelist columns for project types."""
    list_display = [ 'name', 'filename', 'deployer_class' ]
## CLASS
11 |
class RepositorySourceAdmin(admin.ModelAdmin):
    """Changelist columns for repository sources (crawl targets)."""
    list_display = [ 'name', 'crawler_class', 'base_url', 'commit_url', 'search_token', ]
## CLASS
15 |
class CrawlerStatusAdmin(admin.ModelAdmin):
    """Changelist columns for crawler progress records."""
    list_display = [ 'id', 'source', 'project_type', 'next_url', 'last_crawler_time', ]
## CLASS
19 |
class RepositoryAdmin(admin.ModelAdmin):
    """Admin list/detail configuration for crawled repositories."""
    list_display = [ 'id', 'name', 'valid_project', 'get_project_type', 'source', 'commits_count', 'description', 'crawler_date', 'updated_date' ]
    list_filter = ['project_type', 'valid_project', 'crawler_date', 'updated_date']
    fieldsets = [
        (None, {'fields': ['name', 'project_type', 'source', 'description']}),
        ('Date information', {'fields': ['created_at', 'updated_at', 'pushed_at']}),
    ]

    def get_project_type(self, obj):
        # Changelist column: show the related ProjectType's name.
        return obj.project_type.name

    get_project_type.short_description = 'Project Type'
# CLASS
33 |
class AttemptAdmin(admin.ModelAdmin):
    """Admin view of deployment attempts."""
    list_display = [ 'id', 'repo', 'result_name', 'start_time', 'stop_time' ]
    list_filter = ['result', 'start_time']
    # Raw-id widget avoids loading every Repository into a dropdown.
    raw_id_fields = [ 'repo' ]
    #inlines = [DependencyInline]
# CLASS
40 |
class PackageAdmin(admin.ModelAdmin):
    """Changelist columns and filters for packages."""
    list_display = [ 'name', 'project_type', 'version', 'count' ]
    list_filter = ['project_type']
# CLASS
45 |
# Register your models here.
# Models without a custom ModelAdmin get the default admin interface.
admin.site.register(ProjectType, ProjectTypeAdmin)
admin.site.register(RepositorySource, RepositorySourceAdmin)
admin.site.register(CrawlerStatus, CrawlerStatusAdmin)
admin.site.register(Database)

admin.site.register(Repository, RepositoryAdmin)
admin.site.register(Package, PackageAdmin)
admin.site.register(Dependency)
admin.site.register(Attempt, AttemptAdmin)
admin.site.register(Module)
admin.site.register(WebStatistic)
admin.site.register(Statistic)
59 |
--------------------------------------------------------------------------------
/library/static/md/tools.md:
--------------------------------------------------------------------------------
1 | # Command Line Tool Tutorial
2 |
3 | ### Install Requirements
4 | To start with, please install the Python packages required to run the command line tool. You can install them with pip as follows:
5 | ```sh
6 | $ pip install requests
7 | ```
8 |
9 | Please make the main.py file executable as follows:
10 | ```sh
11 | $ chmod +x main.py
12 | ```
13 |
14 | ### Get Attempt Information
15 | You can get detailed information about an attempt by running this command:
16 | ```sh
17 | $ ./main.py info -attempt ATTEMPT
18 | ```
19 | where *ATTEMPT* is the id of the attempt you want to inquire.
20 |
21 | If you want to know more information, you can type this command to get a help message:
22 | ```sh
23 | $ ./main.py info -h
24 | ```
25 |
26 | ### Running Benchmark
27 | A lot of arguments are required to run the benchmark. You can type this command to get the full information:
28 | ```sh
29 | ./main.py benchmark -h
30 | ```
31 | We have provided a comprehensive illustration of the arguments:
32 | ```sh
33 | usage: main.py benchmark [-h] [-attempt ATTEMPT] [-database DATABASE] [-host HOST] [-port PORT] [-name NAME] [-username USERNAME] [-password PASSWORD] [-num_threads NUM_THREADS] [-timeout TIMEOUT]
34 |
35 | optional arguments:
36 | -h, --help show this help message and exit
37 | -attempt ATTEMPT, --attempt ATTEMPT
38 | the id of the attempt
39 | -database DATABASE, --database DATABASE
40 | the database you are using, e.g. mysql
41 | -host HOST, --host HOST
42 | the host address of your database server
43 | -port PORT, --port PORT
44 | the port of your database server
45 | -name NAME, --name NAME
46 | the name of your database
47 | -username USERNAME, --username USERNAME
48 | the username of your database server
49 | -password PASSWORD, --password PASSWORD
50 | the password of your database server
51 | -num_threads NUM_THREADS, --num_threads NUM_THREADS
52 | the number of threads you want to use to submit forms
53 | -timeout TIMEOUT, --timeout TIMEOUT
54 | the timeout for submitting forms
55 | ```
56 |
57 | Then you can see the results if the arguments are correctly provided.
58 |
--------------------------------------------------------------------------------
/library/templates/admin/add_repository.html:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/library/templates/about.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block title %}About » {% endblock %}
4 |
5 | {% block main %}
6 |
About
7 |
8 |
9 |
10 | The goal of the Database Application Catalog project is to find a large amount of database applications to use in various projects, including workload analysis, automatic tuning, and benchmarking. It searches the Internet for web-based database applications and run them locally in order to learn how they use a DBMS.
11 |
12 |
All of the source code for the CMDBAC is available on GitHub under the Apache Software License.
13 |
14 |
15 |
16 |
17 |
18 | People
19 |
20 |
21 |
22 |
26 |
30 |
31 |
Andy Pavlo
32 |
Carnegie Mellon University
33 |
34 |
35 |
36 |
37 | Alumni
38 |
41 |
42 |
43 |
44 |
45 |
46 | Acknowledgements
47 | This research was funded (in part) by the National Science Foundation (III-1423210 ).
48 |
49 |
50 |
51 |
52 |
53 | {% endblock %}
--------------------------------------------------------------------------------
/analysis/general/analyze_repository.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
5 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "core"))
6 |
7 | from utils import filter_repository
8 |
9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings")
10 | import django
11 | django.setup()
12 |
13 | from library.models import *
14 |
def repository_stats():
    """Print, per project type, repos ranked by commit count with their
    transaction counts and a link to the latest successful attempt.

    Only repositories with a successful attempt that survive
    filter_repository() are considered.
    """
    stats = {}

    for project_type in ProjectType.objects.all():
        project_type_name = project_type.name
        stats[project_type_name] = []

        for repo in Repository.objects.filter(project_type = project_type).exclude(latest_successful_attempt = None):
            if filter_repository(repo):
                continue
            transaction_count = 0

            for action in Action.objects.filter(attempt = repo.latest_successful_attempt):
                # Reassemble BEGIN ... COMMIT runs from the action's queries.
                transaction = ''
                for query in Query.objects.filter(action = action):
                    if 'BEGIN' in query.content.upper() or 'START TRANSACTION' in query.content.upper():
                        transaction = query.content + '\n'
                    elif transaction != '':
                        transaction += query.content + '\n'
                        if 'COMMIT' in query.content.upper():
                            transaction = transaction.strip('\n')

                            # for each transaction, count the number of transactions
                            # NOTE(review): 'transaction' is not reset to ''
                            # here, so a later COMMIT without a new BEGIN
                            # would be counted again -- confirm intent.
                            transaction_count += 1

            if transaction_count > 0:
                stats[project_type_name].append((repo.commits_count, transaction_count, repo))

    for project_type_name in stats:
        print project_type_name

        # Sort by commit count, descending (tuple ordering).
        for commits_count, transaction_count, repo in sorted(stats[project_type_name], reverse = True):
            print repo.name, 'txns:{}'.format(transaction_count), 'commits:{}'.format(commits_count),
            print 'http://cmdbac.cs.cmu.edu/attempt/' + str(repo.latest_successful_attempt.id)

        print '------------------------------'
51 |
def main():
    """Entry point: run the repository transaction-statistics report."""
    # active
    repository_stats()

    # working

    # deprecated
if __name__ == '__main__':
    main()
61 |
--------------------------------------------------------------------------------
/blog/templatetags/pinax_blog_tags.py:
--------------------------------------------------------------------------------
1 | from django import template
2 |
3 | from ..models import Post, Section
4 |
5 |
6 | register = template.Library()
7 |
8 |
class LatestBlogPostsNode(template.Node):
    """Stores the five most recent posts under the given context name."""

    def __init__(self, context_var):
        self.context_var = context_var

    def render(self, context):
        context[self.context_var] = Post.objects.current()[:5]
        return ""
18 |
19 |
@register.tag
def latest_blog_posts(parser, token):
    """{% latest_blog_posts as var_name %}"""
    pieces = token.split_contents()
    return LatestBlogPostsNode(pieces[2])
24 |
25 |
class LatestBlogPostNode(template.Node):
    """Stores the single newest post (or None) under the given context name."""

    def __init__(self, context_var):
        self.context_var = context_var

    def render(self, context):
        try:
            newest = Post.objects.current()[0]
        except IndexError:
            # No posts yet.
            newest = None
        context[self.context_var] = newest
        return ""
38 |
39 |
@register.tag
def latest_blog_post(parser, token):
    """{% latest_blog_post as var_name %}"""
    pieces = token.split_contents()
    return LatestBlogPostNode(pieces[2])
44 |
45 |
class LatestSectionPostNode(template.Node):
    """Resolves a section name and exposes its newest published post."""

    def __init__(self, section, context_var):
        self.section = template.Variable(section)
        self.context_var = context_var

    def render(self, context):
        section_name = self.section.resolve(context)

        candidates = Post.objects.published().filter(section__name=section_name).order_by("-published")
        try:
            newest = candidates[0]
        except IndexError:
            # The section has no published posts.
            newest = None
        context[self.context_var] = newest
        return ""
62 |
63 |
@register.tag
def latest_section_post(parser, token):
    """
    {% latest_section_post "articles" as latest_article_post %}
    """
    pieces = token.split_contents()
    return LatestSectionPostNode(pieces[1], pieces[3])
71 |
72 |
class BlogSectionsNode(template.Node):
    """Exposes all enabled blog sections under the given context name."""

    def __init__(self, context_var):
        self.context_var = context_var

    def render(self, context):
        context[self.context_var] = Section.objects.filter(enabled=True)
        return ""
82 |
83 |
@register.tag
def blog_sections(parser, token):
    """
    {% blog_sections as blog_sections %}
    """
    pieces = token.split_contents()
    return BlogSectionsNode(pieces[2])
91 |
--------------------------------------------------------------------------------
/core/analyzers/sqlite3analyzer.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
3 |
4 | import logging
5 |
6 | from baseanalyzer import BaseAnalyzer
7 |
8 | ## =====================================================================
9 | ## LOGGING CONFIGURATION
10 | ## =====================================================================
11 | LOG = logging.getLogger()
12 |
13 | ## =====================================================================
14 | ## SQLITE3 ANALYZER
15 | ## =====================================================================
class SQLite3Analyzer(BaseAnalyzer):
    """Analyzer for applications deployed against SQLite3."""

    def __init__(self, deployer):
        BaseAnalyzer.__init__(self, deployer)

    def analyze_queries(self, queries):
        """Accumulate transaction counts and EXPLAIN output for *queries*.

        Each query dict gains an 'explain' key when EXPLAIN succeeds.
        Counts accumulate across repeated calls.
        """
        self.queries_stats['num_transactions'] = self.count_transaction(queries) + self.queries_stats.get('num_transactions', 0)

        try:
            conn = self.deployer.get_database_connection()
            cur = conn.cursor()

            for query in queries:
                try:
                    if self.is_valid_for_explain(query['raw']):
                        explain_query = 'EXPLAIN {};'.format(query['raw'])
                        # print explain_query
                        cur.execute(explain_query)
                        rows = cur.fetchall()
                        output = '\n'
                        for row in rows:
                            output += str(row) + '\n'
                        query['explain'] = output
                except Exception, e:
                    # Best effort: an EXPLAIN failure leaves the query as-is.
                    pass
                    # LOG.exception(e)

            cur.close()
            conn.close()
        except Exception, e:
            LOG.exception(e)

    def analyze_database(self):
        """Record table/index counts from sqlite_master into database_stats."""
        try:
            conn = self.deployer.get_database_connection()
            cur = conn.cursor()
            # NOTE(review): 'database' is fetched but unused in this method.
            database = self.deployer.get_database_name()

            # the number of tables
            cur.execute("SELECT COUNT(*) FROM sqlite_master WHERE type = 'table';")
            self.database_stats['num_tables'] = int(cur.fetchone()[0])

            # the number of indexes
            cur.execute("SELECT COUNT(*) FROM sqlite_master WHERE type = 'index';")
            self.database_stats['num_indexes'] = int(cur.fetchone()[0])

            cur.close()
            conn.close()
        except Exception, e:
            LOG.exception(e)
--------------------------------------------------------------------------------
/core/scripts/vagrant_deploy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
5 |
6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings")
7 | import django
8 | django.setup()
9 | from library.models import *
10 | from deployers import *
11 | from drivers import *
12 | from analyzers import *
13 | import utils
14 |
def main():
    """Deploy one repository, drive it, analyze its queries, save the attempt.

    argv: <repo_name> <deploy_id> [database_name]; database defaults to MySQL.
    """
    if len(sys.argv) not in [3, 4]:
        return
    repo_name = sys.argv[1]
    deploy_id = sys.argv[2]
    if len(sys.argv) > 3:
        database_name = sys.argv[3]
    else:
        database_name = 'MySQL'
    print 'Database : {} ...'.format(database_name)

    repo = Repository.objects.get(name=repo_name)
    database = Database.objects.get(name=database_name)

    # Resolve the project-type-specific deployer class by name, e.g.
    # deployers.<class>.lower() module exposing the class itself.
    moduleName = "deployers.%s" % (repo.project_type.deployer_class.lower())
    moduleHandle = __import__(moduleName, globals(), locals(), [repo.project_type.deployer_class])
    klass = getattr(moduleHandle, repo.project_type.deployer_class)

    deployer = klass(repo, database, deploy_id)
    if deployer.deploy() != 0:
        # Deployment failed: tear the server down and signal failure.
        deployer.kill_server()
        sys.exit(-1)

    print 'Driving ...'

    driver = BaseDriver(deployer.get_main_url(), deployer.get_database(), deployer.deploy_id, deployer.base_path, deployer.log_file)
    try:
        driverResult = driver.drive()
    except Exception, e:
        # NOTE(review): LOG is not defined in this file; presumably it comes
        # in through one of the wildcard imports -- confirm.
        LOG.exception(e)
        driverResult = {}

    print 'Random Walking ...'

    try:
        random_driver = RandomDriver(driver)
        random_driver.start()
        print 'Random Walk Forms Count: {}'.format(len(random_driver.forms))
        print 'Basic Forms Count: {}'.format(len(driverResult['forms']))
        # Merge in only the randomly discovered forms not already known.
        for form in random_driver.forms:
            if any(random_driver.equal_form(form, ret_form) for ret_form in driverResult['forms']):
                continue
            driverResult['forms'].append(form)
    except Exception, e:
        LOG.exception(e)

    deployer.kill_server()

    # Run the database-specific analyzer over every captured query set.
    analyzer = get_analyzer(deployer)
    for form in driverResult['forms']:
        analyzer.analyze_queries(form['queries'])
    for url in driverResult['urls']:
        analyzer.analyze_queries(url['queries'])
    driverResult['statistics'] = analyzer.queries_stats
    analyzer.analyze_database()
    driverResult['statistics'].update(analyzer.database_stats)
    driverResult['informations'] = analyzer.database_informations

    deployer.save_attempt(ATTEMPT_STATUS_SUCCESS, driverResult)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/blog/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 | from django.utils import timezone
3 | from django.utils.functional import curry
4 |
5 | from .forms import AdminPostForm
6 | from .models import Post, Image, ReviewComment, Section
7 | from .utils import can_tweet
8 |
9 |
class ImageInline(admin.TabularInline):
    """Inline editor for a post's images; only the path is editable."""
    model = Image
    fields = ["image_path"]
13 |
14 |
class ReviewInline(admin.TabularInline):
    """Inline editor for a post's review comments."""
    model = ReviewComment
17 |
18 |
def make_published(modeladmin, request, queryset):
    """Admin action: move the selected posts to the final (published) state.

    Posts already in the final state with a publish timestamp are excluded,
    so their original publication time is preserved.
    """
    queryset = queryset.exclude(state=Post.STATE_CHOICES[-1][0], published__isnull=False)
    queryset.update(state=Post.STATE_CHOICES[-1][0])
    # Stamp a publish time only on posts that never had one.
    queryset.filter(published__isnull=True).update(published=timezone.now())
make_published.short_description = "Publish selected posts"
24 |
25 |
class PostAdmin(admin.ModelAdmin):
    """Admin for blog posts: custom form, publish action, share-URL column."""
    list_display = ["title", "state", "section", "published", "show_secret_share_url"]
    list_filter = ["section", "state"]
    form = AdminPostForm
    actions = [make_published]
    fields = [
        "section",
        "title",
        "slug",
        "author",
        "published",
        "markup",
        "teaser",
        "content",
        "description",
        "primary_image",
        "sharable_url",
        "state"
    ]
    readonly_fields = ["sharable_url"]

    # Evaluated once at class-definition time: offer the tweet field only
    # when tweeting is configured.
    if can_tweet():
        fields.append("tweet")
    prepopulated_fields = {"slug": ("title",)}
    inlines = [
        ImageInline,
        ReviewInline,
    ]

    def show_secret_share_url(self, obj):
        # NOTE(review): this return statement looks garbled (anchor-tag
        # markup appears to have been lost in extraction) -- confirm in VCS.
        return '
%s ' % (obj.sharable_url, obj.sharable_url)
    show_secret_share_url.short_description = "Share this url"
    show_secret_share_url.allow_tags = True

    def formfield_for_dbfield(self, db_field, **kwargs):
        """Default the 'author' form field to the requesting user."""
        request = kwargs.get("request")
        if db_field.name == "author":
            ff = super(PostAdmin, self).formfield_for_dbfield(db_field, **kwargs)
            ff.initial = request.user.id
            return ff
        return super(PostAdmin, self).formfield_for_dbfield(db_field, **kwargs)

    def get_form(self, request, obj=None, **kwargs):
        """Bind the current request into formfield_for_dbfield."""
        kwargs.update({
            "formfield_callback": curry(self.formfield_for_dbfield, request=request),
        })
        return super(PostAdmin, self).get_form(request, obj, **kwargs)

    def save_form(self, request, form, change):
        # this is done for explicitness that we want form.save to commit
        # form.save doesn't take a commit kwarg for this reason
        return form.save()
78 |
79 |
class SectionAdmin(admin.ModelAdmin):
    """Admin for blog sections; slug auto-fills from the name."""
    prepopulated_fields = {"slug": ("name",)}
82 |
83 |
84 | admin.site.register(Post, PostAdmin)
85 | admin.site.register(Image)
86 | admin.site.register(Section, SectionAdmin)
87 |
--------------------------------------------------------------------------------
/library/forms.py:
--------------------------------------------------------------------------------
1 | from django import forms
2 | from models import *
3 | from django.template.loader import render_to_string
4 | from django.forms.fields import EMPTY_VALUES
5 | from django.utils.translation import ugettext as _
6 |
7 |
class ResultForm(forms.Form):
    """Checkbox filter over attempt result statuses."""
    # NOTE(review): reversed() yields a one-shot iterator; Django is expected
    # to materialize choices at field construction -- confirm.
    results = forms.MultipleChoiceField(
        widget=forms.CheckboxSelectMultiple,
        choices=reversed(ATTEMPT_STATUS),
        required=False,
        label="Latest Attempt Status")
14 |
class ProjectTypeForm(forms.Form):
    """Checkbox filter over known project types."""
    # NOTE(review): this queryset is evaluated once at import time, so types
    # added later will not appear until a restart (and import fails if the
    # table is missing). A callable for 'choices' would fix both -- confirm
    # the Django version supports it before changing.
    options = ProjectType.objects.all().values_list('name', 'name')
    types = forms.MultipleChoiceField(
        widget=forms.CheckboxSelectMultiple,
        choices=options,
        required=False,
        label="Project Type")
22 |
class StatisticsForm(forms.Form):
    """Filter form over per-attempt database statistics.

    Choice values encode inclusive 'low-high' ranges ('-1' means any);
    ratio ranges are expressed in percent (e.g. '0-50' means <= 0.5).
    """
    num_options = [('-1', 'Any'), ('0-10', 'Less than or equal to 10'), ('11-100', 'Between 11 and 100'), ('101-99999', 'More than 100')]
    # BUG FIX: user-facing label typo 'Lesson than' -> 'Less than'.
    ratio_options = [('-1', 'Any'), ('0-50', 'Less than or equal to 0.5'), ('51-100', '0.5-1'), ('101-99999', 'More than 1')]

    num_tables = forms.ChoiceField(choices=num_options, required = False, label = '# of Tables', widget=forms.Select(attrs={'class':'form-control'}))
    num_indexes = forms.ChoiceField(choices=num_options, required = False, label = '# of Indexes', widget=forms.Select(attrs={'class':'form-control'}))
    num_secondary_indexes = forms.ChoiceField(choices=num_options, required = False, label = '# of Secondary Indexes', widget=forms.Select(attrs={'class':'form-control'}))
    num_constraints = forms.ChoiceField(choices=num_options, required = False, label = '# of Constraints', widget=forms.Select(attrs={'class':'form-control'}))
    num_foreignkeys = forms.ChoiceField(choices=num_options, required = False, label = '# of Foreign Keys', widget=forms.Select(attrs={'class':'form-control'}))
    num_transactions = forms.ChoiceField(choices=num_options, required = False, label = '# of Transactions', widget=forms.Select(attrs={'class':'form-control'}))
    transaction_ratio = forms.ChoiceField(choices=ratio_options, required = False, label = 'Ratio of Txn/Action', widget=forms.Select(attrs={'class':'form-control'}))

    coverage_options = [('-1', 'Any'), ('0-20', 'Less than 20'), ('21-40', '21-40'), ('41-60', '41-60'), ('61-80', '61-80'), ('81-100', '81-100')]
    table_coverage = forms.ChoiceField(choices=coverage_options, required = False, label = 'Table Coverage', widget=forms.Select(attrs={'class':'form-control'}))
    column_coverage = forms.ChoiceField(choices=coverage_options, required = False, label = 'Column Coverage', widget=forms.Select(attrs={'class':'form-control'}))
    # index_coverage = forms.ChoiceField(choices=coverage_options, required = False, label = 'Index Coverage', widget=forms.Select(attrs={'class':'form-control'}))
39 |
40 |
--------------------------------------------------------------------------------
/scripts/count_repos.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core"))
5 |
6 | import time
7 | import traceback
8 |
9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings")
10 | import django
11 | django.setup()
12 | from django.db.models import Q
13 |
14 | from library.models import *
15 | import utils
16 |
17 | COMMITS_COUNT_THRESHOLD = 10
18 |
def count_deployed_repos():
    """Print a {project_type: count} dict of deployed repos that captured
    'key_column_usage' information during their latest successful attempt."""
    stats = {}
    for repo in Repository.objects.exclude(latest_successful_attempt = None):
        # Skip tiny repos; negative commits_count (unknown) is deliberately kept.
        if 0 <= repo.commits_count <= COMMITS_COUNT_THRESHOLD:
            continue
        # .exists() avoids materializing the whole queryset just for truthiness.
        if Information.objects.filter(attempt = repo.latest_successful_attempt).filter(name = 'key_column_usage').exists():
            stats[repo.project_type] = stats.get(repo.project_type, 0) + 1

    # print(x) with a single argument behaves identically on Python 2 and 3.
    print(stats)
28 |
def count_ruby_failed_repos():
    """Print how many Ruby repos (project_type 2) never deployed successfully
    because their latest attempt failed to find database.yml."""
    count = 0
    for repo in Repository.objects.filter(latest_successful_attempt = None).filter(project_type = 2).exclude(latest_attempt = None):
        # Skip tiny repos; negative commits_count (unknown) is deliberately kept.
        if 0 <= repo.commits_count <= COMMITS_COUNT_THRESHOLD:
            continue
        if 'Unable to find database.yml' in repo.latest_attempt.log:
            count += 1

    # print(x) with a single argument behaves identically on Python 2 and 3.
    print(count)
38 |
def count_ruby_repetive_queries():
    """Print [matching, total] pairs of Ruby repos and of their actions that
    issue 'SELECT 1' liveness queries (repetitive ActiveRecord pings)."""
    repo_count = [0, 0]      # [repos with at least one SELECT 1, all repos]
    action_count = [0, 0]    # [actions with at least one SELECT 1, all actions]
    for repo in Repository.objects.exclude(latest_successful_attempt = None).filter(project_type = 2):
        repo_flag = False
        for action in Action.objects.filter(attempt = repo.latest_successful_attempt):
            action_flag = False
            for query in Query.objects.filter(action = action):
                if 'SELECT 1' in query.content:
                    repo_flag = True
                    action_flag = True
            if action_flag:
                action_count[0] += 1
            action_count[1] += 1
        if repo_flag:
            repo_count[0] += 1
        repo_count[1] += 1

    # print(x) with a single argument behaves identically on Python 2 and 3.
    print(repo_count)
    print(action_count)
59 |
def count_wrong_marked_repos():
    """Repair repos wrongly marked as deployed: clear latest_successful_attempt
    when its result is not 'OK', then re-link Ruby repos (project_type 2) to
    their most recent attempt that actually finished 'OK'. Prints the number
    of repos that were cleared."""
    repo_count = 0
    for repo in Repository.objects.exclude(latest_successful_attempt = None):
        if repo.latest_successful_attempt.result != 'OK':
            repo_count += 1
            repo.latest_successful_attempt = None
            repo.save()
    for repo in Repository.objects.filter(project_type = 2):
        attempts = Attempt.objects.filter(repo = repo).filter(result = 'OK')
        if attempts:
            # Last element in the queryset's default ordering.
            repo.latest_successful_attempt = list(attempts)[-1]
            repo.save()
    # print(x) with a single argument behaves identically on Python 2 and 3.
    print(repo_count)
73 |
def main():
    # One-off maintenance entry point: uncomment the report/repair you want to run.
    # count_deployed_repos()
    # count_ruby_failed_repos()
    # count_ruby_repetive_queries()
    count_wrong_marked_repos()

if __name__ == '__main__':
    main()
82 |
--------------------------------------------------------------------------------
/core/utils/file.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import re
4 |
5 | from run import run_command
6 |
def search_file(directory_name, file_name):
    """Recursively collect the paths of all non-symlink files named *file_name*
    under *directory_name*, in os.walk order."""
    matches = []
    for dirpath, _subdirs, filenames in os.walk(directory_name):
        matches.extend(
            os.path.join(dirpath, candidate)
            for candidate in filenames
            if candidate == file_name
            and not os.path.islink(os.path.join(dirpath, candidate))
        )
    return matches
16 |
def search_file_regex(directory_name, file_name_pattern):
    """Recursively collect non-symlink files whose *name* matches the regex
    *file_name_pattern* (re.search semantics), in os.walk order."""
    matcher = re.compile(file_name_pattern)
    hits = []
    for dirpath, _subdirs, filenames in os.walk(directory_name):
        for candidate in filenames:
            if not matcher.search(candidate):
                continue
            full_path = os.path.join(dirpath, candidate)
            if not os.path.islink(full_path):
                hits.append(full_path)
    return hits
26 |
def search_file_norecur(directory_name, file_name):
    """Return True iff *file_name* exists as a regular file directly inside
    *directory_name* (no recursion into subdirectories)."""
    return any(
        entry == file_name
        and os.path.isfile(os.path.join(directory_name, entry))
        for entry in os.listdir(directory_name)
    )
32 |
def search_dir(directory_name, query_name):
    """Return the path of the first directory (in os.walk order) whose name
    contains *query_name*, or None when no directory matches."""
    for dirpath, subdirs, _files in os.walk(directory_name):
        for candidate in subdirs:
            if query_name in candidate:
                return os.path.join(dirpath, candidate)
    return None
39 |
def replace_file_regex(file, string_pattern, string):
    """Rewrite *file* in place, substituting every match of *string_pattern*
    with *string* (re.DOTALL, so '.' also matches newlines).

    NOTE: the parameter name `file` shadows the builtin; kept for API
    compatibility with existing callers.
    """
    with open(file, "r+") as f:
        s = f.read()
        s = re.sub(string_pattern, string, s, flags=re.DOTALL)
        f.seek(0)
        f.write(s)
        # Drop trailing bytes left over when the new content is shorter.
        f.truncate()
        # (redundant f.close() inside the with-block removed)
48 |
def replace_files_regex(directory_name, string_pattern, string):
    """Apply replace_file_regex to every file under *directory_name*, recursively."""
    for dirpath, _subdirs, filenames in os.walk(directory_name):
        for candidate in filenames:
            replace_file_regex(os.path.join(dirpath, candidate), string_pattern, string)
53 |
def unzip(zip_name, dir_name):
    """Quietly extract *zip_name* into *dir_name*, overwriting existing files.

    NOTE(review): both arguments are interpolated unquoted into a shell
    command -- paths containing spaces or shell metacharacters will break.
    """
    run_command('unzip -o -qq {} -d {}'.format(zip_name, dir_name))
57 |
def rm_dir(path):
    """Forcibly delete *path* and everything under it (best effort)."""
    #if os.path.exists(path):
    #    shutil.rmtree(path)
    # sudo is used instead of shutil.rmtree -- presumably because deployed
    # apps leave root-owned files behind; confirm before simplifying.
    # NOTE(review): `path` is interpolated unquoted into a shell command.
    os.system('sudo rm -rf {}'.format(path))
62 |
def mk_dir(path):
    """Create *path* (including parent directories) unless it already exists."""
    if os.path.exists(path):
        return
    os.makedirs(path)
66 |
def chmod_dir(path):
    """Make *path* world-readable/writable/executable; no-op if it does not exist.

    BUG FIX: `0777` is Python-2-only octal syntax (a SyntaxError on Python 3);
    `0o777` is valid on Python 2.6+ and 3 with identical meaning.
    """
    if os.path.exists(path):
        os.chmod(path, 0o777)
70 |
def make_dir(path):
    """Recreate *path* as an empty, world-writable directory
    (delete if present, then create, then chmod 777)."""
    rm_dir(path)
    mk_dir(path)
    chmod_dir(path)
75 |
def cd(path):
    """Return a shell command string that changes directory to *path*."""
    return "cd "+ path
78 |
def rename_file(old_file, new_file):
    """Rename *old_file* to *new_file* via the shell's mv; returns run_command's result.

    NOTE(review): paths are interpolated unquoted into a shell command.
    """
    command = 'mv {} {}'.format(old_file, new_file)
    return run_command(command)
83 |
def copy_file(old_file, new_file):
    """Copy *old_file* to *new_file*, preserving metadata (shutil.copy2)."""
    shutil.copy2(old_file, new_file)
86 |
def remove_file(path):
    """Delete *path* if possible; filesystem errors (missing file, permissions)
    are silently ignored -- deliberately best-effort cleanup.

    Narrowed the bare `except:` to OSError so programming errors (e.g. a bad
    argument type) are no longer swallowed.
    """
    try:
        os.remove(path)
    except OSError:
        pass
92 |
def get_size(start_path = '.'):
    """Return the total size in bytes of all files under *start_path*.

    Files that cannot be stat'ed (broken symlinks, files deleted mid-walk)
    are skipped. Narrowed the bare `except:` to OSError, and moved the
    path join out of the try-block since it cannot raise OSError.
    """
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            try:
                total_size += os.path.getsize(fp)
            except OSError:
                pass
    return total_size
103 |
--------------------------------------------------------------------------------
/core/drivers/extract/extract.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
3 |
4 | import utils
5 | import json
6 | from cmudbac.settings import *
7 |
8 | EXTRACT_WAIT_TIME = 0
9 |
def extract_forms(url, follow = "false", cookie_jar = None, filename = "forms.json"):
    """Crawl *url* with a scrapy spider and return the extracted form dicts.

    follow: "true"/"false" string forwarded to the spider (follow links or not).
    cookie_jar: optional cookielib jar; when given, the authenticated
        form_with_cookie spider is used so the crawl runs inside a session.
    Returns the list of forms the spider dumped into *filename*.
    """
    # Remove stale output from a previous run, otherwise it would be re-read below.
    utils.remove_file(os.path.join(os.path.dirname(__file__), filename))

    # Idiom fix: compare against None with `is`, not `==`.
    if cookie_jar is None:
        # Try crawling through the configured HTTP proxy first; fall back to a
        # direct crawl if HTTP_PROXY is undefined or the proxied command fails.
        try:
            out = utils.run_command('{} && {}'.format(
                utils.cd(os.path.dirname(os.path.abspath(__file__))),
                'scrapy crawl form -o {} -a start_url="{}" -a follow={} -a proxy={}'.format(filename, url, follow, HTTP_PROXY)), EXTRACT_WAIT_TIME)
        except:
            out = utils.run_command('{} && {}'.format(
                utils.cd(os.path.dirname(os.path.abspath(__file__))),
                'scrapy crawl form -o {} -a start_url="{}" -a follow={}'.format(filename, url, follow)), EXTRACT_WAIT_TIME)
    else:
        # Persist the jar so the spider subprocess can load the session cookies.
        cookie_jar_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename.replace('.json', '.txt'))
        cookie_jar.save(cookie_jar_path)
        out = utils.run_command('{} && {}'.format(
            utils.cd(os.path.dirname(os.path.abspath(__file__))),
            'scrapy crawl form_with_cookie -o {} -a start_url="{}" -a cookie_jar={}'.format(filename, url, cookie_jar_path)), EXTRACT_WAIT_TIME)

    with open(os.path.join(os.path.dirname(__file__), filename)) as json_forms:
        forms = json.load(json_forms)

    utils.remove_file(os.path.join(os.path.dirname(__file__), filename))

    return forms
35 |
def extract_all_forms(url, filename):
    """Crawl *url* recursively (follow links) and return all extracted forms."""
    return extract_forms(url, "true", filename = filename)
38 |
def extract_all_forms_with_cookie(url, cookie_jar, filename):
    """Crawl *url* recursively inside the session held in *cookie_jar*."""
    return extract_forms(url, "true", cookie_jar, filename)
41 |
def extract_urls(url, follow = "false", cookie_jar = None, filename = "urls.json"):
    """Crawl *url* with a scrapy spider and return the list of discovered URLs.

    Mirrors extract_forms: follow is a "true"/"false" string, cookie_jar
    switches to the authenticated url_with_cookie spider, and the result is
    read back from the JSON file the spider writes.
    """
    # Remove stale output from a previous run, otherwise it would be re-read below.
    utils.remove_file(os.path.join(os.path.dirname(__file__), filename))

    # Idiom fix: compare against None with `is`, not `==`.
    if cookie_jar is None:
        # Try crawling through the configured HTTP proxy first; fall back to a
        # direct crawl if HTTP_PROXY is undefined or the proxied command fails.
        try:
            out = utils.run_command('{} && {}'.format(
                utils.cd(os.path.dirname(os.path.abspath(__file__))),
                'scrapy crawl url -o {} -a start_url="{}" -a follow={} -a proxy={}'.format(filename, url, follow, HTTP_PROXY)), EXTRACT_WAIT_TIME)
        except:
            out = utils.run_command('{} && {}'.format(
                utils.cd(os.path.dirname(os.path.abspath(__file__))),
                'scrapy crawl url -o {} -a start_url="{}" -a follow={}'.format(filename, url, follow)), EXTRACT_WAIT_TIME)
    else:
        # Persist the jar so the spider subprocess can load the session cookies.
        cookie_jar_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename.replace('.json', '.txt'))
        cookie_jar.save(cookie_jar_path)
        out = utils.run_command('{} && {}'.format(
            utils.cd(os.path.dirname(os.path.abspath(__file__))),
            'scrapy crawl url_with_cookie -o {} -a start_url="{}" -a cookie_jar={}'.format(filename, url, cookie_jar_path)), EXTRACT_WAIT_TIME)

    with open(os.path.join(os.path.dirname(__file__), filename)) as json_urls:
        urls = json.load(json_urls)

    utils.remove_file(os.path.join(os.path.dirname(__file__), filename))
    return urls
66 |
def extract_all_urls(url, filename):
    """Crawl *url* recursively (follow links) and return all discovered URLs."""
    return extract_urls(url, "true", filename = filename)
69 |
def extract_all_urls_with_cookie(url, cookie_jar, filename):
    """Crawl *url* recursively inside the session held in *cookie_jar*."""
    return extract_urls(url, "true", cookie_jar, filename)
--------------------------------------------------------------------------------
/core/drivers/extract/driver/spiders/form_with_cookie.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import scrapy
4 | from scrapy.spiders import CrawlSpider, Rule
5 | from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
6 | import cookielib
7 |
8 | from driver.items import InputItem, FormItem
9 |
class FormWithCookieSpider(CrawlSpider):
    """Crawl an authenticated deployment and emit a FormItem for every <form>.

    Cookies from a saved LWP cookie jar are attached to every request so the
    crawl runs inside an existing login session; logout-looking URLs are
    dropped so the crawl cannot destroy its own session.
    """
    name = "form_with_cookie"
    allowed_domains = ["127.0.0.1"]

    def __init__(self, *args, **kwargs):
        super(FormWithCookieSpider, self).__init__(*args, **kwargs)

        self.start_urls = [kwargs.get('start_url')]
        self.cookiejar = cookielib.LWPCookieJar()
        self.cookiejar.load(kwargs.get('cookie_jar'))

        self.rules = (
            Rule (SgmlLinkExtractor(allow=('')), callback='parse_form', follow=True, process_request='add_cookie_for_request'),
        )
        # CrawlSpider compiles rules before __init__ assigns them: recompile.
        super(FormWithCookieSpider, self)._compile_rules()

    def add_cookie_for_request(self, request):
        """Attach the session cookies to *request*; drop logout-like requests."""
        for cookie in self.cookiejar:
            request.cookies[cookie.name] = cookie.value
        logout_patterns = ['logout', 'log-out', 'log_out']
        if any(logout_pattern in request.url for logout_pattern in logout_patterns):
            return None
        return request

    def parse_form(self, response):
        """Yield a FormItem (with its InputItems) for each form on the page."""
        for sel in response.xpath('//form'):
            formItem = FormItem()

            formItem['action'] = ''
            try:
                formItem['action'] = sel.xpath('@action').extract()[0]
            except:
                pass

            formItem['url'] = response.url

            formItem['method'] = ''
            try:
                formItem['method'] = sel.xpath('@method').extract()[0].lower()
            except:
                pass

            formItem['inputs'] = []
            for ip in sel.xpath('.//input|.//textarea'):
                try:
                    _id = ip.xpath('@id').extract()[0]
                except:
                    _id = ''
                # BUG FIX: the name extraction was the only attribute lookup not
                # guarded -- an input without a name attribute raised IndexError
                # and aborted the whole page. Now defaults to '' like the
                # sibling FormSpider does.
                try:
                    name = ip.xpath('@name').extract()[0]
                except:
                    name = ''
                try:
                    _type = ip.xpath('@type').extract()[0]
                except:
                    _type = 'textarea'
                try:
                    value = ip.xpath('@value').extract()[0]
                except:
                    value = ''
                inputItem = InputItem()
                inputItem['id'] = _id
                inputItem['name'] = name
                inputItem['type'] = _type
                inputItem['value'] = value
                formItem['inputs'].append(inputItem)

            try:
                _id = sel.xpath('@id').extract()[0]
            except:
                _id = ''
            try:
                _class = sel.xpath('@class').extract()[0]
            except:
                _class = ''
            try:
                enctype = sel.xpath('@enctype').extract()[0]
            except:
                enctype = ''
            formItem['id'] = _id
            formItem['clazz'] = _class
            formItem['enctype'] = enctype

            yield formItem
91 |
92 |
--------------------------------------------------------------------------------
/core/drivers/extract/driver/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Scrapy settings for driver project
4 | #
5 | # For simplicity, this file contains only settings considered important or
6 | # commonly used. You can find more settings consulting the documentation:
7 | #
8 | # http://doc.scrapy.org/en/latest/topics/settings.html
9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
11 |
12 | BOT_NAME = 'driver'
13 |
14 | SPIDER_MODULES = ['driver.spiders']
15 | NEWSPIDER_MODULE = 'driver.spiders'
16 |
17 |
18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
19 | #USER_AGENT = 'driver (+http://www.yourdomain.com)'
20 |
21 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
22 | CONCURRENT_REQUESTS=32
23 |
24 | # Configure a delay for requests for the same website (default: 0)
25 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
26 | # See also autothrottle settings and docs
27 | #DOWNLOAD_DELAY=3
28 | # The download delay setting will honor only one of:
29 | #CONCURRENT_REQUESTS_PER_DOMAIN=16
30 | #CONCURRENT_REQUESTS_PER_IP=16
31 |
32 | # Disable cookies (enabled by default)
33 | #COOKIES_ENABLED=False
34 |
35 | # Disable Telnet Console (enabled by default)
36 | #TELNETCONSOLE_ENABLED=False
37 |
38 | # Override the default request headers:
39 | #DEFAULT_REQUEST_HEADERS = {
40 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
41 | # 'Accept-Language': 'en',
42 | #}
43 |
44 | # Enable or disable spider middlewares
45 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
46 | #SPIDER_MIDDLEWARES = {
47 | # 'driver.middlewares.MyCustomSpiderMiddleware': 543,
48 | #}
49 |
50 | # Enable or disable downloader middlewares
51 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
52 | #DOWNLOADER_MIDDLEWARES = {
53 | # 'driver.middlewares.MyCustomDownloaderMiddleware': 543,
54 | #}
55 |
56 | # Enable or disable extensions
57 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
58 | #EXTENSIONS = {
59 | # 'scrapy.telnet.TelnetConsole': None,
60 | #}
61 |
62 | # Configure item pipelines
63 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
64 | #ITEM_PIPELINES = {
65 | # 'driver.pipelines.SomePipeline': 300,
66 | #}
67 |
68 | # Enable and configure the AutoThrottle extension (disabled by default)
69 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html
70 | # NOTE: AutoThrottle will honour the standard settings for concurrency and delay
71 | #AUTOTHROTTLE_ENABLED=True
72 | # The initial download delay
73 | #AUTOTHROTTLE_START_DELAY=5
74 | # The maximum download delay to be set in case of high latencies
75 | #AUTOTHROTTLE_MAX_DELAY=60
76 | # Enable showing throttling stats for every response received:
77 | #AUTOTHROTTLE_DEBUG=False
78 |
79 | # Enable and configure HTTP caching (disabled by default)
80 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
81 | #HTTPCACHE_ENABLED=True
82 | #HTTPCACHE_EXPIRATION_SECS=0
83 | #HTTPCACHE_DIR='httpcache'
84 | #HTTPCACHE_IGNORE_HTTP_CODES=[]
85 | #HTTPCACHE_STORAGE='scrapy.extensions.httpcache.FilesystemCacheStorage'
86 |
# Disable S3 handler explicitly
DOWNLOAD_HANDLERS={'s3': None}

# Hard stop: close the spider after 120 seconds so crawling a deployed
# application can never hang the pipeline indefinitely.
CLOSESPIDER_TIMEOUT=120
91 |
--------------------------------------------------------------------------------
/blog/forms.py:
--------------------------------------------------------------------------------
1 | from django import forms
2 | from django.utils import timezone
3 | from django.utils.functional import curry
4 |
5 | from .conf import settings
6 | from .models import Post, Revision
7 | from .utils import can_tweet, load_path_attr
8 | from .signals import post_published
9 |
10 |
# Post fields exposed on the admin form, in display order.
FIELDS = [
    "section",
    "author",
    "markup",
    "title",
    "slug",
    "teaser",
    "content",
    "description",
    "primary_image",
    "state",
]

# The tweet checkbox only exists when twitter support is configured.
if can_tweet():
    FIELDS.append("tweet")
26 |
27 |
class AdminPostForm(forms.ModelForm):
    """Admin form for Post that layers revision tracking on top of ModelForm.

    The editable ``teaser``/``content`` are seeded from the Post's latest
    Revision, and every save snapshots a new Revision. Saving may also stamp
    the publication time, send the post_published signal, and tweet.
    """

    title = forms.CharField(
        max_length=90,
        widget=forms.TextInput(attrs={"style": "width: 50%;"}),
    )
    slug = forms.CharField(
        widget=forms.TextInput(attrs={"style": "width: 50%;"})
    )
    teaser = forms.CharField(
        widget=forms.Textarea(attrs={"style": "width: 80%;"}),
    )
    content = forms.CharField(
        widget=forms.Textarea(attrs={"style": "width: 80%; height: 300px;"})
    )
    description = forms.CharField(
        widget=forms.Textarea(attrs={"style": "width: 80%;"}),
        required=False
    )
    # Only offer the tweet checkbox when twitter support is configured.
    if can_tweet():
        tweet = forms.BooleanField(
            required=False,
            help_text="Checking this will send out a tweet for this post",
        )

    class Meta:
        model = Post
        fields = FIELDS

    class Media:
        js = ("js/admin_post_form.js",)

    def __init__(self, *args, **kwargs):
        """Pre-fill teaser/content from the post's most recent revision."""
        super(AdminPostForm, self).__init__(*args, **kwargs)

        post = self.instance

        # grab the latest revision of the Post instance
        latest_revision = post.latest()

        if latest_revision:
            # set initial data from the latest revision
            self.fields["teaser"].initial = latest_revision.teaser
            self.fields["content"].initial = latest_revision.content

    def save(self):
        """Persist the post, snapshot a Revision, optionally tweet and signal."""
        published = False
        post = super(AdminPostForm, self).save(commit=False)

        # Stamp `published` only on the first transition into the final state
        # (STATE_CHOICES[-1] is the "published" state).
        if post.pk is None or Post.objects.filter(pk=post.pk, published=None).count():
            if self.cleaned_data["state"] == Post.STATE_CHOICES[-1][0]:
                post.published = timezone.now()
                published = True

        # Resolve the markup parser configured for the chosen markup language.
        render_func = curry(
            load_path_attr(
                settings.PINAX_BLOG_MARKUP_CHOICE_MAP[self.cleaned_data["markup"]]["parser"]
            )
        )

        post.teaser_html = render_func(self.cleaned_data["teaser"])
        post.content_html = render_func(self.cleaned_data["content"])
        post.updated = timezone.now()
        post.save()

        # Snapshot this edit as a Revision so edit history is preserved.
        r = Revision()
        r.post = post
        r.title = post.title
        r.teaser = self.cleaned_data["teaser"]
        r.content = self.cleaned_data["content"]
        r.author = post.author
        r.updated = post.updated
        r.published = post.published
        r.save()

        if can_tweet() and self.cleaned_data["tweet"]:
            post.tweet()

        # Fire the signal only after everything (post + revision) is saved.
        if published:
            post_published.send(sender=Post, post=post)

        return post
110 |
--------------------------------------------------------------------------------
/core/utils/data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
5 |
6 | import json
7 | import logging
8 |
9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings")
10 | import django
11 | django.setup()
12 | import library
13 | from library.models import *
14 | import utils
15 |
16 | ## =====================================================================
17 | ## LOGGING CONFIGURATION
18 | ## =====================================================================
19 | LOG = logging.getLogger()
20 |
def get_crawler(crawler_status, crawler_class):
    """Instantiate the crawler named *crawler_class* from the crawlers package.

    GitHub credentials are loaded from secrets/secrets.json when present;
    crawlers that need no auth simply receive None.
    """
    moduleName = "crawlers.%s" % (crawler_class.lower())
    moduleHandle = __import__(moduleName, globals(), locals(), [crawler_class])
    klass = getattr(moduleHandle, crawler_class)
    # FOR GITHUB: credentials are optional -- a missing or corrupt secrets
    # file means "no auth". Narrowed the bare except to file/JSON errors
    # (json raises ValueError on Python 2) so real bugs still surface.
    try:
        with open(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "secrets", "secrets.json"), 'r') as auth_file:
            auth = json.load(auth_file)
    except (IOError, OSError, ValueError):
        auth = None
    crawler = klass(crawler_status, auth)
    return crawler
33 |
def add_module(module_name, package_name, package_type_id, package_version):
    """Create a Module named *module_name* linked to the (possibly new)
    Package (package_name, package_version, package_type_id)."""
    project_type = ProjectType.objects.get(id=package_type_id)
    # get_or_create already returns the package; the original issued a
    # redundant second .get() query for the same row.
    package, _created = Package.objects.get_or_create(name = package_name, version = package_version, project_type = project_type)
    module = Module()
    module.name = module_name
    module.package = package
    module.save()
42 |
def add_repo(repo_name, crawler_status_id, repo_setup_scripts):
    """Register repository *repo_name* via the crawler attached to the
    CrawlerStatus row identified by *crawler_status_id*."""
    cs = CrawlerStatus.objects.get(id=crawler_status_id)
    repo_source = cs.source
    # NOTE(review): project_type is never used below -- confirm before removing.
    project_type = cs.project_type
    crawler = get_crawler(cs, repo_source.crawler_class)
    crawler.add_repository(repo_name, repo_setup_scripts)
49 |
def deploy_repo(repo_name, database = 'PostgreSQL'):
    """Deploy the named repository inside vagrant; returns the deploy result.

    Exceptions are logged and re-raised to the caller.
    """
    repo = Repository.objects.get(name=repo_name)
    # print(...) with one argument is valid on both Python 2 and 3.
    print('Attempting to deploy {} using {} ...'.format(repo, repo.project_type.deployer_class))
    try:
        result = utils.vagrant_deploy(repo, 0, database)
    except Exception as e:
        # `except ... as` works on Python 2.6+ and 3; the comma form does not.
        LOG.exception(e)
        # bare `raise` preserves the original traceback (``raise e`` did not on py2)
        raise
    return result
59 |
def delete_repo(repo_name):
    """Delete every Repository row whose name equals *repo_name*."""
    matching = Repository.objects.filter(name=repo_name)
    for repository in matching:
        repository.delete()
63 |
def edit_distance(a, b, threshold = 3):
    """Banded Levenshtein distance between strings *a* and *b*.

    Computes the exact edit distance when it is <= threshold; as soon as the
    distance is known to exceed threshold, returns threshold + 1 instead.
    Runs in O(len(a) * threshold) time with two rolling rows of space.
    """
    over = threshold + 1
    len_a = len(a)
    len_b = len(b)
    # A length gap larger than the threshold already forces distance > threshold.
    if abs(len_a - len_b) > threshold:
        return over
    # Two rolling DP rows; only cells with |i - j| <= threshold are maintained.
    d0 = [0] * (max(len_a, len_b) + 1)
    d1 = [0] * (max(len_a, len_b) + 1)
    for i in range(len_a + 1):
        l = max(0, i - threshold)
        r = min(len_b, i + threshold)
        minDis = over
        for j in range(l, r + 1):
            if i == 0:
                d1[j] = j
            elif j == 0:
                d1[j] = i
            else:
                # substitution (free when the characters match)
                if a[i - 1] == b[j - 1]:
                    d1[j] = d0[j - 1]
                else:
                    d1[j] = d0[j - 1] + 1
                if j > l:
                    # insertion: left neighbor in the current row
                    d1[j] = min(d1[j], d1[j - 1] + 1)
                if j < i + threshold:
                    # deletion: cell above, valid only inside the band
                    d1[j] = min(d1[j], d0[j] + 1)
            minDis = min(minDis, d1[j])
        # The whole band exceeds the threshold: no path can recover.
        if minDis > threshold:
            return over
        d0, d1 = d1, d0

    return d0[len_b]
97 |
--------------------------------------------------------------------------------
/core/utils/vagrant.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os, sys
3 |
4 | import shutil
5 | import traceback
6 |
7 | from run import run_command
8 | from file import cd
9 |
# Project directories mirrored into the vagrant tree before a run.
copied_dir = ['cmudbac', 'library', 'blog', 'core', 'secrets', 'scripts']
vagrant_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'vagrant')
# NOTE(review): appears unused in this module -- confirm before removing.
copied_files = []
13 |
def vagrant_setup():
    """Copy the project directories into the vagrant tree so the VM sees them."""
    # print(...) with one argument is valid on both Python 2 and 3.
    print('Setuping Vagrant ...')

    ## Copy files
    for new_dir in copied_dir:
        old_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, new_dir)
        # Never overwrite a copy that is already in place.
        if os.path.exists(old_dir) and not os.path.exists(os.path.join(vagrant_dir, new_dir)):
            shutil.copytree(old_dir, os.path.join(vagrant_dir, new_dir))

    # run_command('{} && {}'.format(cd(vagrant_dir), 'vagrant up'))
24 |
def vagrant_clear():
    """Best-effort removal of the directories previously copied into the
    vagrant tree; directories that are already gone are ignored."""
    # Delete files
    for new_dir in copied_dir:
        try:
            shutil.rmtree(os.path.join(vagrant_dir, new_dir))
        except OSError:
            # Narrowed from a bare except: rmtree failures are OSError-based,
            # and programming errors should not be silently swallowed.
            pass

    # run_command('{} && {}'.format(cd(vagrant_dir), 'vagrant halt'))
34 |
def set_vagrant_database():
    """Point the copied settings.py at the host database (10.0.2.2 is the
    VirtualBox default gateway back to the host)."""
    settings_file = os.path.join(vagrant_dir, "cmudbac", "settings.py")
    with open(settings_file) as fin:
        contents = fin.read()
    if "'HOST': 'localhost'" not in contents:
        return
    contents = contents.replace("'HOST': 'localhost'", "'HOST': '10.0.2.2'")
    with open(settings_file, 'w') as fout:
        fout.write(contents)
        fout.flush()
44 |
def unset_vagrant_database():
    """Point the copied settings.py back at localhost after a vagrant run."""
    settings_file = os.path.join(vagrant_dir, "cmudbac", "settings.py")
    settings = open(settings_file).read()
    if "'HOST': '10.0.2.2'" in settings:
        # BUG FIX: previously replaced 'localhost' with 'localhost' (a no-op),
        # so the host was never actually reverted from 10.0.2.2.
        settings = settings.replace("'HOST': '10.0.2.2'", "'HOST': 'localhost'")
        fout = open(settings_file, 'w')
        fout.write(settings)
        fout.flush()
        fout.close()
54 |
def vagrant_deploy(repo, deploy_id, database):
    """Run the deploy script inside the vagrant VM; returns os.system's status."""
    set_vagrant_database()
    script = 'python /vagrant/core/scripts/vagrant_deploy.py {} {} {}'.format(repo, deploy_id, database)
    status = os.system('{} && {}'.format(cd(vagrant_dir), 'vagrant ssh -c "{}"'.format(script)))
    unset_vagrant_database()

    return status
64 |
def vagrant_benchmark(attempt_info, database, benchmark, deploy_id = 1):
    """Run a benchmark for *attempt_info* inside the vagrant VM.

    database/benchmark are dicts turned into --key=value CLI flags for
    vagrant_benchmark.py. Returns os.system's status, or None on failure.
    The vagrant tree is always cleared (best effort) on the way out.
    """
    # run the benchmark
    vagrant_setup()
    out = None
    # (removed unused local `temp_dir`)
    try:
        import json
        # Hand the attempt description to the VM through the shared folder.
        attempt_info_file_path = os.path.join(vagrant_dir, 'attempt_info.json')
        with open(attempt_info_file_path, 'w') as attempt_info_file:
            json.dump(attempt_info, attempt_info_file)
        command = '{} && {}'.format(
            cd(vagrant_dir),
            'vagrant ssh -c "{}"'.format(
                'python /vagrant/core/scripts/vagrant_benchmark.py --attempt_info="{attempt_info}" --deploy_id={deploy_id} {database} {benchmark}'
                .format(attempt_info=os.path.join('/vagrant', 'attempt_info.json'), deploy_id=deploy_id,
                        # .items() works on Python 2 and 3; iteritems() is 2-only.
                        database=' '.join('--{}={}'.format(key, value) for key, value in database.items()),
                        benchmark=' '.join('--{}={}'.format(key, value) for key, value in benchmark.items())
                        )
                )
            )
        out = os.system(command)
        return out
    except:
        traceback.print_exc()
    finally:
        try:
            vagrant_clear()
        except:
            pass

    return out
--------------------------------------------------------------------------------
/tools/local-deployer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core", "utils"))
5 |
6 | import argparse
7 | import requests
8 | import traceback
9 | import json
10 | import vagrant
11 |
12 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings")
13 | import django
14 | django.setup()
15 |
16 | CMDBAC_URL = "http://cmdbac.cs.cmu.edu/"
17 | ATTEMPT_INFO_URL = "/api/attempt/{id}/info/"
18 |
19 | ACTION_TYPES = (
20 | "info",
21 | "deploy",
22 | )
23 |
24 | DATABASE_TYPES = (
25 | "mysql",
26 | "postgres",
27 | "sqlite"
28 | )
29 |
def parse_args():
    """Build and run the CLI argument parser; returns the options as a dict."""
    aparser = argparse.ArgumentParser(description='CMDBAC Local Deployer Tool')

    # Actions
    aparser.add_argument('action', choices=ACTION_TYPES, \
        help='Deployer Action')

    # Attempt Parameters
    agroup = aparser.add_argument_group('Deployment Parameters')
    agroup.add_argument('--catalog', default=CMDBAC_URL, metavar='URL', \
        help='Catalog API URL')
    agroup.add_argument('--attempt', type=int, metavar='ID', \
        help='Id of the attempt to deploy')
    agroup.add_argument('--num_threads', type=int, default=1, metavar='N', \
        help='Number of threads you want to use to submit actions')
    agroup.add_argument('--timeout', type=int, metavar='T', \
        help='Timeout for submitting actions (seconds)')
    agroup.add_argument('--db-size', type=int, \
        help='The expected Database size, 10 stands for 10MB')

    # Database Parameters
    agroup = aparser.add_argument_group('Local Database Parameters')
    agroup.add_argument('--db-type', choices=DATABASE_TYPES, \
        help='Database Type')
    agroup.add_argument('--db-host', type=str, \
        help='Database Hostname')
    agroup.add_argument('--db-port', type=int, \
        help='Database Port')
    agroup.add_argument('--db-name', type=str, \
        help='Database Name')
    agroup.add_argument('--db-user', type=str, \
        help='Database User')
    agroup.add_argument('--db-pass', type=str, \
        help='Database Password')

    # FIX: --db-port help text previously read 'Databsae Port'.
    return vars(aparser.parse_args())
## DEF
67 |
def get_attempt_info(api_url, attempt_id):
    """Fetch the attempt-info JSON document for *attempt_id* from the catalog API."""
    endpoint = api_url + ATTEMPT_INFO_URL.format(id = attempt_id)
    return requests.get(endpoint).json()
## DEF
73 |
def run_attempt_benchmark(api_url, attempt_id, database, benchmark):
    """Fetch attempt info from the catalog, then benchmark it inside vagrant.

    Failures are printed (traceback) and swallowed, matching the original
    best-effort behavior.
    """
    attempt_info = get_attempt_info(api_url, attempt_id)
    # print(...) with one argument is valid on both Python 2 and 3.
    print('Running Benchmark for Attempt {}'.format(attempt_id))
    try:
        vagrant.vagrant_benchmark(attempt_info, database, benchmark)
    except Exception:
        # `except Exception, e` is Python-2-only syntax, and `e` was unused.
        traceback.print_exc()
## DEF
82 |
if __name__ == "__main__":
    args = parse_args()

    if args["action"] == "info":
        attempt_info = get_attempt_info(args["catalog"], args["attempt"])
        print(json.dumps(attempt_info, indent = 4))
    elif args["action"] == "deploy":
        # Local database the benchmark should point the deployed app at.
        database = {
            'database': args["db_type"],
            'host': args["db_host"],
            'port': args["db_port"],
            'name': args["db_name"],
            'username': args["db_user"],
            'password': args["db_pass"]
        }
        benchmark = {
            'num_threads': args["num_threads"],
            'timeout': args["timeout"],
            # BUG FIX: was `arg["db_size"]` (undefined name -> NameError at runtime).
            'size': args["db_size"]
        }
        run_attempt_benchmark(args["catalog"], args["attempt"], database, benchmark)
    else:
        print("Invalid action '%s'" % args["action"])
        sys.exit(1)

## MAIN
--------------------------------------------------------------------------------
/core/drivers/extract/driver/spiders/form.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import scrapy
4 | from scrapy.spiders import CrawlSpider, Rule
5 | from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
6 |
7 | from driver.items import InputItem, FormItem
8 | from selenium import webdriver
9 |
class FormSpider(CrawlSpider):
    """Crawl a locally deployed application and emit a FormItem per <form>.

    On registration-like pages a PhantomJS browser renders the page so inputs
    that exist in the DOM but are not displayed can be skipped.
    """
    name = "form"
    allowed_domains = ["127.0.0.1"]

    def __init__(self, *args, **kwargs):
        super(FormSpider, self).__init__(*args, **kwargs)

        self.start_urls = [kwargs.get('start_url')]

        follow = True if kwargs.get('follow') == 'true' else False
        self.rules = (
            Rule (SgmlLinkExtractor(allow=('')), callback='parse_form', follow=follow),
        )
        # CrawlSpider compiles rules before __init__ assigns them: recompile.
        super(FormSpider, self)._compile_rules()

        # Route the PhantomJS browser through the same proxy as the crawl.
        # FIX: previously this relied on `'--proxy=' + None` raising TypeError
        # inside a try/except to detect the missing proxy; now it's explicit.
        proxy = kwargs.get('proxy')
        if proxy is not None:
            service_args = [
                '--proxy=' + proxy,
                '--proxy-type=http',
            ]
        else:
            service_args = None
        self.browser = webdriver.PhantomJS(service_args=service_args)

    def closed(self, reason):
        """Shut the PhantomJS browser down when the crawl finishes."""
        self.browser.quit()

    def parse_form(self, response):
        """Yield a FormItem (with its visible InputItems) for each form on the page."""
        # Only registration-style pages are rendered in the browser, where
        # hidden inputs must be filtered out.
        register_patterns = ['register', 'signup', 'sign-up', 'sign_up']
        if any(pattern in response.url for pattern in register_patterns):
            use_browser = True
        else:
            use_browser = False
        for sel in response.xpath('//form'):
            if use_browser:
                self.browser.get(response.url)
            formItem = FormItem()

            formItem['action'] = ''
            try:
                formItem['action'] = sel.xpath('@action').extract()[0]
            except:
                pass

            formItem['url'] = response.url

            formItem['method'] = ''
            try:
                formItem['method'] = sel.xpath('@method').extract()[0].lower()
            except:
                pass

            formItem['inputs'] = []
            for ip in sel.xpath('.//input|.//textarea'):
                try:
                    _id = ip.xpath('@id').extract()[0]
                except:
                    _id = ''
                if _id != '':
                    if use_browser:
                        input_element = self.browser.find_element_by_id(_id)
                        # Skip inputs the user cannot actually see.
                        if not input_element.is_displayed():
                            continue
                try:
                    name = ip.xpath('@name').extract()[0]
                except:
                    name = ''
                try:
                    _type = ip.xpath('@type').extract()[0]
                except:
                    _type = 'textarea'
                try:
                    value = ip.xpath('@value').extract()[0]
                except:
                    value = ''
                inputItem = InputItem()
                inputItem['id'] = _id
                inputItem['name'] = name
                inputItem['type'] = _type
                inputItem['value'] = value
                formItem['inputs'].append(inputItem)

            try:
                _id = sel.xpath('@id').extract()[0]
            except:
                _id = ''
            try:
                _class = sel.xpath('@class').extract()[0]
            except:
                _class = ''
            try:
                enctype = sel.xpath('@enctype').extract()[0]
            except:
                enctype = ''
            formItem['id'] = _id
            formItem['clazz'] = _class
            formItem['enctype'] = enctype

            yield formItem
110 |
111 |
--------------------------------------------------------------------------------
/library/templates/status/attempt_status_codes.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | Success
17 |
18 | The application was successfully deployed and the system was able to run a workload and capture queries.
19 |
20 |
21 |
22 | Download Error
23 |
24 | The system failed to download the application's source code from the on-line repository. This could be a transient network error or the application could have been deleted.
25 |
26 |
27 |
28 | Missing Required Files
29 |
30 | The application did not contain the necessary files that are needed to deploy and run it. This likely means that the application is not the project type that we assumed it was. This can occur if the crawler mis-identifies it as a database application.
31 |
32 |
33 |
34 | Missing Dependencies
35 |
36 | The deployer was unable to determine what dependencies are needed to automatically deploy the application.
37 |
38 |
39 |
40 | Database Error
41 |
42 | The application failed to instantiate and synchronize its database properly. This can occur if there are additional manual steps that are needed prior to deployment.
43 |
44 |
45 |
46 | Running Error
47 |
48 | The system was able to install the dependencies needed for the application but then it experienced an unexpected error when trying to actually run it. This could be because the application in its current form is broken and is thus not runnable.
49 |
50 |
51 |
52 | No Queries
53 |
54 | The application was successfully deployed but the system was not able to run a workload and capture queries.
55 |
56 |
57 |
58 |
59 |
60 |
61 |
64 |
65 |
66 |
--------------------------------------------------------------------------------
/cmudbac/settings_example.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
# NOTE: because the __future__ import precedes it, the string below is an
# ordinary expression statement rather than the module docstring; it is kept
# for the documentation links it carries.
"""
Django settings for cmudbac project.

For more information on this file, see
https://docs.djangoproject.com/en/1.6/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.6/ref/settings/
"""

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
import os
BASE_DIR = os.path.dirname(os.path.dirname(__file__))

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.6/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'CHANGE ME'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []

# Application definition

INSTALLED_APPS = (
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django.contrib.sites',
    'rest_framework',
    'library',
    'blog'
)

# UpdateCacheMiddleware is listed first and FetchFromCacheMiddleware last,
# matching Django's required ordering for the per-site cache.
MIDDLEWARE_CLASSES = (
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.cache.UpdateCacheMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
    'django.middleware.cache.FetchFromCacheMiddleware',
)

ROOT_URLCONF = 'cmudbac.urls'

WSGI_APPLICATION = 'cmudbac.wsgi.application'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
                'django.core.context_processors.static',
                # Project-local processor that injects the analytics snippet.
                'library.context_processors.analytics'
            ],
        },
    },
]


# Database
# https://docs.djangoproject.com/en/1.6/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': 'dbac',
        'HOST': 'localhost',
        'PORT': '3306',
        'USER': 'CHANGE_ME',
        'PASSWORD': 'CHANGE_ME',
        'STORAGE_ENGINE': 'InnoDB'
    }
}

# File-based cache backing the cache middleware configured above.
CACHES = {
    'default': {
        'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
        'LOCATION': '/var/tmp/django_cache/cmdbac',
    }
}

# Where each supported database server writes its general query log;
# the analyzers read captured queries from these files.
LOG_FILE_LOCATION = {
    'mysql': '/var/log/mysql/mysql.log',
    'postgresql': '/var/log/postgresql/postgresql-9.3-main.log'
}

# Internationalization
# https://docs.djangoproject.com/en/1.6/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'America/New_York'

USE_I18N = True

USE_L10N = True

#USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.6/howto/static-files/

STATIC_URL = '/static/'

# Optional outbound HTTP proxy used by the crawler/deployer; empty = direct.
HTTP_PROXY = ''

REST_FRAMEWORK = {
    # Use Django's standard `django.contrib.auth` permissions,
    # or allow read-only access for unauthenticated users.
    'DEFAULT_PERMISSION_CLASSES': [
        'rest_framework.permissions.AllowAny'
    ],
    'DEFAULT_PAGINATION_CLASS': 'rest_framework.pagination.PageNumberPagination',
    'PAGE_SIZE': 50
}

# Google Analytics
GOOGLE_ANALYTICS_KEY = ''
--------------------------------------------------------------------------------
/analysis/general/analyze_transactions.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
5 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "core"))
6 |
7 | import re
8 | import csv
9 | import pickle
10 | from utils import filter_repository, dump_all_stats, pickle_dump
11 |
12 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings")
13 | import django
14 | django.setup()
15 |
16 | from library.models import *
17 |
18 | TRANSACTION_DIRECTORY = 'transactions'
19 |
def action_stats(directory = '.'):
    """Collect per-action captured-query counts, grouped by project type.

    For every repository with a successful attempt (minus filtered repos),
    record how many queries each of its actions captured; zero-query actions
    are skipped. Results are written into `directory` via dump_all_stats.
    """
    stats = {'action_query_count': {}}

    for repo in Repository.objects.exclude(latest_successful_attempt = None):
        if filter_repository(repo):
            continue

        project_type_name = repo.project_type.name
        # setdefault replaces the explicit membership test of the original.
        counts = stats['action_query_count'].setdefault(project_type_name, [])

        for action in Action.objects.filter(attempt = repo.latest_successful_attempt):
            # Fix: len(queryset) fetched every row just to count them;
            # .count() issues a single SQL COUNT(*) instead.
            query_count = Query.objects.filter(action = action).count()
            if query_count > 0:
                counts.append(query_count)

    dump_all_stats(directory, stats)
38 |
def transaction_stats(directory = '.'):
    """Group captured queries into transactions and collect, per project type:
    transaction count per action, and queries/reads/writes per transaction.
    The raw transactions and all stats are dumped into `directory`.
    """
    stats = {'transaction_count': {}, 'transaction_query_count': {}, 'transaction_read_count': {}, 'transaction_write_count': {}}

    # (repo name, project type, transaction text) triples, pickled at the end.
    transactions = []

    for repo in Repository.objects.exclude(latest_successful_attempt = None):
        if filter_repository(repo):
            continue

        project_type_name = repo.project_type.name
        if project_type_name not in stats['transaction_count']:
            stats['transaction_count'][project_type_name] = []
        if project_type_name not in stats['transaction_query_count']:
            stats['transaction_query_count'][project_type_name] = []
        if project_type_name not in stats['transaction_read_count']:
            stats['transaction_read_count'][project_type_name] = []
        if project_type_name not in stats['transaction_write_count']:
            stats['transaction_write_count'][project_type_name] = []


        for action in Action.objects.filter(attempt = repo.latest_successful_attempt):
            # `transaction` accumulates the current open transaction's text;
            # an empty string means no transaction is open.
            transaction = ''
            query_count = 0
            transaction_count = 0

            for query in Query.objects.filter(action = action):
                if 'BEGIN' in query.content.upper() or 'START TRANSACTION' in query.content.upper() or 'SET AUTOCOMMIT=0' in query.content.upper():
                    # Transaction start: reset the accumulator (an unfinished
                    # previous transaction is discarded here).
                    transaction = query.content + '\n'
                    query_count = 1
                elif transaction != '':
                    transaction += query.content + '\n'
                    query_count += 1
                    # NOTE(review): a substring test, so any statement merely
                    # containing "COMMIT" (e.g. ROLLBACK is fine, but comments
                    # or column names mentioning commit) would end it — confirm.
                    if 'COMMIT' in query.content.upper():
                        transaction = transaction.strip('\n')

                        # for each transaction, count the number of transactions
                        transaction_count += 1

                        # for each transaction, count the number of read/write
                        read_count = len(re.findall('SELECT', transaction.upper()))
                        stats['transaction_read_count'][project_type_name].append(read_count)
                        write_count = 0
                        for keyword in ['INSERT', 'DELETE', 'UPDATE']:
                            write_count += len(re.findall(keyword, transaction.upper()))
                        stats['transaction_write_count'][project_type_name].append(write_count)

                        # for each transaction, count the queries
                        # (minus 2 to exclude the BEGIN and COMMIT statements)
                        query_count -= 2
                        stats['transaction_query_count'][project_type_name].append(query_count)

                        # Best-effort: attribute access on the repo may fail.
                        try:
                            transactions.append((repo.name, repo.project_type.name, transaction))
                        except:
                            pass

                        transaction = ''

            if transaction_count > 0:
                stats['transaction_count'][project_type_name].append(transaction_count)

    pickle_dump(directory, 'transactions', transactions)

    dump_all_stats(directory, stats)
102 |
def main():
    """Run every active transaction analysis over TRANSACTION_DIRECTORY."""
    # active analyses; add working/deprecated ones here as needed
    for analysis in (action_stats, transaction_stats):
        analysis(TRANSACTION_DIRECTORY)

if __name__ == '__main__':
    main()
113 |
--------------------------------------------------------------------------------
/core/drivers/randomdriver.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
3 |
4 | import logging
5 | import requests
6 | import re
7 | import traceback
8 | import requests
9 | import mechanize
10 | import random
11 |
12 | from library.models import *
13 | from cmudbac.settings import *
14 | import utils
15 | import extract
16 | import submit
17 | import count
18 | from basedriver import BaseDriver
19 |
20 | ## =====================================================================
21 | ## LOGGING CONFIGURATION
22 | ## =====================================================================
23 | LOG = logging.getLogger()
24 |
25 | MAX_RANDOM_WALK_DEPTH = 5
26 |
27 | ## =====================================================================
28 | ## RANDOM DRIVER
29 | ## =====================================================================
class RandomDriver(BaseDriver):
    """Driver that performs a bounded random walk over a deployed application,
    submitting the forms and following the links it encounters while
    harvesting the database queries each action triggers.
    """

    def __init__(self, driver):
        """Wrap an existing driver, reusing its URLs, database and cookies."""
        self.driver = driver
        self.start_urls = set(map(lambda url: url['url'], driver.urls))
        self.database = self.driver.database
        if driver.browser is not None:
            # Share the session cookies so the walk stays authenticated.
            # NOTE(review): when driver.browser is None, self.cookiejar is
            # never set and start() would raise AttributeError — confirm
            # callers always provide a browser.
            self.cookiejar = driver.browser._ua_handlers['_cookies'].cookiejar
        self.walked_path = set()
        self.log_file = driver.log_file

    def new_browser(self, cookiejar = None, url = None):
        """Build a fresh mechanize browser, optionally seeding `cookiejar`
        and opening `url`."""
        browser = mechanize.Browser()
        if cookiejar is not None:
            # Fix: install the cookiejar that was passed in; the original
            # always installed self.cookiejar, silently ignoring the argument.
            browser.set_cookiejar(cookiejar)
        browser.set_handle_robots(False)
        if url is not None:
            browser.open(url)
        return browser

    def start(self):
        """Entry point: random-walk from every start URL."""
        self.forms = []
        self.urls = []
        for url in self.start_urls:
            self.random_walk(self.new_browser(self.cookiejar, url))

    def random_walk(self, browser, depth = MAX_RANDOM_WALK_DEPTH):
        """Submit each form and follow each link on the current page,
        recursing up to `depth` pages deep; every action's queries are read
        from the log delta since `last_line_no`."""
        if depth == 0:
            return

        try:
            last_line_no = self.check_log()
            browser_url = browser.geturl()
            cookiejar = browser._ua_handlers['_cookies'].cookiejar

            LOG.info('Walking URL: {}'.format(browser_url))

            forms = list(enumerate(list(browser.forms())))
            for idx, form in forms:
                # Visit each (page, form-name) pair at most once.
                key = '{}_{}'.format(browser_url, form.name)
                if key in self.walked_path:
                    continue
                self.walked_path.add(key)

                browser.select_form(nr = idx)
                form_stats = {
                    'url': browser_url,
                    'method': form.method,
                    'inputs': []
                }
                for control in form.controls:
                    if control.type == 'text':
                        browser[control.name] = submit.gen_random_value()
                    form_stats['inputs'].append({
                        'name': control.name,
                        'type': control.type
                    })
                succ = True
                try:
                    # Fix: removed a stray traceback.print_exc() that ran
                    # BEFORE submit() and dumped stale exception state on
                    # every form submission (debug leftover; the links loop
                    # below shows the intended placement in the except arm).
                    browser.submit()
                except:
                    succ = False

                form_stats['queries'], form_stats['counter'] = self.process_logs(self.check_log(last_line_no), None)

                if all(not self.equal_form(form_stats, ret_form) for ret_form in self.forms):
                    self.forms.append(form_stats)

                if succ:
                    self.random_walk(browser, depth - 1)

                # Submitting may have navigated away; rebuild a browser on
                # the original page for the next form.
                browser = self.new_browser(cookiejar, browser_url)

            links = list(browser.links())
            for link in links:
                key = link.url
                if key in self.walked_path:
                    continue
                self.walked_path.add(key)

                url = {
                    'url': link.url,
                    'queries': [],
                    'counter': {}
                }

                succ = True
                try:
                    browser.follow_link(link)
                except:
                    traceback.print_exc()
                    succ = False

                url['queries'], url['counter'] = self.process_logs(self.check_log(last_line_no), None)

                # NOTE(review): duplicate URLs are skipped, but `url` is never
                # appended to self.urls anywhere in this method — confirm
                # whether URL stats are recorded elsewhere.
                if any(self.equal_url(url, ret_url) for ret_url in self.urls):
                    continue

                if succ:
                    self.random_walk(browser, depth - 1)

                browser = self.new_browser(cookiejar, browser_url)

        except:
            traceback.print_exc()
--------------------------------------------------------------------------------
/vagrant/bootstrap.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash


# Install the packages sometimes needed for deploying Django or Ruby on Rails
# apps. Continue adding packages to this file: missing packages cause common
# deployment errors.

# use this line if the host is using proxy, and change the proxy
# http_proxy=http://proxy.pdl.cmu.edu:8080

if [ -n "$http_proxy" ]
then
    echo "use proxy: "$http_proxy
    echo "export http_proxy=\"$http_proxy\"" >> /home/vagrant/.bashrc
    echo "export https_proxy=\"$http_proxy\"" >> /home/vagrant/.bashrc

    export http_proxy="$http_proxy"
    export https_proxy="$http_proxy"

    echo "Acquire::http::Proxy \"$http_proxy\";" > /etc/apt/apt.conf
else
    echo "not use proxy"
fi

# The output of all these installation steps is noisy. With this utility
# the progress report is nice and concise.

function install {
    echo Installing $1
    shift
    apt-get -y install "$@" >/dev/null 2>&1
}

echo updating package information
install 'apt-repository' software-properties-common python-software-properties
curl --silent --location https://deb.nodesource.com/setup_4.x | sudo bash -
apt-get -y update >/dev/null 2>&1

install 'development tools' build-essential unzip curl openssl libssl-dev libcurl4-openssl-dev zlib1g zlib1g-dev libgmp-dev
install 'Python' python-dev python-software-properties

# install Ruby (several versions so apps can pick via Gemfile)
command curl -sSL https://rvm.io/mpapis.asc | gpg --import -
curl -sSL https://get.rvm.io | bash -s stable
source /usr/local/rvm/scripts/rvm
rvm install 1.9.3
rvm install 2.0.0
rvm install 2.2.2
rvm use 1.9.3 --default
gem install bundler
gem install bundle
rvm use 2.0.0 --default
gem install bundler
gem install bundle
rvm use 2.2.2 --default
gem install bundler
gem install bundle
install 'ruby' ruby-dev

echo -e "\n- - - - - -\n"
echo -n "Should be sqlite 3.8.1 or higher: sqlite "
sqlite3 --version
echo -n "Should be rvm 1.26.11 or higher: "
rvm --version | sed '/^.*$/N;s/\n//g' | cut -c 1-11
echo -n "Should be ruby 2.2.2: "
ruby -v | cut -d " " -f 2
echo -n "Should be Rails 4.2.1 or higher: "
rails -v
echo -e "\n- - - - - -\n"

# install pip
wget https://bootstrap.pypa.io/get-pip.py -O /home/vagrant/get-pip.py
python /home/vagrant/get-pip.py
echo 'export PYTHONUSERBASE="/home/vagrant/pip"' >> /home/vagrant/.bashrc

# install Beautifulsoup
echo installing Beautifulsoup
pip install BeautifulSoup4

# install Django
# Fix: user-facing message previously read "installing Djano".
echo installing Django
pip install django==1.8.6

# install dependencies
install 'Git' git
git config --global http.proxy $http_proxy

install 'SQLite' sqlite3 libsqlite3-dev

install 'PostgreSQL' postgresql postgresql-contrib libpq-dev
sudo -u postgres psql -U postgres -d postgres -c "alter user postgres with password 'postgres';"
pip install psycopg2

debconf-set-selections <<< "mysql-server mysql-server/root_password password root"
debconf-set-selections <<< "mysql-server mysql-server/root_password_again password root"
install 'MySQL' mysql-server libmysqlclient-dev
pip install MySQL-python
# mysql -u root --password=root -e "CREATE DATABASE vm"

install 'Nodejs' nodejs

install 'Nokogiri dependencies' libxml2 libxml2-dev libxslt1-dev imagemagick libmagickwand-dev

# install scrapy
echo installing scrapy
pip install scrapy

# web and env
pip install mechanize
pip install python-dateutil
pip install virtualenv
pip install hurry.filesize
pip install selenium
install 'phantomjs' phantomjs
install 'firefox' firefox=28.0+build2-0ubuntu2
install 'xvfb' xvfb
pip install pyvirtualdisplay
pip install djangorestframework
pip install pinax-blog
pip install pytz

# install php
install 'php' apache2 php5-mysql libapache2-mod-php5 mysql-server php5-dev php5-gd php5-curl php5-pgsql php5-sqlite

# install drush
wget http://files.drush.org/drush.phar
php drush.phar core-status
chmod +x drush.phar
mv drush.phar /usr/local/bin/drush
drush init
drush dl php_server-7.x

# Fix Dependencies
apt-get -f -y install >/dev/null 2>&1

# Needed for docs generation.
update-locale LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8 LC_ALL=en_US.UTF-8

pip install -r requirements.txt

# configure MySQL logging (general log is read back by the analyzers)
mysql -u root --password=root -e "SET GLOBAL general_log = 'ON';"
mysql -u root --password=root -e "SET GLOBAL general_log_file = '/var/log/mysql/mysql.log';"
mysql -u root --password=root -e "SELECT 1;"
chmod 777 /var/log/mysql/mysql.log

echo 'all set, rock on!'
--------------------------------------------------------------------------------
/core/analyzers/postgresqlanalyzer.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
3 |
4 | import logging
5 | import re
6 |
7 | from baseanalyzer import BaseAnalyzer
8 |
9 | ## =====================================================================
10 | ## LOGGING CONFIGURATION
11 | ## =====================================================================
12 | LOG = logging.getLogger()
13 |
14 | ## =====================================================================
15 | ## POSTGRESQL ANALYZER
16 | ## =====================================================================
class PostgreSQLAnalyzer(BaseAnalyzer):
    """Collects EXPLAIN ANALYZE plans for captured queries and schema
    statistics from a deployed PostgreSQL database."""

    def __init__(self, deployer):
        BaseAnalyzer.__init__(self, deployer)

    def analyze_queries(self, queries):
        """Run EXPLAIN ANALYZE over each captured query dict and store the
        plan text under query['explain'].

        Best-effort: queries that cannot be explained are skipped silently.
        NOTE(review): query['raw'] is interpolated into the EXPLAIN text;
        acceptable here only because the queries come from the app's own logs.
        """
        self.queries_stats['num_transactions'] = self.count_transaction(queries) + self.queries_stats.get('num_transactions', 0)

        conn = None
        cur = None
        try:
            conn = self.deployer.get_database_connection()
            # Autocommit (isolation level 0) so each EXPLAIN ANALYZE runs on
            # its own, then restore the default level afterwards.
            conn.set_isolation_level(0)
            cur = conn.cursor()

            for query in queries:
                try:
                    if self.is_valid_for_explain(query['raw']):
                        explain_query = 'EXPLAIN ANALYZE {};'.format(query['raw'])
                        cur.execute(explain_query)
                        rows = cur.fetchall()
                        output = '\n'
                        for row in rows:
                            output += row[0] + '\n'
                        query['explain'] = output
                except Exception:
                    # Best-effort: skip queries that cannot be explained.
                    pass

            conn.set_isolation_level(1)
        except Exception as e:
            LOG.exception(e)
        finally:
            # Fix: the cursor/connection previously leaked whenever an error
            # occurred before the explicit close calls.
            self._close_quietly(cur, conn)

    def analyze_database(self):
        """Capture schema-object counts into database_stats and full catalog
        dumps (stringified) into database_informations for the 'public'
        schema."""
        conn = None
        cur = None
        try:
            conn = self.deployer.get_database_connection()
            cur = conn.cursor()

            # Scalar counts of schema objects.
            count_queries = (
                ('num_tables', "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';"),
                ('num_indexes', "SELECT COUNT(*) FROM pg_stat_all_indexes WHERE schemaname = 'public';"),
                ('num_constraints', "SELECT COUNT(*) FROM information_schema.table_constraints WHERE constraint_schema = 'public';"),
                ('num_foreignkeys', "SELECT COUNT(*) FROM information_schema.referential_constraints WHERE constraint_schema = 'public';"),
            )
            for key, sql in count_queries:
                cur.execute(sql)
                self.database_stats[key] = int(cur.fetchone()[0])

            # Full rows of each catalog, stringified for storage.
            info_queries = (
                ('tables', "SELECT * FROM information_schema.tables WHERE table_schema = 'public';"),
                ('columns', "SELECT * FROM information_schema.columns WHERE table_schema = 'public';"),
                ('indexes', "SELECT * FROM pg_stat_all_indexes WHERE schemaname = 'public';"),
                ('constraints', "SELECT * FROM information_schema.table_constraints WHERE constraint_schema = 'public';"),
                ('key_column_usage', "SELECT * FROM information_schema.key_column_usage WHERE constraint_schema = 'public';"),
                ('foreignkeys', "SELECT * FROM information_schema.referential_constraints WHERE constraint_schema = 'public';"),
                ('triggers', "SELECT * FROM information_schema.triggers WHERE trigger_schema = 'public';"),
                ('views', "SELECT * FROM information_schema.views WHERE table_schema = 'public';"),
            )
            for key, sql in info_queries:
                cur.execute(sql)
                self.database_informations[key] = str(cur.fetchall())
        except Exception as e:
            LOG.exception(e)
        finally:
            self._close_quietly(cur, conn)

    @staticmethod
    def _close_quietly(cur, conn):
        # Close a cursor and connection, ignoring errors; either may be None.
        for resource in (cur, conn):
            if resource is not None:
                try:
                    resource.close()
                except Exception:
                    pass
--------------------------------------------------------------------------------
/core/scripts/vagrant_benchmark.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
5 |
6 | import argparse
7 | import datetime
8 | import socket
9 | import traceback
10 | import time
11 | import logging
12 | import json
13 | from multiprocessing import Process, Queue
14 |
15 | from deployers import *
16 | from drivers import *
17 | from analyzers import *
18 | import utils
19 |
20 | ## =====================================================================
21 | ## LOGGING CONFIGURATION
22 | ## =====================================================================
23 | LOG = logging.getLogger()
24 |
def run_driver(driver, timeout, size, queue):
    # Worker-process body: repeatedly submit actions through a BenchmarkDriver
    # until the timeout expires or the database reaches the target size, then
    # report the number of submitted actions via `queue`.
    cnt = 0
    start_time = time.time()
    stop_time = start_time + timeout
    new_driver = BenchmarkDriver(driver)
    try:
        while True:
            cnt += new_driver.submit_actions()
            # NOTE(review): get_database_size() is defined below to require a
            # `deployer` argument; calling it with none raises TypeError, which
            # is swallowed by the except branch and ends the loop — so the
            # size-based termination likely never works as intended. Confirm.
            if time.time() >= stop_time or get_database_size() >= size:
                break
        queue.put(cnt)
    except Exception, e:
        traceback.print_exc()
        # Still report whatever progress was made before the failure.
        queue.put(cnt)
39 |
def get_database_size(deployer):
    """Return the size in MB (rounded to one decimal) of the deployed
    application's MySQL database, computed from information_schema.

    Side effect (as in the original): overwrites deployer.database with a
    MySQL Database marker so get_database_connection targets MySQL.
    """
    deployer.database = Database()
    deployer.database.name = 'MySQL'
    conn = deployer.get_database_connection(False)
    try:
        cur = conn.cursor()
        # Fix: the schema name is now passed as a bound parameter instead of
        # being string-formatted into the SQL text.
        cur.execute('''
            SELECT Round(SUM(data_length + index_length) / 1024 / 1024, 1)
            FROM information_schema.tables
            WHERE table_schema = %s
            ''', (deployer.database_config['name'],))
        size = cur.fetchone()[0]
        cur.close()
        return size
    finally:
        # Fix: the connection previously was never closed.
        conn.close()
52 |
def main():
    """Deploy an application described by a saved attempt-info JSON file,
    drive it with multiple benchmark worker processes, then analyze the
    captured queries and the resulting database."""
    # parse args
    parser = argparse.ArgumentParser()
    parser.add_argument('--attempt_info', type=str)
    parser.add_argument('--deploy_id', type=int)
    parser.add_argument('--database', type=str)
    parser.add_argument('--host', type=str)
    parser.add_argument('--port', type=int)
    parser.add_argument('--name', type=str)
    parser.add_argument('--username', type=str)
    parser.add_argument('--password', type=str)
    parser.add_argument('--num_threads', type=int)
    parser.add_argument('--timeout', type=int)
    parser.add_argument('--size', type=int)
    args = parser.parse_args()

    # get args
    with open(args.attempt_info, 'r') as attempt_info_file:
        attempt_info = json.loads(attempt_info_file.read())
    deploy_id = args.deploy_id
    database_config = {
        'database': args.database,
        'host': args.host,
        'port': args.port,
        'name': args.name,
        'username': args.username,
        'password': args.password
    }
    num_threads = args.num_threads
    timeout = args.timeout
    size = args.size

    # get deployer: map the numeric project type to its deployer class and
    # import it dynamically from the deployers package.
    project_type = attempt_info['repo_info']['project_type']
    deployer_class = {
        1: 'DjangoDeployer',
        2: 'RoRDeployer',
        3: 'NodeDeployer',
        4: 'DrupalDeployer',
        5: 'GrailsDeployer'
    }[project_type]

    moduleName = "deployers.%s" % (deployer_class.lower())
    moduleHandle = __import__(moduleName, globals(), locals(), [deployer_class])
    klass = getattr(moduleHandle, deployer_class)

    deployer = klass(None, None, deploy_id, database_config)

    # A non-zero result means deployment failed; clean up and bail out.
    result = deployer.deploy(attempt_info)
    if result != 0:
        deployer.kill_server()
        sys.exit(-1)

    LOG.info('Running driver ...')
    driver = BaseDriver(deployer.get_main_url(), deployer.get_database(), deployer.deploy_id, deployer.base_path, deployer.log_file)
    try:
        driver.bootstrap()
        driver.initialize()
    except Exception, e:
        traceback.print_exc()

    LOG.info('Start Driving the Database ...')
    actions_cnt = 0
    processes = []
    try:
        # disable logging of requests
        logging.getLogger("requests").setLevel(logging.WARNING)
        logging.getLogger("urllib3").setLevel(logging.WARNING)
        # multi-processing: fan out num_threads workers, then sum their
        # per-worker action counts from the shared queue.
        queue = Queue()
        for _ in range(num_threads):
            process = Process(target = run_driver, args = (driver, timeout, size, queue))
            processes.append(process)
            process.start()
        for process in processes:
            process.join()
        for _ in range(num_threads):
            actions_cnt += queue.get()
    except Exception, e:
        traceback.print_exc()

    LOG.info('The number of actions submitted : {}'.format(actions_cnt))

    # kill server
    deployer.kill_server()

    # analyze
    LOG.info('Analyzing queries ...')
    analyzer = get_analyzer(deployer)
    for form, _ in driver.forms:
        analyzer.analyze_queries(form['queries'])
    for url in driver.urls:
        analyzer.analyze_queries(url['queries'])
    LOG.info(analyzer.queries_stats)

    # extract database info
    LOG.info('Extracting database info ...')
    analyzer.analyze_database()
    LOG.info(analyzer.database_stats)

    LOG.info('Database Size : {} '.format(get_database_size(deployer)))

    LOG.info('Finishing ...')
159 |
--------------------------------------------------------------------------------
/library/templates/base.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
{% block title %}{% endblock %}Carnegie Mellon Database Application Catalog (CMDBAC)
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
30 |
31 |
32 |
33 |
34 | {% load active_page %}
35 |
36 |
37 |
38 |
39 |
40 |
49 |
50 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 | {% block header %}{% endblock %}
76 |
77 |
78 |
79 |
80 | {% block main %}{% endblock %}
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
93 |
101 |
102 |
103 |
104 |
105 |
106 |
108 |
109 |
110 |
111 | {{ analytics_code }}
112 |
113 |
114 |
115 |
--------------------------------------------------------------------------------
/core/analyzers/mysqlanalyzer.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
3 |
4 | import logging
5 | import datetime
6 | import traceback
7 |
8 | from baseanalyzer import BaseAnalyzer
9 |
10 | ## =====================================================================
11 | ## LOGGING CONFIGURATION
12 | ## =====================================================================
13 | LOG = logging.getLogger()
14 |
15 | ## =====================================================================
16 | ## MYSQL ANALYZER
17 | ## =====================================================================
18 | class MySQLAnalyzer(BaseAnalyzer):
19 |
    def __init__(self, deployer):
        # Delegate initialization to BaseAnalyzer.
        BaseAnalyzer.__init__(self, deployer)
22 |
    def analyze_queries(self, queries):
        """For each captured query dict, attach the MySQL EXPLAIN plan under
        query['explain'] and the post-execution SHOW SESSION STATUS output
        under query['stats']. Best-effort: per-query failures are ignored.
        """
        self.queries_stats['num_transactions'] = self.count_transaction(queries) + self.queries_stats.get('num_transactions', 0)

        try:
            conn = self.deployer.get_database_connection()
            cur = conn.cursor()

            # First pass: EXPLAIN every explainable query.
            for query in queries:
                try:
                    if self.is_valid_for_explain(query['raw']):
                        explain_query = 'EXPLAIN {};'.format(query['raw'])
                        # print explain_query
                        cur.execute(explain_query)
                        rows = cur.fetchall()
                        output = '\n'
                        for row in rows:
                            output += str(row) + '\n'
                        query['explain'] = output
                except Exception, e:
                    # Best-effort: skip queries that cannot be explained.
                    pass
                    # LOG.exception(e)

            # Second pass: re-execute each query and capture the session
            # status counters it produced.
            for query in queries:
                try:
                    if self.is_valid_for_explain(query['raw']):
                        cur.execute(query['raw'])
                        cur.fetchall()

                        stats_query = 'SHOW SESSION STATUS;'
                        # print explain_query
                        cur.execute(stats_query)
                        rows = cur.fetchall()
                        output = '\n'
                        for row in rows:
                            output += str(row) + '\n'
                        query['stats'] = output
                except Exception, e:
                    # traceback.print_exc()
                    pass
                    # pass
                    # LOG.exception(e)

            cur.close()
            conn.close()
        except Exception, e:
            LOG.exception(e)
69 |
70 | def analyze_database(self):
71 | try:
72 | conn = self.deployer.get_database_connection()
73 | cur = conn.cursor()
74 | database = self.deployer.get_database_name()
75 |
76 | # the number of tables
77 | cur.execute("SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '{}';".format(database))
78 | self.database_stats['num_tables'] = int(cur.fetchone()[0])
79 |
80 | # the number of indexes
81 | cur.execute("SELECT COUNT(DISTINCT table_name, index_name) FROM information_schema.statistics WHERE table_schema = '{}';".format(database))
82 | self.database_stats['num_indexes'] = int(cur.fetchone()[0])
83 |
84 | # the number of constraints
85 | cur.execute("SELECT COUNT(*) FROM information_schema.table_constraints WHERE constraint_schema = '{}';".format(database))
86 | self.database_stats['num_constraints'] = int(cur.fetchone()[0])
87 |
88 | # the number of foreign keys
89 | cur.execute("SELECT COUNT(*) FROM information_schema.referential_constraints WHERE constraint_schema = '{}';".format(database))
90 | self.database_stats['num_foreignkeys'] = int(cur.fetchone()[0])
91 |
92 | # the full information of tables
93 | cur.execute("SELECT * FROM information_schema.tables WHERE table_schema = '{}';".format(database))
94 | self.database_informations['tables'] = str(cur.fetchall())
95 |
96 | # the full information of columns
97 | cur.execute("SELECT * from INFORMATION_SCHEMA.columns WHERE table_schema = '{}';".format(database))
98 | self.database_informations['columns'] = str(cur.fetchall())
99 |
100 | # the full information of indexes
101 | cur.execute("SELECT * FROM information_schema.statistics WHERE table_schema = '{}';".format(database))
102 | self.database_informations['indexes'] = str(cur.fetchall())
103 |
104 | # the full information of constraints
105 | cur.execute("SELECT * FROM information_schema.table_constraints WHERE constraint_schema = '{}';".format(database))
106 | self.database_informations['constraints'] = str(cur.fetchall())
107 |
108 | # the full information of constraints
109 | cur.execute("SELECT * FROM information_schema.key_column_usage WHERE constraint_schema = '{}';".format(database))
110 | self.database_informations['key_column_usage'] = str(cur.fetchall())
111 |
112 | # the full information of foreign keys
113 | cur.execute("SELECT * FROM information_schema.referential_constraints WHERE constraint_schema = '{}';".format(database))
114 | self.database_informations['foreignkeys'] = str(cur.fetchall())
115 |
116 | # the full information of triggers
117 | cur.execute("SELECT * FROM information_schema.triggers WHERE trigger_schema = '{}';".format(database))
118 | self.database_informations['triggers'] = str(cur.fetchall())
119 |
120 | # the full information of views
121 | cur.execute("SELECT * FROM information_schema.views WHERE table_schema = '{}';".format(database))
122 | self.database_informations['views'] = str(cur.fetchall())
123 |
124 | cur.close()
125 | conn.close()
126 | except Exception, e:
127 | LOG.exception(e)
--------------------------------------------------------------------------------
/analysis/foreign/foreign.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
5 |
6 | import re
7 | import csv
8 | import numpy as np
9 | import sqlparse
10 | import traceback
11 | from utils import filter_repository, dump_all_stats, pickle_dump
12 |
13 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings")
14 | import django
15 | django.setup()
16 |
17 | from library.models import *
18 |
19 | def foreign_key_stats(directory = '.'):
20 | stats = {'foreign_key_count': {}, 'foreign_key_type': {}}
21 |
22 | for repo in Repository.objects.exclude(latest_successful_attempt = None):
23 | if filter_repository(repo):
24 | continue
25 |
26 | project_type_name = repo.project_type.name
27 | if project_type_name not in stats['foreign_key_count']:
28 | stats['foreign_key_count'][project_type_name] = []
29 | if project_type_name not in stats['foreign_key_type']:
30 | stats['foreign_key_type'][project_type_name] = {}
31 | if 0:
32 | if project_type_name not in stats['join_key_constraint']:
33 | stats['join_key_constraint'][project_type_name] = {}
34 |
35 | informations = Information.objects.filter(attempt = repo.latest_successful_attempt).filter(name = 'columns')
36 | column_map = {}
37 | if len(informations) > 0:
38 | information = informations[0]
39 | if repo.latest_successful_attempt.database.name == 'PostgreSQL':
40 | regex = '(\(.*?\))[,\]]'
41 | elif repo.latest_successful_attempt.database.name == 'MySQL':
42 | regex = '(\(.*?\))[,\)]'
43 |
44 | for column in re.findall(regex, information.description):
45 | cells = column.split(',')
46 | table = str(cells[2]).replace("'", "").strip()
47 | name = str(cells[3]).replace("'", "").strip()
48 | _type = str(cells[7]).replace("'", "").strip()
49 | column_map[table + '.' + name] = _type
50 | column_map[name] = _type
51 |
52 | key_column_usage_informations = Information.objects.filter(attempt = repo.latest_successful_attempt).filter(name = 'key_column_usage')
53 | constraint_informations = Information.objects.filter(attempt = repo.latest_successful_attempt).filter(name = 'constraints')
54 | constraint_map = {}
55 | if len(key_column_usage_informations) > 0 and len(constraint_informations) > 0:
56 | if repo.latest_successful_attempt.database.name == 'PostgreSQL':
57 | regex = '(\(.*?\))[,\]]'
58 | elif repo.latest_successful_attempt.database.name == 'MySQL':
59 | regex = '(\(.*?\))[,\)]'
60 |
61 | merge_map = {}
62 | key_column_usage_information = key_column_usage_informations[0]
63 | for column in re.findall(regex, key_column_usage_information.description):
64 | cells = column.split(',')
65 | constraint_name = str(cells[2]).replace("'", "").strip()
66 | table_name = str(cells[5]).replace("'", "").strip()
67 | column_name = str(cells[6]).replace("'", "").strip()
68 | merge_map_key = table_name + '.' + constraint_name
69 | if merge_map_key in merge_map:
70 | merge_map[merge_map_key].append(column_name)
71 | else:
72 | merge_map[merge_map_key] = [column_name]
73 |
74 | constraint_information = constraint_informations[0]
75 | for column in re.findall(regex, constraint_information.description):
76 | cells = column.split(',')
77 | constraint_name = str(cells[2]).replace("'", "").strip()
78 | if repo.latest_successful_attempt.database.name == 'PostgreSQL':
79 | table_name = str(cells[5]).replace("'", "").strip()
80 | constraint_type = str(cells[6]).replace("'", "").strip()
81 | elif repo.latest_successful_attempt.database.name == 'MySQL':
82 | table_name = str(cells[4]).replace("'", "").strip()
83 | constraint_type = str(cells[5])[:-1].replace("'", "").strip()
84 | merge_map_key = table_name + '.' + constraint_name
85 | if merge_map_key in merge_map:
86 | for column_name in merge_map[merge_map_key]:
87 | constraint_map[table_name + '.' + column_name] = constraint_type
88 | constraint_map[column_name] = constraint_type
89 |
90 | if constraint_type == 'FOREIGN KEY':
91 | _type = column_map[table_name + '.' + column_name]
92 | stats['foreign_key_type'][project_type_name][_type] = stats['foreign_key_type'][project_type_name].get(_type, 0) + 1
93 |
94 | for action in Action.objects.filter(attempt = repo.latest_successful_attempt):
95 | queries = Query.objects.filter(action = action)
96 | foreign_key_count = 0
97 |
98 | for query in queries:
99 | parsed = sqlparse.parse(query.content)[0]
100 | tokens = parsed.tokens
101 |
102 | for token in tokens:
103 | if isinstance(token, sqlparse.sql.Identifier):
104 | token_name = token.value.replace('"', '').replace('`', '')
105 | if token_name in constraint_map:
106 | constraint = constraint_map[token_name]
107 | if constraint == 'FOREIGN KEY':
108 | foreign_key_count += 1
109 |
110 | for explain in Explain.objects.filter(query = query):
111 | if 'FOREIGN' in explain.output:
112 | print explain.output
113 |
114 | stats['foreign_key_count'][project_type_name].append(foreign_key_count)
115 |
116 | dump_all_stats(directory, stats)
117 |
def main():
    """Command-line entry point: dump foreign-key stats to the CWD."""
    foreign_key_stats(directory = '.')

if __name__ == '__main__':
    main()
123 |
--------------------------------------------------------------------------------
/core/crawlers/drupalcrawler.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
3 |
4 | import time
5 | import re
6 | import urllib
7 | import urllib2
8 | import logging
9 | import urlparse
10 | import requests
11 | from bs4 import BeautifulSoup
12 | from datetime import datetime
13 | import traceback
14 |
15 | from basecrawler import BaseCrawler
16 | from library.models import *
17 | import utils
18 |
19 | ## =====================================================================
20 | ## LOGGING CONFIGURATION
21 | ## =====================================================================
22 |
23 | LOG = logging.getLogger(__name__)
24 | LOG_handler = logging.StreamHandler()
25 | LOG_formatter = logging.Formatter(fmt='%(asctime)s [%(filename)s:%(funcName)s:%(lineno)03d] %(levelname)-5s: %(message)s',
26 | datefmt='%m-%d-%Y %H:%M:%S')
27 | LOG_handler.setFormatter(LOG_formatter)
28 | LOG.addHandler(LOG_handler)
29 | LOG.setLevel(logging.INFO)
30 |
31 | ## =====================================================================
32 | ## DRUPAL CONFIGURATION
33 | ## =====================================================================
34 | BASE_URL = 'https://www.drupal.org/project/{name}'
35 | COMMIT_URL = 'https://www.drupal.org/node/{sha}'
36 | SEARCH_URL = 'https://www.drupal.org/project/project_distribution'
37 | DRUPAL_HOST = 'https://www.drupal.org'
38 | DRUPAL_SLEEP = 1
39 |
40 | ## =====================================================================
41 | ## DRUPAL CRAWLER
42 | ## =====================================================================
class DrupalCrawler(BaseCrawler):
    """Crawler for drupal.org distribution projects.

    Walks the paginated project_distribution listing and registers each
    distribution as a Repository row, persisting pagination state in
    crawlerStatus between runs.
    """

    def __init__(self, crawlerStatus, auth):
        # 'auth' is accepted for signature parity with other crawlers but
        # is unused: drupal.org requires no authentication.
        BaseCrawler.__init__(self, crawlerStatus)
    ## DEF

    def next_url(self):
        """Return the next listing-page URL to fetch."""
        # Check whether there is a next url that we need to load
        # from where we left off from our last run
        if not self.crawlerStatus.next_url is None and not self.crawlerStatus.next_url == '':
            return self.crawlerStatus.next_url

        # Otherwise, compute what the next page we want to load
        return SEARCH_URL
    ## DEF

    def search(self):
        """Fetch one listing page, register every distribution found, then
        record the following page (or None) in the crawler status."""
        # Load and parse!
        response = utils.query(self.next_url())
        soup = BeautifulSoup(response.text)
        titles = soup.find_all(class_='node-project-distribution')
        LOG.info("Found %d repositories" % len(titles))

        # Pick through the results and find repos
        for title in titles:
            # The project's machine name is the third path segment of the
            # first link inside the result block.
            name = title.contents[1].contents[0]['href'].split('/')[2]
            try:
                self.add_repository(name)
            except:
                traceback.print_exc()
            # Sleep for a little bit to prevent us from getting blocked
            time.sleep(DRUPAL_SLEEP)
        ## FOR

        # Figure out what is the next page that we need to load
        try:
            next_page = soup.find(class_='pager-next').contents[0]
        except:
            next_page = None
        if not next_page or not next_page.has_attr('href'):
            LOG.info("No next page link found!")
            self.crawlerStatus.next_url = None
        else:
            self.crawlerStatus.next_url = DRUPAL_HOST + next_page['href']

        # Make sure we update our crawler status
        LOG.info("Updating status for %s" % self.crawlerStatus)
        self.crawlerStatus.save()

        return
    ## DEF

    def get_api_data(self, name):
        """Scrape the project page for *name*; return its URL and the
        'datetime' attribute of the first <time> tag on the page."""
        data = {}
        data['url'] = self.crawlerStatus.source.get_url(name)
        response = requests.get(data['url'])
        soup = BeautifulSoup(response.text)
        data['time'] = soup.find('time').attrs['datetime']
        return data
    # DEF

    def add_repository(self, name, setup_scripts = None):
        """Create a Repository row named 'drupal/<name>' unless one already
        exists for this source.

        Numeric metadata that is not scraped here (stars, forks, ...) is
        stored as the placeholder value -1.
        """
        if Repository.objects.filter(name='drupal/' + name, source=self.crawlerStatus.source).exists():
            LOG.info("Repository '%s' already exists" % name)
        else:
            api_data = self.get_api_data(name)

            # Create the new repository
            repo = Repository()
            repo.name = 'drupal/' + name
            repo.source = self.crawlerStatus.source
            repo.project_type = self.crawlerStatus.project_type
            repo.last_attempt = None
            # NOTE(review): this assumes the <time datetime=...> attribute
            # holds a unix timestamp; an ISO-8601 value would make int()
            # raise ValueError — confirm against a live project page.
            repo.created_at = datetime.fromtimestamp(int(api_data['time'])).strftime("%Y-%m-%d %H:%M:%S")
            repo.updated_at = repo.created_at
            repo.pushed_at = repo.created_at
            repo.homepage = api_data['url']
            repo.size = -1
            repo.stargazers_count = -1
            repo.watchers_count = -1
            repo.language = 'PHP'
            repo.forks_count = -1
            repo.open_issues_count = -1
            repo.default_branch = 'master'
            repo.network_count = -1
            repo.subscribers_count = -1
            repo.commits_count = -1
            repo.branches_count = -1
            repo.releases_count = -1
            repo.contributors_count = -1
            repo.setup_scripts = setup_scripts
            repo.save()
            LOG.info("Successfully created new repository '%s' [%d]" % (repo, repo.id))
        ## IF
    # DEF

    def get_latest_sha(self, repo_name):
        """Return a drupal.org node id used as this project's 'sha'.

        NOTE(review): results[1] deliberately takes the SECOND node-id
        match on the project page — confirm the first match is not the
        wanted entry before changing this.
        """
        url = BASE_URL.format(name = repo_name)
        response = utils.query(url)
        data = response.text
        results = re.findall(COMMIT_URL.format(sha='(\d+)'), data)
        return results[1]
    # DEF

    def download_repository(self, repo_name, sha, zip_name):
        """Download the project's archive into *zip_name*.

        *sha* is unused: the first .zip link found on the project page is
        downloaded, streamed to disk in 1 KiB chunks.
        """
        url = BASE_URL.format(name = repo_name)
        response = utils.query(url)
        data = response.text
        download_url = re.search('https://[^ ]*?\.zip', data).group(0)

        response = utils.query(download_url)
        zip_file = open(zip_name, 'wb')
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                zip_file.write(chunk)
                zip_file.flush()
        zip_file.close()
    # DEF
--------------------------------------------------------------------------------
/core/deployers/nodedeployer.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
3 |
4 | import logging
5 | import re
6 | import time
7 |
8 | from basedeployer import BaseDeployer
9 | from library.models import *
10 | import utils
11 |
12 | ## =====================================================================
13 | ## LOGGING CONFIGURATION
14 | ## =====================================================================
15 | LOG = logging.getLogger()
16 |
17 | ## =====================================================================
18 | ## SETTINGS
19 | ## =====================================================================
20 |
21 |
22 | ## =====================================================================
23 | ## NODE.JS DEPLOYER
24 | ## =====================================================================
25 | class NodeDeployer(BaseDeployer):
26 | def __init__(self, repo, database, deploy_id, database_config = None):
27 | BaseDeployer.__init__(self, repo, database, deploy_id, database_config)
28 | if database_config == None:
29 | self.database_config['name'] = 'node_app' + str(deploy_id)
30 | self.main_filename = None
31 | ## DEF
32 |
33 | def configure_settings(self, path):
34 | utils.replace_files_regex(path, "mysql\.createConnection\({.*?}.*?\);",
35 | """mysql.createConnection({{
36 | host : '{host}',
37 | port : '{port}',
38 | user : '{user}',
39 | password : '{password}',
40 | database : '{database}'
41 | }});
42 | """.format(host=self.database_config['host'], port=self.database_config['port'],
43 | user=self.database_config['username'],password=self.database_config['password'],
44 | database=self.database_config['name']))
45 | ## DEF
46 |
47 | def install_requirements(self, path):
48 | if path:
49 | command = '{} && npm install'.format(utils.cd(path))
50 | out = utils.run_command(command)
51 | if out[1] == '':
52 | return out[2]
53 | else:
54 | return out[1]
55 | return ''
56 | ## DEF
57 |
58 | def get_main_url(self):
59 | return 'http://127.0.0.1:{}/'.format(self.port)
60 | ## DEF
61 |
62 | def sync_server(self, path):
63 | pass
64 | ## DEF
65 |
66 | def run_server(self, path):
67 | self.configure_network()
68 | LOG.info('Running server ...')
69 | command = '{} && node {}'.format(
70 | utils.cd(path), self.main_filename)
71 | return utils.run_command_async(command)
72 | ## DEF
73 |
74 | def get_runtime(self):
75 | out = utils.run_command('node -v')
76 | return {
77 | 'executable': 'node',
78 | 'version': out[1][1:]
79 | }
80 | ## DEF
81 |
82 | def find_port(self):
83 | out = utils.run_command('netstat -nlp | grep -i "node"')
84 | port = re.search('0 :::(\d+)', out[1])
85 | if port:
86 | self.port = port.group(1)
87 |
88 | def create_tables(self, deploy_path):
89 | executed = False
90 | sql_files = utils.search_file_regex(deploy_path, '.*\.sql')
91 | conn = self.get_database_connection()
92 | cur = conn.cursor()
93 | for sql_file in sql_files:
94 | executed = True
95 | for statement in open(sql_file).read().split(';'):
96 | try:
97 | cur.execute(statement)
98 | except Exception, e:
99 | print statement
100 | LOG.exception(e)
101 | if self.database.name == 'MySQL':
102 | conn.commit()
103 | return executed
104 |
105 | def try_deploy(self, deploy_path):
106 | LOG.info('Configuring settings ...')
107 | self.kill_server()
108 | self.clear_database()
109 | self.configure_settings(deploy_path)
110 | self.runtime = self.get_runtime()
111 | LOG.info(self.runtime)
112 |
113 | self.attempt.database = self.get_database()
114 | LOG.info('Database: ' + self.attempt.database.name)
115 |
116 | LOG.info('Create Tables ...')
117 | try:
118 | if not self.create_tables(deploy_path):
119 | LOG.error('No sql file found!')
120 | return ATTEMPT_STATUS_MISSING_REQUIRED_FILES
121 | except Exception, e:
122 | LOG.exception(e)
123 |
124 | LOG.info('Installing requirements ...')
125 | out = self.install_requirements(deploy_path)
126 | lines = out.split('\n')
127 | packages = {}
128 | for line in lines:
129 | s = re.search('(.+?)@([0-9\.]+)', line)
130 | if s:
131 | name, version = s.group(1), s.group(2)
132 | name = name.split(' ')[-1]
133 | packages[name] = version
134 |
135 | for name, version in packages.iteritems():
136 | try:
137 | pkg, created = Package.objects.get_or_create(name=name, version=version, project_type=self.repo.project_type)
138 | self.packages_from_file.append(pkg)
139 | except Exception, e:
140 | LOG.exception(e)
141 |
142 | self.run_server(deploy_path)
143 | time.sleep(5)
144 |
145 | self.find_port()
146 |
147 | attemptStatus = self.check_server()
148 |
149 | return attemptStatus
150 | ## DEF
151 |
152 | def deploy_repo_attempt(self, deploy_path):
153 | package_jsons = utils.search_file(deploy_path, 'package.json')
154 | if not package_jsons:
155 | LOG.error('No package.json found!')
156 | return ATTEMPT_STATUS_MISSING_REQUIRED_FILES
157 | base_dir = sorted([os.path.dirname(package_json) for package_json in package_jsons])[0]
158 |
159 | for main_filename in ['server.js', 'app.js', 'main.js']:
160 | if utils.search_file_norecur(base_dir, main_filename):
161 | self.main_filename = main_filename
162 | break
163 | if self.main_filename == None:
164 | LOG.error('No main file found!')
165 | return ATTEMPT_STATUS_MISSING_REQUIRED_FILES
166 |
167 | self.setting_path = base_dir
168 |
169 | return self.try_deploy(base_dir)
170 | ## DEF
171 |
172 | ## CLASS
--------------------------------------------------------------------------------
/core/drivers/submit/submit.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir))
3 |
4 | import mechanize
5 | import cookielib
6 | import string
7 | import random
8 | import traceback
9 | import requests
10 | import urlparse
11 | from bs4 import BeautifulSoup
12 |
13 | from patterns import patterns, match_any_pattern
14 | import extract
15 |
def get_form_index(br, form):
    """Return the index of the mechanize form that matches *form*.

    *form* is a dict describing the target form; the HTML 'class'
    attribute is stored under the 'clazz' key.  Every key of *form* that
    also appears in a candidate form's attributes must match
    case-insensitively.  Returns len(br.forms()) when nothing matches.
    """
    # Compare against a copy: the original injected a 'class' key into the
    # caller's dict as a side effect, and crashed when 'clazz' was absent.
    wanted = dict(form)
    if 'clazz' in wanted:
        wanted['class'] = wanted['clazz']
    index = 0
    for candidate in br.forms():
        equal = True
        # .items() (not py2-only .iteritems()) works on both runtimes.
        for name, value in wanted.items():
            if name in candidate.attrs:
                if str(candidate.attrs[name]).lower() != str(value).lower():
                    equal = False
                    break
        if equal:
            break
        index = index + 1
    return index
30 |
def submit_form(form, inputs, br = None):
    """Submit *inputs* into the page form described by *form* via mechanize.

    form:   dict with 'url', 'inputs' (list of {'name', 'type', ...}) and
            the attributes get_form_index uses to pick the right form.
    inputs: mapping of input name -> value; file inputs take a dict with
            'filename' and 'mime_type' keys.
    br:     optional mechanize.Browser to reuse (keeps cookies/session);
            a fresh one is created when None.

    Returns (http_status_code, browser).
    """
    if br == None:
        br = mechanize.Browser()
        cj = cookielib.LWPCookieJar()
        br.set_cookiejar(cj)
        # Ignore robots.txt so test submissions are never blocked by it.
        br.set_handle_robots(False)

    br.open(form['url'].encode("ascii","ignore"))
    br.select_form(nr=get_form_index(br, form))

    for input in form['inputs']:
        if input['name'] in inputs:
            try:
                if br.find_control(name = input['name'], type = input['type']) == None:
                    continue
                if input['type'] == 'file':
                    filename = inputs[input['name']]['filename']
                    upload_filename = os.path.basename(filename)
                    mime_type = inputs[input['name']]['mime_type']
                    br.form.add_file(open(filename), mime_type, upload_filename, name = input['name'])
                    br.form.set_all_readonly(False)
                elif input['type'] == 'checkbox':
                    br.find_control(name = input['name'], type = input['type']).selected = inputs[input['name']]
                else:
                    # Read-only and radio controls are left untouched.
                    if br.find_control(name = input['name'], type = input['type']).readonly:
                        continue
                    if input['type'] == 'radio':
                        continue
                    br[input['name']] = inputs[input['name']]
            except:
                # Best effort: controls mechanize cannot set are skipped.
                # traceback.print_exc()
                pass

    response = br.submit().code

    return response, br
67 |
def gen_random_value(chars = string.ascii_letters + string.digits, length = 0):
    """Return a random string drawn from *chars*.

    When *length* is 0 (the default), a length between 8 and 20
    inclusive is picked at random.
    """
    if length == 0:
        length = random.choice(range(8, 21))
    picked = [random.choice(chars) for _ in range(length)]
    return ''.join(picked)
72 |
def gen_random_true_false():
    """Return True or False with equal probability."""
    return random.choice((True, False))
75 |
def gen_file(base_path, input):
    """Produce a file suitable for the file-upload control *input*.

    Inputs whose name mentions 'image' get the bundled JPEG fixture path;
    everything else gets a freshly generated 1000-character text file
    under *base_path*.  Returns (filename, mime_type).
    """
    if input['name'] != '' and 'image' in input['name']:
        filename = os.path.join(os.path.dirname(__file__), os.pardir, "files", "image.jpg")
        mime_type = 'image/jpeg'
    else:
        filename = os.path.join(base_path, gen_random_value() + '.txt')
        # 'with' already closes the handle; the original also called
        # f.close() redundantly inside the block.
        with open(filename, 'w') as f:
            f.write(gen_random_value(length = 1000))
        mime_type = 'text/plain'
    return filename, mime_type
87 |
def fill_form(form, matched_patterns = None, br = None):
    """Fill *form* using the site-wide value patterns and submit it.

    matched_patterns remembers which pattern produced which value so that
    related forms (e.g. login after registration) reuse the same values;
    pass the dict returned by a previous call to get that behavior.

    Returns (matched_patterns, inputs, http_status_code, browser).
    """
    # Mutable default arguments are shared across calls: the original
    # 'matched_patterns = {}' default leaked matches between unrelated
    # forms.  Use the None sentinel instead.
    if matched_patterns is None:
        matched_patterns = {}
    inputs = {}
    for input in form['inputs']:
        if input['value'] != '':
            continue
        # NOTE(review): the checkbox/random fallback below sits inside the
        # pattern loop, so it re-assigns once per non-matching pattern;
        # preserved as-is since the final value is identical.
        for pattern_name in patterns:
            if input['type'] == 'hidden':
                continue
            pattern, value = patterns[pattern_name]
            if match_any_pattern(input['name'], pattern) or match_any_pattern(input['type'], pattern):
                if pattern_name in matched_patterns:
                    inputs[input['name']] = matched_patterns[pattern_name]
                else:
                    inputs[input['name']] = value[0]
                    matched_patterns[pattern_name] = value[0]
                break
            elif input['type'] == 'checkbox':
                inputs[input['name']] = True
            else:
                inputs[input['name']] = gen_random_value()

    response, br = submit_form(form, inputs, br)

    return matched_patterns, inputs, response, br
112 |
def fill_form_random(form, br, base_path = '/tmp'):
    """Populate every empty input of *form* with random data and submit
    it through the mechanize browser *br*.

    File inputs get a generated file under *base_path*.  Returns the
    mapping of input name -> submitted value.
    """
    values = {}
    for field in form['inputs']:
        if field['value'] != '':
            continue
        kind = field['type']
        if kind == 'file':
            path, mime = gen_file(base_path, field)
            values[field['name']] = {
                'filename' : path,
                'mime_type': mime
            }
        elif kind == 'checkbox':
            values[field['name']] = gen_random_true_false()
        else:
            values[field['name']] = gen_random_value()

    submit_form(form, values, br)

    return values
132 |
def submit_form_fast(form, inputs, files, session):
    """POST *inputs* (and optional *files*) directly to the form's action
    URL using a requests *session*, bypassing the browser emulation.

    Returns the requests Response object.
    """
    target = urlparse.urljoin(form['url'], form['action'])
    if files is None:
        return session.post(target, data = inputs)
    return session.post(target, data = inputs, files = files)
140 |
def fill_form_random_fast(form, session, base_path = '/tmp'):
    """Like fill_form_random, but posts through a requests *session*.

    Empty inputs get random data; pre-filled inputs reuse the value
    currently rendered in the served page (re-fetched here).  Returns the
    mapping of input name -> submitted value.
    """
    inputs = {}
    files = None
    page = session.get(form['url'])
    soup = BeautifulSoup(page.text)
    for field in form['inputs']:
        name = field['name']
        if field['value'] != '':
            # Reuse the server-rendered value for pre-filled inputs.
            tag = soup.find('input', {"name":name})
            if tag:
                inputs[name] = tag['value']
            continue
        if field['type'] == 'file':
            if files is None:
                files = {}
            path, mime = gen_file(base_path, field)
            files[name] = (os.path.basename(path), open(path), mime)
        elif field['type'] == 'checkbox':
            inputs[name] = gen_random_true_false()
        else:
            inputs[name] = gen_random_value()

    submit_form_fast(form, inputs, files, session)

    return inputs
--------------------------------------------------------------------------------