├── blog ├── __init__.py ├── parsers │ ├── __init__.py │ └── markdown_parser.py ├── templatetags │ ├── __init__.py │ └── pinax_blog_tags.py ├── signals.py ├── templates │ ├── blog_base.html │ ├── rss_item.xml │ ├── dateline.html │ ├── atom_feed.xml │ ├── dateline_stale.html │ ├── rss_feed.xml │ ├── blog_list.html │ ├── blog_section_list.html │ ├── blog_post.html │ └── atom_entry.xml ├── managers.py ├── conf.py ├── utils.py ├── urls.py ├── admin.py └── forms.py ├── library ├── __init__.py ├── .gitignore ├── templatetags │ ├── __init__.py │ └── active_page.py ├── tests.py ├── static │ ├── img │ │ ├── django.png │ │ ├── drupal.png │ │ ├── github.png │ │ ├── grails.png │ │ ├── nodejs.png │ │ ├── favicon.png │ │ ├── no_image.gif │ │ ├── jumbotron-bg.jpg │ │ ├── ruby_on_rails.png │ │ ├── people │ │ │ ├── andypavlo.jpg │ │ │ ├── danavanaken.jpg │ │ │ └── zeyuanshang.jpg │ │ ├── glyphicons-halflings.png │ │ └── glyphicons-halflings-white.png │ ├── font-awesome │ │ ├── fonts │ │ │ ├── FontAwesome.otf │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.ttf │ │ │ └── fontawesome-webfont.woff │ │ ├── less │ │ │ ├── fixed-width.less │ │ │ ├── bordered-pulled.less │ │ │ ├── larger.less │ │ │ ├── core.less │ │ │ ├── list.less │ │ │ ├── font-awesome.less │ │ │ ├── stacked.less │ │ │ ├── rotated-flipped.less │ │ │ ├── spinning.less │ │ │ ├── path.less │ │ │ └── mixins.less │ │ └── scss │ │ │ ├── _fixed-width.scss │ │ │ ├── _bordered-pulled.scss │ │ │ ├── _larger.scss │ │ │ ├── _core.scss │ │ │ ├── _list.scss │ │ │ ├── font-awesome.scss │ │ │ ├── _stacked.scss │ │ │ ├── _spinning.scss │ │ │ ├── _path.scss │ │ │ ├── _rotated-flipped.scss │ │ │ └── _mixins.scss │ ├── fonts │ │ ├── glyphicons-halflings-regular.eot │ │ ├── glyphicons-halflings-regular.ttf │ │ ├── glyphicons-halflings-regular.woff │ │ └── glyphicons-halflings-regular.woff2 │ ├── js │ │ └── collapse.js │ └── md │ │ └── tools.md ├── fixtures │ ├── apistatistic.json │ ├── database.json │ ├── 
repositorysource.json │ ├── projecttype.json │ └── crawlerstatus.json ├── templates │ ├── queries.html │ ├── analytics │ │ └── analytics.html │ ├── search.html │ ├── admin │ │ ├── add_module.html │ │ └── add_repository.html │ ├── about.html │ ├── status │ │ └── attempt_status_codes.html │ └── base.html ├── context_processors.py ├── urls.py ├── serializers.py ├── admin.py └── forms.py ├── cmudbac ├── .gitignore ├── __init__.py ├── urls.py ├── wsgi.py └── settings_example.py ├── analysis ├── .gitignore ├── cluster │ └── .gitignore ├── utils.py ├── general │ ├── analyze_repository.py │ └── analyze_transactions.py └── foreign │ └── foreign.py ├── core ├── drivers │ ├── count │ │ ├── __init__.py │ │ └── count.py │ ├── extract │ │ ├── driver │ │ │ ├── __init__.py │ │ │ ├── spiders │ │ │ │ ├── __init__.py │ │ │ │ ├── url.py │ │ │ │ ├── url_with_cookie.py │ │ │ │ ├── form_with_cookie.py │ │ │ │ └── form.py │ │ │ ├── pipelines.py │ │ │ ├── items.py │ │ │ └── settings.py │ │ ├── __init__.py │ │ ├── scrapy.cfg │ │ └── extract.py │ ├── files │ │ └── image.jpg │ ├── __init__.py │ ├── submit │ │ ├── __init__.py │ │ ├── query.py │ │ ├── patterns.py │ │ ├── login.py │ │ ├── register.py │ │ └── submit.py │ ├── benchmarkdriver.py │ └── randomdriver.py ├── crawlers │ ├── __init__.py │ ├── basecrawler.py │ └── drupalcrawler.py ├── deployers │ ├── __init__.py │ └── nodedeployer.py ├── analyzers │ ├── __init__.py │ ├── baseanalyzer.py │ ├── sqlite3analyzer.py │ ├── postgresqlanalyzer.py │ └── mysqlanalyzer.py ├── utils │ ├── network.py │ ├── timeout.py │ ├── __init__.py │ ├── rvm.py │ ├── run.py │ ├── pip.py │ ├── file.py │ ├── data.py │ └── vagrant.py └── scripts │ ├── vagrant_deploy.py │ └── vagrant_benchmark.py ├── vagrant ├── .gitignore ├── requirements.txt ├── Vagrantfile_example └── bootstrap.sh ├── .gitignore ├── requirements.txt ├── README.md ├── manage.py ├── scripts ├── deploy_repo.py ├── remove_attempts.py ├── crawl_repos.py ├── run_driver.py ├── crawl_repo.py └── 
count_repos.py └── tools └── local-deployer.py /blog/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /library/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blog/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blog/templatetags/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cmudbac/.gitignore: -------------------------------------------------------------------------------- 1 | settings.py -------------------------------------------------------------------------------- /library/.gitignore: -------------------------------------------------------------------------------- 1 | !*.json 2 | -------------------------------------------------------------------------------- /library/templatetags/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /analysis/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | fig/* 3 | -------------------------------------------------------------------------------- /analysis/cluster/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.pdf 3 | -------------------------------------------------------------------------------- /core/drivers/count/__init__.py: -------------------------------------------------------------------------------- 1 | from count import count_query 
-------------------------------------------------------------------------------- /core/drivers/extract/driver/__init__.py: -------------------------------------------------------------------------------- 1 | from driver import * -------------------------------------------------------------------------------- /core/crawlers/__init__.py: -------------------------------------------------------------------------------- 1 | from basecrawler import * 2 | from githubcrawler import * -------------------------------------------------------------------------------- /library/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /core/drivers/files/image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/core/drivers/files/image.jpg -------------------------------------------------------------------------------- /library/static/img/django.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/django.png -------------------------------------------------------------------------------- /library/static/img/drupal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/drupal.png -------------------------------------------------------------------------------- /library/static/img/github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/github.png -------------------------------------------------------------------------------- /library/static/img/grails.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/grails.png -------------------------------------------------------------------------------- /library/static/img/nodejs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/nodejs.png -------------------------------------------------------------------------------- /vagrant/.gitignore: -------------------------------------------------------------------------------- 1 | Vagrantfile 2 | blog/ 3 | cmudbac/ 4 | core/ 5 | library/ 6 | tests/ 7 | scripts/ 8 | -------------------------------------------------------------------------------- /core/drivers/__init__.py: -------------------------------------------------------------------------------- 1 | from basedriver import * 2 | from benchmarkdriver import * 3 | from randomdriver import * -------------------------------------------------------------------------------- /library/static/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/favicon.png -------------------------------------------------------------------------------- /library/static/img/no_image.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/no_image.gif -------------------------------------------------------------------------------- /library/static/img/jumbotron-bg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/jumbotron-bg.jpg -------------------------------------------------------------------------------- /library/static/img/ruby_on_rails.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/ruby_on_rails.png -------------------------------------------------------------------------------- /library/static/img/people/andypavlo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/people/andypavlo.jpg -------------------------------------------------------------------------------- /library/static/img/people/danavanaken.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/people/danavanaken.jpg -------------------------------------------------------------------------------- /library/static/img/people/zeyuanshang.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/people/zeyuanshang.jpg -------------------------------------------------------------------------------- /library/static/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /library/static/font-awesome/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/font-awesome/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /library/static/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /library/static/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /library/static/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /library/static/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /library/static/font-awesome/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/font-awesome/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /library/static/font-awesome/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/font-awesome/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /library/static/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant/ 2 | *.log 3 | *.pyc 4 | *.sqlite3 5 | *.kate-swp 6 | *.json 7 | *.box 8 | *.csv 9 | screenshot*.png 10 | dump/ 11 | *.pkl 12 | .env/* 13 | -------------------------------------------------------------------------------- /library/static/font-awesome/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/font-awesome/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /cmudbac/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | # This will make sure the app is always imported when 4 | # Django starts so that shared_task will use this app. 
5 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/fixed-width.less: -------------------------------------------------------------------------------- 1 | // Fixed Width Icons 2 | // ------------------------- 3 | .@{fa-css-prefix}-fw { 4 | width: (18em / 14); 5 | text-align: center; 6 | } 7 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_fixed-width.scss: -------------------------------------------------------------------------------- 1 | // Fixed Width Icons 2 | // ------------------------- 3 | .#{$fa-css-prefix}-fw { 4 | width: (18em / 14); 5 | text-align: center; 6 | } 7 | -------------------------------------------------------------------------------- /core/drivers/extract/__init__.py: -------------------------------------------------------------------------------- 1 | from extract import extract_forms, extract_all_forms, extract_all_forms_with_cookie 2 | from extract import extract_urls, extract_all_urls, extract_all_urls_with_cookie -------------------------------------------------------------------------------- /core/drivers/submit/__init__.py: -------------------------------------------------------------------------------- 1 | from register import register 2 | from login import login 3 | from submit import fill_form_random, fill_form_random_fast, gen_random_value 4 | from query import query_url -------------------------------------------------------------------------------- /library/fixtures/apistatistic.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 1, 4 | "model": "library.WebStatistic", 5 | "fields": { 6 | "name": "Attempt Info API", 7 | "count": 0 8 | } 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /core/deployers/__init__.py: -------------------------------------------------------------------------------- 
1 | from basedeployer import * 2 | from djangodeployer import * 3 | from rordeployer import * 4 | from nodedeployer import * 5 | from drupaldeployer import * 6 | from grailsdeployer import * -------------------------------------------------------------------------------- /core/drivers/extract/driver/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /vagrant/requirements.txt: -------------------------------------------------------------------------------- 1 | selenium == 2.48.0 2 | djangorestframework == 3.3.1 3 | Django == 1.8.6 4 | Scrapy 5 | mechanize 6 | beautifulsoup4 7 | requests == 2.8.1 8 | hurry.filesize == 0.9 9 | django_appconf == 1.0.1 10 | -------------------------------------------------------------------------------- /blog/signals.py: -------------------------------------------------------------------------------- 1 | import django.dispatch 2 | 3 | 4 | post_viewed = django.dispatch.Signal(providing_args=["post", "request"]) 5 | post_published = django.dispatch.Signal(providing_args=["post"]) 6 | post_redirected = django.dispatch.Signal(providing_args=["post", "request"]) 7 | -------------------------------------------------------------------------------- /cmudbac/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import patterns, include, url 2 | from django.contrib import admin 3 | admin.autodiscover() 4 | 5 | urlpatterns = patterns('', 6 | url(r'', include('library.urls')), 7 | url(r'^blog/', include('blog.urls')), 8 | url(r'^admin/', include(admin.site.urls)), 9 | ) 10 | -------------------------------------------------------------------------------- 
/blog/templates/blog_base.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block body_class %}blog{% endblock %} 4 | 5 | {% block main %} 6 |
7 |
8 | {% block content %}{% endblock %} 9 |
10 |
11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /core/drivers/extract/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = driver.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = driver 12 | -------------------------------------------------------------------------------- /library/templates/queries.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | {% for query in queries %} 5 | 6 | 7 | 8 | {% endfor %} 9 | 10 |
{{ query.content|safe }}
11 |
-------------------------------------------------------------------------------- /core/drivers/submit/query.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import mechanize 5 | 6 | def query_url(url, br = None): 7 | if br == None: 8 | br = mechanize.Browser() 9 | br.set_handle_robots(False) 10 | 11 | br.open(url['url'].encode("ascii","ignore")) 12 | 13 | return 14 | -------------------------------------------------------------------------------- /core/drivers/extract/driver/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class DriverPipeline(object): 10 | def process_item(self, item, spider): 11 | return item 12 | -------------------------------------------------------------------------------- /blog/managers.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | from .conf import settings 4 | 5 | 6 | PUBLISHED_STATE = len(settings.PINAX_BLOG_UNPUBLISHED_STATES) + 1 7 | 8 | 9 | class PostManager(models.Manager): 10 | 11 | def published(self): 12 | return self.filter(published__isnull=False, state=PUBLISHED_STATE) 13 | 14 | def current(self): 15 | return self.published().order_by("-published") 16 | -------------------------------------------------------------------------------- /library/static/js/collapse.js: -------------------------------------------------------------------------------- 1 | $('.collapse').on('show.bs.collapse', function(event) { 2 | icon = $('#' + $(this).attr('id') + '-icon'); 3 | icon.removeClass("glyphicon-plus").addClass("glyphicon-minus"); 4 | event.stopPropagation(); 5 | 
}).on('hidden.bs.collapse', function(event) { 6 | icon = $('#' + $(this).attr('id') + '-icon'); 7 | icon.removeClass("glyphicon-minus").addClass("glyphicon-plus"); 8 | event.stopPropagation(); 9 | }); -------------------------------------------------------------------------------- /library/templatetags/active_page.py: -------------------------------------------------------------------------------- 1 | from django import template 2 | 3 | register = template.Library() 4 | 5 | @register.simple_tag 6 | def active_page(request, view_name): 7 | from django.core.urlresolvers import resolve, Resolver404 8 | if not request: 9 | return "" 10 | try: 11 | return "active" if resolve(request.path_info).url_name == view_name else "" 12 | except Resolver404: 13 | return "" -------------------------------------------------------------------------------- /core/drivers/count/count.py: -------------------------------------------------------------------------------- 1 | keywords = ['SELECT', 'INSERT', 'UPDATE', 'DELETE'] 2 | 3 | def count_query(queries): 4 | ret = {} 5 | for keyword in keywords: 6 | ret[keyword] = 0 7 | ret['OTHER'] = 0 8 | for query in queries: 9 | counted = False 10 | for keyword in keywords: 11 | if keyword in query['raw'].upper(): 12 | ret[keyword] += 1 13 | counted = True 14 | if not counted: 15 | ret['OTHER'] += 1 16 | return ret 17 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/bordered-pulled.less: -------------------------------------------------------------------------------- 1 | // Bordered & Pulled 2 | // ------------------------- 3 | 4 | .@{fa-css-prefix}-border { 5 | padding: .2em .25em .15em; 6 | border: solid .08em @fa-border-color; 7 | border-radius: .1em; 8 | } 9 | 10 | .pull-right { float: right; } 11 | .pull-left { float: left; } 12 | 13 | .@{fa-css-prefix} { 14 | &.pull-left { margin-right: .3em; } 15 | &.pull-right { margin-left: .3em; } 16 | } 17 | 
-------------------------------------------------------------------------------- /library/static/font-awesome/scss/_bordered-pulled.scss: -------------------------------------------------------------------------------- 1 | // Bordered & Pulled 2 | // ------------------------- 3 | 4 | .#{$fa-css-prefix}-border { 5 | padding: .2em .25em .15em; 6 | border: solid .08em $fa-border-color; 7 | border-radius: .1em; 8 | } 9 | 10 | .pull-right { float: right; } 11 | .pull-left { float: left; } 12 | 13 | .#{$fa-css-prefix} { 14 | &.pull-left { margin-right: .3em; } 15 | &.pull-right { margin-left: .3em; } 16 | } 17 | -------------------------------------------------------------------------------- /blog/templates/rss_item.xml: -------------------------------------------------------------------------------- 1 | 2 | {{ entry.title }} 3 | http://{{ current_site.domain }}{{ entry.get_absolute_url }} 4 | {{ entry.meta_description }} 5 | {{ entry.published|date:"D, d M Y H:i:s O" }} 6 | {{ entry.author.get_full_name }} 7 | http://{{ current_site.domain }}{{ entry.get_absolute_url }} 8 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/larger.less: -------------------------------------------------------------------------------- 1 | // Icon Sizes 2 | // ------------------------- 3 | 4 | /* makes the font 33% larger relative to the icon container */ 5 | .@{fa-css-prefix}-lg { 6 | font-size: (4em / 3); 7 | line-height: (3em / 4); 8 | vertical-align: -15%; 9 | } 10 | .@{fa-css-prefix}-2x { font-size: 2em; } 11 | .@{fa-css-prefix}-3x { font-size: 3em; } 12 | .@{fa-css-prefix}-4x { font-size: 4em; } 13 | .@{fa-css-prefix}-5x { font-size: 5em; } 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | sqlparse == 0.1.19 2 | Markdown == 2.6.4 3 | psycopg2 == 2.6.1 4 | twitter == 1.17.1 5 | 
selenium == 2.48.0 6 | djangorestframework == 3.3.1 7 | djangorestframework-filters==0.9.1 8 | Django == 1.8.6 9 | Scrapy == 1.0.3 10 | Pygments == 2.0.2 11 | django_appconf == 1.0.1 12 | mechanize == 0.2.5 13 | beautifulsoup4 == 4.4.1 14 | python-creole == 1.3.1 15 | requests == 2.8.1 16 | MySQL_python == 1.2.5 17 | hurry.filesize == 0.9 18 | Pillow==4.2.1 19 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_larger.scss: -------------------------------------------------------------------------------- 1 | // Icon Sizes 2 | // ------------------------- 3 | 4 | /* makes the font 33% larger relative to the icon container */ 5 | .#{$fa-css-prefix}-lg { 6 | font-size: (4em / 3); 7 | line-height: (3em / 4); 8 | vertical-align: -15%; 9 | } 10 | .#{$fa-css-prefix}-2x { font-size: 2em; } 11 | .#{$fa-css-prefix}-3x { font-size: 3em; } 12 | .#{$fa-css-prefix}-4x { font-size: 4em; } 13 | .#{$fa-css-prefix}-5x { font-size: 5em; } 14 | -------------------------------------------------------------------------------- /library/templates/analytics/analytics.html: -------------------------------------------------------------------------------- 1 | 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Carnegie Mellon Database Application Catalog 2 | 3 | For more details, Please visit the [CMDBAC Wiki](https://github.com/cmu-db/dbac/wiki "DBAC Wiki") page. 4 | 5 | ### Installation 6 | 7 | Please refer to the [installation instructions](https://github.com/cmu-db/dbac/wiki/Installation). 
8 | 9 | ### Contributors 10 | 11 | * [Zeyuan Shang](http://www.shangzeyuan.com/) 12 | * [Andy Pavlo](http://www.cs.cmu.edu/~pavlo) 13 | * [Dana Van Aken](http://www.cs.cmu.edu/~dvaken) 14 | -------------------------------------------------------------------------------- /library/context_processors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Author: Zeyuan Shang 4 | # @Date: 2015-11-13 22:02:21 5 | # @Last Modified by: Zeyuan Shang 6 | # @Last Modified time: 2015-11-13 22:02:57 7 | from django.conf import settings 8 | from django.template.loader import render_to_string 9 | 10 | def analytics(request): 11 | return { 'analytics_code': render_to_string("analytics/analytics.html", { 'google_analytics_key': settings.GOOGLE_ANALYTICS_KEY }) } -------------------------------------------------------------------------------- /blog/templates/dateline.html: -------------------------------------------------------------------------------- 1 |

2 | {% if post.published %}{{ post.published|date:"F jS, Y" }}{% else %}Not published yet{% endif %} 3 | 4 | 5 |

6 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/core.less: -------------------------------------------------------------------------------- 1 | // Base Class Definition 2 | // ------------------------- 3 | 4 | .@{fa-css-prefix} { 5 | display: inline-block; 6 | font: normal normal normal 14px/1 FontAwesome; // shortening font declaration 7 | font-size: inherit; // can't have font-size inherit on line above, so need to override 8 | text-rendering: auto; // optimizelegibility throws things off #1094 9 | -webkit-font-smoothing: antialiased; 10 | -moz-osx-font-smoothing: grayscale; 11 | } 12 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/list.less: -------------------------------------------------------------------------------- 1 | // List Icons 2 | // ------------------------- 3 | 4 | .@{fa-css-prefix}-ul { 5 | padding-left: 0; 6 | margin-left: @fa-li-width; 7 | list-style-type: none; 8 | > li { position: relative; } 9 | } 10 | .@{fa-css-prefix}-li { 11 | position: absolute; 12 | left: -@fa-li-width; 13 | width: @fa-li-width; 14 | top: (2em / 14); 15 | text-align: center; 16 | &.@{fa-css-prefix}-lg { 17 | left: (-@fa-li-width + (4em / 14)); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_core.scss: -------------------------------------------------------------------------------- 1 | // Base Class Definition 2 | // ------------------------- 3 | 4 | .#{$fa-css-prefix} { 5 | display: inline-block; 6 | font: normal normal normal 14px/1 FontAwesome; // shortening font declaration 7 | font-size: inherit; // can't have font-size inherit on line above, so need to override 8 | text-rendering: auto; // optimizelegibility throws things off #1094 9 | -webkit-font-smoothing: antialiased; 10 | -moz-osx-font-smoothing: grayscale; 11 | } 12 | 
-------------------------------------------------------------------------------- /library/static/font-awesome/scss/_list.scss: -------------------------------------------------------------------------------- 1 | // List Icons 2 | // ------------------------- 3 | 4 | .#{$fa-css-prefix}-ul { 5 | padding-left: 0; 6 | margin-left: $fa-li-width; 7 | list-style-type: none; 8 | > li { position: relative; } 9 | } 10 | .#{$fa-css-prefix}-li { 11 | position: absolute; 12 | left: -$fa-li-width; 13 | width: $fa-li-width; 14 | top: (2em / 14); 15 | text-align: center; 16 | &.#{$fa-css-prefix}-lg { 17 | left: -$fa-li-width + (4em / 14); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /core/analyzers/__init__.py: -------------------------------------------------------------------------------- 1 | from baseanalyzer import * 2 | from mysqlanalyzer import * 3 | from postgresqlanalyzer import * 4 | from sqlite3analyzer import * 5 | 6 | def get_analyzer(deployer): 7 | if deployer.get_database().name == 'MySQL': 8 | return MySQLAnalyzer(deployer) 9 | elif deployer.get_database().name == 'PostgreSQL': 10 | return PostgreSQLAnalyzer(deployer) 11 | elif deployer.get_database().name == 'SQLite3': 12 | return SQLite3Analyzer(deployer) 13 | else: 14 | return BaseAnalyzer(deployer) -------------------------------------------------------------------------------- /library/static/font-awesome/scss/font-awesome.scss: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Font Awesome 4.2.0 by @davegandy - http://fontawesome.io - @fontawesome 3 | * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) 4 | */ 5 | 6 | @import "variables"; 7 | @import "mixins"; 8 | @import "path"; 9 | @import "core"; 10 | @import "larger"; 11 | @import "fixed-width"; 12 | @import "list"; 13 | @import "bordered-pulled"; 14 | @import "spinning"; 15 | @import "rotated-flipped"; 16 | @import "stacked"; 17 | @import "icons"; 18 | -------------------------------------------------------------------------------- /blog/templates/atom_feed.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {{ feed_id }} 5 | 6 | {{ feed_title }} 7 | 8 | 9 | 10 | 11 | {{ feed_updated|date:"Y-m-d\TH:i:s\Z" }} 12 | 13 | {% for entry in entries %} 14 | {% include "atom_entry.xml" %} 15 | {% endfor %} 16 | 17 | -------------------------------------------------------------------------------- /blog/templates/dateline_stale.html: -------------------------------------------------------------------------------- 1 |

2 | {% if not post.stale %} 3 | {% if post.published %}{{ post.published|date:"jS F Y" }}{% else %}Not published yet{% endif %} 4 |
5 | {% endif %} 6 | by {{ post.author.get_full_name }} in 7 | {{ post.get_section_display|capfirst }} 8 |

def match_any_pattern(name, patterns):
    """Return True if any of *patterns* occurs in *name*, case-insensitively."""
    lowered = name.lower()
    for candidate in patterns:
        if candidate in lowered:
            return True
    return False
.@{fa-css-prefix}-stack-2x { 13 | position: absolute; 14 | left: 0; 15 | width: 100%; 16 | text-align: center; 17 | } 18 | .@{fa-css-prefix}-stack-1x { line-height: inherit; } 19 | .@{fa-css-prefix}-stack-2x { font-size: 2em; } 20 | .@{fa-css-prefix}-inverse { color: @fa-inverse; } 21 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_stacked.scss: -------------------------------------------------------------------------------- 1 | // Stacked Icons 2 | // ------------------------- 3 | 4 | .#{$fa-css-prefix}-stack { 5 | position: relative; 6 | display: inline-block; 7 | width: 2em; 8 | height: 2em; 9 | line-height: 2em; 10 | vertical-align: middle; 11 | } 12 | .#{$fa-css-prefix}-stack-1x, .#{$fa-css-prefix}-stack-2x { 13 | position: absolute; 14 | left: 0; 15 | width: 100%; 16 | text-align: center; 17 | } 18 | .#{$fa-css-prefix}-stack-1x { line-height: inherit; } 19 | .#{$fa-css-prefix}-stack-2x { font-size: 2em; } 20 | .#{$fa-css-prefix}-inverse { color: $fa-inverse; } 21 | -------------------------------------------------------------------------------- /blog/templates/rss_feed.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{ feed_title }} 6 | The latest posts from {{ feed_title }} 7 | {{ blog_url }} 8 | 9 | {{ feed_updated|date:"D, d M Y H:i:s O" }} 10 | {% for entry in entries %} 11 | {% include "rss_item.xml" %} 12 | {% endfor %} 13 | 14 | 15 | -------------------------------------------------------------------------------- /core/utils/network.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import time 4 | import requests 5 | import logging 6 | 7 | from run import run_command 8 | 9 | def query(url, auth = None): 10 | if auth == None: 11 | response = requests.get(url, verify=False) 12 | else: 13 | response = requests.get(url, auth=(auth['user'], auth['pass']), 
class TimeoutError(Exception):
    """Raised when a guarded block exceeds its time budget."""
    pass


class timeout:
    """Context manager that aborts the guarded block after *seconds*.

    Relies on SIGALRM, so it only works on Unix and only on the main
    thread; nesting two of these would clobber the outer alarm.
    """

    def __init__(self, seconds=1, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message

    def handle_timeout(self, signum, frame):
        # Installed as the SIGALRM handler; fires if the alarm expires.
        raise TimeoutError(self.error_message)

    def __enter__(self):
        # Arm the alarm on entry to the with-block.
        signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.alarm(self.seconds)

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Disarm any pending alarm no matter how the block exited.
        signal.alarm(0)
5 | """ 6 | 7 | import os 8 | import sys 9 | 10 | # Change the env variable where django looks for the settings module 11 | # http://stackoverflow.com/a/11817088 12 | import django.conf 13 | django.conf.ENVIRONMENT_VARIABLE = "DJANGO_CMDBAC_SETTINGS_MODULE" 14 | os.environ.setdefault("DJANGO_CMDBAC_SETTINGS_MODULE", "cmudbac.settings") 15 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 16 | 17 | from django.core.wsgi import get_wsgi_application 18 | application = get_wsgi_application() 19 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | sys.path.append(os.path.join(os.path.dirname(__file__), "core")) 5 | 6 | import utils 7 | 8 | def vagrant_init(): 9 | utils.vagrant_clear() 10 | utils.vagrant_setup() 11 | 12 | def vagrant_final(): 13 | utils.vagrant_clear() 14 | 15 | if __name__ == "__main__": 16 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 17 | 18 | from django.core.management import execute_from_command_line 19 | 20 | vagrant_init() 21 | 22 | try: 23 | execute_from_command_line(sys.argv) 24 | finally: 25 | vagrant_final() 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /blog/templates/blog_list.html: -------------------------------------------------------------------------------- 1 | {% extends "blog_base.html" %} 2 | 3 | {% block head_title %}News » {{ block.super }}{% endblock %} 4 | 5 | {% block content %} 6 |

News

7 | {% if post_list %} 8 |
9 | {% for post in post_list %} 10 |
11 |

{{ post.title }}

12 | {% include "dateline.html" %} 13 |
{{ post.content_html|safe }}
14 |
15 | {% endfor %} 16 |
17 | {% else %} 18 |

No posts have been published.

19 | {% endif %} 20 | {% endblock %} 21 | -------------------------------------------------------------------------------- /core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from run import run_command, run_command_async 2 | from network import query, kill_port, block_network, unblock_network 3 | from file import search_file, search_file_regex, search_file_norecur, search_dir, replace_file_regex, replace_files_regex, mk_dir, make_dir, rm_dir, unzip, cd, rename_file, copy_file, remove_file, get_size 4 | from pip import home_path, configure_env, to_env, pip_install, pip_install_text, pip_freeze 5 | from data import get_crawler, add_module, add_repo, delete_repo, deploy_repo, edit_distance 6 | from vagrant import vagrant_setup, vagrant_clear, vagrant_deploy, vagrant_benchmark 7 | from rvm import get_ruby_versions, use_ruby_version, install_ruby_version 8 | from timeout import timeout -------------------------------------------------------------------------------- /library/fixtures/database.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 1, 4 | "model": "library.database", 5 | "fields": { 6 | "name": "Unknown" 7 | } 8 | }, 9 | { 10 | "pk": 2, 11 | "model": "library.database", 12 | "fields": { 13 | "name": "Oracle" 14 | } 15 | }, 16 | { 17 | "pk": 3, 18 | "model": "library.database", 19 | "fields": { 20 | "name": "Other" 21 | } 22 | }, 23 | { 24 | "pk": 4, 25 | "model": "library.database", 26 | "fields": { 27 | "name": "PostgreSQL" 28 | } 29 | }, 30 | { 31 | "pk": 5, 32 | "model": "library.database", 33 | "fields": { 34 | "name": "SQLite3" 35 | } 36 | }, 37 | { 38 | "pk": 6, 39 | "model": "library.database", 40 | "fields": { 41 | "name": "MySQL" 42 | } 43 | } 44 | ] 45 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/rotated-flipped.less: 
-------------------------------------------------------------------------------- 1 | // Rotated & Flipped Icons 2 | // ------------------------- 3 | 4 | .@{fa-css-prefix}-rotate-90 { .fa-icon-rotate(90deg, 1); } 5 | .@{fa-css-prefix}-rotate-180 { .fa-icon-rotate(180deg, 2); } 6 | .@{fa-css-prefix}-rotate-270 { .fa-icon-rotate(270deg, 3); } 7 | 8 | .@{fa-css-prefix}-flip-horizontal { .fa-icon-flip(-1, 1, 0); } 9 | .@{fa-css-prefix}-flip-vertical { .fa-icon-flip(1, -1, 2); } 10 | 11 | // Hook for IE8-9 12 | // ------------------------- 13 | 14 | :root .@{fa-css-prefix}-rotate-90, 15 | :root .@{fa-css-prefix}-rotate-180, 16 | :root .@{fa-css-prefix}-rotate-270, 17 | :root .@{fa-css-prefix}-flip-horizontal, 18 | :root .@{fa-css-prefix}-flip-vertical { 19 | filter: none; 20 | } 21 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/spinning.less: -------------------------------------------------------------------------------- 1 | // Spinning Icons 2 | // -------------------------- 3 | 4 | .@{fa-css-prefix}-spin { 5 | -webkit-animation: fa-spin 2s infinite linear; 6 | animation: fa-spin 2s infinite linear; 7 | } 8 | 9 | @-webkit-keyframes fa-spin { 10 | 0% { 11 | -webkit-transform: rotate(0deg); 12 | transform: rotate(0deg); 13 | } 14 | 100% { 15 | -webkit-transform: rotate(359deg); 16 | transform: rotate(359deg); 17 | } 18 | } 19 | 20 | @keyframes fa-spin { 21 | 0% { 22 | -webkit-transform: rotate(0deg); 23 | transform: rotate(0deg); 24 | } 25 | 100% { 26 | -webkit-transform: rotate(359deg); 27 | transform: rotate(359deg); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_spinning.scss: -------------------------------------------------------------------------------- 1 | // Spinning Icons 2 | // -------------------------- 3 | 4 | .#{$fa-css-prefix}-spin { 5 | -webkit-animation: fa-spin 2s infinite linear; 6 | animation: 
fa-spin 2s infinite linear; 7 | } 8 | 9 | @-webkit-keyframes fa-spin { 10 | 0% { 11 | -webkit-transform: rotate(0deg); 12 | transform: rotate(0deg); 13 | } 14 | 100% { 15 | -webkit-transform: rotate(359deg); 16 | transform: rotate(359deg); 17 | } 18 | } 19 | 20 | @keyframes fa-spin { 21 | 0% { 22 | -webkit-transform: rotate(0deg); 23 | transform: rotate(0deg); 24 | } 25 | 100% { 26 | -webkit-transform: rotate(359deg); 27 | transform: rotate(359deg); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/path.less: -------------------------------------------------------------------------------- 1 | /* FONT PATH 2 | * -------------------------- */ 3 | 4 | @font-face { 5 | font-family: 'FontAwesome'; 6 | src: url('@{fa-font-path}/fontawesome-webfont.eot?v=@{fa-version}'); 7 | src: url('@{fa-font-path}/fontawesome-webfont.eot?#iefix&v=@{fa-version}') format('embedded-opentype'), 8 | url('@{fa-font-path}/fontawesome-webfont.woff?v=@{fa-version}') format('woff'), 9 | url('@{fa-font-path}/fontawesome-webfont.ttf?v=@{fa-version}') format('truetype'), 10 | url('@{fa-font-path}/fontawesome-webfont.svg?v=@{fa-version}#fontawesomeregular') format('svg'); 11 | // src: url('@{fa-font-path}/FontAwesome.otf') format('opentype'); // used when developing fonts 12 | font-weight: normal; 13 | font-style: normal; 14 | } 15 | -------------------------------------------------------------------------------- /library/fixtures/repositorysource.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 1, 4 | "model": "library.repositorysource", 5 | "fields": { 6 | "name": "GitHub", 7 | "commit_url": "https://github.com/${repo_name}/tree/${commit}", 8 | "base_url": "https://github.com/${repo_name}", 9 | "search_token": "CHANGE ME", 10 | "crawler_class": "GitHubCrawler", 11 | "logo": "img/github.png" 12 | } 13 | }, 14 | { 15 | "pk": 2, 16 | "model": 
"library.repositorysource", 17 | "fields": { 18 | "name": "Drupal", 19 | "commit_url": "https://www.drupal.org/node/${commit}", 20 | "base_url": "https://www.drupal.org/project/${repo_name}", 21 | "search_token": "CHANGE ME", 22 | "crawler_class": "DrupalCrawler", 23 | "logo": "img/drupal.png" 24 | } 25 | } 26 | ] 27 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_path.scss: -------------------------------------------------------------------------------- 1 | /* FONT PATH 2 | * -------------------------- */ 3 | 4 | @font-face { 5 | font-family: 'FontAwesome'; 6 | src: url('#{$fa-font-path}/fontawesome-webfont.eot?v=#{$fa-version}'); 7 | src: url('#{$fa-font-path}/fontawesome-webfont.eot?#iefix&v=#{$fa-version}') format('embedded-opentype'), 8 | url('#{$fa-font-path}/fontawesome-webfont.woff?v=#{$fa-version}') format('woff'), 9 | url('#{$fa-font-path}/fontawesome-webfont.ttf?v=#{$fa-version}') format('truetype'), 10 | url('#{$fa-font-path}/fontawesome-webfont.svg?v=#{$fa-version}#fontawesomeregular') format('svg'); 11 | //src: url('#{$fa-font-path}/FontAwesome.otf') format('opentype'); // used when developing fonts 12 | font-weight: normal; 13 | font-style: normal; 14 | } 15 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_rotated-flipped.scss: -------------------------------------------------------------------------------- 1 | // Rotated & Flipped Icons 2 | // ------------------------- 3 | 4 | .#{$fa-css-prefix}-rotate-90 { @include fa-icon-rotate(90deg, 1); } 5 | .#{$fa-css-prefix}-rotate-180 { @include fa-icon-rotate(180deg, 2); } 6 | .#{$fa-css-prefix}-rotate-270 { @include fa-icon-rotate(270deg, 3); } 7 | 8 | .#{$fa-css-prefix}-flip-horizontal { @include fa-icon-flip(-1, 1, 0); } 9 | .#{$fa-css-prefix}-flip-vertical { @include fa-icon-flip(1, -1, 2); } 10 | 11 | // Hook for IE8-9 12 | // ------------------------- 13 | 14 | 
:root .#{$fa-css-prefix}-rotate-90, 15 | :root .#{$fa-css-prefix}-rotate-180, 16 | :root .#{$fa-css-prefix}-rotate-270, 17 | :root .#{$fa-css-prefix}-flip-horizontal, 18 | :root .#{$fa-css-prefix}-flip-vertical { 19 | filter: none; 20 | } 21 | -------------------------------------------------------------------------------- /blog/templates/blog_section_list.html: -------------------------------------------------------------------------------- 1 | {% extends "blog_base.html" %} 2 | 3 | {% block head_title %}{{ SITE_NAME }} ({{ section_name }}){% endblock %} 4 | 5 | {% block content %} 6 |

{{ section_name|title }}

7 | {% if posts %} 8 |
9 | {% for post in posts %} 10 |

{{ post.title }}

11 | {% include "dateline.html" %} 12 |
{{ post.teaser_html|safe }}
13 |

read more...

14 | {% endfor %} 15 |
16 | {% else %} 17 |

No blog posts have been published in this section.

18 | {% endif %} 19 | {% endblock %} 20 | -------------------------------------------------------------------------------- /core/utils/rvm.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | from run import run_command 5 | from file import cd 6 | 7 | def get_ruby_versions(): 8 | command = 'source /usr/local/rvm/scripts/rvm && rvm list' 9 | output = run_command(command) 10 | versions = [] 11 | for line in output[1].split('\n'): 12 | s = re.search('ruby-(.+) \[', line) 13 | if s: 14 | versions.append(s.group(1)) 15 | return sorted(versions) 16 | 17 | def use_ruby_version(version): 18 | command = 'source /usr/local/rvm/scripts/rvm && rvm use {}'.format(version[:5]) 19 | return command 20 | 21 | def install_ruby_version(version): 22 | command = 'sudo su && source /usr/local/rvm/scripts/rvm && rvm install {} && gem install bundle && gem install bundler'.format(version) 23 | return run_command(command) -------------------------------------------------------------------------------- /core/drivers/extract/driver/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class DriverItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | pass 15 | 16 | class InputItem(scrapy.Item): 17 | id = scrapy.Field() 18 | name = scrapy.Field() 19 | type = scrapy.Field() 20 | value = scrapy.Field() 21 | 22 | class FormItem(scrapy.Item): 23 | action = scrapy.Field() 24 | url = scrapy.Field() 25 | method = scrapy.Field() 26 | inputs = scrapy.Field() 27 | id = scrapy.Field() 28 | clazz = scrapy.Field() 29 | enctype = scrapy.Field() 30 | 31 | class UrlItem(scrapy.Item): 32 | url = scrapy.Field() 
-------------------------------------------------------------------------------- /blog/templates/blog_post.html: -------------------------------------------------------------------------------- 1 | {% extends "blog_base.html" %} 2 | 3 | {% block head_title %}{{ post.title }} » {{ block.super }}{% endblock %} 4 | 5 | {% block extra_head %} 6 | 7 | 8 | 9 | {% endblock %} 10 | 11 | {% block content %} 12 |
13 |
14 |

{{ post.title }}

15 | {% include "dateline_stale.html" %} 16 | 17 | 20 | 21 |
{{ post.content_html|safe }}
22 | 23 |
24 |
def login(forms, matched_patterns):
    """Locate the login form among *forms*, fill it, and submit it.

    Returns ``(login_form, browser)`` on success, or ``(None, None)``
    when no form looks like a login form.
    """
    login_form = get_login_form(forms)
    # PEP 8: identity comparison against None, not equality.
    if login_form is None:
        return None, None

    matched_patterns, inputs, response, br = fill_form(login_form, matched_patterns)

    return login_form, br
-------------------------------------------------------------------------------- /blog/templates/atom_entry.xml: -------------------------------------------------------------------------------- 1 | 2 | http://{{ current_site.domain }}{{ entry.get_absolute_url }} 3 | {{ entry.title }} 4 | 5 | 6 | {{ entry.updated|date:"Y-m-d\TH:i:s\Z" }} 7 | {{ entry.published|date:"Y-m-d\TH:i:s\Z" }} 8 | 9 | 10 | {{ entry.author.get_full_name }} 11 | 12 | 13 | 14 |
15 | {{ entry.teaser_html|safe }} 16 |
17 |
18 | 19 | 20 | {{ entry.teaser_html }} 21 | {{ entry.content_html }} 22 | 23 |
24 | -------------------------------------------------------------------------------- /blog/parsers/markdown_parser.py: -------------------------------------------------------------------------------- 1 | from markdown import Markdown 2 | from markdown.inlinepatterns import ImagePattern, IMAGE_LINK_RE 3 | 4 | from ..models import Image 5 | 6 | 7 | class ImageLookupImagePattern(ImagePattern): 8 | 9 | def sanitize_url(self, url): 10 | if url.startswith("http"): 11 | return url 12 | else: 13 | try: 14 | image = Image.objects.get(pk=int(url)) 15 | return image.image_path.url 16 | except Image.DoesNotExist: 17 | pass 18 | except ValueError: 19 | return url 20 | return "" 21 | 22 | 23 | def parse(text): 24 | md = Markdown(extensions=["codehilite", "tables", "smarty", "admonition", "toc"]) 25 | md.inlinePatterns["image_link"] = ImageLookupImagePattern(IMAGE_LINK_RE, md) 26 | html = md.convert(text) 27 | return html 28 | -------------------------------------------------------------------------------- /core/crawlers/basecrawler.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 4 | 5 | import utils 6 | from library.models import * 7 | 8 | class BaseCrawler(object): 9 | def __init__(self, crawlerStatus, auth = None): 10 | self.crawlerStatus = crawlerStatus 11 | self.auth = auth 12 | # DEF 13 | 14 | def search(self): 15 | raise NotImplementedError("Unimplemented %s" % self.__init__.im_class) 16 | # DEF 17 | 18 | def crawl(self): 19 | nextResults = self.search() 20 | ## DEF 21 | 22 | def add_repository(self, name, setup_scripts = None): 23 | raise NotImplementedError("Unimplemented %s" % self.__init__.im_class) 24 | # DEF 25 | 26 | def download_repository(self, repo_name, sha, zip_name): 27 | raise NotImplementedError("Unimplemented %s" % self.__init__.im_class) 28 | # DEF 29 
| 30 | ## CLASS -------------------------------------------------------------------------------- /library/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import patterns, include, url 2 | from rest_framework import routers 3 | import views 4 | 5 | router = routers.DefaultRouter() 6 | router.register(r'attempt', views.AttemptViewSet, base_name='attempt') 7 | router.register(r'repository', views.RepositoryViewSet, base_name='repository') 8 | 9 | urlpatterns = patterns('', 10 | url(r'^api/', include(router.urls)), 11 | url(r'^api/repositories/', views.RepositoryListView.as_view()), 12 | url(r'^$', 'library.views.home', name='home'), 13 | url(r'^repositories/$', 'library.views.repositories', name='repositories'), 14 | url(r'^repository/(?P.+)/(?P.+)/', 'library.views.repository', name='repository'), 15 | url(r'^attempt/(?P\d+)/', 'library.views.attempt', name='attempt'), 16 | url(r'^queries/(?P\d+)/', 'library.views.queries', name='queries'), 17 | url(r'^about/$', 'library.views.about', name='about'), 18 | url(r'^search/$', 'library.views.search', name='search') 19 | ) 20 | -------------------------------------------------------------------------------- /vagrant/Vagrantfile_example: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # Vagrantfile API/syntax version. Don't touch unless you know what you're doing! 5 | VAGRANTFILE_API_VERSION = "2" 6 | 7 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 8 | # All Vagrant configuration is done here. The most common configuration 9 | # options are documented and commented below. For a complete reference, 10 | # please see the online documentation at vagrantup.com. 
def load_path_attr(path):
    """Import the dotted *path* and return the named attribute.

    ``"pkg.mod.attr"`` imports ``pkg.mod`` and returns its ``attr``.
    Raises ImproperlyConfigured when the path has no dot, the module
    cannot be imported, or the attribute is missing.
    """
    try:
        # rsplit handles the split explicitly; the old rfind-based slicing
        # produced a garbage module name for dotless paths.
        module, attr = path.rsplit(".", 1)
    except ValueError:
        raise ImproperlyConfigured("'%s' is not a dotted import path" % path)
    try:
        mod = import_module(module)
    except ImportError as e:
        raise ImproperlyConfigured("Error importing %s: '%s'" % (module, e))
    try:
        attr = getattr(mod, attr)
    except AttributeError:
        raise ImproperlyConfigured("Module '%s' does not define a '%s'" % (module, attr))
    return attr
// Flip mixin: the IE8-9 BasicImage filter needs mirror=1 to actually
// mirror the glyph (matches the less twin in less/mixins.less, which
// passes rotation=@rotation, mirror=1).
@mixin fa-icon-flip($horiz, $vert, $rotation) {
  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=#{$rotation}, mirror=1);
  -webkit-transform: scale($horiz, $vert);
  -ms-transform: scale($horiz, $vert);
  transform: scale($horiz, $vert);
}
-------------------------------------------------------------------------------- /scripts/deploy_repo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import time 7 | import traceback 8 | 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 10 | import django 11 | django.setup() 12 | from django.db.models import Q 13 | 14 | from library.models import * 15 | import utils 16 | 17 | def main(): 18 | if len(sys.argv) < 3: 19 | return 20 | deploy_id = int(sys.argv[1]) 21 | repo_name = sys.argv[2] 22 | if len(sys.argv) >= 4: 23 | database_name = sys.argv[3] 24 | else: 25 | database_name = 'MySQL' 26 | database = Database.objects.get(name = database_name) 27 | 28 | repo = Repository.objects.get(name = repo_name) 29 | print 'Attempting to deploy {} using {} ...'.format(repo, repo.project_type.deployer_class) 30 | try: 31 | utils.vagrant_deploy(repo, deploy_id, database) 32 | except: 33 | traceback.print_exc() 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /core/drivers/extract/driver/spiders/url.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import scrapy 4 | from scrapy.spiders import CrawlSpider, Rule 5 | from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor 6 | 7 | from driver.items import UrlItem 8 | 9 | class UrlSpider(CrawlSpider): 10 | name = "url" 11 | allowed_domains = ["127.0.0.1"] 12 | 13 | def __init__(self, *args, **kwargs): 14 | super(UrlSpider, self).__init__(*args, **kwargs) 15 | 16 | self.start_urls = [kwargs.get('start_url')] 17 | 18 | follow = True if kwargs.get('follow') == 'true' else False 19 | self.rules = ( 20 | Rule 
(SgmlLinkExtractor(allow=('')), callback='parse_url', follow=follow), 21 | ) 22 | super(UrlSpider, self)._compile_rules() 23 | 24 | try: 25 | proxy = kwargs.get('proxy') 26 | service_args = [ 27 | '--proxy=' + proxy, 28 | '--proxy-type=http', 29 | ] 30 | except: 31 | service_args = None 32 | 33 | def parse_url(self, response): 34 | urlItem = UrlItem() 35 | urlItem['url'] = response.url 36 | yield urlItem 37 | -------------------------------------------------------------------------------- /blog/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url, patterns 2 | 3 | from .conf import settings 4 | from .views import ( 5 | BlogIndexView, 6 | DateBasedPostDetailView, 7 | SecretKeyPostDetailView, 8 | SectionIndexView, 9 | SlugUniquePostDetailView, 10 | StaffPostDetailView 11 | ) 12 | 13 | 14 | urlpatterns = patterns( 15 | "blog.views", 16 | url(r"^$", BlogIndexView.as_view(), name="blog"), 17 | url(r"^section/(?P
[-\w]+)/$", SectionIndexView.as_view(), name="blog_section"), 18 | url(r"^post/(?P\d+)/$", StaffPostDetailView.as_view(), name="blog_post_pk"), 19 | url(r"^post/(?P\w+)/$", SecretKeyPostDetailView.as_view(), name="blog_post_secret"), 20 | url(r"^feed/(?P
[-\w]+)/(?P[-\w]+)/$", "blog_feed", name="blog_feed"), 21 | ) 22 | 23 | 24 | if settings.PINAX_BLOG_SLUG_UNIQUE: 25 | urlpatterns += patterns( 26 | "", 27 | url(r"^(?P[-\w]+)/$", SlugUniquePostDetailView.as_view(), name="blog_post_slug") 28 | ) 29 | else: 30 | urlpatterns += patterns( 31 | "", 32 | url(r"^(?P\d{4})/(?P\d{2})/(?P\d{2})/(?P[-\w]+)/$", DateBasedPostDetailView.as_view(), name="blog_post"), 33 | ) 34 | -------------------------------------------------------------------------------- /scripts/remove_attempts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import datetime 7 | import traceback 8 | 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 10 | import django 11 | django.setup() 12 | from django.db.models import Q 13 | 14 | from library.models import * 15 | import utils 16 | 17 | def remove_attempt(attempt): 18 | if attempt.repo.latest_attempt == attempt: 19 | attempt.repo.latest_attempt = None 20 | attempt.delete() 21 | 22 | def remove_unuseful_attempts(): 23 | reference_time = datetime.datetime.strptime('2016-01-01', '%Y-%m-%d') 24 | 25 | for repo in Repository.objects.all(): 26 | if repo.latest_successful_attempt == None: 27 | for attempt in Attempt.objects.filter(repo = repo).exclude(result = 'OK'): 28 | if attempt.stop_time < reference_time: 29 | remove_attempt(attempt) 30 | else: 31 | for attempt in Attempt.objects.filter(repo = repo).exclude(result = 'OK'): 32 | remove_attempt(attempt) 33 | 34 | def main(): 35 | remove_unuseful_attempts() 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /library/fixtures/projecttype.json: -------------------------------------------------------------------------------- 
1 | [ 2 | { 3 | "pk": 1, 4 | "model": "library.projecttype", 5 | "fields": { 6 | "logo": "img/django.png", 7 | "deployer_class": "DjangoDeployer", 8 | "default_port": 8000, 9 | "name": "Django", 10 | "filename": "models.py" 11 | } 12 | }, 13 | { 14 | "pk": 2, 15 | "model": "library.projecttype", 16 | "fields": { 17 | "logo": "img/ruby_on_rails.png", 18 | "deployer_class": "RoRDeployer", 19 | "default_port": 3000, 20 | "name": "Ruby on Rails", 21 | "filename": "database.yml" 22 | } 23 | }, 24 | { 25 | "pk": 3, 26 | "model": "library.projecttype", 27 | "fields": { 28 | "logo": "img/nodejs.png", 29 | "deployer_class": "NodeDeployer", 30 | "default_port": 8080, 31 | "name": "Node.js", 32 | "filename": "package.json" 33 | } 34 | }, 35 | { 36 | "pk": 4, 37 | "model": "library.projecttype", 38 | "fields": { 39 | "logo": "img/drupal.png", 40 | "deployer_class": "DrupalDeployer", 41 | "default_port": 8181, 42 | "name": "Drupal", 43 | "filename": "install.php" 44 | } 45 | }, 46 | { 47 | "pk": 5, 48 | "model": "library.projecttype", 49 | "fields": { 50 | "logo": "img/drupal.png", 51 | "deployer_class": "GrailsDeployer", 52 | "default_port": 8080, 53 | "name": "Grails", 54 | "filename": "application.properties" 55 | } 56 | } 57 | ] 58 | -------------------------------------------------------------------------------- /core/drivers/extract/driver/spiders/url_with_cookie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import scrapy 4 | from scrapy.spiders import CrawlSpider, Rule 5 | from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor 6 | import cookielib 7 | 8 | from driver.items import UrlItem 9 | 10 | class UrlWithCookieSpider(CrawlSpider): 11 | name = "url_with_cookie" 12 | allowed_domains = ["127.0.0.1"] 13 | 14 | def __init__(self, *args, **kwargs): 15 | super(UrlWithCookieSpider, self).__init__(*args, **kwargs) 16 | 17 | self.start_urls = [kwargs.get('start_url')] 18 | self.cookiejar = 
cookielib.LWPCookieJar() 19 | self.cookiejar.load(kwargs.get('cookie_jar')) 20 | 21 | self.rules = ( 22 | Rule (SgmlLinkExtractor(allow=('')), callback='parse_url', follow=True, process_request='add_cookie_for_request'), 23 | ) 24 | super(UrlWithCookieSpider, self)._compile_rules() 25 | 26 | def add_cookie_for_request(self, request): 27 | for cookie in self.cookiejar: 28 | request.cookies[cookie.name] = cookie.value 29 | logout_patterns = ['logout', 'log-out', 'log_out'] 30 | if any(logout_pattern in request.url for logout_pattern in logout_patterns): 31 | return None 32 | return request 33 | 34 | def parse_url(self, response): 35 | urlItem = UrlItem() 36 | urlItem['url'] = response.url 37 | yield urlItem -------------------------------------------------------------------------------- /core/utils/run.py: -------------------------------------------------------------------------------- 1 | from subprocess import PIPE, Popen 2 | from multiprocessing import Pool 3 | import time 4 | import traceback 5 | 6 | def get_process_children(pid): 7 | p = Popen('ps --no-headers -o pid --ppid %d' % pid, shell = True, stdout = PIPE, stderr = PIPE) 8 | stdout, stderr = p.communicate() 9 | return [int(p) for p in stdout.split()] 10 | 11 | def run(args, cwd = None, shell = True, env = None, inputs = None): 12 | ''' 13 | Run a command 14 | ''' 15 | p = Popen(args, shell = shell, executable = '/bin/bash', stdin = PIPE, stdout = PIPE, stderr = PIPE, cwd = cwd, env = env) 16 | stdout, stderr = '', '' 17 | if inputs != None: 18 | for input in inputs: 19 | try: 20 | time.sleep(5) 21 | p.stdin.write(input) 22 | except: 23 | # traceback.print_exc() 24 | pass 25 | stdout, stderr = p.communicate() 26 | return p.returncode, stdout, stderr 27 | 28 | def run_command(command, timeout=0, input=None, cwd=None): 29 | if timeout > 0: 30 | commands = command.split('&&') 31 | commands[-1] = 'timeout {} {}'.format(timeout, commands[-1]) 32 | command = '&& '.join(commands) 33 | return run(command, inputs = 
input, cwd = cwd) 34 | 35 | def run_command_async(command, timeout=0, input=None, cwd=None): 36 | pool = Pool(processes=1) 37 | return pool.apply_async(run_command, [command, timeout, input, cwd]), pool -------------------------------------------------------------------------------- /scripts/crawl_repos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import time 7 | import logging 8 | logging.basicConfig(filename='repo_crawler.log',level=logging.DEBUG) 9 | import json 10 | import traceback 11 | 12 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 13 | import django 14 | django.setup() 15 | 16 | import crawlers 17 | from library.models import * 18 | 19 | def main(): 20 | if len(sys.argv) != 2: 21 | return 22 | project_id = int(sys.argv[1]) 23 | 24 | try: 25 | with open(os.path.join(os.path.dirname(__file__), os.pardir, "secrets", "secrets.json"), 'r') as auth_file: 26 | auth = json.load(auth_file) 27 | except: 28 | auth = None 29 | 30 | while True: 31 | cs = CrawlerStatus.objects.get(id = project_id) 32 | repo_source = cs.source 33 | project_type = cs.project_type 34 | 35 | moduleName = "crawlers.%s" % (repo_source.crawler_class.lower()) 36 | moduleHandle = __import__(moduleName, globals(), locals(), [repo_source.crawler_class]) 37 | klass = getattr(moduleHandle, repo_source.crawler_class) 38 | crawler = klass(cs, auth) 39 | 40 | try: 41 | crawler.crawl() 42 | except: 43 | traceback.print_exc() 44 | time.sleep(10) 45 | ## WHILE 46 | ## IF 47 | 48 | 49 | if __name__ == '__main__': 50 | main() -------------------------------------------------------------------------------- /scripts/run_driver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 
import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import json 7 | import traceback 8 | 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 10 | import django 11 | django.setup() 12 | 13 | from drivers import * 14 | 15 | def main(): 16 | if len(sys.argv) < 2: 17 | return 18 | main_url = sys.argv[1] 19 | if len(sys.argv) >= 3: 20 | database_name = sys.argv[2] 21 | else: 22 | database_name = 'MySQL' 23 | database = Database.objects.get(name = database_name) 24 | 25 | print 'Driving ...' 26 | base_driver = BaseDriver(main_url, database, 'test') 27 | try: 28 | driverResult = base_driver.drive() 29 | except: 30 | traceback.print_exc() 31 | driverResult = {} 32 | 33 | print 'Random Walking ...' 34 | 35 | try: 36 | random_driver = RandomDriver(base_driver) 37 | random_driver.submit_forms() 38 | print random_driver.forms 39 | for form in random_driver.forms: 40 | if any(random_driver.equal_form(form, ret_form) for ret_form in driverResult['forms']): 41 | continue 42 | driverResult['forms'].append(form) 43 | except Exception, e: 44 | traceback.print_exc() 45 | 46 | print 'Driver Results:' 47 | print json.dumps(driverResult, indent=4, sort_keys=True) 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /scripts/crawl_repo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import time 7 | import logging 8 | logging.basicConfig(filename='repo_crawler.log',level=logging.DEBUG) 9 | import json 10 | import traceback 11 | 12 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 13 | import django 14 | 
django.setup() 15 | 16 | import crawlers 17 | from library.models import * 18 | import utils 19 | 20 | def add_module(): 21 | if len(sys.argv) != 5: 22 | return 23 | module_name = sys.argv[1] 24 | package_name = sys.argv[2] 25 | package_type_id = sys.argv[3] 26 | package_version = sys.argv[4] 27 | try: 28 | utils.add_module(module_name, package_name, package_type_id, package_version) 29 | print 'Successfully added new module {}'.format(module_name) 30 | except: 31 | print 'Failed to add new module {}'.format(repo_name) 32 | traceback.print_exc() 33 | 34 | def add_repository(): 35 | if len(sys.argv) != 3: 36 | return 37 | repo_name = sys.argv[1] 38 | repo_type_id = sys.argv[2] 39 | try: 40 | utils.add_repo(repo_name, repo_type_id, None) 41 | print 'Successfully added new repository {}'.format(repo_name) 42 | except: 43 | print 'Failed to add new repository {}'.format(repo_name) 44 | traceback.print_exc() 45 | 46 | def main(): 47 | # add_module() 48 | add_repository() 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /analysis/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: Zeyuan Shang 3 | # @Date: 2016-03-21 01:05:00 4 | # @Last Modified by: Zeyuan Shang 5 | # @Last Modified time: 2016-08-15 23:13:45 6 | import os 7 | import csv 8 | import pickle 9 | 10 | COMMITS_COUNT_THRESHOLD = 10 11 | 12 | def filter_repository(repo): 13 | if repo.commits_count >= 0 and repo.commits_count <= COMMITS_COUNT_THRESHOLD: 14 | return True 15 | return False 16 | 17 | def dump_stats(directory, description, values): 18 | with open(os.path.join(directory, description + '.csv'), 'wb') as csv_file: 19 | writer = csv.writer(csv_file) 20 | writer.writerow([description]) 21 | for label, stats in values.iteritems(): 22 | if isinstance(stats, list): 23 | for i in stats: 24 | writer.writerow([label, i]) 25 | elif 
isinstance(stats, dict): 26 | for key, value in stats.iteritems(): 27 | if isinstance(value, list): 28 | for second_value in value: 29 | writer.writerow([label, key, second_value]) 30 | else: 31 | writer.writerow([label, key, value]) 32 | else: 33 | writer.writerow([label, stats]) 34 | 35 | def dump_all_stats(directory, all_stats): 36 | for description in all_stats: 37 | dump_stats(directory, description, all_stats[description]) 38 | 39 | def pickle_dump(directory, description, data): 40 | with open(os.path.join(directory, description + '.pkl'), 'wb') as pickle_file: 41 | pickle.dump(data, pickle_file) -------------------------------------------------------------------------------- /library/fixtures/crawlerstatus.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 1, 4 | "model": "library.crawlerstatus", 5 | "fields": { 6 | "last_crawler_time": "2015-01-16T15:59:45", 7 | "source": 1, 8 | "project_type": 1, 9 | "next_url": "", 10 | "min_size": 100, 11 | "cur_size": 100, 12 | "max_size": 20000, 13 | "query": "django" 14 | } 15 | }, 16 | { 17 | "pk": 2, 18 | "model": "library.crawlerstatus", 19 | "fields": { 20 | "last_crawler_time": "2015-01-16T15:58:19", 21 | "source": 1, 22 | "project_type": 2, 23 | "next_url": "", 24 | "min_size": 100, 25 | "cur_size": 100, 26 | "max_size": 10000, 27 | "query": "" 28 | } 29 | }, 30 | { 31 | "pk": 3, 32 | "model": "library.crawlerstatus", 33 | "fields": { 34 | "last_crawler_time": "2015-01-16T15:58:19", 35 | "source": 1, 36 | "project_type": 3, 37 | "next_url": "", 38 | "min_size": 100, 39 | "cur_size": 100, 40 | "max_size": 10000, 41 | "query": "mysql" 42 | } 43 | }, 44 | { 45 | "pk": 4, 46 | "model": "library.crawlerstatus", 47 | "fields": { 48 | "last_crawler_time": "2015-01-16T15:58:19", 49 | "source": 2, 50 | "project_type": 4, 51 | "next_url": "", 52 | "min_size": 100, 53 | "cur_size": 100, 54 | "max_size": 10000, 55 | "query": "DRUPAL_ROOT" 56 | } 57 | }, 58 | { 59 | 
"pk": 5, 60 | "model": "library.crawlerstatus", 61 | "fields": { 62 | "last_crawler_time": "2015-01-16T15:58:19", 63 | "source": 1, 64 | "project_type": 5, 65 | "next_url": "", 66 | "min_size": 100, 67 | "cur_size": 100, 68 | "max_size": 10000, 69 | "query": "grails" 70 | } 71 | } 72 | ] 73 | -------------------------------------------------------------------------------- /library/templates/search.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% load staticfiles %} 3 | 4 | {% block title %}Repositories » {{ block.super }}{% endblock %} 5 | 6 | {% block main %} 7 | {% include "status/attempt_status_codes.html" %} 8 | {% include "admin/add_module.html" %} 9 | {% include "admin/add_repository.html" %} 10 | 11 | {% if messages %} 12 |
13 |
    14 | {% for message in messages %} 15 | {% if message.tags == 'success' %} 16 | 17 | {% endif %} 18 | {% if message.tags == 'error' %} 19 | 20 | {% endif %} 21 | {% endfor %} 22 |
23 | {% endif %} 24 | 25 | 26 | 27 |
    28 | 29 |
    30 |
    31 | {{ result_form }} 32 |
    33 | 34 |
    35 | {{ type_form }} 36 |
    37 | 38 |
    39 | 40 | 41 | 44 |
    45 | 46 |
    47 | Name: 48 | 49 |
    50 | 51 |
    52 |
    53 |
    54 | 55 | {% endblock %} 56 | -------------------------------------------------------------------------------- /library/serializers.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | from rest_framework import serializers 3 | 4 | class RepositorySerializer(serializers.ModelSerializer): 5 | class Meta: 6 | model = Repository 7 | 8 | class RuntimeSerializer(serializers.ModelSerializer): 9 | class Meta: 10 | model = Runtime 11 | 12 | class DatabaseSerializer(serializers.ModelSerializer): 13 | class Meta: 14 | model = Database 15 | 16 | class PackageSerializer(serializers.ModelSerializer): 17 | class Meta: 18 | model = Package 19 | 20 | class DependencySerializer(serializers.ModelSerializer): 21 | package_info = PackageSerializer(source='package') 22 | 23 | class Meta: 24 | model = Dependency 25 | fields = ('id', 'source', 'attempt', 'package_info') 26 | 27 | class FieldSerializer(serializers.ModelSerializer): 28 | class Meta: 29 | model = Field 30 | 31 | class QuerySerializer(serializers.ModelSerializer): 32 | class Meta: 33 | model = Query 34 | 35 | class ActionSerializer(serializers.ModelSerializer): 36 | fields = FieldSerializer(many=True, read_only=True) 37 | queries = QuerySerializer(many=True, read_only=True) 38 | class Meta: 39 | model = Action 40 | 41 | 42 | class AttemptSerializer(serializers.ModelSerializer): 43 | repo_info = RepositorySerializer(source='repo') 44 | runtime_info = RuntimeSerializer(source='runtime') 45 | database_info = DatabaseSerializer(source='database') 46 | dependencies = DependencySerializer(source='dependency_set', many=True) 47 | actions = ActionSerializer(many=True, read_only = True) 48 | 49 | class Meta: 50 | model = Attempt 51 | fields = ('id', 'start_time', 'stop_time', 'repo_info', 'sha', 'size', 'log', 'hostname', 52 | 'runtime_info', 'database_info', 'result', 'register', 'login', 'actions_count', 'queries_count', 53 | 'dependencies', 'actions' 54 | ) 
-------------------------------------------------------------------------------- /core/analyzers/baseanalyzer.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import logging 5 | 6 | ## ===================================================================== 7 | ## LOGGING CONFIGURATION 8 | ## ===================================================================== 9 | LOG = logging.getLogger() 10 | 11 | ## ===================================================================== 12 | ## BASE ANALYZER 13 | ## ===================================================================== 14 | class BaseAnalyzer(object): 15 | 16 | def __init__(self, deployer): 17 | self.queries_stats = {} 18 | self.database_stats = {} 19 | self.database_informations = {} 20 | self.deployer = deployer 21 | 22 | def is_valid_for_explain(self, query): 23 | if not query: 24 | return False 25 | prefixes = ['show', 'begin', 'end', 'commit', 'set'] 26 | lowered_query = query.lower() 27 | if any(lowered_query.startswith(prefix) for prefix in prefixes): 28 | return False 29 | return True 30 | 31 | def count_transaction(self, queries): 32 | transaction = False 33 | transaction_count = 0 34 | for query in queries: 35 | if 'BEGIN' in query['content'].upper() or 'START TRANSACTION' in query['content'].upper(): 36 | transaction = True 37 | elif transaction: 38 | if 'COMMIT' in query['content'].upper(): 39 | # for each transaction, count the number of transactions 40 | transaction_count += 1 41 | transaction = False 42 | return transaction_count 43 | 44 | def analyze_queries(self, queries): 45 | raise NotImplementedError("Unimplemented %s" % self.__init__.im_class) 46 | 47 | def analyze_database(self): 48 | raise NotImplementedError("Unimplemented %s" % self.__init__.im_class) -------------------------------------------------------------------------------- 
/core/drivers/benchmarkdriver.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import logging 5 | import requests 6 | import re 7 | import copy 8 | import traceback 9 | import requests 10 | import mechanize 11 | 12 | from library.models import * 13 | from cmudbac.settings import * 14 | import utils 15 | import extract 16 | import submit 17 | import count 18 | from basedriver import BaseDriver 19 | 20 | ## ===================================================================== 21 | ## LOGGING CONFIGURATION 22 | ## ===================================================================== 23 | LOG = logging.getLogger() 24 | 25 | ## ===================================================================== 26 | ## BENCHMARK DRIVER 27 | ## ===================================================================== 28 | class BenchmarkDriver(BaseDriver): 29 | 30 | def __init__(self, driver): 31 | BaseDriver.__init__(self, driver.deployer) 32 | self.forms = driver.forms 33 | self.urls = driver.urls 34 | self.browser = mechanize.Browser() 35 | if driver.browser != None: 36 | self.browser.set_cookiejar(driver.browser._ua_handlers['_cookies'].cookiejar) 37 | self.browser.set_handle_robots(False) 38 | 39 | def submit_actions(self): 40 | actions_cnt = 0 41 | for form, browser_index in self.forms: 42 | try: 43 | if browser_index == 0: 44 | submit.fill_form_random(self.deployer.base_path, form, self.browser) 45 | else: 46 | submit.fill_form_random(self.deployer.base_path, form, None) 47 | except: 48 | pass 49 | actions_cnt += 1 50 | for url in self.urls: 51 | try: 52 | submit.query_url(url, self.browser) 53 | except: 54 | pass 55 | actions_cnt += 1 56 | return actions_cnt 57 | 58 | 59 | -------------------------------------------------------------------------------- /library/templates/admin/add_module.html: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/drivers/submit/register.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import re 5 | from urlparse import urlparse 6 | 7 | import extract 8 | from patterns import patterns, match_any_pattern 9 | from submit import fill_form 10 | 11 | def get_register_form(forms): 12 | register_patterns = ['register', 'signup', 'sign-up', 'sign_up'] 13 | for form in forms: 14 | if 'method' in form and form['method'] != 'post': 15 | continue 16 | if match_any_pattern(form['action'], register_patterns): 17 | return form 18 | if match_any_pattern(form['url'], register_patterns): 19 | return form 20 | if match_any_pattern(form.get('id', ''), register_patterns): 21 | return form 22 | return None 23 | 24 | def verify_email(deploy_path, form, matched_patterns): 25 | email_file = None 26 | for log_file in os.listdir(deploy_path): 27 | if log_file.endswith('.log'): 28 | email_file = log_file 29 | break 30 | if not email_file: 31 | return matched_patterns, None 32 | 33 | email_content = open(os.path.join(deploy_path, email_file)).read() 34 | verify_url = re.search('http://.+', email_content) 35 | if not verify_url: 36 | return matched_patterns, None 37 | verify_url = urlparse(verify_url.group(0))._replace(netloc = urlparse(form['url']).netloc) 38 | verify_url = verify_url.geturl() 39 | 40 | verify_forms = extract.extract_forms(verify_url) 41 | for verify_form in verify_forms: 42 | verify_form['url'] = verify_url 43 | matched_patterns, inputs, response, br = fill_form(verify_form, matched_patterns) 44 | 45 | return matched_patterns, inputs 46 | 47 | def register(deploy_path, forms): 48 | register_form = get_register_form(forms) 49 | print 'Register form: {}'.format(register_form) 50 | if 
register_form == None: 51 | return None, None, None 52 | 53 | matched_patterns, inputs, response, br = fill_form(register_form) 54 | 55 | if 'email' in matched_patterns: 56 | matched_patterns, part_inputs = verify_email(deploy_path, register_form, matched_patterns) 57 | if part_inputs != None: 58 | inputs.update(part_inputs) 59 | 60 | return register_form, matched_patterns, inputs 61 | -------------------------------------------------------------------------------- /core/utils/pip.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import expanduser 3 | 4 | from run import run_command 5 | from file import cd 6 | 7 | HOME_DIR = expanduser('~') 8 | 9 | def home_path(path): 10 | return os.path.join(HOME_DIR, path) 11 | 12 | def configure_env(path): 13 | command = 'virtualenv --no-site-packages {}'.format(path) 14 | return run_command(command) 15 | 16 | def to_env(path): 17 | return '{} && {}'.format(cd(path), 'source bin/activate') 18 | 19 | def pip_install(path, names, is_file, has_version = True): 20 | command = '{} && pip --no-cache-dir install'.format(to_env(path)) 21 | 22 | proxy = os.environ.get('http_proxy') 23 | if proxy: 24 | command = '{} --proxy {} '.format(command, proxy) 25 | if is_file: 26 | filename = home_path(names) 27 | command = '{} -r {}'.format(command, filename) 28 | else: 29 | for name in names: 30 | if isinstance(name, dict): 31 | if name.get('version', ''): 32 | command = '{} {}=={} '.format(command, name['name'], name['version']) 33 | else: 34 | command = '{} {}'.format(command, name['name']) 35 | else: 36 | if has_version and name.version != None and name.version != '': 37 | command = '{} {}=={} '.format(command, name.name, name.version) 38 | elif name.name == 'django': 39 | command = '{} {}==1.8.4'.format(command, name.name) 40 | else: 41 | command = '{} {}'.format(command, name.name) 42 | out = run_command(command) 43 | 44 | return out 45 | 46 | def pip_install_text(path, 
name): 47 | command = '{} && pip --no-cache-dir install'.format(to_env(path)) 48 | 49 | proxy = os.environ.get('http_proxy') 50 | if proxy: 51 | command = '{} --proxy {} '.format(command, proxy) 52 | command = '{} {} '.format(command, name) 53 | out = run_command(command) 54 | 55 | return out 56 | 57 | def pip_freeze(path): 58 | out = run_command('{} && pip freeze'.format(to_env(path))) 59 | out = out[1].strip().splitlines() 60 | out = [line for line in out if not ' ' in line and '==' in line] 61 | return out 62 | -------------------------------------------------------------------------------- /library/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from models import * 3 | 4 | class DependencyInline(admin.StackedInline): 5 | model = Dependency 6 | extra = 3 7 | 8 | class ProjectTypeAdmin(admin.ModelAdmin): 9 | list_display = [ 'name', 'filename', 'deployer_class' ] 10 | ## CLASS 11 | 12 | class RepositorySourceAdmin(admin.ModelAdmin): 13 | list_display = [ 'name', 'crawler_class', 'base_url', 'commit_url', 'search_token', ] 14 | ## CLASS 15 | 16 | class CrawlerStatusAdmin(admin.ModelAdmin): 17 | list_display = [ 'id', 'source', 'project_type', 'next_url', 'last_crawler_time', ] 18 | ## CLASS 19 | 20 | class RepositoryAdmin(admin.ModelAdmin): 21 | list_display = [ 'id', 'name', 'valid_project', 'get_project_type', 'source', 'commits_count', 'description', 'crawler_date', 'updated_date' ] 22 | list_filter = ['project_type', 'valid_project', 'crawler_date', 'updated_date'] 23 | fieldsets = [ 24 | (None, {'fields': ['name', 'project_type', 'source', 'description']}), 25 | ('Date information', {'fields': ['created_at', 'updated_at', 'pushed_at']}), 26 | ] 27 | 28 | def get_project_type(self, obj): 29 | return obj.project_type.name 30 | 31 | get_project_type.short_description = 'Project Type' 32 | # CLASS 33 | 34 | class AttemptAdmin(admin.ModelAdmin): 35 | list_display = [ 'id', 'repo', 
'result_name', 'start_time', 'stop_time' ] 36 | list_filter = ['result', 'start_time'] 37 | raw_id_fields = [ 'repo' ] 38 | #inlines = [DependencyInline] 39 | # CLASS 40 | 41 | class PackageAdmin(admin.ModelAdmin): 42 | list_display = [ 'name', 'project_type', 'version', 'count' ] 43 | list_filter = ['project_type'] 44 | # CLASS 45 | 46 | # Register your models here. 47 | admin.site.register(ProjectType, ProjectTypeAdmin) 48 | admin.site.register(RepositorySource, RepositorySourceAdmin) 49 | admin.site.register(CrawlerStatus, CrawlerStatusAdmin) 50 | admin.site.register(Database) 51 | 52 | admin.site.register(Repository, RepositoryAdmin) 53 | admin.site.register(Package, PackageAdmin) 54 | admin.site.register(Dependency) 55 | admin.site.register(Attempt, AttemptAdmin) 56 | admin.site.register(Module) 57 | admin.site.register(WebStatistic) 58 | admin.site.register(Statistic) 59 | -------------------------------------------------------------------------------- /library/static/md/tools.md: -------------------------------------------------------------------------------- 1 | # Command Line Tool Tutorial 2 | 3 | ### Install Requirements 4 | To Start with, please install the required Python packages for running the command line tool. You can install them by pip as following 5 | ```sh 6 | $ pip install requests 7 | ``` 8 | 9 | Please add enough permission for the main.py file as following 10 | ```sh 11 | $ chmod +x main.py 12 | ``` 13 | 14 | ### Get Attempt Information 15 | You can get the sufficient information of an attempt by running this command: 16 | ```sh 17 | $ ./main.py info -attempt ATTEMPT 18 | ``` 19 | where *ATTEMPT* is the id of the attempt you want to inquire. 20 | 21 | If you want to know more information, you can type this command to get a help message: 22 | ```sh 23 | $ ./main.py info -h 24 | ``` 25 | 26 | ### Running Benchmark 27 | A lot of arguments are required to run the benchmark. 
You can type this command to get the full information: 28 | ```sh 29 | ./main.py benchmark -h 30 | ``` 31 | We have provide you with a comprehensive illustraions about the arguments: 32 | ```sh 33 | usage: main.py benchmark [-h] [-attempt ATTEMPT] [-database DATABASE] [-host HOST] [-port PORT] [-name NAME] [-username USERNAME] [-password PASSWORD] [-num_threads NUM_THREADS] [-timeout TIMEOUT] 34 | 35 | optional arguments: 36 | -h, --help show this help message and exit 37 | -attempt ATTEMPT, --attempt ATTEMPT 38 | the id of the attempt 39 | -database DATABASE, --database DATABASE 40 | the database you are using, e.g. mysql 41 | -host HOST, --host HOST 42 | the host address of your database server 43 | -port PORT, --port PORT 44 | the port of your database server 45 | -name NAME, --name NAME 46 | the name of your database 47 | -username USERNAME, --username USERNAME 48 | the username of your database server 49 | -password PASSWORD, --password PASSWORD 50 | the password of your database server 51 | -num_threads NUM_THREADS, --num_threads NUM_THREADS 52 | the number of threads you want to use to submit forms 53 | -timeout TIMEOUT, --timeout TIMEOUT 54 | the timeout for submitting forms 55 | ``` 56 | 57 | Then you can see the results if the arguments are correctly provided. 58 | -------------------------------------------------------------------------------- /library/templates/admin/add_repository.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /library/templates/about.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}About » {% endblock %} 4 | 5 | {% block main %} 6 |

    About

    7 | 8 | 9 |
    10 |

    The goal of the Database Application Catalog project is to find a large number of database applications to use in various projects, including workload analysis, automatic tuning, and benchmarking. It searches the Internet for web-based database applications and runs them locally in order to learn how they use a DBMS. 11 | 12 |

    All of the source code for the CMDBAC is available on GitHub under the Apache Software License. 13 |

    14 | 15 | 16 | 17 |
    18 |

    People

    19 | 20 | 21 |
    22 |
    23 | Zeyuan Shang
    Zeyuan Shang
    24 |
    Tsinghua University 25 |
    26 |
    27 | Dana Van Aken
    Dana Van Aken
    28 |
    Carnegie Mellon University 29 |
    30 |
    31 | Andy Pavlo
    Andy Pavlo
    32 |
    Carnegie Mellon University 33 |
    34 |
    35 | 36 | 37 |

    Alumni

    38 |
      39 |
    • Fangyu Gao (Carnegie Mellon University)
    • 40 |
    41 |
    42 | 43 | 44 | 45 |
    46 |

    Acknowledgements

    47 | This research was funded (in part) by the National Science Foundation (III-1423210). 48 |
    49 |
    50 | 51 | 52 | 53 | {% endblock %} -------------------------------------------------------------------------------- /analysis/general/analyze_repository.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) 5 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "core")) 6 | 7 | from utils import filter_repository 8 | 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 10 | import django 11 | django.setup() 12 | 13 | from library.models import * 14 | 15 | def repository_stats(): 16 | stats = {} 17 | 18 | for project_type in ProjectType.objects.all(): 19 | project_type_name = project_type.name 20 | stats[project_type_name] = [] 21 | 22 | for repo in Repository.objects.filter(project_type = project_type).exclude(latest_successful_attempt = None): 23 | if filter_repository(repo): 24 | continue 25 | transaction_count = 0 26 | 27 | for action in Action.objects.filter(attempt = repo.latest_successful_attempt): 28 | transaction = '' 29 | for query in Query.objects.filter(action = action): 30 | if 'BEGIN' in query.content.upper() or 'START TRANSACTION' in query.content.upper(): 31 | transaction = query.content + '\n' 32 | elif transaction != '': 33 | transaction += query.content + '\n' 34 | if 'COMMIT' in query.content.upper(): 35 | transaction = transaction.strip('\n') 36 | 37 | # for each transaction, count the number of transactions 38 | transaction_count += 1 39 | 40 | if transaction_count > 0: 41 | stats[project_type_name].append((repo.commits_count, transaction_count, repo)) 42 | 43 | for project_type_name in stats: 44 | print project_type_name 45 | 46 | for commits_count, transaction_count, repo in sorted(stats[project_type_name], reverse = True): 47 | print repo.name, 
'txns:{}'.format(transaction_count), 'commits:{}'.format(commits_count), 48 | print 'http://cmdbac.cs.cmu.edu/attempt/' + str(repo.latest_successful_attempt.id) 49 | 50 | print '------------------------------' 51 | 52 | def main(): 53 | # active 54 | repository_stats() 55 | 56 | # working 57 | 58 | # deprecated 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /blog/templatetags/pinax_blog_tags.py: -------------------------------------------------------------------------------- 1 | from django import template 2 | 3 | from ..models import Post, Section 4 | 5 | 6 | register = template.Library() 7 | 8 | 9 | class LatestBlogPostsNode(template.Node): 10 | 11 | def __init__(self, context_var): 12 | self.context_var = context_var 13 | 14 | def render(self, context): 15 | latest_posts = Post.objects.current()[:5] 16 | context[self.context_var] = latest_posts 17 | return "" 18 | 19 | 20 | @register.tag 21 | def latest_blog_posts(parser, token): 22 | bits = token.split_contents() 23 | return LatestBlogPostsNode(bits[2]) 24 | 25 | 26 | class LatestBlogPostNode(template.Node): 27 | 28 | def __init__(self, context_var): 29 | self.context_var = context_var 30 | 31 | def render(self, context): 32 | try: 33 | latest_post = Post.objects.current()[0] 34 | except IndexError: 35 | latest_post = None 36 | context[self.context_var] = latest_post 37 | return "" 38 | 39 | 40 | @register.tag 41 | def latest_blog_post(parser, token): 42 | bits = token.split_contents() 43 | return LatestBlogPostNode(bits[2]) 44 | 45 | 46 | class LatestSectionPostNode(template.Node): 47 | 48 | def __init__(self, section, context_var): 49 | self.section = template.Variable(section) 50 | self.context_var = context_var 51 | 52 | def render(self, context): 53 | section = self.section.resolve(context) 54 | 55 | post = Post.objects.published().filter(section__name=section).order_by("-published") 56 | try: 57 | post = post[0] 58 | except 
IndexError: 59 | post = None 60 | context[self.context_var] = post 61 | return "" 62 | 63 | 64 | @register.tag 65 | def latest_section_post(parser, token): 66 | """ 67 | {% latest_section_post "articles" as latest_article_post %} 68 | """ 69 | bits = token.split_contents() 70 | return LatestSectionPostNode(bits[1], bits[3]) 71 | 72 | 73 | class BlogSectionsNode(template.Node): 74 | 75 | def __init__(self, context_var): 76 | self.context_var = context_var 77 | 78 | def render(self, context): 79 | sections = Section.objects.filter(enabled=True) 80 | context[self.context_var] = sections 81 | return "" 82 | 83 | 84 | @register.tag 85 | def blog_sections(parser, token): 86 | """ 87 | {% blog_sections as blog_sections %} 88 | """ 89 | bits = token.split_contents() 90 | return BlogSectionsNode(bits[2]) 91 | -------------------------------------------------------------------------------- /core/analyzers/sqlite3analyzer.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import logging 5 | 6 | from baseanalyzer import BaseAnalyzer 7 | 8 | ## ===================================================================== 9 | ## LOGGING CONFIGURATION 10 | ## ===================================================================== 11 | LOG = logging.getLogger() 12 | 13 | ## ===================================================================== 14 | ## SQLITE3 ANALYZER 15 | ## ===================================================================== 16 | class SQLite3Analyzer(BaseAnalyzer): 17 | 18 | def __init__(self, deployer): 19 | BaseAnalyzer.__init__(self, deployer) 20 | 21 | def analyze_queries(self, queries): 22 | self.queries_stats['num_transactions'] = self.count_transaction(queries) + self.queries_stats.get('num_transactions', 0) 23 | 24 | try: 25 | conn = self.deployer.get_database_connection() 26 | cur = conn.cursor() 27 | 28 | for query in queries: 29 
| try: 30 | if self.is_valid_for_explain(query['raw']): 31 | explain_query = 'EXPLAIN {};'.format(query['raw']) 32 | # print explain_query 33 | cur.execute(explain_query) 34 | rows = cur.fetchall() 35 | output = '\n' 36 | for row in rows: 37 | output += str(row) + '\n' 38 | query['explain'] = output 39 | except Exception, e: 40 | pass 41 | # LOG.exception(e) 42 | 43 | cur.close() 44 | conn.close() 45 | except Exception, e: 46 | LOG.exception(e) 47 | 48 | def analyze_database(self): 49 | try: 50 | conn = self.deployer.get_database_connection() 51 | cur = conn.cursor() 52 | database = self.deployer.get_database_name() 53 | 54 | # the number of tables 55 | cur.execute("SELECT COUNT(*) FROM sqlite_master WHERE type = 'table';") 56 | self.database_stats['num_tables'] = int(cur.fetchone()[0]) 57 | 58 | # the number of indexes 59 | cur.execute("SELECT COUNT(*) FROM sqlite_master WHERE type = 'index';") 60 | self.database_stats['num_indexes'] = int(cur.fetchone()[0]) 61 | 62 | cur.close() 63 | conn.close() 64 | except Exception, e: 65 | LOG.exception(e) -------------------------------------------------------------------------------- /core/scripts/vagrant_deploy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) 5 | 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 7 | import django 8 | django.setup() 9 | from library.models import * 10 | from deployers import * 11 | from drivers import * 12 | from analyzers import * 13 | import utils 14 | 15 | def main(): 16 | if len(sys.argv) not in [3, 4]: 17 | return 18 | repo_name = sys.argv[1] 19 | deploy_id = sys.argv[2] 20 | if len(sys.argv) > 3: 21 | database_name = sys.argv[3] 22 | else: 23 | database_name = 'MySQL' 24 | print 'Database : {} ...'.format(database_name) 25 | 26 | 
repo = Repository.objects.get(name=repo_name) 27 | database = Database.objects.get(name=database_name) 28 | 29 | moduleName = "deployers.%s" % (repo.project_type.deployer_class.lower()) 30 | moduleHandle = __import__(moduleName, globals(), locals(), [repo.project_type.deployer_class]) 31 | klass = getattr(moduleHandle, repo.project_type.deployer_class) 32 | 33 | deployer = klass(repo, database, deploy_id) 34 | if deployer.deploy() != 0: 35 | deployer.kill_server() 36 | sys.exit(-1) 37 | 38 | print 'Driving ...' 39 | 40 | driver = BaseDriver(deployer.get_main_url(), deployer.get_database(), deployer.deploy_id, deployer.base_path, deployer.log_file) 41 | try: 42 | driverResult = driver.drive() 43 | except Exception, e: 44 | LOG.exception(e) 45 | driverResult = {} 46 | 47 | print 'Random Walking ...' 48 | 49 | try: 50 | random_driver = RandomDriver(driver) 51 | random_driver.start() 52 | print 'Random Walk Forms Count: {}'.format(len(random_driver.forms)) 53 | print 'Basic Forms Count: {}'.format(len(driverResult['forms'])) 54 | for form in random_driver.forms: 55 | if any(random_driver.equal_form(form, ret_form) for ret_form in driverResult['forms']): 56 | continue 57 | driverResult['forms'].append(form) 58 | except Exception, e: 59 | LOG.exception(e) 60 | 61 | deployer.kill_server() 62 | 63 | analyzer = get_analyzer(deployer) 64 | for form in driverResult['forms']: 65 | analyzer.analyze_queries(form['queries']) 66 | for url in driverResult['urls']: 67 | analyzer.analyze_queries(url['queries']) 68 | driverResult['statistics'] = analyzer.queries_stats 69 | analyzer.analyze_database() 70 | driverResult['statistics'].update(analyzer.database_stats) 71 | driverResult['informations'] = analyzer.database_informations 72 | 73 | deployer.save_attempt(ATTEMPT_STATUS_SUCCESS, driverResult) 74 | 75 | if __name__ == "__main__": 76 | main() -------------------------------------------------------------------------------- /blog/admin.py: 
-------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from django.utils import timezone 3 | from django.utils.functional import curry 4 | 5 | from .forms import AdminPostForm 6 | from .models import Post, Image, ReviewComment, Section 7 | from .utils import can_tweet 8 | 9 | 10 | class ImageInline(admin.TabularInline): 11 | model = Image 12 | fields = ["image_path"] 13 | 14 | 15 | class ReviewInline(admin.TabularInline): 16 | model = ReviewComment 17 | 18 | 19 | def make_published(modeladmin, request, queryset): 20 | queryset = queryset.exclude(state=Post.STATE_CHOICES[-1][0], published__isnull=False) 21 | queryset.update(state=Post.STATE_CHOICES[-1][0]) 22 | queryset.filter(published__isnull=True).update(published=timezone.now()) 23 | make_published.short_description = "Publish selected posts" 24 | 25 | 26 | class PostAdmin(admin.ModelAdmin): 27 | list_display = ["title", "state", "section", "published", "show_secret_share_url"] 28 | list_filter = ["section", "state"] 29 | form = AdminPostForm 30 | actions = [make_published] 31 | fields = [ 32 | "section", 33 | "title", 34 | "slug", 35 | "author", 36 | "published", 37 | "markup", 38 | "teaser", 39 | "content", 40 | "description", 41 | "primary_image", 42 | "sharable_url", 43 | "state" 44 | ] 45 | readonly_fields = ["sharable_url"] 46 | 47 | if can_tweet(): 48 | fields.append("tweet") 49 | prepopulated_fields = {"slug": ("title",)} 50 | inlines = [ 51 | ImageInline, 52 | ReviewInline, 53 | ] 54 | 55 | def show_secret_share_url(self, obj): 56 | return '%s' % (obj.sharable_url, obj.sharable_url) 57 | show_secret_share_url.short_description = "Share this url" 58 | show_secret_share_url.allow_tags = True 59 | 60 | def formfield_for_dbfield(self, db_field, **kwargs): 61 | request = kwargs.get("request") 62 | if db_field.name == "author": 63 | ff = super(PostAdmin, self).formfield_for_dbfield(db_field, **kwargs) 64 | ff.initial = request.user.id 65 | 
return ff 66 | return super(PostAdmin, self).formfield_for_dbfield(db_field, **kwargs) 67 | 68 | def get_form(self, request, obj=None, **kwargs): 69 | kwargs.update({ 70 | "formfield_callback": curry(self.formfield_for_dbfield, request=request), 71 | }) 72 | return super(PostAdmin, self).get_form(request, obj, **kwargs) 73 | 74 | def save_form(self, request, form, change): 75 | # this is done for explicitness that we want form.save to commit 76 | # form.save doesn't take a commit kwarg for this reason 77 | return form.save() 78 | 79 | 80 | class SectionAdmin(admin.ModelAdmin): 81 | prepopulated_fields = {"slug": ("name",)} 82 | 83 | 84 | admin.site.register(Post, PostAdmin) 85 | admin.site.register(Image) 86 | admin.site.register(Section, SectionAdmin) 87 | -------------------------------------------------------------------------------- /library/forms.py: -------------------------------------------------------------------------------- 1 | from django import forms 2 | from models import * 3 | from django.template.loader import render_to_string 4 | from django.forms.fields import EMPTY_VALUES 5 | from django.utils.translation import ugettext as _ 6 | 7 | 8 | class ResultForm(forms.Form): 9 | results = forms.MultipleChoiceField( 10 | widget=forms.CheckboxSelectMultiple, 11 | choices=reversed(ATTEMPT_STATUS), 12 | required=False, 13 | label="Latest Attempt Status") 14 | 15 | class ProjectTypeForm(forms.Form): 16 | options = ProjectType.objects.all().values_list('name', 'name') 17 | types = forms.MultipleChoiceField( 18 | widget=forms.CheckboxSelectMultiple, 19 | choices=options, 20 | required=False, 21 | label="Project Type") 22 | 23 | class StatisticsForm(forms.Form): 24 | num_options = [('-1', 'Any'), ('0-10', 'Less than or equal to 10'), ('11-100', 'Between 11 and 100'), ('101-99999', 'More than 100')] 25 | ratio_options = [('-1', 'Any'), ('0-50', 'Lesson than or equal to 0.5'), ('51-100', '0.5-1'), ('101-99999', 'More than 1')] 26 | 27 | num_tables = 
forms.ChoiceField(choices=num_options, required = False, label = '# of Tables', widget=forms.Select(attrs={'class':'form-control'})) 28 | num_indexes = forms.ChoiceField(choices=num_options, required = False, label = '# of Indexes', widget=forms.Select(attrs={'class':'form-control'})) 29 | num_secondary_indexes = forms.ChoiceField(choices=num_options, required = False, label = '# of Secondary Indexes', widget=forms.Select(attrs={'class':'form-control'})) 30 | num_constraints = forms.ChoiceField(choices=num_options, required = False, label = '# of Constraints', widget=forms.Select(attrs={'class':'form-control'})) 31 | num_foreignkeys = forms.ChoiceField(choices=num_options, required = False, label = '# of Foreign Keys', widget=forms.Select(attrs={'class':'form-control'})) 32 | num_transactions = forms.ChoiceField(choices=num_options, required = False, label = '# of Transactions', widget=forms.Select(attrs={'class':'form-control'})) 33 | transaction_ratio = forms.ChoiceField(choices=ratio_options, required = False, label = 'Ratio of Txn/Action', widget=forms.Select(attrs={'class':'form-control'})) 34 | 35 | coverage_options = [('-1', 'Any'), ('0-20', 'Less than 20'), ('21-40', '21-40'), ('41-60', '41-60'), ('61-80', '61-80'), ('81-100', '81-100')] 36 | table_coverage = forms.ChoiceField(choices=coverage_options, required = False, label = 'Table Coverage', widget=forms.Select(attrs={'class':'form-control'})) 37 | column_coverage = forms.ChoiceField(choices=coverage_options, required = False, label = 'Column Coverage', widget=forms.Select(attrs={'class':'form-control'})) 38 | # index_coverage = forms.ChoiceField(choices=coverage_options, required = False, label = 'Index Coverage', widget=forms.Select(attrs={'class':'form-control'})) 39 | 40 | -------------------------------------------------------------------------------- /scripts/count_repos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, 
sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import time 7 | import traceback 8 | 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 10 | import django 11 | django.setup() 12 | from django.db.models import Q 13 | 14 | from library.models import * 15 | import utils 16 | 17 | COMMITS_COUNT_THRESHOLD = 10 18 | 19 | def count_deployed_repos(): 20 | stats = {} 21 | for repo in Repository.objects.exclude(latest_successful_attempt = None): 22 | if repo.commits_count >= 0 and repo.commits_count <= COMMITS_COUNT_THRESHOLD: 23 | continue 24 | if Information.objects.filter(attempt = repo.latest_successful_attempt).filter(name = 'key_column_usage'): 25 | stats[repo.project_type] = stats.get(repo.project_type, 0) + 1 26 | 27 | print stats 28 | 29 | def count_ruby_failed_repos(): 30 | count = 0 31 | for repo in Repository.objects.filter(latest_successful_attempt = None).filter(project_type = 2).exclude(latest_attempt = None): 32 | if repo.commits_count >= 0 and repo.commits_count <= COMMITS_COUNT_THRESHOLD: 33 | continue 34 | if 'Unable to find database.yml' in repo.latest_attempt.log: 35 | count += 1 36 | 37 | print count 38 | 39 | def count_ruby_repetive_queries(): 40 | repo_count = [0, 0] 41 | action_count = [0, 0] 42 | for repo in Repository.objects.exclude(latest_successful_attempt = None).filter(project_type = 2): 43 | repo_flag = False 44 | for action in Action.objects.filter(attempt = repo.latest_successful_attempt): 45 | action_flag = False 46 | for query in Query.objects.filter(action = action): 47 | if 'SELECT 1' in query.content: 48 | repo_flag = True 49 | action_flag = True 50 | if action_flag: 51 | action_count[0] += 1 52 | action_count[1] += 1 53 | if repo_flag: 54 | repo_count[0] += 1 55 | repo_count[1] += 1 56 | 57 | print repo_count 58 | print action_count 59 | 60 | def count_wrong_marked_repos(): 61 | repo_count = 0 62 | 
for repo in Repository.objects.exclude(latest_successful_attempt = None): 63 | if repo.latest_successful_attempt.result != 'OK': 64 | repo_count += 1 65 | repo.latest_successful_attempt = None 66 | repo.save() 67 | for repo in Repository.objects.filter(project_type = 2): 68 | attempts = Attempt.objects.filter(repo = repo).filter(result = 'OK') 69 | if attempts: 70 | repo.latest_successful_attempt = list(attempts)[-1] 71 | repo.save() 72 | print repo_count 73 | 74 | def main(): 75 | # count_deployed_repos() 76 | # count_ruby_failed_repos() 77 | # count_ruby_repetive_queries() 78 | count_wrong_marked_repos() 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /core/utils/file.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import re 4 | 5 | from run import run_command 6 | 7 | def search_file(directory_name, file_name): 8 | result = [] 9 | for root, dirs, files in os.walk(directory_name): 10 | for file in files: 11 | if file == file_name: 12 | path = os.path.join(root, file) 13 | if not os.path.islink(path): 14 | result.append(path) 15 | return result 16 | 17 | def search_file_regex(directory_name, file_name_pattern): 18 | result = [] 19 | for root, dirs, files in os.walk(directory_name): 20 | for file in files: 21 | if re.search(file_name_pattern, file): 22 | path = os.path.join(root, file) 23 | if not os.path.islink(path): 24 | result.append(path) 25 | return result 26 | 27 | def search_file_norecur(directory_name, file_name): 28 | for file in os.listdir(directory_name): 29 | if os.path.isfile(os.path.join(directory_name, file)) and file == file_name: 30 | return True 31 | return False 32 | 33 | def search_dir(directory_name, query_name): 34 | for root, dirs, files in os.walk(directory_name): 35 | for _dir in dirs: 36 | if query_name in _dir: 37 | path = os.path.join(root, _dir) 38 | return path 39 | 40 | def 
replace_file_regex(file, string_pattern, string): 41 | with open(file, "r+") as f: 42 | s = f.read() 43 | s = re.sub(string_pattern, string, s, flags=re.DOTALL) 44 | f.seek(0) 45 | f.write(s) 46 | f.truncate() 47 | f.close() 48 | 49 | def replace_files_regex(directory_name, string_pattern, string): 50 | for root, dirs, files in os.walk(directory_name): 51 | for file in files: 52 | replace_file_regex(os.path.join(root, file), string_pattern, string) 53 | 54 | def unzip(zip_name, dir_name): 55 | command = 'unzip -o -qq ' + zip_name + ' -d ' + dir_name 56 | out = run_command(command) 57 | 58 | def rm_dir(path): 59 | #if os.path.exists(path): 60 | # shutil.rmtree(path) 61 | os.system('sudo rm -rf {}'.format(path)) 62 | 63 | def mk_dir(path): 64 | if not os.path.exists(path): 65 | os.makedirs(path) 66 | 67 | def chmod_dir(path): 68 | if os.path.exists(path): 69 | os.chmod(path, 0777) 70 | 71 | def make_dir(path): 72 | rm_dir(path) 73 | mk_dir(path) 74 | chmod_dir(path) 75 | 76 | def cd(path): 77 | return "cd "+ path 78 | 79 | def rename_file(old_file, new_file): 80 | return run_command('mv {} {}'.format( 81 | old_file, 82 | new_file)) 83 | 84 | def copy_file(old_file, new_file): 85 | shutil.copy2(old_file, new_file) 86 | 87 | def remove_file(path): 88 | try: 89 | os.remove(path) 90 | except: 91 | pass 92 | 93 | def get_size(start_path = '.'): 94 | total_size = 0 95 | for dirpath, dirnames, filenames in os.walk(start_path): 96 | for f in filenames: 97 | try: 98 | fp = os.path.join(dirpath, f) 99 | total_size += os.path.getsize(fp) 100 | except: 101 | pass 102 | return total_size 103 | -------------------------------------------------------------------------------- /core/drivers/extract/extract.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) 3 | 4 | import utils 5 | import json 6 | from cmudbac.settings import * 7 | 8 | 
EXTRACT_WAIT_TIME = 0 9 | 10 | def extract_forms(url, follow = "false", cookie_jar = None, filename = "forms.json"): 11 | utils.remove_file(os.path.join(os.path.dirname(__file__), filename)) 12 | 13 | if cookie_jar == None: 14 | try: 15 | out = utils.run_command('{} && {}'.format( 16 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 17 | 'scrapy crawl form -o {} -a start_url="{}" -a follow={} -a proxy={}'.format(filename, url, follow, HTTP_PROXY)), EXTRACT_WAIT_TIME) 18 | except: 19 | out = utils.run_command('{} && {}'.format( 20 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 21 | 'scrapy crawl form -o {} -a start_url="{}" -a follow={}'.format(filename, url, follow)), EXTRACT_WAIT_TIME) 22 | else: 23 | cookie_jar_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename.replace('.json', '.txt')) 24 | cookie_jar.save(cookie_jar_path) 25 | out = utils.run_command('{} && {}'.format( 26 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 27 | 'scrapy crawl form_with_cookie -o {} -a start_url="{}" -a cookie_jar={}'.format(filename, url, cookie_jar_path)), EXTRACT_WAIT_TIME) 28 | 29 | with open(os.path.join(os.path.dirname(__file__), filename)) as json_forms: 30 | forms = json.load(json_forms) 31 | 32 | utils.remove_file(os.path.join(os.path.dirname(__file__), filename)) 33 | 34 | return forms 35 | 36 | def extract_all_forms(url, filename): 37 | return extract_forms(url, "true", filename = filename) 38 | 39 | def extract_all_forms_with_cookie(url, cookie_jar, filename): 40 | return extract_forms(url, "true", cookie_jar, filename) 41 | 42 | def extract_urls(url, follow = "false", cookie_jar = None, filename = "urls.json"): 43 | utils.remove_file(os.path.join(os.path.dirname(__file__), filename)) 44 | 45 | if cookie_jar == None: 46 | try: 47 | out = utils.run_command('{} && {}'.format( 48 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 49 | 'scrapy crawl url -o {} -a start_url="{}" -a follow={} -a proxy={}'.format(filename, url, 
follow, HTTP_PROXY)), EXTRACT_WAIT_TIME) 50 | except: 51 | out = utils.run_command('{} && {}'.format( 52 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 53 | 'scrapy crawl url -o {} -a start_url="{}" -a follow={}'.format(filename, url, follow)), EXTRACT_WAIT_TIME) 54 | else: 55 | cookie_jar_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename.replace('.json', '.txt')) 56 | cookie_jar.save(cookie_jar_path) 57 | out = utils.run_command('{} && {}'.format( 58 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 59 | 'scrapy crawl url_with_cookie -o {} -a start_url="{}" -a cookie_jar={}'.format(filename, url, cookie_jar_path)), EXTRACT_WAIT_TIME) 60 | 61 | with open(os.path.join(os.path.dirname(__file__), filename)) as json_urls: 62 | urls = json.load(json_urls) 63 | 64 | utils.remove_file(os.path.join(os.path.dirname(__file__), filename)) 65 | return urls 66 | 67 | def extract_all_urls(url, filename): 68 | return extract_urls(url, "true", filename = filename) 69 | 70 | def extract_all_urls_with_cookie(url, cookie_jar, filename): 71 | return extract_urls(url, "true", cookie_jar, filename) -------------------------------------------------------------------------------- /core/drivers/extract/driver/spiders/form_with_cookie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import scrapy 4 | from scrapy.spiders import CrawlSpider, Rule 5 | from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor 6 | import cookielib 7 | 8 | from driver.items import InputItem, FormItem 9 | 10 | class FormWithCookieSpider(CrawlSpider): 11 | name = "form_with_cookie" 12 | allowed_domains = ["127.0.0.1"] 13 | 14 | def __init__(self, *args, **kwargs): 15 | super(FormWithCookieSpider, self).__init__(*args, **kwargs) 16 | 17 | self.start_urls = [kwargs.get('start_url')] 18 | self.cookiejar = cookielib.LWPCookieJar() 19 | self.cookiejar.load(kwargs.get('cookie_jar')) 20 | 21 | self.rules = 
( 22 | Rule (SgmlLinkExtractor(allow=('')), callback='parse_form', follow=True, process_request='add_cookie_for_request'), 23 | ) 24 | super(FormWithCookieSpider, self)._compile_rules() 25 | 26 | def add_cookie_for_request(self, request): 27 | for cookie in self.cookiejar: 28 | request.cookies[cookie.name] = cookie.value 29 | logout_patterns = ['logout', 'log-out', 'log_out'] 30 | if any(logout_pattern in request.url for logout_pattern in logout_patterns): 31 | return None 32 | return request 33 | 34 | def parse_form(self, response): 35 | for sel in response.xpath('//form'): 36 | formItem = FormItem() 37 | 38 | formItem['action'] = '' 39 | try: 40 | formItem['action'] = sel.xpath('@action').extract()[0] 41 | except: 42 | pass 43 | 44 | formItem['url'] = response.url 45 | 46 | formItem['method'] = '' 47 | try: 48 | formItem['method'] = sel.xpath('@method').extract()[0].lower() 49 | except: 50 | pass 51 | 52 | formItem['inputs'] = [] 53 | for ip in sel.xpath('.//input|.//textarea'): 54 | try: 55 | _id = ip.xpath('@id').extract()[0] 56 | except: 57 | _id = '' 58 | name = ip.xpath('@name').extract()[0] 59 | try: 60 | _type = ip.xpath('@type').extract()[0] 61 | except: 62 | _type = 'textarea' 63 | try: 64 | value = ip.xpath('@value').extract()[0] 65 | except: 66 | value = '' 67 | inputItem = InputItem() 68 | inputItem['id'] = _id 69 | inputItem['name'] = name 70 | inputItem['type'] = _type 71 | inputItem['value'] = value 72 | formItem['inputs'].append(inputItem) 73 | 74 | try: 75 | _id = sel.xpath('@id').extract()[0] 76 | except: 77 | _id = '' 78 | try: 79 | _class = sel.xpath('@class').extract()[0] 80 | except: 81 | _class = '' 82 | try: 83 | enctype = sel.xpath('@enctype').extract()[0] 84 | except: 85 | enctype = '' 86 | formItem['id'] = _id 87 | formItem['clazz'] = _class 88 | formItem['enctype'] = enctype 89 | 90 | yield formItem 91 | 92 | -------------------------------------------------------------------------------- /core/drivers/extract/driver/settings.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for driver project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. You can find more settings consulting the documentation: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'driver' 13 | 14 | SPIDER_MODULES = ['driver.spiders'] 15 | NEWSPIDER_MODULE = 'driver.spiders' 16 | 17 | 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 19 | #USER_AGENT = 'driver (+http://www.yourdomain.com)' 20 | 21 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 22 | CONCURRENT_REQUESTS=32 23 | 24 | # Configure a delay for requests for the same website (default: 0) 25 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay 26 | # See also autothrottle settings and docs 27 | #DOWNLOAD_DELAY=3 28 | # The download delay setting will honor only one of: 29 | #CONCURRENT_REQUESTS_PER_DOMAIN=16 30 | #CONCURRENT_REQUESTS_PER_IP=16 31 | 32 | # Disable cookies (enabled by default) 33 | #COOKIES_ENABLED=False 34 | 35 | # Disable Telnet Console (enabled by default) 36 | #TELNETCONSOLE_ENABLED=False 37 | 38 | # Override the default request headers: 39 | #DEFAULT_REQUEST_HEADERS = { 40 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 41 | # 'Accept-Language': 'en', 42 | #} 43 | 44 | # Enable or disable spider middlewares 45 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 46 | #SPIDER_MIDDLEWARES = { 47 | # 'driver.middlewares.MyCustomSpiderMiddleware': 543, 48 | #} 49 | 50 | # Enable or disable downloader middlewares 51 | # See 
# Fields exposed on the admin form, in display order.  The list is extended
# with "tweet" below when twitter support is configured (see can_tweet()).
FIELDS = [
    "section",
    "author",
    "markup",
    "title",
    "slug",
    "teaser",
    "content",
    "description",
    "primary_image",
    "state",
]

if can_tweet():
    FIELDS.append("tweet")


class AdminPostForm(forms.ModelForm):
    """Admin form for blog Posts.

    Overrides the widgets of the text fields for a wider admin layout, seeds
    ``teaser``/``content`` from the latest Revision, and on save renders the
    markup to HTML, records a new Revision, optionally tweets, and fires the
    ``post_published`` signal the first time a post is published.

    NOTE(review): save() does not accept Django's ``commit`` keyword even
    though it overrides ModelForm.save(); confirm how the admin integration
    invokes it before relying on commit semantics.
    """

    title = forms.CharField(
        max_length=90,
        widget=forms.TextInput(attrs={"style": "width: 50%;"}),
    )
    slug = forms.CharField(
        widget=forms.TextInput(attrs={"style": "width: 50%;"})
    )
    teaser = forms.CharField(
        widget=forms.Textarea(attrs={"style": "width: 80%;"}),
    )
    content = forms.CharField(
        widget=forms.Textarea(attrs={"style": "width: 80%; height: 300px;"})
    )
    description = forms.CharField(
        widget=forms.Textarea(attrs={"style": "width: 80%;"}),
        required=False
    )
    # The tweet checkbox only exists when twitter credentials are configured.
    if can_tweet():
        tweet = forms.BooleanField(
            required=False,
            help_text="Checking this will send out a tweet for this post",
        )

    class Meta:
        model = Post
        fields = FIELDS

    class Media:
        js = ("js/admin_post_form.js",)

    def __init__(self, *args, **kwargs):
        """Pre-populate teaser/content from the post's most recent revision."""
        super(AdminPostForm, self).__init__(*args, **kwargs)

        post = self.instance

        # grab the latest revision of the Post instance
        latest_revision = post.latest()

        if latest_revision:
            # set initial data from the latest revision
            self.fields["teaser"].initial = latest_revision.teaser
            self.fields["content"].initial = latest_revision.content

    def save(self):
        """Persist the post, render its HTML, and record a Revision.

        Returns the saved Post.  Emits ``post_published`` only on the
        transition from unpublished to published.
        """
        published = False
        post = super(AdminPostForm, self).save(commit=False)

        # A post is "being published now" if it is new or has never been
        # published, AND the chosen state is the last (publish) choice.
        if post.pk is None or Post.objects.filter(pk=post.pk, published=None).count():
            if self.cleaned_data["state"] == Post.STATE_CHOICES[-1][0]:
                post.published = timezone.now()
                published = True

        # Resolve the markup parser (e.g. markdown) configured for the
        # selected markup choice; curry allows per-parser default kwargs.
        render_func = curry(
            load_path_attr(
                settings.PINAX_BLOG_MARKUP_CHOICE_MAP[self.cleaned_data["markup"]]["parser"]
            )
        )

        post.teaser_html = render_func(self.cleaned_data["teaser"])
        post.content_html = render_func(self.cleaned_data["content"])
        post.updated = timezone.now()
        post.save()

        # Every save produces an immutable Revision snapshot of the raw text.
        r = Revision()
        r.post = post
        r.title = post.title
        r.teaser = self.cleaned_data["teaser"]
        r.content = self.cleaned_data["content"]
        r.author = post.author
        r.updated = post.updated
        r.published = post.published
        r.save()

        if can_tweet() and self.cleaned_data["tweet"]:
            post.tweet()

        if published:
            post_published.send(sender=Post, post=post)

        return post
## =====================================================================
## LOGGING CONFIGURATION
## =====================================================================
LOG = logging.getLogger()


def get_crawler(crawler_status, crawler_class):
    """Instantiate the crawler class named ``crawler_class``.

    The class is imported from ``crawlers.<crawler_class lowercased>``.
    GitHub credentials are loaded from secrets/secrets.json when present;
    crawling proceeds unauthenticated (``auth=None``) otherwise.
    """
    module_name = "crawlers.%s" % (crawler_class.lower())
    module_handle = __import__(module_name, globals(), locals(), [crawler_class])
    klass = getattr(module_handle, crawler_class)
    # FOR GITHUB: credentials are optional; only swallow the errors that a
    # missing or malformed secrets file can actually raise.
    try:
        with open(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
                               "secrets", "secrets.json"), 'r') as auth_file:
            auth = json.load(auth_file)
    except (IOError, OSError, ValueError):
        auth = None
    return klass(crawler_status, auth)


def add_module(module_name, package_name, package_type_id, package_version):
    """Register a Module belonging to a (possibly new) Package."""
    project_type = ProjectType.objects.get(id=package_type_id)
    # get_or_create returns the package directly; no second lookup needed.
    package, _ = Package.objects.get_or_create(
        name=package_name, version=package_version, project_type=project_type)
    module = Module()
    module.name = module_name
    module.package = package
    module.save()


def add_repo(repo_name, crawler_status_id, repo_setup_scripts):
    """Add a repository through the crawler attached to a CrawlerStatus."""
    cs = CrawlerStatus.objects.get(id=crawler_status_id)
    repo_source = cs.source
    crawler = get_crawler(cs, repo_source.crawler_class)
    crawler.add_repository(repo_name, repo_setup_scripts)


def deploy_repo(repo_name, database='PostgreSQL'):
    """Deploy ``repo_name`` inside the Vagrant VM against ``database``."""
    repo = Repository.objects.get(name=repo_name)
    print('Attempting to deploy {} using {} ...'.format(repo, repo.project_type.deployer_class))
    try:
        result = utils.vagrant_deploy(repo, 0, database)
    except Exception as e:
        LOG.exception(e)
        raise  # bare raise preserves the original traceback ("raise e" did not)
    return result


def delete_repo(repo_name):
    """Delete every Repository row matching ``repo_name``."""
    for repo in Repository.objects.filter(name=repo_name):
        repo.delete()


def edit_distance(a, b, threshold=3):
    """Banded Levenshtein distance between ``a`` and ``b``.

    Returns the exact edit distance when it is <= ``threshold``; otherwise
    returns ``threshold + 1`` (possibly via an early exit once every cell in
    the current band exceeds the threshold).

    BUG FIX: the original applied the deletion relaxation ``d0[j] + 1`` to
    row ``i == 0`` as well, reading the uninitialized (all-zero) previous-row
    buffer and under-counting distances (e.g. ("ab", "cdab") returned 1
    instead of 2).  Row 0 is now fixed at ``d1[j] = j``.
    """
    dis = threshold + 1
    len_a = len(a)
    len_b = len(b)
    # Lengths differing by more than the threshold cannot be within it.
    if abs(len_a - len_b) > threshold:
        return dis
    d0 = [0] * (max(len_a, len_b) + 1)
    d1 = [0] * (max(len_a, len_b) + 1)
    for i in range(len_a + 1):
        l = max(0, i - threshold)
        r = min(len_b, i + threshold)
        minDis = threshold + 1
        for j in range(l, r + 1):
            if i == 0:
                # First row: distance from the empty prefix is simply j.
                d1[j] = j
            else:
                if j == 0:
                    d1[j] = i
                else:
                    # Substitution / match.
                    if a[i - 1] == b[j - 1]:
                        d1[j] = d0[j - 1]
                    else:
                        d1[j] = d0[j - 1] + 1
                # Insertion (within this row's band only).
                if j > l:
                    d1[j] = min(d1[j], d1[j - 1] + 1)
                # Deletion: d0[j] is only valid inside the previous row's band.
                if j < i + threshold:
                    d1[j] = min(d1[j], d0[j] + 1)
            minDis = min(minDis, d1[j])
        # Every cell in the band already exceeds the threshold: give up early.
        if minDis > threshold:
            return dis
        d0, d1 = d1, d0

    dis = d0[len_b]
    return dis
# Project directories mirrored into the Vagrant shared folder.
copied_dir = ['cmudbac', 'library', 'blog', 'core', 'secrets', 'scripts']
vagrant_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'vagrant')
copied_files = []


def vagrant_setup():
    """Copy the project tree into the Vagrant shared folder (idempotent)."""
    print('Setting up Vagrant ...')

    ## Copy files (skip directories that were already copied)
    for new_dir in copied_dir:
        old_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, new_dir)
        if os.path.exists(old_dir) and not os.path.exists(os.path.join(vagrant_dir, new_dir)):
            shutil.copytree(old_dir, os.path.join(vagrant_dir, new_dir))

    # run_command('{} && {}'.format(cd(vagrant_dir), 'vagrant up'))


def vagrant_clear():
    """Remove the copies made by vagrant_setup(); missing dirs are ignored."""
    for new_dir in copied_dir:
        try:
            shutil.rmtree(os.path.join(vagrant_dir, new_dir))
        except OSError:
            pass  # directory was never copied, nothing to clean

    # run_command('{} && {}'.format(cd(vagrant_dir), 'vagrant halt'))


def set_vagrant_database():
    """Point the copied settings at the host DB.

    10.0.2.2 is the VirtualBox NAT alias for the host machine.
    """
    settings_file = os.path.join(vagrant_dir, "cmudbac", "settings.py")
    with open(settings_file) as fin:
        settings = fin.read()
    if "'HOST': 'localhost'" in settings:
        settings = settings.replace("'HOST': 'localhost'", "'HOST': '10.0.2.2'")
    with open(settings_file, 'w') as fout:
        fout.write(settings)


def unset_vagrant_database():
    """Restore the copied settings to use localhost again.

    BUG FIX: the original called
    ``settings.replace("'HOST': 'localhost'", "'HOST': 'localhost'")`` —
    a no-op that left the settings pointing at 10.0.2.2 forever.
    """
    settings_file = os.path.join(vagrant_dir, "cmudbac", "settings.py")
    with open(settings_file) as fin:
        settings = fin.read()
    if "'HOST': '10.0.2.2'" in settings:
        settings = settings.replace("'HOST': '10.0.2.2'", "'HOST': 'localhost'")
    with open(settings_file, 'w') as fout:
        fout.write(settings)


def vagrant_deploy(repo, deploy_id, database):
    """Run the deploy script inside the VM; returns os.system's exit status."""
    set_vagrant_database()
    out = os.system('{} && {}'.format(
        cd(vagrant_dir),
        'vagrant ssh -c "{}"'.format(
            'python /vagrant/core/scripts/vagrant_deploy.py {} {} {}'.format(repo, deploy_id, database))))
    unset_vagrant_database()

    return out


def vagrant_benchmark(attempt_info, database, benchmark, deploy_id=1):
    """Serialize attempt_info into the shared folder and benchmark inside the VM.

    Returns os.system's exit status, or None if staging/launch failed.
    The copied tree is always cleaned up, even on error.
    """
    vagrant_setup()
    out = None
    temp_dir = None
    try:
        import json
        attempt_info_file_path = os.path.join(vagrant_dir, 'attempt_info.json')
        with open(attempt_info_file_path, 'w') as attempt_info_file:
            json.dump(attempt_info, attempt_info_file)
        command = '{} && {}'.format(
            cd(vagrant_dir),
            'vagrant ssh -c "{}"'.format(
                'python /vagrant/core/scripts/vagrant_benchmark.py --attempt_info="{attempt_info}" --deploy_id={deploy_id} {database} {benchmark}'
                .format(attempt_info=os.path.join('/vagrant', 'attempt_info.json'), deploy_id=deploy_id,
                        database=' '.join('--{}={}'.format(key, value) for key, value in database.iteritems()),
                        benchmark=' '.join('--{}={}'.format(key, value) for key, value in benchmark.iteritems())
                        )
            )
        )
        out = os.system(command)
        return out
    except Exception:
        traceback.print_exc()
    finally:
        try:
            vagrant_clear()
        except Exception:
            pass

    return out
# Catalog endpoints for the public CMDBAC service.
CMDBAC_URL = "http://cmdbac.cs.cmu.edu/"
ATTEMPT_INFO_URL = "/api/attempt/{id}/info/"

# Supported positional actions.
ACTION_TYPES = (
    "info",
    "deploy",
)

# Database backends the local deployer can target.
DATABASE_TYPES = (
    "mysql",
    "postgres",
    "sqlite"
)


def parse_args():
    """Parse the command line for the CMDBAC local deployer.

    Returns a plain dict (via ``vars``) so callers can index by dest name,
    e.g. ``args["db_type"]`` for ``--db-type``.
    """
    aparser = argparse.ArgumentParser(description='CMDBAC Local Deployer Tool')

    # Actions
    aparser.add_argument('action', choices=ACTION_TYPES,
                         help='Deployer Action')

    # Attempt Parameters
    agroup = aparser.add_argument_group('Deployment Parameters')
    agroup.add_argument('--catalog', default=CMDBAC_URL, metavar='URL',
                        help='Catalog API URL')
    agroup.add_argument('--attempt', type=int, metavar='ID',
                        help='Id of the attempt to deploy')
    agroup.add_argument('--num_threads', type=int, default=1, metavar='N',
                        help='Number of threads you want to use to submit actions')
    agroup.add_argument('--timeout', type=int, metavar='T',
                        help='Timeout for submitting actions (seconds)')
    agroup.add_argument('--db-size', type=int,
                        help='The expected Database size, 10 stands for 10MB')

    # Database Parameters
    agroup = aparser.add_argument_group('Local Database Parameters')
    agroup.add_argument('--db-type', choices=DATABASE_TYPES,
                        help='Database Type')
    agroup.add_argument('--db-host', type=str,
                        help='Database Hostname')
    # FIX: help text typo ("Databsae Port" -> "Database Port")
    agroup.add_argument('--db-port', type=int,
                        help='Database Port')
    agroup.add_argument('--db-name', type=str,
                        help='Database Name')
    agroup.add_argument('--db-user', type=str,
                        help='Database User')
    agroup.add_argument('--db-pass', type=str,
                        help='Database Password')

    return vars(aparser.parse_args())
## DEF
def get_attempt_info(api_url, attempt_id):
    """Fetch the JSON description of an attempt from the catalog API."""
    url = api_url + ATTEMPT_INFO_URL.format(id = attempt_id)
    response = requests.get(url)
    return response.json()
## DEF

def run_attempt_benchmark(api_url, attempt_id, database, benchmark):
    """Download the attempt info and benchmark it inside the local Vagrant VM.

    Errors from the benchmark are printed but not re-raised (best effort).
    """
    attempt_info = get_attempt_info(api_url, attempt_id)
    print('Running Benchmark for Attempt {}'.format(attempt_id))
    try:
        vagrant.vagrant_benchmark(attempt_info, database, benchmark)
    except Exception:
        traceback.print_exc()
## DEF

if __name__ == "__main__":
    args = parse_args()

    if args["action"] == "info":
        # Print the raw attempt metadata.
        attempt_info = get_attempt_info(args["catalog"], args["attempt"])
        print(json.dumps(attempt_info, indent = 4))
    elif args["action"] == "deploy":
        database = {
            'database': args["db_type"],
            'host': args["db_host"],
            'port': args["db_port"],
            'name': args["db_name"],
            'username': args["db_user"],
            'password': args["db_pass"]
        }
        benchmark = {
            'num_threads': args["num_threads"],
            'timeout': args["timeout"],
            # BUG FIX: was `arg["db_size"]` — NameError at runtime
            # (the parsed dict is named `args`).
            'size': args["db_size"]
        }
        run_attempt_benchmark(args["catalog"], args["attempt"], database, benchmark)
    else:
        print("Invalid action '%s'" % args["action"])
        sys.exit(1)

## MAIN
class FormSpider(CrawlSpider):
    """Crawl a locally deployed app and emit one FormItem per <form> found.

    Spider arguments (all passed as scrapy -a kwargs):
      start_url -- single URL to start from
      follow    -- 'true' to follow extracted links, anything else stops at depth 1
      proxy     -- optional HTTP proxy for the PhantomJS browser
    """
    name = "form"
    allowed_domains = ["127.0.0.1"]

    def __init__(self, *args, **kwargs):
        super(FormSpider, self).__init__(*args, **kwargs)

        self.start_urls = [kwargs.get('start_url')]

        follow = True if kwargs.get('follow') == 'true' else False
        self.rules = (
            Rule (SgmlLinkExtractor(allow=('')), callback='parse_form', follow=follow),
        )
        # Rules are assigned after CrawlSpider.__init__, so recompile them.
        super(FormSpider, self)._compile_rules()

        # kwargs.get('proxy') returns None when absent; the string
        # concatenation then raises TypeError, which is the (implicit)
        # "no proxy configured" path here.
        try:
            proxy = kwargs.get('proxy')
            service_args = [
                '--proxy=' + proxy,
                '--proxy-type=http',
            ]
        except:
            service_args = None
        # Headless browser used only to check input visibility on pages that
        # look like registration forms.
        self.browser = webdriver.PhantomJS(service_args=service_args)

    def closed(self, reason):
        """Shut down the PhantomJS process when the crawl ends."""
        self.browser.quit()

    def parse_form(self, response):
        """Yield a FormItem for every form on the page.

        For registration-looking URLs, the page is also loaded in PhantomJS
        so hidden inputs (is_displayed() == False) can be skipped.
        """
        register_patterns = ['register', 'signup', 'sign-up', 'sign_up']
        if any(pattern in response.url for pattern in register_patterns):
            use_browser = True
        else:
            use_browser = False
        for sel in response.xpath('//form'):
            if use_browser:
                self.browser.get(response.url)
            formItem = FormItem()

            # Attribute extraction: extract()[0] raises IndexError when the
            # attribute is missing, hence the try/except-with-default pattern
            # used throughout this method.
            formItem['action'] = ''
            try:
                formItem['action'] = sel.xpath('@action').extract()[0]
            except:
                pass

            formItem['url'] = response.url

            formItem['method'] = ''
            try:
                formItem['method'] = sel.xpath('@method').extract()[0].lower()
            except:
                pass

            formItem['inputs'] = []
            for ip in sel.xpath('.//input|.//textarea'):
                try:
                    _id = ip.xpath('@id').extract()[0]
                except:
                    _id = ''
                if _id != '':
                    if use_browser:
                        # Skip inputs the user could not actually interact with.
                        input_element = self.browser.find_element_by_id(_id)
                        if not input_element.is_displayed():
                            continue
                try:
                    name = ip.xpath('@name').extract()[0]
                except:
                    name = ''
                try:
                    _type = ip.xpath('@type').extract()[0]
                except:
                    # <textarea> has no @type attribute.
                    _type = 'textarea'
                try:
                    value = ip.xpath('@value').extract()[0]
                except:
                    value = ''
                inputItem = InputItem()
                inputItem['id'] = _id
                inputItem['name'] = name
                inputItem['type'] = _type
                inputItem['value'] = value
                formItem['inputs'].append(inputItem)

            try:
                _id = sel.xpath('@id').extract()[0]
            except:
                _id = ''
            try:
                _class = sel.xpath('@class').extract()[0]
            except:
                _class = ''
            try:
                enctype = sel.xpath('@enctype').extract()[0]
            except:
                enctype = ''
            formItem['id'] = _id
            # 'class' is a Python keyword, so the item field is named 'clazz'.
            formItem['clazz'] = _class
            formItem['enctype'] = enctype

            yield formItem
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []

# Application definition

INSTALLED_APPS = (
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django.contrib.sites',
    'rest_framework',
    'library',
    'blog'
)

# Cache middleware must bracket the stack: UpdateCache first, FetchFromCache last.
MIDDLEWARE_CLASSES = (
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.cache.UpdateCacheMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
    'django.middleware.cache.FetchFromCacheMiddleware',
)

ROOT_URLCONF = 'cmudbac.urls'

WSGI_APPLICATION = 'cmudbac.wsgi.application'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
                'django.core.context_processors.static',
                'library.context_processors.analytics'
            ],
        },
    },
]


# Database
# https://docs.djangoproject.com/en/1.6/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': 'dbac',
        'HOST': 'localhost',
        'PORT': '3306',
        'USER': 'CHANGE_ME',
        'PASSWORD': 'CHANGE_ME',
        'STORAGE_ENGINE': 'InnoDB'
    }
}

# File-based cache backing the cache middleware above.
CACHES = {
    'default': {
        'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
        'LOCATION': '/var/tmp/django_cache/cmdbac',
    }
}

# Where the analyzers expect the database servers to write their query logs.
LOG_FILE_LOCATION = {
    'mysql': '/var/log/mysql/mysql.log',
    'postgresql': '/var/log/postgresql/postgresql-9.3-main.log'
}

# Internationalization
# https://docs.djangoproject.com/en/1.6/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'America/New_York'

USE_I18N = True

USE_L10N = True

#USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.6/howto/static-files/

STATIC_URL = '/static/'

HTTP_PROXY = ''

REST_FRAMEWORK = {
    # Use Django's standard `django.contrib.auth` permissions,
    # or allow read-only access for unauthenticated users.
    'DEFAULT_PERMISSION_CLASSES': [
        'rest_framework.permissions.AllowAny'
    ],
    'DEFAULT_PAGINATION_CLASS': 'rest_framework.pagination.PageNumberPagination',
    'PAGE_SIZE': 50
}

# Google Analytics
GOOGLE_ANALYTICS_KEY = ''
def action_stats(directory = '.'):
    """Collect per-project-type histograms of queries-per-action.

    Writes the stats via dump_all_stats() into ``directory``.
    """
    stats = {'action_query_count': {}}

    for repo in Repository.objects.exclude(latest_successful_attempt = None):
        # Skip repositories excluded by the shared filter.
        if filter_repository(repo):
            continue

        project_type_name = repo.project_type.name
        if project_type_name not in stats['action_query_count']:
            stats['action_query_count'][project_type_name] = []

        for action in Action.objects.filter(attempt = repo.latest_successful_attempt):
            query_count = len(Query.objects.filter(action = action))
            if query_count > 0:
                stats['action_query_count'][project_type_name].append(query_count)


    dump_all_stats(directory, stats)

def transaction_stats(directory = '.'):
    """Reconstruct transactions from query logs and collect their statistics.

    A transaction starts at BEGIN / START TRANSACTION / SET AUTOCOMMIT=0 and
    ends at the next COMMIT; everything in between is accumulated.  Per
    project type this records: transactions per action, queries / reads /
    writes per transaction.  The raw transactions are pickled as well.
    """
    stats = {'transaction_count': {}, 'transaction_query_count': {}, 'transaction_read_count': {}, 'transaction_write_count': {}}

    transactions = []

    for repo in Repository.objects.exclude(latest_successful_attempt = None):
        if filter_repository(repo):
            continue

        project_type_name = repo.project_type.name
        if project_type_name not in stats['transaction_count']:
            stats['transaction_count'][project_type_name] = []
        if project_type_name not in stats['transaction_query_count']:
            stats['transaction_query_count'][project_type_name] = []
        if project_type_name not in stats['transaction_read_count']:
            stats['transaction_read_count'][project_type_name] = []
        if project_type_name not in stats['transaction_write_count']:
            stats['transaction_write_count'][project_type_name] = []


        for action in Action.objects.filter(attempt = repo.latest_successful_attempt):
            # State for the transaction currently being accumulated.
            transaction = ''
            query_count = 0
            transaction_count = 0

            for query in Query.objects.filter(action = action):
                if 'BEGIN' in query.content.upper() or 'START TRANSACTION' in query.content.upper() or 'SET AUTOCOMMIT=0' in query.content.upper():
                    # New transaction begins (any open one is discarded).
                    transaction = query.content + '\n'
                    query_count = 1
                elif transaction != '':
                    transaction += query.content + '\n'
                    query_count += 1
                    if 'COMMIT' in query.content.upper():
                        transaction = transaction.strip('\n')

                        # for each transaction, count the number of transactions
                        transaction_count += 1

                        # for each transaction, count the number of read/write
                        read_count = len(re.findall('SELECT', transaction.upper()))
                        stats['transaction_read_count'][project_type_name].append(read_count)
                        write_count = 0
                        for keyword in ['INSERT', 'DELETE', 'UPDATE']:
                            write_count += len(re.findall(keyword, transaction.upper()))
                        stats['transaction_write_count'][project_type_name].append(write_count)

                        # for each transaction, count the queries
                        # (minus 2 for the BEGIN and COMMIT statements themselves)
                        query_count -= 2
                        stats['transaction_query_count'][project_type_name].append(query_count)

                        try:
                            transactions.append((repo.name, repo.project_type.name, transaction))
                        except:
                            pass

                        transaction = ''

            if transaction_count > 0:
                stats['transaction_count'][project_type_name].append(transaction_count)

    pickle_dump(directory, 'transactions', transactions)

    dump_all_stats(directory, stats)

def main():
    # active
    action_stats(TRANSACTION_DIRECTORY)
    transaction_stats(TRANSACTION_DIRECTORY)

    # working

    # deprecated
## =====================================================================
## LOGGING CONFIGURATION
## =====================================================================
LOG = logging.getLogger()

# Maximum recursion depth of a single random walk.
MAX_RANDOM_WALK_DEPTH = 5

## =====================================================================
## RANDOM DRIVER
## =====================================================================
class RandomDriver(BaseDriver):
    """Depth-limited random walker over a deployed application.

    Starting from the URLs discovered by the wrapped driver, it submits every
    form with random text values and follows every link (each at most once,
    tracked in ``walked_path``), recording the queries each interaction
    triggers via process_logs()/check_log() from BaseDriver.
    """

    def __init__(self, driver):
        self.driver = driver
        self.start_urls = set(map(lambda url: url['url'], driver.urls))
        self.database = self.driver.database
        # NOTE(review): when driver.browser is None, self.cookiejar is never
        # set and start() will raise AttributeError — confirm callers always
        # provide a logged-in browser.
        if driver.browser is not None:
            self.cookiejar = driver.browser._ua_handlers['_cookies'].cookiejar
        self.walked_path = set()
        self.log_file = driver.log_file

    def new_browser(self, cookiejar = None, url = None):
        """Build a fresh mechanize browser, optionally seeded with a cookiejar and opened at ``url``."""
        browser = mechanize.Browser()
        if cookiejar is not None:
            browser.set_cookiejar(self.cookiejar)
        browser.set_handle_robots(False)
        if url is not None:
            browser.open(url)
        return browser

    def start(self):
        """Walk every start URL, accumulating results in self.forms / self.urls."""
        self.forms = []
        self.urls = []
        for url in self.start_urls:
            self.random_walk(self.new_browser(self.cookiejar, url))

    def random_walk(self, browser, depth = MAX_RANDOM_WALK_DEPTH):
        """Recursively submit forms and follow links from the browser's page."""
        if depth == 0:
            return

        try:
            last_line_no = self.check_log()
            browser_url = browser.geturl()
            cookiejar = browser._ua_handlers['_cookies'].cookiejar

            LOG.info('Walking URL: {}'.format(browser_url))

            # --- Forms: fill text inputs with random values and submit. ---
            forms = list(enumerate(list(browser.forms())))
            for idx, form in forms:
                key = '{}_{}'.format(browser_url, form.name)
                if key in self.walked_path:
                    continue
                self.walked_path.add(key)

                browser.select_form(nr = idx)
                form_stats = {
                    'url': browser_url,
                    'method': form.method,
                    'inputs': []
                }
                for control in form.controls:
                    if control.type == 'text':
                        browser[control.name] = submit.gen_random_value()
                    form_stats['inputs'].append({
                        'name': control.name,
                        'type': control.type
                    })
                succ = True
                try:
                    browser.submit()
                except Exception:
                    # BUG FIX: the original called traceback.print_exc()
                    # BEFORE browser.submit() inside the try block, printing a
                    # stale traceback on every form while real submit failures
                    # went unreported.  It belongs here, in the handler.
                    traceback.print_exc()
                    succ = False

                form_stats['queries'], form_stats['counter'] = self.process_logs(self.check_log(last_line_no), None)

                if all(not self.equal_form(form_stats, ret_form) for ret_form in self.forms):
                    self.forms.append(form_stats)

                if succ:
                    self.random_walk(browser, depth - 1)

                # Re-open the page: submitting navigated away.
                browser = self.new_browser(cookiejar, browser_url)

            # --- Links: follow each unvisited link once. ---
            links = list(browser.links())
            for link in links:
                key = link.url
                if key in self.walked_path:
                    continue
                self.walked_path.add(key)

                url = {
                    'url': link.url,
                    'queries': [],
                    'counter': {}
                }

                succ = True
                try:
                    browser.follow_link(link)
                except Exception:
                    traceback.print_exc()
                    succ = False

                url['queries'], url['counter'] = self.process_logs(self.check_log(last_line_no), None)

                # NOTE(review): self.urls is never appended to, so this
                # de-duplication check can never fire — confirm whether a
                # self.urls.append(url) was intended here.
                if any(self.equal_url(url, ret_url) for ret_url in self.urls):
                    continue

                if succ:
                    self.random_walk(browser, depth - 1)

                browser = self.new_browser(cookiejar, browser_url)

        except Exception:
            traceback.print_exc()
# Propagate the host proxy (if any) into the vagrant user's environment and apt.
if [ -n "$http_proxy" ]
then
    echo "use proxy: "$http_proxy
    echo "export http_proxy=\"$http_proxy\"" >> /home/vagrant/.bashrc
    echo "export https_proxy=\"$http_proxy\"" >> /home/vagrant/.bashrc

    export http_proxy="$http_proxy"
    export https_proxy="$http_proxy"

    echo "Acquire::http::Proxy \"$http_proxy\";" > /etc/apt/apt.conf
else
    echo "not use proxy"
fi

# The output of all these installation steps is noisy. With this utility
# the progress report is nice and concise.

function install {
    echo Installing $1
    shift
    apt-get -y install "$@" >/dev/null 2>&1
}

echo updating package information
install 'apt-repository' software-properties-common python-software-properties
curl --silent --location https://deb.nodesource.com/setup_4.x | sudo bash -
apt-get -y update >/dev/null 2>&1

install 'development tools' build-essential unzip curl openssl libssl-dev libcurl4-openssl-dev zlib1g zlib1g-dev libgmp-dev
install 'Python' python-dev python-software-properties

# install Ruby (three versions, each with bundler, via RVM)
command curl -sSL https://rvm.io/mpapis.asc | gpg --import -
curl -sSL https://get.rvm.io | bash -s stable
source /usr/local/rvm/scripts/rvm
rvm install 1.9.3
rvm install 2.0.0
rvm install 2.2.2
rvm use 1.9.3 --default
gem install bundler
gem install bundle
rvm use 2.0.0 --default
gem install bundler
gem install bundle
rvm use 2.2.2 --default
gem install bundler
gem install bundle
install 'ruby' ruby-dev

# Sanity-check the toolchain versions.
echo -e "\n- - - - - -\n"
echo -n "Should be sqlite 3.8.1 or higher: sqlite "
sqlite3 --version
echo -n "Should be rvm 1.26.11 or higher: "
rvm --version | sed '/^.*$/N;s/\n//g' | cut -c 1-11
echo -n "Should be ruby 2.2.2: "
ruby -v | cut -d " " -f 2
echo -n "Should be Rails 4.2.1 or higher: "
rails -v
echo -e "\n- - - - - -\n"

# install pip
wget https://bootstrap.pypa.io/get-pip.py -O /home/vagrant/get-pip.py
python /home/vagrant/get-pip.py
echo 'export PYTHONUSERBASE="/home/vagrant/pip"' >> /home/vagrant/.bashrc

# install Beautifulsoup
echo installing Beautifulsoup
pip install BeautifulSoup4

# install Django
# FIX: message typo ("Djano" -> "Django")
echo installing Django
pip install django==1.8.6

# install dependencies
install 'Git' git
git config --global http.proxy $http_proxy

install 'SQLite' sqlite3 libsqlite3-dev

install 'PostgreSQL' postgresql postgresql-contrib libpq-dev
sudo -u postgres psql -U postgres -d postgres -c "alter user postgres with password 'postgres';"
pip install psycopg2

# Pre-seed the MySQL root password so the install is non-interactive.
debconf-set-selections <<< "mysql-server mysql-server/root_password password root"
debconf-set-selections <<< "mysql-server mysql-server/root_password_again password root"
install 'MySQL' mysql-server libmysqlclient-dev
pip install MySQL-python
# mysql -u root --password=root -e "CREATE DATABASE vm"

install 'Nodejs' nodejs

install 'Nokogiri dependencies' libxml2 libxml2-dev libxslt1-dev imagemagick libmagickwand-dev

# install scrapy
echo installing scrapy
pip install scrapy

# web and env
pip install mechanize
pip install python-dateutil
pip install virtualenv
pip install hurry.filesize
pip install selenium
install 'phantomjs' phantomjs
install 'firefox' firefox=28.0+build2-0ubuntu2
install 'xvfb' xvfb
pip install pyvirtualdisplay
pip install djangorestframework
pip install pinax-blog
pip install pytz

# install php
install 'php' apache2 php5-mysql libapache2-mod-php5 mysql-server php5-dev php5-gd php5-curl php5-pgsql php5-sqlite

# install drush (Drupal shell)
wget http://files.drush.org/drush.phar
php drush.phar core-status
chmod +x drush.phar
mv drush.phar /usr/local/bin/drush
drush init
drush dl php_server-7.x
## =====================================================================
## POSTGRESQL ANALYZER
## =====================================================================
class PostgreSQLAnalyzer(BaseAnalyzer):
    """Analyzer that collects per-query EXPLAIN ANALYZE plans and
    schema-level statistics from a deployed PostgreSQL database.

    All database access goes through the deployer's connection; every
    public method is best-effort and logs (rather than raises) on failure,
    matching the other analyzers in this package.
    """

    def __init__(self, deployer):
        # The deployer owns the connection parameters and database name.
        BaseAnalyzer.__init__(self, deployer)

    def analyze_queries(self, queries):
        """Attach an 'explain' entry to every explainable query dict.

        Also accumulates the transaction count into
        ``self.queries_stats['num_transactions']``.

        :param queries: list of dicts, each with a 'raw' SQL string.
        """
        self.queries_stats['num_transactions'] = \
            self.count_transaction(queries) + self.queries_stats.get('num_transactions', 0)

        try:
            conn = self.deployer.get_database_connection()
            # Autocommit (isolation level 0) so one failed EXPLAIN does not
            # poison the session for the remaining queries.
            conn.set_isolation_level(0)
            cur = conn.cursor()

            for query in queries:
                try:
                    if self.is_valid_for_explain(query['raw']):
                        explain_query = 'EXPLAIN ANALYZE {};'.format(query['raw'])
                        cur.execute(explain_query)
                        output = '\n'
                        for row in cur.fetchall():
                            output += row[0] + '\n'
                        query['explain'] = output
                except Exception:
                    # Best effort: queries PostgreSQL refuses to explain
                    # (DDL, parameter placeholders, ...) are simply skipped.
                    pass

            conn.set_isolation_level(1)
            cur.close()
            conn.close()
        except Exception as e:
            LOG.exception(e)

    def analyze_database(self):
        """Collect object counts and full catalog dumps for the 'public'
        schema into ``database_stats`` / ``database_informations``.

        The original repeated the execute/fetch boilerplate for every
        query; the queries are now table-driven (same statements, same
        order). The unused local ``database`` was removed.
        """
        # (stat key, COUNT(*) query) pairs -> database_stats
        count_queries = [
            ('num_tables',
             "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';"),
            ('num_indexes',
             "SELECT COUNT(*) FROM pg_stat_all_indexes WHERE schemaname = 'public';"),
            ('num_constraints',
             "SELECT COUNT(*) FROM information_schema.table_constraints WHERE constraint_schema = 'public';"),
            ('num_foreignkeys',
             "SELECT COUNT(*) FROM information_schema.referential_constraints WHERE constraint_schema = 'public';"),
        ]
        # (information key, full-dump query) pairs -> database_informations
        info_queries = [
            ('tables',
             "SELECT * FROM information_schema.tables WHERE table_schema = 'public';"),
            ('columns',
             "SELECT * FROM information_schema.columns WHERE table_schema = 'public';"),
            ('indexes',
             "SELECT * FROM pg_stat_all_indexes WHERE schemaname = 'public';"),
            ('constraints',
             "SELECT * FROM information_schema.table_constraints WHERE constraint_schema = 'public';"),
            ('key_column_usage',
             "SELECT * FROM information_schema.key_column_usage WHERE constraint_schema = 'public';"),
            ('foreignkeys',
             "SELECT * FROM information_schema.referential_constraints WHERE constraint_schema = 'public';"),
            ('triggers',
             "SELECT * FROM information_schema.triggers WHERE trigger_schema = 'public';"),
            ('views',
             "SELECT * FROM information_schema.views WHERE table_schema = 'public';"),
        ]

        try:
            conn = self.deployer.get_database_connection()
            cur = conn.cursor()

            for key, sql in count_queries:
                cur.execute(sql)
                self.database_stats[key] = int(cur.fetchone()[0])

            for key, sql in info_queries:
                cur.execute(sql)
                # Dumps are stored as the str() of the fetched tuple list,
                # matching the format the downstream analysis scripts parse.
                self.database_informations[key] = str(cur.fetchall())

            cur.close()
            conn.close()
        except Exception as e:
            LOG.exception(e)
def run_driver(driver, timeout, size, queue, deployer=None):
    """Worker-process entry point: repeatedly submit benchmark actions.

    Runs until ``timeout`` seconds have elapsed or, when a ``deployer`` is
    supplied, until the database grows to ``size`` MB. The total number of
    submitted actions is reported back through ``queue`` exactly once,
    even when an exception aborts the loop.

    Bug fix: the original called ``get_database_size()`` with no argument
    even though it requires a deployer, so every iteration raised
    TypeError (silently swallowed) and each worker stopped after a single
    batch. The deployer is now passed in explicitly; the parameter
    defaults to None (size check skipped) for backward compatibility.
    """
    cnt = 0
    stop_time = time.time() + timeout
    new_driver = BenchmarkDriver(driver)
    try:
        while True:
            cnt += new_driver.submit_actions()
            if time.time() >= stop_time:
                break
            if deployer is not None and get_database_size(deployer) >= size:
                break
    except Exception:
        traceback.print_exc()
    queue.put(cnt)

def get_database_size(deployer):
    """Return the deployed MySQL database's size in MB (data + indexes).

    Temporarily points the deployer at MySQL so the connection helper
    targets the right engine, then sums data_length + index_length from
    information_schema for the deploy's schema.
    """
    deployer.database = Database()
    deployer.database.name = 'MySQL'
    conn = deployer.get_database_connection(False)
    cur = conn.cursor()
    cur.execute('''
        SELECT Round(SUM(data_length + index_length) / 1024 / 1024, 1)
        FROM information_schema.tables
        WHERE table_schema = '{}'
    '''.format(deployer.database_config['name']))
    size = cur.fetchone()[0]
    return size

def main():
    """Deploy one repository inside the VM, drive it with concurrent
    benchmark workers, then analyze the executed queries and schema."""
    # parse args
    parser = argparse.ArgumentParser()
    parser.add_argument('--attempt_info', type=str)
    parser.add_argument('--deploy_id', type=int)
    parser.add_argument('--database', type=str)
    parser.add_argument('--host', type=str)
    parser.add_argument('--port', type=int)
    parser.add_argument('--name', type=str)
    parser.add_argument('--username', type=str)
    parser.add_argument('--password', type=str)
    parser.add_argument('--num_threads', type=int)
    parser.add_argument('--timeout', type=int)
    parser.add_argument('--size', type=int)
    args = parser.parse_args()

    # load the attempt description produced by the host-side script
    with open(args.attempt_info, 'r') as attempt_info_file:
        attempt_info = json.loads(attempt_info_file.read())
    deploy_id = args.deploy_id
    database_config = {
        'database': args.database,
        'host': args.host,
        'port': args.port,
        'name': args.name,
        'username': args.username,
        'password': args.password
    }
    num_threads = args.num_threads
    timeout = args.timeout
    size = args.size

    # resolve the deployer class for this project type
    project_type = attempt_info['repo_info']['project_type']
    deployer_class = {
        1: 'DjangoDeployer',
        2: 'RoRDeployer',
        3: 'NodeDeployer',
        4: 'DrupalDeployer',
        5: 'GrailsDeployer'
    }[project_type]

    module_name = "deployers.%s" % (deployer_class.lower())
    module_handle = __import__(module_name, globals(), locals(), [deployer_class])
    klass = getattr(module_handle, deployer_class)

    deployer = klass(None, None, deploy_id, database_config)

    result = deployer.deploy(attempt_info)
    if result != 0:
        deployer.kill_server()
        sys.exit(-1)

    LOG.info('Running driver ...')
    driver = BaseDriver(deployer.get_main_url(), deployer.get_database(),
                        deployer.deploy_id, deployer.base_path, deployer.log_file)
    try:
        driver.bootstrap()
        driver.initialize()
    except Exception:
        traceback.print_exc()

    LOG.info('Start Driving the Database ...')
    actions_cnt = 0
    processes = []
    try:
        # silence per-request logging from the HTTP stack
        logging.getLogger("requests").setLevel(logging.WARNING)
        logging.getLogger("urllib3").setLevel(logging.WARNING)
        # fan out one worker process per "thread"; each reports its action
        # count back through the shared queue
        queue = Queue()
        for _ in range(num_threads):
            process = Process(target=run_driver,
                              args=(driver, timeout, size, queue, deployer))
            processes.append(process)
            process.start()
        for process in processes:
            process.join()
        for _ in range(num_threads):
            actions_cnt += queue.get()
    except Exception:
        traceback.print_exc()

    LOG.info('The number of actions submitted : {}'.format(actions_cnt))

    # kill server
    deployer.kill_server()

    # analyze the queries observed for each form and URL
    LOG.info('Analyzing queries ...')
    analyzer = get_analyzer(deployer)
    for form, _ in driver.forms:
        analyzer.analyze_queries(form['queries'])
    for url in driver.urls:
        analyzer.analyze_queries(url['queries'])
    LOG.info(analyzer.queries_stats)

    # extract database info
    LOG.info('Extracting database info ...')
    analyzer.analyze_database()
    LOG.info(analyzer.database_stats)

    LOG.info('Database Size : {} '.format(get_database_size(deployer)))

    LOG.info('Finishing ...')

if __name__ == "__main__":
    main()
    80 | {% block main %}{% endblock %} 81 |
    82 | 83 | 84 | 85 | 104 | 105 | 106 | 108 | 109 | 110 | 111 | {{ analytics_code }} 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /core/analyzers/mysqlanalyzer.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import logging 5 | import datetime 6 | import traceback 7 | 8 | from baseanalyzer import BaseAnalyzer 9 | 10 | ## ===================================================================== 11 | ## LOGGING CONFIGURATION 12 | ## ===================================================================== 13 | LOG = logging.getLogger() 14 | 15 | ## ===================================================================== 16 | ## MYSQL ANALYZER 17 | ## ===================================================================== 18 | class MySQLAnalyzer(BaseAnalyzer): 19 | 20 | def __init__(self, deployer): 21 | BaseAnalyzer.__init__(self, deployer) 22 | 23 | def analyze_queries(self, queries): 24 | self.queries_stats['num_transactions'] = self.count_transaction(queries) + self.queries_stats.get('num_transactions', 0) 25 | 26 | try: 27 | conn = self.deployer.get_database_connection() 28 | cur = conn.cursor() 29 | 30 | for query in queries: 31 | try: 32 | if self.is_valid_for_explain(query['raw']): 33 | explain_query = 'EXPLAIN {};'.format(query['raw']) 34 | # print explain_query 35 | cur.execute(explain_query) 36 | rows = cur.fetchall() 37 | output = '\n' 38 | for row in rows: 39 | output += str(row) + '\n' 40 | query['explain'] = output 41 | except Exception, e: 42 | pass 43 | # LOG.exception(e) 44 | 45 | for query in queries: 46 | try: 47 | if self.is_valid_for_explain(query['raw']): 48 | cur.execute(query['raw']) 49 | cur.fetchall() 50 | 51 | stats_query = 'SHOW SESSION STATUS;' 52 | # print explain_query 53 | cur.execute(stats_query) 54 | rows = cur.fetchall() 55 | output = '\n' 56 | for 
row in rows: 57 | output += str(row) + '\n' 58 | query['stats'] = output 59 | except Exception, e: 60 | # traceback.print_exc() 61 | pass 62 | # pass 63 | # LOG.exception(e) 64 | 65 | cur.close() 66 | conn.close() 67 | except Exception, e: 68 | LOG.exception(e) 69 | 70 | def analyze_database(self): 71 | try: 72 | conn = self.deployer.get_database_connection() 73 | cur = conn.cursor() 74 | database = self.deployer.get_database_name() 75 | 76 | # the number of tables 77 | cur.execute("SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '{}';".format(database)) 78 | self.database_stats['num_tables'] = int(cur.fetchone()[0]) 79 | 80 | # the number of indexes 81 | cur.execute("SELECT COUNT(DISTINCT table_name, index_name) FROM information_schema.statistics WHERE table_schema = '{}';".format(database)) 82 | self.database_stats['num_indexes'] = int(cur.fetchone()[0]) 83 | 84 | # the number of constraints 85 | cur.execute("SELECT COUNT(*) FROM information_schema.table_constraints WHERE constraint_schema = '{}';".format(database)) 86 | self.database_stats['num_constraints'] = int(cur.fetchone()[0]) 87 | 88 | # the number of foreign keys 89 | cur.execute("SELECT COUNT(*) FROM information_schema.referential_constraints WHERE constraint_schema = '{}';".format(database)) 90 | self.database_stats['num_foreignkeys'] = int(cur.fetchone()[0]) 91 | 92 | # the full information of tables 93 | cur.execute("SELECT * FROM information_schema.tables WHERE table_schema = '{}';".format(database)) 94 | self.database_informations['tables'] = str(cur.fetchall()) 95 | 96 | # the full information of columns 97 | cur.execute("SELECT * from INFORMATION_SCHEMA.columns WHERE table_schema = '{}';".format(database)) 98 | self.database_informations['columns'] = str(cur.fetchall()) 99 | 100 | # the full information of indexes 101 | cur.execute("SELECT * FROM information_schema.statistics WHERE table_schema = '{}';".format(database)) 102 | self.database_informations['indexes'] = 
str(cur.fetchall()) 103 | 104 | # the full information of constraints 105 | cur.execute("SELECT * FROM information_schema.table_constraints WHERE constraint_schema = '{}';".format(database)) 106 | self.database_informations['constraints'] = str(cur.fetchall()) 107 | 108 | # the full information of constraints 109 | cur.execute("SELECT * FROM information_schema.key_column_usage WHERE constraint_schema = '{}';".format(database)) 110 | self.database_informations['key_column_usage'] = str(cur.fetchall()) 111 | 112 | # the full information of foreign keys 113 | cur.execute("SELECT * FROM information_schema.referential_constraints WHERE constraint_schema = '{}';".format(database)) 114 | self.database_informations['foreignkeys'] = str(cur.fetchall()) 115 | 116 | # the full information of triggers 117 | cur.execute("SELECT * FROM information_schema.triggers WHERE trigger_schema = '{}';".format(database)) 118 | self.database_informations['triggers'] = str(cur.fetchall()) 119 | 120 | # the full information of views 121 | cur.execute("SELECT * FROM information_schema.views WHERE table_schema = '{}';".format(database)) 122 | self.database_informations['views'] = str(cur.fetchall()) 123 | 124 | cur.close() 125 | conn.close() 126 | except Exception, e: 127 | LOG.exception(e) -------------------------------------------------------------------------------- /analysis/foreign/foreign.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) 5 | 6 | import re 7 | import csv 8 | import numpy as np 9 | import sqlparse 10 | import traceback 11 | from utils import filter_repository, dump_all_stats, pickle_dump 12 | 13 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 14 | import django 15 | django.setup() 16 | 17 | from library.models import * 
def foreign_key_stats(directory='.'):
    """Compute foreign-key usage statistics across all successfully
    deployed repositories and dump them via ``dump_all_stats``.

    For each repository the saved information_schema dumps ('columns',
    'key_column_usage', 'constraints') are parsed back from their str()'d
    tuple form; the function then tallies the column types used as
    foreign keys and, per action, counts query tokens that reference a
    FOREIGN KEY column — both grouped by project type.

    Fixes over the original: the dead ``if 0:`` block (referencing a
    nonexistent stats key) was removed; repositories on engines other
    than PostgreSQL/MySQL are skipped instead of crashing on an unbound
    ``regex``; missing entries in the column dump no longer raise
    KeyError.
    """
    stats = {'foreign_key_count': {}, 'foreign_key_type': {}}

    for repo in Repository.objects.exclude(latest_successful_attempt=None):
        if filter_repository(repo):
            continue

        database_name = repo.latest_successful_attempt.database.name
        # The dumps are str()'d lists of tuples; the regex pulls out one
        # parenthesized tuple at a time (list vs. tuple terminator differs
        # between the two engines' dump formats).
        if database_name == 'PostgreSQL':
            regex = '(\(.*?\))[,\]]'
        elif database_name == 'MySQL':
            regex = '(\(.*?\))[,\)]'
        else:
            # Unsupported engine: no dump format known for it.
            continue

        project_type_name = repo.project_type.name
        stats['foreign_key_count'].setdefault(project_type_name, [])
        stats['foreign_key_type'].setdefault(project_type_name, {})

        # Build table.column -> column type (and bare column -> type).
        informations = Information.objects.filter(
            attempt=repo.latest_successful_attempt).filter(name='columns')
        column_map = {}
        if len(informations) > 0:
            information = informations[0]
            for column in re.findall(regex, information.description):
                cells = column.split(',')
                table = str(cells[2]).replace("'", "").strip()
                name = str(cells[3]).replace("'", "").strip()
                _type = str(cells[7]).replace("'", "").strip()
                column_map[table + '.' + name] = _type
                column_map[name] = _type

        key_column_usage_informations = Information.objects.filter(
            attempt=repo.latest_successful_attempt).filter(name='key_column_usage')
        constraint_informations = Information.objects.filter(
            attempt=repo.latest_successful_attempt).filter(name='constraints')
        constraint_map = {}
        if len(key_column_usage_informations) > 0 and len(constraint_informations) > 0:
            # table.constraint -> [member column names]
            merge_map = {}
            key_column_usage_information = key_column_usage_informations[0]
            for column in re.findall(regex, key_column_usage_information.description):
                cells = column.split(',')
                constraint_name = str(cells[2]).replace("'", "").strip()
                table_name = str(cells[5]).replace("'", "").strip()
                column_name = str(cells[6]).replace("'", "").strip()
                merge_map.setdefault(table_name + '.' + constraint_name, []).append(column_name)

            constraint_information = constraint_informations[0]
            for column in re.findall(regex, constraint_information.description):
                cells = column.split(',')
                constraint_name = str(cells[2]).replace("'", "").strip()
                # The two engines place table name / constraint type in
                # different tuple positions.
                if database_name == 'PostgreSQL':
                    table_name = str(cells[5]).replace("'", "").strip()
                    constraint_type = str(cells[6]).replace("'", "").strip()
                else:  # MySQL
                    table_name = str(cells[4]).replace("'", "").strip()
                    constraint_type = str(cells[5])[:-1].replace("'", "").strip()
                merge_map_key = table_name + '.' + constraint_name
                if merge_map_key not in merge_map:
                    continue
                for column_name in merge_map[merge_map_key]:
                    constraint_map[table_name + '.' + column_name] = constraint_type
                    constraint_map[column_name] = constraint_type

                    if constraint_type == 'FOREIGN KEY':
                        # The original indexed column_map directly and
                        # crashed when the column dump lacked the entry.
                        _type = column_map.get(table_name + '.' + column_name)
                        if _type is not None:
                            stats['foreign_key_type'][project_type_name][_type] = \
                                stats['foreign_key_type'][project_type_name].get(_type, 0) + 1

        # Per action: count query identifiers that hit a FOREIGN KEY column.
        for action in Action.objects.filter(attempt=repo.latest_successful_attempt):
            queries = Query.objects.filter(action=action)
            foreign_key_count = 0

            for query in queries:
                parsed = sqlparse.parse(query.content)[0]
                for token in parsed.tokens:
                    if isinstance(token, sqlparse.sql.Identifier):
                        token_name = token.value.replace('"', '').replace('`', '')
                        if constraint_map.get(token_name) == 'FOREIGN KEY':
                            foreign_key_count += 1

                for explain in Explain.objects.filter(query=query):
                    if 'FOREIGN' in explain.output:
                        print(explain.output)

            stats['foreign_key_count'][project_type_name].append(foreign_key_count)

    dump_all_stats(directory, stats)

def main():
    """Script entry point."""
    foreign_key_stats()

if __name__ == '__main__':
    main()
LOG = logging.getLogger(__name__)
LOG_handler = logging.StreamHandler()
LOG_formatter = logging.Formatter(fmt='%(asctime)s [%(filename)s:%(funcName)s:%(lineno)03d] %(levelname)-5s: %(message)s',
                                  datefmt='%m-%d-%Y %H:%M:%S')
LOG_handler.setFormatter(LOG_formatter)
LOG.addHandler(LOG_handler)
LOG.setLevel(logging.INFO)

## =====================================================================
## DRUPAL CONFIGURATION
## =====================================================================
# Project page and commit-node URL templates on drupal.org.
BASE_URL = 'https://www.drupal.org/project/{name}'
COMMIT_URL = 'https://www.drupal.org/node/{sha}'
# Listing page of Drupal "distribution" projects used as the crawl seed.
SEARCH_URL = 'https://www.drupal.org/project/project_distribution'
DRUPAL_HOST = 'https://www.drupal.org'
# Seconds to wait between repository requests (politeness delay).
DRUPAL_SLEEP = 1

## =====================================================================
## DRUPAL CRAWLER
## =====================================================================
class DrupalCrawler(BaseCrawler):
    """Crawler that discovers Drupal distribution projects on drupal.org
    and registers them as repositories.

    Unlike the GitHub-style crawlers there is no API: everything is
    scraped from the HTML listing/project pages, so the selectors below
    are tied to drupal.org's page structure.
    """

    def __init__(self, crawlerStatus, auth):
        # NOTE(review): `auth` is accepted for interface parity with the
        # other crawlers but is unused — drupal.org needs no credentials.
        BaseCrawler.__init__(self, crawlerStatus)
    ## DEF

    def next_url(self):
        """Return the next listing URL to crawl (resume point or seed)."""
        # Check whether there is a next url that we need to load
        # from where we left off from our last run\
        if not self.crawlerStatus.next_url is None and not self.crawlerStatus.next_url == '':
            return self.crawlerStatus.next_url

        # Otherwise, compute what the next page we want to load
        return SEARCH_URL
    ## DEF

    def search(self):
        """Crawl one listing page: register every project found, then
        persist the pager's next-page URL into crawlerStatus."""
        # Load and parse!
        response = utils.query(self.next_url())
        soup = BeautifulSoup(response.text)
        titles = soup.find_all(class_='node-project-distribution')
        LOG.info("Found %d repositories" % len(titles))

        # Pick through the results and find repos
        for title in titles:
            # Project machine name is the 3rd path segment of the link,
            # e.g. /project/<name> — assumes drupal.org's markup; verify
            # if the listing page layout changes.
            name = title.contents[1].contents[0]['href'].split('/')[2]
            try:
                self.add_repository(name)
            except:
                traceback.print_exc()
            # Sleep for a little bit to prevent us from getting blocked
            time.sleep(DRUPAL_SLEEP)
        ## FOR

        # Figure out what is the next page that we need to load
        try:
            next_page = soup.find(class_='pager-next').contents[0]
        except:
            next_page = None
        if not next_page or not next_page.has_attr('href'):
            LOG.info("No next page link found!")
            self.crawlerStatus.next_url = None
        else:
            self.crawlerStatus.next_url = DRUPAL_HOST + next_page['href']

        # Make sure we update our crawler status
        LOG.info("Updating status for %s" % self.crawlerStatus)
        self.crawlerStatus.save()

        return
    ## DEF

    def get_api_data(self, name):
        """Scrape the project page for its URL and creation timestamp.

        Returns a dict with 'url' and 'time' (the first <time> element's
        datetime attribute — presumably a Unix timestamp string, since
        add_repository() feeds it to int(); TODO confirm).
        """
        data = {}
        data['url'] = self.crawlerStatus.source.get_url(name)
        response = requests.get(data['url'])
        soup = BeautifulSoup(response.text)
        data['time'] = soup.find('time').attrs['datetime']
        return data
    # DEF

    def add_repository(self, name, setup_scripts = None):
        """Create a Repository record for `name` unless it already exists.

        Most numeric metadata (stars, forks, ...) has no drupal.org
        equivalent and is stored as -1 sentinels.
        """
        if Repository.objects.filter(name='drupal/' + name, source=self.crawlerStatus.source).exists():
            LOG.info("Repository '%s' already exists" % name)
        else:
            api_data = self.get_api_data(name)

            # Create the new repository
            repo = Repository()
            repo.name = 'drupal/' + name
            repo.source = self.crawlerStatus.source
            repo.project_type = self.crawlerStatus.project_type
            repo.last_attempt = None
            repo.created_at = datetime.fromtimestamp(int(api_data['time'])).strftime("%Y-%m-%d %H:%M:%S")
            # drupal.org exposes no separate update/push times; reuse created_at.
            repo.updated_at = repo.created_at
            repo.pushed_at = repo.created_at
            repo.homepage = api_data['url']
            repo.size = -1
            repo.stargazers_count = -1
            repo.watchers_count = -1
            repo.language = 'PHP'
            repo.forks_count = -1
            repo.open_issues_count = -1
            repo.default_branch = 'master'
            repo.network_count = -1
            repo.subscribers_count = -1
            repo.commits_count = -1
            repo.branches_count = -1
            repo.releases_count = -1
            repo.contributors_count = -1
            repo.setup_scripts = setup_scripts
            repo.save()
            LOG.info("Successfully created new repository '%s' [%d]" % (repo, repo.id))
        ## IF
    # DEF

    def get_latest_sha(self, repo_name):
        """Return the latest commit node id scraped from the project page.

        NOTE(review): returns results[1], not results[0] — presumably the
        first COMMIT_URL match on the page is not a commit link; confirm
        against a live project page before changing.
        """
        url = BASE_URL.format(name = repo_name)
        response = utils.query(url)
        data = response.text
        results = re.findall(COMMIT_URL.format(sha='(\d+)'), data)
        return results[1]
    # DEF

    def download_repository(self, repo_name, sha, zip_name):
        """Download the project's release zip to `zip_name`.

        The first https://...zip link found on the project page is used;
        the `sha` argument is unused here (interface parity with the
        other crawlers).
        """
        url = BASE_URL.format(name = repo_name)
        response = utils.query(url)
        data = response.text
        download_url = re.search('https://[^ ]*?\.zip', data).group(0)

        response = utils.query(download_url)
        zip_file = open(zip_name, 'wb')
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                zip_file.write(chunk)
                zip_file.flush()
        zip_file.close()
    # DEF
## =====================================================================
## NODE.JS DEPLOYER
## =====================================================================
class NodeDeployer(BaseDeployer):
    """Deployer for Node.js applications that use MySQL.

    Locates the app's package.json, rewrites its mysql.createConnection
    call to point at our managed database, loads any bundled .sql schema
    files, installs npm dependencies, and starts the main script.

    Changes over the original: `is None` instead of `== None`,
    `items()`/`print(...)`/`except ... as e` instead of the Python-2-only
    forms (all equivalent on Python 2.6+ as well).
    """

    def __init__(self, repo, database, deploy_id, database_config = None):
        # When no explicit config is given, derive a unique per-deploy
        # database name.
        BaseDeployer.__init__(self, repo, database, deploy_id, database_config)
        if database_config is None:
            self.database_config['name'] = 'node_app' + str(deploy_id)
        # Entry-point script (server.js/app.js/main.js), discovered during
        # deploy_repo_attempt().
        self.main_filename = None
    ## DEF

    def configure_settings(self, path):
        """Rewrite every mysql.createConnection({...}) call under `path`
        so the application connects to our managed database."""
        utils.replace_files_regex(path, "mysql\.createConnection\({.*?}.*?\);",
            """mysql.createConnection({{
                host     : '{host}',
                port     : '{port}',
                user     : '{user}',
                password : '{password}',
                database : '{database}'
            }});
            """.format(host=self.database_config['host'], port=self.database_config['port'],
                       user=self.database_config['username'], password=self.database_config['password'],
                       database=self.database_config['name']))
    ## DEF

    def install_requirements(self, path):
        """Run `npm install` in `path`; return its stderr if any,
        otherwise its stdout (used later to harvest package versions)."""
        if path:
            command = '{} && npm install'.format(utils.cd(path))
            out = utils.run_command(command)
            if out[1] == '':
                return out[2]
            else:
                return out[1]
        return ''
    ## DEF

    def get_main_url(self):
        """Return the local URL the deployed app is served on."""
        return 'http://127.0.0.1:{}/'.format(self.port)
    ## DEF

    def sync_server(self, path):
        # Node apps have no separate schema-sync step (schema comes from
        # the bundled .sql files handled in create_tables()).
        pass
    ## DEF

    def run_server(self, path):
        """Start the app's main script asynchronously with `node`."""
        self.configure_network()
        LOG.info('Running server ...')
        command = '{} && node {}'.format(
            utils.cd(path), self.main_filename)
        return utils.run_command_async(command)
    ## DEF

    def get_runtime(self):
        """Return the node executable name and its version string."""
        out = utils.run_command('node -v')
        return {
            'executable': 'node',
            # `node -v` prints e.g. "v4.2.1"; strip the leading 'v'.
            'version': out[1][1:]
        }
    ## DEF

    def find_port(self):
        """Discover the port the node process is listening on via netstat."""
        out = utils.run_command('netstat -nlp | grep -i "node"')
        port = re.search('0 :::(\d+)', out[1])
        if port:
            self.port = port.group(1)

    def create_tables(self, deploy_path):
        """Execute every statement of every .sql file under `deploy_path`.

        Returns True when at least one .sql file was found (regardless of
        per-statement failures, which are logged and skipped).
        """
        executed = False
        sql_files = utils.search_file_regex(deploy_path, '.*\.sql')
        conn = self.get_database_connection()
        cur = conn.cursor()
        for sql_file in sql_files:
            executed = True
            # Naive split on ';' — good enough for plain schema dumps.
            for statement in open(sql_file).read().split(';'):
                try:
                    cur.execute(statement)
                except Exception as e:
                    print(statement)
                    LOG.exception(e)
        if self.database.name == 'MySQL':
            conn.commit()
        return executed

    def try_deploy(self, deploy_path):
        """Full deploy pipeline: configure, load schema, npm install,
        start the server, and return the resulting attempt status."""
        LOG.info('Configuring settings ...')
        self.kill_server()
        self.clear_database()
        self.configure_settings(deploy_path)
        self.runtime = self.get_runtime()
        LOG.info(self.runtime)

        self.attempt.database = self.get_database()
        LOG.info('Database: ' + self.attempt.database.name)

        LOG.info('Create Tables ...')
        try:
            if not self.create_tables(deploy_path):
                LOG.error('No sql file found!')
                return ATTEMPT_STATUS_MISSING_REQUIRED_FILES
        except Exception as e:
            LOG.exception(e)

        LOG.info('Installing requirements ...')
        out = self.install_requirements(deploy_path)
        # Harvest "<name>@<version>" lines from the npm output.
        packages = {}
        for line in out.split('\n'):
            s = re.search('(.+?)@([0-9\.]+)', line)
            if s:
                name, version = s.group(1), s.group(2)
                name = name.split(' ')[-1]
                packages[name] = version

        for name, version in packages.items():
            try:
                pkg, created = Package.objects.get_or_create(name=name, version=version, project_type=self.repo.project_type)
                self.packages_from_file.append(pkg)
            except Exception as e:
                LOG.exception(e)

        self.run_server(deploy_path)
        # Give the server a moment to bind its port before probing it.
        time.sleep(5)

        self.find_port()

        attemptStatus = self.check_server()

        return attemptStatus
    ## DEF

    def deploy_repo_attempt(self, deploy_path):
        """Locate the app root (shallowest package.json) and main script,
        then run the deploy pipeline. Returns an attempt status code."""
        package_jsons = utils.search_file(deploy_path, 'package.json')
        if not package_jsons:
            LOG.error('No package.json found!')
            return ATTEMPT_STATUS_MISSING_REQUIRED_FILES
        base_dir = sorted([os.path.dirname(package_json) for package_json in package_jsons])[0]

        # Conventional entry-point names, in order of preference.
        for main_filename in ['server.js', 'app.js', 'main.js']:
            if utils.search_file_norecur(base_dir, main_filename):
                self.main_filename = main_filename
                break
        if self.main_filename is None:
            LOG.error('No main file found!')
            return ATTEMPT_STATUS_MISSING_REQUIRED_FILES

        self.setting_path = base_dir

        return self.try_deploy(base_dir)
    ## DEF

## CLASS
br == None: 33 | br = mechanize.Browser() 34 | cj = cookielib.LWPCookieJar() 35 | br.set_cookiejar(cj) 36 | br.set_handle_robots(False) 37 | 38 | br.open(form['url'].encode("ascii","ignore")) 39 | br.select_form(nr=get_form_index(br, form)) 40 | 41 | for input in form['inputs']: 42 | if input['name'] in inputs: 43 | try: 44 | if br.find_control(name = input['name'], type = input['type']) == None: 45 | continue 46 | if input['type'] == 'file': 47 | filename = inputs[input['name']]['filename'] 48 | upload_filename = os.path.basename(filename) 49 | mime_type = inputs[input['name']]['mime_type'] 50 | br.form.add_file(open(filename), mime_type, upload_filename, name = input['name']) 51 | br.form.set_all_readonly(False) 52 | elif input['type'] == 'checkbox': 53 | br.find_control(name = input['name'], type = input['type']).selected = inputs[input['name']] 54 | else: 55 | if br.find_control(name = input['name'], type = input['type']).readonly: 56 | continue 57 | if input['type'] == 'radio': 58 | continue 59 | br[input['name']] = inputs[input['name']] 60 | except: 61 | # traceback.print_exc() 62 | pass 63 | 64 | response = br.submit().code 65 | 66 | return response, br 67 | 68 | def gen_random_value(chars = string.ascii_letters + string.digits, length = 0): 69 | if length == 0: 70 | length = random.choice(range(8, 21)) 71 | return ''.join(random.choice(chars) for x in range(length)) 72 | 73 | def gen_random_true_false(): 74 | return random.choice([True, False]) 75 | 76 | def gen_file(base_path, input): 77 | if input['name'] != '' and 'image' in input['name']: 78 | filename = os.path.join(os.path.dirname(__file__), os.pardir, "files", "image.jpg") 79 | mime_type = 'image/jpeg' 80 | else: 81 | filename = os.path.join(base_path, gen_random_value() + '.txt') 82 | with open(filename, 'w') as f: 83 | f.write(gen_random_value(length = 1000)) 84 | f.close() 85 | mime_type = 'text/plain' 86 | return filename, mime_type 87 | 88 | def fill_form(form, matched_patterns = {}, br = None): 
89 | inputs = {} 90 | for input in form['inputs']: 91 | if input['value'] != '': 92 | continue 93 | for pattern_name in patterns: 94 | if input['type'] == 'hidden': 95 | continue 96 | pattern, value = patterns[pattern_name] 97 | if match_any_pattern(input['name'], pattern) or match_any_pattern(input['type'], pattern): 98 | if pattern_name in matched_patterns: 99 | inputs[input['name']] = matched_patterns[pattern_name] 100 | else: 101 | inputs[input['name']] = value[0] 102 | matched_patterns[pattern_name] = value[0] 103 | break 104 | elif input['type'] == 'checkbox': 105 | inputs[input['name']] = True 106 | else: 107 | inputs[input['name']] = gen_random_value() 108 | 109 | response, br = submit_form(form, inputs, br) 110 | 111 | return matched_patterns, inputs, response, br 112 | 113 | def fill_form_random(form, br, base_path = '/tmp'): 114 | inputs = {} 115 | for input in form['inputs']: 116 | if input['value'] != '': 117 | continue 118 | if input['type'] == 'file': 119 | filename, mime_type = gen_file(base_path, input) 120 | inputs[input['name']] = { 121 | 'filename' : filename, 122 | 'mime_type': mime_type 123 | } 124 | elif input['type'] == 'checkbox': 125 | inputs[input['name']] = gen_random_true_false() 126 | else: 127 | inputs[input['name']] = gen_random_value() 128 | 129 | response, br = submit_form(form, inputs, br) 130 | 131 | return inputs 132 | 133 | def submit_form_fast(form, inputs, files, session): 134 | new_url = urlparse.urljoin(form['url'], form['action']) 135 | if files == None: 136 | response = session.post(new_url, data = inputs) 137 | else: 138 | response = session.post(new_url, data = inputs, files = files) 139 | return response 140 | 141 | def fill_form_random_fast(form, session, base_path = '/tmp'): 142 | inputs = {} 143 | files = None 144 | response = session.get(form['url']) 145 | soup = BeautifulSoup(response.text) 146 | for input in form['inputs']: 147 | if input['value'] != '': 148 | i = soup.find('input', {"name":input['name']}) 149 | 
if i: 150 | inputs[input['name']] = i['value'] 151 | continue 152 | if input['type'] == 'file': 153 | if files == None: 154 | files = {} 155 | filename, mime_type = gen_file(base_path, input) 156 | upload_filename = os.path.basename(filename) 157 | files[input['name']] = (upload_filename, open(filename), mime_type) 158 | elif input['type'] == 'checkbox': 159 | inputs[input['name']] = gen_random_true_false() 160 | else: 161 | inputs[input['name']] = gen_random_value() 162 | 163 | response = submit_form_fast(form, inputs, files, session) 164 | 165 | return inputs --------------------------------------------------------------------------------