├── blog ├── __init__.py ├── parsers │ ├── __init__.py │ └── markdown_parser.py ├── templatetags │ ├── __init__.py │ └── pinax_blog_tags.py ├── signals.py ├── templates │ ├── blog_base.html │ ├── rss_item.xml │ ├── dateline.html │ ├── atom_feed.xml │ ├── dateline_stale.html │ ├── rss_feed.xml │ ├── blog_list.html │ ├── blog_section_list.html │ ├── blog_post.html │ └── atom_entry.xml ├── managers.py ├── conf.py ├── utils.py ├── urls.py ├── admin.py └── forms.py ├── library ├── __init__.py ├── .gitignore ├── templatetags │ ├── __init__.py │ └── active_page.py ├── tests.py ├── static │ ├── img │ │ ├── django.png │ │ ├── drupal.png │ │ ├── github.png │ │ ├── grails.png │ │ ├── nodejs.png │ │ ├── favicon.png │ │ ├── no_image.gif │ │ ├── jumbotron-bg.jpg │ │ ├── ruby_on_rails.png │ │ ├── people │ │ │ ├── andypavlo.jpg │ │ │ ├── danavanaken.jpg │ │ │ └── zeyuanshang.jpg │ │ ├── glyphicons-halflings.png │ │ └── glyphicons-halflings-white.png │ ├── font-awesome │ │ ├── fonts │ │ │ ├── FontAwesome.otf │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.ttf │ │ │ └── fontawesome-webfont.woff │ │ ├── less │ │ │ ├── fixed-width.less │ │ │ ├── bordered-pulled.less │ │ │ ├── larger.less │ │ │ ├── core.less │ │ │ ├── list.less │ │ │ ├── font-awesome.less │ │ │ ├── stacked.less │ │ │ ├── rotated-flipped.less │ │ │ ├── spinning.less │ │ │ ├── path.less │ │ │ └── mixins.less │ │ └── scss │ │ │ ├── _fixed-width.scss │ │ │ ├── _bordered-pulled.scss │ │ │ ├── _larger.scss │ │ │ ├── _core.scss │ │ │ ├── _list.scss │ │ │ ├── font-awesome.scss │ │ │ ├── _stacked.scss │ │ │ ├── _spinning.scss │ │ │ ├── _path.scss │ │ │ ├── _rotated-flipped.scss │ │ │ └── _mixins.scss │ ├── fonts │ │ ├── glyphicons-halflings-regular.eot │ │ ├── glyphicons-halflings-regular.ttf │ │ ├── glyphicons-halflings-regular.woff │ │ └── glyphicons-halflings-regular.woff2 │ ├── js │ │ └── collapse.js │ └── md │ │ └── tools.md ├── fixtures │ ├── apistatistic.json │ ├── database.json │ ├── 
repositorysource.json │ ├── projecttype.json │ └── crawlerstatus.json ├── templates │ ├── queries.html │ ├── analytics │ │ └── analytics.html │ ├── search.html │ ├── admin │ │ ├── add_module.html │ │ └── add_repository.html │ ├── about.html │ ├── status │ │ └── attempt_status_codes.html │ └── base.html ├── context_processors.py ├── urls.py ├── serializers.py ├── admin.py └── forms.py ├── cmudbac ├── .gitignore ├── __init__.py ├── urls.py ├── wsgi.py └── settings_example.py ├── analysis ├── .gitignore ├── cluster │ └── .gitignore ├── utils.py ├── general │ ├── analyze_repository.py │ └── analyze_transactions.py └── foreign │ └── foreign.py ├── core ├── drivers │ ├── count │ │ ├── __init__.py │ │ └── count.py │ ├── extract │ │ ├── driver │ │ │ ├── __init__.py │ │ │ ├── spiders │ │ │ │ ├── __init__.py │ │ │ │ ├── url.py │ │ │ │ ├── url_with_cookie.py │ │ │ │ ├── form_with_cookie.py │ │ │ │ └── form.py │ │ │ ├── pipelines.py │ │ │ ├── items.py │ │ │ └── settings.py │ │ ├── __init__.py │ │ ├── scrapy.cfg │ │ └── extract.py │ ├── files │ │ └── image.jpg │ ├── __init__.py │ ├── submit │ │ ├── __init__.py │ │ ├── query.py │ │ ├── patterns.py │ │ ├── login.py │ │ ├── register.py │ │ └── submit.py │ ├── benchmarkdriver.py │ └── randomdriver.py ├── crawlers │ ├── __init__.py │ ├── basecrawler.py │ └── drupalcrawler.py ├── deployers │ ├── __init__.py │ └── nodedeployer.py ├── analyzers │ ├── __init__.py │ ├── baseanalyzer.py │ ├── sqlite3analyzer.py │ ├── postgresqlanalyzer.py │ └── mysqlanalyzer.py ├── utils │ ├── network.py │ ├── timeout.py │ ├── __init__.py │ ├── rvm.py │ ├── run.py │ ├── pip.py │ ├── file.py │ ├── data.py │ └── vagrant.py └── scripts │ ├── vagrant_deploy.py │ └── vagrant_benchmark.py ├── vagrant ├── .gitignore ├── requirements.txt ├── Vagrantfile_example └── bootstrap.sh ├── .gitignore ├── requirements.txt ├── README.md ├── manage.py ├── scripts ├── deploy_repo.py ├── remove_attempts.py ├── crawl_repos.py ├── run_driver.py ├── crawl_repo.py └── 
count_repos.py └── tools └── local-deployer.py /blog/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /library/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blog/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blog/templatetags/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cmudbac/.gitignore: -------------------------------------------------------------------------------- 1 | settings.py -------------------------------------------------------------------------------- /library/.gitignore: -------------------------------------------------------------------------------- 1 | !*.json 2 | -------------------------------------------------------------------------------- /library/templatetags/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /analysis/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | fig/* 3 | -------------------------------------------------------------------------------- /analysis/cluster/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.pdf 3 | -------------------------------------------------------------------------------- /core/drivers/count/__init__.py: -------------------------------------------------------------------------------- 1 | from count import count_query 
-------------------------------------------------------------------------------- /core/drivers/extract/driver/__init__.py: -------------------------------------------------------------------------------- 1 | from driver import * -------------------------------------------------------------------------------- /core/crawlers/__init__.py: -------------------------------------------------------------------------------- 1 | from basecrawler import * 2 | from githubcrawler import * -------------------------------------------------------------------------------- /library/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /core/drivers/files/image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/core/drivers/files/image.jpg -------------------------------------------------------------------------------- /library/static/img/django.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/django.png -------------------------------------------------------------------------------- /library/static/img/drupal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/drupal.png -------------------------------------------------------------------------------- /library/static/img/github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/github.png -------------------------------------------------------------------------------- /library/static/img/grails.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/grails.png -------------------------------------------------------------------------------- /library/static/img/nodejs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/nodejs.png -------------------------------------------------------------------------------- /vagrant/.gitignore: -------------------------------------------------------------------------------- 1 | Vagrantfile 2 | blog/ 3 | cmudbac/ 4 | core/ 5 | library/ 6 | tests/ 7 | scripts/ 8 | -------------------------------------------------------------------------------- /core/drivers/__init__.py: -------------------------------------------------------------------------------- 1 | from basedriver import * 2 | from benchmarkdriver import * 3 | from randomdriver import * -------------------------------------------------------------------------------- /library/static/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/favicon.png -------------------------------------------------------------------------------- /library/static/img/no_image.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/no_image.gif -------------------------------------------------------------------------------- /library/static/img/jumbotron-bg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/jumbotron-bg.jpg -------------------------------------------------------------------------------- /library/static/img/ruby_on_rails.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/ruby_on_rails.png -------------------------------------------------------------------------------- /library/static/img/people/andypavlo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/people/andypavlo.jpg -------------------------------------------------------------------------------- /library/static/img/people/danavanaken.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/people/danavanaken.jpg -------------------------------------------------------------------------------- /library/static/img/people/zeyuanshang.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/people/zeyuanshang.jpg -------------------------------------------------------------------------------- /library/static/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /library/static/font-awesome/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/font-awesome/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /library/static/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /library/static/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /library/static/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /library/static/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /library/static/font-awesome/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/font-awesome/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /library/static/font-awesome/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/font-awesome/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /library/static/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant/ 2 | *.log 3 | *.pyc 4 | *.sqlite3 5 | *.kate-swp 6 | *.json 7 | *.box 8 | *.csv 9 | screenshot*.png 10 | dump/ 11 | *.pkl 12 | .env/* 13 | -------------------------------------------------------------------------------- /library/static/font-awesome/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/cmdbac/HEAD/library/static/font-awesome/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /cmudbac/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | # This will make sure the app is always imported when 4 | # Django starts so that shared_task will use this app. 
5 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/fixed-width.less: -------------------------------------------------------------------------------- 1 | // Fixed Width Icons 2 | // ------------------------- 3 | .@{fa-css-prefix}-fw { 4 | width: (18em / 14); 5 | text-align: center; 6 | } 7 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_fixed-width.scss: -------------------------------------------------------------------------------- 1 | // Fixed Width Icons 2 | // ------------------------- 3 | .#{$fa-css-prefix}-fw { 4 | width: (18em / 14); 5 | text-align: center; 6 | } 7 | -------------------------------------------------------------------------------- /core/drivers/extract/__init__.py: -------------------------------------------------------------------------------- 1 | from extract import extract_forms, extract_all_forms, extract_all_forms_with_cookie 2 | from extract import extract_urls, extract_all_urls, extract_all_urls_with_cookie -------------------------------------------------------------------------------- /core/drivers/submit/__init__.py: -------------------------------------------------------------------------------- 1 | from register import register 2 | from login import login 3 | from submit import fill_form_random, fill_form_random_fast, gen_random_value 4 | from query import query_url -------------------------------------------------------------------------------- /library/fixtures/apistatistic.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 1, 4 | "model": "library.WebStatistic", 5 | "fields": { 6 | "name": "Attempt Info API", 7 | "count": 0 8 | } 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /core/deployers/__init__.py: -------------------------------------------------------------------------------- 
1 | from basedeployer import * 2 | from djangodeployer import * 3 | from rordeployer import * 4 | from nodedeployer import * 5 | from drupaldeployer import * 6 | from grailsdeployer import * -------------------------------------------------------------------------------- /core/drivers/extract/driver/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /vagrant/requirements.txt: -------------------------------------------------------------------------------- 1 | selenium == 2.48.0 2 | djangorestframework == 3.3.1 3 | Django == 1.8.6 4 | Scrapy 5 | mechanize 6 | beautifulsoup4 7 | requests == 2.8.1 8 | hurry.filesize == 0.9 9 | django_appconf == 1.0.1 10 | -------------------------------------------------------------------------------- /blog/signals.py: -------------------------------------------------------------------------------- 1 | import django.dispatch 2 | 3 | 4 | post_viewed = django.dispatch.Signal(providing_args=["post", "request"]) 5 | post_published = django.dispatch.Signal(providing_args=["post"]) 6 | post_redirected = django.dispatch.Signal(providing_args=["post", "request"]) 7 | -------------------------------------------------------------------------------- /cmudbac/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import patterns, include, url 2 | from django.contrib import admin 3 | admin.autodiscover() 4 | 5 | urlpatterns = patterns('', 6 | url(r'', include('library.urls')), 7 | url(r'^blog/', include('blog.urls')), 8 | url(r'^admin/', include(admin.site.urls)), 9 | ) 10 | -------------------------------------------------------------------------------- 
/blog/templates/blog_base.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block body_class %}blog{% endblock %} 4 | 5 | {% block main %} 6 |
7 |
8 | {% block content %}{% endblock %} 9 |
10 |
11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /core/drivers/extract/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = driver.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = driver 12 | -------------------------------------------------------------------------------- /library/templates/queries.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | {% for query in queries %} 5 | 6 | 7 | 8 | {% endfor %} 9 | 10 |
{{ query.content|safe }}
11 |
-------------------------------------------------------------------------------- /core/drivers/submit/query.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import mechanize 5 | 6 | def query_url(url, br = None): 7 | if br == None: 8 | br = mechanize.Browser() 9 | br.set_handle_robots(False) 10 | 11 | br.open(url['url'].encode("ascii","ignore")) 12 | 13 | return 14 | -------------------------------------------------------------------------------- /core/drivers/extract/driver/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class DriverPipeline(object): 10 | def process_item(self, item, spider): 11 | return item 12 | -------------------------------------------------------------------------------- /blog/managers.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | from .conf import settings 4 | 5 | 6 | PUBLISHED_STATE = len(settings.PINAX_BLOG_UNPUBLISHED_STATES) + 1 7 | 8 | 9 | class PostManager(models.Manager): 10 | 11 | def published(self): 12 | return self.filter(published__isnull=False, state=PUBLISHED_STATE) 13 | 14 | def current(self): 15 | return self.published().order_by("-published") 16 | -------------------------------------------------------------------------------- /library/static/js/collapse.js: -------------------------------------------------------------------------------- 1 | $('.collapse').on('show.bs.collapse', function(event) { 2 | icon = $('#' + $(this).attr('id') + '-icon'); 3 | icon.removeClass("glyphicon-plus").addClass("glyphicon-minus"); 4 | event.stopPropagation(); 5 | 
}).on('hidden.bs.collapse', function(event) { 6 | icon = $('#' + $(this).attr('id') + '-icon'); 7 | icon.removeClass("glyphicon-minus").addClass("glyphicon-plus"); 8 | event.stopPropagation(); 9 | }); -------------------------------------------------------------------------------- /library/templatetags/active_page.py: -------------------------------------------------------------------------------- 1 | from django import template 2 | 3 | register = template.Library() 4 | 5 | @register.simple_tag 6 | def active_page(request, view_name): 7 | from django.core.urlresolvers import resolve, Resolver404 8 | if not request: 9 | return "" 10 | try: 11 | return "active" if resolve(request.path_info).url_name == view_name else "" 12 | except Resolver404: 13 | return "" -------------------------------------------------------------------------------- /core/drivers/count/count.py: -------------------------------------------------------------------------------- 1 | keywords = ['SELECT', 'INSERT', 'UPDATE', 'DELETE'] 2 | 3 | def count_query(queries): 4 | ret = {} 5 | for keyword in keywords: 6 | ret[keyword] = 0 7 | ret['OTHER'] = 0 8 | for query in queries: 9 | counted = False 10 | for keyword in keywords: 11 | if keyword in query['raw'].upper(): 12 | ret[keyword] += 1 13 | counted = True 14 | if not counted: 15 | ret['OTHER'] += 1 16 | return ret 17 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/bordered-pulled.less: -------------------------------------------------------------------------------- 1 | // Bordered & Pulled 2 | // ------------------------- 3 | 4 | .@{fa-css-prefix}-border { 5 | padding: .2em .25em .15em; 6 | border: solid .08em @fa-border-color; 7 | border-radius: .1em; 8 | } 9 | 10 | .pull-right { float: right; } 11 | .pull-left { float: left; } 12 | 13 | .@{fa-css-prefix} { 14 | &.pull-left { margin-right: .3em; } 15 | &.pull-right { margin-left: .3em; } 16 | } 17 | 
-------------------------------------------------------------------------------- /library/static/font-awesome/scss/_bordered-pulled.scss: -------------------------------------------------------------------------------- 1 | // Bordered & Pulled 2 | // ------------------------- 3 | 4 | .#{$fa-css-prefix}-border { 5 | padding: .2em .25em .15em; 6 | border: solid .08em $fa-border-color; 7 | border-radius: .1em; 8 | } 9 | 10 | .pull-right { float: right; } 11 | .pull-left { float: left; } 12 | 13 | .#{$fa-css-prefix} { 14 | &.pull-left { margin-right: .3em; } 15 | &.pull-right { margin-left: .3em; } 16 | } 17 | -------------------------------------------------------------------------------- /blog/templates/rss_item.xml: -------------------------------------------------------------------------------- 1 | 2 | {{ entry.title }} 3 | http://{{ current_site.domain }}{{ entry.get_absolute_url }} 4 | {{ entry.meta_description }} 5 | {{ entry.published|date:"D, d M Y H:i:s O" }} 6 | {{ entry.author.get_full_name }} 7 | http://{{ current_site.domain }}{{ entry.get_absolute_url }} 8 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/larger.less: -------------------------------------------------------------------------------- 1 | // Icon Sizes 2 | // ------------------------- 3 | 4 | /* makes the font 33% larger relative to the icon container */ 5 | .@{fa-css-prefix}-lg { 6 | font-size: (4em / 3); 7 | line-height: (3em / 4); 8 | vertical-align: -15%; 9 | } 10 | .@{fa-css-prefix}-2x { font-size: 2em; } 11 | .@{fa-css-prefix}-3x { font-size: 3em; } 12 | .@{fa-css-prefix}-4x { font-size: 4em; } 13 | .@{fa-css-prefix}-5x { font-size: 5em; } 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | sqlparse == 0.1.19 2 | Markdown == 2.6.4 3 | psycopg2 == 2.6.1 4 | twitter == 1.17.1 5 | 
selenium == 2.48.0 6 | djangorestframework == 3.3.1 7 | djangorestframework-filters==0.9.1 8 | Django == 1.8.6 9 | Scrapy == 1.0.3 10 | Pygments == 2.0.2 11 | django_appconf == 1.0.1 12 | mechanize == 0.2.5 13 | beautifulsoup4 == 4.4.1 14 | python-creole == 1.3.1 15 | requests == 2.8.1 16 | MySQL_python == 1.2.5 17 | hurry.filesize == 0.9 18 | Pillow==4.2.1 19 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_larger.scss: -------------------------------------------------------------------------------- 1 | // Icon Sizes 2 | // ------------------------- 3 | 4 | /* makes the font 33% larger relative to the icon container */ 5 | .#{$fa-css-prefix}-lg { 6 | font-size: (4em / 3); 7 | line-height: (3em / 4); 8 | vertical-align: -15%; 9 | } 10 | .#{$fa-css-prefix}-2x { font-size: 2em; } 11 | .#{$fa-css-prefix}-3x { font-size: 3em; } 12 | .#{$fa-css-prefix}-4x { font-size: 4em; } 13 | .#{$fa-css-prefix}-5x { font-size: 5em; } 14 | -------------------------------------------------------------------------------- /library/templates/analytics/analytics.html: -------------------------------------------------------------------------------- 1 | 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Carnegie Mellon Database Application Catalog 2 | 3 | For more details, Please visit the [CMDBAC Wiki](https://github.com/cmu-db/dbac/wiki "DBAC Wiki") page. 4 | 5 | ### Installation 6 | 7 | Please refer to the [installation instructions](https://github.com/cmu-db/dbac/wiki/Installation). 
8 | 9 | ### Contributors 10 | 11 | * [Zeyuan Shang](http://www.shangzeyuan.com/) 12 | * [Andy Pavlo](http://www.cs.cmu.edu/~pavlo) 13 | * [Dana Van Aken](http://www.cs.cmu.edu/~dvaken) 14 | -------------------------------------------------------------------------------- /library/context_processors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Author: Zeyuan Shang 4 | # @Date: 2015-11-13 22:02:21 5 | # @Last Modified by: Zeyuan Shang 6 | # @Last Modified time: 2015-11-13 22:02:57 7 | from django.conf import settings 8 | from django.template.loader import render_to_string 9 | 10 | def analytics(request): 11 | return { 'analytics_code': render_to_string("analytics/analytics.html", { 'google_analytics_key': settings.GOOGLE_ANALYTICS_KEY }) } -------------------------------------------------------------------------------- /blog/templates/dateline.html: -------------------------------------------------------------------------------- 1 |

2 | {% if post.published %}{{ post.published|date:"F jS, Y" }}{% else %}Not published yet{% endif %} 3 | 4 | 5 |

6 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/core.less: -------------------------------------------------------------------------------- 1 | // Base Class Definition 2 | // ------------------------- 3 | 4 | .@{fa-css-prefix} { 5 | display: inline-block; 6 | font: normal normal normal 14px/1 FontAwesome; // shortening font declaration 7 | font-size: inherit; // can't have font-size inherit on line above, so need to override 8 | text-rendering: auto; // optimizelegibility throws things off #1094 9 | -webkit-font-smoothing: antialiased; 10 | -moz-osx-font-smoothing: grayscale; 11 | } 12 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/list.less: -------------------------------------------------------------------------------- 1 | // List Icons 2 | // ------------------------- 3 | 4 | .@{fa-css-prefix}-ul { 5 | padding-left: 0; 6 | margin-left: @fa-li-width; 7 | list-style-type: none; 8 | > li { position: relative; } 9 | } 10 | .@{fa-css-prefix}-li { 11 | position: absolute; 12 | left: -@fa-li-width; 13 | width: @fa-li-width; 14 | top: (2em / 14); 15 | text-align: center; 16 | &.@{fa-css-prefix}-lg { 17 | left: (-@fa-li-width + (4em / 14)); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_core.scss: -------------------------------------------------------------------------------- 1 | // Base Class Definition 2 | // ------------------------- 3 | 4 | .#{$fa-css-prefix} { 5 | display: inline-block; 6 | font: normal normal normal 14px/1 FontAwesome; // shortening font declaration 7 | font-size: inherit; // can't have font-size inherit on line above, so need to override 8 | text-rendering: auto; // optimizelegibility throws things off #1094 9 | -webkit-font-smoothing: antialiased; 10 | -moz-osx-font-smoothing: grayscale; 11 | } 12 | 
-------------------------------------------------------------------------------- /library/static/font-awesome/scss/_list.scss: -------------------------------------------------------------------------------- 1 | // List Icons 2 | // ------------------------- 3 | 4 | .#{$fa-css-prefix}-ul { 5 | padding-left: 0; 6 | margin-left: $fa-li-width; 7 | list-style-type: none; 8 | > li { position: relative; } 9 | } 10 | .#{$fa-css-prefix}-li { 11 | position: absolute; 12 | left: -$fa-li-width; 13 | width: $fa-li-width; 14 | top: (2em / 14); 15 | text-align: center; 16 | &.#{$fa-css-prefix}-lg { 17 | left: -$fa-li-width + (4em / 14); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /core/analyzers/__init__.py: -------------------------------------------------------------------------------- 1 | from baseanalyzer import * 2 | from mysqlanalyzer import * 3 | from postgresqlanalyzer import * 4 | from sqlite3analyzer import * 5 | 6 | def get_analyzer(deployer): 7 | if deployer.get_database().name == 'MySQL': 8 | return MySQLAnalyzer(deployer) 9 | elif deployer.get_database().name == 'PostgreSQL': 10 | return PostgreSQLAnalyzer(deployer) 11 | elif deployer.get_database().name == 'SQLite3': 12 | return SQLite3Analyzer(deployer) 13 | else: 14 | return BaseAnalyzer(deployer) -------------------------------------------------------------------------------- /library/static/font-awesome/scss/font-awesome.scss: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Font Awesome 4.2.0 by @davegandy - http://fontawesome.io - @fontawesome 3 | * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) 4 | */ 5 | 6 | @import "variables"; 7 | @import "mixins"; 8 | @import "path"; 9 | @import "core"; 10 | @import "larger"; 11 | @import "fixed-width"; 12 | @import "list"; 13 | @import "bordered-pulled"; 14 | @import "spinning"; 15 | @import "rotated-flipped"; 16 | @import "stacked"; 17 | @import "icons"; 18 | -------------------------------------------------------------------------------- /blog/templates/atom_feed.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {{ feed_id }} 5 | 6 | {{ feed_title }} 7 | 8 | 9 | 10 | 11 | {{ feed_updated|date:"Y-m-d\TH:i:s\Z" }} 12 | 13 | {% for entry in entries %} 14 | {% include "atom_entry.xml" %} 15 | {% endfor %} 16 | 17 | -------------------------------------------------------------------------------- /blog/templates/dateline_stale.html: -------------------------------------------------------------------------------- 1 |

2 | {% if not post.stale %} 3 | {% if post.published %}{{ post.published|date:"jS F Y" }}{% else %}Not published yet{% endif %} 4 |
5 | {% endif %} 6 | by {{ post.author.get_full_name }} in 7 | {{ post.get_section_display|capfirst }} 8 |

def match_any_pattern(name, patterns):
    """Return True if any of *patterns* occurs in *name*, case-insensitively."""
    lowered = name.lower()
    for candidate in patterns:
        if candidate in lowered:
            return True
    return False
.@{fa-css-prefix}-stack-2x { 13 | position: absolute; 14 | left: 0; 15 | width: 100%; 16 | text-align: center; 17 | } 18 | .@{fa-css-prefix}-stack-1x { line-height: inherit; } 19 | .@{fa-css-prefix}-stack-2x { font-size: 2em; } 20 | .@{fa-css-prefix}-inverse { color: @fa-inverse; } 21 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_stacked.scss: -------------------------------------------------------------------------------- 1 | // Stacked Icons 2 | // ------------------------- 3 | 4 | .#{$fa-css-prefix}-stack { 5 | position: relative; 6 | display: inline-block; 7 | width: 2em; 8 | height: 2em; 9 | line-height: 2em; 10 | vertical-align: middle; 11 | } 12 | .#{$fa-css-prefix}-stack-1x, .#{$fa-css-prefix}-stack-2x { 13 | position: absolute; 14 | left: 0; 15 | width: 100%; 16 | text-align: center; 17 | } 18 | .#{$fa-css-prefix}-stack-1x { line-height: inherit; } 19 | .#{$fa-css-prefix}-stack-2x { font-size: 2em; } 20 | .#{$fa-css-prefix}-inverse { color: $fa-inverse; } 21 | -------------------------------------------------------------------------------- /blog/templates/rss_feed.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{ feed_title }} 6 | The latest posts from {{ feed_title }} 7 | {{ blog_url }} 8 | 9 | {{ feed_updated|date:"D, d M Y H:i:s O" }} 10 | {% for entry in entries %} 11 | {% include "rss_item.xml" %} 12 | {% endfor %} 13 | 14 | 15 | -------------------------------------------------------------------------------- /core/utils/network.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import time 4 | import requests 5 | import logging 6 | 7 | from run import run_command 8 | 9 | def query(url, auth = None): 10 | if auth == None: 11 | response = requests.get(url, verify=False) 12 | else: 13 | response = requests.get(url, auth=(auth['user'], auth['pass']), 
class TimeoutError(Exception):
    """Raised when a guarded block exceeds its time budget."""
    pass


class timeout:
    """Context manager that aborts the guarded block after *seconds*.

    Relies on SIGALRM, so it only works on Unix and only on the main
    thread; nesting two of these would clobber the outer alarm.
    """

    def __init__(self, seconds=1, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message

    def handle_timeout(self, signum, frame):
        # Installed as the SIGALRM handler; fires if the alarm expires.
        raise TimeoutError(self.error_message)

    def __enter__(self):
        # Arm the alarm on entry to the with-block.
        signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.alarm(self.seconds)

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Disarm any pending alarm no matter how the block exited.
        signal.alarm(0)
5 | """ 6 | 7 | import os 8 | import sys 9 | 10 | # Change the env variable where django looks for the settings module 11 | # http://stackoverflow.com/a/11817088 12 | import django.conf 13 | django.conf.ENVIRONMENT_VARIABLE = "DJANGO_CMDBAC_SETTINGS_MODULE" 14 | os.environ.setdefault("DJANGO_CMDBAC_SETTINGS_MODULE", "cmudbac.settings") 15 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 16 | 17 | from django.core.wsgi import get_wsgi_application 18 | application = get_wsgi_application() 19 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | sys.path.append(os.path.join(os.path.dirname(__file__), "core")) 5 | 6 | import utils 7 | 8 | def vagrant_init(): 9 | utils.vagrant_clear() 10 | utils.vagrant_setup() 11 | 12 | def vagrant_final(): 13 | utils.vagrant_clear() 14 | 15 | if __name__ == "__main__": 16 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 17 | 18 | from django.core.management import execute_from_command_line 19 | 20 | vagrant_init() 21 | 22 | try: 23 | execute_from_command_line(sys.argv) 24 | finally: 25 | vagrant_final() 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /blog/templates/blog_list.html: -------------------------------------------------------------------------------- 1 | {% extends "blog_base.html" %} 2 | 3 | {% block head_title %}News » {{ block.super }}{% endblock %} 4 | 5 | {% block content %} 6 |

News

7 | {% if post_list %} 8 |
9 | {% for post in post_list %} 10 |
11 |

{{ post.title }}

12 | {% include "dateline.html" %} 13 |
{{ post.content_html|safe }}
14 |
15 | {% endfor %} 16 |
17 | {% else %} 18 |

No posts have been published.

19 | {% endif %} 20 | {% endblock %} 21 | -------------------------------------------------------------------------------- /core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from run import run_command, run_command_async 2 | from network import query, kill_port, block_network, unblock_network 3 | from file import search_file, search_file_regex, search_file_norecur, search_dir, replace_file_regex, replace_files_regex, mk_dir, make_dir, rm_dir, unzip, cd, rename_file, copy_file, remove_file, get_size 4 | from pip import home_path, configure_env, to_env, pip_install, pip_install_text, pip_freeze 5 | from data import get_crawler, add_module, add_repo, delete_repo, deploy_repo, edit_distance 6 | from vagrant import vagrant_setup, vagrant_clear, vagrant_deploy, vagrant_benchmark 7 | from rvm import get_ruby_versions, use_ruby_version, install_ruby_version 8 | from timeout import timeout -------------------------------------------------------------------------------- /library/fixtures/database.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 1, 4 | "model": "library.database", 5 | "fields": { 6 | "name": "Unknown" 7 | } 8 | }, 9 | { 10 | "pk": 2, 11 | "model": "library.database", 12 | "fields": { 13 | "name": "Oracle" 14 | } 15 | }, 16 | { 17 | "pk": 3, 18 | "model": "library.database", 19 | "fields": { 20 | "name": "Other" 21 | } 22 | }, 23 | { 24 | "pk": 4, 25 | "model": "library.database", 26 | "fields": { 27 | "name": "PostgreSQL" 28 | } 29 | }, 30 | { 31 | "pk": 5, 32 | "model": "library.database", 33 | "fields": { 34 | "name": "SQLite3" 35 | } 36 | }, 37 | { 38 | "pk": 6, 39 | "model": "library.database", 40 | "fields": { 41 | "name": "MySQL" 42 | } 43 | } 44 | ] 45 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/rotated-flipped.less: 
-------------------------------------------------------------------------------- 1 | // Rotated & Flipped Icons 2 | // ------------------------- 3 | 4 | .@{fa-css-prefix}-rotate-90 { .fa-icon-rotate(90deg, 1); } 5 | .@{fa-css-prefix}-rotate-180 { .fa-icon-rotate(180deg, 2); } 6 | .@{fa-css-prefix}-rotate-270 { .fa-icon-rotate(270deg, 3); } 7 | 8 | .@{fa-css-prefix}-flip-horizontal { .fa-icon-flip(-1, 1, 0); } 9 | .@{fa-css-prefix}-flip-vertical { .fa-icon-flip(1, -1, 2); } 10 | 11 | // Hook for IE8-9 12 | // ------------------------- 13 | 14 | :root .@{fa-css-prefix}-rotate-90, 15 | :root .@{fa-css-prefix}-rotate-180, 16 | :root .@{fa-css-prefix}-rotate-270, 17 | :root .@{fa-css-prefix}-flip-horizontal, 18 | :root .@{fa-css-prefix}-flip-vertical { 19 | filter: none; 20 | } 21 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/spinning.less: -------------------------------------------------------------------------------- 1 | // Spinning Icons 2 | // -------------------------- 3 | 4 | .@{fa-css-prefix}-spin { 5 | -webkit-animation: fa-spin 2s infinite linear; 6 | animation: fa-spin 2s infinite linear; 7 | } 8 | 9 | @-webkit-keyframes fa-spin { 10 | 0% { 11 | -webkit-transform: rotate(0deg); 12 | transform: rotate(0deg); 13 | } 14 | 100% { 15 | -webkit-transform: rotate(359deg); 16 | transform: rotate(359deg); 17 | } 18 | } 19 | 20 | @keyframes fa-spin { 21 | 0% { 22 | -webkit-transform: rotate(0deg); 23 | transform: rotate(0deg); 24 | } 25 | 100% { 26 | -webkit-transform: rotate(359deg); 27 | transform: rotate(359deg); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_spinning.scss: -------------------------------------------------------------------------------- 1 | // Spinning Icons 2 | // -------------------------- 3 | 4 | .#{$fa-css-prefix}-spin { 5 | -webkit-animation: fa-spin 2s infinite linear; 6 | animation: 
fa-spin 2s infinite linear; 7 | } 8 | 9 | @-webkit-keyframes fa-spin { 10 | 0% { 11 | -webkit-transform: rotate(0deg); 12 | transform: rotate(0deg); 13 | } 14 | 100% { 15 | -webkit-transform: rotate(359deg); 16 | transform: rotate(359deg); 17 | } 18 | } 19 | 20 | @keyframes fa-spin { 21 | 0% { 22 | -webkit-transform: rotate(0deg); 23 | transform: rotate(0deg); 24 | } 25 | 100% { 26 | -webkit-transform: rotate(359deg); 27 | transform: rotate(359deg); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /library/static/font-awesome/less/path.less: -------------------------------------------------------------------------------- 1 | /* FONT PATH 2 | * -------------------------- */ 3 | 4 | @font-face { 5 | font-family: 'FontAwesome'; 6 | src: url('@{fa-font-path}/fontawesome-webfont.eot?v=@{fa-version}'); 7 | src: url('@{fa-font-path}/fontawesome-webfont.eot?#iefix&v=@{fa-version}') format('embedded-opentype'), 8 | url('@{fa-font-path}/fontawesome-webfont.woff?v=@{fa-version}') format('woff'), 9 | url('@{fa-font-path}/fontawesome-webfont.ttf?v=@{fa-version}') format('truetype'), 10 | url('@{fa-font-path}/fontawesome-webfont.svg?v=@{fa-version}#fontawesomeregular') format('svg'); 11 | // src: url('@{fa-font-path}/FontAwesome.otf') format('opentype'); // used when developing fonts 12 | font-weight: normal; 13 | font-style: normal; 14 | } 15 | -------------------------------------------------------------------------------- /library/fixtures/repositorysource.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 1, 4 | "model": "library.repositorysource", 5 | "fields": { 6 | "name": "GitHub", 7 | "commit_url": "https://github.com/${repo_name}/tree/${commit}", 8 | "base_url": "https://github.com/${repo_name}", 9 | "search_token": "CHANGE ME", 10 | "crawler_class": "GitHubCrawler", 11 | "logo": "img/github.png" 12 | } 13 | }, 14 | { 15 | "pk": 2, 16 | "model": 
"library.repositorysource", 17 | "fields": { 18 | "name": "Drupal", 19 | "commit_url": "https://www.drupal.org/node/${commit}", 20 | "base_url": "https://www.drupal.org/project/${repo_name}", 21 | "search_token": "CHANGE ME", 22 | "crawler_class": "DrupalCrawler", 23 | "logo": "img/drupal.png" 24 | } 25 | } 26 | ] 27 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_path.scss: -------------------------------------------------------------------------------- 1 | /* FONT PATH 2 | * -------------------------- */ 3 | 4 | @font-face { 5 | font-family: 'FontAwesome'; 6 | src: url('#{$fa-font-path}/fontawesome-webfont.eot?v=#{$fa-version}'); 7 | src: url('#{$fa-font-path}/fontawesome-webfont.eot?#iefix&v=#{$fa-version}') format('embedded-opentype'), 8 | url('#{$fa-font-path}/fontawesome-webfont.woff?v=#{$fa-version}') format('woff'), 9 | url('#{$fa-font-path}/fontawesome-webfont.ttf?v=#{$fa-version}') format('truetype'), 10 | url('#{$fa-font-path}/fontawesome-webfont.svg?v=#{$fa-version}#fontawesomeregular') format('svg'); 11 | //src: url('#{$fa-font-path}/FontAwesome.otf') format('opentype'); // used when developing fonts 12 | font-weight: normal; 13 | font-style: normal; 14 | } 15 | -------------------------------------------------------------------------------- /library/static/font-awesome/scss/_rotated-flipped.scss: -------------------------------------------------------------------------------- 1 | // Rotated & Flipped Icons 2 | // ------------------------- 3 | 4 | .#{$fa-css-prefix}-rotate-90 { @include fa-icon-rotate(90deg, 1); } 5 | .#{$fa-css-prefix}-rotate-180 { @include fa-icon-rotate(180deg, 2); } 6 | .#{$fa-css-prefix}-rotate-270 { @include fa-icon-rotate(270deg, 3); } 7 | 8 | .#{$fa-css-prefix}-flip-horizontal { @include fa-icon-flip(-1, 1, 0); } 9 | .#{$fa-css-prefix}-flip-vertical { @include fa-icon-flip(1, -1, 2); } 10 | 11 | // Hook for IE8-9 12 | // ------------------------- 13 | 14 | 
:root .#{$fa-css-prefix}-rotate-90, 15 | :root .#{$fa-css-prefix}-rotate-180, 16 | :root .#{$fa-css-prefix}-rotate-270, 17 | :root .#{$fa-css-prefix}-flip-horizontal, 18 | :root .#{$fa-css-prefix}-flip-vertical { 19 | filter: none; 20 | } 21 | -------------------------------------------------------------------------------- /blog/templates/blog_section_list.html: -------------------------------------------------------------------------------- 1 | {% extends "blog_base.html" %} 2 | 3 | {% block head_title %}{{ SITE_NAME }} ({{ section_name }}){% endblock %} 4 | 5 | {% block content %} 6 |

{{ section_name|title }}

7 | {% if posts %} 8 |
9 | {% for post in posts %} 10 |

{{ post.title }}

11 | {% include "dateline.html" %} 12 |
{{ post.teaser_html|safe }}
13 |

read more...

14 | {% endfor %} 15 |
16 | {% else %} 17 |

No blog posts have been published in this section.

18 | {% endif %} 19 | {% endblock %} 20 | -------------------------------------------------------------------------------- /core/utils/rvm.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | from run import run_command 5 | from file import cd 6 | 7 | def get_ruby_versions(): 8 | command = 'source /usr/local/rvm/scripts/rvm && rvm list' 9 | output = run_command(command) 10 | versions = [] 11 | for line in output[1].split('\n'): 12 | s = re.search('ruby-(.+) \[', line) 13 | if s: 14 | versions.append(s.group(1)) 15 | return sorted(versions) 16 | 17 | def use_ruby_version(version): 18 | command = 'source /usr/local/rvm/scripts/rvm && rvm use {}'.format(version[:5]) 19 | return command 20 | 21 | def install_ruby_version(version): 22 | command = 'sudo su && source /usr/local/rvm/scripts/rvm && rvm install {} && gem install bundle && gem install bundler'.format(version) 23 | return run_command(command) -------------------------------------------------------------------------------- /core/drivers/extract/driver/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class DriverItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | pass 15 | 16 | class InputItem(scrapy.Item): 17 | id = scrapy.Field() 18 | name = scrapy.Field() 19 | type = scrapy.Field() 20 | value = scrapy.Field() 21 | 22 | class FormItem(scrapy.Item): 23 | action = scrapy.Field() 24 | url = scrapy.Field() 25 | method = scrapy.Field() 26 | inputs = scrapy.Field() 27 | id = scrapy.Field() 28 | clazz = scrapy.Field() 29 | enctype = scrapy.Field() 30 | 31 | class UrlItem(scrapy.Item): 32 | url = scrapy.Field() 
-------------------------------------------------------------------------------- /blog/templates/blog_post.html: -------------------------------------------------------------------------------- 1 | {% extends "blog_base.html" %} 2 | 3 | {% block head_title %}{{ post.title }} » {{ block.super }}{% endblock %} 4 | 5 | {% block extra_head %} 6 | 7 | 8 | 9 | {% endblock %} 10 | 11 | {% block content %} 12 |
13 |
14 |

{{ post.title }}

15 | {% include "dateline_stale.html" %} 16 | 17 | 20 | 21 |
{{ post.content_html|safe }}
22 | 23 |
24 |
def login(forms, matched_patterns):
    """Locate the login form among *forms*, fill it, and submit it.

    Returns ``(login_form, browser)`` on success, or ``(None, None)``
    when no form looks like a login form.
    """
    login_form = get_login_form(forms)
    # PEP 8: identity comparison against None, not equality.
    if login_form is None:
        return None, None

    matched_patterns, inputs, response, br = fill_form(login_form, matched_patterns)

    return login_form, br
-------------------------------------------------------------------------------- /blog/templates/atom_entry.xml: -------------------------------------------------------------------------------- 1 | 2 | http://{{ current_site.domain }}{{ entry.get_absolute_url }} 3 | {{ entry.title }} 4 | 5 | 6 | {{ entry.updated|date:"Y-m-d\TH:i:s\Z" }} 7 | {{ entry.published|date:"Y-m-d\TH:i:s\Z" }} 8 | 9 | 10 | {{ entry.author.get_full_name }} 11 | 12 | 13 | 14 |
15 | {{ entry.teaser_html|safe }} 16 |
17 |
18 | 19 | 20 | {{ entry.teaser_html }} 21 | {{ entry.content_html }} 22 | 23 |
24 | -------------------------------------------------------------------------------- /blog/parsers/markdown_parser.py: -------------------------------------------------------------------------------- 1 | from markdown import Markdown 2 | from markdown.inlinepatterns import ImagePattern, IMAGE_LINK_RE 3 | 4 | from ..models import Image 5 | 6 | 7 | class ImageLookupImagePattern(ImagePattern): 8 | 9 | def sanitize_url(self, url): 10 | if url.startswith("http"): 11 | return url 12 | else: 13 | try: 14 | image = Image.objects.get(pk=int(url)) 15 | return image.image_path.url 16 | except Image.DoesNotExist: 17 | pass 18 | except ValueError: 19 | return url 20 | return "" 21 | 22 | 23 | def parse(text): 24 | md = Markdown(extensions=["codehilite", "tables", "smarty", "admonition", "toc"]) 25 | md.inlinePatterns["image_link"] = ImageLookupImagePattern(IMAGE_LINK_RE, md) 26 | html = md.convert(text) 27 | return html 28 | -------------------------------------------------------------------------------- /core/crawlers/basecrawler.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 4 | 5 | import utils 6 | from library.models import * 7 | 8 | class BaseCrawler(object): 9 | def __init__(self, crawlerStatus, auth = None): 10 | self.crawlerStatus = crawlerStatus 11 | self.auth = auth 12 | # DEF 13 | 14 | def search(self): 15 | raise NotImplementedError("Unimplemented %s" % self.__init__.im_class) 16 | # DEF 17 | 18 | def crawl(self): 19 | nextResults = self.search() 20 | ## DEF 21 | 22 | def add_repository(self, name, setup_scripts = None): 23 | raise NotImplementedError("Unimplemented %s" % self.__init__.im_class) 24 | # DEF 25 | 26 | def download_repository(self, repo_name, sha, zip_name): 27 | raise NotImplementedError("Unimplemented %s" % self.__init__.im_class) 28 | # DEF 29 
| 30 | ## CLASS -------------------------------------------------------------------------------- /library/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import patterns, include, url 2 | from rest_framework import routers 3 | import views 4 | 5 | router = routers.DefaultRouter() 6 | router.register(r'attempt', views.AttemptViewSet, base_name='attempt') 7 | router.register(r'repository', views.RepositoryViewSet, base_name='repository') 8 | 9 | urlpatterns = patterns('', 10 | url(r'^api/', include(router.urls)), 11 | url(r'^api/repositories/', views.RepositoryListView.as_view()), 12 | url(r'^$', 'library.views.home', name='home'), 13 | url(r'^repositories/$', 'library.views.repositories', name='repositories'), 14 | url(r'^repository/(?P.+)/(?P.+)/', 'library.views.repository', name='repository'), 15 | url(r'^attempt/(?P\d+)/', 'library.views.attempt', name='attempt'), 16 | url(r'^queries/(?P\d+)/', 'library.views.queries', name='queries'), 17 | url(r'^about/$', 'library.views.about', name='about'), 18 | url(r'^search/$', 'library.views.search', name='search') 19 | ) 20 | -------------------------------------------------------------------------------- /vagrant/Vagrantfile_example: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # Vagrantfile API/syntax version. Don't touch unless you know what you're doing! 5 | VAGRANTFILE_API_VERSION = "2" 6 | 7 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 8 | # All Vagrant configuration is done here. The most common configuration 9 | # options are documented and commented below. For a complete reference, 10 | # please see the online documentation at vagrantup.com. 
def load_path_attr(path):
    """Import the dotted *path* and return the named attribute.

    ``"pkg.mod.attr"`` imports ``pkg.mod`` and returns its ``attr``.
    Raises ImproperlyConfigured when the path has no dot, the module
    cannot be imported, or the attribute is missing.
    """
    try:
        # rsplit handles the split explicitly; the old rfind-based slicing
        # produced a garbage module name for dotless paths.
        module, attr = path.rsplit(".", 1)
    except ValueError:
        raise ImproperlyConfigured("'%s' is not a dotted import path" % path)
    try:
        mod = import_module(module)
    except ImportError as e:
        raise ImproperlyConfigured("Error importing %s: '%s'" % (module, e))
    try:
        attr = getattr(mod, attr)
    except AttributeError:
        raise ImproperlyConfigured("Module '%s' does not define a '%s'" % (module, attr))
    return attr
// Flip mixin: the IE8-9 BasicImage filter needs mirror=1 to actually
// mirror the glyph (matches the less twin in less/mixins.less, which
// passes rotation=@rotation, mirror=1).
@mixin fa-icon-flip($horiz, $vert, $rotation) {
  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=#{$rotation}, mirror=1);
  -webkit-transform: scale($horiz, $vert);
  -ms-transform: scale($horiz, $vert);
  transform: scale($horiz, $vert);
}
-------------------------------------------------------------------------------- /scripts/deploy_repo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import time 7 | import traceback 8 | 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 10 | import django 11 | django.setup() 12 | from django.db.models import Q 13 | 14 | from library.models import * 15 | import utils 16 | 17 | def main(): 18 | if len(sys.argv) < 3: 19 | return 20 | deploy_id = int(sys.argv[1]) 21 | repo_name = sys.argv[2] 22 | if len(sys.argv) >= 4: 23 | database_name = sys.argv[3] 24 | else: 25 | database_name = 'MySQL' 26 | database = Database.objects.get(name = database_name) 27 | 28 | repo = Repository.objects.get(name = repo_name) 29 | print 'Attempting to deploy {} using {} ...'.format(repo, repo.project_type.deployer_class) 30 | try: 31 | utils.vagrant_deploy(repo, deploy_id, database) 32 | except: 33 | traceback.print_exc() 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /core/drivers/extract/driver/spiders/url.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import scrapy 4 | from scrapy.spiders import CrawlSpider, Rule 5 | from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor 6 | 7 | from driver.items import UrlItem 8 | 9 | class UrlSpider(CrawlSpider): 10 | name = "url" 11 | allowed_domains = ["127.0.0.1"] 12 | 13 | def __init__(self, *args, **kwargs): 14 | super(UrlSpider, self).__init__(*args, **kwargs) 15 | 16 | self.start_urls = [kwargs.get('start_url')] 17 | 18 | follow = True if kwargs.get('follow') == 'true' else False 19 | self.rules = ( 20 | Rule 
(SgmlLinkExtractor(allow=('')), callback='parse_url', follow=follow), 21 | ) 22 | super(UrlSpider, self)._compile_rules() 23 | 24 | try: 25 | proxy = kwargs.get('proxy') 26 | service_args = [ 27 | '--proxy=' + proxy, 28 | '--proxy-type=http', 29 | ] 30 | except: 31 | service_args = None 32 | 33 | def parse_url(self, response): 34 | urlItem = UrlItem() 35 | urlItem['url'] = response.url 36 | yield urlItem 37 | -------------------------------------------------------------------------------- /blog/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url, patterns 2 | 3 | from .conf import settings 4 | from .views import ( 5 | BlogIndexView, 6 | DateBasedPostDetailView, 7 | SecretKeyPostDetailView, 8 | SectionIndexView, 9 | SlugUniquePostDetailView, 10 | StaffPostDetailView 11 | ) 12 | 13 | 14 | urlpatterns = patterns( 15 | "blog.views", 16 | url(r"^$", BlogIndexView.as_view(), name="blog"), 17 | url(r"^section/(?P
[-\w]+)/$", SectionIndexView.as_view(), name="blog_section"), 18 | url(r"^post/(?P\d+)/$", StaffPostDetailView.as_view(), name="blog_post_pk"), 19 | url(r"^post/(?P\w+)/$", SecretKeyPostDetailView.as_view(), name="blog_post_secret"), 20 | url(r"^feed/(?P
[-\w]+)/(?P[-\w]+)/$", "blog_feed", name="blog_feed"), 21 | ) 22 | 23 | 24 | if settings.PINAX_BLOG_SLUG_UNIQUE: 25 | urlpatterns += patterns( 26 | "", 27 | url(r"^(?P[-\w]+)/$", SlugUniquePostDetailView.as_view(), name="blog_post_slug") 28 | ) 29 | else: 30 | urlpatterns += patterns( 31 | "", 32 | url(r"^(?P\d{4})/(?P\d{2})/(?P\d{2})/(?P[-\w]+)/$", DateBasedPostDetailView.as_view(), name="blog_post"), 33 | ) 34 | -------------------------------------------------------------------------------- /scripts/remove_attempts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import datetime 7 | import traceback 8 | 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 10 | import django 11 | django.setup() 12 | from django.db.models import Q 13 | 14 | from library.models import * 15 | import utils 16 | 17 | def remove_attempt(attempt): 18 | if attempt.repo.latest_attempt == attempt: 19 | attempt.repo.latest_attempt = None 20 | attempt.delete() 21 | 22 | def remove_unuseful_attempts(): 23 | reference_time = datetime.datetime.strptime('2016-01-01', '%Y-%m-%d') 24 | 25 | for repo in Repository.objects.all(): 26 | if repo.latest_successful_attempt == None: 27 | for attempt in Attempt.objects.filter(repo = repo).exclude(result = 'OK'): 28 | if attempt.stop_time < reference_time: 29 | remove_attempt(attempt) 30 | else: 31 | for attempt in Attempt.objects.filter(repo = repo).exclude(result = 'OK'): 32 | remove_attempt(attempt) 33 | 34 | def main(): 35 | remove_unuseful_attempts() 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /library/fixtures/projecttype.json: -------------------------------------------------------------------------------- 
1 | [ 2 | { 3 | "pk": 1, 4 | "model": "library.projecttype", 5 | "fields": { 6 | "logo": "img/django.png", 7 | "deployer_class": "DjangoDeployer", 8 | "default_port": 8000, 9 | "name": "Django", 10 | "filename": "models.py" 11 | } 12 | }, 13 | { 14 | "pk": 2, 15 | "model": "library.projecttype", 16 | "fields": { 17 | "logo": "img/ruby_on_rails.png", 18 | "deployer_class": "RoRDeployer", 19 | "default_port": 3000, 20 | "name": "Ruby on Rails", 21 | "filename": "database.yml" 22 | } 23 | }, 24 | { 25 | "pk": 3, 26 | "model": "library.projecttype", 27 | "fields": { 28 | "logo": "img/nodejs.png", 29 | "deployer_class": "NodeDeployer", 30 | "default_port": 8080, 31 | "name": "Node.js", 32 | "filename": "package.json" 33 | } 34 | }, 35 | { 36 | "pk": 4, 37 | "model": "library.projecttype", 38 | "fields": { 39 | "logo": "img/drupal.png", 40 | "deployer_class": "DrupalDeployer", 41 | "default_port": 8181, 42 | "name": "Drupal", 43 | "filename": "install.php" 44 | } 45 | }, 46 | { 47 | "pk": 5, 48 | "model": "library.projecttype", 49 | "fields": { 50 | "logo": "img/drupal.png", 51 | "deployer_class": "GrailsDeployer", 52 | "default_port": 8080, 53 | "name": "Grails", 54 | "filename": "application.properties" 55 | } 56 | } 57 | ] 58 | -------------------------------------------------------------------------------- /core/drivers/extract/driver/spiders/url_with_cookie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import scrapy 4 | from scrapy.spiders import CrawlSpider, Rule 5 | from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor 6 | import cookielib 7 | 8 | from driver.items import UrlItem 9 | 10 | class UrlWithCookieSpider(CrawlSpider): 11 | name = "url_with_cookie" 12 | allowed_domains = ["127.0.0.1"] 13 | 14 | def __init__(self, *args, **kwargs): 15 | super(UrlWithCookieSpider, self).__init__(*args, **kwargs) 16 | 17 | self.start_urls = [kwargs.get('start_url')] 18 | self.cookiejar = 
cookielib.LWPCookieJar() 19 | self.cookiejar.load(kwargs.get('cookie_jar')) 20 | 21 | self.rules = ( 22 | Rule (SgmlLinkExtractor(allow=('')), callback='parse_url', follow=True, process_request='add_cookie_for_request'), 23 | ) 24 | super(UrlWithCookieSpider, self)._compile_rules() 25 | 26 | def add_cookie_for_request(self, request): 27 | for cookie in self.cookiejar: 28 | request.cookies[cookie.name] = cookie.value 29 | logout_patterns = ['logout', 'log-out', 'log_out'] 30 | if any(logout_pattern in request.url for logout_pattern in logout_patterns): 31 | return None 32 | return request 33 | 34 | def parse_url(self, response): 35 | urlItem = UrlItem() 36 | urlItem['url'] = response.url 37 | yield urlItem -------------------------------------------------------------------------------- /core/utils/run.py: -------------------------------------------------------------------------------- 1 | from subprocess import PIPE, Popen 2 | from multiprocessing import Pool 3 | import time 4 | import traceback 5 | 6 | def get_process_children(pid): 7 | p = Popen('ps --no-headers -o pid --ppid %d' % pid, shell = True, stdout = PIPE, stderr = PIPE) 8 | stdout, stderr = p.communicate() 9 | return [int(p) for p in stdout.split()] 10 | 11 | def run(args, cwd = None, shell = True, env = None, inputs = None): 12 | ''' 13 | Run a command 14 | ''' 15 | p = Popen(args, shell = shell, executable = '/bin/bash', stdin = PIPE, stdout = PIPE, stderr = PIPE, cwd = cwd, env = env) 16 | stdout, stderr = '', '' 17 | if inputs != None: 18 | for input in inputs: 19 | try: 20 | time.sleep(5) 21 | p.stdin.write(input) 22 | except: 23 | # traceback.print_exc() 24 | pass 25 | stdout, stderr = p.communicate() 26 | return p.returncode, stdout, stderr 27 | 28 | def run_command(command, timeout=0, input=None, cwd=None): 29 | if timeout > 0: 30 | commands = command.split('&&') 31 | commands[-1] = 'timeout {} {}'.format(timeout, commands[-1]) 32 | command = '&& '.join(commands) 33 | return run(command, inputs = 
input, cwd = cwd) 34 | 35 | def run_command_async(command, timeout=0, input=None, cwd=None): 36 | pool = Pool(processes=1) 37 | return pool.apply_async(run_command, [command, timeout, input, cwd]), pool -------------------------------------------------------------------------------- /scripts/crawl_repos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import time 7 | import logging 8 | logging.basicConfig(filename='repo_crawler.log',level=logging.DEBUG) 9 | import json 10 | import traceback 11 | 12 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 13 | import django 14 | django.setup() 15 | 16 | import crawlers 17 | from library.models import * 18 | 19 | def main(): 20 | if len(sys.argv) != 2: 21 | return 22 | project_id = int(sys.argv[1]) 23 | 24 | try: 25 | with open(os.path.join(os.path.dirname(__file__), os.pardir, "secrets", "secrets.json"), 'r') as auth_file: 26 | auth = json.load(auth_file) 27 | except: 28 | auth = None 29 | 30 | while True: 31 | cs = CrawlerStatus.objects.get(id = project_id) 32 | repo_source = cs.source 33 | project_type = cs.project_type 34 | 35 | moduleName = "crawlers.%s" % (repo_source.crawler_class.lower()) 36 | moduleHandle = __import__(moduleName, globals(), locals(), [repo_source.crawler_class]) 37 | klass = getattr(moduleHandle, repo_source.crawler_class) 38 | crawler = klass(cs, auth) 39 | 40 | try: 41 | crawler.crawl() 42 | except: 43 | traceback.print_exc() 44 | time.sleep(10) 45 | ## WHILE 46 | ## IF 47 | 48 | 49 | if __name__ == '__main__': 50 | main() -------------------------------------------------------------------------------- /scripts/run_driver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 
import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import json 7 | import traceback 8 | 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 10 | import django 11 | django.setup() 12 | 13 | from drivers import * 14 | 15 | def main(): 16 | if len(sys.argv) < 2: 17 | return 18 | main_url = sys.argv[1] 19 | if len(sys.argv) >= 3: 20 | database_name = sys.argv[2] 21 | else: 22 | database_name = 'MySQL' 23 | database = Database.objects.get(name = database_name) 24 | 25 | print 'Driving ...' 26 | base_driver = BaseDriver(main_url, database, 'test') 27 | try: 28 | driverResult = base_driver.drive() 29 | except: 30 | traceback.print_exc() 31 | driverResult = {} 32 | 33 | print 'Random Walking ...' 34 | 35 | try: 36 | random_driver = RandomDriver(base_driver) 37 | random_driver.submit_forms() 38 | print random_driver.forms 39 | for form in random_driver.forms: 40 | if any(random_driver.equal_form(form, ret_form) for ret_form in driverResult['forms']): 41 | continue 42 | driverResult['forms'].append(form) 43 | except Exception, e: 44 | traceback.print_exc() 45 | 46 | print 'Driver Results:' 47 | print json.dumps(driverResult, indent=4, sort_keys=True) 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /scripts/crawl_repo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import time 7 | import logging 8 | logging.basicConfig(filename='repo_crawler.log',level=logging.DEBUG) 9 | import json 10 | import traceback 11 | 12 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 13 | import django 14 | 
django.setup() 15 | 16 | import crawlers 17 | from library.models import * 18 | import utils 19 | 20 | def add_module(): 21 | if len(sys.argv) != 5: 22 | return 23 | module_name = sys.argv[1] 24 | package_name = sys.argv[2] 25 | package_type_id = sys.argv[3] 26 | package_version = sys.argv[4] 27 | try: 28 | utils.add_module(module_name, package_name, package_type_id, package_version) 29 | print 'Successfully added new module {}'.format(module_name) 30 | except: 31 | print 'Failed to add new module {}'.format(repo_name) 32 | traceback.print_exc() 33 | 34 | def add_repository(): 35 | if len(sys.argv) != 3: 36 | return 37 | repo_name = sys.argv[1] 38 | repo_type_id = sys.argv[2] 39 | try: 40 | utils.add_repo(repo_name, repo_type_id, None) 41 | print 'Successfully added new repository {}'.format(repo_name) 42 | except: 43 | print 'Failed to add new repository {}'.format(repo_name) 44 | traceback.print_exc() 45 | 46 | def main(): 47 | # add_module() 48 | add_repository() 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /analysis/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: Zeyuan Shang 3 | # @Date: 2016-03-21 01:05:00 4 | # @Last Modified by: Zeyuan Shang 5 | # @Last Modified time: 2016-08-15 23:13:45 6 | import os 7 | import csv 8 | import pickle 9 | 10 | COMMITS_COUNT_THRESHOLD = 10 11 | 12 | def filter_repository(repo): 13 | if repo.commits_count >= 0 and repo.commits_count <= COMMITS_COUNT_THRESHOLD: 14 | return True 15 | return False 16 | 17 | def dump_stats(directory, description, values): 18 | with open(os.path.join(directory, description + '.csv'), 'wb') as csv_file: 19 | writer = csv.writer(csv_file) 20 | writer.writerow([description]) 21 | for label, stats in values.iteritems(): 22 | if isinstance(stats, list): 23 | for i in stats: 24 | writer.writerow([label, i]) 25 | elif 
isinstance(stats, dict): 26 | for key, value in stats.iteritems(): 27 | if isinstance(value, list): 28 | for second_value in value: 29 | writer.writerow([label, key, second_value]) 30 | else: 31 | writer.writerow([label, key, value]) 32 | else: 33 | writer.writerow([label, stats]) 34 | 35 | def dump_all_stats(directory, all_stats): 36 | for description in all_stats: 37 | dump_stats(directory, description, all_stats[description]) 38 | 39 | def pickle_dump(directory, description, data): 40 | with open(os.path.join(directory, description + '.pkl'), 'wb') as pickle_file: 41 | pickle.dump(data, pickle_file) -------------------------------------------------------------------------------- /library/fixtures/crawlerstatus.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 1, 4 | "model": "library.crawlerstatus", 5 | "fields": { 6 | "last_crawler_time": "2015-01-16T15:59:45", 7 | "source": 1, 8 | "project_type": 1, 9 | "next_url": "", 10 | "min_size": 100, 11 | "cur_size": 100, 12 | "max_size": 20000, 13 | "query": "django" 14 | } 15 | }, 16 | { 17 | "pk": 2, 18 | "model": "library.crawlerstatus", 19 | "fields": { 20 | "last_crawler_time": "2015-01-16T15:58:19", 21 | "source": 1, 22 | "project_type": 2, 23 | "next_url": "", 24 | "min_size": 100, 25 | "cur_size": 100, 26 | "max_size": 10000, 27 | "query": "" 28 | } 29 | }, 30 | { 31 | "pk": 3, 32 | "model": "library.crawlerstatus", 33 | "fields": { 34 | "last_crawler_time": "2015-01-16T15:58:19", 35 | "source": 1, 36 | "project_type": 3, 37 | "next_url": "", 38 | "min_size": 100, 39 | "cur_size": 100, 40 | "max_size": 10000, 41 | "query": "mysql" 42 | } 43 | }, 44 | { 45 | "pk": 4, 46 | "model": "library.crawlerstatus", 47 | "fields": { 48 | "last_crawler_time": "2015-01-16T15:58:19", 49 | "source": 2, 50 | "project_type": 4, 51 | "next_url": "", 52 | "min_size": 100, 53 | "cur_size": 100, 54 | "max_size": 10000, 55 | "query": "DRUPAL_ROOT" 56 | } 57 | }, 58 | { 59 | 
"pk": 5, 60 | "model": "library.crawlerstatus", 61 | "fields": { 62 | "last_crawler_time": "2015-01-16T15:58:19", 63 | "source": 1, 64 | "project_type": 5, 65 | "next_url": "", 66 | "min_size": 100, 67 | "cur_size": 100, 68 | "max_size": 10000, 69 | "query": "grails" 70 | } 71 | } 72 | ] 73 | -------------------------------------------------------------------------------- /library/templates/search.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% load staticfiles %} 3 | 4 | {% block title %}Repositories » {{ block.super }}{% endblock %} 5 | 6 | {% block main %} 7 | {% include "status/attempt_status_codes.html" %} 8 | {% include "admin/add_module.html" %} 9 | {% include "admin/add_repository.html" %} 10 | 11 | {% if messages %} 12 |
13 |
    14 | {% for message in messages %} 15 | {% if message.tags == 'success' %} 16 | 17 | {% endif %} 18 | {% if message.tags == 'error' %} 19 | 20 | {% endif %} 21 | {% endfor %} 22 |
23 | {% endif %} 24 | 25 | 26 | 27 |
    28 | 29 |
    30 |
    31 | {{ result_form }} 32 |
    33 | 34 |
    35 | {{ type_form }} 36 |
    37 | 38 |
    39 | 40 | 41 | 44 |
    45 | 46 |
    47 | Name: 48 | 49 |
    50 | 51 |
    52 |
    53 |
    54 | 55 | {% endblock %} 56 | -------------------------------------------------------------------------------- /library/serializers.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | from rest_framework import serializers 3 | 4 | class RepositorySerializer(serializers.ModelSerializer): 5 | class Meta: 6 | model = Repository 7 | 8 | class RuntimeSerializer(serializers.ModelSerializer): 9 | class Meta: 10 | model = Runtime 11 | 12 | class DatabaseSerializer(serializers.ModelSerializer): 13 | class Meta: 14 | model = Database 15 | 16 | class PackageSerializer(serializers.ModelSerializer): 17 | class Meta: 18 | model = Package 19 | 20 | class DependencySerializer(serializers.ModelSerializer): 21 | package_info = PackageSerializer(source='package') 22 | 23 | class Meta: 24 | model = Dependency 25 | fields = ('id', 'source', 'attempt', 'package_info') 26 | 27 | class FieldSerializer(serializers.ModelSerializer): 28 | class Meta: 29 | model = Field 30 | 31 | class QuerySerializer(serializers.ModelSerializer): 32 | class Meta: 33 | model = Query 34 | 35 | class ActionSerializer(serializers.ModelSerializer): 36 | fields = FieldSerializer(many=True, read_only=True) 37 | queries = QuerySerializer(many=True, read_only=True) 38 | class Meta: 39 | model = Action 40 | 41 | 42 | class AttemptSerializer(serializers.ModelSerializer): 43 | repo_info = RepositorySerializer(source='repo') 44 | runtime_info = RuntimeSerializer(source='runtime') 45 | database_info = DatabaseSerializer(source='database') 46 | dependencies = DependencySerializer(source='dependency_set', many=True) 47 | actions = ActionSerializer(many=True, read_only = True) 48 | 49 | class Meta: 50 | model = Attempt 51 | fields = ('id', 'start_time', 'stop_time', 'repo_info', 'sha', 'size', 'log', 'hostname', 52 | 'runtime_info', 'database_info', 'result', 'register', 'login', 'actions_count', 'queries_count', 53 | 'dependencies', 'actions' 54 | ) 
-------------------------------------------------------------------------------- /core/analyzers/baseanalyzer.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import logging 5 | 6 | ## ===================================================================== 7 | ## LOGGING CONFIGURATION 8 | ## ===================================================================== 9 | LOG = logging.getLogger() 10 | 11 | ## ===================================================================== 12 | ## BASE ANALYZER 13 | ## ===================================================================== 14 | class BaseAnalyzer(object): 15 | 16 | def __init__(self, deployer): 17 | self.queries_stats = {} 18 | self.database_stats = {} 19 | self.database_informations = {} 20 | self.deployer = deployer 21 | 22 | def is_valid_for_explain(self, query): 23 | if not query: 24 | return False 25 | prefixes = ['show', 'begin', 'end', 'commit', 'set'] 26 | lowered_query = query.lower() 27 | if any(lowered_query.startswith(prefix) for prefix in prefixes): 28 | return False 29 | return True 30 | 31 | def count_transaction(self, queries): 32 | transaction = False 33 | transaction_count = 0 34 | for query in queries: 35 | if 'BEGIN' in query['content'].upper() or 'START TRANSACTION' in query['content'].upper(): 36 | transaction = True 37 | elif transaction: 38 | if 'COMMIT' in query['content'].upper(): 39 | # for each transaction, count the number of transactions 40 | transaction_count += 1 41 | transaction = False 42 | return transaction_count 43 | 44 | def analyze_queries(self, queries): 45 | raise NotImplementedError("Unimplemented %s" % self.__init__.im_class) 46 | 47 | def analyze_database(self): 48 | raise NotImplementedError("Unimplemented %s" % self.__init__.im_class) -------------------------------------------------------------------------------- 
/core/drivers/benchmarkdriver.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import logging 5 | import requests 6 | import re 7 | import copy 8 | import traceback 9 | import requests 10 | import mechanize 11 | 12 | from library.models import * 13 | from cmudbac.settings import * 14 | import utils 15 | import extract 16 | import submit 17 | import count 18 | from basedriver import BaseDriver 19 | 20 | ## ===================================================================== 21 | ## LOGGING CONFIGURATION 22 | ## ===================================================================== 23 | LOG = logging.getLogger() 24 | 25 | ## ===================================================================== 26 | ## BENCHMARK DRIVER 27 | ## ===================================================================== 28 | class BenchmarkDriver(BaseDriver): 29 | 30 | def __init__(self, driver): 31 | BaseDriver.__init__(self, driver.deployer) 32 | self.forms = driver.forms 33 | self.urls = driver.urls 34 | self.browser = mechanize.Browser() 35 | if driver.browser != None: 36 | self.browser.set_cookiejar(driver.browser._ua_handlers['_cookies'].cookiejar) 37 | self.browser.set_handle_robots(False) 38 | 39 | def submit_actions(self): 40 | actions_cnt = 0 41 | for form, browser_index in self.forms: 42 | try: 43 | if browser_index == 0: 44 | submit.fill_form_random(self.deployer.base_path, form, self.browser) 45 | else: 46 | submit.fill_form_random(self.deployer.base_path, form, None) 47 | except: 48 | pass 49 | actions_cnt += 1 50 | for url in self.urls: 51 | try: 52 | submit.query_url(url, self.browser) 53 | except: 54 | pass 55 | actions_cnt += 1 56 | return actions_cnt 57 | 58 | 59 | -------------------------------------------------------------------------------- /library/templates/admin/add_module.html: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/drivers/submit/register.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import re 5 | from urlparse import urlparse 6 | 7 | import extract 8 | from patterns import patterns, match_any_pattern 9 | from submit import fill_form 10 | 11 | def get_register_form(forms): 12 | register_patterns = ['register', 'signup', 'sign-up', 'sign_up'] 13 | for form in forms: 14 | if 'method' in form and form['method'] != 'post': 15 | continue 16 | if match_any_pattern(form['action'], register_patterns): 17 | return form 18 | if match_any_pattern(form['url'], register_patterns): 19 | return form 20 | if match_any_pattern(form.get('id', ''), register_patterns): 21 | return form 22 | return None 23 | 24 | def verify_email(deploy_path, form, matched_patterns): 25 | email_file = None 26 | for log_file in os.listdir(deploy_path): 27 | if log_file.endswith('.log'): 28 | email_file = log_file 29 | break 30 | if not email_file: 31 | return matched_patterns, None 32 | 33 | email_content = open(os.path.join(deploy_path, email_file)).read() 34 | verify_url = re.search('http://.+', email_content) 35 | if not verify_url: 36 | return matched_patterns, None 37 | verify_url = urlparse(verify_url.group(0))._replace(netloc = urlparse(form['url']).netloc) 38 | verify_url = verify_url.geturl() 39 | 40 | verify_forms = extract.extract_forms(verify_url) 41 | for verify_form in verify_forms: 42 | verify_form['url'] = verify_url 43 | matched_patterns, inputs, response, br = fill_form(verify_form, matched_patterns) 44 | 45 | return matched_patterns, inputs 46 | 47 | def register(deploy_path, forms): 48 | register_form = get_register_form(forms) 49 | print 'Register form: {}'.format(register_form) 50 | if 
register_form == None: 51 | return None, None, None 52 | 53 | matched_patterns, inputs, response, br = fill_form(register_form) 54 | 55 | if 'email' in matched_patterns: 56 | matched_patterns, part_inputs = verify_email(deploy_path, register_form, matched_patterns) 57 | if part_inputs != None: 58 | inputs.update(part_inputs) 59 | 60 | return register_form, matched_patterns, inputs 61 | -------------------------------------------------------------------------------- /core/utils/pip.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import expanduser 3 | 4 | from run import run_command 5 | from file import cd 6 | 7 | HOME_DIR = expanduser('~') 8 | 9 | def home_path(path): 10 | return os.path.join(HOME_DIR, path) 11 | 12 | def configure_env(path): 13 | command = 'virtualenv --no-site-packages {}'.format(path) 14 | return run_command(command) 15 | 16 | def to_env(path): 17 | return '{} && {}'.format(cd(path), 'source bin/activate') 18 | 19 | def pip_install(path, names, is_file, has_version = True): 20 | command = '{} && pip --no-cache-dir install'.format(to_env(path)) 21 | 22 | proxy = os.environ.get('http_proxy') 23 | if proxy: 24 | command = '{} --proxy {} '.format(command, proxy) 25 | if is_file: 26 | filename = home_path(names) 27 | command = '{} -r {}'.format(command, filename) 28 | else: 29 | for name in names: 30 | if isinstance(name, dict): 31 | if name.get('version', ''): 32 | command = '{} {}=={} '.format(command, name['name'], name['version']) 33 | else: 34 | command = '{} {}'.format(command, name['name']) 35 | else: 36 | if has_version and name.version != None and name.version != '': 37 | command = '{} {}=={} '.format(command, name.name, name.version) 38 | elif name.name == 'django': 39 | command = '{} {}==1.8.4'.format(command, name.name) 40 | else: 41 | command = '{} {}'.format(command, name.name) 42 | out = run_command(command) 43 | 44 | return out 45 | 46 | def pip_install_text(path, 
name): 47 | command = '{} && pip --no-cache-dir install'.format(to_env(path)) 48 | 49 | proxy = os.environ.get('http_proxy') 50 | if proxy: 51 | command = '{} --proxy {} '.format(command, proxy) 52 | command = '{} {} '.format(command, name) 53 | out = run_command(command) 54 | 55 | return out 56 | 57 | def pip_freeze(path): 58 | out = run_command('{} && pip freeze'.format(to_env(path))) 59 | out = out[1].strip().splitlines() 60 | out = [line for line in out if not ' ' in line and '==' in line] 61 | return out 62 | -------------------------------------------------------------------------------- /library/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from models import * 3 | 4 | class DependencyInline(admin.StackedInline): 5 | model = Dependency 6 | extra = 3 7 | 8 | class ProjectTypeAdmin(admin.ModelAdmin): 9 | list_display = [ 'name', 'filename', 'deployer_class' ] 10 | ## CLASS 11 | 12 | class RepositorySourceAdmin(admin.ModelAdmin): 13 | list_display = [ 'name', 'crawler_class', 'base_url', 'commit_url', 'search_token', ] 14 | ## CLASS 15 | 16 | class CrawlerStatusAdmin(admin.ModelAdmin): 17 | list_display = [ 'id', 'source', 'project_type', 'next_url', 'last_crawler_time', ] 18 | ## CLASS 19 | 20 | class RepositoryAdmin(admin.ModelAdmin): 21 | list_display = [ 'id', 'name', 'valid_project', 'get_project_type', 'source', 'commits_count', 'description', 'crawler_date', 'updated_date' ] 22 | list_filter = ['project_type', 'valid_project', 'crawler_date', 'updated_date'] 23 | fieldsets = [ 24 | (None, {'fields': ['name', 'project_type', 'source', 'description']}), 25 | ('Date information', {'fields': ['created_at', 'updated_at', 'pushed_at']}), 26 | ] 27 | 28 | def get_project_type(self, obj): 29 | return obj.project_type.name 30 | 31 | get_project_type.short_description = 'Project Type' 32 | # CLASS 33 | 34 | class AttemptAdmin(admin.ModelAdmin): 35 | list_display = [ 'id', 'repo', 
'result_name', 'start_time', 'stop_time' ] 36 | list_filter = ['result', 'start_time'] 37 | raw_id_fields = [ 'repo' ] 38 | #inlines = [DependencyInline] 39 | # CLASS 40 | 41 | class PackageAdmin(admin.ModelAdmin): 42 | list_display = [ 'name', 'project_type', 'version', 'count' ] 43 | list_filter = ['project_type'] 44 | # CLASS 45 | 46 | # Register your models here. 47 | admin.site.register(ProjectType, ProjectTypeAdmin) 48 | admin.site.register(RepositorySource, RepositorySourceAdmin) 49 | admin.site.register(CrawlerStatus, CrawlerStatusAdmin) 50 | admin.site.register(Database) 51 | 52 | admin.site.register(Repository, RepositoryAdmin) 53 | admin.site.register(Package, PackageAdmin) 54 | admin.site.register(Dependency) 55 | admin.site.register(Attempt, AttemptAdmin) 56 | admin.site.register(Module) 57 | admin.site.register(WebStatistic) 58 | admin.site.register(Statistic) 59 | -------------------------------------------------------------------------------- /library/static/md/tools.md: -------------------------------------------------------------------------------- 1 | # Command Line Tool Tutorial 2 | 3 | ### Install Requirements 4 | To Start with, please install the required Python packages for running the command line tool. You can install them by pip as following 5 | ```sh 6 | $ pip install requests 7 | ``` 8 | 9 | Please add enough permission for the main.py file as following 10 | ```sh 11 | $ chmod +x main.py 12 | ``` 13 | 14 | ### Get Attempt Information 15 | You can get the sufficient information of an attempt by running this command: 16 | ```sh 17 | $ ./main.py info -attempt ATTEMPT 18 | ``` 19 | where *ATTEMPT* is the id of the attempt you want to inquire. 20 | 21 | If you want to know more information, you can type this command to get a help message: 22 | ```sh 23 | $ ./main.py info -h 24 | ``` 25 | 26 | ### Running Benchmark 27 | A lot of arguments are required to run the benchmark. 
You can type this command to get the full information: 28 | ```sh 29 | ./main.py benchmark -h 30 | ``` 31 | We have provide you with a comprehensive illustraions about the arguments: 32 | ```sh 33 | usage: main.py benchmark [-h] [-attempt ATTEMPT] [-database DATABASE] [-host HOST] [-port PORT] [-name NAME] [-username USERNAME] [-password PASSWORD] [-num_threads NUM_THREADS] [-timeout TIMEOUT] 34 | 35 | optional arguments: 36 | -h, --help show this help message and exit 37 | -attempt ATTEMPT, --attempt ATTEMPT 38 | the id of the attempt 39 | -database DATABASE, --database DATABASE 40 | the database you are using, e.g. mysql 41 | -host HOST, --host HOST 42 | the host address of your database server 43 | -port PORT, --port PORT 44 | the port of your database server 45 | -name NAME, --name NAME 46 | the name of your database 47 | -username USERNAME, --username USERNAME 48 | the username of your database server 49 | -password PASSWORD, --password PASSWORD 50 | the password of your database server 51 | -num_threads NUM_THREADS, --num_threads NUM_THREADS 52 | the number of threads you want to use to submit forms 53 | -timeout TIMEOUT, --timeout TIMEOUT 54 | the timeout for submitting forms 55 | ``` 56 | 57 | Then you can see the results if the arguments are correctly provided. 58 | -------------------------------------------------------------------------------- /library/templates/admin/add_repository.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /library/templates/about.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}About » {% endblock %} 4 | 5 | {% block main %} 6 |

    About

    7 | 8 | 9 |
    10 |

    The goal of the Database Application Catalog project is to find a large number of database applications to use in various projects, including workload analysis, automatic tuning, and benchmarking. It searches the Internet for web-based database applications and runs them locally in order to learn how they use a DBMS. 11 | 12 |

    All of the source code for the CMDBAC is available on GitHub under the Apache Software License. 13 |

    14 | 15 | 16 | 17 |
    18 |

    People

    19 | 20 | 21 |
    22 |
    23 | Zeyuan Shang
    Zeyuan Shang
    24 |
    Tsinghua University 25 |
    26 |
    27 | Dana Van Aken
    Dana Van Aken
    28 |
    Carnegie Mellon University 29 |
    30 |
    31 | Andy Pavlo
    Andy Pavlo
    32 |
    Carnegie Mellon University 33 |
    34 |
    35 | 36 | 37 |

    Alumni

    38 |
      39 |
    • Fangyu Gao (Carnegie Mellon University)
    • 40 |
    41 |
    42 | 43 | 44 | 45 |
    46 |

    Acknowledgements

    47 | This research was funded (in part) by the National Science Foundation (III-1423210). 48 |
    49 |
    50 | 51 | 52 | 53 | {% endblock %} -------------------------------------------------------------------------------- /analysis/general/analyze_repository.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) 5 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "core")) 6 | 7 | from utils import filter_repository 8 | 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 10 | import django 11 | django.setup() 12 | 13 | from library.models import * 14 | 15 | def repository_stats(): 16 | stats = {} 17 | 18 | for project_type in ProjectType.objects.all(): 19 | project_type_name = project_type.name 20 | stats[project_type_name] = [] 21 | 22 | for repo in Repository.objects.filter(project_type = project_type).exclude(latest_successful_attempt = None): 23 | if filter_repository(repo): 24 | continue 25 | transaction_count = 0 26 | 27 | for action in Action.objects.filter(attempt = repo.latest_successful_attempt): 28 | transaction = '' 29 | for query in Query.objects.filter(action = action): 30 | if 'BEGIN' in query.content.upper() or 'START TRANSACTION' in query.content.upper(): 31 | transaction = query.content + '\n' 32 | elif transaction != '': 33 | transaction += query.content + '\n' 34 | if 'COMMIT' in query.content.upper(): 35 | transaction = transaction.strip('\n') 36 | 37 | # for each transaction, count the number of transactions 38 | transaction_count += 1 39 | 40 | if transaction_count > 0: 41 | stats[project_type_name].append((repo.commits_count, transaction_count, repo)) 42 | 43 | for project_type_name in stats: 44 | print project_type_name 45 | 46 | for commits_count, transaction_count, repo in sorted(stats[project_type_name], reverse = True): 47 | print repo.name, 
'txns:{}'.format(transaction_count), 'commits:{}'.format(commits_count), 48 | print 'http://cmdbac.cs.cmu.edu/attempt/' + str(repo.latest_successful_attempt.id) 49 | 50 | print '------------------------------' 51 | 52 | def main(): 53 | # active 54 | repository_stats() 55 | 56 | # working 57 | 58 | # deprecated 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /blog/templatetags/pinax_blog_tags.py: -------------------------------------------------------------------------------- 1 | from django import template 2 | 3 | from ..models import Post, Section 4 | 5 | 6 | register = template.Library() 7 | 8 | 9 | class LatestBlogPostsNode(template.Node): 10 | 11 | def __init__(self, context_var): 12 | self.context_var = context_var 13 | 14 | def render(self, context): 15 | latest_posts = Post.objects.current()[:5] 16 | context[self.context_var] = latest_posts 17 | return "" 18 | 19 | 20 | @register.tag 21 | def latest_blog_posts(parser, token): 22 | bits = token.split_contents() 23 | return LatestBlogPostsNode(bits[2]) 24 | 25 | 26 | class LatestBlogPostNode(template.Node): 27 | 28 | def __init__(self, context_var): 29 | self.context_var = context_var 30 | 31 | def render(self, context): 32 | try: 33 | latest_post = Post.objects.current()[0] 34 | except IndexError: 35 | latest_post = None 36 | context[self.context_var] = latest_post 37 | return "" 38 | 39 | 40 | @register.tag 41 | def latest_blog_post(parser, token): 42 | bits = token.split_contents() 43 | return LatestBlogPostNode(bits[2]) 44 | 45 | 46 | class LatestSectionPostNode(template.Node): 47 | 48 | def __init__(self, section, context_var): 49 | self.section = template.Variable(section) 50 | self.context_var = context_var 51 | 52 | def render(self, context): 53 | section = self.section.resolve(context) 54 | 55 | post = Post.objects.published().filter(section__name=section).order_by("-published") 56 | try: 57 | post = post[0] 58 | except 
IndexError: 59 | post = None 60 | context[self.context_var] = post 61 | return "" 62 | 63 | 64 | @register.tag 65 | def latest_section_post(parser, token): 66 | """ 67 | {% latest_section_post "articles" as latest_article_post %} 68 | """ 69 | bits = token.split_contents() 70 | return LatestSectionPostNode(bits[1], bits[3]) 71 | 72 | 73 | class BlogSectionsNode(template.Node): 74 | 75 | def __init__(self, context_var): 76 | self.context_var = context_var 77 | 78 | def render(self, context): 79 | sections = Section.objects.filter(enabled=True) 80 | context[self.context_var] = sections 81 | return "" 82 | 83 | 84 | @register.tag 85 | def blog_sections(parser, token): 86 | """ 87 | {% blog_sections as blog_sections %} 88 | """ 89 | bits = token.split_contents() 90 | return BlogSectionsNode(bits[2]) 91 | -------------------------------------------------------------------------------- /core/analyzers/sqlite3analyzer.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import logging 5 | 6 | from baseanalyzer import BaseAnalyzer 7 | 8 | ## ===================================================================== 9 | ## LOGGING CONFIGURATION 10 | ## ===================================================================== 11 | LOG = logging.getLogger() 12 | 13 | ## ===================================================================== 14 | ## SQLITE3 ANALYZER 15 | ## ===================================================================== 16 | class SQLite3Analyzer(BaseAnalyzer): 17 | 18 | def __init__(self, deployer): 19 | BaseAnalyzer.__init__(self, deployer) 20 | 21 | def analyze_queries(self, queries): 22 | self.queries_stats['num_transactions'] = self.count_transaction(queries) + self.queries_stats.get('num_transactions', 0) 23 | 24 | try: 25 | conn = self.deployer.get_database_connection() 26 | cur = conn.cursor() 27 | 28 | for query in queries: 29 
| try: 30 | if self.is_valid_for_explain(query['raw']): 31 | explain_query = 'EXPLAIN {};'.format(query['raw']) 32 | # print explain_query 33 | cur.execute(explain_query) 34 | rows = cur.fetchall() 35 | output = '\n' 36 | for row in rows: 37 | output += str(row) + '\n' 38 | query['explain'] = output 39 | except Exception, e: 40 | pass 41 | # LOG.exception(e) 42 | 43 | cur.close() 44 | conn.close() 45 | except Exception, e: 46 | LOG.exception(e) 47 | 48 | def analyze_database(self): 49 | try: 50 | conn = self.deployer.get_database_connection() 51 | cur = conn.cursor() 52 | database = self.deployer.get_database_name() 53 | 54 | # the number of tables 55 | cur.execute("SELECT COUNT(*) FROM sqlite_master WHERE type = 'table';") 56 | self.database_stats['num_tables'] = int(cur.fetchone()[0]) 57 | 58 | # the number of indexes 59 | cur.execute("SELECT COUNT(*) FROM sqlite_master WHERE type = 'index';") 60 | self.database_stats['num_indexes'] = int(cur.fetchone()[0]) 61 | 62 | cur.close() 63 | conn.close() 64 | except Exception, e: 65 | LOG.exception(e) -------------------------------------------------------------------------------- /core/scripts/vagrant_deploy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) 5 | 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 7 | import django 8 | django.setup() 9 | from library.models import * 10 | from deployers import * 11 | from drivers import * 12 | from analyzers import * 13 | import utils 14 | 15 | def main(): 16 | if len(sys.argv) not in [3, 4]: 17 | return 18 | repo_name = sys.argv[1] 19 | deploy_id = sys.argv[2] 20 | if len(sys.argv) > 3: 21 | database_name = sys.argv[3] 22 | else: 23 | database_name = 'MySQL' 24 | print 'Database : {} ...'.format(database_name) 25 | 26 | 
repo = Repository.objects.get(name=repo_name) 27 | database = Database.objects.get(name=database_name) 28 | 29 | moduleName = "deployers.%s" % (repo.project_type.deployer_class.lower()) 30 | moduleHandle = __import__(moduleName, globals(), locals(), [repo.project_type.deployer_class]) 31 | klass = getattr(moduleHandle, repo.project_type.deployer_class) 32 | 33 | deployer = klass(repo, database, deploy_id) 34 | if deployer.deploy() != 0: 35 | deployer.kill_server() 36 | sys.exit(-1) 37 | 38 | print 'Driving ...' 39 | 40 | driver = BaseDriver(deployer.get_main_url(), deployer.get_database(), deployer.deploy_id, deployer.base_path, deployer.log_file) 41 | try: 42 | driverResult = driver.drive() 43 | except Exception, e: 44 | LOG.exception(e) 45 | driverResult = {} 46 | 47 | print 'Random Walking ...' 48 | 49 | try: 50 | random_driver = RandomDriver(driver) 51 | random_driver.start() 52 | print 'Random Walk Forms Count: {}'.format(len(random_driver.forms)) 53 | print 'Basic Forms Count: {}'.format(len(driverResult['forms'])) 54 | for form in random_driver.forms: 55 | if any(random_driver.equal_form(form, ret_form) for ret_form in driverResult['forms']): 56 | continue 57 | driverResult['forms'].append(form) 58 | except Exception, e: 59 | LOG.exception(e) 60 | 61 | deployer.kill_server() 62 | 63 | analyzer = get_analyzer(deployer) 64 | for form in driverResult['forms']: 65 | analyzer.analyze_queries(form['queries']) 66 | for url in driverResult['urls']: 67 | analyzer.analyze_queries(url['queries']) 68 | driverResult['statistics'] = analyzer.queries_stats 69 | analyzer.analyze_database() 70 | driverResult['statistics'].update(analyzer.database_stats) 71 | driverResult['informations'] = analyzer.database_informations 72 | 73 | deployer.save_attempt(ATTEMPT_STATUS_SUCCESS, driverResult) 74 | 75 | if __name__ == "__main__": 76 | main() -------------------------------------------------------------------------------- /blog/admin.py: 
-------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from django.utils import timezone 3 | from django.utils.functional import curry 4 | 5 | from .forms import AdminPostForm 6 | from .models import Post, Image, ReviewComment, Section 7 | from .utils import can_tweet 8 | 9 | 10 | class ImageInline(admin.TabularInline): 11 | model = Image 12 | fields = ["image_path"] 13 | 14 | 15 | class ReviewInline(admin.TabularInline): 16 | model = ReviewComment 17 | 18 | 19 | def make_published(modeladmin, request, queryset): 20 | queryset = queryset.exclude(state=Post.STATE_CHOICES[-1][0], published__isnull=False) 21 | queryset.update(state=Post.STATE_CHOICES[-1][0]) 22 | queryset.filter(published__isnull=True).update(published=timezone.now()) 23 | make_published.short_description = "Publish selected posts" 24 | 25 | 26 | class PostAdmin(admin.ModelAdmin): 27 | list_display = ["title", "state", "section", "published", "show_secret_share_url"] 28 | list_filter = ["section", "state"] 29 | form = AdminPostForm 30 | actions = [make_published] 31 | fields = [ 32 | "section", 33 | "title", 34 | "slug", 35 | "author", 36 | "published", 37 | "markup", 38 | "teaser", 39 | "content", 40 | "description", 41 | "primary_image", 42 | "sharable_url", 43 | "state" 44 | ] 45 | readonly_fields = ["sharable_url"] 46 | 47 | if can_tweet(): 48 | fields.append("tweet") 49 | prepopulated_fields = {"slug": ("title",)} 50 | inlines = [ 51 | ImageInline, 52 | ReviewInline, 53 | ] 54 | 55 | def show_secret_share_url(self, obj): 56 | return '%s' % (obj.sharable_url, obj.sharable_url) 57 | show_secret_share_url.short_description = "Share this url" 58 | show_secret_share_url.allow_tags = True 59 | 60 | def formfield_for_dbfield(self, db_field, **kwargs): 61 | request = kwargs.get("request") 62 | if db_field.name == "author": 63 | ff = super(PostAdmin, self).formfield_for_dbfield(db_field, **kwargs) 64 | ff.initial = request.user.id 65 | 
return ff 66 | return super(PostAdmin, self).formfield_for_dbfield(db_field, **kwargs) 67 | 68 | def get_form(self, request, obj=None, **kwargs): 69 | kwargs.update({ 70 | "formfield_callback": curry(self.formfield_for_dbfield, request=request), 71 | }) 72 | return super(PostAdmin, self).get_form(request, obj, **kwargs) 73 | 74 | def save_form(self, request, form, change): 75 | # this is done for explicitness that we want form.save to commit 76 | # form.save doesn't take a commit kwarg for this reason 77 | return form.save() 78 | 79 | 80 | class SectionAdmin(admin.ModelAdmin): 81 | prepopulated_fields = {"slug": ("name",)} 82 | 83 | 84 | admin.site.register(Post, PostAdmin) 85 | admin.site.register(Image) 86 | admin.site.register(Section, SectionAdmin) 87 | -------------------------------------------------------------------------------- /library/forms.py: -------------------------------------------------------------------------------- 1 | from django import forms 2 | from models import * 3 | from django.template.loader import render_to_string 4 | from django.forms.fields import EMPTY_VALUES 5 | from django.utils.translation import ugettext as _ 6 | 7 | 8 | class ResultForm(forms.Form): 9 | results = forms.MultipleChoiceField( 10 | widget=forms.CheckboxSelectMultiple, 11 | choices=reversed(ATTEMPT_STATUS), 12 | required=False, 13 | label="Latest Attempt Status") 14 | 15 | class ProjectTypeForm(forms.Form): 16 | options = ProjectType.objects.all().values_list('name', 'name') 17 | types = forms.MultipleChoiceField( 18 | widget=forms.CheckboxSelectMultiple, 19 | choices=options, 20 | required=False, 21 | label="Project Type") 22 | 23 | class StatisticsForm(forms.Form): 24 | num_options = [('-1', 'Any'), ('0-10', 'Less than or equal to 10'), ('11-100', 'Between 11 and 100'), ('101-99999', 'More than 100')] 25 | ratio_options = [('-1', 'Any'), ('0-50', 'Lesson than or equal to 0.5'), ('51-100', '0.5-1'), ('101-99999', 'More than 1')] 26 | 27 | num_tables = 
forms.ChoiceField(choices=num_options, required = False, label = '# of Tables', widget=forms.Select(attrs={'class':'form-control'})) 28 | num_indexes = forms.ChoiceField(choices=num_options, required = False, label = '# of Indexes', widget=forms.Select(attrs={'class':'form-control'})) 29 | num_secondary_indexes = forms.ChoiceField(choices=num_options, required = False, label = '# of Secondary Indexes', widget=forms.Select(attrs={'class':'form-control'})) 30 | num_constraints = forms.ChoiceField(choices=num_options, required = False, label = '# of Constraints', widget=forms.Select(attrs={'class':'form-control'})) 31 | num_foreignkeys = forms.ChoiceField(choices=num_options, required = False, label = '# of Foreign Keys', widget=forms.Select(attrs={'class':'form-control'})) 32 | num_transactions = forms.ChoiceField(choices=num_options, required = False, label = '# of Transactions', widget=forms.Select(attrs={'class':'form-control'})) 33 | transaction_ratio = forms.ChoiceField(choices=ratio_options, required = False, label = 'Ratio of Txn/Action', widget=forms.Select(attrs={'class':'form-control'})) 34 | 35 | coverage_options = [('-1', 'Any'), ('0-20', 'Less than 20'), ('21-40', '21-40'), ('41-60', '41-60'), ('61-80', '61-80'), ('81-100', '81-100')] 36 | table_coverage = forms.ChoiceField(choices=coverage_options, required = False, label = 'Table Coverage', widget=forms.Select(attrs={'class':'form-control'})) 37 | column_coverage = forms.ChoiceField(choices=coverage_options, required = False, label = 'Column Coverage', widget=forms.Select(attrs={'class':'form-control'})) 38 | # index_coverage = forms.ChoiceField(choices=coverage_options, required = False, label = 'Index Coverage', widget=forms.Select(attrs={'class':'form-control'})) 39 | 40 | -------------------------------------------------------------------------------- /scripts/count_repos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, 
sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "core")) 5 | 6 | import time 7 | import traceback 8 | 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 10 | import django 11 | django.setup() 12 | from django.db.models import Q 13 | 14 | from library.models import * 15 | import utils 16 | 17 | COMMITS_COUNT_THRESHOLD = 10 18 | 19 | def count_deployed_repos(): 20 | stats = {} 21 | for repo in Repository.objects.exclude(latest_successful_attempt = None): 22 | if repo.commits_count >= 0 and repo.commits_count <= COMMITS_COUNT_THRESHOLD: 23 | continue 24 | if Information.objects.filter(attempt = repo.latest_successful_attempt).filter(name = 'key_column_usage'): 25 | stats[repo.project_type] = stats.get(repo.project_type, 0) + 1 26 | 27 | print stats 28 | 29 | def count_ruby_failed_repos(): 30 | count = 0 31 | for repo in Repository.objects.filter(latest_successful_attempt = None).filter(project_type = 2).exclude(latest_attempt = None): 32 | if repo.commits_count >= 0 and repo.commits_count <= COMMITS_COUNT_THRESHOLD: 33 | continue 34 | if 'Unable to find database.yml' in repo.latest_attempt.log: 35 | count += 1 36 | 37 | print count 38 | 39 | def count_ruby_repetive_queries(): 40 | repo_count = [0, 0] 41 | action_count = [0, 0] 42 | for repo in Repository.objects.exclude(latest_successful_attempt = None).filter(project_type = 2): 43 | repo_flag = False 44 | for action in Action.objects.filter(attempt = repo.latest_successful_attempt): 45 | action_flag = False 46 | for query in Query.objects.filter(action = action): 47 | if 'SELECT 1' in query.content: 48 | repo_flag = True 49 | action_flag = True 50 | if action_flag: 51 | action_count[0] += 1 52 | action_count[1] += 1 53 | if repo_flag: 54 | repo_count[0] += 1 55 | repo_count[1] += 1 56 | 57 | print repo_count 58 | print action_count 59 | 60 | def count_wrong_marked_repos(): 61 | repo_count = 0 62 | 
for repo in Repository.objects.exclude(latest_successful_attempt = None): 63 | if repo.latest_successful_attempt.result != 'OK': 64 | repo_count += 1 65 | repo.latest_successful_attempt = None 66 | repo.save() 67 | for repo in Repository.objects.filter(project_type = 2): 68 | attempts = Attempt.objects.filter(repo = repo).filter(result = 'OK') 69 | if attempts: 70 | repo.latest_successful_attempt = list(attempts)[-1] 71 | repo.save() 72 | print repo_count 73 | 74 | def main(): 75 | # count_deployed_repos() 76 | # count_ruby_failed_repos() 77 | # count_ruby_repetive_queries() 78 | count_wrong_marked_repos() 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /core/utils/file.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import re 4 | 5 | from run import run_command 6 | 7 | def search_file(directory_name, file_name): 8 | result = [] 9 | for root, dirs, files in os.walk(directory_name): 10 | for file in files: 11 | if file == file_name: 12 | path = os.path.join(root, file) 13 | if not os.path.islink(path): 14 | result.append(path) 15 | return result 16 | 17 | def search_file_regex(directory_name, file_name_pattern): 18 | result = [] 19 | for root, dirs, files in os.walk(directory_name): 20 | for file in files: 21 | if re.search(file_name_pattern, file): 22 | path = os.path.join(root, file) 23 | if not os.path.islink(path): 24 | result.append(path) 25 | return result 26 | 27 | def search_file_norecur(directory_name, file_name): 28 | for file in os.listdir(directory_name): 29 | if os.path.isfile(os.path.join(directory_name, file)) and file == file_name: 30 | return True 31 | return False 32 | 33 | def search_dir(directory_name, query_name): 34 | for root, dirs, files in os.walk(directory_name): 35 | for _dir in dirs: 36 | if query_name in _dir: 37 | path = os.path.join(root, _dir) 38 | return path 39 | 40 | def 
replace_file_regex(file, string_pattern, string): 41 | with open(file, "r+") as f: 42 | s = f.read() 43 | s = re.sub(string_pattern, string, s, flags=re.DOTALL) 44 | f.seek(0) 45 | f.write(s) 46 | f.truncate() 47 | f.close() 48 | 49 | def replace_files_regex(directory_name, string_pattern, string): 50 | for root, dirs, files in os.walk(directory_name): 51 | for file in files: 52 | replace_file_regex(os.path.join(root, file), string_pattern, string) 53 | 54 | def unzip(zip_name, dir_name): 55 | command = 'unzip -o -qq ' + zip_name + ' -d ' + dir_name 56 | out = run_command(command) 57 | 58 | def rm_dir(path): 59 | #if os.path.exists(path): 60 | # shutil.rmtree(path) 61 | os.system('sudo rm -rf {}'.format(path)) 62 | 63 | def mk_dir(path): 64 | if not os.path.exists(path): 65 | os.makedirs(path) 66 | 67 | def chmod_dir(path): 68 | if os.path.exists(path): 69 | os.chmod(path, 0777) 70 | 71 | def make_dir(path): 72 | rm_dir(path) 73 | mk_dir(path) 74 | chmod_dir(path) 75 | 76 | def cd(path): 77 | return "cd "+ path 78 | 79 | def rename_file(old_file, new_file): 80 | return run_command('mv {} {}'.format( 81 | old_file, 82 | new_file)) 83 | 84 | def copy_file(old_file, new_file): 85 | shutil.copy2(old_file, new_file) 86 | 87 | def remove_file(path): 88 | try: 89 | os.remove(path) 90 | except: 91 | pass 92 | 93 | def get_size(start_path = '.'): 94 | total_size = 0 95 | for dirpath, dirnames, filenames in os.walk(start_path): 96 | for f in filenames: 97 | try: 98 | fp = os.path.join(dirpath, f) 99 | total_size += os.path.getsize(fp) 100 | except: 101 | pass 102 | return total_size 103 | -------------------------------------------------------------------------------- /core/drivers/extract/extract.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) 3 | 4 | import utils 5 | import json 6 | from cmudbac.settings import * 7 | 8 | 
EXTRACT_WAIT_TIME = 0 9 | 10 | def extract_forms(url, follow = "false", cookie_jar = None, filename = "forms.json"): 11 | utils.remove_file(os.path.join(os.path.dirname(__file__), filename)) 12 | 13 | if cookie_jar == None: 14 | try: 15 | out = utils.run_command('{} && {}'.format( 16 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 17 | 'scrapy crawl form -o {} -a start_url="{}" -a follow={} -a proxy={}'.format(filename, url, follow, HTTP_PROXY)), EXTRACT_WAIT_TIME) 18 | except: 19 | out = utils.run_command('{} && {}'.format( 20 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 21 | 'scrapy crawl form -o {} -a start_url="{}" -a follow={}'.format(filename, url, follow)), EXTRACT_WAIT_TIME) 22 | else: 23 | cookie_jar_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename.replace('.json', '.txt')) 24 | cookie_jar.save(cookie_jar_path) 25 | out = utils.run_command('{} && {}'.format( 26 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 27 | 'scrapy crawl form_with_cookie -o {} -a start_url="{}" -a cookie_jar={}'.format(filename, url, cookie_jar_path)), EXTRACT_WAIT_TIME) 28 | 29 | with open(os.path.join(os.path.dirname(__file__), filename)) as json_forms: 30 | forms = json.load(json_forms) 31 | 32 | utils.remove_file(os.path.join(os.path.dirname(__file__), filename)) 33 | 34 | return forms 35 | 36 | def extract_all_forms(url, filename): 37 | return extract_forms(url, "true", filename = filename) 38 | 39 | def extract_all_forms_with_cookie(url, cookie_jar, filename): 40 | return extract_forms(url, "true", cookie_jar, filename) 41 | 42 | def extract_urls(url, follow = "false", cookie_jar = None, filename = "urls.json"): 43 | utils.remove_file(os.path.join(os.path.dirname(__file__), filename)) 44 | 45 | if cookie_jar == None: 46 | try: 47 | out = utils.run_command('{} && {}'.format( 48 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 49 | 'scrapy crawl url -o {} -a start_url="{}" -a follow={} -a proxy={}'.format(filename, url, 
follow, HTTP_PROXY)), EXTRACT_WAIT_TIME) 50 | except: 51 | out = utils.run_command('{} && {}'.format( 52 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 53 | 'scrapy crawl url -o {} -a start_url="{}" -a follow={}'.format(filename, url, follow)), EXTRACT_WAIT_TIME) 54 | else: 55 | cookie_jar_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename.replace('.json', '.txt')) 56 | cookie_jar.save(cookie_jar_path) 57 | out = utils.run_command('{} && {}'.format( 58 | utils.cd(os.path.dirname(os.path.abspath(__file__))), 59 | 'scrapy crawl url_with_cookie -o {} -a start_url="{}" -a cookie_jar={}'.format(filename, url, cookie_jar_path)), EXTRACT_WAIT_TIME) 60 | 61 | with open(os.path.join(os.path.dirname(__file__), filename)) as json_urls: 62 | urls = json.load(json_urls) 63 | 64 | utils.remove_file(os.path.join(os.path.dirname(__file__), filename)) 65 | return urls 66 | 67 | def extract_all_urls(url, filename): 68 | return extract_urls(url, "true", filename = filename) 69 | 70 | def extract_all_urls_with_cookie(url, cookie_jar, filename): 71 | return extract_urls(url, "true", cookie_jar, filename) -------------------------------------------------------------------------------- /core/drivers/extract/driver/spiders/form_with_cookie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import scrapy 4 | from scrapy.spiders import CrawlSpider, Rule 5 | from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor 6 | import cookielib 7 | 8 | from driver.items import InputItem, FormItem 9 | 10 | class FormWithCookieSpider(CrawlSpider): 11 | name = "form_with_cookie" 12 | allowed_domains = ["127.0.0.1"] 13 | 14 | def __init__(self, *args, **kwargs): 15 | super(FormWithCookieSpider, self).__init__(*args, **kwargs) 16 | 17 | self.start_urls = [kwargs.get('start_url')] 18 | self.cookiejar = cookielib.LWPCookieJar() 19 | self.cookiejar.load(kwargs.get('cookie_jar')) 20 | 21 | self.rules = 
( 22 | Rule (SgmlLinkExtractor(allow=('')), callback='parse_form', follow=True, process_request='add_cookie_for_request'), 23 | ) 24 | super(FormWithCookieSpider, self)._compile_rules() 25 | 26 | def add_cookie_for_request(self, request): 27 | for cookie in self.cookiejar: 28 | request.cookies[cookie.name] = cookie.value 29 | logout_patterns = ['logout', 'log-out', 'log_out'] 30 | if any(logout_pattern in request.url for logout_pattern in logout_patterns): 31 | return None 32 | return request 33 | 34 | def parse_form(self, response): 35 | for sel in response.xpath('//form'): 36 | formItem = FormItem() 37 | 38 | formItem['action'] = '' 39 | try: 40 | formItem['action'] = sel.xpath('@action').extract()[0] 41 | except: 42 | pass 43 | 44 | formItem['url'] = response.url 45 | 46 | formItem['method'] = '' 47 | try: 48 | formItem['method'] = sel.xpath('@method').extract()[0].lower() 49 | except: 50 | pass 51 | 52 | formItem['inputs'] = [] 53 | for ip in sel.xpath('.//input|.//textarea'): 54 | try: 55 | _id = ip.xpath('@id').extract()[0] 56 | except: 57 | _id = '' 58 | name = ip.xpath('@name').extract()[0] 59 | try: 60 | _type = ip.xpath('@type').extract()[0] 61 | except: 62 | _type = 'textarea' 63 | try: 64 | value = ip.xpath('@value').extract()[0] 65 | except: 66 | value = '' 67 | inputItem = InputItem() 68 | inputItem['id'] = _id 69 | inputItem['name'] = name 70 | inputItem['type'] = _type 71 | inputItem['value'] = value 72 | formItem['inputs'].append(inputItem) 73 | 74 | try: 75 | _id = sel.xpath('@id').extract()[0] 76 | except: 77 | _id = '' 78 | try: 79 | _class = sel.xpath('@class').extract()[0] 80 | except: 81 | _class = '' 82 | try: 83 | enctype = sel.xpath('@enctype').extract()[0] 84 | except: 85 | enctype = '' 86 | formItem['id'] = _id 87 | formItem['clazz'] = _class 88 | formItem['enctype'] = enctype 89 | 90 | yield formItem 91 | 92 | -------------------------------------------------------------------------------- /core/drivers/extract/driver/settings.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for driver project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. You can find more settings consulting the documentation: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'driver' 13 | 14 | SPIDER_MODULES = ['driver.spiders'] 15 | NEWSPIDER_MODULE = 'driver.spiders' 16 | 17 | 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 19 | #USER_AGENT = 'driver (+http://www.yourdomain.com)' 20 | 21 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 22 | CONCURRENT_REQUESTS=32 23 | 24 | # Configure a delay for requests for the same website (default: 0) 25 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay 26 | # See also autothrottle settings and docs 27 | #DOWNLOAD_DELAY=3 28 | # The download delay setting will honor only one of: 29 | #CONCURRENT_REQUESTS_PER_DOMAIN=16 30 | #CONCURRENT_REQUESTS_PER_IP=16 31 | 32 | # Disable cookies (enabled by default) 33 | #COOKIES_ENABLED=False 34 | 35 | # Disable Telnet Console (enabled by default) 36 | #TELNETCONSOLE_ENABLED=False 37 | 38 | # Override the default request headers: 39 | #DEFAULT_REQUEST_HEADERS = { 40 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 41 | # 'Accept-Language': 'en', 42 | #} 43 | 44 | # Enable or disable spider middlewares 45 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 46 | #SPIDER_MIDDLEWARES = { 47 | # 'driver.middlewares.MyCustomSpiderMiddleware': 543, 48 | #} 49 | 50 | # Enable or disable downloader middlewares 51 | # See 
# Fields exposed on the admin form, in display order.  The list is extended
# with "tweet" below when twitter support is configured (see can_tweet()).
FIELDS = [
    "section",
    "author",
    "markup",
    "title",
    "slug",
    "teaser",
    "content",
    "description",
    "primary_image",
    "state",
]

if can_tweet():
    FIELDS.append("tweet")


class AdminPostForm(forms.ModelForm):
    """Admin form for blog Posts.

    Overrides the widgets of the text fields for a wider admin layout, seeds
    ``teaser``/``content`` from the latest Revision, and on save renders the
    markup to HTML, records a new Revision, optionally tweets, and fires the
    ``post_published`` signal the first time a post is published.

    NOTE(review): save() does not accept Django's ``commit`` keyword even
    though it overrides ModelForm.save(); confirm how the admin integration
    invokes it before relying on commit semantics.
    """

    title = forms.CharField(
        max_length=90,
        widget=forms.TextInput(attrs={"style": "width: 50%;"}),
    )
    slug = forms.CharField(
        widget=forms.TextInput(attrs={"style": "width: 50%;"})
    )
    teaser = forms.CharField(
        widget=forms.Textarea(attrs={"style": "width: 80%;"}),
    )
    content = forms.CharField(
        widget=forms.Textarea(attrs={"style": "width: 80%; height: 300px;"})
    )
    description = forms.CharField(
        widget=forms.Textarea(attrs={"style": "width: 80%;"}),
        required=False
    )
    # The tweet checkbox only exists when twitter credentials are configured.
    if can_tweet():
        tweet = forms.BooleanField(
            required=False,
            help_text="Checking this will send out a tweet for this post",
        )

    class Meta:
        model = Post
        fields = FIELDS

    class Media:
        js = ("js/admin_post_form.js",)

    def __init__(self, *args, **kwargs):
        """Pre-populate teaser/content from the post's most recent revision."""
        super(AdminPostForm, self).__init__(*args, **kwargs)

        post = self.instance

        # grab the latest revision of the Post instance
        latest_revision = post.latest()

        if latest_revision:
            # set initial data from the latest revision
            self.fields["teaser"].initial = latest_revision.teaser
            self.fields["content"].initial = latest_revision.content

    def save(self):
        """Persist the post, render its HTML, and record a Revision.

        Returns the saved Post.  Emits ``post_published`` only on the
        transition from unpublished to published.
        """
        published = False
        post = super(AdminPostForm, self).save(commit=False)

        # A post is "being published now" if it is new or has never been
        # published, AND the chosen state is the last (publish) choice.
        if post.pk is None or Post.objects.filter(pk=post.pk, published=None).count():
            if self.cleaned_data["state"] == Post.STATE_CHOICES[-1][0]:
                post.published = timezone.now()
                published = True

        # Resolve the markup parser (e.g. markdown) configured for the
        # selected markup choice; curry allows per-parser default kwargs.
        render_func = curry(
            load_path_attr(
                settings.PINAX_BLOG_MARKUP_CHOICE_MAP[self.cleaned_data["markup"]]["parser"]
            )
        )

        post.teaser_html = render_func(self.cleaned_data["teaser"])
        post.content_html = render_func(self.cleaned_data["content"])
        post.updated = timezone.now()
        post.save()

        # Every save produces an immutable Revision snapshot of the raw text.
        r = Revision()
        r.post = post
        r.title = post.title
        r.teaser = self.cleaned_data["teaser"]
        r.content = self.cleaned_data["content"]
        r.author = post.author
        r.updated = post.updated
        r.published = post.published
        r.save()

        if can_tweet() and self.cleaned_data["tweet"]:
            post.tweet()

        if published:
            post_published.send(sender=Post, post=post)

        return post
## =====================================================================
## LOGGING CONFIGURATION
## =====================================================================
LOG = logging.getLogger()


def get_crawler(crawler_status, crawler_class):
    """Instantiate the crawler class named ``crawler_class``.

    The class is imported from ``crawlers.<crawler_class lowercased>``.
    GitHub credentials are loaded from secrets/secrets.json when present;
    crawling proceeds unauthenticated (``auth=None``) otherwise.
    """
    module_name = "crawlers.%s" % (crawler_class.lower())
    module_handle = __import__(module_name, globals(), locals(), [crawler_class])
    klass = getattr(module_handle, crawler_class)
    # FOR GITHUB: credentials are optional; only swallow the errors that a
    # missing or malformed secrets file can actually raise.
    try:
        with open(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
                               "secrets", "secrets.json"), 'r') as auth_file:
            auth = json.load(auth_file)
    except (IOError, OSError, ValueError):
        auth = None
    return klass(crawler_status, auth)


def add_module(module_name, package_name, package_type_id, package_version):
    """Register a Module belonging to a (possibly new) Package."""
    project_type = ProjectType.objects.get(id=package_type_id)
    # get_or_create returns the package directly; no second lookup needed.
    package, _ = Package.objects.get_or_create(
        name=package_name, version=package_version, project_type=project_type)
    module = Module()
    module.name = module_name
    module.package = package
    module.save()


def add_repo(repo_name, crawler_status_id, repo_setup_scripts):
    """Add a repository through the crawler attached to a CrawlerStatus."""
    cs = CrawlerStatus.objects.get(id=crawler_status_id)
    repo_source = cs.source
    crawler = get_crawler(cs, repo_source.crawler_class)
    crawler.add_repository(repo_name, repo_setup_scripts)


def deploy_repo(repo_name, database='PostgreSQL'):
    """Deploy ``repo_name`` inside the Vagrant VM against ``database``."""
    repo = Repository.objects.get(name=repo_name)
    print('Attempting to deploy {} using {} ...'.format(repo, repo.project_type.deployer_class))
    try:
        result = utils.vagrant_deploy(repo, 0, database)
    except Exception as e:
        LOG.exception(e)
        raise  # bare raise preserves the original traceback ("raise e" did not)
    return result


def delete_repo(repo_name):
    """Delete every Repository row matching ``repo_name``."""
    for repo in Repository.objects.filter(name=repo_name):
        repo.delete()


def edit_distance(a, b, threshold=3):
    """Banded Levenshtein distance between ``a`` and ``b``.

    Returns the exact edit distance when it is <= ``threshold``; otherwise
    returns ``threshold + 1`` (possibly via an early exit once every cell in
    the current band exceeds the threshold).

    BUG FIX: the original applied the deletion relaxation ``d0[j] + 1`` to
    row ``i == 0`` as well, reading the uninitialized (all-zero) previous-row
    buffer and under-counting distances (e.g. ("ab", "cdab") returned 1
    instead of 2).  Row 0 is now fixed at ``d1[j] = j``.
    """
    dis = threshold + 1
    len_a = len(a)
    len_b = len(b)
    # Lengths differing by more than the threshold cannot be within it.
    if abs(len_a - len_b) > threshold:
        return dis
    d0 = [0] * (max(len_a, len_b) + 1)
    d1 = [0] * (max(len_a, len_b) + 1)
    for i in range(len_a + 1):
        l = max(0, i - threshold)
        r = min(len_b, i + threshold)
        minDis = threshold + 1
        for j in range(l, r + 1):
            if i == 0:
                # First row: distance from the empty prefix is simply j.
                d1[j] = j
            else:
                if j == 0:
                    d1[j] = i
                else:
                    # Substitution / match.
                    if a[i - 1] == b[j - 1]:
                        d1[j] = d0[j - 1]
                    else:
                        d1[j] = d0[j - 1] + 1
                # Insertion (within this row's band only).
                if j > l:
                    d1[j] = min(d1[j], d1[j - 1] + 1)
                # Deletion: d0[j] is only valid inside the previous row's band.
                if j < i + threshold:
                    d1[j] = min(d1[j], d0[j] + 1)
            minDis = min(minDis, d1[j])
        # Every cell in the band already exceeds the threshold: give up early.
        if minDis > threshold:
            return dis
        d0, d1 = d1, d0

    dis = d0[len_b]
    return dis
# Project directories mirrored into the Vagrant shared folder.
copied_dir = ['cmudbac', 'library', 'blog', 'core', 'secrets', 'scripts']
vagrant_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'vagrant')
copied_files = []


def vagrant_setup():
    """Copy the project tree into the Vagrant shared folder (idempotent)."""
    print('Setting up Vagrant ...')

    ## Copy files (skip directories that were already copied)
    for new_dir in copied_dir:
        old_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, new_dir)
        if os.path.exists(old_dir) and not os.path.exists(os.path.join(vagrant_dir, new_dir)):
            shutil.copytree(old_dir, os.path.join(vagrant_dir, new_dir))

    # run_command('{} && {}'.format(cd(vagrant_dir), 'vagrant up'))


def vagrant_clear():
    """Remove the copies made by vagrant_setup(); missing dirs are ignored."""
    for new_dir in copied_dir:
        try:
            shutil.rmtree(os.path.join(vagrant_dir, new_dir))
        except OSError:
            pass  # directory was never copied, nothing to clean

    # run_command('{} && {}'.format(cd(vagrant_dir), 'vagrant halt'))


def set_vagrant_database():
    """Point the copied settings at the host DB.

    10.0.2.2 is the VirtualBox NAT alias for the host machine.
    """
    settings_file = os.path.join(vagrant_dir, "cmudbac", "settings.py")
    with open(settings_file) as fin:
        settings = fin.read()
    if "'HOST': 'localhost'" in settings:
        settings = settings.replace("'HOST': 'localhost'", "'HOST': '10.0.2.2'")
    with open(settings_file, 'w') as fout:
        fout.write(settings)


def unset_vagrant_database():
    """Restore the copied settings to use localhost again.

    BUG FIX: the original called
    ``settings.replace("'HOST': 'localhost'", "'HOST': 'localhost'")`` —
    a no-op that left the settings pointing at 10.0.2.2 forever.
    """
    settings_file = os.path.join(vagrant_dir, "cmudbac", "settings.py")
    with open(settings_file) as fin:
        settings = fin.read()
    if "'HOST': '10.0.2.2'" in settings:
        settings = settings.replace("'HOST': '10.0.2.2'", "'HOST': 'localhost'")
    with open(settings_file, 'w') as fout:
        fout.write(settings)


def vagrant_deploy(repo, deploy_id, database):
    """Run the deploy script inside the VM; returns os.system's exit status."""
    set_vagrant_database()
    out = os.system('{} && {}'.format(
        cd(vagrant_dir),
        'vagrant ssh -c "{}"'.format(
            'python /vagrant/core/scripts/vagrant_deploy.py {} {} {}'.format(repo, deploy_id, database))))
    unset_vagrant_database()

    return out


def vagrant_benchmark(attempt_info, database, benchmark, deploy_id=1):
    """Serialize attempt_info into the shared folder and benchmark inside the VM.

    Returns os.system's exit status, or None if staging/launch failed.
    The copied tree is always cleaned up, even on error.
    """
    vagrant_setup()
    out = None
    temp_dir = None
    try:
        import json
        attempt_info_file_path = os.path.join(vagrant_dir, 'attempt_info.json')
        with open(attempt_info_file_path, 'w') as attempt_info_file:
            json.dump(attempt_info, attempt_info_file)
        command = '{} && {}'.format(
            cd(vagrant_dir),
            'vagrant ssh -c "{}"'.format(
                'python /vagrant/core/scripts/vagrant_benchmark.py --attempt_info="{attempt_info}" --deploy_id={deploy_id} {database} {benchmark}'
                .format(attempt_info=os.path.join('/vagrant', 'attempt_info.json'), deploy_id=deploy_id,
                        database=' '.join('--{}={}'.format(key, value) for key, value in database.iteritems()),
                        benchmark=' '.join('--{}={}'.format(key, value) for key, value in benchmark.iteritems())
                        )
            )
        )
        out = os.system(command)
        return out
    except Exception:
        traceback.print_exc()
    finally:
        try:
            vagrant_clear()
        except Exception:
            pass

    return out
# Catalog endpoints for the public CMDBAC service.
CMDBAC_URL = "http://cmdbac.cs.cmu.edu/"
ATTEMPT_INFO_URL = "/api/attempt/{id}/info/"

# Supported positional actions.
ACTION_TYPES = (
    "info",
    "deploy",
)

# Database backends the local deployer can target.
DATABASE_TYPES = (
    "mysql",
    "postgres",
    "sqlite"
)


def parse_args():
    """Parse the command line for the CMDBAC local deployer.

    Returns a plain dict (via ``vars``) so callers can index by dest name,
    e.g. ``args["db_type"]`` for ``--db-type``.
    """
    aparser = argparse.ArgumentParser(description='CMDBAC Local Deployer Tool')

    # Actions
    aparser.add_argument('action', choices=ACTION_TYPES,
                         help='Deployer Action')

    # Attempt Parameters
    agroup = aparser.add_argument_group('Deployment Parameters')
    agroup.add_argument('--catalog', default=CMDBAC_URL, metavar='URL',
                        help='Catalog API URL')
    agroup.add_argument('--attempt', type=int, metavar='ID',
                        help='Id of the attempt to deploy')
    agroup.add_argument('--num_threads', type=int, default=1, metavar='N',
                        help='Number of threads you want to use to submit actions')
    agroup.add_argument('--timeout', type=int, metavar='T',
                        help='Timeout for submitting actions (seconds)')
    agroup.add_argument('--db-size', type=int,
                        help='The expected Database size, 10 stands for 10MB')

    # Database Parameters
    agroup = aparser.add_argument_group('Local Database Parameters')
    agroup.add_argument('--db-type', choices=DATABASE_TYPES,
                        help='Database Type')
    agroup.add_argument('--db-host', type=str,
                        help='Database Hostname')
    # FIX: help text typo ("Databsae Port" -> "Database Port")
    agroup.add_argument('--db-port', type=int,
                        help='Database Port')
    agroup.add_argument('--db-name', type=str,
                        help='Database Name')
    agroup.add_argument('--db-user', type=str,
                        help='Database User')
    agroup.add_argument('--db-pass', type=str,
                        help='Database Password')

    return vars(aparser.parse_args())
## DEF
def get_attempt_info(api_url, attempt_id):
    """Fetch the JSON description of an attempt from the catalog API."""
    url = api_url + ATTEMPT_INFO_URL.format(id = attempt_id)
    response = requests.get(url)
    return response.json()
## DEF

def run_attempt_benchmark(api_url, attempt_id, database, benchmark):
    """Download the attempt info and benchmark it inside the local Vagrant VM.

    Errors from the benchmark are printed but not re-raised (best effort).
    """
    attempt_info = get_attempt_info(api_url, attempt_id)
    print('Running Benchmark for Attempt {}'.format(attempt_id))
    try:
        vagrant.vagrant_benchmark(attempt_info, database, benchmark)
    except Exception:
        traceback.print_exc()
## DEF

if __name__ == "__main__":
    args = parse_args()

    if args["action"] == "info":
        # Print the raw attempt metadata.
        attempt_info = get_attempt_info(args["catalog"], args["attempt"])
        print(json.dumps(attempt_info, indent = 4))
    elif args["action"] == "deploy":
        database = {
            'database': args["db_type"],
            'host': args["db_host"],
            'port': args["db_port"],
            'name': args["db_name"],
            'username': args["db_user"],
            'password': args["db_pass"]
        }
        benchmark = {
            'num_threads': args["num_threads"],
            'timeout': args["timeout"],
            # BUG FIX: was `arg["db_size"]` — NameError at runtime
            # (the parsed dict is named `args`).
            'size': args["db_size"]
        }
        run_attempt_benchmark(args["catalog"], args["attempt"], database, benchmark)
    else:
        print("Invalid action '%s'" % args["action"])
        sys.exit(1)

## MAIN
class FormSpider(CrawlSpider):
    """Crawl a locally deployed app and emit one FormItem per <form> found.

    Spider arguments (all passed as scrapy -a kwargs):
      start_url -- single URL to start from
      follow    -- 'true' to follow extracted links, anything else stops at depth 1
      proxy     -- optional HTTP proxy for the PhantomJS browser
    """
    name = "form"
    allowed_domains = ["127.0.0.1"]

    def __init__(self, *args, **kwargs):
        super(FormSpider, self).__init__(*args, **kwargs)

        self.start_urls = [kwargs.get('start_url')]

        follow = True if kwargs.get('follow') == 'true' else False
        self.rules = (
            Rule (SgmlLinkExtractor(allow=('')), callback='parse_form', follow=follow),
        )
        # Rules are assigned after CrawlSpider.__init__, so recompile them.
        super(FormSpider, self)._compile_rules()

        # kwargs.get('proxy') returns None when absent; the string
        # concatenation then raises TypeError, which is the (implicit)
        # "no proxy configured" path here.
        try:
            proxy = kwargs.get('proxy')
            service_args = [
                '--proxy=' + proxy,
                '--proxy-type=http',
            ]
        except:
            service_args = None
        # Headless browser used only to check input visibility on pages that
        # look like registration forms.
        self.browser = webdriver.PhantomJS(service_args=service_args)

    def closed(self, reason):
        """Shut down the PhantomJS process when the crawl ends."""
        self.browser.quit()

    def parse_form(self, response):
        """Yield a FormItem for every form on the page.

        For registration-looking URLs, the page is also loaded in PhantomJS
        so hidden inputs (is_displayed() == False) can be skipped.
        """
        register_patterns = ['register', 'signup', 'sign-up', 'sign_up']
        if any(pattern in response.url for pattern in register_patterns):
            use_browser = True
        else:
            use_browser = False
        for sel in response.xpath('//form'):
            if use_browser:
                self.browser.get(response.url)
            formItem = FormItem()

            # Attribute extraction: extract()[0] raises IndexError when the
            # attribute is missing, hence the try/except-with-default pattern
            # used throughout this method.
            formItem['action'] = ''
            try:
                formItem['action'] = sel.xpath('@action').extract()[0]
            except:
                pass

            formItem['url'] = response.url

            formItem['method'] = ''
            try:
                formItem['method'] = sel.xpath('@method').extract()[0].lower()
            except:
                pass

            formItem['inputs'] = []
            for ip in sel.xpath('.//input|.//textarea'):
                try:
                    _id = ip.xpath('@id').extract()[0]
                except:
                    _id = ''
                if _id != '':
                    if use_browser:
                        # Skip inputs the user could not actually interact with.
                        input_element = self.browser.find_element_by_id(_id)
                        if not input_element.is_displayed():
                            continue
                try:
                    name = ip.xpath('@name').extract()[0]
                except:
                    name = ''
                try:
                    _type = ip.xpath('@type').extract()[0]
                except:
                    # <textarea> has no @type attribute.
                    _type = 'textarea'
                try:
                    value = ip.xpath('@value').extract()[0]
                except:
                    value = ''
                inputItem = InputItem()
                inputItem['id'] = _id
                inputItem['name'] = name
                inputItem['type'] = _type
                inputItem['value'] = value
                formItem['inputs'].append(inputItem)

            try:
                _id = sel.xpath('@id').extract()[0]
            except:
                _id = ''
            try:
                _class = sel.xpath('@class').extract()[0]
            except:
                _class = ''
            try:
                enctype = sel.xpath('@enctype').extract()[0]
            except:
                enctype = ''
            formItem['id'] = _id
            # 'class' is a Python keyword, so the item field is named 'clazz'.
            formItem['clazz'] = _class
            formItem['enctype'] = enctype

            yield formItem
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []

# Application definition

INSTALLED_APPS = (
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django.contrib.sites',
    'rest_framework',
    'library',
    'blog'
)

# Cache middleware must bracket the stack: UpdateCache first, FetchFromCache last.
MIDDLEWARE_CLASSES = (
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.cache.UpdateCacheMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
    'django.middleware.cache.FetchFromCacheMiddleware',
)

ROOT_URLCONF = 'cmudbac.urls'

WSGI_APPLICATION = 'cmudbac.wsgi.application'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
                'django.core.context_processors.static',
                'library.context_processors.analytics'
            ],
        },
    },
]


# Database
# https://docs.djangoproject.com/en/1.6/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': 'dbac',
        'HOST': 'localhost',
        'PORT': '3306',
        'USER': 'CHANGE_ME',
        'PASSWORD': 'CHANGE_ME',
        'STORAGE_ENGINE': 'InnoDB'
    }
}

# File-based cache backing the cache middleware above.
CACHES = {
    'default': {
        'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
        'LOCATION': '/var/tmp/django_cache/cmdbac',
    }
}

# Where the analyzers expect the database servers to write their query logs.
LOG_FILE_LOCATION = {
    'mysql': '/var/log/mysql/mysql.log',
    'postgresql': '/var/log/postgresql/postgresql-9.3-main.log'
}

# Internationalization
# https://docs.djangoproject.com/en/1.6/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'America/New_York'

USE_I18N = True

USE_L10N = True

#USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.6/howto/static-files/

STATIC_URL = '/static/'

HTTP_PROXY = ''

REST_FRAMEWORK = {
    # Use Django's standard `django.contrib.auth` permissions,
    # or allow read-only access for unauthenticated users.
    'DEFAULT_PERMISSION_CLASSES': [
        'rest_framework.permissions.AllowAny'
    ],
    'DEFAULT_PAGINATION_CLASS': 'rest_framework.pagination.PageNumberPagination',
    'PAGE_SIZE': 50
}

# Google Analytics
GOOGLE_ANALYTICS_KEY = ''
def action_stats(directory = '.'):
    """Collect per-project-type histograms of queries-per-action.

    Writes the stats via dump_all_stats() into ``directory``.
    """
    stats = {'action_query_count': {}}

    for repo in Repository.objects.exclude(latest_successful_attempt = None):
        # Skip repositories excluded by the shared filter.
        if filter_repository(repo):
            continue

        project_type_name = repo.project_type.name
        if project_type_name not in stats['action_query_count']:
            stats['action_query_count'][project_type_name] = []

        for action in Action.objects.filter(attempt = repo.latest_successful_attempt):
            query_count = len(Query.objects.filter(action = action))
            if query_count > 0:
                stats['action_query_count'][project_type_name].append(query_count)


    dump_all_stats(directory, stats)

def transaction_stats(directory = '.'):
    """Reconstruct transactions from query logs and collect their statistics.

    A transaction starts at BEGIN / START TRANSACTION / SET AUTOCOMMIT=0 and
    ends at the next COMMIT; everything in between is accumulated.  Per
    project type this records: transactions per action, queries / reads /
    writes per transaction.  The raw transactions are pickled as well.
    """
    stats = {'transaction_count': {}, 'transaction_query_count': {}, 'transaction_read_count': {}, 'transaction_write_count': {}}

    transactions = []

    for repo in Repository.objects.exclude(latest_successful_attempt = None):
        if filter_repository(repo):
            continue

        project_type_name = repo.project_type.name
        if project_type_name not in stats['transaction_count']:
            stats['transaction_count'][project_type_name] = []
        if project_type_name not in stats['transaction_query_count']:
            stats['transaction_query_count'][project_type_name] = []
        if project_type_name not in stats['transaction_read_count']:
            stats['transaction_read_count'][project_type_name] = []
        if project_type_name not in stats['transaction_write_count']:
            stats['transaction_write_count'][project_type_name] = []


        for action in Action.objects.filter(attempt = repo.latest_successful_attempt):
            # State for the transaction currently being accumulated.
            transaction = ''
            query_count = 0
            transaction_count = 0

            for query in Query.objects.filter(action = action):
                if 'BEGIN' in query.content.upper() or 'START TRANSACTION' in query.content.upper() or 'SET AUTOCOMMIT=0' in query.content.upper():
                    # New transaction begins (any open one is discarded).
                    transaction = query.content + '\n'
                    query_count = 1
                elif transaction != '':
                    transaction += query.content + '\n'
                    query_count += 1
                    if 'COMMIT' in query.content.upper():
                        transaction = transaction.strip('\n')

                        # for each transaction, count the number of transactions
                        transaction_count += 1

                        # for each transaction, count the number of read/write
                        read_count = len(re.findall('SELECT', transaction.upper()))
                        stats['transaction_read_count'][project_type_name].append(read_count)
                        write_count = 0
                        for keyword in ['INSERT', 'DELETE', 'UPDATE']:
                            write_count += len(re.findall(keyword, transaction.upper()))
                        stats['transaction_write_count'][project_type_name].append(write_count)

                        # for each transaction, count the queries
                        # (minus 2 for the BEGIN and COMMIT statements themselves)
                        query_count -= 2
                        stats['transaction_query_count'][project_type_name].append(query_count)

                        try:
                            transactions.append((repo.name, repo.project_type.name, transaction))
                        except:
                            pass

                        transaction = ''

            if transaction_count > 0:
                stats['transaction_count'][project_type_name].append(transaction_count)

    pickle_dump(directory, 'transactions', transactions)

    dump_all_stats(directory, stats)

def main():
    # active
    action_stats(TRANSACTION_DIRECTORY)
    transaction_stats(TRANSACTION_DIRECTORY)

    # working

    # deprecated
## =====================================================================
## LOGGING CONFIGURATION
## =====================================================================
LOG = logging.getLogger()

# Maximum recursion depth of a single random walk.
MAX_RANDOM_WALK_DEPTH = 5

## =====================================================================
## RANDOM DRIVER
## =====================================================================
class RandomDriver(BaseDriver):
    """Depth-limited random walker over a deployed application.

    Starting from the URLs discovered by the wrapped driver, it submits every
    form with random text values and follows every link (each at most once,
    tracked in ``walked_path``), recording the queries each interaction
    triggers via process_logs()/check_log() from BaseDriver.
    """

    def __init__(self, driver):
        self.driver = driver
        self.start_urls = set(map(lambda url: url['url'], driver.urls))
        self.database = self.driver.database
        # NOTE(review): when driver.browser is None, self.cookiejar is never
        # set and start() will raise AttributeError — confirm callers always
        # provide a logged-in browser.
        if driver.browser is not None:
            self.cookiejar = driver.browser._ua_handlers['_cookies'].cookiejar
        self.walked_path = set()
        self.log_file = driver.log_file

    def new_browser(self, cookiejar = None, url = None):
        """Build a fresh mechanize browser, optionally seeded with a cookiejar and opened at ``url``."""
        browser = mechanize.Browser()
        if cookiejar is not None:
            browser.set_cookiejar(self.cookiejar)
        browser.set_handle_robots(False)
        if url is not None:
            browser.open(url)
        return browser

    def start(self):
        """Walk every start URL, accumulating results in self.forms / self.urls."""
        self.forms = []
        self.urls = []
        for url in self.start_urls:
            self.random_walk(self.new_browser(self.cookiejar, url))

    def random_walk(self, browser, depth = MAX_RANDOM_WALK_DEPTH):
        """Recursively submit forms and follow links from the browser's page."""
        if depth == 0:
            return

        try:
            last_line_no = self.check_log()
            browser_url = browser.geturl()
            cookiejar = browser._ua_handlers['_cookies'].cookiejar

            LOG.info('Walking URL: {}'.format(browser_url))

            # --- Forms: fill text inputs with random values and submit. ---
            forms = list(enumerate(list(browser.forms())))
            for idx, form in forms:
                key = '{}_{}'.format(browser_url, form.name)
                if key in self.walked_path:
                    continue
                self.walked_path.add(key)

                browser.select_form(nr = idx)
                form_stats = {
                    'url': browser_url,
                    'method': form.method,
                    'inputs': []
                }
                for control in form.controls:
                    if control.type == 'text':
                        browser[control.name] = submit.gen_random_value()
                    form_stats['inputs'].append({
                        'name': control.name,
                        'type': control.type
                    })
                succ = True
                try:
                    browser.submit()
                except Exception:
                    # BUG FIX: the original called traceback.print_exc()
                    # BEFORE browser.submit() inside the try block, printing a
                    # stale traceback on every form while real submit failures
                    # went unreported.  It belongs here, in the handler.
                    traceback.print_exc()
                    succ = False

                form_stats['queries'], form_stats['counter'] = self.process_logs(self.check_log(last_line_no), None)

                if all(not self.equal_form(form_stats, ret_form) for ret_form in self.forms):
                    self.forms.append(form_stats)

                if succ:
                    self.random_walk(browser, depth - 1)

                # Re-open the page: submitting navigated away.
                browser = self.new_browser(cookiejar, browser_url)

            # --- Links: follow each unvisited link once. ---
            links = list(browser.links())
            for link in links:
                key = link.url
                if key in self.walked_path:
                    continue
                self.walked_path.add(key)

                url = {
                    'url': link.url,
                    'queries': [],
                    'counter': {}
                }

                succ = True
                try:
                    browser.follow_link(link)
                except Exception:
                    traceback.print_exc()
                    succ = False

                url['queries'], url['counter'] = self.process_logs(self.check_log(last_line_no), None)

                # NOTE(review): self.urls is never appended to, so this
                # de-duplication check can never fire — confirm whether a
                # self.urls.append(url) was intended here.
                if any(self.equal_url(url, ret_url) for ret_url in self.urls):
                    continue

                if succ:
                    self.random_walk(browser, depth - 1)

                browser = self.new_browser(cookiejar, browser_url)

        except Exception:
            traceback.print_exc()
# Propagate the host proxy (if any) into the vagrant user's environment and apt.
if [ -n "$http_proxy" ]
then
    echo "use proxy: "$http_proxy
    echo "export http_proxy=\"$http_proxy\"" >> /home/vagrant/.bashrc
    echo "export https_proxy=\"$http_proxy\"" >> /home/vagrant/.bashrc

    export http_proxy="$http_proxy"
    export https_proxy="$http_proxy"

    echo "Acquire::http::Proxy \"$http_proxy\";" > /etc/apt/apt.conf
else
    echo "not use proxy"
fi

# The output of all these installation steps is noisy. With this utility
# the progress report is nice and concise.

function install {
    echo Installing $1
    shift
    apt-get -y install "$@" >/dev/null 2>&1
}

echo updating package information
install 'apt-repository' software-properties-common python-software-properties
curl --silent --location https://deb.nodesource.com/setup_4.x | sudo bash -
apt-get -y update >/dev/null 2>&1

install 'development tools' build-essential unzip curl openssl libssl-dev libcurl4-openssl-dev zlib1g zlib1g-dev libgmp-dev
install 'Python' python-dev python-software-properties

# install Ruby (three versions, each with bundler, via RVM)
command curl -sSL https://rvm.io/mpapis.asc | gpg --import -
curl -sSL https://get.rvm.io | bash -s stable
source /usr/local/rvm/scripts/rvm
rvm install 1.9.3
rvm install 2.0.0
rvm install 2.2.2
rvm use 1.9.3 --default
gem install bundler
gem install bundle
rvm use 2.0.0 --default
gem install bundler
gem install bundle
rvm use 2.2.2 --default
gem install bundler
gem install bundle
install 'ruby' ruby-dev

# Sanity-check the toolchain versions.
echo -e "\n- - - - - -\n"
echo -n "Should be sqlite 3.8.1 or higher: sqlite "
sqlite3 --version
echo -n "Should be rvm 1.26.11 or higher: "
rvm --version | sed '/^.*$/N;s/\n//g' | cut -c 1-11
echo -n "Should be ruby 2.2.2: "
ruby -v | cut -d " " -f 2
echo -n "Should be Rails 4.2.1 or higher: "
rails -v
echo -e "\n- - - - - -\n"

# install pip
wget https://bootstrap.pypa.io/get-pip.py -O /home/vagrant/get-pip.py
python /home/vagrant/get-pip.py
echo 'export PYTHONUSERBASE="/home/vagrant/pip"' >> /home/vagrant/.bashrc

# install Beautifulsoup
echo installing Beautifulsoup
pip install BeautifulSoup4

# install Django
# FIX: message typo ("Djano" -> "Django")
echo installing Django
pip install django==1.8.6

# install dependencies
install 'Git' git
git config --global http.proxy $http_proxy

install 'SQLite' sqlite3 libsqlite3-dev

install 'PostgreSQL' postgresql postgresql-contrib libpq-dev
sudo -u postgres psql -U postgres -d postgres -c "alter user postgres with password 'postgres';"
pip install psycopg2

# Pre-seed the MySQL root password so the install is non-interactive.
debconf-set-selections <<< "mysql-server mysql-server/root_password password root"
debconf-set-selections <<< "mysql-server mysql-server/root_password_again password root"
install 'MySQL' mysql-server libmysqlclient-dev
pip install MySQL-python
# mysql -u root --password=root -e "CREATE DATABASE vm"

install 'Nodejs' nodejs

install 'Nokogiri dependencies' libxml2 libxml2-dev libxslt1-dev imagemagick libmagickwand-dev

# install scrapy
echo installing scrapy
pip install scrapy

# web and env
pip install mechanize
pip install python-dateutil
pip install virtualenv
pip install hurry.filesize
pip install selenium
install 'phantomjs' phantomjs
install 'firefox' firefox=28.0+build2-0ubuntu2
install 'xvfb' xvfb
pip install pyvirtualdisplay
pip install djangorestframework
pip install pinax-blog
pip install pytz

# install php
install 'php' apache2 php5-mysql libapache2-mod-php5 mysql-server php5-dev php5-gd php5-curl php5-pgsql php5-sqlite

# install drush (Drupal shell)
wget http://files.drush.org/drush.phar
php drush.phar core-status
chmod +x drush.phar
mv drush.phar /usr/local/bin/drush
drush init
drush dl php_server-7.x
## =====================================================================
## POSTGRESQL ANALYZER
## =====================================================================
class PostgreSQLAnalyzer(BaseAnalyzer):
    """Analyzer that collects per-query EXPLAIN ANALYZE plans and
    schema-level statistics from a deployed PostgreSQL database.

    All database access goes through the deployer's connection; every
    public method is best-effort and logs (rather than raises) on failure,
    matching the other analyzers in this package.
    """

    def __init__(self, deployer):
        # The deployer owns the connection parameters and database name.
        BaseAnalyzer.__init__(self, deployer)

    def analyze_queries(self, queries):
        """Attach an 'explain' entry to every explainable query dict.

        Also accumulates the transaction count into
        ``self.queries_stats['num_transactions']``.

        :param queries: list of dicts, each with a 'raw' SQL string.
        """
        self.queries_stats['num_transactions'] = \
            self.count_transaction(queries) + self.queries_stats.get('num_transactions', 0)

        try:
            conn = self.deployer.get_database_connection()
            # Autocommit (isolation level 0) so one failed EXPLAIN does not
            # poison the session for the remaining queries.
            conn.set_isolation_level(0)
            cur = conn.cursor()

            for query in queries:
                try:
                    if self.is_valid_for_explain(query['raw']):
                        explain_query = 'EXPLAIN ANALYZE {};'.format(query['raw'])
                        cur.execute(explain_query)
                        output = '\n'
                        for row in cur.fetchall():
                            output += row[0] + '\n'
                        query['explain'] = output
                except Exception:
                    # Best effort: queries PostgreSQL refuses to explain
                    # (DDL, parameter placeholders, ...) are simply skipped.
                    pass

            conn.set_isolation_level(1)
            cur.close()
            conn.close()
        except Exception as e:
            LOG.exception(e)

    def analyze_database(self):
        """Collect object counts and full catalog dumps for the 'public'
        schema into ``database_stats`` / ``database_informations``.

        The original repeated the execute/fetch boilerplate for every
        query; the queries are now table-driven (same statements, same
        order). The unused local ``database`` was removed.
        """
        # (stat key, COUNT(*) query) pairs -> database_stats
        count_queries = [
            ('num_tables',
             "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';"),
            ('num_indexes',
             "SELECT COUNT(*) FROM pg_stat_all_indexes WHERE schemaname = 'public';"),
            ('num_constraints',
             "SELECT COUNT(*) FROM information_schema.table_constraints WHERE constraint_schema = 'public';"),
            ('num_foreignkeys',
             "SELECT COUNT(*) FROM information_schema.referential_constraints WHERE constraint_schema = 'public';"),
        ]
        # (information key, full-dump query) pairs -> database_informations
        info_queries = [
            ('tables',
             "SELECT * FROM information_schema.tables WHERE table_schema = 'public';"),
            ('columns',
             "SELECT * FROM information_schema.columns WHERE table_schema = 'public';"),
            ('indexes',
             "SELECT * FROM pg_stat_all_indexes WHERE schemaname = 'public';"),
            ('constraints',
             "SELECT * FROM information_schema.table_constraints WHERE constraint_schema = 'public';"),
            ('key_column_usage',
             "SELECT * FROM information_schema.key_column_usage WHERE constraint_schema = 'public';"),
            ('foreignkeys',
             "SELECT * FROM information_schema.referential_constraints WHERE constraint_schema = 'public';"),
            ('triggers',
             "SELECT * FROM information_schema.triggers WHERE trigger_schema = 'public';"),
            ('views',
             "SELECT * FROM information_schema.views WHERE table_schema = 'public';"),
        ]

        try:
            conn = self.deployer.get_database_connection()
            cur = conn.cursor()

            for key, sql in count_queries:
                cur.execute(sql)
                self.database_stats[key] = int(cur.fetchone()[0])

            for key, sql in info_queries:
                cur.execute(sql)
                # Dumps are stored as the str() of the fetched tuple list,
                # matching the format the downstream analysis scripts parse.
                self.database_informations[key] = str(cur.fetchall())

            cur.close()
            conn.close()
        except Exception as e:
            LOG.exception(e)
def run_driver(driver, timeout, size, queue, deployer=None):
    """Worker-process entry point: repeatedly submit benchmark actions.

    Runs until ``timeout`` seconds have elapsed or, when a ``deployer`` is
    supplied, until the database grows to ``size`` MB. The total number of
    submitted actions is reported back through ``queue`` exactly once,
    even when an exception aborts the loop.

    Bug fix: the original called ``get_database_size()`` with no argument
    even though it requires a deployer, so every iteration raised
    TypeError (silently swallowed) and each worker stopped after a single
    batch. The deployer is now passed in explicitly; the parameter
    defaults to None (size check skipped) for backward compatibility.
    """
    cnt = 0
    stop_time = time.time() + timeout
    new_driver = BenchmarkDriver(driver)
    try:
        while True:
            cnt += new_driver.submit_actions()
            if time.time() >= stop_time:
                break
            if deployer is not None and get_database_size(deployer) >= size:
                break
    except Exception:
        traceback.print_exc()
    queue.put(cnt)

def get_database_size(deployer):
    """Return the deployed MySQL database's size in MB (data + indexes).

    Temporarily points the deployer at MySQL so the connection helper
    targets the right engine, then sums data_length + index_length from
    information_schema for the deploy's schema.
    """
    deployer.database = Database()
    deployer.database.name = 'MySQL'
    conn = deployer.get_database_connection(False)
    cur = conn.cursor()
    cur.execute('''
        SELECT Round(SUM(data_length + index_length) / 1024 / 1024, 1)
        FROM information_schema.tables
        WHERE table_schema = '{}'
    '''.format(deployer.database_config['name']))
    size = cur.fetchone()[0]
    return size

def main():
    """Deploy one repository inside the VM, drive it with concurrent
    benchmark workers, then analyze the executed queries and schema."""
    # parse args
    parser = argparse.ArgumentParser()
    parser.add_argument('--attempt_info', type=str)
    parser.add_argument('--deploy_id', type=int)
    parser.add_argument('--database', type=str)
    parser.add_argument('--host', type=str)
    parser.add_argument('--port', type=int)
    parser.add_argument('--name', type=str)
    parser.add_argument('--username', type=str)
    parser.add_argument('--password', type=str)
    parser.add_argument('--num_threads', type=int)
    parser.add_argument('--timeout', type=int)
    parser.add_argument('--size', type=int)
    args = parser.parse_args()

    # load the attempt description produced by the host-side script
    with open(args.attempt_info, 'r') as attempt_info_file:
        attempt_info = json.loads(attempt_info_file.read())
    deploy_id = args.deploy_id
    database_config = {
        'database': args.database,
        'host': args.host,
        'port': args.port,
        'name': args.name,
        'username': args.username,
        'password': args.password
    }
    num_threads = args.num_threads
    timeout = args.timeout
    size = args.size

    # resolve the deployer class for this project type
    project_type = attempt_info['repo_info']['project_type']
    deployer_class = {
        1: 'DjangoDeployer',
        2: 'RoRDeployer',
        3: 'NodeDeployer',
        4: 'DrupalDeployer',
        5: 'GrailsDeployer'
    }[project_type]

    module_name = "deployers.%s" % (deployer_class.lower())
    module_handle = __import__(module_name, globals(), locals(), [deployer_class])
    klass = getattr(module_handle, deployer_class)

    deployer = klass(None, None, deploy_id, database_config)

    result = deployer.deploy(attempt_info)
    if result != 0:
        deployer.kill_server()
        sys.exit(-1)

    LOG.info('Running driver ...')
    driver = BaseDriver(deployer.get_main_url(), deployer.get_database(),
                        deployer.deploy_id, deployer.base_path, deployer.log_file)
    try:
        driver.bootstrap()
        driver.initialize()
    except Exception:
        traceback.print_exc()

    LOG.info('Start Driving the Database ...')
    actions_cnt = 0
    processes = []
    try:
        # silence per-request logging from the HTTP stack
        logging.getLogger("requests").setLevel(logging.WARNING)
        logging.getLogger("urllib3").setLevel(logging.WARNING)
        # fan out one worker process per "thread"; each reports its action
        # count back through the shared queue
        queue = Queue()
        for _ in range(num_threads):
            process = Process(target=run_driver,
                              args=(driver, timeout, size, queue, deployer))
            processes.append(process)
            process.start()
        for process in processes:
            process.join()
        for _ in range(num_threads):
            actions_cnt += queue.get()
    except Exception:
        traceback.print_exc()

    LOG.info('The number of actions submitted : {}'.format(actions_cnt))

    # kill server
    deployer.kill_server()

    # analyze the queries observed for each form and URL
    LOG.info('Analyzing queries ...')
    analyzer = get_analyzer(deployer)
    for form, _ in driver.forms:
        analyzer.analyze_queries(form['queries'])
    for url in driver.urls:
        analyzer.analyze_queries(url['queries'])
    LOG.info(analyzer.queries_stats)

    # extract database info
    LOG.info('Extracting database info ...')
    analyzer.analyze_database()
    LOG.info(analyzer.database_stats)

    LOG.info('Database Size : {} '.format(get_database_size(deployer)))

    LOG.info('Finishing ...')

if __name__ == "__main__":
    main()
    80 | {% block main %}{% endblock %} 81 |
    82 | 83 | 84 | 85 | 104 | 105 | 106 | 108 | 109 | 110 | 111 | {{ analytics_code }} 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /core/analyzers/mysqlanalyzer.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 3 | 4 | import logging 5 | import datetime 6 | import traceback 7 | 8 | from baseanalyzer import BaseAnalyzer 9 | 10 | ## ===================================================================== 11 | ## LOGGING CONFIGURATION 12 | ## ===================================================================== 13 | LOG = logging.getLogger() 14 | 15 | ## ===================================================================== 16 | ## MYSQL ANALYZER 17 | ## ===================================================================== 18 | class MySQLAnalyzer(BaseAnalyzer): 19 | 20 | def __init__(self, deployer): 21 | BaseAnalyzer.__init__(self, deployer) 22 | 23 | def analyze_queries(self, queries): 24 | self.queries_stats['num_transactions'] = self.count_transaction(queries) + self.queries_stats.get('num_transactions', 0) 25 | 26 | try: 27 | conn = self.deployer.get_database_connection() 28 | cur = conn.cursor() 29 | 30 | for query in queries: 31 | try: 32 | if self.is_valid_for_explain(query['raw']): 33 | explain_query = 'EXPLAIN {};'.format(query['raw']) 34 | # print explain_query 35 | cur.execute(explain_query) 36 | rows = cur.fetchall() 37 | output = '\n' 38 | for row in rows: 39 | output += str(row) + '\n' 40 | query['explain'] = output 41 | except Exception, e: 42 | pass 43 | # LOG.exception(e) 44 | 45 | for query in queries: 46 | try: 47 | if self.is_valid_for_explain(query['raw']): 48 | cur.execute(query['raw']) 49 | cur.fetchall() 50 | 51 | stats_query = 'SHOW SESSION STATUS;' 52 | # print explain_query 53 | cur.execute(stats_query) 54 | rows = cur.fetchall() 55 | output = '\n' 56 | for 
row in rows: 57 | output += str(row) + '\n' 58 | query['stats'] = output 59 | except Exception, e: 60 | # traceback.print_exc() 61 | pass 62 | # pass 63 | # LOG.exception(e) 64 | 65 | cur.close() 66 | conn.close() 67 | except Exception, e: 68 | LOG.exception(e) 69 | 70 | def analyze_database(self): 71 | try: 72 | conn = self.deployer.get_database_connection() 73 | cur = conn.cursor() 74 | database = self.deployer.get_database_name() 75 | 76 | # the number of tables 77 | cur.execute("SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '{}';".format(database)) 78 | self.database_stats['num_tables'] = int(cur.fetchone()[0]) 79 | 80 | # the number of indexes 81 | cur.execute("SELECT COUNT(DISTINCT table_name, index_name) FROM information_schema.statistics WHERE table_schema = '{}';".format(database)) 82 | self.database_stats['num_indexes'] = int(cur.fetchone()[0]) 83 | 84 | # the number of constraints 85 | cur.execute("SELECT COUNT(*) FROM information_schema.table_constraints WHERE constraint_schema = '{}';".format(database)) 86 | self.database_stats['num_constraints'] = int(cur.fetchone()[0]) 87 | 88 | # the number of foreign keys 89 | cur.execute("SELECT COUNT(*) FROM information_schema.referential_constraints WHERE constraint_schema = '{}';".format(database)) 90 | self.database_stats['num_foreignkeys'] = int(cur.fetchone()[0]) 91 | 92 | # the full information of tables 93 | cur.execute("SELECT * FROM information_schema.tables WHERE table_schema = '{}';".format(database)) 94 | self.database_informations['tables'] = str(cur.fetchall()) 95 | 96 | # the full information of columns 97 | cur.execute("SELECT * from INFORMATION_SCHEMA.columns WHERE table_schema = '{}';".format(database)) 98 | self.database_informations['columns'] = str(cur.fetchall()) 99 | 100 | # the full information of indexes 101 | cur.execute("SELECT * FROM information_schema.statistics WHERE table_schema = '{}';".format(database)) 102 | self.database_informations['indexes'] = 
str(cur.fetchall()) 103 | 104 | # the full information of constraints 105 | cur.execute("SELECT * FROM information_schema.table_constraints WHERE constraint_schema = '{}';".format(database)) 106 | self.database_informations['constraints'] = str(cur.fetchall()) 107 | 108 | # the full information of constraints 109 | cur.execute("SELECT * FROM information_schema.key_column_usage WHERE constraint_schema = '{}';".format(database)) 110 | self.database_informations['key_column_usage'] = str(cur.fetchall()) 111 | 112 | # the full information of foreign keys 113 | cur.execute("SELECT * FROM information_schema.referential_constraints WHERE constraint_schema = '{}';".format(database)) 114 | self.database_informations['foreignkeys'] = str(cur.fetchall()) 115 | 116 | # the full information of triggers 117 | cur.execute("SELECT * FROM information_schema.triggers WHERE trigger_schema = '{}';".format(database)) 118 | self.database_informations['triggers'] = str(cur.fetchall()) 119 | 120 | # the full information of views 121 | cur.execute("SELECT * FROM information_schema.views WHERE table_schema = '{}';".format(database)) 122 | self.database_informations['views'] = str(cur.fetchall()) 123 | 124 | cur.close() 125 | conn.close() 126 | except Exception, e: 127 | LOG.exception(e) -------------------------------------------------------------------------------- /analysis/foreign/foreign.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir)) 4 | sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) 5 | 6 | import re 7 | import csv 8 | import numpy as np 9 | import sqlparse 10 | import traceback 11 | from utils import filter_repository, dump_all_stats, pickle_dump 12 | 13 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cmudbac.settings") 14 | import django 15 | django.setup() 16 | 17 | from library.models import * 
def foreign_key_stats(directory='.'):
    """Compute foreign-key usage statistics across all successfully
    deployed repositories and dump them via ``dump_all_stats``.

    For each repository the saved information_schema dumps ('columns',
    'key_column_usage', 'constraints') are parsed back from their str()'d
    tuple form; the function then tallies the column types used as
    foreign keys and, per action, counts query tokens that reference a
    FOREIGN KEY column — both grouped by project type.

    Fixes over the original: the dead ``if 0:`` block (referencing a
    nonexistent stats key) was removed; repositories on engines other
    than PostgreSQL/MySQL are skipped instead of crashing on an unbound
    ``regex``; missing entries in the column dump no longer raise
    KeyError.
    """
    stats = {'foreign_key_count': {}, 'foreign_key_type': {}}

    for repo in Repository.objects.exclude(latest_successful_attempt=None):
        if filter_repository(repo):
            continue

        database_name = repo.latest_successful_attempt.database.name
        # The dumps are str()'d lists of tuples; the regex pulls out one
        # parenthesized tuple at a time (list vs. tuple terminator differs
        # between the two engines' dump formats).
        if database_name == 'PostgreSQL':
            regex = '(\(.*?\))[,\]]'
        elif database_name == 'MySQL':
            regex = '(\(.*?\))[,\)]'
        else:
            # Unsupported engine: no dump format known for it.
            continue

        project_type_name = repo.project_type.name
        stats['foreign_key_count'].setdefault(project_type_name, [])
        stats['foreign_key_type'].setdefault(project_type_name, {})

        # Build table.column -> column type (and bare column -> type).
        informations = Information.objects.filter(
            attempt=repo.latest_successful_attempt).filter(name='columns')
        column_map = {}
        if len(informations) > 0:
            information = informations[0]
            for column in re.findall(regex, information.description):
                cells = column.split(',')
                table = str(cells[2]).replace("'", "").strip()
                name = str(cells[3]).replace("'", "").strip()
                _type = str(cells[7]).replace("'", "").strip()
                column_map[table + '.' + name] = _type
                column_map[name] = _type

        key_column_usage_informations = Information.objects.filter(
            attempt=repo.latest_successful_attempt).filter(name='key_column_usage')
        constraint_informations = Information.objects.filter(
            attempt=repo.latest_successful_attempt).filter(name='constraints')
        constraint_map = {}
        if len(key_column_usage_informations) > 0 and len(constraint_informations) > 0:
            # table.constraint -> [member column names]
            merge_map = {}
            key_column_usage_information = key_column_usage_informations[0]
            for column in re.findall(regex, key_column_usage_information.description):
                cells = column.split(',')
                constraint_name = str(cells[2]).replace("'", "").strip()
                table_name = str(cells[5]).replace("'", "").strip()
                column_name = str(cells[6]).replace("'", "").strip()
                merge_map.setdefault(table_name + '.' + constraint_name, []).append(column_name)

            constraint_information = constraint_informations[0]
            for column in re.findall(regex, constraint_information.description):
                cells = column.split(',')
                constraint_name = str(cells[2]).replace("'", "").strip()
                # The two engines place table name / constraint type in
                # different tuple positions.
                if database_name == 'PostgreSQL':
                    table_name = str(cells[5]).replace("'", "").strip()
                    constraint_type = str(cells[6]).replace("'", "").strip()
                else:  # MySQL
                    table_name = str(cells[4]).replace("'", "").strip()
                    constraint_type = str(cells[5])[:-1].replace("'", "").strip()
                merge_map_key = table_name + '.' + constraint_name
                if merge_map_key not in merge_map:
                    continue
                for column_name in merge_map[merge_map_key]:
                    constraint_map[table_name + '.' + column_name] = constraint_type
                    constraint_map[column_name] = constraint_type

                    if constraint_type == 'FOREIGN KEY':
                        # The original indexed column_map directly and
                        # crashed when the column dump lacked the entry.
                        _type = column_map.get(table_name + '.' + column_name)
                        if _type is not None:
                            stats['foreign_key_type'][project_type_name][_type] = \
                                stats['foreign_key_type'][project_type_name].get(_type, 0) + 1

        # Per action: count query identifiers that hit a FOREIGN KEY column.
        for action in Action.objects.filter(attempt=repo.latest_successful_attempt):
            queries = Query.objects.filter(action=action)
            foreign_key_count = 0

            for query in queries:
                parsed = sqlparse.parse(query.content)[0]
                for token in parsed.tokens:
                    if isinstance(token, sqlparse.sql.Identifier):
                        token_name = token.value.replace('"', '').replace('`', '')
                        if constraint_map.get(token_name) == 'FOREIGN KEY':
                            foreign_key_count += 1

                for explain in Explain.objects.filter(query=query):
                    if 'FOREIGN' in explain.output:
                        print(explain.output)

            stats['foreign_key_count'][project_type_name].append(foreign_key_count)

    dump_all_stats(directory, stats)

def main():
    """Script entry point."""
    foreign_key_stats()

if __name__ == '__main__':
    main()
LOG = logging.getLogger(__name__)
LOG_handler = logging.StreamHandler()
LOG_formatter = logging.Formatter(fmt='%(asctime)s [%(filename)s:%(funcName)s:%(lineno)03d] %(levelname)-5s: %(message)s',
                                  datefmt='%m-%d-%Y %H:%M:%S')
LOG_handler.setFormatter(LOG_formatter)
LOG.addHandler(LOG_handler)
LOG.setLevel(logging.INFO)

## =====================================================================
## DRUPAL CONFIGURATION
## =====================================================================
# Project page and commit-node URL templates on drupal.org.
BASE_URL = 'https://www.drupal.org/project/{name}'
COMMIT_URL = 'https://www.drupal.org/node/{sha}'
# Listing page of Drupal "distribution" projects used as the crawl seed.
SEARCH_URL = 'https://www.drupal.org/project/project_distribution'
DRUPAL_HOST = 'https://www.drupal.org'
# Seconds to wait between repository requests (politeness delay).
DRUPAL_SLEEP = 1

## =====================================================================
## DRUPAL CRAWLER
## =====================================================================
class DrupalCrawler(BaseCrawler):
    """Crawler that discovers Drupal distribution projects on drupal.org
    and registers them as repositories.

    Unlike the GitHub-style crawlers there is no API: everything is
    scraped from the HTML listing/project pages, so the selectors below
    are tied to drupal.org's page structure.
    """

    def __init__(self, crawlerStatus, auth):
        # NOTE(review): `auth` is accepted for interface parity with the
        # other crawlers but is unused — drupal.org needs no credentials.
        BaseCrawler.__init__(self, crawlerStatus)
    ## DEF

    def next_url(self):
        """Return the next listing URL to crawl (resume point or seed)."""
        # Check whether there is a next url that we need to load
        # from where we left off from our last run\
        if not self.crawlerStatus.next_url is None and not self.crawlerStatus.next_url == '':
            return self.crawlerStatus.next_url

        # Otherwise, compute what the next page we want to load
        return SEARCH_URL
    ## DEF

    def search(self):
        """Crawl one listing page: register every project found, then
        persist the pager's next-page URL into crawlerStatus."""
        # Load and parse!
        response = utils.query(self.next_url())
        soup = BeautifulSoup(response.text)
        titles = soup.find_all(class_='node-project-distribution')
        LOG.info("Found %d repositories" % len(titles))

        # Pick through the results and find repos
        for title in titles:
            # Project machine name is the 3rd path segment of the link,
            # e.g. /project/<name> — assumes drupal.org's markup; verify
            # if the listing page layout changes.
            name = title.contents[1].contents[0]['href'].split('/')[2]
            try:
                self.add_repository(name)
            except:
                traceback.print_exc()
            # Sleep for a little bit to prevent us from getting blocked
            time.sleep(DRUPAL_SLEEP)
        ## FOR

        # Figure out what is the next page that we need to load
        try:
            next_page = soup.find(class_='pager-next').contents[0]
        except:
            next_page = None
        if not next_page or not next_page.has_attr('href'):
            LOG.info("No next page link found!")
            self.crawlerStatus.next_url = None
        else:
            self.crawlerStatus.next_url = DRUPAL_HOST + next_page['href']

        # Make sure we update our crawler status
        LOG.info("Updating status for %s" % self.crawlerStatus)
        self.crawlerStatus.save()

        return
    ## DEF

    def get_api_data(self, name):
        """Scrape the project page for its URL and creation timestamp.

        Returns a dict with 'url' and 'time' (the first <time> element's
        datetime attribute — presumably a Unix timestamp string, since
        add_repository() feeds it to int(); TODO confirm).
        """
        data = {}
        data['url'] = self.crawlerStatus.source.get_url(name)
        response = requests.get(data['url'])
        soup = BeautifulSoup(response.text)
        data['time'] = soup.find('time').attrs['datetime']
        return data
    # DEF

    def add_repository(self, name, setup_scripts = None):
        """Create a Repository record for `name` unless it already exists.

        Most numeric metadata (stars, forks, ...) has no drupal.org
        equivalent and is stored as -1 sentinels.
        """
        if Repository.objects.filter(name='drupal/' + name, source=self.crawlerStatus.source).exists():
            LOG.info("Repository '%s' already exists" % name)
        else:
            api_data = self.get_api_data(name)

            # Create the new repository
            repo = Repository()
            repo.name = 'drupal/' + name
            repo.source = self.crawlerStatus.source
            repo.project_type = self.crawlerStatus.project_type
            repo.last_attempt = None
            repo.created_at = datetime.fromtimestamp(int(api_data['time'])).strftime("%Y-%m-%d %H:%M:%S")
            # drupal.org exposes no separate update/push times; reuse created_at.
            repo.updated_at = repo.created_at
            repo.pushed_at = repo.created_at
            repo.homepage = api_data['url']
            repo.size = -1
            repo.stargazers_count = -1
            repo.watchers_count = -1
            repo.language = 'PHP'
            repo.forks_count = -1
            repo.open_issues_count = -1
            repo.default_branch = 'master'
            repo.network_count = -1
            repo.subscribers_count = -1
            repo.commits_count = -1
            repo.branches_count = -1
            repo.releases_count = -1
            repo.contributors_count = -1
            repo.setup_scripts = setup_scripts
            repo.save()
            LOG.info("Successfully created new repository '%s' [%d]" % (repo, repo.id))
        ## IF
    # DEF

    def get_latest_sha(self, repo_name):
        """Return the latest commit node id scraped from the project page.

        NOTE(review): returns results[1], not results[0] — presumably the
        first COMMIT_URL match on the page is not a commit link; confirm
        against a live project page before changing.
        """
        url = BASE_URL.format(name = repo_name)
        response = utils.query(url)
        data = response.text
        results = re.findall(COMMIT_URL.format(sha='(\d+)'), data)
        return results[1]
    # DEF

    def download_repository(self, repo_name, sha, zip_name):
        """Download the project's release zip to `zip_name`.

        The first https://...zip link found on the project page is used;
        the `sha` argument is unused here (interface parity with the
        other crawlers).
        """
        url = BASE_URL.format(name = repo_name)
        response = utils.query(url)
        data = response.text
        download_url = re.search('https://[^ ]*?\.zip', data).group(0)

        response = utils.query(download_url)
        zip_file = open(zip_name, 'wb')
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                zip_file.write(chunk)
                zip_file.flush()
        zip_file.close()
    # DEF
## =====================================================================
## NODE.JS DEPLOYER
## =====================================================================
class NodeDeployer(BaseDeployer):
    """Deployer for Node.js applications that use MySQL.

    Locates the app's package.json, rewrites its mysql.createConnection
    call to point at our managed database, loads any bundled .sql schema
    files, installs npm dependencies, and starts the main script.

    Changes over the original: `is None` instead of `== None`,
    `items()`/`print(...)`/`except ... as e` instead of the Python-2-only
    forms (all equivalent on Python 2.6+ as well).
    """

    def __init__(self, repo, database, deploy_id, database_config = None):
        # When no explicit config is given, derive a unique per-deploy
        # database name.
        BaseDeployer.__init__(self, repo, database, deploy_id, database_config)
        if database_config is None:
            self.database_config['name'] = 'node_app' + str(deploy_id)
        # Entry-point script (server.js/app.js/main.js), discovered during
        # deploy_repo_attempt().
        self.main_filename = None
    ## DEF

    def configure_settings(self, path):
        """Rewrite every mysql.createConnection({...}) call under `path`
        so the application connects to our managed database."""
        utils.replace_files_regex(path, "mysql\.createConnection\({.*?}.*?\);",
            """mysql.createConnection({{
                host     : '{host}',
                port     : '{port}',
                user     : '{user}',
                password : '{password}',
                database : '{database}'
            }});
            """.format(host=self.database_config['host'], port=self.database_config['port'],
                       user=self.database_config['username'], password=self.database_config['password'],
                       database=self.database_config['name']))
    ## DEF

    def install_requirements(self, path):
        """Run `npm install` in `path`; return its stderr if any,
        otherwise its stdout (used later to harvest package versions)."""
        if path:
            command = '{} && npm install'.format(utils.cd(path))
            out = utils.run_command(command)
            if out[1] == '':
                return out[2]
            else:
                return out[1]
        return ''
    ## DEF

    def get_main_url(self):
        """Return the local URL the deployed app is served on."""
        return 'http://127.0.0.1:{}/'.format(self.port)
    ## DEF

    def sync_server(self, path):
        # Node apps have no separate schema-sync step (schema comes from
        # the bundled .sql files handled in create_tables()).
        pass
    ## DEF

    def run_server(self, path):
        """Start the app's main script asynchronously with `node`."""
        self.configure_network()
        LOG.info('Running server ...')
        command = '{} && node {}'.format(
            utils.cd(path), self.main_filename)
        return utils.run_command_async(command)
    ## DEF

    def get_runtime(self):
        """Return the node executable name and its version string."""
        out = utils.run_command('node -v')
        return {
            'executable': 'node',
            # `node -v` prints e.g. "v4.2.1"; strip the leading 'v'.
            'version': out[1][1:]
        }
    ## DEF

    def find_port(self):
        """Discover the port the node process is listening on via netstat."""
        out = utils.run_command('netstat -nlp | grep -i "node"')
        port = re.search('0 :::(\d+)', out[1])
        if port:
            self.port = port.group(1)

    def create_tables(self, deploy_path):
        """Execute every statement of every .sql file under `deploy_path`.

        Returns True when at least one .sql file was found (regardless of
        per-statement failures, which are logged and skipped).
        """
        executed = False
        sql_files = utils.search_file_regex(deploy_path, '.*\.sql')
        conn = self.get_database_connection()
        cur = conn.cursor()
        for sql_file in sql_files:
            executed = True
            # Naive split on ';' — good enough for plain schema dumps.
            for statement in open(sql_file).read().split(';'):
                try:
                    cur.execute(statement)
                except Exception as e:
                    print(statement)
                    LOG.exception(e)
        if self.database.name == 'MySQL':
            conn.commit()
        return executed

    def try_deploy(self, deploy_path):
        """Full deploy pipeline: configure, load schema, npm install,
        start the server, and return the resulting attempt status."""
        LOG.info('Configuring settings ...')
        self.kill_server()
        self.clear_database()
        self.configure_settings(deploy_path)
        self.runtime = self.get_runtime()
        LOG.info(self.runtime)

        self.attempt.database = self.get_database()
        LOG.info('Database: ' + self.attempt.database.name)

        LOG.info('Create Tables ...')
        try:
            if not self.create_tables(deploy_path):
                LOG.error('No sql file found!')
                return ATTEMPT_STATUS_MISSING_REQUIRED_FILES
        except Exception as e:
            LOG.exception(e)

        LOG.info('Installing requirements ...')
        out = self.install_requirements(deploy_path)
        # Harvest "<name>@<version>" lines from the npm output.
        packages = {}
        for line in out.split('\n'):
            s = re.search('(.+?)@([0-9\.]+)', line)
            if s:
                name, version = s.group(1), s.group(2)
                name = name.split(' ')[-1]
                packages[name] = version

        for name, version in packages.items():
            try:
                pkg, created = Package.objects.get_or_create(name=name, version=version, project_type=self.repo.project_type)
                self.packages_from_file.append(pkg)
            except Exception as e:
                LOG.exception(e)

        self.run_server(deploy_path)
        # Give the server a moment to bind its port before probing it.
        time.sleep(5)

        self.find_port()

        attemptStatus = self.check_server()

        return attemptStatus
    ## DEF

    def deploy_repo_attempt(self, deploy_path):
        """Locate the app root (shallowest package.json) and main script,
        then run the deploy pipeline. Returns an attempt status code."""
        package_jsons = utils.search_file(deploy_path, 'package.json')
        if not package_jsons:
            LOG.error('No package.json found!')
            return ATTEMPT_STATUS_MISSING_REQUIRED_FILES
        base_dir = sorted([os.path.dirname(package_json) for package_json in package_jsons])[0]

        # Conventional entry-point names, in order of preference.
        for main_filename in ['server.js', 'app.js', 'main.js']:
            if utils.search_file_norecur(base_dir, main_filename):
                self.main_filename = main_filename
                break
        if self.main_filename is None:
            LOG.error('No main file found!')
            return ATTEMPT_STATUS_MISSING_REQUIRED_FILES

        self.setting_path = base_dir

        return self.try_deploy(base_dir)
    ## DEF

## CLASS
br == None: 33 | br = mechanize.Browser() 34 | cj = cookielib.LWPCookieJar() 35 | br.set_cookiejar(cj) 36 | br.set_handle_robots(False) 37 | 38 | br.open(form['url'].encode("ascii","ignore")) 39 | br.select_form(nr=get_form_index(br, form)) 40 | 41 | for input in form['inputs']: 42 | if input['name'] in inputs: 43 | try: 44 | if br.find_control(name = input['name'], type = input['type']) == None: 45 | continue 46 | if input['type'] == 'file': 47 | filename = inputs[input['name']]['filename'] 48 | upload_filename = os.path.basename(filename) 49 | mime_type = inputs[input['name']]['mime_type'] 50 | br.form.add_file(open(filename), mime_type, upload_filename, name = input['name']) 51 | br.form.set_all_readonly(False) 52 | elif input['type'] == 'checkbox': 53 | br.find_control(name = input['name'], type = input['type']).selected = inputs[input['name']] 54 | else: 55 | if br.find_control(name = input['name'], type = input['type']).readonly: 56 | continue 57 | if input['type'] == 'radio': 58 | continue 59 | br[input['name']] = inputs[input['name']] 60 | except: 61 | # traceback.print_exc() 62 | pass 63 | 64 | response = br.submit().code 65 | 66 | return response, br 67 | 68 | def gen_random_value(chars = string.ascii_letters + string.digits, length = 0): 69 | if length == 0: 70 | length = random.choice(range(8, 21)) 71 | return ''.join(random.choice(chars) for x in range(length)) 72 | 73 | def gen_random_true_false(): 74 | return random.choice([True, False]) 75 | 76 | def gen_file(base_path, input): 77 | if input['name'] != '' and 'image' in input['name']: 78 | filename = os.path.join(os.path.dirname(__file__), os.pardir, "files", "image.jpg") 79 | mime_type = 'image/jpeg' 80 | else: 81 | filename = os.path.join(base_path, gen_random_value() + '.txt') 82 | with open(filename, 'w') as f: 83 | f.write(gen_random_value(length = 1000)) 84 | f.close() 85 | mime_type = 'text/plain' 86 | return filename, mime_type 87 | 88 | def fill_form(form, matched_patterns = {}, br = None): 
89 | inputs = {} 90 | for input in form['inputs']: 91 | if input['value'] != '': 92 | continue 93 | for pattern_name in patterns: 94 | if input['type'] == 'hidden': 95 | continue 96 | pattern, value = patterns[pattern_name] 97 | if match_any_pattern(input['name'], pattern) or match_any_pattern(input['type'], pattern): 98 | if pattern_name in matched_patterns: 99 | inputs[input['name']] = matched_patterns[pattern_name] 100 | else: 101 | inputs[input['name']] = value[0] 102 | matched_patterns[pattern_name] = value[0] 103 | break 104 | elif input['type'] == 'checkbox': 105 | inputs[input['name']] = True 106 | else: 107 | inputs[input['name']] = gen_random_value() 108 | 109 | response, br = submit_form(form, inputs, br) 110 | 111 | return matched_patterns, inputs, response, br 112 | 113 | def fill_form_random(form, br, base_path = '/tmp'): 114 | inputs = {} 115 | for input in form['inputs']: 116 | if input['value'] != '': 117 | continue 118 | if input['type'] == 'file': 119 | filename, mime_type = gen_file(base_path, input) 120 | inputs[input['name']] = { 121 | 'filename' : filename, 122 | 'mime_type': mime_type 123 | } 124 | elif input['type'] == 'checkbox': 125 | inputs[input['name']] = gen_random_true_false() 126 | else: 127 | inputs[input['name']] = gen_random_value() 128 | 129 | response, br = submit_form(form, inputs, br) 130 | 131 | return inputs 132 | 133 | def submit_form_fast(form, inputs, files, session): 134 | new_url = urlparse.urljoin(form['url'], form['action']) 135 | if files == None: 136 | response = session.post(new_url, data = inputs) 137 | else: 138 | response = session.post(new_url, data = inputs, files = files) 139 | return response 140 | 141 | def fill_form_random_fast(form, session, base_path = '/tmp'): 142 | inputs = {} 143 | files = None 144 | response = session.get(form['url']) 145 | soup = BeautifulSoup(response.text) 146 | for input in form['inputs']: 147 | if input['value'] != '': 148 | i = soup.find('input', {"name":input['name']}) 149 | 
if i: 150 | inputs[input['name']] = i['value'] 151 | continue 152 | if input['type'] == 'file': 153 | if files == None: 154 | files = {} 155 | filename, mime_type = gen_file(base_path, input) 156 | upload_filename = os.path.basename(filename) 157 | files[input['name']] = (upload_filename, open(filename), mime_type) 158 | elif input['type'] == 'checkbox': 159 | inputs[input['name']] = gen_random_true_false() 160 | else: 161 | inputs[input['name']] = gen_random_value() 162 | 163 | response = submit_form_fast(form, inputs, files, session) 164 | 165 | return inputs --------------------------------------------------------------------------------