├── .gitignore ├── .travis.yml ├── COPYING.txt ├── README.md ├── data ├── .placeholder └── cork │ └── .placeholder ├── doc ├── Installation.rst ├── Makefile ├── Normalization.rst ├── WebAPI.rst ├── conf.py ├── index.rst └── update-pages.sh ├── feedpuller ├── __init__.py └── feedpuller.py ├── mnemosyne.cfg.dist ├── normalizer ├── __init__.py ├── modules │ ├── __init__.py │ ├── basenormalizer.py │ ├── beeswarm_hive.py │ ├── conpot_events.py │ ├── dionaea_binary.py │ ├── dionaea_capture.py │ ├── glastopf_events.py │ ├── glastopf_files.py │ ├── kippo_events.py │ ├── thug_events.py │ └── thug_files.py └── normalizer.py ├── persistance ├── __init__.py ├── mnemodb.py └── preagg_reports.py ├── requirements.txt ├── runner.py ├── test ├── __init__.py ├── test_normalizers │ ├── __init__.py │ ├── base_test.py │ ├── beeswarm_hive_test.py │ ├── conpot_test.py │ ├── data_samples │ │ ├── dionaea_mwbinary_sample1.txt │ │ ├── glastopf_files_sample1.txt │ │ ├── thug_events_sample1.xml │ │ ├── thug_events_sample2.xml │ │ └── thug_files_sample1.txt │ ├── dionaea_binary_test.py │ ├── dionaea_test.py │ ├── glastopf_files_test.py │ ├── glastopf_test.py │ ├── kippo_test.py │ ├── thug_files.py │ └── thug_test.py ├── test_persistance │ ├── __init__.py │ └── mnemodb_test.py └── test_webapi │ ├── test_d │ ├── __init__.py │ ├── dorks_test.py │ ├── files_test.py │ ├── helpers.py │ ├── hpfeeds_test.py │ └── urls_test.py │ └── test_v1 │ ├── __init__.py │ ├── dorks_test.py │ ├── files_test.py │ ├── helpers.py │ ├── hpfeeds_test.py │ └── urls_test.py └── webapi ├── __init__.py ├── admin.py ├── api ├── __init__.py ├── d │ ├── __init__.py │ ├── app.py │ ├── dorks.py │ ├── files.py │ ├── helpers.py │ ├── hpfeeds.py │ ├── sessions.py │ └── urls.py └── v1 │ ├── __init__.py │ ├── app.py │ ├── dorks.py │ ├── files.py │ ├── helpers.py │ ├── hpfeeds.py │ ├── sessions.py │ └── urls.py ├── default_routes.py ├── mnemowebapi.py ├── shared_state.py └── views ├── admin_page.tpl └── login_form.tpl /.gitignore: -------------------------------------------------------------------------------- 1 | mnemosyne.cfg 2 | *.py[co] 3 | .idea 4 | # Packages 5 | *.egg 6 | *.egg-info 7 | dist 8 | build 9 | eggs 10 | parts 11 | bin 12 | var 13 | sdist 14 | develop-eggs 15 | .installed.cfg 16 | 17 | # Installer logs 18 | pip-log.txt 19 | 20 | # Unit test / coverage reports 21 | .coverage 22 | .tox 23 | data/auth 24 | data/beaker 25 | #Translations 26 | *.mo 27 | 28 | #Mr Developer 29 | .mr.developer.cfg 30 | 31 | .DS_store 32 | test.db 33 | log.txt 34 | .log 35 | *.log 36 | 37 | #sphinx stuff 38 | doc/_build 39 | 40 | .noseids 41 | 42 | *.cache 43 | 44 | *.lock 45 | data/cork/*.json 46 | 47 | server.key 48 | 49 | server.crt 50 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | services: mongodb 3 | 4 | python: 5 | - "2.7" 6 | 7 | virtualenv: 8 | system_site_packages: true 9 | 10 | before_install: 11 | - "sudo apt-get update" 12 | - "sudo apt-get install python-dev libevent-dev" 13 | 14 | install: 15 | - "sudo pip install -r requirements.txt --use-mirrors" 16 | 17 | script: 18 | - nosetests 19 | 20 | after_success: 21 | - pip install --upgrade distribute 22 | - pip install --upgrade coveralls coverage sh 23 | - nosetests --with-coverage --cover-package=mnemosyne 24 | - coveralls 25 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | Mnemosyne [![Build Status](https://travis-ci.org/johnnykv/mnemosyne.png?branch=master)](https://travis-ci.org/johnnykv/mnemosyne) 2 | ========= 3 | ## About 4 | Mnemosyne has three main objectives: 5 | 6 | 1. Provide immutable persistence for [hpfeeds](https://redmine.honeynet.org/projects/hpfeeds/wiki). 7 | 2. Normalization of data to enable sensor-agnostic analysis. 8 | 3. Expose the normalized data through a RESTful API. 9 | 10 | ## Channels 11 | Mnemosyne currently supports normalization of data from the following channels: 12 | 13 | * dionaea.capture 14 | * mwbinary.dionaea.sensorunique 15 | * kippo.sessions 16 | * glastopf.events 17 | * glastopf.files 18 | * thug.events 19 | * thug.files 20 | * beeswarm.hive 21 | 22 | ## Preliminary REST API 23 | 24 | Can be found at [http://johnnykv.github.com/mnemosyne/WebAPI.html](http://johnnykv.github.com/mnemosyne/WebAPI.html) 25 | 26 | ## Speciality services 27 | 28 | Mnemosyne currently serves a speciality service that collects live dorks from the [Glastopf](https://github.com/glastopf/) [hpfeed](https://redmine.honeynet.org/projects/hpfeeds/wiki) and correlates the collected data, allowing virgin [Glastopf](https://github.com/glastopf/) instances to bootstrap themselves from a Mnemosyne service. 29 | 30 | # Example queries with curl 31 | 32 | ### Login to the mnemosyne webservice with the provided credentials 33 | ``` bash 34 | curl -k -c cookies.txt -X POST -d "username=james&password=bond" https://mnemosyne.honeycloud.net:8282/login 35 | [~]$ cat cookies.txt 36 | 37 | mnemosyne.honeycloud.net FALSE / TRUE 2147487247 beaker.session.id 1f7x19deadbeef8f802fbabe18f1f01a 38 | ``` 39 | 40 | 41 | ### Malicious websites 42 | Searching for malicious websites in the .ru TLD using a regex. The result of this query is a list of URLs and the hashes of binaries extracted from those URLs. 43 | At the moment most data in /urls is generated by [Thug](https://github.com/buffer/thug).
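The same login-and-query flow can also be scripted. Below is a minimal Python sketch using the `requests` library; the base URL and the james/bond credentials are placeholders mirroring the curl examples above, not a fixed endpoint.
``` python
# Minimal sketch (not part of the project): log in and run the same /urls
# query from Python. BASE_URL and the credentials are placeholders taken
# from the curl examples in this README.
import requests

BASE_URL = 'https://mnemosyne.honeycloud.net:8282'

api = requests.Session()
api.verify = False  # equivalent of curl -k (self-signed certificate)

# the beaker session cookie set by /login is kept on the Session object
api.post(BASE_URL + '/login', data={'username': 'james', 'password': 'bond'})

# same query as the curl example below
response = api.get(BASE_URL + '/api/v1/urls', params={'url_regex': r'\.ru(\/|\:|$)'})
print response.text
```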
44 | ``` bash 45 | curl -k -b cookies.txt "<...>/api/v1/urls?url_regex=\.ru(\/|\:|$)" | python -mjson.tool 46 | ``` 47 | ``` json 48 | { 49 | "url": "http://xxxyyy.ru:8080/forum/links/django_version.php", 50 | "_id": "510c35bfc6b6082a30d50bba", 51 | "extractions": [ 52 | { 53 | "hashes": { 54 | "md5": "f3a80b8b26579a4bfc591f834c767d15", 55 | "sha1": "22ff183597009e64ffc3d93f5bccfdb214d6a4bd", 56 | "sha512": "63cc541a0743c95037da2fe86cfddc881ad0128665171c50958142f2d8b87a3e90c3085286f774aceb15e28d515ed7b24e399b19579f42da894da206945fe023" 57 | }, 58 | "timestamp": "2013-01-11T15:57:24.107000" 59 | }, 60 | { 61 | "hashes": { 62 | "md5": "b33310b69e12427d09e0123c385b3d47", 63 | "sha1": "aed4552dfe85ed33f0b77a28abc48c9f831623c5", 64 | "sha512": "ab2e1edb49a864c43864f432b0f18a4f097e90b262e4e2964814e022218ad2128a1cdb0402ee76eb75b382f53d145ae7ebd64d0bec7cd599d45ae9c799802b68" 65 | }, 66 | "timestamp": "2013-01-11T15:57:24.107000" 67 | }, 68 | { 69 | "hashes": { 70 | "md5": "1f17127f63c975e28710739092117676", 71 | "sha1": "64da64d6d6aac36323d326282093e64c89dada40", 72 | "sha512": "210055a3a8614da004d92ba6edda8222d483d5563789f9443bb9d5c06481b9674fdf0eef1929410e8c44f9d66c2c7c07e0109e98a5eb92f326a3e8130801f4e7" 73 | }, 74 | "timestamp": "2013-01-11T15:57:24.107000" 75 | } 76 | ], 77 | "hpfeeds_ids": [ 78 | "50f028a009ce4533628b1af7" 79 | ] 80 | } 81 | ``` 82 | 83 | ### Looking for files 84 | ``` bash 85 | curl -k -b cookies.txt "<...>/api/v1/files?hash=b420138b88eda83a51fea5298f72864a" | python -mjson.tool 86 | ``` 87 | ```json 88 | { 89 | "files": [ 90 | { 91 | "_id": "510c3ce5c6b6082a30d548f3", 92 | "content_guess": "PE32 executable (DLL) (GUI) Intel 80386, for MS Windows, UPX compressed", 93 | "data": "4d5a90000300000004000000ffff0000b8 <--- SNIP! --->", 94 | "encoding": "hex", 95 | "hashes": { 96 | "md5": "b420138b88eda83a51fea5298f72864a", 97 | "sha1": "0e644fc39a287e6f020ede6d6c9dd708b1a871ba", 98 | "sha512": "98a2110f389790b5fd66f50e26e85465b3d22662245969b1fd03025194ef7a00a928c3709b57e20d165876231cdab12d38b7ff17e5c173b6562e924dc4087d85" 99 | }, 100 | "hpfeed_ids": [ 101 | "50f3e41b09ce4533629cea00" 102 | ] 103 | } 104 | ] 105 | } 106 | ``` 107 | 108 | ### Dorks 109 | Dorks collected by [Glastopf](https://github.com/glastopf/glastopf) 110 | ``` bash 111 | curl -k -b cookies.txt "<...>/api/v1/aux/dorks?limit=10" | python -mjson.tool 112 | ``` 113 | ```json 114 | { 115 | "dorks": [ 116 | { 117 | "content": "/pivotx/includes/index.php", 118 | "count": 716, 119 | "firsttime": "2013-02-01T20:38:42+00:00", 120 | "lasttime": "2013-01-14T16:20:51.504000", 121 | "type": "inurl" 122 | }, 123 | { 124 | "content": "/axis-cgi/mjpg/wp-content/themes/diner/timthumb.php", 125 | "count": 545, 126 | "firsttime": "2013-02-01T20:38:32+00:00", 127 | "lasttime": "2013-01-14T16:26:03.036000", 128 | "type": "inurl" 129 | }, 130 | { 131 | "content": "/board/board/include/pivotx/includes/wp-content/pivotx/includes/timthumb.php", 132 | "count": 493, 133 | "firsttime": "2013-02-01T20:39:03+00:00", 134 | "lasttime": "2013-01-14T10:55:50.197000", 135 | "type": "inurl" 136 | }, 137 | 138 | <--- SNIP --- > 139 | 140 | ] 141 | } 142 | ``` 143 | 144 | ### Sessions 145 | Searching for all honeypot attacks comming from an specific source port. 
146 | ``` bash 147 | curl -k -b cookies.txt "<...>/api/v1/sessions?source_port=37337" | python -mjson.tool 148 | ``` 149 | ```json 150 | 151 | { 152 | "sessions": [ 153 | { 154 | "_id": "510c2f1209ce45385d3ed584", 155 | "honeypot": "dionaea", 156 | "attachments": [ 157 | { 158 | "description": "Binary extraction", 159 | "hashes": { 160 | "md5": "984cef500b81e7ad2f7a69d9208e64e6", 161 | "sha512": "e899155228a1d3b5ed9864a7fed944716b7b0a3061b76e0f720bf9f7f6c65c633d8fdd4799335b9d92238b4b18e8076718a87a5d7a6538fec4223f111224b5e5" 162 | } 163 | } 164 | ], 165 | "destination_ip": [ 166 | "xxx.yyy.zzz.ppp" 167 | ], 168 | "destination_port": 445, 169 | "hpfeed_id": "50ec09b709ce451dac5c844e", 170 | "protocol": "microsoft-ds", 171 | "source_ip": "xxx.yy.xx.xxx", 172 | "source_port": 37337, 173 | "timestamp": "2013-01-08T11:57:43.390000" 174 | }, 175 | { 176 | "_id": "510c2fbc09ce45385d3fcd16", 177 | "honeypot": "glastopf", 178 | "destination_port": 80, 179 | "hpfeed_id": "50ec74f509ce452427303b50", 180 | "protocol": "http", 181 | "session_http": { 182 | "request": { 183 | "body": "", 184 | "header": "{<--- SNIP --->}", 185 | "host": "<--- SNIP --->", 186 | "url": "<--- SNIP --->", 187 | "verb": "GET" 188 | } 189 | }, 190 | "source_ip": "xxx.zzz.yy.zzz", 191 | "source_port": 37337, 192 | "timestamp": "2013-01-08T13:28:15" 193 | }, 194 | ] 195 | } 196 | ``` 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /data/.placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/data/.placeholder -------------------------------------------------------------------------------- /data/cork/.placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/data/cork/.placeholder -------------------------------------------------------------------------------- /doc/Installation.rst: -------------------------------------------------------------------------------- 1 | ************ 2 | Installation 3 | ************ 4 | 5 | Prerequisites 6 | ============= 7 | 8 | Get Mnemosyne:: 9 | 10 | $ git clone https://github.com/johnnykv/mnemosyne.git 11 | 12 | 13 | Install python requirements:: 14 | 15 | $ pip install -r requirements.txt 16 | 17 | 18 | Configuration 19 | ============= 20 | 21 | Copy default configuration:: 22 | 23 | cp mnemosyne.cfg.dist mnemosyne.cfg 24 | 25 | While testing it is recommended to simplify the configuration as much as possible, example:: 26 | 27 | [webapi] 28 | # 0.0.0.0 = listen on all interfaces 29 | host = 0.0.0.0 30 | port = 8181 31 | 32 | [mongodb] 33 | database = mnemosyne 34 | 35 | [hpfriends] 36 | ident = 37 | secret = 38 | host = hpfriends.honeycloud.net 39 | port = 20000 40 | channels = dionaea.capture 41 | 42 | [file_log] 43 | enabled = True 44 | file = mnemosyne.log 45 | 46 | [loggly_log] 47 | enabled = False 48 | token = 49 | 50 | Running 51 | ======= 52 | 53 | First off, you need to start mongod, afterwards you can start mnemosyne:: 54 | 55 | $ python runner.py 56 | 2013-09-09 21:44:58,541 (root) Starting mnemosyne. (Git: af0388c4a9a251bdafa0d2a5d1de262b7c94b08c) 57 | 2013-09-09 21:44:58,542 (persistance.mnemodb) Connecting to mongodb, using "mnemosyne" as database. 58 | 2013-09-09 21:44:58,543 (persistance.preagg_reports) Connecting to mongodb, using "mnemosyne" as database. 
59 | 2013-09-09 21:44:58,653 (root) Spawning hpfriends feed puller. 60 | 2013-09-09 21:44:58,653 (root) Spawning web api. 61 | 2013-09-09 21:44:58,653 (webapi.mnemowebapi) Cork authentication files not found, creating new files. 62 | 2013-09-09 21:44:58,653 (webapi.mnemowebapi) Creating new authentication files, check STDOUT for the generated admin password. 63 | A 'admin' account has been created with the password 'df02548f-fc01-40ca-808a-15ba07aed8d5' 64 | 2013-09-09 21:44:58,670 (root) Spawning normalizer 65 | 2013-09-09 21:44:58,670 (pyhpfeeds) connecting to hpfriends.honeycloud.net:20000 66 | 2013-09-09 21:44:58,671 (webapi.mnemowebapi) Starting web api, listening on 0.0.0.0:8181 67 | 2013-09-09 21:44:59,236 (pyhpfeeds) info message name: hpfriends, rand: '}\xbb\xbb\xe1' 68 | 2013-09-09 21:44:59,236 (pyhpfeeds) Sending subscription for dionaea.capture. 69 | Please notice that the the generated admin password was printed on line 9. 70 | 71 | After running mnemosyne for a short while, you can fire up the mongo console at check the database, example:: 72 | 73 | $ mongo 74 | MongoDB shell version: 2.4.4 75 | connecting to: test 76 | Server has startup warnings: 77 | > use mnemosyne 78 | switched to db mnemosyne 79 | > db.hpfeed.count() 80 | 28747 81 | > db.session.count() 82 | 27154 83 | > db.session.find().limit(1).pretty() 84 | { 85 | "_id" : ObjectId("522e253b79b45e7673aa4a6d"), 86 | "destination_ip" : [ 87 | "18.17.141.211" 88 | ], 89 | "protocol" : "microsoft-ds", 90 | "attachments" : [ 91 | { 92 | "hashes" : { 93 | "sha512" : "e2de6f3a3927d92f213bf153f72f2a1407a1f9f350a54115f38453aa85a6087debdab2160f246ff3808d0f6b679b6dc421fa5d5f1aa6271684de31ec0952deb0", 94 | "md5" : "94e689d7d6bc7c769d09a59066727497" 95 | }, 96 | "description" : "Binary extraction" 97 | } 98 | ], 99 | "timestamp" : ISODate("2013-09-07T22:42:33.808Z"), 100 | "source_ip" : "177.100.148.19", 101 | "source_port" : 4483, 102 | "destination_port" : 445, 103 | "honeypot" : "dionaea", 104 | "hpfeed_id" : ObjectId("522babd979b45e68a094614a") 105 | } 106 | > 107 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Mnemosyne.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Mnemosyne.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Mnemosyne" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Mnemosyne" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 
102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /doc/Normalization.rst: -------------------------------------------------------------------------------- 1 | ************* 2 | Normalization 3 | ************* 4 | 5 | 6 | Dataflow 7 | ======== 8 | 9 | .. digraph:: foo 10 | 11 | 12 | compound=true; 13 | 14 | subgraph cluster_1 { 15 | style=filled; 16 | fillcolor="cornsilk2" 17 | node [style=filled]; 18 | feedpuller [label="Feedpuller"]; 19 | normalizer [label="Normalizer"]; 20 | webapi [label="WebAPI"]; 21 | label = "Mnemosyne"; 22 | } 23 | 24 | subgraph cluster_2 { 25 | node [shape=box] 26 | style=filled; 27 | fillcolor=".7 .3 1.0"; 28 | node [style=filled]; 29 | hpfeed [label="HPFeeds"]; 30 | file [label="Files"] 31 | url [label="URLs"] 32 | session [label="Sessions"] 33 | label = "Mongo collections"; 34 | labelloc = b; 35 | } 36 | 37 | 38 | hpf [label="hpfeeds.honeycloud.net"]; 39 | hpf -> feedpuller [color=red]; 40 | feedpuller -> hpfeed [color=red]; 41 | hpfeed -> normalizer [color=red]; 42 | normalizer -> file [color=green]; 43 | normalizer -> url [color=green]; 44 | normalizer -> session [color=green]; 45 | 46 | 47 | session -> webapi [ ltail=cluster_2, lhead=cluster_1]; 48 | 49 | 50 | Overview 51 | ============ 52 | 53 | Some text on extractions, attachments and entities here - the big picture. 54 | 55 | .. 
digraph:: foo 56 | 57 | 58 | node [fontsize = "10", shape = "box", style="filled", fillcolor="aquamarine"]; 59 | 60 | rankdir=LR; 61 | "HPFeed" -> "Session" 62 | "HPFeed" -> "URL" 63 | "HPFeed" -> "File" 64 | 65 | node [fontsize = "7", shape = "circle", style="filled", fillcolor="aquamarine"]; 66 | "Session" -> "protocol" 67 | 68 | 69 | Mongo collections 70 | ================= 71 | General information on collections here, also a bit about the schemaless database. 72 | 73 | HPFeed 74 | ------ 75 | Detailed description of the HPFeeds entity. 76 | 77 | Session 78 | ------- 79 | Detailed description of the Session entity. 80 | 81 | URL 82 | --- 83 | Detailed description of the URL entity. 84 | 85 | File 86 | ---- 87 | Detailed description of the File entity. 88 | 89 | Mongo queries 90 | ============= 91 | Some interesting examples on mongo queries in relation to HP. -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Mnemosyne documentation build configuration file, created by 4 | # sphinx-quickstart on Thu Jan 3 00:41:31 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = ['sphinx.ext.autodoc', 'sphinxcontrib.httpdomain', 'sphinx.ext.graphviz'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = u'Mnemosyne' 44 | copyright = u'2013, Johnny Vestergaard' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '0.1' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '0.1' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 
63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | #html_theme = 'default' 95 | html_theme = 'sphinxdoc' 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | #html_title = None 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 
153 | #html_show_sphinx = True 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 167 | htmlhelp_basename = 'Mnemosynedoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | latex_elements = { 173 | # The paper size ('letterpaper' or 'a4paper'). 174 | #'papersize': 'letterpaper', 175 | 176 | # The font size ('10pt', '11pt' or '12pt'). 177 | #'pointsize': '10pt', 178 | 179 | # Additional stuff for the LaTeX preamble. 180 | #'preamble': '', 181 | } 182 | 183 | # Grouping the document tree into LaTeX files. List of tuples 184 | # (source start file, target name, title, author, documentclass [howto/manual]). 185 | latex_documents = [ 186 | ('index', 'Mnemosyne.tex', u'Mnemosyne Documentation', 187 | u'Johnny Vestergaard', 'manual'), 188 | ] 189 | 190 | # The name of an image file (relative to this directory) to place at the top of 191 | # the title page. 192 | #latex_logo = None 193 | 194 | # For "manual" documents, if this is true, then toplevel headings are parts, 195 | # not chapters. 196 | #latex_use_parts = False 197 | 198 | # If true, show page references after internal links. 199 | #latex_show_pagerefs = False 200 | 201 | # If true, show URL addresses after external links. 202 | #latex_show_urls = False 203 | 204 | # Documents to append as an appendix to all manuals. 205 | #latex_appendices = [] 206 | 207 | # If false, no module index is generated. 208 | #latex_domain_indices = True 209 | 210 | 211 | # -- Options for manual page output -------------------------------------------- 212 | 213 | # One entry per manual page. List of tuples 214 | # (source start file, name, description, authors, manual section). 215 | man_pages = [ 216 | ('index', 'mnemosyne', u'Mnemosyne Documentation', 217 | [u'Johnny Vestergaard'], 1) 218 | ] 219 | 220 | # If true, show URL addresses after external links. 221 | #man_show_urls = False 222 | 223 | 224 | # -- Options for Texinfo output ------------------------------------------------ 225 | 226 | # Grouping the document tree into Texinfo files. List of tuples 227 | # (source start file, target name, title, author, 228 | # dir menu entry, description, category) 229 | texinfo_documents = [ 230 | ('index', 'Mnemosyne', u'Mnemosyne Documentation', 231 | u'Johnny Vestergaard', 'Mnemosyne', 'One line description of project.', 232 | 'Miscellaneous'), 233 | ] 234 | 235 | # Documents to append as an appendix to all manuals. 236 | #texinfo_appendices = [] 237 | 238 | # If false, no module index is generated. 239 | #texinfo_domain_indices = True 240 | 241 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 242 | #texinfo_show_urls = 'footnote' 243 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. Mnemosyne documentation master file, created by 2 | sphinx-quickstart on Thu Jan 3 00:41:31 2013. 
3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Mnemosyne's documentation! 7 | ===================================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | Installation 15 | Normalization 16 | WebAPI 17 | 18 | 19 | 20 | Indices and tables 21 | ================== 22 | 23 | * :ref:`genindex` 24 | * :ref:`modindex` 25 | * :ref:`search` 26 | 27 | -------------------------------------------------------------------------------- /doc/update-pages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #Script to update gh-pages 3 | 4 | set -e 5 | 6 | TMP_REPO=`mktemp -d -t mnemo-site` 7 | TMP_HTML=`mktemp -d -t mnemo-site` 8 | 9 | make html 10 | cp -R _build/html/* $TMP_HTML 11 | 12 | git clone git@github.com:johnnykv/mnemosyne.git $TMP_REPO 13 | cd $TMP_REPO 14 | git checkout gh-pages 15 | git symbolic-ref HEAD refs/heads/gh-pages 16 | rm .git/index 17 | git clean -fdx 18 | cp -R $TMP_HTML/* $TMP_REPO 19 | touch $TMP_REPO/.nojekyll 20 | git add . 21 | git commit -a -m "Updated docs" 22 | git push origin gh-pages 23 | rm -rf $TMP_REPO 24 | rm -rf $TMP_HTML 25 | -------------------------------------------------------------------------------- /feedpuller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/feedpuller/__init__.py -------------------------------------------------------------------------------- /feedpuller/feedpuller.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
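# FeedPuller (defined below) keeps a persistent hpfeeds subscription: every
# payload received on the configured channels is handed to the database layer
# via insert_hpfeed(), payloads whose channel name contains structural
# characters (';', '"', '{', '}') are ignored, and a watchdog greenlet forces
# a reconnect if nothing has been received for 15 seconds.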
17 | 18 | from datetime import datetime 19 | import logging 20 | 21 | import gevent 22 | 23 | import hpfeeds 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class FeedPuller(object): 29 | def __init__(self, database, ident, secret, port, host, feeds): 30 | 31 | self.database = database 32 | 33 | self.ident = ident 34 | self.secret = secret 35 | self.port = port 36 | self.host = host 37 | self.feeds = feeds 38 | self.last_received = datetime.now() 39 | self.hpc = None 40 | self.enabled = True 41 | 42 | def start_listening(self): 43 | 44 | gevent.spawn_later(15, self._activity_checker) 45 | while self.enabled: 46 | try: 47 | self.hpc = hpfeeds.new(self.host, self.port, self.ident, self.secret) 48 | 49 | def on_error(payload): 50 | logger.error('Error message from broker: {0}'.format(payload)) 51 | self.hpc.stop() 52 | 53 | def on_message(ident, chan, payload): 54 | self.last_received = datetime.now() 55 | if not any(x in chan for x in (';', '"', '{', '}')): 56 | self.database.insert_hpfeed(ident, chan, payload) 57 | 58 | self.hpc.subscribe(self.feeds) 59 | self.hpc.run(on_message, on_error) 60 | except Exception as ex: 61 | print ex 62 | self.hpc.stop() 63 | logger.exception('Exception caught: {0}'.format(ex)) 64 | #throttle 65 | gevent.sleep(5) 66 | 67 | def stop(self): 68 | self.hpc.stop() 69 | self.enabled = False 70 | logger.info("FeedPuller stopped.") 71 | 72 | def _activity_checker(self): 73 | while self.enabled: 74 | if self.hpc is not None and self.hpc.connected: 75 | difference = datetime.now() - self.last_received 76 | if difference.seconds > 15: 77 | logger.warning('No activity for 15 seconds, forcing reconnect') 78 | self.hpc.stop() 79 | gevent.sleep(15) 80 | -------------------------------------------------------------------------------- /mnemosyne.cfg.dist: -------------------------------------------------------------------------------- 1 | [webapi] 2 | # 0.0.0.0 = listen on all interfaces 3 | host = 0.0.0.0 4 | port = 8181 5 | 6 | [mongodb] 7 | database = mnemosyne 8 | 9 | [hpfriends] 10 | ident = 11 | secret = 12 | host = hpfriends.honeycloud.net 13 | port = 20000 14 | channels = conpot.events,thug.events,beeswarm.hive,dionaea.capture,thug.files,beeswarn.feeder,cuckoo.analysis,kippo.sessions,glastopf.events,glastopf.files,mwbinary.dionaea.sensorunique 15 | 16 | [file_log] 17 | enabled = True 18 | file = mnemosyne.log 19 | 20 | [loggly_log] 21 | enabled = False 22 | token = 23 | -------------------------------------------------------------------------------- /normalizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/normalizer/__init__.py -------------------------------------------------------------------------------- /normalizer/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/normalizer/modules/__init__.py -------------------------------------------------------------------------------- /normalizer/modules/basenormalizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or 
(at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import hashlib 19 | import socket 20 | import struct 21 | from urlparse import urlparse 22 | 23 | 24 | class BaseNormalizer(object): 25 | ports_map = {22: 'ssh', 80: 'http', 135: 'dcom-scm', 445: 'microsoft-ds', 443: 'https"'} 26 | 27 | def normalize(self, data, channel_name, submission_timestamp): 28 | pass 29 | 30 | def make_url(self, url): 31 | url_dict = {} 32 | result = urlparse(url) 33 | url_dict['url'] = url 34 | url_dict['scheme'] = result.scheme 35 | url_dict['netloc'] = result.netloc 36 | url_dict['path'] = result.path 37 | url_dict['params'] = result.params 38 | url_dict['query'] = result.query 39 | url_dict['fragment'] = result.fragment 40 | return url_dict 41 | 42 | def port_to_service(self, port_number): 43 | if port_number in BaseNormalizer.ports_map: 44 | return BaseNormalizer.ports_map[port_number] 45 | else: 46 | return None 47 | 48 | def generate_checksum_list(self, data): 49 | result = {} 50 | result['md5'] = hashlib.md5(data).hexdigest() 51 | result['sha1'] = hashlib.sha1(data).hexdigest() 52 | result['sha512'] = hashlib.sha512(data).hexdigest() 53 | return result 54 | 55 | def is_RFC1918_addr(self, ip): 56 | #10.0.0.0 = 167772160 57 | #172.16.0.0 = 2886729728 58 | #192.168.0.0 = 3232235520 59 | RFC1918_net_bits = ((167772160, 8), (2886729728, 12), (3232235520, 16)) 60 | 61 | #ip to decimal 62 | ip = struct.unpack("!L", socket.inet_aton(ip))[0] 63 | 64 | for net, mask_bits in RFC1918_net_bits: 65 | ip_masked = ip & (2 ** 32 - 1 << (32 - mask_bits)) 66 | if ip_masked == net: 67 | return True 68 | 69 | return False 70 | 71 | -------------------------------------------------------------------------------- /normalizer/modules/beeswarm_hive.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
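# BeeswarmHive (below) normalizes beeswarm.hive events: each JSON payload
# becomes a single 'session' document with the attacker address/port, the
# honeypot port, the protocol and any recorded login attempts; events from
# RFC1918 (private) source addresses are discarded.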
17 | 18 | import json 19 | from datetime import datetime 20 | from normalizer.modules.basenormalizer import BaseNormalizer 21 | 22 | 23 | class BeeswarmHive(BaseNormalizer): 24 | channels = ('beeswarm.hive',) 25 | 26 | def normalize(self, data, channel, submission_timestamp): 27 | o_data = json.loads(data) 28 | 29 | if self.is_RFC1918_addr(o_data['attacker_ip']): 30 | return [] 31 | 32 | auth_attempts = [] 33 | for attempt in o_data['login_attempts']: 34 | auth_attempts.append({'login': attempt['username'], 'password': attempt['password']}) 35 | 36 | session = { 37 | 'timestamp': datetime.strptime(o_data['timestamp'], '%Y-%m-%dT%H:%M:%S.%f'), 38 | 'source_ip': o_data['attacker_ip'], 39 | 'source_port': o_data['attacker_source_port'], 40 | 'destination_port': o_data['honey_port'], 41 | 'honeypot': 'beeswarm.hive', 42 | 'protocol': o_data['protocol'], 43 | 'auth_attempts': auth_attempts, 44 | 45 | } 46 | 47 | #honeypot operator might have opted out of sharing his honeypot ip 48 | if 'honey_ip' in o_data: 49 | session['destination_ip'] = o_data['honey_ip'] 50 | 51 | relations = [{'session': session},] 52 | 53 | return relations 54 | -------------------------------------------------------------------------------- /normalizer/modules/conpot_events.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import json 19 | 20 | from normalizer.modules.basenormalizer import BaseNormalizer 21 | 22 | 23 | class Conpot(BaseNormalizer): 24 | channels = ('conpot.events',) 25 | 26 | def normalize(self, data, channel, submission_timestamp): 27 | o_data = json.loads(data) 28 | 29 | if self.is_RFC1918_addr(o_data['remote'][0]): 30 | return [] 31 | 32 | session = { 33 | 'timestamp': submission_timestamp, 34 | 'source_ip': o_data['remote'][0], 35 | 'source_port': o_data['remote'][1], 36 | 'destination_port': 502, 37 | 'honeypot': 'conpot', 38 | 'protocol': o_data['data_type'], 39 | 'session_{0}'.format(o_data['data_type']): { 'pdus': o_data['data']} 40 | 41 | } 42 | 43 | relations = [{'session': session},] 44 | 45 | return relations -------------------------------------------------------------------------------- /normalizer/modules/dionaea_binary.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 
7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import magic 19 | from normalizer.modules.basenormalizer import BaseNormalizer 20 | 21 | 22 | class DionaeaBinary(BaseNormalizer): 23 | channels = ('mwbinary.dionaea.sensorunique',) 24 | 25 | def normalize(self, data, channel, submission_timestamp): 26 | decoded = data.decode('hex') 27 | hashes = super(DionaeaBinary, self).generate_checksum_list(decoded) 28 | file_ = { 29 | 'encoding': 'hex', 30 | 'content_guess': magic.from_buffer(decoded), 31 | 'data': data, 32 | 'hashes': hashes 33 | } 34 | 35 | relations = {'file': file_} 36 | return [relations] 37 | -------------------------------------------------------------------------------- /normalizer/modules/dionaea_capture.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
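# DionaeaCaptures (below) normalizes the dionaea capture channels: each JSON
# event becomes a 'session' document for the dionaea honeypot, carrying the
# md5/sha512 of the captured binary as an attachment and, where known, the
# destination port mapped to a protocol name; RFC1918 source addresses are
# discarded.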
17 | 18 | import json 19 | from normalizer.modules.basenormalizer import BaseNormalizer 20 | 21 | 22 | class DionaeaCaptures(BaseNormalizer): 23 | channels = ('dionaea.capture', 'dionaea.capture.anon', 'dionaea.captures') 24 | 25 | def normalize(self, data, channel, submission_timestamp): 26 | o_data = json.loads(data) 27 | 28 | if self.is_RFC1918_addr(o_data['saddr']): 29 | return [] 30 | 31 | session = { 32 | 'timestamp': submission_timestamp, 33 | 'source_ip': o_data['saddr'], 34 | 'source_port': int(o_data['sport']), 35 | 'destination_ip': o_data['daddr'], 36 | 'destination_port': int(o_data['dport']), 37 | 'honeypot': 'dionaea' 38 | } 39 | 40 | if 'daddr' in o_data: 41 | session['destination_ip'] = o_data['daddr'], 42 | 43 | protocol = super(DionaeaCaptures, self).port_to_service(int(o_data['dport'])) 44 | if protocol is not None: 45 | session['protocol'] = protocol 46 | 47 | attachments = [ 48 | { 49 | 'description': 'Binary extraction', 50 | 'hashes': 51 | {'md5': o_data['md5'], 52 | 'sha512': o_data['sha512']} 53 | }, ] 54 | 55 | #url = {'url': o_data['url'], 56 | # 'extractions': [{ 57 | # 'timestamp': submission_timestamp, 58 | # 'hashes': { 59 | # 'md5': o_data['md5'], 60 | # 'sha512': o_data['sha512'] 61 | # }}]} 62 | 63 | session['attachments'] = attachments 64 | 65 | relations = {'session': session} 66 | #relations = {'session': session, 'url': url} 67 | return [relations] 68 | -------------------------------------------------------------------------------- /normalizer/modules/glastopf_events.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
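# GlastopfEvents (below) handles both the old glastopf logging format (with a
# 'request' dict) and the new one (with 'request_raw'), producing a 'session'
# document with an embedded session_http request and, when the requested path
# is not common scanner noise, a 'dork' document.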
17 | 18 | import json 19 | import re 20 | from datetime import datetime 21 | from urlparse import urlparse 22 | from BaseHTTPServer import BaseHTTPRequestHandler 23 | from StringIO import StringIO 24 | 25 | from normalizer.modules.basenormalizer import BaseNormalizer 26 | 27 | 28 | class GlastopfEvents(BaseNormalizer): 29 | channels = ('glastopf.events',) 30 | 31 | def __init__(self): 32 | #dorks to be filtered out 33 | self.dork_filter = '/headers|favicon.ico|w00tw00t|/robots.txt' 34 | 35 | def normalize(self, data, channel, submission_timestamp): 36 | o_data = json.loads(data) 37 | 38 | if self.is_RFC1918_addr(o_data['source'][0]): 39 | return [] 40 | 41 | relations = {} 42 | 43 | #only old versions of glastopf has the request key 44 | relations['session'] = self.make_session(o_data) 45 | relations['session']['session_http'] = self.make_session_http(o_data) 46 | dork = self.make_dork(o_data, submission_timestamp) 47 | if dork: 48 | relations['dork'] = dork 49 | 50 | return [relations] 51 | 52 | def make_dork(self, data, timestamp): 53 | #only old versions of glastopf has the request key 54 | if 'request' in data: 55 | dork = urlparse(self.make_url(data)).path 56 | else: 57 | dork = urlparse(data['request_url']).path 58 | if len(dork) > 1 and not re.match(r'.*({0}).*'.format(self.dork_filter), dork): 59 | return {'content': dork, 60 | 'type': 'inurl', 61 | 'timestamp': timestamp, 62 | 'count': 1} 63 | 64 | def make_session(self, data): 65 | session = {} 66 | session['timestamp'] = datetime.strptime( 67 | data['time'], '%Y-%m-%d %H:%M:%S') 68 | session['source_ip'] = data['source'][0] 69 | session['source_port'] = data['source'][1] 70 | #TODO: Extract from header if specified 71 | session['destination_port'] = 80 72 | session['protocol'] = 'http' 73 | session['honeypot'] = 'glastopf' 74 | 75 | return session 76 | 77 | def make_session_http(self, data): 78 | session_http = {} 79 | request = {} 80 | #glastopf's old logging format has the 'request' key 81 | if 'request' in data: 82 | request['header'] = json.dumps(data['request']['header']) 83 | if 'body' in data['request']: 84 | request['body'] = data['request']['body'] 85 | if 'Host' in data['request']['header']: 86 | request['host'] = data['request']['header']['Host'] 87 | request['verb'] = data['request']['method'] 88 | 89 | request['url'] = self.make_url(data) 90 | #new glastopf logging format 91 | else: 92 | r = HTTPRequest(data['request_raw']) 93 | if 'host' in r.headers: 94 | request['host'] = r.headers['host'] 95 | #dict json loads? 96 | request['header'] = r.headers.items() 97 | request['verb'] = r.command 98 | request['path'] = r.path 99 | request['body'] = r.rfile.read() 100 | 101 | if len(request) != 0: 102 | session_http['request'] = request 103 | return session_http 104 | 105 | def clean_url(self, url): 106 | if len(url) > 2 and url[:2] == '//': 107 | url = url[1:] 108 | return url 109 | 110 | def make_url(self, data): 111 | """ 112 | Tries to make a valid URL from the attackers request. 113 | note: Glastopf reports ['url'] as path + query string (omitting schema and netloc), 114 | """ 115 | 116 | if 'Host' in data['request']['header'] and not data['request']['url'].startswith('http'): 117 | url = 'http://' + data['request']['header']['Host'] + data['request']['url'] 118 | else: 119 | #best of luck! 120 | url = data['request']['url'] 121 | return url 122 | 123 | 124 | #Thanks Brandon Rhodes! 
125 | #http://stackoverflow.com/questions/4685217/parse-raw-http-headers 126 | class HTTPRequest(BaseHTTPRequestHandler): 127 | def __init__(self, request_text): 128 | self.rfile = StringIO(request_text) 129 | self.raw_requestline = self.rfile.readline() 130 | self.error_code = self.error_message = None 131 | self.parse_request() 132 | 133 | def send_error(self, code, message): 134 | self.error_code = code 135 | self.error_message = message 136 | -------------------------------------------------------------------------------- /normalizer/modules/glastopf_files.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import base64 19 | 20 | import magic 21 | 22 | from normalizer.modules.basenormalizer import BaseNormalizer 23 | 24 | 25 | class GlastopfFiles(BaseNormalizer): 26 | channels = ('glastopf.files',) 27 | 28 | def normalize(self, data, channel, submission_timestamp): 29 | md5, data = data.split(' ', 1) 30 | decoded = base64.b64decode(data) 31 | hashes = super(GlastopfFiles, self).generate_checksum_list(decoded) 32 | 33 | file_ = { 34 | 'encoding': 'hex', 35 | 'content_guess': magic.from_buffer(decoded), 36 | 'data': decoded.encode('hex'), 37 | 'hashes': hashes 38 | } 39 | 40 | relations = {'file': file_} 41 | 42 | return [relations, ] 43 | -------------------------------------------------------------------------------- /normalizer/modules/kippo_events.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
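# KippoEvents (below) normalizes kippo.sessions: each JSON event becomes an
# ssh 'session' document carrying the client SSH version, any username/password
# attempts and, when present, the ttylog session recording as an attachment;
# RFC1918 source addresses are dropped.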
17 | 18 | import json 19 | from normalizer.modules.basenormalizer import BaseNormalizer 20 | 21 | 22 | class KippoEvents(BaseNormalizer): 23 | channels = ('kippo.sessions',) 24 | 25 | def normalize(self, data, channel, submission_timestamp): 26 | o_data = json.loads(data) 27 | 28 | if self.is_RFC1918_addr(o_data['peerIP']): 29 | return [] 30 | 31 | session = { 32 | 'timestamp': submission_timestamp, 33 | 'source_ip': o_data['peerIP'], 34 | 'source_port': o_data['peerPort'], 35 | 'destination_port': o_data['hostPort'], 36 | 'honeypot': 'kippo', 37 | 'protocol': 'ssh', 38 | 'session_ssh': {'version': o_data['version']} 39 | } 40 | 41 | if 'ttylog' in o_data and o_data['ttylog'] is not None: 42 | attachments = [ 43 | { 44 | 'description': 'Kippo session log (ttylog).', 45 | 'data': o_data['ttylog'] 46 | }, ] 47 | 48 | session['attachments'] = attachments 49 | 50 | if len(o_data['credentials']) > 0: 51 | auth_attempts = [] 52 | for cred in o_data['credentials']: 53 | auth_attempts.append({'login': cred[0], 54 | 'password': cred[1]}) 55 | session['auth_attempts'] = auth_attempts 56 | 57 | relations = [{'session': session}, ] 58 | 59 | return relations 60 | -------------------------------------------------------------------------------- /normalizer/modules/thug_events.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import xml.etree.ElementTree as ET 19 | import re 20 | from datetime import datetime 21 | 22 | from normalizer.modules.basenormalizer import BaseNormalizer 23 | 24 | 25 | class ThugEvents(BaseNormalizer): 26 | channels = ('thug.events',) 27 | 28 | def normalize(self, data, channel, submission_timestamp): 29 | #split up original payload, so that there are only one root element 30 | data = '' + data + '' 31 | 32 | fake_root = ET.fromstring(escape_xml_illegal_chars(data)) 33 | 34 | return_list = [] 35 | 36 | #TODO: Register namespace with ElementTree? 
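        # Each thug.events payload is a MAEC 1.x bundle; every Analysis element
        # found below yields the analysed URL plus any associated code snippets,
        # which are emitted as hex-encoded 'file' documents together with
        # matching 'url' extraction entries.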
37 | for root in fake_root.findall('{http://maec.mitre.org/XMLSchema/maec-core-1}MAEC_Bundle'): 38 | analysis = root.findall('./{http://maec.mitre.org/XMLSchema/maec-core-1}Analyses' + 39 | '/{http://maec.mitre.org/XMLSchema/maec-core-1}Analysis') 40 | for a in analysis: 41 | timestamp = datetime.strptime( 42 | a.attrib['start_datetime'], '%Y-%m-%d %H:%M:%S.%f') 43 | 44 | data = {} 45 | object_element = a.find( 46 | '{http://maec.mitre.org/XMLSchema/maec-core-1}Subject/{http://maec.mitre.org/XMLSchema/maec-core-1}Object') 47 | 48 | data['url'] = object_element.find( 49 | './{http://maec.mitre.org/XMLSchema/maec-core-1}Internet_Object_Attributes/{http://maec.mitre.org/XMLSchema/maec-core-1}URI').text 50 | 51 | code_snippets = object_element.findall( 52 | './{http://maec.mitre.org/XMLSchema/maec-core-1}Associated_Code/{http://maec.mitre.org/XMLSchema/maec-core-1}Associated_Code_Snippet/{http://maec.mitre.org/XMLSchema/maec-core-1}Code_Snippet') 53 | for snippet in code_snippets: 54 | language = snippet.attrib['language'] 55 | source = snippet.find('./{http://maec.mitre.org/XMLSchema/maec-core-1}Code_Segment').text 56 | 57 | hashes = super(ThugEvents, self).generate_checksum_list(source) 58 | 59 | file_ = { 60 | 'encoding': 'hex', 61 | 'content_guess': language, 62 | 'data': source.encode('hex'), 63 | 'hashes': hashes 64 | } 65 | 66 | if 'extractions' not in data: 67 | data['extractions'] = [] 68 | data['extractions'].append({'timestamp': timestamp, 69 | 'hashes': hashes}) 70 | 71 | return_list.append({'file': file_}) 72 | return_list.append({'url': data}) 73 | return return_list 74 | 75 | #Thanks leo! 76 | #http://lsimons.wordpress.com/2011/03/17/stripping-illegal-characters-out-of-xml-in-python/ 77 | #_illegal_xml_chars_RE = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]') 78 | _illegal_xml_chars_RE = re.compile(u'[\x0e-\x1F]') 79 | def escape_xml_illegal_chars(val, replacement='SOMETHING_REMOVED_BY_MNEMOSYNE'): 80 | return _illegal_xml_chars_RE.sub(replacement, val) -------------------------------------------------------------------------------- /normalizer/modules/thug_files.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
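# Illustrative thug.files payload (see also
# test/test_normalizers/data_samples/thug_files_sample1.txt; values shortened):
#   {"url": "http://example.com/exploit.swf", "type": "SWF",
#    "md5": "...", "sha1": "...", "data": "<base64 encoded file content>"}
# The base64 body is decoded, stored hex encoded together with a freshly
# computed hash list, and linked back to the originating URL as an extraction.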
17 | 18 | import base64 19 | import json 20 | from normalizer.modules.basenormalizer import BaseNormalizer 21 | 22 | 23 | class ThugFiles(BaseNormalizer): 24 | channels = ('thug.files',) 25 | 26 | def normalize(self, data, channel, submission_timestamp): 27 | data = json.loads(data) 28 | decoded = base64.b64decode(data['data']) 29 | hashes = super(ThugFiles, self).generate_checksum_list(decoded) 30 | 31 | file_ = { 32 | 'encoding': 'hex', 33 | 'content_guess': data['type'], 34 | 'data': decoded.encode('hex'), 35 | 'hashes': hashes 36 | } 37 | 38 | #TODO: Fix URL parsing 39 | url_parts = super(ThugFiles, self).make_url(data['url']) 40 | url = { 41 | 'url': 'http://' + url_parts['netloc'] + url_parts['path'], 42 | 'extractions': [ 43 | { 44 | 'hashes': hashes, 45 | 'timestamp': submission_timestamp 46 | } 47 | ] 48 | 49 | } 50 | 51 | relations = {'file': file_, 52 | 'url': url} 53 | 54 | return [relations] 55 | -------------------------------------------------------------------------------- /normalizer/normalizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
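# The Normalizer defined below is wired up by runner.py roughly like this
# (sketch; the database name comes from mnemosyne.cfg):
#   db = mnemodb.MnemoDB(c['mongo_db'])
#   normalizer = Normalizer(db)
#   gevent.spawn(normalizer.start_processing)
# It repeatedly fetches batches of unnormalized hpfeed documents, dispatches
# each document to the normalizer registered for its channel, and hands the
# results to a small gevent pool for insertion into MongoDB.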
17 | 18 | from datetime import datetime 19 | 20 | from modules import basenormalizer 21 | from modules import glastopf_events 22 | from modules import glastopf_files 23 | from modules import thug_events 24 | from modules import thug_files 25 | from modules import kippo_events 26 | from modules import dionaea_capture 27 | from modules import dionaea_binary 28 | from modules import beeswarm_hive 29 | from modules import conpot_events 30 | from bson import ObjectId 31 | 32 | import gevent 33 | from gevent.pool import Pool 34 | 35 | import logging 36 | import traceback 37 | 38 | from xml.etree.ElementTree import ParseError 39 | 40 | logger = logging.getLogger(__name__) 41 | 42 | 43 | class Normalizer(object): 44 | def __init__(self, database): 45 | self.normalizers = {} 46 | #injected instance of database.Database 47 | self.database = database 48 | self.enabled = True 49 | 50 | #max number of concurrent mongodb inserters 51 | self.worker_pool = Pool(5) 52 | 53 | #map normalizers 54 | for n in basenormalizer.BaseNormalizer.__subclasses__(): 55 | normalizer = n() 56 | for channel in normalizer.channels: 57 | if channel in self.normalizers: 58 | raise Exception('Only one normalizer for each channel allowed (%s).'.format(channel)) 59 | else: 60 | self.normalizers[channel] = normalizer 61 | 62 | def start_processing(self, fetch_count=1500): 63 | 64 | oldest_id = ObjectId("ffffffffffffffffffffffff") 65 | while self.enabled: 66 | 67 | normalizations = 0 68 | error_list = [] 69 | no_normalizers_warnings = [] 70 | to_be_processed = self.database.get_hpfeed_data(oldest_id, fetch_count) 71 | to_be_inserted = [] 72 | 73 | for hpfeed_item in to_be_processed: 74 | try: 75 | channel = hpfeed_item['channel'] 76 | if hpfeed_item['_id'] < oldest_id: 77 | oldest_id = hpfeed_item['_id'] 78 | if channel in self.normalizers: 79 | norm = self.normalizers[channel].normalize(hpfeed_item['payload'], 80 | channel, hpfeed_item['timestamp']) 81 | 82 | #batch up normalized items 83 | to_be_inserted.append((norm, hpfeed_item['_id'])) 84 | normalizations += 1 85 | elif channel not in no_normalizers_warnings: 86 | error_list.append({'_id': hpfeed_item['_id'], 87 | 'last_error': "No normalizer found", 88 | 'last_error_timestamp': datetime.now()}) 89 | logger.warning('No normalizer could be found for channel: {0}.'.format(channel)) 90 | no_normalizers_warnings.append(channel) 91 | except Exception as err: 92 | error_list.append({'_id': hpfeed_item['_id'], 93 | 'last_error': err, 94 | 'last_error_timestamp': datetime.now()}) 95 | logger.warning('Failed to normalize and import item with hpfeed id = {0}, channel = {1}. ({2}). ' 96 | 'Exception details has been stored in the database.' 
97 | .format(hpfeed_item['_id'], hpfeed_item['channel'], err)) 98 | 99 | if len(error_list) > 0: 100 | self.database.hpfeed_set_errors(error_list) 101 | 102 | if len(to_be_inserted): 103 | self.worker_pool.spawn(self.inserter, to_be_inserted) 104 | 105 | if normalizations is 0: 106 | oldest_id = ObjectId("ffffffffffffffffffffffff") 107 | gevent.sleep(3) 108 | 109 | gevent.joinall(self.worker_pool) 110 | logger.info("Normalizer stopped.") 111 | 112 | def inserter(self, to_be_inserted): 113 | for norm, id in to_be_inserted: 114 | self.database.insert_normalized(norm, id) 115 | 116 | def stop(self): 117 | self.enabled = False 118 | -------------------------------------------------------------------------------- /persistance/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/persistance/__init__.py -------------------------------------------------------------------------------- /persistance/preagg_reports.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | import logging 18 | from datetime import datetime 19 | 20 | from pymongo import MongoClient 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class ReportGenerator: 26 | """ 27 | Generates pre-aggregated reports. 28 | """ 29 | 30 | def __init__(self, database_name): 31 | logger.info('Connecting to mongodb, using "{0}" as database.'.format(database_name)) 32 | conn = MongoClient(w=0) 33 | self.db = conn[database_name] 34 | 35 | def hpfeeds(self, entry): 36 | hour = entry['timestamp'].hour 37 | date = entry['timestamp'].strftime('%Y%m%d') 38 | query = {'channel': entry['channel'], 'date': date} 39 | 40 | update = {'$inc': {'hourly.{0}'.format(hour): 1}} 41 | self.db.daily_stats.update(query, update, upsert=True) 42 | 43 | #update total document 44 | channel = entry['channel'].replace('.', '_') 45 | self.db.daily_stats.update({'_id': 'total'}, 46 | {'$inc': {channel: 1}}, upsert=True) 47 | 48 | def do_legacy_hpfeeds(self): 49 | max_objectid = self.db.hpfeed.find({}, fields={'_id': 1}).sort('_id', -1).limit(1)[0]['_id'] 50 | logger.info('Doing pre-aggregation of historic hpfeeds data.') 51 | result = self.db.hpfeed.find({'_id': {'$lte': max_objectid}}, fields=['channel', 'timestamp']) 52 | items = 0 53 | for item in result: 54 | self.hpfeeds(item) 55 | items += 1 56 | logger.info('Finished pre-aggregation of historic hpfeeds data. 
({0} items.)'.format(items)) 57 | 58 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pymongo 2 | gevent 3 | bottle-mongodb 4 | bottle-cork==0.6 5 | git+https://github.com/defnull/bottle/ 6 | beaker 7 | webtest 8 | pycrypto 9 | python-magic 10 | nose 11 | kumo 12 | git+https://github.com/rep/hpfeeds/ 13 | sphinx 14 | sphinxcontrib-httpdomain -------------------------------------------------------------------------------- /runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import gevent 19 | import gevent.monkey 20 | 21 | gevent.monkey.patch_all() 22 | 23 | import os 24 | import argparse 25 | import logging 26 | import sys 27 | 28 | from ConfigParser import ConfigParser 29 | from normalizer.normalizer import Normalizer 30 | from persistance import mnemodb 31 | from webapi import mnemowebapi 32 | from feedpuller import feedpuller 33 | 34 | 35 | logger = logging.getLogger() 36 | 37 | 38 | def parse_config(config_file): 39 | if not os.path.isfile(config_file): 40 | sys.exit("Could not find configuration file: {0}".format(config_file)) 41 | 42 | parser = ConfigParser() 43 | parser.read(config_file) 44 | 45 | log_file = None 46 | loggly_token = None 47 | 48 | if parser.getboolean('file_log', 'enabled'): 49 | log_file = parser.get('file_log', 'file') 50 | 51 | do_logging(log_file, loggly_token) 52 | 53 | config = {} 54 | 55 | if parser.getboolean('loggly_log', 'enabled'): 56 | config['loggly_token'] = parser.get('loggly_log', 'token') 57 | 58 | config['mongo_db'] = parser.get('mongodb', 'database') 59 | 60 | config['hpf_feeds'] = parser.get('hpfriends', 'channels').split(',') 61 | config['hpf_ident'] = parser.get('hpfriends', 'ident') 62 | config['hpf_secret'] = parser.get('hpfriends', 'secret') 63 | config['hpf_port'] = parser.getint('hpfriends', 'port') 64 | config['hpf_host'] = parser.get('hpfriends', 'host') 65 | 66 | config['webapi_port'] = parser.getint('webapi', 'port') 67 | config['webapi_host'] = parser.get('webapi', 'host') 68 | 69 | return config 70 | 71 | 72 | def do_logging(file_log=None, loggly_token=None): 73 | logger.setLevel(logging.DEBUG) 74 | 75 | formatter = logging.Formatter('%(asctime)-15s (%(name)s) %(message)s') 76 | 77 | if file_log: 78 | file_log = logging.FileHandler(file_log) 79 | file_log.setLevel(logging.DEBUG) 80 | file_log.setFormatter(formatter) 81 | logger.addHandler(file_log) 82 | 83 | console_log = logging.StreamHandler() 84 | console_log.setLevel(logging.DEBUG) 85 | console_log.setFormatter(formatter) 86 | logger.addHandler(console_log) 87 | 88 | 89 | if __name__ == 
'__main__': 90 | parser = argparse.ArgumentParser(description='Mnemosyne') 91 | parser.add_argument('--config', dest='config_file', default='mnemosyne.cfg') 92 | parser.add_argument('--reset', action='store_true', default=False) 93 | parser.add_argument('--stats', action='store_true', default=False) 94 | parser.add_argument('--webpath', default='webapi/views') 95 | parser.add_argument('--no_normalizer', action='store_true', default=False, 96 | help='Do not start the normalizer') 97 | parser.add_argument('--no_feedpuller', action='store_true', default=False, 98 | help='Do not start the broker which takes care of storing hpfeed data.') 99 | parser.add_argument('--no_webapi', action='store_true', default=False, 100 | help='Do not enable the webapi.') 101 | 102 | args = parser.parse_args() 103 | c = parse_config(args.config_file) 104 | 105 | git_ref = "Unknown" 106 | if os.path.isfile('.git/refs/heads/master'): 107 | with open('.git/refs/heads/master', 'r') as f: 108 | git_ref = f.readline().rstrip() 109 | 110 | logger.info('Starting mnemosyne. (Git: {0})'.format(git_ref)) 111 | 112 | greenlets = {} 113 | 114 | db = mnemodb.MnemoDB(c['mongo_db']) 115 | 116 | webapi = None 117 | hpfriends_puller = None 118 | normalizer = None 119 | 120 | if args.reset: 121 | print 'Renormalization (reset) of a large database can take several days.' 122 | answer = raw_input('Write YES if you want to continue: ') 123 | if answer == 'YES': 124 | db.reset_normalized() 125 | else: 126 | print 'Aborting' 127 | sys.exit(0) 128 | 129 | if not args.no_feedpuller: 130 | logger.info("Spawning hpfriends feed puller.") 131 | hpfriends_puller = feedpuller.FeedPuller(db, c['hpf_ident'], c['hpf_secret'], c['hpf_port'], c['hpf_host'], c['hpf_feeds']) 132 | greenlets['hpfriends-puller'] = gevent.spawn(hpfriends_puller.start_listening) 133 | 134 | if not args.no_webapi: 135 | logger.info("Spawning web api.") 136 | #start web api and inject mongo info 137 | if 'loggly_token' in c: 138 | loggly_token = c['loggly_token'] 139 | else: 140 | loggly_token = None 141 | webapi = mnemowebapi.MnemoWebAPI(c['mongo_db'], static_file_path=args.webpath, loggly_token=loggly_token) 142 | greenlets['webapi'] = gevent.spawn(webapi.start_listening, c['webapi_host'], c['webapi_port']) 143 | 144 | 145 | if not args.no_normalizer: 146 | #start menmo and inject persistence module 147 | normalizer = Normalizer(db) 148 | logger.info("Spawning normalizer") 149 | greenlets['normalizer'] = gevent.spawn(normalizer.start_processing) 150 | 151 | try: 152 | 153 | if args.stats: 154 | while True: 155 | counts = db.collection_count() 156 | log_string = 'Mongo collection count:' 157 | for key, value in counts.items(): 158 | if key == 'hpfeed': 159 | value = '{0} ({1} in error state)'.format(value, db.get_hpfeed_error_count()) 160 | log_string += ' {0}: {1}, '.format(key, value) 161 | logging.info(log_string) 162 | gevent.sleep(1800) 163 | 164 | gevent.joinall(greenlets.values()) 165 | except KeyboardInterrupt as err: 166 | if hpfriends_puller: 167 | logger.info('Stopping HPFriends puller') 168 | hpfriends_puller.stop() 169 | if normalizer: 170 | logger.info('Stopping Normalizer') 171 | normalizer.stop() 172 | if 'webapi' in greenlets: 173 | greenlets['webapi'].kill(block=False) 174 | 175 | #wait for greenlets to do a graceful stop 176 | gevent.joinall(greenlets.values()) 177 | 178 | 179 | -------------------------------------------------------------------------------- /test/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/test/__init__.py -------------------------------------------------------------------------------- /test/test_normalizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/test/test_normalizers/__init__.py -------------------------------------------------------------------------------- /test/test_normalizers/base_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import unittest 19 | 20 | from normalizer.modules import basenormalizer 21 | 22 | 23 | class BaseNormalizerTest(unittest.TestCase): 24 | def test_RFC1918_filter(self): 25 | """ 26 | Tests if RFC1918 (private networks) addresses are recognized 27 | """ 28 | 29 | sut = basenormalizer.BaseNormalizer() 30 | #test if RFC1918 addresses returns true 31 | self.assertTrue(sut.is_RFC1918_addr('192.168.4.4')) 32 | self.assertTrue(sut.is_RFC1918_addr('10.1.2.3')) 33 | self.assertTrue(sut.is_RFC1918_addr('172.16.5.5')) 34 | 35 | #test if non-RFC1918 addresses returns false 36 | self.assertFalse(sut.is_RFC1918_addr('4.4.4.4')) 37 | self.assertFalse(sut.is_RFC1918_addr('8.8.8.8')) 38 | self.assertFalse(sut.is_RFC1918_addr('212.111.1.2')) 39 | -------------------------------------------------------------------------------- /test/test_normalizers/beeswarm_hive_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
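# The normalizer tests in this package are plain unittest cases and are run
# as part of the full suite with nosetests (see .travis.yml), e.g.:
#   nosetests test/test_normalizers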
17 | 18 | import unittest 19 | from normalizer.modules import beeswarm_hive 20 | from datetime import datetime 21 | 22 | 23 | class BeeswarnHiveTest(unittest.TestCase): 24 | def test_session(self): 25 | """ 26 | Test if a valid beeswarm json message get parsed as expected. 27 | """ 28 | 29 | input_submission_time = datetime(2012, 12, 14, 12, 22, 51) 30 | input_string = "{\"honey_ip\": \"111.222.222.111\", \"attacker_ip\": \"123.123.123.123\", \"login_attempts\": [{\"username\": \"james\", \"timestamp\": \"2013-02-20T15:02:25.228523\", \"password\": \"bond\"}, {\"username\": \"a\", \"timestamp\": \"2013-02-20T15:02:27.467429\", \"password\": \"s\"}, {\"username\": \"wokka\", \"timestamp\": \"2013-02-20T15:02:27.804439\", \"password\": \"wokka\"}], \"honey_port\": 23, \"timestamp\": \"2013-02-20T15:02:23.432581\", \"attacker_source_port\": 56982, \"id\": \"f2deccc8-0395-488c-87a1-b40850f8aa78\", \"protocol\": \"telnet\"}" 31 | 32 | 33 | auth_attempts = [{'login': 'james', 34 | 'password': 'bond'}, 35 | {'login': 'a', 36 | 'password': 's'}, 37 | {'login': 'wokka', 38 | 'password': 'wokka'}] 39 | 40 | session = { 41 | 'timestamp': datetime(2012, 12, 14, 12, 22, 51), 42 | 'source_ip': '123.123.123.123', 43 | 'source_port': 56982, 44 | 'destination_port': 23, 45 | 'destination_ip': '111.222.222.111', 46 | 'honeypot': 'beeswarm.hive', 47 | 'protocol': 'telnet', 48 | 'auth_attempts': auth_attempts, 49 | } 50 | 51 | expected_output = [{'session': session}, ] 52 | 53 | sut = beeswarm_hive.BeeswarmHive() 54 | result = sut.normalize(input_string, 'beeswarm.hive', input_submission_time) 55 | 56 | #test if we got the correct amount of root (document) items 57 | self.assertEqual(len(expected_output), len(result)) 58 | self.assertItemsEqual(expected_output[0]['session'], result[0]['session']) 59 | self.assertItemsEqual(expected_output[0]['session']['auth_attempts'], result[0]['session']['auth_attempts']) 60 | -------------------------------------------------------------------------------- /test/test_normalizers/conpot_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import unittest 19 | from datetime import datetime 20 | 21 | from normalizer.modules import conpot_events 22 | 23 | 24 | class ConpotTest(unittest.TestCase): 25 | def test_session(self): 26 | """ 27 | Test if a valid conpot json message get parsed as expected. 
28 | """ 29 | 30 | input_submission_time = datetime(2012, 12, 14, 12, 22, 51) 31 | input_string = "{\"data_type\": \"modbus\", \"data\": [{\"request_pdu\": \"0300000002\", \"function_code\": 3, \"slave_id\": 1," \ 32 | "\"response_pdu\": \"030400000001\"}, {\"request_pdu\": \"0100000002\", \"function_code\": 1, " \ 33 | "\"slave_id\": 5, \"response_pdu\": \"010100\"}], \"remote\": [\"12.34.43.21\", 45358], " \ 34 | "\"session_id\": \"069db6a6-5faa-4f3a-8de0-ce90af0e7b2c\"}" 35 | 36 | expected_session = { 37 | 'timestamp': datetime(2012, 12, 14, 12, 22, 51), 38 | 'source_ip': '12.34.43.21', 39 | 'source_port': 45358, 40 | 'destination_port': 502, 41 | 'honeypot': 'conpot', 42 | 'protocol': 'modbus', 43 | 'session_modbus':{ 'pdus': 44 | [ 45 | {'request_pdu': '0300000002', 'function_code': 3, 'slave_id': 1, 'response_pdu': '030400000001'}, 46 | {'request_pdu': '0100000002', 'function_code': 1, 'slave_id': 5, 'response_pdu': '010100'} 47 | ] 48 | } 49 | } 50 | 51 | expected_output = [{'session': expected_session},] 52 | 53 | sut = conpot_events.Conpot() 54 | result = sut.normalize(input_string, 'beeswarm.hive', input_submission_time) 55 | 56 | #test if we got the correct amount of root (document) items 57 | self.assertEqual(len(expected_output), len(result)) 58 | #test if we got the correct amount of keys 59 | self.assertEqual(len(expected_output[0]), len(result[0])) 60 | self.assertEqual(expected_output[0]['session']['source_ip'], result[0]['session']['source_ip']) 61 | self.assertEqual(expected_output[0]['session']['source_port'], result[0]['session']['source_port']) 62 | self.assertEqual(expected_output[0]['session']['destination_port'], result[0]['session']['destination_port']) 63 | self.assertEqual(expected_output[0]['session']['honeypot'], result[0]['session']['honeypot']) 64 | self.assertEqual(expected_output[0]['session']['protocol'], result[0]['session']['protocol']) 65 | self.assertItemsEqual(expected_output[0]['session']['session_modbus']['pdus'], 66 | result[0]['session']['session_modbus']['pdus']) -------------------------------------------------------------------------------- /test/test_normalizers/data_samples/glastopf_files_sample1.txt: -------------------------------------------------------------------------------- 1 | 755c4f9270db48f51f601638d2c4b4b0 
R0lGODlhAT8BPz8/P8O/w7/DvyHDuQQBPz8/Pyw/Pz8/AT8BPz8CAkQBPzs/PD9waHAKQGVycm9yX3JlcG9ydGluZygwKTsgQHNldF90aW1lX2xpbWl0KDApOyAkbG9sID0gJF9HRVRbJ2xvbCddOyAkb3NjID0gJF9HRVRbJ29zYyddOwppZiAoaXNzZXQoJGxvbCkpIHsgZXZhbChnemluZmxhdGUoYmFzZTY0X2RlY29kZSgncFpKZGE4SXdGSWJ2Qi9zUE1RaE5RTVI5WE0wNUN2c2JnMURURTV2UkppRW5uUmJ4dnk5SnJlNUM4R0ozNWYxNDNrTW95TVlTK3JOeW4vNWwvNzcxSDNUOStBQlp4QUhmNk5JMVR2U202b0R4SlowQ2M5blZHNXBqeG01WDlaRGEyUUNFWGErVERRZVdZbnppWGEyb3FON0lvSzBoT2FXQUgyUFhBNUlOS1lyb2EwWFlERG9YaHRGT3ZsWnNxZ2s0YUF6SUNqaUFMTEpicHM4Y1hpUlFtajBEdjYwMmpINFplakZPOGFRVzRSWVFHMmhiY2NXZUdlVlZIdys2UXhrd1FIYyt6RzRGaHNvSGxrcmxhRjBnRXorR2RoQ0V0Q2FBaVlpY2pTS1lXc2dXS3NQdVRMb0tNVFMrdnprNm1mK2VMVFdLV0xXOWw4RG1LaUdjZFdER2g2ZWU4cit2UnRNdnNXOTBDMnhXS3JBcVZqZ25SNUw5WlN3ckQxVWQxY1hUNnZtVnI4a3BIU3RiaTRtZXA2UGlJZlRlNUZKU2ZnRT0nKSkpOyBkaWU7IH0KZWxzZWlmIChpc3NldCgkb3NjKSkgeyBldmFsKGd6aW5mbGF0ZShiYXNlNjRfZGVjb2RlKCdwWkhOYXNNd0VJVHZoYjZEWWd5V0laUzJsRjVDd0E5U0VJNDhpbFVjeVdobG1oRHk3bDNKK2VraGtFTlBFak03M3c1U3FYZmRldE1TUGo5VUIrMDd5TktUcmxmUFR5VUkyOG1tQWV4bHlXZFNvWHN2YmhZclpuSTZXdTlFbmpLb2o1d05JTEVXVmNXK05VSXVzQnZqWWJhVGI0Mjh4QlQybGlMSkNudm9LcnROdXViaFpRTGxNalB3MjFzbml5OVhYSTBUVnhvSTk0RFVZeGpVRFh0bU5EZDlMdlNBY3FDSTNibVkzeWlLYllneWhaclp1a0l1ZkI3YUlpcnRYWVJqUko1bEVhNVRla0RyNUlPVlkwc1UrekRkWFhveC83MjJzYVE0NnFlZytkTk5Rb3graEpzZnZnaEYvZmZWaW9MRFA3MGRJQmVOZ1RjY3FXdHhGTmwvNGJBSmFEdFdsMit2N3gvMVNweFNXVDE0U3ZTOG1wV0FPQVdYUTBuNUJRPT0nKSkpOyB9CmVsc2UgeyBldmFsKGd6aW5mbGF0ZShiYXNlNjRfZGVjb2RlKCdwVk5kaTlzd0VIdy91UCt3RVFiRmtDWnB5MEc1eEtHaEpFZHBvQVgzbmtJd2lpM1hBdHNTbGx3bmQrUy9WN0xzT0w0MDlLRis4Y2ZPakdkblYwN0ljMFZ6QlI1SVZUQWhVeUlUS29kTzhPTzcvM09MbXpMZXVUTnd3cGlsVkNQUFJmT09kN3BTdnFtVVRlWCtqb1lKQnp6ZkwrYjdZb0hISWhGQm1aT01EdDB4TnAvbWsvMENkN2lZRnhtUVVER2Vld2hCUmxYQ0l3OEpMaFVDbW9mcUtLaUhzakpWVEpCQ1RReitYVVFVUVdCVVBVUXFCQ3lxNzVlNmloNFVLU2l4cUxacHFZNnA1bFFzVXNuanc2Y0hjWmdsbFAxSzFPT0g2VlFjdE1MWWFiRGE3YVFWc2IxMDRpd1hwUUpyeldCYUwzVStDQ1I3MFMvdnB3aCtrN1RVanptdFRNV0VnYTUxTERjSTlZK1VabHRaV2U0enBteHJRU25TczlZWEM3dGx4enF3a3BkdVBrN1I0cWJ2OG45OGEzT2NSUC8wL1d4aGV2NW1oTFVhaTg5cjEzU3YxKzcxL2c3MDVTUWtqK29WaTdtZytkRHU0Z2h3aGQyWmhSaTZSYlVrK3hXR2NWVXdSZHZxQ05xWnV1QjVIcXlYRzMvbHdpdlUzU0M5cWpiVGR0K2Zsay9MalZsVE0zVTBnMU1uVGxOSmJ4UDk1Mi8reW9mQllIREpoamhNdVR5N2FkMmZlbUs0RTF0ZmViRGJaM3ljK3FIWjZMdlFkTzF6eU1WUkk5WmZOeXQvaSsyeDNHS1ZpY0RNQys5R3plRjFld25ZNmJUbitycVJmaFJ2WStpemErOS9KNS8rQUE9PScpKSk7IH0KZXZhbChnemluZmxhdGUoYmFzZTY0X2RlY29kZSgnalZIWmJzSXdFSHl2MUgrd0xDU0Qyb1IzU2prcTBvSlVqb1pRcVNySU1zbVdHT0lZT2U3QjMzZE5FSWY2MGlmYk83TXp1K04yaXpUYjExY1ZLeFh3SW9Vc0kvZUVVajhSRnFvMHFhdjZHL0ZJdnlFYkJhMzVsTjRoVjI2NUFhVXRJTFhDcDBINEdvVHZOQXlHNHlqZzNWNHZwQXRIK3pCYWxaS3hUcERMMWlMZThMVkloZlhpVk9RcjhFU2laTzZ0dFZhWnVQbldabXVnNkRCL0JUYlZoVjN1Y3FHZ2VyUmc1Y2xIM1dIQUZqV2ZNV2RqTlFjbHBKdWJiYlNTUDNxWmRsYXU0c2RhN1NrRm1DOHcvTURDN2Y1cFFBbnVUdjBUM28raUNlK1BweEZiSExMSVpMNkpqVk1kU2ZLZ0MvS01CVExhQ2RLNDZMeFF4dDR6S0F4ZVpzRTA0ck53NENDMEhFeklHK1pEbmo1eklYTlVPb3ZjSXhGK0ZaWk9QN1lmSkFXUndINlFSOHdkOGN2NDUyYWVoN0ROZHA3VmYwQW4wSEhwVkk5cDNwTHoxUEJWTG9xWDBxaUdMZTFXcy8wTCcpKSk7Cj8+Cg== -------------------------------------------------------------------------------- /test/test_normalizers/data_samples/thug_events_sample1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | http://xxx.yyy.zzz/CBYCSBJHYZ.php?php=receipt 8 | 9 | 10 | 11 | 12 | 13 | Thug 14 | 0.4.15 15 | The Honeynet Project 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | [HTTP] URL: http://xxx.yyy.zzz/CBYCSBJHYZ.php?php=receipt (Status: 404, Referrer: None) 24 | 25 | 26 | 27 | 28 | 29 | [File Not Found] URL: 
http://xxx.yyy.zzz/CBYCSBJHYZ.php?php=receipt 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | http://uuu.uu:8080/forum/links/column.php 44 | 45 | 46 | 47 | 48 | 49 | Thug 50 | 0.4.15 51 | The Honeynet Project 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | [HTTP] URL: http://uuu.uu:8080/forum/links/column.php (Status: 400, Referrer: None) 60 | 61 | 62 | 63 | 64 | 65 | [Bad Request] URL: http://uuu.uu:8080/forum/links/column.php 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | http://ppp.aaa.mmm/wfgv.htm?php=receipt 80 | 81 | 82 | 83 | 84 | 85 | Thug 86 | 0.4.15 87 | The Honeynet Project 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | [HTTP] URL: http://ppp.aaa.mmm/wfgv.htm?php=receipt (Status: 404, Referrer: None) 96 | 97 | 98 | 99 | 100 | 101 | [File Not Found] URL: http://ppp.aaa.mmm/wfgv.htm?php=receipt 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /test/test_normalizers/data_samples/thug_files_sample1.txt: -------------------------------------------------------------------------------- 1 | {"url": "http://xxx.yyy.com/links/came_one_taking-others.php?kvs=31:1j:2v:1g:33&nqckr=3m:3a:33:33&avndsfrh=1j:1n:31:1l:1g:1m:1h:1n:1k:31:1p:1p:1f:1o:1h:1j:1l:1h:2v:2w:1g:1k&bblymzh=zgoco", "data": "Q1dTCqgFAAB4nH1Uy27bRhSd4Ws4kkhJliwp8jON+oirSHRcZOFNK8lxkSKNgQgBsrHDITWy2DKUKg7duqvCm/5G+gH9hgD9AgFB233XRX/BvUPGqBwHJSDOueeeuXN0LzjP0SbCDYQaGB2UTYTQQNkb771ezQPEaOjziG/vIvS6+kKXBDwl1JHLZ0gfzubs3BqHLJ50RkE8C9m5AVwguMHCcOrHem8OCpwYR9433BdK7BtDMQ+iUzqc8DAcTEfcTCIe+2zGb0+EmO13u2w09XjHn77s9oZ73fuO86DrJUEogsjOTuI/CD6PWEizMOIin6FEBGFcvWZnP7PTuk4eZGtmajCNBAsiPt+6LnoUwTHMF8EZz4Rr/1Nk/a21Mx6JeP+hXGSeCX/C5zSJRnwMR4zKD996T4uPmc/1Uy4GTzQf+lUA2IeufTsMfuQy6MkeysC6CgbTJBIViA5kvUAE06h//oS95CvXvHUeT9lI4XN9AOxVR6BPnWdPHz/l3yU8FuWllnX654L3NDkqeexhEIaSiY1EjO/tPrC/ly38OoERSFoFiTZL4okR8uhUTMzZNE6d5FJdutP25Lt3xoKQeSHXQvBjstFoMAnCUWG5V8V3emVda2v5xhBq759dpYZrakOpYx3pyLRqdq1YKzWVZrm50qw0q83VplnrqmVdMzBRTJpT8wXLLpbK5hWxTbBCFI1oOsEG0QjBJtEowTlawLSGaR3TW5jkNug2JngbEvQjTCmmOqYGpp9gehfTHUw/xbSN1zBZcYiyS5T7RNmjmqKi5QdjharyU1LhrYFfxUAEqWYeEhhrGKlIwSYtLpwT3aVMPyHtPCNfwka9RHO/qgtn8Qi5uYvf4fdHC/3p5tuNwwK+8DVAzRTpgNZSZABaTxE5ttqb7a1XhxZ27Z/76MI3gbmdhT8Vd3zqaX3Q5YD9oH1nSZe/wRS8/HEJKreg8mEJtz88Kivvowop9bG78mqJvVHNusHYJ5Xj6ldVxCrw/0p1hKhbeUd0tIp/U994ZPWvy0vP8nRW86yWsvMLq5fgFqNA5Y8a2LPGt7zCuFb/O1NJDBKABZm1PQP22S31v322Z6UZ2Kc3/rm8lAas1MCbcdOzIZcdKwuaO36xfQ/oopfr4btmcSGJTkqYKQFTU2Co5p2Fw5FrOO6a46477objbjqu4rhbz9FG+kwwSD+v5FB2yaY3K/oC0L8lnHCC", "sha1": "3d44fc9b69f221b28b69b8904fa8a65686a7accb", "type": "SWF", "md5": "6478baaaef1fea99d1c4520ec2d30002"} -------------------------------------------------------------------------------- /test/test_normalizers/dionaea_binary_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 
12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import os 19 | import unittest 20 | from normalizer.modules import dionaea_binary 21 | 22 | 23 | class DionaeaBinaryTest(unittest.TestCase): 24 | 25 | def test_channels(self): 26 | """ 27 | Test that the channel variable exists. 28 | """ 29 | self.assertTrue(dionaea_binary.DionaeaBinary.channels) 30 | 31 | def test_event(self): 32 | """ 33 | Test extraction of binary from dionaea and generation of correct hashes. 34 | """ 35 | 36 | input_string = open(os.path.dirname(__file__) + '/data_samples/dionaea_mwbinary_sample1.txt', 'r').read() 37 | 38 | expected_file = {'encoding': 'hex', 39 | 'content_guess': 'PE32 executable (DLL) (GUI) Intel 80386, for MS Windows, UPX compressed', 40 | 'data': input_string, 41 | 'hashes': { 42 | 'md5': '1ba31509af22ad4fbad5b0fd2de50a41', 43 | 'sha1': 'c5dc7b692801f9531454df8404d3ec19f0ccff94', 44 | 'sha512': '3b8c3182ae5936b839211df46a417ab9866eef58e5c363ad090ce7fdc4d83d6195b10c682fb4103a825fb53be44db6afcc91bdd61668e52c62e0f473607ba93e' 45 | }} 46 | expected_relation = [{'file': expected_file}] 47 | 48 | sut = dionaea_binary.DionaeaBinary() 49 | actual = sut.normalize(input_string, 'mwbinary.dionaea.sensorunique', None) 50 | self.assertTrue(len(expected_relation), len(actual)) 51 | self.assertEqual(expected_relation[0]['file'], actual[0]['file']) 52 | -------------------------------------------------------------------------------- /test/test_normalizers/dionaea_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import unittest 19 | from normalizer.modules import dionaea_capture 20 | from datetime import datetime 21 | 22 | 23 | class DionaeaTests(unittest.TestCase): 24 | def test_channels(self): 25 | """ 26 | Test that the channel variable exists. 27 | """ 28 | self.assertTrue(dionaea_capture.DionaeaCaptures.channels) 29 | 30 | def test_valid_message(self): 31 | """ 32 | Test correct parsing of a valid dionaea event. 
33 | """ 34 | input_time = datetime(2012, 12, 14, 12, 22, 51) 35 | input_string = '{\"url\": \"http://118.167.12.21:1852/psgmioap\", \"daddr\": \"134.61.128.61\", '\ 36 | '\"saddr\": \"118.167.12.21\", \"dport\": \"445\", \"sport\": \"3006\", '\ 37 | '\"sha512\": \"8cbcec5fe75ee97fc3c18bafdd79cdb5d83bfb4190ba5093907d1ee1946'\ 38 | '32813451b3aebfc8145452afae9ac5e413d2673746317c13b64856f3fcae12a109fd2\", '\ 39 | '\"md5\": \"0724c68f973e4e35391849cfb5259f86\"}' 40 | 41 | attachments = [ 42 | { 43 | 'description': 'Binary extraction', 44 | 'hashes': 45 | {'md5': '0724c68f973e4e35391849cfb5259f86', 46 | 'sha512': '8cbcec5fe75ee97fc3c18bafdd79cdb5d83bfb4190ba5093907d1ee194632813451b3aebfc8145452afae9ac5e413d2673746317c13b64856f3fcae12a109fd2'} 47 | }, ] 48 | 49 | expected_session = { 50 | 'timestamp': datetime(2012, 12, 14, 12, 22, 51), 51 | 'source_ip': '118.167.12.21', 52 | 'source_port': 3006, 53 | 'destination_ip': '134.61.128.61', 54 | 'destination_port': 445, 55 | 'honeypot': 'dionaea', 56 | 'protocol': 'microsoft-ds', 57 | 'attachments': attachments 58 | } 59 | 60 | url = {'url': 'http://118.167.12.21:1852/psgmioap', 61 | 'extractions': [{ 62 | 'timestamp': datetime(2012, 12, 14, 12, 22, 51), 63 | 'hashes': { 64 | 'sha512': '8cbcec5fe75ee97fc3c18bafdd79cdb5d83bfb4190ba5093907d1ee194632813451b3aebfc8145452afae9ac5e413d2673746317c13b64856f3fcae12a109fd2', 65 | 'md5': '0724c68f973e4e35391849cfb5259f86'}}]} 66 | 67 | expected_relations = [{'session': expected_session, 'url': url}] 68 | 69 | sut = dionaea_capture.DionaeaCaptures() 70 | actual = sut.normalize(input_string, 'dionaea.capture', input_time) 71 | 72 | self.assertItemsEqual(expected_relations[0]['session'], 73 | actual[0]['session']) 74 | #self.assertItemsEqual(expected_relations[0]['url'], 75 | # actual[0]['url']) 76 | -------------------------------------------------------------------------------- /test/test_normalizers/glastopf_files_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import unittest 19 | import os 20 | import base64 21 | from normalizer.modules import glastopf_files 22 | 23 | 24 | class GlastopfFilesTest(unittest.TestCase): 25 | def test_channels(self): 26 | """ 27 | Test that the channel variable exists. 28 | """ 29 | self.assertTrue(glastopf_files.GlastopfFiles.channels) 30 | 31 | def test_event(self): 32 | """ 33 | Test extraction of binary from dionaea and generation of correct hashes. 
34 | """ 35 | input_string = open(os.path.dirname(__file__) + '/data_samples/glastopf_files_sample1.txt', 'r').read() 36 | tmp, encoded = input_string.split(' ', 1) 37 | 38 | expected_file = {'encoding': 'hex', 39 | 'content_guess': 'GIF image data, version 89a, 16129 x 16129', 40 | 'data': base64.b64decode(encoded).encode('hex'), 41 | 'hashes': { 42 | 'md5': '755c4f9270db48f51f601638d2c4b4b0', 43 | 'sha1': '9ed97ccdd683aa8842a5473315e8b45bda168556', 44 | 'sha512': 'bb1d9c92a7cdc8dbd61365c5d757729a2c8d131fb5f49da3e4a6818635f5e8eb40a2bf06e9a25a069b618d934c53b367f3327a37b65c50e66d60580ee178a135' 45 | }} 46 | expected_relation = [{'file': expected_file}] 47 | 48 | sut = glastopf_files.GlastopfFiles() 49 | actual = sut.normalize(input_string, 'glastopf.files', None) 50 | 51 | self.assertTrue(len(expected_relation), len(actual)) 52 | self.assertEqual(expected_relation[0]['file'], actual[0]['file']) 53 | -------------------------------------------------------------------------------- /test/test_normalizers/kippo_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import unittest 19 | from normalizer.modules import kippo_events 20 | from datetime import datetime 21 | 22 | 23 | class KippoTests(unittest.TestCase): 24 | def test_session(self): 25 | """ 26 | Test if a valid kippo json message get parsed as expected. 
27 | """ 28 | 29 | input_submission_time = datetime(2012, 12, 14, 12, 22, 51) 30 | input_string = """{\"peerIP\": \"223.5.23.53\", \"loggedin\": null, \"ttylog\": \"01babadeadbeef\", \"hostIP\": \"192.168.6.211\", \"peerPort\": 36868, \"version\": \"SSH-2.0-libssh-0.1\", \"hostPort\": 2222, \"credentials\": [[\"root\", \"123muie123\"]]}""" 31 | session_ssh = {'version': 'SSH-2.0-libssh-0.1'} 32 | 33 | auth_attempts = [{'login': 'root', 34 | 'password': '123muie123'}] 35 | 36 | attachments = [ 37 | { 38 | 'description': "Kippo session log (ttylog).", 39 | 'data': '01babadeadbeef' 40 | }, ] 41 | 42 | session = { 43 | 'timestamp': datetime(2012, 12, 14, 12, 22, 51), 44 | 'source_ip': '1.2.3.4', 45 | 'source_port': 36228, 46 | 'destination_port': 2222, 47 | 'honeypot': 'kippo', 48 | 'protocol': 'ssh', 49 | 'session_ssh': session_ssh, 50 | 'auth_attempts': auth_attempts, 51 | 'attachments': attachments 52 | } 53 | 54 | expected_output = [{'session': session}, ] 55 | 56 | sut = kippo_events.KippoEvents() 57 | result = sut.normalize(input_string, 'kippo.sessions', input_submission_time) 58 | 59 | #test if we got the correct amount of root (document) items 60 | self.assertEqual(len(expected_output), len(result)) 61 | 62 | self.assertItemsEqual(expected_output[0]['session'], result[0]['session']) 63 | self.assertItemsEqual(expected_output[0]['session']['session_ssh'], result[0]['session']['session_ssh']) 64 | self.assertItemsEqual(expected_output[0]['session']['auth_attempts'], result[0]['session']['auth_attempts']) 65 | self.assertItemsEqual(expected_output[0]['session']['attachments'], result[0]['session']['attachments']) 66 | -------------------------------------------------------------------------------- /test/test_normalizers/thug_files.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import unittest 19 | import os 20 | import base64 21 | from datetime import datetime 22 | from normalizer.modules import thug_files 23 | 24 | 25 | class ThugFilesTest(unittest.TestCase): 26 | def test_channels(self): 27 | """ 28 | Test that the channel variable exists. 
29 | """ 30 | self.assertTrue(thug_files.ThugFiles.channels) 31 | 32 | def test_event(self): 33 | """ 34 | Test extraction of binary from thug.files 35 | """ 36 | input_string = open(os.path.dirname(__file__) + '/data_samples/thug_files_sample1.txt', 'r').read() 37 | 38 | dt = datetime.now() 39 | 40 | 41 | sut = thug_files.ThugFiles() 42 | actual = sut.normalize(input_string, 'thug.files', dt) 43 | #the result must contain exactly one dictionaries 44 | self.assertEqual(1, len(actual)) 45 | actual_url = actual[0]['url'] 46 | actual_file = actual[0]['file'] 47 | 48 | #check returned url dictionary 49 | #{'url': 'http://xxx.yyy.com/links/came_one_taking-others.php', 50 | # 'extractions': [{'hashes': { 51 | # 'md5': '6478baaaef1fea99d1c4520ec2d30002', 52 | # 'sha1': '3d44fc9b69f221b28b69b8904fa8a65686a7accb', 53 | # 'sha512': '123'}, 54 | # 'timestamp': dt}]} 55 | 56 | self.assertEqual('http://xxx.yyy.com/links/came_one_taking-others.php', actual_url['url']) 57 | self.assertEquals(1, len(actual_url['extractions'])) 58 | self.assertEquals('6478baaaef1fea99d1c4520ec2d30002', actual_url['extractions'][0]['hashes']['md5']) 59 | self.assertEquals('3d44fc9b69f221b28b69b8904fa8a65686a7accb', actual_url['extractions'][0]['hashes']['sha1']) 60 | self.assertEquals('dd3168f82679e9d59ada7f0bfd213f744aca237f5f9e4bae3abcfed998f088a7c79d9d426a5ac0468959915d48a0586576069a51f7fa3ee6fa6affb5f14edd22', actual_url['extractions'][0]['hashes']['sha512']) 61 | self.assertEquals(dt, actual_url['extractions'][0]['timestamp']) 62 | 63 | #check returned file dictionary 64 | #{'encoding': 'hex', 65 | #'content_guess': 'SWF', 66 | #'data': 'xxx', 67 | #'hashes': { 68 | # 'md5': '6478baaaef1fea99d1c4520ec2d30002', 69 | # 'sha1': '3d44fc9b69f221b28b69b8904fa8a65686a7accb', 70 | # 'sha512': '123' 71 | #}} 72 | 73 | self.assertEqual('hex', actual_file['encoding']) 74 | self.assertEqual('SWF', actual_file['content_guess']) 75 | self.assertEquals('6478baaaef1fea99d1c4520ec2d30002', actual_file['hashes']['md5']) 76 | self.assertEquals('3d44fc9b69f221b28b69b8904fa8a65686a7accb', actual_file['hashes']['sha1']) 77 | self.assertEquals('dd3168f82679e9d59ada7f0bfd213f744aca237f5f9e4bae3abcfed998f088a7c79d9d426a5ac0468959915d48a0586576069a51f7fa3ee6fa6affb5f14edd22', actual_file['hashes']['sha512']) 78 | 79 | -------------------------------------------------------------------------------- /test/test_normalizers/thug_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | 18 | import unittest 19 | import datetime 20 | import os 21 | from normalizer.modules import thug_events 22 | 23 | 24 | class ThugTests(unittest.TestCase): 25 | 26 | def test_channels(self): 27 | """ 28 | Test that channel variable exists. 29 | """ 30 | self.assertTrue(thug_events.ThugEvents.channels) 31 | 32 | def test_event_url_not_found(self): 33 | """ 34 | Test normalization of basic thug event. 35 | """ 36 | 37 | input_xml = ''' 38 | 39 | 40 | 41 | 42 | 43 | http://xxx.yyy.zzz/wfgv.htm?php=receipt 44 | 45 | 46 | 47 | 48 | 49 | Thug 50 | 0.4.15 51 | The Honeynet Project 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | [HTTP] URL: http://xxx.yyy.zzz/wfgv.htm?php=receipt (Status: 404, Referrer: None) 60 | 61 | 62 | 63 | 64 | 65 | [File Not Found] URL: http://xxx.yyy.zzz/wfgv.htm?php=receipt 66 | 67 | 68 | 69 | 70 | 71 | 72 | ''' 73 | expected_output = [{'url': {'url': 'http://xxx.yyy.zzz/wfgv.htm?php=receipt'}}] 74 | 75 | sut = thug_events.ThugEvents() 76 | actual = sut.normalize(input_xml, 'thug.events', None) 77 | 78 | self.assertEqual(len(expected_output), len(actual)) 79 | 80 | self.assertEqual(expected_output, actual) 81 | 82 | def test_event_multiple_entried(self): 83 | """ 84 | Test that hpfeeds data with multiple root elements get's parsed as expected. 85 | """ 86 | 87 | input_xml = open(os.path.dirname(__file__) + '/data_samples/thug_events_sample1.xml', 'r').read() 88 | 89 | expected_output = [ 90 | {'url': {'url': 'http://xxx.yyy.zzz/CBYCSBJHYZ.php?php=receipt'}}, 91 | {'url': {'url': 'http://uuu.uu:8080/forum/links/column.php'}}, 92 | {'url': {'url': 'http://ppp.aaa.mmm/wfgv.htm?php=receipt'}}, 93 | ] 94 | 95 | sut = thug_events.ThugEvents() 96 | actual = sut.normalize(input_xml, 'thug.events', None) 97 | #print actual 98 | self.assertEqual(expected_output, actual) 99 | 100 | def test_event_complex_sample(self): 101 | """ 102 | Test parsing of a complex sample of thug data. 
103 | """ 104 | 105 | input_xml = open(os.path.dirname(__file__) + '/data_samples/thug_events_sample2.xml', 'r').read() 106 | 107 | file_1 = { 108 | 'encoding': 'hex', 109 | 'content_guess': 'Javascript', 110 | 'data': 'placeholder', 111 | 'hashes': {'sha1': 'c80d2b26eec6a49068fdfda874e68b7aeb7669fa', 'sha512': '5a4ff355ba48b9c11ced27e06d32afa29a11c8ba230fd587288623d53f9927cbf84743884322182649c3c8fd706118541d54dfd7da68a82c606dd9e17c836887', 'md5': '864fff7df4a027049ed855dafc71e94d'} 112 | } 113 | 114 | file_2 = { 115 | 'encoding': 'hex', 116 | 'content_guess': 'Javascript', 117 | 'data': 'placeholder', 118 | 'hashes': {'sha1': '65854a75eac74a727c7714b78f5cd4a9602063ab', 'sha512': '1c0c85f1c33c3da94c8015acbe8f7a54849081af590ad1493037e340e59ad34fd06a38a27ee48e84d9a0710d4a0e5c85fd0510bb9bb09319dec65be53d173af8', 'md5': '58a2ac97c6e16870a758ebc8501ebf7f'} 119 | } 120 | 121 | file_3 = { 122 | 'encoding': 'hex', 123 | 'content_guess': 'Javascript', 124 | 'data': 'placeholder', 125 | 'hashes': {'sha1': '78a4a03e86463a0c624ac5077caeda00321da721', 'sha512': 'f9234c4afe440c3597d9d6c56e34e74fa746579fe15556f2e741625aaaf31ee6f1e63f17dbaffd5dd668b0b0f66e5d4cda0f756ad4ea505afdadc92566a17171', 'md5': '6d5a985d0e9bd02cdd6970c10770da0c'} 126 | } 127 | 128 | url = {'url': 129 | 'http://1212122sss222.tankplay.com/news/guarantee-detain.html', 130 | 'extractions': 131 | [{ 132 | 'timestamp': datetime.datetime(2012, 12, 23, 22, 8, 19, 467103), 133 | 'hashes': { 134 | 'sha1': 'c80d2b26eec6a49068fdfda874e68b7aeb7669fa', 135 | 'sha512': '5a4ff355ba48b9c11ced27e06d32afa29a11c8ba230fd587288623d53f9927cbf84743884322182649c3c8fd706118541d54dfd7da68a82c606dd9e17c836887', 136 | 'md5': '864fff7df4a027049ed855dafc71e94d'} 137 | }, { 138 | 'timestamp': datetime.datetime(2012, 12, 23, 22, 8, 19, 467103), 139 | 'hashes': { 140 | 'sha1': '65854a75eac74a727c7714b78f5cd4a9602063ab', 141 | 'sha512': '1c0c85f1c33c3da94c8015acbe8f7a54849081af590ad1493037e340e59ad34fd06a38a27ee48e84d9a0710d4a0e5c85fd0510bb9bb09319dec65be53d173af8', 142 | 'md5': '58a2ac97c6e16870a758ebc8501ebf7f'} 143 | }, { 144 | 'timestamp': datetime.datetime(2012, 12, 23, 22, 8, 19, 467103), 145 | 'hashes': { 146 | 'sha1': '78a4a03e86463a0c624ac5077caeda00321da721', 147 | 'sha512': 'f9234c4afe440c3597d9d6c56e34e74fa746579fe15556f2e741625aaaf31ee6f1e63f17dbaffd5dd668b0b0f66e5d4cda0f756ad4ea505afdadc92566a17171', 148 | 'md5': '6d5a985d0e9bd02cdd6970c10770da0c'} 149 | }] 150 | } 151 | 152 | expected = [ 153 | {'url': url}, 154 | {'file': file_1}, 155 | {'file': file_2}, 156 | {'file': file_3}, 157 | ] 158 | 159 | sut = thug_events.ThugEvents() 160 | actual = sut.normalize(input_xml, 'thug.events', None) 161 | 162 | #do not compare the actual data, hashes are good enough... 
163 | for d in actual: 164 | for key, value in d.items(): 165 | if key is 'file': 166 | value['data'] = 'placeholder' 167 | 168 | self.assertEqual(len(expected), len(actual)) 169 | 170 | self.assertEqual(sorted(expected), sorted(actual)) 171 | -------------------------------------------------------------------------------- /test/test_persistance/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/test/test_persistance/__init__.py -------------------------------------------------------------------------------- /test/test_persistance/mnemodb_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import unittest 19 | import uuid 20 | from bson.objectid import ObjectId 21 | from persistance import mnemodb 22 | from pymongo import MongoClient 23 | from datetime import datetime, timedelta 24 | 25 | 26 | class MnemodbTests(unittest.TestCase): 27 | def setUp(self): 28 | """ 29 | Generate new db name for each test. 30 | """ 31 | self.dbname = str(uuid.uuid4()) 32 | 33 | def tearDown(self): 34 | connection = MongoClient('localhost', 27017) 35 | connection.drop_database(self.dbname) 36 | connection.drop_database(self.dbname) 37 | 38 | def test_insert_hpfeed(self): 39 | """ 40 | Test correct insertion of raw hpfeed entry into mongodb. 41 | """ 42 | sut = mnemodb.MnemoDB(self.dbname) 43 | 44 | #ident, channel, payload 45 | insert_items = [ 46 | ('ident1', 'testchannel1', 'payload1'), 47 | ('ident2', 'testchannel2', 'payload2'), 48 | ('ident3', 'testchannel3', 'payload3') 49 | ] 50 | 51 | for item in insert_items: 52 | sut.insert_hpfeed(item[0], item[1], item[2]) 53 | 54 | #Check that the hpfeed collection has the correct amount of items 55 | db = MongoClient('localhost', 27017)[self.dbname] 56 | actual = list(db.hpfeed.find()) 57 | #TODO: assert content of actual 58 | self.assertEqual(len(insert_items), len(actual)) 59 | 60 | def test_reset(self): 61 | """ 62 | Check if all normalized collections get dropped and that normalized is set to False in 63 | all hpfeed entries. 
64 | """ 65 | 66 | db = MongoClient('localhost', 27017)[self.dbname] 67 | 68 | #prepare and insert dummy values directly into the hpfeed collection 69 | insert_items = [ 70 | {'channel': 'channel1', 'ident': 'ident1', 'payload': 'payload1', 'timestamp': datetime.utcnow(), 71 | 'normalized': True}, 72 | {'channel': 'channel2', 'ident': 'ident2', 'payload': 'payload2', 'timestamp': datetime.utcnow(), 73 | 'normalized': True}, 74 | {'channel': 'channel3', 'ident': 'ident3', 'payload': 'payload3', 'timestamp': datetime.utcnow(), 75 | 'normalized': True, 'last_error': "Some error", 'last_error_timestamp': datetime.now()} 76 | ] 77 | 78 | for item in insert_items: 79 | db['hpfeed'].insert(item) 80 | #create a few dummy collection that we expect to get dropped 81 | db['somecollection1'].insert({'something': 'something'}) 82 | db['somecollection2'].insert({'something': 'something'}) 83 | 84 | sut = mnemodb.MnemoDB(self.dbname) 85 | #This is the function we are testing 86 | sut.reset_normalized() 87 | 88 | #has normalized collections been removed 89 | self.assertNotIn('somecollection1', db.collection_names()) 90 | self.assertNotIn('somecollection2', db.collection_names()) 91 | 92 | #has all normalized been set to True 93 | self.assertEquals(0, db['hpfeed'].find({'normalized': True}).count()) 94 | #has last_error attribute been removed 95 | self.assertEquals(0, db['hpfeed'].find({'last_error': {'$exists': 1}}).count()) 96 | #has last_error_timestamp attribute been removed 97 | self.assertEquals(0, db['hpfeed'].find({'last_error_timestamp': {'$exists': 1}}).count()) 98 | 99 | 100 | 101 | def test_insert_dorks(self): 102 | """ 103 | Test insertion into the dorks collection. 104 | """ 105 | sut = mnemodb.MnemoDB(self.dbname) 106 | 107 | insert_items = [ 108 | {'dork': 109 | {'type': 'inurl', 'content': '/somedork.php', 'count': 1, 'timestamp': datetime.now()}}, 110 | {'dork': 111 | {'type': 'inurl', 'content': '/somedork.php', 'count': 1, 'timestamp': datetime.now()}}, 112 | {'dork': 113 | {'type': 'inurl', 'content': '/otherdork.php', 'count': 1, 'timestamp': datetime.now()}}, 114 | ] 115 | 116 | sut.insert_normalized(insert_items, ObjectId()) 117 | 118 | db = MongoClient('localhost', 27017)[self.dbname] 119 | 120 | #we expect two entries in the database 121 | db_entries = db['dork'].find().count() 122 | self.assertEqual(2, db_entries) 123 | 124 | result_one = db['dork'].find_one({'content': '/somedork.php'}) 125 | self.assertIn('lasttime', result_one) 126 | self.assertEqual(2, result_one['count']) 127 | self.assertEqual('inurl', result_one['type']) 128 | 129 | result_one = db['dork'].find_one({'content': '/otherdork.php'}) 130 | self.assertIn('lasttime', result_one) 131 | self.assertEqual(1, result_one['count']) 132 | self.assertEqual('inurl', result_one['type']) 133 | 134 | def test_set_errors(self): 135 | """ 136 | Test that error state is correctly set in the specified hpfeed entries. 
137 | """ 138 | 139 | #the entry we are going to manipulate 140 | o = ObjectId() 141 | insert_items = [ 142 | {'channel': 'channel1', 'ident': 'ident1', 'payload': 'payload1', 'timestamp': datetime.utcnow(), 143 | 'normalized': True}, 144 | {'_id': o, 'channel': 'channel2', 'ident': 'ident2', 'payload': 'payload2', 'timestamp': datetime.utcnow(), 145 | 'normalized': True}, 146 | {'channel': 'channel3', 'ident': 'ident3', 'payload': 'payload3', 'timestamp': datetime.utcnow(), 147 | 'normalized': True} 148 | ] 149 | 150 | db = MongoClient('localhost', 27017)[self.dbname] 151 | 152 | for item in insert_items: 153 | db['hpfeed'].insert(item) 154 | 155 | sut = mnemodb.MnemoDB(self.dbname) 156 | #set a single item in error state 157 | dt = datetime.now() 158 | sut.hpfeed_set_errors([ {'_id': o, 159 | 'last_error': "Some error message", 160 | 'last_error_timestamp': dt} ]) 161 | 162 | #retrieve it again and check if the error state was set. 163 | r = db['hpfeed'].find_one({'_id': o}) 164 | self.assertEqual('Some error message', r['last_error']) 165 | self.assertAlmostEqual(dt, r['last_error_timestamp'], delta=timedelta(seconds=1)) 166 | 167 | def test_reset_errors_on_successful_normalization(self): 168 | """ 169 | Test that error state is removed from hpfeed entries on successful normalizations. 170 | """ 171 | 172 | #the entry we are going to manipulate 173 | o = ObjectId() 174 | 175 | 176 | 177 | db = MongoClient('localhost', 27017)[self.dbname] 178 | 179 | db['hpfeed'].insert( {'_id': o, 180 | 'channel': 'channel3', 'ident': 'ident3', 181 | 'payload': 'payload3', 'timestamp': datetime.utcnow(), 182 | 'normalized': False, 183 | 'last_error': 'Some error', 184 | 'last_error_timestamp': datetime.now()}) 185 | 186 | sut = mnemodb.MnemoDB(self.dbname) 187 | 188 | #insert dummy url entry 189 | sut.insert_normalized([{'url': {'url': '/dummy/url'}}], o) 190 | 191 | #retrieve the item from the database 192 | r = db['hpfeed'].find_one({'_id': o}) 193 | #check that stats is as expected 194 | self.assertTrue(r['normalized']) 195 | self.assertNotIn('last_error', r) 196 | self.assertNotIn('last_error_timestamp', r) 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /test/test_webapi/test_d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/test/test_webapi/test_d/__init__.py -------------------------------------------------------------------------------- /test/test_webapi/test_d/dorks_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
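The DorkTest class that follows talks to a real local MongoDB instance: setUpClass seeds a uniquely named throw-away database directly through pymongo and then drives the `/aux/dorks` route through a WebTest application built by `helpers.prepare_app`. A minimal sketch of that seeding pattern (assuming, as the tests themselves do, a mongod listening on localhost:27017):

``` python
# Sketch of the fixture pattern used by the API test classes in this package:
# seed a throw-away database over pymongo, exercise the web API, drop it again.
import uuid
from datetime import datetime
from pymongo import MongoClient

dbname = str(uuid.uuid4())                   # unique database per test class
client = MongoClient('localhost', 27017)

# same document layout as the fixtures built in DorkTest.setUpClass below
client[dbname].dork.insert({'type': 'inurl',
                            'content': '/jamesBond.php',
                            'count': 1,
                            'lasttime': datetime(2011, 1, 1)})

# ... run assertions through the WebTest app here ...

client.drop_database(dbname)                 # mirrors tearDownClass
```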
17 | 18 | import unittest 19 | import uuid 20 | import helpers 21 | import json 22 | import os 23 | import tempfile 24 | import shutil 25 | from pymongo import MongoClient 26 | 27 | from datetime import datetime 28 | 29 | class DorkTest(unittest.TestCase): 30 | @classmethod 31 | def setUpClass(cls): 32 | cls.tmpdir = tempfile.mkdtemp() 33 | cls._dbname = str(uuid.uuid4()) 34 | insert_data = [] 35 | 36 | #type, content, count, timestamp 37 | test_data = ( 38 | ('inurl', '/jamesBond.php', 1, datetime(2011, 1, 1)), 39 | ('inurl', '/some/path', 2, datetime(2012, 2, 2)), 40 | ('inurl', '/no/fjords/here', 3, datetime(2013, 3, 3)), 41 | ) 42 | 43 | for type_, content, count, timestamp in test_data: 44 | entry = {'type': type_, 45 | 'content': content, 46 | 'count': count, 47 | 'lasttime': timestamp} 48 | insert_data.append(entry) 49 | 50 | c = MongoClient('localhost', 27017) 51 | 52 | for item in insert_data: 53 | c[cls._dbname].dork.insert(item) 54 | 55 | cls.sut = helpers.prepare_app(cls._dbname, cls.tmpdir, 'a_all') 56 | 57 | @classmethod 58 | def tearDownClass(cls): 59 | connection = MongoClient('localhost', 27017) 60 | connection.drop_database(cls._dbname) 61 | if os.path.isdir(cls.tmpdir): 62 | shutil.rmtree(cls.tmpdir) 63 | 64 | def test_get_dorks(self): 65 | sut = DorkTest.sut 66 | 67 | res = sut.get('/aux/dorks') 68 | result = json.loads(res.body)['dorks'] 69 | 70 | expected = [{'content': '/jamesBond.php', 'count': 1, 'type': 'inurl', 71 | 'timestamp': '2011-01-01T00:00:00'}, 72 | {'content': '/some/path', 'count': 2, 'type': 'inurl', 73 | 'timestamp': '2012-02-02T00:00:00'}, 74 | {'content': '/no/fjords/here', 'count': 3, 75 | 'type': 'inurl', 'timestamp': '2013-03-03T00:00:00'}] 76 | #TODO: Compare the actual output with expected 77 | self.assertEqual(len(expected), len(result)) 78 | 79 | def test_get_dorks_with_regex(self): 80 | sut = DorkTest.sut 81 | 82 | res = sut.get('/aux/dorks?regex=/fjords/') 83 | result = json.loads(res.body)['dorks'] 84 | 85 | #TODO: Compare the actual output with expected 86 | self.assertEqual(1, len(result)) 87 | 88 | def test_get_dorks_with_limit(self): 89 | sut = DorkTest.sut 90 | 91 | res = sut.get('/aux/dorks?limit=2') 92 | result = json.loads(res.body)['dorks'] 93 | 94 | #TODO: Compare the actual output with expected 95 | self.assertEqual(2, len(result)) 96 | 97 | def test_get_dorks_with_known_type(self): 98 | sut = DorkTest.sut 99 | 100 | res = sut.get('/aux/dorks?type=inurl') 101 | result = json.loads(res.body)['dorks'] 102 | 103 | #TODO: Compare the actual output with expected 104 | self.assertEqual(3, len(result)) 105 | 106 | def test_get_dorks_with_unknown_type(self): 107 | sut = DorkTest.sut 108 | 109 | res = sut.get('/aux/dorks?type=stuff') 110 | result = json.loads(res.body)['dorks'] 111 | 112 | #TODO: Compare the actual output with expected 113 | self.assertEqual(0, len(result)) 114 | 115 | def test_get_dorks_sorted_default(self): 116 | """ 117 | Test if dorks are sorted descending by count by default 118 | """ 119 | sut = DorkTest.sut 120 | 121 | res = sut.get('/aux/dorks') 122 | result = json.loads(res.body)['dorks'] 123 | 124 | self.assertEqual('/no/fjords/here', result[0]['content']) 125 | self.assertEqual('/some/path', result[1]['content']) 126 | self.assertEqual('/jamesBond.php', result[2]['content']) 127 | 128 | def test_get_dorks_sorted_ascending(self): 129 | """ 130 | Tests ascending sorting when providing sort_order as parameter. 
131 | """ 132 | sut = DorkTest.sut 133 | 134 | res = sut.get('/aux/dorks?sort_order=1') 135 | result = json.loads(res.body)['dorks'] 136 | 137 | self.assertEqual('/jamesBond.php', result[0]['content']) 138 | self.assertEqual('/some/path', result[1]['content']) 139 | self.assertEqual('/no/fjords/here', result[2]['content']) -------------------------------------------------------------------------------- /test/test_webapi/test_d/files_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | 19 | import unittest 20 | import uuid 21 | import json 22 | import os 23 | import tempfile 24 | import shutil 25 | 26 | from pymongo import MongoClient 27 | 28 | import helpers 29 | 30 | 31 | class FilesTests(unittest.TestCase): 32 | @classmethod 33 | def setUpClass(cls): 34 | cls.tmpdir = tempfile.mkdtemp() 35 | cls._dbname = str(uuid.uuid4()) 36 | 37 | test_data = [{ 38 | "content_guess": "PE32 executable (DLL) (GUI) Intel 80386, for MS Windows, UPX compressed", 39 | "data": "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef", 40 | "encoding": "hex", 41 | "hashes": { 42 | "md5": "10000000000000000000000000000md5", 43 | "sha1": "100000000000000000000000000000000000sha1", 44 | "sha512": "1000000000000000000000000000000000000000" 45 | "00000000000000000000000000000000000000000" 46 | "00000000000000000000000000000000000000000sha512" 47 | }, 48 | "hpfeed_ids": [ 49 | "10f3e41b09ce4533629cea00" 50 | ] 51 | }, 52 | { 53 | "content_guess": "PE32 executable (DLL) (GUI) Intel 80386, for MS Windows, UPX compressed", 54 | "data": "deadb33fdeadb33fdeadb33fdeadb33fdeadb33fdeadb33fdeadb33f", 55 | "encoding": "hex", 56 | "hashes": { 57 | "md5": "20000000000000000000000000000md5", 58 | "sha1": "200000000000000000000000000000000000sha1", 59 | "sha512": "200000000000000000000000000000000000000" 60 | "000000000000000000000000000000000000000" 61 | "00000000000000000000000000000000000000000000sha512" 62 | }, 63 | "hpfeed_ids": [ 64 | "20f3e41b09ce4533629cea00" 65 | ] 66 | }] 67 | 68 | c = MongoClient('localhost', 27017) 69 | 70 | for item in test_data: 71 | c[cls._dbname].file.insert(item) 72 | 73 | cls.sut = helpers.prepare_app(cls._dbname, cls.tmpdir, 'a_all') 74 | 75 | @classmethod 76 | def tearDownClass(cls): 77 | connection = MongoClient('localhost', 27017) 78 | connection.drop_database(cls._dbname) 79 | if os.path.isdir(cls.tmpdir): 80 | shutil.rmtree(cls.tmpdir) 81 | 82 | def test_md5_query(self): 83 | """ 84 | Test if the correct data is returned when querying files bu md5 hash. 
85 | """ 86 | sut = FilesTests.sut 87 | 88 | res = sut.get('/files?hash=10000000000000000000000000000md5') 89 | result = json.loads(res.body)['files'][0] 90 | self.assertEquals('deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef', result['data']) 91 | 92 | def test_sha1_query(self): 93 | """ 94 | Test if the correct data is returned when querying files by sha1 hash. 95 | """ 96 | sut = FilesTests.sut 97 | 98 | res = sut.get('/files?hash=100000000000000000000000000000000000sha1') 99 | result = json.loads(res.body)['files'][0] 100 | self.assertEquals('deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef', result['data']) 101 | 102 | def test_sha512_query(self): 103 | """ 104 | Test if the correct data is returned when querying files by sha512 hash. 105 | """ 106 | sut = FilesTests.sut 107 | 108 | res = sut.get('/files?hash=1000000000000000000000000000000000000000' 109 | '00000000000000000000000000000000000000000' 110 | '00000000000000000000000000000000000000000sha512') 111 | result = json.loads(res.body)['files'][0] 112 | self.assertEquals('deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef', result['data']) 113 | 114 | def test_no_data_query(self): 115 | """ 116 | Test to verify that the 'data' field is not returned when queried with no_data. 117 | """ 118 | sut = FilesTests.sut 119 | 120 | res = sut.get('/files?hash=10000000000000000000000000000md5&no_data') 121 | result = json.loads(res.body)['files'][0] 122 | self.assertTrue('data' not in result) 123 | self.assertEquals('PE32 executable (DLL) (GUI) Intel 80386, for MS Windows, UPX compressed', 124 | result['content_guess']) 125 | 126 | -------------------------------------------------------------------------------- /test/test_webapi/test_d/helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
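helpers.py below builds the system under test for these API tests: it mocks the Cork authentication with a fixed user, swaps the Bottle MongoDB plugin for one pointing at the throw-away test database, wraps the app in Beaker session middleware and hands back a WebTest `TestApp`. A hedged sketch of how a new test module could consume it (the test class, database and assertions are hypothetical; only the `prepare_app(dbname, tmppath, user_name)` signature and the 'a_all' fixture user come from the code below):

``` python
# Hypothetical test module reusing prepare_app() from the helpers below.
import json
import shutil
import tempfile
import unittest
import uuid

from pymongo import MongoClient

import helpers


class ExampleTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.tmpdir = tempfile.mkdtemp()      # scratch dir for Cork/Beaker files
        cls._dbname = str(uuid.uuid4())      # empty, uniquely named test database
        # 'a_all' is one of the dummy users created by setup_dir()
        cls.sut = helpers.prepare_app(cls._dbname, cls.tmpdir, 'a_all')

    @classmethod
    def tearDownClass(cls):
        MongoClient('localhost', 27017).drop_database(cls._dbname)
        shutil.rmtree(cls.tmpdir)

    def test_empty_dork_list(self):
        res = self.sut.get('/aux/dorks')
        self.assertEqual([], json.loads(res.body)['dorks'])
```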
17 | 18 | import bottle 19 | from bottle.ext import mongo 20 | from beaker.middleware import SessionMiddleware 21 | from cork import Cork 22 | from webtest import TestApp 23 | import json 24 | import webapi.shared_state as shared 25 | 26 | 27 | def prepare_app(dbname, tmppath, user_name): 28 | 29 | setup_dir(tmppath) 30 | #mock auth mechanism 31 | shared.auth = MockedCorkAuth(tmppath, user_name) 32 | 33 | #must be imported AFTER mocking 34 | from webapi.api.d import app 35 | 36 | a = app.app 37 | 38 | #when unittesting we want exceptions to break stuff 39 | a.catchall = False 40 | 41 | for plug in a.plugins: 42 | if isinstance(plug, bottle.ext.mongo.MongoPlugin): 43 | a.uninstall(plug) 44 | 45 | plugin = bottle.ext.mongo.MongoPlugin(uri="localhost", db=dbname, json_mongo=True) 46 | a.install(plugin) 47 | 48 | #wrap root app in beaker middleware 49 | session_opts = { 50 | 'session.type': 'memory', 51 | 'session.cookie_expires': 300, 52 | 'session.data_dir': tmppath, 53 | 'session.auto': True, 54 | #set secure attribute on cookie 55 | 'session.secure': True, 56 | } 57 | 58 | middlewared_app = SessionMiddleware(a, session_opts) 59 | sut = TestApp(middlewared_app) 60 | 61 | return sut 62 | 63 | 64 | #following method and two classes taken from the Cork documentation 65 | def setup_dir(testdir): 66 | """Setup test directory with valid JSON files""" 67 | 68 | #dummy users for testing 69 | users = {'admin': {'role': 'admin'}, 70 | 'a_all': {'role': 'access_all'}, 71 | 'a_norm': {'role': 'access_normalized'}, 72 | 'public': {'role': 'public'}} 73 | 74 | #set hash for all users 75 | for k, v in users.items(): 76 | v['hash'] = Cork._hash(k, 'password') 77 | 78 | with open("%s/users.json" % testdir, 'w') as f: 79 | f.write(json.dumps(users)) 80 | with open("%s/roles.json" % testdir, 'w') as f: 81 | f.write("""{"admin": 100, "access_all": 70, "access_normalized": 60,"public": 10}""") 82 | with open("%s/register.json" % testdir, 'w') as f: 83 | f.write("""{}""") 84 | 85 | 86 | class MockedCorkAuth(Cork): 87 | def __init__(self, directory, user_name): 88 | super(MockedCorkAuth, self).__init__(directory) 89 | self.user_name = user_name 90 | 91 | @property 92 | def _beaker_session(self): 93 | return RoAttrDict(username=self.user_name) 94 | 95 | def _setup_cookie(self, username): 96 | global cookie_name 97 | cookie_name = username 98 | 99 | 100 | class RoAttrDict(dict): 101 | """Read-only attribute-accessed dictionary. 102 | Used to mock beaker's session objects 103 | """ 104 | 105 | def __getattr__(self, name): 106 | return self[name] 107 | -------------------------------------------------------------------------------- /test/test_webapi/test_d/urls_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 
12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | 19 | import unittest 20 | import uuid 21 | import json 22 | import os 23 | import tempfile 24 | import shutil 25 | 26 | from pymongo import MongoClient 27 | 28 | import helpers 29 | 30 | 31 | class URLTest(unittest.TestCase): 32 | @classmethod 33 | def setUpClass(cls): 34 | cls.tmpdir = tempfile.mkdtemp() 35 | cls._dbname = str(uuid.uuid4()) 36 | 37 | test_data = [{ 38 | "url": "http://herr-doctor.zz", 39 | 40 | "extractions": [ 41 | {"hashes": { 42 | "md5": "10000000000000000000000000000md5", 43 | "sha1": "100000000000000000000000000000000000sha1", 44 | "sha512": "1000000000000000000000000000000000000000" 45 | "00000000000000000000000000000000000000000" 46 | "00000000000000000000000000000000000000000sha512" 47 | }, 48 | "timestamp": "2013-03-16T19:12:51.279000" 49 | }, 50 | {"hashes": { 51 | "md5": "20000000000000000000000000000md5", 52 | "sha1": "200000000000000000000000000000000000sha1", 53 | "sha512": "2000000000000000000000000000000000000000" 54 | "00000000000000000000000000000000000000000" 55 | "00000000000000000000000000000000000000000sha512" 56 | }, 57 | "timestamp": "2013-02-16T19:12:51.279000" 58 | } 59 | ] 60 | }, 61 | { 62 | "url": "http://gerber.zz", 63 | 64 | "extractions": [ 65 | {"hashes": { 66 | "md5": "30000000000000000000000000000md5", 67 | "sha1": "300000000000000000000000000000000000sha1", 68 | "sha512": "3000000000000000000000000000000000000000" 69 | "00000000000000000000000000000000000000000" 70 | "00000000000000000000000000000000000000000sha512" 71 | }, 72 | "timestamp": "2013-03-16T19:12:51.279000" 73 | }, 74 | {"hashes": { 75 | "md5": "10000000000000000000000000000md5", 76 | "sha1": "100000000000000000000000000000000000sha1", 77 | "sha512": "1000000000000000000000000000000000000000" 78 | "00000000000000000000000000000000000000000" 79 | "00000000000000000000000000000000000000000sha512" 80 | }, 81 | "timestamp": "2013-02-16T19:12:51.279000" 82 | } 83 | ] 84 | } 85 | ] 86 | 87 | c = MongoClient('localhost', 27017) 88 | 89 | for item in test_data: 90 | c[cls._dbname].url.insert(item) 91 | 92 | cls.sut = helpers.prepare_app(cls._dbname, cls.tmpdir, 'a_all') 93 | 94 | @classmethod 95 | def tearDownClass(cls): 96 | connection = MongoClient('localhost', 27017) 97 | connection.drop_database(cls._dbname) 98 | if os.path.isdir(cls.tmpdir): 99 | shutil.rmtree(cls.tmpdir) 100 | 101 | def test_md5_query(self): 102 | """ 103 | Test if the correct URL's are returned when querying by md5 hash. 104 | """ 105 | sut = URLTest.sut 106 | 107 | res = sut.get('/urls?hash=20000000000000000000000000000md5') 108 | result = json.loads(res.body)['urls'] 109 | 110 | self.assertEquals('http://herr-doctor.zz', result[0]['url']) 111 | self.assertEquals(1, len(result)) 112 | 113 | def test_sha1_query(self): 114 | """ 115 | Test if the correct URL's are returned when querying by sha1 hash. 116 | """ 117 | sut = URLTest.sut 118 | 119 | res = sut.get('/urls?hash=200000000000000000000000000000000000sha1') 120 | result = json.loads(res.body)['urls'] 121 | self.assertEquals('http://herr-doctor.zz', result[0]['url']) 122 | self.assertEquals(1, len(result)) 123 | 124 | def test_sha512_query(self): 125 | """ 126 | Test if the correct URL's are returned when querying by sha512 hash. 
127 | """ 128 | sut = URLTest.sut 129 | 130 | res = sut.get("/urls?hash=2000000000000000000000000000000000000000" 131 | "00000000000000000000000000000000000000000" 132 | "00000000000000000000000000000000000000000sha512") 133 | result = json.loads(res.body)['urls'] 134 | self.assertEquals('http://herr-doctor.zz', result[0]['url']) 135 | self.assertEquals(1, len(result)) 136 | 137 | def test_query_with_multiple_results(self): 138 | """ 139 | Tests if multiple URL's can be returned. 140 | """ 141 | sut = URLTest.sut 142 | 143 | res = sut.get('/urls?hash=10000000000000000000000000000md5') 144 | result = json.loads(res.body)['urls'] 145 | self.assertEquals(2, len(result)) -------------------------------------------------------------------------------- /test/test_webapi/test_v1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/test/test_webapi/test_v1/__init__.py -------------------------------------------------------------------------------- /test/test_webapi/test_v1/dorks_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | 18 | import unittest 19 | import uuid 20 | import helpers 21 | import json 22 | import os 23 | import tempfile 24 | import shutil 25 | from pymongo import MongoClient 26 | 27 | from datetime import datetime 28 | 29 | class DorkTest(unittest.TestCase): 30 | @classmethod 31 | def setUpClass(cls): 32 | cls.tmpdir = tempfile.mkdtemp() 33 | cls._dbname = str(uuid.uuid4()) 34 | insert_data = [] 35 | 36 | #type, content, count, timestamp 37 | test_data = ( 38 | ('inurl', '/jamesBond.php', 1, datetime(2011, 1, 1)), 39 | ('inurl', '/some/path', 2, datetime(2012, 2, 2)), 40 | ('inurl', '/no/fjords/here', 3, datetime(2013, 3, 3)), 41 | ) 42 | 43 | for type_, content, count, timestamp in test_data: 44 | entry = {'type': type_, 45 | 'content': content, 46 | 'count': count, 47 | 'lasttime': timestamp} 48 | insert_data.append(entry) 49 | 50 | c = MongoClient('localhost', 27017) 51 | 52 | for item in insert_data: 53 | c[cls._dbname].dork.insert(item) 54 | 55 | cls.sut = helpers.prepare_app(cls._dbname, cls.tmpdir, 'a_all') 56 | 57 | @classmethod 58 | def tearDownClass(cls): 59 | connection = MongoClient('localhost', 27017) 60 | connection.drop_database(cls._dbname) 61 | if os.path.isdir(cls.tmpdir): 62 | shutil.rmtree(cls.tmpdir) 63 | 64 | def test_get_dorks(self): 65 | sut = DorkTest.sut 66 | 67 | res = sut.get('/aux/dorks') 68 | result = json.loads(res.body)['dorks'] 69 | 70 | expected = [{'content': '/jamesBond.php', 'count': 1, 'type': 'inurl', 71 | 'timestamp': '2011-01-01T00:00:00'}, 72 | {'content': '/some/path', 'count': 2, 'type': 'inurl', 73 | 'timestamp': '2012-02-02T00:00:00'}, 74 | {'content': '/no/fjords/here', 'count': 3, 75 | 'type': 'inurl', 'timestamp': '2013-03-03T00:00:00'}] 76 | #TODO: Compare the actual output with expected 77 | self.assertEqual(3, len(result)) 78 | 79 | def test_get_dorks_with_regex(self): 80 | sut = DorkTest.sut 81 | 82 | res = sut.get('/aux/dorks?regex=/fjords/') 83 | result = json.loads(res.body)['dorks'] 84 | 85 | #TODO: Compare the actual output with expected 86 | self.assertEqual(1, len(result)) 87 | 88 | def test_get_dorks_with_limit(self): 89 | sut = DorkTest.sut 90 | 91 | res = sut.get('/aux/dorks?limit=2') 92 | result = json.loads(res.body)['dorks'] 93 | 94 | #TODO: Compare the actual output with expected 95 | self.assertEqual(2, len(result)) 96 | 97 | def test_get_dorks_with_known_type(self): 98 | sut = DorkTest.sut 99 | 100 | res = sut.get('/aux/dorks?type=inurl') 101 | result = json.loads(res.body)['dorks'] 102 | 103 | #TODO: Compare the actual output with expected 104 | self.assertEqual(3, len(result)) 105 | 106 | def test_get_dorks_with_unknown_type(self): 107 | sut = DorkTest.sut 108 | 109 | res = sut.get('/aux/dorks?type=stuff') 110 | result = json.loads(res.body)['dorks'] 111 | 112 | #TODO: Compare the actual output with expected 113 | self.assertEqual(0, len(result)) 114 | 115 | def test_get_dorks_sorted_default(self): 116 | """ 117 | Test if dorks are sorted descending by count by default 118 | """ 119 | sut = DorkTest.sut 120 | 121 | res = sut.get('/aux/dorks') 122 | result = json.loads(res.body)['dorks'] 123 | 124 | self.assertEqual('/no/fjords/here', result[0]['content']) 125 | self.assertEqual('/some/path', result[1]['content']) 126 | self.assertEqual('/jamesBond.php', result[2]['content']) 127 | 128 | def test_get_dorks_sorted_ascending(self): 129 | """ 130 | Tests ascending sorting when providing sort_order as parameter. 
131 | """ 132 | sut = DorkTest.sut 133 | 134 | res = sut.get('/aux/dorks?sort_order=1') 135 | result = json.loads(res.body)['dorks'] 136 | 137 | self.assertEqual('/jamesBond.php', result[0]['content']) 138 | self.assertEqual('/some/path', result[1]['content']) 139 | self.assertEqual('/no/fjords/here', result[2]['content']) -------------------------------------------------------------------------------- /test/test_webapi/test_v1/files_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | 19 | import unittest 20 | import uuid 21 | import json 22 | import os 23 | import tempfile 24 | import shutil 25 | 26 | from pymongo import MongoClient 27 | 28 | import helpers 29 | 30 | 31 | class FilesTests(unittest.TestCase): 32 | @classmethod 33 | def setUpClass(cls): 34 | cls.tmpdir = tempfile.mkdtemp() 35 | cls._dbname = str(uuid.uuid4()) 36 | 37 | test_data = [{ 38 | "content_guess": "PE32 executable (DLL) (GUI) Intel 80386, for MS Windows, UPX compressed", 39 | "data": "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef", 40 | "encoding": "hex", 41 | "hashes": { 42 | "md5": "10000000000000000000000000000md5", 43 | "sha1": "100000000000000000000000000000000000sha1", 44 | "sha512": "1000000000000000000000000000000000000000" 45 | "00000000000000000000000000000000000000000" 46 | "00000000000000000000000000000000000000000sha512" 47 | }, 48 | "hpfeed_ids": [ 49 | "10f3e41b09ce4533629cea00" 50 | ] 51 | }, 52 | { 53 | "content_guess": "PE32 executable (DLL) (GUI) Intel 80386, for MS Windows, UPX compressed", 54 | "data": "deadb33fdeadb33fdeadb33fdeadb33fdeadb33fdeadb33fdeadb33f", 55 | "encoding": "hex", 56 | "hashes": { 57 | "md5": "20000000000000000000000000000md5", 58 | "sha1": "200000000000000000000000000000000000sha1", 59 | "sha512": "200000000000000000000000000000000000000" 60 | "000000000000000000000000000000000000000" 61 | "00000000000000000000000000000000000000000000sha512" 62 | }, 63 | "hpfeed_ids": [ 64 | "20f3e41b09ce4533629cea00" 65 | ] 66 | }] 67 | 68 | c = MongoClient('localhost', 27017) 69 | 70 | for item in test_data: 71 | c[cls._dbname].file.insert(item) 72 | 73 | cls.sut = helpers.prepare_app(cls._dbname, cls.tmpdir, 'a_all') 74 | 75 | @classmethod 76 | def tearDownClass(cls): 77 | connection = MongoClient('localhost', 27017) 78 | connection.drop_database(cls._dbname) 79 | if os.path.isdir(cls.tmpdir): 80 | shutil.rmtree(cls.tmpdir) 81 | 82 | def test_md5_query(self): 83 | """ 84 | Test if the correct data is returned when querying files bu md5 hash. 
85 | """ 86 | sut = FilesTests.sut 87 | 88 | res = sut.get('/files?hash=10000000000000000000000000000md5') 89 | result = json.loads(res.body)['files'][0] 90 | self.assertEquals('deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef', result['data']) 91 | 92 | def test_sha1_query(self): 93 | """ 94 | Test if the correct data is returned when querying files bu md5 hash. 95 | """ 96 | sut = FilesTests.sut 97 | 98 | res = sut.get('/files?hash=1000000000000000000000000000000000000000' 99 | '00000000000000000000000000000000000000000' 100 | '00000000000000000000000000000000000000000sha512') 101 | result = json.loads(res.body)['files'][0] 102 | self.assertEquals('deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef', result['data']) 103 | 104 | def test_sha512_query(self): 105 | """ 106 | Test if the correct data is returned when querying files bu md5 hash. 107 | """ 108 | sut = FilesTests.sut 109 | 110 | res = sut.get('/files?hash=10000000000000000000000000000md5') 111 | result = json.loads(res.body)['files'][0] 112 | self.assertEquals('deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef', result['data']) 113 | 114 | def test_no_data_query(self): 115 | """ 116 | Test to verify that the 'data' field is not returned when queried with no_data. 117 | """ 118 | sut = FilesTests.sut 119 | 120 | res = sut.get('/files?hash=10000000000000000000000000000md5&no_data') 121 | result = json.loads(res.body)['files'][0] 122 | self.assertTrue('data' not in result) 123 | self.assertEquals('PE32 executable (DLL) (GUI) Intel 80386, for MS Windows, UPX compressed', 124 | result['content_guess']) 125 | 126 | -------------------------------------------------------------------------------- /test/test_webapi/test_v1/helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | 18 | import bottle 19 | from bottle.ext import mongo 20 | from beaker.middleware import SessionMiddleware 21 | from cork import Cork 22 | from webtest import TestApp 23 | import json 24 | import webapi.shared_state as shared 25 | 26 | 27 | def prepare_app(dbname, tmppath, user_name): 28 | #mock auth mechanism 29 | setup_dir(tmppath) 30 | shared.auth = MockedCorkAuth(tmppath, user_name) 31 | #must be imported AFTER mocking 32 | from webapi.api.v1 import app 33 | 34 | a = app.app 35 | #when unittesting we want exceptions to break stuff 36 | a.catchall = False 37 | 38 | for plug in a.plugins: 39 | if isinstance(plug, bottle.ext.mongo.MongoPlugin): 40 | a.uninstall(plug) 41 | 42 | plugin = bottle.ext.mongo.MongoPlugin(uri="localhost", db=dbname, json_mongo=True) 43 | a.install(plugin) 44 | 45 | #wrap root app in beaker middleware 46 | session_opts = { 47 | 'session.type': 'memory', 48 | 'session.cookie_expires': 300, 49 | 'session.data_dir': tmppath, 50 | 'session.auto': True, 51 | #set secure attribute on cookie 52 | 'session.secure': True, 53 | } 54 | 55 | middlewared_app = SessionMiddleware(a, session_opts) 56 | sut = TestApp(middlewared_app) 57 | 58 | return sut 59 | 60 | #following method and two classes taken from the Cork documentation 61 | def setup_dir(testdir): 62 | """Setup test directory with valid JSON files""" 63 | 64 | #dummy users for testing (user, role) 65 | users = {'admin': {'role': 'admin'}, 66 | 'a_all': {'role': 'access_all'}, 67 | 'a_norm': {'role': 'access_normalized'}, 68 | 'public': {'role': 'public'}} 69 | 70 | with open("%s/users.json" % testdir, 'w') as f: 71 | f.write(json.dumps(users)) 72 | with open("%s/roles.json" % testdir, 'w') as f: 73 | f.write("""{"public": 10, "admin": 100, "access_all": 70, "access_normalized": 60}""") 74 | with open("%s/register.json" % testdir, 'w') as f: 75 | f.write("""{}""") 76 | 77 | 78 | class MockedCorkAuth(Cork): 79 | """Mocked module where the current user is always 'admin'""" 80 | 81 | def __init__(self, directory, user_name): 82 | super(MockedCorkAuth, self).__init__(directory) 83 | self.user_name = user_name 84 | 85 | @property 86 | def _beaker_session(self): 87 | return RoAttrDict(username='a_all') 88 | 89 | def _setup_cookie(self, username): 90 | global cookie_name 91 | cookie_name = username 92 | 93 | 94 | class RoAttrDict(dict): 95 | """Read-only attribute-accessed dictionary. 96 | Used to mock beaker's session objects 97 | """ 98 | 99 | def __getattr__(self, name): 100 | return self[name] 101 | -------------------------------------------------------------------------------- /test/test_webapi/test_v1/urls_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | 18 | 19 | import unittest 20 | import uuid 21 | import json 22 | import os 23 | import tempfile 24 | import shutil 25 | 26 | from pymongo import MongoClient 27 | 28 | import helpers 29 | 30 | 31 | class URLTest(unittest.TestCase): 32 | @classmethod 33 | def setUpClass(cls): 34 | cls.tmpdir = tempfile.mkdtemp() 35 | cls._dbname = str(uuid.uuid4()) 36 | 37 | test_data = [{ 38 | "url": "http://herr-doctor.zz", 39 | 40 | "extractions": [ 41 | {"hashes": { 42 | "md5": "10000000000000000000000000000md5", 43 | "sha1": "100000000000000000000000000000000000sha1", 44 | "sha512": "1000000000000000000000000000000000000000" 45 | "00000000000000000000000000000000000000000" 46 | "00000000000000000000000000000000000000000sha512" 47 | }, 48 | "timestamp": "2013-03-16T19:12:51.279000" 49 | }, 50 | {"hashes": { 51 | "md5": "20000000000000000000000000000md5", 52 | "sha1": "200000000000000000000000000000000000sha1", 53 | "sha512": "2000000000000000000000000000000000000000" 54 | "00000000000000000000000000000000000000000" 55 | "00000000000000000000000000000000000000000sha512" 56 | }, 57 | "timestamp": "2013-02-16T19:12:51.279000" 58 | } 59 | ] 60 | }, 61 | { 62 | "url": "http://gerber.zz", 63 | 64 | "extractions": [ 65 | {"hashes": { 66 | "md5": "30000000000000000000000000000md5", 67 | "sha1": "300000000000000000000000000000000000sha1", 68 | "sha512": "3000000000000000000000000000000000000000" 69 | "00000000000000000000000000000000000000000" 70 | "00000000000000000000000000000000000000000sha512" 71 | }, 72 | "timestamp": "2013-03-16T19:12:51.279000" 73 | }, 74 | {"hashes": { 75 | "md5": "10000000000000000000000000000md5", 76 | "sha1": "100000000000000000000000000000000000sha1", 77 | "sha512": "1000000000000000000000000000000000000000" 78 | "00000000000000000000000000000000000000000" 79 | "00000000000000000000000000000000000000000sha512" 80 | }, 81 | "timestamp": "2013-02-16T19:12:51.279000" 82 | } 83 | ] 84 | } 85 | ] 86 | 87 | c = MongoClient('localhost', 27017) 88 | 89 | for item in test_data: 90 | c[cls._dbname].url.insert(item) 91 | 92 | cls.sut = helpers.prepare_app(cls._dbname, cls.tmpdir, 'a_all') 93 | 94 | @classmethod 95 | def tearDownClass(cls): 96 | connection = MongoClient('localhost', 27017) 97 | connection.drop_database(cls._dbname) 98 | if os.path.isdir(cls.tmpdir): 99 | shutil.rmtree(cls.tmpdir) 100 | 101 | def test_md5_query(self): 102 | """ 103 | Test if the correct URL's are returned when querying by md5 hash. 104 | """ 105 | sut = URLTest.sut 106 | 107 | res = sut.get('/urls?hash=20000000000000000000000000000md5') 108 | result = json.loads(res.body)['urls'] 109 | 110 | self.assertEquals('http://herr-doctor.zz', result[0]['url']) 111 | self.assertEquals(1, len(result)) 112 | 113 | def test_sha1_query(self): 114 | """ 115 | Test if the correct URL's are returned when querying by sha1 hash. 116 | """ 117 | sut = URLTest.sut 118 | 119 | res = sut.get('/urls?hash=200000000000000000000000000000000000sha1') 120 | result = json.loads(res.body)['urls'] 121 | self.assertEquals('http://herr-doctor.zz', result[0]['url']) 122 | self.assertEquals(1, len(result)) 123 | 124 | def test_sha512_query(self): 125 | """ 126 | Test if the correct URL's are returned when querying by sha512 hash. 
127 | """ 128 | sut = URLTest.sut 129 | 130 | res = sut.get("/urls?hash=2000000000000000000000000000000000000000" 131 | "00000000000000000000000000000000000000000" 132 | "00000000000000000000000000000000000000000sha512") 133 | result = json.loads(res.body)['urls'] 134 | self.assertEquals('http://herr-doctor.zz', result[0]['url']) 135 | self.assertEquals(1, len(result)) 136 | 137 | def test_query_with_multiple_results(self): 138 | """ 139 | Tests if multiple URL's can be returned. 140 | """ 141 | sut = URLTest.sut 142 | 143 | res = sut.get('/urls?hash=10000000000000000000000000000md5') 144 | result = json.loads(res.body)['urls'] 145 | self.assertEquals(2, len(result)) 146 | -------------------------------------------------------------------------------- /webapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/webapi/__init__.py -------------------------------------------------------------------------------- /webapi/admin.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
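admin.py below wires up authentication and user management on top of the shared Cork instance: `/login`, `/logout`, an `/admin` view and form-driven `/create_user`, `/delete_user`, `/create_role` and `/delete_role` handlers. As an illustration only, a client-side flow against those routes might look like the following (host, credentials and the created account are placeholders, and requests is not a dependency of this project):

``` python
# Illustrative client for the admin routes defined below.
# Host, credentials and the created account are placeholders.
import requests

base = 'https://mnemosyne.example.org:8282'
session = requests.Session()

# /login reads 'username' and 'password' from the POST form
session.post(base + '/login',
             data={'username': 'admin', 'password': 'secret'},
             verify=False)

# /create_user reads 'username', 'role' and 'password'; Cork only
# allows this for users holding the 'admin' role
session.post(base + '/create_user',
             data={'username': 'analyst',
                   'role': 'access_normalized',
                   'password': 'changeme'})

session.get(base + '/logout')
```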
17 | 18 | import bottle 19 | from bottle import get, post, route, static_file, view, HTTPError 20 | import shared_state 21 | import logging 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | @route('/unauth') 26 | def login(): 27 | return HTTPError(401, 'Unauthorized') 28 | 29 | 30 | @post('/login') 31 | def login(): 32 | """Authenticate users""" 33 | username = post_get('username') 34 | password = post_get('password') 35 | logger.info("Authentication attempt with username: [{0}]".format(username)) 36 | if shared_state.auth.login(username, password): 37 | return "You provided valid credentials" 38 | else: 39 | return HTTPError(401, 'Invalid credentials') 40 | 41 | 42 | @route('/logout') 43 | def logout(): 44 | shared_state.auth.logout(success_redirect='/unauth') 45 | 46 | 47 | @route('/admin') 48 | @view('admin_page') 49 | def admin(): 50 | """Only admin users can see this""" 51 | shared_state.auth.require(role='admin', fail_redirect='/unauth') 52 | return dict( 53 | current_user=shared_state.auth.current_user, 54 | users=shared_state.auth.list_users(), 55 | roles=shared_state.auth.list_roles() 56 | ) 57 | 58 | 59 | @post('/create_user') 60 | def create_user(): 61 | try: 62 | shared_state.auth.create_user(postd().username, postd().role, postd().password) 63 | return dict(ok=True, msg='') 64 | except Exception, e: 65 | return dict(ok=False, msg=e.message) 66 | 67 | 68 | @post('/delete_user') 69 | def delete_user(): 70 | try: 71 | shared_state.auth.delete_user(post_get('username')) 72 | return dict(ok=True, msg='') 73 | except Exception, e: 74 | return dict(ok=False, msg=e.message) 75 | 76 | 77 | @post('/create_role') 78 | def create_role(): 79 | try: 80 | shared_state.auth.create_role(post_get('role'), post_get('level')) 81 | return dict(ok=True, msg='') 82 | except Exception, e: 83 | return dict(ok=False, msg=e.message) 84 | 85 | 86 | @post('/delete_role') 87 | def delete_role(): 88 | try: 89 | shared_state.auth.delete_role(post_get('role')) 90 | return dict(ok=True, msg='') 91 | except Exception, e: 92 | return dict(ok=False, msg=e.message) 93 | 94 | 95 | def postd(): 96 | return bottle.request.forms 97 | 98 | 99 | def post_get(name, default=''): 100 | return bottle.request.POST.get(name, default).strip() -------------------------------------------------------------------------------- /webapi/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/webapi/api/__init__.py -------------------------------------------------------------------------------- /webapi/api/d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/webapi/api/d/__init__.py -------------------------------------------------------------------------------- /webapi/api/d/app.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('webapi') 3 | 4 | from bottle import Bottle 5 | 6 | import webapi.shared_state as state 7 | 8 | app = Bottle() 9 | 10 | if state.plug is not None: 11 | app.install(state.plug) 12 | 13 | auth = state.auth 14 | 15 | import files 16 | import hpfeeds 17 | import sessions 18 | import urls 19 | import dorks 20 | -------------------------------------------------------------------------------- /webapi/api/d/dorks.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | from cork import AAAException 19 | from bottle import response, get, request, HTTPError, request 20 | from helpers import jsonify 21 | from datetime import date, datetime 22 | from app import app 23 | from app import auth 24 | 25 | @app.get('/aux/dorks') 26 | def get_dorks(mongodb): 27 | try: 28 | auth.require(role='public') 29 | except AAAException as e: 30 | return HTTPError(401, e.message) 31 | 32 | query_keys = request.query.keys() 33 | query_dict = {} 34 | 35 | #set default parameters 36 | sort_key = 'count' 37 | sort_order = -1 38 | limit = 200 39 | 40 | if 'sort_by' in query_keys: 41 | sort_key = request.query.sort_by 42 | 43 | if 'sort_order' in query_keys: 44 | try: 45 | sort_order = int (request.query.sort_order) 46 | except ValueError: 47 | raise HTTPError(400, 'sort_order must be an integer.') 48 | 49 | if 'regex' in query_keys: 50 | query_dict['content'] = {'$regex': request.query.regex} 51 | 52 | #inurl, intitle, etc. 53 | if 'type' in query_keys: 54 | query_dict['type'] = request.query.type 55 | 56 | if 'limit' in query_keys: 57 | limit = int(request.query.limit) 58 | 59 | result = list(mongodb['dork'].find(query_dict).sort(sort_key, sort_order).limit(limit)) 60 | 61 | #delete mongo _id - better way? 62 | for entry in result: 63 | entry['firsttime'] = entry['_id'].generation_time 64 | del entry['_id'] 65 | 66 | return jsonify({'dorks': result}, response) 67 | -------------------------------------------------------------------------------- /webapi/api/d/files.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
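files.py below decides which hash field to query purely from the length of the `hash` parameter: 32 characters are treated as an MD5, 40 as a SHA1 and 128 as a SHA512 digest. The same convention is reused by the `/urls` handler further down. A small standalone sketch of that dispatch rule:

``` python
# Length-based hash dispatch as used by the /files and /urls handlers below.
def hash_field(hash_value, prefix='hashes'):
    """Map a hex digest to the MongoDB field the API filters on."""
    length_to_name = {32: 'md5', 40: 'sha1', 128: 'sha512'}
    try:
        return '{0}.{1}'.format(prefix, length_to_name[len(hash_value)])
    except KeyError:
        raise ValueError('%d characters is not a supported hash length'
                         % len(hash_value))

# a 32 character digest selects the MD5 field
assert hash_field('d41d8cd98f00b204e9800998ecf8427e') == 'hashes.md5'
```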
17 | 18 | from cork import AAAException 19 | from bottle import abort, request, response, HTTPError 20 | from helpers import simple_group, jsonify 21 | from app import app 22 | from app import auth 23 | 24 | 25 | @app.route('/files') 26 | @app.route('/files/') 27 | def get_files(mongodb): 28 | try: 29 | auth.require(role='access_normalized') 30 | except AAAException as e: 31 | return HTTPError(401, e.message) 32 | 33 | query_keys = request.query.keys() 34 | query_dict = {} 35 | 36 | if 'limit' in query_keys: 37 | limit = int(request.query.limit) 38 | else: 39 | limit = 50 40 | 41 | if 'hash' in query_keys: 42 | hash_length = len(request.query['hash']) 43 | if hash_length == 128: 44 | query_dict['hashes.sha512'] = request.query['hash'] 45 | elif hash_length == 40: 46 | query_dict['hashes.sha1'] = request.query['hash'] 47 | elif hash_length == 32: 48 | query_dict['hashes.md5'] = request.query['hash'] 49 | else: 50 | abort(400, '{0} could not be recognized as a supported hash. Currently supported hashes are: SHA1, SHA512 and MD5.'.format(request.query['hash'])) 51 | else: 52 | abort(400, 'The only supported query parameter is "hash".') 53 | 54 | p_limit = {'_id': False} 55 | 56 | if 'no_data' in query_keys: 57 | p_limit['data'] = False 58 | 59 | result = list(mongodb['file'].find(query_dict, fields=p_limit).limit(limit)) 60 | return jsonify({'files': result}, response) 61 | 62 | 63 | @app.route('/files/types') 64 | def files_types(mongodb): 65 | try: 66 | auth.require(role='access_normalized') 67 | except AAAException as e: 68 | return HTTPError(401, e.message) 69 | result = simple_group('file', 'content_guess', mongodb) 70 | return jsonify(result, response) -------------------------------------------------------------------------------- /webapi/api/d/helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | from bottle import abort, HTTPError 19 | from cork import AAAException, AuthException 20 | from datetime import datetime 21 | from bson import ObjectId 22 | from bson.code import Code 23 | 24 | import json 25 | import uuid 26 | 27 | 28 | def simple_group(collection, attribute, mongodb): 29 | """ 30 | Helper method to ease group_by operations. 31 | """ 32 | reducer = Code(""" 33 | function (current, result) { result.count += 1; } 34 | """) 35 | #TODO: Convert to map/reduce. (current state sets read-lock) 36 | result = mongodb[collection].group(key={attribute: 1}, condition={}, initial={"count": 0}, reduce=reducer) 37 | output_rootname = attribute + 's' 38 | #Why does pymongo return the aggregation as a float?
39 | for item in result: 40 | item['count'] = int(item['count']) 41 | return {output_rootname: result} 42 | 43 | 44 | def jsonify(i, r): 45 | if i is None: 46 | i = {} 47 | if isinstance(i, dict): 48 | #Attempt to serialize, raises exception on failure 49 | json_response = json.dumps(i, default=json_default, sort_keys=True) 50 | #Set content type only if serialization succesful 51 | r.content_type = 'application/json' 52 | return json_response 53 | else: 54 | abort(500, 'Error while trying to serialize to json.') 55 | 56 | 57 | def json_default(obj): 58 | if isinstance(obj, datetime): 59 | return obj.isoformat() 60 | elif isinstance(obj, uuid.UUID): 61 | return str(obj) 62 | elif isinstance(obj, buffer): 63 | return str(obj) 64 | elif isinstance(obj, ObjectId): 65 | return str(obj) 66 | else: 67 | return None 68 | 69 | -------------------------------------------------------------------------------- /webapi/api/d/hpfeeds.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
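hpfeeds.py below exposes the raw hpfeed entries (filterable by `id`, `_id` and `channel`, newest first, with an optional `limit`) plus the pre-aggregated daily statistics. Roughly, the handlers end up issuing MongoDB queries like the ones sketched here (the database name, ObjectId and stats date value are placeholders; the date format is whatever the aggregation job writes):

``` python
# Rough pymongo equivalents of the /hpfeeds queries built by the handlers below.
from bson import ObjectId
from pymongo import MongoClient

db = MongoClient('localhost', 27017)['mnemosyne']   # placeholder database name

# GET /hpfeeds?channel=glastopf.events&limit=50
raw = list(db.hpfeed.find({'channel': 'glastopf.events'})
                    .sort('timestamp', -1).limit(50))

# GET /hpfeeds?id=<objectid>  (the 'id' parameter maps to Mongo's _id)
one = db.hpfeed.find_one({'_id': ObjectId('0123456789abcdef01234567')})

# GET /hpfeeds/stats?date=...&channel=glastopf.events
stats = list(db.daily_stats.find({'date': '2013-03-16',
                                  'channel': 'glastopf.events'}))
```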
17 | 18 | import bottle 19 | from bottle import get, abort, request, response, HTTPError 20 | from cork import AAAException 21 | from bson import ObjectId 22 | from bson.errors import InvalidId 23 | from helpers import jsonify 24 | from app import app 25 | from app import auth 26 | 27 | 28 | @app.get('/hpfeeds/') 29 | @app.get('/hpfeeds') 30 | def hpfeeds(mongodb): 31 | try: 32 | auth.require(role='access_all') 33 | except AAAException as e: 34 | return HTTPError(401, e.message) 35 | 36 | query_keys = request.query.keys() 37 | query_dict = {} 38 | 39 | mongo_keys = {'_id', 'id', 'channel'} 40 | 41 | #intersection 42 | common_keys = (set(query_keys) & mongo_keys) 43 | 44 | try: 45 | for item in common_keys: 46 | if item.endswith('_id'): 47 | query_dict[item] = ObjectId(request.query[item]) 48 | elif item == 'id': 49 | query_dict['_' + item] = ObjectId(request.query[item]) 50 | else: 51 | query_dict[item] = request.query[item] 52 | except InvalidId: 53 | abort(400, 'Not a valid ObjectId.') 54 | 55 | if 'limit' in query_keys: 56 | limit = int(request.query.limit) 57 | else: 58 | limit = 50 59 | 60 | result = list(mongodb['hpfeed'].find(query_dict).sort('timestamp', -1).limit(limit)) 61 | return jsonify({'hpfeeds': result}, response) 62 | 63 | 64 | @app.get('/hpfeeds/stats') 65 | def hpfeeds(mongodb): 66 | try: 67 | auth.require(role='access_all') 68 | except AAAException as e: 69 | return HTTPError(401, e.message) 70 | 71 | if 'date' in request.query and 'channel' in request.query: 72 | query = {'date': request.query.date, 'channel': request.query.channel} 73 | elif 'date' in request.query: 74 | query = {'date': request.query.date} 75 | elif 'channel' in request.query: 76 | query = {'channel': request.query.channel} 77 | else: 78 | abort(404, 'muhaha') 79 | 80 | results = list(mongodb['daily_stats'].find(query)) 81 | 82 | for result in results: 83 | del result['_id'] 84 | 85 | return jsonify({'stats': results}, response) 86 | 87 | 88 | @app.get('/hpfeeds/stats/total') 89 | def hpfeeds(mongodb): 90 | try: 91 | auth.require(role='access_all') 92 | except AAAException as e: 93 | return HTTPError(401, e.message) 94 | 95 | tmp_result = mongodb['daily_stats'].find_one({'_id': 'total'}) 96 | del tmp_result['_id'] 97 | 98 | result = [] 99 | for key, value in tmp_result.items(): 100 | result.append({'channel': key, 'count': value}) 101 | 102 | return jsonify({'stats': result}, response) -------------------------------------------------------------------------------- /webapi/api/d/sessions.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
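sessions.py below builds its MongoDB filter from the intersection of the request's query keys and a whitelist of session fields, casts ports to integers, and hides the honeypot's `destination_ip` from callers whose role level is below `access_normalized`. The whitelisting step on its own looks roughly like this (the sketch leaves out the ObjectId handling for `id`/`_id` that the real handler performs):

``` python
# The key-whitelisting step used by the /sessions handler below.
ALLOWED = {'protocol', 'source_ip', 'source_port',
           'destination_ip', 'destination_port', 'honeypot'}

def build_session_filter(query):
    mongo_filter = {}
    for key in set(query) & ALLOWED:      # unknown parameters are ignored
        value = query[key]
        if key.endswith('_port'):         # ports are stored as integers
            value = int(value)
        mongo_filter[key] = value
    return mongo_filter

# 'foo' is dropped and the port string becomes an int
assert build_session_filter({'protocol': 'ssh',
                             'source_port': '2222',
                             'foo': 'x'}) == {'protocol': 'ssh',
                                              'source_port': 2222}
```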
17 | 18 | from bottle import response, request, HTTPError 19 | from cork import AAAException 20 | from bson import ObjectId 21 | from helpers import simple_group, jsonify 22 | from app import app 23 | from app import auth 24 | 25 | 26 | @app.get('/sessions') 27 | def sessions_get_by_query(mongodb): 28 | try: 29 | auth.require(role='access_normalized') 30 | except AAAException as e: 31 | return HTTPError(401, e.message) 32 | 33 | query_keys = request.query.keys() 34 | query_dict = {} 35 | 36 | mongo_keys = {'id', '_id', 'protocol', 'source_ip', 'source_port', 'destination_ip', 'destination_port', 'honeypot'} 37 | 38 | #intersection 39 | common_keys = (set(query_keys) & mongo_keys) 40 | 41 | for item in common_keys: 42 | if item.endswith('_id'): 43 | query_dict[item] = ObjectId(request.query[item]) 44 | elif item == 'id': 45 | query_dict['_' + item] = ObjectId(request.query[item]) 46 | elif item.endswith('_port'): 47 | query_dict[item] = int(request.query[item]) 48 | else: 49 | query_dict[item] = request.query[item] 50 | 51 | if 'limit' in query_keys: 52 | limit = int(request.query.limit) 53 | else: 54 | limit = 50 55 | 56 | #remove the honeypot's ip if the user is not authorized to see it 57 | u = auth.current_user.role 58 | lvl = auth._store.roles[u] 59 | needed_lvl = auth._store.roles['access_normalized'] 60 | 61 | p_limit = {'_id': False} 62 | if lvl < needed_lvl: 63 | p_limit = {'destination_ip': False} 64 | 65 | result = list(mongodb['session'].find(spec=query_dict, fields=p_limit).limit(limit)) 66 | return jsonify({'sessions': result}, response) 67 | 68 | 69 | @app.get('/sessions/protocols') 70 | def session_protocols(mongodb): 71 | """ 72 | Returns a grouped list of all protocols intercepted. 73 | Example: 74 | {"protocols": [{"count": 680, "protocol": "http"}, 75 | {"count": 125, "protocol": "ssh"}, 76 | {"count": 74, "protocol": "imap"}]} 77 | """ 78 | auth.require(role='access_normalized') 79 | result = simple_group('session', 'protocol', mongodb) 80 | return jsonify(result, response) 81 | -------------------------------------------------------------------------------- /webapi/api/d/urls.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
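urls.py below supports lookups by `url_regex` as well as by the same length-dispatched `hash` parameter used for `/files`, except that the digests live inside each URL document's `extractions` array. Expressed directly against pymongo (database name, regex and hash value are placeholders; the `fields=` projection mirrors the pymongo 2.x style used by the handler):

``` python
# Direct pymongo equivalents of the /urls filters defined below.
from pymongo import MongoClient

db = MongoClient('localhost', 27017)['mnemosyne']   # placeholder database name

# GET /urls?url_regex=\.ru
by_regex = list(db.url.find({'url': {'$regex': r'\.ru'}},
                            fields={'_id': False}).limit(50))

# GET /urls?hash=<md5>  (32 characters selects extractions.hashes.md5)
by_hash = list(db.url.find({'extractions.hashes.md5':
                            'd41d8cd98f00b204e9800998ecf8427e'},
                           fields={'_id': False}).limit(50))
```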
17 | 18 | from bottle import get, abort, request, response, HTTPError 19 | from cork import AAAException 20 | from helpers import jsonify 21 | from app import app 22 | from app import auth 23 | 24 | 25 | @app.get('/urls') 26 | @app.get('/urls/') 27 | def urls(mongodb): 28 | try: 29 | auth.require(role='access_normalized') 30 | except AAAException as e: 31 | return HTTPError(401, e.message) 32 | 33 | query_keys = request.query.keys() 34 | query_dict = {} 35 | 36 | if 'limit' in query_keys: 37 | limit = int(request.query.limit) 38 | else: 39 | limit = 50 40 | 41 | if 'url_regex' in query_keys: 42 | query_dict['url'] = {'$regex': request.query.url_regex} 43 | 44 | if 'hash' in query_keys: 45 | hash_length = len(request.query['hash']) 46 | if hash_length == 128: 47 | query_dict['extractions.hashes.sha512'] = request.query['hash'] 48 | elif hash_length == 40: 49 | query_dict['extractions.hashes.sha1'] = request.query['hash'] 50 | elif hash_length == 32: 51 | query_dict['extractions.hashes.md5'] = request.query['hash'] 52 | else: 53 | abort(400, '{0} could not be recognized as a supported hash. Currently supported hashes are: SHA1, SHA512 and MD5.'.format(request.query['hash'])) 54 | 55 | result = list(mongodb['url'].find(query_dict, fields={'_id': False}).limit(limit)) 56 | return jsonify({'urls': result}, response) 57 | -------------------------------------------------------------------------------- /webapi/api/v1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnnykv/mnemosyne/54d635ec363868f15c8872cfefa7f4546f04e52e/webapi/api/v1/__init__.py -------------------------------------------------------------------------------- /webapi/api/v1/app.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('webapi') 3 | 4 | from bottle import Bottle 5 | 6 | import webapi.shared_state as state 7 | 8 | app = Bottle() 9 | 10 | if state.plug is not None: 11 | app.install(state.plug) 12 | 13 | auth = state.auth 14 | 15 | import files 16 | import hpfeeds 17 | import sessions 18 | import urls 19 | import dorks 20 | -------------------------------------------------------------------------------- /webapi/api/v1/dorks.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 | 18 | from cork import AAAException 19 | from bottle import response, get, request, HTTPError 20 | from helpers import jsonify 21 | from datetime import date, datetime 22 | from app import app 23 | from app import auth 24 | 25 | @app.get('/aux/dorks') 26 | def get_dorks(mongodb): 27 | try: 28 | auth.require(role='public') 29 | except AAAException as e: 30 | return HTTPError(401, e.message) 31 | 32 | query_keys = request.query.keys() 33 | query_dict = {} 34 | 35 | #set default parameters 36 | sort_key = 'count' 37 | sort_order = -1 38 | limit = 200 39 | 40 | if 'sort_by' in query_keys: 41 | sort_key = request.query.sort_by 42 | 43 | if 'sort_order' in query_keys: 44 | try: 45 | sort_order = int(request.query.sort_order) 46 | except ValueError: 47 | raise HTTPError(400, 'sort_order must be an integer.') 48 | 49 | if 'regex' in query_keys: 50 | query_dict['content'] = {'$regex': request.query.regex} 51 | 52 | #inurl, intitle, etc. 53 | if 'type' in query_keys: 54 | query_dict['type'] = request.query.type 55 | 56 | if 'limit' in query_keys: 57 | limit = int(request.query.limit) 58 | 59 | result = list(mongodb['dork'].find(query_dict).sort(sort_key, sort_order).limit(limit)) 60 | 61 | #delete mongo _id - better way? 62 | for entry in result: 63 | entry['firsttime'] = entry['_id'].generation_time 64 | del entry['_id'] 65 | 66 | return jsonify({'dorks': result}, response) 67 | -------------------------------------------------------------------------------- /webapi/api/v1/files.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | from cork import AAAException 19 | from bottle import abort, request, response, HTTPError 20 | from helpers import simple_group, jsonify 21 | from app import app 22 | from app import auth 23 | 24 | 25 | @app.route('/files') 26 | @app.route('/files/') 27 | def get_files(mongodb): 28 | try: 29 | auth.require(role='access_normalized') 30 | except AAAException as e: 31 | return HTTPError(401, e.message) 32 | 33 | query_keys = request.query.keys() 34 | query_dict = {} 35 | 36 | if 'limit' in query_keys: 37 | limit = int(request.query.limit) 38 | else: 39 | limit = 50 40 | 41 | if 'hash' in query_keys: 42 | hash_length = len(request.query['hash']) 43 | if hash_length == 128: 44 | query_dict['hashes.sha512'] = request.query['hash'] 45 | elif hash_length == 40: 46 | query_dict['hashes.sha1'] = request.query['hash'] 47 | elif hash_length == 32: 48 | query_dict['hashes.md5'] = request.query['hash'] 49 | else: 50 | abort(400, '{0} could not be recognized as a supported hash. Currently supported hashes are: SHA1, SHA512 and MD5.
'.format(request.query['hash'])) 51 | else: 52 | abort(400, 'Only supported query parameter is "hash"') 53 | 54 | p_limit = {'_id': False} 55 | 56 | if 'no_data' in query_keys: 57 | p_limit['data'] = False 58 | 59 | result = list(mongodb['file'].find(query_dict, fields=p_limit).limit(limit)) 60 | return jsonify({'files': result}, response) 61 | 62 | 63 | @app.route('/files/types') 64 | def files_types(mongodb): 65 | try: 66 | auth.require(role='access_normalized') 67 | except AAAException as e: 68 | return HTTPError(401, e.message) 69 | return HTTPError(410, 'This part of the API has been temporarily disabled due to performance issues.') 70 | result = simple_group('file', 'content_guess', mongodb) 71 | return jsonify(result, response) -------------------------------------------------------------------------------- /webapi/api/v1/helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | from bottle import abort, HTTPError 19 | from cork import AAAException, AuthException 20 | from datetime import datetime 21 | from bson import ObjectId 22 | from bson.code import Code 23 | 24 | import json 25 | import uuid 26 | 27 | 28 | def simple_group(collection, attribute, mongodb): 29 | """ 30 | Helper method to ease group_by operations. 31 | """ 32 | #Disabled due to bringing the system down 33 | reducer = Code(""" 34 | function (current, result) { result.count += 1; } 35 | """) 36 | #TODO: Convert to map/reduce. (current state sets read-lock) 37 | result = mongodb[collection].group(key={attribute: 1}, condition={}, initial={"count": 0}, reduce=reducer) 38 | output_rootname = attribute + 's' 39 | #Why does pymongo return the aggregation as float?
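#(the group command runs the JavaScript reduce function server-side, and JavaScript numbers are doubles, so the counts come back as floats)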
40 | for item in result: 41 | item['count'] = int(item['count']) 42 | return {output_rootname: result} 43 | 44 | 45 | def jsonify(i, r): 46 | if i is None: 47 | i = {} 48 | if isinstance(i, dict): 49 | #Attempt to serialize, raises exception on failure 50 | json_response = json.dumps(i, default=json_default, sort_keys=True) 51 | #Set content type only if serialization successful 52 | r.content_type = 'application/json' 53 | return json_response 54 | else: 55 | abort(500, 'Error while trying to serialize to json.') 56 | 57 | 58 | def json_default(obj): 59 | if isinstance(obj, datetime): 60 | return obj.isoformat() 61 | elif isinstance(obj, uuid.UUID): 62 | return str(obj) 63 | elif isinstance(obj, buffer): 64 | return str(obj) 65 | elif isinstance(obj, ObjectId): 66 | return str(obj) 67 | else: 68 | return None 69 | 70 | -------------------------------------------------------------------------------- /webapi/api/v1/hpfeeds.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 | 18 | import bottle 19 | from bottle import get, abort, request, response, HTTPError 20 | from cork import AAAException 21 | from bson import ObjectId 22 | from bson.errors import InvalidId 23 | from helpers import jsonify 24 | from app import app 25 | from app import auth 26 | 27 | 28 | @app.get('/hpfeeds/') 29 | @app.get('/hpfeeds') 30 | def hpfeeds(mongodb): 31 | try: 32 | auth.require(role='access_all') 33 | except AAAException as e: 34 | return HTTPError(401, e.message) 35 | 36 | query_keys = request.query.keys() 37 | query_dict = {} 38 | 39 | mongo_keys = {'_id', 'id', 'channel'} 40 | 41 | #intersection 42 | common_keys = (set(query_keys) & mongo_keys) 43 | 44 | try: 45 | for item in common_keys: 46 | if item.endswith('_id'): 47 | query_dict[item] = ObjectId(request.query[item]) 48 | elif item == 'id': 49 | query_dict['_' + item] = ObjectId(request.query[item]) 50 | else: 51 | query_dict[item] = request.query[item] 52 | except InvalidId: 53 | abort(400, 'Not a valid ObjectId.') 54 | 55 | if 'limit' in query_keys: 56 | limit = int(request.query.limit) 57 | else: 58 | limit = 50 59 | 60 | result = list(mongodb['hpfeed'].find(query_dict).sort('timestamp', -1).limit(limit)) 61 | return jsonify({'hpfeeds': result}, response) 62 | 63 | 64 | @app.get('/hpfeeds/stats') 65 | def hpfeeds_stats(mongodb): 66 | try: 67 | auth.require(role='access_all') 68 | except AAAException as e: 69 | return HTTPError(401, e.message) 70 | 71 | if 'date' in request.query and 'channel' in request.query: 72 | query = {'date': request.query.date, 'channel': request.query.channel} 73 | elif 'date' in request.query: 74 | query = {'date': request.query.date} 75 | elif 'channel' in request.query: 76 | query = {'channel': request.query.channel} 77 | else: 78 | abort(404, 'A date and/or channel parameter must be provided.') 79 | 80 | results = list(mongodb['daily_stats'].find(query)) 81 | 82 | for result in results: 83 | del result['_id'] 84 | 85 | return jsonify({'stats': results}, response) 86 | 87 | 88 | @app.get('/hpfeeds/stats/total') 89 | def hpfeeds_stats_total(mongodb): 90 | try: 91 | auth.require(role='access_all') 92 | except AAAException as e: 93 | return HTTPError(401, e.message) 94 | 95 | tmp_result = mongodb['daily_stats'].find_one({'_id': 'total'}) 96 | del tmp_result['_id'] 97 | 98 | result = [] 99 | for key, value in tmp_result.items(): 100 | result.append({'channel': key, 'count': value}) 101 | 102 | return jsonify({'stats': result}, response) -------------------------------------------------------------------------------- /webapi/api/v1/sessions.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 | 18 | from bottle import response, request, HTTPError 19 | from cork import AAAException 20 | from bson import ObjectId 21 | from helpers import simple_group, jsonify 22 | from app import app 23 | from app import auth 24 | 25 | 26 | @app.get('/sessions') 27 | @app.get('/sessions/') 28 | def sessions_get_by_query(mongodb): 29 | try: 30 | auth.require(role='access_normalized') 31 | except AAAException as e: 32 | return HTTPError(401, e.message) 33 | 34 | query_keys = request.query.keys() 35 | query_dict = {} 36 | 37 | mongo_keys = {'id', '_id', 'protocol', 'source_ip', 'source_port', 'destination_ip', 'destination_port', 'honeypot'} 38 | 39 | #intersection 40 | common_keys = (set(query_keys) & mongo_keys) 41 | 42 | for item in common_keys: 43 | if item.endswith('_id'): 44 | query_dict[item] = ObjectId(request.query[item]) 45 | elif item == 'id': 46 | query_dict['_' + item] = ObjectId(request.query[item]) 47 | elif item.endswith('_port'): 48 | query_dict[item] = int(request.query[item]) 49 | else: 50 | query_dict[item] = request.query[item] 51 | 52 | if 'limit' in query_keys: 53 | limit = int(request.query.limit) 54 | else: 55 | limit = 50 56 | 57 | #remove ip of honeypot if user is not authorized to see it 58 | u = auth.current_user.role 59 | lvl = auth._store.roles[u] 60 | needed_lvl = auth._store.roles['access_normalized'] 61 | if lvl < needed_lvl: 62 | p_limit = {'destination_ip': False} 63 | else: 64 | p_limit = None 65 | 66 | result = list(mongodb['session'].find(spec=query_dict, fields=p_limit).limit(limit)) 67 | return jsonify({'sessions': result}, response) 68 | 69 | 70 | @app.get('/sessions/protocols') 71 | def session_protocols(mongodb): 72 | """ 73 | Returns a grouped list of all protocols intercepted. 74 | Example: 75 | {"protocols": [{"count": 680, "protocol": "http"}, 76 | {"count": 125, "protocol": "ssh"}, 77 | {"count": 74, "protocol": "imap"}]} 78 | """ 79 | return HTTPError(410, 'This part of the API has been temporarily disabled due to performance issues.') 80 | auth.require(role='access_normalized') 81 | result = simple_group('session', 'protocol', mongodb) 82 | return jsonify(result, response) 83 | -------------------------------------------------------------------------------- /webapi/api/v1/urls.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 | 18 | from bottle import get, abort, request, response, HTTPError 19 | from cork import AAAException 20 | from helpers import jsonify 21 | from app import app 22 | from app import auth 23 | 24 | 25 | @app.get('/urls') 26 | @app.get('/urls/') 27 | def urls(mongodb): 28 | try: 29 | auth.require(role='access_normalized') 30 | except AAAException as e: 31 | return HTTPError(401, e.message) 32 | 33 | query_keys = request.query.keys() 34 | query_dict = {} 35 | 36 | if 'limit' in query_keys: 37 | limit = int(request.query.limit) 38 | else: 39 | limit = 50 40 | 41 | if 'url_regex' in query_keys: 42 | query_dict['url'] = {'$regex': request.query.url_regex} 43 | 44 | if 'hash' in query_keys: 45 | hash_length = len(request.query['hash']) 46 | if hash_length == 128: 47 | query_dict['extractions.hashes.sha512'] = request.query['hash'] 48 | elif hash_length == 40: 49 | query_dict['extractions.hashes.sha1'] = request.query['hash'] 50 | elif hash_length == 32: 51 | query_dict['extractions.hashes.md5'] = request.query['hash'] 52 | else: 53 | abort(400, '{0} could not be recognized as a supported hash. Currently supported hashes are: SHA1, SHA512 and MD5.'.format(request.query['hash'])) 54 | 55 | result = list(mongodb['url'].find(query_dict).limit(limit)) 56 | return jsonify({'urls': result}, response) 57 | -------------------------------------------------------------------------------- /webapi/default_routes.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import bottle 19 | from bottle import get, route, static_file, view, post 20 | import shared_state 21 | import logging 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | @get('/') 26 | def get_index(): 27 | return static_file('index.html', root=shared_state.static_dir) 28 | 29 | 30 | @get('/<filename:path>') 31 | def static(filename): 32 | return static_file(filename, root=shared_state.static_dir) 33 | 34 | -------------------------------------------------------------------------------- /webapi/mnemowebapi.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Johnny Vestergaard 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details.
12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | 18 | import bottle 19 | import shared_state 20 | import os 21 | import uuid 22 | import shared_state as shared 23 | import logging 24 | import types 25 | from bottle import run, install, mount, request 26 | from bottle.ext import mongo 27 | from beaker.middleware import SessionMiddleware 28 | from datetime import datetime 29 | from kumo.loggly import Loggly 30 | from cork import Cork 31 | 32 | logger = logging.getLogger(__name__) 33 | 34 | class MnemoWebAPI(): 35 | """Exposes raw and normalized data from hpfeeds through a RESTful api""" 36 | 37 | def __init__(self, database_name, static_file_path=None, data_dir='./data', loggly_token=None): 38 | 39 | cork_dir = os.path.join(data_dir, 'cork') 40 | beaker_dir = os.path.join(data_dir, 'beaker') 41 | bottle.TEMPLATE_PATH.insert(0, 'webapi/views/') 42 | 43 | #vars which must be visible across all webapi modules 44 | shared.static_dir = static_file_path 45 | shared.plug = bottle.ext.mongo.MongoPlugin(uri="localhost", db=database_name, json_mongo=True) 46 | 47 | #install mongo plugin for root app 48 | install(shared_state.plug) 49 | 50 | #check if cork files exist 51 | cork_files = ['users.json', 'roles.json', 'register.json'] 52 | if not set(cork_files).issubset(set(os.listdir(cork_dir))): 53 | #if not, create them 54 | logger.info('Cork authentication files not found, creating new files.') 55 | shared.auth = self.populate_conf_directory(cork_dir) 56 | else: 57 | shared.auth = Cork(cork_dir) 58 | 59 | #admin depends on shared.auth 60 | import admin 61 | 62 | #import and mount api version 1 (stable) 63 | from webapi.api.v1 import app as api_v1 64 | mount('/api/v1/', api_v1.app) 65 | 66 | #import and mount development version (unstable) 67 | from webapi.api.d import app as api_d 68 | mount('/api/d/', api_d.app) 69 | 70 | #must be imported AFTER mounts.
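#(default_routes registers a catch-all static file route on the root application; importing it after the /api mounts keeps that route from being matched ahead of the mounted sub-apps)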
71 | if shared.static_dir is not None: 72 | import default_routes 73 | 74 | #wrap root app in beaker middleware 75 | session_opts = { 76 | 'session.type': 'file', 77 | 'session.cookie_expires': False, 78 | 'session.data_dir': beaker_dir, 79 | 'session.auto': True, 80 | #set secure attribute on cookie 81 | 'session.secure': True 82 | } 83 | 84 | self.app = bottle.app() 85 | if loggly_token: 86 | self.app = Loggly(bottle.app(), loggly_token) 87 | self.app = SessionMiddleware(self.app, session_opts) 88 | 89 | root_app = bottle.app() 90 | 91 | #setup logging hooks 92 | @root_app.hook('before_request') 93 | @api_d.app.hook('before_request') 94 | @api_v1.app.hook('before_request') 95 | def log_request(): 96 | user_agent = "" 97 | if 'HTTP_USER_AGENT' in bottle.request.environ: 98 | user_agent = bottle.request.environ['HTTP_USER_AGENT'] 99 | if 'REMOTE_ADDR' in bottle.request.environ: 100 | remote_addr = bottle.request.environ['REMOTE_ADDR'] 101 | else: 102 | remote_addr = "" 103 | if 'beaker.session' in bottle.request.environ: 104 | session = bottle.request.environ.get('beaker.session') 105 | username = session.get('username', None) 106 | else: 107 | username = "None" 108 | logger.info("[{0}/{1}] {2} {3} ({4})".format(remote_addr, username, request.method, request.fullpath, user_agent)) 109 | 110 | def return_text(self, e): 111 | return e.status 112 | 113 | #make sure error pages for API are pure text 114 | api_d.app.default_error_handler = types.MethodType(return_text, self) 115 | api_v1.app.default_error_handler = types.MethodType(return_text, self) 116 | 117 | def start_listening(self, host, port): 118 | logger.info('Starting web api, listening on {0}:{1}'.format(host, port)) 119 | run(app=self.app, host=host, port=port, debug=False, server='gevent', 120 | log="wsgi", quiet=True, keyfile='server.key', certfile='server.crt') 121 | 122 | #defaults 123 | def populate_conf_directory(self, auth_dir): 124 | """ 125 | Creation of basic auth files. 126 | """ 127 | logger.info("Creating new authentication files, check STDOUT for the generated admin password.") 128 | cork = Cork(auth_dir, initialize=True) 129 | 130 | cork._store.roles['admin'] = 100 131 | cork._store.roles['access_all'] = 70 132 | cork._store.roles['access_normalized'] = 60 133 | cork._store.roles['public'] = 10 134 | cork._store.save_roles() 135 | 136 | tstamp = str(datetime.utcnow()) 137 | 138 | #default 'admin' user with a randomly generated password 139 | username = 'admin' 140 | password = str(uuid.uuid4()) 141 | cork._store.users[username] = { 142 | 'role': 'admin', 143 | 'hash': cork._hash(username, password), 144 | 'email_addr': username + '@localhost.local', 145 | 'desc': 'Default administrative account', 146 | 'creation_date': tstamp 147 | } 148 | cork._store.save_users() 149 | #for security reasons we do not want this in the log files.
150 | print "An 'admin' account has been created with the password '{0}'".format(password) 151 | 152 | return cork 153 | 154 | 155 | #for debugging 156 | if __name__ == '__main__': 157 | 158 | m = MnemoWebAPI('mnemosyne') 159 | m.start_listening(host='localhost', port='8181') 160 | -------------------------------------------------------------------------------- /webapi/shared_state.py: -------------------------------------------------------------------------------- 1 | #Variables which must be shared within the webapi 2 | 3 | plug = None 4 | static_dir = None 5 | auth = None -------------------------------------------------------------------------------- /webapi/views/admin_page.tpl: --------------------------------------------------------------------------------
[admin_page.tpl: the HTML markup was lost during extraction; only the template's text and Bottle directives are recoverable:]
Cork - Administration page
Welcome {{current_user.username}}, your role is: {{current_user.role}}, access time: {{current_user.session_accessed_time}}
Forms: Create new user, Delete user, Create new role, Delete role
Users table (Username, Role, Email, Description) rendered with %for u in users: ... %end
Roles table (Role, Level) rendered with %for r in roles: ... %end
(Reload page to refresh)
Status area: Ready. Links: index, logout
-------------------------------------------------------------------------------- /webapi/views/login_form.tpl: --------------------------------------------------------------------------------
[login_form.tpl: the HTML markup was lost during extraction; only the template's text is recoverable:]
Login
Please insert your credentials:
(form markup not recoverable)
--------------------------------------------------------------------------------
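For reference, a minimal sketch of how the endpoints above can be exercised once the service is running. The host, port and credentials are placeholders, and the requests library is used purely for illustration (it is not a project dependency):

    # Illustrative only: host, port and credentials are placeholders.
    import requests

    BASE = 'https://localhost:8181'

    s = requests.Session()
    s.verify = False  # the server is started with a self-signed certificate by default

    # authenticate once; the Beaker session cookie is reused for subsequent calls
    s.post(BASE + '/login', data={'username': 'youruser', 'password': 'yourpassword'})

    # total hpfeeds counts per channel (v1 API, mounted at /api/v1/)
    print s.get(BASE + '/api/v1/hpfeeds/stats/total').json()

    # normalized sessions, filtered by protocol
    print s.get(BASE + '/api/v1/sessions', params={'protocol': 'ssh', 'limit': 10}).json()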