├── .gitignore ├── AUTHORS ├── LICENSE ├── Makefile.am ├── README.md ├── app ├── Makefile.am ├── curl_mget.cc ├── curl_mget.h ├── hist.cc ├── kmeans.cc ├── linear_regression.cc ├── matrix_mult.cc ├── matrix_mult2.cc ├── miw.cc ├── mm.hh ├── pca.cc ├── simple_log_compacter.cc ├── solr_commit.cc ├── string_match.cc ├── wc.cc ├── wr.cc ├── wr.hh └── wrmem.cc ├── autogen.sh ├── config.h.in ├── configure.ac ├── data ├── tests │ ├── created.log │ ├── match_file.txt │ ├── matching.log │ ├── matching_exact.log │ ├── matching_file.log │ ├── ratio.log │ ├── string.log │ ├── sum.log │ └── variance.log └── web_proxy_10lines.log ├── doc ├── metis:mittr10.pdf └── metis_mittr10.pdf ├── metis ├── Makefile.am ├── appbase.hh ├── application.cc ├── application.hh ├── array.hh ├── bench.hh ├── bsearch.hh ├── btree.cc.mine ├── btree.hh ├── cpumap.cc ├── cpumap.hh ├── defsplitter.hh ├── group.hh ├── ibs.cc ├── ibs.hh ├── map_bucket_manager.hh ├── mergesort.hh ├── micro │ ├── btree_unit.cc │ ├── misc.cc │ ├── search_unit.cc │ └── sf_sample.cc ├── mr-types.cc ├── mr-types.hh ├── pmcreg.hh ├── predictor.hh ├── profile.cc ├── profile.hh ├── psrs.hh ├── pthreadpool.cc ├── reduce_bucket_manager.hh ├── test_util.hh ├── thread.hh ├── threadinfo.cc └── threadinfo.hh ├── micro ├── btree_unit.cc ├── misc.cc ├── search_unit.cc └── sf_sample.cc ├── miw ├── Makefile.am ├── formats │ ├── Makefile.am │ ├── McAfee.fmt │ ├── McAfee.json │ ├── MicrosoftDNSlogs.fmt │ ├── MicrosoftDNSlogs.json │ ├── anon_bluecoat_format.fmt │ ├── anon_bluecoat_format.json │ ├── domain_controller_format.fmt │ ├── domain_controller_format.json │ ├── evtx.fmt │ ├── evtx.json │ ├── evtx2.fmt │ ├── evtx2.json │ ├── firewall_checkpoint.json │ ├── format_json2pb.py │ ├── paloalto.fmt │ ├── paloalto.json │ ├── protobuf_json.py │ ├── protobuf_json_writer.py │ ├── proxy_format.fmt │ ├── proxy_format.json │ ├── squid3_search_test.fmt │ ├── squid3_search_test.json │ ├── test_json2pb.py │ └── tests │ │ ├── created.fmt │ │ ├── 
created.json │ │ ├── filter.fmt │ │ ├── filter.json │ │ ├── match.fmt │ │ ├── match.json │ │ ├── match_exact.fmt │ │ ├── match_exact.json │ │ ├── match_exact_neg.fmt │ │ ├── match_exact_neg.json │ │ ├── match_file.json │ │ ├── ratio.fmt │ │ ├── ratio.json │ │ ├── sum.fmt │ │ ├── sum.json │ │ ├── variance-mean-sum.fmt │ │ ├── variance-mean-sum.json │ │ ├── variance.fmt │ │ └── variance.json ├── job.cc ├── job.h ├── log_definition.proto ├── log_format.cc ├── log_format.h ├── log_record.cc ├── log_record.h ├── mr_job.cc ├── mr_job.h └── str_utils.h ├── python ├── miw_job.py └── miwlogger.py └── tests ├── Makefile.am └── ut-mr-parsing.cc /.gitignore: -------------------------------------------------------------------------------- 1 | .deps 2 | obj 3 | data_tool/gen 4 | GNUmakefile 5 | config.h 6 | *.swp 7 | config.log 8 | config.status 9 | autom4te.cache 10 | *.tar.gz 11 | *.o 12 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | * Metis 2 | 3 | Yandong Mao 4 | ydmao@mit.edu 5 | 6 | Robert Morris 7 | rtm@csail.mit.edu 8 | 9 | Frans Kaashoek 10 | kaashoek@csail.mit.edu 11 | 12 | * MIW 13 | 14 | Emmanuel Benazera 15 | emmanuel.benazera@deepdetect.com -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Portions of this software are subject to the license below. The relevant 2 | source files are clearly marked; they refer to this file using the phrase 3 | "the Click LICENSE file". This license is an MIT license, plus a clause 4 | (taken from the W3C license) requiring prior written permission to use our 5 | names in publicity. The AUTHORS file lists the people who have contributed 6 | to this software. 
7 | 8 | =========================================================================== 9 | 10 | (c) 1999-2009 Massachusetts Institute of Technology 11 | (c) 2000-2009 Mazu Networks, Inc. 12 | (c) 2001-2009 International Computer Science Institute 13 | (c) 2004-2009 Regents of the University of California 14 | (c) 2013-2015 SopraSteria 15 | 16 | Permission is hereby granted, free of charge, to any person obtaining a 17 | copy of this software and associated documentation files (the "Software"), 18 | to deal in the Software without restriction, including without limitation 19 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | and/or sell copies of the Software, and to permit persons to whom the 21 | Software is furnished to do so, subject to the following conditions: 22 | 23 | The above copyright notice and this permission notice shall be included in 24 | all copies or substantial portions of the Software. 25 | 26 | The name and trademarks of copyright holders may NOT be used in advertising 27 | or publicity pertaining to the Software without specific, written prior 28 | permission. Title to copyright in this Software and any associated 29 | documentation will at all times remain with copyright holders. 30 | 31 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 32 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 33 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 34 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 35 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 36 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 37 | DEALINGS IN THE SOFTWARE. 
38 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = miw metis app tests 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## MIW 2 | MIW is the short name for Mobile Investigation Workstation. MIW is a tool for the fast summarization and analysis of very large quantities of logs. It is written in C++/C++11 and provides some small Python tooling. 3 | 4 | MIW was built as an extremely efficient single-machine map-reduce alternative to Hadoop for processing hundreds of GB of logs. It is especially useful for on-premises log analysis, for generating condensed analytics out of raw logs, and for computing features useful in Machine Learning applications. 5 | 6 | Though rather young in its development, MIW has already been integrated into several industrial projects and run over billions of logs, from laptops to HPC. 7 | 8 | Main functionalities: 9 | At the moment, MIW implements the following features: 10 | 11 | - C++ map-reduce as an extension of Metis with support for multiple input files as input to the same task 12 | - auto-splitting of files based on available RAM 13 | - command line job launcher 14 | - configurable input log and job control formats in JSON 15 | - variety of outputs supported, from memory to JSON and CSV 16 | - variety of configuration options for preprocessing common log fields such as dates, time, URLs, ... 
17 | - minimal Python job control utility 18 | 19 | Dependencies: 20 | 21 | - C++11 compiler + autotools 22 | - [protocol buffers](https://developers.google.com/protocol-buffers/?hl=en) for configuration and storage; 23 | - [boost](http://www.boost.org/) for network and tokenizer functionalities; 24 | - [glog](https://code.google.com/p/google-glog/) for logging events and debug; 25 | - [gflags](https://code.google.com/p/gflags/) for command line parsing; 26 | - [jsoncpp](https://github.com/open-source-parsers/jsoncpp) for JSON output; 27 | - [gtest](https://code.google.com/p/googletest/) for unit testing (optional); 28 | - [cppnetlib](http://cpp-netlib.org/) for preprocessing URIs; 29 | - [snappy](http://google.github.io/snappy/) for log compression; 30 | - [libcurl](http://curl.haxx.se/libcurl/) for connecting to external applications. 31 | 32 | Implementation: 33 | 34 | ### Authors 35 | MIW is designed and implemented by Emmanuel Benazera around the c++ map-reduce library Metis, on behalf of SopraSteria cybersecurity. 
36 | 37 | ### Build 38 | Below are instructions for Linux systems: 39 | 40 | First, install dependencies 41 | ``` 42 | sudo apt-get install autotools-dev automake autoconf libtool pkg-config libprotobuf-dev protobuf-compiler python-protobuf libjsoncpp-dev libgoogle-glog-dev libgflags-dev libsnappy-dev libcurl4-openssl-dev libcppnetlib-dev python-simplejson 43 | ``` 44 | 45 | For compiling: 46 | ``` 47 | ./autogen.sh 48 | ./configure 49 | make 50 | ``` 51 | 52 | ### Documentation 53 | 54 | Using the main command line exe: 55 | ``` 56 | ./app/miw --help 57 | ``` 58 | yields the list of options: 59 | ``` 60 | Flags from job.cc: 61 | -appname (optional application name) type: string default: "" 62 | -autosplit (whether to autosplit file based on available memory 63 | type: bool default: false 64 | -compressed (whether to compress the original content) type: bool 65 | default: false 66 | -fnames (comma-separated input file names) type: string default: "" 67 | -format_name (processing format name) type: string default: "" 68 | -map_tasks (number of map tasks (default = auto)) type: int32 default: 0 69 | -memory_factor (heuristic value for autosplit of very large files, 70 | representing the expected memory requirement ratio vs the size of the 71 | file, e.g. 
10 times more memory than log volume) type: double default: 10 72 | -merge_results (whether to merge results over multiple input files) 73 | type: bool default: false 74 | -ndisp (number of top records to show) type: int32 default: 5 75 | -nprocs (number of cores (default = auto)) type: int32 default: 0 76 | -ofname (output file name) type: string default: "" 77 | -output_format (output format (json, csv)) type: string default: "" 78 | -quiet (quietness) type: bool default: true 79 | -reduce_tasks (number of reduce tasks (default = auto)) type: int32 default: 0 80 | -skip_header (whether to skip first log line file as header) type: bool default: false 81 | -store_content (whether to store the original content in the processed output) type: bool default: false 82 | -tmp_save (whether to save temporary output of results after each file is processed) type: bool default: false 83 | ``` 84 | 85 | Example with a sample of data from the repository: 86 | ``` 87 | ./app/miw -fnames data/web_proxy_10lines.log -format_name miw/formats/proxy_format -output_format csv -ofname test.csv 88 | ``` 89 | should yield 90 | ``` 91 | files=data/web_proxy_10lines.log 92 | I1203 16:06:42.315526 21243 job.cc:122] files size=1 93 | I1203 16:06:42.315587 21243 job.cc:127] Processing file=data/web_proxy_10lines.log 94 | 95 | logs preprocessing: results (TOP 5 from 2 keys, 4 logs): 96 | 2012-11-30_23_NqO3SB - 2 97 | 2012-11-30_23_- - 2 98 | 99 | Runtime in millisecond [4 cores] 100 | Sample: 15 Map: 0 Reduce: 0 Merge: 0 Sum: 15 Real: 16 101 | Number of Tasks of last Metis run 102 | Sample: 0 Map: 6 Reduce: 67 103 | I1203 16:06:42.330492 21243 job.cc:180] MR duration=0 seconds 104 | ``` 105 | 106 | The above call uses an existing format file. However, it is unlikely that the provided formats match your logs. 
To generate your own log format: 107 | 108 | - Edit a json file in the manner of files in `miw/formats` 109 | - Convert it into a proto-buffer: 110 | ``` 111 | python format_json2pb.py yourformatfile.json yourformatfile.fmt 112 | ``` 113 | - Use the format in a call: 114 | ./app/miw -fnames yourlogfile.log -format_name yourformatfile -output_format csv -ofname test.csv 115 | 116 | Please note the omission of the `.fmt` extension. 117 | 118 | ### Log Formats 119 | 120 | The log formats are described in JSON, see the examples in `miw/formats`. They basically describe a log as a CSV like format, in which each column can be processed through a set of operations, from basic `counts` to `aggregations`, preprocessing of `time`, `date`, `url`. 121 | 122 | The best way to learn from the built-in possibilities at this point is to study the JSON files in `miw/formats`. 123 | 124 | ### Run tests 125 | 126 | There are examples of unit tests in `tests/ut-mr-parsing.cc`. Edit the file as needed for using your own formats and logs and run: 127 | ``` 128 | make ut_mr_parsing 129 | ./ut_mr_parsing 130 | ``` 131 | -------------------------------------------------------------------------------- /app/Makefile.am: -------------------------------------------------------------------------------- 1 | MAXCPUS = $(shell grep -c processor /proc/cpuinfo) 2 | AM_CXXFLAGS=-Wall -g -pipe -std=c++11 -fpermissive -fopenmp -O2 -g \ 3 | -I../miw -I../metis \ 4 | -fno-omit-frame-pointer -D_GNU_SOURCE -include ../config.h \ 5 | -DJTLS=__thread -DJSHARED_ATTR= \ 6 | -DJOS_CLINE=64 -DCACHE_LINE_SIZE=64 \ 7 | -DJOS_NCPU=$(MAXCPUS) -D__STDC_FORMAT_MACROS `pkg-config --cflags protobuf` 8 | AM_LDFLAGS=`pkg-config --libs protobuf` -L../miw/ -L../metis 9 | LDADD=curl_mget.o -lmiw -lmetis -lc -lm -lcurl -lz -lssl -lcrypto -lpthread -ldl -ljsoncpp -lrt -lprotobuf -lsnappy $(GLOG_LIBS) $(GFLAGS_LIBS) -lboost_system -lcppnetlib-uri 10 | bin_PROGRAMS=kmeans matrix_mult pca wc wr linear_regression hist string_match 
wrmem matrix_mult2 sf_sample btree_unit search_unit misc solr_commit miw simple_log_compacter 11 | 12 | kmeans_SOURCES=kmeans.cc 13 | matrix_mult_SOURCES=matrix_mult.cc 14 | pca_SOURCES=pca.cc 15 | wc_SOURCES=wc.cc 16 | wr_SOURCES=wr.cc 17 | linear_regression_SOURCES=linear_regression.cc 18 | hist_SOURCES=hist.cc 19 | string_match_SOURCES=string_match.cc 20 | wrmem_SOURCES=wrmem.cc 21 | matrix_mult2_SOURCES=matrix_mult2.cc 22 | sf_sample_SOURCES=../metis/micro/sf_sample.cc 23 | btree_unit_SOURCES=../metis/micro/btree_unit.cc 24 | search_unit_SOURCES=../metis/micro/search_unit.cc 25 | misc_SOURCES=../metis/micro/misc.cc 26 | bluecoat_log_proc_SOURCES=bluecoat_log_proc.cc 27 | log_compacter_SOURCES=log_compacter.cc 28 | solr_commit_SOURCES=solr_commit.cc 29 | miw_SOURCES=miw.cc 30 | simple_log_compacter_SOURCES=simple_log_compacter.cc 31 | -------------------------------------------------------------------------------- /app/curl_mget.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2009 Emmanuel Benazera 3 | * All rights reserved. 4 | * Author: Emmanuel Benazera 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * * Neither the name of SopraSteria nor the 14 | * names of its contributors may be used to endorse or promote products 15 | * derived from this software without specific prior written permission. 
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY SOPRASTERIA ``AS IS'' AND ANY 18 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL SOPRASTERIA BE LIABLE FOR ANY 21 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef CURL_MGET_H 30 | #define CURL_MGET_H 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #include 38 | /* Formats t by streaming it into a std::stringstream with operator<< and returns the resulting text. NOTE(review): the template parameter list and the include targets above appear to have been lost from this listing. */ 39 | template 40 | static std::string to_string (const T& t) 41 | { 42 | std::stringstream ss; 43 | ss << t; 44 | return ss.str(); 45 | }; 46 | /* State record passed to pull_one_url: target URL, output string, timeouts, proxy address and port, forced headers, status and HTTP code, plus optional curl handle, cookies, method and request content. NOTE(review): the constructor initializer list does not set _connect_timeout_sec or _transfer_timeout_sec, so both are indeterminate until assigned - confirm every user of cbget sets them before use. */ 47 | typedef struct _cbget 48 | { 49 | _cbget() 50 | :_url(NULL),_output(NULL),_proxy_port(0),_headers(NULL),_status(0),_http_code(0),_handler(NULL), 51 | _content(NULL),_content_size(-1) 52 | {}; 53 | 54 | ~_cbget() 55 | {}; 56 | 57 | const char *_url; 58 | std::string *_output; 59 | 60 | long _connect_timeout_sec; 61 | long _transfer_timeout_sec; 62 | std::string _proxy_addr; 63 | short _proxy_port; 64 | const std::list *_headers; // forced http headers 65 | int _status; 66 | long _http_code; 67 | CURL *_handler; // optional 68 | std::string _cookies; // optional 69 | std::string _http_method; // optional 70 | std::string *_content; // optional 71 | int _content_size; // optional 72 | std::string _content_type; // optional. 
73 | } cbget; 74 | /* Takes an opaque pointer named arg_cbget and returns void*; presumably a thread-style entry point that performs the request described by a cbget - the implementation is not visible in this header, TODO confirm. */ 75 | void* pull_one_url(void *arg_cbget); 76 | /* Multi-request HTTP helper. www_mget takes a list of URLs with per-request headers, proxy settings and output vectors for status and HTTP codes; www_simple is the single-URL form. Both default to the GET method, no request content (content_size -1) and an empty content type. Ownership of the returned std::string pointers is not visible here - TODO confirm against the implementation. NOTE(review): container template arguments appear to have been lost from this listing. */ 77 | class curl_mget 78 | { 79 | public: 80 | curl_mget(const int &nrequests, 81 | const long &connect_timeout_sec, 82 | const long &connect_timeout_ms, 83 | const long &transfer_timeout_sec, 84 | const long &transfer_timeout_ms); 85 | 86 | ~curl_mget(); 87 | 88 | // direct connection. 89 | std::string** www_mget(const std::vector &urls, const int &nrequests, 90 | const std::vector*> *headers, 91 | const std::string &proxy_addr, const short &proxy_port, 92 | std::vector &status, 93 | std::vector &http_codes, 94 | std::vector *chandlers=NULL, 95 | std::vector *cookies=NULL, 96 | const std::string &http_method="GET", 97 | std::string *content=NULL, 98 | const int &content_size=-1, 99 | const std::string &content_type=""); 100 | 101 | std::string* www_simple(const std::string &url, 102 | std::list *headers, 103 | int &status, 104 | long &http_code, 105 | const std::string &http_method="GET", 106 | std::string *content=NULL, 107 | const int &content_size=-1, 108 | const std::string &content_type="", 109 | const std::string &proxy_addr="", 110 | const short &proxy_port=0); 111 | public: 112 | int _nrequests; 113 | long _connect_timeout_sec; 114 | long _connect_timeout_ms; 115 | long _transfer_timeout_sec; 116 | long _transfer_timeout_ms; 117 | std::string _lang; 118 | const std::list *_headers; // forced http headers. 119 | 120 | std::string **_outputs; 121 | cbget **_cbgets; 122 | }; 123 | 124 | #endif 125 | -------------------------------------------------------------------------------- /app/matrix_mult.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2007, Stanford University 2 | * All rights reserved. 
3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of Stanford University nor the 12 | * names of its contributors may be used to endorse or promote products 13 | * derived from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE FOR ANY 19 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "mm.hh" 41 | #include "bench.hh" 42 | 43 | enum { block_based = 1 }; 44 | 45 | int main(int argc, char *argv[]) { 46 | int matrix_len = 0; 47 | int *matrix_A_ptr, *matrix_B_ptr, *fdata_out; 48 | int nprocs = 0, map_tasks = 0; 49 | int quiet = 0; 50 | srand((unsigned) time(NULL)); 51 | if (argc < 2) { 52 | usage(argv[0]); 53 | exit(EXIT_FAILURE); 54 | } 55 | 56 | int c; 57 | while ((c = getopt(argc, argv, "p:m:ql:")) != -1) { 58 | switch (c) { 59 | case 'p': 60 | assert((nprocs = atoi(optarg)) >= 0); 61 | break; 62 | case 'm': 63 | map_tasks = atoi(optarg); 64 | break; 65 | case 'q': 66 | quiet = 1; 67 | break; 68 | case 'l': 69 | assert((matrix_len = atoi(optarg)) > 0); 70 | break; 71 | default: 72 | usage(argv[0]); 73 | exit(EXIT_FAILURE); 74 | } 75 | } 76 | matrix_A_ptr = safe_malloc(matrix_len * matrix_len); 77 | matrix_B_ptr = safe_malloc(matrix_len * matrix_len); 78 | fdata_out = safe_malloc(matrix_len * matrix_len); 79 | 80 | for (int i = 0; i < matrix_len; i++) 81 | for (int j = 0; j < matrix_len; j++) { 82 | matrix_A_ptr[i * matrix_len + j] = rand(); 83 | matrix_B_ptr[i * matrix_len + j] = rand(); 84 | } 85 | mapreduce_appbase::initialize(); 86 | mm app(block_based ? 
0 : map_tasks, block_based); 87 | 88 | app.d_.matrix_len = matrix_len; 89 | app.d_.row_num = 0; 90 | app.d_.startrow = 0; 91 | app.d_.startcol = 0; 92 | app.d_.matrix_A = matrix_A_ptr; 93 | app.d_.matrix_B = matrix_B_ptr; 94 | app.d_.output = ((int *) fdata_out); 95 | 96 | app.set_ncore(nprocs); 97 | app.sched_run(); 98 | app.print_stats(); 99 | if (!quiet) { 100 | printf("First row of the output matrix:\n"); 101 | for (int i = 0; i < matrix_len; i++) 102 | printf("%d\t", fdata_out[i]); 103 | printf("\nLast row of the output matrix:\n"); 104 | for (int i = 0; i < matrix_len; i++) 105 | printf("%d\t", fdata_out[(matrix_len - 1) * matrix_len + i]); 106 | printf("\n"); 107 | } 108 | free(matrix_A_ptr); 109 | free(matrix_B_ptr); 110 | free(fdata_out); 111 | app.free_results(); 112 | mapreduce_appbase::deinitialize(); 113 | return 0; 114 | } 115 | -------------------------------------------------------------------------------- /app/matrix_mult2.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Matrix multiply optimized by Mark Roth (mroth@cs.sfu.ca) 3 | * SFU Systems Research Group (http://synar.cs.sfu.ca/systems-research.html) 4 | * 5 | * The optimizations are: 6 | * 7 | * - Swapping the order of the inner two most loops of matrixmult_map, which 8 | * improves performance by ~3.5x on input size 4096 running on a 24 core 9 | * AMD system with 64kb L1 cache and 2 way associativity. Performance also 10 | * seems to increase in general by 30% on other input sizes as cache line 11 | * reuse is increased. 12 | * 13 | * - Using processInnerLoop() to let gcc vectorize the inner loop at 14 | * O3. Speed up is ~3x over the above version. 15 | * 16 | * - The last optimization helps to prevent L1 collisions by pre-faulting 17 | * the matrix pages randomly. This is useful for caches that have a low 18 | * associativity and with inputs that are multiples of 2048. 
On an AMD 19 | * system with a 64kb 2 way associative cache, the patch makes about a 20 | * 20-30% improvement for input size 4096. 21 | */ 22 | /* Copyright (c) 2007, Stanford University 23 | * All rights reserved. 24 | * 25 | * Redistribution and use in source and binary forms, with or without 26 | * modification, are permitted provided that the following conditions are met: 27 | * * Redistributions of source code must retain the above copyright 28 | * notice, this list of conditions and the following disclaimer. 29 | * * Redistributions in binary form must reproduce the above copyright 30 | * notice, this list of conditions and the following disclaimer in the 31 | * documentation and/or other materials provided with the distribution. 32 | * * Neither the name of Stanford University nor the 33 | * names of its contributors may be used to endorse or promote products 34 | * derived from this software without specific prior written permission. 35 | * 36 | * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY 37 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 39 | * DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE FOR ANY 40 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 41 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 42 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 43 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 44 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 45 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
46 | */ 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | #include 60 | #include 61 | #include "mm.hh" 62 | #include "bench.hh" 63 | 64 | enum { block_based = 1 }; 65 | 66 | struct mm2 : public mm { 67 | mm2(int nsplit, bool block_based) : mm(nsplit, block_based) {} 68 | void map_function_block(split_t *); 69 | }; 70 | 71 | /** Extract inner loop to make auto vectorization easier to analyze */ 72 | void processInnerLoop(int* out, int out_offset, int* mat_a, int a_offset, int *mat_b, 73 | int b_offset, int start, int end) { 74 | int a = mat_a[a_offset]; 75 | for (int i = start; i < end; ++i) 76 | out[out_offset + i] += a * mat_b[b_offset + i]; 77 | } 78 | 79 | /** Multiplies the allocated regions of matrix to compute partial sums */ 80 | void mm2::map_function_block(split_t *args) { 81 | int end_i, end_j, end_k, a, c; 82 | prof_enterapp(); 83 | assert(args && args->data); 84 | mm_data_t *data = (mm_data_t *) (args->data); 85 | dprintf("%d Start Loop \n", data->row_num); 86 | int i = data->startrow; 87 | int j = data->startcol; 88 | dprintf("do %d %d of %d\n", i, j, data->matrix_len); 89 | for (int k = 0; k < data->matrix_len; k += block_len) { 90 | end_i = i + block_len; 91 | end_j = j + block_len; 92 | end_k = k + block_len; 93 | int end = (end_j < data->matrix_len) ? 
end_j : data->matrix_len; 94 | for (a = i; a < end_i && a < data->matrix_len; ++a) 95 | for (c = k; c < end_k && c < data->matrix_len; ++c) 96 | processInnerLoop(data->output, data->matrix_len * a, data->matrix_A, 97 | data->matrix_len * a + c, data->matrix_B, 98 | data->matrix_len * c, j, end); 99 | } 100 | dprintf("Finished Map task %d\n", data->row_num); 101 | fflush(stdout); 102 | free(data); 103 | prof_leaveapp(); 104 | } 105 | 106 | int main(int argc, char *argv[]) { 107 | int matrix_len = 0; 108 | int *matrix_A_ptr, *matrix_B_ptr, *fdata_out; 109 | int nprocs = 0, map_tasks = 0; 110 | int quiet = 0; 111 | srand((unsigned) time(NULL)); 112 | if (argc < 2) { 113 | usage(argv[0]); 114 | exit(EXIT_FAILURE); 115 | } 116 | 117 | int c; 118 | while ((c = getopt(argc, argv, "p:m:ql:")) != -1) { 119 | switch (c) { 120 | case 'p': 121 | assert((nprocs = atoi(optarg)) >= 0); 122 | break; 123 | case 'm': 124 | map_tasks = atoi(optarg); 125 | break; 126 | case 'q': 127 | quiet = 1; 128 | break; 129 | case 'l': 130 | assert((matrix_len = atoi(optarg)) > 0); 131 | break; 132 | default: 133 | usage(argv[0]); 134 | exit(EXIT_FAILURE); 135 | } 136 | } 137 | matrix_A_ptr = safe_malloc(matrix_len * matrix_len); 138 | matrix_B_ptr = safe_malloc(matrix_len * matrix_len); 139 | fdata_out = safe_malloc(matrix_len * matrix_len); 140 | 141 | for (int i = 0; i < matrix_len; i++) 142 | for (int j = 0; j < matrix_len; j++) { 143 | matrix_A_ptr[i * matrix_len + j] = rand(); 144 | matrix_B_ptr[i * matrix_len + j] = rand(); 145 | } 146 | 147 | mapreduce_appbase::initialize(); 148 | mm2 app(block_based ? 
0 : map_tasks, block_based); 149 | 150 | app.d_.matrix_len = matrix_len; 151 | app.d_.row_num = 0; 152 | app.d_.startrow = 0; 153 | app.d_.startcol = 0; 154 | app.d_.matrix_A = matrix_A_ptr; 155 | app.d_.matrix_B = matrix_B_ptr; 156 | app.d_.output = ((int *) fdata_out); 157 | 158 | app.set_ncore(nprocs); 159 | app.sched_run(); 160 | app.print_stats(); 161 | if (!quiet) { 162 | printf("First row of the output matrix:\n"); 163 | for (int i = 0; i < matrix_len; i++) 164 | printf("%d\t", fdata_out[i]); 165 | printf("\nLast row of the output matrix:\n"); 166 | for (int i = 0; i < matrix_len; i++) 167 | printf("%d\t", fdata_out[(matrix_len - 1) * matrix_len + i]); 168 | printf("\n"); 169 | } 170 | free(matrix_A_ptr); 171 | free(matrix_B_ptr); 172 | free(fdata_out); 173 | app.free_results(); 174 | mapreduce_appbase::deinitialize(); 175 | return 0; 176 | } 177 | 178 | -------------------------------------------------------------------------------- /app/miw.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 SopraSteria 3 | * All rights reserved. 4 | * Author: Emmanuel Benazera 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * * Neither the name of SopraSteria nor the 14 | * names of its contributors may be used to endorse or promote products 15 | * derived from this software without specific prior written permission. 
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY SOPRASTERIA ``AS IS'' AND ANY 18 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL SOPRASTERIA BE LIABLE FOR ANY 21 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #include "job.h" 30 | 31 | using namespace miw; 32 | 33 | int main(int argc, char **argv) 34 | { 35 | job j; 36 | return j.execute(argc,argv); 37 | } 38 | -------------------------------------------------------------------------------- /app/mm.hh: -------------------------------------------------------------------------------- 1 | #ifndef MM_HH_ 2 | #define MM_HH_ 1 3 | #include "application.hh" 4 | 5 | enum { block_len = 32 }; 6 | 7 | struct mm_data_t { 8 | int row_num; 9 | int startrow; 10 | int startcol; 11 | int *matrix_A; 12 | int *matrix_B; 13 | int matrix_len; 14 | int *output; 15 | }; 16 | 17 | /** @brief: Coordinates and location for each value in the matrix */ 18 | struct mm_key_t { 19 | int x_loc; 20 | int y_loc; 21 | int value; 22 | }; 23 | 24 | struct mm : public map_only { 25 | mm(int nsplit, bool block_based) : nsplit_(nsplit), block_based_(block_based) {} 26 | int key_compare(const void *v1, const void *v2); 27 | bool split(split_t *ma, int ncores) { 28 | return block_based_ ? split_block(ma, ncores) : split_nonblock(ma, ncores); 29 | } 30 | void map_function(split_t *ma) { 31 | block_based_ ? 
map_function_block(ma) : map_function_nonblock(ma); 32 | } 33 | bool split_block(split_t *ma, int ncores); 34 | bool split_nonblock(split_t *ma, int ncores); 35 | virtual void map_function_block(split_t *ma); 36 | void map_function_nonblock(split_t *ma); 37 | 38 | int nsplit_; 39 | bool block_based_; 40 | mm_data_t d_; 41 | }; 42 | 43 | int mm::key_compare(const void *v1, const void *v2) { 44 | prof_enterkcmp(); 45 | mm_key_t *key1 = (mm_key_t *) v1; 46 | mm_key_t *key2 = (mm_key_t *) v2; 47 | int r; 48 | if (key1->x_loc != key2->x_loc) 49 | r = key1->x_loc - key2->x_loc; 50 | else 51 | r = key1->y_loc - key2->y_loc; 52 | prof_leavekcmp(); 53 | return r; 54 | } 55 | 56 | bool mm::split_nonblock(split_t *out, int ncores) { 57 | /* Make a copy of the mm_data structure */ 58 | mm_data_t *data_out = safe_malloc(); 59 | *data_out = d_; 60 | /* Check whether the various terms exist */ 61 | if (nsplit_ == 0) 62 | nsplit_ = ncores * def_nsplits_per_core; 63 | uint64_t split_size = d_.matrix_len / nsplit_; 64 | assert(d_.row_num <= d_.matrix_len); 65 | printf("Required units is %ld\n", split_size); 66 | /* Reached the end of the matrix */ 67 | if (d_.row_num >= d_.matrix_len) { 68 | fflush(stdout); 69 | free(data_out); 70 | return false; 71 | } 72 | /* Compute available rows */ 73 | int available_rows = d_.matrix_len - d_.row_num; 74 | out->length = (split_size < size_t(available_rows)) ? 
split_size : available_rows; 75 | out->data = data_out; 76 | d_.row_num += out->length; 77 | dprintf("Allocated rows is %ld\n", out->length); 78 | return true; 79 | } 80 | 81 | /** @brief: Multiplies the allocated regions of matrix to compute partial sums */ 82 | void mm::map_function_nonblock(split_t *args) { 83 | int row_count = 0, x_loc, value; 84 | int *a_ptr, *b_ptr; 85 | prof_enterapp(); 86 | assert(args && args->data); 87 | mm_data_t *data = (mm_data_t *)args->data; 88 | while (row_count < int(args->length)) { 89 | a_ptr = data->matrix_A + (data->row_num + row_count) * data->matrix_len; 90 | for (int i = 0; i < data->matrix_len; i++) { 91 | b_ptr = data->matrix_B + i; 92 | value = 0; 93 | for (int j = 0; j < data->matrix_len; j++) { 94 | value += (a_ptr[j] * (*b_ptr)); 95 | b_ptr += data->matrix_len; 96 | } 97 | x_loc = (data->row_num + row_count); 98 | data->output[x_loc * data->matrix_len + i] = value; 99 | fflush(stdout); 100 | } 101 | dprintf("%d Loop\n", data->row_num); 102 | row_count++; 103 | } 104 | printf("Finished Map task %d\n", data->row_num); 105 | fflush(stdout); 106 | free(data); 107 | prof_leaveapp(); 108 | } 109 | 110 | /* @brief: Assign block_len elements in a row the output matrix */ 111 | bool mm::split_block(split_t *out, int ncore) { 112 | prof_enterapp(); 113 | /* Make a copy of the mm_data structure */ 114 | mm_data_t *data_out = safe_malloc(); 115 | *data_out = d_; 116 | if (d_.startrow >= d_.matrix_len) { 117 | free(data_out); 118 | prof_leaveapp(); 119 | return false; 120 | } 121 | /* Compute available rows */ 122 | out->data = data_out; 123 | d_.startcol += block_len; 124 | if (d_.startcol > d_.matrix_len) { 125 | d_.startrow += block_len; 126 | d_.startcol = 0; 127 | } 128 | prof_leaveapp(); 129 | return true; 130 | } 131 | 132 | /* Multiplies the allocated regions of matrix to compute partial sums */ 133 | void mm::map_function_block(split_t * args) { 134 | prof_enterapp(); 135 | assert(args && args->data); 136 | mm_data_t *data 
= (mm_data_t *)args->data; 137 | dprintf("%d Start Loop \n", data->row_num); 138 | int i = data->startrow; 139 | int j = data->startcol; 140 | dprintf("do %d %d of %d\n", i, j, data->matrix_len); 141 | for (int k = 0; k < data->matrix_len; k += block_len) { 142 | int end_i = i + block_len; 143 | int end_j = j + block_len; 144 | int end_k = k + block_len; 145 | for (int a = i; a < end_i && a < data->matrix_len; a++) 146 | for (int b = j; b < end_j && b < data->matrix_len; b++) 147 | for (int c = k; c < end_k && c < data->matrix_len; c++) 148 | data->output[data->matrix_len * a + b] += 149 | (data->matrix_A[data->matrix_len * a + c] * 150 | data->matrix_B[data->matrix_len * c + b]); 151 | } 152 | dprintf("Finished Map task %d\n", data->row_num); 153 | free(data); 154 | prof_leaveapp(); 155 | } 156 | 157 | inline void usage(char *fn) { 158 | printf("usage: %s [options]\n", fn); 159 | printf("options:\n"); 160 | printf(" -p nprocs : # of processors to use\n"); 161 | printf(" -m #map tasks : # of map tasks (pre-split input before MR)\n"); 162 | printf(" -q : quiet output (for batch test)\n"); 163 | printf(" -l : matrix dimentions. (assume squaure)\n"); 164 | } 165 | 166 | #endif 167 | -------------------------------------------------------------------------------- /app/simple_log_compacter.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 SopraSteria 3 | * All rights reserved. 4 | * Author: Emmanuel Benazera 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 
10 | * * Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * * Neither the name of SopraSteria nor the 14 | * names of its contributors may be used to endorse or promote products 15 | * derived from this software without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY SOPRASTERIA ``AS IS'' AND ANY 18 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL SOPRASTERIA BE LIABLE FOR ANY 21 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #include "job.h" 30 | 31 | using namespace miw; 32 | 33 | int main(int argc, char **argv) 34 | { 35 | job j; 36 | j.execute(argc,argv); 37 | } 38 | -------------------------------------------------------------------------------- /app/wr.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2007, Stanford University 2 | * All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 
8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of Stanford University nor the 12 | * names of its contributors may be used to endorse or promote products 13 | * derived from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE FOR ANY 19 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "wr.hh" 41 | #include "bench.hh" 42 | #ifdef JOS_USER 43 | #include "wc-datafile.h" 44 | #include 45 | #endif

/* NOTE(review): DEFAULT_NDISP is not referenced anywhere in this file; main
   starts ndisp at 5 instead. Left as is because the intended default cannot
   be determined from here. */
#define DEFAULT_NDISP 10

/* Prints the command-line help and terminates the process with a failure
   status; it never returns to the caller. */
static void usage(const char *prog) {
    printf("usage: %s [options]\n", prog);
    printf("options:\n");
    printf
        (" -p #procs : # of processors to use (use all cores by default)\n");
    printf
        (" -m #map tasks : # of map tasks (pre-split input before MR. 16 tasks per core by default)\n");
    printf
        (" -r #reduce tasks : # of reduce tasks (16 tasks per core by default)\n");
    printf(" -l ntops : # of top val. pairs to display\n");
    printf(" -q : quiet output (for batch test)\n");
    exit(EXIT_FAILURE);
}

/* Word reverse-index driver. argv[1] is passed to the wr constructor as the
   input; the options follow it, which is why getopt is started one slot
   further into argv. */
int main(int argc, char *argv[]) {
    int nprocs = 0, map_tasks = 0, ndisp = 5, reduce_tasks = 0, quiet = 0;
    int c;
    if (argc < 2)
        usage(argv[0]);
    while ((c = getopt(argc - 1, argv + 1, "p:l:m:r:q")) != -1) {
        switch (c) {
        case 'p':
            nprocs = atoi(optarg);
            break;
        case 'l':
            ndisp = atoi(optarg);
            break;
        case 'm':
            map_tasks = atoi(optarg);
            break;
        case 'r':
            reduce_tasks = atoi(optarg);
            break;
        case 'q':
            quiet = 1;
            break;
        default:
            /* usage() exits; the exit()/break that followed were dead code. */
            usage(argv[0]);
        }
    }

    mapreduce_appbase::initialize();
    wr app(argv[1], map_tasks);
    app.set_ncore(nprocs);
    app.set_group_task(reduce_tasks);
    app.sched_run();
    app.print_stats();
    if (!quiet)
        print_top(&app.results_, ndisp, count(&app.results_));
    app.free_results();
    mapreduce_appbase::deinitialize();
    return 0;
}

-------------------------------------------------------------------------------- /app/wr.hh: --------------------------------------------------------------------------------
#ifndef WR_HH
#define WR_HH

#include "application.hh"
#include "defsplitter.hh"

/* Word reverse-index application: the map phase emits every word of its
   split as a key, with the value that split_word::fill returns for it; the
   group phase collects the values per word.
   NOTE(review): the explicit template arguments in this header
   (safe_malloc<char>, xarray<keyvals_len_t>) were lost in extraction and
   have been restored from usage - confirm against the original file. */
struct wr : public map_group {
    wr(char *d, size_t size, int nsplit) : s_(d, size, nsplit) {}
    wr(char *f, int nsplit) : s_(f, nsplit) {}

    /* Emits one pair per word found in the split. k is a fixed 1024-byte
       scratch buffer for the current word. */
    void map_function(split_t *ma) {
        char k[1024];
        size_t klen;
        split_word sw(ma);
        while (char *index = sw.fill(k, sizeof(k), klen))
            map_emit(k, index, klen);
    }

    /* Delegates splitting to the default splitter with a whitespace
       delimiter set. */
    bool split(split_t *ma, int ncore) {
        return s_.split(ma, ncore, " \t\n\r\0");
    }

    /* Keys are NUL-terminated strings (see key_copy). */
    int key_compare(const void *k1, const void *k2) {
        return strcmp((const char *)k1, (const char *)k2);
    }
    /* Returns a NUL-terminated heap copy of the first s bytes of src. */
    void *key_copy(void *src, size_t s) {
        char *key = safe_malloc<char>(s + 1);
        /* safe_malloc does not check the allocation itself. */
        assert(key);
        memcpy(key, src, s);
        key[s] = 0;
        return key;
    }
    void key_free(void *k) {
        free(k);
    }
  private:
    defsplitter s_;
};

/* Sums the number of values over all result keys. */
inline size_t count(xarray<keyvals_len_t> *wc_vals) {
    size_t nw = 0;
    for (size_t i = 0; i < wc_vals->size(); i++)
        nw += size_t(wc_vals->at(i)->len);
    return nw;
}

/* Prints a header followed by the first ndisp result keys, each with its
   value count. nw is the total reported in the header. */
inline void print_top(xarray<keyvals_len_t> *wc_vals, size_t ndisp, size_t nw) {
    /* %zu / %u: the arguments are unsigned; the previous %zd / %d did not
       match their types. */
    printf("\nwordreverseindex: results (TOP %zu from %zu keys, %zu words):\n",
           ndisp, wc_vals->size(), nw);
    ndisp = std::min<size_t>(ndisp, wc_vals->size());
    for (size_t i = 0; i < ndisp; ++i) {
        keyvals_len_t *w = wc_vals->at(i);
        printf("%15s - %u\n", (char *) w->key_, unsigned(w->len));
    }
}

#endif
-------------------------------------------------------------------------------- /app/wrmem.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2007, Stanford University 2 | * All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of Stanford University nor the 12 | * names of its contributors may be used to endorse or promote products 13 | * derived from this software without specific prior written permission. 
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE FOR ANY 19 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "bench.hh" 41 | #include "wr.hh" 42 | #include "test_util.hh"

#define DEFAULT_NDISP 10

/* Writes the option summary to stdout, then ends the process with
   EXIT_FAILURE. */
static void usage(char *prog) {
    static const char *const option_lines[] = {
        "options:\n",
        " -p #procs : # of processors to use (use all cores by default)\n",
        " -m #map tasks : # of map tasks (16 tasks per core by default)\n",
        " -r #reduce tasks : # of reduce tasks (determined by sampling by default)\n",
        " -l ntops : # of top key/value pairs to display\n",
        " -s inputsize : size of input in MB\n",
        " -q : quiet output (for batch test)\n",
    };
    printf("usage: %s [options]\n", prog);
    for (size_t i = 0; i < sizeof(option_lines) / sizeof(option_lines[0]); ++i)
        fputs(option_lines[i], stdout);
    exit(EXIT_FAILURE);
}

/* In-memory variant of the word reverse index: fills an anonymous mapping
   with pseudo-random three-letter words separated by single spaces, runs the
   wr application over it and checks that the number of grouped values equals
   the number of generated words. */
int main(int argc, char *argv[]) {
    affinity_set(0);
    int nprocs = 0;
    int map_tasks = 0;
    int ndisp = 5;
    int reduce_tasks = 0;
    int quiet = 0;
    uint64_t inputsize = 0x80000000;

    for (int opt; (opt = getopt(argc, argv, "p:l:m:r:qs:")) != -1; ) {
        switch (opt) {
        case 'p': nprocs = atoi(optarg); break;
        case 'l': ndisp = atoi(optarg); break;
        case 'm': map_tasks = atoi(optarg); break;
        case 'r': reduce_tasks = atoi(optarg); break;
        case 's': inputsize = atol(optarg) * 1024 * 1024; break;
        case 'q': quiet = 1; break;
        default:
            usage(argv[0]);
            exit(EXIT_FAILURE);
        }
    }

    enum { wordlength = 3 };
    uint32_t seed = 0;
    /* One byte more than inputsize is mapped. */
    char *fdata = (char *) mmap(NULL, inputsize + 1, PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    assert(fdata != MAP_FAILED);

    /* Each word occupies wordlength letters plus one separating space. */
    const uint64_t nwords = inputsize / (wordlength + 1);
    char *cursor = fdata;
    for (uint64_t w = 0; w < nwords; ++w) {
        char *const word_end = cursor + wordlength;
        while (cursor != word_end)
            *cursor++ = rnd(&seed) % 26 + 'A';
        *cursor++ = ' ';
    }
    /* Zero whatever is left between the last word and inputsize. */
    memset(cursor, 0, inputsize - uint64_t(cursor - fdata));
    size_t n = nwords;

    mapreduce_appbase::initialize();
    wr app(fdata, inputsize, map_tasks);
    app.set_ncore(nprocs);
    app.set_group_task(reduce_tasks);
    app.sched_run();
    app.print_stats();
    size_t nw = count(&app.results_);
    CHECK_EQ(n, nw);
    if (!quiet)
        print_top(&app.results_, ndisp, nw);
    app.free_results();
    mapreduce_appbase::deinitialize();
    return 0;
}

-------------------------------------------------------------------------------- /config.h.in: -------------------------------------------------------------------------------- 1 | /* config.h.in. Generated from configure.ac by autoheader. */ 2 | 3 | /* Define data structure for map phase */ 4 | #undef DEFAULT_MAP_DS 5 | 6 | /* Define if the C++ compiler understands constexpr. */ 7 | #undef HAVE_CXX_CONSTEXPR 8 | 9 | /* Define to 1 if you have the header file. */ 10 | #undef HAVE_DLFCN_H 11 | 12 | /* google glog requested */ 13 | #undef HAVE_GLOG 14 | 15 | /* Define to 1 if you have the header file. 
*/ 16 | #undef HAVE_INTTYPES_H 17 | 18 | /* define if you have google gflags library */ 19 | #undef HAVE_LIB_GFLAGS 20 | 21 | /* define if you have google glog library */ 22 | #undef HAVE_LIB_GLOG 23 | 24 | /* Define to 1 if you have the header file. */ 25 | #undef HAVE_MEMORY_H 26 | 27 | /* Define to 1 if you have the header file. */ 28 | #undef HAVE_SNAPPY_H 29 | 30 | /* Define to 1 if you have the header file. */ 31 | #undef HAVE_STDINT_H 32 | 33 | /* Define to 1 if you have the header file. */ 34 | #undef HAVE_STDLIB_H 35 | 36 | /* Define to 1 if you have the header file. */ 37 | #undef HAVE_STRINGS_H 38 | 39 | /* Define to 1 if you have the header file. */ 40 | #undef HAVE_STRING_H 41 | 42 | /* Define to 1 if you have the header file. */ 43 | #undef HAVE_SYS_STAT_H 44 | 45 | /* Define to 1 if you have the header file. */ 46 | #undef HAVE_SYS_TYPES_H 47 | 48 | /* Define to 1 if you have the header file. */ 49 | #undef HAVE_UNISTD_H 50 | 51 | /* "Protocol buffers available" */ 52 | #undef JSONCPP 53 | 54 | /* Define to the sub-directory in which libtool stores uninstalled libraries. 55 | */ 56 | #undef LT_OBJDIR 57 | 58 | /* map -> merge -> reduce */ 59 | #undef MAP_MERGE_REDUCE 60 | 61 | /* Name of package */ 62 | #undef PACKAGE 63 | 64 | /* Define to the address where bug reports for this package should be sent. */ 65 | #undef PACKAGE_BUGREPORT 66 | 67 | /* Define to the full name of this package. */ 68 | #undef PACKAGE_NAME 69 | 70 | /* Define to the full name and version of this package. */ 71 | #undef PACKAGE_STRING 72 | 73 | /* Define to the one symbol short name of this package. */ 74 | #undef PACKAGE_TARNAME 75 | 76 | /* Define to the home page for this package. */ 77 | #undef PACKAGE_URL 78 | 79 | /* Define to the version of this package. 
*/ 80 | #undef PACKAGE_VERSION 81 | 82 | /* Define if you want metis to print out profiling results from performance 83 | counters */ 84 | #undef PROFILE_ENABLED 85 | 86 | /* "Protocol buffers available" */ 87 | #undef PROTOBUF 88 | 89 | /* Define to (Append-map -> Group) -> (psrs-merge, and-reduce) */ 90 | #undef SINGLE_APPEND_GROUP_FIRST 91 | 92 | /* Define to 1 if you have the ANSI C header files. */ 93 | #undef STDC_HEADERS 94 | 95 | /* Define if you want to use psrs for sorting */ 96 | #undef USE_PSRS 97 | 98 | /* Version number of package */ 99 | #undef VERSION 100 | -------------------------------------------------------------------------------- /data/tests/created.log: -------------------------------------------------------------------------------- 1 | 1,1 2 | 1,1 3 | 1,3 4 | 1,5 5 | 1,4 6 | 1,2 7 | 1,5 8 | 1,5 9 | 1,5 10 | 1,6 11 | 1,5 12 | -------------------------------------------------------------------------------- /data/tests/match_file.txt: -------------------------------------------------------------------------------- 1 | KO 2 | NA 3 | -------------------------------------------------------------------------------- /data/tests/matching.log: -------------------------------------------------------------------------------- 1 | 1,OOKK 2 | 2,OOKK 3 | 3,KO -------------------------------------------------------------------------------- /data/tests/matching_exact.log: -------------------------------------------------------------------------------- 1 | 1,OK 2 | 2,OK 3 | 3,KO 4 | 4,KO2 -------------------------------------------------------------------------------- /data/tests/matching_file.log: -------------------------------------------------------------------------------- 1 | 1,OK 2 | 2,KO 3 | 3,NA 4 | 4,OK -------------------------------------------------------------------------------- /data/tests/ratio.log: -------------------------------------------------------------------------------- 1 | 1,3,5,3,5 2 | 1,1,3,1,3 3 | 1,5,7,5,7 4 | 1,3,2,3,2 5 | 
1,2,9,2,9 6 | 1,2,2,2,2 7 | -------------------------------------------------------------------------------- /data/tests/string.log: -------------------------------------------------------------------------------- 1 | 1,ok 2 | 1,ok 3 | 1,ok 4 | 1,denied 5 | 1,denied 6 | -------------------------------------------------------------------------------- /data/tests/sum.log: -------------------------------------------------------------------------------- 1 | 1,3,3.5 2 | 1,1,1.0 3 | 1,5,5.0 4 | 1,3,3.5 5 | 1,2,2.0 6 | 1,2,2.0 7 | -------------------------------------------------------------------------------- /data/tests/variance.log: -------------------------------------------------------------------------------- 1 | 1,3 2 | 1,1 3 | 1,5 4 | 1,3 5 | 1,2 6 | 1,2 7 | -------------------------------------------------------------------------------- /data/web_proxy_10lines.log: -------------------------------------------------------------------------------- 1 | #Fields: date time time-taken c-ip sc-status s-action sc-bytes cs-bytes cs-method cs-uri-scheme cs-host cs-uri-port cs-uri-path cs-uri-query cs-username cs-auth-group s-supplier-name rs(Content-Type) cs(Referer) cs(User-Agent) sc-filter-result cs-categories x-virus-id s-ip 2 | 2012-11-30 23:00:00 25 192.185.14.69 407 TCP_DENIED 1130 573 GET http solution.weborama.fr 80 /fcgi-bin/adserv.fcgi ?tag=849080&f=10&h=R&rnd=[RANDOM] - - - - - "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11" DENIED "none" - 247.146.12.194 3 | 2012-11-30 23:00:00 1 192.185.14.69 407 TCP_DENIED 1453 657 GET http solution.weborama.fr 80 /fcgi-bin/adserv.fcgi ?tag=849080&f=10&h=R&rnd=[RANDOM] - - - - - "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11" DENIED "none" - 247.146.12.194 4 | 2012-11-30 23:00:00 123 192.185.14.69 200 TCP_NC_MISS 2956 1373 GET http bs.serving-sys.com 80 /BurstingPipe/adServer.bs ?cn=rsb&c=28 NqO3SB 
FRtmCYpO\v81_xK19161g_l6e bs.serving-sys.com text/html - "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11" OBSERVED "Advertisements" - 247.146.12.194 5 | 2012-11-30 23:00:00 83 192.185.14.69 302 TCP_NC_MISS 1165 821 GET http solution.weborama.fr 80 /fcgi-bin/adserv.fcgi ?tag=849080&f=10&h=R&rnd=[RANDOM] NqO3SB FRtmCYpO\v81_xK19161g_l6e solution.weborama.fr text/html;%20charset=iso-8859-1 - "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11" OBSERVED "Information Technology" - 247.146.12.194 6 | -------------------------------------------------------------------------------- /doc/metis:mittr10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soprasteria/cybersecurity-miw/67eec32f44bf7c9e14771cf8a068490eb80a53f1/doc/metis:mittr10.pdf -------------------------------------------------------------------------------- /doc/metis_mittr10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soprasteria/cybersecurity-miw/67eec32f44bf7c9e14771cf8a068490eb80a53f1/doc/metis_mittr10.pdf -------------------------------------------------------------------------------- /metis/Makefile.am: -------------------------------------------------------------------------------- 1 | metisdir = $(libdir)/metis 2 | metis_LIBRARIES=libmetis.a 3 | MAXCPUS := $(shell grep -c processor /proc/cpuinfo) 4 | AM_CXXFLAGS=-Wall -g -pipe -std=c++11 -fpermissive -fopenmp -g -O2 @OPT_LEVEL@ \ 5 | -fno-omit-frame-pointer -D_GNU_SOURCE -include ../config.h \ 6 | -DJTLS=__thread -DJSHARED_ATTR= \ 7 | -DJOS_CLINE=64 -DCACHE_LINE_SIZE=64 \ 8 | -DJOS_NCPU=$(MAXCPUS) -D__STDC_FORMAT_MACROS 9 | AM_CPPFLAGS=-Imicro 10 | libmetis_a_SOURCES= pthreadpool.cc profile.cc ibs.cc cpumap.cc mr-types.cc application.cc threadinfo.cc 11 | 
-------------------------------------------------------------------------------- /metis/application.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 14 | */ 15 | #ifndef APPLICATION_HH_ 16 | #define APPLICATION_HH_ 1 17 | 18 | #include "mr-types.hh" 19 | #include "profile.hh" 20 | #include "bench.hh" 21 | #include "predictor.hh" 22 | #include "reduce_bucket_manager.hh" 23 | #include "appbase.hh" 24 | #include

struct map_bucket_manager_base;

/* Common base of the three application flavours defined below. T is the
   element type of the final results; at is the application type constant
   returned by application_type().
   NOTE(review): every template parameter/argument list in this header was
   lost in extraction and has been reconstructed from how the names are used
   (results_[i].key_, rb_.transfer(0, &results_), the comparison of at with
   atype_maponly). The names atype_mapreduce, atype_mapgroup, keyvals_t and
   keyval_t in particular are assumptions - confirm against mr-types.hh. */
template <typename T, int at>
struct app_impl_base : public mapreduce_appbase {
    xarray<T> results_;

    int application_type() {
        return at;
    }
    virtual ~app_impl_base() {
        reset();
    }
    /* @brief: set the optional output compare function */
    virtual int final_output_compare(const T *p1, const T *p2) {
        return this->key_compare(p1->key_, p2->key_);
    }
    /* @brief: release every result key through key_free, reset each
       element, then free the array storage itself */
    void free_results() {
        for (size_t i = 0; i < results_.size(); ++i) {
            this->key_free(results_[i].key_);
            results_[i].reset();
        }
        results_.shallow_free();
    }

    /* @brief: move the contents of reduce bucket 0 into results_ */
    void set_final_result() {
        rb_.transfer(0, &results_);
    }
  protected:
    /* Type-erased adapter around final_output_compare. The casts keep the
       const qualifier instead of silently dropping it. */
    int internal_final_output_compare(const void *p1, const void *p2) {
        return final_output_compare((const T *)p1, (const T *)p2);
    }
    reduce_bucket_manager<T> rb_;

    reduce_bucket_manager_base *get_reduce_bucket_manager() {
        return &rb_;
    }
    /* @brief: whether the reduce/group phase is skipped. Map-only
       applications always skip it; otherwise the answer depends on the
       build configuration. */
    bool skip_reduce_or_group_phase() {
        if (at == atype_maponly)
            return true;
#ifdef MAP_MERGE_REDUCE
#if USE_PSRS
        return true;
#else
        assert(0 && "TODO: support merge sort in MAP_MERGE_REDUCE mode\n");
        /* With NDEBUG the assert above vanishes and control used to run off
           the end of a bool function (undefined behaviour). */
        return false;
#endif
#else
        return false;
#endif
    }

    /* @brief: results of a previous run must have been released */
    void verify_before_run() {
        assert(!results_.size());
    }
  public:
    void reset() {
        rb_.reset();
        mapreduce_appbase::reset();
    }
};

struct map_reduce : public app_impl_base<keyvals_t, atype_mapreduce> {
    virtual ~map_reduce() {}
    /* @brief: if not zero, disable the sampling. */
    void set_reduce_task(int nreduce_task) {
        nreduce_or_group_task_ = nreduce_task;
    }
    /* @brief: user defined reduce function.
       Should not be provided when using vm */
    virtual void reduce_function(void *k, void **v, size_t length) {
        std::cout << "empty reduce\n";
        assert(0);
    }
    /* @brief: combine @v
       @v: input and output parameter
       @return: the new length of v
       should not be provided when using vm */
    virtual int combine_function(void *k, void **v, size_t length) {
        return length;
    }

    /* @brief: called for each key/value pair to update the value.
       @return: the updated value */
    virtual void *modify_function(void *oldv, void *newv) {
        assert(0 && "Please overload modify_function");
        /* Reached only when assertions are compiled out; the function used
           to end here without returning a value (undefined behaviour). */
        return NULL;
    }
    /* @brief: whether modify_function is overridden; false by default */
    virtual bool has_value_modifier() const {
        return false;
    }
  protected:
    friend class static_appbase;
    void internal_reduce_emit(keyvals_t &p);
    void map_values_insert(keyvals_t *kvs, void *val);
    void map_values_move(keyvals_t *dst, keyvals_t *src);
};

struct map_group : public app_impl_base<keyvals_len_t, atype_mapgroup> {
    virtual ~map_group() {}
    /* @brief: if not zero, disables the sampling */
    void set_group_task(int group_task) {
        nreduce_or_group_task_ = group_task;
    }
  protected:
    friend class static_appbase;
    void internal_reduce_emit(keyvals_t &p);
};

struct map_only : public app_impl_base<keyval_t, atype_maponly> {
    virtual ~map_only() {}
};

#endif
-------------------------------------------------------------------------------- /metis/bench.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 
14 | */ 15 | #ifndef BENCH_HH_ 16 | #define BENCH_HH_ 1 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include

#define JOS_PAGESIZE 4096
/* Compile-time switch for dprintf: 0 silences it. */
enum { debug_print = 0 };

/* NOTE(review): the template parameter lists in this header were lost in
   extraction; they have been restored from how T and M are used. */

/* Rounds n down to a multiple of b. T may be an integer or a pointer type;
   the arithmetic is done on uintptr_t. */
template <typename T, typename M>
inline T round_down(T n, M b) {
    uintptr_t r = uintptr_t(n);
    return (T)(r - r % b);
}

/* Rounds n up to a multiple of b. The explicit cast back to T is required:
   the inner call works on uintptr_t, and a uintptr_t does not convert
   implicitly to a pointer, so round_up on a pointer did not compile. */
template <typename T, typename M>
inline T round_up(T n, M b) {
    uintptr_t r = uintptr_t(n);
    return (T)round_down(r + b - 1, b);
}

/* Allocates room for n objects of type T with malloc. The result is NOT
   checked (the assertion below was disabled by the original authors), so
   callers must be prepared for NULL. */
template <typename T>
inline T *safe_malloc(int n = 1) {
    void *x = malloc(sizeof(T) * n);
    //assert(n == 0 || x);
    return (T *)x;
}

/* timeval to microseconds. */
inline uint64_t tv2us(const timeval &v) {
    return uint64_t(v.tv_sec) * 1000000 + v.tv_usec;
}

/* timeval to whole milliseconds (truncating). */
inline uint64_t tv2ms(const timeval &v) {
    return tv2us(v) / 1000;
}

/* printf that only fires when __exp is true. */
#define cond_printf(__exp, __fmt, __args...)    \
do {                                            \
    if (__exp)                                  \
        printf(__fmt, ##__args);                \
} while (0)

/* Debug printf, controlled by debug_print above. */
#define dprintf(__fmt, __args...) cond_printf(debug_print, __fmt, ##__args)

/* Prints to stderr and terminates the process with a failure status. */
#define eprint(__fmt, __args...)                \
do {                                            \
    fprintf(stderr, __fmt, ##__args);           \
    exit(EXIT_FAILURE);                         \
} while (0)

/* Stores an integer in a pointer-sized slot. */
template <typename T>
inline void *int2ptr(T i) {
    return (void *)(intptr_t(i));
}

/* Recovers an integer stored with int2ptr. */
template <typename T>
inline T ptr2int(void *p) {
    return (T)(intptr_t(p));
}

/* Linear congruential generator; advances *seed and returns its low
   31 bits. */
inline uint32_t rnd(uint32_t *seed) {
    *seed = *seed * 1103515245 + 12345;
    return *seed & 0x7fffffff;
}

/* Reads the x86 time-stamp counter. */
inline uint64_t read_tsc(void) {
    uint32_t a, d;
    __asm __volatile("rdtsc":"=a"(a), "=d"(d));
    return ((uint64_t) a) | (((uint64_t) d) << 32);
}

/* Reads the x86 performance counter selected by ecx. */
inline uint64_t read_pmc(uint32_t ecx) {
    uint32_t a, d;
    __asm __volatile("rdpmc":"=a"(a), "=d"(d):"c"(ecx));
    return ((uint64_t) a) | (((uint64_t) d) << 32);
}

inline void mfence(void) {
    __asm __volatile("mfence" ::: "memory");
}

/* Prevents the compiler from reordering memory accesses across this point;
   emits no instruction. */
inline void compiler_barrier() {
    __asm__ __volatile__("": : :"memory");
}

inline void nop_pause(void) {
    __asm __volatile("pause"::);
}

/* Wall-clock time in microseconds. */
inline uint64_t usec(void) {
    struct timeval tv;
    gettimeofday(&tv, 0);
    return uint64_t(tv.tv_sec) * 1000000 + tv.tv_usec;
}

/* CPU frequency in Hz, taken from the first "cpu MHz" line of /proc/cpuinfo.
   Returns 0 when no such line exists.
   Fixes: (1) MHz is 10^6 Hz; the previous 2^20 factor overstated the
   frequency by about 4.9%, so every time derived from it was too short.
   (2) getline reports end of input with -1, not EOF. */
inline uint64_t get_cpu_freq(void) {
#ifdef JOS_USER
    return 2000 * 1000 * 1000;
#else
    FILE *f = fopen("/proc/cpuinfo", "r");
    assert(f != NULL);
    float freqf = 0;
    char *line = NULL;
    size_t len = 0;
    while (getline(&line, &len, f) != -1 &&
           sscanf(line, "cpu MHz\t: %f", &freqf) != 1)
        ;
    free(line);    /* free(NULL) is a no-op */
    fclose(f);
    return uint64_t(freqf * 1000000.0);
#endif
}

/* Converts a cycle count to milliseconds. Returns 0 when the frequency is
   unknown instead of dividing by zero. */
inline uint64_t cycle_to_ms(uint64_t x) {
    const uint64_t freq = get_cpu_freq();
    if (freq == 0)
        return 0;
    return (x * 1000) / freq;
}

/* Number of processors currently online; terminates the process if it
   cannot be determined. */
inline uint32_t get_core_count(void) {
    int r = sysconf(_SC_NPROCESSORS_ONLN);
    if (r < 0)
        eprint("get_core_count: error: %s\n", strerror(errno));
    return r;
}

inline void lfence(void) { 154 | __asm __volatile("lfence" ::: "memory"); 155 | } 156 | 157 | inline int atomic_add32_ret(int *cnt) { 158 | int __c = 1; 159 | __asm__ __volatile("lock; xadd %0,%1":"+r"(__c), "+m"(*cnt)::"memory"); 160 | return __c; 161 | } 162 | 163 | template 164 | inline T prime_lower_bound(T x) { 165 | for (int q = 2; q < sqrt(double(x)); ++q) 166 | if (x % q == 0) 167 | ++x, q = 1; // restart 168 | return x; 169 | } 170 | 171 | inline int affinity_set(int cpu) { 172 | cpu_set_t cpuset; 173 | CPU_ZERO(&cpuset); 174 | CPU_SET(cpu, &cpuset); 175 | return sched_setaffinity(0, sizeof(cpuset), &cpuset); 176 | } 177 | 178 | // prefetch instruction 179 | inline void prefetch(const void *ptr) { 180 | #ifdef NOPREFETCH 181 | (void) ptr; 182 | #else 183 | typedef struct { char x[64]; } cacheline_t; 184 | asm volatile("prefetcht0 %0" : : "m" (*(const cacheline_t *)ptr)); 185 | #endif 186 | } 187 | inline void prefetchnta(const void *ptr) { 188 | #ifdef NOPREFETCH 189 | (void) ptr; 190 | #else 191 | typedef struct { char x[64]; } cacheline_t; 192 | asm volatile("prefetchnta %0" : : "m" (*(const cacheline_t *)ptr)); 193 | #endif 194 | } 195 | 196 | #endif 197 | -------------------------------------------------------------------------------- /metis/bsearch.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. 
/* The Software
 * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a
 * summary of the Metis LICENSE file; the license in that file is legally
 * binding.
 */
#ifndef BSEARCH_HH_
#define BSEARCH_HH_

namespace xsearch {

/** Set *x to true and pass @a t through; lets "found" be reported inside a
 *  return expression. */
template <typename T>
inline T set_true(bool *x, const T &t) {
    *x = true;
    return t;
}

/** Binary search in the sorted array @a a of @a n elements.
 *
 *  @a f(k, &a[i]) must return <0, 0 or >0 when *k orders before, equal to or
 *  after a[i].  On an exact match *found is set to true and the index of a
 *  matching element is returned; otherwise *found is false and the result is
 *  the position at which *k would have to be inserted. */
template <typename T, typename F>
int lower_bound(const T *k, const T *a, int n, const F &f, bool *found) {
    *found = false;
    if (!n)
        return 0;
    int l = 0, r = n - 1;
    // invariant: the lower_bound is >= l
    while (l < r) {
        const int m = l + (r - l) / 2;  // cannot overflow, unlike (l + r) / 2
        const int c = f(k, &a[m]);
        if (!c)
            return set_true(found, m);
        if (c < 0)
            r = m - 1;
        else
            l = m + 1;
    }
    if (l > r)
        return l;
    const int c = f(k, &a[l]);
    if (!c)
        return set_true(found, l);
    return l + (c > 0);
}

/** Position just past the element matched by lower_bound(), or the insertion
 *  position when *key is absent. */
template <typename T, typename F>
int upper_bound(const T *key, const T *a, int n, const F &f) {
    bool found = false;
    int p = lower_bound(key, a, n, f, &found);
    return p + found;
}

}  // namespace xsearch
#endif
// --------------------------------------------------------------------------------
// /metis/btree.cc.mine:
// --------------------------------------------------------------------------------
/* Metis
 * Yandong Mao, Robert Morris, Frans Kaashoek
 * Copyright (c) 2012 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, subject to the conditions listed
 * in the Metis LICENSE file. These conditions include: you must preserve this
 * copyright notice, and you cannot mention the copyright holders in
 * advertising related to the Software without their permission. The Software
 * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. */
This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 14 | */ 15 | #include "btree.hh" 16 | #include "appbase.hh" 17 | #include 18 | #include 19 | #include 20 | #ifdef JOS_USER 21 | #include 22 | #endif 23 | 24 | void btree_type::init() { 25 | nk_ = 0; 26 | nlevel_ = 0; 27 | root_ = NULL; 28 | } 29 | 30 | // left < key <= right. Right is the new sibling 31 | void btree_type::insert_internal(void *key, btnode_base *left, btnode_base *right) { 32 | btnode_internal *parent = left->parent_; 33 | if (!parent) { 34 | btnode_internal *newroot = new btnode_internal; 35 | newroot->nk_ = 1; 36 | newroot->assign(0, left, key, right); 37 | root_ = newroot; 38 | left->parent_ = newroot; 39 | right->parent_ = newroot; 40 | ++nlevel_; 41 | } else { 42 | int ikey = parent->upper_bound_pos(key); 43 | // insert newkey at ikey, values at ikey + 1 44 | for (int i = parent->nk_ - 1; i >= ikey; i--) 45 | parent->e_[i + 1].k_ = parent->e_[i].k_; 46 | for (int i = parent->nk_; i >= ikey + 1; i--) 47 | parent->e_[i + 1].v_ = parent->e_[i].v_; 48 | parent->assign_right(ikey, key, right); 49 | ++parent->nk_; 50 | right->parent_ = parent; 51 | if (parent->need_split()) { 52 | void *newkey = parent->e_[order].k_; 53 | btnode_internal *newparent = parent->split(); 54 | // push up newkey 55 | insert_internal(newkey, parent, newparent); 56 | // fix parent pointers 57 | for (int i = 0; i < newparent->nk_ + 1; ++i) 58 | newparent->e_[i].v_->parent_ = newparent; 59 | } 60 | } 61 | } 62 | 63 | btnode_leaf *btree_type::get_leaf(void *key) { 64 | if (!nlevel_) { 65 | root_ = new btnode_leaf; 66 | nlevel_ = 1; 67 | nk_ = 0; 68 | return static_cast(root_); 69 | } 70 | btnode_base *node = root_; 71 | for (int i = 0; i < nlevel_ - 1; ++i) 72 | node = static_cast(node)->upper_bound(key); 73 | return static_cast(node); 74 | } 75 | 76 | // left < splitkey <= right. 
Right is the new sibling 77 | int btree_type::map_insert_sorted_copy_on_new(void *k, void *v, size_t keylen, unsigned hash) { 78 | btnode_leaf *leaf = get_leaf(k); 79 | int pos; 80 | bool found; 81 | if (!(found = leaf->lower_bound(k, &pos))) { 82 | void *ik = static_appbase::key_copy(k, keylen); 83 | leaf->insert(pos, ik, hash); 84 | ++ nk_; 85 | } 86 | leaf->e_[pos].map_value_insert(v); 87 | if (leaf->need_split()) { 88 | btnode_leaf *right = leaf->split(); 89 | insert_internal(right->e_[0].key, leaf, right); 90 | } 91 | return !found; 92 | } 93 | 94 | void btree_type::map_insert_sorted_new_and_raw(keyvals_t *p) { 95 | btnode_leaf *leaf = get_leaf(p->key); 96 | int pos; 97 | //assert(!leaf->lower_bound(p->key, &pos)); // must be new key // out by EB 98 | leaf->insert(pos, p->key, 0); // do not copy key 99 | ++ nk_; 100 | leaf->e_[pos] = *p; 101 | if (leaf->need_split()) { 102 | btnode_leaf *right = leaf->split(); 103 | insert_internal(right->e_[0].key, leaf, right); 104 | } 105 | } 106 | 107 | size_t btree_type::size() const { 108 | return nk_; 109 | } 110 | 111 | void btree_type::delete_level(btnode_base *node, int level) { 112 | for (int i = 0; level > 1 && i <= node->nk_; ++i) 113 | delete_level(static_cast(node)->e_[i].v_, level - 1); 114 | delete node; 115 | } 116 | 117 | void btree_type::shallow_free() { 118 | if (!nlevel_) 119 | return; 120 | delete_level(root_, nlevel_); 121 | init(); 122 | } 123 | 124 | btree_type::iterator btree_type::begin() { 125 | return iterator(first_leaf()); 126 | } 127 | 128 | btree_type::iterator btree_type::end() { 129 | return btree_type::iterator(NULL); 130 | } 131 | 132 | uint64_t btree_type::copy(xarray *dst) { 133 | return copy_traverse(dst, false); 134 | } 135 | 136 | uint64_t btree_type::transfer(xarray *dst) { 137 | uint64_t n = copy_traverse(dst, true); 138 | shallow_free(); 139 | return n; 140 | } 141 | 142 | uint64_t btree_type::copy_traverse(xarray *dst, bool clear_leaf) { 143 | assert(dst->size() == 0); 144 | if 
(!nlevel_) 145 | return 0; 146 | dst->resize(size()); 147 | btnode_leaf *leaf = first_leaf(); 148 | uint64_t n = 0; 149 | while (leaf) { 150 | memcpy(dst->at(n), leaf->e_, sizeof(keyvals_t) * leaf->nk_); 151 | n += leaf->nk_; 152 | if (clear_leaf) 153 | leaf->nk_ = 0; // quickly delete all key/values from the leaf 154 | leaf = leaf->next_; 155 | } 156 | assert(n == nk_); 157 | return n; 158 | } 159 | 160 | btnode_leaf *btree_type::first_leaf() const { 161 | if (!nk_) 162 | return NULL; 163 | btnode_base *node = root_; 164 | for (int i = 0; i < nlevel_ - 1; ++i) 165 | node = static_cast(node)->e_[0].v_; 166 | return static_cast(node); 167 | } 168 | -------------------------------------------------------------------------------- /metis/cpumap.cc: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 
14 | */ 15 | #include "cpumap.hh" 16 | 17 | static int logical_to_physical_[JOS_NCPU]; 18 | 19 | void cpumap_init() { 20 | for (int i = 0; i < JOS_NCPU; ++i) 21 | logical_to_physical_[i] = i; 22 | } 23 | 24 | int cpumap_physical_cpuid(int i) { 25 | return logical_to_physical_[i]; 26 | } 27 | -------------------------------------------------------------------------------- /metis/cpumap.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 
/* (end of the cpumap.hh license header) */
#ifndef CPUMAP_HH_
#define CPUMAP_HH_ 1

// Logical id of the main core.
enum { main_core = 0 };

/** Initialize the logical-to-physical CPU id table. */
void cpumap_init();

/** Physical CPU id that logical CPU @a i maps to. */
int cpumap_physical_cpuid(int i);

#endif
// --------------------------------------------------------------------------------
// /metis/defsplitter.hh:
// --------------------------------------------------------------------------------
/* Metis
 * Yandong Mao, Robert Morris, Frans Kaashoek
 * Copyright (c) 2012 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, subject to the conditions listed
 * in the Metis LICENSE file. These conditions include: you must preserve this
 * copyright notice, and you cannot mention the copyright holders in
 * advertising related to the Software without their permission. The Software
 * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a
 * summary of the Metis LICENSE file; the license in that file is legally
 * binding.
 */
14 | */ 15 | #ifndef DEFSPLITTER_HH_ 16 | #define DEFSPLITTER_HH_ 1 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | struct mmap_file { 24 | mmap_file(const char *f) { 25 | assert((fd_ = open(f, O_RDONLY)) >= 0); 26 | struct stat fst; 27 | assert(fstat(fd_, &fst) == 0); 28 | size_ = fst.st_size; 29 | d_ = (char *)mmap(0, size_ + 1, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_, 0); 30 | assert(d_); 31 | } 32 | mmap_file() : fd_(-1) {} 33 | virtual ~mmap_file() { 34 | if (fd_ >= 0) { 35 | assert(munmap(d_, size_ + 1) == 0); 36 | assert(close(fd_) == 0); 37 | } 38 | } 39 | char &operator[](off_t i) { 40 | return d_[i]; 41 | } 42 | size_t size_; 43 | char *d_; 44 | private: 45 | int fd_; 46 | }; 47 | 48 | struct defsplitter { 49 | defsplitter(char *d, size_t size, size_t nsplit) 50 | : d_(d), size_(size), nsplit_(nsplit), pos_(0) { 51 | pthread_mutex_init(&mu_, 0); 52 | } 53 | defsplitter(const char *f, size_t nsplit) : nsplit_(nsplit), pos_(0), mf_(f) { 54 | pthread_mutex_init(&mu_, 0); 55 | size_ = mf_.size_; 56 | d_ = mf_.d_; 57 | } 58 | ~defsplitter() 59 | { 60 | } 61 | int prefault() { 62 | int sum = 0; 63 | for (size_t i = 0; i < size_; i += 4096) 64 | sum += d_[i]; 65 | return sum; 66 | } 67 | bool split(split_t *ma, int ncores, const char *stop, size_t align = 0) { 68 | pthread_mutex_lock(&mu_); 69 | if (pos_ >= size_) { 70 | pthread_mutex_unlock(&mu_); 71 | return false; 72 | } 73 | if (nsplit_ == 0) 74 | nsplit_ = ncores * def_nsplits_per_core; 75 | 76 | ma->data = (void *) &d_[pos_]; 77 | ma->length = std::min(size_ - pos_, size_ / nsplit_); 78 | ma->pos = pos_; 79 | if (align) { 80 | ma->length = round_down(ma->length, align); 81 | assert(ma->length); 82 | } 83 | 84 | pos_ += ma->length; 85 | for (; pos_ < size_ && stop && !strchr(stop, d_[pos_]); ++pos_, ++ma->length); 86 | 87 | pthread_mutex_unlock(&mu_); 88 | return true; 89 | }; 90 | 91 | void trim(size_t sz) { 92 | assert(sz <= size_); 93 | size_ = sz; 94 | } 95 | size_t 
size() const { 96 | return size_; 97 | } 98 | 99 | private: 100 | char *d_; 101 | size_t size_; 102 | size_t nsplit_; 103 | size_t pos_; 104 | mmap_file mf_; 105 | pthread_mutex_t mu_; 106 | }; 107 | 108 | struct split_word { 109 | split_word(split_t *ma) : ma_(ma), pos_(0) { 110 | assert(ma_ && ma_->data); 111 | } 112 | char *fill(char *k, size_t maxlen, size_t &klen) { 113 | char *d = (char *)ma_->data; 114 | klen = 0; 115 | for (; pos_ < ma_->length && !letter(d[pos_]); ++pos_) 116 | ; 117 | if (pos_ == ma_->length) 118 | return NULL; 119 | char *index = &d[pos_]; 120 | for (; pos_ < ma_->length && letter(d[pos_]); ++pos_) { 121 | k[klen++] = toupper(d[pos_]); 122 | //assert(klen < maxlen); 123 | if (klen == maxlen-1) 124 | { 125 | break; 126 | } 127 | } 128 | k[klen] = 0; 129 | return index; 130 | } 131 | private: 132 | bool letter(char c) { 133 | return toupper(c) >= 'A' && toupper(c) <= 'Z'; 134 | } 135 | split_t *ma_; 136 | size_t pos_; 137 | }; 138 | 139 | #endif 140 | -------------------------------------------------------------------------------- /metis/group.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 
14 | */ 15 | #ifndef GROUP_HH_ 16 | #define GROUP_HH_ 1 17 | 18 | #include "mr-types.hh" 19 | #include "bench.hh" 20 | #include "appbase.hh" 21 | #include 22 | #include 23 | #ifdef JOS_USER 24 | #include 25 | #endif 26 | 27 | template 28 | inline void group_one_sorted(C &a, F &f, KF &kf) { 29 | // group and apply functor 30 | size_t n = a.size(); 31 | keyvals_t kvs; 32 | for (size_t i = 0; i < n;) { 33 | kvs.key_ = a[i].key_; 34 | kvs.map_value_move(&a[i]); 35 | ++i; 36 | for (; i < n && !static_appbase::key_compare(kvs.key_, a[i].key_); ++i) { 37 | kf(a[i].key_); 38 | kvs.map_value_move(&a[i]); 39 | } 40 | f(kvs); 41 | } 42 | } 43 | 44 | template 45 | inline void group_unsorted(C **a, int na, F &f, PC &pc, KF &kf) { 46 | if (na == 1) { 47 | a[0]->sort(pc); 48 | group_one_sorted(*a[0], f, kf); 49 | } 50 | if (na <= 1) 51 | return; 52 | size_t np = 0; 53 | for (int i = 0; i < na; i++) 54 | np += a[i]->size(); 55 | C *one = new C; 56 | one->set_capacity(np); 57 | for (int i = 0; i < na; i++) 58 | one->append(*a[i]); 59 | one->sort(pc); 60 | group_one_sorted(*one, f, kf); 61 | delete one; 62 | } 63 | 64 | template 65 | inline void group_sorted(C **nodes, int n, F &f, KF &kf) { 66 | if (!n) 67 | return; 68 | typename C::iterator it[JOS_NCPU]; 69 | for (int i = 0; i < n; i++) 70 | it[i] = nodes[i]->begin(); 71 | int marks[JOS_NCPU]; 72 | keyvals_t dst; 73 | while (1) { 74 | int min_idx = -1; 75 | bzero(marks, sizeof(marks)); 76 | int m = 0; 77 | // Find minimum key 78 | for (int i = 0; i < n; ++i) { 79 | if (it[i] == nodes[i]->end()) 80 | continue; 81 | int cmp = 0; 82 | if (min_idx >= 0) 83 | cmp = static_appbase::key_compare(it[min_idx]->key_, it[i]->key_); 84 | if (min_idx < 0 || cmp > 0) { 85 | ++ m; 86 | marks[i] = m; 87 | min_idx = i; 88 | } else if (!cmp) 89 | marks[i] = m; 90 | } 91 | if (min_idx < 0) 92 | break; 93 | // Merge all the values with the same mimimum key. 
94 | dst.key_ = it[min_idx]->key_; 95 | for (int i = 0; i < n; ++i) { 96 | if (marks[i] != m) 97 | continue; 98 | dst.map_value_move(&(*it[i])); 99 | ++it[i]; 100 | for (; it[i] != nodes[i]->end() && 101 | static_appbase::key_compare(dst.key_, it[i]->key_) == 0; ++it[i]) { 102 | kf(it[i]->key_); 103 | it[i]->key_ = NULL; 104 | dst.map_value_move(&(*it[i])); 105 | } 106 | } 107 | f(dst); 108 | } 109 | } 110 | 111 | #endif 112 | -------------------------------------------------------------------------------- /metis/ibs.cc: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 
14 | */ 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "cpumap.hh" 22 | #include "ibs.hh" 23 | 24 | enum { ibs_enabled = 0 }; 25 | 26 | struct urecord { 27 | /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */ 28 | uint64_t ibs_op_rip; 29 | /* MSRC001_1035 IBS Op Data Register */ 30 | uint64_t ibs_op_data1; 31 | /* MSRC001_1036 IBS Op Data 2 Register */ 32 | uint64_t ibs_op_data2; 33 | /* MSRC001_1037 IBS Op Data 3 Register */ 34 | uint64_t ibs_op_data3; 35 | /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */ 36 | uint64_t ibs_dc_linear; 37 | /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */ 38 | uint64_t ibs_dc_phys; 39 | 40 | char sdp_valid; 41 | char ibs_dc_kmem_cachep_name[32]; 42 | uint64_t bp; 43 | void *ibs_dc_kmem_cache_caller; 44 | int ibs_dc_kmem_cache_offset; 45 | 46 | unsigned long ts; 47 | } __attribute__ ((__packed__)); 48 | 49 | #define NUREC 3000 50 | #define PAGE_SIZE 4096 51 | #define NUREC_BYTES (NUREC * sizeof(struct urecord)) 52 | #define NUREC_SIZE ((NUREC_BYTES + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) 53 | #define NUREC_PAGES (NUREC_SIZE / PAGE_SIZE) 54 | 55 | static __thread uint64_t nsamples = 0; 56 | static __thread uint64_t latency = 0; 57 | 58 | static void writefile(const char *path, const char *str) { 59 | int fd = open(path, O_WRONLY); 60 | assert(fd >= 0); 61 | assert(write(fd, str, strlen(str)) == ssize_t(strlen(str))); 62 | close(fd); 63 | } 64 | 65 | static void readsamples(const char *path) { 66 | int fd = open(path, O_RDONLY); 67 | assert(fd >= 0); 68 | struct urecord *ur = 69 | (struct urecord *) mmap(0, NUREC_SIZE, PROT_READ, MAP_PRIVATE, fd, 0); 70 | assert(ur != MAP_FAILED); 71 | latency = 0; 72 | for (nsamples = 0; nsamples < NUREC && ur[nsamples].ibs_op_rip; 73 | nsamples++) 74 | latency += ((ur[nsamples].ibs_op_data3 >> 32) & 0xffff); 75 | close(fd); 76 | munmap(ur, NUREC_SIZE); 77 | } 78 | 79 | void ibs_start(int cid) { 80 | if 
(!ibs_enabled) 81 | return; 82 | cid = cpumap_physical_cpuid(cid); 83 | nsamples = 0; 84 | latency = 0; 85 | char path[512]; 86 | char value[20]; 87 | // clear samples 88 | sprintf(path, "/sys/kernel/amd10h-ibs/cpu%d/record", cid); 89 | writefile(path, "0"); 90 | // set opdata2 91 | sprintf(path, "/sys/kernel/amd10h-ibs/cpu%d/opdata2", cid); 92 | // notify the ibs module to record all types of cache refills 93 | writefile(path, "1 2 3"); 94 | // set opdata3 95 | sprintf(path, "/sys/kernel/amd10h-ibs/cpu%d/opdata3", cid); 96 | // track loads only (the latency for stores is not valid) 97 | sprintf(value, "%x", (1 << 7) | (1 << 0)); 98 | writefile(path, value); 99 | // set opdata3pred 100 | sprintf(path, "/sys/kernel/amd10h-ibs/cpu%d/opdata3pred", cid); 101 | writefile(path, "="); 102 | // set opctl to start 103 | sprintf(path, "/sys/kernel/amd10h-ibs/cpu%d/opctl", cid); 104 | sprintf(value, "%x", (0 << 19) | (1 << 17) | 0xffff); 105 | writefile(path, value); 106 | } 107 | 108 | void ibs_stop(int cid) { 109 | if (!ibs_enabled) 110 | return; 111 | cid = cpumap_physical_cpuid(cid); 112 | char path[512]; 113 | // set opctl to stop 114 | sprintf(path, "/sys/kernel/amd10h-ibs/cpu%d/opctl", cid); 115 | writefile(path, "0"); 116 | sprintf(path, "/sys/kernel/amd10h-ibs/cpu%d/record", cid); 117 | readsamples(path); 118 | } 119 | 120 | uint64_t ibs_read_count(int cid) { 121 | return nsamples; 122 | } 123 | 124 | uint64_t ibs_read_latency(int cid) { 125 | return latency; 126 | } 127 | -------------------------------------------------------------------------------- /metis/ibs.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software 
/* without restriction, subject to the conditions listed
 * in the Metis LICENSE file. These conditions include: you must preserve this
 * copyright notice, and you cannot mention the copyright holders in
 * advertising related to the Software without their permission. The Software
 * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a
 * summary of the Metis LICENSE file; the license in that file is legally
 * binding.
 */
#ifndef IBS_HH_
#define IBS_HH_ 1

// NOTE(review): the header name was lost in extraction; <stdint.h> is assumed
// because the declarations below use uint64_t.
#include <stdint.h>

/** Start IBS sampling on logical CPU @a cid. */
void ibs_start(int cid);

/** Stop IBS sampling on logical CPU @a cid and collect its records. */
void ibs_stop(int cid);

/** Number of samples collected by the last ibs_stop(). */
uint64_t ibs_read_count(int cid);

/** Latency accumulated over the samples of the last ibs_stop(). */
uint64_t ibs_read_latency(int cid);

#endif
// --------------------------------------------------------------------------------
// /metis/mergesort.hh:
// --------------------------------------------------------------------------------
/* Metis
 * Yandong Mao, Robert Morris, Frans Kaashoek
 * Copyright (c) 2012 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, subject to the conditions listed
 * in the Metis LICENSE file. These conditions include: you must preserve this
 * copyright notice, and you cannot mention the copyright holders in
 * advertising related to the Software without their permission. The Software
 * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a
 * summary of the Metis LICENSE file; the license in that file is legally
 * binding.
 */
14 | */ 15 | #ifndef MERGESORT_HH_ 16 | #define MERGESORT_HH_ 17 | 18 | #include "bench.hh" 19 | #include "mr-types.hh" 20 | 21 | /** @brief: Merge @a[@afirst + @astep * i] (0 <= i < @nmya), and output to @sized_output */ 22 | template 23 | void mergesort_impl(C *a, size_t nmya, size_t afirst, size_t astep, F &pcmp, C &sized_output) { 24 | typedef typename C::iterator iterator_type; 25 | xarray ai; 26 | for (size_t i = 0; i < nmya; ++i) { 27 | iterator_type mi = a[afirst + i * astep].begin(); 28 | if (mi != mi.parent_end()) 29 | ai.push_back(mi); 30 | } 31 | size_t nsorted = 0; 32 | while (nsorted < sized_output.size()) { 33 | assert(ai.size()); 34 | int min_idx = 0; 35 | typename C::element_type *min_pair = ai[0].current(); 36 | for (size_t i = 1; i < ai.size(); ++i) 37 | if (pcmp(min_pair, ai[i].current()) > 0) { 38 | min_pair = ai[i].current(); 39 | min_idx = i; 40 | } 41 | sized_output[nsorted ++] = *min_pair; 42 | ++ai[min_idx]; 43 | if (ai[min_idx] == ai[min_idx].parent_end()) 44 | ai.remove(min_idx); 45 | } 46 | assert(!ai.size()); 47 | } 48 | 49 | template 50 | C *mergesort(xarray &a, size_t astep, size_t afirst, F &pcmp) { 51 | size_t nmya = a.size() / astep + (size_t(afirst) < (a.size() % astep)); 52 | size_t np = 0; 53 | for (size_t i = 0; i < nmya; i++) 54 | np += a[afirst + i * astep].size(); 55 | C *out = new C(np); 56 | if (np == 0) 57 | return out; 58 | mergesort_impl(a.array(), nmya, afirst, astep, pcmp, *out); 59 | dprintf("mergesort: afirst %zd astep %zd (collections %zd : nr-kvs %zu)\n", 60 | afirst, astep, a.size(), np); 61 | return out; 62 | } 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /metis/micro/btree_unit.cc: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any 
person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 14 | */ 15 | #include "btree.hh" 16 | #include "application.hh" 17 | #include "test_util.hh" 18 | #include 19 | #include 20 | using namespace std; 21 | 22 | struct mock_app : public map_only { 23 | int key_compare(const void *k1, const void *k2) { 24 | int64_t i1 = int64_t(k1); 25 | int64_t i2 = int64_t(k2); 26 | return i1 - i2; 27 | } 28 | 29 | bool split(split_t *ma, int ncore) { 30 | assert(0); 31 | } 32 | void map_function(split_t *ma) { 33 | assert(0); 34 | } 35 | }; 36 | 37 | typedef btree_param btree_param_type; 39 | typedef btree_type this_btree; 40 | 41 | void check_tree(this_btree &bt) { 42 | int64_t i = 1; 43 | auto it = bt.begin(); 44 | while (it != bt.end()) { 45 | CHECK_EQ(i, int64_t(it->key_)); 46 | CHECK_EQ(1, int64_t(it->size())); 47 | CHECK_EQ(i + 1, int64_t((*it)[0])); 48 | ++it; 49 | ++i; 50 | } 51 | assert(size_t(i) == (bt.size() + 1)); 52 | } 53 | 54 | void check_tree_copy(this_btree &bt) { 55 | xarray dst; 56 | bt.copy(&dst); 57 | for (int64_t i = 1; i <= int64_t(bt.size()); ++i) { 58 | CHECK_EQ(i, int64_t(dst[i - 1].key_)); 59 | CHECK_EQ(1, int64_t(dst[i - 1].size())); 60 | CHECK_EQ(i + 1, int64_t(dst[i - 1][0])); 61 | ++i; 62 | } 63 | } 64 | 65 | void check_tree_copy_and_free(this_btree &bt) { 66 | xarray dst; 67 | bt.copy(&dst); 68 | bt.shallow_free(); 69 | for (int64_t i = 1; i <= int64_t(bt.size()); ++i) { 70 | CHECK_EQ(i, int64_t(dst[i - 1].key_)); 71 | 
CHECK_EQ(1, int64_t(dst[i - 1].size())); 72 | CHECK_EQ(i + 1, int64_t(dst[i - 1][0])); 73 | ++i; 74 | } 75 | } 76 | 77 | void test1() { 78 | this_btree bt; 79 | bt.init(); 80 | check_tree(bt); 81 | check_tree_copy(bt); 82 | for (int64_t i = 1; i < 1000; ++i) { 83 | bt.map_insert_sorted_copy_on_new((void *)i, (void *)(i + 1), 4, 0); 84 | check_tree(bt); 85 | check_tree_copy(bt); 86 | } 87 | assert(bt.size() == 999); 88 | } 89 | 90 | void test2() { 91 | this_btree bt; 92 | bt.init(); 93 | check_tree(bt); 94 | check_tree_copy(bt); 95 | for (int64_t i = 1; i < 1000; ++i) { 96 | keyvals_t kvs; 97 | kvs.key_ = (void *)i; 98 | kvs.push_back((void *) (i + 1)); 99 | bt.map_insert_sorted_new_and_raw(&kvs); 100 | kvs.init(); 101 | check_tree(bt); 102 | check_tree_copy(bt); 103 | CHECK_EQ(size_t(i), bt.size()); 104 | } 105 | assert(bt.size() == 999); 106 | check_tree_copy_and_free(bt); 107 | } 108 | 109 | int main(int argc, char *argv[]) { 110 | mock_app app; 111 | static_appbase::set_app(&app); 112 | test1(); 113 | test2(); 114 | cerr << "PASS" << endl; 115 | return 0; 116 | } 117 | -------------------------------------------------------------------------------- /metis/micro/misc.cc: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. 
This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 14 | */ 15 | #include "bench.hh" 16 | #include "test_util.hh" 17 | #include 18 | 19 | int main(int argc, char *argv[]) { 20 | uint64_t f = get_cpu_freq(); 21 | std::cout << f << std::endl; 22 | CHECK_GT(f, uint64_t(0)); 23 | std::cout << "PASS" << std::endl; 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /metis/micro/search_unit.cc: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 
14 | */ 15 | #include "bsearch.hh" 16 | #include 17 | #include 18 | #include 19 | 20 | static int compare(const int *a, const int *b) { 21 | return *a - *b; 22 | } 23 | 24 | int main() { 25 | int x[] = {7, 8, 9, 10}; 26 | bool found; 27 | 28 | int key = 11; 29 | assert(xsearch::lower_bound(&key, x, 4, compare, &found) == 4 && !found); 30 | 31 | key = 5; 32 | assert(xsearch::lower_bound(&key, x, 4, compare, &found) == 0 && !found); 33 | 34 | key = 7; 35 | assert(xsearch::lower_bound(&key, x, 4, compare, &found) == 0 && found); 36 | 37 | key = 10; 38 | assert(xsearch::lower_bound(&key, x, 4, compare, &found) == 3 && found); 39 | 40 | int y[] = {7, 8, 9, 10, 11}; 41 | key = 12; 42 | assert(xsearch::lower_bound(&key, y, 5, compare, &found) == 5 && !found); 43 | 44 | key = 5; 45 | assert(xsearch::lower_bound(&key, y, 5, compare, &found) == 0 && !found); 46 | 47 | key = 7; 48 | assert(xsearch::lower_bound(&key, y, 5, compare, &found) == 0 && found); 49 | 50 | key = 10; 51 | assert(xsearch::lower_bound(&key, y, 5, compare, &found) == 3 && found); 52 | 53 | key = 11; 54 | assert(xsearch::lower_bound(&key, y, 5, compare, &found) == 4 && found); 55 | 56 | printf("PASS\n"); 57 | } 58 | -------------------------------------------------------------------------------- /metis/micro/sf_sample.cc: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. 
The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 14 | */ 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "bench.hh" 24 | 25 | struct gstate_type { 26 | volatile int start; 27 | union { 28 | struct { 29 | volatile int ready; 30 | volatile uint64_t cycles; 31 | } v; 32 | char __pad[JOS_CLINE]; 33 | } state[JOS_NCPU] __attribute__ ((aligned(JOS_CLINE))); 34 | }; 35 | 36 | static gstate_type *gstate; 37 | 38 | static uint64_t ncores; 39 | 40 | enum { nmallocs = 1000000 }; 41 | 42 | void * 43 | worker(void *arg) 44 | { 45 | int c = ptr2int(arg); 46 | affinity_set(c); 47 | if (c) { 48 | gstate->state[c].v.ready = 1; 49 | while (!gstate->start) ; 50 | } else { 51 | for (uint64_t i = 1; i < ncores; i++) { 52 | while (!gstate->state[i].v.ready) ; 53 | gstate->state[i].v.ready = 0; 54 | } 55 | gstate->start = 1; 56 | } 57 | uint64_t start = read_tsc(); 58 | for (uint64_t i = 0; i < nmallocs; i++) { 59 | void *p = malloc(100); 60 | (void) p; 61 | } 62 | uint64_t end = read_tsc(); 63 | gstate->state[c].v.cycles = end - start; 64 | gstate->state[c].v.ready = 1; 65 | if (!c) { 66 | for (uint64_t i = 1; i < ncores; i++) 67 | while (!gstate->state[i].v.ready) ; 68 | uint64_t ncycles = 0; 69 | for (uint64_t i = 0; i < ncores; i++) 70 | ncycles += gstate->state[i].v.cycles; 71 | printf("Cycles per malloc: %ld\n", ncycles / nmallocs); 72 | } 73 | return NULL; 74 | } 75 | 76 | int 77 | main(int argc, char **argv) 78 | { 79 | affinity_set(0); 80 | if (argc < 2) { 81 | printf("Usage: <%s> number-cores\n", argv[0]); 82 | exit(EXIT_FAILURE); 83 | } 84 | ncores = atoi(argv[1]); 85 | assert(ncores <= JOS_NCPU); 86 | gstate = (gstate_type *) 87 | mmap(NULL, sizeof(gstate_type), PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 88 | -1, 0); 89 | memset(gstate, 0, sizeof(gstate_type)); 90 | if 
(gstate == MAP_FAILED) { 91 | printf("mmap error: %d\n", errno); 92 | exit(EXIT_FAILURE); 93 | } 94 | for (uint64_t i = 1; i < ncores; i++) { 95 | pthread_t tid; 96 | pthread_create(&tid, NULL, worker, int2ptr(i)); 97 | } 98 | uint64_t start = read_tsc(); 99 | worker(int2ptr(0)); 100 | uint64_t end = read_tsc(); 101 | printf("Total time %ld million cycles\n", (end - start) / 1000000); 102 | munmap(gstate, sizeof(*gstate)); 103 | } 104 | -------------------------------------------------------------------------------- /metis/mr-types.cc: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 
14 | */ 15 | #include "mr-types.hh" 16 | #include "group.hh" 17 | #include "btree.hh" 18 | #include "appbase.hh" 19 | 20 | struct append_functor { 21 | append_functor(xarray *x) : x_(x) {} 22 | void operator()(keyvals_t &kvs) { 23 | // kvs.vals is owned by callee 24 | x_->push_back(kvs); 25 | kvs.init(); 26 | } 27 | private: 28 | xarray *x_; 29 | }; 30 | 31 | bool keyval_arr_t::map_append_copy(void *key, void *val, size_t keylen, unsigned hash) { 32 | void *ik = static_appbase::key_copy(key, keylen); 33 | keyval_t tmp(ik, val, hash); 34 | push_back(tmp); 35 | return true; 36 | } 37 | 38 | void keyval_arr_t::map_append_raw(keyval_t *t) { 39 | push_back(*t); 40 | } 41 | 42 | bool keyvals_arr_t::map_insert_sorted_copy_on_new(void *key, void *val, size_t keylen, unsigned hash) { 43 | keyvals_t tmp(key, hash); 44 | size_t pos = 0; 45 | bool newkey = atomic_insert(&tmp, static_appbase::pair_comp, &pos); 46 | if (newkey) 47 | at(pos)->key_ = static_appbase::key_copy(key, keylen); 48 | at(pos)->map_value_insert(val); 49 | return newkey; 50 | } 51 | 52 | void keyvals_arr_t::map_insert_sorted_new_and_raw(keyvals_t *p) { 53 | size_t pos = 0; 54 | bool newkey = atomic_insert(p, static_appbase::pair_comp, &pos); 55 | assert(newkey); 56 | } 57 | 58 | void keyval_arr_t::transfer(xarray *dst) { 59 | append_functor f(dst); 60 | group_one_sorted(*this, f, static_appbase::key_free); 61 | this->init(); 62 | } 63 | 64 | void keyvals_t::map_value_insert(void *v) { 65 | static_appbase::map_values_insert(this, v); 66 | } 67 | 68 | void keyvals_t::map_value_move(keyval_t *src) { 69 | map_value_insert(src->val); 70 | src->reset(); 71 | } 72 | 73 | void keyvals_t::map_value_move(keyvals_t *src) { 74 | static_appbase::map_values_move(this, src); 75 | } 76 | 77 | void keyvals_t::map_value_move(keyvals_len_t *src) { 78 | assert(static_appbase::application_type() == atype_mapgroup); // must be mapgroup 79 | append(src->vals, src->len); 80 | src->reset(); 81 | } 82 | 
-------------------------------------------------------------------------------- /metis/mr-types.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 14 | */ 15 | #ifndef MR_TYPES_HH_ 16 | #define MR_TYPES_HH_ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include "array.hh" 23 | 24 | struct split_t { 25 | void *data; 26 | size_t length; 27 | size_t pos; 28 | }; 29 | 30 | struct keyval_t { 31 | void *key_; 32 | void *val; 33 | unsigned hash; 34 | keyval_t() { 35 | init(); 36 | } 37 | explicit keyval_t(void *k) { 38 | set(k, NULL, 0); 39 | } 40 | keyval_t(void *k, unsigned h) { 41 | set(k, NULL, h); 42 | } 43 | keyval_t(void *k, void *v, unsigned h) { 44 | set(k, v, h); 45 | } 46 | keyval_t(void *k, void *v) { 47 | set(k, v, 0); 48 | } 49 | ~keyval_t() { 50 | reset(); 51 | } 52 | void assign(const keyval_t &a) { 53 | set(a.key_, a.val, a.hash); 54 | } 55 | void init() { 56 | set(NULL, NULL, 0); 57 | } 58 | void reset() { 59 | init(); 60 | } 61 | private: 62 | void set(void *k, void *v, unsigned h) { 63 | key_ = k; 64 | val = v; 65 | hash = h; 66 | } 67 | }; 68 | 69 | struct keyvals_len_t { 70 | void *key_; 71 | void **vals; 72 | uint64_t len; 73 | 
keyvals_len_t() { 74 | init(); 75 | } 76 | explicit keyvals_len_t(void *k) { 77 | set(k, NULL, 0); 78 | } 79 | keyvals_len_t(void *k, void **v, uint64_t l) { 80 | set(k, v, l); 81 | } 82 | ~keyvals_len_t() { 83 | reset(); 84 | } 85 | void assign(const keyvals_len_t &a) { 86 | set(a.key_, a.vals, a.len); 87 | } 88 | void init() { 89 | set(NULL, NULL, 0); 90 | } 91 | /* @brief: may need to free memory */ 92 | void reset() { 93 | if (vals) 94 | free(vals); 95 | init(); 96 | } 97 | private: 98 | void set(void *k, void **v, uint64_t l) { 99 | key_ = k; 100 | vals = v; 101 | len = l; 102 | } 103 | }; 104 | 105 | /* types used internally */ 106 | struct keyvals_len_arr_t: public xarray { 107 | }; 108 | 109 | struct keyvals_t : public xarray { 110 | void *key_; /* put key at the same offset with keyval_t */ 111 | unsigned hash; 112 | keyvals_t() { 113 | init(); 114 | } 115 | explicit keyvals_t(void *k) { 116 | init(); 117 | set(k, 0); 118 | } 119 | keyvals_t(void *k, unsigned h) { 120 | reset(); 121 | set(k, h); 122 | } 123 | ~keyvals_t() { 124 | reset(); 125 | } 126 | void init() { 127 | set(NULL, 0); 128 | xarray::init(); 129 | } 130 | void reset() { 131 | set(0, 0); 132 | xarray::clear(); 133 | } 134 | void assign(const keyvals_t &a) { 135 | set(a.key_, a.hash); 136 | xarray::assign(a); 137 | } 138 | void map_value_insert(void *v); 139 | void map_value_move(keyval_t *src); 140 | void map_value_move(keyvals_t *src); 141 | void map_value_move(keyvals_len_t *src); 142 | private: 143 | void set(void *k, unsigned h) { 144 | key_ = k; 145 | hash = h; 146 | } 147 | }; 148 | 149 | struct keyval_arr_t : public xarray { 150 | bool map_append_copy(void *k, void *v, size_t keylen, unsigned hash); 151 | void map_append_raw(keyval_t *p); 152 | void transfer(xarray *dst); 153 | using xarray::transfer; 154 | }; 155 | 156 | struct keyvals_arr_t : public xarray { 157 | bool map_insert_sorted_copy_on_new(void *k, void *v, size_t keylen, unsigned hash); 158 | void 
map_insert_sorted_new_and_raw(keyvals_t *p); 159 | }; 160 | 161 | enum task_type_t { 162 | MAP, 163 | REDUCE, 164 | MERGE, 165 | MR_PHASES, 166 | }; 167 | 168 | /* suggested number of map tasks per core. */ 169 | enum { def_nsplits_per_core = 16 }; 170 | 171 | enum app_type_t { 172 | atype_maponly = 0, 173 | atype_mapgroup, 174 | atype_mapreduce 175 | }; 176 | 177 | #endif 178 | -------------------------------------------------------------------------------- /metis/pmcreg.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 14 | */ 15 | #ifndef JOS_INC_KDEBUG_HH_ 16 | #define JOS_INC_KDEBUG_HH_ 1 17 | 18 | /* From Corey inc/kdebug.h */ 19 | 20 | /* 21 | * Hardware counters 22 | */ 23 | 24 | /* Performance Event Select register */ 25 | #define PES_OS_MODE (1 << 17) 26 | #define PES_USR_MODE (1 << 16) 27 | #define PES_INT_EN (1 << 20) 28 | #define PES_CNT_EN (1 << 22) 29 | #define PES_UNIT_SHIFT 8 30 | #define PES_CNT_TRSH_SHIFT 24 31 | #define PES_EVT(mas) \ 32 | (((uint64_t)(((mas >> 8) & 0x0F) << 32)) | (mas & 0x00FF)) 33 | #define PES_EVT_INTEL(mas) (mas & 0x00FF) 34 | 35 | /* Some event masks. 
36 | * See "AMD Family 10h Processor BKDG" for details: 37 | * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116.pdf 38 | */ 39 | #define DATA_CACHE_ACCESSES 0x040 40 | #define DATA_CACHE_MISSES 0x041 41 | #define DATA_CACHE_REFILLS 0x042 42 | #define DATA_CACHE_REFILLS_NB 0x01 /* northbridge */ 43 | #define DATA_CACHE_REFILLS_SS 0x02 /* share-state */ 44 | #define DATA_CACHE_REFILLS_ES 0x04 /* exclusive-state */ 45 | #define DATA_CACHE_REFILLS_OS 0x08 /* owned-state */ 46 | #define DATA_CACHE_REFILLS_MS 0x10 /* modified-state */ 47 | #define DATA_CACHE_EVICT 0x044 48 | #define DATA_CACHE_EVICT_IS 0x01 /* invalid-state */ 49 | #define DATA_CACHE_EVICT_SS 0x02 /* shared-state */ 50 | #define DATA_CACHE_EVICT_ES 0x04 /* exclusive-state */ 51 | #define DATA_CACHE_EVICT_OS 0x08 /* owned-state */ 52 | #define DATA_CACHE_EVICT_MS 0x10 /* modified-state */ 53 | #define DATA_CACHE_EVICT_NTA1 0x20 /* brought in by prefetch */ 54 | #define DATA_CACHE_EVICT_NTA0 0x40 /* not brought in by prefetch */ 55 | #define SYSTEM_READ_RESP 0x06c /* norhtbridge read responses */ 56 | #define SYSTEM_READ_RESP_ES 0x01 /* exclusive-state */ 57 | #define SYSTEM_READ_RESP_MS 0x02 /* modified-state */ 58 | #define SYSTEM_READ_RESP_SS 0x04 /* shared-state */ 59 | #define SYSTEM_READ_RESP_DE 0x10 /* data error */ 60 | #define L2_CACHE_MISSES 0x07e 61 | #define L2_CACHE_MISSES_IC 0x01 /* IC fill */ 62 | #define L2_CACHE_MISSES_DC 0x02 /* DC fill */ 63 | #define L2_CACHE_MISSES_TLB 0x04 /* TLB page table walk */ 64 | #define L2_CACHE_MISSES_PRE 0x08 /* DC hardware prefetch */ 65 | #define L3_CACHE_MISSES 0x4e1 66 | #define L3_CACHE_MISSES_EXCL 0x01 /* Read block exclusive */ 67 | #define L3_CACHE_MISSES_SHAR 0x02 /* Read block shared */ 68 | #define L3_CACHE_MISSES_MOD 0x04 /* Read block modify */ 69 | #define L3_CACHE_MISSES_CORE0 0x10 /* Core 0 select */ 70 | #define L3_CACHE_MISSES_CORE1 0x20 /* Core 1 select */ 71 | #define L3_CACHE_MISSES_CORE2 0x40 /* Core 2 
select */ 72 | #define L3_CACHE_MISSES_CORE3 0x80 /* Core 3 select */ 73 | #define L2_CACHE_MISSES 0x07e 74 | #define L2_CACHE_MISSES_ICFILL 0x01 /* IC fill */ 75 | #define L2_CACHE_MISSES_DCFILL 0x02 /* DC fill */ 76 | #define L2_CACHE_MISSES_TLB 0x04 /* TLB page table walk */ 77 | #define L2_CACHE_MISSES_HWPR 0x08 /* Hardware prefetch from DC */ 78 | 79 | /* Intel at-retirement events. 80 | * See "Intel Arch. Manual Volume 3B" for details: 81 | * http://download.intel.com/design/processor/manuals/253669.pdf 82 | */ 83 | #define ITLB_MISS_RETIRED 0xc9 84 | #define ITLB_MISS_RETIRED_MASK 0x0 85 | #define RETIRED_CACHE_MISS 0xcb 86 | #define RETIRED_L1D_MISS_MASK 0x1 87 | #define RETIRED_L1D_LINE_MISS_MASK 0x2 88 | #define RETIRED_L2_MISS_MASK 0x4 89 | #define RETIRED_L2_LINE_MISS_MASK 0x8 90 | #define RETIRED_DTLB_MISS_MASK 0x10 91 | 92 | /* "Table 18-7" */ 93 | #define CORE_SELECT_ALL 0xC0 94 | #define CORE_SELECT_ME 0x40 95 | 96 | /* "Table 18-8" */ 97 | #define AGENT_SELCT_ALL 0x20 98 | #define AGENT_SELECT_ME 0x0 99 | 100 | /* "Table 18-9" */ 101 | #define HWPREFECT_ALL 0x30 102 | #define HWPREFETCH_INCL 0x10 103 | #define HWPREFETCH_EXCL 0x0 104 | 105 | /* "Table 18-10" */ 106 | #define MESI_MOD 0x8 107 | #define MESI_EXCL 0x4 108 | #define MESI_SHARE 0x2 109 | #define MESI_INVAL 0x1 110 | #endif 111 | -------------------------------------------------------------------------------- /metis/predictor.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. 
These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 14 | */ 15 | #ifndef PREDICTOR_HH_ 16 | #define PREDICTOR_HH_ 1 17 | 18 | #include "mr-types.hh" 19 | 20 | struct rate_predict { 21 | void update_rate() { 22 | if (!last_) 23 | rate_ = current_; 24 | else 25 | rate_ = rate_ / 2 + (current_ - last_) / 2; 26 | last_ = current_; 27 | } 28 | void increment() { 29 | ++ current_; 30 | } 31 | /* @brief: the value of current after @n more intervals */ 32 | uint64_t predict(uint64_t n) { 33 | return rate_ * n + current_; 34 | } 35 | uint64_t rate_; 36 | uint64_t last_; 37 | uint64_t current_; 38 | }; 39 | 40 | union __attribute__ ((__packed__, __aligned__(JOS_CLINE))) predictor { 41 | struct { 42 | rate_predict key_; 43 | rate_predict pair_; 44 | uint64_t n_; // number of finished task 45 | }; 46 | char __pad[2 * JOS_CLINE]; 47 | 48 | void onepair(bool newkey) { 49 | key_.current_ += newkey; 50 | ++ pair_.current_; 51 | if (pair_.current_ % update_interval == 0) 52 | key_.update_rate(); 53 | } 54 | void task_finished() { 55 | pair_.update_rate(); 56 | ++n_; 57 | } 58 | void inc_predict(uint64_t *nk, uint64_t *np, int total_task) { 59 | uint64_t npi = pair_.predict(total_task - n_); 60 | *nk += key_.predict((npi - pair_.current_) / update_interval); 61 | *np += npi; 62 | } 63 | bool valid() { 64 | return n_; 65 | } 66 | private: 67 | enum { update_interval = 1000 }; 68 | }; 69 | 70 | inline uint64_t predict_nkey(predictor *e, int ne, int total_task) { 71 | uint64_t nk = 0; 72 | uint64_t np = 0; // # of keys and pairs per mapper 73 | int nvalid = 0; // # of workers that has sampled 74 | for (int i = 0; i < ne; ++i) 75 | if (e[i].valid()) 76 | ++ nvalid, 
e[i].inc_predict(&nk, &np, total_task); 77 | return nk /= nvalid; 78 | } 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /metis/profile.cc: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 
14 | */ 15 | #include 16 | #include 17 | #include "profile.hh" 18 | #include "bench.hh" 19 | #include "ibs.hh" 20 | #include "mr-types.hh" 21 | #include "threadinfo.hh" 22 | 23 | #ifdef PROFILE_ENABLED 24 | enum { profile_app = 1 }; 25 | enum { profile_phases = 1 }; 26 | enum { profile_kcmp = 0 }; 27 | enum { profile_worker = 1 }; 28 | /* Make sure the pmcs are programmed before enabling pmc */ 29 | enum { pmc_enabled = 0 }; 30 | 31 | enum { pmc0, pmc1, pmc2, pmc3, ibslat, ibscnt, tsc, app_tsc, app_kcmp, 32 | app_pmc1, statcnt }; 33 | 34 | #define stringify(name) #name 35 | 36 | static const char *cname[] = { 37 | stringify(pmc0), 38 | stringify(pmc1), 39 | stringify(pmc2), 40 | stringify(pmc3), 41 | stringify(ibslat), 42 | stringify(ibscnt), 43 | stringify(tsc), 44 | stringify(app_tsc), 45 | stringify(app_kcmp), 46 | stringify(app_pmc1), 47 | }; 48 | 49 | static inline uint64_t __read_pmc(int ecx) { 50 | if (pmc_enabled) 51 | return read_pmc(ecx); 52 | else 53 | return 0; 54 | } 55 | 56 | struct __attribute__ ((aligned(JOS_CLINE))) percore_stat { 57 | uint64_t v[MR_PHASES][statcnt]; 58 | rusage ru_; 59 | 60 | void enterkcmp() { 61 | ++v[cp_][app_kcmp]; 62 | } 63 | void leavekcmp() { 64 | } 65 | void enterapp() { 66 | last_[app_tsc] = read_tsc(); 67 | last_[app_pmc1] = __read_pmc(1); 68 | } 69 | void leaveapp() { 70 | v[cp_][app_tsc] += read_tsc() - last_[app_tsc]; 71 | v[cp_][app_pmc1] += __read_pmc(1) - last_[app_pmc1]; 72 | } 73 | void worker_start(int phase, int cid) { 74 | cp_ = phase; 75 | v[cp_][app_tsc] = 0; 76 | v[cp_][app_kcmp] = 0; 77 | v[cp_][app_pmc1] = 0; 78 | for (int i = 0; i < 4; ++i) 79 | last_[i] = __read_pmc(i); 80 | 81 | ibs_start(cid); 82 | last_[ibscnt] = ibs_read_count(cid); 83 | last_[ibslat] = ibs_read_latency(cid); 84 | last_[tsc] = read_tsc(); 85 | } 86 | void worker_end(int phase, int cid) { 87 | assert(phase == cp_); 88 | for (int i = 0; i < 4; ++i) 89 | v[cp_][i] = __read_pmc(i) - last_[i]; 90 | 91 | ibs_stop(cid); 92 | v[cp_][ibslat] 
= ibs_read_latency(cid) - last_[ibslat]; 93 | v[cp_][ibscnt] = ibs_read_count(cid) - last_[ibscnt]; 94 | v[cp_][tsc] = read_tsc() - last_[tsc]; 95 | } 96 | void sum(uint64_t &app_tsc, uint64_t &kcmp) { 97 | app_tsc = 0; 98 | kcmp = 0; 99 | for (int i = 0; i < MR_PHASES; ++i) { 100 | app_tsc += v[i][app_tsc]; 101 | kcmp += v[i][app_kcmp]; 102 | } 103 | } 104 | 105 | private: 106 | int cp_; // current phase 107 | uint64_t last_[statcnt]; 108 | }; 109 | 110 | static percore_stat stats[JOS_NCPU]; 111 | 112 | void prof_enterkcmp() { 113 | if (profile_app) { 114 | threadinfo *ti = threadinfo::current(); 115 | stats[ti->cur_core_].enterkcmp(); 116 | } 117 | } 118 | 119 | void prof_leavekcmp() { 120 | if (profile_app) { 121 | threadinfo *ti = threadinfo::current(); 122 | stats[ti->cur_core_].leavekcmp(); 123 | } 124 | } 125 | 126 | void prof_enterapp() { 127 | if (profile_app) { 128 | threadinfo *ti = threadinfo::current(); 129 | stats[ti->cur_core_].enterapp(); 130 | } 131 | } 132 | 133 | void prof_leaveapp() { 134 | if (profile_app) { 135 | threadinfo *ti = threadinfo::current(); 136 | stats[ti->cur_core_].leaveapp(); 137 | } 138 | } 139 | 140 | void prof_worker_start(int phase, int cid) { 141 | stats[cid].worker_start(phase, cid); 142 | } 143 | 144 | void prof_worker_end(int phase, int cid) { 145 | stats[cid].worker_end(phase, cid); 146 | } 147 | 148 | static void prof_print_phase(int phase, int ncores, uint64_t scale) { 149 | uint64_t tots[statcnt]; 150 | memset(tots, 0, sizeof(tots)); 151 | printf("core\t"); 152 | #define WIDTH "10" 153 | for (int i = 0; i < statcnt; ++i) 154 | printf("%" WIDTH "s", cname[i]); 155 | printf("\n"); 156 | for (int i = 0; i < ncores; ++i) { 157 | printf("%d\t", i); 158 | for (int j = 0; j < statcnt; ++j) { 159 | printf("%" WIDTH "ld", stats[i].v[phase][j] / scale); 160 | tots[j] += stats[i].v[phase][j] / scale; 161 | } 162 | printf("\n"); 163 | } 164 | printf("total@%d\t", phase); 165 | for (int i = 0; i < statcnt; ++i) 166 | printf("%" 
WIDTH "ld", tots[i]); 167 | printf("\n"); 168 | printf("total[ibslat] / total[ibscnt] = %ld, total[pmc0] / total[pmc] = %4.2f\n", 169 | tots[ibslat] / (tots[ibscnt] + 1), 170 | (double) tots[pmc0] / (double) tots[pmc1]); 171 | } 172 | 173 | void prof_print(int ncores) { 174 | if (profile_kcmp) { 175 | uint64_t tt = 0; 176 | uint64_t tkcmp = 0; 177 | for (int i = 0; i < ncores; ++i) { 178 | uint64_t app_tsc, kcmp; 179 | std::cout << i << "\t" << cycle_to_ms(app_tsc) << "ms, kcmp " 180 | << kcmp << std::endl; 181 | tt += app_tsc; 182 | tkcmp += kcmp; 183 | } 184 | std::cout << "Average time spent in application is " << cycle_to_ms(tt) 185 | << ", total key_compare " << tkcmp << std::endl; 186 | } 187 | if (profile_worker) { 188 | uint64_t scale = 1000; 189 | printf("MAP[scale=%ld]\n", scale); 190 | prof_print_phase(MAP, ncores, scale); 191 | printf("REDUCE[scale=%ld]\n", scale); 192 | prof_print_phase(REDUCE, ncores, scale); 193 | printf("MERGE[scale=%ld]\n", scale); 194 | prof_print_phase(MERGE, ncores, scale); 195 | } 196 | } 197 | 198 | void prof_phase_init() { 199 | if (!profile_phases) 200 | return; 201 | threadinfo *ti = threadinfo::current(); 202 | assert(getrusage(RUSAGE_SELF, &stats[ti->cur_core_].ru_) == 0); 203 | } 204 | 205 | void prof_phase_end() { 206 | if (!profile_phases) 207 | return; 208 | rusage ru; 209 | assert(getrusage(RUSAGE_SELF, &ru) == 0); 210 | threadinfo *ti = threadinfo::current(); 211 | percore_stat *st = &stats[ti->cur_core_]; 212 | printf("time(ms) user: %ld, system: %ld\n", 213 | tv2ms(ru.ru_utime) - tv2ms(st->ru_.ru_utime), 214 | tv2ms(ru.ru_stime) - tv2ms(st->ru_.ru_stime)); 215 | } 216 | 217 | #endif 218 | -------------------------------------------------------------------------------- /metis/profile.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is 
hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 14 | */ 15 | #ifndef PROFILE_HH_ 16 | #define PROFILE_HH_ 1 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #ifdef PROFILE_ENABLED 23 | void prof_enterapp(); 24 | void prof_leaveapp(); 25 | 26 | void prof_enterkcmp(); 27 | void prof_leavekcmp(); 28 | 29 | void prof_worker_start(int phase, int cid); 30 | void prof_worker_end(int phase, int cid); 31 | void prof_print(int ncores); 32 | 33 | void prof_phase_init(); 34 | void prof_phase_end(); 35 | 36 | #else 37 | 38 | #define prof_enterapp() 39 | #define prof_leaveapp() 40 | 41 | #define prof_enterkcmp() 42 | #define prof_leavekcmp() 43 | 44 | #define prof_phase_init() 45 | #define prof_phase_end() 46 | 47 | #define prof_worker_start(phase, cid) 48 | #define prof_worker_end(phase, cid) 49 | #define prof_print(ncpus) 50 | #endif 51 | #endif 52 | -------------------------------------------------------------------------------- /metis/pthreadpool.cc: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions 
listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 14 | */ 15 | #include "thread.hh" 16 | #include "mr-types.hh" 17 | #include "bench.hh" 18 | #include "cpumap.hh" 19 | #include "threadinfo.hh" 20 | #include 21 | #include 22 | 23 | struct __attribute__ ((aligned(JOS_CLINE))) athread_type { 24 | void *volatile a_; 25 | void *(*volatile f_) (void *); 26 | volatile char pending_; 27 | pthread_t tid_; 28 | volatile bool running_; 29 | 30 | template 31 | void set_task(void *arg, T &f) { 32 | a_ = arg; 33 | f_ = f; 34 | mfence(); 35 | pending_ = true; 36 | } 37 | 38 | void wait_finish() { 39 | while (running_) 40 | nop_pause(); 41 | } 42 | 43 | void wait_running() { 44 | while (pending_) 45 | nop_pause(); 46 | } 47 | void run_next_task() { 48 | while (!pending_) 49 | nop_pause(); 50 | running_ = true; 51 | pending_ = false; 52 | f_(a_); 53 | running_ = false; 54 | } 55 | }; 56 | 57 | namespace { 58 | 59 | athread_type tp_[JOS_NCPU]; 60 | bool tp_created_ = false; 61 | int ncore_ = 0; 62 | 63 | void *mthread_exit(void *) { 64 | pthread_exit(NULL); 65 | } 66 | 67 | void *mthread_entry(void *args) { 68 | threadinfo *ti = threadinfo::current(); 69 | ti->cur_core_ = ptr2int(args); 70 | assert(affinity_set(cpumap_physical_cpuid(ti->cur_core_)) == 0); 71 | while (true) 72 | tp_[ti->cur_core_].run_next_task(); 73 | } 74 | 75 | } 76 | 77 | void mthread_create(pthread_t * tid, int lid, void *(*start_routine) (void *), 78 | void *arg) { 79 | assert(tp_created_); 80 | if (lid == main_core) 81 | start_routine(arg); 82 | else { 83 | tp_[lid].wait_finish(); 84 | tp_[lid].set_task(arg, start_routine); 85 | 
tp_[lid].wait_running(); 86 | } 87 | } 88 | 89 | void mthread_join(pthread_t tid, int lid, void **retval) { 90 | tp_[lid].wait_finish(); 91 | if (retval) 92 | *retval = 0; 93 | } 94 | 95 | void mthread_init(int ncore) { 96 | if (tp_created_) 97 | return; 98 | threadinfo *ti = threadinfo::current(); 99 | cpumap_init(); 100 | ncore_ = ncore; 101 | ti->cur_core_ = main_core; 102 | assert(affinity_set(cpumap_physical_cpuid(main_core)) == 0); 103 | tp_created_ = true; 104 | bzero(tp_, sizeof(tp_)); 105 | for (int i = 0; i < ncore_; ++i) 106 | if (i == main_core) 107 | tp_[i].tid_ = pthread_self(); 108 | else 109 | assert(pthread_create(&tp_[i].tid_, NULL, mthread_entry, int2ptr(i)) == 0); 110 | } 111 | 112 | void mthread_finalize(void) { 113 | if (!tp_created_) 114 | return; 115 | for (int i = 0; i < ncore_; ++i) 116 | if (i != main_core) 117 | mthread_create(NULL, i, mthread_exit, NULL); 118 | for (int i = 0; i < ncore_; ++i) 119 | if (i != main_core) 120 | pthread_join(tp_[i].tid_, NULL); 121 | tp_created_ = false; 122 | } 123 | -------------------------------------------------------------------------------- /metis/reduce_bucket_manager.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. 
/**
 * Type-erased interface to the reduce-phase output buckets.
 *
 * The only implementation in this header is the reduce_bucket_manager
 * template; the notes below describe what that implementation does.
 */
struct reduce_bucket_manager_base {
    virtual ~reduce_bucket_manager_base() {}
    // Resize to n buckets, initialise each, select task 0, mark as initialised.
    virtual void init(int n) = 0;
    // Drop all buckets (resize to 0).
    virtual void reset() = 0;
    // Forward to the bucket array's trim(n).
    virtual void trim(size_t n) = 0;
    // Number of buckets.
    virtual size_t size() = 0;
    // Record i as the calling thread's current reduce task (kept in threadinfo).
    virtual void set_current_reduce_task(int i) = 0;
    // Merge the reduce output; lcpu is the calling core, ncpus the core count.
    virtual void merge_reduced_buckets(int ncpus, int lcpu) = 0;
    //virtual int rb0_size() = 0;
    // True once init() has been called.
    virtual bool get_init() const = 0;
};
*/ 67 | void merge_reduced_buckets(int ncpus, int lcpu) { 68 | C *out = NULL; 69 | const int use_psrs = USE_PSRS; 70 | if (!use_psrs) { 71 | out = mergesort(rb_, ncpus, lcpu, 72 | static_appbase::final_output_pair_comp); 73 | shallow_free_subarray(rb_, lcpu, ncpus); 74 | } else { 75 | // only main cpu has output 76 | if (lcpu == main_core) 77 | out = pi_.init(lcpu, sum_subarray(rb_)); 78 | assert(out || lcpu != main_core); 79 | C *myshare = pi_.do_psrs(rb_, ncpus, lcpu, 80 | static_appbase::final_output_pair_comp); 81 | myshare->init(); 82 | delete myshare; 83 | // Let one CPU free the input buckets 84 | if (lcpu == main_core) 85 | shallow_free_subarray(rb_); 86 | } 87 | if (out) { 88 | rb_[lcpu].swap(*out); 89 | delete out; 90 | } 91 | } 92 | void transfer(int p, C *dst) { 93 | assert(dst->size() == 0); 94 | get(p)->swap(*dst); 95 | } 96 | /*int rb0_size() 97 | { 98 | return rb_[0].size(); 99 | }*/ 100 | bool get_init() const 101 | { 102 | return _init; 103 | } 104 | private: 105 | int current_task() { 106 | return threadinfo::current()->cur_reduce_task_; 107 | } 108 | xarray rb_; // reduce buckets 109 | psrs pi_; 110 | bool _init = false; 111 | }; 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /metis/test_util.hh: -------------------------------------------------------------------------------- 1 | /* Metis 2 | * Yandong Mao, Robert Morris, Frans Kaashoek 3 | * Copyright (c) 2012 Massachusetts Institute of Technology 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to 7 | * deal in the Software without restriction, subject to the conditions listed 8 | * in the Metis LICENSE file. These conditions include: you must preserve this 9 | * copyright notice, and you cannot mention the copyright holders in 10 | * advertising related to the Software without their permission. 
#ifndef TEST_UTIL_HH_
#define TEST_UTIL_HH_ 1

// NOTE(review): the system header names and template parameter lists were
// missing in this copy of the file; they are restored from what the code uses.
#include <iostream>
#include <assert.h>
#include <stdlib.h>

/**
 * Fail the test unless expected == actual.
 *
 * On mismatch both values are printed to stderr and the process is
 * terminated. abort() follows assert(0) so the check still fails when the
 * test is compiled with NDEBUG (where assert expands to nothing).
 */
template <typename T1, typename T2>
inline void CHECK_EQ(const T1 &expected, const T2 &actual) {
    if (expected != actual) {
        std::cerr << "\tActual: " << actual
                  << "\n\tExpected: " << expected << std::endl;
        assert(0);
        abort();
    }
}

/**
 * Fail the test unless actual > comp.
 *
 * Reports and terminates exactly like CHECK_EQ.
 */
template <typename T1, typename T2>
inline void CHECK_GT(const T1 &actual, const T2 &comp) {
    if (actual <= comp) {
        std::cerr << "\tActual: " << actual
                  << "\n\tExpected: > " << comp << std::endl;
        assert(0);
        abort();
    }
}

#endif
// Per-core worker pool, implemented in pthreadpool.cc.

// Start the pool: pins the caller to the main core and creates one worker
// thread for each other core below ncore. No-op if already initialised.
void mthread_init(int ncore);
// Stop and join all workers. No-op if the pool is not initialised.
void mthread_finalize(void);
// Run start_routine(arg) on logical core lid; on the main core it runs
// synchronously in the caller. tid is not used by the implementation.
void mthread_create(pthread_t * tid, int lid,
                    void *(*start_routine) (void *), void *arg);
// Wait for the task on core lid to finish. *exitcode, if non-NULL, is set
// to 0; the task's return value is not propagated. tid is not used.
void mthread_join(pthread_t tid, int lid, void **exitcode);
#include "threadinfo.hh"

// Out-of-class storage for threadinfo's static members.
// created_ is set to true by threadinfo::initialize().
bool threadinfo::created_ = false;
// Thread-specific-data key; created by threadinfo::initialize().
pthread_key_t threadinfo::key_;
/**
 * Per-thread bookkeeping, stored in pthread thread-specific data.
 *
 * initialize() must create the key before any thread calls current().
 */
struct threadinfo {
    /**
     * Return the calling thread's record, allocating it on first use.
     *
     * The record is zero-initialised so cur_reduce_task_ and cur_core_ have
     * a defined value (0) until the thread sets them. It is released with
     * free() by the key destructor registered in initialize().
     */
    static threadinfo *current() {
        threadinfo *ti = (threadinfo *)pthread_getspecific(key_);
        if (!ti) {
            ti = (threadinfo *)calloc(1, sizeof(threadinfo));
            pthread_setspecific(key_, ti);
        }
        return ti;
    }
    /**
     * Create the thread-specific-data key.
     *
     * The call is kept outside assert() so the key is still created when the
     * program is built with NDEBUG.
     */
    static void initialize() {
        const int rc = pthread_key_create(&key_, free);
        assert(rc == 0);
        (void) rc;
        created_ = true;
    }
    /** True once initialize() has run. */
    static bool initialized() {
        return created_;
    }

    int cur_reduce_task_;   // reduce task the thread is currently working on
    int cur_core_;          // logical core id assigned to the thread
  private:
    static bool created_;
    static pthread_key_t key_;
};
14 | */ 15 | #include "btree.hh" 16 | #include "application.hh" 17 | #include "test_util.hh" 18 | #include 19 | #include 20 | using namespace std; 21 | 22 | struct mock_app : public map_only { 23 | int key_compare(const void *k1, const void *k2) { 24 | int64_t i1 = int64_t(k1); 25 | int64_t i2 = int64_t(k2); 26 | return i1 - i2; 27 | } 28 | 29 | bool split(split_t *ma, int ncore) { 30 | assert(0); 31 | } 32 | void map_function(split_t *ma) { 33 | assert(0); 34 | } 35 | }; 36 | 37 | typedef btree_param btree_param_type; 39 | typedef btree_type this_btree; 40 | 41 | void check_tree(this_btree &bt) { 42 | int64_t i = 1; 43 | auto it = bt.begin(); 44 | while (it != bt.end()) { 45 | CHECK_EQ(i, int64_t(it->key_)); 46 | CHECK_EQ(1, int64_t(it->size())); 47 | CHECK_EQ(i + 1, int64_t((*it)[0])); 48 | ++it; 49 | ++i; 50 | } 51 | assert(size_t(i) == (bt.size() + 1)); 52 | } 53 | 54 | void check_tree_copy(this_btree &bt) { 55 | xarray dst; 56 | bt.copy(&dst); 57 | for (int64_t i = 1; i <= int64_t(bt.size()); ++i) { 58 | CHECK_EQ(i, int64_t(dst[i - 1].key_)); 59 | CHECK_EQ(1, int64_t(dst[i - 1].size())); 60 | CHECK_EQ(i + 1, int64_t(dst[i - 1][0])); 61 | ++i; 62 | } 63 | } 64 | 65 | void check_tree_copy_and_free(this_btree &bt) { 66 | xarray dst; 67 | bt.copy(&dst); 68 | bt.shallow_free(); 69 | for (int64_t i = 1; i <= int64_t(bt.size()); ++i) { 70 | CHECK_EQ(i, int64_t(dst[i - 1].key_)); 71 | CHECK_EQ(1, int64_t(dst[i - 1].size())); 72 | CHECK_EQ(i + 1, int64_t(dst[i - 1][0])); 73 | ++i; 74 | } 75 | } 76 | 77 | void test1() { 78 | this_btree bt; 79 | bt.init(); 80 | check_tree(bt); 81 | check_tree_copy(bt); 82 | for (int64_t i = 1; i < 1000; ++i) { 83 | bt.map_insert_sorted_copy_on_new((void *)i, (void *)(i + 1), 4, 0); 84 | check_tree(bt); 85 | check_tree_copy(bt); 86 | } 87 | assert(bt.size() == 999); 88 | } 89 | 90 | void test2() { 91 | this_btree bt; 92 | bt.init(); 93 | check_tree(bt); 94 | check_tree_copy(bt); 95 | for (int64_t i = 1; i < 1000; ++i) { 96 | keyvals_t 
// Entry point of the btree unit test: installs the mock application (the
// tests rely on its key_compare), runs both test cases and prints PASS.
// A failing check aborts before PASS is printed.
int main(int argc, char *argv[]) {
    mock_app app;
    static_appbase::set_app(&app);
    test1();
    test2();
    cerr << "PASS" << endl;
    return 0;
}
// Smoke test for get_cpu_freq(): prints the reported value and requires it
// to be greater than zero.
int main(int argc, char *argv[]) {
    uint64_t f = get_cpu_freq();
    std::cout << f << std::endl;
    CHECK_GT(f, uint64_t(0));
    std::cout << "PASS" << std::endl;
    return 0;
}
/**
 * Three-way comparison of two ints for xsearch::lower_bound.
 *
 * Returns a negative value, zero or a positive value when *a is less than,
 * equal to or greater than *b. Comparisons are used instead of *a - *b,
 * because the subtraction overflows (undefined behaviour) for operands that
 * are far apart, e.g. INT_MIN and 1.
 */
static int compare(const int *a, const int *b) {
    return (*a > *b) - (*a < *b);
}
The Software 11 | * is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This notice is a 12 | * summary of the Metis LICENSE file; the license in that file is legally 13 | * binding. 14 | */ 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "bench.hh" 24 | 25 | struct gstate_type { 26 | volatile int start; 27 | union { 28 | struct { 29 | volatile int ready; 30 | volatile uint64_t cycles; 31 | } v; 32 | char __pad[JOS_CLINE]; 33 | } state[JOS_NCPU] __attribute__ ((aligned(JOS_CLINE))); 34 | }; 35 | 36 | static gstate_type *gstate; 37 | 38 | static uint64_t ncores; 39 | 40 | enum { nmallocs = 1000000 }; 41 | 42 | void * 43 | worker(void *arg) 44 | { 45 | int c = ptr2int(arg); 46 | affinity_set(c); 47 | if (c) { 48 | gstate->state[c].v.ready = 1; 49 | while (!gstate->start) ; 50 | } else { 51 | for (uint64_t i = 1; i < ncores; i++) { 52 | while (!gstate->state[i].v.ready) ; 53 | gstate->state[i].v.ready = 0; 54 | } 55 | gstate->start = 1; 56 | } 57 | uint64_t start = read_tsc(); 58 | for (uint64_t i = 0; i < nmallocs; i++) { 59 | void *p = malloc(100); 60 | (void) p; 61 | } 62 | uint64_t end = read_tsc(); 63 | gstate->state[c].v.cycles = end - start; 64 | gstate->state[c].v.ready = 1; 65 | if (!c) { 66 | for (uint64_t i = 1; i < ncores; i++) 67 | while (!gstate->state[i].v.ready) ; 68 | uint64_t ncycles = 0; 69 | for (uint64_t i = 0; i < ncores; i++) 70 | ncycles += gstate->state[i].v.cycles; 71 | printf("Cycles per malloc: %ld\n", ncycles / nmallocs); 72 | } 73 | return NULL; 74 | } 75 | 76 | int 77 | main(int argc, char **argv) 78 | { 79 | affinity_set(0); 80 | if (argc < 2) { 81 | printf("Usage: <%s> number-cores\n", argv[0]); 82 | exit(EXIT_FAILURE); 83 | } 84 | ncores = atoi(argv[1]); 85 | assert(ncores <= JOS_NCPU); 86 | gstate = (gstate_type *) 87 | mmap(NULL, sizeof(gstate_type), PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 88 | -1, 0); 89 | memset(gstate, 0, sizeof(gstate_type)); 90 | if 
(gstate == MAP_FAILED) { 91 | printf("mmap error: %d\n", errno); 92 | exit(EXIT_FAILURE); 93 | } 94 | for (uint64_t i = 1; i < ncores; i++) { 95 | pthread_t tid; 96 | pthread_create(&tid, NULL, worker, int2ptr(i)); 97 | } 98 | uint64_t start = read_tsc(); 99 | worker(int2ptr(0)); 100 | uint64_t end = read_tsc(); 101 | printf("Total time %ld million cycles\n", (end - start) / 1000000); 102 | munmap(gstate, sizeof(*gstate)); 103 | } 104 | -------------------------------------------------------------------------------- /miw/Makefile.am: -------------------------------------------------------------------------------- 1 | protoc_inputs = log_definition.proto 2 | protoc_outputs = log_definition.pb.cc log_definition.pb.h 3 | $(protoc_outputs): $(protoc_inputs) 4 | protoc -I$(srcdir) --cpp_out=. $< 5 | 6 | miwdir = $(libdir)/miw 7 | 8 | MAXCPUS = $(shell grep -c processor /proc/cpuinfo) 9 | AM_CXXFLAGS=-Wall -g -pipe -std=c++11 -fpermissive -fopenmp -O2 \ 10 | -I../metis \ 11 | -fno-omit-frame-pointer -D_GNU_SOURCE -include ../config.h \ 12 | -DJTLS=__thread -DJSHARED_ATTR= \ 13 | -DJOS_CLINE=64 -DCACHE_LINE_SIZE=64 \ 14 | -DJOS_NCPU=$(MAXCPUS) -D__STDC_FORMAT_MACROS 15 | AM_CPPFLAGS=`pkg-config --cflags protobuf` 16 | miw_LTLIBRARIES=libmiw.la 17 | libmiw_la_SOURCES=log_format.cc log_format.h \ 18 | log_record.cc log_record.h mr_job.cc mr_job.h job.cc job.h str_utils.h 19 | nodist_libmiw_la_SOURCES=$(protoc_outputs) 20 | 21 | clean-local: 22 | rm -f log_definition.pb.h log_definition.pb.cc 23 | 24 | BUILT_SOURCES = $(protoc_outputs) 25 | 26 | EXTRA_DIST = $(protoc_inputs) 27 | 28 | SUBDIRS = . 
#AM_CXXFLAGS=-Wall -g -pipe -std=c++11 -fpermissive
#AM_CPPFLAGS=-I${srcdir}/../ `pkg-config --cflags protobuf`
#AM_LDFLAGS=-L$(top_builddir)/miw/ `pkg-config --libs protobuf`
#LDADD = -lmiw

#bin_PROGRAMS=generate_domain_controller generate_anon_bluecoat

#generate_domain_controller_SOURCES=generate_domain_controller.cc
#generate_anon_bluecoat_SOURCES=generate_anon_bluecoat.cc

# Generate the Python protobuf bindings imported by format_json2pb.py.
protoc_inputs = ../log_definition.proto
# With --proto_path ../ and --python_out=. protoc writes the module into this
# directory, so the target must be named here. The former target
# ../log_definition_pb2.py was never produced, which made the rule run on
# every build.
protoc_outputs = log_definition_pb2.py
$(protoc_outputs): $(protoc_inputs)
	protoc --proto_path ../ -I$(srcdir) --python_out=. $<

all: $(protoc_outputs)
| Timestampstring 46 | Operating Systemstring 47 | Deletedstring -------------------------------------------------------------------------------- /miw/formats/McAfee.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"McAfee", 3 | "delims":",", 4 | "fields":[ 5 | { 6 | "name":"Event", 7 | "key":true, 8 | "type":"string" 9 | }, 10 | { 11 | "name":"IP Address", 12 | "key":true, 13 | "type":"string" 14 | }, 15 | { 16 | "name":"Computer Name", 17 | "key":true, 18 | "type":"string" 19 | }, 20 | { 21 | "name":"Source", 22 | "type":"string" 23 | }, 24 | { 25 | "name":"Risk Name", 26 | "key":true, 27 | "type":"string" 28 | }, 29 | { 30 | "name":"Occurrences", 31 | "type":"string" 32 | }, 33 | { 34 | "name":"File Path", 35 | "key":true, 36 | "type":"string" 37 | }, 38 | { 39 | "name":"Description", 40 | "type":"string" 41 | }, 42 | { 43 | "name":"Actual Action", 44 | "type":"string" 45 | }, 46 | { 47 | "name":"Requested Action", 48 | "type":"string" 49 | }, 50 | { 51 | "name":"Secondary Action", 52 | "type":"string" 53 | }, 54 | { 55 | "name":"Event Date", 56 | "type":"date", 57 | "date_format":"%m/%d/%Y %H:%M:%S", 58 | "key":true, 59 | "processing":"minute" 60 | }, 61 | { 62 | "name":"Event Insert Time", 63 | "type":"string" 64 | }, 65 | { 66 | "name":"Domain", 67 | "type":"string" 68 | }, 69 | { 70 | "name":"User Name", 71 | "type":"string" 72 | }, 73 | { 74 | "name":"Server", 75 | "type":"string" 76 | }, 77 | { 78 | "name":"Client Group", 79 | "type":"string" 80 | }, 81 | { 82 | "name":"Source Computer Name", 83 | "type":"string" 84 | }, 85 | { 86 | "name":"Source Computer IP", 87 | "type":"string" 88 | }, 89 | { 90 | "name":"Application Name", 91 | "type":"string" 92 | }, 93 | { 94 | "name":"Application Hash", 95 | "type":"string" 96 | }, 97 | { 98 | "name":"Hash Algorithm", 99 | "type":"string" 100 | }, 101 | { 102 | "name":"Company", 103 | "type":"string" 104 | }, 105 | { 106 | "name":"Version", 107 | 
"type":"string" 108 | }, 109 | { 110 | "name":"File Size", 111 | "type":"string" 112 | }, 113 | { 114 | "name":"Detection Reason", 115 | "type":"string" 116 | }, 117 | { 118 | "name":"Minimum Sensitivity Level", 119 | "type":"string" 120 | }, 121 | { 122 | "name":"Permitted Application Reason", 123 | "type":"string" 124 | }, 125 | { 126 | "name":"Web Domain", 127 | "type":"string" 128 | }, 129 | { 130 | "name":"Download site", 131 | "type":"string" 132 | }, 133 | { 134 | "name":"Downloaded by", 135 | "type":"string" 136 | }, 137 | { 138 | "name":"Prevalence", 139 | "type":"string" 140 | }, 141 | { 142 | "name":"Reputation", 143 | "type":"string" 144 | }, 145 | { 146 | "name":"First Seen", 147 | "type":"string" 148 | }, 149 | { 150 | "name":"URL Tracking Status", 151 | "type":"string" 152 | }, 153 | { 154 | "name":"Event End Date", 155 | "type":"string" 156 | }, 157 | { 158 | "name":"Timestamp", 159 | "type":"string" 160 | }, 161 | { 162 | "name":"Operating System", 163 | "type":"string" 164 | }, 165 | { 166 | "name":"Deleted", 167 | "type":"string" 168 | } 169 | 170 | ] 171 | } 172 | -------------------------------------------------------------------------------- /miw/formats/MicrosoftDNSlogs.fmt: -------------------------------------------------------------------------------- 1 | 2 | dns  3 | datedate :dayr%d/%m/%Y 4 | timetime :minute 5 | protocol string 6 | sourcestring & 7 | targetstring jmicrosoftdnslogs -------------------------------------------------------------------------------- /miw/formats/MicrosoftDNSlogs.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"dns", 3 | "delims":" ", 4 | "fields":[ 5 | { 6 | "name":"date", 7 | "type":"date", 8 | "pos":"0", 9 | "date_format":"%d/%m/%Y", 10 | "key":true, 11 | "processing":"day" 12 | }, 13 | { 14 | "name":"time", 15 | "type":"time", 16 | "pos":"1", 17 | "key":true, 18 | "processing":"minute" 19 | }, 20 | { 21 | "name":"protocol", 22 | 
"type":"string", 23 | "pos":"6" 24 | }, 25 | { 26 | "name":"source", 27 | "type":"string", 28 | "pos":"8", 29 | "key":true 30 | }, 31 | { 32 | "name":"target", 33 | "type":"string", 34 | "pos":"8", 35 | "preprocessing":"microsoftdnslogs", 36 | "key":true 37 | } 38 | ] 39 | } 40 | -------------------------------------------------------------------------------- /miw/formats/anon_bluecoat_format.fmt: -------------------------------------------------------------------------------- 1 | 2 | anon_bluecoat  3 | username string ( 4 | 5 | ip_addressstring ( -------------------------------------------------------------------------------- /miw/formats/anon_bluecoat_format.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"anon_bluecoat", 3 | "delims":" ", 4 | "fields":[ 5 | { 6 | "name":"username", 7 | "pos":9, 8 | "type":"string", 9 | "key":true 10 | }, 11 | { 12 | "name":"ip_address", 13 | "pos":4, 14 | "type":"string", 15 | "key":false, 16 | "aggregated":true, 17 | "aggregation":"union" 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /miw/formats/domain_controller_format.fmt: -------------------------------------------------------------------------------- 1 | 2 | domain_controller,!
3 | datestring :dayr%Y-%m-%d 4 | timestring :hour 5 | Date/Timestring 6 | Workstationstring 7 | Hostnamestring( 8 | SourceAddress 9 | string(2 union_count 10 | Domain string 11 | DeviceHostNamestring 12 | EventDescriptionstring 13 | EventTypestring 14 | UserNamestring  15 | EventUserstring 16 | ReferenceIDstring 17 | LogonTypestring 18 | EventLogstring( 19 | DeviceAddressstring(2 union_count 20 | 21 | ResultCode string& 22 | EventCategoryName"string(2union 23 | MessageID$string 24 | Message&string -------------------------------------------------------------------------------- /miw/formats/domain_controller_format.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"domain_controller", 3 | "delims":",", 4 | "fields":[ 5 | { "name":"date", 6 | "pos":0, 7 | "type":"string", 8 | "date_format":"%Y-%m-%d", 9 | "key":true, 10 | "processing":"day" 11 | }, 12 | { 13 | "name":"time", 14 | "pos":1, 15 | "type":"string", 16 | "key": true, 17 | "processing":"hour" 18 | }, 19 | { 20 | "name":"Date/Time", 21 | "pos":2, 22 | "type":"string" 23 | }, 24 | { 25 | "name":"Workstation", 26 | "pos":3, 27 | "type":"string" 28 | }, 29 | { 30 | "name":"Hostname", 31 | "pos":4, 32 | "type":"string" 33 | }, 34 | { 35 | "name":"SourceAddress", 36 | "pos":5, 37 | "type":"string", 38 | "aggregated":true, 39 | "aggregation":"union_count" 40 | }, 41 | { 42 | "name":"Domain", 43 | "pos":6, 44 | "type":"string" 45 | }, 46 | { 47 | "name":"DeviceHostName", 48 | "pos":7, 49 | 50 | "type":"string" 51 | }, 52 | { 53 | "name":"EventDescription", 54 | "pos":8, 55 | "type":"string" 56 | }, 57 | { 58 | "name":"EventType", 59 | "pos":9, 60 | "type":"string" 61 | }, 62 | { 63 | "name":"UserName", 64 | "pos":10, 65 | "type":"string", 66 | "key":true 67 | }, 68 | { 69 | "name":"EventUser", 70 | "pos":11, 71 | "type":"string" 72 | }, 73 | { 74 | "name":"ReferenceID", 75 | "pos":12, 76 | "type":"string" 77 | }, 78 | { 79 | "name":"LogonType", 80 | 
"pos":13, 81 | "type":"string" 82 | }, 83 | { 84 | "name":"EventLog", 85 | "pos":14, 86 | "type":"string" 87 | }, 88 | { 89 | "name":"DeviceAddress", 90 | "pos":15, 91 | "type":"string", 92 | "aggregated":true, 93 | "aggregation":"union_count" 94 | }, 95 | { 96 | "name":"ResultCode", 97 | "pos":16, 98 | "type":"string" 99 | }, 100 | { 101 | "name":"EventCategoryName", 102 | "pos":17, 103 | "type":"string", 104 | "aggregated":true, 105 | "aggregation":"union" 106 | }, 107 | { 108 | "name":"MessageID", 109 | "pos":18, 110 | "type":"string" 111 | }, 112 | { 113 | "name":"Message", 114 | "pos":19, 115 | "type":"string" 116 | } 117 | ] 118 | } 119 | -------------------------------------------------------------------------------- /miw/formats/evtx.fmt: -------------------------------------------------------------------------------- 1 | 2 | evtx, 3 | datedate :dayr%m/%d/%Y 4 | timetime :minutex 5 | unknown3string 6 | unknown4string 7 | unknown5string 8 | Channelstring 9 | unknown7string 10 | unknown8string 11 | Computerstring  12 | Event_IDstring 13 | EventDatastringjevtxcsv 14 | unknown12int 15 | unknown13string 16 | unknown14int 17 | descriptionstring 18 | unknown16string 19 | unknown17string -------------------------------------------------------------------------------- /miw/formats/evtx.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"evtx", 3 | "delims":",", 4 | "fields":[ 5 | { 6 | "name":"date", 7 | "type":"date", 8 | "date_format":"%m/%d/%Y", 9 | "key":true, 10 | "processing":"day" 11 | }, 12 | { 13 | "name":"time", 14 | "type":"time", 15 | "key": true, 16 | "processing":"minute", 17 | "processing_offset":10 18 | }, 19 | { 20 | "name":"unknown3", 21 | "type":"string" 22 | }, 23 | { 24 | "name":"unknown4", 25 | "type":"string" 26 | }, 27 | { 28 | "name":"unknown5", 29 | "type":"string" 30 | }, 31 | { 32 | "name":"Channel", 33 | "type":"string" 34 | }, 35 | { 36 | "name":"unknown7", 37 | "type":"string" 
38 | }, 39 | { 40 | "name":"unknown8", 41 | "type":"string" 42 | }, 43 | { 44 | "name":"Computer", 45 | "type":"string", 46 | "key":true 47 | }, 48 | { 49 | "name":"Event_ID", 50 | "type":"string" 51 | }, 52 | { 53 | "name":"EventData", 54 | "type":"string", 55 | "preprocessing":"evtxcsv" 56 | }, 57 | { 58 | "name":"unknown12", 59 | "type":"int" 60 | }, 61 | { 62 | "name":"unknown13", 63 | "type":"string" 64 | }, 65 | { 66 | "name":"unknown14", 67 | "type":"int" 68 | }, 69 | { 70 | "name":"description", 71 | "type":"string" 72 | }, 73 | { 74 | "name":"unknown16", 75 | "type":"string" 76 | }, 77 | { 78 | "name":"unknown17", 79 | "type":"string" 80 | } 81 | ] 82 | } 83 | -------------------------------------------------------------------------------- /miw/formats/evtx2.fmt: -------------------------------------------------------------------------------- 1 | 2 | evtx, 3 | datedate :dayr%m/%d/%Y 4 | timetime :hour 5 | timezonestring 6 | MACBstring 7 | sourcestring 8 | 9 | sourcetypestring 10 | typestring 11 | userstring 12 | hoststring  13 | shortstring 14 | descstringjevtxcsv2 15 | versionint 16 | filenamestring 17 | inodeint 18 | notesstring 19 | formatstring 20 | extrastring -------------------------------------------------------------------------------- /miw/formats/evtx2.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"evtx", 3 | "delims":",", 4 | "fields":[ 5 | { 6 | "name":"date", 7 | "type":"date", 8 | "key":true, 9 | "processing":"day", 10 | "date_format":"%m/%d/%Y" 11 | }, 12 | { 13 | "name":"time", 14 | "type":"time", 15 | "key": true, 16 | "processing":"hour" 17 | }, 18 | { 19 | "name":"timezone", 20 | "type":"string" 21 | }, 22 | { 23 | "name":"MACB", 24 | "type":"string" 25 | }, 26 | { 27 | "name":"source", 28 | "type":"string" 29 | }, 30 | { 31 | "name":"sourcetype", 32 | "type":"string" 33 | }, 34 | { 35 | "name":"type", 36 | "type":"string" 37 | }, 38 | { 39 | "name":"user", 40 | 
import os, sys, simplejson, log_definition_pb2
from pprint import pprint
import protobuf_json, protobuf_json_writer

# Command-line tool: convert a JSON log-format definition into a serialized
# `logdef` protobuf message.
#   argv[1]: path of the JSON format definition to read
#   argv[2]: path of the file receiving the serialized message

if len(sys.argv) < 3:
    # a usage error is reported on stderr with a non-zero status so that
    # calling scripts / make rules can detect the failure
    sys.stderr.write("Usage: " + sys.argv[0] + " <json_file> <output_file>\n")
    sys.exit(1)

# decode the JSON definition; the handle is closed even if decoding fails
with open(sys.argv[1], 'r') as f:
    json_str = simplejson.loads(f.read())

# fill an empty logdef message from the decoded JSON object
msg_pb = log_definition_pb2.logdef()
json_pb = protobuf_json.json2pb(msg_pb, json_str)

#print protobuf_json_writer.proto2json(json_pb)

# SerializeToString() returns raw bytes: the output must be opened in binary
# mode, otherwise text-mode newline translation can corrupt the message
with open(sys.argv[2], 'wb') as fo:
    fo.write(json_pb.SerializeToString())
Destination Zonestring 21 | Inbound Interfacestring 22 | Outbound Interfacestring 23 | 24 | Log Actionstring 25 | Time Loggedstring 26 | 27 | Session IDstring 28 | Repeat Countstring 29 | Source Portstring  30 | Destination Portstring  31 | NAT Source Portstring 32 | NAT Destination Portstring 33 | Flagsstring 34 | IP Protocolstring  35 | Actionstring 36 | Bytesstring 37 | 38 | Bytes Sentstring 39 | Bytes Receivedstring 40 | Packetsstring 41 | 42 | Start Timestring 43 | Elapsed Time (sec)string 44 | Categorystring 45 | Paddingstring 46 | seqnostring 47 | actionflagsstring 48 | Source Countrystring 49 | Destination Countrystring 50 | cpaddingstring 51 | pkts_sentstring 52 | pkts_receivedstring -------------------------------------------------------------------------------- /miw/formats/paloalto.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"paloalto", 3 | "delims":",", 4 | "fields":[ 5 | { 6 | "name":"Domain", 7 | "type":"string" 8 | }, 9 | { 10 | "name":"Receive Time", 11 | "type":"date", 12 | "date_format":"%Y/%m/%d %H:%M:%S", 13 | "key":true, 14 | "processing":"minute" 15 | }, 16 | { 17 | "name":"Serial #", 18 | "type":"string" 19 | }, 20 | { 21 | "name":"Type", 22 | "type":"string" 23 | }, 24 | { 25 | "name":"Threat/Content Type", 26 | "type":"string" 27 | }, 28 | { 29 | "name":"Config Version", 30 | "type":"string" 31 | }, 32 | { 33 | "name":"Generate Time", 34 | "type":"string" 35 | }, 36 | { 37 | "name":"Source address", 38 | "key":true, 39 | "type":"string" 40 | }, 41 | { 42 | "name":"Destination address", 43 | "key":true, 44 | "type":"string" 45 | }, 46 | { 47 | "name":"NAT Source IP", 48 | "type":"string" 49 | }, 50 | { 51 | "name":"NAT Destination IP", 52 | "type":"string" 53 | }, 54 | { 55 | "name":"Rule", 56 | "type":"string" 57 | }, 58 | { 59 | "name":"Source User", 60 | "type":"string" 61 | }, 62 | { 63 | "name":"Destination User", 64 | "type":"string" 65 | }, 66 | { 67 | 
"name":"Application", 68 | "type":"string" 69 | }, 70 | { 71 | "name":"Virtual System", 72 | "type":"string" 73 | }, 74 | { 75 | "name":"Source Zone", 76 | "type":"string" 77 | }, 78 | { 79 | "name":"Destination Zone", 80 | "type":"string" 81 | }, 82 | { 83 | "name":"Inbound Interface", 84 | "type":"string" 85 | }, 86 | { 87 | "name":"Outbound Interface", 88 | "type":"string" 89 | }, 90 | { 91 | "name":"Log Action", 92 | "type":"string" 93 | }, 94 | { 95 | "name":"Time Logged", 96 | "type":"string" 97 | }, 98 | { 99 | "name":"Session ID", 100 | "type":"string" 101 | }, 102 | { 103 | "name":"Repeat Count", 104 | "type":"string" 105 | }, 106 | { 107 | "name":"Source Port", 108 | "key":true, 109 | "type":"string" 110 | }, 111 | { 112 | "name":"Destination Port", 113 | "key":true, 114 | "type":"string" 115 | }, 116 | { 117 | "name":"NAT Source Port", 118 | "type":"string" 119 | }, 120 | { 121 | "name":"NAT Destination Port", 122 | "type":"string" 123 | }, 124 | { 125 | "name":"Flags", 126 | "type":"string" 127 | }, 128 | { 129 | "name":"IP Protocol", 130 | "key":true, 132 | "type":"string" 133 | }, 134 | { 135 | "name":"Action", 136 | "type":"string" 137 | }, 138 | { 139 | "name":"Bytes", 140 | "type":"string" 141 | }, 142 | { 143 | "name":"Bytes Sent", 144 | "type":"string" 145 | }, 146 | { 147 | "name":"Bytes Received", 148 | "type":"string" 149 | }, 150 | { 151 | "name":"Packets", 152 | "type":"string" 153 | }, 154 | { 155 | "name":"Start Time", 156 | "type":"string" 157 | }, 158 | { 159 | "name":"Elapsed Time (sec)", 160 | "type":"string" 161 | }, 162 | { 163 | "name":"Category", 164 | "type":"string" 165 | }, 166 | { 167 | "name":"Padding", 168 | "type":"string" 169 | }, 170 | { 171 | "name":"seqno", 172 | "type":"string" 173 | }, 174 | { 175 | "name":"actionflags", 176 | "type":"string" 177 | }, 178 | { 179 | "name":"Source Country", 180 | "type":"string" 181 | }, 182 | { 183 | "name":"Destination Country", 184 | "type":"string" 185 | }, 186 | { 
187 | "name":"cpadding", 188 | "type":"string" 189 | }, 190 | { 191 | "name":"pkts_sent", 192 | "type":"string" 193 | }, 194 | { 195 | "name":"pkts_received", 196 | "type":"string" 197 | } 198 | ] 199 | } 200 | -------------------------------------------------------------------------------- /miw/formats/protobuf_json.py: -------------------------------------------------------------------------------- 1 | # JSON serialization support for Google's protobuf Messages 2 | # Copyright (c) 2009, Paul Dovbush 3 | # All rights reserved. 4 | # http://code.google.com/p/protobuf-json/ 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are 8 | # met: 9 | # 10 | # * Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # * Redistributions in binary form must reproduce the above 13 | # copyright notice, this list of conditions and the following disclaimer 14 | # in the documentation and/or other materials provided with the 15 | # distribution. 16 | # * Neither the name of nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
import json # py2.6+ TODO: add support for other JSON serialization modules
from google.protobuf.descriptor import FieldDescriptor as FD


class ParseError(Exception):
    ''' raised when a message field has a type that has no converter in this module '''
    pass


def json2pb(pb, js):
    ''' fill a protobuf message from a decoded JSON object.

    pb -- protobuf message instance to fill (modified in place)
    js -- mapping of field name to value, e.g. the output of a JSON decoder;
          nested messages are nested mappings, repeated fields are sequences

    Fields of the message that are absent from `js` are left untouched.
    Returns `pb`. Raises ParseError on a field type without a converter.
    '''
    for field in pb.DESCRIPTOR.fields:
        if field.name not in js:
            continue
        is_message = field.type == FD.TYPE_MESSAGE
        if is_message:
            # sub-messages are filled recursively, no scalar converter needed
            ftype = None
        elif field.type in _js2ftype:
            ftype = _js2ftype[field.type]
        else:
            raise ParseError("Field %s.%s of type '%d' is not supported" % (pb.__class__.__name__, field.name, field.type, ))
        value = js[field.name]
        if field.label == FD.LABEL_REPEATED:
            pb_value = getattr(pb, field.name, None)
            for v in value:
                if is_message:
                    json2pb(pb_value.add(), v)
                else:
                    pb_value.append(ftype(v))
        elif is_message:
            json2pb(getattr(pb, field.name, None), value)
        else:
            setattr(pb, field.name, ftype(value))
    return pb



def pb2json(pb):
    ''' convert a protobuf message into a JSON-serializable dict.

    Only the fields reported by pb.ListFields() are emitted. Nested messages
    become nested dicts, repeated fields become lists.
    Raises ParseError on a field type without a converter.
    '''
    js = {}
    # fields = pb.DESCRIPTOR.fields #all fields
    fields = pb.ListFields()	#only filled (including extensions)
    for field,value in fields:
        if field.type == FD.TYPE_MESSAGE:
            ftype = pb2json
        elif field.type in _ftype2js:
            ftype = _ftype2js[field.type]
        else:
            raise ParseError("Field %s.%s of type '%d' is not supported" % (pb.__class__.__name__, field.name, field.type, ))
        if field.label == FD.LABEL_REPEATED:
            js_value = [ftype(v) for v in value]
        else:
            js_value = ftype(value)
        js[field.name] = js_value
    return js


# protobuf field type -> converter applied to a message value before it is
# stored in the JSON dict.
# fixed32/fixed64/sfixed32/sfixed64 are integer field types: they use the same
# converters as the other 32/64-bit integers (a float converter would lose
# precision on large 64-bit values).
_ftype2js = {
    FD.TYPE_DOUBLE: float,
    FD.TYPE_FLOAT: float,
    FD.TYPE_INT64: long,
    FD.TYPE_UINT64: long,
    FD.TYPE_INT32: int,
    FD.TYPE_FIXED64: long,
    FD.TYPE_FIXED32: int,
    FD.TYPE_BOOL: bool,
    FD.TYPE_STRING: unicode,
    #FD.TYPE_MESSAGE: pb2json,		#handled specially
    FD.TYPE_BYTES: lambda x: x.encode('string_escape'),
    FD.TYPE_UINT32: int,
    FD.TYPE_ENUM: int,
    FD.TYPE_SFIXED32: int,
    FD.TYPE_SFIXED64: long,
    FD.TYPE_SINT32: int,
    FD.TYPE_SINT64: long,
}

# protobuf field type -> converter applied to a JSON value before it is
# assigned to the message field (integer field types must receive integers).
_js2ftype = {
    FD.TYPE_DOUBLE: float,
    FD.TYPE_FLOAT: float,
    FD.TYPE_INT64: long,
    FD.TYPE_UINT64: long,
    FD.TYPE_INT32: int,
    FD.TYPE_FIXED64: long,
    FD.TYPE_FIXED32: int,
    FD.TYPE_BOOL: bool,
    FD.TYPE_STRING: unicode,
    # FD.TYPE_MESSAGE: json2pb,	#handled specially
    FD.TYPE_BYTES: lambda x: x.decode('string_escape'),
    FD.TYPE_UINT32: int,
    FD.TYPE_ENUM: int,
    FD.TYPE_SFIXED32: int,
    FD.TYPE_SFIXED64: long,
    FD.TYPE_SINT32: int,
    FD.TYPE_SINT64: long,
}
from google.protobuf.descriptor import FieldDescriptor as _FieldDescriptor
from google.protobuf.reflection import GeneratedProtocolMessageType as _GeneratedProtocolMessageType

# Placeholder factories: each returns the literal written as the sample value
# of a field, and carries the type name shown in the generated comment.
def _jsWriter_type_bytes():
    return '""'
_jsWriter_type_bytes.__name__='bytes'

def _jsWriter_type_unicode():
    return '""'
_jsWriter_type_unicode.__name__='unicode'

def _jsWriter_type_bool():
    return 'false'
_jsWriter_type_bool.__name__='bool'

# protobuf field type -> callable; calling it yields the sample value and its
# __name__ is the type label. The fixed-width types are integer field types,
# so they are labelled like the other 32/64-bit integers.
_jsWriter_types = {
    _FieldDescriptor.TYPE_DOUBLE: float,
    _FieldDescriptor.TYPE_FLOAT: float,
    _FieldDescriptor.TYPE_INT64: long,
    _FieldDescriptor.TYPE_UINT64: long,
    _FieldDescriptor.TYPE_INT32: int,
    _FieldDescriptor.TYPE_FIXED64: long,
    _FieldDescriptor.TYPE_FIXED32: int,
    _FieldDescriptor.TYPE_BOOL: _jsWriter_type_bool,
    _FieldDescriptor.TYPE_STRING: _jsWriter_type_unicode,
    # _FieldDescriptor.TYPE_MESSAGE: _msg2json,
    _FieldDescriptor.TYPE_BYTES: _jsWriter_type_bytes,
    _FieldDescriptor.TYPE_UINT32: int,
    _FieldDescriptor.TYPE_ENUM: int,
    _FieldDescriptor.TYPE_SFIXED32: int,
    _FieldDescriptor.TYPE_SFIXED64: long,
    _FieldDescriptor.TYPE_SINT32: int,
    _FieldDescriptor.TYPE_SINT64: long,
}

from StringIO import StringIO

class jsWriter(StringIO):
    ''' in-memory text buffer with helpers writing one commented, indented message block '''
    _level = 0  # current nesting depth, one tab per level

    def begin_msg(self, name):
        ''' open a message block labelled with `name` '''
        self.write('/* %s */ {\n' % name)
        self._level+=1

    def end_msg(self):
        ''' close the block opened by begin_msg '''
        self._level-=1
        if self.getvalue().endswith(',\n'):
            # at least one field was written: overwrite its trailing ',\n'
            self.seek(self.tell()-2)
            self.write(self.print_indent+'\n}')
        else:
            # no field was written: stepping back two characters would
            # overwrite the opening '{\n', so the block is simply closed
            self.write(self.print_indent+'}')

    @property
    def print_indent(self):
        ''' indentation string for the current nesting depth '''
        return '\t' * self._level

    def _indent_value(self, value):
        ''' re-indent a multi-line string value to the current depth; other values pass through '''
        if type(value) in (str, unicode,):
            value=value.replace('\n','\n'+self.print_indent)
            return value.rstrip()
        return value

    def print_field(self, field_type, name, value, repeated):
        ''' write one `name:value,` entry.

        field_type -- type label put in a comment, skipped when empty/None
        name       -- field name
        value      -- sample value (a nested block is passed as a string)
        repeated   -- when true the value is wrapped in a one-element array
        '''
        self.write(self.print_indent+name+':')
        if repeated:
            if field_type:
                self.write('/* %s[] */'%field_type)
            # the array content is written one level deeper
            self._level+=1
            self.write(('[\n'+self.print_indent+'%s\n')%self._indent_value(value))
            self._level-=1
            self.write(self.print_indent+']')
        else:
            if field_type:
                self.write('/* %s */'%field_type)
            self.write('%s'%self._indent_value(value))

        self.write(',\n')

def _msg2json(pb_msg):
    ''' render a message descriptor as a commented block and return it as a string.

    pb_msg -- message descriptor: proto2json passes `<class>.DESCRIPTOR`, the
              recursive call passes `field.message_type`

    On any error the partial output is printed and the exception re-raised.
    '''
    jso = jsWriter()
    try:
        fields = list(pb_msg.fields)
        # fields.extend(pb_msg.DESCRIPTOR.Extensions._known_extensions.values())
        jso.begin_msg(pb_msg.name)
        for field in fields:
            if field.is_extension:
                name = field.name+':ext'
            else:
                name = field.name
            if field.type == _FieldDescriptor.TYPE_MESSAGE:
                # ftype=field.message_type.name
                ftype=None
                value = _msg2json(field.message_type)
            else:
                ftype=_jsWriter_types[field.type].__name__
                value = _jsWriter_types[field.type]()
            jso.print_field(ftype, name, value, field.label == _FieldDescriptor.LABEL_REPEATED)
        jso.end_msg()
    except Exception:
        # dump what was generated so far to help locate the failing field
        jso.seek(0)
        # e.args = (jso.read(),)
        print(jso.read())
        raise
    jso.seek(0)
    return jso.read()

def proto2json(module):
    ''' render every generated message class found in `module`, blocks separated by blank lines '''
    res = ''
    for pb_msg_name in dir(module):
        pb_msg = getattr(module, pb_msg_name)
        if not isinstance(pb_msg, _GeneratedProtocolMessageType):
            continue
        res += _msg2json(pb_msg.DESCRIPTOR) + '\n\n'
    return res

if __name__ == '__main__':
    from test import test_writer
| "aggregation":"mean" 55 | }, 56 | { 57 | "name":"cs-method", 58 | "pos":8, 59 | "type":"string" 60 | }, 61 | { 62 | "name":"cs-uri-scheme", 63 | "pos":9, 64 | "type":"string" 65 | }, 66 | { 67 | "name":"cs-host", 68 | "pos":10, 69 | "type":"string" 70 | }, 71 | { 72 | "name":"cs-uri-port", 73 | "pos":11, 74 | "type":"int" 75 | }, 76 | { 77 | "name":"cs-uri-path", 78 | "pos":12, 79 | "type":"string" 80 | }, 81 | { 82 | "name":"cs-uri-query", 83 | "pos":13, 84 | "type":"string" 85 | }, 86 | { 87 | "name":"cs-username", 88 | "pos":14, 89 | "type":"string", 90 | "key":true 91 | }, 92 | { 93 | "name":"cs-auth-group", 94 | "pos":15, 95 | "type":"string" 96 | }, 97 | { 98 | "name":"s-supplier-name", 99 | "pos":16, 100 | "type":"string" 101 | }, 102 | { 103 | "name":"rs(Content-Type)", 104 | "pos":17, 105 | "type":"string" 106 | }, 107 | { 108 | "name":"cs(Referer)", 109 | "pos":18, 110 | "type":"string" 111 | }, 112 | { 113 | "name":"cs(User-Agent)", 114 | "pos":19, 115 | "type":"string" 116 | }, 117 | { 118 | "name":"sc-filter-result", 119 | "pos":20, 120 | "type":"string", 121 | "aggregated":true, 122 | "aggregation":"union" 123 | }, 124 | { 125 | "name":"cs-categories", 126 | "pos":21, 127 | "type":"string", 128 | "aggregated":true, 129 | "aggregation":"union" 130 | }, 131 | { 132 | "name":"x-virus-id", 133 | "pos":22, 134 | "type":"string", 135 | "aggregated":true, 136 | "aggregation":"union" 137 | }, 138 | { 139 | "name":"s-ip", 140 | "pos":23, 141 | "type":"string" 142 | } 143 | 144 | ] 145 | } 146 | -------------------------------------------------------------------------------- /miw/formats/squid3_search_test.fmt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soprasteria/cybersecurity-miw/67eec32f44bf7c9e14771cf8a068490eb80a53f1/miw/formats/squid3_search_test.fmt -------------------------------------------------------------------------------- /miw/formats/squid3_search_test.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "format_name":"squid3_features", 3 | "delims":" \"", 4 | "quotechar":"", 5 | "fields":[ 6 | { 7 | "name":"date", 8 | "pos":6, 9 | "type":"date", 10 | "date_format":"%Y-%m-%dT%H:%M:%S+00:00", 11 | "key":true, 12 | "processing":"day" 13 | }, 14 | { 15 | "name":"src", 16 | "pos":10, 17 | "type":"string", 18 | "aggregated":true, 19 | "aggregation":"union_count" 20 | }, 21 | { 22 | "name":"user", 23 | "pos":12, 24 | "type":"string", 25 | "key":true, 26 | "match": 27 | { 28 | "match_str":"root", 29 | "logic":"and" 30 | } 31 | }, 32 | { 33 | "name":"http_method", 34 | "pos": 15, 35 | "type":"string", 36 | "aggregated":true, 37 | "aggregation":"union_count" 38 | }, 39 | { 40 | "name":"url", 41 | "pos":16, 42 | "type":"url", 43 | "url_format":"%scheme://%host%port%path", 44 | "aggregated":true, 45 | "aggregation":"union_count" 46 | }, 47 | { 48 | "name":"http_code", 49 | "pos":18, 50 | "type":"string", 51 | "aggregated":true, 52 | "aggregation":"union_count", 53 | "match": 54 | { 55 | "match_str":"404", 56 | "logic":"or" 57 | } 58 | }, 59 | { 60 | "name":"rbytes", 61 | "pos":19, 62 | "type":"int", 63 | "aggregated":true, 64 | "aggregation":"mean" 65 | }, 66 | { 67 | "name":"response", 68 | "pos":20, 69 | "type":"string", 70 | "aggregated":true, 71 | "aggregation":"union_count", 72 | "match": 73 | { 74 | "match_str":"TCP_CLIENT_REFRESH_MISS:DIRECT", 75 | "logic":"or" 76 | } 77 | } 78 | ] 79 | } 80 | -------------------------------------------------------------------------------- /miw/formats/test_json2pb.py: -------------------------------------------------------------------------------- 1 | import os, sys, json, log_definition_pb2 2 | from pprint import pprint 3 | import protobuf_json 4 | 5 | msg = log_definition_pb2.logdef() 6 | msg.format_name = "test" 7 | msg.delims = " " 8 | 9 | pprint(msg.SerializeToString()) 10 | 11 | json_obj=protobuf_json.pb2json(msg) 12 | print json_obj 13 | 
-------------------------------------------------------------------------------- /miw/formats/tests/created.fmt: -------------------------------------------------------------------------------- 1 | 2 | variance, 3 | idint  4 | varfloat(2variance*" -------------------------------------------------------------------------------- /miw/formats/tests/created.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"variance", 3 | "delims":",", 4 | "quotechar":"\"", 5 | "fields":[ 6 | { 7 | "name":"id", 8 | "pos":0, 9 | "type":"int", 10 | "key":true 11 | }, 12 | { 13 | "name":"var", 14 | "pos":1, 15 | "type":"float", 16 | "aggregated":true, 17 | "aggregation":"variance" 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /miw/formats/tests/filter.fmt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soprasteria/cybersecurity-miw/67eec32f44bf7c9e14771cf8a068490eb80a53f1/miw/formats/tests/filter.fmt -------------------------------------------------------------------------------- /miw/formats/tests/filter.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"filter", 3 | "delims":",", 4 | "quotechar":"\"", 5 | "fields":[ 6 | { 7 | "name":"id", 8 | "pos":0, 9 | "type":"int", 10 | "key":true 11 | }, 12 | { 13 | "name":"str", 14 | "pos":1, 15 | "type":"string" 16 | }, 17 | { 18 | "name":"denied_count", 19 | "pos": 1, 20 | "type":"int", 21 | "filter":"denied", 22 | "filter_type":"contain" 23 | }, 24 | { 25 | "name":"ok_count", 26 | "pos": 1, 27 | "type":"int", 28 | "filter":"ok", 29 | "filter_type":"contain" 30 | } 31 | ] 32 | } 33 | -------------------------------------------------------------------------------- /miw/formats/tests/match.fmt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/soprasteria/cybersecurity-miw/67eec32f44bf7c9e14771cf8a068490eb80a53f1/miw/formats/tests/match.fmt -------------------------------------------------------------------------------- /miw/formats/tests/match.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"match", 3 | "delims":",", 4 | "quotechar":"\"", 5 | "fields":[ 6 | { 7 | "name":"id", 8 | "pos":0, 9 | "type":"string", 10 | "key":true 11 | }, 12 | { 13 | "name":"val", 14 | "pos":1, 15 | "type":"string" 16 | }, 17 | { 18 | "name":"testmatch", 19 | "pos":1, 20 | "type":"string", 21 | "match": 22 | { 23 | "match_str":"OK", 24 | "logic":"and" 25 | } 26 | } 27 | ] 28 | } 29 | -------------------------------------------------------------------------------- /miw/formats/tests/match_exact.fmt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soprasteria/cybersecurity-miw/67eec32f44bf7c9e14771cf8a068490eb80a53f1/miw/formats/tests/match_exact.fmt -------------------------------------------------------------------------------- /miw/formats/tests/match_exact.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"match", 3 | "delims":",", 4 | "quotechar":"\"", 5 | "fields":[ 6 | { 7 | "name":"id", 8 | "pos":0, 9 | "type":"string", 10 | "key":true 11 | }, 12 | { 13 | "name":"val", 14 | "pos":1, 15 | "type":"string" 16 | }, 17 | { 18 | "name":"testmatch", 19 | "pos":1, 20 | "type":"string", 21 | "match": 22 | { 23 | "match_str":"OK", 24 | "logic":"and", 25 | "exact": true 26 | } 27 | } 28 | ] 29 | } 30 | -------------------------------------------------------------------------------- /miw/formats/tests/match_exact_neg.fmt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/soprasteria/cybersecurity-miw/67eec32f44bf7c9e14771cf8a068490eb80a53f1/miw/formats/tests/match_exact_neg.fmt -------------------------------------------------------------------------------- /miw/formats/tests/match_exact_neg.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"match", 3 | "delims":",", 4 | "quotechar":"\"", 5 | "fields":[ 6 | { 7 | "name":"id", 8 | "pos":0, 9 | "type":"string", 10 | "key":true 11 | }, 12 | { 13 | "name":"val", 14 | "pos":1, 15 | "type":"string" 16 | }, 17 | { 18 | "name":"testmatch", 19 | "pos":1, 20 | "type":"string", 21 | "match": 22 | { 23 | "match_str":"KO", 24 | "logic":"and", 25 | "negative":true, 26 | "exact": true 27 | } 28 | } 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /miw/formats/tests/match_file.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"match", 3 | "delims":",", 4 | "quotechar":"\"", 5 | "fields":[ 6 | { 7 | "name":"id", 8 | "pos":0, 9 | "type":"string", 10 | "key":true 11 | }, 12 | { 13 | "name":"val", 14 | "pos":1, 15 | "type":"string" 16 | }, 17 | { 18 | "name":"testmatch", 19 | "pos":1, 20 | "type":"string", 21 | "match": 22 | { 23 | "match_file":"../data/tests/match_file.txt", 24 | "logic":"and", 25 | "negative":true 26 | } 27 | } 28 | ] 29 | } 30 | -------------------------------------------------------------------------------- /miw/formats/tests/ratio.fmt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soprasteria/cybersecurity-miw/67eec32f44bf7c9e14771cf8a068490eb80a53f1/miw/formats/tests/ratio.fmt -------------------------------------------------------------------------------- /miw/formats/tests/ratio.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"ratio", 3 | 
"delims":",", 4 | "quotechar":"\"", 5 | "fields":[ 6 | { 7 | "name":"id", 8 | "pos":0, 9 | "type":"int", 10 | "key":true 11 | }, 12 | { 13 | "name":"i1", 14 | "pos":1, 15 | "type":"int", 16 | "aggregated":true, 17 | "aggregation":"sum" 18 | }, 19 | { 20 | "name":"i2", 21 | "pos":2, 22 | "type":"int", 23 | "aggregated":true, 24 | "aggregation":"sum" 25 | }, 26 | { 27 | "name":"f1", 28 | "pos":3, 29 | "type":"float", 30 | "aggregated":true, 31 | "aggregation":"sum" 32 | }, 33 | { 34 | "name":"f2", 35 | "pos":4, 36 | "type":"float", 37 | "aggregated":true, 38 | "aggregation":"sum" 39 | }, 40 | { 41 | "name":"iratio", 42 | "numerator": "i1", 43 | "denominator": "i2", 44 | "type":"float", 45 | "aggregated":true, 46 | "aggregation":"ratio" 47 | }, 48 | { 49 | "name":"fratio", 50 | "numerator": "f1", 51 | "denominator": "f2", 52 | "type":"float", 53 | "aggregated":true, 54 | "aggregation":"ratio" 55 | }, 56 | { 57 | "name":"tratio", 58 | "numerator": "f1", 59 | "denominator": "logs", 60 | "type":"float", 61 | "aggregated":true, 62 | "aggregation":"ratio" 63 | } 64 | ] 65 | } 66 | -------------------------------------------------------------------------------- /miw/formats/tests/sum.fmt: -------------------------------------------------------------------------------- 1 | 2 | sum, 3 | idint  4 | v1int(2sum 5 | v2float(2sum*" -------------------------------------------------------------------------------- /miw/formats/tests/sum.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"sum", 3 | "delims":",", 4 | "quotechar":"\"", 5 | "fields":[ 6 | { 7 | "name":"id", 8 | "pos":0, 9 | "type":"int", 10 | "key":true 11 | }, 12 | { 13 | "name":"v1", 14 | "pos":1, 15 | "type":"int", 16 | "aggregated":true, 17 | "aggregation":"sum" 18 | }, 19 | { 20 | "name":"v2", 21 | "pos":2, 22 | "type":"float", 23 | "aggregated":true, 24 | "aggregation":"sum" 25 | } 26 | ] 27 | } 28 | 
-------------------------------------------------------------------------------- /miw/formats/tests/variance-mean-sum.fmt: -------------------------------------------------------------------------------- 1 | 2 | variance-mean, 3 | idint  4 | varianceint(2variance 5 | meanint(2mean 6 | sumint(2sum*" -------------------------------------------------------------------------------- /miw/formats/tests/variance-mean-sum.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"variance-mean", 3 | "delims":",", 4 | "quotechar":"\"", 5 | "fields":[ 6 | { 7 | "name":"id", 8 | "pos":0, 9 | "type":"int", 10 | "key":true 11 | }, 12 | { 13 | "name":"variance", 14 | "pos": 1, 15 | "type":"int", 16 | "aggregated": true, 17 | "aggregation": "variance" 18 | }, 19 | { 20 | "name":"mean", 21 | "type":"int", 22 | "pos": 1, 23 | "aggregated": true, 24 | "aggregation": "mean" 25 | }, 26 | { 27 | "name":"sum", 28 | "type":"int", 29 | "pos": 1, 30 | "aggregated": true, 31 | "aggregation": "sum" 32 | } 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /miw/formats/tests/variance.fmt: -------------------------------------------------------------------------------- 1 | 2 | variance, 3 | idint  4 | varfloat(2variance*" -------------------------------------------------------------------------------- /miw/formats/tests/variance.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_name":"variance", 3 | "delims":",", 4 | "quotechar":"\"", 5 | "fields":[ 6 | { 7 | "name":"id", 8 | "pos":0, 9 | "type":"int", 10 | "key":true 11 | }, 12 | { 13 | "name":"var", 14 | "pos":1, 15 | "type":"float", 16 | "aggregated":true, 17 | "aggregation":"variance" 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /miw/job.h: -------------------------------------------------------------------------------- 1 | 
/** 2 | * Copyright (c) 2015 SopraSteria 3 | * All rights reserved. 4 | * Author: Emmanuel Benazera 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * * Neither the name of SopraSteria nor the 14 | * names of its contributors may be used to endorse or promote products 15 | * derived from this software without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY SOPRASTERIA ``AS IS'' AND ANY 18 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL SOPRASTERIA BE LIABLE FOR ANY 21 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace miw
{

  /**
   * Front-end object for one MIW run. It groups the command-line style
   * options, the log format, the output stream, the inner map-reduce job
   * pointer and (optionally) the in-memory results. Only the constructor
   * and destructor are defined inline; every other member function is
   * declared here and defined elsewhere.
   */
  class job
  {
  public:
    job() {}

    /**
     * Releases the in-memory results when there are any: frees the records
     * through mr_job::free_records(), resets every element, then
     * shallow-frees the array. Neither the array object itself nor _mrj is
     * deleted in this destructor.
     */
    ~job()
    {
      if (_results)
        {
          mr_job::free_records(_results);
          for (size_t i = 0; i < _results->size(); ++i)
            (*_results)[i].reset();
          _results->shallow_free();
        }
    }

    // memory management
    unsigned long get_available_memory();

    // input management
    bool file_size_autosplit(const size_t &fs,
                             size_t &mfsize,
                             size_t &nchunks);

    int execute();
    int execute(int argc, char *argv[]);

    void run_mr_job(const char *fname, const int &nfile, const size_t &blength=0);
    void run_mr_job_merge_results(const char *fname, const int &nfile, const bool &run_end, const size_t &blength=0, const bool &newfile=true);

    void glog_init(char *argv[]);

    long _skipped_logs = 0;
    log_format _lf;
    std::ofstream _fout; /**< output file stream */

    // options
    std::string _app_name;
    bool _store_content = false; // whether to store full content into index.
    bool _compressed = false; // whether to compress the original content while working on it.
    bool _autosplit = false; // whether to split input files based on heuristic of memory-usage.
    bool _merge_results = false; // whether to merge results over multiple inputs
    int _nchunks_split = 0;
    double _in_memory_factor = 10; // we expect to use at most 10 times more memory than log volume, for processing them. Very conservative value, used in auto-splitting the log files before processing them.
    std::string _output_format; // other values: json, csv
    bool _quiet = false;
    bool _skip_header = false; // whether to skip the first file line
    bool _tmp_save = false; // ability to save temporary results

    int _nprocs = 0; /**< number of used processors, when specified */
    int _map_tasks = 0; /**< number of map tasks, when specified */
    int _reduce_tasks = 0; /**< number of reduce tasks, when specified */
    int _ndisp = 0; /**< number of top entries to show */
    std::string _format_name;

    // map reduce inner job object
    mr_job *_mrj = nullptr;
    // NOTE(review): the element type is missing in this copy of the file;
    // presumably a vector of std::string file names -- confirm.
    std::vector _files; /**< list of data file names */
    std::string _ofname; /**< output file name */

    // job results, when part of the output
    // NOTE(review): the xarray element type is missing in this copy of the
    // file -- confirm against mr_job.h.
    xarray *_results = nullptr;

    // glog init
    static bool _glog_init;
  };

}
/* Storage for the integer values carried by a field. */
message int_field
{
  repeated int64 int_reap = 1;
  optional int64 holder = 2; /* for holding denominator in mean, etc.... */
}

/* Storage for the string values carried by a field. */
message string_field
{
  repeated string str_reap = 1;
  repeated int32 str_count = 2; /* for union with counters. */
}

/* Storage for the boolean values carried by a field. */
message bool_field
{
  repeated bool bool_reap = 1;
}

/* Storage for the floating-point values carried by a field. */
message float_field
{
  repeated double float_reap = 1;
  optional double holder = 2; /* for holding denominator in mean, etc.... */
}

/*
 * Matching options attached to a field through `field.match`.
 * Defaults: logic = "or", negative = false, exact = false.
 */
message match_field
{
  optional string match_str = 1;
  optional string logic = 2 [default = "or"];
  optional bool negative = 3 [default = false];
  optional string match_file = 4;
  optional bool exact = 5 [default = false];
}

/*
 * Definition of one log field, plus the typed value holders
 * (int_fi / str_fi / bool_fi / real_fi) used to carry its data.
 * Tag numbers 16 and 19 are not used in this message.
 */
message field
{
  required string name = 1;
  optional sint32 pos = 2 [default = -1]; /* field position, allows to skip fields */
  required string type = 3; /* field type: double, float, string, ... */
  optional bool key = 4 [default = false]; /* whether this field is used as key or part of the key */
  optional bool aggregated = 5; /* whether to aggregate values of this field. Obviously stored. */
  optional string aggregation = 6; /* type of aggregation (e.g. sum, union, ...) */
  optional string processing = 7; /* field post-processing filter (e.g. day, ...) */

  optional int_field int_fi = 8;
  optional string_field str_fi = 9;
  optional bool_field bool_fi = 10;
  optional float_field real_fi = 11;
  optional uint32 count = 12; /* aggregation counter. */

  optional string preprocessing = 13; /* special pre-processing for generating more fields out of a single one */
  optional string date_format = 14 [default = "%m/%d/%Y"]; /* special formatting, e.g. for dates */
  optional uint32 processing_offset = 15 [default = 1]; /* offset for minutes and other discrete values */
  optional string filter = 17;
  optional string filter_type = 18;
  optional string url_format = 20 [default = "%scheme://%host%port"]; /* special formatting for URLs (%scheme, %host, %port, %path, %query, %fragment) */
  optional match_field match = 21;
  optional string numerator = 22; /* NOTE(review): presumably the name of another field, used for ratios -- confirm */
  optional string denominator = 23; /* NOTE(review): presumably the name of another field, used for ratios -- confirm */
}

/* Top-level description of a log format: its name, delimiters and fields. */
message logdef
{
  required string format_name = 1; /* log format name */
  required string delims = 2; /* field delimiters */
  repeated field fields = 3; /* definition of log fields */
  optional string appname = 4; /* application name for the log set. */
  optional string quotechar = 5;
  optional string commentchar = 6;
}
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY SOPRASTERIA ``AS IS'' AND ANY 18 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL SOPRASTERIA BE LIABLE FOR ANY 21 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | /** 30 | * Generic framework for defining and processing log formats. 31 | */ 32 | 33 | #ifndef LOG_FORMAT_H 34 | #define LOG_FORMAT_H 35 | 36 | #include "log_record.h" 37 | #include "log_definition.pb.h" 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | 45 | namespace miw 46 | { 47 | 48 | class log_format 49 | { 50 | public: 51 | log_format(); 52 | ~log_format(); 53 | 54 | int save(); 55 | 56 | int read(const std::string &name); 57 | 58 | //bool check() const; // XXX: unused ? 
59 | 60 | static std::string chomp_cpp(const std::string &s); 61 | 62 | static void tokenize_simple(const std::string &str, 63 | std::vector &tokens, 64 | const std::string &delim); 65 | 66 | static void tokenize(const std::string &str, 67 | const int &length, 68 | std::vector &tokens, 69 | const std::string &delim, 70 | const std::string "echar); 71 | 72 | int parse_data(const std::string &data, 73 | const int &length, 74 | const std::string &appname, 75 | const bool &store_content, 76 | const bool &compressed, 77 | const bool &quiet, 78 | const size_t &pos, 79 | const bool &skip_header, 80 | std::vector &lrecords) const; 81 | 82 | log_record* parse_line(const std::string &line, 83 | const std::string &appname, 84 | const bool &store_content, 85 | const bool &compressed, 86 | const bool &quiet, 87 | int &skipped_logs); 88 | 89 | // custom pre-processing. 90 | int pre_process_evtxcsv(field *f, 91 | const std::string &token, 92 | std::vector &nfields) const; 93 | 94 | int pre_process_evtxcsv2(field *f, 95 | const std::string &token, 96 | std::vector &nfields) const; 97 | 98 | int pre_process_microsoftdnslogs(field *f, 99 | const std::string &token, 100 | std::vector &nfields) const; 101 | 102 | bool filter_contain(logdef &ldef, const int &i) const; 103 | 104 | std::unordered_map*> _match_file_fields; 105 | std::mutex _loading_match_file_mutex; 106 | logdef _ldef; // protocol buffer object. 107 | }; 108 | 109 | } 110 | 111 | #endif 112 | -------------------------------------------------------------------------------- /miw/log_record.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013-2015 SopraSteria 3 | * All rights reserved. 
4 | * Author: Emmanuel Benazera 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * * Neither the name of SopraSteria nor the 14 | * names of its contributors may be used to endorse or promote products 15 | * derived from this software without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY SOPRASTERIA ``AS IS'' AND ANY 18 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL SOPRASTERIA BE LIABLE FOR ANY 21 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | /** 30 | * In-memory log record. 
namespace miw
{

  /**
   * In-memory log record: a key, a copy of the log definition and the
   * (optionally compressed) original lines. Records sharing a key are
   * folded together with merge(); mr_job's combine and reduce steps call
   * it as `first->merge(other)` and then delete `other`.
   *
   * NOTE(review): some template argument lists below are missing in this
   * copy of the file and are left exactly as found.
   */
  class log_record
  {
  public:
    log_record(const std::string &key,
               const logdef &ld);
    ~log_record();

    std::string key() const;

    void merge(log_record *lr); //TODO: need log format to check on aggregated fields etc ?

    // Called by mr_job::output_json when the job is not in compressed mode.
    void flatten_lines();

    /*static void to_json_solr(field &f, Json::Value &jrec,
      std::string &date, std::string &time);*/
    void to_json(field &f, const int &i, Json::Value &jrec,
                 std::string &date, std::string &time);
    void to_json(Json::Value &jlrec);
    // mr_job::output_csv passes header=true for the first record of the first file.
    static void json_to_csv(const Json::Value &jl,
                            std::string &csvline,
                            const bool &header=false);

    // compression for storage.
    static std::string compress_log_lines(const std::string &line);
    static std::string uncompress_log_lines(const std::string &cline);

    // field aggregation functions.
    void aggregation_union(const int &i,
                           const field &f,
                           const bool &count,
                           log_record *lr);

    void aggregation_sum(const int &i,
                         const field &f);

    void aggregation_max(const int &i,
                         const field &f);

    void aggregation_mean(const int &i,
                          const field &f);

    void aggregation_count(const int &i,
                           const field &f);

    void aggregation_variance(const int &i,
                              const field &f);

    float compute_ratio(const std::string &numerator,
                        const std::string &denominator);

    std::string _key;
    long int _sum; // set to 1 for each mapped record by mr_job::map_function; printed as the per-key count by mr_job
    logdef _ld;
    std::vector _lines; // original log lines from which the compacted record was created.
    std::string _uncompressed_lines;
    std::string _compressed_lines;
    int _compressed_size;
    int _original_size; // mr_job::output_json sets it to the uncompressed length in compressed mode
    bool _compressed;
    std::unordered_map> _unos; // cache for string arrays in aggregated unions.
  };

}
IN NO EVENT SHALL SOPRASTERIA BE LIABLE FOR ANY 21 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #include "mr_job.h" 30 | #include "defsplitter.hh" 31 | 32 | //#define DEBUG 33 | 34 | void mr_job::map_function(split_t *ma) 35 | { 36 | std::vector log_records; 37 | std::string dat = (char*)ma->data; // XXX: copies the data 38 | _lf->parse_data(dat,ma->length,_app_name,_store_content,_compressed,_quiet,ma->pos,_skip_header,log_records); 39 | 40 | #ifdef DEBUG 41 | std::cout << "number of mapped records: " << log_records.size() << std::endl; 42 | #endif 43 | for (size_t i=0;i_sum = 1; 46 | std::string key = log_records.at(i)->key(); 47 | const char *key_str = key.c_str(); 48 | map_emit((void*)key_str,(void*)log_records.at(i),strlen(key_str)); 49 | } 50 | } 51 | 52 | int mr_job::combine_function(void *key_in, void **vals_in, size_t vals_len) 53 | { 54 | log_record **lrecords = (log_record**)vals_in; 55 | for (uint32_t i=1;imerge(lrecords[i]); 58 | delete lrecords[i]; 59 | } 60 | return 1; 61 | } 62 | 63 | void mr_job::reduce_function(void *key_in, void **vals_in, size_t vals_len) 64 | { 65 | log_record **lrecords = (log_record**)vals_in; 66 | for (uint32_t i=1;imerge(lrecords[i]); 69 | delete lrecords[i]; 70 | } 71 | reduce_emit(key_in,(void*)lrecords[0]); 72 | } 73 | 74 | void mr_job::print_top(xarray *wc_vals, int &ndisp) { 75 | size_t occurs = 0; 76 | std::multimap > ordered_records; 77 | std::multimap >::iterator mit; 78 | for (uint32_t i = 0; i < wc_vals->size(); i++) 79 | { 80 | log_record *lr = 
(log_record*)wc_vals->at(i)->val; 81 | occurs += lr->_sum; 82 | ordered_records.insert(std::pair(lr->_sum,lr->key())); 83 | if ((int)ordered_records.size() > ndisp) 84 | { 85 | mit = ordered_records.end(); 86 | mit--; 87 | ordered_records.erase(mit); 88 | } 89 | } 90 | printf("\nlogs preprocessing: results (TOP %d from %zu keys, %zd logs):\n", 91 | ndisp, wc_vals->size(), occurs); 92 | #ifdef HADOOP 93 | ndisp = wc_vals->size(); 94 | #else 95 | ndisp = std::min(ndisp, (int)wc_vals->size()); 96 | #endif 97 | int c = 0; 98 | mit = ordered_records.begin(); 99 | while(mit!=ordered_records.end()) 100 | { 101 | printf("%45s - %ld\n",(*mit).second.c_str(),(*mit).first); 102 | ++mit; 103 | if (c++ == ndisp) 104 | break; 105 | } 106 | std::cout << std::endl; 107 | } 108 | 109 | void mr_job::output_all(xarray *wc_vals, std::ostream &fout) 110 | { 111 | for (uint32_t i = 0; i < wc_vals->size(); i++) 112 | { 113 | keyval_t *w = wc_vals->at(i); 114 | fout << (char*)w->key_ << " - " << static_cast(w->val)->_sum << std::endl; 115 | } 116 | } 117 | 118 | void mr_job::output_json(xarray *wc_vals, std::ostream &fout) 119 | { 120 | Json::FastWriter writer; 121 | for (uint32_t i = 0; i < wc_vals->size(); i++) 122 | { 123 | log_record *lr = (log_record*)wc_vals->at(i)->val; 124 | Json::Value jrec; 125 | lr->to_json(jrec); 126 | if (!_compressed) 127 | lr->flatten_lines(); 128 | else 129 | { 130 | lr->_uncompressed_lines = log_record::uncompress_log_lines(lr->_compressed_lines); 131 | lr->_compressed_lines.clear(); 132 | lr->_original_size = lr->_uncompressed_lines.length(); 133 | } 134 | fout << writer.write(jrec); 135 | if (!lr->_uncompressed_lines.empty()) 136 | { 137 | Json::Value jrecc; 138 | jrecc["id"] = lr->key() + "_content"; 139 | jrecc["original_size"] = lr->_original_size; 140 | jrecc["content"]["add"] = lr->_uncompressed_lines; 141 | //fout << "{\"compressed_size\":" << lr->_compressed_size << ",\"content\":{\"add\":\"" << lr->_compressed_lines << "\"},\"id\":" << 
lr->key()+"_content" << ",\"original_size\":" << lr->_original_size << "}\n"; 142 | fout << writer.write(jrecc); 143 | } 144 | } 145 | } 146 | 147 | void mr_job::output_csv(xarray *wc_vals, const int &nfile, std::ostream &fout) 148 | { 149 | for (uint32_t i = 0; i < wc_vals->size(); i++) 150 | { 151 | log_record *lr = (log_record*)wc_vals->at(i)->val; 152 | Json::Value jrec; 153 | lr->to_json(jrec); 154 | std::string csvline; 155 | if (i == 0 && nfile <= 0) 156 | log_record::json_to_csv(jrec,csvline,true); // with header 157 | else log_record::json_to_csv(jrec,csvline); 158 | //TODO: add attached logs UUIDs to every entry 159 | fout << csvline; 160 | } 161 | } 162 | 163 | void mr_job::output_mem(xarray *wc_vals, xarray *results) 164 | { 165 | wc_vals->swap(*results); 166 | } 167 | -------------------------------------------------------------------------------- /miw/str_utils.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 SopraSteria 3 | * All rights reserved. 4 | * Author: Emmanuel Benazera 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * * Neither the name of SopraSteria nor the 14 | * names of its contributors may be used to endorse or promote products 15 | * derived from this software without specific prior written permission. 
namespace miw
{
  /** Small collection of static string helpers. */
  class str_utils
  {
  public:
    /**
     * Splits `s` on `delim` and appends the pieces to `elems`.
     * Follows std::getline semantics: empty pieces between two delimiters
     * are kept, a trailing delimiter does not produce a final empty piece,
     * and an empty `s` appends nothing.
     *
     * @return reference to `elems`.
     */
    static std::vector<std::string>& str_split(const std::string &s, char delim, std::vector<std::string> &elems) {
      std::stringstream ss(s);
      std::string item;
      while (std::getline(ss, item, delim)) {
        elems.push_back(item);
      }
      return elems;
    }

    /**
     * Replaces, in place, every occurrence of `pattern` in `str` by `repl`.
     * An empty `pattern` leaves `str` untouched (it used to loop forever,
     * since an empty string is found at every position).
     *
     * @return always std::string::npos (the position of the last, failed
     *         search); kept for compatibility with existing callers.
     */
    static size_t replace_in_string(std::string &str, const std::string &pattern,
                                    const std::string &repl)
    {
      if (pattern.empty())
        return std::string::npos;
      size_t p = 0;
      while ((p = str.find(pattern,p)) != std::string::npos)
        {
          str.replace(p,pattern.size(),repl);
          p += repl.size(); // in case we're replacing with a string that contains the pattern itself.
        }
      return p;
    }

  };
}
def multi_replace(text, wordDict):
    """Replace every key of ``wordDict`` found in ``text`` by its value.

    The substitution is done in a single pass over ``text``, so a value that
    happens to contain another key is not substituted again, and longer keys
    win over shorter keys that prefix them (``$logfile`` before ``$log``).
    The result therefore no longer depends on dictionary iteration order.

    Empty keys are ignored. Values must be strings.
    """
    import re  # function-scope import: the module-level one is commented out

    # Longest keys first, so the alternation prefers the most specific key.
    keys = sorted((key for key in wordDict if key), key=len, reverse=True)
    if not keys:
        return text
    pattern = re.compile('|'.join(re.escape(key) for key in keys))
    return pattern.sub(lambda match: wordDict[match.group(0)], text)


# class for running one or more MIW jobs
## TODO:
## - calling options and defaults in constructor
class MIWJob:
    """Runs the ``miw`` binary found in ``miw_loc`` from a command template."""

    def __init__(self,miw_loc,miw_command=''):
        """Store the binary location and the command template.

        ``miw_command`` holds ``$name`` placeholders that ``run`` fills in;
        when it is empty the default template below is used.
        """
        self.miw_loc = miw_loc
        if miw_command:
            self.miw_command = miw_command
        else:
            self.miw_command = '-fnames $fnames -ofname $ofname -format_name $format_files_repo/$logfile -output_format csv -autosplit -merge_results -memory_factor $memfactor'

    def run(self,miw_options):
        """Fill the template with ``miw_options`` and execute the command.

        ``miw_options`` maps placeholder text (e.g. ``'$fnames'``) to its
        replacement string. Returns the exit status reported by ``call``
        (0 on success).
        """
        local_command = multi_replace(self.miw_command,miw_options)
        logger.debug("MIW job command=%s" % (local_command))
        # NOTE(review): the command line goes through the shell (shell=True),
        # so option values containing shell metacharacters are interpreted by
        # it. Only pass trusted values, or quote them before substitution.
        call_output = call(self.miw_loc + '/miw ' + local_command,shell=True)
        if call_output == 0:
            logger.debug('Successfully MIW job %s' % (local_command))
        else:
            logger.error('Failed MIW job call %s' % (local_command))
        return call_output
logging.StreamHandler() 5 | stdout_handler.setFormatter(formatter) 6 | logger = logging.getLogger(__name__) 7 | loglevel = logging.DEBUG 8 | logger.setLevel(loglevel) 9 | logger.addHandler(stdout_handler) 10 | -------------------------------------------------------------------------------- /tests/Makefile.am: -------------------------------------------------------------------------------- 1 | TESTS = $(check_PROGRAMS) 2 | check_PROGRAMS=ut_mr_parsing 3 | bin_PROGRAMS=ut_mr_parsing 4 | ut_mr_parsing_SOURCES=ut-mr-parsing.cc 5 | 6 | MAXCPUS = $(shell grep -c processor /proc/cpuinfo) 7 | AM_CXXFLAGS=-Wall -g -pipe -std=c++11 -fpermissive -fopenmp -O2 -g \ 8 | -I../miw -I../metis \ 9 | -fno-omit-frame-pointer -D_GNU_SOURCE -include ../config.h \ 10 | -DJTLS=__thread -DJSHARED_ATTR= \ 11 | -DJOS_CLINE=64 -DCACHE_LINE_SIZE=64 \ 12 | -DJOS_NCPU=$(MAXCPUS) -D__STDC_FORMAT_MACROS `pkg-config --cflags protobuf` 13 | AM_LDFLAGS=`pkg-config --libs protobuf` -L../miw/ -L../metis 14 | LDADD=-lmiw -lmetis -lc -lm -lcurl -lidn -lz -lssl -lcrypto -lpthread -ldl -ljsoncpp -lrt -lprotobuf -lsnappy -lboost_system -lcppnetlib-uri $(GLOG_LIBS) $(GFLAGS_LIBS) -lgtest -lgtest_main 15 | --------------------------------------------------------------------------------