├── solr-patches
│   ├── use_http_retries-4.7.0.patch
│   └── no-stale-check-4.10.4.patch
├── rebar
├── priv
│   ├── conf
│   │   └── _rest_managed.json
│   ├── template_solr.xml
│   └── solr
│       └── contexts
│           └── solr-jetty-context.xml
├── docs
│   ├── yz-batching-overview.png
│   ├── yz-batching-worker.png
│   ├── TAGGING.md
│   ├── TESTING.md
│   ├── RESOURCES.md
│   ├── Q-AND-A.md
│   └── ADMIN.md
├── misc
│   └── bench
│       ├── src
│       │   └── bench.app.src
│       ├── rebar.config
│       ├── bin
│       │   ├── gen-bb-plots.sh
│       │   ├── smartos
│       │   │   ├── disk-collect.sh
│       │   │   ├── virtual-memory-collect.sh
│       │   │   ├── pid-cpu-mem-collect.sh
│       │   │   ├── network-transform.awk
│       │   │   ├── network-collect.sh
│       │   │   ├── disk-transform.awk
│       │   │   ├── virtual-memory-transform.awk
│       │   │   ├── pid-cpu-mem-transform.awk
│       │   │   ├── js
│       │   │   │   ├── throughput.js
│       │   │   │   ├── cpu.js
│       │   │   │   ├── network.js
│       │   │   │   ├── disk.js
│       │   │   │   └── latency.js
│       │   │   └── visualize.html
│       │   ├── calc-mean-thru.sh
│       │   ├── calc-med-latency.sh
│       │   ├── calc-95-latency.sh
│       │   ├── calc-99-latency.sh
│       │   ├── plot-3-dimensions.sh
│       │   ├── transform-raw.sh
│       │   ├── run-bench.sh
│       │   └── make-vis.sh
│       ├── cfgs
│       │   └── fruit
│       │       ├── query-hotel.config
│       │       ├── query-delta.config
│       │       ├── query-golf.config
│       │       ├── query-india.config
│       │       ├── query-echo.config
│       │       ├── query-charlie.config
│       │       ├── query-foxtrot.config
│       │       ├── query-beta.config
│       │       ├── query-alpha.config
│       │       ├── query-juliet.config
│       │       ├── query-kilo.config
│       │       └── load.config
│       └── schemas
│           └── fruit_schema.xml
├── .thumbs.yml
├── riak_test
│   ├── intercepts
│   │   ├── intercept.hrl
│   │   ├── yz_solr_intercepts.erl
│   │   ├── yz_noop_extractor_intercepts.erl
│   │   ├── yz_solrq_drain_mgr_intercepts.erl
│   │   ├── yz_solrq_drain_fsm_intercepts.erl
│   │   └── yz_solrq_helper_intercepts.erl
│   ├── yz_monitor_solr.erl
│   ├── yz_handoff_blocking.erl
│   ├── yz_wm_extract_test.erl
│   ├── yz_solr_start_timeout.erl
│   ├── yz_fuse_upgrade.erl
│   ├── yz_test_listener.erl
│   ├── yz_fallback.erl
│   ├── yz_default_bucket_type_upgrade.erl
│   ├── yz_errors.erl
│   ├── yz_languages.erl
│   ├── yz_ensemble.erl
│   ├── yz_search_http.erl
│   └── yz_ring_resizing.erl
├── rel_etc
│   └── solr-log4j.properties
├── .travis.yml
├── src
│   ├── yokozuna.app.src
│   ├── yz_index_hashtree_sup.erl
│   ├── yz_text_extractor.erl
│   ├── yz_noop_extractor.erl
│   ├── yz_fuse_stats_sidejob.erl
│   ├── yz_stat_worker.erl
│   ├── yz_solr_sup.erl
│   ├── yz_general_sup.erl
│   ├── yz_sup.erl
│   ├── yz_solrq_queue_pair_sup.erl
│   ├── rt_intercept_pt.erl
│   ├── yz_console.erl
│   ├── yz_entropy.erl
│   ├── yz_bucket_validator.erl
│   ├── yz_json_extractor.erl
│   └── yz_rs_migration.erl
├── test
│   ├── test.json
│   ├── yz_component_tests.erl
│   ├── yz_test.hrl
│   ├── utf8.txt
│   ├── utf8.json
│   ├── yz_pulseh.erl
│   ├── yz_text_extractor_tests.erl
│   ├── utf8.xml
│   ├── yz_misc_tests.erl
│   ├── yz_solrq_eqc_fuse.erl
│   ├── yz_dt_extractor_tests.erl
│   └── yz_xml_extractor_tests.erl
├── .gitignore
├── README.md
├── rebar.config
├── java_src
│   └── com
│       └── basho
│           └── yokozuna
│               ├── query
│               │   └── SimpleQueryExample.java
│               ├── monitor
│               │   └── Monitor.java
│               └── handler
│                   └── component
│                       └── FQShardTranslator.java
├── tools
│   ├── src-pkg.sh
│   ├── build-jar.sh
│   ├── build-solr.sh
│   └── grab-solr.sh
├── .travis.sh
├── Makefile
└── .rebar_plugins
    └── rebar_test_plugin.erl
/solr-patches/use_http_retries-4.7.0.patch:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/rebar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basho/yokozuna/HEAD/rebar
--------------------------------------------------------------------------------
/priv/conf/_rest_managed.json:
--------------------------------------------------------------------------------
1 | {"initArgs":{},"managedList":[]}
2 |
--------------------------------------------------------------------------------
/docs/yz-batching-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basho/yokozuna/HEAD/docs/yz-batching-overview.png
--------------------------------------------------------------------------------
/docs/yz-batching-worker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basho/yokozuna/HEAD/docs/yz-batching-worker.png
--------------------------------------------------------------------------------
/misc/bench/src/bench.app.src:
--------------------------------------------------------------------------------
1 | {application, bench,
2 | [
3 | {description, "This is just here to make rebar happy."},
4 | {vsn, "0.0.0"}
5 | ]}.
6 |
--------------------------------------------------------------------------------
/.thumbs.yml:
--------------------------------------------------------------------------------
1 | minimum_reviewers: 2
2 | build_steps:
3 | - make clean
4 | - make test
5 | - make xref
6 | - make dialyzer
7 | merge: false
8 | org_mode: true
9 | timeout: 1790
10 |
--------------------------------------------------------------------------------
/misc/bench/rebar.config:
--------------------------------------------------------------------------------
1 | {erl_opts, [debug_info,
2 | {parse_transform, lager_transform}]}.
3 |
4 | {deps,
5 | [
6 | {basho_bench, ".*",
7 | {git, "git://github.com/basho/basho_bench", {branch, "master"}}}
8 | ]}.
9 |
--------------------------------------------------------------------------------
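A minimal build sketch for this bench driver, assuming the repository's bundled rebar script and a standard rebar2 get-deps/compile workflow (the working directory and command sequence are illustrative, not taken from the repo):

    cd misc/bench
    ../../rebar get-deps compile   # fetch basho_bench and build the driver
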
/riak_test/intercepts/intercept.hrl:
--------------------------------------------------------------------------------
1 | %% Copied from riak_test
2 | -define(I_TAG(S), "INTERCEPT: " ++ S).
3 | -define(I_INFO(Msg), error_logger:info_msg(?I_TAG(Msg))).
4 | -define(I_INFO(Msg, Args), error_logger:info_msg(?I_TAG(Msg), Args)).
5 |
--------------------------------------------------------------------------------
/priv/template_solr.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
6 | ${socketTimeout:0}
7 | ${connTimeout:0}
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/misc/bench/bin/gen-bb-plots.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Generate the BB summary plots for a set of runs.
4 | #
5 | # ./gen-bb-plots.sh
6 |
7 | BB_DIR=$1; shift
8 | DIR=$1; shift
9 |
10 | for d in $DIR/*; do
11 | name=$(basename $d)
12 | out_file=$d/$name.png
13 | echo "generating summary $out_file"
14 | $BB_DIR/priv/summary.r -i $d -o $d/$name.png
15 | done
16 |
--------------------------------------------------------------------------------
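A hypothetical invocation of the plot script above, assuming a basho_bench checkout at ~/basho_bench and per-run result directories under ~/bench-runs (both paths are examples only; the script's $1 is BB_DIR and $2 is the runs directory):

    ./gen-bb-plots.sh ~/basho_bench ~/bench-runs
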
/rel_etc/solr-log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=WARN, rotate
2 |
3 | log4j.appender.rotate=org.apache.log4j.RollingFileAppender
4 | log4j.appender.rotate.File={{platform_log_dir}}/solr.log
5 | log4j.appender.rotate.MaxFileSize=10MB
6 | log4j.appender.rotate.MaxBackupIndex=5
7 | log4j.appender.rotate.layout=org.apache.log4j.PatternLayout
8 | log4j.appender.rotate.layout.ConversionPattern=%d [%p] <%t>@%F:%L %m%n
9 |
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/disk-collect.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Collect disk metrics.
4 |
5 | ACTION=$1
6 |
7 | output=/tmp/disk-collect-raw.txt
8 |
9 | case $ACTION in
10 | start)
11 | pkill iostat
12 | iostat -rsxTu cmdk0 cmdk1 1 > $output
13 | ;;
14 | stop)
15 | pkill iostat
16 | ;;
17 | output)
18 | echo $output
19 | ;;
20 | esac
21 |
--------------------------------------------------------------------------------
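A sketch of how one of these collect scripts might be driven around a benchmark run, using the start/stop/output actions handled above; the benchmark step itself is a placeholder:

    ./disk-collect.sh start
    # ... run the benchmark workload here ...
    ./disk-collect.sh stop
    RAW=$(./disk-collect.sh output)   # path of the captured iostat data
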
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: erlang
2 | otp_release:
3 | - R16B03
4 | cache:
5 | directories:
6 | - "$HOME/otp-basho"
7 | before_script:
8 | - ./.travis.sh build
9 | script:
10 | - ./.travis.sh test
11 | notifications:
12 | slack:
13 | secure: EL4ZCavW6oLbhgKc/bcjO4zoccYx/XrjiXvcki3S7UGUtVm+qT5rR3AfBGMkHmssfj4dn0u4xgfS67kvro/RMHrkrGx4Vqulnnd+57wixon/lGAMy527OoUvNU3rz6ZQGrb8LvEgRGCGARoW6ed6K9zccJAhB9vg2FbDsm5XzkY=
14 |
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/virtual-memory-collect.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Collect virtual memory statistics.
4 |
5 | ACTION=$1
6 | output=/tmp/virtual-memory-collect-raw.txt
7 |
8 | case $ACTION in
9 | start)
10 | pkill vmstat
11 | vmstat -Tu 1 > $output
12 | ;;
13 | stop)
14 | pkill vmstat
15 | ;;
16 | output)
17 | echo $output
18 | ;;
19 | esac
20 |
--------------------------------------------------------------------------------
/src/yokozuna.app.src:
--------------------------------------------------------------------------------
1 | %% -*- erlang -*-
2 | {application, yokozuna,
3 | [
4 | {description, "Integrating Apache Solr into Riak"},
5 | {vsn, git},
6 | {registered, []},
7 | {applications, [
8 | kernel,
9 | stdlib,
10 | ibrowse,
11 | fuse,
12 | riak_core,
13 | riak_kv
14 | ]},
15 | {mod, { yz_app, []}},
16 | {env, []}
17 | ]}.
18 |
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/pid-cpu-mem-collect.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Collect CPU and MEM metrics for specific processes.
4 |
5 | ACTION=$1; shift
6 | output=/tmp/pid-cpu-mem-collect-raw.txt
7 |
8 | case $ACTION in
9 | start)
10 | pkill prstat
11 | prstat -du -p "$(pgrep -d, -f 'beam|^[^ ]*java')" 1 > $output
12 | ;;
13 | stop)
14 | pkill prstat
15 | ;;
16 | output)
17 | echo $output
18 | ;;
19 | esac
20 |
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/network-transform.awk:
--------------------------------------------------------------------------------
1 | # Format header
2 | NR == 1 {
3 | # First remove leading space
4 | sub(/ +/, "")
5 | # Next convert space to commas
6 | gsub(/ +/, ",")
7 | # Convert 'Time' header to 'timestamp'
8 | sub(/Time/, "timestamp")
9 | print
10 | next
11 | }
12 |
13 | # Ignore first record
14 | NR == 2 { next };
15 |
16 | # Ignore header reprints
17 | /.*Time.*Int.*rKb.*Sat.*/ { next }
18 |
19 | { gsub(/ +/,","); print }
20 |
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/network-collect.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Collect network metrics
4 | #
5 | # This relies on building nicstat from source.
6 | #
7 | # See: http://www.brendangregg.com/K9Toolkit/nicstat.c
8 |
9 | ACTION=$1
10 | output=/tmp/network-collect-raw.txt
11 |
12 | case $ACTION in
13 | start)
14 | pkill nicstat
15 | ~/k9/nicstat 1 > $output
16 | ;;
17 | stop)
18 | pkill nicstat
19 | ;;
20 | output)
21 | echo $output
22 | ;;
23 | esac
24 |
--------------------------------------------------------------------------------
/misc/bench/bin/calc-mean-thru.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Use to calculate the mean throughput for a particular run of basho
4 | # bench.
5 | #
6 | # ./calc-mean-thru.sh
7 | #
8 | # for d in yz-fruit-query-*; do ./calc-mean-thru.sh $d; done
9 |
10 | RESULTS_DIR=$1
11 |
12 | echo -n "Mean ops/s for $RESULTS_DIR: "
13 | # Don't include the header, first 3 results or last result
14 | sed -e '1,4d' -e '$d' $RESULTS_DIR/summary.csv | \
15 | awk -F, '{ secs += $2; ops += $4 } END { printf("%f\n", ops / secs) }'
16 |
17 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-hotel.config:
--------------------------------------------------------------------------------
1 | {mode, max}.
2 | {concurrent, }.
3 | {driver, yz_driver}.
4 | {code_paths, ["/misc/bench"]}.
5 | {index_path, "/riak/fruit"}.
6 | {search_path, "/search/fruit"}.
7 | {http_conns, }.
8 | {pb_conns, []}.
9 |
10 | %% Hotel
11 | %%
12 | %% I imagine this is worst case for smart conj. All terms match 100K
13 | %% docs.
14 | %%
15 | %% cardinalities: 100K, 100K, 100K
16 | {duration, }.
17 | {operations, [{{search, "strawberry AND kiwi AND orange", "id"}, 1}]}.
18 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-delta.config:
--------------------------------------------------------------------------------
1 | {mode, max}.
2 | {concurrent, }.
3 | {driver, yz_driver}.
4 | {code_paths, ["/misc/bench"]}.
5 | {index_path, "/riak/fruit"}.
6 | {search_path, "/search/fruit"}.
7 | {http_conns, }.
8 | {pb_conns, []}.
9 |
10 | %% Delta
11 | %%
12 | %% This is a good case for smart conj b/c it has some largeish matches
13 | %% but one matches only 10
14 | %%
15 | %% cardinalities: 10K, 100, 10
16 | {duration, }.
17 | {operations, [{{search, "avocado AND nutmeg AND nance", "id"}, 1}]}.
18 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-golf.config:
--------------------------------------------------------------------------------
1 | {mode, max}.
2 | {concurrent, }.
3 | {driver, yz_driver}.
4 | {code_paths, ["/misc/bench"]}.
5 | {index_path, "/riak/fruit"}.
6 | {search_path, "/search/fruit"}.
7 | {http_conns, }.
8 | {pb_conns, []}.
9 |
10 | %% Golf
11 | %%
12 | %% This is kind of a mix between bad/good for smart conj. 1k doc ids
13 | %% will have to be copied over.
14 | %%
15 | %% cardinalities: 1K, 10K, 100K
16 | {duration, }.
17 | {operations, [{{search, "lime AND raspberry AND grape", "id"}, 1}]}.
18 |
--------------------------------------------------------------------------------
/test/test.json:
--------------------------------------------------------------------------------
1 | {"name":"ryan",
2 | "age":29,
3 | "pets":["smokey", "bandit"],
4 | "books":[
5 | {"title":"Introduction to Information Retrieval",
6 | "authors":["Christopher D. Manning",
7 | "Prabhakar Raghavan",
8 | "Hinrich Schütze"]},
9 | {"title":"Principles of Distributed Database Systems",
10 | "authors":["M. Tamer Özsu", "Patrick Valduriez"]}
11 | ],
12 | "type":null,
13 | "alive":true,
14 | "married":false,
15 | "a_number":1.1e6,
16 | "lucky_numbers":[13,17,21],
17 | "misc":{},
18 | "kids":[]
19 | }
20 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | deps
3 | doc
4 | ebin
5 | priv/java_lib
6 | priv/conf/lang
7 | priv/conf/*.txt
8 | priv/solr/etc/create-solrtest.keystore.sh
9 | priv/solr/etc/webdefault.xml
10 | priv/solr/lib
11 | priv/solr/resources
12 | priv/solr/start.jar
13 | priv/solr/webapps
14 | priv/solr/solr-webapp
15 | build/
16 | *.class
17 | *.tgz
18 | .rebar/*
19 | riak_test/ebin
20 | tests/20*
21 | tests/current
22 | .eunit
23 | log
24 | bb-*-fruit*
25 | .local_dialyzer_plt
26 | .yokozuna_test_dialyzer_plt
27 | dialyzer_warnings
28 | dialyzer_unhandled_warnings
29 | /.eqc-info
30 | /current_counterexample.eqc
31 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-india.config:
--------------------------------------------------------------------------------
1 | {mode, max}.
2 | {concurrent, }.
3 | {driver, yz_driver}.
4 | {code_paths, ["/misc/bench"]}.
5 | {index_path, "/riak/fruit"}.
6 | {search_path, "/search/fruit"}.
7 | {http_conns, }.
8 | {pb_conns, []}.
9 |
10 | %% India
11 | %%
12 | %% This case is meant to check for overhead in smart conj and just
13 | %% test out querying many terms.
14 | %%
15 | %% cardinalities: 1, 10, 1, 100, 10
16 | {duration, }.
17 | {operations, [{{search, "korlan AND nunga AND genip AND nutmeg AND kumquat", "id"}, 1}]}.
18 |
--------------------------------------------------------------------------------
/priv/solr/contexts/solr-jetty-context.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | /webapps/solr.war
6 | /etc/webdefault.xml
7 | /solr-webapp
8 |
9 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-echo.config:
--------------------------------------------------------------------------------
1 | {mode, max}.
2 | {concurrent, }.
3 | {driver, yz_driver}.
4 | {code_paths, ["/misc/bench"]}.
5 | {index_path, "/riak/fruit"}.
6 | {search_path, "/search/fruit"}.
7 | {http_conns, }.
8 | {pb_conns, []}.
9 |
10 | %% Echo
11 | %%
12 | %% Another good case, this one starts to potentially even out between
13 | %% old/new conj since all terms match small number of postings.
14 | %%
15 | %% cardinalities: 10, 1K, 10
16 | {duration, }.
17 | {operations, [{{search, "mulberry AND clementine AND peanut", "id"}, 1}]}.
18 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-charlie.config:
--------------------------------------------------------------------------------
1 | {mode, max}.
2 | {concurrent, }.
3 | {driver, yz_driver}.
4 | {code_paths, ["/misc/bench"]}.
5 | {index_path, "/riak/fruit"}.
6 | {search_path, "/search/fruit"}.
7 | {http_conns, }.
8 | {pb_conns, []}.
9 |
10 | %% Charlie
11 | %%
12 | %% This is another great case for smart conj b/c all but one term
13 | %% matches 100K docs.
14 | %%
15 | %% cardinalities: 100K, 100K, 1, 100K, 100K, 100K
16 | {duration, }.
17 | {operations, [{{search, "apple AND grape AND elderberry AND orange AND pineapple AND strawberry", "id"}, 1}]}.
18 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-foxtrot.config:
--------------------------------------------------------------------------------
1 | {mode, max}.
2 | {concurrent, }.
3 | {driver, yz_driver}.
4 | {code_paths, ["/misc/bench"]}.
5 | {index_path, "/riak/fruit"}.
6 | {search_path, "/search/fruit"}.
7 | {http_conns, }.
8 | {pb_conns, []}.
9 |
10 | %% Foxtrot
11 | %%
12 | %% This is potentially a bad case for smart conj since all terms match
13 | %% same number of docs and smart conj is sequential rather than parallel.
14 | %%
15 | %% cardinalities: 10K, 10K, 10K
16 | {duration, }.
17 | {operations, [{{search, "persimmon AND cherry AND tomato", "id"}, 1}]}.
18 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-beta.config:
--------------------------------------------------------------------------------
1 | {mode, max}.
2 | {concurrent, }.
3 | {driver, yz_driver}.
4 | {code_paths, ["/misc/bench"]}.
5 | {index_path, "/riak/fruit"}.
6 | {search_path, "/search/fruit"}.
7 | {http_conns, }.
8 | {pb_conns, []}.
9 |
10 | %% Beta
11 | %%
12 | %% Another best case for smart conj, but unlike last one there is a
13 | %% total of 1 doc that matches so it must be streamed thru the other 2
14 | %% 100K matches.
15 | %%
16 | %% cardinalities: 100K, 100K, 1
17 | {duration, }.
18 | {operations, [{{search, "apple AND orange AND jujube", "id"}, 1}]}.
19 |
--------------------------------------------------------------------------------
/misc/bench/bin/calc-med-latency.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Use to calculate the median latency for a particular run of basho
4 | # bench.
5 | #
6 | # ./calc-median-latency.sh
7 | #
8 | # for d in yz-fruit-query-*; do ./calc-median-latency.sh $d; done
9 |
10 | RESULTS_DIR=$1
11 |
12 | for lat in $RESULTS_DIR/*latencies*
13 | do
14 | # taking average of all the medians, divide by 1000 to convert
15 | # from microseconds to milli
16 | echo -n "The mean median latency for $lat: "
17 | sed -e '1,4d' -e '$d' $lat | \
18 | awk -F, '{total += $6 } END { printf("%f\n", (total / NR) / 1000) }'
19 | done
20 |
--------------------------------------------------------------------------------
/test/yz_component_tests.erl:
--------------------------------------------------------------------------------
1 | -module(yz_component_tests).
2 | -compile(export_all).
3 |
4 | -include("yokozuna.hrl").
5 | -include_lib("eunit/include/eunit.hrl").
6 |
7 | disable_index_test()->
8 | yokozuna:disable(index),
9 | ?assertEqual(yz_kv:index({riak_object:new({<<"type">>, <<"bucket">>}, <<"key">>, <<"value">>), no_old_object}, delete, {}), ok).
10 |
11 | disable_search_test()->
12 | yokozuna:disable(search),
13 | {Available, _, _} = yz_wm_search:service_available({},{}),
14 | ?assertEqual(Available, false),
15 | Resp = yz_pb_search:process(ignore, ignore),
16 | ?assertEqual({error, "Search component disabled", ignore}, Resp).
17 |
--------------------------------------------------------------------------------
/src/yz_index_hashtree_sup.erl:
--------------------------------------------------------------------------------
1 | -module(yz_index_hashtree_sup).
2 | -behavior(supervisor).
3 | -include("yokozuna.hrl").
4 | -compile(export_all).
5 | -export([init/1]).
6 |
7 | %% @doc Get the list of trees.
8 | -spec trees() -> ['restarting' | 'undefined' | pid()].
9 | trees() ->
10 | Children = supervisor:which_children(?MODULE),
11 | [Pid || {_,Pid,_,_} <- Children].
12 |
13 | start_link() ->
14 | supervisor:start_link({local, ?MODULE}, ?MODULE, []).
15 |
16 | init(_Args) ->
17 | Spec = {ignored,
18 | {yz_index_hashtree, start_link, []},
19 | temporary, 5000, worker, [yz_index_hashtree]},
20 | {ok, {{simple_one_for_one, 10, 1}, [Spec]}}.
21 |
--------------------------------------------------------------------------------
/misc/bench/bin/calc-95-latency.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Use to calculate the 95th latency for a particular run of basho
4 | # bench.
5 | #
6 | # ./calc-95-latency.sh
7 | #
8 | # for d in query-*; do ./calc-95-latency.sh $d; done
9 |
10 | RESULTS_DIR=$1
11 |
12 | for lat in $RESULTS_DIR/*latencies*
13 | do
14 | # taking average of all 95th percentiles, divide by 1000 to
15 | # convert from microseconds to milli
16 | #
17 | # drop first 30 seconds and last 10 seconds to remove outliers
18 | echo -n "The mean 95th latency for $lat: "
19 | sed -e '1,4d' -e '$d' $lat | \
20 | awk -F, '{total += $7 } END { printf("%f\n", (total / NR) / 1000) }'
21 | done
22 |
--------------------------------------------------------------------------------
/misc/bench/bin/calc-99-latency.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Use to calculate the 99th percentile latency for a particular run of
4 | # basho bench.
5 | #
6 | # ./calc-99-latency.sh
7 | #
8 | # for d in bench-runs-dir/*; do ./calc-99-latency.sh $d; done
9 |
10 | RESULTS_DIR=$1
11 |
12 | for lat in $RESULTS_DIR/*latencies*
13 | do
14 | # taking average of all the 99th percentiles, divide by 1000 to
15 | # convert from microseconds to milli
16 | #
17 | # drop first 30 seconds and last line to remove outliers
18 | echo -n "The mean 99th latency for $lat: "
19 | sed -e '1,4d' -e '$d' $lat | \
20 | awk -F, '{total += $8 } END { printf("%f\n", (total / NR) / 1000) }'
21 | done
22 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-alpha.config:
--------------------------------------------------------------------------------
1 | {mode, max}.
2 | {concurrent, }.
3 | {driver, yz_driver}.
4 | {code_paths, ["/misc/bench"]}.
5 | {index_path, "/riak/fruit"}.
6 | {search_path, "/search/fruit"}.
7 | {http_conns, }.
8 | {pb_conns, []}.
9 |
10 | %% Alpha
11 | %%
12 | %% This query is one of the best cases for the smarter conjunction b/c
13 | %% one of the queries matches 0 terms and two others match 100K docs.
14 | %% This means the old conjunction will do all the work of iterating
15 | %% 200K docs where the smart one will do 0.
16 | %%
17 | %% cardinalities: 100K, 100K, 0
18 | {duration, }.
19 | {operations, [{{search, "pineapple AND grape AND notafruit", "id"}, 1}]}.
20 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-juliet.config:
--------------------------------------------------------------------------------
1 | {mode, max}.
2 | {concurrent, }.
3 | {driver, yz_driver}.
4 | {code_paths, ["/misc/bench"]}.
5 | {index_path, "/riak/fruit"}.
6 | {search_path, "/search/fruit"}.
7 | {http_conns, }.
8 | {pb_conns, []}.
9 |
10 | %% Juliet
11 | %%
12 | %% This case is meant to check for _any_ overhead imposed by smart
13 | %% conj. The thinking is that if all terms match a small number of
14 | %% docs then parallel will be better than sequential. If smart conj
15 | %% can break even here and on the worst case then that is really good.
16 | %%
17 | %% cardinalities: 1, 1
18 | {duration, }.
19 | {operations, [{{search, "citron AND jocote", "id"}, 1}]}.
20 |
21 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-kilo.config:
--------------------------------------------------------------------------------
1 | {mode, max}.
2 | {concurrent, }.
3 | {driver, yz_driver}.
4 | {code_paths, ["/misc/bench"]}.
5 | {index_path, "/riak/fruit"}.
6 | {search_path, "/search/fruit"}.
7 | {http_conns, }.
8 | {pb_conns, []}.
9 |
10 | %% Kilo
11 | %%
12 | %% This query is meant to see how much data is read from disk and
13 | %% transferred over the network. By default Riak Search will bomb on
14 | %% this because of it's default max results of 100k. This should show
15 | %% the stark contrast between Riak Search and Yokozuna when it comes
16 | %% to queries for common terms.
17 | %%
18 | %% cardinalities: 1M
19 | {duration, }.
20 | {operations, [{{search, "apricot", "id"}, 1}]}.
21 |
--------------------------------------------------------------------------------
/misc/bench/bin/plot-3-dimensions.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # ./plot-3-dimensions.sh
4 |
5 | TITLE=$1; shift
6 | YLABEL=$1; shift
7 | DAT=$1; shift
8 | out=${DAT%.dat}
9 |
10 | gnuplot <
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/disk-transform.awk:
--------------------------------------------------------------------------------
1 | # Need to skip the first N lines which contain stats since boot. Look
2 | # for 1st occurance of timestamp after the first line. Ignore
3 | # everything before that.
4 | BEGIN {
5 | # ignore first line with this getline call
6 | getline
7 |
8 | ts_regex = "^[[:digit:]]+$"
9 | while ((getline tmp) > 0) {
10 | if (tmp ~ /^device.*/) {
11 | # remove the trailing comma
12 | sub(/%b,/, "%b", tmp)
13 | printf "timestamp,%s\n", tmp
14 | } else if (tmp ~ ts_regex) {
15 | ts=tmp
16 | break
17 | }
18 | }
19 | }
20 |
21 | /^extended.*/ { next }
22 |
23 | /^device.*/ { next }
24 |
25 | $0 ~ ts_regex {
26 | ts=$0
27 | next
28 | }
29 |
30 | { printf "%s,%s\n", strftime("%FT%T", ts), $0 }
31 |
32 |
--------------------------------------------------------------------------------
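A hypothetical follow-on step, turning the raw iostat capture produced by disk-collect.sh into CSV with the transform above (the output filename is an example only):

    awk -f disk-transform.awk "$(./disk-collect.sh output)" > disk.csv
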
/misc/bench/bin/smartos/virtual-memory-transform.awk:
--------------------------------------------------------------------------------
1 | # Need to skip the first N lines which contain stats since boot. Look
2 | # for 1st occurance of timestamp after the first line. Ignore
3 | # everything before that.
4 | BEGIN {
5 | # ignore first line with this getline call
6 | getline
7 |
8 | ts_regex = "^[[:digit:]]+$"
9 | while ((getline tmp) > 0) {
10 | if (tmp ~ /.*r b w.*/) {
11 | # First remove leading spaces from header
12 | sub(/ +/, "", tmp)
13 | # Next convert spaces to commas
14 | gsub(/ +/, ",", tmp)
15 | printf "timestamp,%s\n", tmp
16 | } else if (tmp ~ ts_regex) {
17 | ts=tmp
18 | break
19 | }
20 | }
21 | }
22 |
23 | /kthr.*memory.*/ { next }
24 |
25 | /.*r b w.*/ { next }
26 |
27 | $0 ~ ts_regex {
28 | ts=$0
29 | next
30 | }
31 |
32 | {
33 | printf "%s", strftime("%FT%T", ts)
34 | gsub(/ +/,",")
35 | print
36 | }
37 |
38 |
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/load.config:
--------------------------------------------------------------------------------
1 | %% Load data for the "fruit" benchmark. Up to 10M keys may be
2 | %% generated, but no larger. The fruit data is used to test the
3 | %% performance of boolean queries.
4 | {mode, max}.
5 | {concurrent, }.
6 | {driver, yz_driver}.
7 | {code_paths, ["/misc/bench"]}.
8 | {index_path, "/riak/fruit"}.
9 | {search_path, "/search/fruit"}.
10 | {http_conns, }.
11 |
12 | %% example
13 | %% {http_conns, [{"10.0.1.80", 8098},
14 | %% {"10.0.1.81", 8098},
15 | %% {"10.0.1.82", 8098},
16 | %% {"10.0.1.83", 8098},
17 | %% {"10.0.1.84", 8098}]}.
18 |
19 | %% This is needed to keep the driver from erroring.
20 | {pb_conns, []}.
21 |
22 | %% The following keygen/ops will load the data. Run this first and
23 | %% then execute the query benchmarks
24 | {duration, infinity}.
25 | {key_generator, {function, yz_driver, fruit_key_val_gen, [1000000]}}.
26 | {operations, [{load_fruit, 1}]}.
27 |
--------------------------------------------------------------------------------
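A hypothetical load run, assuming a basho_bench build and that the empty placeholder terms above (concurrent, http_conns, duration) have been filled in first; the config path is written out for illustration:

    ./basho_bench /path/to/yokozuna/misc/bench/cfgs/fruit/load.config
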
/rebar.config:
--------------------------------------------------------------------------------
1 | {cover_enabled, true}.
2 | {erl_opts, [warnings_as_errors,
3 | debug_info,
4 | {platform_define, "^[0-9]+", namespaced_types},
5 | {parse_transform, lager_transform}]}.
6 | {eunit_opts, [verbose]}.
7 |
8 | {xref_checks, []}.
9 | {xref_queries, [{"(XC - UC) || (XU - X - B)", []}]}.
10 |
11 | {deps,
12 | [
13 | {kvc, ".*", {git, "https://github.com/basho/kvc.git", {tag, "v1.5.0"}}},
14 | {riak_kv, ".*", {git, "https://github.com/basho/riak_kv.git", {tag, "2.1.8"}}},
15 | {ibrowse, "4.*", {git, "https://github.com/basho/ibrowse.git", {tag, "v4.3"}}},
16 | {fuse, "2.1.0", {git, "https://github.com/basho/fuse.git", {tag, "v2.1.0"}}},
17 | %% Needed for testing ONLY
18 | {riakc, ".*", {git, "https://github.com/basho/riak-erlang-client", {tag, "2.5.2"}}}
19 | ]}.
20 |
21 | {pre_hooks, [{compile, "./tools/grab-solr.sh"}]}.
22 |
23 | {plugin_dir, ".rebar_plugins"}.
24 | {plugins, [rebar_test_plugin]}.
25 | {riak_test,
26 | [
27 | {test_paths, ["riak_test"]},
28 | {test_output, "riak_test/ebin"}
29 | ]}.
30 |
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/pid-cpu-mem-transform.awk:
--------------------------------------------------------------------------------
1 | # Need to skip the first N lines which contain stats since boot. Look
2 | # for 1st occurance of timestamp after the first line. Ignore
3 | # everything before that.
4 | BEGIN {
5 | # ignore first line with this getline call
6 | getline
7 | ts_regex = "^[[:digit:]]+$"
8 | while ((getline tmp) > 0) {
9 | if (tmp ~ /.*PID.*/) {
10 | # First remove leading spaces from header
11 | sub(/ +/, "", tmp)
12 | # Next remove trailing
13 | sub(/NLWP +/, "NLWP", tmp)
14 | # Next convert spaces to commas
15 | gsub(/ +/, ",", tmp)
16 | printf "timestamp,%s\n", tmp
17 | } else if (tmp ~ ts_regex) {
18 | ts=tmp
19 | break
20 | }
21 | }
22 | }
23 |
24 | /.*PID.*/ { next }
25 |
26 | /.*Total.*/ { next }
27 |
28 | $0 ~ ts_regex {
29 | ts=$0
30 | next
31 | }
32 |
33 | {
34 | printf "%s", strftime("%FT%T", ts)
35 | gsub(/ +/,",")
36 | gsub(/%/, "")
37 | print
38 | }
39 |
40 |
--------------------------------------------------------------------------------
/misc/bench/bin/transform-raw.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | #> Usage:
4 | #>
5 | #> ./transform-raw.sh
--------------------------------------------------------------------------------