├── solr-patches
│   ├── use_http_retries-4.7.0.patch
│   └── no-stale-check-4.10.4.patch
├── rebar
├── priv
│   ├── conf
│   │   └── _rest_managed.json
│   ├── template_solr.xml
│   └── solr
│       └── contexts
│           └── solr-jetty-context.xml
├── docs
│   ├── yz-batching-overview.png
│   ├── yz-batching-worker.png
│   ├── TAGGING.md
│   ├── TESTING.md
│   ├── RESOURCES.md
│   ├── Q-AND-A.md
│   └── ADMIN.md
├── misc
│   └── bench
│       ├── src
│       │   └── bench.app.src
│       ├── rebar.config
│       ├── bin
│       │   ├── gen-bb-plots.sh
│       │   ├── smartos
│       │   │   ├── disk-collect.sh
│       │   │   ├── virtual-memory-collect.sh
│       │   │   ├── pid-cpu-mem-collect.sh
│       │   │   ├── network-transform.awk
│       │   │   ├── network-collect.sh
│       │   │   ├── disk-transform.awk
│       │   │   ├── virtual-memory-transform.awk
│       │   │   ├── pid-cpu-mem-transform.awk
│       │   │   ├── js
│       │   │   │   ├── throughput.js
│       │   │   │   ├── cpu.js
│       │   │   │   ├── network.js
│       │   │   │   ├── disk.js
│       │   │   │   └── latency.js
│       │   │   └── visualize.html
│       │   ├── calc-mean-thru.sh
│       │   ├── calc-med-latency.sh
│       │   ├── calc-95-latency.sh
│       │   ├── calc-99-latency.sh
│       │   ├── plot-3-dimensions.sh
│       │   ├── transform-raw.sh
│       │   ├── run-bench.sh
│       │   └── make-vis.sh
│       ├── cfgs
│       │   └── fruit
│       │       ├── query-hotel.config
│       │       ├── query-delta.config
│       │       ├── query-golf.config
│       │       ├── query-india.config
│       │       ├── query-echo.config
│       │       ├── query-charlie.config
│       │       ├── query-foxtrot.config
│       │       ├── query-beta.config
│       │       ├── query-alpha.config
│       │       ├── query-juliet.config
│       │       ├── query-kilo.config
│       │       └── load.config
│       └── schemas
│           └── fruit_schema.xml
├── .thumbs.yml
├── riak_test
│   ├── intercepts
│   │   ├── intercept.hrl
│   │   ├── yz_solr_intercepts.erl
│   │   ├── yz_noop_extractor_intercepts.erl
│   │   ├── yz_solrq_drain_mgr_intercepts.erl
│   │   ├── yz_solrq_drain_fsm_intercepts.erl
│   │   └── yz_solrq_helper_intercepts.erl
│   ├── yz_monitor_solr.erl
│   ├── yz_handoff_blocking.erl
│   ├── yz_wm_extract_test.erl
│   ├── yz_solr_start_timeout.erl
│   ├── yz_fuse_upgrade.erl
│   ├── yz_test_listener.erl
│   ├── yz_fallback.erl
│   ├── yz_default_bucket_type_upgrade.erl
│   ├── yz_errors.erl
│   ├── yz_languages.erl
│   ├── yz_ensemble.erl
│   ├── yz_search_http.erl
│   └── yz_ring_resizing.erl
├── rel_etc
│   └── solr-log4j.properties
├── .travis.yml
├── src
│   ├── yokozuna.app.src
│   ├── yz_index_hashtree_sup.erl
│   ├── yz_text_extractor.erl
│   ├── yz_noop_extractor.erl
│   ├── yz_fuse_stats_sidejob.erl
│   ├── yz_stat_worker.erl
│   ├── yz_solr_sup.erl
│   ├── yz_general_sup.erl
│   ├── yz_sup.erl
│   ├── yz_solrq_queue_pair_sup.erl
│   ├── rt_intercept_pt.erl
│   ├── yz_console.erl
│   ├── yz_entropy.erl
│   ├── yz_bucket_validator.erl
│   ├── yz_json_extractor.erl
│   └── yz_rs_migration.erl
├── test
│   ├── test.json
│   ├── yz_component_tests.erl
│   ├── yz_test.hrl
│   ├── utf8.txt
│   ├── utf8.json
│   ├── yz_pulseh.erl
│   ├── yz_text_extractor_tests.erl
│   ├── utf8.xml
│   ├── yz_misc_tests.erl
│   ├── yz_solrq_eqc_fuse.erl
│   ├── yz_dt_extractor_tests.erl
│   └── yz_xml_extractor_tests.erl
├── .gitignore
├── README.md
├── rebar.config
├── java_src
│   └── com
│       └── basho
│           └── yokozuna
│               ├── query
│               │   └── SimpleQueryExample.java
│               ├── monitor
│               │   └── Monitor.java
│               └── handler
│                   └── component
│                       └── FQShardTranslator.java
├── tools
│   ├── src-pkg.sh
│   ├── build-jar.sh
│   ├── build-solr.sh
│   └── grab-solr.sh
├── .travis.sh
├── Makefile
└── .rebar_plugins
    └── rebar_test_plugin.erl

/solr-patches/use_http_retries-4.7.0.patch:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/rebar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basho/yokozuna/HEAD/rebar
--------------------------------------------------------------------------------
/priv/conf/_rest_managed.json:
--------------------------------------------------------------------------------
{"initArgs":{},"managedList":[]}
--------------------------------------------------------------------------------
/docs/yz-batching-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basho/yokozuna/HEAD/docs/yz-batching-overview.png
--------------------------------------------------------------------------------
/docs/yz-batching-worker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basho/yokozuna/HEAD/docs/yz-batching-worker.png
--------------------------------------------------------------------------------
/misc/bench/src/bench.app.src:
--------------------------------------------------------------------------------
{application, bench,
 [
  {description, "This is just here to make rebar happy."},
  {vsn, "0.0.0"}
 ]}.
--------------------------------------------------------------------------------
/.thumbs.yml:
--------------------------------------------------------------------------------
minimum_reviewers: 2
build_steps:
  - make clean
  - make test
  - make xref
  - make dialyzer
merge: false
org_mode: true
timeout: 1790
--------------------------------------------------------------------------------
/misc/bench/rebar.config:
--------------------------------------------------------------------------------
{erl_opts, [debug_info,
            {parse_transform, lager_transform}]}.

{deps,
 [
  {basho_bench, ".*",
   {git, "git://github.com/basho/basho_bench", {branch, "master"}}}
 ]}.
--------------------------------------------------------------------------------
/riak_test/intercepts/intercept.hrl:
--------------------------------------------------------------------------------
%% Copied from riak_test
-define(I_TAG(S), "INTERCEPT: " ++ S).
-define(I_INFO(Msg), error_logger:info_msg(?I_TAG(Msg))).
-define(I_INFO(Msg, Args), error_logger:info_msg(?I_TAG(Msg), Args)).
--------------------------------------------------------------------------------
/priv/template_solr.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8" ?>
<solr>
  <shardHandlerFactory name="shardHandlerFactory"
                       class="HttpShardHandlerFactory">
    <int name="socketTimeout">${socketTimeout:0}</int>
    <int name="connTimeout">${connTimeout:0}</int>
  </shardHandlerFactory>
</solr>
--------------------------------------------------------------------------------
/misc/bench/bin/gen-bb-plots.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Generate the BB summary plots for a set of runs.
#
# ./gen-bb-plots.sh <bb dir> <runs dir>

BB_DIR=$1; shift
DIR=$1; shift

for d in $DIR/*; do
    name=$(basename $d)
    out_file=$d/$name.png
    echo "generating summary $out_file"
    $BB_DIR/priv/summary.r -i $d -o $d/$name.png
done
--------------------------------------------------------------------------------
/rel_etc/solr-log4j.properties:
--------------------------------------------------------------------------------
log4j.rootLogger=WARN, rotate

log4j.appender.rotate=org.apache.log4j.RollingFileAppender
log4j.appender.rotate.File={{platform_log_dir}}/solr.log
log4j.appender.rotate.MaxFileSize=10MB
log4j.appender.rotate.MaxBackupIndex=5
log4j.appender.rotate.layout=org.apache.log4j.PatternLayout
log4j.appender.rotate.layout.ConversionPattern=%d [%p] <%t>@%F:%L %m%n
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/disk-collect.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Collect disk metrics.

ACTION=$1

output=/tmp/disk-collect-raw.txt

case $ACTION in
    start)
        pkill iostat
        iostat -rsxTu cmdk0 cmdk1 1 > $output
        ;;
    stop)
        pkill iostat
        ;;
    output)
        echo $output
        ;;
esac
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: erlang
otp_release:
  - R16B03
cache:
  directories:
    - "$HOME/otp-basho"
before_script:
  - ./.travis.sh build
script:
  - ./.travis.sh test
notifications:
  slack:
    secure: EL4ZCavW6oLbhgKc/bcjO4zoccYx/XrjiXvcki3S7UGUtVm+qT5rR3AfBGMkHmssfj4dn0u4xgfS67kvro/RMHrkrGx4Vqulnnd+57wixon/lGAMy527OoUvNU3rz6ZQGrb8LvEgRGCGARoW6ed6K9zccJAhB9vg2FbDsm5XzkY=
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/virtual-memory-collect.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Collect virtual memory statistics.

ACTION=$1
output=/tmp/virtual-memory-collect-raw.txt

case $ACTION in
    start)
        pkill vmstat
        vmstat -Tu 1 > $output
        ;;
    stop)
        pkill vmstat
        ;;
    output)
        echo $output
        ;;
esac
--------------------------------------------------------------------------------
/src/yokozuna.app.src:
--------------------------------------------------------------------------------
%% -*- erlang -*-
{application, yokozuna,
 [
  {description, "Integrating Apache Solr into Riak"},
  {vsn, git},
  {registered, []},
  {applications, [
                  kernel,
                  stdlib,
                  ibrowse,
                  fuse,
                  riak_core,
                  riak_kv
                 ]},
  {mod, { yz_app, []}},
  {env, []}
 ]}.
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/pid-cpu-mem-collect.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Collect CPU and MEM metrics for specific processes.

ACTION=$1; shift
output=/tmp/pid-cpu-mem-collect-raw.txt

case $ACTION in
    start)
        pkill prstat
        prstat -du -p "$(pgrep -d, -f 'beam|^[^ ]*java')" 1 > $output
        ;;
    stop)
        pkill prstat
        ;;
    output)
        echo $output
        ;;
esac
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/network-transform.awk:
--------------------------------------------------------------------------------
# Format header
NR == 1 {
    # First remove leading space
    sub(/ +/, "")
    # Next convert space to commas
    gsub(/ +/, ",")
    # Convert 'Time' header to 'timestamp'
    sub(/Time/, "timestamp")
    print
    next
}

# Ignore first record
NR == 2 { next };

# Ignore header reprints
/.*Time.*Int.*rKb.*Sat.*/ { next }

{ gsub(/ +/,","); print }
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/network-collect.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Collect network metrics
#
# This relies on building nicstat from source.
#
# See: http://www.brendangregg.com/K9Toolkit/nicstat.c

ACTION=$1
output=/tmp/network-collect-raw.txt

case $ACTION in
    start)
        pkill nicstat
        ~/k9/nicstat 1 > $output
        ;;
    stop)
        pkill nicstat
        ;;
    output)
        echo $output
        ;;
esac
--------------------------------------------------------------------------------
/misc/bench/bin/calc-mean-thru.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Use to calculate the mean throughput for a particular run of basho
# bench.
#
# ./calc-mean-thru.sh <results dir>
#
# for d in yz-fruit-query-*; do ./calc-mean-thru.sh $d; done

RESULTS_DIR=$1

echo -n "Mean ops/s for $RESULTS_DIR: "
# Don't include the header, first 3 results or last result
sed -e '1,4d' -e '$d' $RESULTS_DIR/summary.csv | \
    awk -F, '{ secs += $2; ops += $4 } END { printf("%f\n", ops / secs) }'
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-hotel.config:
--------------------------------------------------------------------------------
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.
{pb_conns, []}.

%% Hotel
%%
%% I imagine this is worst case for smart conj. All terms match 100K
%% docs.
%%
%% cardinalities: 100K, 100K, 100K
{duration, <duration>}.
{operations, [{{search, "strawberry AND kiwi AND orange", "id"}, 1}]}.
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-delta.config:
--------------------------------------------------------------------------------
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.
{pb_conns, []}.

%% Delta
%%
%% This is a good case for smart conj b/c it has some largeish matches
%% but one matches only 10.
%%
%% cardinalities: 10K, 100, 10
{duration, <duration>}.
{operations, [{{search, "avocado AND nutmeg AND nance", "id"}, 1}]}.
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-golf.config:
--------------------------------------------------------------------------------
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.
{pb_conns, []}.

%% Golf
%%
%% This is kind of a mix between bad/good for smart conj. 1K doc ids
%% will have to be copied over.
%%
%% cardinalities: 1K, 10K, 100K
{duration, <duration>}.
{operations, [{{search, "lime AND raspberry AND grape", "id"}, 1}]}.
--------------------------------------------------------------------------------
/test/test.json:
--------------------------------------------------------------------------------
{"name":"ryan",
 "age":29,
 "pets":["smokey", "bandit"],
 "books":[
     {"title":"Introduction to Information Retrieval",
      "authors":["Christopher D. Manning",
                 "Prabhakar Raghavan",
                 "Hinrich Schütze"]},
     {"title":"Principles of Distributed Database Systems",
      "authors":["M. Tamer Özsu", "Patrick Valduriez"]}
 ],
 "type":null,
 "alive":true,
 "married":false,
 "a_number":1.1e6,
 "lucky_numbers":[13,17,21],
 "misc":{},
 "kids":[]
}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*~
deps
doc
ebin
priv/java_lib
priv/conf/lang
priv/conf/*.txt
priv/solr/etc/create-solrtest.keystore.sh
priv/solr/etc/webdefault.xml
priv/solr/lib
priv/solr/resources
priv/solr/start.jar
priv/solr/webapps
priv/solr/solr-webapp
build/
*.class
*.tgz
.rebar/*
riak_test/ebin
tests/20*
tests/current
.eunit
log
bb-*-fruit*
.local_dialyzer_plt
.yokozuna_test_dialyzer_plt
dialyzer_warnings
dialyzer_unhandled_warnings
/.eqc-info
/current_counterexample.eqc
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-india.config:
--------------------------------------------------------------------------------
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.
{pb_conns, []}.

%% India
%%
%% This case is meant to check for overhead in smart conj and just
%% test out querying many terms.
%%
%% cardinalities: 1, 10, 1, 100, 10
{duration, <duration>}.
{operations, [{{search, "korlan AND nunga AND genip AND nutmeg AND kumquat", "id"}, 1}]}.
--------------------------------------------------------------------------------
/priv/solr/contexts/solr-jetty-context.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<!DOCTYPE Configure PUBLIC "-//Jetty//Configure//EN" "http://www.eclipse.org/jetty/configure.dtd">
<Configure class="org.eclipse.jetty.webapp.WebAppContext">
  <Set name="war"><SystemProperty name="jetty.home"/>/webapps/solr.war</Set>
  <Set name="defaultsDescriptor"><SystemProperty name="jetty.home"/>/etc/webdefault.xml</Set>
  <Set name="tempDirectory"><SystemProperty name="jetty.home"/>/solr-webapp</Set>
</Configure>
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-echo.config:
--------------------------------------------------------------------------------
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.
{pb_conns, []}.

%% Echo
%%
%% Another good case, this one starts to potentially even out between
%% old/new conj since all terms match a small number of postings.
%%
%% cardinalities: 10, 1K, 10
{duration, <duration>}.
{operations, [{{search, "mulberry AND clementine AND peanut", "id"}, 1}]}.
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-charlie.config:
--------------------------------------------------------------------------------
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.
{pb_conns, []}.

%% Charlie
%%
%% This is another great case for smart conj b/c all but one term
%% matches 100K docs.
%%
%% cardinalities: 100K, 100K, 1, 100K, 100K, 100K
{duration, <duration>}.
{operations, [{{search, "apple AND grape AND elderberry AND orange AND pineapple AND strawberry", "id"}, 1}]}.
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-foxtrot.config:
--------------------------------------------------------------------------------
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.
{pb_conns, []}.

%% Foxtrot
%%
%% This is potentially a bad case for smart conj since all terms match
%% the same number of docs and smart conj is sequential rather than
%% parallel.
%%
%% cardinalities: 10K, 10K, 10K
{duration, <duration>}.
{operations, [{{search, "persimmon AND cherry AND tomato", "id"}, 1}]}.
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-beta.config:
--------------------------------------------------------------------------------
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.
{pb_conns, []}.

%% Beta
%%
%% Another best case for smart conj, but unlike the last one there is
%% a total of 1 doc that matches, so it must be streamed through the
%% other two 100K matches.
%%
%% cardinalities: 100K, 100K, 1
{duration, <duration>}.
{operations, [{{search, "apple AND orange AND jujube", "id"}, 1}]}.
--------------------------------------------------------------------------------
/misc/bench/bin/calc-med-latency.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Use to calculate the median latency for a particular run of basho
# bench.
#
# ./calc-med-latency.sh <results dir>
#
# for d in yz-fruit-query-*; do ./calc-med-latency.sh $d; done

RESULTS_DIR=$1

for lat in $RESULTS_DIR/*latencies*
do
    # taking average of all the medians, divide by 1000 to convert
    # from microseconds to milli
    echo -n "The mean median latency for $lat: "
    sed -e '1,4d' -e '$d' $lat | \
        awk -F, '{total += $6 } END { printf("%f\n", (total / NR) / 1000) }'
done
--------------------------------------------------------------------------------
/test/yz_component_tests.erl:
--------------------------------------------------------------------------------
-module(yz_component_tests).
-compile(export_all).

-include("yokozuna.hrl").
-include_lib("eunit/include/eunit.hrl").

disable_index_test()->
    yokozuna:disable(index),
    ?assertEqual(yz_kv:index({riak_object:new({<<"type">>, <<"bucket">>}, <<"key">>, <<"value">>), no_old_object}, delete, {}), ok).

disable_search_test()->
    yokozuna:disable(search),
    {Available, _, _} = yz_wm_search:service_available({},{}),
    ?assertEqual(Available, false),
    Resp = yz_pb_search:process(ignore, ignore),
    ?assertEqual({error, "Search component disabled", ignore}, Resp).
--------------------------------------------------------------------------------
/src/yz_index_hashtree_sup.erl:
--------------------------------------------------------------------------------
-module(yz_index_hashtree_sup).
-behavior(supervisor).
-include("yokozuna.hrl").
-compile(export_all).
-export([init/1]).

%% @doc Get the list of trees.
-spec trees() -> ['restarting' | 'undefined' | pid()].
trees() ->
    Children = supervisor:which_children(?MODULE),
    [Pid || {_,Pid,_,_} <- Children].

start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

init(_Args) ->
    Spec = {ignored,
            {yz_index_hashtree, start_link, []},
            temporary, 5000, worker, [yz_index_hashtree]},
    {ok, {{simple_one_for_one, 10, 1}, [Spec]}}.
--------------------------------------------------------------------------------
/misc/bench/bin/calc-95-latency.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Use to calculate the 95th percentile latency for a particular run of
# basho bench.
#
# ./calc-95-latency.sh <results dir>
#
# for d in query-*; do ./calc-95-latency.sh $d; done

RESULTS_DIR=$1

for lat in $RESULTS_DIR/*latencies*
do
    # taking average of all 95th percentiles, divide by 1000 to
    # convert from microseconds to milli
    #
    # drop first 30 seconds and last 10 seconds to remove outliers
    echo -n "The mean 95th latency for $lat: "
    sed -e '1,4d' -e '$d' $lat | \
        awk -F, '{total += $7 } END { printf("%f\n", (total / NR) / 1000) }'
done
--------------------------------------------------------------------------------
/misc/bench/bin/calc-99-latency.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Use to calculate the 99th percentile latency for a particular run of
# basho bench.
#
# ./calc-99-latency.sh <results dir>
#
# for d in bench-runs-dir/*; do ./calc-99-latency.sh $d; done

RESULTS_DIR=$1

for lat in $RESULTS_DIR/*latencies*
do
    # taking average of all the 99th percentiles, divide by 1000 to
    # convert from microseconds to milli
    #
    # drop first 30 seconds and last line to remove outliers
    echo -n "The mean 99th latency for $lat: "
    sed -e '1,4d' -e '$d' $lat | \
        awk -F, '{total += $8 } END { printf("%f\n", (total / NR) / 1000) }'
done
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-alpha.config:
--------------------------------------------------------------------------------
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.
{pb_conns, []}.

%% Alpha
%%
%% This query is one of the best cases for the smarter conjunction b/c
%% one of the terms matches 0 docs and two others match 100K docs.
%% This means the old conjunction will do all the work of iterating
%% 200K docs where the smart one will do 0.
%%
%% cardinalities: 100K, 100K, 0
{duration, <duration>}.
{operations, [{{search, "pineapple AND grape AND notafruit", "id"}, 1}]}.
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-juliet.config:
--------------------------------------------------------------------------------
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.
{pb_conns, []}.

%% Juliet
%%
%% This case is meant to check for _any_ overhead imposed by smart
%% conj. The thinking is that if all terms match a small number of
%% docs then parallel will be better than sequential. If smart conj
%% can break even here and on the worst case then that is really good.
%%
%% cardinalities: 1, 1
{duration, <duration>}.
{operations, [{{search, "citron AND jocote", "id"}, 1}]}.
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/query-kilo.config:
--------------------------------------------------------------------------------
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.
{pb_conns, []}.

%% Kilo
%%
%% This query is meant to see how much data is read from disk and
%% transferred over the network. By default Riak Search will bomb on
%% this because of its default max results of 100K. This should show
%% the stark contrast between Riak Search and Yokozuna when it comes
%% to queries for common terms.
%%
%% cardinalities: 1M
{duration, <duration>}.
{operations, [{{search, "apricot", "id"}, 1}]}.
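
%% A filled-in sketch of the placeholders used throughout these query
%% configs (the concrete values here are illustrative assumptions, not
%% tuned numbers; load.config below shows the same conventions):
%%
%% {concurrent, 32}.
%% {duration, 10}.
%% {http_conns, [{"10.0.1.80", 8098},
%%               {"10.0.1.81", 8098}]}.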
--------------------------------------------------------------------------------
/misc/bench/bin/plot-3-dimensions.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# ./plot-3-dimensions.sh <title> <y-label> <dat file>

TITLE=$1; shift
YLABEL=$1; shift
DAT=$1; shift
out=${DAT%.dat}

gnuplot <<EOF

reset
set title "$TITLE"

set terminal svg font "monospace"
set output '$out.svg'

set xtics rotate by -45 offset character -1, -0.5
set ylabel "$YLABEL"

set border linewidth 2
set style line 1 lc rgb '#88BB44' lt 1 lw 2 pt 7 pi -1 ps 1.5
set style line 2 lc rgb '#FF5544' lt 1 lw 2 pt 7 pi -1 ps 1.5
set pointintervalbox 2

set key below autotitle columnhead width 2

set style data linespoints
plot '$DAT' using 2:xtic(1) ls 1, '' u 3 ls 2
EOF
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Yokozuna
==========

_Yokozuna_ - Horizontal rope. The top rank in sumo, usually
translated _Grand Champion_. The name comes from the rope a yokozuna
wears.

Yokozuna is the new implementation of Riak Search built atop Apache Solr.
Download [Riak 2.0][downloads] to try Yokozuna. See the [official
documentation][search-docs] for more information.

[downloads]: http://docs.basho.com/riak/latest/downloads/
[search-docs]: http://docs.basho.com/riak/latest/dev/using/search/

Build Status
============

* Master: [![Build Status](https://travis-ci.org/basho/yokozuna.svg?branch=master)](https://travis-ci.org/basho/yokozuna)
* Develop: [![Build Status](https://travis-ci.org/basho/yokozuna.svg?branch=develop)](https://travis-ci.org/basho/yokozuna)
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/disk-transform.awk:
--------------------------------------------------------------------------------
# Need to skip the first N lines which contain stats since boot. Look
# for 1st occurrence of date string after the first line. Ignore
# everything before that.
BEGIN {
    # ignore first line with this getline call
    getline

    ts_regex = "^[[:digit:]]+$"
    while ((getline tmp) > 0) {
        if (tmp ~ /^device.*/) {
            # remove the trailing comma
            sub(/%b,/, "%b", tmp)
            printf "timestamp,%s\n", tmp
        } else if (tmp ~ ts_regex) {
            ts=tmp
            break
        }
    }
}

/^extended.*/ { next }

/^device.*/ { next }

$0 ~ ts_regex {
    ts=$0
    next
}

{ printf "%s,%s\n", strftime("%FT%T", ts), $0 }
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/virtual-memory-transform.awk:
--------------------------------------------------------------------------------
# Need to skip the first N lines which contain stats since boot. Look
# for 1st occurrence of timestamp after the first line. Ignore
# everything before that.
BEGIN {
    # ignore first line with this getline call
    getline

    ts_regex = "^[[:digit:]]+$"
    while ((getline tmp) > 0) {
        if (tmp ~ /.*r b w.*/) {
            # First remove leading spaces from header
            sub(/ +/, "", tmp)
            # Next convert spaces to commas
            gsub(/ +/, ",", tmp)
            printf "timestamp,%s\n", tmp
        } else if (tmp ~ ts_regex) {
            ts=tmp
            break
        }
    }
}

/kthr.*memory.*/ { next }

/.*r b w.*/ { next }

$0 ~ ts_regex {
    ts=$0
    next
}

{
    printf "%s", strftime("%FT%T", ts)
    gsub(/ +/,",")
    print
}
--------------------------------------------------------------------------------
/misc/bench/cfgs/fruit/load.config:
--------------------------------------------------------------------------------
%% Load data for the "fruit" benchmark. Up to 10M keys may be
%% generated, but no more. The fruit data is used to test the
%% performance of boolean queries.
{mode, max}.
{concurrent, <concurrent>}.
{driver, yz_driver}.
{code_paths, ["<path-to-yokozuna>/misc/bench"]}.
{index_path, "/riak/fruit"}.
{search_path, "/search/fruit"}.
{http_conns, <hosts-ports>}.

%% example
%% {http_conns, [{"10.0.1.80", 8098},
%%               {"10.0.1.81", 8098},
%%               {"10.0.1.82", 8098},
%%               {"10.0.1.83", 8098},
%%               {"10.0.1.84", 8098}]}.

%% This is needed to keep the driver from erroring.
{pb_conns, []}.

%% The following keygen/ops will load the data. Run this first and
%% then execute the query benchmarks.
{duration, infinity}.
{key_generator, {function, yz_driver, fruit_key_val_gen, [1000000]}}.
{operations, [{load_fruit, 1}]}.
--------------------------------------------------------------------------------
/rebar.config:
--------------------------------------------------------------------------------
{cover_enabled, true}.
{erl_opts, [warnings_as_errors,
            debug_info,
            {platform_define, "^[0-9]+", namespaced_types},
            {parse_transform, lager_transform}]}.
{eunit_opts, [verbose]}.

{xref_checks, []}.
{xref_queries, [{"(XC - UC) || (XU - X - B)", []}]}.

{deps,
 [
  {kvc, ".*", {git, "https://github.com/basho/kvc.git", {tag, "v1.5.0"}}},
  {riak_kv, ".*", {git, "https://github.com/basho/riak_kv.git", {tag, "2.1.8"}}},
  {ibrowse, "4.*", {git, "https://github.com/basho/ibrowse.git", {tag, "v4.3"}}},
  {fuse, "2.1.0", {git, "https://github.com/basho/fuse.git", {tag, "v2.1.0"}}},
  %% Needed for testing ONLY
  {riakc, ".*", {git, "https://github.com/basho/riak-erlang-client", {tag, "2.5.2"}}}
 ]}.

{pre_hooks, [{compile, "./tools/grab-solr.sh"}]}.

{plugin_dir, ".rebar_plugins"}.
{plugins, [rebar_test_plugin]}.
{riak_test,
 [
  {test_paths, ["riak_test"]},
  {test_output, "riak_test/ebin"}
 ]}.
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/pid-cpu-mem-transform.awk:
--------------------------------------------------------------------------------
# Need to skip the first N lines which contain stats since boot. Look
# for 1st occurrence of timestamp after the first line. Ignore
# everything before that.
BEGIN {
    # ignore first line with this getline call
    getline
    ts_regex = "^[[:digit:]]+$"
    while ((getline tmp) > 0) {
        if (tmp ~ /.*PID.*/) {
            # First remove leading spaces from header
            sub(/ +/, "", tmp)
            # Next remove trailing spaces after the NLWP column
            sub(/NLWP +/, "NLWP", tmp)
            # Next convert spaces to commas
            gsub(/ +/, ",", tmp)
            printf "timestamp,%s\n", tmp
        } else if (tmp ~ ts_regex) {
            ts=tmp
            break
        }
    }
}

/.*PID.*/ { next }

/.*Total.*/ { next }

$0 ~ ts_regex {
    ts=$0
    next
}

{
    printf "%s", strftime("%FT%T", ts)
    gsub(/ +/,",")
    gsub(/%/, "")
    print
}
--------------------------------------------------------------------------------
/misc/bench/bin/transform-raw.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
#> Usage:
#>
#> ./transform-raw.sh <script dir> <bench results dir> <bench name>

usage() {
    grep '#>' $0 | sed 's/#>//' | sed '$d'
}

if [ ! $# -eq 3 ]; then
    echo "incorrect number of arguments"
    usage
    exit 1
fi

SCRIPT_DIR=$1; shift
BENCH_RESULTS_DIR=$1; shift
BENCH_NAME=$1; shift

RUN_DIR=$BENCH_RESULTS_DIR/$BENCH_NAME

for awk_script_path in $SCRIPT_DIR/*.awk
do
    awk_script=$(basename $awk_script_path)
    metric=${awk_script%-transform.awk}
    src=$RUN_DIR/*${metric}*raw.txt

    for raw in $src; do
        tgt=${raw/-raw.txt/.csv}
        echo "transforming $raw to $tgt"
        if ! gawk -f $awk_script_path $raw > $tgt; then
            echo "failed to transform $src"
            exit 1
        fi
    done
done

gsed -i'' 's/ //g' $RUN_DIR/summary.csv

for lat_file in $RUN_DIR/*latencies.csv; do
    gsed -i'' 's/ //g' $lat_file
done
--------------------------------------------------------------------------------
/riak_test/intercepts/yz_solr_intercepts.erl:
--------------------------------------------------------------------------------
-module(yz_solr_intercepts).
-compile(export_all).

-type index_name() :: binary().

-define(M, yz_solr_orig).
-define(FMT(S, Args), lists:flatten(io_lib:format(S, Args))).

-spec slow_cores() -> {ok, []}.
slow_cores() ->
    timer:sleep(6000),
    {ok, []}.

-spec entropy_data_cant_complete(index_name(), list()) -> {error, term()}.
entropy_data_cant_complete(Core, Filter) ->
    Params = [{wt, json}|Filter] -- [{continuation, none}],
    Params2 = proplists:substitute_aliases([{continuation, continue},
                                            {limit,n}], Params),
    Opts = [{response_format, binary}],
    URL = ?FMT("~s/~s/entropy_data?~s",
               [yz_solr:base_url(), Core, mochiweb_util:urlencode(Params2)]),
    case ibrowse:send_req(URL, [], get, [], Opts, 0) of
        Error ->
            {error, Error}
    end.

index_batch_call_orig(Core, Ops) ->
    ?M:index_batch_orig(Core, Ops).

index_batch_returns_other_error(_Core, _Ops) ->
    {error, other, "Failed to index docs"}.
--------------------------------------------------------------------------------
/test/yz_test.hrl:
--------------------------------------------------------------------------------
%% Macros and functions to share across tests.
-include_lib("eunit/include/eunit.hrl").

-define(STACK_IF_FAIL(Expr),
        ?IF(try
                Expr, true
            catch _:_ ->
                false
            end,
            ok,
            begin
                Trace = erlang:get_stacktrace(),
                ?debugFmt("~n~p failed: ~p~n", [??Expr, Trace]),
                throw({expression_failed, ??Expr})
            end)).

%% A replacement for ?assertEqual that prints the entire binary so
%% that bytes can be compared in case of mismatch.
-define(assertPairsEq(S1,S2),
        ?IF(begin
                ?assertEqual(element(1, S1), element(1, S2)),
                element(2, S1) =:= element(2, S2)
            end,
            ok,
            begin
                Field = element(1, S1),
                ?debugFmt("~nfields not equal: ~s~n", [Field]),
                ?debugFmt("expected: ~p~n", [element(2,S1)]),
                ?debugFmt("actual: ~p~n", [element(2,S2)]),
                throw(pairs_not_equal)
            end)).
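
%% A hypothetical use of ?assertPairsEq (the expected pair and the
%% extractor call are made up for illustration):
%%
%% name_pair_test() ->
%%     [Pair] = yz_json_extractor:extract(<<"{\"name\":\"ryan\"}">>),
%%     ?assertPairsEq({<<"name">>, <<"ryan">>}, Pair).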
--------------------------------------------------------------------------------
/src/yz_text_extractor.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

-module(yz_text_extractor).
-include("yokozuna.hrl").
-compile(export_all).

extract(Value) ->
    extract(Value, []).

extract(Value, Opts) ->
    FieldName = field_name(Opts),
    [{FieldName, Value}].

-spec field_name(proplist()) -> any().
field_name(Opts) ->
    proplists:get_value(field_name, Opts, text).
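
%% For example, yz_text_extractor:extract(<<"hello">>) yields
%% [{text, <<"hello">>}], and passing [{field_name, body}] as Opts
%% yields [{body, <<"hello">>}] instead.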
--------------------------------------------------------------------------------
/src/yz_noop_extractor.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc A noop extractor. It performs no work and creates no index
%% fields for the registered mime type, returning an empty list.
%%
-module(yz_noop_extractor).
-include("yokozuna.hrl").
-compile(export_all).
-define(NOOP_RESULTS, []).

extract(Value) ->
    extract(Value, ?NO_OPTIONS).

-spec extract(binary(), proplist()) -> [{binary(), binary()}] |
                                       {error, any()}.
extract(_Value, _Opts) ->
    ?NOOP_RESULTS.
--------------------------------------------------------------------------------
/riak_test/yz_monitor_solr.erl:
--------------------------------------------------------------------------------
%% @doc Ensure that JVM killing works.
-module(yz_monitor_solr).
-compile(export_all).
-import(yz_rt, [host_entries/1]).
-include_lib("eunit/include/eunit.hrl").

-define(CFG, [{yokozuna, [{enabled, true}]}]).

confirm() ->
    random:seed(now()),
    Cluster = rt:build_cluster(1, ?CFG),
    rt:wait_for_cluster_service(Cluster, yokozuna),
    ok = test_solr_monitor(Cluster),
    pass.

%% Kill the spawning Erlang process and verify the JVM is killed, too
-spec test_solr_monitor([node()]) -> ok | fail.
test_solr_monitor(Cluster) ->
    Node = hd(Cluster),
    ErlPid = rpc:call(Node, os, getpid, []),
    JvmPid = get_jvm_pid(Node),
    lager:info("kill -9 Riak: ~p", [ErlPid]),
    os:cmd("kill -9 " ++ ErlPid),
    rt:wait_until(nonode, fun(_M) -> yz_monitor_solr:is_jvm_dead_yet(JvmPid) end).

%% Verify that the JVM really has died
-spec is_jvm_dead_yet(string()) -> boolean().
is_jvm_dead_yet(JvmPid) ->
    lager:info("Checking to see if JVM is dead: ~p", [JvmPid]),
    Out = os:cmd("/bin/ps -ef | grep " ++ JvmPid ++ "| grep -v grep | grep -v dyld"),
    case Out of
        [] ->
            true;
        _ ->
            false
    end.

%% Call gen_server getpid to find OS PID of JVM process
-spec get_jvm_pid(node()) -> string().
get_jvm_pid(Node) ->
    Pid = rpc:call(Node, yz_solr_proc, getpid, []),
    integer_to_list(Pid).
--------------------------------------------------------------------------------
/riak_test/yz_handoff_blocking.erl:
--------------------------------------------------------------------------------
%% @doc Verify yokozuna cannot block kv handoff forever
-module(yz_handoff_blocking).
-compile(export_all).
-include_lib("eunit/include/eunit.hrl").
-define(FMT(S, Args), lists:flatten(io_lib:format(S, Args))).
-define(INDEX, <<"handoff_blocking">>).
-define(CFG,
        [{riak_core,
          [
           {ring_creation_size, 16}
          ]},
         {yokozuna,
          [
           {enabled, true}
          ]}
        ]).

confirm() ->
    [Node, Node2] = Cluster = rt:deploy_nodes(2, ?CFG),

    %% create an index on one node and populate it with some data
    yz_rt:create_index([Node], ?INDEX),
    ok = yz_rt:set_bucket_type_index([Node], ?INDEX),
    ConnInfo = yz_rt:connection_info([Node]),
    {Host, Port} = yz_rt:riak_http(proplists:get_value(Node, ConnInfo)),
    URL = ?FMT("http://~s:~s/types/~s/buckets/~s/keys/~s",
               [Host, integer_to_list(Port), ?INDEX, <<"bucket">>, <<"key">>]),
    Headers = [{"content-type", "text/plain"}],
    Body = <<"yokozuna">>,
    {ok, "204", _, _} = ibrowse:send_req(URL, Headers, put, Body, []),

    %% load and install the intercept
    rt_intercept:load_code(Node2, [filename:join([rt_config:get(yz_dir), "riak_test", "intercepts", "*.erl"])]),
    rt_intercept:add(Node2, {yz_solr, [{{cores,0}, slow_cores}]}),

    %% join a node
    rt:join_cluster(Cluster),
    ok = rt:wait_until_no_pending_changes(Cluster),
    yz_rt:wait_for_index(Cluster, ?INDEX),
    pass.
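
%% Note on the intercept above: rt_intercept:add/2 takes
%% {TargetModule, [{{Function, Arity}, InterceptFun}]}, so the call
%% reroutes yz_solr:cores/0 on Node2 to
%% yz_solr_intercepts:slow_cores/0 (shown earlier in this repo),
%% which sleeps for six seconds to simulate a slow Solr while kv
%% handoff proceeds.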
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/js/throughput.js:
--------------------------------------------------------------------------------
function init_throughput() {
    var x = d3.scale.linear().range([0, width]);
    var y = d3.scale.linear().range([height, 0]);
    var xAxis = d3.svg.axis().scale(x).orient("bottom");
    var yAxis = d3.svg.axis().scale(y).orient("left");

    var line = d3.svg.line()
        .x(function(d) { return x(d.elapsed); })
        .y(function(d) { return y(d.successful / d.window); });

    var svg = d3.select("#throughput p.vis").append("svg")
        .attr("id", "throughput")
        .attr("width", width + margin.left + margin.right)
        .attr("height", height + margin.top + margin.bottom)
        .append("g")
        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

    d3.csv("summary.csv", function(data) {
        x.domain(d3.extent(data, function(d) { return d.elapsed; }));
        y.domain([0, d3.max(data, function(d) { return (d.total / d.window); })]);

        svg.append("g")
            .attr("class", "x axis")
            .attr("transform", "translate(0," + height + ")")
            .call(xAxis);

        svg.append("g")
            .attr("class", "y axis")
            .call(yAxis);

        svg.append("text")
            .attr("text-anchor", "middle")
            .attr("transform", "translate(" + -(margin.left/2) + "," + (height/2) + ")rotate(-90)")
            .attr("class", "label")
            .text("Ops per s");

        svg.append("path")
            .datum(data)
            .attr("class", "line")
            .attr("d", line);
    });
}
--------------------------------------------------------------------------------
/riak_test/intercepts/yz_noop_extractor_intercepts.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2015 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%%-------------------------------------------------------------------

%% Example from:
%% http://docs.basho.com/riak/latest/dev/search/custom-extractors/#An-Example-Custom-Extractor

-module(yz_noop_extractor_intercepts).
-compile(export_all).
-include("intercept.hrl").

extract_httpheader(Value) ->
    extract_httpheader(Value, []).

extract_httpheader(Value, _Opts) ->
    {ok,
     {http_request,
      Method,
      {absoluteURI, http, Host, undefined, Uri},
      _Version},
     _Rest} = erlang:decode_packet(http, Value, []),
    [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}].

extract_non_unicode_data(Value) ->
    extract_non_unicode_data(Value, []).

extract_non_unicode_data(_Value, _Opts) ->
    [{blob, <<9147374713>>}].
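
%% A sketch of how an extractor like extract_httpheader/2 gets wired
%% in, following the custom-extractor docs linked above (the MIME type
%% here is chosen for illustration):
%%
%% yz_extractor:register("application/httpheader", yz_noop_extractor_intercepts).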
--------------------------------------------------------------------------------
/java_src/com/basho/yokozuna/query/SimpleQueryExample.java:
--------------------------------------------------------------------------------
/*
 * This is a simple query example to show that querying Yokozuna with
 * a standard Solr client works.
 *
 * Usage:
 *
 *   java -cp priv/java_lib/yokozuna.jar:priv/solr-jars/WEB-INF/lib/* com.basho.yokozuna.query.SimpleQueryExample BASE_URL INDEX FIELD TERM
 *
 * Example:
 *
 *   java -cp priv/java_lib/yokozuna.jar:priv/solr-jars/WEB-INF/lib/* com.basho.yokozuna.query.SimpleQueryExample http://localhost:8098/search fruit text apple
 */

package com.basho.yokozuna.query;

import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.common.params.ModifiableSolrParams;

public class SimpleQueryExample {

    public static void main(String[] args) throws SolrServerException {
        final String baseURL = args[0];
        final String index = args[1];
        final String field = args[2];
        final String term = args[3];

        final SolrServer solr = new HttpSolrServer(baseURL + "/" + index);
        final ModifiableSolrParams params = new ModifiableSolrParams();
        params.set("qt", "/");
        params.set("q", field + ":" + term);
        final SolrRequest req = new QueryRequest(params);

        final QueryResponse resp = solr.query(params);
        System.out.println("resp: " + resp);
    }
}
--------------------------------------------------------------------------------
/src/yz_fuse_stats_sidejob.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%%
%% @doc yz_fuse_stats_sidejob is a module for offloading statistics
%% updates to a background worker pool maintained by sidejob so that
%% the calls to exometer don't slow down indexing work. All operations
%% are pass-through to yz_stat to keep the "how" of stats updates in
%% one place.

-module(yz_fuse_stats_sidejob).
-behaviour(fuse_stats_plugin).
-export([init/1, increment/2]).

%% @doc Initialize exometer for `Name'.
-spec init(Name :: atom()) -> ok.
init(Name) ->
    yz_stat:initialize_fuse_stats(Name),
    ok.

%% @doc Increment `Name's `Counter' spiral.
-spec increment(Name :: atom(), Counter :: ok | blown | melt) -> ok.
increment(Name, Counter) ->
    _ = yz_stat_worker:update({update_fuse_stat, Name, Counter}),
    ok.
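
%% Selecting this plugin is presumably a matter of pointing fuse's
%% stats_plugin setting at the module, e.g. (an assumption for
%% illustration, not something shown in this repo):
%%
%% application:set_env(fuse, stats_plugin, yz_fuse_stats_sidejob).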
--------------------------------------------------------------------------------
/misc/bench/bin/smartos/visualize.html:
--------------------------------------------------------------------------------
<!DOCTYPE html>
<meta charset="utf-8">
<style>
body {
    font: 12px sans-serif;
}

.label {
    font: 14px sans-serif;
}

.axis path,
.axis line {
    fill: none;
    stroke: #000;
    shape-rendering: crispEdges;
}

.x.axis path {
    display: none;
}

.line {
    fill: none;
    stroke: steelblue;
    stroke-width: 1.5px;
}

div.metric {
    float: left
}

p.control * {
    float: left
}
</style>

<html>
<body>

<div id="throughput" class="metric"><p class="vis"/><p class="control"/></div>
<div id="latencies" class="metric"><p class="vis"/><p class="control"/></div>
<div id="cpu" class="metric"><p class="vis"/><p class="control"/></div>
<div id="disk" class="metric"><p class="vis"/><p class="control"/></div>
<div id="network" class="metric"><p class="vis"/><p class="control"/></div>

<script src="http://d3js.org/d3.v2.js"></script>

<script>
var margin = {top: 20, right: 20, bottom: 30, left: 100},
    width = 480 - margin.left - margin.right,
    height = 250 - margin.top - margin.bottom;
</script>

<script src="js/throughput.js"></script>
<script src="js/latency.js"> </script>
<script src="js/disk.js"> </script>
<script src="js/cpu.js"> </script>
<script src="js/network.js"> </script>

<script>
init_throughput();
</script>

<!-- the next comment is used by make-vis.sh to insert scripts -->
<!-- generate scripts here -->
</body>
</html>
--------------------------------------------------------------------------------
/riak_test/intercepts/yz_solrq_drain_mgr_intercepts.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(yz_solrq_drain_mgr_intercepts).
-compile(export_all).

-define(M, yz_solrq_drain_mgr_orig).

%% Add some sleep before the drain begins, in order to introduce
%% a race condition in YZ AAE which we test for and accommodate.
%% This is primarily needed to bring about the race on MacOS (and
%% possibly other BSD systems); linux scheduling seems more aggressive.
delay_drain(Params) ->
    timer:sleep(100),
    ?M:drain_orig(Params).

%% Send a ping to the yz_test_listener every time we unlink and kill
count_unlink_and_kill(Reference, Pid) ->
    gen_server:call({global, yz_test_listener}, {message, ping}),
    ?M:unlink_and_kill_orig(Reference, Pid).

%% delegate to the original
unlink_and_kill_orig(Reference, Pid) ->
    ?M:unlink_and_kill_orig(Reference, Pid).
--------------------------------------------------------------------------------
/riak_test/intercepts/yz_solrq_drain_fsm_intercepts.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(yz_solrq_drain_fsm_intercepts).
-compile(export_all).

-define(M, yz_solrq_drain_fsm_orig).

%% Crash the start_prepare message, in order to properly handle
%% failures in the drain manager handling code.
prepare_crash(start, State) ->
    {stop, {error, something_bad_happened}, State}.

%% Put a 1 second sleep in front of resume_workers.
resume_workers_sleep_1s(Pid) ->
    timer:sleep(1000),
    ?M:resume_workers_orig(Pid).

%% restore the original prepare
prepare_orig(start, State) ->
    ?M:prepare_orig(start, State).

%% restore the original resume_workers
resume_workers_orig(Pid) ->
    ?M:resume_workers_orig(Pid).

%% Timeout on a cancel, full stop
cancel_timeout(_Pid, _CancelTimeout) ->
    lager:log(info, self(), "Intercepting cancel/2 and returning timeout"),
    timeout.

%% restore the original cancel
cancel_orig(Pid, CancelTimeout) ->
    ?M:cancel_orig(Pid, CancelTimeout).
--------------------------------------------------------------------------------
/docs/TAGGING.md:
--------------------------------------------------------------------------------
Tagging
==========

Data stored in Riak is opaque to Riak. It doesn't know anything about
the structure of the data stored in it. Whether the data is JSON or
JPEG, it's all the same to Riak.

By contrast, the application storing the data often has intimate
knowledge of the data. The application may want to tag it with
attributes that give additional context. For example, tagging a
picture with information such as who uploaded it and when it was
taken.

This is called _tagging_ in Yokozuna. It provides the ability to
create additional index entries based on the object's metadata.

HTTP
----------

**NOTE: This is subject to change in the 0.2 release. The current
  implementation of metadata in Riak requires all tags to be
  prefixed with `x-riak-meta`.**

Tags can be added via custom HTTP headers. The `x-riak-meta-yz-tags`
header tells Yokozuna which headers to use as tags. It's a
comma-separated list of header names.

    x-riak-meta-yz-tags: x-riak-meta-user_s, x-riak-meta-description_t

    x-riak-meta-user_s: rzezeski
    x-riak-meta-description_t: Federal Hill at dusk.

Yokozuna strips the `x-riak-meta` prefix and lower cases tag names
before indexing. In this case the tags will be: `{<<"user_s">>,
<<"rzezeski">>}`, and `{<<"description_t">>, <<"Federal Hill at
dusk">>}`.

A query against the description tag would look like so.

    q=description_t:dusk

### Multi-Valued Fields

The tag values are passed verbatim to Solr. If you want a tag to be
treated as a multi-valued field then you'll have to configure Solr to
do so. This should be possible via Solr update processing but
probably requires a custom processor.

TODO: Create a custom processor and show example of creating a
multi-valued field via tagging.

    x-riak-meta-keywords_ss: baltimore, dusk, landscape
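
A hypothetical `curl` invocation that stores an object with the tags
from the example above (the bucket, key, and value are invented for
illustration):

    curl -X PUT http://localhost:8098/riak/photos/fedhill \
      -H 'content-type: text/plain' \
      -H 'x-riak-meta-yz-tags: x-riak-meta-user_s, x-riak-meta-description_t' \
      -H 'x-riak-meta-user_s: rzezeski' \
      -H 'x-riak-meta-description_t: Federal Hill at dusk.' \
      -d 'federal hill pic'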
--------------------------------------------------------------------------------
/tools/src-pkg.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Build a source package of Yokozuna.
#
## SYNOPSIS
##
##   ./src-pkg.sh [-rt riak_tag] working_dir version
##
## EXAMPLE
##
##   ./src-pkg.sh /tmp 0.3.0
##
##   ./src-pkg.sh -rt riak-2.0.0pre9 /tmp 0.13.0
set -e

usage() {
    echo
    grep '##' $0 | sed -r 's/##//' | sed '$d'
}

error() {
    echo ERROR: $1
    usage
    exit 1
}

while [ $# -gt 0 ]
do
    case $1 in
        -rt)
            riak_tag=$2
            shift
            ;;
        -h)
            usage
            exit 0
            ;;
        -*)
            error "unrecognized option $1"
            ;;
        *)
            break
            ;;
    esac
    shift
done

if [ ! $# -eq 2 ]; then
    error "incorrect number of arguments"
fi

WD=$1; shift
VSN=$1
RIAK_DIR=riak-yokozuna-$VSN-src
TGZ=$RIAK_DIR.tar.gz

pushd $WD # in working dir

if [ ! -d $RIAK_DIR ]; then
-d $RIAK_DIR ]; then 60 | git clone git://github.com/basho/riak.git $RIAK_DIR 61 | fi 62 | 63 | pushd $RIAK_DIR # in riak 64 | 65 | if [ -n "$riak_tag" ] 66 | then 67 | git checkout $riak_tag 68 | fi 69 | 70 | if [ -n "$riak_tag" ] 71 | then 72 | cp rebar.config.lock rebar.config 73 | fi 74 | 75 | sed -i"bak" -e "/{yokozuna.*/{ 76 | N 77 | N 78 | s#{yokozuna.*#{yokozuna, \".*\", {git, \"git://github.com/basho/yokozuna.git\", {tag, \"v${VSN}\"}}},# 79 | }" rebar.config 80 | 81 | git commit -am "Checkout Yokozuna version v$VSN" 82 | 83 | make PKG_ID="$RIAK_DIR" dist 84 | 85 | tar zxvf $TGZ 86 | pushd $RIAK_DIR/deps/yokozuna # in extracted pkg 87 | ./tools/grab-solr.sh 88 | rm -rf build/solr-4* 89 | popd # out extracted pkg 90 | tar -zcvf $TGZ $RIAK_DIR 91 | 92 | popd # out riak 93 | popd # out working dir 94 | -------------------------------------------------------------------------------- /riak_test/intercepts/yz_solrq_helper_intercepts.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2015 Basho Technologies, Inc. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %%------------------------------------------------------------------- 20 | -module(yz_solrq_helper_intercepts). 21 | -compile(export_all). 22 | 23 | -include("intercept.hrl"). 24 | 25 | -define(M, yz_solrq_helper_orig). 26 | 27 | handle_get_ops_for_no_sibling_deletes(LI, P, Obj) -> 28 | Lookup = ets:lookup(intercepts_tab, del_put), 29 | case Lookup of 30 | [] -> original_get_ops_for_no_sibling_deletes(LI, P, Obj); 31 | _ -> 32 | case proplists:get_value(del_put, Lookup) of 33 | 0 -> 34 | error_logger:info_msg( 35 | "Delete operation intercepted for BKey ~p", 36 | [{riak_object:bucket(Obj), riak_object:key(Obj)}]), 37 | ets:update_counter(intercepts_tab, del_put, 1), 38 | []; 39 | _ -> 40 | original_get_ops_for_no_sibling_deletes(LI, P, Obj) 41 | end 42 | end. 43 | 44 | original_get_ops_for_no_sibling_deletes(LI, P, Obj) -> 45 | error_logger:info_msg( 46 | "Delete operation original for BKey ~p", 47 | [{riak_object:bucket(Obj), riak_object:key(Obj)}]), 48 | ?M:get_ops_for_no_sibling_deletes_orig(LI, P, Obj). 49 | -------------------------------------------------------------------------------- /docs/TESTING.md: -------------------------------------------------------------------------------- 1 | Testing 2 | ========== 3 | 4 | ## Running Riak Test 5 | 6 | Riak Test is a tool for running integration tests against a Riak 7 | cluster. See the [Riak Test README][rt_readme] for more details. 8 | 9 | Clone the riak_test repo with the following command: 10 | 11 | git clone https://github.com/basho/riak_test 12 | 13 | Follow the instructions in the [Riak Test README][rt_readme] to build 14 | a `devrel` release and set it up for testing with riak_test.
15 | 16 | To successfully run all of the riak_test tests for yokozuna, the 17 | basho_bench benchmarking tool is also required. Clone the basho_bench 18 | repo with the following command before running the tests: 19 | 20 | git clone https://github.com/basho/basho_bench 21 | 22 | ### Add Yokozuna Config 23 | 24 | Open `~/.riak_test.config` and add the following to the configuration 25 | stanza to be used for yokozuna testing: 26 | 27 | {basho_bench, "<path-to-basho_bench-repo>"}, 28 | {yz_dir, "<path-to-yokozuna-repo>"}, 29 | 30 | This will result in a configuration stanza similar to the following: 31 | 32 | {rtdev, [ 33 | {basho_bench, "<path-to-basho_bench-repo>"}, 34 | {yz_dir, "<path-to-yokozuna-repo>"}, 35 | {rt_project, "riak"}, 36 | {rt_harness, rtdev}, 37 | {rtdev_path, [{root, "/home/you/rt/riak"}, 38 | {current, "/home/you/rt/riak/current"}, 39 | {previous, "/home/you/rt/riak/riak-1.3.2"}, 40 | {legacy, "/home/you/rt/riak/riak-1.2.1"} 41 | ]} 42 | 43 | ]}. 44 | 45 | ### Compile Yokozuna Riak Test Files 46 | 47 | cd <path-to-yokozuna> 48 | make compile-riak-test 49 | 50 | At this point you should see `.beam` files in `riak_test/ebin`. 51 | 52 | ### Run the Tests 53 | 54 | Finally, run the tests: 55 | 56 | cd <path-to-riak_test-repo> 57 | ./riak_test -c rtdev -d <path-to-yokozuna-repo>/riak_test/ebin/ | tee rt.out 58 | 59 | [rt_readme]: https://github.com/basho/riak_test/blob/master/README.md 60 | -------------------------------------------------------------------------------- /riak_test/yz_wm_extract_test.erl: -------------------------------------------------------------------------------- 1 | %% @doc Test the extractor API in various ways. 2 | -module(yz_wm_extract_test). 3 | -compile(export_all). 4 | -import(yz_rt, [host_entries/1, select_random/1]). 5 | -include("yokozuna.hrl"). 6 | -include_lib("eunit/include/eunit.hrl"). 7 | 8 | -define(CFG, 9 | [ 10 | {riak_core, 11 | [ 12 | {ring_creation_size, 8} 13 | ]}, 14 | {yokozuna, 15 | [ 16 | {enabled, true} 17 | ]} 18 | ]). 19 | 20 | confirm() -> 21 | Cluster = rt:deploy_nodes(1, ?CFG), 22 | rt:wait_for_cluster_service(Cluster, yokozuna), 23 | confirm_check_if_registered_set_ct(Cluster), 24 | confirm_extract_on_content_type(Cluster), 25 | pass. 26 | 27 | %% @doc Confirm yz-extractor header works as a string in 28 | %% yz_wm_extract:check_if_registered_set_ct/4 29 | confirm_check_if_registered_set_ct(Cluster) -> 30 | HP = hd(host_entries(rt:connection_info(Cluster))), 31 | lager:info("confirm_check_if_registered_set_ct [~p]", [HP]), 32 | URL = extract_url(HP), 33 | Headers = [{?YZ_HEAD_EXTRACTOR,"yz_json_extractor"}], 34 | {ok, Status, _, _} = http(put, URL, Headers, "{\"name\":\"ryan\"}"), 35 | ?assertEqual("200", Status). 36 | 37 | %% @doc Confirm that the extractor works based on content-type. 38 | confirm_extract_on_content_type(Cluster) -> 39 | HP = hd(host_entries(rt:connection_info(Cluster))), 40 | lager:info("confirm_extract_on_content_type [~p]", [HP]), 41 | URL = extract_url(HP), 42 | Headers = [{"content-type", "application/json"}], 43 | {ok, Status, _, Body} = http(put, URL, Headers, <<"{\"name\":\"ryan\"}">>), 44 | ?assertEqual("200", Status), 45 | ?assert(size(Body) > 0). 46 | 47 | %%%=================================================================== 48 | %%% Helpers 49 | %%%=================================================================== 50 | 51 | http(Method, URL, Headers, Body) -> 52 | Opts = [{response_format, binary}], 53 | ibrowse:send_req(URL, Headers, Method, Body, Opts).
54 | 55 | extract_url({Host,Port}) -> 56 | ?FMT("http://~s:~B/search/extract", [Host, Port]). 57 | -------------------------------------------------------------------------------- /riak_test/yz_solr_start_timeout.erl: -------------------------------------------------------------------------------- 1 | %% @doc Ensure that if Solr doesn't start before the startup wait, 2 | %% yokozuna tears down the Riak node. 3 | -module(yz_solr_start_timeout). 4 | -compile(export_all). 5 | -include_lib("eunit/include/eunit.hrl"). 6 | 7 | -define(CFG, [{yokozuna, [{enabled, true}]}]). 8 | 9 | confirm() -> 10 | %% this root_dir is a well-known name that causes yz_solr_proc to 11 | %% start something that will trigger yz_solr_proc's timeout 12 | Crippled = {root_dir, "data/::yz_solr_start_timeout::"}, 13 | [Node|_] = rt:deploy_nodes(1, [{yokozuna, [{enabled, true},Crippled]}]), 14 | 15 | %% node should start up successfully, but solr never will, so... 16 | rt:start_and_wait(Node), 17 | 18 | %% ... it should die in a bit 19 | ok = rt:wait_until_unpingable(Node), 20 | 21 | %% if it doesn't, we'll never get to this point, because 22 | %% the wait asserts failure on timeout 23 | 24 | %% if we did get here, check the log to make sure it was the 25 | %% startup wait that triggered 26 | 27 | Logs = rt:get_node_logs(), 28 | ?assert(find_startup_wait_log(Logs)), 29 | pass. 30 | 31 | %% Find "solr didn't start in alloted time" in console.log ("alloted" [sic]; it must match the exact message logged on startup timeout) 32 | find_startup_wait_log([]) -> 33 | false; 34 | find_startup_wait_log([{Path, Port}|Rest]) -> 35 | case re:run(Path, "console\\.log$") of 36 | {match, _} -> 37 | lager:info("Searching console log ~p ...", [Path]), 38 | case find_line(Port, file:read_line(Port)) of 39 | true -> true; 40 | _ -> find_startup_wait_log(Rest) 41 | end; 42 | nomatch -> 43 | find_startup_wait_log(Rest) 44 | end. 45 | 46 | find_line(_Port, eof) -> 47 | lager:info("Reached EOF but did not find timeout log entry."), 48 | false; 49 | find_line(Port, {ok, Data}) -> 50 | case re:run(Data, "solr didn't start in alloted time") of 51 | {match, _} -> 52 | lager:info("Found timeout log entry."), 53 | true; 54 | nomatch -> 55 | find_line(Port, file:read_line(Port)) 56 | end. 57 | -------------------------------------------------------------------------------- /docs/RESOURCES.md: -------------------------------------------------------------------------------- 1 | Resources 2 | ========= 3 | 4 | Yokozuna Talks 5 | -------------- 6 | 7 | * Riak Search 2.0 - [video][ricon2013wv] | [slides][ricon2013ws] 8 | 9 | * Yokozuna, Scaling Solr with Riak - Berlin Buzzwords 2013 - [video][bbuzz2013v] | [slides][bbuzz2013s] 10 | 11 | * Yokozuna: Distributed Search You Don't Think About - RICON|East 2013 - [video][re2013v] | [slides][re2013s] 12 | 13 | * Introducing Yokozuna - Riak + Solr - RICON|West 2012 - [video][rw2012v] | [slides][rw2012s] 14 | 15 | Solr Links 16 | ---------- 17 | 18 | * [Solr Homepage][solr-home] 19 | 20 | * [Solr 4.10.4 Reference Guide][solr-ref] - This is both a good reference and a place for beginners to start. The fundamental aspects of Solr are covered here.
21 | 22 | 23 | Books 24 | ----- 25 | 26 | * [Introduction to Information Retrieval][intro-to-ir] - 2008 27 | 28 | * [Lucene In Action][lia] - 2010 29 | 30 | * [Solr In Action][sia] - 2013 31 | 32 | 33 | [ricon2013wv]: http://youtu.be/-c1eynVLNMo 34 | [ricon2013ws]: http://www.slideshare.net/eredmond/riak-search-2-yokozuna 35 | [bbuzz2013v]: http://www.youtube.com/watch?v=ETJqu5SmwOc&list=PLq-odUc2x7i8Qg4j2fix-QN6bjup-QYJW&index=12&noredirect=1 36 | [bbuzz2013s]: https://speakerdeck.com/rzezeski/yokozuna-scaling-solr-with-riak 37 | 38 | [intro-to-ir]: http://www.amazon.com/Introduction-Information-Retrieval-Christopher-Manning/dp/0521865719/ref=sr_1_sc_1?ie=UTF8&qid=1382015914&sr=8-1-spell&keywords=intorudction+to+information+retrieval 39 | 40 | [lia]: http://www.amazon.com/Lucene-Action-Second-Edition-Covers/dp/1933988177/ref=sr_1_7?ie=UTF8&qid=1382015786&sr=8-7 41 | 42 | [rw2012v]: http://vimeo.com/54266574 43 | [rw2012s]: https://speakerdeck.com/basho/yokozuna-ricon 44 | 45 | [re2013v]: http://www.youtube.com/watch?v=0kLJxgqd8yU&noredirect=1 46 | [re2013s]: https://speakerdeck.com/rzezeski/yokozuna-distributed-search-you-dont-think-about 47 | 48 | [sia]: http://www.amazon.com/Solr-Action-Trey-Grainger/dp/1617291021/ref=sr_1_1?ie=UTF8&qid=1382017502&sr=8-1&keywords=solr+in+action 49 | 50 | [solr-home]: http://lucene.apache.org/solr/ 51 | 52 | [solr-ref]: https://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.10.pdf 53 | 54 | -------------------------------------------------------------------------------- /src/yz_stat_worker.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% Copyright (c) 2013 Basho Technologies, Inc. All Rights Reserved. 3 | %% 4 | %% This file is provided to you under the Apache License, 5 | %% Version 2.0 (the "License"); you may not use this file 6 | %% except in compliance with the License. You may obtain 7 | %% a copy of the License at 8 | %% 9 | %% http://www.apache.org/licenses/LICENSE-2.0 10 | %% 11 | %% Unless required by applicable law or agreed to in writing, 12 | %% software distributed under the License is distributed on an 13 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | %% KIND, either express or implied. See the License for the 15 | %% specific language governing permissions and limitations 16 | %% under the License. 17 | %% 18 | %% ------------------------------------------------------------------- 19 | -module(yz_stat_worker). 20 | -behavior(gen_server). 21 | -compile(export_all). 22 | 23 | %% gen_server callbacks 24 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2, 25 | terminate/2, code_change/3]). 26 | 27 | -include("yokozuna.hrl"). 28 | 29 | %%%=================================================================== 30 | %%% API 31 | %%%=================================================================== 32 | 33 | update(StatUpdate) -> 34 | sidejob:unbounded_cast(yz_stat_sj, {update, StatUpdate}). 35 | 36 | %%%=================================================================== 37 | %%% Callbacks 38 | %%%=================================================================== 39 | 40 | init([_Name]) -> 41 | {ok, no_state}. 42 | 43 | handle_call(_Req, _From, S) -> 44 | ?WARN("Unexpected request received ~p", [_Req]), 45 | {reply, unexpected_req, S}. 
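%% Note: stat updates enter this worker as casts. update/1 above routes
%% each StatUpdate through sidejob:unbounded_cast/2 to the yz_stat_sj
%% resource, which delivers it to the handle_cast clause below; the
%% actual write is performed by yz_stat:perform_update/1, off the
%% caller's process.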
46 | 47 | handle_cast({update, StatUpdate}, S) -> 48 | yz_stat:perform_update(StatUpdate), 49 | {noreply, S}; 50 | handle_cast(_Req, S) -> 51 | ?WARN("Unexpected request received ~p", [_Req]), 52 | {noreply, S}. 53 | 54 | handle_info(_Req, S) -> 55 | ?WARN("Unexpected request received ~p", [_Req]), 56 | {noreply, S}. 57 | 58 | terminate(_, _) -> 59 | ok. 60 | 61 | code_change(_, S, _) -> 62 | {ok, S}. 63 | -------------------------------------------------------------------------------- /test/utf8.txt: -------------------------------------------------------------------------------- 1 | english: The quick brown fox jumps over the lazy dog. 2 | jamaican: Chruu, a kwik di kwik brong fox a jomp huova di liezi daag de, yu no siit? 3 | irish: "An ḃfuil do ċroí ag bualaḋ ó ḟaitíos an ġrá a ṁeall lena ṗóg éada ó ṡlí do leasa ṫú?" "D'ḟuascail Íosa Úrṁac na hÓiġe Beannaiṫe pór Éava agus Áḋaiṁ." 4 | dutch: Pa's wijze lynx bezag vroom het fikse aquaduct. 5 | german_1: Falsches Üben von Xylophonmusik quält jeden größeren Zwerg. 6 | german_2: Im finſteren Jagdſchloß am offenen Felsquellwaſſer patzte der affig-flatterhafte kauzig-höf‌liche Bäcker über ſeinem verſifften kniffligen C-Xylophon. 7 | norwegian: Blåbærsyltetøy. 8 | danish: Høj bly gom vandt fræk sexquiz på wc. 9 | swedish: Flygande bäckasiner söka strax hwila på mjuka tuvor. 10 | icelandic: Sævör grét áðan því úlpan var ónýt. 11 | finnish: Törkylempijävongahdus. 12 | polish: Pchnąć w tę łódź jeża lub osiem skrzyń fig. 13 | czech: Příliš žluťoučký kůň úpěl ďábelské kódy. 14 | slovak: Starý kôň na hŕbe kníh žuje tíško povädnuté ruže, na stĺpe sa ďateľ učí kvákať novú ódu o živote. 15 | greek_monotonic: ξεσκεπάζω την ψυχοφθόρα βδελυγμία 16 | greek_polytonic: ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία 17 | russian: Съешь же ещё этих мягких французских булок да выпей чаю. 18 | bulgarian: Жълтата дюля беше щастлива, че пухът, който цъфна, замръзна като гьон. 19 | sami: Vuol Ruoŧa geđggiid leat máŋga luosa ja čuovžža. 20 | hungarian: Árvíztűrő tükörfúrógép. 21 | spanish: El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro. 22 | portuguese: O próximo vôo à noite sobre o Atlântico, põe freqüentemente o único médico. 23 | french: Les naïfs ægithales hâtifs pondant à Noël où il gèle sont sûrs d'être déçus en voyant leurs drôles d'œufs abîmés. 24 | esperanto: Eĥoŝanĝo ĉiuĵaŭde. 25 | hebrew: זה כיף סתם לשמוע איך תנצח קרפד עץ טוב בגן. 26 | japanese_hiragana: 27 | いろはにほへど ちりぬるを 28 | わがよたれぞ つねならむ 29 | うゐのおくやま けふこえて 30 | あさきゆめみじ ゑひもせず 31 | japanese_kanji: 32 | 色は匂へど 散りぬるを 33 | 我が世誰ぞ 常ならむ 34 | 有為の奥山 今日越えて 35 | 浅き夢見じ 酔ひもせず 36 | английский: The quick brown fox jumps over the lazy dog. 
37 | chinese: 38 | 花非花 39 | 雾非雾 40 | 夜半来 41 | 天明去 42 | 来如春梦几多时 43 | 去似朝云无觅处 44 | -------------------------------------------------------------------------------- /.travis.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | declare -r build_status="$(mktemp)" 7 | declare -r otp_name='OTP_R16B02_basho10' 8 | declare -r otp_build_log_dir="$HOME/.kerl/builds/$otp_name" 9 | declare -r otp_install_dir="$HOME/otp-basho" 10 | declare -r kerl_activate="$otp_install_dir/activate" 11 | 12 | function onexit 13 | { 14 | rm -f "$build_status" 15 | } 16 | 17 | trap onexit EXIT 18 | 19 | function build_ticker 20 | { 21 | local status="$(< $build_status)" 22 | while [[ $status == 'true' ]] 23 | do 24 | echo '------------------------------------------------------------------------------------------------------------------------------------------------' 25 | echo "$(date) building $otp_name ..." 26 | if ls $otp_build_log_dir/otp_build*.log > /dev/null 27 | then 28 | tail $otp_build_log_dir/otp_build*.log 29 | fi 30 | sleep 10 31 | status="$(< $build_status)" 32 | done 33 | echo '.' 34 | } 35 | 36 | 37 | function build_otp 38 | { 39 | if [[ -f $otp_install_dir/activate ]] 40 | then 41 | echo "Found $otp_name installation at $otp_install_dir" 42 | else 43 | export KERL_CONFIGURE_OPTIONS='--enable-hipe --enable-smp-support --enable-threads --enable-kernel-poll --without-odbc' 44 | rm -rf "$otp_install_dir" 45 | mkdir -p "$otp_install_dir" 46 | 47 | echo -n 'true' > "$build_status" 48 | build_ticker & 49 | kerl build git https://github.com/basho/otp.git "$otp_name" "$otp_name" 50 | echo -n 'false' > "$build_status" 51 | wait 52 | 53 | kerl install "$otp_name" "$otp_install_dir" 54 | fi 55 | 56 | exit 0 57 | } 58 | 59 | function do_tests 60 | { 61 | if ! hash escript 62 | then 63 | if [[ -f $kerl_activate ]] 64 | then 65 | set +o nounset 66 | set +o errexit 67 | source "$kerl_activate" 68 | set -o nounset 69 | set -o errexit 70 | else 71 | echo "Did not find $kerl_activate, exiting" 1>&2 72 | exit 1 73 | fi 74 | fi 75 | 76 | make 77 | make test 78 | } 79 | 80 | if [[ $1 == 'build' ]] 81 | then 82 | build_otp 83 | elif [[ $1 == 'test' ]] 84 | then 85 | do_tests 86 | else 87 | echo 'script argument must be "build" or "test"' 1>&2 88 | exit 1 89 | fi 90 | -------------------------------------------------------------------------------- /docs/Q-AND-A.md: -------------------------------------------------------------------------------- 1 | Question & Answer 2 | ================= 3 | 4 | For now this will serve as a place to answer questions that aren't 5 | quite enough to form their own documentation. 6 | 7 | How does Yokozuna deal with siblings? 8 | ------------------------------------- 9 | 10 | Yokozuna indexes **all** siblings. Yokozuna has a low-level hook into 11 | the vnode that detects any change to an object on-disk. If this 12 | object has siblings then Yokozuna will iterate all of them and create 13 | a Solr document for each one. Conversely, when siblings have been 14 | resolved Yokozuna will index the reconciled object and delete the 15 | siblings' indexes. 16 | 17 | ### Implementation Details ### 18 | 19 | There is no easy way to determine when an object has gone from having 20 | siblings to not. Yokozuna is dumb in that if there are no siblings it 21 | **always** sends a delete request to Solr to remove any potential 22 | sibling indexes for that key. This is sent as a separate HTTP request 23 | from the index write. There is a branch, `combine-index-and-del`, to 24 | merge these requests into one, but it hasn't been completed yet.
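As a rough sketch, those two calls might look like this from Erlang (the Solr URL, port, index name, and JSON bodies here are illustrative assumptions, not Yokozuna's exact wire format; per-sibling docs are distinguished by their `_yz_vtag` field):

    %% Illustration only: delete any lingering per-sibling docs for the
    %% key, then index the reconciled object, as two separate requests.
    reindex_resolved(Index, Key, DocJson) ->
        URL = "http://localhost:8093/solr/" ++ Index ++ "/update",
        Hdrs = [{"content-type", "application/json"}],
        DelJson = "{\"delete\": {\"query\": \"_yz_rk:" ++ Key ++ "\"}}",
        {ok, "200", _, _} = ibrowse:send_req(URL, Hdrs, post, DelJson, []),
        {ok, "200", _, _} = ibrowse:send_req(URL, Hdrs, post, DocJson, []).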
25 | 26 | How does Yokozuna deal with Bitcask expiry? 27 | ------------------------------------------- 28 | 29 | Yokozuna has no notion of index expiry. Therefore, as data expires in 30 | Bitcask the Yokozuna indexes will remain. This means queries will 31 | return keys that no longer exist. Now, AAE will eventually 32 | catch this discrepancy, but a) it could take a while and b) it relies 33 | on AAE being enabled. There is a [post on riak-users][1] that goes 34 | over the interactions between AAE and Bitcask expiry. 35 | 36 | That said, it would be fairly easy to add a notion of expiry to 37 | Yokozuna. A TTL or expiration time could be added to indexed docs. 38 | Then an expiry-server could be added to Yokozuna that ticks every few 39 | seconds and runs a delete-by-query. The query, of course, would 40 | detect any documents past their expiration time. A config would be 41 | added to set expiry on all docs. 42 | 43 | The problem is that there would then be both a Bitcask and a Yokozuna 44 | expiry config. It would be easy to set one and forget the other, or to 45 | make a copy-and-paste error and drop a digit. It would be nice if expiry was a 46 | riak-level idea and could be set per bucket or even object. 47 | 48 | [1]: http://lists.basho.com/pipermail/riak-users_lists.basho.com/2013-April/011919.html 49 | -------------------------------------------------------------------------------- /test/utf8.json: -------------------------------------------------------------------------------- 1 | {"langs": 2 | {"english": "The quick brown fox jumps over the lazy dog.", 3 | "jamaican": "Chruu, a kwik di kwik brong fox a jomp huova di liezi daag de, yu no siit?", 4 | "irish": "\"An ḃfuil do ċroí ag bualaḋ ó ḟaitíos an ġrá a ṁeall lena ṗóg éada ó ṡlí do leasa ṫú?\" \"D'ḟuascail Íosa Úrṁac na hÓiġe Beannaiṫe pór Éava agus Áḋaiṁ.\"", 5 | "dutch": "Pa's wijze lynx bezag vroom het fikse aquaduct.", 6 | "german_1": "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg.", 7 | "german_2": "Im finſteren Jagdſchloß am offenen Felsquellwaſſer patzte der affig-flatterhafte kauzig-höf‌liche Bäcker über ſeinem verſifften kniffligen C-Xylophon.", 8 | "norwegian": "Blåbærsyltetøy.", 9 | "danish": "Høj bly gom vandt fræk sexquiz på wc.", 10 | "swedish": "Flygande bäckasiner söka strax hwila på mjuka tuvor.", 11 | "icelandic": "Sævör grét áðan því úlpan var ónýt.", 12 | "finnish": "Törkylempijävongahdus.", 13 | "polish": "Pchnąć w tę łódź jeża lub osiem skrzyń fig.", 14 | "czech": "Příliš žluťoučký kůň úpěl ďábelské kódy.", 15 | "slovak": "Starý kôň na hŕbe kníh žuje tíško povädnuté ruže, na stĺpe sa ďateľ učí kvákať novú ódu o živote.", 16 | "greek_monotonic": "ξεσκεπάζω την ψυχοφθόρα βδελυγμία", 17 | "greek_polytonic": "ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία", 18 | "russian": "Съешь же ещё этих мягких французских булок да выпей чаю.", 19 | "bulgarian": "Жълтата дюля беше щастлива, че пухът, който цъфна, замръзна като гьон.", 20 | "sami": "Vuol Ruoŧa geđggiid leat máŋga luosa ja čuovžža.", 21 | "hungarian": "Árvíztűrő tükörfúrógép.", 22 | "spanish": "El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro.", 23 | "portuguese": "O próximo vôo à noite sobre o Atlântico, põe freqüentemente o único médico.", 24 | "french": "Les naïfs ægithales hâtifs pondant à Noël où il gèle sont sûrs d'être déçus en
voyant leurs drôles d'œufs abîmés.", 25 | "esperanto": "Eĥoŝanĝo ĉiuĵaŭde.", 26 | "hebrew": "זה כיף סתם לשמוע איך תנצח קרפד עץ טוב בגן.", 27 | "japanese_hiragana": " 28 | いろはにほへど ちりぬるを 29 | わがよたれぞ つねならむ 30 | うゐのおくやま けふこえて 31 | あさきゆめみじ ゑひもせず 32 | ", 33 | "japanese_kanji": " 34 | 色は匂へど 散りぬるを 35 | 我が世誰ぞ 常ならむ 36 | 有為の奥山 今日越えて 37 | 浅き夢見じ 酔ひもせず 38 | ", 39 | "английский": "The quick brown fox jumps over the lazy dog.", 40 | "chinese": " 41 | 花非花 42 | 雾非雾 43 | 夜半来 44 | 天明去 45 | 来如春梦几多时 46 | 去似朝云无觅处 47 | " 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /misc/bench/bin/run-bench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #> Usage: 4 | #> 5 | #> ./run-bench.sh <hosts file> <key> <script dir> <bench root> <bench config> <bench results dir> <bench name> 6 | 7 | usage() { 8 | grep '#>' $0 | sed 's/#>//' | sed '$d' 9 | } 10 | 11 | error() { 12 | echo $1 13 | exit 1 14 | } 15 | 16 | if [ ! $# -eq 7 ]; then 17 | echo "incorrect number of arguments" 18 | usage 19 | exit 1 20 | fi 21 | 22 | HOSTS_FILE=$1; shift 23 | KEY=$1; shift 24 | SCRIPT_DIR=$1; shift 25 | BENCH_ROOT=$1; shift 26 | BENCH_CFG=$1; shift 27 | BENCH_RESULTS_DIR=$1; shift 28 | BENCH_NAME=$1; shift 29 | 30 | RUN_DIR=$BENCH_RESULTS_DIR/$BENCH_NAME 31 | 32 | verify_hosts() { 33 | while read user_host; do 34 | if ssh -fi $KEY $user_host 'exit'; then 35 | echo "verified login $user_host" 36 | else 37 | error "failed to login $user_host" 38 | fi 39 | done < $HOSTS_FILE 40 | } 41 | 42 | copy_collect_scripts() { 43 | while read user_host; do 44 | if ! scp -i $KEY -r $SCRIPT_DIR/*collect.sh $user_host:~/; then 45 | error "failed to copy collection scripts to $user_host" 46 | fi 47 | done < $HOSTS_FILE 48 | } 49 | 50 | start_collecting() { 51 | while read user_host; do 52 | for script in $SCRIPT_DIR/*collect.sh; do 53 | script=$(basename $script) 54 | ssh -fi $KEY $user_host "chmod a+x $script && ./$script start" 55 | done 56 | done < $HOSTS_FILE 57 | } 58 | 59 | stop_collecting() { 60 | while read user_host; do 61 | for script in $SCRIPT_DIR/*collect.sh; do 62 | script=$(basename $script) 63 | ssh -fi $KEY $user_host "./$script stop" 64 | done 65 | done < $HOSTS_FILE 66 | } 67 | 68 | copy_output() { 69 | while read user_host; do 70 | for script in $SCRIPT_DIR/*collect.sh; do 71 | output_path=$($script output) 72 | output_file=$(basename $output_path) 73 | src=$user_host:$output_path 74 | dest=$RUN_DIR/${user_host}-${output_file} 75 | echo "copying output from $src to $dest" 76 | scp -qi $KEY $src $dest 77 | done 78 | done < $HOSTS_FILE 79 | } 80 | 81 | run_bench() { 82 | $BENCH_ROOT/basho_bench -d $BENCH_RESULTS_DIR -n $BENCH_NAME $BENCH_CFG 83 | } 84 | 85 | verify_hosts 86 | copy_collect_scripts 87 | start_collecting 88 | run_bench 89 | sleep 10s 90 | stop_collecting 91 | copy_output 92 | -------------------------------------------------------------------------------- /test/yz_pulseh.erl: -------------------------------------------------------------------------------- 1 | -module(yz_pulseh). 2 | -compile([export_all]). 3 | 4 | -ifdef(EQC). 5 | 6 | compile(Module) -> 7 | compile(Module, []). 
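%% A quick usage sketch (the target module and option below are
%% hypothetical; compile/2 accepts ordinary compiler options, which are
%% passed through to compile:forms/2):
%%
%%     {ok, yz_solrq} = yz_pulseh:compile(yz_solrq, [{d, 'PULSE'}]).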
8 | 9 | compile(Module, UserOptions) -> 10 | %% Trigger load if not present 11 | _ = Module:module_info(), 12 | 13 | %% Then work out where we loaded it from 14 | case proplists:get_value(Module, code:all_loaded()) of 15 | undefined -> 16 | {error, not_loaded}; 17 | [$p, $u, $l, $s, $e, $d, $_ | BeamName] -> %% strip the "pulsed_" prefix added on load below 18 | do_compile_beam(Module, BeamName, UserOptions); 19 | BeamName -> 20 | do_compile_beam(Module, BeamName, UserOptions) 21 | end. 22 | 23 | %% Beam is a binary or a .beam file name 24 | do_compile_beam(Module, Beam, UserOptions) -> 25 | %% Extract the abstract format and apply the PULSE 26 | %% instrumentation parse transform to every executable 27 | %% line 28 | 29 | case get_abstract_code(Module, Beam) of 30 | no_abstract_code=E -> 31 | {error,E}; 32 | encrypted_abstract_code=E -> 33 | {error,E}; 34 | {_Vsn,Code} -> 35 | Forms0 = epp:interpret_file_attribute(Code), 36 | Forms = pulse_instrument:parse_transform(Forms0, UserOptions), 37 | 38 | %% We need to recover the source from the compilation 39 | %% info otherwise the newly compiled module will have 40 | %% source pointing to the current directory 41 | SourceInfo = get_source_info(Module, Beam), 42 | 43 | %% Compile and load the result 44 | %% It's necessary to check the result of loading since it may 45 | %% fail, for example if Module resides in a sticky directory 46 | {ok, Module, Binary} = compile:forms(Forms, SourceInfo ++ UserOptions), 47 | case code:load_binary(Module, "pulsed_" ++ Beam, Binary) of 48 | {module, Module} -> 49 | {ok, Module}; 50 | Error -> 51 | Error 52 | end 53 | end. 54 | get_abstract_code(Module, Beam) -> 55 | case beam_lib:chunks(Beam, [abstract_code]) of 56 | {ok, {Module, [{abstract_code, AbstractCode}]}} -> 57 | AbstractCode; 58 | {error,beam_lib,{key_missing_or_invalid,_,_}} -> 59 | encrypted_abstract_code; 60 | Error -> Error 61 | end. 62 | 63 | get_source_info(Module, Beam) -> 64 | case beam_lib:chunks(Beam, [compile_info]) of 65 | {ok, {Module, [{compile_info, Compile}]}} -> 66 | case lists:keyfind(source, 1, Compile) of 67 | { source, _ } = Tuple -> [Tuple]; 68 | false -> [] 69 | end; 70 | _ -> 71 | [] 72 | end. 73 | 74 | -endif. 75 | -------------------------------------------------------------------------------- /src/yz_solr_sup.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2013 Basho Technologies, Inc. All Rights Reserved. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %% ------------------------------------------------------------------- 20 | 21 | %% @doc Supervisor for the solr process manager. We want to have a 22 | %% very different restart policy for the JVM manager than for our 23 | %% other long-lived workers, so it is given its own supervisor.
24 | %% 25 | %% The strategy allows for no more than one restart in 3x the time 26 | %% that has been set for solr startup. This is to prevent Yokozuna 27 | %% from hanging around uselessly when Solr is failing to start. 28 | 29 | -module(yz_solr_sup). 30 | -behaviour(supervisor). 31 | -include("yokozuna.hrl"). 32 | -export([start_link/0]). 33 | -export([init/1]). 34 | 35 | 36 | %%%=================================================================== 37 | %%% API 38 | %%%=================================================================== 39 | 40 | start_link() -> 41 | supervisor:start_link({local, ?MODULE}, ?MODULE, []). 42 | 43 | 44 | %%%=================================================================== 45 | %%% Callbacks 46 | %%%=================================================================== 47 | 48 | init([]) -> 49 | Dir = ?YZ_ROOT_DIR, 50 | TempDir = ?YZ_TEMP_DIR, 51 | SolrPort = yz_solr:port(), 52 | SolrJMXPort = yz_solr:jmx_port(), 53 | 54 | SolrProc = {yz_solr_proc, 55 | {yz_solr_proc, start_link, [Dir, TempDir, SolrPort, SolrJMXPort]}, 56 | permanent, 5000, worker, [yz_solr_proc]}, 57 | 58 | Children = [SolrProc], 59 | 60 | %% if yz_solr_proc restarts more than once in 3x its startup wait 61 | %% time, it's probably not going to succeed on the third try 62 | MaxR = 1, 63 | MaxT = 3*yz_solr_proc:solr_startup_wait(), 64 | {ok, {{one_for_one, MaxR, MaxT}, Children}}. 65 | -------------------------------------------------------------------------------- /src/yz_general_sup.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2013 Basho Technologies, Inc. All Rights Reserved. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %% ------------------------------------------------------------------- 20 | 21 | %% @doc Supervisor for long-lived processes *other* than yz_solr_proc, 22 | %% because they don't want to live under yz_solr_sup's specific 23 | %% restart strategy. 24 | 25 | -module(yz_general_sup). 26 | -behaviour(supervisor). 27 | -include("yokozuna.hrl"). 28 | -export([start_link/0]). 29 | -export([init/1]). 30 | 31 | 32 | %%%=================================================================== 33 | %%% API 34 | %%%=================================================================== 35 | 36 | start_link() -> 37 | supervisor:start_link({local, ?MODULE}, ?MODULE, []).
38 | 39 | 40 | %%%=================================================================== 41 | %%% Callbacks 42 | %%%=================================================================== 43 | 44 | init([]) -> 45 | %% Create yz_events ETS table 46 | yz_events:create_table(), 47 | 48 | SolrQ = {yz_solrq_sup, 49 | {yz_solrq_sup, start_link, []}, 50 | permanent, infinity, supervisor, [yz_solrq_sup]}, 51 | 52 | Events = {yz_events, 53 | {yz_events, start_link, []}, 54 | permanent, 5000, worker, [yz_events]}, 55 | 56 | HashtreeSup = {yz_index_hashtree_sup, 57 | {yz_index_hashtree_sup, start_link, []}, 58 | permanent, infinity, supervisor, [yz_index_hashtree_sup]}, 59 | 60 | EntropyMgr = {yz_entropy_mgr, 61 | {yz_entropy_mgr, start_link, []}, 62 | permanent, 5000, worker, [yz_entropy_mgr]}, 63 | 64 | Cover = {yz_cover, 65 | {yz_cover, start_link, []}, 66 | permanent, 5000, worker, [yz_cover]}, 67 | 68 | Children = [SolrQ, Events, HashtreeSup, EntropyMgr, Cover], 69 | 70 | {ok, {{one_for_one, 5, 10}, Children}}. 71 | -------------------------------------------------------------------------------- /misc/bench/bin/make-vis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #> Usage: 4 | #> 5 | #> ./make-vis.sh <script dir> <bench results dir> <bench name> <www dir> 6 | 7 | usage() { 8 | grep '#>' $0 | sed 's/#>//' | sed '$d' 9 | } 10 | 11 | if [ ! $# -eq 4 ]; then 12 | echo "incorrect number of arguments" 13 | usage 14 | exit 1 15 | fi 16 | 17 | SCRIPT_DIR=$1; shift 18 | BENCH_RESULTS_DIR=$1; shift 19 | BENCH_NAME=$1; shift 20 | WWW_DIR=$1; shift 21 | 22 | RUN_DIR=$BENCH_RESULTS_DIR/$BENCH_NAME 23 | WWW_DIR=$WWW_DIR/$BENCH_NAME 24 | 25 | mkdir -p $WWW_DIR 26 | cp -vr $RUN_DIR/* $WWW_DIR 27 | cp -v $SCRIPT_DIR/visualize.html $WWW_DIR 28 | cp -vr $SCRIPT_DIR/js $WWW_DIR 29 | 30 | add_script() { 31 | script=$1 32 | 33 | gsed -i'' "/<!-- generate scripts here -->/ a\ 34 | $script\n" $WWW_DIR/visualize.html 35 | } 36 | 37 | for lat in $RUN_DIR/*_latencies.csv; do 38 | file=$(basename $lat) 39 | script="<script>init_latency(\"$file\")</script>" 40 | add_script $script 41 | done 42 | 43 | disks="" 44 | for disk in $RUN_DIR/*-disk-collect.csv; do 45 | file=$(basename $disk) 46 | name=${file%-disk-collect.csv} 47 | disks="{name:\"$name\",resource:\"$file\"},$disks" 48 | done 49 | add_script "<script>init_disks([${disks%,}],\"%b\",\"db\",\"Disk %b\",\"absolute\")</script>" 50 | add_script "<script>init_disks([${disks%,}],\"kr/s\",\"dkrs\",\"Disk kr/s\",\"relative\")</script>" 51 | add_script "<script>init_disks([${disks%,}],\"kw/s\",\"dkws\",\"Disk kw/s\",\"relative\")</script>" 52 | 53 | cpus="" 54 | for cpu in $RUN_DIR/*-pid-cpu-mem-collect.csv; do 55 | file=$(basename $cpu) 56 | name=${file%-pid-cpu-mem-collect.csv} 57 | cpus="{name:\"$name\",resource:\"$file\"},$cpus" 58 | done 59 | add_script "<script>init_cpus([${cpus%,}])</script>" 60 | 61 | nics="" 62 | for nic in $RUN_DIR/*-network-collect.csv; do 63 | file=$(basename $nic) 64 | name=${file%-network-collect.csv} 65 | nics="{name:\"$name\",resource:\"$file\"},$nics" 66 | done 67 | add_script "<script>init_nics([${nics%,}],\"rKb/s\",\"rkbs\",\"Network rKb/s\",\"relative\")</script>" 68 | add_script "<script>init_nics([${nics%,}],\"wKb/s\",\"wkbs\",\"Network wKb/s\",\"relative\")</script>" 69 | add_script "<script>init_nics([${nics%,}],\"rPk/s\",\"rpks\",\"Network rPk/s\",\"relative\")</script>" 70 | add_script "<script>init_nics([${nics%,}],\"wPk/s\",\"wpks\",\"Network 
wPk/s\",\"relative\")</script>" 71 | add_script "<script>init_nics([${nics%,}],\"rAvs\",\"ravs\",\"Network rAvs\",\"relative\")</script>" 72 | add_script "<script>init_nics([${nics%,}],\"wAvs\",\"wavs\",\"Network wAvs\",\"relative\")</script>" 73 | 74 | open http://localhost/$BENCH_NAME/visualize.html 75 | -------------------------------------------------------------------------------- /riak_test/yz_fuse_upgrade.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2016 Basho Technologies, Inc. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %%------------------------------------------------------------------- 20 | 21 | %% @doc Confirm that fuses are created for Solr indexes across upgrades. 22 | 23 | -module(yz_fuse_upgrade). 24 | -export([confirm/0]). 25 | 26 | -include("yokozuna.hrl"). 27 | -include_lib("eunit/include/eunit.hrl"). 28 | 29 | -define(OLD_BUCKET, <<"old_bucket">>). 30 | -define(OLD_INDEX, <<"old_index">>). 31 | -define(NEW_BUCKET, <<"new_bucket">>). 32 | -define(NEW_INDEX, <<"new_index">>). 33 | -define(CLUSTER_SIZE, 2). 34 | -define(CONFIG, 35 | [{riak_core, 36 | [{ring_creation_size, 16}]}, 37 | {yokozuna, 38 | [{enabled, true}]} 39 | ]). 40 | 41 | confirm() -> 42 | %% Fixing to a "pre-batching/fuse" version, this can be version <= 2.0.6. 43 | OldVsn = "2.0.5", 44 | 45 | Cluster = rt:build_cluster(lists:duplicate(?CLUSTER_SIZE, 46 | {OldVsn, ?CONFIG})), 47 | Node1 = hd(Cluster), 48 | 49 | yz_rt:create_index(Cluster, ?OLD_INDEX), 50 | yz_rt:set_index(Node1, ?OLD_BUCKET, ?OLD_INDEX), 51 | 52 | yz_rt:rolling_upgrade(Cluster, current, ?CONFIG, [riak_kv, yokozuna]), 53 | 54 | yz_rt:create_index(Cluster, ?NEW_INDEX), 55 | yz_rt:set_index(Node1, ?NEW_BUCKET, ?NEW_INDEX), 56 | 57 | ?assertEqual(ok, 58 | yz_rt:wait_until(Cluster, fun verify_fuse_for_old_index/1)), 59 | ?assertEqual(ok, 60 | yz_rt:wait_until(Cluster, fun verify_fuse_for_new_index/1)), 61 | 62 | pass. 63 | 64 | verify_fuse_for_old_index(Node) -> 65 | Result = rpc:call(Node, yz_fuse, check, [?OLD_INDEX]), 66 | lager:info("Fuse Check Old Index ~p", [Result]), 67 | ok =:= Result. 68 | 69 | verify_fuse_for_new_index(Node) -> 70 | Result = rpc:call(Node, yz_fuse, check, [?NEW_INDEX]), 71 | lager:info("Fuse Check New Index ~p", [Result]), 72 | ok =:= Result. 73 | -------------------------------------------------------------------------------- /src/yz_sup.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2013 Basho Technologies, Inc. All Rights Reserved. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. 
You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %% ------------------------------------------------------------------- 20 | 21 | %% @doc Top-level supervisor for yokozuna. 22 | %% 23 | %% Starts no children if yokozuna is not enabled (`start_link(false)'). 24 | %% 25 | %% Starts two sub-supervisors if yokozuna is enabled: one supervisor 26 | %% for the JVM solr manager, and another supervisor for the rest of 27 | %% the long-lived yokozuna processes. This top-level supervisor's 28 | %% restart strategy is thus to allow zero restarts of its 29 | %% sub-supervisors. If those sub-supervisors exit, something is really 30 | %% wrong, and yokozuna should shut down. 31 | 32 | -module(yz_sup). 33 | -behaviour(supervisor). 34 | -include("yokozuna.hrl"). 35 | -export([start_link/1]). 36 | -export([init/1]). 37 | 38 | 39 | %%%=================================================================== 40 | %%% API 41 | %%%=================================================================== 42 | 43 | start_link(Enabled) -> 44 | supervisor:start_link({local, ?MODULE}, ?MODULE, [Enabled]). 45 | 46 | 47 | %%%=================================================================== 48 | %%% Callbacks 49 | %%%=================================================================== 50 | 51 | init([false]) -> 52 | %% Yokozuna is disabled, start a supervisor without any children. 53 | {ok, {{one_for_one, 5, 10}, []}}; 54 | 55 | init([_Enabled]) -> 56 | SolrSup = {yz_solr_sup, 57 | {yz_solr_sup, start_link, []}, 58 | permanent, 5000, supervisor, [yz_solr_sup]}, 59 | 60 | GeneralSup = {yz_general_sup, 61 | {yz_general_sup, start_link, []}, 62 | permanent, infinity, supervisor, [yz_general_sup]}, 63 | 64 | Children = [SolrSup, GeneralSup], 65 | 66 | %% if these sub-supervisors ever exit, there's something really 67 | %% wrong; don't try to restart them 68 | MaxR = 0, 69 | MaxT = 1, 70 | {ok, {{one_for_one, MaxR, MaxT}, Children}}. 71 | -------------------------------------------------------------------------------- /java_src/com/basho/yokozuna/monitor/Monitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 Basho Technologies, Inc. All Rights Reserved. 3 | * 4 | * This file is provided to you under the Apache License, Version 2.0 (the 5 | * "License"); you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.basho.yokozuna.monitor; 18 | 19 | import java.io.IOException; 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | 23 | /** 24 | * Kill Solr when stdin closes, as it will when the Erlang VM shuts 25 | * down or yz_solr_proc exits. 
26 | */ 27 | public class Monitor extends Thread { 28 | protected static final Logger log = LoggerFactory.getLogger(Monitor.class); 29 | 30 | public Monitor() { 31 | // nothing to init 32 | } 33 | 34 | public void run() { 35 | try { 36 | if (log.isDebugEnabled()) { 37 | log.debug("Monitor attempting read on stdin"); 38 | } 39 | if (System.in.read() < 0) { 40 | if (log.isInfoEnabled()) { 41 | log.info("Yokozuna has exited - shutting down Solr"); 42 | } 43 | System.exit(0); 44 | } 45 | if (log.isDebugEnabled()) { 46 | log.debug("Monitoring succeeded reading stdin"); 47 | } 48 | } 49 | catch (final IOException ioe) { 50 | if (log.isInfoEnabled()) { 51 | log.info("Yokozuna has exited - shutting down Solr"); 52 | } 53 | System.exit(0); 54 | } 55 | } 56 | 57 | /** 58 | * Start monitoring stdin in a background thread 59 | */ 60 | public static Monitor monitor() { 61 | final Monitor m = new Monitor(); 62 | m.start(); 63 | return m; 64 | } 65 | 66 | /** 67 | * Main for testing 68 | */ 69 | public static void main(String[] args) { 70 | monitor(); 71 | 72 | try { 73 | while(true) { 74 | // hang out until thread sees stdin close 75 | Thread.sleep(1000); 76 | } 77 | } 78 | catch (final InterruptedException ie) { 79 | // nothing to do but shutdown 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /misc/bench/schemas/fruit_schema.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8" ?> 2 | <schema name="fruit" version="1.5"> 3 | 4 | <fields> 5 | <field name="_yz_id" type="_yz_str" indexed="true" stored="true" required="true" multiValued="false"/> 6 | 7 | <field name="text" type="text_ws" indexed="true" stored="false" multiValued="true"/> 8 | 9 | <field name="date_register" type="tdate" indexed="true" stored="true" multiValued="false" /> 10 | 11 | <field name="_version_" type="long" indexed="true" stored="true"/> 12 | 13 | <!-- Entropy Data: Data related to anti-entropy --> 14 | <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/> 15 | 16 | <!-- Partition Number: Used as a filter query param --> 17 | <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> 18 | 19 | <!-- First Partition Number: The first partition in this doc's 20 | preflist, used for further filtering on overlapping partitions. --> 21 | <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> 22 | 23 | <!-- If there is a sibling, use vtag to differentiate them --> 24 | <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/> 25 | 26 | <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/> 27 | 28 | <!-- Riak Bucket: The bucket of the Riak object this doc corresponds to. --> 29 | <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/> 30 | 31 | <!-- Riak Key: The key of the Riak object this doc corresponds to. 
--> 32 | <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/> 33 | 34 | <!-- Error Flag: Stores a flag if this doc is the product of a failed object extraction --> 35 | <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/> 36 | 37 | </fields> 38 | 39 | <uniqueKey>_yz_id</uniqueKey> 40 | 41 | <types> 42 | 43 | <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" /> 44 | 45 | <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/> 46 | 47 | <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/> 48 | <!-- A Trie based date field for faster date range queries and date faceting. --> 49 | <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/> 50 | 51 | <!-- A text field that only splits on whitespace for exact matching of words --> 52 | <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> 53 | <analyzer> 54 | <tokenizer class="solr.WhitespaceTokenizerFactory"/> 55 | </analyzer> 56 | </fieldType> 57 | </types> 58 | 59 | </schema> 60 | -------------------------------------------------------------------------------- /test/yz_text_extractor_tests.erl: -------------------------------------------------------------------------------- 1 | -module(yz_text_extractor_tests). 2 | -compile(export_all). 3 | -include_lib("yz_test.hrl"). 4 | 5 | -define(UTF8_EXPECT, 6 | <<"english: The quick brown fox jumps over the lazy dog. 7 | jamaican: Chruu, a kwik di kwik brong fox a jomp huova di liezi daag de, yu no siit? 8 | irish: \"An ḃfuil do ċroí ag bualaḋ ó ḟaitíos an ġrá a ṁeall lena ṗóg éada ó ṡlí do leasa ṫú?\" \"D'ḟuascail Íosa Úrṁac na hÓiġe Beannaiṫe pór Éava agus Áḋaiṁ.\" 9 | dutch: Pa's wijze lynx bezag vroom het fikse aquaduct. 10 | german_1: Falsches Üben von Xylophonmusik quält jeden größeren Zwerg. 11 | german_2: Im finſteren Jagdſchloß am offenen Felsquellwaſſer patzte der affig-flatterhafte kauzig-höf‌liche Bäcker über ſeinem verſifften kniffligen C-Xylophon. 12 | norwegian: Blåbærsyltetøy. 13 | danish: Høj bly gom vandt fræk sexquiz på wc. 14 | swedish: Flygande bäckasiner söka strax hwila på mjuka tuvor. 15 | icelandic: Sævör grét áðan því úlpan var ónýt. 16 | finnish: Törkylempijävongahdus. 17 | polish: Pchnąć w tę łódź jeża lub osiem skrzyń fig. 18 | czech: Příliš žluťoučký kůň úpěl ďábelské kódy. 19 | slovak: Starý kôň na hŕbe kníh žuje tíško povädnuté ruže, na stĺpe sa ďateľ učí kvákať novú ódu o živote. 20 | greek_monotonic: ξεσκεπάζω την ψυχοφθόρα βδελυγμία 21 | greek_polytonic: ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία 22 | russian: Съешь же ещё этих мягких французских булок да выпей чаю. 23 | bulgarian: Жълтата дюля беше щастлива, че пухът, който цъфна, замръзна като гьон. 24 | sami: Vuol Ruoŧa geđggiid leat máŋga luosa ja čuovžža. 25 | hungarian: Árvíztűrő tükörfúrógép. 26 | spanish: El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro. 27 | portuguese: O próximo vôo à noite sobre o Atlântico, põe freqüentemente o único médico. 28 | french: Les naïfs ægithales hâtifs pondant à Noël où il gèle sont sûrs d'être déçus en voyant leurs drôles d'œufs abîmés. 29 | esperanto: Eĥoŝanĝo ĉiuĵaŭde. 30 | hebrew: זה כיף סתם לשמוע איך תנצח קרפד עץ טוב בגן.
31 | japanese_hiragana: 32 | いろはにほへど ちりぬるを 33 | わがよたれぞ つねならむ 34 | うゐのおくやま けふこえて 35 | あさきゆめみじ ゑひもせず 36 | japanese_kanji: 37 | 色は匂へど 散りぬるを 38 | 我が世誰ぞ 常ならむ 39 | 有為の奥山 今日越えて 40 | 浅き夢見じ 酔ひもせず 41 | английский: The quick brown fox jumps over the lazy dog. 42 | chinese: 43 | 花非花 44 | 雾非雾 45 | 夜半来 46 | 天明去 47 | 来如春梦几多时 48 | 去似朝云无觅处 49 | ">>). 50 | 51 | 52 | utf8_test() -> 53 | {ok, Txt} = file:read_file("../test/utf8.txt"), 54 | Result = yz_text_extractor:extract(Txt), 55 | case Result of 56 | {error, Reason} -> 57 | ?debugFmt("~nextract/1 failed: ~s~n", [Reason]), 58 | throw(extract_failed); 59 | _ -> 60 | ok 61 | end, 62 | ?assertEqual([{text, ?UTF8_EXPECT}], Result), 63 | ?STACK_IF_FAIL(yz_solr:prepare_json([{doc, Result}])). 64 | -------------------------------------------------------------------------------- /test/utf8.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding='UTF-8'?> 2 | 3 | <!-- Example text pulled from http://www.columbia.edu/~fdc/utf8/index.html#notes --> 4 | <langs> 5 | <english attr="The quick">The quick brown fox jumps over the lazy dog.</english> 6 | <jamaican>Chruu, a kwik di kwik brong fox a jomp huova di liezi daag de, yu no siit?</jamaican> 7 | <irish>"An ḃfuil do ċroí ag bualaḋ ó ḟaitíos an ġrá a ṁeall lena ṗóg éada ó ṡlí do leasa ṫú?" "D'ḟuascail Íosa Úrṁac na hÓiġe Beannaiṫe pór Éava agus Áḋaiṁ."</irish> 8 | <dutch>Pa's wijze lynx bezag vroom het fikse aquaduct.</dutch> 9 | <german_1 attr="Falsches Üben">Falsches Üben von Xylophonmusik quält jeden größeren Zwerg.</german_1> 10 | <german_2>Im finſteren Jagdſchloß am offenen Felsquellwaſſer patzte der affig-flatterhafte kauzig-höf‌liche Bäcker über ſeinem verſifften kniffligen C-Xylophon.</german_2> 11 | <norwegian>Blåbærsyltetøy.</norwegian> 12 | <danish>Høj bly gom vandt fræk sexquiz på wc.</danish> 13 | <swedish>Flygande bäckasiner söka strax hwila på mjuka tuvor.</swedish> 14 | <icelandic>Sævör grét áðan því úlpan var ónýt.</icelandic> 15 | <finnish>Törkylempijävongahdus.</finnish> 16 | <polish>Pchnąć w tę łódź jeża lub osiem skrzyń fig.</polish> 17 | <czech>Příliš žluťoučký kůň úpěl ďábelské kódy.</czech> 18 | <slovak>Starý kôň na hŕbe kníh žuje tíško povädnuté ruže, na stĺpe sa ďateľ učí kvákať novú ódu o živote.</slovak> 19 | <greek_monotonic>ξεσκεπάζω την ψυχοφθόρα βδελυγμία</greek_monotonic> 20 | <greek_polytonic>ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία</greek_polytonic> 21 | <russian>Съешь же ещё этих мягких французских булок да выпей чаю.</russian> 22 | <bulgarian>Жълтата дюля беше щастлива, че пухът, който цъфна, замръзна като гьон.</bulgarian> 23 | <sami>Vuol Ruoŧa geđggiid leat máŋga luosa ja čuovžža.</sami> 24 | <hungarian>Árvíztűrő tükörfúrógép.</hungarian> 25 | <spanish>El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro.</spanish> 26 | <portuguese>O próximo vôo à noite sobre o Atlântico, põe freqüentemente o único médico.</portuguese> 27 | <french>Les naïfs ægithales hâtifs pondant à Noël où il gèle sont sûrs d'être déçus en voyant leurs drôles d'œufs abîmés.</french> 28 | <esperanto>Eĥoŝanĝo ĉiuĵaŭde.</esperanto> 29 | <hebrew>זה כיף סתם לשמוע איך תנצח קרפד עץ טוב בגן.</hebrew> 30 | <japanese_hiragana> 31 | いろはにほへど ちりぬるを 32 | わがよたれぞ つねならむ 33 | うゐのおくやま けふこえて 34 | あさきゆめみじ ゑひもせず 35 | </japanese_hiragana> 36 | <japanese_kanji> 37 | 色は匂へど 散りぬるを 38 | 我が世誰ぞ 常ならむ 39 | 有為の奥山 今日越えて 40 | 浅き夢見じ 酔ひもせず 41 | </japanese_kanji> 42 | <английский>The quick brown fox jumps over the lazy dog.</английский> 43 | 
<chinese 作者="Bai Juyi" title="The Bloom is not a Bloom"> 44 | 花非花 45 | 雾非雾 46 | 夜半来 47 | 天明去 48 | 来如春梦几多时 49 | 去似朝云无觅处 50 | </chinese> 51 | </langs> 52 | -------------------------------------------------------------------------------- /test/yz_misc_tests.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2013 Basho Technologies, Inc. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %%-------------------------------------------------------------------- 20 | 21 | %% @doc EUnit and Exercise yz_misc. 22 | -module(yz_misc_tests). 23 | 24 | -compile(export_all). 25 | 26 | -include_lib("eunit/include/eunit.hrl"). 27 | 28 | should_copy_skip_test() -> 29 | Thisfile = atom_to_list(?MODULE) ++ ".erl", 30 | ?assertNot(yz_misc:should_copy(skip, Thisfile, Thisfile)), 31 | ?assert(yz_misc:should_copy(skip, "<nofile>", "<nofile>")), 32 | ?assert(yz_misc:should_copy(skip, Thisfile, "<nofile>")). 33 | 34 | should_copy_update_test() -> 35 | Thisfile = atom_to_list(?MODULE) ++ ".erl", 36 | %% same file and no file acts the same as skip 37 | ?assertNot(yz_misc:should_copy(update, Thisfile, Thisfile)), 38 | ?assert(yz_misc:should_copy(update, "<nofile>", "<nofile>")), 39 | ?assert(yz_misc:should_copy(update, Thisfile, "<nofile>")), 40 | %% No file to update 41 | ?assertNot(yz_misc:should_copy(update, "<nofile>", Thisfile)), 42 | %% Compare timestamps 43 | Newfile = lists:flatten(io_lib:format("~p-~p",[ 44 | erlang:phash2(make_ref()),node()])), 45 | try 46 | ok = file:write_file(Newfile, "data"), 47 | ?assert(yz_misc:should_copy(update, Newfile, Thisfile)), 48 | ?assertNot(yz_misc:should_copy(update, Thisfile, Newfile)) 49 | after 50 | file:delete(Newfile) 51 | end. 52 | 53 | should_copy_overwrite_test() -> 54 | Thisfile = atom_to_list(?MODULE) ++ ".erl", 55 | ?assert(yz_misc:should_copy(overwrite, Thisfile, Thisfile)), 56 | ?assert(yz_misc:should_copy(overwrite, "<nofile>", "<nofile>")), 57 | ?assert(yz_misc:should_copy(overwrite, Thisfile, "<nofile>")). 58 | 59 | should_verify_name_test() -> 60 | ?assertEqual({ok, <<"just-fine">>}, yz_index:verify_name(<<"just-fine">>)), 61 | ?assertEqual({error, invalid_name}, yz_index:verify_name(<<"bad/slash">>)), 62 | ?assertEqual({error, invalid_name}, yz_index:verify_name(<<"out-of-range-", 129>>)), 63 | ?assertEqual({error, invalid_name}, yz_index:verify_name(<<"out-of-range-", 31>>)), 64 | ?assertEqual({ok, <<"just-in-range- ">>}, yz_index:verify_name(<<"just-in-range-", 32>>)). 65 | -------------------------------------------------------------------------------- /riak_test/yz_test_listener.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2016 Basho Technologies, Inc. 
4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %%------------------------------------------------------------------- 20 | -module(yz_test_listener). 21 | 22 | -behaviour(gen_server). 23 | 24 | %% API 25 | -export([start/0, stop/0, messages/0, clear/0]). 26 | 27 | %% gen_server callbacks 28 | -export([init/1, 29 | handle_call/3, 30 | handle_cast/2, 31 | handle_info/2, 32 | terminate/2, 33 | code_change/3]). 34 | 35 | -define(SERVER, ?MODULE). 36 | 37 | -record(state, {messages=[]}). 38 | 39 | %%%=================================================================== 40 | %%% API 41 | %%%=================================================================== 42 | 43 | start() -> 44 | {ok, _Pid} = gen_server:start_link({global, ?SERVER}, ?MODULE, [], []). 45 | 46 | stop() -> 47 | gen_server:call({global, ?SERVER}, stop). 48 | 49 | messages() -> 50 | gen_server:call({global, ?SERVER}, messages). 51 | 52 | clear() -> 53 | gen_server:call({global, ?SERVER}, clear). 54 | 55 | %%%=================================================================== 56 | %%% gen_server callbacks 57 | %%%=================================================================== 58 | 59 | init([]) -> 60 | {ok, #state{}}. 61 | 62 | handle_call({message, Message}, _From, #state{messages = Messages} = State) -> 63 | {reply, ok, State#state{messages=[Message | Messages]}}; 64 | handle_call(messages, _From, #state{messages = Messages} = State) -> 65 | {reply, Messages, State}; 66 | handle_call(clear, _From, State) -> 67 | {reply, ok, State#state{messages=[]}}; 68 | handle_call(stop, _From, State) -> 69 | {stop, normal, ok, State}; 70 | handle_call(_Request, _From, State) -> 71 | {reply, ok, State}. 72 | 73 | handle_cast(_Request, State) -> 74 | {noreply, State}. 75 | 76 | %handle_info({message, Message}, #state{messages = Messages} = State) -> 77 | % {noreply, State#state{messages=[Message | Messages]}}; 78 | handle_info(_Info, State) -> 79 | {noreply, State}. 80 | 81 | terminate(_Reason, _State) -> 82 | ok. 83 | 84 | code_change(_OldVsn, State, _Extra) -> 85 | {ok, State}. 86 | 87 | %%%=================================================================== 88 | %%% Internal functions 89 | %%%=================================================================== 90 | -------------------------------------------------------------------------------- /riak_test/yz_fallback.erl: -------------------------------------------------------------------------------- 1 | %% @doc Verify that fallback data is handled properly. I.e. not indexed. 2 | -module(yz_fallback). 3 | -compile(export_all). 4 | -include_lib("eunit/include/eunit.hrl"). 5 | -define(NUM_KEYS, 1000). 6 | -define(INDEX, <<"fallback">>). 7 | -define(BUCKET, {?INDEX, <<"bucket">>}). 8 | -define(KEY, <<"key">>). 9 | -define(FMT(S, Args), lists:flatten(io_lib:format(S, Args))). 
10 | -define(CFG, 11 | [{riak_core, 12 | [ 13 | {ring_creation_size, 16} 14 | ]}, 15 | {yokozuna, 16 | [ 17 | {enabled, true} 18 | ]} 19 | ]). 20 | 21 | confirm() -> 22 | Cluster = rt:build_cluster(2, ?CFG), 23 | rt:wait_for_cluster_service(Cluster, yokozuna), 24 | create_index(Cluster, ?INDEX), 25 | Cluster2 = take_node_down(Cluster), 26 | write_obj(Cluster2, ?INDEX, ?BUCKET, ?KEY), 27 | check_fallbacks(Cluster2, ?INDEX, ?BUCKET, ?KEY), 28 | ANode = yz_rt:select_random(Cluster2), 29 | ?assertEqual(ok, yz_rt:search_expect(ANode, ?INDEX, "*", "*", 1)), 30 | pass. 31 | 32 | check_fallbacks(Cluster, Index, Bucket, Key) -> 33 | Node = yz_rt:select_random(Cluster), 34 | KVPreflist = kv_preflist(Node, Bucket, Key), 35 | FallbackPreflist = filter_fallbacks(KVPreflist), 36 | LogicalFallbackPL = make_logical(Node, FallbackPreflist), 37 | [?assertEqual(ok, yz_rt:search_expect(FNode, solr, Index, "_yz_pn", 38 | integer_to_list(LPN), 0)) 39 | || {LPN, FNode} <- LogicalFallbackPL]. 40 | 41 | create_index(Cluster, Index) -> 42 | yz_rt:create_index(Cluster, Index), 43 | ok = yz_rt:set_bucket_type_index(Cluster, Index), 44 | timer:sleep(5000). 45 | 46 | make_logical(Node, Preflist) -> 47 | rpc:call(Node, yz_misc, convert_preflist, [Preflist, logical]). 48 | 49 | filter_fallbacks(Preflist) -> 50 | [PartitionNode || {{_,_} = PartitionNode, fallback} <- Preflist]. 51 | 52 | kv_preflist(Node, Bucket, Key) -> 53 | {ok, Ring} = rpc:call(Node, riak_core_ring_manager, get_my_ring, []), 54 | BucketProps = rpc:call(Node, riak_core_bucket, get_bucket, [Bucket, Ring]), 55 | DocIdx = rpc:call(Node, riak_core_util, chash_key, [{Bucket,Key}]), 56 | N = proplists:get_value(n_val,BucketProps), 57 | UpNodes = rpc:call(Node, riak_core_node_watcher, nodes, [riak_kv]), 58 | riak_core_apl:get_apl_ann(DocIdx, N, Ring, UpNodes). 59 | 60 | take_node_down(Cluster) -> 61 | DownNode = yz_rt:select_random(Cluster), 62 | rt:stop(DownNode), 63 | timer:sleep(5000), 64 | Cluster -- [DownNode]. 65 | 66 | write_obj(Cluster, Index, {BType, BName}, Key) -> 67 | Node = yz_rt:select_random(Cluster), 68 | {Host, Port} = riak_hp(Node, Cluster), 69 | lager:info("write obj to node ~p", [Node]), 70 | URL = ?FMT("http://~s:~s/types/~s/buckets/~s/keys/~s", 71 | [Host, integer_to_list(Port), BType, BName, Key]), 72 | Headers = [{"content-type", "text/plain"}], 73 | Body = <<"yokozuna">>, 74 | {ok, "204", _, _} = ibrowse:send_req(URL, Headers, put, Body, []), 75 | yz_rt:commit(Cluster, Index). 76 | 77 | riak_hp(Node, Cluster) -> 78 | CI = yz_rt:connection_info(Cluster), 79 | yz_rt:riak_http(proplists:get_value(Node, CI)). 
80 | -------------------------------------------------------------------------------- /solr-patches/no-stale-check-4.10.4.patch: -------------------------------------------------------------------------------- 1 | commit 13449f7dc2ac0d205180611f2de6e1371cd88a4b 2 | Author: Zeeshan Lakhani <zeeshan.lakhani@gmail.com> 3 | Date: Thu May 7 00:20:08 2015 +0900 4 | 5 | Update patches for Solr 4.10.4 6 | 7 | diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java 8 | index f574f29..42168df 100644 9 | --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java 10 | +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java 11 | @@ -29,6 +29,7 @@ import java.util.concurrent.ThreadPoolExecutor; 12 | import java.util.concurrent.TimeUnit; 13 | 14 | import org.apache.commons.lang.StringUtils; 15 | +import org.apache.http.params.HttpConnectionParams; 16 | import org.apache.http.client.HttpClient; 17 | import org.apache.http.impl.client.DefaultHttpClient; 18 | import org.apache.http.impl.client.DefaultHttpRequestRetryHandler; 19 | @@ -163,7 +164,8 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. 20 | clientParams.set(HttpClientUtil.PROP_USE_RETRY, false); 21 | } 22 | this.defaultClient = HttpClientUtil.createClient(clientParams); 23 | - 24 | + this.defaultClient.getParams().setParameter(HttpConnectionParams.STALE_CONNECTION_CHECK, false); 25 | + this.defaultClient.getParams().setParameter(HttpConnectionParams.TCP_NODELAY, true); 26 | // must come after createClient 27 | if (useRetries) { 28 | // our default retry handler will never retry on IOException if the request has been sent already, 29 | diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java 30 | index fcbc80d..c98c982 100644 31 | --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java 32 | +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java 33 | @@ -17,6 +17,9 @@ 34 | package org.apache.solr.client.solrj.impl; 35 | 36 | import java.io.IOException; 37 | +import java.util.concurrent.Executors; 38 | +import java.util.concurrent.ScheduledExecutorService; 39 | +import java.util.concurrent.TimeUnit; 40 | import java.io.InputStream; 41 | import java.util.zip.GZIPInputStream; 42 | import java.util.zip.InflaterInputStream; 43 | @@ -115,6 +118,16 @@ public class HttpClientUtil { 44 | logger.debug("Creating new http client, config:" + config); 45 | } 46 | final DefaultHttpClient httpClient = new SystemDefaultHttpClient(); 47 | + final org.apache.http.conn.ClientConnectionManager mgr = httpClient.getConnectionManager(); 48 | + 49 | + // Note: The sweeper task is assuming hard-coded Jetty max-idle of 50s. 
50 | + final Runnable sweeper = new Runnable() { 51 | + public void run() { 52 | + mgr.closeIdleConnections(40, TimeUnit.SECONDS); 53 | + } 54 | + }; 55 | + final ScheduledExecutorService stp = Executors.newScheduledThreadPool(1); 56 | + stp.scheduleWithFixedDelay(sweeper, 5, 5, TimeUnit.SECONDS); 57 | configureClient(httpClient, config); 58 | return httpClient; 59 | } 60 | -------------------------------------------------------------------------------- /misc/bench/bin/smartos/js/cpu.js: -------------------------------------------------------------------------------- 1 | function init_cpus(cpus) { 2 | var cpuData = []; 3 | var yCol = "CPU"; 4 | var x = d3.time.scale().range([0, width]); 5 | var y = d3.scale.linear().range([height, 0]); 6 | var xAxis = d3.svg.axis().scale(x).orient("bottom"); 7 | var yAxis = d3.svg.axis().scale(y).orient("left"); 8 | var colors = d3.scale.category20(); 9 | var parseDate = d3.time.format("%Y-%m-%dT%H:%M:%S").parse; 10 | 11 | var line = d3.svg.line() 12 | .x(function(d) { return x(d.timestamp); }) 13 | .y(function(d) { return y(d[yCol]); }); 14 | 15 | var svg = d3.select("#cpu p.vis").append("svg") 16 | .attr("width", width + margin.left + margin.right) 17 | .attr("height", height + margin.top + margin.bottom) 18 | .append("g") 19 | .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); 20 | 21 | svg.append("g") 22 | .attr("class", "cpuu x axis") 23 | .attr("transform", "translate(0," + height + ")") 24 | .call(xAxis); 25 | 26 | svg.append("g") 27 | .attr("class", "cpuu y axis") 28 | .call(yAxis); 29 | 30 | svg.append("text") 31 | .attr("text-anchor", "middle") 32 | .attr("transform", "translate(" + -(margin.left/2) + "," + (height/2) + ")rotate(-90)") 33 | .attr("class", "label") 34 | .text("CPU %"); 35 | 36 | var redraw = function() { 37 | var keys = cpuData.map(function(d) { return d.key; }); 38 | colors.domain(keys); 39 | 40 | x.domain([ 41 | d3.min(cpuData, function(c) { 42 | return d3.min(c.values, function(d) { return d.timestamp; }) 43 | }), 44 | d3.max(cpuData, function(c) { 45 | return d3.max(c.values, function(d) { return d.timestamp; }) 46 | }) 47 | ]); 48 | y.domain([0,100]); 49 | 50 | var usage = svg.selectAll(".cpu_usage") 51 | .data(cpuData, function(d) { return d.key; }); 52 | 53 | usage.enter().append("path") 54 | .attr("class", "line") 55 | .attr("d", function(d) { return line(d.values); }) 56 | .style("stroke", function(d) { return colors(d.key); }); 57 | 58 | d3.transition(usage) 59 | .attr("d", function(d) { return line(d.values); }); 60 | 61 | usage.exit().remove(); 62 | 63 | d3.select(".cpuu.x.axis").call(xAxis); 64 | d3.select(".cpuu.y.axis").call(yAxis); 65 | 66 | }; 67 | 68 | var add_cpu_data = function(name, resource) { 69 | d3.csv(resource, function(data) { 70 | // scrub data to essential elements 71 | data = data.map(function(d) { 72 | var tmp = {process: d["PROCESS/NLWP"], 73 | timestamp: parseDate(d.timestamp)}; 74 | tmp[yCol] = +d[yCol]; 75 | return tmp; 76 | }); 77 | 78 | data = d3.nest() 79 | .key(function(d) { return name + "-" + d.process; }) 80 | .entries(data); 81 | 82 | cpuData = d3.merge([cpuData, data]); 83 | redraw(); 84 | }) 85 | }; 86 | 87 | cpus.forEach(function(d) { add_cpu_data(d.name, d.resource); }); 88 | }; 89 | -------------------------------------------------------------------------------- /tools/build-jar.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Build JAR file containing custom Solr request handlers.
4 | set -e 5 | 6 | 7 | function sha 8 | { 9 | file=$1 10 | sha_file=$2 11 | 12 | if which sha1sum; then 13 | sha1sum $file > $sha_file 14 | elif which shasum; then 15 | shasum -a 1 $file > $sha_file 16 | else 17 | echo "Unable to locate program to compute SHA1" 18 | exit 1 19 | fi 20 | } 21 | 22 | if [ ! -x "`which javac`" ] || [ ! -x "`which jar`" ]; then 23 | echo "Couldn't find javac and/or jar, which are needed to compile Yokozuna." 24 | exit 1 25 | fi 26 | 27 | if ! javac -version 2>&1 | egrep "1\.7\.[0-9_.]+" 28 | then 29 | echo "JDK 1.7 must be used to compile these jars" 30 | exit 1 31 | fi 32 | 33 | if [ $(basename $PWD) != "tools" ] 34 | then 35 | echo "This script must be run from the tools directory" 36 | exit 1 37 | fi 38 | 39 | 40 | echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NOTICE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" 41 | echo "% %" 42 | echo "% If building new jars to be uploaded to s3 then make sure to update the %" 43 | echo "% YZ_JAR_VSN and MON_JAR_VSN variables. %" 44 | echo "% %" 45 | echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" 46 | 47 | echo "Build the yokozuna.jar..." 48 | 49 | SOLR_DIR=../priv/solr 50 | SOLR_WAR=$SOLR_DIR/webapps/solr.war 51 | SOLR_JAR_DIR=../build/solr-jars 52 | 53 | if [ ! -e $SOLR_WAR ]; then 54 | echo "Download the Solr package..." 55 | ./grab-solr.sh 56 | fi 57 | 58 | if [ ! -e $SOLR_JAR_DIR ]; then 59 | echo "Explode the WAR..." 60 | mkdir $SOLR_JAR_DIR 61 | cp $SOLR_WAR $SOLR_JAR_DIR 62 | pushd $SOLR_JAR_DIR 63 | jar xf solr.war WEB-INF/lib 64 | mv WEB-INF/lib/* . 65 | rm -rf WEB-INF solr.war 66 | popd 67 | # copy logging jars 68 | cp $SOLR_DIR/lib/ext/* $SOLR_JAR_DIR 69 | fi 70 | 71 | 72 | echo "Compile..." 73 | javac -cp "$SOLR_JAR_DIR/*" \ 74 | ../java_src/com/basho/yokozuna/handler/*.java \ 75 | ../java_src/com/basho/yokozuna/handler/component/*.java \ 76 | ../java_src/com/basho/yokozuna/query/*.java \ 77 | ../java_src/com/basho/yokozuna/monitor/*.java 78 | 79 | echo "Create yokozuna.jar..." 80 | if [ ! -e "../priv/java_lib" ]; then 81 | mkdir ../priv/java_lib 82 | fi 83 | 84 | YZ_JAR_VSN=3 85 | YZ_JAR_NAME=yokozuna-$YZ_JAR_VSN.jar 86 | YZ_JAR_SHA=$YZ_JAR_NAME.sha 87 | 88 | jar cvf $YZ_JAR_NAME \ 89 | -C ../java_src/ com/basho/yokozuna/handler \ 90 | -C ../java_src/ com/basho/yokozuna/query 91 | 92 | sha $YZ_JAR_NAME $YZ_JAR_SHA 93 | 94 | echo "Finished building yokozuna.jar..." 95 | 96 | # monitor has to be packaged separately because it relies on the 97 | # dynamic classpath that jetty/solr set up 98 | echo "Create yz_monitor.jar..." 99 | 100 | MON_JAR_VSN=1 101 | MON_JAR_NAME=yz_monitor-$MON_JAR_VSN.jar 102 | MON_JAR_SHA=$MON_JAR_NAME.sha 103 | jar cvf $MON_JAR_NAME \ 104 | -C ../java_src/ com/basho/yokozuna/monitor 105 | 106 | sha $MON_JAR_NAME $MON_JAR_SHA 107 | 108 | echo "Finished building yz_monitor.jar..."
109 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | EXOMETER_PACKAGES = "(basic)" 2 | export EXOMETER_PACKAGES 3 | PULSE_TESTS = yz_solrq_eqc 4 | 5 | REBAR ?= $(shell pwd)/rebar 6 | 7 | .PHONY: deps rel stagedevrel test 8 | 9 | all: deps compile-riak-test 10 | 11 | compile: deps 12 | $(REBAR) compile 13 | 14 | compile-riak-test: compile 15 | $(REBAR) skip_deps=true riak_test_compile 16 | 17 | deps: 18 | $(REBAR) get-deps 19 | 20 | clean: 21 | $(REBAR) clean 22 | rm -rf riak_test/ebin 23 | rm -rf build 24 | git clean -dfx priv/ 25 | 26 | distclean: clean 27 | $(REBAR) delete-deps 28 | 29 | ## 30 | ## Dialyzer 31 | ## 32 | DIALYZER_APPS = kernel stdlib sasl erts ssl tools os_mon runtime_tools crypto inets \ 33 | xmerl webtool snmp public_key mnesia eunit syntax_tools compiler 34 | DIALYZER_FLAGS = -Wno_return 35 | TEST_PLT = .yokozuna_test_dialyzer_plt 36 | RIAK_TEST_PATH = riak_test 37 | 38 | include tools.mk 39 | 40 | ${TEST_PLT}: compile-riak-test 41 | @if [ -d $(RIAK_TEST_PATH) ]; then \ 42 | if [ -f $(TEST_PLT) ]; then \ 43 | dialyzer --check_plt --plt $(TEST_PLT) $(RIAK_TEST_PATH)/ebin && \ 44 | dialyzer --add_to_plt --plt $(TEST_PLT) --apps edoc --output_plt $(TEST_PLT) ebin $(RIAK_TEST_PATH)/ebin ; test $$? -ne 1; \ 45 | else \ 46 | dialyzer --build_plt --apps edoc --output_plt $(TEST_PLT) ebin $(RIAK_TEST_PATH)/ebin ; test $$? -ne 1; \ 47 | fi \ 48 | fi 49 | 50 | dialyzer-rt-run: 51 | @echo "==> $(shell basename $(shell pwd)) (dialyzer_rt)" 52 | @PLTS="$(PLT) $(LOCAL_PLT) $(TEST_PLT)"; \ 53 | if [ -f dialyzer.ignore-warnings ]; then \ 54 | if [ $$(grep -cvE '[^[:space:]]' dialyzer.ignore-warnings) -ne 0 ]; then \ 55 | echo "ERROR: dialyzer.ignore-warnings contains a blank/empty line, this will match all messages!"; \ 56 | exit 1; \ 57 | fi; \ 58 | dialyzer $(DIALYZER_FLAGS) --plts $${PLTS} -c $(RIAK_TEST_PATH)/ebin > dialyzer_warnings ; \ 59 | cat dialyzer.ignore-warnings \ 60 | | sed -E 's/^([^:]+:)[^:]+:/\1/' \ 61 | | sort \ 62 | | uniq -c \ 63 | | sed -E '/.*\.erl: /!s/^[[:space:]]*[0-9]+[[:space:]]*//' \ 64 | > dialyzer.ignore-warnings.tmp ; \ 65 | egrep -v "^[[:space:]]*(done|Checking|Proceeding|Compiling)" dialyzer_warnings \ 66 | | sed -E 's/^([^:]+:)[^:]+:/\1/' \ 67 | | sort \ 68 | | uniq -c \ 69 | | sed -E '/.*\.erl: /!s/^[[:space:]]*[0-9]+[[:space:]]*//' \ 70 | | grep -F -f dialyzer.ignore-warnings.tmp -v \ 71 | | sed -E 's/^[[:space:]]*[0-9]+[[:space:]]*//' \ 72 | | sed -E 's/([]\^:+?|()*.$${}\[])/\\\1/g' \ 73 | | sed -E 's/(\\\.erl\\\:)/\1[[:digit:]]+:/g' \ 74 | | sed -E 's/^(.*)$$/^[[:space:]]*\1$$/g' \ 75 | > dialyzer_unhandled_warnings ; \ 76 | rm dialyzer.ignore-warnings.tmp; \ 77 | if [ $$(cat dialyzer_unhandled_warnings | wc -l) -gt 0 ]; then \ 78 | egrep -f dialyzer_unhandled_warnings dialyzer_warnings ; \ 79 | found_warnings=1; \ 80 | fi; \ 81 | [ "$$found_warnings" != 1 ] ; \ 82 | else \ 83 | dialyzer -Wno_return $(DIALYZER_FLAGS) --plts $${PLTS} -c $(RIAK_TEST_PATH)/ebin; \ 84 | fi 85 | 86 | dialyzer_rt: deps ${PLT} ${LOCAL_PLT} $(TEST_PLT) dialyzer-rt-run 87 | 88 | ## 89 | ## Purity 90 | ## 91 | ## NOTE: Must add purity to ERL_LIBS for these targets to work 92 | build_purity_plt: 93 | @erl -noshell -run purity_cli main -extra --build-plt --apps $(APPS) deps/*/ebin ebin 94 | 95 | purity: 96 | @erl -noshell -run purity_cli main -extra -v -s stats --with-reasons -l 3 --apps ebin 97 | 
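98 | ## Example invocations (a sketch; assumes the targets above, and for the 99 | ## purity targets that purity is on ERL_LIBS -- /path/to/purity below is 100 | ## a placeholder): 101 | ## 102 | ## make && make dialyzer_rt 103 | ## ERL_LIBS=/path/to/purity make build_purity_plt purity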
-------------------------------------------------------------------------------- /tools/build-solr.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Build Solr from source, applying local patches. 4 | # 5 | #> Usage: 6 | #> 7 | #> ./build-solr.sh [--git] [--patch-dir <PATCH DIR>] <WORK DIR> <NAME> <URL> 8 | #> 9 | #> Example: 10 | #> 11 | #> ./build-solr.sh --patch-dir ~/yokozuna/solr-patches /tmp/build-solr solr-4.10.4-yz http://archive.apache.org/dist/lucene/solr/4.10.4/solr-4.10.4-src.tgz | tee build-solr.out 12 | 13 | set -e 14 | 15 | error() 16 | { 17 | echo "ERROR: $1" 18 | exit 1 19 | } 20 | 21 | usage() 22 | { 23 | grep "#>" $0 | sed -e 's/#>//' -e '$d' 24 | } 25 | 26 | apply_patches() 27 | { 28 | if test -e $PATCH_DIR; then 29 | echo "applying patches in $PATCH_DIR" 30 | for p in $PATCH_DIR/*.patch; do 31 | patch -p1 < $p 32 | done 33 | fi 34 | } 35 | 36 | download() 37 | { 38 | if which wget > /dev/null; then 39 | wget --no-check-certificate --progress=dot:mega $1 40 | elif which curl > /dev/null; then 41 | curl --insecure --progress-bar -O $1 42 | fi 43 | } 44 | 45 | IS_GIT=0 46 | PATCH_DIR="" 47 | while test $# -gt 0 48 | do 49 | case $1 in 50 | --git) 51 | # <URL> is a git URL 52 | IS_GIT=1 53 | ;; 54 | --patch-dir) 55 | PATCH_DIR=$2 56 | # make absolute 57 | cd $PATCH_DIR 58 | PATCH_DIR=$(pwd) 59 | cd - 60 | shift 61 | ;; 62 | -*) 63 | error "unrecognized option: $1" 64 | ;; 65 | *) 66 | break 67 | ;; 68 | esac 69 | shift 70 | done 71 | 72 | if test $# != 3; then 73 | echo "ERROR: incorrect number of arguments: $#" 74 | usage 75 | exit 1 76 | fi 77 | 78 | WORK_DIR=$1; shift 79 | NAME=$1; shift 80 | URL=$1; shift 81 | 82 | if ! javac -version 2>&1 | egrep "1\.7\.[0-9_.]+" 83 | then 84 | echo "JDK 1.7 must be used to compile Solr" 85 | exit 1 86 | fi 87 | 88 | if [ ! -x "`which ant`" ]; then 89 | echo "Couldn't find ant, which is needed to compile Solr." 90 | exit 1 91 | fi 92 | 93 | if test ! -e $WORK_DIR; then 94 | mkdir $WORK_DIR 95 | fi 96 | 97 | cd $WORK_DIR 98 | # make absolute if not already 99 | WORK_DIR=$(pwd) 100 | 101 | if test $IS_GIT -eq 1; then 102 | echo "cloning Solr from $URL" 103 | SOLR_DIR=$(basename $URL) 104 | git clone $URL 105 | else 106 | SOLR_FILE=$(basename $URL) 107 | SOLR_DIR=${SOLR_FILE%-src.tgz} 108 | 109 | if test ! -e $SOLR_FILE; then 110 | download $URL 111 | fi 112 | 113 | if test ! -e $SOLR_DIR; then 114 | tar zxvf $SOLR_FILE 115 | fi 116 | fi 117 | 118 | mv $SOLR_DIR $NAME 119 | SOLR_DIR=$NAME 120 | 121 | cd $SOLR_DIR 122 | echo "building Solr from $SOLR_DIR" 123 | 124 | apply_patches 125 | ant ivy-bootstrap 126 | ant compile 127 | 128 | cd solr 129 | # NOTE: needed for 4.0 release 130 | # mkdir test-framework/lib 131 | ant dist example 132 | 133 | cd ..
134 | mv solr $NAME 135 | tar zcvf $NAME.tgz \ 136 | --exclude='build*' \ 137 | --exclude=cloud-dev \ 138 | --exclude=core \ 139 | --exclude=package \ 140 | --exclude=scripts \ 141 | --exclude=site \ 142 | --exclude=solrj \ 143 | --exclude=test-framework \ 144 | --exclude=testlogging.properties \ 145 | --exclude=example/etc/solrtest.keystore \ 146 | $NAME 147 | mv $NAME solr 148 | -------------------------------------------------------------------------------- /java_src/com/basho/yokozuna/handler/component/FQShardTranslator.java: -------------------------------------------------------------------------------- 1 | package com.basho.yokozuna.handler.component; 2 | 3 | import org.apache.solr.common.params.ModifiableSolrParams; 4 | import org.apache.solr.common.params.ShardParams; 5 | import org.apache.solr.common.params.SolrParams; 6 | import org.apache.solr.handler.component.ResponseBuilder; 7 | import org.apache.solr.handler.component.SearchComponent; 8 | import org.apache.solr.handler.component.ShardRequest; 9 | import org.apache.solr.request.SolrQueryRequest; 10 | 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | import java.io.IOException; 15 | 16 | /** 17 | * Translate custom Yokozuna filter query shard params to normal 18 | * filter query in the case of a non-distributed request. 19 | * 20 | * Allow setting filter queries per-node. This allows Yokozuna to 21 | * apply a filter query to a specific node without requiring a special 22 | * node field. That, in turn, leads to a smaller index, less memory 23 | * usage, and less CPU time spent on node filtering. 24 | * 25 | * It works by translating custom per-node filter queries into a 26 | * traditional filter query for the destination node. For example, for 27 | * a distributed query across 2 shards the `shards` parameter might 28 | * look like so. 29 | * 30 | * ?shards=10.0.1.100:10014/solr/index,10.0.1.101:10014/solr/index 31 | * 32 | * This SearchComponent allows applying a filter query exclusively to 33 | * each shard by passing a query param with the name `$host:$port` and 34 | * a value of the filter query to run. For example: 35 | * 36 | * ?10.0.1.100:10014=_yz_pn:1 OR _yz_pn:7 OR ...\ 37 | * &10.0.1.101:10014=_yz_pn:4 OR _yz_pn:10 OR ... 
38 | * 39 | */ 40 | public class FQShardTranslator extends SearchComponent { 41 | protected static final Logger log = LoggerFactory.getLogger(FQShardTranslator.class); 42 | public static final String COMPONENT_NAME = "fq_shard_translator"; 43 | 44 | @Override 45 | public void prepare(ResponseBuilder rb) throws IOException { 46 | SolrQueryRequest req = rb.req; 47 | SolrParams params = req.getParams(); 48 | 49 | if (!isDistrib(params)) { 50 | String shardUrl = params.get(ShardParams.SHARD_URL); 51 | if (shardUrl != null) { 52 | String hostPort = shardUrl.substring(0, shardUrl.indexOf('/')); 53 | ModifiableSolrParams mp = new ModifiableSolrParams(params); 54 | mp.add("fq", params.get(hostPort)); 55 | req.setParams(mp); 56 | } 57 | } 58 | } 59 | 60 | @Override 61 | public void process(ResponseBuilder rb) throws IOException { 62 | return; 63 | } 64 | 65 | @Override 66 | public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) { 67 | return; 68 | } 69 | 70 | @Override 71 | public String getDescription() { 72 | return "Yokozuna's FQ Shard Translator"; 73 | } 74 | 75 | @Override 76 | public String getSource() { 77 | return "https://github.com/basho/yokozuna"; 78 | } 79 | 80 | private boolean isDistrib(SolrParams params) { 81 | // Based on HttpShardHandler because rb.isDistrib is not public. 82 | boolean distrib = params.getBool("distrib", false); 83 | String shards = params.get(ShardParams.SHARDS); 84 | boolean hasShardURL = shards != null; 85 | 86 | return hasShardURL || distrib; 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /riak_test/yz_default_bucket_type_upgrade.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2015 Basho Technologies, Inc. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %%-------------------------------------------------------------------- 20 | 21 | %% @doc Test that checks to make sure that default bucket_types 22 | %% do not lose data when expiring/clearing AAE trees when 23 | %% trees are rebuilt for comparison. 24 | %% @end 25 | 26 | 27 | -module(yz_default_bucket_type_upgrade). 28 | -compile(export_all). 29 | -include_lib("eunit/include/eunit.hrl"). 30 | -include_lib("riakc/include/riakc.hrl"). 31 | 32 | -define(N, 3). 33 | -define(YZ_CAP, {yokozuna, handle_legacy_default_bucket_type_aae}). 34 | -define(INDEX, <<"test_upgrade_idx">>). 35 | -define(BUCKET, <<"test_upgrade_bucket">>). 36 | -define(SEQMAX, 2000). 
37 | -define(CFG, 38 | [{riak_core, 39 | [ 40 | {ring_creation_size, 16}, 41 | {default_bucket_props, 42 | [ 43 | {n_val, ?N}, 44 | {allow_mult, true}, 45 | {dvv_enabled, true} 46 | ]} 47 | ]}, 48 | {riak_kv, 49 | [ 50 | {anti_entropy_build_limit, {100, 1000}}, 51 | {anti_entropy_concurrency, 8} 52 | ] 53 | }, 54 | {yokozuna, 55 | [ 56 | {anti_entropy_tick, 1000}, 57 | {enabled, true} 58 | ]} 59 | ]). 60 | 61 | confirm() -> 62 | %% This test explicitly requires an upgrade from 2.0.5 to test a 63 | %% new capability 64 | OldVsn = "2.0.5", 65 | 66 | [_, Node|_] = Cluster = rt:build_cluster(lists:duplicate(4, {OldVsn, ?CFG})), 67 | rt:wait_for_cluster_service(Cluster, yokozuna), 68 | 69 | [rt:assert_capability(ANode, ?YZ_CAP, {unknown_capability, ?YZ_CAP}) || ANode <- Cluster], 70 | 71 | GenKeys = yz_rt:gen_keys(?SEQMAX), 72 | KeyCount = length(GenKeys), 73 | lager:info("KeyCount ~p", [KeyCount]), 74 | 75 | OldPid = rt:pbc(Node), 76 | 77 | yz_rt:pb_write_data(Cluster, OldPid, ?INDEX, ?BUCKET, GenKeys), 78 | yz_rt:commit(Cluster, ?INDEX), 79 | 80 | yz_rt:verify_num_found_query(Cluster, ?INDEX, KeyCount), 81 | 82 | %% Upgrade 83 | yz_rt:rolling_upgrade(Cluster, current), 84 | 85 | [rt:assert_capability(ANode, ?YZ_CAP, v1) || ANode <- Cluster], 86 | [rt:assert_supported(rt:capability(ANode, all), ?YZ_CAP, [v1, v0]) || ANode <- Cluster], 87 | 88 | yz_rt:verify_num_found_query(Cluster, ?INDEX, KeyCount), 89 | 90 | lager:info("Write one more piece of data"), 91 | Pid = rt:pbc(Node), 92 | ok = rt:pbc_write(Pid, ?BUCKET, <<"foo">>, <<"foo">>, "text/plain"), 93 | yz_rt:commit(Cluster, ?INDEX), 94 | 95 | yz_rt:expire_trees(Cluster), 96 | yz_rt:verify_num_found_query(Cluster, ?INDEX, KeyCount + 1), 97 | 98 | pass. 99 | -------------------------------------------------------------------------------- /src/yz_solrq_queue_pair_sup.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved. 3 | %% 4 | %% This file is provided to you under the Apache License, 5 | %% Version 2.0 (the "License"); you may not use this file 6 | %% except in compliance with the License. You may obtain 7 | %% a copy of the License at 8 | %% 9 | %% http://www.apache.org/licenses/LICENSE-2.0 10 | %% 11 | %% Unless required by applicable law or agreed to in writing, 12 | %% software distributed under the License is distributed on an 13 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | %% KIND, either express or implied. See the License for the 15 | %% specific language governing permissions and limitations 16 | %% under the License. 17 | %% 18 | %% ------------------------------------------------------------------- 19 | -module(yz_solrq_queue_pair_sup). 20 | 21 | -include("yokozuna.hrl"). 22 | 23 | -behaviour(supervisor). 24 | 25 | %% API 26 | -export([start_link/2]). 27 | 28 | %% Supervisor callbacks 29 | -export([init/1]). 30 | 31 | -define(SERVER, ?MODULE). 32 | 33 | %%%=================================================================== 34 | %%% API functions 35 | %%%=================================================================== 36 | 37 | %%-------------------------------------------------------------------- 38 | %% @doc 39 | %% Starts the supervisor 40 | %% 41 | %% @end 42 | %%-------------------------------------------------------------------- 43 | -spec(start_link(index_name(), p()) -> 44 | {ok, Pid :: pid()} | {error, Reason :: term()}). 
45 | start_link(Index, Partition) -> 46 | supervisor:start_link(?MODULE, [Index, Partition]). 47 | 48 | %%%=================================================================== 49 | %%% Supervisor callbacks 50 | %%%=================================================================== 51 | 52 | %%-------------------------------------------------------------------- 53 | %% @private 54 | %% @doc 55 | %% Whenever a supervisor is started using supervisor:start_link/[2,3], 56 | %% this function is called by the new process to find out about 57 | %% restart strategy, maximum restart frequency and child 58 | %% specifications. 59 | %% 60 | %% @end 61 | %%-------------------------------------------------------------------- 62 | -spec(init(Args :: term()) -> 63 | {ok, {SupFlags :: {RestartStrategy :: supervisor:strategy(), 64 | MaxR :: non_neg_integer(), MaxT :: non_neg_integer()}, 65 | [ChildSpec :: supervisor:child_spec()] 66 | }}). 67 | init([Index, Partition]) -> 68 | RestartStrategy = one_for_all, 69 | MaxRestarts = 10, 70 | MaxSecondsBetweenRestarts = 10, 71 | 72 | SupFlags = {RestartStrategy, MaxRestarts, MaxSecondsBetweenRestarts}, 73 | 74 | HelperChild = helper_spec(Index, Partition), 75 | WorkerChild = worker_spec(Index, Partition), 76 | 77 | {ok, {SupFlags, [HelperChild, WorkerChild]}}. 78 | 79 | %%%=================================================================== 80 | %%% Internal functions 81 | %%%=================================================================== 82 | 83 | 84 | helper_spec(Index, Partition) -> 85 | child_spec(helper, yz_solrq_helper, Index, Partition). 86 | 87 | worker_spec(Index, Partition) -> 88 | child_spec(worker, yz_solrq_worker, Index, Partition). 89 | 90 | child_spec(Type, Module, Index, Partition) -> 91 | {{Type, Index, Partition}, {Module, start_link, [Index, Partition]}, 92 | permanent, 5000, worker, [Module]}. 93 | -------------------------------------------------------------------------------- /src/rt_intercept_pt.erl: -------------------------------------------------------------------------------- 1 | %% XXX 2 | %% ------------------------------------------------------------------- 3 | %% TODO: This file was copied from riak_test in order to allow for using 4 | %% the parse transform from a riak_test whose source code is in the 5 | %% Yokozuna repository. This file should be deleted if or when a better 6 | %% mechanism is found for using the rt_intercept_pt parse transform 7 | %% external to the riak_test repository. 8 | %% ------------------------------------------------------------------- 9 | %% XXX 10 | 11 | -module(rt_intercept_pt). 12 | -export([parse_transform/2]). 13 | 14 | %% This parse transform looks for calls to rt_intercept:add/2, and if found 15 | %% potentially modifies the second argument. The second argument can be a 16 | %% list of intercept tuples or a single intercept tuple. An intercept tuple 17 | %% can have either 2 or 3 elements, but either way, a final element of the 18 | %% form 19 | %% 20 | %% [{{F,Arity},{[var], fun()}}] 21 | %% 22 | %% is transformed into 23 | %% 24 | %% [{{F,Arity},{[{varname, var}], tuple()}}] 25 | %% 26 | %% Only the second element of this tuple is modified. In the first form the 27 | %% fun() is an anonymous interceptor function and [var] represents the list 28 | %% of free variables used within the function but defined in the context in 29 | %% which the function is defined. 
The list of vars is transformed into a 30 | %% list of 2-tuples of var name and var, while the function is replaced 31 | %% with its abstract format (which, since we are already dealing with 32 | %% abstract format, is actually the abstract format of its abstract 33 | %% format). If the final element of the argument tuple (or list of argument 34 | %% tuples) is instead 35 | %% 36 | %% [{{F,Arity}, fun()}] 37 | %% 38 | %% then the fun() is assumed to not use any free variables from the context 39 | %% in which the function is defined. This is transformed to 40 | %% 41 | %% [{{F,Arity},{[], tuple()}}] 42 | %% 43 | %% which is the same as the prior transformation but with an empty list of 44 | %% free variables. A final element of any other form is left as is. 45 | 46 | parse_transform(Forms, _) -> 47 | forms(Forms). 48 | 49 | forms([F|Forms]) -> 50 | [form(F)|forms(Forms)]; 51 | forms(F) -> 52 | form(F). 53 | 54 | form({function,LF,F,A,Clauses}) -> 55 | {function,LF,F,A,forms(Clauses)}; 56 | form({clause,L,H,G,B}) -> 57 | {clause,L,H,G,forms(B)}; 58 | form({match,L,Lhs,Rhs}) -> 59 | {match,L,forms(Lhs),forms(Rhs)}; 60 | form({call,L,{remote,_,{atom,_,rt_intercept},{atom,_,AddFunction}}=Fun,Args}) 61 | when AddFunction == add; AddFunction == add_and_save -> 62 | [Node, Intercept] = Args, 63 | {call,L,Fun,[Node,intercept(Intercept)]}; 64 | form(F) when is_tuple(F) -> 65 | list_to_tuple(forms(tuple_to_list(F))); 66 | form(F) -> 67 | F. 68 | 69 | intercept({tuple,L,[Mod,Intercepts]}) -> 70 | {tuple,L,[Mod,intercepts(Intercepts)]}; 71 | intercept({tuple,L,[Mod,ModInt,Intercepts]}) -> 72 | {tuple,L,[Mod,ModInt,intercepts(Intercepts)]}. 73 | 74 | intercepts({cons,L1,{tuple,L2,[FA,Int]},T}) -> 75 | {cons,L1,{tuple,L2,[FA,intercepts(Int)]},intercepts(T)}; 76 | intercepts({tuple,L,[FreeVars,{'fun',LF,_}=Fun]}) -> 77 | {tuple,L,[freevars(FreeVars),erl_parse:abstract(Fun, LF)]}; 78 | intercepts({'fun',L,_}=Fun) -> 79 | {tuple,L,[{nil,L},erl_parse:abstract(Fun, L)]}; 80 | intercepts(F) -> 81 | F. 82 | 83 | freevars({cons,L,H,T}) -> 84 | {cons,L,freevar(H),freevars(T)}; 85 | freevars({nil,_}=Nil) -> 86 | Nil. 87 | 88 | freevar({var,L,V}=Var) -> 89 | {tuple,L,[{atom,L,V},Var]}; 90 | freevar(Term) -> 91 | Term. 
92 | -------------------------------------------------------------------------------- /misc/bench/bin/smartos/js/network.js: -------------------------------------------------------------------------------- 1 | // ydomain should be "absolute" or "relative" 2 | function init_nics(nics, ycol, cclass, label, ydomain) { 3 | // stores the transformed data for each nic 4 | var nicData = []; 5 | 6 | var x = d3.time.scale().range([0, width]); 7 | var y = d3.scale.linear().range([height, 0]); 8 | var xAxis = d3.svg.axis().scale(x).orient("bottom"); 9 | var yAxis = d3.svg.axis().scale(y).orient("left"); 10 | var colors = d3.scale.category20(); 11 | 12 | var line = d3.svg.line() 13 | .x(function(d) { return x(d.timestamp); }) 14 | .y(function(d) { return y(d[ycol]); }); 15 | 16 | var svg = d3.select("#network p.vis").append("svg") 17 | .attr("width", width + margin.left + margin.right) 18 | .attr("height", height + margin.top + margin.bottom) 19 | .append("g") 20 | .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); 21 | 22 | var parseDate = d3.time.format("%H:%M:%S").parse; 23 | 24 | svg.append("g") 25 | .attr("class", cclass + " x axis") 26 | .attr("transform", "translate(0," + height + ")") 27 | .call(xAxis); 28 | 29 | svg.append("g") 30 | .attr("class", cclass + " y axis") 31 | .call(yAxis); 32 | 33 | svg.append("text") 34 | .attr("text-anchor", "middle") 35 | .attr("transform", "translate(" + -(margin.left/2) + "," + (height/2) + ")rotate(-90)") 36 | .attr("class", "label") 37 | .text(label); 38 | 39 | var redraw = function() { 40 | 41 | var names = nicData.map(function(d) { return d.name; }); 42 | colors.domain(names); 43 | 44 | x.domain([ 45 | d3.min(nicData, function(c) { 46 | return d3.min(c.values, function(d) { return d.timestamp; }) 47 | }), 48 | d3.max(nicData, function(c) { 49 | return d3.max(c.values, function(d) { return d.timestamp; }) 50 | }) 51 | ]); 52 | if (ydomain === "absolute") { 53 | y.domain([0,100]); 54 | } else { 55 | y.domain([ 56 | d3.min(nicData, function(c) { 57 | return d3.min(c.values, function(d) { return d[ycol]; }) 58 | }), 59 | d3.max(nicData, function(c) { 60 | return d3.max(c.values, function(d) { return d[ycol]; }) 61 | }) 62 | ]); 63 | } 64 | 65 | var lines = svg.selectAll("." + cclass + "_line") 66 | .data(nicData, function(d) { return d.name; }); 67 | 68 | lines.enter().append("path") 69 | .attr("class", "line " + cclass + "_line") 70 | .attr("d", function(d) { return line(d.values); }) 71 | .style("stroke", function(d) { return colors(d.name); }); 72 | 73 | d3.transition(lines) 74 | .attr("d", function(d) { return line(d.values); }); 75 | 76 | lines.exit().remove(); 77 | 78 | d3.select("." + cclass + ".x.axis").call(xAxis); 79 | d3.select("." 
+ cclass + ".y.axis").call(yAxis); 80 | }; 81 | 82 | var add_nic_data = function(name, resource) { 83 | d3.csv(resource, function(data) { 84 | var values = data.map(function(d) { 85 | var tmp = {timestamp: parseDate(d.timestamp)}; 86 | tmp[ycol] = +d[ycol]; 87 | return tmp; 88 | }); 89 | 90 | nicData.push({name:name, values:values}); 91 | redraw(); 92 | }) 93 | }; 94 | 95 | nics.forEach(function(d) { add_nic_data(d.name, d.resource); }); 96 | }; 97 | -------------------------------------------------------------------------------- /misc/bench/bin/smartos/js/disk.js: -------------------------------------------------------------------------------- 1 | // ydomain should be "absolute" or "relative" 2 | function init_disks(disks, ycol, cclass, label, ydomain) { 3 | // stores the transformed data for each disk 4 | var diskData = []; 5 | 6 | var x = d3.time.scale().range([0, width]); 7 | var y = d3.scale.linear().range([height, 0]); 8 | var xAxis = d3.svg.axis().scale(x).orient("bottom"); 9 | var yAxis = d3.svg.axis().scale(y).orient("left"); 10 | var colors = d3.scale.category20(); 11 | 12 | var line = d3.svg.line() 13 | .x(function(d) { return x(d.timestamp); }) 14 | .y(function(d) { return y(d[ycol]); }); 15 | 16 | var svg = d3.select("#disk p.vis").append("svg") 17 | .attr("width", width + margin.left + margin.right) 18 | .attr("height", height + margin.top + margin.bottom) 19 | .append("g") 20 | .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); 21 | 22 | var parseDate = d3.time.format("%Y-%m-%dT%H:%M:%S").parse; 23 | 24 | svg.append("g") 25 | .attr("class", cclass + " x axis") 26 | .attr("transform", "translate(0," + height + ")") 27 | .call(xAxis); 28 | 29 | svg.append("g") 30 | .attr("class", cclass + " y axis") 31 | .call(yAxis); 32 | 33 | svg.append("text") 34 | .attr("text-anchor", "middle") 35 | .attr("transform", "translate(" + -(margin.left/2) + "," + (height/2) + ")rotate(-90)") 36 | .attr("class", "label") 37 | .text(label); 38 | 39 | var redraw = function() { 40 | 41 | var names = diskData.map(function(d) { return d.name; }); 42 | colors.domain(names); 43 | 44 | // length of time is same for all, just pull from first 45 | x.domain([ 46 | d3.min(diskData, function(c) { 47 | return d3.min(c.values, function(d) { return d.timestamp; }) 48 | }), 49 | d3.max(diskData, function(c) { 50 | return d3.max(c.values, function(d) { return d.timestamp; }) 51 | }) 52 | ]); 53 | if (ydomain === "absolute") { 54 | y.domain([0,100]); 55 | } else { 56 | y.domain([ 57 | d3.min(diskData, function(c) { 58 | return d3.min(c.values, function(d) { return d[ycol]; }) 59 | }), 60 | d3.max(diskData, function(c) { 61 | return d3.max(c.values, function(d) { return d[ycol]; }) 62 | }) 63 | ]); 64 | } 65 | 66 | var busy = svg.selectAll("." + cclass + "_line") 67 | .data(diskData, function(d) { return d.name; }); 68 | 69 | busy.enter().append("path") 70 | .attr("class", "line " + cclass + "_line") 71 | .attr("d", function(d) { return line(d.values); }) 72 | .style("stroke", function(d) { return colors(d.name); }); 73 | 74 | d3.transition(busy) 75 | .attr("d", function(d) { return line(d.values); }); 76 | 77 | busy.exit().remove(); 78 | 79 | d3.select("." + cclass + ".x.axis").call(xAxis); 80 | d3.select("." 
+ cclass + ".y.axis").call(yAxis); 81 | }; 82 | 83 | var add_disk_data = function(name, resource) { 84 | d3.csv(resource, function(data) { 85 | var values = data.map(function(d) { 86 | var tmp = {timestamp: parseDate(d.timestamp)}; 87 | tmp[ycol] = +d[ycol]; 88 | return tmp; 89 | }); 90 | 91 | diskData.push({name:name, values:values}); 92 | redraw(); 93 | }) 94 | }; 95 | 96 | disks.forEach(function(d) { add_disk_data(d.name, d.resource); }); 97 | }; 98 | -------------------------------------------------------------------------------- /src/yz_console.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2014-2016 Basho Technologies, Inc. All Rights Reserved. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %% ------------------------------------------------------------------- 20 | -module(yz_console). 21 | 22 | -behavior(clique_handler). 23 | 24 | -include("yokozuna.hrl"). 25 | 26 | %% New clique CLI code: 27 | -export([register_cli/0, 28 | format_dist_query_value/1, 29 | dist_query_cfg_change/2]). 30 | 31 | %% Old pre-clique CLI callbacks: 32 | -export([aae_status/1, 33 | switch_to_new_search/1]). 34 | 35 | -spec register_cli() -> ok. 36 | register_cli() -> 37 | clique:register_config_whitelist(["search.dist_query"]), 38 | clique:register_formatter(["search.dist_query"], fun format_dist_query_value/1), 39 | clique:register_config(["search", "dist_query"], fun dist_query_cfg_change/2), 40 | ok. 41 | 42 | %% @doc Print the Active Anti-Entropy status to stdout. 43 | -spec aae_status([]) -> ok. 44 | aae_status([]) -> 45 | ExchangeInfo = yz_kv:compute_exchange_info(), 46 | riak_kv_console:aae_exchange_status(ExchangeInfo), 47 | io:format("~n"), 48 | TreeInfo = yz_kv:compute_tree_info(), 49 | riak_kv_console:aae_tree_status(TreeInfo), 50 | io:format("~n"), 51 | riak_kv_console:aae_repair_status(ExchangeInfo), 52 | ok. 53 | 54 | %% @doc Switch over HTTP solr route and PB handling from legacy 55 | %% riak_search to yokozuna. This will multicall to all nodes in the 56 | %% cluster. If any nodes are down report them to stderr and return an 57 | %% error tuple. Once the switch is made the system cannot be switched 58 | %% back without restarting the cluster. 59 | -spec switch_to_new_search([]) -> ok | {error, {nodes_down, [node()]}}. 60 | switch_to_new_search([]) -> 61 | {_Good, Down} = riak_core_util:rpc_every_member_ann(yokozuna, switch_to_yokozuna, [], 5000), 62 | case Down of 63 | [] -> 64 | ok; 65 | _ -> 66 | Down2 = [atom_to_list(Node) || Node <- Down], 67 | DownStr = string:join(Down2, " "), 68 | io:format(standard_error, "The following nodes could not be reached: ~s", [DownStr]), 69 | {error, {nodes_down, Down}} 70 | end. 71 | 72 | 73 | %% @doc Callback for changes to dist_query enabled flag. 
When this flag is set to "on", 74 | %% then this node participates in distributed queries and will be included in 75 | %% cover plans when queries are made through yokozuna. When disabled, the node 76 | %% will be excluded from cover plans, meaning that it will not be consulted as part 77 | %% of a distributed query. Note that you can still query through this node; 78 | %% the node, however, will not be consulted in a Solr distributed query. 79 | dist_query_cfg_change(["search", "dist_query"], "on") -> 80 | set_dist_query(true); 81 | dist_query_cfg_change(["search", "dist_query"], "off") -> 82 | set_dist_query(false). 83 | 84 | set_dist_query(Val) -> 85 | {ok, OldVal} = yz_solr_proc:set_dist_query(Val), 86 | io_lib:format("Previous value: ~p", [format_dist_query_value(OldVal)]). 87 | 88 | format_dist_query_value(true) -> "on"; 89 | format_dist_query_value(false) -> "off". 90 | -------------------------------------------------------------------------------- /misc/bench/bin/smartos/js/latency.js: -------------------------------------------------------------------------------- 1 | function init_latency(resource) { 2 | 3 | var latNames = ["mean", "median", "95th", "99th", "99_9th", "max"]; 4 | var latencies; 5 | var x = d3.scale.linear().range([0, width]); 6 | var y = d3.scale.linear().range([height, 0]); 7 | var xAxis = d3.svg.axis().scale(x).orient("bottom"); 8 | var yAxis = d3.svg.axis().scale(y).orient("left"); 9 | var latColors = d3.scale.category10(); 10 | 11 | var line = d3.svg.line() 12 | .interpolate("basis") 13 | .x(function(d) { return x(d.elapsed); }) 14 | .y(function(d) { return y(d.value / 1000); }); // convert to ms 15 | 16 | 17 | var svg = d3.select("#latencies p.vis").append("svg") 18 | .attr("width", width + margin.left + margin.right) 19 | .attr("height", height + margin.top + margin.bottom) 20 | .append("g") 21 | .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); 22 | 23 | svg.append("text") 24 | .attr("class", "label") 25 | .text(resource); 26 | 27 | svg.append("g") 28 | .attr("class", "lat x axis") 29 | .attr("transform", "translate(0," + height + ")"); 30 | 31 | svg.append("g") 32 | .attr("class", "lat y axis"); 33 | 34 | svg.append("text") 35 | .attr("text-anchor", "middle") 36 | .attr("transform", "translate(" + -(margin.left/2) + "," + (height/2) + ")rotate(-90)") 37 | .attr("class", "label") 38 | .text("Latency ms"); 39 | 40 | var latBoxes = d3.select("#latencies p.control").selectAll("p") 41 | .data(latNames) 42 | .enter() 43 | .append("p") 44 | .attr("class", "selection") 45 | .text(function(d) { return d; }) 46 | .append("input") 47 | .attr("type", "checkbox") 48 | .attr("name", "lats") 49 | .attr("value", function(d) { return d; }) 50 | .attr("checked", true); 51 | 52 | var redraw = function() { 53 | 54 | var checked = d3.selectAll("#latencies p.control p input:checked")[0] 55 | .map(function(c) { return c.value }); 56 | 57 | latColors.domain(latNames); 58 | var selected = latencies.filter(function(o) { 59 | return checked.indexOf(o.name) != -1; 60 | }); 61 | 62 | x.domain(d3.extent(selected[0].values, function(d) { return d.elapsed; })); 63 | y.domain([ 64 | d3.min(selected, function(c) { 65 | return d3.min(c.values, function(v) { return v.value / 1000; }); 66 | }), 67 | d3.max(selected, function(c) { 68 | return d3.max(c.values, function(v) { return v.value / 1000; }); 69 | }) 70 | ]); 71 | 72 | var lat = svg.selectAll(".latency") 73 | .data(selected, function(d) { return d.name; }); 74 | 75 | lat.enter().append("path") 76 | .attr("class",
"line latency") 77 | .attr("d", function(d) { return line(d.values); }) 78 | .style("stroke", function(d) { return latColors(d.name); }) 79 | 80 | var latUpdate = d3.transition(lat) 81 | .attr("d", function(d) { return line(d.values); }); 82 | 83 | lat.exit().remove(); 84 | 85 | d3.select(".lat.x.axis").call(xAxis); 86 | d3.select(".lat.y.axis").call(yAxis); 87 | }; 88 | 89 | latBoxes.on("change", redraw); 90 | 91 | d3.csv(resource, function(data) { 92 | 93 | latencies = latNames.map(function(name) { 94 | return { 95 | name: name, 96 | values: data.map(function(d) { 97 | return {elapsed: d.elapsed, value: d[name]}; 98 | }) 99 | }; 100 | }); 101 | 102 | redraw(); 103 | }); 104 | } 105 | -------------------------------------------------------------------------------- /src/yz_entropy.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %% ------------------------------------------------------------------- 20 | %% @doc This module contains functionality related to entropy. 21 | -module(yz_entropy). 22 | -compile(export_all). 23 | -include("yokozuna.hrl"). 24 | 25 | %% @doc Iterate all the entropy data in `Index' calling `Fun' for 26 | %% every 100 entries. 27 | -spec iterate_entropy_data(index_name(), list(), function()) -> 28 | ok|error|not_available. 29 | iterate_entropy_data(Index, Filter, Fun) -> 30 | case yz_solr:ping(Index) of 31 | true -> 32 | Filter2 = [{continuation, none}, 33 | {limit, 34 | app_helper:get_env(?YZ_APP_NAME, 35 | entropy_data_limit, 100)}|Filter], 36 | case get_entropy_data(Index, Filter2) of 37 | {ok, ED} -> 38 | iterate_entropy_data(Index, Filter2, Fun, ED); 39 | {Err, _ED} -> 40 | Err 41 | end; 42 | _ -> 43 | ?NOTICE("Can't ping Solr index ~p to start iterating over entropy data", [Index]), 44 | not_available 45 | end. 46 | 47 | %%%=================================================================== 48 | %%% Private 49 | %%%=================================================================== 50 | 51 | -spec iterate_entropy_data(index_name(), list(), function(), ED::entropy_data()) 52 | -> ok|error. 53 | iterate_entropy_data(Index, Filter, Fun, #entropy_data{more=true, 54 | continuation=Cont, 55 | pairs=Pairs}) -> 56 | %% apply function to pairs before iterating through the next set 57 | lists:foreach(Fun, Pairs), 58 | Filter2 = lists:keyreplace(continuation, 1, Filter, {continuation, Cont}), 59 | case get_entropy_data(Index, Filter2) of 60 | {ok, ED} -> 61 | iterate_entropy_data(Index, Filter2, Fun, ED); 62 | {Err, _ED} -> 63 | Err 64 | end; 65 | iterate_entropy_data(_, _, Fun, #entropy_data{more=false, 66 | pairs=Pairs}) -> 67 | lists:foreach(Fun, Pairs). 
68 | 69 | -spec get_entropy_data(index_name(), list()) -> 70 | {ok|error, entropy_data()}. 71 | get_entropy_data(Index, Filter) -> 72 | case yz_solr:entropy_data(Index, Filter) of 73 | {error, {error, req_timedout}} -> 74 | ?ERROR("failed to iterate over entropy data due to request" 75 | ++ " exceeding timeout ~b for filter params ~p", 76 | [?YZ_SOLR_ED_REQUEST_TIMEOUT, Filter]), 77 | {error, #entropy_data{more=false, pairs=[]}}; 78 | {error, Err} -> 79 | ?ERROR("failed to iterate over entropy data due to request" 80 | ++ " error ~p for filter params ~p", [Err, Filter]), 81 | {error, #entropy_data{more=false, pairs=[]}}; 82 | ED -> 83 | {ok, ED} 84 | end. 85 | -------------------------------------------------------------------------------- /riak_test/yz_errors.erl: -------------------------------------------------------------------------------- 1 | %% @doc Ensure that error messages return as expected 2 | -module(yz_errors). 3 | -compile(export_all). 4 | -import(yz_rt, [host_entries/1, 5 | search_expect/5]). 6 | -include("yokozuna.hrl"). 7 | -include_lib("eunit/include/eunit.hrl"). 8 | 9 | -define(NO_HEADERS, []). 10 | -define(CFG, 11 | [{riak_core, 12 | [ 13 | {handoff_concurrency, 16}, 14 | {inactivity_timeout, 1000}, 15 | {ring_creation_size, 16} 16 | ]}, 17 | {yokozuna, 18 | [ 19 | {enabled, true} 20 | ]} 21 | ]). 22 | 23 | confirm() -> 24 | random:seed(now()), 25 | Cluster = rt:build_cluster(4, ?CFG), 26 | rt:wait_for_cluster_service(Cluster, yokozuna), 27 | ok = test_errors(Cluster), 28 | pass. 29 | 30 | test_errors(Cluster) -> 31 | ok = expect_bad_json(Cluster), 32 | ok = expect_bad_xml(Cluster), 33 | ok = expect_bad_query(Cluster), 34 | ok. 35 | 36 | expect_bad_json(Cluster) -> 37 | Index = <<"bad_json">>, 38 | Bucket = {<<"bad_json">>,<<"bucket">>}, 39 | Node = yz_rt:select_random(Cluster), 40 | HP = yz_rt:select_random(host_entries(rt:connection_info(Cluster))), 41 | ok = yz_rt:create_index_http(Cluster, Index), 42 | lager:info("Write bad json [~p]", [HP]), 43 | URL = bucket_url(HP, Bucket, "test"), 44 | Opts = [], 45 | CT = "application/json", 46 | Headers = [{"content-type", CT}], 47 | Body = "{\"bad\": \"unclosed\"", 48 | {ok, "204", _, _} = ibrowse:send_req(URL, Headers, put, Body, Opts), 49 | %% Sleep for soft commit 50 | timer:sleep(1100), 51 | %% still store the value in riak 52 | {ok, "200", _, Body} = ibrowse:send_req(URL, [{"accept", CT}], get, []), 53 | %% Sleep for soft commit 54 | timer:sleep(1100), 55 | ?assertEqual(ok, search_expect(Node, Index, ?YZ_ERR_FIELD_S, "1", 1)). 56 | 57 | expect_bad_xml(Cluster) -> 58 | Index = <<"bad_xml">>, 59 | Bucket = {Index,<<"bucket">>}, 60 | Node = yz_rt:select_random(Cluster), 61 | HP = yz_rt:select_random(host_entries(rt:connection_info(Cluster))), 62 | ok = yz_rt:create_index_http(Cluster, Index), 63 | lager:info("Write bad xml [~p]", [HP]), 64 | URL = bucket_url(HP, Bucket, "test"), 65 | Opts = [], 66 | CT = "application/xml", 67 | Headers = [{"content-type", CT}], 68 | Body = "<\"bad\" \"xml\"></", 69 | {ok, "204", _, _} = ibrowse:send_req(URL, Headers, put, Body, Opts), 70 | yz_rt:commit(Cluster, Index), 71 | %% still store the value in riak 72 | {ok, "200", _, Body} = ibrowse:send_req(URL, [{"accept", CT}], get, []), 73 | ?assertEqual(ok, search_expect(Node, Index, ?YZ_ERR_FIELD_S, "1", 1)). 
74 | 75 | expect_bad_query(Cluster) -> 76 | Index = <<"bad_query">>, 77 | Bucket = {Index, <<"bucket">>}, 78 | HP = yz_rt:select_random(host_entries(rt:connection_info(Cluster))), 79 | ok = yz_rt:create_index_http(Cluster, Index), 80 | lager:info("Write bad query [~p]", [HP]), 81 | URL = bucket_url(HP, Bucket, "test"), 82 | Opts = [], 83 | CT = "text/plain", 84 | Headers = [{"content-type", CT}], 85 | Body = "", 86 | {ok, "204", _, _} = ibrowse:send_req(URL, Headers, put, Body, Opts), 87 | yz_rt:commit(Cluster, Index), 88 | %% still store the value in riak 89 | {ok, "200", _, Body} = ibrowse:send_req(URL, [{"accept", CT}], get, []), 90 | %% send a bad query 91 | SearchURL = search_url(HP, Index) ++ "?q=*:*&sort=sco+desc", 92 | {ok, "400", _, _} = ibrowse:send_req(SearchURL, [], get, []), 93 | ok. 94 | 95 | bucket_url({Host,Port}, {BType, BName}, Key) -> 96 | ?FMT("http://~s:~B/types/~s/buckets/~s/keys/~s", [Host, Port, BType, BName, Key]). 97 | 98 | search_url({Host,Port}, Index) -> 99 | ?FMT("http://~s:~B/solr/~s/select", [Host, Port, Index]). 100 | -------------------------------------------------------------------------------- /src/yz_bucket_validator.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. 3 | %% 4 | %% This file is provided to you under the Apache License, 5 | %% Version 2.0 (the "License"); you may not use this file 6 | %% except in compliance with the License. You may obtain 7 | %% a copy of the License at 8 | %% 9 | %% http://www.apache.org/licenses/LICENSE-2.0 10 | %% 11 | %% Unless required by applicable law or agreed to in writing, 12 | %% software distributed under the License is distributed on an 13 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | %% KIND, either express or implied. See the License for the 15 | %% specific language governing permissions and limitations 16 | %% under the License. 17 | %% 18 | %% ------------------------------------------------------------------- 19 | %% 20 | %% @doc Verifies that a bucket type's `n_val' property matches any 21 | %% associated index's `n_val' 22 | -module(yz_bucket_validator). 23 | -export([validate/4]). 24 | -include("yokozuna.hrl"). 25 | 26 | -type prop() :: {PropName::atom(), PropValue::any()}. 27 | -type error() :: {PropName::atom(), ErrorReason::atom()}. 28 | -type props() :: [prop()]. 29 | -type errors() :: [error()]. 30 | 31 | %% @doc Performs two validations. The first is validating that an index 32 | %% exists before a bucket-type/bucket can be associated to it by setting `search_index'. 33 | %% The second checks that the bucket-type/bucket has the same `n_val' as the associated 34 | %% index's `n_val'. 35 | -spec validate(create | update, 36 | {riak_core_bucket_type:bucket_type(), undefined | binary()} | binary(), 37 | undefined | props(), 38 | props()) -> {props(), errors()}. 
39 | validate(_CreateOrUpdate, _Bucket, ExistingProps, BucketProps) -> 40 | Props = case {ExistingProps, BucketProps} of 41 | {undefined, BucketProps} -> 42 | BucketProps; 43 | {ExistingProps, undefined} -> 44 | ExistingProps; 45 | {ExistingProps, BucketProps} -> 46 | riak_core_bucket_props:merge(BucketProps, ExistingProps) 47 | end, 48 | case get_search_index_info(Props) of 49 | {error, no_search_index} -> 50 | {BucketProps, []}; 51 | {error, Msg} -> 52 | {proplists:delete(search_index, BucketProps), [{search_index, Msg}]}; 53 | {Index, INVal} -> 54 | BNVal = proplists:get_value(n_val, Props), 55 | validate_n_val(Index, INVal, BNVal, BucketProps) 56 | end. 57 | 58 | %% @private 59 | %% 60 | -spec validate_n_val(index_name(), n(), n(), props()) -> {props(), errors()}. 61 | validate_n_val(Index, INVal, BNVal, BucketProps) -> 62 | case INVal of 63 | INVal when INVal == BNVal -> 64 | {BucketProps, []}; 65 | _ -> 66 | Error = ?FMT("Bucket n_val ~p must match the associated " 67 | "search_index ~s n_val ~p", [BNVal, Index, INVal]), 68 | {proplists:delete(n_val, BucketProps), [{n_val, list_to_binary(Error)}]} 69 | end. 70 | 71 | %% @private 72 | %% 73 | -spec get_search_index_info(props()) -> {error, atom()} | {index_name(), n()}. 74 | get_search_index_info(Props) -> 75 | case proplists:get_value(search_index, Props) of 76 | undefined -> 77 | {error, no_search_index}; 78 | ?YZ_INDEX_TOMBSTONE -> 79 | {error, no_search_index}; 80 | Index -> 81 | case yz_index:exists(Index) of 82 | false -> 83 | {error, list_to_binary(?FMT("~s does not exist", [Index]))}; 84 | true -> 85 | {Index, index_n_val(Index)} 86 | end 87 | end. 88 | 89 | %% @private 90 | %% 91 | %% @doc Return the n_val of the given index. 92 | %% If it doesn't exist, return undefined. 93 | index_n_val(Index) -> 94 | yz_index:get_n_val(yz_index:get_index_info(Index)). 95 | -------------------------------------------------------------------------------- /riak_test/yz_languages.erl: -------------------------------------------------------------------------------- 1 | %% -*- coding: utf-8 -*- 2 | %% @doc Test indexing and search across various languages and encodings. 3 | -module(yz_languages). 4 | -compile(export_all). 5 | -include_lib("eunit/include/eunit.hrl"). 6 | 7 | -define(FMT(S, Args), lists:flatten(io_lib:format(S, Args))). 8 | -define(CFG, 9 | [ 10 | {riak_core, 11 | [ 12 | {ring_creation_size, 8} 13 | ]}, 14 | {yokozuna, 15 | [ 16 | {enabled, true} 17 | ]} 18 | ]). 19 | 20 | confirm() -> 21 | random:seed(now()), 22 | Cluster = rt:build_cluster(1, ?CFG), 23 | rt:wait_for_cluster_service(Cluster, yokozuna), 24 | confirm_body_search_encoding(Cluster), 25 | confirm_language_field_type(Cluster), 26 | confirm_tag_encoding(Cluster), 27 | confirm_reserved_word_safety(Cluster), 28 | pass. 29 | 30 | select_random(List) -> 31 | Length = length(List), 32 | Idx = random:uniform(Length), 33 | lists:nth(Idx, List). 34 | 35 | host_entries(ClusterConnInfo) -> 36 | [proplists:get_value(http, I) || {_,I} <- ClusterConnInfo]. 37 | 38 | bucket_url({Host,Port}, {BType, BName}, Key) -> 39 | ?FMT("http://~s:~B/types/~s/buckets/~s/keys/~s", 40 | [Host, Port, BType, BName, Key]). 41 | 42 | store_and_search(Cluster, Bucket, Index, CT, Body, Field, Term) -> 43 | Headers = [{"Content-Type", CT}], 44 | store_and_search(Cluster, Bucket, Index, Headers, CT, Body, Field, Term). 45 | 46 | store_and_search(Cluster, Bucket, Index, Headers, CT, Body, Field, Term) -> 47 | store_and_search(Cluster, Bucket, Index, "test", Headers, CT, Body, Field, Term).
48 | 49 | store_and_search(Cluster, Bucket, Index, Key, Headers, CT, Body, Field, Term) -> 50 | HP = select_random(host_entries(rt:connection_info(Cluster))), 51 | yz_rt:create_index_http(Cluster, HP, Index), 52 | URL = bucket_url(HP, Bucket, Key), 53 | lager:info("Storing to bucket ~s", [URL]), 54 | {ok, "204", _, _} = ibrowse:send_req(URL, Headers, put, Body), 55 | yz_rt:commit(Cluster, Index), 56 | {ok, "200", _, ReturnedBody} = ibrowse:send_req(URL, [{"accept", CT}], get, 57 | []), 58 | ?assertEqual(Body, list_to_binary(ReturnedBody)), 59 | lager:info("Verify values are indexed"), 60 | Node = yz_rt:select_random(Cluster), 61 | ?assertEqual(ok, yz_rt:search_expect(Node, Index, Field, Term, 1)), 62 | ok. 63 | 64 | confirm_body_search_encoding(Cluster) -> 65 | Index = <<"test_iso_8859_8">>, 66 | Bucket = {Index, <<"b">>}, 67 | lager:info("confirm_iso_8859_8 ~s", [Index]), 68 | Body = <<"א בְּרֵאשִׁית, בָּרָא אֱלֹהִים, אֵת הַשָּׁמַיִם, וְאֵת הָאָרֶץ"/utf8>>, 69 | store_and_search(Cluster, Bucket, Index, "text/plain", Body, "text", "בָּרָא"). 70 | 71 | confirm_language_field_type(Cluster) -> 72 | Index = <<"test_shift_jis">>, 73 | Bucket = {Index, <<"b">>}, 74 | lager:info("confirm_shift_jis ~s", [Index]), 75 | Body = <<"{\"text_ja\" : \"私はハイビスカスを食べるのが 大好き\"}"/utf8>>, 76 | store_and_search(Cluster, Bucket, Index, "application/json", Body, "text_ja", "大好き"). 77 | 78 | confirm_tag_encoding(Cluster) -> 79 | Index = <<"test_iso_8859_6">>, 80 | Bucket = {Index, <<"b">>}, 81 | lager:info("confirm_iso_8859_6 ~s", [Index]), 82 | Body = <<"أردت أن أقرأ كتابا عن تاريخ المرأة في فرنسا"/utf8>>, 83 | Headers = [{"Content-Type", "text/plain"}, 84 | {"x-riak-meta-yz-tags", "x-riak-meta-arabic_s"}, 85 | {"x-riak-meta-arabic_s", <<"أقرأ"/utf8>>}], 86 | store_and_search(Cluster, Bucket, Index, Headers, "text/plain", Body, "arabic_s", "أقرأ"). 87 | 88 | confirm_reserved_word_safety(Cluster) -> 89 | Index = <<"reserved">>, 90 | Bucket = {Index, <<"b">>}, 91 | lager:info("confirm_reserved_word_safety ~s", [Index]), 92 | Body = <<"whatever">>, 93 | Headers = [{"Content-Type", "text/plain"}], 94 | RKey = "OR", 95 | store_and_search(Cluster, Bucket, Index, RKey, Headers, "text/plain", Body, "text", "whatever"). 96 | -------------------------------------------------------------------------------- /docs/ADMIN.md: -------------------------------------------------------------------------------- 1 | Administration 2 | ========== 3 | 4 | ## Index Creation 5 | 6 | Before Riak data may be indexed an _index_ must be created. 7 | The easiest way to accomplish this is to use the HTTP index 8 | resource. 9 | 10 | ### HTTP Index Admin 11 | 12 | To create a new index, PUT to the `/search/index` path, suffixed with your 13 | index name, with a content type of `application/json`. The JSON 14 | content itself is optional and allows specifying a schema to use 15 | besides the default schema. 16 | 17 | ```bash 18 | curl -i -XPUT http://localhost:8098/search/index/my_index \ 19 | -H 'content-type: application/json' \ 20 | -d '{"schema":"my_schema"}' 21 | ``` 22 | 23 | A `204 No Content` should be returned if successful, or a `409 24 | Conflict` code if the index already exists. 25 | 26 | To get information about the index, issue a GET request to the same URL. 27 | 28 | ```bash 29 | curl http://localhost:8098/search/index/my_index | jsonpp 30 | { 31 | "name":"my_index", 32 | "schema":"my_schema" 33 | } 34 | ``` 35 | 36 | If you leave off the index name from the GET request, all installed 37 | indexes will be returned as a JSON array.
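For example, a sketch of listing the index collection (the output below assumes only the `my_index` created above exists, and elides any additional fields the response may carry):

```bash
curl http://localhost:8098/search/index | jsonpp
[
  {
    "name":"my_index",
    "schema":"my_schema"
  }
]
```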
38 | 39 | Finally, when you are done with the index, you can issue a DELETE 40 | request with an index name to remove the index. 41 | 42 | ```bash 43 | curl -XDELETE http://localhost:8098/search/index/my_index 44 | ``` 45 | 46 | Index Association 47 | ----------------- 48 | 49 | In order for bucket data to be indexed an index must be associated 50 | with it. An index may be associated at the type or bucket level. If 51 | applied at the type level all buckets under that type will inherit the 52 | index association. If set at the bucket level the association applies 53 | only to that bucket. If applied at both levels then the bucket-level 54 | association will override. There are two main patterns for structuring 55 | index association: one-to-one and many-to-one. 56 | 57 | ### One-to-One ### 58 | 59 | In this structure each bucket has its own index. This should be used 60 | when the data in each bucket is different and can't share a common 61 | schema. It's also useful for physical separation of indexes as each 62 | index maps to its own Solr Core. This potentially allows lower 63 | latency queries and efficient deletion of index data. Here's an 64 | example of creating a data type with two buckets containing data 65 | different enough that they warrant separate indexes. 66 | 67 | **N.B.**: Currently bucket-type create takes JSON on the command line but 68 | that is slated to change by the official Riak 2.0 release. 69 | 70 | ``` 71 | riak-admin bucket-type create data '{"props":{}}' 72 | 73 | riak-admin bucket-type activate data 74 | 75 | curl -XPUT -H 'content-type: application/json' 'http://localhost:10018/search/index/people' -d '{"schema":"people.xml"}' 76 | 77 | curl -XPUT -H 'content-type: application/json' 'http://localhost:10018/search/index/events' -d '{"schema":"events.xml"}' 78 | 79 | curl -XPUT -H 'content-type: application/json' 'http://localhost:10018/types/data/buckets/people/props' -d '{"props":{"search_index":"people"}}' 80 | 81 | curl -XPUT -H 'content-type: application/json' 'http://localhost:10018/types/data/buckets/events/props' -d '{"props":{"search_index":"events"}}' 82 | ``` 83 | 84 | ### Many-to-One ### 85 | 86 | In this structure each bucket inherits the index associated with its 87 | type. Thus all buckets of that type share the same index. This structure should be 88 | used when the bucket data is the same but a logical separation in the 89 | Riak key-space is desired. Here is an example where the `people` type 90 | groups people into separate buckets by state. 91 | 92 | ``` 93 | riak-admin bucket-type create people '{"props":{"search_index":"people"}}' 94 | 95 | riak-admin bucket-type activate people 96 | 97 | curl -XPUT -H 'content-type: application/json' 'http://localhost:10018/search/index/people' -d '{"schema":"people.xml"}' 98 | 99 | curl ... 'http://localhost:10018/types/people/buckets/maryland/keys/ryan_zezeski' ... 100 | curl ... 'http://localhost:10018/types/people/buckets/oregon/keys/eric_redmond' ... 101 | ``` 102 | -------------------------------------------------------------------------------- /src/yz_json_extractor.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License.
You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %% ------------------------------------------------------------------- 20 | 21 | %% @doc An extractor for JSON. Nested objects have their fields 22 | %% concatenated with `field_separator'. An array is converted into a 23 | %% multi-valued field. 24 | %% 25 | %% Example: 26 | %% ``` 27 | %% {"name":"ryan", 28 | %% "info":{"city":"Baltimore", 29 | %% "visited":["Boston", "New York", "San Francisco"]}} 30 | %% 31 | %% [{<<"info.visited">>,<<"San Francisco">>}, 32 | %% {<<"info.visited">>,<<"New York">>}, 33 | %% {<<"info.visited">>,<<"Boston">>}, 34 | %% {<<"info.city">>,<<"Baltimore">>}, 35 | %% {<<"name">>,<<"ryan">>}] 36 | %% ''' 37 | %% Options: 38 | %% 39 | %% `field_separator' - Use a different field separator than the 40 | %% default of `.'. 41 | 42 | -module(yz_json_extractor). 43 | -compile(export_all). 44 | -include("yokozuna.hrl"). 45 | -define(DEFAULT_FIELD_SEPARATOR, <<".">>). 46 | -record(state, { 47 | fields = [], 48 | field_separator = ?DEFAULT_FIELD_SEPARATOR 49 | }). 50 | -type state() :: #state{}. 51 | 52 | -spec extract(binary()) -> fields() | {error, any()}. 53 | extract(Value) -> 54 | extract(Value, ?NO_OPTIONS). 55 | 56 | -spec extract(binary(), proplist()) -> fields() | {error, any()}. 57 | extract(Value, Opts) -> 58 | Sep = proplists:get_value(field_separator, Opts, ?DEFAULT_FIELD_SEPARATOR), 59 | extract_fields(Value, #state{field_separator=Sep}). 60 | 61 | -spec extract_fields(binary(), state()) -> fields(). 62 | extract_fields(Value, S) -> 63 | Struct = mochijson2:decode(Value), 64 | S2 = extract_fields(undefined, Struct, S), 65 | S2#state.fields. 66 | 67 | -spec extract_fields(binary() | undefined, term(), state()) -> state(). 68 | %% Object 69 | extract_fields(CurrentName, {struct, JSONFields}, S) -> 70 | lists:foldl(extract_field(CurrentName), S, JSONFields); 71 | 72 | %% Array 73 | extract_fields(CurrentName, Array, S) when is_list(Array) -> 74 | lists:foldl(extract_element(CurrentName), S, Array); 75 | 76 | %% null value 77 | extract_fields(_, null, S) -> 78 | S; 79 | 80 | %% Value 81 | extract_fields(CurrentName, Value, S) -> 82 | Fields = S#state.fields, 83 | S#state{fields=[{CurrentName, clean_value(Value)}|Fields]}. 84 | 85 | -spec extract_field(binary() | undefined) -> fun(({binary(), term()}, state()) -> state()). 86 | extract_field(CurrentName) -> 87 | fun({Name, Val}, S) -> 88 | Separator = S#state.field_separator, 89 | FieldName = new_field_name(CurrentName, Name, Separator), 90 | extract_fields(FieldName, Val, S) 91 | end. 92 | 93 | -spec extract_element(binary() | undefined) -> fun((term(), state()) -> state()). 94 | extract_element(CurrentName) -> 95 | fun(Element, S) -> 96 | extract_fields(CurrentName, Element, S) 97 | end. 98 | 99 | -spec new_field_name(binary() | undefined, binary(), binary()) -> binary(). 100 | new_field_name(undefined, FieldName, _) -> 101 | FieldName; 102 | new_field_name(CurrentName, FieldName, Separator) -> 103 | <<CurrentName/binary,Separator/binary,FieldName/binary>>.
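%% @private
%%
%% @doc Normalize a leaf value before it is emitted as a field:
%% numbers are rendered to binary via mochinum:digits/1 so Solr
%% receives them as strings; all other values pass through unchanged.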
104 | 105 | clean_value(Value) -> 106 | case is_number(Value) of 107 | true -> list_to_binary(mochinum:digits(Value)); 108 | false -> Value 109 | end. -------------------------------------------------------------------------------- /.rebar_plugins/rebar_test_plugin.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved. 4 | %% 5 | %% ------------------------------------------------------------------- 6 | 7 | -module(rebar_test_plugin). 8 | 9 | -export([ 10 | client_test_clean/2, 11 | client_test_compile/2, 12 | client_test_run/2, 13 | int_test_clean/2, 14 | int_test_compile/2, 15 | int_test_run/2, 16 | riak_test_clean/2, 17 | riak_test_compile/2 18 | ]). 19 | 20 | %% =================================================================== 21 | %% Public API 22 | %% =================================================================== 23 | client_test_clean(Config, AppFile) -> 24 | case should_i_run(Config) of 25 | false -> ok; 26 | _ -> test_clean(client_test, Config, AppFile) 27 | end. 28 | 29 | client_test_compile(Config, AppFile) -> 30 | case should_i_run(Config) of 31 | false -> ok; 32 | _ -> test_compile(client_test, Config, AppFile) 33 | end. 34 | 35 | client_test_run(Config, AppFile) -> 36 | case should_i_run(Config) of 37 | false -> ok; 38 | _ -> test_run(client_test, Config, AppFile) 39 | end. 40 | 41 | int_test_clean(Config, AppFile) -> 42 | case should_i_run(Config) of 43 | false -> ok; 44 | _ -> test_clean(int_test, Config, AppFile) 45 | end. 46 | 47 | int_test_compile(Config, AppFile) -> 48 | case should_i_run(Config) of 49 | false -> ok; 50 | _ -> test_compile(int_test, Config, AppFile) 51 | end. 52 | 53 | int_test_run(Config, AppFile) -> 54 | case should_i_run(Config) of 55 | false -> ok; 56 | _ -> test_run(int_test, Config, AppFile) 57 | end. 58 | 59 | riak_test_clean(Config, AppFile) -> 60 | case should_i_run(Config) of 61 | false -> ok; 62 | _ -> test_clean(riak_test, Config, AppFile) 63 | end. 64 | 65 | riak_test_compile(Config, AppFile) -> 66 | case should_i_run(Config) of 67 | false -> ok; 68 | _ -> test_compile(riak_test, Config, AppFile) 69 | end. 70 | 71 | %% =================================================================== 72 | %% Private Functions - pronounced Funk-tee-owns, not funk-ee-towns 73 | %% =================================================================== 74 | should_i_run(Config) -> 75 | rebar_utils:processing_base_dir(Config). 76 | 77 | option(TestType, Key, Config) -> 78 | case proplists:get_value(TestType, element(3, Config), not_configured) of 79 | not_configured -> {error, not_configured}; 80 | TestConfig -> 81 | proplists:get_value(Key, TestConfig, {error, not_set}) 82 | end. 83 | 84 | test_clean(TestType, Config, _AppFile) -> 85 | case option(TestType, test_output, Config) of 86 | {error, not_set} -> 87 | io:format("No test_output directory set, check your rebar.config"); 88 | TestOutputDir -> 89 | io:format("Removing test_output dir ~s~n", [TestOutputDir]), 90 | rebar_file_utils:rm_rf(TestOutputDir) 91 | end, 92 | ok. 93 | 94 | test_compile(TestType, Config, AppFile) -> 95 | CompilationConfig = compilation_config(TestType, Config), 96 | OutputDir = option(TestType, test_output, Config), 97 | rebar_erlc_compiler:compile(CompilationConfig, AppFile), 98 | ok. 
99 | 100 | test_run(TestType, Config, _AppFile) -> 101 | OutputDir = option(TestType, test_output, Config), 102 | Cwd = rebar_utils:get_cwd(), 103 | ok = file:set_cwd([Cwd, $/, OutputDir]), 104 | EunitResult = (catch eunit:test("./")), 105 | %% Return to original working dir 106 | ok = file:set_cwd(Cwd), 107 | EunitResult. 108 | 109 | 110 | compilation_config(TestType, Conf) -> 111 | C1 = rebar_config:set(Conf, TestType, undefined), 112 | C2 = rebar_config:set(C1, plugins, undefined), 113 | ErlOpts = rebar_utils:erl_opts(Conf), 114 | ErlOpts1 = proplists:delete(src_dirs, ErlOpts), 115 | ErlOpts2 = [{d, 'TEST'}, {d, 'EQC'}, {outdir, option(TestType, test_output, Conf)}, {src_dirs, option(TestType, test_paths, Conf)} | ErlOpts1], 116 | rebar_config:set(C2, erl_opts, ErlOpts2). 117 | -------------------------------------------------------------------------------- /riak_test/yz_ensemble.erl: -------------------------------------------------------------------------------- 1 | -module(yz_ensemble). 2 | -compile(export_all). 3 | -include_lib("eunit/include/eunit.hrl"). 4 | 5 | -define(CFG, 6 | [ 7 | {riak_core, 8 | [ 9 | {ring_creation_size, 8} 10 | ]}, 11 | {yokozuna, 12 | [ 13 | {enabled, true} 14 | ]} 15 | ]). 16 | 17 | confirm() -> 18 | NumNodes = 3, 19 | NVal = 3, 20 | ConfigB = ensemble_util:fast_config(NVal), 21 | Config = ConfigB ++ [{yokozuna, [{enabled, true}]}], 22 | lager:info("Building cluster and waiting for ensemble to stabilize"), 23 | Nodes = build_cluster_with_yz_support(NumNodes, Config, NVal), 24 | rt:wait_for_cluster_service(Nodes, yokozuna), 25 | vnode_util:load(Nodes), 26 | Node = hd(Nodes), 27 | 28 | lager:info("Creating/activating 'strong' bucket type"), 29 | rt:create_and_activate_bucket_type(Node, <<"strong">>, 30 | [{consistent, true}, {n_val, NVal}]), 31 | 32 | Bucket = {<<"strong">>, <<"test">>}, 33 | Index = <<"testi">>, 34 | create_index(Node, Index), 35 | set_bucket_props(Node, Bucket, Index), 36 | 37 | verify_ensemble_delete_support(Nodes, Bucket, Index), 38 | 39 | pass. 40 | 41 | 42 | %% @private 43 | %% @doc Populates then deletes keys from a strongly-consistent (SC) bucket 44 | verify_ensemble_delete_support(Cluster, Bucket, Index) -> 45 | %% Yz only supports UTF-8 compatible keys 46 | Keys = [<<N:64/integer>> || N <- lists:seq(1,2000), 47 | not lists:any(fun(E) -> E > 127 end,binary_to_list(<<N:64/integer>>))], 48 | 49 | PBC = rt:pbc(hd(Cluster)), 50 | 51 | lager:info("Writing ~p keys", [length(Keys)]), 52 | [ok = rt:pbc_write(PBC, Bucket, Key, Key, "text/plain") || Key <- Keys], 53 | yz_rt:commit(Cluster, Index), 54 | 55 | %% soft commit wait, then check that last key is indexed 56 | lager:info("Search for keys to verify they exist"), 57 | LKey = lists:last(Keys), 58 | rt:wait_until(fun() -> 59 | {M, _} = riakc_pb_socket:search(PBC, Index, query_value(LKey)), 60 | ok == M 61 | end), 62 | [{ok, _} = 63 | riakc_pb_socket:search(PBC, Index, query_value(Key)) || Key <- Keys], 64 | 65 | lager:info("Deleting keys"), 66 | [riakc_pb_socket:delete(PBC, Bucket, Key) || Key <- Keys], 67 | yz_rt:commit(Cluster, Index), 68 | rt:wait_until(fun() -> 69 | case riakc_pb_socket:search(PBC, Index, query_value(LKey)) of 70 | {ok,{search_results,Res,_,_}} -> 71 | lager:info("RES: ~p ~p~n", [Res, LKey]), 72 | Res == []; 73 | S -> 74 | lager:info("OTHER: ~p ~p~n", [S, LKey]), 75 | false 76 | end 77 | end), 78 | [ {ok,{search_results,[],_,_}} = 79 | riakc_pb_socket:search(PBC, Index, query_value(Key)) || Key <- Keys], 80 | 81 | ok.
82 | 83 | 84 | %% @private 85 | %% @doc build a cluster from ensemble_util + yz support 86 | %% 87 | %% NOTE: There's a timing issue that causes join_cluster to hang the riak_test 88 | %% node when adding yokozuna and ensemble support. Waiting for yokozuna 89 | %% to load on each node allows join_cluster to complete consistently. 90 | build_cluster_with_yz_support(Num, Config, NVal) -> 91 | Nodes = rt:deploy_nodes(Num, Config), 92 | [rt:wait_for_cluster_service([N], yokozuna) || N <- Nodes], 93 | Node = hd(Nodes), 94 | rt:join_cluster(Nodes), 95 | ensemble_util:wait_until_cluster(Nodes), 96 | ensemble_util:wait_for_membership(Node), 97 | ensemble_util:wait_until_stable(Node, NVal), 98 | Nodes. 99 | 100 | %% @private 101 | %% @doc Builds a simple riak key query 102 | query_value(Value) -> 103 | V2 = iolist_to_binary(re:replace(Value, "\"", "%22")), 104 | V3 = iolist_to_binary(re:replace(V2, "\\\\", "%5C")), 105 | <<"_yz_rk:\"",V3/binary,"\"">>. 106 | 107 | %% pulled from yz_rt 108 | 109 | %% @private 110 | create_index(Node, Index) -> 111 | lager:info("Creating index ~s [~p]", [Index, Node]), 112 | ok = rpc:call(Node, yz_index, create, [Index]). 113 | 114 | %% @private 115 | set_bucket_props(Node, Bucket, Index) -> 116 | Props = [{search_index, Index}], 117 | rpc:call(Node, riak_core_bucket, set_bucket, [Bucket, Props]). 118 | -------------------------------------------------------------------------------- /tools/grab-solr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Script to grab Solr and embed in priv dir. This script assumes it is 4 | # being called from root dir or tools dir. 5 | # 6 | # Usage: 7 | # ./grab-solr.sh 8 | # 9 | # specify SOLR_PKG_DIR to skip the solr download and use a local copy 10 | set -e 11 | 12 | if [ $(basename $PWD) != "tools" ] 13 | then 14 | cd tools 15 | fi 16 | 17 | PRIV_DIR=../priv 18 | CONF_DIR=$PRIV_DIR/conf 19 | SOLR_DIR=$PRIV_DIR/solr 20 | BUILD_DIR=../build 21 | VSN=solr-4.10.4-yz-2 22 | FILENAME=$VSN.tgz 23 | TMP_DIR=/var/tmp/yokozuna 24 | TMP_FILE=$TMP_DIR/$FILENAME 25 | SRC_DIR=$BUILD_DIR/$VSN 26 | EXAMPLE_DIR=$SRC_DIR/example 27 | COL1_DIR=$EXAMPLE_DIR/solr/collection1 28 | : ${ARTIFACT_URL_PREFIX:="http://s3.amazonaws.com/files.basho.com"} 29 | 30 | check_for_solr() 31 | { 32 | # $SOLR_DIR is preloaded with xml files, so check for the generated jar 33 | test -e $SOLR_DIR/start.jar 34 | } 35 | 36 | download() 37 | { 38 | if which wget > /dev/null; then 39 | wget --no-check-certificate --progress=dot:mega $1 40 | elif which curl > /dev/null; then 41 | curl --insecure --progress-bar -O $1 42 | elif which fetch > /dev/null; then 43 | fetch --no-verify-peer $1 44 | fi 45 | } 46 | 47 | get_solr() 48 | { 49 | if [ -z ${SOLR_PKG_DIR+x} ] 50 | then 51 | if [ -e $TMP_FILE ]; then 52 | echo "Using cached copy of Solr $TMP_FILE" 53 | ln -s $TMP_FILE $FILENAME 54 | else 55 | echo "Pulling Solr from S3" 56 | download "${ARTIFACT_URL_PREFIX}/solr/$FILENAME" 57 | if [ -d $TMP_DIR ]; then 58 | cp $FILENAME $TMP_DIR 59 | else 60 | mkdir -m 1777 $TMP_DIR 61 | cp $FILENAME $TMP_DIR 62 | fi 63 | fi 64 | else 65 | # This is now obsolete thanks to implicit caching above 66 | # but will leave it in for now so as not to break anyone. 67 | echo "Using local copy of Solr $SOLR_PKG_DIR/$FILENAME" 68 | cp $SOLR_PKG_DIR/$FILENAME ./ 69 | fi 70 | tar zxf $FILENAME 71 | } 72 | 73 | if ! check_for_solr 74 | then 75 | 76 | echo "Create dir $BUILD_DIR" 77 | if [ !
-e $BUILD_DIR ]; then 78 | mkdir $BUILD_DIR 79 | fi 80 | 81 | cd $BUILD_DIR 82 | 83 | if [ ! -e $SRC_DIR ] 84 | then 85 | get_solr 86 | fi 87 | 88 | echo "Creating Solr dir $SOLR_DIR" 89 | 90 | # Explicitly copy files needed rather than copying everything and 91 | # removing which requires using cp -rn (since $SOLR_DIR/etc has files 92 | # which shouldn't be overwritten). For whatever reason, cp -n causes 93 | # non-zero exit code when files that would have been overwritten are 94 | # detected. 95 | cp -r $EXAMPLE_DIR/etc/create-solrtest.keystore.sh $SOLR_DIR/etc 96 | cp -r $EXAMPLE_DIR/etc/webdefault.xml $SOLR_DIR/etc 97 | cp -r $EXAMPLE_DIR/lib $SOLR_DIR 98 | cp -r $COL1_DIR/conf/lang $CONF_DIR 99 | cp $COL1_DIR/conf/protwords.txt $CONF_DIR 100 | cp $COL1_DIR/conf/stopwords.txt $CONF_DIR 101 | cp $COL1_DIR/conf/synonyms.txt $CONF_DIR 102 | cp $COL1_DIR/conf/mapping-* $CONF_DIR 103 | # TODO: does resources need to be copied? 104 | cp -r $EXAMPLE_DIR/resources $SOLR_DIR 105 | cp -r $EXAMPLE_DIR/solr-webapp $SOLR_DIR 106 | cp -r $EXAMPLE_DIR/start.jar $SOLR_DIR 107 | cp -r $EXAMPLE_DIR/webapps $SOLR_DIR 108 | 109 | echo "Solr dir created successfully" 110 | fi 111 | 112 | JAVA_LIB=../priv/java_lib 113 | YZ_JAR_VSN=3 114 | YZ_JAR_NAME=yokozuna-$YZ_JAR_VSN.jar 115 | 116 | if [ ! -e $JAVA_LIB/$YZ_JAR_NAME ] 117 | then 118 | if [ ! -d $JAVA_LIB ] 119 | then 120 | mkdir $JAVA_LIB 121 | fi 122 | 123 | echo "Downloading $YZ_JAR_NAME" 124 | download "${ARTIFACT_URL_PREFIX}/yokozuna/$YZ_JAR_NAME" 125 | mv $YZ_JAR_NAME $JAVA_LIB/$YZ_JAR_NAME 126 | fi 127 | 128 | EXT_LIB=../priv/solr/lib/ext 129 | MON_JAR_VSN=1 130 | MON_JAR_NAME=yz_monitor-$MON_JAR_VSN.jar 131 | 132 | if [ ! -e $EXT_LIB/$MON_JAR_NAME ] 133 | then 134 | echo "Downloading $MON_JAR_NAME" 135 | download "${ARTIFACT_URL_PREFIX}/yokozuna/$MON_JAR_NAME" 136 | mv $MON_JAR_NAME $EXT_LIB/$MON_JAR_NAME 137 | fi 138 | -------------------------------------------------------------------------------- /src/yz_rs_migration.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2013 Basho Technologies, Inc. All Rights Reserved. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %% ------------------------------------------------------------------- 20 | %% @doc This code is only needed for riak search migration and can be 21 | %% pulled once riak search is removed from riak. 22 | 23 | -module(yz_rs_migration). 24 | -compile(export_all). 25 | -include("yokozuna.hrl"). 26 | 27 | %% @doc Determine if Riak Search is enabled. 28 | -spec is_riak_search_enabled() -> boolean(). 29 | is_riak_search_enabled() -> 30 | app_helper:get_env(?RS_SVC, enabled, false). 31 | 32 | %% @doc Remove Riak Search pre-commit hook from all buckets when Riak 33 | %% Search is disabled. 
34 | %% 35 | %% Previous versions of Riak had a bug in `set_bucket' which caused 36 | %% bucket fixups to leak into the raw ring. If a user is upgrading 37 | %% from one of these versions, and had enabled search on a bucket, then 38 | %% the Riak Search hook will be in the raw ring. After migrating to 39 | %% Yokozuna these hooks must be removed to avoid errors. 40 | -spec strip_rs_hooks(IsRSEnabled :: boolean(), ring()) -> ok. 41 | strip_rs_hooks(true, _) -> 42 | ok; 43 | strip_rs_hooks(false, Ring) -> 44 | Buckets = riak_core_ring:get_buckets(Ring), 45 | strip_rs_hooks_2(Ring, Buckets). 46 | 47 | %%%=================================================================== 48 | %%% EVERYTHING BELOW IS FOR BUG CAUSED BY LEAKED BUCKET FIXUPS. 49 | %%% 50 | %%% Much of this code was copied from `riak_search_kv_hook'. 51 | %%%=================================================================== 52 | 53 | %% @private 54 | %% 55 | %% @doc Given the current pre-commit hook, generate a new one with all 56 | %% instances of the Riak Search hook removed. 57 | %% 58 | %% `Changed' - A boolean indicating if the `Precommit' value changed. 59 | %% 60 | %% `NewPrecommit' - The new pre-commit hook. 61 | -spec gen_new_precommit([term()]) -> {Changed :: boolean(), 62 | NewPrecommit :: [term()]}. 63 | gen_new_precommit(Precommit) -> 64 | %% Strip ALL Riak Search hooks. 65 | NewPrecommit = lists:filter(fun ?MODULE:not_rs_hook/1, Precommit), 66 | Changed = not (Precommit =:= NewPrecommit), 67 | {Changed, NewPrecommit}. 68 | 69 | %% @private 70 | %% 71 | %% @doc Retrieve the pre-commit hook from bucket properties. If it 72 | %% does not exist then default to the empty list. 73 | -spec get_precommit([term()]) -> [term()]. 74 | get_precommit(BProps) -> 75 | case proplists:get_value(precommit, BProps, []) of 76 | X when is_list(X) -> X; 77 | {struct, _}=X -> [X] 78 | end. 79 | 80 | %% @private 81 | %% 82 | %% @doc Predicate function which returns `true' if the `Hook' is NOT a 83 | %% Riak Search hook. For use with `lists:filter/2'. 84 | -spec not_rs_hook(term()) -> boolean(). 85 | not_rs_hook(Hook) -> 86 | not (Hook == rs_precommit_def()). 87 | 88 | %% @private 89 | %% 90 | %% @doc The definition of the Riak Search pre-commit hook. 91 | -spec rs_precommit_def() -> term(). 92 | rs_precommit_def() -> 93 | {struct, [{<<"mod">>,<<"riak_search_kv_hook">>}, 94 | {<<"fun">>,<<"precommit">>}]}. 95 | 96 | strip_rs_hooks_2(_Ring, []) -> 97 | ok; 98 | strip_rs_hooks_2(Ring, [Bucket|Rest]) -> 99 | BProps = riak_core_bucket:get_bucket(Bucket, Ring), 100 | Precommit = get_precommit(BProps), 101 | {Changed, NewPreHook} = gen_new_precommit(Precommit), 102 | case Changed of 103 | true -> 104 | riak_core_bucket:set_bucket(Bucket, [{precommit, NewPreHook}]), 105 | ok; 106 | false -> 107 | ok 108 | end, 109 | strip_rs_hooks_2(Ring, Rest). 110 | -------------------------------------------------------------------------------- /riak_test/yz_search_http.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2015 Basho Technologies, Inc. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License.
You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %%------------------------------------------------------------------- 20 | 21 | %% @doc Test that exercises various HTTP calls and methods 22 | %% related to Yokozuna 23 | %% @end 24 | 25 | -module(yz_search_http). 26 | 27 | -compile(export_all). 28 | -include("yokozuna.hrl"). 29 | -include_lib("eunit/include/eunit.hrl"). 30 | 31 | -define(RING_SIZE, 8). 32 | -define(DEFAULT_TIMEOUT, 60000). 33 | -define(CFG, 34 | [ 35 | {riak_core, 36 | [ 37 | {ring_creation_size, ?RING_SIZE} 38 | ]}, 39 | {yokozuna, 40 | [ 41 | {enabled, true}, 42 | {solr_request_timeout, ?DEFAULT_TIMEOUT} 43 | ]} 44 | ]). 45 | -define(NO_HEADERS, []). 46 | -define(NO_BODY, <<>>). 47 | -define(INDEX, <<"test_search_http">>). 48 | -define(TYPE, <<"data_foo">>). 49 | -define(BUCKET, {?TYPE, <<"test_search_http">>}). 50 | 51 | confirm() -> 52 | [Node1|_] = Cluster = rt:build_cluster(4, ?CFG), 53 | rt:wait_for_cluster_service(Cluster, yokozuna), 54 | ok = yz_rt:create_bucket_type(Cluster, ?TYPE), 55 | ok = yz_rt:create_index_http(Cluster, ?INDEX), 56 | yz_rt:set_index(Node1, ?BUCKET, ?INDEX), 57 | HP = hd(yz_rt:host_entries(rt:connection_info(Cluster))), 58 | Key = <<"Problem">>, 59 | Value = <<"FOR YOU">>, 60 | ok = yz_rt:http_put(HP, ?BUCKET, Key, Value), 61 | yz_rt:commit(Cluster, ?INDEX), 62 | URL = yz_rt:search_url(HP, ?INDEX), 63 | 64 | test_search_get_and_post_query(Node1, HP, URL, ?INDEX), 65 | test_post_as_get(URL), 66 | test_post_as_get_with_wrong_content_types(URL), 67 | test_get_and_post_no_params(URL), 68 | 69 | pass. 70 | 71 | test_search_get_and_post_query(Node, HP, URL, Index) -> 72 | {ok, "200", _, _} = yz_rt:search(HP, Index, "*", "*"), 73 | ?assertEqual(ok, yz_rt:search_expect(Node, Index, "text", "F*", 1)), 74 | CT = {content_type, "application/x-www-form-urlencoded"}, 75 | Headers = [CT], 76 | Params = [{q, "text:F*"}, {wt, "json"}], 77 | Body = mochiweb_util:urlencode(Params), 78 | lager:info("Check post with content-type ~s and message body", [element(2, CT)]), 79 | {ok, "200", _, R} = yz_rt:http(post, URL, Headers, Body), 80 | ?assert(yz_rt:verify_count(1, R)). 81 | 82 | test_post_as_get(URL) -> 83 | CT = {content_type, "application/x-www-form-urlencoded"}, 84 | Headers = [CT], 85 | {ok, "200", _, R} = yz_rt:http(post, URL ++ "?q=text:F*&wt=json", 86 | Headers, ?NO_BODY), 87 | ?assert(yz_rt:verify_count(1, R)). 88 | 89 | test_post_as_get_with_wrong_content_types(URL) -> 90 | CT1 = {content_type, "application/x-www-form-urlen"}, 91 | lager:info("Check misspelled content-type ~s", [element(2, CT1)]), 92 | Headers1 = [CT1], 93 | {ok, Status1, _, _} = yz_rt:http(post, URL ++ "?q=text:F*&wt=json", 94 | Headers1, ?NO_BODY), 95 | ?assertEqual("415", Status1), 96 | 97 | CT2 = {content_type, "application/json"}, 98 | Headers2 = [CT2], 99 | lager:info("Check non-applicable content-type ~s", [element(2, CT2)]), 100 | Params = [{q, "text:F*"}, {wt, "json"}], 101 | Body = mochiweb_util:urlencode(Params), 102 | {ok, Status2, _, _} = yz_rt:http(post, URL, Headers2, Body), 103 | ?assertEqual("415", Status2).
104 | 105 | test_get_and_post_no_params(URL) -> 106 | {ok, Status1, _, _} = yz_rt:http(get, URL, ?NO_HEADERS, ?NO_BODY), 107 | 108 | CT = {content_type, "application/x-www-form-urlencoded"}, 109 | Headers = [CT], 110 | {ok, Status2, _, _} = yz_rt:http(post, URL, Headers, ?NO_BODY), 111 | ?assertEqual("200", Status1), 112 | ?assertEqual("200", Status2). 113 | -------------------------------------------------------------------------------- /test/yz_solrq_eqc_fuse.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% Copyright (c) 2015 Basho Technologies, Inc. All Rights Reserved. 3 | %% 4 | %% This file is provided to you under the Apache License, 5 | %% Version 2.0 (the "License"); you may not use this file 6 | %% except in compliance with the License. You may obtain 7 | %% a copy of the License at 8 | %% 9 | %% http://www.apache.org/licenses/LICENSE-2.0 10 | %% 11 | %% Unless required by applicable law or agreed to in writing, 12 | %% software distributed under the License is distributed on an 13 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | %% KIND, either express or implied. See the License for the 15 | %% specific language governing permissions and limitations 16 | %% under the License. 17 | %% 18 | %% ------------------------------------------------------------------- 19 | -module(yz_solrq_eqc_fuse). 20 | 21 | -behaviour(gen_server). 22 | 23 | %% API 24 | -export([start_link/0, stop/0, reset/0, ask/2, melt/1, melts/1]). 25 | 26 | %% gen_server callbacks 27 | -export([ 28 | init/1, 29 | handle_call/3, 30 | handle_cast/2, 31 | handle_info/2, 32 | terminate/2, 33 | code_change/3 34 | ]). 35 | 36 | -define(SERVER, ?MODULE). 37 | 38 | -record(state, { 39 | indices = dict:new(), 40 | threshold = 1, % 1 melt is enough to blow a fuse 41 | interval = 100 % recover after 100ms 42 | }). 43 | 44 | %%%=================================================================== 45 | %%% API 46 | %%%=================================================================== 47 | 48 | start_link() -> 49 | gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). 50 | 51 | stop() -> 52 | gen_server:stop(?MODULE). 53 | 54 | reset() -> 55 | gen_server:call(?MODULE, reset). 56 | 57 | ask(Index, Context) -> 58 | gen_server:call(?MODULE, {ask, Index, Context}). 59 | 60 | melt(Index) -> 61 | gen_server:call(?MODULE, {melt, Index}). 62 | 63 | melts(Index) -> 64 | gen_server:call(?MODULE, {melts, Index}). 65 | 66 | %%%=================================================================== 67 | %%% gen_server callbacks 68 | %%%=================================================================== 69 | 70 | init([]) -> 71 | {ok, #state{}}. 
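%% Note on the mock's behavior (summarized from the clauses below):
%% each {melt, Index} call increments a per-index melt counter; once
%% the counter reaches `threshold' the fuse is reported blown via
%% yz_solrq:blown_fuse/1 and a {recover, Index} message is scheduled
%% `interval' ms later, which heals the fuse via yz_solrq:healed_fuse/1.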
72 | 73 | handle_call(reset, _From, _State) -> 74 | {reply, ok, #state{}}; 75 | 76 | handle_call( 77 | {ask, Index, _Context}, _From, #state{indices = Indices} = State 78 | ) -> 79 | IndexState = 80 | case dict:find(Index, Indices) of 81 | {ok, {blown, _Melts}} -> 82 | blown; 83 | _ -> 84 | ok 85 | end, 86 | {reply, IndexState, State}; 87 | 88 | handle_call({melt, Index}, _From, #state{indices = Indices, threshold = Threshold, interval = Interval} = State) -> 89 | NewMelts = 90 | case dict:find(Index, Indices) of 91 | {ok, {_FuseState, Melts}} -> 92 | Melts + 1; 93 | error -> 94 | 1 95 | end, 96 | FuseState = 97 | case NewMelts == Threshold of 98 | true -> 99 | yz_solrq:blown_fuse(yz_fuse:index_for_fuse_name(Index)), 100 | erlang:send_after(Interval, ?MODULE, {recover, Index}), 101 | blown; 102 | _ -> 103 | ok 104 | end, 105 | {reply, ok, State#state{indices = dict:store(Index, {FuseState, NewMelts}, Indices)}}; 106 | 107 | handle_call({melts, Index}, _From, #state{indices = Indices} = State) -> 108 | TotalMelts = case dict:find(Index, Indices) of 109 | {ok, {_FuseState, Melts}} -> Melts; 110 | _ -> 0 111 | end, 112 | {reply, TotalMelts, State}; 113 | 114 | handle_call(_Request, _From, State) -> 115 | {reply, ok, State}. 116 | 117 | 118 | handle_cast(_Request, State) -> 119 | {noreply, State}. 120 | 121 | handle_info({recover, Index}, #state{indices=Indices} = State) -> 122 | {ok, {_FuseState, Melts}} = dict:find(Index, Indices), 123 | yz_solrq:healed_fuse(yz_fuse:index_for_fuse_name(Index)), 124 | {noreply, State#state{indices=dict:store(Index, {ok, Melts}, Indices)}}; 125 | handle_info(_Info, State) -> 126 | {noreply, State}. 127 | 128 | terminate(_Reason, _State) -> 129 | ok. 130 | 131 | code_change(_OldVsn, State, _Extra) -> 132 | {ok, State}. 133 | 134 | %%%=================================================================== 135 | %%% Internal functions 136 | %%%=================================================================== 137 | -------------------------------------------------------------------------------- /test/yz_dt_extractor_tests.erl: -------------------------------------------------------------------------------- 1 | -module(yz_dt_extractor_tests). 2 | -compile(export_all). 3 | -include_lib("yz_test.hrl"). 4 | -include_lib("riak_kv/include/riak_kv_types.hrl"). 5 | 6 | 7 | %% Test counter extract 8 | counter_test() -> 9 | CounterBin = binary_crdt(counter), 10 | Result = yz_dt_extractor:extract(CounterBin), 11 | Expect = [{<<"counter">>, <<"10">>}], 12 | 13 | valid_extraction(Result, Expect). 14 | 15 | %% Test set extract 16 | set_test() -> 17 | SetBin = binary_crdt(set), 18 | Result = yz_dt_extractor:extract(SetBin), 19 | Expect = [{<<"set">>, <<"Riak">>}, 20 | {<<"set">>, <<"Cassandra">>}, 21 | {<<"set">>, <<"Voldemort">>}], 22 | 23 | valid_extraction(Result, Expect). 24 | 25 | 26 | %% Test hll extract 27 | hll_test() -> 28 | HllBin = binary_crdt(hll), 29 | Result = yz_dt_extractor:extract(HllBin), 30 | Expect = [{<<"hll">>, <<"9">>}], 31 | valid_extraction(Result, Expect). 32 | 33 | %% Test gset extract 34 | gset_test() -> 35 | SetBin = binary_crdt(gset), 36 | Result = yz_dt_extractor:extract(SetBin), 37 | Expect = [{<<"gset">>, <<"Dublin">>}, 38 | {<<"gset">>, <<"Tel Aviv">>}, 39 | {<<"gset">>, <<"Stoke-on-Trent">>}], 40 | valid_extraction(Result, Expect). 
41 | 42 | %% Test map extract 43 | map_test() -> 44 | MapBin = binary_crdt(map), 45 | Result = yz_dt_extractor:extract(MapBin), 46 | Expect = [{<<"activated_flag">>, true}, 47 | {<<"name_register">>, <<"Ryan Zezeski">>}, 48 | {<<"phones_set">>, <<"555-5555">>}, 49 | {<<"phones_set">>, <<"867-5309">>}, 50 | {<<"page_views_counter">>, <<"1502">>}, 51 | {<<"events_map.RICON_register">>, <<"spoke">>}, 52 | {<<"events_map.Surge_register">>, <<"attended">>}], 53 | 54 | valid_extraction(Result, Expect). 55 | 56 | field_separator_test() -> 57 | MapBin = binary_crdt(map), 58 | Result = yz_dt_extractor:extract(MapBin, [{field_separator, <<"--">>}]), 59 | Expect = [{<<"activated_flag">>, true}, 60 | {<<"name_register">>, <<"Ryan Zezeski">>}, 61 | {<<"phones_set">>, <<"555-5555">>}, 62 | {<<"phones_set">>, <<"867-5309">>}, 63 | {<<"page_views_counter">>, <<"1502">>}, 64 | {<<"events_map--RICON_register">>, <<"spoke">>}, 65 | {<<"events_map--Surge_register">>, <<"attended">>}], 66 | 67 | valid_extraction(Result, Expect). 68 | 69 | 70 | valid_extraction(Result, Expect) -> 71 | ?assertEqual((length(Expect)), (length(Result))), 72 | Pairs = lists:zip(lists:sort(Expect), lists:sort(Result)), 73 | [ ?assertEqual(E,R) || {E,R} <- Pairs], 74 | ?STACK_IF_FAIL(yz_solr:prepare_json([{doc, Result}])). 75 | 76 | binary_crdt(Type) -> 77 | riak_kv_crdt:to_binary(raw_type(Type)). 78 | 79 | raw_type(map) -> 80 | ?MAP_TYPE( 81 | element(2,?MAP_TYPE:update( 82 | {update, 83 | [ 84 | {update, {<<"activated">>, ?FLAG_TYPE}, enable}, 85 | {update, {<<"name">>, ?REG_TYPE}, {assign, <<"Ryan Zezeski">> }}, 86 | {update, {<<"phones">>, ?SET_TYPE}, {add_all, [<<"555-5555">>, <<"867-5309">>]}}, 87 | {update, {<<"page_views">>, ?EMCNTR_TYPE}, {increment, 1502}}, 88 | {update, {<<"events">>, ?MAP_TYPE}, 89 | {update, 90 | [ 91 | {update, {<<"RICON">>, ?REG_TYPE}, {assign, <<"spoke">>}}, 92 | {update, {<<"Surge">>, ?REG_TYPE}, {assign, <<"attended">>}} 93 | ]}} 94 | ]}, <<0>>, ?MAP_TYPE:new()))); 95 | raw_type(set) -> 96 | ?SET_TYPE( 97 | element(2,?SET_TYPE:update({add_all, [<<"Riak">>, <<"Cassandra">>, <<"Voldemort">>]}, 98 | <<0>>, ?SET_TYPE:new())) 99 | ); 100 | raw_type(gset) -> 101 | ?GSET_TYPE( 102 | element(2,?GSET_TYPE:update({add_all, [<<"Dublin">>, <<"Tel Aviv">>, <<"Stoke-on-Trent">>]}, 103 | nil, ?GSET_TYPE:new())) 104 | ); 105 | raw_type(counter) -> 106 | ?COUNTER_TYPE( 107 | element(2,?COUNTER_TYPE:update({increment, 10}, <<0>>, ?COUNTER_TYPE:new()))); 108 | raw_type(hll) -> 109 | ?HLL_TYPE( 110 | element(2,?HLL_TYPE:update({add_all, [<<"T">>, <<"h">>, <<"r">>, <<"i">>, 111 | <<"l">>, <<"l">>, <<"a">>, <<"s">>, 112 | <<"f">>, <<"u">>, <<"h">>]}, 113 | <<0>>, ?HLL_TYPE:new()))). 114 | -------------------------------------------------------------------------------- /test/yz_xml_extractor_tests.erl: -------------------------------------------------------------------------------- 1 | %% -*- coding: utf-8 -*- 2 | -module(yz_xml_extractor_tests). 3 | -compile(export_all). 4 | -include_lib("yz_test.hrl"). 5 | 6 | make_name_test() -> 7 | Expect = <<"one.two.three.four">>, 8 | Stack = ["four", "three", "two", "one"], 9 | Result = yz_xml_extractor:make_name(<<".">>, Stack), 10 | ?assertEqual(Expect, Result), 11 | 12 | Expect2 = <<"one_two_three_four_five">>, 13 | Stack2 = ["five", "four", "three", "two", "one"], 14 | Result2 = yz_xml_extractor:make_name(<<"_">>, Stack2), 15 | ?assertEqual(Expect2, Result2). 16 | 17 | %% Verify that the XML extractor maintains UTF-8 encoding. 
18 | utf8_test() -> 19 | {ok, SrcXML} = file:read_file("../test/utf8.xml"), 20 | Result = yz_xml_extractor:extract(SrcXML), 21 | case Result of 22 | {error, Reason} -> 23 | ?debugFmt("~nextract/1 failed: ~s~n", [Reason]), 24 | throw(extract_failed); 25 | _ -> 26 | ok 27 | end, 28 | Expect = 29 | [{<<"langs.english">>, <<"The quick brown fox jumps over the lazy dog.">>}, 30 | {<<"langs.english@attr">>, <<"The quick">>}, 31 | {<<"langs.jamaican">>, <<"Chruu, a kwik di kwik brong fox a jomp huova di liezi daag de, yu no siit?">>}, 32 | {<<"langs.irish">>, <<"\"An ḃfuil do ċroí ag bualaḋ ó ḟaitíos an ġrá a ṁeall lena ṗóg éada ó ṡlí do leasa ṫú?\" \"D'ḟuascail Íosa Úrṁac na hÓiġe Beannaiṫe pór Éava agus Áḋaiṁ.\""/utf8>>}, 33 | {<<"langs.dutch">>, <<"Pa's wijze lynx bezag vroom het fikse aquaduct."/utf8>>}, 34 | {<<"langs.german_1">>, <<"Falsches Üben von Xylophonmusik quält jeden größeren Zwerg."/utf8>>}, 35 | {<<"langs.german_1@attr">>, <<"Falsches Üben"/utf8>>}, 36 | {<<"langs.german_2">>, <<"Im finſteren Jagdſchloß am offenen Felsquellwaſſer patzte der affig-flatterhafte kauzig-höf‌liche Bäcker über ſeinem verſifften kniffligen C-Xylophon."/utf8>>}, 37 | {<<"langs.norwegian">>, <<"Blåbærsyltetøy."/utf8>>}, 38 | {<<"langs.danish">>, <<"Høj bly gom vandt fræk sexquiz på wc."/utf8>>}, 39 | {<<"langs.swedish">>, <<"Flygande bäckasiner söka strax hwila på mjuka tuvor."/utf8>>}, 40 | {<<"langs.icelandic">>, <<"Sævör grét áðan því úlpan var ónýt."/utf8>>}, 41 | {<<"langs.finnish">>, <<"Törkylempijävongahdus."/utf8>>}, 42 | {<<"langs.polish">>, <<"Pchnąć w tę łódź jeża lub osiem skrzyń fig."/utf8>>}, 43 | {<<"langs.czech">>, <<"Příliš žluťoučký kůň úpěl ďábelské kódy."/utf8>>}, 44 | {<<"langs.slovak">>, <<"Starý kôň na hŕbe kníh žuje tíško povädnuté ruže, na stĺpe sa ďateľ učí kvákať novú ódu o živote."/utf8>>}, 45 | {<<"langs.greek_monotonic">>, <<"ξεσκεπάζω την ψυχοφθόρα βδελυγμία"/utf8>>}, 46 | {<<"langs.greek_polytonic">>, <<"ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία"/utf8>>}, 47 | {<<"langs.russian">>, <<"Съешь же ещё этих мягких французских булок да выпей чаю."/utf8>>}, 48 | {<<"langs.bulgarian"/utf8>>, <<"Жълтата дюля беше щастлива, че пухът, който цъфна, замръзна като гьон."/utf8>>}, 49 | {<<"langs.sami">>, <<"Vuol Ruoŧa geđggiid leat máŋga luosa ja čuovžža."/utf8>>}, 50 | {<<"langs.hungarian">>, <<"Árvíztűrő tükörfúrógép."/utf8>>}, 51 | {<<"langs.spanish">>, <<"El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro."/utf8>>}, 52 | {<<"langs.portuguese">>, <<"O próximo vôo à noite sobre o Atlântico, põe freqüentemente o único médico."/utf8>>}, 53 | {<<"langs.french">>, <<"Les naïfs ægithales hâtifs pondant à Noël où il gèle sont sûrs d'être déçus en voyant leurs drôles d'œufs abîmés."/utf8>>}, 54 | {<<"langs.esperanto">>, <<"Eĥoŝanĝo ĉiuĵaŭde."/utf8>>}, 55 | {<<"langs.hebrew">>, <<"זה כיף סתם לשמוע איך תנצח קרפד עץ טוב בגן."/utf8>>}, 56 | {<<"langs.japanese_hiragana">>, <<" 57 | いろはにほへど ちりぬるを 58 | わがよたれぞ つねならむ 59 | うゐのおくやま けふこえて 60 | あさきゆめみじ ゑひもせず 61 | "/utf8>>}, 62 | {<<"langs.japanese_kanji">>, <<" 63 | 色は匂へど 散りぬるを 64 | 我が世誰ぞ 常ならむ 65 | 有為の奥山 今日越えて 66 | 浅き夢見じ 酔ひもせず 67 | "/utf8>>}, 68 | {<<"langs.английский"/utf8>>, <<"The quick brown fox jumps over the lazy dog.">>}, 69 | {<<"langs.chinese">>, <<" 70 | 花非花 71 | 雾非雾 72 | 夜半来 73 | 天明去 74 | 来如春梦几多时 75 | 去似朝云无觅处 76 | "/utf8>>}, 77 | {<<"langs.chinese@作者"/utf8>>, <<"Bai Juyi">>}, 78 | {<<"langs.chinese@title"/utf8>>, <<"The Bloom is not a Bloom">>}], 79 | ?assertEqual(length(Expect), length(Result)), 80 | 
Pairs = lists:zip(lists:keysort(1, Expect), lists:keysort(1, Result)), 81 | [?assertPairsEq(E,R) || {E,R} <- Pairs], 82 | ?STACK_IF_FAIL(yz_solr:prepare_json([{doc, Result}])). 83 | -------------------------------------------------------------------------------- /riak_test/yz_ring_resizing.erl: -------------------------------------------------------------------------------- 1 | -module(yz_ring_resizing). 2 | -compile(export_all). 3 | -include_lib("eunit/include/eunit.hrl"). 4 | -include("yokozuna.hrl"). 5 | 6 | %% @doc Test ring resizing while indexing and querying 7 | %% 8 | -define(FRUIT_SCHEMA_NAME, <<"fruit">>). 9 | -define(BUCKET_TYPE, <<"data">>). 10 | -define(INDEX, <<"fruit_index">>). 11 | -define(INDEX_N_VAL, 4). 12 | -define(BUCKET, {?BUCKET_TYPE, <<"fruit">>}). 13 | -define(NUM_KEYS, 10000). 14 | -define(SUCCESS, 0). 15 | -define(CFG, 16 | [{riak_core, 17 | [ 18 | %% Allow handoff to happen more quickly. 19 | {handoff_concurrency, 3}, 20 | 21 | %% Use smaller ring size so that test runs faster. 22 | {ring_creation_size, 16}, 23 | 24 | %% Reduce the tick so that ownership handoff will happen 25 | %% more quickly. 26 | {vnode_management_timer, 1000} 27 | ]}, 28 | {yokozuna, 29 | [ 30 | {enabled, true}, 31 | 32 | %% Perform a full check every second so that non-owned 33 | %% postings are deleted promptly. This makes sure that 34 | %% postings are removed concurrent to async query during 35 | %% resize. 36 | {events_full_check_after, 2} 37 | ]} 38 | ]). 39 | -define(SHRINK_SIZE, 32). 40 | -define(EXPAND_SIZE, 64). 41 | 42 | confirm() -> 43 | case yz_rt:bb_driver_setup() of 44 | {ok, YZBenchDir} -> 45 | random:seed(now()), 46 | 47 | %% build the 4 node cluster 48 | [ANode|_] = Cluster = rt:build_cluster(4, ?CFG), 49 | rt:wait_for_cluster_service(Cluster, yokozuna), 50 | PBConns = yz_rt:open_pb_conns(Cluster), 51 | 52 | %% Index and load data 53 | setup_indexing(Cluster, PBConns, YZBenchDir), 54 | {0, _} = yz_rt:load_data(Cluster, ?BUCKET, YZBenchDir, ?NUM_KEYS), 55 | yz_rt:commit(Cluster, ?INDEX), 56 | yz_rt:verify_num_match(Cluster, ?INDEX, ?NUM_KEYS), 57 | %% Resize the ring -- size up, and make sure it completes 58 | lager:info("Resizing ring to ~p", [?EXPAND_SIZE]), 59 | submit_resize(?EXPAND_SIZE, ANode), 60 | ensure_ring_resized(Cluster), 61 | pass; 62 | {error, bb_driver_build_failed} -> 63 | lager:info("Failed to build the yokozuna basho_bench driver" 64 | " required for this test"), 65 | fail 66 | end. 67 | 68 | %% The following section is commented out because ring-resizing downward currently 69 | %% presents an unresolved issue in YZ. There is still value in the test, however, 70 | %% and when the issue is resolved, this code should be un-commented. 71 | 72 | %% start another query 73 | %% Ref2 = async_query(Cluster, YZBenchDir), 74 | %% timer:sleep(10000), 75 | 76 | %% ring resize -- size down, and check it and query complete 77 | %% lager:info("resizing ring to ~p", [?SHRINK_SIZE]), 78 | %% submit_resize(?SHRINK_SIZE, ANode), 79 | %% ensure_ring_resized(Cluster), 80 | 81 | %% check_status(wait_for(Ref2)), 82 | %% yz_rt:close_pb_conns(PBConns), 83 | 84 | %% check_status({Status,_}) -> 85 | %% ?assertEqual(?SUCCESS, Status). 86 | 87 | read_schema(YZBenchDir) -> 88 | Path = filename:join([YZBenchDir, "schemas", "fruit_schema.xml"]), 89 | {ok, RawSchema} = file:read_file(Path), 90 | RawSchema.
91 | 92 | setup_indexing(Cluster, PBConns, YZBenchDir) -> 93 | Node = yz_rt:select_random(Cluster), 94 | PBConn = yz_rt:select_random(PBConns), 95 | 96 | yz_rt:create_bucket_type(Cluster, ?BUCKET_TYPE), 97 | 98 | RawSchema = read_schema(YZBenchDir), 99 | yz_rt:store_schema(PBConn, ?FRUIT_SCHEMA_NAME, RawSchema), 100 | yz_rt:wait_for_schema(Cluster, ?FRUIT_SCHEMA_NAME, RawSchema), 101 | ok = yz_rt:create_index(Cluster, ?INDEX, ?FRUIT_SCHEMA_NAME, ?INDEX_N_VAL), 102 | yz_rt:set_index(Node, ?BUCKET, ?INDEX, ?INDEX_N_VAL). 103 | 104 | wait_for(Ref) -> 105 | rt:wait_for_cmd(Ref). 106 | 107 | submit_resize(NewSize, Node) -> 108 | ?assertEqual(ok, rpc:call(Node, riak_core_claimant, resize_ring, [NewSize])), 109 | {ok, _, _} = rpc:call(Node, riak_core_claimant, plan, []), 110 | ?assertEqual(ok, rpc:call(Node, riak_core_claimant, commit, [])). 111 | 112 | ensure_ring_resized(Cluster) -> 113 | IsResizeComplete = 114 | fun(Node) -> 115 | lager:debug("Waiting for is_resize_complete on node ~p", [Node]), 116 | Ring = rpc:call(Node, yz_misc, get_ring, [transformed]), 117 | rpc:call(Node, riak_core_ring, is_resize_complete, [Ring]) 118 | end, 119 | ?assertEqual(ok, yz_rt:wait_until(Cluster, IsResizeComplete)). 120 | --------------------------------------------------------------------------------