├── LICENSE ├── README.md ├── build_settings.example ├── create-dist-tarball.sh ├── dist-setup ├── get-images ├── registry-login └── untag-images ├── dist ├── LICENSE.txt ├── NOTICE.txt ├── api-server │ ├── get-token │ ├── seldon-server-settings.txt │ ├── start-api-server-server │ └── stop-api-server-server ├── consul │ ├── setup-alg-training.sh │ ├── setup-dbs.sh │ ├── setup-spark.sh │ ├── start-consul │ └── stop-consul ├── data-logs │ ├── start-data-logs │ └── stop-data-logs ├── kafka │ ├── start-kafka-server │ └── stop-kafka-server ├── memcache │ ├── start-memcache-server │ └── stop-memcache-server ├── movie_recommender_demo │ ├── create_recommender_demo.sh │ ├── create_sample_data │ ├── download_and_create_data.sh │ └── start-all-for-models ├── mysql │ ├── setup_local_mysql_data │ ├── start-mysql-server │ └── stop-mysql-server ├── mysql_data │ ├── start-mysql-data │ └── stop-mysql-data ├── seldon-models │ ├── setup-local-models │ ├── start-seldon-models │ └── stop-seldon-models ├── semvec │ └── create-movielens-sv ├── setup-local.sh ├── spark │ ├── jobs │ │ ├── run-movielens-cluster.sh │ │ ├── run-movielens-item-similarity.sh │ │ └── run-movielens-topic-model.sh │ ├── start-spark-offline-server │ ├── start-spark-streaming-server │ ├── stop-spark-offline-server │ └── stop-spark-streaming-server ├── start-all ├── start-all-recsvm ├── start-all-with-local-logs ├── stop-all ├── td-agent │ ├── start-td-agent-server │ └── stop-td-agent-server ├── testing │ ├── example-web-recommendations │ └── test-demo ├── your_data │ ├── actions_data │ │ └── example_actions.csv │ ├── activate_models.sh │ ├── clear_actions.sh │ ├── create_models.sh │ ├── items_data │ │ └── example_items.csv │ ├── load_actions.sh │ ├── load_items.sh │ ├── load_schema.sh │ ├── load_users.sh │ ├── schema │ │ └── example_schema.json │ └── users_data │ │ └── example_users.csv └── zookeeper │ ├── setup-movielens.sh │ ├── start-zookeeper-server │ ├── stop-zookeeper-server │ ├── zkshell │ └── zoo-movielens.cfg ├── full-build ├── build-all └── clean-all ├── images ├── README.txt ├── api_server_data_logs_image │ ├── Dockerfile │ ├── Makefile │ └── apps │ │ └── bin │ │ └── keep_alive ├── api_server_demo_image │ ├── Dockerfile │ ├── Makefile │ ├── add_js_embedly_prefix.sh │ ├── api-spec │ │ └── seldon │ ├── startup.sh │ └── swagger │ │ ├── css │ │ ├── reset.css │ │ └── screen.css │ │ ├── images │ │ ├── explorer_icons.png │ │ ├── logo_small.png │ │ ├── pet_store_api.png │ │ ├── throbber.gif │ │ └── wordnik_api.png │ │ ├── index.html │ │ ├── index.html.orig │ │ ├── lib │ │ ├── backbone-min.js │ │ ├── handlebars-1.0.0.js │ │ ├── highlight.7.3.pack.js │ │ ├── jquery-1.8.0.min.js │ │ ├── jquery.ba-bbq.min.js │ │ ├── jquery.slideto.min.js │ │ ├── jquery.wiggle.min.js │ │ ├── shred.bundle.js │ │ ├── shred │ │ │ └── content.js │ │ ├── swagger-client.js │ │ ├── swagger-oauth.js │ │ ├── swagger.js │ │ └── underscore-min.js │ │ ├── o2c.html │ │ ├── swagger-ui.js │ │ └── swagger-ui.min.js ├── api_server_image │ ├── Dockerfile │ ├── Makefile │ ├── __API_SERVER_VERSION__ │ ├── apps │ │ └── api-server │ │ │ └── startup.sh │ ├── create-webapps-from-github │ ├── create-webapps-from-local │ ├── create-webapps-from-s3 │ └── tomcat_env │ │ └── setenv.sh ├── build-all-images ├── clean-all-images ├── consul_image │ ├── Dockerfile │ ├── Makefile │ └── config │ │ └── consul.json ├── java7jre_image │ ├── Dockerfile │ └── Makefile ├── kafka_image │ ├── Dockerfile │ ├── Makefile │ ├── broker-list.sh │ └── start-kafka.sh ├── memcache_image │ ├── Dockerfile │ ├── 
Makefile │ └── test_memcache │ │ └── test_memcache.sh ├── movielens_data_transform │ ├── Dockerfile │ ├── Makefile │ └── scripts │ │ ├── combine_item_data_sources.py │ │ ├── download_movielens_data.sh │ │ ├── getFreebaseData.py │ │ └── run.sh ├── mysql_data │ ├── Dockerfile │ ├── Makefile │ ├── create-movielens-dbs.sh │ ├── movielens-db-setup │ ├── movielens_sql │ │ ├── api-data.sql │ │ ├── api-schema.sql │ │ ├── movielens-schema.sql │ │ ├── test1-schema.sql │ │ ├── test2-schema.sql │ │ ├── test3-schema.sql │ │ ├── test4-schema.sql │ │ ├── test5-schema.sql │ │ └── testclient-schema.sql │ ├── mysql-shell │ ├── remove-local-mysql-data-resource │ └── setup-local-mysql-data-resource ├── mysql_image │ ├── Dockerfile │ ├── Makefile │ ├── my.cnf │ ├── readme.txt │ └── setup_local.sh ├── push-all-images ├── seldon-models │ ├── Dockerfile │ ├── Makefile │ ├── apps │ │ └── bin │ │ │ └── keep_alive │ ├── get-movielens-models.sh │ ├── remove-local-models-resource │ └── setup-local-models-resource ├── seldon-tools │ ├── Dockerfile │ ├── Makefile │ ├── apps │ │ └── bin │ │ │ └── keep_alive │ └── scripts │ │ ├── import │ │ ├── add_attr_schema.py │ │ ├── add_attr_schema.sh │ │ ├── add_items.py │ │ ├── add_items.sh │ │ ├── add_users.py │ │ ├── add_users.sh │ │ ├── attr_schema.json │ │ ├── clear_actions.sh │ │ ├── create_actions_json.py │ │ ├── create_actions_json.sh │ │ └── sample.py │ │ ├── models │ │ ├── item-similarity │ │ │ ├── activate.sh │ │ │ ├── createItemSimilaritySql.py │ │ │ └── create_sql_and_upload.sh │ │ └── word2vec │ │ │ ├── transformToSV.sh │ │ │ └── word2vecToSV.py │ │ └── zookeeper │ │ ├── zkcmd.py │ │ └── zklines.py ├── semantic_vectors_image │ ├── Dockerfile │ ├── Makefile │ └── scripts │ │ ├── models │ │ ├── stopwords.italian │ │ └── stopwords.txt │ │ ├── run-training-consul.sh │ │ └── run-training.sh ├── spark_image │ ├── Dockerfile │ ├── Makefile │ ├── copy-jars-to-app-dir │ ├── get-app-jars │ ├── readme.txt │ ├── run-container │ ├── spark-jobs │ │ ├── cluster-users-by-taxonomy.sh │ │ ├── item-similarity.sh │ │ ├── job-group-actions │ │ ├── job-group-actions-test │ │ ├── matrix-factorization.sh │ │ ├── session-items.sh │ │ ├── spark-streaming-job │ │ ├── topic-model-session-tags.sh │ │ └── word2vec.sh │ └── startup-scripts │ │ ├── run-keep-alive │ │ └── run-streaming-job ├── td_agent_image │ ├── Dockerfile │ ├── Makefile │ └── td-agent.conf ├── tomcat7_image │ ├── Dockerfile │ └── Makefile ├── vw_image │ ├── Dockerfile │ ├── Makefile │ └── topic_model │ │ ├── Makefile │ │ ├── create-vw.py │ │ ├── decode-model.py │ │ ├── process-vw.py │ │ ├── processPredictions.py │ │ ├── remapTags.py │ │ ├── run-training.sh │ │ ├── run.sh │ │ └── update-zk-node.sh └── zookeeper_image │ ├── Dockerfile │ ├── Makefile │ └── scripts │ └── update_zk.py ├── install-build-dependencies-ubuntu.sh ├── tmp.txt └── vagrant ├── seldontestvm-test ├── Makefile └── Vagrantfile ├── seldonvm-build-trusty ├── Makefile ├── Vagrantfile ├── bootstrap.sh ├── end-user-files │ └── Vagrantfile.template ├── generate_build_details ├── generate_end_user_vagrant_file ├── generate_vagrant_json ├── save-and-load-images │ ├── Makefile │ ├── images │ │ └── .keep │ ├── images_list.sh │ ├── load_images │ └── save_images ├── settings ├── settings.seldontestvm ├── settings.seldonvm ├── update_vm ├── update_vm.seldontestvm ├── update_vm.seldonvm └── vm_vagrant.json.template ├── seldonvm-build ├── Makefile ├── Vagrantfile ├── config.rb ├── coreos-vagrant │ ├── config.rb │ └── user-data ├── end-user-files │ └── Vagrantfile.template ├── 
generate_build_details ├── generate_end_user_vagrant_file ├── generate_vagrant_json ├── readme.txt ├── save-and-load-images │ ├── Makefile │ ├── images │ │ └── .keep │ ├── images_list.sh │ ├── load_images │ └── save_images ├── settings ├── settings.seldontestvm ├── settings.seldonvm ├── update_vm ├── update_vm.seldontestvm ├── update_vm.seldonvm ├── user-data └── vm_vagrant.json.template ├── seldonvm-test ├── Makefile └── Vagrantfile └── vagrant-nfs └── install_vagrant_sudoers_for_mac /README.md: -------------------------------------------------------------------------------- 1 | # Seldon Virtual Machine 2 | 3 | This project packages all the Seldon projects into a single virtual machine so they can be tested easily. 4 | 5 | The related documentation can be found at [http://docs.seldon.io/getting-started.html](http://docs.seldon.io/getting-started.html) 6 | 7 | This project provides functionality to: 8 | 9 | * Package each Seldon dependency as a Docker container, e.g.: 10 | * [Seldon Server](https://github.com/SeldonIO/seldon-server) - the real-time prediction server 11 | * [Seldon Spark](https://github.com/SeldonIO/seldon-spark) - offline and streaming jobs in Spark 12 | * Package standard components as Docker containers, e.g.: 13 | * MySQL 14 | * Zookeeper 15 | * Consul 16 | * Create a Vagrant VM from the Docker containers 17 | * Provide a Movie Recommender demo integration that goes from initial raw data to a final recommender system. An online version of the final demo can be viewed [here](http://www.seldon.io/movie-demo/). 18 | * Provide an API explorer. An online version of this can be viewed [here](http://www.seldon.io/api-explorer/). 19 | 20 | A pre-built, ready-to-use version of this VM can be obtained by joining our [Beta program](http://www.seldon.io/open-source) and downloading the [Vagrant VM](http://docs.seldon.io/vm.html), or by using the pre-built [AWS AMI](http://docs.seldon.io/vm-aws.html) Docker instance. 21 | 22 | We will provide scripts and instructions on how to build the VM yourself in the future. The dependencies needed are: 23 | 24 | * node 25 | * npm 26 | * docker 27 | * make 28 | * java jdk 29 | * jq 30 | * maven 31 | * bower 32 | * grunt 33 | 34 | An initial script that installs these for Ubuntu (assumes an ubuntu user) can be found in: 35 | 36 | ``` 37 | install-build-dependencies-ubuntu.sh 38 | ``` 39 | 40 | We welcome help to build this VM on different systems. Please clone the project and contact support at seldon dot io if you wish to contribute. 41 | 42 | 43 | -------------------------------------------------------------------------------- /build_settings.example: -------------------------------------------------------------------------------- 1 | PRIVATE_REGISTRY_HOST=my.com 2 | PRIVATE_REGISTRY_PORT=8080 3 | PRIVATE_REGISTRY_USER=myuser 4 | PRIVATE_REGISTRY_PASSWORD=mypass 5 | PRIVATE_REGISTRY_EMAIL=me@my.com 6 | 7 | -------------------------------------------------------------------------------- /create-dist-tarball.sh: -------------------------------------------------------------------------------- 1 | tar -czvf dist.tar.gz --exclude logs dist dist-setup 2 | -------------------------------------------------------------------------------- /dist-setup/get-images: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | PROJ_DIR=..
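# build_settings (created from build_settings.example at the project root) must define the
# PRIVATE_REGISTRY_* variables; it is sourced on the next line, and the same settings are
# reused by registry-login and untag-images.
# NOTE: PROJ_DIR is resolved relative to the current working directory rather than to
# STARTUP_DIR, so this script expects to be run from inside dist-setup. One possible
# alternative (a sketch, not part of the original script) is to anchor it to the script's
# own location:
#
#   PROJ_DIR=${STARTUP_DIR}/..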
8 | source ${PROJ_DIR}/build_settings 9 | 10 | ${STARTUP_DIR}/registry-login 11 | 12 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/java7jre_image 13 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/tomcat7_image 14 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/zookeeper_image 15 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/memcache_image 16 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/mysql_image 17 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/mysql_data 18 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/api_server_demo_image 19 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/api_server_data_logs_image 20 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/seldon-models 21 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/td_agent_image 22 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/kafka_image 23 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/spark_image 24 | docker pull ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/consul_image 25 | 26 | rm -fv ~/.dockercfg 27 | 28 | -------------------------------------------------------------------------------- /dist-setup/registry-login: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | PROJ_DIR=.. 8 | source ${PROJ_DIR}/build_settings 9 | 10 | docker login -u ${PRIVATE_REGISTRY_USER} -p ${PRIVATE_REGISTRY_PASSWORD} -e "${PRIVATE_REGISTRY_EMAIL}" https://${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT} 11 | 12 | -------------------------------------------------------------------------------- /dist-setup/untag-images: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | PROJ_DIR=.. 8 | source ${PROJ_DIR}/build_settings 9 | 10 | declare -a arr=( \ 11 | "api_server_demo_image" \ 12 | "api_server_data_logs_image" \ 13 | "mysql_image" \ 14 | "mysql_data" \ 15 | "kafka_image" \ 16 | "memcache_image" \ 17 | "td_agent_image" \ 18 | "zookeeper_image" \ 19 | "spark_image" \ 20 | "seldon-models" \ 21 | "java7jre_image" \ 22 | "tomcat7_image" \ 23 | "consul_image" \ 24 | ) 25 | 26 | for i in "${arr[@]}" 27 | do 28 | set +o errexit 29 | docker tag --force=true ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/$i $i 30 | docker rmi ${PRIVATE_REGISTRY_HOST}:${PRIVATE_REGISTRY_PORT}/$i 31 | set -o errexit 32 | 33 | done 34 | 35 | -------------------------------------------------------------------------------- /dist/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ================================================================================ 2 | Copyright 2011-2015 Seldon Technologies Ltd, Rummble Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ================================================================================ 16 | 17 | This project includes sub-components with separate copyright notices and license terms. Compliance with all copyright laws and software license agreements included in the notice section of this file is the responsibility of the user. 18 | 19 | **[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)** 20 | 21 | * [Apache Commons](http://commons.apache.org) - Copyright © 2015 The Apache Software Foundation 22 | * [Apache Spark](http://spark.apache.org) - Copyright © 2015 The Apache Software Foundation 23 | * [Apache Hadoop](http://hadoop.apache.org/) - Copyright © 2015 The Apache Software Foundation 24 | * [Apache Mahout](http://mahout.apache.org/) - Copyright © 2015 The Apache Software Foundation 25 | * [Apache Zookeeper](http://zookeeper.apache.org/) - Copyright © 2009-2015 The Apache Software Foundation 26 | * [Grafana](http://grafana.org/) - Copyright © 2012-2013 Elasticsearch BV, Torkel Ödegaard 27 | * [Guava](https://code.google.com/p/guava-libraries/) - Copyright © 2014 Google 28 | * [word2vec](https://code.google.com/p/word2vec/) - Copyright © 2013 Google Inc. 29 | 30 | 31 | **[The BSD 3-Clause License](http://opensource.org/licenses/BSD-3-Clause)** 32 | 33 | * [semantic vectors](https://code.google.com/p/semanticvectors/) - Copyright (c) 2008, The SemanticVectors Authors 34 | 35 | **[GNU General Public License, version 2](http://www.gnu.org/licenses/old-licenses/gpl-2.0.html)** 36 | 37 | * [MySQL Community Server](http://www.mysql.com) - Copyright © 2015 Oracle Corporation 38 | 39 | **[The MIT License (MIT)](http://opensource.org/licenses/MIT)** 40 | 41 | * [InfluxDB](http://influxdb.com/) - Copyright © 2013-2015 Errplane Inc.
42 | * [scopt](https://github.com/scopt/scopt) - Copyright © 2010 Google 43 | * [Vagrant](https://www.vagrantup.com/) - Copyright (c) 2010-2015 Mitchell Hashimoto 44 | 45 | **Mozilla Public License, version 2.0** 46 | 47 | * [Consul](http://consul.io) - Copyright © 2015 HashiCorp 48 | 49 | **Other Copyright Notices** 50 | 51 | * [MovieLens 10M dataset](http://www.movielens.org) - © 2015 GroupLens Research Project at the University of Minnesota 52 | -------------------------------------------------------------------------------- /dist/api-server/get-token: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | curl "http://192.168.59.103:8080/token?consumer_key=yxokkkencw&consumer_secret=fgrukwcbzw" 9 | 10 | -------------------------------------------------------------------------------- /dist/api-server/seldon-server-settings.txt: -------------------------------------------------------------------------------- 1 | set /config/memcached_servers memcache_server:11211 2 | set /config/dbcp {"dbs":[{"driverClassName":"com.mysql.jdbc.ReplicationDriver","jdbc":"jdbc:mysql:replication://mysql_server:3306,mysql_server:3306?characterEncoding=utf8&roundRobinLoadBalance=true&transformedBitIsBoolean=true&zeroDateTimeBehavior=convertToNull&rewriteBatchedStatements=true","name":"ClientDB","password":"mypass","user":"root"}]} 3 | set /all_clients/movielens {"DB_JNDI_NAME":"ClientDB"} 4 | set /config/default_strategy {"algorithms":[{"config":[],"filters":[],"includers":[],"name":"recentItemsRecommender"}],"combiner":"firstSuccessfulCombiner"} 5 | 6 | -------------------------------------------------------------------------------- /dist/api-server/start-api-server-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- starting api-server ---" 9 | 10 | cat ${STARTUP_DIR}/seldon-server-settings.txt | docker run \ 11 | --rm \ 12 | -i -a stdin -a stdout -a stderr \ 13 | --name seldon_tools \ 14 | --link zookeeper_server_container:zookeeper_server \ 15 | seldon-tools /seldon-tools/scripts/zookeeper/zklines.py \ 16 | --zk-hosts zookeeper_server 17 | 18 | export EMBEDLY_KEY=${EMBEDLY_KEY:-} 19 | 20 | docker run -d \ 21 | --name api_server_container -p 8080:8080 \ 22 | --link mysql_server_container:mysql_server \ 23 | --link zookeeper_server_container:zookeeper_server \ 24 | --link memcache_server_container:memcache_server \ 25 | --volumes-from api_server_data_logs_container \ 26 | --volumes-from seldon-models \ 27 | -e EMBEDLY_KEY=$EMBEDLY_KEY \ 28 | -e SELDON_ZKSERVERS=zookeeper_server \ 29 | ${REGISTRY_PREFIX}api_server_demo_image bash -c '/apps/api-server/startup.sh' 30 | 31 | -------------------------------------------------------------------------------- /dist/api-server/stop-api-server-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping api-server ---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=api_server_container 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | ${DOCKER_CMD} stop 
${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! -z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/consul/setup-alg-training.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | ######################## 7 | # Movielens algorithms # 8 | ######################## 9 | 10 | # clustering of users by taxonomy 11 | docker exec consul curl -s -X PUT -d '{"min_cluster_size":0, "delta":0.01 }' 'http://localhost:8500/v1/kv/seldon/movielens/algs/cluster_by_taxonomy' 12 | # matrix factorization 13 | docker exec consul curl -s -X PUT -d '{"rank":30, "lambda":0.1, "alpha":1, "iterations":5 }' 'http://localhost:8500/v1/kv/seldon/movielens/algs/matrix_factorization' 14 | # item similarity 15 | docker exec consul curl -s -X PUT -d '{"min_users_per_item":0, "min_items_per_user":0, "max_users_per_item":100000, "threshold":0.01, "dimsum_threshold":0.1 }' 'http://localhost:8500/v1/kv/seldon/movielens/algs/item_similarity' 16 | # semantic vectors 17 | docker exec consul curl -s -X PUT -d '{"item_limit":"30000","attr_names":"description","base_output_folder":"/seldon-models/movielens/svtext/1"}' 'http://localhost:8500/v1/kv/seldon/testclient/algs/semantic_vectors' 18 | #word2vec 19 | docker exec consul curl -s -X PUT -d '{"min_actions_per_user":0, "max_actions_per_user":30000, "max_session_gap":-1 }' 'http://localhost:8500/v1/kv/seldon/movielens/algs/session_items' 20 | docker exec consul curl -s -X PUT -d '{"min_word_count":50, "vector_size":30 }' 'http://localhost:8500/v1/kv/seldon/movielens/algs/word2vec' 21 | #topic models 22 | docker exec consul curl -s -X PUT -d '{"tag_attr":"description" }' 'http://localhost:8500/v1/kv/seldon/movielens/algs/topic_model_session_tags' 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /dist/consul/setup-dbs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | #setup database settings 7 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/movielens?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/movielens/db_read' 8 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/movielens?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/movielens/db_write' 9 | 10 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/testclient?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/testclient/db_read' 11 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/testclient?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/testclient/db_write' 12 | 13 | docker exec consul curl -s -X PUT -d 
'{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/test1?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/test1/db_read' 14 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/test1?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/test1/db_write' 15 | 16 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/test2?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/test2/db_read' 17 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/test2?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/test2/db_write' 18 | 19 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/test3?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/test3/db_read' 20 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/test3?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/test3/db_write' 21 | 22 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/test4?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/test4/db_read' 23 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/test4?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/test4/db_write' 24 | 25 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/test5?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/test5/db_read' 26 | docker exec consul curl -s -X PUT -d '{"host":"mysql_server","username":"root","password":"mypass","jdbc":"jdbc:mysql://mysql_server:3306/test5?characterEncoding=utf8&user=root&password=mypass"}' 'http://localhost:8500/v1/kv/seldon/test5/db_write' 27 | -------------------------------------------------------------------------------- /dist/consul/setup-spark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | #setup database settings 7 | docker exec consul curl -s -X PUT -d '{"executor_memory":"2g"}' 'http://localhost:8500/v1/kv/seldon/spark' 8 | -------------------------------------------------------------------------------- /dist/consul/start-consul: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- starting consul ---" 9 | docker run --name="consul" -h node1 -d consul_image -server -bootstrap 10 | 11 | COUNT=0 12 | MAX_COUNT=10 13 | 14 | IS_CONSUL_READY=false 15 | 16 | while true; do 17 | COUNT=$((COUNT+1)) 18 | if [ $COUNT -gt $MAX_COUNT ]; then 19 | break 20 | fi 21 | 22 | LEADER_STATUS=$(docker exec -i -t consul curl 
-s 'http://localhost:8500/v1/status/leader'|sed -e 's/"//g') 23 | LEADER_STATUS_STRING_LENGTH=${#LEADER_STATUS} 24 | if [ $LEADER_STATUS_STRING_LENGTH -gt 0 ]; then 25 | IS_CONSUL_READY=true 26 | break 27 | fi 28 | 29 | WAIT_SECS=3 30 | echo "Consul not ready, sleeping ${WAIT_SECS} secs..." 31 | sleep $WAIT_SECS 32 | done 33 | 34 | if [ "$IS_CONSUL_READY" = "true" ];then 35 | echo "Consul now ready." 36 | ${STARTUP_DIR}/setup-dbs.sh 37 | ${STARTUP_DIR}/setup-spark.sh 38 | ${STARTUP_DIR}/setup-alg-training.sh 39 | else 40 | echo "Consul not ready!" 41 | exit 1 42 | fi 43 | 44 | -------------------------------------------------------------------------------- /dist/consul/stop-consul: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping consul ---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=consul 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | ${DOCKER_CMD} stop ${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! -z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/data-logs/start-data-logs: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- starting data logs ---" 9 | 10 | if [[ $# > 0 ]]; then 11 | LOCAL_LOGS_DIR=$1 12 | docker run --name api_server_data_logs_container -d -v ${LOCAL_LOGS_DIR}:/data-logs ${REGISTRY_PREFIX}api_server_data_logs_image 13 | else 14 | docker run --name api_server_data_logs_container -d ${REGISTRY_PREFIX}api_server_data_logs_image 15 | fi 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /dist/data-logs/stop-data-logs: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping data logs ---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=api_server_data_logs_container 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | ${DOCKER_CMD} stop ${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! 
-z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/kafka/start-kafka-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- starting kafka ---" 9 | docker run -d \ 10 | --name kafka_server_container \ 11 | --link zookeeper_server_container:zk \ 12 | --volumes-from api_server_data_logs_container \ 13 | -p 9092:9092 \ 14 | -v /var/run/docker.sock:/var/run/docker.sock \ 15 | ${REGISTRY_PREFIX}kafka_image 16 | 17 | -------------------------------------------------------------------------------- /dist/kafka/stop-kafka-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping kafka ---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=kafka_server_container 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | ${DOCKER_CMD} stop ${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! -z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/memcache/start-memcache-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- starting memcache ---" 9 | docker run --name memcache_server_container -d ${REGISTRY_PREFIX}memcache_image 10 | 11 | -------------------------------------------------------------------------------- /dist/memcache/stop-memcache-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping memcache ---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=memcache_server_container 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | ${DOCKER_CMD} stop ${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! 
-z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/movie_recommender_demo/create_recommender_demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | CLIENT=movielens 9 | MODELS=( "matrix_factorization" "semantic_vectors" "word2vec" "item_similarity") 10 | YOUR_DATA_DIR=${STARTUP_DIR}/../your_data 11 | 12 | echo "restarting all containers to get clean state..." 13 | ${STARTUP_DIR}/../stop-all 14 | ${STARTUP_DIR}/start-all-for-models 15 | 16 | ${STARTUP_DIR}/download_and_create_data.sh $CLIENT 17 | 18 | for model in "${MODELS[@]}" 19 | do 20 | echo "Creating model $model for client $CLIENT" 21 | ${YOUR_DATA_DIR}/create_models.sh ${CLIENT} ${model} 22 | ${YOUR_DATA_DIR}/activate_models.sh ${CLIENT} ${model} 23 | done 24 | 25 | ${STARTUP_DIR}/../start-all 26 | 27 | -------------------------------------------------------------------------------- /dist/movie_recommender_demo/create_sample_data: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | SELDON_DATA_DIR=${STARTUP_DIR}/seldon_data 9 | 10 | SAMPLE_SEED="MYSEED" 11 | SAMPLE_PERCENT=1 12 | 13 | if [ ! -f "${SELDON_DATA_DIR}/movielens_actions.csv.full" ];then 14 | cp -v "${SELDON_DATA_DIR}/movielens_actions.csv" "${SELDON_DATA_DIR}/movielens_actions.csv.full" 15 | fi 16 | 17 | if [ ! -f "${SELDON_DATA_DIR}/movielens_actions.csv.sample" ];then 18 | echo "creating ${SELDON_DATA_DIR}/movielens_actions.csv.sample" 19 | head -1 "${SELDON_DATA_DIR}/movielens_actions.csv.full" > "${SELDON_DATA_DIR}/movielens_actions.csv.sample" 20 | tail -n+2 "${SELDON_DATA_DIR}/movielens_actions.csv.full" | docker run \ 21 | --rm \ 22 | -i -a stdin -a stdout -a stderr \ 23 | --name="movielens_sample_data" \ 24 | seldon-tools /seldon-tools/scripts/import/sample.py --sample-percent ${SAMPLE_PERCENT} --random-seed ${SAMPLE_SEED} >> "${SELDON_DATA_DIR}/movielens_actions.csv.sample" 25 | fi 26 | 27 | if [ -r "${SELDON_DATA_DIR}/movielens_actions.csv.sample" ]; then 28 | echo "using sample" 29 | docker run --rm -i -t -v ${SELDON_DATA_DIR}:/seldon_data seldon-tools bash -c 'rm -fv /seldon_data/movielens_actions.csv' 30 | cp -v "${SELDON_DATA_DIR}/movielens_actions.csv.sample" "${SELDON_DATA_DIR}/movielens_actions.csv" 31 | fi 32 | 33 | -------------------------------------------------------------------------------- /dist/movie_recommender_demo/download_and_create_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | DIST_DIR=${STARTUP_DIR}/.. 
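# run_settings is an optional file in the dist directory; when present it is sourced on the
# next line. A hypothetical example (the variable name is taken from the
# CREATE_MOVIELENS_SAMPLE_DATA check further down in this script):
#
#   # contents of dist/run_settings
#   CREATE_MOVIELENS_SAMPLE_DATA=true
#
# With that set, create_sample_data builds a small random sample of the MovieLens actions
# file instead of using the full download.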
8 | [ -f ${DIST_DIR}/run_settings ] && source ${DIST_DIR}/run_settings 9 | 10 | if [[ $# < 1 ]]; then 11 | echo "Need " 12 | exit 1 13 | fi 14 | 15 | CLIENT=$1 16 | YOUR_DATA_DIR=${STARTUP_DIR}/../your_data 17 | 18 | mkdir -p raw_data 19 | mkdir -p seldon_data 20 | 21 | # download data and transform and combine data sources 22 | docker run --name="movielens_data_transform" -it --rm -v ${STARTUP_DIR}/seldon_data:/movielens/seldon -v ${STARTUP_DIR}/raw_data:/movielens/data movielens_data_transform /movielens/scripts/run.sh 23 | 24 | if [ "${CREATE_MOVIELENS_SAMPLE_DATA:-}" == "true" ]; then 25 | echo "-- Creating Movelens Sample Data --" 26 | ${STARTUP_DIR}/create_sample_data 27 | fi 28 | 29 | clear_actions() { 30 | echo "-- clearing actions for ${CLIENT} ---" 31 | ${YOUR_DATA_DIR}/clear_actions.sh ${CLIENT} 32 | } 33 | 34 | add_item_attributes() { 35 | echo "-- adding item attributes for ${CLIENT} --" 36 | local SCHEMA_FILE=movielens_items.json 37 | cp -v ${STARTUP_DIR}/seldon_data/${SCHEMA_FILE} ${YOUR_DATA_DIR}/schema/${SCHEMA_FILE} 38 | ${YOUR_DATA_DIR}/load_schema.sh ${CLIENT} ${SCHEMA_FILE} 39 | } 40 | 41 | import_items() { 42 | echo "-- importing items for ${CLIENT}" 43 | local ITEMS_FILE=movielens_items.csv 44 | cp -v ${STARTUP_DIR}/seldon_data/${ITEMS_FILE} ${YOUR_DATA_DIR}/items_data/${ITEMS_FILE} 45 | ${YOUR_DATA_DIR}/load_items.sh ${CLIENT} ${ITEMS_FILE} 46 | } 47 | 48 | import_users() { 49 | echo "-- importing users for ${CLIENT}" 50 | local USERS_FILE=movielens_users.csv 51 | cp -v ${STARTUP_DIR}/seldon_data/${USERS_FILE} ${YOUR_DATA_DIR}/users_data/${USERS_FILE} 52 | ${YOUR_DATA_DIR}/load_users.sh ${CLIENT} ${USERS_FILE} 53 | } 54 | 55 | create_user_actions() { 56 | echo "-- creating user actions for ${CLIENT}" 57 | local ACTIONS_FILE=movielens_actions.csv 58 | cp -v ${STARTUP_DIR}/seldon_data/${ACTIONS_FILE} ${YOUR_DATA_DIR}/actions_data/${ACTIONS_FILE} 59 | ${YOUR_DATA_DIR}/load_actions.sh ${CLIENT} ${ACTIONS_FILE} 60 | } 61 | 62 | clear_actions 63 | add_item_attributes 64 | import_items 65 | import_users 66 | create_user_actions 67 | 68 | -------------------------------------------------------------------------------- /dist/movie_recommender_demo/start-all-for-models: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | export USE_LOCAL_LOGS_DIR=${USE_LOCAL_LOGS_DIR:-} 9 | export USE_LOCAL_MYSQL_DATA_DIR=${USE_LOCAL_MYSQL_DATA_DIR-true} 10 | export USE_LOCAL_MODELS_DIR=${USE_LOCAL_MODELS_DIR-true} 11 | 12 | export REGISTRY_PREFIX= 13 | if [[ ${USE_LOCAL_LOGS_DIR} == "true" ]];then 14 | ${STARTUP_DIR}/../data-logs/start-data-logs ${STARTUP_DIR}/../local-logs 15 | else 16 | ${STARTUP_DIR}/../data-logs/start-data-logs 17 | fi 18 | 19 | if [[ ${USE_LOCAL_MODELS_DIR} == "true" ]];then 20 | ${STARTUP_DIR}/../seldon-models/start-seldon-models ${STARTUP_DIR}/../local-models 21 | else 22 | ${STARTUP_DIR}/../seldon-models/start-seldon-models 23 | fi 24 | 25 | if [[ ! 
${USE_LOCAL_MYSQL_DATA_DIR} == "true" ]];then 26 | ${STARTUP_DIR}/../mysql_data/start-mysql-data 27 | fi 28 | 29 | ${STARTUP_DIR}/../zookeeper/start-zookeeper-server 30 | ${STARTUP_DIR}/../consul/start-consul 31 | ${STARTUP_DIR}/../mysql/start-mysql-server 32 | ${STARTUP_DIR}/../spark/start-spark-offline-server 33 | 34 | -------------------------------------------------------------------------------- /dist/mysql/setup_local_mysql_data: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | DIST_DIR=${STARTUP_DIR}/.. 8 | 9 | recreate_local_mysql_data() { 10 | rm -rfv local_mysql_data 11 | mkdir local_mysql_data 12 | tar xf resources/mysql_data.tar.gz --strip-components=1 -C local_mysql_data 13 | } 14 | 15 | cd ${DIST_DIR} 16 | if [ ! -d "./local_mysql_data" ]; then 17 | echo "-- re-building local_mysql_data --" 18 | recreate_local_mysql_data 19 | else 20 | echo "-- existing local_mysql_data --" 21 | fi 22 | -------------------------------------------------------------------------------- /dist/mysql/start-mysql-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | DIST_DIR=${STARTUP_DIR}/.. 8 | 9 | echo "--- starting mysql ---" 10 | 11 | OPT_VOLUMES_FROM= 12 | OPT_V= 13 | 14 | if [[ ! ${USE_LOCAL_MYSQL_DATA_DIR} == "true" ]];then 15 | OPT_VOLUMES_FROM="--volumes-from mysql_data" 16 | else 17 | ${STARTUP_DIR}/setup_local_mysql_data 18 | OPT_V="-v ${DIST_DIR}/local_mysql_data:/mysql_data" 19 | fi 20 | 21 | docker run \ 22 | --name mysql_server_container \ 23 | ${OPT_VOLUMES_FROM} \ 24 | --volumes-from seldon-models \ 25 | ${OPT_V} \ 26 | -e MYSQL_ROOT_PASSWORD=mypass \ 27 | -d \ 28 | --entrypoint="/bin/sh" \ 29 | ${REGISTRY_PREFIX}mysql_image -c 'mysqld --user=root --datadir=/mysql_data' 30 | 31 | WAIT_SECS=12 32 | echo waiting ${WAIT_SECS} secs... 33 | sleep ${WAIT_SECS} 34 | 35 | -------------------------------------------------------------------------------- /dist/mysql/stop-mysql-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping mysql ---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=mysql_server_container 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | ${DOCKER_CMD} stop ${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! -z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/mysql_data/start-mysql-data: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- starting mysql ---" 9 | 10 | docker run \ 11 | --name mysql_data \ 12 | -d \ 13 | ${REGISTRY_PREFIX}mysql_data nginx -g 'daemon off;' 14 | 15 | WAIT_SECS=2 16 | echo waiting ${WAIT_SECS} secs... 
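# The short sleep below simply gives the mysql_data data-only container a moment to start
# before other containers attach to it with --volumes-from. A sketch of an explicit
# readiness check (reusing the docker inspect pattern from the stop-* scripts) that could
# replace the fixed delay:
#
#   until [ "$(docker inspect -f '{{.State.Running}}' mysql_data 2> /dev/null)" == "true" ]; do
#     sleep 1
#   done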
17 | sleep ${WAIT_SECS} 18 | 19 | -------------------------------------------------------------------------------- /dist/mysql_data/stop-mysql-data: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping mysql data container---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=mysql_data 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | docker stop ${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! -z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/seldon-models/setup-local-models: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | DIST_DIR=${STARTUP_DIR}/.. 8 | 9 | recreate_local_models() { 10 | mkdir -p local-models 11 | } 12 | 13 | cd ${DIST_DIR} 14 | if [ ! -d "./local-models" ]; then 15 | echo "-- re-building local-models --" 16 | recreate_local_models 17 | else 18 | echo "-- existing local-models --" 19 | fi 20 | -------------------------------------------------------------------------------- /dist/seldon-models/start-seldon-models: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- starting seldon models data container ---" 9 | 10 | if [[ $# > 0 ]]; then 11 | LOCAL_MODELS_DIR=$1 12 | ${STARTUP_DIR}/setup-local-models 13 | docker run --name seldon-models -d -v ${LOCAL_MODELS_DIR}:/seldon-models ${REGISTRY_PREFIX}seldon-models 14 | else 15 | docker run --name seldon-models -d ${REGISTRY_PREFIX}seldon-models 16 | fi 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /dist/seldon-models/stop-seldon-models: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping seldon models data container ---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=seldon-models 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | ${DOCKER_CMD} stop ${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! 
-z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/semvec/create-movielens-sv: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | docker run --name="testing_semvec" --rm --volumes-from seldon-models --link mysql_server_container:mysql_server --link consul:consul ${REGISTRY_PREFIX}semvec_training bash -c "/scripts/run-training.sh movielens 30000 description /seldon-models" 9 | -------------------------------------------------------------------------------- /dist/setup-local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export USE_LOCAL_LOGS_DIR=true 3 | export USE_LOCAL_MODELS_DIR=true 4 | export REGISTRY_PREFIX= 5 | export USE_LOCAL_MYSQL_DATA_DIR= 6 | -------------------------------------------------------------------------------- /dist/spark/jobs/run-movielens-cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | docker exec -it spark_offline_server_container bash -c "/spark-jobs/cluster-users-by-taxomonmy.py -start-day 1 -num-days 1 -client movielens -min-cluster-size 0 -delta 0.1" 7 | 8 | 9 | -------------------------------------------------------------------------------- /dist/spark/jobs/run-movielens-item-similarity.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | docker exec -it spark_offline_server_container bash -c "/spark-jobs/item-similarity.py -start-day 1 -num-days 1 -client movielens -min-users-per-item 0 -min-items-per-user 0 -threshold 0.001" 7 | 8 | 9 | -------------------------------------------------------------------------------- /dist/spark/jobs/run-movielens-topic-model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | docker exec -it spark_offline_server_container bash -c "/spark-jobs/topic-modeling-create-session-tags.py -client movielens -start-day 1 -num-days 1 -tag-attr-id 20 -table text" 7 | docker run --volumes-from seldon-models ${REGISTRY_PREFIX}vw_topic_model /scripts/run.sh movielens local /seldon-models/movielens/user_tag_count/1 /seldon-models/movielens/topics/1 8 | 9 | -------------------------------------------------------------------------------- /dist/spark/start-spark-offline-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | DIST_DIR=${STARTUP_DIR}/.. 
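# run_settings (optional, sourced on the next line if present) can also define
# RUN_START_SPARK_OFFLINE_DOCKER_OPTS, which defaults to empty below and is passed straight
# through to docker run. A hypothetical example that raises the container memory limit for
# larger training runs (-m is the standard docker run memory flag; the value is
# illustrative only):
#
#   # contents of dist/run_settings
#   RUN_START_SPARK_OFFLINE_DOCKER_OPTS="-m 4g"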
8 | [ -f ${DIST_DIR}/run_settings ] && source ${DIST_DIR}/run_settings 9 | 10 | RUN_START_SPARK_OFFLINE_DOCKER_OPTS=${RUN_START_SPARK_OFFLINE_DOCKER_OPTS:-} 11 | 12 | echo "--- starting spark offline ---" 13 | docker run -d \ 14 | --name spark_offline_server_container \ 15 | --link mysql_server_container:mysql_server \ 16 | --link consul:consul \ 17 | --link zookeeper_server_container:zookeeper_server \ 18 | --volumes-from api_server_data_logs_container \ 19 | --volumes-from seldon-models \ 20 | ${RUN_START_SPARK_OFFLINE_DOCKER_OPTS} \ 21 | ${REGISTRY_PREFIX}spark_image /bin/bash -c '/startup-scripts/run-keep-alive' 22 | 23 | -------------------------------------------------------------------------------- /dist/spark/start-spark-streaming-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | PROJDIR=${STARTUP_DIR}/../.. 8 | 9 | echo "--- starting spark streaming ---" 10 | docker run -d \ 11 | --name spark_streaming_server_container \ 12 | --link mysql_server_container:mysql_server \ 13 | --link zookeeper_server_container:zookeeper_server \ 14 | ${REGISTRY_PREFIX}spark_image /bin/bash -c '/startup-scripts/run-streaming-job' 15 | 16 | -------------------------------------------------------------------------------- /dist/spark/stop-spark-offline-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping spark offline ---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=spark_offline_server_container 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | ${DOCKER_CMD} stop ${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! -z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/spark/stop-spark-streaming-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping spark streaming ---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=spark_streaming_server_container 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | ${DOCKER_CMD} stop ${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! 
-z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/start-all: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | DIST_DIR=${STARTUP_DIR} 8 | [ -f ${DIST_DIR}/run_settings ] && source ${DIST_DIR}/run_settings 9 | 10 | # set some defaults 11 | export USE_LOCAL_LOGS_DIR=${USE_LOCAL_LOGS_DIR:-} 12 | export USE_LOCAL_MYSQL_DATA_DIR=${USE_LOCAL_MYSQL_DATA_DIR-true} 13 | export USE_LOCAL_MODELS_DIR=${USE_LOCAL_MODELS_DIR-true} 14 | export REGISTRY_PREFIX= 15 | echo "*** startup env ***" 16 | echo "[USE_LOCAL_LOGS_DIR=${USE_LOCAL_LOGS_DIR}]" 17 | echo "[USE_LOCAL_MYSQL_DATA_DIR=${USE_LOCAL_MYSQL_DATA_DIR}]" 18 | echo "[USE_LOCAL_MODELS_DIR=${USE_LOCAL_MODELS_DIR}]" 19 | echo "[REGISTRY_PREFIX=${REGISTRY_PREFIX}]" 20 | echo "*******************" 21 | 22 | ${STARTUP_DIR}/stop-all 23 | 24 | if [[ ${USE_LOCAL_LOGS_DIR} == "true" ]];then 25 | ${STARTUP_DIR}/data-logs/start-data-logs ${STARTUP_DIR}/local-logs 26 | else 27 | ${STARTUP_DIR}/data-logs/start-data-logs 28 | fi 29 | 30 | if [[ ${USE_LOCAL_MODELS_DIR} == "true" ]];then 31 | ${STARTUP_DIR}/seldon-models/start-seldon-models ${STARTUP_DIR}/local-models 32 | else 33 | ${STARTUP_DIR}/seldon-models/start-seldon-models 34 | fi 35 | 36 | if [[ ! ${USE_LOCAL_MYSQL_DATA_DIR} == "true" ]];then 37 | ${STARTUP_DIR}/mysql_data/start-mysql-data 38 | fi 39 | 40 | ${STARTUP_DIR}/memcache/start-memcache-server 41 | ${STARTUP_DIR}/zookeeper/start-zookeeper-server 42 | ${STARTUP_DIR}/consul/start-consul 43 | #${STARTUP_DIR}/kafka/start-kafka-server 44 | ${STARTUP_DIR}/mysql/start-mysql-server 45 | ${STARTUP_DIR}/td-agent/start-td-agent-server 46 | #${STARTUP_DIR}/spark/start-spark-streaming-server 47 | ${STARTUP_DIR}/spark/start-spark-offline-server 48 | ${STARTUP_DIR}/api-server/start-api-server-server 49 | 50 | -------------------------------------------------------------------------------- /dist/start-all-recsvm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | export USE_LOCAL_LOGS_DIR=true 9 | export USE_LOCAL_MYSQL_DATA_DIR= 10 | 11 | mkdir -p local-logs 12 | 13 | ${STARTUP_DIR}/start-all 14 | 15 | -------------------------------------------------------------------------------- /dist/start-all-with-local-logs: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | export USE_LOCAL_LOGS_DIR=true 9 | 10 | mkdir -p local-logs 11 | 12 | ${STARTUP_DIR}/start-all 13 | 14 | -------------------------------------------------------------------------------- /dist/stop-all: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | ${STARTUP_DIR}/api-server/stop-api-server-server 9 | ${STARTUP_DIR}/spark/stop-spark-offline-server 10 | ${STARTUP_DIR}/spark/stop-spark-streaming-server 11 | ${STARTUP_DIR}/td-agent/stop-td-agent-server 12 | ${STARTUP_DIR}/mysql/stop-mysql-server 13 | 
#${STARTUP_DIR}/kafka/stop-kafka-server 14 | ${STARTUP_DIR}/consul/stop-consul 15 | ${STARTUP_DIR}/zookeeper/stop-zookeeper-server 16 | ${STARTUP_DIR}/memcache/stop-memcache-server 17 | ${STARTUP_DIR}/data-logs/stop-data-logs 18 | ${STARTUP_DIR}/seldon-models/stop-seldon-models 19 | ${STARTUP_DIR}/mysql_data/stop-mysql-data 20 | 21 | 22 | -------------------------------------------------------------------------------- /dist/td-agent/start-td-agent-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- starting td-agent ---" 9 | 10 | docker exec -i -t api_server_data_logs_container bash -c 'mkdir -p /data-logs/fluentd' 11 | 12 | docker run \ 13 | --name="td_agent_server_container" \ 14 | --volumes-from api_server_data_logs_container \ 15 | -d \ 16 | ${REGISTRY_PREFIX}td_agent_image 17 | 18 | -------------------------------------------------------------------------------- /dist/td-agent/stop-td-agent-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping td-agent ---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=td_agent_server_container 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | ${DOCKER_CMD} stop ${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! -z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/your_data/actions_data/example_actions.csv: -------------------------------------------------------------------------------- 1 | user_id,item_id,value,time 2 | 1,2,1,1422128735 3 | 2,2,1,1422752450 4 | 3,3,1,1422735290 5 | 4,1,1,1422792312 6 | 1,1,1,1422795111 7 | 4,3,1,1422754829 -------------------------------------------------------------------------------- /dist/your_data/activate_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | if [[ $# < 2 ]]; then 9 | echo "Need " 10 | exit 1 11 | fi 12 | 13 | CLIENT=$1 14 | MODEL=$2 15 | 16 | echo "Activating ${MODEL}" 17 | 18 | set_zk_node() { 19 | local ZK_NODE_PATH="$1" 20 | local ZK_NODE_VALUE="$2" 21 | 22 | docker run --rm -i -t --name seldon_tools --link zookeeper_server_container:zk seldon-tools /seldon-tools/scripts/zookeeper/zkcmd.py --zk-hosts zk --cmd set --cmd-args "${ZK_NODE_PATH}" "${ZK_NODE_VALUE}" 23 | } 24 | 25 | do_matrix_factorization() { 26 | set_zk_node '/config/mf' "${CLIENT}" 27 | } 28 | 29 | do_item_similarity() { 30 | docker run --name="activate_item_similarity_model" -it --rm --volumes-from seldon-models --link mysql_server_container:mysql_server --link consul:consul seldon-tools bash -c "/seldon-tools/scripts/models/item-similarity/activate.sh ${CLIENT}" 31 | } 32 | 33 | do_semantic_vectors() { 34 | set_zk_node '/config/svtext' "${CLIENT}" 35 | } 36 | 37 | do_word2vec() { 38 | set_zk_node '/config/word2vec' "${CLIENT}" 39 | } 
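# Each helper above activates a model by pointing the relevant Zookeeper config node at the
# client via zkcmd.py (/config/mf, /config/svtext, /config/word2vec) or, for item
# similarity, by running activate.sh from the seldon-tools image. The case statement below
# dispatches on the model name. Typical usage, as driven by
# movie_recommender_demo/create_recommender_demo.sh:
#
#   ./activate_models.sh movielens matrix_factorization
#   ./activate_models.sh movielens item_similarity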
40 | 41 | case $MODEL in 42 | matrix_factorization) 43 | do_matrix_factorization 44 | ;; 45 | item_similarity) 46 | do_item_similarity 47 | ;; 48 | semantic_vectors) 49 | do_semantic_vectors 50 | ;; 51 | word2vec) 52 | do_word2vec 53 | ;; 54 | *) 55 | echo "ignoring unkown model[$MODEL]" 56 | ;; 57 | esac 58 | 59 | -------------------------------------------------------------------------------- /dist/your_data/clear_actions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | if [[ $# < 1 ]]; then 9 | echo "Need " 10 | exit 1 11 | fi 12 | 13 | CLIENT=$1 14 | 15 | #clear actions 16 | docker run --name="your_data_clear_actions" -it --rm --link mysql_server_container:mysql_server --link consul:consul seldon-tools /seldon-tools/scripts/import/clear_actions.sh ${CLIENT} 17 | -------------------------------------------------------------------------------- /dist/your_data/create_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | if [[ $# < 2 ]]; then 9 | echo "Need " 10 | exit 1 11 | fi 12 | 13 | CLIENT=$1 14 | MODEL=$2 15 | 16 | set_zk_node() { 17 | local ZK_NODE_PATH="$1" 18 | local ZK_NODE_VALUE="$2" 19 | 20 | docker run --rm -i -t --name seldon_tools --link zookeeper_server_container:zk seldon-tools /seldon-tools/scripts/zookeeper/zkcmd.py --zk-hosts zk --cmd set --cmd-args "${ZK_NODE_PATH}" "${ZK_NODE_VALUE}" 21 | } 22 | 23 | do_matrix_factorization() { 24 | set_zk_node "/all_clients/${CLIENT}/offline/matrix-factorization" \ 25 | '{"activate":true,"alpha":1,"days":1,"inputPath":"/seldon-models","iterations":5,"lambda":0.1,"local":true,"outputPath":"/seldon-models","rank":30,"startDay":1}' 26 | 27 | docker exec -it spark_offline_server_container bash -c "/spark-jobs/matrix-factorization.sh ${CLIENT}" 28 | } 29 | 30 | do_item_similarity() { 31 | set_zk_node "/all_clients/${CLIENT}/offline/similar-items" \ 32 | '{"inputPath":"/seldon-models","outputPath":"/seldon-models","days":1,"sample":0.25,"limit":100,"dimsum_threshold":0.5}' 33 | 34 | docker exec -it spark_offline_server_container bash -c "/spark-jobs/item-similarity.sh ${CLIENT}" 35 | 36 | docker run --name="upload_item_similarity_model" -it --rm --volumes-from seldon-models --link mysql_server_container:mysql_server --link consul:consul seldon-tools bash -c "/seldon-tools/scripts/models/item-similarity/create_sql_and_upload.sh ${CLIENT}" 37 | } 38 | 39 | do_semantic_vectors() { 40 | MYSQL_HOST=$(docker inspect --format '{{ .NetworkSettings.IPAddress }}' 'mysql_server_container') 41 | MYSQL_ROOT_PASSWORD=mypass 42 | JOB_CONFIG='{"inputPath":"/seldon-models","outputPath":"/seldon-models","startDay":1,"days":1,"activate":true,"itemType":1,"itemLimit":10000,"tagAttrs":"movielens_tags_full","jdbc":"jdbc:mysql://'${MYSQL_HOST}':3306/'${CLIENT}'?user=root&password='${MYSQL_ROOT_PASSWORD}'&characterEncoding=utf8"}' 43 | set_zk_node "/all_clients/${CLIENT}/offline/semvec" "${JOB_CONFIG}" 44 | 45 | docker run --rm -i -t \ 46 | --volumes-from seldon-models \ 47 | --link zookeeper_server_container:zk \ 48 | seldonio/semantic-vectors-for-seldon bash -c "./semvec/semantic-vectors.py --client ${CLIENT} --zookeeper zk:2181" 49 | } 50 | 51 | do_word2vec() { 52 | set_zk_node "all_clients/${CLIENT}/offline/sessionitems" \ 53 | 
'{"inputPath":"/seldon-models","outputPath":"/seldon-models","startDay":1,"days":1,"maxIntraSessionGapSecs":-1,"minActionsPerUser":0,"maxActionsPerUser":100000}' 54 | 55 | docker exec -it spark_offline_server_container bash -c "/spark-jobs/session-items.sh ${CLIENT}" 56 | 57 | set_zk_node "all_clients/${CLIENT}/offline/word2vec" \ 58 | '{"inputPath":"/seldon-models","outputPath":"/seldon-models","activate":true,"startDay":1,"days":1,"activate":true,"minWordCount":50,"vectorSize":200}' 59 | 60 | docker exec -it spark_offline_server_container bash -c "/spark-jobs/word2vec.sh ${CLIENT}" 61 | } 62 | 63 | case $MODEL in 64 | matrix_factorization) 65 | do_matrix_factorization 66 | ;; 67 | item_similarity) 68 | do_item_similarity 69 | ;; 70 | semantic_vectors) 71 | do_semantic_vectors 72 | ;; 73 | word2vec) 74 | do_word2vec 75 | ;; 76 | *) 77 | echo "ignoring unkown model[$MODEL]" 78 | ;; 79 | esac 80 | 81 | -------------------------------------------------------------------------------- /dist/your_data/items_data/example_items.csv: -------------------------------------------------------------------------------- 1 | id,name,title,artist,genre,price,is_compilation,sales_count,tags 2 | 1,"tune1","tune1","artist1","pop",10,0,1,"pop,fast,dance,female,american" 3 | 2,"tune2","tune2","artist2","rock",20,1,20,"rock,country,american,male,guitar" 4 | 3,"tune3","tune3","artist1","rock",10,1,30,"rock,heavy metal,sad,male" -------------------------------------------------------------------------------- /dist/your_data/load_actions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | if [[ $# < 2 ]]; then 9 | echo "Need and " 10 | exit 1 11 | fi 12 | 13 | CLIENT=$1 14 | ACTIONS_FILE=$2 15 | 16 | # create user actions (movie view history) 17 | docker run --name="your_data_load_actions" -it --rm -v ${STARTUP_DIR}/actions_data:/your_data/actions_data --volumes-from seldon-models --link mysql_server_container:mysql_server --link consul:consul seldon-tools /seldon-tools/scripts/import/create_actions_json.sh ${CLIENT} /your_data/actions_data/${ACTIONS_FILE} /seldon-models/${CLIENT}/actions/1/actions.json 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /dist/your_data/load_items.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | if [[ $# < 2 ]]; then 9 | echo "Need and " 10 | exit 1 11 | fi 12 | 13 | CLIENT=$1 14 | ITEMS_FILE=$2 15 | docker run --name="your_data_load_items" -it --rm -v ${STARTUP_DIR}/items_data:/your_data/items_data --link mysql_server_container:mysql_server --link consul:consul seldon-tools /seldon-tools/scripts/import/add_items.sh ${CLIENT} /your_data/items_data/${ITEMS_FILE} 16 | 17 | -------------------------------------------------------------------------------- /dist/your_data/load_schema.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | if [[ $# < 2 ]]; then 9 | echo "Need and " 10 | exit 1 11 | fi 12 | 13 | CLIENT=$1 14 | SCHEMA_FILE=$2 15 | 16 | # import item attributes into database 17 | docker run --name="your_data_create_item_attrs" -it --rm -v 
${STARTUP_DIR}/schema:/your_data/schema --link mysql_server_container:mysql_server --link consul:consul seldon-tools /seldon-tools/scripts/import/add_attr_schema.sh ${CLIENT} /your_data/schema/${SCHEMA_FILE} 18 | 19 | -------------------------------------------------------------------------------- /dist/your_data/load_users.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | if [[ $# < 2 ]]; then 9 | echo "Need and " 10 | exit 1 11 | fi 12 | 13 | CLIENT=$1 14 | USERS_FILE=$2 15 | 16 | docker run --name="your_data_load_users" -it --rm -v ${STARTUP_DIR}/users_data:/your_data/users_data --link mysql_server_container:mysql_server --link consul:consul seldon-tools /seldon-tools/scripts/import/add_users.sh ${CLIENT} /your_data/users_data/${USERS_FILE} 17 | 18 | -------------------------------------------------------------------------------- /dist/your_data/schema/example_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "types": [{ 3 | "type_id": 1, 4 | "type_name": "music", 5 | "type_attrs": [ 6 | {"name":"title","value_type":"string"}, 7 | {"name":"artist","value_type":"string"}, 8 | {"name":"genre","value_type":["pop","rock","rap"]}, 9 | {"name":"price","value_type":"double"}, 10 | {"name":"is_compilation","value_type":"boolean"}, 11 | {"name":"sales_count", "value_type":"int"}, 12 | {"name":"tags","value_type":"text"} 13 | ] 14 | } 15 | ] 16 | } -------------------------------------------------------------------------------- /dist/your_data/users_data/example_users.csv: -------------------------------------------------------------------------------- 1 | id,username 2 | 1,phil 3 | 2,clive 4 | 3,gurminder 5 | 4,alex -------------------------------------------------------------------------------- /dist/zookeeper/setup-movielens.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | #setup core nodes 7 | docker exec zookeeper_server_container bash -c '/opt/zookeeper/bin/zkCli.sh < <(echo "create /clients blank")' 8 | docker exec zookeeper_server_container bash -c '/opt/zookeeper/bin/zkCli.sh < <(echo "create /movielens blank")' 9 | 10 | #setup semvec 11 | docker exec zookeeper_server_container bash -c '/opt/zookeeper/bin/zkCli.sh < <(echo "create /clients/svtext movielens")' 12 | docker exec zookeeper_server_container bash -c '/opt/zookeeper/bin/zkCli.sh < <(echo "create /movielens/svtext local://seldon-models/movielens_demo/semvec")' 13 | 14 | #setup word2vec 15 | docker exec zookeeper_server_container bash -c '/opt/zookeeper/bin/zkCli.sh < <(echo "create /clients/word2vec movielens")' 16 | docker exec zookeeper_server_container bash -c '/opt/zookeeper/bin/zkCli.sh < <(echo "create /movielens/word2vec local://seldon-models/movielens_demo/word2vec")' 17 | 18 | #setup mf 19 | docker exec zookeeper_server_container bash -c '/opt/zookeeper/bin/zkCli.sh < <(echo "create /clients/mf movielens")' 20 | docker exec zookeeper_server_container bash -c '/opt/zookeeper/bin/zkCli.sh < <(echo "create /movielens/mf local://seldon-models/movielens_demo/mf")' 21 | 22 | #setup topics 23 | docker exec zookeeper_server_container bash -c '/opt/zookeeper/bin/zkCli.sh < <(echo "create /clients/topics movielens")' 24 | docker exec zookeeper_server_container bash -c '/opt/zookeeper/bin/zkCli.sh < <(echo "create 
/movielens/topics local://seldon-models/movielens_demo/topics")' 25 | -------------------------------------------------------------------------------- /dist/zookeeper/start-zookeeper-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | DIST_DIR=${STARTUP_DIR}/.. 8 | 9 | echo "--- starting zookeeper ---" 10 | #docker run --name zookeeper_server_container -d -p 2181:2181 -p 2888:2888 -p 3888:3888 ${REGISTRY_PREFIX}zookeeper_image 11 | 12 | docker run \ 13 | -d \ 14 | --name zookeeper_server_container \ 15 | -v ${DIST_DIR}/local_zookeeper_data:/tmp/zookeeper \ 16 | ${REGISTRY_PREFIX}zookeeper_image 17 | 18 | echo "waiting 2 secs" 19 | sleep 2 20 | echo "setup movielens zookeeper settings" 21 | cat ${STARTUP_DIR}/zoo-movielens.cfg | xargs -I {} docker exec zookeeper_server_container bash -c "python /zookeeper/scripts/update_zk.py < <(echo {})" 22 | if [ -f ${STARTUP_DIR}/"zoo.cfg" ]; 23 | then 24 | echo "setup custom client zookeeper settings" 25 | cat ${STARTUP_DIR}/zoo.cfg | xargs -I {} docker exec zookeeper_server_container bash -c "python /zookeeper/scripts/update_zk.py < <(echo {})" 26 | fi 27 | -------------------------------------------------------------------------------- /dist/zookeeper/stop-zookeeper-server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | echo "--- stopping zookeeper ---" 9 | 10 | DOCKER_CMD=docker 11 | CONTAINER_ID=zookeeper_server_container 12 | 13 | set +o errexit 14 | IS_RUNNING=$(${DOCKER_CMD} inspect -f {{.State.Running}} ${CONTAINER_ID} 2> /dev/null) 15 | set -o errexit 16 | 17 | if [ "${IS_RUNNING}" == "true" ]; then 18 | ${DOCKER_CMD} stop ${CONTAINER_ID} 19 | fi 20 | 21 | EXITED_CONTAINER_ID=$(docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}') 22 | if [ ! 
-z "$EXITED_CONTAINER_ID" ]; then 23 | docker ps -a|grep ' Exited '|grep "${CONTAINER_ID}"|awk '{print $1}'|xargs docker rm 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /dist/zookeeper/zkshell: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | DOCKER_CMD=docker 9 | CONTAINER_ID=zookeeper_server_container 10 | 11 | ZOOKEEPER_HOME=/opt/zookeeper 12 | 13 | ${DOCKER_CMD} exec -i -t \ 14 | ${CONTAINER_ID} \ 15 | /opt/zookeeper/bin/zkCli.sh 16 | 17 | -------------------------------------------------------------------------------- /dist/zookeeper/zoo-movielens.cfg: -------------------------------------------------------------------------------- 1 | append,/clients/svtext,movielens 2 | set,/movielens/svtext,local://seldon-models/movielens_demo/semvec 3 | append,/clients/word2vec,movielens 4 | set,/movielens/word2vec,local://seldon-models/movielens_demo/word2vec 5 | append,/clients/mf,movielens 6 | set,/movielens/mf,local://seldon-models/movielens_demo/mf 7 | append,/clients/topics,movielens 8 | set,/movielens/topics,local://seldon-models/movielens_demo/topics 9 | -------------------------------------------------------------------------------- /full-build/build-all: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o nounset 3 | set -o errexit 4 | 5 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | PROJ_DIR=${STARTUP_DIR}/.. 7 | 8 | build_images() { 9 | cd ${PROJ_DIR}/images 10 | ./build-all-images 11 | } 12 | 13 | build_imagetars() { 14 | cd ${PROJ_DIR}/vagrant/seldonvm-build-trusty/save-and-load-images 15 | make save_images 16 | } 17 | 18 | build_vmbox() { 19 | cd ${PROJ_DIR}/vagrant/seldonvm-build-trusty 20 | make build 21 | } 22 | 23 | show_build_time() { 24 | BUILD_START_TIME=$1 25 | BUILD_END_TIME=$2 26 | echo "--- all builds finished ---" 27 | echo "--- start: ${BUILD_START_TIME} ---" 28 | echo "--- end: ${BUILD_END_TIME} ---" 29 | } 30 | 31 | BUILD_START_TIME=$(date) 32 | build_images 33 | build_imagetars 34 | build_vmbox 35 | BUILD_END_TIME=$(date) 36 | 37 | show_build_time "${BUILD_START_TIME}" "${BUILD_END_TIME}" 38 | 39 | -------------------------------------------------------------------------------- /full-build/clean-all: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o nounset 3 | set -o errexit 4 | 5 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | PROJ_DIR=${STARTUP_DIR}/.. 7 | 8 | clean_images() { 9 | cd ${PROJ_DIR}/images 10 | ./clean-all-images 11 | } 12 | 13 | clean_imagetars() { 14 | cd ${PROJ_DIR}/vagrant/seldonvm-build-trusty/save-and-load-images 15 | make clean 16 | } 17 | 18 | clean_vmbox() { 19 | cd ${PROJ_DIR}/vagrant/seldonvm-build-trusty 20 | make clean 21 | } 22 | 23 | clean_images 24 | clean_imagetars 25 | clean_vmbox 26 | 27 | -------------------------------------------------------------------------------- /images/README.txt: -------------------------------------------------------------------------------- 1 | Not all data to create the images is inside the hg project. Some data needs to be downloaded. 
2 | 3 | mysql_data: 4 | run mysql_data/get-image-data.sh 5 | 6 | api-server_data_logs_image: 7 | run get-movielens-models.sh 8 | 9 | api_server_image: 10 | mvn build api-server and then run create-webapps 11 | 12 | spark_image: 13 | run copy-jars-to-app-dir 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /images/api_server_data_logs_image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:trusty 2 | 3 | ENV HOME /root 4 | 5 | ADD apps /apps 6 | 7 | VOLUME ["/data-logs"] 8 | 9 | # Define default command. 10 | CMD ["/apps/bin/keep_alive"] 11 | 12 | -------------------------------------------------------------------------------- /images/api_server_data_logs_image/Makefile: -------------------------------------------------------------------------------- 1 | IMAGE_NAME=api_server_data_logs_image 2 | PROJ_DIR=../.. 3 | -include $(PROJ_DIR)/build_settings 4 | 5 | dummy: 6 | @echo dummy 7 | 8 | build_image: 9 | docker build --force-rm=true -t $(IMAGE_NAME) . 10 | 11 | push_to_registry: 12 | docker login -u $(PRIVATE_REGISTRY_USER) -p $(PRIVATE_REGISTRY_PASSWORD) -e "$(PRIVATE_REGISTRY_EMAIL)" https://$(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT) && \ 13 | docker tag $(IMAGE_NAME) $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) && \ 14 | docker push $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) 15 | 16 | -------------------------------------------------------------------------------- /images/api_server_data_logs_image/apps/bin/keep_alive: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | while true; do 9 | sleep 1 10 | done 11 | -------------------------------------------------------------------------------- /images/api_server_demo_image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM api_server_image 2 | 3 | ADD webapps/api-spec /webapps/api-spec 4 | ADD webapps/swagger /webapps/swagger 5 | ADD webapps/movie-demo webapps/movie-demo 6 | 7 | ADD startup.sh /apps/api-server/startup.sh 8 | ADD add_js_embedly_prefix.sh /apps/api-server/add_js_embedly_prefix.sh 9 | 10 | -------------------------------------------------------------------------------- /images/api_server_demo_image/Makefile: -------------------------------------------------------------------------------- 1 | IMAGE=api_server_demo_image 2 | SELDON_VERSION=0.2 3 | EMBEDLY_KEY= 4 | 5 | IMAGE_NAME=$(IMAGE) 6 | PROJ_DIR=../.. 7 | -include $(PROJ_DIR)/build_settings 8 | 9 | MOVIE_DEMO_FRONTEND_TAG=tags/v1.3.1 10 | 11 | dummy: 12 | @echo dummy 13 | 14 | swagger-codegen: 15 | git clone https://github.com/swagger-api/swagger-codegen 16 | cd swagger-codegen ; git checkout tags/2.0.17 ; npm install js-yaml ; cd .. 17 | 18 | webapps/api-spec/seldon.json:swagger-codegen 19 | mkdir -p webapps/api-spec 20 | node swagger-codegen/bin/yml2swagger.js api-spec/seldon webapps/api-spec 21 | 22 | webapps/movie-demo: 23 | git clone https://github.com/SeldonIO/movie-demo-frontend 24 | cd movie-demo-frontend ; git checkout $(MOVIE_DEMO_FRONTEND_TAG) ; npm install ; bower install ; cd .. 25 | cd movie-demo-frontend ; grunt build ; cd .. 
26 | mkdir -p webapps/movie-demo 27 | cp -R movie-demo-frontend/dist/* webapps/movie-demo 28 | 29 | webapps/swagger: 30 | mkdir -p webapps 31 | cp -R swagger/ webapps/swagger 32 | 33 | webapps: webapps/swagger webapps/movie-demo webapps/api-spec/seldon.json 34 | 35 | build_image: webapps 36 | docker build --force-rm=true -t ${IMAGE} . 37 | 38 | push_to_registry: 39 | docker login -u $(PRIVATE_REGISTRY_USER) -p $(PRIVATE_REGISTRY_PASSWORD) -e "$(PRIVATE_REGISTRY_EMAIL)" https://$(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT) && \ 40 | docker tag $(IMAGE_NAME) $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) && \ 41 | docker push $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) 42 | 43 | clean: 44 | @rm -rf webapps 45 | @rm -rf swagger-codegen 46 | @rm -rf movie-demo-frontend 47 | -------------------------------------------------------------------------------- /images/api_server_demo_image/add_js_embedly_prefix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | export EMBEDLY_KEY=${EMBEDLY_KEY:-} 7 | 8 | JS_SCRIPT=$(ls /webapps/movie-demo/scripts/scripts.*.js) 9 | 10 | if [ -z "${EMBEDLY_KEY}" ]; then 11 | sed -i -e 's///g' ${JS_SCRIPT} 12 | else 13 | sed -i -e 's//http:\/\/i.embed.ly\/1\/display\/resize?key='${EMBEDLY_KEY}'\&url=/g' ${JS_SCRIPT} 14 | fi 15 | 16 | -------------------------------------------------------------------------------- /images/api_server_demo_image/startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | mkdir -p /data-logs/seldon-server 7 | ln -snv /data-logs/seldon-server /apps/tomcat7/logs/ 8 | 9 | /apps/api-server/add_js_embedly_prefix.sh 10 | 11 | /apps/tomcat7/bin/catalina.sh run 12 | 13 | -------------------------------------------------------------------------------- /images/api_server_demo_image/swagger/css/reset.css: -------------------------------------------------------------------------------- 1 | /* http://meyerweb.com/eric/tools/css/reset/ v2.0 | 20110126 */ 2 | html, 3 | body, 4 | div, 5 | span, 6 | applet, 7 | object, 8 | iframe, 9 | h1, 10 | h2, 11 | h3, 12 | h4, 13 | h5, 14 | h6, 15 | p, 16 | blockquote, 17 | pre, 18 | a, 19 | abbr, 20 | acronym, 21 | address, 22 | big, 23 | cite, 24 | code, 25 | del, 26 | dfn, 27 | em, 28 | img, 29 | ins, 30 | kbd, 31 | q, 32 | s, 33 | samp, 34 | small, 35 | strike, 36 | strong, 37 | sub, 38 | sup, 39 | tt, 40 | var, 41 | b, 42 | u, 43 | i, 44 | center, 45 | dl, 46 | dt, 47 | dd, 48 | ol, 49 | ul, 50 | li, 51 | fieldset, 52 | form, 53 | label, 54 | legend, 55 | table, 56 | caption, 57 | tbody, 58 | tfoot, 59 | thead, 60 | tr, 61 | th, 62 | td, 63 | article, 64 | aside, 65 | canvas, 66 | details, 67 | embed, 68 | figure, 69 | figcaption, 70 | footer, 71 | header, 72 | hgroup, 73 | menu, 74 | nav, 75 | output, 76 | ruby, 77 | section, 78 | summary, 79 | time, 80 | mark, 81 | audio, 82 | video { 83 | margin: 0; 84 | padding: 0; 85 | border: 0; 86 | font-size: 100%; 87 | font: inherit; 88 | vertical-align: baseline; 89 | } 90 | /* HTML5 display-role reset for older browsers */ 91 | article, 92 | aside, 93 | details, 94 | figcaption, 95 | figure, 96 | footer, 97 | header, 98 | hgroup, 99 | menu, 100 | nav, 101 | section { 102 | display: block; 103 | } 104 | body { 105 | line-height: 1; 106 | } 107 | ol, 108 | ul { 109 | list-style: none; 110 | } 111 | blockquote, 112 | q { 113 | quotes: 
none; 114 | } 115 | blockquote:before, 116 | blockquote:after, 117 | q:before, 118 | q:after { 119 | content: ''; 120 | content: none; 121 | } 122 | table { 123 | border-collapse: collapse; 124 | border-spacing: 0; 125 | } 126 | -------------------------------------------------------------------------------- /images/api_server_demo_image/swagger/images/explorer_icons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SeldonIO/seldon-vm/f49e2cea3cb6919ee058265c4524df27555e848f/images/api_server_demo_image/swagger/images/explorer_icons.png -------------------------------------------------------------------------------- /images/api_server_demo_image/swagger/images/logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SeldonIO/seldon-vm/f49e2cea3cb6919ee058265c4524df27555e848f/images/api_server_demo_image/swagger/images/logo_small.png -------------------------------------------------------------------------------- /images/api_server_demo_image/swagger/images/pet_store_api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SeldonIO/seldon-vm/f49e2cea3cb6919ee058265c4524df27555e848f/images/api_server_demo_image/swagger/images/pet_store_api.png -------------------------------------------------------------------------------- /images/api_server_demo_image/swagger/images/throbber.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SeldonIO/seldon-vm/f49e2cea3cb6919ee058265c4524df27555e848f/images/api_server_demo_image/swagger/images/throbber.gif -------------------------------------------------------------------------------- /images/api_server_demo_image/swagger/images/wordnik_api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SeldonIO/seldon-vm/f49e2cea3cb6919ee058265c4524df27555e848f/images/api_server_demo_image/swagger/images/wordnik_api.png -------------------------------------------------------------------------------- /images/api_server_demo_image/swagger/lib/jquery.slideto.min.js: -------------------------------------------------------------------------------- 1 | (function(b){b.fn.slideto=function(a){a=b.extend({slide_duration:"slow",highlight_duration:3E3,highlight:true,highlight_color:"#FFFF99"},a);return this.each(function(){obj=b(this);b("body").animate({scrollTop:obj.offset().top},a.slide_duration,function(){a.highlight&&b.ui.version&&obj.effect("highlight",{color:a.highlight_color},a.highlight_duration)})})}})(jQuery); 2 | -------------------------------------------------------------------------------- /images/api_server_demo_image/swagger/lib/jquery.wiggle.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | jQuery Wiggle 3 | Author: WonderGroup, Jordan Thomas 4 | URL: http://labs.wondergroup.com/demos/mini-ui/index.html 5 | License: MIT (http://en.wikipedia.org/wiki/MIT_License) 6 | */ 7 | jQuery.fn.wiggle=function(o){var d={speed:50,wiggles:3,travel:5,callback:null};var o=jQuery.extend(d,o);return this.each(function(){var cache=this;var wrap=jQuery(this).wrap('
').css("position","relative");var calls=0;for(i=1;i<=o.wiggles;i++){jQuery(this).animate({left:"-="+o.travel},o.speed).animate({left:"+="+o.travel*2},o.speed*2).animate({left:"-="+o.travel},o.speed,function(){calls++;if(jQuery(cache).parent().hasClass('wiggle-wrap')){jQuery(cache).parent().replaceWith(cache);} 8 | if(calls==o.wiggles&&jQuery.isFunction(o.callback)){o.callback();}});}});}; -------------------------------------------------------------------------------- /images/api_server_demo_image/swagger/o2c.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /images/api_server_image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tomcat7_image 2 | 3 | ENV HOME /root 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | ADD apps /apps 7 | 8 | RUN rm -rf /webapps 9 | 10 | ADD webapps /webapps 11 | ADD tomcat_env/setenv.sh /apps/tomcat7/bin/setenv.sh 12 | 13 | # Define default command. 14 | CMD /apps/tomcat7/bin/catalina.sh run 15 | 16 | -------------------------------------------------------------------------------- /images/api_server_image/Makefile: -------------------------------------------------------------------------------- 1 | SELDON_VERSION=0.2 2 | 3 | IMAGE_NAME=api_server_image 4 | PROJ_DIR=../.. 5 | -include $(PROJ_DIR)/build_settings 6 | 7 | dummy: 8 | @echo dummy 9 | 10 | API_SERVER_VERSION=$(shell cat ./__API_SERVER_VERSION__) 11 | 12 | check_version: 13 | @echo "API_SERVER_VERSION[$(API_SERVER_VERSION)]" 14 | 15 | webapps: 16 | @./create-webapps-from-github ${API_SERVER_VERSION} 17 | 18 | build_image: webapps 19 | docker build --force-rm=true -t api_server_image . 20 | docker tag -f api_server_image api_server_image:$(API_SERVER_VERSION) 21 | 22 | clean: 23 | @rm -rfv ./webapps 24 | @rm -rfv ./*.war 25 | @rm -rfv ./seldon-server 26 | 27 | push_to_registry: 28 | docker login -u $(PRIVATE_REGISTRY_USER) -p $(PRIVATE_REGISTRY_PASSWORD) -e "$(PRIVATE_REGISTRY_EMAIL)" https://$(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT) && \ 29 | docker tag $(IMAGE_NAME) $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) && \ 30 | docker push $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) 31 | 32 | -------------------------------------------------------------------------------- /images/api_server_image/__API_SERVER_VERSION__: -------------------------------------------------------------------------------- 1 | v0.91.1 2 | -------------------------------------------------------------------------------- /images/api_server_image/apps/api-server/startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | mkdir -p /data-logs/seldon-server 9 | ln -snv /data-logs/seldon-server /apps/tomcat7/logs/ 10 | 11 | /apps/tomcat7/bin/catalina.sh run 12 | 13 | -------------------------------------------------------------------------------- /images/api_server_image/create-webapps-from-github: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | set -o nounset 3 | set -o errexit 4 | ##set -o xtrace 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | CLONE_SELDON_SERVER="git clone -q https://github.com/SeldonIO/seldon-server.git" 9 | 10 | if [[ $# < 1 ]]; then 11 | echo "Needs API version" 12 | exit 1 13 | fi 14 | VERSION=$1 15 | 16 | SELDON_SERVER_TAG=tags/${VERSION} 17 | 18 | cd ${STARTUP_DIR} 19 | rm -rf seldon-server* seldon-server*.war 20 | echo 'cloning seldon-server project from github' 21 | $CLONE_SELDON_SERVER 22 | cd ${STARTUP_DIR}/seldon-server 23 | echo "using version[${SELDON_SERVER_TAG}]" 24 | git checkout ${SELDON_SERVER_TAG} &> /dev/null 25 | cd ${STARTUP_DIR}/seldon-server/server 26 | echo 'building seldon-server project' 27 | mvn package -DskipTests -q 28 | cp target/seldon-server*.war ${STARTUP_DIR}/ 29 | cd ${STARTUP_DIR} 30 | 31 | WAR_FILE=`ls seldon-server*.war` 32 | rm -rf ./webapps 33 | 34 | mkdir -p ./webapps/ROOT 35 | 36 | unzip -qx -d ./webapps/ROOT/ ${WAR_FILE} &> /dev/null 37 | 38 | # clear up 39 | rm -rf seldon-server* seldon-server*.war 40 | echo 'done' 41 | 42 | -------------------------------------------------------------------------------- /images/api_server_image/create-webapps-from-local: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | if [[ $# < 1 ]]; then 9 | echo "Need full path to warfile" 10 | exit 1 11 | fi 12 | 13 | WAR_FILE_FPATH=$1 14 | 15 | cd ${STARTUP_DIR} 16 | 17 | echo $WAR_FILE_FPATH 18 | 19 | WAR_FILE=$(basename "${WAR_FILE_FPATH}") 20 | 21 | API_SERVER_VERSION=$(echo "${WAR_FILE}"|sed -e 's/seldon-server-//' -e 's/.war$//') 22 | echo "--- API_SERVER_VERSION[${API_SERVER_VERSION}] ---" 23 | echo "${API_SERVER_VERSION}" > __API_SERVER_VERSION__ 24 | 25 | if [ ! -f "./${WAR_FILE}" ]; then 26 | cp "${WAR_FILE_FPATH}" . 27 | fi 28 | 29 | rm -rfv ./webapps 30 | 31 | mkdir -p ./webapps/ROOT 32 | 33 | unzip -x -d ./webapps/ROOT/ ${WAR_FILE} 34 | 35 | -------------------------------------------------------------------------------- /images/api_server_image/create-webapps-from-s3: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | if [[ $# < 1 ]]; then 9 | echo "Need version" 10 | exit 1 11 | fi 12 | 13 | VERSION=$1 14 | 15 | API_SERVER_VERSION=$(cat __API_SERVER_VERSION__) 16 | echo "--- API_SERVER_VERSION[${API_SERVER_VERSION}] ---" 17 | 18 | WAR_FILE_FPATH=s3://seldon-vm/${VERSION}/images/api_server_image/api-server-${API_SERVER_VERSION}.war 19 | 20 | cd ${STARTUP_DIR} 21 | 22 | echo $WAR_FILE_FPATH 23 | 24 | WAR_FILE=$(basename "${WAR_FILE_FPATH}") 25 | 26 | if [ ! -f "./${WAR_FILE}" ]; then 27 | echo "--- fetching ${WAR_FILE_FPATH} ---" 28 | aws s3 cp $WAR_FILE_FPATH . 
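# Note: the fetch above assumes the build host has the AWS CLI installed and
# configured with credentials that can read the s3://seldon-vm bucket.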
29 | fi 30 | 31 | rm -rfv ./webapps 32 | 33 | mkdir -p ./webapps/ROOT 34 | 35 | unzip -x -d ./webapps/ROOT/ ${WAR_FILE} 36 | 37 | -------------------------------------------------------------------------------- /images/api_server_image/tomcat_env/setenv.sh: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /images/build-all-images: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o nounset 3 | set -o errexit 4 | 5 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | PROJ_DIR=${STARTUP_DIR}/.. 7 | [ -f ${PROJ_DIR}/build_settings ] && source ${PROJ_DIR}/build_settings 8 | 9 | BUILD_START_TIME=$(date) 10 | 11 | build_images() { 12 | for i in "${IMAGES_LIST[@]}"; do 13 | echo "--- building $i ---" 14 | cd "${STARTUP_DIR}/$i" 15 | make build_image SELDON_VERSION=0.2 16 | done 17 | } 18 | 19 | declare -a IMAGES_LIST=( \ 20 | "java7jre_image" \ 21 | "tomcat7_image" \ 22 | "zookeeper_image" \ 23 | "api_server_image" \ 24 | "api_server_demo_image" \ 25 | "api_server_data_logs_image" \ 26 | "seldon-models" \ 27 | "mysql_image" \ 28 | "mysql_data" \ 29 | "memcache_image" \ 30 | "td_agent_image" \ 31 | "spark_image" \ 32 | "seldon-tools" \ 33 | "consul_image" \ 34 | "movielens_data_transform" \ 35 | ) 36 | 37 | BUILD_EXCLUDED_IMAGES=${BUILD_EXCLUDED_IMAGES:-} 38 | if [ ! -z ${BUILD_EXCLUDED_IMAGES} ]; then 39 | for i in "${BUILD_EXCLUDED_IMAGES[@]}"; do 40 | echo "Removing [${i}] from build" 41 | IMAGES_LIST=( ${IMAGES_LIST[@]/$i} ) 42 | done 43 | fi 44 | 45 | if [ "${#IMAGES_LIST[@]}" -gt 0 ];then 46 | build_images 47 | fi 48 | 49 | BUILD_END_TIME=$(date) 50 | 51 | echo "--- build finished ---" 52 | echo "--- start: ${BUILD_START_TIME} ---" 53 | echo "--- end: ${BUILD_END_TIME} ---" 54 | 55 | -------------------------------------------------------------------------------- /images/clean-all-images: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o nounset 3 | set -o errexit 4 | 5 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | PROJ_DIR=${STARTUP_DIR}/.. 7 | [ -f ${PROJ_DIR}/build_settings ] && source ${PROJ_DIR}/build_settings 8 | 9 | clean_images() { 10 | for i in "${IMAGES_LIST[@]}"; do 11 | echo "--- cleaning $i ---" 12 | cd "${STARTUP_DIR}/$i" 13 | make clean 14 | done 15 | } 16 | 17 | declare -a IMAGES_LIST=( \ 18 | "mysql_image" \ 19 | "mysql_data" \ 20 | "spark_image" \ 21 | "api_server_image" \ 22 | "seldon-models" \ 23 | "api_server_demo_image" \ 24 | "movielens_data_transform" \ 25 | ) 26 | 27 | BUILD_EXCLUDED_IMAGES=${BUILD_EXCLUDED_IMAGES:-} 28 | if [ ! -z ${BUILD_EXCLUDED_IMAGES} ];then 29 | for i in "${BUILD_EXCLUDED_IMAGES[@]}"; do 30 | echo "Removing [${i}] from build" 31 | IMAGES_LIST=( ${IMAGES_LIST[@]/$i} ) 32 | done 33 | fi 34 | 35 | if [ "${#IMAGES_LIST[@]}" -gt 0 ];then 36 | clean_images 37 | fi 38 | 39 | -------------------------------------------------------------------------------- /images/consul_image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM progrium/consul 2 | 3 | -------------------------------------------------------------------------------- /images/consul_image/Makefile: -------------------------------------------------------------------------------- 1 | IMAGE_NAME=consul_image 2 | PROJ_DIR=../.. 
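# build_settings is optional (hence the leading dash on the include below); when
# present at the repository root it supplies the PRIVATE_REGISTRY_* values used
# by the push_to_registry target.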
3 | -include $(PROJ_DIR)/build_settings 4 | 5 | dummy: 6 | @echo dummy 7 | 8 | build_image: 9 | docker build --force-rm=true -t consul_image . 10 | 11 | push_to_registry: 12 | docker login -u $(PRIVATE_REGISTRY_USER) -p $(PRIVATE_REGISTRY_PASSWORD) -e "$(PRIVATE_REGISTRY_EMAIL)" https://$(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT) && \ 13 | docker tag $(IMAGE_NAME) $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) && \ 14 | docker push $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) 15 | 16 | -------------------------------------------------------------------------------- /images/consul_image/config/consul.json: -------------------------------------------------------------------------------- 1 | { 2 | "client_addr": "0.0.0.0", 3 | "data_dir": "/data", 4 | "ports": { 5 | "dns": 53 6 | }, 7 | "recursor": "8.8.8.8", 8 | "ui_dir": "/ui" 9 | } 10 | -------------------------------------------------------------------------------- /images/java7jre_image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:trusty 2 | 3 | ENV HOME /root 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | RUN ( apt-get update && \ 7 | apt-get install -y openjdk-7-jre-headless && \ 8 | apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*) 9 | 10 | ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64 11 | 12 | # Define default command. 13 | CMD ["bash"] 14 | 15 | -------------------------------------------------------------------------------- /images/java7jre_image/Makefile: -------------------------------------------------------------------------------- 1 | IMAGE_NAME=java7jre_image 2 | PROJ_DIR=../.. 3 | -include $(PROJ_DIR)/build_settings 4 | 5 | dummy: 6 | @echo dummy 7 | 8 | build_image: 9 | docker build --force-rm=true -t $(IMAGE_NAME) . 
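# java7jre_image is built before the other images (see images/build-all-images),
# since images such as kafka_image use it as their FROM base.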
10 | 11 | create_and_run_container: 12 | docker run -t -i --rm --name="java7jre_container" $(IMAGE_NAME) bash 13 | 14 | push_to_registry: 15 | docker login -u $(PRIVATE_REGISTRY_USER) -p $(PRIVATE_REGISTRY_PASSWORD) -e "$(PRIVATE_REGISTRY_EMAIL)" https://$(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT) && \ 16 | docker tag $(IMAGE_NAME) $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) && \ 17 | docker push $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) 18 | -------------------------------------------------------------------------------- /images/kafka_image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM java7jre_image 2 | 3 | RUN ( [ -e /usr/lib/apt/methods/https ] || { apt-get update && apt-get install -y apt-transport-https; } && \ 4 | apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 36A1D7869245C8950F966E92D8576A8BA88D21E9 && \ 5 | sh -c "echo deb https://get.docker.com/ubuntu docker main > /etc/apt/sources.list.d/docker.list" && \ 6 | apt-get update && \ 7 | apt-get install -y lxc-docker-1.3.3 && \ 8 | apt-get install -y wget && \ 9 | apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*) 10 | 11 | RUN ( wget -q http://mirror.vorboss.net/apache/kafka/0.8.1.1/kafka_2.8.0-0.8.1.1.tgz -O /tmp/kafka_2.8.0-0.8.1.1.tgz && \ 12 | tar xfz /tmp/kafka_2.8.0-0.8.1.1.tgz -C /opt && \ 13 | rm -fv /tmp/kafka_2.8.0-0.8.1.1.tgz) 14 | 15 | VOLUME ["/kafka"] 16 | 17 | ENV KAFKA_HOME /opt/kafka_2.8.0-0.8.1.1 18 | ADD start-kafka.sh /usr/bin/start-kafka.sh 19 | ADD broker-list.sh /usr/bin/broker-list.sh 20 | CMD start-kafka.sh 21 | 22 | -------------------------------------------------------------------------------- /images/kafka_image/Makefile: -------------------------------------------------------------------------------- 1 | IMAGE_NAME=kafka_image 2 | PROJ_DIR=../.. 3 | -include $(PROJ_DIR)/build_settings 4 | 5 | dummy: 6 | @echo dummy 7 | 8 | build_image: 9 | docker build --force-rm=true -t $(IMAGE_NAME) . 
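# No broker configuration is baked in at build time: start-kafka.sh (added by the
# Dockerfile) derives KAFKA_ADVERTISED_HOST_NAME, the broker id, the log dirs and
# the zookeeper connect string from the container environment when the image runs.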
10 | 11 | push_to_registry: 12 | docker login -u $(PRIVATE_REGISTRY_USER) -p $(PRIVATE_REGISTRY_PASSWORD) -e "$(PRIVATE_REGISTRY_EMAIL)" https://$(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT) && \ 13 | docker tag $(IMAGE_NAME) $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) && \ 14 | docker push $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) 15 | 16 | -------------------------------------------------------------------------------- /images/kafka_image/broker-list.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONTAINERS=$(docker ps | grep 9092 | awk '{print $1}') 4 | BROKERS=$(for CONTAINER in $CONTAINERS; do docker port $CONTAINER 9092 | sed -e "s/0.0.0.0:/$HOST_IP:/g"; done) 5 | echo $BROKERS | sed -e 's/ /,/g' 6 | -------------------------------------------------------------------------------- /images/kafka_image/start-kafka.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ -z "$KAFKA_ADVERTISED_HOST_NAME" ]]; then 4 | export KAFKA_ADVERTISED_HOST_NAME=$(ifconfig eth0 | awk '/inet / { print $2 }' | sed -e 's/addr://') 5 | fi 6 | 7 | if [[ -z "$KAFKA_ADVERTISED_PORT" ]]; then 8 | export KAFKA_ADVERTISED_PORT=$(docker port `hostname` 9092 | sed -r "s/.*:(.*)/\1/g") 9 | fi 10 | if [[ -z "$KAFKA_BROKER_ID" ]]; then 11 | export KAFKA_BROKER_ID=$KAFKA_ADVERTISED_PORT 12 | fi 13 | if [[ -z "$KAFKA_LOG_DIRS" ]]; then 14 | export KAFKA_LOG_DIRS="/kafka/kafka-logs-$KAFKA_BROKER_ID" 15 | fi 16 | if [[ -z "$KAFKA_ZOOKEEPER_CONNECT" ]]; then 17 | export KAFKA_ZOOKEEPER_CONNECT=$(env | grep ZK.*PORT_2181_TCP= | sed -e 's|.*tcp://||' | paste -sd ,) 18 | fi 19 | 20 | if [[ -n "$KAFKA_HEAP_OPTS" ]]; then 21 | sed -r -i "s/^(export KAFKA_HEAP_OPTS)=\"(.*)\"/\1=\"$KAFKA_HEAP_OPTS\"/g" $KAFKA_HOME/bin/kafka-server-start.sh 22 | unset KAFKA_HEAP_OPTS 23 | fi 24 | 25 | for VAR in `env` 26 | do 27 | if [[ $VAR =~ ^KAFKA_ && ! 
$VAR =~ ^KAFKA_HOME ]]; then 28 | kafka_name=`echo "$VAR" | sed -r "s/KAFKA_(.*)=.*/\1/g" | tr '[:upper:]' '[:lower:]' | tr _ .` 29 | env_var=`echo "$VAR" | sed -r "s/(.*)=.*/\1/g"` 30 | if egrep -q "(^|^#)$kafka_name" $KAFKA_HOME/config/server.properties; then 31 | sed -r -i "s@(^|^#)($kafka_name)=(.*)@\2=${!env_var}@g" $KAFKA_HOME/config/server.properties #note that no config values may contain an '@' char 32 | else 33 | echo "$kafka_name=${!env_var}" >> $KAFKA_HOME/config/server.properties 34 | fi 35 | fi 36 | done 37 | 38 | $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/server.properties 39 | -------------------------------------------------------------------------------- /images/memcache_image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:trusty 2 | 3 | ENV HOME /root 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | RUN ( apt-get update && \ 7 | apt-get install -y memcached && \ 8 | apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*) 9 | 10 | ADD test_memcache /test_memcache 11 | 12 | # Port to expose (default: 11211) 13 | EXPOSE 11211 14 | 15 | # Default Memcached run command arguments 16 | CMD ["-m", "128"] 17 | 18 | # Set the user to run Memcached daemon 19 | USER daemon 20 | 21 | # Set the entrypoint to memcached binary 22 | ENTRYPOINT memcached 23 | 24 | -------------------------------------------------------------------------------- /images/memcache_image/Makefile: -------------------------------------------------------------------------------- 1 | IMAGE_NAME=memcache_image 2 | PROJ_DIR=../.. 3 | -include $(PROJ_DIR)/build_settings 4 | 5 | dummy: 6 | @echo dummy 7 | 8 | build_image: 9 | docker build --force-rm=true -t $(IMAGE_NAME) . 10 | 11 | push_to_registry: 12 | docker login -u $(PRIVATE_REGISTRY_USER) -p $(PRIVATE_REGISTRY_PASSWORD) -e "$(PRIVATE_REGISTRY_EMAIL)" https://$(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT) && \ 13 | docker tag $(IMAGE_NAME) $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) && \ 14 | docker push $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) 15 | 16 | -------------------------------------------------------------------------------- /images/memcache_image/test_memcache/test_memcache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "stats settings" | nc localhost 11211 4 | 5 | -------------------------------------------------------------------------------- /images/movielens_data_transform/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:trusty 2 | 3 | ENV HOME /root 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | RUN \ 7 | apt-get update && \ 8 | apt-get install -y python python-pip python-dev make curl jq wget unzip libmysqlclient-dev && \ 9 | pip install unicodecsv && \ 10 | pip install MySQL-python && \ 11 | apt-get remove -y --auto-remove gcc python-pip python-dev make libmysqlclient-dev && \ 12 | apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 13 | 14 | ADD ./freebase_data /freebase_data 15 | ADD ./scripts /movielens/scripts 16 | 17 | # Define default command. 18 | CMD ["bash"] 19 | -------------------------------------------------------------------------------- /images/movielens_data_transform/Makefile: -------------------------------------------------------------------------------- 1 | IMAGE_NAME=movielens_data_transform 2 | PROJ_DIR=../.. 
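# This image packages the downloaded freebase movie metadata (see the
# freebase_data/freebase.json target below) together with the scripts that turn
# the raw MovieLens and hetrec downloads into the Seldon items, users and actions
# CSV files (see scripts/run.sh further down).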
3 | -include $(PROJ_DIR)/build_settings 4 | 5 | DIR := ${CURDIR} 6 | dummy: 7 | @echo dummy 8 | 9 | freebase_data/freebase.json: 10 | mkdir -p freebase_data 11 | cd freebase_data ; wget http://s3-eu-west-1.amazonaws.com/static.seldon.io/datasets/freebase-movies/freebase.json ; cd .. 12 | 13 | build_image: freebase_data/freebase.json 14 | docker build -t ${IMAGE_NAME} . 15 | 16 | push_to_registry: 17 | docker login -u $(PRIVATE_REGISTRY_USER) -p $(PRIVATE_REGISTRY_PASSWORD) -e "$(PRIVATE_REGISTRY_EMAIL)" https://$(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT) && \ 18 | docker tag $(IMAGE_NAME) $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) && \ 19 | docker push $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) 20 | 21 | test_out: 22 | mkdir -p ${DIR}/test_out 23 | 24 | test_create_csv:test_out 25 | mkdir -p test_out 26 | mkdir -p raw_data 27 | docker run --name="movielens_data_transform" -it --rm -v ${DIR}/test_out:/movielens/seldon -v ${DIR}/raw_data:/movielens/data ${IMAGE_NAME} /movielens/scripts/run.sh 28 | 29 | test_create_actions: 30 | docker run --name="movielens_create_actions" -it --rm -v ${DIR}/test_out:/movielens/seldon --volumes-from seldon-models --link mysql_server_container:mysql_server --link consul:consul seldon-tools /seldon-tools/scripts/import/create_actions_json.sh movielens /movielens/seldon/movielens_actions.csv /movielens/seldon/actions.json 31 | 32 | clean: 33 | @rm -fv freebase_data/freebase.json 34 | 35 | -------------------------------------------------------------------------------- /images/movielens_data_transform/scripts/download_movielens_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | DATA_FOLDER=/movielens/data 7 | 8 | cd ${DATA_FOLDER} 9 | 10 | #Get movielens 10m dataset 11 | if [ ! -d "${DATA_FOLDER}/ml-10M100K" ]; then 12 | wget http://files.grouplens.org/datasets/movielens/ml-10m.zip 13 | unzip ml-10m.zip 14 | rm ml-10m.zip 15 | fi 16 | 17 | #get hetrec dataset 18 | if [ ! 
-d "${DATA_FOLDER}/hetrec2011-movielens" ]; then 19 | mkdir -p /movielens/data/hetrec2011-movielens 20 | cd /movielens/data/hetrec2011-movielens 21 | wget http://files.grouplens.org/datasets/hetrec2011/hetrec2011-movielens-2k-v2.zip 22 | unzip hetrec2011-movielens-2k-v2.zip 23 | rm hetrec2011-movielens-2k-v2.zip 24 | fi 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /images/movielens_data_transform/scripts/getFreebaseData.py: -------------------------------------------------------------------------------- 1 | import json 2 | import urllib 3 | import getopt, argparse 4 | import re 5 | from time import sleep 6 | 7 | parser = argparse.ArgumentParser(prog='monitorClientsDb.py') 8 | parser.add_argument('-movies', help='movielens 10m movies.dat file', required=True) 9 | 10 | opts = vars(parser.parse_args()) 11 | 12 | api_key = open(".freebase_api_key").read() 13 | service_url = 'https://www.googleapis.com/freebase/v1/mqlread' 14 | 15 | c = 0 16 | with open(opts['movies']) as textfile1: 17 | for line in textfile1: 18 | line = line.rstrip() 19 | (id,title,tags) = line.split('::') 20 | m = re.search(r"([^,]+).*\(([0-9]+)\)$", title) 21 | # print m.group(1),m.group(2) 22 | year = m.group(2) 23 | yearNext = str(int(year)+1) 24 | name = m.group(1) 25 | name = name.strip() 26 | query = [{'id': None, 'subjects':[], 'genre':[],'directed_by':[],'initial_release_date' : None, 'initial_release_date>' : year,'initial_release_date<' : yearNext, 'name':None, 'name~=': name, 'type': '/film/film',"starring": [{"actor": None,"mid": None}], "/common/topic/image":[{ "id":None, "optional":True}],'limit':1}] 27 | params = {'query': json.dumps(query),'key': api_key} 28 | url = service_url + '?' + urllib.urlencode(params) 29 | response = json.loads(urllib.urlopen(url).read()) 30 | if 'result' in response and len(response['result']) == 1: 31 | for film in response['result']: 32 | film['movielens_id'] = id 33 | film['movielens_title'] = title 34 | film['freebase_search_name'] = name 35 | j = json.dumps(film,sort_keys=True) 36 | print j 37 | # print film['name'],film['initial_release_date'],film['directed_by'],film['subjects'],film['genre'],film['starring'][0:2] 38 | else: 39 | response['movielens_id'] = id 40 | response['movielens_title'] = title 41 | response['freebase_search_name'] = name 42 | j = json.dumps(response,sort_keys=True) 43 | print j 44 | sleep(0.25) 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /images/movielens_data_transform/scripts/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | #download the movielens data 7 | echo "downloading movielens 10m dataset and hetrec 2011 data" 8 | /movielens/scripts/download_movielens_data.sh 9 | # copy freebase data 10 | cp /freebase_data/freebase.json /movielens/data 11 | 12 | # create the item meta data csv 13 | echo "create item meta data csv" 14 | python /movielens/scripts/combine_item_data_sources.py -movielens-tags /movielens/data/ml-10M100K/tags.dat -freebase-movies /movielens/data/freebase.json -hetrec-movies /movielens/data/hetrec2011-movielens/movies.dat -movielens-movies /movielens/data/ml-10M100K/movies.dat -csv /movielens/seldon/movielens_items.csv -item-attr-json /movielens/seldon/movielens_items.json 15 | 16 | #create the user csv (no demographic data so just the ids) 17 | echo "create user meta data csv" 18 | cat <(echo "id") <(cat 
/movielens/data/ml-10M100K/ratings.dat | awk -F'::' '{print $1}' | sort -n | uniq) > /movielens/seldon/movielens_users.csv 19 | 20 | #create actions csv file in correct format 21 | echo "create actions csv" 22 | echo "user_id,item_id,value,time" > /movielens/seldon/movielens_actions.csv 23 | cat /movielens/data/ml-10M100K/ratings.dat | awk -F"::" 'BEGIN{OFS=","}{print $1,$2,$3,$4}' >> /movielens/seldon/movielens_actions.csv 24 | 25 | 26 | -------------------------------------------------------------------------------- /images/mysql_data/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx 2 | 3 | ENV HOME /root 4 | 5 | VOLUME ["/mysql_data"] 6 | 7 | ADD /mysql_data /mysql_data 8 | # Define default command. 9 | CMD ["bash"] 10 | 11 | -------------------------------------------------------------------------------- /images/mysql_data/Makefile: -------------------------------------------------------------------------------- 1 | SELDON_VERSION=0.2 2 | VERSION=1.0 3 | IMAGE=mysql_data 4 | 5 | IMAGE_NAME=$(IMAGE) 6 | PROJ_DIR=../.. 7 | -include $(PROJ_DIR)/build_settings 8 | 9 | mysql_data: 10 | @./create-movielens-dbs.sh 11 | @./setup-local-mysql-data-resource 12 | 13 | build_image: mysql_data 14 | @make build 15 | 16 | build: 17 | docker build --force-rm=true -t ${IMAGE} . 18 | 19 | start: 20 | docker run -d -v $(CURDIR)/mysql_data:/mysql_data -v $(CURDIR):/workdir --name="${IMAGE}" ${IMAGE} nginx -g 'daemon off;' 21 | 22 | login: 23 | docker exec -it ${IMAGE} bash 24 | 25 | push_to_registry: 26 | docker login -u $(PRIVATE_REGISTRY_USER) -p $(PRIVATE_REGISTRY_PASSWORD) -e "$(PRIVATE_REGISTRY_EMAIL)" https://$(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT) && \ 27 | docker tag $(IMAGE_NAME) $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) && \ 28 | docker push $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) 29 | 30 | clean: 31 | @rm -rfv mysql_data 32 | @rm -fv backup.tar.gz 33 | @rm -fv mysql_data_empty.tar.gz 34 | @rm -rfv mysql_data_empty 35 | @rm -fv mysql_data.tar.gz 36 | @./remove-local-mysql-data-resource 37 | 38 | -------------------------------------------------------------------------------- /images/mysql_data/create-movielens-dbs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | PROJDIR=${WORK_DIR}/../.. 8 | 9 | rm -fr ${WORK_DIR}/mysql_data 10 | mkdir -p ${WORK_DIR}/mysql_data 11 | make build 12 | make start 13 | pushd ../mysql_image/; make start; popd 14 | echo "Sleeping to allow mysql to start" 15 | sleep 25 16 | 17 | 18 | ${WORK_DIR}/movielens-db-setup 19 | ${PROJDIR}/dist/mysql/stop-mysql-server 20 | #rm -f ${WORK_DIR}/backup.tar.gz 21 | #${WORK_DIR}/backup-mysql-data 22 | docker run --rm --volumes-from mysql_data ubuntu:trusty /bin/bash -c 'chmod -R a+rwx /mysql_data' 23 | ${PROJDIR}/dist/mysql_data/stop-mysql-data 24 | #${WORK_DIR}/extract-data ${WORK_DIR}/backup.tar.gz 25 | echo "Success - ./mysql_data created" 26 | 27 | -------------------------------------------------------------------------------- /images/mysql_data/movielens-db-setup: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | cd ${STARTUP_DIR} 9 | echo -n 'adding movielens and test dbs ...' 
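# Each mysql-shell call below feeds one SQL file to the running
# mysql_server_container: api-schema.sql and api-data.sql create the consumer and
# token tables and insert the API keys for the test and movielens clients, while
# the remaining *-schema.sql files set up the individual client databases.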
10 | ./mysql-shell movielens_sql/api-schema.sql 11 | ./mysql-shell movielens_sql/api-data.sql 12 | 13 | ./mysql-shell movielens_sql/testclient-schema.sql 14 | 15 | ./mysql-shell movielens_sql/test1-schema.sql 16 | ./mysql-shell movielens_sql/test2-schema.sql 17 | ./mysql-shell movielens_sql/test3-schema.sql 18 | ./mysql-shell movielens_sql/test4-schema.sql 19 | ./mysql-shell movielens_sql/test5-schema.sql 20 | 21 | ./mysql-shell movielens_sql/movielens-schema.sql 22 | echo ' done' 23 | -------------------------------------------------------------------------------- /images/mysql_data/movielens_sql/api-data.sql: -------------------------------------------------------------------------------- 1 | USE api; 2 | 3 | INSERT INTO consumer values('yxokkkencw', 'fgrukwcbzw', 'Test Client','testclient',now(),NULL,1,0,'all'); 4 | INSERT INTO consumer values('aik6kieyu6eugae2raec2Oog', '', 'Test Client','testclient',now(),NULL,1,0,'js'); 5 | 6 | INSERT INTO consumer values('test1consumer', 'test1secret', 'Test Client 1','test1',now(),NULL,1,0,'all'); 7 | INSERT INTO consumer values('test1js', '', 'Test Client 1 js','test1',now(),NULL,1,0,'js'); 8 | 9 | INSERT INTO consumer values('test2consumer', 'test2secret', 'Test Client 2','test2',now(),NULL,1,0,'all'); 10 | INSERT INTO consumer values('test2js', '', 'Test Client 2 js','test2',now(),NULL,1,0,'js'); 11 | 12 | INSERT INTO consumer values('test3consumer', 'test3secret', 'Test Client 3','test3',now(),NULL,1,0,'all'); 13 | INSERT INTO consumer values('test3js', '', 'Test Client 3 js','test3',now(),NULL,1,0,'js'); 14 | 15 | INSERT INTO consumer values('test4consumer', 'test4secret', 'Test Client 4','test4',now(),NULL,1,0,'all'); 16 | INSERT INTO consumer values('test4js', '', 'Test Client 4 js','test4',now(),NULL,1,0,'js'); 17 | 18 | INSERT INTO consumer values('test5consumer', 'test5secret', 'Test Client 5','test5',now(),NULL,1,0,'all'); 19 | INSERT INTO consumer values('test5js', '', 'Test Client 5 js','test5',now(),NULL,1,0,'js'); 20 | 21 | INSERT INTO consumer values('tnedddvlho', 'lzufhqbomz', 'Movielens Client','movielens',now(),NULL,1,0,'all'); 22 | INSERT INTO consumer values('uggyecofcz', '', 'Movielens Client','movielens',now(),NULL,1,0,'js'); 23 | 24 | -------------------------------------------------------------------------------- /images/mysql_data/movielens_sql/api-schema.sql: -------------------------------------------------------------------------------- 1 | -- MySQL dump 10.13 Distrib 5.6.21, for linux-glibc2.5 (x86_64) 2 | -- 3 | -- Host: live2-replica1.cef545wkntpn.eu-west-1.rds.amazonaws.com Database: api 4 | -- ------------------------------------------------------ 5 | -- Server version 5.6.19-log 6 | 7 | /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; 8 | /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; 9 | /*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; 10 | /*!40101 SET NAMES utf8 */; 11 | /*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; 12 | /*!40103 SET TIME_ZONE='+00:00' */; 13 | /*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; 14 | /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; 15 | /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; 16 | /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; 17 | 18 | -- 19 | -- Current Database: `api` 20 | -- 21 | 22 | CREATE DATABASE /*!32312 IF NOT EXISTS*/ `api` /*!40100 DEFAULT CHARACTER SET latin1 */; 23 | 24 | USE `api`; 25 | 26 | -- 27 | -- Table 
structure for table `consumer` 28 | -- 29 | 30 | DROP TABLE IF EXISTS `consumer`; 31 | /*!40101 SET @saved_cs_client = @@character_set_client */; 32 | /*!40101 SET character_set_client = utf8 */; 33 | CREATE TABLE `consumer` ( 34 | `consumer_key` varchar(50) NOT NULL, 35 | `consumer_secret` varchar(50) DEFAULT NULL, 36 | `name` varchar(75) DEFAULT NULL, 37 | `short_name` varchar(25) DEFAULT NULL, 38 | `time` datetime DEFAULT NULL, 39 | `url` varchar(2048) DEFAULT NULL, 40 | `active` tinyint(1) DEFAULT NULL, 41 | `secure` tinyint(1) DEFAULT NULL, 42 | `scope` varchar(25) DEFAULT NULL, 43 | PRIMARY KEY (`consumer_key`) 44 | ) ENGINE=MyISAM DEFAULT CHARSET=latin1; 45 | /*!40101 SET character_set_client = @saved_cs_client */; 46 | 47 | -- 48 | -- Table structure for table `token` 49 | -- 50 | 51 | DROP TABLE IF EXISTS `token`; 52 | /*!40101 SET @saved_cs_client = @@character_set_client */; 53 | /*!40101 SET character_set_client = utf8 */; 54 | CREATE TABLE `token` ( 55 | `token_key` varchar(50) NOT NULL, 56 | `time` datetime DEFAULT NULL, 57 | `type` varchar(15) DEFAULT NULL, 58 | `scope` varchar(15) DEFAULT NULL, 59 | `expires_in` int(11) DEFAULT NULL, 60 | `active` tinyint(1) DEFAULT NULL, 61 | `consumer` varchar(50) DEFAULT NULL, 62 | PRIMARY KEY (`token_key`), 63 | KEY `fk_consumer` (`consumer`) 64 | ) ENGINE=MyISAM DEFAULT CHARSET=latin1; 65 | /*!40101 SET character_set_client = @saved_cs_client */; 66 | 67 | /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; 68 | /*!40101 SET SQL_MODE=@OLD_SQL_MODE */; 69 | /*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; 70 | /*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; 71 | /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; 72 | /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; 73 | /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; 74 | /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; 75 | 76 | -- Dump completed on 2014-12-01 13:52:38 77 | 78 | -------------------------------------------------------------------------------- /images/mysql_data/mysql-shell: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | REGISTRY_PREFIX= 9 | 10 | if [[ $# < 1 ]]; then 11 | docker exec -it mysql_server_container sh -c 'exec mysql -u root -p${MYSQL_ROOT_PASSWORD}' 12 | else 13 | SQL_SCRIPT=$1 14 | docker exec -it mysql_server_container sh -c 'exec mysql -u root -p${MYSQL_ROOT_PASSWORD} " 8 | exit 1 9 | fi 10 | 11 | CLIENT=$1 12 | IN=$2 13 | 14 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/db_write?raw` 15 | echo "${JSON}" 16 | DB_HOST=`echo "${JSON}" | jq -r ".host"` 17 | DB_USER=`echo "${JSON}" | jq -r ".username"` 18 | DB_PASS=`echo "${JSON}" | jq -r ".password"` 19 | 20 | if [[ -z "${DB_HOST}" || -z "${DB_USER}" || -z "${DB_PASS}" ]]; then 21 | echo "Can't get keys for db settings. Stopping." 
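# Note (added, illustrative): the seldon/${CLIENT}/db_write key fetched from consul
# above is expected to hold JSON carrying at least the fields extracted by the jq
# filters, for example (values here are placeholders, not shipped defaults):
#   {"host":"mysql_server","username":"root","password":"mypass"}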
22 | exit -1 23 | fi 24 | python /seldon-tools/scripts/import/add_attr_schema.py -schema-file ${IN} -db-host ${DB_HOST} -db-user ${DB_USER} -db-pass ${DB_PASS} -client ${CLIENT} -clean 25 | python /seldon-tools/scripts/import/add_attr_schema.py -schema-file ${IN} -db-host ${DB_HOST} -db-user ${DB_USER} -db-pass ${DB_PASS} -client ${CLIENT} 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/import/add_items.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | if [[ $# < 2 ]]; then 7 | echo "Need " 8 | exit 1 9 | fi 10 | 11 | CLIENT=$1 12 | IN=$2 13 | 14 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/db_write?raw` 15 | echo "${JSON}" 16 | DB_HOST=`echo "${JSON}" | jq -r ".host"` 17 | DB_USER=`echo "${JSON}" | jq -r ".username"` 18 | DB_PASS=`echo "${JSON}" | jq -r ".password"` 19 | 20 | if [[ -z "${DB_HOST}" || -z "${DB_USER}" || -z "${DB_PASS}" ]]; then 21 | echo "Can't get keys for db settings. Stopping." 22 | exit -1 23 | fi 24 | 25 | python /seldon-tools/scripts/import/add_items.py -items ${IN} -db-host ${DB_HOST} -db-user ${DB_USER} -db-pass ${DB_PASS} -client ${CLIENT} 26 | 27 | 28 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/import/add_users.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import time 3 | import datetime 4 | import sys 5 | import getopt, argparse 6 | from collections import defaultdict 7 | import json 8 | import MySQLdb 9 | import unicodecsv 10 | USER_INSERT = "insert into users (client_user_id, username, first_op, last_op,type,num_op, active) values (%(id)s, %(name)s, now(), now(), 1,1,1)" 11 | 12 | 13 | def validateCSV(csv_file): 14 | with open(csv_file) as csvFile: 15 | reader = unicodecsv.DictReader(csvFile,encoding='utf-8') 16 | line = reader.next() 17 | for field_name in line: 18 | if not field_name == 'id' and not field_name == 'username': 19 | print 'only id or username fields allowed' 20 | exit(1) 21 | 22 | def doUserInserts(csv_file, db): 23 | with open(csv_file) as csvFile: 24 | reader = unicodecsv.DictReader(csvFile,encoding='utf-8') 25 | inserts = [] 26 | insertNum = 0 27 | for line in reader: 28 | client_id = line['id'] 29 | name = '' 30 | if 'name' in line: 31 | name = line['name'] 32 | inserts.append({'name':name,'id':client_id}) 33 | if len(inserts) > 1000: 34 | insertNum+=1 35 | reallyDoInserts(USER_INSERT, inserts, insertNum, db) 36 | inserts = [] 37 | 38 | insertNum+=1 39 | reallyDoInserts(USER_INSERT, inserts, insertNum, db) 40 | db.commit() 41 | print 'finished user inserts' 42 | 43 | def reallyDoInserts(insertStatement, params, num, db): 44 | cur = db.cursor() 45 | print "inserting user batch", num,'into the db' 46 | cur.executemany(insertStatement, params) 47 | 48 | 49 | def cleanUpDb(db): 50 | dbc = db.cursor() 51 | dbc.execute('truncate table users') 52 | 53 | parser = argparse.ArgumentParser(prog="add_users.py") 54 | parser.add_argument('-users', help='user csv file', required=True) 55 | parser.add_argument('-db-host', help='database host', required=False, default="localhost") 56 | parser.add_argument('-db-user', help='database username', required=False, default="root") 57 | parser.add_argument('-db-pass', help='database password', required=False, default="root") 58 | parser.add_argument('-client', help='client/database name', 
required=False, default="testclient") 59 | 60 | opts = vars(parser.parse_args()) 61 | 62 | db = MySQLdb.connect(user=opts['db_user'],db=opts['client'],passwd=opts['db_pass'], host=opts['db_host']) 63 | db.set_character_set('utf8') 64 | dbc = db.cursor() 65 | dbc.execute('SET NAMES utf8;') 66 | dbc.execute('SET CHARACTER SET utf8;') 67 | dbc.execute('SET character_set_connection=utf8;') 68 | dbc.execute("SET GLOBAL max_allowed_packet=1073741824") 69 | try: 70 | validateCSV(opts['users']) 71 | doUserInserts(opts['users'], db) 72 | except: 73 | print 'Unexpected error ...', sys.exc_info()[0] 74 | print 'Clearing DB of users' 75 | try: 76 | cleanUpDb(db) 77 | except: 78 | print 'couldn\'t clean up db' 79 | raise 80 | print "Successfully ran all inserts" -------------------------------------------------------------------------------- /images/seldon-tools/scripts/import/add_users.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | if [[ $# < 2 ]]; then 7 | echo "Need " 8 | exit 1 9 | fi 10 | 11 | CLIENT=$1 12 | IN=$2 13 | 14 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/db_write?raw` 15 | echo "${JSON}" 16 | DB_HOST=`echo "${JSON}" | jq -r ".host"` 17 | DB_USER=`echo "${JSON}" | jq -r ".username"` 18 | DB_PASS=`echo "${JSON}" | jq -r ".password"` 19 | 20 | if [[ -z "${DB_HOST}" || -z "${DB_USER}" || -z "${DB_PASS}" ]]; then 21 | echo "Can't get keys for db settings. Stopping." 22 | exit -1 23 | fi 24 | 25 | python /seldon-tools/scripts/import/add_users.py -users ${IN} -db-host ${DB_HOST} -db-user ${DB_USER} -db-pass ${DB_PASS} -client ${CLIENT} 26 | 27 | 28 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/import/attr_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "types": [{ 3 | "type_id": 1, 4 | "type_name": "music", 5 | "type_attrs": [ 6 | {"name":"Title","value_type":"string"}, 7 | {"name":"Artist","value_type":"string"}, 8 | {"name":"Genre","value_type":["pop","rock","rap"]}, 9 | {"name":"Price","value_type":"double"}, 10 | {"name":"is_compilation","value_type":"boolean"}, 11 | {"name":"sales_count", "value_type":"int"} 12 | ] 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /images/seldon-tools/scripts/import/clear_actions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | if [[ $# < 1 ]]; then 7 | echo "Need " 8 | exit 1 9 | fi 10 | 11 | CLIENT=$1 12 | 13 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/db_write?raw` 14 | echo "${JSON}" 15 | DB_HOST=`echo "${JSON}" | jq -r ".host"` 16 | DB_USER=`echo "${JSON}" | jq -r ".username"` 17 | DB_PASS=`echo "${JSON}" | jq -r ".password"` 18 | 19 | if [[ -z "${DB_HOST}" || -z "${DB_USER}" || -z "${DB_PASS}" ]]; then 20 | echo "Can't get keys for db settings. Stopping." 
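# Note (added, illustrative): the add_users.py importer shown above expects a CSV
# whose header contains only "id" and optionally "username", e.g.
#   id,username
#   1,alice
#   2,bob
# As written, doUserInserts() looks for a "name" key rather than "username", so the
# username column is inserted as an empty string.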
21 | exit -1 22 | fi 23 | 24 | echo "truncate actions" | mysql -h ${DB_HOST} -u${DB_USER} -p${DB_PASS} ${CLIENT} 25 | 26 | 27 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/import/create_actions_json.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import time 3 | import datetime 4 | import sys 5 | import getopt, argparse 6 | from collections import defaultdict 7 | import json 8 | import MySQLdb 9 | import unicodecsv 10 | 11 | parser = argparse.ArgumentParser(prog='monitorClientsDb.py') 12 | parser.add_argument('-actions', help='actions csv file', required=True) 13 | parser.add_argument('-db-host', help='database host', required=True) 14 | parser.add_argument('-db-user', help='database username', required=True) 15 | parser.add_argument('-db-pass', help='database password', required=False) 16 | parser.add_argument('-client', help='client/database name', required=False) 17 | parser.add_argument('-out', help='json output file', required=False) 18 | 19 | opts = vars(parser.parse_args()) 20 | client = opts['client'] 21 | if opts['db_pass']: 22 | db = MySQLdb.connect(user=opts['db_user'], passwd=opts['db_pass'],db=client, host=opts['db_host']) 23 | else: 24 | db = MySQLdb.connect(user=opts['db_user'],db=client, host=opts['db_host']) 25 | 26 | def getItemId(db,cache,client_item_id): 27 | if client_item_id in cache: 28 | return cache[client_item_id] 29 | else: 30 | cursor = db.cursor() 31 | cursor.execute("""select item_id, client_item_id from items""") 32 | rows = cursor.fetchall() 33 | for row in rows: 34 | itemId = long(row[0]) 35 | client_item_id_from_db = row[1] 36 | cache[client_item_id_from_db] = itemId 37 | cursor.close() 38 | return cache[client_item_id] 39 | 40 | def getUserId(db,cache,client_user_id): 41 | if client_user_id in cache: 42 | return cache[client_user_id] 43 | else: 44 | cursor = db.cursor() 45 | cursor.execute("""select user_id,client_user_id from users""") 46 | rows = cursor.fetchall() 47 | for row in rows: 48 | userId = long(row[0]) 49 | client_user_id_from_db = row[1] 50 | cache[client_user_id_from_db] = userId 51 | 52 | cursor.close() 53 | return cache[client_user_id] 54 | 55 | userCache = {} 56 | itemCache = {} 57 | count = 0 58 | with open(opts['actions']) as csvfile, open(opts['out'],'w') as outfile: 59 | reader = unicodecsv.DictReader(csvfile,encoding='utf-8') 60 | for f in reader: 61 | item = getItemId(db,itemCache,f["item_id"]) 62 | user = getUserId(db,userCache,f["user_id"]) 63 | action_type = 1 64 | action = {} 65 | action["userid"] = int(user) 66 | action["client_userid"] = f["item_id"] 67 | action["itemid"] = int(item) 68 | action["client_itemid"] = f["user_id"] 69 | action["value"] = float(f["value"]) 70 | utc = datetime.datetime.fromtimestamp(int(f["time"])).strftime('%Y-%m-%dT%H:%M:%SZ') 71 | action["timestamp_utc"] = utc 72 | action["rectag"] = "default" 73 | action["type"] = action_type 74 | action["client"] = client 75 | s = json.dumps(action,sort_keys=True) 76 | outfile.write(s+"\n") 77 | count += 1 78 | if count % 50000 == 0: 79 | print "Processed "+str(count)+" actions" 80 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/import/create_actions_json.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | if [[ $# < 3 ]]; then 7 | echo "Need " 8 | exit 1 9 | fi 10 | 11 | 
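# Note (added, illustrative): create_actions_json.py above turns each action row of
# the CSV (header user_id,item_id,value,time) into one JSON line, resolving the
# internal user/item ids from the client's users and items tables, roughly:
#   {"userid":<internal id>,"itemid":<internal id>,"value":5.0,"timestamp_utc":"...",
#    "rectag":"default","type":1,"client":"<client>", ...}
# Note that, as currently written, "client_userid" is filled from the item_id column
# and "client_itemid" from user_id, which looks inverted.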
CLIENT=$1 12 | IN=$2 13 | OUT=$3 14 | OUT_FOLDER=`dirname $3` 15 | mkdir -p $OUT_FOLDER 16 | 17 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/db_write?raw` 18 | echo "${JSON}" 19 | DB_HOST=`echo "${JSON}" | jq -r ".host"` 20 | DB_USER=`echo "${JSON}" | jq -r ".username"` 21 | DB_PASS=`echo "${JSON}" | jq -r ".password"` 22 | 23 | if [[ -z "${DB_HOST}" || -z "${DB_USER}" || -z "${DB_PASS}" ]]; then 24 | echo "Can't get keys for db settings. Stopping." 25 | exit -1 26 | fi 27 | echo "Creating actions file..." 28 | python /seldon-tools/scripts/import/create_actions_json.py -actions ${IN} -db-host ${DB_HOST} -db-user ${DB_USER} -db-pass ${DB_PASS} -client ${CLIENT} -out ${OUT} 29 | 30 | 31 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/import/sample.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import argparse 5 | import random 6 | 7 | def getOpts(): 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--sample-percent', type=int, dest='sample_percent', help="the sample rate to use (1 - 100)", required=True) 10 | parser.add_argument('--random-seed', type=str, dest='random_seed', default=None, help="use seed for consistent samples") 11 | parser.add_argument('args', nargs=argparse.REMAINDER) # catch rest (non-options) as args 12 | opts = vars(parser.parse_args()) 13 | return opts 14 | 15 | def process_line(opts, line): 16 | sample_percent=opts['sample_percent'] 17 | if random.randint(1,100) <= sample_percent: 18 | sys.stdout.write(line) 19 | 20 | def process_file(opts, f): 21 | for line in f: 22 | process_line(opts, line) 23 | 24 | def validate_opts(opts): 25 | sample_percent=opts['sample_percent'] 26 | if (sample_percent < 1) or (sample_percent > 100): 27 | raise ValueError("sample_percent %s is invalid, needs to be 1 - 100" % sample_percent) 28 | 29 | def checkSeed(opts): 30 | random_seed=opts['random_seed'] 31 | if random_seed != None: 32 | random.seed(random_seed) 33 | 34 | def main(): 35 | opts = getOpts() 36 | validate_opts(opts) 37 | checkSeed(opts) 38 | if len(opts['args']) > 0: 39 | for filename in opts['args']: 40 | f = open(filename) 41 | process_file(opts, f) 42 | f.close() 43 | else: 44 | process_file(opts, sys.stdin) 45 | 46 | if __name__ == "__main__": 47 | main() 48 | 49 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/models/item-similarity/activate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | 5 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | cd ${WORK_DIR} 8 | 9 | if [[ $# < 1 ]]; then 10 | echo "Need client" 11 | exit 1 12 | fi 13 | 14 | CLIENT=$1 15 | 16 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/db_write?raw` 17 | echo "${JSON}" 18 | DB_HOST=`echo "${JSON}" | jq -r ".host"` 19 | DB_USER=`echo "${JSON}" | jq -r ".username"` 20 | DB_PASS=`echo "${JSON}" | jq -r ".password"` 21 | 22 | if [[ -z "${DB_HOST}" || -z "${DB_USER}" || -z "${DB_PASS}" ]]; then 23 | echo "Can't get keys for db settings. Stopping." 
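# Note (added): the RENAME TABLE statement at the end of this script swaps
# item_similarity and item_similarity_new in a single statement, so the model just
# loaded into item_similarity_new (by create_sql_and_upload.sh below) goes live and
# the previously live table becomes the next staging table.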
24 | exit -1 25 | fi 26 | 27 | echo "rename table item_similarity to item_similarity_old,item_similarity_new to item_similarity,item_similarity_old to item_similarity_new;" | mysql -h${DB_HOST} -u${DB_USER} -p${DB_PASS} ${CLIENT} 28 | 29 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/models/item-similarity/createItemSimilaritySql.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import getopt, argparse 3 | import json 4 | import sys 5 | 6 | batchSize = 5000 7 | numInserts = 0 8 | sqlInsertPrefix = "insert into item_similarity_new values " 9 | print "truncate item_similarity_new;" 10 | sql = sqlInsertPrefix 11 | for line in sys.stdin: 12 | line = line.rstrip() 13 | j = json.loads(line) 14 | item1 = j['item1'] 15 | item2 = j['item2'] 16 | sim = j['sim'] 17 | if numInserts > 0: 18 | sql = sql + "," 19 | sql = sql + " (%s,%s,%s)" % (item1,item2,sim) 20 | numInserts += 1 21 | if numInserts >= batchSize: 22 | sql = sql + ";" 23 | print sql 24 | numInserts = 0; 25 | sql = sqlInsertPrefix 26 | 27 | if numInserts > 0: 28 | sql = sql + ";" 29 | print sql; 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/models/item-similarity/create_sql_and_upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | 5 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | cd ${WORK_DIR} 8 | 9 | if [[ $# < 1 ]]; then 10 | echo "Need client" 11 | exit 1 12 | fi 13 | 14 | CLIENT=$1 15 | 16 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/algs/item_similarity?raw` 17 | echo "${JSON}" 18 | if [[ ! -z "${JSON}" ]]; then 19 | START_DAY=`echo "${JSON}" | jq -r ".start_day // empty"` 20 | if [ "${START_DAY}" = 'yesterday' ]; then 21 | START_DAY=$(($(perl -e 'use POSIX;print strftime "%s",localtime time-86400;')/86400)) 22 | fi 23 | if [[ -z "${START_DAY}" ]]; then 24 | START_DAY=1 25 | fi 26 | else 27 | START_DAY=1 28 | fi 29 | 30 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/db_write?raw` 31 | echo "${JSON}" 32 | DB_HOST=`echo "${JSON}" | jq -r ".host"` 33 | DB_USER=`echo "${JSON}" | jq -r ".username"` 34 | DB_PASS=`echo "${JSON}" | jq -r ".password"` 35 | 36 | if [[ -z "${DB_HOST}" || -z "${DB_USER}" || -z "${DB_PASS}" ]]; then 37 | echo "Can't get keys for db settings. Stopping." 38 | exit -1 39 | fi 40 | 41 | echo "create sql for item similarity upload" 42 | cat /seldon-models/${CLIENT}/item-similarity/${START_DAY}/part* | python /seldon-tools/scripts/models/item-similarity/createItemSimilaritySql.py > /seldon-models/${CLIENT}/item-similarity/${START_DAY}/upload.sql 43 | echo "uploading item similarity sql to client ${CLIENT}" 44 | mysql -h${DB_HOST} -u${DB_USER} -p${DB_PASS} ${CLIENT} < /seldon-models/${CLIENT}/item-similarity/${START_DAY}/upload.sql 45 | 46 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/models/word2vec/transformToSV.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | 5 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | cd ${WORK_DIR} 8 | 9 | if [[ $# < 1 ]]; then 10 | echo "Need client" 11 | exit 1 12 | fi 13 | 14 | CLIENT=$1 15 | 16 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/algs/word2vec?raw` 17 | echo "${JSON}" 18 | if [[ ! 
-z "${JSON}" ]]; then 19 | START_DAY=`echo "${JSON}" | jq -r ".start_day // empty"` 20 | if [ "${START_DAY}" = 'yesterday' ]; then 21 | START_DAY=$(($(perl -e 'use POSIX;print strftime "%s",localtime time-86400;')/86400)) 22 | fi 23 | if [[ -z "${START_DAY}" ]]; then 24 | START_DAY=1 25 | fi 26 | VECTOR_SIZE=`echo "${JSON}" | jq -r ".vector_size // empty"` 27 | if [[ -z "${VECTOR_SIZE}" ]]; then 28 | VECTOR_SIZE=30 29 | fi 30 | else 31 | START_DAY=1 32 | VECTOR_SIZE=30 33 | fi 34 | 35 | 36 | cat /seldon-models/${CLIENT}/word2vec/${START_DAY}/part-* | python word2vecToSV.py -d ${VECTOR_SIZE} > /seldon-models/${CLIENT}/word2vec/${START_DAY}/docvectors.txt 37 | echo "-vectortype REAL -dimension ${VECTOR_SIZE}" > /seldon-models/${CLIENT}/word2vec/${START_DAY}/termvectors.txt 38 | 39 | 40 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/models/word2vec/word2vecToSV.py: -------------------------------------------------------------------------------- 1 | import sys, getopt, argparse 2 | 3 | parser = argparse.ArgumentParser(prog='prune_words') 4 | parser.add_argument('-d', help='dimension', required=True) 5 | opts = vars(parser.parse_args()) 6 | 7 | dim = int(opts['d']) 8 | 9 | print "-vectortype REAL -dimension "+str(dim) 10 | for line in sys.stdin: 11 | line = line.rstrip() 12 | parts = line.split(',') 13 | print "|".join(parts) 14 | 15 | 16 | -------------------------------------------------------------------------------- /images/seldon-tools/scripts/zookeeper/zkcmd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gurminder Sunner" 4 | 5 | import pprint 6 | import sys 7 | import argparse 8 | from kazoo.client import KazooClient 9 | 10 | def doCmdUsingHosts(zk_hosts, cmd, cmd_args): 11 | zk_client = getKazooClient(zk_hosts) 12 | zk_client.start() 13 | doCmd(zk_client, cmd, cmd_args) 14 | zk_client.stop() 15 | 16 | def doCmd(zk_client, cmd, cmd_args): 17 | if cmd == 'set': 18 | thePath = cmd_args[0] 19 | theValue = cmd_args[1] 20 | retVal = None 21 | if zk_client.exists(thePath): 22 | retVal = zk_client.set(thePath,theValue) 23 | else: 24 | retVal = zk_client.create(thePath,theValue,makepath=True) 25 | print "[{cmd}][{thePath}][{theValue}]".format(cmd=cmd,thePath=thePath,theValue=theValue) 26 | elif cmd == 'get': 27 | thePath = cmd_args[0] 28 | retVal = None 29 | theValue = None 30 | if zk_client.exists(thePath): 31 | retVal = zk_client.get(thePath) 32 | theValue = retVal[0] 33 | print "[{cmd}][{thePath}][{theValue}]".format(cmd=cmd,thePath=thePath,theValue=theValue) 34 | 35 | def getOpts(): 36 | parser = argparse.ArgumentParser(description='Some Description') 37 | parser.add_argument('--zk-hosts', help="the zookeeper hosts", required=True) 38 | parser.add_argument('--cmd', help="the cmd to use", required=True) 39 | parser.add_argument('--cmd-args', help="the cmd args to use", nargs='+') 40 | parser.add_argument('args', nargs=argparse.REMAINDER) # catch rest (non-options) as args 41 | opts = vars(parser.parse_args()) 42 | return opts 43 | 44 | def getKazooClient(zk_hosts): 45 | zk_client = KazooClient(hosts=zk_hosts) 46 | return zk_client 47 | 48 | def main(): 49 | opts = getOpts() 50 | #print opts 51 | ##doCmd(opts['zk_hosts'], opts['cmd'], opts['cmd_args']) 52 | doCmdUsingHosts(opts['zk_hosts'], opts['cmd'], opts['cmd_args']) 53 | 54 | if __name__ == "__main__": 55 | main() 56 | 57 | 58 | 59 | 
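# Note (added, illustrative): zkcmd.py can be driven directly from the shell; the
# zookeeper host and node paths below are placeholders, not values shipped with the
# image:
#   python zkcmd.py --zk-hosts zookeeper_server:2181 --cmd set \
#       --cmd-args /config/movielens/svtext /seldon-models/movielens/svtext/1
#   python zkcmd.py --zk-hosts zookeeper_server:2181 --cmd get \
#       --cmd-args /config/movielens/svtext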
-------------------------------------------------------------------------------- /images/seldon-tools/scripts/zookeeper/zklines.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gurminder Sunner" 4 | 5 | import sys 6 | import os 7 | import argparse 8 | 9 | script_dir = os.path.dirname( os.path.realpath(__file__) ) 10 | sys.path.append( script_dir ) 11 | import zkcmd 12 | 13 | def process_line(zk_client, line): 14 | parts =line.split(None,2) 15 | cmd=parts[0] 16 | cmd_args=parts[1:] 17 | zkcmd.doCmd(zk_client, cmd, cmd_args) 18 | 19 | def process_file(zk_client, f): 20 | for line_raw in f: 21 | line = line_raw.strip() # remove whiespace and nl 22 | if len(line) > 0: 23 | process_line(zk_client, line) 24 | 25 | def getOpts(): 26 | parser = argparse.ArgumentParser(description='Some Description') 27 | parser.add_argument('--zk-hosts', help="the zookeeper hosts", required=True) 28 | parser.add_argument('args', nargs=argparse.REMAINDER) # catch rest (non-options) as args 29 | opts = vars(parser.parse_args()) 30 | return opts 31 | 32 | def main(): 33 | opts = getOpts() 34 | #print opts 35 | zk_client = zkcmd.getKazooClient(opts['zk_hosts']) 36 | zk_client.start() 37 | filenames = opts['args'] 38 | if len(filenames) > 0: 39 | for filename in filenames: 40 | f = open(filename) 41 | process_file(zk_client, f) 42 | f.close() 43 | else: 44 | process_file(zk_client, sys.stdin) 45 | zk_client.stop() 46 | 47 | if __name__ == "__main__": 48 | main() 49 | 50 | -------------------------------------------------------------------------------- /images/semantic_vectors_image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM java7jre_image 2 | 3 | ENV HOME /root 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | RUN apt-get update 7 | 8 | RUN apt-get install -y python-pip zookeeper curl jq 9 | 10 | RUN pip install awscli 11 | 12 | ADD ./scripts /scripts 13 | 14 | # Define default command. 15 | CMD ["bash"] 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /images/semantic_vectors_image/scripts/models/stopwords.italian: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SeldonIO/seldon-vm/f49e2cea3cb6919ee058265c4524df27555e848f/images/semantic_vectors_image/scripts/models/stopwords.italian -------------------------------------------------------------------------------- /images/semantic_vectors_image/scripts/run-training-consul.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | 5 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | cd ${WORK_DIR} 8 | 9 | if [[ $# < 1 ]]; then 10 | echo "Need client" 11 | exit 1 12 | fi 13 | 14 | CLIENT=$1 15 | 16 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/db_read?raw` 17 | echo "${JSON}" 18 | if [[ ! -z "${JSON}" ]]; then 19 | JDBC=`echo "${JSON}" | jq -r ".jdbc // empty"` 20 | if [[ -z "${JDBC}" ]]; then 21 | JDBC="jdbc:mysql://mysql_server:3306/${CLIENT}?characterEncoding=utf8&user=root&password=mypass" 22 | fi 23 | else 24 | JDBC="jdbc:mysql://mysql_server:3306/${CLIENT}?characterEncoding=utf8&user=root&password=mypass" 25 | fi 26 | 27 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/algs/semantic_vectors?raw` 28 | echo "${JSON}" 29 | if [[ ! 
-z "${JSON}" ]]; then 30 | START_DAY=`echo "${JSON}" | jq -r ".start_day // empty"` 31 | if [ "${START_DAY}" = 'yesterday' ]; then 32 | START_DAY=$(($(perl -e 'use POSIX;print strftime "%s",localtime time-86400;')/86400)) 33 | fi 34 | if [[ -z "${START_DAY}" ]]; then 35 | START_DAY=1 36 | fi 37 | ATTR_NAMES=`echo "${JSON}" | jq -r ".attr_names // empty"` 38 | if [[ -z "${ATTR_NAMES}" ]]; then 39 | ATTR_NAMES="tags" 40 | fi 41 | ITEM_LIMIT=`echo "${JSON}" | jq -r ".item_limit // empty"` 42 | if [[ -z "${ITEM_LIMIT}" ]]; then 43 | ITEM_LIMIT=500000 44 | fi 45 | DATA_FOLDER=`echo "${JSON}" | jq -r ".data_folder // empty"` 46 | if [[ -z "${DATA_FOLDER}" ]]; then 47 | DATA_FOLDER="/seldon-models" 48 | fi 49 | else 50 | START_DAY=1 51 | ATTR_NAMES="tags" 52 | ITEM_LIMIT=500000 53 | DATA_FOLDER="/seldon-models" 54 | fi 55 | 56 | rm -rf ${DATA_FOLDER}/${CLIENT}/svtext/${START_DAY} 57 | ${WORK_DIR}/run-training.sh ${CLIENT} ${ITEM_LIMIT} ${ATTR_NAMES} ${DATA_FOLDER}/${CLIENT}/svtext/${START_DAY} ${JDBC} 58 | 59 | 60 | -------------------------------------------------------------------------------- /images/semantic_vectors_image/scripts/run-training.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | 5 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | cd ${WORK_DIR} 8 | 9 | if [[ $# < 5 ]]; then 10 | echo "Need client num-items attr_names (comma separated) output-folder jdbc-url" 11 | exit 1 12 | fi 13 | 14 | CLIENT=$1 15 | ITEM_LIMIT=$2 16 | ATTR_NAMES=$3 17 | DATA_FOLDER=$4 18 | JDBC=$5 19 | 20 | PREFIX=svtext 21 | JAR_FILE=semvec-lucene-tools.jar 22 | SV_PARAMS="-raw-ids -use-item-attrs -attr-names ${ATTR_NAMES} -recreate -debug -item-limit ${ITEM_LIMIT}" 23 | 24 | java -cp ${JAR_FILE} io.seldon.semvec.CreateLuceneIndexFromDb -l index -jdbc ${JDBC} -itemType 1 ${SV_PARAMS} 25 | 26 | java -cp ${JAR_FILE} pitt.search.semanticvectors.BuildIndex -trainingcycles 1 -maxnonalphabetchars -1 -minfrequency 0 -maxfrequency 1000000 -luceneindexpath index -indexfileformat text 27 | 28 | mkdir -p ${DATA_FOLDER} 29 | 30 | echo "copy dbs to folder ${DATA_FOLDER}" 31 | cp termvectors.txt ${DATA_FOLDER}/termvectors.txt 32 | cp docvectors.txt ${DATA_FOLDER}/docvectors.txt 33 | 34 | if [ -n "$AWS_UPLOAD" ]; then 35 | aws s3 cp --region eu-west-1 termvectors.txt s3:/${DATA_FOLDER}/termvectors.txt 36 | aws s3 cp --region eu-west-1 docvectors.txt s3:/${DATA_FOLDER}/docvectors.txt 37 | fi 38 | 39 | if [ -n "$ZOOKEEPER_HOST" ]; then 40 | /usr/share/zookeeper/bin/zkCli.sh -server ${ZOOKEEPER_HOST} set /${CLIENT}/${PREFIX} ${DATA_FOLDER} 41 | fi 42 | 43 | 44 | -------------------------------------------------------------------------------- /images/spark_image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM java7jre_image 2 | 3 | ENV HOME /root 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | RUN ( SPARK_VERSION=1.3.0 && \ 7 | apt-get update && \ 8 | apt-get install -y wget && \ 9 | wget -O /tmp/spark.tgz http://d3kbcqa49mib13.cloudfront.net/spark-${SPARK_VERSION}-bin-cdh4.tgz && \ 10 | cd /opt && tar xvf /tmp/spark.tgz && \ 11 | rm -fv /tmp/spark.tgz && \ 12 | ln -sn spark-${SPARK_VERSION}-bin-cdh4 /opt/spark && \ 13 | apt-get install -y python python-pip curl jq libgfortran3 && \ 14 | pip install python-consul && \ 15 | apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*) 16 | 17 | ADD app /app 18 | ADD spark-jobs /spark-jobs 19 | ADD startup-scripts /startup-scripts 20 | 21 | 
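# Note (added, illustrative): the semantic-vectors training wrapper above
# (run-training-consul.sh) reads its settings from the consul key
# seldon/<client>/algs/semantic_vectors, e.g.
#   {"start_day":"yesterday","attr_names":"tags","item_limit":500000,"data_folder":"/seldon-models"}
# Field names come from its jq filters; missing fields fall back to the defaults in
# that script.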
# Define default command. 22 | CMD ["/apps/bin/keep_alive"] 23 | 24 | -------------------------------------------------------------------------------- /images/spark_image/copy-jars-to-app-dir: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | if [[ $# < 1 ]]; then 9 | echo "Need full path to dir containing jars" 10 | exit 1 11 | fi 12 | 13 | JARS_DIR_FPATH=$1 14 | 15 | cd ${STARTUP_DIR} 16 | 17 | echo $JARS_DIR_FPATH 18 | 19 | rm -rfv ./app 20 | 21 | mkdir -p ./app 22 | 23 | cd ${STARTUP_DIR}/app 24 | 25 | cp -v "${JARS_DIR_FPATH}/spark-streaming-assembly-1.0.jar" . 26 | cp -v "${JARS_DIR_FPATH}/sparkjobgroupactions-1.0.0-jar-with-dependencies.jar" . 27 | #cp -v "${JARS_DIR_FPATH}/SeldonMF-assembly-1.0.jar" . 28 | #cp -v "${JARS_DIR_FPATH}/spark-0.0.1-SNAPSHOT-jar-with-dependencies.jar" . 29 | #cp -v "${JARS_DIR_FPATH}/spark-1.0-jar-with-dependencies.jar" . 30 | 31 | -------------------------------------------------------------------------------- /images/spark_image/get-app-jars: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | if [[ $# < 1 ]]; then 9 | echo "Need version" 10 | exit 1 11 | fi 12 | 13 | VERSION=$1 14 | 15 | 16 | cd ${STARTUP_DIR} 17 | rm -rfv ./app 18 | 19 | FPATH="s3://seldon-vm/${VERSION}/images/spark_image/app.tar.gz" 20 | echo "--- fetching ${FPATH} ---" 21 | aws s3 cp ${FPATH} . 22 | tar xvf app.tar.gz 23 | 24 | -------------------------------------------------------------------------------- /images/spark_image/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SeldonIO/seldon-vm/f49e2cea3cb6919ee058265c4524df27555e848f/images/spark_image/readme.txt -------------------------------------------------------------------------------- /images/spark_image/run-container: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | docker run --rm -i -t --name="spark_container" spark_image bash 9 | 10 | -------------------------------------------------------------------------------- /images/spark_image/spark-jobs/cluster-users-by-taxonomy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | 5 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | cd ${WORK_DIR} 8 | 9 | if [[ $# < 1 ]]; then 10 | echo "Need client" 11 | exit 1 12 | fi 13 | 14 | CLIENT=$1 15 | 16 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/db_read?raw` 17 | echo "${JSON}" 18 | 19 | ARGS="" 20 | if [[ ! -z "${JSON}" ]]; then 21 | JDBC=`echo "${JSON}" | jq -r ".jdbc // empty"` 22 | 23 | if [[ -z "${JDBC}" ]]; then 24 | ARGS=" --jdbc jdbc:mysql://mysql_server:3306/${CLIENT}?characterEncoding=utf8&user=root&password=mypass" 25 | else 26 | ARGS=" --jdbc ${JDBC}" 27 | fi 28 | else 29 | ARGS=" --jdbc jdbc:mysql://mysql_server:3306/${CLIENT}?characterEncoding=utf8&user=root&password=mypass" 30 | fi 31 | 32 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/algs/cluster_by_taxonomy?raw` 33 | echo "${JSON}" 34 | 35 | if [[ ! 
-z "${JSON}" ]]; then 36 | START_DAY=`echo "${JSON}" | jq -r ".start_day // empty"` 37 | if [ "${START_DAY}" = 'yesterday' ]; then 38 | START_DAY=$(($(perl -e 'use POSIX;print strftime "%s",localtime time-86400;')/86400)) 39 | fi 40 | if [[ ! -z "${START_DAY}" ]]; then 41 | ARGS="${ARGS} --start-day ${START_DAY}" 42 | else 43 | START_DAY=1 44 | fi 45 | NUM_DAYS=`echo "${JSON}" | jq -r ".num_days // empty"` 46 | if [[ ! -z "${NUM_DAYS}" ]]; then 47 | ARGS="${ARGS} --numdays ${NUM_DAYS}" 48 | fi 49 | DATA_FOLDER=`echo "${JSON}" | jq -r ".data_folder // empty"` 50 | if [[ ! -z "${DATA_FOLDER}" ]]; then 51 | ARGS="${ARGS} --input-path ${DATA_FOLDER} --output-path ${DATA_FOLDER}" 52 | else 53 | DATA_FOLDER=/seldon-models 54 | fi 55 | MIN_CLUSTER_SIZE=`echo "${JSON}" | jq -r ".min_cluster_size // empty"` 56 | if [[ ! -z "${MIN_CLUSTER_SIZE}" ]]; then 57 | ARGS="${ARGS} --minClusterSize ${MIN_CLUSTER_SIZE}" 58 | fi 59 | CLUSTER_DELTA=`echo "${JSON}" | jq -r ".delta // empty"` 60 | if [[ ! -z "${CLUSTER_DELTA}" ]]; then 61 | ARGS="${ARGS} --delta ${CLUSTER_DELTA}" 62 | fi 63 | else 64 | DATA_FOLDER=/seldon-models 65 | START_DAY=1 66 | fi 67 | echo "ARGS are ${ARGS}" 68 | 69 | JSON=`curl -s http://consul:8500/v1/kv/seldon/spark?raw` 70 | echo "${JSON}" 71 | if [[ -z "${JSON}" ]]; then 72 | JSON="{}" 73 | fi 74 | MEM=`echo "${JSON}" | jq -r ".executor_memory // empty"` 75 | if [[ -z "${MEM}" ]]; then 76 | MEM="5g" 77 | fi 78 | echo "Running with executor-memory ${MEM}" 79 | 80 | JAR_FILE=`ls /app` 81 | JAR_FILE=`basename ${JAR_FILE}` 82 | echo "jar = ${JAR_FILE}" 83 | 84 | rm -rf ${DATA_FOLDER}/${CLIENT}/cluster/${START_DAY} 85 | /opt/spark/bin/spark-submit \ 86 | --class io.seldon.spark.cluster.ClusterUsersByDimension \ 87 | --executor-memory ${MEM} \ 88 | --driver-memory ${MEM} \ 89 | /app/${JAR_FILE} \ 90 | --client $CLIENT \ 91 | --local \ 92 | ${ARGS} 93 | 94 | 95 | -------------------------------------------------------------------------------- /images/spark_image/spark-jobs/item-similarity.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | cd ${WORK_DIR} 9 | 10 | if [[ $# < 1 ]]; then 11 | echo "Need client" 12 | exit 1 13 | fi 14 | 15 | CLIENT=$1 16 | 17 | DATA_FOLDER=/seldon-models 18 | START_DAY=1 19 | MEM="2g" 20 | 21 | rm -rf ${DATA_FOLDER}/${CLIENT}/item-similarity/${START_DAY} 22 | 23 | JAR_FILE=`ls /app` 24 | JAR_FILE=`basename ${JAR_FILE}` 25 | JAR_FILE_PATH=/app/${JAR_FILE} 26 | SPARK_HOME=/opt/spark 27 | 28 | echo "jar = ${JAR_FILE_PATH}" 29 | echo "Running with executor-memory ${MEM}" 30 | 31 | ${SPARK_HOME}/bin/spark-submit \ 32 | --class "io.seldon.spark.mllib.SimilarItems" \ 33 | --master "local" \ 34 | --executor-memory ${MEM} \ 35 | --driver-memory ${MEM} \ 36 | ${JAR_FILE_PATH} \ 37 | --client ${CLIENT} \ 38 | --zookeeper zookeeper_server \ 39 | --startDay ${START_DAY} 40 | 41 | -------------------------------------------------------------------------------- /images/spark_image/spark-jobs/job-group-actions: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | cd /opt/spark 9 | 10 | YESTERDAY=$(perl -e 'use POSIX;print strftime "%Y%m%d",localtime time-86400;') 11 | 12 | JAR_FILE=`ls /app` 13 | JAR_FILE=`basename ${JAR_FILE}` 14 | echo "jar = ${JAR_FILE}" 15 | 16 | 
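# Note (added, illustrative): cluster-users-by-taxonomy.sh above is configured
# through two consul keys; field names are taken from its jq filters and the values
# here are placeholders:
#   seldon/<client>/algs/cluster_by_taxonomy ->
#     {"start_day":"yesterday","num_days":30,"data_folder":"/seldon-models",
#      "min_cluster_size":200,"delta":0.1}
#   seldon/spark -> {"executor_memory":"5g"}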
./bin/spark-submit \ 17 | --class "io.seldon.spark.actions.GroupActionsJob" \ 18 | --master local[1] \ 19 | /app/${JAR_FILE} \ 20 | --aws-access-key-id "" \ 21 | --aws-secret-access-key "" \ 22 | --input-path-pattern "/data-logs/fluentd/actions.%y/%m%d/*/*" \ 23 | --input-date-string "${YESTERDAY}" \ 24 | --output-path-dir "/seldon-models" \ 25 | --gzip-output 26 | 27 | -------------------------------------------------------------------------------- /images/spark_image/spark-jobs/job-group-actions-test: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | cd /opt/spark 9 | 10 | DATE_YESTERDAY=$(perl -e 'use POSIX;print strftime "%Y%m%d",localtime time-86400;') 11 | DATE_OTHER=20141217 12 | DATE_TODAY=$(perl -e 'use POSIX;print strftime "%Y%m%d",localtime time;') 13 | 14 | INPUT_DATE_STRING=${DATE_TODAY} 15 | 16 | JAR_FILE=`ls /app` 17 | JAR_FILE=`basename ${JAR_FILE}` 18 | echo "jar = ${JAR_FILE}" 19 | 20 | ./bin/spark-submit \ 21 | --class "io.seldon.spark.actions.GroupActionsJob" \ 22 | --master local[1] \ 23 | /app/${JAR_FILE} \ 24 | --aws-access-key-id "" \ 25 | --aws-secret-access-key "" \ 26 | --input-path-pattern "/data-logs/fluentd/actions.%y/%m%d/*/*" \ 27 | --input-date-string "${INPUT_DATE_STRING}" \ 28 | --output-path-dir "/data-logs/seldon-data-test" \ 29 | --gzip-output 30 | 31 | -------------------------------------------------------------------------------- /images/spark_image/spark-jobs/matrix-factorization.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | cd ${WORK_DIR} 9 | 10 | if [[ $# < 1 ]]; then 11 | echo "Need client" 12 | exit 1 13 | fi 14 | 15 | CLIENT=$1 16 | 17 | DATA_FOLDER=/seldon-models 18 | START_DAY=1 19 | MEM="2g" 20 | 21 | rm -rf ${DATA_FOLDER}/${CLIENT}/matrix-factorization/${START_DAY} 22 | 23 | JAR_FILE=`ls /app` 24 | JAR_FILE=`basename ${JAR_FILE}` 25 | JAR_FILE_PATH=/app/${JAR_FILE} 26 | SPARK_HOME=/opt/spark 27 | 28 | echo "jar = ${JAR_FILE_PATH}" 29 | echo "Running with executor-memory ${MEM}" 30 | 31 | ${SPARK_HOME}/bin/spark-submit \ 32 | --class "io.seldon.spark.mllib.MfModelCreation" \ 33 | --master "local" \ 34 | --executor-memory ${MEM} \ 35 | --driver-memory ${MEM} \ 36 | ${JAR_FILE_PATH} \ 37 | --client ${CLIENT} \ 38 | --zookeeper zookeeper_server \ 39 | --startDay ${START_DAY} 40 | 41 | -------------------------------------------------------------------------------- /images/spark_image/spark-jobs/session-items.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | cd ${WORK_DIR} 9 | 10 | if [[ $# < 1 ]]; then 11 | echo "Need client" 12 | exit 1 13 | fi 14 | 15 | CLIENT=$1 16 | 17 | JOB_OUTPUT_FOLDLER_NAME=sessionitems 18 | JOB_CLASS="io.seldon.spark.topics.SessionItems" 19 | MEM="2g" 20 | 21 | DATA_FOLDER=/seldon-models 22 | START_DAY=1 23 | JAR_FILE=`ls /app` 24 | JAR_FILE=`basename ${JAR_FILE}` 25 | JAR_FILE_PATH=/app/${JAR_FILE} 26 | SPARK_HOME=/opt/spark 27 | OUTPUT_FPATH=${DATA_FOLDER}/${CLIENT}/${JOB_OUTPUT_FOLDLER_NAME}/${START_DAY} 28 | 29 | echo "jar = ${JAR_FILE_PATH}" 30 | echo "job_class[${JOB_CLASS}]" 31 | echo "Running with executor-memory ${MEM}" 32 | 33 | rm -rf 
${OUTPUT_FPATH} && echo "removed $OUTPUT_FPATH" 34 | 35 | ${SPARK_HOME}/bin/spark-submit \ 36 | --class ${JOB_CLASS} \ 37 | --master "local" \ 38 | --executor-memory ${MEM} \ 39 | --driver-memory ${MEM} \ 40 | ${JAR_FILE_PATH} \ 41 | --client ${CLIENT} \ 42 | --zookeeper zookeeper_server \ 43 | --startDay ${START_DAY} 44 | 45 | -------------------------------------------------------------------------------- /images/spark_image/spark-jobs/spark-streaming-job: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | JAR_FILE=`ls /app` 9 | JAR_FILE=`basename ${JAR_FILE}` 10 | echo "jar = ${JAR_FILE}" 11 | 12 | cd /opt/spark 13 | 14 | ./bin/spark-submit \ 15 | --class io.seldon.spark.streaming.MostPopularJob \ 16 | --master local[1] \ 17 | /app/${JAR_FILE} \ 18 | --zk-quorum zookeeper_server \ 19 | --kafka-group-id spark-streaming-group \ 20 | --kafka-topics actionstopic \ 21 | --kafka-thread-partitions 1 \ 22 | --jdbc 'jdbc:mysql://mysql_server:3306/?user=root&password=mypass' \ 23 | --clients movielens,test1,test2,test3,test4,test5 24 | 25 | -------------------------------------------------------------------------------- /images/spark_image/spark-jobs/topic-model-session-tags.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | 5 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | cd ${WORK_DIR} 8 | 9 | if [[ $# < 1 ]]; then 10 | echo "Need client" 11 | exit 1 12 | fi 13 | 14 | CLIENT=$1 15 | 16 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/db_read?raw` 17 | echo "${JSON}" 18 | JDBC=`echo "${JSON}" | jq -r ".jdbc"` 19 | 20 | if [[ -z "${JDBC}" ]]; then 21 | echo "Can't get JDBC from consul for client ${CLIENT}" 22 | exit -1 23 | fi 24 | 25 | ARGS=" --jdbc ${JDBC}" 26 | 27 | JSON=`curl -s http://consul:8500/v1/kv/seldon/${CLIENT}/algs/topic_model?raw` 28 | echo "${JSON}" 29 | 30 | if [[ ! -z "${JSON}" ]]; then 31 | START_DAY=`echo "${JSON}" | jq -r ".start_day // empty"` 32 | if [ "${START_DAY}" = 'yesterday' ]; then 33 | START_DAY=$(($(perl -e 'use POSIX;print strftime "%s",localtime time-86400;')/86400)) 34 | fi 35 | if [[ ! -z "${START_DAY}" ]]; then 36 | ARGS="${ARGS} --start-day ${START_DAY}" 37 | else 38 | START_DAY=1 39 | fi 40 | NUM_DAYS=`echo "${JSON}" | jq -r ".num_days // empty"` 41 | if [[ ! -z "${NUM_DAYS}" ]]; then 42 | ARGS="${ARGS} --numdays ${NUM_DAYS}" 43 | fi 44 | DATA_FOLDER=`echo "${JSON}" | jq -r ".data_folder // empty"` 45 | if [[ ! -z "${DATA_FOLDER}" ]]; then 46 | ARGS="${ARGS} --input-path ${DATA_FOLDER} --output-path ${DATA_FOLDER}" 47 | else 48 | DATA_FOLDER=/seldon-models 49 | fi 50 | # alg specific args 51 | TAG_ATTR=`echo "${JSON}" | jq -r ".tag_attr // empty"` 52 | if [[ ! 
-z "${TAG_ATTR}" ]]; then 53 | ARGS="${ARGS} --tagAttr ${TAG_ATTR} " 54 | else 55 | echo "You must specify tag_attr in JSON config" 56 | exit -1 57 | fi 58 | else 59 | echo "You must specify tag_attr in JSON config" 60 | exit -1 61 | fi 62 | 63 | JSON=`curl -s http://consul:8500/v1/kv/seldon/spark?raw` 64 | echo "${JSON}" 65 | if [[ -z "${JSON}" ]]; then 66 | JSON="{}" 67 | fi 68 | MEM=`echo "${JSON}" | jq -r ".executor_memory // empty"` 69 | if [[ -z "${MEM}" ]]; then 70 | MEM="5g" 71 | fi 72 | echo "Running with executor-memory ${MEM}" 73 | 74 | JAR_FILE=`ls /app` 75 | JAR_FILE=`basename ${JAR_FILE}` 76 | echo "jar = ${JAR_FILE}" 77 | 78 | rm -rf ${DATA_FOLDER}/${CLIENT}/sessiontags/${START_DAY} 79 | /opt/spark/bin/spark-submit \ 80 | --class io.seldon.spark.topics.createVWTopicTraining \ 81 | --executor-memory ${MEM} \ 82 | --driver-memory ${MEM} \ 83 | /app/${JAR_FILE} \ 84 | --client $CLIENT \ 85 | --local \ 86 | ${ARGS} 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /images/spark_image/spark-jobs/word2vec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | WORK_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | cd ${WORK_DIR} 9 | 10 | if [[ $# < 1 ]]; then 11 | echo "Need client" 12 | exit 1 13 | fi 14 | 15 | CLIENT=$1 16 | 17 | JOB_OUTPUT_FOLDLER_NAME=word2vec 18 | JOB_CLASS="io.seldon.spark.features.Word2VecJob" 19 | MEM="2g" 20 | 21 | DATA_FOLDER=/seldon-models 22 | START_DAY=1 23 | JAR_FILE=`ls /app` 24 | JAR_FILE=`basename ${JAR_FILE}` 25 | JAR_FILE_PATH=/app/${JAR_FILE} 26 | SPARK_HOME=/opt/spark 27 | OUTPUT_FPATH=${DATA_FOLDER}/${CLIENT}/${JOB_OUTPUT_FOLDLER_NAME}/${START_DAY} 28 | 29 | echo "jar = ${JAR_FILE_PATH}" 30 | echo "job_class[${JOB_CLASS}]" 31 | echo "Running with executor-memory ${MEM}" 32 | 33 | rm -rf ${OUTPUT_FPATH} && echo "removed $OUTPUT_FPATH" 34 | 35 | ${SPARK_HOME}/bin/spark-submit \ 36 | --class ${JOB_CLASS} \ 37 | --master "local" \ 38 | --executor-memory ${MEM} \ 39 | --driver-memory ${MEM} \ 40 | ${JAR_FILE_PATH} \ 41 | --client ${CLIENT} \ 42 | --zookeeper zookeeper_server \ 43 | --startDay ${START_DAY} 44 | 45 | -------------------------------------------------------------------------------- /images/spark_image/startup-scripts/run-keep-alive: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | while true; do 9 | sleep 1 10 | done 11 | -------------------------------------------------------------------------------- /images/spark_image/startup-scripts/run-streaming-job: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | 6 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | 8 | /spark-jobs/spark-streaming-job 9 | 10 | -------------------------------------------------------------------------------- /images/td_agent_image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:trusty 2 | 3 | ENV HOME /root 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | RUN ( apt-get update && \ 7 | apt-get install curl -y && \ 8 | curl http://packages.treasuredata.com/GPG-KEY-td-agent | apt-key add - && \ 9 | echo "deb http://packages.treasuredata.com/2/ubuntu/trusty/ trusty contrib" | tee 
/etc/apt/sources.list.d/treasure-data.list && \ 10 | apt-get update && \ 11 | apt-get install -y --force-yes td-agent && \ 12 | apt-get install -y make gcc patch && \ 13 | td-agent-gem install fluent-plugin-kafka --no-document && \ 14 | apt-get remove -y --auto-remove curl make gcc patch ruby-dev && \ 15 | apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*) 16 | 17 | ADD td-agent.conf /etc/td-agent/td-agent.conf 18 | 19 | # Expose the default port 20 | EXPOSE 8888 21 | 22 | CMD ["td-agent"] 23 | 24 | -------------------------------------------------------------------------------- /images/td_agent_image/Makefile: -------------------------------------------------------------------------------- 1 | IMAGE_NAME=td_agent_image 2 | PROJ_DIR=../.. 3 | -include $(PROJ_DIR)/build_settings 4 | 5 | dummy: 6 | @echo dummy 7 | 8 | build_image: 9 | docker build --force-rm=true -t $(IMAGE_NAME) . 10 | 11 | push_to_registry: 12 | docker login -u $(PRIVATE_REGISTRY_USER) -p $(PRIVATE_REGISTRY_PASSWORD) -e "$(PRIVATE_REGISTRY_EMAIL)" https://$(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT) && \ 13 | docker tag $(IMAGE_NAME) $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) && \ 14 | docker push $(PRIVATE_REGISTRY_HOST):$(PRIVATE_REGISTRY_PORT)/$(IMAGE_NAME) 15 | 16 | -------------------------------------------------------------------------------- /images/td_agent_image/td-agent.conf: -------------------------------------------------------------------------------- 1 | 2 | type tail 3 | format /^(?