├── .gitignore ├── NOTICE.txt ├── README.md ├── build.sbt ├── data ├── handmade-event-description.txt ├── integration-test-expected.txt ├── rank-test-query-expected.txt ├── sample-handmade-data.txt ├── sample-handmade-data1.txt ├── sample-handmade-data2.txt ├── sample-handmade-data3.txt ├── sample-rank-data.txt ├── sample-rank-empty-action-data.txt ├── sample-rank-empty-set-data.txt └── sample_movielens_data.txt ├── docs └── universal_recommender.md ├── engine.json ├── engine.json.minimum ├── engine.json.spark-tuning ├── event-names-test-engine.json ├── examples ├── handmade-engine.json ├── hot-3-day-engine.json ├── import_handmade.py ├── import_handmade_pop_test.py ├── import_movielens_eventserver.py ├── integration-test ├── integration-test-pop-model ├── multi-query-handmade.sh ├── multi-query-movielens.sh ├── pop-engine-4-days-ago.json ├── pop-engine.json ├── pop-test-query.sh ├── rank │ ├── import_rank.py │ ├── integration-test-rank │ ├── multi-query-rank.sh │ ├── rank-engine-user-define.json │ └── rank-engine.json ├── single-query-eventNames.sh ├── single-query-handmade.sh ├── trend-engine-4-days-ago.json └── trend-engine.json ├── project └── plugins.sbt ├── scalastyle-config.xml ├── src └── main │ └── scala │ ├── DataSource.scala │ ├── Engine.scala │ ├── EsClient.scala │ ├── PopModel.scala │ ├── Preparator.scala │ ├── Serving.scala │ ├── URAlgorithm.scala │ ├── URModel.scala │ └── package.scala └── template.json /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | .cache/ 6 | .history/ 7 | .lib/ 8 | dist/* 9 | target/ 10 | lib_managed/ 11 | src_managed/ 12 | project/boot/ 13 | project/plugins/project/ 14 | 15 | # Scala-IDE specific 16 | .scala_dependencies 17 | .worksheet 18 | 19 | # Mac specific 20 | .DS_Store 21 | 22 | # IntelliJ IDEA 23 | *.iml 24 | .idea 25 | 26 | #PredictionIO specific 27 | manifest.json 28 | target/ 29 | pio.log 30 | /pio.sbt 31 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | ============================================================== 2 | ActionML LLC 3 | Copyright 2015 and onwards ActionML LLC 4 | ============================================================== 5 | 6 | This product includes software developed by 7 | ActionML (http://actionml.com/). 8 | 9 | It includes software from other Apache Software Foundation projects, 10 | including, but not limited to: 11 | - Elasticsearch (Apache 2 license) 12 | - Apache Hadoop 13 | - Apache Spark 14 | - Apache Mahout 15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Universal Recommender 2 | 3 | The Universal Recommender (UR) is a new type of collaborative filtering recommender based on an algorithm that can use data from a wide variety of user taste indicators—the Correlated Cross-Occurrence (CCO) algorithm. Unlike the matrix factorization embodied in things like MLlib's ALS, CCO is able to ingest any number of user actions, events, profile data, and contextual information. It then serves results in a fast and scalable way. It also supports item properties for filtering and boosting recommendations and can therefore be considered a hybrid collaborative filtering and content-based recommender.
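For example, a single query to a deployed UR engine can blend personalization with property-based business rules. Below is a minimal sketch of such a query body, POSTed to the engine's `queries.json` endpoint; the field names follow the UR query format, while the property names and bias values here are illustrative (by the bias convention noted in the changelog below, a negative bias filters and a bias above 1 boosts):

```json
{
  "user": "u1",
  "num": 4,
  "fields": [
    {
      "name": "categories",
      "values": ["Phones"],
      "bias": -1
    },
    {
      "name": "countries",
      "values": ["United States"],
      "bias": 1.2
    }
  ]
}
```

Here `"bias": -1` restricts results to items with the "Phones" category, while `"bias": 1.2` favors, without requiring, items available in the United States.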
4 | 5 | The use of multiple **types** of data fundamentally changes the way a recommender is used and, when employed correctly, will provide a significant increase in the quality of recommendations vs. using only one user event. Most recommenders, for instance, can only use "purchase" events. Using all we know about a user and their context allows us to predict their preferences much better. 6 | 7 | # Documentation 8 | 9 | - [The Universal Recommender](http://actionml.com/docs/ur) 10 | - [The Correlated Cross-Occurrence Algorithm](http://mahout.apache.org/users/algorithms/intro-cooccurrence-spark.html) 11 | - [The Universal Recommender Slide Deck](http://www.slideshare.net/pferrel/unified-recommender-39986309) 12 | 13 | 14 | All docs for the Universal Recommender are [here](http://actionml.com/docs/ur) and are now hosted in a separate repo at [https://github.com/actionml/docs.actionml.com](https://github.com/actionml/docs.actionml.com). If you wish to change or edit those docs, make a PR to that repo. 15 | 16 | 17 | # Version Changelog 18 | 19 | ## v0.4.2 **Replaces 0.4.1** 20 | 21 | - Fixes a `pio build` failure triggered by the release of Apache PIO. If you have problems building v0.4.0, use this version. It is meant to be used with PredictionIO-0.9.7-aml. 22 | - **Requires a custom build of Apache Mahout**: instructions are on the [doc site](http://actionml.com/docs/ur_quickstart). This is temporary until the next Mahout release, when we will update to 0.4.3 (which uses predictionio-0.9.7-aml) and 0.5.0 (which uses predictionio-0.10.0 from Apache). 23 | 24 | ## v0.4.0 25 | 26 | - This version requires PredictionIO-0.9.7-aml, found [here](http://actionml.com/docs/install). 27 | - New tuning params are now available for each "indicator" type, making indicators with a small number of possible values much more useful—things like gender or category-preference. See the docs for [configuring the UR](http://actionml.com/docs/ur_config) and look for the `indicators` parameter. 28 | - New forms of recommendation backfill allow all items to be recommended even if they have no user events yet. Backfill types include random and user-defined. See the docs for [configuring the UR](http://actionml.com/docs/ur_config) and look for the `rankings` parameter. 29 | 30 | ## v0.3.0 31 | 32 | - This version requires PredictionIO-0.9.7-aml from the ActionML repo [here](http://actionml.com/docs/install). 33 | - Now supports the `SelfCleanedDataSource` trait. Adding params to the `DataSource` part of `engine.json` allows control of de-duplication, property event compaction, and a time window for events. The time window is used to age out the oldest events. Note: this only works with the ActionML fork of PredictionIO found in the repo mentioned above. 34 | - Changed `backfillField: duration` to accept Scala Duration strings. This will require changes to all engine.json files that were using the older number-of-seconds duration. 35 | - Added support for indicator predictiveness testing with the MAP@k tool. 36 | - Fixed a bug that required the `typeName` in engine.json to be `"items"`; with this release the type can be more descriptive. 37 | 38 | ## v0.2.3 39 | 40 | - Removed `isEmpty` calls that were taking an extremely long time to execute, resulting in a considerable speedup. Now the vast majority of `pio train` time is taken up by writing to Elasticsearch. This can be optimized by creating an ES cluster or giving ES lots of memory.
41 | 42 | ## v0.2.2 43 | 44 | - A query with no item or user will get recommendations based on popularity. 45 | - A new integration test has been added. 46 | - A regression bug where some ids were being tokenized by Elasticsearch, leading to incorrect results, was fixed. **NOTE: for users with complex ids containing dashes or spaces this is an important fix.** 47 | - A dateRange in the query now takes precedence over the item's attached expiration and available dates. 48 | 49 | ## v0.2.1 50 | 51 | - Date ranges attached to items will be compared to the prediction server's current date if no date is provided in the query. 52 | 53 | ## v0.2.0 54 | 55 | - Date range filters implemented. 56 | - Hot/trending/popular used for backfill and when no other recommendations are returned by the query. 57 | - Filters (bias < 0) caused scores to be altered in v0.1.1; fixed in this version so filters have no effect on scoring. 58 | - The model is now hot-swapped in Elasticsearch so no downtime should be seen; in fact there is no need to run `pio deploy` to make the new model active. 59 | - It is now possible to have an engine.json (call it something else) dedicated to recalculating the popularity model. This allows fast updates to popularity without recalculating the collaborative filtering model. 60 | - Elasticsearch can now be in cluster mode. 61 | 62 | ## v0.1.1 63 | 64 | - Ids are now exact matches; in v0.1.0 the ids had to be lower case and were subject to tokenizing analysis, so using that version is not recommended. 65 | 66 | ## v0.1.0 67 | 68 | - User- and item-based queries supported. 69 | - Multiple usage events supported. 70 | - Filters and boosts supported on item properties and on user- or item-based results. 71 | - Fast writing to Elasticsearch using Spark. 72 | - Convention over configuration for queries: defaults make simple/typical queries simple, and overrides add greater expressiveness (see the example below).
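To illustrate the convention-over-configuration point: the simplest useful query is just `{"user": "u1"}`, which applies all defaults. Optional keys override defaults individually. A hedged sketch (the key names follow the UR query format; the property name and values are illustrative):

```json
{
  "user": "u1",
  "num": 10,
  "fields": [
    {
      "name": "countries",
      "values": ["United States"],
      "bias": -1
    }
  ]
}
```

This overrides only the default result count and adds one property filter; everything else keeps its default behavior.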
73 | 74 | # Known issues 75 | 76 | - See the GitHub [issues list](https://github.com/PredictionIO/template-scala-parallel-universal-recommendation/issues) 77 | 78 | 79 | # License 80 | This software is licensed under the Apache License, Version 2.0, found here: http://www.apache.org/licenses/LICENSE-2.0 81 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import scalariform.formatter.preferences._ 2 | import com.typesafe.sbt.SbtScalariform 3 | import com.typesafe.sbt.SbtScalariform.ScalariformKeys 4 | 5 | name := "template-scala-parallel-universal-recommendation" 6 | 7 | version := "0.4.2" 8 | 9 | organization := "io.prediction" 10 | 11 | val mahoutVersion = "0.13.0-SNAPSHOT" 12 | 13 | val pioVersion = "0.9.7-aml" 14 | 15 | libraryDependencies ++= Seq( 16 | "io.prediction" %% "core" % pioVersion % "provided", 17 | "org.apache.spark" %% "spark-core" % "1.4.0" % "provided", 18 | "org.apache.spark" %% "spark-mllib" % "1.4.0" % "provided", 19 | "org.xerial.snappy" % "snappy-java" % "1.1.1.7", 20 | // Mahout's Spark libs 21 | "org.apache.mahout" %% "mahout-math-scala" % mahoutVersion, 22 | "org.apache.mahout" %% "mahout-spark" % mahoutVersion 23 | exclude("org.apache.spark", "spark-core_2.10"), 24 | "org.apache.mahout" % "mahout-math" % mahoutVersion, 25 | "org.apache.mahout" % "mahout-hdfs" % mahoutVersion 26 | exclude("com.thoughtworks.xstream", "xstream") 27 | exclude("org.apache.hadoop", "hadoop-client"), 28 | // other external libs 29 | "com.thoughtworks.xstream" % "xstream" % "1.4.4" 30 | exclude("xmlpull", "xmlpull"), 31 | "org.elasticsearch" % "elasticsearch-spark_2.10" % "2.1.2" 32 | exclude("org.apache.spark", "spark-catalyst_2.10") 33 | exclude("org.apache.spark", "spark-sql_2.10"), 34 | "org.json4s" %% "json4s-native" % "3.2.10") 35 | .map(_.exclude("org.apache.lucene","lucene-core")).map(_.exclude("org.apache.lucene","lucene-analyzers-common")) 36 | 37 | resolvers += Resolver.mavenLocal 38 | 39 | SbtScalariform.scalariformSettings 40 | 41 | ScalariformKeys.preferences := ScalariformKeys.preferences.value 42 | .setPreference(AlignSingleLineCaseStatements, true) 43 | .setPreference(DoubleIndentClassDeclaration, true) 44 | .setPreference(DanglingCloseParenthesis, Prevent) 45 | .setPreference(MultilineScaladocCommentsStartOnFirstLine, true) 46 | 47 | assemblyMergeStrategy in assembly := { 48 | case "plugin.properties" => MergeStrategy.discard 49 | case PathList(ps @ _*) if ps.last endsWith "package-info.class" => 50 | MergeStrategy.first 51 | case x => 52 | val oldStrategy = (assemblyMergeStrategy in assembly).value 53 | oldStrategy(x) 54 | } 55 | -------------------------------------------------------------------------------- /data/handmade-event-description.txt: -------------------------------------------------------------------------------- 1 | Event: purchase entity_id: u1 target_entity_id: iphone 2 | Event: purchase entity_id: u1 target_entity_id: ipad 3 | Event: purchase entity_id: u2 target_entity_id: nexus 4 | Event: purchase entity_id: u2 target_entity_id: galaxy 5 | Event: purchase entity_id: u3 target_entity_id: surface 6 | Event: purchase entity_id: u4 target_entity_id: iphone 7 | Event: purchase entity_id: u4 target_entity_id: galaxy 8 | Event: view entity_id: u1 target_entity_id: phones 9 | Event: view entity_id: u1 target_entity_id: mobile_acc 10 | Event: view entity_id: u2 target_entity_id: phones 11 | Event: view entity_id: u2
target_entity_id: tablets 12 | Event: view entity_id: u2 target_entity_id: mobile_acc 13 | Event: view entity_id: u3 target_entity_id: mobile_acc 14 | Event: view entity_id: u4 target_entity_id: phones 15 | Event: view entity_id: u4 target_entity_id: tablets 16 | Event: view entity_id: u4 target_entity_id: soap 17 | Event: view entity_id: u5 target_entity_id: soap 18 | Event: $set entity_id: iphone properties/catagory: phones properties/date: 2015-08-30T12:24:41 properties/expiredate: 2015-09-01T12:24:41 19 | Event: $set entity_id: ipad properties/catagory: tablets properties/availabledate: 2015-08-29T12:24:41 properties/date: 2015-08-31T12:24:41 properties/expiredate: 2015-09-02T12:24:41 20 | Event: $set entity_id: nexus properties/catagory: tablets properties/availabledate: 2015-08-30T12:24:41 properties/date: 2015-09-01T12:24:41 properties/expiredate: 2015-09-03T12:24:41 21 | Event: $set entity_id: galaxy properties/catagory: phones properties/date: 2015-09-02T12:24:41 properties/expiredate: 2015-09-04T12:24:41 22 | Event: $set entity_id: surface properties/catagory: tablets properties/availabledate: 2015-09-01T12:24:41 properties/date: 2015-09-03T12:24:41 -------------------------------------------------------------------------------- /data/integration-test-expected.txt: -------------------------------------------------------------------------------- 1 | 2 | Queries to illustrate many use cases on a small standard dataset and for an automated integration test. 3 | 4 | WARNING: for this to produce the correct result you must: 5 | 1. Import data with 6 | $ python examples/import_handmade.py --access_key 7 | 2. Copy handmade-engine.json to engine.json. 8 | 3. Run 'pio build', 'pio train', and 'pio deploy' 9 | 4. The queries must be run the same day as the import was done because date filters are part of the test. 10 | NOTE: due to available and expire dates you should never see the Iphone 5 or Iphone 6. 
11 | 12 | ============ simple user recs ============ 13 | 14 | Recommendations for user: u1 15 | 16 | {"itemScores":[{"item":"Iphone 4","score":0.0},{"item":"Galaxy","score":0.0},{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0}]} 17 | 18 | Recommendations for user: U 2 19 | 20 | {"itemScores":[{"item":"Iphone 4","score":0.2215922623872757},{"item":"Ipad-retina","score":0.05624813959002495},{"item":"Surface","score":0.029876574873924255}]} 21 | 22 | Recommendations for user: u-3 23 | 24 | {"itemScores":[{"item":"Iphone 4","score":0.18315871059894562},{"item":"Galaxy","score":0.18315871059894562},{"item":"Nexus","score":0.18315871059894562},{"item":"Ipad-retina","score":0.07201286405324936}]} 25 | 26 | Recommendations for user: u-4 27 | 28 | {"itemScores":[{"item":"Nexus","score":0.5046969652175903},{"item":"Ipad-retina","score":0.08661800622940063},{"item":"Surface","score":0.0}]} 29 | 30 | Recommendations for user: u5 31 | 32 | {"itemScores":[{"item":"Iphone 4","score":0.800000011920929},{"item":"Galaxy","score":0.800000011920929},{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0}]} 33 | 34 | ============ simple similar item recs ============ 35 | 36 | Recommendations for item: Iphone 4 37 | 38 | {"itemScores":[{"item":"Galaxy","score":0.4085645079612732},{"item":"Nexus","score":0.1371881514787674},{"item":"Ipad-retina","score":0.11633750051259995},{"item":"Surface","score":0.022334998473525047}]} 39 | 40 | Recommendations for item: Ipad-retina 41 | 42 | {"itemScores":[{"item":"Iphone 4","score":0.8513996005058289},{"item":"Galaxy","score":0.1894429624080658},{"item":"Nexus","score":0.1894429624080658},{"item":"Surface","score":0.04049335792660713}]} 43 | 44 | Recommendations for item: Nexus 45 | 46 | {"itemScores":[{"item":"Galaxy","score":0.576367199420929},{"item":"Iphone 4","score":0.19353307783603668},{"item":"Ipad-retina","score":0.04849598929286003},{"item":"Surface","score":0.02529095485806465}]} 47 | 48 | Recommendations for item: Galaxy 49 | 50 | {"itemScores":[{"item":"Iphone 4","score":0.3865432143211365},{"item":"Nexus","score":0.3865432143211365},{"item":"Ipad-retina","score":0.03013293445110321},{"item":"Surface","score":0.01600530743598938}]} 51 | 52 | Recommendations for item: Surface 53 | 54 | {"itemScores":[{"item":"Iphone 4","score":0.4070388376712799},{"item":"Galaxy","score":0.4070388376712799},{"item":"Nexus","score":0.4070388376712799},{"item":"Ipad-retina","score":0.17534448206424713}]} 55 | 56 | ============ popular item recs only ============ 57 | 58 | query with no item or user id, ordered by popularity 59 | 60 | {"itemScores":[{"item":"Iphone 4","score":0.0},{"item":"Galaxy","score":0.0},{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0}]} 61 | 62 | Recommendations for non-existant user: xyz, all from popularity 63 | 64 | {"itemScores":[{"item":"Iphone 4","score":0.0},{"item":"Galaxy","score":0.0},{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0}]} 65 | 66 | Recommendations for non-existant item: xyz, all from popularity 67 | 68 | {"itemScores":[{"item":"Iphone 4","score":0.0},{"item":"Galaxy","score":0.0},{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0}]} 69 | 70 | Recommendations for no user no item, all from popularity, Tablets filter 71 | 72 | {"itemScores":[{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0},{"item":"Surface","score":0.0}]} 73 | 74 | Recommendations for no user no item, all from popularity, Tablets boost 75 | 76 | 
{"itemScores":[{"item":"Nexus","score":0.9369767904281616},{"item":"Surface","score":0.6666666865348816},{"item":"Ipad-retina","score":0.204568549990654},{"item":"Iphone 4","score":0.0}]} 77 | 78 | Recommendations for no user no item, all from popularity, Tablets boost, Estados Unidos Mexicanos filter 79 | 80 | {"itemScores":[{"item":"Ipad-retina","score":0.21918058395385742},{"item":"Iphone 4","score":0.0}]} 81 | 82 | ============ dateRange filter ============ 83 | 84 | Recommendations for user: u1 85 | 86 | {"itemScores":[{"item":"Nexus","score":0.2259630560874939},{"item":"Surface","score":0.049329958856105804}]} 87 | 88 | ============ query with item and user *EXPERIMENTAL* ============ 89 | 90 | Recommendations for no user no item, all from popularity, Tablets boost, Estados Unidos Mexicanos filter 91 | 92 | {"itemScores":[{"item":"Galaxy","score":0.49276190996170044},{"item":"Nexus","score":0.2750934064388275},{"item":"Surface","score":0.05210217833518982}]} 93 | -------------------------------------------------------------------------------- /data/rank-test-query-expected.txt: -------------------------------------------------------------------------------- 1 | 2 | Queries to illustrate many use cases on a small standard dataset and for an automated integration test. 3 | 4 | WARNING: for this to produce the correct result you must: 5 | 1. Import data with 6 | $ python examples/import_rank.py --access_key 7 | 2. Copy rank-engine.json to engine.json. 8 | 3. Run 'pio build', 'pio train', and 'pio deploy' 9 | 4. The queries must be run the same day as the import was done because date filters are part of the test. 10 | 11 | ============ simple user recs ============ 12 | 13 | Recommendations for user: user-1 14 | 15 | {"itemScores":[{"item":"product-3","score":0.3595937192440033},{"item":"product-2","score":0.10758151859045029},{"item":"product-5","score":0.06365098059177399},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 16 | 17 | Recommendations for user: user-2 18 | 19 | {"itemScores":[{"item":"product-4","score":0.6799420118331909},{"item":"product-1","score":0.2569144368171692},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 20 | 21 | Recommendations for user: user-3 22 | 23 | {"itemScores":[{"item":"product-2","score":0.3595937192440033},{"item":"product-1","score":0.3595937192440033},{"item":"product-5","score":0.017842993140220642},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 24 | 25 | Recommendations for user: user-4 26 | 27 | {"itemScores":[{"item":"product-1","score":0.2559533715248108},{"item":"product-3","score":0.0944056436419487},{"item":"product-2","score":0.0},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 28 | 29 | Recommendations for user: user-5 30 | 31 | {"itemScores":[{"item":"product-3","score":0.0},{"item":"product-4","score":0.0},{"item":"product-2","score":0.0},{"item":"product-1","score":0.0},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 32 | 33 | ============ simple similar item recs ============ 34 | 35 | Recommendations for item: product-1 36 | 37 | 
{"itemScores":[{"item":"product-3","score":0.40796521306037903},{"item":"product-4","score":0.3626357316970825},{"item":"product-5","score":0.07773856818675995},{"item":"product-2","score":0.0770743265748024},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 38 | 39 | Recommendations for item: product-2 40 | 41 | {"itemScores":[{"item":"product-4","score":0.8485281467437744},{"item":"product-3","score":0.20341692864894867},{"item":"product-1","score":0.20341692864894867},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 42 | 43 | Recommendations for item: product-3 44 | 45 | {"itemScores":[{"item":"product-4","score":0.28767499327659607},{"item":"product-1","score":0.21575623750686646},{"item":"product-2","score":0.06454890966415405},{"item":"product-5","score":0.010705795139074326},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 46 | 47 | Recommendations for item: product-4 48 | 49 | {"itemScores":[{"item":"product-2","score":0.07302875816822052},{"item":"product-3","score":0.07302875071763992},{"item":"product-1","score":0.07302875071763992},{"item":"product-5","score":0.029496734961867332},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 50 | 51 | Recommendations for item: product-5 52 | 53 | {"itemScores":[{"item":"product-4","score":0.4954302906990051},{"item":"product-1","score":0.28767499327659607},{"item":"product-3","score":0.1290978193283081},{"item":"product-2","score":0.0},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 54 | 55 | ============ popular item recs only ============ 56 | 57 | query with no item or user id, ordered by popularity 58 | 59 | {"itemScores":[{"item":"product-3","score":0.0},{"item":"product-4","score":0.0},{"item":"product-2","score":0.0},{"item":"product-1","score":0.0},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 60 | 61 | Recommendations for non-existant user: xyz, all from popularity 62 | 63 | {"itemScores":[{"item":"product-3","score":0.0},{"item":"product-4","score":0.0},{"item":"product-2","score":0.0},{"item":"product-1","score":0.0},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 64 | 65 | Recommendations for non-existant item: xyz, all from popularity 66 | 67 | {"itemScores":[{"item":"product-3","score":0.0},{"item":"product-4","score":0.0},{"item":"product-2","score":0.0},{"item":"product-1","score":0.0},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 68 | 69 | Recommendations for no user no item, all from popularity, red color filter 70 | 71 | {"itemScores":[{"item":"product-3","score":0.0},{"item":"product-1","score":0.0}]} 72 | 73 | Recommendations for no user no item, all from popularity, green boost 74 | 75 | 
{"itemScores":[{"item":"product-4","score":0.5},{"item":"product-2","score":0.5},{"item":"product-3","score":0.0},{"item":"product-1","score":0.0},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 76 | 77 | Recommendations for no user no item, all from popularity, red color boost, S size filter 78 | 79 | {"itemScores":[{"item":"product-1","score":0.6000000238418579},{"item":"product-4","score":0.0},{"item":"product-5","score":0.0},{"item":"product-9","score":0.0}]} 80 | 81 | ============ dateRange filter ============ 82 | 83 | Recommendations for user: user-1 84 | 85 | {"itemScores":[{"item":"product-6","score":0.0},{"item":"product-7","score":0.0}]} 86 | 87 | ============ query with item and user *EXPERIMENTAL* ============ 88 | 89 | Recommendations for user-1 & product-1 90 | 91 | {"itemScores":[{"item":"product-3","score":0.7042884230613708},{"item":"product-2","score":0.14845967292785645},{"item":"product-5","score":0.12810268998146057},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 92 | -------------------------------------------------------------------------------- /data/sample-handmade-data.txt: -------------------------------------------------------------------------------- 1 | u1,purchase,Iphone 6 2 | u1,purchase,Iphone 5 3 | u1,purchase,Iphone 4 4 | u1,purchase,Ipad-retina 5 | U 2,purchase,Nexus 6 | U 2,purchase,Galaxy 7 | u-3,purchase,Surface 8 | u-4,purchase,Iphone 5 9 | u-4,purchase,Iphone 5 10 | u-4,purchase,Iphone 5 11 | u-4,purchase,Iphone 5 12 | u-4,purchase,Iphone 4 13 | u-4,purchase,Galaxy 14 | u1,view,Phones 15 | u1,view,Phones 16 | u1,view,Phones 17 | u1,view,Phones 18 | u1,view,Phones 19 | u1,view,Phones 20 | u1,view,Mobile-acc 21 | U 2,view,Phones 22 | U 2,view,Tablets 23 | U 2,view,Mobile-acc 24 | u-3,view,Mobile-acc 25 | u-4,view,Phones 26 | u-4,view,Tablets 27 | u-4,view,Soap 28 | u5,view,Soap 29 | Iphone 6,$set,categories:Phones:Electronics:Apple 30 | Iphone 5,$set,categories:Phones:Electronics:Apple 31 | Iphone 4,$set,categories:Phones:Electronics:Apple 32 | Ipad-retina,$set,categories:Tablets:Electronics:Apple 33 | Nexus,$set,categories:Tablets:Electronics:Google 34 | Galaxy,$set,categories:Phones:Electronics:Samsung 35 | Surface,$set,categories:Tablets:Electronics:Microsoft 36 | Iphone 4,$set,countries:United States:Canada:Estados Unidos Mexicanos 37 | Ipad-retina,$set,countries:United States:Estados Unidos Mexicanos 38 | Nexus,$set,countries:United States:Canada 39 | Galaxy,$set,countries:United States 40 | Surface,$set,countries:United States:Canada 41 | Iphone 6,$set,categories:Cameras 42 | Iphone 5,$set,categories:Cameras 43 | Iphone 4,$set,categories:Cameras 44 | Ipad-retina,$set,categories:Computers 45 | Nexus,$set,categories:Computers 46 | Galaxy,$set,categories:Cameras 47 | Surface,$set,categories:Computers 48 | Iphone 4,$set,countries:Cuba 49 | Ipad-retina,$set,countries:Cuba 50 | Nexus,$set,countries:Cuba 51 | Galaxy,$set,countries:Cuba 52 | Surface,$set,countries:Cuba 53 | u1,purchase,Iphone 6 54 | u1,purchase,Iphone 5 55 | u1,purchase,Iphone 4 56 | u1,purchase,Ipad-retina 57 | U 2,purchase,Nexus 58 | U 2,purchase,Galaxy 59 | u-3,purchase,Surface 60 | u-4,purchase,Iphone 5 61 | u-4,purchase,Iphone 4 62 | u-4,purchase,Galaxy 63 | u1,view,Phones 64 | u1,view,Mobile-acc 65 | U 2,view,Phones 66 | U 2,view,Tablets 67 | U 2,view,Mobile-acc 68 | 
u-3,view,Mobile-acc 69 | u-4,view,Phones 70 | u-4,view,Tablets 71 | u-4,view,Soap 72 | u5,view,Soap 73 | u1,purchase,Galaxy 74 | u1,purchase,Galaxy 75 | u1,purchase,Galaxy 76 | u1,purchase,Galaxy 77 | U 2,purchase,Iphone 5 78 | U 2,purchase,Iphone 5 79 | u1,view,Soap 80 | u1,view,Soap 81 | u-3,view,Mobile-acc 82 | u-4,view,Mobile-acc 83 | u-4,view,Mobile-acc 84 | Iphone 6,$set,categories:Phones:Electronics:Apple 85 | Iphone 5,$set,categories:Phones:Electronics:Apple 86 | Iphone 4,$set,categories:Phones:Electronics:Apple 87 | Ipad-retina,$set,categories:Tablets:Electronics:Apple 88 | Nexus,$set,categories:Tablets:Electronics:Google 89 | Galaxy,$set,categories:Phones:Electronics:Samsung 90 | Surface,$set,categories:Tablets:Electronics:Microsoft 91 | Iphone 4,$set,countries:United States:Canada:Estados Unidos Mexicanos 92 | Ipad-retina,$set,countries:United States:Estados Unidos Mexicanos 93 | Nexus,$set,countries:United States:Canada 94 | Galaxy,$set,countries:United States 95 | Surface,$set,countries:United States:Canada 96 | Iphone 6,$set,defaultRank:7 97 | Iphone 5,$set,defaultRank:6 98 | Iphone 4,$set,defaultRank:5 99 | Nexus,$set,defaultRank:4 100 | Ipad-retina,$set,defaultRank:3 101 | Galaxy,$set,defaultRank:2 102 | Surface,$set,defaultRank:1 103 | -------------------------------------------------------------------------------- /data/sample-handmade-data1.txt: -------------------------------------------------------------------------------- 1 | u-1,purchase,Iphone 6 2 | u-2,purchase,Iphone 6 3 | u-3,purchase,Iphone 6 4 | u-4,purchase,Iphone 6 5 | u-5,purchase,Iphone 6 6 | u-6,purchase,Iphone 6 7 | u-1,purchase,Iphone 5 8 | u-2,purchase,Iphone 5 9 | u-1,purchase,Iphone 4 10 | u-1,purchase,Ipad-retina 11 | u-2,purchase,Ipad-retina 12 | u-1,purchase,Nexus 13 | u-2,purchase,Nexus 14 | u-3,purchase,Nexus 15 | u-4,purchase,Nexus 16 | u-5,purchase,Nexus 17 | u-6,purchase,Nexus 18 | u-7,purchase,Nexus 19 | -------------------------------------------------------------------------------- /data/sample-handmade-data2.txt: -------------------------------------------------------------------------------- 1 | U 1,purchase,Iphone 6 2 | U 2,purchase,Iphone 6 3 | U 1,purchase,Iphone 5 4 | U 2,purchase,Iphone 5 5 | U 3,purchase,Iphone 5 6 | U 4,purchase,Iphone 5 7 | U 1,purchase,Iphone 4 8 | U 1,purchase,Ipad-retina 9 | U 1,purchase,Nexus 10 | U 2,purchase,Nexus 11 | U 3,purchase,Nexus 12 | U 4,purchase,Nexus 13 | U 5,purchase,Nexus 14 | U 6,purchase,Nexus 15 | U 7,purchase,Nexus 16 | -------------------------------------------------------------------------------- /data/sample-handmade-data3.txt: -------------------------------------------------------------------------------- 1 | u1,purchase,Iphone 6 2 | u1,purchase,Iphone 5 3 | u2,purchase,Iphone 5 4 | u3,purchase,Iphone 5 5 | u4,purchase,Iphone 5 6 | u5,purchase,Iphone 5 7 | u6,purchase,Iphone 5 8 | u1,purchase,Iphone 4 9 | u1,purchase,Ipad-retina 10 | u2,purchase,Ipad-retina 11 | u3,purchase,Ipad-retina 12 | u4,purchase,Ipad-retina 13 | u5,purchase,Ipad-retina 14 | u6,purchase,Ipad-retina 15 | u1,purchase,Nexus 16 | u2,purchase,Nexus 17 | u3,purchase,Nexus 18 | u4,purchase,Nexus 19 | u5,purchase,Nexus 20 | u6,purchase,Nexus 21 | u7,purchase,Nexus 22 | -------------------------------------------------------------------------------- /data/sample-rank-data.txt: -------------------------------------------------------------------------------- 1 | user-1,show,product-1 2 | user-2,show,product-2 3 | user-3,show,product-3 4 | user-4,show,product-4 5 | 
user-1,like,product-4 6 | user-2,like,product-3 7 | user-3,like,product-2 8 | user-4,like,product-1 9 | product-1,$set,color:red:black 10 | product-2,$set,color:green:black 11 | product-3,$set,color:red:black 12 | product-4,$set,color:green:black 13 | user-1,show,product-4 14 | user-2,show,product-3 15 | user-3,like,product-3 16 | user-4,like,product-4 17 | product-1,$set,defaultRank:1.0 18 | product-2,$set,defaultRank:2.7 19 | product-3,$set,defaultRank:3.2 20 | product-4,$set,defaultRank:4.7 21 | product-5,$set,defaultRank:5.0 22 | product-6,$set,defaultRank:6.9 23 | product-7,$set,defaultRank:7.15 24 | product-8,$set,defaultRank:8.07 25 | user-1,like,product-3 26 | user-4,like,product-6 27 | user-2,show,product-3 28 | user-3,show,product-4 29 | user-4,show,product-5 30 | product-1,$set,size:S:M 31 | product-2,$set,size:SX:XL 32 | product-3,$set,size:XL:X 33 | product-4,$set,size:X:XL:S 34 | product-5,$set,size:M:S:XS 35 | product-9,$set,size:M:S:XS -------------------------------------------------------------------------------- /data/sample-rank-empty-action-data.txt: -------------------------------------------------------------------------------- 1 | product-1,$set,defaultRank:1.0 2 | product-2,$set,defaultRank:2.7 3 | product-3,$set,defaultRank:3.2 4 | product-4,$set,defaultRank:4.7 5 | product-5,$set,defaultRank:5.0 6 | product-6,$set,defaultRank:6.9 7 | product-7,$set,defaultRank:7.15 8 | product-8,$set,defaultRank:8.07 -------------------------------------------------------------------------------- /data/sample-rank-empty-set-data.txt: -------------------------------------------------------------------------------- 1 | user-1,show,product-1 2 | user-2,show,product-2 3 | user-3,show,product-3 4 | user-4,show,product-4 5 | user-1,like,product-4 6 | user-2,like,product-4 7 | user-3,like,product-2 8 | user-4,like,product-1 9 | user-1,like,product-1 10 | user-2,like,product-1 11 | user-3,like,product-1 12 | user-3,unknown,product-5 13 | product-6,$set,defaultRank:6.9 -------------------------------------------------------------------------------- /data/sample_movielens_data.txt: -------------------------------------------------------------------------------- 1 | 0::2::3 2 | 0::3::1 3 | 0::5::2 4 | 0::9::4 5 | 0::11::1 6 | 0::12::2 7 | 0::15::1 8 | 0::17::1 9 | 0::19::1 10 | 0::21::1 11 | 0::23::1 12 | 0::26::3 13 | 0::27::1 14 | 0::28::1 15 | 0::29::1 16 | 0::30::1 17 | 0::31::1 18 | 0::34::1 19 | 0::37::1 20 | 0::41::2 21 | 0::44::1 22 | 0::45::2 23 | 0::46::1 24 | 0::47::1 25 | 0::48::1 26 | 0::50::1 27 | 0::51::1 28 | 0::54::1 29 | 0::55::1 30 | 0::59::2 31 | 0::61::2 32 | 0::64::1 33 | 0::67::1 34 | 0::68::1 35 | 0::69::1 36 | 0::71::1 37 | 0::72::1 38 | 0::77::2 39 | 0::79::1 40 | 0::83::1 41 | 0::87::1 42 | 0::89::2 43 | 0::91::3 44 | 0::92::4 45 | 0::94::1 46 | 0::95::2 47 | 0::96::1 48 | 0::98::1 49 | 0::99::1 50 | 1::2::2 51 | 1::3::1 52 | 1::4::2 53 | 1::6::1 54 | 1::9::3 55 | 1::12::1 56 | 1::13::1 57 | 1::14::1 58 | 1::16::1 59 | 1::19::1 60 | 1::21::3 61 | 1::27::1 62 | 1::28::3 63 | 1::33::1 64 | 1::36::2 65 | 1::37::1 66 | 1::40::1 67 | 1::41::2 68 | 1::43::1 69 | 1::44::1 70 | 1::47::1 71 | 1::50::1 72 | 1::54::1 73 | 1::56::2 74 | 1::57::1 75 | 1::58::1 76 | 1::60::1 77 | 1::62::4 78 | 1::63::1 79 | 1::67::1 80 | 1::68::4 81 | 1::70::2 82 | 1::72::1 83 | 1::73::1 84 | 1::74::2 85 | 1::76::1 86 | 1::77::3 87 | 1::78::1 88 | 1::81::1 89 | 1::82::1 90 | 1::85::3 91 | 1::86::2 92 | 1::88::2 93 | 1::91::1 94 | 1::92::2 95 | 1::93::1 96 | 1::94::2 97 | 1::96::1 98 | 1::97::1 
99 | 2::4::3 100 | 2::6::1 101 | 2::8::5 102 | 2::9::1 103 | 2::10::1 104 | 2::12::3 105 | 2::13::1 106 | 2::15::2 107 | 2::18::2 108 | 2::19::4 109 | 2::22::1 110 | 2::26::1 111 | 2::28::1 112 | 2::34::4 113 | 2::35::1 114 | 2::37::5 115 | 2::38::1 116 | 2::39::5 117 | 2::40::4 118 | 2::47::1 119 | 2::50::1 120 | 2::52::2 121 | 2::54::1 122 | 2::55::1 123 | 2::57::2 124 | 2::58::2 125 | 2::59::1 126 | 2::61::1 127 | 2::62::1 128 | 2::64::1 129 | 2::65::1 130 | 2::66::3 131 | 2::68::1 132 | 2::71::3 133 | 2::76::1 134 | 2::77::1 135 | 2::78::1 136 | 2::80::1 137 | 2::83::5 138 | 2::85::1 139 | 2::87::2 140 | 2::88::1 141 | 2::89::4 142 | 2::90::1 143 | 2::92::4 144 | 2::93::5 145 | 3::0::1 146 | 3::1::1 147 | 3::2::1 148 | 3::7::3 149 | 3::8::3 150 | 3::9::1 151 | 3::14::1 152 | 3::15::1 153 | 3::16::1 154 | 3::18::4 155 | 3::19::1 156 | 3::24::3 157 | 3::26::1 158 | 3::29::3 159 | 3::33::1 160 | 3::34::3 161 | 3::35::1 162 | 3::36::3 163 | 3::37::1 164 | 3::38::2 165 | 3::43::1 166 | 3::44::1 167 | 3::46::1 168 | 3::47::1 169 | 3::51::5 170 | 3::52::3 171 | 3::56::1 172 | 3::58::1 173 | 3::60::3 174 | 3::62::1 175 | 3::65::2 176 | 3::66::1 177 | 3::67::1 178 | 3::68::2 179 | 3::70::1 180 | 3::72::2 181 | 3::76::3 182 | 3::79::3 183 | 3::80::4 184 | 3::81::1 185 | 3::83::1 186 | 3::84::1 187 | 3::86::1 188 | 3::87::2 189 | 3::88::4 190 | 3::89::1 191 | 3::91::1 192 | 3::94::3 193 | 4::1::1 194 | 4::6::1 195 | 4::8::1 196 | 4::9::1 197 | 4::10::1 198 | 4::11::1 199 | 4::12::1 200 | 4::13::1 201 | 4::14::2 202 | 4::15::1 203 | 4::17::1 204 | 4::20::1 205 | 4::22::1 206 | 4::23::1 207 | 4::24::1 208 | 4::29::4 209 | 4::30::1 210 | 4::31::1 211 | 4::34::1 212 | 4::35::1 213 | 4::36::1 214 | 4::39::2 215 | 4::40::3 216 | 4::41::4 217 | 4::43::2 218 | 4::44::1 219 | 4::45::1 220 | 4::46::1 221 | 4::47::1 222 | 4::49::2 223 | 4::50::1 224 | 4::51::1 225 | 4::52::4 226 | 4::54::1 227 | 4::55::1 228 | 4::60::3 229 | 4::61::1 230 | 4::62::4 231 | 4::63::3 232 | 4::65::1 233 | 4::67::2 234 | 4::69::1 235 | 4::70::4 236 | 4::71::1 237 | 4::73::1 238 | 4::78::1 239 | 4::84::1 240 | 4::85::1 241 | 4::87::3 242 | 4::88::3 243 | 4::89::2 244 | 4::96::1 245 | 4::97::1 246 | 4::98::1 247 | 4::99::1 248 | 5::0::1 249 | 5::1::1 250 | 5::4::1 251 | 5::5::1 252 | 5::8::1 253 | 5::9::3 254 | 5::10::2 255 | 5::13::3 256 | 5::15::1 257 | 5::19::1 258 | 5::20::3 259 | 5::21::2 260 | 5::23::3 261 | 5::27::1 262 | 5::28::1 263 | 5::29::1 264 | 5::31::1 265 | 5::36::3 266 | 5::38::2 267 | 5::39::1 268 | 5::42::1 269 | 5::48::3 270 | 5::49::4 271 | 5::50::3 272 | 5::51::1 273 | 5::52::1 274 | 5::54::1 275 | 5::55::5 276 | 5::56::3 277 | 5::58::1 278 | 5::60::1 279 | 5::61::1 280 | 5::64::3 281 | 5::65::2 282 | 5::68::4 283 | 5::70::1 284 | 5::71::1 285 | 5::72::1 286 | 5::74::1 287 | 5::79::1 288 | 5::81::2 289 | 5::84::1 290 | 5::85::1 291 | 5::86::1 292 | 5::88::1 293 | 5::90::4 294 | 5::91::2 295 | 5::95::2 296 | 5::99::1 297 | 6::0::1 298 | 6::1::1 299 | 6::2::3 300 | 6::5::1 301 | 6::6::1 302 | 6::9::1 303 | 6::10::1 304 | 6::15::2 305 | 6::16::2 306 | 6::17::1 307 | 6::18::1 308 | 6::20::1 309 | 6::21::1 310 | 6::22::1 311 | 6::24::1 312 | 6::25::5 313 | 6::26::1 314 | 6::28::1 315 | 6::30::1 316 | 6::33::1 317 | 6::38::1 318 | 6::39::1 319 | 6::43::4 320 | 6::44::1 321 | 6::45::1 322 | 6::48::1 323 | 6::49::1 324 | 6::50::1 325 | 6::53::1 326 | 6::54::1 327 | 6::55::1 328 | 6::56::1 329 | 6::58::4 330 | 6::59::1 331 | 6::60::1 332 | 6::61::3 333 | 6::63::3 334 | 6::66::1 335 | 6::67::3 336 | 6::68::1 337 | 
6::69::1 338 | 6::71::2 339 | 6::73::1 340 | 6::75::1 341 | 6::77::1 342 | 6::79::1 343 | 6::81::1 344 | 6::84::1 345 | 6::85::3 346 | 6::86::1 347 | 6::87::1 348 | 6::88::1 349 | 6::89::1 350 | 6::91::2 351 | 6::94::1 352 | 6::95::2 353 | 6::96::1 354 | 7::1::1 355 | 7::2::2 356 | 7::3::1 357 | 7::4::1 358 | 7::7::1 359 | 7::10::1 360 | 7::11::2 361 | 7::14::2 362 | 7::15::1 363 | 7::16::1 364 | 7::18::1 365 | 7::21::1 366 | 7::22::1 367 | 7::23::1 368 | 7::25::5 369 | 7::26::1 370 | 7::29::4 371 | 7::30::1 372 | 7::31::3 373 | 7::32::1 374 | 7::33::1 375 | 7::35::1 376 | 7::37::2 377 | 7::39::3 378 | 7::40::2 379 | 7::42::2 380 | 7::44::1 381 | 7::45::2 382 | 7::47::4 383 | 7::48::1 384 | 7::49::1 385 | 7::53::1 386 | 7::54::1 387 | 7::55::1 388 | 7::56::1 389 | 7::59::1 390 | 7::61::2 391 | 7::62::3 392 | 7::63::2 393 | 7::66::1 394 | 7::67::3 395 | 7::74::1 396 | 7::75::1 397 | 7::76::3 398 | 7::77::1 399 | 7::81::1 400 | 7::82::1 401 | 7::84::2 402 | 7::85::4 403 | 7::86::1 404 | 7::92::2 405 | 7::96::1 406 | 7::97::1 407 | 7::98::1 408 | 8::0::1 409 | 8::2::4 410 | 8::3::2 411 | 8::4::2 412 | 8::5::1 413 | 8::7::1 414 | 8::9::1 415 | 8::11::1 416 | 8::15::1 417 | 8::18::1 418 | 8::19::1 419 | 8::21::1 420 | 8::29::5 421 | 8::31::3 422 | 8::33::1 423 | 8::35::1 424 | 8::36::1 425 | 8::40::2 426 | 8::44::1 427 | 8::45::1 428 | 8::50::1 429 | 8::51::1 430 | 8::52::5 431 | 8::53::5 432 | 8::54::1 433 | 8::55::1 434 | 8::56::1 435 | 8::58::4 436 | 8::60::3 437 | 8::62::4 438 | 8::64::1 439 | 8::67::3 440 | 8::69::1 441 | 8::71::1 442 | 8::72::3 443 | 8::77::3 444 | 8::78::1 445 | 8::79::1 446 | 8::83::1 447 | 8::85::5 448 | 8::86::1 449 | 8::88::1 450 | 8::90::1 451 | 8::92::2 452 | 8::95::4 453 | 8::96::3 454 | 8::97::1 455 | 8::98::1 456 | 8::99::1 457 | 9::2::3 458 | 9::3::1 459 | 9::4::1 460 | 9::5::1 461 | 9::6::1 462 | 9::7::5 463 | 9::9::1 464 | 9::12::1 465 | 9::14::3 466 | 9::15::1 467 | 9::19::1 468 | 9::21::1 469 | 9::22::1 470 | 9::24::1 471 | 9::25::1 472 | 9::26::1 473 | 9::30::3 474 | 9::32::4 475 | 9::35::2 476 | 9::36::2 477 | 9::37::2 478 | 9::38::1 479 | 9::39::1 480 | 9::43::3 481 | 9::49::5 482 | 9::50::3 483 | 9::53::1 484 | 9::54::1 485 | 9::58::1 486 | 9::59::1 487 | 9::60::1 488 | 9::61::1 489 | 9::63::3 490 | 9::64::3 491 | 9::68::1 492 | 9::69::1 493 | 9::70::3 494 | 9::71::1 495 | 9::73::2 496 | 9::75::1 497 | 9::77::2 498 | 9::81::2 499 | 9::82::1 500 | 9::83::1 501 | 9::84::1 502 | 9::86::1 503 | 9::87::4 504 | 9::88::1 505 | 9::90::3 506 | 9::94::2 507 | 9::95::3 508 | 9::97::2 509 | 9::98::1 510 | 10::0::3 511 | 10::2::4 512 | 10::4::3 513 | 10::7::1 514 | 10::8::1 515 | 10::10::1 516 | 10::13::2 517 | 10::14::1 518 | 10::16::2 519 | 10::17::1 520 | 10::18::1 521 | 10::21::1 522 | 10::22::1 523 | 10::24::1 524 | 10::25::3 525 | 10::28::1 526 | 10::35::1 527 | 10::36::1 528 | 10::37::1 529 | 10::38::1 530 | 10::39::1 531 | 10::40::4 532 | 10::41::2 533 | 10::42::3 534 | 10::43::1 535 | 10::49::3 536 | 10::50::1 537 | 10::51::1 538 | 10::52::1 539 | 10::55::2 540 | 10::56::1 541 | 10::58::1 542 | 10::63::1 543 | 10::66::1 544 | 10::67::2 545 | 10::68::1 546 | 10::75::1 547 | 10::77::1 548 | 10::79::1 549 | 10::86::1 550 | 10::89::3 551 | 10::90::1 552 | 10::97::1 553 | 10::98::1 554 | 11::0::1 555 | 11::6::2 556 | 11::9::1 557 | 11::10::1 558 | 11::11::1 559 | 11::12::1 560 | 11::13::4 561 | 11::16::1 562 | 11::18::5 563 | 11::19::4 564 | 11::20::1 565 | 11::21::1 566 | 11::22::1 567 | 11::23::5 568 | 11::25::1 569 | 11::27::5 570 | 11::30::5 571 | 11::32::5 
572 | 11::35::3 573 | 11::36::2 574 | 11::37::2 575 | 11::38::4 576 | 11::39::1 577 | 11::40::1 578 | 11::41::1 579 | 11::43::2 580 | 11::45::1 581 | 11::47::1 582 | 11::48::5 583 | 11::50::4 584 | 11::51::3 585 | 11::59::1 586 | 11::61::1 587 | 11::62::1 588 | 11::64::1 589 | 11::66::4 590 | 11::67::1 591 | 11::69::5 592 | 11::70::1 593 | 11::71::3 594 | 11::72::3 595 | 11::75::3 596 | 11::76::1 597 | 11::77::1 598 | 11::78::1 599 | 11::79::5 600 | 11::80::3 601 | 11::81::4 602 | 11::82::1 603 | 11::86::1 604 | 11::88::1 605 | 11::89::1 606 | 11::90::4 607 | 11::94::2 608 | 11::97::3 609 | 11::99::1 610 | 12::2::1 611 | 12::4::1 612 | 12::6::1 613 | 12::7::3 614 | 12::8::1 615 | 12::14::1 616 | 12::15::2 617 | 12::16::4 618 | 12::17::5 619 | 12::18::2 620 | 12::21::1 621 | 12::22::2 622 | 12::23::3 623 | 12::24::1 624 | 12::25::1 625 | 12::27::5 626 | 12::30::2 627 | 12::31::4 628 | 12::35::5 629 | 12::38::1 630 | 12::41::1 631 | 12::44::2 632 | 12::45::1 633 | 12::50::4 634 | 12::51::1 635 | 12::52::1 636 | 12::53::1 637 | 12::54::1 638 | 12::56::2 639 | 12::57::1 640 | 12::60::1 641 | 12::63::1 642 | 12::64::5 643 | 12::66::3 644 | 12::67::1 645 | 12::70::1 646 | 12::72::1 647 | 12::74::1 648 | 12::75::1 649 | 12::77::1 650 | 12::78::1 651 | 12::79::3 652 | 12::82::2 653 | 12::83::1 654 | 12::84::1 655 | 12::85::1 656 | 12::86::1 657 | 12::87::1 658 | 12::88::1 659 | 12::91::3 660 | 12::92::1 661 | 12::94::4 662 | 12::95::2 663 | 12::96::1 664 | 12::98::2 665 | 13::0::1 666 | 13::3::1 667 | 13::4::2 668 | 13::5::1 669 | 13::6::1 670 | 13::12::1 671 | 13::14::2 672 | 13::15::1 673 | 13::17::1 674 | 13::18::3 675 | 13::20::1 676 | 13::21::1 677 | 13::22::1 678 | 13::26::1 679 | 13::27::1 680 | 13::29::3 681 | 13::31::1 682 | 13::33::1 683 | 13::40::2 684 | 13::43::2 685 | 13::44::1 686 | 13::45::1 687 | 13::49::1 688 | 13::51::1 689 | 13::52::2 690 | 13::53::3 691 | 13::54::1 692 | 13::62::1 693 | 13::63::2 694 | 13::64::1 695 | 13::68::1 696 | 13::71::1 697 | 13::72::3 698 | 13::73::1 699 | 13::74::3 700 | 13::77::2 701 | 13::78::1 702 | 13::79::2 703 | 13::83::3 704 | 13::85::1 705 | 13::86::1 706 | 13::87::2 707 | 13::88::2 708 | 13::90::1 709 | 13::93::4 710 | 13::94::1 711 | 13::98::1 712 | 13::99::1 713 | 14::1::1 714 | 14::3::3 715 | 14::4::1 716 | 14::5::1 717 | 14::6::1 718 | 14::7::1 719 | 14::9::1 720 | 14::10::1 721 | 14::11::1 722 | 14::12::1 723 | 14::13::1 724 | 14::14::3 725 | 14::15::1 726 | 14::16::1 727 | 14::17::1 728 | 14::20::1 729 | 14::21::1 730 | 14::24::1 731 | 14::25::2 732 | 14::27::1 733 | 14::28::1 734 | 14::29::5 735 | 14::31::3 736 | 14::34::1 737 | 14::36::1 738 | 14::37::2 739 | 14::39::2 740 | 14::40::1 741 | 14::44::1 742 | 14::45::1 743 | 14::47::3 744 | 14::48::1 745 | 14::49::1 746 | 14::51::1 747 | 14::52::5 748 | 14::53::3 749 | 14::54::1 750 | 14::55::1 751 | 14::56::1 752 | 14::62::4 753 | 14::63::5 754 | 14::67::3 755 | 14::68::1 756 | 14::69::3 757 | 14::71::1 758 | 14::72::4 759 | 14::73::1 760 | 14::76::5 761 | 14::79::1 762 | 14::82::1 763 | 14::83::1 764 | 14::88::1 765 | 14::93::3 766 | 14::94::1 767 | 14::95::2 768 | 14::96::4 769 | 14::98::1 770 | 15::0::1 771 | 15::1::4 772 | 15::2::1 773 | 15::5::2 774 | 15::6::1 775 | 15::7::1 776 | 15::13::1 777 | 15::14::1 778 | 15::15::1 779 | 15::17::2 780 | 15::19::2 781 | 15::22::2 782 | 15::23::2 783 | 15::25::1 784 | 15::26::3 785 | 15::27::1 786 | 15::28::2 787 | 15::29::1 788 | 15::32::1 789 | 15::33::2 790 | 15::34::1 791 | 15::35::2 792 | 15::36::1 793 | 15::37::1 794 | 15::39::1 795 | 
15::42::1 796 | 15::46::5 797 | 15::48::2 798 | 15::50::2 799 | 15::51::1 800 | 15::52::1 801 | 15::58::1 802 | 15::62::1 803 | 15::64::3 804 | 15::65::2 805 | 15::72::1 806 | 15::73::1 807 | 15::74::1 808 | 15::79::1 809 | 15::80::1 810 | 15::81::1 811 | 15::82::2 812 | 15::85::1 813 | 15::87::1 814 | 15::91::2 815 | 15::96::1 816 | 15::97::1 817 | 15::98::3 818 | 16::2::1 819 | 16::5::3 820 | 16::6::2 821 | 16::7::1 822 | 16::9::1 823 | 16::12::1 824 | 16::14::1 825 | 16::15::1 826 | 16::19::1 827 | 16::21::2 828 | 16::29::4 829 | 16::30::2 830 | 16::32::1 831 | 16::34::1 832 | 16::36::1 833 | 16::38::1 834 | 16::46::1 835 | 16::47::3 836 | 16::48::1 837 | 16::49::1 838 | 16::50::1 839 | 16::51::5 840 | 16::54::5 841 | 16::55::1 842 | 16::56::2 843 | 16::57::1 844 | 16::60::1 845 | 16::63::2 846 | 16::65::1 847 | 16::67::1 848 | 16::72::1 849 | 16::74::1 850 | 16::80::1 851 | 16::81::1 852 | 16::82::1 853 | 16::85::5 854 | 16::86::1 855 | 16::90::5 856 | 16::91::1 857 | 16::93::1 858 | 16::94::3 859 | 16::95::2 860 | 16::96::3 861 | 16::98::3 862 | 16::99::1 863 | 17::2::1 864 | 17::3::1 865 | 17::6::1 866 | 17::10::4 867 | 17::11::1 868 | 17::13::2 869 | 17::17::5 870 | 17::19::1 871 | 17::20::5 872 | 17::22::4 873 | 17::28::1 874 | 17::29::1 875 | 17::33::1 876 | 17::34::1 877 | 17::35::2 878 | 17::37::1 879 | 17::38::1 880 | 17::45::1 881 | 17::46::5 882 | 17::47::1 883 | 17::49::3 884 | 17::51::1 885 | 17::55::5 886 | 17::56::3 887 | 17::57::1 888 | 17::58::1 889 | 17::59::1 890 | 17::60::1 891 | 17::63::1 892 | 17::66::1 893 | 17::68::4 894 | 17::69::1 895 | 17::70::1 896 | 17::72::1 897 | 17::73::3 898 | 17::78::1 899 | 17::79::1 900 | 17::82::2 901 | 17::84::1 902 | 17::90::5 903 | 17::91::3 904 | 17::92::1 905 | 17::93::1 906 | 17::94::4 907 | 17::95::2 908 | 17::97::1 909 | 18::1::1 910 | 18::4::3 911 | 18::5::2 912 | 18::6::1 913 | 18::7::1 914 | 18::10::1 915 | 18::11::4 916 | 18::12::2 917 | 18::13::1 918 | 18::15::1 919 | 18::18::1 920 | 18::20::1 921 | 18::21::2 922 | 18::22::1 923 | 18::23::2 924 | 18::25::1 925 | 18::26::1 926 | 18::27::1 927 | 18::28::5 928 | 18::29::1 929 | 18::31::1 930 | 18::32::1 931 | 18::36::1 932 | 18::38::5 933 | 18::39::5 934 | 18::40::1 935 | 18::42::1 936 | 18::43::1 937 | 18::44::4 938 | 18::46::1 939 | 18::47::1 940 | 18::48::1 941 | 18::51::2 942 | 18::55::1 943 | 18::56::1 944 | 18::57::1 945 | 18::62::1 946 | 18::63::1 947 | 18::66::3 948 | 18::67::1 949 | 18::70::1 950 | 18::75::1 951 | 18::76::3 952 | 18::77::1 953 | 18::80::3 954 | 18::81::3 955 | 18::82::1 956 | 18::83::5 957 | 18::84::1 958 | 18::97::1 959 | 18::98::1 960 | 18::99::2 961 | 19::0::1 962 | 19::1::1 963 | 19::2::1 964 | 19::4::1 965 | 19::6::2 966 | 19::11::1 967 | 19::12::1 968 | 19::14::1 969 | 19::23::1 970 | 19::26::1 971 | 19::31::1 972 | 19::32::4 973 | 19::33::1 974 | 19::34::1 975 | 19::37::1 976 | 19::38::1 977 | 19::41::1 978 | 19::43::1 979 | 19::45::1 980 | 19::48::1 981 | 19::49::1 982 | 19::50::2 983 | 19::53::2 984 | 19::54::3 985 | 19::55::1 986 | 19::56::2 987 | 19::58::1 988 | 19::61::1 989 | 19::62::1 990 | 19::63::1 991 | 19::64::1 992 | 19::65::1 993 | 19::69::2 994 | 19::72::1 995 | 19::74::3 996 | 19::76::1 997 | 19::78::1 998 | 19::79::1 999 | 19::81::1 1000 | 19::82::1 1001 | 19::84::1 1002 | 19::86::1 1003 | 19::87::2 1004 | 19::90::4 1005 | 19::93::1 1006 | 19::94::4 1007 | 19::95::2 1008 | 19::96::1 1009 | 19::98::4 1010 | 20::0::1 1011 | 20::1::1 1012 | 20::2::2 1013 | 20::4::2 1014 | 20::6::1 1015 | 20::8::1 1016 | 20::12::1 1017 | 
20::21::2 1018 | 20::22::5 1019 | 20::24::2 1020 | 20::25::1 1021 | 20::26::1 1022 | 20::29::2 1023 | 20::30::2 1024 | 20::32::2 1025 | 20::39::1 1026 | 20::40::1 1027 | 20::41::2 1028 | 20::45::2 1029 | 20::48::1 1030 | 20::50::1 1031 | 20::51::3 1032 | 20::53::3 1033 | 20::55::1 1034 | 20::57::2 1035 | 20::60::1 1036 | 20::61::1 1037 | 20::64::1 1038 | 20::66::1 1039 | 20::70::2 1040 | 20::72::1 1041 | 20::73::2 1042 | 20::75::4 1043 | 20::76::1 1044 | 20::77::4 1045 | 20::78::1 1046 | 20::79::1 1047 | 20::84::2 1048 | 20::85::2 1049 | 20::88::3 1050 | 20::89::1 1051 | 20::90::3 1052 | 20::91::1 1053 | 20::92::2 1054 | 20::93::1 1055 | 20::94::4 1056 | 20::97::1 1057 | 21::0::1 1058 | 21::2::4 1059 | 21::3::1 1060 | 21::7::2 1061 | 21::11::1 1062 | 21::12::1 1063 | 21::13::1 1064 | 21::14::3 1065 | 21::17::1 1066 | 21::19::1 1067 | 21::20::1 1068 | 21::21::1 1069 | 21::22::1 1070 | 21::23::1 1071 | 21::24::1 1072 | 21::27::1 1073 | 21::29::5 1074 | 21::30::2 1075 | 21::38::1 1076 | 21::40::2 1077 | 21::43::3 1078 | 21::44::1 1079 | 21::45::1 1080 | 21::46::1 1081 | 21::48::1 1082 | 21::51::1 1083 | 21::53::5 1084 | 21::54::1 1085 | 21::55::1 1086 | 21::56::1 1087 | 21::58::3 1088 | 21::59::3 1089 | 21::64::1 1090 | 21::66::1 1091 | 21::68::1 1092 | 21::71::1 1093 | 21::73::1 1094 | 21::74::4 1095 | 21::80::1 1096 | 21::81::1 1097 | 21::83::1 1098 | 21::84::1 1099 | 21::85::3 1100 | 21::87::4 1101 | 21::89::2 1102 | 21::92::2 1103 | 21::96::3 1104 | 21::99::1 1105 | 22::0::1 1106 | 22::3::2 1107 | 22::5::2 1108 | 22::6::2 1109 | 22::9::1 1110 | 22::10::1 1111 | 22::11::1 1112 | 22::13::1 1113 | 22::14::1 1114 | 22::16::1 1115 | 22::18::3 1116 | 22::19::1 1117 | 22::22::5 1118 | 22::25::1 1119 | 22::26::1 1120 | 22::29::3 1121 | 22::30::5 1122 | 22::32::4 1123 | 22::33::1 1124 | 22::35::1 1125 | 22::36::3 1126 | 22::37::1 1127 | 22::40::1 1128 | 22::41::3 1129 | 22::44::1 1130 | 22::45::2 1131 | 22::48::1 1132 | 22::51::5 1133 | 22::55::1 1134 | 22::56::2 1135 | 22::60::3 1136 | 22::61::1 1137 | 22::62::4 1138 | 22::63::1 1139 | 22::65::1 1140 | 22::66::1 1141 | 22::68::4 1142 | 22::69::4 1143 | 22::70::3 1144 | 22::71::1 1145 | 22::74::5 1146 | 22::75::5 1147 | 22::78::1 1148 | 22::80::3 1149 | 22::81::1 1150 | 22::82::1 1151 | 22::84::1 1152 | 22::86::1 1153 | 22::87::3 1154 | 22::88::5 1155 | 22::90::2 1156 | 22::92::3 1157 | 22::95::2 1158 | 22::96::2 1159 | 22::98::4 1160 | 22::99::1 1161 | 23::0::1 1162 | 23::2::1 1163 | 23::4::1 1164 | 23::6::2 1165 | 23::10::4 1166 | 23::12::1 1167 | 23::13::4 1168 | 23::14::1 1169 | 23::15::1 1170 | 23::18::4 1171 | 23::22::2 1172 | 23::23::4 1173 | 23::24::1 1174 | 23::25::1 1175 | 23::26::1 1176 | 23::27::5 1177 | 23::28::1 1178 | 23::29::1 1179 | 23::30::4 1180 | 23::32::5 1181 | 23::33::2 1182 | 23::36::3 1183 | 23::37::1 1184 | 23::38::1 1185 | 23::39::1 1186 | 23::43::1 1187 | 23::48::5 1188 | 23::49::5 1189 | 23::50::4 1190 | 23::53::1 1191 | 23::55::5 1192 | 23::57::1 1193 | 23::59::1 1194 | 23::60::1 1195 | 23::61::1 1196 | 23::64::4 1197 | 23::65::5 1198 | 23::66::2 1199 | 23::67::1 1200 | 23::68::3 1201 | 23::69::1 1202 | 23::72::1 1203 | 23::73::3 1204 | 23::77::1 1205 | 23::82::2 1206 | 23::83::1 1207 | 23::84::1 1208 | 23::85::1 1209 | 23::87::3 1210 | 23::88::1 1211 | 23::95::2 1212 | 23::97::1 1213 | 24::4::1 1214 | 24::6::3 1215 | 24::7::1 1216 | 24::10::2 1217 | 24::12::1 1218 | 24::15::1 1219 | 24::19::1 1220 | 24::24::1 1221 | 24::27::3 1222 | 24::30::5 1223 | 24::31::1 1224 | 24::32::3 1225 | 24::33::1 1226 | 24::37::1 1227 | 
24::39::1 1228 | 24::40::1 1229 | 24::42::1 1230 | 24::43::3 1231 | 24::45::2 1232 | 24::46::1 1233 | 24::47::1 1234 | 24::48::1 1235 | 24::49::1 1236 | 24::50::1 1237 | 24::52::5 1238 | 24::57::1 1239 | 24::59::4 1240 | 24::63::4 1241 | 24::65::1 1242 | 24::66::1 1243 | 24::67::1 1244 | 24::68::3 1245 | 24::69::5 1246 | 24::71::1 1247 | 24::72::4 1248 | 24::77::4 1249 | 24::78::1 1250 | 24::80::1 1251 | 24::82::1 1252 | 24::84::1 1253 | 24::86::1 1254 | 24::87::1 1255 | 24::88::2 1256 | 24::89::1 1257 | 24::90::5 1258 | 24::91::1 1259 | 24::92::1 1260 | 24::94::2 1261 | 24::95::1 1262 | 24::96::5 1263 | 24::98::1 1264 | 24::99::1 1265 | 25::1::3 1266 | 25::2::1 1267 | 25::7::1 1268 | 25::9::1 1269 | 25::12::3 1270 | 25::16::3 1271 | 25::17::1 1272 | 25::18::1 1273 | 25::20::1 1274 | 25::22::1 1275 | 25::23::1 1276 | 25::26::2 1277 | 25::29::1 1278 | 25::30::1 1279 | 25::31::2 1280 | 25::33::4 1281 | 25::34::3 1282 | 25::35::2 1283 | 25::36::1 1284 | 25::37::1 1285 | 25::40::1 1286 | 25::41::1 1287 | 25::43::1 1288 | 25::47::4 1289 | 25::50::1 1290 | 25::51::1 1291 | 25::53::1 1292 | 25::56::1 1293 | 25::58::2 1294 | 25::64::2 1295 | 25::67::2 1296 | 25::68::1 1297 | 25::70::1 1298 | 25::71::4 1299 | 25::73::1 1300 | 25::74::1 1301 | 25::76::1 1302 | 25::79::1 1303 | 25::82::1 1304 | 25::84::2 1305 | 25::85::1 1306 | 25::91::3 1307 | 25::92::1 1308 | 25::94::1 1309 | 25::95::1 1310 | 25::97::2 1311 | 26::0::1 1312 | 26::1::1 1313 | 26::2::1 1314 | 26::3::1 1315 | 26::4::4 1316 | 26::5::2 1317 | 26::6::3 1318 | 26::7::5 1319 | 26::13::3 1320 | 26::14::1 1321 | 26::16::1 1322 | 26::18::3 1323 | 26::20::1 1324 | 26::21::3 1325 | 26::22::5 1326 | 26::23::5 1327 | 26::24::5 1328 | 26::27::1 1329 | 26::31::1 1330 | 26::35::1 1331 | 26::36::4 1332 | 26::40::1 1333 | 26::44::1 1334 | 26::45::2 1335 | 26::47::1 1336 | 26::48::1 1337 | 26::49::3 1338 | 26::50::2 1339 | 26::52::1 1340 | 26::54::4 1341 | 26::55::1 1342 | 26::57::3 1343 | 26::58::1 1344 | 26::61::1 1345 | 26::62::2 1346 | 26::66::1 1347 | 26::68::4 1348 | 26::71::1 1349 | 26::73::4 1350 | 26::76::1 1351 | 26::81::3 1352 | 26::85::1 1353 | 26::86::3 1354 | 26::88::5 1355 | 26::91::1 1356 | 26::94::5 1357 | 26::95::1 1358 | 26::96::1 1359 | 26::97::1 1360 | 27::0::1 1361 | 27::9::1 1362 | 27::10::1 1363 | 27::18::4 1364 | 27::19::3 1365 | 27::20::1 1366 | 27::22::2 1367 | 27::24::2 1368 | 27::25::1 1369 | 27::27::3 1370 | 27::28::1 1371 | 27::29::1 1372 | 27::31::1 1373 | 27::33::3 1374 | 27::40::1 1375 | 27::42::1 1376 | 27::43::1 1377 | 27::44::3 1378 | 27::45::1 1379 | 27::51::3 1380 | 27::52::1 1381 | 27::55::3 1382 | 27::57::1 1383 | 27::59::1 1384 | 27::60::1 1385 | 27::61::1 1386 | 27::64::1 1387 | 27::66::3 1388 | 27::68::1 1389 | 27::70::1 1390 | 27::71::2 1391 | 27::72::1 1392 | 27::75::3 1393 | 27::78::1 1394 | 27::80::3 1395 | 27::82::1 1396 | 27::83::3 1397 | 27::86::1 1398 | 27::87::2 1399 | 27::90::1 1400 | 27::91::1 1401 | 27::92::1 1402 | 27::93::1 1403 | 27::94::2 1404 | 27::95::1 1405 | 27::98::1 1406 | 28::0::3 1407 | 28::1::1 1408 | 28::2::4 1409 | 28::3::1 1410 | 28::6::1 1411 | 28::7::1 1412 | 28::12::5 1413 | 28::13::2 1414 | 28::14::1 1415 | 28::15::1 1416 | 28::17::1 1417 | 28::19::3 1418 | 28::20::1 1419 | 28::23::3 1420 | 28::24::3 1421 | 28::27::1 1422 | 28::29::1 1423 | 28::33::1 1424 | 28::34::1 1425 | 28::36::1 1426 | 28::38::2 1427 | 28::39::2 1428 | 28::44::1 1429 | 28::45::1 1430 | 28::49::4 1431 | 28::50::1 1432 | 28::52::1 1433 | 28::54::1 1434 | 28::56::1 1435 | 28::57::3 1436 | 28::58::1 1437 | 
28::59::1 1438 | 28::60::1 1439 | 28::62::3 1440 | 28::63::1 1441 | 28::65::1 1442 | 28::75::1 1443 | 28::78::1 1444 | 28::81::5 1445 | 28::82::4 1446 | 28::83::1 1447 | 28::85::1 1448 | 28::88::2 1449 | 28::89::4 1450 | 28::90::1 1451 | 28::92::5 1452 | 28::94::1 1453 | 28::95::2 1454 | 28::98::1 1455 | 28::99::1 1456 | 29::3::1 1457 | 29::4::1 1458 | 29::5::1 1459 | 29::7::2 1460 | 29::9::1 1461 | 29::10::3 1462 | 29::11::1 1463 | 29::13::3 1464 | 29::14::1 1465 | 29::15::1 1466 | 29::17::3 1467 | 29::19::3 1468 | 29::22::3 1469 | 29::23::4 1470 | 29::25::1 1471 | 29::29::1 1472 | 29::31::1 1473 | 29::32::4 1474 | 29::33::2 1475 | 29::36::2 1476 | 29::38::3 1477 | 29::39::1 1478 | 29::42::1 1479 | 29::46::5 1480 | 29::49::3 1481 | 29::51::2 1482 | 29::59::1 1483 | 29::61::1 1484 | 29::62::1 1485 | 29::67::1 1486 | 29::68::3 1487 | 29::69::1 1488 | 29::70::1 1489 | 29::74::1 1490 | 29::75::1 1491 | 29::79::2 1492 | 29::80::1 1493 | 29::81::2 1494 | 29::83::1 1495 | 29::85::1 1496 | 29::86::1 1497 | 29::90::4 1498 | 29::93::1 1499 | 29::94::4 1500 | 29::97::1 1501 | 29::99::1 1502 | -------------------------------------------------------------------------------- /docs/universal_recommender.md: -------------------------------------------------------------------------------- 1 | # Universal Recommender 2 | 3 | ## Quick Start 4 | 5 | 1. [Install the PredictionIO framework](https://docs.prediction.io/install/), and **be sure to choose HBase and Elasticsearch** for storage. This template requires Elasticsearch. 6 | 2. Make sure the PIO console and services are running; check with `pio status`. 7 | 3. [Install this template](https://docs.prediction.io/start/download/), and **be sure to specify this template** with `pio template get PredictionIO/template-scala-parallel-universal-recommendation`. 8 | 9 | **To import and experiment with the simple example data** 10 | 11 | 1. Create a new app name and change `appName` in `engine.json`. 12 | 2. Run `pio app new **your-new-app-name**`. 13 | 3. Import sample events by running `python examples/import_handmade.py --access_key **your-access-key**`, where the key can be retrieved with `pio app list`. 14 | 4. The engine.json file in the root directory of your new UR template is set up for the data you just imported (make sure to create a new one for your data). Edit this file and change the `appName` parameter to match what you called the app in step #2. 15 | 5. Perform `pio build`, `pio train`, and `pio deploy`. 16 | 6. To execute some sample queries run `./examples/query-handmade.sh`. 17 | 18 | If there are timeouts, enable the delays that are commented out in the script—for now. In the production environment the engines will "warm up" with caching and will execute queries much faster. Also all services can be configured or scaled to meet virtually any performance needs. 19 | 20 | **See the [Github README.md](https://github.com/PredictionIO/template-scala-parallel-universal-recommendation) for further usage instructions** 21 | 22 | ## What is a Universal Recommender 23 | 24 | The Universal Recommender (UR) will accept a range of data, auto-correlate it, and allow for very flexible queries. The UR is different from most recommenders in these ways: 25 | 26 | * It takes a single very strong "primary" event type—one that clearly reflects a user's preference—and correlates any number of other event types to the primary event. This has the effect of using virtually any user action to recommend the primary action. Much of a user's clickstream can be used to make recommendations.
If a user has no history of the primary action (purchase, for instance) but does have a history of views, personalized recommendations for purchases can still be made. With user purchase history the recommendations become better. ALS-type recommenders have been used with event weights, but except for ratings these often do not result in better performance.
27 | * It can boost and filter based on events or item metadata/properties. This means it can give personalized recs that are biased toward “SciFi” and filtered to only include “Promoted” items when the business rules call for this.
28 | * It can use a user's context to make recommendations even when the user is new. If usage data has been gathered for other users for referring URL, device type, or location, for instance, there may be a correlation between this data and the items preferred. The UR can detect this **if** it exists and recommend based on this context, even to new users. We call these "micro-segmented" recommendations since they are not personal but group users based on limited contextual information. These will not be as good as when more is known about the user but may be better than simply returning popular items.
29 | * It includes a fallback to some form of item popularity when there is no other information known about the user (not implemented in v0.1.0).
30 | * All of the above can be mixed into a single query for blended results, so the query can be tuned to a great many applications. Also, since only one query is made and boosting is supported, a query can be constructed with several fallbacks. Usage data is most important, so boost that high; micro-segmenting data may be better than popularity, so boost that lower; and popularity fills in if no other recommendations are available. 31 |
32 | Other features: 33 |
34 | * Makes recommendations based on real-time user history. Even anonymous users will get recommendations if they have recorded preference history and a user-id. There is no hard requirement to retrain the model to make this happen. 35 |
36 | TBD: 37 |
38 | * Date range filters based on date properties of items
39 | * Popularity-type recommendation backfill for returning "trending" or "hot" items when no other recommendations are available from the training data.
40 | * Content-based correlators for content-based recommendations 41 |
42 | ## References 43 |
44 | * Other documentation of the algorithm is [here](http://mahout.apache.org/users/algorithms/intro-cooccurrence-spark.html)
45 | * A free ebook, which talks about the general idea: [Practical Machine Learning](https://www.mapr.com/practical-machine-learning).
46 | * A slide deck, which talks about mixing actions and other indicator types, including content-based ones: [Creating a Unified Recommender](http://www.slideshare.net/pferrel/unified-recommender-39986309?ref=http://occamsmachete.com/ml/)
47 | * Two blog posts: What's New in Recommenders: part [#1](http://occamsmachete.com/ml/2014/08/11/mahout-on-spark-whats-new-in-recommenders/) and part [#2](http://occamsmachete.com/ml/2014/09/09/mahout-on-spark-whats-new-in-recommenders-part-2/)
48 | * A post describing the log-likelihood ratio: [Surprise and Coincidence](http://tdunning.blogspot.com/2008/03/surprise-and-coincidence.html). LLR is used to reduce noise in the data while keeping the calculations O(n) complexity.
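To make the queries described above concrete, here is a minimal sketch of sending them from Python instead of curl. It is only an illustration: it assumes the PredictionIO Python SDK, an engine deployed at localhost:8000 as in the Quick Start, and items that actually carry `categories` and `countries` properties; substitute your own field names.

```python
# Minimal query sketch (assumes the predictionio Python SDK is installed,
# the engine is deployed on localhost:8000, and items carry "categories"
# and "countries" properties -- adjust names to your own data).
import predictionio

engine = predictionio.EngineClient(url="http://localhost:8000")

# Personalized recommendations from the user's event history.
print engine.send_query({"user": "u1", "num": 4})

# Blended query: personalized results boosted toward "Tablets" (bias > 1)
# and hard-filtered by country (a bias of -1 means filter, not boost).
print engine.send_query({
    "user": "u1",
    "fields": [
        {"name": "categories", "values": ["Tablets"], "bias": 1.05},
        {"name": "countries", "values": ["Estados Unidos Mexicanos"], "bias": -1}
    ]
})
```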
49 |
-------------------------------------------------------------------------------- /engine.json: --------------------------------------------------------------------------------
1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "name": "sample-handmade-data.txt", 9 | "appName": "handmade", 10 | "eventNames": ["purchase", "view"] 11 | } 12 | }, 13 | "sparkConf": { 14 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 15 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 16 | "spark.kryo.referenceTracking": "false", 17 | "spark.kryoserializer.buffer": "300m", 18 | "es.index.auto.create": "true" 19 | }, 20 | "algorithms": [ 21 | { 22 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventNames", 23 | "name": "ur", 24 | "params": { 25 | "appName": "handmade", 26 | "indexName": "urindex", 27 | "typeName": "items", 28 | "comment": "must have data for the first event or the model will not build, other events are optional", 29 | "indicators": [ 30 | { 31 | "name": "purchase" 32 | },{ 33 | "name": "view", 34 | "maxCorrelatorsPerItem": 50 35 | } 36 | ] 37 | } 38 | } 39 | ] 40 | } 41 |
-------------------------------------------------------------------------------- /engine.json.minimum: --------------------------------------------------------------------------------
1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "name": "sample-handmade-data.txt", 9 | "appName": "handmade", 10 | "eventNames": ["purchase", "view"] 11 | } 12 | }, 13 | "sparkConf": { 14 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 15 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 16 | "spark.kryo.referenceTracking": "false", 17 | "spark.kryoserializer.buffer.mb": "300", 18 | "spark.kryoserializer.buffer": "300m", 19 | "es.index.auto.create": "true" 20 | }, 21 | "algorithms": [ 22 | { 23 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventNames", 24 | "name": "ur", 25 | "params": { 26 | "appName": "handmade", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "comment": "must have data for the first event or the model will not build, other events are optional", 30 | "eventNames": ["purchase", "view"] 31 | } 32 | } 33 | ] 34 | } 35 | 36 |
-------------------------------------------------------------------------------- /engine.json.spark-tuning: --------------------------------------------------------------------------------
1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "name": "sample-handmade-data.txt", 9 | "appName": "handmade", 10 | "eventNames": ["purchase", "view"] 11 | } 12 | }, 13 | "sparkConf": { 14 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 15 | "spark.kryo.registrator":
"org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 16 | "spark.kryo.referenceTracking": "false", 17 | "spark.kryoserializer.buffer": "300m", 18 | "spark.executor.memory": "4g", 19 | "spark.executor.cores": "2", 20 | "spark.task.cpus": "2", 21 | "spark.default.parallelism": "16", 22 | "es.index.auto.create": "true" 23 | }, 24 | "algorithms": [ 25 | { 26 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventsNames", 27 | "name": "ur", 28 | "params": { 29 | "appName": "handmade", 30 | "indexName": "urindex", 31 | "typeName": "items", 32 | "comment": "must have data for the first event or the model will not build, other events are optional", 33 | "eventNames": ["purchase", "view"] 34 | } 35 | } 36 | ] 37 | } 38 | 39 | -------------------------------------------------------------------------------- /event-names-test-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "name": "sample-handmade-data.txt", 9 | "appName": "handmade", 10 | "eventNames": ["purchase", "view"] 11 | } 12 | }, 13 | "sparkConf": { 14 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 15 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 16 | "spark.kryo.referenceTracking": "false", 17 | "spark.kryoserializer.buffer.mb": "300", 18 | "spark.kryoserializer.buffer": "300m", 19 | "spark.executor.memory": "4g", 20 | "es.index.auto.create": "true" 21 | }, 22 | "algorithms": [ 23 | { 24 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventsNames", 25 | "name": "ur", 26 | "params": { 27 | "appName": "handmade", 28 | "indexName": "urindex", 29 | "typeName": "items", 30 | "comment": "must have data for the first event or the model will not build, other events are optional", 31 | "eventNames": ["purchase", "view"], 32 | "blacklistEvents": [] 33 | } 34 | } 35 | ] 36 | } 37 | 38 | -------------------------------------------------------------------------------- /examples/handmade-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "name": "sample-handmade-data.txt", 9 | "appName": "handmade", 10 | "eventNames": ["purchase", "view"], 11 | "eventWindow": { 12 | "duration": "24 days", 13 | "removeDuplicates":true, 14 | "compressProperties":true 15 | } 16 | } 17 | }, 18 | "sparkConf": { 19 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 20 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 21 | "spark.kryo.referenceTracking": "false", 22 | "spark.kryoserializer.buffer": "300m", 23 | "es.index.auto.create": "true" 24 | }, 25 | "algorithms": [ 26 | { 27 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventsNames", 28 | "name": "ur", 29 | "params": { 30 | "appName": "handmade", 31 | "indexName": "urindex", 32 | "typeName": "items", 33 | "comment": "must have data 
for the first event or the model will not build, other events are optional", 34 | "indicators": [ 35 | { 36 | "name": "purchase" 37 | },{ 38 | "name": "view", 39 | "maxCorrelatorsPerItem": 50 40 | } 41 | ], 42 | "availableDateName": "available", 43 | "expireDateName": "expires", 44 | "dateName": "date", 45 | "num": 4 46 | } 47 | } 48 | ] 49 | } 50 | -------------------------------------------------------------------------------- /examples/hot-3-day-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "popularity-only", 3 | "description": "Default settings", 4 | "engineFactory": "org.template.RecommendationEngine", 5 | "datasource": { 6 | "params" : { 7 | "name": "sample-handmade-data.txt", 8 | "appName": "handmade", 9 | "eventNames": ["purchase"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer": "300m", 17 | "spark.executor.memory": "4g", 18 | "es.index.auto.create": "true" 19 | }, 20 | "algorithms": [ 21 | { 22 | "comment": "setup to only calculate a popularity model for *hot* and add it to the existing model for backfill", 23 | "name": "ur", 24 | "params": { 25 | "appName": "handmade", 26 | "indexName": "urindex", 27 | "typeName": "items", 28 | "eventNames": ["purchase"], 29 | "rankings": [{ 30 | "type": "hot", 31 | "duration": 259200 32 | }] 33 | } 34 | } 35 | ] 36 | } 37 | 38 | -------------------------------------------------------------------------------- /examples/import_handmade.py: -------------------------------------------------------------------------------- 1 | """ 2 | Import sample data for recommendation engine 3 | """ 4 | 5 | import predictionio 6 | import argparse 7 | import random 8 | import datetime 9 | import pytz 10 | 11 | RATE_ACTIONS_DELIMITER = "," 12 | PROPERTIES_DELIMITER = ":" 13 | SEED = 1 14 | 15 | 16 | def import_events(client, file): 17 | f = open(file, 'r') 18 | random.seed(SEED) 19 | count = 0 20 | # year, month, day[, hour[, minute[, second[ 21 | #event_date = datetime.datetime(2015, 8, 13, 12, 24, 41) 22 | now_date = datetime.datetime.now(pytz.utc) # - datetime.timedelta(days=2.7) 23 | current_date = now_date 24 | event_time_increment = datetime.timedelta(days= -0.8) 25 | available_date_increment = datetime.timedelta(days= 0.8) 26 | event_date = now_date - datetime.timedelta(days= 2.4) 27 | available_date = event_date + datetime.timedelta(days=-2) 28 | expire_date = event_date + datetime.timedelta(days=2) 29 | print "Importing data..." 
30 | 31 | for line in f: 32 | data = line.rstrip('\r\n').split(RATE_ACTIONS_DELIMITER)
33 | # For demonstration purposes, action names are taken from the input, along with secondary actions
34 | # For the UR, add some item metadata 35 |
36 | if (data[1] == "purchase"): 37 | client.create_event( 38 | event=data[1], 39 | entity_type="user", 40 | entity_id=data[0], 41 | target_entity_type="item", 42 | target_entity_id=data[2], 43 | event_time = current_date 44 | ) 45 | print "Event: " + data[1] + " entity_id: " + data[0] + " target_entity_id: " + data[2] + \ 46 | " current_date: " + current_date.isoformat()
47 | elif (data[1] == "view"): # assumes other event type is 'view' 48 | client.create_event( 49 | event=data[1], 50 | entity_type="user", 51 | entity_id=data[0], 52 | target_entity_type="item", # type of item in this action 53 | target_entity_id=data[2], 54 | event_time = current_date 55 | ) 56 | print "Event: " + data[1] + " entity_id: " + data[0] + " target_entity_id: " + data[2] + \ 57 | " current_date: " + current_date.isoformat()
58 | elif (data[1] == "$set"): # must be a set event 59 | properties = data[2].split(PROPERTIES_DELIMITER) 60 | prop_name = properties.pop(0) 61 | prop_value = properties if not prop_name == 'defaultRank' else float(properties[0]) 62 | client.create_event( 63 | event=data[1], 64 | entity_type="item", 65 | entity_id=data[0], 66 | event_time=current_date, 67 | properties={prop_name: prop_value} 68 | ) 69 | print "Event: " + data[1] + " entity_id: " + data[0] + " properties/"+prop_name+": " + str(properties) + \ 70 | " current_date: " + current_date.isoformat()
71 | count += 1 72 | current_date += event_time_increment 73 |
74 | items = ['Iphone 6', 'Ipad-retina', 'Nexus', 'Surface', 'Iphone 4', 'Galaxy', 'Iphone 5'] 75 | print "All items: " + str(items) 76 | for item in items: 77 |
78 | client.create_event( 79 | event="$set", 80 | entity_type="item", 81 | entity_id=item, 82 | properties={"expires": expire_date.isoformat(), 83 | "available": available_date.isoformat(), 84 | "date": event_date.isoformat()} 85 | ) 86 | print "Event: $set entity_id: " + item + \ 87 | " properties/availableDate: " + available_date.isoformat() + \ 88 | " properties/date: " + event_date.isoformat() + \ 89 | " properties/expireDate: " + expire_date.isoformat()
90 | expire_date += available_date_increment 91 | event_date += available_date_increment 92 | available_date += available_date_increment 93 | count += 1 94 |
95 | f.close() 96 | print "%s events are imported."
% count 97 | 98 | 99 | if __name__ == '__main__': 100 | parser = argparse.ArgumentParser( 101 | description="Import sample data for recommendation engine") 102 | parser.add_argument('--access_key', default='invalid_access_key') 103 | parser.add_argument('--url', default="http://localhost:7070") 104 | parser.add_argument('--file', default="./data/sample-handmade-data.txt") 105 |
106 | args = parser.parse_args() 107 | print args 108 |
109 | client = predictionio.EventClient( 110 | access_key=args.access_key, 111 | url=args.url, 112 | threads=5, 113 | qsize=500) 114 | import_events(client, args.file) 115 |
-------------------------------------------------------------------------------- /examples/import_handmade_pop_test.py: --------------------------------------------------------------------------------
1 | """ 2 | Import sample data for recommendation engine 3 | """ 4 |
5 | import predictionio 6 | import argparse 7 | import random 8 | import datetime 9 | import pytz 10 | from tzlocal import get_localzone 11 |
12 | RATE_ACTIONS_DELIMITER = "," 13 | PROPERTIES_DELIMITER = ":" 14 | SEED = 1 15 | local_tz = get_localzone() 16 |
17 | def import_events(client, file, days_offset): 18 | f = open(file, 'r') 19 | random.seed(SEED) 20 | count = 0 21 | event_date = datetime.datetime.now(tz=local_tz) + datetime.timedelta(days=days_offset) 22 | print "Importing data..." 23 |
24 | for line in f: 25 | data = line.rstrip('\r\n').split(RATE_ACTIONS_DELIMITER)
26 | # For demonstration purposes, action names are taken from the input, along with secondary actions
27 | # For the UR, add some item metadata 28 |
29 | if (data[1] == "purchase"): 30 | client.create_event( 31 | event=data[1], 32 | entity_type="user", 33 | entity_id=data[0], 34 | target_entity_type="item", 35 | target_entity_id=data[2], 36 | event_time=event_date 37 | ) 38 | print "Event: " + data[1] + " user: " + data[0] + " item: " + data[2] + " date: " + str(event_date)
39 | elif (data[1] == "view"): # assumes other event type is 'view' 40 | client.create_event( 41 | event=data[1], 42 | entity_type="user", 43 | entity_id=data[0], 44 | target_entity_type="item", # type of item in this action 45 | target_entity_id=data[2], 46 | event_time=event_date 47 | ) 48 | print "Event: " + data[1] + " user: " + data[0] + " item: " + data[2] + " date: " + str(event_date)
49 | count += 1 50 | f.close() 51 | print "%s events are imported." % count 52 | 53 |
54 | if __name__ == '__main__': 55 | parser = argparse.ArgumentParser( 56 | description="Import sample data for recommendation engine") 57 | parser.add_argument('--access_key', default='invalid_access_key') 58 | parser.add_argument('--url', default="http://localhost:7070") 59 | parser.add_argument('--file1', default="./data/sample-handmade-data1.txt") 60 | parser.add_argument('--file2', default="./data/sample-handmade-data2.txt") 61 | parser.add_argument('--file3', default="./data/sample-handmade-data3.txt") 62 |
63 | args = parser.parse_args() 64 | print args 65 |
66 | client = predictionio.EventClient( 67 | access_key=args.access_key, 68 | url=args.url, 69 | threads=5, 70 | qsize=500)
71 | # this is to spread events around two time periods: now to 3 days ago, and 4 days ago to 6 days ago.
72 | # popular, trending, and hot are computed over a 3-day period ending on offset_days, which would be the most recent;
73 | # the duration of the actual pop-model calc is in the engine.json, so these dates work with some multiple of a
74 | # day for that value (expressed in seconds).
This allows us to test the pop-model as well as the "offsetDate"
75 | # in the params for training. The pop-model queries should have the same results for both timespans if the
76 | # "offsetDate" is now, and now - 4 days
77 | import_events(client, args.file1, 0)# last 3 days 78 | import_events(client, args.file2, -1) 79 | import_events(client, args.file3, -2)# first batch ends 2 days in the past 80 | import_events(client, args.file1, -4)# starting 4 days in the past, so skips 2 days for tests of the offset date 81 | import_events(client, args.file2, -5) 82 | import_events(client, args.file3, -6) 83 |
-------------------------------------------------------------------------------- /examples/import_movielens_eventserver.py: --------------------------------------------------------------------------------
1 | """ 2 | Import sample data for recommendation engine 3 | """ 4 |
5 | import predictionio 6 | import argparse 7 | import random 8 |
9 | RATE_ACTIONS_DELIMITER = "::" 10 | SEED = 3 11 |
12 | def import_events(client, file): 13 | f = open(file, 'r') 14 | random.seed(SEED) 15 | count = 0 16 | print "Importing data..." 17 | for line in f: 18 | data = line.rstrip('\r\n').split(RATE_ACTIONS_DELIMITER)
19 | # For demonstration purposes, randomly mix in some buy events
20 | # For the UR, add some item metadata
21 | if (random.randint(0, 1) == 1): 22 | client.create_event( 23 | event="rate", 24 | entity_type="user", 25 | entity_id=data[0], 26 | target_entity_type="item", 27 | target_entity_id=data[1], 28 | ) 29 | else: 30 | client.create_event( 31 | event="buy", 32 | entity_type="user", 33 | entity_id=data[0], 34 | target_entity_type="item", 35 | target_entity_id=data[1], 36 | )
37 | if (random.randint(0, 1) == 1): 38 | client.create_event( 39 | event="$set", 40 | entity_type="item", 41 | entity_id=data[1], 42 | properties= { "category": ["cat1", "cat5"] } 43 | ) 44 | else: 45 | client.create_event( 46 | event="$set", 47 | entity_type="item", 48 | entity_id=data[1], 49 | properties= { "category": ["cat1", "cat2"] } 50 | )
51 | count += 1 52 | f.close() 53 | print "%s events are imported." % count 54 |
55 | if __name__ == '__main__': 56 | parser = argparse.ArgumentParser( 57 | description="Import sample data for recommendation engine") 58 | parser.add_argument('--access_key', default='invalid_access_key') 59 | parser.add_argument('--url', default="http://localhost:7070") 60 | parser.add_argument('--file', default="./data/sample_movielens_data.txt") 61 |
62 | args = parser.parse_args() 63 | print args 64 |
65 | client = predictionio.EventClient( 66 | access_key=args.access_key, 67 | url=args.url, 68 | threads=5, 69 | qsize=500) 70 | import_events(client, args.file) 71 |
-------------------------------------------------------------------------------- /examples/integration-test: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash 2 |
3 | # exit on any error 4 | set -e 5 |
6 | echo "" 7 | echo "Integration test for The Universal Recommender." 8 | echo "If some step fails check that your engine.json file has been restored" 9 | echo "or look for it in 'user-engine.json'" 10 | echo "" 11 |
12 | echo "Checking for needed files" 13 | if [ ! -f examples/handmade-engine.json ]; then 14 | echo "File not found: examples/handmade-engine.json" 15 | exit 1 16 | fi 17 |
18 | if [ !
-f data/sample-handmade-data.txt ]; then 19 | echo "File not found: data/sample-handmade-data.txt" 20 | exit 1 21 | fi 22 |
23 | if [ -f user-engine.json ]; then 24 | echo "File user-engine.json found, this may be an error so we cannot replace engine.json" 25 | exit 1 26 | fi 27 |
28 | if [ ! -f data/integration-test-expected.txt ]; then 29 | echo "File not found: data/integration-test-expected.txt" 30 | exit 1 31 | fi 32 |
33 | echo "" 34 | echo "Checking status, should exit if pio is not running." 35 | pio status 36 | pio app new handmade || true 37 |
38 | echo "" 39 | echo "Checking to see if handmade app exists, should exit if not." 40 | pio app show handmade 41 |
42 | echo "" 43 | echo "Moving engine.json to user-engine.json" 44 | cp -n engine.json user-engine.json 45 |
46 | echo "" 47 | echo "Moving examples/handmade-engine.json to engine.json for integration test." 48 | cp examples/handmade-engine.json engine.json 49 |
50 | echo "" 51 | echo "Deleting handmade app data since the test is date dependent" 52 | pio app data-delete handmade -f 53 |
54 | echo "" 55 | echo "Importing data for integration test" 56 | # get the access_key from pio app list 57 | ACCESS_KEY=`pio app show handmade | grep Key | cut -f 7 -d ' '` 58 | echo -n "Access key: " 59 | echo $ACCESS_KEY 60 | python examples/import_handmade.py --access_key $ACCESS_KEY 61 |
62 | echo "" 63 | echo "Building and deploying model" 64 | pio build 65 | pio train -- --driver-memory 4g --executor-memory 4g 66 | echo "Model will remain deployed after this test" 67 | nohup pio deploy > deploy.out & 68 | echo "Waiting 30 seconds for the server to start" 69 | sleep 30 70 |
71 | #echo "" 72 | #echo "Running test query." 73 | #./examples/multi-query-handmade.sh > test.out 74 |
75 | # this is due to a bug where the first query had bad results 76 | #TODO: Investigate and squash 77 |
78 | ./examples/multi-query-handmade.sh > test.out 79 |
80 | echo "" 81 | echo "Restoring engine.json" 82 | mv user-engine.json engine.json 83 |
84 | echo "" 85 | echo "Differences between expected and actual results, none is a passing test." 86 | echo "Note: differences in ordering of results with the same score is allowed." 87 | diff data/integration-test-expected.txt test.out 88 |
89 | deploy_pid=`jps -lm | grep "onsole deploy" | cut -f 1 -d ' '` 90 | echo "Killing the deployed test PredictionServer" 91 | kill "$deploy_pid" 92 | 93 | 94 | 95 |
-------------------------------------------------------------------------------- /examples/integration-test-pop-model: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash 2 |
3 | # exit on any error 4 | set -e 5 |
6 | echo "" 7 | echo "Integration test for The Universal Recommender." 8 | echo "If some step fails check that your engine.json file has been restored" 9 | echo "or look for it in 'user-engine.json'" 10 | echo "" 11 |
12 | echo "Checking for needed files" 13 | if [ ! -f hot-3-day-engine.json ]; then 14 | echo "File not found: hot-3-day-engine.json" 15 | exit 1 16 | fi 17 |
18 | if [ ! -f trend-2-day-engine.json ]; then 19 | echo "File not found: trend-2-day-engine.json" 20 | exit 1 21 | fi 22 |
23 | if [ ! -f pop-1-day-engine.json ]; then 24 | echo "File not found: pop-1-day-engine.json" 25 | exit 1 26 | fi 27 |
28 | if [ ! -f examples/pop-test-query.sh ]; then 29 | echo "File not found: examples/pop-test-query.sh" 30 | exit 1 31 | fi 32 |
33 | if [ !
-f examples/import_handmade_pop_test.py ]; then 34 | echo "File not found: examples/import_handmade_pop_test.py" 35 | exit 1 36 | fi 37 |
38 | if [ -f user-engine.json ]; then 39 | echo "File user-engine.json found, this may be an error so we cannot replace engine.json" 40 | exit 1 41 | fi 42 |
43 | if [ ! -f data/integration-test-expected-pop-model.txt ]; then 44 | echo "File not found: data/integration-test-expected-pop-model.txt" 45 | # exit 1 46 | fi 47 |
48 | echo "" 49 | echo "Checking status, should exit if pio is not running." 50 | pio status 51 |
52 | echo "" 53 | echo "Checking to see if handmade app exists, should exit if not." 54 | pio app show handmade 55 |
56 | echo "" 57 | echo "Deleting handmade app data since the test is date dependent" 58 | pio app data-delete handmade 59 |
60 | echo "" 61 | echo "Importing data for integration test" 62 | # get the access_key from pio app list 63 | ACCESS_KEY=`pio app show handmade | grep Key | cut -f 7 -d ' '` 64 | echo -n "Access key: " 65 | echo $ACCESS_KEY 66 | python examples/import_handmade_pop_test.py --access_key $ACCESS_KEY 67 |
68 | echo "" 69 | echo "Moving engine.json to user-engine.json" 70 | cp -n engine.json user-engine.json 71 |
72 | echo "" 73 | echo "Popularity model integration test." 74 | cp hot-3-day-engine.json engine.json 75 |
76 | echo "" 77 | echo "=============== Building and deploying 'hot' model" 78 | pio build 79 | pio train -- --driver-memory 2g 80 | nohup pio deploy > deploy.out & 81 | echo "Waiting 20 seconds for the server to start" 82 | sleep 20 83 |
84 | echo "" 85 | echo "Running test query." 86 | ./examples/pop-test-query.sh > test.out 87 |
88 | echo "" 89 | cp trend-2-day-engine.json engine.json 90 |
91 | echo "" 92 | echo "=============== Building and deploying 'trending' model" 93 | pio build 94 | pio train -- --driver-memory 2g 95 | nohup pio deploy > deploy.out & 96 | echo "Waiting 20 seconds for the server to start" 97 | sleep 20 98 |
99 | echo "" 100 | echo "Running test query." 101 | ./examples/pop-test-query.sh >> test.out 102 |
103 | echo "" 104 | cp pop-1-day-engine.json engine.json 105 |
106 | echo "" 107 | echo "=============== Building and deploying 'popular' model" 108 | pio build 109 | pio train -- --driver-memory 2g 110 | nohup pio deploy > deploy.out & 111 | echo "Waiting 20 seconds for the server to start" 112 | sleep 20 113 |
114 | echo "" 115 | echo "Running test query." 116 | ./examples/pop-test-query.sh >> test.out 117 |
118 | echo "" 119 | echo "Restoring engine.json" 120 | mv user-engine.json engine.json 121 |
122 | echo "" 123 | echo "Differences between expected and actual results, none is a passing test:" 124 | diff data/integration-test-expected-pop-model.txt test.out 125 |
126 | echo "" 127 | echo "Note that the engine is still deployed until killed." 128 |
-------------------------------------------------------------------------------- /examples/multi-query-handmade.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash 2 |
3 | echo "" 4 | echo "Queries to illustrate many use cases on a small standard dataset and for an automated integration test." 5 | echo "" 6 | echo "WARNING: for this to produce the correct result you must:" 7 | echo " 1. Import data with" 8 | echo " $ python examples/import_handmade.py --access_key " 9 | echo " 2. Copy handmade-engine.json to engine.json." 10 | echo " 3. Run 'pio build', 'pio train', and 'pio deploy'" 11 | echo " 4.
The queries must be run the same day as the import was done because date filters are part of the test." 12 | echo "NOTE: due to available and expire dates you should never see the Iphone 5 or Iphone 6." 13 | 14 | echo "" 15 | echo "============ simple user recs ============" 16 | echo "" 17 | echo "Recommendations for user: u1" 18 | echo "" 19 | curl -H "Content-Type: application/json" -d ' 20 | { 21 | "user": "u1" 22 | }' http://localhost:8000/queries.json 23 | echo "" 24 | 25 | 26 | echo "" 27 | echo "Recommendations for user: U 2" 28 | echo "" 29 | curl -H "Content-Type: application/json" -d ' 30 | { 31 | "user": "U 2" 32 | }' http://localhost:8000/queries.json 33 | echo "" 34 | 35 | 36 | echo "" 37 | echo "Recommendations for user: u-3" 38 | echo "" 39 | curl -H "Content-Type: application/json" -d ' 40 | { 41 | "user": "u-3" 42 | }' http://localhost:8000/queries.json 43 | echo "" 44 | 45 | 46 | echo "" 47 | echo "Recommendations for user: u-4" 48 | echo "" 49 | curl -H "Content-Type: application/json" -d ' 50 | { 51 | "user": "u-4" 52 | }' http://localhost:8000/queries.json 53 | echo "" 54 | 55 | 56 | echo "" 57 | echo "Recommendations for user: u5" 58 | echo "" 59 | curl -H "Content-Type: application/json" -d ' 60 | { 61 | "user": "u5" 62 | }' http://localhost:8000/queries.json 63 | echo "" 64 | 65 | echo "" 66 | echo "============ simple similar item recs ============" 67 | echo "" 68 | echo "Recommendations for item: Iphone 4" 69 | echo "" 70 | curl -H "Content-Type: application/json" -d ' 71 | { 72 | "item": "Iphone 4" 73 | }' http://localhost:8000/queries.json 74 | echo "" 75 | 76 | echo "" 77 | echo "Recommendations for item: Ipad-retina" 78 | echo "" 79 | curl -H "Content-Type: application/json" -d ' 80 | { 81 | "item": "Ipad-retina" 82 | }' http://localhost:8000/queries.json 83 | echo "" 84 | 85 | echo "" 86 | echo "Recommendations for item: Nexus" 87 | echo "" 88 | curl -H "Content-Type: application/json" -d ' 89 | { 90 | "item": "Nexus" 91 | }' http://localhost:8000/queries.json 92 | echo "" 93 | 94 | echo "" 95 | echo "Recommendations for item: Galaxy" 96 | echo "" 97 | curl -H "Content-Type: application/json" -d ' 98 | { 99 | "item": "Galaxy" 100 | }' http://localhost:8000/queries.json 101 | echo "" 102 | 103 | echo "" 104 | echo "Recommendations for item: Surface" 105 | echo "" 106 | curl -H "Content-Type: application/json" -d ' 107 | { 108 | "item": "Surface" 109 | }' http://localhost:8000/queries.json 110 | echo "" 111 | 112 | echo "" 113 | echo "============ popular item recs only ============" 114 | echo "" 115 | echo "query with no item or user id, ordered by popularity" 116 | echo "" 117 | curl -H "Content-Type: application/json" -d ' 118 | { 119 | }' http://localhost:8000/queries.json 120 | echo "" 121 | 122 | echo "" 123 | echo "Recommendations for non-existant user: xyz, all from popularity" 124 | echo "" 125 | curl -H "Content-Type: application/json" -d ' 126 | { 127 | "user": "xyz" 128 | }' http://localhost:8000/queries.json 129 | echo "" 130 | 131 | echo "" 132 | echo "Recommendations for non-existant item: xyz, all from popularity" 133 | echo "" 134 | curl -H "Content-Type: application/json" -d ' 135 | { 136 | "item": "xyz" 137 | }' http://localhost:8000/queries.json 138 | echo "" 139 | 140 | 141 | echo "" 142 | echo "Recommendations for no user no item, all from popularity, Tablets filter" 143 | echo "" 144 | curl -H "Content-Type: application/json" -d ' 145 | { 146 | "fields": [{ 147 | "name": "categories", 148 | "values": ["Tablets"], 149 | "bias": -1 150 | 
}] 151 | }' http://localhost:8000/queries.json 152 | echo "" 153 | 154 | 155 | echo "" 156 | echo "Recommendations for no user no item, all from popularity, Tablets boost" 157 | echo "" 158 | curl -H "Content-Type: application/json" -d ' 159 | { 160 | "fields": [{ 161 | "name": "categories", 162 | "values": ["Tablets"], 163 | "bias": 1.05 164 | }] 165 | }' http://localhost:8000/queries.json 166 | echo "" 167 | 168 | 169 | echo "" 170 | echo "Recommendations for no user no item, all from popularity, Tablets boost, Estados Unidos Mexicanos filter" 171 | echo "" 172 | curl -H "Content-Type: application/json" -d ' 173 | { 174 | "fields": [{ 175 | "name": "categories", 176 | "values": ["Tablets"], 177 | "bias": 1.05 178 | }, { 179 | "name": "countries", 180 | "values": ["Estados Unidos Mexicanos"], 181 | "bias": -1 182 | }] 183 | }' http://localhost:8000/queries.json 184 | echo "" 185 | 186 | 187 | echo "" 188 | echo "============ dateRange filter ============" 189 | echo "" 190 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 191 | BEFORE=`date --date="tomorrow" --iso-8601=seconds` 192 | AFTER=`date --date="1 day ago" --iso-8601=seconds` 193 | else 194 | BEFORE=`date -v +1d +"%Y-%m-%dT%H:%M:%SZ"` 195 | AFTER=`date -v -1d +"%Y-%m-%dT%H:%M:%SZ"` 196 | fi 197 | #echo "before: $BEFORE after: $AFTER" 198 | echo "Recommendations for user: u1" 199 | echo "" 200 | curl -H "Content-Type: application/json" -d " 201 | { 202 | \"user\": \"u1\", 203 | \"dateRange\": { 204 | \"name\": \"date\", 205 | \"before\": \"$BEFORE\", 206 | \"after\": \"$AFTER\" 207 | } 208 | }" http://localhost:8000/queries.json 209 | echo "" 210 | 211 | echo "" 212 | echo "============ query with item and user *EXPERIMENTAL* ============" 213 | # This is experimental, use at your own risk, not well founded in theory 214 | echo "" 215 | echo "Recommendations for no user no item, all from popularity, Tablets boost, Estados Unidos Mexicanos filter" 216 | echo "" 217 | curl -H "Content-Type: application/json" -d ' 218 | { 219 | "user": "u1", 220 | "item": "Iphone 4" 221 | }' http://localhost:8000/queries.json 222 | echo "" 223 | 224 | -------------------------------------------------------------------------------- /examples/multi-query-movielens.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo "" 3 | echo "Simple personalized query" 4 | echo "" 5 | curl -H "Content-Type: application/json" -d ' 6 | { 7 | "user": "1", 8 | "num": 10 9 | }' http://localhost:8000/queries.json 10 | echo "" 11 | 12 | #sleep 2 13 | 14 | echo "" 15 | echo "Simple similar item query" 16 | echo "" 17 | curl -H "Content-Type: application/json" -d ' 18 | { 19 | "item": "62", 20 | "num": 15 21 | }' http://localhost:8000/queries.json 22 | echo "" 23 | 24 | #sleep 2 25 | 26 | echo "" 27 | echo "Simple personalized query with category boost" 28 | echo "" 29 | curl -H "Content-Type: application/json" -d ' 30 | { 31 | "user": "1", 32 | "num": 20, 33 | "fields": [{ 34 | "name": "category", 35 | "values": ["cat5"], 36 | "bias": 1.005 37 | }] 38 | }' http://localhost:8000/queries.json 39 | echo "" 40 | echo "" 41 | -------------------------------------------------------------------------------- /examples/pop-engine-4-days-ago.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "popularity-only", 3 | "description": "Default settings", 4 | "engineFactory": "org.template.RecommendationEngine", 5 | "datasource": { 6 | "params" : { 7 | "name": 
"sample-handmade-data.txt", 8 | "appName": "handmade", 9 | "eventNames": ["purchase", "view"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer.mb": "300", 17 | "spark.kryoserializer.buffer": "300m", 18 | "spark.executor.memory": "4g", 19 | "es.index.auto.create": "true" 20 | }, 21 | "algorithms": [ 22 | { 23 | "comment": "setup to only calculate a popularity model for *hot* and add it to the existing model for backfill", 24 | "name": "ur", 25 | "params": { 26 | "appName": "handmade", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "eventNames": ["purchase", "view"], 30 | "recsModel": "backfill", 31 | "rankings": [{ 32 | "name": "popRank", 33 | "type": "popular", 34 | "eventNames": ["purchase", "view"], 35 | "duration": 259200, 36 | "comment": "VERY IMPORTANT that the line below be set to today - 4 days for integration-test-pop-model", 37 | "offsetDate": "2016-01-19T11:55:07Z" 38 | }] 39 | } 40 | } 41 | ] 42 | } 43 | 44 | -------------------------------------------------------------------------------- /examples/pop-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "popularity-only", 3 | "description": "Default settings", 4 | "engineFactory": "org.template.RecommendationEngine", 5 | "datasource": { 6 | "params" : { 7 | "name": "sample-handmade-data.txt", 8 | "appName": "handmade", 9 | "eventNames": ["purchase", "view"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer.mb": "300", 17 | "spark.kryoserializer.buffer": "300m", 18 | "spark.executor.memory": "4g", 19 | "es.index.auto.create": "true" 20 | }, 21 | "algorithms": [ 22 | { 23 | "comment": "setup to only calculate a popularity model for *hot* and add it to the existing model for backfill", 24 | "name": "ur", 25 | "params": { 26 | "appName": "handmade", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "eventNames": ["purchase", "view"], 30 | "recsModel": "backfill", 31 | "rankings": [{ 32 | "name": "popRank", 33 | "type": "popular", 34 | "eventNames": ["purchase", "view"], 35 | "duration": 259200 36 | }] 37 | } 38 | } 39 | ] 40 | } 41 | 42 | -------------------------------------------------------------------------------- /examples/pop-test-query.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "" 4 | echo "Recommendations for popular using default pop model" 5 | echo "" 6 | curl -H "Content-Type: application/json" -d ' 7 | { 8 | }' http://localhost:8000/queries.json 9 | echo "" 10 | 11 | -------------------------------------------------------------------------------- /examples/rank/import_rank.py: -------------------------------------------------------------------------------- 1 | """ 2 | Import sample data for recommendation engine 3 | """ 4 | 5 | import predictionio 6 | import argparse 7 | import random 8 | import datetime 9 | import pytz 10 | 11 | RATE_ACTIONS_DELIMITER = "," 12 | PROPERTIES_DELIMITER = ":" 13 | SEED = 1 14 | 15 | 16 | def import_events(client, file): 17 | f = open(file, 'r') 18 | random.seed(SEED) 19 | count = 0 20 | 
# year, month, day[, hour[, minute[, second[ 21 | #event_date = datetime.datetime(2015, 8, 13, 12, 24, 41) 22 | now_date = datetime.datetime.now(pytz.utc) # - datetime.timedelta(days=2.7) 23 | current_date = now_date 24 | event_time_increment = datetime.timedelta(days= -0.8) 25 | available_date_increment = datetime.timedelta(days= 0.8) 26 | event_date = now_date - datetime.timedelta(days= 2.4) 27 | available_date = event_date + datetime.timedelta(days=-2) 28 | expire_date = event_date + datetime.timedelta(days=2) 29 | print "Importing data..." 30 |
31 | items = set() 32 | for line in f: 33 | data = line.rstrip('\r\n').split(RATE_ACTIONS_DELIMITER)
34 | # For demonstration purposes, action names are taken from the input, along with secondary actions
35 | # For the UR, add some item metadata 36 |
37 | action = data[1] 38 | if action in ('$set', '$unset', '$delete'): 39 | item_id = data[0] 40 | items.add(item_id) 41 | properties = data[2].split(PROPERTIES_DELIMITER) 42 | prop_name = properties.pop(0) 43 | prop_value = properties if not prop_name == 'defaultRank' else float( 44 | properties[0]) 45 | client.create_event( 46 | event=action, 47 | entity_type="item", 48 | entity_id=item_id, 49 | event_time=current_date, 50 | properties={prop_name: prop_value} 51 | ) 52 | print( 53 | 'Event: {0} entity_id: {1} properties/{2}: {3} current_date: {4}'.format( 54 | action, item_id, prop_name, str(prop_value), current_date.isoformat())) 55 |
56 | else: 57 | user_id = data[0] 58 | item_id = data[2] 59 | client.create_event( 60 | event=action, 61 | entity_type="user", 62 | entity_id=user_id, 63 | target_entity_type="item", 64 | target_entity_id=item_id, 65 | event_time=current_date 66 | ) 67 | print( 68 | 'Event: {0} entity_id: {1} target_entity_id: {2} current_date: {3}' 69 | .format(action, user_id, item_id, current_date.isoformat())) 70 | count += 1 71 | current_date += event_time_increment 72 |
73 | print "All items: " + str(items) 74 | for item in items: 75 |
76 | client.create_event( 77 | event="$set", 78 | entity_type="item", 79 | entity_id=item, 80 | properties={"expires": expire_date.isoformat(), 81 | "available": available_date.isoformat(), 82 | "date": event_date.isoformat()} 83 | ) 84 | print "Event: $set entity_id: " + item + \ 85 | " properties/availableDate: " + available_date.isoformat() + \ 86 | " properties/date: " + event_date.isoformat() + \ 87 | " properties/expireDate: " + expire_date.isoformat()
88 | expire_date += available_date_increment 89 | event_date += available_date_increment 90 | available_date += available_date_increment 91 | count += 1 92 |
93 | f.close() 94 | print "%s events are imported."
% count 95 | 96 | 97 | if __name__ == '__main__': 98 | parser = argparse.ArgumentParser( 99 | description="Import sample data for recommendation engine") 100 | parser.add_argument('--access_key', default='123456789') 101 | parser.add_argument('--url', default="http://localhost:7070") 102 | parser.add_argument('--file', default="./data/sample-rank-data.txt") 103 | 104 | args = parser.parse_args() 105 | print args 106 | 107 | client = predictionio.EventClient( 108 | access_key=args.access_key, 109 | url=args.url, 110 | threads=5, 111 | qsize=500) 112 | import_events(client, args.file) 113 | -------------------------------------------------------------------------------- /examples/rank/integration-test-rank: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | RED='\033[0;31m' 4 | GREEN='\033[0;32m' 5 | YELLOW='\033[0;33m' 6 | NC='\033[0m' # No Color 7 | 8 | APP_NAME='default-rank' 9 | APP_ACCESS_KEY='123456789' 10 | 11 | LINE="==================================================================" 12 | # exit on any error 13 | #set -e 14 | 15 | check_file () { 16 | if [ ! -f $1 ]; then 17 | echo -e "${RED}File not found: $1${NC}" 18 | exit 1 19 | fi 20 | } 21 | 22 | echo -e "${GREEN}${LINE}" 23 | echo -e "Integration test [Rank] for The Universal Recommender." 24 | echo -e "If some step fails check that your engine.json file has been restored or look for it in 'user-engine.json'" 25 | echo -e "${LINE}${NC}" 26 | 27 | echo -e "${GREEN}${LINE}" 28 | echo -e "Checking for needed files" 29 | echo -e "${LINE}${NC}" 30 | 31 | if [ -f user-engine.json ]; then 32 | echo -e "${RED}File user-engine.json found, this may be an error so we cannot replace engine.json${NC}" 33 | exit 1 34 | fi 35 | 36 | check_file examples/rank/rank-engine.json 37 | check_file data/sample-rank-data.txt 38 | check_file data/rank-test-query-expected.txt 39 | 40 | echo -e "${GREEN}${LINE}" 41 | echo -e "Checking status, should exit if pio is not running." 42 | echo -e "${LINE}${NC}" 43 | pio status 44 | pio app new ${APP_NAME} --access-key ${APP_ACCESS_KEY} || true 45 | 46 | echo -e "${GREEN}${LINE}" 47 | echo -e "Checking to see if ${APP_NAME} app exists, should exit if not." 48 | echo -e "${LINE}${NC}" 49 | pio app show default-rank 50 | 51 | echo -e "${GREEN}${LINE}" 52 | echo -e "Moving engine.json to user-engine.json" 53 | echo -e "${LINE}${NC}" 54 | cp -n engine.json user-engine.json 55 | 56 | echo -e "${GREEN}${LINE}" 57 | echo -e "THE FIRST SERIES OF TESTS" 58 | echo -e "${LINE}${NC}" 59 | 60 | echo -e "${GREEN}${LINE}" 61 | echo -e "Moving examples/rank/rank-engine.json to engine.json for integration test." 
62 | echo -e "${LINE}${NC}" 63 | cp examples/rank/rank-engine.json engine.json 64 |
65 | echo -e "${GREEN}${LINE}" 66 | echo -e "Deleting ${APP_NAME} app data since the test is date dependent" 67 | echo -e "${LINE}${NC}" 68 | pio app data-delete ${APP_NAME} -f 69 |
70 | echo -e "${GREEN}${LINE}" 71 | echo -e "Importing data for integration test" 72 | echo -e "${LINE}${NC}" 73 | python examples/rank/import_rank.py --access_key ${APP_ACCESS_KEY} --file './data/sample-rank-data.txt' 74 |
75 | echo -e "${GREEN}${LINE}" 76 | echo -e "Building and deploying model" 77 | echo -e "${LINE}${NC}" 78 | pio build 79 | pio train -- --executor-memory 1g --driver-memory 1g --master local 80 |
81 | echo -e "${GREEN}${LINE}" 82 | echo -e "WARNING the model will be undeployed after this test, " 83 | echo -e "so any running PredictionServer will be stopped" 84 | nohup pio deploy > deploy-rank.out & 85 | echo -e "Waiting 30 seconds for the server to start" 86 | echo -e "${LINE}${NC}" 87 | sleep 30 88 |
89 | echo -e "${GREEN}${LINE}" 90 | echo -e "Running test query." 91 | echo -e "${LINE}${NC}" 92 | ./examples/rank/multi-query-rank.sh > rank-query-test-result.out 93 |
94 | echo -e "${GREEN}${LINE}" 95 | echo -e "Restoring engine.json" 96 | echo -e "${LINE}${NC}" 97 | mv user-engine.json engine.json 98 |
99 | deploy_pid=`jps -lm | grep "onsole deploy" | cut -f 1 -d ' '` 100 | echo -e "${GREEN}${LINE}" 101 | echo -e "Killing the deployed PredictionServer" 102 | echo -e "${LINE}${NC}" 103 | kill "$deploy_pid" 104 |
105 | DIFF_RESULT=`diff data/rank-test-query-expected.txt rank-query-test-result.out` 106 |
107 | if [[ -z "${DIFF_RESULT// }" ]]; then 108 | echo -e "${GREEN}${LINE}" 109 | echo -e "ALL TESTS PASS: SUCCESS" 110 | rm rank-query-test-result.out 111 | rm deploy-rank.out 112 | else 113 | echo -e "${RED}${LINE}" 114 | echo -e "ONE OR MORE TESTS FAILED:" 115 | echo -e "${LINE}" 116 | echo ${DIFF_RESULT} 117 | fi 118 | echo -e "${LINE}${NC}" 119 | 120 | 121 | 122 | 123 |
-------------------------------------------------------------------------------- /examples/rank/multi-query-rank.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash 2 |
3 | echo "" 4 | echo "Queries to illustrate many use cases on a small standard dataset and for an automated integration test." 5 | echo "" 6 | echo "WARNING: for this to produce the correct result you must:" 7 | echo " 1. Import data with" 8 | echo " $ python examples/import_rank.py --access_key " 9 | echo " 2. Copy rank-engine.json to engine.json." 10 | echo " 3. Run 'pio build', 'pio train', and 'pio deploy'" 11 | echo " 4. The queries must be run the same day as the import was done because date filters are part of the test."
12 | 13 | echo "" 14 | echo "============ simple user recs ============" 15 | echo "" 16 | echo "Recommendations for user: user-1" 17 | echo "" 18 | curl -H "Content-Type: application/json" -d ' 19 | { 20 | "user": "user-1" 21 | }' http://localhost:8000/queries.json 22 | echo "" 23 | 24 | echo "" 25 | echo "Recommendations for user: user-2" 26 | echo "" 27 | curl -H "Content-Type: application/json" -d ' 28 | { 29 | "user": "user-2" 30 | }' http://localhost:8000/queries.json 31 | echo "" 32 | 33 | echo "" 34 | echo "Recommendations for user: user-3" 35 | echo "" 36 | curl -H "Content-Type: application/json" -d ' 37 | { 38 | "user": "user-3" 39 | }' http://localhost:8000/queries.json 40 | echo "" 41 | 42 | echo "" 43 | echo "Recommendations for user: user-4" 44 | echo "" 45 | curl -H "Content-Type: application/json" -d ' 46 | { 47 | "user": "user-4" 48 | }' http://localhost:8000/queries.json 49 | echo "" 50 | 51 | echo "" 52 | echo "Recommendations for user: user-5" 53 | echo "" 54 | curl -H "Content-Type: application/json" -d ' 55 | { 56 | "user": "user-5" 57 | }' http://localhost:8000/queries.json 58 | echo "" 59 | 60 | echo "" 61 | echo "============ simple similar item recs ============" 62 | echo "" 63 | echo "Recommendations for item: product-1" 64 | echo "" 65 | curl -H "Content-Type: application/json" -d ' 66 | { 67 | "item": "product-1" 68 | }' http://localhost:8000/queries.json 69 | echo "" 70 | 71 | echo "" 72 | echo "Recommendations for item: product-2" 73 | echo "" 74 | curl -H "Content-Type: application/json" -d ' 75 | { 76 | "item": "product-2" 77 | }' http://localhost:8000/queries.json 78 | echo "" 79 | 80 | echo "" 81 | echo "Recommendations for item: product-3" 82 | echo "" 83 | curl -H "Content-Type: application/json" -d ' 84 | { 85 | "item": "product-3" 86 | }' http://localhost:8000/queries.json 87 | echo "" 88 | 89 | echo "" 90 | echo "Recommendations for item: product-4" 91 | echo "" 92 | curl -H "Content-Type: application/json" -d ' 93 | { 94 | "item": "product-4" 95 | }' http://localhost:8000/queries.json 96 | echo "" 97 | 98 | echo "" 99 | echo "Recommendations for item: product-5" 100 | echo "" 101 | curl -H "Content-Type: application/json" -d ' 102 | { 103 | "item": "product-5" 104 | }' http://localhost:8000/queries.json 105 | echo "" 106 | 107 | echo "" 108 | echo "============ popular item recs only ============" 109 | echo "" 110 | echo "query with no item or user id, ordered by popularity" 111 | echo "" 112 | curl -H "Content-Type: application/json" -d ' 113 | { 114 | }' http://localhost:8000/queries.json 115 | echo "" 116 | 117 | echo "" 118 | echo "Recommendations for non-existant user: xyz, all from popularity" 119 | echo "" 120 | curl -H "Content-Type: application/json" -d ' 121 | { 122 | "user": "xyz" 123 | }' http://localhost:8000/queries.json 124 | echo "" 125 | 126 | echo "" 127 | echo "Recommendations for non-existant item: xyz, all from popularity" 128 | echo "" 129 | curl -H "Content-Type: application/json" -d ' 130 | { 131 | "item": "xyz" 132 | }' http://localhost:8000/queries.json 133 | echo "" 134 | 135 | 136 | echo "" 137 | echo "Recommendations for no user no item, all from popularity, red color filter" 138 | echo "" 139 | curl -H "Content-Type: application/json" -d ' 140 | { 141 | "fields": [{ 142 | "name": "color", 143 | "values": ["red"], 144 | "bias": -1 145 | }] 146 | }' http://localhost:8000/queries.json 147 | echo "" 148 | 149 | 150 | echo "" 151 | echo "Recommendations for no user no item, all from popularity, green boost" 152 | echo "" 
153 | curl -H "Content-Type: application/json" -d ' 154 | { 155 | "fields": [{ 156 | "name": "color", 157 | "values": ["green"], 158 | "bias": 1.05 159 | }] 160 | }' http://localhost:8000/queries.json 161 | echo "" 162 | 163 | 164 | echo "" 165 | echo "Recommendations for no user no item, all from popularity, red color boost, S size filter" 166 | echo "" 167 | curl -H "Content-Type: application/json" -d ' 168 | { 169 | "fields": [{ 170 | "name": "color", 171 | "values": ["red"], 172 | "bias": 1.05 173 | }, { 174 | "name": "size", 175 | "values": ["S"], 176 | "bias": -1 177 | }] 178 | }' http://localhost:8000/queries.json 179 | echo "" 180 | 181 | 182 | echo "" 183 | echo "============ dateRange filter ============" 184 | echo "" 185 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 186 | BEFORE=`date --date="tomorrow" --iso-8601=seconds` 187 | AFTER=`date --date="1 day ago" --iso-8601=seconds` 188 | else 189 | BEFORE=`date -v +1d +"%Y-%m-%dT%H:%M:%SZ"` 190 | AFTER=`date -v -1d +"%Y-%m-%dT%H:%M:%SZ"` 191 | fi 192 | #echo "before: $BEFORE after: $AFTER" 193 | echo "Recommendations for user: user-1" 194 | echo "" 195 | curl -H "Content-Type: application/json" -d " 196 | { 197 | \"user\": \"user-1\", 198 | \"dateRange\": { 199 | \"name\": \"date\", 200 | \"before\": \"$BEFORE\", 201 | \"after\": \"$AFTER\" 202 | } 203 | }" http://localhost:8000/queries.json 204 | echo "" 205 | 206 | echo "" 207 | echo "============ query with item and user *EXPERIMENTAL* ============" 208 | # This is experimental, use at your own risk, not well founded in theory 209 | echo "" 210 | echo "Recommendations for user-1 & product-1" 211 | echo "" 212 | curl -H "Content-Type: application/json" -d ' 213 | { 214 | "user": "user-1", 215 | "item": "product-1" 216 | }' http://localhost:8000/queries.json 217 | echo "" 218 | 219 | -------------------------------------------------------------------------------- /examples/rank/rank-engine-user-define.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "appName": "default-rank", 9 | "eventNames": ["show", "like"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer": "300m", 17 | "spark.executor.memory": "4g", 18 | "es.index.auto.create": "true" 19 | }, 20 | "algorithms": [ 21 | { 22 | "name": "ur", 23 | "params": { 24 | "appName": "default-rank", 25 | "indexName": "urindex", 26 | "typeName": "items", 27 | "recsModel": "backfill", 28 | "eventNames": ["show", "like"], 29 | "rankings":[ 30 | { 31 | "name": "defaultRank", 32 | "type": "userDefined" 33 | } 34 | ] 35 | } 36 | } 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /examples/rank/rank-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | 
"appName": "default-rank", 9 | "eventNames": ["show", "like"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer": "300m", 17 | "spark.executor.memory": "4g", 18 | "es.index.auto.create": "true" 19 | }, 20 | "algorithms": [ 21 | { 22 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventsNames", 23 | "name": "ur", 24 | "params": { 25 | "comment": "must have data for the first event or the model will not build, other events are optional", 26 | "appName": "default-rank", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "recsModel": "all", 30 | "eventNames": ["show", "like"], 31 | "rankings":[ 32 | { 33 | "name": "popularRank", 34 | "type": "popular", 35 | "eventNames": ["show", "like"], 36 | "duration": "3650 days", 37 | "endDate": "ISO8601-date" 38 | },{ 39 | "name": "defaultRank", 40 | "type": "userDefined" 41 | },{ 42 | "name": "uniqueRank", 43 | "type": "random" 44 | } 45 | ] 46 | } 47 | } 48 | ] 49 | } 50 | -------------------------------------------------------------------------------- /examples/single-query-eventNames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "Recommendations from popular" 4 | echo "" 5 | curl -H "Content-Type: application/json" -d ' 6 | { 7 | }' http://localhost:8000/queries.json 8 | echo "" 9 | echo "" 10 | 11 | echo "Recommendations for user: u1 purchase and view events" 12 | echo "" 13 | curl -H "Content-Type: application/json" -d ' 14 | { 15 | "user": "u1" 16 | }' http://localhost:8000/queries.json 17 | echo "" 18 | echo "" 19 | 20 | echo "Recommendations for user: u1 from purchase event alone, should have some non-popular based recs" 21 | echo "" 22 | curl -H "Content-Type: application/json" -d ' 23 | { 24 | "user": "u1", 25 | "eventNames": ["purchase"] 26 | }' http://localhost:8000/queries.json 27 | echo "" 28 | echo "" 29 | 30 | echo "Recommendations for user: u1 from view event alone, should have some non-popular based recs" 31 | echo "" 32 | curl -H "Content-Type: application/json" -d ' 33 | { 34 | "user": "u1", 35 | "eventNames": ["view"] 36 | }' http://localhost:8000/queries.json 37 | echo "" 38 | echo "" 39 | 40 | -------------------------------------------------------------------------------- /examples/single-query-handmade.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "Recommendations for user: u1" 4 | echo "" 5 | curl -H "Content-Type: application/json" -d ' 6 | { 7 | "user": "u1" 8 | }' http://localhost:8000/queries.json 9 | echo "" 10 | 11 | -------------------------------------------------------------------------------- /examples/trend-engine-4-days-ago.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "popularity-only", 3 | "description": "Default settings", 4 | "engineFactory": "org.template.RecommendationEngine", 5 | "datasource": { 6 | "params" : { 7 | "name": "sample-handmade-data.txt", 8 | "appName": "handmade", 9 | "eventNames": ["purchase", "view"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | 
"spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer.mb": "300", 17 | "spark.kryoserializer.buffer": "300m", 18 | "spark.executor.memory": "4g", 19 | "es.index.auto.create": "true" 20 | }, 21 | "algorithms": [ 22 | { 23 | "comment": "setup to only calculate a popularity model for *hot* and add it to the existing model for backfill", 24 | "name": "ur", 25 | "params": { 26 | "appName": "handmade", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "eventNames": ["purchase", "view"], 30 | "recsModel": "backfill", 31 | "rankings": [{ 32 | "name": "trendRank", 33 | "type": "trending", 34 | "eventNames": ["purchase", "view"], 35 | "duration": 259200, 36 | "comment": "VERY IMPORTANT that the line below be set to today - 4 days for integration-test-pop-model", 37 | "offsetDate": "2016-01-19T11:55:07Z" 38 | }] 39 | } 40 | } 41 | ] 42 | } 43 | 44 | -------------------------------------------------------------------------------- /examples/trend-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "popularity-only", 3 | "description": "Default settings", 4 | "engineFactory": "org.template.RecommendationEngine", 5 | "datasource": { 6 | "params" : { 7 | "name": "sample-handmade-data.txt", 8 | "appName": "handmade", 9 | "eventNames": ["purchase", "view"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer.mb": "300", 17 | "spark.kryoserializer.buffer": "300m", 18 | "spark.executor.memory": "4g", 19 | "es.index.auto.create": "true" 20 | }, 21 | "algorithms": [ 22 | { 23 | "comment": "setup to only calculate a popularity model for *hot* and add it to the existing model for backfill", 24 | "name": "ur", 25 | "params": { 26 | "appName": "handmade", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "eventNames": ["purchase", "view"], 30 | "recsModel": "backfill", 31 | "rankings": [{ 32 | "name": "trendRank", 33 | "type": "trending", 34 | "eventNames": ["purchase", "view"], 35 | "duration": 259200 36 | }] 37 | } 38 | } 39 | ] 40 | } 41 | 42 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | resolvers += Resolver.typesafeRepo("releases") 2 | 3 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.6.0") 4 | 5 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.8.0") 6 | 7 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.13.0") 8 | -------------------------------------------------------------------------------- /scalastyle-config.xml: -------------------------------------------------------------------------------- 1 | 2 | Scalastyle standard configuration 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- 
/src/main/scala/DataSource.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import _root_.io.prediction.controller.{ EmptyActualResult, EmptyEvaluationInfo, PDataSource, Params } 21 | import _root_.io.prediction.data.storage.PropertyMap 22 | import _root_.io.prediction.data.store.PEventStore 23 | import grizzled.slf4j.Logger 24 | import io.prediction.core.{ EventWindow, SelfCleaningDataSource } 25 | import org.apache.spark.SparkContext 26 | import org.apache.spark.rdd.RDD 27 | import org.template.conversions.{ ActionID, ItemID } 28 | import org.template.conversions._ 29 | 30 | /** Taken from engine.json these are passed in to the DataSource constructor 31 | * 32 | * @param appName registered name for the app 33 | * @param eventNames a list of named events expected. The first is the primary event, the rest are secondary. These 34 | * will be used to create the primary correlator and cross-cooccurrence secondary correlators. 35 | */ 36 | case class DataSourceParams( 37 | appName: String, 38 | eventNames: List[String], // IMPORTANT: eventNames must be exactly the same as URAlgorithmParams eventNames 39 | eventWindow: Option[EventWindow]) extends Params 40 | 41 | /** Reads specified events from the PEventStore and creates an RDD for each event. A list of pairs (eventName, eventRDD) 42 | * is sent to the Preparator for further processing.
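 * For example (illustrative shape only), with eventNames ["purchase", "view"] the result is
 * List(("purchase", RDD[(UserID, ItemID)]), ("view", RDD[(UserID, ItemID)])), where "purchase",
 * being first, is the primary event.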
43 | * @param dsp parameters taken from engine.json 44 | */ 45 | class DataSource(val dsp: DataSourceParams) 46 | extends PDataSource[TrainingData, EmptyEvaluationInfo, Query, EmptyActualResult] 47 | with SelfCleaningDataSource { 48 | 49 | @transient override lazy implicit val logger: Logger = Logger[this.type] 50 | 51 | override def appName: String = dsp.appName 52 | override def eventWindow: Option[EventWindow] = dsp.eventWindow 53 | 54 | drawInfo("Init DataSource", Seq( 55 | ("══════════════════════════════", "════════════════════════════"), 56 | ("App name", appName), 57 | ("Event window", eventWindow), 58 | ("Event names", dsp.eventNames))) 59 | 60 | /** Reads events from PEventStore and creates an RDD for each */ 61 | override def readTraining(sc: SparkContext): TrainingData = { 62 | 63 | val eventNames = dsp.eventNames 64 | cleanPersistedPEvents(sc) 65 | val eventsRDD = PEventStore.find( 66 | appName = dsp.appName, 67 | entityType = Some("user"), 68 | eventNames = Some(eventNames), 69 | targetEntityType = Some(Some("item")))(sc).repartition(sc.defaultParallelism) 70 | 71 | // now separate the events by event name 72 | val actionRDDs: List[(ActionID, RDD[(UserID, ItemID)])] = eventNames.map { eventName => 73 | val actionRDD = eventsRDD.filter { event => 74 | require(eventNames.contains(event.event), s"Unexpected event $event is read.") // is this really needed? 75 | require(event.entityId.nonEmpty && event.targetEntityId.get.nonEmpty, "Empty user or item ID") 76 | eventName.equals(event.event) 77 | }.map { event => 78 | (event.entityId, event.targetEntityId.get) 79 | } 80 | 81 | (eventName, actionRDD) 82 | } filterNot { case (_, actionRDD) => actionRDD.isEmpty() } 83 | 84 | logger.debug(s"Received actions for events ${actionRDDs.map(_._1)}") 85 | 86 | // aggregating all $set/$unsets for metadata fields, which are attached to items 87 | val fieldsRDD: RDD[(ItemID, PropertyMap)] = PEventStore.aggregateProperties( 88 | appName = dsp.appName, 89 | entityType = "item")(sc) 90 | // logger.debug(s"FieldsRDD\n${fieldsRDD.take(25).mkString("\n")}") 91 | 92 | // Have a list of (actionName, RDD), for each action 93 | // todo: some day allow data to be content, which requires rethinking how to use EventStore 94 | TrainingData(actionRDDs, fieldsRDD) 95 | } 96 | } 97 | 98 | /** Low-level RDD-based representation of the data ready for the Preparator 99 | * 100 | * @param actions List of Tuples (actionName, actionRDD) 101 | * @param fieldsRDD RDD of item keyed PropertyMap for item metadata 102 | */ 103 | case class TrainingData( 104 | actions: Seq[(ActionID, RDD[(UserID, ItemID)])], 105 | fieldsRDD: RDD[(ItemID, PropertyMap)]) extends Serializable { 106 | 107 | override def toString: String = { 108 | val a = actions.map { t => 109 | s"${t._1} actions: [count:${t._2.count()}] + sample:${t._2.take(2).toList} " 110 | }.toString() 111 | val f = s"Item metadata: [count:${fieldsRDD.count}] + sample:${fieldsRDD.take(2).toList} " 112 | a + f 113 | } 114 | 115 | } -------------------------------------------------------------------------------- /src/main/scala/Engine.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import grizzled.slf4j.Logger 21 | import io.prediction.controller.{ EmptyActualResult, EmptyEvaluationInfo, Engine, EngineFactory } 22 | import org.template.conversions._ 23 | 24 | /** This file contains case classes that are used with reflection to specify how query and config 25 | * JSON is to be parsed. The Query case class, for instance, defines the way a JSON query is to be 26 | * formed. The same for param case classes. 27 | */ 28 | 29 | /** The Query spec with optional values. The only hard rule is that there must be either a user or 30 | * an item id. All other values are optional. 31 | */ 32 | case class Query( 33 | user: Option[String] = None, // must be a user or item id 34 | userBias: Option[Float] = None, // default: whatever is in algorithm params or 1 35 | item: Option[String] = None, // must be a user or item id 36 | itemBias: Option[Float] = None, // default: whatever is in algorithm params or 1 37 | fields: Option[List[Field]] = None, // default: whatever is in algorithm params or None 38 | currentDate: Option[String] = None, // if used will override dateRange filter, currentDate must lie between the item's 39 | // expireDateName value and availableDateName value, all are ISO 8601 dates 40 | dateRange: Option[DateRange] = None, // optional before and after filter applied to a date field 41 | blacklistItems: Option[List[String]] = None, // default: whatever is in algorithm params or None 42 | returnSelf: Option[Boolean] = None, // means for an item query should the item itself be returned, defaults 43 | // to what is in the algorithm params or false 44 | num: Option[Int] = None, // default: whatever is in algorithm params, which itself has a default--probably 20 45 | eventNames: Option[List[String]], // names used to ID all user actions 46 | withRanks: Option[Boolean] = None) // add ranking field values to each ItemScore, default false 47 | extends Serializable 48 | 49 | /** Used to specify how Fields are represented in engine.json */ 50 | case class Field( // no optional values for fields, when specified 51 | name: String, // name of metadata field 52 | values: Seq[String], // fields can have multiple values, like tags, or a single value as when using hierarchical 53 | // taxonomies 54 | bias: Float) // any positive value is a boost, negative is a filter 55 | extends Serializable 56 | 57 | /** Used to specify the date range for a query */ 58 | case class DateRange( 59 | name: String, // name of item property for the date comparison 60 | before: Option[String], // an empty string means no filter 61 | after: Option[String]) // both empty should be ignored 62 | extends Serializable 63 | 64 | /** Results of a URAlgorithm.predict */ 65 | case class PredictedResult( 66 | itemScores: Array[ItemScore]) 67 | extends Serializable 68 | 69 | case class ItemScore( 70 | item: ItemID, // item id 71 | score: Double, // used to rank, original score
returned from the search engine 72 | ranks: Option[Map[String, Double]] = None) extends Serializable 73 | 74 | object RecommendationEngine extends EngineFactory { 75 | 76 | @transient lazy implicit val logger: Logger = Logger[this.type] 77 | drawActionML 78 | 79 | def apply(): Engine[TrainingData, EmptyEvaluationInfo, PreparedData, Query, PredictedResult, EmptyActualResult] = { 80 | new Engine( 81 | classOf[DataSource], 82 | classOf[Preparator], 83 | Map("ur" -> classOf[URAlgorithm]), // IMPORTANT: "ur" must be the "name" of the parameter set in engine.json 84 | classOf[Serving]) 85 | } 86 | } -------------------------------------------------------------------------------- /src/main/scala/EsClient.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import java.util 21 | 22 | import grizzled.slf4j.Logger 23 | import io.prediction.data.storage._ 24 | import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest 25 | import org.apache.spark.SparkContext 26 | import org.apache.spark.rdd.RDD 27 | import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest 28 | import org.elasticsearch.action.admin.indices.create.CreateIndexRequest 29 | import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest 30 | import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest 31 | import org.elasticsearch.action.admin.indices.refresh.RefreshRequest 32 | import org.elasticsearch.action.get.GetResponse 33 | import org.elasticsearch.client.transport.TransportClient 34 | import org.elasticsearch.common.settings.{ ImmutableSettings, Settings } 35 | import org.joda.time.DateTime 36 | import org.json4s.jackson.JsonMethods._ 37 | import org.elasticsearch.spark._ 38 | import org.elasticsearch.node.NodeBuilder._ 39 | import org.elasticsearch.search.SearchHits 40 | import org.json4s.JValue 41 | import org.template.conversions.{ ItemID, ItemProps } 42 | 43 | import scala.collection.immutable 44 | import scala.collection.parallel.mutable 45 | 46 | /** Elasticsearch notes: 47 | * 1) every query clause will affect scores unless it has a constant_score and boost: 0 48 | * 2) the Spark index writer is fast but must assemble all data for the index before the write occurs 49 | * 3) many operations must be followed by a refresh before the action takes effect--sort of like a transaction commit 50 | * 4) to use like a DB you must specify that indexed fields are `not_analyzed` so they won't be lowercased, 51 | * stemmed, tokenized, etc.
Then the values are literal and must match exactly what is in the query (no analyzer) 52 | */ 53 | 54 | /** Defines methods to use on Elasticsearch. */ 55 | object EsClient { 56 | @transient lazy val logger: Logger = Logger[this.type] 57 | 58 | private lazy val client = if (Storage.getConfig("ELASTICSEARCH").nonEmpty) 59 | new elasticsearch.StorageClient(Storage.getConfig("ELASTICSEARCH").get).client 60 | else 61 | throw new IllegalStateException("No Elasticsearch client configuration detected, check your pio-env.sh for " + 62 | "proper configuration settings") 63 | 64 | // wrong way that uses only default settings, which will be a localhost ES server. 65 | //private lazy val client = new elasticsearch.StorageClient(StorageClientConfig()).client 66 | 67 | /** Delete all data from an instance but do not commit it. Until the "refresh" is done on the index 68 | * the changes will not be reflected. 69 | * @param indexName will delete all types under this index, types are not used by the UR 70 | * @param refresh whether to refresh the index so the delete is committed 71 | * @return true if all is well 72 | */ 73 | def deleteIndex(indexName: String, refresh: Boolean = false): Boolean = { 74 | //val debug = client.connectedNodes() 75 | if (client.admin().indices().exists(new IndicesExistsRequest(indexName)).actionGet().isExists) { 76 | val delete = client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet() 77 | if (!delete.isAcknowledged) { 78 | logger.info(s"Index $indexName wasn't deleted, but may have quietly failed.") 79 | } else { 80 | // now refresh to get it 'committed' 81 | // todo: should do this after the new index is created so no index downtime 82 | if (refresh) refreshIndex(indexName) 83 | } 84 | true 85 | } else { 86 | logger.warn(s"Elasticsearch index: $indexName wasn't deleted because it didn't exist.
This may be an error.") 87 | false 88 | } 89 | } 90 | 91 | /** Creates a new empty index in Elasticsearch and initializes mappings for fields that will be used 92 | * @param indexName elasticsearch name 93 | * @param indexType names the type of index, usually use the item name 94 | * @param fieldNames ES field names 95 | * @param typeMappings indicates which ES fields are to be not_analyzed without norms 96 | * @param refresh should the index be refreshed so the create is committed 97 | * @return true if all is well 98 | */ 99 | def createIndex( 100 | indexName: String, 101 | indexType: String, 102 | fieldNames: List[String], 103 | typeMappings: Map[String, String] = Map.empty, 104 | refresh: Boolean = false): Boolean = { 105 | if (!client.admin().indices().exists(new IndicesExistsRequest(indexName)).actionGet().isExists) { 106 | var mappings = """ 107 | |{ 108 | | "properties": { 109 | """.stripMargin.replace("\n", "") 110 | 111 | def mappingsField(`type`: String) = { 112 | s""" 113 | | : { 114 | | "type": "${`type`}", 115 | | "index": "not_analyzed", 116 | | "norms" : { 117 | | "enabled" : false 118 | | } 119 | | }, 120 | """.stripMargin.replace("\n", "") 121 | } 122 | 123 | val mappingsTail = """ 124 | | "id": { 125 | | "type": "string", 126 | | "index": "not_analyzed", 127 | | "norms" : { 128 | | "enabled" : false 129 | | } 130 | | } 131 | | } 132 | |} 133 | """.stripMargin.replace("\n", "") 134 | 135 | fieldNames.foreach { fieldName => 136 | if (typeMappings.contains(fieldName)) 137 | mappings += (fieldName + mappingsField(typeMappings(fieldName))) 138 | else // unspecified fields are treated as not_analyzed strings 139 | mappings += (fieldName + mappingsField("string")) 140 | } 141 | mappings += mappingsTail // any other string is not_analyzed 142 | // logger.debug(s"ES mapping: $mappings") 143 | 144 | val cir = new CreateIndexRequest(indexName).mapping(indexType, mappings) 145 | val create = client.admin().indices().create(cir).actionGet() 146 | if (!create.isAcknowledged) { 147 | logger.info(s"Index $indexName wasn't created, but may have quietly failed.") 148 | } else { 149 | // now refresh to get it 'committed' 150 | // todo: should do this after the new index is created so no index downtime 151 | if (refresh) refreshIndex(indexName) 152 | } 153 | true 154 | } else { 155 | logger.warn(s"Elasticsearch index: $indexName wasn't created because it already exists. 
This may be an error.") 156 | false 157 | } 158 | } 159 | 160 | /** Commits any pending changes to the index */ 161 | def refreshIndex(indexName: String): Unit = { 162 | client.admin().indices().refresh(new RefreshRequest(indexName)).actionGet() 163 | } 164 | 165 | /** Create new index and hot-swap the new after it's indexed and ready to take over, then delete the old */ 166 | def hotSwap( 167 | alias: String, 168 | typeName: String, 169 | indexRDD: RDD[Map[String, Any]], 170 | fieldNames: List[String], 171 | typeMappings: Map[String, String] = Map.empty): Unit = { 172 | // get index for alias, change a char, create new one with new id and index it, swap alias and delete old one 173 | val aliasMetadata = client.admin().indices().prepareGetAliases(alias).get().getAliases 174 | val newIndex = alias + "_" + DateTime.now().getMillis.toString 175 | 176 | logger.debug(s"Create new index: $newIndex, $typeName, $fieldNames, $typeMappings") 177 | createIndex(newIndex, typeName, fieldNames, typeMappings) 178 | 179 | val newIndexURI = "/" + newIndex + "/" + typeName 180 | // logger.debug(s"Save to ES[$newIndexURI]:\n${indexRDD.take(25).mkString("\n")}") 181 | indexRDD.saveToEs(newIndexURI, Map("es.mapping.id" -> "id")) 182 | //refreshIndex(newIndex) 183 | 184 | if (!aliasMetadata.isEmpty 185 | && aliasMetadata.get(alias) != null 186 | && aliasMetadata.get(alias).get(0) != null) { // was alias so remove the old one 187 | //append the DateTime to the alias to create an index name 188 | val oldIndex = aliasMetadata.get(alias).get(0).getIndexRouting 189 | client.admin().indices().prepareAliases() 190 | .removeAlias(oldIndex, alias) 191 | .addAlias(newIndex, alias) 192 | .execute().actionGet() 193 | deleteIndex(oldIndex) // now can safely delete the old one since it's not used 194 | } else { // todo: could be more than one index with 'alias' so 195 | // no alias so add one 196 | //to clean up any indexes that exist with the alias name 197 | val indices = util.Arrays.asList(client.admin().indices().prepareGetIndex().get().indices()).get(0) 198 | if (indices.contains(alias)) { 199 | //refreshIndex(alias) 200 | deleteIndex(alias) // index named like the new alias so delete it 201 | } 202 | // slight downtime, but only for one case of upgrading the UR engine from v0.1.x to v0.2.0+ 203 | client.admin().indices().prepareAliases() 204 | .addAlias(newIndex, alias) 205 | .execute().actionGet() 206 | } 207 | // clean out any old indexes that were the product of a failed train? 
208 | val indices = util.Arrays.asList(client.admin().indices().prepareGetIndex().get().indices()).get(0) 209 | indices.map { index => 210 | if (index.contains(alias) && index != newIndex) deleteIndex(index) //clean out any old orphaned indexes 211 | } 212 | 213 | } 214 | 215 | /** Performs a search using the JSON query String 216 | * 217 | * @param query the JSON query string parable by Elasticsearch 218 | * @param indexName the index to search 219 | * @return a [PredictedResults] collection 220 | */ 221 | def search(query: String, indexName: String): Option[SearchHits] = { 222 | val sr = client.prepareSearch(indexName).setSource(query).get() 223 | if (!sr.isTimedOut) { 224 | Some(sr.getHits) 225 | } else { 226 | None 227 | } 228 | } 229 | 230 | /** Gets the "source" field of an Elasticsearch document 231 | * 232 | * @param indexName index that contains the doc/item 233 | * @param typeName type name used to construct ES REST URI 234 | * @param doc for UR the item id 235 | * @return source [java.util.Map] of field names to any valid field values or null if empty 236 | */ 237 | def getSource(indexName: String, typeName: String, doc: String): util.Map[String, AnyRef] = { 238 | client.prepareGet(indexName, typeName, doc) 239 | .execute() 240 | .actionGet().getSource 241 | } 242 | 243 | /* 244 | public Set getIndicesFromAliasName(String aliasName) { 245 | 246 | IndicesAdminClient iac = client.admin().indices(); 247 | ImmutableOpenMap> map = iac.getAliases(new GetAliasesRequest(aliasName)) 248 | .actionGet().getAliases(); 249 | 250 | final Set allIndices = new HashSet<>(); 251 | map.keysIt().forEachRemaining(allIndices::add); 252 | return allIndices; 253 | } 254 | */ 255 | def getIndexName(alias: String): Option[String] = { 256 | 257 | val allIndicesMap = client.admin().indices().getAliases(new GetAliasesRequest(alias)).actionGet().getAliases 258 | 259 | if (allIndicesMap.size() == 1) { // must be a 1-1 mapping of alias <-> index 260 | var indexName: String = "" 261 | val itr = allIndicesMap.keysIt() 262 | while (itr.hasNext) 263 | indexName = itr.next() 264 | Some(indexName) // the one index the alias points to 265 | } else { 266 | // delete all the indices that are pointed to by the alias, they can't be used 267 | logger.warn("There is no 1-1 mapping of index to alias so deleting the old indexes that are referenced by the " + 268 | "alias. This may have been caused by a crashed or stopped `pio train` operation so try running it again.") 269 | if (!allIndicesMap.isEmpty) { 270 | val i = allIndicesMap.keys().toArray.asInstanceOf[Array[String]] 271 | for (indexName <- i) { 272 | deleteIndex(indexName, refresh = true) 273 | } 274 | } 275 | None // if more than one abort, need to clean up bad aliases 276 | } 277 | } 278 | 279 | def getRDD( 280 | alias: String, 281 | typeName: String)(implicit sc: SparkContext): RDD[(ItemID, ItemProps)] = { 282 | getIndexName(alias) 283 | .map(index => sc.esJsonRDD(alias + "/" + typeName) map { case (itemId, json) => itemId -> DataMap(json).fields }) 284 | .getOrElse(sc.emptyRDD) 285 | } 286 | } -------------------------------------------------------------------------------- /src/main/scala/PopModel.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import grizzled.slf4j.Logger 21 | import io.prediction.data.storage.Event 22 | import io.prediction.data.store.PEventStore 23 | import org.apache.spark.SparkContext 24 | import org.apache.spark.rdd.RDD 25 | import org.joda.time.format.ISODateTimeFormat 26 | import org.joda.time.{ DateTime, Interval } 27 | import org.template.conversions.{ ItemID, ItemProps } 28 | 29 | import scala.language.postfixOps 30 | import scala.util.Random 31 | 32 | object RankingFieldName { 33 | val UserRank = "userRank" 34 | val UniqueRank = "uniqueRank" 35 | val PopRank = "popRank" 36 | val TrendRank = "trendRank" 37 | val HotRank = "hotRank" 38 | val UnknownRank = "unknownRank" 39 | def toSeq: Seq[String] = Seq(UserRank, UniqueRank, PopRank, TrendRank, HotRank) 40 | override def toString: String = s"$UserRank, $UniqueRank, $PopRank, $TrendRank, $HotRank" 41 | } 42 | 43 | object RankingType { 44 | val Popular = "popular" 45 | val Trending = "trending" 46 | val Hot = "hot" 47 | val UserDefined = "userDefined" 48 | val Random = "random" 49 | def toSeq: Seq[String] = Seq(Popular, Trending, Hot, UserDefined, Random) 50 | override def toString: String = s"$Popular, $Trending, $Hot, $UserDefined, $Random" 51 | } 52 | 53 | class PopModel(fieldsRDD: RDD[(ItemID, ItemProps)])(implicit sc: SparkContext) { 54 | 55 | @transient lazy val logger: Logger = Logger[this.type] 56 | 57 | def calc( 58 | modelName: String, 59 | eventNames: Seq[String], 60 | appName: String, 61 | duration: Int = 0, 62 | offsetDate: Option[String] = None): RDD[(ItemID, Double)] = { 63 | 64 | // todo: make end mandatory and fill it with "now" upstream if not specified, will simplify logic here 65 | // end should always be 'now' except in unusual conditions like for testing 66 | val end = if (offsetDate.isEmpty) DateTime.now else { 67 | try { 68 | ISODateTimeFormat.dateTimeParser().parseDateTime(offsetDate.get) 69 | } catch { 70 | case e: IllegalArgumentException => 71 | logger.warn("Bad end for popModel: " + offsetDate.get + " using 'now'") 72 | DateTime.now 73 | } 74 | } 75 | 76 | val interval = new Interval(end.minusSeconds(duration), end) 77 | 78 | // based on type of popularity model return a set of (item-id, ranking-number) for all items 79 | logger.info(s"PopModel $modelName using end: $end, and duration: $duration, interval: $interval") 80 | 81 | // if None?
debatable, this is either an error or may need to default to popular, why call popModel otherwise 82 | modelName match { 83 | case RankingType.Popular => calcPopular(appName, eventNames, interval) 84 | case RankingType.Trending => calcTrending(appName, eventNames, interval) 85 | case RankingType.Hot => calcHot(appName, eventNames, interval) 86 | case RankingType.Random => calcRandom(appName, interval) 87 | case RankingType.UserDefined => sc.emptyRDD 88 | case unknownRankingType => 89 | logger.warn( 90 | s""" 91 | |Bad rankings param type=[$unknownRankingType] in engine definition params, possibly a bad json value. 92 | |Use one of the available parameter values ($RankingType).""".stripMargin) 93 | sc.emptyRDD 94 | } 95 | 96 | } 97 | 98 | /** Create random rank for all items */ 99 | def calcRandom( 100 | appName: String, 101 | interval: Interval): RDD[(ItemID, Double)] = { 102 | 103 | val events = eventsRDD(appName = appName, interval = interval) 104 | val actionsRDD = events.map(_.targetEntityId).filter(_.isDefined).map(_.get).distinct() 105 | val itemsRDD = fieldsRDD.map { case (itemID, _) => itemID } 106 | 107 | // logger.debug(s"ActionsRDD: ${actionsRDD.take(25).mkString(", ")}") 108 | // logger.debug(s"ItemsRDD: ${itemsRDD.take(25).mkString(", ")}") 109 | actionsRDD.union(itemsRDD).distinct().map { itemID => itemID -> Random.nextDouble() } 110 | } 111 | 112 | /** Creates a rank from the number of named events per item for the duration */ 113 | def calcPopular( 114 | appName: String, 115 | eventNames: Seq[String], 116 | interval: Interval): RDD[(ItemID, Double)] = { 117 | val events = eventsRDD(appName, eventNames, interval) 118 | events.map { e => (e.targetEntityId, e.event) } 119 | .groupByKey() 120 | .map { case (itemID, itEvents) => (itemID.get, itEvents.size.toDouble) } 121 | .reduceByKey(_ + _) // make this a double in Elasticsearch 122 | } 123 | 124 | /** Creates a rank for each item by dividing the duration in two and counting named events in both buckets, 125 | * then subtracting the less recent count from the most recent. This ranks by change in popularity or velocity of popularity change. 126 | * Interval(start, end) end instant is always greater than or equal to the start instant. 127 | */ 128 | def calcTrending( 129 | appName: String, 130 | eventNames: Seq[String], 131 | interval: Interval): RDD[(ItemID, Double)] = { 132 | 133 | logger.info(s"Current Interval: $interval, ${interval.toDurationMillis}") 134 | val halfInterval = interval.toDurationMillis / 2 135 | val olderInterval = new Interval(interval.getStart, interval.getStart.plus(halfInterval)) 136 | logger.info(s"Older Interval: $olderInterval") 137 | val newerInterval = new Interval(interval.getStart.plus(halfInterval), interval.getEnd) 138 | logger.info(s"Newer Interval: $newerInterval") 139 | 140 | val olderPopRDD = calcPopular(appName, eventNames, olderInterval) 141 | if (!olderPopRDD.isEmpty()) { 142 | val newerPopRDD = calcPopular(appName, eventNames, newerInterval) 143 | newerPopRDD.join(olderPopRDD).map { 144 | case (item, (newerScore, olderScore)) => item -> (newerScore - olderScore) 145 | } 146 | } else sc.emptyRDD 147 | 148 | } 149 | 150 | /** Creates a rank for each item by dividing all events per item into three buckets and calculating the change in 151 | * velocity over time, in other words the acceleration of popularity change.
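 * Illustrative arithmetic: bucket counts of 2 (older), 5 (middle) and 11 (newer) give
 * velocities 5 - 2 = 3 and 11 - 5 = 6, so the hot score is 6 - 3 = 3.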
152 | */ 153 | def calcHot( 154 | appName: String, 155 | eventNames: Seq[String] = List.empty, 156 | interval: Interval): RDD[(ItemID, Double)] = { 157 | 158 | logger.info(s"Current Interval: $interval, ${interval.toDurationMillis}") 159 | val olderInterval = new Interval(interval.getStart, interval.getStart.plus(interval.toDurationMillis / 3)) 160 | logger.info(s"Older Interval: $olderInterval") 161 | val middleInterval = new Interval(olderInterval.getEnd, olderInterval.getEnd.plus(olderInterval.toDurationMillis)) 162 | logger.info(s"Middle Interval: $middleInterval") 163 | val newerInterval = new Interval(middleInterval.getEnd, interval.getEnd) 164 | logger.info(s"Newer Interval: $newerInterval") 165 | 166 | val olderPopRDD = calcPopular(appName, eventNames, olderInterval) 167 | if (!olderPopRDD.isEmpty()) { // todo: may want to allow an interval with no events, give them 0 counts 168 | val middlePopRDD = calcPopular(appName, eventNames, middleInterval) 169 | if (!middlePopRDD.isEmpty()) { 170 | val newerPopRDD = calcPopular(appName, eventNames, newerInterval) 171 | val newVelocityRDD = newerPopRDD.join(middlePopRDD).map { 172 | case (item, (newerScore, middleScore)) => item -> (newerScore - middleScore) 173 | } 174 | val oldVelocityRDD = middlePopRDD.join(olderPopRDD).map { 175 | case (item, (middleScore, olderScore)) => item -> (middleScore - olderScore) 176 | } 177 | newVelocityRDD.join(oldVelocityRDD).map { 178 | case (item, (newVelocity, oldVelocity)) => item -> (newVelocity - oldVelocity) 179 | } 180 | } else sc.emptyRDD 181 | } else sc.emptyRDD 182 | } 183 | 184 | def eventsRDD( 185 | appName: String, 186 | eventNames: Seq[String] = Seq.empty, 187 | interval: Interval): RDD[Event] = { 188 | 189 | logger.info(s"PopModel getting eventsRDD for startTime: ${interval.getStart} and endTime ${interval.getEnd}") 190 | PEventStore.find( 191 | appName = appName, 192 | startTime = Some(interval.getStart), 193 | untilTime = Some(interval.getEnd), 194 | eventNames = if (eventNames.nonEmpty) Some(eventNames) else None)(sc) 195 | } 196 | 197 | } 198 | 199 | object PopModel { 200 | 201 | def apply(fieldsRDD: RDD[(ItemID, ItemProps)])(implicit sc: SparkContext): PopModel = { 202 | new PopModel(fieldsRDD) 203 | } 204 | 205 | val nameByType: Map[String, String] = Map( 206 | RankingType.Popular -> RankingFieldName.PopRank, 207 | RankingType.Trending -> RankingFieldName.TrendRank, 208 | RankingType.Hot -> RankingFieldName.HotRank, 209 | RankingType.UserDefined -> RankingFieldName.UserRank, 210 | RankingType.Random -> RankingFieldName.UniqueRank).withDefaultValue(RankingFieldName.UnknownRank) 211 | 212 | } 213 | -------------------------------------------------------------------------------- /src/main/scala/Preparator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import io.prediction.controller.PPreparator 21 | import org.apache.mahout.math.indexeddataset.{ BiDictionary, IndexedDataset } 22 | import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark 23 | import org.apache.spark.SparkContext 24 | import org.apache.spark.rdd.RDD 25 | import org.template.conversions._ 26 | 27 | class Preparator 28 | extends PPreparator[TrainingData, PreparedData] { 29 | 30 | /** Create [[org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark]] rdd backed 31 | * "distributed row matrices" from the input string keyed rdds. 32 | * @param sc Spark context 33 | * @param trainingData list of (actionName, actionRDD) 34 | * @return list of (correlatorName, correlatorIndexedDataset) 35 | */ 36 | def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { 37 | // now that we have all actions in separate RDDs we must merge any user dictionaries and 38 | // make sure the same user ids map to the correct events 39 | var userDictionary: Option[BiDictionary] = None 40 | 41 | val indexedDatasets = trainingData.actions.map { 42 | case (eventName, eventIDS) => 43 | 44 | // passing in previous row dictionary will use the values if they exist 45 | // and append any new ids, so after all are constructed we have all user ids in the last dictionary 46 | val ids = IndexedDatasetSpark(eventIDS, userDictionary)(sc) 47 | userDictionary = Some(ids.rowIDs) 48 | (eventName, ids) 49 | } 50 | 51 | // now make sure all matrices have identical row space since this corresponds to all users 52 | // todo: check to see that there are events in primary event IndexedDataset and abort if not. 53 | val rowAdjustedIds = userDictionary map { userDict => 54 | indexedDatasets.map { 55 | case (eventName, eventIDS) => 56 | (eventName, eventIDS.create(eventIDS.matrix, userDictionary.get, eventIDS.columnIDs).newRowCardinality(userDict.size)) 57 | } 58 | } getOrElse Seq.empty 59 | 60 | val fieldsRDD: RDD[(ItemID, ItemProps)] = trainingData.fieldsRDD.map { 61 | case (itemId, propMap) => itemId -> propMap.fields 62 | } 63 | PreparedData(rowAdjustedIds, fieldsRDD) 64 | } 65 | 66 | } 67 | 68 | case class PreparedData( 69 | actions: Seq[(ActionID, IndexedDataset)], 70 | fieldsRDD: RDD[(ItemID, ItemProps)]) extends Serializable -------------------------------------------------------------------------------- /src/main/scala/Serving.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.template 19 | 20 | import io.prediction.controller.LServing 21 | 22 | class Serving 23 | extends LServing[Query, PredictedResult] { 24 | 25 | override def serve( 26 | query: Query, 27 | predictedResults: Seq[PredictedResult]): PredictedResult = { 28 | predictedResults.head 29 | } 30 | } -------------------------------------------------------------------------------- /src/main/scala/URAlgorithm.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import java.util 21 | 22 | import grizzled.slf4j.Logger 23 | import io.prediction.controller.{ P2LAlgorithm, Params } 24 | import io.prediction.data.storage.{ DataMap, Event, NullModel, PropertyMap } 25 | import io.prediction.data.store.LEventStore 26 | import org.apache.mahout.math.cf.{ DownsamplableCrossOccurrenceDataset, SimilarityAnalysis } 27 | import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark 28 | import org.apache.spark.SparkContext 29 | import org.apache.spark.rdd.RDD 30 | import org.joda.time.DateTime 31 | import org.json4s.JValue 32 | import org.json4s.JsonAST._ 33 | import org.json4s.JsonDSL._ 34 | import org.json4s.jackson.JsonMethods._ 35 | import org.template.conversions._ 36 | 37 | import scala.collection.JavaConverters._ 38 | import scala.concurrent.duration.Duration 39 | import scala.language.{ implicitConversions, postfixOps } 40 | 41 | /** Available values for algorithm param "RecsModel" */ 42 | object RecsModel { // todo: replace this with rankings 43 | val All = "all" 44 | val CF = "collabFiltering" 45 | val BF = "backfill" 46 | override def toString: String = s"$All, $CF, $BF" 47 | } 48 | 49 | /** Setting the option in the params case class doesn't work as expected when the param is missing from 50 | * engine.json so set these for use in the algorithm when they are not present in the engine.json 51 | */ 52 | object defaultURAlgorithmParams { 53 | val DefaultMaxEventsPerEventType = 500 54 | val DefaultNum = 20 55 | val DefaultMaxCorrelatorsPerEventType = 50 56 | val DefaultMaxQueryEvents = 100 // default number of user history events to use in recs query 57 | 58 | val DefaultExpireDateName = "expireDate" // default name for the expire date property of an item 59 | val DefaultAvailableDateName = "availableDate" // default name for an item's available-after date 60 | val DefaultDateName = "date" // when using a date range in the query this is the name of the item's date 61 | val DefaultRecsModel = RecsModel.All // use CF + backfill 62 | val DefaultRankingParams = RankingParams() 63 | val DefaultBackfillFieldName = RankingFieldName.PopRank 64 | val DefaultBackfillType = RankingType.Popular 65 
| val DefaultBackfillDuration = "3650 days" // for all time 66 | 67 | val DefaultReturnSelf = false 68 | } 69 | 70 | /* default values must be set in code not the case class declaration 71 | case class BackfillField( 72 | name: Option[String] = Some(defaultURAlgorithmParams.DefaultBackfillFieldName), 73 | backfillType: Option[String] = Some(defaultURAlgorithmParams.DefaultBackfillType), // may be 'hot', or 'trending' also 74 | eventNames: Option[Seq[String]] = None, // None means use the algo eventNames list, otherwise a list of events 75 | offsetDate: Option[String] = None, // used only for tests, specifies the offset date to start the duration so the most 76 | // recent date for events going back by from the more recent offsetDate - duration 77 | duration: Option[String] = Some(defaultURAlgorithmParams.DefaultBackfillDuration)) // duration worth of events 78 | // to use in calculation of backfill 79 | 80 | case class URAlgorithmParams( 81 | appName: String, // filled in from engine.json 82 | indexName: String, // can optionally be used to specify the elasticsearch index name 83 | typeName: String, // can optionally be used to specify the elasticsearch type name 84 | recsModel: Option[String] = Some(defaultURAlgorithmParams.DefaultRecsModel), // "all", "collabFiltering", "backfill" 85 | eventNames: Seq[String], // names used to ID all user actions 86 | blacklistEvents: Option[Seq[String]] = None,// None means use the primary event, empty array means no filter 87 | // number of events in user-based recs query 88 | maxQueryEvents: Option[Int] = Some(defaultURAlgorithmParams.DefaultMaxQueryEvents), 89 | maxEventsPerEventType: Option[Int] = Some(defaultURAlgorithmParams.DefaultMaxEventsPerEventType), 90 | maxCorrelatorsPerEventType: Option[Int] = Some(defaultURAlgorithmParams.DefaultMaxCorrelatorsPerEventType), 91 | num: Option[Int] = Some(defaultURAlgorithmParams.DefaultNum), // default max # of recs requested 92 | userBias: Option[Float] = None, // will cause the default search engine boost of 1.0 93 | itemBias: Option[Float] = None, // will cause the default search engine boost of 1.0 94 | returnSelf: Option[Boolean] = None, // query building logic defaults this to false 95 | fields: Option[Seq[Field]] = None, //defaults to no fields 96 | // leave out for default or popular 97 | backfillField: Option[BackfillField] = None, 98 | // name of date property field for when the item is available 99 | availableDateName: Option[String] = Some(defaultURAlgorithmParams.DefaultAvailableDateName), 100 | // name of date property field for when an item is no longer available 101 | expireDateName: Option[String] = Some(defaultURAlgorithmParams.DefaultExpireDateName), 102 | // used as the subject of a dateRange in queries, specifies the name of the item property 103 | dateName: Option[String] = Some(defaultURAlgorithmParams.DefaultDateName), 104 | seed: Option[Long] = None) // seed is not used presently 105 | extends Params //fixed default make it reproducible unless supplied 106 | */ 107 | 108 | case class RankingParams( 109 | name: Option[String] = None, 110 | `type`: Option[String] = None, // See [[org.template.BackfillType]] 111 | eventNames: Option[Seq[String]] = None, // None means use the algo eventNames list, otherwise a list of events 112 | offsetDate: Option[String] = None, // used only for tests, specifies the offset date to start the duration so the most 113 | // recent date for events going back by from the more recent offsetDate - duration 114 | endDate: Option[String] = None, 115 | duration: 
Option[String] = None) { // duration worth of events to use in calculation of backfill 116 | override def toString: String = { 117 | s""" 118 | |name: $name, 119 | |type: ${`type`}, 120 | |eventNames: $eventNames, 121 | |offsetDate: $offsetDate, 122 | |endDate: $endDate, 123 | |duration: $duration 124 | |""".stripMargin 125 | } 126 | } 127 | 128 | case class IndicatorParams( 129 | name: String, // must match one in eventNames 130 | maxItemsPerUser: Option[Int], // defaults to maxEventsPerEventType 131 | maxCorrelatorsPerItem: Option[Int], // defaults to maxCorrelatorsPerEventType 132 | minLLR: Option[Double]) // defaults to none, takes precedence over maxCorrelatorsPerItem 133 | 134 | case class URAlgorithmParams( 135 | appName: String, // filled in from engine.json 136 | indexName: String, // can optionally be used to specify the elasticsearch index name 137 | typeName: String, // can optionally be used to specify the elasticsearch type name 138 | recsModel: Option[String] = None, // "all", "collabFiltering", "backfill" 139 | eventNames: Option[Seq[String]], // names used to ID all user actions 140 | blacklistEvents: Option[Seq[String]] = None, // None means use the primary event, empty array means no filter 141 | // number of events in user-based recs query 142 | maxQueryEvents: Option[Int] = None, 143 | maxEventsPerEventType: Option[Int] = None, 144 | maxCorrelatorsPerEventType: Option[Int] = None, 145 | num: Option[Int] = None, // default max # of recs requested 146 | userBias: Option[Float] = None, // will cause the default search engine boost of 1.0 147 | itemBias: Option[Float] = None, // will cause the default search engine boost of 1.0 148 | returnSelf: Option[Boolean] = None, // query building logic defaults this to false 149 | fields: Option[Seq[Field]] = None, // defaults to no fields 150 | // leave out for default or popular 151 | rankings: Option[Seq[RankingParams]] = None, 152 | // name of date property field for when the item is available 153 | availableDateName: Option[String] = None, 154 | // name of date property field for when an item is no longer available 155 | expireDateName: Option[String] = None, 156 | // used as the subject of a dateRange in queries, specifies the name of the item property 157 | dateName: Option[String] = None, 158 | indicators: Option[List[IndicatorParams]] = None, // control params per matrix pair 159 | seed: Option[Long] = None) // seed is not used presently 160 | extends Params // fixed default makes it reproducible unless supplied 161 | 162 | /** Creates cooccurrence, cross-cooccurrence and eventually content correlators with 163 | * [[org.apache.mahout.math.cf.SimilarityAnalysis]] The analysis part of the recommender is 164 | * done here but the algorithm can predict only when the cooccurrence data is indexed in a 165 | * search engine like Elasticsearch. This is done in URModel.save.
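 * For example (illustrative pairing only): with eventNames ["purchase", "view"], the
 * purchase-purchase cooccurrence matrix is the primary correlator and the purchase-view
 * cross-occurrence matrix a secondary one, both indexed as fields of each item document.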
166 | * 167 | * @param ap taken from engine.json to describe limits and event types 168 | */ 169 | class URAlgorithm(val ap: URAlgorithmParams) 170 | extends P2LAlgorithm[PreparedData, NullModel, Query, PredictedResult] { 171 | 172 | @transient lazy implicit val logger: Logger = Logger[this.type] 173 | 174 | case class BoostableCorrelators(actionName: String, itemIDs: Seq[ItemID], boost: Option[Float]) { 175 | def toFilterCorrelators: FilterCorrelators = { 176 | FilterCorrelators(actionName, itemIDs) 177 | } 178 | } 179 | case class FilterCorrelators(actionName: String, itemIDs: Seq[ItemID]) 180 | 181 | val appName: String = ap.appName 182 | val recsModel: String = ap.recsModel.getOrElse(defaultURAlgorithmParams.DefaultRecsModel) 183 | //val eventNames: Seq[String] = ap.eventNames 184 | 185 | val userBias: Float = ap.userBias.getOrElse(1f) 186 | val itemBias: Float = ap.itemBias.getOrElse(1f) 187 | val maxQueryEvents: Int = ap.maxQueryEvents.getOrElse(defaultURAlgorithmParams.DefaultMaxQueryEvents) 188 | val limit: Int = ap.num.getOrElse(defaultURAlgorithmParams.DefaultNum) 189 | 190 | val blacklistEvents: Seq[String] = ap.blacklistEvents.getOrEmpty 191 | val returnSelf: Boolean = ap.returnSelf.getOrElse(defaultURAlgorithmParams.DefaultReturnSelf) 192 | val fields: Seq[Field] = ap.fields.getOrEmpty 193 | 194 | val randomSeed: Int = ap.seed.getOrElse(System.currentTimeMillis()).toInt 195 | val maxCorrelatorsPerEventType: Int = ap.maxCorrelatorsPerEventType 196 | .getOrElse(defaultURAlgorithmParams.DefaultMaxCorrelatorsPerEventType) 197 | val maxEventsPerEventType: Int = ap.maxEventsPerEventType 198 | .getOrElse(defaultURAlgorithmParams.DefaultMaxEventsPerEventType) 199 | 200 | lazy val modelEventNames = if (ap.indicators.isEmpty) { 201 | if (ap.eventNames.isEmpty) { 202 | throw new IllegalArgumentException("No eventNames or indicators in engine.json and one of these is required") 203 | } else ap.eventNames.get 204 | } else { 205 | var eventNames = Seq.empty[String] 206 | ap.indicators.get.foreach { indicator => 207 | eventNames = eventNames :+ indicator.name 208 | } 209 | eventNames 210 | } 211 | 212 | // Unique by 'type' ranking params, if collision get first. 
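  // Illustrative engine.json fragment this de-duplicates (hypothetical values): given
  //   "rankings": [{"name": "popRank", "type": "popular"}, {"name": "hot2", "type": "popular"}]
  // only the first "popular" entry survives the groupBy below.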
213 | lazy val rankingsParams: Seq[RankingParams] = ap.rankings.getOrElse(Seq(RankingParams( 214 | name = Some(defaultURAlgorithmParams.DefaultBackfillFieldName), 215 | `type` = Some(defaultURAlgorithmParams.DefaultBackfillType), 216 | eventNames = Some(modelEventNames.take(1)), 217 | offsetDate = None, 218 | endDate = None, 219 | duration = Some(defaultURAlgorithmParams.DefaultBackfillDuration)))).groupBy(_.`type`).map(_._2.head).toSeq 220 | 221 | val rankingFieldNames: Seq[String] = rankingsParams map { rankingParams => 222 | val rankingType = rankingParams.`type`.getOrElse(defaultURAlgorithmParams.DefaultBackfillType) 223 | val rankingFieldName = rankingParams.name.getOrElse(PopModel.nameByType(rankingType)) 224 | rankingFieldName 225 | } 226 | 227 | val dateNames: Seq[String] = Seq( 228 | ap.dateName, 229 | ap.availableDateName, 230 | ap.expireDateName).collect { case Some(date) => date } distinct 231 | 232 | val esIndex: String = ap.indexName 233 | val esType: String = ap.typeName 234 | 235 | drawInfo("Init URAlgorithm", Seq( 236 | ("══════════════════════════════", "════════════════════════════"), 237 | ("App name", appName), 238 | ("ES index name", esIndex), 239 | ("ES type name", esType), 240 | ("RecsModel", recsModel), 241 | ("Event names", modelEventNames), 242 | ("══════════════════════════════", "════════════════════════════"), 243 | ("Random seed", randomSeed), 244 | ("MaxCorrelatorsPerEventType", maxCorrelatorsPerEventType), 245 | ("MaxEventsPerEventType", maxEventsPerEventType), 246 | ("══════════════════════════════", "════════════════════════════"), 247 | ("User bias", userBias), 248 | ("Item bias", itemBias), 249 | ("Max query events", maxQueryEvents), 250 | ("Limit", limit), 251 | ("══════════════════════════════", "════════════════════════════"), 252 | ("Rankings:", "")) ++ rankingsParams.map(x => (x.`type`.get, x.name))) 253 | 254 | def train(sc: SparkContext, data: PreparedData): NullModel = { 255 | 256 | recsModel match { 257 | case RecsModel.All => calcAll(data)(sc) 258 | case RecsModel.CF => calcAll(data, calcPopular = false)(sc) 259 | case RecsModel.BF => calcPop(data)(sc) 260 | // error, throw an exception 261 | case unknownRecsModel => 262 | throw new IllegalArgumentException( 263 | s""" 264 | |Bad algorithm param recsModel=[$unknownRecsModel] in engine definition params, possibly a bad json value. 265 | |Use one of the available parameter values ($RecsModel).""".stripMargin) 266 | } 267 | } 268 | 269 | /** Calculates recs model as well as popularity model */ 270 | def calcAll( 271 | data: PreparedData, 272 | calcPopular: Boolean = true)(implicit sc: SparkContext): NullModel = { 273 | 274 | // No one likes empty training data. 275 | require( 276 | data.actions.take(1).nonEmpty, 277 | s""" 278 | |Primary action in PreparedData cannot be empty. 
279 | |Please check if DataSource generates TrainingData 280 | |and Preparator generates PreparedData correctly.""".stripMargin) 281 | 282 | //val backfillParams = ap.backfillField.getOrElse(defaultURAlgorithmParams.DefaultBackfillParams) 283 | //val nonDefaultMappings = Map(backfillParams.name.getOrElse(defaultURAlgorithmParams.DefaultBackfillFieldName) -> "float") 284 | 285 | logger.info("Actions read now creating correlators") 286 | val cooccurrenceIDSs = if (ap.indicators.isEmpty) { // using one global set of algo params 287 | SimilarityAnalysis.cooccurrencesIDSs( 288 | data.actions.map(_._2).toArray, 289 | randomSeed = ap.seed.getOrElse(System.currentTimeMillis()).toInt, 290 | maxInterestingItemsPerThing = ap.maxCorrelatorsPerEventType 291 | .getOrElse(defaultURAlgorithmParams.DefaultMaxCorrelatorsPerEventType), 292 | maxNumInteractions = ap.maxEventsPerEventType.getOrElse(defaultURAlgorithmParams.DefaultMaxEventsPerEventType)) 293 | .map(_.asInstanceOf[IndexedDatasetSpark]) 294 | } else { // using params per matrix pair, these take the place of eventNames, maxCorrelatorsPerEventType, 295 | // and maxEventsPerEventType! 296 | val indicators = ap.indicators.get 297 | val iDs = data.actions.map(_._2).toSeq 298 | val datasets = iDs.zipWithIndex.map { 299 | case (iD, i) => 300 | new DownsamplableCrossOccurrenceDataset( 301 | iD, 302 | indicators(i).maxItemsPerUser.getOrElse(defaultURAlgorithmParams.DefaultMaxEventsPerEventType), 303 | indicators(i).maxCorrelatorsPerItem.getOrElse(defaultURAlgorithmParams.DefaultMaxCorrelatorsPerEventType), 304 | indicators(i).minLLR) 305 | }.toList 306 | 307 | SimilarityAnalysis.crossOccurrenceDownsampled( 308 | datasets, 309 | ap.seed.getOrElse(System.currentTimeMillis()).toInt) 310 | .map(_.asInstanceOf[IndexedDatasetSpark]) 311 | } 312 | 313 | val cooccurrenceCorrelators = cooccurrenceIDSs.zip(data.actions.map(_._1)).map(_.swap) //add back the actionNames 314 | 315 | val propertiesRDD: RDD[(ItemID, ItemProps)] = if (calcPopular) { 316 | val ranksRdd = getRanksRDD(data.fieldsRDD) 317 | data.fieldsRDD.fullOuterJoin(ranksRdd).map { 318 | case (item, (Some(fieldsPropMap), Some(rankPropMap))) => item -> (fieldsPropMap ++ rankPropMap) 319 | case (item, (Some(fieldsPropMap), None)) => item -> fieldsPropMap 320 | case (item, (None, Some(rankPropMap))) => item -> rankPropMap 321 | case (item, _) => item -> Map.empty 322 | } 323 | } else { 324 | sc.emptyRDD 325 | } 326 | 327 | logger.info("Correlators created now putting into URModel") 328 | new URModel( 329 | coocurrenceMatrices = cooccurrenceCorrelators, 330 | propertiesRDDs = Seq(propertiesRDD), 331 | typeMappings = getRankingMapping).save(dateNames, esIndex, esType) 332 | new NullModel 333 | } 334 | 335 | /** This function creates a URModel from an existing index in Elasticsearch + new popularity ranking. 336 | * It is used when you want to re-calc the popularity model between trainings on usage data. It leaves 337 | * the part of the model created from usage data alone and only modifies the popularity ranking.
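 * A minimal engine.json sketch that exercises only this path (values borrowed from the
 * examples, not required): "recsModel": "backfill" with
 * "rankings": [{"name": "popularRank", "type": "popular", "duration": "3650 days"}].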
335 |   /** This function creates a URModel from an existing index in Elasticsearch plus a new popularity ranking.
336 |    * It is used when you want to re-calculate the popularity model between trainings on usage data. It leaves
337 |    * the part of the model created from usage data alone and only modifies the popularity ranking.
338 |    */
339 |   def calcPop(data: PreparedData)(implicit sc: SparkContext): NullModel = {
340 | 
341 |     // Aggregate all $set/$unset properties, which are attached to items
342 |     val fieldsRDD: RDD[(ItemID, ItemProps)] = data.fieldsRDD
343 |     // Calc new ranking properties for all items
344 |     val ranksRdd: RDD[(ItemID, ItemProps)] = getRanksRDD(fieldsRDD)
345 |     // Current items RDD from ES
346 |     val currentMetadataRDD: RDD[(ItemID, ItemProps)] = EsClient.getRDD(esIndex, esType)
347 |     val propertiesRDD: RDD[(ItemID, ItemProps)] = currentMetadataRDD.fullOuterJoin(ranksRdd) map {
348 |       case (itemId, maps) =>
349 |         maps match {
350 |           case (Some(metaProp), Some(rankProp)) => itemId -> (metaProp ++ rankProp)
351 |           case (None, Some(rankProp)) => itemId -> rankProp
352 |           case (Some(metaProp), None) => itemId -> metaProp
353 |           case _ => itemId -> Map.empty
354 |         }
355 |     }
356 |     // logger.debug(s"RanksRdd\n${ranksRdd.take(25).mkString("\n")}")
357 | 
358 |     // return the existing model plus the new popularity ranking
359 |     new URModel(
360 |       propertiesRDDs = Seq(fieldsRDD.cache(), propertiesRDD.cache()),
361 |       typeMappings = getRankingMapping).save(dateNames, esIndex, esType)
362 |     new NullModel
363 |   }
364 | 
365 |   var queryEventNames: Seq[String] = Seq.empty[String] // if passed in with the query this overrides the engine.json list;
366 |   // used in MAP@k testing, it only affects which events are used in queries
367 | 
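  /* A sketch of a query that exercises the override above, using hypothetical user and item ids.
   * "eventNames" narrows which events are used (the MAP@k tool relies on this) and "withRanks"
   * asks for the ranking field values to be returned with each ItemScore:
   *
   *   {
   *     "user": "u-1",
   *     "eventNames": ["buy"],
   *     "withRanks": true,
   *     "num": 10
   *   }
   */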
368 |   /** Return a list of items recommended for a user identified in the query.
369 |    * The ES JSON query looks something like this:
370 |    * {
371 |    *   "size": 20,
372 |    *   "query": {
373 |    *     "bool": {
374 |    *       "should": [
375 |    *         {
376 |    *           "terms": {
377 |    *             "rate": ["0", "67", "4"]
378 |    *           }
379 |    *         },
380 |    *         {
381 |    *           "terms": {
382 |    *             "buy": ["0", "32"],
383 |    *             "boost": 2
384 |    *           }
385 |    *         },
386 |    *         { // categorical boosts
387 |    *           "terms": {
388 |    *             "category": ["cat1"],
389 |    *             "boost": 1.05
390 |    *           }
391 |    *         },
392 |    *         {
393 |    *           "constant_score": { // this orders popular items for backfill
394 |    *             "filter": {
395 |    *               "match_all": {}
396 |    *             },
397 |    *             "boost": 0.000001 // must have at least a small number to be boostable
398 |    *           }
399 |    *         }
400 |    *       ],
401 |    *       "must": [ // categorical filters
402 |    *         {
403 |    *           "terms": {
404 |    *             "category": ["cat1"],
405 |    *             "boost": 0
406 |    *           }
407 |    *         },
408 |    *         {
409 |    *           "constant_score": { // the date in the query must fall between an item's available and expire dates
410 |    *             "filter": {
411 |    *               "range": {
412 |    *                 "availabledate": {
413 |    *                   "lte": "2015-08-30T12:24:41-07:00"
414 |    *                 }
415 |    *               }
416 |    *             },
417 |    *             "boost": 0
418 |    *           }
419 |    *         },
420 |    *         {
421 |    *           "constant_score": { // a date range filter in the query must fall between these item property values
422 |    *             "filter": {
423 |    *               "range": {
424 |    *                 "expiredate": {
425 |    *                   "gte": "2015-08-15T11:28:45.114-07:00",
426 |    *                   "lt": "2015-08-20T11:28:45.114-07:00"
427 |    *                 }
428 |    *               }
429 |    *             },
430 |    *             "boost": 0
431 |    *           }
432 |    *         }
433 |    *       ],
434 |    *       "must_not": [ // blacklisted items
435 |    *         {
436 |    *           "ids": { "values": ["item-id1", "item-id2", ...] }
437 |    *         }
438 |    *       ],
439 |    *       "minimum_should_match": 1
440 |    *     }
441 |    *   }
442 |    * }
443 |    *
444 |    * @param model Ignored! since the model is already in Elasticsearch
445 |    * @param query contains the query spec
446 |    * @todo Need to prune the query to the minimum required for the data included; for instance, there is no need
447 |    *       for the popularity ranking if no PopModel is being used, and likewise for the "must" clause and dates.
448 |    */
449 |   def predict(model: NullModel, query: Query): PredictedResult = {
450 | 
451 |     queryEventNames = query.eventNames.getOrElse(modelEventNames) // eventNames in query take precedence
452 | 
453 |     val (queryStr, blacklist) = buildQuery(ap, query, rankingFieldNames)
454 |     val searchHitsOpt = EsClient.search(queryStr, esIndex)
455 | 
456 |     val withRanks = query.withRanks.getOrElse(false)
457 |     val predictedResult = searchHitsOpt match {
458 |       case Some(searchHits) =>
459 |         val recs = searchHits.getHits.map { hit =>
460 |           if (withRanks) {
461 |             val source = hit.getSource
462 |             val ranks: Map[String, Double] = rankingsParams map { backfillParams =>
463 |               val backfillType = backfillParams.`type`.getOrElse(defaultURAlgorithmParams.DefaultBackfillType)
464 |               val backfillFieldName = backfillParams.name.getOrElse(PopModel.nameByType(backfillType))
465 |               backfillFieldName -> source.get(backfillFieldName).asInstanceOf[Double]
466 |             } toMap
467 | 
468 |             ItemScore(hit.getId, hit.getScore.toDouble,
469 |               ranks = if (ranks.nonEmpty) Some(ranks) else None)
470 |           } else {
471 |             ItemScore(hit.getId, hit.getScore.toDouble)
472 |           }
473 |         }
474 |         logger.info(s"Results: ${searchHits.getHits.length} retrieved of a possible ${searchHits.totalHits()}")
475 |         PredictedResult(recs)
476 | 
477 |       case _ =>
478 |         logger.info(s"No results for query ${parse(queryStr)}")
479 |         PredictedResult(Array.empty[ItemScore])
480 |     }
481 | 
482 |     // should have all blacklisted items excluded
483 |     // todo: need to add dithering; mean, sigma, and seed are required. Make a seed that only changes on some
484 |     // fixed time period so the recs ordering stays fixed for that time period.
485 |     predictedResult
486 |   }
487 | 
488 |   /** Calculate all fields and items needed for ranking.
489 |    *
490 |    *  @param fieldsRDD all items with their fields
491 |    *  @param sc the current Spark context
492 |    *  @return an RDD of (itemID, ranking property map) for all ranked items
493 |    */
494 |   def getRanksRDD(fieldsRDD: RDD[(ItemID, ItemProps)])(implicit sc: SparkContext): RDD[(ItemID, ItemProps)] = {
495 |     val popModel = PopModel(fieldsRDD)
496 |     val rankRDDs: Seq[(String, RDD[(ItemID, Double)])] = rankingsParams map { rankingParams =>
497 |       val rankingType = rankingParams.`type`.getOrElse(defaultURAlgorithmParams.DefaultBackfillType)
498 |       val rankingFieldName = rankingParams.name.getOrElse(PopModel.nameByType(rankingType))
499 |       val durationAsString = rankingParams.duration.getOrElse(defaultURAlgorithmParams.DefaultBackfillDuration)
500 |       val duration = Duration(durationAsString).toSeconds.toInt
501 |       val backfillEvents = rankingParams.eventNames.getOrElse(modelEventNames.take(1))
502 |       val offsetDate = rankingParams.offsetDate
503 |       val rankRdd = popModel.calc(modelName = rankingType, eventNames = backfillEvents, appName, duration, offsetDate)
504 |       rankingFieldName -> rankRdd
505 |     }
506 | 
507 |     // logger.debug(s"RankRDDs[${rankRDDs.size}]\n${rankRDDs.map(_._1).mkString(", ")}\n${rankRDDs.map(_._2.take(25).mkString("\n")).mkString("\n\n")}")
508 |     rankRDDs.foldLeft[RDD[(ItemID, ItemProps)]](sc.emptyRDD) {
509 |       case (leftRdd, (fieldName, rightRdd)) =>
510 |         leftRdd.fullOuterJoin(rightRdd).map {
511 |           case (itemId, (Some(propMap), Some(rank))) => itemId -> (propMap + (fieldName -> JDouble(rank)))
512 |           case (itemId, (Some(propMap), None)) => itemId -> propMap
513 |           case (itemId, (None, Some(rank))) => itemId -> Map(fieldName -> JDouble(rank))
514 |           case (itemId, _) => itemId -> Map.empty
515 |         }
516 |     }
517 |   }
518 | 
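  /* A sketch of how rankings params feed getRanksRDD above, assuming this hypothetical
   * engine.json fragment (the duration strings are the Scala Duration format introduced in
   * v0.3.0, and the type strings are whatever PopModel.calc supports, e.g. popular or random):
   *
   *   "rankings": [
   *     { "name": "popRank", "type": "popular", "eventNames": ["buy"], "duration": "3 days" },
   *     { "type": "random" }
   *   ]
   *
   * One PopModel.calc runs per entry; "popRank" and the default field name for "random" are then
   * written to every item as float properties, which buildQuerySort orders by, descending.
   */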
519 |   /** Build a query from the default algorithm params and the query itself, taking into account defaults. */
520 |   def buildQuery(
521 |     ap: URAlgorithmParams,
522 |     query: Query,
523 |     backfillFieldNames: Seq[String] = Seq.empty): (String, Seq[Event]) = {
524 | 
525 |     try {
526 |       // create a list of all query correlators that can have a bias (boost or filter) attached
527 |       val (boostable, events) = getBiasedRecentUserActions(query)
528 | 
529 |       // Since users have action history and items have correlators, and both correspond to the same "actions"
530 |       // (like purchase or view), we pass both to the query. If the user history and item correlators are empty,
531 |       // metadata or backfill must be relied on to return results.
532 |       val numRecs = query.num.getOrElse(limit)
533 |       val should = buildQueryShould(query, boostable)
534 |       val must = buildQueryMust(query, boostable)
535 |       val mustNot = buildQueryMustNot(query, events)
536 |       val sort = buildQuerySort()
537 | 
538 |       val json =
539 |         ("size" -> numRecs) ~
540 |           ("query" ->
541 |             ("bool" ->
542 |               ("should" -> should) ~
543 |               ("must" -> must) ~
544 |               ("must_not" -> mustNot) ~
545 |               ("minimum_should_match" -> 1))) ~
546 |           ("sort" -> sort)
547 | 
548 |       val compactJson = compact(render(json))
549 | 
550 |       logger.info(s"Query:\n$compactJson")
551 |       (compactJson, events)
552 |     } catch {
553 |       case e: IllegalArgumentException => ("", Seq.empty[Event])
554 |     }
555 |   }
556 | 
557 |   /** Build should query part */
558 |   def buildQueryShould(query: Query, boostable: Seq[BoostableCorrelators]): Seq[JValue] = {
559 | 
560 |     // create a list of all boosted query correlators
561 |     val recentUserHistory: Seq[BoostableCorrelators] = if (userBias >= 0f) {
562 |       boostable.take(maxQueryEvents) // take at most maxQueryEvents correlators
563 |     } else {
564 |       Seq.empty
565 |     }
566 | 
567 |     val similarItems: Seq[BoostableCorrelators] = if (itemBias >= 0f) {
568 |       getBiasedSimilarItems(query)
569 |     } else {
570 |       Seq.empty
571 |     }
572 | 
573 |     val boostedMetadata = getBoostedMetadata(query)
574 |     val allBoostedCorrelators = recentUserHistory ++ similarItems ++ boostedMetadata
575 | 
576 |     val shouldFields: Seq[JValue] = allBoostedCorrelators.map {
577 |       case BoostableCorrelators(actionName, itemIDs, boost) =>
578 |         render("terms" -> (actionName -> itemIDs) ~ ("boost" -> boost))
579 |     }
580 | 
581 |     val shouldScore: JValue = parse(
582 |       """
583 |         |{
584 |         |  "constant_score": {
585 |         |    "filter": {
586 |         |      "match_all": {}
587 |         |    },
588 |         |    "boost": 0
589 |         |  }
590 |         |}
591 |         |""".stripMargin)
592 | 
593 |     shouldFields :+ shouldScore
594 |   }
595 | 
596 |   /** Build must query part */
597 |   def buildQueryMust(query: Query, boostable: Seq[BoostableCorrelators]): Seq[JValue] = {
598 | 
599 |     // create a list of all query correlators that are to be used to filter results
600 |     val recentUserHistoryFilter: Seq[FilterCorrelators] = if (userBias < 0f) {
601 |       // strip any boosts
602 |       boostable.map(_.toFilterCorrelators).take(maxQueryEvents)
603 |     } else {
604 |       Seq.empty
605 |     }
606 | 
607 |     val similarItemsFilter: Seq[FilterCorrelators] = if (itemBias < 0f) {
608 |       getBiasedSimilarItems(query).map(_.toFilterCorrelators)
609 |     } else {
610 |       Seq.empty
611 |     }
612 | 
613 |     val filteringMetadata = getFilteringMetadata(query)
614 |     val filteringDateRange = getFilteringDateRange(query)
615 |     val allFilteringCorrelators = recentUserHistoryFilter ++ similarItemsFilter ++ filteringMetadata
616 | 
617 |     val mustFields: Seq[JValue] = allFilteringCorrelators.map {
618 |       case FilterCorrelators(actionName, itemIDs) =>
619 |         render("terms" -> (actionName -> itemIDs) ~ ("boost" -> 0))
620 |     }
621 |     mustFields ++ filteringDateRange
622 |   }
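  /* A sketch of what the should/must builders above render, under assumed example values:
   *
   *   BoostableCorrelators("buy", Seq("iPad", "iPhone"), Some(2f)) becomes
   *     { "terms": { "buy": ["iPad", "iPhone"], "boost": 2.0 } }
   *   FilterCorrelators("category", Seq("tablets")) becomes
   *     { "terms": { "category": ["tablets"], "boost": 0 } }
   *
   * The "should" list always ends with a constant_score/match_all clause, so items that match no
   * correlator can still satisfy minimum_should_match and be ordered by the popularity sort.
   */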
623 | 
624 |   /** Build must_not query part */
625 |   def buildQueryMustNot(query: Query, events: Seq[Event]): JValue = {
626 |     val mustNotFields: JValue = render("ids" -> ("values" -> getExcludedItems(events, query)) ~ ("boost" -> 0))
627 |     mustNotFields
628 |   }
629 | 
630 |   /** Build sort query part */
631 |   def buildQuerySort(): Seq[JValue] = if (recsModel == RecsModel.All || recsModel == RecsModel.BF) {
632 |     val sortByScore: Seq[JValue] = Seq(parse("""{"_score": {"order": "desc"}}"""))
633 |     val sortByRanks: Seq[JValue] = rankingFieldNames map { fieldName =>
634 |       parse(s"""{ "$fieldName": { "unmapped_type": "double", "order": "desc" } }""")
635 |     }
636 |     sortByScore ++ sortByRanks
637 |   } else {
638 |     Seq.empty
639 |   }
640 | 
641 |   /** Create a list of item ids that the user has interacted with, or that are otherwise not to be included in recommendations */
642 |   def getExcludedItems(userEvents: Seq[Event], query: Query): Seq[String] = {
643 | 
644 |     val blacklistedItems = userEvents.filter { event =>
645 |       // either a list or an empty list of filtering events, so honor them
646 |       blacklistEvents match {
647 |         case Nil => modelEventNames.head equals event.event
648 |         case _ => blacklistEvents contains event.event
649 |       }
650 |     }.map(_.targetEntityId.getOrElse("")) ++ query.blacklistItems.getOrEmpty.distinct
651 | 
652 |     // Now conditionally add the query item itself
653 |     val includeSelf = query.returnSelf.getOrElse(returnSelf)
654 |     val allExcludedItems = if (!includeSelf && query.item.nonEmpty) {
655 |       blacklistedItems :+ query.item.get
656 |     } // add the query item to be excluded
657 |     else {
658 |       blacklistedItems
659 |     }
660 |     allExcludedItems.distinct
661 |   }
662 | 
663 |   /** Get similar items for an item; these are already in the action correlators in ES */
664 |   def getBiasedSimilarItems(query: Query): Seq[BoostableCorrelators] = {
665 |     if (query.item.nonEmpty) {
666 |       val m = EsClient.getSource(esIndex, esType, query.item.get)
667 | 
668 |       if (m != null) {
669 |         val itemEventBias = query.itemBias.getOrElse(itemBias)
670 |         val itemEventsBoost = if (itemEventBias > 0 && itemEventBias != 1) Some(itemEventBias) else None
671 |         modelEventNames.map { action =>
672 |           val items: Seq[String] = try {
673 |             if (m.containsKey(action) && m.get(action) != null) {
674 |               m.get(action).asInstanceOf[util.ArrayList[String]].asScala
675 |             } else {
676 |               Seq.empty[String]
677 |             }
678 |           } catch {
679 |             case cce: ClassCastException =>
680 |               logger.warn(s"Bad value in item [${query.item}] corresponding to key [$action]; expected a Seq[String] so ignoring it.")
681 |               Seq.empty[String]
682 |           }
683 |           val rItems = items.take(maxQueryEvents) // limit to at most maxQueryEvents correlator values
684 |           BoostableCorrelators(action, rItems, itemEventsBoost)
685 |         }
686 |       } else {
687 |         Seq.empty
688 |       } // no similar items
689 |     } else {
690 |       Seq.empty[BoostableCorrelators]
691 |     } // no item specified
692 |   }
693 | 
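  /* A sketch of an item-similarity query, assuming a hypothetical item id "iPad-1" that exists
   * in the index:
   *
   *   { "item": "iPad-1", "itemBias": 2 }
   *
   * getBiasedSimilarItems above reads the stored correlator lists for "iPad-1" (one per event
   * name) and wraps each as a BoostableCorrelators with boost 2, so items that co-occur with it
   * in user histories are favored; a negative itemBias would turn these into filters instead.
   */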
694 |   /** Get recent events of the user on items to create the recommendations query from */
695 |   def getBiasedRecentUserActions(query: Query): (Seq[BoostableCorrelators], Seq[Event]) = {
696 | 
697 |     val recentEvents = try {
698 |       LEventStore.findByEntity(
699 |         appName = appName,
700 |         // entityType and entityId are specified for fast lookup
701 |         entityType = "user",
702 |         entityId = query.user.get,
703 |         // one query per eventName is not ideal, maybe one query for lots of events then split by eventName
704 |         //eventNames = Some(Seq(action)),// get all and separate later
705 |         eventNames = Some(queryEventNames), // get all and separate later
706 |         targetEntityType = None,
707 |         // limit = Some(maxQueryEvents), // this will get all history, then each action can be limited before
708 |         // being used in the query
709 |         latest = true,
710 |         // set time limit to avoid super long DB access
711 |         timeout = Duration(200, "millis")).toList
712 |     } catch {
713 |       case e: scala.concurrent.TimeoutException =>
714 |         logger.error(s"Timeout when reading recent events; an empty list will be used. $e")
715 |         Seq.empty[Event]
716 |       case e: NoSuchElementException => // todo: bad form to use an exception to check if there is a user id
717 |         logger.info("No user id for recs, returning similar items for the item specified")
718 |         Seq.empty[Event]
719 |       case e: Exception => // fatal because of error, an empty query
720 |         logger.error(s"Error when reading recent events: $e")
721 |         throw e
722 |     }
723 | 
724 |     val userEventBias = query.userBias.getOrElse(userBias)
725 |     val userEventsBoost = if (userEventBias > 0 && userEventBias != 1) Some(userEventBias) else None
726 |     val rActions = queryEventNames.map { action =>
727 |       var items = Seq.empty[String]
728 | 
729 |       for (event <- recentEvents)
730 |         if (event.event == action && items.size < maxQueryEvents) {
731 |           items = event.targetEntityId.get +: items
732 |           // todo: this may throw an exception; we should ignore the event instead of crashing
733 |         }
734 |       // userEventsBoost may be None, which will cause no boost JSON to be output for this correlator
735 |       BoostableCorrelators(action, items.distinct, userEventsBoost)
736 |     }
737 |     (rActions, recentEvents)
738 |   }
739 | 
740 |   /** get all metadata fields that potentially have boosts (not filters) */
741 |   def getBoostedMetadata(query: Query): Seq[BoostableCorrelators] = {
742 |     val paramsBoostedFields = fields.filter(_.bias >= 0f) // bias >= 0 means boost; bias < 0 means filter
743 |     val queryBoostedFields = query.fields.getOrEmpty.filter(_.bias >= 0f)
744 | 
745 |     (queryBoostedFields ++ paramsBoostedFields)
746 |       .map(field => BoostableCorrelators(field.name, field.values, Some(field.bias)))
747 |       .distinct // de-dup and favor query fields
748 |   }
749 | 
750 |   /** get all metadata fields that are filters (not boosts) */
751 |   def getFilteringMetadata(query: Query): Seq[FilterCorrelators] = {
752 |     val paramsFilterFields = fields.filter(_.bias < 0f)
753 |     val queryFilterFields = query.fields.getOrEmpty.filter(_.bias < 0f)
754 | 
755 |     (queryFilterFields ++ paramsFilterFields)
756 |       .map(field => FilterCorrelators(field.name, field.values))
757 |       .distinct // de-dup and favor query fields
758 |   }
759 | 
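  /* A sketch of the field bias convention used by the two methods above, with a hypothetical
   * query fragment:
   *
   *   "fields": [
   *     { "name": "category", "values": ["tablets"], "bias": 1.05 },
   *     { "name": "countries", "values": ["US", "CA"], "bias": -1 }
   *   ]
   *
   * bias >= 0 yields a boosted "terms" clause in "should" (1.05 mildly favors tablets); bias < 0
   * yields a zero-boost "terms" clause in "must", so only items matching "US" or "CA" are returned.
   */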
760 |   /** get part of query for dates and date ranges */
761 |   def getFilteringDateRange(query: Query): Seq[JValue] = {
762 | 
763 |     // a dateRange in the query overrides the currentDate, so the currentDate is only used when no dateRange is present
764 |     val currentDate = query.currentDate.getOrElse(DateTime.now().toDateTimeISO.toString)
765 | 
766 |     val json: Seq[JValue] = if (query.dateRange.nonEmpty &&
767 |       (query.dateRange.get.after.nonEmpty || query.dateRange.get.before.nonEmpty)) {
768 |       val name = query.dateRange.get.name
769 |       val before = query.dateRange.get.before.getOrElse("")
770 |       val after = query.dateRange.get.after.getOrElse("")
771 |       val rangeStart = s"""
772 |         |{
773 |         |  "constant_score": {
774 |         |    "filter": {
775 |         |      "range": {
776 |         |        "$name": {
777 |       """.stripMargin
778 | 
779 |       val rangeAfter = s"""
780 |         |          "gt": "$after"
781 |       """.stripMargin
782 | 
783 |       val rangeBefore = s"""
784 |         |          "lt": "$before"
785 |       """.stripMargin
786 | 
787 |       val rangeEnd = s"""
788 |         |        }
789 |         |      }
790 |         |    },
791 |         |    "boost": 0
792 |         |  }
793 |         |}
794 |       """.stripMargin
795 | 
796 |       var range = rangeStart
797 |       if (!after.isEmpty) {
798 |         range += rangeAfter
799 |         if (!before.isEmpty) range += ","
800 |       }
801 |       if (!before.isEmpty) range += rangeBefore
802 |       range += rangeEnd
803 | 
804 |       Seq(parse(range))
805 |     } else if (ap.availableDateName.nonEmpty && ap.expireDateName.nonEmpty) { // use the query date or system date
806 |       val availableDate = ap.availableDateName.get // never None
807 |       val expireDate = ap.expireDateName.get
808 |       val available = s"""
809 |         |{
810 |         |  "constant_score": {
811 |         |    "filter": {
812 |         |      "range": {
813 |         |        "$availableDate": {
814 |         |          "lte": "$currentDate"
815 |         |        }
816 |         |      }
817 |         |    },
818 |         |    "boost": 0
819 |         |  }
820 |         |}
821 |       """.stripMargin
822 |       val expire = s"""
823 |         |{
824 |         |  "constant_score": {
825 |         |    "filter": {
826 |         |      "range": {
827 |         |        "$expireDate": {
828 |         |          "gt": "$currentDate"
829 |         |        }
830 |         |      }
831 |         |    },
832 |         |    "boost": 0
833 |         |  }
834 |         |}
835 |       """.stripMargin
836 | 
837 |       Seq(parse(available), parse(expire))
838 |     } else {
839 |       logger.info(
840 |         """
841 |           |Misconfigured date information: either your engine.json date settings or your query's dateRange is incorrect.
842 |           |Ignoring date information for this query.""".stripMargin)
843 |       Seq.empty
844 |     }
845 |     json
846 |   }
847 | 
848 |   def getRankingMapping: Map[String, String] = rankingFieldNames map { fieldName =>
849 |     fieldName -> "float"
850 |   } toMap
851 | 
852 | }
853 | 
--------------------------------------------------------------------------------
/src/main/scala/URModel.scala:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright ActionML, LLC under one or more
3 |  * contributor license agreements.  See the NOTICE file distributed with
4 |  * this work for additional information regarding copyright ownership.
5 |  * ActionML licenses this file to You under the Apache License, Version 2.0
6 |  * (the "License"); you may not use this file except in compliance with
7 |  * the License.  You may obtain a copy of the License at
8 |  *
9 |  *     http://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | package org.template
19 | 
20 | import grizzled.slf4j.Logger
21 | import io.prediction.data.storage.DataMap
22 | import org.apache.mahout.math.indexeddataset.IndexedDataset
23 | import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark
24 | import org.apache.spark.SparkContext
25 | import org.apache.spark.rdd.RDD
26 | import org.joda.time.DateTime
27 | import org.json4s.JsonAST.JArray
28 | import org.json4s._
29 | import org.template.conversions.{ IndexedDatasetConversions, ItemID, ItemProps }
30 | 
31 | /** Universal Recommender models to save in ES */
32 | class URModel(
33 |   coocurrenceMatrices: Seq[(ItemID, IndexedDataset)] = Seq.empty,
34 |   propertiesRDDs: Seq[RDD[(ItemID, ItemProps)]] = Seq.empty,
35 |   typeMappings: Map[String, String] = Map.empty, // maps field names that need type mapping in Elasticsearch
36 |   nullModel: Boolean = false)(implicit sc: SparkContext) {
37 | 
38 |   @transient lazy val logger: Logger = Logger[this.type]
39 | 
40 |   /** Save all fields to be indexed by Elasticsearch and queried for recs.
41 |    * The result is something like a table with row IDs = item IDs and separate fields for all
42 |    * cooccurrence and cross-cooccurrence correlators and metadata for each item. Metadata fields are
43 |    * limited to collections of text terms, i.e. vector types. Scalar values can be used but depend on
44 |    * Elasticsearch's support. One exception is the Date scalar, which is also supported.
45 |    * @return always returns true since most other reasons not to save cause exceptions
46 |    */
47 |   def save(dateNames: Seq[String], esIndex: String, esType: String): Boolean = {
48 | 
49 |     logger.debug("Start save model")
50 | 
51 |     if (nullModel) throw new IllegalStateException("Saving a null model created from loading an old one.")
52 | 
53 |     // For ES we need to create the entire index in an RDD of maps, one per item, so we
54 |     // convert the cooccurrence matrices into correlators as RDD[(itemID, (actionName, Seq[itemID]))],
55 |     // the form Elasticsearch expects
56 |     logger.info("Converting cooccurrence matrices into correlators")
57 |     val correlatorRDDs: Seq[RDD[(ItemID, ItemProps)]] = coocurrenceMatrices.map {
58 |       case (actionName, dataset) =>
59 |         dataset.asInstanceOf[IndexedDatasetSpark].toStringMapRDD(actionName)
60 |     }
61 | 
62 |     logger.info("Grouping all properties RDDs")
63 |     val groupedRDD: RDD[(ItemID, ItemProps)] = groupAll(correlatorRDDs ++ propertiesRDDs)
64 |     // logger.debug(s"Grouped RDD\n${groupedRDD.take(25).mkString("\n")}")
65 | 
66 |     val esRDD: RDD[Map[String, Any]] = groupedRDD.mapPartitions { iter =>
67 |       iter map {
68 |         case (itemId, itemProps) =>
69 |           val propsMap = itemProps.map {
70 |             case (propName, propValue) =>
71 |               propName -> URModel.extractJvalue(dateNames, propName, propValue)
72 |           }
73 |           propsMap + ("id" -> itemId)
74 |       }
75 |     }
76 |     // logger.debug(s"ES RDD\n${esRDD.take(25).mkString("\n")}")
77 | 
78 |     val esFields: List[String] = esRDD.flatMap(_.keySet).distinct().collect.toList
79 |     logger.info(s"ES fields[${esFields.size}]: $esFields")
80 | 
81 |     EsClient.hotSwap(esIndex, esType, esRDD, esFields, typeMappings)
82 |     true
83 |   }
84 | 
85 |   def groupAll(fields: Seq[RDD[(ItemID, ItemProps)]]): RDD[(ItemID, ItemProps)] = {
86 |     fields.fold(sc.emptyRDD[(ItemID, ItemProps)])(_ ++ _).reduceByKey(_ ++ _)
87 |   }
88 | }
89 | 
90 | object URModel {
91 |   @transient lazy val logger: Logger = Logger[this.type]
92 | 
93 |   /** This is actually only used to read saved values, and since they are in Elasticsearch we don't need to read;
94 |    * 
this means we create a null model since it will not be used. 95 | * todo: we should rejigger the template framework so this is not required. 96 | * @param id ignored 97 | * @param params ignored 98 | * @param sc ignored 99 | * @return dummy null model 100 | */ 101 | def apply(id: String, params: URAlgorithmParams, sc: Option[SparkContext]): URModel = { 102 | // todo: need changes in PIO to remove the need for this 103 | new URModel(null, null, null, nullModel = true)(sc.get) 104 | } 105 | 106 | def extractJvalue(dateNames: Seq[String], key: String, value: Any): Any = value match { 107 | case JArray(list) => list.map(extractJvalue(dateNames, key, _)) 108 | case JString(s) => 109 | if (dateNames.contains(key)) { 110 | new DateTime(s).toDate 111 | } else if (RankingFieldName.toSeq.contains(key)) { 112 | s.toDouble 113 | } else { 114 | s 115 | } 116 | case JDouble(double) => double 117 | case JInt(int) => int 118 | case JBool(bool) => bool 119 | case _ => value 120 | } 121 | 122 | } 123 | -------------------------------------------------------------------------------- /src/main/scala/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.template 19 | 20 | import grizzled.slf4j.Logger 21 | 22 | import scala.collection.JavaConversions._ 23 | import org.apache.mahout.sparkbindings.SparkDistributedContext 24 | import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark 25 | import org.apache.mahout.sparkbindings._ 26 | import org.apache.spark.rdd.RDD 27 | import org.json4s._ 28 | 29 | /** Utility conversions for IndexedDatasetSpark */ 30 | package object conversions { 31 | 32 | type UserID = String 33 | type ActionID = String 34 | type ItemID = String 35 | // Item properties (fieldName, fieldValue) 36 | type ItemProps = Map[String, JValue] 37 | 38 | def drawActionML(implicit logger: Logger): Unit = { 39 | val actionML = 40 | """ 41 | | 42 | | _ _ __ __ _ 43 | | /\ | | (_) | \/ | | 44 | | / \ ___| |_ _ ___ _ __ | \ / | | 45 | | / /\ \ / __| __| |/ _ \| '_ \| |\/| | | 46 | | / ____ \ (__| |_| | (_) | | | | | | | |____ 47 | | /_/ \_\___|\__|_|\___/|_| |_|_| |_|______| 48 | | 49 | | 50 | """.stripMargin 51 | 52 | logger.info(actionML) 53 | } 54 | 55 | def drawInfo(title: String, dataMap: Seq[(String, Any)])(implicit logger: Logger): Unit = { 56 | val leftAlignFormat = "║ %-30s%-28s ║" 57 | 58 | val line = "═" * 60 59 | 60 | val preparedTitle = "║ %-58s ║".format(title) 61 | val data = dataMap.map { 62 | case (key, value) => 63 | leftAlignFormat.format(key, value) 64 | } mkString "\n" 65 | 66 | logger.info( 67 | s""" 68 | |╔$line╗ 69 | |$preparedTitle 70 | |$data 71 | |╚$line╝ 72 | |""".stripMargin) 73 | 74 | } 75 | 76 | implicit class OptionCollection[T](collectionOpt: Option[Seq[T]]) { 77 | def getOrEmpty: Seq[T] = { 78 | collectionOpt.getOrElse(Seq.empty[T]) 79 | } 80 | } 81 | 82 | implicit class IndexedDatasetConversions(val indexedDataset: IndexedDatasetSpark) { 83 | def toStringMapRDD(actionName: ActionID): RDD[(ItemID, ItemProps)] = { 84 | @transient lazy val logger = Logger[this.type] 85 | 86 | //val matrix = indexedDataset.matrix.checkpoint() 87 | val rowIDDictionary = indexedDataset.rowIDs 88 | implicit val sc = indexedDataset.matrix.context.asInstanceOf[SparkDistributedContext].sc 89 | val rowIDDictionary_bcast = sc.broadcast(rowIDDictionary) 90 | 91 | val columnIDDictionary = indexedDataset.columnIDs 92 | val columnIDDictionary_bcast = sc.broadcast(columnIDDictionary) 93 | 94 | // may want to mapPartition and create bulk updates as a slight optimization 95 | // creates an RDD of (itemID, Map[correlatorName, list-of-correlator-values]) 96 | indexedDataset.matrix.rdd.map[(ItemID, ItemProps)] { 97 | case (rowNum, itemVector) => 98 | 99 | // turn non-zeros into list for sorting 100 | var itemList = List[(Int, Double)]() 101 | for (ve <- itemVector.nonZeroes) { 102 | itemList = itemList :+ (ve.index, ve.get) 103 | } 104 | //sort by highest strength value descending(-) 105 | val vector = itemList.sortBy { elem => -elem._2 } 106 | 107 | val itemID = rowIDDictionary_bcast.value.inverse.getOrElse(rowNum, "INVALID_ITEM_ID") 108 | try { 109 | 110 | require(itemID != "INVALID_ITEM_ID", s"Bad row number in matrix, skipping item $rowNum") 111 | require(vector.nonEmpty, s"No values so skipping item $rowNum") 112 | 113 | // create a list of element ids 114 | val values = JArray(vector.map { item => 115 | JString(columnIDDictionary_bcast.value.inverse.getOrElse(item._1, "")) // should always be in the dictionary 116 | }) 117 | 118 | (itemID, Map(actionName -> values)) 119 | 120 | } catch { 121 | case cce: IllegalArgumentException => //non-fatal, ignore line 122 | null.asInstanceOf[(ItemID, 
ItemProps)] 123 | } 124 | 125 | }.filter(_ != null) 126 | } 127 | } 128 | 129 | } 130 | -------------------------------------------------------------------------------- /template.json: -------------------------------------------------------------------------------- 1 | {"pio": {"version": { "min": "0.9.7-aml" }}} 2 | --------------------------------------------------------------------------------