├── .gitignore ├── NOTICE.txt ├── README.md ├── build.sbt ├── data ├── handmade-event-description.txt ├── integration-test-expected.txt ├── rank-test-query-expected.txt ├── sample-handmade-data.txt ├── sample-handmade-data1.txt ├── sample-handmade-data2.txt ├── sample-handmade-data3.txt ├── sample-rank-data.txt ├── sample-rank-empty-action-data.txt ├── sample-rank-empty-set-data.txt └── sample_movielens_data.txt ├── docs └── universal_recommender.md ├── engine.json ├── engine.json.minimum ├── engine.json.spark-tuning ├── event-names-test-engine.json ├── examples ├── handmade-engine.json ├── hot-3-day-engine.json ├── import_handmade.py ├── import_handmade_pop_test.py ├── import_movielens_eventserver.py ├── integration-test ├── integration-test-pop-model ├── multi-query-handmade.sh ├── multi-query-movielens.sh ├── pop-engine-4-days-ago.json ├── pop-engine.json ├── pop-test-query.sh ├── rank │ ├── import_rank.py │ ├── integration-test-rank │ ├── multi-query-rank.sh │ ├── rank-engine-user-define.json │ └── rank-engine.json ├── single-query-eventNames.sh ├── single-query-handmade.sh ├── trend-engine-4-days-ago.json └── trend-engine.json ├── project └── plugins.sbt ├── scalastyle-config.xml ├── src └── main │ └── scala │ ├── DataSource.scala │ ├── Engine.scala │ ├── EsClient.scala │ ├── PopModel.scala │ ├── Preparator.scala │ ├── Serving.scala │ ├── URAlgorithm.scala │ ├── URModel.scala │ └── package.scala └── template.json /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | .cache/ 6 | .history/ 7 | .lib/ 8 | dist/* 9 | target/ 10 | lib_managed/ 11 | src_managed/ 12 | project/boot/ 13 | project/plugins/project/ 14 | 15 | # Scala-IDE specific 16 | .scala_dependencies 17 | .worksheet 18 | 19 | # Mac specific 20 | .DS_Store 21 | 22 | # IntelliJ IDEA 23 | *.iml 24 | .idea 25 | 26 | #PredictionIO specific 27 | manifest.json 28 | target/ 29 | pio.log 30 | /pio.sbt 31 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | ============================================================== 2 | ActionML LLC 3 | Copyright 2015 and onwards ActionML LLC 4 | ============================================================== 5 | 6 | This product includes software developed by 7 | ActionML (http://actionml.com/). 8 | 9 | It includes software from other Apache Software Foundation projects, 10 | including, but not limited to: 11 | - Elasticsearch (Apache 2 license) 12 | - Apache Hadoop 13 | - Apache Spark 14 | - Apache Mahout 15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Universal Recommender 2 | 3 | The Universal Recommender (UR) is a new type of collaborative filtering recommender based on an algorithm that can use data from a wide variety of user taste indicators—the Correlated Cross-Occurrence (CCO) algorithm. Unlike the matrix factorization embodied in things like MLlib's ALS, CCO is able to ingest any number of user actions, events, profile data, and contextual information. It then serves results in a fast and scalable way. It also supports item properties for filtering and boosting recommendations and can therefore be considered a hybrid collaborative filtering and content-based recommender.
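For example, a single query to a deployed UR engine can blend personalization with property-based business rules. Below is a minimal sketch of such a query body, POSTed to the engine's `queries.json` endpoint; the field names follow the UR query format, while the property names and bias values here are illustrative (by the bias convention noted in the changelog below, a negative bias filters and a bias above 1 boosts):

```json
{
  "user": "u1",
  "num": 4,
  "fields": [
    {
      "name": "categories",
      "values": ["Phones"],
      "bias": -1
    },
    {
      "name": "countries",
      "values": ["United States"],
      "bias": 1.2
    }
  ]
}
```

Here `"bias": -1` restricts results to items with the "Phones" category, while `"bias": 1.2` favors, without requiring, items available in the United States.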
4 | 5 | The use of multiple **types** of data fundamentally changes the way a recommender is used and, when employed correctly, will provide a significant increase in the quality of recommendations vs. using only one user event. Most recommenders, for instance, can only use "purchase" events. Using all we know about a user and their context allows us to predict their preferences much better. 6 | 7 | # Documentation 8 | 9 | - [The Universal Recommender](http://actionml.com/docs/ur) 10 | - [The Correlated Cross-Occurrence Algorithm](http://mahout.apache.org/users/algorithms/intro-cooccurrence-spark.html) 11 | - [The Universal Recommender Slide Deck](http://www.slideshare.net/pferrel/unified-recommender-39986309) 12 | 13 | 14 | All docs for the Universal Recommender are [here](http://actionml.com/docs/ur) and are now hosted in a separate repo at [https://github.com/actionml/docs.actionml.com](https://github.com/actionml/docs.actionml.com). If you wish to change or edit those docs, make a PR to that repo. 15 | 16 | 17 | # Version Changelog 18 | 19 | ## v0.4.2 **Replaces 0.4.1** 20 | 21 | - Fixes a `pio build` failure triggered by the release of Apache PIO. If you have problems building v0.4.0, use this version. It is meant to be used with PredictionIO-0.9.7-aml. 22 | - **Requires a custom build of Apache Mahout**: instructions are on the [doc site](http://actionml.com/docs/ur_quickstart). This is temporary until the next Mahout release, when we will update to 0.4.3 (which uses predictionio-0.9.7-aml) and 0.5.0 (which uses predictionio-0.10.0 from Apache). 23 | 24 | ## v0.4.0 25 | 26 | - This version requires PredictionIO-0.9.7-aml, found [here](http://actionml.com/docs/install). 27 | - New tuning params are now available for each "indicator" type, making indicators with a small number of possible values much more useful—things like gender or category-preference. See the docs for [configuring the UR](http://actionml.com/docs/ur_config) and look for the `indicators` parameter. 28 | - New forms of recommendation backfill allow all items to be recommended even if they have no user events yet. Backfill types include random and user-defined. See the docs for [configuring the UR](http://actionml.com/docs/ur_config) and look for the `rankings` parameter. 29 | 30 | ## v0.3.0 31 | 32 | - This version requires PredictionIO-0.9.7-aml from the ActionML repo [here](http://actionml.com/docs/install). 33 | - Now supports the `SelfCleanedDataSource` trait. Adding params to the `DataSource` part of `engine.json` allows control of de-duplication, property event compaction, and a time window for events. The time window is used to age out the oldest events. Note: this only works with the ActionML fork of PredictionIO found in the repo mentioned above. 34 | - Changed `backfillField: duration` to accept Scala Duration strings. This will require changes to all engine.json files that were using the older number-of-seconds duration. 35 | - Added support for indicator predictiveness testing with the MAP@k tool. 36 | - Fixed a bug that required the `typeName` in engine.json to be `"items"`; with this release the type can be more descriptive. 37 | 38 | ## v0.2.3 39 | 40 | - Removed `isEmpty` calls that were taking an extremely long time to execute, resulting in a considerable speedup. Now the vast majority of `pio train` time is taken up by writing to Elasticsearch. This can be optimized by creating an ES cluster or giving ES lots of memory.
41 | 42 | ## v0.2.2 43 | 44 | - A query with no item or user will get recommendations based on popularity. 45 | - A new integration test has been added. 46 | - A regression bug where some ids were being tokenized by Elasticsearch, leading to incorrect results, was fixed. **NOTE: for users with complex ids containing dashes or spaces this is an important fix.** 47 | - A dateRange in the query now takes precedence over the item's attached expiration and available dates. 48 | 49 | ## v0.2.1 50 | 51 | - Date ranges attached to items will be compared to the prediction server's current date if no date is provided in the query. 52 | 53 | ## v0.2.0 54 | 55 | - Date range filters implemented. 56 | - Hot/trending/popular used for backfill and when no other recommendations are returned by the query. 57 | - Filters (bias < 0) caused scores to be altered in v0.1.1; fixed in this version so filters have no effect on scoring. 58 | - The model is now hot-swapped in Elasticsearch so no downtime should be seen; in fact there is no need to run `pio deploy` to make the new model active. 59 | - It is now possible to have an engine.json (call it something else) dedicated to recalculating the popularity model. This allows fast updates to popularity without recalculating the collaborative filtering model. 60 | - Elasticsearch can now be in cluster mode. 61 | 62 | ## v0.1.1 63 | 64 | - Ids are now exact matches; in v0.1.0 the ids had to be lower case and were subject to tokenizing analysis, so using that version is not recommended. 65 | 66 | ## v0.1.0 67 | 68 | - User- and item-based queries supported. 69 | - Multiple usage events supported. 70 | - Filters and boosts supported on item properties and on user- or item-based results. 71 | - Fast writing to Elasticsearch using Spark. 72 | - Convention over configuration for queries: defaults make simple/typical queries simple, and overrides add greater expressiveness (see the example below).
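To illustrate the convention-over-configuration point: the simplest useful query is just `{"user": "u1"}`, which applies all defaults. Optional keys override defaults individually. A hedged sketch (the key names follow the UR query format; the property name and values are illustrative):

```json
{
  "user": "u1",
  "num": 10,
  "fields": [
    {
      "name": "countries",
      "values": ["United States"],
      "bias": -1
    }
  ]
}
```

This overrides only the default result count and adds one property filter; everything else keeps its default behavior.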
73 | 74 | # Known issues 75 | 76 | - See the GitHub [issues list](https://github.com/PredictionIO/template-scala-parallel-universal-recommendation/issues) 77 | 78 | 79 | # License 80 | This software is licensed under the Apache License, Version 2.0, found here: http://www.apache.org/licenses/LICENSE-2.0 81 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import scalariform.formatter.preferences._ 2 | import com.typesafe.sbt.SbtScalariform 3 | import com.typesafe.sbt.SbtScalariform.ScalariformKeys 4 | 5 | name := "template-scala-parallel-universal-recommendation" 6 | 7 | version := "0.4.2" 8 | 9 | organization := "io.prediction" 10 | 11 | val mahoutVersion = "0.13.0-SNAPSHOT" 12 | 13 | val pioVersion = "0.9.7-aml" 14 | 15 | libraryDependencies ++= Seq( 16 | "io.prediction" %% "core" % pioVersion % "provided", 17 | "org.apache.spark" %% "spark-core" % "1.4.0" % "provided", 18 | "org.apache.spark" %% "spark-mllib" % "1.4.0" % "provided", 19 | "org.xerial.snappy" % "snappy-java" % "1.1.1.7", 20 | // Mahout's Spark libs 21 | "org.apache.mahout" %% "mahout-math-scala" % mahoutVersion, 22 | "org.apache.mahout" %% "mahout-spark" % mahoutVersion 23 | exclude("org.apache.spark", "spark-core_2.10"), 24 | "org.apache.mahout" % "mahout-math" % mahoutVersion, 25 | "org.apache.mahout" % "mahout-hdfs" % mahoutVersion 26 | exclude("com.thoughtworks.xstream", "xstream") 27 | exclude("org.apache.hadoop", "hadoop-client"), 28 | // other external libs 29 | "com.thoughtworks.xstream" % "xstream" % "1.4.4" 30 | exclude("xmlpull", "xmlpull"), 31 | "org.elasticsearch" % "elasticsearch-spark_2.10" % "2.1.2" 32 | exclude("org.apache.spark", "spark-catalyst_2.10") 33 | exclude("org.apache.spark", "spark-sql_2.10"), 34 | "org.json4s" %% "json4s-native" % "3.2.10") 35 | .map(_.exclude("org.apache.lucene","lucene-core")).map(_.exclude("org.apache.lucene","lucene-analyzers-common")) 36 | 37 | resolvers += Resolver.mavenLocal 38 | 39 | SbtScalariform.scalariformSettings 40 | 41 | ScalariformKeys.preferences := ScalariformKeys.preferences.value 42 | .setPreference(AlignSingleLineCaseStatements, true) 43 | .setPreference(DoubleIndentClassDeclaration, true) 44 | .setPreference(DanglingCloseParenthesis, Prevent) 45 | .setPreference(MultilineScaladocCommentsStartOnFirstLine, true) 46 | 47 | assemblyMergeStrategy in assembly := { 48 | case "plugin.properties" => MergeStrategy.discard 49 | case PathList(ps @ _*) if ps.last endsWith "package-info.class" => 50 | MergeStrategy.first 51 | case x => 52 | val oldStrategy = (assemblyMergeStrategy in assembly).value 53 | oldStrategy(x) 54 | } 55 | -------------------------------------------------------------------------------- /data/handmade-event-description.txt: -------------------------------------------------------------------------------- 1 | Event: purchase entity_id: u1 target_entity_id: iphone 2 | Event: purchase entity_id: u1 target_entity_id: ipad 3 | Event: purchase entity_id: u2 target_entity_id: nexus 4 | Event: purchase entity_id: u2 target_entity_id: galaxy 5 | Event: purchase entity_id: u3 target_entity_id: surface 6 | Event: purchase entity_id: u4 target_entity_id: iphone 7 | Event: purchase entity_id: u4 target_entity_id: galaxy 8 | Event: view entity_id: u1 target_entity_id: phones 9 | Event: view entity_id: u1 target_entity_id: mobile_acc 10 | Event: view entity_id: u2 target_entity_id: phones 11 | Event: view entity_id: u2
target_entity_id: tablets 12 | Event: view entity_id: u2 target_entity_id: mobile_acc 13 | Event: view entity_id: u3 target_entity_id: mobile_acc 14 | Event: view entity_id: u4 target_entity_id: phones 15 | Event: view entity_id: u4 target_entity_id: tablets 16 | Event: view entity_id: u4 target_entity_id: soap 17 | Event: view entity_id: u5 target_entity_id: soap 18 | Event: $set entity_id: iphone properties/catagory: phones properties/date: 2015-08-30T12:24:41 properties/expiredate: 2015-09-01T12:24:41 19 | Event: $set entity_id: ipad properties/catagory: tablets properties/availabledate: 2015-08-29T12:24:41 properties/date: 2015-08-31T12:24:41 properties/expiredate: 2015-09-02T12:24:41 20 | Event: $set entity_id: nexus properties/catagory: tablets properties/availabledate: 2015-08-30T12:24:41 properties/date: 2015-09-01T12:24:41 properties/expiredate: 2015-09-03T12:24:41 21 | Event: $set entity_id: galaxy properties/catagory: phones properties/date: 2015-09-02T12:24:41 properties/expiredate: 2015-09-04T12:24:41 22 | Event: $set entity_id: surface properties/catagory: tablets properties/availabledate: 2015-09-01T12:24:41 properties/date: 2015-09-03T12:24:41 -------------------------------------------------------------------------------- /data/integration-test-expected.txt: -------------------------------------------------------------------------------- 1 | 2 | Queries to illustrate many use cases on a small standard dataset and for an automated integration test. 3 | 4 | WARNING: for this to produce the correct result you must: 5 | 1. Import data with 6 | $ python examples/import_handmade.py --access_key 7 | 2. Copy handmade-engine.json to engine.json. 8 | 3. Run 'pio build', 'pio train', and 'pio deploy' 9 | 4. The queries must be run the same day as the import was done because date filters are part of the test. 10 | NOTE: due to available and expire dates you should never see the Iphone 5 or Iphone 6. 
11 | 12 | ============ simple user recs ============ 13 | 14 | Recommendations for user: u1 15 | 16 | {"itemScores":[{"item":"Iphone 4","score":0.0},{"item":"Galaxy","score":0.0},{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0}]} 17 | 18 | Recommendations for user: U 2 19 | 20 | {"itemScores":[{"item":"Iphone 4","score":0.2215922623872757},{"item":"Ipad-retina","score":0.05624813959002495},{"item":"Surface","score":0.029876574873924255}]} 21 | 22 | Recommendations for user: u-3 23 | 24 | {"itemScores":[{"item":"Iphone 4","score":0.18315871059894562},{"item":"Galaxy","score":0.18315871059894562},{"item":"Nexus","score":0.18315871059894562},{"item":"Ipad-retina","score":0.07201286405324936}]} 25 | 26 | Recommendations for user: u-4 27 | 28 | {"itemScores":[{"item":"Nexus","score":0.5046969652175903},{"item":"Ipad-retina","score":0.08661800622940063},{"item":"Surface","score":0.0}]} 29 | 30 | Recommendations for user: u5 31 | 32 | {"itemScores":[{"item":"Iphone 4","score":0.800000011920929},{"item":"Galaxy","score":0.800000011920929},{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0}]} 33 | 34 | ============ simple similar item recs ============ 35 | 36 | Recommendations for item: Iphone 4 37 | 38 | {"itemScores":[{"item":"Galaxy","score":0.4085645079612732},{"item":"Nexus","score":0.1371881514787674},{"item":"Ipad-retina","score":0.11633750051259995},{"item":"Surface","score":0.022334998473525047}]} 39 | 40 | Recommendations for item: Ipad-retina 41 | 42 | {"itemScores":[{"item":"Iphone 4","score":0.8513996005058289},{"item":"Galaxy","score":0.1894429624080658},{"item":"Nexus","score":0.1894429624080658},{"item":"Surface","score":0.04049335792660713}]} 43 | 44 | Recommendations for item: Nexus 45 | 46 | {"itemScores":[{"item":"Galaxy","score":0.576367199420929},{"item":"Iphone 4","score":0.19353307783603668},{"item":"Ipad-retina","score":0.04849598929286003},{"item":"Surface","score":0.02529095485806465}]} 47 | 48 | Recommendations for item: Galaxy 49 | 50 | {"itemScores":[{"item":"Iphone 4","score":0.3865432143211365},{"item":"Nexus","score":0.3865432143211365},{"item":"Ipad-retina","score":0.03013293445110321},{"item":"Surface","score":0.01600530743598938}]} 51 | 52 | Recommendations for item: Surface 53 | 54 | {"itemScores":[{"item":"Iphone 4","score":0.4070388376712799},{"item":"Galaxy","score":0.4070388376712799},{"item":"Nexus","score":0.4070388376712799},{"item":"Ipad-retina","score":0.17534448206424713}]} 55 | 56 | ============ popular item recs only ============ 57 | 58 | query with no item or user id, ordered by popularity 59 | 60 | {"itemScores":[{"item":"Iphone 4","score":0.0},{"item":"Galaxy","score":0.0},{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0}]} 61 | 62 | Recommendations for non-existant user: xyz, all from popularity 63 | 64 | {"itemScores":[{"item":"Iphone 4","score":0.0},{"item":"Galaxy","score":0.0},{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0}]} 65 | 66 | Recommendations for non-existant item: xyz, all from popularity 67 | 68 | {"itemScores":[{"item":"Iphone 4","score":0.0},{"item":"Galaxy","score":0.0},{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0}]} 69 | 70 | Recommendations for no user no item, all from popularity, Tablets filter 71 | 72 | {"itemScores":[{"item":"Nexus","score":0.0},{"item":"Ipad-retina","score":0.0},{"item":"Surface","score":0.0}]} 73 | 74 | Recommendations for no user no item, all from popularity, Tablets boost 75 | 76 | 
{"itemScores":[{"item":"Nexus","score":0.9369767904281616},{"item":"Surface","score":0.6666666865348816},{"item":"Ipad-retina","score":0.204568549990654},{"item":"Iphone 4","score":0.0}]} 77 | 78 | Recommendations for no user no item, all from popularity, Tablets boost, Estados Unidos Mexicanos filter 79 | 80 | {"itemScores":[{"item":"Ipad-retina","score":0.21918058395385742},{"item":"Iphone 4","score":0.0}]} 81 | 82 | ============ dateRange filter ============ 83 | 84 | Recommendations for user: u1 85 | 86 | {"itemScores":[{"item":"Nexus","score":0.2259630560874939},{"item":"Surface","score":0.049329958856105804}]} 87 | 88 | ============ query with item and user *EXPERIMENTAL* ============ 89 | 90 | Recommendations for no user no item, all from popularity, Tablets boost, Estados Unidos Mexicanos filter 91 | 92 | {"itemScores":[{"item":"Galaxy","score":0.49276190996170044},{"item":"Nexus","score":0.2750934064388275},{"item":"Surface","score":0.05210217833518982}]} 93 | -------------------------------------------------------------------------------- /data/rank-test-query-expected.txt: -------------------------------------------------------------------------------- 1 | 2 | Queries to illustrate many use cases on a small standard dataset and for an automated integration test. 3 | 4 | WARNING: for this to produce the correct result you must: 5 | 1. Import data with 6 | $ python examples/import_rank.py --access_key 7 | 2. Copy rank-engine.json to engine.json. 8 | 3. Run 'pio build', 'pio train', and 'pio deploy' 9 | 4. The queries must be run the same day as the import was done because date filters are part of the test. 10 | 11 | ============ simple user recs ============ 12 | 13 | Recommendations for user: user-1 14 | 15 | {"itemScores":[{"item":"product-3","score":0.3595937192440033},{"item":"product-2","score":0.10758151859045029},{"item":"product-5","score":0.06365098059177399},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 16 | 17 | Recommendations for user: user-2 18 | 19 | {"itemScores":[{"item":"product-4","score":0.6799420118331909},{"item":"product-1","score":0.2569144368171692},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 20 | 21 | Recommendations for user: user-3 22 | 23 | {"itemScores":[{"item":"product-2","score":0.3595937192440033},{"item":"product-1","score":0.3595937192440033},{"item":"product-5","score":0.017842993140220642},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 24 | 25 | Recommendations for user: user-4 26 | 27 | {"itemScores":[{"item":"product-1","score":0.2559533715248108},{"item":"product-3","score":0.0944056436419487},{"item":"product-2","score":0.0},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 28 | 29 | Recommendations for user: user-5 30 | 31 | {"itemScores":[{"item":"product-3","score":0.0},{"item":"product-4","score":0.0},{"item":"product-2","score":0.0},{"item":"product-1","score":0.0},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 32 | 33 | ============ simple similar item recs ============ 34 | 35 | Recommendations for item: product-1 36 | 37 | 
{"itemScores":[{"item":"product-3","score":0.40796521306037903},{"item":"product-4","score":0.3626357316970825},{"item":"product-5","score":0.07773856818675995},{"item":"product-2","score":0.0770743265748024},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 38 | 39 | Recommendations for item: product-2 40 | 41 | {"itemScores":[{"item":"product-4","score":0.8485281467437744},{"item":"product-3","score":0.20341692864894867},{"item":"product-1","score":0.20341692864894867},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 42 | 43 | Recommendations for item: product-3 44 | 45 | {"itemScores":[{"item":"product-4","score":0.28767499327659607},{"item":"product-1","score":0.21575623750686646},{"item":"product-2","score":0.06454890966415405},{"item":"product-5","score":0.010705795139074326},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 46 | 47 | Recommendations for item: product-4 48 | 49 | {"itemScores":[{"item":"product-2","score":0.07302875816822052},{"item":"product-3","score":0.07302875071763992},{"item":"product-1","score":0.07302875071763992},{"item":"product-5","score":0.029496734961867332},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 50 | 51 | Recommendations for item: product-5 52 | 53 | {"itemScores":[{"item":"product-4","score":0.4954302906990051},{"item":"product-1","score":0.28767499327659607},{"item":"product-3","score":0.1290978193283081},{"item":"product-2","score":0.0},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 54 | 55 | ============ popular item recs only ============ 56 | 57 | query with no item or user id, ordered by popularity 58 | 59 | {"itemScores":[{"item":"product-3","score":0.0},{"item":"product-4","score":0.0},{"item":"product-2","score":0.0},{"item":"product-1","score":0.0},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 60 | 61 | Recommendations for non-existant user: xyz, all from popularity 62 | 63 | {"itemScores":[{"item":"product-3","score":0.0},{"item":"product-4","score":0.0},{"item":"product-2","score":0.0},{"item":"product-1","score":0.0},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 64 | 65 | Recommendations for non-existant item: xyz, all from popularity 66 | 67 | {"itemScores":[{"item":"product-3","score":0.0},{"item":"product-4","score":0.0},{"item":"product-2","score":0.0},{"item":"product-1","score":0.0},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 68 | 69 | Recommendations for no user no item, all from popularity, red color filter 70 | 71 | {"itemScores":[{"item":"product-3","score":0.0},{"item":"product-1","score":0.0}]} 72 | 73 | Recommendations for no user no item, all from popularity, green boost 74 | 75 | 
{"itemScores":[{"item":"product-4","score":0.5},{"item":"product-2","score":0.5},{"item":"product-3","score":0.0},{"item":"product-1","score":0.0},{"item":"product-6","score":0.0},{"item":"product-5","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 76 | 77 | Recommendations for no user no item, all from popularity, red color boost, S size filter 78 | 79 | {"itemScores":[{"item":"product-1","score":0.6000000238418579},{"item":"product-4","score":0.0},{"item":"product-5","score":0.0},{"item":"product-9","score":0.0}]} 80 | 81 | ============ dateRange filter ============ 82 | 83 | Recommendations for user: user-1 84 | 85 | {"itemScores":[{"item":"product-6","score":0.0},{"item":"product-7","score":0.0}]} 86 | 87 | ============ query with item and user *EXPERIMENTAL* ============ 88 | 89 | Recommendations for user-1 & product-1 90 | 91 | {"itemScores":[{"item":"product-3","score":0.7042884230613708},{"item":"product-2","score":0.14845967292785645},{"item":"product-5","score":0.12810268998146057},{"item":"product-6","score":0.0},{"item":"product-8","score":0.0},{"item":"product-7","score":0.0},{"item":"product-9","score":0.0}]} 92 | -------------------------------------------------------------------------------- /data/sample-handmade-data.txt: -------------------------------------------------------------------------------- 1 | u1,purchase,Iphone 6 2 | u1,purchase,Iphone 5 3 | u1,purchase,Iphone 4 4 | u1,purchase,Ipad-retina 5 | U 2,purchase,Nexus 6 | U 2,purchase,Galaxy 7 | u-3,purchase,Surface 8 | u-4,purchase,Iphone 5 9 | u-4,purchase,Iphone 5 10 | u-4,purchase,Iphone 5 11 | u-4,purchase,Iphone 5 12 | u-4,purchase,Iphone 4 13 | u-4,purchase,Galaxy 14 | u1,view,Phones 15 | u1,view,Phones 16 | u1,view,Phones 17 | u1,view,Phones 18 | u1,view,Phones 19 | u1,view,Phones 20 | u1,view,Mobile-acc 21 | U 2,view,Phones 22 | U 2,view,Tablets 23 | U 2,view,Mobile-acc 24 | u-3,view,Mobile-acc 25 | u-4,view,Phones 26 | u-4,view,Tablets 27 | u-4,view,Soap 28 | u5,view,Soap 29 | Iphone 6,$set,categories:Phones:Electronics:Apple 30 | Iphone 5,$set,categories:Phones:Electronics:Apple 31 | Iphone 4,$set,categories:Phones:Electronics:Apple 32 | Ipad-retina,$set,categories:Tablets:Electronics:Apple 33 | Nexus,$set,categories:Tablets:Electronics:Google 34 | Galaxy,$set,categories:Phones:Electronics:Samsung 35 | Surface,$set,categories:Tablets:Electronics:Microsoft 36 | Iphone 4,$set,countries:United States:Canada:Estados Unidos Mexicanos 37 | Ipad-retina,$set,countries:United States:Estados Unidos Mexicanos 38 | Nexus,$set,countries:United States:Canada 39 | Galaxy,$set,countries:United States 40 | Surface,$set,countries:United States:Canada 41 | Iphone 6,$set,categories:Cameras 42 | Iphone 5,$set,categories:Cameras 43 | Iphone 4,$set,categories:Cameras 44 | Ipad-retina,$set,categories:Computers 45 | Nexus,$set,categories:Computers 46 | Galaxy,$set,categories:Cameras 47 | Surface,$set,categories:Computers 48 | Iphone 4,$set,countries:Cuba 49 | Ipad-retina,$set,countries:Cuba 50 | Nexus,$set,countries:Cuba 51 | Galaxy,$set,countries:Cuba 52 | Surface,$set,countries:Cuba 53 | u1,purchase,Iphone 6 54 | u1,purchase,Iphone 5 55 | u1,purchase,Iphone 4 56 | u1,purchase,Ipad-retina 57 | U 2,purchase,Nexus 58 | U 2,purchase,Galaxy 59 | u-3,purchase,Surface 60 | u-4,purchase,Iphone 5 61 | u-4,purchase,Iphone 4 62 | u-4,purchase,Galaxy 63 | u1,view,Phones 64 | u1,view,Mobile-acc 65 | U 2,view,Phones 66 | U 2,view,Tablets 67 | U 2,view,Mobile-acc 68 | 
u-3,view,Mobile-acc 69 | u-4,view,Phones 70 | u-4,view,Tablets 71 | u-4,view,Soap 72 | u5,view,Soap 73 | u1,purchase,Galaxy 74 | u1,purchase,Galaxy 75 | u1,purchase,Galaxy 76 | u1,purchase,Galaxy 77 | U 2,purchase,Iphone 5 78 | U 2,purchase,Iphone 5 79 | u1,view,Soap 80 | u1,view,Soap 81 | u-3,view,Mobile-acc 82 | u-4,view,Mobile-acc 83 | u-4,view,Mobile-acc 84 | Iphone 6,$set,categories:Phones:Electronics:Apple 85 | Iphone 5,$set,categories:Phones:Electronics:Apple 86 | Iphone 4,$set,categories:Phones:Electronics:Apple 87 | Ipad-retina,$set,categories:Tablets:Electronics:Apple 88 | Nexus,$set,categories:Tablets:Electronics:Google 89 | Galaxy,$set,categories:Phones:Electronics:Samsung 90 | Surface,$set,categories:Tablets:Electronics:Microsoft 91 | Iphone 4,$set,countries:United States:Canada:Estados Unidos Mexicanos 92 | Ipad-retina,$set,countries:United States:Estados Unidos Mexicanos 93 | Nexus,$set,countries:United States:Canada 94 | Galaxy,$set,countries:United States 95 | Surface,$set,countries:United States:Canada 96 | Iphone 6,$set,defaultRank:7 97 | Iphone 5,$set,defaultRank:6 98 | Iphone 4,$set,defaultRank:5 99 | Nexus,$set,defaultRank:4 100 | Ipad-retina,$set,defaultRank:3 101 | Galaxy,$set,defaultRank:2 102 | Surface,$set,defaultRank:1 103 | -------------------------------------------------------------------------------- /data/sample-handmade-data1.txt: -------------------------------------------------------------------------------- 1 | u-1,purchase,Iphone 6 2 | u-2,purchase,Iphone 6 3 | u-3,purchase,Iphone 6 4 | u-4,purchase,Iphone 6 5 | u-5,purchase,Iphone 6 6 | u-6,purchase,Iphone 6 7 | u-1,purchase,Iphone 5 8 | u-2,purchase,Iphone 5 9 | u-1,purchase,Iphone 4 10 | u-1,purchase,Ipad-retina 11 | u-2,purchase,Ipad-retina 12 | u-1,purchase,Nexus 13 | u-2,purchase,Nexus 14 | u-3,purchase,Nexus 15 | u-4,purchase,Nexus 16 | u-5,purchase,Nexus 17 | u-6,purchase,Nexus 18 | u-7,purchase,Nexus 19 | -------------------------------------------------------------------------------- /data/sample-handmade-data2.txt: -------------------------------------------------------------------------------- 1 | U 1,purchase,Iphone 6 2 | U 2,purchase,Iphone 6 3 | U 1,purchase,Iphone 5 4 | U 2,purchase,Iphone 5 5 | U 3,purchase,Iphone 5 6 | U 4,purchase,Iphone 5 7 | U 1,purchase,Iphone 4 8 | U 1,purchase,Ipad-retina 9 | U 1,purchase,Nexus 10 | U 2,purchase,Nexus 11 | U 3,purchase,Nexus 12 | U 4,purchase,Nexus 13 | U 5,purchase,Nexus 14 | U 6,purchase,Nexus 15 | U 7,purchase,Nexus 16 | -------------------------------------------------------------------------------- /data/sample-handmade-data3.txt: -------------------------------------------------------------------------------- 1 | u1,purchase,Iphone 6 2 | u1,purchase,Iphone 5 3 | u2,purchase,Iphone 5 4 | u3,purchase,Iphone 5 5 | u4,purchase,Iphone 5 6 | u5,purchase,Iphone 5 7 | u6,purchase,Iphone 5 8 | u1,purchase,Iphone 4 9 | u1,purchase,Ipad-retina 10 | u2,purchase,Ipad-retina 11 | u3,purchase,Ipad-retina 12 | u4,purchase,Ipad-retina 13 | u5,purchase,Ipad-retina 14 | u6,purchase,Ipad-retina 15 | u1,purchase,Nexus 16 | u2,purchase,Nexus 17 | u3,purchase,Nexus 18 | u4,purchase,Nexus 19 | u5,purchase,Nexus 20 | u6,purchase,Nexus 21 | u7,purchase,Nexus 22 | -------------------------------------------------------------------------------- /data/sample-rank-data.txt: -------------------------------------------------------------------------------- 1 | user-1,show,product-1 2 | user-2,show,product-2 3 | user-3,show,product-3 4 | user-4,show,product-4 5 | 
user-1,like,product-4 6 | user-2,like,product-3 7 | user-3,like,product-2 8 | user-4,like,product-1 9 | product-1,$set,color:red:black 10 | product-2,$set,color:green:black 11 | product-3,$set,color:red:black 12 | product-4,$set,color:green:black 13 | user-1,show,product-4 14 | user-2,show,product-3 15 | user-3,like,product-3 16 | user-4,like,product-4 17 | product-1,$set,defaultRank:1.0 18 | product-2,$set,defaultRank:2.7 19 | product-3,$set,defaultRank:3.2 20 | product-4,$set,defaultRank:4.7 21 | product-5,$set,defaultRank:5.0 22 | product-6,$set,defaultRank:6.9 23 | product-7,$set,defaultRank:7.15 24 | product-8,$set,defaultRank:8.07 25 | user-1,like,product-3 26 | user-4,like,product-6 27 | user-2,show,product-3 28 | user-3,show,product-4 29 | user-4,show,product-5 30 | product-1,$set,size:S:M 31 | product-2,$set,size:SX:XL 32 | product-3,$set,size:XL:X 33 | product-4,$set,size:X:XL:S 34 | product-5,$set,size:M:S:XS 35 | product-9,$set,size:M:S:XS -------------------------------------------------------------------------------- /data/sample-rank-empty-action-data.txt: -------------------------------------------------------------------------------- 1 | product-1,$set,defaultRank:1.0 2 | product-2,$set,defaultRank:2.7 3 | product-3,$set,defaultRank:3.2 4 | product-4,$set,defaultRank:4.7 5 | product-5,$set,defaultRank:5.0 6 | product-6,$set,defaultRank:6.9 7 | product-7,$set,defaultRank:7.15 8 | product-8,$set,defaultRank:8.07 -------------------------------------------------------------------------------- /data/sample-rank-empty-set-data.txt: -------------------------------------------------------------------------------- 1 | user-1,show,product-1 2 | user-2,show,product-2 3 | user-3,show,product-3 4 | user-4,show,product-4 5 | user-1,like,product-4 6 | user-2,like,product-4 7 | user-3,like,product-2 8 | user-4,like,product-1 9 | user-1,like,product-1 10 | user-2,like,product-1 11 | user-3,like,product-1 12 | user-3,unknown,product-5 13 | product-6,$set,defaultRank:6.9 -------------------------------------------------------------------------------- /data/sample_movielens_data.txt: -------------------------------------------------------------------------------- 1 | 0::2::3 2 | 0::3::1 3 | 0::5::2 4 | 0::9::4 5 | 0::11::1 6 | 0::12::2 7 | 0::15::1 8 | 0::17::1 9 | 0::19::1 10 | 0::21::1 11 | 0::23::1 12 | 0::26::3 13 | 0::27::1 14 | 0::28::1 15 | 0::29::1 16 | 0::30::1 17 | 0::31::1 18 | 0::34::1 19 | 0::37::1 20 | 0::41::2 21 | 0::44::1 22 | 0::45::2 23 | 0::46::1 24 | 0::47::1 25 | 0::48::1 26 | 0::50::1 27 | 0::51::1 28 | 0::54::1 29 | 0::55::1 30 | 0::59::2 31 | 0::61::2 32 | 0::64::1 33 | 0::67::1 34 | 0::68::1 35 | 0::69::1 36 | 0::71::1 37 | 0::72::1 38 | 0::77::2 39 | 0::79::1 40 | 0::83::1 41 | 0::87::1 42 | 0::89::2 43 | 0::91::3 44 | 0::92::4 45 | 0::94::1 46 | 0::95::2 47 | 0::96::1 48 | 0::98::1 49 | 0::99::1 50 | 1::2::2 51 | 1::3::1 52 | 1::4::2 53 | 1::6::1 54 | 1::9::3 55 | 1::12::1 56 | 1::13::1 57 | 1::14::1 58 | 1::16::1 59 | 1::19::1 60 | 1::21::3 61 | 1::27::1 62 | 1::28::3 63 | 1::33::1 64 | 1::36::2 65 | 1::37::1 66 | 1::40::1 67 | 1::41::2 68 | 1::43::1 69 | 1::44::1 70 | 1::47::1 71 | 1::50::1 72 | 1::54::1 73 | 1::56::2 74 | 1::57::1 75 | 1::58::1 76 | 1::60::1 77 | 1::62::4 78 | 1::63::1 79 | 1::67::1 80 | 1::68::4 81 | 1::70::2 82 | 1::72::1 83 | 1::73::1 84 | 1::74::2 85 | 1::76::1 86 | 1::77::3 87 | 1::78::1 88 | 1::81::1 89 | 1::82::1 90 | 1::85::3 91 | 1::86::2 92 | 1::88::2 93 | 1::91::1 94 | 1::92::2 95 | 1::93::1 96 | 1::94::2 97 | 1::96::1 98 | 1::97::1 
99 | 2::4::3 100 | 2::6::1 101 | 2::8::5 102 | 2::9::1 103 | 2::10::1 104 | 2::12::3 105 | 2::13::1 106 | 2::15::2 107 | 2::18::2 108 | 2::19::4 109 | 2::22::1 110 | 2::26::1 111 | 2::28::1 112 | 2::34::4 113 | 2::35::1 114 | 2::37::5 115 | 2::38::1 116 | 2::39::5 117 | 2::40::4 118 | 2::47::1 119 | 2::50::1 120 | 2::52::2 121 | 2::54::1 122 | 2::55::1 123 | 2::57::2 124 | 2::58::2 125 | 2::59::1 126 | 2::61::1 127 | 2::62::1 128 | 2::64::1 129 | 2::65::1 130 | 2::66::3 131 | 2::68::1 132 | 2::71::3 133 | 2::76::1 134 | 2::77::1 135 | 2::78::1 136 | 2::80::1 137 | 2::83::5 138 | 2::85::1 139 | 2::87::2 140 | 2::88::1 141 | 2::89::4 142 | 2::90::1 143 | 2::92::4 144 | 2::93::5 145 | 3::0::1 146 | 3::1::1 147 | 3::2::1 148 | 3::7::3 149 | 3::8::3 150 | 3::9::1 151 | 3::14::1 152 | 3::15::1 153 | 3::16::1 154 | 3::18::4 155 | 3::19::1 156 | 3::24::3 157 | 3::26::1 158 | 3::29::3 159 | 3::33::1 160 | 3::34::3 161 | 3::35::1 162 | 3::36::3 163 | 3::37::1 164 | 3::38::2 165 | 3::43::1 166 | 3::44::1 167 | 3::46::1 168 | 3::47::1 169 | 3::51::5 170 | 3::52::3 171 | 3::56::1 172 | 3::58::1 173 | 3::60::3 174 | 3::62::1 175 | 3::65::2 176 | 3::66::1 177 | 3::67::1 178 | 3::68::2 179 | 3::70::1 180 | 3::72::2 181 | 3::76::3 182 | 3::79::3 183 | 3::80::4 184 | 3::81::1 185 | 3::83::1 186 | 3::84::1 187 | 3::86::1 188 | 3::87::2 189 | 3::88::4 190 | 3::89::1 191 | 3::91::1 192 | 3::94::3 193 | 4::1::1 194 | 4::6::1 195 | 4::8::1 196 | 4::9::1 197 | 4::10::1 198 | 4::11::1 199 | 4::12::1 200 | 4::13::1 201 | 4::14::2 202 | 4::15::1 203 | 4::17::1 204 | 4::20::1 205 | 4::22::1 206 | 4::23::1 207 | 4::24::1 208 | 4::29::4 209 | 4::30::1 210 | 4::31::1 211 | 4::34::1 212 | 4::35::1 213 | 4::36::1 214 | 4::39::2 215 | 4::40::3 216 | 4::41::4 217 | 4::43::2 218 | 4::44::1 219 | 4::45::1 220 | 4::46::1 221 | 4::47::1 222 | 4::49::2 223 | 4::50::1 224 | 4::51::1 225 | 4::52::4 226 | 4::54::1 227 | 4::55::1 228 | 4::60::3 229 | 4::61::1 230 | 4::62::4 231 | 4::63::3 232 | 4::65::1 233 | 4::67::2 234 | 4::69::1 235 | 4::70::4 236 | 4::71::1 237 | 4::73::1 238 | 4::78::1 239 | 4::84::1 240 | 4::85::1 241 | 4::87::3 242 | 4::88::3 243 | 4::89::2 244 | 4::96::1 245 | 4::97::1 246 | 4::98::1 247 | 4::99::1 248 | 5::0::1 249 | 5::1::1 250 | 5::4::1 251 | 5::5::1 252 | 5::8::1 253 | 5::9::3 254 | 5::10::2 255 | 5::13::3 256 | 5::15::1 257 | 5::19::1 258 | 5::20::3 259 | 5::21::2 260 | 5::23::3 261 | 5::27::1 262 | 5::28::1 263 | 5::29::1 264 | 5::31::1 265 | 5::36::3 266 | 5::38::2 267 | 5::39::1 268 | 5::42::1 269 | 5::48::3 270 | 5::49::4 271 | 5::50::3 272 | 5::51::1 273 | 5::52::1 274 | 5::54::1 275 | 5::55::5 276 | 5::56::3 277 | 5::58::1 278 | 5::60::1 279 | 5::61::1 280 | 5::64::3 281 | 5::65::2 282 | 5::68::4 283 | 5::70::1 284 | 5::71::1 285 | 5::72::1 286 | 5::74::1 287 | 5::79::1 288 | 5::81::2 289 | 5::84::1 290 | 5::85::1 291 | 5::86::1 292 | 5::88::1 293 | 5::90::4 294 | 5::91::2 295 | 5::95::2 296 | 5::99::1 297 | 6::0::1 298 | 6::1::1 299 | 6::2::3 300 | 6::5::1 301 | 6::6::1 302 | 6::9::1 303 | 6::10::1 304 | 6::15::2 305 | 6::16::2 306 | 6::17::1 307 | 6::18::1 308 | 6::20::1 309 | 6::21::1 310 | 6::22::1 311 | 6::24::1 312 | 6::25::5 313 | 6::26::1 314 | 6::28::1 315 | 6::30::1 316 | 6::33::1 317 | 6::38::1 318 | 6::39::1 319 | 6::43::4 320 | 6::44::1 321 | 6::45::1 322 | 6::48::1 323 | 6::49::1 324 | 6::50::1 325 | 6::53::1 326 | 6::54::1 327 | 6::55::1 328 | 6::56::1 329 | 6::58::4 330 | 6::59::1 331 | 6::60::1 332 | 6::61::3 333 | 6::63::3 334 | 6::66::1 335 | 6::67::3 336 | 6::68::1 337 | 
6::69::1 338 | 6::71::2 339 | 6::73::1 340 | 6::75::1 341 | 6::77::1 342 | 6::79::1 343 | 6::81::1 344 | 6::84::1 345 | 6::85::3 346 | 6::86::1 347 | 6::87::1 348 | 6::88::1 349 | 6::89::1 350 | 6::91::2 351 | 6::94::1 352 | 6::95::2 353 | 6::96::1 354 | 7::1::1 355 | 7::2::2 356 | 7::3::1 357 | 7::4::1 358 | 7::7::1 359 | 7::10::1 360 | 7::11::2 361 | 7::14::2 362 | 7::15::1 363 | 7::16::1 364 | 7::18::1 365 | 7::21::1 366 | 7::22::1 367 | 7::23::1 368 | 7::25::5 369 | 7::26::1 370 | 7::29::4 371 | 7::30::1 372 | 7::31::3 373 | 7::32::1 374 | 7::33::1 375 | 7::35::1 376 | 7::37::2 377 | 7::39::3 378 | 7::40::2 379 | 7::42::2 380 | 7::44::1 381 | 7::45::2 382 | 7::47::4 383 | 7::48::1 384 | 7::49::1 385 | 7::53::1 386 | 7::54::1 387 | 7::55::1 388 | 7::56::1 389 | 7::59::1 390 | 7::61::2 391 | 7::62::3 392 | 7::63::2 393 | 7::66::1 394 | 7::67::3 395 | 7::74::1 396 | 7::75::1 397 | 7::76::3 398 | 7::77::1 399 | 7::81::1 400 | 7::82::1 401 | 7::84::2 402 | 7::85::4 403 | 7::86::1 404 | 7::92::2 405 | 7::96::1 406 | 7::97::1 407 | 7::98::1 408 | 8::0::1 409 | 8::2::4 410 | 8::3::2 411 | 8::4::2 412 | 8::5::1 413 | 8::7::1 414 | 8::9::1 415 | 8::11::1 416 | 8::15::1 417 | 8::18::1 418 | 8::19::1 419 | 8::21::1 420 | 8::29::5 421 | 8::31::3 422 | 8::33::1 423 | 8::35::1 424 | 8::36::1 425 | 8::40::2 426 | 8::44::1 427 | 8::45::1 428 | 8::50::1 429 | 8::51::1 430 | 8::52::5 431 | 8::53::5 432 | 8::54::1 433 | 8::55::1 434 | 8::56::1 435 | 8::58::4 436 | 8::60::3 437 | 8::62::4 438 | 8::64::1 439 | 8::67::3 440 | 8::69::1 441 | 8::71::1 442 | 8::72::3 443 | 8::77::3 444 | 8::78::1 445 | 8::79::1 446 | 8::83::1 447 | 8::85::5 448 | 8::86::1 449 | 8::88::1 450 | 8::90::1 451 | 8::92::2 452 | 8::95::4 453 | 8::96::3 454 | 8::97::1 455 | 8::98::1 456 | 8::99::1 457 | 9::2::3 458 | 9::3::1 459 | 9::4::1 460 | 9::5::1 461 | 9::6::1 462 | 9::7::5 463 | 9::9::1 464 | 9::12::1 465 | 9::14::3 466 | 9::15::1 467 | 9::19::1 468 | 9::21::1 469 | 9::22::1 470 | 9::24::1 471 | 9::25::1 472 | 9::26::1 473 | 9::30::3 474 | 9::32::4 475 | 9::35::2 476 | 9::36::2 477 | 9::37::2 478 | 9::38::1 479 | 9::39::1 480 | 9::43::3 481 | 9::49::5 482 | 9::50::3 483 | 9::53::1 484 | 9::54::1 485 | 9::58::1 486 | 9::59::1 487 | 9::60::1 488 | 9::61::1 489 | 9::63::3 490 | 9::64::3 491 | 9::68::1 492 | 9::69::1 493 | 9::70::3 494 | 9::71::1 495 | 9::73::2 496 | 9::75::1 497 | 9::77::2 498 | 9::81::2 499 | 9::82::1 500 | 9::83::1 501 | 9::84::1 502 | 9::86::1 503 | 9::87::4 504 | 9::88::1 505 | 9::90::3 506 | 9::94::2 507 | 9::95::3 508 | 9::97::2 509 | 9::98::1 510 | 10::0::3 511 | 10::2::4 512 | 10::4::3 513 | 10::7::1 514 | 10::8::1 515 | 10::10::1 516 | 10::13::2 517 | 10::14::1 518 | 10::16::2 519 | 10::17::1 520 | 10::18::1 521 | 10::21::1 522 | 10::22::1 523 | 10::24::1 524 | 10::25::3 525 | 10::28::1 526 | 10::35::1 527 | 10::36::1 528 | 10::37::1 529 | 10::38::1 530 | 10::39::1 531 | 10::40::4 532 | 10::41::2 533 | 10::42::3 534 | 10::43::1 535 | 10::49::3 536 | 10::50::1 537 | 10::51::1 538 | 10::52::1 539 | 10::55::2 540 | 10::56::1 541 | 10::58::1 542 | 10::63::1 543 | 10::66::1 544 | 10::67::2 545 | 10::68::1 546 | 10::75::1 547 | 10::77::1 548 | 10::79::1 549 | 10::86::1 550 | 10::89::3 551 | 10::90::1 552 | 10::97::1 553 | 10::98::1 554 | 11::0::1 555 | 11::6::2 556 | 11::9::1 557 | 11::10::1 558 | 11::11::1 559 | 11::12::1 560 | 11::13::4 561 | 11::16::1 562 | 11::18::5 563 | 11::19::4 564 | 11::20::1 565 | 11::21::1 566 | 11::22::1 567 | 11::23::5 568 | 11::25::1 569 | 11::27::5 570 | 11::30::5 571 | 11::32::5 
572 | 11::35::3 573 | 11::36::2 574 | 11::37::2 575 | 11::38::4 576 | 11::39::1 577 | 11::40::1 578 | 11::41::1 579 | 11::43::2 580 | 11::45::1 581 | 11::47::1 582 | 11::48::5 583 | 11::50::4 584 | 11::51::3 585 | 11::59::1 586 | 11::61::1 587 | 11::62::1 588 | 11::64::1 589 | 11::66::4 590 | 11::67::1 591 | 11::69::5 592 | 11::70::1 593 | 11::71::3 594 | 11::72::3 595 | 11::75::3 596 | 11::76::1 597 | 11::77::1 598 | 11::78::1 599 | 11::79::5 600 | 11::80::3 601 | 11::81::4 602 | 11::82::1 603 | 11::86::1 604 | 11::88::1 605 | 11::89::1 606 | 11::90::4 607 | 11::94::2 608 | 11::97::3 609 | 11::99::1 610 | 12::2::1 611 | 12::4::1 612 | 12::6::1 613 | 12::7::3 614 | 12::8::1 615 | 12::14::1 616 | 12::15::2 617 | 12::16::4 618 | 12::17::5 619 | 12::18::2 620 | 12::21::1 621 | 12::22::2 622 | 12::23::3 623 | 12::24::1 624 | 12::25::1 625 | 12::27::5 626 | 12::30::2 627 | 12::31::4 628 | 12::35::5 629 | 12::38::1 630 | 12::41::1 631 | 12::44::2 632 | 12::45::1 633 | 12::50::4 634 | 12::51::1 635 | 12::52::1 636 | 12::53::1 637 | 12::54::1 638 | 12::56::2 639 | 12::57::1 640 | 12::60::1 641 | 12::63::1 642 | 12::64::5 643 | 12::66::3 644 | 12::67::1 645 | 12::70::1 646 | 12::72::1 647 | 12::74::1 648 | 12::75::1 649 | 12::77::1 650 | 12::78::1 651 | 12::79::3 652 | 12::82::2 653 | 12::83::1 654 | 12::84::1 655 | 12::85::1 656 | 12::86::1 657 | 12::87::1 658 | 12::88::1 659 | 12::91::3 660 | 12::92::1 661 | 12::94::4 662 | 12::95::2 663 | 12::96::1 664 | 12::98::2 665 | 13::0::1 666 | 13::3::1 667 | 13::4::2 668 | 13::5::1 669 | 13::6::1 670 | 13::12::1 671 | 13::14::2 672 | 13::15::1 673 | 13::17::1 674 | 13::18::3 675 | 13::20::1 676 | 13::21::1 677 | 13::22::1 678 | 13::26::1 679 | 13::27::1 680 | 13::29::3 681 | 13::31::1 682 | 13::33::1 683 | 13::40::2 684 | 13::43::2 685 | 13::44::1 686 | 13::45::1 687 | 13::49::1 688 | 13::51::1 689 | 13::52::2 690 | 13::53::3 691 | 13::54::1 692 | 13::62::1 693 | 13::63::2 694 | 13::64::1 695 | 13::68::1 696 | 13::71::1 697 | 13::72::3 698 | 13::73::1 699 | 13::74::3 700 | 13::77::2 701 | 13::78::1 702 | 13::79::2 703 | 13::83::3 704 | 13::85::1 705 | 13::86::1 706 | 13::87::2 707 | 13::88::2 708 | 13::90::1 709 | 13::93::4 710 | 13::94::1 711 | 13::98::1 712 | 13::99::1 713 | 14::1::1 714 | 14::3::3 715 | 14::4::1 716 | 14::5::1 717 | 14::6::1 718 | 14::7::1 719 | 14::9::1 720 | 14::10::1 721 | 14::11::1 722 | 14::12::1 723 | 14::13::1 724 | 14::14::3 725 | 14::15::1 726 | 14::16::1 727 | 14::17::1 728 | 14::20::1 729 | 14::21::1 730 | 14::24::1 731 | 14::25::2 732 | 14::27::1 733 | 14::28::1 734 | 14::29::5 735 | 14::31::3 736 | 14::34::1 737 | 14::36::1 738 | 14::37::2 739 | 14::39::2 740 | 14::40::1 741 | 14::44::1 742 | 14::45::1 743 | 14::47::3 744 | 14::48::1 745 | 14::49::1 746 | 14::51::1 747 | 14::52::5 748 | 14::53::3 749 | 14::54::1 750 | 14::55::1 751 | 14::56::1 752 | 14::62::4 753 | 14::63::5 754 | 14::67::3 755 | 14::68::1 756 | 14::69::3 757 | 14::71::1 758 | 14::72::4 759 | 14::73::1 760 | 14::76::5 761 | 14::79::1 762 | 14::82::1 763 | 14::83::1 764 | 14::88::1 765 | 14::93::3 766 | 14::94::1 767 | 14::95::2 768 | 14::96::4 769 | 14::98::1 770 | 15::0::1 771 | 15::1::4 772 | 15::2::1 773 | 15::5::2 774 | 15::6::1 775 | 15::7::1 776 | 15::13::1 777 | 15::14::1 778 | 15::15::1 779 | 15::17::2 780 | 15::19::2 781 | 15::22::2 782 | 15::23::2 783 | 15::25::1 784 | 15::26::3 785 | 15::27::1 786 | 15::28::2 787 | 15::29::1 788 | 15::32::1 789 | 15::33::2 790 | 15::34::1 791 | 15::35::2 792 | 15::36::1 793 | 15::37::1 794 | 15::39::1 795 | 
15::42::1 796 | 15::46::5 797 | 15::48::2 798 | 15::50::2 799 | 15::51::1 800 | 15::52::1 801 | 15::58::1 802 | 15::62::1 803 | 15::64::3 804 | 15::65::2 805 | 15::72::1 806 | 15::73::1 807 | 15::74::1 808 | 15::79::1 809 | 15::80::1 810 | 15::81::1 811 | 15::82::2 812 | 15::85::1 813 | 15::87::1 814 | 15::91::2 815 | 15::96::1 816 | 15::97::1 817 | 15::98::3 818 | 16::2::1 819 | 16::5::3 820 | 16::6::2 821 | 16::7::1 822 | 16::9::1 823 | 16::12::1 824 | 16::14::1 825 | 16::15::1 826 | 16::19::1 827 | 16::21::2 828 | 16::29::4 829 | 16::30::2 830 | 16::32::1 831 | 16::34::1 832 | 16::36::1 833 | 16::38::1 834 | 16::46::1 835 | 16::47::3 836 | 16::48::1 837 | 16::49::1 838 | 16::50::1 839 | 16::51::5 840 | 16::54::5 841 | 16::55::1 842 | 16::56::2 843 | 16::57::1 844 | 16::60::1 845 | 16::63::2 846 | 16::65::1 847 | 16::67::1 848 | 16::72::1 849 | 16::74::1 850 | 16::80::1 851 | 16::81::1 852 | 16::82::1 853 | 16::85::5 854 | 16::86::1 855 | 16::90::5 856 | 16::91::1 857 | 16::93::1 858 | 16::94::3 859 | 16::95::2 860 | 16::96::3 861 | 16::98::3 862 | 16::99::1 863 | 17::2::1 864 | 17::3::1 865 | 17::6::1 866 | 17::10::4 867 | 17::11::1 868 | 17::13::2 869 | 17::17::5 870 | 17::19::1 871 | 17::20::5 872 | 17::22::4 873 | 17::28::1 874 | 17::29::1 875 | 17::33::1 876 | 17::34::1 877 | 17::35::2 878 | 17::37::1 879 | 17::38::1 880 | 17::45::1 881 | 17::46::5 882 | 17::47::1 883 | 17::49::3 884 | 17::51::1 885 | 17::55::5 886 | 17::56::3 887 | 17::57::1 888 | 17::58::1 889 | 17::59::1 890 | 17::60::1 891 | 17::63::1 892 | 17::66::1 893 | 17::68::4 894 | 17::69::1 895 | 17::70::1 896 | 17::72::1 897 | 17::73::3 898 | 17::78::1 899 | 17::79::1 900 | 17::82::2 901 | 17::84::1 902 | 17::90::5 903 | 17::91::3 904 | 17::92::1 905 | 17::93::1 906 | 17::94::4 907 | 17::95::2 908 | 17::97::1 909 | 18::1::1 910 | 18::4::3 911 | 18::5::2 912 | 18::6::1 913 | 18::7::1 914 | 18::10::1 915 | 18::11::4 916 | 18::12::2 917 | 18::13::1 918 | 18::15::1 919 | 18::18::1 920 | 18::20::1 921 | 18::21::2 922 | 18::22::1 923 | 18::23::2 924 | 18::25::1 925 | 18::26::1 926 | 18::27::1 927 | 18::28::5 928 | 18::29::1 929 | 18::31::1 930 | 18::32::1 931 | 18::36::1 932 | 18::38::5 933 | 18::39::5 934 | 18::40::1 935 | 18::42::1 936 | 18::43::1 937 | 18::44::4 938 | 18::46::1 939 | 18::47::1 940 | 18::48::1 941 | 18::51::2 942 | 18::55::1 943 | 18::56::1 944 | 18::57::1 945 | 18::62::1 946 | 18::63::1 947 | 18::66::3 948 | 18::67::1 949 | 18::70::1 950 | 18::75::1 951 | 18::76::3 952 | 18::77::1 953 | 18::80::3 954 | 18::81::3 955 | 18::82::1 956 | 18::83::5 957 | 18::84::1 958 | 18::97::1 959 | 18::98::1 960 | 18::99::2 961 | 19::0::1 962 | 19::1::1 963 | 19::2::1 964 | 19::4::1 965 | 19::6::2 966 | 19::11::1 967 | 19::12::1 968 | 19::14::1 969 | 19::23::1 970 | 19::26::1 971 | 19::31::1 972 | 19::32::4 973 | 19::33::1 974 | 19::34::1 975 | 19::37::1 976 | 19::38::1 977 | 19::41::1 978 | 19::43::1 979 | 19::45::1 980 | 19::48::1 981 | 19::49::1 982 | 19::50::2 983 | 19::53::2 984 | 19::54::3 985 | 19::55::1 986 | 19::56::2 987 | 19::58::1 988 | 19::61::1 989 | 19::62::1 990 | 19::63::1 991 | 19::64::1 992 | 19::65::1 993 | 19::69::2 994 | 19::72::1 995 | 19::74::3 996 | 19::76::1 997 | 19::78::1 998 | 19::79::1 999 | 19::81::1 1000 | 19::82::1 1001 | 19::84::1 1002 | 19::86::1 1003 | 19::87::2 1004 | 19::90::4 1005 | 19::93::1 1006 | 19::94::4 1007 | 19::95::2 1008 | 19::96::1 1009 | 19::98::4 1010 | 20::0::1 1011 | 20::1::1 1012 | 20::2::2 1013 | 20::4::2 1014 | 20::6::1 1015 | 20::8::1 1016 | 20::12::1 1017 | 
20::21::2 1018 | 20::22::5 1019 | 20::24::2 1020 | 20::25::1 1021 | 20::26::1 1022 | 20::29::2 1023 | 20::30::2 1024 | 20::32::2 1025 | 20::39::1 1026 | 20::40::1 1027 | 20::41::2 1028 | 20::45::2 1029 | 20::48::1 1030 | 20::50::1 1031 | 20::51::3 1032 | 20::53::3 1033 | 20::55::1 1034 | 20::57::2 1035 | 20::60::1 1036 | 20::61::1 1037 | 20::64::1 1038 | 20::66::1 1039 | 20::70::2 1040 | 20::72::1 1041 | 20::73::2 1042 | 20::75::4 1043 | 20::76::1 1044 | 20::77::4 1045 | 20::78::1 1046 | 20::79::1 1047 | 20::84::2 1048 | 20::85::2 1049 | 20::88::3 1050 | 20::89::1 1051 | 20::90::3 1052 | 20::91::1 1053 | 20::92::2 1054 | 20::93::1 1055 | 20::94::4 1056 | 20::97::1 1057 | 21::0::1 1058 | 21::2::4 1059 | 21::3::1 1060 | 21::7::2 1061 | 21::11::1 1062 | 21::12::1 1063 | 21::13::1 1064 | 21::14::3 1065 | 21::17::1 1066 | 21::19::1 1067 | 21::20::1 1068 | 21::21::1 1069 | 21::22::1 1070 | 21::23::1 1071 | 21::24::1 1072 | 21::27::1 1073 | 21::29::5 1074 | 21::30::2 1075 | 21::38::1 1076 | 21::40::2 1077 | 21::43::3 1078 | 21::44::1 1079 | 21::45::1 1080 | 21::46::1 1081 | 21::48::1 1082 | 21::51::1 1083 | 21::53::5 1084 | 21::54::1 1085 | 21::55::1 1086 | 21::56::1 1087 | 21::58::3 1088 | 21::59::3 1089 | 21::64::1 1090 | 21::66::1 1091 | 21::68::1 1092 | 21::71::1 1093 | 21::73::1 1094 | 21::74::4 1095 | 21::80::1 1096 | 21::81::1 1097 | 21::83::1 1098 | 21::84::1 1099 | 21::85::3 1100 | 21::87::4 1101 | 21::89::2 1102 | 21::92::2 1103 | 21::96::3 1104 | 21::99::1 1105 | 22::0::1 1106 | 22::3::2 1107 | 22::5::2 1108 | 22::6::2 1109 | 22::9::1 1110 | 22::10::1 1111 | 22::11::1 1112 | 22::13::1 1113 | 22::14::1 1114 | 22::16::1 1115 | 22::18::3 1116 | 22::19::1 1117 | 22::22::5 1118 | 22::25::1 1119 | 22::26::1 1120 | 22::29::3 1121 | 22::30::5 1122 | 22::32::4 1123 | 22::33::1 1124 | 22::35::1 1125 | 22::36::3 1126 | 22::37::1 1127 | 22::40::1 1128 | 22::41::3 1129 | 22::44::1 1130 | 22::45::2 1131 | 22::48::1 1132 | 22::51::5 1133 | 22::55::1 1134 | 22::56::2 1135 | 22::60::3 1136 | 22::61::1 1137 | 22::62::4 1138 | 22::63::1 1139 | 22::65::1 1140 | 22::66::1 1141 | 22::68::4 1142 | 22::69::4 1143 | 22::70::3 1144 | 22::71::1 1145 | 22::74::5 1146 | 22::75::5 1147 | 22::78::1 1148 | 22::80::3 1149 | 22::81::1 1150 | 22::82::1 1151 | 22::84::1 1152 | 22::86::1 1153 | 22::87::3 1154 | 22::88::5 1155 | 22::90::2 1156 | 22::92::3 1157 | 22::95::2 1158 | 22::96::2 1159 | 22::98::4 1160 | 22::99::1 1161 | 23::0::1 1162 | 23::2::1 1163 | 23::4::1 1164 | 23::6::2 1165 | 23::10::4 1166 | 23::12::1 1167 | 23::13::4 1168 | 23::14::1 1169 | 23::15::1 1170 | 23::18::4 1171 | 23::22::2 1172 | 23::23::4 1173 | 23::24::1 1174 | 23::25::1 1175 | 23::26::1 1176 | 23::27::5 1177 | 23::28::1 1178 | 23::29::1 1179 | 23::30::4 1180 | 23::32::5 1181 | 23::33::2 1182 | 23::36::3 1183 | 23::37::1 1184 | 23::38::1 1185 | 23::39::1 1186 | 23::43::1 1187 | 23::48::5 1188 | 23::49::5 1189 | 23::50::4 1190 | 23::53::1 1191 | 23::55::5 1192 | 23::57::1 1193 | 23::59::1 1194 | 23::60::1 1195 | 23::61::1 1196 | 23::64::4 1197 | 23::65::5 1198 | 23::66::2 1199 | 23::67::1 1200 | 23::68::3 1201 | 23::69::1 1202 | 23::72::1 1203 | 23::73::3 1204 | 23::77::1 1205 | 23::82::2 1206 | 23::83::1 1207 | 23::84::1 1208 | 23::85::1 1209 | 23::87::3 1210 | 23::88::1 1211 | 23::95::2 1212 | 23::97::1 1213 | 24::4::1 1214 | 24::6::3 1215 | 24::7::1 1216 | 24::10::2 1217 | 24::12::1 1218 | 24::15::1 1219 | 24::19::1 1220 | 24::24::1 1221 | 24::27::3 1222 | 24::30::5 1223 | 24::31::1 1224 | 24::32::3 1225 | 24::33::1 1226 | 24::37::1 1227 | 
24::39::1 1228 | 24::40::1 1229 | 24::42::1 1230 | 24::43::3 1231 | 24::45::2 1232 | 24::46::1 1233 | 24::47::1 1234 | 24::48::1 1235 | 24::49::1 1236 | 24::50::1 1237 | 24::52::5 1238 | 24::57::1 1239 | 24::59::4 1240 | 24::63::4 1241 | 24::65::1 1242 | 24::66::1 1243 | 24::67::1 1244 | 24::68::3 1245 | 24::69::5 1246 | 24::71::1 1247 | 24::72::4 1248 | 24::77::4 1249 | 24::78::1 1250 | 24::80::1 1251 | 24::82::1 1252 | 24::84::1 1253 | 24::86::1 1254 | 24::87::1 1255 | 24::88::2 1256 | 24::89::1 1257 | 24::90::5 1258 | 24::91::1 1259 | 24::92::1 1260 | 24::94::2 1261 | 24::95::1 1262 | 24::96::5 1263 | 24::98::1 1264 | 24::99::1 1265 | 25::1::3 1266 | 25::2::1 1267 | 25::7::1 1268 | 25::9::1 1269 | 25::12::3 1270 | 25::16::3 1271 | 25::17::1 1272 | 25::18::1 1273 | 25::20::1 1274 | 25::22::1 1275 | 25::23::1 1276 | 25::26::2 1277 | 25::29::1 1278 | 25::30::1 1279 | 25::31::2 1280 | 25::33::4 1281 | 25::34::3 1282 | 25::35::2 1283 | 25::36::1 1284 | 25::37::1 1285 | 25::40::1 1286 | 25::41::1 1287 | 25::43::1 1288 | 25::47::4 1289 | 25::50::1 1290 | 25::51::1 1291 | 25::53::1 1292 | 25::56::1 1293 | 25::58::2 1294 | 25::64::2 1295 | 25::67::2 1296 | 25::68::1 1297 | 25::70::1 1298 | 25::71::4 1299 | 25::73::1 1300 | 25::74::1 1301 | 25::76::1 1302 | 25::79::1 1303 | 25::82::1 1304 | 25::84::2 1305 | 25::85::1 1306 | 25::91::3 1307 | 25::92::1 1308 | 25::94::1 1309 | 25::95::1 1310 | 25::97::2 1311 | 26::0::1 1312 | 26::1::1 1313 | 26::2::1 1314 | 26::3::1 1315 | 26::4::4 1316 | 26::5::2 1317 | 26::6::3 1318 | 26::7::5 1319 | 26::13::3 1320 | 26::14::1 1321 | 26::16::1 1322 | 26::18::3 1323 | 26::20::1 1324 | 26::21::3 1325 | 26::22::5 1326 | 26::23::5 1327 | 26::24::5 1328 | 26::27::1 1329 | 26::31::1 1330 | 26::35::1 1331 | 26::36::4 1332 | 26::40::1 1333 | 26::44::1 1334 | 26::45::2 1335 | 26::47::1 1336 | 26::48::1 1337 | 26::49::3 1338 | 26::50::2 1339 | 26::52::1 1340 | 26::54::4 1341 | 26::55::1 1342 | 26::57::3 1343 | 26::58::1 1344 | 26::61::1 1345 | 26::62::2 1346 | 26::66::1 1347 | 26::68::4 1348 | 26::71::1 1349 | 26::73::4 1350 | 26::76::1 1351 | 26::81::3 1352 | 26::85::1 1353 | 26::86::3 1354 | 26::88::5 1355 | 26::91::1 1356 | 26::94::5 1357 | 26::95::1 1358 | 26::96::1 1359 | 26::97::1 1360 | 27::0::1 1361 | 27::9::1 1362 | 27::10::1 1363 | 27::18::4 1364 | 27::19::3 1365 | 27::20::1 1366 | 27::22::2 1367 | 27::24::2 1368 | 27::25::1 1369 | 27::27::3 1370 | 27::28::1 1371 | 27::29::1 1372 | 27::31::1 1373 | 27::33::3 1374 | 27::40::1 1375 | 27::42::1 1376 | 27::43::1 1377 | 27::44::3 1378 | 27::45::1 1379 | 27::51::3 1380 | 27::52::1 1381 | 27::55::3 1382 | 27::57::1 1383 | 27::59::1 1384 | 27::60::1 1385 | 27::61::1 1386 | 27::64::1 1387 | 27::66::3 1388 | 27::68::1 1389 | 27::70::1 1390 | 27::71::2 1391 | 27::72::1 1392 | 27::75::3 1393 | 27::78::1 1394 | 27::80::3 1395 | 27::82::1 1396 | 27::83::3 1397 | 27::86::1 1398 | 27::87::2 1399 | 27::90::1 1400 | 27::91::1 1401 | 27::92::1 1402 | 27::93::1 1403 | 27::94::2 1404 | 27::95::1 1405 | 27::98::1 1406 | 28::0::3 1407 | 28::1::1 1408 | 28::2::4 1409 | 28::3::1 1410 | 28::6::1 1411 | 28::7::1 1412 | 28::12::5 1413 | 28::13::2 1414 | 28::14::1 1415 | 28::15::1 1416 | 28::17::1 1417 | 28::19::3 1418 | 28::20::1 1419 | 28::23::3 1420 | 28::24::3 1421 | 28::27::1 1422 | 28::29::1 1423 | 28::33::1 1424 | 28::34::1 1425 | 28::36::1 1426 | 28::38::2 1427 | 28::39::2 1428 | 28::44::1 1429 | 28::45::1 1430 | 28::49::4 1431 | 28::50::1 1432 | 28::52::1 1433 | 28::54::1 1434 | 28::56::1 1435 | 28::57::3 1436 | 28::58::1 1437 | 
28::59::1 1438 | 28::60::1 1439 | 28::62::3 1440 | 28::63::1 1441 | 28::65::1 1442 | 28::75::1 1443 | 28::78::1 1444 | 28::81::5 1445 | 28::82::4 1446 | 28::83::1 1447 | 28::85::1 1448 | 28::88::2 1449 | 28::89::4 1450 | 28::90::1 1451 | 28::92::5 1452 | 28::94::1 1453 | 28::95::2 1454 | 28::98::1 1455 | 28::99::1 1456 | 29::3::1 1457 | 29::4::1 1458 | 29::5::1 1459 | 29::7::2 1460 | 29::9::1 1461 | 29::10::3 1462 | 29::11::1 1463 | 29::13::3 1464 | 29::14::1 1465 | 29::15::1 1466 | 29::17::3 1467 | 29::19::3 1468 | 29::22::3 1469 | 29::23::4 1470 | 29::25::1 1471 | 29::29::1 1472 | 29::31::1 1473 | 29::32::4 1474 | 29::33::2 1475 | 29::36::2 1476 | 29::38::3 1477 | 29::39::1 1478 | 29::42::1 1479 | 29::46::5 1480 | 29::49::3 1481 | 29::51::2 1482 | 29::59::1 1483 | 29::61::1 1484 | 29::62::1 1485 | 29::67::1 1486 | 29::68::3 1487 | 29::69::1 1488 | 29::70::1 1489 | 29::74::1 1490 | 29::75::1 1491 | 29::79::2 1492 | 29::80::1 1493 | 29::81::2 1494 | 29::83::1 1495 | 29::85::1 1496 | 29::86::1 1497 | 29::90::4 1498 | 29::93::1 1499 | 29::94::4 1500 | 29::97::1 1501 | 29::99::1 1502 | -------------------------------------------------------------------------------- /docs/universal_recommender.md: -------------------------------------------------------------------------------- 1 | # Universal Recommender 2 | 3 | ## Quick Start 4 | 5 | 1. [Install the PredictionIO framework](https://docs.prediction.io/install/), and **be sure to choose HBase and Elasticsearch** for storage. This template requires Elasticsearch. 6 | 2. Make sure the PIO console and services are running; check with `pio status`. 7 | 3. [Install this template](https://docs.prediction.io/start/download/), and **be sure to specify this template** with `pio template get PredictionIO/template-scala-parallel-universal-recommendation`. 8 | 9 | **To import and experiment with the simple example data** 10 | 11 | 1. Create a new app name and change `appName` in `engine.json`. 12 | 2. Run `pio app new **your-new-app-name**`. 13 | 3. Import sample events by running `python examples/import_handmade.py --access_key **your-access-key**`, where the key can be retrieved with `pio app list`. 14 | 4. The engine.json file in the root directory of your new UR template is set up for the data you just imported (make sure to create a new one for your data). Edit this file and change the `appName` parameter to match what you called the app in step #2. 15 | 5. Perform `pio build`, `pio train`, and `pio deploy`. 16 | 6. To execute some sample queries run `./examples/query-handmade.sh`. 17 | 18 | If there are timeouts, enable the delays that are commented out in the script—for now. In the production environment the engines will "warm up" with caching and will execute queries much faster. Also all services can be configured or scaled to meet virtually any performance needs. 19 | 20 | **See the [Github README.md](https://github.com/PredictionIO/template-scala-parallel-universal-recommendation) for further usage instructions** 21 | 22 | ## What is a Universal Recommender 23 | 24 | The Universal Recommender (UR) will accept a range of data, auto-correlate it, and allow for very flexible queries. The UR is different from most recommenders in these ways: 25 | 26 | * It takes a single very strong "primary" event type—one that clearly reflects a user's preference—and correlates any number of other event types to the primary event. This has the effect of using virtually any user action to recommend the primary action. Much of a user's clickstream can be used to make recommendations.
If a user has no history of the primary action (purchase, for instance) but does have a history of views, personalized recommendations for purchases can still be made. With user purchase history the recommendations become better. ALS-type recommenders have been used with event weights, but except for ratings these often do not result in better performance.
27 | * It can boost and filter based on events or item metadata/properties. This means it can give personalized recs that are biased toward “SciFi” and filtered to only include “Promoted” items when the business rules call for this.
28 | * It can use a user's context to make recommendations even when the user is new. If usage data has been gathered for other users for referring URL, device type, or location, for instance, there may be a correlation between this data and the items preferred. The UR can detect this **if** it exists and recommend based on this context, even to new users. We call these "micro-segmented" recommendations since they are not personal but group users based on limited contextual information. These will not be as good as when more is known about the user but may be better than simply returning popular items.
29 | * It includes a fallback to some form of item popularity when there is no other information known about the user (not implemented in v0.1.0).
30 | * All of the above can be mixed into a single query for blended results, so the query can be tuned to a great many applications. Also, since only one query is made and boosting is supported, a query can be constructed with several fallbacks. Usage data is most important, so boost that high; micro-segmenting data may be better than popularity, so boost that lower; and popularity fills in if no other recommendations are available. 31 |
32 | Other features: 33 |
34 | * Makes recommendations based on real-time user history. Even anonymous users will get recommendations if they have recorded preference history and a user-id. There is no hard requirement to retrain the model to make this happen. 35 |
36 | TBD: 37 |
38 | * Date range filters based on date properties of items
39 | * Popularity-type recommendation backfill for returning "trending" or "hot" items when no other recommendations are available from the training data.
40 | * Content-based correlators for content-based recommendations 41 |
42 | ## References 43 |
44 | * Other documentation of the algorithm is [here](http://mahout.apache.org/users/algorithms/intro-cooccurrence-spark.html)
45 | * A free ebook, which talks about the general idea: [Practical Machine Learning](https://www.mapr.com/practical-machine-learning).
46 | * A slide deck, which talks about mixing actions and other indicator types, including content-based ones: [Creating a Unified Recommender](http://www.slideshare.net/pferrel/unified-recommender-39986309?ref=http://occamsmachete.com/ml/)
47 | * Two blog posts: What's New in Recommenders: part [#1](http://occamsmachete.com/ml/2014/08/11/mahout-on-spark-whats-new-in-recommenders/) and part [#2](http://occamsmachete.com/ml/2014/09/09/mahout-on-spark-whats-new-in-recommenders-part-2/)
48 | * A post describing the log-likelihood ratio: [Surprise and Coincidence](http://tdunning.blogspot.com/2008/03/surprise-and-coincidence.html). LLR is used to reduce noise in the data while keeping the calculations O(n) complexity.
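To make the queries described above concrete, here is a minimal sketch of sending them from Python instead of curl. It is only an illustration: it assumes the PredictionIO Python SDK, an engine deployed at localhost:8000 as in the Quick Start, and items that actually carry `categories` and `countries` properties; substitute your own field names.

```python
# Minimal query sketch (assumes the predictionio Python SDK is installed,
# the engine is deployed on localhost:8000, and items carry "categories"
# and "countries" properties -- adjust names to your own data).
import predictionio

engine = predictionio.EngineClient(url="http://localhost:8000")

# Personalized recommendations from the user's event history.
print engine.send_query({"user": "u1", "num": 4})

# Blended query: personalized results boosted toward "Tablets" (bias > 1)
# and hard-filtered by country (a bias of -1 means filter, not boost).
print engine.send_query({
    "user": "u1",
    "fields": [
        {"name": "categories", "values": ["Tablets"], "bias": 1.05},
        {"name": "countries", "values": ["Estados Unidos Mexicanos"], "bias": -1}
    ]
})
```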
49 |
-------------------------------------------------------------------------------- /engine.json: --------------------------------------------------------------------------------
1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "name": "sample-handmade-data.txt", 9 | "appName": "handmade", 10 | "eventNames": ["purchase", "view"] 11 | } 12 | }, 13 | "sparkConf": { 14 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 15 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 16 | "spark.kryo.referenceTracking": "false", 17 | "spark.kryoserializer.buffer": "300m", 18 | "es.index.auto.create": "true" 19 | }, 20 | "algorithms": [ 21 | { 22 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventNames", 23 | "name": "ur", 24 | "params": { 25 | "appName": "handmade", 26 | "indexName": "urindex", 27 | "typeName": "items", 28 | "comment": "must have data for the first event or the model will not build, other events are optional", 29 | "indicators": [ 30 | { 31 | "name": "purchase" 32 | },{ 33 | "name": "view", 34 | "maxCorrelatorsPerItem": 50 35 | } 36 | ] 37 | } 38 | } 39 | ] 40 | } 41 |
-------------------------------------------------------------------------------- /engine.json.minimum: --------------------------------------------------------------------------------
1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "name": "sample-handmade-data.txt", 9 | "appName": "handmade", 10 | "eventNames": ["purchase", "view"] 11 | } 12 | }, 13 | "sparkConf": { 14 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 15 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 16 | "spark.kryo.referenceTracking": "false", 17 | "spark.kryoserializer.buffer.mb": "300", 18 | "spark.kryoserializer.buffer": "300m", 19 | "es.index.auto.create": "true" 20 | }, 21 | "algorithms": [ 22 | { 23 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventNames", 24 | "name": "ur", 25 | "params": { 26 | "appName": "handmade", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "comment": "must have data for the first event or the model will not build, other events are optional", 30 | "eventNames": ["purchase", "view"] 31 | } 32 | } 33 | ] 34 | } 35 | 36 |
-------------------------------------------------------------------------------- /engine.json.spark-tuning: --------------------------------------------------------------------------------
1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "name": "sample-handmade-data.txt", 9 | "appName": "handmade", 10 | "eventNames": ["purchase", "view"] 11 | } 12 | }, 13 | "sparkConf": { 14 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 15 | "spark.kryo.registrator":
"org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 16 | "spark.kryo.referenceTracking": "false", 17 | "spark.kryoserializer.buffer": "300m", 18 | "spark.executor.memory": "4g", 19 | "spark.executor.cores": "2", 20 | "spark.task.cpus": "2", 21 | "spark.default.parallelism": "16", 22 | "es.index.auto.create": "true" 23 | }, 24 | "algorithms": [ 25 | { 26 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventsNames", 27 | "name": "ur", 28 | "params": { 29 | "appName": "handmade", 30 | "indexName": "urindex", 31 | "typeName": "items", 32 | "comment": "must have data for the first event or the model will not build, other events are optional", 33 | "eventNames": ["purchase", "view"] 34 | } 35 | } 36 | ] 37 | } 38 | 39 | -------------------------------------------------------------------------------- /event-names-test-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "name": "sample-handmade-data.txt", 9 | "appName": "handmade", 10 | "eventNames": ["purchase", "view"] 11 | } 12 | }, 13 | "sparkConf": { 14 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 15 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 16 | "spark.kryo.referenceTracking": "false", 17 | "spark.kryoserializer.buffer.mb": "300", 18 | "spark.kryoserializer.buffer": "300m", 19 | "spark.executor.memory": "4g", 20 | "es.index.auto.create": "true" 21 | }, 22 | "algorithms": [ 23 | { 24 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventsNames", 25 | "name": "ur", 26 | "params": { 27 | "appName": "handmade", 28 | "indexName": "urindex", 29 | "typeName": "items", 30 | "comment": "must have data for the first event or the model will not build, other events are optional", 31 | "eventNames": ["purchase", "view"], 32 | "blacklistEvents": [] 33 | } 34 | } 35 | ] 36 | } 37 | 38 | -------------------------------------------------------------------------------- /examples/handmade-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "name": "sample-handmade-data.txt", 9 | "appName": "handmade", 10 | "eventNames": ["purchase", "view"], 11 | "eventWindow": { 12 | "duration": "24 days", 13 | "removeDuplicates":true, 14 | "compressProperties":true 15 | } 16 | } 17 | }, 18 | "sparkConf": { 19 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 20 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 21 | "spark.kryo.referenceTracking": "false", 22 | "spark.kryoserializer.buffer": "300m", 23 | "es.index.auto.create": "true" 24 | }, 25 | "algorithms": [ 26 | { 27 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventsNames", 28 | "name": "ur", 29 | "params": { 30 | "appName": "handmade", 31 | "indexName": "urindex", 32 | "typeName": "items", 33 | "comment": "must have data 
for the first event or the model will not build, other events are optional", 34 | "indicators": [ 35 | { 36 | "name": "purchase" 37 | },{ 38 | "name": "view", 39 | "maxCorrelatorsPerItem": 50 40 | } 41 | ], 42 | "availableDateName": "available", 43 | "expireDateName": "expires", 44 | "dateName": "date", 45 | "num": 4 46 | } 47 | } 48 | ] 49 | } 50 | -------------------------------------------------------------------------------- /examples/hot-3-day-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "popularity-only", 3 | "description": "Default settings", 4 | "engineFactory": "org.template.RecommendationEngine", 5 | "datasource": { 6 | "params" : { 7 | "name": "sample-handmade-data.txt", 8 | "appName": "handmade", 9 | "eventNames": ["purchase"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer": "300m", 17 | "spark.executor.memory": "4g", 18 | "es.index.auto.create": "true" 19 | }, 20 | "algorithms": [ 21 | { 22 | "comment": "setup to only calculate a popularity model for *hot* and add it to the existing model for backfill", 23 | "name": "ur", 24 | "params": { 25 | "appName": "handmade", 26 | "indexName": "urindex", 27 | "typeName": "items", 28 | "eventNames": ["purchase"], 29 | "rankings": [{ 30 | "type": "hot", 31 | "duration": 259200 32 | }] 33 | } 34 | } 35 | ] 36 | } 37 | 38 | -------------------------------------------------------------------------------- /examples/import_handmade.py: -------------------------------------------------------------------------------- 1 | """ 2 | Import sample data for recommendation engine 3 | """ 4 | 5 | import predictionio 6 | import argparse 7 | import random 8 | import datetime 9 | import pytz 10 | 11 | RATE_ACTIONS_DELIMITER = "," 12 | PROPERTIES_DELIMITER = ":" 13 | SEED = 1 14 | 15 | 16 | def import_events(client, file): 17 | f = open(file, 'r') 18 | random.seed(SEED) 19 | count = 0 20 | # year, month, day[, hour[, minute[, second[ 21 | #event_date = datetime.datetime(2015, 8, 13, 12, 24, 41) 22 | now_date = datetime.datetime.now(pytz.utc) # - datetime.timedelta(days=2.7) 23 | current_date = now_date 24 | event_time_increment = datetime.timedelta(days= -0.8) 25 | available_date_increment = datetime.timedelta(days= 0.8) 26 | event_date = now_date - datetime.timedelta(days= 2.4) 27 | available_date = event_date + datetime.timedelta(days=-2) 28 | expire_date = event_date + datetime.timedelta(days=2) 29 | print "Importing data..." 
30 | 31 | for line in f: 32 | data = line.rstrip('\r\n').split(RATE_ACTIONS_DELIMITER)
33 | # For demonstration purposes, action names are taken from the input, along with secondary actions
34 | # For the UR, add some item metadata 35 |
36 | if (data[1] == "purchase"): 37 | client.create_event( 38 | event=data[1], 39 | entity_type="user", 40 | entity_id=data[0], 41 | target_entity_type="item", 42 | target_entity_id=data[2], 43 | event_time = current_date 44 | ) 45 | print "Event: " + data[1] + " entity_id: " + data[0] + " target_entity_id: " + data[2] + \ 46 | " current_date: " + current_date.isoformat()
47 | elif (data[1] == "view"): # assumes other event type is 'view' 48 | client.create_event( 49 | event=data[1], 50 | entity_type="user", 51 | entity_id=data[0], 52 | target_entity_type="item", # type of item in this action 53 | target_entity_id=data[2], 54 | event_time = current_date 55 | ) 56 | print "Event: " + data[1] + " entity_id: " + data[0] + " target_entity_id: " + data[2] + \ 57 | " current_date: " + current_date.isoformat()
58 | elif (data[1] == "$set"): # must be a set event 59 | properties = data[2].split(PROPERTIES_DELIMITER) 60 | prop_name = properties.pop(0) 61 | prop_value = properties if not prop_name == 'defaultRank' else float(properties[0]) 62 | client.create_event( 63 | event=data[1], 64 | entity_type="item", 65 | entity_id=data[0], 66 | event_time=current_date, 67 | properties={prop_name: prop_value} 68 | ) 69 | print "Event: " + data[1] + " entity_id: " + data[0] + " properties/"+prop_name+": " + str(properties) + \ 70 | " current_date: " + current_date.isoformat()
71 | count += 1 72 | current_date += event_time_increment 73 |
74 | items = ['Iphone 6', 'Ipad-retina', 'Nexus', 'Surface', 'Iphone 4', 'Galaxy', 'Iphone 5'] 75 | print "All items: " + str(items) 76 | for item in items: 77 |
78 | client.create_event( 79 | event="$set", 80 | entity_type="item", 81 | entity_id=item, 82 | properties={"expires": expire_date.isoformat(), 83 | "available": available_date.isoformat(), 84 | "date": event_date.isoformat()} 85 | ) 86 | print "Event: $set entity_id: " + item + \ 87 | " properties/availableDate: " + available_date.isoformat() + \ 88 | " properties/date: " + event_date.isoformat() + \ 89 | " properties/expireDate: " + expire_date.isoformat()
90 | expire_date += available_date_increment 91 | event_date += available_date_increment 92 | available_date += available_date_increment 93 | count += 1 94 |
95 | f.close() 96 | print "%s events are imported."
% count 97 | 98 | 99 | if __name__ == '__main__': 100 | parser = argparse.ArgumentParser( 101 | description="Import sample data for recommendation engine") 102 | parser.add_argument('--access_key', default='invalid_access_key') 103 | parser.add_argument('--url', default="http://localhost:7070") 104 | parser.add_argument('--file', default="./data/sample-handmade-data.txt") 105 |
106 | args = parser.parse_args() 107 | print args 108 |
109 | client = predictionio.EventClient( 110 | access_key=args.access_key, 111 | url=args.url, 112 | threads=5, 113 | qsize=500) 114 | import_events(client, args.file) 115 |
-------------------------------------------------------------------------------- /examples/import_handmade_pop_test.py: --------------------------------------------------------------------------------
1 | """ 2 | Import sample data for recommendation engine 3 | """ 4 |
5 | import predictionio 6 | import argparse 7 | import random 8 | import datetime 9 | import pytz 10 | from tzlocal import get_localzone 11 |
12 | RATE_ACTIONS_DELIMITER = "," 13 | PROPERTIES_DELIMITER = ":" 14 | SEED = 1 15 | local_tz = get_localzone() 16 |
17 | def import_events(client, file, days_offset): 18 | f = open(file, 'r') 19 | random.seed(SEED) 20 | count = 0 21 | event_date = datetime.datetime.now(tz=local_tz) + datetime.timedelta(days=days_offset) 22 | print "Importing data..." 23 |
24 | for line in f: 25 | data = line.rstrip('\r\n').split(RATE_ACTIONS_DELIMITER)
26 | # For demonstration purposes, action names are taken from the input, along with secondary actions
27 | # For the UR, add some item metadata 28 |
29 | if (data[1] == "purchase"): 30 | client.create_event( 31 | event=data[1], 32 | entity_type="user", 33 | entity_id=data[0], 34 | target_entity_type="item", 35 | target_entity_id=data[2], 36 | event_time=event_date 37 | ) 38 | print "Event: " + data[1] + " user: " + data[0] + " item: " + data[2] + " date: " + str(event_date)
39 | elif (data[1] == "view"): # assumes other event type is 'view' 40 | client.create_event( 41 | event=data[1], 42 | entity_type="user", 43 | entity_id=data[0], 44 | target_entity_type="item", # type of item in this action 45 | target_entity_id=data[2], 46 | event_time=event_date 47 | ) 48 | print "Event: " + data[1] + " user: " + data[0] + " item: " + data[2] + " date: " + str(event_date)
49 | count += 1 50 | f.close() 51 | print "%s events are imported." % count 52 | 53 |
54 | if __name__ == '__main__': 55 | parser = argparse.ArgumentParser( 56 | description="Import sample data for recommendation engine") 57 | parser.add_argument('--access_key', default='invalid_access_key') 58 | parser.add_argument('--url', default="http://localhost:7070") 59 | parser.add_argument('--file1', default="./data/sample-handmade-data1.txt") 60 | parser.add_argument('--file2', default="./data/sample-handmade-data2.txt") 61 | parser.add_argument('--file3', default="./data/sample-handmade-data3.txt") 62 |
63 | args = parser.parse_args() 64 | print args 65 |
66 | client = predictionio.EventClient( 67 | access_key=args.access_key, 68 | url=args.url, 69 | threads=5, 70 | qsize=500)
71 | # this is to spread events around two time periods: now to 3 days ago, and 4 days ago to 6 days ago.
72 | # popular, trending, and hot are computed over a 3-day period ending on offset_days, which would be the most recent;
73 | # the duration of the actual pop-model calc is in the engine.json, so these dates work with some multiple of a
74 | # day for that value (expressed in seconds).
This allows us to test the pop-model as well as the "offsetDate"
75 | # in the params for training. The pop-model queries should have the same results for both timespans if the
76 | # "offsetDate" is now, and now - 4 days
77 | import_events(client, args.file1, 0)# last 3 days 78 | import_events(client, args.file2, -1) 79 | import_events(client, args.file3, -2)# first batch ends 2 days in the past 80 | import_events(client, args.file1, -4)# starting 4 days in the past, so skips 2 days for tests of the offset date 81 | import_events(client, args.file2, -5) 82 | import_events(client, args.file3, -6) 83 |
-------------------------------------------------------------------------------- /examples/import_movielens_eventserver.py: --------------------------------------------------------------------------------
1 | """ 2 | Import sample data for recommendation engine 3 | """ 4 |
5 | import predictionio 6 | import argparse 7 | import random 8 |
9 | RATE_ACTIONS_DELIMITER = "::" 10 | SEED = 3 11 |
12 | def import_events(client, file): 13 | f = open(file, 'r') 14 | random.seed(SEED) 15 | count = 0 16 | print "Importing data..." 17 | for line in f: 18 | data = line.rstrip('\r\n').split(RATE_ACTIONS_DELIMITER)
19 | # For demonstration purposes, randomly mix in some buy events
20 | # For the UR, add some item metadata
21 | if (random.randint(0, 1) == 1): 22 | client.create_event( 23 | event="rate", 24 | entity_type="user", 25 | entity_id=data[0], 26 | target_entity_type="item", 27 | target_entity_id=data[1], 28 | ) 29 | else: 30 | client.create_event( 31 | event="buy", 32 | entity_type="user", 33 | entity_id=data[0], 34 | target_entity_type="item", 35 | target_entity_id=data[1], 36 | )
37 | if (random.randint(0, 1) == 1): 38 | client.create_event( 39 | event="$set", 40 | entity_type="item", 41 | entity_id=data[1], 42 | properties= { "category": ["cat1", "cat5"] } 43 | ) 44 | else: 45 | client.create_event( 46 | event="$set", 47 | entity_type="item", 48 | entity_id=data[1], 49 | properties= { "category": ["cat1", "cat2"] } 50 | )
51 | count += 1 52 | f.close() 53 | print "%s events are imported." % count 54 |
55 | if __name__ == '__main__': 56 | parser = argparse.ArgumentParser( 57 | description="Import sample data for recommendation engine") 58 | parser.add_argument('--access_key', default='invalid_access_key') 59 | parser.add_argument('--url', default="http://localhost:7070") 60 | parser.add_argument('--file', default="./data/sample_movielens_data.txt") 61 |
62 | args = parser.parse_args() 63 | print args 64 |
65 | client = predictionio.EventClient( 66 | access_key=args.access_key, 67 | url=args.url, 68 | threads=5, 69 | qsize=500) 70 | import_events(client, args.file) 71 |
-------------------------------------------------------------------------------- /examples/integration-test: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash 2 |
3 | # exit on any error 4 | set -e 5 |
6 | echo "" 7 | echo "Integration test for The Universal Recommender." 8 | echo "If some step fails check that your engine.json file has been restored" 9 | echo "or look for it in 'user-engine.json'" 10 | echo "" 11 |
12 | echo "Checking for needed files" 13 | if [ ! -f examples/handmade-engine.json ]; then 14 | echo "File not found: examples/handmade-engine.json" 15 | exit 1 16 | fi 17 |
18 | if [ !
-f data/sample-handmade-data.txt ]; then 19 | echo "File not found: data/sample-handmade-data.txt" 20 | exit 1 21 | fi 22 |
23 | if [ -f user-engine.json ]; then 24 | echo "File user-engine.json found, this may be an error so we cannot replace engine.json" 25 | exit 1 26 | fi 27 |
28 | if [ ! -f data/integration-test-expected.txt ]; then 29 | echo "File not found: data/integration-test-expected.txt" 30 | exit 1 31 | fi 32 |
33 | echo "" 34 | echo "Checking status, should exit if pio is not running." 35 | pio status 36 | pio app new handmade || true 37 |
38 | echo "" 39 | echo "Checking to see if handmade app exists, should exit if not." 40 | pio app show handmade 41 |
42 | echo "" 43 | echo "Moving engine.json to user-engine.json" 44 | cp -n engine.json user-engine.json 45 |
46 | echo "" 47 | echo "Moving examples/handmade-engine.json to engine.json for integration test." 48 | cp examples/handmade-engine.json engine.json 49 |
50 | echo "" 51 | echo "Deleting handmade app data since the test is date dependent" 52 | pio app data-delete handmade -f 53 |
54 | echo "" 55 | echo "Importing data for integration test" 56 | # get the access_key from pio app list 57 | ACCESS_KEY=`pio app show handmade | grep Key | cut -f 7 -d ' '` 58 | echo -n "Access key: " 59 | echo $ACCESS_KEY 60 | python examples/import_handmade.py --access_key $ACCESS_KEY 61 |
62 | echo "" 63 | echo "Building and deploying model" 64 | pio build 65 | pio train -- --driver-memory 4g --executor-memory 4g 66 | echo "Model will remain deployed after this test" 67 | nohup pio deploy > deploy.out & 68 | echo "Waiting 30 seconds for the server to start" 69 | sleep 30 70 |
71 | #echo "" 72 | #echo "Running test query." 73 | #./examples/multi-query-handmade.sh > test.out 74 |
75 | # this is due to a bug where the first query had bad results 76 | #TODO: Investigate and squash 77 |
78 | ./examples/multi-query-handmade.sh > test.out 79 |
80 | echo "" 81 | echo "Restoring engine.json" 82 | mv user-engine.json engine.json 83 |
84 | echo "" 85 | echo "Differences between expected and actual results, none is a passing test." 86 | echo "Note: differences in ordering of results with the same score is allowed." 87 | diff data/integration-test-expected.txt test.out 88 |
89 | deploy_pid=`jps -lm | grep "onsole deploy" | cut -f 1 -d ' '` 90 | echo "Killing the deployed test PredictionServer" 91 | kill "$deploy_pid" 92 | 93 | 94 | 95 |
-------------------------------------------------------------------------------- /examples/integration-test-pop-model: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash 2 |
3 | # exit on any error 4 | set -e 5 |
6 | echo "" 7 | echo "Integration test for The Universal Recommender." 8 | echo "If some step fails check that your engine.json file has been restored" 9 | echo "or look for it in 'user-engine.json'" 10 | echo "" 11 |
12 | echo "Checking for needed files" 13 | if [ ! -f hot-3-day-engine.json ]; then 14 | echo "File not found: hot-3-day-engine.json" 15 | exit 1 16 | fi 17 |
18 | if [ ! -f trend-2-day-engine.json ]; then 19 | echo "File not found: trend-2-day-engine.json" 20 | exit 1 21 | fi 22 |
23 | if [ ! -f pop-1-day-engine.json ]; then 24 | echo "File not found: pop-1-day-engine.json" 25 | exit 1 26 | fi 27 |
28 | if [ ! -f examples/pop-test-query.sh ]; then 29 | echo "File not found: examples/pop-test-query.sh" 30 | exit 1 31 | fi 32 |
33 | if [ !
-f examples/import_handmade_pop_test.py ]; then 34 | echo "File not found: examples/import_handmade_pop_test.py" 35 | exit 1 36 | fi 37 |
38 | if [ -f user-engine.json ]; then 39 | echo "File user-engine.json found, this may be an error so we cannot replace engine.json" 40 | exit 1 41 | fi 42 |
43 | if [ ! -f data/integration-test-expected-pop-model.txt ]; then 44 | echo "File not found: data/integration-test-expected-pop-model.txt" 45 | # exit 1 46 | fi 47 |
48 | echo "" 49 | echo "Checking status, should exit if pio is not running." 50 | pio status 51 |
52 | echo "" 53 | echo "Checking to see if handmade app exists, should exit if not." 54 | pio app show handmade 55 |
56 | echo "" 57 | echo "Deleting handmade app data since the test is date dependent" 58 | pio app data-delete handmade 59 |
60 | echo "" 61 | echo "Importing data for integration test" 62 | # get the access_key from pio app list 63 | ACCESS_KEY=`pio app show handmade | grep Key | cut -f 7 -d ' '` 64 | echo -n "Access key: " 65 | echo $ACCESS_KEY 66 | python examples/import_handmade_pop_test.py --access_key $ACCESS_KEY 67 |
68 | echo "" 69 | echo "Moving engine.json to user-engine.json" 70 | cp -n engine.json user-engine.json 71 |
72 | echo "" 73 | echo "Popularity model integration test." 74 | cp hot-3-day-engine.json engine.json 75 |
76 | echo "" 77 | echo "=============== Building and deploying 'hot' model" 78 | pio build 79 | pio train -- --driver-memory 2g 80 | nohup pio deploy > deploy.out & 81 | echo "Waiting 20 seconds for the server to start" 82 | sleep 20 83 |
84 | echo "" 85 | echo "Running test query." 86 | ./examples/pop-test-query.sh > test.out 87 |
88 | echo "" 89 | cp trend-2-day-engine.json engine.json 90 |
91 | echo "" 92 | echo "=============== Building and deploying 'trending' model" 93 | pio build 94 | pio train -- --driver-memory 2g 95 | nohup pio deploy > deploy.out & 96 | echo "Waiting 20 seconds for the server to start" 97 | sleep 20 98 |
99 | echo "" 100 | echo "Running test query." 101 | ./examples/pop-test-query.sh >> test.out 102 |
103 | echo "" 104 | cp pop-1-day-engine.json engine.json 105 |
106 | echo "" 107 | echo "=============== Building and deploying 'popular' model" 108 | pio build 109 | pio train -- --driver-memory 2g 110 | nohup pio deploy > deploy.out & 111 | echo "Waiting 20 seconds for the server to start" 112 | sleep 20 113 |
114 | echo "" 115 | echo "Running test query." 116 | ./examples/pop-test-query.sh >> test.out 117 |
118 | echo "" 119 | echo "Restoring engine.json" 120 | mv user-engine.json engine.json 121 |
122 | echo "" 123 | echo "Differences between expected and actual results, none is a passing test:" 124 | diff data/integration-test-expected-pop-model.txt test.out 125 |
126 | echo "" 127 | echo "Note that the engine is still deployed until killed." 128 |
-------------------------------------------------------------------------------- /examples/multi-query-handmade.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash 2 |
3 | echo "" 4 | echo "Queries to illustrate many use cases on a small standard dataset and for an automated integration test." 5 | echo "" 6 | echo "WARNING: for this to produce the correct result you must:" 7 | echo " 1. Import data with" 8 | echo " $ python examples/import_handmade.py --access_key " 9 | echo " 2. Copy handmade-engine.json to engine.json." 10 | echo " 3. Run 'pio build', 'pio train', and 'pio deploy'" 11 | echo " 4.
The queries must be run the same day as the import was done because date filters are part of the test." 12 | echo "NOTE: due to available and expire dates you should never see the Iphone 5 or Iphone 6." 13 | 14 | echo "" 15 | echo "============ simple user recs ============" 16 | echo "" 17 | echo "Recommendations for user: u1" 18 | echo "" 19 | curl -H "Content-Type: application/json" -d ' 20 | { 21 | "user": "u1" 22 | }' http://localhost:8000/queries.json 23 | echo "" 24 | 25 | 26 | echo "" 27 | echo "Recommendations for user: U 2" 28 | echo "" 29 | curl -H "Content-Type: application/json" -d ' 30 | { 31 | "user": "U 2" 32 | }' http://localhost:8000/queries.json 33 | echo "" 34 | 35 | 36 | echo "" 37 | echo "Recommendations for user: u-3" 38 | echo "" 39 | curl -H "Content-Type: application/json" -d ' 40 | { 41 | "user": "u-3" 42 | }' http://localhost:8000/queries.json 43 | echo "" 44 | 45 | 46 | echo "" 47 | echo "Recommendations for user: u-4" 48 | echo "" 49 | curl -H "Content-Type: application/json" -d ' 50 | { 51 | "user": "u-4" 52 | }' http://localhost:8000/queries.json 53 | echo "" 54 | 55 | 56 | echo "" 57 | echo "Recommendations for user: u5" 58 | echo "" 59 | curl -H "Content-Type: application/json" -d ' 60 | { 61 | "user": "u5" 62 | }' http://localhost:8000/queries.json 63 | echo "" 64 | 65 | echo "" 66 | echo "============ simple similar item recs ============" 67 | echo "" 68 | echo "Recommendations for item: Iphone 4" 69 | echo "" 70 | curl -H "Content-Type: application/json" -d ' 71 | { 72 | "item": "Iphone 4" 73 | }' http://localhost:8000/queries.json 74 | echo "" 75 | 76 | echo "" 77 | echo "Recommendations for item: Ipad-retina" 78 | echo "" 79 | curl -H "Content-Type: application/json" -d ' 80 | { 81 | "item": "Ipad-retina" 82 | }' http://localhost:8000/queries.json 83 | echo "" 84 | 85 | echo "" 86 | echo "Recommendations for item: Nexus" 87 | echo "" 88 | curl -H "Content-Type: application/json" -d ' 89 | { 90 | "item": "Nexus" 91 | }' http://localhost:8000/queries.json 92 | echo "" 93 | 94 | echo "" 95 | echo "Recommendations for item: Galaxy" 96 | echo "" 97 | curl -H "Content-Type: application/json" -d ' 98 | { 99 | "item": "Galaxy" 100 | }' http://localhost:8000/queries.json 101 | echo "" 102 | 103 | echo "" 104 | echo "Recommendations for item: Surface" 105 | echo "" 106 | curl -H "Content-Type: application/json" -d ' 107 | { 108 | "item": "Surface" 109 | }' http://localhost:8000/queries.json 110 | echo "" 111 | 112 | echo "" 113 | echo "============ popular item recs only ============" 114 | echo "" 115 | echo "query with no item or user id, ordered by popularity" 116 | echo "" 117 | curl -H "Content-Type: application/json" -d ' 118 | { 119 | }' http://localhost:8000/queries.json 120 | echo "" 121 | 122 | echo "" 123 | echo "Recommendations for non-existant user: xyz, all from popularity" 124 | echo "" 125 | curl -H "Content-Type: application/json" -d ' 126 | { 127 | "user": "xyz" 128 | }' http://localhost:8000/queries.json 129 | echo "" 130 | 131 | echo "" 132 | echo "Recommendations for non-existant item: xyz, all from popularity" 133 | echo "" 134 | curl -H "Content-Type: application/json" -d ' 135 | { 136 | "item": "xyz" 137 | }' http://localhost:8000/queries.json 138 | echo "" 139 | 140 | 141 | echo "" 142 | echo "Recommendations for no user no item, all from popularity, Tablets filter" 143 | echo "" 144 | curl -H "Content-Type: application/json" -d ' 145 | { 146 | "fields": [{ 147 | "name": "categories", 148 | "values": ["Tablets"], 149 | "bias": -1 150 | 
}] 151 | }' http://localhost:8000/queries.json 152 | echo "" 153 | 154 | 155 | echo "" 156 | echo "Recommendations for no user no item, all from popularity, Tablets boost" 157 | echo "" 158 | curl -H "Content-Type: application/json" -d ' 159 | { 160 | "fields": [{ 161 | "name": "categories", 162 | "values": ["Tablets"], 163 | "bias": 1.05 164 | }] 165 | }' http://localhost:8000/queries.json 166 | echo "" 167 | 168 | 169 | echo "" 170 | echo "Recommendations for no user no item, all from popularity, Tablets boost, Estados Unidos Mexicanos filter" 171 | echo "" 172 | curl -H "Content-Type: application/json" -d ' 173 | { 174 | "fields": [{ 175 | "name": "categories", 176 | "values": ["Tablets"], 177 | "bias": 1.05 178 | }, { 179 | "name": "countries", 180 | "values": ["Estados Unidos Mexicanos"], 181 | "bias": -1 182 | }] 183 | }' http://localhost:8000/queries.json 184 | echo "" 185 | 186 | 187 | echo "" 188 | echo "============ dateRange filter ============" 189 | echo "" 190 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 191 | BEFORE=`date --date="tomorrow" --iso-8601=seconds` 192 | AFTER=`date --date="1 day ago" --iso-8601=seconds` 193 | else 194 | BEFORE=`date -v +1d +"%Y-%m-%dT%H:%M:%SZ"` 195 | AFTER=`date -v -1d +"%Y-%m-%dT%H:%M:%SZ"` 196 | fi 197 | #echo "before: $BEFORE after: $AFTER" 198 | echo "Recommendations for user: u1" 199 | echo "" 200 | curl -H "Content-Type: application/json" -d " 201 | { 202 | \"user\": \"u1\", 203 | \"dateRange\": { 204 | \"name\": \"date\", 205 | \"before\": \"$BEFORE\", 206 | \"after\": \"$AFTER\" 207 | } 208 | }" http://localhost:8000/queries.json 209 | echo "" 210 | 211 | echo "" 212 | echo "============ query with item and user *EXPERIMENTAL* ============" 213 | # This is experimental, use at your own risk, not well founded in theory 214 | echo "" 215 | echo "Recommendations for no user no item, all from popularity, Tablets boost, Estados Unidos Mexicanos filter" 216 | echo "" 217 | curl -H "Content-Type: application/json" -d ' 218 | { 219 | "user": "u1", 220 | "item": "Iphone 4" 221 | }' http://localhost:8000/queries.json 222 | echo "" 223 | 224 | -------------------------------------------------------------------------------- /examples/multi-query-movielens.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo "" 3 | echo "Simple personalized query" 4 | echo "" 5 | curl -H "Content-Type: application/json" -d ' 6 | { 7 | "user": "1", 8 | "num": 10 9 | }' http://localhost:8000/queries.json 10 | echo "" 11 | 12 | #sleep 2 13 | 14 | echo "" 15 | echo "Simple similar item query" 16 | echo "" 17 | curl -H "Content-Type: application/json" -d ' 18 | { 19 | "item": "62", 20 | "num": 15 21 | }' http://localhost:8000/queries.json 22 | echo "" 23 | 24 | #sleep 2 25 | 26 | echo "" 27 | echo "Simple personalized query with category boost" 28 | echo "" 29 | curl -H "Content-Type: application/json" -d ' 30 | { 31 | "user": "1", 32 | "num": 20, 33 | "fields": [{ 34 | "name": "category", 35 | "values": ["cat5"], 36 | "bias": 1.005 37 | }] 38 | }' http://localhost:8000/queries.json 39 | echo "" 40 | echo "" 41 | -------------------------------------------------------------------------------- /examples/pop-engine-4-days-ago.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "popularity-only", 3 | "description": "Default settings", 4 | "engineFactory": "org.template.RecommendationEngine", 5 | "datasource": { 6 | "params" : { 7 | "name": 
"sample-handmade-data.txt", 8 | "appName": "handmade", 9 | "eventNames": ["purchase", "view"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer.mb": "300", 17 | "spark.kryoserializer.buffer": "300m", 18 | "spark.executor.memory": "4g", 19 | "es.index.auto.create": "true" 20 | }, 21 | "algorithms": [ 22 | { 23 | "comment": "setup to only calculate a popularity model for *hot* and add it to the existing model for backfill", 24 | "name": "ur", 25 | "params": { 26 | "appName": "handmade", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "eventNames": ["purchase", "view"], 30 | "recsModel": "backfill", 31 | "rankings": [{ 32 | "name": "popRank", 33 | "type": "popular", 34 | "eventNames": ["purchase", "view"], 35 | "duration": 259200, 36 | "comment": "VERY IMPORTANT that the line below be set to today - 4 days for integration-test-pop-model", 37 | "offsetDate": "2016-01-19T11:55:07Z" 38 | }] 39 | } 40 | } 41 | ] 42 | } 43 | 44 | -------------------------------------------------------------------------------- /examples/pop-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "popularity-only", 3 | "description": "Default settings", 4 | "engineFactory": "org.template.RecommendationEngine", 5 | "datasource": { 6 | "params" : { 7 | "name": "sample-handmade-data.txt", 8 | "appName": "handmade", 9 | "eventNames": ["purchase", "view"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer.mb": "300", 17 | "spark.kryoserializer.buffer": "300m", 18 | "spark.executor.memory": "4g", 19 | "es.index.auto.create": "true" 20 | }, 21 | "algorithms": [ 22 | { 23 | "comment": "setup to only calculate a popularity model for *hot* and add it to the existing model for backfill", 24 | "name": "ur", 25 | "params": { 26 | "appName": "handmade", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "eventNames": ["purchase", "view"], 30 | "recsModel": "backfill", 31 | "rankings": [{ 32 | "name": "popRank", 33 | "type": "popular", 34 | "eventNames": ["purchase", "view"], 35 | "duration": 259200 36 | }] 37 | } 38 | } 39 | ] 40 | } 41 | 42 | -------------------------------------------------------------------------------- /examples/pop-test-query.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "" 4 | echo "Recommendations for popular using default pop model" 5 | echo "" 6 | curl -H "Content-Type: application/json" -d ' 7 | { 8 | }' http://localhost:8000/queries.json 9 | echo "" 10 | 11 | -------------------------------------------------------------------------------- /examples/rank/import_rank.py: -------------------------------------------------------------------------------- 1 | """ 2 | Import sample data for recommendation engine 3 | """ 4 | 5 | import predictionio 6 | import argparse 7 | import random 8 | import datetime 9 | import pytz 10 | 11 | RATE_ACTIONS_DELIMITER = "," 12 | PROPERTIES_DELIMITER = ":" 13 | SEED = 1 14 | 15 | 16 | def import_events(client, file): 17 | f = open(file, 'r') 18 | random.seed(SEED) 19 | count = 0 20 | 
# year, month, day[, hour[, minute[, second[ 21 | #event_date = datetime.datetime(2015, 8, 13, 12, 24, 41) 22 | now_date = datetime.datetime.now(pytz.utc) # - datetime.timedelta(days=2.7) 23 | current_date = now_date 24 | event_time_increment = datetime.timedelta(days= -0.8) 25 | available_date_increment = datetime.timedelta(days= 0.8) 26 | event_date = now_date - datetime.timedelta(days= 2.4) 27 | available_date = event_date + datetime.timedelta(days=-2) 28 | expire_date = event_date + datetime.timedelta(days=2) 29 | print "Importing data..." 30 |
31 | items = set() 32 | for line in f: 33 | data = line.rstrip('\r\n').split(RATE_ACTIONS_DELIMITER)
34 | # For demonstration purposes, action names are taken from the input, along with secondary actions
35 | # For the UR, add some item metadata 36 |
37 | action = data[1] 38 | if action in ('$set', '$unset', '$delete'): 39 | item_id = data[0] 40 | items.add(item_id) 41 | properties = data[2].split(PROPERTIES_DELIMITER) 42 | prop_name = properties.pop(0) 43 | prop_value = properties if not prop_name == 'defaultRank' else float( 44 | properties[0]) 45 | client.create_event( 46 | event=action, 47 | entity_type="item", 48 | entity_id=item_id, 49 | event_time=current_date, 50 | properties={prop_name: prop_value} 51 | ) 52 | print( 53 | 'Event: {0} entity_id: {1} properties/{2}: {3} current_date: {4}'.format( 54 | action, item_id, prop_name, str(prop_value), current_date.isoformat())) 55 |
56 | else: 57 | user_id = data[0] 58 | item_id = data[2] 59 | client.create_event( 60 | event=action, 61 | entity_type="user", 62 | entity_id=user_id, 63 | target_entity_type="item", 64 | target_entity_id=item_id, 65 | event_time=current_date 66 | ) 67 | print( 68 | 'Event: {0} entity_id: {1} target_entity_id: {2} current_date: {3}' 69 | .format(action, user_id, item_id, current_date.isoformat())) 70 | count += 1 71 | current_date += event_time_increment 72 |
73 | print "All items: " + str(items) 74 | for item in items: 75 |
76 | client.create_event( 77 | event="$set", 78 | entity_type="item", 79 | entity_id=item, 80 | properties={"expires": expire_date.isoformat(), 81 | "available": available_date.isoformat(), 82 | "date": event_date.isoformat()} 83 | ) 84 | print "Event: $set entity_id: " + item + \ 85 | " properties/availableDate: " + available_date.isoformat() + \ 86 | " properties/date: " + event_date.isoformat() + \ 87 | " properties/expireDate: " + expire_date.isoformat()
88 | expire_date += available_date_increment 89 | event_date += available_date_increment 90 | available_date += available_date_increment 91 | count += 1 92 |
93 | f.close() 94 | print "%s events are imported."
% count 95 | 96 | 97 | if __name__ == '__main__': 98 | parser = argparse.ArgumentParser( 99 | description="Import sample data for recommendation engine") 100 | parser.add_argument('--access_key', default='123456789') 101 | parser.add_argument('--url', default="http://localhost:7070") 102 | parser.add_argument('--file', default="./data/sample-rank-data.txt") 103 | 104 | args = parser.parse_args() 105 | print args 106 | 107 | client = predictionio.EventClient( 108 | access_key=args.access_key, 109 | url=args.url, 110 | threads=5, 111 | qsize=500) 112 | import_events(client, args.file) 113 | -------------------------------------------------------------------------------- /examples/rank/integration-test-rank: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | RED='\033[0;31m' 4 | GREEN='\033[0;32m' 5 | YELLOW='\033[0;33m' 6 | NC='\033[0m' # No Color 7 | 8 | APP_NAME='default-rank' 9 | APP_ACCESS_KEY='123456789' 10 | 11 | LINE="==================================================================" 12 | # exit on any error 13 | #set -e 14 | 15 | check_file () { 16 | if [ ! -f $1 ]; then 17 | echo -e "${RED}File not found: $1${NC}" 18 | exit 1 19 | fi 20 | } 21 | 22 | echo -e "${GREEN}${LINE}" 23 | echo -e "Integration test [Rank] for The Universal Recommender." 24 | echo -e "If some step fails check that your engine.json file has been restored or look for it in 'user-engine.json'" 25 | echo -e "${LINE}${NC}" 26 | 27 | echo -e "${GREEN}${LINE}" 28 | echo -e "Checking for needed files" 29 | echo -e "${LINE}${NC}" 30 | 31 | if [ -f user-engine.json ]; then 32 | echo -e "${RED}File user-engine.json found, this may be an error so we cannot replace engine.json${NC}" 33 | exit 1 34 | fi 35 | 36 | check_file examples/rank/rank-engine.json 37 | check_file data/sample-rank-data.txt 38 | check_file data/rank-test-query-expected.txt 39 | 40 | echo -e "${GREEN}${LINE}" 41 | echo -e "Checking status, should exit if pio is not running." 42 | echo -e "${LINE}${NC}" 43 | pio status 44 | pio app new ${APP_NAME} --access-key ${APP_ACCESS_KEY} || true 45 | 46 | echo -e "${GREEN}${LINE}" 47 | echo -e "Checking to see if ${APP_NAME} app exists, should exit if not." 48 | echo -e "${LINE}${NC}" 49 | pio app show default-rank 50 | 51 | echo -e "${GREEN}${LINE}" 52 | echo -e "Moving engine.json to user-engine.json" 53 | echo -e "${LINE}${NC}" 54 | cp -n engine.json user-engine.json 55 | 56 | echo -e "${GREEN}${LINE}" 57 | echo -e "THE FIRST SERIES OF TESTS" 58 | echo -e "${LINE}${NC}" 59 | 60 | echo -e "${GREEN}${LINE}" 61 | echo -e "Moving examples/rank/rank-engine.json to engine.json for integration test." 
62 | echo -e "${LINE}${NC}" 63 | cp examples/rank/rank-engine.json engine.json 64 |
65 | echo -e "${GREEN}${LINE}" 66 | echo -e "Deleting ${APP_NAME} app data since the test is date dependent" 67 | echo -e "${LINE}${NC}" 68 | pio app data-delete ${APP_NAME} -f 69 |
70 | echo -e "${GREEN}${LINE}" 71 | echo -e "Importing data for integration test" 72 | echo -e "${LINE}${NC}" 73 | python examples/rank/import_rank.py --access_key ${APP_ACCESS_KEY} --file './data/sample-rank-data.txt' 74 |
75 | echo -e "${GREEN}${LINE}" 76 | echo -e "Building and deploying model" 77 | echo -e "${LINE}${NC}" 78 | pio build 79 | pio train -- --executor-memory 1g --driver-memory 1g --master local 80 |
81 | echo -e "${GREEN}${LINE}" 82 | echo -e "WARNING the model will be undeployed after this test, " 83 | echo -e "so any running PredictionServer will be stopped" 84 | nohup pio deploy > deploy-rank.out & 85 | echo -e "Waiting 30 seconds for the server to start" 86 | echo -e "${LINE}${NC}" 87 | sleep 30 88 |
89 | echo -e "${GREEN}${LINE}" 90 | echo -e "Running test query." 91 | echo -e "${LINE}${NC}" 92 | ./examples/rank/multi-query-rank.sh > rank-query-test-result.out 93 |
94 | echo -e "${GREEN}${LINE}" 95 | echo -e "Restoring engine.json" 96 | echo -e "${LINE}${NC}" 97 | mv user-engine.json engine.json 98 |
99 | deploy_pid=`jps -lm | grep "onsole deploy" | cut -f 1 -d ' '` 100 | echo -e "${GREEN}${LINE}" 101 | echo -e "Killing the deployed PredictionServer" 102 | echo -e "${LINE}${NC}" 103 | kill "$deploy_pid" 104 |
105 | DIFF_RESULT=`diff data/rank-test-query-expected.txt rank-query-test-result.out` 106 |
107 | if [[ -z "${DIFF_RESULT// }" ]]; then 108 | echo -e "${GREEN}${LINE}" 109 | echo -e "ALL TESTS PASS: SUCCESS" 110 | rm rank-query-test-result.out 111 | rm deploy-rank.out 112 | else 113 | echo -e "${RED}${LINE}" 114 | echo -e "ONE OR MORE TESTS FAILED:" 115 | echo -e "${LINE}" 116 | echo ${DIFF_RESULT} 117 | fi 118 | echo -e "${LINE}${NC}" 119 | 120 | 121 | 122 | 123 |
-------------------------------------------------------------------------------- /examples/rank/multi-query-rank.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash 2 |
3 | echo "" 4 | echo "Queries to illustrate many use cases on a small standard dataset and for an automated integration test." 5 | echo "" 6 | echo "WARNING: for this to produce the correct result you must:" 7 | echo " 1. Import data with" 8 | echo " $ python examples/import_rank.py --access_key " 9 | echo " 2. Copy rank-engine.json to engine.json." 10 | echo " 3. Run 'pio build', 'pio train', and 'pio deploy'" 11 | echo " 4. The queries must be run the same day as the import was done because date filters are part of the test."
12 | 13 | echo "" 14 | echo "============ simple user recs ============" 15 | echo "" 16 | echo "Recommendations for user: user-1" 17 | echo "" 18 | curl -H "Content-Type: application/json" -d ' 19 | { 20 | "user": "user-1" 21 | }' http://localhost:8000/queries.json 22 | echo "" 23 | 24 | echo "" 25 | echo "Recommendations for user: user-2" 26 | echo "" 27 | curl -H "Content-Type: application/json" -d ' 28 | { 29 | "user": "user-2" 30 | }' http://localhost:8000/queries.json 31 | echo "" 32 | 33 | echo "" 34 | echo "Recommendations for user: user-3" 35 | echo "" 36 | curl -H "Content-Type: application/json" -d ' 37 | { 38 | "user": "user-3" 39 | }' http://localhost:8000/queries.json 40 | echo "" 41 | 42 | echo "" 43 | echo "Recommendations for user: user-4" 44 | echo "" 45 | curl -H "Content-Type: application/json" -d ' 46 | { 47 | "user": "user-4" 48 | }' http://localhost:8000/queries.json 49 | echo "" 50 | 51 | echo "" 52 | echo "Recommendations for user: user-5" 53 | echo "" 54 | curl -H "Content-Type: application/json" -d ' 55 | { 56 | "user": "user-5" 57 | }' http://localhost:8000/queries.json 58 | echo "" 59 | 60 | echo "" 61 | echo "============ simple similar item recs ============" 62 | echo "" 63 | echo "Recommendations for item: product-1" 64 | echo "" 65 | curl -H "Content-Type: application/json" -d ' 66 | { 67 | "item": "product-1" 68 | }' http://localhost:8000/queries.json 69 | echo "" 70 | 71 | echo "" 72 | echo "Recommendations for item: product-2" 73 | echo "" 74 | curl -H "Content-Type: application/json" -d ' 75 | { 76 | "item": "product-2" 77 | }' http://localhost:8000/queries.json 78 | echo "" 79 | 80 | echo "" 81 | echo "Recommendations for item: product-3" 82 | echo "" 83 | curl -H "Content-Type: application/json" -d ' 84 | { 85 | "item": "product-3" 86 | }' http://localhost:8000/queries.json 87 | echo "" 88 | 89 | echo "" 90 | echo "Recommendations for item: product-4" 91 | echo "" 92 | curl -H "Content-Type: application/json" -d ' 93 | { 94 | "item": "product-4" 95 | }' http://localhost:8000/queries.json 96 | echo "" 97 | 98 | echo "" 99 | echo "Recommendations for item: product-5" 100 | echo "" 101 | curl -H "Content-Type: application/json" -d ' 102 | { 103 | "item": "product-5" 104 | }' http://localhost:8000/queries.json 105 | echo "" 106 | 107 | echo "" 108 | echo "============ popular item recs only ============" 109 | echo "" 110 | echo "query with no item or user id, ordered by popularity" 111 | echo "" 112 | curl -H "Content-Type: application/json" -d ' 113 | { 114 | }' http://localhost:8000/queries.json 115 | echo "" 116 | 117 | echo "" 118 | echo "Recommendations for non-existant user: xyz, all from popularity" 119 | echo "" 120 | curl -H "Content-Type: application/json" -d ' 121 | { 122 | "user": "xyz" 123 | }' http://localhost:8000/queries.json 124 | echo "" 125 | 126 | echo "" 127 | echo "Recommendations for non-existant item: xyz, all from popularity" 128 | echo "" 129 | curl -H "Content-Type: application/json" -d ' 130 | { 131 | "item": "xyz" 132 | }' http://localhost:8000/queries.json 133 | echo "" 134 | 135 | 136 | echo "" 137 | echo "Recommendations for no user no item, all from popularity, red color filter" 138 | echo "" 139 | curl -H "Content-Type: application/json" -d ' 140 | { 141 | "fields": [{ 142 | "name": "color", 143 | "values": ["red"], 144 | "bias": -1 145 | }] 146 | }' http://localhost:8000/queries.json 147 | echo "" 148 | 149 | 150 | echo "" 151 | echo "Recommendations for no user no item, all from popularity, green boost" 152 | echo "" 
153 | curl -H "Content-Type: application/json" -d ' 154 | { 155 | "fields": [{ 156 | "name": "color", 157 | "values": ["green"], 158 | "bias": 1.05 159 | }] 160 | }' http://localhost:8000/queries.json 161 | echo "" 162 | 163 | 164 | echo "" 165 | echo "Recommendations for no user no item, all from popularity, red color boost, S size filter" 166 | echo "" 167 | curl -H "Content-Type: application/json" -d ' 168 | { 169 | "fields": [{ 170 | "name": "color", 171 | "values": ["red"], 172 | "bias": 1.05 173 | }, { 174 | "name": "size", 175 | "values": ["S"], 176 | "bias": -1 177 | }] 178 | }' http://localhost:8000/queries.json 179 | echo "" 180 | 181 | 182 | echo "" 183 | echo "============ dateRange filter ============" 184 | echo "" 185 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 186 | BEFORE=`date --date="tomorrow" --iso-8601=seconds` 187 | AFTER=`date --date="1 day ago" --iso-8601=seconds` 188 | else 189 | BEFORE=`date -v +1d +"%Y-%m-%dT%H:%M:%SZ"` 190 | AFTER=`date -v -1d +"%Y-%m-%dT%H:%M:%SZ"` 191 | fi 192 | #echo "before: $BEFORE after: $AFTER" 193 | echo "Recommendations for user: user-1" 194 | echo "" 195 | curl -H "Content-Type: application/json" -d " 196 | { 197 | \"user\": \"user-1\", 198 | \"dateRange\": { 199 | \"name\": \"date\", 200 | \"before\": \"$BEFORE\", 201 | \"after\": \"$AFTER\" 202 | } 203 | }" http://localhost:8000/queries.json 204 | echo "" 205 | 206 | echo "" 207 | echo "============ query with item and user *EXPERIMENTAL* ============" 208 | # This is experimental, use at your own risk, not well founded in theory 209 | echo "" 210 | echo "Recommendations for user-1 & product-1" 211 | echo "" 212 | curl -H "Content-Type: application/json" -d ' 213 | { 214 | "user": "user-1", 215 | "item": "product-1" 216 | }' http://localhost:8000/queries.json 217 | echo "" 218 | 219 | -------------------------------------------------------------------------------- /examples/rank/rank-engine-user-define.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | "appName": "default-rank", 9 | "eventNames": ["show", "like"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer": "300m", 17 | "spark.executor.memory": "4g", 18 | "es.index.auto.create": "true" 19 | }, 20 | "algorithms": [ 21 | { 22 | "name": "ur", 23 | "params": { 24 | "appName": "default-rank", 25 | "indexName": "urindex", 26 | "typeName": "items", 27 | "recsModel": "backfill", 28 | "eventNames": ["show", "like"], 29 | "rankings":[ 30 | { 31 | "name": "defaultRank", 32 | "type": "userDefined" 33 | } 34 | ] 35 | } 36 | } 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /examples/rank/rank-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment":" This config file uses default settings for all but the required values see README.md for docs", 3 | "id": "default", 4 | "description": "Default settings", 5 | "engineFactory": "org.template.RecommendationEngine", 6 | "datasource": { 7 | "params" : { 8 | 
"appName": "default-rank", 9 | "eventNames": ["show", "like"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer": "300m", 17 | "spark.executor.memory": "4g", 18 | "es.index.auto.create": "true" 19 | }, 20 | "algorithms": [ 21 | { 22 | "comment": "simplest setup where all values are default, popularity based backfill, must add eventsNames", 23 | "name": "ur", 24 | "params": { 25 | "comment": "must have data for the first event or the model will not build, other events are optional", 26 | "appName": "default-rank", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "recsModel": "all", 30 | "eventNames": ["show", "like"], 31 | "rankings":[ 32 | { 33 | "name": "popularRank", 34 | "type": "popular", 35 | "eventNames": ["show", "like"], 36 | "duration": "3650 days", 37 | "endDate": "ISO8601-date" 38 | },{ 39 | "name": "defaultRank", 40 | "type": "userDefined" 41 | },{ 42 | "name": "uniqueRank", 43 | "type": "random" 44 | } 45 | ] 46 | } 47 | } 48 | ] 49 | } 50 | -------------------------------------------------------------------------------- /examples/single-query-eventNames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "Recommendations from popular" 4 | echo "" 5 | curl -H "Content-Type: application/json" -d ' 6 | { 7 | }' http://localhost:8000/queries.json 8 | echo "" 9 | echo "" 10 | 11 | echo "Recommendations for user: u1 purchase and view events" 12 | echo "" 13 | curl -H "Content-Type: application/json" -d ' 14 | { 15 | "user": "u1" 16 | }' http://localhost:8000/queries.json 17 | echo "" 18 | echo "" 19 | 20 | echo "Recommendations for user: u1 from purchase event alone, should have some non-popular based recs" 21 | echo "" 22 | curl -H "Content-Type: application/json" -d ' 23 | { 24 | "user": "u1", 25 | "eventNames": ["purchase"] 26 | }' http://localhost:8000/queries.json 27 | echo "" 28 | echo "" 29 | 30 | echo "Recommendations for user: u1 from view event alone, should have some non-popular based recs" 31 | echo "" 32 | curl -H "Content-Type: application/json" -d ' 33 | { 34 | "user": "u1", 35 | "eventNames": ["view"] 36 | }' http://localhost:8000/queries.json 37 | echo "" 38 | echo "" 39 | 40 | -------------------------------------------------------------------------------- /examples/single-query-handmade.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "Recommendations for user: u1" 4 | echo "" 5 | curl -H "Content-Type: application/json" -d ' 6 | { 7 | "user": "u1" 8 | }' http://localhost:8000/queries.json 9 | echo "" 10 | 11 | -------------------------------------------------------------------------------- /examples/trend-engine-4-days-ago.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "popularity-only", 3 | "description": "Default settings", 4 | "engineFactory": "org.template.RecommendationEngine", 5 | "datasource": { 6 | "params" : { 7 | "name": "sample-handmade-data.txt", 8 | "appName": "handmade", 9 | "eventNames": ["purchase", "view"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | 
"spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer.mb": "300", 17 | "spark.kryoserializer.buffer": "300m", 18 | "spark.executor.memory": "4g", 19 | "es.index.auto.create": "true" 20 | }, 21 | "algorithms": [ 22 | { 23 | "comment": "setup to only calculate a popularity model for *hot* and add it to the existing model for backfill", 24 | "name": "ur", 25 | "params": { 26 | "appName": "handmade", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "eventNames": ["purchase", "view"], 30 | "recsModel": "backfill", 31 | "rankings": [{ 32 | "name": "trendRank", 33 | "type": "trending", 34 | "eventNames": ["purchase", "view"], 35 | "duration": 259200, 36 | "comment": "VERY IMPORTANT that the line below be set to today - 4 days for integration-test-pop-model", 37 | "offsetDate": "2016-01-19T11:55:07Z" 38 | }] 39 | } 40 | } 41 | ] 42 | } 43 | 44 | -------------------------------------------------------------------------------- /examples/trend-engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "popularity-only", 3 | "description": "Default settings", 4 | "engineFactory": "org.template.RecommendationEngine", 5 | "datasource": { 6 | "params" : { 7 | "name": "sample-handmade-data.txt", 8 | "appName": "handmade", 9 | "eventNames": ["purchase", "view"] 10 | } 11 | }, 12 | "sparkConf": { 13 | "spark.serializer": "org.apache.spark.serializer.KryoSerializer", 14 | "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", 15 | "spark.kryo.referenceTracking": "false", 16 | "spark.kryoserializer.buffer.mb": "300", 17 | "spark.kryoserializer.buffer": "300m", 18 | "spark.executor.memory": "4g", 19 | "es.index.auto.create": "true" 20 | }, 21 | "algorithms": [ 22 | { 23 | "comment": "setup to only calculate a popularity model for *hot* and add it to the existing model for backfill", 24 | "name": "ur", 25 | "params": { 26 | "appName": "handmade", 27 | "indexName": "urindex", 28 | "typeName": "items", 29 | "eventNames": ["purchase", "view"], 30 | "recsModel": "backfill", 31 | "rankings": [{ 32 | "name": "trendRank", 33 | "type": "trending", 34 | "eventNames": ["purchase", "view"], 35 | "duration": 259200 36 | }] 37 | } 38 | } 39 | ] 40 | } 41 | 42 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | resolvers += Resolver.typesafeRepo("releases") 2 | 3 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.6.0") 4 | 5 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.8.0") 6 | 7 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.13.0") 8 | -------------------------------------------------------------------------------- /scalastyle-config.xml: -------------------------------------------------------------------------------- 1 | 2 | Scalastyle standard configuration 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- 
/src/main/scala/DataSource.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import _root_.io.prediction.controller.{ EmptyActualResult, EmptyEvaluationInfo, PDataSource, Params } 21 | import _root_.io.prediction.data.storage.PropertyMap 22 | import _root_.io.prediction.data.store.PEventStore 23 | import grizzled.slf4j.Logger 24 | import io.prediction.core.{ EventWindow, SelfCleaningDataSource } 25 | import org.apache.spark.SparkContext 26 | import org.apache.spark.rdd.RDD 27 | import org.template.conversions.{ ActionID, ItemID } 28 | import org.template.conversions._ 29 | 30 | /** Taken from engine.json these are passed in to the DataSource constructor 31 | * 32 | * @param appName registered name for the app 33 | * @param eventNames a list of named events expected. The first is the primary event, the rest are secondary. These 34 | * will be used to create the primary correlator and cross-cooccurrence secondary correlators. 35 | */ 36 | case class DataSourceParams( 37 | appName: String, 38 | eventNames: List[String], // IMPORTANT: eventNames must be exactly the same as URAlgorithmParams eventNames 39 | eventWindow: Option[EventWindow]) extends Params 40 | 41 | /** Reads specified events from the PEventStore and creates an RDD for each event. A list of pairs (eventName, eventRDD) 42 | * is sent to the Preparator for further processing.
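 * For example (illustrative shape only), with eventNames ["purchase", "view"] the result is
 * List(("purchase", RDD[(UserID, ItemID)]), ("view", RDD[(UserID, ItemID)])), where "purchase",
 * being first, is the primary event.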
43 | * @param dsp parameters taken from engine.json 44 | */ 45 | class DataSource(val dsp: DataSourceParams) 46 | extends PDataSource[TrainingData, EmptyEvaluationInfo, Query, EmptyActualResult] 47 | with SelfCleaningDataSource { 48 | 49 | @transient override lazy implicit val logger: Logger = Logger[this.type] 50 | 51 | override def appName: String = dsp.appName 52 | override def eventWindow: Option[EventWindow] = dsp.eventWindow 53 | 54 | drawInfo("Init DataSource", Seq( 55 | ("══════════════════════════════", "════════════════════════════"), 56 | ("App name", appName), 57 | ("Event window", eventWindow), 58 | ("Event names", dsp.eventNames))) 59 | 60 | /** Reads events from PEventStore and creates an RDD for each */ 61 | override def readTraining(sc: SparkContext): TrainingData = { 62 | 63 | val eventNames = dsp.eventNames 64 | cleanPersistedPEvents(sc) 65 | val eventsRDD = PEventStore.find( 66 | appName = dsp.appName, 67 | entityType = Some("user"), 68 | eventNames = Some(eventNames), 69 | targetEntityType = Some(Some("item")))(sc).repartition(sc.defaultParallelism) 70 | 71 | // now separate the events by event name 72 | val actionRDDs: List[(ActionID, RDD[(UserID, ItemID)])] = eventNames.map { eventName => 73 | val actionRDD = eventsRDD.filter { event => 74 | require(eventNames.contains(event.event), s"Unexpected event $event is read.") // is this really needed? 75 | require(event.entityId.nonEmpty && event.targetEntityId.get.nonEmpty, "Empty user or item ID") 76 | eventName.equals(event.event) 77 | }.map { event => 78 | (event.entityId, event.targetEntityId.get) 79 | } 80 | 81 | (eventName, actionRDD) 82 | } filterNot { case (_, actionRDD) => actionRDD.isEmpty() } 83 | 84 | logger.debug(s"Received actions for events ${actionRDDs.map(_._1)}") 85 | 86 | // aggregating all $set/$unsets for metadata fields, which are attached to items 87 | val fieldsRDD: RDD[(ItemID, PropertyMap)] = PEventStore.aggregateProperties( 88 | appName = dsp.appName, 89 | entityType = "item")(sc) 90 | // logger.debug(s"FieldsRDD\n${fieldsRDD.take(25).mkString("\n")}") 91 | 92 | // Have a list of (actionName, RDD), for each action 93 | // todo: some day allow data to be content, which requires rethinking how to use EventStore 94 | TrainingData(actionRDDs, fieldsRDD) 95 | } 96 | } 97 | 98 | /** Low-level RDD-based representation of the data ready for the Preparator 99 | * 100 | * @param actions List of Tuples (actionName, actionRDD) 101 | * @param fieldsRDD RDD of item keyed PropertyMap for item metadata 102 | */ 103 | case class TrainingData( 104 | actions: Seq[(ActionID, RDD[(UserID, ItemID)])], 105 | fieldsRDD: RDD[(ItemID, PropertyMap)]) extends Serializable { 106 | 107 | override def toString: String = { 108 | val a = actions.map { t => 109 | s"${t._1} actions: [count:${t._2.count()}] + sample:${t._2.take(2).toList} " 110 | }.toString() 111 | val f = s"Item metadata: [count:${fieldsRDD.count}] + sample:${fieldsRDD.take(2).toList} " 112 | a + f 113 | } 114 | 115 | } -------------------------------------------------------------------------------- /src/main/scala/Engine.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import grizzled.slf4j.Logger 21 | import io.prediction.controller.{ EmptyActualResult, EmptyEvaluationInfo, Engine, EngineFactory } 22 | import org.template.conversions._ 23 | 24 | /** This file contains case classes that are used with reflection to specify how query and config 25 | * JSON is to be parsed. The Query case class, for instance, defines the way a JSON query is to be 26 | * formed. The same for param case classes. 27 | */ 28 | 29 | /** The Query spec with optional values. The only hard rule is that there must be either a user or 30 | * an item id. All other values are optional. 31 | */ 32 | case class Query( 33 | user: Option[String] = None, // must be a user or item id 34 | userBias: Option[Float] = None, // default: whatever is in algorithm params or 1 35 | item: Option[String] = None, // must be a user or item id 36 | itemBias: Option[Float] = None, // default: whatever is in algorithm params or 1 37 | fields: Option[List[Field]] = None, // default: whatever is in algorithm params or None 38 | currentDate: Option[String] = None, // if used will override dateRange filter, currentDate must lie between the item's 39 | // expireDateName value and availableDateName value, all are ISO 8601 dates 40 | dateRange: Option[DateRange] = None, // optional before and after filter applied to a date field 41 | blacklistItems: Option[List[String]] = None, // default: whatever is in algorithm params or None 42 | returnSelf: Option[Boolean] = None, // means for an item query should the item itself be returned, defaults 43 | // to what is in the algorithm params or false 44 | num: Option[Int] = None, // default: whatever is in algorithm params, which itself has a default--probably 20 45 | eventNames: Option[List[String]], // names used to ID all user actions 46 | withRanks: Option[Boolean] = None) // add ranking field values to each ItemScore, default false 47 | extends Serializable 48 | 49 | /** Used to specify how Fields are represented in engine.json */ 50 | case class Field( // no optional values for fields, when specified 51 | name: String, // name of metadata field 52 | values: Seq[String], // fields can have multiple values, like tags, or a single value as when using hierarchical 53 | // taxonomies 54 | bias: Float) // any positive value is a boost, negative is a filter 55 | extends Serializable 56 | 57 | /** Used to specify the date range for a query */ 58 | case class DateRange( 59 | name: String, // name of item property for the date comparison 60 | before: Option[String], // an empty string means no filter 61 | after: Option[String]) // both empty should be ignored 62 | extends Serializable 63 | 64 | /** Results of a URAlgorithm.predict */ 65 | case class PredictedResult( 66 | itemScores: Array[ItemScore]) 67 | extends Serializable 68 | 69 | case class ItemScore( 70 | item: ItemID, // item id 71 | score: Double, // used to rank, original score
returned from the search engine 72 | ranks: Option[Map[String, Double]] = None) extends Serializable 73 | 74 | object RecommendationEngine extends EngineFactory { 75 | 76 | @transient lazy implicit val logger: Logger = Logger[this.type] 77 | drawActionML 78 | 79 | def apply(): Engine[TrainingData, EmptyEvaluationInfo, PreparedData, Query, PredictedResult, EmptyActualResult] = { 80 | new Engine( 81 | classOf[DataSource], 82 | classOf[Preparator], 83 | Map("ur" -> classOf[URAlgorithm]), // IMPORTANT: "ur" must be the "name" of the parameter set in engine.json 84 | classOf[Serving]) 85 | } 86 | } -------------------------------------------------------------------------------- /src/main/scala/EsClient.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import java.util 21 | 22 | import grizzled.slf4j.Logger 23 | import io.prediction.data.storage._ 24 | import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest 25 | import org.apache.spark.SparkContext 26 | import org.apache.spark.rdd.RDD 27 | import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest 28 | import org.elasticsearch.action.admin.indices.create.CreateIndexRequest 29 | import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest 30 | import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest 31 | import org.elasticsearch.action.admin.indices.refresh.RefreshRequest 32 | import org.elasticsearch.action.get.GetResponse 33 | import org.elasticsearch.client.transport.TransportClient 34 | import org.elasticsearch.common.settings.{ ImmutableSettings, Settings } 35 | import org.joda.time.DateTime 36 | import org.json4s.jackson.JsonMethods._ 37 | import org.elasticsearch.spark._ 38 | import org.elasticsearch.node.NodeBuilder._ 39 | import org.elasticsearch.search.SearchHits 40 | import org.json4s.JValue 41 | import org.template.conversions.{ ItemID, ItemProps } 42 | 43 | import scala.collection.immutable 44 | import scala.collection.parallel.mutable 45 | 46 | /** Elasticsearch notes: 47 | * 1) every query clause will affect scores unless it has a constant_score and boost: 0 48 | * 2) the Spark index writer is fast but must assemble all data for the index before the write occurs 49 | * 3) many operations must be followed by a refresh before the action takes effect--sort of like a transaction commit 50 | * 4) to use like a DB you must specify that indexed fields are `not_analyzed` so they won't be lowercased, 51 | * stemmed, tokenized, etc.
Then the values are literal and must match exactly what is in the query (no analyzer) 52 | */ 53 | 54 | /** Defines methods to use on Elasticsearch. */ 55 | object EsClient { 56 | @transient lazy val logger: Logger = Logger[this.type] 57 | 58 | private lazy val client = if (Storage.getConfig("ELASTICSEARCH").nonEmpty) 59 | new elasticsearch.StorageClient(Storage.getConfig("ELASTICSEARCH").get).client 60 | else 61 | throw new IllegalStateException("No Elasticsearch client configuration detected, check your pio-env.sh for " + 62 | "proper configuration settings") 63 | 64 | // wrong way that uses only default settings, which will be a localhost ES server. 65 | //private lazy val client = new elasticsearch.StorageClient(StorageClientConfig()).client 66 | 67 | /** Delete all data from an instance but do not commit it. Until the "refresh" is done on the index 68 | * the changes will not be reflected. 69 | * @param indexName will delete all types under this index, types are not used by the UR 70 | * @param refresh whether to refresh the index so the delete is committed 71 | * @return true if all is well 72 | */ 73 | def deleteIndex(indexName: String, refresh: Boolean = false): Boolean = { 74 | //val debug = client.connectedNodes() 75 | if (client.admin().indices().exists(new IndicesExistsRequest(indexName)).actionGet().isExists) { 76 | val delete = client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet() 77 | if (!delete.isAcknowledged) { 78 | logger.info(s"Index $indexName wasn't deleted, but may have quietly failed.") 79 | } else { 80 | // now refresh to get it 'committed' 81 | // todo: should do this after the new index is created so no index downtime 82 | if (refresh) refreshIndex(indexName) 83 | } 84 | true 85 | } else { 86 | logger.warn(s"Elasticsearch index: $indexName wasn't deleted because it didn't exist.
This may be an error.") 87 | false 88 | } 89 | } 90 | 91 | /** Creates a new empty index in Elasticsearch and initializes mappings for fields that will be used 92 | * @param indexName elasticsearch name 93 | * @param indexType names the type of index, usually use the item name 94 | * @param fieldNames ES field names 95 | * @param typeMappings indicates which ES fields are to be not_analyzed without norms 96 | * @param refresh should the index be refreshed so the create is committed 97 | * @return true if all is well 98 | */ 99 | def createIndex( 100 | indexName: String, 101 | indexType: String, 102 | fieldNames: List[String], 103 | typeMappings: Map[String, String] = Map.empty, 104 | refresh: Boolean = false): Boolean = { 105 | if (!client.admin().indices().exists(new IndicesExistsRequest(indexName)).actionGet().isExists) { 106 | var mappings = """ 107 | |{ 108 | | "properties": { 109 | """.stripMargin.replace("\n", "") 110 | 111 | def mappingsField(`type`: String) = { 112 | s""" 113 | | : { 114 | | "type": "${`type`}", 115 | | "index": "not_analyzed", 116 | | "norms" : { 117 | | "enabled" : false 118 | | } 119 | | }, 120 | """.stripMargin.replace("\n", "") 121 | } 122 | 123 | val mappingsTail = """ 124 | | "id": { 125 | | "type": "string", 126 | | "index": "not_analyzed", 127 | | "norms" : { 128 | | "enabled" : false 129 | | } 130 | | } 131 | | } 132 | |} 133 | """.stripMargin.replace("\n", "") 134 | 135 | fieldNames.foreach { fieldName => 136 | if (typeMappings.contains(fieldName)) 137 | mappings += (fieldName + mappingsField(typeMappings(fieldName))) 138 | else // unspecified fields are treated as not_analyzed strings 139 | mappings += (fieldName + mappingsField("string")) 140 | } 141 | mappings += mappingsTail // any other string is not_analyzed 142 | // logger.debug(s"ES mapping: $mappings") 143 | 144 | val cir = new CreateIndexRequest(indexName).mapping(indexType, mappings) 145 | val create = client.admin().indices().create(cir).actionGet() 146 | if (!create.isAcknowledged) { 147 | logger.info(s"Index $indexName wasn't created, but may have quietly failed.") 148 | } else { 149 | // now refresh to get it 'committed' 150 | // todo: should do this after the new index is created so no index downtime 151 | if (refresh) refreshIndex(indexName) 152 | } 153 | true 154 | } else { 155 | logger.warn(s"Elasticsearch index: $indexName wasn't created because it already exists. 
This may be an error.") 156 | false 157 | } 158 | } 159 | 160 | /** Commits any pending changes to the index */ 161 | def refreshIndex(indexName: String): Unit = { 162 | client.admin().indices().refresh(new RefreshRequest(indexName)).actionGet() 163 | } 164 | 165 | /** Create new index and hot-swap the new after it's indexed and ready to take over, then delete the old */ 166 | def hotSwap( 167 | alias: String, 168 | typeName: String, 169 | indexRDD: RDD[Map[String, Any]], 170 | fieldNames: List[String], 171 | typeMappings: Map[String, String] = Map.empty): Unit = { 172 | // get index for alias, change a char, create new one with new id and index it, swap alias and delete old one 173 | val aliasMetadata = client.admin().indices().prepareGetAliases(alias).get().getAliases 174 | val newIndex = alias + "_" + DateTime.now().getMillis.toString 175 | 176 | logger.debug(s"Create new index: $newIndex, $typeName, $fieldNames, $typeMappings") 177 | createIndex(newIndex, typeName, fieldNames, typeMappings) 178 | 179 | val newIndexURI = "/" + newIndex + "/" + typeName 180 | // logger.debug(s"Save to ES[$newIndexURI]:\n${indexRDD.take(25).mkString("\n")}") 181 | indexRDD.saveToEs(newIndexURI, Map("es.mapping.id" -> "id")) 182 | //refreshIndex(newIndex) 183 | 184 | if (!aliasMetadata.isEmpty 185 | && aliasMetadata.get(alias) != null 186 | && aliasMetadata.get(alias).get(0) != null) { // was alias so remove the old one 187 | //append the DateTime to the alias to create an index name 188 | val oldIndex = aliasMetadata.get(alias).get(0).getIndexRouting 189 | client.admin().indices().prepareAliases() 190 | .removeAlias(oldIndex, alias) 191 | .addAlias(newIndex, alias) 192 | .execute().actionGet() 193 | deleteIndex(oldIndex) // now can safely delete the old one since it's not used 194 | } else { // todo: could be more than one index with 'alias' so 195 | // no alias so add one 196 | //to clean up any indexes that exist with the alias name 197 | val indices = util.Arrays.asList(client.admin().indices().prepareGetIndex().get().indices()).get(0) 198 | if (indices.contains(alias)) { 199 | //refreshIndex(alias) 200 | deleteIndex(alias) // index named like the new alias so delete it 201 | } 202 | // slight downtime, but only for one case of upgrading the UR engine from v0.1.x to v0.2.0+ 203 | client.admin().indices().prepareAliases() 204 | .addAlias(newIndex, alias) 205 | .execute().actionGet() 206 | } 207 | // clean out any old indexes that were the product of a failed train? 
208 | val indices = util.Arrays.asList(client.admin().indices().prepareGetIndex().get().indices()).get(0) 209 | indices.map { index => 210 | if (index.contains(alias) && index != newIndex) deleteIndex(index) //clean out any old orphaned indexes 211 | } 212 | 213 | } 214 | 215 | /** Performs a search using the JSON query String 216 | * 217 | * @param query the JSON query string parable by Elasticsearch 218 | * @param indexName the index to search 219 | * @return a [PredictedResults] collection 220 | */ 221 | def search(query: String, indexName: String): Option[SearchHits] = { 222 | val sr = client.prepareSearch(indexName).setSource(query).get() 223 | if (!sr.isTimedOut) { 224 | Some(sr.getHits) 225 | } else { 226 | None 227 | } 228 | } 229 | 230 | /** Gets the "source" field of an Elasticsearch document 231 | * 232 | * @param indexName index that contains the doc/item 233 | * @param typeName type name used to construct ES REST URI 234 | * @param doc for UR the item id 235 | * @return source [java.util.Map] of field names to any valid field values or null if empty 236 | */ 237 | def getSource(indexName: String, typeName: String, doc: String): util.Map[String, AnyRef] = { 238 | client.prepareGet(indexName, typeName, doc) 239 | .execute() 240 | .actionGet().getSource 241 | } 242 | 243 | /* 244 | public Set getIndicesFromAliasName(String aliasName) { 245 | 246 | IndicesAdminClient iac = client.admin().indices(); 247 | ImmutableOpenMap> map = iac.getAliases(new GetAliasesRequest(aliasName)) 248 | .actionGet().getAliases(); 249 | 250 | final Set allIndices = new HashSet<>(); 251 | map.keysIt().forEachRemaining(allIndices::add); 252 | return allIndices; 253 | } 254 | */ 255 | def getIndexName(alias: String): Option[String] = { 256 | 257 | val allIndicesMap = client.admin().indices().getAliases(new GetAliasesRequest(alias)).actionGet().getAliases 258 | 259 | if (allIndicesMap.size() == 1) { // must be a 1-1 mapping of alias <-> index 260 | var indexName: String = "" 261 | val itr = allIndicesMap.keysIt() 262 | while (itr.hasNext) 263 | indexName = itr.next() 264 | Some(indexName) // the one index the alias points to 265 | } else { 266 | // delete all the indices that are pointed to by the alias, they can't be used 267 | logger.warn("There is no 1-1 mapping of index to alias so deleting the old indexes that are referenced by the " + 268 | "alias. This may have been caused by a crashed or stopped `pio train` operation so try running it again.") 269 | if (!allIndicesMap.isEmpty) { 270 | val i = allIndicesMap.keys().toArray.asInstanceOf[Array[String]] 271 | for (indexName <- i) { 272 | deleteIndex(indexName, refresh = true) 273 | } 274 | } 275 | None // if more than one abort, need to clean up bad aliases 276 | } 277 | } 278 | 279 | def getRDD( 280 | alias: String, 281 | typeName: String)(implicit sc: SparkContext): RDD[(ItemID, ItemProps)] = { 282 | getIndexName(alias) 283 | .map(index => sc.esJsonRDD(alias + "/" + typeName) map { case (itemId, json) => itemId -> DataMap(json).fields }) 284 | .getOrElse(sc.emptyRDD) 285 | } 286 | } -------------------------------------------------------------------------------- /src/main/scala/PopModel.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import grizzled.slf4j.Logger 21 | import io.prediction.data.storage.Event 22 | import io.prediction.data.store.PEventStore 23 | import org.apache.spark.SparkContext 24 | import org.apache.spark.rdd.RDD 25 | import org.joda.time.format.ISODateTimeFormat 26 | import org.joda.time.{ DateTime, Interval } 27 | import org.template.conversions.{ ItemID, ItemProps } 28 | 29 | import scala.language.postfixOps 30 | import scala.util.Random 31 | 32 | object RankingFieldName { 33 | val UserRank = "userRank" 34 | val UniqueRank = "uniqueRank" 35 | val PopRank = "popRank" 36 | val TrendRank = "trendRank" 37 | val HotRank = "hotRank" 38 | val UnknownRank = "unknownRank" 39 | def toSeq: Seq[String] = Seq(UserRank, UniqueRank, PopRank, TrendRank, HotRank) 40 | override def toString: String = s"$UserRank, $UniqueRank, $PopRank, $TrendRank, $HotRank" 41 | } 42 | 43 | object RankingType { 44 | val Popular = "popular" 45 | val Trending = "trending" 46 | val Hot = "hot" 47 | val UserDefined = "userDefined" 48 | val Random = "random" 49 | def toSeq: Seq[String] = Seq(Popular, Trending, Hot, UserDefined, Random) 50 | override def toString: String = s"$Popular, $Trending, $Hot, $UserDefined, $Random" 51 | } 52 | 53 | class PopModel(fieldsRDD: RDD[(ItemID, ItemProps)])(implicit sc: SparkContext) { 54 | 55 | @transient lazy val logger: Logger = Logger[this.type] 56 | 57 | def calc( 58 | modelName: String, 59 | eventNames: Seq[String], 60 | appName: String, 61 | duration: Int = 0, 62 | offsetDate: Option[String] = None): RDD[(ItemID, Double)] = { 63 | 64 | // todo: make end mandatory and fill it with "now" upstream if not specified, will simplify logic here 65 | // end should always be 'now' except in unusual conditions like for testing 66 | val end = if (offsetDate.isEmpty) DateTime.now else { 67 | try { 68 | ISODateTimeFormat.dateTimeParser().parseDateTime(offsetDate.get) 69 | } catch { 70 | case e: IllegalArgumentException => 71 | logger.warn("Bad end for popModel: " + offsetDate.get + " using 'now'") 72 | DateTime.now 73 | } 74 | } 75 | 76 | val interval = new Interval(end.minusSeconds(duration), end) 77 | 78 | // based on type of popularity model return a set of (item-id, ranking-number) for all items 79 | logger.info(s"PopModel $modelName using end: $end, and duration: $duration, interval: $interval") 80 | 81 | // if None?
debatable, this is either an error or may need to default to popular, why call popModel otherwise 82 | modelName match { 83 | case RankingType.Popular => calcPopular(appName, eventNames, interval) 84 | case RankingType.Trending => calcTrending(appName, eventNames, interval) 85 | case RankingType.Hot => calcHot(appName, eventNames, interval) 86 | case RankingType.Random => calcRandom(appName, interval) 87 | case RankingType.UserDefined => sc.emptyRDD 88 | case unknownRankingType => 89 | logger.warn( 90 | s""" 91 | |Bad rankings param type=[$unknownRankingType] in engine definition params, possibly a bad json value. 92 | |Use one of the available parameter values ($RankingType).""".stripMargin) 93 | sc.emptyRDD 94 | } 95 | 96 | } 97 | 98 | /** Create random rank for all items */ 99 | def calcRandom( 100 | appName: String, 101 | interval: Interval): RDD[(ItemID, Double)] = { 102 | 103 | val events = eventsRDD(appName = appName, interval = interval) 104 | val actionsRDD = events.map(_.targetEntityId).filter(_.isDefined).map(_.get).distinct() 105 | val itemsRDD = fieldsRDD.map { case (itemID, _) => itemID } 106 | 107 | // logger.debug(s"ActionsRDD: ${actionsRDD.take(25).mkString(", ")}") 108 | // logger.debug(s"ItemsRDD: ${itemsRDD.take(25).mkString(", ")}") 109 | actionsRDD.union(itemsRDD).distinct().map { itemID => itemID -> Random.nextDouble() } 110 | } 111 | 112 | /** Creates a rank from the number of named events per item for the duration */ 113 | def calcPopular( 114 | appName: String, 115 | eventNames: Seq[String], 116 | interval: Interval): RDD[(ItemID, Double)] = { 117 | val events = eventsRDD(appName, eventNames, interval) 118 | events.map { e => (e.targetEntityId, e.event) } 119 | .groupByKey() 120 | .map { case (itemID, itEvents) => (itemID.get, itEvents.size.toDouble) } 121 | .reduceByKey(_ + _) // make this a double in Elasticsearch 122 | } 123 | 124 | /** Creates a rank for each item by dividing the duration in two and counting named events in both buckets, 125 | * then subtracting the less recent count from the most recent. This ranks by change in popularity or velocity of popularity change. 126 | * Interval(start, end) end instant is always greater than or equal to the start instant. 127 | */ 128 | def calcTrending( 129 | appName: String, 130 | eventNames: Seq[String], 131 | interval: Interval): RDD[(ItemID, Double)] = { 132 | 133 | logger.info(s"Current Interval: $interval, ${interval.toDurationMillis}") 134 | val halfInterval = interval.toDurationMillis / 2 135 | val olderInterval = new Interval(interval.getStart, interval.getStart.plus(halfInterval)) 136 | logger.info(s"Older Interval: $olderInterval") 137 | val newerInterval = new Interval(interval.getStart.plus(halfInterval), interval.getEnd) 138 | logger.info(s"Newer Interval: $newerInterval") 139 | 140 | val olderPopRDD = calcPopular(appName, eventNames, olderInterval) 141 | if (!olderPopRDD.isEmpty()) { 142 | val newerPopRDD = calcPopular(appName, eventNames, newerInterval) 143 | newerPopRDD.join(olderPopRDD).map { 144 | case (item, (newerScore, olderScore)) => item -> (newerScore - olderScore) 145 | } 146 | } else sc.emptyRDD 147 | 148 | } 149 | 150 | /** Creates a rank for each item by dividing all events per item into three buckets and calculating the change in 151 | * velocity over time, in other words the acceleration of popularity change.
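 * Illustrative arithmetic: bucket counts of 2 (older), 5 (middle) and 11 (newer) give
 * velocities 5 - 2 = 3 and 11 - 5 = 6, so the hot score is 6 - 3 = 3.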
152 | */ 153 | def calcHot( 154 | appName: String, 155 | eventNames: Seq[String] = List.empty, 156 | interval: Interval): RDD[(ItemID, Double)] = { 157 | 158 | logger.info(s"Current Interval: $interval, ${interval.toDurationMillis}") 159 | val olderInterval = new Interval(interval.getStart, interval.getStart.plus(interval.toDurationMillis / 3)) 160 | logger.info(s"Older Interval: $olderInterval") 161 | val middleInterval = new Interval(olderInterval.getEnd, olderInterval.getEnd.plus(olderInterval.toDurationMillis)) 162 | logger.info(s"Middle Interval: $middleInterval") 163 | val newerInterval = new Interval(middleInterval.getEnd, interval.getEnd) 164 | logger.info(s"Newer Interval: $newerInterval") 165 | 166 | val olderPopRDD = calcPopular(appName, eventNames, olderInterval) 167 | if (!olderPopRDD.isEmpty()) { // todo: may want to allow an interval with no events, give them 0 counts 168 | val middlePopRDD = calcPopular(appName, eventNames, middleInterval) 169 | if (!middlePopRDD.isEmpty()) { 170 | val newerPopRDD = calcPopular(appName, eventNames, newerInterval) 171 | val newVelocityRDD = newerPopRDD.join(middlePopRDD).map { 172 | case (item, (newerScore, middleScore)) => item -> (newerScore - middleScore) 173 | } 174 | val oldVelocityRDD = middlePopRDD.join(olderPopRDD).map { 175 | case (item, (middleScore, olderScore)) => item -> (middleScore - olderScore) 176 | } 177 | newVelocityRDD.join(oldVelocityRDD).map { 178 | case (item, (newVelocity, oldVelocity)) => item -> (newVelocity - oldVelocity) 179 | } 180 | } else sc.emptyRDD 181 | } else sc.emptyRDD 182 | } 183 | 184 | def eventsRDD( 185 | appName: String, 186 | eventNames: Seq[String] = Seq.empty, 187 | interval: Interval): RDD[Event] = { 188 | 189 | logger.info(s"PopModel getting eventsRDD for startTime: ${interval.getStart} and endTime ${interval.getEnd}") 190 | PEventStore.find( 191 | appName = appName, 192 | startTime = Some(interval.getStart), 193 | untilTime = Some(interval.getEnd), 194 | eventNames = if (eventNames.nonEmpty) Some(eventNames) else None)(sc) 195 | } 196 | 197 | } 198 | 199 | object PopModel { 200 | 201 | def apply(fieldsRDD: RDD[(ItemID, ItemProps)])(implicit sc: SparkContext): PopModel = { 202 | new PopModel(fieldsRDD) 203 | } 204 | 205 | val nameByType: Map[String, String] = Map( 206 | RankingType.Popular -> RankingFieldName.PopRank, 207 | RankingType.Trending -> RankingFieldName.TrendRank, 208 | RankingType.Hot -> RankingFieldName.HotRank, 209 | RankingType.UserDefined -> RankingFieldName.UserRank, 210 | RankingType.Random -> RankingFieldName.UniqueRank).withDefaultValue(RankingFieldName.UnknownRank) 211 | 212 | } 213 | -------------------------------------------------------------------------------- /src/main/scala/Preparator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import io.prediction.controller.PPreparator 21 | import org.apache.mahout.math.indexeddataset.{ BiDictionary, IndexedDataset } 22 | import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark 23 | import org.apache.spark.SparkContext 24 | import org.apache.spark.rdd.RDD 25 | import org.template.conversions._ 26 | 27 | class Preparator 28 | extends PPreparator[TrainingData, PreparedData] { 29 | 30 | /** Create [[org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark]] rdd backed 31 | * "distributed row matrices" from the input string keyed rdds. 32 | * @param sc Spark context 33 | * @param trainingData list of (actionName, actionRDD) 34 | * @return list of (correlatorName, correlatorIndexedDataset) 35 | */ 36 | def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { 37 | // now that we have all actions in separate RDDs we must merge any user dictionaries and 38 | // make sure the same user ids map to the correct events 39 | var userDictionary: Option[BiDictionary] = None 40 | 41 | val indexedDatasets = trainingData.actions.map { 42 | case (eventName, eventIDS) => 43 | 44 | // passing in previous row dictionary will use the values if they exist 45 | // and append any new ids, so after all are constructed we have all user ids in the last dictionary 46 | val ids = IndexedDatasetSpark(eventIDS, userDictionary)(sc) 47 | userDictionary = Some(ids.rowIDs) 48 | (eventName, ids) 49 | } 50 | 51 | // now make sure all matrices have identical row space since this corresponds to all users 52 | // todo: check to see that there are events in primary event IndexedDataset and abort if not. 53 | val rowAdjustedIds = userDictionary map { userDict => 54 | indexedDatasets.map { 55 | case (eventName, eventIDS) => 56 | (eventName, eventIDS.create(eventIDS.matrix, userDictionary.get, eventIDS.columnIDs).newRowCardinality(userDict.size)) 57 | } 58 | } getOrElse Seq.empty 59 | 60 | val fieldsRDD: RDD[(ItemID, ItemProps)] = trainingData.fieldsRDD.map { 61 | case (itemId, propMap) => itemId -> propMap.fields 62 | } 63 | PreparedData(rowAdjustedIds, fieldsRDD) 64 | } 65 | 66 | } 67 | 68 | case class PreparedData( 69 | actions: Seq[(ActionID, IndexedDataset)], 70 | fieldsRDD: RDD[(ItemID, ItemProps)]) extends Serializable -------------------------------------------------------------------------------- /src/main/scala/Serving.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.template 19 | 20 | import io.prediction.controller.LServing 21 | 22 | class Serving 23 | extends LServing[Query, PredictedResult] { 24 | 25 | override def serve( 26 | query: Query, 27 | predictedResults: Seq[PredictedResult]): PredictedResult = { 28 | predictedResults.head 29 | } 30 | } -------------------------------------------------------------------------------- /src/main/scala/URAlgorithm.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * ActionML licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.template 19 | 20 | import java.util 21 | 22 | import grizzled.slf4j.Logger 23 | import io.prediction.controller.{ P2LAlgorithm, Params } 24 | import io.prediction.data.storage.{ DataMap, Event, NullModel, PropertyMap } 25 | import io.prediction.data.store.LEventStore 26 | import org.apache.mahout.math.cf.{ DownsamplableCrossOccurrenceDataset, SimilarityAnalysis } 27 | import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark 28 | import org.apache.spark.SparkContext 29 | import org.apache.spark.rdd.RDD 30 | import org.joda.time.DateTime 31 | import org.json4s.JValue 32 | import org.json4s.JsonAST._ 33 | import org.json4s.JsonDSL._ 34 | import org.json4s.jackson.JsonMethods._ 35 | import org.template.conversions._ 36 | 37 | import scala.collection.JavaConverters._ 38 | import scala.concurrent.duration.Duration 39 | import scala.language.{ implicitConversions, postfixOps } 40 | 41 | /** Available values for algorithm param "RecsModel" */ 42 | object RecsModel { // todo: replace this with rankings 43 | val All = "all" 44 | val CF = "collabFiltering" 45 | val BF = "backfill" 46 | override def toString: String = s"$All, $CF, $BF" 47 | } 48 | 49 | /** Setting the option in the params case class doesn't work as expected when the param is missing from 50 | * engine.json so set these for use in the algorithm when they are not present in the engine.json 51 | */ 52 | object defaultURAlgorithmParams { 53 | val DefaultMaxEventsPerEventType = 500 54 | val DefaultNum = 20 55 | val DefaultMaxCorrelatorsPerEventType = 50 56 | val DefaultMaxQueryEvents = 100 // default number of user history events to use in recs query 57 | 58 | val DefaultExpireDateName = "expireDate" // default name for the expire date property of an item 59 | val DefaultAvailableDateName = "availableDate" // default name for an item's available-after date 60 | val DefaultDateName = "date" // when using a date range in the query this is the name of the item's date 61 | val DefaultRecsModel = RecsModel.All // use CF + backfill 62 | val DefaultRankingParams = RankingParams() 63 | val DefaultBackfillFieldName = RankingFieldName.PopRank 64 | val DefaultBackfillType = RankingType.Popular 65 
| val DefaultBackfillDuration = "3650 days" // for all time 66 | 67 | val DefaultReturnSelf = false 68 | } 69 | 70 | /* default values must be set in code not the case class declaration 71 | case class BackfillField( 72 | name: Option[String] = Some(defaultURAlgorithmParams.DefaultBackfillFieldName), 73 | backfillType: Option[String] = Some(defaultURAlgorithmParams.DefaultBackfillType), // may be 'hot', or 'trending' also 74 | eventNames: Option[Seq[String]] = None, // None means use the algo eventNames list, otherwise a list of events 75 | offsetDate: Option[String] = None, // used only for tests, specifies the offset date to start the duration so the most 76 | // recent date for events going back by from the more recent offsetDate - duration 77 | duration: Option[String] = Some(defaultURAlgorithmParams.DefaultBackfillDuration)) // duration worth of events 78 | // to use in calculation of backfill 79 | 80 | case class URAlgorithmParams( 81 | appName: String, // filled in from engine.json 82 | indexName: String, // can optionally be used to specify the elasticsearch index name 83 | typeName: String, // can optionally be used to specify the elasticsearch type name 84 | recsModel: Option[String] = Some(defaultURAlgorithmParams.DefaultRecsModel), // "all", "collabFiltering", "backfill" 85 | eventNames: Seq[String], // names used to ID all user actions 86 | blacklistEvents: Option[Seq[String]] = None,// None means use the primary event, empty array means no filter 87 | // number of events in user-based recs query 88 | maxQueryEvents: Option[Int] = Some(defaultURAlgorithmParams.DefaultMaxQueryEvents), 89 | maxEventsPerEventType: Option[Int] = Some(defaultURAlgorithmParams.DefaultMaxEventsPerEventType), 90 | maxCorrelatorsPerEventType: Option[Int] = Some(defaultURAlgorithmParams.DefaultMaxCorrelatorsPerEventType), 91 | num: Option[Int] = Some(defaultURAlgorithmParams.DefaultNum), // default max # of recs requested 92 | userBias: Option[Float] = None, // will cause the default search engine boost of 1.0 93 | itemBias: Option[Float] = None, // will cause the default search engine boost of 1.0 94 | returnSelf: Option[Boolean] = None, // query building logic defaults this to false 95 | fields: Option[Seq[Field]] = None, //defaults to no fields 96 | // leave out for default or popular 97 | backfillField: Option[BackfillField] = None, 98 | // name of date property field for when the item is available 99 | availableDateName: Option[String] = Some(defaultURAlgorithmParams.DefaultAvailableDateName), 100 | // name of date property field for when an item is no longer available 101 | expireDateName: Option[String] = Some(defaultURAlgorithmParams.DefaultExpireDateName), 102 | // used as the subject of a dateRange in queries, specifies the name of the item property 103 | dateName: Option[String] = Some(defaultURAlgorithmParams.DefaultDateName), 104 | seed: Option[Long] = None) // seed is not used presently 105 | extends Params //fixed default make it reproducible unless supplied 106 | */ 107 | 108 | case class RankingParams( 109 | name: Option[String] = None, 110 | `type`: Option[String] = None, // See [[org.template.BackfillType]] 111 | eventNames: Option[Seq[String]] = None, // None means use the algo eventNames list, otherwise a list of events 112 | offsetDate: Option[String] = None, // used only for tests, specifies the offset date to start the duration so the most 113 | // recent date for events going back by from the more recent offsetDate - duration 114 | endDate: Option[String] = None, 115 | duration: 
Option[String] = None) { // duration worth of events to use in calculation of backfill 116 | override def toString: String = { 117 | s""" 118 | |name: $name, 119 | |type: ${`type`}, 120 | |eventNames: $eventNames, 121 | |offsetDate: $offsetDate, 122 | |endDate: $endDate, 123 | |duration: $duration 124 | |""".stripMargin 125 | } 126 | } 127 | 128 | case class IndicatorParams( 129 | name: String, // must match one in eventNames 130 | maxItemsPerUser: Option[Int], // defaults to maxEventsPerEventType 131 | maxCorrelatorsPerItem: Option[Int], // defaults to maxCorrelatorsPerEventType 132 | minLLR: Option[Double]) // defaults to none, takes precedence over maxCorrelatorsPerItem 133 | 134 | case class URAlgorithmParams( 135 | appName: String, // filled in from engine.json 136 | indexName: String, // can optionally be used to specify the elasticsearch index name 137 | typeName: String, // can optionally be used to specify the elasticsearch type name 138 | recsModel: Option[String] = None, // "all", "collabFiltering", "backfill" 139 | eventNames: Option[Seq[String]], // names used to ID all user actions 140 | blacklistEvents: Option[Seq[String]] = None, // None means use the primary event, empty array means no filter 141 | // number of events in user-based recs query 142 | maxQueryEvents: Option[Int] = None, 143 | maxEventsPerEventType: Option[Int] = None, 144 | maxCorrelatorsPerEventType: Option[Int] = None, 145 | num: Option[Int] = None, // default max # of recs requested 146 | userBias: Option[Float] = None, // will cause the default search engine boost of 1.0 147 | itemBias: Option[Float] = None, // will cause the default search engine boost of 1.0 148 | returnSelf: Option[Boolean] = None, // query building logic defaults this to false 149 | fields: Option[Seq[Field]] = None, // defaults to no fields 150 | // leave out for default or popular 151 | rankings: Option[Seq[RankingParams]] = None, 152 | // name of date property field for when the item is available 153 | availableDateName: Option[String] = None, 154 | // name of date property field for when an item is no longer available 155 | expireDateName: Option[String] = None, 156 | // used as the subject of a dateRange in queries, specifies the name of the item property 157 | dateName: Option[String] = None, 158 | indicators: Option[List[IndicatorParams]] = None, // control params per matrix pair 159 | seed: Option[Long] = None) // seed is not used presently 160 | extends Params // fixed default makes it reproducible unless supplied 161 | 162 | /** Creates cooccurrence, cross-cooccurrence and eventually content correlators with 163 | * [[org.apache.mahout.math.cf.SimilarityAnalysis]] The analysis part of the recommender is 164 | * done here but the algorithm can predict only when the cooccurrence data is indexed in a 165 | * search engine like Elasticsearch. This is done in URModel.save.
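 * For example (illustrative pairing only): with eventNames ["purchase", "view"], the
 * purchase-purchase cooccurrence matrix is the primary correlator and the purchase-view
 * cross-occurrence matrix a secondary one, both indexed as fields of each item document.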
166 | * 167 | * @param ap taken from engine.json to describe limits and event types 168 | */ 169 | class URAlgorithm(val ap: URAlgorithmParams) 170 | extends P2LAlgorithm[PreparedData, NullModel, Query, PredictedResult] { 171 | 172 | @transient lazy implicit val logger: Logger = Logger[this.type] 173 | 174 | case class BoostableCorrelators(actionName: String, itemIDs: Seq[ItemID], boost: Option[Float]) { 175 | def toFilterCorrelators: FilterCorrelators = { 176 | FilterCorrelators(actionName, itemIDs) 177 | } 178 | } 179 | case class FilterCorrelators(actionName: String, itemIDs: Seq[ItemID]) 180 | 181 | val appName: String = ap.appName 182 | val recsModel: String = ap.recsModel.getOrElse(defaultURAlgorithmParams.DefaultRecsModel) 183 | //val eventNames: Seq[String] = ap.eventNames 184 | 185 | val userBias: Float = ap.userBias.getOrElse(1f) 186 | val itemBias: Float = ap.itemBias.getOrElse(1f) 187 | val maxQueryEvents: Int = ap.maxQueryEvents.getOrElse(defaultURAlgorithmParams.DefaultMaxQueryEvents) 188 | val limit: Int = ap.num.getOrElse(defaultURAlgorithmParams.DefaultNum) 189 | 190 | val blacklistEvents: Seq[String] = ap.blacklistEvents.getOrEmpty 191 | val returnSelf: Boolean = ap.returnSelf.getOrElse(defaultURAlgorithmParams.DefaultReturnSelf) 192 | val fields: Seq[Field] = ap.fields.getOrEmpty 193 | 194 | val randomSeed: Int = ap.seed.getOrElse(System.currentTimeMillis()).toInt 195 | val maxCorrelatorsPerEventType: Int = ap.maxCorrelatorsPerEventType 196 | .getOrElse(defaultURAlgorithmParams.DefaultMaxCorrelatorsPerEventType) 197 | val maxEventsPerEventType: Int = ap.maxEventsPerEventType 198 | .getOrElse(defaultURAlgorithmParams.DefaultMaxEventsPerEventType) 199 | 200 | lazy val modelEventNames = if (ap.indicators.isEmpty) { 201 | if (ap.eventNames.isEmpty) { 202 | throw new IllegalArgumentException("No eventNames or indicators in engine.json and one of these is required") 203 | } else ap.eventNames.get 204 | } else { 205 | var eventNames = Seq.empty[String] 206 | ap.indicators.get.foreach { indicator => 207 | eventNames = eventNames :+ indicator.name 208 | } 209 | eventNames 210 | } 211 | 212 | // Unique by 'type' ranking params, if collision get first. 
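  // Illustrative engine.json fragment this de-duplicates (hypothetical values): given
  //   "rankings": [{"name": "popRank", "type": "popular"}, {"name": "hot2", "type": "popular"}]
  // only the first "popular" entry survives the groupBy below.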
213 | lazy val rankingsParams: Seq[RankingParams] = ap.rankings.getOrElse(Seq(RankingParams( 214 | name = Some(defaultURAlgorithmParams.DefaultBackfillFieldName), 215 | `type` = Some(defaultURAlgorithmParams.DefaultBackfillType), 216 | eventNames = Some(modelEventNames.take(1)), 217 | offsetDate = None, 218 | endDate = None, 219 | duration = Some(defaultURAlgorithmParams.DefaultBackfillDuration)))).groupBy(_.`type`).map(_._2.head).toSeq 220 | 221 | val rankingFieldNames: Seq[String] = rankingsParams map { rankingParams => 222 | val rankingType = rankingParams.`type`.getOrElse(defaultURAlgorithmParams.DefaultBackfillType) 223 | val rankingFieldName = rankingParams.name.getOrElse(PopModel.nameByType(rankingType)) 224 | rankingFieldName 225 | } 226 | 227 | val dateNames: Seq[String] = Seq( 228 | ap.dateName, 229 | ap.availableDateName, 230 | ap.expireDateName).collect { case Some(date) => date } distinct 231 | 232 | val esIndex: String = ap.indexName 233 | val esType: String = ap.typeName 234 | 235 | drawInfo("Init URAlgorithm", Seq( 236 | ("══════════════════════════════", "════════════════════════════"), 237 | ("App name", appName), 238 | ("ES index name", esIndex), 239 | ("ES type name", esType), 240 | ("RecsModel", recsModel), 241 | ("Event names", modelEventNames), 242 | ("══════════════════════════════", "════════════════════════════"), 243 | ("Random seed", randomSeed), 244 | ("MaxCorrelatorsPerEventType", maxCorrelatorsPerEventType), 245 | ("MaxEventsPerEventType", maxEventsPerEventType), 246 | ("══════════════════════════════", "════════════════════════════"), 247 | ("User bias", userBias), 248 | ("Item bias", itemBias), 249 | ("Max query events", maxQueryEvents), 250 | ("Limit", limit), 251 | ("══════════════════════════════", "════════════════════════════"), 252 | ("Rankings:", "")) ++ rankingsParams.map(x => (x.`type`.get, x.name))) 253 | 254 | def train(sc: SparkContext, data: PreparedData): NullModel = { 255 | 256 | recsModel match { 257 | case RecsModel.All => calcAll(data)(sc) 258 | case RecsModel.CF => calcAll(data, calcPopular = false)(sc) 259 | case RecsModel.BF => calcPop(data)(sc) 260 | // error, throw an exception 261 | case unknownRecsModel => 262 | throw new IllegalArgumentException( 263 | s""" 264 | |Bad algorithm param recsModel=[$unknownRecsModel] in engine definition params, possibly a bad json value. 265 | |Use one of the available parameter values ($RecsModel).""".stripMargin) 266 | } 267 | } 268 | 269 | /** Calculates recs model as well as popularity model */ 270 | def calcAll( 271 | data: PreparedData, 272 | calcPopular: Boolean = true)(implicit sc: SparkContext): NullModel = { 273 | 274 | // No one likes empty training data. 275 | require( 276 | data.actions.take(1).nonEmpty, 277 | s""" 278 | |Primary action in PreparedData cannot be empty. 
279 | |Please check if DataSource generates TrainingData 280 | |and Preparator generates PreparedData correctly.""".stripMargin) 281 | 282 | //val backfillParams = ap.backfillField.getOrElse(defaultURAlgorithmParams.DefaultBackfillParams) 283 | //val nonDefaultMappings = Map(backfillParams.name.getOrElse(defaultURAlgorithmParams.DefaultBackfillFieldName) -> "float") 284 | 285 | logger.info("Actions read now creating correlators") 286 | val cooccurrenceIDSs = if (ap.indicators.isEmpty) { // using one global set of algo params 287 | SimilarityAnalysis.cooccurrencesIDSs( 288 | data.actions.map(_._2).toArray, 289 | randomSeed = ap.seed.getOrElse(System.currentTimeMillis()).toInt, 290 | maxInterestingItemsPerThing = ap.maxCorrelatorsPerEventType 291 | .getOrElse(defaultURAlgorithmParams.DefaultMaxCorrelatorsPerEventType), 292 | maxNumInteractions = ap.maxEventsPerEventType.getOrElse(defaultURAlgorithmParams.DefaultMaxEventsPerEventType)) 293 | .map(_.asInstanceOf[IndexedDatasetSpark]) 294 | } else { // using params per matrix pair, these take the place of eventNames, maxCorrelatorsPerEventType, 295 | // and maxEventsPerEventType! 296 | val indicators = ap.indicators.get 297 | val iDs = data.actions.map(_._2).toSeq 298 | val datasets = iDs.zipWithIndex.map { 299 | case (iD, i) => 300 | new DownsamplableCrossOccurrenceDataset( 301 | iD, 302 | indicators(i).maxItemsPerUser.getOrElse(defaultURAlgorithmParams.DefaultMaxEventsPerEventType), 303 | indicators(i).maxCorrelatorsPerItem.getOrElse(defaultURAlgorithmParams.DefaultMaxCorrelatorsPerEventType), 304 | indicators(i).minLLR) 305 | }.toList 306 | 307 | SimilarityAnalysis.crossOccurrenceDownsampled( 308 | datasets, 309 | ap.seed.getOrElse(System.currentTimeMillis()).toInt) 310 | .map(_.asInstanceOf[IndexedDatasetSpark]) 311 | } 312 | 313 | val cooccurrenceCorrelators = cooccurrenceIDSs.zip(data.actions.map(_._1)).map(_.swap) //add back the actionNames 314 | 315 | val propertiesRDD: RDD[(ItemID, ItemProps)] = if (calcPopular) { 316 | val ranksRdd = getRanksRDD(data.fieldsRDD) 317 | data.fieldsRDD.fullOuterJoin(ranksRdd).map { 318 | case (item, (Some(fieldsPropMap), Some(rankPropMap))) => item -> (fieldsPropMap ++ rankPropMap) 319 | case (item, (Some(fieldsPropMap), None)) => item -> fieldsPropMap 320 | case (item, (None, Some(rankPropMap))) => item -> rankPropMap 321 | case (item, _) => item -> Map.empty 322 | } 323 | } else { 324 | sc.emptyRDD 325 | } 326 | 327 | logger.info("Correlators created now putting into URModel") 328 | new URModel( 329 | coocurrenceMatrices = cooccurrenceCorrelators, 330 | propertiesRDDs = Seq(propertiesRDD), 331 | typeMappings = getRankingMapping).save(dateNames, esIndex, esType) 332 | new NullModel 333 | } 334 | 335 | /** This function creates a URModel from an existing index in Elasticsearch + new popularity ranking. 336 | * It is used when you want to re-calc the popularity model between trainings on usage data. It leaves 337 | * the part of the model created from usage data alone and only modifies the popularity ranking.
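 * A minimal engine.json sketch that exercises only this path (values borrowed from the
 * examples, not required): "recsModel": "backfill" with
 * "rankings": [{"name": "popularRank", "type": "popular", "duration": "3650 days"}].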
335 |   /** This function creates a URModel from an existing index in Elasticsearch plus a new popularity ranking.
336 |    * It is used when you want to re-calculate the popularity model between trainings on usage data. It leaves
337 |    * the part of the model created from usage data alone and only modifies the popularity ranking.
338 |    */
339 |   def calcPop(data: PreparedData)(implicit sc: SparkContext): NullModel = {
340 | 
341 |     // Aggregate all $set/$unset properties, which are attached to items
342 |     val fieldsRDD: RDD[(ItemID, ItemProps)] = data.fieldsRDD
343 |     // Calc new ranking properties for all items
344 |     val ranksRdd: RDD[(ItemID, ItemProps)] = getRanksRDD(fieldsRDD)
345 |     // Current items RDD from ES
346 |     val currentMetadataRDD: RDD[(ItemID, ItemProps)] = EsClient.getRDD(esIndex, esType)
347 |     val propertiesRDD: RDD[(ItemID, ItemProps)] = currentMetadataRDD.fullOuterJoin(ranksRdd) map {
348 |       case (itemId, maps) =>
349 |         maps match {
350 |           case (Some(metaProp), Some(rankProp)) => itemId -> (metaProp ++ rankProp)
351 |           case (None, Some(rankProp)) => itemId -> rankProp
352 |           case (Some(metaProp), None) => itemId -> metaProp
353 |           case _ => itemId -> Map.empty
354 |         }
355 |     }
356 |     // logger.debug(s"RanksRdd\n${ranksRdd.take(25).mkString("\n")}")
357 | 
358 |     // return the existing model plus the new popularity ranking
359 |     new URModel(
360 |       propertiesRDDs = Seq(fieldsRDD.cache(), propertiesRDD.cache()),
361 |       typeMappings = getRankingMapping).save(dateNames, esIndex, esType)
362 |     new NullModel
363 |   }
364 | 
365 |   var queryEventNames: Seq[String] = Seq.empty[String] // if passed in with the query this overrides the engine.json list;
366 |   // used in MAP@k testing, it only affects which events are used in queries
367 | 
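  /* A sketch of a query that exercises the override above, using hypothetical user and item ids.
   * "eventNames" narrows which events are used (the MAP@k tool relies on this) and "withRanks"
   * asks for the ranking field values to be returned with each ItemScore:
   *
   *   {
   *     "user": "u-1",
   *     "eventNames": ["buy"],
   *     "withRanks": true,
   *     "num": 10
   *   }
   */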
368 |   /** Return a list of items recommended for a user identified in the query.
369 |    * The ES JSON query looks something like this:
370 |    * {
371 |    *   "size": 20,
372 |    *   "query": {
373 |    *     "bool": {
374 |    *       "should": [
375 |    *         {
376 |    *           "terms": {
377 |    *             "rate": ["0", "67", "4"]
378 |    *           }
379 |    *         },
380 |    *         {
381 |    *           "terms": {
382 |    *             "buy": ["0", "32"],
383 |    *             "boost": 2
384 |    *           }
385 |    *         },
386 |    *         { // categorical boosts
387 |    *           "terms": {
388 |    *             "category": ["cat1"],
389 |    *             "boost": 1.05
390 |    *           }
391 |    *         },
392 |    *         {
393 |    *           "constant_score": { // this orders popular items for backfill
394 |    *             "filter": {
395 |    *               "match_all": {}
396 |    *             },
397 |    *             "boost": 0.000001 // must have at least a small number to be boostable
398 |    *           }
399 |    *         }
400 |    *       ],
401 |    *       "must": [ // categorical filters
402 |    *         {
403 |    *           "terms": {
404 |    *             "category": ["cat1"],
405 |    *             "boost": 0
406 |    *           }
407 |    *         },
408 |    *         {
409 |    *           "constant_score": { // the date in the query must fall between an item's available and expire dates
410 |    *             "filter": {
411 |    *               "range": {
412 |    *                 "availabledate": {
413 |    *                   "lte": "2015-08-30T12:24:41-07:00"
414 |    *                 }
415 |    *               }
416 |    *             },
417 |    *             "boost": 0
418 |    *           }
419 |    *         },
420 |    *         {
421 |    *           "constant_score": { // a date range filter in the query must fall between these item property values
422 |    *             "filter": {
423 |    *               "range": {
424 |    *                 "expiredate": {
425 |    *                   "gte": "2015-08-15T11:28:45.114-07:00",
426 |    *                   "lt": "2015-08-20T11:28:45.114-07:00"
427 |    *                 }
428 |    *               }
429 |    *             },
430 |    *             "boost": 0
431 |    *           }
432 |    *         }
433 |    *       ],
434 |    *       "must_not": [ // blacklisted items
435 |    *         {
436 |    *           "ids": { "values": ["item-id1", "item-id2", ...] }
437 |    *         }
438 |    *       ],
439 |    *       "minimum_should_match": 1
440 |    *     }
441 |    *   }
442 |    * }
443 |    *
444 |    * @param model Ignored! since the model is already in Elasticsearch
445 |    * @param query contains the query spec
446 |    * @todo Need to prune the query to the minimum required for the data included; for instance, there is no need
447 |    *       for the popularity ranking if no PopModel is being used, and likewise for the "must" clause and dates.
448 |    */
449 |   def predict(model: NullModel, query: Query): PredictedResult = {
450 | 
451 |     queryEventNames = query.eventNames.getOrElse(modelEventNames) // eventNames in query take precedence
452 | 
453 |     val (queryStr, blacklist) = buildQuery(ap, query, rankingFieldNames)
454 |     val searchHitsOpt = EsClient.search(queryStr, esIndex)
455 | 
456 |     val withRanks = query.withRanks.getOrElse(false)
457 |     val predictedResult = searchHitsOpt match {
458 |       case Some(searchHits) =>
459 |         val recs = searchHits.getHits.map { hit =>
460 |           if (withRanks) {
461 |             val source = hit.getSource
462 |             val ranks: Map[String, Double] = rankingsParams map { backfillParams =>
463 |               val backfillType = backfillParams.`type`.getOrElse(defaultURAlgorithmParams.DefaultBackfillType)
464 |               val backfillFieldName = backfillParams.name.getOrElse(PopModel.nameByType(backfillType))
465 |               backfillFieldName -> source.get(backfillFieldName).asInstanceOf[Double]
466 |             } toMap
467 | 
468 |             ItemScore(hit.getId, hit.getScore.toDouble,
469 |               ranks = if (ranks.nonEmpty) Some(ranks) else None)
470 |           } else {
471 |             ItemScore(hit.getId, hit.getScore.toDouble)
472 |           }
473 |         }
474 |         logger.info(s"Results: ${searchHits.getHits.length} retrieved of a possible ${searchHits.totalHits()}")
475 |         PredictedResult(recs)
476 | 
477 |       case _ =>
478 |         logger.info(s"No results for query ${parse(queryStr)}")
479 |         PredictedResult(Array.empty[ItemScore])
480 |     }
481 | 
482 |     // should have all blacklisted items excluded
483 |     // todo: need to add dithering; mean, sigma, and seed are required. Make a seed that only changes on some
484 |     // fixed time period so the recs ordering stays fixed for that time period.
485 |     predictedResult
486 |   }
487 | 
488 |   /** Calculate all fields and items needed for ranking.
489 |    *
490 |    *  @param fieldsRDD all items with their fields
491 |    *  @param sc the current Spark context
492 |    *  @return an RDD of (itemID, ranking property map) for all ranked items
493 |    */
494 |   def getRanksRDD(fieldsRDD: RDD[(ItemID, ItemProps)])(implicit sc: SparkContext): RDD[(ItemID, ItemProps)] = {
495 |     val popModel = PopModel(fieldsRDD)
496 |     val rankRDDs: Seq[(String, RDD[(ItemID, Double)])] = rankingsParams map { rankingParams =>
497 |       val rankingType = rankingParams.`type`.getOrElse(defaultURAlgorithmParams.DefaultBackfillType)
498 |       val rankingFieldName = rankingParams.name.getOrElse(PopModel.nameByType(rankingType))
499 |       val durationAsString = rankingParams.duration.getOrElse(defaultURAlgorithmParams.DefaultBackfillDuration)
500 |       val duration = Duration(durationAsString).toSeconds.toInt
501 |       val backfillEvents = rankingParams.eventNames.getOrElse(modelEventNames.take(1))
502 |       val offsetDate = rankingParams.offsetDate
503 |       val rankRdd = popModel.calc(modelName = rankingType, eventNames = backfillEvents, appName, duration, offsetDate)
504 |       rankingFieldName -> rankRdd
505 |     }
506 | 
507 |     // logger.debug(s"RankRDDs[${rankRDDs.size}]\n${rankRDDs.map(_._1).mkString(", ")}\n${rankRDDs.map(_._2.take(25).mkString("\n")).mkString("\n\n")}")
508 |     rankRDDs.foldLeft[RDD[(ItemID, ItemProps)]](sc.emptyRDD) {
509 |       case (leftRdd, (fieldName, rightRdd)) =>
510 |         leftRdd.fullOuterJoin(rightRdd).map {
511 |           case (itemId, (Some(propMap), Some(rank))) => itemId -> (propMap + (fieldName -> JDouble(rank)))
512 |           case (itemId, (Some(propMap), None)) => itemId -> propMap
513 |           case (itemId, (None, Some(rank))) => itemId -> Map(fieldName -> JDouble(rank))
514 |           case (itemId, _) => itemId -> Map.empty
515 |         }
516 |     }
517 |   }
518 | 
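  /* A sketch of how rankings params feed getRanksRDD above, assuming this hypothetical
   * engine.json fragment (the duration strings are the Scala Duration format introduced in
   * v0.3.0, and the type strings are whatever PopModel.calc supports, e.g. popular or random):
   *
   *   "rankings": [
   *     { "name": "popRank", "type": "popular", "eventNames": ["buy"], "duration": "3 days" },
   *     { "type": "random" }
   *   ]
   *
   * One PopModel.calc runs per entry; "popRank" and the default field name for "random" are then
   * written to every item as float properties, which buildQuerySort orders by, descending.
   */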
519 |   /** Build a query from the default algorithm params and the query itself, taking into account defaults. */
520 |   def buildQuery(
521 |     ap: URAlgorithmParams,
522 |     query: Query,
523 |     backfillFieldNames: Seq[String] = Seq.empty): (String, Seq[Event]) = {
524 | 
525 |     try {
526 |       // create a list of all query correlators that can have a bias (boost or filter) attached
527 |       val (boostable, events) = getBiasedRecentUserActions(query)
528 | 
529 |       // Since users have action history and items have correlators, and both correspond to the same "actions"
530 |       // (like purchase or view), we pass both to the query. If the user history and item correlators are empty,
531 |       // metadata or backfill must be relied on to return results.
532 |       val numRecs = query.num.getOrElse(limit)
533 |       val should = buildQueryShould(query, boostable)
534 |       val must = buildQueryMust(query, boostable)
535 |       val mustNot = buildQueryMustNot(query, events)
536 |       val sort = buildQuerySort()
537 | 
538 |       val json =
539 |         ("size" -> numRecs) ~
540 |           ("query" ->
541 |             ("bool" ->
542 |               ("should" -> should) ~
543 |               ("must" -> must) ~
544 |               ("must_not" -> mustNot) ~
545 |               ("minimum_should_match" -> 1))) ~
546 |           ("sort" -> sort)
547 | 
548 |       val compactJson = compact(render(json))
549 | 
550 |       logger.info(s"Query:\n$compactJson")
551 |       (compactJson, events)
552 |     } catch {
553 |       case e: IllegalArgumentException => ("", Seq.empty[Event])
554 |     }
555 |   }
556 | 
557 |   /** Build should query part */
558 |   def buildQueryShould(query: Query, boostable: Seq[BoostableCorrelators]): Seq[JValue] = {
559 | 
560 |     // create a list of all boosted query correlators
561 |     val recentUserHistory: Seq[BoostableCorrelators] = if (userBias >= 0f) {
562 |       boostable.take(maxQueryEvents) // take at most maxQueryEvents correlators
563 |     } else {
564 |       Seq.empty
565 |     }
566 | 
567 |     val similarItems: Seq[BoostableCorrelators] = if (itemBias >= 0f) {
568 |       getBiasedSimilarItems(query)
569 |     } else {
570 |       Seq.empty
571 |     }
572 | 
573 |     val boostedMetadata = getBoostedMetadata(query)
574 |     val allBoostedCorrelators = recentUserHistory ++ similarItems ++ boostedMetadata
575 | 
576 |     val shouldFields: Seq[JValue] = allBoostedCorrelators.map {
577 |       case BoostableCorrelators(actionName, itemIDs, boost) =>
578 |         render("terms" -> (actionName -> itemIDs) ~ ("boost" -> boost))
579 |     }
580 | 
581 |     val shouldScore: JValue = parse(
582 |       """
583 |         |{
584 |         |  "constant_score": {
585 |         |    "filter": {
586 |         |      "match_all": {}
587 |         |    },
588 |         |    "boost": 0
589 |         |  }
590 |         |}
591 |         |""".stripMargin)
592 | 
593 |     shouldFields :+ shouldScore
594 |   }
595 | 
596 |   /** Build must query part */
597 |   def buildQueryMust(query: Query, boostable: Seq[BoostableCorrelators]): Seq[JValue] = {
598 | 
599 |     // create a list of all query correlators that are to be used to filter results
600 |     val recentUserHistoryFilter: Seq[FilterCorrelators] = if (userBias < 0f) {
601 |       // strip any boosts
602 |       boostable.map(_.toFilterCorrelators).take(maxQueryEvents)
603 |     } else {
604 |       Seq.empty
605 |     }
606 | 
607 |     val similarItemsFilter: Seq[FilterCorrelators] = if (itemBias < 0f) {
608 |       getBiasedSimilarItems(query).map(_.toFilterCorrelators)
609 |     } else {
610 |       Seq.empty
611 |     }
612 | 
613 |     val filteringMetadata = getFilteringMetadata(query)
614 |     val filteringDateRange = getFilteringDateRange(query)
615 |     val allFilteringCorrelators = recentUserHistoryFilter ++ similarItemsFilter ++ filteringMetadata
616 | 
617 |     val mustFields: Seq[JValue] = allFilteringCorrelators.map {
618 |       case FilterCorrelators(actionName, itemIDs) =>
619 |         render("terms" -> (actionName -> itemIDs) ~ ("boost" -> 0))
620 |     }
621 |     mustFields ++ filteringDateRange
622 |   }
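  /* A sketch of what the should/must builders above render, under assumed example values:
   *
   *   BoostableCorrelators("buy", Seq("iPad", "iPhone"), Some(2f)) becomes
   *     { "terms": { "buy": ["iPad", "iPhone"], "boost": 2.0 } }
   *   FilterCorrelators("category", Seq("tablets")) becomes
   *     { "terms": { "category": ["tablets"], "boost": 0 } }
   *
   * The "should" list always ends with a constant_score/match_all clause, so items that match no
   * correlator can still satisfy minimum_should_match and be ordered by the popularity sort.
   */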
623 | 
624 |   /** Build must_not query part */
625 |   def buildQueryMustNot(query: Query, events: Seq[Event]): JValue = {
626 |     val mustNotFields: JValue = render("ids" -> ("values" -> getExcludedItems(events, query)) ~ ("boost" -> 0))
627 |     mustNotFields
628 |   }
629 | 
630 |   /** Build sort query part */
631 |   def buildQuerySort(): Seq[JValue] = if (recsModel == RecsModel.All || recsModel == RecsModel.BF) {
632 |     val sortByScore: Seq[JValue] = Seq(parse("""{"_score": {"order": "desc"}}"""))
633 |     val sortByRanks: Seq[JValue] = rankingFieldNames map { fieldName =>
634 |       parse(s"""{ "$fieldName": { "unmapped_type": "double", "order": "desc" } }""")
635 |     }
636 |     sortByScore ++ sortByRanks
637 |   } else {
638 |     Seq.empty
639 |   }
640 | 
641 |   /** Create a list of item ids that the user has interacted with, or that are otherwise not to be included in recommendations */
642 |   def getExcludedItems(userEvents: Seq[Event], query: Query): Seq[String] = {
643 | 
644 |     val blacklistedItems = userEvents.filter { event =>
645 |       // either a list or an empty list of filtering events, so honor them
646 |       blacklistEvents match {
647 |         case Nil => modelEventNames.head equals event.event
648 |         case _ => blacklistEvents contains event.event
649 |       }
650 |     }.map(_.targetEntityId.getOrElse("")) ++ query.blacklistItems.getOrEmpty.distinct
651 | 
652 |     // Now conditionally add the query item itself
653 |     val includeSelf = query.returnSelf.getOrElse(returnSelf)
654 |     val allExcludedItems = if (!includeSelf && query.item.nonEmpty) {
655 |       blacklistedItems :+ query.item.get
656 |     } // add the query item to be excluded
657 |     else {
658 |       blacklistedItems
659 |     }
660 |     allExcludedItems.distinct
661 |   }
662 | 
663 |   /** Get similar items for an item; these are already in the action correlators in ES */
664 |   def getBiasedSimilarItems(query: Query): Seq[BoostableCorrelators] = {
665 |     if (query.item.nonEmpty) {
666 |       val m = EsClient.getSource(esIndex, esType, query.item.get)
667 | 
668 |       if (m != null) {
669 |         val itemEventBias = query.itemBias.getOrElse(itemBias)
670 |         val itemEventsBoost = if (itemEventBias > 0 && itemEventBias != 1) Some(itemEventBias) else None
671 |         modelEventNames.map { action =>
672 |           val items: Seq[String] = try {
673 |             if (m.containsKey(action) && m.get(action) != null) {
674 |               m.get(action).asInstanceOf[util.ArrayList[String]].asScala
675 |             } else {
676 |               Seq.empty[String]
677 |             }
678 |           } catch {
679 |             case cce: ClassCastException =>
680 |               logger.warn(s"Bad value in item [${query.item}] corresponding to key [$action]; expected a Seq[String] so ignoring it.")
681 |               Seq.empty[String]
682 |           }
683 |           val rItems = items.take(maxQueryEvents) // limit to at most maxQueryEvents correlator values
684 |           BoostableCorrelators(action, rItems, itemEventsBoost)
685 |         }
686 |       } else {
687 |         Seq.empty
688 |       } // no similar items
689 |     } else {
690 |       Seq.empty[BoostableCorrelators]
691 |     } // no item specified
692 |   }
693 | 
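  /* A sketch of an item-similarity query, assuming a hypothetical item id "iPad-1" that exists
   * in the index:
   *
   *   { "item": "iPad-1", "itemBias": 2 }
   *
   * getBiasedSimilarItems above reads the stored correlator lists for "iPad-1" (one per event
   * name) and wraps each as a BoostableCorrelators with boost 2, so items that co-occur with it
   * in user histories are favored; a negative itemBias would turn these into filters instead.
   */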
694 |   /** Get recent events of the user on items to create the recommendations query from */
695 |   def getBiasedRecentUserActions(query: Query): (Seq[BoostableCorrelators], Seq[Event]) = {
696 | 
697 |     val recentEvents = try {
698 |       LEventStore.findByEntity(
699 |         appName = appName,
700 |         // entityType and entityId are specified for fast lookup
701 |         entityType = "user",
702 |         entityId = query.user.get,
703 |         // one query per eventName is not ideal, maybe one query for lots of events then split by eventName
704 |         //eventNames = Some(Seq(action)),// get all and separate later
705 |         eventNames = Some(queryEventNames), // get all and separate later
706 |         targetEntityType = None,
707 |         // limit = Some(maxQueryEvents), // this will get all history, then each action can be limited before
708 |         // being used in the query
709 |         latest = true,
710 |         // set time limit to avoid super long DB access
711 |         timeout = Duration(200, "millis")).toList
712 |     } catch {
713 |       case e: scala.concurrent.TimeoutException =>
714 |         logger.error(s"Timeout when reading recent events; an empty list will be used. $e")
715 |         Seq.empty[Event]
716 |       case e: NoSuchElementException => // todo: bad form to use an exception to check if there is a user id
717 |         logger.info("No user id for recs, returning similar items for the item specified")
718 |         Seq.empty[Event]
719 |       case e: Exception => // fatal because of error, an empty query
720 |         logger.error(s"Error when reading recent events: $e")
721 |         throw e
722 |     }
723 | 
724 |     val userEventBias = query.userBias.getOrElse(userBias)
725 |     val userEventsBoost = if (userEventBias > 0 && userEventBias != 1) Some(userEventBias) else None
726 |     val rActions = queryEventNames.map { action =>
727 |       var items = Seq.empty[String]
728 | 
729 |       for (event <- recentEvents)
730 |         if (event.event == action && items.size < maxQueryEvents) {
731 |           items = event.targetEntityId.get +: items
732 |           // todo: this may throw an exception; we should ignore the event instead of crashing
733 |         }
734 |       // userEventsBoost may be None, which will cause no boost JSON to be output for this correlator
735 |       BoostableCorrelators(action, items.distinct, userEventsBoost)
736 |     }
737 |     (rActions, recentEvents)
738 |   }
739 | 
740 |   /** get all metadata fields that potentially have boosts (not filters) */
741 |   def getBoostedMetadata(query: Query): Seq[BoostableCorrelators] = {
742 |     val paramsBoostedFields = fields.filter(_.bias >= 0f) // bias >= 0 means boost; bias < 0 means filter
743 |     val queryBoostedFields = query.fields.getOrEmpty.filter(_.bias >= 0f)
744 | 
745 |     (queryBoostedFields ++ paramsBoostedFields)
746 |       .map(field => BoostableCorrelators(field.name, field.values, Some(field.bias)))
747 |       .distinct // de-dup and favor query fields
748 |   }
749 | 
750 |   /** get all metadata fields that are filters (not boosts) */
751 |   def getFilteringMetadata(query: Query): Seq[FilterCorrelators] = {
752 |     val paramsFilterFields = fields.filter(_.bias < 0f)
753 |     val queryFilterFields = query.fields.getOrEmpty.filter(_.bias < 0f)
754 | 
755 |     (queryFilterFields ++ paramsFilterFields)
756 |       .map(field => FilterCorrelators(field.name, field.values))
757 |       .distinct // de-dup and favor query fields
758 |   }
759 | 
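  /* A sketch of the field bias convention used by the two methods above, with a hypothetical
   * query fragment:
   *
   *   "fields": [
   *     { "name": "category", "values": ["tablets"], "bias": 1.05 },
   *     { "name": "countries", "values": ["US", "CA"], "bias": -1 }
   *   ]
   *
   * bias >= 0 yields a boosted "terms" clause in "should" (1.05 mildly favors tablets); bias < 0
   * yields a zero-boost "terms" clause in "must", so only items matching "US" or "CA" are returned.
   */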
760 |   /** get part of query for dates and date ranges */
761 |   def getFilteringDateRange(query: Query): Seq[JValue] = {
762 | 
763 |     // a dateRange in the query overrides the currentDate, so the currentDate is only used when no dateRange is present
764 |     val currentDate = query.currentDate.getOrElse(DateTime.now().toDateTimeISO.toString)
765 | 
766 |     val json: Seq[JValue] = if (query.dateRange.nonEmpty &&
767 |       (query.dateRange.get.after.nonEmpty || query.dateRange.get.before.nonEmpty)) {
768 |       val name = query.dateRange.get.name
769 |       val before = query.dateRange.get.before.getOrElse("")
770 |       val after = query.dateRange.get.after.getOrElse("")
771 |       val rangeStart = s"""
772 |         |{
773 |         |  "constant_score": {
774 |         |    "filter": {
775 |         |      "range": {
776 |         |        "$name": {
777 |       """.stripMargin
778 | 
779 |       val rangeAfter = s"""
780 |         |          "gt": "$after"
781 |       """.stripMargin
782 | 
783 |       val rangeBefore = s"""
784 |         |          "lt": "$before"
785 |       """.stripMargin
786 | 
787 |       val rangeEnd = s"""
788 |         |        }
789 |         |      }
790 |         |    },
791 |         |    "boost": 0
792 |         |  }
793 |         |}
794 |       """.stripMargin
795 | 
796 |       var range = rangeStart
797 |       if (!after.isEmpty) {
798 |         range += rangeAfter
799 |         if (!before.isEmpty) range += ","
800 |       }
801 |       if (!before.isEmpty) range += rangeBefore
802 |       range += rangeEnd
803 | 
804 |       Seq(parse(range))
805 |     } else if (ap.availableDateName.nonEmpty && ap.expireDateName.nonEmpty) { // use the query date or system date
806 |       val availableDate = ap.availableDateName.get // never None
807 |       val expireDate = ap.expireDateName.get
808 |       val available = s"""
809 |         |{
810 |         |  "constant_score": {
811 |         |    "filter": {
812 |         |      "range": {
813 |         |        "$availableDate": {
814 |         |          "lte": "$currentDate"
815 |         |        }
816 |         |      }
817 |         |    },
818 |         |    "boost": 0
819 |         |  }
820 |         |}
821 |       """.stripMargin
822 |       val expire = s"""
823 |         |{
824 |         |  "constant_score": {
825 |         |    "filter": {
826 |         |      "range": {
827 |         |        "$expireDate": {
828 |         |          "gt": "$currentDate"
829 |         |        }
830 |         |      }
831 |         |    },
832 |         |    "boost": 0
833 |         |  }
834 |         |}
835 |       """.stripMargin
836 | 
837 |       Seq(parse(available), parse(expire))
838 |     } else {
839 |       logger.info(
840 |         """
841 |           |Misconfigured date information: either your engine.json date settings or your query's dateRange is incorrect.
842 |           |Ignoring date information for this query.""".stripMargin)
843 |       Seq.empty
844 |     }
845 |     json
846 |   }
847 | 
848 |   def getRankingMapping: Map[String, String] = rankingFieldNames map { fieldName =>
849 |     fieldName -> "float"
850 |   } toMap
851 | 
852 | }
853 | 
--------------------------------------------------------------------------------
/src/main/scala/URModel.scala:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright ActionML, LLC under one or more
3 |  * contributor license agreements.  See the NOTICE file distributed with
4 |  * this work for additional information regarding copyright ownership.
5 |  * ActionML licenses this file to You under the Apache License, Version 2.0
6 |  * (the "License"); you may not use this file except in compliance with
7 |  * the License.  You may obtain a copy of the License at
8 |  *
9 |  *     http://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | package org.template
19 | 
20 | import grizzled.slf4j.Logger
21 | import io.prediction.data.storage.DataMap
22 | import org.apache.mahout.math.indexeddataset.IndexedDataset
23 | import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark
24 | import org.apache.spark.SparkContext
25 | import org.apache.spark.rdd.RDD
26 | import org.joda.time.DateTime
27 | import org.json4s.JsonAST.JArray
28 | import org.json4s._
29 | import org.template.conversions.{ IndexedDatasetConversions, ItemID, ItemProps }
30 | 
31 | /** Universal Recommender models to save in ES */
32 | class URModel(
33 |   coocurrenceMatrices: Seq[(ItemID, IndexedDataset)] = Seq.empty,
34 |   propertiesRDDs: Seq[RDD[(ItemID, ItemProps)]] = Seq.empty,
35 |   typeMappings: Map[String, String] = Map.empty, // maps field names that need type mapping in Elasticsearch
36 |   nullModel: Boolean = false)(implicit sc: SparkContext) {
37 | 
38 |   @transient lazy val logger: Logger = Logger[this.type]
39 | 
40 |   /** Save all fields to be indexed by Elasticsearch and queried for recs.
41 |    * The result is something like a table with row IDs = item IDs and separate fields for all
42 |    * cooccurrence and cross-cooccurrence correlators and metadata for each item. Metadata fields are
43 |    * limited to collections of text terms, i.e. vector types. Scalar values can be used but depend on
44 |    * Elasticsearch's support. One exception is the Date scalar, which is also supported.
45 |    * @return always returns true since most other reasons not to save cause exceptions
46 |    */
47 |   def save(dateNames: Seq[String], esIndex: String, esType: String): Boolean = {
48 | 
49 |     logger.debug("Start save model")
50 | 
51 |     if (nullModel) throw new IllegalStateException("Saving a null model created from loading an old one.")
52 | 
53 |     // For ES we need to create the entire index in an RDD of maps, one per item, so we
54 |     // convert the cooccurrence matrices into correlators as RDD[(itemID, (actionName, Seq[itemID]))],
55 |     // the form Elasticsearch expects
56 |     logger.info("Converting cooccurrence matrices into correlators")
57 |     val correlatorRDDs: Seq[RDD[(ItemID, ItemProps)]] = coocurrenceMatrices.map {
58 |       case (actionName, dataset) =>
59 |         dataset.asInstanceOf[IndexedDatasetSpark].toStringMapRDD(actionName)
60 |     }
61 | 
62 |     logger.info("Grouping all properties RDDs")
63 |     val groupedRDD: RDD[(ItemID, ItemProps)] = groupAll(correlatorRDDs ++ propertiesRDDs)
64 |     // logger.debug(s"Grouped RDD\n${groupedRDD.take(25).mkString("\n")}")
65 | 
66 |     val esRDD: RDD[Map[String, Any]] = groupedRDD.mapPartitions { iter =>
67 |       iter map {
68 |         case (itemId, itemProps) =>
69 |           val propsMap = itemProps.map {
70 |             case (propName, propValue) =>
71 |               propName -> URModel.extractJvalue(dateNames, propName, propValue)
72 |           }
73 |           propsMap + ("id" -> itemId)
74 |       }
75 |     }
76 |     // logger.debug(s"ES RDD\n${esRDD.take(25).mkString("\n")}")
77 | 
78 |     val esFields: List[String] = esRDD.flatMap(_.keySet).distinct().collect.toList
79 |     logger.info(s"ES fields[${esFields.size}]: $esFields")
80 | 
81 |     EsClient.hotSwap(esIndex, esType, esRDD, esFields, typeMappings)
82 |     true
83 |   }
84 | 
85 |   def groupAll(fields: Seq[RDD[(ItemID, ItemProps)]]): RDD[(ItemID, ItemProps)] = {
86 |     fields.fold(sc.emptyRDD[(ItemID, ItemProps)])(_ ++ _).reduceByKey(_ ++ _)
87 |   }
88 | }
89 | 
90 | object URModel {
91 |   @transient lazy val logger: Logger = Logger[this.type]
92 | 
93 |   /** This is actually only used to read saved values, and since they are in Elasticsearch we don't need to read;
94 |    * 
this means we create a null model since it will not be used. 95 | * todo: we should rejigger the template framework so this is not required. 96 | * @param id ignored 97 | * @param params ignored 98 | * @param sc ignored 99 | * @return dummy null model 100 | */ 101 | def apply(id: String, params: URAlgorithmParams, sc: Option[SparkContext]): URModel = { 102 | // todo: need changes in PIO to remove the need for this 103 | new URModel(null, null, null, nullModel = true)(sc.get) 104 | } 105 | 106 | def extractJvalue(dateNames: Seq[String], key: String, value: Any): Any = value match { 107 | case JArray(list) => list.map(extractJvalue(dateNames, key, _)) 108 | case JString(s) => 109 | if (dateNames.contains(key)) { 110 | new DateTime(s).toDate 111 | } else if (RankingFieldName.toSeq.contains(key)) { 112 | s.toDouble 113 | } else { 114 | s 115 | } 116 | case JDouble(double) => double 117 | case JInt(int) => int 118 | case JBool(bool) => bool 119 | case _ => value 120 | } 121 | 122 | } 123 | -------------------------------------------------------------------------------- /src/main/scala/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright ActionML, LLC under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.template 19 | 20 | import grizzled.slf4j.Logger 21 | 22 | import scala.collection.JavaConversions._ 23 | import org.apache.mahout.sparkbindings.SparkDistributedContext 24 | import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark 25 | import org.apache.mahout.sparkbindings._ 26 | import org.apache.spark.rdd.RDD 27 | import org.json4s._ 28 | 29 | /** Utility conversions for IndexedDatasetSpark */ 30 | package object conversions { 31 | 32 | type UserID = String 33 | type ActionID = String 34 | type ItemID = String 35 | // Item properties (fieldName, fieldValue) 36 | type ItemProps = Map[String, JValue] 37 | 38 | def drawActionML(implicit logger: Logger): Unit = { 39 | val actionML = 40 | """ 41 | | 42 | | _ _ __ __ _ 43 | | /\ | | (_) | \/ | | 44 | | / \ ___| |_ _ ___ _ __ | \ / | | 45 | | / /\ \ / __| __| |/ _ \| '_ \| |\/| | | 46 | | / ____ \ (__| |_| | (_) | | | | | | | |____ 47 | | /_/ \_\___|\__|_|\___/|_| |_|_| |_|______| 48 | | 49 | | 50 | """.stripMargin 51 | 52 | logger.info(actionML) 53 | } 54 | 55 | def drawInfo(title: String, dataMap: Seq[(String, Any)])(implicit logger: Logger): Unit = { 56 | val leftAlignFormat = "║ %-30s%-28s ║" 57 | 58 | val line = "═" * 60 59 | 60 | val preparedTitle = "║ %-58s ║".format(title) 61 | val data = dataMap.map { 62 | case (key, value) => 63 | leftAlignFormat.format(key, value) 64 | } mkString "\n" 65 | 66 | logger.info( 67 | s""" 68 | |╔$line╗ 69 | |$preparedTitle 70 | |$data 71 | |╚$line╝ 72 | |""".stripMargin) 73 | 74 | } 75 | 76 | implicit class OptionCollection[T](collectionOpt: Option[Seq[T]]) { 77 | def getOrEmpty: Seq[T] = { 78 | collectionOpt.getOrElse(Seq.empty[T]) 79 | } 80 | } 81 | 82 | implicit class IndexedDatasetConversions(val indexedDataset: IndexedDatasetSpark) { 83 | def toStringMapRDD(actionName: ActionID): RDD[(ItemID, ItemProps)] = { 84 | @transient lazy val logger = Logger[this.type] 85 | 86 | //val matrix = indexedDataset.matrix.checkpoint() 87 | val rowIDDictionary = indexedDataset.rowIDs 88 | implicit val sc = indexedDataset.matrix.context.asInstanceOf[SparkDistributedContext].sc 89 | val rowIDDictionary_bcast = sc.broadcast(rowIDDictionary) 90 | 91 | val columnIDDictionary = indexedDataset.columnIDs 92 | val columnIDDictionary_bcast = sc.broadcast(columnIDDictionary) 93 | 94 | // may want to mapPartition and create bulk updates as a slight optimization 95 | // creates an RDD of (itemID, Map[correlatorName, list-of-correlator-values]) 96 | indexedDataset.matrix.rdd.map[(ItemID, ItemProps)] { 97 | case (rowNum, itemVector) => 98 | 99 | // turn non-zeros into list for sorting 100 | var itemList = List[(Int, Double)]() 101 | for (ve <- itemVector.nonZeroes) { 102 | itemList = itemList :+ (ve.index, ve.get) 103 | } 104 | //sort by highest strength value descending(-) 105 | val vector = itemList.sortBy { elem => -elem._2 } 106 | 107 | val itemID = rowIDDictionary_bcast.value.inverse.getOrElse(rowNum, "INVALID_ITEM_ID") 108 | try { 109 | 110 | require(itemID != "INVALID_ITEM_ID", s"Bad row number in matrix, skipping item $rowNum") 111 | require(vector.nonEmpty, s"No values so skipping item $rowNum") 112 | 113 | // create a list of element ids 114 | val values = JArray(vector.map { item => 115 | JString(columnIDDictionary_bcast.value.inverse.getOrElse(item._1, "")) // should always be in the dictionary 116 | }) 117 | 118 | (itemID, Map(actionName -> values)) 119 | 120 | } catch { 121 | case cce: IllegalArgumentException => //non-fatal, ignore line 122 | null.asInstanceOf[(ItemID, 
ItemProps)] 123 | } 124 | 125 | }.filter(_ != null) 126 | } 127 | } 128 | 129 | } 130 | -------------------------------------------------------------------------------- /template.json: -------------------------------------------------------------------------------- 1 | {"pio": {"version": { "min": "0.9.7-aml" }}} 2 | --------------------------------------------------------------------------------