forked from apache/predictionio
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First version of using Events in ItemRank engine.
- Loading branch information
Showing
8 changed files
with
182 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
package io.prediction.engines.itemrank | ||
|
||
import io.prediction.controller.LDataSource | ||
import io.prediction.controller.Params | ||
import io.prediction.data.view.LBatchView | ||
|
||
import org.joda.time.DateTime | ||
|
||
/** Parameters consumed by [[EventsDataSource]].
  *
  * @param appId          application id handed to the batch view
  * @param itypes         when defined, only items sharing at least one of these
  *                       itypes are used for training; `None` (the default)
  *                       keeps items of every itype
  * @param actions        event names that are used for training
  * @param startTime      event start time handed to the batch view
  * @param untilTime      event until time handed to the batch view
  * @param attributeNames names of the entity types, events and properties to
  *                       read from the event data
  */
case class EventsDataSoureParams(
  appId: Int,
  itypes: Option[Set[String]] = None,
  actions: Set[String],
  startTime: Option[DateTime],
  untilTime: Option[DateTime],
  attributeNames: AttributeNames
) extends Params
|
||
|
||
/** Data source that builds ItemRank training data from events exposed by an
  * [[LBatchView]].
  *
  * NOTE(review): the first type argument of `LDataSource` is
  * `DataSourceParams`, while the constructor takes `EventsDataSoureParams` --
  * confirm this is intended.
  *
  * @param dsp data source parameters (app id, time bounds, training actions,
  *            optional itype filter and attribute names)
  */
class EventsDataSource(dsp: EventsDataSoureParams)
  extends LDataSource[DataSourceParams,
    DataParams, TrainingData, Query, Actual] {

  // Batch view for the configured app and time bounds; created on first use
  // and excluded from serialization.
  @transient lazy val batchView = new LBatchView(dsp.appId,
    dsp.startTime, dsp.untilTime)

  /** Assembles the training data: users and items keyed by a 1-based index,
    * plus the user-to-item actions expressed with those indices.
    *
    * @throws IllegalArgumentException (via `require`) when a selected
    *         user-to-item event carries no `targetEntityId`
    */
  override
  def readTraining(): TrainingData = {

    val names = dsp.attributeNames

    // uid => (UserTD, uindex); the index is shifted so that it starts from 1
    val userProps = batchView.aggregateProperties(names.user)
    val usersMap: Map[String, (UserTD, Int)] = userProps.zipWithIndex.map {
      case ((uid, _), position) =>
        (uid, (new UserTD(uid = uid), position + 1))
    }

    // iid => ItemTD for every aggregated item entity
    val itemProps = batchView.aggregateProperties(names.item)
    val allItems = itemProps.map { case (iid, props) =>
      val itemTypes = props.get[List[String]](names.itypes)
      val startMillis = props.getOpt[DateTime](names.starttime)
        .map(_.getMillis)
      val endMillis = props.getOpt[DateTime](names.endtime)
        .map(_.getMillis)
      val isInactive = props.getOpt[Boolean](names.inactive)
        .getOrElse(false)
      val item = new ItemTD(
        iid = iid,
        itypes = itemTypes,
        starttime = startMillis,
        endtime = endMillis,
        inactive = isInactive
      )
      (iid, item)
    }

    // keep only items that share an itype with the requested set (if any)
    val selectedItems = allItems.filter { case (_, item) =>
      dsp.itypes match {
        case Some(wanted) => item.itypes.exists(wanted.contains)
        case None => true
      }
    }

    // iid => (ItemTD, iindex); indices are assigned after filtering, from 1
    val itemsMap = selectedItems.zipWithIndex.map {
      case ((iid, item), position) =>
        (iid, (item, position + 1))
    }

    val selectedEvents = batchView.events.filter { ev =>
      val knownAction = names.u2iActions.contains(ev.event)
      knownAction &&
        dsp.actions.contains(ev.event) &&
        usersMap.contains(ev.entityId) &&
        // An event without targetEntityId is an error case, but it is let
        // through here on purpose so that it gets flagged in the next step.
        (ev.targetEntityId match {
          case Some(target) => itemsMap.contains(target)
          case None => true
        })
    }

    val u2iActions = selectedEvents.map { ev =>
      // flag events that slipped through without a targetEntityId
      require(ev.targetEntityId.isDefined,
        s"u2i Event: ${ev} cannot have targetEntityId empty.")
      val userIndex = usersMap(ev.entityId)._2
      // safe: the require above guarantees targetEntityId is defined
      val itemIndex = itemsMap(ev.targetEntityId.get)._2
      new U2IActionTD(
        uindex = userIndex,
        iindex = itemIndex,
        action = ev.event,
        v = ev.properties.getOpt[Int](names.rating),
        t = ev.eventTime.getMillis
      )
    }

    // re-key users and items by their index
    val usersByIndex = usersMap.map { case (_, (user, uindex)) =>
      (uindex, user)
    }
    val itemsByIndex = itemsMap.map { case (_, (item, iindex)) =>
      (iindex, item)
    }

    new TrainingData(
      users = usersByIndex,
      items = itemsByIndex,
      u2iActions = u2iActions
    )
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
10 changes: 10 additions & 0 deletions
10
engines/src/main/scala/itemrank/examples/params/OLDdatasource.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{ | ||
"appid": 1, | ||
"actions": [ "view", "like", "conversion", "rate" ], | ||
"hours": 24, | ||
"trainStart" : "2014-04-01T00:00:00.000Z", | ||
"testStart" : "2014-04-20T00:00:00.000Z", | ||
"testUntil" : "2014-04-21T00:00:00.000Z", | ||
"goal": ["conversion", "view"], | ||
"verbose" : true | ||
} |
18 changes: 11 additions & 7 deletions
18
engines/src/main/scala/itemrank/examples/params/datasource.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,14 @@ | ||
{ | ||
"appid": 1, | ||
"appId": 3, | ||
"actions": [ "view", "like", "conversion", "rate" ], | ||
"hours": 24, | ||
"trainStart" : "2014-04-01T00:00:00.000Z", | ||
"testStart" : "2014-04-20T00:00:00.000Z", | ||
"testUntil" : "2014-04-21T00:00:00.000Z", | ||
"goal": ["conversion", "view"], | ||
"verbose" : true | ||
"attributeNames" : { | ||
"user" : "user", | ||
"item" : "item", | ||
"u2iActions" : [ "view", "like", "conversion", "rate" ], | ||
"itypes" : "pio_itypes", | ||
"starttime" : "starttime", | ||
"endtime" : "endtime", | ||
"inactive" : "inactive", | ||
"rating" : "pio_rate" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package org.sample.test | ||
|
||
import io.prediction.controller._ | ||
|
||
/** Command-line entry point that runs the workflow with `MyDataSource` as the
  * only configured component.
  */
object Runner {

  /** Starts `Workflow.run` with `MyDataSource` as the data source class,
    * verbosity 3 and the batch label "MyDataSource".
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    // Explicit `: Unit =` replaces the deprecated procedure syntax.
    Workflow.run(
      dataSourceClassOpt = Some(classOf[MyDataSource]),
      params = WorkflowParams(
        verbose = 3,
        batch = "MyDataSource")
    )
  }

}