
Commit

added support for sample weights
Eric Lundquist authored and committed May 29, 2020
1 parent 5144a73 commit 0648a5e
Showing 3 changed files with 77 additions and 37 deletions.
67 changes: 45 additions & 22 deletions rankfm/numba_methods.py
@@ -46,10 +46,37 @@ def isin_2(item, items):


@nb.njit
def _fit(interactions, user_items, item_idx, regularization, learning_rate, learning_schedule, learning_exponent, epochs, verbose, x_uf, x_if, w_i, w_if, v_u, v_i, v_uf, v_if):
def assert_finite(w_i, w_if, v_u, v_i, v_uf, v_if):
"""assert all model weights are finite"""

assert np.isfinite(np.sum(w_i)), "item weights [w_i] are not finite - try decreasing feature/sample_weight magnitudes"
assert np.isfinite(np.sum(w_if)), "item feature weights [w_if] are not finite - try decreasing feature/sample_weight magnitudes"
assert np.isfinite(np.sum(v_u)), "user factors [v_u] are not finite - try decreasing feature/sample_weight magnitudes"
assert np.isfinite(np.sum(v_i)), "item factors [v_i] are not finite - try decreasing feature/sample_weight magnitudes"
assert np.isfinite(np.sum(v_uf)), "user-feature factors [v_uf] are not finite - try decreasing feature/sample_weight magnitudes"
assert np.isfinite(np.sum(v_if)), "item-feature factors [v_if] are not finite - try decreasing feature/sample_weight magnitudes"


@nb.njit
def reg_penalty(regularization, w_i, w_if, v_u, v_i, v_uf, v_if):
"""calculate the total regularization penalty for all model weights"""

penalty = 0.0
penalty += np.sum(regularization * np.square(w_i))
penalty += np.sum(regularization * np.square(w_if))
penalty += np.sum(regularization * np.square(v_u))
penalty += np.sum(regularization * np.square(v_i))
penalty += np.sum(regularization * np.square(v_uf))
penalty += np.sum(regularization * np.square(v_if))
return penalty


@nb.njit
def _fit(interactions, sample_weight, user_items, item_idx, regularization, learning_rate, learning_schedule, learning_exponent, epochs, verbose, x_uf, x_if, w_i, w_if, v_u, v_i, v_uf, v_if):
"""private JIT model-fitting function
:param interactions: np.array[int32] of observed [user_idx, item_idx] interactions
:param sample_weight: vector of importance weights for each observed interaction
:param user_items: typed dict [int32 -> int32[:]] mapping user_idx to set of observed item_idx
:param item_idx: np.array[int32] of unique item_idx values found in interactions data
:param regularization: L2 regularization penalty
@@ -61,13 +88,11 @@ def _fit(interactions, user_items, item_idx, regularization, learning_rate, lear
:return: updated model weights (w_i, w_if, v_u, v_i, v_uf, v_if)
"""

# define matrix dimension shapes
# define matrix dimension shapes and shuffle index
P = x_uf.shape[1]
Q = x_if.shape[1]
F = v_i.shape[1]
I = len(item_idx)

# define shuffle index to randomly permute each epoch
shuffle_index = np.arange(len(interactions))

for epoch in range(epochs):
@@ -85,9 +110,10 @@ def _fit(interactions, user_items, item_idx, regularization, learning_rate, lear

for row in shuffle_index:

# locate the user (u) and observed item (i)
# locate the user (u), observed item (i), and sample weight (sw)
u = interactions[row, 0]
i = interactions[row, 1]
sw = sample_weight[row]

# randomly sample an unobserved item (j) for the user
while True:
@@ -108,6 +134,8 @@ def _fit(interactions, user_items, item_idx, regularization, learning_rate, lear
log_likelihood += np.log(1 / (1 + np.exp(-pairwise_utility)))

# calculate derivatives of the model penalized log-likelihood function
# NOTE: apply the sample weights to d_LL/d_g(pu) to scale the magnitude of the gradient step updates
# NOTE: sample weights are applied like frequency weights: gradient updates are scaled as if there were W (u, i, j) pairs

d_con = 1.0 / (np.exp(pairwise_utility) + 1.0)
d_reg = 2.0 * regularization
@@ -136,25 +164,20 @@ def _fit(interactions, user_items, item_idx, regularization, learning_rate, lear
d_v_if[q, f] = (x_if[i][q] - x_if[j][q]) * (v_u[u][f] + np.dot(v_uf.T[f], x_uf[u]))

# update model weights for this (u, i, j) triplet with a gradient step
w_i[i] += eta * ((d_con * d_w_i) - (d_reg * w_i[i]))
w_i[j] += eta * ((d_con * d_w_j) - (d_reg * w_i[j]))
w_if += eta * ((d_con * d_w_if) - (d_reg * w_if))
v_u[u] += eta * ((d_con * d_v_u) - (d_reg * v_u[u]))
v_i[i] += eta * ((d_con * d_v_i) - (d_reg * v_i[i]))
v_i[j] += eta * ((d_con * d_v_j) - (d_reg * v_i[j]))
v_uf += eta * ((d_con * d_v_uf) - (d_reg * v_uf))
v_if += eta * ((d_con * d_v_if) - (d_reg * v_if))

# calculate the cumulative penalized log-likelihood for this training epoch
penalty = 0.0
penalty += np.sum(regularization * np.square(w_i))
penalty += np.sum(regularization * np.square(w_if))
penalty += np.sum(regularization * np.square(v_u))
penalty += np.sum(regularization * np.square(v_i))
penalty += np.sum(regularization * np.square(v_uf))
penalty += np.sum(regularization * np.square(v_if))
w_i[i] += eta * (sw * (d_con * d_w_i) - (d_reg * w_i[i]))
w_i[j] += eta * (sw * (d_con * d_w_j) - (d_reg * w_i[j]))
w_if += eta * (sw * (d_con * d_w_if) - (d_reg * w_if))
v_u[u] += eta * (sw * (d_con * d_v_u) - (d_reg * v_u[u]))
v_i[i] += eta * (sw * (d_con * d_v_i) - (d_reg * v_i[i]))
v_i[j] += eta * (sw * (d_con * d_v_j) - (d_reg * v_i[j]))
v_uf += eta * (sw * (d_con * d_v_uf) - (d_reg * v_uf))
v_if += eta * (sw * (d_con * d_v_if) - (d_reg * v_if))

# assert all model weights are finite as of the end of this epoch
assert_finite(w_i, w_if, v_u, v_i, v_uf, v_if)

if verbose:
penalty = reg_penalty(regularization, w_i, w_if, v_u, v_i, v_uf, v_if)
log_likelihood = round(log_likelihood - penalty, 2)
print("\ntraining epoch:", epoch)
print("log likelihood:", log_likelihood)
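
As the NOTE comments in the gradient section above describe, the sample weight acts like a frequency weight: it scales the likelihood part of each gradient step but not the L2 regularization term. A minimal NumPy sketch of one weighted update for a single item weight is shown below; the numeric values are hypothetical and the snippet is illustrative only, not part of the committed code.

import numpy as np

# hypothetical values for a single (u, i, j) training triplet
eta = 0.1                  # current learning rate
regularization = 0.01      # L2 penalty strength
sw = 3.0                   # sample weight of the observed (u, i) interaction
pairwise_utility = 0.5     # f(u, i) - f(u, j)
w_i_i = 0.2                # current first-order weight of the observed item i
d_w_i = 1.0                # derivative of the pairwise utility w.r.t. w_i[i]

# derivative of the log-sigmoid likelihood w.r.t. the pairwise utility
d_con = 1.0 / (np.exp(pairwise_utility) + 1.0)
d_reg = 2.0 * regularization

# weighted gradient step: sw multiplies only the likelihood gradient,
# as if the (u, i, j) pair had been observed sw times
w_i_i += eta * (sw * (d_con * d_w_i) - (d_reg * w_i_i))
print(round(w_i_i, 6))
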
43 changes: 30 additions & 13 deletions rankfm/rankfm.py
@@ -68,8 +68,11 @@ def _reset_state(self):
self.user_to_index = None
self.item_to_index = None

# user/item interactions
# user/item interactions and sample importance weights
self.interactions = None
self.sample_weight = None

# dictionary user observed items lookups
self.user_items_py = None
self.user_items_nb = None

@@ -91,12 +94,13 @@ def _reset_state(self):
self.is_fit = False


def _init_all(self, interactions, user_features=None, item_features=None):
def _init_all(self, interactions, user_features=None, item_features=None, sample_weight=None):
"""index the raw interaction and user/item features data to numpy arrays
:param interactions: dataframe of observed user/item interactions: [user_id, item_id]
:param user_features: dataframe of user metadata features: [user_id, uf_1, ..., uf_n]
:param item_features: dataframe of item metadata features: [item_id, if_1, ..., if_n]
:param sample_weight: vector of importance weights for each observed interaction
:return: None
"""

@@ -122,7 +126,7 @@ def _init_all(self, interactions, user_features=None, item_features=None):
self.item_idx = np.arange(len(self.item_id), dtype=np.int32)

# map the interactions to internal index positions
self._init_interactions(interactions)
self._init_interactions(interactions, sample_weight)

# map the user/item features to internal index positions
self._init_features(user_features, item_features)
@@ -131,10 +135,11 @@ def _init_all(self, interactions, user_features=None, item_features=None):
self._init_weights(user_features, item_features)


def _init_interactions(self, interactions):
def _init_interactions(self, interactions, sample_weight):
"""map new interaction data to existing internal user/item indexes
:param interactions: dataframe of observed user/item interactions: [user_id, item_id]
:param sample_weight: vector of importance weights for each observed interaction
:return: None
"""

@@ -150,6 +155,15 @@ def _init_interactions(self, interactions):
self.interactions['item_id'] = self.interactions['item_id'].map(self.item_to_index).astype(np.int32)
self.interactions = self.interactions.rename({'user_id': 'user_idx', 'item_id': 'item_idx'}, axis=1).dropna().astype(np.int32)

# store the sample weights internally or create a vector of ones if not passed
if sample_weight is not None:
assert isinstance(sample_weight, (np.ndarray, pd.Series)), "[sample_weight] must be np.ndarray or pd.Series"
assert sample_weight.ndim == 1, "[sample_weight] must be a vector (ndim=1)"
assert len(sample_weight) == len(interactions), "[sample_weight] must have the same length as [interactions]"
self.sample_weight = get_data(sample_weight).astype(np.float32)
else:
self.sample_weight = np.ones(len(self.interactions), dtype=np.float32, order='C')

# create python/numba lookup dictionaries containing the set of observed items for each user
# NOTE: the typed numba dictionary will be used to sample unobserved items during training
# NOTE: the interactions data must be converted to np.ndarray prior to training to use @njit
@@ -180,7 +194,7 @@ def _init_features(self, user_features=None, item_features=None):
else:
raise KeyError('the users in [user_features] do not match the users in [interactions]')
else:
self.x_uf = np.zeros([len(self.user_idx), 1]).astype(np.float32)
self.x_uf = np.zeros([len(self.user_idx), 1], dtype=np.float32, order='C')

# store the item features as a ndarray [IxQ] row-ordered by item index position
if item_features is not None:
@@ -192,7 +206,7 @@ def _init_features(self, user_features=None, item_features=None):
else:
raise KeyError('the items in [item_features] do not match the items in [interactions]')
else:
self.x_if = np.zeros([len(self.item_idx), 1]).astype(np.float32)
self.x_if = np.zeros([len(self.item_idx), 1], dtype=np.float32, order='C')


def _init_weights(self, user_features, item_features):
@@ -211,14 +225,14 @@ def _init_weights(self, user_features, item_features):
if user_features is not None:
self.v_uf = np.random.normal(loc=0, scale=self.sigma, size=[self.x_uf.shape[1], self.factors]).astype(np.float32)
else:
self.v_uf = np.zeros([self.x_uf.shape[1], self.factors]).astype(np.float32)
self.v_uf = np.zeros([self.x_uf.shape[1], self.factors], dtype=np.float32, order='C')

# randomly initialize item feature factors if item features were supplied
# NOTE: set all item feature factor weights to zero to prevent random scoring influence otherwise
if item_features is not None:
self.v_if = np.random.normal(loc=0, scale=self.sigma, size=[self.x_if.shape[1], self.factors]).astype(np.float32)
else:
self.v_if = np.zeros([self.x_if.shape[1], self.factors]).astype(np.float32)
self.v_if = np.zeros([self.x_if.shape[1], self.factors], dtype=np.float32, order='C')



@@ -227,40 +241,43 @@ def _init_weights(self, user_features, item_features):
# -------------------------------


def fit(self, interactions, user_features=None, item_features=None, epochs=1, verbose=False):
def fit(self, interactions, user_features=None, item_features=None, sample_weight=None, epochs=1, verbose=False):
"""clear previous model state and learn new model weights using the input data
:param interactions: dataframe of observed user/item interactions: [user_id, item_id]
:param user_features: dataframe of user metadata features: [user_id, uf_1, ..., uf_n]
:param item_features: dataframe of item metadata features: [item_id, if_1, ..., if_n]
:param sample_weight: vector of importance weights for each observed interaction
:param epochs: number of training epochs (full passes through observed interactions)
:param verbose: whether to print epoch number and log-likelihood during training
:return: self
"""

self._reset_state()
self.fit_partial(interactions, user_features, item_features, epochs, verbose)
self.fit_partial(interactions, user_features, item_features, sample_weight, epochs, verbose)


def fit_partial(self, interactions, user_features=None, item_features=None, epochs=1, verbose=False):
def fit_partial(self, interactions, user_features=None, item_features=None, sample_weight=None, epochs=1, verbose=False):
"""learn or update model weights using the input data and resuming from the current model state
:param interactions: dataframe of observed user/item interactions: [user_id, item_id]
:param user_features: dataframe of user metadata features: [user_id, uf_1, ..., uf_n]
:param item_features: dataframe of item metadata features: [item_id, if_1, ..., if_n]
:param sample_weight: vector of importance weights for each observed interaction
:param epochs: number of training epochs (full passes through observed interactions)
:param verbose: whether to print epoch number and log-likelihood during training
:return: self
"""

if self.is_fit:
self._init_interactions(interactions)
self._init_interactions(interactions, sample_weight)
self._init_features(user_features, item_features)
else:
self._init_all(interactions, user_features, item_features)
self._init_all(interactions, user_features, item_features, sample_weight)

updated_weights = _fit(
self.interactions,
self.sample_weight,
self.user_items_nb,
self.item_idx,
self.regularization,
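
A short usage sketch of the new sample_weight argument, assuming the standard RankFM constructor; the example data, weight values, and factors setting are illustrative and not taken from the commit.

import numpy as np
import pandas as pd
from rankfm.rankfm import RankFM

# illustrative interactions: repeat purchases get larger importance weights
interactions = pd.DataFrame({
    'user_id': [1, 1, 2, 3],
    'item_id': ['a', 'b', 'a', 'c'],
})
sample_weight = np.array([1.0, 3.0, 1.0, 2.0], dtype=np.float32)

# the weights may also be passed as a pd.Series of the same length as interactions
model = RankFM(factors=10)
model.fit(interactions, sample_weight=sample_weight, epochs=20, verbose=True)
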
4 changes: 2 additions & 2 deletions rankfm/utils.py
@@ -9,10 +9,10 @@ def get_data(obj):
:return: the object's underlying np.ndarray data
"""

if obj.__class__.__name__ == 'DataFrame':
if obj.__class__.__name__ in ('DataFrame', 'Series'):
data = obj.values
elif obj.__class__.__name__ == 'ndarray':
data = obj
else:
raise TypeError("input data must be in either pd.dataframe or np.ndarray format")
raise TypeError("input data must be in either pd.dataframe/pd.series or np.ndarray format")
return data
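
Because get_data now also unwraps a pandas Series, sample_weight can be supplied either as a Series or as a plain ndarray. A quick illustrative sketch of the helper's behavior:

import numpy as np
import pandas as pd
from rankfm.utils import get_data

weights_series = pd.Series([1.0, 3.0, 0.5])
weights_array = np.array([1.0, 3.0, 0.5])

print(type(get_data(weights_series)))   # the Series' underlying np.ndarray (.values)
print(type(get_data(weights_array)))    # the ndarray is returned unchanged
# any other input type (e.g. a plain list) raises TypeError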
