From ef5dfd08e9002584f4098532cd0282f7b37d9d0a Mon Sep 17 00:00:00 2001
From: Eric Lundquist
Date: Tue, 26 May 2020 12:18:37 -0700
Subject: [PATCH] added basic tests

---
 .gitignore           |   9 +-
 README.md            |  21 +++--
 rankfm/rankfm.py     |  29 +++----
 tests/test_rankfm.py | 203 ++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 227 insertions(+), 35 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6c438f4..e5c968c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
-# excluded folders
+# exclude data and private notebooks
 data/
-notebooks/old/
+examples/ignore/
 
 # system files
 *.DS_Store
@@ -17,8 +17,6 @@ lib/
 lib64/
 parts/
 sdist/
-var/
-wheels/
 pip-wheel-metadata/
 share/python-wheels/
 *.egg-info/
@@ -26,9 +24,6 @@ share/python-wheels/
 *.egg
 MANIFEST
 
-# sphinx documentation
-docs/_build/
-
 # spark stuff
 */derby.log
 */metastore_db/

diff --git a/README.md b/README.md
index 6c3aafe..44780ac 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,12 @@
 # RankFM
-RankFM is a python implementation of the general Factorization Machines model class described in [Rendle 2010](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) adapted for collaborative filtering recommendation/ranking problems with implicit feedback user-item interaction data. It uses the Bayesian Personalized Ranking (BPR-OPT) optimization criteria described in [Rendle 2009](https://arxiv.org/pdf/1205.2618.pdf) to learn model weights via Stochastic Gradient Descent (SGD). It can also incorporate user and/or item auxiliary features to augment the main interaction data which may increase model performance, especially in contexts where interaction data is highly sparse but rich user/item metadata features exist.
+RankFM is a Python implementation of the general Factorization Machines model class described in [Rendle 2010](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) adapted for collaborative filtering recommendation/ranking problems with implicit feedback user-item interaction data. It uses the Bayesian Personalized Ranking (BPR-OPT) optimization criterion described in [Rendle 2009](https://arxiv.org/pdf/1205.2618.pdf) to learn model weights via Stochastic Gradient Descent (SGD). It can also incorporate user and/or item auxiliary features to augment the main interaction data, which may increase model performance, especially in contexts where the interaction data is highly sparse but rich user and/or item metadata features exist.
 
-RankFM's core training/prediction/recommendation subroutines are converted to optimized machine code at runtime using the excellent [Numba](http://numba.pydata.org/) LLVM JIT compiler which can compile Python numerical algorithms to run at speeds approaching C/Fortran. This makes it possible to scale model training and recommendation to millions of user/item interactions.
+The core training/prediction/recommendation subroutines are converted to optimized machine code at runtime using the [Numba](http://numba.pydata.org/) LLVM JIT compiler. This makes it possible to scale model training and recommendation to millions of user/item interactions. Designed for ease-of-use, RankFM accepts both `pd.DataFrame` and `np.ndarray` inputs. You do not have to convert your data to `scipy.sparse` matrices or re-map user/item identifiers to array indexes prior to use - RankFM internally maps all user/item identifiers to zero-based integer indexes, but always converts its outputs back to the original user/item identifiers from your data, which can be arbitrary (non-zero-based, non-consecutive) integers or even strings.
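To make that identifier handling concrete, here is a minimal sketch (the user/item values are hypothetical; the constructor arguments mirror the Quickstart below):

```python
import pandas as pd
from rankfm.rankfm import RankFM

# string user ids and non-consecutive integer item ids are used as-is:
# RankFM maps them to internal zero-based indexes during training and
# maps its outputs back to the original identifiers afterwards
interactions = pd.DataFrame({
    'user_id': ['alice', 'alice', 'bob', 'carol'],
    'item_id': [377, 610, 377, 1196],
})

model = RankFM(factors=10, regularization=0.01, learning_rate=0.1)
model.fit(interactions, epochs=20)

# recommendations come back as a dataframe indexed by the original (string) user ids
recs = model.recommend(['alice', 'bob'], n_items=2)
```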
 
-Designed for ease-of-use, RankFM accepts both `pd.DataFrame` and `np.ndarray` inputs. You do not have to convert your data to `scipy.sparse` matrices or re-map user/item identifiers to array indexes prior to use - internally RankFM maps all user/item identifiers to zero-based integer indexes, but always converts its output back to the original user/item identifiers from your data, which can be arbitrary (non-zero-based, non-consecutive) integers or even strings.
+In addition to the familiar `fit()`, `predict()`, `recommend()` methods, RankFM includes additional utilities `similar_users()` and `similar_items()` to find the most similar users/items to a given user/item based on latent factor space embeddings. A number of popular recommendation/ranking evaluation metric functions have been included in the separate `evaluation` module to streamline model tuning and validation. See the **Quickstart** section below to get started, and the `quickstart.ipynb` notebook in the `/examples` folder for a more in-depth walkthrough.
 
-In addition to the familiar `fit()`, `predict()`, `recommend()` methods, RankFM includes additional utilities to find the most similar users/items to a given user/item based on user/item latent factor space embeddings. A number of popular recommendation/ranking evaluation metric functions are also included in the `evaluation` module to streamline model performance tuning and evaluation.
-
-See the **Quickstart** section below to get started, and the `quickstart.ipynb` notebook in the `/examples` folder for a more in-depth walkthrough. This package is currently under active development pre-release, and should not yet be considered stable. Release, build status, and PyPI information will be added once things get to a stable and satisfactory state for an initial release. The core functionality is mostly in place and working, but automated tests and CI workflows need to be added, and I need to teach myself how to do all that stuff first :)
+This package is currently in active pre-release development and should not yet be considered stable. Release, build status, and PyPI information will be added once things get to a stable and satisfactory state for an initial release. The core functionality is mostly in place and working, but automated tests and CI workflows need to be added, and I need to teach myself how to do all that stuff first :)
 
 ---
 
 ### Dependencies
@@ -34,7 +32,7 @@ Let's first look at the required shape of the interaction data:
 | 5 | 377 |
 | 8 | 610 |
 
-It has just two columns: a `user_id` and an `item_id` (although you can name these fields whatever you want or use a numpy array instead). Notice that there is no `rating` column - this library is for **implicit feedback** data (e.g. watches, page views, purchases, clicks) as opposed to **explicit feedback** data (e.g. 1-5 ratings, thumbs up/down). Implicit feedback is far more common in real-world recommendation contexts and doesn't suffer from the missing-not-at-random problem of pure explicit feedback approaches. Maciej Kula (legendary open-source recsys developer) provides an [excellent overview of the differences](https://resources.bibblio.org/hubfs/share/2018-01-24-RecSysLDN-Ravelin.pdf).
+It has just two columns: a `user_id` and an `item_id` (you can name these fields whatever you want or use a numpy array instead). Notice that there is no `rating` column - this library is for **implicit feedback** data (e.g. watches, page views, purchases, clicks) as opposed to **explicit feedback** data (e.g. 1-5 ratings, thumbs up/down). Implicit feedback is far more common in real-world recommendation contexts and doesn't suffer from the missing-not-at-random problem of pure explicit feedback approaches. Maciej Kula (legendary open-source recsys developer) provides an [excellent overview of the differences](https://resources.bibblio.org/hubfs/share/2018-01-24-RecSysLDN-Ravelin.pdf).
 
 Now let's import the library, initialize our model, and fit on the training data:
 ```python
@@ -44,13 +42,13 @@ model = RankFM(factors=10, regularization=0.01, learning_rate=0.1, learning_sche
 model.fit(interactions_train, epochs=20, verbose=True)
 # NOTE: this takes about 90 seconds for 750,000 interactions on my 2.3 GHz i5 8GB RAM MacBook
 ```
 
-If you set `verbose=True` the model will print the current epoch number as well as the epoch's log-likelihood during training. This can be useful to gauge both computational speed and training performance by epoch. If the log likelihood is not increasing then try upping the `learning_rate` or lowering the `regularization`. If the log likelihood is starting to sometimes decrease in later training epochs try lowering the `learning_rate` or using `learning_schedule='invscaling'` to gradually decrease the learning rate over time.
+If you set `verbose=True` the model will print the current epoch number as well as the epoch's log-likelihood during training. This can be useful to gauge both computational speed and training performance by epoch. If the log likelihood is not increasing, try raising the `learning_rate` or lowering the `regularization`. If the log likelihood starts to bounce up and down, try lowering the `learning_rate` or using `learning_schedule='invscaling'` to decrease the learning rate over time.
 
 Now let's generate some user-item model scores from the validation data:
 ```python
 valid_scores = model.predict(interactions_valid, cold_start='nan')
 ```
-this will produce an array of real-valued model scores generated using the Factorization Machine model equation. You can interpret it as a measure of the predicted utility of a user (u) getting recommended an item (i). The `cold_start='nan'` option can be used to set scores to `np.nan` for user/item pairs not found in the training data, or `cold_start='drop'` can be specified to drop those pairs so the results contain no missing values.
+This will produce an array of real-valued model scores generated using the Factorization Machines model equation. You can interpret each score as a measure of the predicted utility of item (i) for user (u). The `cold_start='nan'` option can be used to set scores to `np.nan` for user/item pairs not found in the training data, or `cold_start='drop'` can be specified to drop those pairs so the results contain no missing values.
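For a quick illustration of the two `cold_start` modes, here is a sketch (it assumes the fitted model above; user `5` and item `377` appear in the sample interaction data, while user `999999` is a hypothetical identifier not present in training):

```python
import pandas as pd

# one pair whose user was seen during training, one pair with an unseen user
pairs = pd.DataFrame({'user_id': [5, 999999], 'item_id': [377, 377]})

scores_nan = model.predict(pairs, cold_start='nan')    # length 2: the unseen pair is scored as np.nan
scores_drop = model.predict(pairs, cold_start='drop')  # length 1: the unseen pair is dropped
```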
 
 Now let's generate our topN recommended movies for each user:
 ```python
@@ -81,6 +79,7 @@ dcg: 0.704
 precision: 0.152
 recall: 0.068
 ```
+[That's a Bingo!](https://www.youtube.com/watch?v=q5pESPQpXxE)
 
 Now let's find the most similar other movies for a few movies based on their embedding representations in latent factor space:
 ```python
@@ -99,7 +98,7 @@ model.similar_items(589, n_items=10)
 480     Jurassic Park (1993)
 1200    Aliens (1986)
 ```
-I hope you like explosions...
+[I hope you like explosions...](https://www.youtube.com/watch?v=uENYMZNzg9w)
 
 ```python
 # Being John Malkovich (1999)
@@ -117,7 +116,7 @@ model.similar_items(2997, n_items=10)
 2908    Boys Don't Cry (1999)
 3481    High Fidelity (2000)
 ```
-Let's get weird...
+[Let's get weird...](https://www.youtube.com/watch?v=lIpev8JXJHQ&t=5s)
 
 ---
 
 That's all for now. To see more in-depth worked examples in jupyter notebook format head to the `/examples` folder. Be sure to check back for added functionality and PyPI release status in the near future as soon as I teach myself how to use CI workflows and go where few data scientists have gone before: a comprehensive set of unit tests. Stay tuned...

diff --git a/rankfm/rankfm.py b/rankfm/rankfm.py
index 6f16025..9a02d67 100644
--- a/rankfm/rankfm.py
+++ b/rankfm/rankfm.py
@@ -247,14 +247,12 @@ def fit_partial(self, interactions, user_features=None, item_features=None, epoc
         :return: self
         """
 
-        # initialize necessary internal data structures
         if self.is_fit:
             self._init_interactions(interactions)
             self._init_features(user_features, item_features)
         else:
             self._init_all(interactions, user_features, item_features)
 
-        # call numba internals
         updated_weights = _fit(
             self.interactions,
             self.user_items_nb,
@@ -285,18 +283,16 @@ def predict(self, pairs, cold_start='nan'):
 
         :param pairs: dataframe of [user, item] pairs to score
         :param cold_start: whether to generate missing values ('nan') or drop ('drop') user/item pairs not found in training data
-        :return: vector of real-valued model scores
+        :return: np.array of real-valued model scores
         """
 
-        # ensure that the model has been fit before attempting to generate predictions
+        assert pairs.shape[1] == 2, "[pairs] should be: [user_id, item_id]"
         assert self.is_fit, "you must fit the model prior to generating predictions"
 
-        # map raw user/item identifiers to internal index positions
         pred_pairs = pd.DataFrame(pairs.copy(), columns=['user_id', 'item_id'])
         pred_pairs['user_id'] = pred_pairs['user_id'].map(self.user_to_index)
         pred_pairs['item_id'] = pred_pairs['item_id'].map(self.item_to_index)
 
-        # call numba internals
         pred_pairs = pred_pairs.to_numpy().astype(np.float32)
         scores = _predict(
             pred_pairs,
@@ -321,17 +317,16 @@ def recommend(self, users, n_items=10, filter_previous=False, cold_start='nan'):
         """calculate the topN items for each user
 
-        :param users: list-like of user identifiers for which to generate recommendations
+        :param users: iterable of user identifiers for which to generate recommendations
         :param n_items: number of recommended items to generate for each user
         :param filter_previous: remove observed training items from generated recommendations
         :param cold_start: whether to generate missing values ('nan') or drop ('drop') users not found in training data
         :return: pandas dataframe where the index values are user identifiers and the columns are recommended items
         """
 
-        # ensure that the model has been fit before attempting to generate predictions
+        assert getattr(users, '__iter__', False), "[users] must be an iterable (e.g. list, array, series)"
(e.g. list, array, series)" assert self.is_fit, "you must fit the model prior to generating recommendations" - # call numba internals user_idx = pd.Series(users).map(self.user_to_index).to_numpy(dtype=np.float32) rec_items = _recommend( user_idx, @@ -362,10 +357,11 @@ def similar_items(self, item_id, n_items=10): :param item_id: item to search :param n_items: number of similar items to return - :return: topN most similar items wrt latent factor representations + :return: np.array of topN most similar items wrt latent factor representations """ # ensure that the model has been fit before attempting to generate predictions + assert item_id in self.item_id, "you must select an [item_id] present in the training data" assert self.is_fit, "you must fit the model prior to generating similarities" try: @@ -379,7 +375,7 @@ def similar_items(self, item_id, n_items=10): # calculate the most similar N items excluding the search item similarities = pd.Series(np.dot(lr_all_items, lr_item)).drop(item_idx).sort_values(ascending=False)[:n_items] - most_similar = pd.Series(similarities.index).map(self.index_to_item) + most_similar = pd.Series(similarities.index).map(self.index_to_item).values return most_similar @@ -388,10 +384,11 @@ def similar_users(self, user_id, n_users=10): :param user_id: user to search :param n_users: number of similar users to return - :return: topN most similar users wrt latent factor representations + :return: np.array of topN most similar users wrt latent factor representations """ # ensure that the model has been fit before attempting to generate predictions + assert user_id in self.user_id, "you must select an [user_id] present in the training data" assert self.is_fit, "you must fit the model prior to generating similarities" try: @@ -399,12 +396,12 @@ def similar_users(self, user_id, n_users=10): except (KeyError, TypeError): print("user_id={} not found in training data".format(user_id)) - # calculate item latent representations in F dimensional factor space - lr_user = self.v_i[user_idx] + np.dot(self.v_uf.T, self.x_uf[user_idx]) - lr_all_users = self.v_i + np.dot(self.x_uf, self.v_uf) + # calculate user latent representations in F dimensional factor space + lr_user = self.v_u[user_idx] + np.dot(self.v_uf.T, self.x_uf[user_idx]) + lr_all_users = self.v_u + np.dot(self.x_uf, self.v_uf) # calculate the most similar N users excluding the search user similarities = pd.Series(np.dot(lr_all_users, lr_user)).drop(user_idx).sort_values(ascending=False)[:n_users] - most_similar = pd.Series(similarities.index).map(self.index_to_user) + most_similar = pd.Series(similarities.index).map(self.index_to_user).values return most_similar diff --git a/tests/test_rankfm.py b/tests/test_rankfm.py index 90d253e..9bf6773 100644 --- a/tests/test_rankfm.py +++ b/tests/test_rankfm.py @@ -41,6 +41,13 @@ (3, 3, 3), (3, 6, 4), (3, 4, 5) ], columns=['user_id', 'item_id', 'rating'], dtype=np.int32) +# valid interactions with disjoint user/items +intx_valid_disjoint = pd.DataFrame([ + (1, 1), (1, 3), (1, 5), + (2, 1), (2, 2), (2, 7), + (4, 3), (4, 7), (4, 4) +], columns=['user_id', 'item_id'], dtype=np.int32) + # user features # ------------- @@ -115,6 +122,12 @@ (6, 0, 0, "G", 0.00) ], columns=['item_id', 'bin_1', 'bin_2', 'str', 'cnt']) +# user iterables +# -------------- + +train_users = np.array([1, 2, 3]) +valid_users = np.array([1, 2, 4, 5]) + # ------------------------------ # test basic model functionality # ------------------------------ @@ -178,10 +191,198 @@ def test__fit__bad__if_str_cols(): 
         model = RankFM(factors=2)
         model.fit(intx_train_pd_int, item_features=if_str_cols)
 
+# score prediction
+# ----------------
+
+def test__predict__good__train():
+    """test the predict() method on the training inputs"""
+
+    model = RankFM(factors=2)
+    model.fit(intx_train_pd_int)
+    scores = model.predict(intx_train_pd_int)
+
+    shape = scores.shape == (9,)
+    dtype = scores.dtype == np.float32
+    nmiss = np.sum(np.isnan(scores).astype(np.int32)) == 0
+    assert shape and dtype and nmiss
+
+def test__predict__good__disjoint_nan():
+    """test the predict() method on disjoint validation pairs with the cold_start='nan' option"""
+
+    model = RankFM(factors=2)
+    model.fit(intx_train_pd_int)
+    scores = model.predict(intx_valid_disjoint, cold_start='nan')
+
+    shape = scores.shape == (9,)
+    dtype = scores.dtype == np.float32
+    nmiss = np.sum(np.isnan(scores).astype(np.int32)) == 4
+    assert shape and dtype and nmiss
+
+def test__predict__good__disjoint_drop():
+    """test the predict() method on disjoint validation pairs with the cold_start='drop' option"""
+
+    model = RankFM(factors=2)
+    model.fit(intx_train_pd_int)
+    scores = model.predict(intx_valid_disjoint, cold_start='drop')
+
+    shape = scores.shape == (5,)
+    dtype = scores.dtype == np.float32
+    nmiss = np.sum(np.isnan(scores).astype(np.int32)) == 0
+    assert shape and dtype and nmiss
+
+# user recommendation
+# -------------------
+
+def test__recommend__good__train():
+    """test the recommend() method on the training users"""
+
+    model = RankFM(factors=2)
+    model.fit(intx_train_pd_int)
+    recs = model.recommend(train_users, n_items=3)
+
+    klass = isinstance(recs, pd.DataFrame)
+    shape = recs.shape == (3, 3)
+    index = np.array_equal(recs.index.values, train_users)
+    items = recs.isin(intx_train_pd_int['item_id'].values).all().all()
+    assert klass and shape and index and items
+
+def test__recommend__good__train__filter():
+    """test the recommend() method on the training users but filter previous items"""
+
+    model = RankFM(factors=2)
+    model.fit(intx_train_pd_int)
+    recs = model.recommend(train_users, n_items=3, filter_previous=True)
+
+    klass = isinstance(recs, pd.DataFrame)
+    shape = recs.shape == (3, 3)
+    index = np.array_equal(recs.index.values, train_users)
+    items = recs.isin(intx_train_pd_int['item_id'].values).all().all()
+
+    recs_long = recs.stack().reset_index().drop('level_1', axis=1)
+    recs_long.columns = ['user_id', 'item_id']
+    intersect = pd.merge(intx_train_pd_int, recs_long, on=['user_id', 'item_id'], how='inner').empty
+    assert klass and shape and index and items and intersect
+
+def test__recommend__good__valid__nan():
+    """test the recommend() method on a disjoint set of validation users with the cold_start='nan' option"""
+
+    model = RankFM(factors=2)
+    model.fit(intx_train_pd_int)
+    recs = model.recommend(valid_users, n_items=3, cold_start='nan')
+
+    klass = isinstance(recs, pd.DataFrame)
+    shape = recs.shape == (4, 3)
+    index = np.array_equal(sorted(recs.index.values), sorted(valid_users))
+    items = recs.dropna().isin(intx_train_pd_int['item_id'].values).all().all()
+    new_users = list(set(valid_users) - set(train_users))
+    nmiss = recs.loc[new_users].isnull().all().all()
+    assert klass and shape and index and items and nmiss
+
+def test__recommend__good__valid__drop():
+    """test the recommend() method on a disjoint set of validation users with the cold_start='drop' option"""
+
+    model = RankFM(factors=2)
+    model.fit(intx_train_pd_int)
+    recs = model.recommend(valid_users, n_items=3, cold_start='drop')
+
+    klass = isinstance(recs, pd.DataFrame)
+    shape = recs.shape == (2, 3)
+    index = np.isin(recs.index.values, valid_users).all()
+    items = recs.dropna().isin(intx_train_pd_int['item_id'].values).all().all()
+
+    same_users = list(set(valid_users) & set(train_users))
+    match_users = np.array_equal(sorted(same_users), sorted(recs.index.values))
+    assert klass and shape and index and items and match_users
+
+# similar items/users
+# -------------------
+
+def test__similar_items__good():
+    """test the similar_items() method for a valid [item_id]"""
+
+    model = RankFM(factors=2)
+    model.fit(intx_train_pd_int)
+    similar = model.similar_items(1, n_items=3)
+
+    shape = similar.shape == (3,)
+    items = np.isin(similar, intx_train_pd_int['item_id'].unique()).all()
+    assert shape and items
+
+def test__similar_items__bad():
+    """ensure the similar_items() method raises an exception for an item not in training data"""
+
+    with pytest.raises(AssertionError):
+        model = RankFM(factors=2)
+        model.fit(intx_train_pd_int)
+        similar = model.similar_items(99, n_items=3)
+
+def test__similar_users__good():
+    """test the similar_users() method for a valid [user_id]"""
+
+    model = RankFM(factors=2)
+    model.fit(intx_train_pd_int)
+    similar = model.similar_users(1, n_users=2)
+
+    shape = similar.shape == (2,)
+    users = np.isin(similar, intx_train_pd_int['user_id'].unique()).all()
+    assert shape and users
+
+def test__similar_users__bad():
+    """ensure the similar_users() method raises an exception for a user not in training data"""
+
+    with pytest.raises(AssertionError):
+        model = RankFM(factors=2)
+        model.fit(intx_train_pd_int)
+        similar = model.similar_users(9, n_users=1)
+
+# model evaluation
+# ----------------
+
 # model = RankFM(factors=2)
-# model.fit(interactions=intx_train_pd_int, user_features=uf_no_id)
+# model.fit(intx_train_pd_int)
\ No newline at end of file
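As a usage note for the new tests: a minimal sketch of invoking them programmatically (this assumes `pytest` is installed and the working directory is the repository root; running `pytest tests/test_rankfm.py -v` from a shell is equivalent):

```python
# run the new RankFM test module with verbose output
import pytest

pytest.main(["-v", "tests/test_rankfm.py"])
```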