# Matrix Factorization code not converging

``````python
import numpy as np


class MatrixFactorization:

    def __init__(self, matrix, features_count, is_set_function, learning_rate=1e-3, regularization_factor=2e-2):
        self.matrix = matrix
        self.features_count = features_count
        self.alpha = learning_rate
        self.beta = regularization_factor
        self.is_set = is_set_function

        self.global_bias = self.compute_global_bias()
        self.f1w = np.random.rand(matrix.shape[0], features_count) * self.global_bias  # factor 1 weight
        self.f1b = np.zeros((matrix.shape[0]))  # factor 1 bias
        self.f2w = np.random.rand(features_count, matrix.shape[1]) * self.global_bias  # factor 2 weight
        self.f2b = np.zeros((matrix.shape[1]))  # factor 2 bias
        self.samples = self.compute_samples()

    def compute_global_bias(self):
        n = 0
        s = 0
        for row in self.matrix:
            for x in row:
                if self.is_set(x):
                    n += 1
                    s += x
        return s / n

    def compute_samples(self):
        samples = []
        for i in range(self.matrix.shape[0]):
            for j in range(self.matrix.shape[1]):
                if self.is_set(self.matrix[i, j]):
                    samples.append((i, j, self.matrix[i, j]))
        return samples

    def shuffle(self):
        np.random.shuffle(self.samples)

        for i, j, p in self.samples:
            prediction = self.predict(i, j)
            error = prediction - self.matrix[i, j]

            # Updating the biases, the global bias is a constant.
            self.f1b -= self.alpha * (error - self.beta * self.f1b[i])
            self.f2b -= self.alpha * (error - self.beta * self.f2b[j])

            self.f1w[i, :] -= self.alpha * (error * self.f2w[:, j] - self.beta * self.f1w[i, :])
            self.f2w[:, j] -= self.alpha * (error * self.f1w[i, :] - self.beta * self.f2w[:, j])

    def cost(self):
        return np.sum((self.construct_matrix() - self.matrix) ** 2)

    def predict(self, i, j):
        return self.f1w[i, :] @ self.f2w[:, j] + self.global_bias + self.f1b[i] + self.f2b[j]

    def construct_matrix(self):
        return self.f1w @ self.f2w + self.global_bias + self.f1b[:, np.newaxis] + self.f2b[np.newaxis, :]

    def train(self, epsilon):
        while True:
            self.shuffle()
            cost = self.cost()
            print(cost)
            if cost < epsilon:
                break


def AlwaysTrue(x):
    return True


def NonNegative(x):
    return x >= 0


def set_random_indices(matrix, n, value):
    indices = np.random.choice(np.arange(0, matrix.size, 1), n)
    i = indices // matrix.shape[1]
    j = indices % matrix.shape[0]
    matrix[i, j] = value


def test(matrix, n_unknowns=5, learning_rate=0.0001, regularization_factor=0.01, epsilon=1e-2):
    copy = np.copy(matrix)
    set_random_indices(copy, n_unknowns, -1)

    # Note that the error won't go beyond some
    # point if the regularization factor is not 0
    m = MatrixFactorization(copy, 2, NonNegative, learning_rate, regularization_factor)
    m.train(epsilon)

    print('\nFactor 1:')
    print('W: \n', m.f1w)
    print('B: \n', m.f1b)
    print('\nFactor 2:')
    print('W: \n', m.f2w)
    print('B: \n', m.f2b)
    print('\nGlobal Bias:', m.global_bias)
    print('\nMatrix:')
    print(matrix)
    print('\nPrediction:')
    prediction = m.construct_matrix()
    print(prediction)
    print('\nDifference:')
    print(prediction - matrix)


if __name__ == '__main__':
    matrix = np.array([[3, 1, 1, 3, 1],
                       [1, 2, 4, 1, 3],
                       [3, 1, 1, 3, 1],
                       [4, 3, 5, 4, 4]])
    # test(matrix, 0)
    test(matrix, 2)
``````

After learning about Collaborative Filtering and Content-based Filtering from the course, I stumbled upon a technique called "Matrix Factorization".

Using stochastic gradient descent, I approximate latent feature vectors for both the users and the items (or whatever the two factors of the matrix are).

I have a test function that sets some random elements in the matrix to `-1`, which is equivalent to having the value not set (to be predicted).

For a 4 * 5 matrix, it works fine when there are no unknowns, but if I set only 2 unknowns, it does not converge. I've tried a lot of different hyperparameters, but it doesn't seem to work.

I'm wondering, is it a bug in the code that is causing this? Why is it not converging?

``````python
            if cost < epsilon:
                break
``````

Hello @OsamaAhmad, perhaps you want to change the above lines from checking the value of the cost to checking the change in the cost?
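For instance, a stopping rule based on the change in cost might look like this (just a sketch; `train_until_stable` is a made-up helper that reuses the `shuffle` and `cost` methods of the posted class):

```python
def train_until_stable(model, tol=1e-6, max_epochs=10_000):
    # Stop when the cost stops *changing* between epochs,
    # rather than when the cost itself drops below a threshold.
    prev_cost = float('inf')
    cost = prev_cost
    for _ in range(max_epochs):
        model.shuffle()  # in the posted code this also runs one SGD pass
        cost = model.cost()
        if abs(prev_cost - cost) < tol:
            break
        prev_cost = cost
    return cost
```

That way training stops once it has plateaued, even if the plateau is far above `epsilon`.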

Sure, but what I meant is that the error doesn't get anywhere close to 0 (or epsilon = 1e-2).
Maybe "converge" isn't the best word to use here.

Yes, usually we speak about the change when it comes to convergence.

``````python
self.f1b -= self.alpha * (error - self.beta * self.f1b[i])
self.f2b -= self.alpha * (error - self.beta * self.f2b[j])

self.f1w[i, :] -= self.alpha * (error * self.f2w[:, j] - self.beta * self.f1w[i, :])
self.f2w[:, j] -= self.alpha * (error * self.f1w[i, :] - self.beta * self.f2w[:, j])
``````

Why minus `self.beta` and not plus?

Thank you for spotting it, that's a mistake.

Also, in these two lines, are `f1w` and `f2w` mixed up? Or should the errors be multiplied by feature values instead of feature weights?

Item at position (i, j) in the matrix = dot(f1w[i, :], f2w[:, j]) + biases
Derivative with respect to f1w = error * f2w + …
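To double-check the sign, here is a small standalone finite-difference sketch (toy random factors, not the posted class): for the regularized squared error of one sample, the gradient with respect to `f1w[i, :]` comes out as `error * f2w[:, j] + beta * f1w[i, :]`, with a plus on the regularization term:

```python
import numpy as np

rng = np.random.default_rng(0)
f1w = rng.random((4, 2))   # toy factor-1 weights
f2w = rng.random((2, 5))   # toy factor-2 weights
beta = 0.02                # regularization factor
i, j, rating = 1, 3, 3.0   # one training sample

def sample_loss(w):
    # squared error for one sample plus L2 penalty on the touched row
    err = w[i, :] @ f2w[:, j] - rating
    return 0.5 * err ** 2 + 0.5 * beta * np.sum(w[i, :] ** 2)

error = f1w[i, :] @ f2w[:, j] - rating
analytic = error * f2w[:, j] + beta * f1w[i, :]  # note: plus beta, not minus

eps = 1e-6
numeric = np.zeros_like(analytic)
for k in range(len(numeric)):
    bumped = f1w.copy()
    bumped[i, k] += eps
    numeric[k] = (sample_loss(bumped) - sample_loss(f1w)) / eps

assert np.allclose(analytic, numeric, atol=1e-4)
```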

How does it go now, converging?

No noticeable difference

What if you disable the regularization?

Also, I don't quite understand the purpose of `NonNegative` or `is_set` there. Would it do anything bad if one of the matrix's elements is negative? Can we remove this stuff?

I tried it, no difference as well

This is used to filter out the unset values.
As far as I know, there is no way of "unsetting" a value in a matrix.
So I put a constraint: in this example, negative values are not allowed, thus -1 is an invalid value and is considered not set.

The `is_set` function is responsible for determining whether a value is valid or not. If it's valid, the number is put in a list called `samples`, from which the stochastic gradient descent picks training examples.
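A tiny standalone illustration of that filtering step (`non_negative` here is a stand-in for the `is_set` argument):

```python
import numpy as np

matrix = np.array([[3., -1., 1.],
                   [-1., 2., 4.]])

def non_negative(x):
    return x >= 0

# keep only the set (valid) entries as (row, col, value) training samples
samples = [(i, j, matrix[i, j])
           for i in range(matrix.shape[0])
           for j in range(matrix.shape[1])
           if non_negative(matrix[i, j])]

print(samples)  # the two -1 cells are treated as "not set" and skipped
```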

Minor question: I am not familiar with the term "unset values"; in what context is it used? I have doubts because, if you are just factorizing a matrix, it shouldn't matter whether an element of the matrix is negative or not. Is having the "unset check" absolutely necessary at this stage?

Major question: should your cost also NOT account for "invalid" elements?


If we have a matrix representing ratings of users and movies, for example, not all movies will be rated by all users. In fact, most of the matrix will be unset, and only a small subset of it will have numbers from 0 to 5.

To predict the ratings for the movies that are not yet rated (unset), the following is done:

• Each user i is represented with an embedding vector capturing some latent features `(vu_i)`
• Each item j is represented with an embedding vector capturing some latent features `(vm_j)`
• dot(vu_i, vm_j) should equal the rating of user i for movie j (or a good approximation)

Since not all values are set, we only look at the numbers that are set, and train the model to compute vectors that give a correct answer for the set ratings.

Using these embedding vectors, we can then predict the rating of user k for movie l by computing dot(vu_k, vm_l).

Having a global bias, a user bias for each user, and a movie bias for each movie should speed up the training process.
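In code, one such prediction with all three biases might look like this (made-up numbers, purely illustrative):

```python
import numpy as np

vu_k = np.array([0.8, 0.1])  # user k's latent feature vector
vm_l = np.array([1.2, 2.0])  # movie l's latent feature vector
global_bias = 2.5            # mean of all set ratings
user_bias_k = 0.3            # user k rates a bit above average
movie_bias_l = -0.2          # movie l is rated a bit below average

# dot product of the embeddings plus the global, user, and movie biases
predicted_rating = vu_k @ vm_l + global_bias + user_bias_k + movie_bias_l
print(predicted_rating)
```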


Oh… Thanks! This is the cause of the problem.

It's an interesting way to "impute" missing ratings with matrix factorization. I believe there is much more work ahead:

1. I just ran it, and observed that the cost goes down at first and then up when the matrix has a negative element. The fact that it went up can indicate (please confirm this) that as you train more, the imputed ratings start to deviate more from your initially desired ratings. This may or may not be a problem:

   1. This may not be a problem, because your `matrix` is a pretty random one and thus pattern-less, so maybe we should not have huge expectations in the first place.

   2. If this is a problem, you may want to change the convergence condition from checking the cost to checking the cost change, so that the cost can't go up.

2. Your cost takes the sum of the squared errors of all elements instead of taking their mean. Taking the mean is better, because the mean shouldn't be affected much by the size of your matrix.

3. Check whether your imputed ratings are within the range 0-5 or not. Perhaps you want to add some constraints to your gradient descent to make sure that happens, but obviously this will complicate things.

4. If you just want to do matrix factorization, then you may use any matrix to test your algorithm, but I would not add the "valid check". If you want to do rating imputation, I would not test the algorithm on just any matrix; instead I would probably use some real-world data, because your imputation won't make sense if your data is just a pattern-less matrix. I think you would agree that if your matrix is just any matrix, the imputed ratings can also be just any values, and there is no point in checking whether they are good or not.
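For the cost in particular, a masked version that skips the unset entries and averages over the rest might look like this (a standalone sketch, keeping the thread's "-1 means unset" convention):

```python
import numpy as np

def masked_mean_cost(reconstruction, matrix, is_set=lambda x: x >= 0):
    # mean squared error over the set entries only, so the unset
    # (-1) cells never pull the factorization toward -1
    mask = np.vectorize(is_set)(matrix)
    errors = (reconstruction - matrix)[mask]
    return np.mean(errors ** 2)

matrix = np.array([[3., -1.],
                   [1., 2.]])
recon = np.array([[3., 5.],
                  [1., 2.]])  # wrong only at the unset (-1) cell

print(masked_mean_cost(recon, matrix))  # 0.0: the unset cell is ignored
```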


The matrix is not random tho, I got it from this tutorial on matrix factorization. If I were to use a random matrix, I would've generated one, with a bigger size. Note here that the first and third rows are the same, which suggests that users 1 and 3 have the exact same taste (if we're going to assume this is a rating matrix); also notice that row 4 = row 1 + row 2.

1 - I don't know why that happens tbh; the matrix is not random. The only explanation I can think of right now is that the learning rate is too big, which makes gradient descent overshoot.

2 - Thank you, I missed this.

3 - This doesn't assume anything; it's just a class for factorizing matrices. But sure, other constraints would have to be added if this were a rating matrix. In fact, other methods take a weighted sum of the ratings of similar users, then divide it by the sum of weights, to ensure that the result won't be bigger than 5.

4 - It's actually taken from a tutorial.
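The weighted-sum idea from point 3 can be sketched as follows (illustrative similarities and ratings, not from the thread):

```python
import numpy as np

# similarity of the target user to three neighbours, and those
# neighbours' ratings of the movie in question
similarities = np.array([0.9, 0.5, 0.1])
ratings = np.array([5.0, 3.0, 1.0])

# a weighted average stays between min(ratings) and max(ratings),
# so the prediction can never leave the 0-5 rating scale
predicted = similarities @ ratings / similarities.sum()
print(predicted)
```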

Well… it may not be perfectly random, but it may not be perfectly informative either.

You need to assess the quality of your dataset: for example, how was the data collected? Are there any errors in the data? How representative is it?

These are questions I don't think you will get an answer to from the tutorial, and frankly, 5 data rows can't be very informative. So when I say "random", I don't just mean that you don't know the source of the data, but way more than that, including the questions above.