Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ license = { file = "LICENSE" }
requires-python = ">=3.1"
dependencies = [
"numpy>=1.16.6",
"scipy>=1.2.3",
"threadpoolctl>=2.1.0 ; python_full_version >= '3.5'",
Comment on lines 14 to 18
"tqdm>=4.64.1",
]
Expand Down
28 changes: 22 additions & 6 deletions src/fastl2lir/fastl2lir.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import numpy as np
from tqdm import tqdm
from scipy import linalg as sp_linalg

pv = sys.version_info

Expand All @@ -18,10 +19,25 @@
class FastL2LiR(object):
'''Fast L2-regularized linear regression class.'''

def __init__(self, W=np.array([]), b=np.array([]), verbose=False):
def __init__(self, W=np.array([]), b=np.array([]), verbose=False, solver='scipy'):
self.__W = W
self.__b = b
self.__verbose = verbose
self.__solver = solver

# Choose linear solver once to avoid branching at call sites
if solver == 'scipy':
def _solve(a, b):
try:
return sp_linalg.solve(a, b, assume_a='pos', check_finite=False)
except sp_linalg.LinAlgError:
return sp_linalg.solve(a, b, assume_a='sym', check_finite=False)
elif solver == 'numpy':
def _solve(a, b):
return np.linalg.solve(a, b)
else:
raise ValueError('Unknown solver: %s' % solver)
self.__solve = _solve

@property
def W(self):
Expand Down Expand Up @@ -245,10 +261,10 @@ def __sub_fit(self, X, Y, alpha=0, n_feat=0, use_all_features=True, dtype=np.flo
# Choose the more efficient method based on matrix dimensions
if X.shape[0] > X.shape[1]:
# Use primal form for tall matrices (more samples than features)
Wb = np.linalg.solve(np.matmul(X.T, X) + alpha * np.eye(X.shape[1], dtype=dtype), np.matmul(X.T, Y))
Wb = self.__solve(np.matmul(X.T, X) + alpha * np.eye(X.shape[1], dtype=dtype), np.matmul(X.T, Y))
else:
# Use dual form for wide matrices (more features than samples)
Wb = np.matmul(X.T, np.linalg.solve(np.matmul(X, X.T) + alpha * np.eye(X.shape[0], dtype=dtype), Y))
Wb = np.matmul(X.T, self.__solve(np.matmul(X, X.T) + alpha * np.eye(X.shape[0], dtype=dtype), Y))

W = Wb[0:-1, :]
b = Wb[-1, :][np.newaxis, :] # Returning b as a 2D array
Expand All @@ -274,7 +290,7 @@ def __sub_fit(self, X, Y, alpha=0, n_feat=0, use_all_features=True, dtype=np.flo
I = I[0:n_feat]
I = np.hstack((I, X.shape[1]-1))
W0_sub = (W0.ravel()[(I + (I * W0.shape[1]).reshape((-1, 1))).ravel()]).reshape(I.size, I.size)
Wb = np.linalg.solve(W0_sub, W1[index_outputDim][I].reshape(-1, 1))
Wb = self.__solve(W0_sub, W1[index_outputDim][I].reshape(-1, 1))
for index_selectedDim in range(n_feat):
W[index_outputDim, I[index_selectedDim]] = Wb[index_selectedDim]
b[0, index_outputDim] = Wb[-1]
Expand All @@ -287,7 +303,7 @@ def __sub_fit(self, X, Y, alpha=0, n_feat=0, use_all_features=True, dtype=np.flo
I = I[0:n_feat]
I = np.hstack((I, X.shape[1]-1))
W0_sub = (W0.ravel()[(I + (I * W0.shape[1]).reshape((-1,1))).ravel()]).reshape(I.size, I.size)
Wb = np.linalg.solve(W0_sub, W1[index_outputDim][I].reshape(-1,1))
Wb = self.__solve(W0_sub, W1[index_outputDim][I].reshape(-1,1))
for index_selectedDim in range(n_feat):
W[index_outputDim, I[index_selectedDim]] = Wb[index_selectedDim]
b[0, index_outputDim] = Wb[-1]
Expand Down Expand Up @@ -344,7 +360,7 @@ def __sub_fit_save_select_feat(
newX = np.hstack((newX, np.ones((newX.shape[0], 1), dtype=dtype))) # Add one column to rightmost column
W0 = np.matmul(newX.T, newX) + alpha * np.eye(newX.shape[1], dtype=dtype)
W1 = np.matmul(selY.ravel(), newX).reshape(-1,1)
Wb = np.linalg.solve(W0, W1)
Wb = self.__solve(W0, W1)
for index_selectedDim in range(n_feat):
W[index_outputDim, I[index_selectedDim]] = Wb[index_selectedDim]
b[0, index_outputDim] = Wb[-1]
Expand Down
18 changes: 18 additions & 0 deletions tests/test_fastl2lir.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,24 @@ def test_chunk(self):

np.testing.assert_array_almost_equal(yp_2d, data['yp_2d'])

def test_solver_numpy_matches_scipy(self):
'''numpy solver produces same result as scipy solver (default).'''
data = np.load('./tests/testdata_basic.npz')

model_scipy = fastl2lir.FastL2LiR(solver='scipy')
model_numpy = fastl2lir.FastL2LiR(solver='numpy')

model_scipy.fit(data['x_tr'], data['y_2d'])
model_numpy.fit(data['x_tr'], data['y_2d'])

np.testing.assert_array_almost_equal(model_numpy.W, model_scipy.W)
np.testing.assert_array_almost_equal(model_numpy.b, model_scipy.b)

Comment on lines +152 to +154
def test_solver_invalid(self):
'''Invalid solver name raises ValueError.'''
with self.assertRaises(ValueError):
fastl2lir.FastL2LiR(solver='cholesky')

def test_reshape(self):
'''Test for reshaping.'''
Y_shape = (200, 10, 10, 5)
Expand Down