import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.utils import check_array
np.random.seed(0)
matplotlib.style.use("ggplot")
plt.rcParams["figure.figsize"] = [10, 4]
Introduction
Today I’ve decided to write a quick blog post about a feature engineering trick I’ve learned recently that enables us to fit smooth monotonic curves to data. This method is particularly useful when modeling relationships that are known to be monotonic, although not necessarily linear. For this, we will use \(I\)-splines, introduced by J. O. Ramsay. These are monotonically increasing functions that one can construct from regular old \(B\)-splines. The two key properties of \(I\)-splines that we’ll leverage are that they are monotonically increasing and bounded between \(0\) and \(1\). So, if we take a linear combination of them with non-negative coefficients, the resulting function will again be monotonically increasing. Furthermore, a convex combination (a linear combination with non-negative coefficients that sum up to \(1\)) of \(I\)-splines results in a function that is also bounded between \(0\) and \(1\).
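To spell this out: if \(I_1, \dots, I_n\) are monotonically increasing and \(c_1, \dots, c_n \ge 0\), then for any \(x \le y\) we have \[\sum_{i=1}^{n} c_i I_i(x) \le \sum_{i=1}^{n} c_i I_i(y),\] so the combination is again monotonically increasing; if moreover \(\sum_{i=1}^{n} c_i = 1\) and each \(I_i\) takes values in \([0, 1]\), then the combination also stays in \([0, 1]\).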
In this blog post we will give an implementation of \(I\)-splines loosely following their Rcpp implementation (although another implementation is also available in the dms_variants Python package), and then we will see how they perform on data. We will not discuss splines in general; for an introduction I recommend reading The Elements of Statistical Learning, Chapter 5.
Spline plots
Here’s an implementation of \(I\)-splines built on top of \(B\)-splines, together with visualizations of the basis functions. We also plot some random linear combinations of these basis functions to give an idea of what functions can be approximated this way.
def inter_bspline(p, k, x, y):
    # One step of the Cox-de Boor recursion: combine the order p - 1 basis
    # functions (columns of y) into order p basis functions.
    num_rows, num_cols = y.shape
    z = np.zeros((num_rows, num_cols - 1))

    for j in range(num_cols - 1):
        denom_A = k[j + p - 1] - k[j]
        denom_B = k[j + p] - k[j + 1]

        A = (x - k[j]) / denom_A * y[:, j] if denom_A > 0 else 0
        B = (k[j + p] - x) / denom_B * y[:, j + 1] if denom_B > 0 else 0
        z[:, j] = A + B

    return z
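For reference, this is one step of the Cox–de Boor recursion: writing \(B_{j, p}\) for the \(j\)-th basis function of order \(p\) and \(k\) for the knot vector, the function above computes \[B_{j, p}(x) = \frac{x - k_j}{k_{j + p - 1} - k_j} B_{j, p - 1}(x) + \frac{k_{j + p} - x}{k_{j + p} - k_{j + 1}} B_{j + 1, p - 1}(x),\] where any term with a zero denominator is dropped (this happens at repeated knots).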
def b_spline(p, knots, x):
    # B-spline basis of order p evaluated at the points x: start from the
    # order 1 (piecewise constant) basis and apply the recursion p - 1 times.
    N = knots.shape[0]
    num_rows = x.shape[0]
    B = np.zeros((num_rows, N - 1))

    for j in range(N - 1):
        B[:, j] = np.where((x >= knots[j]) & (x < knots[j + 1]), 1, 0)

    for deg in range(1, p):
        B = inter_bspline(deg + 1, knots, x, B)

    return B
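As a quick, purely illustrative check (not part of the original code), \(B\)-splines form a partition of unity inside the knot range, so the rows of the basis matrix should sum to one; the variable names below are just for the example.

# Illustrative check: inside the knot range the B-spline basis sums to 1.
check_knots = np.concatenate([np.zeros(4) - 1e-6, np.array([0.3, 0.5, 0.6]), np.ones(4) + 1e-6])
row_sums = b_spline(4, check_knots, np.linspace(0, 1, 50)).sum(axis=1)
print(np.allclose(row_sums, 1.0))  # expect True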
def i_spline(
    p,
    knots,
    x,
):
    # Each I-spline column is a cumulative sum of B-splines counted from the
    # right end of the basis; such a sum is monotonically increasing and runs
    # from 0 to 1 over the knot range. The final column (the sum of all
    # B-splines, which is identically 1) is dropped.
    bsp = b_spline(
        p,
        knots,
        x,
    )
    cumsummed = np.cumsum(
        bsp[:, ::-1],
        axis=1,
    )[:, :-1]

    # Outside the knot range, clamp to 0 (below) and 1 (above).
    return np.where(
        (x >= knots[0]) & (x <= knots[-1]),
        cumsummed.T,
        np.where(
            (x < knots[0]),
            0,
            1,
        ),
    ).T
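Another illustrative check (again not in the original post): every column returned by i_spline should be nondecreasing along a grid of x values.

# Illustrative check: each I-spline basis column is nondecreasing in x.
check_knots = np.concatenate([np.zeros(4) - 1e-6, np.array([0.3, 0.5, 0.6]), np.ones(4) + 1e-6])
basis = i_spline(4, check_knots, np.linspace(0, 1, 200))
print(np.all(np.diff(basis, axis=0) >= -1e-12))  # expect True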
knots = np.array([0.3, 0.5, 0.6])

bsp = b_spline(
    4,
    np.concatenate([np.zeros(4) - 1e-6, knots, np.ones(4) + 1e-6]),
    np.linspace(0, 1, 100),
)
fig, ax = plt.subplots(2, 2, figsize=(15, 10), sharex=True)
for i in range(bsp.shape[1]):
    ax[0][0].plot(
        np.linspace(0, 1, 100),
        bsp[:, i],
        linewidth=0.75,
    )
isp = i_spline(
    4,
    np.concatenate([np.zeros(4) - 1e-6, knots, np.ones(4) + 1e-6]),
    np.linspace(0, 1, 100),
)

for i in range(isp.shape[1]):
    ax[0][1].plot(
        np.linspace(0, 1, 100),
        isp[:, i],
        linewidth=0.75,
    )
for knot in knots:
    ax[0][0].axvline(
        knot, color="red", linestyle="--", label="Knots" if knot == knots[0] else None
    )
    ax[0][1].axvline(
        knot,
        color="red",
        linestyle="--",
    )

ax[0][0].set_title("B-Spline")
ax[0][1].set_title("I-Spline")

ax[0][0].set_ylim(0, 1)
ax[0][1].set_ylim(0, 1)
for _ in range(20):
    # Non-negative coefficients keep the I-spline combination monotone;
    # the B-spline coefficients are left unconstrained for comparison.
    coeffs_isp = np.abs(np.random.normal(size=isp.shape[1]))
    coeffs_bsp = np.random.normal(size=bsp.shape[1])

    ax[1][0].plot(
        np.linspace(0, 1, 100),
        bsp @ coeffs_bsp,
        alpha=0.3,
    )
    ax[1][1].plot(
        np.linspace(0, 1, 100),
        isp @ coeffs_isp,
        alpha=0.3,
    )

ax[1][0].set_title("Random B-Splines")
ax[1][1].set_title("Random I-Splines")

fig.legend()
Sklearn wrapper
For the sake of simplicity we provide an sklearn wrapper around the i_spline function. Note that this transformer will not work with arrays containing multiple features, but with a little effort we could extend it to multiple dimensions. In the two-dimensional case, this can be done by constructing the basis functions \(f_1, \dots, f_n\), \(g_1, \dots, g_m\) for the two features separately and then taking \[h_{i, j}(x, y) = f_i(x) g_j(y) \text{ for each } i=1, \dots, n,\hskip 2pt j=1, \dots, m.\] The easiest way to obtain this would be to use the PolynomialFeatures transformer with interaction_only=True. This of course can be generalized to more dimensions.
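As an illustration of that construction (this helper is not part of the post; the function name and knot arguments are placeholders), one could also build the tensor-product basis directly on top of i_spline:

# Sketch of a two-feature tensor-product basis (illustrative only): take the
# 1D I-spline basis of each feature and form all pairwise products
# h_{i,j}(x, y) = f_i(x) * g_j(y).
def two_feature_i_spline_basis(x, y, knots_x, knots_y, p=4):
    f = i_spline(p, knots_x, x)  # shape (n_samples, n)
    g = i_spline(p, knots_y, y)  # shape (n_samples, m)
    # outer product per sample, flattened to shape (n_samples, n * m)
    return (f[:, :, None] * g[:, None, :]).reshape(x.shape[0], -1)

Since each factor is non-negative and increasing in its own argument, a non-negative linear combination of these products is monotone in each coordinate separately.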
class ISplineTransformer1D(sklearn.base.BaseEstimator, sklearn.base.TransformerMixin):
    def __init__(self, p=4, n_knots=10, decreasing=False, knots=None):
        self.p = p
        self.knots = np.array(knots) if knots is not None else None
        self.n_knots = n_knots
        self.decreasing = decreasing
        self.x_min_ = None
        self.x_max_ = None
        self.knots_ = None

    def fit(self, X, y=None):
        X = check_array(X)
        # For a decreasing fit, flip the sign of the feature so that the
        # increasing I-spline basis becomes decreasing in the original X.
        X = (-1 if self.decreasing else 1) * X
        self.x_min_ = np.min(X) - 1e-6
        self.x_max_ = np.max(X) + 1e-6

        if self.knots is None:
            extended_knots = np.linspace(self.x_min_, self.x_max_, self.n_knots)
        else:
            extended_knots = self.knots

        # Repeat the boundary knots p times, as in the plots above.
        self.knots_ = np.concatenate(
            [
                np.zeros(self.p) + self.x_min_,
                extended_knots,
                np.zeros(self.p) + self.x_max_,
            ],
        )

        return self

    def transform(self, X):
        return i_spline(
            self.p,
            self.knots_,
            (-1 if self.decreasing else 1) * X[:, 0],
        )
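A minimal usage illustration (the numbers here are arbitrary): the transformer takes a single feature column and returns one column per I-spline basis function.

# Illustrative usage of the transformer on a toy single-feature array.
X_demo = np.random.uniform(0, 1, size=(5, 1))
basis_demo = ISplineTransformer1D(n_knots=5).fit_transform(X_demo)
print(basis_demo.shape)  # (5, number of basis functions)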
Data Generation
In this post, we will work with synthetic data generated by \[y_i = X_i^2 + 0.1 \sin(20 X_i) + 1 + \varepsilon_i,\] where \(\varepsilon_i \sim \operatorname{N}(0, 0.1^2)\) are independent.
N = 1000

X = np.random.uniform(0, 1, size=N)
y = X ** 2 + .1 * np.sin(20 * X) + 1 + .1 * np.random.randn(N)
plt.scatter(X, y)
Fit Scikit-Learn pipeline
We will use Scikit-Learn’s LinearRegression with positive=True to enforce monotonicity. Comparing the predictions to those of sklearn’s IsotonicRegression, we find a very similar fit that is, however, much smoother. We note that in both cases we get constant predictions outside of the training distribution.
pipe = make_pipeline(
    ISplineTransformer1D(n_knots=10),
    LinearRegression(positive=True),
)
pipe.fit(X[:, None], y)

iso = IsotonicRegression(out_of_bounds="clip")
iso.fit(X, y)

X_test = np.linspace(-0.2, 1.2, 100)[:, None]

plt.plot(X_test[:, 0], pipe.predict(X_test), color="blue", linewidth=2, label="ISpline")
plt.plot(
    X_test[:, 0], iso.predict(X_test), color="green", linewidth=2, label="Isotonic"
)
plt.scatter(X, y, alpha=0.3, label="Data")
plt.legend()
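As a last sanity check (not in the original post), we can confirm that the fitted coefficients are indeed non-negative, which, combined with the monotone basis, is what guarantees a monotone prediction curve.

# With positive=True the regression coefficients are constrained to be >= 0.
coefs = pipe.named_steps["linearregression"].coef_
print(np.all(coefs >= 0))  # expect True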