Skip to content

Commit

Permalink
Working estimation using scipy.linalg.lstsq
Browse files Browse the repository at this point in the history
  • Loading branch information
open-risk committed Feb 28, 2019
1 parent d1fd1cb commit 650c3c9
Show file tree
Hide file tree
Showing 10 changed files with 305 additions and 527 deletions.
184 changes: 150 additions & 34 deletions correlationMatrix/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import scale

from scipy.linalg import lstsq

# https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lstsq.html

import matplotlib.pyplot as plt

import correlationMatrix as cm
Expand Down Expand Up @@ -297,6 +301,9 @@ def decompose(self, method):
"""
pass

def stress(self, scenario, method):
pass

@property
def validation_status(self):
return self.validated
Expand Down Expand Up @@ -637,8 +644,9 @@ def calculate_correlation(self, model_name, input1_url, input2_url):
class FactorCorrelationMatrix(CorrelationMatrix):
""" The FactorCorrelationMatrix class fits a variety of factor models
It stores the derived
It stores the derived parameters and modelled correlation matrix values
TODO compute and store confidence intervals
"""

Expand All @@ -659,36 +667,6 @@ def fit(self, data, method='UniformSingleFactor'):
"""

# Response (dependent) variables
Y = data.values
# Control that the response variables (r) have the right correlation
# rho = data.corr(method='pearson').values
# print(rho)

# Compute the row average (Market factor)
F = data.mean(axis=1).values
# print(np.std(F0))

# print(np.std(F1))
# Normalize the market factor to unit variance
# Replicate the market factor for the multiple regression
# X = np.tile(F1, (Y.shape[1], 1)).transpose()
X = scale(F, with_mean=False, with_std=True)
# np.reshape(X, (100, 1))
# np.reshape(X, (-1, 1))

print(Y.shape)
print(X.shape)

corrs = []
for i in range(Y.shape[1]):
rho, p = sp.pearsonr(X, Y[:, i])
corrs.append(rho)
print(np.mean(corrs)**2)




# print(data.describe())
# print(len(data.index))
# rho, p = sp.pearsonr(data['S4'], F)
Expand All @@ -704,18 +682,45 @@ def fit(self, data, method='UniformSingleFactor'):
# print(rho)

if method == 'UniformSingleFactor':
# Response (dependent) variables
Y = data.values
# Control that the response variables (r) have the right correlation
# rho = data.corr(method='pearson').values
# print(rho)

# Compute the row average (Market factor)
F = data.mean(axis=1).values
# print(np.std(F0))

# print(np.std(F1))
# Normalize the market factor to unit variance
# Replicate the market factor for the multiple regression
# X = np.tile(F1, (Y.shape[1], 1)).transpose()
X = scale(F, with_mean=False, with_std=True)
# np.reshape(X, (100, 1))
# np.reshape(X, (-1, 1))

print(Y.shape)
print(X.shape)

corrs = []
for i in range(Y.shape[1]):
rho, p = sp.pearsonr(X, Y[:, i])
corrs.append(rho)
print(np.mean(corrs) ** 2)
# res = smf.ols(formula='r ~ F', data=data).fit()
# estimator = LinearRegression(fit_intercept=False)
# results = estimator.fit(X, Y)
# print(dir(results))
# print(results.coef_)
# b = np.linalg.lstsq(X[:, 0], Y[:,0])

b = lstsq(X[:, 0], Y[:, 0])

# X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
# # y = 1 * x_0 + 2 * x_1 + 3
# y = np.dot(X, np.array([1, 2])) + 3
reg = LinearRegression().fit(X, Y)
print(reg.coef_)
# reg = LinearRegression().fit(X, Y)
# print(reg.coef_)

# estimator = ols._MultivariateOLS(X, Y)
# results = estimator.fit()
Expand All @@ -724,3 +729,114 @@ def fit(self, data, method='UniformSingleFactor'):
# plt.show()

# self.matrix = rho

elif method == 'CAPMModel':
print("Lets do this")

raw_data = data.values

print(raw_data.shape)
n = raw_data.shape[1] - 1
# the market factor
X = raw_data[:, 3]
# the single stock
Y = raw_data[:, 0]

# for i in range(3):
# Y = raw_data[:, i]
# print(i, sp.pearsonr(Y, X))

# plt.plot(X, Y)
# plt.show()

"""
scipy lstsq API
Parameters:
a : (M, N) array_like Left hand side matrix (2-D array).
b : (M,) or (M, K) array_like Right hand side matrix or vector (1-D or 2-D array).
cond : float, optional Cutoff for ‘small’ singular values; used to determine effective rank of a.
Singular values smaller than rcond * largest_singular_value are considered zero.
overwrite_a : bool, optional Discard data in a (may enhance performance). Default is False.
overwrite_b : bool, optional Discard data in b (may enhance performance). Default is False.
check_finite : bool, optional Whether to check that the input matrices contain only finite numbers.
Disabling may give a performance gain, but may result in problems (crashes, non-termination)
if the inputs do contain infinities or NaNs.
lapack_driver : str, optional Which LAPACK driver is used to solve the least-squares problem.
Options are 'gelsd', 'gelsy', 'gelss'. Default ('gelsd') is a good choice. However, 'gelsy' can be slightly faster on many problems. 'gelss' was used historically. It is generally slow but uses less memory.
Returns:
x : (N,) or (N, K) ndarray Least-squares solution. Return shape matches shape of b.
residues : (0,) or () or (K,) ndarray Sums of residues, squared 2-norm for each column in b - a x.
If rank of matrix a is < N or N > M, or 'gelsy' is used, this is a length zero array. If b was 1-D,
this is a () shape array (numpy scalar), otherwise the shape is (K,).
rank : int Effective rank of matrix a.
s : (min(M,N),) ndarray or None Singular values of a. The condition number of a is abs(s[0] / s[-1]).
None is returned when 'gelsy' is used.
"""

#TODO stack the individual stock values when the model ask for single loading
#TODO grouped stock loadings

# Find all the loadings of entities to the market factor
a = X
n = len(a)
a.shape = (n, 1)
for i in range(3):
b = raw_data[:, i]
p, res, rnk, s = lstsq(a, b)
print(p, res, rnk, s)

# # Control that the response variables (r) have the right correlation
# # rho = data.corr(method='pearson').values
# # print(rho)
#
# # Compute the row average (Market factor)
# F = data.mean(axis=1).values
# # print(np.std(F0))
#
# # print(np.std(F1))
# # Normalize the market factor to unit variance
# # Replicate the market factor for the multiple regression
# # X = np.tile(F1, (Y.shape[1], 1)).transpose()
# X = scale(F, with_mean=False, with_std=True)
# # np.reshape(X, (100, 1))
# # np.reshape(X, (-1, 1))
#
# print(Y.shape)
# print(X.shape)
#
# corrs = []
# for i in range(Y.shape[1]):
# rho, p = sp.pearsonr(X, Y[:, i])
# corrs.append(rho)
# print(np.mean(corrs)**2)

elif method == 'APTModel':

raw_data = data.values
print(raw_data.shape)

m = 3
n = raw_data.shape[1]
a = raw_data[:, n-m:n]
print(a.shape)

# b = raw_data[:, 5]
# n = raw_data.shape[1] - 1
# # the market factor
# X = raw_data[:, 3]
# # the single stock
# Y = raw_data[:, 0]
#
# a = X
# n = len(a)
# a.shape = (n, 1)
for i in range(n-m):
b = raw_data[:, i]
p, res, rnk, s = lstsq(a, b)
print(p, res, rnk, s)

82 changes: 82 additions & 0 deletions correlationMatrix/utils/dataset_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,85 @@ def multivariate_normal(correlationmatrix, sample):
data = np.random.multivariate_normal(mean, correlationmatrix.matrix, sample)
columns = ['S' + str(i) for i in range(0, correlationmatrix.dimension)]
return pd.DataFrame(data, columns=columns)


def capm_model(n, b, sample):
"""
Generate samples from a stylized CAPM model
Suitable for testing estimation algorithms
The data format is a table with columns per entity ID and a final market factor
:param int n: The number of distinct entities to simulate
:param list: A list of loading to the market factor
:param int sample: The number of samples to simulate
:rtype: pandas dataframe
.. note:: This emulates typical downloads of data from market data API's
"""
if len(b) != n:
print("Inconsistent input values, length of loadings matrix differs from simulated entities")
exit()

errors = np.random.normal(loc=0.0, scale=1.0, size=(n, sample))
market_factor = np.random.normal(loc=0.0, scale=1.0, size=(1, sample))
returns = np.outer(b, market_factor) + errors
print(returns.shape)
print(market_factor.shape)
print(returns.shape)
data = np.append(returns, market_factor, 0)
print(data.shape)
columns = ['S' + str(i) for i in range(0, n)]
columns.append('Market Factor')
print(columns)
print(data[:, :5])
df = pd.DataFrame(data.transpose(), columns=columns)
return df


def apt_model(n, b, m, rho, sample):
"""
Generate samples from a stylized APT model
Suitable for testing estimation algorithms
The data format is a table with columns per entity ID and market factors
:param int n: The number of distinct entities to simulate
:param list b: A list of loading to the market factor
:param int m: The number of market factors
:param rho: The correlation matrix to use for the simulation of the market factors
:param int sample: The number of samples to simulate
:rtype: pandas dataframe
.. note:: This emulates typical downloads of data from market data API's
"""
if len(b) != m:
print("Inconsistent input values, length of loadings matrix differs from simulated market factors")
exit()

if rho.dimension != m:
print("Inconsistent input values, dimension of factor correlation matrix differs from simulated factors")
exit()

mean = np.zeros(rho.dimension)
market_factors = np.random.multivariate_normal(mean, rho.matrix, sample).transpose()

errors = np.random.normal(loc=0.0, scale=1.0, size=(n, sample))

print(market_factors.shape)
print(errors.shape)

returns = np.dot(b, market_factors) + errors
print(returns.shape)

data = np.append(returns, market_factors, 0)
print(data.shape)
columns = ['S' + str(i) for i in range(0, n)] + ['F' + str(i) for i in range(0, m)]
print(columns)
print(data[:, :5])
df = pd.DataFrame(data.transpose(), columns=columns)
return df
Loading

0 comments on commit 650c3c9

Please sign in to comment.