"""
Vertical and Horizontal Functional Principal Component Analysis using SRSF
moduleauthor:: Derek Tucker <dtucker@stat.fsu.edu>
"""
import numpy as np
import utility_functions as uf
from scipy.linalg import norm
import matplotlib.pyplot as plt
import plot_style as plot
import collections
[docs]def vertfPCA(fn, time, qn, no=1, showplot=True):
"""
This function calculates vertical functional principal component analysis on aligned data
:param fn: numpy ndarray of shape (M,N) of M aligned functions with N samples
:param time: vector of size N describing the sample points
:param qn: numpy ndarray of shape (M,N) of M aligned SRSF with N samples
:param no: number of components to extract (default = 1)
:param showplot: Shows plots of results using matplotlib (default = T)
:type showplot: bool
:type no: int
:rtype: tuple of numpy ndarray
:return q_pca: srsf principal directions
:return f_pca: functional principal directions
:return latent: latent values
:return coef: coefficients
:return U: eigenvectors
"""
coef = np.arange(-2., 3.)
Nstd = coef.shape[0]
# FPCA
mq_new = qn.mean(axis=1)
N = mq_new.shape[0]
mididx = np.round(time.shape[0] / 2)
m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
mqn = np.append(mq_new, m_new.mean())
qn2 = np.vstack((qn, m_new))
K = np.cov(qn2)
U, s, V = np.linalg.svd(K)
stdS = np.sqrt(s)
# compute the PCA in the q domain
q_pca = np.ndarray(shape=(N + 1, Nstd, no), dtype=float)
for k in xrange(0, no):
for l in xrange(0, Nstd):
q_pca[:, l, k] = mqn + coef[l] * stdS[k] * U[:, k]
# compute the correspondence in the f domain
f_pca = np.ndarray(shape=(N, Nstd, no), dtype=float)
for k in xrange(0, no):
for l in xrange(0, Nstd):
f_pca[:, l, k] = uf.cumtrapzmid(time, q_pca[0:N, l, k] * np.abs(q_pca[0:N, l, k]),
np.sign(q_pca[N, l, k]) * (q_pca[N, l, k] ** 2))
N2 = qn.shape[1]
c = np.zeros((N2, no))
for k in xrange(0, no):
for l in xrange(0, N2):
c[l, k] = sum((np.append(qn[:, l], m_new[l]) - mqn) * U[:, k])
vfpca_results = collections.namedtuple('vfpca', ['q_pca', 'f_pca', 'latent', 'coef', 'U'])
vfpca = vfpca_results(q_pca, f_pca, s, c, U)
if showplot:
CBcdict = {
'Bl': (0, 0, 0),
'Or': (.9, .6, 0),
'SB': (.35, .7, .9),
'bG': (0, .6, .5),
'Ye': (.95, .9, .25),
'Bu': (0, .45, .7),
'Ve': (.8, .4, 0),
'rP': (.8, .6, .7),
}
cl = sorted(CBcdict.keys())
fig, ax = plt.subplots(2, no)
for k in xrange(0, no):
axt = ax[0, k]
for l in xrange(0, Nstd):
axt.plot(time, q_pca[0:N, l, k], color=CBcdict[cl[l]])
axt.hold(True)
axt.set_title('q domain: PD %d' % (k + 1))
plot.rstyle(axt)
axt = ax[1, k]
for l in xrange(0, Nstd):
axt.plot(time, f_pca[:, l, k], color=CBcdict[cl[l]])
axt.hold(True)
axt.set_title('f domain: PD %d' % (k + 1))
plot.rstyle(axt)
fig.set_tight_layout(True)
cumm_coef = 100 * np.cumsum(s) / sum(s)
idx = np.arange(0, N + 1) + 1
plot.f_plot(idx, cumm_coef, "Coefficient Cumulative Percentage")
plt.xlabel("Percentage")
plt.ylabel("Index")
plt.show()
return vfpca
[docs]def horizfPCA(gam, time, no, showplot=True):
"""
This function calculates horizontal functional principal component analysis on aligned data
:param gam: numpy ndarray of shape (M,N) of M warping functions
:param time: vector of size N describing the sample points
:param no: number of components to extract (default = 1)
:param showplot: Shows plots of results using matplotlib (default = T)
:type showplot: bool
:type no: int
:rtype: tuple of numpy ndarray
:return q_pca: srsf principal directions
:return f_pca: functional principal directions
:return latent: latent values
:return coef: coefficients
:return U: eigenvectors
"""
# Calculate Shooting Vectors
mu, gam_mu, psi, vec = uf.SqrtMean(gam)
tau = np.arange(1, 6)
TT = time.shape[0]
# TFPCA
K = np.cov(vec)
U, s, V = np.linalg.svd(K)
vm = vec.mean(axis=1)
gam_pca = np.ndarray(shape=(tau.shape[0], mu.shape[0] + 1, no), dtype=float)
psi_pca = np.ndarray(shape=(tau.shape[0], mu.shape[0], no), dtype=float)
for j in xrange(0, no):
for k in tau:
v = (k - 3) * np.sqrt(s[j]) * U[:, j]
vn = norm(v) / np.sqrt(TT)
if vn < 0.0001:
psi_pca[k-1, :, j] = mu
else:
psi_pca[k-1, :, j] = np.cos(vn) * mu + np.sin(vn) * v / vn
tmp = np.zeros(TT)
tmp[1:TT] = np.cumsum(psi_pca[k-1, :, j] * psi_pca[k-1, :, j])
gam_pca[k-1, :, j] = tmp / np.double(TT)
hfpca_results = collections.namedtuple('hfpca', ['gam_pca', 'psi_pca', 'latent', 'U'])
hfpca = hfpca_results(gam_pca, psi_pca, s, U)
if showplot:
CBcdict = {
'Bl': (0, 0, 0),
'Or': (.9, .6, 0),
'SB': (.35, .7, .9),
'bG': (0, .6, .5),
'Ye': (.95, .9, .25),
'Bu': (0, .45, .7),
'Ve': (.8, .4, 0),
'rP': (.8, .6, .7),
}
fig, ax = plt.subplots(1, no)
for k in xrange(0, no):
axt = ax[k]
axt.set_color_cycle(CBcdict[c] for c in sorted(CBcdict.keys()))
tmp = gam_pca[:, :, k]
axt.plot(np.linspace(0, 1, TT), tmp.transpose())
axt.set_title('PD %d' % (k + 1))
axt.set_aspect('equal')
plot.rstyle(axt)
fig.set_tight_layout(True)
cumm_coef = 100 * np.cumsum(s) / sum(s)
idx = np.arange(0, TT-1) + 1
plot.f_plot(idx, cumm_coef, "Coefficient Cumulative Percentage")
plt.xlabel("Percentage")
plt.ylabel("Index")
plt.show()
return hfpca