function [pca_samples, params] = cosmo_pca(samples, retain)
% Principal Component Analysis
%
% [pca_samples,params]=cosmo_pca(samples[,retain])
%
% Input:
% samples M x N numeric matrix
% retain (optional) number of components to retain;
% must be less than or equal to N. Default: N
%
% Output:
% pca_samples M x retain samples in Principal Component
% space, after samples have been centered
% params struct with fields:
% .coef M x retain Principal Component coefficients
% .mu M x 1 column-wise average of samples
% It holds that:
% samples=bsxfun(@plus,params.mu,...
% pca_samples*params.coef')
% .explained 1 x N Percentage of explained variance
%
%
% Examples:
% samples=[ 2.0317 -0.8918 -0.8258;...
% 0.5838 1.8439 1.1656;...
% -1.4437 -0.2617 -1.9207;...
% -0.5177 2.3387 0.4412;...
% 1.1908 -0.2040 -0.2088;...
% -1.3265 2.7235 0.1476];
% %
% % apply PCA, keeping two dimensions
% [pca_samples,params]=cosmo_pca(samples,2);
% %
% % show samples in PC space
% cosmo_disp(pca_samples);
% %|| [ -2.64 0.654
% %|| 0.923 1.43
% %|| -0.723 -2.48
% %|| 1.64 0.265
% %|| -1.46 0.569
% %|| 2.27 -0.438 ]
% %
% % show parameters
% cosmo_disp(params);
% %|| .coef
% %|| [ -0.512 0.744
% %|| 0.794 0.219
% %|| 0.328 0.632 ]
% %|| .mu
% %|| [ 0.0864 0.925 -0.2 ]
% %|| .explained
% %|| [ 66
% %|| 33.3
% %|| 0.676 ]
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #
if nargin < 2
retain = [];
end
verify_parameters(samples, retain);
ndim = get_number_of_components(samples, retain);
% subtract mean
mu = mean(samples, 1);
samples_demu = bsxfun(@minus, samples, mu);
% singular value decomposition
[u, s, w] = svd(samples_demu, 'econ');
% extract eigen values
[nrow, ncol] = size(samples);
samples_is_vector = nrow == 1 || ncol == 1;
if samples_is_vector
% single eigen value
eigvals = s(1);
else
% take diagonal
eigvals = diag(s);
end
if ndim == 0
% separate case for zero dimensions
pca_samples = zeros(1, 0);
coef = zeros(ncol, 0);
else
pca_samples_rand_sign = bsxfun(@times, u(:, 1:ndim), eigvals(1:ndim)');
[coef, sgn] = max_abs_positive_columnwise(w(:, 1:ndim));
pca_samples = bsxfun(@times, pca_samples_rand_sign, sgn);
end
% store coefficients
params = struct();
params.coef = coef;
params.mu = mu;
nexpl = min([nrow - 1, ncol]);
if nrow == 1 || ncol == 0
% special case for empty vecto with explained variance
params.explained = zeros(1, 0);
else
explained_ratio = eigvals'.^2;
params.explained = 100 * explained_ratio(1:nexpl) / sum(explained_ratio);
end
function ncomp = get_number_of_components(samples, retain)
max_retain = size(samples, 2);
if isempty(retain)
retain = max_retain;
end
if retain > max_retain
error('retain argument %d must be less than %d', ...
retain, max_retain);
end
[nrow, ncol] = size(samples);
ncomp = min([nrow - 1, ncol, retain]);
function [coef_pos, sgn] = max_abs_positive_columnwise(coef)
% swap sign for each column in which the maximum absolute value is
% negative. sgn contains the sign used in each column (-1 or 1)
[unused, i] = max(abs(coef), [], 1);
[nrows, ncols] = size(coef);
mx_idx = (0:(ncols - 1)) * nrows + i;
mx = coef(mx_idx);
sgn = (mx > 0) * 2 - 1;
coef_pos = bsxfun(@times, coef, sgn);
function verify_parameters(samples, retain)
if ~isnumeric(samples)
error('samples argument must be numeric');
end
if numel(size(samples)) > 2
error('samples argument must be a matrix');
end
if ~(isempty(retain) || ...
(isscalar(retain) && ...
retain > 0 && ...
isequal(round(retain), retain)))
error('retain argument must be positive integer');
end