function [cell_indices, unique_values] = cosmo_index_unique(values)
% index unique (combinations of) elements
%
% [cell_indices, unique_values]=cosmo_index_unique(values)
%
% Input:
%   values          either:
%                   - cell with K elements, each of which must be
%                     either a vector with M elements or a cell with
%                     M strings (each element in each cell is treated
%                     as a row); or
%                   - MxK matrix
%
% Returns:
%   cell_indices    Ux1 cell, if the input contains U unique
%                   combinations of values (row-wise). The k-th
%                   element contains the indices, in the range 1:M,
%                   of the rows in the input that share the k-th
%                   unique combination of values
%   unique_values   either:
%                   - 1xK cell, each element with U values, containing
%                     the unique combinations of values of the input
%                     [if the input is a cell]; or
%                   - UxK matrix, containing the unique rows in the
%                     input [if the input is a matrix]
%
% Examples:
% [i,u]=cosmo_index_unique({[3 2 2 2 1],[3 2 3 3 3]});
% cosmo_disp(i);
% %|| { [ 5 ]
% %|| [ 2 ]
% %|| [ 3
% %|| 4 ]
% %|| [ 1 ] }
% cosmo_disp(u);
% %|| { [ 1 [ 3
% %|| 2 2
% %|| 2 3
% %|| 3 ] 3 ] }
%
% % the same operation in matrix operation (input is transposed)
% [i,u]=cosmo_index_unique([3 2 2 2 1;3 2 3 3 3]');
% cosmo_disp(i);
% %|| { [ 5 ]
% %|| [ 2 ]
% %|| [ 3
% %|| 4 ]
% %|| [ 1 ] }
% cosmo_disp(u);
% %|| [ 1 3
% %|| 2 2
% %|| 2 3
% %|| 3 3 ]
%
% % it also works if (some of the) input contains cell strings
% [i,u]=cosmo_index_unique({{'ccc','bb','bb','bb','a'},...
% [4 3 4 4 4]});
% cosmo_disp(i);
% %|| { [ 5 ]
% %|| [ 2 ]
% %|| [ 3
% %|| 4 ]
% %|| [ 1 ] }
% cosmo_disp(u);
% %|| { { 'a' [ 4
% %|| 'bb' 3
% %|| 'bb' 4
% %|| 'ccc' } 4 ] }
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #

    % unique values are only computed if the caller asks for them
    return_unique_values = nargout >= 2;

    % replace every column by integer codes of its unique elements, then
    % sort the rows of codes so that identical rows become adjacent
    [idxs, input_is_array] = index_unique_per_value(values);
    [idxs_sorted, i] = sortrows(idxs);

    % a sorted row starts a new group if it differs from the row before
    % it. The dimension is passed explicitly to diff, because without it
    % diff operates along the columns when there is only a single row.
    msk = [true; any(diff(idxs_sorted, 1, 1), 2)];
    unq_pos = find(msk);

    % the number of rows in each group is the distance between
    % consecutive group starts
    nidxs = size(idxs, 1);
    cell_sizes = diff([unq_pos; (nidxs + 1)]);

    % convert to cell representation; this gives the same result as
    % mat2cell(i,cell_sizes,1)
    cell_indices = quick_mat2cell_vec(i, cell_sizes);

    if isempty(i)
        % no rows in the input: unq_pos still contains 1, so indexing the
        % empty i with it would fail
        singleton_idx = idxs;
    else
        % index of the first row (in sorted order) of each group
        singleton_idx = i(unq_pos);
    end

    if return_unique_values
        unique_values = get_unique_values(values, singleton_idx, ...
                                          input_is_array);
    end

function c = quick_mat2cell_vec(i, cell_sizes)
% Split the vector i into consecutive chunks.
%
% Input:
%   i            vector with the elements to distribute
%   cell_sizes   vector with N chunk lengths
%
% Output:
%   c            Nx1 cell; c{j} holds the cell_sizes(j) elements of i
%                that follow the elements stored in c{1},...,c{j-1}

    chunk_count = numel(cell_sizes);
    c = cell(chunk_count, 1);

    % last and first position in i of every chunk
    lengths = cell_sizes(:);
    last_pos = cumsum(lengths);
    first_pos = last_pos - lengths + 1;

    for j = 1:chunk_count
        % for a chunk of length zero the range is empty
        c{j} = i(first_pos(j):last_pos(j));
    end

function unique_values = get_unique_values(values, first_idx, input_is_array)
% Select the rows indexed by first_idx from the input values.
%
% Input:
%   values          matrix, or cell of which each element is indexed
%                   separately
%   first_idx       indices of the rows to select
%   input_is_array  true if values is a matrix, false if it is a cell
%
% Output:
%   unique_values   values(first_idx,:) if the input is a matrix;
%                   otherwise a 1xK cell (K=numel(values)) in which the
%                   k-th element is the result of cosmo_slice applied to
%                   values{k}, reshaped to a column, and first_idx

    if input_is_array
        % matrix input: the selected rows form the output matrix
        unique_values = values(first_idx, :);
        return
    end

    % cell input: select from each element independently
    nvalues = numel(values);
    unique_values = cell(1, nvalues);
    for j = 1:nvalues
        as_column = values{j}(:);
        unique_values{j} = cosmo_slice(as_column, first_idx);
    end

function [idxs, input_is_array] = index_unique_per_value(values)
% Replace each dimension of the input by indices of its unique elements.
%
% Input:
%   values          2-D numeric or logical array, of which each column
%                   is a dimension; or a cell, of which each element is
%                   a dimension
%
% Output:
%   idxs            MxK matrix for K dimensions with M elements each;
%                   column k holds the indices returned by
%                   unique_indices_from_vector for the k-th dimension.
%                   It is [] if there are no dimensions, or as soon as a
%                   dimension without elements is encountered
%   input_is_array  true if values is a 2-D numeric or logical array
%
% An error is raised if values is neither such an array nor a cell, if a
% dimension is not one-dimensional, or if a dimension has a different
% number of elements than the first one.

    input_is_array = (islogical(values) || isnumeric(values)) && ...
                     numel(size(values)) == 2;

    if input_is_array
        ndim = size(values, 2);
    elseif iscell(values)
        ndim = numel(values);
    else
        error('input must be matrix or cell');
    end

    if ndim == 0
        % no values, return
        idxs = [];
        return
    end

    for k = 1:ndim
        if input_is_array
            vs = values(:, k);
        else
            vs = values{k};
        end

        if numel(vs) == 0
            % no values, return
            idxs = [];
            return
        end

        % The dimensionality is verified here, where the position k of
        % the element is known, so that the error message can report it;
        % the helper called below only receives the element itself.
        if ~is_1d(vs)
            error('element %d is not one-dimensional', k);
        end

        idx = unique_indices_from_vector(vs);

        % ensure all elements in values have the same size
        nv = numel(idx);
        if k == 1
            nv_first = nv;

            % allocate space for output
            idxs = zeros(nv, ndim);
        elseif nv ~= nv_first
            error('element %d has %d values, first has %d', ...
                  k, nv, nv_first);
        end

        % store indices
        idxs(:, k) = idx;
    end

function idx = unique_indices_from_vector(vs)
% Return for each element of the input its index into the unique elements.
%
% Input:
%   vs    input accepted by unique() that has at most one dimension with
%         more than one element
%
% Output:
%   idx   third output of unique(vs): if u is the first output of
%         unique(vs), then u(idx) contains the elements of vs
%
% An error is raised if vs has more than one non-singleton dimension.

    if ~is_1d(vs)
        % The position of the element in the caller's input is not
        % available in this function, so the message cannot include it.
        error('input element is not one-dimensional');
    end

    % the first two outputs are not needed; named placeholders are used
    % for them instead of the ~ syntax
    [unused, unused, idx] = unique(vs);

function tf = is_1d(x)
% Return true if x has at most one dimension with more than one element.
%
% Empty arrays, scalars, row vectors and column vectors all give true.

    is_nonsingleton = size(x) > 1;
    tf = sum(is_nonsingleton) < 2;