cosmo index unique

function [cell_indices, unique_values] = cosmo_index_unique(values)
    % index unique (combinations of) elements
    %
    % [cell_indices, unique_values]=cosmo_index_unique(values)
    %
    % Input:
    %   values              either:
    %                       - cell with K elements, each of which must be
    %                         either a vector with M elements or a cell with
    %                         M strings (each element in each cell is treated
    %                         as a row); or
    %                       - MxK matrix
    %
    % Returns:
    %   cell_indices        Ux1 cell, if along the input there are U unique
    %                       combinations of values (element-wise). The K-th
    %                       element has U_K indices in the range 1:M indicating
    %                       the rows in the input have the same value
    %   unique_values       either:
    %                       - Kx1 cell, each with U elements, containing the
    %                         unique combinations of values of the input
    %                         [if the input is a cell]; or
    %                       - UxK cell, containing the unique rows in the input
    %
    % Examples:
    %     [i,u]=cosmo_index_unique({[3 2 2 2 1],[3 2 3 3 3]});
    %     cosmo_disp(i);
    %     %|| { [ 5 ]
    %     %||   [ 2 ]
    %     %||   [ 3
    %     %||     4 ]
    %     %||   [ 1 ] }
    %     cosmo_disp(u);
    %     %|| { [ 1    [ 3
    %     %||     2      2
    %     %||     2      3
    %     %||     3 ]    3 ] }
    %
    %     % the same operation in matrix operation (input is transposed)
    %     [i,u]=cosmo_index_unique([3 2 2 2 1;3 2 3 3 3]');
    %     cosmo_disp(i);
    %     %|| { [ 5 ]
    %     %||   [ 2 ]
    %     %||   [ 3
    %     %||     4 ]
    %     %||   [ 1 ] }
    %     cosmo_disp(u);
    %     %|| [ 1         3
    %     %||   2         2
    %     %||   2         3
    %     %||   3         3 ]
    %
    %     % it also works if (some of the) input contains cell strings
    %     [i,u]=cosmo_index_unique({{'ccc','bb','bb','bb','a'},...
    %                                 [4 3 4 4 4]});
    %     cosmo_disp(i);
    %     %|| { [ 5 ]
    %     %||   [ 2 ]
    %     %||   [ 3
    %     %||     4 ]
    %     %||   [ 1 ] }
    %     cosmo_disp(u);
    %     %|| { { 'a'      [ 4
    %     %||     'bb'       3
    %     %||     'bb'       4
    %     %||     'ccc' }    4 ] }
    %
    % #   For CoSMoMVPA's copyright information and license terms,   #
    % #   see the COPYING file distributed with CoSMoMVPA.           #

    return_unique_values = nargout >= 2;

    [idxs, input_is_array] = index_unique_per_value(values);

    [idxs_sorted, i] = sortrows(idxs);

    msk = [true; any(diff(idxs_sorted, 1), 2)];
    unq_pos = find(msk);

    nidxs = size(idxs, 1);
    cell_sizes = diff([unq_pos; (nidxs + 1)]);

    % convert to cell representation
    % cell_indices=mat2cell(i,cell_sizes,1);
    cell_indices = quick_mat2cell_vec(i, cell_sizes);
    % assertEqual(cell_indices,cell_indices2);

    if isempty(i)
        singleton_idx = idxs;
    else
        singleton_idx = i(unq_pos);
    end

    if return_unique_values
        unique_values = get_unique_values(values, singleton_idx, ...
                                          input_is_array);
    end

function c = quick_mat2cell_vec(i, cell_sizes)
    n = numel(cell_sizes);
    c = cell(n, 1);
    pos = 0;
    for k = 1:n
        ncell = cell_sizes(k);
        c{k} = i(pos + (1:ncell));
        pos = pos + ncell;
    end

function unique_values = get_unique_values(values, first_idx, input_is_array)
    if input_is_array
        % return matrix
        unique_values = values(first_idx, :);
    else
        % return cell with values
        ndim = numel(values);
        unique_values = cell(1, ndim);
        for k = 1:ndim
            vdim = cosmo_slice(values{k}(:), first_idx);
            unique_values{k} = vdim;
        end
    end

function [idxs, input_is_array] = index_unique_per_value(values)
    % finds the indices of unique elements for each element
    % in values (that must be a cell)
    input_is_array = (islogical(values) || isnumeric(values)) && ...
                        numel(size(values)) == 2;
    if input_is_array
        ndim = size(values, 2);
    elseif iscell(values)
        ndim = numel(values);
    else
        error('input must be matrix or cell');
    end

    if ndim == 0
        % no values, return
        idxs = [];
        return
    end

    for k = 1:ndim
        if input_is_array
            vs = values(:, k);
        else
            vs = values{k};
        end

        if numel(vs) == 0
            % no values, return
            idxs = [];
            return
        end

        idx = unique_indices_from_vector(vs);

        % ensure all elements in values have the same size
        nv = numel(idx);
        if k == 1
            nv_first = nv;

            % allocate space for output
            idxs = zeros(nv, ndim);
        else
            if nv ~= nv_first
                error('element %d has %d values, first has %d', ...
                      k, nv, nv_first);
            end
        end

        % store indices
        idxs(:, k) = idx;
    end

function idx = unique_indices_from_vector(vs)
    if ~is_1d(vs)
        error('element %d is not one-dimensional', k);
    end

    [unused, unused, idx] = unique(vs);

function tf = is_1d(x)
    tf = sum(size(x) > 1) <= 1;