run bad double dipping analysis skl

%% Double dipping

% Exercise skeleton: compare classification accuracy on random (pure noise)
% data when testing on the training data itself ("double dipping") versus
% testing on independently generated data.
%
% Warning: this exercise shows the *bad* practice of double dipping
% (also known as circular analysis). You must never, ever use results
% from double dipping to interpret a real analysis that you would
% publish.
%
% Note: this is a skeleton. Until the '>>> Your code here <<<' placeholders
% are filled in, the variables used further down (the two prediction
% vectors) are not defined anywhere in this script.

nfeatures = 100;            % number of columns in each generated data matrix
nsamples_per_class = 200;   % number of samples (rows) for each class
nclasses = 2;               % number of distinct class labels
niter = 1000;               % number of simulation repetitions

% compute number of samples
nsamples = nclasses * nsamples_per_class;

% set targets
% (column vector of length nsamples with labels interleaved as
%  1, 2, ..., nclasses, 1, 2, ..., nclasses, ...)
targets = repmat((1:nclasses)', nsamples_per_class, 1);

% allocate space for output: one row per iteration, two columns.
% The legend at the end of the script labels the first column
% 'double dipping' and the second 'independent', so accuracies should be
% stored in that column order.
accuracies = zeros(niter, 2);

for iter = 1:niter
    % generate random gaussian train data of size nsamples x nfeatures
    % assign the result to a variable 'train_data'
    %%%% >>> Your code here <<< %%%%

    % for the double dipping test data, assign 'double_dipping_test_data'
    % to be the same as the training data.
    %
    % *** WARNING ***
    % For real data analyses (that you would publish in a paper) you
    % must never do double dipping analysis - its results are invalid
    % ****************
    %%%% >>> Your code here <<< %%%%

    % for the independent data, generate random gaussian data (of the
    % same size as train_data) and assign to a variable
    % 'independent_test_data'
    %%%% >>> Your code here <<< %%%%

    % compute class labels predictions for both test sets using
    % cosmo_classify_lda. Store the predictions in
    % 'double_dipping_pred' and 'independent_pred', respectively
    % NOTE(review): cosmo_classify_lda is not defined in this script; it is
    % presumably provided by the CoSMoMVPA toolbox - confirm its argument
    % order (train samples, train targets, test samples) in its help text.
    %%%% >>> Your code here <<< %%%%

    % compute classification accuracies
    % (fraction of predicted labels equal to the true labels; this assumes
    %  each prediction is an nsamples x 1 column vector like 'targets')
    double_dipping_acc = mean(double_dipping_pred == targets);
    independent_acc = mean(independent_pred == targets);

    % store accuracies in the iter-th row of the 'accuracies' matrix
    %%%% >>> Your code here <<< %%%%
end

% show histogram
% (with a matrix input, hist draws one series per column, using 100 bins;
%  the legend entries follow the column order of 'accuracies')
hist(accuracies, 100);
legend({'double dipping', 'independent'});
run bad double dipping analysis skl

Previous topic

Next topic

This Page