classdef Optimizer_cpu
    % Greedy Gaussian-mixture fitting on the CPU: starting from a single
    % Gaussian, the worst-fitting (class, feature) pair is split and the
    % mixture is refitted with EM until every class passes a chi-squared
    % goodness-of-fit test or MAX_CLASSES is reached.
    % Requires the Statistics and Machine Learning Toolbox (chi2inv, norminv).

    properties
        C;          % configuration constants returned by config_cpu()

        nclass;     % current number of mixture classes
        nfeature;   % number of features (rows of the sample matrix)
        nsample;    % number of samples (columns of the sample matrix)

        P;          % class priors, one entry per class
        mu;         % class means, one column per class (nfeature x nclass)
        Sigma;      % class covariances, one slab per class (nfeature x nfeature x nclass)
    end

    methods
        function [o] = split_until_all_classes_within_confidence_interval(o, samples)
            % Fit a single Gaussian to all samples, then split classes until
            % every per-feature marginal is within the confidence interval.
            o.nclass = 1;
            [o.nfeature, o.nsample] = size(samples);

            o.P = ones(1, 1, 'single');
            o.mu = mean(samples, 2);
            o.Sigma = cov(samples');

            o.C = config_cpu();

            while o.nclass < o.C.MAX_CLASSES
                tsplit = toc;   % assumes the caller has started a timer with tic
                pdfs = pdfs_from_samples(o, samples);
                weights = weights_from_pdfs(o, pdfs);
                scores = score(o, weights, samples);

                [iclass, ifeature, worst_score] = determine_least_fitting_class(o, scores);
                if o.nclass >= o.C.MIN_CLASSES && worst_score <= 1.0
                    break   % every (feature, class) marginal passes the test
                end
                o = split_class(o, iclass, ifeature, worst_score);
                [o, niteration] = MoGEM(o, samples);
                fprintf('%-10s %10.6f %10.6f score %7.2f class %2d feature %d nMoGEMIteration %4lu\n', ...
                        'SPLIT', tsplit, toc, worst_score, iclass-1, ifeature-1, niteration);
            end
        end

        function [pdfs] = pdfs_from_samples(o, samples)
            % Prior-weighted Gaussian densities P(i)*N(x; mu_i, Sigma_i),
            % one row per class, computed in log space and exponentiated.
            pdfs = zeros(o.nclass, o.nsample, 'single');
            for i = 1:o.nclass
                mu_deviation = samples - repmat(o.mu(:,i), 1, o.nsample);
                qmd = sum(mu_deviation .* (o.Sigma(:,:,i)\mu_deviation), 1);   % squared Mahalanobis distance

                ldS = 0.5*log(abs(det(o.Sigma(:,:,i))));
                lC = log(o.P(i)) - (o.nfeature/2)*log(2*pi);

                pdfs(i,:) = exp(lC - qmd/2 - ldS);
            end
            pdfs(~isfinite(pdfs)) = 0;   % zero out overflow/NaN from near-singular covariances
        end

        function [weights] = weights_from_pdfs(o, pdfs)
            % Posterior responsibilities: normalize the densities per sample.
            samplewise_total = sum(pdfs, 1);
            samplewise_total(~isfinite(samplewise_total)) = inf;
            weights = pdfs ./ repmat(samplewise_total, o.nclass, 1);
            weights(~isfinite(weights)) = 0;   % samples with zero total density get zero weight
        end

        function [scores] = score(o, weights, samples)
            % Pearson chi-squared goodness of fit of each per-feature marginal
            % against equiprobable bins of the fitted Gaussian, normalized by
            % the chi2inv threshold at a Bonferroni-corrected confidence level,
            % so a score <= 1 means the marginal passes.
            cl = 1 - (1-o.C.CONFIDENCE_LEVEL)/(o.nfeature*o.nclass);
            threshold = chi2inv(cl, o.C.NPROBABILITY_BINS - o.C.DOFS);

            classwise_share = sum(weights, 2);
            scores = zeros(o.nfeature, o.nclass, 'single');
            for j = 1:o.nclass
                expected_distribution = classwise_share(j)/o.C.NPROBABILITY_BINS;   % expected weight per equiprobable bin
                for i = 1:o.nfeature
                    edges = single(norminv((0:o.C.NPROBABILITY_BINS)/o.C.NPROBABILITY_BINS, o.mu(i,j), sqrt(o.Sigma(i,i,j))));
                    [~,~,bin_membership] = histcounts(samples(i,:), edges);
                    weight_bins = zeros(o.C.NPROBABILITY_BINS, 1, 'single');
                    for b = 1:o.C.NPROBABILITY_BINS
                        weight_bins(b) = sum((bin_membership==b) .* weights(j,:));
                    end
                    scores(i,j) = sum((weight_bins-expected_distribution).^2 ./ expected_distribution) / threshold;
                end
            end
        end

        function [iclass, ifeature, worst_score] = determine_least_fitting_class(o, scores)
            % Locate the largest score; scores is nfeature x nclass, so the
            % column-major linear index decomposes into (feature, class).
            [worst_score, iscore] = max(scores(:));
            iclass = floor((iscore-1) / o.nfeature) + 1;
            ifeature = mod(iscore-1, o.nfeature) + 1;
        end

        function [o] = split_class(o, iclass, ifeature, worst_score)
            % Split class iclass along feature ifeature: halve its prior and
            % offset the two means by half a standard deviation along that
            % feature; the covariance is copied unchanged.
            o.nclass = o.nclass + 1;
            o.P(o.nclass) = o.P(iclass) / 2;
            o.P(iclass) = o.P(iclass) / 2;

            mean_shift = zeros(o.nfeature, 1, 'single');
            mean_shift(ifeature) = 0.5*sqrt(o.Sigma(ifeature,ifeature,iclass));
            o.mu(:,o.nclass) = o.mu(:,iclass) + mean_shift;
            o.mu(:,iclass) = o.mu(:,iclass) - mean_shift;

            o.Sigma(:,:,o.nclass) = o.Sigma(:,:,iclass);
        end

        function [o, i] = MoGEM(o, samples)
            % EM for the mixture of Gaussians: iterate until the relative
            % change in log-likelihood drops below EM_EPSILON (after
            % MIN_EM_ITERATIONS) or MAX_EM_ITERATIONS is reached.
            log_likelihood_prev = 0.0;
            for i = 1:o.C.MAX_EM_ITERATIONS
                pdfs = pdfs_from_samples(o, samples);
                weights = weights_from_pdfs(o, pdfs);
                o = maximization(o, weights, samples);

                samplewise_total = sum(pdfs, 1);
                log_likelihood = sum(log(samplewise_total(samplewise_total > 0)));   % ignore samples with zero density
                if i >= o.C.MIN_EM_ITERATIONS && abs((log_likelihood_prev-log_likelihood) / log_likelihood) <= o.C.EM_EPSILON
                    break;
                end
                log_likelihood_prev = log_likelihood;
            end
        end

        function [o] = maximization(o, weights, samples)
            % M-step: responsibility-weighted updates of priors, means and
            % covariances; classes that collapse onto fewer than two effective
            % samples are reinitialized from a random sample.
            o.P = mean(weights, 2);
            for i = 1:o.nclass
                weight_share = o.P(i) * o.nsample;   % effective number of samples in class i
                if weight_share >= 2
                    w = repmat(weights(i,:), o.nfeature, 1);
                    o.mu(:,i) = sum(w.*samples, 2) / weight_share;
                    mu_deviation = samples - repmat(o.mu(:,i), 1, o.nsample);
                    o.Sigma(:,:,i) = (mu_deviation * (w.*mu_deviation)') / weight_share;
                else
                    fprintf('%-10s %10.6f %10.6f class %d\n', 'SINGULAR', 0, 0, i-1);
                    o.P(i) = 1/o.nclass;
                    o.mu(:,i) = samples(:, randi(o.nsample));   % reseed the mean from a random sample
                    o.Sigma(:,:,i) = cov(samples') / o.nclass;
                end
            end
        end
    end
end
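The listing contains no driver code, so the following is a minimal usage sketch rather than part of the original class. It assumes `config_cpu()` is on the path and returns a struct with the constants referenced above (MAX_CLASSES, MIN_CLASSES, CONFIDENCE_LEVEL, NPROBABILITY_BINS, DOFS, MAX_EM_ITERATIONS, MIN_EM_ITERATIONS, EM_EPSILON), that the Statistics and Machine Learning Toolbox is installed, and that a stopwatch is started with `tic` because the split loop reads `toc`; the two-cluster input data is synthetic.

```matlab
% Minimal usage sketch (assumptions: config_cpu() is available and supplies the
% constants used by Optimizer_cpu; the data below is synthetic).
tic;                                                     % timer read by toc inside the split loop
samples = single([randn(2, 500), 4 + randn(2, 500)]);    % 2 features x 1000 samples, two clusters

o = Optimizer_cpu();
o = split_until_all_classes_within_confidence_interval(o, samples);

fprintf('fitted %d classes\n', o.nclass);
disp(o.mu);                                              % one column of class means per fitted class
```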