function [grad_W, grad_b] = Compute_Gradients(X, Y, P, W, lambda, n_b)
% COMPUTE_GRADIENTS  Stochastic mini-batch gradient w.r.t. W and b.
%
%   [grad_W, grad_b] = Compute_Gradients(X, Y, P, W, lambda)
%   [grad_W, grad_b] = Compute_Gradients(X, Y, P, W, lambda, n_b)
%
%   Draws n_b column indices uniformly at random WITH replacement from the
%   N columns of X, and computes the gradients from those columns only.
%   The result therefore depends on the state of the global random stream.
%
%   Inputs
%     X      - d x N matrix, one sample per column.
%     Y      - targets, one column per sample (same column order as X).
%              NOTE(review): presumably one-hot labels, K x N - confirm
%              against the caller.
%     P      - predictions, same size as Y (e.g. from Evaluate_Classifier).
%     W      - weight matrix; must have the same size as G * X', i.e.
%              size(Y, 1) x d.
%     lambda - scalar weight of the regularisation term.
%     n_b    - (optional) number of columns to sample. Defaults to 1,
%              which is the value that used to be hard-coded.
%
%   Outputs
%     grad_W - (1/n_b) * G * X_batch' + 2 * lambda * W, where
%              G = P_batch - Y_batch.
%     grad_b - (1/n_b) * sum of the columns of G (column vector).

    if nargin < 6 || isempty(n_b)
        n_b = 1;
    end

    N = size(X, 2);

    % Uniform draw with replacement from 1..N. Equivalent to
    % randsample(N, n_b, true) but needs no Statistics Toolbox.
    random_data_indices = randi(N, n_b, 1);

    % Every statement is terminated with ';' so nothing is echoed to the
    % console while training.
    X_Batch = X(:, random_data_indices);
    Y_Batch = Y(:, random_data_indices);
    P_Batch = P(:, random_data_indices);

    G_Batch = P_Batch - Y_Batch;

    % 2 * lambda * W is the gradient of the penalty lambda * sum(W(:).^2).
    grad_W = (1 / n_b) * G_Batch * X_Batch' + 2 * lambda * W;

    % Summing over columns is the same as multiplying by ones(n_b, 1).
    grad_b = (1 / n_b) * sum(G_Batch, 2);
end

function P = Evaluate_Classifier(X, W, b)
% EVALUATE_CLASSIFIER  Column-wise softmax of the affine scores W * X + b.
%
%   P = Evaluate_Classifier(X, W, b)
%
%   Inputs
%     X - d x N matrix, one sample per column.
%     W - K x d weight matrix.
%     b - K x 1 bias vector (a scalar is also accepted and is added to
%         every score).
%
%   Output
%     P - K x N matrix; column j is softmax(W * X(:, j) + b), so every
%         column is non-negative and sums to 1.

    % Scores for all samples at once. bsxfun is used instead of implicit
    % expansion so the code also runs on releases older than R2016b.
    S = bsxfun(@plus, W * X, b);

    % Softmax is invariant to subtracting a constant from each column.
    % Subtracting the column maximum keeps every exponent <= 0, so exp()
    % cannot overflow to Inf (which previously produced NaN via Inf/Inf).
    S = bsxfun(@minus, S, max(S, [], 1));

    E = exp(S);
    P = bsxfun(@rdivide, E, sum(E, 1));
end