function [grad_W, grad_b] = Compute_Gradients(X, Y, P, W, lambda)
% COMPUTE_GRADIENTS  Gradient estimate from a randomly drawn mini-batch.
%
%   [grad_W, grad_b] = Compute_Gradients(X, Y, P, W, lambda)
%
%   Columns of X, Y and P are indexed with the same random column indices,
%   so the three inputs must have the same number of columns (one column
%   per sample).
%
%   Inputs:
%     X      - data matrix; column i is sample i.
%     Y      - targets, one column per sample
%              (presumably one-hot labels -- confirm against caller).
%     P      - predictions, one column per sample
%              (presumably class probabilities -- confirm against caller).
%     W      - weight matrix, same size as grad_W.
%     lambda - scalar multiplying the 2 * W term added to grad_W.
%
%   Outputs:
%     grad_W - (1 / n_b) * G * X_batch' + 2 * lambda * W
%     grad_b - (1 / n_b) * G * ones(n_b, 1)
%   where G = -(Y_batch - P_batch) and n_b is the batch size (fixed to 1).
%
%   The batch is drawn uniformly at random WITH replacement, so results
%   depend on the state of the global random number generator unless X
%   has a single column.

    N = size(X, 2);   % number of available samples (columns)
    n_b = 1;          % mini-batch size

    % Uniform sampling with replacement from 1..N. randi is core
    % MATLAB/Octave, so no Statistics toolbox is required.
    batch_idx = randi(N, n_b, 1);

    % Every statement is terminated with ';' so that calling this function
    % does not echo intermediate matrices to the console.
    X_Batch = X(:, batch_idx);
    Y_Batch = Y(:, batch_idx);
    P_Batch = P(:, batch_idx);

    G_Batch = -(Y_Batch - P_Batch);

    grad_W = (1 / n_b) * G_Batch * X_Batch' + 2 * lambda * W;
    % Multiplying by a column of ones sums G_Batch over the batch columns.
    grad_b = (1 / n_b) * G_Batch * ones(n_b, 1);
end
function P = Evaluate_Classifier(X, W, b)
% EVALUATE_CLASSIFIER  Column-wise softmax of the affine scores W * X + b.
%
%   P = Evaluate_Classifier(X, W, b)
%
%   Inputs:
%     X - data matrix; column i is sample i (N = size(X, 2) samples).
%     W - weight matrix with K = size(W, 1) rows.
%     b - bias added to every score column; expected to be K-by-1
%         (a scalar also works).
%
%   Output:
%     P - K-by-N matrix; column i is exp(s_i) / sum(exp(s_i)) with
%         s_i = W * X(:, i) + b, so every column sums to 1.
%
%   The per-column maximum score is subtracted before exponentiating.
%   This leaves the softmax value mathematically unchanged but keeps
%   exp() from overflowing to Inf (which would yield NaN after the
%   normalisation) when scores are large.

    % bsxfun keeps this working on releases without implicit expansion.
    scores = bsxfun(@plus, W * X, b);

    % Shift each column so that its largest entry is 0.
    shifted = bsxfun(@minus, scores, max(scores, [], 1));

    unnormalised = exp(shifted);
    P = bsxfun(@rdivide, unnormalised, sum(unnormalised, 1));
end