function [cost, grad] = softCost(X, Y, theta)
	% SOFTCOST Softmax negative log-likelihood and a normalized update direction.
	%
	% Inputs (shapes follow from the matrix products below):
	%   X     - n-by-d data matrix, one sample per row.
	%   Y     - n-by-k label matrix, same size as X*theta.
	%           Presumably one-hot rows -- TODO confirm against the caller.
	%   theta - d-by-k parameter matrix.
	%
	% Outputs:
	%   cost  - -sum(log(p)), where p(i) = sum(softmax(X*theta)(i,:) .* Y(i,:)).
	%   grad  - X' * (Y - softmax(X*theta)), divided by its 2-norm when that
	%           norm is non-zero; returned unscaled (all zeros) otherwise.
	XTheta = X*theta;

	sm = softmax(XTheta);

	% Per-row probability mass selected by Y. The reduction dimension is given
	% explicitly: sum() without a dimension collapses a row vector to a scalar,
	% which would merge all samples into one value when there is a single column.
	m = sum(sm .* Y, 2);

	cost = -sum(log(m));

	% NOTE(review): for one-hot Y this is the NEGATIVE of d(cost)/d(theta),
	% i.e. a descent direction for cost rather than its gradient. The sign is
	% kept as-is because callers may depend on it -- confirm against the caller.
	grad = X' * (Y - sm);

	% For a matrix argument norm(., 2) is the largest singular value (spectral
	% norm), not the Frobenius norm; kept to preserve the existing scaling.
	gradNorm = norm(grad, 2);

	% Skip the scaling for a zero gradient: 0/0 would fill grad with NaN.
	if gradNorm > 0
		grad = grad/gradNorm;
	end
end

function [sm] = softmax(M)
	% SOFTMAX Exponentiate M after subtracting the per-row term from calcS.
	%
	%   M  - score matrix; calcS is called once on the whole matrix.
	%   sm - exp(M - S), with S = calcS(M) expanded across the columns of M.
	rowTerm = calcS(M);

	% bsxfun expands rowTerm along the column dimension, so every column of M
	% has the same per-row value subtracted before exponentiation.
	sm = exp(bsxfun(@minus, M, rowTerm));
end

function [S] = calcS(XTheta)
	% CALCS Row-wise log-sum-exp: S(i) = log(sum(exp(XTheta(i, :)))).
	%
	%   XTheta - n-by-k matrix.
	%   S      - n-by-1 column vector, one value per row of XTheta.
	%
	% The row maximum is subtracted before exponentiating and added back after
	% the log, so large entries do not overflow exp().
	%
	% Both reductions name dimension 2 explicitly. With the default dimension,
	% max()/sum() applied to a transposed n-by-1 input (a row vector) reduce
	% across all rows and return one scalar instead of one value per row.
	c = max(XTheta, [], 2); % Row-wise maximum (n-by-1).

	shifted = XTheta - repmat(c, 1, size(XTheta, 2));

	rowSums = sum(exp(shifted), 2); % n-by-1

	S = log(rowSums) + c;
end
