\[P(y=1|x;\theta) = h_\theta(x) = \frac{\exp(\theta^Tx)}{1+\exp(\theta^Tx)} \\
P(y=0|x;\theta) = 1 - h_\theta(x) = \frac{1}{1+\exp(\theta^Tx)}
\]
\[\begin{align}
L(\theta) & = L(\theta;X,\vec{y}) = p(\vec{y}|X;\theta) \\
& =\prod_{i=1}^mp(y^{(i)}|x^{(i)};\theta) \\
& =\prod_{i=1}^m(h_\theta(x^{(i)}))^{y^{(i)}}(1-h_\theta(x^{(i)}))^{1-y^{(i)}}
\end{align}
\]
\[\begin{align}
l(\theta) & = \log L(\theta) \\
& = \sum_{i=1}^m \left[ y^{(i)}\log h(x^{(i)}) + (1-y^{(i)})\log (1-h(x^{(i)})) \right] \\
& = \sum_{i=1}^m \left[ y^{(i)}\log \frac{h(x^{(i)})}{1-h(x^{(i)})} + \log (1-h(x^{(i)})) \right] \\
& = \sum_{i=1}^m \left[ y^{(i)}\theta^Tx^{(i)} + \log (1-h(x^{(i)})) \right]
\end{align}
\]
\[\frac{\partial}{\partial \theta_j}l(\theta) = \frac{\partial}{\partial \theta_j}y\theta^Tx + \frac{\partial}{\partial \theta_j}\log(1-h(x))
\]
\[\frac{\partial}{\partial \theta_j}y\theta^Tx = yx_j
\]
\[\begin{align}
\frac{\partial}{\partial \theta_j}\log(1-h(x)) & = \frac{1}{1-h(x)}*\frac{\partial}{\partial \theta_j}(1-h(x)) \\
& = \frac{1}{1-h(x)}*(-1)\frac{\partial}{\partial \theta_j}h(x) \\
& = \frac{1}{1-h(x)}*(-1)\frac{\partial}{\partial \theta_j} \frac{g(\theta^Tx)}{1+g(\theta^Tx)} \qquad \text{where } g(z) = \exp(z)
\end{align}
\]
\[\begin{align}
\frac{\partial}{\partial \theta_j}\log(1-h(x)) & = \frac{\partial \log(1-h(x))}{\partial (1-h(x))} * \frac{\partial (1-h(x))}{\partial h(x)} * \frac{\partial h(x)}{\partial g(\theta^Tx)} * \frac{\partial g(\theta^Tx)}{\partial \theta^Tx} * \frac{\partial \theta^Tx}{\partial \theta_j} \\
& = \frac{1}{1-h(\theta^Tx)} * (-1) * \frac{1}{(1+g(\theta^Tx))^2} * g(\theta^Tx) * x_j \\
& = \frac{1}{1-h(\theta^Tx)} * (-1) * \frac{1}{1+g(\theta^Tx)} * \frac{g(\theta^Tx)}{1+g(\theta^Tx)} * x_j \\
& = \frac{1}{1-h(\theta^Tx)} * (-1) * (1-h(\theta^Tx)) * h(\theta^Tx) * x_j \\
& = - h(\theta^Tx)x_j
\end{align}
\]
\[\frac{\partial}{\partial \theta_j}l(\theta) = yx_j - h(\theta^Tx)x_j = (y-h(\theta^Tx))x_j
\]