\[ p_{i}=\frac{|C_{i,D}|}{|D|}\]
\[Info(D)=-\sum_{i=1}^{m} p_{i}\log_{2}(p_{i})\]
\[Info_{A}(D)=\sum_{j=1}^{v} \frac{|D_{j}|}{|D|}\times Info (D_{j})\]
\[Gain(A)=Info(D)-Info_{A}(D)\]
\[SplitInfo_{A}(D)=-\sum_{j=1}^{v} \frac{|D_{j}|}{|D|}\times \log_{2}\left(\frac{|D_{j}|}{|D|}\right)\]
\[GainRatio(A) = \frac{Gain(A)}{SplitInfo_{A}(D)}\]
\[gini(D)=1-\sum_{j=1}^{n}p_{j}^{2}\]
\[gini_{A}(D)=\frac{|D_{1}|}{|D|}gini(D_{1})+\frac{|D_{2}|}{|D|} gini(D_{2})\]
\[\Delta gini(A)=gini(D)-gini_{A}(D)\]
\[gini(D)=1-\left ( \frac{9}{14} \right )^{2}-\left ( \frac{5}{14} \right )^{2}=0.459\]
\[gini_{income\in \left \{ low,medium \right \}}(D)=\left ( \frac{10}{14} \right )gini(D_{1})+\left ( \frac{4}{14} \right )gini(D_{2})\]
\[=\frac{10}{14}\left (1-\left ( \frac{7}{10} \right )^{2}-\left ( \frac{3}{10} \right )^{2} \right )+\frac{4}{14}\left (1-\left ( \frac{2}{4} \right )^{2}-\left ( \frac{2}{4} \right )^{2} \right )\]
\[=0.443=gini_{income\in \left \{ high \right \}}(D)\]
\[P(B)=\sum_{i=1}^{M}P(B|A_{i})P(A_{i})\]
\[P(H|X)=\frac{P(X|H)P(H)}{P(X)}=P(X|H)\times P(H)/P(X)\]
\[P(C_{i}|X)=\frac{P(X|C_{i})P(C_{i})}{P(X)}\]
\[P(C_{i}|X)=P(X|C_{i})P(C_{i})\]
needs to be maximized (since \(P(X)\) is constant for all classes)
\[P(X|C_{i})=\prod_{k=1}^{n}P(x_{k}|C_{i})=P(x_{1}|C_{i})\times P(x_{2}|C_{i})\times \cdots \times P(x_{n}|C_{i})\]
\[g(x,\mu,\sigma)=\frac{1}{\sqrt{2\pi}\sigma}e^{-\frac{(x-\mu)^{2}}{2\sigma ^{2}}}\]
\[P(X|C_{i})=g(x_{k},\mu_{C_{i}},\sigma_{C_{i}})\]
\[P(X|C_{i})=\prod_{k=1}^{n}P(x_{k}|C_{i})\]
\[FOIL\_Gain=pos' \times \left ( \log_{2}\frac{pos'}{pos'+neg'}-\log_{2}\frac{pos}{pos+neg} \right )\]
\[FOIL\_Prune(R)=\frac{pos-neg}{pos+neg}\]


\[precision=\frac{TP}{TP+FP}\]
\[recall=\frac{TP}{TP+FN}\]
\[F=\frac{2\times precision \times recall}{precision+recall}\]
Actual Class\Predicted class 
cancer = yes 
cancer = no 
Total 
Recognition(%) 

cancer = yes 
90 
210 
300 
30.00 (sensitivity)

cancer = no 
140 
9560 
9700 
98.56 (specificity)

Total 
230 
9770 
10000 
96.40 (accuracy)
Precision = 90/230 = 39.13% Recall = 90/300 = 30.00%
\[Acc(M)=\frac{1}{k}\sum_{i=1}^{k}(0.632 \times Acc(M_{i})_{testset}+0.368 \times Acc(M_{i})_{trainset})\]
\[t=\frac{\bar{err}(M_{1})-\bar{err}(M_{2})}{\sqrt{var(M_{1}-M_{2})/k}}\]
\[var(M_{1}-M_{2})=\frac{1}{k}\sum_{i=1}^{k}\left [err(M_{1})_{i} - err(M_{2})_{i} - (\bar{err}(M_{1})-\bar{err}(M_{2}))\right ]^{2}\]
\[var(M_{1}-M_{2})=\sqrt{\frac{var(M_{1})}{k_{1}}+\frac{var(M_{2})}{k_{2}}}\]
where k1 & k2 are the number of cross-validation samples used for M1 & M2, respectively.



Test error (generalization error): the average loss over the test set
Mean absolute error:
\[\frac{\sum_{i=1}^{d}|y_{i}-y_{i}^{'}|}{d}\]
Mean squared error:
\[\frac{\sum_{i=1}^{d}(y_{i}-y_{i}^{'})^{2}}{d}\]
Relative absolute error:
\[\frac{\sum_{i=1}^{d}|y_{i}-y_{i}^{'}|}{\sum_{i=1}^{d}|y_{i}-\bar{y}|}\]
Relative squared error:
\[\frac{\sum_{i=1}^{d}(y_{i}-y_{i}^{'})^{2}}{\sum_{i=1}^{d}(y_{i}-\bar{y})^{2}}\]
The mean squared error exaggerates the presence of outliers
\[error(M_{i})=\sum_{j=1}^{d}w_{j}\times err (X_{j})\]
\[\log \frac{1-error(M_{i})}{error(M_{i})}\]