Simple Linear Regression
\[y=a+b x+\epsilon
\]
\[\begin{aligned}
Q \equiv Q(a, b) &=\sum_{i=1}^{n} e_{i}^{2} \\
&=\sum_{i=1}^{n}\left(y_{i}-\hat{y}_{i}\right)^{2} \\
&=\sum_{i=1}^{n}\left(y_{i}-a-b x_{i}\right)^{2}
\end{aligned}
\]
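Written as code, the objective is just the residual sum of squares; a minimal sketch with made-up data (the function name `Q` and the toy values are my own, not from the notes):

```python
import numpy as np

def Q(a: float, b: float, x: np.ndarray, y: np.ndarray) -> float:
    """Residual sum of squares Q(a, b) = sum_i (y_i - a - b*x_i)^2."""
    e = y - a - b * x                    # residuals e_i
    return float(e @ e)

# Toy data: y roughly 1 + 2x plus small noise
rng = np.random.default_rng(0)
x = np.linspace(0.0, 1.0, 20)
y = 1.0 + 2.0 * x + rng.normal(scale=0.1, size=x.size)
print(Q(1.0, 2.0, x, y), Q(0.0, 0.0, x, y))  # near-true (a, b) gives a far smaller Q
```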
\[\begin{aligned}
a &=\bar{y}-b \bar{x} \\
&=\sum_{i=1}^{n}\left[\frac{1}{n}-\frac{\bar{x}\left(x_{i}-\bar{x}\right)}{\sum_{j=1}^{n}\left(x_{j}-\bar{x}\right)^{2}}\right] y_{i}
\end{aligned}
\]
\[\begin{aligned}
\operatorname{Var}(a) &=\sum_{i=1}^{n}\left[\frac{1}{n}-\frac{\bar{x}\left(x_{i}-\bar{x}\right)}{\sum_{j=1}^{n}\left(x_{j}-\bar{x}\right)^{2}}\right]^{2} \operatorname{Var}\left(y_{i}\right) \\
&=\left[\frac{1}{n}+\frac{\bar{x}^{2}}{\sum_{i=1}^{n}\left(x_{i}-\bar{x}\right)^{2}}\right] \sigma^{2}
\end{aligned}
\]
\[y=\beta_{0}+\beta_{1} x_{1}+\beta_{2} x_{2}+\ldots+\beta_{p} x_{p}+\varepsilon
\]
Gauss-Markov conditions:
\[\begin{array}{l}
E\left(\varepsilon_{i}\right)=0, \operatorname{Var}\left(\varepsilon_{i}\right)=\sigma^{2}, \quad i=1, \ldots, n \\
\operatorname{Cov}\left(\varepsilon_{i}, \varepsilon_{j}\right)=0, \quad i \neq j, \ i, j=1, \ldots, n
\end{array}\]
Normality condition:
\[\left\{\begin{array}{l}
\varepsilon_{i} \sim N\left(0, \sigma^{2}\right) \\
\varepsilon_{1}, \varepsilon_{2}, \ldots, \varepsilon_{n} \quad \text{mutually independent}
\end{array}\right.\]
Matrix form of simple linear regression
\[y_{i}=\beta_{0}+\beta_{1} x_{i}+\varepsilon_{i}
\]
\[\begin{array}{l}
\boldsymbol{y}=\left(y_{1}, \ldots, y_{n}\right)^{\top}, \quad \mathbf{1}=(1, \ldots, 1)^{\top} \\
\boldsymbol{x}=\left(x_{1}, \ldots, x_{n}\right)^{\top}, \quad \boldsymbol{X}=(\mathbf{1}, \boldsymbol{x})_{n \times 2} \\
\boldsymbol{\varepsilon}=\left(\varepsilon_{1}, \ldots, \varepsilon_{n}\right)^{\top}, \quad \boldsymbol{\beta}=\left(\beta_{0}, \beta_{1}\right)^{\top}
\end{array}\]
\[\left\{\begin{array}{l}
\boldsymbol{y}=\boldsymbol{X} \boldsymbol{\beta}+\boldsymbol{\varepsilon} \\
E(\boldsymbol{\varepsilon})=\mathbf{0} \\
\operatorname{Var}(\boldsymbol{\varepsilon})=\sigma^{2} I_{n}
\end{array}\right.\]
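A minimal NumPy sketch of this matrix model (the design, true parameters, and noise level here are invented for illustration):

```python
import numpy as np

rng = np.random.default_rng(1)
n = 50
x = rng.uniform(0.0, 10.0, n)
eps = rng.normal(scale=2.0, size=n)        # Var(eps) = sigma^2 * I_n, E(eps) = 0
y = 3.0 + 1.5 * x + eps                    # true beta = (3, 1.5)

X = np.column_stack([np.ones(n), x])       # X = (1, x), an n-by-2 design matrix
beta_hat, *_ = np.linalg.lstsq(X, y, rcond=None)  # minimizes ||y - X beta||^2
print(beta_hat)                            # approximately [3, 1.5]
```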
\[Q\left(\beta_{0}, \beta_{1}\right)=\sum_{i=1}^{n}\left(y_{i}-\beta_{0}-\beta_{1} x_{i}\right)^{2}
\]
\[\hat{\boldsymbol{\beta}}=\left(\hat{\beta}_{0}, \hat{\beta}_{1}\right)^{\top}
\]
\[\hat{\boldsymbol{\beta}}=\arg \min\limits_{\boldsymbol{\beta} \in R^{2}} Q(\boldsymbol{\beta})
\]
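This arg-min can also be found numerically; a sketch assuming scipy is available (toy data, illustrative only):

```python
import numpy as np
from scipy.optimize import minimize

rng = np.random.default_rng(2)
x = rng.uniform(0.0, 10.0, 40)
y = 3.0 + 1.5 * x + rng.normal(scale=2.0, size=40)

# Q(beta) = sum_i (y_i - beta0 - beta1*x_i)^2, minimized over beta in R^2
Q = lambda beta: np.sum((y - beta[0] - beta[1] * x) ** 2)
res = minimize(Q, x0=np.zeros(2))          # unconstrained quasi-Newton search
print(res.x)                               # agrees with the closed-form solution below
```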
Setting the partial derivatives to zero:
\[\left\{\begin{array}{l}
\frac{\partial Q}{\partial \beta_{0}}=-2 \sum_{i=1}^{n}\left(y_{i}-\beta_{0}-\beta_{1} x_{i}\right)=0 \\
\frac{\partial Q}{\partial \beta_{1}}=-2 \sum_{i=1}^{n} x_{i}\left(y_{i}-\beta_{0}-\beta_{1} x_{i}\right)=0
\end{array}\right.\]
Simplifying:
\[\left\{\begin{array}{ll}
\hat{\beta}_{0}+\bar{x} \hat{\beta}_{1} & =\bar{y} \\
\bar{x} \hat{\beta}_{0}+\frac{\sum_{i=1}^{n} x_{i}^{2}}{n} \hat{\beta}_{1} & =\frac{\sum\limits_{i=1}^{n} x_{i} y_{i}}{n}
\end{array}\right.\]
\[\Rightarrow \sum x_{i} y_{i}-n \bar{x} \bar{y}=\hat{\beta}_{1}\left(\sum x_{i}^{2}-n \bar{x}^{2}\right)
\]
\[S_{xx}=\sum\limits_{i=1}^{n}\left(x_{i}-\bar{x}\right)^{2}=\sum\limits_{i=1}^{n} x_{i}^{2}-n \bar{x}^{2}, \quad S_{xy}=\sum\limits_{i=1}^{n} x_{i} y_{i}-n \bar{x} \bar{y}
\]
\[\begin{array}{l}
\hat{\beta}_{0}=\bar{y}-\hat{\beta}_{1} \bar{x} \\
\hat{\beta}_{1}=\frac{S_{xy}}{S_{xx}}
\end{array}\]
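A minimal sketch of the closed-form estimates (the function name and cross-check are my own, not from the notes):

```python
import numpy as np

def ols_simple(x: np.ndarray, y: np.ndarray):
    """Closed-form OLS: beta1 = S_xy / S_xx, beta0 = ybar - beta1 * xbar."""
    xbar, ybar = x.mean(), y.mean()
    S_xx = np.sum((x - xbar) ** 2)                 # = sum x_i^2 - n * xbar^2
    S_xy = np.sum(x * y) - x.size * xbar * ybar
    beta1 = S_xy / S_xx
    beta0 = ybar - beta1 * xbar
    return beta0, beta1

# Cross-check: np.polyfit(x, y, 1) returns [beta1_hat, beta0_hat]
```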
Properties:
Linearity:
Definition: an estimator that is a linear function of the random variables $$\left\{y_{i}, i=1, \ldots, n\right\}$$ is called a linear estimator.
(Here \(x_{i}\) denotes the centered value \(X_{i}-\bar{X}\).)
\[\begin{aligned}
\hat{\beta}_{1} &=\frac{\sum x_{i} y_{i}}{\sum x_{i}^{2}}=\frac{\sum x_{i}\left(Y_{i}-\bar{Y}\right)}{\sum x_{i}^{2}} \quad \text{(split into two sums)} \\
&=\frac{\sum x_{i} Y_{i}}{\sum x_{i}^{2}}-\frac{\bar{Y} \sum x_{i}}{\sum x_{i}^{2}}=\frac{\sum x_{i} Y_{i}}{\sum x_{i}^{2}}=\sum k_{i} Y_{i}
\end{aligned}\]
\(\left(k_{i}=\frac{x_{i}-\bar{x}}{\sum_{i=1}^{n}\left(x_{i}-\bar{x}\right)^{2}}\right)\)
\[\sum x_{i}=\sum\left(X_{i}-\bar{X}\right)=\sum X_{i}-n \bar{X}=0
\]
Unbiasedness:
\(\bar{x}=\frac{1}{n} \sum_{i=1}^{n} x_{i}\)
Two tempting but wrong shortcuts:
\(\hat{\beta}_{1}=\frac{S_{xy}}{S_{xx}}=\frac{y_{i}-\bar{y}}{x_{i}-\bar{x}}=\beta_{1}\) (incorrect)
\(E(\hat{\beta}_{1})=E\left(\frac{y_{i}-\bar{y}}{x_{i}-\bar{x}}\right)=E(\beta_{1})=\beta_{1}\) (incorrect)
The correct argument writes \(\hat{\beta}_{1}=\sum b_{i} y_{i}\) with \(b_{i}=\frac{x_{i}-\bar{x}}{\sum\left(x_{i}-\bar{x}\right)^{2}}\):
\[\begin{aligned}
E\left(\hat{\beta}_{1}\right) &=E\left(\sum b_{i} y_{i}\right)=\sum b_{i} E\left(y_{i}\right)=\sum b_{i}\left(\beta_{0}+\beta_{1} x_{i}\right) \\
&=\sum \frac{x_{i}-\bar{x}}{\sum\left(x_{i}-\bar{x}\right)^{2}}\left(\beta_{0}+\beta_{1} x_{i}\right) \\
&=\beta_{0} \sum \frac{x_{i}-\bar{x}}{\sum\left(x_{i}-\bar{x}\right)^{2}}+\beta_{1} \sum \frac{\left(x_{i}-\bar{x}\right) x_{i}}{\sum\left(x_{i}-\bar{x}\right)^{2}}=\beta_{1}
\end{aligned}
\]
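The last equality holds because \(\sum b_{i}=0\) and \(\sum b_{i} x_{i}=1\); a quick numeric check with toy values (my own example, not from the notes):

```python
import numpy as np

x = np.array([1.0, 2.0, 4.0, 7.0])         # any non-constant x works
b = (x - x.mean()) / np.sum((x - x.mean()) ** 2)
print(np.isclose(b.sum(), 0.0))            # sum b_i = 0: the beta_0 term drops out
print(np.isclose(b @ x, 1.0))              # sum b_i x_i = 1: beta_1 survives intact
```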
Using the Gauss-Markov conditions (uncorrelated errors, so the variance of the sum is the sum of variances):
\[\begin{array}{l}
\operatorname{Var}\left(\hat{\beta}_{1}\right)=\operatorname{Var}\left(\sum b_{i} y_{i}\right)=\sum \operatorname{Var}\left(b_{i} y_{i}\right)=\sum b_{i}^{2} \operatorname{Var}\left(y_{i}\right)=\sum\left(\frac{x_{i}-\bar{x}}{\sum\left(x_{i}-\bar{x}\right)^{2}}\right)^{2} \sigma^{2} \\
=\sigma^{2} \frac{\sum\left(x_{i}-\bar{x}\right)^{2}}{\left[\sum\left(x_{i}-\bar{x}\right)^{2}\right]^{2}}=\frac{\sigma^{2}}{\sum\left(x_{i}-\bar{x}\right)^{2}}=\frac{\sigma^{2}}{S_{xx}}
\end{array}
\]
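A Monte Carlo sketch of both facts, \(E(\hat{\beta}_{1})=\beta_{1}\) and \(\operatorname{Var}(\hat{\beta}_{1})=\sigma^{2}/S_{xx}\) (the design, parameter values, and replication count are invented for illustration):

```python
import numpy as np

rng = np.random.default_rng(3)
x = np.linspace(0.0, 10.0, 30)             # fixed design, reused in every replication
beta0, beta1, sigma = 3.0, 1.5, 2.0
S_xx = np.sum((x - x.mean()) ** 2)

est = np.empty(20000)
for r in range(est.size):
    y = beta0 + beta1 * x + rng.normal(scale=sigma, size=x.size)
    est[r] = np.sum((x - x.mean()) * (y - y.mean())) / S_xx   # beta1_hat = S_xy / S_xx
print(est.mean(), beta1)                   # unbiasedness: sample mean approx beta_1
print(est.var(), sigma**2 / S_xx)          # variance approx sigma^2 / S_xx
```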
In matrix notation, with \(\boldsymbol{c}=\left(b_{1}, \ldots, b_{n}\right)^{\top}\) so that \(\hat{\beta}_{1}=\boldsymbol{c}^{\prime} \boldsymbol{y}\):
\[\begin{aligned}
\hat{\beta}_{0} &=\bar{y}-\hat{\beta}_{1} \bar{x}=\frac{1}{n} \mathbf{1}^{\prime} \boldsymbol{y}-\bar{x} \boldsymbol{c}^{\prime} \boldsymbol{y} \\
\operatorname{Var}\left(\hat{\beta}_{0}\right) &=\operatorname{Var}\left(\frac{1}{n} \mathbf{1}^{\prime} \boldsymbol{y}\right)+\operatorname{Var}\left(\bar{x} \boldsymbol{c}^{\prime} \boldsymbol{y}\right)-2 \operatorname{Cov}\left(\frac{1}{n} \mathbf{1}^{\prime} \boldsymbol{y}, \bar{x} \boldsymbol{c}^{\prime} \boldsymbol{y}\right)
\end{aligned}
\]
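Under the matrix model above, \(\operatorname{Var}(\hat{\boldsymbol{\beta}})=\sigma^{2}\left(\boldsymbol{X}^{\prime} \boldsymbol{X}\right)^{-1}\), whose top-left entry reproduces the \(\operatorname{Var}(a)=\sigma^{2}\left(\frac{1}{n}+\frac{\bar{x}^{2}}{S_{xx}}\right)\) formula from the start of the section. A quick numeric check (illustrative values):

```python
import numpy as np

x = np.linspace(0.0, 10.0, 30)
sigma = 2.0
X = np.column_stack([np.ones(x.size), x])
cov = sigma**2 * np.linalg.inv(X.T @ X)    # Var(beta_hat) = sigma^2 (X'X)^{-1}

S_xx = np.sum((x - x.mean()) ** 2)
var_b0 = sigma**2 * (1 / x.size + x.mean() ** 2 / S_xx)
print(np.isclose(cov[0, 0], var_b0))       # matrix entry matches the scalar formula
```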
Best Linear Unbiased Estimator (BLUE)
For a linear unbiased estimator \(\hat{\theta}\) of a parameter \(\theta\): if \(\operatorname{Var}(\hat{\theta}) \leq \operatorname{Var}(\tilde{\theta})\) for every linear unbiased estimator \(\tilde{\theta}\) of \(\theta\), then \(\hat{\theta}\) is called the best linear unbiased estimator of \(\theta\).
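To see BLUE concretely, compare OLS with another linear unbiased slope estimator, the endpoint slope \(\left(y_{n}-y_{1}\right) /\left(x_{n}-x_{1}\right)\); a simulation sketch (design and parameter values invented for illustration):

```python
import numpy as np

rng = np.random.default_rng(4)
x = np.linspace(0.0, 10.0, 30)
beta0, beta1, sigma = 3.0, 1.5, 2.0
S_xx = np.sum((x - x.mean()) ** 2)

ols = np.empty(20000)
endpoint = np.empty(20000)
for r in range(20000):
    y = beta0 + beta1 * x + rng.normal(scale=sigma, size=x.size)
    ols[r] = np.sum((x - x.mean()) * (y - y.mean())) / S_xx
    endpoint[r] = (y[-1] - y[0]) / (x[-1] - x[0])   # also linear and unbiased
print(ols.mean(), endpoint.mean())         # both approx beta_1 = 1.5
print(ols.var(), endpoint.var())           # OLS variance is strictly smaller
```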
Sum-of-squares decomposition
SST (total sum of squares):
\[SST=\sum_{i=1}^{n}\left(y_{i}-\bar{y}\right)^{2}
\]
Tip: $$\sum_{i=1}^{n}\left(y_{i}-\bar{y}\right)^{2}=\sum_{i=1}^{n} y_{i}^{2}-n \bar{y}^{2}$$
SSR (sum of squares for regression):
\[SSR=\sum_{i=1}^{n}\left(\hat{y}_{i}-\bar{y}\right)^{2}=\sum\left(\hat{\beta}_{0}+\hat{\beta}_{1} x_{i}-\bar{y}\right)^{2}=\hat{\beta}_{1}^{2} S_{xx}
\]
SSE (sum of squares for error):
\[SSE=\sum_{i=1}^{n}\left(y_{i}-\hat{y}_{i}\right)^{2}=\sum_{i=1}^{n} e_{i}^{2}
\]
\[\begin{aligned}
y-\bar{y} &=(y-\hat{y})+(\hat{y}-\bar{y}) \\
\Rightarrow \sum(y-\bar{y})^{2} &=\sum(y-\hat{y})^{2}+\sum(\hat{y}-\bar{y})^{2}+2 \sum(y-\hat{y})(\hat{y}-\bar{y}) \\
\because \sum(y-\hat{y})(\hat{y}-\bar{y}) &=\sum(y-\hat{y})(a+b x-\bar{y}) \\
&=\sum(y-\hat{y})[(a-\bar{y})+b x] \\
&=(a-\bar{y}) \sum(y-\hat{y})+b \sum(y-\hat{y}) x \\
&=(a-\bar{y}) \sum(y-a-b x)+b \sum(y-a-b x) x
\end{aligned}
\]
By the least squares normal equations:
\[\sum(y-a-b x)=0, \quad \sum(y-a-b x) x=0
\]
\(\therefore \sum(y-\hat{y})(\hat{y}-\bar{y})=0\)
\(\therefore \sum(y-\bar{y})^{2}=\sum(y-\hat{y})^{2}+\sum(\hat{y}-\bar{y})^{2}\)
\[SST=SSR+SSE
\]
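A numeric check of the decomposition (and of the Tip identity above), with made-up data:

```python
import numpy as np

rng = np.random.default_rng(5)
x = rng.uniform(0.0, 10.0, 40)
y = 3.0 + 1.5 * x + rng.normal(scale=2.0, size=40)

b1 = np.sum((x - x.mean()) * (y - y.mean())) / np.sum((x - x.mean()) ** 2)
b0 = y.mean() - b1 * x.mean()
y_hat = b0 + b1 * x

SST = np.sum((y - y.mean()) ** 2)
SSR = np.sum((y_hat - y.mean()) ** 2)
SSE = np.sum((y - y_hat) ** 2)
print(np.isclose(SST, SSR + SSE))                                # decomposition holds
print(np.isclose(SST, np.sum(y**2) - y.size * y.mean() ** 2))    # the Tip identity
```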