sql:
def SubDependentDataSql(self, DatabaseName, TableName, DependentVariable, IndependentVariable):
    """Build the SQL that summarises the independent variable per group.

    Rows are grouped by the value of the dependent variable; for every group
    the query selects N (row count), SUM, STD (sample standard deviation via
    STDDEV_SAMP), MAX, MIN and AVG of the independent variable.

    Args:
        DatabaseName: Database (schema) that holds the table.
        TableName: Table to read from.
        DependentVariable: Column name of the dependent variable (group key).
        IndependentVariable: Column name of the independent variable
            (the aggregated column).

    Returns:
        The SQL statement as a string.
    """
    # NOTE(review): every argument is interpolated straight into the SQL text
    # with no escaping, so callers must only pass trusted identifiers.
    subdependentdatasql = """
        select * from (
            select
                repDB.`{dep}` as `{dep}`
                ,COUNT(1) as N
                ,SUM(convert(Ifnull(repDB.`{ind}`, 0),decimal(65,30))) as SUM
                ,STDDEV_SAMP(convert(Ifnull(repDB.`{ind}`, 0),decimal(65,30))) as STD
                ,MAX(convert(Ifnull(repDB.`{ind}`, 0),decimal(65,30))) as MAX
                ,MIN(convert(Ifnull(repDB.`{ind}`, 0),decimal(65,30))) as MIN
                ,AVG(convert(Ifnull(repDB.`{ind}`, 0),decimal(65,30))) as AVG
            from {db}.{table} as repDB
            where trim(Ifnull(repDB.`{ind}`, '')) <> ''
            AND trim(Ifnull(repDB.`{dep}`, '')) <> ''
            group by repDB.`{dep}`
        ) as SUB
        order by CONVERT(SUB.`{dep}` , SIGNED)
    """.format(
        dep=DependentVariable,
        ind=IndependentVariable,
        db=DatabaseName,
        table=TableName,
    )
    # The original built the statement but never handed it back to the caller.
    return subdependentdatasql
求中值(中位数)前先查询数据的 sql:
def MedianValueDataSql(self, DatabaseName, TableName, DependentVariable, IndependentVariable, optionID):
    """Build the SQL that fetches the sorted values used to compute a median.

    The query selects the independent variable for the rows whose dependent
    variable equals ``optionID``, ordered by the selected value. It has to be
    run once for every option ID the dependent variable has.

    Args:
        DatabaseName: Database (schema) that holds the table.
        TableName: Table to read from.
        DependentVariable: Column name of the dependent variable (filter column).
        IndependentVariable: Column name of the independent variable
            (the selected and ordered column).
        optionID: Value the dependent variable is compared against. It is
            inserted into the SQL verbatim, without quoting.

    Returns:
        The SQL statement as a string.
    """
    # NOTE(review): identifiers and optionID are interpolated straight into the
    # SQL text with no escaping, so callers must only pass trusted values.
    medianvaluedatasql = """
        select Ifnull(repDB.`{ind}`, 0) as `{ind}`
        from {db}.{table} as repDB
        where trim(Ifnull(repDB.`{ind}`, '')) <> ''
        AND trim(Ifnull(repDB.`{dep}`, '')) = {option}
        ORDER BY `{ind}`;
    """.format(
        ind=IndependentVariable,
        dep=DependentVariable,
        db=DatabaseName,
        table=TableName,
        option=optionID,
    )
    # The original built the statement but never handed it back to the caller.
    return medianvaluedatasql
标准误平均值
标准误是样本均值与总体均值的误差估计
标准误 = 标准差 / sqrt(样本量),即 $SE = s / \sqrt{n}$ (其中 s 为由样本算出来的标准差,n 为样本量)