查找有关疾病风险评分的代码，找到了一个关于银行信贷的 R 包，不知道行不行得通，日后再细看。
# Credit scoring walkthrough with the creditR package on its `germancredit`
# data: WOE transformation, variable elimination, logistic regression,
# PD calibration, master scale and validation statistics.
# NOTE(review): descriptions of what the creditR helpers compute are inferred
# from their names and arguments; confirm against the package documentation.

# Setup ----
# This step requires installing a large number of packages.
library(devtools)
devtools::install_github("ayhandis/creditR")
library(creditR)

# List the objects provided by the attached creditR package.
ls("package:creditR")

# Data ----
data("germancredit")
str(germancredit)
head(germancredit)

# Keep four predictors plus the target column.
sample_data <- germancredit[, c(
  "duration.in.month",
  "credit.amount",
  "installment.rate.in.percentage.of.disposable.income",
  "age.in.years",
  "creditability"
)]

# Recode the target as numeric: 1 when creditability is "bad", 0 otherwise.
sample_data$creditability <- ifelse(sample_data$creditability == "bad", 1, 0)

missing_ratio(sample_data)

# Train / test split ----
# Positional arguments kept as in the original call
# (presumably seed = 123 and train share = 0.70 -- TODO confirm).
traintest <- train_test_split(sample_data, 123, 0.70)
train <- traintest$train
test <- traintest$test

# WOE transformation ----
# Binning rules are derived from the training set only, then deployed to both
# the training and the test set.
woerules <- woe.binning(
  df = train,
  target.var = "creditability",
  pred.var = train,
  event.class = 1
)

train_woe <- woe.binning.deploy(train, woerules, add.woe.or.dum.var = "woe")
train_woe <- woe.get.clear.data(
  train_woe,
  default_flag = "creditability",
  prefix = "woe"
)

test_woe <- woe.binning.deploy(test, woerules, add.woe.or.dum.var = "woe")
test_woe <- woe.get.clear.data(
  test_woe,
  default_flag = "creditability",
  prefix = "woe"
)

# Variable screening ----
IV.calc.data(train_woe, "creditability")
Gini.univariate.data(train_woe, "creditability")

# 0.10 is the elimination threshold passed to Gini_elimination().
eliminated_data <- Gini_elimination(train_woe, "creditability", 0.10)
str(eliminated_data)

# Variable clustering ----
clustering_data <- variable.clustering(eliminated_data, "creditability", 2)
clustering_data

selected_data <- variable.clustering.gini(eliminated_data, "creditability", 2)

correlation.cluster(
  eliminated_data,
  clustering_data,
  variables = "variable",
  clusters = "Group"
)

# Logistic regression ----
model <- glm(
  formula = creditability ~ .,
  family = binomial(link = "logit"),
  data = eliminated_data
)
summary(model)

woe.glm.feature.importance(eliminated_data, model, "creditability")

# PD data sets ----
# Append the fitted / predicted probabilities as the last column.
ms_train_data <- cbind(eliminated_data, model$fitted.values)
ms_test_data <- cbind(
  test_woe[, colnames(eliminated_data)],
  predict(model, type = "response", newdata = test_woe)
)

# Rename only the appended column. The other columns keep the names they
# already carry from `eliminated_data`, so the labels stay correct whatever
# variables (and order) Gini_elimination() retained.
colnames(ms_train_data)[ncol(ms_train_data)] <- "PD"
colnames(ms_test_data)[ncol(ms_test_data)] <- "PD"

# Regression calibration ----
regression_calibration <- regression.calibration(
  model,
  test_woe,
  "creditability"
)
regression_calibration$calibration_data
regression_calibration$calibration_model
regression_calibration$calibration_formula

# Master scale ----
master_scale <- master.scale(ms_train_data, "creditability", "PD")
master_scale

# Bayesian calibration ----
# Score is the log-odds of PD.
ms_train_data$Score <- log(ms_train_data$PD / (1 - ms_train_data$PD))
ms_test_data$Score <- log(ms_test_data$PD / (1 - ms_test_data$PD))

bayesian_method <- bayesian.calibration(
  data = master_scale,
  average_score = "Score",
  total_observations = "Total.Observations",
  PD = "PD",
  central_tendency = 0.05,
  calibration_data = ms_train_data,
  calibration_data_score = "Score"
)
bayesian_method$Calibration.model
bayesian_method$Calibration.formula

scaled.score(bayesian_method$calibration_data, "calibrated_pd", 3000, 15)

# Validation ----
vif.calc(model)
Gini(model$fitted.values, ms_train_data$creditability)
k.fold.cross.validation.glm(ms_train_data, "creditability", 5, 1)
Kolmogorov.Smirnov(ms_train_data, "creditability", "PD")
Kolmogorov.Smirnov(ms_test_data, "creditability", "PD")
SSI.calc.data(train_woe, test_woe, "creditability")
Herfindahl.Hirschman.Index(master_scale, "Total.Observations")
Anchor.point(master_scale, "PD", "Total.Observations", 0.30)
chisquare.test(master_scale, "PD", "Bad.Count", "Total.Observations", 0.90)

# Observed default rate per master-scale row, used by the binomial test.
master_scale$DR <- master_scale$Bad.Count / master_scale$Total.Observations
Binomial.test(master_scale, "Total.Observations", "PD", "DR", 0.90, "one")
https://www.mediecogroup.com/news_detail/514/1/
https://www.mediecogroup.com/method_topic_article_detail/281/?ty=methods
https://www.mediecogroup.com/method_topic_article_detail/296/?ty=methods
https://blog.csdn.net/tMb8Z9Vdm66wH68VX1/article/details/89369428
https://www.analyticsvidhya.com/blog/2019/03/introduction-creditr-r-package-enhance-credit-risk-scoring-validation-r-codes/