• R语言决策树分类模型


    # Environment setup and data loading for the SMART decision-tree script.
    #
    # NOTE: the former `rm(list = ls())` / `gc()` pair was removed. Wiping the
    # caller's workspace is a destructive side effect, and every object this
    # script uses is (re)assigned below, so stale objects cannot leak in.

    # memory.limit() only ever had an effect on Windows and has been a
    # warning-only stub since R 4.2.0, so call it only where it still works.
    if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
      memory.limit(4000)
    }
    library(corrplot)
    library(rpart)

    # Read one header-less, tab-separated SMART dump. The literal string "None"
    # in the file is treated as a missing value.
    read_smart_table <- function(path) {
      read.csv(path, header = FALSE, sep = "\t", na.strings = "None")
    }

    # Healthy-disk samples.
    data_health <- read_smart_table(
      "D:/smart_data0608/smart_data_section_good_15.txt"
    )
    # Failed-disk samples used for training.
    data_fault <- read_smart_table(
      "D:/smart_data0608/smart_data_section_failTrainSet_last24h.txt"
    )
    # Failed-disk samples used for testing.
    data_fault_test <- read_smart_table(
      "D:/smart_data0608/smart_data_section_failTestSet_last24h.txt"
    )
    
    # Column names shared by all three SMART tables (rename columns).
    # Defined once so the three data frames cannot drift apart.
    smart_columns <- c(
      "id", "serial_number", "update_time", "smart_health_status",
      "current_drive_temperature", "drive_trip_temperature",
      "elements_in_grown_defect_list", "manufactured_time", "cycle_count",
      "load_unload_count", "load_unload_count", "load_unload_cycles",
      "blocks_sent_to_initiator", "blocks_received_from_initiator",
      "blocks_read_from_cache",
      # The trailing space that used to follow this name was a typo.
      "num_commands_size_not_larger_than_segment_size",
      "num_commands_size_larger_than_segment_size",
      "num_hours_powered_up", "num_minutes_next_test",
      "read_corrected_ecc_fast", "read_corrected_ecc_delayed",
      "read_corrected_re", "read_total_errors_corrected",
      "read_correction_algo_invocations", "read_gigabytes_processed",
      "read_total_uncorrected_errors",
      "write_corrected_ecc_fast", "write_corrected_ecc_delayed",
      "write_corrected_re", "write_total_errors_corrected",
      "write_correction_algo_invocations", "write_gigabytes_processed",
      "write_total_uncorrected_errors",
      "verify_corrected_ecc_fast", "verify_corrected_ecc_delayed",
      "verify_corrected_re", "verify_total_errors_corrected",
      "verify_correction_algo_invocations", "verify_gigabytes_processed",
      "verify_total_uncorrected_errors",
      "non_medium_error_count"
    )

    # NOTE(review): "load_unload_count" is listed twice (columns 10 and 11).
    # The intended name of one of them is unknown, so the second occurrence is
    # only disambiguated (it becomes "load_unload_count.1") to keep `$` and
    # name-based lookups unambiguous. Confirm the real field name.
    smart_columns <- make.unique(smart_columns)

    # Fail loudly if a file does not have the expected number of columns
    # instead of silently producing NA column names.
    stopifnot(
      ncol(data_health) == length(smart_columns),
      ncol(data_fault) == length(smart_columns),
      ncol(data_fault_test) == length(smart_columns)
    )

    colnames(data_health) <- smart_columns
    colnames(data_fault) <- smart_columns
    colnames(data_fault_test) <- smart_columns
    
    # Class labels: 0 for rows from the healthy file, 1 for rows from the
    # failure files.
    data_health$label <- 0
    data_fault$label <- 1
    data_fault_test$label <- 1

    # Decision tree: assemble the training and test sets.
    # The first 70% of the healthy rows (in file order) form the training pool;
    # the remaining healthy rows go to the test set.
    n <- nrow(data_fault)
    n_health_train <- floor(nrow(data_health) * 0.7)

    # Aim for 20 healthy rows per failure row. Sampling is without replacement,
    # so cap the request at the pool size instead of letting sample() error out.
    n_health_sample <- min(n * 20, n_health_train)
    if (n_health_sample < n * 20) {
      warning(
        "Only ", n_health_train, " healthy training rows available; ",
        "requested ", n * 20, ".",
        call. = FALSE
      )
    }
    # NOTE(review): no seed is set, so the sampled rows differ between runs.
    health_train_idx <- sample.int(n_health_train, n_health_sample)
    dataNewTraining <- rbind(data_fault, data_health[health_train_idx, ])

    # A logical mask is used rather than a negative index because an empty
    # negative index (`-integer(0)`) would select no rows at all.
    in_health_test <- seq_len(nrow(data_health)) > n_health_train
    dataNewTest <- rbind(data_fault_test, data_health[in_health_test, ])
    
    # Fit a classification tree (method = "class") that predicts `label` from
    # the drive temperature, the grown-defect count and the read/write error
    # counters. The model is fitted BEFORE the PDF device is opened so that a
    # fitting error cannot leave a graphics device dangling.
    dt <- rpart(
      label ~ current_drive_temperature + elements_in_grown_defect_list +
        read_corrected_ecc_fast + read_corrected_ecc_delayed +
        read_corrected_re + read_total_errors_corrected +
        read_correction_algo_invocations + read_gigabytes_processed +
        read_total_uncorrected_errors +
        write_corrected_ecc_fast + write_corrected_ecc_delayed +
        write_corrected_re + write_total_errors_corrected +
        write_correction_algo_invocations + write_gigabytes_processed +
        write_total_uncorrected_errors,
      data = dataNewTraining,
      method = "class"
    )

    # Draw the tree into a PDF. `finally` guarantees the device is closed even
    # if plotting fails (for example when the fitted tree is only a root node).
    pdf(file = "D:/smart_data0608/smartDT_last24h.pdf", family = "GB1")
    invisible(tryCatch(
      {
        plot(dt, main = "smartDT")
        text(dt)
      },
      finally = dev.off()
    ))
    
    # Score every test row with the fitted tree and convert the scores into a
    # data frame. The code below reads the two score columns as `X0` and `X1`
    # (presumably the per-class probabilities for labels 0 and 1 after
    # data.frame() prefixes the numeric column names with "X").
    rawPredictScore <- predict(dt, dataNewTest)
    predictScore <- data.frame(rawPredictScore)

    # Predicted label: 0 when the class-0 score is strictly larger, otherwise 1
    # (ties go to 1). A single vectorised assignment replaces the former
    # `df[cond, ][, "label"] = value` pattern, which stops with
    # "replacement has 1 row, data has 0" whenever one of the two classes
    # receives no predictions.
    predictScore$label <- ifelse(predictScore$X0 > predictScore$X1, 0, 1)
    
    # Write one tab-separated line per test row, without header or row names:
    # predicted label, true label, update time, serial number.
    test_results <- data.frame(
      predictScore$label,
      dataNewTest$label,
      dataNewTest$update_time,
      dataNewTest$serial_number
    )
    # TRUE/FALSE are spelled out because the T/F shorthands are ordinary
    # variables that can be reassigned.
    write.table(
      test_results,
      file = "D:/smart_data0608/smartTestSetWithSerNO_last24h.txt",
      row.names = FALSE,
      col.names = FALSE,
      sep = "\t"
    )
    

      

    分类结果:

    //smartTestSetWithSerNO_last24h
    健康样本数/健康判为故障样本数:583670/978
    健康磁盘数/健康判为故障磁盘数:4150/12
    健康样本预测正确率为:0.9983243956345195
    健康盘预测正确率为:0.9971084337349397
    --------------------------------
    故障样本数/故障判为故障样本数:170/169
    故障磁盘数/故障判为故障磁盘数:11/11
    故障样本预测正确率为:0.9941176470588236
    故障盘预测正确率为:1.0


  • 相关阅读:
    【高端黑】软件工程师去理发店
    [SQL]用于提取组内最新数据,左连接,内连接,not exist三种方案中,到底谁最快?
    Oracle数据库访问客户端 sqldeveloper-19.2.1.247.2212-x64 下载
    《木兰辞》中最精彩的六句
    SqlComparison
    别让情绪扰乱心绪
    50年内神秘消失的恒星
    java命名总结
    针对nginx,来具体聊聊正向代理与反向代理 (转载)
    Nginx可以做什么?(转载)
  • 原文地址:https://www.cnblogs.com/xiaodf/p/5027169.html
Copyright © 2020-2023  润新知