• IsolationForest算法spark实现


    /*
    Notice:
    需要事先利用mvn将IsolationForest算法源码打成jar包并加入classpath,才可以使用import org.apache.spark.ml.iforest.IForest
    scala源代码地址:https://github.com/titicaca/spark-iforest
    
    python库sklearn.ensemble.IsolationForest官方文档地址:
    https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html
    */
    
    import org.apache.spark.ml.feature.StringIndexer
    import org.apache.spark.ml.feature.VectorAssembler
    import org.apache.spark.ml.iforest.IForest
    import org.apache.spark.ml.Pipeline
    import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
    
    
    // Load the Wisconsin Breast Cancer dataset from CSV.
    // Column types are inferred from the file contents ("inferSchema" = "true").
    val dataset = {
      val csvReader = spark.read.option("inferSchema", "true")
      csvReader.csv("/anomaly-detection/breastw.csv")
    }
    
    
    
    // Derive the "label" column by indexing the raw class column "_c10".
    // Intended mapping per the original note: 2 -> 0, 4 -> 1.
    // NOTE(review): presumably relies on class 2 being the most frequent value
    // in the data the indexer is fitted on - confirm.
    val indexer = {
      val labelIndexer = new StringIndexer()
      labelIndexer.setInputCol("_c10")
      labelIndexer.setOutputCol("label")
    }
    
    // Assemble the feature vector from the measurement columns only.
    //
    // The CSV is read without a header, so the raw columns carry auto-generated
    // names (_c0 .. _c10) and none of them contains "label". Filtering on the
    // name "label" alone therefore kept every column, including "_c10", which
    // is the class column the indexer turns into the label. Feeding it to the
    // model leaks the label into the features, so it is excluded explicitly.
    //
    // "_c0" is excluded as well, matching the upstream spark-iforest example,
    // which uses _c1 .. _c9 only.
    // NOTE(review): presumably "_c0" is a sample identifier - confirm against the data.
    val assembler = {
      val nonFeatureCols = Set("_c0", "_c10", "label")
      new VectorAssembler()
        .setInputCols(dataset.columns.filterNot(nonFeatureCols.contains))
        .setOutputCol("features")
    }
    
    // Configure the isolation forest estimator: 100 trees, 256 samples per tree,
    // contamination 0.35, no bootstrap, max depth 100, fixed seed.
    val iForest = {
      val forest = new IForest()
      forest.setNumTrees(100)
      forest.setMaxSamples(256)
      forest.setContamination(0.35)
      forest.setBootstrap(false)
      forest.setMaxDepth(100)
      forest.setSeed(123456L)
    }
    
    // Stage order: label indexing, feature assembly, then the isolation forest.
    val pipeline = {
      val stagedPipeline = new Pipeline()
      stagedPipeline.setStages(Array(indexer, assembler, iForest))
    }
    
    
    // Split the dataset 80/20 into training and test DataFrames, using a fixed seed.
    val (trainDF, testDF) = dataset.randomSplit(Array(0.8, 0.2), seed = 123456L) match {
      case Array(trainPart, testPart) => (trainPart, testPart)
    }

    // Fit the whole pipeline on the training part, then apply it to the held-out part.
    val model = pipeline.fit(trainDF)
    val predictions = model.transform(testDF)
    
    
    // Evaluate the predictions with area under the ROC curve (AUC).
    // NOTE(review): the metric is computed from the "prediction" column; if the
    // model also emits a continuous anomaly score, ranking by that score may be
    // more informative - confirm against the IForest output schema.
    val evaluator = {
      val aucEvaluator = new BinaryClassificationEvaluator()
      aucEvaluator.setLabelCol("label")
      aucEvaluator.setRawPredictionCol("prediction")
      aucEvaluator.setMetricName("areaUnderROC")
    }

    // Compute the metric on the test-set predictions and report it.
    val auc = evaluator.evaluate(predictions)
    println(s"The model's auc: $auc")
    
    /*
    
    scala> val auc = evaluator.evaluate(predictions)
    auc: Double = 0.9311653116531164
    
    scala> println(s"The model's auc: $auc")
    The model's auc: 0.9311653116531164
    
    */

    https://www.liangzl.com/get-article-detail-36344.html

  • 相关阅读:
    MinDoc v0.6 发布,轻量级文档在线管理系统
    PostMessage和SendMessage有什么区别?(有EnumChildWindowsProc的例子)
    将QuickReport报表保存为图片(使用TMetaFile和TMetafileCanvas)
    如何将JPEG缩略图放到LISTVIEW中(delphi listview自绘图形)
    栈和队列
    SQL调优日志--内存问题
    Nancy之实现API
    JAVA和.NET互调用
    NET Core环境并创建运行ASP.NET网站
    React Native
  • 原文地址:https://www.cnblogs.com/wangleBlogs/p/12653935.html
Copyright © 2020-2023  润新知