• 说说 ML.NET and AutoML


    我经常参加培训讲座,发现其中最受欢迎的讲座之一是“ML.NET 和 AutoML 的介绍”。ML.NET 是一个代码库,可用于创建经典(非神经网络)机器学习预测模型。AutoML 是一个非正式术语,指的是可自动为您生成 ML.NET 代码的命令行工具。

    以下是我使用的两个数据文件、演示用 ML.NET 程序的源代码,以及 AutoML 的 shell 命令。目标是根据年龄、工作类型、年收入和工作满意度来预测一个人的性别。

    文件: employees_norm_train.tsv

    isMale	age	job	income	satisfac
    False	0.66	mgmt	0.5210	low
    True	0.35	tech	0.8610	medium
    False	0.24	tech	0.4410	high
    True	0.43	sale	0.5170	medium
    True	0.37	mgmt	0.8860	medium
    True	0.30	sale	0.8790	low
    False	0.40	mgmt	0.2020	medium
    False	0.58	tech	0.2650	low
    True	0.27	mgmt	0.8480	low
    False	0.33	sale	0.5600	medium
    True	0.59	tech	0.2330	high
    True	0.52	sale	0.8700	high
    False	0.41	mgmt	0.5170	medium
    True	0.22	sale	0.3500	high
    False	0.61	sale	0.2980	low
    True	0.46	mgmt	0.6780	medium
    True	0.59	mgmt	0.8430	low
    False	0.28	tech	0.7730	high
    True	0.46	sale	0.8930	medium
    False	0.48	tech	0.2920	medium
    False	0.28	mgmt	0.6690	medium
    False	0.23	sale	0.8970	high
    True	0.60	mgmt	0.6270	high
    True	0.29	sale	0.7760	low
    True	0.24	tech	0.8750	high
    False	0.51	mgmt	0.4090	medium
    True	0.22	sale	0.8910	low
    True	0.19	tech	0.5380	low
    False	0.25	sale	0.9000	high
    True	0.44	tech	0.8980	medium
    True	0.35	mgmt	0.5380	medium
    True	0.29	sale	0.7610	low
    False	0.25	mgmt	0.3450	medium
    False	0.66	mgmt	0.2210	low
    False	0.43	tech	0.7450	medium
    True	0.42	sale	0.8520	medium
    True	0.44	mgmt	0.6580	medium
    False	0.42	sale	0.6970	medium
    True	0.56	tech	0.3680	high
    True	0.38	mgmt	0.2600	low

    文件: employees_norm_test.tsv

    isMale	age	job	income	satisfac
    True	0.50	mgmt	0.5470	medium
    False	0.67	tech	0.3200	low
    False	0.23	sale	0.7510	high
    True	0.18	tech	0.7950	low
    False	0.33	mgmt	0.6210	medium
    True	0.47	sale	0.4650	medium
    True	0.59	sale	0.7420	high
    True	0.51	tech	0.4970	medium
    False	0.33	tech	0.2630	medium
    False	0.35	mgmt	0.8300	high

    文件: GenderMLdotNETProgram.cs

    using System;
    using Microsoft.ML;
    using Microsoft.ML.Data;
    using Microsoft.ML.Trainers;
    namespace GenderMLdotNET
    {
      /// <summary>
      /// Console demo: trains an L-BFGS logistic regression model with ML.NET
      /// that predicts the "isMale" label from age, job, income and
      /// satisfaction, reports accuracy on the training data, and makes one
      /// sample prediction.
      /// </summary>
      class GenderMLdotNETProgram
      {
        /// <summary>
        /// Program entry point. Loads the training TSV file, builds the
        /// encoding + training pipeline, fits the model, evaluates it and
        /// prints a single prediction to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
          Console.WriteLine("\nBegin ML.NET gender demo \n");

          // Fixed seed so that repeated runs give the same results.
          MLContext mlc = new MLContext(seed: 1);

          // 1. load data and create data pipeline
          Console.WriteLine("\nLoading norm data into memory \n");

          // Verbatim string: the backslashes are path separators, not escapes.
          string trainDataPath =
            @"..\..\..\Data\employees_norm_train.tsv";

          // The row type must be supplied explicitly; ModelInput maps the
          // tab-separated columns to properties via [LoadColumn].
          IDataView trainData =
            mlc.Data.LoadFromTextFile<ModelInput>
            (trainDataPath, '\t', hasHeader: true);

          // One-hot encode the two text columns in place (same input and
          // output column name), then pack every predictor into "Features".
          var jobEncoder = mlc.Transforms.Categorical.OneHotEncoding(new[]
            { new InputOutputColumnPair("job", "job") });
          var satisfacEncoder = mlc.Transforms.Categorical.OneHotEncoding(new[]
            { new InputOutputColumnPair("satisfac", "satisfac") });
          var featureBuilder = mlc.Transforms.Concatenate("Features", new[]
            { "age", "job", "income", "satisfac" });
          var dataPipe = jobEncoder.Append(satisfacEncoder).Append(featureBuilder);

          // 2. train model
          Console.WriteLine("Creating logistic regression model");
          var options =
            new LbfgsLogisticRegressionBinaryTrainer.Options()
          {
            LabelColumnName = "isMale",
            FeatureColumnName = "Features",
            MaximumNumberOfIterations = 100,
            OptimizationTolerance = 1e-8f
          };

          var trainer =
            mlc.BinaryClassification.Trainers.
            LbfgsLogisticRegression(options);
          var trainPipe = dataPipe.Append(trainer);
          Console.WriteLine("Starting training");
          ITransformer model = trainPipe.Fit(trainData);
          Console.WriteLine("Training complete");

          // 3. evaluate model
          // NOTE: accuracy is measured on the same data the model was
          // trained on, not on the separate test file.
          IDataView predictions = model.Transform(trainData);
          var metrics = mlc.BinaryClassification.
            EvaluateNonCalibrated(predictions, "isMale", "Score");
          Console.Write("Model accuracy on training data = ");
          Console.WriteLine(metrics.Accuracy.ToString("F4") + "\n");

          // 4. use model
          // Inputs are given in the same normalized form as the data file
          // (0.32 and 0.4900 correspond to the "32" and "$49K" printed below).
          ModelInput X = new ModelInput();
          X.Age = 0.32f; X.Job = "mgmt"; X.Income = 0.4900f;
          X.Satisfac = "medium";

          // Input and output row types must be supplied explicitly.
          var pe = mlc.Model.CreatePredictionEngine<ModelInput, ModelOutput>(model);
          var Y = pe.Predict(X);
          Console.Write("Set age = 32, job = mgmt, income = $49K, ");
          Console.WriteLine("satisfac = medium");
          Console.Write("Predicted isMale : ");
          Console.WriteLine(Y.PredictedLabel);

          Console.WriteLine("\nEnd ML.NET demo ");
          Console.ReadLine();  // keep the console window open until Enter
        } // Main
      } // Program
    
      /// <summary>
      /// Prediction result row read back from the trained binary classifier.
      /// </summary>
      /// <remarks>
      /// ML.NET column names are case-sensitive. The binary trainer emits
      /// columns named "PredictedLabel" and "Score" (the evaluation call in
      /// Main already refers to "Score"), so the attributes must use that
      /// exact casing or the properties are not bound to the model output.
      /// </remarks>
      class ModelOutput
      {
        /// <summary>Predicted class: true means isMale.</summary>
        [ColumnName("PredictedLabel")]
        public bool PredictedLabel { get; set; }

        /// <summary>Raw (uncalibrated) score produced by the model.</summary>
        [ColumnName("Score")]
        public float Score { get; set; }
      }
    
      /// <summary>
      /// One row of the employee TSV files. Each property is bound to a
      /// zero-based file column through LoadColumn and exposed to the
      /// pipeline under the lower-case name given by ColumnName, matching
      /// the file header (isMale, age, job, income, satisfac).
      /// </summary>
      class ModelInput
      {
        /// <summary>Label to predict (file column 0).</summary>
        [LoadColumn(0)]
        [ColumnName("isMale")]
        public bool IsMale { get; set; }

        /// <summary>Age as a numeric value (file column 1).</summary>
        [LoadColumn(1)]
        [ColumnName("age")]
        public float Age { get; set; }

        /// <summary>Job type text, e.g. mgmt, tech or sale (file column 2).</summary>
        [LoadColumn(2)]
        [ColumnName("job")]
        public string Job { get; set; }

        /// <summary>Income as a numeric value (file column 3).</summary>
        [LoadColumn(3)]
        [ColumnName("income")]
        public float Income { get; set; }

        /// <summary>Job satisfaction text: low, medium or high (file column 4).</summary>
        [LoadColumn(4)]
        [ColumnName("satisfac")]
        public string Satisfac { get; set; }
      }
    } // ns

    AutoML命令:

    REM Run the ML.NET CLI (AutoML) to search for a binary classifier that
    REM predicts the isMale column, using the train/test TSV files in .\Data.
    REM The dataset paths need their backslash separators (.\Data\file.tsv).
    mlnet auto-train ^
    --task binary-classification ^
    --dataset ".\Data\employees_norm_train.tsv" ^
    --test-dataset ".\Data\employees_norm_test.tsv" ^
    --label-column-name isMale ^
    --max-exploration-time 60 ^
    --name PredictGenderAutoML
  • 相关阅读:
    pycharm上运行django服务器端、以及创建app方法
    Python实现淘宝秒杀聚划算自动提醒源码
    Python版:Selenium2.0之WebDriver学习总结_实例1
    windows上使用pip下载东西时报编码错误问题解决方法
    模块购物商城和ATM机代码:
    Python网页信息采集:使用PhantomJS采集淘宝天猫商品内容
    android用户界面之ProgressBar教程实例汇总
    推荐12个亲测Android开发源码(包括应用、游戏、效果等等)
    Android开发各种demo集合
    Android Service
  • 原文地址:https://www.cnblogs.com/BeanHsiang/p/12546140.html
Copyright © 2020-2023  润新知