我经常参加培训讲座,发现最受欢迎的讲座之一是"ML.NET 和 AutoML 简介"。ML.NET 是一个代码库,可用于创建经典(非神经网络)的机器学习预测模型。AutoML 是一个非正式术语,指的是一款能够自动为您生成 ML.NET 代码的命令行工具。
以下是我使用的两个数据文件、ML.NET 演示程序的源代码,以及 AutoML 的 shell 命令。目标是根据年龄、工作类型、年收入和工作满意度来预测一个人的性别。
文件: employees_norm_train.tsv
isMale age job income satisfac False 0.66 mgmt 0.5210 low True 0.35 tech 0.8610 medium False 0.24 tech 0.4410 high True 0.43 sale 0.5170 medium True 0.37 mgmt 0.8860 medium True 0.30 sale 0.8790 low False 0.40 mgmt 0.2020 medium False 0.58 tech 0.2650 low True 0.27 mgmt 0.8480 low False 0.33 sale 0.5600 medium True 0.59 tech 0.2330 high True 0.52 sale 0.8700 high False 0.41 mgmt 0.5170 medium True 0.22 sale 0.3500 high False 0.61 sale 0.2980 low True 0.46 mgmt 0.6780 medium True 0.59 mgmt 0.8430 low False 0.28 tech 0.7730 high True 0.46 sale 0.8930 medium False 0.48 tech 0.2920 medium False 0.28 mgmt 0.6690 medium False 0.23 sale 0.8970 high True 0.60 mgmt 0.6270 high True 0.29 sale 0.7760 low True 0.24 tech 0.8750 high False 0.51 mgmt 0.4090 medium True 0.22 sale 0.8910 low True 0.19 tech 0.5380 low False 0.25 sale 0.9000 high True 0.44 tech 0.8980 medium True 0.35 mgmt 0.5380 medium True 0.29 sale 0.7610 low False 0.25 mgmt 0.3450 medium False 0.66 mgmt 0.2210 low False 0.43 tech 0.7450 medium True 0.42 sale 0.8520 medium True 0.44 mgmt 0.6580 medium False 0.42 sale 0.6970 medium True 0.56 tech 0.3680 high True 0.38 mgmt 0.2600 low
文件: employees_norm_test.tsv
isMale age job income satisfac True 0.50 mgmt 0.5470 medium False 0.67 tech 0.3200 low False 0.23 sale 0.7510 high True 0.18 tech 0.7950 low False 0.33 mgmt 0.6210 medium True 0.47 sale 0.4650 medium True 0.59 sale 0.7420 high True 0.51 tech 0.4970 medium False 0.33 tech 0.2630 medium False 0.35 mgmt 0.8300 high
文件: GenderMLdotNETProgram.cs
using System;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;

namespace GenderMLdotNET
{
  /// <summary>
  /// Console demo: trains an L-BFGS logistic regression binary classifier
  /// with ML.NET on the normalized employee data and predicts the isMale
  /// label for one hand-built input row.
  /// </summary>
  class GenderMLdotNETProgram
  {
    /// <summary>
    /// Loads the training file, builds the encoding/feature pipeline, trains
    /// the model, prints its accuracy on the training data, and prints the
    /// prediction for a single example.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
      Console.WriteLine(" Begin ML.NET gender demo ");
      MLContext mlc = new MLContext(seed: 1);

      // 1. load data and create data pipeline
      Console.WriteLine(" Loading norm data into memory ");

      // Verbatim string: a regular literal would reject the "\." sequences
      // as invalid escapes.
      string trainDataPath = @"..\..\..\Data\employees_norm_train.tsv";

      // The file is tab-separated (.tsv); the row type must be supplied as
      // the generic argument so the loader can map columns to ModelInput.
      IDataView trainData = mlc.Data.LoadFromTextFile<ModelInput>(
        trainDataPath, '\t', hasHeader: true);

      // One-hot encode the two string columns in place (same input and
      // output column name), then gather all predictors into "Features".
      var a = mlc.Transforms.Categorical.OneHotEncoding(
        new[] { new InputOutputColumnPair("job", "job") });
      var b = mlc.Transforms.Categorical.OneHotEncoding(
        new[] { new InputOutputColumnPair("satisfac", "satisfac") });
      var c = mlc.Transforms.Concatenate("Features",
        new[] { "age", "job", "income", "satisfac" });
      var dataPipe = a.Append(b).Append(c);

      // 2. create and train model
      Console.WriteLine("Creating logistic regression model");
      var options = new LbfgsLogisticRegressionBinaryTrainer.Options()
      {
        LabelColumnName = "isMale",
        FeatureColumnName = "Features",
        MaximumNumberOfIterations = 100,
        OptimizationTolerance = 1e-8f
      };
      var trainer = mlc.BinaryClassification.Trainers.
        LbfgsLogisticRegression(options);
      var trainPipe = dataPipe.Append(trainer);

      Console.WriteLine("Starting training");
      ITransformer model = trainPipe.Fit(trainData);
      Console.WriteLine("Training complete");

      // 3. evaluate model
      // Accuracy is measured on the same data the model was trained on.
      IDataView predictions = model.Transform(trainData);
      var metrics = mlc.BinaryClassification.
        EvaluateNonCalibrated(predictions, "isMale", "Score");
      Console.Write("Model accuracy on training data = ");
      Console.WriteLine(metrics.Accuracy.ToString("F4") + " ");

      // 4. use model
      // IsMale is left at its default; it is the label being predicted.
      ModelInput X = new ModelInput();
      X.Age = 0.32f;
      X.Job = "mgmt";
      X.Income = 0.4900f;
      X.Satisfac = "medium";

      // Input and output row types must be given explicitly; they cannot be
      // inferred from the ITransformer argument.
      var pe = mlc.Model.CreatePredictionEngine<ModelInput, ModelOutput>(model);
      var Y = pe.Predict(X);

      Console.Write("Set age = 32, job = mgmt, income = $49K, ");
      Console.WriteLine("satisfac = medium");
      Console.Write("Predicted isMale : ");
      Console.WriteLine(Y.PredictedLabel);

      Console.WriteLine(" End ML.NET demo ");
      Console.ReadLine();
    } // Main
  } // Program

  /// <summary>
  /// Row type read back from the prediction engine.
  /// </summary>
  /// <remarks>
  /// Column names use the capitalized "PredictedLabel" / "Score" spelling,
  /// matching the "Score" column name passed to EvaluateNonCalibrated in
  /// Main. NOTE(review): this assumes ML.NET column lookup is case-sensitive
  /// and that the trainer emits these names; confirm against the trainer
  /// documentation.
  /// </remarks>
  class ModelOutput
  {
    [ColumnName("PredictedLabel")]
    public bool PredictedLabel { get; set; }

    [ColumnName("Score")]
    public float Score { get; set; }
  }

  /// <summary>
  /// Row type for the employee data files: LoadColumn gives the zero-based
  /// column index in the file, ColumnName the name used in the pipeline.
  /// </summary>
  class ModelInput
  {
    [ColumnName("isMale"), LoadColumn(0)]
    public bool IsMale { get; set; }

    [ColumnName("age"), LoadColumn(1)]
    public float Age { get; set; }

    [ColumnName("job"), LoadColumn(2)]
    public string Job { get; set; }

    [ColumnName("income"), LoadColumn(3)]
    public float Income { get; set; }

    [ColumnName("satisfac"), LoadColumn(4)]
    public string Satisfac { get; set; }
  }
} // ns
AutoML命令:
REM Run the ML.NET CLI (AutoML) on the employee data to generate a
REM binary-classification project named PredictGenderAutoML that predicts
REM the isMale column. Dataset paths are relative to the current directory.
REM NOTE(review): --max-exploration-time is presumably in seconds; confirm
REM against the mlnet CLI reference for the installed version.
mlnet auto-train ^
  --task binary-classification ^
  --dataset ".\Data\employees_norm_train.tsv" ^
  --test-dataset ".\Data\employees_norm_test.tsv" ^
  --label-column-name isMale ^
  --max-exploration-time 60 ^
  --name PredictGenderAutoML