• Intel DAAL AI加速 ——传统决策树和随机森林


    # file: dt_cls_dense_batch.py
    #===============================================================================
    # Copyright 2014-2018 Intel Corporation.
    #
    # This software and the related documents are Intel copyrighted  materials,  and
    # your use of  them is  governed by the  express license  under which  they were
    # provided to you (License).  Unless the License provides otherwise, you may not
    # use, modify, copy, publish, distribute,  disclose or transmit this software or
    # the related documents without Intel's prior written permission.
    #
    # This software and the related documents  are provided as  is,  with no express
    # or implied  warranties,  other  than those  that are  expressly stated  in the
    # License.
    #===============================================================================
    
    ## <a name="DAAL-EXAMPLE-PY-DT_CLS_DENSE_BATCH"></a>
    ## example dt_cls_dense_batch.py
    
    import os
    import sys
    
    from daal.algorithms.decision_tree.classification import prediction, training
    from daal.algorithms import classifier
    from daal.data_management import (
        FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
    )
    utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
    if utils_folder not in sys.path:
        sys.path.insert(0, utils_folder)
    from utils import printNumericTables
    
    # Root folder of the example data sets, relative to the working directory
    DAAL_PREFIX = os.path.join('..', 'data')

    # CSV inputs for training, pruning and testing, all under <DAAL_PREFIX>/batch
    trainDatasetFileName, pruneDatasetFileName, testDatasetFileName = (
        os.path.join(DAAL_PREFIX, 'batch', csvName)
        for csvName in (
            'decision_tree_train.csv',
            'decision_tree_prune.csv',
            'decision_tree_test.csv',
        )
    )

    # nFeatures is the column count given to every feature table;
    # nClasses is handed to the training algorithm.
    nFeatures = 5
    nClasses = 5

    # Module-level slots filled in later by trainModel() and testModel()
    model = predictionResult = testGroundTruth = None
    
    
    def trainModel():
        """Train the decision tree classifier and publish it as the global ``model``.

        Loads the training and the pruning CSV files into numeric tables, hands
        both sets to the batch training algorithm and stores the trained model
        in the module-level ``model`` variable. Returns nothing.
        """
        global model

        # ---- training set ----
        trainSource = FileDataSource(trainDatasetFileName,
                                     DataSourceIface.notAllocateNumericTable,
                                     DataSourceIface.doDictionaryFromContext)

        # Feature and label tables are created unallocated and are loaded
        # together through one merged table.
        trainFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
        trainLabels = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
        trainTable = MergedNumericTable(trainFeatures, trainLabels)
        trainSource.loadDataBlock(trainTable)

        # ---- pruning set (same loading scheme as the training set) ----
        pruneSource = FileDataSource(pruneDatasetFileName,
                                     DataSourceIface.notAllocateNumericTable,
                                     DataSourceIface.doDictionaryFromContext)
        pruneFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
        pruneLabels = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
        pruneTable = MergedNumericTable(pruneFeatures, pruneLabels)
        pruneSource.loadDataBlock(pruneTable)

        # ---- training ----
        trainer = training.Batch(nClasses)

        # Inputs: training data/labels plus the separate pruning data/labels
        trainer.input.set(classifier.training.data, trainFeatures)
        trainer.input.set(classifier.training.labels, trainLabels)
        trainer.input.setTable(training.dataForPruning, pruneFeatures)
        trainer.input.setTable(training.labelsForPruning, pruneLabels)

        # Run the computation and keep only the model from its result
        trainResult = trainer.compute()
        model = trainResult.get(classifier.training.model)
    
    def testModel():
        """Run the trained decision tree on the test data set.

        Loads the test CSV file, predicts with the global ``model`` and stores
        the outcome in the globals ``testGroundTruth`` (labels read from the
        file) and ``predictionResult`` (result object of the prediction
        algorithm). Returns nothing.
        """
        global testGroundTruth, predictionResult

        # ---- test set ----
        testSource = FileDataSource(testDatasetFileName,
                                    DataSourceIface.notAllocateNumericTable,
                                    DataSourceIface.doDictionaryFromContext)

        # Feature and label tables are created unallocated and are loaded
        # together through one merged table.
        testFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
        testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
        testTable = MergedNumericTable(testFeatures, testGroundTruth)
        testSource.loadDataBlock(testTable)

        # ---- prediction (default method) ----
        predictor = prediction.Batch()

        # Inputs: the test features and the model produced by trainModel()
        predictor.input.setTable(classifier.prediction.data,  testFeatures)
        predictor.input.setModel(classifier.prediction.model, model)

        # The whole result object is kept; printResults() extracts the table
        predictionResult = predictor.compute()
    
    
    def printResults():
        """Print the ground-truth labels side by side with the predicted labels.

        Reads the globals ``testGroundTruth`` and ``predictionResult`` set by
        testModel(). The value 20 is passed to printNumericTables along with a
        caption that mentions the first 20 observations.
        """
        predictedLabels = predictionResult.get(classifier.prediction.prediction)

        printNumericTables(
            testGroundTruth, predictedLabels,
            "Ground truth", "Classification results",
            "Decision tree classification results (first 20 observations):",
            20, flt64=False
        )
    
    if __name__ == "__main__":
        # Run the example end to end: train, predict, then report.
        for step in (trainModel, testModel, printResults):
            step()
    

      

    随机森林的示例代码如下:

    # file: df_cls_dense_batch.py
    #===============================================================================
    # Copyright 2014-2018 Intel Corporation.
    #
    # This software and the related documents are Intel copyrighted  materials,  and
    # your use of  them is  governed by the  express license  under which  they were
    # provided to you (License).  Unless the License provides otherwise, you may not
    # use, modify, copy, publish, distribute,  disclose or transmit this software or
    # the related documents without Intel's prior written permission.
    #
    # This software and the related documents  are provided as  is,  with no express
    # or implied  warranties,  other  than those  that are  expressly stated  in the
    # License.
    #===============================================================================
    
    ## <a name="DAAL-EXAMPLE-PY-DF_CLS_DENSE_BATCH"></a>
    ## example df_cls_dense_batch.py
    
    import os
    import sys
    
    from daal.algorithms import decision_forest
    from daal.algorithms.decision_forest.classification import prediction, training
    from daal.algorithms import classifier
    from daal.data_management import (
        FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
        MergedNumericTable, features
    )
    
    utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
    if utils_folder not in sys.path:
        sys.path.insert(0, utils_folder)
    from utils import printNumericTable, printNumericTables
    
    # Root folder of the example data sets, relative to the working directory
    DAAL_PREFIX = os.path.join('..', 'data')

    # CSV inputs for training and testing, both under <DAAL_PREFIX>/batch
    trainDatasetFileName, testDatasetFileName = (
        os.path.join(DAAL_PREFIX, 'batch', csvName)
        for csvName in (
            'df_classification_train.csv',
            'df_classification_test.csv',
        )
    )

    # nFeatures is the column count given to every feature table;
    # nClasses is handed to the training and prediction algorithms.
    nFeatures = 3
    nClasses = 5

    # Values copied into the training algorithm's parameters by trainModel()
    nTrees = 10
    minObservationsInLeafNode = 8

    # Module-level slots filled in later by trainModel() and testModel()
    model = predictionResult = testGroundTruth = None
    
    
    def trainModel():
        """Train the decision forest classifier and publish it as the global ``model``.

        Loads the training CSV file into numeric tables, tags the feature types
        in the data dictionary, configures and runs the batch training
        algorithm, stores the trained model in the module-level ``model``
        variable and prints the variable importance and out-of-bag error
        tables. Returns nothing.
        """
        global model

        # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
        trainDataSource = FileDataSource(
            trainDatasetFileName,
            DataSourceIface.notAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext
        )

        # Create Numeric Tables for training data and labels; they are loaded
        # together through one merged table
        trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
        trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
        mergedData = MergedNumericTable(trainData, trainGroundTruth)

        # Retrieve the data from the input file
        trainDataSource.loadDataBlock(mergedData)

        # Get the data dictionary of the feature table. The local is named
        # featureDictionary so that it does not shadow the builtin ``dict``.
        featureDictionary = trainData.getDictionary()

        # Columns 0 and 1 are marked continuous, column 2 categorical
        featureDictionary[0].featureType = features.DAAL_CONTINUOUS
        featureDictionary[1].featureType = features.DAAL_CONTINUOUS
        featureDictionary[2].featureType = features.DAAL_CATEGORICAL

        # Create an algorithm object to train the decision forest classification model
        algorithm = training.Batch(nClasses)
        algorithm.parameter.nTrees = nTrees
        algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
        algorithm.parameter.featuresPerNode = nFeatures
        # Request the MDI variable importance and the out-of-bag error; both
        # are printed from the training result below
        algorithm.parameter.varImportance = decision_forest.training.MDI
        algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError

        # Pass the training data set and dependent values to the algorithm
        algorithm.input.set(classifier.training.data, trainData)
        algorithm.input.set(classifier.training.labels, trainGroundTruth)

        # Train the decision forest classification model and retrieve the results of the training algorithm
        trainingResult = algorithm.compute()
        model = trainingResult.get(classifier.training.model)
        printNumericTable(trainingResult.getTable(training.variableImportance), "Variable importance results: ")
        printNumericTable(trainingResult.getTable(training.outOfBagError), "OOB error: ")
    
    def testModel():
        """Run the trained decision forest on the test data set.

        Loads the test CSV file, tags the feature types in the data
        dictionary, predicts with the global ``model`` and stores the outcome
        in the globals ``testGroundTruth`` (labels read from the file) and
        ``predictionResult`` (result object of the prediction algorithm).
        Returns nothing.
        """
        global testGroundTruth, predictionResult

        # Initialize FileDataSource<CSVFeatureManager> to retrieve the test data from a .csv file
        testDataSource = FileDataSource(
            testDatasetFileName,
            DataSourceIface.notAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext
        )

        # Create Numeric Tables for testing data and labels; they are loaded
        # together through one merged table
        testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
        testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
        mergedData = MergedNumericTable(testData, testGroundTruth)

        # Retrieve the data from input file
        testDataSource.loadDataBlock(mergedData)

        # Get the data dictionary of the feature table. The local is named
        # featureDictionary so that it does not shadow the builtin ``dict``.
        featureDictionary = testData.getDictionary()

        # Same feature typing as in trainModel(): columns 0 and 1 continuous,
        # column 2 categorical
        featureDictionary[0].featureType = features.DAAL_CONTINUOUS
        featureDictionary[1].featureType = features.DAAL_CONTINUOUS
        featureDictionary[2].featureType = features.DAAL_CATEGORICAL

        # Create algorithm objects for decision forest classification prediction with the default method
        algorithm = prediction.Batch(nClasses)

        # Pass the testing data set and trained model to the algorithm
        algorithm.input.setTable(classifier.prediction.data,  testData)
        algorithm.input.setModel(classifier.prediction.model, model)

        # Compute prediction results and retrieve algorithm results
        # (Result class from classifier.prediction)
        predictionResult = algorithm.compute()
    
    
    def printResults():
        """Print the predicted labels, then the ground-truth labels, of the test set.

        Reads the globals ``predictionResult`` and ``testGroundTruth`` set by
        testModel(). The value 10 is passed as the third argument of
        printNumericTable, next to captions that mention the first 10 rows.
        """
        printNumericTable(
            predictionResult.get(classifier.prediction.prediction),
            "Decision forest prediction results (first 10 rows):",
            10
        )
        printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)
    
    if __name__ == "__main__":
        # Run the example end to end: train, predict, then report.
        for step in (trainModel, testModel, printResults):
            step()
    

      

  • 相关阅读:
    Vue,动画-修改v-前缀
    Vue,动画-使用过渡类名实现动画(渐变)
    sqli-labs闯关之21-30关
    sqli-labs闯关之11-20关,第18关有burpsuit的具体使用方法
    sqli-labs闯关之1-10关
    sqli-labs的搭建
    DVWA-SQL注入
    华为NAT配置
    基础过滤工具——ACL控制访问列表(Access Control List)
    DHCP——基于接口地址的池的DHCP
  • 原文地址:https://www.cnblogs.com/bonelee/p/9703150.html
Copyright © 2020-2023  润新知