Intel daal4py demo运行过程

daal安装（记得先安装anaconda）：

# Fetch the daal4py sources and enter the checkout
git clone https://github.com/IntelPython/daal4py.git
cd daal4py
# Create a conda environment named DAAL4PY (Python 3.6) containing the listed
# packages, taken from the intel, intel/label/test and conda-forge channels
conda create -n DAAL4PY -c intel -c intel/label/test -c conda-forge python=3.6 mpich cnc tbb-devel daal daal-include cython jinja2 numpy
# Activate the new environment (the exports below use its $CONDA_PREFIX)
source activate DAAL4PY
# Point the CnC, TBB and DAAL root variables at the environment prefix
# (presumably read by setup.py to locate headers/libraries -- TODO confirm)
export CNCROOT=$CONDA_PREFIX
export TBBROOT=$CONDA_PREFIX
export DAALROOT=$CONDA_PREFIX
# Build the extension modules, then install the package
python setup.py build_ext
python setup.py install
# Run the demos below

source deactivate DAAL4PY # leave the environment

注意：安装过程较慢，耐心等待。

随机森林：

#*******************************************************************************
# Copyright 2014-2018 Intel Corporation
# All Rights Reserved.
#
# This software is licensed under the Apache License, Version 2.0 (the
# "License"), the following terms apply:
#
# You may not use this file except in compliance with the License.  You may
# obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
#*******************************************************************************

# daal4py Decision Forest Classification example for shared memory systems

import daal4py as d4p
import numpy as np

# Prefer pandas' fast CSV reader; fall back to numpy only when pandas is missing.
try:
    import pandas

    def read_csv(f, c):
        """Read columns ``c`` of the header-less, comma-separated file ``f``.

        Args:
            f: Path (or file-like object) of the CSV file.
            c: Iterable of zero-based column indices to load.

        Returns:
            A 2-D ``numpy.float32`` array with one row per line of the file.
        """
        return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32).values
except ImportError:
    # Catch only ImportError: a bare ``except`` would also hide
    # KeyboardInterrupt/SystemExit and unrelated failures.
    def read_csv(f, c):
        """Read columns ``c`` of the comma-separated file ``f`` with numpy.

        Args:
            f: Path (or file-like object) of the CSV file.
            c: Iterable of zero-based column indices to load.

        Returns:
            A 2-D ``numpy.float32`` array (``ndmin=2`` keeps single columns 2-D).
        """
        return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32)


def main():
    """Train a decision forest classifier on the demo data and predict the test set.

    Returns:
        A tuple ``(train_result, predict_result, plabels)``: the daal4py
        training result, the daal4py prediction result, and the label column
        read from the test file.
    """
    # Demo CSV files, relative to the current working directory
    train_path = "./data/batch/df_classification_train.csv"
    test_path = "./data/batch/df_classification_test.csv"

    class_count = 5
    feature_columns = range(3)   # columns 0..2 are read as features
    label_columns = range(3, 4)  # column 3 is read as the label

    # Configure the training algorithm with a seeded mt19937 engine
    rng_engine = d4p.engines_mt19937(seed=777)
    trainer = d4p.decision_forest_classification_training(
        class_count,
        nTrees=10,
        minObservationsInLeafNode=8,
        featuresPerNode=3,
        engine=rng_engine,
        varImportance='MDI',
        bootstrap=True,
        resultsToCompute='computeOutOfBagError',
    )

    # Load the training set and fit the forest
    train_features = read_csv(train_path, feature_columns)
    train_labels = read_csv(train_path, label_columns)
    # Training result provides (depending on parameters) model, outOfBagError,
    # outOfBagErrorPerObservation and/or variableImportance
    fitted = trainer.compute(train_features, train_labels)

    # Load the test set (same feature columns) and predict with the trained model
    predictor = d4p.decision_forest_classification_prediction(class_count)
    test_features = read_csv(test_path, feature_columns)
    test_labels = read_csv(test_path, label_columns)
    predicted = predictor.compute(test_features, fitted.model)

    # Sanity check: one predicted value per test observation
    expected_shape = (test_features.shape[0], 1)
    assert predicted.prediction.shape == expected_shape

    return (fitted, predicted, test_labels)


if __name__ == "__main__":
    # Run the demo and print the training diagnostics next to the first
    # predictions and their ground-truth labels.
    (train_result, predict_result, plabels) = main()
    # The "\n" escapes below were rendered as real line breaks in the post,
    # which left the string literals unterminated (SyntaxError).
    print("\nVariable importance results:\n", train_result.variableImportance)
    print("\nOOB error:\n", train_result.outOfBagError)
    print("\nDecision forest prediction results (first 10 rows):\n", predict_result.prediction[0:10])
    print("\nGround truth (first 10 rows):\n", plabels[0:10])
    print('All looks good!')

demo示例数据：

0.00125126,0.563585,8,2,
0.193304,0.808741,12,1,
0.585009,0.479873,6,1,
0.350291,0.895962,13,4,
0.82284,0.746605,11,2,
0.174108,0.858943,12,0,
0.710501,0.513535,10,2,
0.303995,0.0149846,1,2,
0.0914029,0.364452,4,0,
0.147313,0.165899,0,4,
0.988525,0.445692,7,2,
0.119083,0.00466933,0,2,
0.0089114,0.37788,4,2,
0.531663,0.571184,10,3,
0.601764,0.607166,10,4,
0.166234,0.663045,8,4,
0.450789,0.352123,5,3,
0.0570391,0.607685,8,4,
0.783319,0.802606,15,3,
0.519883,0.30195,6,2,
0.875973,0.726676,11,1,
0.955901,0.925718,15,3,
0.539354,0.142338,2,3,
0.462081,0.235328,1,2,
0.862239,0.209601,3,1,
0.779656,0.843654,15,3,
0.996796,0.999695,15,2,
0.611499,0.392438,6,0,
0.266213,0.297281,5,2,
0.840144,0.0237434,3,1,
0.375866,0.0926237,1,0,
0.677206,0.0562151,2,3,
0.00878933,0.91879,12,2,
0.275887,0.272897,5,2,
0.587909,0.691183,10,4,
0.837611,0.726493,11,1,
0.484939,0.205359,1,2,
0.743736,0.468459,6,2,
0.457961,0.949156,13,3,
0.744438,0.10828,2,2,
0.599048,0.385235,6,0,
0.735008,0.608966,10,2,
0.572405,0.361339,6,0,
0.151555,0.225105,0,3,
0.425153,0.802881,13,3,

计算均值方差等统计特征：

#*******************************************************************************

# Copyright 2014-2018 Intel Corporation

# All Rights Reserved.

#

# This software is licensed under the Apache License, Version 2.0 (the

# "License"), the following terms apply:

#

# You may not use this file except in compliance with the License.  You may

# obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

#

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

#

# See the License for the specific language governing permissions and

# limitations under the License.

#*******************************************************************************



# daal4py low order moments example for shared memory systems



import daal4py as d4p

import numpy as np



# Prefer pandas' fast CSV reader; fall back to numpy only when pandas is missing.
try:
    import pandas

    def read_csv(f, c):
        """Read columns ``c`` of the header-less, comma-separated file ``f``.

        Args:
            f: Path (or file-like object) of the CSV file.
            c: Iterable of zero-based column indices to load.

        Returns:
            A 2-D ``numpy.float64`` array with one row per line of the file.
        """
        return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float64).values
except ImportError:
    # Catch only ImportError: a bare ``except`` would also hide
    # KeyboardInterrupt/SystemExit and unrelated failures.
    def read_csv(f, c):
        """Read columns ``c`` of the comma-separated file ``f`` with numpy.

        Args:
            f: Path (or file-like object) of the CSV file.
            c: Iterable of zero-based column indices to load.

        Returns:
            A 2-D array in numpy's default float dtype (``ndmin=2`` keeps
            single columns 2-D).
        """
        return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2)





def main():
    """Compute low order moments for the first ten columns of the demo data.

    Returns:
        The daal4py ``low_order_moments`` result, exposing minimum, maximum,
        sum, sumSquares, sumSquaresCentered, mean, secondOrderRawMoment,
        variance, standardDeviation and variation.
    """
    # Load the first 10 columns of the demo file
    csv_path = "./data/batch/covcormoments_dense.csv"
    samples = read_csv(csv_path, range(10))

    # Run the algorithm on the whole table
    moments = d4p.low_order_moments().compute(samples)

    # Every statistic is expected as a single row with one entry per column
    row_shape = (1, samples.shape[1])
    statistic_names = (
        "minimum",
        "maximum",
        "sum",
        "sumSquares",
        "sumSquaresCentered",
        "mean",
        "secondOrderRawMoment",
        "variance",
        "standardDeviation",
        "variation",
    )
    for statistic in statistic_names:
        assert getattr(moments, statistic).shape == row_shape

    return moments





if __name__ == "__main__":
    # Run the demo and print every statistic of the result.
    res = main()
    # The "\n" escapes below were rendered as real line breaks in the post,
    # which left the string literals unterminated (SyntaxError).
    print("\nMinimum:\n", res.minimum)
    print("\nMaximum:\n", res.maximum)
    print("\nSum:\n", res.sum)
    print("\nSum of squares:\n", res.sumSquares)
    print("\nSum of squared difference from the means:\n", res.sumSquaresCentered)
    print("\nMean:\n", res.mean)
    print("\nSecond order raw moment:\n", res.secondOrderRawMoment)
    print("\nVariance:\n", res.variance)
    print("\nStandard deviation:\n", res.standardDeviation)
    print("\nVariation:\n", res.variation)
    print('All looks good!')

相关阅读:
- 滚动条cs
- 关于JS动态添加事件
- JS获取地址栏参数
- 关于Iframe内嵌页面右边总是有空白滚动条的处理
- JAVASCRIPT中使用DOM操作XML文档
- 页面间传递变量的方法及使用范围的讨论
- javascript打开模式窗口的用法
- 关于Session
- NUnit使用
- Eclipse jQuery plugin spket
原文地址：https://www.cnblogs.com/bonelee/p/9881478.html