• 华为软挑2020之深度学习题目


    1.1 输入信息

    输入分为三部分:

    • train_data.txt 为已经做好特征工程处理的本地训练集文件。每一行为一条数据记录,以逗号分开。最后一列为类别(二分类),前面的列为特征值。

    • test_data.txt 为需要预测的本地测试集文件。特征数和训练集一致,不含类别信息。

    示例代码为准确率和性能待优化的参考代码,支持的语言分别为C++/Python/JAVA

     

    answer.txt 为 test_data.txt 的二分类结果,用于练习的时候使用。

    1.2 输出信息

    输出信息为一个文件result.txt,按行顺序放置测试集记录的预测结果,每一行代表一条测试数据的二分类结果。

    1.3 限制条件

    • 选手拿到的训练集和测试集并不是最终判题用的数据。

    示例代码的算法实现为LR(逻辑回归),选手可以将其改为其它的机器学习算法,但程序中定义的输入输出文件路径不能改。

    • 不允许使用外部机器学习库。

    示例代码(LR,逻辑回归)

      1 import math
      2 import datetime
      3 import sys
      4 import numpy as np
      5 
      6 
      class LR:
          """Binary logistic-regression classifier trained by full-batch gradient steps.

          Data files are comma-separated text, one record per line.  In the
          training file the last column is the label and the preceding columns
          are features; the prediction file contains feature columns only.
          Predictions are written one per line to the result file.
          """

          def __init__(self, train_file_name, test_file_name, predict_result_file_name):
              """Store the file paths and set the default hyper-parameters.

              Args:
                  train_file_name: path of the labelled training file.
                  test_file_name: path of the unlabelled file to predict.
                  predict_result_file_name: path the predictions are written to.
              """
              self.train_file = train_file_name
              self.predict_file = test_file_name
              self.predict_result_file = predict_result_file_name
              self.max_iters = 760  # number of full-batch update steps in train()
              self.rate = 0.1  # step size applied to the averaged gradient
              self.feats = []
              self.labels = []
              self.feats_test = []
              self.labels_predict = []
              self.param_num = 0  # set in train() to the feature count
              self.weight = []

          def loadDataSet(self, file_name, label_existed_flag):
              """Parse a comma-separated data file into NumPy arrays.

              Args:
                  file_name: path of the file to read.
                  label_existed_flag: 1 if the last column of every row is the
                      label; any other value means every column is a feature.

              Returns:
                  A ``(feats, labels)`` tuple of arrays.  ``labels`` is empty when
                  ``label_existed_flag`` is not 1.

              Raises:
                  ValueError: if a field cannot be converted to ``float``.
              """
              feats = []
              labels = []
              # The context manager closes the file even if parsing raises.
              with open(file_name) as fr:
                  for line in fr:
                      line = line.strip()
                      if not line:
                          # A blank (e.g. trailing) line carries no record.
                          continue
                      values = [float(field) for field in line.split(',')]
                      if label_existed_flag == 1:
                          feats.append(values[:-1])
                          labels.append(values[-1])
                      else:
                          feats.append(values)
              return np.array(feats), np.array(labels)

          def loadTrainData(self):
              """Load the labelled training file into ``feats`` and ``labels``."""
              self.feats, self.labels = self.loadDataSet(self.train_file, 1)

          def loadTestData(self):
              """Load the unlabelled prediction file into ``feats_test``."""
              self.feats_test, self.labels_predict = self.loadDataSet(
                  self.predict_file, 0)

          def savePredictResult(self):
              """Print the predicted labels and write them one per line."""
              print(self.labels_predict)
              with open(self.predict_result_file, 'w') as f:
                  f.writelines(str(label) + "\n" for label in self.labels_predict)

          def sigmod(self, x):
              """Return the logistic sigmoid of ``x`` (scalar or array).

              Uses the identity ``1/(1+exp(-x)) == 0.5*(1+tanh(x/2))`` so that
              large-magnitude inputs do not overflow ``exp``.
              """
              return 0.5 * (1.0 + np.tanh(0.5 * np.asarray(x, dtype=float)))

          def printInfo(self):
              """Print the configured paths and the currently loaded arrays."""
              print(self.train_file)
              print(self.predict_file)
              print(self.predict_result_file)
              print(self.feats)
              print(self.labels)
              print(self.feats_test)
              print(self.labels_predict)

          def initParams(self):
              """Initialise the weight vector to ``param_num`` ones."""
              # The builtin ``float`` replaces the removed ``np.float`` alias.
              self.weight = np.ones((self.param_num,), dtype=float)

          def compute(self, recNum, param_num, feats, w):
              """Return ``sigmod(feats . w)``.

              ``recNum`` and ``param_num`` are unused; they are kept so existing
              call sites keep working.
              """
              return self.sigmod(np.dot(feats, w))

          def error_rate(self, recNum, label, preval):
              """Return the sum of squared differences between ``label`` and ``preval``.

              ``recNum`` is unused and kept only for signature compatibility.
              """
              return np.power(label - preval, 2).sum()

          def predict(self):
              """Load the prediction file, classify every row and save the result."""
              self.loadTestData()
              preval = self.compute(len(self.feats_test),
                                    self.param_num, self.feats_test, self.weight)
              # Adding 0.5 and truncating maps probabilities >= 0.5 to 1, else 0.
              # The builtin ``int`` replaces the removed ``np.int`` alias.
              self.labels_predict = (preval + 0.5).astype(int)
              self.savePredictResult()

          def train(self):
              """Load the training file and run ``max_iters`` full-batch updates.

              Every 30 iterations the current squared error and a timestamp are
              printed.
              """
              self.loadTrainData()
              recNum = len(self.feats)
              self.param_num = len(self.feats[0])
              self.initParams()
              # '%f' (microseconds) replaces a bare 'f' that was printed literally.
              ISOTIMEFORMAT = '%Y-%m-%d %H:%M:%S,%f'
              for i in range(self.max_iters):
                  preval = self.compute(recNum, self.param_num,
                                        self.feats, self.weight)
                  sum_err = self.error_rate(recNum, self.labels, preval)
                  if i % 30 == 0:
                      print("Iters:" + str(i) + " error:" + str(sum_err))
                      theTime = datetime.datetime.now().strftime(ISOTIMEFORMAT)
                      print(theTime)
                  err = self.labels - preval
                  # Average over the records, then step the weights by ``rate``.
                  delt_w = np.dot(self.feats.T, err)
                  delt_w /= recNum
                  self.weight += self.rate * delt_w
    105 
    106 
    def print_help_and_exit():
        """Print the command-line usage line and terminate with exit status -1."""
        print("usage:python3 main.py train_data.txt test_data.txt predict.txt [debug]")
        sys.exit(-1)
    110 
    111 
    def parse_args(argv=None):
        """Return True when the single command-line argument is ``debug``.

        Args:
            argv: argument list including the program name; defaults to
                ``sys.argv`` when omitted.

        Returns:
            True if exactly one argument is given and it equals ``'debug'``.
            False if the argument count is anything other than one.  With one
            argument that is not ``'debug'``, ``print_help_and_exit()`` is called.
        """
        if argv is None:
            argv = sys.argv
        debug = False
        if len(argv) == 2:
            if argv[1] == 'debug':
                print("test mode")
                debug = True
            else:
                print_help_and_exit()
        return debug
    121 
    122 
    if __name__ == "__main__":
        # Train on the local training set, then write predictions for the test set.
        train_file = "./data/train_data.txt"
        test_file = "./data/test_data.txt"
        predict_file = "./data/result.txt"
        lr = LR(train_file, test_file, predict_file)
        lr.train()
        lr.predict()
        # Local evaluation is always enabled; parse_args() is not consulted here.
        debug = True

        if debug:
            # Compare the written predictions with the reference answers.
            answer_file = "./data/answer.txt"
            with open(answer_file, 'r') as f_a:
                a = [int(float(line)) for line in f_a if line.strip()]
            with open(predict_file, 'r') as f_p:
                p = [int(float(line)) for line in f_p if line.strip()]

            print("answer lines:%d" % (len(a)))
            print("predict lines:%d" % (len(p)))

            # Mismatched pairs are errors; answers without a prediction also
            # count as errors instead of raising IndexError.
            errline = sum(1 for expected, got in zip(a, p) if expected != got)
            errline += max(0, len(a) - len(p))

            if a:
                accuracy = (len(a) - errline) / len(a)
                print("accuracy:%f" % (accuracy))
            else:
                # Avoid dividing by zero when there is nothing to compare against.
                print("accuracy: n/a (answer file is empty)")
  • 相关阅读:
    理清一下JavaScript面向对象思路
    IE的CSS渲染跟其它浏览器有什么不同
    页面元素的CSS渲染优先级
    push与createElement性能比较
    关于JavaScript的push()函数
    关于JavaScript的沙箱模式
    JavaScript SandBox沙箱设计模式
    用live()方法给新增节点绑定事件
    深入JavaScript对象创建的细节
    Keras class_weight和sample_weight用法
  • 原文地址:https://www.cnblogs.com/ljy1227476113/p/12512228.html
Copyright © 2020-2023  润新知