• model.train方法的dataset_sink_mode参数设置为False时以step作为单位打印数据——(只在mode=context.GRAPH_MODE下成立,在mode=context.PYNATIVE_MODE模式下不成立)


    如题:

    官方中的内容支持:

    https://www.mindspore.cn/tutorial/training/zh-CN/r1.2/advanced_use/summary_record.html?highlight=sink_mode

    使用summary功能时,建议将model.train方法的dataset_sink_mode参数设置为False,从而以step作为collect_freq参数的单位收集数据。当dataset_sink_mode为True时,将以epoch作为collect_freq的单位,此时建议手动设置collect_freq参数。collect_freq参数默认值为10。

    从官方文档中我们可以知道:

    如果model.train方法的dataset_sink_mode参数设置为False,那么就是以step为单位打印数据。

    如果model.train方法的dataset_sink_mode参数设置为True,那么就是以epoch为单位打印数据。

    这里我们不过多解释,直接上代码:

     (代码具体参看:https://www.cnblogs.com/devilmaycry812839668/p/14971668.html)

    当   dataset_sink_mode=False  时:

    model.train(epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor(125)], dataset_sink_mode=False)

    #!/usr/bin/env python
    # encoding:UTF-8
    
    """" 对输入的超参数进行处理 """
    import os
    import argparse
    
    """ 设置运行的背景context """
    from mindspore import context
    
    """ 对数据集进行预处理 """
    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as C
    import mindspore.dataset.vision.c_transforms as CV
    from mindspore.dataset.vision import Inter
    from mindspore import dtype as mstype
    
    """ 构建神经网络 """
    import mindspore.nn as nn
    from mindspore.common.initializer import Normal
    
    """ 训练时对模型参数的保存 """
    from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
    
    """ 导入模型训练需要的库 """
    from mindspore.nn import Accuracy
    from mindspore.train.callback import LossMonitor
    from mindspore import Model
    
    
    # Command-line interface: the only option is the device to run on.
    parser = argparse.ArgumentParser(description='MindSpore LeNet Example')
    parser.add_argument('--device_target', type=str, default="GPU", choices=['Ascend', 'GPU', 'CPU'])
    
    # parse_known_args() returns (namespace, leftover_args); only the namespace is kept,
    # so unrecognised command-line options are ignored instead of causing an error.
    args = parser.parse_known_args()[0]
    
    # Configure the MindSpore execution context: graph mode on the selected device.
    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
    
    
    def create_dataset(data_path, batch_size=32, repeat_size=1,
                       num_parallel_workers=1):
        """Build the MNIST input pipeline: per-column maps, then shuffle, batch, repeat.

        Args:
            data_path: Directory handed to ``ds.MnistDataset``.
            batch_size: Samples per batch; an incomplete final batch is dropped.
            repeat_size: Value forwarded to ``repeat``.
            num_parallel_workers: Worker count forwarded to every ``map`` call.

        Returns:
            The dataset object produced by the final ``repeat`` stage.
        """
        dataset = ds.MnistDataset(data_path)

        # Ordered (column, operation) table; the maps are applied in exactly this order.
        # The second Rescale uses 0.1307 / 0.3081 — presumably the MNIST mean / std.
        target_size = (32, 32)
        column_ops = [
            ("label", C.TypeCast(mstype.int32)),
            ("image", CV.Resize(target_size, interpolation=Inter.LINEAR)),
            ("image", CV.Rescale(1.0 / 255.0, 0.0)),
            ("image", CV.Rescale(1 / 0.3081, -1 * 0.1307 / 0.3081)),
            ("image", CV.HWC2CHW()),
        ]
        for column, operation in column_ops:
            dataset = dataset.map(operations=operation, input_columns=column,
                                  num_parallel_workers=num_parallel_workers)

        # Shuffle first, then form fixed-size batches, then repeat.
        dataset = dataset.shuffle(buffer_size=10000)
        dataset = dataset.batch(batch_size, drop_remainder=True)
        return dataset.repeat(repeat_size)
    
    
    class LeNet5(nn.Cell):
        """LeNet-style network: two conv/ReLU/max-pool stages, then three dense layers."""
    
        def __init__(self, num_class=10, num_channel=1):
            """Create the layers.

            Args:
                num_class: Width of the final dense layer (number of output logits).
                num_channel: Number of channels of the input image.
            """
            super().__init__()
            # Convolutional feature extractor.
            self.conv1 = nn.Conv2d(in_channels=num_channel, out_channels=6, kernel_size=5, pad_mode='valid')
            self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, pad_mode='valid')
            # Dense classifier head; 16 * 5 * 5 is the flattened feature size
            # (assumes 32x32 inputs: 32 -> 28 -> 14 -> 10 -> 5 per side, 16 channels).
            self.fc1 = nn.Dense(in_channels=16 * 5 * 5, out_channels=120, weight_init=Normal(0.02))
            self.fc2 = nn.Dense(in_channels=120, out_channels=84, weight_init=Normal(0.02))
            self.fc3 = nn.Dense(in_channels=84, out_channels=num_class, weight_init=Normal(0.02))
            # Parameter-free operators shared by several stages.
            self.relu = nn.ReLU()
            self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
            self.flatten = nn.Flatten()
    
        def construct(self, x):
            """Forward pass: return the output of the last dense layer for input ``x``."""
            features = self.max_pool2d(self.relu(self.conv1(x)))
            features = self.max_pool2d(self.relu(self.conv2(features)))
            features = self.flatten(features)
            hidden = self.relu(self.fc1(features))
            hidden = self.relu(self.fc2(hidden))
            return self.fc3(hidden)
    
    # Instantiate the network
    net = LeNet5()
    
    # Define the loss function
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    
    # Define the optimizer
    net_opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9)
    
    # Checkpoint settings:
    # save the model parameters every 125 steps and keep at most 15 files
    config_ck = CheckpointConfig(save_checkpoint_steps=125, keep_checkpoint_max=15)
    # Create the checkpoint callback that applies these settings
    ckpoint = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)
    
    
    def train_net(args, model, epoch_size, data_path, repeat_size, ckpoint_cb, sink_mode):
        """Train ``model`` on the "train" split found under ``data_path``.

        Args:
            args: Accepted for interface compatibility; not used in this function.
            model: Object whose ``train`` method is invoked.
            epoch_size: Number of epochs passed to ``model.train``.
            data_path: Dataset root; "train" is joined onto it.
            repeat_size: Forwarded to ``create_dataset``.
            ckpoint_cb: Checkpoint callback placed first in the callback list.
            sink_mode: Forwarded as ``dataset_sink_mode``.
        """
        train_dir = os.path.join(data_path, "train")
        train_set = create_dataset(train_dir, 32, repeat_size)
        # LossMonitor(125): loss is reported every 125 steps.
        callback_list = [ckpoint_cb, LossMonitor(125)]
        model.train(epoch_size, train_set, callbacks=callback_list, dataset_sink_mode=sink_mode)
    
    
    def test_net(network, model, data_path):
        """Evaluate ``model`` on the "test" split under ``data_path`` and print the metrics.

        ``network`` is accepted for interface compatibility; it is not used here.
        """
        eval_dir = os.path.join(data_path, "test")
        eval_set = create_dataset(eval_dir)
        metrics = model.eval(eval_set, dataset_sink_mode=False)
        report = "{}".format(metrics)
        print(report)
    
    
    # Dataset root; train_net / test_net join "train" / "test" onto this path.
    mnist_path = "./datasets/MNIST_Data"
    train_epoch = 1
    # NOTE: despite its name, this value is passed to train_net as repeat_size.
    dataset_size = 1
    model = Model(net, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
    # The last argument is sink_mode, i.e. dataset_sink_mode=False for this run.
    train_net(args, model, train_epoch, mnist_path, dataset_size, ckpoint, False)
    test_net(net, model, mnist_path)
    View Code

    输出结果:

    epoch: 1 step: 125, loss is 2.2959
    epoch: 1 step: 250, loss is 2.2959309
    epoch: 1 step: 375, loss is 2.2982068
    epoch: 1 step: 500, loss is 2.2916625
    epoch: 1 step: 625, loss is 2.3001077
    epoch: 1 step: 750, loss is 1.9395046
    epoch: 1 step: 875, loss is 0.728865
    epoch: 1 step: 1000, loss is 0.2426785
    epoch: 1 step: 1125, loss is 0.45475814
    epoch: 1 step: 1250, loss is 0.1676599
    epoch: 1 step: 1375, loss is 0.14273866
    epoch: 1 step: 1500, loss is 0.030339874
    epoch: 1 step: 1625, loss is 0.19792284
    epoch: 1 step: 1750, loss is 0.09066871
    epoch: 1 step: 1875, loss is 0.12958783
    {'Accuracy': 0.9688501602564102}

    当   dataset_sink_mode=True  时:

    model.train(epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor(125)], dataset_sink_mode=True)
    #!/usr/bin/env python
    # encoding:UTF-8
    
    """" 对输入的超参数进行处理 """
    import os
    import argparse
    
    """ 设置运行的背景context """
    from mindspore import context
    
    """ 对数据集进行预处理 """
    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as C
    import mindspore.dataset.vision.c_transforms as CV
    from mindspore.dataset.vision import Inter
    from mindspore import dtype as mstype
    
    """ 构建神经网络 """
    import mindspore.nn as nn
    from mindspore.common.initializer import Normal
    
    """ 训练时对模型参数的保存 """
    from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
    
    """ 导入模型训练需要的库 """
    from mindspore.nn import Accuracy
    from mindspore.train.callback import LossMonitor
    from mindspore import Model
    
    
    # Command-line interface: the only option is the device to run on.
    parser = argparse.ArgumentParser(description='MindSpore LeNet Example')
    parser.add_argument('--device_target', type=str, default="GPU", choices=['Ascend', 'GPU', 'CPU'])
    
    # parse_known_args() returns (namespace, leftover_args); only the namespace is kept,
    # so unrecognised command-line options are ignored instead of causing an error.
    args = parser.parse_known_args()[0]
    
    # Configure the MindSpore execution context: graph mode on the selected device.
    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
    
    
    def create_dataset(data_path, batch_size=32, repeat_size=1,
                       num_parallel_workers=1):
        """Build the MNIST input pipeline: per-column maps, then shuffle, batch, repeat.

        Args:
            data_path: Directory handed to ``ds.MnistDataset``.
            batch_size: Samples per batch; an incomplete final batch is dropped.
            repeat_size: Value forwarded to ``repeat``.
            num_parallel_workers: Worker count forwarded to every ``map`` call.

        Returns:
            The dataset object produced by the final ``repeat`` stage.
        """
        dataset = ds.MnistDataset(data_path)

        # Ordered (column, operation) table; the maps are applied in exactly this order.
        # The second Rescale uses 0.1307 / 0.3081 — presumably the MNIST mean / std.
        target_size = (32, 32)
        column_ops = [
            ("label", C.TypeCast(mstype.int32)),
            ("image", CV.Resize(target_size, interpolation=Inter.LINEAR)),
            ("image", CV.Rescale(1.0 / 255.0, 0.0)),
            ("image", CV.Rescale(1 / 0.3081, -1 * 0.1307 / 0.3081)),
            ("image", CV.HWC2CHW()),
        ]
        for column, operation in column_ops:
            dataset = dataset.map(operations=operation, input_columns=column,
                                  num_parallel_workers=num_parallel_workers)

        # Shuffle first, then form fixed-size batches, then repeat.
        dataset = dataset.shuffle(buffer_size=10000)
        dataset = dataset.batch(batch_size, drop_remainder=True)
        return dataset.repeat(repeat_size)
    
    
    class LeNet5(nn.Cell):
        """LeNet-style network: two conv/ReLU/max-pool stages, then three dense layers."""
    
        def __init__(self, num_class=10, num_channel=1):
            """Create the layers.

            Args:
                num_class: Width of the final dense layer (number of output logits).
                num_channel: Number of channels of the input image.
            """
            super().__init__()
            # Convolutional feature extractor.
            self.conv1 = nn.Conv2d(in_channels=num_channel, out_channels=6, kernel_size=5, pad_mode='valid')
            self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, pad_mode='valid')
            # Dense classifier head; 16 * 5 * 5 is the flattened feature size
            # (assumes 32x32 inputs: 32 -> 28 -> 14 -> 10 -> 5 per side, 16 channels).
            self.fc1 = nn.Dense(in_channels=16 * 5 * 5, out_channels=120, weight_init=Normal(0.02))
            self.fc2 = nn.Dense(in_channels=120, out_channels=84, weight_init=Normal(0.02))
            self.fc3 = nn.Dense(in_channels=84, out_channels=num_class, weight_init=Normal(0.02))
            # Parameter-free operators shared by several stages.
            self.relu = nn.ReLU()
            self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
            self.flatten = nn.Flatten()
    
        def construct(self, x):
            """Forward pass: return the output of the last dense layer for input ``x``."""
            features = self.max_pool2d(self.relu(self.conv1(x)))
            features = self.max_pool2d(self.relu(self.conv2(features)))
            features = self.flatten(features)
            hidden = self.relu(self.fc1(features))
            hidden = self.relu(self.fc2(hidden))
            return self.fc3(hidden)
    
    # Instantiate the network
    net = LeNet5()
    
    # Define the loss function
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    
    # Define the optimizer
    net_opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9)
    
    # Checkpoint settings:
    # save the model parameters every 125 steps and keep at most 15 files
    config_ck = CheckpointConfig(save_checkpoint_steps=125, keep_checkpoint_max=15)
    # Create the checkpoint callback that applies these settings
    ckpoint = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)
    
    
    def train_net(args, model, epoch_size, data_path, repeat_size, ckpoint_cb, sink_mode):
        """Train ``model`` on the "train" split found under ``data_path``.

        Args:
            args: Accepted for interface compatibility; not used in this function.
            model: Object whose ``train`` method is invoked.
            epoch_size: Number of epochs passed to ``model.train``.
            data_path: Dataset root; "train" is joined onto it.
            repeat_size: Forwarded to ``create_dataset``.
            ckpoint_cb: Checkpoint callback placed first in the callback list.
            sink_mode: Forwarded as ``dataset_sink_mode``.
        """
        train_dir = os.path.join(data_path, "train")
        train_set = create_dataset(train_dir, 32, repeat_size)
        # LossMonitor(125): loss is reported every 125 steps.
        callback_list = [ckpoint_cb, LossMonitor(125)]
        model.train(epoch_size, train_set, callbacks=callback_list, dataset_sink_mode=sink_mode)
    
    
    def test_net(network, model, data_path):
        """Evaluate ``model`` on the "test" split under ``data_path`` and print the metrics.

        ``network`` is accepted for interface compatibility; it is not used here.
        """
        eval_dir = os.path.join(data_path, "test")
        eval_set = create_dataset(eval_dir)
        metrics = model.eval(eval_set, dataset_sink_mode=False)
        report = "{}".format(metrics)
        print(report)
    
    
    # Dataset root; train_net / test_net join "train" / "test" onto this path.
    mnist_path = "./datasets/MNIST_Data"
    train_epoch = 1
    # NOTE: despite its name, this value is passed to train_net as repeat_size.
    dataset_size = 1
    model = Model(net, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
    # The last argument is sink_mode, i.e. dataset_sink_mode=True for this run.
    train_net(args, model, train_epoch, mnist_path, dataset_size, ckpoint, True)
    test_net(net, model, mnist_path)
    View Code

    输出结果:

    epoch: 1 step: 1875, loss is 0.04107348
    {'Accuracy': 0.9638421474358975}

    ==================================================================

    可以看到在mindspore中进行训练时如果设置 dataset_sink_mode=True

    那么无论设置多少step打印一次结果,每个epoch中只会打印一次结果,即一个epoch中最后的那个打印结果

    (比如数据集中一个epoch是100个数据,batch_size=10, 一个epoch的数据训练需要10个steps,  如果设置dataset_sink_mode=True那么只会打印第10step的结果,前9次step的结果不打印)。

    =====================================================================

    经过进一步发现,上面的描述都是在 运行背景设置为:

    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')


    下才成立的。

    如果设置为:

    context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')

    那么,无论 dataset_sink_mode 设置为 False 还是 True,都是以 step 为单位打印数据。

    代码如下:

    #!/usr/bin/env python
    # encoding:UTF-8
    
    """" 对输入的超参数进行处理 """
    import os
    import argparse
    
    """ 设置运行的背景context """
    from mindspore import context
    
    """ 对数据集进行预处理 """
    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as C
    import mindspore.dataset.vision.c_transforms as CV
    from mindspore.dataset.vision import Inter
    from mindspore import dtype as mstype
    
    """ 构建神经网络 """
    import mindspore.nn as nn
    from mindspore.common.initializer import Normal
    
    """ 训练时对模型参数的保存 """
    from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
    
    """ 导入模型训练需要的库 """
    from mindspore.nn import Accuracy
    from mindspore.train.callback import LossMonitor
    from mindspore import Model
    
    
    # Command-line interface: the only option is the device to run on.
    parser = argparse.ArgumentParser(description='MindSpore LeNet Example')
    parser.add_argument('--device_target', type=str, default="GPU", choices=['Ascend', 'GPU', 'CPU'])
    
    # parse_known_args() returns (namespace, leftover_args); only the namespace is kept,
    # so unrecognised command-line options are ignored instead of causing an error.
    args = parser.parse_known_args()[0]
    
    # Configure the MindSpore execution context: PyNative mode on the selected device.
    context.set_context(mode=context.PYNATIVE_MODE, device_target=args.device_target)
    
    
    def create_dataset(data_path, batch_size=32, repeat_size=1,
                       num_parallel_workers=1):
        """Build the MNIST input pipeline: per-column maps, then shuffle, batch, repeat.

        Args:
            data_path: Directory handed to ``ds.MnistDataset``.
            batch_size: Samples per batch; an incomplete final batch is dropped.
            repeat_size: Value forwarded to ``repeat``.
            num_parallel_workers: Worker count forwarded to every ``map`` call.

        Returns:
            The dataset object produced by the final ``repeat`` stage.
        """
        dataset = ds.MnistDataset(data_path)

        # Ordered (column, operation) table; the maps are applied in exactly this order.
        # The second Rescale uses 0.1307 / 0.3081 — presumably the MNIST mean / std.
        target_size = (32, 32)
        column_ops = [
            ("label", C.TypeCast(mstype.int32)),
            ("image", CV.Resize(target_size, interpolation=Inter.LINEAR)),
            ("image", CV.Rescale(1.0 / 255.0, 0.0)),
            ("image", CV.Rescale(1 / 0.3081, -1 * 0.1307 / 0.3081)),
            ("image", CV.HWC2CHW()),
        ]
        for column, operation in column_ops:
            dataset = dataset.map(operations=operation, input_columns=column,
                                  num_parallel_workers=num_parallel_workers)

        # Shuffle first, then form fixed-size batches, then repeat.
        dataset = dataset.shuffle(buffer_size=10000)
        dataset = dataset.batch(batch_size, drop_remainder=True)
        return dataset.repeat(repeat_size)
    
    
    class LeNet5(nn.Cell):
        """LeNet-style network: two conv/ReLU/max-pool stages, then three dense layers."""
    
        def __init__(self, num_class=10, num_channel=1):
            """Create the layers.

            Args:
                num_class: Width of the final dense layer (number of output logits).
                num_channel: Number of channels of the input image.
            """
            super().__init__()
            # Convolutional feature extractor.
            self.conv1 = nn.Conv2d(in_channels=num_channel, out_channels=6, kernel_size=5, pad_mode='valid')
            self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, pad_mode='valid')
            # Dense classifier head; 16 * 5 * 5 is the flattened feature size
            # (assumes 32x32 inputs: 32 -> 28 -> 14 -> 10 -> 5 per side, 16 channels).
            self.fc1 = nn.Dense(in_channels=16 * 5 * 5, out_channels=120, weight_init=Normal(0.02))
            self.fc2 = nn.Dense(in_channels=120, out_channels=84, weight_init=Normal(0.02))
            self.fc3 = nn.Dense(in_channels=84, out_channels=num_class, weight_init=Normal(0.02))
            # Parameter-free operators shared by several stages.
            self.relu = nn.ReLU()
            self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
            self.flatten = nn.Flatten()
    
        def construct(self, x):
            """Forward pass: return the output of the last dense layer for input ``x``."""
            features = self.max_pool2d(self.relu(self.conv1(x)))
            features = self.max_pool2d(self.relu(self.conv2(features)))
            features = self.flatten(features)
            hidden = self.relu(self.fc1(features))
            hidden = self.relu(self.fc2(hidden))
            return self.fc3(hidden)
    
    # Instantiate the network
    net = LeNet5()
    
    # Define the loss function
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    
    # Define the optimizer
    net_opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9)
    
    # Checkpoint settings:
    # save the model parameters every 125 steps and keep at most 15 files
    config_ck = CheckpointConfig(save_checkpoint_steps=125, keep_checkpoint_max=15)
    # Create the checkpoint callback that applies these settings
    ckpoint = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)
    
    
    def train_net(args, model, epoch_size, data_path, repeat_size, ckpoint_cb, sink_mode):
        """Train ``model`` on the "train" split found under ``data_path``.

        Args:
            args: Accepted for interface compatibility; not used in this function.
            model: Object whose ``train`` method is invoked.
            epoch_size: Number of epochs passed to ``model.train``.
            data_path: Dataset root; "train" is joined onto it.
            repeat_size: Forwarded to ``create_dataset``.
            ckpoint_cb: Checkpoint callback placed first in the callback list.
            sink_mode: Forwarded as ``dataset_sink_mode``.
        """
        train_dir = os.path.join(data_path, "train")
        train_set = create_dataset(train_dir, 32, repeat_size)
        # LossMonitor(125): loss is reported every 125 steps.
        callback_list = [ckpoint_cb, LossMonitor(125)]
        model.train(epoch_size, train_set, callbacks=callback_list, dataset_sink_mode=sink_mode)
    
    
    def test_net(network, model, data_path):
        """Evaluate ``model`` on the "test" split under ``data_path`` and print the metrics.

        ``network`` is accepted for interface compatibility; it is not used here.
        """
        eval_dir = os.path.join(data_path, "test")
        eval_set = create_dataset(eval_dir)
        metrics = model.eval(eval_set, dataset_sink_mode=False)
        report = "{}".format(metrics)
        print(report)
    
    
    # Dataset root; train_net / test_net join "train" / "test" onto this path.
    mnist_path = "./datasets/MNIST_Data"
    train_epoch = 1
    # NOTE: despite its name, this value is passed to train_net as repeat_size.
    dataset_size = 1
    model = Model(net, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
    # The last argument is sink_mode, i.e. dataset_sink_mode=True for this run.
    train_net(args, model, train_epoch, mnist_path, dataset_size, ckpoint, True)
    test_net(net, model, mnist_path)

    不得不说,对于新框架 MindSpore 来说,坑还是蛮多的,稍不注意就会出现意料之外的结果。

    本博客是博主个人学习时的一些记录,不保证是为原创,个别文章加入了转载的源地址还有个别文章是汇总网上多份资料所成,在这之中也必有疏漏未加标注者,如有侵权请与博主联系。
  • 相关阅读:
    游戏编程模式--原型模式
    游戏编程模式--观察者模式
    游戏编程模式--享元模式
    游戏编程模式--命令模式
    mybatis的线程安全
    开发遇到的问题
    spring的ThreadLocal解决线程安全
    i++
    jvm内存初步了解
    注解@RequestMapping,@RequestBody
  • 原文地址:https://www.cnblogs.com/devilmaycry812839668/p/14985427.html
Copyright © 2020-2023  润新知