• MDP


    // MyMakv.cpp : Defines the entry point for the console application.
    //
    #include "stdafx.h"
    
    // One row of the policy table: the action chosen for a given state.
    // The table built from these rows is the state -> action mapping.
    struct strategy
    {
        int state;   // state label
        int action;  // label of the action selected for that state
    };
    
    // Final result of the program: the state -> action mapping (the policy).
    strategy Pai[6];

    // Action labels.
    int ActionSet[4] = { 0, 1, 2, 3 };

    // State labels (the original comment said "action labels", apparently a
    // copy-paste slip). States 0..5 are assumed to be classes produced by
    // preliminary processing of camera images; they are not spatial positions.
    int stateSet[6] = { 0, 1, 2, 3, 4, 5 };
    
    // Evaluation factors attached to one state.
    struct StaFactor
    {
        int s;             // state label
        double Character;  // placeholder for some important factor, e.g. a feature
                           // of the currently captured image
    };
    // Per-state evaluation factors; used to compute Rs, the reward of the
    // current state.
    StaFactor RealStaFa[6] = { 0 };

    // Value computed for each state under the current policy; updated
    // repeatedly while iterating towards the optimum.
    double Vps[6] = { 0 };

    // Copy of Vps taken before a sweep, compared against the new values by the
    // stopping test.
    double Vbackup[6] = { 0 };
    
    // Immediate reward of state s: the state's Character factor plus 0.9.
    // The formula is a placeholder chosen by the original author.
    // s is used directly as an index into RealStaFa, with no range check.
    double Rs(int s)
    {
        const double feature = RealStaFa[s].Character;
        return feature + 0.9;
    }
    
    // Transition table, indexed as Ps[a][s][i]: a is the action label (0..3),
    // s the current state and i the successor state (this is how Fpsa reads it).
    // Each inner row is meant to be a probability distribution over successors.
    // NOTE(review): two rows do not sum to 1 -- Ps[2][4] sums to 1.1 and
    // Ps[3][5] sums to 0.9. Values are kept exactly as in the original data;
    // confirm the intended numbers.
    double Ps[4][6][6] = {
        {   // action 0
            { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
            { 0.4, 0.1, 0.1, 0.1, 0.1, 0.2 },
            { 0.1, 0.3, 0.2, 0.1, 0.1, 0.2 },
            { 0.0, 0.1, 0.9, 0.0, 0.0, 0.0 },
            { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
            { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 }
        },
        {   // action 1
            { 0.5, 0.1, 0.1, 0.1, 0.1, 0.1 },
            { 0.1, 0.2, 0.4, 0.1, 0.1, 0.1 },
            { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
            { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
            { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
            { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 }
        },
        {   // action 2
            { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
            { 0.7, 0.0, 0.3, 0.0, 0.0, 0.0 },
            { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
            { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
            { 0.1, 0.7, 0.0, 0.0, 0.0, 0.3 },   // NOTE(review): sums to 1.1
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.9 }
        },
        {   // action 3
            { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
            { 0.4, 0.0, 0.1, 0.2, 0.1, 0.2 },
            { 0.8, 0.0, 0.0, 0.0, 0.0, 0.2 },
            { 0.5, 0.0, 0.0, 0.0, 0.5, 0.0 },
            { 0.0, 0.7, 0.3, 0.0, 0.0, 0.0 },
            { 0.5, 0.0, 0.0, 0.1, 0.1, 0.2 }    // NOTE(review): sums to 0.9
        }
    };
    
     // Expectation term of the value-iteration backup: the sum over all six
     // successor states i of Ps[a][s][i] * Vps[i], i.e. the current value
     // estimates weighted by the transition entries for taking action a in
     // state s. Neither s nor a is range-checked.
     double Fpsa(int s, int a)
     {
         double expected = 0;
         for (int next = 0; next < 6; ++next)
         {
             expected += Ps[a][s][next] * Vps[next];
         }
         return expected;
     }
    
    // Total value of state s (including the future) under the policy stored in
    // Pai. Unlike Rs(s), which scores only the current state, this adds the
    // expected successor value for the policy's action, scaled by a discount
    // factor that is provisionally fixed at 0.9. Because the action comes from
    // Pai, the policy decides which transition rows are used.
    double Fps(int s)
    {
        const int chosen = Pai[s].action;
        const double future = Fpsa(s, chosen);
        return Rs(s) + 0.9 * future;
    }
    
    // Record a as the policy's action for state s. s indexes Pai directly and
    // is not range-checked.
    void Paiupdate(int s, int a)
    {
        (Pai + s)->action = a;
    }
    
    // Program entry point: runs value iteration over the 6-state / 4-action
    // model described by Ps and Rs, leaving the resulting policy in Pai and the
    // value estimates in Vps. argc and argv are not used. Returns 0.
    //
    // Fixes relative to the first version:
    //  - the greedy action search now tracks the best value seen so far; before,
    //    every candidate was compared only against action 0, so the chosen
    //    action was the last one beating action 0 rather than the best one;
    //  - the final message ends with a real newline ("\n" instead of "/n");
    //  - the stopping test is written as change <= 0.1 * norm, so it cannot
    //    divide by zero.
    int _tmain(int argc, _TCHAR* argv[])
    {
        const int stateCount = 6;    // size of Pai, Vps, Vbackup and RealStaFa
        const int actionCount = 4;   // first dimension of Ps

        for (int s = 0; s < stateCount; s++)
        {
            Pai[s].state = s;
            // Value iteration does not need an initial policy; it is set here
            // only to make the changes easy to observe.
            Pai[s].action = 0;
            Vps[s] = 0;
            RealStaFa[s].s = s;
            RealStaFa[s].Character = (double)s;   // placeholder experimental data
        }

        while (1)
        {
            // Keep the values from before this sweep for the stopping test.
            for (int s = 0; s < stateCount; s++)
            {
                Vbackup[s] = Vps[s];
            }

            for (int s = 0; s < stateCount; s++)
            {
                // Greedy action: the one with the largest expected successor
                // value. Ties keep the lowest-numbered action.
                int bestAction = 0;
                double bestValue = Fpsa(s, 0);
                for (int a = 1; a < actionCount; a++)
                {
                    const double candidate = Fpsa(s, a);
                    if (candidate > bestValue)
                    {
                        bestValue = candidate;
                        bestAction = a;
                    }
                }

                // Pai must be updated before Fps is called, because Fps reads
                // the action from Pai.
                Paiupdate(s, bestAction);
                // In-place (asynchronous) update: later states in this sweep
                // already see the new value.
                Vps[s] = Fps(s);
            }

            // Provisional stopping rule: stop once the squared change of the
            // value vector is at most 10% of its squared norm.
            double changeSq = 0;
            double normSq = 0;
            for (int s = 0; s < stateCount; s++)
            {
                const double diff = Vps[s] - Vbackup[s];
                changeSq += diff * diff;
                normSq += Vps[s] * Vps[s];
            }
            if (changeSq <= 0.1 * normSq)
            {
                break;
            }
        }

        printf("结束\n");
        return 0;
    }
  • 相关阅读:
    CodeForces 58C Trees
    【转】二分匹配题集
    HDU2604 Queuing
    HDU1281 棋盘游戏
    HDU3360 National Treasures
    HDU2444 The Accomodation of Students
    HDU1498 50 years, 50 colors
    HDU1068 Girls and Boys
    【转】常用的latex宏包
    【转】网络流题集
  • 原文地址:https://www.cnblogs.com/hgonlywj/p/4842700.html
Copyright © 2020-2023  润新知