MDP

// MyMakv.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"

// One entry of the policy table: maps a state label to the action label
// chosen for that state.
struct strategy
{
    int state;   // state label
    int action;  // action label selected for this state
};

strategy Pai [6]; // final result of the program: the state -> action mapping (the policy), one entry per state

// Labels of the four available actions.
int ActionSet[4] = { 0, 1, 2, 3 };

// Labels of the six states. Per the original author's note, states 0..5 are
// assumed to be classes produced by preliminary processing of camera images;
// they do not denote spatial positions.
int stateSet[6] = { 0, 1, 2, 3, 4, 5 };

// Per-state record holding the factor used to score a state.
struct StaFactor
{
    int    s;          // state label
    double Character;  // placeholder "important factor" of the state (the author's
                       // example: a feature of the currently captured image)
};
StaFactor  RealStaFa[6]  = {0};// per-state evaluation factors; Rs() reads the Character field to compute the immediate reward of a state

// Current value estimate of every state under the policy stored in Pai.
// Overwritten in place as the iteration proceeds.
double Vps[6] = { 0.0 };

// Copy of Vps taken at the start of each sweep; the stopping test compares
// the updated Vps against it.
double Vbackup[6] = { 0.0 };

// Immediate reward of state s, looking only at the state itself.
// Placeholder formula from the original author: the state's Character
// factor plus 0.9. s is used directly as an index into RealStaFa and is
// not range-checked.
double Rs(int s)
{
    const double factor = RealStaFa[s].Character;
    return factor + 0.9;
}

// State-transition table, indexed as Ps[a][s][i]: the weight of moving from
// state s to state i when action a (0..3) is taken.
//
// NOTE(review): two rows do not sum to 1 -- Ps[2][4] sums to 1.1 and
// Ps[3][5] sums to 0.9. The values are reproduced unchanged; confirm the
// intended numbers before relying on these rows as probabilities.
double Ps[4][6][6] = {
    { // action 0
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.4, 0.1, 0.1, 0.1, 0.1, 0.2 },
        { 0.1, 0.3, 0.2, 0.1, 0.1, 0.2 },
        { 0.0, 0.1, 0.9, 0.0, 0.0, 0.0 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 }
    },
    { // action 1
        { 0.5, 0.1, 0.1, 0.1, 0.1, 0.1 },
        { 0.1, 0.2, 0.4, 0.1, 0.1, 0.1 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 }
    },
    { // action 2
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.7, 0.0, 0.3, 0.0, 0.0, 0.0 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.7, 0.0, 0.0, 0.0, 0.3 },
        { 0.0, 0.0, 0.0, 0.0, 0.1, 0.9 }
    },
    { // action 3
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.4, 0.0, 0.1, 0.2, 0.1, 0.2 },
        { 0.8, 0.0, 0.0, 0.0, 0.0, 0.2 },
        { 0.5, 0.0, 0.0, 0.0, 0.5, 0.0 },
        { 0.0, 0.7, 0.3, 0.0, 0.0, 0.0 },
        { 0.5, 0.0, 0.0, 0.1, 0.1, 0.2 }
    }
};

 // Expectation term of the value-iteration update: the sum over all six
 // successor states of Ps[a][s][next] * Vps[next], i.e. the expected current
 // value estimate after taking action a in state s.
 // s and a are used directly as array indices and are not range-checked.
 double Fpsa(int s, int a)
 {
     double expected = 0;
     for (int next = 0; next < 6; ++next)
     {
         expected += Ps[a][s][next] * Vps[next];
     }
     return expected;
 }

// Total (current + discounted future) value of state s under the policy in
// Pai, written in Bellman-equation form: Rs(s) + 0.9 * Fpsa(s, Pai[s].action).
// Unlike Rs(), which scores only the current state, this includes the future
// term. The discount factor is hard-coded to 0.9 for now. The policy decides
// which action's transition row is used, so the result is policy-specific.
double Fps(int s)
{
    const int chosenAction = Pai[s].action;
    const double futureValue = Fpsa(s, chosenAction);
    return Rs(s) + 0.9 * futureValue;
}

// Records action a as the policy's choice for state s by writing
// Pai[s].action. s is used directly as an index and is not range-checked.
void Paiupdate(int s, int a )
{
    Pai[s].action = a;
}

// Program entry point: runs in-place (asynchronous) value iteration over the
// six states until the relative squared change of the value vector drops to
// 0.1 or below, leaving the greedy policy in Pai and the values in Vps.
// argc and argv are unused. Always returns 0.
int _tmain(int argc, _TCHAR* argv[])
{
    // Value iteration does not strictly need an initial policy; starting
    // every state at action 0 just makes the changes easy to observe.
    for (int i = 0; i <= 5; i++)
    {
        Pai[i].state = i;
        Pai[i].action = 0;
        Vps[i] = 0;
        RealStaFa[i].s = i;
        RealStaFa[i].Character = (double)i; // experimental placeholder data
    }

    while (1)
    {
        // Snapshot the values so the stopping test can measure this sweep's change.
        for (int i = 0; i <= 5; i++)
        {
            Vbackup[i] = Vps[i];
        }

        for (int i = 0; i <= 5; i++)
        {
            // Greedy improvement: pick the action with the largest expected
            // next-state value. The running maximum must be updated together
            // with the index; otherwise the loop selects the last action that
            // beats action 0 rather than the best one.
            int bestAction = 0;
            double bestValue = Fpsa(i, 0);
            for (int j = 1; j <= 3; j++)
            {
                const double candidate = Fpsa(i, j);
                if (candidate > bestValue)
                {
                    bestValue = candidate;
                    bestAction = j;
                }
            }

            // The policy must be updated before Vps[i]: Fps() reads Pai[i].action.
            Paiupdate(i, bestAction);
            Vps[i] = Fps(i); // in-place update: later states in this sweep see the new value
        }

        // Provisional stopping rule: squared change relative to the squared
        // norm of the new value vector. After the first sweep every Vps[i]
        // is at least Rs(i) >= 0.9, so the denominator is non-zero.
        double changeSq = 0;
        double normSq = 0;
        for (int i = 0; i <= 5; i++)
        {
            const double delta = Vps[i] - Vbackup[i];
            changeSq += delta * delta;
            normSq += Vps[i] * Vps[i];
        }
        if (changeSq / normSq <= 0.1)
        {
            break;
        }
    }

    printf("结束\n"); // was "/n", which printed the two characters instead of a newline
    return 0;
}
相关阅读:
CodeForces 58C Trees
【转】二分匹配题集
 HDU2604 Queuing
HDU1281 棋盘游戏
 HDU3360 National Treasures
HDU2444 The Accomodation of Students
HDU1498 50 years, 50 colors
HDU1068 Girls and Boys
【转】常用的latex宏包
 【转】网络流题集
原文地址：https://www.cnblogs.com/hgonlywj/p/4842700.html