// MyMakv.cpp : 定义控制台应用程序的入口点。 // #include "stdafx.h" struct strategy { int state; int action; };// P 建立状态到行动的映射,P 即映射表 strategy Pai [6]; //这里是最终的结果,即建立状态到行动的映射 int ActionSet[4] = {0,1,2,3}; //数字为行动的标签代号 int stateSet[6] = {0,1,2,3,4,5};//数字为行动的标签代号, 假设 这里的 0到5个状态为 摄像头采集的图像记过初步处理 //划分,这里不表示空间定位。 struct StaFactor { int s; double Character; // 假设Character 为某重要因素,例如当前抓拍的图像特征 //这里先随便写 }; StaFactor RealStaFa[6] = {0};// 和s 相关的评估因素,主要用来计算Rs,即当前状态的回报。 double Vps[6] = {0}; //主要用来保存当前p策略下计算出来的Fps的结果,主要用来不断更新局部最优 double Vbackup[6] = {0}; double Rs( int s) { return( RealStaFa[s].Character + 0.9); ;//先随便写了 } double Ps[4][6][6] = { 0.1,0.2,0.3,0.1,0.1,0.2, 0.4,0.1,0.1,0.1,0.1,0.2, 0.1,0.3,0.2,0.1,0.1,0.2, 0.0,0.1,0.9,0.0,0.0,0.0, 0.1,0.2,0.3,0.1,0.1,0.2, 0.1,0.2,0.3,0.1,0.1,0.2, 0.5,0.1,0.1,0.1,0.1,0.1, 0.1,0.2,0.4,0.1,0.1,0.1, 0.1,0.2,0.3,0.1,0.1,0.2, 0.1,0.2,0.3,0.1,0.1,0.2, 0.1,0.2,0.3,0.1,0.1,0.2, 0.1,0.2,0.3,0.1,0.1,0.2, 0.1,0.2,0.3,0.1,0.1,0.2, 0.7,0.0,0.3,0.0,0.0,0.0, 0.1,0.2,0.3,0.1,0.1,0.2, 0.1,0.2,0.3,0.1,0.1,0.2, 0.1,0.7,0.0,0.0,0.0,0.3, 0.0,0.0,0.0,0.0,0.1,0.9, 0.1,0.2,0.3,0.1,0.1,0.2, 0.4,0.0,0.1,0.2,0.1,0.2, 0.8,0.0,0.0,0.0,0.0,0.2, 0.5,0.0,0.0,0.0,0.5,0.0, 0.0,0.7,0.3,0.0,0.0,0.0, 0.5,0.0,0.0,0.1,0.1,0.2, } ; //这里的 0、1、2、3表示具体的a double Fpsa (int s,int a ) //计算值迭代的贝努利部分 { double Value = 0; for ( int i=0;i<=5;i++) { {Value = Value + (Ps[a][s][i] * Vps[i]);} } return Value; } double Fps( int s) //p 策略下总价值(含未来)评估函数;这里区别于R(s),R(s)为只考虑当前状态的评估函数,写成贝努利方程形式,采用值迭代 { //衰减因子暂时写成0.9 return (Rs(s) + 0.9* Fpsa (s,Pai[s].action)); // 注意pai会影响这里状态转移矩阵的代入,所以是pai下的价值评估 } void Paiupdate(int s, int a ) { Pai[s].action = a; } int _tmain(int argc, _TCHAR* argv[]) { for (int i =0;i<= 5;i++) { Pai[i].state = i; Pai[i].action = 0; // 值迭代本不需要初始化pai,这里方便观察变化 Vps[i] = 0; RealStaFa[i].s = i; RealStaFa[i].Character = (double)i; //先这样写,做实验数据 } while(1) { for (int i =0;i<= 5;i++) { Vbackup[i] = Vps[i]; } for (int i = 0;i<= 5;i++) { double vsacompare = Fpsa(i,0); int maxflag = 0 ; for (int j=0;j<=3;j++) { if (Fpsa(i,j)> vsacompare) { maxflag = j; } } Paiupdate(i,maxflag);//注意这里更新Pai很重要,会影响后面Vps的更新。 Vps[i] = Fps(i); //异步迭代的方式 } //这里写停机条件,先随便写 if ( ( ( Vps[0]- Vbackup[0])*( Vps[0]- Vbackup[0]) +( Vps[1]- Vbackup[1])*( Vps[1]- Vbackup[1]) +( Vps[2]- Vbackup[2])*( Vps[2]- Vbackup[2]) +( Vps[3]- Vbackup[3])*( Vps[3]- Vbackup[3]) +( Vps[4]- Vbackup[4])*( Vps[4]- Vbackup[4]) +( Vps[5]- Vbackup[5])*( Vps[5]- Vbackup[5])) / ( (Vps[0]*Vps[0] + Vps[1]*Vps[1] + Vps[2]*Vps[2] + Vps[3]*Vps[3] + Vps[4]*Vps[4] + Vps[5]*Vps[5])) <=0.1) break; } printf ("结束/n"); return 0; }