• [MCTS] 不围棋NoGoAI


    计概课的大作业

    学了一下MCTS,随便调了下参数(

    upd:被锤爆了 需要修改一下随机后的估值(咕咕咕


    MCTS参考资料

    https://blog.csdn.net/baidu_40614951/article/details/105481498

    https://www.cnblogs.com/yifdu25/p/8303462.html

    界面相关

    https://blog.csdn.net/luoshengkim/article/details/50412354

    平台

    https://www.botzone.org.cn/game/NoGo


    扔一下代码

      1 #include <cstdio>
      2 #include <cstring>
      3 #include <algorithm>
      4 #include <cmath>
      5 #include <vector>
      6 #include <string>
      7 #include <iostream>
      8 #include "jsoncpp/json.h"
      9 using namespace std;
     // Board side length; the board is N x N.
     const int N=9;
     // Capacity of the search-tree node pool (the per-node arrays declared further
     // down). Nodes are numbered from 1 and every search iteration may create one
     // node, so the pool has to hold more than TimeLimit + 1 entries. The former
     // value 10000 was smaller than the 15000-iteration budget, which let the
     // per-node arrays be indexed past their end.
     const int M=16384;
     // Row / column offsets of the four orthogonal neighbours of a cell.
     const int dx[4]={-1,0,1,0};
     const int dy[4]={0,-1,0,1};
     // Colour played by this bot (set in main to 1 or -1); the opponent is the
     // negated value.
     int currBotColor;
     14 
     15 class Grid {
     16 public:
     17     int grid[N][N],avasz,curCol,lose;//,turn=1; //turn 记为我方已落子数+1
     18     bool use[N*N];
     19     vector<int> ava;
     20     bool vis[N][N];
     21     bool ingrid(int x,int y) {return x>=0 && y>=0 && x<9 && y<9;}
     22     bool dfsqi(int x,int y) { //判断是否有气
     23         int tx,ty; vis[x][y]=1; bool flag=0;
     24         for (int k=0; k<4; k++) if (ingrid(tx=x+dx[k],ty=y+dy[k])) {
     25             if (!grid[tx][ty]) flag=1;
     26             else if (grid[x][y]==grid[tx][ty] && !vis[tx][ty] && dfsqi(tx,ty)) flag=1;
     27         }
     28         return flag;
     29     }
     30     bool judgeAvailable(int x,int y,int col) {
     31         if (grid[x][y]) return 0;
     32         grid[x][y]=col; memset(vis,0,sizeof vis);
     33         if (!dfsqi(x,y)) {grid[x][y]=0; return 0;}
     34         int tx,ty;
     35         for (int k=0; k<4; k++) if (ingrid(tx=x+dx[k],ty=y+dy[k])&&!vis[tx][ty]) 
     36             if (col==-grid[tx][ty]&&!dfsqi(tx,ty)) {grid[x][y]=0;return 0;}
     37         grid[x][y]=0; return 1;
     38     }
     39     void checkavailable() {
     40         if (ava.size()) ava.clear();
     41         for (int i=0; i<9; i++) for (int j=0; j<9; j++) if (judgeAvailable(i,j,curCol))
     42             ava.push_back(i*9+j);
     43         avasz=ava.size(); if (!avasz) lose=1; else lose=0;
     44         memset(use,0,sizeof use);
     45     }
     46     int getaction() {
     47         if (!avasz) return -1;
     48         return ava[rand()%avasz];
     49     }
     50 } G;
     51 
     52 // const double Con = 0.70710678; //调参
     53 const double Con = 0.8;
     54 const int SearchDepth = 60;
     55 const int TimeLimit = 15000;
     56 int cnt,rt,act[M],A[M],B[M],fa[M];// A/B表胜率
     57 Grid g[M];
     58 vector<int> son[M];
     59 
     60 int BestChild(int v,bool flag,double c) {//c是参数
     61     int id; double mx=-1,val;
     62     if (g[v].curCol != currBotColor) {
     63         if (flag) for (int i=son[v].size()-1; ~i; i--) {
     64             if (!B[son[v][i]]) val=1e5;
     65             else val=1.0*(B[son[v][i]]-A[son[v][i]])/B[son[v][i]]+c*sqrt(2*log(B[v])/B[son[v][i]]);//需要保证B非零!!
     66             if (val-mx>1e-8) mx=val,id=i;
     67         } else for (int i=son[v].size()-1; ~i; i--) {
     68             if (!B[son[v][i]]) val=1e5;
     69             else val=1.0*(B[son[v][i]]-A[son[v][i]])/B[son[v][i]];//需要保证B非零!!
     70             if (val-mx>1e-8) mx=val,id=i;
     71         }
     72     } else {
     73         if (flag) for (int i=son[v].size()-1; ~i; i--) {
     74             if (!B[son[v][i]]) val=1e5;
     75             else val=1.0*A[son[v][i]]/B[son[v][i]]+c*sqrt(2*log(B[v])/B[son[v][i]]);//需要保证B非零!!
     76             if (val-mx>1e-8) mx=val,id=i;
     77         } else for (int i=son[v].size()-1; ~i; i--) {
     78             if (!B[son[v][i]]) val=1e5;
     79             else val=1.0*A[son[v][i]]/B[son[v][i]];//需要保证B非零!!
     80             if (val-mx>1e-8) mx=val,id=i;
     81         }
     82     }
     83     return son[v][id];
     84 }
     85 int expand(int v) {// Expansion
     86     for (int i=son[v].size()-1; ~i; i--) g[v].use[act[son[v][i]]]=1;
     87     vector<int> tmp;
     88     for (int i=g[v].avasz-1; ~i; i--) if (!g[v].use[g[v].ava[i]]) tmp.push_back(g[v].ava[i]);
     89     int ac = tmp[rand()%tmp.size()];
     90     g[++cnt]=g[v],g[cnt].grid[ac/9][ac%9]=g[v].curCol,g[cnt].curCol=-g[v].curCol;
     91     fa[cnt]=v; son[v].push_back(cnt); act[cnt]=ac; g[cnt].checkavailable();
     92     return cnt;
     93 }
     94 int TreePolicy(int v) {
     95     int dep=SearchDepth;
     96     while (g[v].avasz && dep--) //调参(控制层数)
     97         if (son[v].size()<g[v].avasz) return expand(v);
     98         else v=BestChild(v,1,Con);
     99     return v;
    100 }
    101 int DefaultPolicy(int v) {
    102     Grid gt=g[v];
    103     vector<int> a,b; int pa=0,pb=0; bool cur=1;
    104     for (int i=0; i<81; i++) if (!gt.grid[i/9][i%9]) a.push_back(i),b.push_back(i);
    105     for (int i=0; i<a.size(); i++) swap(a[i],a[rand()%a.size()]);
    106     for (int i=0; i<b.size(); i++) swap(b[i],b[rand()%b.size()]);
    107     while (pa<a.size()&&pb<b.size()) {
    108         if (cur) {
    109             while (pa<a.size() && !gt.judgeAvailable(a[pa]/9,a[pa]%9,gt.curCol)) pa++;
    110             if (pa==a.size()) return currBotColor == gt.curCol ? 0:1;
    111             gt.grid[a[pa]/9][a[pa]%9]=gt.curCol,pa++,cur=0;
    112         }
    113         else {
    114             while (pb<b.size() && !gt.judgeAvailable(b[pb]/9,b[pb]%9,-gt.curCol)) pb++;
    115             if (pb==b.size()) return currBotColor == -gt.curCol ? 0:1;
    116             gt.grid[b[pb]/9][b[pb]%9]=-gt.curCol,pb++,cur=1;
    117         }
    118     }
    119     if (pa==a.size()) return currBotColor == gt.curCol ? 0:1;
    120     else return currBotColor == -gt.curCol ? 0:1;
    121 }
    122 inline void BackUp(int v,int dt) {for (; v; v=fa[v]) A[v]+=dt,B[v]++;} //判断胜负以currBotColor计
    123 int MCTS(int runtime) {
    124     cnt=rt=1; g[1]=G;
    125     while (runtime--) {
    126         int v=TreePolicy(rt); //Selection
    127         int dt=DefaultPolicy(v); //Simulation
    128         BackUp(v,dt); //Backpropagation
    129     }
    130     return act[BestChild(rt,0,0)];
    131 }
    132 
    133 int main() {
    134     srand((unsigned)time(0));
    135     string str;
    136     getline(cin,str);
    137     Json::Reader reader;
    138     Json::Value input;
    139     reader.parse(str, input);
    140 
    141     int x,y;
    142     int turnID = input["responses"].size();
    143     G.curCol = currBotColor = input["requests"][0]["x"].asInt()<0 ? 1:-1;
    144     for (int i = 0; i < turnID; i++) {
    145         x=input["requests"][i]["x"].asInt(), y=input["requests"][i]["y"].asInt();
    146         if (x!=-1) G.grid[x][y]=-currBotColor;
    147         x=input["responses"][i]["x"].asInt(), y=input["responses"][i]["y"].asInt();
    148         if (x!=-1) G.grid[x][y]=currBotColor;//,turn++;
    149     }
    150     x=input["requests"][turnID]["x"].asInt(), y=input["requests"][turnID]["y"].asInt();
    151     if (x!=-1) G.grid[x][y]=-currBotColor;
    152     
    153     G.checkavailable();
    154     int decision=MCTS(TimeLimit);//调参
    155     
    156     Json::Value ret;
    157     Json::Value action;
    158     action["x"]=decision/9; action["y"]=decision%9;
    159     ret["response"] = action;
    160     Json::FastWriter writer;
    161     cout << writer.write(ret) << endl;
    162     return 0;
    163 }
  • 相关阅读:
    广域网(ppp协议、HDLC协议)
    0120. Triangle (M)
    0589. N-ary Tree Preorder Traversal (E)
    0377. Combination Sum IV (M)
    1074. Number of Submatrices That Sum to Target (H)
    1209. Remove All Adjacent Duplicates in String II (M)
    0509. Fibonacci Number (E)
    0086. Partition List (M)
    0667. Beautiful Arrangement II (M)
    1302. Deepest Leaves Sum (M)
  • 原文地址:https://www.cnblogs.com/hnooo/p/14258513.html
Copyright © 2020-2023  润新知