• 算法笔记--极大极小搜索及alpha-beta剪枝


    参考1:https://www.zhihu.com/question/27221568

    参考2:https://blog.csdn.net/hzk_cpp/article/details/79275772

    参考3:https://blog.csdn.net/BIT1120172185/article/details/80963609

    极小极大搜索算法即minimax搜索算法

    主要应用于零和博弈(非胜即负,如围棋,象棋,井子棋等),完全信息(玩家知道之前所有的步骤。象棋就是完全信息,因为玩家是交替着落子,且之前的步骤都能在棋盘上体现)

    这个算法采用搜索算法递归实现,一层为先手,记为a, 一层为后手,记为b, 交替出现

    对于最终局面,有一个分数(比如:先手胜分数为1, 平局分数为0,先手输分数为-1)

    先手a想要让这个分数越大越好,后手b想要让这个分数越小越好,于是搜索到先手这一层,取最大返回,搜索到后手这一层,取最小返回

    如下图:

    于是伪代码为:

    int MaxMin(position,int d)
    {
        int bestvalue,value;
        if(game over)   //检查游戏是否结束 
            return evaluation(p);// 游戏结束,返回估值 
        if(depth<=0)    //检查是否是叶子节点 
            return evaluation(p);//叶子节点,返回估值 
        if(max)         //极大值点 
            bestvalue=-INFINTY;
        else            //极小值点 
            bestvalue=INFINTY;
        for(each possibly move m)
        {
            MakeMove(m);    //走棋 
            value=MaxMin(p,d-1);
            UnMakeMove(m);  //恢复当前局面 
            if(max)
                bestvalue=max(value,bestvalue);//取最大值 
            else
                bestvalue=min(value,bestvalue);//取最小值 
        }
        return bestvalue;
    }

     关于alpha-beta剪枝:

    如果当前层为取最小,如果取最小后比上一层当前最大值还小,则不需要往下搜索,因为上一层根本不会选择当前节点往下搜,还有更好的选择

    同理,如果当前层为取最大,如果取最大后比上一层当前最小值还大,则不需要往下搜索,因为上一层根本不会选择当前节点往下搜

    具体推荐看最上面的知乎链接点赞最多的回答。

    alpha-beta剪枝后的伪代码:

    int AlphaBeta(nPlay,nAlpha,nBeta)
    {
        if(game over)
            return Eveluation;   //胜负已分,返回估值
        if(nPly==0)
            return  Eveluation;  //叶子节点返回估值
        if(Is Min Node)          //判断 节点类型 
        {        // 极小值节点
            for(each possible move m)
            {
                make move m;      //生成新节点
                score=AlphaBeta(nPly-1,nAlpha,nBeta)//递归搜索子节点
                unmake move m;//撤销搜索过的节点
                if(score<nBeta)
                {
                    nBeta=score;//取极小值
                    if(nAlpha>=nBeta)
                        return nAlpha;//alpha剪枝,抛弃后继节点 
                 } 
            }
            return nBeta;//返回最小值 
        }
        else
        {//取极大值的节点 
            for(each possible move m)
            {
                make move m;      //生成新节点
                score=AlphaBeta(nPly-1,nAlpha,nBeta)//递归搜索子节点
                unmake move m;//撤销搜索过的节点
                if(score>nAlpha)
                {
                    nAlpha=score;//取极小值
                    if(nAlpha>=nBeta)
                        return nBeta;//nBeta剪枝,抛弃后继节点 
                 } 
            }
            return nAlpha;//返回最小值 
        } 
    } 

    例题1:POJ - 1568

    思路:井子棋下的步数小于等于4必平局

    代码:

    #pragma GCC optimize(2)
    #pragma GCC optimize(3)
    #pragma GCC optimize(4)
    #include<cstdio>
    #include<iostream>
    using namespace std;
    #define fi first
    #define se second
    #define pi acos(-(long double)1.0)
    #define LL long long
    //#define mp make_pair
    #define pb push_back
    #define ls rt<<1, l, m
    #define rs rt<<1|1, m+1, r
    #define ULL unsigned LL
    #define pll pair<LL, LL>
    #define pli pair<LL, int>
    #define pii pair<int, int>
    #define piii pair<pii, int>
    #define pdd pair<long double, long double>
    #define mem(a, b) memset(a, b, sizeof(a))
    #define fio ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
    #define fopen freopen("in.txt", "r", stdin);freopen("out.txt", "w", stout);
    //head
    
    char s[10], mp[10][10]; 
    int cnt = 0, ansx, ansy;
    int check(char c) {
        for (int i = 0; i < 4; i++) {
            int a = 0;
            for (int j = 0; j < 4; j++) {
                if(mp[i][j] == c) a++;
            }
            if(a == 4) return 1;
            a = 0;
            for (int j = 0; j < 4; j++) {
                if(mp[j][i] == c) a++;
            }
            if(a == 4) return 1;
        }
        int a = 0;
        for (int i = 0; i < 4; i++) if(mp[i][i] == c) a++;
        if(a == 4) return 1;
        a = 0;    
        for (int i = 0; i < 4; i++) if(mp[i][3-i] == c) a++;
        if(a == 4) return 1;
        return 0;
    }
    int dfs(int step, int a, int b) {
        if((cnt-step)%2 == 0) {
            int tmp = check('o');
            if(tmp || step == 0) return -tmp;
        }
        else {
            int tmp = check('x');
            if(tmp || step == 0) return tmp;
        }
        if((cnt-step)%2 == 0) {
            for (int i = 0; i < 4; i++) {
                for (int j = 0; j < 4; j++) {
                    if(mp[i][j] == '.') {
                        mp[i][j] = 'x';
                        int tmp = dfs(step-1, a, b);
                        mp[i][j] = '.';
                        if(tmp >= a) {
                            if(step == cnt) {
                                ansx = i;
                                ansy = j;
                            } 
                            a = tmp;
                            if(b <= a) return b;
                        }
                    }        
                }
            }
            return a;
        }
        else {
            for (int i = 0; i < 4; i++) {
                for (int j = 0; j < 4; j++) {
                    if(mp[i][j] == '.') {
                        mp[i][j] = 'o';
                        int tmp = dfs(step-1, a, b);
                        mp[i][j] = '.';
                        if(tmp <= b) {
                            b = tmp;
                            if(a >= b) return a;
                        }
                    }
                }
            }
            return b;
        }
    }
    int main() {
        while(~scanf("%s", s)) {
            if(s[0] == '$') return 0;
            for (int i = 0; i < 4; i++) scanf("%s", mp[i]);
            cnt = 0;
            for (int i = 0; i < 4; i++) for (int j = 0; j < 4; j++) if(mp[i][j] == '.') cnt++; 
            if(cnt >= 12) {
                printf("#####
    ");
                continue;
            }
            int ans = dfs(cnt, -1, 1);    
            if(ans == 1) printf("(%d,%d)
    ", ansx, ansy);
            else printf("#####
    ");
        }
        return 0;
    }
    View Code

    例题2:POJ - 1085

    思路1:记忆化搜索dp, dp[s]表示从状态s出发先手所能获得的最大利益。

    特点:空间大,时间短

    代码:

    #pragma GCC optimize(2)
    #pragma GCC optimize(3)
    #pragma GCC optimize(4)
    #include<cstdio>
    #include<iostream>
    #include<cstring>
    #include<algorithm>
    #include<cmath>
    #include<climits> 
    using namespace std;
    #define fi first
    #define se second
    #define pi acos(-1.0)
    #define LL long long
    //#define mp make_pair
    #define pb push_back
    #define ls rt<<1, l, m
    #define rs rt<<1|1, m+1, r
    #define ULL unsigned LL
    #define pll pair<LL, LL>
    #define pli pair<LL, int>
    #define pii pair<int, int>
    #define piii pair<pii, int>
    #define pdd pair<long double, long double>
    #define mem(a, b) memset(a, b, sizeof(a))
    #define fio ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
    #define fopen freopen("in.txt", "r", stdin);freopen("out.txt", "w", stout);
    //head
    
    int line[18][2] = {{1, 2}, {1, 3}, {2, 3}, {2, 4}, {2, 5}, {4, 5}, {3, 5}, {3, 6}, {5, 6}, {4, 7}, {4, 8}, {7, 8}, {5, 8}, {5, 9}, {8, 9}, {6, 9}, {6, 10}, {9, 10}};
    int tri[9][3] = {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}, {12, 13, 14}, {15, 16, 17}, {2, 4, 6}, {5, 10, 12}, {8, 13, 15}};
    int num[11][11];
    int dp[1<<20];
    int count(int s) {
        int cnt = 0;
        for (int i = 0; i < 9; i++) if((s&(1<<tri[i][0])) && (s&(1<<tri[i][1])) && (s&(1<<tri[i][2]))) cnt++;
        return cnt;
    }
    int dfs(int s) {
        if(~dp[s]) return dp[s];
        if(__builtin_popcount(s) == 18) return 0;
        int prenum = count(s);
        int ans = INT_MIN;
        for(int i = 0; i < 18; i++) {
            if(!(s&(1<<i))) {
                int nownum = count(s|(1<<i));
                if(nownum == prenum) {
                    ans = max(ans, -dfs(s|1<<i));
                }
                else {
                    ans = max(ans, nownum - prenum + dfs(s|1<<i));
                }
            }
        }  
        return dp[s] = ans;
    }
    int main() {
        int T, m, u, v;
        for (int i = 0; i < 18; i++) num[line[i][0]][line[i][1]] = num[line[i][1]][line[i][0]] = i;
        mem(dp, -1);
        scanf("%d", &T);
        for(int cs = 1; cs <= T; cs++) {
            int st = 0;
            scanf("%d", &m);
            int prenum = 0, nownum = 0;
            int cnt = 0, step = 0;
            for (int i = 1; i <= m; i++) {
                scanf("%d %d", &u, &v);
                st |= 1<<num[u][v];
                nownum = count(st);
                if(nownum > prenum) {
                    if(step%2 == 0) cnt += nownum - prenum;
                    else cnt -= nownum - prenum;
                } 
                else step++; 
                prenum = nownum;
            }    
            if(step%2 == 0) cnt += dfs(st);
            else cnt -= dfs(st);
            if(cnt > 0) printf("Game %d: A wins.
    ", cs);
            else printf("Game %d: B wins.
    ", cs);
        }
        return 0;
    }
    View Code

    思路2:极大极小搜索+alpha-beta剪枝

    特点:空间小,时间长

    代码:

    #pragma GCC optimize(2)
    #pragma GCC optimize(3)
    #pragma GCC optimize(4)
    #include<cstdio>
    #include<iostream>
    #include<cstring>
    #include<algorithm>
    #include<cmath>
    #include<climits> 
    using namespace std;
    #define fi first
    #define se second
    #define pi acos(-1.0)
    #define LL long long
    //#define mp make_pair
    #define pb push_back
    #define ls rt<<1, l, m
    #define rs rt<<1|1, m+1, r
    #define ULL unsigned LL
    #define pll pair<LL, LL>
    #define pli pair<LL, int>
    #define pii pair<int, int>
    #define piii pair<pii, int>
    #define pdd pair<long double, long double>
    #define mem(a, b) memset(a, b, sizeof(a))
    #define fio ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
    #define fopen freopen("in.txt", "r", stdin);freopen("out.txt", "w", stout);
    //head
    
    int line[18][2] = {{1, 2}, {1, 3}, {2, 3}, {2, 4}, {2, 5}, {4, 5}, {3, 5}, {3, 6}, {5, 6}, {4, 7}, {4, 8}, {7, 8}, {5, 8}, {5, 9}, {8, 9}, {6, 9}, {6, 10}, {9, 10}};
    int tri[9][3] = {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}, {12, 13, 14}, {15, 16, 17}, {2, 4, 6}, {5, 10, 12}, {8, 13, 15}};
    int num[11][11];
    int count(int s) {
        int cnt = 0;
        for (int i = 0; i < 9; i++) if((s&(1<<tri[i][0])) && (s&(1<<tri[i][1])) && (s&(1<<tri[i][2]))) cnt++;
        return cnt;
    }
    int dfs(int s, int a, int b, int alpha, int beta, int step) {
        if(__builtin_popcount(s) == 18) {
            if(a > b) return 1;
            else return -1;
        }
        int prenum = count(s);
        for(int i = 0; i < 18; i++) {
            if(!(s&(1<<i))) {
                int nownum = count(s|(1<<i));
                if(nownum == prenum) {
                    if(step%2 == 0) {
                        int tmp = dfs(s|1<<i, a, b, alpha, beta, step+1);    
                        if(tmp >= alpha) {
                            alpha = tmp;
                            if(alpha >= beta) return alpha;
                        }
                    }
                    else {
                        int tmp = dfs(s|1<<i, a, b, alpha, beta, step+1);    
                        if(tmp <= beta) {
                            beta = tmp;
                            if(alpha >= beta) return beta;
                        }
                    }
                }
                else {
                    if(step%2 == 0) {
                        int tmp = dfs(s|1<<i, a+nownum-prenum, b, alpha, beta, step);    
                        if(tmp >= alpha) {
                            alpha = tmp;
                            if(alpha >= beta) return alpha;
                        }
                    }
                    else {
                        int tmp = dfs(s|1<<i, a, b+nownum-prenum, alpha, beta, step);    
                        if(tmp <= beta) {
                            beta = tmp;
                            if(alpha >= beta) return beta;
                        }
                    }
                }
            }
        }  
        if(step%2 == 0) return alpha;
        else return beta;
    }
    int main() {
        int T, m, u, v;
        for (int i = 0; i < 18; i++) num[line[i][0]][line[i][1]] = num[line[i][1]][line[i][0]] = i;
        scanf("%d", &T);
        for(int cs = 1; cs <= T; cs++) {
            int st = 0;
            scanf("%d", &m);
            int prenum = 0, nownum = 0;
            int step = 0, a = 0, b = 0;
            for (int i = 1; i <= m; i++) {
                scanf("%d %d", &u, &v);
                st |= 1<<num[u][v];
                nownum = count(st);
                if(nownum > prenum) {
                    if(step%2 == 0) a += nownum - prenum;
                    else b += nownum - prenum;
                } 
                else step++; 
                prenum = nownum;
            }    
            int cnt = dfs(st, a, b, -1, 1, step);
            if(cnt > 0) printf("Game %d: A wins.
    ", cs);
            else printf("Game %d: B wins.
    ", cs);
        }
        return 0;
    }
    View Code
  • 相关阅读:
    npm 全局执行 update 、 outdated 出现 npm-debug.log 404 错误的问题
    python 爬虫爬取内容时, xa0 、 u3000 的含义
    SCNU ACM 2016新生赛决赛 解题报告
    SCNU ACM 2016新生赛初赛 解题报告
    【记录】Ubuntu下安装VirtualBox
    Ubuntu16.04 LTS下apt安装WireShark
    Java的初始化块、静态初始化块、构造函数的执行顺序及用途探究
    Ubuntu在wps-office等qt5程序下不能切换中文fcitx输入法的问题
    Ubuntu管理开机启动服务项 -- 图形界面的Boot-up Manager
    解决Ubuntu下Firefox+OpenJDK没有Java插件的问题
  • 原文地址:https://www.cnblogs.com/widsom/p/10153159.html
Copyright © 2020-2023  润新知