• 强化学习-下棋系列


    整理强化学习知识, 实践出真知。

    本篇文章只贴代码, 包含三部分: GameFivePot (五子棋游戏环境)、GamePlayer (随机下棋的玩家)、WuziBoard (棋盘可视化)。

    效果图:



    绘制棋盘:
    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    # Author:Hiuhung Wan
    
    import turtle
    from enum import Enum
    
    
    
    class PotColor(Enum):
        """Colour of a stone placed on the gomoku board."""

        # Plain Enum members do not compare equal to their integer values,
        # so code comparing against these must use the members themselves.
        Black = 1
        White = 2
    
    import time
    class WuziBoard(object):
        """Draw a gomoku (five-in-a-row) board and its stones with ``turtle``.

        The grid covers a 500 x 500 square centred on the origin (from -250 to
        250 on both axes) and has ``RowNum`` lines in each direction.
        """

        BOARD_SIZE = 500   # side length of the drawn grid, in turtle units
        BOARD_MIN = -250   # coordinate of the first grid line on both axes

        def __init__(self, RowNum):
            """Set up the turtle and the grid spacing.

            Args:
                RowNum: number of grid lines per direction. Must be at least 2,
                    because the spacing is computed by dividing by ``RowNum - 1``.
            """
            turtle.speed(9)
            turtle.hideturtle()
            self.RowNum = RowNum
            # Half the distance between two neighbouring grid lines.
            self.halfDim = self.BOARD_SIZE / (RowNum - 1) / 2.0

        def drawBoard(self, ActionHis=None):
            """Draw the grid, then the stones in ``ActionHis`` if it is given.

            Args:
                ActionHis: optional sequence of ``(x, y, color)`` moves, forwarded
                    to :meth:`drawNow`.

            This call blocks: it ends with ``turtle.done()``, which enters the
            turtle main loop.
            """
            turtle.screensize(400, 400, "white")
            turtle.title('五子棋')
            turtle.home()
            turtle.speed(0)
            # Pause before drawing, kept from the original code
            # (presumably gives the window time to appear -- TODO confirm).
            time.sleep(5)

            low = self.BOARD_MIN
            high = self.BOARD_MIN + self.BOARD_SIZE
            step = self.halfDim * 2

            # Vertical grid lines.
            for i in range(self.RowNum):
                x = low + i * step
                self._drawLine(x, low, x, high)

            # Horizontal grid lines.
            for i in range(self.RowNum):
                y = low + i * step
                self._drawLine(low, y, high, y)

            if ActionHis is not None:
                self.drawNow(ActionHis)

            turtle.done()

        @staticmethod
        def _drawLine(x0, y0, x1, y1):
            """Draw one straight segment from (x0, y0) to (x1, y1)."""
            turtle.penup()
            turtle.setpos(x0, y0)
            turtle.pendown()
            turtle.setpos(x1, y1)

        @staticmethod
        def _isBlack(color):
            """Return True if ``color`` denotes a black stone.

            Accepts both ``PotColor.Black`` and its raw value, because a plain
            ``Enum`` member never compares equal to an int.
            """
            return color == PotColor.Black or color == PotColor.Black.value

        def action2potxy(self, action):
            """Convert a grid index pair ``(col, row)`` to turtle coordinates."""
            x = self.BOARD_MIN + action[0] * self.halfDim * 2
            y = self.BOARD_MIN + action[1] * self.halfDim * 2
            return x, y

        def drawNow(self, RunAction):
            """Draw every stone in ``RunAction``; the last one is drawn larger.

            Args:
                RunAction: sequence of ``(x, y, color)`` moves, where ``color`` is
                    a ``PotColor`` member or its raw value. Black stones are drawn
                    in black, all others in red.
            """
            lastIndex = len(RunAction) - 1
            for index, potsite in enumerate(RunAction):
                x, y = self.action2potxy((potsite[0], potsite[1]))
                turtle.penup()
                turtle.setpos(x, y)
                turtle.pendown()

                dotColor = "Black" if self._isBlack(potsite[2]) else "Red"
                # Highlight the most recent move with a bigger dot.
                dotSize = 20 if index == lastIndex else 10
                turtle.dot(dotSize, dotColor)

        def drawAction(self):
            """Placeholder; not implemented."""
            pass
    
    def main():
        """Demo: draw a 6-line board with three sample stones."""
        # Use PotColor members rather than raw ints: a plain Enum member never
        # compares equal to an int, so raw 1/2 would not be recognised as black.
        ActionHis = [
            (0, 1, PotColor.Black),
            (1, 1, PotColor.White),
            (5, 1, PotColor.Black),
        ]
        wuziBoard = WuziBoard(6)
        wuziBoard.drawBoard(ActionHis)
    
    
    if __name__ == "__main__":
        # Run the drawing demo only when executed as a script, not on import.
        main()

    游戏代码:

    class GameFivePot(object):
        """Gomoku (five-in-a-row) game state: board, move history, win detection.

        Attributes:
            rowNum, colNum: board size along x and y.
            potCount: number of moves played so far.
            AllAction: every ``(x, y)`` cell of the board (never shrinks).
            AvailAction: cells that are still empty.
            ActionHis: played moves as ``(x, y, potColor)`` in order.
            RunAction: grid indexed ``[x][y]``; 0 for empty, else the stone colour.
        """

        # Unit steps of the four lines through a cell:
        # along x, along y, main diagonal, anti-diagonal.
        _DIRECTIONS = ((1, 0), (0, 1), (1, 1), (1, -1))

        def __init__(self, rowNum=None, colNum=None):
            """Create an empty board.

            Args:
                rowNum: board size along x; defaults to the module constant
                    ``ROW_NUM`` (defined elsewhere in the module).
                colNum: board size along y; defaults to the module constant
                    ``ECO_NUM`` (defined elsewhere in the module).
            """
            self.rowNum = ROW_NUM if rowNum is None else rowNum
            self.colNum = ECO_NUM if colNum is None else colNum
            self.potCount = 0
            self.ActionHis = []
            self.AllAction = [
                (x, y) for x in range(self.rowNum) for y in range(self.colNum)
            ]
            # Independent copy, so playing a move does not shrink AllAction.
            self.AvailAction = list(self.AllAction)
            # Outer index is x and inner index is y, matching RunAction[x][y] below.
            self.RunAction = [[0] * self.colNum for _ in range(self.rowNum)]

        def getActions(self):
            """Return the list of cells that are still empty."""
            return self.AvailAction

        def getRunAction(self):
            """Return the board grid, indexed ``[x][y]``."""
            return self.RunAction

        def getActionHis(self):
            """Return the ordered list of played ``(x, y, potColor)`` moves."""
            return self.ActionHis

        def _countRun(self, x, y, dx, dy, potColor):
            """Count consecutive ``potColor`` stones from (x, y) along (dx, dy).

            The starting cell itself is not counted, and at most four cells are
            inspected, which is enough to decide a five-in-a-row.
            """
            count = 0
            for step in range(1, 5):
                cx = x + dx * step
                cy = y + dy * step
                if not (0 <= cx < self.rowNum and 0 <= cy < self.colNum):
                    break
                if self.RunAction[cx][cy] != potColor:
                    break
                count += 1
            return count

        def is_over(self, action, potColor):
            """Check whether the move just played at ``action`` ends the game.

            Args:
                action: ``(x, y)`` of the stone that was just placed.
                potColor: colour of that stone.

            Returns:
                ``(isOver, isWin)``: ``(True, True)`` if the stone completes a line
                of five or more, ``(True, False)`` if no empty cell is left,
                otherwise ``(False, False)``.
            """
            x = action[0]
            y = action[1]

            for dx, dy in self._DIRECTIONS:
                # The placed stone plus the runs on both sides of it.
                lineLen = (
                    1
                    + self._countRun(x, y, dx, dy, potColor)
                    + self._countRun(x, y, -dx, -dy, potColor)
                )
                if lineLen >= 5:
                    return True, True

            if not self.AvailAction:
                return True, False

            return False, False

        def action(self, action, potColor):
            """Place a ``potColor`` stone at ``action`` and report the game status.

            Args:
                action: ``(x, y)`` cell to play; must still be available.
                potColor: colour of the stone being placed.

            Returns:
                ``(RunAction, isOver, isWin)``: the board grid followed by the
                result of :meth:`is_over`.

            Raises:
                ValueError: if the cell is not available (already played or off
                    the board). The game state is left unchanged in that case.
            """
            move = (action[0], action[1])
            # Remove first: it validates the move before any state is modified.
            self.AvailAction.remove(move)

            self.potCount += 1
            self.ActionHis.append((move[0], move[1], potColor))
            self.RunAction[move[0]][move[1]] = potColor

            isOver, isWin = self.is_over(move, potColor)
            return self.RunAction, isOver, isWin

        def __repr__(self):
            return "Game step count: {}, AvailAction len: {},  ".format(
                self.potCount, len(self.AvailAction)
            )
        

    玩家代码:

    class GamePlayer(object):
        """A player that picks a random available move for one stone colour."""

        def __init__(self, potColor):
            """Remember the colour this player plays and start an empty history."""
            self.color = potColor
            self.actionHis = []

        def __repr__(self):
            return "color: {}, actionHis: {},  ".format(self.color, self.actionHis)

        def getActionHis(self):
            """Return the moves this player has chosen so far, in order."""
            return self.actionHis

        def choiceActions(self, actions):
            """Pick one move at random from ``actions``."""
            return random.choice(actions)

        def play(self, game):
            """Choose a move, record it, and apply it to ``game``.

            Returns:
                The ``(gameInfo, isOver, isWin)`` triple returned by
                ``game.action``.
            """
            chosen = self.choiceActions(game.getActions())
            # Rebind rather than append, so lists handed out earlier stay unchanged.
            self.actionHis = self.actionHis + [chosen]

            state, finished, won = game.action(chosen, self.color)
            return state, finished, won

    github 代码地址:

    https://github.com/rehylas/play_chess

     PS: 下一篇文章将用 MCTS 实现相互博弈。




  • 相关阅读:
    hihocoder 1142 三分·三分求极值(三分)
    poj 3304 Segments(计算直线与线段之间的关系)
    poj 1269 Intersecting Lines(判断两直线关系,并求交点坐标)
    poj 2398 Toy Storage(计算几何 点线关系)
    poj 2318 TOYS(计算几何 点与线段的关系)
    计算几何基础(模板)
    Jmeter-基本组成
    java-面向对象
    性能测试基础
    java-多线程
  • 原文地址:https://www.cnblogs.com/xiaoxuebiye/p/9272364.html
Copyright © 2020-2023  润新知