整理强化学习知识, 实践出真知。
本篇文章只贴代码,共包含三部分:一个 game(五子棋环境)、一个 player(随机下棋的玩家)和一个 WuziBoard(棋盘可视化)。
效果图:
绘制棋盘:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Hiuhung Wan
import time
import turtle
from enum import Enum


class PotColor(Enum):
    """Stone colours of the two players."""

    Black = 1
    White = 2


class WuziBoard(object):
    """Draw a gomoku (five-in-a-row) board and its stones with turtle graphics.

    The grid is a square of ``BOARD_SIZE`` turtle units centred on the origin,
    with ``RowNum`` lines in each direction.
    """

    # Side length of the grid in turtle units.
    BOARD_SIZE = 500

    def __init__(self, RowNum):
        """Prepare the turtle and compute the grid spacing.

        Args:
            RowNum: number of grid lines per direction (at least 2).

        Raises:
            ValueError: if ``RowNum`` is smaller than 2 (the spacing would
                require a division by zero).
        """
        if RowNum < 2:
            raise ValueError("RowNum must be at least 2")
        turtle.speed(9)
        turtle.hideturtle()
        self.RowNum = RowNum
        # Half of the distance between two neighbouring grid lines.
        self.halfDim = self.BOARD_SIZE / (RowNum - 1) / 2.0

    def drawBoard(self, ActionHis=None):
        """Draw the empty grid, then the stones of ``ActionHis`` if given.

        Args:
            ActionHis: optional sequence of ``(x, y, color)`` entries, drawn
                via :meth:`drawNow`.

        Blocks in ``turtle.done()`` until the window is closed.
        """
        turtle.screensize(400, 400, "white")
        turtle.title('五子棋')
        turtle.home()
        turtle.speed(0)
        # NOTE(review): fixed pause before drawing starts; presumably gives the
        # window time to appear -- confirm whether it is still wanted.
        time.sleep(5)

        low = -self.BOARD_SIZE / 2
        high = low + self.BOARD_SIZE
        spacing = self.halfDim * 2

        # Vertical lines, left to right.
        for i in range(self.RowNum):
            x = low + i * spacing
            self._drawSegment(x, low, x, high)

        # Horizontal lines, bottom to top.
        for i in range(self.RowNum):
            y = low + i * spacing
            self._drawSegment(low, y, high, y)

        if ActionHis is not None:
            self.drawNow(ActionHis)

        turtle.done()

    @staticmethod
    def _drawSegment(x0, y0, x1, y1):
        """Draw one straight line from (x0, y0) to (x1, y1)."""
        turtle.penup()
        turtle.setpos(x0, y0)
        turtle.pendown()
        turtle.setpos(x1, y1)

    @staticmethod
    def _dotColor(potColor):
        """Return the turtle colour name used for a stone.

        ``potColor`` may be a ``PotColor`` member or its integer value.  An
        Enum member never compares equal to a plain int, so both spellings of
        black are checked explicitly; everything else is drawn red so that it
        stays visible on the white background.
        """
        if potColor in (PotColor.Black, PotColor.Black.value):
            return "Black"
        return "Red"

    def action2potxy(self, action):
        """Convert a grid cell ``(column, row)`` into turtle coordinates."""
        origin = -self.BOARD_SIZE / 2
        x = origin + action[0] * self.halfDim * 2
        y = origin + action[1] * self.halfDim * 2
        return x, y

    def drawNow(self, RunAction):
        """Draw every stone of ``RunAction``; the final entry is drawn larger.

        Args:
            RunAction: sequence of ``(x, y, color)`` entries where ``color``
                is a ``PotColor`` member or its integer value.
        """
        lastIndex = len(RunAction) - 1
        for index, potsite in enumerate(RunAction):
            x, y = self.action2potxy((potsite[0], potsite[1]))
            turtle.penup()
            turtle.setpos(x, y)
            turtle.pendown()
            # Highlight the most recent move by position in the history, not
            # by value, so an equal earlier entry is not enlarged as well.
            size = 20 if index == lastIndex else 10
            turtle.dot(size, self._dotColor(potsite[2]))

    def drawAction(self):
        """Placeholder; not implemented."""
        pass


def main():
    """Demo: draw a 6x6 board with three stones."""
    ActionHis = [
        (0, 1, 1),
        (1, 1, 2),
        (5, 1, 1),
    ]
    wuziBoard = WuziBoard(6)
    wuziBoard.drawBoard(ActionHis)


if __name__ == "__main__":
    main()
游戏代码:
class GameFivePot(object):
    """Gomoku (five-in-a-row) game state: board, legal moves and move history.

    Board cells hold 0 while empty and the colour passed to :meth:`action`
    once a stone has been placed there.
    """

    # Number of same-coloured stones in a line needed to win.
    WIN_LENGTH = 5
    # Unit steps (dx, dy) of the four lines through a cell:
    # along x, along y, main diagonal, anti-diagonal.
    _DIRECTIONS = ((1, 0), (0, 1), (1, 1), (1, -1))

    def __init__(self, rowNum=None, colNum=None):
        """Create an empty board.

        Args:
            rowNum: size of the x axis; defaults to the module-level ROW_NUM.
            colNum: size of the y axis; defaults to the module-level ECO_NUM.
        """
        # The module constants are only looked up when no size is passed.
        self.rowNum = ROW_NUM if rowNum is None else rowNum
        self.colNum = ECO_NUM if colNum is None else colNum

        self.potCount = 0
        self.ActionHis = []
        self.AllAction = [
            (x, y) for x in range(self.rowNum) for y in range(self.colNum)
        ]
        # Independent copy: removing a played move must not shrink AllAction.
        self.AvailAction = list(self.AllAction)
        # Indexed as RunAction[x][y], so the outer dimension is the x axis.
        self.RunAction = [[0] * self.colNum for _ in range(self.rowNum)]

    def getActions(self):
        """Return the list of moves that are still available."""
        return self.AvailAction

    def getRunAction(self):
        """Return the board as a nested list indexed ``[x][y]``."""
        return self.RunAction

    def getActionHis(self):
        """Return the history of ``(x, y, color)`` moves played so far."""
        return self.ActionHis

    def _countLine(self, x, y, dx, dy, potColor):
        """Count the run of ``potColor`` through (x, y) along direction (dx, dy).

        The cell (x, y) itself is counted as 1 without being inspected; at
        most ``WIN_LENGTH - 1`` cells are scanned on each side.
        """
        count = 1
        for sign in (1, -1):
            for step in range(1, self.WIN_LENGTH):
                x1 = x + sign * step * dx
                y1 = y + sign * step * dy
                if not (0 <= x1 < self.rowNum and 0 <= y1 < self.colNum):
                    break
                if self.RunAction[x1][y1] != potColor:
                    break
                count += 1
        return count

    def is_over(self, action, potColor):
        """Check whether the game ended with the stone just placed at ``action``.

        Args:
            action: ``(x, y)`` cell of the stone that was just placed.
            potColor: colour of that stone.

        Returns:
            ``(isOver, isWin)``: ``(True, True)`` if the stone completes a line
            of at least ``WIN_LENGTH``, ``(True, False)`` if no move is left,
            ``(False, False)`` otherwise.
        """
        x, y = action[0], action[1]
        for dx, dy in self._DIRECTIONS:
            if self._countLine(x, y, dx, dy, potColor) >= self.WIN_LENGTH:
                return True, True
        if not self.AvailAction:
            return True, False
        return False, False

    def action(self, action, potColor):
        """Place a stone of ``potColor`` at ``action`` and report the result.

        Args:
            action: ``(x, y)`` cell to play; must still be available.
            potColor: colour of the stone.

        Returns:
            ``(board, isOver, isWin)`` where ``board`` is the nested list
            returned by :meth:`getRunAction`.

        Raises:
            ValueError: if the move is not available; the game state is left
                untouched in that case.
        """
        move = (action[0], action[1])
        # Remove first: an illegal move raises here, before any state changes.
        self.AvailAction.remove(move)
        self.potCount += 1
        self.ActionHis.append((move[0], move[1], potColor))
        self.RunAction[move[0]][move[1]] = potColor
        isOver, isWin = self.is_over(move, potColor)
        return self.RunAction, isOver, isWin

    def __repr__(self):
        return "Game step count: {}, AvailAction len: {}, ".format(
            self.potCount, len(self.AvailAction)
        )
玩家代码:
class GamePlayer(object):
    """A player that picks uniformly at random among the game's available moves."""

    def __init__(self, potColor):
        """Remember the player's stone colour and start with an empty move log."""
        self.color = potColor
        self.actionHis = []

    def getActionHis(self):
        """Return the moves this player has chosen so far."""
        return self.actionHis

    def play(self, game):
        """Choose one move from ``game.getActions()`` and play it.

        Returns:
            The ``(gameInfo, isOver, isWin)`` triple produced by ``game.action``.
        """
        chosen = self.choiceActions(game.getActions())
        # Rebind to a new list instead of mutating in place, so lists handed
        # out earlier by getActionHis() stay unchanged.
        self.actionHis = [*self.actionHis, chosen]
        gameInfo, isOver, isWin = game.action(chosen, self.color)
        return gameInfo, isOver, isWin

    def choiceActions(self, actions):
        """Return one element of ``actions`` picked at random."""
        return random.choice(actions)

    def __repr__(self):
        template = "color: {}, actionHis: {}, "
        return template.format(self.color, self.actionHis)
github 代码地址:
https://github.com/rehylas/play_chess
PS:下一篇文章将使用 MCTS(蒙特卡洛树搜索)让两个玩家相互博弈。