1 Star 1 Fork 0

乔之恒 / 博客

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
wuzi.py 4.96 KB
一键复制 编辑 原始数据 按行查看 历史
乔之恒 提交于 2020-12-01 11:57 . update wuzi.py.
import tensorflow as tf
import numpy as np
import random
import pygame
class con:
    # Hyper-parameters and board constants for the Gomoku DQN (work in progress).
    board_size = 19              # playable grid is 19x19 (board itself is padded, see train_checkerboard)
    batch_size = 50
    num_episodes = 2000          # total training episodes
    num_ex_episodes = 1000       # episodes over which epsilon decays linearly to final_epsilon
    final_epsilon = 0.01         # exploration-rate floor
    over_win_reward = -150       # NOTE(review): negative reward despite "win" in the name — confirm sign
    over_lost_reward = -100
    conv_filters_num = [12,24,24]            # per-stage Conv2D filter counts (3 stages)
    conv_kernel_size = [[6,6],[4,4],[4,4]]   # per-stage Conv2D kernel sizes
    pooling_pool_size = [[2,2],[2,2],[2,2]]  # per-stage MaxPool2D pool sizes
    pooling_strides = [2,2,2]                # per-stage MaxPool2D strides
    dense_units_num = [361,361]              # 361 = 19*19 — one output per board cell
    dense_activation = [tf.nn.relu,None]     # final layer is linear (raw Q-values)
    black_zi = 1                 # cell markers: black stone / white stone / empty
    white_zi = -1
    empty_zi = 0
class CNN(tf.keras.Model):
    """Q-network: three Conv2D+MaxPool stages, flatten, two Dense layers.

    Input: a float tensor shaped (batch, 19, 19, 6) — per the original
    author's note, the last six board states stacked on the channel axis
    (TODO confirm against the caller). Output: (batch, 361) raw Q-values,
    one per board cell.
    """

    def __init__(self):
        # BUG FIX: the original subclassed `tf.keras.model` (lowercase —
        # AttributeError) and never called super().__init__(), so Keras
        # would not track the layers.
        super().__init__()
        self.convs = []
        for i in range(3):
            self.convs.append(
                tf.keras.layers.Conv2D(
                    filters=con.conv_filters_num[i],
                    kernel_size=con.conv_kernel_size[i],
                    padding='same',
                    activation=tf.nn.relu,
                ))
        self.poolings = []
        for i in range(3):
            self.poolings.append(
                tf.keras.layers.MaxPool2D(
                    pool_size=con.pooling_pool_size[i],
                    strides=con.pooling_strides[i],
                ))
        self.flatten = tf.keras.layers.Flatten()
        self.denses = []
        for i in range(2):
            self.denses.append(
                tf.keras.layers.Dense(
                    units=con.dense_units_num[i],
                    activation=con.dense_activation[i],
                ))

    def call(self, inputs):  # inputs: (batch_size, 19, 19, 6) tensor
        # BUG FIX: the original assigned each conv/pool output to a
        # throwaway local `input` and then flattened the untouched raw
        # `inputs`, so the conv stack was never actually applied.
        for conv, pool in zip(self.convs, self.poolings):
            inputs = conv(inputs)
            inputs = pool(inputs)
        inputs = self.flatten(inputs)
        for dense in self.denses:
            inputs = dense(inputs)
        return inputs
class train_checkerboard:
    """Gomoku (five-in-a-row) board plus DQN training scaffolding (WIP).

    The board is a (board_size+2) x (board_size+2) array: the 19x19
    playable area lives at indices 1..19, surrounded by a one-cell
    always-empty border that terminates the line scans in check_zi.
    """

    def __init__(self):
        self.board = np.array([])
        self.init_board()
        self.Cnn = CNN()
        # The four scan axes: anti-diagonal, vertical, diagonal, horizontal.
        # (Opposite directions are covered by negating these in check_zi.)
        self.moving = [[1, -1], [1, 0], [1, 1], [0, 1]]
        self.replay_buffer = np.array([])
        self.reward_buffer = np.array([])

    def playing(self, y, x, color):
        """Place `color` at (y, x). Return True iff the move was ILLEGAL
        (cell already occupied); otherwise place the stone and return False."""
        if self.board[y][x] != con.empty_zi:
            return True
        self.board[y][x] = color
        return False

    def check_zi(self, y, x):
        """Return True iff the stone at (y, x) belongs to a run of >= 5.

        BUG FIX: the original scanned only the four positive directions
        starting at (y, x), so a move that completed a line in its middle
        or at its trailing end was never detected. Stones are now counted
        in both directions along each axis. The empty border (value 0,
        never written by playing) stops every scan before it can leave
        the array.
        """
        color = self.board[y][x]
        for dy, dx in self.moving:
            count = 1  # the stone at (y, x) itself
            for sy, sx in ((dy, dx), (-dy, -dx)):
                step = 1
                while step <= 4 and self.board[y + sy * step][x + sx * step] == color:
                    count += 1
                    step += 1
            if count >= 5:
                return True
        return False

    def init_buffer(self):
        # Translated from the original Chinese comments: reward_buffer was
        # labelled "stores the six board states after each game" and
        # replay_buffer "unused" — NOTE(review): the labels look swapped
        # relative to what the code fills in; confirm intent.
        self.reward_buffer = np.array([])
        self.replay_buffer = np.array([])
        # Seed the replay buffer with six flattened empty 21x21 frames.
        # (np.append flattens, so this is a 1-D array of 6*21*21 zeros.)
        for _ in range(6):
            frame = np.zeros(21 * 21)
            self.replay_buffer = np.append(self.replay_buffer, frame)
        # Matching six zero rewards.
        self.reward_buffer = np.append(self.reward_buffer, np.zeros(6))

    def init_board(self):
        # Integer zeros, matching the original list-of-ints construction.
        self.board = np.zeros(
            (con.board_size + 2, con.board_size + 2), dtype=int)

    def train_main(self):
        """Epsilon-greedy training loop (incomplete work in progress).

        BUG FIX: the original exploit branch read `lin_pact =` — a bare
        assignment with no right-hand side, a syntax error that made the
        whole module unparseable — and the two branches were inverted
        (the exploration branch held the greedy placeholder). Only the
        exploration path is implemented here; the exploitation path falls
        back to a random move until the CNN/replay pipeline sketched in
        the author's earlier draft is finished.
        """
        for epi_id in range(1, con.num_episodes + 1):
            # Decay epsilon linearly over num_ex_episodes, floored at final_epsilon.
            epsilon = max((con.num_ex_episodes - epi_id) / con.num_ex_episodes,
                          con.final_epsilon)
            print("epi_id: ", epi_id, " epsilon: ", epsilon)
            if random.random() <= epsilon:
                # Explore: uniform random move on the 19x19 playable area.
                pact_y, pact_x = random.randint(1, 19), random.randint(1, 19)
            else:
                # TODO(review): exploit via self.Cnn over the last six board
                # states (see the original draft); random fallback for now.
                pact_y, pact_x = random.randint(1, 19), random.randint(1, 19)

    def test_main(self):
        # Interactive/visual test entry point; only initializes pygame so far.
        pygame.init()
if __name__ == "__main__":
    # BUG FIX: the original compared against "__main" (missing the trailing
    # underscores), so this body never executed when run as a script.
    wuzi = train_checkerboard()
1
https://gitee.com/qiao_zhi_heng/boke.git
git@gitee.com:qiao_zhi_heng/boke.git
qiao_zhi_heng
boke
博客
master

搜索帮助