这是从my previous question开始的延续.
所以,我想我已经取得了很大的进步.人工智能现在似乎通常会走出最好的一步,并追求胜利,但我面临的最后一个问题是,它似乎不在乎失败.也就是说,如果它有一个fork ,或连续4个,它就会赢得比赛,但如果HUMAN
名玩家连续有3个(或4个),它不会做出阻止他们获胜的举动.
更奇怪的是,有时如果我将深度设置为较低的值,它将防止损失,但如果我增加深度则不会.以下是我当前的代码,以及一个未能找到最佳走法(以防止下一轮失败)的棋盘示例:
const ROWS = 9;
const COLS = 9;
const LEN = 5;
const EMPTY = 0;
const HUMAN = 1;
const COMP = 2;
const WINNING_MOVE = 100000;
function checkDirection(grid, who, currChain, sRow, sCol, incRow, incCol) {
let newChain = 0;
while (currChain + newChain < LEN) {
const row = sRow + (incRow * (newChain + 1));
const col = sCol + (incCol * (newChain + 1));
if (grid[row * COLS + col] !== who) {
break;
}
newChain++;
}
return newChain;
}
function lineCheck(grid, who, sRow, sCol, mRow, mCol) {
let chain = 1;
chain += checkDirection(grid, who, 0, sRow, sCol, mRow, mCol);
chain += checkDirection(grid, who, chain, sRow, sCol, -mRow, -mCol);
return chain >= LEN;
}
function isWinningMove(grid, who, row, col) {
return lineCheck(grid, who, row, col, 1, 0) ||
lineCheck(grid, who, row, col, 0, 1) ||
lineCheck(grid, who, row, col, 1, 1) ||
lineCheck(grid, who, row, col, -1, 1);
}
function getTile(grid, row, col) {
if (row < 0 || col < 0 || row >= ROWS || col >= COLS) {
return -1;
}
return grid[row * COLS + col];
}
function hasNeighbor(board, row, col) {
if (getTile(board, row - 1, col - 1) > 0) { return true; }
if (getTile(board, row - 1, col + 1) > 0) { return true; }
if (getTile(board, row + 1, col - 1) > 0) { return true; }
if (getTile(board, row + 1, col + 1) > 0) { return true; }
if (getTile(board, row - 1, col) > 0) { return true; }
if (getTile(board, row + 1, col) > 0) { return true; }
if (getTile(board, row, col - 1) > 0) { return true; }
if (getTile(board, row, col + 1) > 0) { return true; }
return false;
}
function minimax(board, depth, alpha, beta, player, latestRow, latestCol) {
if (depth === 0) {
const val = evaluateBoard(board, latestRow, latestCol);
return [ val, latestRow * COLS + latestCol ]; // returns a pair (value, move)
}
const opponent = player === COMP ? HUMAN : COMP;
// player argument should be opponent, and return statement should be different per player
if (isWinningMove(board, opponent, latestRow, latestCol)) {
const multiplier = player === COMP ? 1 : -1;
return [ WINNING_MOVE * multiplier, latestRow * COLS + latestCol ];
}
let bestMove = -1;
if (player === COMP) {
let maxEval = Number.MIN_SAFE_INTEGER;
for (let row = 0; row < ROWS; row++) {
for (let col = 0; col < COLS; col++) {
const idx = row * COLS + col;
const tileValue = board[idx];
if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }
board[idx] = player;
const evaluation = minimax(board, depth - 1, alpha, beta, HUMAN, row, col)[0];
board[idx] = tileValue;
if (evaluation > maxEval) {
maxEval = evaluation;
bestMove = idx;
}
alpha = Math.max(alpha, evaluation);
if (beta <= alpha) {
return [ maxEval, bestMove ];
}
}
}
return [ maxEval, bestMove ];
} else {
let minEval = Number.MAX_SAFE_INTEGER;
for (let row = 0; row < ROWS; row++) {
for (let col = 0; col < COLS; col++) {
const idx = row * COLS + col;
const tileValue = board[idx];
if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }
board[idx] = player;
const evaluation = minimax(board, depth - 1, alpha, beta, COMP, row, col)[0];
board[idx] = tileValue;
if (evaluation < minEval) {
minEval = evaluation;
bestMove = idx; // Also track best move for HUMAN.
}
beta = Math.min(beta, evaluation);
if (beta <= alpha) {
return [ minEval, bestMove ];
}
}
}
return [ minEval, bestMove ];
}
}
function evaluatePlayerBoard(grid, who, latestRow, latestCol) {
let idx = 0;
let score = 0;
if (isWinningMove(grid, who, latestRow, latestCol)) {
return WINNING_MOVE;
}
for (let row = 0; row < ROWS; row++) {
for (let col = 0; col < COLS; col++) {
if (grid[idx] !== who) { idx++; continue; }
if (getTile(grid, row - 1, col - 1) === who) { score++; }
if (getTile(grid, row - 1, col + 1) === who) { score++; }
if (getTile(grid, row + 1, col - 1) === who) { score++; }
if (getTile(grid, row + 1, col + 1) === who) { score++; }
if (getTile(grid, row - 1, col) === who) { score++; }
if (getTile(grid, row + 1, col) === who) { score++; }
if (getTile(grid, row, col - 1) === who) { score++; }
if (getTile(grid, row, col + 1) === who) { score++; }
// if (getTile(grid, row, col) === who) { score++; }
idx++;
}
}
return score;
}
function evaluateBoard(grid, latestRow, latestCol) {
return evaluatePlayerBoard(grid, COMP, latestRow, latestCol) // COMP is maximizing
- evaluatePlayerBoard(grid, HUMAN, latestRow, latestCol); // HUMAN is minimizing
}
function getBestMove(board, maxDepth) {
for (let depth = 1; depth <= maxDepth; depth++) {
const [ evaluation, move ] = minimax(board, depth, Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER, COMP, -1, -1);
// if we found a winning move already, return early
// otherwise, keep iterating until we reach max depth
if (evaluation > 10000 || depth === maxDepth) {
return move;
}
}
return 0; // should never run
}
const exampleBoard = [
0, 0, 0, 0, 0, 0, 0, 0, 0, // 0-8
0, 0, 0, 0, 0, 0, 0, 0, 0, // 9-17
0, 0, 0, 0, 0, 0, 2, 0, 0, // 18-26
0, 0, 2, 2, 0, 1, 0, 0, 0, // 27-35
0, 0, 0, 2, 1, 0, 0, 0, 0, // 36-44
0, 0, 0, 1, 0, 0, 0, 0, 0, // 45-53
0, 0, 1, 0, 0, 0, 0, 0, 0, // 54-62
0, 0, 0, 0, 0, 0, 0, 0, 0, // 63-71
0, 0, 0, 0, 0, 0, 0, 0, 0, // 72-80
];
console.log(getBestMove(exampleBoard, 3));
我认为它应该记录64
(为了防止下一个回合的损失),但它却记录了20