这是从my previous question开始的延续.

所以,我想我已经取得了很大的进步.人工智能现在似乎通常会走出最好的一步,并追求胜利,但我面临的最后一个问题是,它似乎不在乎失败.也就是说,如果它有一个fork ,或连续4个,它就会赢得比赛,但如果HUMAN名玩家连续有3个(或4个),它不会做出阻止他们获胜的举动.

更奇怪的是,有时如果我将深度设置为较低的值,它将防止损失,但如果我增加深度则不会.以下是我当前的代码,以及一个未能找到最佳走法(以防止下一轮失败)的棋盘示例:

const ROWS = 9;
const COLS = 9;
const LEN = 5;

const EMPTY = 0;
const HUMAN = 1;
const COMP = 2;

const WINNING_MOVE = 100000;

function checkDirection(grid, who, currChain, sRow, sCol, incRow, incCol) {
  let newChain = 0;

  while (currChain + newChain < LEN) {
    const row = sRow + (incRow * (newChain + 1));
    const col = sCol + (incCol * (newChain + 1));

    if (grid[row * COLS + col] !== who) {
      break;
    }
    
    newChain++;
  }

  return newChain;
}

function lineCheck(grid, who, sRow, sCol, mRow, mCol) {
  let chain = 1;

  chain += checkDirection(grid, who, 0, sRow, sCol, mRow, mCol);
  chain += checkDirection(grid, who, chain, sRow, sCol, -mRow, -mCol);

  return chain >= LEN;
}

function isWinningMove(grid, who, row, col) {    
  return lineCheck(grid, who, row, col, 1, 0) || 
         lineCheck(grid, who, row, col, 0, 1) || 
         lineCheck(grid, who, row, col, 1, 1) || 
         lineCheck(grid, who, row, col, -1, 1);
}

function getTile(grid, row, col) {
  if (row < 0 || col < 0 || row >= ROWS || col >= COLS) {
    return -1;
  }

  return grid[row * COLS + col];
}

function hasNeighbor(board, row, col) {
  if (getTile(board, row - 1, col - 1) > 0) { return true; }
  if (getTile(board, row - 1, col + 1) > 0) { return true; }
  if (getTile(board, row + 1, col - 1) > 0) { return true; }
  if (getTile(board, row + 1, col + 1) > 0) { return true; }

  if (getTile(board, row - 1, col) > 0) { return true; }
  if (getTile(board, row + 1, col) > 0) { return true; }

  if (getTile(board, row, col - 1) > 0) { return true; }
  if (getTile(board, row, col + 1) > 0) { return true; }

  return false;
}

function minimax(board, depth, alpha, beta, player, latestRow, latestCol) {
  if (depth === 0) {
    const val = evaluateBoard(board, latestRow, latestCol);

    return [ val, latestRow * COLS + latestCol ]; // returns a pair (value, move)
  }

  const opponent = player === COMP ? HUMAN : COMP;

  // player argument should be opponent, and return statement should be different per player
  if (isWinningMove(board, opponent, latestRow, latestCol)) {
    const multiplier = player === COMP ? 1 : -1;

    return [ WINNING_MOVE * multiplier, latestRow * COLS + latestCol ];
  }

  let bestMove = -1;

  if (player === COMP) {
    let maxEval = Number.MIN_SAFE_INTEGER;

    for (let row = 0; row < ROWS; row++) {
      for (let col = 0; col < COLS; col++) {
        const idx = row * COLS + col;
        const tileValue = board[idx];

        if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }

        board[idx] = player;
        const evaluation = minimax(board, depth - 1, alpha, beta, HUMAN, row, col)[0];
        board[idx] = tileValue;

        if (evaluation > maxEval) {
          maxEval = evaluation;
          bestMove = idx;
        }

        alpha = Math.max(alpha, evaluation);

        if (beta <= alpha) {
          return [ maxEval, bestMove ];
        }
      }
    }

    return [ maxEval, bestMove ];
  } else {
    let minEval = Number.MAX_SAFE_INTEGER;

    for (let row = 0; row < ROWS; row++) {
      for (let col = 0; col < COLS; col++) {
        const idx = row * COLS + col;
        const tileValue = board[idx];

        if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }

        board[idx] = player;
        const evaluation = minimax(board, depth - 1, alpha, beta, COMP, row, col)[0];
        board[idx] = tileValue;

        if (evaluation < minEval) {
          minEval = evaluation;
          bestMove = idx; // Also track best move for HUMAN.
        }

        beta = Math.min(beta, evaluation);

        if (beta <= alpha) {
          return [ minEval, bestMove ];
        }
      }
    }

    return [ minEval, bestMove ];
  }
}

function evaluatePlayerBoard(grid, who, latestRow, latestCol) {
  let idx = 0;
  let score = 0;

  if (isWinningMove(grid, who, latestRow, latestCol)) {
    return WINNING_MOVE;
  }

  for (let row = 0; row < ROWS; row++) {
    for (let col = 0; col < COLS; col++) {
      if (grid[idx] !== who) { idx++; continue; }

      if (getTile(grid, row - 1, col - 1) === who) { score++; }
      if (getTile(grid, row - 1, col + 1) === who) { score++; }
      if (getTile(grid, row + 1, col - 1) === who) { score++; }
      if (getTile(grid, row + 1, col + 1) === who) { score++; }

      if (getTile(grid, row - 1, col) === who) { score++; }
      if (getTile(grid, row + 1, col) === who) { score++; }

      if (getTile(grid, row, col - 1) === who) { score++; }
      if (getTile(grid, row, col + 1) === who) { score++; }
      
      // if (getTile(grid, row, col) === who) { score++; }

      idx++;
    } 
  }

  return score;
}

function evaluateBoard(grid, latestRow, latestCol) {
  return evaluatePlayerBoard(grid, COMP, latestRow, latestCol) // COMP is maximizing
       - evaluatePlayerBoard(grid, HUMAN, latestRow, latestCol); // HUMAN is minimizing
}

function getBestMove(board, maxDepth) {
  for (let depth = 1; depth <= maxDepth; depth++) {
    const [ evaluation, move ] = minimax(board, depth, Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER, COMP, -1, -1);

    // if we found a winning move already, return early
    // otherwise, keep iterating until we reach max depth
    if (evaluation > 10000 || depth === maxDepth) {
      return move;
    }
  }

  return 0; // should never run
}

const exampleBoard = [
  0, 0, 0, 0, 0, 0, 0, 0, 0, // 0-8
  0, 0, 0, 0, 0, 0, 0, 0, 0, // 9-17
  0, 0, 0, 0, 0, 0, 2, 0, 0, // 18-26
  0, 0, 2, 2, 0, 1, 0, 0, 0, // 27-35
  0, 0, 0, 2, 1, 0, 0, 0, 0, // 36-44
  0, 0, 0, 1, 0, 0, 0, 0, 0, // 45-53
  0, 0, 1, 0, 0, 0, 0, 0, 0, // 54-62
  0, 0, 0, 0, 0, 0, 0, 0, 0, // 63-71
  0, 0, 0, 0, 0, 0, 0, 0, 0, // 72-80
];

console.log(getBestMove(exampleBoard, 3));

我认为它应该记录64(为了防止下一个回合的损失),但它却记录了20

推荐答案

您在这部分代码中有一个逻辑错误:

  // player argument should be opponent, and return statement should be different per player
  if (isWinningMove(board, opponent, latestRow, latestCol)) {
    const multiplier = player === COMP ? 1 : -1;

    return [ WINNING_MOVE * multiplier, latestRow * COLS + latestCol ];
  }

当进入这个区块时,我们知道赢的是opponent,而不是player,所以乘数应该以opponent为基础.如果opponent是最大化的玩家,我们应该乘以1,否则乘以-1.

不是问题,但返回latestRow * COLS + latestCol作为最好的走法是没有意义的,因为current名球员没有最好的走法(他们输了比赛),latestRow * COLS + latestCol肯定不是their步,所以这是无关紧要的.(depth == 0区块也可以说同样的话)

修复:

  // player argument should be opponent, and return statement should be different per player
  if (isWinningMove(board, opponent, latestRow, latestCol)) {
    const multiplier = opponent === COMP ? 1 : -1;

    return [ WINNING_MOVE * multiplier, -1 ];
  }

Javascript相关问答推荐

如何比较嵌套对象的更改并创建更改报告

Angular material 表多个标题行映射

如何在加载的元数据上使用juserc和await中获得同步负载?

docx.js:如何在客户端使用文档修补程序

我应该绑定不影响状态的函数吗?'

Exceljs:我们在file.xlsx(...)&#中发现了一个问题'"" 39人;

使搜索栏更改语言

给定一个凸多边形作为一组边,如何根据到最近边的距离填充里面的区域

XSLT处理器未运行

如何使用子字符串在数组中搜索重复项

类构造函数不能在没有用With Router包装的情况下调用

在使用位板时,如何在Java脚本中判断Connect 4板中中柱的对称性?

钛中的onClick事件需要在两次点击之间等待几秒钟

expo 联系人:如果联系人的状态被拒绝,则请求访问联系人的权限

如何访问此数组中的值?

警告框不显示包含HTML输入字段的总和

Docent.cloneNode(TRUE)不克隆用户输入

react 路由如何使用从加载器返回的数据

与在编剧中具有动态价值的定位器交互

更改管线时不渲染组件的react