Topcoder Marathon Match Round90 勉強メモ

2位のEvbCFfp1XBさんのコードを読んでみる。
https://community.topcoder.com/longcontest/?module=ViewProblemSolution&pm=14094&rd=16495&cr=23100980&subnum=3

問題文はこちら。
https://community.topcoder.com/longcontest/?module=ViewProblemStatement&rd=16495&pm=14094

方針（Forumより）

ランダムに~~ポイント0~~ポイント1未満のターゲットを選ぶ ※コードより修正
ターゲットにポイント1が入るか、ループ回数が10になるまで以下を繰り返す

ターゲット近くの同色ボールを選ぶ
そのボールの近くのボールを1~10個選ぶ ※上で選んだボール含む
選んだボールでビームサーチ（Max Depth=20、幅250)

情報の持ち方

class Board {
    public static final int WALL = 1 << 27;
    public static final int EMPTY = 1 << 28;
    public static final int[] DR = { 0, 1, 0, -1, };
    public static final int[] DC = { -1, 0, 1, 0, };
    public static final int[] DELTA = { -1, 64, 1, -64, };
    public static final int SHIFT = 6;
    public static final int MASK = (1 << SHIFT) - 1;
    public static final int[] ballIndex = new int[64 * 64];
    public static final long[][] hashes = new long[64 * 64][10];
    public int[][] distanceFromTarget;
    public static final int[] targetIndex = new int[64 * 64];
    public List<ColoredPoint> balls = new ArrayList<>();
    public List<ColoredPoint> targets = new ArrayList<>();
    public int R;
    public int C;
    public long hash = 0;
    int countPoint1;
    private ArrayList<State> moveHistory = new ArrayList<>();  // ボールを動かした履歴

    /// 省略 ///

    // ボールをある方向へ動かす
    public void next(int ballIndex, int direction) {
        ColoredPoint ball = balls.get(ballIndex);

        State current = new State(null, ballIndex, ball.z, direction, 0);
        moveHistory.add(current);
        {
            int index1 = targetIndex[ball.z];
            if (index1 != EMPTY && getTargetColor(index1) == ball.color) {
                countPoint1--;
            }
        }
        hash ^= hashes[ball.z][ball.color];

        // ボールを転がす。この更新のやりかたは参考になる  DELTA = { -1, 64, 1, -64, };
        int delta = DELTA[direction];
        int z = ball.z;
        this.ballIndex[z] = EMPTY;
        do {
            z += delta;
        } while (this.ballIndex[z] == EMPTY);
        z -= delta;
        ball.z = z;
        this.ballIndex[z] = ballIndex;

        hash ^= hashes[ball.z][ball.color];
        {
            int index1 = targetIndex[ball.z];
            if (index1 != EMPTY && getTargetColor(index1) == ball.color) {
                countPoint1++;
            }
        }
    }

    /// 省略 ///
}

class State {
    State parent;
    public int index;
    public int z;
    public int direction;
    public double score;
    public int numMoves;

    /// 省略 ///
}

class State2 implements Comparable<State2> {
    State2 parent;
    public int index;
    public int z;
    public int direction;
    public double score;
    public double distance;  // ビームサーチ用。ターゲットまでの（最短）距離合計
    public int numMoves;

    /// 省略 ///
}

巨大なboardクラスを用意している。履歴はStateクラスで持つ。State2はビームサーチ用。

メインルーチン

private void solve() {
    for (; timeManager.getSecond() < 9.5;) {

        if (board.getScorePoint1() > 0.999) {
            break;
        }

        // スコアが1未満のターゲット
        ArrayList<Integer> targetsScore0 = getTargetsScore0();

        // ランダムに１つ選ぶ
        IntArray targetIndexes = new IntArray();
        int targetIndex = targetsScore0.get((int) (targetsScore0.size() * rng.nextDouble()));
        targetIndexes.add(targetIndex);

        // 近隣ボールの選ぶ数。ランダムに試す
        int[] ns = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, };
        Utils.shuffle(ns, rng);
        for (int maxN : ns) {
            if (board.getTargetScore(targetIndex) > 0.999) {
                break;
            }
            if (timeManager.getSecond() > 9.5) {
                break;
            }

            // ターゲットから最寄りの同色ボール
            int nearestSameColorBallIndex = getSameColorScore0BallsDistanceFromTargetAscending(targetIndex).get(0).second;

            ArrayList<Pair<Double, Integer>> balls = getBallsDistanceFromBallAscending(nearestSameColorBallIndex);

            // 同色ボールの近くのボールmaxN個
            IntArray ballIndexes = new IntArray();
            int n = Math.min(maxN, balls.size());
            for (int i = 0; i < n; i++) {
                ballIndexes.add(balls.get(i).second);
            }

            int currentNumMoves = board.numMoves();
            int maxStep = 20;
            int maxBeamWidth = (int) (5 * 1000 / maxStep);
            countBeamsearch++;

            // ビームサーチ
            ArrayList<State> moves = beamsearch(ballIndexes, maxStep, maxBeamWidth, targetIndexes);

            if (moves == null) {
                continue;
            }
            for (State state : moves) {
                assert board.canMove(state.index, state.direction);
                board.next(state.index, state.direction);
            }

            // ベストスコアなら保持
            if (board.getScorePoint1() > bestScore && board.numMoves() < board.maxNumRolls()) {
                bestScore = board.getScorePoint1();
                bestSolution = board.getSolution();
            } else {
                // ベストでなく、ターン数に余裕がないようなら棄却
                if ((double) board.numMoves() / board.maxNumRolls() > 0.99 * timeManager.getSecond() / 9.5) {
                    for (; board.numMoves() > currentNumMoves;) {
                        board.previous();
                    }
                }
            }
        }
    }
}

単純にN個のボールを選ぶのではなく、1個～10個と少ないほうから試している。後の参考になりそう。

ビームサーチの部分

// 動かすボール、最大深さ、ビーム幅、対象ターゲット（１つだけ）
private ArrayList<State> beamsearch(IntArray ballIndexes, int maxStep, int maxBeamWidth, IntArray targetIndexes) {
    ArrayList<State2> currents = new ArrayList<>();
    ArrayList<State2> nexts = new ArrayList<>();
    ArrayList<State2> all = new ArrayList<>();  // 探索にでてきた全ノード。最後に一番いいやつを選ぶ

    int currentNumMoves = board.numMoves();

    used.clear();
    used.add(board.hash);

    // 初手の集合currentsを得る
    for (int i = 0; i < ballIndexes.length; i++) {
        int ballIndex = ballIndexes.values[i];
        ColoredPoint ball = board.balls.get(ballIndex);
        int z = ball.z; // zはボール位置のIntパック
        for (int direction = 0; direction < 4; direction++) {
            if (!board.canMove(ballIndex, direction)) {
                continue;
            }

            board.next(ballIndex, direction);
            if (used.add(board.hash)) {
                double score = getSumScore(ballIndexes);
                double distance = getSumMinDistance(ballIndexes, targetIndexes);
                distance += 1e-2 * rng.nextDouble();
                currents.add(new State2(null, ballIndex, z, direction, score, distance, 1));
            }
            board.previous();
        }
    }


    for (int step = 0; step < maxStep; step++) {
        if (currents.size() == 0) {
            break;
        }
        int beamWidth = Math.min(maxBeamWidth, currents.size());            
        Utils.select(currents, 0, currents.size(), beamWidth);  // currentsをスコア＞距離でソートか？最初のビーム幅分のみ抽出？
        for (int beam = 0; beam < beamWidth; beam++) {
            State2 currentState = currents.get(beam);

            all.add(currentState);

            {
                ArrayList<State2> currentStateList = reverse2(toList(currentState));    // 初手からcurrentStateまでの手のリスト
                ArrayList<State> res = new ArrayList<>();
                for (State2 state : currentStateList) {
                    res.add(new State(null, state.index, state.z, state.direction, state.score));
                }

                board.set(res, currentNumMoves);   // boardをcurrentStateにする
            }

            {
                assert getSumScore(ballIndexes) == currentState.score;
                if (currentState.score > ballIndexes.length - 1e-9) {
                    break;
                }
            }
            for (int i = 0; i < ballIndexes.length; i++) {
                int ballIndex = ballIndexes.values[i];
                ColoredPoint ball = board.balls.get(ballIndex);
                int z = ball.z;
                for (int direction = 0; direction < 4; direction++) {
                    if (!board.canMove(ballIndex, direction)) {
                        continue;
                    }

                    int nextZ = board.simulateNext(ball, direction);    // 該当の方向へ動かしてみたときのz

                    long currentHash = board.hash;

                    boolean add = used.add(currentHash ^ (board.hashes[z][ball.color] ^ board.hashes[nextZ][ball.color]));

                    // 使ってないHashになるなら
                    if (add) {
                        // scoreとdistanceを計算
                        double nextScore = currentState.score;
                        {
                            int index1 = board.targetIndex[z];
                            if (index1 != Board.EMPTY && board.getTargetColor(index1) == ball.color) {
                                nextScore--;
                            }
                        }
                        {
                            int index1 = board.targetIndex[nextZ];
                            if (index1 != Board.EMPTY && board.getTargetColor(index1) == ball.color) {
                                nextScore++;
                            }
                        }
                        double score = nextScore;

                        double nextDistance = currentState.distance;
                        int targetIndex = targetIndexes.values[0];
                        if (board.distanceFromTarget[targetIndex][z] == 0 && board.getTargetColor(targetIndex) != ball.color) {
                            nextDistance -= 1e9;
                        } else {
                            nextDistance -= board.distanceFromTarget[targetIndex][z];
                        }
                        if (board.distanceFromTarget[targetIndex][nextZ] == 0 && board.getTargetColor(targetIndex) != ball.color) {
                            nextDistance += 1e9;
                        } else {
                            nextDistance += board.distanceFromTarget[targetIndex][nextZ];
                        }
                        double distance = nextDistance;

                        distance += 1e-2 * rng.nextDouble();    // これは・・・？
                                                                // ちょっとバラけさせると探索が良くなるのか？

                        // 次手の集合nextsに加える
                        nexts.add(new State2(currentState, ballIndex, z, direction, score, distance, currentState.numMoves + 1));
                    }
                }
            }
        }
        {
            ArrayList<State2> swap = currents;
            currents = nexts;
            nexts = swap;
        }
        nexts.clear();

    }
    for (; board.numMoves() > currentNumMoves;) {
        board.previous();
    }

    if (all.size() == 0) {
        return null;
    }

    Collections.sort(all);
    ArrayList<State2> allList = reverse2(toList(all.get(0)));
    ArrayList<State> res = new ArrayList<>();
    for (State2 state : allList) {
        res.add(new State(null, state.index, state.z, state.direction, state.score));
    }
    return res;

}

ループで幅優先ビームサーチをしている。各NodeはState2クラスで保持していて、これが親へのポインタを持っているようだ。

この人のコードはかなり読みやすかった。
Javaだからか、それとも書き方が自分に似ているからだろうか。いずれにしても、とても参考になる。

かなり強い方なので、他のマッチのコードも読んでみても良さそうだ。