# FreeWay/Assignment5/mini_go/test/random_test.py

from absl import logging, flags, app
from environment.GoEnv import Go
import time, os
import numpy as np
from agent.agent import RandomAgent
import tensorflow as tf

FLAGS = flags.FLAGS

# Command-line flags. The script below only reads ``num_eval``; the other
# flags are registered but not referenced in this file.
flags.DEFINE_integer("num_train_episodes", 10,
                     "Number of training episodes for each base policy.")
flags.DEFINE_integer("num_eval", 10,
                     "Number of evaluation episodes")
flags.DEFINE_integer("eval_every", 2000,
                     "Episode frequency at which the agents are evaluated.")
# The help text previously duplicated the ``eval_every`` description.
# NOTE(review): the unit (episodes vs. steps) is not visible from this file;
# confirm against whichever module consumes this flag.
flags.DEFINE_integer("learn_every", 128,
                     "Frequency at which the agents perform a learning update.")
# NOTE(review): the default holds an int, whereas list flags supplied on the
# command line are parsed into strings; consumers should cast with int().
flags.DEFINE_list("hidden_layers_sizes", [
    128
], "Number of hidden units in the avg-net and Q-net.")
flags.DEFINE_integer("replay_buffer_capacity", int(2e5),
                     "Size of the replay buffer.")
flags.DEFINE_integer("reservoir_buffer_capacity", int(2e6),
                     "Size of the reservoir buffer.")


def main(unused_argv):
    """Play ``FLAGS.num_eval`` episodes of Go between two random agents.

    For every episode the environment is reset and the agent belonging to
    the current player is stepped until the time step reports that the
    episode is over; the first ``info_state`` entry is printed after each
    move. Once an episode ends, every agent is stepped one more time with the
    final time step, and the rewards and the current board are printed.
    The elapsed time is printed after the last episode.

    Args:
        unused_argv: Positional command-line arguments handed over by
            ``app.run``; ignored.
    """
    begin = time.time()
    env = Go()
    agents = [RandomAgent(idx) for idx in range(2)]

    for _ in range(FLAGS.num_eval):
        time_step = env.reset()
        while not time_step.last():
            # Both seats use the same agent type, so the agent is looked up
            # directly by the id of the player to move.
            player_id = time_step.observations["current_player"]
            agent_output = agents[player_id].step(time_step)
            time_step = env.step(agent_output.action)
            print(time_step.observations["info_state"][0])

        # Episode is over, step all agents with final info state.
        for agent in agents:
            agent.step(time_step)
        print(time_step.rewards, env.get_current_board())

    print('Time elapsed:', time.time()-begin)


# Script entry point: let absl parse the command-line flags, then call main().
if __name__ == "__main__":
    app.run(main)
上传文件至 '' 2022-04-26 03:05:19 +00:00			`from absl import logging, flags, app`
			`from environment.GoEnv import Go`
			`import time, os`
			`import numpy as np`
			`from agent.agent import RandomAgent`
			`import tensorflow as tf`

			`FLAGS = flags.FLAGS`

			`flags.DEFINE_integer("num_train_episodes", 10,`
			`"Number of training episodes for each base policy.")`
			`flags.DEFINE_integer("num_eval", 10,`
			`"Number of evaluation episodes")`
			`flags.DEFINE_integer("eval_every", 2000,`
			`"Episode frequency at which the agents are evaluated.")`
			`flags.DEFINE_integer("learn_every", 128,`
			`"Episode frequency at which the agents are evaluated.")`
			`flags.DEFINE_list("hidden_layers_sizes", [`
			`128`
			`], "Number of hidden units in the avg-net and Q-net.")`
			`flags.DEFINE_integer("replay_buffer_capacity", int(2e5),`
			`"Size of the replay buffer.")`
			`flags.DEFINE_integer("reservoir_buffer_capacity", int(2e6),`
			`"Size of the reservoir buffer.")`


			`def main(unused_argv):`
			`begin = time.time()`
			`env = Go()`
			`agents = [RandomAgent(idx) for idx in range(2)]`

			`for ep in range(FLAGS.num_eval):`
			`time_step = env.reset()`
			`while not time_step.last():`
			`player_id = time_step.observations["current_player"]`
			`if player_id == 0:`
			`agent_output = agents[player_id].step(time_step)`
			`else:`
			`agent_output = agents[player_id].step(time_step)`
			`action_list = agent_output.action`
			`time_step = env.step(action_list)`
			`print(time_step.observations["info_state"][0])`

			`# Episode is over, step all agents with final info state.`
			`# for agent in agents:`
			`agents[0].step(time_step)`
			`agents[1].step(time_step)`
			`print(time_step.rewards, env.get_current_board())`

			`print('Time elapsed:', time.time()-begin)`


			`if __name__ == '__main__':`
			`app.run(main)`