My own solution for the CartPole challenge. I only use the pole angle, nothing else, but I remember a short history of it (a memory with a fixed capacity). With a memory capacity of 1 it does not work, with 2 I need luck, and with 3 or more it looks good. I had already solved it with all 4 observation parameters, but that did not match my personal experience of balancing a pole on my finger, where the only thing I observe is the angle.
import gym
import numpy as np

class HillClimbingAgent():
    def __init__(self, env):
        self.action_size = env.action_space.n
        self.state = [0, 0, 0]  # memory: the last three observed pole angles
        self.input_size = len(self.state)
        self.W = 1e-4 * np.random.rand(self.action_size, self.input_size)
        self.best_W = np.copy(self.W)
        self.best = -np.inf
        self.noise_scale = 1e-5

    def _append_to_state(self, state):
        # shift the angle history one slot back and store the newest angle in front
        n = len(self.state)
        for i in range(n - 1):
            self.state[n - i - 1] = self.state[n - i - 2]
        self.state[0] = state[2]  # index 2 of the observation is the pole angle

    def get_action(self, state):
        self._append_to_state(state)
        action = np.argmax(np.dot(self.W, self.state))
        return action

    def update(self, total_reward):
        if total_reward > self.best:
            # new best episode: keep these weights and shrink the search radius
            self.best = total_reward
            self.best_W = np.copy(self.W)
            self.noise_scale = max(self.noise_scale / 2, 1e-3)
        else:
            # no improvement: widen the search radius
            self.noise_scale = min(self.noise_scale * 2, 2)
        # hill climbing: perturb the best weights found so far
        self.W = self.best_W + self.noise_scale * np.random.rand(self.action_size, self.input_size)

env_name = "CartPole-v1"
env = gym.make(env_name)
agent = HillClimbingAgent(env)
for i in range(50):
    state = env.reset()
    total_reward = 0
    done = False
    while not done:
        action = agent.get_action(state)
        state, reward, done, info = env.step(action)
        total_reward += reward
        env.render()
    agent.update(total_reward)
    print(f'{i} total_reward: {total_reward}')
    if total_reward == 500:
        break
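The loop above uses the old gym API, where reset() returns only the observation and step() returns four values; current releases of the maintained gymnasium package changed both signatures. As a minimal sketch, assuming gymnasium is installed, the same training loop would look like this, with the agent class above unchanged:

import gymnasium as gym
import numpy as np

env = gym.make("CartPole-v1")
agent = HillClimbingAgent(env)  # the class defined above, unchanged

for i in range(50):
    state, info = env.reset()  # reset() now returns (observation, info)
    total_reward = 0
    done = False
    while not done:
        action = agent.get_action(state)
        # step() now returns five values; the episode ends when either flag is set
        state, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        done = terminated or truncated
    agent.update(total_reward)
    print(f'{i} total_reward: {total_reward}')
    if total_reward >= 500:
        break

Rendering is requested at construction time in gymnasium, so the env.render() call is dropped; pass render_mode="human" to gym.make if you want to watch the cart.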