Prompt Title: Building an Actor-Critic Structure

Prompt Detail:

Sure! Here's an example of how to modify the code to have 10 possible actions:

```python
import gym
import math
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Categorical

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Actor network: maps a state to a probability distribution over actions
class Actor(nn.Module):
    def __init__(self, state_dim, num_actions, hidden_size):
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(state_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_actions)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.softmax(self.fc3(x), dim=1)

# Critic network: maps a state to a scalar state-value estimate
class Critic(nn.Module):
    def __init__(self, state_dim, hidden_size):
        super(Critic, self).__init__()
        self.fc1 = nn.Linear(state_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

class ActorCriticAgent:
    def __init__(self, state_dim, num_actions=10, hidden_size=128, lr=0.001, gamma=0.99):
        self.actor = Actor(state_dim, num_actions, hidden_size).to(device)
        self.critic = Critic(state_dim, hidden_size).to(device)
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=lr)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=lr)
        self.gamma = gamma
        self.num_actions = num_actions
        self.steps_done = 0

    def get_action(self, state, eps=None):
        state = torch.FloatTensor(state).unsqueeze(0).to(device)
        dist = Categorical(self.actor(state))
        if eps is None:
            action = dist.sample()
        else:
            # gradually reduce exploration from eps toward 0.01 over ~10000 steps
            eps_threshold = 0.01
            eps = eps_threshold + (eps - eps_threshold) * \
                math.exp(-1.0 * self.steps_done / 10000)
            if random.random() > eps:
                action = dist.sample()
            else:
                action = torch.tensor([random.randrange(self.num_actions)],
                                      device=device, dtype=torch.long)
            self.steps_done += 1
        return action.detach().cpu().numpy()[0]

    def update(self, state, action, reward, next_state, done):
        state = torch.FloatTensor(state).unsqueeze(0).to(device)
        next_state = torch.FloatTensor(next_state).unsqueeze(0).to(device)
        action = torch.tensor([action], device=device)
        reward = torch.tensor([reward], device=device, dtype=torch.float)
        done = torch.tensor([done], device=device, dtype=torch.float)

        # update critic: regress V(s) toward the one-step TD target
        value = self.critic(state)
        next_value = self.critic(next_state)
        target = (reward + self.gamma * (1 - done) * next_value).detach()
        advantage = target - value.detach()
        critic_loss = F.smooth_l1_loss(value, target)
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # update actor: policy gradient weighted by the advantage,
        # plus a small entropy bonus to keep the policy exploratory
        dist = Categorical(self.actor(state))
        log_prob = dist.log_prob(action)
        entropy = dist.entropy()
        actor_loss = -(log_prob * advantage).mean() - 0.01 * entropy.mean()
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()
```
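
For reference, here is a minimal sketch of how a training loop might drive this agent. It assumes the classic `gym` step API (`env.reset()` returns only the observation, and `env.step()` returns a 4-tuple); the environment name, episode count, and starting epsilon are illustrative choices, not part of the original snippet:

```python
# Minimal training-loop sketch (illustrative assumptions noted above).
env = gym.make("CartPole-v1")  # hypothetical choice of environment
state_dim = env.observation_space.shape[0]
agent = ActorCriticAgent(state_dim, num_actions=env.action_space.n)

for episode in range(500):  # episode count is arbitrary
    state = env.reset()
    episode_reward = 0.0
    done = False
    while not done:
        # eps=1.0 starts fully exploratory; get_action decays it internally
        action = agent.get_action(state, eps=1.0)
        next_state, reward, done, _ = env.step(int(action))
        agent.update(state, action, reward, next_state, done)
        state = next_state
        episode_reward += reward
    print(f"episode {episode}: reward {episode_reward:.1f}")
```

Because `update()` performs a one-step TD update on each transition as it arrives, no replay buffer is needed.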
Shared by Kyuyeong kim
