修改了算法代码，并建立了一个简单的训练脚本。修改 BERT 以处理二维输入，移除 PPO 的 permute 参数。
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
from torch import nn
|
||||
from .utils import build_mlp, reparameterize, evaluate_lop_pi
|
||||
try:
|
||||
from .utils import build_mlp, reparameterize, evaluate_lop_pi
|
||||
except ImportError:
|
||||
from utils import build_mlp, reparameterize, evaluate_lop_pi
|
||||
|
||||
class StateIndependentPolicy(nn.Module):
|
||||
|
||||
|
||||
Reference in New Issue
Block a user