修改了算法代码，并建立了一个简单的训练脚本。修改 BERT 以处理二维输入，移除 PPO 的 permute 参数。

This commit is contained in:
2025-10-22 16:56:12 +08:00
parent b626702cbb
commit 3f7e183c4b
101 changed files with 3837 additions and 39 deletions

View File

@@ -1,7 +1,10 @@
import torch
import numpy as np
from torch import nn
from .utils import build_mlp, reparameterize, evaluate_lop_pi
try:
from .utils import build_mlp, reparameterize, evaluate_lop_pi
except ImportError:
from utils import build_mlp, reparameterize, evaluate_lop_pi
class StateIndependentPolicy(nn.Module):