修改了算法代码，并建立了一个简单的训练脚本。修改 BERT 以处理二维输入，移除 PPO 的 permute 参数。

This commit is contained in:
2025-10-22 16:56:12 +08:00
parent b626702cbb
commit 3f7e183c4b
101 changed files with 3837 additions and 39 deletions

36
test_training.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/bin/bash
# Smoke test for the complete MAGAIL training flow.
#
# Runs train_magail.py (resolved relative to the current working directory)
# with a small configuration and reports whether the run succeeded.
#
# Exit status: 0 when the training command succeeds; otherwise the exit
# status of the training command, so callers and CI can detect a failed run.

# Treat any reference to an unset variable as an error.
set -u

echo "======================================================================"
echo "🧪 测试MAGAIL完整训练流程"
echo "======================================================================"

# Test parameters (deliberately small scale).
readonly EPISODES=10
readonly HORIZON=200
readonly ROLLOUT=512

echo ""
echo "📋 测试配置:"
echo " Episodes: $EPISODES"
echo " Horizon: $HORIZON"
echo " Rollout Length: $ROLLOUT"
echo ""

# Run training. No render flag is passed; per the original note this keeps the
# run fast. The exit status is captured instead of being discarded so that the
# success banner below is only printed when training really succeeded.
status=0
python train_magail.py \
  --episodes "$EPISODES" \
  --horizon "$HORIZON" \
  --rollout-length "$ROLLOUT" \
  --batch-size 64 \
  --lr-actor 3e-4 \
  --lr-critic 3e-4 \
  --lr-disc 3e-4 \
  --epoch-disc 3 \
  --epoch-ppo 5 || status=$?

echo ""
if (( status != 0 )); then
  # Diagnostics go to stderr; propagate the training command's exit status.
  echo "======================================================================" >&2
  echo "❌ 测试失败 (退出码: ${status})" >&2
  echo "======================================================================" >&2
  exit "$status"
fi

echo "======================================================================"
echo "✅ 测试完成"
echo "======================================================================"