Add proxy eval and skeleton experiment tooling

This commit is contained in:
2026-03-09 23:11:35 +08:00
parent 36aef46a0d
commit 6c8cd2950c
16 changed files with 1107 additions and 69 deletions
+22 -13
View File
@@ -553,22 +553,31 @@ class BaseModel(MetaModel, nn.Module):
resume_every_iter = int(model.engine_cfg.get('resume_every_iter', 0))
if resume_every_iter > 0 and model.iteration % resume_every_iter == 0:
model.save_resume_ckpt(model.iteration)
if model.iteration % model.engine_cfg['save_iter'] == 0:
save_iter = int(model.engine_cfg['save_iter'])
eval_iter = int(model.engine_cfg.get('eval_iter', 0))
should_save = save_iter > 0 and model.iteration % save_iter == 0
should_eval = False
if model.engine_cfg['with_test']:
if eval_iter > 0:
should_eval = model.iteration % eval_iter == 0
else:
should_eval = should_save
if should_save:
# save the checkpoint
model.save_ckpt(model.iteration)
# run test if with_test = true
if model.engine_cfg['with_test']:
model.msg_mgr.log_info("Running test...")
model.eval()
result_dict = BaseModel.run_test(model)
model.train()
if model.cfgs['trainer_cfg']['fix_BN']:
model.fix_BN()
if result_dict:
model.msg_mgr.write_to_tensorboard(result_dict)
model.msg_mgr.write_to_wandb(result_dict)
model.msg_mgr.reset_time()
if should_eval:
model.msg_mgr.log_info("Running test...")
model.eval()
result_dict = BaseModel.run_test(model)
model.train()
if model.cfgs['trainer_cfg']['fix_BN']:
model.fix_BN()
if result_dict:
model.msg_mgr.write_to_tensorboard(result_dict)
model.msg_mgr.write_to_wandb(result_dict)
model.msg_mgr.reset_time()
if model.iteration >= model.engine_cfg['total_iter']:
break