[WIP] Support RLDisaggregated#1669
Conversation
|
@claude review |
| rollout_controller=rollout_controller, | ||
| judger=judger, | ||
| logger=logger, | ||
| ) | ||
| produce_strategy = task_cfg.produce_strategy_config.build() | ||
| sampler = task_cfg.sampler_config.build(tokenizer=tokenizer, replay_buffer=replay_buffer) | ||
| task_runners.append( | ||
| _TaskRunner( | ||
| task_name=task_cfg.task_name, |
There was a problem hiding this comment.
Claude: Warning: _validate_target_task_counts will raise a raw KeyError if required_task_counts contains a key not present in target_task_counts. This can happen if the caller passes mismatched dicts.
Consider using .get() with an explicit check, or validate matching keys first:
| rollout_controller=rollout_controller, | |
| judger=judger, | |
| logger=logger, | |
| ) | |
| produce_strategy = task_cfg.produce_strategy_config.build() | |
| sampler = task_cfg.sampler_config.build(tokenizer=tokenizer, replay_buffer=replay_buffer) | |
| task_runners.append( | |
| _TaskRunner( | |
| task_name=task_cfg.task_name, | |
| @staticmethod | |
| def _validate_target_task_counts( | |
| required_task_counts: dict[str, int], | |
| target_task_counts: dict[str, int], | |
| ) -> None: | |
| missing_keys = required_task_counts.keys() - target_task_counts.keys() | |
| if missing_keys: | |
| raise ValueError( | |
| f"target_task_counts is missing keys present in required_task_counts: {sorted(missing_keys)}" | |
| ) | |
| undersized_targets = { | |
| task_name: {"required": required_task_count, "target": target_task_counts[task_name]} | |
| for task_name, required_task_count in required_task_counts.items() | |
| if target_task_counts[task_name] < required_task_count | |
| } | |
| if undersized_targets: | |
| raise ValueError( | |
| "target_task_batch_sizes must be greater than or equal to required_task_batch_sizes, " | |
| f"got {undersized_targets}" | |
| ) |
| task_name=task_cfg.task_name, | ||
| agent_loop=agent_loop, | ||
| produce_strategy=produce_strategy, | ||
| sampler=sampler, |
There was a problem hiding this comment.
Claude: Warning (CLAUDE.md): Missing return type annotation. Per the project standards, all new code must include type hints for function signatures.
| sampler=sampler, | |
| def _get_shared_rollout_ctl(self, active_tasks: list[_TaskRunner]) -> RolloutController | None: |
| enable_partial_rollout=enable_partial_rollout, | ||
| ) | ||
|
|
||
| result = ProduceBatchResult(rollout_states=[]) |
There was a problem hiding this comment.
Claude: Warning (CLAUDE.md): Multiple functions and __init__ accept a logger parameter with no type annotation. The project standard requires type hints for all function signatures.
This applies to build_task_runners, build_task_runner, BaseAgentLoopManager.__init__, _produce_single_task_batch, and _produce_single_task_window_to_replay_buffer.
Suggested type: logger: logging.Logger | None = None
| import json | ||
| import math | ||
| from pathlib import Path | ||
| from typing import Union |
There was a problem hiding this comment.
Claude: Warning: from typing import Union is imported but Python >= 3.10 uses native str | Path syntax. This import is unnecessary and the usages should be modernized per CLAUDE.md.
| import ray | ||
| from mmengine.dist import get_rank | ||
| from mmengine.runner import set_random_seed | ||
| from pydantic import BaseModel, ConfigDict |
There was a problem hiding this comment.
Claude: Warning: typing_extensions.Literal is imported but Literal has been available in typing since Python 3.8. Since the project targets Python >= 3.10, use from typing import Literal instead.
| from pydantic import BaseModel, ConfigDict | |
| from typing import Literal |
| @@ -0,0 +1,560 @@ | |||
| # modified from https://github.com/volcengine/verl/blob/main/verl/trainer/ppo/mismatch_helper.py | |||
There was a problem hiding this comment.
为啥需要再加一个rollout_is.py,rl/trainer下已经有了rollout_is.py
| # - disaggregated 需要 trainer 决定 window 节奏,所以 partial_rollout 必须放在 trainer 层 | ||
| # | ||
| # 因此两边都叫“partial rollout”,但它们不是同一个旋钮,不能强行合并成同一层配置。 | ||
| produce_batch_enable_partial_rollout: bool = False |
…ated logic to rl_trainer (#8)
No description provided.