diff --git a/examples/llm_engine_example.py b/examples/llm_engine_example.py
index 932a1151553f..f95080ac610a 100644
--- a/examples/llm_engine_example.py
+++ b/examples/llm_engine_example.py
@@ -30,7 +30,7 @@ def main(args: argparse.Namespace):
         request_outputs = engine.step()
 
         for request_output in request_outputs:
-            if request_output.finished():
+            if request_output.finished:
                 print(request_output)
 
         if not (engine.has_unfinished_requests() or test_prompts):
diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index e74f6b418fb2..1b0016e5831c 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -154,7 +154,7 @@ async def generate(
             yield request_output
 
             # Once finished, release the resources of the sequence group.
-            if request_output.finished():
+            if request_output.finished:
                 if self.log_requests:
                     logger.info(f"Finished request {request_id}.")
 
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 19510d9050a7..d9079cc1a7b9 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -133,7 +133,7 @@ def _run_engine(self, use_tqdm: bool) -> List[RequestOutput]:
         while self.llm_engine.has_unfinished_requests():
             step_outputs = self.llm_engine.step()
             for output in step_outputs:
-                if output.finished():
+                if output.finished:
                     outputs.append(output)
                     if use_tqdm:
                         pbar.update(1)
diff --git a/vllm/outputs.py b/vllm/outputs.py
index fea521d12924..ebb5c19df0ad 100644
--- a/vllm/outputs.py
+++ b/vllm/outputs.py
@@ -60,11 +60,13 @@ def __init__(
         prompt: str,
         prompt_token_ids: List[int],
         outputs: List[CompletionOutput],
+        finished: bool,
     ) -> None:
         self.request_id = request_id
         self.prompt = prompt
         self.prompt_token_ids = prompt_token_ids
         self.outputs = outputs
+        self.finished = finished
 
     @classmethod
     def from_seq_group(cls, seq_group: SequenceGroup) -> "RequestOutput":
@@ -95,13 +97,13 @@ def from_seq_group(cls, seq_group: SequenceGroup) -> "RequestOutput":
         # Every sequence in the sequence group should have the same prompt.
         prompt = top_n_seqs[0].prompt
         prompt_token_ids = top_n_seqs[0].data.prompt_token_ids
-        return cls(seq_group.request_id, prompt, prompt_token_ids, outputs)
+        finished = seq_group.is_finished()
+        return cls(seq_group.request_id, prompt, prompt_token_ids, outputs,
+                   finished)
 
     def __repr__(self) -> str:
         return (f"RequestOutput(request_id={self.request_id}, "
                 f"prompt={self.prompt!r}, "
                 f"prompt_token_ids={self.prompt_token_ids}, "
-                f"outputs={self.outputs})")
-
-    def finished(self) -> bool:
-        return all(output.finished() for output in self.outputs)
+                f"outputs={self.outputs}, "
+                f"finished={self.finished})")
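
This diff turns `RequestOutput.finished` from a method (which recomputed completion state from the per-sequence outputs) into a plain attribute populated once from `SequenceGroup.is_finished()` when the output is built, so the flag reflects the scheduler's view of the request directly. A minimal sketch of the new API, assuming a vLLM checkout at this revision; the request id, prompt, and token ids below are made-up illustration values:

```python
from vllm.outputs import RequestOutput

# `finished` is now a constructor argument stored as an attribute
# (from_seq_group fills it in from SequenceGroup.is_finished()),
# not a method computed over the completion outputs.
output = RequestOutput(
    request_id="0",            # hypothetical values for illustration
    prompt="Hello",
    prompt_token_ids=[1, 2, 3],
    outputs=[],                # CompletionOutput list elided for brevity
    finished=True,
)

assert output.finished         # attribute access, no call
# Under the old API this would have been `output.finished()`.
```

Call sites that poll the engine, like the loop in `examples/llm_engine_example.py` above, therefore drop the parentheses when checking for completion.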