[Feature] reward model inferencer support
Yizhen committed Jun 22, 2024
1 parent e5ab2fd commit 3b2cb04
Showing 14 changed files with 747 additions and 197 deletions.
61 changes: 61 additions & 0 deletions examples/rm_inference.py
@@ -0,0 +1,61 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.
import logging
import os
import sys

from transformers import (
    HfArgumentParser
)

from lmflow.datasets import Dataset
from lmflow.models.auto_model import AutoModel
from lmflow.pipeline.auto_pipeline import AutoPipeline
from lmflow.args import (
    ModelArguments,
    DatasetArguments,
    AutoArguments,
)


logger = logging.getLogger(__name__)


def main():
    # Parses arguments
    pipeline_name = "rm_inferencer"
    PipelineArguments = AutoArguments.get_pipeline_args_class(pipeline_name)

    parser = HfArgumentParser((
        ModelArguments,
        DatasetArguments,
        PipelineArguments
    ))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, pipeline_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses()

    dataset = Dataset(data_args)
    model = AutoModel.get_model(model_args, tune_strategy='none', use_accelerator=pipeline_args.use_accelerator)
    inferencer = AutoPipeline.get_pipeline(
        pipeline_name=pipeline_name,
        model_args=model_args,
        data_args=data_args,
        pipeline_args=pipeline_args
    )

    res = inferencer.inference(
        model,
        dataset,
    )

    if pipeline_args.save_results:
        res.save(pipeline_args.results_path)


if __name__ == "__main__":
    main()
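
With --save_results True, the script writes the returned dataset to results_path as JSON. Judging from the SCORED_TEXT_ONLY_DATASET_DESCRIPTION constant and the KEY_SCORES = "score" key added in dataset.py below, a plausible shape for the saved file is the following sketch; the type string and field values are inferences from this diff, not confirmed output:

{
  "type": "scored_text_only",
  "instances": [
    {"text": "<prompt and response as one string>", "score": 1.23}
  ]
}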
4 changes: 2 additions & 2 deletions examples/vllm_inference.py
@@ -10,7 +10,7 @@
)

from lmflow.datasets import Dataset
from lmflow.models.hf_decoder_model import HFDecoderModel
from lmflow.models.auto_model import AutoModel
from lmflow.pipeline.auto_pipeline import AutoPipeline
from lmflow.args import (
    ModelArguments,
@@ -40,7 +40,7 @@ def main():
        model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses()

    dataset = Dataset(data_args)
    model = HFDecoderModel(model_args)
    model = AutoModel.get_model(model_args, tune_strategy='none')
    inferencer = AutoPipeline.get_pipeline(
        pipeline_name=pipeline_name,
        model_args=model_args,
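
Both example scripts now construct models through the same factory. A rough sketch of what the dispatch inside AutoModel.get_model plausibly looks like (the branch names and module path for the regression model are assumptions; the real implementation lives in src/lmflow/models/auto_model.py and is not part of this diff):

from lmflow.models.hf_decoder_model import HFDecoderModel

class AutoModel:
    @classmethod
    def get_model(cls, model_args, *args, **kwargs):
        # Assumed dispatch on the declared architecture, e.g.
        # --arch_type text_regression in the reward-model script above.
        if getattr(model_args, "arch_type", None) == "text_regression":
            from lmflow.models.hf_text_regression_model import HFTextRegressionModel  # assumed module path
            return HFTextRegressionModel(model_args, *args, **kwargs)
        return HFDecoderModel(model_args, *args, **kwargs)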
67 changes: 67 additions & 0 deletions scripts/run_rm_inference.sh
@@ -0,0 +1,67 @@
#!/bin/bash
# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.

# Parses arguments
run_name=rm_inference
# model_name_or_path=sfairXC/FsfairX-LLaMA3-RM-v0.1
model_name_or_path=/vol/yizhenjia/projs/RLHFlow-fox/models/rm/sfairXC-FsfairX-LLaMA3-RM-v0.1
dataset_path=data/alpaca/test
output_dir=data/rm_inference_results
output_file_name=results.json

# Safety related arguments
trust_remote_code=0

while [[ $# -ge 1 ]]; do
  key="$1"
  case ${key} in
    -r|--run_name)
      run_name="$2"
      shift
      ;;
    -m|--model_name_or_path)
      model_name_or_path="$2"
      shift
      ;;
    -d|--dataset_path)
      dataset_path="$2"
      shift
      ;;
    --output_dir)
      output_dir="$2"
      shift
      ;;
    --output_file_name)
      output_file_name="$2"
      shift
      ;;
    --trust_remote_code)
      trust_remote_code="$2"
      shift
      ;;
    *)
      echo "error: unknown option \"${key}\"" 1>&2
      exit 1
  esac
  shift
done

# inference
project_dir=$(cd "$(dirname $0)"/..; pwd)
log_dir=${project_dir}/log/${run_name}
output_file_path=${output_dir}/${run_name}/${output_file_name}
mkdir -p ${output_dir}/${run_name} ${log_dir}

accelerate launch --config_file configs/accelerator_multigpu_config.yaml \
  examples/rm_inference.py \
    --trust_remote_code ${trust_remote_code} \
    --model_name_or_path ${model_name_or_path} \
    --arch_type text_regression \
    --use_accelerator True \
    --block_size 4096 \
    --inference_batch_size 16 \
    --dataset_path ${dataset_path} \
    --preprocessing_num_workers 16 \
    --save_results True \
    --results_path ${output_file_path} \
    2>&1 | tee ${log_dir}/rm_inference.log
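
A typical invocation using the script's own flags (substituting the commented-out Hugging Face model ID for the hard-coded local path above):

bash scripts/run_rm_inference.sh \
  --model_name_or_path sfairXC/FsfairX-LLaMA3-RM-v0.1 \
  --dataset_path data/alpaca/test \
  --output_dir data/rm_inference_results \
  --output_file_name results.json

With the default run_name, results land in data/rm_inference_results/rm_inference/results.json and the log in log/rm_inference/rm_inference.log.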
15 changes: 8 additions & 7 deletions src/lmflow/args.py
@@ -648,7 +648,7 @@ class FinetunerArguments(TrainingArguments):


@dataclass
class RewardModelingArguments(FinetunerArguments):
class RewardModelTunerArguments(FinetunerArguments):
"""
Arguments for reward modeling.
"""
@@ -825,18 +825,15 @@ class InferencerArguments:
    local_rank : str
        For distributed training: local_rank
    random_seed : int, default = 1
    inference_batch_size : int, default = 1
    deepspeed :
        Enable deepspeed and pass the path to deepspeed json config file (e.g. ds_config.json) or an already
        loaded json file as a dict
    mixed_precision : str, choice from ["bf16","fp16"].
        mixed precision mode, whether to use bf16 or fp16
    temperature : float
        An argument of model.generate in huggingface to control the diversity of generation.
    repetition_penalty : float
        An argument of model.generate in huggingface to penalize repetitions.
    use_beam_search : Optional[bool]
@@ -882,7 +879,10 @@ class InferencerArguments:
metadata={"help": "For distributed training: local_rank"
},
)

inference_batch_size: int = field(
default=1,
metadata={"help": "batch size for inference"},
)
temperature: float = field(
default=0.0,
metadata={"help": "Temperature during inference."},
@@ -1251,9 +1251,10 @@ class IterativeAlignerArguments(InferencerArguments):
"evaluator": EvaluatorArguments,
"inferencer": InferencerArguments,
"vllm_inferencer": InferencerArguments,
"rm_inferencer": InferencerArguments,
"raft_aligner": RaftAlignerArguments,
"dpo_aligner": DPOAlignerArguments,
"rm_tuner": RewardModelingArguments,
"rm_tuner": RewardModelTunerArguments,
}


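
Both the vLLM and reward-model pipelines reuse InferencerArguments, so selecting a pipeline only changes which fields matter. A short sketch of how the mapping above is consumed, mirroring examples/rm_inference.py:

from lmflow.args import AutoArguments

PipelineArguments = AutoArguments.get_pipeline_args_class("rm_inferencer")
# -> InferencerArguments; flags such as --inference_batch_size and
#    --use_accelerator are then parsed by HfArgumentParser as usual.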
21 changes: 19 additions & 2 deletions src/lmflow/datasets/dataset.py
@@ -12,6 +12,7 @@
# Importing necessary libraries and modules
import copy
import json
from pathlib import Path

from cmath import e
from pathlib import Path
@@ -24,6 +25,7 @@
from lmflow.utils.constants import (
    DATASET_DESCRIPTION_MAP,
    TEXT_ONLY_DATASET_DESCRIPTION,
    SCORED_TEXT_ONLY_DATASET_DESCRIPTION,
    TEXT2TEXT_DATASET_DESCRIPTION,
    FLOAT_ONLY_DATASET_DESCRIPTION,
    INSTANCE_FIELDS_MAP,
@@ -42,6 +44,7 @@

KEY_TYPE = "type"
KEY_INSTANCES = "instances"
KEY_SCORES = "score"

class Dataset:
r"""
@@ -236,7 +239,7 @@ def from_dict(self, dict_obj: dict, *args, **kwargs):
                f' {list(fields)}: should be {list(correct_fields)}.\n'
                f'The bad instance triggers the error, the {i}-th instance:\n'
                f' {instance}'
            )

        try:
            hf_dict = {}
@@ -427,4 +430,18 @@ def get_type(self):
            self.type
        """
        return self.type


    def save(self, file_path: str):
        r"""
        Save the dataset to a json file.

        Parameters
        ------------
        file_path : str.
            The path to the file where the dataset will be saved.
        """
        assert Path(file_path).suffix == ".json", "The file path must have a .json extension."
        with open(file_path, "w") as fout:
            json.dump(self.to_dict(), fout, indent=2)
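
A minimal usage sketch, matching how examples/rm_inference.py persists results (the path is illustrative):

res = inferencer.inference(model, dataset)
res.save("data/rm_inference_results/results.json")  # suffix must be .json
# res.save("results.txt")  # would fail the .json assertion above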
6 changes: 3 additions & 3 deletions src/lmflow/models/hf_decoder_model.py
@@ -333,7 +333,7 @@ def decode(self, input, *args, **kwargs ) -> Union[str, List[str]]:
        else:
            # Can be list of ints or a Tensor
            return self.tokenizer.decode(input, *args, **kwargs)


    def inference(
        self,
@@ -380,7 +380,7 @@ def inference(
        return res


    def __inference(self, inputs, use_accelerator=False, *args, **kwargs):
    def __inference(self, inputs, *args, **kwargs):
        """
        Perform generation process of the model.
@@ -401,7 +401,7 @@ def __inference(self, inputs, use_accelerator=False, *args, **kwargs):
            The generated sequence output
        """
        with torch.no_grad():
            if use_accelerator:
            if self.use_accelerator:
                outputs = self.backend_model.generate(
                    input_ids=inputs,
                    pad_token_id=self.tokenizer.pad_token_id,
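
The flag is now read from the instance rather than a parameter each caller had to thread through. Presumably it is set at construction time, as in examples/rm_inference.py above; a hypothetical sketch of the assumed wiring (not shown in this diff):

model = AutoModel.get_model(model_args, tune_strategy='none', use_accelerator=True)
# assumed: the constructor stores the flag, so model.use_accelerator is what
# __inference consults inside torch.no_grad()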