Skip to content

Commit

Permalink
feat: add encode_as_serialized_proto to SentencePieceProcessor
Browse files Browse the repository at this point in the history
  • Loading branch information
yoshoku committed Mar 24, 2023
1 parent b6dd28b commit 74c31ee
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions ext/sentencepiece/sentencepiece.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class RbSentencePieceProcessor {
rb_define_method(rb_cSentencePieceProcessor, "decode", RUBY_METHOD_FUNC(_sentencepiece_processor_decode), -1);
rb_define_method(rb_cSentencePieceProcessor, "decode_pieces", RUBY_METHOD_FUNC(_sentencepiece_processor_decode_pieces), 1);
rb_define_method(rb_cSentencePieceProcessor, "decode_ids", RUBY_METHOD_FUNC(_sentencepiece_processor_decode_ids), 1);
rb_define_method(rb_cSentencePieceProcessor, "encode_as_serialized_proto", RUBY_METHOD_FUNC(_sentencepiece_processor_encode_as_serialized_proto), 1);
rb_define_method(rb_cSentencePieceProcessor, "piece_size", RUBY_METHOD_FUNC(_sentencepiece_processor_piece_size), 0);
rb_define_method(rb_cSentencePieceProcessor, "piece_to_id", RUBY_METHOD_FUNC(_sentencepiece_processor_piece_to_id), 1);
rb_define_method(rb_cSentencePieceProcessor, "id_to_piece", RUBY_METHOD_FUNC(_sentencepiece_processor_id_to_piece), 1);
Expand Down Expand Up @@ -518,6 +519,20 @@ class RbSentencePieceProcessor {
return output;
};

// Ruby method `encode_as_serialized_proto(text)`.
//
// Passes `text` to SentencePieceProcessor::EncodeAsSerializedProto and returns
// the resulting bytes as a Ruby String.
//
// @param self  the receiver wrapping a sentencepiece::SentencePieceProcessor.
// @param text  the input to encode; must be a Ruby String.
// @return      a Ruby String holding every byte of the serialized message.
// @raise ArgumentError when `text` is not a String (StringValueCStr also
//        raises if `text` contains an embedded NUL byte).
static VALUE _sentencepiece_processor_encode_as_serialized_proto(VALUE self, VALUE text) {
  if (!RB_TYPE_P(text, T_STRING)) {
    rb_raise(rb_eArgError, "expected text to be a String");
    return Qnil;
  }

  sentencepiece::SentencePieceProcessor* ptr = get_sentencepiece_processor(self);
  const sentencepiece::util::bytes serialized = ptr->EncodeAsSerializedProto(StringValueCStr(text));

  // The serialized message is binary data and may legitimately contain NUL
  // bytes, so it must be copied with an explicit length. A C-string based
  // constructor would stop at the first NUL and hand back a truncated payload.
  VALUE output = rb_str_new(serialized.data(), static_cast<long>(serialized.size()));

  // Keep `text` reachable while its C string pointer is in use above.
  RB_GC_GUARD(text);
  return output;
};

static VALUE _sentencepiece_processor_piece_size(VALUE self) {
sentencepiece::SentencePieceProcessor* ptr = get_sentencepiece_processor(self);
return INT2NUM(ptr->GetPieceSize());
Expand Down

0 comments on commit 74c31ee

Please sign in to comment.