Skip to content

Commit

Permalink
fix: use bytes in gRPC proto instead of strings (mudler#813)
Browse files Browse the repository at this point in the history
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
  • Loading branch information
mudler committed Jul 27, 2023
1 parent 0af0df7 commit b96e30e
Show file tree
Hide file tree
Showing 8 changed files with 20 additions and 16 deletions.
8 changes: 4 additions & 4 deletions api/backend/llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,17 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
opts.Prompt = s
if tokenCallback != nil {
ss := ""
err := inferenceModel.PredictStream(ctx, opts, func(s string) {
tokenCallback(s)
ss += s
err := inferenceModel.PredictStream(ctx, opts, func(s []byte) {
tokenCallback(string(s))
ss += string(s)
})
return ss, err
} else {
reply, err := inferenceModel.Predict(ctx, opts)
if err != nil {
return "", err
}
return reply.Message, err
return string(reply.Message), err
}
}

Expand Down
2 changes: 1 addition & 1 deletion extra/grpc/huggingface/backend_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion extra/grpc/huggingface/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
def Health(self, request, context):
return backend_pb2.Reply(message="OK")
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
model_name = request.Model
model_name = os.path.basename(model_name)
Expand Down
4 changes: 2 additions & 2 deletions pkg/grpc/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func (c *Client) HealthCheck(ctx context.Context) bool {
return false
}

if res.Message == "OK" {
if string(res.Message) == "OK" {
return true
}
return false
Expand Down Expand Up @@ -80,7 +80,7 @@ func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grp
return client.LoadModel(ctx, in, opts...)
}

func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s string), opts ...grpc.CallOption) error {
func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error {
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
return err
Expand Down
4 changes: 4 additions & 0 deletions pkg/grpc/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,7 @@ type LLM interface {
AudioTranscription(*pb.TranscriptRequest) (api.Result, error)
TTS(*pb.TTSRequest) error
}

// newReply builds a pb.Reply whose Message field carries s encoded as
// raw bytes (the proto field is `bytes`, so the string must be converted).
func newReply(s string) *pb.Reply {
	payload := []byte(s)
	reply := &pb.Reply{Message: payload}
	return reply
}
8 changes: 4 additions & 4 deletions pkg/grpc/proto/backend.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pkg/grpc/proto/backend.proto
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ message PredictOptions {

// The response message containing the result
message Reply {
string message = 1;
bytes message = 1;
}

message ModelOptions {
Expand Down
6 changes: 3 additions & 3 deletions pkg/grpc/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ type server struct {
}

func (s *server) Health(ctx context.Context, in *pb.HealthMessage) (*pb.Reply, error) {
return &pb.Reply{Message: "OK"}, nil
return newReply("OK"), nil
}

func (s *server) Embedding(ctx context.Context, in *pb.PredictOptions) (*pb.EmbeddingResult, error) {
Expand All @@ -48,7 +48,7 @@ func (s *server) LoadModel(ctx context.Context, in *pb.ModelOptions) (*pb.Result

func (s *server) Predict(ctx context.Context, in *pb.PredictOptions) (*pb.Reply, error) {
result, err := s.llm.Predict(in)
return &pb.Reply{Message: result}, err
return newReply(result), err
}

func (s *server) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest) (*pb.Result, error) {
Expand Down Expand Up @@ -99,7 +99,7 @@ func (s *server) PredictStream(in *pb.PredictOptions, stream pb.Backend_PredictS
done := make(chan bool)
go func() {
for result := range resultChan {
stream.Send(&pb.Reply{Message: result})
stream.Send(newReply(result))
}
done <- true
}()
Expand Down

0 comments on commit b96e30e

Please sign in to comment.