diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..c76e895 --- /dev/null +++ b/.env.example @@ -0,0 +1,2 @@ +TELEGRAM_APITOKEN= +MODELPATH= \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d5a3a5d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.env +whisper.cpp/ +whisper.cpp/* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..21f6212 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +# syntax=docker/dockerfile:1 + +FROM golang:1.19 AS build-stage + +WORKDIR /app + +COPY go.mod go.sum ./ +RUN go mod download +COPY *.go ./ + +RUN git clone https://github.com/ggerganov/whisper.cpp.git && \ + cd whisper.cpp/bindings/go && \ + make whisper && \ + cd /app/ && \ + C_INCLUDE_PATH=/app/whisper.cpp/ LIBRARY_PATH=/app/whisper.cpp/ go build + +FROM debian:bookworm-slim AS build-release-stage +# TODO: Can't use distroless because of ffmpeg binary requirement, could use static build +# Still, debian-slim is still slimmer than golang:1.19 image + +COPY --from=build-stage /app/ /app/ + +RUN rm -rf /app/whisper.cpp +RUN mkdir /app/tmp/ + +RUN apt-get update && apt-get install ffmpeg -y + +WORKDIR /app +VOLUME /app/models + +CMD ["./cbot-telegram"] \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0e11e3d --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +# cbot-telegram-whisper +Simple bot that transcribes Telegram voice messages using OpenAI's Whisper model, +running CPU inference thanks to [whisper.cpp](https://github.com/ggerganov/whisper.cpp) + +## Built using: +* [go-telegram-bot-api](https://pkg.go.dev/github.com/go-telegram-bot-api/telegram-bot-api/v5) +* [whisper.cpp Go bindings](https://github.com/ggerganov/whisper.cpp/tree/master/bindings/go/) +* [u2takey's ffmpeg-go](https://github.com/u2takey/ffmpeg-go) +* [grab](https://github.com/cavaliergopher/grab) + +## How to build +#### You can check out the example
[Dockerfile](Dockerfile), but TL;DR: + +```bash +git clone git@github.com:chinese-soup/cbot-telegram-whisper.git && \ +cd cbot-telegram-whisper && \ +git clone https://github.com/ggerganov/whisper.cpp.git && \ +cd whisper.cpp/bindings/go && \ +make whisper && \ +cd ../../.. && \ +go get +C_INCLUDE_PATH="$PWD/whisper.cpp/" LIBRARY_PATH="$PWD/whisper.cpp/" go build -o whisperbot +``` + +#### Get a model, e.g.: +```bash +bash whisper.cpp/models/download-ggml-model.sh tiny.en +``` +Check out [whisper.cpp's](https://github.com/ggerganov/whisper.cpp) README for more info. + + +#### Set the environment variables mentioned in `.env.example`, e.g.: + +```bash +export TELEGRAM_APITOKEN= +export MODELPATH=whisper.cpp/models/ggml-tiny.en.bin +``` + +#### And you're all set, run it: +```bash +./whisperbot +``` \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..5f80aa0 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,11 @@ +version: '2' +services: + telegram-cbot: + image: telegram-cbot:latest + container_name: telegram-cbot + restart: always + volumes: + - ./models:/app/models + environment: + TELEGRAM_APITOKEN: "${TELEGRAM_APITOKEN}" + MODELPATH: "${MODELPATH}" \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..e149a33 --- /dev/null +++ b/go.mod @@ -0,0 +1,17 @@ +module cbot-telegram + +go 1.19 + +require github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1 + +require ( + github.com/aws/aws-sdk-go v1.38.20 // indirect + github.com/cavaliergopher/grab/v3 v3.0.1 // indirect + github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230415143044-c23588cc4b2c // indirect + github.com/go-audio/audio v1.0.0 // indirect + github.com/go-audio/riff v1.0.0 // indirect + github.com/go-audio/wav v1.1.0 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect + github.com/u2takey/ffmpeg-go v0.4.1 // indirect + github.com/u2takey/go-utils v0.3.1 // indirect +) diff --git
a/go.sum b/go.sum new file mode 100644 index 0000000..bcd62b9 --- /dev/null +++ b/go.sum @@ -0,0 +1,113 @@ +github.com/aws/aws-sdk-go v1.38.20 h1:QbzNx/tdfATbdKfubBpkt84OM6oBkxQZRw6+bW2GyeA= +github.com/aws/aws-sdk-go v1.38.20/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= +github.com/cavaliergopher/grab/v3 v3.0.1 h1:4z7TkBfmPjmLAAmkkAZNX/6QJ1nNFdv3SdIHXju0Fr4= +github.com/cavaliergopher/grab/v3 v3.0.1/go.mod h1:1U/KNnD+Ft6JJiYoYBAimKH2XrYptb8Kl3DFGmsjpq4= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c= +github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4= +github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230415143044-c23588cc4b2c h1:siCIF0jYFop8RfLODwz+BYpS8wckEMFlUrc7eIu7Td8= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230415143044-c23588cc4b2c/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= +github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= +github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= +github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= +github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498= +github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g= +github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE= +github.com/go-logr/logr v0.1.0 h1:M1Tv3VzNlEHg6uyACnRdtrploV2P7wZqH8BoQMtz0cg= +github.com/go-logr/logr v0.1.0/go.mod 
h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= +github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1 h1:wG8n/XJQ07TmjbITcGiUaOtXxdrINDz1b0J1w0SzqDc= +github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1/go.mod h1:A2S0CWkNylc2phvKXWBBdD3K0iGnDBGbzRpISP2zBl8= +github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls= +github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= +github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/gofuzz v1.0.0 h1:A8PeW59pxE9IoFRqBp37U+mSNaQoZ46F1f0f863XSXw= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= +github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/kisielk/errcheck v1.2.0 h1:reN85Pxc5larApoH1keMBiu2GWtPqXQ1nc9gx+jOU+E= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= +github.com/kisielk/gotool v1.0.0 h1:AV2c/EiW3KqPNT9ZKl07ehoAGi4C5/01Cfbblndcapg= +github.com/kisielk/gotool v1.0.0/go.mod 
h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/panjf2000/ants/v2 v2.4.2 h1:kesjjo8JipN3vNNg1XaiXaeSs6xJweBTgenkBtsrHf8= +github.com/panjf2000/ants/v2 v2.4.2/go.mod h1:f6F0NZVFsGCp5A7QW/Zj/m92atWwOkY0OIhFxRNFr4A= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc= +github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= +github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.1/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/u2takey/ffmpeg-go v0.4.1 h1:l5ClIwL3N2LaH1zF3xivb3kP2HW95eyG5xhHE1JdZ9Y= +github.com/u2takey/ffmpeg-go v0.4.1/go.mod h1:ruZWkvC1FEiUNjmROowOAps3ZcWxEiOpFoHCvk97kGc= +github.com/u2takey/go-utils v0.3.1 h1:TaQTgmEZZeDHQFYfd+AdUT1cT4QJgJn/XVPELhHw4ys= +github.com/u2takey/go-utils v0.3.1/go.mod h1:6e+v5vEZ/6gu12w/DC2ixZdZtCrNokVxD0JUklcqdCs= +github.com/youpy/go-riff v0.1.0 h1:vZO/37nI4tIET8tQI0Qn0Y79qQh99aEpponTPiPut7k= +github.com/youpy/go-riff v0.1.0/go.mod h1:83nxdDV4Z9RzrTut9losK7ve4hUnxUR8ASSz4BsKXwQ= +github.com/youpy/go-wav v0.3.2 h1:NLM8L/7yZ0Bntadw/0h95OyUsen+DQIVf9gay+SUsMU= +github.com/youpy/go-wav v0.3.2/go.mod h1:0FCieAXAeSdcxFfwLpRuEo0PFmAoc+8NU34h7TUvk50= +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b h1:QqixIpc5WFIqTLxB3Hq8qs0qImAgBdq0p6rq2Qdl634= +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8= +gocv.io/x/gocv v0.25.0 h1:vM50jL3v9OEqWSi+urelX5M1ptZeFWA/VhGPvdTqsJU= +gocv.io/x/gocv v0.25.0/go.mod h1:Rar2PS6DV+T4FL+PM535EImD/h13hGVaHhnCu1xarBs= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8 h1:hVwzHzIUGRjiF7EcUjqNxk3NCfkPxbDKRdnNE1Rpg0U= +golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b h1:uwuIcX0g4Yl1NC5XAz37xsr2lTtcqevgzYNVt49waME= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod 
h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 h1:SvFZT6jyqRaOeXpc5h/JSfZenJ2O330aBsf7JfSUXmQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563 h1:NIou6eNFigscvKJmsbyez16S2cIS6idossORlFtSt2E= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.7/go.mod 
h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= +gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= +sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q= +sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc=
diff --git a/helpers.go b/helpers.go
new file mode 100644
index 0000000..e7c2159
--- /dev/null
+++ b/helpers.go
@@ -0,0 +1,115 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"strings"
+
+	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+	"github.com/go-audio/wav"
+)
+
+// defaultModelPath is the model loaded when MODELPATH is unset or empty.
+const defaultModelPath = "./whisper.cpp/bindings/go/models/ggml-small.en.bin"
+
+// ProcessSamples runs speech recognition on samples using model and returns
+// the text of all recognized segments joined by spaces, with surrounding
+// whitespace trimmed.
+//
+// Failures are logged rather than raised so that one bad recording cannot take
+// the whole bot down; in that case the text recognized so far (possibly the
+// empty string) is returned.
+func ProcessSamples(model whisper.Model, samples []float32) (finalText string) {
+	ctx, err := model.NewContext()
+	if err != nil {
+		log.Printf("creating whisper context: %v", err)
+		return ""
+	}
+
+	// Not fatal: recognition is still attempted with the context as it is.
+	if err := ctx.SetLanguage("auto"); err != nil {
+		log.Printf("failed to set language to auto-detect: %v", err)
+	}
+
+	if err := ctx.Process(samples, nil); err != nil {
+		log.Printf("processing samples: %v", err)
+		return ""
+	}
+
+	var text strings.Builder
+	for {
+		segment, err := ctx.NextSegment()
+		if err != nil {
+			// io.EOF marks the end of the segment list; anything else is a
+			// real failure worth reporting.
+			if !errors.Is(err, io.EOF) {
+				log.Printf("reading segment: %v", err)
+			}
+			break
+		}
+		text.WriteString(" ")
+		text.WriteString(segment.Text)
+	}
+
+	finalText = strings.TrimSpace(text.String())
+	log.Printf("Recognized: %s", finalText)
+	return finalText
+}
+
+// GetSamplesFromFilePath decodes the WAV file at path and returns its samples
+// as float32 values.
+//
+// The file must be mono and sampled at whisper.SampleRate. An error is
+// returned, and samples is nil, when the file cannot be opened or decoded or
+// when it does not meet those requirements.
+func GetSamplesFromFilePath(path string) (samples []float32, err error) {
+	log.Printf("Loading %q", path)
+
+	fh, err := os.Open(path)
+	if err != nil {
+		return nil, fmt.Errorf("opening wav file: %w", err)
+	}
+	defer fh.Close()
+
+	dec := wav.NewDecoder(fh)
+	buf, err := dec.FullPCMBuffer()
+	if err != nil {
+		return nil, fmt.Errorf("decoding %q: %w", path, err)
+	}
+
+	switch {
+	case dec.SampleRate != whisper.SampleRate:
+		return nil, fmt.Errorf("unsupported sample rate: %d", dec.SampleRate)
+	case dec.NumChans != 1:
+		return nil, fmt.Errorf("unsupported number of channels: %d", dec.NumChans)
+	}
+
+	samples = buf.AsFloat32Buffer().Data
+	log.Printf("Loaded %q, no. of samples = %d", path, len(samples))
+	return samples, nil
+}
+
+// GetModel loads the whisper model from the path given in the MODELPATH
+// environment variable, falling back to defaultModelPath when the variable is
+// unset or empty.
+//
+// It panics if the model cannot be loaded, since the bot cannot transcribe
+// anything without one.
+func GetModel() whisper.Model {
+	modelPath := defaultModelPath
+	// docker-compose.yml forwards MODELPATH even when it is empty, so an empty
+	// value is treated the same as an unset one.
+	if customPath := os.Getenv("MODELPATH"); customPath != "" {
+		log.Printf("MODELPATH set to %v\n", customPath)
+		modelPath = customPath
+	}
+
+	model, err := whisper.New(modelPath)
+	if err != nil {
+		panic(err)
+	}
+	return model
+}
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..0776d30
--- /dev/null
+++ b/main.go
@@ -0,0 +1,109 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+
+	"github.com/cavaliergopher/grab/v3"
+	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+	tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5"
+	ffmpeg "github.com/u2takey/ffmpeg-go"
+)
+
+// tmpDir holds the downloaded voice message and its WAV conversion while a
+// message is being processed.
+const tmpDir = "tmp/"
+
+// main starts the bot: it loads the whisper model, long-polls Telegram for
+// updates and replies to every voice message with its transcription.
+func main() {
+	apiToken := os.Getenv("TELEGRAM_APITOKEN")
+	if apiToken == "" {
+		panic("No TELEGRAM_APITOKEN environment variable found.")
+	}
+
+	bot, err := tgbotapi.NewBotAPI(apiToken)
+	if err != nil {
+		panic(err)
+	}
+
+	// The Dockerfile creates this directory; create it here too so that a
+	// locally built binary has somewhere to put its downloads.
+	if err := os.MkdirAll(tmpDir, 0o755); err != nil {
+		panic(err)
+	}
+
+	whisperModel := GetModel()
+
+	bot.Debug = true
+
+	// Start from offset 0 and let each request wait up to 30 seconds for an
+	// update, so updates arrive promptly without frequent polling.
+	updateConfig := tgbotapi.NewUpdate(0)
+	updateConfig.Timeout = 30
+
+	for update := range bot.GetUpdatesChan(updateConfig) {
+		// Only voice messages are of interest; discard every other update.
+		if update.Message == nil || update.Message.Voice == nil {
+			continue
+		}
+
+		// Errors are logged and the message skipped: one bad recording must
+		// not take the bot down for everyone else.
+		recognizedText, err := transcribeVoice(bot, whisperModel, update.Message.Voice.FileID)
+		if err != nil {
+			log.Printf("Unfortunately this happened: %v", err)
+			continue
+		}
+		if recognizedText == "" {
+			log.Println("Nothing recognized, nothing to send.")
+			continue
+		}
+
+		// Reply to the voice message with its transcription.
+		msg := tgbotapi.NewMessage(update.Message.Chat.ID, recognizedText)
+		msg.ReplyToMessageID = update.Message.MessageID
+		if _, err := bot.Send(msg); err != nil {
+			log.Printf("Error sending message: %v", err) //TODO: Handle and retry?
+		}
+	}
+}
+
+// transcribeVoice downloads the Telegram file identified by fileID into
+// tmpDir, converts it with ffmpeg to a 16 kHz mono 16-bit PCM WAV file and
+// returns the text recognized in it by model. Both temporary files are removed
+// when it returns.
+func transcribeVoice(bot *tgbotapi.BotAPI, model whisper.Model, fileID string) (string, error) {
+	fileURL, err := bot.GetFileDirectURL(fileID)
+	if err != nil {
+		return "", fmt.Errorf("resolving file URL: %w", err)
+	}
+
+	//TODO: Don't save to a temporary file, replace with a pipe to ffmpeg
+	resp, err := grab.Get(tmpDir, fileURL)
+	if resp != nil && resp.Filename != "" {
+		// Best-effort cleanup: the file is only needed while this message is processed.
+		defer os.Remove(resp.Filename)
+	}
+	if err != nil {
+		return "", fmt.Errorf("downloading voice message: %w", err)
+	}
+	log.Printf("Download saved to %s", resp.Filename)
+
+	wavPath := filepath.Join(tmpDir, "tmp.wav")
+	defer os.Remove(wavPath) // best-effort cleanup, as above
+	err = ffmpeg.Input(resp.Filename).
+		Output(wavPath, ffmpeg.KwArgs{"acodec": "pcm_s16le", "ac": "1", "ar": "16000"}).
+		OverWriteOutput().ErrorToStdOut().Run()
+	if err != nil {
+		return "", fmt.Errorf("converting to wav: %w", err)
+	}
+
+	samples, err := GetSamplesFromFilePath(wavPath)
+	if err != nil {
+		return "", err
+	}
+	return ProcessSamples(model, samples), nil
+}