Skip to content

Commit

Permalink
feat: give gobot a voice (#6)
Browse files Browse the repository at this point in the history
feat: give gobot a voice

We use the PlayHT API and stream the audio to the default audio device using github.com/gopxl/beep.
We introduce a new package, tts: we start a TTS worker which streams
the audio data to the writer end of an io.Pipe, which is then played on the default
audio device. The audio playback must run in the main func, otherwise the
io.Pipe breaks for some reason that still needs to be investigated: spinning
up an audio playback goroutine breaks the io.Pipe.

Signed-off-by: Milos Gajdos <[email protected]>

---------

Signed-off-by: Milos Gajdos <[email protected]>
  • Loading branch information
milosgajdos authored Apr 23, 2024
1 parent 44410f2 commit c8a9c4d
Show file tree
Hide file tree
Showing 9 changed files with 315 additions and 18 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ jobs:
- '1.22'

steps:
- name: Update apt
run: sudo apt update
- name: Install alsa
run: sudo apt install -y --no-install-recommends libasound2-dev pkg-config

- name: Set up Go ${{ matrix.go }}
uses: actions/setup-go@v5
Expand Down Expand Up @@ -72,6 +76,10 @@ jobs:
- '1.22'

steps:
- name: Update apt
run: sudo apt update
- name: Install alsa
run: sudo apt install -y --no-install-recommends libasound2-dev pkg-config

- name: Check out source code
uses: actions/checkout@v4
Expand All @@ -93,6 +101,11 @@ jobs:
CRATES_TOKEN: ${{ secrets.CRATES_TOKEN }}

steps:
- name: Update apt
run: sudo apt update
- name: Install alsa
run: sudo apt install -y --no-install-recommends libasound2-dev pkg-config

- name: check out code
uses: actions/checkout@v4

Expand Down
11 changes: 11 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,30 @@ go 1.22.0
toolchain go1.22.2

require (
github.com/gopxl/beep v1.4.1
github.com/milosgajdos/go-playht v0.1.0
github.com/nats-io/nats.go v1.34.1
github.com/tmc/langchaingo v0.1.8
golang.org/x/sync v0.7.0
)

require (
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/ebitengine/oto/v3 v3.1.0 // indirect
github.com/ebitengine/purego v0.7.1 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hajimehoshi/go-mp3 v0.3.4 // indirect
github.com/klauspost/compress v1.17.2 // indirect
github.com/nats-io/nkeys v0.4.7 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkoukk/tiktoken-go v0.1.6 // indirect
golang.org/x/crypto v0.18.0 // indirect
golang.org/x/net v0.20.0 // indirect
golang.org/x/sys v0.16.0 // indirect
golang.org/x/text v0.14.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 // indirect
google.golang.org/grpc v1.62.1 // indirect
google.golang.org/protobuf v1.33.0 // indirect
)
31 changes: 31 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,35 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/ebitengine/oto/v3 v3.1.0 h1:9tChG6rizyeR2w3vsygTTTVVJ9QMMyu00m2yBOCch6U=
github.com/ebitengine/oto/v3 v3.1.0/go.mod h1:IK1QTnlfZK2GIB6ziyECm433hAdTaPpOsGMLhEyEGTg=
github.com/ebitengine/purego v0.7.1 h1:6/55d26lG3o9VCZX8lping+bZcmShseiqlh2bnUDiPA=
github.com/ebitengine/purego v0.7.1/go.mod h1:ah1In8AOtksoNK6yk5z1HTJeUkC1Ez4Wk2idgGslMwQ=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gopxl/beep v1.4.1 h1:WqNs9RsDAhG9M3khMyc1FaVY50dTdxG/6S6a3qsUHqE=
github.com/gopxl/beep v1.4.1/go.mod h1:A1dmiUkuY8kxsvcNJNUBIEcchmiP6eUyCHSxpXl0YO0=
github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68=
github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo=
github.com/hajimehoshi/oto/v2 v2.3.1/go.mod h1:seWLbgHH7AyUMYKfKYT9pg7PhUu9/SisyJvNTT+ASQo=
github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/milosgajdos/go-playht v0.1.0 h1:N0S8LvsGs/xE5cqDrzHE9pB1CQd4Hdp0KGyt2HRVy9M=
github.com/milosgajdos/go-playht v0.1.0/go.mod h1:KXtsK+GZNQC7Yh8yRsyx84qTckbeGxB5Q3Xfd7aTPSI=
github.com/nats-io/nats.go v1.34.1 h1:syWey5xaNHZgicYBemv0nohUPPmaLteiBEUT6Q5+F/4=
github.com/nats-io/nats.go v1.34.1/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8=
github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI=
github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDmGD0nc=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAcUsw=
github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Expand All @@ -22,11 +41,23 @@ github.com/tmc/langchaingo v0.1.8 h1:nrImgh0aWdu3stJTHz80N60WGwPWY8HXCK10gQny7bA
github.com/tmc/langchaingo v0.1.8/go.mod h1:iNBfS9e6jxBKsJSPWnlqNhoVWgdA3D1g5cdFJjbIZNQ=
golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc=
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo=
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 h1:AjyfHzEPEFp/NpvfN5g+KDla3EMojjhRVZc1i7cj+oM=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80/go.mod h1:PAREbraiVEVGVdTZsVWjSbbTtSyGbAgIIvni8a8CD5s=
google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk=
google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
13 changes: 9 additions & 4 deletions gobot/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,17 @@ const (
defaultSeedPrompt = `You are a Go programming language expert and a helpful ` +
`AI assistant trying to learn about Rust programming language. You will ` +
`answer questions ONLY about Go and ONLY ask questions about Rust. You do ` +
`NOT explain how Rust works, you ONLY compare Rust to Go. When you receive ` +
`a response you will evaluate it from an experienced Go programmer point of ` +
`view and ask followup questions about Rust. NEVER use emojis in your answers. ` +
`Your response must NOT be longer than 100 words!
`NOT explain how Rust works. You are NOT Rust expert. You ONLY compare ` +
`Rust to Go. When you receive a response you will evaluate it from an ` +
`experienced Go programmer point of view and ask followup questions about ` +
`Rust. You must NEVER use emojis in your answers. Your answers must NOT ` +
`be longer than 100 words!
Question: What is the biggest strength of Go?
Assistant: One of the biggest strengths of Go is its concise syntax and simple grammar,` +
`which makes it easy to write code quickly. Can you tell me what are some of the biggest` +
`strengths of Rust that make it stand out from other programming languages?`
)

// Defaults for the bot's text-to-speech voice.
const (
// defaultVoiceID is the default value of the "voice-id" command line flag,
// i.e. the play HT voice ID used when the flag is not set.
defaultVoiceID = "s3://mockingbird-prod/abigail_vo_6661b91f-4012-44e3-ad12-589fbdee9948/voices/speaker/manifest.json"
)
17 changes: 11 additions & 6 deletions gobot/jet/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ type Writer struct {
subject string
}

func (w Writer) Write(ctx context.Context, chunks chan []byte) error {
func (w Writer) Write(ctx context.Context, chunks chan []byte, doneTTS chan struct{}) error {
log.Println("launching JetStream Reader")
defer log.Println("done writing to JetStream")
msg := []byte{}
Expand All @@ -24,12 +24,17 @@ func (w Writer) Write(ctx context.Context, chunks chan []byte) error {
case chunk := <-chunks:
if len(chunk) == 0 {
fmt.Printf("\n[A]: %s\n", string(msg))
_, err := w.stream.Publish(ctx, w.subject, msg)
if err != nil {
return err
select {
case <-ctx.Done():
return ctx.Err()
case <-doneTTS:
_, err := w.stream.Publish(ctx, w.subject, msg)
if err != nil {
return err
}
// reset the msg slice instead of reallocating
msg = msg[:0]
}
// reset the msg slice instead of reallocating
msg = msg[:0]
break
}
msg = append(msg, chunk...)
Expand Down
20 changes: 18 additions & 2 deletions gobot/llm/llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/tmc/langchaingo/llms"
"github.com/tmc/langchaingo/llms/ollama"
"golang.org/x/sync/errgroup"
)

type Config struct {
Expand All @@ -33,7 +34,15 @@ func New(c Config) (*LLM, error) {
}, nil
}

func (l *LLM) Stream(ctx context.Context, prompts chan string, chunks chan []byte) error {
// sendChunk delivers chunk on the chunks channel, giving up as soon as ctx is
// done.
//
// It returns nil once the chunk has been handed over to the channel. If ctx is
// done before the send completes, the chunk is dropped and ctx.Err() is
// returned, so the caller can tell a delivered chunk from one that was
// abandoned because of cancellation.
func sendChunk(ctx context.Context, chunks chan []byte, chunk []byte) error {
	select {
	case <-ctx.Done():
		// Surface the cancellation instead of reporting success: the chunk
		// was never sent.
		return ctx.Err()
	case chunks <- chunk:
		return nil
	}
}

func (l *LLM) Stream(ctx context.Context, prompts chan string, jetChunks, ttsChunks chan []byte) error {
log.Println("launching LLM stream")
defer log.Println("done streaming LLM")
chat := NewHistory(int(l.histSize))
Expand All @@ -52,7 +61,14 @@ func (l *LLM) Stream(ctx context.Context, prompts chan string, chunks chan []byt
select {
case <-ctx.Done():
return ctx.Err()
case chunks <- chunk:
default:
// NOTE: we could just fire and forget here, but let's be clean
g, ctx := errgroup.WithContext(ctx)
g.Go(func() error { return sendChunk(ctx, jetChunks, chunk) })
g.Go(func() error { return sendChunk(ctx, ttsChunks, chunk) })
if err := g.Wait(); err != nil {
return err
}
return nil
}
}))
Expand Down
57 changes: 51 additions & 6 deletions gobot/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@ import (
"context"
"flag"
"fmt"
"io"
"log"
"os"
"os/signal"
"time"

"github.com/milosgajdos/bot-banter/gobot/jet"
"github.com/milosgajdos/bot-banter/gobot/llm"
"github.com/milosgajdos/bot-banter/gobot/tts"

"github.com/gopxl/beep"
"github.com/gopxl/beep/mp3"
"github.com/gopxl/beep/speaker"
"github.com/nats-io/nats.go"
"golang.org/x/sync/errgroup"
)
Expand All @@ -24,6 +30,7 @@ var (
botName string
pubSubject string
subSubject string
voiceID string
)

func init() {
Expand All @@ -34,6 +41,7 @@ func init() {
flag.StringVar(&botName, "bot-name", defaultBotName, "bot name")
flag.StringVar(&pubSubject, "pub-subject", defaultPubSubject, "bot publish subject")
flag.StringVar(&subSubject, "sub-subject", defaultSubSubject, "bot subscribe subject")
flag.StringVar(&voiceID, "voice-id", defaultVoiceID, "play HT voice ID")
}

func main() {
Expand All @@ -59,7 +67,7 @@ func main() {
}

// NOTE: we could also provide functional options
// instead of passing it Config.
// instead of creating stream from Config.
jetConf := jet.Config{
StreamURL: url,
StreamName: streamName,
Expand All @@ -73,32 +81,53 @@ func main() {
}

// NOTE: we could also provide functional options
// instead of passing it Config.
// instead of creating llm from Config.
llmConf := llm.Config{
ModelName: modelName,
HistSize: histSize,
SeedPrompt: seedPrompt,
}
l, err := llm.New(llmConf)
if err != nil {
log.Fatal("failed creating an LLM client: ", err)
log.Fatal("failed creating LLM client: ", err)
}

chunks := make(chan []byte)
// NOTE: we could also provide functional options
// instead of creating tts from Config.
ttsConf := tts.DefaultConfig()
ttsConf.VoiceID = voiceID
t, err := tts.New(*ttsConf)
if err != nil {
log.Fatal("failed creating TTS client: ", err)
}

pipeReader, pipeWriter := io.Pipe()

log.Println("created pipe reader")

// chunks for TTS stream
ttsChunks := make(chan []byte, 100)
// chunk for JetStream
jetChunks := make(chan []byte, 100)
prompts := make(chan string)
// ttsDone for signalling we're done talking
ttsDone := make(chan struct{})

g, ctx := errgroup.WithContext(ctx)

log.Println("launching workers")

g.Go(func() error {
return l.Stream(ctx, prompts, chunks)
return t.Stream(ctx, pipeWriter, ttsChunks, ttsDone)
})
g.Go(func() error {
return l.Stream(ctx, prompts, jetChunks, ttsChunks)
})
g.Go(func() error {
return s.Reader.Read(ctx, prompts)
})
g.Go(func() error {
return s.Writer.Write(ctx, chunks)
return s.Writer.Write(ctx, jetChunks, ttsDone)
})

var prompt string
Expand All @@ -121,6 +150,22 @@ func main() {
case <-ctx.Done():
}

// NOTE: this must run on the main thread otherwise bad things happen:
// beep uses portaudio which requires to be running on the main thread
streamer, format, err := mp3.Decode(pipeReader)
if err != nil {
log.Printf("failed to initialize MP3 decoder: %v\n", err)
}
defer streamer.Close()

if err := speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)); err != nil {
log.Printf("Failed to initialize speaker: :%v\n", err)
}

speaker.Play(beep.Seq(streamer, beep.Callback(func() {
<-ctx.Done()
})))

if err := g.Wait(); err != nil {
if err != context.Canceled {
log.Fatalf("encountered error: %v", err)
Expand Down
Loading

0 comments on commit c8a9c4d

Please sign in to comment.