Skip to content

Commit

Permalink
Search through subtitles (TUM-Dev#955)
Browse files Browse the repository at this point in the history
* search through subtitles

* clean up

* lint ts

* go mod tidy

* implement feedback
  • Loading branch information
joschahenningsen authored Mar 6, 2023
1 parent e96dba7 commit fc1c3c5
Show file tree
Hide file tree
Showing 16 changed files with 331 additions and 7 deletions.
14 changes: 13 additions & 1 deletion api/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@ import (

// configGinSearchRouter registers the search endpoints on router.
//
// All routes live below /api/search:
//   - GET /streams                    -> searchStreams
//   - GET /stream/:streamID/subtitles -> searchSubtitles
//
// The streams route is registered exactly once, through the group. Registering
// it a second time directly on the router would make gin panic at startup
// because the same method and path would already have a handler.
func configGinSearchRouter(router *gin.Engine, daoWrapper dao.DaoWrapper) {
	routes := searchRoutes{daoWrapper}

	searchGroup := router.Group("/api/search")
	searchGroup.GET("/streams", routes.searchStreams)

	// searchSubtitles reads the stream from the "TUMLiveContext" key;
	// presumably tools.InitStream is what stores it there — verify against tools.
	withStream := searchGroup.Group("/stream/:streamID")
	withStream.Use(tools.InitStream(daoWrapper))
	withStream.GET("/subtitles", routes.searchSubtitles)
}

type searchRoutes struct {
Expand Down Expand Up @@ -65,3 +71,9 @@ func (r searchRoutes) searchStreams(c *gin.Context) {
"results": response,
})
}

// searchSubtitles responds with the subtitle search result for the query
// parameter "q", restricted to the stream found in the request's
// "TUMLiveContext" value. The result of tools.SearchSubtitles is written as
// JSON with status 200 as-is.
func (r searchRoutes) searchSubtitles(c *gin.Context) {
	// MustGet panics when the key is missing, so this handler relies on the
	// context value having been set earlier in the handler chain.
	tumLiveContext := c.MustGet("TUMLiveContext").(tools.TUMLiveContext)
	result := tools.SearchSubtitles(c.Query("q"), tumLiveContext.Stream.ID)
	c.JSON(http.StatusOK, result)
}
6 changes: 6 additions & 0 deletions cmd/tumlive/tumlive.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,10 @@ func main() {
log.Fatalf("%v", err)
}
dao.Cache = *cache

// init meili search index settings
go tools.NewMeiliExporter(dao.NewDaoWrapper()).SetIndexSettings()

initCron()
go func() {
err = GinServer()
Expand All @@ -218,6 +222,8 @@ func initCron() {
_ = tools.Cron.AddFunc("triggerDueStreams", api.NotifyWorkers(daoWrapper), "0-59 * * * *")
// update courses available
_ = tools.Cron.AddFunc("prefetchCourses", tum.PrefetchCourses(daoWrapper), "30 3 * * *")
// export data to meili search
_ = tools.Cron.AddFunc("exportToMeili", tools.NewMeiliExporter(daoWrapper).Export, "30 4 * * *")
tools.Cron.Run()
}

Expand Down
40 changes: 40 additions & 0 deletions dao/streams.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ type StreamsDao interface {
GetStreamByID(ctx context.Context, id string) (stream model.Stream, err error)
GetWorkersForStream(stream model.Stream) ([]model.Worker, error)
GetAllStreams() ([]model.Stream, error)
ExecAllStreamsWithCoursesAndSubtitles(f func([]StreamWithCourseAndSubtitles))
GetCurrentLive(ctx context.Context) (currentLive []model.Stream, err error)
GetCurrentLiveNonHidden(ctx context.Context) (currentLive []model.Stream, err error)
GetLiveStreamsInLectureHall(lectureHallId uint) ([]model.Stream, error)
Expand Down Expand Up @@ -209,6 +210,45 @@ func (d streamsDao) GetAllStreams() ([]model.Stream, error) {
return res, err
}

// StreamWithCourseAndSubtitles is the flat row type that
// ExecAllStreamsWithCoursesAndSubtitles scans its query result into: one
// stream together with selected columns of its course and its subtitles.
type StreamWithCourseAndSubtitles struct {
	// Stream columns.
	Name        string
	Description string

	// Course columns.
	TeachingTerm string
	CourseName   string

	// Subtitles holds the subtitle content selected for the stream.
	Subtitles string

	ID       uint // stream id
	CourseID uint // id of the stream's course
	Year     int  // course year
}

// ExecAllStreamsWithCoursesAndSubtitles loads recorded, non-deleted streams
// together with their course and subtitle columns and calls f once per
// successfully loaded batch.
//
// Rows are fetched in pages of batchSize using LIMIT/OFFSET. The number of
// pages is derived from a count of recorded streams. Errors are printed and
// not returned: a failing count aborts the export, a failing page is skipped
// so that f never receives rows that belong to a previous page.
//
// NOTE(review): LIMIT/OFFSET apply to the joined rows (one row per subtitle),
// while the page count is based on streams. A stream with several subtitle
// rows can therefore straddle two pages or fall outside the counted range —
// confirm whether a stream can have more than one subtitle row.
func (d streamsDao) ExecAllStreamsWithCoursesAndSubtitles(f func([]StreamWithCourseAndSubtitles)) {
	const batchSize = 100

	var numStreams int64
	if err := DB.Where("recording").Model(&model.Stream{}).Count(&numStreams).Error; err != nil {
		fmt.Println(err)
		return
	}

	for offset := 0; offset < int(numStreams); offset += batchSize {
		// A fresh slice per page: a failed or empty page must not leak rows
		// from the page before it.
		var res []StreamWithCourseAndSubtitles

		// ORDER BY makes the paging stable; without it the database is free
		// to return rows in a different order for every page.
		err := DB.Raw(`WITH sws AS (
				SELECT streams.id,
					streams.name,
					streams.description,
					c.id as course_id,
					c.name as course_name,
					c.teaching_term,
					c.year,
					s.content as subtitles,
					IFNULL(s.stream_id, streams.id) as sid
				FROM streams
					JOIN courses c ON c.id = streams.course_id
					LEFT JOIN subtitles s ON streams.id = s.stream_id
				WHERE streams.recording AND streams.deleted_at IS NULL
				ORDER BY streams.id
				LIMIT ? OFFSET ?
			)
			SELECT *, GROUP_CONCAT(subtitles, '\n') AS subtitles FROM sws GROUP BY sid;`, batchSize, offset).Scan(&res).Error
		if err != nil {
			fmt.Println(err)
			continue
		}
		f(res)
	}
}

func (d streamsDao) GetCurrentLive(ctx context.Context) (currentLive []model.Stream, err error) {
if streams, found := Cache.Get("AllCurrentlyLiveStreams"); found {
return streams.([]model.Stream), nil
Expand Down
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,15 @@ require (

require (
github.com/TUM-Dev/CampusProxy/client v0.0.0-20230226120508-3e8bb2411921
github.com/asticode/go-astisub v0.23.0
github.com/matthiasreumann/gomino v0.0.2
github.com/meilisearch/meilisearch-go v0.23.0
)

require (
github.com/andybalholm/brotli v1.0.5 // indirect
github.com/asticode/go-astikit v0.20.0 // indirect
github.com/asticode/go-astits v1.8.0 // indirect
github.com/bytedance/sonic v1.8.3 // indirect
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
github.com/felixge/fgprof v0.9.3 // indirect
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ github.com/antchfx/xmlquery v1.3.15/go.mod h1:zMDv5tIGjOxY/JCNNinnle7V/EwthZ5IT8
github.com/antchfx/xpath v1.2.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/antchfx/xpath v1.2.4 h1:dW1HB/JxKvGtJ9WyVGJ0sIoEcqftV3SqIstujI+B9XY=
github.com/antchfx/xpath v1.2.4/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/asticode/go-astikit v0.20.0 h1:+7N+J4E4lWx2QOkRdOf6DafWJMv6O4RRfgClwQokrH8=
github.com/asticode/go-astikit v0.20.0/go.mod h1:h4ly7idim1tNhaVkdVBeXQZEE3L0xblP7fCWbgwipF0=
github.com/asticode/go-astisub v0.23.0 h1:WzkWty0Phy9rGrG6r0FjShBy9f1Wn7sMLvjdYj5hki4=
github.com/asticode/go-astisub v0.23.0/go.mod h1:WTkuSzFB+Bp7wezuSf2Oxulj5A8zu2zLRVFf6bIFQK8=
github.com/asticode/go-astits v1.8.0 h1:rf6aiiGn/QhlFjNON1n5plqF3Fs025XLUwiQ0NB6oZg=
github.com/asticode/go-astits v1.8.0/go.mod h1:DkOWmBNQpnr9mv24KfZjq4JawCFX1FCqjLVGvO0DygQ=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/beevik/etree v1.1.0 h1:T0xke/WvNtMoCqgzPhkX2r4rjY3GDZFi+FjpRZY2Jbs=
Expand Down Expand Up @@ -342,6 +348,7 @@ github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsK
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/profile v1.4.0/go.mod h1:NWz/XGvpEW1FyYQ7fCx4dqYBLlfTcE+A9FLAkNKqjFE=
github.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA=
github.com/pkg/profile v1.7.0/go.mod h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo=
github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg=
Expand Down Expand Up @@ -504,6 +511,7 @@ golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
Expand Down
13 changes: 13 additions & 0 deletions mock_dao/streams.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

134 changes: 134 additions & 0 deletions tools/meiliExporter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package tools

import (
"errors"
"fmt"
"github.com/asticode/go-astisub"
"github.com/joschahenningsen/TUM-Live/dao"
"github.com/meilisearch/meilisearch-go"
log "github.com/sirupsen/logrus"
"strings"
)

// MeiliStream is the document shape of a stream as it is sent to the
// "STREAMS" Meilisearch index. The JSON tags define the attribute names of
// the indexed document.
type MeiliStream struct {
	// ID is the stream id; Export passes "ID" as the primary key of the index.
	ID uint `json:"ID"`

	Name        string `json:"name"`
	Description string `json:"description"`
	CourseName  string `json:"courseName"`
	Year        int    `json:"year"`

	// TeachingTerm is indexed under the attribute name "semester".
	TeachingTerm string `json:"semester"`

	CourseID uint `json:"courseID"`
}

// MeiliSubtitles is the document shape of a single subtitle line as it is
// sent to the "SUBTITLES" Meilisearch index. Besides its own text, every
// document carries the text of its neighbouring lines.
type MeiliSubtitles struct {
	// ID is the Meilisearch document id: stream id and timestamp joined by "-".
	ID string `json:"ID"`

	StreamID  uint  `json:"streamID"`
	Timestamp int64 `json:"timestamp"`

	TextPrev string `json:"textPrev"` // text of the preceding subtitle line
	Text     string `json:"text"`     // text of this subtitle line
	TextNext string `json:"textNext"` // text of the following subtitle line
}

// MeiliExporter pushes data read through the dao layer into Meilisearch.
// Its methods are safe to call on a nil *MeiliExporter and do nothing then.
type MeiliExporter struct {
	// c is the Meilisearch client all index operations go through.
	c *meilisearch.Client
	// d provides database access for reading the data to export.
	d dao.DaoWrapper
}

// NewMeiliExporter builds a MeiliExporter that uses the Meilisearch client
// obtained from Cfg and the given dao wrapper.
//
// It returns nil when no client is available. ErrMeiliNotConfigured is
// treated as an expected situation and stays silent; every other error is
// logged.
func NewMeiliExporter(d dao.DaoWrapper) *MeiliExporter {
	client, err := Cfg.GetMeiliClient()
	switch {
	case err == nil:
		return &MeiliExporter{c: client, d: d}
	case errors.Is(err, ErrMeiliNotConfigured):
		return nil
	default:
		log.WithError(err).Error("could not get meili client")
		return nil
	}
}

// Export rebuilds the Meilisearch data from the database.
//
// All documents of the "SUBTITLES" index are deleted first. Then, for every
// batch delivered by ExecAllStreamsWithCoursesAndSubtitles, the parsed
// subtitle lines of each stream are added to "SUBTITLES" and the streams of
// the batch are added to "STREAMS". Both indexes use "ID" as primary key.
//
// Failures are logged and do not abort the export. Calling Export on a nil
// receiver is a no-op.
func (m *MeiliExporter) Export() {
	if m == nil {
		return
	}
	index := m.c.Index("STREAMS")
	_, err := m.c.Index("SUBTITLES").DeleteAllDocuments()
	if err != nil {
		log.WithError(err).Warn("could not delete all old subtitles")
	}

	m.d.StreamsDao.ExecAllStreamsWithCoursesAndSubtitles(func(streams []dao.StreamWithCourseAndSubtitles) {
		meilistreams := make([]MeiliStream, len(streams))
		for i, stream := range streams {
			meilistreams[i] = MeiliStream{
				ID:           stream.ID,
				CourseID:     stream.CourseID,
				Name:         stream.Name,
				Description:  stream.Description,
				CourseName:   stream.CourseName,
				Year:         stream.Year,
				TeachingTerm: stream.TeachingTerm,
			}
			if stream.Subtitles == "" {
				continue
			}

			vtt, err := astisub.ReadFromWebVTT(strings.NewReader(stream.Subtitles))
			if err != nil {
				// The stream itself is already part of meilistreams; only its
				// subtitles are left out.
				log.WithError(err).Warn("could not parse subtitles")
				continue
			}

			meiliSubtitles := make([]MeiliSubtitles, 0, len(vtt.Items))
			for j, item := range vtt.Items {
				startMs := item.StartAt.Milliseconds()
				sub := MeiliSubtitles{
					ID:        fmt.Sprintf("%d-%d", stream.ID, startMs),
					StreamID:  stream.ID,
					Timestamp: startMs,
					Text:      item.String(),
				}
				// Every item appends exactly one document, so index j-1 is the
				// document of the preceding subtitle line.
				if j > 0 {
					sub.TextPrev = meiliSubtitles[j-1].Text
					meiliSubtitles[j-1].TextNext = sub.Text
				}

				meiliSubtitles = append(meiliSubtitles, sub)
			}

			if len(meiliSubtitles) > 0 {
				_, err := m.c.Index("SUBTITLES").AddDocuments(&meiliSubtitles, "ID")
				if err != nil {
					log.WithError(err).Error("issue adding subtitles to meili")
				}
			}
		}
		_, err := index.AddDocuments(&meilistreams, "ID")
		if err != nil {
			log.WithError(err).Error("issue adding documents to meili")
		}
	})
}

// SetIndexSettings configures the Meilisearch indexes used by the exporter:
// it sets semester synonyms on "STREAMS" and the filterable, searchable and
// sortable attributes on "SUBTITLES".
//
// The two updates are independent; a failure of either one is logged and
// does not prevent the other. Calling it on a nil receiver is a no-op.
func (m *MeiliExporter) SetIndexSettings() {
	if m == nil {
		return
	}

	semesterSynonyms := map[string][]string{
		"W": {"Wintersemester", "Winter", "WS", "WiSe"},
		"S": {"Sommersemester", "Sommer", "SS", "SoSe", "Summer"},
	}
	if _, err := m.c.Index("STREAMS").UpdateSynonyms(&semesterSynonyms); err != nil {
		log.WithError(err).Error("could not set synonyms for meili index STREAMS")
	}

	subtitleSettings := meilisearch.Settings{
		FilterableAttributes: []string{"streamID", "courseID"},
		SearchableAttributes: []string{"text"},
		SortableAttributes:   []string{"timestamp"},
	}
	if _, err := m.c.Index("SUBTITLES").UpdateSettings(&subtitleSettings); err != nil {
		log.WithError(err).Warn("could not set settings for meili index SUBTITLES")
	}
}
23 changes: 23 additions & 0 deletions tools/meiliSearch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package tools

import (
"fmt"
"github.com/meilisearch/meilisearch-go"
log "github.com/sirupsen/logrus"
)

// SearchSubtitles searches the "SUBTITLES" Meilisearch index for q and
// returns at most 10 hits that belong to the stream with the given id.
//
// It returns nil when no Meilisearch client can be obtained from Cfg or when
// the search request fails; only the latter is logged.
func SearchSubtitles(q string, streamID uint) *meilisearch.SearchResponse {
	client, err := Cfg.GetMeiliClient()
	if err != nil {
		return nil
	}

	request := meilisearch.SearchRequest{
		Filter: fmt.Sprintf("streamID = %d", streamID),
		Limit:  10,
	}
	result, err := client.Index("SUBTITLES").Search(q, &request)
	if err != nil {
		log.WithError(err).Error("could not search meili")
		return nil
	}
	return result
}
5 changes: 4 additions & 1 deletion web/template/header.gohtml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<img src="/logo.svg" width="75" height="50" alt="TUM-Live Logo">
</a>
<div class="w-full mx-4 md:flex items-center justify-between">
<div class="hidden md:flex text-sm grow">
<div class="hidden md:flex text-sm">
<a href="/"
class="inline-block align-middle mt-0 mr-4 text-5 hover:text-1 font-medium">
Start
Expand Down Expand Up @@ -53,6 +53,9 @@
{{end}}
{{end}}
</div>

{{template "search"}}

<div class="md:flex">
<div class="hidden md:flex">
{{template "theme-selector"}}
Expand Down
2 changes: 1 addition & 1 deletion web/template/partial/stream/bookmarks.gohtml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
hover:bg-gray-100 dark:hover:bg-gray-600"
:class="bookmarkUpdater.show && 'rounded-r-none'"
:disabled="bookmarkUpdater.show"
@click="watch.jumpTo(b.hours, b.minutes, b.seconds);">
@click="watch.jumpTo({ timeParts:{hours: b.hours, minutes: b.minutes, seconds: b.seconds} });">
<template x-if="!bookmarkUpdater.show">
<div class="flex items-center justify-between flex-grow">
<div>
Expand Down
32 changes: 32 additions & 0 deletions web/template/partial/stream/search.gohtml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{{define "search"}}
    {{/*
        Subtitle search box for the header.
        The component stays hidden until a "togglesearch" window event arrives; the event's
        detail.streamID is used to create the searcher via watch.subtitleSearch.
        Escape closes the result list and blurs the input. Both steps are guarded because the
        listener is attached to the window and therefore also fires while searcher is still
        undefined and the input has not been rendered.
    */}}
    <div class="grow px-6 text-right hidden md:block"
         x-data="{showSearch: false, searcher:undefined}"
         x-show="showSearch"
         @keyup.escape.window="searcher && searcher.closeRes(); $refs.searchInput && $refs.searchInput.blur();"
         @togglesearch.window="e => {showSearch=true; searcher=watch.subtitleSearch(e.detail.streamID)}" x-cloak>
        <template x-if="searcher!=undefined">
            <input type="search" x-ref="searchInput"
                   @input="searcher.search($event.target.value)"
                   @focus="searcher.hits.length > 0 && searcher.openRes()"
                   class="max-w-xl p-2 bg-transparent rounded-lg border-gray-600 border w-full px-2 font-light text-2"
                   placeholder="Search in lecture">
        </template>
        <template x-if="searcher!=undefined">
            <div x-show="searcher.open" @click.outside="searcher.closeRes()" class="absolute top-24 right-5 z-50 px-4 overflow-x-hidden h-96 bg-gray-100 shadow dark:bg-gray-900/50 rounded-lg text-left text-gray-800 dark:text-gray-200">
                <div class="w-2xl p-3 overflow-y-auto">
                    <template x-for="res in searcher.hits">
                        <div @click="watch.jumpTo({Ms: res.timestamp});" class="dark:hover:bg-gray-700 hover:outline dark:bg-gray-800 bg-white rounded p-2 my-2 flex" role="button">
                            <span class="my-auto p-2 font-semibold"
                                  x-text="global.Time.FromSeconds(res.timestamp/1000).toString()"></span>
                            <div class="p-2">
                                <span class="block text-xs" x-text="res.textPrev"></span>
                                <span class="block text-sm font-bold" x-text="res.text"></span>
                                <span class="block text-xs" x-text="res.textNext"></span>
                            </div>
                        </div>
                    </template>
                </div>
            </div>
        </template>
    </div>
{{end}}
Loading

0 comments on commit fc1c3c5

Please sign in to comment.