From 85293fcf4173af921499c52b5cf7c625814088ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Espino?= Date: Fri, 26 Feb 2021 07:41:05 +0100 Subject: [PATCH] Add the API for search files (#15605) * Adding search files api * Fixing golangci-lint * Adding bulk-indexing and improving a bit the name indexing for bleve and elasticsearch * Add content extraction config migration * Fixing a problem with document extraction * Unapplying certain changes moved to other PR * Fixing tests * Making extract content app migration a private method * Addressing PR review comments * Addressing PR review comments * Adding feature flag * Removing debug string * Fixing imports * Fixing linting errors * Do not migrate the config if the feature flag is not enabled * Fix tests Co-authored-by: Mattermod --- api4/file.go | 75 ++++++++++ api4/file_test.go | 162 +++++++++++++++++++++ app/app_iface.go | 1 + app/file.go | 91 ++++++++++++ app/file_test.go | 202 ++++++++++++++++++++++++++- app/migrations.go | 32 +++++ app/opentracing/opentracing_layer.go | 22 +++ app/upload.go | 17 +++ i18n/en.json | 12 ++ model/client4.go | 19 +++ model/config.go | 15 ++ services/telemetry/telemetry.go | 3 + testlib/store.go | 1 + 13 files changed, 648 insertions(+), 4 deletions(-) diff --git a/api4/file.go b/api4/file.go index e1c736a9e3..31c5d10c3a 100644 --- a/api4/file.go +++ b/api4/file.go @@ -60,6 +60,8 @@ func (api *API) InitFile() { api.BaseRoutes.File.Handle("/preview", api.ApiSessionRequiredTrustRequester(getFilePreview)).Methods("GET") api.BaseRoutes.File.Handle("/info", api.ApiSessionRequired(getFileInfo)).Methods("GET") + api.BaseRoutes.Team.Handle("/files/search", api.ApiSessionRequiredDisableWhenBusy(searchFiles)).Methods("POST") + api.BaseRoutes.PublicFile.Handle("", api.ApiHandler(getPublicFile)).Methods("GET") } @@ -725,3 +727,76 @@ func writeFileResponse(filename string, contentType string, contentSize int64, l http.ServeContent(w, r, filename, lastModification, fileReader) } + +func 
searchFiles(c *Context, w http.ResponseWriter, r *http.Request) { + c.RequireTeamId() + if c.Err != nil { + return + } + + if !c.App.Config().FeatureFlags.FilesSearch { + c.Err = model.NewAppError("searchFiles", "api.post.search_files.not_implemented.app_error", nil, "", http.StatusNotImplemented) + return + } + + if !c.App.SessionHasPermissionToTeam(*c.App.Session(), c.Params.TeamId, model.PERMISSION_VIEW_TEAM) { + c.SetPermissionError(model.PERMISSION_VIEW_TEAM) + return + } + + params, jsonErr := model.SearchParameterFromJson(r.Body) + if jsonErr != nil { + c.Err = model.NewAppError("searchFiles", "api.post.search_files.invalid_body.app_error", nil, jsonErr.Error(), http.StatusBadRequest) + return + } + + if params.Terms == nil || *params.Terms == "" { + c.SetInvalidParam("terms") + return + } + terms := *params.Terms + + timeZoneOffset := 0 + if params.TimeZoneOffset != nil { + timeZoneOffset = *params.TimeZoneOffset + } + + isOrSearch := false + if params.IsOrSearch != nil { + isOrSearch = *params.IsOrSearch + } + + page := 0 + if params.Page != nil { + page = *params.Page + } + + perPage := 60 + if params.PerPage != nil { + perPage = *params.PerPage + } + + includeDeletedChannels := false + if params.IncludeDeletedChannels != nil { + includeDeletedChannels = *params.IncludeDeletedChannels + } + + startTime := time.Now() + + results, err := c.App.SearchFilesInTeamForUser(terms, c.App.Session().UserId, c.Params.TeamId, isOrSearch, includeDeletedChannels, timeZoneOffset, page, perPage) + + elapsedTime := float64(time.Since(startTime)) / float64(time.Second) + metrics := c.App.Metrics() + if metrics != nil { + metrics.IncrementFilesSearchCounter() + metrics.ObserveFilesSearchDuration(elapsedTime) + } + + if err != nil { + c.Err = err + return + } + + w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate") + w.Write([]byte(results.ToJson())) +} diff --git a/api4/file_test.go b/api4/file_test.go index f1b832a3c8..3d1ea3871f 100644 --- 
a/api4/file_test.go +++ b/api4/file_test.go @@ -1047,3 +1047,165 @@ func TestGetPublicFile(t *testing.T) { require.NoError(t, err) require.Equal(t, http.StatusNotFound, resp.StatusCode, "should've failed to get file after it is deleted") } + +func TestSearchFilesOnFeatureFlagDisabled(t *testing.T) { + th := Setup(t).InitBasic() + defer th.TearDown() + + terms := "search" + isOrSearch := false + timezoneOffset := 5 + searchParams := model.SearchParameter{ + Terms: &terms, + IsOrSearch: &isOrSearch, + TimeZoneOffset: &timezoneOffset, + } + _, resp := th.Client.SearchFilesWithParams(th.BasicTeam.Id, &searchParams) + require.NotNil(t, resp.Error) +} + +func TestSearchFiles(t *testing.T) { + th := Setup(t).InitBasic() + defer th.TearDown() + experimentalViewArchivedChannels := *th.App.Config().TeamSettings.ExperimentalViewArchivedChannels + defer func() { + os.Unsetenv("MM_FEATUREFLAGS_FILESSEARCH") + th.App.UpdateConfig(func(cfg *model.Config) { + cfg.TeamSettings.ExperimentalViewArchivedChannels = &experimentalViewArchivedChannels + }) + }() + os.Setenv("MM_FEATUREFLAGS_FILESSEARCH", "true") + th.App.UpdateConfig(func(cfg *model.Config) { + *cfg.TeamSettings.ExperimentalViewArchivedChannels = true + }) + data, err := testutils.ReadTestFile("test.png") + require.NoError(t, err) + + th.LoginBasic() + Client := th.Client + + filename := "search for fileInfo1" + fileInfo1, err := th.App.UploadFile(data, th.BasicChannel.Id, filename) + require.Nil(t, err) + err = th.App.Srv().Store.FileInfo().AttachToPost(fileInfo1.Id, th.BasicPost.Id, th.BasicUser.Id) + require.Nil(t, err) + + filename = "search for fileInfo2" + fileInfo2, err := th.App.UploadFile(data, th.BasicChannel.Id, filename) + require.Nil(t, err) + err = th.App.Srv().Store.FileInfo().AttachToPost(fileInfo2.Id, th.BasicPost.Id, th.BasicUser.Id) + require.Nil(t, err) + + filename = "tagged search for fileInfo3" + fileInfo3, err := th.App.UploadFile(data, th.BasicChannel.Id, filename) + require.Nil(t, err) + err = 
th.App.Srv().Store.FileInfo().AttachToPost(fileInfo3.Id, th.BasicPost.Id, th.BasicUser.Id) + require.Nil(t, err) + + filename = "tagged for fileInfo4" + fileInfo4, err := th.App.UploadFile(data, th.BasicChannel.Id, filename) + require.Nil(t, err) + err = th.App.Srv().Store.FileInfo().AttachToPost(fileInfo4.Id, th.BasicPost.Id, th.BasicUser.Id) + require.Nil(t, err) + + archivedChannel := th.CreatePublicChannel() + fileInfo5, err := th.App.UploadFile(data, archivedChannel.Id, "tagged for fileInfo3") + require.Nil(t, err) + post := &model.Post{ChannelId: archivedChannel.Id, Message: model.NewId() + "a"} + rpost, resp := Client.CreatePost(post) + CheckNoError(t, resp) + err = th.App.Srv().Store.FileInfo().AttachToPost(fileInfo5.Id, rpost.Id, th.BasicUser.Id) + require.Nil(t, err) + th.Client.DeleteChannel(archivedChannel.Id) + + terms := "search" + isOrSearch := false + timezoneOffset := 5 + searchParams := model.SearchParameter{ + Terms: &terms, + IsOrSearch: &isOrSearch, + TimeZoneOffset: &timezoneOffset, + } + fileInfos, resp := Client.SearchFilesWithParams(th.BasicTeam.Id, &searchParams) + CheckNoError(t, resp) + require.Len(t, fileInfos.Order, 3, "wrong search") + + terms = "search" + page := 0 + perPage := 2 + searchParams = model.SearchParameter{ + Terms: &terms, + IsOrSearch: &isOrSearch, + TimeZoneOffset: &timezoneOffset, + Page: &page, + PerPage: &perPage, + } + fileInfos2, resp := Client.SearchFilesWithParams(th.BasicTeam.Id, &searchParams) + CheckNoError(t, resp) + // We don't support paging for DB search yet, modify this when we do. 
+ require.Len(t, fileInfos2.Order, 3, "Wrong number of fileInfos") + assert.Equal(t, fileInfos.Order[0], fileInfos2.Order[0]) + assert.Equal(t, fileInfos.Order[1], fileInfos2.Order[1]) + + page = 1 + searchParams = model.SearchParameter{ + Terms: &terms, + IsOrSearch: &isOrSearch, + TimeZoneOffset: &timezoneOffset, + Page: &page, + PerPage: &perPage, + } + fileInfos2, resp = Client.SearchFilesWithParams(th.BasicTeam.Id, &searchParams) + CheckNoError(t, resp) + // We don't support paging for DB search yet, modify this when we do. + require.Empty(t, fileInfos2.Order, "Wrong number of fileInfos") + + fileInfos, resp = Client.SearchFiles(th.BasicTeam.Id, "search", false) + CheckNoError(t, resp) + require.Len(t, fileInfos.Order, 3, "wrong search") + + fileInfos, resp = Client.SearchFiles(th.BasicTeam.Id, "fileInfo2", false) + CheckNoError(t, resp) + require.Len(t, fileInfos.Order, 1, "wrong number of fileInfos") + require.Equal(t, fileInfo2.Id, fileInfos.Order[0], "wrong search") + + terms = "tagged" + includeDeletedChannels := true + searchParams = model.SearchParameter{ + Terms: &terms, + IsOrSearch: &isOrSearch, + TimeZoneOffset: &timezoneOffset, + IncludeDeletedChannels: &includeDeletedChannels, + } + fileInfos, resp = Client.SearchFilesWithParams(th.BasicTeam.Id, &searchParams) + CheckNoError(t, resp) + require.Len(t, fileInfos.Order, 3, "wrong search") + + th.App.UpdateConfig(func(cfg *model.Config) { + *cfg.TeamSettings.ExperimentalViewArchivedChannels = false + }) + + fileInfos, resp = Client.SearchFilesWithParams(th.BasicTeam.Id, &searchParams) + CheckNoError(t, resp) + require.Len(t, fileInfos.Order, 2, "wrong search") + + fileInfos, _ = Client.SearchFiles(th.BasicTeam.Id, "*", false) + require.Empty(t, fileInfos.Order, "searching for just * shouldn't return any results") + + fileInfos, resp = Client.SearchFiles(th.BasicTeam.Id, "fileInfo1 fileInfo2", true) + CheckNoError(t, resp) + require.Len(t, fileInfos.Order, 2, "wrong search results") + + _, resp = 
Client.SearchFiles("junk", "#sgtitlereview", false) + CheckBadRequestStatus(t, resp) + + _, resp = Client.SearchFiles(model.NewId(), "#sgtitlereview", false) + CheckForbiddenStatus(t, resp) + + _, resp = Client.SearchFiles(th.BasicTeam.Id, "", false) + CheckBadRequestStatus(t, resp) + + Client.Logout() + _, resp = Client.SearchFiles(th.BasicTeam.Id, "#sgtitlereview", false) + CheckUnauthorizedStatus(t, resp) +} diff --git a/app/app_iface.go b/app/app_iface.go index 3a0be13fa1..c6d26e37e5 100644 --- a/app/app_iface.go +++ b/app/app_iface.go @@ -894,6 +894,7 @@ type AppIface interface { SearchChannelsUserNotIn(teamID string, userID string, term string) (*model.ChannelList, *model.AppError) SearchEmoji(name string, prefixOnly bool, limit int) ([]*model.Emoji, *model.AppError) SearchEngine() *searchengine.Broker + SearchFilesInTeamForUser(terms string, userId string, teamId string, isOrSearch bool, includeDeletedChannels bool, timeZoneOffset int, page, perPage int) (*model.FileInfoList, *model.AppError) SearchGroupChannels(userID, term string) (*model.ChannelList, *model.AppError) SearchPostsInTeam(teamID string, paramsList []*model.SearchParams) (*model.PostList, *model.AppError) SearchPostsInTeamForUser(terms string, userID string, teamID string, isOrSearch bool, includeDeletedChannels bool, timeZoneOffset int, page, perPage int) (*model.PostSearchResults, *model.AppError) diff --git a/app/file.go b/app/file.go index 1ad3bae82a..3c66c1121c 100644 --- a/app/file.go +++ b/app/file.go @@ -36,6 +36,7 @@ import ( "github.com/mattermost/mattermost-server/v5/mlog" "github.com/mattermost/mattermost-server/v5/model" "github.com/mattermost/mattermost-server/v5/plugin" + "github.com/mattermost/mattermost-server/v5/services/docextractor" "github.com/mattermost/mattermost-server/v5/services/filesstore" "github.com/mattermost/mattermost-server/v5/store" "github.com/mattermost/mattermost-server/v5/utils" @@ -771,6 +772,28 @@ func (a *App) UploadFileX(channelID, name string, input 
io.Reader, } } + if *a.Config().FileSettings.ExtractContent && a.Config().FeatureFlags.FilesSearch { + infoCopy := *t.fileinfo + a.Srv().Go(func() { + file, aerr := a.FileReader(t.fileinfo.Path) + if aerr != nil { + mlog.Error("Failed to open file for extract file content", mlog.Err(aerr)) + return + } + defer file.Close() + text, err := docextractor.Extract(infoCopy.Name, file, docextractor.ExtractSettings{ + ArchiveRecursion: *a.Config().FileSettings.ArchiveRecursion, + }) + if err != nil { + mlog.Error("Failed to extract file content", mlog.Err(err)) + return + } + if storeErr := a.Srv().Store.FileInfo().SetContent(infoCopy.Id, text); storeErr != nil { + mlog.Error("Failed to save the extracted file content", mlog.Err(storeErr)) + } + }) + } + return t.fileinfo, nil } @@ -1020,6 +1043,28 @@ func (a *App) DoUploadFileExpectModification(now time.Time, rawTeamId string, ra } } + if *a.Config().FileSettings.ExtractContent && a.Config().FeatureFlags.FilesSearch { + infoCopy := *info + a.Srv().Go(func() { + file, aerr := a.FileReader(infoCopy.Path) + if aerr != nil { + mlog.Error("Failed to open file for extract file content", mlog.Err(aerr)) + return + } + defer file.Close() + text, err := docextractor.Extract(infoCopy.Name, file, docextractor.ExtractSettings{ + ArchiveRecursion: *a.Config().FileSettings.ArchiveRecursion, + }) + if err != nil { + mlog.Error("Failed to extract file content", mlog.Err(err)) + return + } + if storeErr := a.Srv().Store.FileInfo().SetContent(infoCopy.Id, text); storeErr != nil { + mlog.Error("Failed to save the extracted file content", mlog.Err(storeErr)) + } + }) + } + return info, data, nil } @@ -1308,3 +1353,49 @@ func populateZipfile(w *zip.Writer, fileDatas []model.FileData) error { } return nil } + +func (a *App) SearchFilesInTeamForUser(terms string, userId string, teamId string, isOrSearch bool, includeDeletedChannels bool, timeZoneOffset int, page, perPage int) (*model.FileInfoList, *model.AppError) { + paramsList := 
model.ParseSearchParams(strings.TrimSpace(terms), timeZoneOffset) + includeDeleted := includeDeletedChannels && *a.Config().TeamSettings.ExperimentalViewArchivedChannels + + if !*a.Config().ServiceSettings.EnableFileSearch { + return nil, model.NewAppError("SearchFilesInTeamForUser", "store.sql_file_info.search.disabled", nil, fmt.Sprintf("teamId=%v userId=%v", teamId, userId), http.StatusNotImplemented) + } + + finalParamsList := []*model.SearchParams{} + + for _, params := range paramsList { + params.OrTerms = isOrSearch + params.IncludeDeletedChannels = includeDeleted + // Don't allow users to search for "*" + if params.Terms != "*" { + // Convert channel names to channel IDs + params.InChannels = a.convertChannelNamesToChannelIds(params.InChannels, userId, teamId, includeDeletedChannels) + params.ExcludedChannels = a.convertChannelNamesToChannelIds(params.ExcludedChannels, userId, teamId, includeDeletedChannels) + + // Convert usernames to user IDs + params.FromUsers = a.convertUserNameToUserIds(params.FromUsers) + params.ExcludedUsers = a.convertUserNameToUserIds(params.ExcludedUsers) + + finalParamsList = append(finalParamsList, params) + } + } + + // If the processed search params are empty, return empty search results. 
+ if len(finalParamsList) == 0 { + return model.NewFileInfoList(), nil + } + + fileInfoSearchResults, nErr := a.Srv().Store.FileInfo().Search(finalParamsList, userId, teamId, page, perPage) + if nErr != nil { + var appErr *model.AppError + switch { + case errors.As(nErr, &appErr): + return nil, appErr + default: + return nil, model.NewAppError("SearchFilesInTeamForUser", "app.post.search.app_error", nil, nErr.Error(), http.StatusInternalServerError) + } + } + + return fileInfoSearchResults, nil +} diff --git a/app/file_test.go index b7b132c0fe..82cc2b31b5 100644 --- a/app/file_test.go +++ b/app/file_test.go @@ -13,11 +13,12 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" "github.com/mattermost/mattermost-server/v5/model" - "github.com/mattermost/mattermost-server/v5/plugin/plugintest/mock" - "github.com/mattermost/mattermost-server/v5/services/filesstore/mocks" + filesStoreMocks "github.com/mattermost/mattermost-server/v5/services/filesstore/mocks" + "github.com/mattermost/mattermost-server/v5/services/searchengine/mocks" "github.com/mattermost/mattermost-server/v5/utils/fileutils" ) @@ -277,7 +278,7 @@ func TestCreateZipFileAndAddFiles(t *testing.T) { th := Setup(t) defer th.TearDown() - mockBackend := mocks.FileBackend{} + mockBackend := filesStoreMocks.FileBackend{} mockBackend.On("WriteFile", mock.Anything, "directory-to-heaven/zip-file-name-to-heaven.zip").Return(int64(666), errors.New("only those who dare to fail greatly can ever achieve greatly")) err := th.App.CreateZipFileAndAddFiles(&mockBackend, []model.FileData{}, "zip-file-name-to-heaven.zip", "directory-to-heaven") @@ -285,7 +286,7 @@ func TestCreateZipFileAndAddFiles(t *testing.T) { require.NotNil(t, err) require.Equal(t, err.Error(), "only those who dare to fail greatly can ever achieve greatly") - mockBackend = mocks.FileBackend{}
mockBackend.On("WriteFile", mock.Anything, "directory-to-heaven/zip-file-name-to-heaven.zip").Return(int64(666), nil) err = th.App.CreateZipFileAndAddFiles(&mockBackend, []model.FileData{}, "zip-file-name-to-heaven.zip", "directory-to-heaven") require.NoError(t, err) @@ -350,3 +351,196 @@ func createDummyImage() *image.RGBA { lowerRightCorner := image.Point{width, height} return image.NewRGBA(image.Rectangle{upperLeftCorner, lowerRightCorner}) } + +func TestSearchFilesInTeamForUser(t *testing.T) { + perPage := 5 + searchTerm := "searchTerm" + + setup := func(t *testing.T, enableElasticsearch bool) (*TestHelper, []*model.FileInfo) { + th := Setup(t).InitBasic() + + fileInfos := make([]*model.FileInfo, 7) + for i := 0; i < cap(fileInfos); i++ { + fileInfo, err := th.App.Srv().Store.FileInfo().Save(&model.FileInfo{ + CreatorId: th.BasicUser.Id, + PostId: th.BasicPost.Id, + Name: searchTerm, + Path: searchTerm, + Extension: "jpg", + MimeType: "image/jpeg", + }) + time.Sleep(1 * time.Millisecond) + + require.Nil(t, err) + + fileInfos[i] = fileInfo + } + + if enableElasticsearch { + th.App.Srv().SetLicense(model.NewTestLicense("elastic_search")) + + th.App.UpdateConfig(func(cfg *model.Config) { + *cfg.ElasticsearchSettings.EnableIndexing = true + *cfg.ElasticsearchSettings.EnableSearching = true + }) + } else { + th.App.UpdateConfig(func(cfg *model.Config) { + *cfg.ElasticsearchSettings.EnableSearching = false + }) + } + + return th, fileInfos + } + + t.Run("should return everything as first page of fileInfos from database", func(t *testing.T) { + th, fileInfos := setup(t, false) + defer th.TearDown() + + page := 0 + + results, err := th.App.SearchFilesInTeamForUser(searchTerm, th.BasicUser.Id, th.BasicTeam.Id, false, false, 0, page, perPage) + + require.Nil(t, err) + require.NotNil(t, results) + assert.Equal(t, []string{ + fileInfos[6].Id, + fileInfos[5].Id, + fileInfos[4].Id, + fileInfos[3].Id, + fileInfos[2].Id, + fileInfos[1].Id, + fileInfos[0].Id, + }, 
results.Order) + }) + + t.Run("should not return later pages of fileInfos from database", func(t *testing.T) { + th, _ := setup(t, false) + defer th.TearDown() + + page := 1 + + results, err := th.App.SearchFilesInTeamForUser(searchTerm, th.BasicUser.Id, th.BasicTeam.Id, false, false, 0, page, perPage) + + require.Nil(t, err) + require.NotNil(t, results) + assert.Equal(t, []string{}, results.Order) + }) + + t.Run("should return first page of fileInfos from ElasticSearch", func(t *testing.T) { + th, fileInfos := setup(t, true) + defer th.TearDown() + + page := 0 + resultsPage := []string{ + fileInfos[6].Id, + fileInfos[5].Id, + fileInfos[4].Id, + fileInfos[3].Id, + fileInfos[2].Id, + } + + es := &mocks.SearchEngineInterface{} + es.On("SearchFiles", mock.Anything, mock.Anything, page, perPage).Return(resultsPage, nil) + es.On("GetName").Return("mock") + es.On("Start").Return(nil).Maybe() + es.On("IsActive").Return(true) + es.On("IsSearchEnabled").Return(true) + th.App.Srv().SearchEngine.ElasticsearchEngine = es + defer func() { + th.App.Srv().SearchEngine.ElasticsearchEngine = nil + }() + + results, err := th.App.SearchFilesInTeamForUser(searchTerm, th.BasicUser.Id, th.BasicTeam.Id, false, false, 0, page, perPage) + + require.Nil(t, err) + require.NotNil(t, results) + assert.Equal(t, resultsPage, results.Order) + es.AssertExpectations(t) + }) + + t.Run("should return later pages of fileInfos from ElasticSearch", func(t *testing.T) { + th, fileInfos := setup(t, true) + defer th.TearDown() + + page := 1 + resultsPage := []string{ + fileInfos[1].Id, + fileInfos[0].Id, + } + + es := &mocks.SearchEngineInterface{} + es.On("SearchFiles", mock.Anything, mock.Anything, page, perPage).Return(resultsPage, nil) + es.On("GetName").Return("mock") + es.On("Start").Return(nil).Maybe() + es.On("IsActive").Return(true) + es.On("IsSearchEnabled").Return(true) + th.App.Srv().SearchEngine.ElasticsearchEngine = es + defer func() { + th.App.Srv().SearchEngine.ElasticsearchEngine = nil + 
}() + + results, err := th.App.SearchFilesInTeamForUser(searchTerm, th.BasicUser.Id, th.BasicTeam.Id, false, false, 0, page, perPage) + + require.Nil(t, err) + require.NotNil(t, results) + assert.Equal(t, resultsPage, results.Order) + es.AssertExpectations(t) + }) + + t.Run("should fall back to database if ElasticSearch fails on first page", func(t *testing.T) { + th, fileInfos := setup(t, true) + defer th.TearDown() + + page := 0 + + es := &mocks.SearchEngineInterface{} + es.On("SearchFiles", mock.Anything, mock.Anything, page, perPage).Return(nil, &model.AppError{}) + es.On("GetName").Return("mock") + es.On("Start").Return(nil).Maybe() + es.On("IsActive").Return(true) + es.On("IsSearchEnabled").Return(true) + th.App.Srv().SearchEngine.ElasticsearchEngine = es + defer func() { + th.App.Srv().SearchEngine.ElasticsearchEngine = nil + }() + + results, err := th.App.SearchFilesInTeamForUser(searchTerm, th.BasicUser.Id, th.BasicTeam.Id, false, false, 0, page, perPage) + + require.Nil(t, err) + require.NotNil(t, results) + assert.Equal(t, []string{ + fileInfos[6].Id, + fileInfos[5].Id, + fileInfos[4].Id, + fileInfos[3].Id, + fileInfos[2].Id, + fileInfos[1].Id, + fileInfos[0].Id, + }, results.Order) + es.AssertExpectations(t) + }) + + t.Run("should return nothing if ElasticSearch fails on later pages", func(t *testing.T) { + th, _ := setup(t, true) + defer th.TearDown() + + page := 1 + + es := &mocks.SearchEngineInterface{} + es.On("SearchFiles", mock.Anything, mock.Anything, page, perPage).Return(nil, &model.AppError{}) + es.On("GetName").Return("mock") + es.On("Start").Return(nil).Maybe() + es.On("IsActive").Return(true) + es.On("IsSearchEnabled").Return(true) + th.App.Srv().SearchEngine.ElasticsearchEngine = es + defer func() { + th.App.Srv().SearchEngine.ElasticsearchEngine = nil + }() + + results, err := th.App.SearchFilesInTeamForUser(searchTerm, th.BasicUser.Id, th.BasicTeam.Id, false, false, 0, page, perPage) + + require.Nil(t, err) + assert.Equal(t, []string{}, 
results.Order) + es.AssertExpectations(t) + }) +} diff --git a/app/migrations.go b/app/migrations.go index 7f10c64b77..44c807b3c6 100644 --- a/app/migrations.go +++ b/app/migrations.go @@ -15,6 +15,8 @@ import ( const EmojisPermissionsMigrationKey = "EmojisPermissionsMigrationComplete" const GuestRolesCreationMigrationKey = "GuestRolesCreationMigrationComplete" const SystemConsoleRolesCreationMigrationKey = "SystemConsoleRolesCreationMigrationComplete" +const ContentExtractionConfigMigrationKey = "ContentExtractionConfigMigrationComplete" +const usersLimitToAutoEnableContentExtraction = 500 // This function migrates the default built in roles from code/config to the database. func (a *App) DoAdvancedPermissionsMigration() { @@ -283,6 +285,35 @@ func (a *App) DoSystemConsoleRolesCreationMigration() { } } +func (a *App) doContentExtractionConfigMigration() { + if !a.Config().FeatureFlags.FilesSearch { + return + } + // If the migration is already marked as completed, don't do it again. + if _, err := a.Srv().Store.System().GetByName(ContentExtractionConfigMigrationKey); err == nil { + return + } + + if usersCount, err := a.Srv().Store.User().Count(model.UserCountOptions{}); err != nil { + mlog.Critical("Failed to get the users count for migrating the content extraction, using default value", mlog.Err(err)) + } else { + if usersCount < usersLimitToAutoEnableContentExtraction { + a.UpdateConfig(func(config *model.Config) { + config.FileSettings.ExtractContent = model.NewBool(true) + }) + } + } + + system := model.System{ + Name: ContentExtractionConfigMigrationKey, + Value: "true", + } + + if err := a.Srv().Store.System().Save(&system); err != nil { + mlog.Critical("Failed to mark content extraction config migration as completed.", mlog.Err(err)) + } +} + func (a *App) DoAppMigrations() { a.DoAdvancedPermissionsMigration() a.DoEmojisPermissionsMigration() @@ -294,4 +325,5 @@ func (a *App) DoAppMigrations() { if err != nil { 
mlog.Critical("(app.App).DoPermissionsMigrations failed", mlog.Err(err)) } + a.doContentExtractionConfigMigration() } diff --git a/app/opentracing/opentracing_layer.go b/app/opentracing/opentracing_layer.go index 8545f9c020..97095820cc 100644 --- a/app/opentracing/opentracing_layer.go +++ b/app/opentracing/opentracing_layer.go @@ -12956,6 +12956,28 @@ func (a *OpenTracingAppLayer) SearchEngine() *searchengine.Broker { return resultVar0 } +func (a *OpenTracingAppLayer) SearchFilesInTeamForUser(terms string, userId string, teamId string, isOrSearch bool, includeDeletedChannels bool, timeZoneOffset int, page int, perPage int) (*model.FileInfoList, *model.AppError) { + origCtx := a.ctx + span, newCtx := tracing.StartSpanWithParentByContext(a.ctx, "app.SearchFilesInTeamForUser") + + a.ctx = newCtx + a.app.Srv().Store.SetContext(newCtx) + defer func() { + a.app.Srv().Store.SetContext(origCtx) + a.ctx = origCtx + }() + + defer span.Finish() + resultVar0, resultVar1 := a.app.SearchFilesInTeamForUser(terms, userId, teamId, isOrSearch, includeDeletedChannels, timeZoneOffset, page, perPage) + + if resultVar1 != nil { + span.LogFields(spanlog.Error(resultVar1)) + ext.Error.Set(span, true) + } + + return resultVar0, resultVar1 +} + func (a *OpenTracingAppLayer) SearchGroupChannels(userID string, term string) (*model.ChannelList, *model.AppError) { origCtx := a.ctx span, newCtx := tracing.StartSpanWithParentByContext(a.ctx, "app.SearchGroupChannels") diff --git a/app/upload.go b/app/upload.go index 90e60d9347..91a6775fa4 100644 --- a/app/upload.go +++ b/app/upload.go @@ -15,6 +15,7 @@ import ( "github.com/mattermost/mattermost-server/v5/mlog" "github.com/mattermost/mattermost-server/v5/model" "github.com/mattermost/mattermost-server/v5/plugin" + "github.com/mattermost/mattermost-server/v5/services/docextractor" "github.com/mattermost/mattermost-server/v5/store" ) @@ -295,6 +296,22 @@ func (a *App) UploadData(us *model.UploadSession, rd io.Reader) (*model.FileInfo } } + if 
*a.Config().FileSettings.ExtractContent && a.Config().FeatureFlags.FilesSearch { + infoCopy := *info + a.Srv().Go(func() { + text, err := docextractor.Extract(infoCopy.Name, file, docextractor.ExtractSettings{ + ArchiveRecursion: *a.Config().FileSettings.ArchiveRecursion, + }) + if err != nil { + mlog.Error("Failed to extract file content", mlog.Err(err)) + return + } + if storeErr := a.Srv().Store.FileInfo().SetContent(infoCopy.Id, text); storeErr != nil { + mlog.Error("Failed to save the extracted file content", mlog.Err(storeErr)) + } + }) + } + // delete upload session if storeErr := a.Srv().Store.UploadSession().Delete(us.Id); storeErr != nil { mlog.Warn("Failed to delete UploadSession", mlog.Err(storeErr)) diff --git a/i18n/en.json b/i18n/en.json index 96993d7761..17f6d3364a 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -1870,6 +1870,14 @@ "id": "api.post.save_is_pinned_post.town_square_read_only", "translation": "This channel is read-only. Only members with permission can pin or unpin posts here." }, + { + "id": "api.post.search_files.invalid_body.app_error", + "translation": "Unable to parse the request body." + }, + { + "id": "api.post.search_files.not_implemented.app_error", + "translation": "This feature is in development, and is only available using a feature flag." + }, { "id": "api.post.search_posts.invalid_body.app_error", "translation": "Unable to parse the request body." @@ -8490,6 +8498,10 @@ "id": "store.sql_command.update.missing.app_error", "translation": "Command does not exist." }, + { + "id": "store.sql_file_info.search.disabled", + "translation": "Searching files has been disabled on this server. Please contact your System Administrator." + }, { "id": "store.sql_post.search.disabled", "translation": "Searching has been disabled on this server. Please contact your System Administrator." 
diff --git a/model/client4.go b/model/client4.go index 8f4deac73d..758c268469 100644 --- a/model/client4.go +++ b/model/client4.go @@ -3044,6 +3044,25 @@ func (c *Client4) GetPostsAroundLastUnread(userId, channelId string, limitBefore return PostListFromJson(r.Body), BuildResponse(r) } +// SearchFiles returns any file infos with matching terms string. +func (c *Client4) SearchFiles(teamId string, terms string, isOrSearch bool) (*FileInfoList, *Response) { + params := SearchParameter{ + Terms: &terms, + IsOrSearch: &isOrSearch, + } + return c.SearchFilesWithParams(teamId, &params) +} + +// SearchFilesWithParams returns any file infos with matching terms string. +func (c *Client4) SearchFilesWithParams(teamId string, params *SearchParameter) (*FileInfoList, *Response) { + r, err := c.DoApiPost(c.GetTeamRoute(teamId)+"/files/search", params.SearchParameterToJson()) + if err != nil { + return nil, BuildErrorResponse(r, err) + } + defer closeBody(r) + return FileInfoListFromJson(r.Body), BuildResponse(r) +} + // SearchPosts returns any posts with matching terms string.
func (c *Client4) SearchPosts(teamId string, terms string, isOrSearch bool) (*PostList, *Response) { params := SearchParameter{ diff --git a/model/config.go b/model/config.go index 8fb7b5954a..3b9eb25c2c 100644 --- a/model/config.go +++ b/model/config.go @@ -335,6 +335,7 @@ type ServiceSettings struct { PostEditTimeLimit *int `access:"user_management_permissions"` TimeBetweenUserTypingUpdatesMilliseconds *int64 `access:"experimental,write_restrictable,cloud_restrictable"` EnablePostSearch *bool `access:"write_restrictable,cloud_restrictable"` + EnableFileSearch *bool `access:"write_restrictable"` MinimumHashtagLength *int `access:"environment,write_restrictable,cloud_restrictable"` EnableUserTypingMessages *bool `access:"experimental,write_restrictable,cloud_restrictable"` EnableChannelViewedMessages *bool `access:"experimental,write_restrictable,cloud_restrictable"` @@ -537,6 +538,10 @@ func (s *ServiceSettings) SetDefaults(isUpdate bool) { s.EnablePostSearch = NewBool(true) } + if s.EnableFileSearch == nil { + s.EnableFileSearch = NewBool(true) + } + if s.MinimumHashtagLength == nil { s.MinimumHashtagLength = NewInt(3) } @@ -1353,6 +1358,8 @@ type FileSettings struct { DriverName *string `access:"environment,write_restrictable,cloud_restrictable"` Directory *string `access:"environment,write_restrictable,cloud_restrictable"` EnablePublicLink *bool `access:"site,cloud_restrictable"` + ExtractContent *bool `access:"environment,write_restrictable"` + ArchiveRecursion *bool `access:"environment,write_restrictable"` PublicLinkSalt *string `access:"site,cloud_restrictable"` // telemetry: none InitialFont *string `access:"environment,cloud_restrictable"` // telemetry: none AmazonS3AccessKeyId *string `access:"environment,write_restrictable,cloud_restrictable"` // telemetry: none @@ -1396,6 +1403,14 @@ func (s *FileSettings) SetDefaults(isUpdate bool) { s.EnablePublicLink = NewBool(false) } + if s.ExtractContent == nil { + s.ExtractContent = NewBool(false) + } + + if 
s.ArchiveRecursion == nil { + s.ArchiveRecursion = NewBool(false) + } + if isUpdate { // When updating an existing configuration, ensure link salt has been specified. if s.PublicLinkSalt == nil || *s.PublicLinkSalt == "" { diff --git a/services/telemetry/telemetry.go b/services/telemetry/telemetry.go index 77a5d73b33..049f128848 100644 --- a/services/telemetry/telemetry.go +++ b/services/telemetry/telemetry.go @@ -439,6 +439,7 @@ func (ts *TelemetryService) trackConfig() { "enable_legacy_sidebar": *cfg.ServiceSettings.EnableLegacySidebar, "thread_auto_follow": *cfg.ServiceSettings.ThreadAutoFollow, "enable_link_previews": *cfg.ServiceSettings.EnableLinkPreviews, + "enable_file_search": *cfg.ServiceSettings.EnableFileSearch, }) ts.sendTelemetry(TrackConfigTeam, map[string]interface{}{ @@ -544,6 +545,8 @@ func (ts *TelemetryService) trackConfig() { "driver_name": *cfg.FileSettings.DriverName, "isdefault_directory": isDefault(*cfg.FileSettings.Directory, model.FILE_SETTINGS_DEFAULT_DIRECTORY), "isabsolute_directory": filepath.IsAbs(*cfg.FileSettings.Directory), + "extract_content": *cfg.FileSettings.ExtractContent, + "archive_recursion": *cfg.FileSettings.ArchiveRecursion, "amazon_s3_ssl": *cfg.FileSettings.AmazonS3SSL, "amazon_s3_sse": *cfg.FileSettings.AmazonS3SSE, "amazon_s3_signv2": *cfg.FileSettings.AmazonS3SignV2, diff --git a/testlib/store.go b/testlib/store.go index 88feb92378..1f8b520e21 100644 --- a/testlib/store.go +++ b/testlib/store.go @@ -25,6 +25,7 @@ func GetMockStoreForSetupFunctions() *mocks.Store { mockStore := mocks.Store{} systemStore := mocks.SystemStore{} systemStore.On("GetByName", "UpgradedFromTE").Return(nil, model.NewAppError("FakeError", "app.system.get_by_name.app_error", nil, "", http.StatusInternalServerError)) + systemStore.On("GetByName", "ContentExtractionConfigMigrationComplete").Return(&model.System{Name: "ContentExtractionConfigMigrationComplete", Value: "true"}, nil) systemStore.On("GetByName", "AsymmetricSigningKey").Return(nil, 
model.NewAppError("FakeError", "app.system.get_by_name.app_error", nil, "", http.StatusInternalServerError)) systemStore.On("GetByName", "PostActionCookieSecret").Return(nil, model.NewAppError("FakeError", "app.system.get_by_name.app_error", nil, "", http.StatusInternalServerError)) systemStore.On("GetByName", "InstallationDate").Return(&model.System{Name: "InstallationDate", Value: strconv.FormatInt(model.GetMillis(), 10)}, nil)