diff --git a/modules/git/grep.go b/modules/git/grep.go index c7ba862761..44ec6ca2be 100644 --- a/modules/git/grep.go +++ b/modules/git/grep.go @@ -23,11 +23,19 @@ type GrepResult struct { LineCodes []string } +type GrepModeType string + +const ( + GrepModeExact GrepModeType = "exact" + GrepModeWords GrepModeType = "words" + GrepModeRegexp GrepModeType = "regexp" +) + type GrepOptions struct { RefName string MaxResultLimit int ContextLineNumber int - IsFuzzy bool + GrepMode GrepModeType MaxLineLength int // the maximum length of a line to parse, exceeding chars will be truncated PathspecList []string } @@ -52,15 +60,23 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO 2^@repo: go-gitea/gitea */ var results []*GrepResult - cmd := NewCommand("grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name") + cmd := NewCommand("grep", "--null", "--break", "--heading", "--line-number", "--full-name") cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber)) - if opts.IsFuzzy { + if opts.GrepMode == GrepModeExact { + cmd.AddArguments("--fixed-strings") + cmd.AddOptionValues("-e", strings.TrimLeft(search, "-")) + } else if opts.GrepMode == GrepModeRegexp { + cmd.AddArguments("--perl-regexp") + cmd.AddOptionValues("-e", strings.TrimLeft(search, "-")) + } else /* words */ { words := strings.Fields(search) - for _, word := range words { + cmd.AddArguments("--fixed-strings", "--ignore-case") + for i, word := range words { cmd.AddOptionValues("-e", strings.TrimLeft(word, "-")) + if i < len(words)-1 { + cmd.AddOptionValues("--and") + } } - } else { - cmd.AddOptionValues("-e", strings.TrimLeft(search, "-")) } cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD")) cmd.AddDashesAndList(opts.PathspecList...) 
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index fdb7866145..52a934d4ff 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -17,6 +17,7 @@ import ( "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/indexer" path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" @@ -136,6 +137,10 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWords() +} + // NewIndexer creates a new bleve local indexer func NewIndexer(indexDir string) *Indexer { inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping) @@ -267,19 +272,18 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int pathQuery.FieldVal = "Filename" pathQuery.SetBoost(10) - keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword) - if isPhrase { - q := bleve.NewMatchPhraseQuery(keywordAsPhrase) + if opts.SearchMode == indexer.SearchModeExact { + q := bleve.NewMatchPhraseQuery(opts.Keyword) q.FieldVal = "Content" - if opts.IsKeywordFuzzy { - q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(keywordAsPhrase) - } contentQuery = q - } else { + } else /* words */ { q := bleve.NewMatchQuery(opts.Keyword) q.FieldVal = "Content" - if opts.IsKeywordFuzzy { + if opts.SearchMode == indexer.SearchModeFuzzy { + // this logic doesn't seem right, it is only used to pass the test-case `Keyword: "dESCRIPTION"`, which doesn't seem to be a real-life use-case. 
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword) + } else { + q.Operator = query.MatchQueryOperatorAnd } contentQuery = q } diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 9abece921e..354a8334fb 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -16,6 +16,7 @@ import ( "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" @@ -24,7 +25,6 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" - "code.gitea.io/gitea/modules/util" "github.com/go-enry/go-enry/v2" "github.com/olivere/elastic/v7" @@ -46,6 +46,10 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWords() +} + // NewIndexer creates a new elasticsearch indexer func NewIndexer(url, indexerName string) *Indexer { inner := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping) @@ -361,15 +365,10 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan // Search searches for codes and language stats by given conditions. 
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { var contentQuery elastic.Query - keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword) - if isPhrase { - contentQuery = elastic.NewMatchPhraseQuery("content", keywordAsPhrase) - } else { - // TODO: this is the old logic, but not really using "fuzziness" - // * IsKeywordFuzzy=true: "best_fields" - // * IsKeywordFuzzy=false: "phrase_prefix" - contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword). - Type(util.Iif(opts.IsKeywordFuzzy, esMultiMatchTypeBestFields, esMultiMatchTypePhrasePrefix)) + if opts.SearchMode == indexer.SearchModeExact { + contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword) + } else /* words */ { + contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and") } kwQuery := elastic.NewBoolQuery().Should( contentQuery, diff --git a/modules/indexer/code/gitgrep/gitgrep.go b/modules/indexer/code/gitgrep/gitgrep.go index a85c9d02a5..093c189ba3 100644 --- a/modules/indexer/code/gitgrep/gitgrep.go +++ b/modules/indexer/code/gitgrep/gitgrep.go @@ -9,6 +9,7 @@ import ( "strings" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/indexer" code_indexer "code.gitea.io/gitea/modules/indexer/code" "code.gitea.io/gitea/modules/setting" ) @@ -23,11 +24,16 @@ func indexSettingToGitGrepPathspecList() (list []string) { return list } -func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, isFuzzy bool) (searchResults []*code_indexer.Result, total int, err error) { - // TODO: it should also respect ParseKeywordAsPhrase and clarify the "fuzzy" behavior +func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, searchMode indexer.SearchModeType) (searchResults 
[]*code_indexer.Result, total int, err error) { + grepMode := git.GrepModeWords + if searchMode == indexer.SearchModeExact { + grepMode = git.GrepModeExact + } else if searchMode == indexer.SearchModeRegexp { + grepMode = git.GrepModeRegexp + } res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{ ContextLineNumber: 1, - IsFuzzy: isFuzzy, + GrepMode: grepMode, RefName: ref.String(), PathspecList: indexSettingToGitGrepPathspecList(), }) diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index 38fd10dae7..6035ddfe95 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -14,6 +14,7 @@ import ( "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/code/bleve" "code.gitea.io/gitea/modules/indexer/code/elasticsearch" "code.gitea.io/gitea/modules/indexer/code/internal" @@ -302,3 +303,11 @@ func populateRepoIndexer(ctx context.Context) { } log.Info("Done (re)populating the repo indexer with existing repositories") } + +func SupportedSearchModes() []indexer.SearchMode { + gi := globalIndexer.Load() + if gi == nil { + return nil + } + return (*gi).SupportedSearchModes() +} diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 43cf8ff254..96516166a0 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -11,6 +11,7 @@ import ( "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/models/unittest" + indexer_module "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/code/bleve" "code.gitea.io/gitea/modules/indexer/code/elasticsearch" "code.gitea.io/gitea/modules/indexer/code/internal" @@ -39,10 +40,11 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { assert.NoError(t, setupRepositoryIndexes(t.Context(), indexer)) keywords := []struct { - 
RepoIDs []int64 - Keyword string - Langs int - Results []codeSearchResult + RepoIDs []int64 + Keyword string + Langs int + SearchMode indexer_module.SearchModeType + Results []codeSearchResult }{ // Search for an exact match on the contents of a file // This scenario yields a single result (the file README.md on the repo '1') @@ -183,9 +185,10 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, // Search for matches on the contents of files regardless of case. { - RepoIDs: nil, - Keyword: "dESCRIPTION", - Langs: 1, + RepoIDs: nil, + Keyword: "dESCRIPTION", + Langs: 1, + SearchMode: indexer_module.SearchModeFuzzy, Results: []codeSearchResult{ { Filename: "README.md", @@ -193,7 +196,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for an exact match on the filename within the repo '62' (case insenstive). + // Search for an exact match on the filename within the repo '62' (case-insensitive). // This scenario yields a single result (the file avocado.md on the repo '62') { RepoIDs: []int64{62}, @@ -206,7 +209,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for matches on the contents of files when the criteria is a expression. + // Search for matches on the contents of files when the criteria are an expression. { RepoIDs: []int64{62}, Keyword: "console.log", @@ -218,7 +221,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for matches on the contents of files when the criteria is part of a expression. + // Search for matches on the contents of files when the criteria are parts of an expression. 
{ RepoIDs: []int64{62}, Keyword: "log", @@ -235,16 +238,16 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { for _, kw := range keywords { t.Run(kw.Keyword, func(t *testing.T) { total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{ - RepoIDs: kw.RepoIDs, - Keyword: kw.Keyword, + RepoIDs: kw.RepoIDs, + Keyword: kw.Keyword, + SearchMode: kw.SearchMode, Paginator: &db.ListOptions{ Page: 1, PageSize: 10, }, - IsKeywordFuzzy: true, }) - assert.NoError(t, err) - assert.Len(t, langs, kw.Langs) + require.NoError(t, err) + require.Len(t, langs, kw.Langs) hits := make([]codeSearchResult, 0, len(res)) @@ -289,7 +292,7 @@ func TestBleveIndexAndSearch(t *testing.T) { _, err := idx.Init(t.Context()) require.NoError(t, err) - testIndexer("beleve", t, idx) + testIndexer("bleve", t, idx) } func TestESIndexAndSearch(t *testing.T) { diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index c259fcd26e..6c9a8af635 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -9,6 +9,7 @@ import ( "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/internal" ) @@ -18,6 +19,7 @@ type Indexer interface { Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error Delete(ctx context.Context, repoID int64) error Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) + SupportedSearchModes() []indexer.SearchMode } type SearchOptions struct { @@ -25,7 +27,7 @@ type SearchOptions struct { Keyword string Language string - IsKeywordFuzzy bool + SearchMode indexer.SearchModeType db.Paginator } @@ -41,6 +43,10 @@ type dummyIndexer struct { internal.Indexer } +func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode { + return nil +} + func (d *dummyIndexer) Index(ctx 
context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error { return fmt.Errorf("indexer is not ready") } diff --git a/modules/indexer/code/internal/util.go b/modules/indexer/code/internal/util.go index 46e631166d..fa958be473 100644 --- a/modules/indexer/code/internal/util.go +++ b/modules/indexer/code/internal/util.go @@ -10,9 +10,7 @@ import ( "code.gitea.io/gitea/modules/log" ) -const ( - filenameMatchNumberOfLines = 7 // Copied from github search -) +const filenameMatchNumberOfLines = 7 // Copied from GitHub search func FilenameIndexerID(repoID int64, filename string) string { return internal.Base36(repoID) + "_" + filename @@ -48,11 +46,3 @@ func FilenameMatchIndexPos(content string) (int, int) { } return 0, len(content) } - -func ParseKeywordAsPhrase(keyword string) (string, bool) { - if strings.HasPrefix(keyword, `"`) && strings.HasSuffix(keyword, `"`) && len(keyword) > 1 { - // only remove the prefix and suffix quotes, no need to decode the content at the moment - return keyword[1 : len(keyword)-1], true - } - return "", false -} diff --git a/modules/indexer/code/internal/util_test.go b/modules/indexer/code/internal/util_test.go deleted file mode 100644 index 457936296b..0000000000 --- a/modules/indexer/code/internal/util_test.go +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2025 The Gitea Authors. All rights reserved. 
-// SPDX-License-Identifier: MIT - -package internal - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestParseKeywordAsPhrase(t *testing.T) { - cases := []struct { - keyword string - phrase string - isPhrase bool - }{ - {``, "", false}, - {`a`, "", false}, - {`"`, "", false}, - {`"a`, "", false}, - {`"a"`, "a", true}, - {`""\"""`, `"\""`, true}, - } - for _, c := range cases { - phrase, isPhrase := ParseKeywordAsPhrase(c.keyword) - assert.Equal(t, c.phrase, phrase, "keyword=%q", c.keyword) - assert.Equal(t, c.isPhrase, isPhrase, "keyword=%q", c.keyword) - } -} diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 74c957dde6..e37aff8e59 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -129,7 +129,6 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res } // PerformSearch perform a search on a repository -// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2 func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) { if opts == nil || len(opts.Keyword) == 0 { return 0, nil, nil, nil diff --git a/modules/indexer/indexer.go b/modules/indexer/indexer.go new file mode 100644 index 0000000000..1e0f81de89 --- /dev/null +++ b/modules/indexer/indexer.go @@ -0,0 +1,54 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package indexer + +type SearchModeType string + +const ( + SearchModeExact SearchModeType = "exact" + SearchModeWords SearchModeType = "words" + SearchModeFuzzy SearchModeType = "fuzzy" + SearchModeRegexp SearchModeType = "regexp" +) + +type SearchMode struct { + ModeValue SearchModeType + TooltipTrKey string + TitleTrKey string +} + +func SearchModesExactWords() []SearchMode { + return []SearchMode{ + { + ModeValue: SearchModeExact, + TooltipTrKey: "search.exact_tooltip", + TitleTrKey: "search.exact", + }, + { + ModeValue: SearchModeWords, + TooltipTrKey: "search.words_tooltip", + TitleTrKey: "search.words", + }, + } +} + +func SearchModesExactWordsFuzzy() []SearchMode { + return append(SearchModesExactWords(), []SearchMode{ + { + ModeValue: SearchModeFuzzy, + TooltipTrKey: "search.fuzzy_tooltip", + TitleTrKey: "search.fuzzy", + }, + }...) +} + +func GitGrepSupportedSearchModes() []SearchMode { + return append(SearchModesExactWords(), []SearchMode{ + { + ModeValue: SearchModeRegexp, + TooltipTrKey: "search.regexp_tooltip", + TitleTrKey: "search.regexp", + }, + }...) 
+} diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go index 1b18ca1a77..8895ae2c64 100644 --- a/modules/indexer/internal/bleve/query.go +++ b/modules/indexer/internal/bleve/query.go @@ -28,6 +28,16 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query return q } +// MatchAndQuery generates a match query for the given phrase, field and analyzer +func MatchAndQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchQuery { + q := bleve.NewMatchQuery(matchPhrase) + q.FieldVal = field + q.Analyzer = analyzer + q.Fuzziness = fuzziness + q.Operator = query.MatchQueryOperatorAnd + return q +} + // BoolFieldQuery generates a bool field query for the given value and field func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery { q := bleve.NewBoolFieldQuery(value) diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index bf51bd6c14..e778df86c1 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -6,6 +6,7 @@ package bleve import ( "context" + "code.gitea.io/gitea/modules/indexer" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" "code.gitea.io/gitea/modules/indexer/issues/internal" @@ -120,6 +121,10 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWordsFuzzy() +} + // NewIndexer creates a new bleve local indexer func NewIndexer(indexDir string) *Indexer { inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping) @@ -157,16 +162,23 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( var queries []query.Query if options.Keyword != "" { - fuzziness := 0 - if 
options.IsFuzzyKeyword { - fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword) + if options.SearchMode == indexer.SearchModeWords || options.SearchMode == indexer.SearchModeFuzzy { + fuzziness := 0 + if options.SearchMode == indexer.SearchModeFuzzy { + fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword) + } + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.MatchAndQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchAndQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchAndQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), + }...)) + } else /* exact */ { + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, 0), + inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, 0), + inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, 0), + }...)) } - - queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ - inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), - inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), - inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), - }...)) } if len(options.RepoIDs) > 0 || options.AllPublic { diff --git a/modules/indexer/issues/db/db.go b/modules/indexer/issues/db/db.go index 6c9cfcf670..493f6dd0b0 100644 --- a/modules/indexer/issues/db/db.go +++ b/modules/indexer/issues/db/db.go @@ -5,9 +5,11 @@ package db import ( "context" + "strings" "code.gitea.io/gitea/models/db" issue_model "code.gitea.io/gitea/models/issues" + "code.gitea.io/gitea/modules/indexer" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_db "code.gitea.io/gitea/modules/indexer/internal/db" "code.gitea.io/gitea/modules/indexer/issues/internal" @@ 
-22,6 +24,10 @@ type Indexer struct { indexer_internal.Indexer } +func (i *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWords() +} + func NewIndexer() *Indexer { return &Indexer{ Indexer: &inner_db.Indexer{}, @@ -38,6 +44,27 @@ func (i *Indexer) Delete(_ context.Context, _ ...int64) error { return nil } +// buildMatchQuery builds the match condition for column colName: exact mode matches the whole keyword, any other mode requires every whitespace-separated word of keyword to match. +func buildMatchQuery(mode indexer.SearchModeType, colName, keyword string) builder.Cond { + if mode == indexer.SearchModeExact { + return db.BuildCaseInsensitiveLike(colName, keyword) + } + + // match words + cond := builder.NewCond() + fields := strings.Fields(keyword) + if len(fields) == 0 { + return builder.Expr("1=1") + } + for _, field := range fields { + if field == "" { + continue + } + cond = cond.And(db.BuildCaseInsensitiveLike(colName, field)) + } + return cond +} + // Search searches for issues func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) { // FIXME: I tried to avoid importing models here, but it seems to be impossible. @@ -60,14 +87,14 @@ func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( subQuery := builder.Select("id").From("issue").Where(repoCond) cond = builder.Or( - db.BuildCaseInsensitiveLike("issue.name", options.Keyword), - db.BuildCaseInsensitiveLike("issue.content", options.Keyword), + buildMatchQuery(options.SearchMode, "issue.name", options.Keyword), + buildMatchQuery(options.SearchMode, "issue.content", options.Keyword), builder.In("issue.id", builder.Select("issue_id"). From("comment"). 
Where(builder.And( builder.Eq{"type": issue_model.CommentTypeComment}, builder.In("issue_id", subQuery), - db.BuildCaseInsensitiveLike("content", options.Keyword), + buildMatchQuery(options.SearchMode, "content", options.Keyword), )), ), ) diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index 4c293f3f2a..2e4e172540 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -10,6 +10,7 @@ import ( "strings" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/indexer/issues/internal" @@ -33,6 +34,11 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + // TODO: es supports fuzzy search, but our code doesn't at the moment, and actually the default fuzziness is already "AUTO" + return indexer.SearchModesExactWords() +} + // NewIndexer creates a new elasticsearch indexer func NewIndexer(url, indexerName string) *Indexer { inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping) @@ -146,12 +152,11 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( query := elastic.NewBoolQuery() if options.Keyword != "" { - searchType := esMultiMatchTypePhrasePrefix - if options.IsFuzzyKeyword { - searchType = esMultiMatchTypeBestFields + if options.SearchMode == indexer.SearchModeExact { + query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypePhrasePrefix)) + } else /* words */ { + query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", 
"comments").Type(esMultiMatchTypeBestFields).Operator("and")) } - - query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType)) } if len(options.RepoIDs) > 0 { diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index c82dc0867e..a712efbc8b 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -14,6 +14,7 @@ import ( db_model "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/issues/bleve" "code.gitea.io/gitea/modules/indexer/issues/db" "code.gitea.io/gitea/modules/indexer/issues/elasticsearch" @@ -313,3 +314,11 @@ func CountIssues(ctx context.Context, opts *SearchOptions) (int64, error) { _, total, err := SearchIssues(ctx, opts) return total, err } + +func SupportedSearchModes() []indexer.SearchMode { + gi := globalIndexer.Load() + if gi == nil { + return nil + } + return (*gi).SupportedSearchModes() +} diff --git a/modules/indexer/issues/internal/indexer.go b/modules/indexer/issues/internal/indexer.go index 95740bc598..415f442d0c 100644 --- a/modules/indexer/issues/internal/indexer.go +++ b/modules/indexer/issues/internal/indexer.go @@ -7,6 +7,7 @@ import ( "context" "fmt" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/internal" ) @@ -16,6 +17,7 @@ type Indexer interface { Index(ctx context.Context, issue ...*IndexerData) error Delete(ctx context.Context, ids ...int64) error Search(ctx context.Context, options *SearchOptions) (*SearchResult, error) + SupportedSearchModes() []indexer.SearchMode } // NewDummyIndexer returns a dummy indexer @@ -29,6 +31,10 @@ type dummyIndexer struct { internal.Indexer } +func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode { + return nil +} + func (d *dummyIndexer) Index(_ context.Context, _ ...*IndexerData) error { return 
fmt.Errorf("indexer is not ready") } diff --git a/modules/indexer/issues/internal/model.go b/modules/indexer/issues/internal/model.go index 09dcbf4804..976e2d696b 100644 --- a/modules/indexer/issues/internal/model.go +++ b/modules/indexer/issues/internal/model.go @@ -7,6 +7,7 @@ import ( "strconv" "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/timeutil" ) @@ -77,7 +78,7 @@ type SearchResult struct { type SearchOptions struct { Keyword string // keyword to search - IsFuzzyKeyword bool // if false the levenshtein distance is 0 + SearchMode indexer.SearchModeType RepoIDs []int64 // repository IDs which the issues belong to AllPublic bool // if include all public repositories diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 1066e96272..a1746f5954 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -10,6 +10,7 @@ import ( "strconv" "strings" + "code.gitea.io/gitea/modules/indexer" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch" "code.gitea.io/gitea/modules/indexer/issues/internal" @@ -35,6 +36,10 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWords() +} + // NewIndexer creates a new meilisearch indexer func NewIndexer(url, apiKey, indexerName string) *Indexer { settings := &meilisearch.Settings{ @@ -230,9 +235,8 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( limit = 1 } - keyword := options.Keyword - if !options.IsFuzzyKeyword { - // to make it non fuzzy ("typo tolerance" in meilisearch terms), we have to quote the keyword(s) + 
keyword := options.Keyword // default to match "words" + if options.SearchMode == indexer.SearchModeExact { // https://www.meilisearch.com/docs/reference/api/search#phrase-search keyword = doubleQuoteKeyword(keyword) } diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 2f13c1a19c..036f8e5389 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -169,6 +169,10 @@ search = Search... type_tooltip = Search type fuzzy = Fuzzy fuzzy_tooltip = Include results that also match the search term closely +words = Words +words_tooltip = Include only results that match the search term words +regexp = Regexp +regexp_tooltip = Include only results that match the regexp search term exact = Exact exact_tooltip = Include only results that match the exact search term repo_kind = Search repos... diff --git a/routers/common/codesearch.go b/routers/common/codesearch.go index 7cd01068b0..9bec448d7e 100644 --- a/routers/common/codesearch.go +++ b/routers/common/codesearch.go @@ -4,36 +4,30 @@ package common import ( + "code.gitea.io/gitea/modules/indexer" + code_indexer "code.gitea.io/gitea/modules/indexer/code" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/services/context" ) func PrepareCodeSearch(ctx *context.Context) (ret struct { - Keyword string - Language string - IsFuzzy bool + Keyword string + Language string + SearchMode indexer.SearchModeType }, ) { ret.Language = ctx.FormTrim("l") ret.Keyword = ctx.FormTrim("q") + ret.SearchMode = indexer.SearchModeType(ctx.FormTrim("search_mode")) - fuzzyDefault := setting.Indexer.RepoIndexerEnabled - fuzzyAllow := true - if setting.Indexer.RepoType == "bleve" && setting.Indexer.TypeBleveMaxFuzzniess == 0 { - fuzzyDefault = false - fuzzyAllow = false - } - isFuzzy := ctx.FormOptionalBool("fuzzy").ValueOrDefault(fuzzyDefault) - if isFuzzy && !fuzzyAllow { - ctx.Flash.Info("Fuzzy search is disabled by default due to performance reasons", true) - isFuzzy = false - } - - 
ctx.Data["IsBleveFuzzyDisabled"] = true ctx.Data["Keyword"] = ret.Keyword ctx.Data["Language"] = ret.Language - ctx.Data["IsFuzzy"] = isFuzzy - + ctx.Data["SelectedSearchMode"] = string(ret.SearchMode) + if setting.Indexer.RepoIndexerEnabled { + ctx.Data["SearchModes"] = code_indexer.SupportedSearchModes() + } else { + ctx.Data["SearchModes"] = indexer.GitGrepSupportedSearchModes() + } ctx.Data["IsRepoIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled return ret } diff --git a/routers/web/explore/code.go b/routers/web/explore/code.go index ae5ff3db76..8f6518a4fc 100644 --- a/routers/web/explore/code.go +++ b/routers/web/explore/code.go @@ -72,10 +72,10 @@ func Code(ctx *context.Context) { if (len(repoIDs) > 0) || isAdmin { total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ - RepoIDs: repoIDs, - Keyword: prepareSearch.Keyword, - IsKeywordFuzzy: prepareSearch.IsFuzzy, - Language: prepareSearch.Language, + RepoIDs: repoIDs, + Keyword: prepareSearch.Keyword, + SearchMode: prepareSearch.SearchMode, + Language: prepareSearch.Language, Paginator: &db.ListOptions{ Page: page, PageSize: setting.UI.RepoSearchPagingNum, diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go index ea40e64bbb..12216fc620 100644 --- a/routers/web/repo/search.go +++ b/routers/web/repo/search.go @@ -38,10 +38,10 @@ func Search(ctx *context.Context) { if setting.Indexer.RepoIndexerEnabled { var err error total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ - RepoIDs: []int64{ctx.Repo.Repository.ID}, - Keyword: prepareSearch.Keyword, - IsKeywordFuzzy: prepareSearch.IsFuzzy, - Language: prepareSearch.Language, + RepoIDs: []int64{ctx.Repo.Repository.ID}, + Keyword: prepareSearch.Keyword, + SearchMode: prepareSearch.SearchMode, + Language: prepareSearch.Language, Paginator: &db.ListOptions{ Page: page, PageSize: setting.UI.RepoSearchPagingNum, @@ -60,7 +60,7 @@ func 
Search(ctx *context.Context) { var err error // ref should be default branch or the first existing branch searchRef := git.RefNameFromBranch(ctx.Repo.Repository.DefaultBranch) - searchResults, total, err = gitgrep.PerformSearch(ctx, page, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, searchRef, prepareSearch.Keyword, prepareSearch.IsFuzzy) + searchResults, total, err = gitgrep.PerformSearch(ctx, page, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, searchRef, prepareSearch.Keyword, prepareSearch.SearchMode) if err != nil { ctx.ServerError("gitgrep.PerformSearch", err) return diff --git a/routers/web/user/code.go b/routers/web/user/code.go index 665ce1a6a6..f9aa58b877 100644 --- a/routers/web/user/code.go +++ b/routers/web/user/code.go @@ -68,10 +68,10 @@ func CodeSearch(ctx *context.Context) { if len(repoIDs) > 0 { total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ - RepoIDs: repoIDs, - Keyword: prepareSearch.Keyword, - IsKeywordFuzzy: prepareSearch.IsFuzzy, - Language: prepareSearch.Language, + RepoIDs: repoIDs, + Keyword: prepareSearch.Keyword, + SearchMode: prepareSearch.SearchMode, + Language: prepareSearch.Language, Paginator: &db.ListOptions{ Page: page, PageSize: setting.UI.RepoSearchPagingNum, diff --git a/routers/web/user/home.go b/routers/web/user/home.go index dc78950cf2..8e030a62a2 100644 --- a/routers/web/user/home.go +++ b/routers/web/user/home.go @@ -26,6 +26,7 @@ import ( user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/container" + "code.gitea.io/gitea/modules/indexer" issue_indexer "code.gitea.io/gitea/modules/indexer/issues" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup/markdown" @@ -447,7 +448,7 @@ func buildIssueOverview(ctx *context.Context, unitType unit.Type) { ctx.Data["FilterAssigneeUsername"] = assigneeUsername opts.AssigneeID = user.GetFilterUserIDByName(ctx, assigneeUsername) - isFuzzy := 
ctx.FormBool("fuzzy") + searchMode := ctx.FormString("search_mode") // Search all repositories which // @@ -549,7 +550,9 @@ func buildIssueOverview(ctx *context.Context, unitType unit.Type) { var issues issues_model.IssueList { issueIDs, _, err := issue_indexer.SearchIssues(ctx, issue_indexer.ToSearchOptions(keyword, opts).Copy( - func(o *issue_indexer.SearchOptions) { o.IsFuzzyKeyword = isFuzzy }, + func(o *issue_indexer.SearchOptions) { + o.SearchMode = indexer.SearchModeType(searchMode) + }, )) if err != nil { ctx.ServerError("issueIDsFromSearch", err) @@ -578,7 +581,7 @@ func buildIssueOverview(ctx *context.Context, unitType unit.Type) { // ------------------------------- issueStats, err := getUserIssueStats(ctx, ctxUser, filterMode, issue_indexer.ToSearchOptions(keyword, opts).Copy( func(o *issue_indexer.SearchOptions) { - o.IsFuzzyKeyword = isFuzzy + o.SearchMode = indexer.SearchModeType(searchMode) }, )) if err != nil { @@ -633,7 +636,8 @@ func buildIssueOverview(ctx *context.Context, unitType unit.Type) { ctx.Data["ViewType"] = viewType ctx.Data["SortType"] = sortType ctx.Data["IsShowClosed"] = isShowClosed - ctx.Data["IsFuzzy"] = isFuzzy + ctx.Data["SearchModes"] = issue_indexer.SupportedSearchModes() + ctx.Data["SelectedSearchMode"] = ctx.FormTrim("search_mode") if isShowClosed { ctx.Data["State"] = "closed" diff --git a/templates/shared/search/code/results.tmpl b/templates/shared/search/code/results.tmpl index 6877efe486..8a08f5c25c 100644 --- a/templates/shared/search/code/results.tmpl +++ b/templates/shared/search/code/results.tmpl @@ -1,7 +1,7 @@