From e3021fae79f31907a3b5be512397ede547b74c02 Mon Sep 17 00:00:00 2001 From: Jimmy Praet Date: Tue, 25 Feb 2025 21:20:54 +0100 Subject: [PATCH] Use MatchPhraseQuery for bleve code search (#33628) Fix regression from #32210 which unintentionally changed the search mode for bleve from MatchPhraseQuery to MatchQuery. On the main branch, meanwhile with #33590 a "literal code search" mode (by using quotes) was implemented as a workaround for this unexpected code search behavior. Maybe that feature needs some redesign as it turns out to have been caused by a regression. But this PR at least already fixes the regression for 1.23.x --- modules/indexer/code/bleve/bleve.go | 2 +- modules/indexer/code/indexer_test.go | 70 ++++++++++++++++------------ 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 772317fa59..981fe75c3d 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -266,7 +266,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int pathQuery.FieldVal = "Filename" pathQuery.SetBoost(10) - contentQuery := bleve.NewMatchQuery(opts.Keyword) + contentQuery := bleve.NewMatchPhraseQuery(opts.Keyword) contentQuery.FieldVal = "Content" if opts.IsKeywordFuzzy { diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index f358bbe785..48afdd1a71 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -165,35 +165,6 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for matches on the contents of files within the repo '62'. 
- // This scenario yields two results (both are based on contents, the first one is an exact match where as the second is a 'fuzzy' one) - { - RepoIDs: []int64{62}, - Keyword: "This is not cheese", - Langs: 1, - Results: []codeSearchResult{ - { - Filename: "potato/ham.md", - Content: "This is not cheese", - }, - { - Filename: "ham.md", - Content: "This is also not cheese", - }, - }, - }, - // Search for matches on the contents of files regardless of case. - { - RepoIDs: nil, - Keyword: "dESCRIPTION", - Langs: 1, - Results: []codeSearchResult{ - { - Filename: "README.md", - Content: "# repo1\n\nDescription for repo1", - }, - }, - }, // Search for an exact match on the filename within the repo '62' (case insenstive). // This scenario yields a single result (the file avocado.md on the repo '62') { @@ -233,6 +204,47 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, } + if name == "elastic_search" { + // Additional scenarios for elastic_search only + additional := []struct { + RepoIDs []int64 + Keyword string + Langs int + Results []codeSearchResult + }{ + // Search for matches on the contents of files within the repo '62'. + // This scenario yields two results (both are based on contents, the first one is an exact match where as the second is a 'fuzzy' one) + { + RepoIDs: []int64{62}, + Keyword: "This is not cheese", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "potato/ham.md", + Content: "This is not cheese", + }, + { + Filename: "ham.md", + Content: "This is also not cheese", + }, + }, + }, + // Search for matches on the contents of files regardless of case. + { + RepoIDs: nil, + Keyword: "dESCRIPTION", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "README.md", + Content: "# repo1\n\nDescription for repo1", + }, + }, + }, + } + keywords = append(keywords, additional...) 
+ } + for _, kw := range keywords { t.Run(kw.Keyword, func(t *testing.T) { total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{