Full-file syntax highlighting for diff pages (#33766)

Fix #33358, fix #21970

This adds a step in the `GitDiffForRender` that does syntax highlighting for the
entire file and then only references lines from that syntax highlighted
code. This allows things like multi-line comments to be syntax
highlighted correctly.

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
pull/33830/head
Dustin Firebaugh 4 days ago committed by GitHub
parent 6f13331754
commit 3f1f808b9e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -7,6 +7,7 @@ package git
import (
"bytes"
"encoding/base64"
"errors"
"io"
"code.gitea.io/gitea/modules/typesniffer"
@ -34,8 +35,9 @@ func (b *Blob) GetBlobContent(limit int64) (string, error) {
return string(buf), err
}
// GetBlobLineCount gets line count of the blob
func (b *Blob) GetBlobLineCount() (int, error) {
// GetBlobLineCount gets line count of the blob.
// It will also try to write the content to w if it's not nil, then we could pre-fetch the content without reading it again.
func (b *Blob) GetBlobLineCount(w io.Writer) (int, error) {
reader, err := b.DataAsync()
if err != nil {
return 0, err
@ -44,20 +46,20 @@ func (b *Blob) GetBlobLineCount() (int, error) {
buf := make([]byte, 32*1024)
count := 1
lineSep := []byte{'\n'}
c, err := reader.Read(buf)
if c == 0 && err == io.EOF {
return 0, nil
}
for {
c, err := reader.Read(buf)
if w != nil {
if _, err := w.Write(buf[:c]); err != nil {
return count, err
}
}
count += bytes.Count(buf[:c], lineSep)
switch {
case err == io.EOF:
case errors.Is(err, io.EOF):
return count, nil
case err != nil:
return count, err
}
c, err = reader.Read(buf)
}
}

@ -11,6 +11,7 @@ import (
gohtml "html"
"html/template"
"io"
"path"
"path/filepath"
"strings"
"sync"
@ -83,7 +84,7 @@ func Code(fileName, language, code string) (output template.HTML, lexerName stri
}
if lexer == nil {
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
if val, ok := highlightMapping[path.Ext(fileName)]; ok {
// use mapped value to find lexer
lexer = lexers.Get(val)
}

@ -1591,8 +1591,7 @@ func GetPullRequestFiles(ctx *context.APIContext) {
maxLines := setting.Git.MaxGitDiffLines
// FIXME: If there are too many files in the repo, may cause some unpredictable issues.
// FIXME: it doesn't need to call "GetDiff" to do various parsing and highlighting
diff, err := gitdiff.GetDiff(ctx, baseGitRepo,
diff, err := gitdiff.GetDiffForAPI(ctx, baseGitRepo,
&gitdiff.DiffOptions{
BeforeCommitID: startCommitID,
AfterCommitID: endCommitID,

@ -97,7 +97,7 @@ func RefBlame(ctx *context.Context) {
return
}
ctx.Data["NumLines"], err = blob.GetBlobLineCount()
ctx.Data["NumLines"], err = blob.GetBlobLineCount(nil)
if err != nil {
ctx.NotFound(err)
return

@ -314,7 +314,7 @@ func Diff(ctx *context.Context) {
maxLines, maxFiles = -1, -1
}
diff, err := gitdiff.GetDiff(ctx, gitRepo, &gitdiff.DiffOptions{
diff, err := gitdiff.GetDiffForRender(ctx, gitRepo, &gitdiff.DiffOptions{
AfterCommitID: commitID,
SkipTo: ctx.FormString("skip-to"),
MaxLines: maxLines,

@ -614,7 +614,7 @@ func PrepareCompareDiff(
fileOnly := ctx.FormBool("file-only")
diff, err := gitdiff.GetDiff(ctx, ci.HeadGitRepo,
diff, err := gitdiff.GetDiffForRender(ctx, ci.HeadGitRepo,
&gitdiff.DiffOptions{
BeforeCommitID: beforeCommitID,
AfterCommitID: headCommitID,

@ -749,7 +749,7 @@ func viewPullFiles(ctx *context.Context, specifiedStartCommit, specifiedEndCommi
diffOptions.BeforeCommitID = startCommitID
}
diff, err := gitdiff.GetDiff(ctx, gitRepo, diffOptions, files...)
diff, err := gitdiff.GetDiffForRender(ctx, gitRepo, diffOptions, files...)
if err != nil {
ctx.ServerError("GetDiff", err)
return

@ -31,6 +31,7 @@ import (
"code.gitea.io/gitea/modules/optional"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/translation"
"code.gitea.io/gitea/modules/util"
"github.com/sergi/go-diff/diffmatchpatch"
stdcharset "golang.org/x/net/html/charset"
@ -75,12 +76,12 @@ const (
// DiffLine represents a line difference in a DiffSection.
type DiffLine struct {
LeftIdx int
RightIdx int
Match int
LeftIdx int // line number, 1-based
RightIdx int // line number, 1-based
Match int // line number, 1-based
Type DiffLineType
Content string
Comments issues_model.CommentList
Comments issues_model.CommentList // related PR code comments
SectionInfo *DiffLineSectionInfo
}
@ -95,9 +96,18 @@ type DiffLineSectionInfo struct {
RightHunkSize int
}
// DiffHTMLOperation is the HTML version of diffmatchpatch.Diff
type DiffHTMLOperation struct {
Type diffmatchpatch.Operation
HTML template.HTML
}
// BlobExcerptChunkSize represent max lines of excerpt
const BlobExcerptChunkSize = 20
// MaxDiffHighlightEntireFileSize is the maximum file size that will be highlighted with "entire file diff"
const MaxDiffHighlightEntireFileSize = 1 * 1024 * 1024
// GetType returns the type of DiffLine.
func (d *DiffLine) GetType() int {
return int(d.Type)
@ -112,8 +122,9 @@ func (d *DiffLine) GetHTMLDiffLineType() string {
return "del"
case DiffLineSection:
return "tag"
default:
return "same"
}
return "same"
}
// CanComment returns whether a line can get commented
@ -196,38 +207,6 @@ type DiffSection struct {
Lines []*DiffLine
}
var (
addedCodePrefix = []byte(`<span class="added-code">`)
removedCodePrefix = []byte(`<span class="removed-code">`)
codeTagSuffix = []byte(`</span>`)
)
func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string {
buf := bytes.NewBuffer(nil)
// restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary
for _, tag := range lineWrapperTags {
buf.WriteString(tag)
}
for _, diff := range diffs {
switch {
case diff.Type == diffmatchpatch.DiffEqual:
buf.WriteString(diff.Text)
case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
buf.Write(addedCodePrefix)
buf.WriteString(diff.Text)
buf.Write(codeTagSuffix)
case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
buf.Write(removedCodePrefix)
buf.WriteString(diff.Text)
buf.Write(codeTagSuffix)
}
}
for range lineWrapperTags {
buf.WriteString("</span>")
}
return buf.String()
}
// GetLine gets a specific line by type (add or del) and file line number
func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine {
var (
@ -271,10 +250,10 @@ LOOP:
return nil
}
var diffMatchPatch = diffmatchpatch.New()
func init() {
diffMatchPatch.DiffEditCost = 100
func defaultDiffMatchPatch() *diffmatchpatch.DiffMatchPatch {
dmp := diffmatchpatch.New()
dmp.DiffEditCost = 100
return dmp
}
// DiffInline is a struct that has a content and escape status
@ -283,97 +262,114 @@ type DiffInline struct {
Content template.HTML
}
// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped
// DiffInlineWithUnicodeEscape makes a DiffInline with hidden Unicode characters escaped
func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline {
status, content := charset.EscapeControlHTML(s, locale)
return DiffInline{EscapeStatus: status, Content: content}
}
// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped
func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline {
highlighted, _ := highlight.Code(fileName, language, code)
status, content := charset.EscapeControlHTML(highlighted, locale)
return DiffInline{EscapeStatus: status, Content: content}
}
// GetComputedInlineDiffFor computes inline diff for the given line.
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline {
func (diffSection *DiffSection) getLineContentForRender(lineIdx int, diffLine *DiffLine, fileLanguage string, highlightLines map[int]template.HTML) template.HTML {
h, ok := highlightLines[lineIdx-1]
if ok {
return h
}
if diffLine.Content == "" {
return ""
}
if setting.Git.DisableDiffHighlight {
return getLineContent(diffLine.Content[1:], locale)
return template.HTML(html.EscapeString(diffLine.Content[1:]))
}
h, _ = highlight.Code(diffSection.Name, fileLanguage, diffLine.Content[1:])
return h
}
var (
compareDiffLine *DiffLine
diff1 string
diff2 string
)
language := ""
func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType, leftLine, rightLine *DiffLine, locale translation.Locale) DiffInline {
var fileLanguage string
var highlightedLeftLines, highlightedRightLines map[int]template.HTML
// when a "diff section" is manually prepared by ExcerptBlob, it doesn't have "file" information
if diffSection.file != nil {
language = diffSection.file.Language
fileLanguage = diffSection.file.Language
highlightedLeftLines, highlightedRightLines = diffSection.file.highlightedLeftLines, diffSection.file.highlightedRightLines
}
hcd := newHighlightCodeDiff()
var diff1, diff2, lineHTML template.HTML
if leftLine != nil {
diff1 = diffSection.getLineContentForRender(leftLine.LeftIdx, leftLine, fileLanguage, highlightedLeftLines)
lineHTML = util.Iif(diffLineType == DiffLinePlain, diff1, "")
}
if rightLine != nil {
diff2 = diffSection.getLineContentForRender(rightLine.RightIdx, rightLine, fileLanguage, highlightedRightLines)
lineHTML = util.Iif(diffLineType == DiffLinePlain, diff2, "")
}
if diffLineType != DiffLinePlain {
// it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
// if the line wrappers are still needed in the future, it can be added back by "diffLineWithHighlightWrapper(hcd.lineWrapperTags. ...)"
lineHTML = hcd.diffLineWithHighlight(diffLineType, diff1, diff2)
}
return DiffInlineWithUnicodeEscape(lineHTML, locale)
}
// GetComputedInlineDiffFor computes inline diff for the given line.
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline {
// try to find equivalent diff line. ignore, otherwise
switch diffLine.Type {
case DiffLineSection:
return getLineContent(diffLine.Content[1:], locale)
case DiffLineAdd:
compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
if compareDiffLine == nil {
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
}
diff1 = compareDiffLine.Content
diff2 = diffLine.Content
compareDiffLine := diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
return diffSection.getDiffLineForRender(DiffLineAdd, compareDiffLine, diffLine, locale)
case DiffLineDel:
compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
if compareDiffLine == nil {
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
}
diff1 = diffLine.Content
diff2 = compareDiffLine.Content
default:
if strings.IndexByte(" +-", diffLine.Content[0]) > -1 {
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
}
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content, locale)
compareDiffLine := diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
return diffSection.getDiffLineForRender(DiffLineDel, diffLine, compareDiffLine, locale)
default: // Plain
// TODO: there was an "if" check: `if diffLine.Content >strings.IndexByte(" +-", diffLine.Content[0]) > -1 { ... } else { ... }`
// no idea why it needs that check, it seems that the "if" should be always true, so try to simplify the code
return diffSection.getDiffLineForRender(DiffLinePlain, nil, diffLine, locale)
}
hcd := newHighlightCodeDiff()
diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:])
// it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
// if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)"
diffHTML := diffToHTML(nil, diffRecord, diffLine.Type)
return DiffInlineWithUnicodeEscape(template.HTML(diffHTML), locale)
}
// DiffFile represents a file diff.
type DiffFile struct {
Name string
NameHash string
OldName string
Index int
Addition, Deletion int
Type DiffFileType
IsCreated bool
IsDeleted bool
IsBin bool
IsLFSFile bool
IsRenamed bool
IsAmbiguous bool
Sections []*DiffSection
IsIncomplete bool
IsIncompleteLineTooLong bool
IsProtected bool
IsGenerated bool
IsVendored bool
// only used internally to parse Ambiguous filenames
isAmbiguous bool
// basic fields (parsed from diff result)
Name string
NameHash string
OldName string
Addition int
Deletion int
Type DiffFileType
Mode string
OldMode string
IsCreated bool
IsDeleted bool
IsBin bool
IsLFSFile bool
IsRenamed bool
IsSubmodule bool
// basic fields but for render purpose only
Sections []*DiffSection
IsIncomplete bool
IsIncompleteLineTooLong bool
// will be filled by the extra loop in GitDiffForRender
Language string
IsGenerated bool
IsVendored bool
SubmoduleDiffInfo *SubmoduleDiffInfo // IsSubmodule==true, then there must be a SubmoduleDiffInfo
// will be filled by route handler
IsProtected bool
// will be filled by SyncUserSpecificDiff
IsViewed bool // User specific
HasChangedSinceLastReview bool // User specific
Language string
Mode string
OldMode string
IsSubmodule bool // if IsSubmodule==true, then there must be a SubmoduleDiffInfo
SubmoduleDiffInfo *SubmoduleDiffInfo
// for render purpose only, will be filled by the extra loop in GitDiffForRender
highlightedLeftLines map[int]template.HTML
highlightedRightLines map[int]template.HTML
}
// GetType returns type of diff file.
@ -381,18 +377,23 @@ func (diffFile *DiffFile) GetType() int {
return int(diffFile.Type)
}
// GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
func (diffFile *DiffFile) GetTailSection(leftCommit, rightCommit *git.Commit) *DiffSection {
type DiffLimitedContent struct {
LeftContent, RightContent *limitByteWriter
}
// GetTailSectionAndLimitedContent creates a fake DiffLineSection if the last section is not the end of the file
func (diffFile *DiffFile) GetTailSectionAndLimitedContent(leftCommit, rightCommit *git.Commit) (_ *DiffSection, diffLimitedContent DiffLimitedContent) {
if len(diffFile.Sections) == 0 || leftCommit == nil || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile {
return nil
return nil, diffLimitedContent
}
lastSection := diffFile.Sections[len(diffFile.Sections)-1]
lastLine := lastSection.Lines[len(lastSection.Lines)-1]
leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name)
rightLineCount := getCommitFileLineCount(rightCommit, diffFile.Name)
leftLineCount, leftContent := getCommitFileLineCountAndLimitedContent(leftCommit, diffFile.Name)
rightLineCount, rightContent := getCommitFileLineCountAndLimitedContent(rightCommit, diffFile.Name)
diffLimitedContent = DiffLimitedContent{LeftContent: leftContent, RightContent: rightContent}
if leftLineCount <= lastLine.LeftIdx || rightLineCount <= lastLine.RightIdx {
return nil
return nil, diffLimitedContent
}
tailDiffLine := &DiffLine{
Type: DiffLineSection,
@ -406,7 +407,7 @@ func (diffFile *DiffFile) GetTailSection(leftCommit, rightCommit *git.Commit) *D
},
}
tailSection := &DiffSection{FileName: diffFile.Name, Lines: []*DiffLine{tailDiffLine}}
return tailSection
return tailSection, diffLimitedContent
}
// GetDiffFileName returns the name of the diff file, or its old name in case it was deleted
@ -438,16 +439,29 @@ func (diffFile *DiffFile) ModeTranslationKey(mode string) string {
}
}
func getCommitFileLineCount(commit *git.Commit, filePath string) int {
type limitByteWriter struct {
buf bytes.Buffer
limit int
}
func (l *limitByteWriter) Write(p []byte) (n int, err error) {
if l.buf.Len()+len(p) > l.limit {
p = p[:l.limit-l.buf.Len()]
}
return l.buf.Write(p)
}
func getCommitFileLineCountAndLimitedContent(commit *git.Commit, filePath string) (lineCount int, limitWriter *limitByteWriter) {
blob, err := commit.GetBlobByPath(filePath)
if err != nil {
return 0
return 0, nil
}
lineCount, err := blob.GetBlobLineCount()
w := &limitByteWriter{limit: MaxDiffHighlightEntireFileSize + 1}
lineCount, err = blob.GetBlobLineCount(w)
if err != nil {
return 0
return 0, nil
}
return lineCount
return lineCount, w
}
// Diff represents a difference between two git trees.
@ -526,13 +540,13 @@ parsingLoop:
}
if maxFiles > -1 && len(diff.Files) >= maxFiles {
lastFile := createDiffFile(diff, line)
lastFile := createDiffFile(line)
diff.End = lastFile.Name
diff.IsIncomplete = true
break parsingLoop
}
curFile = createDiffFile(diff, line)
curFile = createDiffFile(line)
if skipping {
if curFile.Name != skipToFile {
line, err = skipToNextDiffHead(input)
@ -615,28 +629,28 @@ parsingLoop:
case strings.HasPrefix(line, "rename from "):
curFile.IsRenamed = true
curFile.Type = DiffFileRename
if curFile.IsAmbiguous {
if curFile.isAmbiguous {
curFile.OldName = prepareValue(line, "rename from ")
}
case strings.HasPrefix(line, "rename to "):
curFile.IsRenamed = true
curFile.Type = DiffFileRename
if curFile.IsAmbiguous {
if curFile.isAmbiguous {
curFile.Name = prepareValue(line, "rename to ")
curFile.IsAmbiguous = false
curFile.isAmbiguous = false
}
case strings.HasPrefix(line, "copy from "):
curFile.IsRenamed = true
curFile.Type = DiffFileCopy
if curFile.IsAmbiguous {
if curFile.isAmbiguous {
curFile.OldName = prepareValue(line, "copy from ")
}
case strings.HasPrefix(line, "copy to "):
curFile.IsRenamed = true
curFile.Type = DiffFileCopy
if curFile.IsAmbiguous {
if curFile.isAmbiguous {
curFile.Name = prepareValue(line, "copy to ")
curFile.IsAmbiguous = false
curFile.isAmbiguous = false
}
case strings.HasPrefix(line, "new file"):
curFile.Type = DiffFileAdd
@ -663,7 +677,7 @@ parsingLoop:
curFile.IsBin = true
case strings.HasPrefix(line, "--- "):
// Handle ambiguous filenames
if curFile.IsAmbiguous {
if curFile.isAmbiguous {
// The shortest string that can end up here is:
// "--- a\t\n" without the quotes.
// This line has a len() of 7 but doesn't contain a oldName.
@ -681,7 +695,7 @@ parsingLoop:
// Otherwise do nothing with this line
case strings.HasPrefix(line, "+++ "):
// Handle ambiguous filenames
if curFile.IsAmbiguous {
if curFile.isAmbiguous {
if len(line) > 6 && line[4] == 'b' {
curFile.Name = line[6 : len(line)-1]
if line[len(line)-2] == '\t' {
@ -693,7 +707,7 @@ parsingLoop:
} else {
curFile.Name = curFile.OldName
}
curFile.IsAmbiguous = false
curFile.isAmbiguous = false
}
// Otherwise do nothing with this line, but now switch to parsing hunks
lineBytes, isFragment, err := parseHunks(ctx, curFile, maxLines, maxLineCharacters, input)
@ -1006,7 +1020,7 @@ func parseHunks(ctx context.Context, curFile *DiffFile, maxLines, maxLineCharact
}
}
func createDiffFile(diff *Diff, line string) *DiffFile {
func createDiffFile(line string) *DiffFile {
// The a/ and b/ filenames are the same unless rename/copy is involved.
// Especially, even for a creation or a deletion, /dev/null is not used
// in place of the a/ or b/ filenames.
@ -1017,12 +1031,11 @@ func createDiffFile(diff *Diff, line string) *DiffFile {
//
// Path names are quoted if necessary.
//
// This means that you should always be able to determine the file name even when there
// This means that you should always be able to determine the file name even when
// there is potential ambiguity...
//
// but we can be simpler with our heuristics by just forcing git to prefix things nicely
curFile := &DiffFile{
Index: len(diff.Files) + 1,
Type: DiffFileChange,
Sections: make([]*DiffSection, 0, 10),
}
@ -1034,7 +1047,7 @@ func createDiffFile(diff *Diff, line string) *DiffFile {
curFile.OldName, oldNameAmbiguity = readFileName(rd)
curFile.Name, newNameAmbiguity = readFileName(rd)
if oldNameAmbiguity && newNameAmbiguity {
curFile.IsAmbiguous = true
curFile.isAmbiguous = true
// OK we should bet that the oldName and the newName are the same if they can be made to be same
// So we need to start again ...
if (len(line)-len(cmdDiffHead)-1)%2 == 0 {
@ -1121,20 +1134,21 @@ func guessBeforeCommitForDiff(gitRepo *git.Repository, beforeCommitID string, af
return actualBeforeCommit, actualBeforeCommitID, nil
}
// GetDiff builds a Diff between two commits of a repository.
// getDiffBasic builds a Diff between two commits of a repository.
// Passing the empty string as beforeCommitID returns a diff from the parent commit.
// The whitespaceBehavior is either an empty string or a git flag
func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
// Returned beforeCommit could be nil if the afterCommit doesn't have parent commit
func getDiffBasic(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (_ *Diff, beforeCommit, afterCommit *git.Commit, err error) {
repoPath := gitRepo.Path
afterCommit, err := gitRepo.GetCommit(opts.AfterCommitID)
afterCommit, err = gitRepo.GetCommit(opts.AfterCommitID)
if err != nil {
return nil, err
return nil, nil, nil, err
}
actualBeforeCommit, actualBeforeCommitID, err := guessBeforeCommitForDiff(gitRepo, opts.BeforeCommitID, afterCommit)
beforeCommit, beforeCommitID, err := guessBeforeCommitForDiff(gitRepo, opts.BeforeCommitID, afterCommit)
if err != nil {
return nil, err
return nil, nil, nil, err
}
cmdDiff := git.NewCommand().
@ -1150,7 +1164,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
parsePatchSkipToFile = ""
}
cmdDiff.AddDynamicArguments(actualBeforeCommitID.String(), opts.AfterCommitID)
cmdDiff.AddDynamicArguments(beforeCommitID.String(), opts.AfterCommitID)
cmdDiff.AddDashesAndList(files...)
cmdCtx, cmdCancel := context.WithCancel(ctx)
@ -1180,12 +1194,25 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
// Ensure the git process is killed if it didn't exit already
cmdCancel()
if err != nil {
return nil, fmt.Errorf("unable to ParsePatch: %w", err)
return nil, nil, nil, fmt.Errorf("unable to ParsePatch: %w", err)
}
diff.Start = opts.SkipTo
return diff, beforeCommit, afterCommit, nil
}
checker, deferable := gitRepo.CheckAttributeReader(opts.AfterCommitID)
defer deferable()
func GetDiffForAPI(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
diff, _, _, err := getDiffBasic(ctx, gitRepo, opts, files...)
return diff, err
}
func GetDiffForRender(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
diff, beforeCommit, afterCommit, err := getDiffBasic(ctx, gitRepo, opts, files...)
if err != nil {
return nil, err
}
checker, deferrable := gitRepo.CheckAttributeReader(opts.AfterCommitID)
defer deferrable()
for _, diffFile := range diff.Files {
isVendored := optional.None[bool]()
@ -1205,7 +1232,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
// Populate Submodule URLs
if diffFile.SubmoduleDiffInfo != nil {
diffFile.SubmoduleDiffInfo.PopulateURL(diffFile, actualBeforeCommit, afterCommit)
diffFile.SubmoduleDiffInfo.PopulateURL(diffFile, beforeCommit, afterCommit)
}
if !isVendored.Has() {
@ -1217,15 +1244,46 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
isGenerated = optional.Some(analyze.IsGenerated(diffFile.Name))
}
diffFile.IsGenerated = isGenerated.Value()
tailSection := diffFile.GetTailSection(actualBeforeCommit, afterCommit)
tailSection, limitedContent := diffFile.GetTailSectionAndLimitedContent(beforeCommit, afterCommit)
if tailSection != nil {
diffFile.Sections = append(diffFile.Sections, tailSection)
}
if !setting.Git.DisableDiffHighlight {
if limitedContent.LeftContent != nil && limitedContent.LeftContent.buf.Len() < MaxDiffHighlightEntireFileSize {
diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.String())
}
if limitedContent.RightContent != nil && limitedContent.RightContent.buf.Len() < MaxDiffHighlightEntireFileSize {
diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.String())
}
}
}
return diff, nil
}
func highlightCodeLines(diffFile *DiffFile, isLeft bool, content string) map[int]template.HTML {
highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content)
splitLines := strings.Split(string(highlightedNewContent), "\n")
lines := make(map[int]template.HTML, len(splitLines))
// only save the highlighted lines we need, but not the whole file, to save memory
for _, sec := range diffFile.Sections {
for _, ln := range sec.Lines {
lineIdx := ln.LeftIdx
if !isLeft {
lineIdx = ln.RightIdx
}
if lineIdx >= 1 {
idx := lineIdx - 1
if idx < len(splitLines) {
lines[idx] = template.HTML(splitLines[idx])
}
}
}
}
return lines
}
type DiffShortStat struct {
NumFiles, TotalAddition, TotalDeletion int
}

@ -17,27 +17,10 @@ import (
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/setting"
dmp "github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestDiffToHTML(t *testing.T) {
assert.Equal(t, "foo <span class=\"added-code\">bar</span> biz", diffToHTML(nil, []dmp.Diff{
{Type: dmp.DiffEqual, Text: "foo "},
{Type: dmp.DiffInsert, Text: "bar"},
{Type: dmp.DiffDelete, Text: " baz"},
{Type: dmp.DiffEqual, Text: " biz"},
}, DiffLineAdd))
assert.Equal(t, "foo <span class=\"removed-code\">bar</span> biz", diffToHTML(nil, []dmp.Diff{
{Type: dmp.DiffEqual, Text: "foo "},
{Type: dmp.DiffDelete, Text: "bar"},
{Type: dmp.DiffInsert, Text: " baz"},
{Type: dmp.DiffEqual, Text: " biz"},
}, DiffLineDel))
}
func TestParsePatch_skipTo(t *testing.T) {
type testcase struct {
name string
@ -621,7 +604,7 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) {
defer gitRepo.Close()
for _, behavior := range []git.TrustedCmdArgs{{"-w"}, {"--ignore-space-at-eol"}, {"-b"}, nil} {
diffs, err := GetDiff(t.Context(), gitRepo,
diffs, err := GetDiffForAPI(t.Context(), gitRepo,
&DiffOptions{
AfterCommitID: "d8e0bbb45f200e67d9a784ce55bd90821af45ebd",
BeforeCommitID: "72866af952e98d02a73003501836074b286a78f6",

@ -4,10 +4,10 @@
package gitdiff
import (
"bytes"
"html/template"
"strings"
"code.gitea.io/gitea/modules/highlight"
"github.com/sergi/go-diff/diffmatchpatch"
)
@ -77,7 +77,7 @@ func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool {
return hcd.placeholderBegin <= r && r < hcd.placeholderBegin+rune(hcd.placeholderMaxCount)
}
func (hcd *highlightCodeDiff) collectUsedRunes(code string) {
func (hcd *highlightCodeDiff) collectUsedRunes(code template.HTML) {
for _, r := range code {
if hcd.isInPlaceholderRange(r) {
// put the existing rune (used by code) in map, then this rune won't be used a placeholder anymore.
@ -86,27 +86,76 @@ func (hcd *highlightCodeDiff) collectUsedRunes(code string) {
}
}
func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff {
func (hcd *highlightCodeDiff) diffLineWithHighlight(lineType DiffLineType, codeA, codeB template.HTML) template.HTML {
return hcd.diffLineWithHighlightWrapper(nil, lineType, codeA, codeB)
}
func (hcd *highlightCodeDiff) diffLineWithHighlightWrapper(lineWrapperTags []string, lineType DiffLineType, codeA, codeB template.HTML) template.HTML {
hcd.collectUsedRunes(codeA)
hcd.collectUsedRunes(codeB)
highlightCodeA, _ := highlight.Code(filename, language, codeA)
highlightCodeB, _ := highlight.Code(filename, language, codeB)
convertedCodeA := hcd.convertToPlaceholders(codeA)
convertedCodeB := hcd.convertToPlaceholders(codeB)
dmp := defaultDiffMatchPatch()
diffs := dmp.DiffMain(convertedCodeA, convertedCodeB, true)
diffs = dmp.DiffCleanupEfficiency(diffs)
buf := bytes.NewBuffer(nil)
convertedCodeA := hcd.convertToPlaceholders(string(highlightCodeA))
convertedCodeB := hcd.convertToPlaceholders(string(highlightCodeB))
// restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary
for _, tag := range lineWrapperTags {
buf.WriteString(tag)
}
diffs := diffMatchPatch.DiffMain(convertedCodeA, convertedCodeB, true)
diffs = diffMatchPatch.DiffCleanupEfficiency(diffs)
addedCodePrefix := hcd.registerTokenAsPlaceholder(`<span class="added-code">`)
removedCodePrefix := hcd.registerTokenAsPlaceholder(`<span class="removed-code">`)
codeTagSuffix := hcd.registerTokenAsPlaceholder(`</span>`)
for i := range diffs {
hcd.recoverOneDiff(&diffs[i])
if codeTagSuffix != 0 {
for _, diff := range diffs {
switch {
case diff.Type == diffmatchpatch.DiffEqual:
buf.WriteString(diff.Text)
case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
buf.WriteRune(addedCodePrefix)
buf.WriteString(diff.Text)
buf.WriteRune(codeTagSuffix)
case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
buf.WriteRune(removedCodePrefix)
buf.WriteString(diff.Text)
buf.WriteRune(codeTagSuffix)
}
}
} else {
// placeholder map space is exhausted
for _, diff := range diffs {
take := diff.Type == diffmatchpatch.DiffEqual || (diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd) || (diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel)
if take {
buf.WriteString(diff.Text)
}
}
}
return diffs
for range lineWrapperTags {
buf.WriteString("</span>")
}
return hcd.recoverOneDiff(buf.String())
}
func (hcd *highlightCodeDiff) registerTokenAsPlaceholder(token string) rune {
placeholder, ok := hcd.tokenPlaceholderMap[token]
if !ok {
placeholder = hcd.nextPlaceholder()
if placeholder != 0 {
hcd.tokenPlaceholderMap[token] = placeholder
hcd.placeholderTokenMap[placeholder] = token
}
}
return placeholder
}
// convertToPlaceholders totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes.
func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string {
func (hcd *highlightCodeDiff) convertToPlaceholders(htmlContent template.HTML) string {
var tagStack []string
res := strings.Builder{}
@ -115,6 +164,7 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string {
var beforeToken, token string
var valid bool
htmlCode := string(htmlContent)
// the standard chroma highlight HTML is "<span class="line [hl]"><span class="cl"> ... </span></span>"
for {
beforeToken, token, htmlCode, valid = extractHTMLToken(htmlCode)
@ -151,14 +201,7 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string {
} // else: impossible
// remember the placeholder and token in the map
placeholder, ok := hcd.tokenPlaceholderMap[tokenInMap]
if !ok {
placeholder = hcd.nextPlaceholder()
if placeholder != 0 {
hcd.tokenPlaceholderMap[tokenInMap] = placeholder
hcd.placeholderTokenMap[placeholder] = tokenInMap
}
}
placeholder := hcd.registerTokenAsPlaceholder(tokenInMap)
if placeholder != 0 {
res.WriteRune(placeholder) // use the placeholder to replace the token
@ -179,11 +222,11 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string {
return res.String()
}
func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) {
func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML {
sb := strings.Builder{}
var tagStack []string
for _, r := range diff.Text {
for _, r := range str {
token, ok := hcd.placeholderTokenMap[r]
if !ok || token == "" {
sb.WriteRune(r) // if the rune is not a placeholder, write it as it is
@ -217,6 +260,5 @@ func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) {
} // else: impossible. every tag was pushed into the stack by the code above and is valid HTML opening tag
}
}
diff.Text = sb.String()
return template.HTML(sb.String())
}

@ -5,121 +5,72 @@ package gitdiff
import (
"fmt"
"html/template"
"strings"
"testing"
"github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert"
)
func TestDiffWithHighlight(t *testing.T) {
hcd := newHighlightCodeDiff()
diffs := hcd.diffWithHighlight(
"main.v", "",
" run('<>')\n",
" run(db)\n",
)
expected := ` <span class="n">run</span><span class="o">(</span><span class="removed-code"><span class="k">&#39;</span><span class="o">&lt;</span><span class="o">&gt;</span><span class="k">&#39;</span></span><span class="o">)</span>`
output := diffToHTML(nil, diffs, DiffLineDel)
assert.Equal(t, expected, output)
expected = ` <span class="n">run</span><span class="o">(</span><span class="added-code"><span class="n">db</span></span><span class="o">)</span>`
output = diffToHTML(nil, diffs, DiffLineAdd)
assert.Equal(t, expected, output)
hcd = newHighlightCodeDiff()
hcd.placeholderTokenMap['O'] = "<span>"
hcd.placeholderTokenMap['C'] = "</span>"
diff := diffmatchpatch.Diff{}
diff.Text = "OC"
hcd.recoverOneDiff(&diff)
assert.Equal(t, "<span></span>", diff.Text)
diff.Text = "O"
hcd.recoverOneDiff(&diff)
assert.Equal(t, "<span></span>", diff.Text)
diff.Text = "C"
hcd.recoverOneDiff(&diff)
assert.Equal(t, "", diff.Text)
t.Run("DiffLineAddDel", func(t *testing.T) {
hcd := newHighlightCodeDiff()
codeA := template.HTML(`x <span class="k">foo</span> y`)
codeB := template.HTML(`x <span class="k">bar</span> y`)
outDel := hcd.diffLineWithHighlight(DiffLineDel, codeA, codeB)
assert.Equal(t, `x <span class="k"><span class="removed-code">foo</span></span> y`, string(outDel))
outAdd := hcd.diffLineWithHighlight(DiffLineAdd, codeA, codeB)
assert.Equal(t, `x <span class="k"><span class="added-code">bar</span></span> y`, string(outAdd))
})
t.Run("OpenCloseTags", func(t *testing.T) {
hcd := newHighlightCodeDiff()
hcd.placeholderTokenMap['O'], hcd.placeholderTokenMap['C'] = "<span>", "</span>"
assert.Equal(t, "<span></span>", string(hcd.recoverOneDiff("OC")))
assert.Equal(t, "<span></span>", string(hcd.recoverOneDiff("O")))
assert.Equal(t, "", string(hcd.recoverOneDiff("C")))
})
}
func TestDiffWithHighlightPlaceholder(t *testing.T) {
hcd := newHighlightCodeDiff()
diffs := hcd.diffWithHighlight(
"main.js", "",
"a='\U00100000'",
"a='\U0010FFFD''",
)
output := hcd.diffLineWithHighlight(DiffLineDel, "a='\U00100000'", "a='\U0010FFFD''")
assert.Equal(t, "", hcd.placeholderTokenMap[0x00100000])
assert.Equal(t, "", hcd.placeholderTokenMap[0x0010FFFD])
expected := fmt.Sprintf(`<span class="nx">a</span><span class="o">=</span><span class="s1">&#39;</span><span class="removed-code">%s</span>&#39;`, "\U00100000")
output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel)
assert.Equal(t, expected, output)
expected := fmt.Sprintf(`a='<span class="removed-code">%s</span>'`, "\U00100000")
assert.Equal(t, expected, string(output))
hcd = newHighlightCodeDiff()
diffs = hcd.diffWithHighlight(
"main.js", "",
"a='\U00100000'",
"a='\U0010FFFD'",
)
expected = fmt.Sprintf(`<span class="nx">a</span><span class="o">=</span><span class="s1">&#39;</span><span class="added-code">%s</span>&#39;`, "\U0010FFFD")
output = diffToHTML(nil, diffs, DiffLineAdd)
assert.Equal(t, expected, output)
output = hcd.diffLineWithHighlight(DiffLineAdd, "a='\U00100000'", "a='\U0010FFFD'")
expected = fmt.Sprintf(`a='<span class="added-code">%s</span>'`, "\U0010FFFD")
assert.Equal(t, expected, string(output))
}
func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) {
hcd := newHighlightCodeDiff()
hcd.placeholderMaxCount = 0
diffs := hcd.diffWithHighlight(
"main.js", "",
"'",
``,
)
output := diffToHTML(nil, diffs, DiffLineDel)
expected := fmt.Sprintf(`<span class="removed-code">%s#39;</span>`, "\uFFFD")
assert.Equal(t, expected, output)
hcd = newHighlightCodeDiff()
hcd.placeholderMaxCount = 0
diffs = hcd.diffWithHighlight(
"main.js", "",
"a < b",
"a > b",
)
output = diffToHTML(nil, diffs, DiffLineDel)
expected = fmt.Sprintf(`a %s<span class="removed-code">l</span>t; b`, "\uFFFD")
assert.Equal(t, expected, output)
output = diffToHTML(nil, diffs, DiffLineAdd)
expected = fmt.Sprintf(`a %s<span class="added-code">g</span>t; b`, "\uFFFD")
assert.Equal(t, expected, output)
placeHolderAmp := string(rune(0xFFFD))
output := hcd.diffLineWithHighlight(DiffLineDel, `<span class="k">&lt;</span>`, `<span class="k">&gt;</span>`)
assert.Equal(t, placeHolderAmp+"lt;", string(output))
output = hcd.diffLineWithHighlight(DiffLineAdd, `<span class="k">&lt;</span>`, `<span class="k">&gt;</span>`)
assert.Equal(t, placeHolderAmp+"gt;", string(output))
}
func TestDiffWithHighlightTagMatch(t *testing.T) {
totalOverflow := 0
for i := 0; i < 100; i++ {
hcd := newHighlightCodeDiff()
hcd.placeholderMaxCount = i
diffs := hcd.diffWithHighlight(
"main.js", "",
"a='1'",
"b='2'",
)
totalOverflow += hcd.placeholderOverflowCount
output := diffToHTML(nil, diffs, DiffLineDel)
c1 := strings.Count(output, "<span")
c2 := strings.Count(output, "</span")
assert.Equal(t, c1, c2)
output = diffToHTML(nil, diffs, DiffLineAdd)
c1 = strings.Count(output, "<span")
c2 = strings.Count(output, "</span")
assert.Equal(t, c1, c2)
f := func(t *testing.T, lineType DiffLineType) {
totalOverflow := 0
for i := 0; ; i++ {
hcd := newHighlightCodeDiff()
hcd.placeholderMaxCount = i
output := string(hcd.diffLineWithHighlight(lineType, `<span class="k">&lt;</span>`, `<span class="k">&gt;</span>`))
totalOverflow += hcd.placeholderOverflowCount
assert.Equal(t, strings.Count(output, "<span"), strings.Count(output, "</span"))
if hcd.placeholderOverflowCount == 0 {
break
}
}
assert.NotZero(t, totalOverflow)
}
assert.NotZero(t, totalOverflow)
t.Run("DiffLineAdd", func(t *testing.T) { f(t, DiffLineAdd) })
t.Run("DiffLineDel", func(t *testing.T) { f(t, DiffLineDel) })
}

@ -35,7 +35,6 @@ func TestGetDiffPreview(t *testing.T) {
Name: "README.md",
OldName: "README.md",
NameHash: "8ec9a00bfd09b3190ac6b22251dbb1aa95a0579d",
Index: 1,
Addition: 2,
Deletion: 1,
Type: 2,

@ -9,15 +9,15 @@
>
<overflow-menu class="ui secondary pointing tabular menu custom">
<div class="overflow-menu-items tw-justify-center">
<a class="item active" data-tab="diff-side-by-side-{{.file.Index}}">{{ctx.Locale.Tr "repo.diff.image.side_by_side"}}</a>
<a class="item active" data-tab="diff-side-by-side-{{.file.NameHash}}">{{ctx.Locale.Tr "repo.diff.image.side_by_side"}}</a>
{{if and .blobBase .blobHead}}
<a class="item" data-tab="diff-swipe-{{.file.Index}}">{{ctx.Locale.Tr "repo.diff.image.swipe"}}</a>
<a class="item" data-tab="diff-overlay-{{.file.Index}}">{{ctx.Locale.Tr "repo.diff.image.overlay"}}</a>
<a class="item" data-tab="diff-swipe-{{.file.NameHash}}">{{ctx.Locale.Tr "repo.diff.image.swipe"}}</a>
<a class="item" data-tab="diff-overlay-{{.file.NameHash}}">{{ctx.Locale.Tr "repo.diff.image.overlay"}}</a>
{{end}}
</div>
</overflow-menu>
<div class="image-diff-tabs is-loading">
<div class="ui bottom attached tab image-diff-container active" data-tab="diff-side-by-side-{{.file.Index}}">
<div class="ui bottom attached tab image-diff-container active" data-tab="diff-side-by-side-{{.file.NameHash}}">
<div class="diff-side-by-side">
{{if .blobBase}}
<span class="side">
@ -52,7 +52,7 @@
</div>
</div>
{{if and .blobBase .blobHead}}
<div class="ui bottom attached tab image-diff-container" data-tab="diff-swipe-{{.file.Index}}">
<div class="ui bottom attached tab image-diff-container" data-tab="diff-swipe-{{.file.NameHash}}">
<div class="diff-swipe">
<div class="swipe-frame">
<span class="before-container"><img class="image-before"></span>
@ -66,7 +66,7 @@
</div>
</div>
</div>
<div class="ui bottom attached tab image-diff-container" data-tab="diff-overlay-{{.file.Index}}">
<div class="ui bottom attached tab image-diff-container" data-tab="diff-overlay-{{.file.NameHash}}">
<div class="diff-overlay">
<input type="range" min="0" max="100" value="50">
<div class="overlay-frame">

@ -5,30 +5,33 @@ package integration
import (
"net/http"
"net/url"
"testing"
pull_service "code.gitea.io/gitea/services/pull"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestListPullCommits(t *testing.T) {
onGiteaRun(t, func(t *testing.T, u *url.URL) {
session := loginUser(t, "user5")
req := NewRequest(t, "GET", "/user2/repo1/pulls/3/commits/list")
resp := session.MakeRequest(t, req, http.StatusOK)
var pullCommitList struct {
Commits []pull_service.CommitInfo `json:"commits"`
LastReviewCommitSha string `json:"last_review_commit_sha"`
}
DecodeJSON(t, resp, &pullCommitList)
if assert.Len(t, pullCommitList.Commits, 2) {
assert.Equal(t, "985f0301dba5e7b34be866819cd15ad3d8f508ee", pullCommitList.Commits[0].ID)
assert.Equal(t, "5c050d3b6d2db231ab1f64e324f1b6b9a0b181c2", pullCommitList.Commits[1].ID)
}
assert.Equal(t, "4a357436d925b5c974181ff12a994538ddc5a269", pullCommitList.LastReviewCommitSha)
session := loginUser(t, "user5")
req := NewRequest(t, "GET", "/user2/repo1/pulls/3/commits/list")
resp := session.MakeRequest(t, req, http.StatusOK)
var pullCommitList struct {
Commits []pull_service.CommitInfo `json:"commits"`
LastReviewCommitSha string `json:"last_review_commit_sha"`
}
DecodeJSON(t, resp, &pullCommitList)
require.Len(t, pullCommitList.Commits, 2)
assert.Equal(t, "985f0301dba5e7b34be866819cd15ad3d8f508ee", pullCommitList.Commits[0].ID)
assert.Equal(t, "5c050d3b6d2db231ab1f64e324f1b6b9a0b181c2", pullCommitList.Commits[1].ID)
assert.Equal(t, "4a357436d925b5c974181ff12a994538ddc5a269", pullCommitList.LastReviewCommitSha)
t.Run("CommitBlobExcerpt", func(t *testing.T) {
req = NewRequest(t, "GET", "/user2/repo1/blob_excerpt/985f0301dba5e7b34be866819cd15ad3d8f508ee?last_left=0&last_right=0&left=2&right=2&left_hunk_size=2&right_hunk_size=2&path=README.md&style=split&direction=up")
resp = session.MakeRequest(t, req, http.StatusOK)
assert.Contains(t, resp.Body.String(), `<td class="lines-code lines-code-new"><code class="code-inner"># repo1</code>`)
})
}

Loading…
Cancel
Save