From a435d649c45bd33676bf7c6399c2e2e37807c545 Mon Sep 17 00:00:00 2001 From: silverwind Date: Sat, 23 Jul 2022 19:16:18 +0200 Subject: [PATCH 1/5] Rework file highlight rendering and fix yaml copy-paste --- modules/highlight/highlight.go | 146 +++++++++++------- modules/highlight/highlight_test.go | 223 ++++++++++++++++++---------- routers/web/repo/view.go | 26 ++-- 3 files changed, 254 insertions(+), 141 deletions(-) diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index 6832207c0fc7..8838176eabd3 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -10,6 +10,7 @@ import ( "bytes" "fmt" gohtml "html" + "io" "path/filepath" "strings" "sync" @@ -26,7 +27,13 @@ import ( ) // don't index files larger than this many bytes for performance purposes -const sizeLimit = 1000000 +const sizeLimit = 1024 * 1024 + +// newLineInHTML is the HTML entity to be used for newline in HTML content, if it's empty then the original "\n" is kept +// this option is here for 2 purposes: +// (1) make it easier to switch back to the original "\n" if there is any compatibility issue in the future +// (2) make it clear to do tests: " " is the real newline for rendering, '\n' is ignorable/trim-able and could be ignored +var newLineInHTML = " " var ( // For custom user mapping @@ -46,7 +53,6 @@ func NewContext() { highlightMapping[keys[i].Name()] = keys[i].Value() } } - // The size 512 is simply a conservative rule of thumb c, err := lru.New2Q(512) if err != nil { @@ -60,7 +66,7 @@ func NewContext() { func Code(fileName, language, code string) string { NewContext() - // diff view newline will be passed as empty, change to literal \n so it can be copied + // diff view newline will be passed as empty, change to literal '\n' so it can be copied // preserve literal newline in blame view if code == "" || code == "\n" { return "\n" @@ -128,36 +134,32 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string { return code } - htmlw.Flush() + _ = htmlw.Flush() // Chroma will add newlines for certain lexers in order to highlight them properly - // Once highlighted, strip them here so they don't cause copy/paste trouble in HTML output + // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output return strings.TrimSuffix(htmlbuf.String(), "\n") } -// File returns a slice of chroma syntax highlighted lines of code -func File(numLines int, fileName, language string, code []byte) []string { +// File returns a slice of chroma syntax highlighted HTML lines of code +func File(fileName, language string, code []byte) ([]string, error) { NewContext() if len(code) > sizeLimit { - return plainText(string(code), numLines) + return PlainText(code), nil } + formatter := html.New(html.WithClasses(true), html.WithLineNumbers(false), html.PreventSurroundingPre(true), ) - if formatter == nil { - log.Error("Couldn't create chroma formatter") - return plainText(string(code), numLines) - } - - htmlbuf := bytes.Buffer{} - htmlw := bufio.NewWriter(&htmlbuf) + htmlBuf := bytes.Buffer{} + htmlWriter := bufio.NewWriter(&htmlBuf) var lexer chroma.Lexer // provided language overrides everything - if len(language) > 0 { + if language != "" { lexer = lexers.Get(language) } @@ -168,9 +170,9 @@ func File(numLines int, fileName, language string, code []byte) []string { } if lexer == nil { - language := analyze.GetCodeLanguage(fileName, code) + guessLanguage := analyze.GetCodeLanguage(fileName, code) - lexer = lexers.Get(language) + lexer = lexers.Get(guessLanguage) if lexer == nil { lexer = lexers.Match(fileName) if lexer == nil { @@ -181,54 +183,92 @@ func File(numLines int, fileName, language string, code []byte) []string { iterator, err := lexer.Tokenise(nil, string(code)) if err != nil { - log.Error("Can't tokenize code: %v", err) - return plainText(string(code), numLines) + return nil, fmt.Errorf("can't tokenize code: %w", err) } - err = formatter.Format(htmlw, styles.GitHub, iterator) + err = formatter.Format(htmlWriter, styles.GitHub, iterator) if err != nil { - log.Error("Can't format code: %v", err) - return plainText(string(code), numLines) + return nil, fmt.Errorf("can't format code: %w", err) } - htmlw.Flush() - finalNewLine := false - if len(code) > 0 { - finalNewLine = code[len(code)-1] == '\n' - } + _ = htmlWriter.Flush() + + m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1) - m := make([]string, 0, numLines) - for _, v := range strings.SplitN(htmlbuf.String(), "\n", numLines) { - content := v - // need to keep lines that are only \n so copy/paste works properly in browser - if content == "" { - content = "\n" - } else if content == `` { - content += "\n" - } else if content == `` { - content += "\n" + htmlStr := htmlBuf.String() + line := strings.Builder{} + insideLine := 0 // every makes it increase one level, every closed makes it decrease one level + tagStack := make([]string, 0, 4) + for len(htmlStr) > 0 { + pos1 := strings.IndexByte(htmlStr, '<') + pos2 := strings.IndexByte(htmlStr, '>') + if pos1 == -1 || pos2 == -1 || pos1 > pos2 { + break } - content = strings.TrimSuffix(content, ``) - content = strings.TrimPrefix(content, ``) - m = append(m, content) + tag := htmlStr[pos1 : pos2+1] + if insideLine > 0 { + line.WriteString(htmlStr[:pos1]) + } + if tag[1] == '/' { + if len(tagStack) == 0 { + return nil, fmt.Errorf("can't find matched tag: %q", tag) + } + popped := tagStack[len(tagStack)-1] + tagStack = tagStack[:len(tagStack)-1] + if popped == `` { + insideLine-- + lineStr := line.String() + if newLineInHTML != "" && lineStr != "" && lineStr[len(lineStr)-1] == '\n' { + lineStr = lineStr[:len(lineStr)-1] + newLineInHTML + } + m = append(m, lineStr) + line = strings.Builder{} + } + if insideLine > 0 { + line.WriteString(tag) + } + } else { + tagStack = append(tagStack, tag) + if insideLine > 0 { + line.WriteString(tag) + } + if tag == `` { + insideLine++ + } + } + htmlStr = htmlStr[pos2+1:] } - if finalNewLine { - m = append(m, "\n") + + if len(m) == 0 { + m = append(m, "") // maybe we do not want to return 0 lines } - return m + return m, nil } -// return unhiglighted map -func plainText(code string, numLines int) []string { - m := make([]string, 0, numLines) - for _, v := range strings.SplitN(code, "\n", numLines) { - content := v - // need to keep lines that are only \n so copy/paste works properly in browser - if content == "" { - content = "\n" +// PlainText returns non-highlighted HTML for code +func PlainText(code []byte) []string { + r := bufio.NewReader(bytes.NewReader(code)) + m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1) + for { + content, err := r.ReadString('\n') + if err != nil && err != io.EOF { + log.Error("failed to read string from buffer: %v", err) + break + } + if content == "" && err == io.EOF { + break + } + s := gohtml.EscapeString(content) + if newLineInHTML != "" && s != "" && s[len(s)-1] == '\n' { + s = s[:len(s)-1] + newLineInHTML } - m = append(m, gohtml.EscapeString(content)) + m = append(m, s) } + + if len(m) == 0 { + m = append(m, "") // maybe we do not want to return 0 lines + } + return m } diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index e5dfedd2b3c8..d37d81ff6a31 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -8,97 +8,170 @@ import ( "strings" "testing" - "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/util" - "github.com/stretchr/testify/assert" - "gopkg.in/ini.v1" ) +func lines(s string) []string { + return strings.Split(strings.TrimSpace(s), "\n") +} + func TestFile(t *testing.T) { - setting.Cfg = ini.Empty() + defaultNewLineInHTML := newLineInHTML + defer func() { + newLineInHTML = defaultNewLineInHTML + }() + + newLineInHTML = " " tests := []struct { - name string - numLines int - fileName string - code string - want string + name string + code string + want []string }{ { - name: ".drone.yml", - numLines: 12, - fileName: ".drone.yml", - code: util.Dedent(` - kind: pipeline - name: default + name: "empty.py", + code: "", + want: []string{""}, + }, + { + name: "tags.txt", + code: "<>", + want: []string{"<>"}, + }, + { + name: "tags.py", + code: "<>", + want: []string{`<>`}, + }, + { + name: "eol-no.py", + code: "a=1", + want: []string{`a=1`}, + }, + { + name: "eol-newline1.py", + code: "a=1\n", + want: []string{ + `a=1 `, + }, + }, + { + name: "eol-newline2.py", + code: "a=1\n\n", + want: []string{ + `a=1 `, + ` `, + }, + }, + { + name: "empty-line-with-space.py", + code: strings.ReplaceAll(strings.TrimSpace(` +def: + a=1 - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - `), - want: util.Dedent(` - kind: pipeline - name: default - - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - `), +b='' +{space} +c=2 + `), "{space}", " "), + want: lines(` +def: + a=1 + +b='' + +c=2`, + ), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + out, err := File(tt.name, "", []byte(tt.code)) + assert.NoError(t, err) + expected := strings.Join(tt.want, "\n") + actual := strings.Join(out, "\n") + assert.Equal(t, strings.Count(actual, "")) + assert.EqualValues(t, expected, actual) + }) + } + + newLineInHTML = "" + out, err := File("test-original-newline.py", "", []byte("a=1\n")) + assert.NoError(t, err) + assert.EqualValues(t, `a=1`+"\n", strings.Join(out, "")) +} + +func TestPlainText(t *testing.T) { + defaultNewLineInHTML := newLineInHTML + defer func() { + newLineInHTML = defaultNewLineInHTML + }() + + newLineInHTML = " " + tests := []struct { + name string + code string + want []string + }{ + { + name: "empty.py", + code: "", + want: []string{""}, }, { - name: ".drone.yml - trailing space", - numLines: 13, - fileName: ".drone.yml", - code: strings.Replace(util.Dedent(` - kind: pipeline - name: default + name: "tags.py", + code: "<>", + want: []string{"<>"}, + }, + { + name: "eol-no.py", + code: "a=1", + want: []string{`a=1`}, + }, + { + name: "eol-newline1.py", + code: "a=1\n", + want: []string{ + `a=1 `, + }, + }, + { + name: "eol-newline2.py", + code: "a=1\n\n", + want: []string{ + `a=1 `, + ` `, + }, + }, + { + name: "empty-line-with-space.py", + code: strings.ReplaceAll(strings.TrimSpace(` +def: + a=1 - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - `)+"\n", "name: default", "name: default ", 1), - want: util.Dedent(` - kind: pipeline - name: default - - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - - - - `), +b='' +{space} +c=2 + `), "{space}", " "), + want: lines(` +def: + a=1 + +b='' + +c=2`), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := strings.Join(File(tt.numLines, tt.fileName, "", []byte(tt.code)), "\n") - assert.Equal(t, tt.want, got) + out := PlainText([]byte(tt.code)) + expected := strings.Join(tt.want, "\n") + actual := strings.Join(out, "\n") + assert.EqualValues(t, expected, actual) }) } + + newLineInHTML = "" + out := PlainText([]byte("a=1\n")) + assert.EqualValues(t, "a=1\n", strings.Join(out, "")) } diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 6b6660f7747b..6ead5fe8f894 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -15,7 +15,6 @@ import ( "net/http" "net/url" "path" - "strconv" "strings" "time" @@ -58,15 +57,6 @@ type namedBlob struct { blob *git.Blob } -func linesBytesCount(s []byte) int { - nl := []byte{'\n'} - n := bytes.Count(s, nl) - if len(s) > 0 && !bytes.HasSuffix(s, nl) { - n++ - } - return n -} - // FIXME: There has to be a more efficient way of doing this func getReadmeFileFromPath(commit *git.Commit, treePath string) (*namedBlob, error) { tree, err := commit.SubTree(treePath) @@ -556,8 +546,14 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st ) } else { buf, _ := io.ReadAll(rd) - lineNums := linesBytesCount(buf) - ctx.Data["NumLines"] = strconv.Itoa(lineNums) + + // empty: 0 lines; "a": one line; "a\n": two lines; "a\nb": two lines; + // the NumLines is only used for the display on the UI: "xxx lines" + if len(buf) == 0 { + ctx.Data["NumLines"] = 0 + } else { + ctx.Data["NumLines"] = bytes.Count(buf, []byte{'\n'}) + 1 + } ctx.Data["NumLinesSet"] = true language := "" @@ -585,7 +581,11 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st language = "" } } - fileContent := highlight.File(lineNums, blob.Name(), language, buf) + fileContent, err := highlight.File(blob.Name(), language, buf) + if err != nil { + log.Error("highlight.File failed, fallback to plain text: %v", err) + fileContent = highlight.PlainText(buf) + } status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard) ctx.Data["EscapeStatus"] = status statuses := make([]charset.EscapeStatus, len(fileContent)) From 49f62a4d7145e14e5277e5c7d03995fe05a8c171 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Wed, 27 Jul 2022 11:12:36 +0800 Subject: [PATCH 2/5] use Split+Trim to replace tag parser --- modules/highlight/highlight.go | 58 +++++----------------------------- 1 file changed, 8 insertions(+), 50 deletions(-) diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index 8838176eabd3..73b5d92f78c3 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -195,54 +195,17 @@ func File(fileName, language string, code []byte) ([]string, error) { m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1) + // at the moment, Chroma generates stable output `...\n` for each line htmlStr := htmlBuf.String() - line := strings.Builder{} - insideLine := 0 // every makes it increase one level, every closed makes it decrease one level - tagStack := make([]string, 0, 4) - for len(htmlStr) > 0 { - pos1 := strings.IndexByte(htmlStr, '<') - pos2 := strings.IndexByte(htmlStr, '>') - if pos1 == -1 || pos2 == -1 || pos1 > pos2 { - break - } - tag := htmlStr[pos1 : pos2+1] - if insideLine > 0 { - line.WriteString(htmlStr[:pos1]) - } - if tag[1] == '/' { - if len(tagStack) == 0 { - return nil, fmt.Errorf("can't find matched tag: %q", tag) - } - popped := tagStack[len(tagStack)-1] - tagStack = tagStack[:len(tagStack)-1] - if popped == `` { - insideLine-- - lineStr := line.String() - if newLineInHTML != "" && lineStr != "" && lineStr[len(lineStr)-1] == '\n' { - lineStr = lineStr[:len(lineStr)-1] + newLineInHTML - } - m = append(m, lineStr) - line = strings.Builder{} - } - if insideLine > 0 { - line.WriteString(tag) - } - } else { - tagStack = append(tagStack, tag) - if insideLine > 0 { - line.WriteString(tag) - } - if tag == `` { - insideLine++ - } + lines := strings.Split(htmlStr, ``) + for i := 1; i < len(lines); i++ { + line := lines[i] + line = strings.TrimSuffix(line, "") + if newLineInHTML != "" && line != "" && line[len(line)-1] == '\n' { + line = line[:len(line)-1] + newLineInHTML } - htmlStr = htmlStr[pos2+1:] - } - - if len(m) == 0 { - m = append(m, "") // maybe we do not want to return 0 lines + m = append(m, line) } - return m, nil } @@ -265,10 +228,5 @@ func PlainText(code []byte) []string { } m = append(m, s) } - - if len(m) == 0 { - m = append(m, "") // maybe we do not want to return 0 lines - } - return m } From c8107f8d52404987cd6e65f4ea7b1505dc33b588 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Wed, 27 Jul 2022 11:18:31 +0800 Subject: [PATCH 3/5] remove unnecessary bytes.Count --- modules/highlight/highlight.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index 73b5d92f78c3..2ddb553342aa 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -193,11 +193,10 @@ func File(fileName, language string, code []byte) ([]string, error) { _ = htmlWriter.Flush() - m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1) - // at the moment, Chroma generates stable output `...\n` for each line htmlStr := htmlBuf.String() lines := strings.Split(htmlStr, ``) + m := make([]string, 0, len(lines)) for i := 1; i < len(lines); i++ { line := lines[i] line = strings.TrimSuffix(line, "") From 025e3d9aa2f8fd4e61e31210eb868db27c3bb334 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Thu, 28 Jul 2022 08:56:06 +0800 Subject: [PATCH 4/5] remove newLineInHTML = " " --- modules/highlight/highlight.go | 12 ------------ modules/highlight/highlight_test.go | 21 --------------------- 2 files changed, 33 deletions(-) diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index 2ddb553342aa..af3376e8d712 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -29,12 +29,6 @@ import ( // don't index files larger than this many bytes for performance purposes const sizeLimit = 1024 * 1024 -// newLineInHTML is the HTML entity to be used for newline in HTML content, if it's empty then the original "\n" is kept -// this option is here for 2 purposes: -// (1) make it easier to switch back to the original "\n" if there is any compatibility issue in the future -// (2) make it clear to do tests: " " is the real newline for rendering, '\n' is ignorable/trim-able and could be ignored -var newLineInHTML = " " - var ( // For custom user mapping highlightMapping = map[string]string{} @@ -200,9 +194,6 @@ func File(fileName, language string, code []byte) ([]string, error) { for i := 1; i < len(lines); i++ { line := lines[i] line = strings.TrimSuffix(line, "") - if newLineInHTML != "" && line != "" && line[len(line)-1] == '\n' { - line = line[:len(line)-1] + newLineInHTML - } m = append(m, line) } return m, nil @@ -222,9 +213,6 @@ func PlainText(code []byte) []string { break } s := gohtml.EscapeString(content) - if newLineInHTML != "" && s != "" && s[len(s)-1] == '\n' { - s = s[:len(s)-1] + newLineInHTML - } m = append(m, s) } return m diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index d37d81ff6a31..bb035110b9df 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -16,12 +16,6 @@ func lines(s string) []string { } func TestFile(t *testing.T) { - defaultNewLineInHTML := newLineInHTML - defer func() { - newLineInHTML = defaultNewLineInHTML - }() - - newLineInHTML = " " tests := []struct { name string code string @@ -93,20 +87,9 @@ c=2 assert.EqualValues(t, expected, actual) }) } - - newLineInHTML = "" - out, err := File("test-original-newline.py", "", []byte("a=1\n")) - assert.NoError(t, err) - assert.EqualValues(t, `a=1`+"\n", strings.Join(out, "")) } func TestPlainText(t *testing.T) { - defaultNewLineInHTML := newLineInHTML - defer func() { - newLineInHTML = defaultNewLineInHTML - }() - - newLineInHTML = " " tests := []struct { name string code string @@ -170,8 +153,4 @@ c=2`), assert.EqualValues(t, expected, actual) }) } - - newLineInHTML = "" - out := PlainText([]byte("a=1\n")) - assert.EqualValues(t, "a=1\n", strings.Join(out, "")) } From 52459933873d23a2ea11433bfd23a6056d1d862d Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Thu, 28 Jul 2022 09:04:14 +0800 Subject: [PATCH 5/5] fix tests --- modules/highlight/highlight_test.go | 61 ++++++++++++++--------------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index bb035110b9df..8f83f4a2f612 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -12,7 +12,7 @@ import ( ) func lines(s string) []string { - return strings.Split(strings.TrimSpace(s), "\n") + return strings.Split(strings.ReplaceAll(strings.TrimSpace(s), `\n`, "\n"), "\n") } func TestFile(t *testing.T) { @@ -24,37 +24,36 @@ func TestFile(t *testing.T) { { name: "empty.py", code: "", - want: []string{""}, + want: lines(""), }, { name: "tags.txt", code: "<>", - want: []string{"<>"}, + want: lines("<>"), }, { name: "tags.py", code: "<>", - want: []string{`<>`}, + want: lines(`<>`), }, { name: "eol-no.py", code: "a=1", - want: []string{`a=1`}, + want: lines(`a=1`), }, { name: "eol-newline1.py", code: "a=1\n", - want: []string{ - `a=1 `, - }, + want: lines(`a=1\n`), }, { name: "eol-newline2.py", code: "a=1\n\n", - want: []string{ - `a=1 `, - ` `, - }, + want: lines(` +a=1\n +\n + `, + ), }, { name: "empty-line-with-space.py", @@ -67,11 +66,11 @@ b='' c=2 `), "{space}", " "), want: lines(` -def: - a=1 - -b='' - +def:\n + a=1\n +\n +b=''\n + \n c=2`, ), }, @@ -98,32 +97,30 @@ func TestPlainText(t *testing.T) { { name: "empty.py", code: "", - want: []string{""}, + want: lines(""), }, { name: "tags.py", code: "<>", - want: []string{"<>"}, + want: lines("<>"), }, { name: "eol-no.py", code: "a=1", - want: []string{`a=1`}, + want: lines(`a=1`), }, { name: "eol-newline1.py", code: "a=1\n", - want: []string{ - `a=1 `, - }, + want: lines(`a=1\n`), }, { name: "eol-newline2.py", code: "a=1\n\n", - want: []string{ - `a=1 `, - ` `, - }, + want: lines(` +a=1\n +\n + `), }, { name: "empty-line-with-space.py", @@ -136,11 +133,11 @@ b='' c=2 `), "{space}", " "), want: lines(` -def: - a=1 - -b='' - +def:\n + a=1\n +\n +b=''\n + \n c=2`), }, }