Commit 9610b616 authored by Robert Griesemer's avatar Robert Griesemer

go/doc: fix URL matched in ToHTML

Permit paired parentheses in URLs such as:

http://en.wikipedia.org/wiki/Camellia_(cipher)

Fixes #5043.

LGTM=rsc
R=rsc
CC=golang-codereviews
https://golang.org/cl/85610043
parent e79bab30
...@@ -45,13 +45,13 @@ func commentEscape(w io.Writer, text string, nice bool) { ...@@ -45,13 +45,13 @@ func commentEscape(w io.Writer, text string, nice bool) {
const ( const (
// Regexp for Go identifiers // Regexp for Go identifiers
identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this identRx = `[\pL_][\pL_0-9]*`
// Regexp for URLs // Regexp for URLs
protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):` protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
hostPart = `[a-zA-Z0-9_@\-]+` hostPart = `[a-zA-Z0-9_@\-]+`
filePart = `[a-zA-Z0-9_?%#~&/\-+=]+` filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen
urlRx = protocol + `//` + // http:// urlRx = `(` + protocol + `)://` + // http://
hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/ hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
filePart + `([:.,]` + filePart + `)*` filePart + `([:.,]` + filePart + `)*`
) )
...@@ -73,6 +73,29 @@ var ( ...@@ -73,6 +73,29 @@ var (
html_endh = []byte("</h3>\n") html_endh = []byte("</h3>\n")
) )
// pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses.
func pairedParensPrefixLen(s string) int {
parens := 0
l := len(s)
for i, ch := range s {
switch ch {
case '(':
if parens == 0 {
l = i
}
parens++
case ')':
parens--
if parens == 0 {
l = len(s)
} else if parens < 0 {
return i
}
}
}
return l
}
// Emphasize and escape a line of text for HTML. URLs are converted into links; // Emphasize and escape a line of text for HTML. URLs are converted into links;
// if the URL also appears in the words map, the link is taken from the map (if // if the URL also appears in the words map, the link is taken from the map (if
// the corresponding map value is the empty string, the URL is not converted // the corresponding map value is the empty string, the URL is not converted
...@@ -92,18 +115,26 @@ func emphasize(w io.Writer, line string, words map[string]string, nice bool) { ...@@ -92,18 +115,26 @@ func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
// write text before match // write text before match
commentEscape(w, line[0:m[0]], nice) commentEscape(w, line[0:m[0]], nice)
// analyze match // adjust match if necessary
match := line[m[0]:m[1]] match := line[m[0]:m[1]]
if n := pairedParensPrefixLen(match); n < len(match) {
// match contains unpaired parentheses (rare);
// redo matching with shortened line for correct indices
m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
match = match[:n]
}
// analyze match
url := "" url := ""
italics := false italics := false
if words != nil { if words != nil {
url, italics = words[string(match)] url, italics = words[match]
} }
if m[2] >= 0 { if m[2] >= 0 {
// match against first parenthesized sub-regexp; must be match against urlRx // match against first parenthesized sub-regexp; must be match against urlRx
if !italics { if !italics {
// no alternative URL in words list, use match instead // no alternative URL in words list, use match instead
url = string(match) url = match
} }
italics = false // don't italicize URLs italics = false // don't italicize URLs
} }
......
...@@ -148,13 +148,16 @@ func TestToText(t *testing.T) { ...@@ -148,13 +148,16 @@ func TestToText(t *testing.T) {
} }
var emphasizeTests = []struct { var emphasizeTests = []struct {
in string in, out string
out string
}{ }{
{"http://www.google.com/", `<a href="http://www.google.com/">http://www.google.com/</a>`}, {"http://www.google.com/", `<a href="http://www.google.com/">http://www.google.com/</a>`},
{"https://www.google.com/", `<a href="https://www.google.com/">https://www.google.com/</a>`}, {"https://www.google.com/", `<a href="https://www.google.com/">https://www.google.com/</a>`},
{"http://www.google.com/path.", `<a href="http://www.google.com/path">http://www.google.com/path</a>.`}, {"http://www.google.com/path.", `<a href="http://www.google.com/path">http://www.google.com/path</a>.`},
{"http://en.wikipedia.org/wiki/Camellia_(cipher)", `<a href="http://en.wikipedia.org/wiki/Camellia_(cipher)">http://en.wikipedia.org/wiki/Camellia_(cipher)</a>`},
{"(http://www.google.com/)", `(<a href="http://www.google.com/">http://www.google.com/</a>)`}, {"(http://www.google.com/)", `(<a href="http://www.google.com/">http://www.google.com/</a>)`},
{"http://gmail.com)", `<a href="http://gmail.com">http://gmail.com</a>)`},
{"((http://gmail.com))", `((<a href="http://gmail.com">http://gmail.com</a>))`},
{"http://gmail.com ((http://gmail.com)) ()", `<a href="http://gmail.com">http://gmail.com</a> ((<a href="http://gmail.com">http://gmail.com</a>)) ()`},
{"Foo bar http://example.com/ quux!", `Foo bar <a href="http://example.com/">http://example.com/</a> quux!`}, {"Foo bar http://example.com/ quux!", `Foo bar <a href="http://example.com/">http://example.com/</a> quux!`},
{"Hello http://example.com/%2f/ /world.", `Hello <a href="http://example.com/%2f/">http://example.com/%2f/</a> /world.`}, {"Hello http://example.com/%2f/ /world.", `Hello <a href="http://example.com/%2f/">http://example.com/%2f/</a> /world.`},
{"Lorem http: ipsum //host/path", "Lorem http: ipsum //host/path"}, {"Lorem http: ipsum //host/path", "Lorem http: ipsum //host/path"},
...@@ -171,3 +174,34 @@ func TestEmphasize(t *testing.T) { ...@@ -171,3 +174,34 @@ func TestEmphasize(t *testing.T) {
} }
} }
} }
var pairedParensPrefixLenTests = []struct {
in, out string
}{
{"", ""},
{"foo", "foo"},
{"()", "()"},
{"foo()", "foo()"},
{"foo()()()", "foo()()()"},
{"foo()((()()))", "foo()((()()))"},
{"foo()((()()))bar", "foo()((()()))bar"},
{"foo)", "foo"},
{"foo))", "foo"},
{"foo)))))", "foo"},
{"(foo", ""},
{"((foo", ""},
{"(((((foo", ""},
{"(foo)", "(foo)"},
{"((((foo))))", "((((foo))))"},
{"foo()())", "foo()()"},
{"foo((()())", "foo"},
{"foo((()())) (() foo ", "foo((()())) "},
}
func TestPairedParensPrefixLen(t *testing.T) {
for i, tt := range pairedParensPrefixLenTests {
if out := tt.in[:pairedParensPrefixLen(tt.in)]; out != tt.out {
t.Errorf("#%d: mismatch\nhave: %q\nwant: %q", i, out, tt.out)
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment