Commit c532940e authored by Robert Griesemer's avatar Robert Griesemer

improved sentence extraction:

- don't forget first periods
- look at capitalization of last char before periods

R=rsc
http://go/go-review/1024027
parent ed6eb5b5
......@@ -23,6 +23,7 @@ import (
"sync";
"template";
"time";
"unicode";
"utf8";
)
......@@ -137,21 +138,38 @@ func htmlEscape(s string) string {
func firstSentence(s string) string {
// find first period followed by whitespace, or just the first period
i := -1;
for j, ch := range s {
i := -1; // index+1 of first period
j := -1; // index+1 of first period that is followed by white space
prev := 'A';
for k, ch := range s {
k1 := k+1;
if ch == '.' {
i = j+1; // include period
if i < len(s) && s[i] <= ' ' {
break;
if i < 0 {
i = k1; // first period
}
if k1 < len(s) && s[k1] <= ' ' {
if j < 0 {
j = k1; // first period followed by white space
}
if !unicode.IsUpper(prev) {
j = k1;
break;
}
}
}
prev = ch;
}
if i < 0 {
// no period found, use the enire string
i = len(s);
if j < 0 {
// use the next best period
j = i;
if j < 0 {
// no period at all, use the entire string
j = len(s);
}
}
return s[0:i];
return s[0:j];
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment