Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
golang
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
go
golang
Commits
4497960b
Commit
4497960b
authored
Jan 27, 2011
by
Robert Griesemer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
godoc: full text index for whitelisted non-Go files
R=rsc CC=golang-dev
https://golang.org/cl/4029046
parent
c2ea38ac
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
174 additions
and
89 deletions
+174
-89
godoc.go
src/cmd/godoc/godoc.go
+3
-51
index.go
src/cmd/godoc/index.go
+109
-38
utils.go
src/cmd/godoc/utils.go
+62
-0
No files found.
src/cmd/godoc/godoc.go
View file @
4497960b
...
...
@@ -25,7 +25,6 @@ import (
"strings"
"template"
"time"
"utf8"
)
...
...
@@ -56,7 +55,7 @@ var (
// TODO(gri) consider the invariant that goroot always end in '/'
goroot
=
flag
.
String
(
"goroot"
,
runtime
.
GOROOT
(),
"Go root directory"
)
testDir
=
flag
.
String
(
"testdir"
,
""
,
"Go root subdirectory - for testing only (faster startups)"
)
p
ath
=
flag
.
String
(
"path"
,
""
,
"additional package directories (colon-separated)"
)
p
kgPath
=
flag
.
String
(
"path"
,
""
,
"additional package directories (colon-separated)"
)
filter
=
flag
.
String
(
"filter"
,
""
,
"filter file containing permitted package directory paths"
)
filterMin
=
flag
.
Int
(
"filter_minutes"
,
0
,
"filter file update interval in minutes; disabled if <= 0"
)
filterDelay
delayTime
// actual filter update interval in minutes; usually filterDelay == filterMin, but filterDelay may back off exponentially
...
...
@@ -80,7 +79,7 @@ var (
func
initHandlers
()
{
fsMap
.
Init
(
*
path
)
fsMap
.
Init
(
*
p
kgP
ath
)
fileServer
=
http
.
FileServer
(
*
goroot
,
""
)
cmdHandler
=
httpHandler
{
"/cmd/"
,
pathutil
.
Join
(
*
goroot
,
"src/cmd"
),
false
}
pkgHandler
=
httpHandler
{
"/pkg/"
,
pathutil
.
Join
(
*
goroot
,
"src/pkg"
),
true
}
...
...
@@ -768,53 +767,6 @@ func redirect(w http.ResponseWriter, r *http.Request) (redirected bool) {
}
// TODO(gri): Should have a mapping from extension to handler, eventually.
// textExt[x] is true if the extension x indicates a text file, and false otherwise.
var
textExt
=
map
[
string
]
bool
{
".css"
:
false
,
// must be served raw
".js"
:
false
,
// must be served raw
}
func
isTextFile
(
path
string
)
bool
{
// if the extension is known, use it for decision making
if
isText
,
found
:=
textExt
[
pathutil
.
Ext
(
path
)];
found
{
return
isText
}
// the extension is not known; read an initial chunk of
// file and check if it looks like correct UTF-8; if it
// does, it's probably a text file
f
,
err
:=
os
.
Open
(
path
,
os
.
O_RDONLY
,
0
)
if
err
!=
nil
{
return
false
}
defer
f
.
Close
()
var
buf
[
1024
]
byte
n
,
err
:=
f
.
Read
(
buf
[
0
:
])
if
err
!=
nil
{
return
false
}
s
:=
string
(
buf
[
0
:
n
])
n
-=
utf8
.
UTFMax
// make sure there's enough bytes for a complete unicode char
for
i
,
c
:=
range
s
{
if
i
>
n
{
break
}
if
c
==
0xFFFD
||
c
<
' '
&&
c
!=
'\n'
&&
c
!=
'\t'
{
// decoding error or control character - not a text file
return
false
}
}
// likely a text file
return
true
}
func
serveTextFile
(
w
http
.
ResponseWriter
,
r
*
http
.
Request
,
abspath
,
relpath
,
title
string
)
{
src
,
err
:=
ioutil
.
ReadFile
(
abspath
)
if
err
!=
nil
{
...
...
@@ -922,7 +874,7 @@ type PageInfoMode uint
const
(
exportsOnly
PageInfoMode
=
1
<<
iota
// only keep exported stuff
genDoc
// generate documentation
genDoc
// generate documentation
)
...
...
src/cmd/godoc/index.go
View file @
4497960b
...
...
@@ -47,7 +47,7 @@ import (
"index/suffixarray"
"io/ioutil"
"os"
pathutil
"path"
"path"
"regexp"
"sort"
"strings"
...
...
@@ -430,8 +430,9 @@ func (a *AltWords) filter(s string) *AltWords {
// Indexer
// Adjust these flags as seems best.
const
excludeMainPackages
=
false
const
excludeTestFiles
=
false
const
includeNonGoFiles
=
true
const
includeMainPackages
=
true
const
includeTestFiles
=
true
type
IndexResult
struct
{
...
...
@@ -619,11 +620,14 @@ func pkgName(filename string) string {
}
func
(
x
*
Indexer
)
addFile
(
filename
string
)
*
ast
.
File
{
// addFile adds a file to the index if possible and returns the file set file
// and the file's AST if it was successfully parsed as a Go file. If addFile
// failed (that is, if the file was not added), it returns file == nil.
func
(
x
*
Indexer
)
addFile
(
filename
string
,
goFile
bool
)
(
file
*
token
.
File
,
ast
*
ast
.
File
)
{
// open file
f
,
err
:=
os
.
Open
(
filename
,
os
.
O_RDONLY
,
0
)
if
err
!=
nil
{
return
nil
return
}
defer
f
.
Close
()
...
...
@@ -643,59 +647,126 @@ func (x *Indexer) addFile(filename string) *ast.File {
panic
(
"internal error - file base incorrect"
)
}
// append file contents to x.sources
if
_
,
err
:=
x
.
sources
.
ReadFrom
(
f
);
err
!=
nil
{
x
.
sources
.
Truncate
(
base
)
// discard possibly added data
return
nil
// ignore files with I/O errors
}
// append file contents (src) to x.sources
if
_
,
err
:=
x
.
sources
.
ReadFrom
(
f
);
err
==
nil
{
src
:=
x
.
sources
.
Bytes
()[
base
:
]
// parse the file and in the process add it to the file set
src
:=
x
.
sources
.
Bytes
()[
base
:
]
// no need to reread the file
file
,
err
:=
parser
.
ParseFile
(
x
.
fset
,
filename
,
src
,
parser
.
ParseComments
)
if
err
!=
nil
{
// do not discard the added source code in this case
// because the file has been added to the file set and
// the source size must match the file set base
// TODO(gri): given a FileSet.RemoveFile() one might be
// able to discard the data here (worthwhile?)
return
nil
// ignore files with (parse) errors
if
goFile
{
// parse the file and in the process add it to the file set
if
ast
,
err
=
parser
.
ParseFile
(
x
.
fset
,
filename
,
src
,
parser
.
ParseComments
);
err
==
nil
{
file
=
x
.
fset
.
File
(
ast
.
Pos
())
// ast.Pos() is inside the file
return
}
// file has parse errors, and the AST may be incorrect -
// set lines information explicitly and index as ordinary
// text file (cannot fall through to the text case below
// because the file has already been added to the file set
// by the parser)
file
=
x
.
fset
.
File
(
token
.
Pos
(
base
))
// token.Pos(base) is inside the file
file
.
SetLinesForContent
(
src
)
ast
=
nil
return
}
if
isText
(
src
)
{
// only add the file to the file set (for the full text index)
file
=
x
.
fset
.
AddFile
(
filename
,
x
.
fset
.
Base
(),
len
(
src
))
file
.
SetLinesForContent
(
src
)
return
}
}
return
file
// discard possibly added data
x
.
sources
.
Truncate
(
base
-
1
)
// -1 to remove added byte 0 since no file was added
return
}
func
(
x
*
Indexer
)
visitFile
(
dirname
string
,
f
*
os
.
FileInfo
)
{
if
!
isGoFile
(
f
)
{
return
// Design note: Using an explicit white list of permitted files for indexing
// makes sure that the important files are included and massively reduces the
// number of files to index. The advantage over a blacklist is that unexpected
// (non-blacklisted) files won't suddenly explode the index.
//
// TODO(gri): We may want to make this list customizable, perhaps via a flag.
// Files are whitelisted if they have a file name or extension
// present as key in whitelisted.
var
whitelisted
=
map
[
string
]
bool
{
".bash"
:
true
,
".c"
:
true
,
".css"
:
true
,
".go"
:
true
,
".goc"
:
true
,
".h"
:
true
,
".html"
:
true
,
".js"
:
true
,
".out"
:
true
,
".py"
:
true
,
".s"
:
true
,
".sh"
:
true
,
".txt"
:
true
,
".xml"
:
true
,
"AUTHORS"
:
true
,
"CONTRIBUTORS"
:
true
,
"LICENSE"
:
true
,
"Makefile"
:
true
,
"PATENTS"
:
true
,
"README"
:
true
,
}
// isWhitelisted returns true if a file is on the list
// of "permitted" files for indexing.
func
isWhitelisted
(
filename
string
)
bool
{
key
:=
path
.
Ext
(
filename
)
if
key
==
""
{
// file has no extension - use entire filename
key
=
filename
}
return
whitelisted
[
key
]
}
path
:=
pathutil
.
Join
(
dirname
,
f
.
Name
)
if
excludeTestFiles
&&
(
!
isPkgFile
(
f
)
||
strings
.
HasPrefix
(
path
,
"test/"
))
{
func
(
x
*
Indexer
)
visitFile
(
dirname
string
,
f
*
os
.
FileInfo
)
{
if
!
f
.
IsRegular
()
{
return
}
if
excludeMainPackages
&&
pkgName
(
path
)
==
"main"
{
filename
:=
path
.
Join
(
dirname
,
f
.
Name
)
goFile
:=
false
switch
{
case
isGoFile
(
f
)
:
if
!
includeTestFiles
&&
(
!
isPkgFile
(
f
)
||
strings
.
HasPrefix
(
filename
,
"test/"
))
{
return
}
if
!
includeMainPackages
&&
pkgName
(
filename
)
==
"main"
{
return
}
goFile
=
true
case
!
includeNonGoFiles
||
!
isWhitelisted
(
filename
)
:
return
}
file
:=
x
.
addFile
(
path
)
file
,
fast
:=
x
.
addFile
(
filename
,
goFile
)
if
file
==
nil
{
return
return
// addFile failed
}
// we've got a file to index
x
.
current
=
x
.
fset
.
File
(
file
.
Pos
())
// file.Pos is in the current file
dir
,
_
:=
pathutil
.
Split
(
path
)
pak
:=
Pak
{
dir
,
file
.
Name
.
Name
}
x
.
file
=
&
File
{
path
,
pak
}
ast
.
Walk
(
x
,
file
)
if
fast
!=
nil
{
// we've got a Go file to index
x
.
current
=
file
dir
,
_
:=
path
.
Split
(
filename
)
pak
:=
Pak
{
dir
,
fast
.
Name
.
Name
}
x
.
file
=
&
File
{
filename
,
pak
}
ast
.
Walk
(
x
,
fast
)
}
// update statistics
// (count real file size as opposed to using the padded x.sources.Len())
x
.
stats
.
Bytes
+=
x
.
current
.
Size
()
x
.
stats
.
Bytes
+=
file
.
Size
()
x
.
stats
.
Files
++
x
.
stats
.
Lines
+=
x
.
current
.
LineCount
()
x
.
stats
.
Lines
+=
file
.
LineCount
()
}
...
...
src/cmd/godoc/utils.go
View file @
4497960b
...
...
@@ -15,11 +15,13 @@ import (
"strings"
"sync"
"time"
"utf8"
)
// An RWValue wraps a value and permits mutually exclusive
// access to it and records the time the value was last set.
//
type
RWValue
struct
{
mutex
sync
.
RWMutex
value
interface
{}
...
...
@@ -107,3 +109,63 @@ func writeFileAtomically(filename string, data []byte) os.Error {
}
return
os
.
Rename
(
f
.
Name
(),
filename
)
}
// isText returns true if a significant prefix of s looks like correct UTF-8;
// that is, if it is likely that s is human-readable text.
//
func
isText
(
s
[]
byte
)
bool
{
const
max
=
1024
// at least utf8.UTFMax
if
len
(
s
)
>
max
{
s
=
s
[
0
:
max
]
}
for
i
,
c
:=
range
string
(
s
)
{
if
i
+
utf8
.
UTFMax
>
len
(
s
)
{
// last char may be incomplete - ignore
break
}
if
c
==
0xFFFD
||
c
<
' '
&&
c
!=
'\n'
&&
c
!=
'\t'
{
// decoding error or control character - not a text file
return
false
}
}
return
true
}
// TODO(gri): Should have a mapping from extension to handler, eventually.
// textExt[x] is true if the extension x indicates a text file, and false otherwise.
var
textExt
=
map
[
string
]
bool
{
".css"
:
false
,
// must be served raw
".js"
:
false
,
// must be served raw
}
// isTextFile returns true if the file has a known extension indicating
// a text file, or if a significant chunk of the specified file looks like
// correct UTF-8; that is, if it is likely that the file contains human-
// readable text.
//
func
isTextFile
(
filename
string
)
bool
{
// if the extension is known, use it for decision making
if
isText
,
found
:=
textExt
[
pathutil
.
Ext
(
filename
)];
found
{
return
isText
}
// the extension is not known; read an initial chunk
// of the file and check if it looks like text
f
,
err
:=
os
.
Open
(
filename
,
os
.
O_RDONLY
,
0
)
if
err
!=
nil
{
return
false
}
defer
f
.
Close
()
var
buf
[
1024
]
byte
n
,
err
:=
f
.
Read
(
buf
[
0
:
])
if
err
!=
nil
{
return
false
}
return
isText
(
buf
[
0
:
n
])
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment