Commit d01ee38f authored by Robert Griesemer

godoc: support for reading/writing (split) index files.

This CL implements a new godoc feature to save the search
index on disk. Use -write_index to create the search
index file named with -index_files. Use -index_files to
provide a glob pattern specifying index file(s) when
starting godoc; in this case the run-time indexer is not
run.

Known issues:
- saving/restoring full text index is not yet supported
- the list of flags and overall usage logic could use a
  cleanup

R=rsc, dsymonds
CC=golang-dev
https://golang.org/cl/4974045
parent 6b902628
...@@ -11,9 +11,18 @@ package main ...@@ -11,9 +11,18 @@ package main
const ( const (
// zipFilename is the name of the .zip file // zipFilename is the name of the .zip file
// containing the file system served by godoc. // containing the file system served by godoc.
zipFilename = "go.zip" zipFilename = "godoc.zip"
// zipGoroot is the path of the goroot directory // zipGoroot is the path of the goroot directory
// in the .zip file. // in the .zip file.
zipGoroot = "/home/username/go" zipGoroot = "/home/user/go"
// indexFilenames is a glob pattern specifying
// files containing the search index served by
// godoc. The files are concatenated in sorted
// order (by filename).
// app-engine limit: file sizes must be <= 10MB;
// use "split -b8m indexfile index.split." to get
// smaller files.
indexFilenames = "index.split.*"
) )
...@@ -23,11 +23,12 @@ ...@@ -23,11 +23,12 @@
// strings // newer version of the strings package // strings // newer version of the strings package
// ... // // ... //
// app.yaml // app engine control file // app.yaml // app engine control file
// go.zip // zip file containing the file system to serve // godoc.zip // .zip file containing the file system to serve
// godoc // contains godoc sources // godoc // contains godoc sources
// appinit.go // this file instead of godoc/main.go // appinit.go // this file instead of godoc/main.go
// appconfig.go // godoc for app engine configuration // appconfig.go // godoc for app engine configuration
// ... // // ... //
// index.split.* // index file(s) containing the search index to serve
// //
// To run the app engine emulator locally: // To run the app engine emulator locally:
// //
...@@ -43,6 +44,7 @@ import ( ...@@ -43,6 +44,7 @@ import (
"http" "http"
"log" "log"
"os" "os"
"path"
) )
func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.Error) { func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.Error) {
...@@ -53,7 +55,16 @@ func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.E ...@@ -53,7 +55,16 @@ func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.E
func init() { func init() {
log.Println("initializing godoc ...") log.Println("initializing godoc ...")
log.Printf(".zip file = %s", zipFilename)
log.Printf(".zip GOROOT = %s", zipGoroot)
log.Printf("index files = %s", indexFilenames)
// initialize flags for app engine
*goroot = path.Join("/", zipGoroot) // fsHttp paths are relative to '/' *goroot = path.Join("/", zipGoroot) // fsHttp paths are relative to '/'
*indexEnabled = true
*indexFiles = indexFilenames
*maxResults = 0 // save space for now
*indexThrottle = 0.3 // in case *indexFiles is empty (and thus the indexer is run)
// read .zip file and set up file systems // read .zip file and set up file systems
const zipfile = zipFilename const zipfile = zipFilename
...@@ -65,8 +76,8 @@ func init() { ...@@ -65,8 +76,8 @@ func init() {
fsHttp = NewHttpZipFS(rc, *goroot) fsHttp = NewHttpZipFS(rc, *goroot)
// initialize http handlers // initialize http handlers
initHandlers()
readTemplates() readTemplates()
initHandlers()
registerPublicHandlers(http.DefaultServeMux) registerPublicHandlers(http.DefaultServeMux)
// initialize default directory tree with corresponding timestamp. // initialize default directory tree with corresponding timestamp.
...@@ -75,12 +86,12 @@ func init() { ...@@ -75,12 +86,12 @@ func init() {
// initialize directory trees for user-defined file systems (-path flag). // initialize directory trees for user-defined file systems (-path flag).
initDirTrees() initDirTrees()
// create search index // initialize search index
// TODO(gri) Disabled for now as it takes too long. Find a solution for this. if *indexEnabled {
/* if err := initIndex(); err != nil {
*indexEnabled = true log.Fatalf("error initializing index: %s", err)
go indexer() }
*/ }
log.Println("godoc initialization complete") log.Println("godoc initialization complete")
} }
...@@ -50,11 +50,17 @@ The flags are: ...@@ -50,11 +50,17 @@ The flags are:
-index -index
enable identifier and full text search index enable identifier and full text search index
(no search box is shown if -index is not set) (no search box is shown if -index is not set)
-index_files=""
glob pattern specifying index files; if not empty,
the index is read from these files in sorted order
-index_throttle=0.75 -index_throttle=0.75
index throttle value; a value of 0 means no time is allocated index throttle value; a value of 0 means no time is allocated
to the indexer (the indexer will never finish), a value of 1.0 to the indexer (the indexer will never finish), a value of 1.0
means that index creation is running at full throttle (other means that index creation is running at full throttle (other
goroutines may get no time while the index is built) goroutines may get no time while the index is built)
-write_index=false
write index to a file; the file name must be specified with
-index_files
-maxresults=10000 -maxresults=10000
maximum number of full text search results shown maximum number of full text search results shown
(no full text index is built if maxresults <= 0) (no full text index is built if maxresults <= 0)
......
...@@ -63,7 +63,9 @@ var ( ...@@ -63,7 +63,9 @@ var (
templateDir = flag.String("templates", "", "directory containing alternate template files") templateDir = flag.String("templates", "", "directory containing alternate template files")
// search index // search index
indexEnabled = flag.Bool("index", false, "enable search index") indexEnabled = flag.Bool("index", false, "enable search index")
indexFiles = flag.String("index_files", "", "glob pattern specifying index files;"+
"if not empty, the index is read from these files in sorted order")
maxResults = flag.Int("maxresults", 10000, "maximum number of full text search results shown") maxResults = flag.Int("maxresults", 10000, "maximum number of full text search results shown")
indexThrottle = flag.Float64("index_throttle", 0.75, "index throttle value; 0.0 = no time allocated, 1.0 = full throttle") indexThrottle = flag.Float64("index_throttle", 0.75, "index throttle value; 0.0 = no time allocated, 1.0 = full throttle")
...@@ -1062,10 +1064,12 @@ func lookup(query string) (result SearchResult) { ...@@ -1062,10 +1064,12 @@ func lookup(query string) (result SearchResult) {
// is the result accurate? // is the result accurate?
if *indexEnabled { if *indexEnabled {
if _, ts := fsModified.get(); timestamp < ts { if _, ts := fsModified.get(); timestamp < ts {
// The index is older than the latest file system change // The index is older than the latest file system change under godoc's observation.
// under godoc's observation. Indexing may be in progress if *indexFiles != "" {
// or start shortly (see indexer()). result.Alert = "Index not automatically updated: result may be inaccurate"
result.Alert = "Indexing in progress: result may be inaccurate" } else {
result.Alert = "Indexing in progress: result may be inaccurate"
}
} }
} else { } else {
result.Alert = "Search index disabled: no results available" result.Alert = "Search index disabled: no results available"
...@@ -1141,26 +1145,30 @@ func fsDirnames() <-chan string { ...@@ -1141,26 +1145,30 @@ func fsDirnames() <-chan string {
return c return c
} }
// updateIndex builds a fresh index with NewIndex from the directories
// delivered by fsDirnames and installs it with searchIndex.set. A full
// text index is requested only if *maxResults > 0, and *indexThrottle is
// passed through as the throttle value. If *verbose is set, the build
// time and the index statistics are logged. Heap statistics are always
// logged immediately before and after an explicit garbage collection.
func updateIndex() {
	if *verbose {
		log.Printf("updating index...")
	}

	began := time.Nanoseconds()
	fresh := NewIndex(fsDirnames(), *maxResults > 0, *indexThrottle)
	ended := time.Nanoseconds()
	searchIndex.set(fresh)

	if *verbose {
		st := fresh.Stats()
		// Truncate to whole milliseconds first, then express as seconds.
		elapsed := float64((ended-began)/1e6) / 1e3
		log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
			elapsed, st.Bytes, st.Files, st.Lines, st.Words, st.Spots)
	}

	// Report heap usage on both sides of a forced collection.
	log.Printf("before GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
	runtime.GC()
	log.Printf("after GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
}
func indexer() { func indexer() {
for { for {
if !indexUpToDate() { if !indexUpToDate() {
// index possibly out of date - make a new one // index possibly out of date - make a new one
if *verbose { updateIndex()
log.Printf("updating index...")
}
start := time.Nanoseconds()
index := NewIndex(fsDirnames(), *maxResults > 0, *indexThrottle)
stop := time.Nanoseconds()
searchIndex.set(index)
if *verbose {
secs := float64((stop-start)/1e6) / 1e3
stats := index.Stats()
log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
}
log.Printf("before GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
runtime.GC()
log.Printf("after GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
} }
var delay int64 = 60 * 1e9 // by default, try every 60s var delay int64 = 60 * 1e9 // by default, try every 60s
if *testDir != "" { if *testDir != "" {
...@@ -1170,3 +1178,33 @@ func indexer() { ...@@ -1170,3 +1178,33 @@ func indexer() {
time.Sleep(delay) time.Sleep(delay)
} }
} }
// initIndex sets up the search index.
//
// If *indexFiles is empty, the periodic indexer is started in its own
// goroutine and nil is returned. Otherwise *indexFiles is used as a glob
// pattern: the matching files are sorted by name, opened, concatenated
// into a single stream and decoded with (*Index).Read; the result is
// installed with searchIndex.set. Every opened file is closed when
// initIndex returns. The first error encountered is returned unchanged.
//
// NOTE(review): when the pattern matches no files the decoder is handed
// an empty stream, so the caller only sees the decoder's own error.
func initIndex() os.Error {
	pattern := *indexFiles
	if pattern == "" {
		// No index files given: build the index at run time instead.
		go indexer()
		return nil
	}

	names, err := filepath.Glob(pattern)
	if err != nil {
		return err
	}
	// The split files must be read back in the order they were produced.
	sort.Strings(names)

	readers := make([]io.Reader, len(names))
	for i, name := range names {
		file, err := os.Open(name)
		if err != nil {
			return err
		}
		// The files must stay open until the index has been read below,
		// so closing is deferred to function exit on purpose.
		defer file.Close()
		readers[i] = file
	}

	index := new(Index)
	err = index.Read(io.MultiReader(readers...))
	if err != nil {
		return err
	}
	searchIndex.set(index)
	return nil
}
...@@ -43,7 +43,9 @@ import ( ...@@ -43,7 +43,9 @@ import (
"go/parser" "go/parser"
"go/token" "go/token"
"go/scanner" "go/scanner"
"gob"
"index/suffixarray" "index/suffixarray"
"io"
"os" "os"
"path/filepath" "path/filepath"
"regexp" "regexp"
...@@ -804,6 +806,37 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Ind ...@@ -804,6 +806,37 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Ind
return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats} return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats}
} }
// FileIndex holds the parts of an Index that (*Index).Write encodes with
// gob and (*Index).Read decodes again. The fields are exported so that
// gob can see them.
type FileIndex struct {
// Words is saved from and restored to the words field of Index.
Words map[string]*LookupResult
// Alts is saved from and restored to the alts field of Index.
Alts map[string]*AltWords
// Snippets is saved from and restored to the snippets field of Index.
Snippets []*Snippet
}
// Write gob-encodes the words, alts and snippets of index x to w and
// returns the encoder's error, if any. It panics if x carries a full
// text index (x.suffixes != nil), which cannot be written yet.
func (x *Index) Write(w io.Writer) os.Error {
	if x.suffixes != nil {
		panic("no support for writing full text index yet")
	}
	enc := gob.NewEncoder(w)
	return enc.Encode(FileIndex{
		Words:    x.words,
		Alts:     x.alts,
		Snippets: x.snippets,
	})
}
// Read gob-decodes a FileIndex from r and stores its words, alts and
// snippets in x; x must not be nil. If decoding fails, the error is
// returned and x is left untouched.
func (x *Index) Read(r io.Reader) os.Error {
	fx := new(FileIndex)
	err := gob.NewDecoder(r).Decode(fx)
	if err == nil {
		x.words, x.alts, x.snippets = fx.Words, fx.Alts, fx.Snippets
	}
	return err
}
// Stats() returns index statistics. // Stats() returns index statistics.
func (x *Index) Stats() Statistics { func (x *Index) Stats() Statistics {
return x.stats return x.stats
......
...@@ -54,6 +54,9 @@ var ( ...@@ -54,6 +54,9 @@ var (
// (with e.g.: zip -r go.zip $GOROOT -i \*.go -i \*.html -i \*.css -i \*.js -i \*.txt -i \*.c -i \*.h -i \*.s -i \*.png -i \*.jpg -i \*.sh -i favicon.ico) // (with e.g.: zip -r go.zip $GOROOT -i \*.go -i \*.html -i \*.css -i \*.js -i \*.txt -i \*.c -i \*.h -i \*.s -i \*.png -i \*.jpg -i \*.sh -i favicon.ico)
zipfile = flag.String("zip", "", "zip file providing the file system to serve; disabled if empty") zipfile = flag.String("zip", "", "zip file providing the file system to serve; disabled if empty")
// file-based index
writeIndex = flag.Bool("write_index", false, "write index to a file; the file name must be specified with -index_files")
// periodic sync // periodic sync
syncCmd = flag.String("sync", "", "sync command; disabled if empty") syncCmd = flag.String("sync", "", "sync command; disabled if empty")
syncMin = flag.Int("sync_minutes", 0, "sync interval in minutes; disabled if <= 0") syncMin = flag.Int("sync_minutes", 0, "sync interval in minutes; disabled if <= 0")
...@@ -221,8 +224,8 @@ func main() { ...@@ -221,8 +224,8 @@ func main() {
flag.Usage = usage flag.Usage = usage
flag.Parse() flag.Parse()
// Check usage: either server and no args, or command line and args // Check usage: either server and no args, command line and args, or index creation mode
if (*httpAddr != "") != (flag.NArg() == 0) { if (*httpAddr != "") != (flag.NArg() == 0) && !*writeIndex {
usage() usage()
} }
...@@ -253,6 +256,39 @@ func main() { ...@@ -253,6 +256,39 @@ func main() {
readTemplates() readTemplates()
initHandlers() initHandlers()
if (*indexEnabled || *writeIndex) && *indexFiles != "" && *maxResults > 0 {
log.Println("warning: no support for full-text index yet (setting -maxresults to 0)")
*maxResults = 0
}
if *writeIndex {
if *indexFiles == "" {
log.Fatal("no index files specified")
}
log.Println("initialize file systems")
*verbose = true // want to see what happens
initFSTree()
initDirTrees()
*indexThrottle = 1
updateIndex()
log.Println("writing index file", *indexFiles)
f, err := os.Create(*indexFiles)
if err != nil {
log.Fatal(err)
}
index, _ := searchIndex.get()
err = index.(*Index).Write(f)
if err != nil {
log.Fatal(err)
}
log.Println("done")
return
}
if *httpAddr != "" { if *httpAddr != "" {
// HTTP server mode. // HTTP server mode.
var handler http.Handler = http.DefaultServeMux var handler http.Handler = http.DefaultServeMux
...@@ -304,9 +340,11 @@ func main() { ...@@ -304,9 +340,11 @@ func main() {
}() }()
} }
// Start indexing goroutine. // Initialize search index.
if *indexEnabled { if *indexEnabled {
go indexer() if err := initIndex(); err != nil {
log.Fatalf("error initializing index: %s", err)
}
} }
// Start http server. // Start http server.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment