Commit d01ee38f authored by Robert Griesemer's avatar Robert Griesemer

godoc: support for reading/writing (split) index files.

This CL implements a new godoc feature to save the search
index on disk. Use -write_index to create the search
index file named with -index_files. Use -index_files to
provide a glob pattern specifying index file(s) when
starting godoc; in this case the run-time indexer is not
run.

Known issues:
- saving/restoring full text index is not yet supported
- the list of flags and overall usage logic could use a
  cleanup

R=rsc, dsymonds
CC=golang-dev
https://golang.org/cl/4974045
parent 6b902628
......@@ -11,9 +11,18 @@ package main
const (
// zipFilename is the name of the .zip file
// containing the file system served by godoc.
zipFilename = "go.zip"
zipFilename = "godoc.zip"
// zipGoroot is the path of the goroot directory
// in the .zip file.
zipGoroot = "/home/username/go"
zipGoroot = "/home/user/go"
// indexFilenames is a glob pattern specifying
// files containing the search index served by
// godoc. The files are concatenated in sorted
// order (by filename).
// app-engine limit: file sizes must be <= 10MB;
// use "split -b8m indexfile index.split." to get
// smaller files.
indexFilenames = "index.split.*"
)
......@@ -23,11 +23,12 @@
// strings // newer version of the strings package
// ... //
// app.yaml // app engine control file
// go.zip // zip file containing the file system to serve
// godoc.zip // .zip file containing the file system to serve
// godoc // contains godoc sources
// appinit.go // this file instead of godoc/main.go
// appconfig.go // godoc for app engine configuration
// ... //
// index.split.* // index file(s) containing the search index to serve
//
// To run the app engine emulator locally:
//
......@@ -43,6 +44,7 @@ import (
"http"
"log"
"os"
"path"
)
func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.Error) {
......@@ -53,7 +55,16 @@ func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.E
func init() {
log.Println("initializing godoc ...")
log.Printf(".zip file = %s", zipFilename)
log.Printf(".zip GOROOT = %s", zipGoroot)
log.Printf("index files = %s", indexFilenames)
// initialize flags for app engine
*goroot = path.Join("/", zipGoroot) // fsHttp paths are relative to '/'
*indexEnabled = true
*indexFiles = indexFilenames
*maxResults = 0 // save space for now
*indexThrottle = 0.3 // in case *indexFiles is empty (and thus the indexer is run)
// read .zip file and set up file systems
const zipfile = zipFilename
......@@ -65,8 +76,8 @@ func init() {
fsHttp = NewHttpZipFS(rc, *goroot)
// initialize http handlers
initHandlers()
readTemplates()
initHandlers()
registerPublicHandlers(http.DefaultServeMux)
// initialize default directory tree with corresponding timestamp.
......@@ -75,12 +86,12 @@ func init() {
// initialize directory trees for user-defined file systems (-path flag).
initDirTrees()
// create search index
// TODO(gri) Disabled for now as it takes too long. Find a solution for this.
/*
*indexEnabled = true
go indexer()
*/
// initialize search index
if *indexEnabled {
if err := initIndex(); err != nil {
log.Fatalf("error initializing index: %s", err)
}
}
log.Println("godoc initialization complete")
}
......@@ -50,11 +50,17 @@ The flags are:
-index
enable identifier and full text search index
(no search box is shown if -index is not set)
-index_files=""
glob pattern specifying index files; if not empty,
the index is read from these files in sorted order
-index_throttle=0.75
index throttle value; a value of 0 means no time is allocated
to the indexer (the indexer will never finish), a value of 1.0
means that index creation is running at full throttle (other
goroutines may get no time while the index is built)
-write_index=false
write index to a file; the file name must be specified with
-index_files
-maxresults=10000
maximum number of full text search results shown
(no full text index is built if maxresults <= 0)
......
......@@ -64,6 +64,8 @@ var (
// search index
indexEnabled = flag.Bool("index", false, "enable search index")
// indexFiles is a glob pattern selecting pre-built index files. When it is
// non-empty the search index is read from the matching files (in sorted
// order) instead of being built by the run-time indexer.
indexFiles = flag.String("index_files", "", "glob pattern specifying index files; "+
	"if not empty, the index is read from these files in sorted order")
maxResults = flag.Int("maxresults", 10000, "maximum number of full text search results shown")
indexThrottle = flag.Float64("index_throttle", 0.75, "index throttle value; 0.0 = no time allocated, 1.0 = full throttle")
......@@ -1062,11 +1064,13 @@ func lookup(query string) (result SearchResult) {
// is the result accurate?
if *indexEnabled {
if _, ts := fsModified.get(); timestamp < ts {
// The index is older than the latest file system change
// under godoc's observation. Indexing may be in progress
// or start shortly (see indexer()).
// The index is older than the latest file system change under godoc's observation.
if *indexFiles != "" {
result.Alert = "Index not automatically updated: result may be inaccurate"
} else {
result.Alert = "Indexing in progress: result may be inaccurate"
}
}
} else {
result.Alert = "Search index disabled: no results available"
}
......@@ -1141,10 +1145,7 @@ func fsDirnames() <-chan string {
return c
}
func indexer() {
for {
if !indexUpToDate() {
// index possibly out of date - make a new one
func updateIndex() {
if *verbose {
log.Printf("updating index...")
}
......@@ -1161,6 +1162,13 @@ func indexer() {
log.Printf("before GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
runtime.GC()
log.Printf("after GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
}
func indexer() {
for {
if !indexUpToDate() {
// index possibly out of date - make a new one
updateIndex()
}
var delay int64 = 60 * 1e9 // by default, try every 60s
if *testDir != "" {
......@@ -1170,3 +1178,33 @@ func indexer() {
time.Sleep(delay)
}
}
// initIndex sets up the search index according to the -index_files flag.
//
// If the flag is empty, the periodic indexer is started in its own goroutine
// and initIndex returns nil immediately. Otherwise the flag is treated as a
// glob pattern: the matching files are sorted by name, concatenated, and
// decoded as a single Index, which is then installed as the current search
// index. Any glob, open, or decode error is returned unchanged.
func initIndex() os.Error {
	pattern := *indexFiles
	if pattern == "" {
		// No index files given: build the index at run time.
		go indexer()
		return nil
	}

	// Collect the index files; sorting guarantees that split pieces are
	// concatenated in the order they were produced.
	names, err := filepath.Glob(pattern)
	if err != nil {
		return err
	}
	sort.Strings(names)

	readers := make([]io.Reader, len(names))
	for i, name := range names {
		file, err := os.Open(name)
		if err != nil {
			return err
		}
		// Deliberately deferred to function exit: every file must stay
		// open until the combined reader below has been consumed.
		defer file.Close()
		readers[i] = file
	}

	// Decode one index from the concatenation of all files and publish it.
	index := new(Index)
	if err := index.Read(io.MultiReader(readers...)); err != nil {
		return err
	}
	searchIndex.set(index)
	return nil
}
......@@ -43,7 +43,9 @@ import (
"go/parser"
"go/token"
"go/scanner"
"gob"
"index/suffixarray"
"io"
"os"
"path/filepath"
"regexp"
......@@ -804,6 +806,37 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Ind
return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats}
}
// FileIndex is the serialized form of an Index: the subset of index data
// that Index.Write gob-encodes and Index.Read decodes. The field order
// matters because Index.Write fills the struct with a positional literal.
type FileIndex struct {
Words map[string]*LookupResult // copied from/to Index.words
Alts map[string]*AltWords // copied from/to Index.alts
Snippets []*Snippet // copied from/to Index.snippets
}
// Write gob-encodes the words, alts, and snippets of x as a FileIndex and
// writes the result to w. It returns the encoder's error, if any.
//
// Write panics if x carries a full text index (x.suffixes != nil), since
// that part of the index cannot be written yet.
func (x *Index) Write(w io.Writer) os.Error {
	if x.suffixes != nil {
		panic("no support for writing full text index yet")
	}
	enc := gob.NewEncoder(w)
	return enc.Encode(FileIndex{
		Words:    x.words,
		Alts:     x.alts,
		Snippets: x.snippets,
	})
}
// Read gob-decodes a FileIndex from r and stores its words, alts, and
// snippets in x; x must not be nil. The remaining fields of x are left
// untouched. If decoding fails, the error is returned and x is not modified.
func (x *Index) Read(r io.Reader) os.Error {
	fx := new(FileIndex)
	err := gob.NewDecoder(r).Decode(fx)
	if err != nil {
		return err
	}
	x.words, x.alts, x.snippets = fx.Words, fx.Alts, fx.Snippets
	return nil
}
// Stats() returns index statistics.
func (x *Index) Stats() Statistics {
return x.stats
......
......@@ -54,6 +54,9 @@ var (
// (with e.g.: zip -r go.zip $GOROOT -i \*.go -i \*.html -i \*.css -i \*.js -i \*.txt -i \*.c -i \*.h -i \*.s -i \*.png -i \*.jpg -i \*.sh -i favicon.ico)
zipfile = flag.String("zip", "", "zip file providing the file system to serve; disabled if empty")
// file-based index
writeIndex = flag.Bool("write_index", false, "write index to a file; the file name must be specified with -index_files")
// periodic sync
syncCmd = flag.String("sync", "", "sync command; disabled if empty")
syncMin = flag.Int("sync_minutes", 0, "sync interval in minutes; disabled if <= 0")
......@@ -221,8 +224,8 @@ func main() {
flag.Usage = usage
flag.Parse()
// Check usage: either server and no args, or command line and args
if (*httpAddr != "") != (flag.NArg() == 0) {
// Check usage: either server and no args, command line and args, or index creation mode
if (*httpAddr != "") != (flag.NArg() == 0) && !*writeIndex {
usage()
}
......@@ -253,6 +256,39 @@ func main() {
readTemplates()
initHandlers()
if (*indexEnabled || *writeIndex) && *indexFiles != "" && *maxResults > 0 {
log.Println("warning: no support for full-text index yet (setting -maxresults to 0)")
*maxResults = 0
}
if *writeIndex {
if *indexFiles == "" {
log.Fatal("no index files specified")
}
log.Println("initialize file systems")
*verbose = true // want to see what happens
initFSTree()
initDirTrees()
*indexThrottle = 1
updateIndex()
log.Println("writing index file", *indexFiles)
f, err := os.Create(*indexFiles)
if err != nil {
log.Fatal(err)
}
index, _ := searchIndex.get()
err = index.(*Index).Write(f)
if err != nil {
log.Fatal(err)
}
log.Println("done")
return
}
if *httpAddr != "" {
// HTTP server mode.
var handler http.Handler = http.DefaultServeMux
......@@ -304,9 +340,11 @@ func main() {
}()
}
// Start indexing goroutine.
// Initialize search index.
if *indexEnabled {
go indexer()
if err := initIndex(); err != nil {
log.Fatalf("error initializing index: %s", err)
}
}
// Start http server.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment