Commit 9cf37c37 authored by Rob Pike's avatar Rob Pike

docs: fold the prog.sh scripting from makehtml into htmlgen itself.

This allows us to drop some crufty scripting and provides a firmer
footing for building better tools for preparing documents with source
code inside.

Also eliminate line numbers from the examples and text.

R=golang-dev, adg
CC=golang-dev
https://golang.org/cl/4650069
parent a3420062
This diff is collapsed.
This diff is collapsed.
......@@ -2,46 +2,80 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Process plain text into HTML.
// If --html is set, process plain text into HTML.
// - h2's are made from lines followed by a line "----\n"
// - tab-indented blocks become <pre> blocks
// - tab-indented blocks become <pre> blocks with the first tab deleted
// - blank lines become <p> marks (except inside <pre> tags)
// - "quoted strings" become <code>quoted strings</code>
// Lines beginning !src define pieces of program source to be
// extracted from other files and injected as <pre> blocks.
// The syntax is simple: 1, 2, or 3 space-separated arguments:
//
// Whole file:
// !src foo.go
// One line (here the signature of main):
// !src foo.go /^func.main/
// Block of text, determined by start and end (here the body of main):
// !src foo.go /^func.main/ /^}/
//
// Patterns can be /regular.expression/, a decimal number, or $
// to signify the end of the file.
// TODO: the regular expression cannot contain spaces; does this matter?
package main
import (
"bufio"
"bytes"
"flag"
"fmt"
"io/ioutil"
"log"
"os"
"regexp"
"strconv"
"strings"
"template"
)
var (
// NOTE(review): lines is declared a second time in the var block below; only one declaration can remain.
lines = make([][]byte, 0, 2000) // probably big enough; grows if not
// html reports whether the input should be processed into HTML (the --html flag, default true).
html = flag.Bool("html", true, "process text into HTML")
)
var (
// lines holds the input and is reworked in place during processing.
lines = make([][]byte, 0, 20000)
// Byte-slice forms of the fixed strings the passes compare against or emit.
empty = []byte("")
newline = []byte("\n")
tab = []byte("\t")
quote = []byte(`"`)
// indent is the text expandTabs substitutes for each tab.
// NOTE(review): indent is declared twice below; only one declaration can remain.
indent = []byte{' ', ' ', ' ', ' '}
indent = []byte(" ")
// sectionMarker is the "----" line that follows a heading line in the input.
sectionMarker = []byte("----\n")
// preStart and preEnd open and close a <pre> block.
preStart = []byte("<pre>")
preEnd = []byte("</pre>\n")
// pp is the paragraph mark.
pp = []byte("<p>\n")
// srcPrefix introduces a line that programs expands into source text.
srcPrefix = []byte("!src")
)
// main runs the document pipeline: it parses the flags, reads the
// input, expands !src directives, applies the text-to-HTML passes only
// when --html is set, and writes the result.
func main() {
	flag.Parse()
	read()
	// Expand !src before any HTML pass so that the <pre> lines it
	// injects are present when coalesce(preStart, foldPre) runs.
	programs()
	// The HTML passes run exactly once and only under --html; running
	// them unconditionally as well would make --html=false ineffective.
	if *html {
		headings()
		coalesce(preStart, foldPre)
		coalesce(tab, foldTabs)
		paragraphs()
		quotes()
	}
	write()
}
// read turns standard input into a slice of lines.
func read() {
b := bufio.NewReader(os.Stdin)
for {
......@@ -56,6 +90,7 @@ func read() {
}
}
// write puts the result on standard output.
func write() {
b := bufio.NewWriter(os.Stdout)
for _, line := range lines {
......@@ -64,8 +99,104 @@ func write() {
b.Flush()
}
// each time prefix is found on a line, call fold and replace
// line with return value from fold.
// programs rewrites the global lines slice, replacing every line that
// starts with srcPrefix by the source text the directive selects.
// When --html is set the injected text is bracketed by a <pre> line
// (carrying the directive in an HTML comment) and a closing </pre>.
// All other lines are copied through untouched.
func programs() {
	out := make([][]byte, 0, len(lines)*3/2)
	for _, line := range lines {
		if !bytes.HasPrefix(line, srcPrefix) {
			out = append(out, line)
			continue
		}
		// Drop the trailing newline and the prefix; what is left is
		// the argument list of the directive.
		command := trim(line)[len(srcPrefix):]
		source := srcCommand(string(command))
		if *html {
			out = append(out, []byte(fmt.Sprintf("<pre><!--%s\n-->", command)))
		}
		for _, src := range source {
			out = append(out, htmlEscape(src))
		}
		if *html {
			out = append(out, preEnd)
		}
	}
	lines = out
}
// srcCommand processes one !src invocation. command holds one to three
// space-separated arguments: a file name, optionally followed by a
// start pattern and an end pattern (see match for the pattern forms).
// It returns the selected pieces of the file, split after each newline.
// A malformed command, an out-of-range line, or an end that precedes
// the start is reported with log.Fatalf, which ends the program.
func srcCommand(command string) [][]byte {
	// TODO: quoted args so we can have 'a b'?
	args := strings.Fields(command)
	if len(args) == 0 || len(args) > 3 {
		log.Fatalf("bad syntax for src command: %s", command)
	}
	file := args[0]
	src := bytes.SplitAfter(readFile(file), newline)
	// File plus zero args: whole file:
	//	!src file.go
	if len(args) == 1 {
		return src
	}
	start := match(file, 0, src, args[1])
	if start < 0 || start >= len(src) {
		log.Fatalf("%s: start %s is out of range", file, args[1])
	}
	// File plus one arg: one line:
	//	!src file.go /foo/
	if len(args) == 2 {
		return [][]byte{src[start]}
	}
	// File plus two args: range:
	//	!src file.go /foo/ /^}/
	end := match(file, start, src, args[2])
	if end < start || end >= len(src) {
		// Slicing with such bounds would panic; give a diagnostic instead.
		log.Fatalf("%s: end %s does not follow start %s", file, args[2], args[1])
	}
	return src[start : end+1] // +1 to include matched line.
}
// htmlEscape returns input made safe for inclusion in HTML. The input
// is handed back unchanged when --html is off or when it contains none
// of the characters & " < >; otherwise an escaped copy is returned.
func htmlEscape(input []byte) []byte {
	if *html && bytes.IndexAny(input, `&"<>`) >= 0 {
		escaped := new(bytes.Buffer)
		template.HTMLEscape(escaped, input)
		return escaped.Bytes()
	}
	return input
}
// readFile returns the entire contents of the named file for !src
// processing. A read error is passed to log.Fatal, which ends the
// program.
func readFile(name string) []byte {
	data, readErr := ioutil.ReadFile(name)
	if readErr != nil {
		log.Fatal(readErr)
	}
	return data
}
// match identifies the input line that matches the pattern in a !src invocation.
// If start>0, match lines starting there rather than at the beginning.
func match(file string, start int, lines [][]byte, pattern string) int {
// $ matches the end of the file.
if pattern == "$" {
return len(lines) - 1
}
// Number matches the line.
if i, err := strconv.Atoi(pattern); err == nil {
return i - 1 // Lines are 1-indexed.
}
// /regexp/ matches the line that matches the regexp.
if len(pattern) > 2 && pattern[0] == '/' && pattern[len(pattern)-1] == '/' {
re, err := regexp.Compile(pattern[1 : len(pattern)-1])
if err != nil {
log.Fatal(err)
}
for i := start; i < len(lines); i++ {
if re.Match(lines[i]) {
return i
}
}
log.Fatalf("%s: no match for %s", file, pattern)
}
log.Fatalf("unrecognized pattern: %s", pattern)
return 0
}
// coalesce combines lines. Each time prefix is found on a line,
// it calls fold and replaces the line with return value from fold.
func coalesce(prefix []byte, fold func(i int) (n int, line []byte)) {
j := 0 // output line number goes up by one each loop
for i := 0; i < len(lines); {
......@@ -82,7 +213,7 @@ func coalesce(prefix []byte, fold func(i int) (n int, line []byte)) {
lines = lines[0:j]
}
// return the <pre> block as a single slice
// foldPre returns the <pre> block as a single slice.
func foldPre(i int) (n int, line []byte) {
buf := new(bytes.Buffer)
for i < len(lines) {
......@@ -96,7 +227,7 @@ func foldPre(i int) (n int, line []byte) {
return n, buf.Bytes()
}
// return the tab-indented block as a single <pre>-bounded slice
// foldTabs returns the tab-indented block as a single <pre>-bounded slice.
func foldTabs(i int) (n int, line []byte) {
buf := new(bytes.Buffer)
buf.WriteString("<pre>\n")
......@@ -104,7 +235,7 @@ func foldTabs(i int) (n int, line []byte) {
if !bytes.HasPrefix(lines[i], tab) {
break
}
buf.Write(lines[i])
buf.Write(lines[i][1:]) // delete leading tab.
n++
i++
}
......@@ -112,6 +243,7 @@ func foldTabs(i int) (n int, line []byte) {
return n, buf.Bytes()
}
// headings turns sections into HTML sections.
func headings() {
b := bufio.NewWriter(os.Stdout)
for i, l := range lines {
......@@ -123,6 +255,7 @@ func headings() {
b.Flush()
}
// paragraphs turns blank lines into paragraph marks.
func paragraphs() {
for i, l := range lines {
if bytes.Equal(l, newline) {
......@@ -131,12 +264,14 @@ func paragraphs() {
}
}
// quotes applies codeQuotes to every line of the input, storing each
// result back in place, so that "x" becomes <code>x</code> throughout.
func quotes() {
	for i := range lines {
		lines[i] = codeQuotes(lines[i])
	}
}
// codeQuotes turns "x" in the line into <code>x</code>.
func codeQuotes(l []byte) []byte {
if bytes.HasPrefix(l, preStart) {
return l
......@@ -162,7 +297,7 @@ func codeQuotes(l []byte) []byte {
return buf.Bytes()
}
// drop trailing newline
// trim drops the trailing newline, if present.
func trim(l []byte) []byte {
n := len(l)
if n > 0 && l[n-1] == '\n' {
......@@ -171,7 +306,7 @@ func trim(l []byte) []byte {
return l
}
// expand tabs to spaces. don't worry about columns.
// expandTabs returns l with every tab replaced by indent. Columns are
// not tracked: each tab gets the same replacement wherever it falls.
func expandTabs(l []byte) []byte {
	const noLimit = -1 // bytes.Replace treats a negative count as "replace all"
	expanded := bytes.Replace(l, tab, indent, noLimit)
	return expanded
}
......@@ -7,7 +7,6 @@ set -e
TXT=${1:-go_tutorial.txt} # input file
HTML=$(basename $TXT .txt).html # output file (basename)
TMP=TEMP.txt # input to htmlgen
if ! test -w $HTML
then
......@@ -15,17 +14,4 @@ then
exit 1
fi
if grep -q '^--PROG' $TXT
then
echo >&2 makehtml: processing PROG sections
<$TXT >$TMP awk '
/^--PROG/ { system("sh ./prog.sh "$2" "$3" "$4" "); getline }
/^/ {print}
'
else
cp $TXT $TMP
fi
make htmlgen && ./htmlgen < $TMP > $HTML
rm -f $TMP
make htmlgen && ./htmlgen < $TXT > $HTML
#!/bin/sh
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# generate HTML for a program excerpt.
# first arg is file name
# second arg is awk pattern to match start line
# third arg is awk pattern to stop processing
#
# missing third arg means print one line
# third arg "END" means process rest of file
# missing second arg means process whole file
#
# examples:
#
# prog.sh foo.go # whole file
# prog.sh foo.go "/^func.main/" # signature of main
# prog.sh foo.go "/^func.main/" "/^}/" # body of main
#
# non-blank lines are annotated with line number in file
# line numbers are printed %.2d to make them equal-width for nice formatting.
# the format gives a leading 0. the format %2d gives a leading space but
# that appears to confuse sanjay's makehtml formatter into bungling quotes
# because it makes some lines look indented.
#
# the excerpt is written to standard output between a <pre> line, which
# records the arguments in an HTML comment, and a closing </pre> line.
echo "<pre> <!-- $* -->"
# choose the awk program by argument count. every branch reads the file
# named by the first argument and has its output piped through the sed
# script after esac, which escapes the characters & " < and >.
# in each awk program LINE prints the line number, a tab, and the line.
case $# in
3)
if test "$3" = "END" # $2 to end of file
then
awk '
function LINE() { printf("%.2d\t%s\n", NR, $0) }
BEGIN { printing = 0 }
'$2' { printing = 1; LINE(); getline }
printing { if($0 ~ /./) { LINE() } else { print "" } }
'
else # $2 through $3
awk '
function LINE() { printf("%.2d\t%s\n", NR, $0) }
BEGIN { printing = 0 }
'$2' { printing = 1; LINE(); getline }
'$3' && printing { if(printing) {printing = 0; LINE(); exit} }
printing { if($0 ~ /./) { LINE() } else { print "" } }
'
fi
;;
2) # one line
awk '
function LINE() { printf("%.2d\t%s\n", NR, $0) }
'$2' { LINE(); getline; exit }
'
;;
1) # whole file
awk '
function LINE() { printf("%.2d\t%s\n", NR, $0) }
{ if($0 ~ /./) { LINE() } else { print "" } }
'
;;
*)
# any other argument count: print a usage message on standard error.
echo >&2 usage: prog.sh file.go /func.main/ /^}/
esac <$1 |
sed '
s/&/\&amp;/g
s/"/\&quot;/g
s/</\&lt;/g
s/>/\&gt;/g
'
echo '</pre>'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment