docs: fold the prog.sh scripting from makehtml into htmlgen itself.

This allows us to drop some crufty scripting and provides a firmer footing for building better tools for preparing documents with source code inside. Also eliminate line numbers from the examples and text. R=golang-dev, adg CC=golang-dev https://golang.org/cl/4650069

docs: fold the prog.sh scripting from makehtml into htmlgen itself.
This allows us to drop some crufty scripting and provides a firmer footing for building better tools for preparing documents with source code inside. Also eliminate line numbers from the examples and text. R=golang-dev, adg CC=golang-dev https://golang.org/cl/4650069
9cf37c37 · Rob Pike · a3420062 · 9cf37c37 · 9cf37c37 · 9cf37c37
Commit 9cf37c37 authored Jul 04, 2011 by Rob Pike
Showing with 147 additions and 98 deletions

go_tutorial.html doc/go_tutorial.html +0 -0

go_tutorial.txt doc/go_tutorial.txt +0 -0

htmlgen.go doc/htmlgen.go +146 -11

makehtml doc/makehtml +1 -15

prog.sh doc/prog.sh +0 -72

No files found.
--- a/doc/go_tutorial.html
+++ b/doc/go_tutorial.html
--- a/doc/go_tutorial.txt
+++ b/doc/go_tutorial.txt
--- a/doc/htmlgen.go
+++ b/doc/htmlgen.go
@@ -2,46 +2,80 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// Process plain text into HTML.
+// If --html is set, process plain text into HTML.
 //	- h2's are made from lines followed by a line "----\n"
-//	- tab-indented blocks become <pre> blocks
+//	- tab-indented blocks become <pre> blocks with the first tab deleted
 //	- blank lines become <p> marks (except inside <pre> tags)
 //	- "quoted strings" become <code>quoted strings</code>

+// Lines beginning !src define pieces of program source to be
+// extracted from other files and injected as <pre> blocks.
+// The syntax is simple: 1, 2, or 3 space-separated arguments:
+//
+// Whole file:
+//	!src foo.go
+// One line (here the signature of main):
+//	!src foo.go /^func.main/
+// Block of text, determined by start and end (here the body of main):
+//	!src foo.go /^func.main/ /^}/
+//
+// Patterns can be /regular.expression/, a decimal number, or $
+// to signify the end of the file.
+// TODO: the regular expression cannot contain spaces; does this matter?
+
 package main

 import (
 	"bufio"
 	"bytes"
+	"flag"
+	"fmt"
+	"io/ioutil"
 	"log"
 	"os"
+	"regexp"
+	"strconv"
+	"strings"
+	"template"
 )

 var (
-	lines = make([][]byte, 0, 2000) // probably big enough; grows if not
+	html = flag.Bool("html", true, "process text into HTML")
+)
+
+var (
+	// lines holds the input and is reworked in place during processing.
+	lines = make([][]byte, 0, 20000)

 	empty   = []byte("")
 	newline = []byte("\n")
 	tab     = []byte("\t")
 	quote   = []byte(`"`)
-	indent  = []byte{' ', ' ', ' ', ' '}
+	indent  = []byte("    ")

 	sectionMarker = []byte("----\n")
 	preStart      = []byte("<pre>")
 	preEnd        = []byte("</pre>\n")
 	pp            = []byte("<p>\n")
+
+	srcPrefix = []byte("!src")
 )

+// main reads the text from standard input, injects the source selected by
+// !src commands and, if --html is set, converts the text to HTML before
+// writing the result to standard output.
 func main() {
+	flag.Parse()
 	read()
+	programs()
+	if *html {
 		headings()
 		coalesce(preStart, foldPre)
 		coalesce(tab, foldTabs)
 		paragraphs()
 		quotes()
+	}
 	write()
 }

+// read turns standard input into a slice of lines.
 func read() {
 	b := bufio.NewReader(os.Stdin)
 	for {
@@ -56,6 +90,7 @@ func read() {
 	}
 }

+// write puts the result on standard output.
 func write() {
 	b := bufio.NewWriter(os.Stdout)
 	for _, line := range lines {
@@ -64,8 +99,104 @@ func write() {
 	b.Flush()
 }

-// each time prefix is found on a line, call fold and replace
-// line with return value from fold.
+// programs rewrites the global lines slice, replacing every line that
+// begins with !src by the source text that srcCommand selects for it.
+// When --html is set the injected text is wrapped in a <pre> block whose
+// opening line records the !src arguments in an HTML comment.
+func programs() {
+	out := make([][]byte, 0, len(lines)*3/2)
+	for _, in := range lines {
+		if !bytes.HasPrefix(in, srcPrefix) {
+			// Ordinary text passes through untouched.
+			out = append(out, in)
+			continue
+		}
+		args := trim(in)[len(srcPrefix):]
+		excerpt := srcCommand(string(args))
+		if *html {
+			header := fmt.Sprintf("<pre><!--%s\n-->", args)
+			out = append(out, []byte(header))
+		}
+		for _, src := range excerpt {
+			out = append(out, htmlEscape(src))
+		}
+		if *html {
+			out = append(out, preEnd)
+		}
+	}
+	lines = out
+}
+
+// srcCommand processes one !src invocation. The command is the text that
+// follows "!src": a file name optionally followed by one or two patterns
+// in the forms understood by match. The result is the whole file (no
+// pattern), the one line selected by the pattern, or the inclusive range
+// from the first selected line through the second. Errors are fatal.
+func srcCommand(command string) [][]byte {
+	// TODO: quoted args so we can have 'a b'?
+	args := strings.Fields(command)
+	if len(args) == 0 || len(args) > 3 {
+		// Fatalf rather than Fatal so that %s is actually formatted.
+		log.Fatalf("bad syntax for src command: %s", command)
+	}
+	file := args[0]
+	lines := bytes.SplitAfter(readFile(file), newline)
+	// File plus zero args: whole file:
+	//	!src file.go
+	if len(args) == 1 {
+		return lines
+	}
+	start := match(file, 0, lines, args[1])
+	// Report an out-of-range line number instead of panicking on the index.
+	if start < 0 || start >= len(lines) {
+		log.Fatalf("%s: start %s is out of range", file, args[1])
+	}
+	// File plus one arg: one line:
+	//	!src file.go /foo/
+	if len(args) == 2 {
+		return [][]byte{lines[start]}
+	}
+	// File plus two args: range:
+	//	!src file.go /foo/ /^}/
+	end := match(file, start, lines, args[2])
+	// The end must not precede the start or run past the file.
+	if end < start || end >= len(lines) {
+		log.Fatalf("%s: end %s is out of range", file, args[2])
+	}
+	return lines[start : end+1] // +1 to include matched line.
+}
+
+// htmlEscape makes sure input is HTML clean, if necessary.
+func htmlEscape(input []byte) []byte {
+	if !*html || bytes.IndexAny(input, `&"<>`) < 0 {
+		return input
+	}
+	var b bytes.Buffer
+	template.HTMLEscape(&b, input)
+	return b.Bytes()
+}
+
+// readFile returns the contents of the named file for use by a !src
+// command. A failure to read the file is fatal.
+func readFile(name string) []byte {
+	data, err := ioutil.ReadFile(name)
+	if err != nil {
+		log.Fatal(err)
+	}
+	return data
+}
+
+// match returns the index in lines of the line selected by pattern, which
+// takes one of the forms accepted by a !src invocation:
+//	$         the last element of lines
+//	N         a decimal line number, counted from 1
+//	/regexp/  the first line at or after start that matches regexp
+// file is used only in error messages. A bad regexp, a regexp that matches
+// no line, or an unrecognized pattern is fatal.
+func match(file string, start int, lines [][]byte, pattern string) int {
+	if pattern == "$" {
+		return len(lines) - 1
+	}
+	if n, err := strconv.Atoi(pattern); err == nil {
+		// Convert the 1-indexed line number to a slice index.
+		return n - 1
+	}
+	// Anything else must be a non-empty expression between slashes.
+	last := len(pattern) - 1
+	if last < 2 || pattern[0] != '/' || pattern[last] != '/' {
+		log.Fatalf("unrecognized pattern: %s", pattern)
+	}
+	re, err := regexp.Compile(pattern[1:last])
+	if err != nil {
+		log.Fatal(err)
+	}
+	for i, n := start, len(lines); i < n; i++ {
+		if re.Match(lines[i]) {
+			return i
+		}
+	}
+	log.Fatalf("%s: no match for %s", file, pattern)
+	return 0
+}
+
+// coalesce combines lines. Each time prefix is found on a line,
+// it calls fold and replaces the line with return value from fold.
 func coalesce(prefix []byte, fold func(i int) (n int, line []byte)) {
 	j := 0 // output line number goes up by one each loop
 	for i := 0; i < len(lines); {
@@ -82,7 +213,7 @@ func coalesce(prefix []byte, fold func(i int) (n int, line []byte)) {
 	lines = lines[0:j]
 }

-// return the <pre> block as a single slice
+// foldPre returns the <pre> block as a single slice.
 func foldPre(i int) (n int, line []byte) {
 	buf := new(bytes.Buffer)
 	for i < len(lines) {
@@ -96,7 +227,7 @@ func foldPre(i int) (n int, line []byte) {
 	return n, buf.Bytes()
 }

-// return the tab-indented block as a single <pre>-bounded slice
+// foldTabs returns the tab-indented block as a single <pre>-bounded slice.
 func foldTabs(i int) (n int, line []byte) {
 	buf := new(bytes.Buffer)
 	buf.WriteString("<pre>\n")
@@ -104,7 +235,7 @@ func foldTabs(i int) (n int, line []byte) {
 		if !bytes.HasPrefix(lines[i], tab) {
 			break
 		}
-		buf.Write(lines[i])
+		buf.Write(lines[i][1:]) // delete leading tab.
 		n++
 		i++
 	}
@@ -112,6 +243,7 @@ func foldTabs(i int) (n int, line []byte) {
 	return n, buf.Bytes()
 }

+// headings turns sections into HTML sections.
 func headings() {
 	b := bufio.NewWriter(os.Stdout)
 	for i, l := range lines {
@@ -123,6 +255,7 @@ func headings() {
 	b.Flush()
 }

+// paragraphs turns blank lines into paragraph marks.
 func paragraphs() {
 	for i, l := range lines {
 		if bytes.Equal(l, newline) {
@@ -131,12 +264,14 @@ func paragraphs() {
 	}
 }

+// quotes turns "x" in the file into <code>x</code> by applying codeQuotes
+// to every element of lines in place.
 func quotes() {
 	for i, l := range lines {
 		lines[i] = codeQuotes(l)
 	}
 }

+// codeQuotes turns "x" in the line into <code>x</code>.
 func codeQuotes(l []byte) []byte {
 	if bytes.HasPrefix(l, preStart) {
 		return l
@@ -162,7 +297,7 @@ func codeQuotes(l []byte) []byte {
 	return buf.Bytes()
 }

-// drop trailing newline
+// trim drops the trailing newline, if present.
 func trim(l []byte) []byte {
 	n := len(l)
 	if n > 0 && l[n-1] == '\n' {
@@ -171,7 +306,7 @@ func trim(l []byte) []byte {
 	return l
 }

-// expand tabs to spaces. don't worry about columns.
+// expandTabs expands tabs to spaces, replacing every tab in l with indent.
+// It doesn't worry about columns.
 func expandTabs(l []byte) []byte {
 	return bytes.Replace(l, tab, indent, -1)
 }
--- a/doc/makehtml
+++ b/doc/makehtml
@@ -7,7 +7,6 @@ set -e

 TXT=${1:-go_tutorial.txt}		# input file
 HTML=$(basename $TXT .txt).html		# output file (basename)
-TMP=TEMP.txt				# input to htmlgen

 if ! test -w $HTML
 then
@@ -15,17 +14,4 @@ then
 	exit 1
 fi

-if grep -q '^--PROG' $TXT
-then
-	echo >&2 makehtml: processing PROG sections
-	<$TXT >$TMP awk '
-		/^--PROG/ { system("sh ./prog.sh "$2" "$3" "$4" "); getline }
-		/^/ {print}
-	'
-else
-	cp $TXT $TMP
-fi
-
-make htmlgen && ./htmlgen < $TMP > $HTML
-
-rm -f $TMP
+make htmlgen && ./htmlgen < $TXT > $HTML
--- a/doc/prog.sh
+++ b/doc/prog.sh
-#!/bin/sh
-# Copyright 2009 The Go Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-# generate HTML for a program excerpt.
-# first arg is file name
-# second arg is awk pattern to match start line
-# third arg is awk pattern to stop processing
-#
-# missing third arg means print one line
-# third arg "END" means proces rest of file
-# missing second arg means process whole file
-#
-# examples:
-#
-#	prog.sh foo.go                       # whole file
-#	prog.sh foo.go "/^func.main/"        # signature of main
-#	prog.sh foo.go "/^func.main/" "/^}/  # body of main
-#
-# non-blank lines are annotated with line number in file
-
-# line numbers are printed %.2d to make them equal-width for nice formatting.
-# the format gives a leading 0.  the format %2d gives a leading space but
-# that appears to confuse sanjay's makehtml formatter into bungling quotes
-# because it makes some lines look indented.
-
-echo "<pre> <!-- $* -->"
-
-case $# in
-3)
-	if test "$3" = "END"  # $2 to end of file
-	then
-		awk '
-			function LINE() { printf("%.2d\t%s\n", NR, $0) }
-			BEGIN { printing = 0 }
-			'$2' { printing = 1; LINE(); getline }
-			printing { if($0 ~ /./) { LINE() } else { print "" } }
-		'
-	else	# $2 through $3
-		awk '
-			function LINE() { printf("%.2d\t%s\n", NR, $0) }
-			BEGIN { printing = 0 }
-			'$2' { printing = 1; LINE(); getline }
-			'$3' && printing { if(printing) {printing = 0; LINE(); exit} }
-			printing { if($0 ~ /./) { LINE() } else { print "" } }
-		'
-	fi
-	;;
-2)	# one line
-	awk '
-		function LINE() { printf("%.2d\t%s\n", NR, $0) }
-		'$2' { LINE(); getline; exit }
-	'
-	;;
-1)	# whole file
-	awk '
-		function LINE() { printf("%.2d\t%s\n", NR, $0) }
-		{ if($0 ~ /./) { LINE() } else { print "" } }
-	'
-	;;
-*)
-	echo >&2 usage: prog.sh file.go /func.main/ /^}/
-esac <$1 |
-sed '
-	s/&/\&amp;/g
-	s/"/\&quot;/g
-	s/</\&lt;/g
-	s/>/\&gt;/g
-'
-
-echo '</pre>'