From 3f22092d45d5e7d968db317ffc8ca33e689ae122 Mon Sep 17 00:00:00 2001 From: Ben Gras Date: Sat, 26 Jun 2010 02:20:06 +0000 Subject: [PATCH] import mdocml. --- commands/Makefile | 2 +- commands/mdocml/ChangeLog.xsl | 43 + commands/mdocml/Makefile | 36 + commands/mdocml/arch.c | 38 + commands/mdocml/arch.in | 53 + commands/mdocml/att.c | 38 + commands/mdocml/att.in | 37 + commands/mdocml/chars.c | 206 +++ commands/mdocml/chars.h | 34 + commands/mdocml/chars.in | 446 ++++++ commands/mdocml/compat.c | 95 ++ commands/mdocml/config.h | 20 + commands/mdocml/example.style.css | 72 + commands/mdocml/external.png | Bin 0 -> 165 bytes commands/mdocml/html.c | 754 +++++++++ commands/mdocml/html.h | 156 ++ commands/mdocml/index.css | 48 + commands/mdocml/index.sgml | 420 +++++ commands/mdocml/lib.c | 38 + commands/mdocml/lib.in | 93 ++ commands/mdocml/libman.h | 90 ++ commands/mdocml/libmandoc.h | 37 + commands/mdocml/libmdoc.h | 142 ++ commands/mdocml/main.c | 866 +++++++++++ commands/mdocml/main.h | 55 + commands/mdocml/man.3 | 324 ++++ commands/mdocml/man.7 | 968 ++++++++++++ commands/mdocml/man.c | 677 ++++++++ commands/mdocml/man.h | 119 ++ commands/mdocml/man_action.c | 280 ++++ commands/mdocml/man_argv.c | 104 ++ commands/mdocml/man_hash.c | 106 ++ commands/mdocml/man_html.c | 751 +++++++++ commands/mdocml/man_macro.c | 485 ++++++ commands/mdocml/man_term.c | 964 ++++++++++++ commands/mdocml/man_validate.c | 328 ++++ commands/mdocml/mandoc.1 | 530 +++++++ commands/mdocml/mandoc.c | 396 +++++ commands/mdocml/mandoc.h | 110 ++ commands/mdocml/mandoc_char.7 | 559 +++++++ commands/mdocml/mdoc.3 | 290 ++++ commands/mdocml/mdoc.7 | 2375 +++++++++++++++++++++++++++++ commands/mdocml/mdoc.c | 822 ++++++++++ commands/mdocml/mdoc.h | 345 +++++ commands/mdocml/mdoc_action.c | 1034 +++++++++++++ commands/mdocml/mdoc_argv.c | 790 ++++++++++ commands/mdocml/mdoc_hash.c | 93 ++ commands/mdocml/mdoc_html.c | 2195 ++++++++++++++++++++++++++ commands/mdocml/mdoc_macro.c | 1728 
+++++++++++++++++++++ commands/mdocml/mdoc_strings.c | 219 +++ commands/mdocml/mdoc_term.c | 2109 +++++++++++++++++++++++++ commands/mdocml/mdoc_validate.c | 1413 +++++++++++++++++ commands/mdocml/msec.c | 37 + commands/mdocml/msec.in | 40 + commands/mdocml/out.c | 399 +++++ commands/mdocml/out.h | 82 + commands/mdocml/roff.3 | 156 ++ commands/mdocml/roff.7 | 304 ++++ commands/mdocml/roff.c | 857 +++++++++++ commands/mdocml/roff.h | 40 + commands/mdocml/st.c | 38 + commands/mdocml/st.in | 72 + commands/mdocml/style.css | 77 + commands/mdocml/term.c | 702 +++++++++ commands/mdocml/term.h | 123 ++ commands/mdocml/term_ascii.c | 128 ++ commands/mdocml/term_ps.c | 430 ++++++ commands/mdocml/test-strlcat.c | 8 + commands/mdocml/test-strlcpy.c | 8 + commands/mdocml/tree.c | 213 +++ commands/mdocml/vol.c | 38 + commands/mdocml/vol.in | 35 + 72 files changed, 27719 insertions(+), 1 deletion(-) create mode 100644 commands/mdocml/ChangeLog.xsl create mode 100644 commands/mdocml/Makefile create mode 100644 commands/mdocml/arch.c create mode 100644 commands/mdocml/arch.in create mode 100644 commands/mdocml/att.c create mode 100644 commands/mdocml/att.in create mode 100644 commands/mdocml/chars.c create mode 100644 commands/mdocml/chars.h create mode 100644 commands/mdocml/chars.in create mode 100644 commands/mdocml/compat.c create mode 100644 commands/mdocml/config.h create mode 100644 commands/mdocml/example.style.css create mode 100644 commands/mdocml/external.png create mode 100644 commands/mdocml/html.c create mode 100644 commands/mdocml/html.h create mode 100644 commands/mdocml/index.css create mode 100644 commands/mdocml/index.sgml create mode 100644 commands/mdocml/lib.c create mode 100644 commands/mdocml/lib.in create mode 100644 commands/mdocml/libman.h create mode 100644 commands/mdocml/libmandoc.h create mode 100644 commands/mdocml/libmdoc.h create mode 100644 commands/mdocml/main.c create mode 100644 commands/mdocml/main.h create mode 100644 commands/mdocml/man.3 
create mode 100644 commands/mdocml/man.7 create mode 100644 commands/mdocml/man.c create mode 100644 commands/mdocml/man.h create mode 100644 commands/mdocml/man_action.c create mode 100644 commands/mdocml/man_argv.c create mode 100644 commands/mdocml/man_hash.c create mode 100644 commands/mdocml/man_html.c create mode 100644 commands/mdocml/man_macro.c create mode 100644 commands/mdocml/man_term.c create mode 100644 commands/mdocml/man_validate.c create mode 100644 commands/mdocml/mandoc.1 create mode 100644 commands/mdocml/mandoc.c create mode 100644 commands/mdocml/mandoc.h create mode 100644 commands/mdocml/mandoc_char.7 create mode 100644 commands/mdocml/mdoc.3 create mode 100644 commands/mdocml/mdoc.7 create mode 100644 commands/mdocml/mdoc.c create mode 100644 commands/mdocml/mdoc.h create mode 100644 commands/mdocml/mdoc_action.c create mode 100644 commands/mdocml/mdoc_argv.c create mode 100644 commands/mdocml/mdoc_hash.c create mode 100644 commands/mdocml/mdoc_html.c create mode 100644 commands/mdocml/mdoc_macro.c create mode 100644 commands/mdocml/mdoc_strings.c create mode 100644 commands/mdocml/mdoc_term.c create mode 100644 commands/mdocml/mdoc_validate.c create mode 100644 commands/mdocml/msec.c create mode 100644 commands/mdocml/msec.in create mode 100644 commands/mdocml/out.c create mode 100644 commands/mdocml/out.h create mode 100644 commands/mdocml/roff.3 create mode 100644 commands/mdocml/roff.7 create mode 100644 commands/mdocml/roff.c create mode 100644 commands/mdocml/roff.h create mode 100644 commands/mdocml/st.c create mode 100644 commands/mdocml/st.in create mode 100644 commands/mdocml/style.css create mode 100644 commands/mdocml/term.c create mode 100644 commands/mdocml/term.h create mode 100644 commands/mdocml/term_ascii.c create mode 100644 commands/mdocml/term_ps.c create mode 100644 commands/mdocml/test-strlcat.c create mode 100644 commands/mdocml/test-strlcpy.c create mode 100644 commands/mdocml/tree.c create mode 100644 
commands/mdocml/vol.c create mode 100644 commands/mdocml/vol.in diff --git a/commands/Makefile b/commands/Makefile index 4efa39f08..61b67c101 100644 --- a/commands/Makefile +++ b/commands/Makefile @@ -17,7 +17,7 @@ SUBDIR= aal add_route adduser advent arp ash at autil awk \ intr ipcrm ipcs irdpd isoread join kill last leave \ less lex life loadkeys loadramdisk logger login look lp \ lpd ls lspci M m4 mail make MAKEDEV man \ - mdb mesg mined mkdep mkdir mkdist mkfifo mkfs mknod \ + mdb mdocml mesg mined mkdep mkdir mkdist mkfifo mkfs mknod \ mkproto modem mount mt netconf newroot nice nm nohup \ nonamed od packit packman passwd paste patch pax \ ping postinstall poweroff pr prep printf printroot \ diff --git a/commands/mdocml/ChangeLog.xsl b/commands/mdocml/ChangeLog.xsl new file mode 100644 index 000000000..dccc79dba --- /dev/null +++ b/commands/mdocml/ChangeLog.xsl @@ -0,0 +1,43 @@ + + + + + + + mdocml - CVS-ChangeLog + + + + +
+ Files modified by + +
+
+ + Note: + + +
    + +
  • + + + — Rev: + + , Status: + + + , Tag: + + + +
  • +
    +
+
+
+ + +
+
diff --git a/commands/mdocml/Makefile b/commands/mdocml/Makefile new file mode 100644 index 000000000..5d3ec3d94 --- /dev/null +++ b/commands/mdocml/Makefile @@ -0,0 +1,36 @@ + +VERSION = 1.10.2 +VDATE = 19 June 2010 + +VFLAGS = -DVERSION="\"$(VERSION)\"" + +.if ${CC} == gcc +WFLAGS = -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings +CFLAGS += -g $(WFLAGS) $(VFLAGS) -DHAVE_CONFIG_H +ARFLAGS = rs +.else +CFLAGS += ${VFLAGS} -DHAVE_CONFIG_H +ARFLAGS = r +.endif + +# Specify this if you want to hard-code the operating system to appear +# in the lower-left hand corner of -mdoc manuals. +# CFLAGS += -DOSNAME="\"OpenBSD 4.5\"" + +# Unset this if you don't want Xo/Xc allowing split `It' lines, which +# breaks symmetry. +CFLAGS += -DUGLY + +SRCS = main.c mdoc_term.c chars.c term.c tree.c compat.c \ + man_term.c html.c mdoc_html.c man_html.c out.c \ + term_ps.c term_ascii.c man_macro.c man.c man_hash.c \ + man_validate.c man_action.c mandoc.c man_argv.c roff.c \ + mdoc_macro.c mdoc.c mdoc_hash.c mdoc_strings.c mdoc_argv.c \ + mdoc_validate.c mdoc_action.c lib.c att.c arch.c vol.c \ + msec.c st.c + +MAN = mandoc.1 mdoc.3 mdoc.7 mandoc_char.7 man.7 man.3 roff.7 roff.3 + +PROG = mandoc + +.include diff --git a/commands/mdocml/arch.c b/commands/mdocml/arch.c new file mode 100644 index 000000000..c3d76340a --- /dev/null +++ b/commands/mdocml/arch.c @@ -0,0 +1,38 @@ +/* $Id: arch.c,v 1.8 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include "mandoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2arch(const char *p) +{ + +#include "arch.in" + + return(NULL); +} diff --git a/commands/mdocml/arch.in b/commands/mdocml/arch.in new file mode 100644 index 000000000..d73e24610 --- /dev/null +++ b/commands/mdocml/arch.in @@ -0,0 +1,53 @@ +/* $Id: arch.in,v 1.8 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This file defines the architecture token of the .Dt prologue macro. + * All architectures that your system supports (or the manuals of your + * system) should be included here. The right-hand-side is the + * formatted output. + * + * Be sure to escape strings. 
+ */ + +LINE("alpha", "Alpha") +LINE("amd64", "AMD64") +LINE("amiga", "Amiga") +LINE("arc", "ARC") +LINE("arm", "ARM") +LINE("armish", "ARMISH") +LINE("aviion", "AViiON") +LINE("hp300", "HP300") +LINE("hppa", "HPPA") +LINE("hppa64", "HPPA64") +LINE("i386", "i386") +LINE("landisk", "LANDISK") +LINE("loongson", "Loongson") +LINE("luna88k", "Luna88k") +LINE("mac68k", "Mac68k") +LINE("macppc", "MacPPC") +LINE("mvme68k", "MVME68k") +LINE("mvme88k", "MVME88k") +LINE("mvmeppc", "MVMEPPC") +LINE("pmax", "PMAX") +LINE("sgi", "SGI") +LINE("socppc", "SOCPPC") +LINE("sparc", "SPARC") +LINE("sparc64", "SPARC64") +LINE("sun3", "Sun3") +LINE("vax", "VAX") +LINE("zaurus", "Zaurus") diff --git a/commands/mdocml/att.c b/commands/mdocml/att.c new file mode 100644 index 000000000..cfd6b4436 --- /dev/null +++ b/commands/mdocml/att.c @@ -0,0 +1,38 @@ +/* $Id: att.c,v 1.8 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include "mandoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2att(const char *p) +{ + +#include "att.in" + + return(NULL); +} diff --git a/commands/mdocml/att.in b/commands/mdocml/att.in new file mode 100644 index 000000000..48fcd30b9 --- /dev/null +++ b/commands/mdocml/att.in @@ -0,0 +1,37 @@ +/* $Id: att.in,v 1.6 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This file defines the AT&T versions of the .At macro. This probably + * isn't going to change. The right-hand side is the formatted string. + * + * Be sure to escape strings. 
+ */ + +LINE("v1", "Version 1 AT&T UNIX") +LINE("v2", "Version 2 AT&T UNIX") +LINE("v3", "Version 3 AT&T UNIX") +LINE("v4", "Version 4 AT&T UNIX") +LINE("v5", "Version 5 AT&T UNIX") +LINE("v6", "Version 6 AT&T UNIX") +LINE("v7", "Version 7 AT&T UNIX") +LINE("32v", "Version 32V AT&T UNIX") +LINE("V", "AT&T System V UNIX") +LINE("V.1", "AT&T System V.1 UNIX") +LINE("V.2", "AT&T System V.2 UNIX") +LINE("V.3", "AT&T System V.3 UNIX") +LINE("V.4", "AT&T System V.4 UNIX") diff --git a/commands/mdocml/chars.c b/commands/mdocml/chars.c new file mode 100644 index 000000000..b938c58c2 --- /dev/null +++ b/commands/mdocml/chars.c @@ -0,0 +1,206 @@ +/* $Id: chars.c,v 1.20 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include + +#include "mandoc.h" +#include "chars.h" + +#define PRINT_HI 126 +#define PRINT_LO 32 + +struct ln { + struct ln *next; + const char *code; + const char *ascii; + const char *html; + size_t codesz; + size_t asciisz; + size_t htmlsz; + int type; +#define CHARS_CHAR (1 << 0) +#define CHARS_STRING (1 << 1) +#define CHARS_BOTH (CHARS_CHAR | CHARS_STRING) +}; + +#define LINES_MAX 370 + +#define CHAR(w, x, y, z, a, b) \ + { NULL, (w), (y), (a), (x), (z), (b), CHARS_CHAR }, +#define STRING(w, x, y, z, a, b) \ + { NULL, (w), (y), (a), (x), (z), (b), CHARS_STRING }, +#define BOTH(w, x, y, z, a, b) \ + { NULL, (w), (y), (a), (x), (z), (b), CHARS_BOTH }, + +#define CHAR_TBL_START static struct ln lines[LINES_MAX] = { +#define CHAR_TBL_END }; + +#include "chars.in" + +struct tbl { + enum chars type; + struct ln **htab; +}; + +#ifndef __GNUC__ +#define inline +#endif +static inline int match(const struct ln *, + const char *, size_t, int); +static const char *find(struct tbl *, const char *, + size_t, size_t *, int); + + +void +chars_free(void *arg) +{ + struct tbl *tab; + + tab = (struct tbl *)arg; + + free(tab->htab); + free(tab); +} + + +void * +chars_init(enum chars type) +{ + struct tbl *tab; + struct ln **htab; + struct ln *pp; + int i, hash; + + /* + * Constructs a very basic chaining hashtable. The hash routine + * is simply the integral value of the first character. + * Subsequent entries are chained in the order they're processed + * (they're in-line re-ordered during lookup). 
+ */ + + tab = malloc(sizeof(struct tbl)); + if (NULL == tab) { + perror(NULL); + exit(EXIT_FAILURE); + } + + htab = calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **)); + if (NULL == htab) { + perror(NULL); + exit(EXIT_FAILURE); + } + + for (i = 0; i < LINES_MAX; i++) { + hash = (int)lines[i].code[0] - PRINT_LO; + + if (NULL == (pp = htab[hash])) { + htab[hash] = &lines[i]; + continue; + } + + for ( ; pp->next; pp = pp->next) + /* Scan ahead. */ ; + pp->next = &lines[i]; + } + + tab->htab = htab; + tab->type = type; + return(tab); +} + + +const char * +chars_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz) +{ + + return(find((struct tbl *)arg, p, sz, rsz, CHARS_CHAR)); +} + + +const char * +chars_a2res(void *arg, const char *p, size_t sz, size_t *rsz) +{ + + return(find((struct tbl *)arg, p, sz, rsz, CHARS_STRING)); +} + + +static const char * +find(struct tbl *tab, const char *p, size_t sz, size_t *rsz, int type) +{ + struct ln *pp, *prev; + struct ln **htab; + int hash; + + assert(p); + assert(sz > 0); + + if (p[0] < PRINT_LO || p[0] > PRINT_HI) + return(NULL); + + /* + * Lookup the symbol in the symbol hash. See ascii2htab for the + * hashtable specs. This dynamically re-orders the hash chain + * to optimise for repeat hits. + */ + + hash = (int)p[0] - PRINT_LO; + htab = tab->htab; + + if (NULL == (pp = htab[hash])) + return(NULL); + + for (prev = NULL; pp; pp = pp->next) { + if ( ! match(pp, p, sz, type)) { + prev = pp; + continue; + } + + if (prev) { + prev->next = pp->next; + pp->next = htab[hash]; + htab[hash] = pp; + } + + if (CHARS_HTML == tab->type) { + *rsz = pp->htmlsz; + return(pp->html); + } + *rsz = pp->asciisz; + return(pp->ascii); + } + + return(NULL); +} + + +static inline int +match(const struct ln *ln, const char *p, size_t sz, int type) +{ + + if ( ! 
(ln->type & type)) + return(0); + if (ln->codesz != sz) + return(0); + return(0 == strncmp(ln->code, p, sz)); +} diff --git a/commands/mdocml/chars.h b/commands/mdocml/chars.h new file mode 100644 index 000000000..e1000e659 --- /dev/null +++ b/commands/mdocml/chars.h @@ -0,0 +1,34 @@ +/* $Id: chars.h,v 1.4 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef CHARS_H +#define CHARS_H + +__BEGIN_DECLS + +enum chars { + CHARS_ASCII, + CHARS_HTML +}; + +void *chars_init(enum chars); +const char *chars_a2ascii(void *, const char *, size_t, size_t *); +const char *chars_a2res(void *, const char *, size_t, size_t *); +void chars_free(void *); + +__END_DECLS + +#endif /*!CHARS_H*/ diff --git a/commands/mdocml/chars.in b/commands/mdocml/chars.in new file mode 100644 index 000000000..ebbbcfaa5 --- /dev/null +++ b/commands/mdocml/chars.in @@ -0,0 +1,446 @@ +/* $Id: chars.in,v 1.25 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * The ASCII translation tables. STRING corresponds to predefined + * strings (cf. mdoc_samples.7 and tmac/mdoc/doc-nroff). CHAR + * corresponds to special characters (cf. groff_char.7). BOTH contains + * sequences that are equivalent in both STRING and CHAR. + * + * Either way, the left-hand side corresponds to the input sequence (\x, + * \(xx, \*(xx and so on) whose length is listed second element. The + * right-hand side is what's produced by the front-end, with the fourth + * element being its length. + * + * XXX - C-escape strings! + * XXX - update LINES_MAX if adding more! + */ + +/* Non-breaking, non-collapsing space uses unit separator. */ +static const char ascii_nbrsp[2] = { ASCII_NBRSP, 0 }; + +CHAR_TBL_START + +/* Spacing. */ +CHAR("c", 1, "", 0, "", 0) +CHAR("0", 1, " ", 1, " ", 7) +CHAR(" ", 1, " ", 1, " ", 7) +CHAR("~", 1, ascii_nbrsp, 1, " ", 6) +CHAR("%", 1, "", 0, "", 0) +CHAR("&", 1, "", 0, "", 0) +CHAR("^", 1, "", 0, "", 0) +CHAR("|", 1, "", 0, "", 0) +CHAR("}", 1, "", 0, "", 0) + +/* Accents. 
*/ +CHAR("a\"", 2, "\"", 1, "̋", 6) +CHAR("a-", 2, "-", 1, "¯", 6) +CHAR("a.", 2, ".", 1, "˙", 6) +CHAR("a^", 2, "^", 1, "̂", 6) +BOTH("\'", 1, "\'", 1, "́", 6) +BOTH("aa", 2, "\'", 1, "́", 6) +BOTH("ga", 2, "`", 1, "̀", 6) +BOTH("`", 1, "`", 1, "̀", 6) +CHAR("ab", 2, "`", 1, "̆", 6) +CHAR("ac", 2, ",", 1, "̧", 6) +CHAR("ad", 2, "\"", 1, "̈", 6) +CHAR("ah", 2, "v", 1, "ˇ", 6) +CHAR("ao", 2, "o", 1, "˚", 6) +CHAR("a~", 2, "~", 1, "̃", 6) +CHAR("ho", 2, ",", 1, "̨", 6) +CHAR("ha", 2, "^", 1, "^", 1) +CHAR("ti", 2, "~", 1, "~", 1) + +/* Quotes. */ +CHAR("Bq", 2, ",,", 2, "„", 7) +CHAR("bq", 2, ",", 1, "‚", 7) +BOTH("lq", 2, "``", 2, "“", 7) +BOTH("rq", 2, "\'\'", 2, "”", 7) +CHAR("oq", 2, "`", 1, "‘", 7) +CHAR("cq", 2, "\'", 1, "’", 7) +CHAR("aq", 2, "\'", 1, "\'", 1) +CHAR("dq", 2, "\"", 1, "\"", 1) +CHAR("Fo", 2, "<<", 2, "«", 6) +CHAR("Fc", 2, ">>", 2, "»", 6) +CHAR("fo", 2, "<", 1, "‹", 7) +CHAR("fc", 2, ">", 1, "›", 7) + +/* Brackets. */ +CHAR("lB", 2, "[", 1, "[", 1) +CHAR("rB", 2, "]", 1, "]", 1) +CHAR("lC", 2, "{", 1, "{", 1) +CHAR("rC", 2, "}", 1, "}", 1) +CHAR("la", 2, "<", 1, "⟨", 8) +CHAR("ra", 2, ">", 1, "⟩", 8) +CHAR("bv", 2, "|", 1, "⎪", 7) +CHAR("braceex", 7, "|", 1, "⎪", 7) +CHAR("bracketlefttp", 13, "|", 1, "⎡", 7) +CHAR("bracketleftbp", 13, "|", 1, "⎣", 7) +CHAR("bracketleftex", 13, "|", 1, "⎢", 7) +CHAR("bracketrighttp", 14, "|", 1, "⎤", 7) +CHAR("bracketrightbp", 14, "|", 1, "⎦", 7) +CHAR("bracketrightex", 14, "|", 1, "⎥", 7) +CHAR("lt", 2, ",-", 2, "⎧", 7) +CHAR("bracelefttp", 11, ",-", 2, "⎧", 7) +CHAR("lk", 2, "{", 1, "⎨", 7) +CHAR("braceleftmid", 12, "{", 1, "⎨", 7) +CHAR("lb", 2, ",-", 2, "⎩", 7) +CHAR("braceleftbp", 11, "`-", 2, "⎩", 7) +CHAR("braceleftex", 11, "|", 1, "⎪", 7) +CHAR("rt", 2, "-.", 2, "⎫", 7) +CHAR("bracerighttp", 12, "-.", 2, "⎫", 7) +CHAR("rk", 2, "}", 1, "⎬", 7) +CHAR("bracerightmid", 13, "}", 1, "⎬", 7) +CHAR("rb", 2, "-\'", 2, "⎭", 7) +CHAR("bracerightbp", 12, "-\'", 2, "⎭", 7) +CHAR("bracerightex", 12, "|", 1, "⎪", 7) 
+CHAR("parenlefttp", 11, "/", 1, "⎛", 7) +CHAR("parenleftbp", 11, "\\", 1, "⎝", 7) +CHAR("parenleftex", 11, "|", 1, "⎜", 7) +CHAR("parenrighttp", 12, "\\", 1, "⎞", 7) +CHAR("parenrightbp", 12, "/", 1, "⎠", 7) +CHAR("parenrightex", 12, "|", 1, "⎟", 7) + +/* Greek characters. */ +CHAR("*A", 2, "A", 1, "Α", 6) +CHAR("*B", 2, "B", 1, "Β", 6) +CHAR("*G", 2, "|", 1, "Γ", 6) +CHAR("*D", 2, "/\\", 2, "Δ", 6) +CHAR("*E", 2, "E", 1, "Ε", 6) +CHAR("*Z", 2, "Z", 1, "Ζ", 6) +CHAR("*Y", 2, "H", 1, "Η", 6) +CHAR("*H", 2, "O", 1, "Θ", 6) +CHAR("*I", 2, "I", 1, "Ι", 6) +CHAR("*K", 2, "K", 1, "Κ", 6) +CHAR("*L", 2, "/\\", 2, "Λ", 6) +CHAR("*M", 2, "M", 1, "Μ", 6) +CHAR("*N", 2, "N", 1, "Ν", 6) +CHAR("*C", 2, "H", 1, "Ξ", 6) +CHAR("*O", 2, "O", 1, "Ο", 6) +CHAR("*P", 2, "TT", 2, "Π", 6) +CHAR("*R", 2, "P", 1, "Ρ", 6) +CHAR("*S", 2, ">", 1, "Σ", 6) +CHAR("*T", 2, "T", 1, "Τ", 6) +CHAR("*U", 2, "Y", 1, "Υ", 6) +CHAR("*F", 2, "O_", 1, "Φ", 6) +CHAR("*X", 2, "X", 1, "Χ", 6) +CHAR("*Q", 2, "Y", 1, "Ψ", 6) +CHAR("*W", 2, "O", 1, "Ω", 6) +CHAR("*a", 2, "a", 1, "α", 6) +CHAR("*b", 2, "B", 1, "β", 6) +CHAR("*g", 2, "y", 1, "γ", 6) +CHAR("*d", 2, "d", 1, "δ", 6) +CHAR("*e", 2, "e", 1, "ε", 6) +CHAR("*z", 2, "C", 1, "ζ", 6) +CHAR("*y", 2, "n", 1, "η", 6) +CHAR("*h", 2, "0", 1, "θ", 6) +CHAR("*i", 2, "i", 1, "ι", 6) +CHAR("*k", 2, "k", 1, "κ", 6) +CHAR("*l", 2, "\\", 1, "λ", 6) +CHAR("*m", 2, "u", 1, "μ", 6) +CHAR("*n", 2, "v", 1, "ν", 6) +CHAR("*c", 2, "E", 1, "ξ", 6) +CHAR("*o", 2, "o", 1, "ο", 6) +CHAR("*p", 2, "n", 1, "π", 6) +CHAR("*r", 2, "p", 1, "ρ", 6) +CHAR("*s", 2, "o", 1, "σ", 6) +CHAR("*t", 2, "t", 1, "τ", 6) +CHAR("*u", 2, "u", 1, "υ", 6) +CHAR("*f", 2, "o", 1, "ϕ", 6) +CHAR("*x", 2, "x", 1, "χ", 6) +CHAR("*q", 2, "u", 1, "ψ", 6) +CHAR("*w", 2, "w", 1, "ω", 6) +CHAR("+h", 2, "0", 1, "ϑ", 6) +CHAR("+f", 2, "o", 1, "φ", 6) +CHAR("+p", 2, "w", 1, "ϖ", 6) +CHAR("+e", 2, "e", 1, "ϵ", 7) +CHAR("ts", 2, "s", 1, "ς", 6) + +/* Accented letters. 
*/ +CHAR(",C", 2, "C", 1, "Ç", 6) +CHAR(",c", 2, "c", 1, "ç", 6) +CHAR("/L", 2, "L", 1, "Ł", 6) +CHAR("/O", 2, "O", 1, "Ø", 6) +CHAR("/l", 2, "l", 1, "ł", 6) +CHAR("/o", 2, "o", 1, "ø", 6) +CHAR("oA", 2, "A", 1, "Å", 6) +CHAR("oa", 2, "a", 1, "å", 6) +CHAR(":A", 2, "A", 1, "Ä", 6) +CHAR(":E", 2, "E", 1, "Ë", 6) +CHAR(":I", 2, "I", 1, "Ï", 6) +CHAR(":O", 2, "O", 1, "Ö", 6) +CHAR(":U", 2, "U", 1, "Ü", 6) +CHAR(":a", 2, "a", 1, "ä", 6) +CHAR(":e", 2, "e", 1, "ë", 6) +CHAR(":i", 2, "i", 1, "ï", 6) +CHAR(":o", 2, "o", 1, "õ", 6) +CHAR(":u", 2, "u", 1, "ü", 6) +CHAR(":y", 2, "y", 1, "ÿ", 6) +CHAR("\'A", 2, "A", 1, "Á", 6) +CHAR("\'E", 2, "E", 1, "É", 6) +CHAR("\'I", 2, "I", 1, "Í", 6) +CHAR("\'O", 2, "O", 1, "Ó", 6) +CHAR("\'U", 2, "U", 1, "Ú", 6) +CHAR("\'a", 2, "a", 1, "á", 6) +CHAR("\'e", 2, "e", 1, "é", 6) +CHAR("\'i", 2, "i", 1, "í", 6) +CHAR("\'o", 2, "o", 1, "ó", 6) +CHAR("\'u", 2, "u", 1, "ú", 6) +CHAR("^A", 2, "A", 1, "Â", 6) +CHAR("^E", 2, "E", 1, "Ê", 6) +CHAR("^I", 2, "I", 1, "Î", 6) +CHAR("^O", 2, "O", 1, "Ô", 6) +CHAR("^U", 2, "U", 1, "Û", 6) +CHAR("^a", 2, "a", 1, "â", 6) +CHAR("^e", 2, "e", 1, "ê", 6) +CHAR("^i", 2, "i", 1, "î", 6) +CHAR("^o", 2, "o", 1, "ô", 6) +CHAR("^u", 2, "u", 1, "û", 6) +CHAR("`A", 2, "A", 1, "À", 6) +CHAR("`E", 2, "E", 1, "È", 6) +CHAR("`I", 2, "I", 1, "Ì", 6) +CHAR("`O", 2, "O", 1, "Ò", 6) +CHAR("`U", 2, "U", 1, "Ù", 6) +CHAR("`a", 2, "a", 1, "à", 6) +CHAR("`e", 2, "e", 1, "è", 6) +CHAR("`i", 2, "i", 1, "ì", 6) +CHAR("`o", 2, "o", 1, "ò", 6) +CHAR("`u", 2, "u", 1, "ù", 6) +CHAR("~A", 2, "A", 1, "Ã", 6) +CHAR("~N", 2, "N", 1, "Ñ", 6) +CHAR("~O", 2, "O", 1, "Õ", 6) +CHAR("~a", 2, "a", 1, "ã", 6) +CHAR("~n", 2, "n", 1, "ñ", 6) +CHAR("~o", 2, "o", 1, "õ", 6) + +/* Arrows and lines. 
*/ +CHAR("<-", 2, "<-", 2, "←", 7) +CHAR("->", 2, "->", 2, "→", 7) +CHAR("<>", 2, "<>", 2, "↔", 7) +CHAR("da", 2, "v", 1, "↓", 7) +BOTH("ua", 2, "^", 1, "↑", 7) +BOTH("va", 2, "^v", 2, "↕", 7) +CHAR("lA", 2, "<=", 2, "⇐", 7) +CHAR("rA", 2, "=>", 2, "⇒", 7) +CHAR("hA", 2, "<=>", 3, "⇔", 7) +CHAR("dA", 2, "v", 1, "⇓", 7) +CHAR("uA", 2, "^", 1, "⇑", 7) +CHAR("vA", 2, "^=v", 3, "⇕", 7) + +/* Logic. */ +CHAR("AN", 2, "^", 1, "∧", 7) +CHAR("OR", 2, "v", 1, "∨", 7) +CHAR("no", 2, "~", 1, "¬", 6) +CHAR("tno", 3, "~", 1, "¬", 6) +CHAR("te", 2, "3", 1, "∃", 7) +CHAR("fa", 2, "V", 1, "∀", 7) +CHAR("st", 2, "-)", 2, "∋", 7) +CHAR("tf", 2, ".:.", 3, "∴", 7) +CHAR("3d", 2, ".:.", 3, "∴", 7) +CHAR("or", 2, "|", 1, "|", 1) + +/* Mathematicals. */ +CHAR("pl", 2, "+", 1, "+", 5) +CHAR("mi", 2, "-", 1, "−", 7) +CHAR("-", 1, "-", 1, "-", 1) +CHAR("-+", 2, "-+", 2, "∓", 7) +CHAR("+-", 2, "+-", 2, "±", 6) +CHAR("t+-", 3, "+-", 2, "±", 6) +CHAR("pc", 2, ".", 1, "·", 6) +CHAR("md", 2, ".", 1, "⋅", 7) +CHAR("mu", 2, "x", 1, "×", 6) +CHAR("tmu", 3, "x", 1, "×", 6) +CHAR("c*", 2, "x", 1, "⊗", 7) +CHAR("c+", 2, "+", 1, "⊕", 7) +CHAR("di", 2, "-:-", 3, "÷", 6) +CHAR("tdi", 3, "-:-", 3, "÷", 6) +CHAR("f/", 2, "/", 1, "⁄", 7) +CHAR("**", 2, "*", 1, "∗", 7) +BOTH("<=", 2, "<=", 2, "≤", 7) +BOTH(">=", 2, ">=", 2, "≥", 7) +CHAR("<<", 2, "<<", 2, "≪", 7) +CHAR(">>", 2, ">>", 2, "≫", 7) +CHAR("eq", 2, "=", 1, "=", 5) +CHAR("!=", 2, "!=", 2, "≠", 7) +CHAR("==", 2, "==", 2, "≡", 7) +CHAR("ne", 2, "!==", 3, "≢", 7) +CHAR("=~", 2, "=~", 2, "≅", 7) +CHAR("-~", 2, "-~", 2, "≃", 7) +CHAR("ap", 2, "~", 1, "∼", 7) +CHAR("~~", 2, "~~", 2, "≈", 7) +CHAR("~=", 2, "~=", 2, "≌", 7) +CHAR("pt", 2, "oc", 2, "∝", 7) +CHAR("es", 2, "{}", 2, "∅", 7) +CHAR("mo", 2, "E", 1, "∈", 7) +CHAR("nm", 2, "!E", 2, "∉", 7) +CHAR("sb", 2, "(=", 2, "⊂", 7) +CHAR("nb", 2, "(!=", 3, "⊄", 7) +CHAR("sp", 2, "=)", 2, "⊃", 7) +CHAR("nc", 2, "!=)", 3, "⊅", 7) +CHAR("ib", 2, "(=", 2, "⊆", 7) +CHAR("ip", 2, "=)", 2, "⊇", 7) +CHAR("ca", 2, 
"(^)", 3, "∩", 7) +CHAR("cu", 2, "U", 1, "∪", 7) +CHAR("/_", 2, "/_", 2, "∠", 7) +CHAR("pp", 2, "_|_", 3, "⊥", 7) +CHAR("is", 2, "I", 1, "∫", 7) +CHAR("integral", 8, "I", 1, "∫", 7) +CHAR("sum", 3, "E", 1, "∑", 7) +CHAR("product", 7, "TT", 2, "∏", 7) +CHAR("coproduct", 9, "U", 1, "∐", 7) +CHAR("gr", 2, "V", 1, "∇", 7) +CHAR("sr", 2, "\\/", 2, "√", 7) +CHAR("sqrt", 4, "\\/", 2, "√", 7) +CHAR("lc", 2, "|~", 2, "⌈", 7) +CHAR("rc", 2, "~|", 2, "⌉", 7) +CHAR("lf", 2, "|_", 2, "⌊", 7) +CHAR("rf", 2, "_|", 2, "⌋", 7) +CHAR("if", 2, "oo", 2, "∞", 7) +CHAR("Ah", 2, "N", 1, "ℵ", 7) +CHAR("Im", 2, "I", 1, "ℑ", 7) +CHAR("Re", 2, "R", 1, "ℜ", 7) +CHAR("pd", 2, "a", 1, "∂", 7) +CHAR("-h", 2, "/h", 2, "ℏ", 7) + +/* Ligatures. */ +CHAR("ff", 2, "ff", 2, "ff", 8) +CHAR("fi", 2, "fi", 2, "fi", 8) +CHAR("fl", 2, "fl", 2, "fl", 8) +CHAR("Fi", 2, "ffi", 3, "ffi", 8) +CHAR("Fl", 2, "ffl", 3, "ffl", 8) +BOTH("AE", 2, "AE", 2, "Æ", 6) +BOTH("ae", 2, "ae", 2, "æ", 6) +CHAR("OE", 2, "OE", 2, "Œ", 6) +CHAR("oe", 2, "oe", 2, "œ", 6) +CHAR("ss", 2, "ss", 2, "ß", 6) +CHAR("IJ", 2, "IJ", 2, "IJ", 6) +CHAR("ij", 2, "ij", 2, "ij", 6) + +/* Special letters. */ +CHAR("-D", 2, "D", 1, "Ð", 6) +CHAR("Sd", 2, "o", 1, "ð", 6) +CHAR("TP", 2, "b", 1, "Þ", 6) +CHAR("Tp", 2, "b", 1, "þ", 6) +CHAR(".i", 2, "i", 1, "ı", 6) +CHAR(".j", 2, "j", 1, "ȷ", 6) + +/* Currency. */ +CHAR("Do", 2, "$", 1, "$", 1) +CHAR("ct", 2, "c", 1, "¢", 6) +CHAR("Eu", 2, "EUR", 3, "€", 7) +CHAR("eu", 2, "EUR", 3, "€", 7) +CHAR("Ye", 2, "Y", 1, "¥", 6) +CHAR("Po", 2, "L", 1, "£", 6) +CHAR("Cs", 2, "x", 1, "¤", 6) +CHAR("Fn", 2, "f", 1, "ƒ", 6) + +/* pod2man holdovers. 
*/ +STRING("--", 2, "--", 2, "—", 7) +STRING("PI", 2, "pi", 2, "π", 6) +STRING("L\"", 2, "``", 2, "“", 7) +STRING("R\"", 2, "\'\'", 2, "”", 7) +STRING("C+", 2, "C++", 3, "C++", 3) +STRING("C`", 2, "`", 1, "‘", 7) +STRING("C\'", 2, "\'", 1, "’", 7) +STRING("Aq", 2, "\'", 1, "\'", 1) +STRING("^", 1, "^", 1, "^", 1) +STRING(",", 1, ",", 1, ",", 1) +STRING("~", 1, "~", 1, "~", 1) +STRING("/", 1, "/", 1, "/", 1) +STRING(":", 1, "\"", 1, "̈", 6) +STRING("8", 1, "B", 1, "β", 6) +STRING("o", 1, "o", 1, "°", 6) +STRING("D-", 2, "D", 1, "Ð", 6) +STRING("d-", 2, "o", 1, "ð", 6) +STRING("Th", 2, "b", 1, "Þ", 6) +STRING("th", 2, "b", 1, "þ", 6) + +/* Old style. */ +STRING("Am", 2, "&", 1, "&", 5) +STRING("Ba", 2, "|", 1, "|", 1) +STRING("Ge", 2, ">=", 2, "≥", 7) +STRING("Gt", 2, ">", 1, ">", 4) +STRING("If", 2, "infinity", 8, "infinity", 8) +STRING("Le", 2, "<=", 2, "≤", 7) +STRING("Lq", 2, "``", 2, "“", 7) +STRING("Lt", 2, "<", 1, "<", 4) +STRING("Na", 2, "NaN", 3, "NaN", 3) +STRING("Ne", 2, "!=", 2, "≠", 7) +STRING("Pi", 2, "pi", 2, "π", 6) +STRING("Pm", 2, "+-", 2, "±", 6) +STRING("R", 1, "(R)", 3, "®", 6) +STRING("Rq", 2, "\'\'", 2, "”", 7) +STRING("Tm", 2, "tm", 2, "™", 7) +STRING("left-bracket", 12, "[", 1, "[", 1) +STRING("left-parenthesis", 16, "(", 1, "(", 1) +STRING("left-singlequote", 16, "`", 1, "‘", 7) +STRING("lp", 2, "(", 1, "(", 1) +STRING("q", 1, "\"", 1, """, 6) +STRING("quote-left", 10, "`", 1, "‘", 7) +STRING("quote-right", 11, "\'", 1, "’", 7) +STRING("right-bracket", 13, "]", 1, "]", 1) +STRING("right-parenthesis", 17, ")", 1, ")", 1) +STRING("right-singlequote", 17, "\'", 1, "’", 7) +STRING("rp", 2, ")", 1, ")", 1) + +/* Lines. */ +CHAR("ba", 2, "|", 1, "|", 6) +CHAR("br", 2, "|", 1, "│", 7) +CHAR("ul", 2, "_", 1, "_", 5) +CHAR("rl", 2, "-", 1, "‾", 7) +CHAR("bb", 2, "|", 1, "¦", 6) +CHAR("sl", 2, "/", 1, "/", 5) +CHAR("rs", 2, "\\", 1, "\", 5) + +/* Text markers. 
*/ +CHAR("ci", 2, "o", 1, "○", 7) +CHAR("bu", 2, "o", 1, "•", 7) +CHAR("dd", 2, "=", 1, "‡", 7) +CHAR("dg", 2, "-", 1, "†", 7) +CHAR("lz", 2, "<>", 2, "◊", 7) +CHAR("sq", 2, "[]", 2, "□", 7) +CHAR("ps", 2, "9|", 2, "¶", 6) +CHAR("sc", 2, "S", 1, "§", 6) +CHAR("lh", 2, "<=", 2, "☜", 7) +CHAR("rh", 2, "=>", 2, "☞", 7) +CHAR("at", 2, "@", 1, "@", 5) +CHAR("sh", 2, "#", 1, "#", 5) +CHAR("CR", 2, "_|", 2, "↵", 7) +CHAR("OK", 2, "\\/", 2, "✓", 8) + +/* Legal symbols. */ +CHAR("co", 2, "(C)", 3, "©", 6) +CHAR("rg", 2, "(R)", 3, "®", 6) +CHAR("tm", 2, "tm", 2, "™", 7) + +/* Punctuation. */ +CHAR(".", 1, ".", 1, ".", 1) +CHAR("r!", 2, "i", 1, "¡", 6) +CHAR("r?", 2, "c", 1, "¿", 6) +CHAR("em", 2, "--", 2, "—", 7) +CHAR("en", 2, "-", 1, "–", 7) +CHAR("hy", 2, "-", 1, "‐", 7) +CHAR("e", 1, "\\", 1, "\\", 1) + +/* Units. */ +CHAR("de", 2, "o", 1, "°", 6) +CHAR("%0", 2, "%o", 2, "‰", 7) +CHAR("fm", 2, "\'", 1, "′", 7) +CHAR("sd", 2, "\"", 1, "″", 7) +CHAR("mc", 2, "mu", 2, "µ", 6) + +CHAR_TBL_END diff --git a/commands/mdocml/compat.c b/commands/mdocml/compat.c new file mode 100644 index 000000000..f00cc5c6b --- /dev/null +++ b/commands/mdocml/compat.c @@ -0,0 +1,95 @@ +/* $OpenBSD: strlcat.c,v 1.13 2005/08/08 08:05:37 espie Exp $ */ + +/* + * Copyright (c) 1998 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +int dummy; /* To prevent an empty object file */ + +#ifndef HAVE_STRLCAT +/* + * Appends src to string dst of size siz (unlike strncat, siz is the + * full size of dst, not space left). At most siz-1 characters + * will be copied. Always NUL terminates (unless siz <= strlen(dst)). + * Returns strlen(src) + MIN(siz, strlen(initial dst)). + * If retval >= siz, truncation occurred. + */ +size_t +strlcat(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + size_t dlen; + + /* Find the end of dst and adjust bytes left but don't go past end */ + while (n-- != 0 && *d != '\0') + d++; + dlen = d - dst; + n = siz - dlen; + + if (n == 0) + return(dlen + strlen(s)); + while (*s != '\0') { + if (n != 1) { + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + + return(dlen + (s - src)); /* count does not include NUL */ +} +#endif + +#ifndef HAVE_STRLCPY +/* + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. 
+ */ +size_t +strlcpy(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + + /* Not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + + return(s - src - 1); /* count does not include NUL */ +} +#endif diff --git a/commands/mdocml/config.h b/commands/mdocml/config.h new file mode 100644 index 000000000..17072dd3a --- /dev/null +++ b/commands/mdocml/config.h @@ -0,0 +1,20 @@ + +#ifndef MANDOC_CONFIG_H +#define MANDOC_CONFIG_H + +#define HAVE_STRLCAT +#define HAVE_STRLCPY + +#include +#include + +#ifndef __GNUC__ +#define inline +#endif + +#ifndef _DIAGASSERT +#define _DIAGASSERT assert +#endif + +#endif /* MANDOC_CONFIG_H */ + diff --git a/commands/mdocml/example.style.css b/commands/mdocml/example.style.css new file mode 100644 index 000000000..fcaf64b60 --- /dev/null +++ b/commands/mdocml/example.style.css @@ -0,0 +1,72 @@ +/* $Id: example.style.css,v 1.20 2010/04/08 08:17:55 kristaps Exp $ */ + +div.body { font-family: monospace; + min-width: 580px; width: 580px; } /* Top-most div tag. */ + +div.sec-head { font-weight: bold; font-style: normal; } /* Sections (Sh). */ +div.sec-body { } +div.sec-block { } + +div.ssec-head { font-weight: bold; font-style: normal; } /* Sub-sections (Ss). */ +div.ssec-body { } +div.ssec-block { } + +span.addr { } /* Address (Ad). */ +span.arg { font-style: italic; font-weight: normal; } /* Command argument (Ar). */ +span.author { } /* Author name (An). */ +span.bold { font-weight: bold; font-style: normal; } /* Generically bold (SB, BI, IB, BR, RB, B). */ +span.cmd { font-weight: bold; font-style: normal; } /* Command (Cm). */ +span.config { font-weight: bold; font-style: normal; } /* Config statement (Cd). */ +span.define { } /* Defines (Dv). 
*/ +span.desc { } /* Nd. After em-dash. */ +span.diag { font-weight: bold; font-style: normal; } /* Diagnostic (Bl -diag). */ +span.emph { font-style: italic; font-weight: normal; } /* Emphasis (Em). */ +span.env { } /* Environment variables (Ev). */ +span.errno { } /* Error string (Er). */ +span.farg { font-style: italic; font-weight: normal; } /* Function argument (Fa, Fn). */ +span.file { font-style: italic; font-weight: normal; } /* File (Pa). */ +span.flag { font-weight: bold; font-style: normal; } /* Flag (Fl, Cm). */ +span.fname { font-weight: bold; font-style: normal; } /* Function name (Fa, Fn, Rv). */ +span.ftype { font-style: italic; font-weight: normal; } /* Function types (Ft, Fn). */ +span.includes { font-weight: bold; font-style: normal; } /* Header includes (In). */ +span.italic { font-style: italic; font-weight: normal; } /* Generically italic (BI, IB, I). */ +span.lib { } /* Library (Lb). */ +span.lit { } /* Literals (Bf -literal). */ +span.macro { font-weight: bold; font-style: normal; } /* Macro-ish thing (Fd). */ +span.name { font-weight: bold; font-style: normal; } /* Name of utility (Nm). */ +span.opt { } /* Options (Op, Oo/Oc). */ +span.ref { } /* Citations (Rs). */ +span.ref-auth { } /* Reference author (%A). */ +span.ref-book { font-style: italic; font-weight: normal; } /* Reference book (%B). */ +span.ref-city { } /* Reference city (%C). */ +span.ref-date { } /* Reference date (%D). */ +span.ref-issue { font-style: italic; font-weight: normal; } /* Reference issuer/publisher (%I). */ +span.ref-jrnl { font-style: italic; font-weight: normal; } /* Reference journal (%J). */ +span.ref-num { } /* Reference number (%N). */ +span.ref-opt { } /* Reference optionals (%O). */ +span.ref-page { } /* Reference page (%P). */ +span.ref-corp { } /* Reference corporate/foreign author (%Q). */ +span.ref-rep { } /* Reference report (%R). */ +span.ref-title { text-decoration: underline; } /* Reference title (%T). 
*/ +span.ref-vol { } /* Reference volume (%V). */ +span.roman { font-style: normal; font-weight: normal; } /* Generic font. */ +span.small { font-size: smaller; } /* Generically small (SB, SM). */ +span.symb { font-weight: bold; font-style: normal; } /* Symbols. */ +span.type { font-style: italic; font-weight: normal; } /* Variable types (Vt). */ +span.unix { } /* Unices (Ux, Ox, Nx, Fx, Bx, Bsx, Dx). */ +span.utility { font-weight: bold; font-style: normal; } /* Name of utility (Ex). */ +span.var { font-weight: bold; font-style: normal; } /* Variables (Rv). */ + +a.link-ext { } /* Off-site link (Lk). */ +a.link-includes { } /* Include-file link (In). */ +a.link-mail { } /* Mailto links (Mt). */ +a.link-man { } /* Manual links (Xr). */ +a.link-ref { } /* Reference section links (%Q). */ +a.link-sec { } /* Section links (Sx). */ + +div.emph { font-style: italic; font-weight: normal; } /* Emphasis (Bl -emphasis). */ +div.lit { } /* Literal (D1, Bd -literal, Dl, Bd -literal). */ +div.symb { font-weight: bold; font-style: normal; } /* Symbols (Bl -symbolic). */ + +table.footer { } /* Document footer. */ +table.header { } /* Document header. 
*/ diff --git a/commands/mdocml/external.png b/commands/mdocml/external.png new file mode 100644 index 0000000000000000000000000000000000000000..419c06fb960b0b665791c90044a78621616a4cb8 GIT binary patch literal 165 zcmeAS@N?(olHy`uVBq!ia0vp^AT}2VGmzZ%#=aj&i3a$DxTeiKV?6WB%rpNP(#|lX z{f7XTR~%D;3fN16{DL7O3{u|AZa^UmPZ!4!iE!1^jzSC$EX7 z>#2$@InOtpuqyg8<+R}9g-+|v&13&5`dCp`{|95_8^+nQkC>(b&0z3!^>bP0l+XkK DO07OF literal 0 HcmV?d00001 diff --git a/commands/mdocml/html.c b/commands/mdocml/html.c new file mode 100644 index 000000000..817272ad2 --- /dev/null +++ b/commands/mdocml/html.c @@ -0,0 +1,754 @@ +/* $Id: html.c,v 1.102 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "out.h" +#include "chars.h" +#include "html.h" +#include "main.h" + +struct htmldata { + const char *name; + int flags; +#define HTML_CLRLINE (1 << 0) +#define HTML_NOSTACK (1 << 1) +#define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. 
*/ +}; + +static const struct htmldata htmltags[TAG_MAX] = { + {"html", HTML_CLRLINE}, /* TAG_HTML */ + {"head", HTML_CLRLINE}, /* TAG_HEAD */ + {"body", HTML_CLRLINE}, /* TAG_BODY */ + {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */ + {"title", HTML_CLRLINE}, /* TAG_TITLE */ + {"div", HTML_CLRLINE}, /* TAG_DIV */ + {"h1", 0}, /* TAG_H1 */ + {"h2", 0}, /* TAG_H2 */ + {"span", 0}, /* TAG_SPAN */ + {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */ + {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */ + {"a", 0}, /* TAG_A */ + {"table", HTML_CLRLINE}, /* TAG_TABLE */ + {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */ + {"tr", HTML_CLRLINE}, /* TAG_TR */ + {"td", HTML_CLRLINE}, /* TAG_TD */ + {"li", HTML_CLRLINE}, /* TAG_LI */ + {"ul", HTML_CLRLINE}, /* TAG_UL */ + {"ol", HTML_CLRLINE}, /* TAG_OL */ +}; + +static const char *const htmlfonts[HTMLFONT_MAX] = { + "roman", + "bold", + "italic" +}; + +static const char *const htmlattrs[ATTR_MAX] = { + "http-equiv", + "content", + "name", + "rel", + "href", + "type", + "media", + "class", + "style", + "width", + "valign", + "target", + "id", + "summary", +}; + +static void print_spec(struct html *, const char *, size_t); +static void print_res(struct html *, const char *, size_t); +static void print_ctag(struct html *, enum htmltag); +static void print_doctype(struct html *); +static void print_xmltype(struct html *); +static int print_encode(struct html *, const char *, int); +static void print_metaf(struct html *, enum roffdeco); +static void print_attr(struct html *, + const char *, const char *); +static void *ml_alloc(char *, enum htmltype); + + +static void * +ml_alloc(char *outopts, enum htmltype type) +{ + struct html *h; + const char *toks[4]; + char *v; + + toks[0] = "style"; + toks[1] = "man"; + toks[2] = "includes"; + toks[3] = NULL; + + h = calloc(1, sizeof(struct html)); + if (NULL == h) { + perror(NULL); + exit(EXIT_FAILURE); + } + 
+ h->type = type; + h->tags.head = NULL; + h->ords.head = NULL; + h->symtab = chars_init(CHARS_HTML); + + while (outopts && *outopts) + switch (getsubopt(&outopts, UNCONST(toks), &v)) { + case (0): + h->style = v; + break; + case (1): + h->base_man = v; + break; + case (2): + h->base_includes = v; + break; + default: + break; + } + + return(h); +} + +void * +html_alloc(char *outopts) +{ + + return(ml_alloc(outopts, HTML_HTML_4_01_STRICT)); +} + + +void * +xhtml_alloc(char *outopts) +{ + + return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT)); +} + + +void +html_free(void *p) +{ + struct tag *tag; + struct ord *ord; + struct html *h; + + h = (struct html *)p; + + while ((ord = h->ords.head) != NULL) { + h->ords.head = ord->next; + free(ord); + } + + while ((tag = h->tags.head) != NULL) { + h->tags.head = tag->next; + free(tag); + } + + if (h->symtab) + chars_free(h->symtab); + + free(h); +} + + +void +print_gen_head(struct html *h) +{ + struct htmlpair tag[4]; + + tag[0].key = ATTR_HTTPEQUIV; + tag[0].val = "Content-Type"; + tag[1].key = ATTR_CONTENT; + tag[1].val = "text/html; charset=utf-8"; + print_otag(h, TAG_META, 2, tag); + + tag[0].key = ATTR_NAME; + tag[0].val = "resource-type"; + tag[1].key = ATTR_CONTENT; + tag[1].val = "document"; + print_otag(h, TAG_META, 2, tag); + + if (h->style) { + tag[0].key = ATTR_REL; + tag[0].val = "stylesheet"; + tag[1].key = ATTR_HREF; + tag[1].val = h->style; + tag[2].key = ATTR_TYPE; + tag[2].val = "text/css"; + tag[3].key = ATTR_MEDIA; + tag[3].val = "all"; + print_otag(h, TAG_LINK, 4, tag); + } +} + + +static void +print_spec(struct html *h, const char *p, size_t len) +{ + const char *rhs; + size_t sz; + + rhs = chars_a2ascii(h->symtab, p, len, &sz); + + if (NULL == rhs) + return; + fwrite(rhs, 1, sz, stdout); +} + + +static void +print_res(struct html *h, const char *p, size_t len) +{ + const char *rhs; + size_t sz; + + rhs = chars_a2res(h->symtab, p, len, &sz); + + if (NULL == rhs) + return; + fwrite(rhs, 1, sz, stdout); +} + 
+ +struct tag * +print_ofont(struct html *h, enum htmlfont font) +{ + struct htmlpair tag; + + h->metal = h->metac; + h->metac = font; + + /* FIXME: DECO_ROMAN should just close out preexisting. */ + + if (h->metaf && h->tags.head == h->metaf) + print_tagq(h, h->metaf); + + PAIR_CLASS_INIT(&tag, htmlfonts[font]); + h->metaf = print_otag(h, TAG_SPAN, 1, &tag); + return(h->metaf); +} + + +static void +print_metaf(struct html *h, enum roffdeco deco) +{ + enum htmlfont font; + + switch (deco) { + case (DECO_PREVIOUS): + font = h->metal; + break; + case (DECO_ITALIC): + font = HTMLFONT_ITALIC; + break; + case (DECO_BOLD): + font = HTMLFONT_BOLD; + break; + case (DECO_ROMAN): + font = HTMLFONT_NONE; + break; + default: + abort(); + /* NOTREACHED */ + } + + (void)print_ofont(h, font); +} + + +/* + * Write the string "p" to stdout. The markup metacharacters '<', '>' + * and '&' are replaced by their character entity references so that + * text is never parsed as markup; ASCII_HYPH is printed as '-'; a + * backslash introduces an escape decoded by a2roffdeco(). Font + * escapes are ignored when "norecurse" is non-zero. Returns non-zero + * if the string ends with a DECO_NOSPACE escape. + */ +static int +print_encode(struct html *h, const char *p, int norecurse) +{ + size_t sz; + int len, nospace; + const char *seq; + enum roffdeco deco; + static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' }; + + nospace = 0; + + for (; *p; p++) { + /* Copy the run of ordinary characters verbatim. */ + sz = strcspn(p, rejs); + + fwrite(p, 1, sz, stdout); + p += /* LINTED */ + sz; + + if ('<' == *p) { + printf("&lt;"); + continue; + } else if ('>' == *p) { + printf("&gt;"); + continue; + } else if ('&' == *p) { + printf("&amp;"); + continue; + } else if (ASCII_HYPH == *p) { + /* + * Note: "soft hyphens" aren't graphically + * displayed when not breaking the text; we want + * them to be displayed. 
+ */ + /*printf("­");*/ + putchar('-'); + continue; + } else if ('\0' == *p) + break; + + seq = ++p; + len = a2roffdeco(&deco, &seq, &sz); + + switch (deco) { + case (DECO_RESERVED): + print_res(h, seq, sz); + break; + case (DECO_SPECIAL): + print_spec(h, seq, sz); + break; + case (DECO_PREVIOUS): + /* FALLTHROUGH */ + case (DECO_BOLD): + /* FALLTHROUGH */ + case (DECO_ITALIC): + /* FALLTHROUGH */ + case (DECO_ROMAN): + if (norecurse) + break; + print_metaf(h, deco); + break; + default: + break; + } + + p += len - 1; + + if (DECO_NOSPACE == deco && '\0' == *(p + 1)) + nospace = 1; + } + + return(nospace); +} + + +static void +print_attr(struct html *h, const char *key, const char *val) +{ + printf(" %s=\"", key); + (void)print_encode(h, val, 1); + putchar('\"'); +} + + +struct tag * +print_otag(struct html *h, enum htmltag tag, + int sz, const struct htmlpair *p) +{ + int i; + struct tag *t; + + /* Push this tags onto the stack of open scopes. */ + + if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { + t = malloc(sizeof(struct tag)); + if (NULL == t) { + perror(NULL); + exit(EXIT_FAILURE); + } + t->tag = tag; + t->next = h->tags.head; + h->tags.head = t; + } else + t = NULL; + + if ( ! (HTML_NOSPACE & h->flags)) + if ( ! (HTML_CLRLINE & htmltags[tag].flags)) + putchar(' '); + + /* Print out the tag name and attributes. */ + + printf("<%s", htmltags[tag].name); + for (i = 0; i < sz; i++) + print_attr(h, htmlattrs[p[i].key], p[i].val); + + /* Add non-overridable attributes. */ + + if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) { + print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml"); + print_attr(h, "xml:lang", "en"); + print_attr(h, "lang", "en"); + } + + /* Accomodate for XML "well-formed" singleton escaping. 
*/ + + if (HTML_AUTOCLOSE & htmltags[tag].flags) + switch (h->type) { + case (HTML_XHTML_1_0_STRICT): + putchar('/'); + break; + default: + break; + } + + putchar('>'); + + h->flags |= HTML_NOSPACE; + return(t); +} + + +/* + * Print the closing tag for "tag". Tags flagged HTML_CLRLINE are + * followed by a newline and suppress the next leading space. + */ +static void +print_ctag(struct html *h, enum htmltag tag) +{ + + printf("</%s>", htmltags[tag].name); + if (HTML_CLRLINE & htmltags[tag].flags) { + h->flags |= HTML_NOSPACE; + putchar('\n'); + } +} + + +void +print_gen_decls(struct html *h) +{ + + print_xmltype(h); + print_doctype(h); +} + + +static void +print_xmltype(struct html *h) +{ + + /* + * NOTE(review): the format string below is empty, so nothing is + * printed for XHTML; the XML declaration text appears to be + * missing -- confirm against the upstream sources. + */ + if (HTML_XHTML_1_0_STRICT == h->type) + printf(""); +} + + +/* + * Print the document type declaration matching h->type: HTML 4.01 + * Strict, or XHTML 1.0 Strict for every other type. + */ +static void +print_doctype(struct html *h) +{ + const char *doctype; + const char *dtd; + const char *name; + + switch (h->type) { + case (HTML_HTML_4_01_STRICT): + name = "HTML"; + doctype = "-//W3C//DTD HTML 4.01//EN"; + dtd = "http://www.w3.org/TR/html4/strict.dtd"; + break; + default: + name = "html"; + doctype = "-//W3C//DTD XHTML 1.0 Strict//EN"; + dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; + break; + } + + printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", + name, doctype, dtd); +} + + +/* + * Print one word of text, preceded by a space unless HTML_NOSPACE is + * set. A single closing delimiter suppresses the preceding space + * (unless HTML_IGNDELIM is set); a single opening delimiter suppresses + * the following one. + */ +void +print_text(struct html *h, const char *p) +{ + + /* Check the pointer before it is first dereferenced. */ + assert(p); + + if (*p && 0 == *(p + 1)) + switch (*p) { + case('.'): + /* FALLTHROUGH */ + case(','): + /* FALLTHROUGH */ + case(';'): + /* FALLTHROUGH */ + case(':'): + /* FALLTHROUGH */ + case('?'): + /* FALLTHROUGH */ + case('!'): + /* FALLTHROUGH */ + case(')'): + /* FALLTHROUGH */ + case(']'): + if ( ! (HTML_IGNDELIM & h->flags)) + h->flags |= HTML_NOSPACE; + break; + default: + break; + } + + if ( ! (h->flags & HTML_NOSPACE)) + putchar(' '); + + if ( ! print_encode(h, p, 0)) + h->flags &= ~HTML_NOSPACE; + + /* + * Note that we don't process the pipe: the parser sees it as + * punctuation, but we don't in terms of typography. 
+ */ + if (*p && 0 == *(p + 1)) + switch (*p) { + case('('): + /* FALLTHROUGH */ + case('['): + h->flags |= HTML_NOSPACE; + break; + default: + break; + } +} + + +void +print_tagq(struct html *h, const struct tag *until) +{ + struct tag *tag; + + while ((tag = h->tags.head) != NULL) { + if (tag == h->metaf) + h->metaf = NULL; + print_ctag(h, tag->tag); + h->tags.head = tag->next; + free(tag); + if (until && tag == until) + return; + } +} + + +void +print_stagq(struct html *h, const struct tag *suntil) +{ + struct tag *tag; + + while ((tag = h->tags.head) != NULL) { + if (suntil && tag == suntil) + return; + if (tag == h->metaf) + h->metaf = NULL; + print_ctag(h, tag->tag); + h->tags.head = tag->next; + free(tag); + } +} + + +void +bufinit(struct html *h) +{ + + h->buf[0] = '\0'; + h->buflen = 0; +} + + +void +bufcat_style(struct html *h, const char *key, const char *val) +{ + + bufcat(h, key); + bufncat(h, ":", 1); + bufcat(h, val); + bufncat(h, ";", 1); +} + + +void +bufcat(struct html *h, const char *p) +{ + + bufncat(h, p, strlen(p)); +} + + +void +buffmt(struct html *h, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + (void)vsnprintf(h->buf + (int)h->buflen, + BUFSIZ - h->buflen - 1, fmt, ap); + va_end(ap); + h->buflen = strlen(h->buf); +} + + +/* + * Append at most "sz" bytes of "p" to h->buf, clipping the count so + * that the buffer never holds more than BUFSIZ - 1 characters. + */ +void +bufncat(struct html *h, const char *p, size_t sz) +{ + + if (h->buflen + sz > BUFSIZ - 1) + sz = BUFSIZ - 1 - h->buflen; + + (void)strncat(h->buf, p, sz); + h->buflen += sz; +} + + +/* + * Append the h->base_includes template to the buffer, replacing each + * "%I" with "name". Any other two-character "%x" sequence is copied + * through unchanged. + */ +void +buffmt_includes(struct html *h, const char *name) +{ + const char *p, *pp; + + pp = h->base_includes; + + while (NULL != (p = strchr(pp, '%'))) { + bufncat(h, pp, (size_t)(p - pp)); + switch (*(p + 1)) { + case('I'): + bufcat(h, name); + break; + case('\0'): + /* + * Lone '%' at the end of the template: copy it + * and stop, so we never step past the NUL. + */ + bufncat(h, p, 1); + return; + default: + bufncat(h, p, 2); + break; + } + pp = p + 2; + } + if (pp) + bufcat(h, pp); +} + + +/* + * Append the h->base_man template to the buffer, replacing each "%N" + * with "name" and each "%S" with "sec" ("1" if sec is NULL). Any + * other two-character "%x" sequence is copied through unchanged. + */ +void +buffmt_man(struct html *h, + const char *name, const char *sec) +{ + const char *p, *pp; + + pp = h->base_man; + + /* LINTED */ + while (NULL != (p = strchr(pp, '%'))) { + bufncat(h, pp, (size_t)(p - pp)); + switch (*(p + 1)) { + case('S'): + bufcat(h, sec ? sec : "1"); + break; + case('N'): + /* "name" is data: never use it as a format string. */ + bufcat(h, name); + break; + case('\0'): + /* + * Lone '%' at the end of the template: copy it + * and stop, so we never step past the NUL. + */ + bufncat(h, p, 1); + return; + default: + bufncat(h, p, 2); + break; + } + pp = p + 2; + } + if (pp) + bufcat(h, pp); +} + + +void +bufcat_su(struct html *h, const char *p, const struct roffsu *su) +{ + double v; + const char *u; + + v = su->scale; + + switch (su->unit) { + case (SCALE_CM): + u = "cm"; + break; + case (SCALE_IN): + u = "in"; + break; + case (SCALE_PC): + u = "pc"; + break; + case (SCALE_PT): + u = "pt"; + break; + case (SCALE_EM): + u = "em"; + break; + case (SCALE_MM): + if (0 == (v /= 100)) + v = 1; + u = "em"; + break; + case (SCALE_EN): + u = "ex"; + break; + case (SCALE_BU): + u = "ex"; + break; + case (SCALE_VS): + u = "em"; + break; + default: + u = "ex"; + break; + } + + if (su->pt) + buffmt(h, "%s: %f%s;", p, v, u); + else + /* LINTED */ + buffmt(h, "%s: %d%s;", p, (int)v, u); +} + + +void +html_idcat(char *dst, const char *src, int sz) +{ + int ssz; + + assert(sz); + + /* Cf. . */ + + for ( ; *dst != '\0' && sz; dst++, sz--) + /* Jump to end. 
*/ ; + + assert(sz > 2); + + /* We can't start with a number (bah). */ + + *dst++ = 'x'; + *dst = '\0'; + sz--; + + /* + * Append each source byte as two hex digits. Convert through + * unsigned char first: a negative plain char would otherwise be + * sign-extended and printed with more than two digits. + */ + for ( ; *src != '\0' && sz > 1; src++) { + ssz = snprintf(dst, (size_t)sz, "%.2x", + (unsigned int)(unsigned char)*src); + sz -= ssz; + dst += ssz; + } +} diff --git a/commands/mdocml/html.h b/commands/mdocml/html.h new file mode 100644 index 000000000..874adc075 --- /dev/null +++ b/commands/mdocml/html.h @@ -0,0 +1,156 @@ +/* $Id: html.h,v 1.24 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifndef HTML_H +#define HTML_H + +__BEGIN_DECLS + +enum htmltag { + TAG_HTML, + TAG_HEAD, + TAG_BODY, + TAG_META, + TAG_TITLE, + TAG_DIV, + TAG_H1, + TAG_H2, + TAG_SPAN, + TAG_LINK, + TAG_BR, + TAG_A, + TAG_TABLE, + TAG_COL, + TAG_TR, + TAG_TD, + TAG_LI, + TAG_UL, + TAG_OL, + TAG_MAX +}; + +enum htmlattr { + ATTR_HTTPEQUIV, + ATTR_CONTENT, + ATTR_NAME, + ATTR_REL, + ATTR_HREF, + ATTR_TYPE, + ATTR_MEDIA, + ATTR_CLASS, + ATTR_STYLE, + ATTR_WIDTH, + ATTR_VALIGN, + ATTR_TARGET, + ATTR_ID, + ATTR_SUMMARY, + ATTR_MAX +}; + +enum htmlfont { + HTMLFONT_NONE = 0, + HTMLFONT_BOLD, + HTMLFONT_ITALIC, + HTMLFONT_MAX +}; + +struct tag { + struct tag *next; + enum htmltag tag; +}; + +struct ord { + struct ord *next; + const void *cookie; + int pos; +}; + +struct tagq { + struct tag *head; +}; +struct ordq { + struct ord *head; +}; + +struct htmlpair { + enum htmlattr key; + const char *val; +}; + +#define PAIR_INIT(p, t, v) \ + do { \ + (p)->key = (t); \ + (p)->val = (v); \ + } while (/* CONSTCOND */ 0) + +#define PAIR_ID_INIT(p, v) PAIR_INIT(p, ATTR_ID, v) +#define PAIR_CLASS_INIT(p, v) PAIR_INIT(p, ATTR_CLASS, v) +#define PAIR_HREF_INIT(p, v) PAIR_INIT(p, ATTR_HREF, v) +#define PAIR_STYLE_INIT(p, h) PAIR_INIT(p, ATTR_STYLE, (h)->buf) +#define PAIR_SUMMARY_INIT(p, v) PAIR_INIT(p, ATTR_SUMMARY, v) + +enum htmltype { + HTML_HTML_4_01_STRICT, + HTML_XHTML_1_0_STRICT +}; + +struct html { + int flags; +#define HTML_NOSPACE (1 << 0) +#define HTML_IGNDELIM (1 << 2) + struct tagq tags; + struct ordq ords; + void *symtab; + char *base; + char *base_man; + char *base_includes; + char *style; + char buf[BUFSIZ]; + size_t buflen; + struct tag *metaf; + enum htmlfont metal; + enum htmlfont metac; + enum htmltype type; +}; + +struct roffsu; + +void print_gen_decls(struct html *); +void print_gen_head(struct html *); +struct tag *print_ofont(struct html *, enum htmlfont); +struct tag *print_otag(struct html *, enum htmltag, + int, const struct htmlpair *); +void print_tagq(struct html 
*, const struct tag *); +void print_stagq(struct html *, const struct tag *); +void print_text(struct html *, const char *); + +void bufcat_su(struct html *, const char *, + const struct roffsu *); +void buffmt_man(struct html *, + const char *, const char *); +void buffmt_includes(struct html *, const char *); +void buffmt(struct html *, const char *, ...); +void bufcat(struct html *, const char *); +void bufcat_style(struct html *, + const char *, const char *); +void bufncat(struct html *, const char *, size_t); +void bufinit(struct html *); + +void html_idcat(char *, const char *, int); + +__END_DECLS + +#endif /*!HTML_H*/ diff --git a/commands/mdocml/index.css b/commands/mdocml/index.css new file mode 100644 index 000000000..51025707a --- /dev/null +++ b/commands/mdocml/index.css @@ -0,0 +1,48 @@ +body { color: #333333; + font-size: smaller; + font-family: Verdana, Tahoma, Arial, sans-serif; } + +table.frame { max-width: 800px; + padding-left: 10px; } + +table { padding-left: 40px; } + +p { padding-left: 40px; + text-align: justify; } + +h1 { font-weight: bold; + font-size: small; + font-family: Verdana, Tahoma, Arial, sans-serif; } + +h2 { font-weight: bold; + font-size: small; + padding-left: 20px; + margin-bottom: 0px; + font-family: Verdana, Tahoma, Arial, sans-serif; } + +span.nm { color: #000000; font-weight: bold; } + +span.attn { color: #000000; font-weight: bold; } + +span.flag { font-weight: bold; } + +div.head { border-bottom: 1px solid #dddddd; + padding-bottom: 5px; + text-align: right; } + +div.subhead { font-size: smaller; + margin-bottom: 1em; } + +div.foot { border-top: 1px solid #dddddd; + padding-top: 5px; + font-size: smaller; + text-align: right; } + +a.external { background: transparent url(external.png) center right no-repeat; + padding-right: 12px; } + +span.date { color: #000000; } + +div.news { margin-bottom: 2em; } + +div.news ul { margin-left: 4em; } diff --git a/commands/mdocml/index.sgml b/commands/mdocml/index.sgml new file mode 
100644 index 000000000..43caf293a --- /dev/null +++ b/commands/mdocml/index.sgml @@ -0,0 +1,420 @@ + + + + + + + mdocml | mdoc macro compiler + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ mdocml – mdoc macro compiler +
+ +
+

+ DESCRIPTION +

+ +

+ mdocml is a suite of tools compiling -mdoc, the roff macro package + of choice for BSD manual pages, and -man, the predominant historical package for UNIX + manuals. The mission of mdocml is to deprecate groff, the GNU roff implementation, for + displaying -mdoc pages whilst providing token support for -man. +

+ +

+ Why? groff amounts to over 5 MB of source code, most of which is C++ and all of which is GPL. It runs + slowly, produces uncertain output, and varies in operation from system to system. mdocml strives to fix + this (respectively small, C, ISC-licensed, fast and regular). +

+ +

+ The core of mdocml is composed of the libmdoc, libman, and libroff validating compiler libraries. All + are simple, fast libraries operating on memory buffers, so they may be used for a variety of front-ends + (terminal-based, CGI and so on). The front-end is mandoc, which formats + manuals for display. +

+ +

+ The mdocml suite is a BSD.lv + Project member. +

+
+

+ SOURCES +

+ +

+ Sources correctly build and install on DragonFly BSD, FreeBSD, OpenBSD, NetBSD, GNU/Linux, and many + other operating systems, tested variously on i386, AMD64, alpha, and others. The most current version + is @VERSION@, dated @VDATE@. A full ChangeLog (txt) is written with each release. +

+ +

+ Current +

+ + + + + + + + + + + + + + +
Source archive + /snapshots/mdocml.tar.gz + (md5) +
Online source + cvsweb +
+ +

+ Downstream +

+ + + + + + + + + + + + + + + + + + + + + + +
DragonFly BSD + usr.bin/mandoc +
FreeBSD + ports/textproc/mdocml +
NetBSD + src/external/bsd/mdocml +
OpenBSD + src/usr.bin/mandoc +
+ +

+ Historical +

+ + + + + + + + + + +
Source archive + /snapshots/ +
+
+

+ DOCUMENTATION +

+ +

+ These manuals are generated automatically and refer to the current snapshot. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
man(3) + man macro compiler library + +
man(7) + man language reference + +
mandoc(1) + format and display UNIX manuals + +
mandoc_char(7) + mandoc special characters + +
mdoc(3) + mdoc macro compiler library + +
mdoc(7) + mdoc language reference + +
roff(3) + roff macro compiler library + +
roff(7) + roff-mandoc language reference + +
+ +

+ See Writing UNIX Manual Pages for a general + introduction to manpages and mdoc. +

+
+

+ CONTACT +

+ +

+ For all issues related to mdocml, contact Kristaps Dzonsons, kris...@bsd.lv. +

+ +

+ You may also subscribe to several mailing lists (these require subscription, which is moderated). An + archive is not yet available on-line, although you may request one once subscribed. +

+ + + + + + + + + + + + + + + + + + +
+ disc...@mdocml.bsd.lv + high-level discussions and version announcements
+ tec...@mdocml.bsd.lv + low-level discussions
+ sou...@mdocml.bsd.lv + source commit messages
+
+

+ NEWS +

+
+

+ 19-06-2010: + version 1.10.2 +

+

+ Small release featuring text-decoration in -Tps output, a few + minor relaxations of errors, and some optimisations. +

+
+
+

+ 07-06-2010: + version 1.10.1 +

+

+ This primarily focusses on the Bl and It macros described in mdoc. Multi-line column support is now fully compatible with + groff, as are implicit list entries for columns. Removed manuals.7 in favour of http://manpages.bsd.lv. The way we + handle the SYNOPSIS section (see the SYNOPSIS documentation in MANUAL STRUCTURE) has also + been considerably simplified compared to groff's method. Furthermore, the -Owidth=width output option has been added to -Tascii (see mandoc). Lastly, initial + PostScript output has been added with the -Tps option to mandoc. It's brutally simple at the moment: fixed-font, with + no font decorations. +

+
+
+

+ 29-05-2010: + version 1.10.0 +

+

+ Release consisting of the results from the m2k10 hackathon and up-merge from OpenBSD. + This requires a significant note of thanks to Ingo Schwarze (OpenBSD) and Joerg + Sonnenberger (NetBSD) for their hard work, and again to Joerg for hosting m2k10. + Highlights (mostly cribbed from Ingo's m2k10 report) follow in no particular order: +

+
    +
  • a libroff preprocessor in front of libmdoc and libman stripping out + roff instructions;
  • +
  • end-of-sentence (EOS) detection in free-form and macro lines;
  • +
  • correct handling of tab-separated columnar lists in -mdoc;
  • +
  • improved main calling routines to optionally use mmap() for better + performance;
  • +
  • cleaned up exiting when invoked as -Tlint or over + multiple files with -fign-errors;
  • +
  • error and warning message handling re-written to be unified for libroff, libmdoc, and libman;
  • +
  • handling of badly-nested explicit-scoped macros;
  • +
  • improved free-form text parsing in libman and libmdoc;
  • +
  • significant GNU troff compatibility improvements in -Tascii, largely in terms of spacing;
  • +
  • a regression framework for making sure the many fragilities of GNU troff + aren't trampled in subsequent work;
  • +
  • support for -Tascii breaking at hyphens + encountered in free-form text;
  • +
  • and many more minor fixes and improvements (no really, consult cvsweb and see + for yourself!).
  • +
+
+
+

+ 13-05-2010: + version 1.9.25 +

+

+ Fixed handling of \*(Ba escape. Backed out -fno-ign-chars (pointless complexity). Fixed erroneous + breaking of literal lines. Fixed SYNOPSIS breaking lines before non-initial + macros. Changed default section ordering. Most importantly, the framework for + end-of-sentence double-spacing is in place, now implemented for the + end-of-sentence, end-of-line rule. This is a stable roll-back point + before the mandoc hackathon in Rostock! +

+
+
+

+ 09-05-2010: + version 1.9.24 +

+

+ Rolled back break-at-hyphen. -DUGLY is now the + default (no feature splits!). Free-form text is not de-chunked any more: lines + are passed wholesale into the front-end, including whitespace. Added mailing + lists. Lastly, mdocml is the focus of two Google Summer of Code + projects this year: mandoc -Tps + (NetBSD) and BSD-licensed + Text-Processing Tools (FreeBSD). +

+
+

+ See cvsweb for + historical notes. +

+
+
+ Copyright © 2008–2010 Kristaps Dzonsons, $Date: 2010/06/19 20:43:35 $ +
+
+ + diff --git a/commands/mdocml/lib.c b/commands/mdocml/lib.c new file mode 100644 index 000000000..bbf2aec8b --- /dev/null +++ b/commands/mdocml/lib.c @@ -0,0 +1,38 @@ +/* $Id: lib.c,v 1.8 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include "mandoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2lib(const char *p) +{ + +#include "lib.in" + + return(NULL); +} diff --git a/commands/mdocml/lib.in b/commands/mdocml/lib.in new file mode 100644 index 000000000..18ee711c1 --- /dev/null +++ b/commands/mdocml/lib.in @@ -0,0 +1,93 @@ +/* $Id: lib.in,v 1.9 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * These are all possible .Lb strings. When a new library is added, add + * its short-string to the left-hand side and formatted string to the + * right-hand side. + * + * Be sure to escape strings. + */ + +LINE("libarchive", "Reading and Writing Streaming Archives Library (libarchive, \\-larchive)") +LINE("libarm", "ARM Architecture Library (libarm, \\-larm)") +LINE("libarm32", "ARM32 Architecture Library (libarm32, \\-larm32)") +LINE("libbluetooth", "Bluetooth Library (libbluetooth, \\-lbluetooth)") +LINE("libbsm", "Basic Security Module User Library (libbsm, \\-lbsm)") +LINE("libc", "Standard C Library (libc, \\-lc)") +LINE("libc_r", "Reentrant C\\~Library (libc_r, \\-lc_r)") +LINE("libcalendar", "Calendar Arithmetic Library (libcalendar, \\-lcalendar)") +LINE("libcam", "Common Access Method User Library (libcam, \\-lcam)") +LINE("libcdk", "Curses Development Kit Library (libcdk, \\-lcdk)") +LINE("libcipher", "FreeSec Crypt Library (libcipher, \\-lcipher)") +LINE("libcompat", "Compatibility Library (libcompat, \\-lcompat)") +LINE("libcrypt", "Crypt Library (libcrypt, \\-lcrypt)") +LINE("libcurses", "Curses Library (libcurses, \\-lcurses)") +LINE("libdevinfo", "Device and Resource Information Utility Library (libdevinfo, \\-ldevinfo)") +LINE("libdevstat", "Device Statistics Library (libdevstat, \\-ldevstat)") +LINE("libdisk", "Interface to Slice and Partition Labels Library (libdisk, \\-ldisk)") +LINE("libedit", "Command Line Editor Library (libedit, \\-ledit)") +LINE("libelf", "ELF Parsing Library (libelf, \\-lelf)") +LINE("libevent", "Event Notification Library (libevent, \\-levent)") +LINE("libfetch", "File Transfer 
Library for URLs (libfetch, \\-lfetch)") +LINE("libform", "Curses Form Library (libform, \\-lform)") +LINE("libgeom", "Userland API Library for kernel GEOM subsystem (libgeom, \\-lgeom)") +LINE("libgpib", "General-Purpose Instrument Bus (GPIB) library (libgpib, \\-lgpib)") +LINE("libi386", "i386 Architecture Library (libi386, \\-li386)") +LINE("libintl", "Internationalized Message Handling Library (libintl, \\-lintl)") +LINE("libipsec", "IPsec Policy Control Library (libipsec, \\-lipsec)") +LINE("libipx", "IPX Address Conversion Support Library (libipx, \\-lipx)") +LINE("libiscsi", "iSCSI protocol library (libiscsi, \\-liscsi)") +LINE("libjail", "Jail Library (libjail, \\-ljail)") +LINE("libkiconv", "Kernel side iconv library (libkiconv, \\-lkiconv)") +LINE("libkse", "N:M Threading Library (libkse, \\-lkse)") +LINE("libkvm", "Kernel Data Access Library (libkvm, \\-lkvm)") +LINE("libm", "Math Library (libm, \\-lm)") +LINE("libm68k", "m68k Architecture Library (libm68k, \\-lm68k)") +LINE("libmagic", "Magic Number Recognition Library (libmagic, \\-lmagic)") +LINE("libmd", "Message Digest (MD4, MD5, etc.) 
Support Library (libmd, \\-lmd)") +LINE("libmemstat", "Kernel Memory Allocator Statistics Library (libmemstat, \\-lmemstat)") +LINE("libmenu", "Curses Menu Library (libmenu, \\-lmenu)") +LINE("libnetgraph", "Netgraph User Library (libnetgraph, \\-lnetgraph)") +LINE("libnetpgp", "Netpgp signing, verification, encryption and decryption (libnetpgp, \\-lnetpgp)") +LINE("libossaudio", "OSS Audio Emulation Library (libossaudio, \\-lossaudio)") +LINE("libpam", "Pluggable Authentication Module Library (libpam, \\-lpam)") +LINE("libpcap", "Capture Library (libpcap, \\-lpcap)") +LINE("libpci", "PCI Bus Access Library (libpci, \\-lpci)") +LINE("libpmc", "Performance Counters Library (libpmc, \\-lpmc)") +LINE("libposix", "POSIX Compatibility Library (libposix, \\-lposix)") +LINE("libprop", "Property Container Object Library (libprop, \\-lprop)") +LINE("libpthread", "POSIX Threads Library (libpthread, \\-lpthread)") +LINE("libpuffs", "puffs Convenience Library (libpuffs, \\-lpuffs)") +LINE("librefuse", "File System in Userspace Convenience Library (librefuse, \\-lrefuse)") +LINE("libresolv", "DNS Resolver Library (libresolv, \\-lresolv)") +LINE("librpcsec_gss", "RPC GSS-API Authentication Library (librpcsec_gss, \\-lrpcsec_gss)") +LINE("librpcsvc", "RPC Service Library (librpcsvc, \\-lrpcsvc)") +LINE("librt", "POSIX Real\\-time Library (librt, \\-lrt)") +LINE("libsdp", "Bluetooth Service Discovery Protocol User Library (libsdp, \\-lsdp)") +LINE("libssp", "Buffer Overflow Protection Library (libssp, \\-lssp)") +LINE("libtermcap", "Termcap Access Library (libtermcap, \\-ltermcap)") +LINE("libterminfo", "Terminal Information Library (libterminfo, \\-lterminfo)") +LINE("libthr", "1:1 Threading Library (libthr, \\-lthr)") +LINE("libufs", "UFS File System Access Library (libufs, \\-lufs)") +LINE("libugidfw", "File System Firewall Interface Library (libugidfw, \\-lugidfw)") +LINE("libulog", "User Login Record Library (libulog, \\-lulog)") +LINE("libusbhid", "USB Human Interface Devices
Library (libusbhid, \\-lusbhid)") +LINE("libutil", "System Utilities Library (libutil, \\-lutil)") +LINE("libvgl", "Video Graphics Library (libvgl, \\-lvgl)") +LINE("libx86_64", "x86_64 Architecture Library (libx86_64, \\-lx86_64)") +LINE("libz", "Compression Library (libz, \\-lz)") diff --git a/commands/mdocml/libman.h b/commands/mdocml/libman.h new file mode 100644 index 000000000..4a0df6331 --- /dev/null +++ b/commands/mdocml/libman.h @@ -0,0 +1,90 @@ +/* $Id: libman.h,v 1.36 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef LIBMAN_H +#define LIBMAN_H + +#include "man.h" + +enum man_next { + MAN_NEXT_SIBLING = 0, + MAN_NEXT_CHILD +}; + +struct man { + void *data; + mandocmsg msg; + int pflags; /* parse flags (see man.h) */ + int flags; /* parse flags */ +#define MAN_HALT (1 << 0) /* badness happened: die */ +#define MAN_ELINE (1 << 1) /* Next-line element scope. */ +#define MAN_BLINE (1 << 2) /* Next-line block scope. */ +#define MAN_ILINE (1 << 3) /* Ignored in next-line scope. */ +#define MAN_LITERAL (1 << 4) /* Literal input. 
*/ +#define MAN_BPLINE (1 << 5) + enum man_next next; + struct man_node *last; + struct man_node *first; + struct man_meta meta; +}; + +#define MACRO_PROT_ARGS struct man *m, enum mant tok, int line, \ + int ppos, int *pos, char *buf + +struct man_macro { + int (*fp)(MACRO_PROT_ARGS); + int flags; +#define MAN_SCOPED (1 << 0) +#define MAN_EXPLICIT (1 << 1) /* See blk_imp(). */ +#define MAN_FSCOPED (1 << 2) /* See blk_imp(). */ +#define MAN_NSCOPED (1 << 3) /* See in_line_eoln(). */ +#define MAN_NOCLOSE (1 << 4) /* See blk_exp(). */ +}; + +extern const struct man_macro *const man_macros; + +__BEGIN_DECLS + +#define man_pmsg(m, l, p, t) \ + (*(m)->msg)((t), (m)->data, (l), (p), NULL) +#define man_nmsg(m, n, t) \ + (*(m)->msg)((t), (m)->data, (n)->line, (n)->pos, NULL) +int man_word_alloc(struct man *, int, int, const char *); +int man_block_alloc(struct man *, int, int, enum mant); +int man_head_alloc(struct man *, int, int, enum mant); +int man_body_alloc(struct man *, int, int, enum mant); +int man_elem_alloc(struct man *, int, int, enum mant); +void man_node_delete(struct man *, struct man_node *); +void man_hash_init(void); +enum mant man_hash_find(const char *); +int man_macroend(struct man *); +int man_args(struct man *, int, int *, char *, char **); +#define ARGS_ERROR (-1) +#define ARGS_EOLN (0) +#define ARGS_WORD (1) +#define ARGS_QWORD (1) +int man_vmsg(struct man *, enum mandocerr, + int, int, const char *, ...); +int man_valid_post(struct man *); +int man_valid_pre(struct man *, struct man_node *); +int man_action_post(struct man *); +int man_action_pre(struct man *, struct man_node *); +int man_unscope(struct man *, + const struct man_node *, enum mandocerr); + +__END_DECLS + +#endif /*!LIBMAN_H*/ diff --git a/commands/mdocml/libmandoc.h b/commands/mdocml/libmandoc.h new file mode 100644 index 000000000..b25e4a7e6 --- /dev/null +++ b/commands/mdocml/libmandoc.h @@ -0,0 +1,37 @@ +/* $Id: libmandoc.h,v 1.8 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * 
Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef LIBMANDOC_H +#define LIBMANDOC_H + +__BEGIN_DECLS + +int mandoc_special(char *); +void *mandoc_calloc(size_t, size_t); +char *mandoc_strdup(const char *); +void *mandoc_malloc(size_t); +void *mandoc_realloc(void *, size_t); +time_t mandoc_a2time(int, const char *); +#define MTIME_CANONICAL (1 << 0) +#define MTIME_REDUCED (1 << 1) +#define MTIME_MDOCDATE (1 << 2) +#define MTIME_ISO_8601 (1 << 3) +int mandoc_eos(const char *, size_t); +int mandoc_hyph(const char *, const char *); + +__END_DECLS + +#endif /*!LIBMANDOC_H*/ diff --git a/commands/mdocml/libmdoc.h b/commands/mdocml/libmdoc.h new file mode 100644 index 000000000..e0935be05 --- /dev/null +++ b/commands/mdocml/libmdoc.h @@ -0,0 +1,142 @@ +/* $Id: libmdoc.h,v 1.53 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef LIBMDOC_H +#define LIBMDOC_H + +#include "mdoc.h" + +enum mdoc_next { + MDOC_NEXT_SIBLING = 0, + MDOC_NEXT_CHILD +}; + +struct mdoc { + void *data; + mandocmsg msg; + int flags; +#define MDOC_HALT (1 << 0) /* error in parse: halt */ +#define MDOC_LITERAL (1 << 1) /* in a literal scope */ +#define MDOC_PBODY (1 << 2) /* in the document body */ +#define MDOC_NEWLINE (1 << 3) /* first macro/text in a line */ +#define MDOC_PHRASELIT (1 << 4) /* literal within a partial phrase */ +#define MDOC_PPHRASE (1 << 5) /* within a partial phrase */ +#define MDOC_FREECOL (1 << 6) /* `It' invocation should close */ + int pflags; + enum mdoc_next next; + struct mdoc_node *last; + struct mdoc_node *first; + struct mdoc_meta meta; + enum mdoc_sec lastnamed; + enum mdoc_sec lastsec; +}; + +#define MACRO_PROT_ARGS struct mdoc *m, enum mdoct tok, \ + int line, int ppos, int *pos, char *buf + +struct mdoc_macro { + int (*fp)(MACRO_PROT_ARGS); + int flags; +#define MDOC_CALLABLE (1 << 0) +#define MDOC_PARSED (1 << 1) +#define MDOC_EXPLICIT (1 << 2) +#define MDOC_PROLOGUE (1 << 3) +#define MDOC_IGNDELIM (1 << 4) + /* Reserved words in arguments treated as text.
*/ +}; + +enum margserr { + ARGS_ERROR, + ARGS_EOLN, + ARGS_WORD, + ARGS_PUNCT, + ARGS_QWORD, + ARGS_PHRASE, + ARGS_PPHRASE, + ARGS_PEND +}; + +enum margverr { + ARGV_ERROR, + ARGV_EOLN, + ARGV_ARG, + ARGV_WORD +}; + +enum mdelim { + DELIM_NONE = 0, + DELIM_OPEN, + DELIM_MIDDLE, + DELIM_CLOSE +}; + +extern const struct mdoc_macro *const mdoc_macros; + +__BEGIN_DECLS + +#define mdoc_pmsg(m, l, p, t) \ + (*(m)->msg)((t), (m)->data, (l), (p), NULL) +#define mdoc_nmsg(m, n, t) \ + (*(m)->msg)((t), (m)->data, (n)->line, (n)->pos, NULL) +int mdoc_vmsg(struct mdoc *, enum mandocerr, + int, int, const char *, ...); +int mdoc_macro(MACRO_PROT_ARGS); +int mdoc_word_alloc(struct mdoc *, + int, int, const char *); +int mdoc_elem_alloc(struct mdoc *, int, int, + enum mdoct, struct mdoc_arg *); +int mdoc_block_alloc(struct mdoc *, int, int, + enum mdoct, struct mdoc_arg *); +int mdoc_head_alloc(struct mdoc *, int, int, enum mdoct); +int mdoc_tail_alloc(struct mdoc *, int, int, enum mdoct); +int mdoc_body_alloc(struct mdoc *, int, int, enum mdoct); +void mdoc_node_delete(struct mdoc *, struct mdoc_node *); +void mdoc_hash_init(void); +enum mdoct mdoc_hash_find(const char *); +enum mdelim mdoc_iscdelim(char); +enum mdelim mdoc_isdelim(const char *); +size_t mdoc_isescape(const char *); +enum mdoc_sec mdoc_str2sec(const char *); +time_t mdoc_atotime(const char *); +size_t mdoc_macro2len(enum mdoct); +const char *mdoc_a2att(const char *); +const char *mdoc_a2lib(const char *); +const char *mdoc_a2st(const char *); +const char *mdoc_a2arch(const char *); +const char *mdoc_a2vol(const char *); +const char *mdoc_a2msec(const char *); +int mdoc_valid_pre(struct mdoc *, struct mdoc_node *); +int mdoc_valid_post(struct mdoc *); +int mdoc_action_pre(struct mdoc *, + struct mdoc_node *); +int mdoc_action_post(struct mdoc *); +enum margverr mdoc_argv(struct mdoc *, int, enum mdoct, + struct mdoc_arg **, int *, char *); +void mdoc_argv_free(struct mdoc_arg *); +void mdoc_argn_free(struct 
mdoc_arg *, int); +enum margserr mdoc_args(struct mdoc *, int, + int *, char *, enum mdoct, char **); +enum margserr mdoc_zargs(struct mdoc *, int, + int *, char *, int, char **); +#define ARGS_DELIM (1 << 1) +#define ARGS_TABSEP (1 << 2) +#define ARGS_NOWARN (1 << 3) + +int mdoc_macroend(struct mdoc *); + +__END_DECLS + +#endif /*!LIBMDOC_H*/ diff --git a/commands/mdocml/main.c b/commands/mdocml/main.c new file mode 100644 index 000000000..fb497d3bd --- /dev/null +++ b/commands/mdocml/main.c @@ -0,0 +1,866 @@ +/* $Id: main.c,v 1.89 2010/06/19 20:46:27 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "mdoc.h" +#include "man.h" +#include "roff.h" +#include "main.h" + +#define UNCONST(a) ((void *)(uintptr_t)(const void *)(a)) + +/* FIXME: Intel's compiler? LLVM? pcc? 
*/ + +#if !defined(__GNUC__) || (__GNUC__ < 2) +# if !defined(lint) +# define __attribute__(x) +# endif +#endif /* !defined(__GNUC__) || (__GNUC__ < 2) */ + +typedef void (*out_mdoc)(void *, const struct mdoc *); +typedef void (*out_man)(void *, const struct man *); +typedef void (*out_free)(void *); + +struct buf { + char *buf; + size_t sz; +}; + +enum intt { + INTT_AUTO, + INTT_MDOC, + INTT_MAN +}; + +enum outt { + OUTT_ASCII = 0, + OUTT_TREE, + OUTT_HTML, + OUTT_XHTML, + OUTT_LINT, + OUTT_PS +}; + +struct curparse { + const char *file; /* Current parse. */ + int fd; /* Current parse. */ + int wflags; + /* FIXME: set by max error */ +#define WARN_WALL (1 << 0) /* All-warnings mask. */ +#define WARN_WERR (1 << 2) /* Warnings->errors. */ + int fflags; +#define FL_IGN_SCOPE (1 << 0) /* Ignore scope errors. */ +#define FL_NIGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */ +#define FL_NIGN_MACRO (1 << 2) /* Don't ignore bad macros. */ +#define FL_IGN_ERRORS (1 << 4) /* Ignore failed parse. 
*/ +#define FL_STRICT FL_NIGN_ESCAPE | \ + FL_NIGN_MACRO /* ignore nothing */ + enum intt inttype; /* which parser to use */ + struct man *man; /* man parser */ + struct mdoc *mdoc; /* mdoc parser */ + struct roff *roff; /* roff parser (!NULL) */ + enum outt outtype; /* which output to use */ + out_mdoc outmdoc; /* mdoc output ptr */ + out_man outman; /* man output ptr */ + out_free outfree; /* free output ptr */ + void *outdata; /* data for output */ + char outopts[BUFSIZ]; /* buf of output opts */ +}; + +static const char * const mandocerrs[MANDOCERR_MAX] = { + "ok", + "text should be uppercase", + "sections out of conventional order", + "section name repeats", + "out of order prologue", + "repeated prologue entry", + "list type must come first", + "bad standard", + "bad library", + "bad escape sequence", + "unterminated quoted string", + "argument requires the width argument", + "superfluous width argument", + "ignoring argument", + "bad date argument", + "bad width argument", + "unknown manual section", + "section not in conventional manual section", + "end of line whitespace", + "scope open on exit", + "NAME section must come first", + "bad Boolean value", + "child violates parent syntax", + "bad AT&T symbol", + "list type repeated", + "display type repeated", + "argument repeated", + "manual name not yet set", + "obsolete macro ignored", + "empty macro ignored", + "macro not allowed in body", + "macro not allowed in prologue", + "bad character", + "bad NAME section contents", + "no blank lines", + "no text in this context", + "bad comment style", + "unknown macro will be lost", + "line scope broken", + "scope broken", + "argument count wrong", + "request scope close w/none open", + "scope already open", + "macro requires line argument(s)", + "macro requires body argument(s)", + "macro requires argument(s)", + "no title in document", + "missing list type", + "missing display type", + "line argument(s) will be lost", + "body argument(s) will be lost", + "column 
syntax is inconsistent", + "missing font type", + "displays may not be nested", + "unsupported display type", + "no scope to rewind: syntax violated", + "scope broken, syntax violated", + "line scope broken, syntax violated", + "argument count wrong, violates syntax", + "child violates parent syntax", + "argument count wrong, violates syntax", + "no document body", + "no document prologue", + "utsname system call failed", + "memory exhausted", +}; + +static void fdesc(struct curparse *); +static void ffile(const char *, struct curparse *); +static int foptions(int *, char *); +static struct man *man_init(struct curparse *); +static struct mdoc *mdoc_init(struct curparse *); +static struct roff *roff_init(struct curparse *); +static int moptions(enum intt *, char *); +static int mmsg(enum mandocerr, void *, + int, int, const char *); +static int pset(const char *, int, struct curparse *, + struct man **, struct mdoc **); +static int toptions(struct curparse *, char *); +static void usage(void) __attribute__((noreturn)); +static void version(void) __attribute__((noreturn)); +static int woptions(int *, char *); + +static const char *progname; +static int with_error; +static int with_warning; + +int +main(int argc, char *argv[]) +{ + int c; + struct curparse curp; + + progname = strrchr(argv[0], '/'); + if (progname == NULL) + progname = argv[0]; + else + ++progname; + + memset(&curp, 0, sizeof(struct curparse)); + + curp.inttype = INTT_AUTO; + curp.outtype = OUTT_ASCII; + + /* LINTED */ + while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:"))) + switch (c) { + case ('f'): + if ( ! foptions(&curp.fflags, optarg)) + return(EXIT_FAILURE); + break; + case ('m'): + if ( ! moptions(&curp.inttype, optarg)) + return(EXIT_FAILURE); + break; + case ('O'): + (void)strlcat(curp.outopts, optarg, BUFSIZ); + (void)strlcat(curp.outopts, ",", BUFSIZ); + break; + case ('T'): + if ( ! toptions(&curp, optarg)) + return(EXIT_FAILURE); + break; + case ('W'): + if ( ! 
woptions(&curp.wflags, optarg)) + return(EXIT_FAILURE); + break; + case ('V'): + version(); + /* NOTREACHED */ + default: + usage(); + /* NOTREACHED */ + } + + argc -= optind; + argv += optind; + + if (NULL == *argv) { + curp.file = ""; + curp.fd = STDIN_FILENO; + + fdesc(&curp); + } + + while (*argv) { + ffile(*argv, &curp); + + if (with_error && !(curp.fflags & FL_IGN_ERRORS)) + break; + ++argv; + } + + if (curp.outfree) + (*curp.outfree)(curp.outdata); + if (curp.mdoc) + mdoc_free(curp.mdoc); + if (curp.man) + man_free(curp.man); + if (curp.roff) + roff_free(curp.roff); + + return((with_warning || with_error) ? + EXIT_FAILURE : EXIT_SUCCESS); +} + + +static void +version(void) +{ + + (void)printf("%s %s\n", progname, VERSION); + exit(EXIT_SUCCESS); +} + + +static void +usage(void) +{ + + (void)fprintf(stderr, "usage: %s [-V] [-foption] " + "[-mformat] [-Ooption] [-Toutput] " + "[-Werr] [file...]\n", progname); + exit(EXIT_FAILURE); +} + + +static struct man * +man_init(struct curparse *curp) +{ + int pflags; + + /* Defaults from mandoc.1. */ + + pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE; + + if (curp->fflags & FL_NIGN_MACRO) + pflags &= ~MAN_IGN_MACRO; + if (curp->fflags & FL_NIGN_ESCAPE) + pflags &= ~MAN_IGN_ESCAPE; + + return(man_alloc(curp, pflags, mmsg)); +} + + +static struct roff * +roff_init(struct curparse *curp) +{ + + return(roff_alloc(mmsg, curp)); +} + + +static struct mdoc * +mdoc_init(struct curparse *curp) +{ + int pflags; + + /* Defaults from mandoc.1. 
*/ + + pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE; + + if (curp->fflags & FL_IGN_SCOPE) + pflags |= MDOC_IGN_SCOPE; + if (curp->fflags & FL_NIGN_ESCAPE) + pflags &= ~MDOC_IGN_ESCAPE; + if (curp->fflags & FL_NIGN_MACRO) + pflags &= ~MDOC_IGN_MACRO; + + return(mdoc_alloc(curp, pflags, mmsg)); +} + + +static void +ffile(const char *file, struct curparse *curp) +{ + + curp->file = file; + if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) { + perror(curp->file); + with_error = 1; + return; + } + + fdesc(curp); + + if (-1 == close(curp->fd)) + perror(curp->file); +} + + +static int +resize_buf(struct buf *buf, size_t initial) +{ + void *tmp; + size_t sz; + + if (buf->sz == 0) + sz = initial; + else + sz = 2 * buf->sz; + tmp = realloc(buf->buf, sz); + if (NULL == tmp) { + perror(NULL); + return(0); + } + buf->buf = tmp; + buf->sz = sz; + return(1); +} + + +static int +read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap) +{ + struct stat st; + size_t off; + ssize_t ssz; + + if (-1 == fstat(curp->fd, &st)) { + perror(curp->file); + with_error = 1; + return(0); + } + +#ifndef __minix + /* + * If we're a regular file, try just reading in the whole entry + * via mmap(). This is faster than reading it into blocks, and + * since each file is only a few bytes to begin with, I'm not + * concerned that this is going to tank any machines. + */ + + if (S_ISREG(st.st_mode)) { + if (st.st_size >= (1U << 31)) { + fprintf(stderr, "%s: input too large\n", + curp->file); + with_error = 1; + return(0); + } + *with_mmap = 1; + fb->sz = (size_t)st.st_size; + fb->buf = mmap(NULL, fb->sz, PROT_READ, + MAP_FILE|MAP_SHARED, curp->fd, 0); + if (fb->buf != MAP_FAILED) + return(1); + } +#endif + + /* + * If this isn't a regular file (like, say, stdin), then we must + * go the old way and just read things in bit by bit. 
+ */ + + *with_mmap = 0; + off = 0; + fb->sz = 0; + fb->buf = NULL; + for (;;) { + if (off == fb->sz) { + if (fb->sz == (1U << 31)) { + fprintf(stderr, "%s: input too large\n", + curp->file); + break; + } + if (! resize_buf(fb, 65536)) + break; + } + ssz = read(curp->fd, fb->buf + (int)off, fb->sz - off); + if (ssz == 0) { + fb->sz = off; + return(1); + } + if (ssz == -1) { + perror(curp->file); + break; + } + off += (size_t)ssz; + } + + free(fb->buf); + fb->buf = NULL; + with_error = 1; + return(0); +} + + +static void +fdesc(struct curparse *curp) +{ + struct buf ln, blk; + int i, pos, lnn, lnn_start, with_mmap, of; + enum rofferr re; + struct man *man; + struct mdoc *mdoc; + struct roff *roff; + + man = NULL; + mdoc = NULL; + roff = NULL; + memset(&ln, 0, sizeof(struct buf)); + + /* + * Two buffers: ln and blk. blk is the input file and may be + * memory mapped. ln is a line buffer and grows on-demand. + */ + + if ( ! read_whole_file(curp, &blk, &with_mmap)) + return; + + if (NULL == curp->roff) + curp->roff = roff_init(curp); + if (NULL == (roff = curp->roff)) + goto bailout; + + for (i = 0, lnn = 1; i < (int)blk.sz;) { + pos = 0; + lnn_start = lnn; + while (i < (int)blk.sz) { + if ('\n' == blk.buf[i]) { + ++i; + ++lnn; + break; + } + /* Trailing backslash is like a plain character. */ + if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) { + if (pos >= (int)ln.sz) + if (! resize_buf(&ln, 256)) + goto bailout; + ln.buf[pos++] = blk.buf[i++]; + continue; + } + /* Found an escape and at least one other character.
*/ + if ('\n' == blk.buf[i + 1]) { + /* Escaped newlines are skipped over */ + i += 2; + ++lnn; + continue; + } + if ('"' == blk.buf[i + 1]) { + i += 2; + /* Comment, skip to end of line */ + for (; i < (int)blk.sz; ++i) { + if ('\n' == blk.buf[i]) { + ++i; + ++lnn; + break; + } + } + /* Backout trailing whitespaces */ + for (; pos > 0; --pos) { + if (ln.buf[pos - 1] != ' ') + break; + if (pos > 2 && ln.buf[pos - 2] == '\\') + break; + } + break; + } + /* Some other escape sequence, copy and continue. */ + if (pos + 1 >= (int)ln.sz) + if (! resize_buf(&ln, 256)) + goto bailout; + + ln.buf[pos++] = blk.buf[i++]; + ln.buf[pos++] = blk.buf[i++]; + } + + if (pos >= (int)ln.sz) + if (! resize_buf(&ln, 256)) + goto bailout; + ln.buf[pos] = '\0'; + + /* + * A significant amount of complexity is contained by + * the roff preprocessor. It's line-oriented but can be + * expressed on one line, so we need at times to + * readjust our starting point and re-run it. The roff + * preprocessor can also readjust the buffers with new + * data, so we pass them in wholesale. + */ + + of = 0; + do { + re = roff_parseln(roff, lnn_start, + &ln.buf, &ln.sz, of, &of); + } while (ROFF_RERUN == re); + + if (ROFF_IGN == re) + continue; + else if (ROFF_ERR == re) + goto bailout; + + /* + * If input parsers have not been allocated, do so now. + * We keep these instanced between parsers, but set them + * locally per parse routine since we can use different + * parsers with each one. + */ + + if ( ! (man || mdoc)) + if ( ! pset(ln.buf + of, pos - of, curp, &man, &mdoc)) + goto bailout; + + /* Lastly, push down into the parsers themselves. */ + + if (man && ! man_parseln(man, lnn_start, ln.buf, of)) + goto bailout; + if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of)) + goto bailout; + } + + /* NOTE a parser may not have been assigned, yet. */ + + if ( ! (man || mdoc)) { + fprintf(stderr, "%s: Not a manual\n", curp->file); + goto bailout; + } + + /* Clean up the parse routine ASTs.
*/ + + if (mdoc && ! mdoc_endparse(mdoc)) + goto bailout; + if (man && ! man_endparse(man)) + goto bailout; + if (roff && ! roff_endparse(roff)) + goto bailout; + + /* If unset, allocate output dev now (if applicable). */ + + if ( ! (curp->outman && curp->outmdoc)) { + switch (curp->outtype) { + case (OUTT_XHTML): + curp->outdata = xhtml_alloc(curp->outopts); + break; + case (OUTT_HTML): + curp->outdata = html_alloc(curp->outopts); + break; + case (OUTT_ASCII): + curp->outdata = ascii_alloc(curp->outopts); + curp->outfree = ascii_free; + break; + case (OUTT_PS): + curp->outdata = ps_alloc(); + curp->outfree = ps_free; + break; + default: + break; + } + + switch (curp->outtype) { + case (OUTT_HTML): + /* FALLTHROUGH */ + case (OUTT_XHTML): + curp->outman = html_man; + curp->outmdoc = html_mdoc; + curp->outfree = html_free; + break; + case (OUTT_TREE): + curp->outman = tree_man; + curp->outmdoc = tree_mdoc; + break; + case (OUTT_ASCII): + /* FALLTHROUGH */ + case (OUTT_PS): + curp->outman = terminal_man; + curp->outmdoc = terminal_mdoc; + break; + default: + break; + } + } + + /* Execute the out device, if it exists. */ + + if (man && curp->outman) + (*curp->outman)(curp->outdata, man); + if (mdoc && curp->outmdoc) + (*curp->outmdoc)(curp->outdata, mdoc); + + cleanup: + if (mdoc) + mdoc_reset(mdoc); + if (man) + man_reset(man); + if (roff) + roff_reset(roff); + if (ln.buf) + free(ln.buf); + if (with_mmap) + munmap(blk.buf, blk.sz); + else + free(blk.buf); + + return; + + bailout: + with_error = 1; + goto cleanup; +} + + +static int +pset(const char *buf, int pos, struct curparse *curp, + struct man **man, struct mdoc **mdoc) +{ + int i; + + /* + * Try to intuit which kind of manual parser should be used. If + * passed in by command-line (-man, -mdoc), then use that + * explicitly. If passed as -mandoc, then try to guess from the + * line: either skip dot-lines, use -mdoc when finding `.Dd', or + * default to -man, which is more lenient. + */ + + if ('.'
== buf[0] || '\'' == buf[0]) { + for (i = 1; buf[i]; i++) + if (' ' != buf[i] && '\t' != buf[i]) + break; + if (0 == buf[i]) + return(1); + } + + switch (curp->inttype) { + case (INTT_MDOC): + if (NULL == curp->mdoc) + curp->mdoc = mdoc_init(curp); + if (NULL == (*mdoc = curp->mdoc)) + return(0); + return(1); + case (INTT_MAN): + if (NULL == curp->man) + curp->man = man_init(curp); + if (NULL == (*man = curp->man)) + return(0); + return(1); + default: + break; + } + + if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { + if (NULL == curp->mdoc) + curp->mdoc = mdoc_init(curp); + if (NULL == (*mdoc = curp->mdoc)) + return(0); + return(1); + } + + if (NULL == curp->man) + curp->man = man_init(curp); + if (NULL == (*man = curp->man)) + return(0); + return(1); +} + + +static int +moptions(enum intt *tflags, char *arg) +{ + + if (0 == strcmp(arg, "doc")) + *tflags = INTT_MDOC; + else if (0 == strcmp(arg, "andoc")) + *tflags = INTT_AUTO; + else if (0 == strcmp(arg, "an")) + *tflags = INTT_MAN; + else { + fprintf(stderr, "%s: Bad argument\n", arg); + return(0); + } + + return(1); +} + + +static int +toptions(struct curparse *curp, char *arg) +{ + + if (0 == strcmp(arg, "ascii")) + curp->outtype = OUTT_ASCII; + else if (0 == strcmp(arg, "lint")) { + curp->outtype = OUTT_LINT; + curp->wflags |= WARN_WALL; + curp->fflags |= FL_STRICT; + } + else if (0 == strcmp(arg, "tree")) + curp->outtype = OUTT_TREE; + else if (0 == strcmp(arg, "html")) + curp->outtype = OUTT_HTML; + else if (0 == strcmp(arg, "xhtml")) + curp->outtype = OUTT_XHTML; + else if (0 == strcmp(arg, "ps")) + curp->outtype = OUTT_PS; + else { + fprintf(stderr, "%s: Bad argument\n", arg); + return(0); + } + + return(1); +} + + +static int +foptions(int *fflags, char *arg) +{ + char *v, *o; + const char *toks[8]; + + toks[0] = "ign-scope"; + toks[1] = "no-ign-escape"; + toks[2] = "no-ign-macro"; + toks[3] = "ign-errors"; + toks[4] = "strict"; + toks[5] = "ign-escape"; + toks[6] = NULL; + + while (*arg) { + o = arg; + 
switch (getsubopt(&arg, UNCONST(toks), &v)) { + case (0): + *fflags |= FL_IGN_SCOPE; + break; + case (1): + *fflags |= FL_NIGN_ESCAPE; + break; + case (2): + *fflags |= FL_NIGN_MACRO; + break; + case (3): + *fflags |= FL_IGN_ERRORS; + break; + case (4): + *fflags |= FL_STRICT; + break; + case (5): + *fflags &= ~FL_NIGN_ESCAPE; + break; + default: + fprintf(stderr, "%s: Bad argument\n", o); + return(0); + } + } + + return(1); +} + + +static int +woptions(int *wflags, char *arg) +{ + char *v, *o; + const char *toks[3]; + + toks[0] = "all"; + toks[1] = "error"; + toks[2] = NULL; + + while (*arg) { + o = arg; + switch (getsubopt(&arg, UNCONST(toks), &v)) { + case (0): + *wflags |= WARN_WALL; + break; + case (1): + *wflags |= WARN_WERR; + break; + default: + fprintf(stderr, "%s: Bad argument\n", o); + return(0); + } + } + + return(1); +} + + +static int +mmsg(enum mandocerr t, void *arg, int ln, int col, const char *msg) +{ + struct curparse *cp; + + cp = (struct curparse *)arg; + + if (t <= MANDOCERR_ERROR) { + if ( ! (cp->wflags & WARN_WALL)) + return(1); + with_warning = 1; + } else + with_error = 1; + + fprintf(stderr, "%s:%d:%d: %s", cp->file, + ln, col + 1, mandocerrs[t]); + + if (msg) + fprintf(stderr, ": %s", msg); + + fputc('\n', stderr); + + /* This is superfluous, but whatever. */ + if (t > MANDOCERR_ERROR) + return(0); + if (cp->wflags & WARN_WERR) { + with_error = 1; + return(0); + } + return(1); +} diff --git a/commands/mdocml/main.h b/commands/mdocml/main.h new file mode 100644 index 000000000..c48d00535 --- /dev/null +++ b/commands/mdocml/main.h @@ -0,0 +1,55 @@ +/* $Id: main.h,v 1.7 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef MAIN_H +#define MAIN_H + +__BEGIN_DECLS + +struct mdoc; +struct man; + +#define UNCONST(a) ((void *)(uintptr_t)(const void *)(a)) + + +/* + * Definitions for main.c-visible output device functions, e.g., -Thtml + * and -Tascii. Note that ascii_alloc() is named as such in + * anticipation of latin1_alloc() and so on, all of which map into the + * terminal output routines with different character settings. + */ + +void *html_alloc(char *); +void *xhtml_alloc(char *); +void html_mdoc(void *, const struct mdoc *); +void html_man(void *, const struct man *); +void html_free(void *); + +void tree_mdoc(void *, const struct mdoc *); +void tree_man(void *, const struct man *); + +void *ascii_alloc(char *); +void ascii_free(void *); + +void *ps_alloc(void); +void ps_free(void *); + +void terminal_mdoc(void *, const struct mdoc *); +void terminal_man(void *, const struct man *); + +__END_DECLS + +#endif /*!MAIN_H*/ diff --git a/commands/mdocml/man.3 b/commands/mdocml/man.3 new file mode 100644 index 000000000..082914473 --- /dev/null +++ b/commands/mdocml/man.3 @@ -0,0 +1,324 @@ +.\" $Id: man.3,v 1.18 2010/05/25 22:16:59 kristaps Exp $ +.\" +.\" Copyright (c) 2009-2010 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. 
+.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: May 25 2010 $ +.Dt MAN 3 +.Os +.Sh NAME +.Nm man , +.Nm man_alloc , +.Nm man_endparse , +.Nm man_free , +.Nm man_meta , +.Nm man_node , +.Nm man_parseln , +.Nm man_reset +.Nd man macro compiler library +.Sh SYNOPSIS +.In mandoc.h +.In man.h +.Vt extern const char * const * man_macronames; +.Ft "struct man *" +.Fn man_alloc "void *data" "int pflags" "mandocmsg msgs" +.Ft int +.Fn man_endparse "struct man *man" +.Ft void +.Fn man_free "struct man *man" +.Ft "const struct man_meta *" +.Fn man_meta "const struct man *man" +.Ft "const struct man_node *" +.Fn man_node "const struct man *man" +.Ft int +.Fn man_parseln "struct man *man" "int line" "char *buf" +.Ft void +.Fn man_reset "struct man *man" +.Sh DESCRIPTION +The +.Nm +library parses lines of +.Xr man 7 +input into an abstract syntax tree (AST). +.Pp +In general, applications initiate a parsing sequence with +.Fn man_alloc , +parse each line in a document with +.Fn man_parseln , +close the parsing session with +.Fn man_endparse , +operate over the syntax tree returned by +.Fn man_node +and +.Fn man_meta , +then free all allocated memory with +.Fn man_free . +The +.Fn man_reset +function may be used in order to reset the parser for another input +sequence. +See the +.Sx EXAMPLES +section for a full example. +.Pp +Beyond the full set of macros defined in +.Xr man 7 , +the +.Nm +library also accepts the following macros: +.Pp +.Bl -tag -width Ds -compact +.It PD +Has no effect. 
Handled as a current-scope line macro. +.It Sp +A synonym for +.Sq sp 0.5v +.Pq part of the standard preamble for Perl documentation . +Handled as a line macro. +.It Vb +A synonym for +.Sq nf +.Pq part of the standard preamble for Perl documentation . +Handled as a current-scope line macro. +.It Ve +A synonym for +.Sq fi , +closing +.Sq Vb +.Pq part of the standard preamble for Perl documentation . +Handled as a current-scope line macro. +.El +.Pp +Furthermore, the following escapes are accepted to allow +.Xr pod2man 1 +documents to be correctly formatted: +\e*(-- (dash), +\e*(PI (pi), +\e*(L" (left double-quote), +\e*(R" (right double-quote), +\e*(C+ (C++), +\e*(C` (left single-quote), +\e*(C' (right single-quote), +\e*(Aq (apostrophe), +\e*^ (hat), +\e*, (comma), +\e*~ (tilde), +\e*/ (forward slash), +\e*: (umlaut), +\e*8 (beta), +\e*o (degree), +\e*(D- (Eth), +\e*(d- (eth), +\e*(Th (Thorn), +and +\e*(th (thorn). +.Sh REFERENCE +This section further defines the +.Sx Types , +.Sx Functions +and +.Sx Variables +available to programmers. +Following that, the +.Sx Abstract Syntax Tree +section documents the output tree. +.Ss Types +Both functions (see +.Sx Functions ) +and variables (see +.Sx Variables ) +may use the following types: +.Bl -ohang +.It Vt struct man +An opaque type defined in +.Pa man.c . +Its values are only used privately within the library. +.It Vt mandocmsg +A function callback type defined in +.Pa mandoc.h . +.It Vt struct man_node +A parsed node. +Defined in +.Pa man.h . +See +.Sx Abstract Syntax Tree +for details. +.El +.Ss Functions +Function descriptions follow: +.Bl -ohang +.It Fn man_alloc +Allocates a parsing structure. +The +.Fa data +pointer is passed to +.Fa msgs . +The +.Fa pflags +arguments are defined in +.Pa man.h . +Returns NULL on failure. +If non-NULL, the pointer must be freed with +.Fn man_free . +.It Fn man_reset +Reset the parser for another parse routine. 
+After its use, +.Fn man_parseln +behaves as if invoked for the first time. +.It Fn man_free +Free all resources of a parser. +The pointer is no longer valid after invocation. +.It Fn man_parseln +Parse a nil-terminated line of input. +This line should not contain the trailing newline. +Returns 0 on failure, 1 on success. +The input buffer +.Fa buf +is modified by this function. +.It Fn man_endparse +Signals that the parse is complete. +Note that if +.Fn man_endparse +is called subsequent to +.Fn man_node , +the resulting tree is incomplete. +Returns 0 on failure, 1 on success. +.It Fn man_node +Returns the first node of the parse. +Note that if +.Fn man_parseln +or +.Fn man_endparse +return 0, the tree will be incomplete. +.It Fn man_meta +Returns the document's parsed meta-data. +If this information has not yet been supplied or +.Fn man_parseln +or +.Fn man_endparse +return 0, the data will be incomplete. +.El +.Ss Variables +The following variables are also defined: +.Bl -ohang +.It Va man_macronames +An array of string-ified token names. +.El +.Ss Abstract Syntax Tree +The +.Nm +functions produce an abstract syntax tree (AST) describing input in a +regular form. +It may be reviewed at any time with +.Fn man_node ; +however, if called before +.Fn man_endparse , +or after +.Fn man_endparse +or +.Fn man_parseln +fail, it may be incomplete. +.Pp +This AST is governed by the ontological rules dictated in +.Xr man 7 +and derives its terminology accordingly. +.Pp +The AST is composed of +.Vt struct man_node +nodes with element, root and text types as declared by the +.Va type +field. +Each node also provides its parse point (the +.Va line , +.Va sec , +and +.Va pos +fields), its position in the tree (the +.Va parent , +.Va child , +.Va next +and +.Va prev +fields) and some type-specific data. +.Pp +The tree itself is arranged according to the following normal form, +where capitalised non-terminals represent nodes.
+.Pp +.Bl -tag -width "ELEMENTXX" -compact +.It ROOT +\(<- mnode+ +.It mnode +\(<- ELEMENT | TEXT | BLOCK +.It BLOCK +\(<- HEAD BODY +.It HEAD +\(<- mnode* +.It BODY +\(<- mnode* +.It ELEMENT +\(<- ELEMENT | TEXT* +.It TEXT +\(<- [[:alpha:]]* +.El +.Pp +The only elements capable of nesting other elements are those with +next-line scope as documented in +.Xr man 7 . +.Sh EXAMPLES +The following example reads lines from stdin and parses them, operating +on the finished parse tree with +.Fn parsed . +This example does not error-check nor free memory upon failure. +.Bd -literal -offset indent +struct man *man; +struct man_node *node; +char *buf; +size_t len; +int line; + +line = 1; +man = man_alloc(NULL, 0, NULL); +buf = NULL; +alloc_len = 0; + +while ((len = getline(&buf, &alloc_len, stdin)) >= 0) { + if (len && buf[len - 1] == '\en') + buf[len - 1] = '\e0'; + if ( ! man_parseln(man, line, buf)) + errx(1, "man_parseln"); + line++; +} + +free(buf); + +if ( ! man_endparse(man)) + errx(1, "man_endparse"); +if (NULL == (node = man_node(man))) + errx(1, "man_node"); + +parsed(man, node); +man_free(man); +.Ed +.Pp +Please see +.Pa main.c +in the source archive for a rigorous reference. +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr man 7 +.Sh AUTHORS +The +.Nm +library was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . diff --git a/commands/mdocml/man.7 b/commands/mdocml/man.7 new file mode 100644 index 000000000..39006ef3a --- /dev/null +++ b/commands/mdocml/man.7 @@ -0,0 +1,968 @@ +.\" $Id: man.7,v 1.74 2010/05/26 14:03:54 kristaps Exp $ +.\" +.\" Copyright (c) 2009 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies.
+.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: May 26 2010 $ +.Dt MAN 7 +.Os +.Sh NAME +.Nm man +.Nd man language reference +.Sh DESCRIPTION +The +.Nm man +language was historically used to format +.Ux +manuals. +This reference document describes its syntax, structure, and usage. +.Pp +.Bf -emphasis +Do not use +.Nm +to write your manuals. +.Ef +Use the +.Xr mdoc 7 +language, instead. +.Pp +An +.Nm +document follows simple rules: lines beginning with the control +character +.Sq \&. +are parsed for macros. +Other lines are interpreted within the scope of +prior macros: +.Bd -literal -offset indent +\&.SH Macro lines change control state. +Other lines are interpreted within the current state. +.Ed +.Sh INPUT ENCODING +.Nm +documents may contain only graphable 7-bit ASCII characters, the +space character, and the tab character. +All manuals must have +.Ux +line termination. +.Pp +Blank lines are acceptable; where found, the output will assert a +vertical space. +.Ss Comments +Text following a +.Sq \e\*" , +whether in a macro or free-form text line, is ignored to the end of +line. +A macro line with only a control character and comment escape, +.Sq \&.\e" , +is also ignored. +Macro lines with only a control character and optionally whitespace are +stripped from input. +.Ss Special Characters +Special characters may occur in both macro and free-form lines.
+Sequences begin with the escape character +.Sq \e +followed by either an open-parenthesis +.Sq \&( +for two-character sequences; an open-bracket +.Sq \&[ +for n-character sequences (terminated at a close-bracket +.Sq \&] ) ; +or a single one-character sequence. +See +.Xr mandoc_char 7 +for a complete list. +Examples include +.Sq \e(em +.Pq em-dash +and +.Sq \ee +.Pq back-slash . +.Ss Text Decoration +Terms may be text-decorated using the +.Sq \ef +escape followed by an indicator: B (bold), I (italic), R (Roman), or P +(revert to previous mode): +.Pp +.D1 \efBbold\efR \efIitalic\efP +.Pp +A numerical representation 3, 2, or 1 (bold, italic, and Roman, +respectively) may be used instead. +A text decoration is only valid, if specified in free-form text, until +the next macro invocation; if specified within a macro, it's only valid +until the macro closes scope. +Note that macros like +.Sx \&BR +open and close a font scope with each argument. +.Pp +Text may also be sized with the +.Sq \es +escape, whose syntax is one of +.Sq \es+-n +for one-digit numerals; +.Sq \es(+-nn +or +.Sq \es+-(nn +for two-digit numerals; and +.Sq \es[+-N] , +.Sq \es+-[N] , +.Sq \es'+-N' , +or +.Sq \es+-'N' +for arbitrary-digit numerals: +.Pp +.D1 \es+1bigger\es-1 +.D1 \es[+10]much bigger\es[-10] +.D1 \es+(10much bigger\es-(10 +.D1 \es+'100'much much bigger\es-'100' +.Pp +Both +.Sq \es +and +.Sq \ef +attributes are forgotten when entering or exiting a macro block. +.Ss Whitespace +Whitespace consists of the space character. +In free-form lines, whitespace is preserved within a line; un-escaped +trailing spaces are stripped from input (unless in a literal context). +Blank free-form lines, which may include spaces, are permitted and +rendered as an empty line. +.Pp +In macro lines, whitespace delimits arguments and is discarded. +If arguments are quoted, whitespace within the quotes is retained. +.Ss Dates +The +.Sx \&TH +macro is the only +.Nm +macro that requires a date.
+The form for this date is the ISO-8601 +standard +.Cm YYYY-MM-DD . +.Ss Scaling Widths +Many macros support scaled widths for their arguments, such as +stipulating a two-inch paragraph indentation with the following: +.Bd -literal -offset indent +\&.HP 2i +.Ed +.Pp +The syntax for scaled widths is +.Sq Li [+-]?[0-9]*.[0-9]*[:unit:]? , +where a decimal must be preceded or followed by at least one digit. +Negative numbers, while accepted, are truncated to zero. +The following scaling units are accepted: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It c +centimetre +.It i +inch +.It P +pica (~1/6 inch) +.It p +point (~1/72 inch) +.It f +synonym for +.Sq u +.It v +default vertical span +.It m +width of rendered +.Sq m +.Pq em +character +.It n +width of rendered +.Sq n +.Pq en +character +.It u +default horizontal span +.It M +mini-em (~1/100 em) +.El +.Pp +Using anything other than +.Sq m , +.Sq n , +.Sq u , +or +.Sq v +is necessarily non-portable across output media. +.Pp +If a scaling unit is not provided, the numerical value is interpreted +under the default rules of +.Sq v +for vertical spaces and +.Sq u +for horizontal ones. +.Em Note : +this differs from +.Xr mdoc 7 , +which, if a unit is not provided, will instead interpret the string as +literal text. +.Ss Sentence Spacing +When composing a manual, make sure that your sentences end at the end of +a line. +By doing so, front-ends will be able to apply the proper amount of +spacing after the end of sentence (unescaped) period, exclamation mark, +or question mark followed by zero or more non-sentence closing +delimiters ( +.Ns Sq \&) , +.Sq \&] , +.Sq \&' , +.Sq \&" ) . +.Sh MANUAL STRUCTURE +Each +.Nm +document must contain at least the +.Sx \&TH +macro describing the document's section and title. +It may occur anywhere in the document, although conventionally, it +appears as the first macro. +.Pp +Beyond +.Sx \&TH , +at least one macro or text node must appear in the document.
+Documents are generally structured as follows: +.Bd -literal -offset indent +\&.TH FOO 1 2009-10-10 +\&. +\&.SH NAME +\efBfoo\efR \e(en a description goes here +\&.\e\*q The next is for sections 2 & 3 only. +\&.\e\*q .SH LIBRARY +\&. +\&.SH SYNOPSIS +\efBfoo\efR [\efB\e-options\efR] arguments... +\&. +\&.SH DESCRIPTION +The \efBfoo\efR utility processes files... +\&. +\&.\e\*q .SH IMPLEMENTATION NOTES +\&.\e\*q The next is for sections 2, 3, & 9 only. +\&.\e\*q .SH RETURN VALUES +\&.\e\*q The next is for sections 1, 6, 7, & 8 only. +\&.\e\*q .SH ENVIRONMENT +\&.\e\*q .SH FILES +\&.\e\*q The next is for sections 1 & 8 only. +\&.\e\*q .SH EXIT STATUS +\&.\e\*q .SH EXAMPLES +\&.\e\*q The next is for sections 1, 4, 6, 7, & 8 only. +\&.\e\*q .SH DIAGNOSTICS +\&.\e\*q The next is for sections 2, 3, & 9 only. +\&.\e\*q .SH ERRORS +\&.\e\*q .SH SEE ALSO +\&.\e\*q .BR foo ( 1 ) +\&.\e\*q .SH STANDARDS +\&.\e\*q .SH HISTORY +\&.\e\*q .SH AUTHORS +\&.\e\*q .SH CAVEATS +\&.\e\*q .SH BUGS +\&.\e\*q .SH SECURITY CONSIDERATIONS +.Ed +.Pp +The sections in a +.Nm +document are conventionally ordered as they appear above. +Sections should be composed as follows: +.Bl -ohang -offset indent +.It Em NAME +The name(s) and a short description of the documented material. +The syntax for this is generally as follows: +.Pp +.D1 \efBname\efR \e(en description +.It Em LIBRARY +The name of the library containing the documented material, which is +assumed to be a function in a section 2 or 3 manual. +For functions in the C library, this may be as follows: +.Pp +.D1 Standard C Library (libc, -lc) +.It Em SYNOPSIS +Documents the utility invocation syntax, function call syntax, or device +configuration. +.Pp +For the first, utilities (sections 1, 6, and 8), this is +generally structured as follows: +.Pp +.D1 \efBname\efR [-\efBab\efR] [-\efBc\efR\efIarg\efR] \efBpath\efR... 
+.Pp +For the second, function calls (sections 2, 3, 9): +.Pp +.D1 \&.B char *name(char *\efIarg\efR); +.Pp +And for the third, configurations (section 4): +.Pp +.D1 \&.B name* at cardbus ? function ? +.Pp +Manuals not in these sections generally don't need a +.Em SYNOPSIS . +.It Em DESCRIPTION +This expands upon the brief, one-line description in +.Em NAME . +It usually contains a break-down of the options (if documenting a +command). +.It Em IMPLEMENTATION NOTES +Implementation-specific notes should be kept here. +This is useful when implementing standard functions that may have side +effects or notable algorithmic implications. +.It Em RETURN VALUES +This section is the dual of +.Em EXIT STATUS , +which is used for commands. +It documents the return values of functions in sections 2, 3, and 9. +.It Em ENVIRONMENT +Documents any usages of environment variables, e.g., +.Xr environ 7 . +.It Em FILES +Documents files used. +It's helpful to document both the file and a short description of how +the file is used (created, modified, etc.). +.It Em EXIT STATUS +Command exit status for section 1, 6, and 8 manuals. +This section is the dual of +.Em RETURN VALUES , +which is used for functions. +Historically, this information was described in +.Em DIAGNOSTICS , +a practise that is now discouraged. +.It Em EXAMPLES +Example usages. +This often contains snippets of well-formed, +well-tested invocations. +Make doubly sure that your examples work properly! +.It Em DIAGNOSTICS +Documents error conditions. +This is most useful in section 4 manuals. +Historically, this section was used in place of +.Em EXIT STATUS +for manuals in sections 1, 6, and 8; however, this practise is +discouraged. +.It Em ERRORS +Documents error handling in sections 2, 3, and 9. +.It Em SEE ALSO +References other manuals with related topics. +This section should exist for most manuals. 
+.Pp +.D1 \&.BR bar \&( 1 \&), +.Pp +Cross-references should conventionally be ordered +first by section, then alphabetically. +.It Em STANDARDS +References any standards implemented or used, such as +.Pp +.D1 IEEE Std 1003.2 (\e(lqPOSIX.2\e(rq) +.Pp +If not adhering to any standards, the +.Em HISTORY +section should be used. +.It Em HISTORY +The history of any manual without a +.Em STANDARDS +section should be described in this section. +.It Em AUTHORS +Credits to authors, if applicable, should appear in this section. +Authors should generally be noted by both name and an e-mail address. +.It Em CAVEATS +Common misuses and misunderstandings should be explained +in this section. +.It Em BUGS +Extant bugs should be described in this section. +.It Em SECURITY CONSIDERATIONS +Documents any security precautions that operators should consider. +.El +.Sh MACRO SYNTAX +Macros are one to three characters in length and begin with a +control character, +.Sq \&. , +at the beginning of the line. +The +.Sq \(aq +macro control character is also accepted. +An arbitrary amount of whitespace (spaces or tabs) may sit between the +control character and the macro name. +Thus, the following are equivalent: +.Bd -literal -offset indent +\&.PP +\&.\ \ \ PP +.Ed +.Pp +The +.Nm +macros are classified by scope: line scope or block scope. +Line macros are only scoped to the current line (and, in some +situations, the subsequent line). +Block macros are scoped to the current line and subsequent lines until +closed by another block macro. +.Ss Line Macros +Line macros are generally scoped to the current line, with the body +consisting of zero or more arguments. +If a macro is scoped to the next line and the line arguments are empty, +the next line, which must be text, is used instead. +Thus: +.Bd -literal -offset indent +\&.I +foo +.Ed +.Pp +is equivalent to +.Sq \&.I foo . +If next-line macros are invoked consecutively, only the last is used.
+If a next-line macro is followed by a non-next-line macro, an error is +raised (unless in the case of +.Sx \&br , +.Sx \&sp , +or +.Sx \&na ) . +.Pp +The syntax is as follows: +.Bd -literal -offset indent +\&.YO \(lBbody...\(rB +\(lBbody...\(rB +.Ed +.Pp +.Bl -column -compact -offset indent "MacroX" "ArgumentsX" "ScopeXXXXX" "CompatX" +.It Em Macro Ta Em Arguments Ta Em Scope Ta Em Notes +.It Sx \&AT Ta <=1 Ta current Ta \& +.It Sx \&B Ta n Ta next-line Ta \& +.It Sx \&BI Ta n Ta current Ta \& +.It Sx \&BR Ta n Ta current Ta \& +.It Sx \&DT Ta 0 Ta current Ta \& +.It Sx \&I Ta n Ta next-line Ta \& +.It Sx \&IB Ta n Ta current Ta \& +.It Sx \&IR Ta n Ta current Ta \& +.\" .It Sx \&PD Ta n Ta current Ta compat +.It Sx \&R Ta n Ta next-line Ta \& +.It Sx \&RB Ta n Ta current Ta \& +.It Sx \&RI Ta n Ta current Ta \& +.It Sx \&SB Ta n Ta next-line Ta \& +.It Sx \&SM Ta n Ta next-line Ta \& +.It Sx \&TH Ta >1, <6 Ta current Ta \& +.It Sx \&UC Ta <=1 Ta current Ta \& +.It Sx \&br Ta 0 Ta current Ta compat +.It Sx \&fi Ta 0 Ta current Ta compat +.It Sx \&i Ta n Ta current Ta compat +.It Sx \&na Ta 0 Ta current Ta compat +.It Sx \&nf Ta 0 Ta current Ta compat +.It Sx \&r Ta 0 Ta current Ta compat +.It Sx \&sp Ta 1 Ta current Ta compat +.\" .It Sx \&Sp Ta <1 Ta current Ta compat +.\" .It Sx \&Vb Ta <1 Ta current Ta compat +.\" .It Sx \&Ve Ta 0 Ta current Ta compat +.El +.Pp +Macros marked as +.Qq compat +are included for compatibility with the significant corpus of existing +manuals that mix dialects of roff. +These macros should not be used for portable +.Nm +manuals. +.Ss Block Macros +Block macros are comprised of a head and body. +Like for in-line macros, the head is scoped to the current line and, in +one circumstance, the next line (the next-line stipulations as in +.Sx Line Macros +apply here as well). 
+.Pp +The syntax is as follows: +.Bd -literal -offset indent +\&.YO \(lBhead...\(rB +\(lBhead...\(rB +\(lBbody...\(rB +.Ed +.Pp +The closure of body scope may be to the section, where a macro is closed +by +.Sx \&SH ; +sub-section, closed by a section or +.Sx \&SS ; +part, closed by a section, sub-section, or +.Sx \&RE ; +or paragraph, closed by a section, sub-section, part, +.Sx \&HP , +.Sx \&IP , +.Sx \&LP , +.Sx \&P , +.Sx \&PP , +or +.Sx \&TP . +No closure refers to an explicit block closing macro. +.Pp +As a rule, block macros may not be nested; thus, calling a block macro +while another block macro scope is open, and the open scope is not +implicitly closed, is syntactically incorrect. +.Pp +.Bl -column -compact -offset indent "MacroX" "ArgumentsX" "Head ScopeX" "sub-sectionX" "compatX" +.It Em Macro Ta Em Arguments Ta Em Head Scope Ta Em Body Scope Ta Em Notes +.It Sx \&HP Ta <2 Ta current Ta paragraph Ta \& +.It Sx \&IP Ta <3 Ta current Ta paragraph Ta \& +.It Sx \&LP Ta 0 Ta current Ta paragraph Ta \& +.It Sx \&P Ta 0 Ta current Ta paragraph Ta \& +.It Sx \&PP Ta 0 Ta current Ta paragraph Ta \& +.It Sx \&RE Ta 0 Ta current Ta none Ta compat +.It Sx \&RS Ta 1 Ta current Ta part Ta compat +.It Sx \&SH Ta >0 Ta next-line Ta section Ta \& +.It Sx \&SS Ta >0 Ta next-line Ta sub-section Ta \& +.It Sx \&TP Ta n Ta next-line Ta paragraph Ta \& +.El +.Pp +Macros marked +.Qq compat +are as mentioned in +.Sx Line Macros . +.Pp +If a block macro is next-line scoped, it may only be followed by in-line +macros for decorating text. +.Sh REFERENCE +This section is a canonical reference to all macros, arranged +alphabetically. +For the scoping of individual macros, see +.Sx MACRO SYNTAX . +.Ss \&AT +Sets the volume for the footer for compatibility with man pages from +.Tn AT&T UNIX +releases. +The optional arguments specify which release it is from. +.Ss \&B +Text is rendered in bold face. +.Pp +See also +.Sx \&I , +.Sx \&R , +.Sx \&b , +.Sx \&i , +and +.Sx \&r . 
+.Ss \&BI +Text is rendered alternately in bold face and italic. +Thus, +.Sq .BI this word and that +causes +.Sq this +and +.Sq and +to render in bold face, while +.Sq word +and +.Sq that +render in italics. +Whitespace between arguments is omitted in output. +.Pp +Examples: +.Pp +.D1 \&.BI bold italic bold italic +.Pp +The output of this example will be emboldened +.Dq bold +and italicised +.Dq italic , +with spaces stripped between arguments. +.Pp +See also +.Sx \&IB , +.Sx \&BR , +.Sx \&RB , +.Sx \&RI , +and +.Sx \&IR . +.Ss \&BR +Text is rendered alternately in bold face and roman (the default font). +Whitespace between arguments is omitted in output. +.Pp +See +.Sx \&BI +for an equivalent example. +.Pp +See also +.Sx \&BI , +.Sx \&IB , +.Sx \&RB , +.Sx \&RI , +and +.Sx \&IR . +.Ss \&DT +Has no effect. +Included for compatibility. +.Ss \&HP +Begin a paragraph whose initial output line is left-justified, but +subsequent output lines are indented, with the following syntax: +.Bd -filled -offset indent +.Pf \. Sx \&HP +.Op Cm width +.Ed +.Pp +The +.Cm width +argument must conform to +.Sx Scaling Widths . +If specified, it's saved for later paragraph left-margins; if unspecified, the +saved or default width is used. +.Pp +See also +.Sx \&IP , +.Sx \&LP , +.Sx \&P , +.Sx \&PP , +and +.Sx \&TP . +.Ss \&I +Text is rendered in italics. +.Pp +See also +.Sx \&B , +.Sx \&R , +.Sx \&b , +.Sx \&i , +and +.Sx \&r . +.Ss \&IB +Text is rendered alternately in italics and bold face. Whitespace +between arguments is omitted in output. +.Pp +See +.Sx \&BI +for an equivalent example. +.Pp +See also +.Sx \&BI , +.Sx \&BR , +.Sx \&RB , +.Sx \&RI , +and +.Sx \&IR . +.Ss \&IP +Begin an indented paragraph with the following syntax: +.Bd -filled -offset indent +.Pf \. 
Sx \&IP +.Op Cm head Op Cm width +.Ed +.Pp +The +.Cm width +argument defines the width of the left margin and is defined by +.Sx Scaling Widths . +It's saved for later paragraph left-margins; if unspecified, the saved or +default width is used. +.Pp +The +.Cm head +argument is used as a leading term, flushed to the left margin. +This is useful for bulleted paragraphs and so on. +.Pp +See also +.Sx \&HP , +.Sx \&LP , +.Sx \&P , +.Sx \&PP , +and +.Sx \&TP . +.Ss \&IR +Text is rendered alternately in italics and roman (the default font). +Whitespace between arguments is omitted in output. +.Pp +See +.Sx \&BI +for an equivalent example. +.Pp +See also +.Sx \&BI , +.Sx \&IB , +.Sx \&BR , +.Sx \&RB , +and +.Sx \&RI . +.Ss \&LP +Begin an undecorated paragraph. +The scope of a paragraph is closed by a subsequent paragraph, +sub-section, section, or end of file. +The saved paragraph left-margin width is re-set to the default. +.Pp +See also +.Sx \&HP , +.Sx \&IP , +.Sx \&LP , +.Sx \&P , +.Sx \&PP , +and +.Sx \&TP . +.Ss \&P +Synonym for +.Sx \&LP . +.Pp +See also +.Sx \&HP , +.Sx \&IP , +.Sx \&LP , +.Sx \&PP , +and +.Sx \&TP . +.Ss \&PP +Synonym for +.Sx \&LP . +.Pp +See also +.Sx \&HP , +.Sx \&IP , +.Sx \&LP , +.Sx \&P , +and +.Sx \&TP . +.Ss \&R +Text is rendered in roman (the default font). +.Pp +See also +.Sx \&I , +.Sx \&B , +.Sx \&b , +.Sx \&i , +and +.Sx \&r . +.Ss \&RB +Text is rendered alternately in roman (the default font) and bold face. +Whitespace between arguments is omitted in output. +.Pp +See +.Sx \&BI +for an equivalent example. +.Pp +See also +.Sx \&BI , +.Sx \&IB , +.Sx \&BR , +.Sx \&RI , +and +.Sx \&IR . +.Ss \&RE +Explicitly close out the scope of a prior +.Sx \&RS . +.Ss \&RI +Text is rendered alternately in roman (the default font) and italics. +Whitespace between arguments is omitted in output. +.Pp +See +.Sx \&BI +for an equivalent example. +.Pp +See also +.Sx \&BI , +.Sx \&IB , +.Sx \&BR , +.Sx \&RB , +and +.Sx \&IR .
+.Ss \&RS +Begin a part setting the left margin. +The left margin controls the offset, following an initial indentation, +to un-indented text such as that of +.Sx \&PP . +This has the following syntax: +.Bd -filled -offset indent +.Pf \. Sx \&RS +.Op Cm width +.Ed +.Pp +The +.Cm width +argument must conform to +.Sx Scaling Widths . +If not specified, the saved or default width is used. +.Ss \&SB +Text is rendered in small size (one point smaller than the default font) +bold face. +.Ss \&SH +Begin a section. +The scope of a section is only closed by another section or the end of +file. +The paragraph left-margin width is re-set to the default. +.Ss \&SM +Text is rendered in small size (one point smaller than the default +font). +.Ss \&SS +Begin a sub-section. +The scope of a sub-section is closed by a subsequent sub-section, +section, or end of file. +The paragraph left-margin width is re-set to the default. +.Ss \&TH +Sets the title of the manual page with the following syntax: +.Bd -filled -offset indent +.Pf \. Sx \&TH +.Cm title section +.Op Cm date Op Cm source Op Cm volume +.Ed +.Pp +At least the upper-case document title +.Cm title +and numeric manual section +.Cm section +arguments must be provided. +The +.Cm date +argument should be formatted as described in +.Sx Dates , +but will be printed verbatim if it is not. +If the date is not specified, the current date is used. +The +.Cm source +string specifies the organisation providing the utility. +The +.Cm volume +string replaces the default rendered volume, which is dictated by the +manual section. +.Pp +Examples: +.Pp +.D1 \&.TH CVS 5 "1992-02-12" GNU +.Ss \&TP +Begin a paragraph where the head, if exceeding the indentation width, is +followed by a newline; if not, the body follows on the same line after a +buffer to the indentation width. +Subsequent output lines are indented. +The syntax is as follows: +.Bd -filled -offset indent +.Pf \. 
Sx \&TP +.Op Cm width +.Ed +.Pp +The +.Cm width +argument must conform to +.Sx Scaling Widths . +If specified, it's saved for later paragraph left-margins; if +unspecified, the saved or default width is used. +.Pp +See also +.Sx \&HP , +.Sx \&IP , +.Sx \&LP , +.Sx \&P , +and +.Sx \&PP . +.\" . +.\" . +.\" .Ss \&PD +.\" Has no effect. Included for compatibility. +.\" . +.\" . +.Ss \&UC +Sets the volume for the footer for compatibility with man pages from +BSD releases. +The optional first argument specifies which release it is from. +.Ss \&br +Breaks the current line. +Consecutive invocations have no further effect. +.Pp +See also +.Sx \&sp . +.Ss \&fi +End literal mode begun by +.Sx \&nf . +.Ss \&i +Italicise arguments. +Synonym for +.Sx \&I . +.Pp +See also +.Sx \&B , +.Sx \&I , +.Sx \&R , +.Sx \&b , +and +.Sx \&r . +.Ss \&na +Don't align to the right margin. +.Ss \&nf +Begin literal mode: all subsequent free-form lines have their end of +line boundaries preserved. +May be ended by +.Sx \&fi . +.Ss \&r +Fonts and styles (bold face, italics) reset to roman (default font). +.Pp +See also +.Sx \&B , +.Sx \&I , +.Sx \&R , +.Sx \&b , +and +.Sx \&i . +.Ss \&sp +Insert vertical spaces into output with the following syntax: +.Bd -filled -offset indent +.Pf \. Sx \&sp +.Op Cm height +.Ed +.Pp +Insert +.Cm height +spaces, which must conform to +.Sx Scaling Widths . +If 0, this is equivalent to the +.Sx \&br +macro. +Defaults to 1, if unspecified. +.Pp +See also +.Sx \&br . +.\" .Ss \&Sp +.\" A synonym for +.\" .Sx \&sp +.\" .Cm 0.5v . +.\" . +.\" .Ss \&Vb +.\" A synonym for +.\" .Sx \&nf . +.\" Accepts an argument (the height of the formatted space) which is +.\" disregarded. +.\" . +.\" .Ss \&Ve +.\" A synonym for +.\" .Sx \&fi . +.\" . +.Sh COMPATIBILITY +This section documents areas of questionable portability between +implementations of the +.Nm +language. 
+.Pp +.Bl -dash -compact +.It +In quoted literals, GNU troff allowed pair-wise double-quotes to produce +a standalone double-quote in formatted output. +It is not known whether this behaviour is exhibited by other formatters. +.It +The +.Sx \&sp +macro does not accept negative values in mandoc. +In GNU troff, this would result in strange behaviour. +.It +The +.Sq \(aq +macro control character, in GNU troff (and prior troffs) suppresses a +newline before macro output; in mandoc, it is an alias for the standard +.Sq \&. +control character. +.El +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr mandoc_char 7 +.Sh AUTHORS +The +.Nm +reference was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . +.Sh CAVEATS +Do not use this language. +Use +.Xr mdoc 7 , +instead. diff --git a/commands/mdocml/man.c b/commands/mdocml/man.c new file mode 100644 index 000000000..b77fbb2df --- /dev/null +++ b/commands/mdocml/man.c @@ -0,0 +1,677 @@ +/* $Id: man.c,v 1.76 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "libman.h" +#include "libmandoc.h" + +const char *const __man_macronames[MAN_MAX] = { + "br", "TH", "SH", "SS", + "TP", "LP", "PP", "P", + "IP", "HP", "SM", "SB", + "BI", "IB", "BR", "RB", + "R", "B", "I", "IR", + "RI", "na", "i", "sp", + "nf", "fi", "r", "RE", + "RS", "DT", "UC", "PD", + "Sp", "Vb", "Ve", "AT", + }; + +const char * const *man_macronames = __man_macronames; + +static struct man_node *man_node_alloc(int, int, + enum man_type, enum mant); +static int man_node_append(struct man *, + struct man_node *); +static void man_node_free(struct man_node *); +static void man_node_unlink(struct man *, + struct man_node *); +static int man_ptext(struct man *, int, char *, int); +static int man_pmacro(struct man *, int, char *, int); +static void man_free1(struct man *); +static void man_alloc1(struct man *); +static int macrowarn(struct man *, int, const char *, int); + + +const struct man_node * +man_node(const struct man *m) +{ + + return(MAN_HALT & m->flags ? NULL : m->first); +} + + +const struct man_meta * +man_meta(const struct man *m) +{ + + return(MAN_HALT & m->flags ? NULL : &m->meta); +} + + +void +man_reset(struct man *man) +{ + + man_free1(man); + man_alloc1(man); +} + + +void +man_free(struct man *man) +{ + + man_free1(man); + free(man); +} + + +struct man * +man_alloc(void *data, int pflags, mandocmsg msg) +{ + struct man *p; + + p = mandoc_calloc(1, sizeof(struct man)); + + man_hash_init(); + p->data = data; + p->pflags = pflags; + p->msg = msg; + + man_alloc1(p); + return(p); +} + + +int +man_endparse(struct man *m) +{ + + if (MAN_HALT & m->flags) + return(0); + else if (man_macroend(m)) + return(1); + m->flags |= MAN_HALT; + return(0); +} + + +int +man_parseln(struct man *m, int ln, char *buf, int offs) +{ + + if (MAN_HALT & m->flags) + return(0); + + return(('.' 
== buf[offs] || '\'' == buf[offs]) ? + man_pmacro(m, ln, buf, offs) : + man_ptext(m, ln, buf, offs)); +} + + +static void +man_free1(struct man *man) +{ + + if (man->first) + man_node_delete(man, man->first); + if (man->meta.title) + free(man->meta.title); + if (man->meta.source) + free(man->meta.source); + if (man->meta.rawdate) + free(man->meta.rawdate); + if (man->meta.vol) + free(man->meta.vol); + if (man->meta.msec) + free(man->meta.msec); +} + + +static void +man_alloc1(struct man *m) +{ + + memset(&m->meta, 0, sizeof(struct man_meta)); + m->flags = 0; + m->last = mandoc_calloc(1, sizeof(struct man_node)); + m->first = m->last; + m->last->type = MAN_ROOT; + m->last->tok = MAN_MAX; + m->next = MAN_NEXT_CHILD; +} + + +static int +man_node_append(struct man *man, struct man_node *p) +{ + + assert(man->last); + assert(man->first); + assert(MAN_ROOT != p->type); + + switch (man->next) { + case (MAN_NEXT_SIBLING): + man->last->next = p; + p->prev = man->last; + p->parent = man->last->parent; + break; + case (MAN_NEXT_CHILD): + man->last->child = p; + p->parent = man->last; + break; + default: + abort(); + /* NOTREACHED */ + } + + assert(p->parent); + p->parent->nchild++; + + if ( ! man_valid_pre(man, p)) + return(0); + + switch (p->type) { + case (MAN_HEAD): + assert(MAN_BLOCK == p->parent->type); + p->parent->head = p; + break; + case (MAN_BODY): + assert(MAN_BLOCK == p->parent->type); + p->parent->body = p; + break; + default: + break; + } + + man->last = p; + + switch (p->type) { + case (MAN_TEXT): + if ( ! man_valid_post(man)) + return(0); + if ( ! 
man_action_post(man)) + return(0); + break; + default: + break; + } + + return(1); +} + + +static struct man_node * +man_node_alloc(int line, int pos, enum man_type type, enum mant tok) +{ + struct man_node *p; + + p = mandoc_calloc(1, sizeof(struct man_node)); + p->line = line; + p->pos = pos; + p->type = type; + p->tok = tok; + return(p); +} + + +int +man_elem_alloc(struct man *m, int line, int pos, enum mant tok) +{ + struct man_node *p; + + p = man_node_alloc(line, pos, MAN_ELEM, tok); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_head_alloc(struct man *m, int line, int pos, enum mant tok) +{ + struct man_node *p; + + p = man_node_alloc(line, pos, MAN_HEAD, tok); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_body_alloc(struct man *m, int line, int pos, enum mant tok) +{ + struct man_node *p; + + p = man_node_alloc(line, pos, MAN_BODY, tok); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_block_alloc(struct man *m, int line, int pos, enum mant tok) +{ + struct man_node *p; + + p = man_node_alloc(line, pos, MAN_BLOCK, tok); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_word_alloc(struct man *m, int line, int pos, const char *word) +{ + struct man_node *n; + size_t sv, len; + + len = strlen(word); + + n = man_node_alloc(line, pos, MAN_TEXT, MAN_MAX); + n->string = mandoc_malloc(len + 1); + sv = strlcpy(n->string, word, len + 1); + + /* Prohibit truncation. */ + assert(sv < len + 1); + + if ( ! man_node_append(m, n)) + return(0); + + m->next = MAN_NEXT_SIBLING; + return(1); +} + + +/* + * Free all of the resources held by a node. This does NOT unlink a + * node from its context; for that, see man_node_unlink(). 
+ */ +static void +man_node_free(struct man_node *p) +{ + + if (p->string) + free(p->string); + free(p); +} + + +void +man_node_delete(struct man *m, struct man_node *p) +{ + + while (p->child) + man_node_delete(m, p->child); + + man_node_unlink(m, p); + man_node_free(p); +} + + +static int +man_ptext(struct man *m, int line, char *buf, int offs) +{ + int i; + + /* Ignore bogus comments. */ + + if ('\\' == buf[offs] && + '.' == buf[offs + 1] && + '"' == buf[offs + 2]) + return(man_pmsg(m, line, offs, MANDOCERR_BADCOMMENT)); + + /* Literal free-form text whitespace is preserved. */ + + if (MAN_LITERAL & m->flags) { + if ( ! man_word_alloc(m, line, offs, buf + offs)) + return(0); + goto descope; + } + + /* Pump blank lines directly into the backend. */ + + for (i = offs; ' ' == buf[i]; i++) + /* Skip leading whitespace. */ ; + + if ('\0' == buf[i]) { + /* Allocate a blank entry. */ + if ( ! man_word_alloc(m, line, offs, "")) + return(0); + goto descope; + } + + /* + * Warn if the last un-escaped character is whitespace. Then + * strip away the remaining spaces (tabs stay!). + */ + + i = (int)strlen(buf); + assert(i); + + if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { + if (i > 1 && '\\' != buf[i - 2]) + if ( ! man_pmsg(m, line, i - 1, MANDOCERR_EOLNSPACE)) + return(0); + + for (--i; i && ' ' == buf[i]; i--) + /* Spin back to non-space. */ ; + + /* Jump ahead of escaped whitespace. */ + i += '\\' == buf[i] ? 2 : 1; + + buf[i] = '\0'; + } + + if ( ! man_word_alloc(m, line, offs, buf + offs)) + return(0); + + /* + * End-of-sentence check. If the last character is an unescaped + * EOS character, then flag the node as being the end of a + * sentence. The front-end will know how to interpret this. + */ + + assert(i); + if (mandoc_eos(buf, (size_t)i)) + m->last->flags |= MAN_EOS; + +descope: + /* + * Co-ordinate what happens with having a next-line scope open: + * first close out the element scope (if applicable), then close + * out the block scope (also if applicable). 
+ */ + + if (MAN_ELINE & m->flags) { + m->flags &= ~MAN_ELINE; + if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) + return(0); + } + + if ( ! (MAN_BLINE & m->flags)) + return(1); + m->flags &= ~MAN_BLINE; + + if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) + return(0); + return(man_body_alloc(m, line, offs, m->last->tok)); +} + + +static int +macrowarn(struct man *m, int ln, const char *buf, int offs) +{ + int rc; + + rc = man_vmsg(m, MANDOCERR_MACRO, ln, offs, + "unknown macro: %s%s", + buf, strlen(buf) > 3 ? "..." : ""); + + return(MAN_IGN_MACRO & m->pflags ? rc : 0); +} + + +static int +man_pmacro(struct man *m, int ln, char *buf, int offs) +{ + int i, j, ppos; + enum mant tok; + char mac[5]; + struct man_node *n; + + /* Comments and empties are quickly ignored. */ + + offs++; + + if ('\0' == buf[offs]) + return(1); + + i = offs; + + /* + * Skip whitespace between the control character and initial + * text. "Whitespace" is both spaces and tabs. + */ + + if (' ' == buf[i] || '\t' == buf[i]) { + i++; + while (buf[i] && (' ' == buf[i] || '\t' == buf[i])) + i++; + if ('\0' == buf[i]) + goto out; + } + + ppos = i; + + /* Copy the first word into a nil-terminated buffer. */ + + for (j = 0; j < 4; j++, i++) { + if ('\0' == (mac[j] = buf[i])) + break; + else if (' ' == buf[i]) + break; + + /* Check for invalid characters. */ + + if (isgraph((u_char)buf[i])) + continue; + if ( ! man_pmsg(m, ln, i, MANDOCERR_BADCHAR)) + return(0); + i--; + } + + mac[j] = '\0'; + + if (j == 4 || j < 1) { + if ( ! macrowarn(m, ln, mac, ppos)) + goto err; + return(1); + } + + if (MAN_MAX == (tok = man_hash_find(mac))) { + if ( ! macrowarn(m, ln, mac, ppos)) + goto err; + return(1); + } + + /* The macro is sane. Jump to the next word. */ + + while (buf[i] && ' ' == buf[i]) + i++; + + /* + * Trailing whitespace. Note that tabs are allowed to be passed + * into the parser as "text", so we only warn about spaces here. + */ + + if ('\0' == buf[i] && ' ' == buf[i - 1]) + if ( ! 
man_pmsg(m, ln, i - 1, MANDOCERR_EOLNSPACE)) + goto err; + + /* + * Remove prior ELINE macro, as it's being clobbered by a new + * macro. Note that NSCOPED macros do not close out ELINE + * macros---they don't print text---so we let those slip by. + */ + + if ( ! (MAN_NSCOPED & man_macros[tok].flags) && + m->flags & MAN_ELINE) { + assert(MAN_TEXT != m->last->type); + + /* + * This occurs in the following construction: + * .B + * .br + * .B + * .br + * I hate man macros. + * Flat-out disallow this madness. + */ + if (MAN_NSCOPED & man_macros[m->last->tok].flags) { + man_pmsg(m, ln, ppos, MANDOCERR_SYNTLINESCOPE); + return(0); + } + + n = m->last; + + assert(n); + assert(NULL == n->child); + assert(0 == n->nchild); + + if ( ! man_nmsg(m, n, MANDOCERR_LINESCOPE)) + return(0); + + man_node_delete(m, n); + m->flags &= ~MAN_ELINE; + } + + /* + * Save the fact that we're in the next-line for a block. In + * this way, embedded roff instructions can "remember" state + * when they exit. + */ + + if (MAN_BLINE & m->flags) + m->flags |= MAN_BPLINE; + + /* Call to handler... */ + + assert(man_macros[tok].fp); + if ( ! (*man_macros[tok].fp)(m, tok, ln, ppos, &i, buf)) + goto err; + +out: + /* + * We weren't in a block-line scope when entering the + * above-parsed macro, so return. + */ + + if ( ! (MAN_BPLINE & m->flags)) { + m->flags &= ~MAN_ILINE; + return(1); + } + m->flags &= ~MAN_BPLINE; + + /* + * If we're in a block scope, then allow this macro to slip by + * without closing scope around it. + */ + + if (MAN_ILINE & m->flags) { + m->flags &= ~MAN_ILINE; + return(1); + } + + /* + * If we've opened a new next-line element scope, then return + * now, as the next line will close out the block scope. + */ + + if (MAN_ELINE & m->flags) + return(1); + + /* Close out the block scope opened in the prior line. */ + + assert(MAN_BLINE & m->flags); + m->flags &= ~MAN_BLINE; + + if ( ! 
man_unscope(m, m->last->parent, MANDOCERR_MAX)) + return(0); + return(man_body_alloc(m, ln, offs, m->last->tok)); + +err: /* Error out. */ + + m->flags |= MAN_HALT; + return(0); +} + + +int +man_vmsg(struct man *man, enum mandocerr t, + int ln, int pos, const char *fmt, ...) +{ + char buf[256]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf) - 1, fmt, ap); + va_end(ap); + return((*man->msg)(t, man->data, ln, pos, buf)); +} + + +/* + * Unlink a node from its context. If "m" is provided, the last parse + * point will also be adjusted accordingly. + */ +static void +man_node_unlink(struct man *m, struct man_node *n) +{ + + /* Adjust siblings. */ + + if (n->prev) + n->prev->next = n->next; + if (n->next) + n->next->prev = n->prev; + + /* Adjust parent. */ + + if (n->parent) { + n->parent->nchild--; + if (n->parent->child == n) + n->parent->child = n->prev ? n->prev : n->next; + } + + /* Adjust parse point, if applicable. */ + + if (m && m->last == n) { + /*XXX: this can occur when bailing from validation. */ + /*assert(NULL == n->next);*/ + if (n->prev) { + m->last = n->prev; + m->next = MAN_NEXT_SIBLING; + } else { + m->last = n->parent; + m->next = MAN_NEXT_CHILD; + } + } + + if (m && m->first == n) + m->first = NULL; +} diff --git a/commands/mdocml/man.h b/commands/mdocml/man.h new file mode 100644 index 000000000..46ec7f8e7 --- /dev/null +++ b/commands/mdocml/man.h @@ -0,0 +1,119 @@ +/* $Id: man.h,v 1.37 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef MAN_H +#define MAN_H + +#include + +enum mant { + MAN_br = 0, + MAN_TH, + MAN_SH, + MAN_SS, + MAN_TP, + MAN_LP, + MAN_PP, + MAN_P, + MAN_IP, + MAN_HP, + MAN_SM, + MAN_SB, + MAN_BI, + MAN_IB, + MAN_BR, + MAN_RB, + MAN_R, + MAN_B, + MAN_I, + MAN_IR, + MAN_RI, + MAN_na, + MAN_i, + MAN_sp, + MAN_nf, + MAN_fi, + MAN_r, + MAN_RE, + MAN_RS, + MAN_DT, + MAN_UC, + MAN_PD, + MAN_Sp, + MAN_Vb, + MAN_Ve, + MAN_AT, + MAN_MAX +}; + +enum man_type { + MAN_TEXT, + MAN_ELEM, + MAN_ROOT, + MAN_BLOCK, + MAN_HEAD, + MAN_BODY +}; + +struct man_meta { + char *msec; + time_t date; + char *rawdate; + char *vol; + char *title; + char *source; +}; + +struct man_node { + struct man_node *parent; + struct man_node *child; + struct man_node *next; + struct man_node *prev; + int nchild; + int line; + int pos; + enum mant tok; + int flags; +#define MAN_VALID (1 << 0) +#define MAN_ACTED (1 << 1) +#define MAN_EOS (1 << 2) + enum man_type type; + char *string; + struct man_node *head; + struct man_node *body; +}; + +#define MAN_IGN_MACRO (1 << 0) +#define MAN_IGN_ESCAPE (1 << 2) + +extern const char *const *man_macronames; + +__BEGIN_DECLS + +struct man; + +void man_free(struct man *); +struct man *man_alloc(void *, int, mandocmsg); +void man_reset(struct man *); +int man_parseln(struct man *, int, char *, int); +int man_endparse(struct man *); + +const struct man_node *man_node(const struct man *); +const struct man_meta *man_meta(const struct man *); + +__END_DECLS + +#endif /*!MAN_H*/ diff --git a/commands/mdocml/man_action.c b/commands/mdocml/man_action.c new file mode 100644 index 000000000..e63d342b4 --- /dev/null +++ b/commands/mdocml/man_action.c 
@@ -0,0 +1,280 @@ +/* $Id: man_action.c,v 1.39 2010/05/26 14:03:54 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include "mandoc.h" +#include "libman.h" +#include "libmandoc.h" + +struct actions { + int (*post)(struct man *); +}; + +static int post_TH(struct man *); +static int post_fi(struct man *); +static int post_nf(struct man *); +static int post_AT(struct man *); +static int post_UC(struct man *); + +const struct actions man_actions[MAN_MAX] = { + { NULL }, /* br */ + { post_TH }, /* TH */ + { NULL }, /* SH */ + { NULL }, /* SS */ + { NULL }, /* TP */ + { NULL }, /* LP */ + { NULL }, /* PP */ + { NULL }, /* P */ + { NULL }, /* IP */ + { NULL }, /* HP */ + { NULL }, /* SM */ + { NULL }, /* SB */ + { NULL }, /* BI */ + { NULL }, /* IB */ + { NULL }, /* BR */ + { NULL }, /* RB */ + { NULL }, /* R */ + { NULL }, /* B */ + { NULL }, /* I */ + { NULL }, /* IR */ + { NULL }, /* RI */ + { NULL }, /* na */ + { NULL }, /* i */ + { NULL }, /* sp */ + { post_nf }, /* nf */ + { post_fi }, /* fi */ + { NULL }, /* r */ + { NULL }, /* RE */ + { NULL }, /* RS */ + { NULL }, /* DT */ + { post_UC }, /* UC */ + { NULL }, /* PD */ + { NULL 
}, /* Sp */ + { post_nf }, /* Vb */ + { post_fi }, /* Ve */ + { post_AT }, /* AT */ +}; + + +int +man_action_post(struct man *m) +{ + + if (MAN_ACTED & m->last->flags) + return(1); + m->last->flags |= MAN_ACTED; + + switch (m->last->type) { + case (MAN_TEXT): + /* FALLTHROUGH */ + case (MAN_ROOT): + return(1); + default: + break; + } + + if (NULL == man_actions[m->last->tok].post) + return(1); + return((*man_actions[m->last->tok].post)(m)); +} + + +static int +post_fi(struct man *m) +{ + + if ( ! (MAN_LITERAL & m->flags)) + if ( ! man_nmsg(m, m->last, MANDOCERR_NOSCOPE)) + return(0); + m->flags &= ~MAN_LITERAL; + return(1); +} + + +static int +post_nf(struct man *m) +{ + + if (MAN_LITERAL & m->flags) + if ( ! man_nmsg(m, m->last, MANDOCERR_SCOPEREP)) + return(0); + m->flags |= MAN_LITERAL; + return(1); +} + + +static int +post_TH(struct man *m) +{ + struct man_node *n; + + if (m->meta.title) + free(m->meta.title); + if (m->meta.vol) + free(m->meta.vol); + if (m->meta.source) + free(m->meta.source); + if (m->meta.msec) + free(m->meta.msec); + if (m->meta.rawdate) + free(m->meta.rawdate); + + m->meta.title = m->meta.vol = m->meta.rawdate = + m->meta.msec = m->meta.source = NULL; + m->meta.date = 0; + + /* ->TITLE<- MSEC DATE SOURCE VOL */ + + n = m->last->child; + assert(n); + m->meta.title = mandoc_strdup(n->string); + + /* TITLE ->MSEC<- DATE SOURCE VOL */ + + n = n->next; + assert(n); + m->meta.msec = mandoc_strdup(n->string); + + /* TITLE MSEC ->DATE<- SOURCE VOL */ + + /* + * Try to parse the date. If this works, stash the epoch (this + * is optimal because we can reformat it in the canonical form). + * If it doesn't parse, isn't specified at all, or is an empty + * string, then use the current date. + */ + + n = n->next; + if (n && n->string && *n->string) { + m->meta.date = mandoc_a2time + (MTIME_ISO_8601, n->string); + if (0 == m->meta.date) { + if ( ! 
man_nmsg(m, n, MANDOCERR_BADDATE)) + return(0); + m->meta.rawdate = mandoc_strdup(n->string); + } + } else + m->meta.date = time(NULL); + + /* TITLE MSEC DATE ->SOURCE<- VOL */ + + if (n && (n = n->next)) + m->meta.source = mandoc_strdup(n->string); + + /* TITLE MSEC DATE SOURCE ->VOL<- */ + + if (n && (n = n->next)) + m->meta.vol = mandoc_strdup(n->string); + + /* + * Remove the `TH' node after we've processed it for our + * meta-data. + */ + man_node_delete(m, m->last); + return(1); +} + + +static int +post_AT(struct man *m) +{ + static const char * const unix_versions[] = { + "7th Edition", + "System III", + "System V", + "System V Release 2", + }; + + const char *p, *s; + struct man_node *n, *nn; + + n = m->last->child; + + if (NULL == n || MAN_TEXT != n->type) + p = unix_versions[0]; + else { + s = n->string; + if (0 == strcmp(s, "3")) + p = unix_versions[0]; + else if (0 == strcmp(s, "4")) + p = unix_versions[1]; + else if (0 == strcmp(s, "5")) { + nn = n->next; + if (nn && MAN_TEXT == nn->type && nn->string[0]) + p = unix_versions[3]; + else + p = unix_versions[2]; + } else + p = unix_versions[0]; + } + + if (m->meta.source) + free(m->meta.source); + + m->meta.source = mandoc_strdup(p); + + return(1); +} + + +static int +post_UC(struct man *m) +{ + static const char * const bsd_versions[] = { + "3rd Berkeley Distribution", + "4th Berkeley Distribution", + "4.2 Berkeley Distribution", + "4.3 Berkeley Distribution", + "4.4 Berkeley Distribution", + }; + + const char *p, *s; + struct man_node *n; + + n = m->last->child; + + if (NULL == n || MAN_TEXT != n->type) + p = bsd_versions[0]; + else { + s = n->string; + if (0 == strcmp(s, "3")) + p = bsd_versions[0]; + else if (0 == strcmp(s, "4")) + p = bsd_versions[1]; + else if (0 == strcmp(s, "5")) + p = bsd_versions[2]; + else if (0 == strcmp(s, "6")) + p = bsd_versions[3]; + else if (0 == strcmp(s, "7")) + p = bsd_versions[4]; + else + p = bsd_versions[0]; + } + + if (m->meta.source) + free(m->meta.source); + + 
m->meta.source = mandoc_strdup(p); + + return(1); +} diff --git a/commands/mdocml/man_argv.c b/commands/mdocml/man_argv.c new file mode 100644 index 000000000..44b9a25c3 --- /dev/null +++ b/commands/mdocml/man_argv.c @@ -0,0 +1,104 @@ +/* $Id: man_argv.c,v 1.4 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include + +#include "mandoc.h" +#include "libman.h" + + +int +man_args(struct man *m, int line, int *pos, char *buf, char **v) +{ + + assert(*pos); + assert(' ' != buf[*pos]); + + if (0 == buf[*pos]) + return(ARGS_EOLN); + + *v = &buf[*pos]; + + /* + * Process a quoted literal. A quote begins with a double-quote + * and ends with a double-quote NOT preceded by a double-quote. + * Whitespace is NOT involved in literal termination. + */ + + if ('\"' == buf[*pos]) { + *v = &buf[++(*pos)]; + + for ( ; buf[*pos]; (*pos)++) { + if ('\"' != buf[*pos]) + continue; + if ('\"' != buf[*pos + 1]) + break; + (*pos)++; + } + + if (0 == buf[*pos]) { + if ( ! 
man_pmsg(m, line, *pos, MANDOCERR_BADQUOTE)) + return(ARGS_ERROR); + return(ARGS_QWORD); + } + + buf[(*pos)++] = 0; + + if (0 == buf[*pos]) + return(ARGS_QWORD); + + while (' ' == buf[*pos]) + (*pos)++; + + if (0 == buf[*pos]) + if ( ! man_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE)) + return(ARGS_ERROR); + + return(ARGS_QWORD); + } + + /* + * A non-quoted term progresses until either the end of line or + * a non-escaped whitespace. + */ + + for ( ; buf[*pos]; (*pos)++) + if (' ' == buf[*pos] && '\\' != buf[*pos - 1]) + break; + + if (0 == buf[*pos]) + return(ARGS_WORD); + + buf[(*pos)++] = 0; + + while (' ' == buf[*pos]) + (*pos)++; + + if (0 == buf[*pos]) + if ( ! man_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE)) + return(ARGS_ERROR); + + return(ARGS_WORD); +} + diff --git a/commands/mdocml/man_hash.c b/commands/mdocml/man_hash.c new file mode 100644 index 000000000..e80a39c46 --- /dev/null +++ b/commands/mdocml/man_hash.c @@ -0,0 +1,106 @@ +/* $Id: man_hash.c,v 1.22 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "libman.h" + +#define HASH_DEPTH 6 + +#define HASH_ROW(x) do { \ + if (isupper((u_char)(x))) \ + (x) -= 65; \ + else \ + (x) -= 97; \ + (x) *= HASH_DEPTH; \ + } while (/* CONSTCOND */ 0) + +/* + * Lookup table holds one row per first letter of a macro name, with + * upper- and lower-case letters sharing a row (see HASH_ROW), and + * HASH_DEPTH buckets per row that are filled in macro-index order. + * Each bucket is the index of a macro (the integer value of the enum + * stored as a char to save a bit of space); UCHAR_MAX marks it unused. + */ +static u_char table[26 * HASH_DEPTH]; + +/* + * XXX - this hash has global scope, so if intended for use as a library + * with multiple callers, it will need re-invocation protection. + */ +void +man_hash_init(void) +{ + int i, j, x; + + memset(table, UCHAR_MAX, sizeof(table)); + + assert(/* LINTED */ + MAN_MAX < UCHAR_MAX); + + for (i = 0; i < (int)MAN_MAX; i++) { + x = man_macronames[i][0]; + + assert(isalpha((u_char)x)); + + HASH_ROW(x); + + for (j = 0; j < HASH_DEPTH; j++) + if (UCHAR_MAX == table[x + j]) { + table[x + j] = (u_char)i; + break; + } + + assert(j < HASH_DEPTH); + } +} + + +enum mant +man_hash_find(const char *tmp) +{ + int x, y, i; + enum mant tok; + + if ('\0' == (x = tmp[0])) + return(MAN_MAX); + if ( ! 
(isalpha((u_char)x))) + return(MAN_MAX); + + HASH_ROW(x); + + for (i = 0; i < HASH_DEPTH; i++) { + if (UCHAR_MAX == (y = table[x + i])) + return(MAN_MAX); + + tok = (enum mant)y; + if (0 == strcmp(tmp, man_macronames[tok])) + return(tok); + } + + return(MAN_MAX); +} diff --git a/commands/mdocml/man_html.c b/commands/mdocml/man_html.c new file mode 100644 index 000000000..2406393a3 --- /dev/null +++ b/commands/mdocml/man_html.c @@ -0,0 +1,751 @@ +/* $Id: man_html.c,v 1.37 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "out.h" +#include "html.h" +#include "man.h" +#include "main.h" + +/* TODO: preserve ident widths. */ +/* FIXME: have PD set the default vspace width. 
*/ + +#define INDENT 5 +#define HALFINDENT 3 + +#define MAN_ARGS const struct man_meta *m, \ + const struct man_node *n, \ + struct html *h + +struct htmlman { + int (*pre)(MAN_ARGS); + int (*post)(MAN_ARGS); +}; + +static void print_man(MAN_ARGS); +static void print_man_head(MAN_ARGS); +static void print_man_nodelist(MAN_ARGS); +static void print_man_node(MAN_ARGS); + +static int a2width(const struct man_node *, + struct roffsu *); + +static int man_alt_pre(MAN_ARGS); +static int man_br_pre(MAN_ARGS); +static int man_ign_pre(MAN_ARGS); +static void man_root_post(MAN_ARGS); +static int man_root_pre(MAN_ARGS); +static int man_B_pre(MAN_ARGS); +static int man_HP_pre(MAN_ARGS); +static int man_I_pre(MAN_ARGS); +static int man_IP_pre(MAN_ARGS); +static int man_PP_pre(MAN_ARGS); +static int man_RS_pre(MAN_ARGS); +static int man_SB_pre(MAN_ARGS); +static int man_SH_pre(MAN_ARGS); +static int man_SM_pre(MAN_ARGS); +static int man_SS_pre(MAN_ARGS); + +static const struct htmlman mans[MAN_MAX] = { + { man_br_pre, NULL }, /* br */ + { NULL, NULL }, /* TH */ + { man_SH_pre, NULL }, /* SH */ + { man_SS_pre, NULL }, /* SS */ + { man_IP_pre, NULL }, /* TP */ + { man_PP_pre, NULL }, /* LP */ + { man_PP_pre, NULL }, /* PP */ + { man_PP_pre, NULL }, /* P */ + { man_IP_pre, NULL }, /* IP */ + { man_HP_pre, NULL }, /* HP */ + { man_SM_pre, NULL }, /* SM */ + { man_SB_pre, NULL }, /* SB */ + { man_alt_pre, NULL }, /* BI */ + { man_alt_pre, NULL }, /* IB */ + { man_alt_pre, NULL }, /* BR */ + { man_alt_pre, NULL }, /* RB */ + { NULL, NULL }, /* R */ + { man_B_pre, NULL }, /* B */ + { man_I_pre, NULL }, /* I */ + { man_alt_pre, NULL }, /* IR */ + { man_alt_pre, NULL }, /* RI */ + { NULL, NULL }, /* na */ + { NULL, NULL }, /* i */ + { man_br_pre, NULL }, /* sp */ + { NULL, NULL }, /* nf */ + { NULL, NULL }, /* fi */ + { NULL, NULL }, /* r */ + { NULL, NULL }, /* RE */ + { man_RS_pre, NULL }, /* RS */ + { man_ign_pre, NULL }, /* DT */ + { man_ign_pre, NULL }, /* UC */ + { man_ign_pre, 
NULL }, /* PD */ + { man_br_pre, NULL }, /* Sp */ + { man_ign_pre, NULL }, /* Vb */ + { NULL, NULL }, /* Ve */ + { man_ign_pre, NULL }, /* AT */ +}; + + +void +html_man(void *arg, const struct man *m) +{ + struct html *h; + struct tag *t; + + h = (struct html *)arg; + + print_gen_decls(h); + + t = print_otag(h, TAG_HTML, 0, NULL); + print_man(man_meta(m), man_node(m), h); + print_tagq(h, t); + + printf("\n"); +} + + +static void +print_man(MAN_ARGS) +{ + struct tag *t; + struct htmlpair tag; + + t = print_otag(h, TAG_HEAD, 0, NULL); + + print_man_head(m, n, h); + print_tagq(h, t); + t = print_otag(h, TAG_BODY, 0, NULL); + + tag.key = ATTR_CLASS; + tag.val = "body"; + print_otag(h, TAG_DIV, 1, &tag); + + print_man_nodelist(m, n, h); + + print_tagq(h, t); +} + + +/* ARGSUSED */ +static void +print_man_head(MAN_ARGS) +{ + + print_gen_head(h); + bufinit(h); + buffmt(h, "%s(%s)", m->title, m->msec); + + print_otag(h, TAG_TITLE, 0, NULL); + print_text(h, h->buf); +} + + +static void +print_man_nodelist(MAN_ARGS) +{ + + print_man_node(m, n, h); + if (n->next) + print_man_nodelist(m, n->next, h); +} + + +static void +print_man_node(MAN_ARGS) +{ + int child; + struct tag *t; + + child = 1; + t = h->tags.head; + + bufinit(h); + + /* + * FIXME: embedded elements within next-line scopes (e.g., `br' + * within an empty `B') will cause formatting to be forgotten + * due to scope closing out. + */ + + switch (n->type) { + case (MAN_ROOT): + child = man_root_pre(m, n, h); + break; + case (MAN_TEXT): + print_text(h, n->string); + return; + default: + /* + * Close out scope of font prior to opening a macro + * scope. Assert that the metafont is on the top of the + * stack (it's never nested). 
+ */ + if (h->metaf) { + assert(h->metaf == t); + print_tagq(h, h->metaf); + assert(NULL == h->metaf); + t = h->tags.head; + } + if (mans[n->tok].pre) + child = (*mans[n->tok].pre)(m, n, h); + break; + } + + if (child && n->child) + print_man_nodelist(m, n->child, h); + + /* This will automatically close out any font scope. */ + print_stagq(h, t); + + bufinit(h); + + switch (n->type) { + case (MAN_ROOT): + man_root_post(m, n, h); + break; + case (MAN_TEXT): + break; + default: + if (mans[n->tok].post) + (*mans[n->tok].post)(m, n, h); + break; + } +} + + +static int +a2width(const struct man_node *n, struct roffsu *su) +{ + + if (MAN_TEXT != n->type) + return(0); + if (a2roffsu(n->string, su, SCALE_BU)) + return(1); + + return(0); +} + + +/* ARGSUSED */ +static int +man_root_pre(MAN_ARGS) +{ + struct htmlpair tag[3]; + struct tag *t, *tt; + char b[BUFSIZ], title[BUFSIZ]; + + b[0] = 0; + if (m->vol) + (void)strlcat(b, m->vol, BUFSIZ); + + snprintf(title, BUFSIZ - 1, "%s(%s)", m->title, m->msec); + + PAIR_CLASS_INIT(&tag[0], "header"); + bufcat_style(h, "width", "100%"); + PAIR_STYLE_INIT(&tag[1], h); + PAIR_SUMMARY_INIT(&tag[2], "header"); + + t = print_otag(h, TAG_TABLE, 3, tag); + tt = print_otag(h, TAG_TR, 0, NULL); + + bufinit(h); + bufcat_style(h, "width", "10%"); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_TD, 1, tag); + print_text(h, title); + print_stagq(h, tt); + + bufinit(h); + bufcat_style(h, "width", "80%"); + bufcat_style(h, "white-space", "nowrap"); + bufcat_style(h, "text-align", "center"); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_TD, 1, tag); + print_text(h, b); + print_stagq(h, tt); + + bufinit(h); + bufcat_style(h, "width", "10%"); + bufcat_style(h, "text-align", "right"); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_TD, 1, tag); + print_text(h, title); + print_tagq(h, t); + return(1); +} + + +/* ARGSUSED */ +static void +man_root_post(MAN_ARGS) +{ + struct htmlpair tag[3]; + struct tag *t, *tt; + char b[DATESIZ]; + + if 
(m->rawdate) + strlcpy(b, m->rawdate, DATESIZ); + else + time2a(m->date, b, DATESIZ); + + PAIR_CLASS_INIT(&tag[0], "footer"); + bufcat_style(h, "width", "100%"); + PAIR_STYLE_INIT(&tag[1], h); + PAIR_SUMMARY_INIT(&tag[2], "footer"); + + t = print_otag(h, TAG_TABLE, 3, tag); + tt = print_otag(h, TAG_TR, 0, NULL); + + bufinit(h); + bufcat_style(h, "width", "50%"); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_TD, 1, tag); + print_text(h, b); + print_stagq(h, tt); + + bufinit(h); + bufcat_style(h, "width", "50%"); + bufcat_style(h, "text-align", "right"); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_TD, 1, tag); + if (m->source) + print_text(h, m->source); + print_tagq(h, t); +} + + + +/* ARGSUSED */ +static int +man_br_pre(MAN_ARGS) +{ + struct roffsu su; + struct htmlpair tag; + + SCALE_VS_INIT(&su, 1); + + switch (n->tok) { + case (MAN_Sp): + SCALE_VS_INIT(&su, 0.5); + break; + case (MAN_sp): + if (n->child) + a2roffsu(n->child->string, &su, SCALE_VS); + break; + default: + su.scale = 0; + break; + } + + bufcat_su(h, "height", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + + /* So the div isn't empty: */ + print_text(h, "\\~"); + + return(0); +} + + +/* ARGSUSED */ +static int +man_SH_pre(MAN_ARGS) +{ + struct htmlpair tag[2]; + struct roffsu su; + + if (MAN_BODY == n->type) { + SCALE_HS_INIT(&su, INDENT); + bufcat_su(h, "margin-left", &su); + PAIR_CLASS_INIT(&tag[0], "sec-body"); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + return(1); + } else if (MAN_BLOCK == n->type) { + PAIR_CLASS_INIT(&tag[0], "sec-block"); + if (n->prev && MAN_SH == n->prev->tok) + if (NULL == n->prev->body->child) { + print_otag(h, TAG_DIV, 1, tag); + return(1); + } + + SCALE_VS_INIT(&su, 1); + bufcat_su(h, "margin-top", &su); + if (NULL == n->next) + bufcat_su(h, "margin-bottom", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + return(1); + } + + PAIR_CLASS_INIT(&tag[0], "sec-head"); + print_otag(h, TAG_DIV, 1, 
tag); + return(1); +} + + +/* ARGSUSED */ +static int +man_alt_pre(MAN_ARGS) +{ + const struct man_node *nn; + struct tag *t; + int i; + enum htmlfont fp; + + for (i = 0, nn = n->child; nn; nn = nn->next, i++) { + switch (n->tok) { + case (MAN_BI): + fp = i % 2 ? HTMLFONT_ITALIC : HTMLFONT_BOLD; + break; + case (MAN_IB): + fp = i % 2 ? HTMLFONT_BOLD : HTMLFONT_ITALIC; + break; + case (MAN_RI): + fp = i % 2 ? HTMLFONT_ITALIC : HTMLFONT_NONE; + break; + case (MAN_IR): + fp = i % 2 ? HTMLFONT_NONE : HTMLFONT_ITALIC; + break; + case (MAN_BR): + fp = i % 2 ? HTMLFONT_NONE : HTMLFONT_BOLD; + break; + case (MAN_RB): + fp = i % 2 ? HTMLFONT_BOLD : HTMLFONT_NONE; + break; + default: + abort(); + /* NOTREACHED */ + } + + if (i) + h->flags |= HTML_NOSPACE; + + /* + * Open and close the scope with each argument, so that + * internal \f escapes, which are common, are also + * closed out with the scope. + */ + t = print_ofont(h, fp); + print_man_node(m, nn, h); + print_tagq(h, t); + } + + return(0); +} + + +/* ARGSUSED */ +static int +man_SB_pre(MAN_ARGS) +{ + struct htmlpair tag; + + /* FIXME: print_ofont(). 
*/ + PAIR_CLASS_INIT(&tag, "small bold"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +man_SM_pre(MAN_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "small"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +man_SS_pre(MAN_ARGS) +{ + struct htmlpair tag[3]; + struct roffsu su; + + SCALE_VS_INIT(&su, 1); + + if (MAN_BODY == n->type) { + PAIR_CLASS_INIT(&tag[0], "ssec-body"); + if (n->parent->next && n->child) { + bufcat_su(h, "margin-bottom", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + return(1); + } + + print_otag(h, TAG_DIV, 1, tag); + return(1); + } else if (MAN_BLOCK == n->type) { + PAIR_CLASS_INIT(&tag[0], "ssec-block"); + if (n->prev && MAN_SS == n->prev->tok) + if (n->prev->body->child) { + bufcat_su(h, "margin-top", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + return(1); + } + + print_otag(h, TAG_DIV, 1, tag); + return(1); + } + + SCALE_HS_INIT(&su, INDENT - HALFINDENT); + bufcat_su(h, "margin-left", &su); + PAIR_CLASS_INIT(&tag[0], "ssec-head"); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + return(1); +} + + +/* ARGSUSED */ +static int +man_PP_pre(MAN_ARGS) +{ + struct htmlpair tag; + struct roffsu su; + int i; + + if (MAN_BLOCK != n->type) + return(1); + + i = 0; + + if (MAN_ROOT == n->parent->type) { + SCALE_HS_INIT(&su, INDENT); + bufcat_su(h, "margin-left", &su); + i = 1; + } + if (n->prev) { + SCALE_VS_INIT(&su, 1); + bufcat_su(h, "margin-top", &su); + i = 1; + } + + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, i, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +man_IP_pre(MAN_ARGS) +{ + struct roffsu su; + struct htmlpair tag; + const struct man_node *nn; + int width; + + /* + * This scattering of 1-BU margins and pads is to make sure that + * when text overruns its box, the subsequent text isn't flush + * up against it. 
However, the rest of the right-hand box must + * also be adjusted in consideration of this 1-BU space. + */ + + if (MAN_BODY == n->type) { + SCALE_HS_INIT(&su, INDENT); + bufcat_su(h, "margin-left", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + return(1); + } + + nn = MAN_BLOCK == n->type ? + n->head->child : n->parent->head->child; + + SCALE_HS_INIT(&su, INDENT); + width = 0; + + /* Width is the last token. */ + + if (MAN_IP == n->tok && NULL != nn) + if (NULL != (nn = nn->next)) { + for ( ; nn->next; nn = nn->next) + /* Do nothing. */ ; + width = a2width(nn, &su); + } + + /* Width is the first token. */ + + if (MAN_TP == n->tok && NULL != nn) { + /* Skip past non-text children. */ + while (nn && MAN_TEXT != nn->type) + nn = nn->next; + if (nn) + width = a2width(nn, &su); + } + + if (MAN_BLOCK == n->type) { + bufcat_su(h, "margin-left", &su); + SCALE_VS_INIT(&su, 1); + bufcat_su(h, "margin-top", &su); + bufcat_style(h, "clear", "both"); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + return(1); + } + + bufcat_su(h, "min-width", &su); + SCALE_INVERT(&su); + bufcat_su(h, "margin-left", &su); + SCALE_HS_INIT(&su, 1); + bufcat_su(h, "margin-right", &su); + bufcat_style(h, "clear", "left"); + + if (n->next && n->next->child) + bufcat_style(h, "float", "left"); + + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + + /* + * Without a length string, we can print all of our children. + */ + + if ( ! width) + return(1); + + /* + * When a length has been specified, we need to carefully print + * our child context: IP gets all children printed but the last + * (the width), while TP gets all children printed but the first + * (the width). 
+ */ + + if (MAN_IP == n->tok) + for (nn = n->child; nn->next; nn = nn->next) + print_man_node(m, nn, h); + if (MAN_TP == n->tok) + for (nn = n->child->next; nn; nn = nn->next) + print_man_node(m, nn, h); + + return(0); +} + + +/* ARGSUSED */ +static int +man_HP_pre(MAN_ARGS) +{ + const struct man_node *nn; + struct htmlpair tag; + struct roffsu su; + + if (MAN_HEAD == n->type) + return(0); + + nn = MAN_BLOCK == n->type ? + n->head->child : n->parent->head->child; + + SCALE_HS_INIT(&su, INDENT); + + if (NULL != nn) + (void)a2width(nn, &su); + + if (MAN_BLOCK == n->type) { + bufcat_su(h, "margin-left", &su); + SCALE_VS_INIT(&su, 1); + bufcat_su(h, "margin-top", &su); + bufcat_style(h, "clear", "both"); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + return(1); + } + + bufcat_su(h, "margin-left", &su); + SCALE_INVERT(&su); + bufcat_su(h, "text-indent", &su); + + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +man_B_pre(MAN_ARGS) +{ + + print_ofont(h, HTMLFONT_BOLD); + return(1); +} + + +/* ARGSUSED */ +static int +man_I_pre(MAN_ARGS) +{ + + print_ofont(h, HTMLFONT_ITALIC); + return(1); +} + + +/* ARGSUSED */ +static int +man_ign_pre(MAN_ARGS) +{ + + return(0); +} + + +/* ARGSUSED */ +static int +man_RS_pre(MAN_ARGS) +{ + struct htmlpair tag; + struct roffsu su; + + if (MAN_HEAD == n->type) + return(0); + else if (MAN_BODY == n->type) + return(1); + + SCALE_HS_INIT(&su, INDENT); + bufcat_su(h, "margin-left", &su); + + if (n->head->child) { + SCALE_VS_INIT(&su, 1); + a2width(n->head->child, &su); + bufcat_su(h, "margin-top", &su); + } + + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + return(1); +} diff --git a/commands/mdocml/man_macro.c b/commands/mdocml/man_macro.c new file mode 100644 index 000000000..3e7a51b6a --- /dev/null +++ b/commands/mdocml/man_macro.c @@ -0,0 +1,485 @@ +/* $Id: man_macro.c,v 1.47 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 
2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include + +#include "mandoc.h" +#include "libman.h" + +enum rew { + REW_REWIND, + REW_NOHALT, + REW_HALT +}; + +static int blk_close(MACRO_PROT_ARGS); +static int blk_exp(MACRO_PROT_ARGS); +static int blk_imp(MACRO_PROT_ARGS); +static int in_line_eoln(MACRO_PROT_ARGS); + +static int rew_scope(enum man_type, + struct man *, enum mant); +static enum rew rew_dohalt(enum mant, enum man_type, + const struct man_node *); +static enum rew rew_block(enum mant, enum man_type, + const struct man_node *); +static int rew_warn(struct man *, + struct man_node *, enum mandocerr); + +const struct man_macro __man_macros[MAN_MAX] = { + { in_line_eoln, MAN_NSCOPED }, /* br */ + { in_line_eoln, 0 }, /* TH */ + { blk_imp, MAN_SCOPED }, /* SH */ + { blk_imp, MAN_SCOPED }, /* SS */ + { blk_imp, MAN_SCOPED | MAN_FSCOPED }, /* TP */ + { blk_imp, 0 }, /* LP */ + { blk_imp, 0 }, /* PP */ + { blk_imp, 0 }, /* P */ + { blk_imp, 0 }, /* IP */ + { blk_imp, 0 }, /* HP */ + { in_line_eoln, MAN_SCOPED }, /* SM */ + { in_line_eoln, MAN_SCOPED }, /* SB */ + { in_line_eoln, 0 }, /* BI */ + { in_line_eoln, 0 }, /* IB */ + { in_line_eoln, 0 }, /* BR */ + { 
in_line_eoln, 0 }, /* RB */ + { in_line_eoln, MAN_SCOPED }, /* R */ + { in_line_eoln, MAN_SCOPED }, /* B */ + { in_line_eoln, MAN_SCOPED }, /* I */ + { in_line_eoln, 0 }, /* IR */ + { in_line_eoln, 0 }, /* RI */ + { in_line_eoln, MAN_NSCOPED }, /* na */ + { in_line_eoln, 0 }, /* i */ + { in_line_eoln, MAN_NSCOPED }, /* sp */ + { in_line_eoln, 0 }, /* nf */ + { in_line_eoln, 0 }, /* fi */ + { in_line_eoln, 0 }, /* r */ + { blk_close, 0 }, /* RE */ + { blk_exp, MAN_EXPLICIT }, /* RS */ + { in_line_eoln, 0 }, /* DT */ + { in_line_eoln, 0 }, /* UC */ + { in_line_eoln, 0 }, /* PD */ + { in_line_eoln, MAN_NSCOPED }, /* Sp */ + { in_line_eoln, 0 }, /* Vb */ + { in_line_eoln, 0 }, /* Ve */ + { in_line_eoln, 0 }, /* AT */ +}; + +const struct man_macro * const man_macros = __man_macros; + + +/* + * Warn when "n" is an explicit non-roff macro. + */ +static int +rew_warn(struct man *m, struct man_node *n, enum mandocerr er) +{ + + if (er == MANDOCERR_MAX || MAN_BLOCK != n->type) + return(1); + if (MAN_VALID & n->flags) + return(1); + if ( ! (MAN_EXPLICIT & man_macros[n->tok].flags)) + return(1); + return(man_nmsg(m, n, er)); +} + + +/* + * Rewind scope. If a code "er" != MANDOCERR_MAX has been provided, it + * will be used if an explicit block scope is being closed out. + */ +int +man_unscope(struct man *m, const struct man_node *n, + enum mandocerr er) +{ + + assert(n); + + /* LINTED */ + while (m->last != n) { + if ( ! rew_warn(m, m->last, er)) + return(0); + if ( ! man_valid_post(m)) + return(0); + if ( ! man_action_post(m)) + return(0); + m->last = m->last->parent; + assert(m->last); + } + + if ( ! rew_warn(m, m->last, er)) + return(0); + if ( ! man_valid_post(m)) + return(0); + if ( ! man_action_post(m)) + return(0); + + m->next = MAN_ROOT == m->last->type ? 
+ MAN_NEXT_CHILD : MAN_NEXT_SIBLING; + + return(1); +} + + +static enum rew +rew_block(enum mant ntok, enum man_type type, const struct man_node *n) +{ + + if (MAN_BLOCK == type && ntok == n->parent->tok && + MAN_BODY == n->parent->type) + return(REW_REWIND); + return(ntok == n->tok ? REW_HALT : REW_NOHALT); +} + + +/* + * There are three scope levels: scoped to the root (all), scoped to the + * section (all less sections), and scoped to subsections (all less + * sections and subsections). + */ +static enum rew +rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n) +{ + enum rew c; + + /* We cannot progress beyond the root ever. */ + if (MAN_ROOT == n->type) + return(REW_HALT); + + assert(n->parent); + + /* Normal nodes shouldn't go to the level of the root. */ + if (MAN_ROOT == n->parent->type) + return(REW_REWIND); + + /* Already-validated nodes should be closed out. */ + if (MAN_VALID & n->flags) + return(REW_NOHALT); + + /* First: rewind to ourselves. */ + if (type == n->type && tok == n->tok) + return(REW_REWIND); + + /* + * Next follow the implicit scope-smashings as defined by man.7: + * section, sub-section, etc. + */ + + switch (tok) { + case (MAN_SH): + break; + case (MAN_SS): + /* Rewind to a section, if a block. */ + if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) + return(c); + break; + case (MAN_RS): + /* Rewind to a subsection, if a block. */ + if (REW_NOHALT != (c = rew_block(MAN_SS, type, n))) + return(c); + /* Rewind to a section, if a block. */ + if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) + return(c); + break; + default: + /* Rewind to an offsetter, if a block. */ + if (REW_NOHALT != (c = rew_block(MAN_RS, type, n))) + return(c); + /* Rewind to a subsection, if a block. */ + if (REW_NOHALT != (c = rew_block(MAN_SS, type, n))) + return(c); + /* Rewind to a section, if a block. 
*/ + if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) + return(c); + break; + } + + return(REW_NOHALT); +} + + +/* + * Rewinding entails ascending the parse tree until a coherent point, + * for example, the `SH' macro will close out any intervening `SS' + * scopes. When a scope is closed, it must be validated and actioned. + */ +static int +rew_scope(enum man_type type, struct man *m, enum mant tok) +{ + struct man_node *n; + enum rew c; + + /* LINTED */ + for (n = m->last; n; n = n->parent) { + /* + * Whether we should stop immediately (REW_HALT), stop + * and rewind until this point (REW_REWIND), or keep + * rewinding (REW_NOHALT). + */ + c = rew_dohalt(tok, type, n); + if (REW_HALT == c) + return(1); + if (REW_REWIND == c) + break; + } + + /* + * Rewind until the current point. Warn if we're a roff + * instruction that's mowing over explicit scopes. + */ + assert(n); + + return(man_unscope(m, n, MANDOCERR_MAX)); +} + + +/* + * Close out a generic explicit macro. + */ +/* ARGSUSED */ +static int +blk_close(MACRO_PROT_ARGS) +{ + enum mant ntok; + const struct man_node *nn; + + switch (tok) { + case (MAN_RE): + ntok = MAN_RS; + break; + default: + abort(); + /* NOTREACHED */ + } + + for (nn = m->last->parent; nn; nn = nn->parent) + if (ntok == nn->tok) + break; + + if (NULL == nn) + if ( ! man_pmsg(m, line, ppos, MANDOCERR_NOSCOPE)) + return(0); + + if ( ! rew_scope(MAN_BODY, m, ntok)) + return(0); + if ( ! rew_scope(MAN_BLOCK, m, ntok)) + return(0); + + return(1); +} + + +static int +blk_exp(MACRO_PROT_ARGS) +{ + int w, la; + char *p; + + /* + * Close out prior scopes. "Regular" explicit macros cannot be + * nested, but we allow roff macros to be placed just about + * anywhere. + */ + + if ( ! rew_scope(MAN_BODY, m, tok)) + return(0); + if ( ! rew_scope(MAN_BLOCK, m, tok)) + return(0); + + if ( ! man_block_alloc(m, line, ppos, tok)) + return(0); + if ( ! 
man_head_alloc(m, line, ppos, tok)) + return(0); + + for (;;) { + la = *pos; + w = man_args(m, line, pos, buf, &p); + + if (-1 == w) + return(0); + if (0 == w) + break; + + if ( ! man_word_alloc(m, line, la, p)) + return(0); + } + + assert(m); + assert(tok != MAN_MAX); + + if ( ! rew_scope(MAN_HEAD, m, tok)) + return(0); + return(man_body_alloc(m, line, ppos, tok)); +} + + + +/* + * Parse an implicit-block macro. These contain a MAN_HEAD and a + * MAN_BODY contained within a MAN_BLOCK. Rules for closing out other + * scopes, such as `SH' closing out an `SS', are defined in the rew + * routines. + */ +static int +blk_imp(MACRO_PROT_ARGS) +{ + int w, la; + char *p; + struct man_node *n; + + /* Close out prior scopes. */ + + if ( ! rew_scope(MAN_BODY, m, tok)) + return(0); + if ( ! rew_scope(MAN_BLOCK, m, tok)) + return(0); + + /* Allocate new block & head scope. */ + + if ( ! man_block_alloc(m, line, ppos, tok)) + return(0); + if ( ! man_head_alloc(m, line, ppos, tok)) + return(0); + + n = m->last; + + /* Add line arguments. */ + + for (;;) { + la = *pos; + w = man_args(m, line, pos, buf, &p); + + if (-1 == w) + return(0); + if (0 == w) + break; + + if ( ! man_word_alloc(m, line, la, p)) + return(0); + } + + /* Close out head and open body (unless MAN_SCOPE). */ + + if (MAN_SCOPED & man_macros[tok].flags) { + /* If we're forcing scope (`TP'), keep it open. */ + if (MAN_FSCOPED & man_macros[tok].flags) { + m->flags |= MAN_BLINE; + return(1); + } else if (n == m->last) { + m->flags |= MAN_BLINE; + return(1); + } + } + + if ( ! rew_scope(MAN_HEAD, m, tok)) + return(0); + return(man_body_alloc(m, line, ppos, tok)); +} + + +static int +in_line_eoln(MACRO_PROT_ARGS) +{ + int w, la; + char *p; + struct man_node *n; + + if ( ! man_elem_alloc(m, line, ppos, tok)) + return(0); + + n = m->last; + + for (;;) { + la = *pos; + w = man_args(m, line, pos, buf, &p); + + if (-1 == w) + return(0); + if (0 == w) + break; + if ( ! 
man_word_alloc(m, line, la, p)) + return(0); + } + + /* + * If no arguments are specified and this is MAN_SCOPED (i.e., + * next-line scoped), then set our mode to indicate that we're + * waiting for terms to load into our context. + */ + + if (n == m->last && MAN_SCOPED & man_macros[tok].flags) { + assert( ! (MAN_NSCOPED & man_macros[tok].flags)); + m->flags |= MAN_ELINE; + return(1); + } + + /* Set ignorable context, if applicable. */ + + if (MAN_NSCOPED & man_macros[tok].flags) { + assert( ! (MAN_SCOPED & man_macros[tok].flags)); + m->flags |= MAN_ILINE; + } + + /* + * Rewind our element scope. Note that when TH is pruned, we'll + * be back at the root, so make sure that we don't clobber as + * its sibling. + */ + + for ( ; m->last; m->last = m->last->parent) { + if (m->last == n) + break; + if (m->last->type == MAN_ROOT) + break; + if ( ! man_valid_post(m)) + return(0); + if ( ! man_action_post(m)) + return(0); + } + + assert(m->last); + + /* + * Same here regarding whether we're back at the root. + */ + + if (m->last->type != MAN_ROOT && ! man_valid_post(m)) + return(0); + if (m->last->type != MAN_ROOT && ! man_action_post(m)) + return(0); + + m->next = MAN_ROOT == m->last->type ? + MAN_NEXT_CHILD : MAN_NEXT_SIBLING; + + return(1); +} + + +int +man_macroend(struct man *m) +{ + + return(man_unscope(m, m->first, MANDOCERR_SCOPEEXIT)); +} + diff --git a/commands/mdocml/man_term.c b/commands/mdocml/man_term.c new file mode 100644 index 000000000..db2e9cc4a --- /dev/null +++ b/commands/mdocml/man_term.c @@ -0,0 +1,964 @@ +/* $Id: man_term.c,v 1.76 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "out.h" +#include "man.h" +#include "term.h" +#include "chars.h" +#include "main.h" + +#define INDENT 7 +#define HALFINDENT 3 + +/* FIXME: have PD set the default vspace width. */ + +struct mtermp { + int fl; +#define MANT_LITERAL (1 << 0) + /* + * Default amount to indent the left margin after leading text + * has been printed (e.g., `HP' left-indent, `TP' and `IP' body + * indent). This needs to be saved because `HP' and so on, if + * not having a specified value, must default. + * + * Note that this is the indentation AFTER the left offset, so + * the total offset is usually offset + lmargin. + */ + size_t lmargin; + /* + * The default offset, i.e., the amount between any text and the + * page boundary. + */ + size_t offset; +}; + +#define DECL_ARGS struct termp *p, \ + struct mtermp *mt, \ + const struct man_node *n, \ + const struct man_meta *m + +struct termact { + int (*pre)(DECL_ARGS); + void (*post)(DECL_ARGS); + int flags; +#define MAN_NOTEXT (1 << 0) /* Never has text children. 
*/ +}; + +static int a2width(const struct man_node *); +static int a2height(const struct man_node *); + +static void print_man_nodelist(DECL_ARGS); +static void print_man_node(DECL_ARGS); +static void print_man_head(struct termp *, const void *); +static void print_man_foot(struct termp *, const void *); +static void print_bvspace(struct termp *, + const struct man_node *); + +static int pre_B(DECL_ARGS); +static int pre_BI(DECL_ARGS); +static int pre_HP(DECL_ARGS); +static int pre_I(DECL_ARGS); +static int pre_IP(DECL_ARGS); +static int pre_PP(DECL_ARGS); +static int pre_RB(DECL_ARGS); +static int pre_RI(DECL_ARGS); +static int pre_RS(DECL_ARGS); +static int pre_SH(DECL_ARGS); +static int pre_SS(DECL_ARGS); +static int pre_TP(DECL_ARGS); +static int pre_br(DECL_ARGS); +static int pre_fi(DECL_ARGS); +static int pre_ign(DECL_ARGS); +static int pre_nf(DECL_ARGS); +static int pre_sp(DECL_ARGS); + +static void post_IP(DECL_ARGS); +static void post_HP(DECL_ARGS); +static void post_RS(DECL_ARGS); +static void post_SH(DECL_ARGS); +static void post_SS(DECL_ARGS); +static void post_TP(DECL_ARGS); + +static const struct termact termacts[MAN_MAX] = { + { pre_br, NULL, MAN_NOTEXT }, /* br */ + { NULL, NULL, 0 }, /* TH */ + { pre_SH, post_SH, 0 }, /* SH */ + { pre_SS, post_SS, 0 }, /* SS */ + { pre_TP, post_TP, 0 }, /* TP */ + { pre_PP, NULL, 0 }, /* LP */ + { pre_PP, NULL, 0 }, /* PP */ + { pre_PP, NULL, 0 }, /* P */ + { pre_IP, post_IP, 0 }, /* IP */ + { pre_HP, post_HP, 0 }, /* HP */ + { NULL, NULL, 0 }, /* SM */ + { pre_B, NULL, 0 }, /* SB */ + { pre_BI, NULL, 0 }, /* BI */ + { pre_BI, NULL, 0 }, /* IB */ + { pre_RB, NULL, 0 }, /* BR */ + { pre_RB, NULL, 0 }, /* RB */ + { NULL, NULL, 0 }, /* R */ + { pre_B, NULL, 0 }, /* B */ + { pre_I, NULL, 0 }, /* I */ + { pre_RI, NULL, 0 }, /* IR */ + { pre_RI, NULL, 0 }, /* RI */ + { NULL, NULL, MAN_NOTEXT }, /* na */ + { pre_I, NULL, 0 }, /* i */ + { pre_sp, NULL, MAN_NOTEXT }, /* sp */ + { pre_nf, NULL, 0 }, /* nf */ + { pre_fi, 
NULL, 0 }, /* fi */ + { NULL, NULL, 0 }, /* r */ + { NULL, NULL, 0 }, /* RE */ + { pre_RS, post_RS, 0 }, /* RS */ + { pre_ign, NULL, 0 }, /* DT */ + { pre_ign, NULL, 0 }, /* UC */ + { pre_ign, NULL, 0 }, /* PD */ + { pre_sp, NULL, MAN_NOTEXT }, /* Sp */ + { pre_nf, NULL, 0 }, /* Vb */ + { pre_fi, NULL, 0 }, /* Ve */ + { pre_ign, NULL, 0 }, /* AT */ +}; + + + +void +terminal_man(void *arg, const struct man *man) +{ + struct termp *p; + const struct man_node *n; + const struct man_meta *m; + struct mtermp mt; + + p = (struct termp *)arg; + + p->overstep = 0; + p->maxrmargin = p->defrmargin; + p->tabwidth = 5; + + if (NULL == p->symtab) + switch (p->enc) { + case (TERMENC_ASCII): + p->symtab = chars_init(CHARS_ASCII); + break; + default: + abort(); + /* NOTREACHED */ + } + + n = man_node(man); + m = man_meta(man); + + term_begin(p, print_man_head, print_man_foot, m); + p->flags |= TERMP_NOSPACE; + + mt.fl = 0; + mt.lmargin = INDENT; + mt.offset = INDENT; + + if (n->child) + print_man_nodelist(p, &mt, n->child, m); + + term_end(p); +} + + +static int +a2height(const struct man_node *n) +{ + struct roffsu su; + + assert(MAN_TEXT == n->type); + assert(n->string); + if ( ! a2roffsu(n->string, &su, SCALE_VS)) + SCALE_VS_INIT(&su, strlen(n->string)); + + return((int)term_vspan(&su)); +} + + +static int +a2width(const struct man_node *n) +{ + struct roffsu su; + + assert(MAN_TEXT == n->type); + assert(n->string); + if ( ! 
a2roffsu(n->string, &su, SCALE_BU)) + return(-1); + + return((int)term_hspan(&su)); +} + + +static void +print_bvspace(struct termp *p, const struct man_node *n) +{ + term_newln(p); + + if (NULL == n->prev) + return; + + if (MAN_SS == n->prev->tok) + return; + if (MAN_SH == n->prev->tok) + return; + + term_vspace(p); +} + + +/* ARGSUSED */ +static int +pre_ign(DECL_ARGS) +{ + + return(0); +} + + +/* ARGSUSED */ +static int +pre_I(DECL_ARGS) +{ + + term_fontrepl(p, TERMFONT_UNDER); + return(1); +} + + +/* ARGSUSED */ +static int +pre_fi(DECL_ARGS) +{ + + mt->fl &= ~MANT_LITERAL; + return(1); +} + + +/* ARGSUSED */ +static int +pre_nf(DECL_ARGS) +{ + + mt->fl |= MANT_LITERAL; + return(MAN_Vb != n->tok); +} + + +/* ARGSUSED */ +static int +pre_RB(DECL_ARGS) +{ + const struct man_node *nn; + int i; + + for (i = 0, nn = n->child; nn; nn = nn->next, i++) { + if (i % 2 && MAN_RB == n->tok) + term_fontrepl(p, TERMFONT_BOLD); + else if ( ! (i % 2) && MAN_RB != n->tok) + term_fontrepl(p, TERMFONT_BOLD); + else + term_fontrepl(p, TERMFONT_NONE); + + if (i > 0) + p->flags |= TERMP_NOSPACE; + + print_man_node(p, mt, nn, m); + } + return(0); +} + + +/* ARGSUSED */ +static int +pre_RI(DECL_ARGS) +{ + const struct man_node *nn; + int i; + + for (i = 0, nn = n->child; nn; nn = nn->next, i++) { + if (i % 2 && MAN_RI == n->tok) + term_fontrepl(p, TERMFONT_UNDER); + else if ( ! 
(i % 2) && MAN_RI != n->tok) + term_fontrepl(p, TERMFONT_UNDER); + else + term_fontrepl(p, TERMFONT_NONE); + + if (i > 0) + p->flags |= TERMP_NOSPACE; + + print_man_node(p, mt, nn, m); + } + return(0); +} + + +/* ARGSUSED */ +static int +pre_BI(DECL_ARGS) +{ + const struct man_node *nn; + int i; + + for (i = 0, nn = n->child; nn; nn = nn->next, i++) { + if (i % 2 && MAN_BI == n->tok) + term_fontrepl(p, TERMFONT_UNDER); + else if (i % 2) + term_fontrepl(p, TERMFONT_BOLD); + else if (MAN_BI == n->tok) + term_fontrepl(p, TERMFONT_BOLD); + else + term_fontrepl(p, TERMFONT_UNDER); + + if (i) + p->flags |= TERMP_NOSPACE; + + print_man_node(p, mt, nn, m); + } + return(0); +} + + +/* ARGSUSED */ +static int +pre_B(DECL_ARGS) +{ + + term_fontrepl(p, TERMFONT_BOLD); + return(1); +} + + +/* ARGSUSED */ +static int +pre_sp(DECL_ARGS) +{ + int i, len; + + len = n->child ? a2height(n->child) : 1; + + if (0 == len) + term_newln(p); + for (i = 0; i <= len; i++) + term_vspace(p); + + return(0); +} + + +/* ARGSUSED */ +static int +pre_br(DECL_ARGS) +{ + + term_newln(p); + return(0); +} + + +/* ARGSUSED */ +static int +pre_HP(DECL_ARGS) +{ + size_t len; + int ival; + const struct man_node *nn; + + switch (n->type) { + case (MAN_BLOCK): + print_bvspace(p, n); + return(1); + case (MAN_BODY): + p->flags |= TERMP_NOBREAK; + p->flags |= TERMP_TWOSPACE; + break; + default: + return(0); + } + + len = mt->lmargin; + ival = -1; + + /* Calculate offset. 
*/ + + if (NULL != (nn = n->parent->head->child)) + if ((ival = a2width(nn)) >= 0) + len = (size_t)ival; + + if (0 == len) + len = 1; + + p->offset = mt->offset; + p->rmargin = mt->offset + len; + + if (ival >= 0) + mt->lmargin = (size_t)ival; + + return(1); +} + + +/* ARGSUSED */ +static void +post_HP(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_BLOCK): + term_flushln(p); + break; + case (MAN_BODY): + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + p->flags &= ~TERMP_TWOSPACE; + p->offset = mt->offset; + p->rmargin = p->maxrmargin; + break; + default: + break; + } +} + + +/* ARGSUSED */ +static int +pre_PP(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_BLOCK): + mt->lmargin = INDENT; + print_bvspace(p, n); + break; + default: + p->offset = mt->offset; + break; + } + + return(1); +} + + +/* ARGSUSED */ +static int +pre_IP(DECL_ARGS) +{ + const struct man_node *nn; + size_t len; + int ival; + + switch (n->type) { + case (MAN_BODY): + p->flags |= TERMP_NOLPAD; + p->flags |= TERMP_NOSPACE; + break; + case (MAN_HEAD): + p->flags |= TERMP_NOBREAK; + break; + case (MAN_BLOCK): + print_bvspace(p, n); + /* FALLTHROUGH */ + default: + return(1); + } + + len = mt->lmargin; + ival = -1; + + /* Calculate offset. */ + + if (NULL != (nn = n->parent->head->child)) + if (NULL != (nn = nn->next)) { + for ( ; nn->next; nn = nn->next) + /* Do nothing. */ ; + if ((ival = a2width(nn)) >= 0) + len = (size_t)ival; + } + + switch (n->type) { + case (MAN_HEAD): + /* Handle zero-width lengths. */ + if (0 == len) + len = 1; + + p->offset = mt->offset; + p->rmargin = mt->offset + len; + if (ival < 0) + break; + + /* Set the saved left-margin. */ + mt->lmargin = (size_t)ival; + + /* Don't print the length value. 
*/ + for (nn = n->child; nn->next; nn = nn->next) + print_man_node(p, mt, nn, m); + return(0); + case (MAN_BODY): + p->offset = mt->offset + len; + p->rmargin = p->maxrmargin; + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +post_IP(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_HEAD): + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + p->rmargin = p->maxrmargin; + break; + case (MAN_BODY): + term_flushln(p); + p->flags &= ~TERMP_NOLPAD; + break; + default: + break; + } +} + + +/* ARGSUSED */ +static int +pre_TP(DECL_ARGS) +{ + const struct man_node *nn; + size_t len; + int ival; + + switch (n->type) { + case (MAN_HEAD): + p->flags |= TERMP_NOBREAK; + p->flags |= TERMP_TWOSPACE; + break; + case (MAN_BODY): + p->flags |= TERMP_NOLPAD; + p->flags |= TERMP_NOSPACE; + break; + case (MAN_BLOCK): + print_bvspace(p, n); + /* FALLTHROUGH */ + default: + return(1); + } + + len = (size_t)mt->lmargin; + ival = -1; + + /* Calculate offset. */ + + if (NULL != (nn = n->parent->head->child)) { + while (nn && MAN_TEXT != nn->type) + nn = nn->next; + if (nn && nn->next) + if ((ival = a2width(nn)) >= 0) + len = (size_t)ival; + } + + switch (n->type) { + case (MAN_HEAD): + /* Handle zero-length properly. */ + if (0 == len) + len = 1; + + p->offset = mt->offset; + p->rmargin = mt->offset + len; + + /* Don't print same-line elements. 
*/ + for (nn = n->child; nn; nn = nn->next) + if (nn->line > n->line) + print_man_node(p, mt, nn, m); + + if (ival >= 0) + mt->lmargin = (size_t)ival; + + return(0); + case (MAN_BODY): + p->offset = mt->offset + len; + p->rmargin = p->maxrmargin; + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +post_TP(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_HEAD): + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + p->flags &= ~TERMP_TWOSPACE; + p->rmargin = p->maxrmargin; + break; + case (MAN_BODY): + term_flushln(p); + p->flags &= ~TERMP_NOLPAD; + break; + default: + break; + } +} + + +/* ARGSUSED */ +static int +pre_SS(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_BLOCK): + mt->lmargin = INDENT; + mt->offset = INDENT; + /* If following a prior empty `SS', no vspace. */ + if (n->prev && MAN_SS == n->prev->tok) + if (NULL == n->prev->body->child) + break; + if (NULL == n->prev) /* First macro: no vspace. */ + break; + term_vspace(p); + break; + case (MAN_HEAD): + term_fontrepl(p, TERMFONT_BOLD); + p->offset = HALFINDENT; + break; + case (MAN_BODY): + p->offset = mt->offset; + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +post_SS(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_HEAD): + term_newln(p); + break; + case (MAN_BODY): + term_newln(p); + break; + default: + break; + } +} + + +/* ARGSUSED */ +static int +pre_SH(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_BLOCK): + mt->lmargin = INDENT; + mt->offset = INDENT; + /* If following a prior empty `SH', no vspace. */ + if (n->prev && MAN_SH == n->prev->tok) + if (NULL == n->prev->body->child) + break; + /* If the first macro, no vspace. 
*/ + if (NULL == n->prev) + break; + term_vspace(p); + break; + case (MAN_HEAD): + term_fontrepl(p, TERMFONT_BOLD); + p->offset = 0; + break; + case (MAN_BODY): + p->offset = mt->offset; + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +post_SH(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_HEAD): + term_newln(p); + break; + case (MAN_BODY): + term_newln(p); + break; + default: + break; + } +} + + +/* ARGSUSED */ +static int +pre_RS(DECL_ARGS) +{ + const struct man_node *nn; + int ival; + + switch (n->type) { + case (MAN_BLOCK): + term_newln(p); + return(1); + case (MAN_HEAD): + return(0); + default: + break; + } + + if (NULL == (nn = n->parent->head->child)) { + mt->offset = mt->lmargin + INDENT; + p->offset = mt->offset; + return(1); + } + + if ((ival = a2width(nn)) < 0) + return(1); + + mt->offset = INDENT + (size_t)ival; + p->offset = mt->offset; + + return(1); +} + + +/* ARGSUSED */ +static void +post_RS(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_BLOCK): + mt->offset = mt->lmargin = INDENT; + break; + case (MAN_HEAD): + break; + default: + term_newln(p); + p->offset = INDENT; + break; + } +} + + +static void +print_man_node(DECL_ARGS) +{ + size_t rm, rmax; + int c; + + c = 1; + + switch (n->type) { + case(MAN_TEXT): + if (0 == *n->string) { + term_vspace(p); + break; + } + + term_word(p, n->string); + + /* FIXME: this means that macro lines are munged! */ + + if (MANT_LITERAL & mt->fl) { + rm = p->rmargin; + rmax = p->maxrmargin; + p->rmargin = p->maxrmargin = TERM_MAXMARGIN; + p->flags |= TERMP_NOSPACE; + term_flushln(p); + p->rmargin = rm; + p->maxrmargin = rmax; + } + break; + default: + if ( ! 
(MAN_NOTEXT & termacts[n->tok].flags)) + term_fontrepl(p, TERMFONT_NONE); + if (termacts[n->tok].pre) + c = (*termacts[n->tok].pre)(p, mt, n, m); + break; + } + + if (c && n->child) + print_man_nodelist(p, mt, n->child, m); + + if (MAN_TEXT != n->type) { + if (termacts[n->tok].post) + (*termacts[n->tok].post)(p, mt, n, m); + if ( ! (MAN_NOTEXT & termacts[n->tok].flags)) + term_fontrepl(p, TERMFONT_NONE); + } + + if (MAN_EOS & n->flags) + p->flags |= TERMP_SENTENCE; +} + + +static void +print_man_nodelist(DECL_ARGS) +{ + const struct man_node *nn; + + /* Iterate over siblings; recursing here cost a stack frame per node. */ + for (nn = n; nn; nn = nn->next) + print_man_node(p, mt, nn, m); +} + + +static void +print_man_foot(struct termp *p, const void *arg) +{ + char buf[DATESIZ]; + const struct man_meta *meta; + + meta = (const struct man_meta *)arg; + + term_fontrepl(p, TERMFONT_NONE); + + if (meta->rawdate) + strlcpy(buf, meta->rawdate, DATESIZ); + else + time2a(meta->date, buf, DATESIZ); + + term_vspace(p); + term_vspace(p); + term_vspace(p); + + p->flags |= TERMP_NOSPACE | TERMP_NOBREAK; + p->rmargin = p->maxrmargin - strlen(buf); + p->offset = 0; + + if (meta->source) + term_word(p, meta->source); + if (meta->source) + term_word(p, ""); + term_flushln(p); + + p->flags |= TERMP_NOLPAD | TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + p->flags &= ~TERMP_NOBREAK; + + term_word(p, buf); + term_flushln(p); +} + + +static void +print_man_head(struct termp *p, const void *arg) +{ + char buf[BUFSIZ], title[BUFSIZ]; + size_t buflen, titlen; + const struct man_meta *m; + + m = (const struct man_meta *)arg; + + /* + * Note that old groff would spit out some spaces before the + * header. We discontinue this strange behaviour, but at one + * point we did so here. 
+ */ + + p->rmargin = p->maxrmargin; + + p->offset = 0; + buf[0] = title[0] = '\0'; + + if (m->vol) + strlcpy(buf, m->vol, BUFSIZ); + buflen = strlen(buf); + + snprintf(title, BUFSIZ, "%s(%s)", m->title, m->msec); + titlen = strlen(title); + + p->offset = 0; + p->rmargin = 2 * (titlen+1) + buflen < p->maxrmargin ? + (p->maxrmargin - strlen(buf) + 1) / 2 : + p->maxrmargin - buflen; + p->flags |= TERMP_NOBREAK | TERMP_NOSPACE; + + term_word(p, title); + term_flushln(p); + + p->flags |= TERMP_NOLPAD | TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->offset + buflen + titlen < p->maxrmargin ? + p->maxrmargin - titlen : p->maxrmargin; + + term_word(p, buf); + term_flushln(p); + + p->flags &= ~TERMP_NOBREAK; + if (p->rmargin + titlen <= p->maxrmargin) { + p->flags |= TERMP_NOLPAD | TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + term_word(p, title); + term_flushln(p); + } + + p->rmargin = p->maxrmargin; + p->offset = 0; + p->flags &= ~TERMP_NOSPACE; + + /* + * Groff likes to have some leading spaces before content. Well + * that's fine by me. + */ + + term_vspace(p); + term_vspace(p); + term_vspace(p); +} diff --git a/commands/mdocml/man_validate.c b/commands/mdocml/man_validate.c new file mode 100644 index 000000000..fb7056fe8 --- /dev/null +++ b/commands/mdocml/man_validate.c @@ -0,0 +1,328 @@ +/* $Id: man_validate.c,v 1.44 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "libman.h" +#include "libmandoc.h" + +#define CHKARGS struct man *m, struct man_node *n + +typedef int (*v_check)(CHKARGS); + +struct man_valid { + v_check *pres; + v_check *posts; +}; + +static int check_bline(CHKARGS); +static int check_eq0(CHKARGS); +static int check_le1(CHKARGS); +static int check_ge2(CHKARGS); +static int check_le5(CHKARGS); +static int check_par(CHKARGS); +static int check_part(CHKARGS); +static int check_root(CHKARGS); +static int check_sec(CHKARGS); +static int check_text(CHKARGS); +static int check_title(CHKARGS); + +static v_check posts_eq0[] = { check_eq0, NULL }; +static v_check posts_th[] = { check_ge2, check_le5, check_title, NULL }; +static v_check posts_par[] = { check_par, NULL }; +static v_check posts_part[] = { check_part, NULL }; +static v_check posts_sec[] = { check_sec, NULL }; +static v_check posts_le1[] = { check_le1, NULL }; +static v_check pres_bline[] = { check_bline, NULL }; + +static const struct man_valid man_valids[MAN_MAX] = { + { NULL, posts_eq0 }, /* br */ + { pres_bline, posts_th }, /* TH */ + { pres_bline, posts_sec }, /* SH */ + { pres_bline, posts_sec }, /* SS */ + { pres_bline, posts_par }, /* TP */ + { pres_bline, posts_par }, /* LP */ + { pres_bline, posts_par }, /* PP */ + { pres_bline, posts_par }, /* P */ + { pres_bline, posts_par }, /* IP */ + { pres_bline, posts_par }, /* HP */ + { NULL, NULL }, /* SM */ + { NULL, NULL }, /* SB */ + { NULL, NULL }, /* BI */ + { NULL, NULL }, /* IB */ + { NULL, NULL }, /* BR */ + { 
NULL, NULL }, /* RB */ + { NULL, NULL }, /* R */ + { NULL, NULL }, /* B */ + { NULL, NULL }, /* I */ + { NULL, NULL }, /* IR */ + { NULL, NULL }, /* RI */ + { NULL, posts_eq0 }, /* na */ + { NULL, NULL }, /* i */ + { NULL, posts_le1 }, /* sp */ + { pres_bline, posts_eq0 }, /* nf */ + { pres_bline, posts_eq0 }, /* fi */ + { NULL, NULL }, /* r */ + { NULL, NULL }, /* RE */ + { NULL, posts_part }, /* RS */ + { NULL, NULL }, /* DT */ + { NULL, NULL }, /* UC */ + { NULL, NULL }, /* PD */ + { NULL, posts_le1 }, /* Sp */ + { pres_bline, posts_le1 }, /* Vb */ + { pres_bline, posts_eq0 }, /* Ve */ + { NULL, NULL }, /* AT */ +}; + + +int +man_valid_pre(struct man *m, struct man_node *n) +{ + v_check *cp; + + if (MAN_TEXT == n->type) + return(1); + if (MAN_ROOT == n->type) + return(1); + + if (NULL == (cp = man_valids[n->tok].pres)) + return(1); + for ( ; *cp; cp++) + if ( ! (*cp)(m, n)) + return(0); + return(1); +} + + +int +man_valid_post(struct man *m) +{ + v_check *cp; + + if (MAN_VALID & m->last->flags) + return(1); + m->last->flags |= MAN_VALID; + + switch (m->last->type) { + case (MAN_TEXT): + return(check_text(m, m->last)); + case (MAN_ROOT): + return(check_root(m, m->last)); + default: + break; + } + + if (NULL == (cp = man_valids[m->last->tok].posts)) + return(1); + for ( ; *cp; cp++) + if ( ! (*cp)(m, m->last)) + return(0); + + return(1); +} + + +static int +check_root(CHKARGS) +{ + + if (MAN_BLINE & m->flags) + return(man_nmsg(m, n, MANDOCERR_SCOPEEXIT)); + if (MAN_ELINE & m->flags) + return(man_nmsg(m, n, MANDOCERR_SCOPEEXIT)); + + m->flags &= ~MAN_BLINE; + m->flags &= ~MAN_ELINE; + + if (NULL == m->first->child) { + man_nmsg(m, n, MANDOCERR_NODOCBODY); + return(0); + } else if (NULL == m->meta.title) { + if ( ! man_nmsg(m, n, MANDOCERR_NOTITLE)) + return(0); + /* + * If a title hasn't been set, do so now (by + * implication, date and section also aren't set). + * + * FIXME: this should be in man_action.c. 
+ */ + m->meta.title = mandoc_strdup("unknown"); + m->meta.date = time(NULL); + m->meta.msec = mandoc_strdup("1"); + } + + return(1); +} + + +static int +check_title(CHKARGS) +{ + const char *p; + + assert(n->child); + /* FIXME: is this sufficient? */ + if ('\0' == *n->child->string) { + man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT); + return(0); + } + + for (p = n->child->string; '\0' != *p; p++) + if (isalpha((u_char)*p) && ! isupper((u_char)*p)) + if ( ! man_nmsg(m, n, MANDOCERR_UPPERCASE)) + return(0); + + return(1); +} + + +static int +check_text(CHKARGS) +{ + char *p; + int pos, c; + + assert(n->string); + + for (p = n->string, pos = n->pos + 1; *p; p++, pos++) { + if ('\\' == *p) { + c = mandoc_special(p); + if (c) { + p += c - 1; + pos += c - 1; + continue; + } + + c = man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE); + if ( ! (MAN_IGN_ESCAPE & m->pflags) && ! c) + return(c); + } + + if ('\t' == *p || isprint((u_char)*p) || ASCII_HYPH == *p) + continue; + if ( ! man_pmsg(m, n->line, pos, MANDOCERR_BADCHAR)) + return(0); + } + + return(1); +} + + +#define INEQ_DEFINE(x, ineq, name) \ +static int \ +check_##name(CHKARGS) \ +{ \ + if (n->nchild ineq (x)) \ + return(1); \ + man_vmsg(m, MANDOCERR_SYNTARGCOUNT, n->line, n->pos, \ + "line arguments %s %d (have %d)", \ + #ineq, (x), n->nchild); \ + return(0); \ +} + +INEQ_DEFINE(0, ==, eq0) +INEQ_DEFINE(1, <=, le1) +INEQ_DEFINE(2, >=, ge2) +INEQ_DEFINE(5, <=, le5) + + +static int +check_sec(CHKARGS) +{ + + if (MAN_HEAD == n->type && 0 == n->nchild) { + man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT); + return(0); + } else if (MAN_BODY == n->type && 0 == n->nchild) + return(man_nmsg(m, n, MANDOCERR_NOBODY)); + + return(1); +} + + +static int +check_part(CHKARGS) +{ + + if (MAN_BODY == n->type && 0 == n->nchild) + return(man_nmsg(m, n, MANDOCERR_NOBODY)); + return(1); +} + + +static int +check_par(CHKARGS) +{ + + if (MAN_BODY == n->type) + switch (n->tok) { + case (MAN_IP): + /* FALLTHROUGH */ + case (MAN_HP): + /* FALLTHROUGH */ + 
case (MAN_TP): + /* Body-less lists are ok. */ + break; + default: + if (n->nchild) + break; + return(man_nmsg(m, n, MANDOCERR_NOBODY)); + } + if (MAN_HEAD == n->type) + switch (n->tok) { + case (MAN_PP): + /* FALLTHROUGH */ + case (MAN_P): + /* FALLTHROUGH */ + case (MAN_LP): + if (0 == n->nchild) + break; + return(man_nmsg(m, n, MANDOCERR_ARGSLOST)); + default: + if (n->nchild) + break; + return(man_nmsg(m, n, MANDOCERR_NOARGS)); + } + + return(1); +} + + +static int +check_bline(CHKARGS) +{ + + assert( ! (MAN_ELINE & m->flags)); + if (MAN_BLINE & m->flags) { + man_nmsg(m, n, MANDOCERR_SYNTLINESCOPE); + return(0); + } + + return(1); +} + diff --git a/commands/mdocml/mandoc.1 b/commands/mdocml/mandoc.1 new file mode 100644 index 000000000..fb22e8d73 --- /dev/null +++ b/commands/mdocml/mandoc.1 @@ -0,0 +1,530 @@ +.\" $Id: mandoc.1,v 1.63 2010/06/11 07:15:42 kristaps Exp $ +.\" +.\" Copyright (c) 2009 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: June 11 2010 $ +.Dt MANDOC 1 +.Os +.Sh NAME +.Nm mandoc +.Nd format and display UNIX manuals +.Sh SYNOPSIS +.Nm mandoc +.Op Fl V +.Op Fl f Ns Ar option +.Op Fl m Ns Ar format +.Op Fl O Ns Ar option +.Op Fl T Ns Ar output +.Op Fl W Ns Ar err +.Op Ar file... 
+.Sh DESCRIPTION +The +.Nm +utility formats +.Ux +manual pages for display. +The arguments are as follows: +.Bl -tag -width Ds +.It Fl f Ns Ar option +Comma-separated compiler options. +See +.Sx Compiler Options +for details. +.It Fl m Ns Ar format +Input format. +See +.Sx Input Formats +for available formats. +Defaults to +.Fl m Ns Cm andoc . +.It Fl O Ns Ar option +Comma-separated output options. +See +.Sx Output Options +for details. +.It Fl T Ns Ar output +Output format. +See +.Sx Output Formats +for available formats. +Defaults to +.Fl T Ns Cm ascii . +.It Fl V +Print version and exit. +.It Fl W Ns Ar err +Comma-separated warning options. +Use +.Fl W Ns Cm all +to print warnings, +.Fl W Ns Cm error +for warnings to be considered errors and cause utility +termination. +Multiple +.Fl W +arguments may be comma-separated, such as +.Fl W Ns Cm error , Ns Cm all . +.It Ar file +Read input from zero or more files. +If unspecified, reads from stdin. +If multiple files are specified, +.Nm +will halt with the first failed parse. +.El +.Pp +By default, +.Nm +reads +.Xr mdoc 7 +or +.Xr man 7 +text from stdin, implying +.Fl m Ns Cm andoc , +and produces +.Fl T Ns Cm ascii +output. +.Pp +.Ex -std mandoc +.Ss Input Formats +The +.Nm +utility accepts +.Xr mdoc 7 +and +.Xr man 7 +input with +.Fl m Ns Cm doc +and +.Fl m Ns Cm an , +respectively. +The +.Xr mdoc 7 +format is +.Em strongly +recommended; +.Xr man 7 +should only be used for legacy manuals. +.Pp +A third option, +.Fl m Ns Cm andoc , +which is also the default, determines encoding on-the-fly: if the first +non-comment macro is +.Sq \&Dd +or +.Sq \&Dt , +the +.Xr mdoc 7 +parser is used; otherwise, the +.Xr man 7 +parser is used. +.Pp +If multiple +files are specified with +.Fl m Ns Cm andoc , +each has its file-type determined this way. +If multiple files are +specified and +.Fl m Ns Cm doc +or +.Fl m Ns Cm an +is specified, then this format is used exclusively. 
+.Ss Output Formats +The +.Nm +utility accepts the following +.Fl T +arguments (see +.Sx OUTPUT ) : +.Bl -tag -width Ds +.It Fl T Ns Cm ascii +Produce 7-bit ASCII output, backspace-encoded for bold and underline +styles. +This is the default. +See +.Sx ASCII Output . +.It Fl T Ns Cm html +Produce strict HTML-4.01 output, with a sane default style. +See +.Sx HTML Output . +.It Fl T Ns Cm lint +Parse only: produce no output. +Implies +.Fl W Ns Cm all +and +.Fl f Ns Cm strict . +.It Fl T Ns Cm ps +Produce PostScript output. +See +.Sx PostScript Output . +.It Fl T Ns Cm tree +Produce an indented parse tree. +.It Fl T Ns Cm xhtml +Produce strict XHTML-1.0 output, with a sane default style. +See +.Sx XHTML Output . +.El +.Pp +If multiple input files are specified, these will be processed by the +corresponding filter in-order. +.Ss Compiler Options +Default compiler behaviour may be overridden with the +.Fl f +flag. +.Bl -tag -width Ds +.It Fl f Ns Cm ign-errors +When parsing multiple files, don't halt when one errors out. +Useful with +.Fl T Ns Cm lint +over a large set of manuals passed on the command line. +.It Fl f Ns Cm ign-escape +Ignore invalid escape sequences. +This is the default, but the option can be used to override an earlier +.Fl f Ns Cm strict . +.It Fl f Ns Cm ign-scope +When rewinding the scope of a block macro, forces the compiler to ignore +scope violations. +This can seriously mangle the resulting tree. +.Pq mdoc only +.It Fl f Ns Cm no-ign-escape +Do not ignore invalid escape sequences. +.It Fl f Ns Cm no-ign-macro +Do not ignore unknown macros at the start of input lines. +.It Fl f Ns Cm strict +Implies +.Fl f Ns Cm no-ign-escape +and +.Fl f Ns Cm no-ign-macro . 
+.El +.Ss Output Options +The +.Fl T Ns Ar html +and +.Fl T Ns Ar xhtml +modes accept the following output options: +.Bl -tag -width Ds +.It Fl O Ns Cm includes Ns = Ns Ar fmt +The string +.Ar fmt , +for example, +.Ar ../src/%I.html , +is used as a template for linked header files (usually via the +.Sq \&In +macro). +Instances of +.Sq \&%I +are replaced with the include filename. +The default is not to present a +hyperlink. +.It Fl O Ns Cm man Ns = Ns Ar fmt +The string +.Ar fmt , +for example, +.Ar ../html%S/%N.%S.html , +is used as a template for linked manuals (usually via the +.Sq \&Xr +macro). +Instances of +.Sq \&%N +and +.Sq %S +are replaced with the linked manual's name and section, respectively. +If no section is included, section 1 is assumed. +The default is not to +present a hyperlink. +.It Fl O Ns Cm style Ns = Ns Ar style.css +The file +.Ar style.css +is used for an external style-sheet. +This must be a valid absolute or +relative URI. +.El +.Pp +The +.Fl T Ns Ar ascii +mode accepts the following output option: +.Bl -tag -width Ds +.It Fl O Ns Cm width Ns = Ns Ar width +The output width is set to +.Ar width , +which will normalise to \(>=60. +.El +.Sh OUTPUT +This section documents output details of +.Nm . +In general, output conforms to the traditional manual style of a header, +a body composed of sections and sub-sections, and a footer. +.Pp +The text style of output characters (non-macro characters, punctuation, +and white-space) is dictated by context. +.Pp +White-space is generally stripped from input. +This can be changed with +character escapes (specified in +.Xr mandoc_char 7 ) +or literal modes (specified in +.Xr mdoc 7 +and +.Xr man 7 ) . 
+.Pp +If non-macro punctuation is set apart from words, such as in the phrase +.Dq to be \&, or not to be , +it's processed by +.Nm , +regardless of output format, according to the following rules: opening +punctuation +.Po +.Sq \&( , +.Sq \&[ , +and +.Sq \&{ +.Pc +is not followed by a space; closing punctuation +.Po +.Sq \&. , +.Sq \&, , +.Sq \&; , +.Sq \&: , +.Sq \&? , +.Sq \&! , +.Sq \&) , +.Sq \&] +and +.Sq \&} +.Pc +is not preceded by white-space. +.Pp +If the input is +.Xr mdoc 7 , +however, these rules are also applied to macro arguments when appropriate. +.Ss ASCII Output +Output produced by +.Fl T Ns Cm ascii , +which is the default, is rendered in standard 7-bit ASCII documented in +.Xr ascii 7 . +.Pp +Font styles are applied by using back-spaced encoding such that an +underlined character +.Sq c +is rendered as +.Sq _ Ns \e[bs] Ns c , +where +.Sq \e[bs] +is the back-space character number 8. +Emboldened characters are rendered as +.Sq c Ns \e[bs] Ns c . +.Pp +The special characters documented in +.Xr mandoc_char 7 +are rendered best-effort in an ASCII equivalent. +.Pp +Output width is limited to 78 visible columns unless literal input lines +exceed this limit. +.Ss HTML Output +Output produced by +.Fl T Ns Cm html +conforms to HTML-4.01 strict. +.Pp +Font styles and page structure are applied using CSS2. +By default, no font style is applied to any text, +although CSS2 is hard-coded to format +the basic structure of output. +.Pp +The +.Pa example.style.css +file documents the range of styles applied to output and, if used, will +cause rendered documents to appear as they do in +.Fl T Ns Cm ascii . +.Pp +Special characters are rendered in decimal-encoded UTF-8. +.Ss PostScript Output +PostScript Level 2 pages may be generated by +.Fl T Ns Cm ps . +Output pages are US-letter sized (215.9 x 279.4 mm) and rendered in +fixed, 10-point Courier font. +.Ss XHTML Output +Output produced by +.Fl T Ns Cm xhtml +conforms to XHTML-1.0 strict. 
+.Pp +See +.Sx HTML Output +for details; beyond generating XHTML tags instead of HTML tags, these +output modes are identical. +.Sh EXAMPLES +To page manuals to the terminal: +.Pp +.D1 $ mandoc \-Wall,error \-fstrict mandoc.1 2\*(Gt&1 | less +.D1 $ mandoc mandoc.1 mdoc.3 mdoc.7 | less +.Pp +To produce HTML manuals with +.Ar style.css +as the style-sheet: +.Pp +.D1 $ mandoc \-Thtml \-Ostyle=style.css mdoc.7 \*(Gt mdoc.7.html +.Pp +To check over a large set of manuals: +.Pp +.Dl $ mandoc \-Tlint \-fign-errors `find /usr/src \-name \e*\e.[1-9]` +.Sh COMPATIBILITY +This section summarises +.Nm +compatibility with +.Xr groff 1 . +Each input and output format is separately noted. +.Ss ASCII Compatibility +.Bl -bullet -compact +.It +The +.Sq \e~ +special character doesn't produce expected behaviour in +.Fl T Ns Cm ascii . +.It +The +.Sq \&Bd \-literal +and +.Sq \&Bd \-unfilled +macros of +.Xr mdoc 7 +in +.Fl T Ns Cm ascii +are synonyms, as are \-filled and \-ragged. +.It +In +.Xr groff 1 , +the +.Sq \&Pa +.Xr mdoc 7 +macro does not underline when scoped under an +.Sq \&It +in the FILES section. +This behaves correctly in +.Nm . +.It +A list or display following the +.Sq \&Ss +.Xr mdoc 7 +macro in +.Fl T Ns Cm ascii +does not assert a prior vertical break, just as it doesn't with +.Sq \&Sh . +.It +The +.Sq \&na +.Xr man 7 +macro in +.Fl T Ns Cm ascii +has no effect. +.It +Words aren't hyphenated. +.It +In normal mode (not a literal block), blocks of spaces aren't preserved, +so double spaces following sentence closure are reduced to a single space; +.Xr groff 1 +retains spaces. +.It +Sentences are unilaterally monospaced. +.El +.Ss HTML/XHTML Compatibility +.Bl -bullet -compact +.It +The +.Sq \efP +escape will revert the font to the previous +.Sq \ef +escape, not to the last rendered decoration, which is now dictated by +CSS instead of hard-coded. +It also will not span past the current scope, +for the same reason. +Note that in +.Sx ASCII Output +mode, this will work fine. 
+.It +The +.Xr mdoc 7 +.Sq \&Bl \-hang +and +.Sq \&Bl \-tag +list types render similarly (no break following overreached left-hand +side) due to the expressive constraints of HTML. +.It +The +.Xr man 7 +.Sq IP +and +.Sq TP +lists render similarly. +.El +.Sh SEE ALSO +.Xr man 7 , +.Xr mandoc_char 7 , +.Xr mdoc 7 +.Sh AUTHORS +The +.Nm +utility was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . +.Sh CAVEATS +The +.Fl T Ns Cm html +and +.Fl T Ns Cm xhtml +CSS2 styling used for +.Fl m Ns Cm doc +input lists does not render properly in older browsers, such as Internet +Explorer 6 and earlier. +.Pp +In +.Fl T Ns Cm html +and +.Fl T Ns Cm xhtml , +the maximum size of an element attribute is determined by +.Dv BUFSIZ , +which is usually 1024 bytes. +Be aware of this when setting long link +formats such as +.Fl O Ns Cm style Ns = Ns Ar really/long/link . +.Pp +The +.Fl T Ns Cm html +and +.Fl T Ns Cm xhtml +output modes don't render the +.Sq \es +font size escape documented in +.Xr mdoc 7 +and +.Xr man 7 . +.Pp +Nesting elements within next-line element scopes of +.Fl m Ns Cm an , +such as +.Sq br +within an empty +.Sq B , +will confuse +.Fl T Ns Cm html +and +.Fl T Ns Cm xhtml +and cause them to forget the formatting of the prior next-line scope. +.Pp +The +.Sq i +macro in +.Fl m Ns Cm an +should italicise all subsequent text if a line argument is not provided. +This behaviour is not implemented. +The +.Sq \(aq +control character is an alias for the standard macro control character +and does not emit a line-break as stipulated in GNU troff. 
diff --git a/commands/mdocml/mandoc.c b/commands/mdocml/mandoc.c new file mode 100644 index 000000000..a923a3686 --- /dev/null +++ b/commands/mdocml/mandoc.c @@ -0,0 +1,396 @@ +/* $Id: mandoc.c,v 1.19 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "libmandoc.h" + +static int a2time(time_t *, const char *, const char *); +static int spec_norm(char *, int); + + +/* + * "Normalise" a special string by converting its ASCII_HYPH entries + * into actual hyphens. + * Only the first sz bytes of p are rewritten (in place); sz is returned + * unchanged so that callers can write return(spec_norm(...)). + */ +static int +spec_norm(char *p, int sz) +{ + int i; + + for (i = 0; i < sz; i++) + if (ASCII_HYPH == p[i]) + p[i] = '-'; + + return(sz); +} + + +/* + * Check the escape sequence at the start of p. + * Returns the length in bytes of the recognised sequence, or 0 if p does + * not begin with a backslash or the sequence is unknown or malformed. + * As a side effect, any ASCII_HYPH within the returned length is + * rewritten to '-' in place (see spec_norm()). + */ +int +mandoc_special(char *p) +{ + int terminator; /* Terminator for \s. */ + int lim; /* Limit for N in \s.
*/ + int c, i; + char *sv; + + sv = p; + + if ('\\' != *p++) + return(spec_norm(sv, 0)); + + switch (*p) { + case ('\''): + /* FALLTHROUGH */ + case ('`'): + /* FALLTHROUGH */ + case ('q'): + /* FALLTHROUGH */ + case (ASCII_HYPH): + /* FALLTHROUGH */ + case ('-'): + /* FALLTHROUGH */ + case ('~'): + /* FALLTHROUGH */ + case ('^'): + /* FALLTHROUGH */ + case ('%'): + /* FALLTHROUGH */ + case ('0'): + /* FALLTHROUGH */ + case (' '): + /* FALLTHROUGH */ + case ('}'): + /* FALLTHROUGH */ + case ('|'): + /* FALLTHROUGH */ + case ('&'): + /* FALLTHROUGH */ + case ('.'): + /* FALLTHROUGH */ + case (':'): + /* FALLTHROUGH */ + case ('c'): + /* FALLTHROUGH */ + case ('e'): + return(spec_norm(sv, 2)); + case ('s'): + /* + * Accepts an optional delimiter (quote, `[' or `('), an + * optional sign and a run of digits. The delimiter may + * come before or after the sign, but not both. Without + * a delimiter one digit is consumed, after `(' at most + * two, and within quotes or brackets any number. + */ + if ('\0' == *++p) + return(spec_norm(sv, 2)); + + c = 2; + terminator = 0; + lim = 1; + + if (*p == '\'') { + lim = 0; + terminator = 1; + ++p; + ++c; + } else if (*p == '[') { + lim = 0; + terminator = 2; + ++p; + ++c; + } else if (*p == '(') { + lim = 2; + terminator = 3; + ++p; + ++c; + } + + if (*p == '+' || *p == '-') { + ++p; + ++c; + } + + if (*p == '\'') { + if (terminator) + return(spec_norm(sv, 0)); + lim = 0; + terminator = 1; + ++p; + ++c; + } else if (*p == '[') { + if (terminator) + return(spec_norm(sv, 0)); + lim = 0; + terminator = 2; + ++p; + ++c; + } else if (*p == '(') { + if (terminator) + return(spec_norm(sv, 0)); + lim = 2; + terminator = 3; + ++p; + ++c; + } + + /* TODO: needs to handle floating point. */ + + if ( !
isdigit((u_char)*p)) + return(spec_norm(sv, 0)); + + for (i = 0; isdigit((u_char)*p); i++) { + if (lim && i >= lim) + break; + ++p; + ++c; + } + + if (terminator && terminator < 3) { + if (1 == terminator && *p != '\'') + return(spec_norm(sv, 0)); + if (2 == terminator && *p != ']') + return(spec_norm(sv, 0)); + ++p; + ++c; + } + + return(spec_norm(sv, c)); + case ('f'): + /* FALLTHROUGH */ + case ('F'): + /* FALLTHROUGH */ + case ('*'): + /* + * NOTE(review): for the `(' and `[' forms below, the + * returned length appears to be one byte short of the + * whole sequence, and only the first of the two name + * bytes after `(' is checked -- confirm with callers + * before changing. + */ + if ('\0' == *++p || isspace((u_char)*p)) + return(spec_norm(sv, 0)); + switch (*p) { + case ('('): + if ('\0' == *++p || isspace((u_char)*p)) + return(spec_norm(sv, 0)); + return(spec_norm(sv, 4)); + case ('['): + for (c = 3, p++; *p && ']' != *p; p++, c++) + if (isspace((u_char)*p)) + break; + return(spec_norm(sv, *p == ']' ? c : 0)); + default: + break; + } + return(spec_norm(sv, 3)); + case ('('): + if ('\0' == *++p || isspace((u_char)*p)) + return(spec_norm(sv, 0)); + if ('\0' == *++p || isspace((u_char)*p)) + return(spec_norm(sv, 0)); + return(spec_norm(sv, 4)); + case ('['): + break; + default: + return(spec_norm(sv, 0)); + } + + /* \[name]: the name runs up to `]' and may not contain whitespace. */ + for (c = 3, p++; *p && ']' != *p; p++, c++) + if (isspace((u_char)*p)) + break; + + return(spec_norm(sv, *p == ']' ?
c : 0)); +} + + +/* + * Allocation wrappers. On failure each one reports through perror(3) + * and exits with EXIT_FAILURE, so a returned pointer is never NULL. + */ +void * +mandoc_calloc(size_t num, size_t size) +{ + void *ptr; + + ptr = calloc(num, size); + if (NULL == ptr) { + perror(NULL); + exit(EXIT_FAILURE); + } + + return(ptr); +} + + +void * +mandoc_malloc(size_t size) +{ + void *ptr; + + ptr = malloc(size); + if (NULL == ptr) { + perror(NULL); + exit(EXIT_FAILURE); + } + + return(ptr); +} + + +void * +mandoc_realloc(void *ptr, size_t size) +{ + + ptr = realloc(ptr, size); + if (NULL == ptr) { + perror(NULL); + exit(EXIT_FAILURE); + } + + return(ptr); +} + + +char * +mandoc_strdup(const char *ptr) +{ + char *p; + + p = strdup(ptr); + if (NULL == p) { + perror(NULL); + exit(EXIT_FAILURE); + } + + return(p); +} + + +/* + * Parse p according to the strptime(3) format fmt. + * Returns 1 and stores the mktime(3) result in *t only if the whole of + * p was consumed; otherwise returns 0 and leaves *t untouched. + */ +static int +a2time(time_t *t, const char *fmt, const char *p) +{ + struct tm tm; + char *pp; + + memset(&tm, 0, sizeof(struct tm)); + + pp = strptime(p, fmt, &tm); + if (NULL != pp && '\0' == *pp) { + *t = mktime(&tm); + return(1); + } + + return(0); +} + + +/* + * Convert from a manual date string (see mdoc(7) and man(7)) into a + * date according to the stipulated date type. + * The formats enabled by flags are tried in the order below; an + * unexpanded Mdocdate keyword yields the current time. Returns 0 if + * no enabled format matches. + */ +time_t +mandoc_a2time(int flags, const char *p) +{ + time_t t; + + if (MTIME_MDOCDATE & flags) { + if (0 == strcmp(p, "$" "Mdocdate$")) + return(time(NULL)); + if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p)) + return(t); + } + + if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags) + if (a2time(&t, "%b %d, %Y", p)) + return(t); + + if (MTIME_ISO_8601 & flags) + if (a2time(&t, "%Y-%m-%d", p)) + return(t); + + if (MTIME_REDUCED & flags) { + if (a2time(&t, "%d, %Y", p)) + return(t); + if (a2time(&t, "%Y", p)) + return(t); + } + + return(0); +} + + +/* + * Return 1 if the sz bytes at p end a sentence, that is, if the last + * byte, ignoring any trailing quotes, `]' or `)', is `!', `?' or a + * `.' not preceded by a backslash. Returns 0 otherwise. + */ +int +mandoc_eos(const char *p, size_t sz) +{ + + if (0 == sz) + return(0); + + /* + * End-of-sentence recognition must include situations where + * some symbols, such as `)', allow prior EOS punctuation to + * propagate outward.
+ */ + + for ( ; sz; sz--) { + switch (p[(int)sz - 1]) { + case ('\"'): + /* FALLTHROUGH */ + case ('\''): + /* FALLTHROUGH */ + case (']'): + /* FALLTHROUGH */ + case (')'): + break; + case ('.'): + /* Escaped periods. */ + if (sz > 1 && '\\' == p[(int)sz - 2]) + return(0); + /* FALLTHROUGH */ + case ('!'): + /* FALLTHROUGH */ + case ('?'): + return(1); + default: + return(0); + } + } + + return(0); +} + + +/* + * Return 1 if output may be broken at the hyphen pointed to by c, which + * lies within the nil-terminated buffer beginning at start; else 0. + */ +int +mandoc_hyph(const char *start, const char *c) +{ + + /* + * Choose whether to break at a hyphenated character. We only + * do this if it's free-standing within a word. + */ + + /* Skip first/last character of buffer. */ + if (c == start || '\0' == *(c + 1)) + return(0); + /* Skip first/last character of word. */ + if ('\t' == *(c + 1) || '\t' == *(c - 1)) + return(0); + if (' ' == *(c + 1) || ' ' == *(c - 1)) + return(0); + /* Skip double invocations. */ + if ('-' == *(c + 1) || '-' == *(c - 1)) + return(0); + /* Skip escapes. */ + if ('\\' == *(c - 1)) + return(0); + + return(1); +} diff --git a/commands/mdocml/mandoc.h b/commands/mdocml/mandoc.h new file mode 100644 index 000000000..2fc74676c --- /dev/null +++ b/commands/mdocml/mandoc.h @@ -0,0 +1,110 @@ +/* $Id: mandoc.h,v 1.12 2010/06/12 11:41:50 kristaps Exp $ */ +/* + * Copyright (c) 2010 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef MANDOC_H +#define MANDOC_H + +#define ASCII_NBRSP 31 /* non-breaking space */ +#define ASCII_HYPH 30 /* breakable hyphen */ + + +__BEGIN_DECLS + +/* + * Message codes passed to the mandocmsg callback. + * The enumerators are laid out in groups, and MANDOCERR_WARNING, + * MANDOCERR_ERROR and MANDOCERR_FATAL each alias the last enumerator of + * a group. NOTE(review): presumably a code's severity is determined by + * comparing it against these boundaries -- confirm in the callback + * implementation. MANDOCERR_MAX is the number of codes. + */ +enum mandocerr { + MANDOCERR_OK, + MANDOCERR_UPPERCASE, /* text should be uppercase */ + MANDOCERR_SECOOO, /* sections out of conventional order */ + MANDOCERR_SECREP, /* section name repeats */ + MANDOCERR_PROLOGOOO, /* out of order prologue */ + MANDOCERR_PROLOGREP, /* repeated prologue entry */ + MANDOCERR_LISTFIRST, /* list type must come first */ + MANDOCERR_BADSTANDARD, /* bad standard */ + MANDOCERR_BADLIB, /* bad library */ + MANDOCERR_BADESCAPE, /* bad escape sequence */ + MANDOCERR_BADQUOTE, /* unterminated quoted string */ + MANDOCERR_NOWIDTHARG, /* argument requires the width argument */ + /* FIXME: merge with MANDOCERR_IGNARGV.
*/ + MANDOCERR_WIDTHARG, /* superfluous width argument */ + MANDOCERR_IGNARGV, /* macro ignoring argv */ + MANDOCERR_BADDATE, /* bad date argument */ + MANDOCERR_BADWIDTH, /* bad width argument */ + MANDOCERR_BADMSEC, /* unknown manual section */ + MANDOCERR_SECMSEC, /* section not in conventional manual section */ + MANDOCERR_EOLNSPACE, /* end of line whitespace */ + MANDOCERR_SCOPEEXIT, /* scope open on exit */ +#define MANDOCERR_WARNING MANDOCERR_SCOPEEXIT + + MANDOCERR_NAMESECFIRST, /* NAME section must come first */ + MANDOCERR_BADBOOL, /* bad Boolean value */ + MANDOCERR_CHILD, /* child violates parent syntax */ + MANDOCERR_BADATT, /* bad AT&T symbol */ + MANDOCERR_LISTREP, /* list type repeated */ + MANDOCERR_DISPREP, /* display type repeated */ + MANDOCERR_ARGVREP, /* argument repeated */ + MANDOCERR_NONAME, /* manual name not yet set */ + MANDOCERR_MACROOBS, /* obsolete macro ignored */ + MANDOCERR_MACROEMPTY, /* empty macro ignored */ + MANDOCERR_BADBODY, /* macro not allowed in body */ + MANDOCERR_BADPROLOG, /* macro not allowed in prologue */ + MANDOCERR_BADCHAR, /* bad character */ + MANDOCERR_BADNAMESEC, /* bad NAME section contents */ + MANDOCERR_NOBLANKLN, /* no blank lines */ + MANDOCERR_NOTEXT, /* no text in this context */ + MANDOCERR_BADCOMMENT, /* bad comment style */ + MANDOCERR_MACRO, /* unknown macro will be lost */ + MANDOCERR_LINESCOPE, /* line scope broken */ + MANDOCERR_SCOPE, /* scope broken */ + MANDOCERR_ARGCOUNT, /* argument count wrong */ + MANDOCERR_NOSCOPE, /* request scope close w/none open */ + MANDOCERR_SCOPEREP, /* scope already open */ + /* FIXME: merge following with MANDOCERR_ARGCOUNT */ + MANDOCERR_NOARGS, /* macro requires line argument(s) */ + MANDOCERR_NOBODY, /* macro requires body argument(s) */ + MANDOCERR_NOARGV, /* macro requires argument(s) */ + MANDOCERR_NOTITLE, /* no title in document */ + MANDOCERR_LISTTYPE, /* missing list type */ + MANDOCERR_DISPTYPE, /* missing display type */ + MANDOCERR_ARGSLOST, /* line
argument(s) will be lost */ + MANDOCERR_BODYLOST, /* body argument(s) will be lost */ +#define MANDOCERR_ERROR MANDOCERR_BODYLOST + + MANDOCERR_COLUMNS, /* column syntax is inconsistent */ + /* FIXME: this should be a MANDOCERR_ERROR */ + MANDOCERR_FONTTYPE, /* missing font type */ + /* FIXME: this should be a MANDOCERR_ERROR */ + MANDOCERR_NESTEDDISP, /* displays may not be nested */ + MANDOCERR_BADDISP, /* unsupported display type */ + MANDOCERR_SYNTNOSCOPE, /* request scope close w/none open */ + MANDOCERR_SYNTSCOPE, /* scope broken, syntax violated */ + MANDOCERR_SYNTLINESCOPE, /* line scope broken, syntax violated */ + MANDOCERR_SYNTARGVCOUNT, /* argument count wrong, violates syntax */ + MANDOCERR_SYNTCHILD, /* child violates parent syntax */ + MANDOCERR_SYNTARGCOUNT, /* argument count wrong, violates syntax */ + MANDOCERR_NODOCBODY, /* no document body */ + MANDOCERR_NODOCPROLOG, /* no document prologue */ + MANDOCERR_UTSNAME, /* utsname() system call failed */ + MANDOCERR_MEM, /* memory exhausted */ +#define MANDOCERR_FATAL MANDOCERR_MEM + + MANDOCERR_MAX +}; + +/* + * Message callback type: takes a message code, a caller-supplied + * pointer, two ints and a string. + * NOTE(review): the ints are presumably the line and column of the + * offending input and the string extra message text -- confirm against + * the callers. + */ +typedef int (*mandocmsg)(enum mandocerr, + void *, int, int, const char *); + +__END_DECLS + +#endif /*!MANDOC_H*/ diff --git a/commands/mdocml/mandoc_char.7 b/commands/mdocml/mandoc_char.7 new file mode 100644 index 000000000..c01826589 --- /dev/null +++ b/commands/mdocml/mandoc_char.7 @@ -0,0 +1,559 @@ +.\" $Id: mandoc_char.7,v 1.39 2010/05/12 08:29:23 kristaps Exp $ +.\" +.\" Copyright (c) 2009 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: May 12 2010 $ +.Dt MANDOC_CHAR 7 +.Os +.Sh NAME +.Nm mandoc_char +.Nd mandoc special characters +.Sh DESCRIPTION +This page documents the special characters and predefined strings accepted by +.Xr mandoc 1 +to format +.Xr mdoc 7 +and +.Xr man 7 +documents. +.Pp +Both +.Xr mdoc 7 +and +.Xr man 7 +encode special characters with +.Sq \eX +.Pq for a one-character escape , +.Sq \e(XX +.Pq two-character , +and +.Sq \e[N] +.Pq N-character . +One may generalise +.Sq \e(XX +as +.Sq \e[XX] +and +.Sq \eX +as +.Sq \e[X] . +Predefined strings are functionally similar to special characters, using +.Sq \e*X +.Pq for a one-character escape , +.Sq \e*(XX +.Pq two-character , +and +.Sq \e*[N] +.Pq N-character . +One may generalise +.Sq \e*(XX +as +.Sq \e*[XX] +and +.Sq \e*X +as +.Sq \e*[X] . +.Pp +Note that each output mode will have a different rendering of the +characters. +It's guaranteed that each input symbol will correspond to a +(more or less) meaningful output rendering, regardless the mode. +.Sh SPECIAL CHARACTERS +These are the preferred input symbols for producing special characters. 
+.Pp +Spacing: +.Bl -column -compact -offset indent "Input" "Description" +.It Em Input Ta Em Description +.It \e~ Ta non-breaking, non-collapsing space +.It \e Ta breaking, non-collapsing n-width space +.It \e^ Ta zero-width space +.It \e% Ta zero-width space +.It \e& Ta zero-width space +.It \e| Ta zero-width space +.It \e0 Ta breaking, non-collapsing digit-width space +.It \ec Ta removes any trailing space (if applicable) +.El +.Pp +Lines: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(ba Ta \(ba Ta bar +.It \e(br Ta \(br Ta box rule +.It \e(ul Ta \(ul Ta underscore +.It \e(rl Ta \(rl Ta overline +.It \e(bb Ta \(bb Ta broken bar +.It \e(sl Ta \(sl Ta forward slash +.It \e(rs Ta \(rs Ta backward slash +.El +.Pp +Text markers: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(ci Ta \(ci Ta circle +.It \e(bu Ta \(bu Ta bullet +.It \e(dd Ta \(dd Ta double dagger +.It \e(dg Ta \(dg Ta dagger +.It \e(lz Ta \(lz Ta lozenge +.It \e(sq Ta \(sq Ta white square +.It \e(ps Ta \(ps Ta paragraph +.It \e(sc Ta \(sc Ta section +.It \e(lh Ta \(lh Ta left hand +.It \e(rh Ta \(rh Ta right hand +.It \e(at Ta \(at Ta at +.It \e(sh Ta \(sh Ta hash (pound) +.It \e(CR Ta \(CR Ta carriage return +.It \e(OK Ta \(OK Ta check mark +.El +.Pp +Legal symbols: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(co Ta \(co Ta copyright +.It \e(rg Ta \(rg Ta registered +.It \e(tm Ta \(tm Ta trademarked +.El +.Pp +Punctuation: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(em Ta \(em Ta em-dash +.It \e(en Ta \(en Ta en-dash +.It \e(hy Ta \(hy Ta hyphen +.It \ee Ta \e Ta back-slash +.It \e. Ta \. Ta period +.It \e(r! Ta \(r! Ta upside-down exclamation +.It \e(r? Ta \(r? 
Ta upside-down question +.El +.Pp +Quotes: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(Bq Ta \(Bq Ta right low double-quote +.It \e(bq Ta \(bq Ta right low single-quote +.It \e(lq Ta \(lq Ta left double-quote +.It \e(rq Ta \(rq Ta right double-quote +.It \e(oq Ta \(oq Ta left single-quote +.It \e(cq Ta \(cq Ta right single-quote +.It \e(aq Ta \(aq Ta apostrophe quote (text) +.It \e(dq Ta \(dq Ta double quote (text) +.It \e(Fo Ta \(Fo Ta left guillemet +.It \e(Fc Ta \(Fc Ta right guillemet +.It \e(fo Ta \(fo Ta left single guillemet +.It \e(fc Ta \(fc Ta right single guillemet +.El +.Pp +Brackets: +.Bl -column -compact -offset indent "xxbracketrightbpx" Rendered Description +.It Em Input Ta Em Rendered Ta Em Description +.It \e(lB Ta \(lB Ta left bracket +.It \e(rB Ta \(rB Ta right bracket +.It \e(lC Ta \(lC Ta left brace +.It \e(rC Ta \(rC Ta right brace +.It \e(la Ta \(la Ta left angle +.It \e(ra Ta \(ra Ta right angle +.It \e(bv Ta \(bv Ta brace extension +.It \e[braceex] Ta \[braceex] Ta brace extension +.It \e[bracketlefttp] Ta \[bracketlefttp] Ta top-left hooked bracket +.It \e[bracketleftbp] Ta \[bracketleftbp] Ta bottom-left hooked bracket +.It \e[bracketleftex] Ta \[bracketleftex] Ta left hooked bracket extension +.It \e[bracketrighttp] Ta \[bracketrighttp] Ta top-right hooked bracket +.It \e[bracketrightbp] Ta \[bracketrightbp] Ta bottom-right hooked bracket +.It \e[bracketrightex] Ta \[bracketrightex] Ta right hooked bracket extension +.It \e(lt Ta \(lt Ta top-left hooked brace +.It \e[bracelefttp] Ta \[bracelefttp] Ta top-left hooked brace +.It \e(lk Ta \(lk Ta mid-left hooked brace +.It \e[braceleftmid] Ta \[braceleftmid] Ta mid-left hooked brace +.It \e(lb Ta \(lb Ta bottom-left hooked brace +.It \e[braceleftbp] Ta \[braceleftbp] Ta bottom-left hooked brace +.It \e[braceleftex] Ta \[braceleftex] Ta left hooked brace extension +.It \e(rt Ta \(rt Ta top-right hooked brace
+.It \e[bracerighttp] Ta \[bracerighttp] Ta top-right hooked brace +.It \e(rk Ta \(rk Ta mid-right hooked brace +.It \e[bracerightmid] Ta \[bracerightmid] Ta mid-right hooked brace +.It \e(rb Ta \(rb Ta bottom-right hooked brace +.It \e[bracerightbp] Ta \[bracerightbp] Ta bottom-right hooked brace +.It \e[bracerightex] Ta \[bracerightex] Ta right hooked brace extension +.It \e[parenlefttp] Ta \[parenlefttp] Ta top-left hooked parenthesis +.It \e[parenleftbp] Ta \[parenleftbp] Ta bottom-left hooked parenthesis +.It \e[parenleftex] Ta \[parenleftex] Ta left hooked parenthesis extension +.It \e[parenrighttp] Ta \[parenrighttp] Ta top-right hooked parenthesis +.It \e[parenrightbp] Ta \[parenrightbp] Ta bottom-right hooked parenthesis +.It \e[parenrightex] Ta \[parenrightex] Ta right hooked parenthesis extension +.El +.Pp +Arrows: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(<- Ta \(<- Ta left arrow +.It \e(-> Ta \(-> Ta right arrow +.It \e(<> Ta \(<> Ta left-right arrow +.It \e(da Ta \(da Ta down arrow +.It \e(ua Ta \(ua Ta up arrow +.It \e(va Ta \(va Ta up-down arrow +.It \e(lA Ta \(lA Ta left double-arrow +.It \e(rA Ta \(rA Ta right double-arrow +.It \e(hA Ta \(hA Ta left-right double-arrow +.It \e(uA Ta \(uA Ta up double-arrow +.It \e(dA Ta \(dA Ta down double-arrow +.It \e(vA Ta \(vA Ta up-down double-arrow +.El +.Pp +Logical: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(AN Ta \(AN Ta logical and +.It \e(OR Ta \(OR Ta logical or +.It \e(no Ta \(no Ta logical not +.It \e[tno] Ta \[tno] Ta logical not (text) +.It \e(te Ta \(te Ta existential quantifier +.It \e(fa Ta \(fa Ta universal quantifier +.It \e(st Ta \(st Ta such that +.It \e(tf Ta \(tf Ta therefore +.It \e(3d Ta \(3d Ta therefore +.It \e(or Ta \(or Ta bitwise or +.El +.Pp +Mathematical: +.Bl -column -compact -offset indent "xxcoproductxx" "Rendered" 
"Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(pl Ta \(pl Ta plus +.It \e(mi Ta \(mi Ta minus +.It \e- Ta \- Ta minus (text) +.It \e(-+ Ta \(-+ Ta minus-plus +.It \e(+- Ta \(+- Ta plus-minus +.It \e[t+-] Ta \[t+-] Ta plus-minus (text) +.It \e(pc Ta \(pc Ta centre-dot +.It \e(mu Ta \(mu Ta multiply +.It \e[tmu] Ta \[tmu] Ta multiply (text) +.It \e(c* Ta \(c* Ta circle-multiply +.It \e(c+ Ta \(c+ Ta circle-plus +.It \e(di Ta \(di Ta divide +.It \e[tdi] Ta \[tdi] Ta divide (text) +.It \e(f/ Ta \(f/ Ta fraction +.It \e(** Ta \(** Ta asterisk +.It \e(<= Ta \(<= Ta less-than-equal +.It \e(>= Ta \(>= Ta greater-than-equal +.It \e(<< Ta \(<< Ta much less +.It \e(>> Ta \(>> Ta much greater +.It \e(eq Ta \(eq Ta equal +.It \e(!= Ta \(!= Ta not equal +.It \e(== Ta \(== Ta equivalent +.It \e(ne Ta \(ne Ta not equivalent +.It \e(=~ Ta \(=~ Ta congruent +.It \e(-~ Ta \(-~ Ta asymptotically congruent +.It \e(ap Ta \(ap Ta asymptotically similar +.It \e(~~ Ta \(~~ Ta approximately similar +.It \e(~= Ta \(~= Ta approximately equal +.It \e(pt Ta \(pt Ta proportionate +.It \e(es Ta \(es Ta empty set +.It \e(mo Ta \(mo Ta element +.It \e(nm Ta \(nm Ta not element +.It \e(sb Ta \(sb Ta proper subset +.It \e(nb Ta \(nb Ta not subset +.It \e(sp Ta \(sp Ta proper superset +.It \e(nc Ta \(nc Ta not superset +.It \e(ib Ta \(ib Ta reflexive subset +.It \e(ip Ta \(ip Ta reflexive superset +.It \e(ca Ta \(ca Ta intersection +.It \e(cu Ta \(cu Ta union +.It \e(/_ Ta \(/_ Ta angle +.It \e(pp Ta \(pp Ta perpendicular +.It \e(is Ta \(is Ta integral +.It \e[integral] Ta \[integral] Ta integral +.It \e[sum] Ta \[sum] Ta summation +.It \e[product] Ta \[product] Ta product +.It \e[coproduct] Ta \[coproduct] Ta coproduct +.It \e(gr Ta \(gr Ta gradient +.It \e(sr Ta \(sr Ta square root +.It \e[sqrt] Ta \[sqrt] Ta square root +.It \e(lc Ta \(lc Ta left-ceiling +.It \e(rc Ta \(rc Ta right-ceiling +.It \e(lf Ta \(lf Ta left-floor +.It \e(rf Ta \(rf Ta right-floor +.It \e(if Ta \(if Ta 
infinity +.It \e(Ah Ta \(Ah Ta aleph +.It \e(Im Ta \(Im Ta imaginary +.It \e(Re Ta \(Re Ta real +.It \e(pd Ta \(pd Ta partial differential +.It \e(-h Ta \(-h Ta Planck constant over 2\(*p +.El +.Pp +Ligatures: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(ff Ta \(ff Ta ff ligature +.It \e(fi Ta \(fi Ta fi ligature +.It \e(fl Ta \(fl Ta fl ligature +.It \e(Fi Ta \(Fi Ta ffi ligature +.It \e(Fl Ta \(Fl Ta ffl ligature +.It \e(AE Ta \(AE Ta AE +.It \e(ae Ta \(ae Ta ae +.It \e(OE Ta \(OE Ta OE +.It \e(oe Ta \(oe Ta oe +.It \e(ss Ta \(ss Ta German eszett +.It \e(IJ Ta \(IJ Ta IJ ligature +.It \e(ij Ta \(ij Ta ij ligature +.El +.Pp +Accents: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(a" Ta \(a" Ta Hungarian umlaut +.It \e(a- Ta \(a- Ta macron +.It \e(a. Ta \(a. Ta dotted +.It \e(a^ Ta \(a^ Ta circumflex +.It \e(aa Ta \(aa Ta acute +.It \e' Ta \' Ta acute +.It \e(ga Ta \(ga Ta grave +.It \e` Ta \` Ta grave +.It \e(ab Ta \(ab Ta breve +.It \e(ac Ta \(ac Ta cedilla +.It \e(ad Ta \(ad Ta dieresis +.It \e(ah Ta \(ah Ta caron +.It \e(ao Ta \(ao Ta ring +.It \e(a~ Ta \(a~ Ta tilde +.It \e(ho Ta \(ho Ta ogonek +.It \e(ha Ta \(ha Ta hat (text) +.It \e(ti Ta \(ti Ta tilde (text) +.El +.Pp +Accented letters: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e('A Ta \('A Ta acute A +.It \e('E Ta \('E Ta acute E +.It \e('I Ta \('I Ta acute I +.It \e('O Ta \('O Ta acute O +.It \e('U Ta \('U Ta acute U +.It \e('a Ta \('a Ta acute a +.It \e('e Ta \('e Ta acute e +.It \e('i Ta \('i Ta acute i +.It \e('o Ta \('o Ta acute o +.It \e('u Ta \('u Ta acute u +.It \e(`A Ta \(`A Ta grave A +.It \e(`E Ta \(`E Ta grave E +.It \e(`I Ta \(`I Ta grave I +.It \e(`O Ta \(`O Ta grave O +.It \e(`U Ta \(`U Ta grave U +.It \e(`a Ta \(`a Ta grave a +.It \e(`e Ta \(`e Ta grave e 
+.It \e(`i Ta \(`i Ta grave i +.It \e(`o Ta \(`o Ta grave o +.It \e(`u Ta \(`u Ta grave u +.It \e(~A Ta \(~A Ta tilde A +.It \e(~N Ta \(~N Ta tilde N +.It \e(~O Ta \(~O Ta tilde O +.It \e(~a Ta \(~a Ta tilde a +.It \e(~n Ta \(~n Ta tilde n +.It \e(~o Ta \(~o Ta tilde o +.It \e(:A Ta \(:A Ta dieresis A +.It \e(:E Ta \(:E Ta dieresis E +.It \e(:I Ta \(:I Ta dieresis I +.It \e(:O Ta \(:O Ta dieresis O +.It \e(:U Ta \(:U Ta dieresis U +.It \e(:a Ta \(:a Ta dieresis a +.It \e(:e Ta \(:e Ta dieresis e +.It \e(:i Ta \(:i Ta dieresis i +.It \e(:o Ta \(:o Ta dieresis o +.It \e(:u Ta \(:u Ta dieresis u +.It \e(:y Ta \(:y Ta dieresis y +.It \e(^A Ta \(^A Ta circumflex A +.It \e(^E Ta \(^E Ta circumflex E +.It \e(^I Ta \(^I Ta circumflex I +.It \e(^O Ta \(^O Ta circumflex O +.It \e(^U Ta \(^U Ta circumflex U +.It \e(^a Ta \(^a Ta circumflex a +.It \e(^e Ta \(^e Ta circumflex e +.It \e(^i Ta \(^i Ta circumflex i +.It \e(^o Ta \(^o Ta circumflex o +.It \e(^u Ta \(^u Ta circumflex u +.It \e(,C Ta \(,C Ta cedilla C +.It \e(,c Ta \(,c Ta cedilla c +.It \e(/L Ta \(/L Ta stroke L +.It \e(/l Ta \(/l Ta stroke l +.It \e(/O Ta \(/O Ta stroke O +.It \e(/o Ta \(/o Ta stroke o +.It \e(oA Ta \(oA Ta ring A +.It \e(oa Ta \(oa Ta ring a +.El +.Pp +Special letters: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(-D Ta \(-D Ta Eth +.It \e(Sd Ta \(Sd Ta eth +.It \e(TP Ta \(TP Ta Thorn +.It \e(Tp Ta \(Tp Ta thorn +.It \e(.i Ta \(.i Ta dotless i +.It \e(.j Ta \(.j Ta dotless j +.El +.Pp +Currency: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(Do Ta \(Do Ta dollar +.It \e(ct Ta \(ct Ta cent +.It \e(Eu Ta \(Eu Ta Euro symbol +.It \e(eu Ta \(eu Ta Euro symbol +.It \e(Ye Ta \(Ye Ta yen +.It \e(Po Ta \(Po Ta pound +.It \e(Cs Ta \(Cs Ta Scandinavian +.It \e(Fn Ta \(Fn Ta florin +.El +.Pp +Units: +.Bl -column -compact -offset indent "Input" "Rendered"
"Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(de Ta \(de Ta degree +.It \e(%0 Ta \(%0 Ta per-thousand +.It \e(fm Ta \(fm Ta minute +.It \e(sd Ta \(sd Ta second +.It \e(mc Ta \(mc Ta micro +.El +.Pp +Greek letters: +.Bl -column -compact -offset indent "Input" "Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e(*A Ta \(*A Ta Alpha +.It \e(*B Ta \(*B Ta Beta +.It \e(*G Ta \(*G Ta Gamma +.It \e(*D Ta \(*D Ta Delta +.It \e(*E Ta \(*E Ta Epsilon +.It \e(*Z Ta \(*Z Ta Zeta +.It \e(*Y Ta \(*Y Ta Eta +.It \e(*H Ta \(*H Ta Theta +.It \e(*I Ta \(*I Ta Iota +.It \e(*K Ta \(*K Ta Kappa +.It \e(*L Ta \(*L Ta Lambda +.It \e(*M Ta \(*M Ta Mu +.It \e(*N Ta \(*N Ta Nu +.It \e(*C Ta \(*C Ta Xi +.It \e(*O Ta \(*O Ta Omicron +.It \e(*P Ta \(*P Ta Pi +.It \e(*R Ta \(*R Ta Rho +.It \e(*S Ta \(*S Ta Sigma +.It \e(*T Ta \(*T Ta Tau +.It \e(*U Ta \(*U Ta Upsilon +.It \e(*F Ta \(*F Ta Phi +.It \e(*X Ta \(*X Ta Chi +.It \e(*Q Ta \(*Q Ta Psi +.It \e(*W Ta \(*W Ta Omega +.It \e(*a Ta \(*a Ta alpha +.It \e(*b Ta \(*b Ta beta +.It \e(*g Ta \(*g Ta gamma +.It \e(*d Ta \(*d Ta delta +.It \e(*e Ta \(*e Ta epsilon +.It \e(*z Ta \(*z Ta zeta +.It \e(*y Ta \(*y Ta eta +.It \e(*h Ta \(*h Ta theta +.It \e(*i Ta \(*i Ta iota +.It \e(*k Ta \(*k Ta kappa +.It \e(*l Ta \(*l Ta lambda +.It \e(*m Ta \(*m Ta mu +.It \e(*n Ta \(*n Ta nu +.It \e(*c Ta \(*c Ta xi +.It \e(*o Ta \(*o Ta omicron +.It \e(*p Ta \(*p Ta pi +.It \e(*r Ta \(*r Ta rho +.It \e(*s Ta \(*s Ta sigma +.It \e(*t Ta \(*t Ta tau +.It \e(*u Ta \(*u Ta upsilon +.It \e(*f Ta \(*f Ta phi +.It \e(*x Ta \(*x Ta chi +.It \e(*q Ta \(*q Ta psi +.It \e(*w Ta \(*w Ta omega +.It \e(+h Ta \(+h Ta theta variant +.It \e(+f Ta \(+f Ta phi variant +.It \e(+p Ta \(+p Ta pi variant +.It \e(+e Ta \(+e Ta epsilon variant +.It \e(ts Ta \(ts Ta sigma terminal +.El +.Sh PREDEFINED STRINGS +These are not recommended for use, as they differ across +implementations: +.Pp +.Bl -column -compact -offset indent "Input"
"Rendered" "Description" +.It Em Input Ta Em Rendered Ta Em Description +.It \e*(Ba Ta \*(Ba Ta vertical bar +.It \e*(Ne Ta \*(Ne Ta not equal +.It \e*(Ge Ta \*(Ge Ta greater-than-equal +.It \e*(Le Ta \*(Le Ta less-than-equal +.It \e*(Gt Ta \*(Gt Ta greater-than +.It \e*(Lt Ta \*(Lt Ta less-than +.It \e*(Pm Ta \*(Pm Ta plus-minus +.It \e*(If Ta \*(If Ta infinity +.It \e*(Pi Ta \*(Pi Ta pi +.It \e*(Na Ta \*(Na Ta NaN +.It \e*(Am Ta \*(Am Ta ampersand +.It \e*R Ta \*R Ta restricted mark +.It \e*(Tm Ta \*(Tm Ta trade mark +.It \e*q Ta \*q Ta double-quote +.It \e*(Rq Ta \*(Rq Ta right-double-quote +.It \e*(Lq Ta \*(Lq Ta left-double-quote +.It \e*(lp Ta \*(lp Ta left-parenthesis +.It \e*(rp Ta \*(rp Ta right-parenthesis +.It \e*(lq Ta \*(lq Ta left double-quote +.It \e*(rq Ta \*(rq Ta right double-quote +.It \e*(ua Ta \*(ua Ta up arrow +.It \e*(va Ta \*(va Ta up-down arrow +.It \e*(<= Ta \*(<= Ta less-than-equal +.It \e*(>= Ta \*(>= Ta greater-than-equal +.It \e*(aa Ta \*(aa Ta acute +.It \e*(ga Ta \*(ga Ta grave +.El +.Sh COMPATIBILITY +This section documents compatibility of +.Nm +with older or existing versions of +.Xr groff 1 . +.Pp +The following render differently in +.Fl T Ns Ar ascii +output mode: +.Bd -ragged -offset indent +\e(ss, \e(nm, \e(nb, \e(nc, \e(ib, \e(ip, \e(pp, \e[sum], \e[product], +\e[coproduct], \e(gr, \e(-h, \e(a. +.Ed +.Pp +The following render differently in +.Fl T Ns Ar html +output mode: +.Bd -ragged -offset indent +\e(~=, \e(nb, \e(nc +.Ed +.Pp +Finally, the following have been omitted by being poorly documented or +having no known representation: +.Bd -ragged -offset indent +\e[radicalex], \e[sqrtex], \e(ru +.Ed +.Sh SEE ALSO +.Xr mandoc 1 +.Sh AUTHORS +The +.Nm +manual page was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . +.Sh CAVEATS +The +.Sq \e*(Ba +escape mimics the behaviour of the +.Sq \&| +character in +.Xr mdoc 7 ; +thus, if you wish to render a vertical bar with no side effects, use +the +.Sq \e(ba +escape.
diff --git a/commands/mdocml/mdoc.3 b/commands/mdocml/mdoc.3 new file mode 100644 index 000000000..da963ca5b --- /dev/null +++ b/commands/mdocml/mdoc.3 @@ -0,0 +1,290 @@ +.\" $Id: mdoc.3,v 1.41 2010/05/30 22:56:02 kristaps Exp $ +.\" +.\" Copyright (c) 2009-2010 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: May 30 2010 $ +.Dt MDOC 3 +.Os +.Sh NAME +.Nm mdoc , +.Nm mdoc_alloc , +.Nm mdoc_endparse , +.Nm mdoc_free , +.Nm mdoc_meta , +.Nm mdoc_node , +.Nm mdoc_parseln , +.Nm mdoc_reset +.Nd mdoc macro compiler library +.Sh SYNOPSIS +.In mandoc.h +.In mdoc.h +.Vt extern const char * const * mdoc_macronames; +.Vt extern const char * const * mdoc_argnames; +.Ft "struct mdoc *" +.Fn mdoc_alloc "void *data" "int pflags" "mandocmsg msgs" +.Ft int +.Fn mdoc_endparse "struct mdoc *mdoc" +.Ft void +.Fn mdoc_free "struct mdoc *mdoc" +.Ft "const struct mdoc_meta *" +.Fn mdoc_meta "const struct mdoc *mdoc" +.Ft "const struct mdoc_node *" +.Fn mdoc_node "const struct mdoc *mdoc" +.Ft int +.Fn mdoc_parseln "struct mdoc *mdoc" "int line" "char *buf" +.Ft int +.Fn mdoc_reset "struct mdoc *mdoc" +.Sh DESCRIPTION +The +.Nm mdoc +library parses lines of +.Xr mdoc 7 +input +into an abstract syntax tree (AST). 
+.Pp +In general, applications initiate a parsing sequence with +.Fn mdoc_alloc , +parse each line in a document with +.Fn mdoc_parseln , +close the parsing session with +.Fn mdoc_endparse , +operate over the syntax tree returned by +.Fn mdoc_node +and +.Fn mdoc_meta , +then free all allocated memory with +.Fn mdoc_free . +The +.Fn mdoc_reset +function may be used in order to reset the parser for another input +sequence. +See the +.Sx EXAMPLES +section for a simple example. +.Pp +This section further defines the +.Sx Types , +.Sx Functions +and +.Sx Variables +available to programmers. +Following that, the +.Sx Abstract Syntax Tree +section documents the output tree. +.Ss Types +Both functions (see +.Sx Functions ) +and variables (see +.Sx Variables ) +may use the following types: +.Bl -ohang +.It Vt struct mdoc +An opaque type defined in +.Pa mdoc.c . +Its values are only used privately within the library. +.It Vt struct mdoc_node +A parsed node. +Defined in +.Pa mdoc.h . +See +.Sx Abstract Syntax Tree +for details. +.It Vt mandocmsg +A function callback type defined in +.Pa mandoc.h . +.El +.Ss Functions +Function descriptions follow: +.Bl -ohang +.It Fn mdoc_alloc +Allocates a parsing structure. +The +.Fa data +pointer is passed to +.Fa msgs . +The +.Fa pflags +arguments are defined in +.Pa mdoc.h . +Returns NULL on failure. +If non-NULL, the pointer must be freed with +.Fn mdoc_free . +.It Fn mdoc_reset +Reset the parser for another parse routine. +After its use, +.Fn mdoc_parseln +behaves as if invoked for the first time. +If it returns 0, memory could not be allocated. +.It Fn mdoc_free +Free all resources of a parser. +The pointer is no longer valid after invocation. +.It Fn mdoc_parseln +Parse a nil-terminated line of input. +This line should not contain the trailing newline. +Returns 0 on failure, 1 on success. +The input buffer +.Fa buf +is modified by this function. +.It Fn mdoc_endparse +Signals that the parse is complete. 
+Note that if +.Fn mdoc_endparse +is called subsequent to +.Fn mdoc_node , +the resulting tree is incomplete. +Returns 0 on failure, 1 on success. +.It Fn mdoc_node +Returns the first node of the parse. +Note that if +.Fn mdoc_parseln +or +.Fn mdoc_endparse +return 0, the tree will be incomplete. +.It Fn mdoc_meta +Returns the document's parsed meta-data. +If this information has not yet been supplied or +.Fn mdoc_parseln +or +.Fn mdoc_endparse +return 0, the data will be incomplete. +.El +.Ss Variables +The following variables are also defined: +.Bl -ohang +.It Va mdoc_macronames +An array of string-ified token names. +.It Va mdoc_argnames +An array of string-ified token argument names. +.El +.Ss Abstract Syntax Tree +The +.Nm +functions produce an abstract syntax tree (AST) describing input in a +regular form. +It may be reviewed at any time with +.Fn mdoc_node ; +however, if called before +.Fn mdoc_endparse , +or after +.Fn mdoc_endparse +or +.Fn mdoc_parseln +fail, it may be incomplete. +.Pp +This AST is governed by the ontological +rules dictated in +.Xr mdoc 7 +and derives its terminology accordingly. +.Qq In-line +elements described in +.Xr mdoc 7 +are described simply as +.Qq elements . +.Pp +The AST is composed of +.Vt struct mdoc_node +nodes with block, head, body, element, root and text types as declared +by the +.Va type +field. +Each node also provides its parse point (the +.Va line , +.Va sec , +and +.Va pos +fields), its position in the tree (the +.Va parent , +.Va child , +.Va next +and +.Va prev +fields) and some type-specific data. +.Pp +The tree itself is arranged according to the following normal form, +where capitalised non-terminals represent nodes.
+.Pp +.Bl -tag -width "ELEMENTXX" -compact +.It ROOT +\(<- mnode+ +.It mnode +\(<- BLOCK | ELEMENT | TEXT +.It BLOCK +\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]] +.It ELEMENT +\(<- TEXT* +.It HEAD +\(<- mnode+ +.It BODY +\(<- mnode+ +.It TAIL +\(<- mnode+ +.It TEXT +\(<- [[:printable:],0x1e]* +.El +.Pp +Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of +the BLOCK production: these refer to punctuation marks. +Furthermore, although a TEXT node will generally have a non-zero-length +string, in the specific case of +.Sq \&.Bd \-literal , +an empty line will produce a zero-length string. +Multiple body parts are only found in invocations of +.Sq \&.Bl \-column , +where a new body introduces a new phrase. +.Sh EXAMPLES +The following example reads lines from stdin and parses them, operating +on the finished parse tree with +.Fn parsed . +This example does not error-check nor free memory upon failure. +.Bd -literal -offset indent +struct mdoc *mdoc; +const struct mdoc_node *node; +char *buf; +size_t alloc_len; +ssize_t len; +int line; + +line = 1; +mdoc = mdoc_alloc(NULL, 0, NULL); +buf = NULL; +alloc_len = 0; + +while ((len = getline(&buf, &alloc_len, stdin)) >= 0) { + if (len && buf[len - 1] == '\en') + buf[len - 1] = '\e0'; + if ( ! mdoc_parseln(mdoc, line, buf)) + errx(1, "mdoc_parseln"); + line++; +} + +if ( ! mdoc_endparse(mdoc)) + errx(1, "mdoc_endparse"); +if (NULL == (node = mdoc_node(mdoc))) + errx(1, "mdoc_node"); + +parsed(mdoc, node); +mdoc_free(mdoc); +.Ed +.Pp +Please see +.Pa main.c +in the source archive for a rigorous reference. +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr mdoc 7 +.Sh AUTHORS +The +.Nm +library was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv .
diff --git a/commands/mdocml/mdoc.7 b/commands/mdocml/mdoc.7 new file mode 100644 index 000000000..bc5f1ee33 --- /dev/null +++ b/commands/mdocml/mdoc.7 @@ -0,0 +1,2375 @@ +.\" $Id: mdoc.7,v 1.126 2010/06/12 11:41:50 kristaps Exp $ +.\" +.\" Copyright (c) 2009 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: June 12 2010 $ +.Dt MDOC 7 +.Os +.Sh NAME +.Nm mdoc +.Nd mdoc language reference +.Sh DESCRIPTION +The +.Nm mdoc +language is used to format +.Bx +.Ux +manuals. In this reference document, we describe its syntax, structure, +and usage. Our reference implementation is mandoc; the +.Sx COMPATIBILITY +section describes compatibility with other troff \-mdoc implementations. +.Pp +An +.Nm +document follows simple rules: lines beginning with the control +character +.Sq \. +are parsed for macros. Other lines are interpreted within the scope of +prior macros: +.Bd -literal -offset indent +\&.Sh Macro lines change control state. +Other lines are interpreted within the current state. +.Ed +.Sh LANGUAGE SYNTAX +.Nm +documents may contain only graphable 7-bit ASCII characters, the space +character, and, in certain circumstances, the tab character. All +manuals must have +.Ux +line terminators. 
+.Ss Comments +Text following a +.Sq \e" , +whether in a macro or free-form text line, is ignored to the end of +line. A macro line with only a control character and comment escape, +.Sq \&.\e" , +is also ignored. Macro lines with only a control character and optionally +whitespace are stripped from input. +.Ss Reserved Characters +Within a macro line, the following characters are reserved: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It \&. +.Pq period +.It \&, +.Pq comma +.It \&: +.Pq colon +.It \&; +.Pq semicolon +.It \&( +.Pq left-parenthesis +.It \&) +.Pq right-parenthesis +.It \&[ +.Pq left-bracket +.It \&] +.Pq right-bracket +.It \&? +.Pq question +.It \&! +.Pq exclamation +.It \&| +.Pq vertical bar +.El +.Pp +Use of reserved characters is described in +.Sx MACRO SYNTAX . +For general use in macro lines, these characters must either be escaped +with a non-breaking space +.Pq Sq \e& +or, if applicable, an appropriate escape sequence used. +.Ss Special Characters +Special characters may occur in both macro and free-form lines. +Sequences begin with the escape character +.Sq \e +followed by either an open-parenthesis +.Sq \&( +for two-character sequences; an open-bracket +.Sq \&[ +for n-character sequences (terminated at a close-bracket +.Sq \&] ) ; +or a single one-character sequence. +See +.Xr mandoc_char 7 +for a complete list. +Examples include +.Sq \e(em +.Pq em-dash +and +.Sq \ee +.Pq back-slash . +.Ss Text Decoration +Terms may be text-decorated using the +.Sq \ef +escape followed by an indicator: B (bold), I, (italic), R (Roman), or P +(revert to previous mode): +.Pp +.D1 \efBbold\efR \efIitalic\efP +.Pp +A numerical representation 3, 2, or 1 (bold, italic, and Roman, +respectively) may be used instead. +A text decoration is valid within +the current font scope only: if a macro opens a font scope alongside +its own scope, such as +.Sx \&Bf +.Cm \&Sy , +in-scope invocations of +.Sq \ef +are only valid within the font scope of the macro. 
+If +.Sq \ef +is specified outside of any font scope, such as in unenclosed, free-form +text, it will affect the remainder of the document. +.Pp +Text may also be sized with the +.Sq \es +escape, whose syntax is one of +.Sq \es+-n +for one-digit numerals; +.Sq \es(+-nn +or +.Sq \es+-(nn +for two-digit numerals; and +.Sq \es[+-N] , +.Sq \es+-[N] , +.Sq \es'+-N' , +or +.Sq \es+-'N' +for arbitrary-digit numerals: +.Pp +.D1 \es+1bigger\es-1 +.D1 \es[+10]much bigger\es[-10] +.D1 \es+(10much bigger\es-(10 +.D1 \es+'100'much much bigger\es-'100' +.Pp +Note these forms are +.Em not +recommended for +.Nm , +which encourages semantic annotation. +.Ss Predefined Strings +Historically, +.Xr groff 1 +also defined a set of package-specific +.Dq predefined strings , +which, like +.Sx Special Characters , +mark special output characters and strings by way of input codes. +Predefined strings are escaped with the slash-asterisk, +.Sq \e* : +single-character +.Sq \e*X , +two-character +.Sq \e*(XX , +and N-character +.Sq \e*[N] . +See +.Xr mandoc_char 7 +for a complete list. +Examples include +.Sq \e*(Am +.Pq ampersand +and +.Sq \e*(Ba +.Pq vertical bar . +.Ss Whitespace +Whitespace consists of the space character. +In free-form lines, whitespace is preserved within a line; un-escaped +trailing spaces are stripped from input (unless in a literal context). +Blank free-form lines, which may include whitespace, are only permitted +within literal contexts. +.Pp +In macro lines, whitespace delimits arguments and is discarded. +If arguments are quoted, whitespace within the quotes is retained. +.Ss Quotation +Macro arguments may be quoted with a double-quote to group +space-delimited terms or to retain blocks of whitespace. +A quoted argument begins with a double-quote preceded by whitespace. +The next double-quote not pair-wise adjacent to another double-quote +terminates the literal, regardless of surrounding whitespace. 
+.Pp +This produces tokens +.Sq a" , +.Sq b c , +.Sq de , +and +.Sq fg" . +Note that any quoted term, be it argument or macro, is indiscriminately +considered literal text. +Thus, the following produces +.Sq \&Em a : +.Bd -literal -offset indent +\&.Em "Em a" +.Ed +.Pp +In free-form mode, quotes are regarded as opaque text. +.Ss Dates +There are several macros in +.Nm +that require a date argument. +The canonical form for dates is the American format: +.Pp +.D1 Cm Month Day , Year +.Pp +The +.Cm Day +value is an optionally zero-padded numeral. +The +.Cm Month +value is the full month name. +The +.Cm Year +value is the full four-digit year. +.Pp +Reduced form dates are broken-down canonical form dates: +.Pp +.D1 Cm Month , Year +.D1 Cm Year +.Pp +Some examples of valid dates follow: +.Pp +.D1 "May, 2009" Pq reduced form +.D1 "2009" Pq reduced form +.D1 "May 20, 2009" Pq canonical form +.Ss Scaling Widths +Many macros support scaled widths for their arguments, such as +stipulating a two-inch list indentation with the following: +.Bd -literal -offset indent +\&.Bl -tag -width 2i +.Ed +.Pp +The syntax for scaled widths is +.Sq Li [+-]?[0-9]*.[0-9]*[:unit:] , +where a decimal must be preceded or followed by at least one digit. +Negative numbers, while accepted, are truncated to zero. +The following scaling units are accepted: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It c +centimetre +.It i +inch +.It P +pica (~1/6 inch) +.It p +point (~1/72 inch) +.It f +synonym for +.Sq u +.It v +default vertical span +.It m +width of rendered +.Sq m +.Pq em +character +.It n +width of rendered +.Sq n +.Pq en +character +.It u +default horizontal span +.It M +mini-em (~1/100 em) +.El +.Pp +Using anything other than +.Sq m , +.Sq n , +.Sq u , +or +.Sq v +is necessarily non-portable across output media. +See +.Sx COMPATIBILITY . +.Ss Sentence Spacing +When composing a manual, make sure that your sentences end at the end of +a line.
+By doing so, front-ends will be able to apply the proper amount of +spacing after the end of sentence (unescaped) period, exclamation mark, +or question mark followed by zero or more non-sentence closing +delimiters ( +.Ns Sq \&) , +.Sq \&] , +.Sq \&' , +.Sq \&" ) . +.Pp +The proper spacing is also intelligently preserved if a sentence ends at +the boundary of a macro line, e.g., +.Pp +.D1 \&Xr mandoc 1 \. +.D1 \&Fl T \&Ns \&Cm ascii \. +.Sh MANUAL STRUCTURE +A well-formed +.Nm +document consists of a document prologue followed by one or more +sections. +.Pp +The prologue, which consists of (in order) the +.Sx \&Dd , +.Sx \&Dt , +and +.Sx \&Os +macros, is required for every document. +.Pp +The first section (sections are denoted by +.Sx \&Sh ) +must be the NAME section, consisting of at least one +.Sx \&Nm +followed by +.Sx \&Nd . +.Pp +Following that, convention dictates specifying at least the +.Em SYNOPSIS +and +.Em DESCRIPTION +sections, although this varies between manual sections. +.Pp +The following is a well-formed skeleton +.Nm +file: +.Bd -literal -offset indent +\&.Dd $\&Mdocdate$ +\&.Dt mdoc 7 +\&.Os +\&.Sh NAME +\&.Nm foo +\&.Nd a description goes here +\&.\e\*q The next is for sections 2, 3, & 9 only. +\&.\e\*q .Sh LIBRARY +\&.Sh SYNOPSIS +\&.Nm foo +\&.Op Fl options +\&.Ar +\&.Sh DESCRIPTION +The +\&.Nm +utility processes files ... +\&.\e\*q .Sh IMPLEMENTATION NOTES +\&.\e\*q The next is for sections 2, 3, & 9 only. +\&.\e\*q .Sh RETURN VALUES +\&.\e\*q The next is for sections 1, 6, 7, & 8 only. +\&.\e\*q .Sh ENVIRONMENT +\&.\e\*q .Sh FILES +\&.\e\*q The next is for sections 1 & 8 only. +\&.\e\*q .Sh EXIT STATUS +\&.\e\*q .Sh EXAMPLES +\&.\e\*q The next is for sections 1, 4, 6, 7, & 8 only. +\&.\e\*q .Sh DIAGNOSTICS +\&.\e\*q The next is for sections 2, 3, & 9 only. 
+\&.\e\*q .Sh ERRORS +\&.\e\*q .Sh SEE ALSO +\&.\e\*q .Xr foobar 1 +\&.\e\*q .Sh STANDARDS +\&.\e\*q .Sh HISTORY +\&.\e\*q .Sh AUTHORS +\&.\e\*q .Sh CAVEATS +\&.\e\*q .Sh BUGS +\&.\e\*q .Sh SECURITY CONSIDERATIONS +.Ed +.Pp +The sections in a +.Nm +document are conventionally ordered as they appear above. +Sections should be composed as follows: +.Bl -ohang -offset Ds +.It Em NAME +The name(s) and a short description of the documented material. +The syntax for this is as follows: +.Bd -literal -offset indent +\&.Nm name0 +\&.Nm name1 +\&.Nm name2 +\&.Nd a short description +.Ed +.Pp +The +.Sx \&Nm +macro(s) must precede the +.Sx \&Nd +macro. +.Pp +See +.Sx \&Nm +and +.Sx \&Nd . +.It Em LIBRARY +The name of the library containing the documented material, which is +assumed to be a function in a section 2, 3, or 9 manual. +The syntax for this is as follows: +.Bd -literal -offset indent +\&.Lb libarm +.Ed +.Pp +See +.Sx \&Lb . +.It Em SYNOPSIS +Documents the utility invocation syntax, function call syntax, or device +configuration. +.Pp +For the first, utilities (sections 1, 6, and 8), this is +generally structured as follows: +.Bd -literal -offset indent +\&.Nm foo +\&.Op Fl v +\&.Op Fl o Ar file +\&.Op Ar +\&.Nm bar +\&.Op Fl v +\&.Op Fl o Ar file +\&.Op Ar +.Ed +.Pp +For the second, function calls (sections 2, 3, 9): +.Bd -literal -offset indent +\&.Vt extern const char *global; +\&.In header.h +\&.Ft "char *" +\&.Fn foo "const char *src" +\&.Ft "char *" +\&.Fn bar "const char *src" +.Ed +.Pp +And for the third, configurations (section 4): +.Bd -literal -offset indent +\&.Cd \*qit* at isa? port 0x2e\*q +\&.Cd \*qit* at isa? port 0x4e\*q +.Ed +.Pp +Manuals not in these sections generally don't need a +.Em SYNOPSIS . +.Pp +Some macros are displayed differently in the +.Em SYNOPSIS +section, particularly +.Sx \&Nm , +.Sx \&Cd , +.Sx \&Fd , +.Sx \&Fn , +.Sx \&Fo , +.Sx \&In , +.Sx \&Vt , +and +.Sx \&Ft . +All of these macros are output on their own line.
If two such +dissimilar macros are pair-wise invoked (except for +.Sx \&Ft +before +.Sx \&Fo +or +.Sx \&Fn ) , +they are separated by a vertical space, unless in the case of +.Sx \&Fo , +.Sx \&Fn , +and +.Sx \&Ft , +which are always separated by vertical space. +.It Em DESCRIPTION +This expands upon the brief, one-line description in +.Em NAME . +It usually contains a break-down of the options (if documenting a +command), such as: +.Bd -literal -offset indent +The arguments are as follows: +\&.Bl \-tag \-width Ds +\&.It Fl v +Print verbose information. +\&.El +.Ed +.Pp +Manuals not documenting a command won't include the above fragment. +.It Em IMPLEMENTATION NOTES +Implementation-specific notes should be kept here. +This is useful when implementing standard functions that may have side +effects or notable algorithmic implications. +.It Em RETURN VALUES +This section is the dual of +.Em EXIT STATUS , +which is used for commands. +It documents the return values of functions in sections 2, 3, and 9. +.Pp +See +.Sx \&Rv . +.It Em ENVIRONMENT +Documents any usages of environment variables, e.g., +.Xr environ 7 . +.Pp +See +.Sx \&Ev . +.It Em FILES +Documents files used. +It's helpful to document both the file and a short description of how +the file is used (created, modified, etc.). +.Pp +See +.Sx \&Pa . +.It Em EXIT STATUS +Command exit status for section 1, 6, and 8 manuals. +This section is the dual of +.Em RETURN VALUES , +which is used for functions. +Historically, this information was described in +.Em DIAGNOSTICS , +a practise that is now discouraged. +.Pp +See +.Sx \&Ex . +.It Em EXAMPLES +Example usages. +This often contains snippets of well-formed, well-tested invocations. +Make doubly sure that your examples work properly! +.It Em DIAGNOSTICS +Documents error conditions. +This is most useful in section 4 manuals. +Historically, this section was used in place of +.Em EXIT STATUS +for manuals in sections 1, 6, and 8; however, this practise is +discouraged. 
+.Pp +See +.Sx \&Bl +.Fl diag . +.It Em ERRORS +Documents error handling in sections 2, 3, and 9. +.Pp +See +.Sx \&Er . +.It Em SEE ALSO +References other manuals with related topics. +This section should exist for most manuals. +Cross-references should conventionally be ordered first by section, then +alphabetically. +.Pp +See +.Sx \&Xr . +.It Em STANDARDS +References any standards implemented or used. +If not adhering to any standards, the +.Em HISTORY +section should be used instead. +.Pp +See +.Sx \&St . +.It Em HISTORY +The history of any manual without a +.Em STANDARDS +section should be described in this section. +.It Em AUTHORS +Credits to authors, if applicable, should appear in this section. +Authors should generally be noted by both name and an e-mail address. +.Pp +See +.Sx \&An . +.It Em CAVEATS +Common misuses and misunderstandings should be explained +in this section. +.It Em BUGS +Extant bugs should be described in this section. +.It Em SECURITY CONSIDERATIONS +Documents any security precautions that operators should consider. +.El +.Sh MACRO SYNTAX +Macros are one to three characters in length and begin with a +control character, +.Sq \&. , +at the beginning of the line. +An arbitrary amount of whitespace may sit between the control character +and the macro name. +Thus, the following are equivalent: +.Bd -literal -offset indent +\&.Pp +\&.\ \ \ \&Pp +.Ed +.Pp +The syntax of a macro depends on its classification. +In this section, +.Sq \-arg +refers to macro arguments, which may be followed by zero or more +.Sq parm +parameters; +.Sq \&Yo +opens the scope of a macro; and if specified, +.Sq \&Yc +closes it out. +.Pp +The +.Em Callable +column indicates that the macro may be called subsequent to the initial +line-macro. +If a macro is not callable, then its invocation after the initial line +macro is interpreted as opaque text, such that +.Sq \&.Fl \&Sh +produces +.Sq Fl \&Sh .
+.Pp +The +.Em Parsable +column indicates whether the macro may be followed by further +(ostensibly callable) macros. +If a macro is not parsable, subsequent macro invocations on the line +will be interpreted as opaque text. +.Pp +The +.Em Scope +column, if applicable, describes closure rules. +.Ss Block full-explicit +Multi-line scope closed by an explicit closing macro. +All macros contain bodies; only +.Sx \&Bf +contains a head. +.Bd -literal -offset indent +\&.Yo \(lB\-arg \(lBparm...\(rB\(rB \(lBhead...\(rB +\(lBbody...\(rB +\&.Yc +.Ed +.Pp +.Bl -column -compact -offset indent "MacroX" "CallableX" "ParsableX" "closed by XXX" +.It Em Macro Ta Em Callable Ta Em Parsable Ta Em Scope +.It Sx \&Bd Ta \&No Ta \&No Ta closed by Sx \&Ed +.It Sx \&Bf Ta \&No Ta \&No Ta closed by Sx \&Ef +.It Sx \&Bk Ta \&No Ta \&No Ta closed by Sx \&Ek +.It Sx \&Bl Ta \&No Ta \&No Ta closed by Sx \&El +.It Sx \&Ed Ta \&No Ta \&No Ta opened by Sx \&Bd +.It Sx \&Ef Ta \&No Ta \&No Ta opened by Sx \&Bf +.It Sx \&Ek Ta \&No Ta \&No Ta opened by Sx \&Bk +.It Sx \&El Ta \&No Ta \&No Ta opened by Sx \&Bl +.El +.Ss Block full-implicit +Multi-line scope closed by end-of-file or implicitly by another macro. +All macros have bodies; some +.Po +.Sx \&It Fl bullet , +.Fl hyphen , +.Fl dash , +.Fl enum , +.Fl item +.Pc +don't have heads; only one +.Po +.Sx \&It Fl column +.Pc +has multiple heads. +.Bd -literal -offset indent +\&.Yo \(lB\-arg \(lBparm...\(rB\(rB \(lBhead... \(lBTa head...\(rB\(rB +\(lBbody...\(rB +.Ed +.Pp +.Bl -column -compact -offset indent "MacroX" "CallableX" "ParsableX" "closed by XXXXXXXXXXX" +.It Em Macro Ta Em Callable Ta Em Parsable Ta Em Scope +.It Sx \&It Ta \&No Ta Yes Ta closed by Sx \&It , Sx \&El +.It Sx \&Nd Ta \&No Ta \&No Ta closed by Sx \&Sh +.It Sx \&Sh Ta \&No Ta \&No Ta closed by Sx \&Sh +.It Sx \&Ss Ta \&No Ta \&No Ta closed by Sx \&Sh , Sx \&Ss +.El +.Ss Block partial-explicit +Like block full-explicit, but also with single-line scope.
+Each has at least a body and, in limited circumstances, a head +.Po +.Sx \&Fo , +.Sx \&Eo +.Pc +and/or tail +.Pq Sx \&Ec . +.Bd -literal -offset indent +\&.Yo \(lB\-arg \(lBparm...\(rB\(rB \(lBhead...\(rB +\(lBbody...\(rB +\&.Yc \(lBtail...\(rB + +\&.Yo \(lB\-arg \(lBparm...\(rB\(rB \(lBhead...\(rB \ +\(lBbody...\(rB \&Yc \(lBtail...\(rB +.Ed +.Pp +.Bl -column "MacroX" "CallableX" "ParsableX" "closed by XXXX" -compact -offset indent +.It Em Macro Ta Em Callable Ta Em Parsable Ta Em Scope +.It Sx \&Ac Ta Yes Ta Yes Ta opened by Sx \&Ao +.It Sx \&Ao Ta Yes Ta Yes Ta closed by Sx \&Ac +.It Sx \&Bc Ta Yes Ta Yes Ta opened by Sx \&Bo +.It Sx \&Bo Ta Yes Ta Yes Ta closed by Sx \&Bc +.It Sx \&Brc Ta Yes Ta Yes Ta opened by Sx \&Bro +.It Sx \&Bro Ta Yes Ta Yes Ta closed by Sx \&Brc +.It Sx \&Dc Ta Yes Ta Yes Ta opened by Sx \&Do +.It Sx \&Do Ta Yes Ta Yes Ta closed by Sx \&Dc +.It Sx \&Ec Ta Yes Ta Yes Ta opened by Sx \&Eo +.It Sx \&Eo Ta Yes Ta Yes Ta closed by Sx \&Ec +.It Sx \&Fc Ta Yes Ta Yes Ta opened by Sx \&Fo +.It Sx \&Fo Ta \&No Ta \&No Ta closed by Sx \&Fc +.It Sx \&Oc Ta Yes Ta Yes Ta opened by Sx \&Oo +.It Sx \&Oo Ta Yes Ta Yes Ta closed by Sx \&Oc +.It Sx \&Pc Ta Yes Ta Yes Ta opened by Sx \&Po +.It Sx \&Po Ta Yes Ta Yes Ta closed by Sx \&Pc +.It Sx \&Qc Ta Yes Ta Yes Ta opened by Sx \&Qo +.It Sx \&Qo Ta Yes Ta Yes Ta closed by Sx \&Qc +.It Sx \&Re Ta \&No Ta \&No Ta opened by Sx \&Rs +.It Sx \&Rs Ta \&No Ta \&No Ta closed by Sx \&Re +.It Sx \&Sc Ta Yes Ta Yes Ta opened by Sx \&So +.It Sx \&So Ta Yes Ta Yes Ta closed by Sx \&Sc +.It Sx \&Xc Ta Yes Ta Yes Ta opened by Sx \&Xo +.It Sx \&Xo Ta Yes Ta Yes Ta closed by Sx \&Xc +.El +.Ss Block partial-implicit +Like block full-implicit, but with single-line scope closed by +.Sx Reserved Characters +or end of line.
+.Bd -literal -offset indent +\&.Yo \(lB\-arg \(lBval...\(rB\(rB \(lBbody...\(rB \(lBres...\(rB +.Ed +.Pp +.Bl -column "MacroX" "CallableX" "ParsableX" -compact -offset indent +.It Em Macro Ta Em Callable Ta Em Parsable +.It Sx \&Aq Ta Yes Ta Yes +.It Sx \&Bq Ta Yes Ta Yes +.It Sx \&Brq Ta Yes Ta Yes +.It Sx \&D1 Ta \&No Ta \&Yes +.It Sx \&Dl Ta \&No Ta Yes +.It Sx \&Dq Ta Yes Ta Yes +.It Sx \&Op Ta Yes Ta Yes +.It Sx \&Pq Ta Yes Ta Yes +.It Sx \&Ql Ta Yes Ta Yes +.It Sx \&Qq Ta Yes Ta Yes +.It Sx \&Sq Ta Yes Ta Yes +.It Sx \&Vt Ta Yes Ta Yes +.El +.Pp +Note that the +.Sx \&Vt +macro is a +.Sx Block partial-implicit +only when invoked as the first macro +in a +.Em SYNOPSIS +section line, else it is +.Sx In-line . +.Ss In-line +Closed by +.Sx Reserved Characters , +end of line, fixed argument lengths, and/or subsequent macros. +In-line macros have only text children. +If a number (or inequality) of arguments is +.Pq n , +then the macro accepts an arbitrary number of arguments. +.Bd -literal -offset indent +\&.Yo \(lB\-arg \(lBval...\(rB\(rB \(lBargs...\(rB \(lBres...\(rB + +\&.Yo \(lB\-arg \(lBval...\(rB\(rB \(lBargs...\(rB Yc...
+ +\&.Yo \(lB\-arg \(lBval...\(rB\(rB arg0 arg1 argN +.Ed +.Pp +.Bl -column "MacroX" "CallableX" "ParsableX" "Arguments" -compact -offset indent +.It Em Macro Ta Em Callable Ta Em Parsable Ta Em Arguments +.It Sx \&%A Ta \&No Ta \&No Ta >0 +.It Sx \&%B Ta \&No Ta \&No Ta >0 +.It Sx \&%C Ta \&No Ta \&No Ta >0 +.It Sx \&%D Ta \&No Ta \&No Ta >0 +.It Sx \&%I Ta \&No Ta \&No Ta >0 +.It Sx \&%J Ta \&No Ta \&No Ta >0 +.It Sx \&%N Ta \&No Ta \&No Ta >0 +.It Sx \&%O Ta \&No Ta \&No Ta >0 +.It Sx \&%P Ta \&No Ta \&No Ta >0 +.It Sx \&%Q Ta \&No Ta \&No Ta >0 +.It Sx \&%R Ta \&No Ta \&No Ta >0 +.It Sx \&%T Ta \&No Ta \&No Ta >0 +.It Sx \&%U Ta \&No Ta \&No Ta >0 +.It Sx \&%V Ta \&No Ta \&No Ta >0 +.It Sx \&Ad Ta Yes Ta Yes Ta n +.It Sx \&An Ta Yes Ta Yes Ta n +.It Sx \&Ap Ta Yes Ta Yes Ta 0 +.It Sx \&Ar Ta Yes Ta Yes Ta n +.It Sx \&At Ta Yes Ta Yes Ta 1 +.It Sx \&Bsx Ta Yes Ta Yes Ta n +.It Sx \&Bt Ta \&No Ta \&No Ta 0 +.It Sx \&Bx Ta Yes Ta Yes Ta n +.It Sx \&Cd Ta Yes Ta Yes Ta >0 +.It Sx \&Cm Ta Yes Ta Yes Ta n +.It Sx \&Db Ta \&No Ta \&No Ta 1 +.It Sx \&Dd Ta \&No Ta \&No Ta >0 +.It Sx \&Dt Ta \&No Ta \&No Ta n +.It Sx \&Dv Ta Yes Ta Yes Ta n +.It Sx \&Dx Ta Yes Ta Yes Ta n +.It Sx \&Em Ta Yes Ta Yes Ta >0 +.It Sx \&En Ta \&No Ta \&No Ta 0 +.It Sx \&Er Ta Yes Ta Yes Ta >0 +.It Sx \&Es Ta \&No Ta \&No Ta 0 +.It Sx \&Ev Ta Yes Ta Yes Ta n +.It Sx \&Ex Ta \&No Ta \&No Ta n +.It Sx \&Fa Ta Yes Ta Yes Ta n +.It Sx \&Fd Ta \&No Ta \&No Ta >0 +.It Sx \&Fl Ta Yes Ta Yes Ta n +.It Sx \&Fn Ta Yes Ta Yes Ta >0 +.It Sx \&Fr Ta \&No Ta \&No Ta n +.It Sx \&Ft Ta Yes Ta Yes Ta n +.It Sx \&Fx Ta Yes Ta Yes Ta n +.It Sx \&Hf Ta \&No Ta \&No Ta n +.It Sx \&Ic Ta Yes Ta Yes Ta >0 +.It Sx \&In Ta \&No Ta \&No Ta n +.It Sx \&Lb Ta \&No Ta \&No Ta 1 +.It Sx \&Li Ta Yes Ta Yes Ta n +.It Sx \&Lk Ta Yes Ta Yes Ta n +.It Sx \&Lp Ta \&No Ta \&No Ta 0 +.It Sx \&Ms Ta Yes Ta Yes Ta >0 +.It Sx \&Mt Ta Yes Ta Yes Ta >0 +.It Sx \&Nm Ta Yes Ta Yes Ta n +.It Sx \&No Ta Yes Ta Yes Ta 0 +.It Sx \&Ns Ta Yes 
Ta Yes Ta 0 +.It Sx \&Nx Ta Yes Ta Yes Ta n +.It Sx \&Os Ta \&No Ta \&No Ta n +.It Sx \&Ot Ta \&No Ta \&No Ta n +.It Sx \&Ox Ta Yes Ta Yes Ta n +.It Sx \&Pa Ta Yes Ta Yes Ta n +.It Sx \&Pf Ta Yes Ta Yes Ta 1 +.It Sx \&Pp Ta \&No Ta \&No Ta 0 +.It Sx \&Rv Ta \&No Ta \&No Ta n +.It Sx \&Sm Ta \&No Ta \&No Ta 1 +.It Sx \&St Ta \&No Ta Yes Ta 1 +.It Sx \&Sx Ta Yes Ta Yes Ta >0 +.It Sx \&Sy Ta Yes Ta Yes Ta >0 +.It Sx \&Tn Ta Yes Ta Yes Ta >0 +.It Sx \&Ud Ta \&No Ta \&No Ta 0 +.It Sx \&Ux Ta Yes Ta Yes Ta n +.It Sx \&Va Ta Yes Ta Yes Ta n +.It Sx \&Vt Ta Yes Ta Yes Ta >0 +.It Sx \&Xr Ta Yes Ta Yes Ta >0 +.It Sx \&br Ta \&No Ta \&No Ta 0 +.It Sx \&sp Ta \&No Ta \&No Ta 1 +.El +.Sh REFERENCE +This section is a canonical reference of all macros, arranged +alphabetically. +For the scoping of individual macros, see +.Sx MACRO SYNTAX . +.Ss \&%A +Author name of an +.Sx \&Rs +block. Multiple authors should each be accorded their own +.Sx \%%A +line. Author names should be ordered with full or abbreviated +forename(s) first, then full surname. +.Ss \&%B +Book title of an +.Sx \&Rs +block. This macro may also be used in a non-bibliographic context when +referring to book titles. +.Ss \&%C +Publication city or location of an +.Sx \&Rs +block. +.Pp +.Em Remarks : +this macro is not implemented in +.Xr groff 1 . +.Ss \&%D +Publication date of an +.Sx \&Rs +block. This should follow the reduced or canonical form syntax +described in +.Sx Dates . +.Ss \&%I +Publisher or issuer name of an +.Sx \&Rs +block. +.Ss \&%J +Journal name of an +.Sx \&Rs +block. +.Ss \&%N +Issue number (usually for journals) of an +.Sx \&Rs +block. +.Ss \&%O +Optional information of an +.Sx \&Rs +block. +.Ss \&%P +Book or journal page number of an +.Sx \&Rs +block. +.Ss \&%Q +Institutional author (school, government, etc.) of an +.Sx \&Rs +block. Multiple institutional authors should each be accorded their own +.Sx \&%Q +line. +.Ss \&%R +Technical report name of an +.Sx \&Rs +block. 
+.Ss \&%T +Article title of an +.Sx \&Rs +block. This macro may also be used in a non-bibliographical context +when referring to article titles. +.Ss \&%U +URI of reference document. +.Ss \&%V +Volume number of an +.Sx \&Rs +block. +.Ss \&Ac +Closes an +.Sx \&Ao +block. Does not have any tail arguments. +.Ss \&Ad +Address construct: usually in the context of a computational address in +memory, not a physical (post) address. +.Pp +Examples: +.D1 \&.Ad [0,$] +.D1 \&.Ad 0x00000000 +.Ss \&An +Author name. +This macro may alternatively accept the following arguments, although +these may not be specified along with a parameter: +.Bl -tag -width 12n -offset indent +.It Fl split +Renders a line break before each author listing. +.It Fl nosplit +The opposite of +.Fl split . +.El +.Pp +In the AUTHORS section, the default is not to split the first author +listing, but all subsequent author listings, whether or not they're +interspersed by other macros or text, are split. +Thus, specifying +.Fl split +will cause the first listing also to be split. +If not in the AUTHORS section, the default is not to split. +.Pp +Examples: +.D1 \&.An -nosplit +.D1 \&.An J. D. Ullman . +.Pp +.Em Remarks : +the effects of +.Fl split +or +.Fl nosplit +are re-set when entering the AUTHORS section, so if one specifies +.Sx \&An Fl nosplit +in the general document body, it must be re-specified in the AUTHORS +section. +.Ss \&Ao +Begins a block enclosed by angled brackets. +Does not have any head arguments. +.Pp +Examples: +.D1 \&.Fl -key= \&Ns \&Ao \&Ar val \&Ac +.Pp +See also +.Sx \&Aq . +.Ss \&Ap +Inserts an apostrophe without any surrounding white-space. +This is generally used as a grammatical device when referring to the verb +form of a function: +.Bd -literal -offset indent +\&.Fn execve Ap d +.Ed +.Ss \&Aq +Encloses its arguments in angled brackets.
+.Pp +Examples: +.D1 \&.Fl -key= \&Ns \&Aq \&Ar val +.Pp +.Em Remarks : +this macro is often abused for rendering URIs, which should instead use +.Sx \&Lk +or +.Sx \&Mt , +or to note pre-processor +.Dq Li #include +statements, which should use +.Sx \&In . +.Pp +See also +.Sx \&Ao . +.Ss \&Ar +Command arguments. +If an argument is not provided, the string +.Dq file ... +is used as a default. +.Pp +Examples: +.D1 \&.Fl o \&Ns \&Ar file1 +.D1 \&.Ar +.D1 \&.Ar arg1 , arg2 . +.Ss \&At +Formats an AT&T version. +Accepts at most one parameter: +.Bl -tag -width 12n -offset indent +.It Cm v[1-7] | 32v +A version of +.At . +.It Cm V[.[1-4]]? +A system version of +.At . +.El +.Pp +Note that these parameters do not begin with a hyphen. +.Pp +Examples: +.D1 \&.At +.D1 \&.At V.1 +.Pp +See also +.Sx \&Bsx , +.Sx \&Bx , +.Sx \&Dx , +.Sx \&Fx , +.Sx \&Nx , +.Sx \&Ox , +and +.Sx \&Ux . +.Ss \&Bc +Closes a +.Sx \&Bo +block. Does not have any tail arguments. +.Ss \&Bd +Begins a display block. +A display is a collection of macros or text which may be collectively +offset or justified in a manner different from that +of the enclosing context. +By default, the block is preceded by a vertical space. +.Pp +Each display is associated with a type, which must be one of the +following arguments: +.Bl -tag -width 12n -offset indent +.It Fl ragged +Only left-justify the block. +.It Fl unfilled +Do not justify the block at all. +.It Fl filled +Left- and right-justify the block. +.It Fl literal +Alias for +.Fl unfilled . +.It Fl centered +Centre-justify each line. +.El +.Pp +The type must be provided first.
+Secondary arguments are as follows: +.Bl -tag -width 12n -offset indent +.It Fl offset Ar width +Offset by the value of +.Ar width , +which is interpreted as one of the following, specified in order: +.Bl -item +.It +As one of the pre-defined strings +.Ar indent , +the width of standard indentation; +.Ar indent-two , +twice +.Ar indent ; +.Ar left , +which has no effect ; +.Ar right , +which justifies to the right margin; and +.Ar center , +which aligns around an imagined centre axis. +.It +As a precalculated width for a named macro. +The most popular is the imaginary macro +.Ar \&Ds , +which resolves to +.Ar 6n . +.It +As a scaling unit following the syntax described in +.Sx Scaling Widths . +.It +As the calculated string length of the opaque string. +.El +.Pp +If not provided an argument, it will be ignored. +.It Fl compact +Do not assert a vertical space before the block. +.It Fl file Ar file +Prepend the file +.Ar file +before any text or macros within the block. +.El +.Pp +Examples: +.Bd -literal -offset indent +\&.Bd \-unfilled \-offset two-indent \-compact + Hello world. +\&.Ed +.Ed +.Pp +See also +.Sx \&D1 +and +.Sx \&Dl . +.Ss \&Bf +.Ss \&Bk +.Ss \&Bl +Begins a list composed of one or more list entries. +A list is associated with a type, which is a required argument. +Other arguments are +.Fl width , +defined per-type as accepting a literal or +.Sx Scaling Widths +value; +.Fl offset , +also accepting a literal or +.Sx Scaling Widths +value setting the list's global offset; and +.Fl compact , +suppressing the default vertical space printed before each list entry. +A list entry is specified by the +.Sx \&It +macro, which consists of a head and optional body (depending on the list +type). +A list must specify one of the following list types: +.Bl -tag -width 12n -offset indent +.It Fl bullet +A list offset by a bullet. +The head of list entries must be empty. +List entry bodies are positioned after the bullet. 
+The +.Fl width +argument varies the width of list bodies' left-margins. +.It Fl column +A columnated list. +The +.Fl width +argument has no effect. +The number of columns is specified as parameters to the +.Sx \&Bl +macro. +These dictate the width of columns either as +.Sx Scaling Widths +or literal text. +If the initial macro of a +.Fl column +list is not an +.Sx \&It , +an +.Sx \&It +context spanning each line is implied until an +.Sx \&It +line macro is encountered, at which point list bodies are interpreted as +described in the +.Sx \&It +documentation. +.It Fl dash +A list offset by a dash (hyphen). +The head of list entries must be empty. +List entry bodies are positioned past the dash. +The +.Fl width +argument varies the width of list bodies' left-margins. +.It Fl diag +Like +.Fl inset , +but with additional formatting to the head. +The +.Fl width +argument varies the width of list bodies' left-margins. +.It Fl enum +An enumerated list offset by the enumeration from 1. +The head of list entries must be empty. +List entry bodies are positioned after the enumeration. +The +.Fl width +argument varies the width of list bodies' left-margins. +.It Fl hang +Like +.Fl tag , +but instead of list bodies positioned after the head, they trail the +head text. +The +.Fl width +argument varies the width of list bodies' left-margins. +.It Fl hyphen +Synonym for +.Fl dash . +.It Fl inset +List bodies follow the list head. +The +.Fl width +argument is ignored. +.It Fl item +This produces blocks of text. +The head of list entries must be empty. +The +.Fl width +argument is ignored. +.It Fl ohang +List bodies are positioned on the line following the head. +The +.Fl width +argument is ignored. +.It Fl tag +A list offset by list entry heads. List entry bodies are positioned +after the head as specified by the +.Fl width +argument. +.El +.Pp +See also +.Sx \&It . +.Ss \&Bo +Begins a block enclosed by square brackets. +Does not have any head arguments. 
+.Pp +Examples: +.Bd -literal -offset indent +\&.Bo 1 , +\&.Dv BUFSIZ \&Bc +.Ed +.Pp +See also +.Sx \&Bq . +.Ss \&Bq +Encloses its arguments in square brackets. +.Pp +Examples: +.D1 \&.Bq 1 , \&Dv BUFSIZ +.Pp +.Em Remarks : +this macro is sometimes abused to emulate optional arguments for +commands; the correct macros to use for this purpose are +.Sx \&Op , +.Sx \&Oo , +and +.Sx \&Oc . +.Pp +See also +.Sx \&Bo . +.Ss \&Brc +Closes a +.Sx \&Bro +block. Does not have any tail arguments. +.Ss \&Bro +Begins a block enclosed by curly braces. +Does not have any head arguments. +.Pp +Examples: +.Bd -literal -offset indent +\&.Bro 1 , ... , +\&.Va n \&Brc +.Ed +.Pp +See also +.Sx \&Brq . +.Ss \&Brq +Encloses its arguments in curly braces. +.Pp +Examples: +.D1 \&.Brq 1 , ... , \&Va n +.Pp +See also +.Sx \&Bro . +.Ss \&Bsx +Format the BSD/OS version provided as an argument, or a default value if +no argument is provided. +.Pp +Examples: +.D1 \&.Bsx 1.0 +.D1 \&.Bsx +.Pp +See also +.Sx \&At , +.Sx \&Bx , +.Sx \&Dx , +.Sx \&Fx , +.Sx \&Nx , +.Sx \&Ox , +and +.Sx \&Ux . +.Ss \&Bt +Prints +.Dq is currently in beta test. +.Ss \&Bx +Format the BSD version provided as an argument, or a default value if no +argument is provided. +.Pp +Examples: +.D1 \&.Bx 4.4 +.D1 \&.Bx +.Pp +See also +.Sx \&At , +.Sx \&Bsx , +.Sx \&Dx , +.Sx \&Fx , +.Sx \&Nx , +.Sx \&Ox , +and +.Sx \&Ux . +.Ss \&Cd +Configuration declaration. +This denotes strings accepted by +.Xr config 8 . +.Pp +Examples: +.D1 \&.Cd device le0 at scode? +.Pp +.Em Remarks : +this macro is commonly abused by using quoted literals to retain +white-space and align consecutive +.Sx \&Cd +declarations. +This practise is discouraged. +.Ss \&Cm +Command modifiers. +Useful when specifying configuration options or keys. +.Pp +Examples: +.D1 \&.Cm ControlPath +.D1 \&.Cm ControlMaster +.Pp +See also +.Sx \&Fl . +.Ss \&D1 +One-line indented display. +This is formatted by the default rules and is useful for simple indented +statements. 
+It is followed by a newline. +.Pp +Examples: +.D1 \&.D1 \&Fl abcdefgh +.Pp +See also +.Sx \&Bd +and +.Sx \&Dl . +.Ss \&Db +Start a debugging context. +This macro is parsed, but generally ignored. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Db Cm on | off +.Ss \&Dc +Closes a +.Sx \&Do +block. Does not have any tail arguments. +.Ss \&Dd +Document date. +This is the mandatory first macro of any +.Nm +manual. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Dd Cm date +.Pp +The +.Cm date +field may be either +.Ar $\&Mdocdate$ , +which signifies the current manual revision date dictated by +.Xr cvs 1 , +or instead a valid canonical date as specified by +.Sx Dates . +If a date does not conform, the current date is used instead. +.Pp +Examples: +.D1 \&.Dd $\&Mdocdate$ +.D1 \&.Dd $\&Mdocdate: July 21 2007$ +.D1 \&.Dd July 21, 2007 +.Pp +See also +.Sx \&Dt +and +.Sx \&Os . +.Ss \&Dl +One-line indented display. +This is formatted as literal text and is useful for commands and +invocations. +It is followed by a newline. +.Pp +Examples: +.D1 \&.Dl % mandoc mdoc.7 | less +.Pp +See also +.Sx \&Bd +and +.Sx \&D1 . +.Ss \&Do +Begins a block enclosed by double quotes. Does not have any head +arguments. +.Pp +Examples: +.D1 \&.D1 \&Do April is the cruellest month \&Dc \e(em T.S. Eliot +.Pp +See also +.Sx \&Dq . +.Ss \&Dq +Encloses its arguments in double quotes. +.Pp +Examples: +.Bd -literal -offset indent -compact +\&.Dq April is the cruellest month +\e(em T.S. Eliot +.Ed +.Pp +See also +.Sx \&Do . +.Ss \&Dt +Document title. +This is the mandatory second macro of any +.Nm +file. +Its syntax is as follows: +.Bd -ragged -offset indent +.Pf \. Sx \&Dt +.Oo +.Cm title +.Oo +.Cm section +.Op Cm volume | arch +.Oc +.Oc +.Ed +.Pp +Its arguments are as follows: +.Bl -tag -width Ds -offset Ds +.It Cm title +The document's title (name), defaulting to +.Qq UNKNOWN +if unspecified. +It should be capitalised. +.It Cm section +The manual section.
+This may be one of +.Ar 1 +.Pq utilities , +.Ar 2 +.Pq system calls , +.Ar 3 +.Pq libraries , +.Ar 3p +.Pq Perl libraries , +.Ar 4 +.Pq devices , +.Ar 5 +.Pq file formats , +.Ar 6 +.Pq games , +.Ar 7 +.Pq miscellaneous , +.Ar 8 +.Pq system utilities , +.Ar 9 +.Pq kernel functions , +.Ar X11 +.Pq X Window System , +.Ar X11R6 +.Pq X Window System , +.Ar unass +.Pq unassociated , +.Ar local +.Pq local system , +.Ar draft +.Pq draft manual , +or +.Ar paper +.Pq paper . +It should correspond to the manual's filename suffix and defaults to +.Qq 1 +if unspecified. +.It Cm volume +This overrides the volume inferred from +.Ar section . +This field is optional, and if specified, must be one of +.Ar USD +.Pq users' supplementary documents , +.Ar PS1 +.Pq programmers' supplementary documents , +.Ar AMD +.Pq administrators' supplementary documents , +.Ar SMM +.Pq system managers' manuals , +.Ar URM +.Pq users' reference manuals , +.Ar PRM +.Pq programmers' reference manuals , +.Ar KM +.Pq kernel manuals , +.Ar IND +.Pq master index , +.Ar MMI +.Pq master index , +.Ar LOCAL +.Pq local manuals , +.Ar LOC +.Pq local manuals , +or +.Ar CON +.Pq contributed manuals . +.It Cm arch +This specifies a specific relevant architecture. +If +.Cm volume +is not provided, it may be used in its place, else it may be used +subsequent that. +It, too, is optional. +It must be one of +.Ar alpha , +.Ar amd64 , +.Ar amiga , +.Ar arc , +.Ar arm , +.Ar armish , +.Ar aviion , +.Ar hp300 , +.Ar hppa , +.Ar hppa64 , +.Ar i386 , +.Ar landisk , +.Ar loongson , +.Ar luna88k , +.Ar mac68k , +.Ar macppc , +.Ar mvme68k , +.Ar mvme88k , +.Ar mvmeppc , +.Ar pmax , +.Ar sgi , +.Ar socppc , +.Ar sparc , +.Ar sparc64 , +.Ar sun3 , +.Ar vax , +or +.Ar zaurus . +.El +.Pp +Examples: +.D1 \&.Dt FOO 1 +.D1 \&.Dt FOO 4 KM +.D1 \&.Dt FOO 9 i386 +.Pp +See also +.Sx \&Dd +and +.Sx \&Os . +.Ss \&Dv +Defined variables such as preprocessor constants. 
+.Pp +Examples: +.D1 \&.Dv BUFSIZ +.D1 \&.Dv STDOUT_FILENO +.Pp +See also +.Sx \&Er . +.Ss \&Dx +Format the DragonFly BSD version provided as an argument, or a default +value if no argument is provided. +.Pp +Examples: +.D1 \&.Dx 2.4.1 +.D1 \&.Dx +.Pp +See also +.Sx \&At , +.Sx \&Bsx , +.Sx \&Bx , +.Sx \&Fx , +.Sx \&Nx , +.Sx \&Ox , +and +.Sx \&Ux . +.Ss \&Ec +.Ss \&Ed +.Ss \&Ef +.Ss \&Ek +.Ss \&El +Ends a list context started by +.Sx \&Bl . +.Pp +See also +.Sx \&Bl +and +.Sx \&It . +.Ss \&Em +Denotes text that should be emphasised. +Note that this is a presentation term and should not be used for +stylistically decorating technical terms. +.Pp +Examples: +.D1 \&.Em Warnings! +.D1 \&.Em Remarks : +.Ss \&En +.Ss \&Eo +.Ss \&Er +Display error constants. +.Pp +Examples: +.D1 \&.Er EPERM +.D1 \&.Er ENOENT +.Pp +See also +.Sx \&Dv . +.Ss \&Es +.Ss \&Ev +Environmental variables such as those specified in +.Xr environ 7 . +.Pp +Examples: +.D1 \&.Ev DISPLAY +.D1 \&.Ev PATH +.Ss \&Ex +Inserts text regarding a utility's exit values. +This macro must have first the +.Fl std +argument specified, then an optional +.Ar utility . +If +.Ar utility +is not provided, the document's name as stipulated in +.Sx \&Nm +is provided. +.Ss \&Fa +Function argument. +Its syntax is as follows: +.Bd -ragged -offset indent +.Pf \. Sx \&Fa +.Op Cm argtype +.Cm argname +.Ed +.Pp +This may be invoked for names with or without the corresponding type. +It is also used to specify the field name of a structure. +Most often, the +.Sx \&Fa +macro is used in the +.Em SYNOPSIS +within +.Sx \&Fo +section when documenting multi-line function prototypes. +If invoked with multiple arguments, the arguments are separated by a +comma. +Furthermore, if the following macro is another +.Sx \&Fa , +the last argument will also have a trailing comma. +.Pp +Examples: +.D1 \&.Fa \(dqconst char *p\(dq +.D1 \&.Fa \(dqint a\(dq \(dqint b\(dq \(dqint c\(dq +.D1 \&.Fa foo +.Pp +See also +.Sx \&Fo . 
+.Ss \&Fc +.Ss \&Fd +Historically used to document include files. +This usage has been deprecated in favour of +.Sx \&In . +Do not use this macro. +.Pp +See also +.Sx MANUAL STRUCTURE +and +.Sx \&In . +.Ss \&Fl +Command-line flag. +Used when listing arguments to command-line utilities. +Prints a fixed-width hyphen +.Sq \- +directly followed by each argument. +If no arguments are provided, a hyphen is printed followed by a space. +If the argument is a macro, a hyphen is prefixed to the subsequent macro +output. +.Pp +Examples: +.D1 \&.Fl a b c +.D1 \&.Fl \&Pf a b +.D1 \&.Fl +.D1 \&.Op \&Fl o \&Ns \&Ar file +.Pp +See also +.Sx \&Cm . +.Ss \&Fn +A function name. +Its syntax is as follows: +.Bd -ragged -offset indent +.Pf \. Ns Sx \&Fn +.Op Cm functype +.Cm funcname +.Op Oo Cm argtype Oc Cm argname +.Ed +.Pp +Function arguments are surrounded in parentheses and +are delimited by commas. +If no arguments are specified, blank parentheses are output. +.Pp +Examples: +.D1 \&.Fn "int funcname" "int arg0" "int arg1" +.D1 \&.Fn funcname "int arg0" +.D1 \&.Fn funcname arg0 +.Bd -literal -offset indent -compact +\&.Ft functype +\&.Fn funcname +.Ed +.Pp +See also +.Sx MANUAL STRUCTURE +and +.Sx \&Ft . +.Ss \&Fo +Begin a function block. +This is a multi-line version of +.Sx \&Fn . +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Fo Cm funcname +.Pp +Invocations usually occur in the following context: +.Bd -ragged -offset indent +.Pf \. Sx \&Ft Cm functype +.br +.Pf \. Sx \&Fo Cm funcname +.br +.Pf \. Sx \&Fa Oo Cm argtype Oc Cm argname +.br +\.\.\. +.br +.Pf \. Sx \&Fc +.Ed +.Pp +A +.Sx \&Fo +scope is closed by +.Sx \&Fc . +.Pp +See also +.Sx MANUAL STRUCTURE , +.Sx \&Fa , +.Sx \&Fc , +and +.Sx \&Ft . +.Ss \&Ft +A function type. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Ft Cm functype +.Pp +Examples: +.D1 \&.Ft int +.Bd -literal -offset indent -compact +\&.Ft functype +\&.Fn funcname +.Ed +.Pp +See also +.Sx MANUAL STRUCTURE , +.Sx \&Fn , +and +.Sx \&Fo .
+.Ss \&Fx +Format the FreeBSD version provided as an argument, or a default value +if no argument is provided. +.Pp +Examples: +.D1 \&.Fx 7.1 +.D1 \&.Fx +.Pp +See also +.Sx \&At , +.Sx \&Bsx , +.Sx \&Bx , +.Sx \&Dx , +.Sx \&Nx , +.Sx \&Ox , +and +.Sx \&Ux . +.Ss \&Hf +.Ss \&Ic +.Ss \&In +An +.Qq include +file. +In the +.Em SYNOPSIS +section (only if invoked as the line macro), the first argument is +preceded by +.Qq #include , +the arguments are enclosed in angled braces. +.Pp +Examples: +.D1 \&.In sys/types +.Pp +See also +.Sx MANUAL STRUCTURE . +.Ss \&It +A list item. +The syntax of this macro depends on the list type. +.Pp +Lists +of type +.Fl hang , +.Fl ohang , +.Fl inset , +and +.Fl diag +have the following syntax: +.Pp +.D1 Pf \. Sx \&It Cm args +.Pp +Lists of type +.Fl bullet , +.Fl dash , +.Fl enum , +.Fl hyphen +and +.Fl item +have the following syntax: +.Pp +.D1 Pf \. Sx \&It +.Pp +with subsequent lines interpreted within the scope of the +.Sx \&It +until either a closing +.Sx \&El +or another +.Sx \&It . +.Pp +The +.Fl tag +list has the following syntax: +.Pp +.D1 Pf \. Sx \&It Op Cm args +.Pp +Subsequent lines are interpreted as with +.Fl bullet +and family. +The line arguments correspond to the list's left-hand side; body +arguments correspond to the list's contents. +.Pp +The +.Fl column +list is the most complicated. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&It Op Cm args +.Pp +The +.Cm args +are phrases, a mix of macros and text corresponding to a line column, +delimited by tabs or the special +.Sq \&Ta +pseudo-macro. +Lines subsequent the +.Sx \&It +are interpreted within the scope of the last phrase. +Calling the pseudo-macro +.Sq \&Ta +will open a new phrase scope (this must occur on a macro line to be +interpreted as a macro). Note that the tab phrase delimiter may only be +used within the +.Sx \&It +line itself. +Subsequent this, only the +.Sq \&Ta +pseudo-macro may be used to delimit phrases.
+Furthermore, note that quoted sections propagate over tab-delimited +phrases on an +.Sx \&It , +for example, +.Pp +.D1 .It \(dqcol1 ; col2 ;\(dq \&; +.Pp +will preserve the semicolon whitespace except for the last. +.Pp +See also +.Sx \&Bl . +.Ss \&Lb +Specify a library. +The syntax is as follows: +.Pp +.D1 Pf \. Sx \&Lb Cm library +.Pp +The +.Cm library +parameter may be a system library, such as +.Cm libz +or +.Cm libpam , +in which case a small library description is printed next to the linker +invocation; or a custom library, in which case the library name is +printed in quotes. +This is most commonly used in the +.Em SYNOPSIS +section as described in +.Sx MANUAL STRUCTURE . +.Pp +Examples: +.D1 \&.Lb libz +.D1 \&.Lb mdoc +.Ss \&Li +.Ss \&Lk +Format a hyperlink. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Lk Cm uri Op Cm name +.Pp +Examples: +.D1 \&.Lk http://bsd.lv "The BSD.lv Project" +.D1 \&.Lk http://bsd.lv +.Pp +See also +.Sx \&Mt . +.Ss \&Lp +.Ss \&Ms +.Ss \&Mt +Format a +.Qq mailto: +hyperlink. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Mt Cm address +.Pp +Examples: +.D1 \&.Mt discuss@manpages.bsd.lv +.Ss \&Nd +.Ss \&Nm +.Ss \&No +.Ss \&Ns +.Ss \&Nx +Format the NetBSD version provided as an argument, or a default value if +no argument is provided. +.Pp +Examples: +.D1 \&.Nx 5.01 +.D1 \&.Nx +.Pp +See also +.Sx \&At , +.Sx \&Bsx , +.Sx \&Bx , +.Sx \&Dx , +.Sx \&Fx , +.Sx \&Ox , +and +.Sx \&Ux . +.Ss \&Oc +.Ss \&Oo +.Ss \&Op +.Ss \&Os +Document operating system version. +This is the mandatory third macro of +any +.Nm +file. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Os Op Cm system +.Pp +The optional +.Cm system +parameter specifies the relevant operating system or environment. +Left unspecified, it defaults to the local operating system version. +This is the suggested form. +.Pp +Examples: +.D1 \&.Os +.D1 \&.Os KTH/CSC/TCS +.D1 \&.Os BSD 4.3 +.Pp +See also +.Sx \&Dd +and +.Sx \&Dt . +.Ss \&Ot +Unknown usage. 
+.Pp +.Em Remarks : +this macro has been deprecated. +.Ss \&Ox +Format the OpenBSD version provided as an argument, or a default value +if no argument is provided. +.Pp +Examples: +.D1 \&.Ox 4.5 +.D1 \&.Ox +.Pp +See also +.Sx \&At , +.Sx \&Bsx , +.Sx \&Bx , +.Sx \&Dx , +.Sx \&Fx , +.Sx \&Nx , +and +.Sx \&Ux . +.Ss \&Pa +.Ss \&Pc +.Ss \&Pf +.Ss \&Po +.Ss \&Pp +.Ss \&Pq +.Ss \&Qc +.Ss \&Ql +.Ss \&Qo +.Ss \&Qq +.Ss \&Re +Closes a +.Sx \&Rs +block. +Does not have any tail arguments. +.Ss \&Rs +Begins a bibliographic +.Pq Dq reference +block. +Does not have any head arguments. +The block macro may only contain +.Sx \&%A , +.Sx \&%B , +.Sx \&%C , +.Sx \&%D , +.Sx \&%I , +.Sx \&%J , +.Sx \&%N , +.Sx \&%O , +.Sx \&%P , +.Sx \&%Q , +.Sx \&%R , +.Sx \&%T , +.Sx \&%U , +and +.Sx \&%V +child macros (at least one must be specified). +.Pp +Examples: +.Bd -literal -offset indent -compact +\&.Rs +\&.%A J. E. Hopcroft +\&.%A J. D. Ullman +\&.%B Introduction to Automata Theory, Languages, and Computation +\&.%I Addison-Wesley +\&.%C Reading, Massachusetts +\&.%D 1979 +\&.Re +.Ed +.Pp +If an +.Sx \&Rs +block is used within a SEE ALSO section, a vertical space is asserted +before the rendered output, else the block continues on the current +line. +.Ss \&Rv +.Ss \&Sc +.Ss \&Sh +.Ss \&Sm +.Ss \&So +.Ss \&Sq +.Ss \&Ss +.Ss \&St +.Ss \&Sx +.Ss \&Sy +.Ss \&Tn +.Ss \&Ud +Prints out +.Dq currently under development. +.Ss \&Ux +Format the UNIX name. +Accepts no argument. +.Pp +Examples: +.D1 \&.Ux +.Pp +See also +.Sx \&At , +.Sx \&Bsx , +.Sx \&Bx , +.Sx \&Dx , +.Sx \&Fx , +.Sx \&Nx , +and +.Sx \&Ox . +.Ss \&Va +.Ss \&Vt +A variable type. +This is also used for indicating global variables in the +.Em SYNOPSIS +section, in which case a variable name is also specified. +Note that it accepts +.Sx Block partial-implicit +syntax when invoked as the first macro in the +.Em SYNOPSIS +section, else it accepts ordinary +.Sx In-line +syntax.
+.Pp +Note that this should not be confused with +.Sx \&Ft , +which is used for function return types. +.Pp +Examples: +.D1 \&.Vt unsigned char +.D1 \&.Vt extern const char * const sys_signame[] \&; +.Pp +See also +.Sx MANUAL STRUCTURE +and +.Sx \&Va . +.Ss \&Xc +Close a scope opened by +.Sx \&Xo . +.Ss \&Xo +Open an extension scope. +This macro originally existed to extend the 9-argument limit of troff; +since this limit has been lifted, the macro has been deprecated. +.Ss \&Xr +Link to another manual +.Pq Qq cross-reference . +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Xr Cm name section +.Pp +The +.Cm name +and +.Cm section +are the name and section of the linked manual. +If +.Cm section +is followed by non-punctuation, an +.Sx \&Ns +is inserted into the token stream. +This behaviour is for compatibility with +.Xr groff 1 . +.Pp +Examples: +.D1 \&.Xr mandoc 1 +.D1 \&.Xr mandoc 1 \&; +.D1 \&.Xr mandoc 1 \&Ns s behaviour +.Ss \&br +.Ss \&sp +.Sh COMPATIBILITY +This section documents compatibility between mandoc and other +troff implementations, at this time limited to GNU troff +.Pq Qq groff . +The term +.Qq historic groff +refers to groff versions before the +.Pa doc.tmac +file re-write +.Pq somewhere between 1.15 and 1.19 . +.Pp +Heirloom troff, the other significant troff implementation accepting +\-mdoc, is similar to historic groff. +.Pp +.Bl -dash -compact +.It +Old groff fails to assert a newline before +.Sx \&Bd Fl ragged compact . +.It +groff behaves inconsistently when encountering +.Pf non- Sx \&Fa +children of +.Sx \&Fo +regarding spacing between arguments. +In mandoc, this is not the case: each argument is consistently followed +by a single space and the trailing +.Sq \&) +suppresses prior spacing. +.It +groff behaves inconsistently when encountering +.Sx \&Ft +and +.Sx \&Fn +in the +.Em SYNOPSIS : +at times newline(s) are suppressed depending on whether a prior +.Sx \&Fn +has been invoked. +In mandoc, this is not the case.
+See +.Sx \&Ft +and +.Sx \&Fn +for the normalised behaviour. +.It +Historic groff does not break before an +.Sx \&Fn +when not invoked as the line macro in the +.Em SYNOPSIS +section. +.It +Historic groff formats the +.Sx \&In +badly: trailing arguments are trashed and +.Em SYNOPSIS +is not specially treated. +.It +groff does not accept the +.Sq \&Ta +pseudo-macro as a line macro. +mandoc does. +.It +The comment syntax +.Sq \e." +is no longer accepted. +.It +In groff, the +.Sx \&Pa +macro does not format its arguments when used in the FILES section under +certain list types. +mandoc does. +.It +Historic groff does not print a dash for empty +.Sx \&Fl +arguments. +mandoc and newer groff implementations do. +.It +groff behaves irregularly when specifying +.Sq \ef +.Sx Text Decoration +within line-macro scopes. +mandoc follows a consistent system. +.It +In mandoc, negative scaling units are truncated to zero; groff would +move to prior lines. +Furthermore, the +.Sq f +scaling unit, while accepted, is rendered as the default unit. +.It +In quoted literals, groff allowed pair-wise double-quotes to produce a +standalone double-quote in formatted output. +This idiosyncratic behaviour is not applicable in mandoc. +.It +Display offsets +.Sx \&Bd +.Fl offset Ar center +and +.Fl offset Ar right +are disregarded in mandoc. +Furthermore, the +.Fl file Ar file +argument is not supported in mandoc. +Lastly, since text is not right-justified in mandoc (or even groff), +.Fl ragged +and +.Fl filled +are aliases, as are +.Fl literal +and +.Fl unfilled . +.It +Historic groff has many un-callable macros. +Most of these (excluding some block-level macros) are now callable. +.It +The vertical bar +.Sq \(ba +made historic groff +.Qq go orbital +but has been a proper delimiter since then. +.It +.Sx \&It Fl nested +is assumed for all lists (it wasn't in historic groff): any list may be +nested and +.Fl enum +lists will restart the sequence only for the sub-list. 
+.It +Some manuals use +.Sx \&Li +incorrectly by following it with a reserved character and expecting the +delimiter to render. +This is not supported in mandoc. +.It +In groff, the +.Sx \&Cd , +.Sx \&Er , +.Sx \&Ex , +and +.Sx \&Rv +macros were stipulated only to occur in certain manual sections. +mandoc does not have these restrictions. +.It +Newer groff and mandoc print +.Qq AT&T UNIX +prior to unknown arguments of +.Sx \&At ; +older groff did nothing. +.El +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr mandoc_char 7 +.Sh AUTHORS +The +.Nm +reference was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . +.\" +.\" XXX: this really isn't the place for these caveats. +.\" . +.\" . +.\" .Sh CAVEATS +.\" There are many ambiguous parts of mdoc. +.\" . +.\" .Pp +.\" .Bl -dash -compact +.\" .It +.\" .Sq \&Fa +.\" should be +.\" .Sq \&Va +.\" as function arguments are variables. +.\" .It +.\" .Sq \&Ft +.\" should be +.\" .Sq \&Vt +.\" as function return types are still types. Furthermore, the +.\" .Sq \&Ft +.\" should be removed and +.\" .Sq \&Fo , +.\" which ostensibly follows it, should follow the same convention as +.\" .Sq \&Va . +.\" .It +.\" .Sq \&Va +.\" should formalise that only one or two arguments are acceptable: a +.\" variable name and optional, preceding type. +.\" .It +.\" .Sq \&Fd +.\" is ambiguous. It's commonly used to indicate an include file in the +.\" synopsis section. +.\" .Sq \&In +.\" should be used, instead. +.\" .It +.\" Only the +.\" .Sq \-literal +.\" argument to +.\" .Sq \&Bd +.\" makes sense. The remaining ones should be removed. +.\" .It +.\" The +.\" .Sq \&Xo +.\" and +.\" .Sq \&Xc +.\" macros should be deprecated. +.\" .It +.\" The +.\" .Sq \&Dt +.\" macro lacks clarity. It should be absolutely clear which title will +.\" render when formatting the manual page. +.\" .It +.\" A +.\" .Sq \&Lx +.\" should be provided for Linux (\(`a la +.\" .Sq \&Ox , +.\" .Sq \&Nx +.\" etc.). 
+.\" .It +.\" There's no way to refer to references in +.\" .Sq \&Rs/Re +.\" blocks. +.\" .It +.\" The \-split and \-nosplit dictates via +.\" .Sq \&An +.\" are re-set when entering and leaving the AUTHORS section. +.\" .El +.\" . diff --git a/commands/mdocml/mdoc.c b/commands/mdocml/mdoc.c new file mode 100644 index 000000000..79624ae83 --- /dev/null +++ b/commands/mdocml/mdoc.c @@ -0,0 +1,822 @@ +/* $Id: mdoc.c,v 1.146 2010/06/12 11:58:22 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "libmdoc.h" +#include "libmandoc.h" + +/* + * Printable macro names, MDOC_MAX entries. + * NOTE(review): presumably ordered to match enum mdoct -- confirm + * against mdoc.h before adding or reordering entries. + */ +const char *const __mdoc_macronames[MDOC_MAX] = { + "Ap", "Dd", "Dt", "Os", + "Sh", "Ss", "Pp", "D1", + "Dl", "Bd", "Ed", "Bl", + "El", "It", "Ad", "An", + "Ar", "Cd", "Cm", "Dv", + "Er", "Ev", "Ex", "Fa", + "Fd", "Fl", "Fn", "Ft", + "Ic", "In", "Li", "Nd", + "Nm", "Op", "Ot", "Pa", + "Rv", "St", "Va", "Vt", + /* LINTED */ + "Xr", "%A", "%B", "%D", + /* LINTED */ + "%I", "%J", "%N", "%O", + /* LINTED */ + "%P", "%R", "%T", "%V", + "Ac", "Ao", "Aq", "At", + "Bc", "Bf", "Bo", "Bq", + "Bsx", "Bx", "Db", "Dc", + "Do", "Dq", "Ec", "Ef", + "Em", "Eo", "Fx", "Ms", + "No", "Ns", "Nx", "Ox", + "Pc", "Pf", "Po", "Pq", + "Qc", "Ql", "Qo", "Qq", + "Re", "Rs", "Sc", "So", + "Sq", "Sm", "Sx", "Sy", + "Tn", "Ux", "Xc", "Xo", + "Fo", "Fc", "Oo", "Oc", + "Bk", "Ek", "Bt", "Hf", + "Fr", "Ud", "Lb", "Lp", + "Lk", "Mt", "Brq", "Bro", + /* LINTED */ + "Brc", "%C", "Es", "En", + /* LINTED */ + "Dx", "%Q", "br", "sp", + /* LINTED */ + "%U", "Ta" + }; + +/* + * Printable macro-argument names, MDOC_ARG_MAX entries. + */ +const char *const __mdoc_argnames[MDOC_ARG_MAX] = { + "split", "nosplit", "ragged", + "unfilled", "literal", "file", + "offset", "bullet", "dash", + "hyphen", "item", "enum", + "tag", "diag", "hang", + "ohang", "inset", "column", + "width", "compact", "std", + "filled", "words", "emphasis", + "symbolic", "nested", "centered" + }; + +/* Exported pointers to the two name tables defined in this file. */ +const char * const *mdoc_macronames = __mdoc_macronames; +const char * const *mdoc_argnames = __mdoc_argnames; + +static void mdoc_node_free(struct mdoc_node *); +static void mdoc_node_unlink(struct mdoc *, + struct mdoc_node *); +static void mdoc_free1(struct mdoc *); +static void mdoc_alloc1(struct mdoc *); +static struct mdoc_node *node_alloc(struct mdoc *, int, int, + enum mdoct, enum mdoc_type); +static int node_append(struct mdoc *, + struct mdoc_node *); +static int mdoc_ptext(struct mdoc *, 
int, char *, int); +static int mdoc_pmacro(struct mdoc *, int, char *, int); +static int macrowarn(struct mdoc *, int, + const char *, int); + +/* + * Return the first node of the parse tree, or NULL once the parser has + * been halted (MDOC_HALT set in the flags). + */ +const struct mdoc_node * +mdoc_node(const struct mdoc *m) +{ + + return(MDOC_HALT & m->flags ? NULL : m->first); +} + +/* + * Return a pointer to the parser's meta-data, or NULL once the parser + * has been halted (MDOC_HALT set in the flags). + */ +const struct mdoc_meta * +mdoc_meta(const struct mdoc *m) +{ + + return(MDOC_HALT & m->flags ? NULL : &m->meta); +} + + +/* + * Frees volatile resources (parse tree, meta-data, fields). + */ +static void +mdoc_free1(struct mdoc *mdoc) +{ + + if (mdoc->first) + mdoc_node_delete(mdoc, mdoc->first); + if (mdoc->meta.title) + free(mdoc->meta.title); + if (mdoc->meta.os) + free(mdoc->meta.os); + if (mdoc->meta.name) + free(mdoc->meta.name); + if (mdoc->meta.arch) + free(mdoc->meta.arch); + if (mdoc->meta.vol) + free(mdoc->meta.vol); + if (mdoc->meta.msec) + free(mdoc->meta.msec); +} + + +/* + * Allocate all volatile resources (parse tree, meta-data, fields). + */ +static void +mdoc_alloc1(struct mdoc *mdoc) +{ + + memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); + mdoc->flags = 0; + mdoc->lastnamed = mdoc->lastsec = SEC_NONE; + mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); + mdoc->first = mdoc->last; + mdoc->last->type = MDOC_ROOT; + mdoc->next = MDOC_NEXT_CHILD; +} + + +/* + * Free up volatile resources (see mdoc_free1()) then re-initialises the + * data with mdoc_alloc1(). After invocation, parse data has been reset + * and the parser is ready for re-invocation on a new tree; however, + * cross-parse non-volatile data is kept intact. + */ +void +mdoc_reset(struct mdoc *mdoc) +{ + + mdoc_free1(mdoc); + mdoc_alloc1(mdoc); +} + + +/* + * Completely free up all volatile and non-volatile parse resources. + * After invocation, the pointer is no longer usable. + */ +void +mdoc_free(struct mdoc *mdoc) +{ + + mdoc_free1(mdoc); + free(mdoc); +} + + +/* + * Allocate volatile and non-volatile parse resources. 
+ */ +struct mdoc * +mdoc_alloc(void *data, int pflags, mandocmsg msg) +{ + struct mdoc *p; + + p = mandoc_calloc(1, sizeof(struct mdoc)); + + p->msg = msg; + p->data = data; + p->pflags = pflags; + + mdoc_hash_init(); + mdoc_alloc1(p); + return(p); +} + + +/* + * Climb back up the parse tree, validating open scopes. Mostly calls + * through to macro_end() in macro.c. + */ +int +mdoc_endparse(struct mdoc *m) +{ + + if (MDOC_HALT & m->flags) + return(0); + else if (mdoc_macroend(m)) + return(1); + m->flags |= MDOC_HALT; + return(0); +} + + +/* + * Main parse routine. Parses a single line -- really just hands off to + * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). + */ +int +mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs) +{ + + if (MDOC_HALT & m->flags) + return(0); + + m->flags |= MDOC_NEWLINE; + return(('.' == buf[offs] || '\'' == buf[offs]) ? + mdoc_pmacro(m, ln, buf, offs) : + mdoc_ptext(m, ln, buf, offs)); +} + +/* + * Format a printf-style message into a fixed 256-byte stack buffer + * (longer messages are truncated) and hand it, together with the error + * type and the line/column, to the message callback stored in + * mdoc->msg. Returns whatever the callback returns. + */ +int +mdoc_vmsg(struct mdoc *mdoc, enum mandocerr t, + int ln, int pos, const char *fmt, ...) +{ + char buf[256]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf) - 1, fmt, ap); + va_end(ap); + + return((*mdoc->msg)(t, mdoc->data, ln, pos, buf)); +} + +/* + * Dispatch macro `tok' to its handler in mdoc_macros[]. A prologue + * macro seen after the body has begun is reported as MANDOCERR_BADBODY + * and not dispatched. The first non-prologue macro seen while still in + * the prologue is reported as MANDOCERR_BADPROLOG; if that message + * call does not fail, defaults are filled in for missing meta-data and + * MDOC_PBODY is set before dispatching. + */ +int +mdoc_macro(struct mdoc *m, enum mdoct tok, + int ln, int pp, int *pos, char *buf) +{ + assert(tok < MDOC_MAX); + + /* If we're in the body, deny prologue calls. */ + + if (MDOC_PROLOGUE & mdoc_macros[tok].flags && + MDOC_PBODY & m->flags) + return(mdoc_pmsg(m, ln, pp, MANDOCERR_BADBODY)); + + /* If we're in the prologue, deny "body" macros. */ + + if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && + ! (MDOC_PBODY & m->flags)) { + if ( ! 
mdoc_pmsg(m, ln, pp, MANDOCERR_BADPROLOG)) + return(0); + if (NULL == m->meta.title) + m->meta.title = mandoc_strdup("UNKNOWN"); + if (NULL == m->meta.vol) + m->meta.vol = mandoc_strdup("LOCAL"); + if (NULL == m->meta.os) + m->meta.os = mandoc_strdup("LOCAL"); + if (0 == m->meta.date) + m->meta.date = time(NULL); + m->flags |= MDOC_PBODY; + } + + return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); +} + +/* + * Link node p into the tree at the current parse point: as the next + * sibling of mdoc->last or as its child, depending on mdoc->next. + * The parent's child count is bumped, the pre-validation and pre-action + * hooks are run, head/tail/body nodes are recorded on their parent + * block, and p becomes the new mdoc->last. Text nodes additionally get + * the post-validation and post-action hooks run right away. + * Returns 0 as soon as any hook fails, else 1. + */ +static int +node_append(struct mdoc *mdoc, struct mdoc_node *p) +{ + + assert(mdoc->last); + assert(mdoc->first); + assert(MDOC_ROOT != p->type); + + switch (mdoc->next) { + case (MDOC_NEXT_SIBLING): + mdoc->last->next = p; + p->prev = mdoc->last; + p->parent = mdoc->last->parent; + break; + case (MDOC_NEXT_CHILD): + mdoc->last->child = p; + p->parent = mdoc->last; + break; + default: + abort(); + /* NOTREACHED */ + } + + p->parent->nchild++; + + if ( ! mdoc_valid_pre(mdoc, p)) + return(0); + if ( ! mdoc_action_pre(mdoc, p)) + return(0); + + switch (p->type) { + case (MDOC_HEAD): + assert(MDOC_BLOCK == p->parent->type); + p->parent->head = p; + break; + case (MDOC_TAIL): + assert(MDOC_BLOCK == p->parent->type); + p->parent->tail = p; + break; + case (MDOC_BODY): + assert(MDOC_BLOCK == p->parent->type); + p->parent->body = p; + break; + default: + break; + } + + mdoc->last = p; + + switch (p->type) { + case (MDOC_TEXT): + if ( ! mdoc_valid_post(mdoc)) + return(0); + if ( ! 
mdoc_action_post(mdoc)) + return(0); + break; + default: + break; + } + + return(1); +} + +/* + * Allocate a node with mandoc_calloc() and record the parser's current + * section plus the given line, column, token and node type. If + * MDOC_NEWLINE is set in the parser flags the node is marked MDOC_LINE; + * MDOC_NEWLINE is then cleared, so at most one node per set flag is + * marked. The node is not linked into the tree here. + */ +static struct mdoc_node * +node_alloc(struct mdoc *m, int line, int pos, + enum mdoct tok, enum mdoc_type type) +{ + struct mdoc_node *p; + + p = mandoc_calloc(1, sizeof(struct mdoc_node)); + p->sec = m->lastsec; + p->line = line; + p->pos = pos; + p->tok = tok; + p->type = type; + if (MDOC_NEWLINE & m->flags) + p->flags |= MDOC_LINE; + m->flags &= ~MDOC_NEWLINE; + return(p); +} + +/* + * Allocate and append an MDOC_TAIL node for `tok'; following nodes are + * appended as its children. Returns 0 if appending fails, else 1. + */ +int +mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) +{ + struct mdoc_node *p; + + p = node_alloc(m, line, pos, tok, MDOC_TAIL); + if ( ! node_append(m, p)) + return(0); + m->next = MDOC_NEXT_CHILD; + return(1); +} + +/* + * Allocate and append an MDOC_HEAD node for `tok'; following nodes are + * appended as its children. Returns 0 if appending fails, else 1. + */ +int +mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) +{ + struct mdoc_node *p; + + assert(m->first); + assert(m->last); + + p = node_alloc(m, line, pos, tok, MDOC_HEAD); + if ( ! node_append(m, p)) + return(0); + m->next = MDOC_NEXT_CHILD; + return(1); +} + +/* + * Allocate and append an MDOC_BODY node for `tok'; following nodes are + * appended as its children. Returns 0 if appending fails, else 1. + */ +int +mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) +{ + struct mdoc_node *p; + + p = node_alloc(m, line, pos, tok, MDOC_BODY); + if ( ! node_append(m, p)) + return(0); + m->next = MDOC_NEXT_CHILD; + return(1); +} + +/* + * Allocate and append an MDOC_BLOCK node for `tok'. A non-NULL `args' + * is attached to the node and its reference count is incremented. + * Following nodes are appended as children of the block. + * Returns 0 if appending fails, else 1. + */ +int +mdoc_block_alloc(struct mdoc *m, int line, int pos, + enum mdoct tok, struct mdoc_arg *args) +{ + struct mdoc_node *p; + + p = node_alloc(m, line, pos, tok, MDOC_BLOCK); + p->args = args; + if (p->args) + (args->refcnt)++; + if ( ! node_append(m, p)) + return(0); + m->next = MDOC_NEXT_CHILD; + return(1); +} + +/* + * Allocate and append an MDOC_ELEM node for `tok'. A non-NULL `args' + * is attached to the node and its reference count is incremented. + * Following nodes are appended as children of the element. + * Returns 0 if appending fails, else 1. + */ +int +mdoc_elem_alloc(struct mdoc *m, int line, int pos, + enum mdoct tok, struct mdoc_arg *args) +{ + struct mdoc_node *p; + + p = node_alloc(m, line, pos, tok, MDOC_ELEM); + p->args = args; + if (p->args) + (args->refcnt)++; + if ( ! 
node_append(m, p)) + return(0); + m->next = MDOC_NEXT_CHILD; + return(1); +} + +/* + * Allocate and append an MDOC_TEXT node holding a freshly allocated + * copy of the string p. Following nodes are appended as siblings of + * the text node. Returns 0 if appending fails, else 1. + */ +int +mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) +{ + struct mdoc_node *n; + size_t sv, len; + + len = strlen(p); + + n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT); + n->string = mandoc_malloc(len + 1); + sv = strlcpy(n->string, p, len + 1); + + /* Prohibit truncation. */ + assert(sv < len + 1); + + if ( ! node_append(m, n)) + return(0); + + m->next = MDOC_NEXT_SIBLING; + return(1); +} + +/* + * Release a single node: its string, its arguments (through + * mdoc_argv_free()) and the node itself. Children and tree links are + * not touched here. + */ +static void +mdoc_node_free(struct mdoc_node *p) +{ + + if (p->string) + free(p->string); + if (p->args) + mdoc_argv_free(p->args); + free(p); +} + +/* + * Detach n from its siblings and its parent (decrementing the parent's + * child count). When m is non-NULL and n is the current parse point, + * the parse point moves to n's previous sibling, or to its parent if + * there is none; when n is the tree's first node, m->first is cleared. + * The node itself is not freed. + */ +static void +mdoc_node_unlink(struct mdoc *m, struct mdoc_node *n) +{ + + /* Adjust siblings. */ + + if (n->prev) + n->prev->next = n->next; + if (n->next) + n->next->prev = n->prev; + + /* Adjust parent. */ + + if (n->parent) { + n->parent->nchild--; + if (n->parent->child == n) + n->parent->child = n->prev ? n->prev : n->next; + } + + /* Adjust parse point, if applicable. */ + + if (m && m->last == n) { + if (n->prev) { + m->last = n->prev; + m->next = MDOC_NEXT_SIBLING; + } else { + m->last = n->parent; + m->next = MDOC_NEXT_CHILD; + } + } + + if (m && m->first == n) + m->first = NULL; +} + +/* + * Recursively delete all children of p, then unlink p from the tree + * and free it. + */ +void +mdoc_node_delete(struct mdoc *m, struct mdoc_node *p) +{ + + while (p->child) { + assert(p->nchild); + mdoc_node_delete(m, p->child); + } + assert(0 == p->nchild); + + mdoc_node_unlink(m, p); + mdoc_node_free(p); +} + + +/* + * Parse free-form text, that is, a line that does not begin with the + * control character. + */ +static int +mdoc_ptext(struct mdoc *m, int line, char *buf, int offs) +{ + char *c, *ws, *end; + struct mdoc_node *n; + + /* Ignore bogus comments. */ + + if ('\\' == buf[offs] && + '.' == buf[offs + 1] && + '"' == buf[offs + 2]) + return(mdoc_pmsg(m, line, offs, MANDOCERR_BADCOMMENT)); + + /* No text before an initial macro. 
*/ + + if (SEC_NONE == m->lastnamed) + return(mdoc_pmsg(m, line, offs, MANDOCERR_NOTEXT)); + + assert(m->last); + n = m->last; + + /* + * Divert directly to list processing if we're encountering a + * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry + * (a MDOC_BODY means it's already open, in which case we should + * process within its context in the normal way). + */ + + if (MDOC_Bl == n->tok && MDOC_BODY == n->type && + LIST_column == n->data.Bl.type) { + /* `Bl' is open without any children. */ + m->flags |= MDOC_FREECOL; + return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); + } + + if (MDOC_It == n->tok && MDOC_BLOCK == n->type && + NULL != n->parent && + MDOC_Bl == n->parent->tok && + LIST_column == n->parent->data.Bl.type) { + /* `Bl' has block-level `It' children. */ + m->flags |= MDOC_FREECOL; + return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); + } + + /* + * Search for the beginning of unescaped trailing whitespace (ws) + * and for the first character not to be output (end). + */ + + /* FIXME: replace with strcspn(). */ + ws = NULL; + for (c = end = buf + offs; *c; c++) { + switch (*c) { + case '-': + if (mandoc_hyph(buf + offs, c)) + *c = ASCII_HYPH; + ws = NULL; + break; + case ' ': + if (NULL == ws) + ws = c; + continue; + case '\t': + /* + * Always warn about trailing tabs, + * even outside literal context, + * where they should be put on the next line. + */ + if (NULL == ws) + ws = c; + /* + * Strip trailing tabs in literal context only; + * outside, they affect the next line. + */ + if (MDOC_LITERAL & m->flags) + continue; + break; + case '\\': + /* Skip the escaped character, too, if any. */ + if (c[1]) + c++; + /* FALLTHROUGH */ + default: + ws = NULL; + break; + } + end = c + 1; + } + *end = '\0'; + + if (ws) + if ( ! mdoc_pmsg(m, line, (int)(ws-buf), MANDOCERR_EOLNSPACE)) + return(0); + + if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) { + if ( ! 
mdoc_pmsg(m, line, (int)(c-buf), MANDOCERR_NOBLANKLN)) + return(0); + + /* + * Insert a `Pp' in the case of a blank line. Technically, + * blank lines aren't allowed, but enough manuals assume this + * behaviour that we want to work around it. + */ + if ( ! mdoc_elem_alloc(m, line, offs, MDOC_Pp, NULL)) + return(0); + + m->next = MDOC_NEXT_SIBLING; + return(1); + } + + if ( ! mdoc_word_alloc(m, line, offs, buf+offs)) + return(0); + + if (MDOC_LITERAL & m->flags) + return(1); + + /* + * End-of-sentence check. If the last character is an unescaped + * EOS character, then flag the node as being the end of a + * sentence. The front-end will know how to interpret this. + */ + + assert(buf < end); + + if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) + m->last->flags |= MDOC_EOS; + + return(1); +} + + +static int +macrowarn(struct mdoc *m, int ln, const char *buf, int offs) +{ + int rc; + + rc = mdoc_vmsg(m, MANDOCERR_MACRO, ln, offs, + "unknown macro: %s%s", + buf, strlen(buf) > 3 ? "..." : ""); + + /* FIXME: logic should be in driver. */ + /* FIXME: broken, will error out and not omit a message. */ + return(MDOC_IGN_MACRO & m->pflags ? rc : 0); +} + + +/* + * Parse a macro line, that is, a line beginning with the control + * character. + */ +static int +mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs) +{ + enum mdoct tok; + int i, j, sv; + char mac[5]; + struct mdoc_node *n; + + /* Empty lines are ignored. */ + + offs++; + + if ('\0' == buf[offs]) + return(1); + + i = offs; + + /* Accept whitespace after the initial control char. */ + + if (' ' == buf[i]) { + i++; + while (buf[i] && ' ' == buf[i]) + i++; + if ('\0' == buf[i]) + return(1); + } + + sv = i; + + /* Copy the first word into a nil-terminated buffer. */ + + for (j = 0; j < 4; j++, i++) { + if ('\0' == (mac[j] = buf[i])) + break; + else if (' ' == buf[i]) + break; + + /* Check for invalid characters. */ + + if (isgraph((u_char)buf[i])) + continue; + if ( ! 
mdoc_pmsg(m, ln, i, MANDOCERR_BADCHAR)) + return(0); + i--; + } + + mac[j] = '\0'; + + if (j == 4 || j < 2) { + if ( ! macrowarn(m, ln, mac, sv)) + goto err; + return(1); + } + + if (MDOC_MAX == (tok = mdoc_hash_find(mac))) { + if ( ! macrowarn(m, ln, mac, sv)) + goto err; + return(1); + } + + /* The macro is sane. Jump to the next word. */ + + while (buf[i] && ' ' == buf[i]) + i++; + + /* + * Trailing whitespace. Note that tabs are allowed to be passed + * into the parser as "text", so we only warn about spaces here. + */ + + if ('\0' == buf[i] && ' ' == buf[i - 1]) + if ( ! mdoc_pmsg(m, ln, i - 1, MANDOCERR_EOLNSPACE)) + goto err; + + /* + * If an initial macro or a list invocation, divert directly + * into macro processing. + */ + + if (NULL == m->last || MDOC_It == tok || MDOC_El == tok) { + if ( ! mdoc_macro(m, tok, ln, sv, &i, buf)) + goto err; + return(1); + } + + n = m->last; + assert(m->last); + + /* + * If the first macro of a `Bl -column', open an `It' block + * context around the parsed macro. + */ + + if (MDOC_Bl == n->tok && MDOC_BODY == n->type && + LIST_column == n->data.Bl.type) { + m->flags |= MDOC_FREECOL; + if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) + goto err; + return(1); + } + + /* + * If we're following a block-level `It' within a `Bl -column' + * context (perhaps opened in the above block or in ptext()), + * then open an `It' block context around the parsed macro. + */ + + if (MDOC_It == n->tok && MDOC_BLOCK == n->type && + NULL != n->parent && + MDOC_Bl == n->parent->tok && + LIST_column == n->parent->data.Bl.type) { + m->flags |= MDOC_FREECOL; + if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) + goto err; + return(1); + } + + /* Normal processing of a macro. */ + + if ( ! mdoc_macro(m, tok, ln, sv, &i, buf)) + goto err; + + return(1); + +err: /* Error out. 
*/ + + m->flags |= MDOC_HALT; + return(0); +} + + diff --git a/commands/mdocml/mdoc.h b/commands/mdocml/mdoc.h new file mode 100644 index 000000000..7a84ab16c --- /dev/null +++ b/commands/mdocml/mdoc.h @@ -0,0 +1,345 @@ +/* $Id: mdoc.h,v 1.90 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef MDOC_H +#define MDOC_H + +/* + * This library implements a validating scanner/parser for ``mdoc'' roff + * macro documents, a.k.a. BSD manual page documents. The mdoc.c file + * drives the parser, while macro.c describes the macro ontologies. + * validate.c pre- and post-validates parsed macros, and action.c + * performs actions on parsed and validated macros. + */ + +/* What follows is a list of ALL possible macros. 
*/
+/* NOTE: mdoc_actions[] in mdoc_action.c lists its entries in this same order. */
+enum	mdoct {
+	MDOC_Ap = 0,
+	MDOC_Dd,
+	MDOC_Dt,
+	MDOC_Os,
+	MDOC_Sh,
+	MDOC_Ss,
+	MDOC_Pp,
+	MDOC_D1,
+	MDOC_Dl,
+	MDOC_Bd,
+	MDOC_Ed,
+	MDOC_Bl,
+	MDOC_El,
+	MDOC_It,
+	MDOC_Ad,
+	MDOC_An,
+	MDOC_Ar,
+	MDOC_Cd,
+	MDOC_Cm,
+	MDOC_Dv,
+	MDOC_Er,
+	MDOC_Ev,
+	MDOC_Ex,
+	MDOC_Fa,
+	MDOC_Fd,
+	MDOC_Fl,
+	MDOC_Fn,
+	MDOC_Ft,
+	MDOC_Ic,
+	MDOC_In,
+	MDOC_Li,
+	MDOC_Nd,
+	MDOC_Nm,
+	MDOC_Op,
+	MDOC_Ot,
+	MDOC_Pa,
+	MDOC_Rv,
+	MDOC_St,
+	MDOC_Va,
+	MDOC_Vt,
+	MDOC_Xr,
+	MDOC__A,
+	MDOC__B,
+	MDOC__D,
+	MDOC__I,
+	MDOC__J,
+	MDOC__N,
+	MDOC__O,
+	MDOC__P,
+	MDOC__R,
+	MDOC__T,
+	MDOC__V,
+	MDOC_Ac,
+	MDOC_Ao,
+	MDOC_Aq,
+	MDOC_At,
+	MDOC_Bc,
+	MDOC_Bf,
+	MDOC_Bo,
+	MDOC_Bq,
+	MDOC_Bsx,
+	MDOC_Bx,
+	MDOC_Db,
+	MDOC_Dc,
+	MDOC_Do,
+	MDOC_Dq,
+	MDOC_Ec,
+	MDOC_Ef,
+	MDOC_Em,
+	MDOC_Eo,
+	MDOC_Fx,
+	MDOC_Ms,
+	MDOC_No,
+	MDOC_Ns,
+	MDOC_Nx,
+	MDOC_Ox,
+	MDOC_Pc,
+	MDOC_Pf,
+	MDOC_Po,
+	MDOC_Pq,
+	MDOC_Qc,
+	MDOC_Ql,
+	MDOC_Qo,
+	MDOC_Qq,
+	MDOC_Re,
+	MDOC_Rs,
+	MDOC_Sc,
+	MDOC_So,
+	MDOC_Sq,
+	MDOC_Sm,
+	MDOC_Sx,
+	MDOC_Sy,
+	MDOC_Tn,
+	MDOC_Ux,
+	MDOC_Xc,
+	MDOC_Xo,
+	MDOC_Fo,
+	MDOC_Fc,
+	MDOC_Oo,
+	MDOC_Oc,
+	MDOC_Bk,
+	MDOC_Ek,
+	MDOC_Bt,
+	MDOC_Hf,
+	MDOC_Fr,
+	MDOC_Ud,
+	MDOC_Lb,
+	MDOC_Lp,
+	MDOC_Lk,
+	MDOC_Mt,
+	MDOC_Brq,
+	MDOC_Bro,
+	MDOC_Brc,
+	MDOC__C,
+	MDOC_Es,
+	MDOC_En,
+	MDOC_Dx,
+	MDOC__Q,
+	MDOC_br,
+	MDOC_sp,
+	MDOC__U,
+	MDOC_Ta,
+	MDOC_MAX	/* number of macros; also the "no macro" token (text nodes, failed lookups) */
+};
+
+/* What follows is a list of ALL possible macro arguments. 
*/
+
+#define	MDOC_Split	 0
+#define	MDOC_Nosplit	 1
+#define	MDOC_Ragged	 2
+#define	MDOC_Unfilled	 3
+#define	MDOC_Literal	 4
+#define	MDOC_File	 5
+#define	MDOC_Offset	 6
+#define	MDOC_Bullet	 7
+#define	MDOC_Dash	 8
+#define	MDOC_Hyphen	 9
+#define	MDOC_Item	 10
+#define	MDOC_Enum	 11
+#define	MDOC_Tag	 12
+#define	MDOC_Diag	 13
+#define	MDOC_Hang	 14
+#define	MDOC_Ohang	 15
+#define	MDOC_Inset	 16
+#define	MDOC_Column	 17
+#define	MDOC_Width	 18
+#define	MDOC_Compact	 19
+#define	MDOC_Std	 20
+#define	MDOC_Filled	 21
+#define	MDOC_Words	 22
+#define	MDOC_Emphasis	 23
+#define	MDOC_Symbolic	 24
+#define	MDOC_Nested	 25
+#define	MDOC_Centred	 26
+#define	MDOC_ARG_MAX	 27	/* one past the last argument id */
+
+/* Type of a syntax node. */
+enum	mdoc_type {
+	MDOC_TEXT,
+	MDOC_ELEM,
+	MDOC_HEAD,
+	MDOC_TAIL,
+	MDOC_BODY,
+	MDOC_BLOCK,
+	MDOC_ROOT
+};
+
+/* Section (named/unnamed) of `Sh'. */
+enum	mdoc_sec {
+	SEC_NONE,		/* No section, yet. */
+	SEC_NAME,
+	SEC_LIBRARY,
+	SEC_SYNOPSIS,
+	SEC_DESCRIPTION,
+	SEC_IMPLEMENTATION,
+	SEC_RETURN_VALUES,
+	SEC_ENVIRONMENT,
+	SEC_FILES,
+	SEC_EXIT_STATUS,
+	SEC_EXAMPLES,
+	SEC_DIAGNOSTICS,
+	SEC_COMPATIBILITY,
+	SEC_ERRORS,
+	SEC_SEE_ALSO,
+	SEC_STANDARDS,
+	SEC_HISTORY,
+	SEC_AUTHORS,
+	SEC_CAVEATS,
+	SEC_BUGS,
+	SEC_SECURITY,
+	SEC_CUSTOM,		/* User-defined. */
+	SEC__MAX
+};
+
+/* Information from prologue. */
+struct	mdoc_meta {
+	char		 *msec;		/* manual section string, from `Dt' ("1" if omitted) */
+	char		 *vol;		/* volume, from `Dt' ("LOCAL" if omitted) */
+	char		 *arch;		/* architecture, from `Dt'; may be NULL */
+	time_t		  date;		/* from `Dd'; current time if unparsable */
+	char		 *title;	/* from `Dt' ("UNKNOWN" if omitted) */
+	char		 *os;		/* from `Os' */
+	char		 *name;		/* from the first `Nm'; may be NULL */
+};
+
+/* An argument to a macro (multiple values = `It -column'). 
*/
+struct	mdoc_argv {
+	int	  	  arg;		/* argument id, e.g. MDOC_Width */
+	int		  line;		/* parse line */
+	int		  pos;		/* parse column */
+	size_t		  sz;		/* number of entries in value[] */
+	char		**value;	/* argument values */
+};
+
+struct 	mdoc_arg {
+	size_t		  argc;		/* number of entries in argv[] */
+	struct mdoc_argv *argv;
+	unsigned int	  refcnt;	/* bumped for each BLOCK/ELEM node sharing the list */
+};
+
+enum	mdoc_list {
+	LIST__NONE = 0,
+	LIST_bullet,
+	LIST_column,
+	LIST_dash,
+	LIST_diag,
+	LIST_enum,
+	LIST_hang,
+	LIST_hyphen,
+	LIST_inset,
+	LIST_item,
+	LIST_ohang,
+	LIST_tag
+};
+
+enum	mdoc_disp {
+	DISP__NONE = 0,
+	DISP_centred,
+	DISP_ragged,
+	DISP_unfilled,
+	DISP_filled,
+	DISP_literal
+};
+
+struct	mdoc_bd {
+	const char	 *offs; /* -offset */
+	enum mdoc_disp	  type; /* -ragged, etc. */
+	int		  comp; /* -compact */
+};
+
+struct	mdoc_bl {
+	const char	 *width; /* -width */
+	const char	 *offs; /* -offset */
+	enum mdoc_list	  type; /* -tag, -enum, etc. */
+	int		  comp; /* -compact */
+};
+
+/* Node in AST. */
+struct	mdoc_node {
+	struct mdoc_node *parent; /* parent AST node */
+	struct mdoc_node *child; /* first child AST node */
+	struct mdoc_node *next; /* sibling AST node */
+	struct mdoc_node *prev; /* prior sibling AST node */
+	int		  nchild; /* number children */
+	int		  line; /* parse line */
+	int		  pos; /* parse column */
+	enum mdoct	  tok; /* tok or MDOC_MAX if none */
+	int		  flags;
+#define	MDOC_VALID	 (1 << 0) /* has been validated */
+#define	MDOC_ACTED	 (1 << 1) /* has been acted upon */
+#define	MDOC_EOS	 (1 << 2) /* at sentence boundary */
+#define	MDOC_LINE	 (1 << 3) /* first macro/text on line */
+	enum mdoc_type	  type; /* AST node type */
+	enum mdoc_sec	  sec; /* section in effect when the node was allocated */
+	struct mdoc_arg	 *args; /* BLOCK/ELEM */
+#ifdef	UGLY
+	struct mdoc_node *pending; /* BLOCK */
+#endif
+	struct mdoc_node *head; /* BLOCK */
+	struct mdoc_node *body; /* BLOCK */
+	struct mdoc_node *tail; /* BLOCK */
+	char		 *string; /* TEXT */
+
+	union {
+		struct mdoc_bl Bl;
+		struct mdoc_bd Bd;
+	} data;
+};
+
+#define	MDOC_IGN_SCOPE	 (1 << 0) /* Ignore scope violations. */
+#define	MDOC_IGN_ESCAPE	 (1 << 1) /* Ignore bad escape sequences. */
+#define	MDOC_IGN_MACRO	 (1 << 2) /* Ignore unknown macros. */
+
+/* See mdoc.3 for documentation. */
+
+extern	const char *const *mdoc_macronames;
+extern	const char *const *mdoc_argnames;
+
+__BEGIN_DECLS
+
+struct	mdoc;
+
+/* See mdoc.3 for documentation. */
+
+void	 	  mdoc_free(struct mdoc *);
+struct	mdoc	 *mdoc_alloc(void *, int, mandocmsg);
+void		  mdoc_reset(struct mdoc *);
+int	 	  mdoc_parseln(struct mdoc *, int, char *, int);
+const struct mdoc_node *mdoc_node(const struct mdoc *);
+const struct mdoc_meta *mdoc_meta(const struct mdoc *);
+int		  mdoc_endparse(struct mdoc *);
+
+__END_DECLS
+
+#endif /*!MDOC_H*/
diff --git a/commands/mdocml/mdoc_action.c b/commands/mdocml/mdoc_action.c
new file mode 100644
index 000000000..3cc87e418
--- /dev/null
+++ b/commands/mdocml/mdoc_action.c
@@ -0,0 +1,1034 @@
+/*	$Id: mdoc_action.c,v 1.71 2010/06/19 20:46:28 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009 Kristaps Dzonsons
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef	OSNAME
+#include <sys/utsname.h>	/* uname(), struct utsname: post_os() */
+#endif
+
+#include <assert.h>
+#include <stdio.h>	/* snprintf(), BUFSIZ */
+#include <stdlib.h>	/* free(), abort() */
+#include <string.h>
+#include <time.h>	/* time(): post_dd() */
+
+#include "mandoc.h"
+#include "libmdoc.h"
+#include "libmandoc.h"
+
+#define	POST_ARGS struct mdoc *m, struct mdoc_node *n
+#define	PRE_ARGS  struct mdoc *m, struct mdoc_node *n
+
+#define	NUMSIZ	  32	/* buffer size for a formatted width ("<n>n") */
+#define	DATESIZ	  32	/* buffer size for the concatenated `Dd' arguments */
+
+/*
+ * Per-macro action hooks.  Either hook may be NULL, meaning "nothing
+ * to do".  Hooks return 0 on failure and non-zero otherwise.
+ */
+struct	actions {
+	int	(*pre)(PRE_ARGS);
+	int	(*post)(POST_ARGS);
+};
+
+static	int	  concat(struct mdoc *, char *,
+			const struct mdoc_node *, size_t);
+static	inline int order_rs(enum mdoct);
+
+static	int	  post_ar(POST_ARGS);
+static	int	  post_at(POST_ARGS);
+static	int	  post_bl(POST_ARGS);
+static	int	  post_bl_head(POST_ARGS);
+static	int	  post_bl_tagwidth(POST_ARGS);
+static	int	  post_bl_width(POST_ARGS);
+static	int	  post_dd(POST_ARGS);
+static	int	  post_display(POST_ARGS);
+static	int	  post_dt(POST_ARGS);
+static	int	  post_lb(POST_ARGS);
+static	int	  post_li(POST_ARGS);
+static	int	  post_nm(POST_ARGS);
+static	int	  post_os(POST_ARGS);
+static	int	  post_pa(POST_ARGS);
+static	int	  post_prol(POST_ARGS);
+static	int	  post_rs(POST_ARGS);
+static	int	  post_sh(POST_ARGS);
+static	int	  post_st(POST_ARGS);
+static	int	  post_std(POST_ARGS);
+
+static	int	  pre_bd(PRE_ARGS);
+static	int	  pre_dl(PRE_ARGS);
+
+/*
+ * Indexed by enum mdoct: the entries must stay in the same order as
+ * the enumerators in mdoc.h.
+ */
+static	const struct actions mdoc_actions[MDOC_MAX] = {
+	{ NULL, NULL }, /* Ap */
+	{ NULL, post_dd }, /* Dd */
+	{ NULL, post_dt }, /* Dt */
+	{ NULL, post_os }, /* Os */
+	{ NULL, post_sh }, /* Sh */
+	{ NULL, NULL }, /* Ss */
+	{ NULL, NULL }, /* Pp */
+	{ NULL, NULL }, /* D1 */
+	{ pre_dl, post_display }, /* Dl */
+	{ pre_bd, post_display }, /* Bd */
+	{ NULL, NULL }, /* Ed */
+	{ NULL, post_bl }, /* Bl */
+	{ NULL, NULL }, /* El */
+	{ NULL, NULL }, /* It */
+	{ NULL, NULL }, /* Ad */
+	{ NULL, NULL }, /* An */
+	{ NULL, post_ar }, /* Ar */
+	{ NULL, NULL }, /* Cd */
+	{ NULL, NULL }, /* Cm */
+	{ NULL, NULL }, /* Dv */
+	{ NULL, NULL }, /* Er */
+	{ NULL, NULL }, /* Ev */
+	{ NULL, post_std }, /* Ex */
+	{ NULL, NULL }, /* Fa */
+	{ NULL, NULL }, /* Fd */
+	{ NULL, NULL }, /* Fl */
+	{ NULL, NULL }, /* Fn */
+	{ NULL, NULL }, /* Ft */
+	{ NULL, NULL }, /* Ic */
+	{ NULL, NULL }, /* In */
+	{ NULL, post_li }, /* Li */
+	{ NULL, NULL }, /* Nd */
+	{ NULL, post_nm }, /* Nm */
+	{ NULL, NULL }, /* Op */
+	{ NULL, NULL }, /* Ot */
+	{ NULL, post_pa }, /* Pa */
+	{ NULL, post_std }, /* Rv */
+	{ NULL, post_st }, /* St */
+	{ NULL, NULL }, /* Va */
+	{ NULL, NULL }, /* Vt */
+	{ NULL, NULL }, /* Xr */
+	{ NULL, NULL }, /* %A */
+	{ NULL, NULL }, /* %B */
+	{ NULL, NULL }, /* %D */
+	{ NULL, NULL }, /* %I */
+	{ NULL, NULL }, /* %J */
+	{ NULL, NULL }, /* %N */
+	{ NULL, NULL }, /* %O */
+	{ NULL, NULL }, /* %P */
+	{ NULL, NULL }, /* %R */
+	{ NULL, NULL }, /* %T */
+	{ NULL, NULL }, /* %V */
+	{ NULL, NULL }, /* Ac */
+	{ NULL, NULL }, /* Ao */
+	{ NULL, NULL }, /* Aq */
+	{ NULL, post_at }, /* At */
+	{ NULL, NULL }, /* Bc */
+	{ NULL, NULL }, /* Bf */
+	{ NULL, NULL }, /* Bo */
+	{ NULL, NULL }, /* Bq */
+	{ NULL, NULL }, /* Bsx */
+	{ NULL, NULL }, /* Bx */
+	{ NULL, NULL }, /* Db */
+	{ NULL, NULL }, /* Dc */
+	{ NULL, NULL }, /* Do */
+	{ NULL, NULL }, /* Dq */
+	{ NULL, NULL }, /* Ec */
+	{ NULL, NULL }, /* Ef */
+	{ NULL, NULL }, /* Em */
+	{ NULL, NULL }, /* Eo */
+	{ NULL, NULL }, /* Fx */
+	{ NULL, NULL }, /* Ms */
+	{ NULL, NULL }, /* No */
+	{ NULL, NULL }, /* Ns */
+	{ NULL, NULL }, /* Nx */
+	{ NULL, NULL }, /* Ox */
+	{ NULL, NULL }, /* Pc */
+	{ NULL, NULL }, /* Pf */
+	{ NULL, NULL }, /* Po */
+	{ NULL, NULL }, /* Pq */
+	{ NULL, NULL }, /* Qc */
+	{ NULL, NULL }, /* Ql */
+	{ NULL, NULL }, /* Qo */
+	{ NULL, NULL }, /* Qq */
+	{ NULL, NULL }, /* Re */
+	{ NULL, post_rs }, /* Rs */
+	{ NULL, NULL }, /* Sc */
+	{ NULL, NULL }, /* So */
+	{ NULL, NULL }, /* Sq */
+	{ NULL, NULL }, /* Sm */
+	{ NULL, NULL }, /* Sx */
+	{ NULL, NULL }, /* Sy */
+	{ NULL, NULL }, /* Tn */
+	{ NULL, NULL }, /* Ux */
+	{ NULL, NULL }, /* Xc */
+	{ NULL, NULL }, /* Xo */
+	{ NULL, NULL }, /* Fo */
+	{ NULL, NULL }, /* Fc */
+	{ NULL, NULL }, /* Oo */
+	{ NULL, NULL }, /* Oc */
+	{ NULL, NULL }, /* Bk */
+	{ NULL, NULL }, /* Ek */
+	{ NULL, NULL }, /* Bt */
+	{ NULL, NULL }, /* Hf */
+	{ NULL, NULL }, /* Fr */
+	{ NULL, NULL }, /* Ud */
+	{ NULL, post_lb }, /* Lb */
+	{ NULL, NULL }, /* Lp */
+	{ NULL, NULL }, /* Lk */
+	{ NULL, NULL }, /* Mt */
+	{ NULL, NULL }, /* Brq */
+	{ NULL, NULL }, /* Bro */
+	{ NULL, NULL }, /* Brc */
+	{ NULL, NULL }, /* %C */
+	{ NULL, NULL }, /* Es */
+	{ NULL, NULL }, /* En */
+	{ NULL, NULL }, /* Dx */
+	{ NULL, NULL }, /* %Q */
+	{ NULL, NULL }, /* br */
+	{ NULL, NULL }, /* sp */
+	{ NULL, NULL }, /* %U */
+	{ NULL, NULL }, /* Ta */
+};
+
+#define	RSORD_MAX 14
+
+/* Output order of the `%X' children of an `Rs' block; see post_rs(). */
+static	const enum mdoct rsord[RSORD_MAX] = {
+	MDOC__A,
+	MDOC__T,
+	MDOC__B,
+	MDOC__I,
+	MDOC__J,
+	MDOC__R,
+	MDOC__N,
+	MDOC__V,
+	MDOC__P,
+	MDOC__Q,
+	MDOC__D,
+	MDOC__O,
+	MDOC__C,
+	MDOC__U
+};
+
+
+/*
+ * Run the pre-action of node `n', if its macro has one.  ROOT and TEXT
+ * nodes never have actions.  Returns 0 on failure, non-zero otherwise.
+ */
+int
+mdoc_action_pre(struct mdoc *m, struct mdoc_node *n)
+{
+
+	switch (n->type) {
+	case (MDOC_ROOT):
+		/* FALLTHROUGH */
+	case (MDOC_TEXT):
+		return(1);
+	default:
+		break;
+	}
+
+	if (NULL == mdoc_actions[n->tok].pre)
+		return(1);
+	return((*mdoc_actions[n->tok].pre)(m, n));
+}
+
+
+/*
+ * Run the post-action of the current node (m->last), at most once per
+ * node: MDOC_ACTED is set before the action runs.  Returns 0 on
+ * failure, non-zero otherwise.
+ */
+int
+mdoc_action_post(struct mdoc *m)
+{
+
+	if (MDOC_ACTED & m->last->flags)
+		return(1);
+	m->last->flags |= MDOC_ACTED;
+
+	switch (m->last->type) {
+	case (MDOC_TEXT):
+		/* FALLTHROUGH */
+	case (MDOC_ROOT):
+		return(1);
+	default:
+		break;
+	}
+
+	if (NULL == mdoc_actions[m->last->tok].post)
+		return(1);
+	return((*mdoc_actions[m->last->tok].post)(m, m->last));
+}
+
+
+/*
+ * Concatenate sibling nodes together.  All siblings must be of type
+ * MDOC_TEXT or an assertion is raised.  Concatenation is separated by a
+ * single whitespace. 
+ */
+static int
+concat(struct mdoc *m, char *p, const struct mdoc_node *n, size_t sz)
+{
+	const struct mdoc_node *cur;
+
+	assert(sz);
+	*p = '\0';
+
+	/*
+	 * XXX: the buffer could be grown on demand, but overflowing it
+	 * is unlikely enough that a hard failure is accepted for now.
+	 */
+	cur = n;
+	while (NULL != cur) {
+		assert(MDOC_TEXT == cur->type);
+		if (strlcat(p, cur->string, sz) >= sz)
+			goto toolong;
+		/* A separator goes between words, not after the last. */
+		if (NULL != cur->next && strlcat(p, " ", sz) >= sz)
+			goto toolong;
+		cur = cur->next;
+	}
+
+	return(1);
+
+toolong:
+	mdoc_nmsg(m, cur, MANDOCERR_MEM);
+	return(0);
+}
+
+
+/*
+ * A macro that accepts `-std' and was given no argument gets the
+ * document name (set by `Nm') as its single child, provided that a
+ * name is known.
+ */
+static int
+post_std(POST_ARGS)
+{
+
+	if (NULL != n->child || NULL == m->meta.name)
+		return(1);
+
+	m->next = MDOC_NEXT_CHILD;
+	if ( ! mdoc_word_alloc(m, n->line, n->pos, m->meta.name))
+		return(0);
+
+	/* The new word became the parse point; step back to the macro. */
+	m->last = n;
+	return(1);
+}
+
+
+/*
+ * The first `Nm' of a document defines the document name: its text
+ * children, joined by blanks.  Later uses leave the name alone.  See
+ * also post_std().
+ */
+static int
+post_nm(POST_ARGS)
+{
+	char		 name[BUFSIZ];
+
+	if (NULL == m->meta.name) {
+		if ( ! concat(m, name, n->child, sizeof(name)))
+			return(0);
+		m->meta.name = mandoc_strdup(name);
+	}
+
+	return(1);
+}
+
+
+/*
+ * Look up the value of `Lb' for matching predefined strings.  If it has
+ * one, then substitute the current value for the formatted value.  Note
+ * that the lookup may fail (we can provide arbitrary strings). 
+ */
+/* ARGSUSED */
+static int
+post_lb(POST_ARGS)
+{
+	struct mdoc_node *word;
+	const char	 *lib;
+	char		 *text;
+	size_t		  len;
+
+	word = n->child;
+	assert(MDOC_TEXT == word->type);
+
+	lib = mdoc_a2lib(word->string);
+	if (NULL == lib) {
+		/* Unknown library: wrap the name as given in quotes. */
+		len = strlen(word->string) + 2;
+		len += strlen("\\(lqlibrary\\(rq");
+		text = mandoc_malloc(len);
+		snprintf(text, len, "library \\(lq%s\\(rq", word->string);
+	} else
+		text = mandoc_strdup(lib);
+
+	free(word->string);
+	word->string = text;
+	return(1);
+}
+
+
+/*
+ * Replace the argument of `St' by the formatted name of the standard,
+ * if mdoc_a2st() knows it; an unknown argument is left as it is.
+ */
+/* ARGSUSED */
+static int
+post_st(POST_ARGS)
+{
+	struct mdoc_node *word;
+	const char	 *std;
+
+	word = n->child;
+	assert(MDOC_TEXT == word->type);
+
+	std = mdoc_a2st(word->string);
+	if (NULL == std)
+		return(1);
+
+	free(word->string);
+	word->string = mandoc_strdup(std);
+	return(1);
+}
+
+
+/*
+ * Expand the argument of `At'.  A version known to mdoc_a2att() is
+ * replaced by its formatted string; any other argument is prefixed
+ * with ``AT&T UNIX ''.  Without an argument, the single word
+ * ``AT&T UNIX'' is supplied.
+ */
+static int
+post_at(POST_ARGS)
+{
+	struct mdoc_node *word;
+	const char	 *att, *prefix;
+	char		 *text;
+	size_t		  len;
+
+	word = n->child;
+
+	if (NULL == word) {
+		m->next = MDOC_NEXT_CHILD;
+		if ( ! mdoc_word_alloc(m, n->line, n->pos, "AT&T UNIX"))
+			return(0);
+		/* Step back from the new word to the macro itself. */
+		m->last = n;
+		return(1);
+	}
+
+	assert(MDOC_TEXT == word->type);
+
+	att = mdoc_a2att(word->string);
+	if (NULL != att) {
+		free(word->string);
+		word->string = mandoc_strdup(att);
+		return(1);
+	}
+
+	prefix = "AT&T UNIX ";
+	len = strlen(prefix) + strlen(word->string) + 1;
+	text = mandoc_malloc(len);
+	strlcpy(text, prefix, len);
+	strlcat(text, word->string, len);
+	free(word->string);
+	word->string = text;
+	return(1);
+}
+
+
+/*
+ * Mark the current section.  The ``named'' section (lastnamed) is set
+ * whenever the current section isn't a custom section--we use this to
+ * keep track of section ordering. 
Also check that the section is
+ * allowed within the document's manual section.
+ */
+static int
+post_sh(POST_ARGS)
+{
+	enum mdoc_sec	 sec;
+	char		 buf[BUFSIZ];
+
+	if (MDOC_HEAD != n->type)
+		return(1);
+
+	if ( ! concat(m, buf, n->child, BUFSIZ))
+		return(0);
+	sec = mdoc_str2sec(buf);
+	/*
+	 * The first section should always make us move into a non-new
+	 * state.
+	 */
+	if (SEC_NONE == m->lastnamed || SEC_CUSTOM != sec)
+		m->lastnamed = sec;
+
+	/* Some sections only live in certain manual sections. */
+
+	switch ((m->lastsec = sec)) {
+	case (SEC_RETURN_VALUES):
+		/* FALLTHROUGH */
+	case (SEC_ERRORS):
+		assert(m->meta.msec);
+		if (*m->meta.msec == '2')
+			break;
+		if (*m->meta.msec == '3')
+			break;
+		if (*m->meta.msec == '9')
+			break;
+		return(mdoc_nmsg(m, n, MANDOCERR_SECMSEC));
+	default:
+		break;
+	}
+	return(1);
+}
+
+
+/*
+ * Parse out the contents of `Dt'.  See in-line documentation for how we
+ * handle the various fields of this macro.
+ */
+static int
+post_dt(POST_ARGS)
+{
+	struct mdoc_node *nn;
+	const char	 *cp;
+
+	/*
+	 * A repeated `Dt' replaces everything an earlier one stored.
+	 * This includes msec, which every path below re-allocates.
+	 */
+
+	free(m->meta.title);
+	free(m->meta.vol);
+	free(m->meta.arch);
+	free(m->meta.msec);
+
+	m->meta.title = m->meta.vol = m->meta.arch = m->meta.msec = NULL;
+
+	/* Handles: `.Dt'
+	 *   --> title = UNKNOWN, volume = LOCAL, msec = 1, arch = NULL
+	 */
+
+	if (NULL == (nn = n->child)) {
+		/* XXX: make these macro values. */
+		/* FIXME: warn about missing values. */
+		m->meta.title = mandoc_strdup("UNKNOWN");
+		m->meta.vol = mandoc_strdup("LOCAL");
+		m->meta.msec = mandoc_strdup("1");
+		return(post_prol(m, n));
+	}
+
+	/* Handles: `.Dt TITLE'
+	 *   --> title = TITLE, volume = LOCAL, msec = 1, arch = NULL
+	 */
+
+	m->meta.title = mandoc_strdup
+		('\0' == nn->string[0] ? "UNKNOWN" : nn->string);
+
+	if (NULL == (nn = nn->next)) {
+		/* FIXME: warn about missing msec. */
+		/* XXX: make this a macro value. */
+		m->meta.vol = mandoc_strdup("LOCAL");
+		m->meta.msec = mandoc_strdup("1");
+		return(post_prol(m, n));
+	}
+
+	/* Handles: `.Dt TITLE SEC'
+	 *   --> title = TITLE, volume = SEC is msec ?
+	 *                      format(msec) : SEC,
+	 *       msec = SEC (kept as given), arch = NULL
+	 */
+
+	cp = mdoc_a2msec(nn->string);
+	if (cp) {
+		m->meta.vol = mandoc_strdup(cp);
+		m->meta.msec = mandoc_strdup(nn->string);
+	} else if (mdoc_nmsg(m, n, MANDOCERR_BADMSEC)) {
+		m->meta.vol = mandoc_strdup(nn->string);
+		m->meta.msec = mandoc_strdup(nn->string);
+	} else
+		return(0);
+
+	if (NULL == (nn = nn->next))
+		return(post_prol(m, n));
+
+	/* Handles: `.Dt TITLE SEC VOL'
+	 *   --> title = TITLE, volume = VOL is vol ?
+	 *       format(VOL) :
+	 *           VOL is arch ? format(arch) :
+	 *               VOL
+	 */
+
+	cp = mdoc_a2vol(nn->string);
+	if (cp) {
+		free(m->meta.vol);
+		m->meta.vol = mandoc_strdup(cp);
+	} else {
+		/* FIXME: warn about bad arch. */
+		cp = mdoc_a2arch(nn->string);
+		if (NULL == cp) {
+			free(m->meta.vol);
+			m->meta.vol = mandoc_strdup(nn->string);
+		} else
+			m->meta.arch = mandoc_strdup(cp);
+	}
+
+	/* Ignore any subsequent parameters... */
+	/* FIXME: warn about subsequent parameters. */
+
+	return(post_prol(m, n));
+}
+
+
+/*
+ * Set the operating system by way of the `Os' macro.  Note that if an
+ * argument isn't provided and -DOSNAME="\"foo\"" is provided during
+ * compilation, this value will be used instead of filling in "sysname
+ * release" from uname().
+ */
+static int
+post_os(POST_ARGS)
+{
+	char		  buf[BUFSIZ];
+#ifndef OSNAME
+	struct utsname	  utsname;
+#endif
+
+	if ( ! concat(m, buf, n->child, BUFSIZ))
+		return(0);
+
+	/* XXX: yes, these can all be dynamically-adjusted buffers, but
+	 * it's really not worth the extra hackery.
+	 */
+
+	if ('\0' == buf[0]) {
+#ifdef OSNAME
+		if (strlcat(buf, OSNAME, BUFSIZ) >= BUFSIZ) {
+			mdoc_nmsg(m, n, MANDOCERR_MEM);
+			return(0);
+		}
+#else /*!OSNAME */
+		if (-1 == uname(&utsname))
+			return(mdoc_nmsg(m, n, MANDOCERR_UTSNAME));
+
+		if (strlcat(buf, utsname.sysname, BUFSIZ) >= BUFSIZ) {
+			mdoc_nmsg(m, n, MANDOCERR_MEM);
+			return(0);
+		}
+		/* The size argument is the size of buf, like the test. */
+		if (strlcat(buf, " ", BUFSIZ) >= BUFSIZ) {
+			mdoc_nmsg(m, n, MANDOCERR_MEM);
+			return(0);
+		}
+		if (strlcat(buf, utsname.release, BUFSIZ) >= BUFSIZ) {
+			mdoc_nmsg(m, n, MANDOCERR_MEM);
+			return(0);
+		}
+#endif /*!OSNAME*/
+	}
+
+	/*
+	 * Drop the old value only now that the new one is complete, so
+	 * that the error returns above never leave a freed pointer in
+	 * the meta data.
+	 */
+
+	free(m->meta.os);
+	m->meta.os = mandoc_strdup(buf);
+	return(post_prol(m, n));
+}
+
+
+/*
+ * Calculate the -width for a `Bl -tag' list if it hasn't been provided.
+ * Uses the first head macro.  NOTE AGAIN: this is ONLY if the -width
+ * argument has NOT been provided.  See post_bl_width() for converting
+ * the -width string.
+ */
+static int
+post_bl_tagwidth(POST_ARGS)
+{
+	struct mdoc_node *nn, *hc;
+	size_t		  sz, ssz;
+	int		  i;
+	char		  buf[NUMSIZ];
+
+	sz = 10;
+
+	/* Only the first `It' is consulted: every path below leaves. */
+
+	for (nn = n->body->child; nn; nn = nn->next) {
+		if (MDOC_It != nn->tok)
+			continue;
+
+		assert(MDOC_BLOCK == nn->type);
+		hc = nn->head->child;
+
+		/* An `It' without head arguments: keep the default. */
+
+		if (NULL == hc)
+			break;
+
+		if (MDOC_TEXT == hc->type) {
+			sz = strlen(hc->string) + 1;
+			break;
+		}
+
+		if (0 != (ssz = mdoc_macro2len(hc->tok)))
+			sz = ssz;
+		else if ( ! mdoc_nmsg(m, n, MANDOCERR_NOWIDTHARG))
+			return(0);
+
+		break;
+	}
+
+	/* Defaults to ten ens. */
+
+	snprintf(buf, NUMSIZ, "%zun", sz);
+
+	/*
+	 * We have to dynamically add this to the macro's argument list.
+	 * We're guaranteed that a MDOC_Width doesn't already exist.
+	 */
+
+	assert(n->args);
+	i = (int)(n->args->argc)++;
+
+	n->args->argv = mandoc_realloc(n->args->argv,
+			n->args->argc * sizeof(struct mdoc_argv));
+
+	n->args->argv[i].arg = MDOC_Width;
+	n->args->argv[i].line = n->line;
+	n->args->argv[i].pos = n->pos;
+	n->args->argv[i].sz = 1;
+	n->args->argv[i].value = mandoc_malloc(sizeof(char *));
+	n->args->argv[i].value[0] = mandoc_strdup(buf);
+
+	/* Set our width! */
+	n->data.Bl.width = n->args->argv[i].value[0];
+	return(1);
+}
+
+
+/*
+ * Calculate the real width of a list from the -width string, which may
+ * contain a macro (with a known default width), a literal string, or a
+ * scaling width.
+ */
+static int
+post_bl_width(POST_ARGS)
+{
+	size_t		  width;
+	int		  i;
+	enum mdoct	  tok;
+	char		  buf[NUMSIZ];
+
+	/*
+	 * If the value to -width is a macro, then we re-write it to be
+	 * the macro's width as set in share/tmac/mdoc/doc-common.
+	 */
+
+	if (0 == strcmp(n->data.Bl.width, "Ds"))
+		width = 6;
+	else if (MDOC_MAX == (tok = mdoc_hash_find(n->data.Bl.width)))
+		return(1);
+	else if (0 == (width = mdoc_macro2len(tok)))
+		return(mdoc_nmsg(m, n, MANDOCERR_BADWIDTH));
+
+	/* The value already exists: free and reallocate it. */
+
+	assert(n->args);
+
+	for (i = 0; i < (int)n->args->argc; i++)
+		if (MDOC_Width == n->args->argv[i].arg)
+			break;
+
+	assert(i < (int)n->args->argc);
+
+	snprintf(buf, NUMSIZ, "%zun", width);
+	free(n->args->argv[i].value[0]);
+	n->args->argv[i].value[0] = mandoc_strdup(buf);
+
+	/* Set our width! */
+	n->data.Bl.width = n->args->argv[i].value[0];
+	return(1);
+}
+
+
+/*
+ * Do processing for -column lists, which can have two distinct styles
+ * of invocation.  Merge these two styles into a consistent form. 
+ */
+/* ARGSUSED */
+static int
+post_bl_head(POST_ARGS)
+{
+	int		  i, c;
+	struct mdoc_node *np, *nn, *nnp;
+
+	if (LIST_column != n->data.Bl.type)
+		return(1);
+	else if (NULL == n->child)
+		return(1);
+
+	np = n->parent;
+	assert(np->args);
+
+	/* Find the -column argument of the enclosing block. */
+	for (c = 0; c < (int)np->args->argc; c++)
+		if (MDOC_Column == np->args->argv[c].arg)
+			break;
+
+	assert(c < (int)np->args->argc);
+	assert(0 == np->args->argv[c].sz);
+
+	/*
+	 * Accommodate new-style groff column syntax.  Shuffle the
+	 * child nodes, all of which must be TEXT, as arguments for the
+	 * column field.  Then, delete the head children.
+	 */
+
+	np->args->argv[c].sz = (size_t)n->nchild;
+	np->args->argv[c].value = mandoc_malloc
+		((size_t)n->nchild * sizeof(char *));
+
+	for (i = 0, nn = n->child; nn; i++) {
+		/* The string moves to the argument; the node must not free it. */
+		np->args->argv[c].value[i] = nn->string;
+		nn->string = NULL;
+		nnp = nn;
+		nn = nn->next;	/* advance before the node goes away */
+		mdoc_node_delete(NULL, nnp);
+	}
+
+	n->nchild = 0;
+	n->child = NULL;
+	return(1);
+}
+
+/*
+ * Post-process `Bl': the HEAD is handed to post_bl_head(), the BLOCK
+ * gets its -width computed or normalised, and other node types are
+ * left alone.
+ */
+static int
+post_bl(POST_ARGS)
+{
+	struct mdoc_node *nn;
+	const char	 *ww;
+
+	if (MDOC_HEAD == n->type)
+		return(post_bl_head(m, n));
+	if (MDOC_BLOCK != n->type)
+		return(1);
+
+	/*
+	 * These are fairly complicated, so we've broken them into two
+	 * functions.  post_bl_tagwidth() is called when a -tag is
+	 * specified, but no -width (it must be guessed).  The second
+	 * when a -width is specified (macro indicators must be
+	 * rewritten into real lengths).
+	 */
+
+	ww = n->data.Bl.width;
+
+	if (LIST_tag == n->data.Bl.type && NULL == n->data.Bl.width) {
+		if ( ! post_bl_tagwidth(m, n))
+			return(0);
+	} else if (NULL != n->data.Bl.width) {
+		if ( ! post_bl_width(m, n))
+			return(0);
+	} else
+		return(1);
+
+	assert(n->data.Bl.width);
+
+	/* If it has changed, propagate new width to children. */
+
+	if (ww == n->data.Bl.width)
+		return(1);
+
+	for (nn = n->child; nn; nn = nn->next)
+		if (MDOC_Bl == nn->tok)
+			nn->data.Bl.width = n->data.Bl.width;
+
+	return(1);
+}
+
+
+/*
+ * The `Pa' macro defaults to a tilde if no value is provided as an
+ * argument.
+ */
+static int
+post_pa(POST_ARGS)
+{
+	struct mdoc_node *np;
+
+	if (n->child)
+		return(1);
+
+	np = n;
+	m->next = MDOC_NEXT_CHILD;
+	if ( ! mdoc_word_alloc(m, n->line, n->pos, "~"))
+		return(0);
+	m->last = np;	/* make the macro the current node again */
+	return(1);
+}
+
+
+/*
+ * Empty `Li' macros get an empty string to make front-ends add an extra
+ * space.
+ */
+static int
+post_li(POST_ARGS)
+{
+	struct mdoc_node *np;
+
+	if (n->child)
+		return(1);
+
+	np = n;
+	m->next = MDOC_NEXT_CHILD;
+	if ( ! mdoc_word_alloc(m, n->line, n->pos, ""))
+		return(0);
+	m->last = np;	/* make the macro the current node again */
+	return(1);
+}
+
+
+/*
+ * The `Ar' macro defaults to two strings "file ..." if no value is
+ * provided as an argument.
+ */
+static int
+post_ar(POST_ARGS)
+{
+	struct mdoc_node *np;
+
+	if (n->child)
+		return(1);
+
+	np = n;
+	m->next = MDOC_NEXT_CHILD;
+	/* XXX: make into macro values. */
+	if ( ! mdoc_word_alloc(m, n->line, n->pos, "file"))
+		return(0);
+	if ( ! mdoc_word_alloc(m, n->line, n->pos, "..."))
+		return(0);
+	m->last = np;	/* make the macro the current node again */
+	return(1);
+}
+
+
+/*
+ * Parse the date field in `Dd'.  A date that cannot be parsed is
+ * reported and replaced by the current time.
+ */
+static int
+post_dd(POST_ARGS)
+{
+	char		  buf[DATESIZ];
+
+	if ( ! concat(m, buf, n->child, DATESIZ))
+		return(0);
+
+	m->meta.date = mandoc_a2time
+		(MTIME_MDOCDATE | MTIME_CANONICAL, buf);
+
+	if (0 == m->meta.date) {
+		if ( ! mdoc_nmsg(m, n, MANDOCERR_BADDATE))
+			return(0);
+		m->meta.date = time(NULL);
+	}
+
+	return(post_prol(m, n));
+}
+
+
+/*
+ * Remove prologue macros from the document after they're processed.
+ * The final document uses mdoc_meta for these values and discards the
+ * originals. 
+ */ +static int +post_prol(POST_ARGS) +{ + + mdoc_node_delete(m, n); + if (m->meta.title && m->meta.date && m->meta.os) + m->flags |= MDOC_PBODY; + return(1); +} + + +/* + * Trigger a literal context. + */ +static int +pre_dl(PRE_ARGS) +{ + + if (MDOC_BODY == n->type) + m->flags |= MDOC_LITERAL; + return(1); +} + + +static int +pre_bd(PRE_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + + if (DISP_literal == n->data.Bd.type) + m->flags |= MDOC_LITERAL; + if (DISP_unfilled == n->data.Bd.type) + m->flags |= MDOC_LITERAL; + + return(1); +} + + +static int +post_display(POST_ARGS) +{ + + if (MDOC_BODY == n->type) + m->flags &= ~MDOC_LITERAL; + return(1); +} + + +static inline int +order_rs(enum mdoct t) +{ + int i; + + for (i = 0; i < (int)RSORD_MAX; i++) + if (rsord[i] == t) + return(i); + + abort(); + /* NOTREACHED */ +} + + +/* ARGSUSED */ +static int +post_rs(POST_ARGS) +{ + struct mdoc_node *nn, *next, *prev; + int o; + + if (MDOC_BLOCK != n->type) + return(1); + + assert(n->body->child); + for (next = NULL, nn = n->body->child->next; nn; nn = next) { + o = order_rs(nn->tok); + + /* Remove `nn' from the chain. */ + next = nn->next; + if (next) + next->prev = nn->prev; + + prev = nn->prev; + if (prev) + prev->next = nn->next; + + nn->prev = nn->next = NULL; + + /* + * Scan back until we reach a node that's ordered before + * us, then set ourselves as being the next. 
+ */ + for ( ; prev; prev = prev->prev) + if (order_rs(prev->tok) <= o) + break; + + nn->prev = prev; + if (prev) { + if (prev->next) + prev->next->prev = nn; + nn->next = prev->next; + prev->next = nn; + continue; + } + + n->body->child->prev = nn; + nn->next = n->body->child; + n->body->child = nn; + } + return(1); +} diff --git a/commands/mdocml/mdoc_argv.c b/commands/mdocml/mdoc_argv.c new file mode 100644 index 000000000..700d558ab --- /dev/null +++ b/commands/mdocml/mdoc_argv.c @@ -0,0 +1,790 @@ +/* $Id: mdoc_argv.c,v 1.54 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "libmdoc.h" +#include "libmandoc.h" + +/* + * Routines to parse arguments of macros. Arguments follow the syntax + * of `-arg [val [valN...]]'. Arguments come in all types: quoted + * arguments, multiple arguments per value, no-value arguments, etc. + * + * There's no limit to the number of arguments that may be allocated.
+ */ + +#define ARGV_NONE (1 << 0) +#define ARGV_SINGLE (1 << 1) +#define ARGV_MULTI (1 << 2) +#define ARGV_OPT_SINGLE (1 << 3) + +#define MULTI_STEP 5 + +static int argv_a2arg(enum mdoct, const char *); +static enum margserr args(struct mdoc *, int, int *, + char *, int, char **); +static int argv(struct mdoc *, int, + struct mdoc_argv *, int *, char *); +static int argv_single(struct mdoc *, int, + struct mdoc_argv *, int *, char *); +static int argv_opt_single(struct mdoc *, int, + struct mdoc_argv *, int *, char *); +static int argv_multi(struct mdoc *, int, + struct mdoc_argv *, int *, char *); + +/* Per-argument flags. */ + +static int mdoc_argvflags[MDOC_ARG_MAX] = { + ARGV_NONE, /* MDOC_Split */ + ARGV_NONE, /* MDOC_Nosplit */ + ARGV_NONE, /* MDOC_Ragged */ + ARGV_NONE, /* MDOC_Unfilled */ + ARGV_NONE, /* MDOC_Literal */ + ARGV_SINGLE, /* MDOC_File */ + ARGV_OPT_SINGLE, /* MDOC_Offset */ + ARGV_NONE, /* MDOC_Bullet */ + ARGV_NONE, /* MDOC_Dash */ + ARGV_NONE, /* MDOC_Hyphen */ + ARGV_NONE, /* MDOC_Item */ + ARGV_NONE, /* MDOC_Enum */ + ARGV_NONE, /* MDOC_Tag */ + ARGV_NONE, /* MDOC_Diag */ + ARGV_NONE, /* MDOC_Hang */ + ARGV_NONE, /* MDOC_Ohang */ + ARGV_NONE, /* MDOC_Inset */ + ARGV_MULTI, /* MDOC_Column */ + ARGV_SINGLE, /* MDOC_Width */ + ARGV_NONE, /* MDOC_Compact */ + ARGV_NONE, /* MDOC_Std */ + ARGV_NONE, /* MDOC_Filled */ + ARGV_NONE, /* MDOC_Words */ + ARGV_NONE, /* MDOC_Emphasis */ + ARGV_NONE, /* MDOC_Symbolic */ + ARGV_NONE /* XXX: label repeated MDOC_Symbolic; presumably MDOC_Nested -- confirm in mdoc.h */ +}; + +static int mdoc_argflags[MDOC_MAX] = { + 0, /* Ap */ + 0, /* Dd */ + 0, /* Dt */ + 0, /* Os */ + 0, /* Sh */ + 0, /* Ss */ + ARGS_DELIM, /* Pp */ + ARGS_DELIM, /* D1 */ + ARGS_DELIM, /* Dl */ + 0, /* Bd */ + 0, /* Ed */ + 0, /* Bl */ + 0, /* El */ + 0, /* It */ + ARGS_DELIM, /* Ad */ + ARGS_DELIM, /* An */ + ARGS_DELIM, /* Ar */ + 0, /* Cd */ + ARGS_DELIM, /* Cm */ + ARGS_DELIM, /* Dv */ + ARGS_DELIM, /* Er */ + ARGS_DELIM, /* Ev */ + 0, /* Ex */ + ARGS_DELIM, /* Fa */ + 0, /* Fd */ + ARGS_DELIM, /*
Fl */ + ARGS_DELIM, /* Fn */ + ARGS_DELIM, /* Ft */ + ARGS_DELIM, /* Ic */ + 0, /* In */ + ARGS_DELIM, /* Li */ + 0, /* Nd */ + ARGS_DELIM, /* Nm */ + ARGS_DELIM, /* Op */ + 0, /* Ot */ + ARGS_DELIM, /* Pa */ + 0, /* Rv */ + ARGS_DELIM, /* St */ + ARGS_DELIM, /* Va */ + ARGS_DELIM, /* Vt */ + ARGS_DELIM, /* Xr */ + 0, /* %A */ + 0, /* %B */ + 0, /* %D */ + 0, /* %I */ + 0, /* %J */ + 0, /* %N */ + 0, /* %O */ + 0, /* %P */ + 0, /* %R */ + 0, /* %T */ + 0, /* %V */ + ARGS_DELIM, /* Ac */ + 0, /* Ao */ + ARGS_DELIM, /* Aq */ + ARGS_DELIM, /* At */ + ARGS_DELIM, /* Bc */ + 0, /* Bf */ + 0, /* Bo */ + ARGS_DELIM, /* Bq */ + ARGS_DELIM, /* Bsx */ + ARGS_DELIM, /* Bx */ + 0, /* Db */ + ARGS_DELIM, /* Dc */ + 0, /* Do */ + ARGS_DELIM, /* Dq */ + ARGS_DELIM, /* Ec */ + 0, /* Ef */ + ARGS_DELIM, /* Em */ + 0, /* Eo */ + ARGS_DELIM, /* Fx */ + ARGS_DELIM, /* Ms */ + ARGS_DELIM, /* No */ + ARGS_DELIM, /* Ns */ + ARGS_DELIM, /* Nx */ + ARGS_DELIM, /* Ox */ + ARGS_DELIM, /* Pc */ + ARGS_DELIM, /* Pf */ + 0, /* Po */ + ARGS_DELIM, /* Pq */ + ARGS_DELIM, /* Qc */ + ARGS_DELIM, /* Ql */ + 0, /* Qo */ + ARGS_DELIM, /* Qq */ + 0, /* Re */ + 0, /* Rs */ + ARGS_DELIM, /* Sc */ + 0, /* So */ + ARGS_DELIM, /* Sq */ + 0, /* Sm */ + ARGS_DELIM, /* Sx */ + ARGS_DELIM, /* Sy */ + ARGS_DELIM, /* Tn */ + ARGS_DELIM, /* Ux */ + ARGS_DELIM, /* Xc */ + 0, /* Xo */ + 0, /* Fo */ + 0, /* Fc */ + 0, /* Oo */ + ARGS_DELIM, /* Oc */ + 0, /* Bk */ + 0, /* Ek */ + 0, /* Bt */ + 0, /* Hf */ + 0, /* Fr */ + 0, /* Ud */ + 0, /* Lb */ + ARGS_DELIM, /* Lp */ + ARGS_DELIM, /* Lk */ + ARGS_DELIM, /* Mt */ + ARGS_DELIM, /* Brq */ + 0, /* Bro */ + ARGS_DELIM, /* Brc */ + 0, /* %C */ + 0, /* Es */ + 0, /* En */ + 0, /* Dx */ + 0, /* %Q */ + 0, /* br */ + 0, /* sp */ + 0, /* %U */ + 0, /* Ta */ +}; + + +/* + * Parse an argument from line text. This comes in the form of -key + * [value0...], which may either have a single mandatory value, at least + * one mandatory value, an optional single value, or no value. 
+ */ +enum margverr +mdoc_argv(struct mdoc *m, int line, enum mdoct tok, + struct mdoc_arg **v, int *pos, char *buf) +{ + char *p, sv; + struct mdoc_argv tmp; + struct mdoc_arg *arg; + + if ('\0' == buf[*pos]) + return(ARGV_EOLN); + + assert(' ' != buf[*pos]); + + /* Parse through to the first unescaped space. */ + + p = &buf[++(*pos)]; + + assert(*pos > 0); + + /* LINTED */ + while (buf[*pos]) { + if (' ' == buf[*pos]) + if ('\\' != buf[*pos - 1]) + break; + (*pos)++; + } + + /* XXX - save zeroed byte, if not an argument. */ + + sv = '\0'; + if (buf[*pos]) { + sv = buf[*pos]; + buf[(*pos)++] = '\0'; + } + + (void)memset(&tmp, 0, sizeof(struct mdoc_argv)); + tmp.line = line; + tmp.pos = *pos; + + /* See if our token accepts the argument. */ + + if (MDOC_ARG_MAX == (tmp.arg = argv_a2arg(tok, p))) { + /* XXX - restore saved zeroed byte. */ + if (sv) + buf[*pos - 1] = sv; + return(ARGV_WORD); + } + + while (buf[*pos] && ' ' == buf[*pos]) + (*pos)++; + + if ( ! argv(m, line, &tmp, pos, buf)) + return(ARGV_ERROR); + + if (NULL == (arg = *v)) + arg = *v = mandoc_calloc(1, sizeof(struct mdoc_arg)); + + arg->argc++; + arg->argv = mandoc_realloc + (arg->argv, arg->argc * sizeof(struct mdoc_argv)); + + (void)memcpy(&arg->argv[(int)arg->argc - 1], + &tmp, sizeof(struct mdoc_argv)); + + return(ARGV_ARG); +} + + +void +mdoc_argv_free(struct mdoc_arg *p) +{ + int i; + + if (NULL == p) + return; + + if (p->refcnt) { + --(p->refcnt); + if (p->refcnt) + return; + } + assert(p->argc); + + for (i = (int)p->argc - 1; i >= 0; i--) + mdoc_argn_free(p, i); + + free(p->argv); + free(p); +} + + +void +mdoc_argn_free(struct mdoc_arg *p, int iarg) +{ + struct mdoc_argv *arg = &p->argv[iarg]; + int j; + + if (arg->sz && arg->value) { + for (j = (int)arg->sz - 1; j >= 0; j--) + free(arg->value[j]); + free(arg->value); + } + + for (--p->argc; iarg < (int)p->argc; iarg++) + p->argv[iarg] = p->argv[iarg+1]; +} + + +enum margserr +mdoc_zargs(struct mdoc *m, int line, int *pos, + char *buf, int 
flags, char **v) +{ + + return(args(m, line, pos, buf, flags, v)); +} + + +enum margserr +mdoc_args(struct mdoc *m, int line, int *pos, + char *buf, enum mdoct tok, char **v) +{ + int fl; + struct mdoc_node *n; + + fl = mdoc_argflags[tok]; + + if (MDOC_It != tok) + return(args(m, line, pos, buf, fl, v)); + + /* + * We know that we're in an `It', so it's reasonable to expect + * us to be sitting in a `Bl'. Someday this may not be the case + * (if we allow random `It's sitting out there), so provide a + * safe fall-back into the default behaviour. + */ + + for (n = m->last; n; n = n->parent) + if (MDOC_Bl == n->tok) + break; + + if (n && LIST_column == n->data.Bl.type) { + fl |= ARGS_TABSEP; + fl &= ~ARGS_DELIM; + } + + return(args(m, line, pos, buf, fl, v)); +} + + +static enum margserr +args(struct mdoc *m, int line, int *pos, + char *buf, int fl, char **v) +{ + int i; + char *p, *pp; + enum margserr rc; + enum mdelim d; + + /* + * Parse out the terms (like `val' in `.Xx -arg val' or simply + * `.Xx val'), which can have all sorts of properties: + * + * ARGS_DELIM: use special handling if encountering trailing + * delimiters in the form of [[::delim::][ ]+]+. + * + * ARGS_NOWARN: don't post warnings. This is only used when + * re-parsing delimiters, as the warnings have already been + * posted. + * + * ARGS_TABSEP: use special handling for tab/`Ta' separated + * phrases like in `Bl -column'. + */ + + assert(' ' != buf[*pos]); + + if ('\0' == buf[*pos]) { + if (MDOC_PPHRASE & m->flags) + return(ARGS_EOLN); + /* + * If we're not in a partial phrase and the flag for + * being a phrase literal is still set, the punctuation + * is unterminated. + */ + if (MDOC_PHRASELIT & m->flags) + if ( ! 
mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE)) + return(ARGS_ERROR); + + m->flags &= ~MDOC_PHRASELIT; + return(ARGS_EOLN); + } + + /* + * If the first character is a closing delimiter and we're to + * look for delimited strings, then pass down the buffer seeing + * if it follows the pattern of [[::delim::][ ]+]+. Note that + * we ONLY care about closing delimiters. + */ + + if ((fl & ARGS_DELIM) && DELIM_CLOSE == mdoc_iscdelim(buf[*pos])) { + for (i = *pos; buf[i]; ) { + d = mdoc_iscdelim(buf[i]); + if (DELIM_NONE == d || DELIM_OPEN == d) + break; + i++; + if ('\0' == buf[i] || ' ' != buf[i]) + break; + i++; + while (buf[i] && ' ' == buf[i]) + i++; + } + + if ('\0' == buf[i]) { + *v = &buf[*pos]; + if (i && ' ' != buf[i - 1]) + return(ARGS_PUNCT); + if (ARGS_NOWARN & fl) + return(ARGS_PUNCT); + if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE)) + return(ARGS_ERROR); + return(ARGS_PUNCT); + } + } + + *v = &buf[*pos]; + + /* + * First handle TABSEP items, restricted to `Bl -column'. This + * ignores conventional token parsing and instead uses tabs or + * `Ta' macros to separate phrases. Phrases are parsed again + * for arguments at a later phase. + */ + + if (ARGS_TABSEP & fl) { + /* Scan ahead to tab (can't be escaped). */ + p = strchr(*v, '\t'); + pp = NULL; + + /* Scan ahead to unescaped `Ta'. */ + if ( ! (MDOC_PHRASELIT & m->flags)) + for (pp = *v; ; pp++) { + if (NULL == (pp = strstr(pp, "Ta"))) + break; + if (pp > *v && ' ' != *(pp - 1)) + continue; + if (' ' == *(pp + 2) || '\0' == *(pp + 2)) + break; + } + + /* By default, assume a phrase. */ + rc = ARGS_PHRASE; + + /* + * Adjust new-buffer position to be beyond delimiter + * mark (e.g., Ta -> end + 2). + */ + if (p && pp) { + *pos += pp < p ? 2 : 1; + rc = pp < p ? ARGS_PHRASE : ARGS_PPHRASE; + p = pp < p ? pp : p; + } else if (p && ! pp) { + rc = ARGS_PPHRASE; + *pos += 1; + } else if (pp && ! 
p) { + p = pp; + *pos += 2; + } else { + rc = ARGS_PEND; + p = strchr(*v, 0); + } + + /* Whitespace check for eoln case... */ + if ('\0' == *p && ' ' == *(p - 1) && ! (ARGS_NOWARN & fl)) + if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE)) + return(ARGS_ERROR); + + *pos += (int)(p - *v); + + /* Strip delimiter's preceding whitespace. */ + pp = p - 1; + while (pp > *v && ' ' == *pp) { + if (pp > *v && '\\' == *(pp - 1)) + break; + pp--; + } + *(pp + 1) = 0; + + /* Strip delimiter's following whitespace. */ + for (pp = &buf[*pos]; ' ' == *pp; pp++, (*pos)++) + /* Skip ahead. */ ; + + return(rc); + } + + /* + * Process a quoted literal. A quote begins with a double-quote + * and ends with a double-quote NOT followed by a double-quote. + * Whitespace is NOT involved in literal termination. + */ + + if (MDOC_PHRASELIT & m->flags || '\"' == buf[*pos]) { + if ( ! (MDOC_PHRASELIT & m->flags)) + *v = &buf[++(*pos)]; + + if (MDOC_PPHRASE & m->flags) + m->flags |= MDOC_PHRASELIT; + + for ( ; buf[*pos]; (*pos)++) { + if ('\"' != buf[*pos]) + continue; + if ('\"' != buf[*pos + 1]) + break; + (*pos)++; + } + + if ('\0' == buf[*pos]) { + if (ARGS_NOWARN & fl || MDOC_PPHRASE & m->flags) + return(ARGS_QWORD); + if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE)) + return(ARGS_ERROR); + return(ARGS_QWORD); + } + + m->flags &= ~MDOC_PHRASELIT; + buf[(*pos)++] = '\0'; + + if ('\0' == buf[*pos]) + return(ARGS_QWORD); + + while (' ' == buf[*pos]) + (*pos)++; + + if (0 == buf[*pos] && ! (ARGS_NOWARN & fl)) + if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE)) + return(ARGS_ERROR); + + return(ARGS_QWORD); + } + + /* + * A non-quoted term progresses until either the end of line or + * a non-escaped whitespace. + */ + + for ( ; buf[*pos]; (*pos)++) + if (*pos && ' ' == buf[*pos] && '\\' != buf[*pos - 1]) + break; + + if ('\0' == buf[*pos]) + return(ARGS_WORD); + + buf[(*pos)++] = '\0'; + + while (' ' == buf[*pos]) + (*pos)++; + + if ('\0' == buf[*pos] && !
(ARGS_NOWARN & fl)) + if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE)) + return(ARGS_ERROR); + + return(ARGS_WORD); +} + + +static int +argv_a2arg(enum mdoct tok, const char *p) +{ + + /* + * Parse an argument identifier from its text. XXX - this + * should really be table-driven to clarify the code. + * + * If you add an argument to the list, make sure that you + * register it here with its one or more macros! + */ + + switch (tok) { + case (MDOC_An): + if (0 == strcmp(p, "split")) + return(MDOC_Split); + else if (0 == strcmp(p, "nosplit")) + return(MDOC_Nosplit); + break; + + case (MDOC_Bd): + if (0 == strcmp(p, "ragged")) + return(MDOC_Ragged); + else if (0 == strcmp(p, "unfilled")) + return(MDOC_Unfilled); + else if (0 == strcmp(p, "filled")) + return(MDOC_Filled); + else if (0 == strcmp(p, "literal")) + return(MDOC_Literal); + else if (0 == strcmp(p, "file")) + return(MDOC_File); + else if (0 == strcmp(p, "offset")) + return(MDOC_Offset); + else if (0 == strcmp(p, "compact")) + return(MDOC_Compact); + else if (0 == strcmp(p, "centered")) + return(MDOC_Centred); + break; + + case (MDOC_Bf): + if (0 == strcmp(p, "emphasis")) + return(MDOC_Emphasis); + else if (0 == strcmp(p, "literal")) + return(MDOC_Literal); + else if (0 == strcmp(p, "symbolic")) + return(MDOC_Symbolic); + break; + + case (MDOC_Bk): + if (0 == strcmp(p, "words")) + return(MDOC_Words); + break; + + case (MDOC_Bl): + if (0 == strcmp(p, "bullet")) + return(MDOC_Bullet); + else if (0 == strcmp(p, "dash")) + return(MDOC_Dash); + else if (0 == strcmp(p, "hyphen")) + return(MDOC_Hyphen); + else if (0 == strcmp(p, "item")) + return(MDOC_Item); + else if (0 == strcmp(p, "enum")) + return(MDOC_Enum); + else if (0 == strcmp(p, "tag")) + return(MDOC_Tag); + else if (0 == strcmp(p, "diag")) + return(MDOC_Diag); + else if (0 == strcmp(p, "hang")) + return(MDOC_Hang); + else if (0 == strcmp(p, "ohang")) + return(MDOC_Ohang); + else if (0 == strcmp(p, "inset")) + return(MDOC_Inset); + else if (0 == 
strcmp(p, "column")) + return(MDOC_Column); + else if (0 == strcmp(p, "width")) + return(MDOC_Width); + else if (0 == strcmp(p, "offset")) + return(MDOC_Offset); + else if (0 == strcmp(p, "compact")) + return(MDOC_Compact); + else if (0 == strcmp(p, "nested")) + return(MDOC_Nested); + break; + + case (MDOC_Rv): + /* FALLTHROUGH */ + case (MDOC_Ex): + if (0 == strcmp(p, "std")) + return(MDOC_Std); + break; + default: + break; + } + + return(MDOC_ARG_MAX); +} + + +static int +argv_multi(struct mdoc *m, int line, + struct mdoc_argv *v, int *pos, char *buf) +{ + enum margserr ac; + char *p; + + for (v->sz = 0; ; v->sz++) { + if ('-' == buf[*pos]) + break; + ac = args(m, line, pos, buf, 0, &p); + if (ARGS_ERROR == ac) + return(0); + else if (ARGS_EOLN == ac) + break; + + if (0 == v->sz % MULTI_STEP) + v->value = mandoc_realloc(v->value, + (v->sz + MULTI_STEP) * sizeof(char *)); + + v->value[(int)v->sz] = mandoc_strdup(p); + } + + return(1); +} + + +static int +argv_opt_single(struct mdoc *m, int line, + struct mdoc_argv *v, int *pos, char *buf) +{ + enum margserr ac; + char *p; + + if ('-' == buf[*pos]) + return(1); + + ac = args(m, line, pos, buf, 0, &p); + if (ARGS_ERROR == ac) + return(0); + if (ARGS_EOLN == ac) + return(1); + + v->sz = 1; + v->value = mandoc_malloc(sizeof(char *)); + v->value[0] = mandoc_strdup(p); + + return(1); +} + + +/* + * Parse a single, mandatory value from the stream. + */ +static int +argv_single(struct mdoc *m, int line, + struct mdoc_argv *v, int *pos, char *buf) +{ + int ppos; + enum margserr ac; + char *p; + + ppos = *pos; + + ac = args(m, line, pos, buf, 0, &p); + if (ARGS_EOLN == ac) { + mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT); + return(0); + } else if (ARGS_ERROR == ac) + return(0); + + v->sz = 1; + v->value = mandoc_malloc(sizeof(char *)); + v->value[0] = mandoc_strdup(p); + + return(1); +} + + +/* + * Determine rules for parsing arguments. 
Arguments can either accept + * no parameters, an optional single parameter, one parameter, or + * multiple parameters. + */ +static int +argv(struct mdoc *mdoc, int line, + struct mdoc_argv *v, int *pos, char *buf) +{ + + v->sz = 0; + v->value = NULL; + + switch (mdoc_argvflags[v->arg]) { + case (ARGV_SINGLE): + return(argv_single(mdoc, line, v, pos, buf)); + case (ARGV_MULTI): + return(argv_multi(mdoc, line, v, pos, buf)); + case (ARGV_OPT_SINGLE): + return(argv_opt_single(mdoc, line, v, pos, buf)); + default: + /* ARGV_NONE */ + break; + } + + return(1); +} diff --git a/commands/mdocml/mdoc_hash.c b/commands/mdocml/mdoc_hash.c new file mode 100644 index 000000000..3bf29dfd8 --- /dev/null +++ b/commands/mdocml/mdoc_hash.c @@ -0,0 +1,93 @@ +/* $Id: mdoc_hash.c,v 1.16 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "libmdoc.h" + +static u_char table[27 * 12]; + +/* + * XXX - this hash has global scope, so if intended for use as a library + * with multiple callers, it will need re-invocation protection. 
+ */ +void +mdoc_hash_init(void) +{ + int i, j, major; + const char *p; + + memset(table, UCHAR_MAX, sizeof(table)); + + for (i = 0; i < (int)MDOC_MAX; i++) { + p = mdoc_macronames[i]; + + if (isalpha((u_char)p[1])) + major = 12 * (tolower((u_char)p[1]) - 97); + else + major = 12 * 26; + + for (j = 0; j < 12; j++) + if (UCHAR_MAX == table[major + j]) { + table[major + j] = (u_char)i; + break; + } + + assert(j < 12); + } +} + +enum mdoct +mdoc_hash_find(const char *p) +{ + int major, i, j; + + if (0 == p[0]) + return(MDOC_MAX); + if ( ! isalpha((u_char)p[0]) && '%' != p[0]) + return(MDOC_MAX); + + if (isalpha((u_char)p[1])) + major = 12 * (tolower((u_char)p[1]) - 97); + else if ('1' == p[1]) + major = 12 * 26; + else + return(MDOC_MAX); + + if (p[2] && p[3]) + return(MDOC_MAX); + + for (j = 0; j < 12; j++) { + if (UCHAR_MAX == (i = table[major + j])) + break; + if (0 == strcmp(p, mdoc_macronames[i])) + return((enum mdoct)i); + } + + return(MDOC_MAX); +} diff --git a/commands/mdocml/mdoc_html.c b/commands/mdocml/mdoc_html.c new file mode 100644 index 000000000..82ecccf0c --- /dev/null +++ b/commands/mdocml/mdoc_html.c @@ -0,0 +1,2195 @@ +/* $Id: mdoc_html.c,v 1.85 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "out.h" +#include "html.h" +#include "mdoc.h" +#include "main.h" + +#define INDENT 5 +#define HALFINDENT 3 + +#define MDOC_ARGS const struct mdoc_meta *m, \ + const struct mdoc_node *n, \ + struct html *h + +#ifndef MIN +#define MIN(a,b) ((/*CONSTCOND*/(a)<(b))?(a):(b)) +#endif + +struct htmlmdoc { + int (*pre)(MDOC_ARGS); + void (*post)(MDOC_ARGS); +}; + +static void print_mdoc(MDOC_ARGS); +static void print_mdoc_head(MDOC_ARGS); +static void print_mdoc_node(MDOC_ARGS); +static void print_mdoc_nodelist(MDOC_ARGS); +static void synopsis_pre(struct html *, + const struct mdoc_node *); + +static void a2width(const char *, struct roffsu *); +static void a2offs(const char *, struct roffsu *); + +static void mdoc_root_post(MDOC_ARGS); +static int mdoc_root_pre(MDOC_ARGS); + +static void mdoc__x_post(MDOC_ARGS); +static int mdoc__x_pre(MDOC_ARGS); +static int mdoc_ad_pre(MDOC_ARGS); +static int mdoc_an_pre(MDOC_ARGS); +static int mdoc_ap_pre(MDOC_ARGS); +static void mdoc_aq_post(MDOC_ARGS); +static int mdoc_aq_pre(MDOC_ARGS); +static int mdoc_ar_pre(MDOC_ARGS); +static int mdoc_bd_pre(MDOC_ARGS); +static int mdoc_bf_pre(MDOC_ARGS); +static void mdoc_bl_post(MDOC_ARGS); +static int mdoc_bl_pre(MDOC_ARGS); +static void mdoc_bq_post(MDOC_ARGS); +static int mdoc_bq_pre(MDOC_ARGS); +static void mdoc_brq_post(MDOC_ARGS); +static int mdoc_brq_pre(MDOC_ARGS); +static int mdoc_bt_pre(MDOC_ARGS); +static int mdoc_bx_pre(MDOC_ARGS); +static int mdoc_cd_pre(MDOC_ARGS); +static int 
mdoc_d1_pre(MDOC_ARGS); +static void mdoc_dq_post(MDOC_ARGS); +static int mdoc_dq_pre(MDOC_ARGS); +static int mdoc_dv_pre(MDOC_ARGS); +static int mdoc_fa_pre(MDOC_ARGS); +static int mdoc_fd_pre(MDOC_ARGS); +static int mdoc_fl_pre(MDOC_ARGS); +static int mdoc_fn_pre(MDOC_ARGS); +static int mdoc_ft_pre(MDOC_ARGS); +static int mdoc_em_pre(MDOC_ARGS); +static int mdoc_er_pre(MDOC_ARGS); +static int mdoc_ev_pre(MDOC_ARGS); +static int mdoc_ex_pre(MDOC_ARGS); +static void mdoc_fo_post(MDOC_ARGS); +static int mdoc_fo_pre(MDOC_ARGS); +static int mdoc_ic_pre(MDOC_ARGS); +static int mdoc_in_pre(MDOC_ARGS); +static int mdoc_it_block_pre(MDOC_ARGS, enum mdoc_list, + int, struct roffsu *, struct roffsu *); +static int mdoc_it_head_pre(MDOC_ARGS, enum mdoc_list, + struct roffsu *); +static int mdoc_it_body_pre(MDOC_ARGS, enum mdoc_list, + struct roffsu *); +static int mdoc_it_pre(MDOC_ARGS); +static int mdoc_lb_pre(MDOC_ARGS); +static int mdoc_li_pre(MDOC_ARGS); +static int mdoc_lk_pre(MDOC_ARGS); +static int mdoc_mt_pre(MDOC_ARGS); +static int mdoc_ms_pre(MDOC_ARGS); +static int mdoc_nd_pre(MDOC_ARGS); +static int mdoc_nm_pre(MDOC_ARGS); +static int mdoc_ns_pre(MDOC_ARGS); +static void mdoc_op_post(MDOC_ARGS); +static int mdoc_op_pre(MDOC_ARGS); +static int mdoc_pa_pre(MDOC_ARGS); +static void mdoc_pf_post(MDOC_ARGS); +static int mdoc_pf_pre(MDOC_ARGS); +static void mdoc_pq_post(MDOC_ARGS); +static int mdoc_pq_pre(MDOC_ARGS); +static int mdoc_rs_pre(MDOC_ARGS); +static int mdoc_rv_pre(MDOC_ARGS); +static int mdoc_sh_pre(MDOC_ARGS); +static int mdoc_sp_pre(MDOC_ARGS); +static void mdoc_sq_post(MDOC_ARGS); +static int mdoc_sq_pre(MDOC_ARGS); +static int mdoc_ss_pre(MDOC_ARGS); +static int mdoc_sx_pre(MDOC_ARGS); +static int mdoc_sy_pre(MDOC_ARGS); +static int mdoc_ud_pre(MDOC_ARGS); +static int mdoc_va_pre(MDOC_ARGS); +static int mdoc_vt_pre(MDOC_ARGS); +static int mdoc_xr_pre(MDOC_ARGS); +static int mdoc_xx_pre(MDOC_ARGS); + +static const struct htmlmdoc mdocs[MDOC_MAX] = { + 
{mdoc_ap_pre, NULL}, /* Ap */ + {NULL, NULL}, /* Dd */ + {NULL, NULL}, /* Dt */ + {NULL, NULL}, /* Os */ + {mdoc_sh_pre, NULL }, /* Sh */ + {mdoc_ss_pre, NULL }, /* Ss */ + {mdoc_sp_pre, NULL}, /* Pp */ + {mdoc_d1_pre, NULL}, /* D1 */ + {mdoc_d1_pre, NULL}, /* Dl */ + {mdoc_bd_pre, NULL}, /* Bd */ + {NULL, NULL}, /* Ed */ + {mdoc_bl_pre, mdoc_bl_post}, /* Bl */ + {NULL, NULL}, /* El */ + {mdoc_it_pre, NULL}, /* It */ + {mdoc_ad_pre, NULL}, /* Ad */ + {mdoc_an_pre, NULL}, /* An */ + {mdoc_ar_pre, NULL}, /* Ar */ + {mdoc_cd_pre, NULL}, /* Cd */ + {mdoc_fl_pre, NULL}, /* Cm */ + {mdoc_dv_pre, NULL}, /* Dv */ + {mdoc_er_pre, NULL}, /* Er */ + {mdoc_ev_pre, NULL}, /* Ev */ + {mdoc_ex_pre, NULL}, /* Ex */ + {mdoc_fa_pre, NULL}, /* Fa */ + {mdoc_fd_pre, NULL}, /* Fd */ + {mdoc_fl_pre, NULL}, /* Fl */ + {mdoc_fn_pre, NULL}, /* Fn */ + {mdoc_ft_pre, NULL}, /* Ft */ + {mdoc_ic_pre, NULL}, /* Ic */ + {mdoc_in_pre, NULL}, /* In */ + {mdoc_li_pre, NULL}, /* Li */ + {mdoc_nd_pre, NULL}, /* Nd */ + {mdoc_nm_pre, NULL}, /* Nm */ + {mdoc_op_pre, mdoc_op_post}, /* Op */ + {NULL, NULL}, /* Ot */ + {mdoc_pa_pre, NULL}, /* Pa */ + {mdoc_rv_pre, NULL}, /* Rv */ + {NULL, NULL}, /* St */ + {mdoc_va_pre, NULL}, /* Va */ + {mdoc_vt_pre, NULL}, /* Vt */ + {mdoc_xr_pre, NULL}, /* Xr */ + {mdoc__x_pre, mdoc__x_post}, /* %A */ + {mdoc__x_pre, mdoc__x_post}, /* %B */ + {mdoc__x_pre, mdoc__x_post}, /* %D */ + {mdoc__x_pre, mdoc__x_post}, /* %I */ + {mdoc__x_pre, mdoc__x_post}, /* %J */ + {mdoc__x_pre, mdoc__x_post}, /* %N */ + {mdoc__x_pre, mdoc__x_post}, /* %O */ + {mdoc__x_pre, mdoc__x_post}, /* %P */ + {mdoc__x_pre, mdoc__x_post}, /* %R */ + {mdoc__x_pre, mdoc__x_post}, /* %T */ + {mdoc__x_pre, mdoc__x_post}, /* %V */ + {NULL, NULL}, /* Ac */ + {mdoc_aq_pre, mdoc_aq_post}, /* Ao */ + {mdoc_aq_pre, mdoc_aq_post}, /* Aq */ + {NULL, NULL}, /* At */ + {NULL, NULL}, /* Bc */ + {mdoc_bf_pre, NULL}, /* Bf */ + {mdoc_bq_pre, mdoc_bq_post}, /* Bo */ + {mdoc_bq_pre, mdoc_bq_post}, /* Bq */ + 
{mdoc_xx_pre, NULL}, /* Bsx */ + {mdoc_bx_pre, NULL}, /* Bx */ + {NULL, NULL}, /* Db */ + {NULL, NULL}, /* Dc */ + {mdoc_dq_pre, mdoc_dq_post}, /* Do */ + {mdoc_dq_pre, mdoc_dq_post}, /* Dq */ + {NULL, NULL}, /* Ec */ /* FIXME: no space */ + {NULL, NULL}, /* Ef */ + {mdoc_em_pre, NULL}, /* Em */ + {NULL, NULL}, /* Eo */ + {mdoc_xx_pre, NULL}, /* Fx */ + {mdoc_ms_pre, NULL}, /* Ms */ /* FIXME: convert to symbol? */ + {NULL, NULL}, /* No */ + {mdoc_ns_pre, NULL}, /* Ns */ + {mdoc_xx_pre, NULL}, /* Nx */ + {mdoc_xx_pre, NULL}, /* Ox */ + {NULL, NULL}, /* Pc */ + {mdoc_pf_pre, mdoc_pf_post}, /* Pf */ + {mdoc_pq_pre, mdoc_pq_post}, /* Po */ + {mdoc_pq_pre, mdoc_pq_post}, /* Pq */ + {NULL, NULL}, /* Qc */ + {mdoc_sq_pre, mdoc_sq_post}, /* Ql */ + {mdoc_dq_pre, mdoc_dq_post}, /* Qo */ + {mdoc_dq_pre, mdoc_dq_post}, /* Qq */ + {NULL, NULL}, /* Re */ + {mdoc_rs_pre, NULL}, /* Rs */ + {NULL, NULL}, /* Sc */ + {mdoc_sq_pre, mdoc_sq_post}, /* So */ + {mdoc_sq_pre, mdoc_sq_post}, /* Sq */ + {NULL, NULL}, /* Sm */ /* FIXME - no idea. 
*/ + {mdoc_sx_pre, NULL}, /* Sx */ + {mdoc_sy_pre, NULL}, /* Sy */ + {NULL, NULL}, /* Tn */ + {mdoc_xx_pre, NULL}, /* Ux */ + {NULL, NULL}, /* Xc */ + {NULL, NULL}, /* Xo */ + {mdoc_fo_pre, mdoc_fo_post}, /* Fo */ + {NULL, NULL}, /* Fc */ + {mdoc_op_pre, mdoc_op_post}, /* Oo */ + {NULL, NULL}, /* Oc */ + {NULL, NULL}, /* Bk */ + {NULL, NULL}, /* Ek */ + {mdoc_bt_pre, NULL}, /* Bt */ + {NULL, NULL}, /* Hf */ + {NULL, NULL}, /* Fr */ + {mdoc_ud_pre, NULL}, /* Ud */ + {mdoc_lb_pre, NULL}, /* Lb */ + {mdoc_sp_pre, NULL}, /* Lp */ + {mdoc_lk_pre, NULL}, /* Lk */ + {mdoc_mt_pre, NULL}, /* Mt */ + {mdoc_brq_pre, mdoc_brq_post}, /* Brq */ + {mdoc_brq_pre, mdoc_brq_post}, /* Bro */ + {NULL, NULL}, /* Brc */ + {mdoc__x_pre, mdoc__x_post}, /* %C */ + {NULL, NULL}, /* Es */ /* TODO */ + {NULL, NULL}, /* En */ /* TODO */ + {mdoc_xx_pre, NULL}, /* Dx */ + {mdoc__x_pre, mdoc__x_post}, /* %Q */ + {mdoc_sp_pre, NULL}, /* br */ + {mdoc_sp_pre, NULL}, /* sp */ + {mdoc__x_pre, mdoc__x_post}, /* %U */ + {NULL, NULL}, /* Ta */ +}; + + +void +html_mdoc(void *arg, const struct mdoc *m) +{ + struct html *h; + struct tag *t; + + h = (struct html *)arg; + + print_gen_decls(h); + t = print_otag(h, TAG_HTML, 0, NULL); + print_mdoc(mdoc_meta(m), mdoc_node(m), h); + print_tagq(h, t); + + printf("\n"); +} + + +/* + * Calculate the scaling unit passed in a `-width' argument. This uses + * either a native scaling unit (e.g., 1i, 2m) or the string length of + * the value. + */ +static void +a2width(const char *p, struct roffsu *su) +{ + + if ( ! a2roffsu(p, su, SCALE_MAX)) { + su->unit = SCALE_EM; + su->scale = (int)strlen(p); + } +} + + +/* + * See the same function in mdoc_term.c for documentation. 
+ */ +static void +synopsis_pre(struct html *h, const struct mdoc_node *n) +{ + struct roffsu su; + struct htmlpair tag; + + if (NULL == n->prev || SEC_SYNOPSIS != n->sec) + return; + + SCALE_VS_INIT(&su, 1); + bufcat_su(h, "margin-top", &su); + PAIR_STYLE_INIT(&tag, h); + + if (n->prev->tok == n->tok && + MDOC_Fo != n->tok && + MDOC_Ft != n->tok && + MDOC_Fn != n->tok) { + print_otag(h, TAG_DIV, 0, NULL); + return; + } + + switch (n->prev->tok) { + case (MDOC_Fd): + /* FALLTHROUGH */ + case (MDOC_Fn): + /* FALLTHROUGH */ + case (MDOC_Fo): + /* FALLTHROUGH */ + case (MDOC_In): + /* FALLTHROUGH */ + case (MDOC_Vt): + print_otag(h, TAG_DIV, 1, &tag); + break; + case (MDOC_Ft): + if (MDOC_Fn != n->tok && MDOC_Fo != n->tok) { + print_otag(h, TAG_DIV, 1, &tag); + break; + } + /* FALLTHROUGH */ + default: + print_otag(h, TAG_DIV, 0, NULL); + break; + } +} + + +/* + * Calculate the scaling unit passed in an `-offset' argument. This + * uses either a native scaling unit (e.g., 1i, 2m), one of a set of + * predefined strings (indent, etc.), or the string length of the value. + */ +static void +a2offs(const char *p, struct roffsu *su) +{ + + /* FIXME: "right"? */ + + if (0 == strcmp(p, "left")) + SCALE_HS_INIT(su, 0); + else if (0 == strcmp(p, "indent")) + SCALE_HS_INIT(su, INDENT); + else if (0 == strcmp(p, "indent-two")) + SCALE_HS_INIT(su, INDENT * 2); + else if ( ! 
a2roffsu(p, su, SCALE_MAX)) { + su->unit = SCALE_EM; + su->scale = (int)strlen(p); + } +} + + +static void +print_mdoc(MDOC_ARGS) +{ + struct tag *t; + struct htmlpair tag; + + t = print_otag(h, TAG_HEAD, 0, NULL); + print_mdoc_head(m, n, h); + print_tagq(h, t); + + t = print_otag(h, TAG_BODY, 0, NULL); + + tag.key = ATTR_CLASS; + tag.val = "body"; + print_otag(h, TAG_DIV, 1, &tag); + + print_mdoc_nodelist(m, n, h); + print_tagq(h, t); +} + + +/* ARGSUSED */ +static void +print_mdoc_head(MDOC_ARGS) +{ + + print_gen_head(h); + bufinit(h); + buffmt(h, "%s(%s)", m->title, m->msec); + + if (m->arch) { + bufcat(h, " ("); + bufcat(h, m->arch); + bufcat(h, ")"); + } + + print_otag(h, TAG_TITLE, 0, NULL); + print_text(h, h->buf); +} + + +static void +print_mdoc_nodelist(MDOC_ARGS) +{ + + print_mdoc_node(m, n, h); + if (n->next) + print_mdoc_nodelist(m, n->next, h); +} + + +static void +print_mdoc_node(MDOC_ARGS) +{ + int child; + struct tag *t; + + child = 1; + t = h->tags.head; + + bufinit(h); + switch (n->type) { + case (MDOC_ROOT): + child = mdoc_root_pre(m, n, h); + break; + case (MDOC_TEXT): + print_text(h, n->string); + return; + default: + if (mdocs[n->tok].pre) + child = (*mdocs[n->tok].pre)(m, n, h); + break; + } + + if (child && n->child) + print_mdoc_nodelist(m, n->child, h); + + print_stagq(h, t); + + bufinit(h); + switch (n->type) { + case (MDOC_ROOT): + mdoc_root_post(m, n, h); + break; + default: + if (mdocs[n->tok].post) + (*mdocs[n->tok].post)(m, n, h); + break; + } +} + + +/* ARGSUSED */ +static void +mdoc_root_post(MDOC_ARGS) +{ + struct htmlpair tag[3]; + struct tag *t, *tt; + char b[DATESIZ]; + + time2a(m->date, b, DATESIZ); + + /* + * XXX: this should use divs, but in Firefox, divs with nested + * divs for some reason puke when trying to put a border line + * below. So I use tables, instead. 
+ */ + + PAIR_CLASS_INIT(&tag[0], "footer"); + bufcat_style(h, "width", "100%"); + PAIR_STYLE_INIT(&tag[1], h); + PAIR_SUMMARY_INIT(&tag[2], "footer"); + + t = print_otag(h, TAG_TABLE, 3, tag); + tt = print_otag(h, TAG_TR, 0, NULL); + + bufinit(h); + bufcat_style(h, "width", "50%"); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_TD, 1, tag); + print_text(h, b); + print_stagq(h, tt); + + bufinit(h); + bufcat_style(h, "width", "50%"); + bufcat_style(h, "text-align", "right"); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_TD, 1, tag); + print_text(h, m->os); + print_tagq(h, t); +} + + +/* ARGSUSED */ +static int +mdoc_root_pre(MDOC_ARGS) +{ + struct htmlpair tag[3]; + struct tag *t, *tt; + char b[BUFSIZ], title[BUFSIZ]; + + (void)strlcpy(b, m->vol, BUFSIZ); + + if (m->arch) { + (void)strlcat(b, " (", BUFSIZ); + (void)strlcat(b, m->arch, BUFSIZ); + (void)strlcat(b, ")", BUFSIZ); + } + + (void)snprintf(title, BUFSIZ - 1, + "%s(%s)", m->title, m->msec); + + /* XXX: see note in mdoc_root_post() about divs. 
*/ + + PAIR_CLASS_INIT(&tag[0], "header"); + bufcat_style(h, "width", "100%"); + PAIR_STYLE_INIT(&tag[1], h); + PAIR_SUMMARY_INIT(&tag[2], "header"); + + t = print_otag(h, TAG_TABLE, 3, tag); + + tt = print_otag(h, TAG_TR, 0, NULL); + + bufinit(h); + bufcat_style(h, "width", "10%"); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_TD, 1, tag); + print_text(h, title); + print_stagq(h, tt); + + bufinit(h); + bufcat_style(h, "text-align", "center"); + bufcat_style(h, "white-space", "nowrap"); + bufcat_style(h, "width", "80%"); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_TD, 1, tag); + print_text(h, b); + print_stagq(h, tt); + + bufinit(h); + bufcat_style(h, "text-align", "right"); + bufcat_style(h, "width", "10%"); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_TD, 1, tag); + print_text(h, title); + print_tagq(h, t); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_sh_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + const struct mdoc_node *nn; + char buf[BUFSIZ]; + struct roffsu su; + + if (MDOC_BODY == n->type) { + SCALE_HS_INIT(&su, INDENT); + bufcat_su(h, "margin-left", &su); + PAIR_CLASS_INIT(&tag[0], "sec-body"); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + return(1); + } else if (MDOC_BLOCK == n->type) { + PAIR_CLASS_INIT(&tag[0], "sec-block"); + if (n->prev && NULL == n->prev->body->child) { + print_otag(h, TAG_DIV, 1, tag); + return(1); + } + + SCALE_VS_INIT(&su, 1); + bufcat_su(h, "margin-top", &su); + if (NULL == n->next) + bufcat_su(h, "margin-bottom", &su); + + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + return(1); + } + + buf[0] = '\0'; + for (nn = n->child; nn; nn = nn->next) { + html_idcat(buf, nn->string, BUFSIZ); + if (nn->next) + html_idcat(buf, " ", BUFSIZ); + } + + PAIR_CLASS_INIT(&tag[0], "sec-head"); + PAIR_ID_INIT(&tag[1], buf); + + print_otag(h, TAG_DIV, 2, tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ss_pre(MDOC_ARGS) +{ + struct htmlpair tag[3]; + const struct mdoc_node 
*nn; + char buf[BUFSIZ]; + struct roffsu su; + + SCALE_VS_INIT(&su, 1); + + if (MDOC_BODY == n->type) { + PAIR_CLASS_INIT(&tag[0], "ssec-body"); + if (n->parent->next && n->child) { + bufcat_su(h, "margin-bottom", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + } else + print_otag(h, TAG_DIV, 1, tag); + return(1); + } else if (MDOC_BLOCK == n->type) { + PAIR_CLASS_INIT(&tag[0], "ssec-block"); + if (n->prev) { + bufcat_su(h, "margin-top", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + } else + print_otag(h, TAG_DIV, 1, tag); + return(1); + } + + /* TODO: see note in mdoc_sh_pre() about duplicates. */ + + buf[0] = '\0'; + for (nn = n->child; nn; nn = nn->next) { + html_idcat(buf, nn->string, BUFSIZ); + if (nn->next) + html_idcat(buf, " ", BUFSIZ); + } + + SCALE_HS_INIT(&su, INDENT - HALFINDENT); + su.scale = -su.scale; + bufcat_su(h, "margin-left", &su); + + PAIR_CLASS_INIT(&tag[0], "ssec-head"); + PAIR_STYLE_INIT(&tag[1], h); + PAIR_ID_INIT(&tag[2], buf); + + print_otag(h, TAG_DIV, 3, tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_fl_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "flag"); + print_otag(h, TAG_SPAN, 1, &tag); + + /* `Cm' has no leading hyphen. */ + + if (MDOC_Cm == n->tok) + return(1); + + print_text(h, "\\-"); + + if (n->child) + h->flags |= HTML_NOSPACE; + else if (n->next && n->next->line == n->line) + h->flags |= HTML_NOSPACE; + + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_nd_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (MDOC_BODY != n->type) + return(1); + + /* XXX: this tag in theory can contain block elements. */ + + print_text(h, "\\(em"); + PAIR_CLASS_INIT(&tag, "desc-body"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_op_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (MDOC_BODY != n->type) + return(1); + + /* XXX: this tag in theory can contain block elements. 
*/ + + print_text(h, "\\(lB"); + h->flags |= HTML_NOSPACE; + PAIR_CLASS_INIT(&tag, "opt"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_op_post(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + h->flags |= HTML_NOSPACE; + print_text(h, "\\(rB"); +} + + +static int +mdoc_nm_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (NULL == n->child && NULL == m->name) + return(1); + + synopsis_pre(h, n); + + PAIR_CLASS_INIT(&tag, "name"); + print_otag(h, TAG_SPAN, 1, &tag); + if (NULL == n->child) + print_text(h, m->name); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_xr_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + const struct mdoc_node *nn; + + if (NULL == n->child) + return(0); + + PAIR_CLASS_INIT(&tag[0], "link-man"); + + if (h->base_man) { + buffmt_man(h, n->child->string, + n->child->next ? + n->child->next->string : NULL); + PAIR_HREF_INIT(&tag[1], h->buf); + print_otag(h, TAG_A, 2, tag); + } else + print_otag(h, TAG_A, 1, tag); + + nn = n->child; + print_text(h, nn->string); + + if (NULL == (nn = nn->next)) + return(0); + + h->flags |= HTML_NOSPACE; + print_text(h, "("); + h->flags |= HTML_NOSPACE; + print_text(h, nn->string); + h->flags |= HTML_NOSPACE; + print_text(h, ")"); + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_ns_pre(MDOC_ARGS) +{ + + h->flags |= HTML_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ar_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "arg"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_xx_pre(MDOC_ARGS) +{ + const char *pp; + struct htmlpair tag; + + switch (n->tok) { + case (MDOC_Bsx): + pp = "BSDI BSD/OS"; + break; + case (MDOC_Dx): + pp = "DragonFly"; + break; + case (MDOC_Fx): + pp = "FreeBSD"; + break; + case (MDOC_Nx): + pp = "NetBSD"; + break; + case (MDOC_Ox): + pp = "OpenBSD"; + break; + case (MDOC_Ux): + pp = "UNIX"; + break; + default: + return(1); + } + + PAIR_CLASS_INIT(&tag, "unix"); 
+ print_otag(h, TAG_SPAN, 1, &tag); + print_text(h, pp); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_bx_pre(MDOC_ARGS) +{ + const struct mdoc_node *nn; + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "unix"); + print_otag(h, TAG_SPAN, 1, &tag); + + for (nn = n->child; nn; nn = nn->next) + print_mdoc_node(m, nn, h); + + if (n->child) + h->flags |= HTML_NOSPACE; + + print_text(h, "BSD"); + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_it_block_pre(MDOC_ARGS, enum mdoc_list type, int comp, + struct roffsu *offs, struct roffsu *width) +{ + struct htmlpair tag; + const struct mdoc_node *nn; + struct roffsu su; + + nn = n->parent->parent; + + /* XXX: see notes in mdoc_it_pre(). */ + + if (LIST_column == type) { + /* Don't width-pad on the left. */ + SCALE_HS_INIT(width, 0); + /* Also disallow non-compact. */ + comp = 1; + } + if (LIST_diag == type) + /* Mandate non-compact with empty prior. */ + if (n->prev && NULL == n->prev->body->child) + comp = 1; + + bufcat_style(h, "clear", "both"); + if (offs->scale > 0) + bufcat_su(h, "margin-left", offs); + if (width->scale > 0) + bufcat_su(h, "padding-left", width); + + PAIR_STYLE_INIT(&tag, h); + + /* Mandate compact following `Ss' and `Sh' starts. */ + + for (nn = n; nn && ! comp; nn = nn->parent) { + if (MDOC_BLOCK != nn->type) + continue; + if (MDOC_Ss == nn->tok || MDOC_Sh == nn->tok) + comp = 1; + if (nn->prev) + break; + } + + if ( ! 
comp) { + SCALE_VS_INIT(&su, 1); + bufcat_su(h, "padding-top", &su); + } + + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_it_body_pre(MDOC_ARGS, enum mdoc_list type, struct roffsu *width) +{ + struct htmlpair tag; + struct roffsu su; + + switch (type) { + case (LIST_item): + /* FALLTHROUGH */ + case (LIST_ohang): + /* FALLTHROUGH */ + case (LIST_column): + bufcat_su(h, "min-width", width); + bufcat_style(h, "clear", "none"); + if (n->next) + bufcat_style(h, "float", "left"); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + break; + default: + /* + * XXX: this tricks CSS into aligning the bodies with + * the right-padding in the head. + */ + SCALE_HS_INIT(&su, 2); + bufcat_su(h, "margin-left", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + break; + } + + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_it_head_pre(MDOC_ARGS, enum mdoc_list type, struct roffsu *width) +{ + struct htmlpair tag; + struct ord *ord; + char nbuf[BUFSIZ]; + + switch (type) { + case (LIST_item): + return(0); + case (LIST_ohang): + print_otag(h, TAG_DIV, 0, &tag); + return(1); + case (LIST_column): + break; + default: + bufcat_su(h, "min-width", width); + SCALE_INVERT(width); + bufcat_su(h, "margin-left", width); + if (n->next && n->next->child) + bufcat_style(h, "float", "left"); + + /* XXX: buffer if we run into body. 
*/ + SCALE_HS_INIT(width, 1); + bufcat_su(h, "margin-right", width); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + break; + } + + switch (type) { + case (LIST_diag): + PAIR_CLASS_INIT(&tag, "diag"); + print_otag(h, TAG_SPAN, 1, &tag); + break; + case (LIST_enum): + ord = h->ords.head; + assert(ord); + nbuf[BUFSIZ - 1] = 0; + (void)snprintf(nbuf, BUFSIZ - 1, "%d.", ord->pos++); + print_text(h, nbuf); + return(0); + case (LIST_dash): + print_text(h, "\\(en"); + return(0); + case (LIST_hyphen): + print_text(h, "\\(hy"); + return(0); + case (LIST_bullet): + print_text(h, "\\(bu"); + return(0); + default: + break; + } + + return(1); +} + + +static int +mdoc_it_pre(MDOC_ARGS) +{ + int i, wp, comp; + const struct mdoc_node *bl, *nn; + struct roffsu width, offs; + enum mdoc_list type; + + /* + * XXX: be very careful in changing anything, here. Lists in + * mandoc have many peculiarities; furthermore, they don't + * translate well into HTML and require a bit of mangling. + */ + + bl = n->parent->parent; + if (MDOC_BLOCK != n->type) + bl = bl->parent; + + SCALE_HS_INIT(&offs, 0); + + type = bl->data.Bl.type; + comp = bl->data.Bl.comp; + + if (bl->data.Bl.offs) + a2offs(bl->data.Bl.offs, &offs); + + switch (type) { + case (LIST_enum): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_hyphen): + /* FALLTHROUGH */ + case (LIST_bullet): + SCALE_HS_INIT(&width, 2); + break; + default: + SCALE_HS_INIT(&width, INDENT); + break; + } + + if (bl->data.Bl.width) + a2width(bl->data.Bl.width, &width); + + wp = -1; + for (i = 0; bl->args && i < (int)bl->args->argc; i++) + switch (bl->args->argv[i].arg) { + case (MDOC_Column): + wp = i; /* Save for later. */ + break; + default: + break; + } + + /* Override width in some cases. 
*/ + + switch (type) { + case (LIST_ohang): + /* FALLTHROUGH */ + case (LIST_item): + /* FALLTHROUGH */ + case (LIST_inset): + /* FALLTHROUGH */ + case (LIST_diag): + SCALE_HS_INIT(&width, 0); + break; + default: + if (0 == width.scale) + SCALE_HS_INIT(&width, INDENT); + break; + } + + if (LIST_column == type && MDOC_BODY == n->type) { + nn = n->parent->child; + for (i = 0; nn && nn != n; nn = nn->next) + if (MDOC_BODY == nn->type) + i++; + if (i < (int)bl->args->argv[wp].sz) + a2width(bl->args->argv[wp].value[i], &width); + } + + if (MDOC_HEAD == n->type) + return(mdoc_it_head_pre(m, n, h, type, &width)); + else if (MDOC_BODY == n->type) + return(mdoc_it_body_pre(m, n, h, type, &width)); + + return(mdoc_it_block_pre(m, n, h, type, comp, &offs, &width)); +} + + +/* ARGSUSED */ +static int +mdoc_bl_pre(MDOC_ARGS) +{ + struct ord *ord; + + if (MDOC_HEAD == n->type) + return(0); + if (MDOC_BLOCK != n->type) + return(1); + if (LIST_enum != n->data.Bl.type) + return(1); + + ord = malloc(sizeof(struct ord)); + if (NULL == ord) { + perror(NULL); + exit(EXIT_FAILURE); + } + ord->cookie = n; + ord->pos = 1; + ord->next = h->ords.head; + h->ords.head = ord; + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_bl_post(MDOC_ARGS) +{ + struct ord *ord; + + if (MDOC_BLOCK != n->type) + return; + if (LIST_enum != n->data.Bl.type) + return; + + ord = h->ords.head; + assert(ord); + h->ords.head = ord->next; + free(ord); +} + + +/* ARGSUSED */ +static int +mdoc_ex_pre(MDOC_ARGS) +{ + const struct mdoc_node *nn; + struct tag *t; + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "utility"); + + print_text(h, "The"); + for (nn = n->child; nn; nn = nn->next) { + t = print_otag(h, TAG_SPAN, 1, &tag); + print_text(h, nn->string); + print_tagq(h, t); + + h->flags |= HTML_NOSPACE; + + if (nn->next && NULL == nn->next->next) + print_text(h, ", and"); + else if (nn->next) + print_text(h, ","); + else + h->flags &= ~HTML_NOSPACE; + } + + if (n->child && n->child->next) + print_text(h, 
"utilities exit"); + else + print_text(h, "utility exits"); + + print_text(h, "0 on success, and >0 if an error occurs."); + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_dq_pre(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + print_text(h, "\\(lq"); + h->flags |= HTML_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_dq_post(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + h->flags |= HTML_NOSPACE; + print_text(h, "\\(rq"); +} + + +/* ARGSUSED */ +static int +mdoc_pq_pre(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + print_text(h, "\\&("); + h->flags |= HTML_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_pq_post(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + print_text(h, ")"); +} + + +/* ARGSUSED */ +static int +mdoc_sq_pre(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + print_text(h, "\\(oq"); + h->flags |= HTML_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_sq_post(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + h->flags |= HTML_NOSPACE; + print_text(h, "\\(aq"); +} + + +/* ARGSUSED */ +static int +mdoc_em_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "emph"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_d1_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + struct roffsu su; + + if (MDOC_BLOCK != n->type) + return(1); + + /* FIXME: D1 shouldn't be literal. 
*/ + + SCALE_VS_INIT(&su, INDENT - 2); + bufcat_su(h, "margin-left", &su); + PAIR_CLASS_INIT(&tag[0], "lit"); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_sx_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + const struct mdoc_node *nn; + char buf[BUFSIZ]; + + strlcpy(buf, "#", BUFSIZ); + for (nn = n->child; nn; nn = nn->next) { + html_idcat(buf, nn->string, BUFSIZ); + if (nn->next) + html_idcat(buf, " ", BUFSIZ); + } + + PAIR_CLASS_INIT(&tag[0], "link-sec"); + PAIR_HREF_INIT(&tag[1], buf); + + print_otag(h, TAG_A, 2, tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_aq_pre(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + print_text(h, "\\(la"); + h->flags |= HTML_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_aq_post(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + h->flags |= HTML_NOSPACE; + print_text(h, "\\(ra"); +} + + +/* ARGSUSED */ +static int +mdoc_bd_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + int comp; + const struct mdoc_node *nn; + struct roffsu su; + + if (MDOC_HEAD == n->type) + return(0); + + SCALE_VS_INIT(&su, 0); + + if (n->data.Bd.offs) + a2offs(n->data.Bd.offs, &su); + + comp = n->data.Bd.comp; + + /* FIXME: -centered, etc. formatting. */ + /* FIXME: does not respect -offset ??? */ + + if (MDOC_BLOCK == n->type) { + bufcat_su(h, "margin-left", &su); + for (nn = n; nn && ! 
comp; nn = nn->parent) { + if (MDOC_BLOCK != nn->type) + continue; + if (MDOC_Ss == nn->tok || MDOC_Sh == nn->tok) + comp = 1; + if (nn->prev) + break; + } + if (comp) { + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_DIV, 1, tag); + return(1); + } + SCALE_VS_INIT(&su, 1); + bufcat_su(h, "margin-top", &su); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_DIV, 1, tag); + return(1); + } + + if (DISP_unfilled != n->data.Bd.type && + DISP_literal != n->data.Bd.type) + return(1); + + PAIR_CLASS_INIT(&tag[0], "lit"); + bufcat_style(h, "white-space", "pre"); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + + for (nn = n->child; nn; nn = nn->next) { + h->flags |= HTML_NOSPACE; + print_mdoc_node(m, nn, h); + if (NULL == nn->next) + continue; + if (nn->prev && nn->prev->line < nn->line) + print_text(h, "\n"); + else if (NULL == nn->prev) + print_text(h, "\n"); + } + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_pa_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "file"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ad_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "addr"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_an_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + /* TODO: -split and -nosplit (see termp_an_pre()). 
*/ + + PAIR_CLASS_INIT(&tag, "author"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_cd_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + synopsis_pre(h, n); + PAIR_CLASS_INIT(&tag, "config"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_dv_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "define"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ev_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "env"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_er_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "errno"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_fa_pre(MDOC_ARGS) +{ + const struct mdoc_node *nn; + struct htmlpair tag; + struct tag *t; + + PAIR_CLASS_INIT(&tag, "farg"); + if (n->parent->tok != MDOC_Fo) { + print_otag(h, TAG_SPAN, 1, &tag); + return(1); + } + + for (nn = n->child; nn; nn = nn->next) { + t = print_otag(h, TAG_SPAN, 1, &tag); + print_text(h, nn->string); + print_tagq(h, t); + if (nn->next) + print_text(h, ","); + } + + if (n->child && n->next && n->next->tok == MDOC_Fa) + print_text(h, ","); + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_fd_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + synopsis_pre(h, n); + + PAIR_CLASS_INIT(&tag, "macro"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_vt_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (MDOC_BLOCK == n->type) { + synopsis_pre(h, n); + return(1); + } else if (MDOC_ELEM == n->type) { + synopsis_pre(h, n); + } else if (MDOC_HEAD == n->type) + return(0); + + PAIR_CLASS_INIT(&tag, "type"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ft_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + synopsis_pre(h, n); + PAIR_CLASS_INIT(&tag, "ftype"); + 
print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_fn_pre(MDOC_ARGS) +{ + struct tag *t; + struct htmlpair tag[2]; + const struct mdoc_node *nn; + char nbuf[BUFSIZ]; + const char *sp, *ep; + int sz, i; + + synopsis_pre(h, n); + + /* Split apart into type and name. */ + assert(n->child->string); + sp = n->child->string; + + ep = strchr(sp, ' '); + if (NULL != ep) { + PAIR_CLASS_INIT(&tag[0], "ftype"); + t = print_otag(h, TAG_SPAN, 1, tag); + + while (ep) { + sz = MIN((int)(ep - sp), BUFSIZ - 1); + (void)memcpy(nbuf, sp, (size_t)sz); + nbuf[sz] = '\0'; + print_text(h, nbuf); + sp = ++ep; + ep = strchr(sp, ' '); + } + print_tagq(h, t); + } + + PAIR_CLASS_INIT(&tag[0], "fname"); + + /* + * FIXME: only refer to IDs that we know exist. + */ + +#if 0 + if (SEC_SYNOPSIS == n->sec) { + nbuf[0] = '\0'; + html_idcat(nbuf, sp, BUFSIZ); + PAIR_ID_INIT(&tag[1], nbuf); + } else { + strlcpy(nbuf, "#", BUFSIZ); + html_idcat(nbuf, sp, BUFSIZ); + PAIR_HREF_INIT(&tag[1], nbuf); + } +#endif + + t = print_otag(h, TAG_SPAN, 1, tag); + + if (sp) { + strlcpy(nbuf, sp, BUFSIZ); + print_text(h, nbuf); + } + + print_tagq(h, t); + + h->flags |= HTML_NOSPACE; + print_text(h, "("); + + bufinit(h); + PAIR_CLASS_INIT(&tag[0], "farg"); + bufcat_style(h, "white-space", "nowrap"); + PAIR_STYLE_INIT(&tag[1], h); + + for (nn = n->child->next; nn; nn = nn->next) { + i = 1; + if (SEC_SYNOPSIS == n->sec) + i = 2; + t = print_otag(h, TAG_SPAN, i, tag); + print_text(h, nn->string); + print_tagq(h, t); + if (nn->next) + print_text(h, ","); + } + + print_text(h, ")"); + if (SEC_SYNOPSIS == n->sec) + print_text(h, ";"); + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_sp_pre(MDOC_ARGS) +{ + int len; + struct htmlpair tag; + struct roffsu su; + + switch (n->tok) { + case (MDOC_sp): + /* FIXME: can this have a scaling indicator? */ + len = n->child ? 
atoi(n->child->string) : 1; + break; + case (MDOC_br): + len = 0; + break; + default: + len = 1; + break; + } + + SCALE_VS_INIT(&su, len); + bufcat_su(h, "height", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + /* So the div isn't empty: */ + print_text(h, "\\~"); + + return(0); + +} + + +/* ARGSUSED */ +static int +mdoc_brq_pre(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + print_text(h, "\\(lC"); + h->flags |= HTML_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_brq_post(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + h->flags |= HTML_NOSPACE; + print_text(h, "\\(rC"); +} + + +/* ARGSUSED */ +static int +mdoc_lk_pre(MDOC_ARGS) +{ + const struct mdoc_node *nn; + struct htmlpair tag[2]; + + nn = n->child; + + PAIR_CLASS_INIT(&tag[0], "link-ext"); + PAIR_HREF_INIT(&tag[1], nn->string); + print_otag(h, TAG_A, 2, tag); + + if (NULL == nn->next) + return(1); + + for (nn = nn->next; nn; nn = nn->next) + print_text(h, nn->string); + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_mt_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + struct tag *t; + const struct mdoc_node *nn; + + PAIR_CLASS_INIT(&tag[0], "link-mail"); + + for (nn = n->child; nn; nn = nn->next) { + bufinit(h); + bufcat(h, "mailto:"); + bufcat(h, nn->string); + PAIR_HREF_INIT(&tag[1], h->buf); + t = print_otag(h, TAG_A, 2, tag); + print_text(h, nn->string); + print_tagq(h, t); + } + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_fo_pre(MDOC_ARGS) +{ + struct htmlpair tag; + struct tag *t; + + if (MDOC_BODY == n->type) { + h->flags |= HTML_NOSPACE; + print_text(h, "("); + h->flags |= HTML_NOSPACE; + return(1); + } else if (MDOC_BLOCK == n->type) { + synopsis_pre(h, n); + return(1); + } + + /* XXX: we drop non-initial arguments as per groff. 
*/ + + assert(n->child); + assert(n->child->string); + + PAIR_CLASS_INIT(&tag, "fname"); + t = print_otag(h, TAG_SPAN, 1, &tag); + print_text(h, n->child->string); + print_tagq(h, t); + return(0); +} + + +/* ARGSUSED */ +static void +mdoc_fo_post(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + h->flags |= HTML_NOSPACE; + print_text(h, ")"); + h->flags |= HTML_NOSPACE; + print_text(h, ";"); +} + + +/* ARGSUSED */ +static int +mdoc_in_pre(MDOC_ARGS) +{ + const struct mdoc_node *nn; + struct tag *t; + struct htmlpair tag[2]; + int i; + + synopsis_pre(h, n); + + PAIR_CLASS_INIT(&tag[0], "includes"); + print_otag(h, TAG_SPAN, 1, tag); + + if (SEC_SYNOPSIS == n->sec && MDOC_LINE & n->flags) + print_text(h, "#include"); + + print_text(h, "<"); + h->flags |= HTML_NOSPACE; + + for (nn = n->child; nn; nn = nn->next) { + PAIR_CLASS_INIT(&tag[0], "link-includes"); + i = 1; + bufinit(h); + if (h->base_includes) { + buffmt_includes(h, nn->string); + PAIR_HREF_INIT(&tag[i], h->buf); + i++; + } + t = print_otag(h, TAG_A, i, tag); + print_mdoc_node(m, nn, h); + print_tagq(h, t); + } + + h->flags |= HTML_NOSPACE; + print_text(h, ">"); + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_ic_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "cmd"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_rv_pre(MDOC_ARGS) +{ + const struct mdoc_node *nn; + struct htmlpair tag; + struct tag *t; + + print_otag(h, TAG_DIV, 0, NULL); + print_text(h, "The"); + + for (nn = n->child; nn; nn = nn->next) { + PAIR_CLASS_INIT(&tag, "fname"); + t = print_otag(h, TAG_SPAN, 1, &tag); + print_text(h, nn->string); + print_tagq(h, t); + + h->flags |= HTML_NOSPACE; + if (nn->next && NULL == nn->next->next) + print_text(h, "(), and"); + else if (nn->next) + print_text(h, "(),"); + else + print_text(h, "()"); + } + + if (n->child && n->child->next) + print_text(h, "functions return"); + else + print_text(h, "function returns"); + + 
print_text(h, "the value 0 if successful; otherwise the value " + "-1 is returned and the global variable"); + + PAIR_CLASS_INIT(&tag, "var"); + t = print_otag(h, TAG_SPAN, 1, &tag); + print_text(h, "errno"); + print_tagq(h, t); + print_text(h, "is set to indicate the error."); + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_va_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "var"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_bq_pre(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + print_text(h, "\\(lB"); + h->flags |= HTML_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_bq_post(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + h->flags |= HTML_NOSPACE; + print_text(h, "\\(rB"); +} + + +/* ARGSUSED */ +static int +mdoc_ap_pre(MDOC_ARGS) +{ + + h->flags |= HTML_NOSPACE; + print_text(h, "\\(aq"); + h->flags |= HTML_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_bf_pre(MDOC_ARGS) +{ + int i; + struct htmlpair tag[2]; + struct roffsu su; + + if (MDOC_HEAD == n->type) + return(0); + else if (MDOC_BLOCK != n->type) + return(1); + + PAIR_CLASS_INIT(&tag[0], "lit"); + + if (n->head->child) { + if ( ! strcmp("Em", n->head->child->string)) + PAIR_CLASS_INIT(&tag[0], "emph"); + else if ( ! strcmp("Sy", n->head->child->string)) + PAIR_CLASS_INIT(&tag[0], "symb"); + else if ( ! strcmp("Li", n->head->child->string)) + PAIR_CLASS_INIT(&tag[0], "lit"); + } else { + for (i = 0; n->args && i < (int)n->args->argc; i++) + switch (n->args->argv[i].arg) { + case (MDOC_Symbolic): + PAIR_CLASS_INIT(&tag[0], "symb"); + break; + case (MDOC_Literal): + PAIR_CLASS_INIT(&tag[0], "lit"); + break; + case (MDOC_Emphasis): + PAIR_CLASS_INIT(&tag[0], "emph"); + break; + default: + break; + } + } + + /* FIXME: div's have spaces stripped--we want them. 
*/ + + bufcat_style(h, "display", "inline"); + SCALE_HS_INIT(&su, 1); + bufcat_su(h, "margin-right", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ms_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "symb"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_pf_pre(MDOC_ARGS) +{ + + h->flags |= HTML_IGNDELIM; + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_pf_post(MDOC_ARGS) +{ + + h->flags &= ~HTML_IGNDELIM; + h->flags |= HTML_NOSPACE; +} + + +/* ARGSUSED */ +static int +mdoc_rs_pre(MDOC_ARGS) +{ + struct htmlpair tag; + struct roffsu su; + + if (MDOC_BLOCK != n->type) + return(1); + + if (n->prev && SEC_SEE_ALSO == n->sec) { + SCALE_VS_INIT(&su, 1); + bufcat_su(h, "margin-top", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + } + + PAIR_CLASS_INIT(&tag, "ref"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + + +/* ARGSUSED */ +static int +mdoc_li_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "lit"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_sy_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "symb"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_bt_pre(MDOC_ARGS) +{ + + print_text(h, "is currently in beta test."); + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_ud_pre(MDOC_ARGS) +{ + + print_text(h, "currently under development."); + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_lb_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (SEC_LIBRARY == n->sec && MDOC_LINE & n->flags) + print_otag(h, TAG_DIV, 0, NULL); + PAIR_CLASS_INIT(&tag, "lib"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc__x_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + + switch (n->tok) { + case(MDOC__A): + PAIR_CLASS_INIT(&tag[0], "ref-auth"); 
+ break; + case(MDOC__B): + PAIR_CLASS_INIT(&tag[0], "ref-book"); + break; + case(MDOC__C): + PAIR_CLASS_INIT(&tag[0], "ref-city"); + break; + case(MDOC__D): + PAIR_CLASS_INIT(&tag[0], "ref-date"); + break; + case(MDOC__I): + PAIR_CLASS_INIT(&tag[0], "ref-issue"); + break; + case(MDOC__J): + PAIR_CLASS_INIT(&tag[0], "ref-jrnl"); + break; + case(MDOC__N): + PAIR_CLASS_INIT(&tag[0], "ref-num"); + break; + case(MDOC__O): + PAIR_CLASS_INIT(&tag[0], "ref-opt"); + break; + case(MDOC__P): + PAIR_CLASS_INIT(&tag[0], "ref-page"); + break; + case(MDOC__Q): + PAIR_CLASS_INIT(&tag[0], "ref-corp"); + break; + case(MDOC__R): + PAIR_CLASS_INIT(&tag[0], "ref-rep"); + break; + case(MDOC__T): + PAIR_CLASS_INIT(&tag[0], "ref-title"); + break; + case(MDOC__U): + PAIR_CLASS_INIT(&tag[0], "link-ref"); + break; + case(MDOC__V): + PAIR_CLASS_INIT(&tag[0], "ref-vol"); + break; + default: + abort(); + /* NOTREACHED */ + } + + if (MDOC__U != n->tok) { + print_otag(h, TAG_SPAN, 1, tag); + return(1); + } + + PAIR_HREF_INIT(&tag[1], n->child->string); + print_otag(h, TAG_A, 2, tag); + return(1); +} + + +/* ARGSUSED */ +static void +mdoc__x_post(MDOC_ARGS) +{ + + /* TODO: %U */ + + h->flags |= HTML_NOSPACE; + print_text(h, n->next ? "," : "."); +} diff --git a/commands/mdocml/mdoc_macro.c b/commands/mdocml/mdoc_macro.c new file mode 100644 index 000000000..0b0fe4080 --- /dev/null +++ b/commands/mdocml/mdoc_macro.c @@ -0,0 +1,1728 @@ +/* $Id: mdoc_macro.c,v 1.80 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "libmdoc.h" +#include "libmandoc.h" + +enum rew { + REWIND_REWIND, + REWIND_NOHALT, + REWIND_HALT +}; + +static int blk_full(MACRO_PROT_ARGS); +static int blk_exp_close(MACRO_PROT_ARGS); +static int blk_part_exp(MACRO_PROT_ARGS); +static int blk_part_imp(MACRO_PROT_ARGS); +static int ctx_synopsis(MACRO_PROT_ARGS); +static int in_line_eoln(MACRO_PROT_ARGS); +static int in_line_argn(MACRO_PROT_ARGS); +static int in_line(MACRO_PROT_ARGS); +static int obsolete(MACRO_PROT_ARGS); +static int phrase_ta(MACRO_PROT_ARGS); + +static int append_delims(struct mdoc *, + int, int *, char *); +static enum mdoct lookup(enum mdoct, const char *); +static enum mdoct lookup_raw(const char *); +static int phrase(struct mdoc *, int, int, char *); +static enum mdoct rew_alt(enum mdoct); +static int rew_dobreak(enum mdoct, + const struct mdoc_node *); +static enum rew rew_dohalt(enum mdoct, enum mdoc_type, + const struct mdoc_node *); +static int rew_elem(struct mdoc *, enum mdoct); +static int rew_last(struct mdoc *, + const struct mdoc_node *); +static int rew_sub(enum mdoc_type, struct mdoc *, + enum mdoct, int, int); +static int swarn(struct mdoc *, enum mdoc_type, int, + int, const struct mdoc_node *); + +const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ap */ + { in_line_eoln, MDOC_PROLOGUE }, /* Dd */ + { in_line_eoln, MDOC_PROLOGUE }, /* Dt */ + { in_line_eoln, MDOC_PROLOGUE }, /* Os */ + { blk_full, 0 }, /* Sh */ + { blk_full, 0 }, /* Ss */ + { 
in_line_eoln, 0 }, /* Pp */ + { blk_part_imp, MDOC_PARSED }, /* D1 */ + { blk_part_imp, MDOC_PARSED }, /* Dl */ + { blk_full, MDOC_EXPLICIT }, /* Bd */ + { blk_exp_close, MDOC_EXPLICIT }, /* Ed */ + { blk_full, MDOC_EXPLICIT }, /* Bl */ + { blk_exp_close, MDOC_EXPLICIT }, /* El */ + { blk_full, MDOC_PARSED }, /* It */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ad */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* An */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ar */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Cd */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Cm */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Dv */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Er */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ev */ + { in_line_eoln, 0 }, /* Ex */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fa */ + { in_line_eoln, 0 }, /* Fd */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fl */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fn */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ft */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ic */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* In */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Li */ + { blk_full, 0 }, /* Nd */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Nm */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Op */ + { obsolete, 0 }, /* Ot */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Pa */ + { in_line_eoln, 0 }, /* Rv */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* St */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Va */ + { ctx_synopsis, MDOC_CALLABLE | MDOC_PARSED }, /* Vt */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Xr */ + { in_line_eoln, 0 }, /* %A */ + { in_line_eoln, 0 }, /* %B */ + { in_line_eoln, 0 }, /* %D */ + { in_line_eoln, 0 }, /* %I */ + { in_line_eoln, 0 }, /* %J */ + { in_line_eoln, 0 }, /* %N */ + { in_line_eoln, 0 }, /* %O */ + { in_line_eoln, 0 }, /* %P */ + { in_line_eoln, 0 }, /* %R */ + { in_line_eoln, 0 }, /* %T */ + { 
in_line_eoln, 0 }, /* %V */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Ac */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Ao */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Aq */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* At */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Bc */ + { blk_full, MDOC_EXPLICIT }, /* Bf */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Bo */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Bq */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Bsx */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Bx */ + { in_line_eoln, 0 }, /* Db */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Dc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Do */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Dq */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Ec */ + { blk_exp_close, MDOC_EXPLICIT }, /* Ef */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Em */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Eo */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Fx */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ms */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* No */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ns */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Nx */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ox */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Pc */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_IGNDELIM }, /* Pf */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Po */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Pq */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Qc */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Ql */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Qo */ + { blk_part_imp, 
MDOC_CALLABLE | MDOC_PARSED }, /* Qq */ + { blk_exp_close, MDOC_EXPLICIT }, /* Re */ + { blk_full, MDOC_EXPLICIT }, /* Rs */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Sc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* So */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Sq */ + { in_line_eoln, 0 }, /* Sm */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Sx */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Sy */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Tn */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ux */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Xc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Xo */ + { blk_full, MDOC_EXPLICIT | MDOC_CALLABLE }, /* Fo */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Fc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Oo */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Oc */ + { blk_full, MDOC_EXPLICIT }, /* Bk */ + { blk_exp_close, MDOC_EXPLICIT }, /* Ek */ + { in_line_eoln, 0 }, /* Bt */ + { in_line_eoln, 0 }, /* Hf */ + { obsolete, 0 }, /* Fr */ + { in_line_eoln, 0 }, /* Ud */ + { in_line, 0 }, /* Lb */ + { in_line_eoln, 0 }, /* Lp */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Lk */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Mt */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Brq */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Bro */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Brc */ + { in_line_eoln, 0 }, /* %C */ + { obsolete, 0 }, /* Es */ + { obsolete, 0 }, /* En */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Dx */ + { in_line_eoln, 0 }, /* %Q */ + { in_line_eoln, 0 }, /* br */ + { in_line_eoln, 0 }, /* sp */ + { in_line_eoln, 0 }, /* %U */ + { phrase_ta, MDOC_CALLABLE | MDOC_PARSED }, /* Ta */ +}; + +const struct mdoc_macro * const mdoc_macros = __mdoc_macros; + + 
+static int +swarn(struct mdoc *mdoc, enum mdoc_type type, + int line, int pos, const struct mdoc_node *p) +{ + const char *n, *t, *tt; + enum mandocerr ec; + + n = t = ""; + tt = "block"; + + switch (type) { + case (MDOC_BODY): + tt = "multi-line"; + break; + case (MDOC_HEAD): + tt = "line"; + break; + default: + break; + } + + switch (p->type) { + case (MDOC_BLOCK): + n = mdoc_macronames[p->tok]; + t = "block"; + break; + case (MDOC_BODY): + n = mdoc_macronames[p->tok]; + t = "multi-line"; + break; + case (MDOC_HEAD): + n = mdoc_macronames[p->tok]; + t = "line"; + break; + default: + break; + } + + ec = (MDOC_IGN_SCOPE & mdoc->pflags) ? + MANDOCERR_SCOPE : MANDOCERR_SYNTSCOPE; + + return(mdoc_vmsg(mdoc, ec, line, pos, + "%s scope breaks %s of %s", + tt, t, n)); +} + + +/* + * This is called at the end of parsing. It must traverse up the tree, + * closing out open [implicit] scopes. Obviously, open explicit scopes + * are errors. + */ +int +mdoc_macroend(struct mdoc *m) +{ + struct mdoc_node *n; + + /* Scan for open explicit scopes. */ + + n = MDOC_VALID & m->last->flags ? m->last->parent : m->last; + + for ( ; n; n = n->parent) { + if (MDOC_BLOCK != n->type) + continue; + if ( ! (MDOC_EXPLICIT & mdoc_macros[n->tok].flags)) + continue; + mdoc_nmsg(m, n, MANDOCERR_SYNTSCOPE); + return(0); + } + + /* Rewind to the first. */ + + return(rew_last(m, m->first)); +} + + +/* + * Look up a macro from within a subsequent context. + */ +static enum mdoct +lookup(enum mdoct from, const char *p) +{ + /* FIXME: make -diag lists be un-PARSED. */ + + if ( ! (MDOC_PARSED & mdoc_macros[from].flags)) + return(MDOC_MAX); + return(lookup_raw(p)); +} + + +/* + * Lookup a macro following the initial line macro. 
+ */ +static enum mdoct +lookup_raw(const char *p) +{ + enum mdoct res; + + if (MDOC_MAX == (res = mdoc_hash_find(p))) + return(MDOC_MAX); + if (MDOC_CALLABLE & mdoc_macros[res].flags) + return(res); + return(MDOC_MAX); +} + + +static int +rew_last(struct mdoc *mdoc, const struct mdoc_node *to) +{ + + assert(to); + mdoc->next = MDOC_NEXT_SIBLING; + + /* LINTED */ + while (mdoc->last != to) { + if ( ! mdoc_valid_post(mdoc)) + return(0); + if ( ! mdoc_action_post(mdoc)) + return(0); + mdoc->last = mdoc->last->parent; + assert(mdoc->last); + } + + if ( ! mdoc_valid_post(mdoc)) + return(0); + return(mdoc_action_post(mdoc)); +} + + +/* + * Return the opening macro of a closing one, e.g., `Ec' has `Eo' as its + * matching pair. + */ +static enum mdoct +rew_alt(enum mdoct tok) +{ + switch (tok) { + case (MDOC_Ac): + return(MDOC_Ao); + case (MDOC_Bc): + return(MDOC_Bo); + case (MDOC_Brc): + return(MDOC_Bro); + case (MDOC_Dc): + return(MDOC_Do); + case (MDOC_Ec): + return(MDOC_Eo); + case (MDOC_Ed): + return(MDOC_Bd); + case (MDOC_Ef): + return(MDOC_Bf); + case (MDOC_Ek): + return(MDOC_Bk); + case (MDOC_El): + return(MDOC_Bl); + case (MDOC_Fc): + return(MDOC_Fo); + case (MDOC_Oc): + return(MDOC_Oo); + case (MDOC_Pc): + return(MDOC_Po); + case (MDOC_Qc): + return(MDOC_Qo); + case (MDOC_Re): + return(MDOC_Rs); + case (MDOC_Sc): + return(MDOC_So); + case (MDOC_Xc): + return(MDOC_Xo); + default: + break; + } + abort(); + /* NOTREACHED */ +} + + +/* + * Rewind rules. This indicates whether to stop rewinding + * (REWIND_HALT) without touching our current scope, stop rewinding and + * close our current scope (REWIND_REWIND), or continue (REWIND_NOHALT). + * The scope-closing and so on occurs in the various rew_* routines. 
+ */ +static enum rew +rew_dohalt(enum mdoct tok, enum mdoc_type type, + const struct mdoc_node *p) +{ + + if (MDOC_ROOT == p->type) + return(REWIND_HALT); + if (MDOC_VALID & p->flags) + return(REWIND_NOHALT); + + switch (tok) { + case (MDOC_Aq): + /* FALLTHROUGH */ + case (MDOC_Bq): + /* FALLTHROUGH */ + case (MDOC_Brq): + /* FALLTHROUGH */ + case (MDOC_D1): + /* FALLTHROUGH */ + case (MDOC_Dl): + /* FALLTHROUGH */ + case (MDOC_Dq): + /* FALLTHROUGH */ + case (MDOC_Op): + /* FALLTHROUGH */ + case (MDOC_Pq): + /* FALLTHROUGH */ + case (MDOC_Ql): + /* FALLTHROUGH */ + case (MDOC_Qq): + /* FALLTHROUGH */ + case (MDOC_Sq): + /* FALLTHROUGH */ + case (MDOC_Vt): + assert(MDOC_TAIL != type); + if (type == p->type && tok == p->tok) + return(REWIND_REWIND); + break; + case (MDOC_It): + assert(MDOC_TAIL != type); + if (type == p->type && tok == p->tok) + return(REWIND_REWIND); + if (MDOC_BODY == p->type && MDOC_Bl == p->tok) + return(REWIND_HALT); + break; + case (MDOC_Sh): + if (type == p->type && tok == p->tok) + return(REWIND_REWIND); + break; + case (MDOC_Nd): + /* FALLTHROUGH */ + case (MDOC_Ss): + assert(MDOC_TAIL != type); + if (type == p->type && tok == p->tok) + return(REWIND_REWIND); + if (MDOC_BODY == p->type && MDOC_Sh == p->tok) + return(REWIND_HALT); + break; + case (MDOC_Ao): + /* FALLTHROUGH */ + case (MDOC_Bd): + /* FALLTHROUGH */ + case (MDOC_Bf): + /* FALLTHROUGH */ + case (MDOC_Bk): + /* FALLTHROUGH */ + case (MDOC_Bl): + /* FALLTHROUGH */ + case (MDOC_Bo): + /* FALLTHROUGH */ + case (MDOC_Bro): + /* FALLTHROUGH */ + case (MDOC_Do): + /* FALLTHROUGH */ + case (MDOC_Eo): + /* FALLTHROUGH */ + case (MDOC_Fo): + /* FALLTHROUGH */ + case (MDOC_Oo): + /* FALLTHROUGH */ + case (MDOC_Po): + /* FALLTHROUGH */ + case (MDOC_Qo): + /* FALLTHROUGH */ + case (MDOC_Rs): + /* FALLTHROUGH */ + case (MDOC_So): + /* FALLTHROUGH */ + case (MDOC_Xo): + if (type == p->type && tok == p->tok) + return(REWIND_REWIND); + break; + /* Multi-line explicit scope close. 
*/ + case (MDOC_Ac): + /* FALLTHROUGH */ + case (MDOC_Bc): + /* FALLTHROUGH */ + case (MDOC_Brc): + /* FALLTHROUGH */ + case (MDOC_Dc): + /* FALLTHROUGH */ + case (MDOC_Ec): + /* FALLTHROUGH */ + case (MDOC_Ed): + /* FALLTHROUGH */ + case (MDOC_Ek): + /* FALLTHROUGH */ + case (MDOC_El): + /* FALLTHROUGH */ + case (MDOC_Fc): + /* FALLTHROUGH */ + case (MDOC_Ef): + /* FALLTHROUGH */ + case (MDOC_Oc): + /* FALLTHROUGH */ + case (MDOC_Pc): + /* FALLTHROUGH */ + case (MDOC_Qc): + /* FALLTHROUGH */ + case (MDOC_Re): + /* FALLTHROUGH */ + case (MDOC_Sc): + /* FALLTHROUGH */ + case (MDOC_Xc): + if (type == p->type && rew_alt(tok) == p->tok) + return(REWIND_REWIND); + break; + default: + abort(); + /* NOTREACHED */ + } + + return(REWIND_NOHALT); +} + + +/* + * See if we can break an encountered scope (the rew_dohalt has returned + * REWIND_NOHALT). + */ +static int +rew_dobreak(enum mdoct tok, const struct mdoc_node *p) +{ + + assert(MDOC_ROOT != p->type); + if (MDOC_ELEM == p->type) + return(1); + if (MDOC_TEXT == p->type) + return(1); + if (MDOC_VALID & p->flags) + return(1); + + switch (tok) { + case (MDOC_It): + return(MDOC_It == p->tok); + case (MDOC_Nd): + return(MDOC_Nd == p->tok); + case (MDOC_Ss): + return(MDOC_Ss == p->tok); + case (MDOC_Sh): + if (MDOC_Nd == p->tok) + return(1); + if (MDOC_Ss == p->tok) + return(1); + return(MDOC_Sh == p->tok); + case (MDOC_El): + if (MDOC_It == p->tok) + return(1); + break; + case (MDOC_Oc): + if (MDOC_Op == p->tok) + return(1); + break; + default: + break; + } + + if (MDOC_EXPLICIT & mdoc_macros[tok].flags) + return(p->tok == rew_alt(tok)); + else if (MDOC_BLOCK == p->type) + return(1); + + return(tok == p->tok); +} + + +static int +rew_elem(struct mdoc *mdoc, enum mdoct tok) +{ + struct mdoc_node *n; + + n = mdoc->last; + if (MDOC_ELEM != n->type) + n = n->parent; + assert(MDOC_ELEM == n->type); + assert(tok == n->tok); + + return(rew_last(mdoc, n)); +} + + +static int +rew_sub(enum mdoc_type t, struct mdoc *m, + enum mdoct 
tok, int line, int ppos) +{ + struct mdoc_node *n; + enum rew c; + + /* LINTED */ + for (n = m->last; n; n = n->parent) { + c = rew_dohalt(tok, t, n); + if (REWIND_HALT == c) { + if (MDOC_BLOCK != t) + return(1); + if ( ! (MDOC_EXPLICIT & mdoc_macros[tok].flags)) + return(1); + /* FIXME: shouldn't raise an error */ + mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTNOSCOPE); + return(0); + } + if (REWIND_REWIND == c) + break; + else if (rew_dobreak(tok, n)) + continue; + if ( ! swarn(m, t, line, ppos, n)) + return(0); + } + + assert(n); + if ( ! rew_last(m, n)) + return(0); + +#ifdef UGLY + /* + * The current block extends an enclosing block beyond a line + * break. Now that the current block ends, close the enclosing + * block, too. + */ + if (NULL != (n = n->pending)) { + assert(MDOC_HEAD == n->type); + if ( ! rew_last(m, n)) + return(0); + if ( ! mdoc_body_alloc(m, n->line, n->pos, n->tok)) + return(0); + } +#endif + + return(1); +} + + +static int +append_delims(struct mdoc *m, int line, int *pos, char *buf) +{ + int la; + enum margserr ac; + char *p; + + if ('\0' == buf[*pos]) + return(1); + + for (;;) { + la = *pos; + ac = mdoc_zargs(m, line, pos, buf, ARGS_NOWARN, &p); + + if (ARGS_ERROR == ac) + return(0); + else if (ARGS_EOLN == ac) + break; + + assert(DELIM_NONE != mdoc_isdelim(p)); + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + + /* + * If we encounter end-of-sentence symbols, then trigger + * the double-space. + * + * XXX: it's easy to allow this to propogate outward to + * the last symbol, such that `. )' will cause the + * correct double-spacing. However, (1) groff isn't + * smart enough to do this and (2) it would require + * knowing which symbols break this behaviour, for + * example, `. ;' shouldn't propogate the double-space. + */ + if (mandoc_eos(p, strlen(p))) + m->last->flags |= MDOC_EOS; + } + + return(1); +} + + +/* + * Close out block partial/full explicit. 
+ */ +static int +blk_exp_close(MACRO_PROT_ARGS) +{ + int j, lastarg, maxargs, flushed, nl; + enum margserr ac; + enum mdoct ntok; + char *p; + + nl = MDOC_NEWLINE & m->flags; + + switch (tok) { + case (MDOC_Ec): + maxargs = 1; + break; + default: + maxargs = 0; + break; + } + + if ( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) { + /* FIXME: do this in validate */ + if (buf[*pos]) + if ( ! mdoc_pmsg(m, line, ppos, MANDOCERR_ARGSLOST)) + return(0); + + if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) + return(0); + return(rew_sub(MDOC_BLOCK, m, tok, line, ppos)); + } + + if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) + return(0); + + if (maxargs > 0) + if ( ! mdoc_tail_alloc(m, line, ppos, rew_alt(tok))) + return(0); + + for (flushed = j = 0; ; j++) { + lastarg = *pos; + + if (j == maxargs && ! flushed) { + if ( ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) + return(0); + flushed = 1; + } + + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_PUNCT == ac) + break; + if (ARGS_EOLN == ac) + break; + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX == ntok) { + if ( ! mdoc_word_alloc(m, line, lastarg, p)) + return(0); + continue; + } + + if ( ! flushed) { + if ( ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) + return(0); + flushed = 1; + } + if ( ! mdoc_macro(m, ntok, line, lastarg, pos, buf)) + return(0); + break; + } + + if ( ! flushed && ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) + return(0); + + if ( ! nl) + return(1); + return(append_delims(m, line, pos, buf)); +} + + +static int +in_line(MACRO_PROT_ARGS) +{ + int la, scope, cnt, nc, nl; + enum margverr av; + enum mdoct ntok; + enum margserr ac; + enum mdelim d; + struct mdoc_arg *arg; + char *p; + + nl = MDOC_NEWLINE & m->flags; + + /* + * Whether we allow ignored elements (those without content, + * usually because of reserved words) to squeak by. 
+ */ + + switch (tok) { + case (MDOC_An): + /* FALLTHROUGH */ + case (MDOC_Ar): + /* FALLTHROUGH */ + case (MDOC_Fl): + /* FALLTHROUGH */ + case (MDOC_Lk): + /* FALLTHROUGH */ + case (MDOC_Nm): + /* FALLTHROUGH */ + case (MDOC_Pa): + nc = 1; + break; + default: + nc = 0; + break; + } + + for (arg = NULL;; ) { + la = *pos; + av = mdoc_argv(m, line, tok, &arg, pos, buf); + + if (ARGV_WORD == av) { + *pos = la; + break; + } + if (ARGV_EOLN == av) + break; + if (ARGV_ARG == av) + continue; + + mdoc_argv_free(arg); + return(0); + } + + for (cnt = scope = 0;; ) { + la = *pos; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_EOLN == ac) + break; + if (ARGS_PUNCT == ac) + break; + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); + + /* + * In this case, we've located a submacro and must + * execute it. Close out scope, if open. If no + * elements have been generated, either create one (nc) + * or raise a warning. + */ + + if (MDOC_MAX != ntok) { + if (scope && ! rew_elem(m, tok)) + return(0); + if (nc && 0 == cnt) { + if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) + return(0); + if ( ! rew_last(m, m->last)) + return(0); + } else if ( ! nc && 0 == cnt) { + mdoc_argv_free(arg); + if ( ! mdoc_pmsg(m, line, ppos, MANDOCERR_MACROEMPTY)) + return(0); + } + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + if ( ! nl) + return(1); + return(append_delims(m, line, pos, buf)); + } + + /* + * Non-quote-enclosed punctuation. Set up our scope, if + * a word; rewind the scope, if a delimiter; then append + * the word. + */ + + d = ARGS_QWORD == ac ? DELIM_NONE : mdoc_isdelim(p); + + if (DELIM_NONE != d) { + /* + * If we encounter closing punctuation, no word + * has been omitted, no scope is open, and we're + * allowed to have an empty element, then start + * a new scope. `Ar', `Fl', and `Li', only do + * this once per invocation. There may be more + * of these (all of them?). 
+ */ + if (0 == cnt && (nc || MDOC_Li == tok) && + DELIM_CLOSE == d && ! scope) { + if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) + return(0); + if (MDOC_Ar == tok || MDOC_Li == tok || + MDOC_Fl == tok) + cnt++; + scope = 1; + } + /* + * Close out our scope, if one is open, before + * any punctuation. + */ + if (scope && ! rew_elem(m, tok)) + return(0); + scope = 0; + } else if ( ! scope) { + if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) + return(0); + scope = 1; + } + + if (DELIM_NONE == d) + cnt++; + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + + /* + * `Fl' macros have their scope re-opened with each new + * word so that the `-' can be added to each one without + * having to parse out spaces. + */ + if (scope && MDOC_Fl == tok) { + if ( ! rew_elem(m, tok)) + return(0); + scope = 0; + } + } + + if (scope && ! rew_elem(m, tok)) + return(0); + + /* + * If no elements have been collected and we're allowed to have + * empties (nc), open a scope and close it out. Otherwise, + * raise a warning. + */ + + if (nc && 0 == cnt) { + if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) + return(0); + if ( ! rew_last(m, m->last)) + return(0); + } else if ( ! nc && 0 == cnt) { + mdoc_argv_free(arg); + if ( ! mdoc_pmsg(m, line, ppos, MANDOCERR_MACROEMPTY)) + return(0); + } + + if ( ! nl) + return(1); + return(append_delims(m, line, pos, buf)); +} + + +static int +blk_full(MACRO_PROT_ARGS) +{ + int la, nl; + struct mdoc_arg *arg; + struct mdoc_node *head; /* save of head macro */ + struct mdoc_node *body; /* save of body macro */ +#ifdef UGLY + struct mdoc_node *n; +#endif + enum mdoc_type mtt; + enum mdoct ntok; + enum margserr ac, lac; + enum margverr av; + char *p; + + nl = MDOC_NEWLINE & m->flags; + + /* Close out prior implicit scope. */ + + if ( ! (MDOC_EXPLICIT & mdoc_macros[tok].flags)) { + if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) + return(0); + if ( ! 
rew_sub(MDOC_BLOCK, m, tok, line, ppos)) + return(0); + } + + /* + * This routine accomodates implicitly- and explicitly-scoped + * macro openings. Implicit ones first close out prior scope + * (seen above). Delay opening the head until necessary to + * allow leading punctuation to print. Special consideration + * for `It -column', which has phrase-part syntax instead of + * regular child nodes. + */ + + for (arg = NULL;; ) { + la = *pos; + av = mdoc_argv(m, line, tok, &arg, pos, buf); + + if (ARGV_WORD == av) { + *pos = la; + break; + } + + if (ARGV_EOLN == av) + break; + if (ARGV_ARG == av) + continue; + + mdoc_argv_free(arg); + return(0); + } + + if ( ! mdoc_block_alloc(m, line, ppos, tok, arg)) + return(0); + + head = body = NULL; + + /* + * The `Nd' macro has all arguments in its body: it's a hybrid + * of block partial-explicit and full-implicit. Stupid. + */ + + if (MDOC_Nd == tok) { + if ( ! mdoc_head_alloc(m, line, ppos, tok)) + return(0); + head = m->last; + if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + } + + ac = ARGS_ERROR; + + for ( ; ; ) { + la = *pos; + /* Initialise last-phrase-type with ARGS_PEND. */ + lac = ARGS_ERROR == ac ? ARGS_PEND : ac; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + + if (ARGS_EOLN == ac) { + if (ARGS_PPHRASE != lac && ARGS_PHRASE != lac) + break; + /* + * This is necessary: if the last token on a + * line is a `Ta' or tab, then we'll get + * ARGS_EOLN, so we must be smart enough to + * reopen our scope if the last parse was a + * phrase or partial phrase. + */ + if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + break; + } + + /* + * Emit leading punctuation (i.e., punctuation before + * the MDOC_HEAD) for non-phrase types. 
+ */ + + if (NULL == head && + ARGS_PEND != ac && + ARGS_PHRASE != ac && + ARGS_PPHRASE != ac && + ARGS_QWORD != ac && + DELIM_OPEN == mdoc_isdelim(p)) { + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + continue; + } + + /* Open a head if one hasn't been opened. */ + + if (NULL == head) { + if ( ! mdoc_head_alloc(m, line, ppos, tok)) + return(0); + head = m->last; + } + + if (ARGS_PHRASE == ac || + ARGS_PEND == ac || + ARGS_PPHRASE == ac) { + /* + * If we haven't opened a body yet, rewind the + * head; if we have, rewind that instead. + */ + + mtt = body ? MDOC_BODY : MDOC_HEAD; + if ( ! rew_sub(mtt, m, tok, line, ppos)) + return(0); + + /* Then allocate our body context. */ + + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + + /* + * Process phrases: set whether we're in a + * partial-phrase (this effects line handling) + * then call down into the phrase parser. + */ + + if (ARGS_PPHRASE == ac) + m->flags |= MDOC_PPHRASE; + if (ARGS_PEND == ac && ARGS_PPHRASE == lac) + m->flags |= MDOC_PPHRASE; + + if ( ! phrase(m, line, la, buf)) + return(0); + + m->flags &= ~MDOC_PPHRASE; + continue; + } + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX == ntok) { + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + continue; + } + + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + break; + } + + if (NULL == head) { + if ( ! mdoc_head_alloc(m, line, ppos, tok)) + return(0); + head = m->last; + } + + if (nl && ! append_delims(m, line, pos, buf)) + return(0); + + /* If we've already opened our body, exit now. */ + + if (NULL != body) + goto out; + +#ifdef UGLY + /* + * If there is an open (i.e., unvalidated) sub-block requiring + * explicit close-out, postpone switching the current block from + * head to body until the rew_sub() call closing out that + * sub-block. + */ + for (n = m->last; n && n != head; n = n->parent) { + if (MDOC_BLOCK == n->type && + MDOC_EXPLICIT & mdoc_macros[n->tok].flags && + ! 
(MDOC_VALID & n->flags)) { + assert( ! (MDOC_ACTED & n->flags)); + n->pending = head; + return(1); + } + } +#endif + + /* Close out scopes to remain in a consistent state. */ + + if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + +out: + if ( ! (MDOC_FREECOL & m->flags)) + return(1); + + if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) + return(0); + if ( ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) + return(0); + + m->flags &= ~MDOC_FREECOL; + return(1); +} + + +static int +blk_part_imp(MACRO_PROT_ARGS) +{ + int la, nl; + enum mdoct ntok; + enum margserr ac; + char *p; + struct mdoc_node *blk; /* saved block context */ + struct mdoc_node *body; /* saved body context */ + struct mdoc_node *n; + + nl = MDOC_NEWLINE & m->flags; + + /* + * A macro that spans to the end of the line. This is generally + * (but not necessarily) called as the first macro. The block + * has a head as the immediate child, which is always empty, + * followed by zero or more opening punctuation nodes, then the + * body (which may be empty, depending on the macro), then zero + * or more closing punctuation nodes. + */ + + if ( ! mdoc_block_alloc(m, line, ppos, tok, NULL)) + return(0); + + blk = m->last; + + if ( ! mdoc_head_alloc(m, line, ppos, tok)) + return(0); + if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) + return(0); + + /* + * Open the body scope "on-demand", that is, after we've + * processed all our the leading delimiters (open parenthesis, + * etc.). + */ + + for (body = NULL; ; ) { + la = *pos; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_EOLN == ac) + break; + if (ARGS_PUNCT == ac) + break; + + if (NULL == body && ARGS_QWORD != ac && + DELIM_OPEN == mdoc_isdelim(p)) { + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + continue; + } + + if (NULL == body) { + if ( ! 
mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + } + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX == ntok) { + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + continue; + } + + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + break; + } + + /* Clean-ups to leave in a consistent state. */ + + if (NULL == body) { + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + } + + for (n = body->child; n && n->next; n = n->next) + /* Do nothing. */ ; + + /* + * End of sentence spacing: if the last node is a text node and + * has a trailing period, then mark it as being end-of-sentence. + */ + + if (n && MDOC_TEXT == n->type && n->string) + if (mandoc_eos(n->string, strlen(n->string))) + n->flags |= MDOC_EOS; + + /* Up-propogate the end-of-space flag. */ + + if (n && (MDOC_EOS & n->flags)) { + body->flags |= MDOC_EOS; + body->parent->flags |= MDOC_EOS; + } + + /* + * If we can't rewind to our body, then our scope has already + * been closed by another macro (like `Oc' closing `Op'). This + * is ugly behaviour nodding its head to OpenBSD's overwhelming + * crufty use of `Op' breakage. + * + * FIXME - this should be ifdef'd OpenBSD? + */ + for (n = m->last; n; n = n->parent) + if (body == n) + break; + + if (NULL == n && ! mdoc_nmsg(m, body, MANDOCERR_SCOPE)) + return(0); + + if (n && ! rew_last(m, body)) + return(0); + + /* Standard appending of delimiters. */ + + if (nl && ! append_delims(m, line, pos, buf)) + return(0); + + /* Rewind scope, if applicable. */ + + if (n && ! 
rew_last(m, blk)) + return(0); + + return(1); +} + + +static int +blk_part_exp(MACRO_PROT_ARGS) +{ + int la, nl; + enum margserr ac; + struct mdoc_node *head; /* keep track of head */ + struct mdoc_node *body; /* keep track of body */ + char *p; + enum mdoct ntok; + + nl = MDOC_NEWLINE & m->flags; + + /* + * The opening of an explicit macro having zero or more leading + * punctuation nodes; a head with optional single element (the + * case of `Eo'); and a body that may be empty. + */ + + if ( ! mdoc_block_alloc(m, line, ppos, tok, NULL)) + return(0); + + for (head = body = NULL; ; ) { + la = *pos; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_PUNCT == ac) + break; + if (ARGS_EOLN == ac) + break; + + /* Flush out leading punctuation. */ + + if (NULL == head && ARGS_QWORD != ac && + DELIM_OPEN == mdoc_isdelim(p)) { + assert(NULL == body); + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + continue; + } + + if (NULL == head) { + assert(NULL == body); + if ( ! mdoc_head_alloc(m, line, ppos, tok)) + return(0); + head = m->last; + } + + /* + * `Eo' gobbles any data into the head, but most other + * macros just immediately close out and begin the body. + */ + + if (NULL == body) { + assert(head); + /* No check whether it's a macro! */ + if (MDOC_Eo == tok) + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + + if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + + if (MDOC_Eo == tok) + continue; + } + + assert(NULL != head && NULL != body); + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX == ntok) { + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + continue; + } + + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + break; + } + + /* Clean-up to leave in a consistent state. */ + + if (NULL == head) { + if ( ! 
mdoc_head_alloc(m, line, ppos, tok)) + return(0); + head = m->last; + } + + if (NULL == body) { + if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + } + + /* Standard appending of delimiters. */ + + if ( ! nl) + return(1); + return(append_delims(m, line, pos, buf)); +} + + +/* ARGSUSED */ +static int +in_line_argn(MACRO_PROT_ARGS) +{ + int la, flushed, j, maxargs, nl; + enum margserr ac; + enum margverr av; + struct mdoc_arg *arg; + char *p; + enum mdoct ntok; + + nl = MDOC_NEWLINE & m->flags; + + /* + * A line macro that has a fixed number of arguments (maxargs). + * Only open the scope once the first non-leading-punctuation is + * found (unless MDOC_IGNDELIM is noted, like in `Pf'), then + * keep it open until the maximum number of arguments are + * exhausted. + */ + + switch (tok) { + case (MDOC_Ap): + /* FALLTHROUGH */ + case (MDOC_No): + /* FALLTHROUGH */ + case (MDOC_Ns): + /* FALLTHROUGH */ + case (MDOC_Ux): + maxargs = 0; + break; + case (MDOC_Xr): + maxargs = 2; + break; + default: + maxargs = 1; + break; + } + + for (arg = NULL; ; ) { + la = *pos; + av = mdoc_argv(m, line, tok, &arg, pos, buf); + + if (ARGV_WORD == av) { + *pos = la; + break; + } + + if (ARGV_EOLN == av) + break; + if (ARGV_ARG == av) + continue; + + mdoc_argv_free(arg); + return(0); + } + + for (flushed = j = 0; ; ) { + la = *pos; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_PUNCT == ac) + break; + if (ARGS_EOLN == ac) + break; + + if ( ! (MDOC_IGNDELIM & mdoc_macros[tok].flags) && + ARGS_QWORD != ac && + 0 == j && DELIM_OPEN == mdoc_isdelim(p)) { + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + continue; + } else if (0 == j) + if ( ! mdoc_elem_alloc(m, line, la, tok, arg)) + return(0); + + if (j == maxargs && ! flushed) { + if ( ! rew_elem(m, tok)) + return(0); + flushed = 1; + } + + ntok = ARGS_QWORD == ac ? 
MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX != ntok) { + if ( ! flushed && ! rew_elem(m, tok)) + return(0); + flushed = 1; + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + j++; + break; + } + + if ( ! (MDOC_IGNDELIM & mdoc_macros[tok].flags) && + ARGS_QWORD != ac && + ! flushed && + DELIM_NONE != mdoc_isdelim(p)) { + if ( ! rew_elem(m, tok)) + return(0); + flushed = 1; + } + + /* + * XXX: this is a hack to work around groff's ugliness + * as regards `Xr' and extraneous arguments. It should + * ideally be deprecated behaviour, but because this is + * code is no here, it's unlikely to be removed. + */ + +#ifdef __OpenBSD__ + if (MDOC_Xr == tok && j == maxargs) { + if ( ! mdoc_elem_alloc(m, line, la, MDOC_Ns, NULL)) + return(0); + if ( ! rew_elem(m, MDOC_Ns)) + return(0); + } +#endif + + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + j++; + } + + if (0 == j && ! mdoc_elem_alloc(m, line, la, tok, arg)) + return(0); + + /* Close out in a consistent state. */ + + if ( ! flushed && ! rew_elem(m, tok)) + return(0); + if ( ! nl) + return(1); + return(append_delims(m, line, pos, buf)); +} + + +static int +in_line_eoln(MACRO_PROT_ARGS) +{ + int la; + enum margserr ac; + enum margverr av; + struct mdoc_arg *arg; + char *p; + enum mdoct ntok; + + assert( ! (MDOC_PARSED & mdoc_macros[tok].flags)); + + /* Parse macro arguments. */ + + for (arg = NULL; ; ) { + la = *pos; + av = mdoc_argv(m, line, tok, &arg, pos, buf); + + if (ARGV_WORD == av) { + *pos = la; + break; + } + if (ARGV_EOLN == av) + break; + if (ARGV_ARG == av) + continue; + + mdoc_argv_free(arg); + return(0); + } + + /* Open element scope. */ + + if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) + return(0); + + /* Parse argument terms. */ + + for (;;) { + la = *pos; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_EOLN == ac) + break; + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX == ntok) { + if ( ! 
mdoc_word_alloc(m, line, la, p))
+				return(0);
+			continue;
+		}
+
+		if ( ! rew_elem(m, tok))
+			return(0);
+		return(mdoc_macro(m, ntok, line, la, pos, buf));
+	}
+
+	/* Close out (no delimiters). */
+
+	return(rew_elem(m, tok));
+}
+
+
+/*
+ * Context-dependent dispatch.  Only when the last section seen is
+ * SYNOPSIS and MDOC_NEWLINE is set in the parser flags (that is, the
+ * call is not nested) is the macro handed to blk_part_imp(); in every
+ * other case it is handed to in_line().
+ */
+/* ARGSUSED */
+static int
+ctx_synopsis(MACRO_PROT_ARGS)
+{
+	int		 at_newline;
+
+	at_newline = MDOC_NEWLINE & m->flags;
+
+	/*
+	 * XXX: this opens a block scope; if the block scope is ever
+	 * formatted, child nodes will inherit that formatting.  Be
+	 * careful.
+	 */
+
+	if (SEC_SYNOPSIS == m->lastsec && at_newline)
+		return(blk_part_imp(m, tok, line, ppos, pos, buf));
+
+	/* Outside the SYNOPSIS, or a nested call: plain in-line. */
+
+	return(in_line(m, tok, line, ppos, pos, buf));
+}
+
+
+/*
+ * Handler for obsolete macros: report MANDOCERR_MACROOBS at the
+ * macro's position and hand back the result of mdoc_pmsg().
+ */
+/* ARGSUSED */
+static int
+obsolete(MACRO_PROT_ARGS)
+{
+	int		 rc;
+
+	rc = mdoc_pmsg(m, line, ppos, MANDOCERR_MACROOBS);
+	return(rc);
+}
+
+
+/*
+ * Parse one phrase of a `Bl -column' entry (phrases are separated by
+ * `Ta' or tabs).  Tokens are appended as plain words until one of them
+ * names a macro; that macro is then run on the rest of the line.
+ * Returns 0 on error, 1 when the end of the line is reached, or the
+ * result of append_delims() once a macro has been run.
+ */
+static int
+phrase(struct mdoc *m, int line, int ppos, char *buf)
+{
+	int		 start, cur;
+	enum margserr	 rc;
+	enum mdoct	 macro;
+	char		*word;
+
+	cur = ppos;
+
+	for (;;) {
+		start = cur;
+		rc = mdoc_zargs(m, line, &cur, buf, 0, &word);
+
+		if (ARGS_ERROR == rc)
+			return(0);
+		if (ARGS_EOLN == rc)
+			return(1);
+
+		/* Quoted words are never looked up as macros. */
+
+		macro = MDOC_MAX;
+		if (ARGS_QWORD != rc)
+			macro = lookup_raw(word);
+
+		if (MDOC_MAX != macro)
+			break;
+
+		if ( ! mdoc_word_alloc(m, line, start, word))
+			return(0);
+	}
+
+	/* A macro takes over the remainder of the line. */
+
+	if ( ! mdoc_macro(m, macro, line, start, &cur, buf))
+		return(0);
+	return(append_delims(m, line, &cur, buf));
+}
+
+
+/* ARGSUSED */
+static int
+phrase_ta(MACRO_PROT_ARGS)
+{
+	int		  la;
+	enum mdoct	  ntok;
+	enum margserr	  ac;
+	char		 *p;
+
+	/*
+	 * FIXME: this is overly restrictive: if the `Ta' is unexpected,
+	 * it should simply error out with ARGSLOST.
+	 */
+
+	if ( ! 
rew_sub(MDOC_BODY, m, MDOC_It, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(m, line, ppos, MDOC_It)) + return(0); + + for (;;) { + la = *pos; + ac = mdoc_zargs(m, line, pos, buf, 0, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_EOLN == ac) + break; + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup_raw(p); + + if (MDOC_MAX == ntok) { + if ( ! mdoc_word_alloc(m, line, la, p)) + return(0); + continue; + } + + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + return(append_delims(m, line, pos, buf)); + } + + return(1); +} diff --git a/commands/mdocml/mdoc_strings.c b/commands/mdocml/mdoc_strings.c new file mode 100644 index 000000000..c05807f1b --- /dev/null +++ b/commands/mdocml/mdoc_strings.c @@ -0,0 +1,219 @@ +/* $Id: mdoc_strings.c,v 1.23 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "mandoc.h"
+#include "libmdoc.h"
+
+static	const char * const secnames[SEC__MAX] = {
+	NULL,
+	"NAME",
+	"LIBRARY",
+	"SYNOPSIS",
+	"DESCRIPTION",
+	"IMPLEMENTATION NOTES",
+	"RETURN VALUES",
+	"ENVIRONMENT",
+	"FILES",
+	"EXIT STATUS",
+	"EXAMPLES",
+	"DIAGNOSTICS",
+	"COMPATIBILITY",
+	"ERRORS",
+	"SEE ALSO",
+	"STANDARDS",
+	"HISTORY",
+	"AUTHORS",
+	"CAVEATS",
+	"BUGS",
+	"SECURITY CONSIDERATIONS",
+	NULL
+};
+
+/*
+ * Classify a single character as an opening, middle or closing
+ * delimiter; anything else is DELIM_NONE.
+ *
+ * FIXME: this is repeated in print_text() (html.c) and term_word()
+ * (term.c).
+ */
+enum mdelim
+mdoc_iscdelim(char p)
+{
+	enum mdelim	 kind;
+
+	kind = DELIM_NONE;
+
+	switch (p) {
+	case('('):
+		/* FALLTHROUGH */
+	case('['):
+		kind = DELIM_OPEN;
+		break;
+	case('|'):
+		kind = DELIM_MIDDLE;
+		break;
+	case('.'):
+		/* FALLTHROUGH */
+	case(','):
+		/* FALLTHROUGH */
+	case(';'):
+		/* FALLTHROUGH */
+	case(':'):
+		/* FALLTHROUGH */
+	case('?'):
+		/* FALLTHROUGH */
+	case('!'):
+		/* FALLTHROUGH */
+	case(')'):
+		/* FALLTHROUGH */
+	case(']'):
+		kind = DELIM_CLOSE;
+		break;
+	default:
+		break;
+	}
+
+	return(kind);
+}
+
+
+/*
+ * Classify a whole string as a delimiter.  The empty string is not a
+ * delimiter; a one-character string is classified by mdoc_iscdelim().
+ */
+enum mdelim
+mdoc_isdelim(const char *p)
+{
+
+	if ('\0' == *p)
+		return(DELIM_NONE);
+	if ('\0' == p[1])
+		return(mdoc_iscdelim(*p));
+
+	/*
+	 * XXX: groff treats the reserved string \*(Ba exactly like the
+	 * vertical bar, so it is the one multi-character string that
+	 * counts as a (middle) delimiter.  This is the only function
+	 * that checks for this.
+	 */
+	if (0 == strcmp(p, "\\*(Ba"))
+		return(DELIM_MIDDLE);
+
+	return(DELIM_NONE);
+}
+
+
+/*
+ * Map a section title onto its enum mdoc_sec value by exact string
+ * comparison against secnames[]; titles not in the table yield
+ * SEC_CUSTOM.
+ */
+enum mdoc_sec
+mdoc_str2sec(const char *p)
+{
+	int		 i;
+
+	for (i = 0; i < (int)SEC__MAX; i++) {
+		/* Some slots of the table carry no name. */
+		if (NULL == secnames[i])
+			continue;
+		if (0 == strcmp(p, secnames[i]))
+			return((enum mdoc_sec)i);
+	}
+
+	return(SEC_CUSTOM);
+}
+
+
+/* FIXME: move this into an editable .in file. 
+ */
+/*
+ * Look up the fixed length associated with a macro.  Macros sharing
+ * the same length are grouped under one return; macros that are not
+ * listed yield 0.
+ */
+size_t
+mdoc_macro2len(enum mdoct macro)
+{
+
+	switch (macro) {
+	case(MDOC_Ms):
+		/* FALLTHROUGH */
+	case(MDOC_Sy):
+		return(6);
+	case(MDOC_Cm):
+		/* FALLTHROUGH */
+	case(MDOC_Do):
+		/* FALLTHROUGH */
+	case(MDOC_Em):
+		/* FALLTHROUGH */
+	case(MDOC_Fl):
+		/* FALLTHROUGH */
+	case(MDOC_Ic):
+		/* FALLTHROUGH */
+	case(MDOC_Nm):
+		/* FALLTHROUGH */
+	case(MDOC_Oo):
+		/* FALLTHROUGH */
+	case(MDOC_Tn):
+		/* FALLTHROUGH */
+	case(MDOC_Xr):
+		return(10);
+	case(MDOC_Ad):
+		/* FALLTHROUGH */
+	case(MDOC_Ao):
+		/* FALLTHROUGH */
+	case(MDOC_An):
+		/* FALLTHROUGH */
+	case(MDOC_Aq):
+		/* FALLTHROUGH */
+	case(MDOC_Ar):
+		/* FALLTHROUGH */
+	case(MDOC_Bo):
+		/* FALLTHROUGH */
+	case(MDOC_Bq):
+		/* FALLTHROUGH */
+	case(MDOC_Cd):
+		/* FALLTHROUGH */
+	case(MDOC_Dq):
+		/* FALLTHROUGH */
+	case(MDOC_Dv):
+		/* FALLTHROUGH */
+	case(MDOC_Eo):
+		/* FALLTHROUGH */
+	case(MDOC_Fa):
+		/* FALLTHROUGH */
+	case(MDOC_No):
+		/* FALLTHROUGH */
+	case(MDOC_Pf):
+		/* FALLTHROUGH */
+	case(MDOC_Po):
+		/* FALLTHROUGH */
+	case(MDOC_Pq):
+		/* FALLTHROUGH */
+	case(MDOC_Qo):
+		/* FALLTHROUGH */
+	case(MDOC_So):
+		/* FALLTHROUGH */
+	case(MDOC_Sq):
+		/* FALLTHROUGH */
+	case(MDOC_Va):
+		/* FALLTHROUGH */
+	case(MDOC_Vt):
+		return(12);
+	case(MDOC_Op):
+		return(14);
+	case(MDOC_Ev):
+		return(15);
+	case(MDOC_Fo):
+		/* FALLTHROUGH */
+	case(MDOC_Fn):
+		/* FALLTHROUGH */
+	case(MDOC_Li):
+		/* FALLTHROUGH */
+	case(MDOC_Ql):
+		/* FALLTHROUGH */
+	case(MDOC_Sx):
+		return(16);
+	case(MDOC_Er):
+		return(17);
+	case(MDOC_Pa):
+		return(32);
+	default:
+		break;
+	}
+
+	return(0);
+}
diff --git a/commands/mdocml/mdoc_term.c b/commands/mdocml/mdoc_term.c
new file mode 100644
index 000000000..c3dc634a3
--- /dev/null
+++ b/commands/mdocml/mdoc_term.c
@@ -0,0 +1,2109 @@
+/* $Id: mdoc_term.c,v 1.156 2010/06/19 20:46:28 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009 Kristaps Dzonsons
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "out.h" +#include "term.h" +#include "mdoc.h" +#include "chars.h" +#include "main.h" + +#define INDENT 5 +#define HALFINDENT 3 + +struct termpair { + struct termpair *ppair; + int count; +}; + +#define DECL_ARGS struct termp *p, \ + struct termpair *pair, \ + const struct mdoc_meta *m, \ + const struct mdoc_node *n + +struct termact { + int (*pre)(DECL_ARGS); + void (*post)(DECL_ARGS); +}; + +static size_t a2width(const char *); +static size_t a2height(const struct mdoc_node *); +static size_t a2offs(const char *); + +static int arg_hasattr(int, const struct mdoc_node *); +static int arg_getattr(int, const struct mdoc_node *); +static void print_bvspace(struct termp *, + const struct mdoc_node *, + const struct mdoc_node *); +static void print_mdoc_node(DECL_ARGS); +static void print_mdoc_nodelist(DECL_ARGS); +static void print_mdoc_head(struct termp *, const void *); +static void print_mdoc_foot(struct termp *, const void *); +static void synopsis_pre(struct termp *, + const struct mdoc_node *); + +static void termp____post(DECL_ARGS); +static void termp_an_post(DECL_ARGS); +static void termp_aq_post(DECL_ARGS); +static void termp_bd_post(DECL_ARGS); +static void termp_bl_post(DECL_ARGS); +static void termp_bq_post(DECL_ARGS); +static void termp_brq_post(DECL_ARGS); +static void 
termp_bx_post(DECL_ARGS); +static void termp_d1_post(DECL_ARGS); +static void termp_dq_post(DECL_ARGS); +static int termp_fd_pre(DECL_ARGS); +static void termp_fo_post(DECL_ARGS); +static void termp_in_post(DECL_ARGS); +static void termp_it_post(DECL_ARGS); +static void termp_lb_post(DECL_ARGS); +static void termp_op_post(DECL_ARGS); +static void termp_pf_post(DECL_ARGS); +static void termp_pq_post(DECL_ARGS); +static void termp_qq_post(DECL_ARGS); +static void termp_sh_post(DECL_ARGS); +static void termp_sq_post(DECL_ARGS); +static void termp_ss_post(DECL_ARGS); + +static int termp_an_pre(DECL_ARGS); +static int termp_ap_pre(DECL_ARGS); +static int termp_aq_pre(DECL_ARGS); +static int termp_bd_pre(DECL_ARGS); +static int termp_bf_pre(DECL_ARGS); +static int termp_bl_pre(DECL_ARGS); +static int termp_bold_pre(DECL_ARGS); +static int termp_bq_pre(DECL_ARGS); +static int termp_brq_pre(DECL_ARGS); +static int termp_bt_pre(DECL_ARGS); +static int termp_cd_pre(DECL_ARGS); +static int termp_d1_pre(DECL_ARGS); +static int termp_dq_pre(DECL_ARGS); +static int termp_ex_pre(DECL_ARGS); +static int termp_fa_pre(DECL_ARGS); +static int termp_fl_pre(DECL_ARGS); +static int termp_fn_pre(DECL_ARGS); +static int termp_fo_pre(DECL_ARGS); +static int termp_ft_pre(DECL_ARGS); +static int termp_in_pre(DECL_ARGS); +static int termp_it_pre(DECL_ARGS); +static int termp_li_pre(DECL_ARGS); +static int termp_lk_pre(DECL_ARGS); +static int termp_nd_pre(DECL_ARGS); +static int termp_nm_pre(DECL_ARGS); +static int termp_ns_pre(DECL_ARGS); +static int termp_op_pre(DECL_ARGS); +static int termp_pf_pre(DECL_ARGS); +static int termp_pq_pre(DECL_ARGS); +static int termp_qq_pre(DECL_ARGS); +static int termp_rs_pre(DECL_ARGS); +static int termp_rv_pre(DECL_ARGS); +static int termp_sh_pre(DECL_ARGS); +static int termp_sm_pre(DECL_ARGS); +static int termp_sp_pre(DECL_ARGS); +static int termp_sq_pre(DECL_ARGS); +static int termp_ss_pre(DECL_ARGS); +static int termp_under_pre(DECL_ARGS); +static int 
termp_ud_pre(DECL_ARGS); +static int termp_vt_pre(DECL_ARGS); +static int termp_xr_pre(DECL_ARGS); +static int termp_xx_pre(DECL_ARGS); + +static const struct termact termacts[MDOC_MAX] = { + { termp_ap_pre, NULL }, /* Ap */ + { NULL, NULL }, /* Dd */ + { NULL, NULL }, /* Dt */ + { NULL, NULL }, /* Os */ + { termp_sh_pre, termp_sh_post }, /* Sh */ + { termp_ss_pre, termp_ss_post }, /* Ss */ + { termp_sp_pre, NULL }, /* Pp */ + { termp_d1_pre, termp_d1_post }, /* D1 */ + { termp_d1_pre, termp_d1_post }, /* Dl */ + { termp_bd_pre, termp_bd_post }, /* Bd */ + { NULL, NULL }, /* Ed */ + { termp_bl_pre, termp_bl_post }, /* Bl */ + { NULL, NULL }, /* El */ + { termp_it_pre, termp_it_post }, /* It */ + { NULL, NULL }, /* Ad */ + { termp_an_pre, termp_an_post }, /* An */ + { termp_under_pre, NULL }, /* Ar */ + { termp_cd_pre, NULL }, /* Cd */ + { termp_bold_pre, NULL }, /* Cm */ + { NULL, NULL }, /* Dv */ + { NULL, NULL }, /* Er */ + { NULL, NULL }, /* Ev */ + { termp_ex_pre, NULL }, /* Ex */ + { termp_fa_pre, NULL }, /* Fa */ + { termp_fd_pre, NULL }, /* Fd */ + { termp_fl_pre, NULL }, /* Fl */ + { termp_fn_pre, NULL }, /* Fn */ + { termp_ft_pre, NULL }, /* Ft */ + { termp_bold_pre, NULL }, /* Ic */ + { termp_in_pre, termp_in_post }, /* In */ + { termp_li_pre, NULL }, /* Li */ + { termp_nd_pre, NULL }, /* Nd */ + { termp_nm_pre, NULL }, /* Nm */ + { termp_op_pre, termp_op_post }, /* Op */ + { NULL, NULL }, /* Ot */ + { termp_under_pre, NULL }, /* Pa */ + { termp_rv_pre, NULL }, /* Rv */ + { NULL, NULL }, /* St */ + { termp_under_pre, NULL }, /* Va */ + { termp_vt_pre, NULL }, /* Vt */ + { termp_xr_pre, NULL }, /* Xr */ + { NULL, termp____post }, /* %A */ + { termp_under_pre, termp____post }, /* %B */ + { NULL, termp____post }, /* %D */ + { termp_under_pre, termp____post }, /* %I */ + { termp_under_pre, termp____post }, /* %J */ + { NULL, termp____post }, /* %N */ + { NULL, termp____post }, /* %O */ + { NULL, termp____post }, /* %P */ + { NULL, termp____post }, /* %R */ + 
{ termp_under_pre, termp____post }, /* %T */ + { NULL, termp____post }, /* %V */ + { NULL, NULL }, /* Ac */ + { termp_aq_pre, termp_aq_post }, /* Ao */ + { termp_aq_pre, termp_aq_post }, /* Aq */ + { NULL, NULL }, /* At */ + { NULL, NULL }, /* Bc */ + { termp_bf_pre, NULL }, /* Bf */ + { termp_bq_pre, termp_bq_post }, /* Bo */ + { termp_bq_pre, termp_bq_post }, /* Bq */ + { termp_xx_pre, NULL }, /* Bsx */ + { NULL, termp_bx_post }, /* Bx */ + { NULL, NULL }, /* Db */ + { NULL, NULL }, /* Dc */ + { termp_dq_pre, termp_dq_post }, /* Do */ + { termp_dq_pre, termp_dq_post }, /* Dq */ + { NULL, NULL }, /* Ec */ /* FIXME: no space */ + { NULL, NULL }, /* Ef */ + { termp_under_pre, NULL }, /* Em */ + { NULL, NULL }, /* Eo */ + { termp_xx_pre, NULL }, /* Fx */ + { termp_bold_pre, NULL }, /* Ms */ /* FIXME: convert to symbol? */ + { NULL, NULL }, /* No */ + { termp_ns_pre, NULL }, /* Ns */ + { termp_xx_pre, NULL }, /* Nx */ + { termp_xx_pre, NULL }, /* Ox */ + { NULL, NULL }, /* Pc */ + { termp_pf_pre, termp_pf_post }, /* Pf */ + { termp_pq_pre, termp_pq_post }, /* Po */ + { termp_pq_pre, termp_pq_post }, /* Pq */ + { NULL, NULL }, /* Qc */ + { termp_sq_pre, termp_sq_post }, /* Ql */ + { termp_qq_pre, termp_qq_post }, /* Qo */ + { termp_qq_pre, termp_qq_post }, /* Qq */ + { NULL, NULL }, /* Re */ + { termp_rs_pre, NULL }, /* Rs */ + { NULL, NULL }, /* Sc */ + { termp_sq_pre, termp_sq_post }, /* So */ + { termp_sq_pre, termp_sq_post }, /* Sq */ + { termp_sm_pre, NULL }, /* Sm */ + { termp_under_pre, NULL }, /* Sx */ + { termp_bold_pre, NULL }, /* Sy */ + { NULL, NULL }, /* Tn */ + { termp_xx_pre, NULL }, /* Ux */ + { NULL, NULL }, /* Xc */ + { NULL, NULL }, /* Xo */ + { termp_fo_pre, termp_fo_post }, /* Fo */ + { NULL, NULL }, /* Fc */ + { termp_op_pre, termp_op_post }, /* Oo */ + { NULL, NULL }, /* Oc */ + { NULL, NULL }, /* Bk */ + { NULL, NULL }, /* Ek */ + { termp_bt_pre, NULL }, /* Bt */ + { NULL, NULL }, /* Hf */ + { NULL, NULL }, /* Fr */ + { termp_ud_pre, NULL }, /* 
Ud */
+	{ NULL, termp_lb_post }, /* Lb */
+	{ termp_sp_pre, NULL }, /* Lp */
+	{ termp_lk_pre, NULL }, /* Lk */
+	{ termp_under_pre, NULL }, /* Mt */
+	{ termp_brq_pre, termp_brq_post }, /* Brq */
+	{ termp_brq_pre, termp_brq_post }, /* Bro */
+	{ NULL, NULL }, /* Brc */
+	{ NULL, termp____post }, /* %C */
+	{ NULL, NULL }, /* Es */ /* TODO */
+	{ NULL, NULL }, /* En */ /* TODO */
+	{ termp_xx_pre, NULL }, /* Dx */
+	{ NULL, termp____post }, /* %Q */
+	{ termp_sp_pre, NULL }, /* br */
+	{ termp_sp_pre, NULL }, /* sp */
+	{ termp_under_pre, termp____post }, /* %U */
+	{ NULL, NULL }, /* Ta */
+};
+
+
+/*
+ * Terminal front-end entry point for an mdoc parse.  `arg' is the
+ * struct termp to print with; `mdoc' supplies the node tree and the
+ * meta-data handed to the header and footer callbacks.
+ */
+void
+terminal_mdoc(void *arg, const struct mdoc *mdoc)
+{
+	struct termp		*term;
+	const struct mdoc_node	*root;
+	const struct mdoc_meta	*meta;
+
+	term = (struct termp *)arg;
+
+	term->overstep = 0;
+	term->maxrmargin = term->defrmargin;
+	term->tabwidth = 5;
+
+	/* The symbol table is created on first use only. */
+
+	if (NULL == term->symtab) {
+		/* TERMENC_ASCII is the only encoding handled here. */
+		if (TERMENC_ASCII != term->enc)
+			abort();
+		term->symtab = chars_init(CHARS_ASCII);
+	}
+
+	root = mdoc_node(mdoc);
+	meta = mdoc_meta(mdoc);
+
+	term_begin(term, print_mdoc_head, print_mdoc_foot, meta);
+
+	if (NULL != root->child)
+		print_mdoc_nodelist(term, NULL, meta, root->child);
+
+	term_end(term);
+}
+
+
+/*
+ * Print `n' and then each of its following siblings, in order, all
+ * with the same pair context.  `n' must not be NULL.
+ */
+static void
+print_mdoc_nodelist(DECL_ARGS)
+{
+
+	do {
+		print_mdoc_node(p, pair, m, n);
+		n = n->next;
+	} while (NULL != n);
+}
+
+
+/* ARGSUSED */
+static void
+print_mdoc_node(DECL_ARGS)
+{
+	int		 chld;
+	const void	*font;
+	struct termpair	 npair;
+	size_t		 offset, rmargin;
+
+	chld = 1;
+	offset = p->offset;
+	rmargin = p->rmargin;
+	font = term_fontq(p);
+
+	memset(&npair, 0, sizeof(struct termpair));
+	npair.ppair = pair;
+
+	if (MDOC_TEXT != n->type) {
+		if (termacts[n->tok].pre)
+			chld = (*termacts[n->tok].pre)(p, &npair, m, n);
+	} else
+		term_word(p, n->string);
+
+	if (chld && n->child)
+		print_mdoc_nodelist(p, &npair, m, n->child);
+
+	term_fontpopq(p, font);
+
+	if (MDOC_TEXT != n->type)
+		if (termacts[n->tok].post)
+			(*termacts[n->tok].post)(p, &npair, m, n);
+
+	if (MDOC_EOS & n->flags)
+		p->flags |= TERMP_SENTENCE;
+
+	p->offset = offset;
+	p->rmargin = rmargin;
+}
+
+
+/*
+ * Footer callback.  `arg' is the document's struct mdoc_meta.  The
+ * footer is written new-groff style as three columns, the date in the
+ * middle flanked by the operating system on both sides:
+ *
+ * SYSTEM                  DATE                    SYSTEM
+ *
+ * All terminal flags are cleared on return.
+ */
+static void
+print_mdoc_foot(struct termp *p, const void *arg)
+{
+	const struct mdoc_meta	*meta;
+	char			 date[DATESIZ], os[BUFSIZ];
+
+	meta = (const struct mdoc_meta *)arg;
+
+	term_fontrepl(p, TERMFONT_NONE);
+
+	time2a(meta->date, date, sizeof(date));
+	strlcpy(os, meta->os, sizeof(os));
+
+	term_vspace(p);
+
+	/* Left column: the operating system. */
+
+	p->offset = 0;
+	p->rmargin = (p->maxrmargin - strlen(date) + 1) / 2;
+	p->flags |= TERMP_NOSPACE | TERMP_NOBREAK;
+
+	term_word(p, os);
+	term_flushln(p);
+
+	/* Middle column: the date. */
+
+	p->offset = p->rmargin;
+	p->rmargin = p->maxrmargin - strlen(os);
+	p->flags |= TERMP_NOLPAD | TERMP_NOSPACE;
+
+	term_word(p, date);
+	term_flushln(p);
+
+	/* Right column: the operating system again, breaks allowed. */
+
+	p->offset = p->rmargin;
+	p->rmargin = p->maxrmargin;
+	p->flags = (p->flags & ~TERMP_NOBREAK) |
+		TERMP_NOLPAD | TERMP_NOSPACE;
+
+	term_word(p, os);
+	term_flushln(p);
+
+	p->offset = 0;
+	p->rmargin = p->maxrmargin;
+	p->flags = 0;
+}
+
+
+static void
+print_mdoc_head(struct termp *p, const void *arg)
+{
+	char		buf[BUFSIZ], title[BUFSIZ];
+	const struct mdoc_meta *m;
+
+	m = (const struct mdoc_meta *)arg;
+
+	p->rmargin = p->maxrmargin;
+	p->offset = 0;
+
+	/*
+	 * The header is strange. It has three components, which are
+	 * really two with the first duplicated. It goes like this:
+	 *
+	 * IDENTIFIER TITLE IDENTIFIER
+	 *
+	 * The IDENTIFIER is NAME(SECTION), which is the command-name
+	 * (if given, or "unknown" if not) followed by the manual page
+	 * section. These are given in `Dt'. The TITLE is a free-form
+	 * string depending on the manual volume. If not specified, it
+	 * switches on the manual section. 
+	 */
+
+	assert(m->vol);
+	strlcpy(buf, m->vol, BUFSIZ);
+
+	if (m->arch) {
+		strlcat(buf, " (", BUFSIZ);
+		strlcat(buf, m->arch, BUFSIZ);
+		strlcat(buf, ")", BUFSIZ);
+	}
+
+	snprintf(title, BUFSIZ, "%s(%s)", m->title, m->msec);
+
+	p->offset = 0;
+	p->rmargin = (p->maxrmargin - strlen(buf) + 1) / 2;
+	p->flags |= TERMP_NOBREAK | TERMP_NOSPACE;
+
+	term_word(p, title);
+	term_flushln(p);
+
+	p->offset = p->rmargin;
+	p->rmargin = p->maxrmargin - strlen(title);
+	p->flags |= TERMP_NOLPAD | TERMP_NOSPACE;
+
+	term_word(p, buf);
+	term_flushln(p);
+
+	p->offset = p->rmargin;
+	p->rmargin = p->maxrmargin;
+	p->flags &= ~TERMP_NOBREAK;
+	p->flags |= TERMP_NOLPAD | TERMP_NOSPACE;
+
+	term_word(p, title);
+	term_flushln(p);
+
+	p->offset = 0;
+	p->rmargin = p->maxrmargin;
+	p->flags &= ~TERMP_NOSPACE;
+}
+
+
+/*
+ * Convert the string of a text node into a vertical span.  If the
+ * string does not parse as a scaled unit, its length is used as the
+ * number of SCALE_VS units.
+ */
+static size_t
+a2height(const struct mdoc_node *n)
+{
+	struct roffsu	 vs;
+
+	assert(MDOC_TEXT == n->type);
+	assert(n->string);
+
+	if ( ! a2roffsu(n->string, &vs, SCALE_VS)) {
+		SCALE_VS_INIT(&vs, strlen(n->string));
+	}
+
+	return(term_vspan(&vs));
+}
+
+
+/*
+ * Convert a width string into a horizontal span.  If the string does
+ * not parse as a scaled unit, its length is used as the width.
+ */
+static size_t
+a2width(const char *v)
+{
+	struct roffsu	 hs;
+
+	assert(v);
+
+	if ( ! a2roffsu(v, &hs, SCALE_MAX)) {
+		SCALE_HS_INIT(&hs, strlen(v));
+	}
+
+	return(term_hspan(&hs));
+}
+
+
+/*
+ * Convert an offset string into a horizontal span.  The empty string
+ * and "left" are 0, "indent" and "indent-two" are one and two times
+ * INDENT + 1; everything else is handled as in a2width().
+ */
+static size_t
+a2offs(const char *v)
+{
+	struct roffsu	 hs;
+
+	if ('\0' == *v || 0 == strcmp(v, "left"))
+		return(0);
+	if (0 == strcmp(v, "indent"))
+		return(INDENT + 1);
+	if (0 == strcmp(v, "indent-two"))
+		return((INDENT + 1) * 2);
+
+	if ( ! a2roffsu(v, &hs, SCALE_MAX)) {
+		SCALE_HS_INIT(&hs, strlen(v));
+	}
+
+	return(term_hspan(&hs));
+}
+
+
+/*
+ * Boolean wrapper around arg_getattr(): 0 if arg_getattr() returns -1
+ * for `arg' on node `n', 1 for any other result.
+ */
+static int
+arg_hasattr(int arg, const struct mdoc_node *n)
+{
+	int		 idx;
+
+	idx = arg_getattr(arg, n);
+	return(-1 != idx);
+}
+
+
+/*
+ * Get the index of an argument in a node's argument list or -1 if it
+ * does not exist. 
+ */
+static int
+arg_getattr(int v, const struct mdoc_node *n)
+{
+	int		 i;
+
+	/*
+	 * A node without an argument list cannot contain `v'.  The
+	 * answer must be -1 and not 0: 0 is a valid index into argv,
+	 * and arg_hasattr() treats every result other than -1 as
+	 * "argument present".
+	 */
+	if (NULL == n->args)
+		return(-1);
+
+	for (i = 0; i < (int)n->args->argc; i++)
+		if (n->args->argv[i].arg == v)
+			return(i);
+
+	return(-1);
+}
+
+
+/*
+ * Determine how much space to print out before block elements of `It'
+ * (and thus `Bl') and `Bd'. And then go ahead and print that space,
+ * too.
+ */
+static void
+print_bvspace(struct termp *p,
+		const struct mdoc_node *bl,
+		const struct mdoc_node *n)
+{
+	const struct mdoc_node	*nn;
+
+	term_newln(p);
+
+	if (MDOC_Bd == bl->tok && bl->data.Bd.comp)
+		return;
+	if (MDOC_Bl == bl->tok && bl->data.Bl.comp)
+		return;
+
+	/* Do not vspace directly after Ss/Sh. */
+
+	for (nn = n; nn; nn = nn->parent) {
+		if (MDOC_BLOCK != nn->type)
+			continue;
+		if (MDOC_Ss == nn->tok)
+			return;
+		if (MDOC_Sh == nn->tok)
+			return;
+		if (NULL == nn->prev)
+			continue;
+		break;
+	}
+
+	/* A `-column' does not assert vspace within the list. */
+
+	if (MDOC_Bl == bl->tok && LIST_column == bl->data.Bl.type)
+		if (n->prev && MDOC_It == n->prev->tok)
+			return;
+
+	/* A `-diag' without body does not vspace. 
*/
+
+	if (MDOC_Bl == bl->tok && LIST_diag == bl->data.Bl.type)
+		if (n->prev && MDOC_It == n->prev->tok) {
+			assert(n->prev->body);
+			if (NULL == n->prev->body->child)
+				return;
+		}
+
+	term_vspace(p);
+}
+
+
+/*
+ * Open a double-quote enclosure: in front of the body, emit the left
+ * double quote and suppress the space after it.  Head and block nodes
+ * print nothing.  Children are always processed.
+ */
+/* ARGSUSED */
+static int
+termp_dq_pre(DECL_ARGS)
+{
+
+	if (MDOC_BODY == n->type) {
+		term_word(p, "\\(lq");
+		p->flags |= TERMP_NOSPACE;
+	}
+
+	return(1);
+}
+
+
+/*
+ * Close a double-quote enclosure: after the body, emit the right
+ * double quote with no space in front of it.
+ */
+/* ARGSUSED */
+static void
+termp_dq_post(DECL_ARGS)
+{
+
+	if (MDOC_BODY == n->type) {
+		p->flags |= TERMP_NOSPACE;
+		term_word(p, "\\(rq");
+	}
+}
+
+
+/* ARGSUSED */
+static int
+termp_it_pre(DECL_ARGS)
+{
+	const struct mdoc_node *bl, *nn;
+	char		        buf[7];
+	int		        i, col;
+	size_t		        width, offset, ncols, dcol;
+	enum mdoc_list		type;
+
+	if (MDOC_BLOCK == n->type) {
+		print_bvspace(p, n->parent->parent, n);
+		return(1);
+	}
+
+	bl = n->parent->parent->parent;
+	type = bl->data.Bl.type;
+
+	/*
+	 * First calculate width and offset. This is pretty easy unless
+	 * we're a -column list, in which case all prior columns must
+	 * be accounted for.
+	 */
+
+	width = offset = 0;
+
+	if (bl->data.Bl.offs)
+		offset = a2offs(bl->data.Bl.offs);
+
+	switch (type) {
+	case (LIST_column):
+		if (MDOC_HEAD == n->type)
+			break;
+
+		col = arg_getattr(MDOC_Column, bl);
+
+		/*
+		 * Imitate groff's column handling:
+		 * - For each earlier column, add its width.
+		 * - For less than 5 columns, add four more blanks per
+		 * column.
+		 * - For exactly 5 columns, add three more blank per
+		 * column.
+		 * - For more than 5 columns, add only one column.
+		 */
+		ncols = bl->args->argv[col].sz;
+		/* LINTED */
+		dcol = ncols < 5 ? 4 : ncols == 5 ? 3 : 1;
+
+		/*
+		 * Calculate the offset by applying all prior MDOC_BODY,
+		 * so we stop at the MDOC_HEAD (NULL == nn->prev).
+		 */
+
+		for (i = 0, nn = n->prev;
+				nn->prev && i < (int)ncols;
+				nn = nn->prev, i++)
+			offset += dcol + a2width
+				(bl->args->argv[col].value[i]);
+
+		/*
+		 * When exceeding the declared number of columns, leave
+		 * the remaining widths at 0. 
This will later be + * adjusted to the default width of 10, or, for the last + * column, stretched to the right margin. + */ + if (i >= (int)ncols) + break; + + /* + * Use the declared column widths, extended as explained + * in the preceding paragraph. + */ + width = a2width(bl->args->argv[col].value[i]) + dcol; + break; + default: + if (NULL == bl->data.Bl.width) + break; + + /* + * Note: buffer the width by 2, which is groff's magic + * number for buffering single arguments. See the above + * handling for column for how this changes. + */ + assert(bl->data.Bl.width); + width = a2width(bl->data.Bl.width) + 2; + break; + } + + /* + * List-type can override the width in the case of fixed-head + * values (bullet, dash/hyphen, enum). Tags need a non-zero + * offset. + */ + + switch (type) { + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_hyphen): + if (width < 4) + width = 4; + break; + case (LIST_enum): + if (width < 5) + width = 5; + break; + case (LIST_hang): + if (0 == width) + width = 8; + break; + case (LIST_column): + /* FALLTHROUGH */ + case (LIST_tag): + if (0 == width) + width = 10; + break; + default: + break; + } + + /* + * Whitespace control. Inset bodies need an initial space, + * while diagonal bodies need two. + */ + + p->flags |= TERMP_NOSPACE; + + switch (type) { + case (LIST_diag): + if (MDOC_BODY == n->type) + term_word(p, "\\ \\ "); + break; + case (LIST_inset): + if (MDOC_BODY == n->type) + term_word(p, "\\ "); + break; + default: + break; + } + + p->flags |= TERMP_NOSPACE; + + switch (type) { + case (LIST_diag): + if (MDOC_HEAD == n->type) + term_fontpush(p, TERMFONT_BOLD); + break; + default: + break; + } + + /* + * Pad and break control. This is the tricky part. These flags + * are documented in term_flushln() in term.c. Note that we're + * going to unset all of these flags in termp_it_post() when we + * exit. 
+ */ + + switch (type) { + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_enum): + /* FALLTHROUGH */ + case (LIST_hyphen): + if (MDOC_HEAD == n->type) + p->flags |= TERMP_NOBREAK; + else + p->flags |= TERMP_NOLPAD; + break; + case (LIST_hang): + if (MDOC_HEAD == n->type) + p->flags |= TERMP_NOBREAK; + else + p->flags |= TERMP_NOLPAD; + + if (MDOC_HEAD != n->type) + break; + + /* + * This is ugly. If `-hang' is specified and the body + * is a `Bl' or `Bd', then we want basically to nullify + * the "overstep" effect in term_flushln() and treat + * this as a `-ohang' list instead. + */ + if (n->next->child && + (MDOC_Bl == n->next->child->tok || + MDOC_Bd == n->next->child->tok)) { + p->flags &= ~TERMP_NOBREAK; + p->flags &= ~TERMP_NOLPAD; + } else + p->flags |= TERMP_HANG; + break; + case (LIST_tag): + if (MDOC_HEAD == n->type) + p->flags |= TERMP_NOBREAK | TERMP_TWOSPACE; + else + p->flags |= TERMP_NOLPAD; + + if (MDOC_HEAD != n->type) + break; + if (NULL == n->next || NULL == n->next->child) + p->flags |= TERMP_DANGLE; + break; + case (LIST_column): + if (MDOC_HEAD == n->type) + break; + + if (NULL == n->next) + p->flags &= ~TERMP_NOBREAK; + else + p->flags |= TERMP_NOBREAK; + + assert(n->prev); + if (MDOC_BODY == n->prev->type) + p->flags |= TERMP_NOLPAD; + + break; + case (LIST_diag): + if (MDOC_HEAD == n->type) + p->flags |= TERMP_NOBREAK; + break; + default: + break; + } + + /* + * Margin control. Set-head-width lists have their right + * margins shortened. The body for these lists has the offset + * necessarily lengthened. Everybody gets the offset. + */ + + p->offset += offset; + + switch (type) { + case (LIST_hang): + /* + * Same stipulation as above, regarding `-hang'. We + * don't want to recalculate rmargin and offsets when + * using `Bd' or `Bl' within `-hang' overstep lists. 
+ */ + if (MDOC_HEAD == n->type && n->next->child && + (MDOC_Bl == n->next->child->tok || + MDOC_Bd == n->next->child->tok)) + break; + /* FALLTHROUGH */ + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_enum): + /* FALLTHROUGH */ + case (LIST_hyphen): + /* FALLTHROUGH */ + case (LIST_tag): + assert(width); + if (MDOC_HEAD == n->type) + p->rmargin = p->offset + width; + else + p->offset += width; + break; + case (LIST_column): + assert(width); + p->rmargin = p->offset + width; + /* + * XXX - this behaviour is not documented: the + * right-most column is filled to the right margin. + */ + if (MDOC_HEAD == n->type) + break; + if (NULL == n->next && p->rmargin < p->maxrmargin) + p->rmargin = p->maxrmargin; + break; + default: + break; + } + + /* + * The dash, hyphen, bullet and enum lists all have a special + * HEAD character (temporarily bold, in some cases). + */ + + if (MDOC_HEAD == n->type) + switch (type) { + case (LIST_bullet): + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "\\[bu]"); + term_fontpop(p); + break; + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_hyphen): + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "\\(hy"); + term_fontpop(p); + break; + case (LIST_enum): + (pair->ppair->ppair->count)++; + snprintf(buf, sizeof(buf), "%d.", + pair->ppair->ppair->count); + term_word(p, buf); + break; + default: + break; + } + + /* + * If we're not going to process our children, indicate so here. 
+ */ + + switch (type) { + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_item): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_hyphen): + /* FALLTHROUGH */ + case (LIST_enum): + if (MDOC_HEAD == n->type) + return(0); + break; + case (LIST_column): + if (MDOC_HEAD == n->type) + return(0); + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +termp_it_post(DECL_ARGS) +{ + enum mdoc_list type; + + if (MDOC_BLOCK == n->type) + return; + + type = n->parent->parent->parent->data.Bl.type; + + switch (type) { + case (LIST_item): + /* FALLTHROUGH */ + case (LIST_diag): + /* FALLTHROUGH */ + case (LIST_inset): + if (MDOC_BODY == n->type) + term_newln(p); + break; + case (LIST_column): + if (MDOC_BODY == n->type) + term_flushln(p); + break; + default: + term_newln(p); + break; + } + + /* + * Now that our output is flushed, we can reset our flags. Since + * only `It' sets these flags, we're free to assume that nobody + * has munged them in the meanwhile. + */ + + p->flags &= ~TERMP_DANGLE; + p->flags &= ~TERMP_NOBREAK; + p->flags &= ~TERMP_TWOSPACE; + p->flags &= ~TERMP_NOLPAD; + p->flags &= ~TERMP_HANG; +} + + +/* ARGSUSED */ +static int +termp_nm_pre(DECL_ARGS) +{ + + if (NULL == n->child && NULL == m->name) + return(1); + + synopsis_pre(p, n); + + term_fontpush(p, TERMFONT_BOLD); + if (NULL == n->child) + term_word(p, m->name); + return(1); +} + + +/* ARGSUSED */ +static int +termp_fl_pre(DECL_ARGS) +{ + + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "\\-"); + + if (n->child) + p->flags |= TERMP_NOSPACE; + else if (n->next && n->next->line == n->line) + p->flags |= TERMP_NOSPACE; + + return(1); +} + + +/* ARGSUSED */ +static int +termp_an_pre(DECL_ARGS) +{ + + if (NULL == n->child) + return(1); + + /* + * If not in the AUTHORS section, `An -split' will cause + * newlines to occur before the author name.
If in the AUTHORS + * section, by default, the first `An' invocation is nosplit, + * then all subsequent ones, regardless of whether interspersed + * with other macros/text, are split. -split, in this case, + * will override the condition of the implied first -nosplit. + */ + + if (n->sec == SEC_AUTHORS) { + if ( ! (TERMP_ANPREC & p->flags)) { + if (TERMP_SPLIT & p->flags) + term_newln(p); + return(1); + } + if (TERMP_NOSPLIT & p->flags) + return(1); + term_newln(p); + return(1); + } + + if (TERMP_SPLIT & p->flags) + term_newln(p); + + return(1); +} + + +/* ARGSUSED */ +static void +termp_an_post(DECL_ARGS) +{ + + if (n->child) { + if (SEC_AUTHORS == n->sec) + p->flags |= TERMP_ANPREC; + return; + } + + if (arg_hasattr(MDOC_Split, n)) { + p->flags &= ~TERMP_NOSPLIT; + p->flags |= TERMP_SPLIT; + } else { + p->flags &= ~TERMP_SPLIT; + p->flags |= TERMP_NOSPLIT; + } + +} + + +/* ARGSUSED */ +static int +termp_ns_pre(DECL_ARGS) +{ + + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static int +termp_rs_pre(DECL_ARGS) +{ + + if (SEC_SEE_ALSO != n->sec) + return(1); + if (MDOC_BLOCK == n->type && n->prev) + term_vspace(p); + return(1); +} + + +/* ARGSUSED */ +static int +termp_rv_pre(DECL_ARGS) +{ + const struct mdoc_node *nn; + + term_newln(p); + term_word(p, "The"); + + for (nn = n->child; nn; nn = nn->next) { + term_fontpush(p, TERMFONT_BOLD); + term_word(p, nn->string); + term_fontpop(p); + p->flags |= TERMP_NOSPACE; + if (nn->next && NULL == nn->next->next) + term_word(p, "(), and"); + else if (nn->next) + term_word(p, "(),"); + else + term_word(p, "()"); + } + + if (n->child && n->child->next) + term_word(p, "functions return"); + else + term_word(p, "function returns"); + + term_word(p, "the value 0 if successful; otherwise the value " + "-1 is returned and the global variable"); + + term_fontpush(p, TERMFONT_UNDER); + term_word(p, "errno"); + term_fontpop(p); + + term_word(p, "is set to indicate the error."); + p->flags |= TERMP_SENTENCE; + + 
return(0); +} + + +/* ARGSUSED */ +static int +termp_ex_pre(DECL_ARGS) +{ + const struct mdoc_node *nn; + + term_word(p, "The"); + + for (nn = n->child; nn; nn = nn->next) { + term_fontpush(p, TERMFONT_BOLD); + term_word(p, nn->string); + term_fontpop(p); + p->flags |= TERMP_NOSPACE; + if (nn->next && NULL == nn->next->next) + term_word(p, ", and"); + else if (nn->next) + term_word(p, ","); + else + p->flags &= ~TERMP_NOSPACE; + } + + if (n->child && n->child->next) + term_word(p, "utilities exit"); + else + term_word(p, "utility exits"); + + term_word(p, "0 on success, and >0 if an error occurs."); + p->flags |= TERMP_SENTENCE; + + return(0); +} + + +/* ARGSUSED */ +static int +termp_nd_pre(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + +#if defined(__OpenBSD__) || defined(__linux__) + term_word(p, "\\(en"); +#else + term_word(p, "\\(em"); +#endif + return(1); +} + + +/* ARGSUSED */ +static int +termp_bl_pre(DECL_ARGS) +{ + + return(MDOC_HEAD != n->type); +} + + +/* ARGSUSED */ +static void +termp_bl_post(DECL_ARGS) +{ + + if (MDOC_BLOCK == n->type) + term_newln(p); +} + + +/* ARGSUSED */ +static void +termp_op_post(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + p->flags |= TERMP_NOSPACE; + term_word(p, "\\(rB"); +} + + +/* ARGSUSED */ +static int +termp_xr_pre(DECL_ARGS) +{ + const struct mdoc_node *nn; + + if (NULL == n->child) + return(0); + + assert(MDOC_TEXT == n->child->type); + nn = n->child; + + term_word(p, nn->string); + if (NULL == (nn = nn->next)) + return(0); + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + p->flags |= TERMP_NOSPACE; + term_word(p, nn->string); + p->flags |= TERMP_NOSPACE; + term_word(p, ")"); + + return(0); +} + + +/* + * This decides how to assert whitespace before any of the SYNOPSIS set + * of macros (which, as in the case of Ft/Fo and Ft/Fn, may contain + * macro combos). 
+ */ +static void +synopsis_pre(struct termp *p, const struct mdoc_node *n) +{ + /* + * Obviously, if we're not in a SYNOPSIS or no prior macros + * exist, do nothing. + */ + if (NULL == n->prev || SEC_SYNOPSIS != n->sec) + return; + + /* + * If we're the second in a pair of like elements, emit our + * newline and return. UNLESS we're `Ft', `Fo', `Fn', in which + * case we soldier on. + */ + if (n->prev->tok == n->tok && + MDOC_Ft != n->tok && + MDOC_Fo != n->tok && + MDOC_Fn != n->tok) { + term_newln(p); + return; + } + + /* + * If we're one of the SYNOPSIS set and non-like pair-wise after + * another (or Ft/Fo/Fn, which we've let slip through) then assert + * vertical space, else only newline and move on. + */ + switch (n->prev->tok) { + case (MDOC_Fd): + /* FALLTHROUGH */ + case (MDOC_Fn): + /* FALLTHROUGH */ + case (MDOC_Fo): + /* FALLTHROUGH */ + case (MDOC_In): + /* FALLTHROUGH */ + case (MDOC_Vt): + term_vspace(p); + break; + case (MDOC_Ft): + if (MDOC_Fn != n->tok && MDOC_Fo != n->tok) { + term_vspace(p); + break; + } + /* FALLTHROUGH */ + default: + term_newln(p); + break; + } +} + + +static int +termp_vt_pre(DECL_ARGS) +{ + + if (MDOC_ELEM == n->type) { + synopsis_pre(p, n); + return(termp_under_pre(p, pair, m, n)); + } else if (MDOC_BLOCK == n->type) { + synopsis_pre(p, n); + return(1); + } else if (MDOC_HEAD == n->type) + return(0); + + return(termp_under_pre(p, pair, m, n)); +} + + +/* ARGSUSED */ +static int +termp_bold_pre(DECL_ARGS) +{ + + term_fontpush(p, TERMFONT_BOLD); + return(1); +} + + +/* ARGSUSED */ +static int +termp_fd_pre(DECL_ARGS) +{ + + synopsis_pre(p, n); + return(termp_bold_pre(p, pair, m, n)); +} + + +/* ARGSUSED */ +static int +termp_sh_pre(DECL_ARGS) +{ + + /* No vspace between consecutive `Sh' calls.
*/ + + switch (n->type) { + case (MDOC_BLOCK): + if (n->prev && MDOC_Sh == n->prev->tok) + if (NULL == n->prev->body->child) + break; + term_vspace(p); + break; + case (MDOC_HEAD): + term_fontpush(p, TERMFONT_BOLD); + break; + case (MDOC_BODY): + p->offset = INDENT; + break; + default: + break; + } + return(1); +} + + +/* ARGSUSED */ +static void +termp_sh_post(DECL_ARGS) +{ + + switch (n->type) { + case (MDOC_HEAD): + term_newln(p); + break; + case (MDOC_BODY): + term_newln(p); + p->offset = 0; + break; + default: + break; + } +} + + +/* ARGSUSED */ +static int +termp_op_pre(DECL_ARGS) +{ + + switch (n->type) { + case (MDOC_BODY): + term_word(p, "\\(lB"); + p->flags |= TERMP_NOSPACE; + break; + default: + break; + } + return(1); +} + + +/* ARGSUSED */ +static int +termp_bt_pre(DECL_ARGS) +{ + + term_word(p, "is currently in beta test."); + p->flags |= TERMP_SENTENCE; + return(0); +} + + +/* ARGSUSED */ +static void +termp_lb_post(DECL_ARGS) +{ + + if (SEC_LIBRARY == n->sec && MDOC_LINE & n->flags) + term_newln(p); +} + + +/* ARGSUSED */ +static int +termp_ud_pre(DECL_ARGS) +{ + + term_word(p, "currently under development."); + p->flags |= TERMP_SENTENCE; + return(0); +} + + +/* ARGSUSED */ +static int +termp_d1_pre(DECL_ARGS) +{ + + if (MDOC_BLOCK != n->type) + return(1); + term_newln(p); + p->offset += (INDENT + 1); + return(1); +} + + +/* ARGSUSED */ +static void +termp_d1_post(DECL_ARGS) +{ + + if (MDOC_BLOCK != n->type) + return; + term_newln(p); +} + + +/* ARGSUSED */ +static int +termp_aq_pre(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + term_word(p, "\\(la"); + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +termp_aq_post(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + p->flags |= TERMP_NOSPACE; + term_word(p, "\\(ra"); +} + + +/* ARGSUSED */ +static int +termp_ft_pre(DECL_ARGS) +{ + + /* NB: MDOC_LINE does not affect this!
*/ + synopsis_pre(p, n); + term_fontpush(p, TERMFONT_UNDER); + return(1); +} + + +/* ARGSUSED */ +static int +termp_fn_pre(DECL_ARGS) +{ + const struct mdoc_node *nn; + + synopsis_pre(p, n); + + term_fontpush(p, TERMFONT_BOLD); + term_word(p, n->child->string); + term_fontpop(p); + + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + + for (nn = n->child->next; nn; nn = nn->next) { + term_fontpush(p, TERMFONT_UNDER); + term_word(p, nn->string); + term_fontpop(p); + + if (nn->next) + term_word(p, ","); + } + + term_word(p, ")"); + + if (SEC_SYNOPSIS == n->sec) + term_word(p, ";"); + + return(0); +} + + +/* ARGSUSED */ +static int +termp_fa_pre(DECL_ARGS) +{ + const struct mdoc_node *nn; + + if (n->parent->tok != MDOC_Fo) { + term_fontpush(p, TERMFONT_UNDER); + return(1); + } + + for (nn = n->child; nn; nn = nn->next) { + term_fontpush(p, TERMFONT_UNDER); + term_word(p, nn->string); + term_fontpop(p); + + if (nn->next) + term_word(p, ","); + } + + if (n->child && n->next && n->next->tok == MDOC_Fa) + term_word(p, ","); + + return(0); +} + + +/* ARGSUSED */ +static int +termp_bd_pre(DECL_ARGS) +{ + size_t tabwidth; + size_t rm, rmax; + const struct mdoc_node *nn; + + if (MDOC_BLOCK == n->type) { + print_bvspace(p, n, n); + return(1); + } else if (MDOC_HEAD == n->type) + return(0); + + if (n->data.Bd.offs) + p->offset += a2offs(n->data.Bd.offs); + + /* + * If -ragged or -filled are specified, the block does nothing + * but change the indentation. If -unfilled or -literal are + * specified, text is printed exactly as entered in the display: + * for macro lines, a newline is appended to the line. Blank + * lines are allowed.
+ */ + + if (DISP_literal != n->data.Bd.type && + DISP_unfilled != n->data.Bd.type) + return(1); + + tabwidth = p->tabwidth; + p->tabwidth = 8; + rm = p->rmargin; + rmax = p->maxrmargin; + p->rmargin = p->maxrmargin = TERM_MAXMARGIN; + + for (nn = n->child; nn; nn = nn->next) { + p->flags |= TERMP_NOSPACE; + print_mdoc_node(p, pair, m, nn); + if (NULL == nn->prev || + nn->prev->line < nn->line || + NULL == nn->next) + term_flushln(p); + } + + p->tabwidth = tabwidth; + p->rmargin = rm; + p->maxrmargin = rmax; + return(0); +} + + +/* ARGSUSED */ +static void +termp_bd_post(DECL_ARGS) +{ + size_t rm, rmax; + + if (MDOC_BODY != n->type) + return; + + rm = p->rmargin; + rmax = p->maxrmargin; + + if (DISP_literal == n->data.Bd.type || + DISP_unfilled == n->data.Bd.type) + p->rmargin = p->maxrmargin = TERM_MAXMARGIN; + + p->flags |= TERMP_NOSPACE; + term_newln(p); + + p->rmargin = rm; + p->maxrmargin = rmax; +} + + +/* ARGSUSED */ +static int +termp_qq_pre(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + term_word(p, "\""); + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +termp_qq_post(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + p->flags |= TERMP_NOSPACE; + term_word(p, "\""); +} + + +/* ARGSUSED */ +static void +termp_bx_post(DECL_ARGS) +{ + + if (n->child) + p->flags |= TERMP_NOSPACE; + term_word(p, "BSD"); +} + + +/* ARGSUSED */ +static int +termp_xx_pre(DECL_ARGS) +{ + const char *pp; + + pp = NULL; + switch (n->tok) { + case (MDOC_Bsx): + pp = "BSDI BSD/OS"; + break; + case (MDOC_Dx): + pp = "DragonFly"; + break; + case (MDOC_Fx): + pp = "FreeBSD"; + break; + case (MDOC_Nx): + pp = "NetBSD"; + break; + case (MDOC_Ox): + pp = "OpenBSD"; + break; + case (MDOC_Ux): + pp = "UNIX"; + break; + default: + break; + } + + assert(pp); + term_word(p, pp); + return(1); +} + + +/* ARGSUSED */ +static int +termp_sq_pre(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + term_word(p, "\\(oq"); + p->flags |= TERMP_NOSPACE; + 
return(1); +} + + +/* ARGSUSED */ +static void +termp_sq_post(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + p->flags |= TERMP_NOSPACE; + term_word(p, "\\(aq"); +} + + +/* ARGSUSED */ +static int +termp_pf_pre(DECL_ARGS) +{ + + p->flags |= TERMP_IGNDELIM; + return(1); +} + + +/* ARGSUSED */ +static void +termp_pf_post(DECL_ARGS) +{ + + p->flags &= ~TERMP_IGNDELIM; + p->flags |= TERMP_NOSPACE; +} + + +/* ARGSUSED */ +static int +termp_ss_pre(DECL_ARGS) +{ + + switch (n->type) { + case (MDOC_BLOCK): + term_newln(p); + if (n->prev) + term_vspace(p); + break; + case (MDOC_HEAD): + term_fontpush(p, TERMFONT_BOLD); + p->offset = HALFINDENT; + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +termp_ss_post(DECL_ARGS) +{ + + if (MDOC_HEAD == n->type) + term_newln(p); +} + + +/* ARGSUSED */ +static int +termp_cd_pre(DECL_ARGS) +{ + + synopsis_pre(p, n); + term_fontpush(p, TERMFONT_BOLD); + return(1); +} + + +/* ARGSUSED */ +static int +termp_in_pre(DECL_ARGS) +{ + + synopsis_pre(p, n); + + if (SEC_SYNOPSIS == n->sec && MDOC_LINE & n->flags) { + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "#include"); + term_word(p, "<"); + } else { + term_word(p, "<"); + term_fontpush(p, TERMFONT_UNDER); + } + + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +termp_in_post(DECL_ARGS) +{ + + if (SEC_SYNOPSIS == n->sec) + term_fontpush(p, TERMFONT_BOLD); + + p->flags |= TERMP_NOSPACE; + term_word(p, ">"); + + if (SEC_SYNOPSIS == n->sec) + term_fontpop(p); +} + + +/* ARGSUSED */ +static int +termp_sp_pre(DECL_ARGS) +{ + size_t i, len; + + switch (n->tok) { + case (MDOC_sp): + len = n->child ? 
a2height(n->child) : 1; + break; + case (MDOC_br): + len = 0; + break; + default: + len = 1; + break; + } + + if (0 == len) + term_newln(p); + for (i = 0; i < len; i++) + term_vspace(p); + + return(0); +} + + +/* ARGSUSED */ +static int +termp_brq_pre(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + term_word(p, "\\(lC"); + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +termp_brq_post(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + p->flags |= TERMP_NOSPACE; + term_word(p, "\\(rC"); +} + + +/* ARGSUSED */ +static int +termp_bq_pre(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + term_word(p, "\\(lB"); + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +termp_bq_post(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + p->flags |= TERMP_NOSPACE; + term_word(p, "\\(rB"); +} + + +/* ARGSUSED */ +static int +termp_pq_pre(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + term_word(p, "\\&("); + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +termp_pq_post(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + term_word(p, ")"); +} + + +/* ARGSUSED */ +static int +termp_fo_pre(DECL_ARGS) +{ + + if (MDOC_BLOCK == n->type) { + synopsis_pre(p, n); + return(1); + } else if (MDOC_BODY == n->type) { + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + p->flags |= TERMP_NOSPACE; + return(1); + } + + /* XXX: we drop non-initial arguments as per groff. 
*/ + + assert(n->child); + assert(n->child->string); + term_fontpush(p, TERMFONT_BOLD); + term_word(p, n->child->string); + return(0); +} + + +/* ARGSUSED */ +static void +termp_fo_post(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + + p->flags |= TERMP_NOSPACE; + term_word(p, ")"); + + if (SEC_SYNOPSIS == n->sec) { + p->flags |= TERMP_NOSPACE; + term_word(p, ";"); + } +} + + +/* ARGSUSED */ +static int +termp_bf_pre(DECL_ARGS) +{ + const struct mdoc_node *nn; + + if (MDOC_HEAD == n->type) + return(0); + else if (MDOC_BLOCK != n->type) + return(1); + + if (NULL == (nn = n->head->child)) { + if (arg_hasattr(MDOC_Emphasis, n)) + term_fontpush(p, TERMFONT_UNDER); + else if (arg_hasattr(MDOC_Symbolic, n)) + term_fontpush(p, TERMFONT_BOLD); + else + term_fontpush(p, TERMFONT_NONE); + + return(1); + } + + assert(MDOC_TEXT == nn->type); + if (0 == strcmp("Em", nn->string)) + term_fontpush(p, TERMFONT_UNDER); + else if (0 == strcmp("Sy", nn->string)) + term_fontpush(p, TERMFONT_BOLD); + else + term_fontpush(p, TERMFONT_NONE); + + return(1); +} + + +/* ARGSUSED */ +static int +termp_sm_pre(DECL_ARGS) +{ + + assert(n->child && MDOC_TEXT == n->child->type); + if (0 == strcmp("on", n->child->string)) + p->flags &= ~TERMP_NONOSPACE; + else + p->flags |= TERMP_NONOSPACE; + + return(0); +} + + +/* ARGSUSED */ +static int +termp_ap_pre(DECL_ARGS) +{ + + p->flags |= TERMP_NOSPACE; + term_word(p, "\\(aq"); + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +termp____post(DECL_ARGS) +{ + + /* TODO: %U. */ + + p->flags |= TERMP_NOSPACE; + term_word(p, n->next ? 
"," : "."); +} + + +/* ARGSUSED */ +static int +termp_li_pre(DECL_ARGS) +{ + + term_fontpush(p, TERMFONT_NONE); + return(1); +} + + +/* ARGSUSED */ +static int +termp_lk_pre(DECL_ARGS) +{ + const struct mdoc_node *nn; + + term_fontpush(p, TERMFONT_UNDER); + nn = n->child; + + if (NULL == nn->next) + return(1); + + term_word(p, nn->string); + term_fontpop(p); + + p->flags |= TERMP_NOSPACE; + term_word(p, ":"); + + term_fontpush(p, TERMFONT_BOLD); + for (nn = nn->next; nn; nn = nn->next) + term_word(p, nn->string); + term_fontpop(p); + + return(0); +} + + +/* ARGSUSED */ +static int +termp_under_pre(DECL_ARGS) +{ + + term_fontpush(p, TERMFONT_UNDER); + return(1); +} diff --git a/commands/mdocml/mdoc_validate.c b/commands/mdocml/mdoc_validate.c new file mode 100644 index 000000000..de1a7cc1d --- /dev/null +++ b/commands/mdocml/mdoc_validate.c @@ -0,0 +1,1413 @@ +/* $Id: mdoc_validate.c,v 1.99 2010/06/13 21:02:49 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "libmdoc.h" +#include "libmandoc.h" + +/* FIXME: .Bl -diag can't have non-text children in HEAD. 
*/ +/* TODO: ignoring Pp (it's superfluous in some invocations). */ + +#define PRE_ARGS struct mdoc *mdoc, struct mdoc_node *n +#define POST_ARGS struct mdoc *mdoc + +typedef int (*v_pre)(PRE_ARGS); +typedef int (*v_post)(POST_ARGS); + +struct valids { + v_pre *pre; + v_post *post; +}; + +static int check_parent(PRE_ARGS, enum mdoct, enum mdoc_type); +static int check_stdarg(PRE_ARGS); +static int check_text(struct mdoc *, int, int, char *); +static int check_argv(struct mdoc *, + struct mdoc_node *, struct mdoc_argv *); +static int check_args(struct mdoc *, struct mdoc_node *); +static int err_child_lt(struct mdoc *, const char *, int); +static int warn_child_lt(struct mdoc *, const char *, int); +static int err_child_gt(struct mdoc *, const char *, int); +static int warn_child_gt(struct mdoc *, const char *, int); +static int err_child_eq(struct mdoc *, const char *, int); +static int warn_child_eq(struct mdoc *, const char *, int); +static int warn_count(struct mdoc *, const char *, + int, const char *, int); +static int err_count(struct mdoc *, const char *, + int, const char *, int); + +static int berr_ge1(POST_ARGS); +static int bwarn_ge1(POST_ARGS); +static int ebool(POST_ARGS); +static int eerr_eq0(POST_ARGS); +static int eerr_eq1(POST_ARGS); +static int eerr_ge1(POST_ARGS); +static int eerr_le1(POST_ARGS); +static int ewarn_ge1(POST_ARGS); +static int herr_eq0(POST_ARGS); +static int herr_ge1(POST_ARGS); +static int hwarn_eq1(POST_ARGS); +static int hwarn_eq0(POST_ARGS); +static int hwarn_le1(POST_ARGS); + +static int post_an(POST_ARGS); +static int post_at(POST_ARGS); +static int post_bf(POST_ARGS); +static int post_bl(POST_ARGS); +static int post_bl_head(POST_ARGS); +static int post_dt(POST_ARGS); +static int post_it(POST_ARGS); +static int post_lb(POST_ARGS); +static int post_nm(POST_ARGS); +static int post_root(POST_ARGS); +static int post_rs(POST_ARGS); +static int post_sh(POST_ARGS); +static int post_sh_body(POST_ARGS); +static int 
post_sh_head(POST_ARGS); +static int post_st(POST_ARGS); +static int post_eoln(POST_ARGS); +static int post_vt(POST_ARGS); +static int pre_an(PRE_ARGS); +static int pre_bd(PRE_ARGS); +static int pre_bl(PRE_ARGS); +static int pre_dd(PRE_ARGS); +static int pre_display(PRE_ARGS); +static int pre_dt(PRE_ARGS); +static int pre_it(PRE_ARGS); +static int pre_os(PRE_ARGS); +static int pre_rv(PRE_ARGS); +static int pre_sh(PRE_ARGS); +static int pre_ss(PRE_ARGS); + +static v_post posts_an[] = { post_an, NULL }; +static v_post posts_at[] = { post_at, NULL }; +static v_post posts_bd[] = { hwarn_eq0, bwarn_ge1, NULL }; +static v_post posts_bf[] = { hwarn_le1, post_bf, NULL }; +static v_post posts_bl[] = { bwarn_ge1, post_bl, NULL }; +static v_post posts_bool[] = { eerr_eq1, ebool, NULL }; +static v_post posts_eoln[] = { post_eoln, NULL }; +static v_post posts_dt[] = { post_dt, NULL }; +static v_post posts_fo[] = { hwarn_eq1, bwarn_ge1, NULL }; +static v_post posts_it[] = { post_it, NULL }; +static v_post posts_lb[] = { eerr_eq1, post_lb, NULL }; +static v_post posts_nd[] = { berr_ge1, NULL }; +static v_post posts_nm[] = { post_nm, NULL }; +static v_post posts_notext[] = { eerr_eq0, NULL }; +static v_post posts_rs[] = { berr_ge1, herr_eq0, post_rs, NULL }; +static v_post posts_sh[] = { herr_ge1, bwarn_ge1, post_sh, NULL }; +static v_post posts_sp[] = { eerr_le1, NULL }; +static v_post posts_ss[] = { herr_ge1, NULL }; +static v_post posts_st[] = { eerr_eq1, post_st, NULL }; +static v_post posts_text[] = { eerr_ge1, NULL }; +static v_post posts_text1[] = { eerr_eq1, NULL }; +static v_post posts_vt[] = { post_vt, NULL }; +static v_post posts_wline[] = { bwarn_ge1, herr_eq0, NULL }; +static v_post posts_wtext[] = { ewarn_ge1, NULL }; +static v_pre pres_an[] = { pre_an, NULL }; +static v_pre pres_bd[] = { pre_display, pre_bd, NULL }; +static v_pre pres_bl[] = { pre_bl, NULL }; +static v_pre pres_d1[] = { pre_display, NULL }; +static v_pre pres_dd[] = { pre_dd, NULL }; +static v_pre 
pres_dt[] = { pre_dt, NULL }; +static v_pre pres_er[] = { NULL, NULL }; +static v_pre pres_ex[] = { NULL, NULL }; +static v_pre pres_fd[] = { NULL, NULL }; +static v_pre pres_it[] = { pre_it, NULL }; +static v_pre pres_os[] = { pre_os, NULL }; +static v_pre pres_rv[] = { pre_rv, NULL }; +static v_pre pres_sh[] = { pre_sh, NULL }; +static v_pre pres_ss[] = { pre_ss, NULL }; + +const struct valids mdoc_valids[MDOC_MAX] = { + { NULL, NULL }, /* Ap */ + { pres_dd, posts_text }, /* Dd */ + { pres_dt, posts_dt }, /* Dt */ + { pres_os, NULL }, /* Os */ + { pres_sh, posts_sh }, /* Sh */ + { pres_ss, posts_ss }, /* Ss */ + { NULL, posts_notext }, /* Pp */ + { pres_d1, posts_wline }, /* D1 */ + { pres_d1, posts_wline }, /* Dl */ + { pres_bd, posts_bd }, /* Bd */ + { NULL, NULL }, /* Ed */ + { pres_bl, posts_bl }, /* Bl */ + { NULL, NULL }, /* El */ + { pres_it, posts_it }, /* It */ + { NULL, posts_text }, /* Ad */ + { pres_an, posts_an }, /* An */ + { NULL, NULL }, /* Ar */ + { NULL, posts_text }, /* Cd */ + { NULL, NULL }, /* Cm */ + { NULL, NULL }, /* Dv */ + { pres_er, posts_text }, /* Er */ + { NULL, NULL }, /* Ev */ + { pres_ex, NULL }, /* Ex */ + { NULL, NULL }, /* Fa */ + { pres_fd, posts_wtext }, /* Fd */ + { NULL, NULL }, /* Fl */ + { NULL, posts_text }, /* Fn */ + { NULL, posts_wtext }, /* Ft */ + { NULL, posts_text }, /* Ic */ + { NULL, posts_text1 }, /* In */ + { NULL, NULL }, /* Li */ + { NULL, posts_nd }, /* Nd */ + { NULL, posts_nm }, /* Nm */ + { NULL, posts_wline }, /* Op */ + { NULL, NULL }, /* Ot */ + { NULL, NULL }, /* Pa */ + { pres_rv, NULL }, /* Rv */ + { NULL, posts_st }, /* St */ + { NULL, NULL }, /* Va */ + { NULL, posts_vt }, /* Vt */ + { NULL, posts_wtext }, /* Xr */ + { NULL, posts_text }, /* %A */ + { NULL, posts_text }, /* %B */ /* FIXME: can be used outside Rs/Re. */ + { NULL, posts_text }, /* %D */ /* FIXME: check date with mandoc_a2time(). 
*/ + { NULL, posts_text }, /* %I */ + { NULL, posts_text }, /* %J */ + { NULL, posts_text }, /* %N */ + { NULL, posts_text }, /* %O */ + { NULL, posts_text }, /* %P */ + { NULL, posts_text }, /* %R */ + { NULL, posts_text }, /* %T */ /* FIXME: can be used outside Rs/Re. */ + { NULL, posts_text }, /* %V */ + { NULL, NULL }, /* Ac */ + { NULL, NULL }, /* Ao */ + { NULL, posts_wline }, /* Aq */ + { NULL, posts_at }, /* At */ + { NULL, NULL }, /* Bc */ + { NULL, posts_bf }, /* Bf */ + { NULL, NULL }, /* Bo */ + { NULL, posts_wline }, /* Bq */ + { NULL, NULL }, /* Bsx */ + { NULL, NULL }, /* Bx */ + { NULL, posts_bool }, /* Db */ + { NULL, NULL }, /* Dc */ + { NULL, NULL }, /* Do */ + { NULL, posts_wline }, /* Dq */ + { NULL, NULL }, /* Ec */ + { NULL, NULL }, /* Ef */ + { NULL, NULL }, /* Em */ + { NULL, NULL }, /* Eo */ + { NULL, NULL }, /* Fx */ + { NULL, posts_text }, /* Ms */ + { NULL, posts_notext }, /* No */ + { NULL, posts_notext }, /* Ns */ + { NULL, NULL }, /* Nx */ + { NULL, NULL }, /* Ox */ + { NULL, NULL }, /* Pc */ + { NULL, posts_text1 }, /* Pf */ + { NULL, NULL }, /* Po */ + { NULL, posts_wline }, /* Pq */ + { NULL, NULL }, /* Qc */ + { NULL, posts_wline }, /* Ql */ + { NULL, NULL }, /* Qo */ + { NULL, posts_wline }, /* Qq */ + { NULL, NULL }, /* Re */ + { NULL, posts_rs }, /* Rs */ + { NULL, NULL }, /* Sc */ + { NULL, NULL }, /* So */ + { NULL, posts_wline }, /* Sq */ + { NULL, posts_bool }, /* Sm */ + { NULL, posts_text }, /* Sx */ + { NULL, posts_text }, /* Sy */ + { NULL, posts_text }, /* Tn */ + { NULL, NULL }, /* Ux */ + { NULL, NULL }, /* Xc */ + { NULL, NULL }, /* Xo */ + { NULL, posts_fo }, /* Fo */ + { NULL, NULL }, /* Fc */ + { NULL, NULL }, /* Oo */ + { NULL, NULL }, /* Oc */ + { NULL, posts_wline }, /* Bk */ + { NULL, NULL }, /* Ek */ + { NULL, posts_eoln }, /* Bt */ + { NULL, NULL }, /* Hf */ + { NULL, NULL }, /* Fr */ + { NULL, posts_eoln }, /* Ud */ + { NULL, posts_lb }, /* Lb */ + { NULL, posts_notext }, /* Lp */ + { NULL, posts_text }, 
/* Lk */ + { NULL, posts_text }, /* Mt */ + { NULL, posts_wline }, /* Brq */ + { NULL, NULL }, /* Bro */ + { NULL, NULL }, /* Brc */ + { NULL, posts_text }, /* %C */ + { NULL, NULL }, /* Es */ + { NULL, NULL }, /* En */ + { NULL, NULL }, /* Dx */ + { NULL, posts_text }, /* %Q */ + { NULL, posts_notext }, /* br */ + { NULL, posts_sp }, /* sp */ + { NULL, posts_text1 }, /* %U */ + { NULL, NULL }, /* Ta */ +}; + + +int +mdoc_valid_pre(struct mdoc *mdoc, struct mdoc_node *n) +{ + v_pre *p; + int line, pos; + char *tp; + + if (MDOC_TEXT == n->type) { + tp = n->string; + line = n->line; + pos = n->pos; + return(check_text(mdoc, line, pos, tp)); + } + + if ( ! check_args(mdoc, n)) + return(0); + if (NULL == mdoc_valids[n->tok].pre) + return(1); + for (p = mdoc_valids[n->tok].pre; *p; p++) + if ( ! (*p)(mdoc, n)) + return(0); + return(1); +} + + +int +mdoc_valid_post(struct mdoc *mdoc) +{ + v_post *p; + + if (MDOC_VALID & mdoc->last->flags) + return(1); + mdoc->last->flags |= MDOC_VALID; + + if (MDOC_TEXT == mdoc->last->type) + return(1); + if (MDOC_ROOT == mdoc->last->type) + return(post_root(mdoc)); + + if (NULL == mdoc_valids[mdoc->last->tok].post) + return(1); + for (p = mdoc_valids[mdoc->last->tok].post; *p; p++) + if ( ! (*p)(mdoc)) + return(0); + + return(1); +} + + +static inline int +warn_count(struct mdoc *m, const char *k, + int want, const char *v, int has) +{ + + return(mdoc_vmsg(m, MANDOCERR_ARGCOUNT, + m->last->line, m->last->pos, + "%s %s %d (have %d)", v, k, want, has)); +} + + +static inline int +err_count(struct mdoc *m, const char *k, + int want, const char *v, int has) +{ + + mdoc_vmsg(m, MANDOCERR_SYNTARGCOUNT, + m->last->line, m->last->pos, + "%s %s %d (have %d)", + v, k, want, has); + return(0); +} + + +/* + * Build these up with macros because they're basically the same check + * for different inequalities. Yes, this could be done with functions, + * but this is reasonable for now. 
+ */ + +#define CHECK_CHILD_DEFN(lvl, name, ineq) \ +static int \ +lvl##_child_##name(struct mdoc *mdoc, const char *p, int sz) \ +{ \ + if (mdoc->last->nchild ineq sz) \ + return(1); \ + return(lvl##_count(mdoc, #ineq, sz, p, mdoc->last->nchild)); \ +} + +#define CHECK_BODY_DEFN(name, lvl, func, num) \ +static int \ +b##lvl##_##name(POST_ARGS) \ +{ \ + if (MDOC_BODY != mdoc->last->type) \ + return(1); \ + return(func(mdoc, "multi-line arguments", (num))); \ +} + +#define CHECK_ELEM_DEFN(name, lvl, func, num) \ +static int \ +e##lvl##_##name(POST_ARGS) \ +{ \ + assert(MDOC_ELEM == mdoc->last->type); \ + return(func(mdoc, "line arguments", (num))); \ +} + +#define CHECK_HEAD_DEFN(name, lvl, func, num) \ +static int \ +h##lvl##_##name(POST_ARGS) \ +{ \ + if (MDOC_HEAD != mdoc->last->type) \ + return(1); \ + return(func(mdoc, "line arguments", (num))); \ +} + + +CHECK_CHILD_DEFN(warn, gt, >) /* warn_child_gt() */ +CHECK_CHILD_DEFN(err, gt, >) /* err_child_gt() */ +CHECK_CHILD_DEFN(warn, eq, ==) /* warn_child_eq() */ +CHECK_CHILD_DEFN(err, eq, ==) /* err_child_eq() */ +CHECK_CHILD_DEFN(err, lt, <) /* err_child_lt() */ +CHECK_CHILD_DEFN(warn, lt, <) /* warn_child_lt() */ +CHECK_BODY_DEFN(ge1, warn, warn_child_gt, 0) /* bwarn_ge1() */ +CHECK_BODY_DEFN(ge1, err, err_child_gt, 0) /* berr_ge1() */ +CHECK_ELEM_DEFN(ge1, warn, warn_child_gt, 0) /* ewarn_ge1() */ +CHECK_ELEM_DEFN(eq1, err, err_child_eq, 1) /* eerr_eq1() */ +CHECK_ELEM_DEFN(le1, err, err_child_lt, 2) /* eerr_le1() */ +CHECK_ELEM_DEFN(eq0, err, err_child_eq, 0) /* eerr_eq0() */ +CHECK_ELEM_DEFN(ge1, err, err_child_gt, 0) /* eerr_ge1() */ +CHECK_HEAD_DEFN(eq0, err, err_child_eq, 0) /* herr_eq0() */ +CHECK_HEAD_DEFN(le1, warn, warn_child_lt, 2) /* hwarn_le1() */ +CHECK_HEAD_DEFN(ge1, err, err_child_gt, 0) /* herr_ge1() */ +CHECK_HEAD_DEFN(eq1, warn, warn_child_eq, 1) /* hwarn_eq1() */ +CHECK_HEAD_DEFN(eq0, warn, warn_child_eq, 0) /* hwarn_eq0() */ + + +static int +check_stdarg(PRE_ARGS) +{ + + if (n->args && 1 == 
n->args->argc) + if (MDOC_Std == n->args->argv[0].arg) + return(1); + return(mdoc_nmsg(mdoc, n, MANDOCERR_NOARGV)); +} + + +static int +check_args(struct mdoc *m, struct mdoc_node *n) +{ + int i; + + if (NULL == n->args) + return(1); + + assert(n->args->argc); + for (i = 0; i < (int)n->args->argc; i++) + if ( ! check_argv(m, n, &n->args->argv[i])) + return(0); + + return(1); +} + + +static int +check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v) +{ + int i; + + for (i = 0; i < (int)v->sz; i++) + if ( ! check_text(m, v->line, v->pos, v->value[i])) + return(0); + + if (MDOC_Std == v->arg) { + if (v->sz || m->meta.name) + return(1); + if ( ! mdoc_nmsg(m, n, MANDOCERR_NONAME)) + return(0); + } + + return(1); +} + + +static int +check_text(struct mdoc *mdoc, int line, int pos, char *p) +{ + int c; + + for ( ; *p; p++, pos++) { + if ('\t' == *p) { + if ( ! (MDOC_LITERAL & mdoc->flags)) + if ( ! mdoc_pmsg(mdoc, line, pos, MANDOCERR_BADCHAR)) + return(0); + } else if ( ! isprint((u_char)*p) && ASCII_HYPH != *p) + if ( ! mdoc_pmsg(mdoc, line, pos, MANDOCERR_BADCHAR)) + return(0); + + if ('\\' != *p) + continue; + + c = mandoc_special(p); + if (c) { + p += c - 1; + pos += c - 1; + continue; + } + + c = mdoc_pmsg(mdoc, line, pos, MANDOCERR_BADESCAPE); + if ( ! (MDOC_IGN_ESCAPE & mdoc->pflags) && ! c) + return(c); + } + + return(1); +} + + + + +static int +check_parent(PRE_ARGS, enum mdoct tok, enum mdoc_type t) +{ + + assert(n->parent); + if ((MDOC_ROOT == t || tok == n->parent->tok) && + (t == n->parent->type)) + return(1); + + mdoc_vmsg(mdoc, MANDOCERR_SYNTCHILD, + n->line, n->pos, "want parent %s", + MDOC_ROOT == t ? "" : + mdoc_macronames[tok]); + return(0); +} + + + +static int +pre_display(PRE_ARGS) +{ + struct mdoc_node *node; + + /* Display elements (`Bd', `D1'...) cannot be nested. 
*/ + + if (MDOC_BLOCK != n->type) + return(1); + + /* LINTED */ + for (node = mdoc->last->parent; node; node = node->parent) + if (MDOC_BLOCK == node->type) + if (MDOC_Bd == node->tok) + break; + if (NULL == node) + return(1); + + mdoc_nmsg(mdoc, n, MANDOCERR_NESTEDDISP); + return(0); +} + + +static int +pre_bl(PRE_ARGS) +{ + int i, comp, dup; + const char *offs, *width; + enum mdoc_list lt; + + if (MDOC_BLOCK != n->type) { + assert(n->parent); + assert(MDOC_BLOCK == n->parent->type); + assert(MDOC_Bl == n->parent->tok); + assert(LIST__NONE != n->parent->data.Bl.type); + memcpy(&n->data.Bl, &n->parent->data.Bl, + sizeof(struct mdoc_bl)); + return(1); + } + + /* + * First figure out which kind of list to use: bind ourselves to + * the first mentioned list type and warn about any remaining + * ones. If we find no list type, we default to LIST_item. + */ + + assert(LIST__NONE == n->data.Bl.type); + + /* LINTED */ + for (i = 0; n->args && i < (int)n->args->argc; i++) { + lt = LIST__NONE; + dup = comp = 0; + width = offs = NULL; + switch (n->args->argv[i].arg) { + /* Set list types. */ + case (MDOC_Bullet): + lt = LIST_bullet; + break; + case (MDOC_Dash): + lt = LIST_dash; + break; + case (MDOC_Enum): + lt = LIST_enum; + break; + case (MDOC_Hyphen): + lt = LIST_hyphen; + break; + case (MDOC_Item): + lt = LIST_item; + break; + case (MDOC_Tag): + lt = LIST_tag; + break; + case (MDOC_Diag): + lt = LIST_diag; + break; + case (MDOC_Hang): + lt = LIST_hang; + break; + case (MDOC_Ohang): + lt = LIST_ohang; + break; + case (MDOC_Inset): + lt = LIST_inset; + break; + case (MDOC_Column): + lt = LIST_column; + break; + /* Set list arguments. */ + case (MDOC_Compact): + dup = n->data.Bl.comp; + comp = 1; + break; + case (MDOC_Width): + dup = (NULL != n->data.Bl.width); + width = n->args->argv[i].value[0]; + break; + case (MDOC_Offset): + /* NB: this can be empty! 
*/ + if (n->args->argv[i].sz) { + offs = n->args->argv[i].value[0]; + dup = (NULL != n->data.Bl.offs); + break; + } + if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_IGNARGV)) + return(0); + break; + } + + /* Check: duplicate auxiliary arguments. */ + + if (dup && ! mdoc_nmsg(mdoc, n, MANDOCERR_ARGVREP)) + return(0); + + if (comp && ! dup) + n->data.Bl.comp = comp; + if (offs && ! dup) + n->data.Bl.offs = offs; + if (width && ! dup) + n->data.Bl.width = width; + + /* Check: multiple list types. */ + + if (LIST__NONE != lt && n->data.Bl.type != LIST__NONE) + if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_LISTREP)) + return(0); + + /* Assign list type. */ + + if (LIST__NONE != lt && n->data.Bl.type == LIST__NONE) + n->data.Bl.type = lt; + + /* The list type should come first. */ + + if (n->data.Bl.type == LIST__NONE) + if (n->data.Bl.width || + n->data.Bl.offs || + n->data.Bl.comp) + if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_LISTFIRST)) + return(0); + + continue; + } + + /* Allow lists to default to LIST_item. */ + + if (LIST__NONE == n->data.Bl.type) { + if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_LISTTYPE)) + return(0); + n->data.Bl.type = LIST_item; + } + + /* + * Validate the width field. Some list types don't need width + * types and should be warned about them. Others should have it + * and must also be warned. 
+ */ + + switch (n->data.Bl.type) { + case (LIST_tag): + if (n->data.Bl.width) + break; + if (mdoc_nmsg(mdoc, n, MANDOCERR_NOWIDTHARG)) + break; + return(0); + case (LIST_column): + /* FALLTHROUGH */ + case (LIST_diag): + /* FALLTHROUGH */ + case (LIST_ohang): + /* FALLTHROUGH */ + case (LIST_inset): + /* FALLTHROUGH */ + case (LIST_item): + if (NULL == n->data.Bl.width) + break; + if (mdoc_nmsg(mdoc, n, MANDOCERR_WIDTHARG)) + break; + return(0); + default: + break; + } + + return(1); +} + + +static int +pre_bd(PRE_ARGS) +{ + int i, dup, comp; + enum mdoc_disp dt; + const char *offs; + + if (MDOC_BLOCK != n->type) { + assert(n->parent); + assert(MDOC_BLOCK == n->parent->type); + assert(MDOC_Bd == n->parent->tok); + assert(DISP__NONE != n->parent->data.Bd.type); + memcpy(&n->data.Bd, &n->parent->data.Bd, + sizeof(struct mdoc_bd)); + return(1); + } + + assert(DISP__NONE == n->data.Bd.type); + + /* LINTED */ + for (i = 0; n->args && i < (int)n->args->argc; i++) { + dt = DISP__NONE; + dup = comp = 0; + offs = NULL; + + switch (n->args->argv[i].arg) { + case (MDOC_Centred): + dt = DISP_centred; + break; + case (MDOC_Ragged): + dt = DISP_ragged; + break; + case (MDOC_Unfilled): + dt = DISP_unfilled; + break; + case (MDOC_Filled): + dt = DISP_filled; + break; + case (MDOC_Literal): + dt = DISP_literal; + break; + case (MDOC_File): + mdoc_nmsg(mdoc, n, MANDOCERR_BADDISP); + return(0); + case (MDOC_Offset): + /* NB: this can be empty! */ + if (n->args->argv[i].sz) { + offs = n->args->argv[i].value[0]; + dup = (NULL != n->data.Bd.offs); + break; + } + if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_IGNARGV)) + return(0); + break; + case (MDOC_Compact): + comp = 1; + dup = n->data.Bd.comp; + break; + default: + abort(); + /* NOTREACHED */ + } + + /* Check whether we have duplicates. */ + + if (dup && ! mdoc_nmsg(mdoc, n, MANDOCERR_ARGVREP)) + return(0); + + /* Make our auxiliary assignments. */ + + if (offs && ! dup) + n->data.Bd.offs = offs; + if (comp && ! 
dup)
+			n->data.Bd.comp = comp;
+
+		/* Check whether a type has already been assigned. */
+
+		if (DISP__NONE != dt && n->data.Bd.type != DISP__NONE)
+			if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_DISPREP))
+				return(0);
+
+		/* Make our type assignment. */
+
+		if (DISP__NONE != dt && n->data.Bd.type == DISP__NONE)
+			n->data.Bd.type = dt;
+	}
+
+	if (DISP__NONE == n->data.Bd.type) {
+		if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_DISPTYPE))
+			return(0);
+		n->data.Bd.type = DISP_ragged;
+	}
+
+	return(1);
+}
+
+
+/*
+ * Pre-validate `Ss': only the BLOCK node is checked, and it must sit
+ * directly inside the BODY of an `Sh'.
+ */
+static int
+pre_ss(PRE_ARGS)
+{
+
+	if (MDOC_BLOCK != n->type)
+		return(1);
+	return(check_parent(mdoc, n, MDOC_Sh, MDOC_BODY));
+}
+
+
+/*
+ * Pre-validate `Sh': only the BLOCK node is checked, and its parent
+ * must be the document root (check_parent() ignores the token when
+ * the wanted type is MDOC_ROOT).
+ */
+static int
+pre_sh(PRE_ARGS)
+{
+
+	if (MDOC_BLOCK != n->type)
+		return(1);
+	return(check_parent(mdoc, n, MDOC_MAX, MDOC_ROOT));
+}
+
+
+/*
+ * Pre-validate `It': only the BLOCK node is checked, and it must sit
+ * directly inside the BODY of a `Bl'.
+ */
+static int
+pre_it(PRE_ARGS)
+{
+
+	if (MDOC_BLOCK != n->type)
+		return(1);
+	/*
+	 * FIXME: this can probably be lifted if we make the It into
+	 * something else on-the-fly?
+	 */
+	return(check_parent(mdoc, n, MDOC_Bl, MDOC_BODY));
+}
+
+
+/*
+ * Pre-validate `An': the macro accepts either no argument vector or
+ * exactly one argument.  Anything else is reported and fails (0).
+ */
+static int
+pre_an(PRE_ARGS)
+{
+
+	if (NULL == n->args || 1 == n->args->argc)
+		return(1);
+	/*
+	 * The count is cast to int, as everywhere else in this file,
+	 * so that the argument really matches the "%d" conversion.
+	 */
+	mdoc_vmsg(mdoc, MANDOCERR_SYNTARGCOUNT,
+			n->line, n->pos,
+			"line arguments == 1 (have %d)",
+			(int)n->args->argc);
+	return(0);
+}
+
+
+/* Pre-validate `Rv': defers entirely to check_stdarg(). */
+static int
+pre_rv(PRE_ARGS)
+{
+
+	return(check_stdarg(mdoc, n));
+}
+
+
+/*
+ * Post-validate `Dt': report MANDOCERR_UPPERCASE once if the first
+ * child's string contains a character that toupper() would change.
+ * Returns 0 only if the message callback asks to stop.
+ */
+static int
+post_dt(POST_ARGS)
+{
+	const struct mdoc_node *nn;
+	const char *p;
+
+	if (NULL != (nn = mdoc->last->child))
+		for (p = nn->string; *p; p++) {
+			/*
+			 * Compare as unsigned char on both sides: with a
+			 * signed plain char, bytes >= 0x80 would never
+			 * compare equal to toupper()'s result.
+			 */
+			if (toupper((u_char)*p) == (u_char)*p)
+				continue;
+			if ( ! mdoc_nmsg(mdoc, nn, MANDOCERR_UPPERCASE))
+				return(0);
+			break;
+		}
+
+	return(1);
+}
+
+
+/*
+ * Pre-validate `Dt': reports MANDOCERR_PROLOGOOO if no date has been
+ * recorded yet or an operating system already has, and
+ * MANDOCERR_PROLOGREP if a title was already recorded.
+ */
+static int
+pre_dt(PRE_ARGS)
+{
+
+	if (0 == mdoc->meta.date || mdoc->meta.os)
+		if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGOOO))
+			return(0);
+	if (mdoc->meta.title)
+		if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGREP))
+			return(0);
+	return(1);
+}
+
+
+static int
+pre_os(PRE_ARGS)
+{
+
+	if (NULL == mdoc->meta.title || 0 == mdoc->meta.date)
+		if ( ! 
mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGOOO)) + return(0); + if (mdoc->meta.os) + if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGREP)) + return(0); + return(1); +} + + +static int +pre_dd(PRE_ARGS) +{ + + if (mdoc->meta.title || mdoc->meta.os) + if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGOOO)) + return(0); + if (mdoc->meta.date) + if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGREP)) + return(0); + return(1); +} + + +static int +post_bf(POST_ARGS) +{ + char *p; + struct mdoc_node *head; + + if (MDOC_BLOCK != mdoc->last->type) + return(1); + + head = mdoc->last->head; + + if (mdoc->last->args && head->child) { + /* FIXME: this should provide a default. */ + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SYNTARGVCOUNT); + return(0); + } else if (mdoc->last->args) + return(1); + + if (NULL == head->child || MDOC_TEXT != head->child->type) { + /* FIXME: this should provide a default. */ + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SYNTARGVCOUNT); + return(0); + } + + p = head->child->string; + + if (0 == strcmp(p, "Em")) + return(1); + else if (0 == strcmp(p, "Li")) + return(1); + else if (0 == strcmp(p, "Sy")) + return(1); + + mdoc_nmsg(mdoc, head, MANDOCERR_FONTTYPE); + return(0); +} + + +static int +post_lb(POST_ARGS) +{ + + if (mdoc_a2lib(mdoc->last->child->string)) + return(1); + return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADLIB)); +} + + +static int +post_eoln(POST_ARGS) +{ + + if (NULL == mdoc->last->child) + return(1); + return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_ARGSLOST)); +} + + +static int +post_vt(POST_ARGS) +{ + const struct mdoc_node *n; + + /* + * The Vt macro comes in both ELEM and BLOCK form, both of which + * have different syntaxes (yet more context-sensitive + * behaviour). ELEM types must have a child; BLOCK types, + * specifically the BODY, should only have TEXT children. + */ + + if (MDOC_ELEM == mdoc->last->type) + return(eerr_ge1(mdoc)); + if (MDOC_BODY != mdoc->last->type) + return(1); + + for (n = mdoc->last->child; n; n = n->next) + if (MDOC_TEXT != n->type) + if ( ! 
mdoc_nmsg(mdoc, n, MANDOCERR_CHILD)) + return(0); + + return(1); +} + + +static int +post_nm(POST_ARGS) +{ + + if (mdoc->last->child) + return(1); + if (mdoc->meta.name) + return(1); + return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NONAME)); +} + + +static int +post_at(POST_ARGS) +{ + + if (NULL == mdoc->last->child) + return(1); + assert(MDOC_TEXT == mdoc->last->child->type); + if (mdoc_a2att(mdoc->last->child->string)) + return(1); + return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADATT)); +} + + +static int +post_an(POST_ARGS) +{ + + if (mdoc->last->args) { + if (NULL == mdoc->last->child) + return(1); + return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_ARGCOUNT)); + } + + if (mdoc->last->child) + return(1); + return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NOARGS)); +} + + +static int +post_it(POST_ARGS) +{ + int i, cols, rc; + enum mdoc_list lt; + struct mdoc_node *n, *c; + enum mandocerr er; + + if (MDOC_BLOCK != mdoc->last->type) + return(1); + + n = mdoc->last->parent->parent; + lt = n->data.Bl.type; + + if (LIST__NONE == lt) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_LISTTYPE); + return(0); + } + + switch (lt) { + case (LIST_tag): + if (mdoc->last->head->child) + break; + /* FIXME: give this a dummy value. */ + if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NOARGS)) + return(0); + break; + case (LIST_hang): + /* FALLTHROUGH */ + case (LIST_ohang): + /* FALLTHROUGH */ + case (LIST_inset): + /* FALLTHROUGH */ + case (LIST_diag): + if (NULL == mdoc->last->head->child) + if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NOARGS)) + return(0); + if (NULL == mdoc->last->body->child) + if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NOBODY)) + return(0); + break; + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_enum): + /* FALLTHROUGH */ + case (LIST_hyphen): + /* FALLTHROUGH */ + case (LIST_item): + if (mdoc->last->head->child) + if ( ! 
mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_ARGSLOST)) + return(0); + if (NULL == mdoc->last->body->child) + if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NOBODY)) + return(0); + break; + case (LIST_column): + cols = -1; + for (i = 0; i < (int)n->args->argc; i++) + if (MDOC_Column == n->args->argv[i].arg) { + cols = (int)n->args->argv[i].sz; + break; + } + + assert(-1 != cols); + assert(NULL == mdoc->last->head->child); + + if (NULL == mdoc->last->body->child) + if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NOBODY)) + return(0); + + for (i = 0, c = mdoc->last->child; c; c = c->next) + if (MDOC_BODY == c->type) + i++; + + if (i < cols) + er = MANDOCERR_ARGCOUNT; + else if (i == cols || i == cols + 1) + break; + else + er = MANDOCERR_SYNTARGCOUNT; + + rc = mdoc_vmsg(mdoc, er, + mdoc->last->line, mdoc->last->pos, + "columns == %d (have %d)", cols, i); + return(rc); + default: + break; + } + + return(1); +} + + +static int +post_bl_head(POST_ARGS) +{ + int i; + struct mdoc_node *n; + + assert(mdoc->last->parent); + n = mdoc->last->parent; + + if (LIST_column == n->data.Bl.type) { + for (i = 0; i < (int)n->args->argc; i++) + if (MDOC_Column == n->args->argv[i].arg) + break; + assert(i < (int)n->args->argc); + + if (n->args->argv[i].sz && mdoc->last->nchild) { + mdoc_nmsg(mdoc, n, MANDOCERR_COLUMNS); + return(0); + } + return(1); + } + + if (0 == (i = mdoc->last->nchild)) + return(1); + return(warn_count(mdoc, "==", 0, "line arguments", i)); +} + + +static int +post_bl(POST_ARGS) +{ + struct mdoc_node *n; + + if (MDOC_HEAD == mdoc->last->type) + return(post_bl_head(mdoc)); + if (MDOC_BODY != mdoc->last->type) + return(1); + if (NULL == mdoc->last->child) + return(1); + + /* + * We only allow certain children of `Bl'. This is usually on + * `It', but apparently `Sm' occurs here and there, so we let + * that one through, too. 
+ */ + + /* LINTED */ + for (n = mdoc->last->child; n; n = n->next) { + if (MDOC_BLOCK == n->type && MDOC_It == n->tok) + continue; + if (MDOC_Sm == n->tok) + continue; + mdoc_nmsg(mdoc, n, MANDOCERR_SYNTCHILD); + return(0); + } + + return(1); +} + + +static int +ebool(struct mdoc *mdoc) +{ + struct mdoc_node *n; + + /* LINTED */ + for (n = mdoc->last->child; n; n = n->next) { + if (MDOC_TEXT != n->type) + break; + if (0 == strcmp(n->string, "on")) + continue; + if (0 == strcmp(n->string, "off")) + continue; + break; + } + + if (NULL == n) + return(1); + return(mdoc_nmsg(mdoc, n, MANDOCERR_BADBOOL)); +} + + +static int +post_root(POST_ARGS) +{ + + if (NULL == mdoc->first->child) + mdoc_nmsg(mdoc, mdoc->first, MANDOCERR_NODOCBODY); + else if ( ! (MDOC_PBODY & mdoc->flags)) + mdoc_nmsg(mdoc, mdoc->first, MANDOCERR_NODOCPROLOG); + else if (MDOC_BLOCK != mdoc->first->child->type) + mdoc_nmsg(mdoc, mdoc->first, MANDOCERR_NODOCBODY); + else if (MDOC_Sh != mdoc->first->child->tok) + mdoc_nmsg(mdoc, mdoc->first, MANDOCERR_NODOCBODY); + else + return(1); + + return(0); +} + + +static int +post_st(POST_ARGS) +{ + + if (mdoc_a2st(mdoc->last->child->string)) + return(1); + return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADSTANDARD)); +} + + +static int +post_rs(POST_ARGS) +{ + struct mdoc_node *nn; + + if (MDOC_BODY != mdoc->last->type) + return(1); + + for (nn = mdoc->last->child; nn; nn = nn->next) + switch (nn->tok) { + case(MDOC__U): + /* FALLTHROUGH */ + case(MDOC__Q): + /* FALLTHROUGH */ + case(MDOC__C): + /* FALLTHROUGH */ + case(MDOC__A): + /* FALLTHROUGH */ + case(MDOC__B): + /* FALLTHROUGH */ + case(MDOC__D): + /* FALLTHROUGH */ + case(MDOC__I): + /* FALLTHROUGH */ + case(MDOC__J): + /* FALLTHROUGH */ + case(MDOC__N): + /* FALLTHROUGH */ + case(MDOC__O): + /* FALLTHROUGH */ + case(MDOC__P): + /* FALLTHROUGH */ + case(MDOC__R): + /* FALLTHROUGH */ + case(MDOC__T): + /* FALLTHROUGH */ + case(MDOC__V): + break; + default: + mdoc_nmsg(mdoc, nn, MANDOCERR_SYNTCHILD); + 
return(0); + } + + return(1); +} + + +static int +post_sh(POST_ARGS) +{ + + if (MDOC_HEAD == mdoc->last->type) + return(post_sh_head(mdoc)); + if (MDOC_BODY == mdoc->last->type) + return(post_sh_body(mdoc)); + + return(1); +} + + +static int +post_sh_body(POST_ARGS) +{ + struct mdoc_node *n; + + if (SEC_NAME != mdoc->lastsec) + return(1); + + /* + * Warn if the NAME section doesn't contain the `Nm' and `Nd' + * macros (can have multiple `Nm' and one `Nd'). Note that the + * children of the BODY declaration can also be "text". + */ + + if (NULL == (n = mdoc->last->child)) + return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADNAMESEC)); + + for ( ; n && n->next; n = n->next) { + if (MDOC_ELEM == n->type && MDOC_Nm == n->tok) + continue; + if (MDOC_TEXT == n->type) + continue; + if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADNAMESEC)) + return(0); + } + + assert(n); + if (MDOC_BLOCK == n->type && MDOC_Nd == n->tok) + return(1); + return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADNAMESEC)); +} + + +static int +post_sh_head(POST_ARGS) +{ + char buf[BUFSIZ]; + enum mdoc_sec sec; + const struct mdoc_node *n; + + /* + * Process a new section. Sections are either "named" or + * "custom"; custom sections are user-defined, while named ones + * usually follow a conventional order and may only appear in + * certain manual sections. + */ + + buf[0] = '\0'; + + /* + * FIXME: yes, these can use a dynamic buffer, but I don't do so + * in the interests of simplicity. + */ + + for (n = mdoc->last->child; n; n = n->next) { + /* XXX - copied from compact(). */ + assert(MDOC_TEXT == n->type); + + if (strlcat(buf, n->string, BUFSIZ) >= BUFSIZ) { + mdoc_nmsg(mdoc, n, MANDOCERR_MEM); + return(0); + } + if (NULL == n->next) + continue; + if (strlcat(buf, " ", BUFSIZ) >= BUFSIZ) { + mdoc_nmsg(mdoc, n, MANDOCERR_MEM); + return(0); + } + } + + sec = mdoc_str2sec(buf); + + /* + * Check: NAME should always be first, CUSTOM has no roles, + * non-CUSTOM has a conventional order to be followed. 
+ */ + + if (SEC_NAME != sec && SEC_NONE == mdoc->lastnamed) + if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NAMESECFIRST)) + return(0); + + if (SEC_CUSTOM == sec) + return(1); + + if (sec == mdoc->lastnamed) + if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECREP)) + return(0); + + if (sec < mdoc->lastnamed) + if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECOOO)) + return(0); + + /* + * Check particular section/manual conventions. LIBRARY can + * only occur in manual section 2, 3, and 9. + */ + + switch (sec) { + case (SEC_LIBRARY): + assert(mdoc->meta.msec); + if (*mdoc->meta.msec == '2') + break; + if (*mdoc->meta.msec == '3') + break; + if (*mdoc->meta.msec == '9') + break; + return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECMSEC)); + default: + break; + } + + return(1); +} diff --git a/commands/mdocml/msec.c b/commands/mdocml/msec.c new file mode 100644 index 000000000..ba5e8d783 --- /dev/null +++ b/commands/mdocml/msec.c @@ -0,0 +1,37 @@ +/* $Id: msec.c,v 1.8 2010/05/17 22:11:42 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "mandoc.h"
+#include "libmdoc.h"
+
+/*
+ * Each LINE(x, y) entry pulled in from msec.in expands to one string
+ * comparison: return the volume title y if p equals the section x.
+ */
+#define LINE(x, y) \
+	if (0 == strcmp(p, x)) return(y);
+
+/*
+ * Map a manual section name (e.g., "1", "3p") to its volume title.
+ * The table lives in msec.in and is searched linearly in file order.
+ * Returns NULL if p matches no entry.  p must not be NULL, as it is
+ * handed straight to strcmp().
+ */
+const char *
+mdoc_a2msec(const char *p)
+{
+
+#include "msec.in"
+
+	return(NULL);
+}
diff --git a/commands/mdocml/msec.in b/commands/mdocml/msec.in
new file mode 100644
index 000000000..f3aebb46a
--- /dev/null
+++ b/commands/mdocml/msec.in
@@ -0,0 +1,40 @@
+/*	$Id: msec.in,v 1.6 2010/06/19 20:46:28 kristaps Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * These are all possible manual-section macros and what they correspond
+ * to when rendered as the volume title.
+ *
+ * Be sure to escape strings. 
+ */
+
+LINE("1",		"General Commands Manual")
+LINE("2",		"System Calls Manual")
+LINE("3",		"Library Functions Manual")
+LINE("3p",		"Perl Library Functions Manual")
+LINE("4",		"Kernel Interfaces Manual")
+LINE("5",		"File Formats Manual")
+LINE("6",		"Games Manual")
+LINE("7",		"Miscellaneous Information Manual")
+LINE("8",		"System Manager\'s Manual")
+LINE("9",		"Kernel Developer\'s Manual")
+LINE("X11",		"X11 Developer\'s Manual")
+LINE("X11R6",		"X11 Developer\'s Manual")
+LINE("unass",		"Unassociated")
+LINE("local",		"Local")
+LINE("draft",		"Draft")
+LINE("paper",		"Paper")
diff --git a/commands/mdocml/out.c b/commands/mdocml/out.c
new file mode 100644
index 000000000..6a26f95a3
--- /dev/null
+++ b/commands/mdocml/out.c
@@ -0,0 +1,399 @@
+/*	$Id: out.c,v 1.16 2010/06/19 20:46:28 kristaps Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "out.h"
+
+/*
+ * See a2roffdeco().  Sets (l) to the digit limit used when scanning a
+ * size escape introduced by character (c): 2 after `(', 0 after `['
+ * or `\'' (a2roffdeco() treats 0 as "no limit"), and 1 otherwise.
+ */
+#define	C2LIM(c, l) do { \
+	(l) = 1; \
+	if ('[' == (c) || '\'' == (c)) \
+		(l) = 0; \
+	else if ('(' == (c)) \
+		(l) = 2; } \
+	while (/* CONSTCOND */ 0)
+
+/* See a2roffdeco(). 
*/
+#define	C2TERM(c, t) do { \
+	(t) = 0; \
+	if ('\'' == (c)) \
+		(t) = 1; \
+	else if ('[' == (c)) \
+		(t) = 2; \
+	else if ('(' == (c)) \
+		(t) = 3; } \
+	while (/* CONSTCOND */ 0)
+
+/*
+ * Parse the scaling-unit string "src" (optional sign, digits with at
+ * most one decimal point, optional one-letter unit suffix) into "dst".
+ * Returns 1 on success and 0 if the string is empty, too long for the
+ * local buffer, has trailing characters after the suffix, carries an
+ * unknown suffix, or has no suffix while "def" is SCALE_MAX.
+ * Negative magnitudes are stored as 0.  Scaling units are documented
+ * in groff.7, mdoc.7, man.7.
+ *
+ * NOTE(review): "def" is consulted only as the SCALE_MAX sentinel; a
+ * suffix-less value always becomes SCALE_BU -- confirm this is what
+ * callers expect.
+ */
+int
+a2roffsu(const char *src, struct roffsu *dst, enum roffscale def)
+{
+	char		 digits[BUFSIZ];
+	int		 len, seen_dot;
+	enum roffscale	 unit;
+
+	if ('\0' == *src)
+		return(0);
+
+	len = 0;
+	seen_dot = 0;
+
+	/* A leading `+' is dropped; a leading `-' is kept for atof(). */
+
+	if ('+' == *src)
+		src++;
+	else if ('-' == *src)
+		digits[len++] = *src++;
+
+	if ('\0' == *src)
+		return(0);
+
+	/* Gather digits and at most one decimal point. */
+
+	while (len < BUFSIZ) {
+		if ('.' == *src) {
+			if (seen_dot)
+				break;
+			seen_dot = 1;
+		} else if ( ! isdigit((u_char)*src))
+			break;
+		digits[len++] = *src++;
+	}
+
+	/* Buffer exhausted, or more than one character follows. */
+
+	if (BUFSIZ == len)
+		return(0);
+	if ('\0' != src[0] && '\0' != src[1])
+		return(0);
+
+	digits[len] = '\0';
+
+	switch (*src) {
+	case ('\0'):
+		if (SCALE_MAX == def)
+			return(0);
+		/* FALLTHROUGH */
+	case ('u'):
+		unit = SCALE_BU;
+		break;
+	case ('c'):
+		unit = SCALE_CM;
+		break;
+	case ('i'):
+		unit = SCALE_IN;
+		break;
+	case ('P'):
+		unit = SCALE_PC;
+		break;
+	case ('p'):
+		unit = SCALE_PT;
+		break;
+	case ('f'):
+		unit = SCALE_FS;
+		break;
+	case ('v'):
+		unit = SCALE_VS;
+		break;
+	case ('m'):
+		unit = SCALE_EM;
+		break;
+	case ('M'):
+		unit = SCALE_MM;
+		break;
+	case ('n'):
+		unit = SCALE_EN;
+		break;
+	default:
+		return(0);
+	}
+
+	dst->scale = atof(digits);
+	if (dst->scale < 0)
+		dst->scale = 0;
+	dst->unit = unit;
+	dst->pt = seen_dot;
+
+	return(1);
+}
+
+
+/*
+ * Correctly writes the time in nroff form, which differs from standard
+ * form in that a space isn't printed in lieu of the extra %e field for
+ * single-digit dates. 
+ */ +void +time2a(time_t t, char *dst, size_t sz) +{ + struct tm tm; + char buf[5]; + char *p; + size_t nsz; + + assert(sz > 1); + localtime_r(&t, &tm); + + p = dst; + nsz = 0; + + dst[0] = '\0'; + + if (0 == (nsz = strftime(p, sz, "%B ", &tm))) + return; + + p += (int)nsz; + sz -= nsz; + + if (0 == strftime(buf, sizeof(buf), "%e, ", &tm)) + return; + + nsz = strlcat(p, buf + (' ' == buf[0] ? 1 : 0), sz); + + if (nsz >= sz) + return; + + p += (int)nsz; + sz -= nsz; + + (void)strftime(p, sz, "%Y", &tm); +} + + +/* + * Returns length of parsed string (the leading "\" should NOT be + * included). This can be zero if the current character is the nil + * terminator. "d" is set to the type of parsed decorator, which may + * have an adjoining "word" of size "sz" (e.g., "(ab" -> "ab", 2). + */ +int +a2roffdeco(enum roffdeco *d, + const char **word, size_t *sz) +{ + int j, term, lim; + char set; + const char *wp, *sp; + + *d = DECO_NONE; + wp = *word; + + switch ((set = *wp)) { + case ('\0'): + return(0); + + case ('('): + if ('\0' == *(++wp)) + return(1); + if ('\0' == *(wp + 1)) + return(2); + + *d = DECO_SPECIAL; + *sz = 2; + *word = wp; + return(3); + + case ('F'): + /* FALLTHROUGH */ + case ('f'): + /* + * FIXME: this needs work and consolidation (it should + * follow the sequence that special characters do, for + * one), but isn't a priority at the moment. Note, for + * one, that in reality \fB != \FB, although here we let + * these slip by. + */ + switch (*(++wp)) { + case ('\0'): + return(1); + case ('3'): + /* FALLTHROUGH */ + case ('B'): + *d = DECO_BOLD; + return(2); + case ('2'): + /* FALLTHROUGH */ + case ('I'): + *d = DECO_ITALIC; + return(2); + case ('P'): + *d = DECO_PREVIOUS; + return(2); + case ('1'): + /* FALLTHROUGH */ + case ('R'): + *d = DECO_ROMAN; + return(2); + case ('('): + if ('\0' == *(++wp)) + return(2); + if ('\0' == *(wp + 1)) + return(3); + + *d = 'F' == set ? 
DECO_FFONT : DECO_FONT; + *sz = 2; + *word = wp; + return(4); + case ('['): + *word = ++wp; + for (j = 0; *wp && ']' != *wp; wp++, j++) + /* Loop... */ ; + + if ('\0' == *wp) + return(j + 2); + + *d = 'F' == set ? DECO_FFONT : DECO_FONT; + *sz = (size_t)j; + return(j + 3); + default: + break; + } + + *d = 'F' == set ? DECO_FFONT : DECO_FONT; + *sz = 1; + *word = wp; + return(2); + + case ('*'): + switch (*(++wp)) { + case ('\0'): + return(1); + + case ('('): + if ('\0' == *(++wp)) + return(2); + if ('\0' == *(wp + 1)) + return(3); + + *d = DECO_RESERVED; + *sz = 2; + *word = wp; + return(4); + + case ('['): + *word = ++wp; + for (j = 0; *wp && ']' != *wp; wp++, j++) + /* Loop... */ ; + + if ('\0' == *wp) + return(j + 2); + + *d = DECO_RESERVED; + *sz = (size_t)j; + return(j + 3); + + default: + break; + } + + *d = DECO_RESERVED; + *sz = 1; + *word = wp; + return(2); + + case ('s'): + sp = wp; + if ('\0' == *(++wp)) + return(1); + + C2LIM(*wp, lim); + C2TERM(*wp, term); + + if (term) + wp++; + + *word = wp; + + if (*wp == '+' || *wp == '-') + ++wp; + + switch (*wp) { + case ('\''): + /* FALLTHROUGH */ + case ('['): + /* FALLTHROUGH */ + case ('('): + if (term) + return((int)(wp - sp)); + + C2LIM(*wp, lim); + C2TERM(*wp, term); + wp++; + break; + default: + break; + } + + if ( ! isdigit((u_char)*wp)) + return((int)(wp - sp)); + + for (j = 0; isdigit((u_char)*wp); j++) { + if (lim && j >= lim) + break; + ++wp; + } + + if (term && term < 3) { + if (1 == term && *wp != '\'') + return((int)(wp - sp)); + if (2 == term && *wp != ']') + return((int)(wp - sp)); + ++wp; + } + + *d = DECO_SIZE; + return((int)(wp - sp)); + + case ('['): + *word = ++wp; + + for (j = 0; *wp && ']' != *wp; wp++, j++) + /* Loop... 
*/ ; + + if ('\0' == *wp) + return(j + 1); + + *d = DECO_SPECIAL; + *sz = (size_t)j; + return(j + 2); + + case ('c'): + *d = DECO_NOSPACE; + *sz = 1; + return(1); + + default: + break; + } + + *d = DECO_SPECIAL; + *word = wp; + *sz = 1; + return(1); +} diff --git a/commands/mdocml/out.h b/commands/mdocml/out.h new file mode 100644 index 000000000..39dde4203 --- /dev/null +++ b/commands/mdocml/out.h @@ -0,0 +1,82 @@ +/* $Id: out.h,v 1.11 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+#ifndef OUT_H
+#define OUT_H
+
+#define	DATESIZ		24
+
+__BEGIN_DECLS
+
+/* Scaling units; the suffix letters are those accepted by a2roffsu(). */
+enum	roffscale {
+	SCALE_CM, /* `c' */
+	SCALE_IN, /* `i' */
+	SCALE_PC, /* `P' */
+	SCALE_PT, /* `p' */
+	SCALE_EM, /* `m' */
+	SCALE_MM, /* `M' */
+	SCALE_EN, /* `n' */
+	SCALE_BU, /* `u', also used when no suffix is given */
+	SCALE_VS, /* `v' */
+	SCALE_FS, /* `f' */
+	SCALE_MAX /* as a2roffsu()'s default: a suffix is mandatory */
+};
+
+/* Kinds of escape sequence reported by a2roffdeco(). */
+enum	roffdeco {
+	DECO_NONE,
+	DECO_SPECIAL,
+	DECO_RESERVED,
+	DECO_BOLD,
+	DECO_ITALIC,
+	DECO_ROMAN,
+	DECO_PREVIOUS,
+	DECO_SIZE,
+	DECO_NOSPACE,
+	DECO_FONT, /* font */
+	DECO_FFONT, /* font family */
+	DECO_MAX
+};
+
+/* A parsed scaling-unit value, as filled in by a2roffsu(). */
+struct	roffsu {
+	enum roffscale	  unit;
+	double		  scale; /* magnitude; a2roffsu() stores negatives as 0 */
+	int		  pt; /* non-zero if the number had a decimal point */
+};
+
+#define	SCALE_INVERT(p) \
+	do { (p)->scale = -(p)->scale; } \
+	while (/* CONSTCOND */ 0)
+
+#define	SCALE_VS_INIT(p, v) \
+	do { (p)->unit = SCALE_VS; \
+	     (p)->scale = (v); \
+	     (p)->pt = 0; } \
+	while (/* CONSTCOND */ 0)
+
+#define	SCALE_HS_INIT(p, v) \
+	do { (p)->unit = SCALE_BU; \
+	     (p)->scale = (v); \
+	     (p)->pt = 0; } \
+	while (/* CONSTCOND */ 0)
+
+int		  a2roffsu(const char *, 
+			struct roffsu *, enum roffscale);
+int		  a2roffdeco(enum roffdeco *, const char **, size_t *);
+void		  time2a(time_t, char *, size_t);
+
+__END_DECLS
+
+#endif /*!OUT_H*/
diff --git a/commands/mdocml/roff.3 b/commands/mdocml/roff.3
new file mode 100644
index 000000000..fcc47395b
--- /dev/null
+++ b/commands/mdocml/roff.3
@@ -0,0 +1,156 @@
+.\"	$Id: roff.3,v 1.1 2010/05/25 22:16:59 kristaps Exp $
+.\"
+.\" Copyright (c) 2010 Kristaps Dzonsons
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: May 25 2010 $ +.Dt ROFF 3 +.Os +.Sh NAME +.Nm roff , +.Nm roff_alloc , +.Nm roff_endparse , +.Nm roff_free , +.Nm roff_parseln , +.Nm roff_reset +.Nd roff macro compiler library +.Sh SYNOPSIS +.In mandoc.h +.In roff.h +.Ft "struct roff *" +.Fn roff_alloc "mandocmsg msgs" "void *data" +.Ft int +.Fn roff_endparse "struct roff *roff" +.Ft void +.Fn roff_free "struct roff *roff" +.Ft "enum rofferr" +.Fo roff_parseln +.Fa "struct roff *roff" +.Fa "int line" +.Fa "char **bufp" +.Fa "size_t *bufsz" +.Fa "int pos" +.Fa "int *offs" +.Fc +.Ft void +.Fn roff_reset "struct roff *roff" +.Sh DESCRIPTION +The +.Nm +library processes lines of +.Xr roff 7 +input. +.Pp +In general, applications initiate a parsing sequence with +.Fn roff_alloc , +parse each line in a document with +.Fn roff_parseln , +close the parsing session with +.Fn roff_endparse , +and finally free all allocated memory with +.Fn roff_free . +The +.Fn roff_reset +function may be used in order to reset the parser for another input +sequence. +.Pp +The +.Fn roff_parseln +function should be invoked before passing a line into the +.Xr mdoc 3 +or +.Xr man 3 +libraries. +.Pp +See the +.Sx EXAMPLES +section for a full example. +.Sh REFERENCE +This section further defines the +.Sx Types +and +.Sx Functions +available to programmers. +.Ss Types +Functions (see +.Sx Functions ) +may use the following types: +.Bl -ohang +.It Vt "enum rofferr" +Instructions for further processing to the caller of +.Fn roff_parseln . +.It Vt struct roff +An opaque type defined in +.Pa roff.c . +Its values are only used privately within the library. 
+.It Vt mandocmsg +A function callback type defined in +.Pa mandoc.h . +.El +.Ss Functions +Function descriptions follow: +.Bl -ohang +.It Fn roff_alloc +Allocates a parsing structure. +The +.Fa data +pointer is passed to +.Fa msgs . +The +.Fa msgs +callback type is defined in +.Pa mandoc.h . +Returns NULL on failure. +If non-NULL, the pointer must be freed with +.Fn roff_free . +.It Fn roff_reset +Reset the parser for another parse routine. +After its use, +.Fn roff_parseln +behaves as if invoked for the first time. +.It Fn roff_free +Free all resources of a parser. +The pointer is no longer valid after invocation. +.It Fn roff_parseln +Parse a nil-terminated line of input. +The character array +.Fa bufp +may be modified or reallocated within this function. +In the latter case, +.Fa bufsz +will be modified accordingly. +The +.Fa offs +pointer will be modified if the line start during subsequent processing +of the line is not at the zeroth index. +This line should not contain the trailing newline. +Returns ROFF_ERR on failure; otherwise ROFF_CONT, ROFF_RERUN or ROFF_IGN. +.It Fn roff_endparse +Signals that the parse is complete. +Returns 0 on failure, 1 on success. +.El +.Sh EXAMPLES +See +.Pa main.c +in the source distribution for an example of usage. +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr man 3 , +.Xr mdoc 3 , +.Xr roff 7 +.Sh AUTHORS +The +.Nm +library was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . diff --git a/commands/mdocml/roff.7 b/commands/mdocml/roff.7 new file mode 100644 index 000000000..12ccfc353 --- /dev/null +++ b/commands/mdocml/roff.7 @@ -0,0 +1,304 @@ +.\" $Id: roff.7,v 1.9 2010/06/10 21:42:02 kristaps Exp $ +.\" +.\" Copyright (c) 2010 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. 
+.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: June 10 2010 $ +.Dt ROFF 7 +.Os +.Sh NAME +.Nm roff +.Nd roff language reference +.Sh DESCRIPTION +The +.Nm roff +language is a general-purpose text-formatting language. The purpose of +this document is to consistently describe those language constructs +accepted by the +.Xr mandoc 1 +utility. It is a work in progress. +.Pp +An +.Nm +document follows simple rules: lines beginning with the control +characters +.Sq \. +or +.Sq \(aq +are parsed for macros. Other lines are interpreted within the scope of +prior macros: +.Bd -literal -offset indent +\&.xx Macro lines change control state. +Other lines are interpreted within the current state. +.Ed +.Sh LANGUAGE SYNTAX +.Nm +documents may contain only graphable 7-bit ASCII characters, the space +character, and, in certain circumstances, the tab character. All +manuals must have +.Ux +line terminators. +.Sh MACRO SYNTAX +Macros are arbitrary in length and begin with a control character , +.Sq \. +or +.Sq \(aq , +at the beginning of the line. +An arbitrary amount of whitespace may sit between the control character +and the macro name. +Thus, the following are equivalent: +.Bd -literal -offset indent +\&.if +\&.\ \ \ \&if +.Ed +.Sh REFERENCE +This section is a canonical reference of all macros, arranged +alphabetically. +.Ss \&am +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. 
+.Ss \&ami +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. +.Ss \&am1 +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. +.Ss \&de +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. +.Ss \&dei +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. +.Ss \&ds +Define a string. +This macro is intended to have two arguments, +the name of the string to define and its content. +Currently, it is ignored including its arguments, +and the number of arguments is not checked. +.Ss \&de1 +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. +.Ss \&el +The +.Qq else +half of an if/else conditional. +Pops a result off the stack of conditional evaluations pushed by +.Sx \&ie +and uses it as its conditional. +If no stack entries are present (e.g., due to no prior +.Sx \&ie +calls) +then false is assumed. +The syntax of this macro is similar to +.Sx \&if +except that the conditional is missing. +.Ss \&ie +The +.Qq if +half of an if/else conditional. +The result of the conditional is pushed into a stack used by subsequent +invocations of +.Sx \&el , +which may be separated by any intervening input (or not exist at all). +Its syntax is equivalent to +.Sx \&if . +.Ss \&if +Begins a conditional. +Right now, the conditional evaluates to true +if and only if it starts with the letter +.Sy n , +indicating processing in +.Xr nroff 1 +style as opposed to +.Xr troff 1 +style. 
+If a conditional is false, its children are not processed, but are +syntactically interpreted to preserve the integrity of the input +document. +Thus, +.Pp +.D1 \&.if t \e .ig +.Pp +will discard the +.Sq \&.ig , +which may lead to interesting results, but +.Pp +.D1 \&.if t \e .if t \e{\e +.Pp +will continue to syntactically interpret to the block close of the final +conditional. +Sub-conditionals, in this case, obviously inherit the truth value of +the parent. +This macro has the following syntax: +.Pp +.Bd -literal -offset indent -compact +\&.if COND \e{\e +BODY... +\&.\e} +.Ed +.Bd -literal -offset indent -compact +\&.if COND \e{ BODY +BODY... \e} +.Ed +.Bd -literal -offset indent -compact +\&.if COND \e{ BODY +BODY... +\&.\e} +.Ed +.Bd -literal -offset indent -compact +\&.if COND \e +BODY +.Ed +.Pp +COND is a conditional statement. +roff allows for complicated conditionals; mandoc is much simpler. +At this time, mandoc supports only +.Sq n , +evaluating to true; +and +.Sq t , +.Sq e , +and +.Sq o , +evaluating to false. +All other invocations are read up to the next end of line or space and +evaluate as false. +.Pp +If the BODY section is begun by an escaped brace +.Sq \e{ , +scope continues until a closing-brace macro +.Sq \.\e} . +If the BODY is not enclosed in braces, scope continues until the next +macro or word. +If the COND is followed by a BODY on the same line, whether after a +brace or not, then macros +.Em must +begin with a control character. +It is generally more intuitive, in this case, to write +.Bd -literal -offset indent +\&.if COND \e{\e +\&.foo +bar +\&.\e} +.Ed +.Pp +than having the macro follow as +.Pp +.D1 \&.if COND \e{ .foo +.Pp +The scope of a conditional is always parsed, but only executed if the +conditional evaluates to true. +.Pp +Note that text subsequent a +.Sq \&.\e} +macro is discarded. 
+Furthermore, if an explicit closing sequence +.Sq \e} +is specified in a free-form line, the entire line is accepted within the +scope of the prior macro, not only the text preceding the close, with the +.Sq \e} +collapsing into a zero-width space. +.Ss \&ig +Ignore input. +Accepts the following syntax: +.Pp +.Bd -literal -offset indent -compact +\&.ig +BODY... +\&.. +.Ed +.Bd -literal -offset indent -compact +\&.ig END +BODY... +\&.END +.Ed +.Pp +In the first case, input is ignored until a +.Sq \&.. +macro is encountered on its own line. +In the second case, input is ignored until a +.Sq \&.END +is encountered. +Text subsequent to the +.Sq \&.END +or +.Sq \&.. +is discarded. +.Pp +Do not use the escape +.Sq \e +anywhere in the definition of END. +It causes very strange behaviour. +Furthermore, if you redefine a +.Nm +macro, such as +.Pp +.D1 \&.ig if +.Pp +the subsequent invocation of +.Sx \&if +will first signify the end of comment, then be invoked as a macro. +This behaviour really shouldn't be counted upon. +.Ss \&rm +Remove a request, macro or string. +This macro is intended to have one argument, +the name of the request, macro or string to be undefined. +Currently, it is ignored including its arguments, +and the number of arguments is not checked. +.Ss \&tr +Output character translation. +This macro is intended to have one argument, +consisting of an even number of characters. +Currently, it is ignored including its arguments, +and the number of arguments is not checked. +.Sh COMPATIBILITY +This section documents compatibility between mandoc and other +troff implementations, at this time limited to GNU troff +.Pq Qq groff . +The term +.Qq historic groff +refers to groff versions before the +.Pa doc.tmac +file re-write +.Pq somewhere between 1.15 and 1.19 . +.Pp +.Bl -dash -compact +.It +Historic groff did not accept white-space buffering the custom END tag +for the +.Sx \&ig +macro. 
+.It +The +.Sx \&if +and family would print funny white-spaces with historic groff when +depending on next-line syntax. +.El +.Sh AUTHORS +The +.Nm +reference was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . diff --git a/commands/mdocml/roff.c b/commands/mdocml/roff.c new file mode 100644 index 000000000..a22697d1c --- /dev/null +++ b/commands/mdocml/roff.c @@ -0,0 +1,857 @@ +/* $Id: roff.c,v 1.88 2010/06/10 21:42:02 kristaps Exp $ */ +/* + * Copyright (c) 2010 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "roff.h" + +#define RSTACK_MAX 128 + +#define ROFF_CTL(c) \ + ('.' == (c) || '\'' == (c)) + +#ifndef __minix +#if 1 +#define ROFF_DEBUG(fmt, args...) \ + do { /* Nothing. */ } while (/*CONSTCOND*/ 0) +#else +#define ROFF_DEBUG(fmt, args...) \ + do { fprintf(stderr, fmt , ##args); } while (/*CONSTCOND*/ 0) +#endif +#else +void do_nothing(char *fmt, ...) 
+{ +} +#if 1 +#define ROFF_DEBUG do_nothing +#else +#define ROFF_DEBUG fprintf +#endif +#endif + +enum rofft { + ROFF_am, + ROFF_ami, + ROFF_am1, + ROFF_de, + ROFF_dei, + ROFF_de1, + ROFF_ds, + ROFF_el, + ROFF_ie, + ROFF_if, + ROFF_ig, + ROFF_rm, + ROFF_tr, + ROFF_cblock, + ROFF_ccond, + ROFF_MAX +}; + +enum roffrule { + ROFFRULE_ALLOW, + ROFFRULE_DENY +}; + +struct roff { + struct roffnode *last; /* leaf of stack */ + mandocmsg msg; /* err/warn/fatal messages */ + void *data; /* privdata for messages */ + enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */ + int rstackpos; /* position in rstack */ +}; + +struct roffnode { + enum rofft tok; /* type of node */ + struct roffnode *parent; /* up one in stack */ + int line; /* parse line */ + int col; /* parse col */ + char *end; /* end-rules: custom token */ + int endspan; /* end-rules: next-line or infty */ + enum roffrule rule; /* current evaluation rule */ +}; + +#define ROFF_ARGS struct roff *r, /* parse ctx */ \ + enum rofft tok, /* tok of macro */ \ + char **bufp, /* input buffer */ \ + size_t *szp, /* size of input buffer */ \ + int ln, /* parse line */ \ + int ppos, /* original pos in buffer */ \ + int pos, /* current pos in buffer */ \ + int *offs /* reset offset of buffer data */ + +typedef enum rofferr (*roffproc)(ROFF_ARGS); + +struct roffmac { + const char *name; /* macro name */ + roffproc proc; /* process new macro */ + roffproc text; /* process as child text of macro */ + roffproc sub; /* process as child of macro */ + int flags; +#define ROFFMAC_STRUCT (1 << 0) /* always interpret */ + struct roffmac *next; +}; + +static enum rofferr roff_block(ROFF_ARGS); +static enum rofferr roff_block_text(ROFF_ARGS); +static enum rofferr roff_block_sub(ROFF_ARGS); +static enum rofferr roff_cblock(ROFF_ARGS); +static enum rofferr roff_ccond(ROFF_ARGS); +static enum rofferr roff_cond(ROFF_ARGS); +static enum rofferr roff_cond_text(ROFF_ARGS); +static enum rofferr roff_cond_sub(ROFF_ARGS); +static enum 
roffrule roff_evalcond(const char *, int *); +static enum rofferr roff_line(ROFF_ARGS); + +/* See roff_hash_find() */ + +#define ASCII_HI 126 +#define ASCII_LO 33 +#define HASHWIDTH (ASCII_HI - ASCII_LO + 1) + +static struct roffmac *hash[HASHWIDTH]; + +static struct roffmac roffs[ROFF_MAX] = { + { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "ds", roff_line, NULL, NULL, 0, NULL }, + { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, + { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, + { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, + { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "rm", roff_line, NULL, NULL, 0, NULL }, + { "tr", roff_line, NULL, NULL, 0, NULL }, + { ".", roff_cblock, NULL, NULL, 0, NULL }, + { "\\}", roff_ccond, NULL, NULL, 0, NULL }, +}; + +static void roff_free1(struct roff *); +static enum rofft roff_hash_find(const char *); +static void roff_hash_init(void); +static void roffnode_cleanscope(struct roff *); +static int roffnode_push(struct roff *, + enum rofft, int, int); +static void roffnode_pop(struct roff *); +static enum rofft roff_parse(const char *, int *); + +/* See roff_hash_find() */ +#define ROFF_HASH(p) (p[0] - ASCII_LO) + +static void +roff_hash_init(void) +{ + struct roffmac *n; + int buc, i; + + for (i = 0; i < (int)ROFF_MAX; i++) { + assert(roffs[i].name[0] >= ASCII_LO); + assert(roffs[i].name[0] <= ASCII_HI); + + buc = ROFF_HASH(roffs[i].name); + /* Skip entries already chained so repeated roff_alloc() calls cannot build a cycle. */ + if (NULL != (n = hash[buc])) { + for ( ; n->next && n != &roffs[i]; n = n->next) + /* Do nothing.
*/ ; + if (n != &roffs[i]) n->next = &roffs[i]; + } else + hash[buc] = &roffs[i]; + } +} + + +/* + * Look up a roff token by its name. Returns ROFF_MAX if no macro by + * the nil-terminated string name could be found. + */ +static enum rofft +roff_hash_find(const char *p) +{ + int buc; + struct roffmac *n; + + /* + * libroff has an extremely simple hashtable, for the time + * being, which simply keys on the first character, which must + * be printable, then walks a chain. It works well enough until + * optimised. + */ + + if (p[0] < ASCII_LO || p[0] > ASCII_HI) + return(ROFF_MAX); + + buc = ROFF_HASH(p); + + if (NULL == (n = hash[buc])) + return(ROFF_MAX); + for ( ; n; n = n->next) + if (0 == strcmp(n->name, p)) + return((enum rofft)(n - roffs)); + + return(ROFF_MAX); +} + + +/* + * Pop the current node off of the stack of roff instructions currently + * pending. + */ +static void +roffnode_pop(struct roff *r) +{ + struct roffnode *p; + + assert(r->last); + p = r->last; + + if (ROFF_el == p->tok) + if (r->rstackpos > -1) + r->rstackpos--; + + r->last = r->last->parent; + if (p->end) + free(p->end); + free(p); +} + + +/* + * Push a roff node onto the instruction stack. This must later be + * removed with roffnode_pop(). + */ +static int +roffnode_push(struct roff *r, enum rofft tok, int line, int col) +{ + struct roffnode *p; + + if (NULL == (p = calloc(1, sizeof(struct roffnode)))) { + (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL); + return(0); + } + + p->tok = tok; + p->parent = r->last; + p->line = line; + p->col = col; + p->rule = p->parent ? 
p->parent->rule : ROFFRULE_DENY; + + r->last = p; + return(1); +} + + +static void +roff_free1(struct roff *r) +{ + + while (r->last) + roffnode_pop(r); +} + + +void +roff_reset(struct roff *r) +{ + + roff_free1(r); +} + + +void +roff_free(struct roff *r) +{ + + roff_free1(r); + free(r); +} + + +struct roff * +roff_alloc(const mandocmsg msg, void *data) +{ + struct roff *r; + + if (NULL == (r = calloc(1, sizeof(struct roff)))) { + (*msg)(MANDOCERR_MEM, data, 0, 0, NULL); + return(0); + } + + r->msg = msg; + r->data = data; + r->rstackpos = -1; + + roff_hash_init(); + return(r); +} + + +enum rofferr +roff_parseln(struct roff *r, int ln, + char **bufp, size_t *szp, int pos, int *offs) +{ + enum rofft t; + int ppos; + + /* + * First, if a scope is open and we're not a macro, pass the + * text through the macro's filter. If a scope isn't open and + * we're not a macro, just let it through. + */ + + if (r->last && ! ROFF_CTL((*bufp)[pos])) { + t = r->last->tok; + assert(roffs[t].text); + + ROFF_DEBUG("roff: intercept scoped text: %s, [%s]\n", + roffs[t].name, &(*bufp)[pos]); + return((*roffs[t].text) + (r, t, bufp, szp, ln, pos, pos, offs)); + } else if ( ! ROFF_CTL((*bufp)[pos])) { + ROFF_DEBUG("roff: pass non-scoped text: [%s]\n", + &(*bufp)[pos]); + return(ROFF_CONT); + } + + /* + * If a scope is open, go to the child handler for that macro, + * as it may want to preprocess before doing anything with it. + */ + + if (r->last) { + t = r->last->tok; + assert(roffs[t].sub); + ROFF_DEBUG("roff: intercept scoped context: %s\n", + roffs[t].name); + return((*roffs[t].sub) + (r, t, bufp, szp, ln, pos, pos, offs)); + } + + /* + * Lastly, as we've no scope open, try to look up and execute + * the new macro. If no macro is found, simply return and let + * the compilers handle it. 
+ */ + + ppos = pos; + if (ROFF_MAX == (t = roff_parse(*bufp, &pos))) { + ROFF_DEBUG("roff: pass non-scoped non-macro: [%s]\n", + &(*bufp)[pos]); + return(ROFF_CONT); + } + + ROFF_DEBUG("roff: intercept new-scope: %s, [%s]\n", + roffs[t].name, &(*bufp)[pos]); + assert(roffs[t].proc); + return((*roffs[t].proc) + (r, t, bufp, szp, ln, ppos, pos, offs)); +} + + +int +roff_endparse(struct roff *r) +{ + + if (NULL == r->last) + return(1); + return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line, + r->last->col, NULL)); +} + + +/* + * Parse a roff node's type from the input buffer. This must be in the + * form of ".foo xxx" in the usual way. + */ +static enum rofft +roff_parse(const char *buf, int *pos) +{ + int j; + char mac[5]; + enum rofft t; + + assert(ROFF_CTL(buf[*pos])); + (*pos)++; + + while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos])) + (*pos)++; + + if ('\0' == buf[*pos]) + return(ROFF_MAX); + + for (j = 0; j < 4; j++, (*pos)++) + if ('\0' == (mac[j] = buf[*pos])) + break; + else if (' ' == buf[*pos] || (j && '\\' == buf[*pos])) + break; + + if (j == 4 || j < 1) + return(ROFF_MAX); + + mac[j] = '\0'; + + if (ROFF_MAX == (t = roff_hash_find(mac))) + return(t); + + while (buf[*pos] && ' ' == buf[*pos]) + (*pos)++; + + return(t); +} + + +/* ARGSUSED */ +static enum rofferr +roff_cblock(ROFF_ARGS) +{ + + /* + * A block-close `..' should only be invoked as a child of an + * ignore macro, otherwise raise a warning and just ignore it. + */ + + if (NULL == r->last) { + if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) + return(ROFF_ERR); + return(ROFF_IGN); + } + + switch (r->last->tok) { + case (ROFF_am): + /* FALLTHROUGH */ + case (ROFF_ami): + /* FALLTHROUGH */ + case (ROFF_am1): + /* FALLTHROUGH */ + case (ROFF_de): + /* FALLTHROUGH */ + case (ROFF_dei): + /* FALLTHROUGH */ + case (ROFF_de1): + /* FALLTHROUGH */ + case (ROFF_ig): + break; + default: + if ( ! 
(*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) + return(ROFF_ERR); + return(ROFF_IGN); + } + + if ((*bufp)[pos]) + if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL)) + return(ROFF_ERR); + + roffnode_pop(r); + roffnode_cleanscope(r); + return(ROFF_IGN); + +} + + +static void +roffnode_cleanscope(struct roff *r) +{ + + while (r->last) { + if (--r->last->endspan < 0) + break; + roffnode_pop(r); + } +} + + +/* ARGSUSED */ +static enum rofferr +roff_ccond(ROFF_ARGS) +{ + + if (NULL == r->last) { + if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) + return(ROFF_ERR); + return(ROFF_IGN); + } + + switch (r->last->tok) { + case (ROFF_el): + /* FALLTHROUGH */ + case (ROFF_ie): + /* FALLTHROUGH */ + case (ROFF_if): + break; + default: + if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) + return(ROFF_ERR); + return(ROFF_IGN); + } + + if (r->last->endspan > -1) { + if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) + return(ROFF_ERR); + return(ROFF_IGN); + } + + if ((*bufp)[pos]) + if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL)) + return(ROFF_ERR); + + roffnode_pop(r); + roffnode_cleanscope(r); + return(ROFF_IGN); +} + + +/* ARGSUSED */ +static enum rofferr +roff_block(ROFF_ARGS) +{ + int sv; + size_t sz; + + if (ROFF_ig != tok && '\0' == (*bufp)[pos]) { + if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL)) + return(ROFF_ERR); + return(ROFF_IGN); + } else if (ROFF_ig != tok) { + while ((*bufp)[pos] && ' ' != (*bufp)[pos]) + pos++; + while (' ' == (*bufp)[pos]) + pos++; + } + + if ( ! roffnode_push(r, tok, ln, ppos)) + return(ROFF_ERR); + + if ('\0' == (*bufp)[pos]) + return(ROFF_IGN); + + sv = pos; + while ((*bufp)[pos] && ' ' != (*bufp)[pos] && + '\t' != (*bufp)[pos]) + pos++; + + /* + * Note: groff does NOT like escape characters in the input. + * Instead of detecting this, we're just going to let it fly and + * to hell with it. 
+ */ + + assert(pos > sv); + sz = (size_t)(pos - sv); + + if (1 == sz && '.' == (*bufp)[sv]) + return(ROFF_IGN); + + r->last->end = malloc(sz + 1); + + if (NULL == r->last->end) { + (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL); + return(ROFF_ERR); + } + + memcpy(r->last->end, *bufp + sv, sz); + r->last->end[(int)sz] = '\0'; + + if ((*bufp)[pos]) + if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL)) + return(ROFF_ERR); + + return(ROFF_IGN); +} + + +/* ARGSUSED */ +static enum rofferr +roff_block_sub(ROFF_ARGS) +{ + enum rofft t; + int i, j; + + /* + * First check whether a custom macro exists at this level. If + * it does, then check against it. This is some of groff's + * stranger behaviours. If we encountered a custom end-scope + * tag and that tag also happens to be a "real" macro, then we + * need to try interpreting it again as a real macro. If it's + * not, then return ignore. Else continue. + */ + + if (r->last->end) { + i = pos + 1; + while (' ' == (*bufp)[i] || '\t' == (*bufp)[i]) + i++; + + for (j = 0; r->last->end[j]; j++, i++) + if ((*bufp)[i] != r->last->end[j]) + break; + + if ('\0' == r->last->end[j] && + ('\0' == (*bufp)[i] || + ' ' == (*bufp)[i] || + '\t' == (*bufp)[i])) { + roffnode_pop(r); + roffnode_cleanscope(r); + + if (ROFF_MAX != roff_parse(*bufp, &pos)) + return(ROFF_RERUN); + return(ROFF_IGN); + } + } + + /* + * If we have no custom end-query or lookup failed, then try + * pulling it out of the hashtable. + */ + + ppos = pos; + t = roff_parse(*bufp, &pos); + + /* If we're not a comment-end, then throw it away. 
*/ + if (ROFF_cblock != t) + return(ROFF_IGN); + + assert(roffs[t].proc); + return((*roffs[t].proc)(r, t, bufp, + szp, ln, ppos, pos, offs)); +} + + +/* ARGSUSED */ +static enum rofferr +roff_block_text(ROFF_ARGS) +{ + + return(ROFF_IGN); +} + + +/* ARGSUSED */ +static enum rofferr +roff_cond_sub(ROFF_ARGS) +{ + enum rofft t; + enum roffrule rr; + struct roffnode *l; + + ppos = pos; + rr = r->last->rule; + + /* + * Clean out scope. If we've closed ourselves, then don't + * continue. + */ + + l = r->last; + roffnode_cleanscope(r); + + if (l != r->last) + return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); + + if (ROFF_MAX == (t = roff_parse(*bufp, &pos))) + return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); + + /* + * A denied conditional must evaluate its children if and only + * if they're either structurally required (such as loops and + * conditionals) or a closing macro. + */ + if (ROFFRULE_DENY == rr) + if ( ! (ROFFMAC_STRUCT & roffs[t].flags)) + if (ROFF_ccond != t) + return(ROFF_IGN); + + assert(roffs[t].proc); + return((*roffs[t].proc) + (r, t, bufp, szp, ln, ppos, pos, offs)); +} + + +/* ARGSUSED */ +static enum rofferr +roff_cond_text(ROFF_ARGS) +{ + char *ep, *st; + enum roffrule rr; + + rr = r->last->rule; + + /* + * We display the value of the text if our current evaluation + * scope permits us to do so. + */ + + st = &(*bufp)[pos]; + if (NULL == (ep = strstr(st, "\\}"))) { + roffnode_cleanscope(r); + return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); + } + + if (ep == st || (ep > st && '\\' != *(ep - 1))) + roffnode_pop(r); + + roffnode_cleanscope(r); + return(ROFFRULE_DENY == rr ? 
ROFF_IGN : ROFF_CONT); +} + + +static enum roffrule +roff_evalcond(const char *v, int *pos) +{ + + switch (v[*pos]) { + case ('n'): + (*pos)++; + return(ROFFRULE_ALLOW); + case ('e'): + /* FALLTHROUGH */ + case ('o'): + /* FALLTHROUGH */ + case ('t'): + (*pos)++; + return(ROFFRULE_DENY); + default: + break; + } + + while (v[*pos] && ' ' != v[*pos]) + (*pos)++; + return(ROFFRULE_DENY); +} + + +/* ARGSUSED */ +static enum rofferr +roff_cond(ROFF_ARGS) +{ + int sv; + enum roffrule rule; + + /* Stack overflow! */ + + if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) { + (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL); + return(ROFF_ERR); + } + + /* First, evaluate the conditional. */ + + if (ROFF_el == tok) { + /* + * An `.el' will get the value of the current rstack + * entry set in prior `ie' calls or defaults to DENY. + */ + if (r->rstackpos < 0) + rule = ROFFRULE_DENY; + else + rule = r->rstack[r->rstackpos]; + } else + rule = roff_evalcond(*bufp, &pos); + + sv = pos; + + while (' ' == (*bufp)[pos]) + pos++; + + /* + * Roff is weird. If we have just white-space after the + * conditional, it's considered the BODY and we exit without + * really doing anything. Warn about this. It's probably + * wrong. + */ + + if ('\0' == (*bufp)[pos] && sv != pos) { + if ((*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL)) + return(ROFF_IGN); + return(ROFF_ERR); + } + + if ( ! roffnode_push(r, tok, ln, ppos)) + return(ROFF_ERR); + + r->last->rule = rule; + + ROFF_DEBUG("roff: cond: %s -> %s\n", roffs[tok].name, + ROFFRULE_ALLOW == rule ? "allow" : "deny"); + + if (ROFF_ie == tok) { + /* + * An if-else will put the NEGATION of the current + * evaluated conditional into the stack. + */ + r->rstackpos++; + if (ROFFRULE_DENY == r->last->rule) + r->rstack[r->rstackpos] = ROFFRULE_ALLOW; + else + r->rstack[r->rstackpos] = ROFFRULE_DENY; + } + + /* If the parent has false as its rule, then so do we. 
*/ + + if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule) { + r->last->rule = ROFFRULE_DENY; + ROFF_DEBUG("roff: cond override: %s -> deny\n", + roffs[tok].name); + } + + /* + * Determine scope. If we're invoked with "\{" trailing the + * conditional, then we're in a multiline scope. Else our scope + * expires on the next line. + */ + + r->last->endspan = 1; + + if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { + r->last->endspan = -1; + pos += 2; + ROFF_DEBUG("roff: cond-scope: %s, multi-line\n", + roffs[tok].name); + } else + ROFF_DEBUG("roff: cond-scope: %s, one-line\n", + roffs[tok].name); + + /* + * If there are no arguments on the line, the next-line scope is + * assumed. + */ + + if ('\0' == (*bufp)[pos]) + return(ROFF_IGN); + + /* Otherwise re-run the roff parser after recalculating. */ + + *offs = pos; + return(ROFF_RERUN); +} + + +/* ARGSUSED */ +static enum rofferr +roff_line(ROFF_ARGS) +{ + + return(ROFF_IGN); +} diff --git a/commands/mdocml/roff.h b/commands/mdocml/roff.h new file mode 100644 index 000000000..84859ec13 --- /dev/null +++ b/commands/mdocml/roff.h @@ -0,0 +1,40 @@ +/* $Id: roff.h,v 1.15 2010/05/17 00:06:36 kristaps Exp $ */ +/* + * Copyright (c) 2010 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifndef ROFF_H +#define ROFF_H + +enum rofferr { + ROFF_CONT, /* continue processing line */ + ROFF_RERUN, /* re-run roff interpreter with offset */ + ROFF_IGN, /* ignore current line */ + ROFF_ERR /* badness: puke and stop */ +}; + +__BEGIN_DECLS + +struct roff; + +void roff_free(struct roff *); +struct roff *roff_alloc(mandocmsg, void *); +void roff_reset(struct roff *); +enum rofferr roff_parseln(struct roff *, int, + char **, size_t *, int, int *); +int roff_endparse(struct roff *); + +__END_DECLS + +#endif /*!ROFF_H*/ diff --git a/commands/mdocml/st.c b/commands/mdocml/st.c new file mode 100644 index 000000000..5c6798e58 --- /dev/null +++ b/commands/mdocml/st.c @@ -0,0 +1,38 @@ +/* $Id: st.c,v 1.8 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include "mandoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2st(const char *p) +{ + +#include "st.in" + + return(NULL); +} diff --git a/commands/mdocml/st.in b/commands/mdocml/st.in new file mode 100644 index 000000000..113ccbacb --- /dev/null +++ b/commands/mdocml/st.in @@ -0,0 +1,72 @@ +/* $Id: st.in,v 1.13 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This file defines the .St macro arguments. If you add a new + * standard, make sure that the left-hand side corresponds to the .St + * argument (like .St -p1003.1) and the right-hand side corresponds to + * the formatted output string. + * + * Be sure to escape strings. 
+ */ + +LINE("-p1003.1-88", "IEEE Std 1003.1-1988 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-90", "IEEE Std 1003.1-1990 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-96", "ISO/IEC 9945-1:1996 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-2001", "IEEE Std 1003.1-2001 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-2004", "IEEE Std 1003.1-2004 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-2008", "IEEE Std 1003.1-2008 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1", "IEEE Std 1003.1 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1b", "IEEE Std 1003.1b (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1b-93", "IEEE Std 1003.1b-1993 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1c-95", "IEEE Std 1003.1c-1995 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1g-2000", "IEEE Std 1003.1g-2000 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1i-95", "IEEE Std 1003.1i-1995 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.2-92", "IEEE Std 1003.2-1992 (\\(lqPOSIX.2\\(rq)") +LINE("-p1003.2a-92", "IEEE Std 1003.2a-1992 (\\(lqPOSIX.2\\(rq)") +LINE("-p1387.2-95", "IEEE Std 1387.2-1995 (\\(lqPOSIX.7.2\\(rq)") +LINE("-p1003.2", "IEEE Std 1003.2 (\\(lqPOSIX.2\\(rq)") +LINE("-p1387.2", "IEEE Std 1387.2 (\\(lqPOSIX.7.2\\(rq)") +LINE("-isoC", "ISO/IEC 9899:1990 (\\(lqISO C90\\(rq)") +LINE("-isoC-90", "ISO/IEC 9899:1990 (\\(lqISO C90\\(rq)") +LINE("-isoC-amd1", "ISO/IEC 9899/AMD1:1995 (\\(lqISO C90\\(rq)") +LINE("-isoC-tcor1", "ISO/IEC 9899/TCOR1:1994 (\\(lqISO C90\\(rq)") +LINE("-isoC-tcor2", "ISO/IEC 9899/TCOR2:1995 (\\(lqISO C90\\(rq)") +LINE("-isoC-99", "ISO/IEC 9899:1999 (\\(lqISO C99\\(rq)") +LINE("-iso9945-1-90", "ISO/IEC 9945-1:1990 (\\(lqPOSIX.1\\(rq)") +LINE("-iso9945-1-96", "ISO/IEC 9945-1:1996 (\\(lqPOSIX.1\\(rq)") +LINE("-iso9945-2-93", "ISO/IEC 9945-2:1993 (\\(lqPOSIX.2\\(rq)") +LINE("-ansiC", "ANSI X3.159-1989 (\\(lqANSI C\\(rq)") +LINE("-ansiC-89", "ANSI X3.159-1989 (\\(lqANSI C\\(rq)") +LINE("-ansiC-99", "ANSI/ISO/IEC 9899-1999 (\\(lqANSI C99\\(rq)") +LINE("-ieee754", "IEEE Std 754-1985") +LINE("-iso8802-3", "ISO 8802-3: 1989") +LINE("-ieee1275-94", "IEEE Std 1275-1994 (\\(lqOpen 
Firmware\\(rq)") +LINE("-xpg3", "X/Open Portability Guide Issue 3 (\\(lqXPG3\\(rq)") +LINE("-xpg4", "X/Open Portability Guide Issue 4 (\\(lqXPG4\\(rq)") +LINE("-xpg4.2", "X/Open Portability Guide Issue 4.2 (\\(lqXPG4.2\\(rq)") +LINE("-xpg4.3", "X/Open Portability Guide Issue 4.3 (\\(lqXPG4.3\\(rq)") +LINE("-xbd5", "X/Open System Interface Definitions Issue 5 (\\(lqXBD5\\(rq)") +LINE("-xcu5", "X/Open Commands and Utilities Issue 5 (\\(lqXCU5\\(rq)") +LINE("-xsh5", "X/Open System Interfaces and Headers Issue 5 (\\(lqXSH5\\(rq)") +LINE("-xns5", "X/Open Networking Services Issue 5 (\\(lqXNS5\\(rq)") +LINE("-xns5.2", "X/Open Networking Services Issue 5.2 (\\(lqXNS5.2\\(rq)") +LINE("-xns5.2d2.0", "X/Open Networking Services Issue 5.2 Draft 2.0 (\\(lqXNS5.2D2.0\\(rq)") +LINE("-xcurses4.2", "X/Open Curses Issue 4 Version 2 (\\(lqXCURSES4.2\\(rq)") +LINE("-susv2", "Version 2 of the Single UNIX Specification") +LINE("-susv3", "Version 3 of the Single UNIX Specification") +LINE("-svid4", "System V Interface Definition, Fourth Edition (\\(lqSVID4\\(rq)") diff --git a/commands/mdocml/style.css b/commands/mdocml/style.css new file mode 100644 index 000000000..090c7331c --- /dev/null +++ b/commands/mdocml/style.css @@ -0,0 +1,77 @@ +div.body { color: #333333; + max-width: 800px; + padding-left: 10px; + font-size: smaller; + font-family: Verdana, Tahoma, Arial, sans-serif; } + +div.sec-head { color: #000000; + font-weight: bold; } +div.sec-body { } +div.sec-block { padding-bottom: 1em; } +div.ssec-head { color: #000000; + font-weight: bold; } +div.ssec-body { } +div.ssec-block { } + +span.addr { } /* Address (Ad). */ +span.arg { font-style: italic; } /* Command argument (Ar). */ +span.author { } /* Author name (An). */ +span.cmd { font-weight: bold; } /* Command (Cm). */ +span.config { font-weight: bold; } /* Config statement (Cd). */ +span.define { } /* Defines (Dv). */ +span.desc { } /* Nd. After em-dash. */ +span.diag { font-weight: bold; } /* Diagnostic (Bl -diag). 
*/ +span.emph { font-style: italic; } /* Emphasis (Em). */ +span.env { } /* Environment variables (Ev). */ +span.errno { } /* Error string (Er). */ +span.farg { font-style: italic; } /* Function argument (Fa, Fn). */ +span.file { font-style: italic; } /* File (Pa). */ +span.flag { font-weight: bold; } /* Flag (Fl, Cm). */ +span.fname { font-weight: bold; } /* Function name (Fa, Fn, Rv). */ +span.ftype { font-style: italic; } /* Function types (Ft, Fn). */ +span.includes { font-weight: bold; } /* Header includes (In). */ +span.lib { } /* Library (Lb). */ +span.lit { font-family: monospace; } /* Literals (Bf -literal). */ +span.macro { font-weight: bold; } /* Macro-ish thing (Fd). */ +span.name { color: #003333; font-weight: bold; } /* Name of utility (Nm). */ +span.opt { } /* Options (Op, Oo/Oc). */ +span.ref { } /* Citations (Rs). */ +span.ref-auth { } /* Reference author (%A). */ +span.ref-book { font-style: italic; } /* Reference book (%B). */ +span.ref-city { } /* Reference city (%C). */ +span.ref-date { } /* Reference date (%D). */ +span.ref-issue { font-style: italic; } /* Reference issuer/publisher (%I). */ +span.ref-jrnl { font-style: italic; } /* Reference journal (%J). */ +span.ref-num { } /* Reference number (%N). */ +span.ref-opt { } /* Reference optionals (%O). */ +span.ref-page { } /* Reference page (%P). */ +span.ref-corp { } /* Reference corporate/foreign author (%Q). */ +span.ref-rep { } /* Reference report (%R). */ +span.ref-title { } /* Reference title (%T). */ +span.ref-vol { } /* Reference volume (%V). */ +span.symb { font-weight: bold; } /* Symbols. */ +span.type { font-style: italic; } /* Variable types (Vt). */ +span.unix { } /* Unices (Ux, Ox, Nx, Fx, Bx, Bsx, Dx). */ +span.utility { font-weight: bold; } /* Name of utility (Ex). */ +span.var { font-weight: bold; } /* Variables (Rv). */ + +a.link-ext { background: transparent url(external.png) center right no-repeat; padding-right: 12px; }/* Off-site link (Lk). 
*/ +a.link-includes { } /* Include-file link (In). */ +a.link-mail { background: transparent url(external.png) center right no-repeat; padding-right: 12px; }/* Mailto links (Mt). */ +a.link-man { } /* Manual links (Xr). */ +a.link-sec { text-decoration: none; border-bottom: 1px dotted #339999; } /* Section links (Sx). */ + +div.emph { font-style: italic; } /* Emphasis (Bl -emphasis). */ +div.lit { margin: 3px; + padding: 3px; + background-color: #EEEEEE; + border: 1px solid #339999; + color: #000000; + font-family: monospace; } +div.symb { font-weight: bold; } /* Symbols (Bl -symbolic). */ + +table.header { border-bottom: 1px dotted #dddddd; + color: #999999; } +table.footer { border-top: 1px dotted #dddddd; + color: #999999; } + diff --git a/commands/mdocml/term.c b/commands/mdocml/term.c new file mode 100644 index 000000000..1801ef936 --- /dev/null +++ b/commands/mdocml/term.c @@ -0,0 +1,702 @@ +/* $Id: term.c,v 1.148 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include "mandoc.h" +#include "chars.h" +#include "out.h" +#include "term.h" +#include "man.h" +#include "mdoc.h" +#include "main.h" + +static void spec(struct termp *, const char *, size_t); +static void res(struct termp *, const char *, size_t); +static void buffera(struct termp *, const char *, size_t); +static void bufferc(struct termp *, char); +static void adjbuf(struct termp *p, size_t); +static void encode(struct termp *, const char *, size_t); + +/* Release the output buffer, the symbol table and the context itself. */ +void +term_free(struct termp *p) +{ + + if (p->buf) + free(p->buf); + if (p->symtab) + chars_free(p->symtab); + + free(p); +} + +/* Store the head/foot callbacks and their argument, then run the begin hook. */ +void +term_begin(struct termp *p, term_margin head, + term_margin foot, const void *arg) +{ + + p->headf = head; + p->footf = foot; + p->argf = arg; + (*p->begin)(p); +} + +/* Run the end hook of the output engine. */ +void +term_end(struct termp *p) +{ + + (*p->end)(p); +} + +/* Allocate a zeroed context (tab width 5, device margin 78); exits on failure. */ +struct termp * +term_alloc(enum termenc enc) +{ + struct termp *p; + + p = calloc(1, sizeof(struct termp)); + if (NULL == p) { + perror(NULL); + exit(EXIT_FAILURE); + } + + p->tabwidth = 5; + p->enc = enc; + p->defrmargin = 78; + return(p); +} + + +/* + * Flush a line of text. A "line" is loosely defined as being something + * that should be followed by a newline, regardless of whether it's + * broken apart by newlines getting there. A line can also be a + * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does + * not have a trailing newline. + * + * The following flags may be specified: + * + * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the + * offset value. This is useful when doing columnar lists where the + * prior column has right-padded. + * + * - TERMP_NOBREAK: this is the most important and is used when making + * columns. In short: don't print a newline and instead pad to the + * right margin. Used in conjunction with TERMP_NOLPAD.
+ * + * - TERMP_TWOSPACE: when padding, make sure there are at least two + * space characters of padding. Otherwise, rather break the line. + * + * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and + * the line is overrun, and don't pad-right if it's underrun. + * + * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when + * overrunning, instead save the position and continue at that point + * upon the next invocation. + * + * In-line line breaking: + * + * If TERMP_NOBREAK is specified and the line overruns the right + * margin, it will break and pad-right to the right margin after + * writing. If maxrmargin is violated, it will break and continue + * writing from the right-margin, which will lead to the above scenario + * upon exit. Otherwise, the line will break at the right margin. + */ +void +term_flushln(struct termp *p) +{ + int i; /* current input position in p->buf */ + size_t vis; /* current visual position on output */ + size_t vbl; /* number of blanks to prepend to output */ + size_t vend; /* end of word visual position on output */ + size_t bp; /* visual right border position */ + int j; /* temporary loop index */ + int jhy; /* last hyphen before line overflow */ + size_t maxvis, mmax; /* usable width up to rmargin, maxrmargin */ + + /* + * First, establish the maximum columns of "visible" content. + * This is usually the difference between the right-margin and + * an indentation, but can be, for tagged lists or columns, a + * small set of values. + */ + + assert(p->offset < p->rmargin); + + maxvis = (int)(p->rmargin - p->offset) - p->overstep < 0 ? + /* LINTED */ + 0 : p->rmargin - p->offset - p->overstep; + mmax = (int)(p->maxrmargin - p->offset) - p->overstep < 0 ? + /* LINTED */ + 0 : p->maxrmargin - p->offset - p->overstep; + + bp = TERMP_NOBREAK & p->flags ? mmax : maxvis; + + /* + * Indent the first line of a paragraph. + */ + vbl = p->flags & TERMP_NOLPAD ? 0 : p->offset; + + /* + * FIXME: if bp is zero, we still output the first word before + * breaking the line.
+ */ + + vis = vend = i = 0; + while (i < (int)p->col) { + + /* + * Handle literal tab characters. + */ + for (j = i; j < (int)p->col; j++) { + if ('\t' != p->buf[j]) + break; + vend = (vis/p->tabwidth+1)*p->tabwidth; + vbl += vend - vis; + vis = vend; + } + + /* + * Count up visible word characters. Control sequences + * (starting with the CSI) aren't counted. A space + * generates a non-printing word, which is valid (the + * space is printed according to regular spacing rules). + */ + /* A backspace (8) takes back the column of the character before it. */ + /* LINTED */ + for (jhy = 0; j < (int)p->col; j++) { + if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j]) + break; + if (8 != p->buf[j]) { + if (vend > vis && vend < bp && + ASCII_HYPH == p->buf[j]) + jhy = j; + vend++; + } else + vend--; + } + + /* + * Find out whether we would exceed the right margin. + * If so, break to the next line. + */ + if (vend > bp && 0 == jhy && vis > 0) { + vend -= vis; + (*p->endline)(p); + if (TERMP_NOBREAK & p->flags) { + p->viscol = p->rmargin; + (*p->advance)(p, p->rmargin); + vend += p->rmargin - p->offset; + } else { + p->viscol = 0; + vbl = p->offset; + } + + /* Remove the p->overstep width. */ + + bp += (int)/* LINTED */ + p->overstep; + p->overstep = 0; + } + + /* + * Skip leading tabs, they were handled above. + */ + while (i < (int)p->col && '\t' == p->buf[i]) + i++; + + /* Write out the [remaining] word. */ + for ( ; i < (int)p->col; i++) { + if (vend > bp && jhy > 0 && i > jhy) + break; + if ('\t' == p->buf[i]) + break; + if (' ' == p->buf[i]) { + while (' ' == p->buf[i]) { /* NOTE(review): scan is not bounded by p->col, may read past the buffered data -- confirm */ + vbl++; + i++; + } + break; + } + if (ASCII_NBRSP == p->buf[i]) { + vbl++; + continue; + } + + /* + * Now we definitely know there will be + * printable characters to output, + * so write preceding white space now.
+ */ + if (vbl) { + (*p->advance)(p, vbl); + p->viscol += vbl; + vbl = 0; + } + + if (ASCII_HYPH == p->buf[i]) + (*p->letter)(p, '-'); + else + (*p->letter)(p, p->buf[i]); + + p->viscol += 1; + } + vend += vbl; + vis = vend; + } + /* The buffer is consumed; what follows only finishes the line. */ + p->col = 0; + p->overstep = 0; + + if ( ! (TERMP_NOBREAK & p->flags)) { + p->viscol = 0; + (*p->endline)(p); + return; + } + + if (TERMP_HANG & p->flags) { + /* We need one blank after the tag. */ + p->overstep = /* LINTED */ + vis - maxvis + 1; + + /* + * Behave exactly the same way as groff: + * If we have overstepped the margin, temporarily move + * it to the right and flag the rest of the line to be + * shorter. + * If we landed right at the margin, be happy. + * If we are one step before the margin, temporarily + * move it one step LEFT and flag the rest of the line + * to be longer. + */ + if (p->overstep >= -1) { + assert((int)maxvis + p->overstep >= 0); + /* LINTED */ + maxvis += p->overstep; + } else + p->overstep = 0; + + } else if (TERMP_DANGLE & p->flags) + return; + + /* Right-pad. */ + if (maxvis > vis + /* LINTED */ + ((TERMP_TWOSPACE & p->flags) ? 1 : 0)) { + p->viscol += maxvis - vis; + (*p->advance)(p, maxvis - vis); + vis += (maxvis - vis); + } else { /* ...or newline break. */ + (*p->endline)(p); + p->viscol = p->rmargin; + (*p->advance)(p, p->rmargin); + } +} + + +/* + * A newline only breaks an existing line; it won't assert vertical + * space. All data in the output buffer is flushed prior to the newline + * assertion. + */ +void +term_newln(struct termp *p) +{ + + p->flags |= TERMP_NOSPACE; + if (0 == p->col && 0 == p->viscol) { + p->flags &= ~TERMP_NOLPAD; + return; + } + term_flushln(p); + p->flags &= ~TERMP_NOLPAD; +} + + +/* + * Asserts a vertical space (a full, empty line-break between lines). + * Note that if used twice, this will cause two blank spaces and so on. + * All data in the output buffer is flushed prior to the newline + * assertion.
+ */ +void +term_vspace(struct termp *p) +{ + + term_newln(p); + p->viscol = 0; + (*p->endline)(p); +} + +/* Look the escape up with chars_a2ascii() and buffer the result, if any. */ +static void +spec(struct termp *p, const char *word, size_t len) +{ + const char *rhs; + size_t sz; + + rhs = chars_a2ascii(p->symtab, word, len, &sz); + if (rhs) + encode(p, rhs, sz); +} + +/* Look the escape up with chars_a2res() and buffer the result, if any. */ +static void +res(struct termp *p, const char *word, size_t len) +{ + const char *rhs; + size_t sz; + + rhs = chars_a2res(p->symtab, word, len, &sz); + if (rhs) + encode(p, rhs, sz); +} + +/* Swap the current font with the last-set font. */ +void +term_fontlast(struct termp *p) +{ + enum termfont f; + + f = p->fontl; + p->fontl = p->fontq[p->fonti]; + p->fontq[p->fonti] = f; +} + +/* Replace the current font, remembering the old one as last-set. */ +void +term_fontrepl(struct termp *p, enum termfont f) +{ + + p->fontl = p->fontq[p->fonti]; + p->fontq[p->fonti] = f; +} + +/* Push a font; asserts that the new index stays below 10. */ +void +term_fontpush(struct termp *p, enum termfont f) +{ + + assert(p->fonti + 1 < 10); + p->fontl = p->fontq[p->fonti]; + p->fontq[++p->fonti] = f; +} + +/* Return the address of the current stack slot, a key for term_fontpopq(). */ +const void * +term_fontq(struct termp *p) +{ + + return(&p->fontq[p->fonti]); +} + +/* Return the font on top of the stack. */ +enum termfont +term_fonttop(struct termp *p) +{ + + return(p->fontq[p->fonti]); +} + +/* Pop fonts until the slot identified by key is on top; key must be found. */ +void +term_fontpopq(struct termp *p, const void *key) +{ + + while (p->fonti >= 0 && key != &p->fontq[p->fonti]) + p->fonti--; + assert(p->fonti >= 0); +} + +/* Pop the top font; asserts that the index is not already 0. */ +void +term_fontpop(struct termp *p) +{ + + assert(p->fonti); + p->fonti--; +} + + +/* + * Handle pwords, partial words, which may be either a single word or a + * phrase that cannot be broken down (such as a literal string). This + * handles word styling. + */ +void +term_word(struct termp *p, const char *word) +{ + const char *sv, *seq; + int sz; + size_t ssz; + enum roffdeco deco; + + sv = word; + /* A lone closing delimiter sets NOSPACE unless TERMP_IGNDELIM is set. */ + if (word[0] && '\0' == word[1]) + switch (word[0]) { + case('.'): + /* FALLTHROUGH */ + case(','): + /* FALLTHROUGH */ + case(';'): + /* FALLTHROUGH */ + case(':'): + /* FALLTHROUGH */ + case('?'): + /* FALLTHROUGH */ + case('!'): + /* FALLTHROUGH */ + case(')'): + /* FALLTHROUGH */ + case(']'): + if ( !
(TERMP_IGNDELIM & p->flags)) + p->flags |= TERMP_NOSPACE; + break; + default: + break; + } + /* Word separator: one blank, or two when TERMP_SENTENCE is set. */ + if ( ! (TERMP_NOSPACE & p->flags)) { + bufferc(p, ' '); + if (TERMP_SENTENCE & p->flags) + bufferc(p, ' '); + } + + if ( ! (p->flags & TERMP_NONOSPACE)) + p->flags &= ~TERMP_NOSPACE; + + p->flags &= ~TERMP_SENTENCE; + + /* FIXME: use strcspn. */ + /* Copy plain bytes; a backslash starts an escape for a2roffdeco(). */ + while (*word) { + if ('\\' != *word) { + encode(p, word, 1); + word++; + continue; + } + + seq = ++word; + sz = a2roffdeco(&deco, &seq, &ssz); + + switch (deco) { + case (DECO_RESERVED): + res(p, seq, ssz); + break; + case (DECO_SPECIAL): + spec(p, seq, ssz); + break; + case (DECO_BOLD): + term_fontrepl(p, TERMFONT_BOLD); + break; + case (DECO_ITALIC): + term_fontrepl(p, TERMFONT_UNDER); + break; + case (DECO_ROMAN): + term_fontrepl(p, TERMFONT_NONE); + break; + case (DECO_PREVIOUS): + term_fontlast(p); + break; + default: + break; + } + + word += sz; + if (DECO_NOSPACE == deco && '\0' == *word) + p->flags |= TERMP_NOSPACE; + } + + /* + * Note that we don't process the pipe: the parser sees it as + * punctuation, but we don't in terms of typography.
+ */ + if (sv[0] && 0 == sv[1]) + switch (sv[0]) { + case('('): + /* FALLTHROUGH */ + case('['): + p->flags |= TERMP_NOSPACE; + break; + default: + break; + } +} + +/* Grow buf so that maxcols exceeds sz: 1024 at first, then times four. */ +static void +adjbuf(struct termp *p, size_t sz) +{ + + if (0 == p->maxcols) + p->maxcols = 1024; + while (sz >= p->maxcols) + p->maxcols <<= 2; + + p->buf = realloc(p->buf, p->maxcols); + if (NULL == p->buf) { + perror(NULL); + exit(EXIT_FAILURE); + } +} + +/* Append sz bytes of word to the output buffer, growing it as needed. */ +static void +buffera(struct termp *p, const char *word, size_t sz) +{ + + if (p->col + sz >= p->maxcols) + adjbuf(p, p->col + sz); + + memcpy(&p->buf[(int)p->col], word, sz); + p->col += sz; +} + +/* Append one character to the output buffer, growing it as needed. */ +static void +bufferc(struct termp *p, char c) +{ + + if (p->col + 1 >= p->maxcols) + adjbuf(p, p->col + 1); + + p->buf[(int)p->col++] = c; +} + +/* Bold is written as c BS c, underline as _ BS c; graphic characters only. */ +static void +encode(struct termp *p, const char *word, size_t sz) +{ + enum termfont f; + int i; + + /* + * Encode and buffer a string of characters. If the current + * font mode is unset, buffer directly, else encode then buffer + * character by character. + */ + + if (TERMFONT_NONE == (f = term_fonttop(p))) { + buffera(p, word, sz); + return; + } + + for (i = 0; i < (int)sz; i++) { + if ( ! isgraph((u_char)word[i])) { + bufferc(p, word[i]); + continue; + } + + if (TERMFONT_UNDER == f) + bufferc(p, '_'); + else + bufferc(p, word[i]); + + bufferc(p, 8); + bufferc(p, word[i]); + } +} + +/* Convert a scaling unit to a vertical span; negative results become 0. */ +size_t +term_vspan(const struct roffsu *su) +{ + double r; + + switch (su->unit) { + case (SCALE_CM): + r = su->scale * 2; + break; + case (SCALE_IN): + r = su->scale * 6; + break; + case (SCALE_PC): + r = su->scale; + break; + case (SCALE_PT): + r = su->scale / 8; + break; + case (SCALE_MM): + r = su->scale / 1000; /* NOTE(review): not 1/10 of the CM factor -- confirm */ + break; + case (SCALE_VS): + r = su->scale; + break; + default: + r = su->scale - 1; + break; + } + + if (r < 0.0) + r = 0.0; + return(/* LINTED */(size_t) + r); +} + +/* Convert a scaling unit to a horizontal span; negative results become 0. */ +size_t +term_hspan(const struct roffsu *su) +{ + double r; + + /* XXX: CM, IN, and PT are approximations.
+ */ + + switch (su->unit) { + case (SCALE_CM): + r = 4 * su->scale; + break; + case (SCALE_IN): + /* XXX: this is an approximation. */ + r = 10 * su->scale; + break; + case (SCALE_PC): + r = (10 * su->scale) / 6; + break; + case (SCALE_PT): + r = (10 * su->scale) / 72; + break; + case (SCALE_MM): + r = su->scale / 1000; /* FIXME: double-check. */ + break; + case (SCALE_VS): + r = su->scale * 2 - 1; /* FIXME: double-check. */ + break; + default: + r = su->scale; + break; + } + + if (r < 0.0) + r = 0.0; + return((size_t)/* LINTED */ + r); +} + + diff --git a/commands/mdocml/term.h b/commands/mdocml/term.h new file mode 100644 index 000000000..12928da61 --- /dev/null +++ b/commands/mdocml/term.h @@ -0,0 +1,123 @@ +/* $Id: term.h,v 1.64 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ +#ifndef TERM_H +#define TERM_H + +__BEGIN_DECLS + +struct termp; + +enum termenc { + TERMENC_ASCII +}; + +enum termtype { + TERMTYPE_CHAR, + TERMTYPE_PS +}; + +enum termfont { + TERMFONT_NONE = 0, + TERMFONT_BOLD, + TERMFONT_UNDER +}; + +#define TERM_MAXMARGIN 100000 /* FIXME */ + +typedef void (*term_margin)(struct termp *, const void *); + +struct termp_ps { + int psstate; /* state of ps output */ +#define PS_INLINE (1 << 0) /* we're in a word */ +#define PS_MARGINS (1 << 1) /* we're in the margins */ + size_t pscol; /* visible column */ + size_t psrow; /* visible row */ + char *psmarg; /* margin buf */ + size_t psmargsz; /* margin buf size */ + size_t psmargcur; /* current pos in margin buf */ + size_t pspage; /* current page */ + char last; /* character buffer */ + enum termfont lastf; /* last set font */ +}; + +struct termp { + enum termtype type; /* Output engine: char or PS. */ + size_t defrmargin; /* Right margin of the device. */ + size_t rmargin; /* Current right margin. */ + size_t maxrmargin; /* Max right margin. */ + size_t maxcols; /* Max size of buf. */ + size_t offset; /* Margin offset. */ + size_t tabwidth; /* Distance of tab positions. */ + size_t col; /* Bytes in buf. */ + size_t viscol; /* Chars on current line. */ + int overstep; /* See term_flushln(). */ + int flags; +#define TERMP_SENTENCE (1 << 1) /* Space before a sentence. */ +#define TERMP_NOSPACE (1 << 2) /* No space before words. */ +#define TERMP_NOLPAD (1 << 3) /* See term_flushln(). */ +#define TERMP_NOBREAK (1 << 4) /* See term_flushln(). */ +#define TERMP_IGNDELIM (1 << 6) /* Delims like regulars. */ +#define TERMP_NONOSPACE (1 << 7) /* No space (no autounset). */ +#define TERMP_DANGLE (1 << 8) /* See term_flushln(). */ +#define TERMP_HANG (1 << 9) /* See term_flushln(). */ +#define TERMP_TWOSPACE (1 << 10) /* See term_flushln(). */ +#define TERMP_NOSPLIT (1 << 11) /* See termp_an_pre/post(). */ +#define TERMP_SPLIT (1 << 12) /* See termp_an_pre/post().
+ */ +#define TERMP_ANPREC (1 << 13) /* See termp_an_pre(). */ + char *buf; /* Output buffer. */ + enum termenc enc; /* Type of encoding. */ + void *symtab; /* Encoded-symbol table. */ + enum termfont fontl; /* Last font set. */ + enum termfont fontq[10]; /* Symmetric fonts (the font stack). */ + int fonti; /* Index of font stack. */ + term_margin headf; /* invoked to print head */ + term_margin footf; /* invoked to print foot */ + void (*letter)(struct termp *, char); /* output one character */ + void (*begin)(struct termp *); /* run by term_begin() */ + void (*end)(struct termp *); /* run by term_end() */ + void (*endline)(struct termp *); /* finish the output line */ + void (*advance)(struct termp *, size_t); /* skip forward on the line */ + const void *argf; /* arg for headf/footf */ + union { + struct termp_ps ps; + } engine; +}; + +struct termp *term_alloc(enum termenc); +void term_free(struct termp *); +void term_newln(struct termp *); +void term_vspace(struct termp *); +void term_word(struct termp *, const char *); +void term_flushln(struct termp *); +void term_begin(struct termp *, term_margin, + term_margin, const void *); +void term_end(struct termp *); + +size_t term_hspan(const struct roffsu *); +size_t term_vspan(const struct roffsu *); + +enum termfont term_fonttop(struct termp *); +const void *term_fontq(struct termp *); +void term_fontpush(struct termp *, enum termfont); +void term_fontpop(struct termp *); +void term_fontpopq(struct termp *, const void *); +void term_fontrepl(struct termp *, enum termfont); +void term_fontlast(struct termp *); + +__END_DECLS + +#endif /*!TERM_H*/ diff --git a/commands/mdocml/term_ascii.c b/commands/mdocml/term_ascii.c new file mode 100644 index 000000000..84d946486 --- /dev/null +++ b/commands/mdocml/term_ascii.c @@ -0,0 +1,128 @@ +/* $Id: term_ascii.c,v 1.4 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies.
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include + +#include "out.h" +#include "term.h" +#include "main.h" + +static void ascii_endline(struct termp *); +static void ascii_letter(struct termp *, char); +static void ascii_begin(struct termp *); +static void ascii_advance(struct termp *, size_t); +static void ascii_end(struct termp *); + +/* Create a character-terminal context; outopts may hold a width suboption. */ +void * +ascii_alloc(char *outopts) +{ + struct termp *p; + const char *toks[2]; + char *v; + + if (NULL == (p = term_alloc(TERMENC_ASCII))) + return(NULL); + + p->type = TERMTYPE_CHAR; + p->letter = ascii_letter; + p->begin = ascii_begin; + p->end = ascii_end; + p->endline = ascii_endline; + p->advance = ascii_advance; + + toks[0] = "width"; + toks[1] = NULL; + + while (outopts && *outopts) + switch (getsubopt(&outopts, UNCONST(toks), &v)) { + case (0): + p->defrmargin = (size_t)atoi(v); /* NOTE(review): v is presumably NULL for a bare width, and a negative value wraps in the cast -- confirm */ + break; + default: + break; + } + + /* Enforce a lower boundary.
+ */ + if (p->defrmargin < 58) + p->defrmargin = 58; + + return(p); +} + +/* Release a context created by ascii_alloc(). */ +void +ascii_free(void *arg) +{ + + term_free((struct termp *)arg); +} + +/* Write one character to standard output. */ +/* ARGSUSED */ +static void +ascii_letter(struct termp *p, char c) +{ + + putchar(c); +} + +/* Begin hook: print the page head through the headf callback. */ +static void +ascii_begin(struct termp *p) +{ + + (*p->headf)(p, p->argf); +} + +/* End hook: print the page foot through the footf callback. */ +static void +ascii_end(struct termp *p) +{ + + (*p->footf)(p, p->argf); +} + +/* End the current output line with a newline character. */ +/* ARGSUSED */ +static void +ascii_endline(struct termp *p) +{ + + putchar('\n'); +} + +/* Advance len columns on the current output line. */ +/* ARGSUSED */ +static void +ascii_advance(struct termp *p, size_t len) +{ + size_t i; + + /* Just print whitespace on the terminal. */ + for (i = 0; i < len; i++) + putchar(' '); +} diff --git a/commands/mdocml/term_ps.c b/commands/mdocml/term_ps.c new file mode 100644 index 000000000..b6e0e2b5d --- /dev/null +++ b/commands/mdocml/term_ps.c @@ -0,0 +1,430 @@ +/* $Id: term_ps.c,v 1.10 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include + +#include "out.h" +#include "main.h" +#include "term.h" +/* Page geometry, presumably in PostScript points (792 high) -- TODO confirm. */ +#define PS_CHAR_WIDTH 6 +#define PS_CHAR_HEIGHT 12 +#define PS_CHAR_TOPMARG (792 - 24) +#define PS_CHAR_TOP (PS_CHAR_TOPMARG - 36) +#define PS_CHAR_LEFT 36 +#define PS_CHAR_BOTMARG 24 +#define PS_CHAR_BOT (PS_CHAR_BOTMARG + 36) +/* Grow the margin buffer by at least PS_BUFSLOP when sz more bytes do not fit. */ +#define PS_BUFSLOP 128 +#define PS_GROWBUF(p, sz) \ + do if ((p)->engine.ps.psmargcur + (sz) > \ + (p)->engine.ps.psmargsz) { \ + (p)->engine.ps.psmargsz += /* CONSTCOND */ \ + MAX(PS_BUFSLOP, (sz)); \ + (p)->engine.ps.psmarg = realloc \ + ((p)->engine.ps.psmarg, \ + (p)->engine.ps.psmargsz); \ + if (NULL == (p)->engine.ps.psmarg) { \ + perror(NULL); \ + exit(EXIT_FAILURE); \ + } \ + } while (/* CONSTCOND */ 0) + + +#ifndef MAX +#define MAX(x,y) ((x) > (y) ? (x) : (y)) +#endif +static void ps_letter(struct termp *, char); +static void ps_begin(struct termp *); +static void ps_end(struct termp *); +static void ps_advance(struct termp *, size_t); +static void ps_endline(struct termp *); +static void ps_fclose(struct termp *); +static void ps_pclose(struct termp *); +static void ps_pletter(struct termp *, char); +static void ps_printf(struct termp *, const char *, ...); +static void ps_putchar(struct termp *, char); +static void ps_setfont(struct termp *, enum termfont); + +/* Create a PostScript output context wired to the ps_* callbacks. */ +void * +ps_alloc(void) +{ + struct termp *p; + + if (NULL == (p = term_alloc(TERMENC_ASCII))) + return(NULL); + + p->type = TERMTYPE_PS; + p->letter = ps_letter; + p->begin = ps_begin; + p->end = ps_end; + p->advance = ps_advance; + p->endline = ps_endline; + return(p); +} + +/* Release the margin buffer, then the context. */ +void +ps_free(void *arg) +{ + struct termp *p; + + p = (struct termp *)arg; + + if (p->engine.ps.psmarg) + free(p->engine.ps.psmarg); + + term_free(p); +} + +/* printf() to stdout, or append to the margin buffer while PS_MARGINS is set. */ +static void +ps_printf(struct termp *p, const char *fmt, ...)
+{ + va_list ap; + int pos; + + va_start(ap, fmt); + + /* + * If we're running in regular mode, then pipe directly into + * vprintf(). If we're processing margins, then push the data + * into our growable margin buffer. + */ + + if ( ! (PS_MARGINS & p->engine.ps.psstate)) { + vprintf(fmt, ap); + va_end(ap); + return; + } + + /* + * XXX: I assume that the in-margin print won't exceed + * PS_BUFSLOP (128 bytes), which is reasonable but still an + * assumption: longer output is cut short by vsnprintf() below. + */ + + PS_GROWBUF(p, PS_BUFSLOP); + + pos = (int)p->engine.ps.psmargcur; + vsnprintf(&p->engine.ps.psmarg[pos], PS_BUFSLOP, fmt, ap); + p->engine.ps.psmargcur = strlen(p->engine.ps.psmarg); + + va_end(ap); +} + +/* Single-character counterpart of ps_printf(). */ +static void +ps_putchar(struct termp *p, char c) +{ + int pos; + + /* See ps_printf(). */ + + if ( ! (PS_MARGINS & p->engine.ps.psstate)) { + putchar(c); + return; + } + + PS_GROWBUF(p, 2); + + pos = (int)p->engine.ps.psmargcur++; + p->engine.ps.psmarg[pos++] = c; + p->engine.ps.psmarg[pos] = '\0'; +} + +/* Emit the buffered margins, the final showpage and the EOF marker. */ +/* ARGSUSED */ +static void +ps_end(struct termp *p) +{ + + /* + * At the end of the file, do one last showpage. This is the + * same behaviour as groff(1) and works for multiple pages as + * well as just one. + */ + + assert(0 == p->engine.ps.psstate); + assert('\0' == p->engine.ps.last); + assert(p->engine.ps.psmarg && p->engine.ps.psmarg[0]); + printf("%s", p->engine.ps.psmarg); + printf("showpage\n"); + printf("%s\n", "%%EOF"); +} + +/* Render head and foot into the margin buffer, then start the document. */ +static void +ps_begin(struct termp *p) +{ + + /* + * Print margins into margin buffer. Nothing gets output to the + * screen yet, so we don't need to initialise the primary state.
+ */ + + if (p->engine.ps.psmarg) { + assert(p->engine.ps.psmargsz); + p->engine.ps.psmarg[0] = '\0'; + } + + p->engine.ps.psmargcur = 0; + p->engine.ps.psstate = PS_MARGINS; + p->engine.ps.pscol = PS_CHAR_LEFT; + p->engine.ps.psrow = PS_CHAR_TOPMARG; + + ps_setfont(p, TERMFONT_NONE); + + (*p->headf)(p, p->argf); + (*p->endline)(p); + + p->engine.ps.pscol = PS_CHAR_LEFT; + p->engine.ps.psrow = PS_CHAR_BOTMARG; + + (*p->footf)(p, p->argf); + (*p->endline)(p); + + p->engine.ps.psstate &= ~PS_MARGINS; + + assert(0 == p->engine.ps.psstate); + assert(p->engine.ps.psmarg); + assert('\0' != p->engine.ps.psmarg[0]); + + /* + * Print header and initialise page state. Following this, + * stuff gets printed to the screen, so make sure we're sane. + */ + + printf("%s\n", "%!PS"); + ps_setfont(p, TERMFONT_NONE); + p->engine.ps.pscol = PS_CHAR_LEFT; + p->engine.ps.psrow = PS_CHAR_TOP; +} + +/* Write one character inside a string, opening it with moveto if needed. */ +static void +ps_pletter(struct termp *p, char c) +{ + + /* + * If we're not in a PostScript "word" context, then open one + * now at the current cursor. + */ + + if ( ! (PS_INLINE & p->engine.ps.psstate)) { + ps_printf(p, "%zu %zu moveto\n(", + p->engine.ps.pscol, + p->engine.ps.psrow); + p->engine.ps.psstate |= PS_INLINE; + } + + /* + * We need to escape these characters as per the PostScript + * specification. We would also escape non-graphable characters + * (like tabs), but none of them would get to this point and + * it's superfluous to abort() on them. + */ + + switch (c) { + case ('('): + /* FALLTHROUGH */ + case (')'): + /* FALLTHROUGH */ + case ('\\'): + ps_putchar(p, '\\'); + break; + default: + break; + } + + /* Write the character and adjust where we are on the page. */ + + ps_putchar(p, c); + p->engine.ps.pscol += PS_CHAR_WIDTH; +} + +/* Close an open string with show; does nothing outside a word context. */ +static void +ps_pclose(struct termp *p) +{ + + /* + * Spit out that we're exiting a word context (this is a + * "partial close" because we don't check the last-char buffer + * or anything). + */ + + if ( !
(PS_INLINE & p->engine.ps.psstate)) + return; + + ps_printf(p, ") show\n"); + p->engine.ps.psstate &= ~PS_INLINE; +} + +/* Flush the held character, then close any open string. */ +static void +ps_fclose(struct termp *p) +{ + + /* + * Strong closure: if we have a last-char, spit it out after + * checking that we're in the right font mode. This will of + * course open a new scope, if applicable. + * + * Following this, close out any scope that's open. + */ + + if ('\0' != p->engine.ps.last) { + if (p->engine.ps.lastf != TERMFONT_NONE) { + ps_pclose(p); + ps_setfont(p, TERMFONT_NONE); + } + ps_pletter(p, p->engine.ps.last); + p->engine.ps.last = '\0'; + } + + if ( ! (PS_INLINE & p->engine.ps.psstate)) + return; + + ps_pclose(p); +} + +/* Letter callback: holds one character back to detect backspace encodings. */ +static void +ps_letter(struct termp *p, char c) +{ + char cc; + + /* + * State machine dictates whether to buffer the last character + * or not. Basically, encoded words are detected by checking if + * we're an "8" (backspace) and switching on the buffer. Then we put "8" in + * our buffer, and on the next character, flush both character + * and buffer. Thus, "regular" words are detected by having a + * regular character and a regular buffer character. + */ + + if ('\0' == p->engine.ps.last) { + assert(8 != c); + p->engine.ps.last = c; + return; + } else if (8 == p->engine.ps.last) { + assert(8 != c); + p->engine.ps.last = '\0'; + } else if (8 == c) { + assert(8 != p->engine.ps.last); + if ('_' == p->engine.ps.last) { + if (p->engine.ps.lastf != TERMFONT_UNDER) { + ps_pclose(p); + ps_setfont(p, TERMFONT_UNDER); + } + } else if (p->engine.ps.lastf != TERMFONT_BOLD) { + ps_pclose(p); + ps_setfont(p, TERMFONT_BOLD); + } + p->engine.ps.last = c; + return; + } else { + if (p->engine.ps.lastf != TERMFONT_NONE) { + ps_pclose(p); + ps_setfont(p, TERMFONT_NONE); + } + cc = p->engine.ps.last; + p->engine.ps.last = c; + c = cc; + } + + ps_pletter(p, c); +} + + +static void +ps_advance(struct termp *p, size_t len) +{ + + /* + * Advance some spaces.
This can probably be made smarter, + * i.e., to have multiple space-separated words in the same + * scope, but this is easier: just close out the current scope + * and readjust our column settings. + */ + + ps_fclose(p); + p->engine.ps.pscol += len ? len * PS_CHAR_WIDTH : 0; +} + + +static void +ps_endline(struct termp *p) +{ + + /* Close out any scopes we have open: we're at eoln. */ + + ps_fclose(p); + + /* + * If we're in the margin, don't try to recalculate our current + * row. XXX: if the column tries to be fancy with multiple + * lines, we'll do nasty stuff. + */ + + if (PS_MARGINS & p->engine.ps.psstate) + return; + + /* + * Put us down a line. If we're at the page bottom, spit out a + * showpage and restart our row. + */ + + p->engine.ps.pscol = PS_CHAR_LEFT; + if (p->engine.ps.psrow >= PS_CHAR_HEIGHT + PS_CHAR_BOT) { + p->engine.ps.psrow -= PS_CHAR_HEIGHT; + return; + } + + assert(p->engine.ps.psmarg && p->engine.ps.psmarg[0]); + printf("%s", p->engine.ps.psmarg); + printf("showpage\n"); + p->engine.ps.psrow = PS_CHAR_TOP; +} + + +static void +ps_setfont(struct termp *p, enum termfont f) +{ + + if (TERMFONT_BOLD == f) + ps_printf(p, "/Courier-Bold\n"); + else if (TERMFONT_UNDER == f) + ps_printf(p, "/Courier-Oblique\n"); + else + ps_printf(p, "/Courier\n"); + + ps_printf(p, "10 selectfont\n"); + p->engine.ps.lastf = f; +} + diff --git a/commands/mdocml/test-strlcat.c b/commands/mdocml/test-strlcat.c new file mode 100644 index 000000000..5d450dd04 --- /dev/null +++ b/commands/mdocml/test-strlcat.c @@ -0,0 +1,8 @@ +#include + +int +main(int argc, char **argv) +{ + strlcat(argv[0], argv[1], 10); + return 0; +} diff --git a/commands/mdocml/test-strlcpy.c b/commands/mdocml/test-strlcpy.c new file mode 100644 index 000000000..c7d182aaf --- /dev/null +++ b/commands/mdocml/test-strlcpy.c @@ -0,0 +1,8 @@ +#include + +int +main(int argc, char **argv) +{ + strlcpy(argv[0], argv[1], 10); + return 0; +} diff --git a/commands/mdocml/tree.c b/commands/mdocml/tree.c new 
file mode 100644
index 000000000..9125e6a5e
--- /dev/null
+++ b/commands/mdocml/tree.c
@@ -0,0 +1,213 @@
+/*	$Id: tree.c,v 1.21 2010/06/19 20:46:28 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009 Kristaps Dzonsons
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "mandoc.h"
+#include "mdoc.h"
+#include "man.h"
+#include "main.h"
+
+static	void	print_mdoc(const struct mdoc_node *, int);
+static	void	print_man(const struct man_node *, int);
+
+/* Dump the mdoc tree rooted at mdoc_node(mdoc); arg is not used. */
+/* ARGSUSED */
+void
+tree_mdoc(void *arg, const struct mdoc *mdoc)
+{
+
+	print_mdoc(mdoc_node(mdoc), 0);
+}
+
+/* Dump the man tree rooted at man_node(man); arg is not used. */
+/* ARGSUSED */
+void
+tree_man(void *arg, const struct man *man)
+{
+
+	print_man(man_node(man), 0);
+}
+
+/*
+ * Print one line per node: n itself, then its subtree one level deeper,
+ * then each following sibling at the same level.  A line holds the node
+ * label, its type name, any macro arguments, and the line:pos position.
+ */
+static void
+print_mdoc(const struct mdoc_node *n, int indent)
+{
+	const char	 *p, *t;
+	int		  i, j;
+	size_t		  argc;
+	struct mdoc_argv *argv;
+
+	/* Siblings are walked in this loop; only children recurse. */
+	for ( ; NULL != n; n = n->next) {
+		argv = NULL;
+		argc = 0;
+
+		switch (n->type) {
+		case (MDOC_ROOT):
+			t = "root";
+			break;
+		case (MDOC_BLOCK):
+			t = "block";
+			break;
+		case (MDOC_HEAD):
+			t = "block-head";
+			break;
+		case (MDOC_BODY):
+			t = "block-body";
+			break;
+		case (MDOC_TAIL):
+			t = "block-tail";
+			break;
+		case (MDOC_ELEM):
+			t = "elem";
+			break;
+		case (MDOC_TEXT):
+			t = "text";
+			break;
+		default:
+			abort();
+			/* NOTREACHED */
+		}
+
+		switch (n->type) {
+		case (MDOC_TEXT):
+			p = n->string;
+			break;
+		case (MDOC_BODY):
+			p = mdoc_macronames[n->tok];
+			break;
+		case (MDOC_HEAD):
+			p = mdoc_macronames[n->tok];
+			break;
+		case (MDOC_TAIL):
+			p = mdoc_macronames[n->tok];
+			break;
+		case (MDOC_ELEM):
+			p = mdoc_macronames[n->tok];
+			if (n->args) {
+				argv = n->args->argv;
+				argc = n->args->argc;
+			}
+			break;
+		case (MDOC_BLOCK):
+			p = mdoc_macronames[n->tok];
+			if (n->args) {
+				argv = n->args->argv;
+				argc = n->args->argc;
+			}
+			break;
+		case (MDOC_ROOT):
+			p = "root";
+			break;
+		default:
+			abort();
+			/* NOTREACHED */
+		}
+
+		for (i = 0; i < indent; i++)
+			(void)printf(" ");
+		(void)printf("%s (%s)", p, t);
+
+		for (i = 0; i < (int)argc; i++) {
+			(void)printf(" -%s", mdoc_argnames[argv[i].arg]);
+			if (argv[i].sz > 0)
+				(void)printf(" [");
+			for (j = 0; j < (int)argv[i].sz; j++)
+				(void)printf(" [%s]", argv[i].value[j]);
+			if (argv[i].sz > 0)
+				(void)printf(" ]");
+		}
+
+		(void)printf(" %d:%d\n", n->line, n->pos);
+
+		if (n->child)
+			print_mdoc(n->child, indent + 1);
+	}
+}
+
+/* Print n, its subtree and its following siblings as "label (type) line:pos". */
+static void
+print_man(const struct man_node *n, int indent)
+{
+	const char	 *p, *t;
+	int		  i;
+
+	for ( ; NULL != n; n = n->next) {
+		switch (n->type) {
+		case (MAN_ROOT):
+			t = "root";
+			break;
+		case (MAN_ELEM):
+			t = "elem";
+			break;
+		case (MAN_TEXT):
+			t = "text";
+			break;
+		case (MAN_BLOCK):
+			t = "block";
+			break;
+		case (MAN_HEAD):
+			t = "block-head";
+			break;
+		case (MAN_BODY):
+			t = "block-body";
+			break;
+		default:
+			abort();
+			/* NOTREACHED */
+		}
+
+		switch (n->type) {
+		case (MAN_TEXT):
+			p = n->string;
+			break;
+		case (MAN_ELEM):
+			/* FALLTHROUGH */
+		case (MAN_BLOCK):
+			/* FALLTHROUGH */
+		case (MAN_HEAD):
+			/* FALLTHROUGH */
+		case (MAN_BODY):
+			p = man_macronames[n->tok];
+			break;
+		case (MAN_ROOT):
+			p = "root";
+			break;
+		default:
+			abort();
+			/* NOTREACHED */
+		}
+
+		for (i = 0; i < indent; i++)
+			(void)printf(" ");
+		(void)printf("%s (%s) %d:%d\n", p, t, n->line, n->pos);
+
+		if (n->child)
+			print_man(n->child, indent + 1);
+	}
+}
diff --git a/commands/mdocml/vol.c b/commands/mdocml/vol.c
new file mode 100644
index 000000000..144d363ff
--- /dev/null
+++ b/commands/mdocml/vol.c
@@ -0,0 +1,38 @@
+/*	$Id: vol.c,v 1.8 2010/06/19 20:46:28 kristaps Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "mandoc.h"
+#include "libmdoc.h"
+
+#define LINE(x, y) \
+	if (0 == strcmp(p, x)) return(y);
+
+const char *
+mdoc_a2vol(const char *p)
+{
+	/* Return the title whose LINE() key in vol.in equals p, else NULL. */
+#include "vol.in"
+
+	return(NULL);
+}
diff --git a/commands/mdocml/vol.in b/commands/mdocml/vol.in
new file mode 100644
index 000000000..7650b57a1
--- /dev/null
+++ b/commands/mdocml/vol.in
@@ -0,0 +1,35 @@
+/*	$Id: vol.in,v 1.6 2010/06/19 20:46:28 kristaps Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * This file defines volume titles for .Dt.
+ *
+ * Be sure to escape strings.
+ */
+
+LINE("USD",		"User\'s Supplementary Documents")
+LINE("PS1",		"Programmer\'s Supplementary Documents")
+LINE("AMD",		"Ancestral Manual Documents")
+LINE("SMM",		"System Manager\'s Manual")
+LINE("URM",		"User\'s Reference Manual")
+LINE("PRM",		"Programmer\'s Manual")
+LINE("KM",		"Kernel Manual")
+LINE("IND",		"Manual Master Index")
+LINE("MMI",		"Manual Master Index")
+LINE("LOCAL",		"Local Manual")
+LINE("LOC",		"Local Manual")
+LINE("CON",		"Contributed Software Manual")