2011-09-26 18:19:29 +02:00
|
|
|
.\" $Vendor-Id: mdoc.3,v 1.55 2011/01/07 15:07:21 kristaps Exp $
|
2010-06-26 04:20:06 +02:00
|
|
|
.\"
|
2011-09-26 18:19:29 +02:00
|
|
|
.\" Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
|
|
|
|
.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
|
2010-06-26 04:20:06 +02:00
|
|
|
.\"
|
|
|
|
.\" Permission to use, copy, modify, and distribute this software for any
|
|
|
|
.\" purpose with or without fee is hereby granted, provided that the above
|
|
|
|
.\" copyright notice and this permission notice appear in all copies.
|
|
|
|
.\"
|
|
|
|
.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
|
|
.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
|
|
.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
|
|
.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
|
|
.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
|
|
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
|
|
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
.\"
|
2011-09-26 18:19:29 +02:00
|
|
|
.Dd January 7, 2011
|
2010-06-26 04:20:06 +02:00
|
|
|
.Dt MDOC 3
|
|
|
|
.Os
|
|
|
|
.Sh NAME
|
|
|
|
.Nm mdoc ,
|
|
|
|
.Nm mdoc_alloc ,
|
|
|
|
.Nm mdoc_endparse ,
|
|
|
|
.Nm mdoc_free ,
|
|
|
|
.Nm mdoc_meta ,
|
|
|
|
.Nm mdoc_node ,
|
|
|
|
.Nm mdoc_parseln ,
|
|
|
|
.Nm mdoc_reset
|
|
|
|
.Nd mdoc macro compiler library
|
|
|
|
.Sh SYNOPSIS
|
|
|
|
.In mandoc.h
|
|
|
|
.In mdoc.h
|
|
|
|
.Vt extern const char * const * mdoc_macronames;
|
|
|
|
.Vt extern const char * const * mdoc_argnames;
|
2011-09-26 18:19:29 +02:00
|
|
|
.Ft int
|
|
|
|
.Fo mdoc_addspan
|
|
|
|
.Fa "struct mdoc *mdoc"
|
|
|
|
.Fa "const struct tbl_span *span"
|
|
|
|
.Fc
|
2010-06-26 04:20:06 +02:00
|
|
|
.Ft "struct mdoc *"
|
2011-09-26 18:19:29 +02:00
|
|
|
.Fo mdoc_alloc
|
|
|
|
.Fa "struct regset *regs"
|
|
|
|
.Fa "void *data"
|
|
|
|
.Fa "mandocmsg msgs"
|
|
|
|
.Fc
|
2010-06-26 04:20:06 +02:00
|
|
|
.Ft int
|
|
|
|
.Fn mdoc_endparse "struct mdoc *mdoc"
|
|
|
|
.Ft void
|
|
|
|
.Fn mdoc_free "struct mdoc *mdoc"
|
|
|
|
.Ft "const struct mdoc_meta *"
|
|
|
|
.Fn mdoc_meta "const struct mdoc *mdoc"
|
|
|
|
.Ft "const struct mdoc_node *"
|
|
|
|
.Fn mdoc_node "const struct mdoc *mdoc"
|
|
|
|
.Ft int
|
2011-09-26 18:19:29 +02:00
|
|
|
.Fo mdoc_parseln
|
|
|
|
.Fa "struct mdoc *mdoc"
|
|
|
|
.Fa "int line"
|
|
|
|
.Fa "char *buf"
|
|
|
|
.Fc
|
2010-06-26 04:20:06 +02:00
|
|
|
.Ft int
|
|
|
|
.Fn mdoc_reset "struct mdoc *mdoc"
|
|
|
|
.Sh DESCRIPTION
|
|
|
|
The
|
|
|
|
.Nm mdoc
|
|
|
|
library parses lines of
|
|
|
|
.Xr mdoc 7
|
|
|
|
input
|
|
|
|
into an abstract syntax tree (AST).
|
|
|
|
.Pp
|
|
|
|
In general, applications initiate a parsing sequence with
|
|
|
|
.Fn mdoc_alloc ,
|
|
|
|
parse each line in a document with
|
|
|
|
.Fn mdoc_parseln ,
|
|
|
|
close the parsing session with
|
|
|
|
.Fn mdoc_endparse ,
|
|
|
|
operate over the syntax tree returned by
|
|
|
|
.Fn mdoc_node
|
|
|
|
and
|
|
|
|
.Fn mdoc_meta ,
|
|
|
|
then free all allocated memory with
|
|
|
|
.Fn mdoc_free .
|
|
|
|
The
|
|
|
|
.Fn mdoc_reset
|
|
|
|
function may be used in order to reset the parser for another input
|
|
|
|
sequence.
|
|
|
|
.Ss Types
|
|
|
|
.Bl -ohang
|
|
|
|
.It Vt struct mdoc
|
2011-09-26 18:19:29 +02:00
|
|
|
An opaque type.
|
2010-06-26 04:20:06 +02:00
|
|
|
Its values are only used privately within the library.
|
|
|
|
.It Vt struct mdoc_node
|
|
|
|
A parsed node.
|
|
|
|
See
|
|
|
|
.Sx Abstract Syntax Tree
|
|
|
|
for details.
|
|
|
|
.El
|
|
|
|
.Ss Functions
|
2011-09-26 18:19:29 +02:00
|
|
|
If
|
|
|
|
.Fn mdoc_addspan ,
|
|
|
|
.Fn mdoc_parseln ,
|
|
|
|
or
|
|
|
|
.Fn mdoc_endparse
|
|
|
|
return 0, calls to any function but
|
|
|
|
.Fn mdoc_reset
|
|
|
|
or
|
|
|
|
.Fn mdoc_free
|
|
|
|
will raise an assertion.
|
2010-06-26 04:20:06 +02:00
|
|
|
.Bl -ohang
|
2011-09-26 18:19:29 +02:00
|
|
|
.It Fn mdoc_addspan
|
|
|
|
Add a table span to the parsing stream.
|
|
|
|
Returns 0 on failure, 1 on success.
|
2010-06-26 04:20:06 +02:00
|
|
|
.It Fn mdoc_alloc
|
|
|
|
Allocates a parsing structure.
|
|
|
|
The
|
|
|
|
.Fa data
|
|
|
|
pointer is passed to
|
|
|
|
.Fa msgs .
|
2011-09-26 18:19:29 +02:00
|
|
|
Always returns a valid pointer.
|
|
|
|
The pointer must be freed with
|
2010-06-26 04:20:06 +02:00
|
|
|
.Fn mdoc_free .
|
|
|
|
.It Fn mdoc_reset
|
|
|
|
Reset the parser for another parse routine.
|
|
|
|
After its use,
|
|
|
|
.Fn mdoc_parseln
|
|
|
|
behaves as if invoked for the first time.
|
|
|
|
If it returns 0, memory could not be allocated.
|
|
|
|
.It Fn mdoc_free
|
|
|
|
Free all resources of a parser.
|
|
|
|
The pointer is no longer valid after invocation.
|
|
|
|
.It Fn mdoc_parseln
|
|
|
|
Parse a NUL-terminated line of input.
|
|
|
|
This line should not contain the trailing newline.
|
|
|
|
Returns 0 on failure, 1 on success.
|
|
|
|
The input buffer
|
|
|
|
.Fa buf
|
|
|
|
is modified by this function.
|
|
|
|
.It Fn mdoc_endparse
|
|
|
|
Signals that the parse is complete.
|
|
|
|
Returns 0 on failure, 1 on success.
|
|
|
|
.It Fn mdoc_node
|
|
|
|
Returns the first node of the parse.
|
|
|
|
.It Fn mdoc_meta
|
|
|
|
Returns the document's parsed meta-data.
|
|
|
|
.El
|
|
|
|
.Ss Variables
|
|
|
|
.Bl -ohang
|
|
|
|
.It Va mdoc_macronames
|
|
|
|
An array of string-ified token names.
|
|
|
|
.It Va mdoc_argnames
|
|
|
|
An array of string-ified token argument names.
|
|
|
|
.El
|
|
|
|
.Ss Abstract Syntax Tree
|
|
|
|
The
|
|
|
|
.Nm
|
|
|
|
functions produce an abstract syntax tree (AST) describing input in a
|
|
|
|
regular form.
|
|
|
|
It may be reviewed at any time with
|
|
|
|
.Fn mdoc_node ;
|
|
|
|
however, if called before
|
|
|
|
.Fn mdoc_endparse ,
|
|
|
|
or after
|
|
|
|
.Fn mdoc_endparse
|
|
|
|
or
|
|
|
|
.Fn mdoc_parseln
|
|
|
|
fail, it may be incomplete.
|
|
|
|
.Pp
|
|
|
|
This AST is governed by the ontological
|
|
|
|
rules dictated in
|
|
|
|
.Xr mdoc 7
|
|
|
|
and derives its terminology accordingly.
|
|
|
|
.Qq In-line
|
|
|
|
elements described in
|
|
|
|
.Xr mdoc 7
|
|
|
|
are described simply as
|
|
|
|
.Qq elements .
|
|
|
|
.Pp
|
|
|
|
The AST is composed of
|
|
|
|
.Vt struct mdoc_node
|
|
|
|
nodes with block, head, body, element, root and text types as declared
|
|
|
|
by the
|
|
|
|
.Va type
|
|
|
|
field.
|
|
|
|
Each node also provides its parse point (the
|
|
|
|
.Va line ,
|
|
|
|
.Va sec ,
|
|
|
|
and
|
|
|
|
.Va pos
|
|
|
|
fields), its position in the tree (the
|
|
|
|
.Va parent ,
|
|
|
|
.Va child ,
|
2011-09-26 18:19:29 +02:00
|
|
|
.Va nchild ,
|
2010-06-26 04:20:06 +02:00
|
|
|
.Va next
|
|
|
|
and
|
|
|
|
.Va prev
|
2011-09-26 18:19:29 +02:00
|
|
|
fields) and some type-specific data, in particular, for nodes generated
|
|
|
|
from macros, the generating macro in the
|
|
|
|
.Va tok
|
|
|
|
field.
|
2010-06-26 04:20:06 +02:00
|
|
|
.Pp
|
|
|
|
The tree itself is arranged according to the following normal form,
|
|
|
|
where capitalised non-terminals represent nodes.
|
|
|
|
.Pp
|
|
|
|
.Bl -tag -width "ELEMENTXX" -compact
|
|
|
|
.It ROOT
|
|
|
|
\(<- mnode+
|
|
|
|
.It mnode
|
|
|
|
\(<- BLOCK | ELEMENT | TEXT
|
|
|
|
.It BLOCK
|
|
|
|
\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
|
|
|
|
.It ELEMENT
|
|
|
|
\(<- TEXT*
|
|
|
|
.It HEAD
|
2011-09-26 18:19:29 +02:00
|
|
|
\(<- mnode*
|
2010-06-26 04:20:06 +02:00
|
|
|
.It BODY
|
2011-09-26 18:19:29 +02:00
|
|
|
\(<- mnode* [ENDBODY mnode*]
|
2010-06-26 04:20:06 +02:00
|
|
|
.It TAIL
|
2011-09-26 18:19:29 +02:00
|
|
|
\(<- mnode*
|
2010-06-26 04:20:06 +02:00
|
|
|
.It TEXT
|
|
|
|
\(<- [[:printable:],0x1e]*
|
|
|
|
.El
|
|
|
|
.Pp
|
|
|
|
Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
|
|
|
|
the BLOCK production: these refer to punctuation marks.
|
|
|
|
Furthermore, although a TEXT node will generally have a non-zero-length
|
|
|
|
string, in the specific case of
|
|
|
|
.Sq \&.Bd \-literal ,
|
|
|
|
an empty line will produce a zero-length string.
|
|
|
|
Multiple body parts are only found in invocations of
|
|
|
|
.Sq \&Bl \-column ,
|
|
|
|
where a new body introduces a new phrase.
|
2011-09-26 18:19:29 +02:00
|
|
|
.Ss Badly-nested Blocks
|
|
|
|
The ENDBODY node is available to end the formatting associated
|
|
|
|
with a given block before the physical end of that block.
|
|
|
|
It has a non-null
|
|
|
|
.Va end
|
|
|
|
field, is of the BODY
|
|
|
|
.Va type ,
|
|
|
|
has the same
|
|
|
|
.Va tok
|
|
|
|
as the BLOCK it is ending, and has a
|
|
|
|
.Va pending
|
|
|
|
field pointing to that BLOCK's BODY node.
|
|
|
|
It is an indirect child of that BODY node
|
|
|
|
and has no children of its own.
|
|
|
|
.Pp
|
|
|
|
An ENDBODY node is generated when a block ends while one of its child
|
|
|
|
blocks is still open, like in the following example:
|
|
|
|
.Bd -literal -offset indent
|
|
|
|
\&.Ao ao
|
|
|
|
\&.Bo bo ac
|
|
|
|
\&.Ac bc
|
|
|
|
\&.Bc end
|
|
|
|
.Ed
|
|
|
|
.Pp
|
|
|
|
This example results in the following block structure:
|
|
|
|
.Bd -literal -offset indent
|
|
|
|
BLOCK Ao
|
|
|
|
HEAD Ao
|
|
|
|
BODY Ao
|
|
|
|
TEXT ao
|
|
|
|
BLOCK Bo, pending -> Ao
|
|
|
|
HEAD Bo
|
|
|
|
BODY Bo
|
|
|
|
TEXT bo
|
|
|
|
TEXT ac
|
|
|
|
ENDBODY Ao, pending -> Ao
|
|
|
|
TEXT bc
|
|
|
|
TEXT end
|
|
|
|
.Ed
|
|
|
|
.Pp
|
|
|
|
Here, the formatting of the
|
|
|
|
.Sq \&Ao
|
|
|
|
block extends from TEXT ao to TEXT ac,
|
|
|
|
while the formatting of the
|
|
|
|
.Sq \&Bo
|
|
|
|
block extends from TEXT bo to TEXT bc.
|
|
|
|
It renders as follows in
|
|
|
|
.Fl T Ns Cm ascii
|
|
|
|
mode:
|
|
|
|
.Pp
|
|
|
|
.Dl <ao [bo ac> bc] end
|
|
|
|
.Pp
|
|
|
|
Support for badly-nested blocks is only provided for backward
|
|
|
|
compatibility with some older
|
|
|
|
.Xr mdoc 7
|
|
|
|
implementations.
|
|
|
|
Using badly-nested blocks is
|
|
|
|
.Em strongly discouraged :
|
|
|
|
the
|
|
|
|
.Fl T Ns Cm html
|
|
|
|
and
|
|
|
|
.Fl T Ns Cm xhtml
|
|
|
|
front-ends are unable to render them in any meaningful way.
|
|
|
|
Furthermore, behaviour when encountering badly-nested blocks is not
|
|
|
|
consistent across troff implementations, especially when using multiple
|
|
|
|
levels of badly-nested blocks.
|
2010-06-26 04:20:06 +02:00
|
|
|
.Sh EXAMPLES
|
|
|
|
The following example reads lines from stdin and parses them, operating
|
|
|
|
on the finished parse tree with
|
|
|
|
.Fn parsed .
|
|
|
|
This example does not error-check nor free memory upon failure.
|
|
|
|
.Bd -literal -offset indent
|
2011-09-26 18:19:29 +02:00
|
|
|
struct regset regs;
|
2010-06-26 04:20:06 +02:00
|
|
|
struct mdoc *mdoc;
|
|
|
|
const struct mdoc_node *node;
|
|
|
|
char *buf;
|
|
|
|
ssize_t len;
size_t alloc_len;
|
|
|
|
int line;
|
|
|
|
|
2011-09-26 18:19:29 +02:00
|
|
|
bzero(&regs, sizeof(struct regset));
|
2010-06-26 04:20:06 +02:00
|
|
|
line = 1;
|
2011-09-26 18:19:29 +02:00
|
|
|
mdoc = mdoc_alloc(&regs, NULL, NULL);
|
2010-06-26 04:20:06 +02:00
|
|
|
buf = NULL;
|
|
|
|
alloc_len = 0;
|
|
|
|
|
|
|
|
while ((len = getline(&buf, &alloc_len, stdin)) >= 0) {
|
|
|
|
if (len && buf[len - 1] == '\en')
|
|
|
|
buf[len - 1] = '\e0';
|
|
|
|
if ( ! mdoc_parseln(mdoc, line, buf))
|
|
|
|
errx(1, "mdoc_parseln");
|
|
|
|
line++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( ! mdoc_endparse(mdoc))
|
|
|
|
errx(1, "mdoc_endparse");
|
|
|
|
if (NULL == (node = mdoc_node(mdoc)))
|
|
|
|
errx(1, "mdoc_node");
|
|
|
|
|
|
|
|
parsed(mdoc, node);
|
|
|
|
mdoc_free(mdoc);
|
|
|
|
.Ed
|
|
|
|
.Pp
|
2011-09-26 18:19:29 +02:00
|
|
|
To compile this, execute
|
|
|
|
.Pp
|
|
|
|
.Dl % cc main.c libmdoc.a libmandoc.a
|
|
|
|
.Pp
|
|
|
|
where
|
2010-06-26 04:20:06 +02:00
|
|
|
.Pa main.c
|
2011-09-26 18:19:29 +02:00
|
|
|
is the example file.
|
2010-06-26 04:20:06 +02:00
|
|
|
.Sh SEE ALSO
|
|
|
|
.Xr mandoc 1 ,
|
|
|
|
.Xr mdoc 7
|
|
|
|
.Sh AUTHORS
|
|
|
|
The
|
|
|
|
.Nm
|
|
|
|
library was written by
|
|
|
|
.An Kristaps Dzonsons Aq kristaps@bsd.lv .
|