290 lines
6.8 KiB
Groff
290 lines
6.8 KiB
Groff
.\" $Id: mdoc.3,v 1.41 2010/05/30 22:56:02 kristaps Exp $
|
|
.\"
|
|
.\" Copyright (c) 2009-2010 Kristaps Dzonsons <kristaps@bsd.lv>
|
|
.\"
|
|
.\" Permission to use, copy, modify, and distribute this software for any
|
|
.\" purpose with or without fee is hereby granted, provided that the above
|
|
.\" copyright notice and this permission notice appear in all copies.
|
|
.\"
|
|
.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
.\"
|
|
.Dd $Mdocdate: May 30 2010 $
|
|
.Dt MDOC 3
|
|
.Os
|
|
.Sh NAME
|
|
.Nm mdoc ,
|
|
.Nm mdoc_alloc ,
|
|
.Nm mdoc_endparse ,
|
|
.Nm mdoc_free ,
|
|
.Nm mdoc_meta ,
|
|
.Nm mdoc_node ,
|
|
.Nm mdoc_parseln ,
|
|
.Nm mdoc_reset
|
|
.Nd mdoc macro compiler library
|
|
.Sh SYNOPSIS
|
|
.In mandoc.h
|
|
.In mdoc.h
|
|
.Vt extern const char * const * mdoc_macronames;
|
|
.Vt extern const char * const * mdoc_argnames;
|
|
.Ft "struct mdoc *"
|
|
.Fn mdoc_alloc "void *data" "int pflags" "mandocmsg msgs"
|
|
.Ft int
|
|
.Fn mdoc_endparse "struct mdoc *mdoc"
|
|
.Ft void
|
|
.Fn mdoc_free "struct mdoc *mdoc"
|
|
.Ft "const struct mdoc_meta *"
|
|
.Fn mdoc_meta "const struct mdoc *mdoc"
|
|
.Ft "const struct mdoc_node *"
|
|
.Fn mdoc_node "const struct mdoc *mdoc"
|
|
.Ft int
|
|
.Fn mdoc_parseln "struct mdoc *mdoc" "int line" "char *buf"
|
|
.Ft int
|
|
.Fn mdoc_reset "struct mdoc *mdoc"
|
|
.Sh DESCRIPTION
|
|
The
|
|
.Nm mdoc
|
|
library parses lines of
|
|
.Xr mdoc 7
|
|
input
|
|
into an abstract syntax tree (AST).
|
|
.Pp
|
|
In general, applications initiate a parsing sequence with
|
|
.Fn mdoc_alloc ,
|
|
parse each line in a document with
|
|
.Fn mdoc_parseln ,
|
|
close the parsing session with
|
|
.Fn mdoc_endparse ,
|
|
operate over the syntax tree returned by
|
|
.Fn mdoc_node
|
|
and
|
|
.Fn mdoc_meta ,
|
|
then free all allocated memory with
|
|
.Fn mdoc_free .
|
|
The
|
|
.Fn mdoc_reset
|
|
function may be used in order to reset the parser for another input
|
|
sequence.
|
|
See the
|
|
.Sx EXAMPLES
|
|
section for a simple example.
|
|
.Pp
|
|
This section further defines the
|
|
.Sx Types ,
|
|
.Sx Functions
|
|
and
|
|
.Sx Variables
|
|
available to programmers.
|
|
Following that, the
|
|
.Sx Abstract Syntax Tree
|
|
section documents the output tree.
|
|
.Ss Types
|
|
Both functions (see
|
|
.Sx Functions )
|
|
and variables (see
|
|
.Sx Variables )
|
|
may use the following types:
|
|
.Bl -ohang
|
|
.It Vt struct mdoc
|
|
An opaque type defined in
|
|
.Pa mdoc.c .
|
|
Its values are only used privately within the library.
|
|
.It Vt struct mdoc_node
|
|
A parsed node.
|
|
Defined in
|
|
.Pa mdoc.h .
|
|
See
|
|
.Sx Abstract Syntax Tree
|
|
for details.
|
|
.It Vt mandocmsg
|
|
A function callback type defined in
|
|
.Pa mandoc.h .
|
|
.El
|
|
.Ss Functions
|
|
Function descriptions follow:
|
|
.Bl -ohang
|
|
.It Fn mdoc_alloc
|
|
Allocates a parsing structure.
|
|
The
|
|
.Fa data
|
|
pointer is passed to
|
|
.Fa msgs .
|
|
The
|
|
.Fa pflags
|
|
arguments are defined in
|
|
.Pa mdoc.h .
|
|
Returns NULL on failure.
|
|
If non-NULL, the pointer must be freed with
|
|
.Fn mdoc_free .
|
|
.It Fn mdoc_reset
|
|
Reset the parser for another parse routine.
|
|
After its use,
|
|
.Fn mdoc_parseln
|
|
behaves as if invoked for the first time.
|
|
If it returns 0, memory could not be allocated.
|
|
.It Fn mdoc_free
|
|
Free all resources of a parser.
|
|
The pointer is no longer valid after invocation.
|
|
.It Fn mdoc_parseln
|
|
Parse a NUL-terminated line of input.
|
|
This line should not contain the trailing newline.
|
|
Returns 0 on failure, 1 on success.
|
|
The input buffer
|
|
.Fa buf
|
|
is modified by this function.
|
|
.It Fn mdoc_endparse
|
|
Signals that the parse is complete.
|
|
Note that if
|
|
.Fn mdoc_endparse
|
|
is called subsequent to
|
|
.Fn mdoc_node ,
|
|
the resulting tree is incomplete.
|
|
Returns 0 on failure, 1 on success.
|
|
.It Fn mdoc_node
|
|
Returns the first node of the parse.
|
|
Note that if
|
|
.Fn mdoc_parseln
|
|
or
|
|
.Fn mdoc_endparse
|
|
return 0, the tree will be incomplete.
|
|
.It Fn mdoc_meta
|
|
Returns the document's parsed meta-data.
|
|
If this information has not yet been supplied or
|
|
.Fn mdoc_parseln
|
|
or
|
|
.Fn mdoc_endparse
|
|
return 0, the data will be incomplete.
|
|
.El
|
|
.Ss Variables
|
|
The following variables are also defined:
|
|
.Bl -ohang
|
|
.It Va mdoc_macronames
|
|
An array of string-ified token names.
|
|
.It Va mdoc_argnames
|
|
An array of string-ified token argument names.
|
|
.El
|
|
.Ss Abstract Syntax Tree
|
|
The
|
|
.Nm
|
|
functions produce an abstract syntax tree (AST) describing input in a
|
|
regular form.
|
|
It may be reviewed at any time with
|
|
.Fn mdoc_node ;
|
|
however, if called before
|
|
.Fn mdoc_endparse ,
|
|
or after
|
|
.Fn mdoc_endparse
|
|
or
|
|
.Fn mdoc_parseln
|
|
fail, it may be incomplete.
|
|
.Pp
|
|
This AST is governed by the ontological
|
|
rules dictated in
|
|
.Xr mdoc 7
|
|
and derives its terminology accordingly.
|
|
.Qq In-line
|
|
elements described in
|
|
.Xr mdoc 7
|
|
are described simply as
|
|
.Qq elements .
|
|
.Pp
|
|
The AST is composed of
|
|
.Vt struct mdoc_node
|
|
nodes with block, head, body, element, root and text types as declared
|
|
by the
|
|
.Va type
|
|
field.
|
|
Each node also provides its parse point (the
|
|
.Va line ,
|
|
.Va sec ,
|
|
and
|
|
.Va pos
|
|
fields), its position in the tree (the
|
|
.Va parent ,
|
|
.Va child ,
|
|
.Va next
|
|
and
|
|
.Va prev
|
|
fields) and some type-specific data.
|
|
.Pp
|
|
The tree itself is arranged according to the following normal form,
|
|
where capitalised non-terminals represent nodes.
|
|
.Pp
|
|
.Bl -tag -width "ELEMENTXX" -compact
|
|
.It ROOT
|
|
\(<- mnode+
|
|
.It mnode
|
|
\(<- BLOCK | ELEMENT | TEXT
|
|
.It BLOCK
|
|
\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
|
|
.It ELEMENT
|
|
\(<- TEXT*
|
|
.It HEAD
|
|
\(<- mnode+
|
|
.It BODY
|
|
\(<- mnode+
|
|
.It TAIL
|
|
\(<- mnode+
|
|
.It TEXT
|
|
\(<- [[:printable:],0x1e]*
|
|
.El
|
|
.Pp
|
|
Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
|
|
the BLOCK production: these refer to punctuation marks.
|
|
Furthermore, although a TEXT node will generally have a non-zero-length
|
|
string, in the specific case of
|
|
.Sq \&.Bd \-literal ,
|
|
an empty line will produce a zero-length string.
|
|
Multiple body parts are only found in invocations of
|
|
.Sq \&.Bl \-column ,
|
|
where a new body introduces a new phrase.
|
|
.Sh EXAMPLES
|
|
The following example reads lines from stdin and parses them, operating
|
|
on the finished parse tree with
|
|
.Fn parsed .
|
|
This example does not error-check nor free memory upon failure.
|
|
.Bd -literal -offset indent
|
|
struct mdoc *mdoc;
|
|
const struct mdoc_node *node;
|
|
char *buf;
|
|
size_t alloc_len;
ssize_t len;
|
|
int line;
|
|
|
|
line = 1;
|
|
mdoc = mdoc_alloc(NULL, 0, NULL);
|
|
buf = NULL;
|
|
alloc_len = 0;
|
|
|
|
while ((len = getline(&buf, &alloc_len, stdin)) >= 0) {
|
|
if (len && buf[len - 1] == '\en')
|
|
buf[len - 1] = '\e0';
|
|
if ( ! mdoc_parseln(mdoc, line, buf))
|
|
errx(1, "mdoc_parseln");
|
|
line++;
|
|
}
|
|
|
|
if ( ! mdoc_endparse(mdoc))
|
|
errx(1, "mdoc_endparse");
|
|
if (NULL == (node = mdoc_node(mdoc)))
|
|
errx(1, "mdoc_node");
|
|
|
|
parsed(mdoc, node);
|
|
mdoc_free(mdoc);
|
|
.Ed
|
|
.Pp
|
|
Please see
|
|
.Pa main.c
|
|
in the source archive for a rigorous reference.
|
|
.Sh SEE ALSO
|
|
.Xr mandoc 1 ,
|
|
.Xr mdoc 7
|
|
.Sh AUTHORS
|
|
The
|
|
.Nm
|
|
library was written by
|
|
.An Kristaps Dzonsons Aq kristaps@bsd.lv .
|