scdoc2mdoc

A fork of scdoc to output mdoc(7)
git clone git://git.sgregoratto.me/scdoc2mdoc
Log | Files | Refs | README | LICENSE

commit f9051ab30af352f923845d3f1b8a4a0f1fde9ddc
parent 64795b2ff33ef043c0252f35b6e6d553fda62a0d
Author: Stephen Gregoratto <dev@sgregoratto.me>
Date:   Thu,  4 Jul 2019 20:32:01 +1000

first pass of the mdoc/build changes

- Add kristaps@ oconfigure script, cut down to only the parts we need.
  As such, remove the config.mk file and add configure files to
  gitignore
- Move all parse(er)? functions to parser.c, rename util.h to parser.h.
  This cuts down the size of main.c.
- Expand str.h -> string.h for consistency
- Start outputting mdoc in the following:
    - Document preamble
    - Headings
    - Indented blocks (buggy)
    - Code/Literal blocks
    - Bulleted, numbered lists
- Remove/replace unneeded code
    - Remove roff_macro and anything using the output stream.
      Since we only output to stdout we only need printf/(f)?puts.
    - Simplify some of the utf8 code.
      This changed some function signatures to void,
      since we weren't using the return value or it was guaranteed to
      never fail.
    - Rewrite code to use the new oconfigure functions
      (strtonum, err, reallocarray).
    - Remove all asserts relating to bad mallocs,
      just exit instead.
- Add my name to COPYING, reflow text to 72 columns.

Diffstat:
M.gitignore | 5+++++
MCOPYING | 32+++++++++++++++++---------------
MMakefile | 12++++++------
Acompats.c | 257+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dconfig.mk | 4----
Aconfigure | 407+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mmain.c | 731++++---------------------------------------------------------------------------
Aparser.c | 655+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser.h | 30++++++++++++++++++++++++++++++
Dstr.h | 12------------
Mstring.c | 57+++++++++++++++++++++++++++++----------------------------
Astring.h | 13+++++++++++++
Atests.c | 122+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mutf8.c | 39+++++++++++++++++++--------------------
Mutf8.h | 24++++++++++++------------
Dutil.c | 80-------------------------------------------------------------------------------
Dutil.h | 26--------------------------
17 files changed, 1603 insertions(+), 903 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1,4 +1,9 @@ +*.log *.o +*.old +tags +Makefile.configure +config.h scdoc scdoc.1 scdoc.5 diff --git a/COPYING b/COPYING @@ -1,19 +1,21 @@ Copyright © 2017 Drew DeVault +Copyright © 2019 Stephen Gregoratto -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the “Software”), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +“Software”), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile @@ -1,7 +1,7 @@ -include config.mk +include Makefile.configure VERSION=1.9.4 -OBJS = main.o string.o utf8.o util.o +OBJS = main.o string.o utf8.o parser.o compats.o all: scdoc scdoc.1 scdoc.5 scdoc.pc @@ -18,10 +18,10 @@ scdoc.pc: scdoc.pc.in sed -e 's:@prefix@:$(PREFIX):g' \ -e 's:@version@:$(VERSION):g' scdoc.pc.in > $@ -main.o: str.h utf8.h util.h -string.o: str.h utf8.h +main.o: string.h utf8.h parser.h +string.o: string.h utf8.h utf8.o: utf8.h -util.o: utf8.h util.h +parser.o: utf8.h parser.h clean: rm -rf $(OBJS) scdoc scdoc.1 scdoc.5 scdoc.pc @@ -40,7 +40,7 @@ dist: scdoc-$(VERSION).tgz scdoc-$(VERSION).tgz: git archive --prefix "scdoc-$(VERSION)/" $(VERSION) | - gzip > $@ + gzip > $@ check: scdoc scdoc.1 scdoc.5 @find test -perm -111 -exec '{}' \; diff --git a/compats.c b/compats.c @@ -0,0 +1,257 @@ +#include "config.h" +#if !HAVE_ERR +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +void +vwarnx(const char *fmt, va_list ap) +{ + fprintf(stderr, "%s: ", getprogname()); + if (fmt != NULL) + vfprintf(stderr, fmt, ap); +} + +void +vwarn(const char *fmt, va_list ap) +{ + int sverrno; + + sverrno = errno; + vwarnx(fmt, ap); + if (fmt != NULL) + fputs(": ", stderr); + fprintf(stderr, "%s\n", strerror(sverrno)); +} + +void +err(int eval, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); + exit(eval); +} + +void +errx(int eval, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vwarnx(fmt, ap); + va_end(ap); + fputc('\n', stderr); + exit(eval); +} + +void +warn(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); +} + +void +warnx(const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vwarnx(fmt, ap); + va_end(ap); + fputc('\n', stderr); +} +#endif /* !HAVE_ERR */ +#if !HAVE_GETPROGNAME +/* + * Copyright (c) 2016 Nicholas Marriott <nicholas.marriott@gmail.com> + * Copyright (c) 2017 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> + +#include <errno.h> + +#if HAVE_PROGRAM_INVOCATION_SHORT_NAME +const char * +getprogname(void) +{ + return (program_invocation_short_name); +} +#elif HAVE___PROGNAME +const char * +getprogname(void) +{ + extern char *__progname; + + return (__progname); +} +#else +static const char *progname; + +void +setprogname(const char *name) +{ + progname = name; +} + +const char * +getprogname(void) +{ + return progname; +} +#endif +#endif /* !HAVE_GETPROGNAME */ +#if !HAVE_REALLOCARRAY +/* + * Copyright (c) 2008 Otto Moerbeek <otto@drijf.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> + +/* + * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX + * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW + */ +#define MUL_NO_OVERFLOW ((size_t)1 << (sizeof(size_t) * 4)) + +void * +reallocarray(void *optr, size_t nmemb, size_t size) +{ + if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && + nmemb > 0 && SIZE_MAX / nmemb < size) { + errno = ENOMEM; + return NULL; + } + return realloc(optr, size * nmemb); +} +#endif /* !HAVE_REALLOCARRAY */ +#if !HAVE_STRTONUM +/* + * Copyright (c) 2004 Ted Unangst and Todd Miller + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <errno.h> +#include <limits.h> +#include <stdlib.h> + +#define INVALID 1 +#define TOOSMALL 2 +#define TOOLARGE 3 + +long long +strtonum(const char *numstr, long long minval, long long maxval, + const char **errstrp) +{ + long long ll = 0; + int error = 0; + char *ep; + struct errval { + const char *errstr; + int err; + } ev[4] = { + { NULL, 0 }, + { "invalid", EINVAL }, + { "too small", ERANGE }, + { "too large", ERANGE }, + }; + + ev[0].err = errno; + errno = 0; + if (minval > maxval) { + error = INVALID; + } else { + ll = strtoll(numstr, &ep, 10); + if (numstr == ep || *ep != '\0') + error = INVALID; + else if ((ll == LLONG_MIN && errno == ERANGE) || ll < minval) + error = TOOSMALL; + else if ((ll == LLONG_MAX && errno == ERANGE) || ll > maxval) + error = TOOLARGE; + } + if (errstrp != NULL) + *errstrp = ev[error].errstr; + errno = ev[error].err; + if (error) + ll = 0; + + return (ll); +} +#endif /* !HAVE_STRTONUM */ diff --git a/config.mk b/config.mk @@ -1,4 +0,0 @@ -CFLAGS = -g -std=c99 -Wall -Wextra -Werror -Wno-unused-parameter -PREFIX = /usr/local -MANDIR = $(PREFIX)/man -PCDIR = $(PREFIX)/lib/pkgconfig diff --git a/configure b/configure @@ -0,0 +1,407 @@ +#! /bin/sh +# +# Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@openbsd.org> +# Copyright (c) 2017, 2018 Kristaps Dzonsons <kristaps@bsd.lv> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +OCONFIGURE_VERSION="0.1.8" + +# +# This script outputs two files: config.h and Makefile.configure. +# It tries to read from configure.local, which contains predefined +# values we won't autoconfigure. +# +# If you want to use configure with your project, have your GNUmakefile +# or BSDmakefile---whichever---try to import/include Makefile.configure +# at the beginning of the file. +# +# Like so (note no quotes, no period, etc.): +# +# include Makefile.configure +# +# If it exists, configure was run; otherwise, it wasn't. +# +# You'll probably want to change parts of this file. I've noted the +# parts that you'll probably change in the section documentation. +# +# See https://github.com/kristapsdz/oconfigure for more. + +set -e + +#---------------------------------------------------------------------- +# Prepare for running: move aside previous configure runs. +# Output file descriptor usage: +# 1 (stdout): config.h or Makefile.configure +# 2 (stderr): original stderr, usually to the console +# 3: config.log +# You DO NOT want to change this. +#---------------------------------------------------------------------- + +[ -w config.log ] && mv config.log config.log.old +[ -w config.h ] && mv config.h config.h.old + +exec 3> config.log +echo "config.log: writing..." + +#---------------------------------------------------------------------- +# Initialize all variables here such that nothing can leak in from the +# environment except for CC and CFLAGS, which we might have passed in. 
+#---------------------------------------------------------------------- + +CC=`printf "all:\\n\\t@echo \\\$(CC)\\n" | make -sf -` +CFLAGS=`printf "all:\\n\\t@echo \\\$(CFLAGS)\\n" | make -sf -` +CFLAGS="${CFLAGS} -g -W -Wall -Wextra -Werror -Wno-unused-parameter" +LDADD= +CPPFLAGS= +LDFLAGS= +DESTDIR= +PREFIX="/usr/local" +BINDIR= +SBINDIR= +INCLUDEDIR= +LIBDIR= +MANDIR= +SHAREDIR= +PCDIR= +INSTALL="install" +INSTALL_PROGRAM= +INSTALL_LIB= +INSTALL_MAN= +INSTALL_DATA= + +#---------------------------------------------------------------------- +# Allow certain variables to be overriden on the command line. +#---------------------------------------------------------------------- + +for keyvals in "$@" +do + key=`echo $keyvals | cut -s -d '=' -f 1` + if [ -z "$key" ] + then + echo "$0: invalid key-value: $keyvals" 1>&2 + exit 1 + fi + val=`echo $keyvals | cut -d '=' -f 2-` + case "$key" in + LDADD) + LDADD="$val" ;; + LDFLAGS) + LDFLAGS="$val" ;; + CPPFLAGS) + CPPFLAGS="$val" ;; + DESTDIR) + DESTDIR="$val" ;; + PREFIX) + PREFIX="$val" ;; + MANDIR) + MANDIR="$val" ;; + LIBDIR) + LIBDIR="$val" ;; + BINDIR) + BINDIR="$val" ;; + SHAREDIR) + SHAREDIR="$val" ;; + PCDIR) + PCDIR="$val" ;; + SBINDIR) + SBINDIR="$val" ;; + INCLUDEDIR) + INCLUDEDIR="$val" ;; + *) + echo "$0: invalid key: $key" 1>&2 + exit 1 + esac +done + + +#---------------------------------------------------------------------- +# These are the values that will be pushed into config.h after we test +# for whether they're supported or not. +# Each of these must have a runtest(), below. +# Please sort by alpha, for clarity. +# You WANT to change this. 
+#---------------------------------------------------------------------- + +HAVE_ERR= +HAVE_GETPROGNAME= +HAVE_PLEDGE= +HAVE_PROGRAM_INVOCATION_SHORT_NAME= +HAVE_REALLOCARRAY= +HAVE_STRTONUM= +HAVE___PROGNAME= + +#---------------------------------------------------------------------- +# Allow configure.local to override all variables, default settings, +# command-line arguments, and tested features, above. +# You PROBABLY DO NOT want to change this. +#---------------------------------------------------------------------- + +if [ -r ./configure.local ]; then + echo "configure.local: reading..." 1>&2 + echo "configure.local: reading..." 1>&3 + cat ./configure.local 1>&3 + . ./configure.local +else + echo "configure.local: no (fully automatic configuration)" 1>&2 + echo "configure.local: no (fully automatic configuration)" 1>&3 +fi + +echo 1>&3 + +#---------------------------------------------------------------------- +# Infrastructure for running tests. +# These consists of a series of functions that will attempt to run the +# given test file and record its exit into a HAVE_xxx variable. +# You DO NOT want to change this. +#---------------------------------------------------------------------- + +COMP="${CC} ${CFLAGS} ${CPPFLAGS} -Wno-unused -Werror" + +# Check whether this HAVE_ setting is manually overridden. +# If yes, use the override, if no, do not decide anything yet. +# Arguments: lower-case test name, manual value + +ismanual() { + [ -z "${3}" ] && return 1 + echo "${1}: manual (HAVE_${2}=${3})" 1>&2 + echo "${1}: manual (HAVE_${2}=${3})" 1>&3 + echo 1>&3 + return 0 +} + +# Run a single autoconfiguration test. +# In case of success, enable the feature. +# In case of failure, do not decide anything yet. +# Arguments: lower-case test name, upper-case test name, additional +# CFLAGS, additional LIBS. + +singletest() { + extralib="" + cat 1>&3 << __HEREDOC__ +${1}: testing... 
+${COMP} -DTEST_${2} ${3} -o test-${1} tests.c ${4} +__HEREDOC__ + if ${COMP} -DTEST_${2} ${3} -o "test-${1}" tests.c ${4} 1>&3 2>&3; then + echo "${1}: ${CC} succeeded" 1>&3 + else + if [ -n "${5}" ] ; then + echo "${1}: ${CC} failed with $? (retrying)" 1>&3 + cat 1>&3 << __HEREDOC__ +${1}: testing... +${COMP} -DTEST_${2} ${3} -o test-${1} tests.c ${5} +__HEREDOC__ + if ${COMP} -DTEST_${2} ${3} -o "test-${1}" tests.c ${5} 1>&3 2>&3; then + echo "${1}: ${CC} succeeded" 1>&3 + extralib="(with ${5})" + else + echo "${1}: ${CC} failed with $?" 1>&3 + echo 1>&3 + return 1 + fi + else + echo "${1}: ${CC} failed with $?" 1>&3 + echo 1>&3 + return 1 + fi + fi + + echo "${1}: yes ${extralib}" 1>&2 + echo "${1}: yes ${extralib}" 1>&3 + echo 1>&3 + eval HAVE_${2}=1 + rm "test-${1}" + return 0 + + # Don't actually run the test: none of our tests check for + # run-time behaviour. + # if ./test-${1} 1>&3 2>&3; then + # echo "${1}: yes" 1>&2 + # echo "${1}: yes" 1>&3 + # echo 1>&3 + # eval HAVE_${2}=1 + # rm "test-${1}" + # return 0 + # else + # echo "${1}: execution failed with $?" 1>&3 + # echo 1>&3 + # rm "test-${1}" + # return 1 + # fi +} + +# Run a complete autoconfiguration test, including the check for +# a manual override and disabling the feature on failure. +# Arguments: lower case name, upper case name, additional CFLAGS, +# additional LDADD, alternative LDADD. + +runtest() { + eval _manual=\${HAVE_${2}} + ismanual "${1}" "${2}" "${_manual}" && return 0 + singletest "${1}" "${2}" "${3}" "${4}" "${5}" && return 0 + echo "${1}: no" 1>&2 + eval HAVE_${2}=0 + return 1 +} + +#---------------------------------------------------------------------- +# Begin running the tests themselves. +# All of your tests must be defined here. +# Please sort as the HAVE_xxxx values were defined. +# You WANT to change this. 
+# It consists of the following columns: +# runtest +# (1) test file +# (2) macro to set +# (3) argument to cc *before* -o +# (4) argument to cc *after* +# (5) alternative argument to cc *after* +#---------------------------------------------------------------------- + +runtest err ERR || true +runtest getprogname GETPROGNAME || true +runtest pledge PLEDGE || true +runtest program_invocation_short_name PROGRAM_INVOCATION_SHORT_NAME || true +runtest reallocarray REALLOCARRAY || true +runtest strtonum STRTONUM || true +runtest __progname __PROGNAME || true + +#---------------------------------------------------------------------- +# Output writing: generate the config.h file. +# This file contains all of the HAVE_xxxx variables necessary for +# compiling your source. +# You must include "config.h" BEFORE any other variables. +# You WANT to change this. +#---------------------------------------------------------------------- + +exec > config.h + +# Start with prologue. + +cat << __HEREDOC__ +#ifdef __cplusplus +#error "Do not use C++: this is a C application." +#endif +#if !defined(__GNUC__) || (__GNUC__ < 4) +#define __attribute__(x) +#endif +#if defined(__linux__) || defined(__MINT__) +#define _GNU_SOURCE /* See test-*.c what needs this. */ +#endif +#if !defined(__BEGIN_DECLS) +# define __BEGIN_DECLS +#endif +#if !defined(__END_DECLS) +# define __END_DECLS +#endif +__HEREDOC__ + +# For the function declaration variables... + +[ ${HAVE_REALLOCARRAY} -eq 0 ] \ + && echo "#include <sys/types.h>" + +[ ${HAVE_ERR} -eq 0 ] \ + && echo "#include <stdarg.h>" + +# Now we handle our HAVE_xxxx values. +# Most will just be defined as 0 or 1. 
+ +cat << __HEREDOC__ +#define HAVE_ERR ${HAVE_ERR} +#define HAVE_GETPROGNAME ${HAVE_GETPROGNAME} +#define HAVE_PLEDGE ${HAVE_PLEDGE} +#define HAVE_PROGRAM_INVOCATION_SHORT_NAME ${HAVE_PROGRAM_INVOCATION_SHORT_NAME} +#define HAVE_REALLOCARRAY ${HAVE_REALLOCARRAY} +#define HAVE_STRTONUM ${HAVE_STRTONUM} +#define HAVE___PROGNAME ${HAVE___PROGNAME} +__HEREDOC__ + +# Now we do our function declarations for missing functions. + +if [ ${HAVE_ERR} -eq 0 ]; then + echo "extern void err(int, const char *, ...);" + echo "extern void errx(int, const char *, ...);" + echo "extern void warn(const char *, ...);" + echo "extern void warnx(const char *, ...);" + echo "extern void vwarn(const char *, va_list);" + echo "extern void vwarnx(const char *, va_list);" +fi + +if [ ${HAVE_GETPROGNAME} -eq 0 ]; then + echo "extern const char *getprogname(void);" + if [ ${HAVE_PROGRAM_INVOCATION_SHORT_NAME} -eq 0 -o \ + ${HAVE___PROGNAME} -eq 0 ]; then + echo "extern void setprogname(const char *);" + fi +fi + +if [ ${HAVE_REALLOCARRAY} -eq 0 ]; then + echo "extern void *reallocarray(void *, size_t, size_t);" +fi + +if [ ${HAVE_STRTONUM} -eq 0 ]; then + echo "extern long long strtonum(const char *, long long, long long, const char **);" +fi + +echo "config.h: written" 1>&2 +echo "config.h: written" 1>&3 + +#---------------------------------------------------------------------- +# Now we go to generate our Makefile.configure. +# This file is simply a bunch of Makefile variables. +# They'll work in both GNUmakefile and BSDmakefile. +# You MIGHT want to change this. 
+#---------------------------------------------------------------------- + +exec > Makefile.configure + +[ -z "${BINDIR}" ] && BINDIR="${PREFIX}/bin" +[ -z "${SBINDIR}" ] && SBINDIR="${PREFIX}/sbin" +[ -z "${INCLUDEDIR}" ] && INCLUDEDIR="${PREFIX}/include" +[ -z "${LIBDIR}" ] && LIBDIR="${PREFIX}/lib" +[ -z "${MANDIR}" ] && MANDIR="${PREFIX}/man" +[ -z "${SHAREDIR}" ] && SHAREDIR="${PREFIX}/share" +[ -z "${PCDIR}" ] && PCDIR="${PREFIX}/lib/pkgconfig" + +[ -z "${INSTALL_PROGRAM}" ] && INSTALL_PROGRAM="${INSTALL} -m 0555" +[ -z "${INSTALL_LIB}" ] && INSTALL_LIB="${INSTALL} -m 0444" +[ -z "${INSTALL_MAN}" ] && INSTALL_MAN="${INSTALL} -m 0444" +[ -z "${INSTALL_DATA}" ] && INSTALL_DATA="${INSTALL} -m 0444" + +cat << __HEREDOC__ +CC = ${CC} +CFLAGS = ${CFLAGS} +CPPFLAGS = ${CPPFLAGS} +LDADD = ${LDADD} +LDFLAGS = ${LDFLAGS} +STATIC = ${STATIC} +PREFIX = ${PREFIX} +BINDIR = ${BINDIR} +SHAREDIR = ${SHAREDIR} +SBINDIR = ${SBINDIR} +INCLUDEDIR = ${INCLUDEDIR} +LIBDIR = ${LIBDIR} +MANDIR = ${MANDIR} +PCDIR = ${PCDIR} +INSTALL = ${INSTALL} +INSTALL_PROGRAM = ${INSTALL_PROGRAM} +INSTALL_LIB = ${INSTALL_LIB} +INSTALL_MAN = ${INSTALL_MAN} +INSTALL_DATA = ${INSTALL_DATA} +__HEREDOC__ + +echo "Makefile.configure: written" 1>&2 +echo "Makefile.configure: written" 1>&3 + +exit 0 diff --git a/main.c b/main.c @@ -1,729 +1,60 @@ -#define _XOPEN_SOURCE 600 -#include <assert.h> -#include <ctype.h> -#include <errno.h> -#include <limits.h> -#include <stdarg.h> -#include <stdbool.h> -#include <stddef.h> +#include "config.h" +#if HAVE_ERR +#include <err.h> +#endif #include <stdint.h> #include <stdio.h> #include <stdlib.h> +#if !(HAVE_GETPROGNAME || HAVE_PROGRAM_INVOCATION_SHORT_NAME || HAVE__PROGNAME) #include <string.h> -#include <time.h> +#endif +#if HAVE_PLEDGE #include <unistd.h> - -#include "str.h" -#include "utf8.h" -#include "util.h" - -char *strstr(const char *haystack, const char *needle); -char *strerror(int errnum); - -int -parse_section(struct parser *p) -{ - str_t *section = 
str_create(); - uint32_t ch; - while ((ch = parser_getch(p)) != UTF8_INVALID) { - if (ch < 0x80 && isdigit(ch)) { - int ret = str_append_ch(section, ch); - assert(ret != -1); - } else if (ch == ')') { - if (!section->str) - break; - int sec = strtol(section->str, NULL, 10); - if (sec < 0 || sec > 9) { - parser_fatal(p, "Expected section between 0 and 9"); - break; - } - str_free(section); - return sec; - } else { - parser_fatal(p, "Expected digit or )"); - break; - } - } - - parser_fatal(p, "Expected manual section"); - return -1; -} - -str_t * -parse_extra(struct parser *p) -{ - str_t *extra = str_create(); - int ret = str_append_ch(extra, '"'); - assert(ret != -1); - uint32_t ch; - while ((ch = parser_getch(p)) != UTF8_INVALID) { - if (ch == '"') { - ret = str_append_ch(extra, ch); - assert(ret != -1); - return extra; - } else if (ch == '\n') { - parser_fatal(p, "Unclosed extra preamble field"); - break; - } else { - ret = str_append_ch(extra, ch); - assert(ret != -1); - } - } - - str_free(extra); - return NULL; -} - -void -parse_preamble(struct parser *p) -{ - str_t *name = str_create(); - int ex = 0; - str_t *extras[2] = {NULL}; - int section = -1; - uint32_t ch; - time_t date_time; - char date[256]; - char *source_date_epoch = getenv("SOURCE_DATE_EPOCH"); - if (source_date_epoch != NULL) { - unsigned long long epoch; - char *endptr; - errno = 0; - epoch = strtoull(source_date_epoch, &endptr, 10); - if ((errno == ERANGE && (epoch == ULLONG_MAX || epoch == 0)) - || (errno != 0 && epoch == 0)) { - fprintf(stderr, "$SOURCE_DATE_EPOCH: strtoull: %s\n", - strerror(errno)); - exit(EXIT_FAILURE); - } else if (endptr == source_date_epoch) { - fprintf(stderr, - "$SOURCE_DATE_EPOCH: No digits were found: %s\n", - endptr); - exit(EXIT_FAILURE); - } else if (*endptr != '\0') { - fprintf(stderr, - "$SOURCE_DATE_EPOCH: Trailing garbage: %s\n", - endptr); - exit(EXIT_FAILURE); - } else if (epoch > ULONG_MAX) { - fprintf(stderr, - "$SOURCE_DATE_EPOCH: value must be smaller 
than or " - "equal to %lu but was found to be: %llu \n", - ULONG_MAX, epoch); - exit(EXIT_FAILURE); - } - date_time = epoch; - } else { - date_time = time(NULL); - } - - struct tm *date_tm = gmtime(&date_time); - strftime(date, sizeof(date), "%F", date_tm); - while ((ch = parser_getch(p)) != UTF8_INVALID) { - if ((ch < 0x80 && isalnum(ch)) || - ch == '_' || ch == '-' || ch == '.') { - int ret = str_append_ch(name, ch); - assert(ret != -1); - } else if (ch == '(') { - section = parse_section(p); - } else if (ch == '"') { - if (ex == 2) - parser_fatal(p, "Too many extra preamble fields"); - extras[ex++] = parse_extra(p); - } else if (ch == '\n') { - if (name->len == 0) - parser_fatal(p, "Expected preamble"); - else if (section == -1) - parser_fatal(p, "Expected manual section"); - - char sec[2] = {'0' + section, 0}; - char *ex2 = extras[0] != NULL ? extras[0]->str : NULL; - char *ex3 = extras[1] != NULL ? extras[1]->str : NULL; - fprintf(p->output, ".TH \"%s\" \"%s\" \"%s\"", - name->str, sec, date); - /* ex2 and ex3 are already double-quoted */ - if (ex2) - fprintf(p->output, " %s", ex2); - if (ex3) - fprintf(p->output, " %s", ex3); - fputc('\n', p->output); - break; - } - } - - str_free(name); - for (int i = 0; i < 2; ++i) { - if (extras[i] != NULL) { - str_free(extras[i]); - } - } -} - -void -parse_format(struct parser *p, enum formatting fmt) -{ - char formats[FORMAT_LAST] = { - [FORMAT_BOLD] = 'B', - [FORMAT_UNDERLINE] = 'I', - }; - char error[512]; - if (p->flags) { - if ((p->flags & ~fmt)) { - snprintf(error, sizeof(error), - "Cannot nest inline formatting " - "(began with %c at %d:%d)", - p->flags == FORMAT_BOLD ? 
'*' : '_', - p->fmt_line, p->fmt_col); - parser_fatal(p, error); - } - fprintf(p->output, "\\fR"); - } else { - fprintf(p->output, "\\f%c", formats[fmt]); - p->fmt_line = p->line; - p->fmt_col = p->col; - } - p->flags ^= fmt; -} - -void -parse_linebreak(struct parser *p) -{ - uint32_t plus = parser_getch(p); - if (plus != '+') { - fprintf(p->output, "+"); - parser_pushch(p, plus); - return; - } - uint32_t lf = parser_getch(p); - if (lf != '\n') { - fprintf(p->output, "+"); - parser_pushch(p, plus); - parser_pushch(p, '\n'); - return; - } - uint32_t ch = parser_getch(p); - if (ch == '\n') - parser_fatal(p, "Explicit line breaks cannot be followed by a blank line"); - parser_pushch(p, ch); - fprintf(p->output, "\n.br\n"); -} - -void -parse_text(struct parser *p) -{ - uint32_t ch, next, last = ' '; - int i = 0; - while ((ch = parser_getch(p)) != UTF8_INVALID) { - switch (ch) { - case '\\': - ch = parser_getch(p); - if (ch == UTF8_INVALID) { - parser_fatal(p, "Unexpected EOF"); - } else if (ch == '\\') { - fprintf(p->output, "\\\\"); - } else { - utf8_fputch(p->output, ch); - } - break; - case '*': - parse_format(p, FORMAT_BOLD); - break; - case '_': - next = parser_getch(p); - if (!isalnum(last) - || ((p->flags & FORMAT_UNDERLINE) && - !isalnum(next))) - parse_format(p, FORMAT_UNDERLINE); - else - utf8_fputch(p->output, ch); - if (next == UTF8_INVALID) - return; - parser_pushch(p, next); - break; - case '+': - parse_linebreak(p); - break; - case '\n': - utf8_fputch(p->output, ch); - return; - case '.': - if (!i) { - // Escape . 
if it's the first character - fprintf(p->output, "\\&."); - break; - } - /* fallthrough */ - default: - last = ch; - utf8_fputch(p->output, ch); - break; - } - ++i; - } -} - -void -parse_heading(struct parser *p) -{ - uint32_t ch; - int level = 1; - while ((ch = parser_getch(p)) != UTF8_INVALID) { - if (ch == '#') { - ++level; - } else if (ch == ' ') { - break; - } else { - parser_fatal(p, "Invalid start of heading (probably needs a space)"); - } - } - switch (level) { - case 1: - fprintf(p->output, ".SH "); - break; - case 2: - fprintf(p->output, ".SS "); - break; - default: - parser_fatal(p, "Only headings up to two levels deep are permitted"); - break; - } - while ((ch = parser_getch(p)) != UTF8_INVALID) { - utf8_fputch(p->output, ch); - if (ch == '\n') { - break; - } - } -} - -int -parse_indent(struct parser *p, int *indent, bool write) -{ - int i = 0; - uint32_t ch; - while ((ch = parser_getch(p)) == '\t') - ++i; - - parser_pushch(p, ch); - if (ch == '\n' && *indent != 0) { - /* Don't change indent when we encounter empty lines */ - return *indent; - } else if (write) { - if (i < *indent) { - for (int j = *indent; i < j; --j) { - roff_macro(p, "RE", NULL); - } - } else if (i == *indent + 1) { - fprintf(p->output, ".RS 4\n"); - } else if (i != *indent && ch == '\t') { - parser_fatal(p, "Indented by an amount greater than 1"); - } - } - *indent = i; - return i; -} - -void -list_header(struct parser *p, int *num) -{ - fprintf(p->output, ".RS 4\n"); - fprintf(p->output, ".ie n \\{\\\n"); - if (*num == -1) { - fprintf(p->output, "\\h'-0%d'%s\\h'+03'\\c\n", - *num >= 10 ? 5 : 4, "\\(bu"); - } else { - fprintf(p->output, "\\h'-0%d'%d.\\h'+03'\\c\n", - *num >= 10 ? 5 : 4, *num); - } - fprintf(p->output, ".\\}\n"); - fprintf(p->output, ".el \\{\\\n"); - if (*num == -1) { - fprintf(p->output, ".IP %s 4\n", "\\(bu"); - } else { - fprintf(p->output, ".IP %d. 
4\n", *num); - *num = *num + 1; - } - fprintf(p->output, ".\\}\n"); -} - -void -parse_list(struct parser *p, int *indent, int num) -{ - uint32_t ch; - if ((ch = parser_getch(p)) != ' ') - parser_fatal(p, "Expected space before start of list entry"); - - list_header(p, &num); - parse_text(p); - bool closed = false; - do { - parse_indent(p, indent, true); - if ((ch = parser_getch(p)) == UTF8_INVALID) - break; - switch (ch) { - case ' ': - if ((ch = parser_getch(p)) != ' ') - parser_fatal(p, "Expected two spaces for list entry continuation"); - parse_text(p); - break; - case '-': - case '.': - if ((ch = parser_getch(p)) != ' ') - parser_fatal(p, "Expected space before start of list entry"); - else if (!closed) - roff_macro(p, "RE", NULL); - list_header(p, &num); - parse_text(p); - closed = false; - break; - default: - fprintf(p->output, "\n"); - parser_pushch(p, ch); - goto ret; - } - } while (ch != UTF8_INVALID); -ret: - if (!closed) - roff_macro(p, "RE", NULL); -} - -void -parse_literal(struct parser *p, int *indent) -{ - uint32_t ch; - if ((ch = parser_getch(p)) != '`' || - (ch = parser_getch(p)) != '`' || - (ch = parser_getch(p)) != '\n') { - parser_fatal(p, "Expected ``` and a newline to begin literal block"); - } - int stops = 0; - roff_macro(p, "nf", NULL); - fprintf(p->output, ".RS 4\n"); - do { - int _indent = *indent; - parse_indent(p, &_indent, false); - if (_indent < *indent) { - parser_fatal(p, "Cannot deindent in literal block"); - } - while (_indent > *indent) { - --_indent; - fprintf(p->output, "\t"); - } - if ((ch = parser_getch(p)) == UTF8_INVALID) { - break; - } - if (ch == '`') { - if (++stops == 3) { - if ((ch = parser_getch(p)) != '\n') { - parser_fatal(p, "Expected literal block to end with newline"); - } - roff_macro(p, "fi", NULL); - roff_macro(p, "RE", NULL); - return; - } - } else { - while (stops != 0) { - fputc('`', p->output); - --stops; - } - switch (ch) { - case '.': - fprintf(p->output, "\\&."); - break; - case '\\': - ch = 
parser_getch(p); - if (ch == UTF8_INVALID) { - parser_fatal(p, "Unexpected EOF"); - } else if (ch == '\\') { - fprintf(p->output, "\\\\"); - } else { - utf8_fputch(p->output, ch); - } - break; - default: - utf8_fputch(p->output, ch); - break; - } - } - } while (ch != UTF8_INVALID); -} - -enum table_align { - ALIGN_LEFT, - ALIGN_CENTER, - ALIGN_RIGHT, -}; - -struct table_row { - struct table_cell *cell; - struct table_row *next; -}; - -struct table_cell { - enum table_align align; - str_t *contents; - struct table_cell *next; -}; - -void -parse_table(struct parser *p, uint32_t style) -{ - struct table_row *table = NULL; - struct table_row *currow = NULL, *prevrow = NULL; - struct table_cell *curcell = NULL; - int column = 0; - uint32_t ch; - parser_pushch(p, '|'); - - do { - if ((ch = parser_getch(p)) == UTF8_INVALID) - break; - switch (ch) { - case '\n': - goto commit_table; - case '|': - prevrow = currow; - currow = calloc(1, sizeof(struct table_row)); - if (prevrow) { - // TODO: Verify the number of columns match - prevrow->next = currow; - } - curcell = calloc(1, sizeof(struct table_cell)); - currow->cell = curcell; - column = 0; - if (!table) - table = currow; - break; - case ':': - if (!currow) { - parser_fatal(p, "Cannot start a column without " - "starting a row first"); - } else { - struct table_cell *prev = curcell; - curcell = calloc(1, sizeof(struct table_cell)); - if (prev) { - prev->next = curcell; - } - ++column; - } - break; - case ' ': - goto continue_cell; - default: - parser_fatal(p, "Expected either '|' or ':'"); - break; - } - if ((ch = parser_getch(p)) == UTF8_INVALID) { - break; - } - switch (ch) { - case '[': - curcell->align = ALIGN_LEFT; - break; - case '-': - curcell->align = ALIGN_CENTER; - break; - case ']': - curcell->align = ALIGN_RIGHT; - break; - case ' ': - if (prevrow) { - struct table_cell *pcell = prevrow->cell; - for (int i = 0; i <= column && pcell; ++i, pcell = pcell->next) { - if (i == column) { - curcell->align = 
pcell->align; - break; - } - } - } else { - parser_fatal(p, "No previous row to infer alignment from"); - } - break; - default: - parser_fatal(p, "Expected one of '[', '-', ']', or ' '"); - break; - } - curcell->contents = str_create(); -continue_cell: - switch (ch = parser_getch(p)) { - case ' ': - // Read out remainder of the text - while ((ch = parser_getch(p)) != UTF8_INVALID) { - switch (ch) { - case '\n': - goto commit_cell; - default:; - int ret = str_append_ch(curcell->contents, ch); - assert(ret != -1); - break; - } - } - break; - case '\n': - goto commit_cell; - default: - parser_fatal(p, "Expected ' ' or a newline"); - break; - } -commit_cell: - if (strstr(curcell->contents->str, "T{") - || strstr(curcell->contents->str, "T}")) - parser_fatal(p, "Cells cannot contain T{ or T} " - "due to roff limitations"); - } while (ch != UTF8_INVALID); -commit_table: - - if (ch == UTF8_INVALID) - return; - - roff_macro(p, "TS", NULL); - - switch (style) { - case '[': - fprintf(p->output, "allbox;"); - break; - case ']': - fprintf(p->output, "box;"); - break; - } - - // Print alignments first - currow = table; - while (currow) { - curcell = currow->cell; - while (curcell) { - fprintf(p->output, "%c%s", "lcr"[curcell->align], - curcell->next ? " " : ""); - curcell = curcell->next; - } - fprintf(p->output, "%s\n", currow->next ? 
"" : "."); - currow = currow->next; - } - - // Then contents - currow = table; - while (currow) { - curcell = currow->cell; - fprintf(p->output, "T{\n"); - while (curcell) { - parser_pushstr(p, curcell->contents->str); - parse_text(p); - if (curcell->next) { - fprintf(p->output, "\nT}\tT{\n"); - } else { - fprintf(p->output, "\nT}"); - } - struct table_cell *prev = curcell; - curcell = curcell->next; - str_free(prev->contents); - free(prev); - } - fprintf(p->output, "\n"); - struct table_row *prev = currow; - currow = currow->next; - free(prev); - } - - roff_macro(p, "TE", NULL); - fprintf(p->output, ".sp 1\n"); -} - -void -parse_document(struct parser *p) -{ - uint32_t ch; - int indent = 0; - do { - parse_indent(p, &indent, true); - if ((ch = parser_getch(p)) == UTF8_INVALID) - break; - switch (ch) { - case ';': - if ((ch = parser_getch(p)) != ' ') - parser_fatal(p, "Expected space after ; to begin comment"); - do - ch = parser_getch(p); - while (ch != UTF8_INVALID && ch != '\n'); - break; - case '#': - if (indent != 0) { - parser_pushch(p, ch); - parse_text(p); - break; - } - parse_heading(p); - break; - case '-': - parse_list(p, &indent, -1); - break; - case '.': - if ((ch = parser_getch(p)) == ' ') { - parser_pushch(p, ch); - parse_list(p, &indent, 1); - } else { - parser_pushch(p, ch); - parse_text(p); - } - break; - case '`': - parse_literal(p, &indent); - break; - case '[': - case '|': - case ']': - if (indent != 0) { - parser_fatal(p, "Tables cannot be indented"); - } - parse_table(p, ch); - break; - case ' ': - parser_fatal(p, "Tabs are required for indentation"); - break; - case '\n': - if (p->flags) { - char error[512]; - snprintf(error, sizeof(error), "Expected %c before starting " - "new paragraph (began with %c at %d:%d)", - p->flags == FORMAT_BOLD ? '*' : '_', - p->flags == FORMAT_BOLD ? 
'*' : '_', - p->fmt_line, p->fmt_col); - parser_fatal(p, error); - } - roff_macro(p, "P", NULL); - break; - default: - parser_pushch(p, ch); - parse_text(p); - break; - } - } while (ch != UTF8_INVALID); -} +#endif +#include "parser.h" int main(int argc, char **argv) { FILE *input; + const char *fname; + +#if !(HAVE_GETPROGNAME || HAVE_PROGRAM_INVOCATION_SHORT_NAME || HAVE__PROGNAME) + const char *progname; + if ((progname = strrchr(argv[0], '/')) == NULL) + progname = argv[0]; + else + progname++; + setprogname(progname); +#endif +#if HAVE_PLEDGE + if (pledge("stdio rpath", NULL) == -1) + err(EXIT_FAILURE, "pledge"); +#endif if (argc > 2) { - fprintf(stderr, "%s: Too many arguments\n", argv[2]); + warnx("%s: Too many arguments", argv[2]); goto usage; } else if (argc == 2) { if ((input = fopen(argv[1], "r")) == NULL) { - fprintf(stderr, "cannot open file %s, exiting\n", argv[1]); - return 1; + fclose(input); + err(EXIT_FAILURE, "%s", argv[1]); } + fname = argv[1]; } else { input = stdin; + fname = "<stdin>"; } struct parser p = { .input = input, - .output = stdout, + .fname = fname, .line = 1, .col = 1 }; - parse_preamble(&p); parse_document(&p); + fclose(input); return 0; usage: - fputs("usage: scdoc [file]\n", stderr); + fprintf(stderr, "usage: %s [file]\n", getprogname()); return 1; } diff --git a/parser.c b/parser.c @@ -0,0 +1,655 @@ +#include "config.h" +#include <ctype.h> +#if HAVE_ERR +#include <err.h> +#endif +#include <errno.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "utf8.h" +#include "string.h" +#include "parser.h" + +void +parser_fatal(struct parser *p, const char *err) +{ + fclose(p->input); + errx(EXIT_FAILURE, "%s:%d:%d: %s", p->fname, p->line, p->col, err); +} + +uint32_t +parser_getch(struct parser *p) +{ + if (p->qhead) { + return p->queue[--p->qhead]; + } else if (p->str) { + uint32_t ch = utf8_decode(&p->str); + if (!ch || ch == 
UTF8_INVALID) { + p->str = NULL; + return UTF8_INVALID; + } + return ch; + } + uint32_t ch = utf8_fgetch(p->input); + if (ch == '\n') { + p->col = 0; + ++p->line; + } else { + ++p->col; + } + return ch; +} + +void +parser_pushch(struct parser *p, uint32_t ch) +{ + if (ch != UTF8_INVALID) + p->queue[p->qhead++] = ch; +} + +void +parser_pushstr(struct parser *p, const char *str) +{ + p->str = str; +} + +int +parse_section(struct parser *p) +{ + str_t *section = str_create(); + uint32_t ch; + int sec; + const char *errstr; + while ((ch = parser_getch(p)) != UTF8_INVALID) { + if (ch < 0x80 && isdigit(ch)) { + str_append_ch(section, ch); + } else if (ch == ')') { + if (!section->str) + break; + sec = strtonum(section->str, 0, 9, &errstr); + if (errstr != NULL) + parser_fatal(p, "Expected section between 0 and 9"); + str_free(section); + return sec; + } else { + parser_fatal(p, "Expected digit or ')'"); + } + } + + parser_fatal(p, "Expected manual section"); + return -1; +} + +void +parse_preamble(struct parser *p) +{ + str_t *name = str_create(); + int section = -1; + uint32_t ch; + time_t date_time; + char date[256]; + char *offset; + char *source_date_epoch = getenv("SOURCE_DATE_EPOCH"); + if (source_date_epoch != NULL) { + long long epoch; + const char *errstr; + epoch = strtonum(source_date_epoch, 1, LLONG_MAX, &errstr); + if (errstr != NULL) + errx(EXIT_FAILURE, "$SOURCE_DATE_EPOCH is %s: %s", + errstr, source_date_epoch); + date_time = epoch; + } else { + date_time = time(NULL); + } + struct tm *date_tm = gmtime(&date_time); + strftime(date, sizeof(date), "%B %d, %Y", date_tm); + + while ((ch = parser_getch(p)) != UTF8_INVALID) { + if ((ch < 0x80 && isalnum(ch)) || + ch == '_' || ch == '-' || ch == '.') { + str_append_ch(name, ch); + } else if (ch == '(') { + section = parse_section(p); + } else if (ch == '\n') { + if (name->len == 0) + parser_fatal(p, "Expected preamble"); + if (section == -1) + parser_fatal(p, "Expected manual section"); + + /* + * the date 
format for Dd is in the form: + * month date, year + * where date is a number from 1-31. + * strftime's %d inserts a leading '0' if the date is + * between 1-9. So, check for this '0' and skip it. + */ + fputs(".Dd ", stdout); + for(offset = date;;) { + putchar(*offset); + if (*(offset++) == ' ') { + if (*offset == '0') + offset++; + break; + } + } + puts(offset); + str_toupper(name); + printf(".Dt %s %d\n", name->str, section); + puts(".Os"); + break; + } + } + + str_free(name); +} + +void +parse_format(struct parser *p, enum formatting fmt) +{ + char formats[FORMAT_LAST] = { + [FORMAT_BOLD] = 'B', + [FORMAT_UNDERLINE] = 'I', + }; + char error[512]; + if (p->flags) { + if ((p->flags & ~fmt)) { + snprintf(error, sizeof(error), + "Cannot nest inline formatting " + "(began with %c at %d:%d)", + p->flags == FORMAT_BOLD ? '*' : '_', + p->fmt_line, p->fmt_col); + parser_fatal(p, error); + } + fputs("\\fR", stdout); + } else { + printf("\\f%c", formats[fmt]); + p->fmt_line = p->line; + p->fmt_col = p->col; + } + p->flags ^= fmt; +} + +void +parse_linebreak(struct parser *p) +{ + uint32_t plus = parser_getch(p); + if (plus != '+') { + putchar('+'); + parser_pushch(p, plus); + return; + } + uint32_t lf = parser_getch(p); + if (lf != '\n') { + putchar('+'); + parser_pushch(p, plus); + parser_pushch(p, '\n'); + return; + } + uint32_t ch = parser_getch(p); + if (ch == '\n') + parser_fatal(p, "Explicit line breaks cannot be followed by a blank line"); + parser_pushch(p, ch); + puts("\n.br"); +} + +void +parse_text(struct parser *p) +{ + uint32_t ch, next, last = ' '; + int i = 0; + while ((ch = parser_getch(p)) != UTF8_INVALID) { + switch (ch) { + case '\\': + ch = parser_getch(p); + if (ch == UTF8_INVALID) + parser_fatal(p, "Unexpected EOF"); + else if (ch == '\\') + fputs("\\e", stdout); + else + utf8_putch(ch); + break; + case '*': + parse_format(p, FORMAT_BOLD); + break; + case '_': + next = parser_getch(p); + if (!isalnum(last) + || ((p->flags & FORMAT_UNDERLINE) && + 
!isalnum(next))) + parse_format(p, FORMAT_UNDERLINE); + else + utf8_putch(ch); + if (next == UTF8_INVALID) + return; + parser_pushch(p, next); + break; + case '+': + parse_linebreak(p); + break; + case '\n': + utf8_putch(ch); + return; + case '.': + if (!i) { + /* Escape lone dots */ + fputs("\\&.", stdout); + break; + } + /* fallthrough */ + default: + last = ch; + utf8_putch(ch); + break; + } + ++i; + } +} + +void +parse_heading(struct parser *p) +{ + uint32_t ch; + int level = 1; + while ((ch = parser_getch(p)) != UTF8_INVALID) { + if (ch == '#') + ++level; + else if (ch == ' ') + break; + else + parser_fatal(p, "Invalid start of heading (probably needs a space)"); + } + switch (level) { + case 1: + fputs(".Sh ", stdout); + break; + case 2: + fputs(".Ss ", stdout); + break; + default: + parser_fatal(p, "Only headings up to two levels deep are permitted"); + break; + } + while ((ch = parser_getch(p)) != UTF8_INVALID) { + utf8_putch(ch); + if (ch == '\n') + break; + } +} + +int +parse_indent(struct parser *p, int *indent, bool write) +{ + int i = 0; + uint32_t ch; + while ((ch = parser_getch(p)) == '\t') + ++i; + + parser_pushch(p, ch); + if (ch == '\n' && *indent != 0) { + /* Don't change indent when we encounter empty lines */ + return *indent; + } else if (write) { + if (i < *indent) + for (int j = *indent; i < j; --j) + puts(".Ed"); + else if (i == *indent + 1) + puts(".Bd -ragged -offset indent -compact"); + else if (i != *indent && ch == '\t') + parser_fatal(p, "Indented by an amount greater than 1"); + } + *indent = i; + return i; +} + +enum list_type { + BULLETED, + NUMBERED, +}; + +void +parse_list(struct parser *p, int *indent, enum list_type t) +{ + uint32_t ch; + int oldindent = *indent; + if ((ch = parser_getch(p)) != ' ') + parser_fatal(p, "Expected space before start of list entry"); + + printf(".Bl -%s -compact\n", + t == BULLETED ? 
"bullet" : "enum"); + puts(".It"); + + parse_text(p); + do { + parse_indent(p, indent, false); + if (*indent > oldindent) { + parser_getch(p); + parse_list(p, indent, t); + } else if (*indent < oldindent) { + goto ret; + } + if ((ch = parser_getch(p)) == UTF8_INVALID) + break; + switch (ch) { + case ' ': + if ((ch = parser_getch(p)) != ' ') + parser_fatal(p, "Expected two spaces for list entry continuation"); + parse_text(p); + break; + case '-': + case '.': + if ((ch = parser_getch(p)) != ' ') + parser_fatal(p, "Expected space before start of list entry"); + puts(".It"); + parse_text(p); + break; + default: + parser_pushch(p, ch); + goto ret; + } + } while (ch != UTF8_INVALID); +ret: + puts(".El"); + --*indent; +} + +void +parse_literal(struct parser *p, int *indent) +{ + uint32_t ch; + if ((ch = parser_getch(p)) != '`' || + (ch = parser_getch(p)) != '`' || + (ch = parser_getch(p)) != '\n') + parser_fatal(p, "Expected ``` and a newline to begin literal block"); + + int stops = 0; + puts(".Bd -literal -offset indent"); + do { + if ((ch = parser_getch(p)) == UTF8_INVALID) + break; + if (ch == '`') { + if (++stops == 3) { + if ((ch = parser_getch(p)) != '\n') + parser_fatal(p, "Expected literal block to end with newline"); + puts(".Ed"); + return; + } + } else { + while (stops != 0) { + putchar('`'); + --stops; + } + switch (ch) { + case '.': + fputs("\\&.", stdout); + break; + case '\\': + ch = parser_getch(p); + if (ch == UTF8_INVALID) + parser_fatal(p, "Unexpected EOF"); + else if (ch == '\\') + fputs("\\e", stdout); + else + utf8_putch(ch); + break; + default: + utf8_putch(ch); + break; + } + } + } while (ch != UTF8_INVALID); +} + +enum table_align { + ALIGN_LEFT, + ALIGN_CENTER, + ALIGN_RIGHT, +}; + +struct table_row { + struct table_cell *cell; + struct table_row *next; +}; + +struct table_cell { + enum table_align align; + str_t *contents; + struct table_cell *next; +}; + +void +parse_table(struct parser *p, uint32_t style) +{ + struct table_row *table = NULL; 
+ struct table_row *currow = NULL, *prevrow = NULL; + struct table_cell *curcell = NULL; + int column = 0; + uint32_t ch; + parser_pushch(p, '|'); + + do { + if ((ch = parser_getch(p)) == UTF8_INVALID) + break; + switch (ch) { + case '\n': + goto commit_table; + case '|': + prevrow = currow; + if ((currow = calloc(1, sizeof(struct table_row))) == NULL) + err(EXIT_FAILURE, NULL); + /* TODO: Verify the number of columns match */ + if (prevrow) + prevrow->next = currow; + if ((curcell = calloc(1, sizeof(struct table_cell))) == NULL) + err(EXIT_FAILURE, NULL); + currow->cell = curcell; + column = 0; + if (!table) + table = currow; + break; + case ':': + if (!currow) { + parser_fatal(p, "Cannot start a column without " + "starting a row first"); + } else { + struct table_cell *prev = curcell; + if ((curcell = calloc(1, sizeof(struct table_cell))) == NULL) + err(EXIT_FAILURE, NULL); + if (prev) + prev->next = curcell; + ++column; + } + break; + case ' ': + goto continue_cell; + default: + parser_fatal(p, "Expected either '|' or ':'"); + break; + } + if ((ch = parser_getch(p)) == UTF8_INVALID) + break; + switch (ch) { + case '[': + curcell->align = ALIGN_LEFT; + break; + case '-': + curcell->align = ALIGN_CENTER; + break; + case ']': + curcell->align = ALIGN_RIGHT; + break; + case ' ': + if (prevrow) { + struct table_cell *pcell = prevrow->cell; + for (int i = 0; i <= column && pcell; ++i, pcell = pcell->next) { + if (i == column) { + curcell->align = pcell->align; + break; + } + } + } else { + parser_fatal(p, "No previous row to infer alignment from"); + } + break; + default: + parser_fatal(p, "Expected one of '[', '-', ']', or ' '"); + break; + } + curcell->contents = str_create(); +continue_cell: + switch (ch = parser_getch(p)) { + case ' ': + // Read out remainder of the text + while ((ch = parser_getch(p)) != UTF8_INVALID) { + switch (ch) { + case '\n': + goto commit_cell; + default: + str_append_ch(curcell->contents, ch); + break; + } + } + break; + case '\n': + 
goto commit_cell; + default: + parser_fatal(p, "Expected ' ' or a newline"); + break; + } +commit_cell: + if (strstr(curcell->contents->str, "T{") + || strstr(curcell->contents->str, "T}")) + parser_fatal(p, "Cells cannot contain T{ or T} " + "due to roff limitations"); + } while (ch != UTF8_INVALID); +commit_table: + + if (ch == UTF8_INVALID) + return; + + puts(".TS"); + + switch (style) { + case '[': + fputs("allbox;", stdout); + break; + case ']': + fputs("box;", stdout); + break; + } + + // Print alignments first + currow = table; + while (currow) { + curcell = currow->cell; + while (curcell) { + printf("%c%s", "lcr"[curcell->align], + curcell->next ? " " : ""); + curcell = curcell->next; + } + puts(currow->next ? "" : "."); + currow = currow->next; + } + + // Then contents + currow = table; + while (currow) { + curcell = currow->cell; + puts("T{"); + while (curcell) { + parser_pushstr(p, curcell->contents->str); + parse_text(p); + if (curcell->next) + puts("\nT}\tT{"); + else + fputs("\nT}", stdout); + + struct table_cell *prev = curcell; + curcell = curcell->next; + str_free(prev->contents); + free(prev); + } + putchar('\n'); + struct table_row *prev = currow; + currow = currow->next; + free(prev); + } + + puts(".TE"); + puts(".Pp"); +} + +void +parse_document(struct parser *p) +{ + parse_preamble(p); + uint32_t ch; + int indent = 0; + do { + parse_indent(p, &indent, true); + if ((ch = parser_getch(p)) == UTF8_INVALID) + break; + switch (ch) { + case ';': + if ((ch = parser_getch(p)) != ' ') + parser_fatal(p, "Expected space after ; to begin comment"); + do + ch = parser_getch(p); + while (ch != UTF8_INVALID && ch != '\n'); + break; + case '#': + if (indent != 0) { + parser_pushch(p, ch); + parse_text(p); + break; + } + parse_heading(p); + break; + case '-': + parse_list(p, &indent, BULLETED); + break; + case '.': + if ((ch = parser_getch(p)) == ' ') { + parser_pushch(p, ch); + parse_list(p, &indent, NUMBERED); + } else { + parser_pushch(p, ch); + 
parse_text(p); + } + break; + case '`': + parse_literal(p, &indent); + break; + case '[': + case '|': + case ']': + if (indent != 0) + parser_fatal(p, "Tables cannot be indented"); + parse_table(p, ch); + break; + case ' ': + parser_fatal(p, "Tabs are required for indentation"); + break; + case '\n': + if (p->flags) { + char error[512]; + snprintf(error, sizeof(error), "Expected %c before starting " + "new paragraph (began with %c at %d:%d)", + p->flags == FORMAT_BOLD ? '*' : '_', + p->flags == FORMAT_BOLD ? '*' : '_', + p->fmt_line, p->fmt_col); + parser_fatal(p, error); + } + puts(".Pp"); + break; + default: + parser_pushch(p, ch); + parse_text(p); + break; + } + } while (ch != UTF8_INVALID); +} diff --git a/parser.h b/parser.h @@ -0,0 +1,30 @@ +#ifndef _UTIL_H +#define _UTIL_H +#define QUEUELEN 32 + +struct parser { + FILE *input; + const char *fname; + int line, col; + int qhead; + uint32_t queue[QUEUELEN]; + uint32_t flags; + const char *str; + int fmt_line, fmt_col; + int inpara; +}; + +enum formatting { + FORMAT_BOLD = 1, + FORMAT_UNDERLINE = 2, + FORMAT_LAST = 4, +}; + +void parser_close(struct parser *); +void parser_fatal(struct parser *, const char *); +uint32_t parser_getch(struct parser *); +void parser_pushch(struct parser *, uint32_t); +void parser_pushstr(struct parser *, const char *); +void parse_preamble(struct parser *); +void parse_document(struct parser *); +#endif diff --git a/str.h b/str.h @@ -1,12 +0,0 @@ -#ifndef _STR_H -#define _STR_H -typedef struct str { - char *str; - size_t len, size; -} str_t; - -str_t *str_create(); -void str_free(str_t *str); -void str_reset(str_t *str); -int str_append_ch(str_t *str, uint32_t ch); -#endif diff --git a/string.c b/string.c @@ -1,32 +1,25 @@ -#include <stddef.h> -#include <stdint.h> +#include "config.h" +#include <ctype.h> +#if HAVE_ERR +#include <err.h> +#endif #include <stdio.h> +#include <stdint.h> #include <stdlib.h> - -#include "str.h" +#include "string.h" #include "utf8.h" -static int 
-ensure_capacity(str_t *str, size_t len) -{ - if (len + 1 >= str->size) { - char *new = realloc(str->str, str->size * 2); - if (!new) - return 0; - str->str = new; - str->size *= 2; - } - return 1; -} - str_t * str_create() { - str_t *str = calloc(sizeof(str_t), 1); - str->str = malloc(16); + str_t *str; + if ((str = calloc(sizeof(str_t), 1)) == NULL) + err(EXIT_FAILURE, NULL); + if ((str->str = calloc(16, 1)) == NULL) + err(EXIT_FAILURE, NULL); + str->size = 16; str->len = 0; - str->str[0] = '\0'; return str; } @@ -39,17 +32,25 @@ str_free(str_t *str) free(str); } -int +void str_append_ch(str_t *str, uint32_t ch) { - int size = utf8_chsize(ch); - if (size <= 0) - return -1; - else if (!ensure_capacity(str, str->len + size)) - return -1; + size_t chsize = utf8_chsize(ch); + if (str->len + chsize + 1 >= str->size) { + size_t newsize = str->size * 2; + if ((str->str = reallocarray(str->str, newsize, 1)) == NULL) + err(EXIT_FAILURE, NULL); + str->size = newsize; + } utf8_encode(&str->str[str->len], ch); - str->len += size; + str->len += chsize; str->str[str->len] = '\0'; - return size; +} + +void +str_toupper(str_t *str) +{ + for (size_t i = 0; i < str->len; i++) + str->str[i] = toupper(str->str[i]); } diff --git a/string.h b/string.h @@ -0,0 +1,13 @@ +#ifndef _STR_H +#define _STR_H +typedef struct str { + char *str; + size_t len, size; +} str_t; + +str_t *str_create(); +void str_free(str_t *); +void str_reset(str_t *); +void str_append_ch(str_t *, uint32_t); +void str_toupper(str_t *); +#endif diff --git a/tests.c b/tests.c @@ -0,0 +1,122 @@ +#if TEST___PROGNAME +int +main(void) +{ + extern char *__progname; + + return !__progname; +} +#endif /* TEST___PROGNAME */ +#if TEST_ERR +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <err.h> + +int +main(void) +{ + warnx("%d. warnx", 1); + warn("%d. warn", 2); + err(0, "%d. err", 3); + /* NOTREACHED */ + return 1; +} +#endif /* TEST_ERR */ +#if TEST_GETPROGNAME +#include <stdlib.h> + +int +main(void) +{ + const char * progname; + + progname = getprogname(); + return progname == NULL; +} +#endif /* TEST_GETPROGNAME */ +#if TEST_PLEDGE +#include <unistd.h> + +int +main(void) +{ + return !!pledge("stdio", NULL); +} +#endif /* TEST_PLEDGE */ +#if TEST_PROGRAM_INVOCATION_SHORT_NAME +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <errno.h> + +int +main(void) +{ + + return !program_invocation_short_name; +} +#endif /* TEST_PROGRAM_INVOCATION_SHORT_NAME */ +#if TEST_REALLOCARRAY +#include <stdlib.h> + +int +main(void) +{ + return !reallocarray(NULL, 2, 2); +} +#endif /* TEST_REALLOCARRAY */ +#if TEST_STRTONUM +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <stdlib.h> + +int +main(void) +{ + const char *errstr; + + if (strtonum("1", 0, 2, &errstr) != 1) + return 1; + if (errstr != NULL) + return 2; + if (strtonum("1x", 0, 2, &errstr) != 0) + return 3; + if (errstr == NULL) + return 4; + if (strtonum("2", 0, 1, &errstr) != 0) + return 5; + if (errstr == NULL) + return 6; + if (strtonum("0", 1, 2, &errstr) != 0) + return 7; + if (errstr == NULL) + return 8; + return 0; +} +#endif /* TEST_STRTONUM */ diff --git a/utf8.c b/utf8.c @@ -1,7 +1,6 @@ #include <stddef.h> #include <stdint.h> #include <stdio.h> - #include "utf8.h" size_t @@ -48,7 +47,7 @@ utf8_decode(const char **char_str) ++*s; while (--size) { cp <<= 6; - cp |= **s & 0x3f; + cp |= **s & 0x3F; ++*s; } return cp; @@ -57,25 +56,25 @@ utf8_decode(const char **char_str) size_t utf8_encode(char *str, uint32_t ch) { - size_t len = 0; + size_t len; uint8_t first; - - if (ch < 0x80) { + switch(len = utf8_chsize(ch)) { + case 1: first = 0; - len = 1; - } else if (ch < 0x800) { - first = 0xc0; - len = 2; - } else if (ch < 0x10000) { - first = 0xe0; - len = 3; - } else { - first = 0xf0; - len = 4; + break; + case 2: + first = 0xC0; + break; + case 3: + first = 0xE0; + break; + default: + first = 0xF0; + break; } for (size_t i = len - 1; i > 0; --i) { - str[i] = (ch & 0x3f) | 0x80; + str[i] = (ch & 0x3F) | 0x80; ch >>= 6; } @@ -87,8 +86,8 @@ uint32_t utf8_fgetch(FILE *f) { char buffer[UTF8_MAX_SIZE]; - int c = fgetc(f); - if (c == EOF) + int c; + if ((c = fgetc(f)) == EOF) return UTF8_INVALID; buffer[0] = (char)c; @@ -108,12 +107,12 @@ utf8_fgetch(FILE *f) } size_t -utf8_fputch(FILE *f, uint32_t ch) +utf8_putch(uint32_t ch) { 
char buffer[UTF8_MAX_SIZE]; char *ptr = buffer; size_t size = utf8_encode(ptr, ch); - return fwrite(&buffer, 1, size, f); + return fwrite(&buffer, 1, size, stdout); } struct { diff --git a/utf8.h b/utf8.h @@ -7,16 +7,16 @@ #define UTF8_MAX_SIZE 4 #define UTF8_INVALID 0x80 -/* Grabs the next UTF-8 character and advances the string pointer. */ -uint32_t utf8_decode(const char **str); -/* Encodes a character as UTF-8 and returns the length of that character. */ -size_t utf8_encode(char *str, uint32_t ch); -/* Return the size of the next UTF-8 character in str */ -int utf8_size(const char *str); -/* Return the size of the UTF-8 character ch */ -size_t utf8_chsize(uint32_t ch); -/* Read and return the next character from f */ -uint32_t utf8_fgetch(FILE *f); -/* Write ch to f and return the number of bytes written */ -size_t utf8_fputch(FILE *f, uint32_t ch); +/* Get the next rune and advance the string pointer. */ +uint32_t utf8_decode(const char **); +/* Encode a character as a rune and returns the length of that character. 
*/ +size_t utf8_encode(char *, uint32_t); +/* Return the size of the next rune in the string */ +int utf8_size(const char *); +/* Return the size of a rune */ +size_t utf8_chsize(uint32_t); +/* Read and return the next rune in the stream */ +uint32_t utf8_fgetch(FILE *); +/* Write a rune to stdout and return the number of bytes written */ +size_t utf8_putch(uint32_t); #endif diff --git a/util.c b/util.c @@ -1,80 +0,0 @@ -#include <stdarg.h> -#include <stddef.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> - -#include "utf8.h" -#include "util.h" - -void -parser_fatal(struct parser *parser, const char *err) -{ - fprintf(stderr, "Error at %d:%d: %s\n", parser->line, parser->col, err); - fclose(parser->input); - fclose(parser->output); - exit(1); -} - -uint32_t -parser_getch(struct parser *parser) -{ - if (parser->qhead) { - return parser->queue[--parser->qhead]; - } else if (parser->str) { - uint32_t ch = utf8_decode(&parser->str); - if (!ch || ch == UTF8_INVALID) { - parser->str = NULL; - return UTF8_INVALID; - } - return ch; - } - uint32_t ch = utf8_fgetch(parser->input); - if (ch == '\n') { - parser->col = 0; - ++parser->line; - } else { - ++parser->col; - } - return ch; -} - -void -parser_pushch(struct parser *parser, uint32_t ch) -{ - if (ch != UTF8_INVALID) - parser->queue[parser->qhead++] = ch; -} - -void -parser_pushstr(struct parser *parser, const char *str) -{ - parser->str = str; -} - -int -roff_macro(struct parser *p, char *cmd, ...) 
-{ - FILE *f = p->output; - int l = fprintf(f, ".%s", cmd); - va_list ap; - va_start(ap, cmd); - const char *arg; - while ((arg = va_arg(ap, const char *))) { - fputc(' ', f); - fputc('"', f); - while (*arg) { - uint32_t ch = utf8_decode(&arg); - if (ch == '"') { - fputc('\\', f); - ++l; - } - l += utf8_fputch(f, ch); - } - fputc('"', f); - l += 3; - } - va_end(ap); - fputc('\n', f); - return l + 1; -} diff --git a/util.h b/util.h @@ -1,26 +0,0 @@ -#ifndef _UTIL_H -#define _UTIL_H -#define QUEUELEN 32 - -struct parser { - FILE *input, *output; - int line, col; - int qhead; - uint32_t queue[QUEUELEN]; - uint32_t flags; - const char *str; - int fmt_line, fmt_col; -}; - -enum formatting { - FORMAT_BOLD = 1, - FORMAT_UNDERLINE = 2, - FORMAT_LAST = 4, -}; - -void parser_fatal(struct parser *parser, const char *err); -uint32_t parser_getch(struct parser *parser); -void parser_pushch(struct parser *parser, uint32_t ch); -void parser_pushstr(struct parser *parser, const char *str); -int roff_macro(struct parser *p, char *cmd, ...); -#endif