文字列・整数限定の cputch_printf

文字列・整数限定の printf(3) のサブセット実装の4月1日版は、リンク先のエイプリルフール記事を見てからでっちあげたもので、隠しきれないやっつけ感が漂っています。それでも、せっかく書いたものなので、ソースコードの整理をしてみました。
追加した機能もあります。

  • j・z・t の長さ修飾子にしたがって va_arg するようにしました。
  • 整数の精度桁指定にしたがって出力するようにしました。文字列は前の版で対応しています。

相変わらず対応していない機能もあります。

  1. 浮動小数点数に未対応です。
  2. 順番指定子に未対応です。
  3. 可変桁指定に未対応です。

浮動小数点数は、そのうち対応させたいのですけど、2進浮動小数点数を10進浮動小数点数に変換するには、それなりの分量のコードを書かないといけないので、今は手が回りません。順番指定子はva_listではトラブルの元ですし、可変桁指定はprintfを2段呼べば同等になるので不要だと考えてます。なので、今後とも対応する気はありません。

このプログラムは GNU Lesser General Public License version 2.1 の下で、自由に再配布・改変できます。

/*
 *  cputch_printf - an experimental subset of the (sf)(v)printf
 *
 *  This recognizes percent format pattern:
 *      %[ 0+-]*\d*(?:[.]\d*)?(?:hh?|ll?|[tzj])?[sciduboxXp]
 *
 *  Copyright (C) 2009 MIZUTANI Tociyuki.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation;
 *  version 2.1 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  Lesser General Public License for more details.
 *  
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/* @(#) $Id: cputch_printf.c,v 0.1 2009/04/04 08:08:58Z tociyuki Exp $ */

#include <stddef.h>
#include <sys/types.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdarg.h>

/* Largest accumulated value to which any further decimal digit can be
 * appended (value * 10 + 9) without exceeding INT32_MAX.  The width and
 * precision parsers stop accumulating digits once they pass this bound.
 */
#define INT_SAFE32_DIGIT (INT32_MAX / 10 - 1)

/* Marker placed on helper functions internal to this file; expands to nothing. */
#define PRIVATE

/* cputch_t is an abstract struct
 *    initialized with concrete constructors cstream_putch or cstring_putch.
 *    It is the character sink that the formatter writes through.
 */
typedef struct cputch_s {
/* private slots */
    FILE *stream;           /* destination stream; set by cstream_putch, NULL otherwise */
    unsigned char *buffer;  /* start of the destination buffer; set by cstring_putch, NULL otherwise */
    unsigned char *cursor;  /* next write position inside buffer */
    unsigned char *bound;   /* last buffer position; characters are stored only below it, leaving room for '\0' */
/* public slots */
    int printed;            /* count of characters emitted, or EOF once an output error occurred */
    int (*call)(struct cputch_s *, unsigned char);              /* emit one character; EOF on error */
    int (*times)(struct cputch_s *, size_t, unsigned char);     /* emit the character the given number of times */
    int (*atmost)(struct cputch_s *, size_t, unsigned char *);  /* emit at most that many characters of a string */
} cputch_t;

/* Return EOF (almost always -1) from the enclosing function when the
 * expression C evaluates to EOF.  C is evaluated exactly once.
 * The do { } while (0) wrapper makes the macro a single statement, so it can
 * be used as the body of an if/else or a loop without capturing a following
 * else.
 */
#define return_EOF_iferr(C) do { if ((C) == EOF) return EOF; } while (0)

/* Flag bits accumulated in cpinfo_t.justify, one per format flag character. */
enum {
    PADDING_BLANK = 1,  /* ' ' flag; recorded, but none of the print routines in this file test it */
    PADDING_ZERO  = 2,  /* '0' flag: fill the field width with zeros for numbers */
    JUSTIFY_LEFT  = 4,  /* '-' flag: put the padding on the right */
    ALWAYS_SIGN   = 8   /* '+' flag: print '+' in front of nonzero positive numbers */
};

/* Digit characters indexed by digit value (0..15), lower and upper case.
 * Declared as unsigned char so they match the cpinfo_t.digit slot exactly.
 */
static const unsigned char LOWER_DIGIT[] = "0123456789abcdef";
static const unsigned char UPPER_DIGIT[] = "0123456789ABCDEF";

/* cpinfo_t is a concrete struct
 *     initialized with constructor cpinfo.
 *     It holds everything parsed from one percent conversion specification.
 */
typedef struct cpinfo_s {
/* public slots */
    int justify;                /* OR of PADDING_BLANK, PADDING_ZERO, JUSTIFY_LEFT, ALWAYS_SIGN */
    size_t width;               /* minimum field width; 0 when not given */
    int prec;                   /* precision; -1 until precision digits are parsed */
    unsigned char minortype;    /* length modifier: ' ' none, 'h', 'l', 'z', 't', 'c' for "hh", 'j' for "j" or "ll" */
    unsigned char majortype;    /* conversion character; '%' until one is parsed */
    unsigned char sign;         /* '-' or '+' to print, or ' ' for no sign */
    unsigned int base;          /* radix used to convert integers */
    const unsigned char *digit; /* digit table, LOWER_DIGIT or UPPER_DIGIT */
    size_t nzero;               /* number of leading zeros needed to satisfy the precision */
    void (*clear)(struct cpinfo_s *);                       /* reset all data slots to defaults */
    void (*set_justify)(struct cpinfo_s *, unsigned char);  /* record one flag character */
    void (*push_width)(struct cpinfo_s *, unsigned char);   /* append one decimal digit to width */
    void (*push_prec)(struct cpinfo_s *, unsigned char);    /* append one decimal digit to prec */
    int  (*print_va)(struct cpinfo_s *, cputch_t *, va_list); /* fetch the argument and print it; EOF on error */
} cpinfo_t;

/* Reset every data slot of self to the state expected right after a '%'
 * has been read: no flags, no width, unspecified precision, decimal
 * lower-case digits, no sign and no length modifier.
 */
static void
cpinfo_clear(cpinfo_t *self)
{
    /* flags and field geometry */
    self->justify = 0;
    self->width = 0;
    self->nzero = 0;
    self->prec = -1; /* stays negative until a precision is parsed */

    /* conversion selection */
    self->majortype = '%';
    self->minortype = ' ';

    /* number rendering */
    self->sign = ' ';
    self->digit = LOWER_DIGIT;
    self->base = 10;
}

/* Record the flag character ch ('0', ' ', '+' or '-') in self->justify.
 * Any other character leaves the flags untouched.
 */
static void
cpinfo_set_justify(cpinfo_t *self, unsigned char ch)
{
    int flag = 0;

    if (ch == '0')
        flag = PADDING_ZERO;
    else if (ch == ' ')
        flag = PADDING_BLANK;
    else if (ch == '+')
        flag = ALWAYS_SIGN;
    else if (ch == '-')
        flag = JUSTIFY_LEFT;

    self->justify |= flag;
}

/* Append the decimal digit character ch to the field width.
 * Once the width has grown past INT_SAFE32_DIGIT further digits are dropped.
 */
static void
cpinfo_push_width(cpinfo_t *self, unsigned char ch)
{
    if (self->width > INT_SAFE32_DIGIT)
        return;

    self->width = self->width * 10 + ch - '0';
}

/* Append the decimal digit character ch to the precision.
 * The first digit turns the "unspecified" marker (-1) into 0 before
 * accumulating.  Once the precision has grown past INT_SAFE32_DIGIT further
 * digits are dropped.
 */
static void
cpinfo_push_prec(cpinfo_t *self, unsigned char ch)
{
    if (self->prec < 0)
        self->prec = 0;
    /* Add the digit VALUE, not the character code: prec * 10 + ch on its own
     * can exceed INT32_MAX even when prec is within the safe bound. */
    if (self->prec <= INT_SAFE32_DIGIT)
        self->prec = self->prec * 10 + (ch - '0');
}

/* Reduce the parsed conversion character to one of the canonical kinds
 * handled by the printer and set the radix and digit table to match:
 *   'i' becomes 'd'; 'b', 'o', 'x', 'X' become 'u' with base 2, 8, 16, 16
 *   ('X' also selects upper-case digits); 'p' keeps its kind but uses base 16.
 * Every other character is left as it is.
 */
PRIVATE static void
cpinfo_unalias(cpinfo_t *self)
{
    const unsigned char kind = self->majortype;
    unsigned int radix;

    if (kind == 'i') {
        self->majortype = 'd';
        return;
    }
    if (kind == 'p') {
        self->base = 16;
        return;
    }

    if (kind == 'b') {
        radix = 2;
    }
    else if (kind == 'o') {
        radix = 8;
    }
    else if (kind == 'x' || kind == 'X') {
        if (kind == 'X')
            self->digit = UPPER_DIGIT;
        radix = 16;
    }
    else {
        return; /* not an alias */
    }

    self->base = radix;
    self->majortype = 'u';
}

/* Print the string s inside a field of at least self->width characters.
 *
 * A non-negative self->prec limits how many characters of s are printed.
 * Padding is always done with blanks, on the right when JUSTIFY_LEFT is set
 * and on the left otherwise.
 *
 * Returns the field width actually used, or EOF if the sink reports an error.
 */
PRIVATE static int
cpinfo_print_string(cpinfo_t *self, cputch_t *putch, unsigned char *s)
{
    size_t length, width;

    if (self->prec >= 0) {
        /* Look for the terminator only within the first prec bytes, so a
         * buffer that is bounded by the precision rather than by '\0' is
         * never read past that bound. */
        const unsigned char *nul = memchr(s, '\0', (size_t) self->prec);

        length = (nul != NULL) ? (size_t) (nul - s) : (size_t) self->prec;
    }
    else {
        length = strlen((const char *) s);
    }
    width = self->width;
    if (width < length)
        width = length;
    if (self->justify & JUSTIFY_LEFT) {
        return_EOF_iferr(putch->atmost(putch, length, s));
        if (width > length)
            return_EOF_iferr(putch->times(putch, width - length, ' '));
    }
    else {
        if (width > length)
            return_EOF_iferr(putch->times(putch, width - length, ' '));
        return_EOF_iferr(putch->atmost(putch, length, s));
    }
    return width;
}

/* Lay out an already converted number inside its field.
 *
 *   length  total characters the number occupies: digits in s plus
 *           self->nzero precision zeros plus one for the sign, if any
 *   s       NUL-terminated digit string
 *
 * Three layouts exist:
 *   JUSTIFY_LEFT  sign, precision zeros, digits, blank padding
 *   PADDING_ZERO  sign, precision zeros plus zero padding, digits
 *   otherwise     blank padding, sign, precision zeros, digits
 *
 * Returns the field width actually used, or EOF if the sink reports an error.
 */
PRIVATE static int
cpinfo_print_digit(cpinfo_t *self, cputch_t *putch, size_t length, unsigned char *s)
{
    const size_t field = (self->width < length) ? length : self->width;
    const size_t pad = field - length;
    const int has_sign = (self->sign != ' ');
    size_t zeros = self->nzero;

    if (self->justify & JUSTIFY_LEFT) {
        if (has_sign) {
            return_EOF_iferr(putch->call(putch, self->sign));
        }
        return_EOF_iferr(putch->times(putch, zeros, '0'));
        return_EOF_iferr(putch->atmost(putch, length, s));
        if (pad > 0) {
            return_EOF_iferr(putch->times(putch, pad, ' '));
        }
        return field;
    }

    if (self->justify & PADDING_ZERO) {
        if (has_sign) {
            return_EOF_iferr(putch->call(putch, self->sign));
        }
        zeros += pad; /* the field padding is rendered as extra zeros */
        return_EOF_iferr(putch->times(putch, zeros, '0'));
        return_EOF_iferr(putch->atmost(putch, length, s));
        return field;
    }

    if (pad > 0) {
        return_EOF_iferr(putch->times(putch, pad, ' '));
    }
    if (has_sign) {
        return_EOF_iferr(putch->call(putch, self->sign));
    }
    return_EOF_iferr(putch->times(putch, zeros, '0'));
    return_EOF_iferr(putch->atmost(putch, length, s));
    return field;
}

/* Convert u to text in radix self->base using the self->digit table and
 * print it through cpinfo_print_digit.
 *
 * self->sign must already hold the sign to print (' ' for none).
 * The precision is the minimum number of digits; missing digits are supplied
 * as leading zeros via self->nzero.  A zero value with an explicit precision
 * of zero produces no digits at all.
 *
 * Returns the field width used, or EOF on a sink error or if the digits do
 * not fit the conversion buffer.
 */
PRIVATE static int
cpinfo_print_uint(cpinfo_t *self, cputch_t *putch, uintmax_t u)
{
    /* large enough for base 2: one digit per bit, plus slack and the '\0' */
    unsigned char str[sizeof(intmax_t) * 8 + 8];
    unsigned char *s;
    size_t length;

    /* printf(3): when a precision is given with an integer conversion,
     * the '0' flag is ignored */
    if (self->prec >= 0)
        self->justify &= ~PADDING_ZERO;

    /* digits are generated right to left, starting at the terminator */
    s = str + sizeof(str) - 1;
    *s = '\0';
    length = 0;
    if (u == 0 && self->prec == 0) {
        /* becomes empty. see man page printf(3) */
        self->sign = ' ';
        self->justify = 0;
        self->nzero = 0;
    }
    else {
        do {
            *--s = self->digit[u % self->base];
            u = u / self->base;
            length++;
        } while (u != 0 && s > str);
        if (u != 0)              /* str full */
            return EOF;
        if (self->prec <= 0)     /* default precision. see man page printf(3) */
            self->prec = 1;
        if ((size_t) self->prec > length) /* number of additional zero marks for precision */
            self->nzero = (size_t) self->prec - length;
        else
            self->nzero = 0;

        /* print_digit expects the full occupied length: zeros and sign included */
        length += self->nzero;
        if (self->sign != ' ')
            length++;
    }
    return cpinfo_print_digit(self, putch, length, s);
}

/* %s: take the next argument as a pointer to a string and print it
 * with cpinfo_print_string.  Returns that function's result.
 */
PRIVATE static int
cpinfo_print_vs(cpinfo_t *self, cputch_t *putch, va_list ap)
{
    unsigned char *text = va_arg(ap, void*);

    return cpinfo_print_string(self, putch, text);
}

/* %c: take the next argument as a character, wrap it in a one-character
 * string and print it with cpinfo_print_string.  Returns that function's
 * result.
 */
PRIVATE static int
cpinfo_print_vc(cpinfo_t *self, cputch_t *putch, va_list ap)
{
    unsigned char one[2] = { '\0', '\0' };

    one[0] = (unsigned char) va_arg(ap, unsigned int);
    return cpinfo_print_string(self, putch, one);
}

/* %d / %i: fetch a signed integer of the size selected by self->minortype,
 * split it into sign and magnitude, and print it with cpinfo_print_uint.
 *
 * minortype codes: 'c' for "hh", 'h', 'l', 'z', 't', 'j' for "j" or "ll";
 * anything else reads a plain int.
 *
 * Returns the field width used, or EOF on error.
 */
PRIVATE static int
cpinfo_print_vd(cpinfo_t *self, cputch_t *putch, va_list ap)
{
    intmax_t d;
    uintmax_t u;

    switch (self->minortype) {
    case 'c':
        /* char and short arguments arrive promoted to int */
        d = (int8_t) va_arg(ap, int);
        break;
    case 'h':
        d = (int16_t) va_arg(ap, int);
        break;
    case 'l':
        d = (long int) va_arg(ap, long int);
        break;
    case 'z':
        d = (ssize_t) va_arg(ap, ssize_t);
        break;
    case 't':
        d = (ptrdiff_t) va_arg(ap, ptrdiff_t);
        break;
    case 'j': /* 'j' or 'll' */
        d = (intmax_t) va_arg(ap, intmax_t);
        break;
    default:
        d = (int) va_arg(ap, int);
        break;
    }
    if (d < 0) {
        /* negate in unsigned arithmetic: -d overflows when d is INTMAX_MIN */
        u = (uintmax_t) 0 - (uintmax_t) d;
        self->sign = '-';
    }
    else {
        u = (uintmax_t) d;
        /* NOTE(review): zero gets no '+' here, whereas printf(3) prints "+0"
         * for "%+d" - kept as is, confirm whether this is intended */
        if (u != 0 && self->justify & ALWAYS_SIGN)
            self->sign = '+';
    }
    return cpinfo_print_uint(self, putch, u);
}

/* %u and the conversions aliased to it (%b, %o, %x, %X): fetch an unsigned
 * integer of the size selected by self->minortype and print it with
 * cpinfo_print_uint.
 *
 * minortype codes: 'c' for "hh", 'h', 'l', 'z', 't', 'j' for "j" or "ll";
 * anything else reads a plain unsigned int.
 *
 * Returns the field width used, or EOF on error.
 */
PRIVATE static int
cpinfo_print_vu(cpinfo_t *self, cputch_t *putch, va_list ap)
{
    uintmax_t u;

    switch (self->minortype) {
    case 'c':
        /* char and short arguments arrive promoted to int */
        u = (uint8_t) va_arg(ap, unsigned int);
        break;
    case 'h':
        u = (uint16_t) va_arg(ap, unsigned int);
        break;
    case 'l':
        u = (unsigned long int) va_arg(ap, unsigned long int);
        break;
    case 'z':
        u = (size_t) va_arg(ap, size_t);
        break;
    case 't':
        /* The argument is the unsigned counterpart of ptrdiff_t, a type with
         * no standard name: fetch it at ptrdiff_t size and keep only the
         * value bits of that width so a set top bit is not sign-extended. */
        u = (uintmax_t) va_arg(ap, ptrdiff_t)
            & ((((uintmax_t) PTRDIFF_MAX) << 1) | 1u);
        break;
    case 'j': /* 'j' or 'll' */
        u = (uintmax_t) va_arg(ap, uintmax_t);
        break;
    default:
        u = (unsigned int) va_arg(ap, unsigned int);
        break;
    }
    return cpinfo_print_uint(self, putch, u);
}

/* %p: take the next argument as a void pointer, convert it to an integer
 * and print it with cpinfo_print_uint.  Returns that function's result.
 */
PRIVATE static int
cpinfo_print_vp(cpinfo_t *self, cputch_t *putch, va_list ap)
{
    const uintptr_t address = (uintptr_t) va_arg(ap, void *);

    return cpinfo_print_uint(self, putch, (uintmax_t) address);
}

/* Print one parsed conversion, fetching its argument from ap.
 *
 * self->majortype selects the conversion.  "%%" prints a literal percent
 * sign and consumes no argument.  Aliases are first reduced by
 * cpinfo_unalias, then the request is dispatched to the 's', 'c', 'd', 'u'
 * or 'p' printer.
 *
 * Returns a non-negative value on success, or EOF if the sink reports an
 * error or the conversion character is not supported.
 */
static int
cpinfo_print_va(cpinfo_t *self, cputch_t *putch, va_list ap)
{
    int err;

    if (self->majortype == '%') {
        /* Done after the literal '%': falling into the switch below would
         * hit the default case and report EOF for a valid "%%". */
        return_EOF_iferr(putch->call(putch, '%'));
        return 1;
    }

    cpinfo_unalias(self);
    switch (self->majortype) {
    case 's':
        err = cpinfo_print_vs(self, putch, ap);
        break;
    case 'c':
        err = cpinfo_print_vc(self, putch, ap);
        break;
    case 'd':
        err = cpinfo_print_vd(self, putch, ap);
        break;
    case 'u':
        err = cpinfo_print_vu(self, putch, ap);
        break;
    case 'p':
        err = cpinfo_print_vp(self, putch, ap);
        break;
    default:
        /* unsupported conversion character */
        err = EOF;
        break;
    }
    return err;
}

/* cpinfo constructor: install the method slots on self, reset the data
 * slots to their defaults and return self.
 */
static cpinfo_t *
cpinfo(cpinfo_t *self)
{
    /* printing */
    self->print_va = cpinfo_print_va;

    /* parsing helpers */
    self->push_prec = cpinfo_push_prec;
    self->push_width = cpinfo_push_width;
    self->set_justify = cpinfo_set_justify;
    self->clear = cpinfo_clear;

    /* start from a clean specification */
    cpinfo_clear(self);

    return self;
}

/* cputch_vprintf - printf(3)-like formatter writing through a cputch_t sink.
 *
 *   self  output sink; self->printed is reset to 0 on entry
 *   fmt   NUL-terminated format string
 *   ap    arguments for the conversions found in fmt
 *
 * Returns self->printed, the number of characters counted by the sink during
 * this call, or EOF when the sink reports an error or a conversion character
 * is not supported.
 *
 * NOTE(review): ap is handed by value to print_va, which calls va_arg on it.
 * ISO C leaves ap indeterminate in the caller after that, so formats with
 * more than one conversion appear to rely on va_list being an array type
 * (as on x86-64 System V) - confirm before porting.
 */
int
cputch_vprintf(cputch_t *self, const unsigned char *fmt, va_list ap)
{
    /* Deterministic Finite Automaton (DFA) for the fmt
     *   0: literal text          3: precision digits
     *   1: flags, just after '%' 4: after a single 'h' or 'l'
     *   2: width digits          5: length modifier complete
     */
    int state = 0;
    unsigned char ch;

    /* percent format information */
    cpinfo_t pinfo_instance;
    cpinfo_t *pinfo = cpinfo(&pinfo_instance);

    self->printed = 0; /* reset the counter to count printed characters */
    while ((ch = *fmt++) != '\0') {
        if (state == 0) {
            if (ch == '%') {
                pinfo->clear(pinfo);
                state = 1;
            }
            else {
                return_EOF_iferr(self->call(self, ch));
            }
            continue;
        }
        if (state < 3 && ch == '.') {
            /* a period with no digits after it means precision zero */
            pinfo->prec = 0;
            state = 3;
            continue;
        }
        if (state < 4 && strchr("hl", ch)) {
            pinfo->minortype = ch;
            state = 4;
            continue;
        }
        if (state < 4 && strchr("jzt", ch)) { /* "Ljzt" for float */
            pinfo->minortype = ch;
            state = 5;
            continue;
        }
        switch(state) {
        case 1:
            if (strchr(" 0+-", ch)) {
                pinfo->set_justify(pinfo, ch);
                continue;
            }
            /* fall through */
        case 2:
            if ('0' <= ch && ch <= '9') {
                pinfo->push_width(pinfo, ch);
                state = 2;
                continue;
            }
            break;
        case 3:
            if ('0' <= ch && ch <= '9') {
                pinfo->push_prec(pinfo, ch);
                continue;
            }
            break;
        case 4:
            /* a second 'h' or 'l' completes "hh" (code 'c') or "ll" (code 'j') */
            if (ch == 'h') {
                pinfo->minortype = 'c';
                state = 5;
                continue;
            }
            else if (ch == 'l') {
                pinfo->minortype = 'j';
                state = 5;
                continue;
            }
            break;
        }
        /* anything not consumed above is the conversion character */
        pinfo->majortype = ch;
        return_EOF_iferr(pinfo->print_va(pinfo, self, ap));
        state = 0;
    }

    return self->printed; /* success. return number of printed characters */
}

/* Variadic front end of cputch_vprintf: collect the arguments that follow
 * fmt and format them through self.  Returns what cputch_vprintf returns.
 */
int
cputch_printf(cputch_t *self, const unsigned char *fmt, ...)
{
    int result;
    va_list args;

    va_start(args, fmt);
    result = cputch_vprintf(self, fmt, args);
    va_end(args);

    return result;
}

/* Emit the characters of s through self->call, stopping at the terminating
 * '\0' or after most characters, whichever comes first.
 * Returns the number of characters emitted, or EOF if self->call fails.
 */
static int
cputch_atmost(cputch_t *self, size_t most, unsigned char *s)
{
    size_t sent = 0;

    while (sent < most) {
        const unsigned char c = s[sent];

        if (c == '\0')
            break;
        if (self->call(self, c) == EOF)
            return EOF;
        sent++;
    }

    return sent;
}

/* Emit the character c through self->call n times.
 * Returns n, or EOF if self->call fails.
 */
static int
cputch_times(cputch_t *self, size_t n, unsigned char c)
{
    size_t remaining = n;

    while (remaining > 0) {
        if (self->call(self, c) == EOF)
            return EOF;
        remaining--;
    }

    return n;
}

/* Base initializer shared by the concrete constructors: clear the private
 * slots and the counter, install the generic times/atmost helpers and leave
 * call unset for the concrete constructor to fill in.  Returns self.
 */
cputch_t *
cputch(cputch_t *self)
{
    /* private slots: no destination yet */
    self->stream = NULL;
    self->buffer = NULL;
    self->cursor = NULL;
    self->bound = NULL;

    /* public slots */
    self->printed = 0;
    self->call = NULL;
    self->atmost = cputch_atmost;
    self->times = cputch_times;

    return self;
}

/* Write one character to self->stream with fputc.
 * Once an error has been recorded (printed == EOF) every later call fails
 * immediately.  On success the counter is incremented and fputc's result
 * is returned; on failure the counter becomes EOF and EOF is returned.
 */
static int
cstream_putch_call(cputch_t *self, unsigned char ch)
{
    int written;

    if (self->printed == EOF)
        return EOF;

    written = fputc(ch, self->stream);
    if (written == EOF) {
        self->printed = EOF;
        return EOF;
    }

    self->printed++;
    return written;
}

/* Constructor for a sink that writes to the stdio stream fh.
 * Returns self, or NULL (leaving self untouched) when fh is NULL.
 */
cputch_t *
cstream_putch(cputch_t *self, FILE *fh)
{
    cputch_t *sink;

    if (fh == NULL)
        return NULL;

    sink = cputch(self);
    sink->stream = fh;
    sink->call = cstream_putch_call;

    return sink;
}

/* Append one character to the string buffer and keep it NUL-terminated.
 * Once an error has been recorded (printed == EOF) every later call fails
 * immediately.  When the cursor has reached the bound the buffer is full:
 * the counter becomes EOF and EOF is returned.  Otherwise the counter is
 * incremented and the character is returned.
 */
static int
cstring_putch_call(cputch_t *self, unsigned char ch)
{
    if (self->printed == EOF)
        return EOF;

    if (self->cursor >= self->bound) {
        /* no room left in front of the terminator */
        self->printed = EOF;
        return EOF;
    }

    self->cursor[0] = ch;
    self->cursor[1] = '\0';
    self->cursor++;
    self->printed++;

    return ch;
}

/* Constructor for a sink that writes into the caller's buffer s of n bytes.
 *
 * The last byte is reserved for the terminating '\0', so at most n - 1
 * characters are stored.  The buffer starts out as an empty string so it is
 * a valid C string even if nothing is ever printed into it.
 *
 * Returns self, or NULL (leaving self untouched) when s is NULL or n is
 * less than 2.
 */
cputch_t *
cstring_putch(cputch_t *self, size_t n, unsigned char *s)
{
    if (s == NULL || n < 2)
        return NULL;

    self = cputch(self);
    self->call = cstring_putch_call;
    self->buffer = s;
    self->cursor = s;
    self->bound = s + n - 1;
    s[0] = '\0'; /* readable as "" before the first character arrives */

    return self;
}

/* Demonstration driver: prints a fixed list of sample conversions, each
 * between colons so the padding is visible.  Built with USE_STRING_CPUTCH
 * the output is collected in a local buffer and written to stdout at the
 * end; otherwise it is written to stdout directly.
 * The command line arguments are not used.
 */
int main(int argc, char *argv[])
{
    cputch_t putch_instance;
    cputch_t *putch;

#ifdef USE_STRING_CPUTCH
    unsigned char got[4096];
    putch = cstring_putch(&putch_instance, sizeof(got), got);
#else
    putch = cstream_putch(&putch_instance, stdout);
#endif

    /* strings: width, left justification, precision */
    cputch_printf(putch, ":%s:\n", "hello, world");
    cputch_printf(putch, ":%10s:\n", "hello, world");
    cputch_printf(putch, ":%-10s:\n", "hello, world");
    cputch_printf(putch, ":%20s:\n", "hello, world");
    cputch_printf(putch, ":%-20s:\n", "hello, world");
    cputch_printf(putch, ":%020.10s:\n", "hello, world");
    cputch_printf(putch, ":%-20.10s:\n", "hello, world");
    cputch_printf(putch, ":%.10s:\n", "hello, world");
    cputch_printf(putch, ":%20.0s:\n", "hello, world");
    /* single characters with the same variations */
    cputch_printf(putch, ":%c:\n", 'a');
    cputch_printf(putch, ":%10c:\n", 'a');
    cputch_printf(putch, ":%-10c:\n", 'a');
    cputch_printf(putch, ":%020.10c:\n", 'a');
    cputch_printf(putch, ":%-20.10c:\n", 'a');
    cputch_printf(putch, ":%.10c:\n", 'a');
    cputch_printf(putch, ":%20.0c:\n", 'a');
    /* signed decimals: negative, positive, flags, precision */
    cputch_printf(putch, ":%d:\n", -1234);
    cputch_printf(putch, ":%8d:\n", -1234);
    cputch_printf(putch, ":%08d:\n", -1234);
    cputch_printf(putch, ":%-8d:\n", -1234);
    cputch_printf(putch, ":%-08d:\n", -1234);
    cputch_printf(putch, ":%d:\n", 1234);
    cputch_printf(putch, ":%8d:\n", 1234);
    cputch_printf(putch, ":%08d:\n", 1234);
    cputch_printf(putch, ":%-8d:\n", 1234);
    cputch_printf(putch, ":%-08d:\n", 1234);
    cputch_printf(putch, ":%+8d:\n", 1234);
    cputch_printf(putch, ":%+08d:\n", 1234);
    cputch_printf(putch, ":%-+8d:\n", 1234);
    cputch_printf(putch, ":%-+08d:\n", 1234);
    cputch_printf(putch, ":%2d:\n", 12345);
    cputch_printf(putch, ":%+8.4d:\n", 900);
    /* 32-bit boundary values, read as signed and as unsigned */
    cputch_printf(putch, ":%d:\n", 0x7fffffff);
    cputch_printf(putch, ":%d:\n", 0x80000000);
    cputch_printf(putch, ":%d:\n", 0xffffffff);
    cputch_printf(putch, ":%u:\n", 0x7fffffff);
    cputch_printf(putch, ":%u:\n", 0x80000000);
    cputch_printf(putch, ":%u:\n", 0xffffffff);
    /* other radixes */
    cputch_printf(putch, ":%x:\n", 0xfedcba98);
    cputch_printf(putch, ":%X:\n", 0xfedcba98);
    cputch_printf(putch, ":%b:\n", 0xfedcba98);
    /* length modifiers and pointers */
    cputch_printf(putch, ":%hhd:\n", 0x12ff);
    cputch_printf(putch, ":%hhu:\n", 0x12ff);
    cputch_printf(putch, ":%08p:\n", main);
    cputch_printf(putch, ":%lld:\n", 0x7fffffffffffffffLL);
    cputch_printf(putch, ":%lld:\n", 0x8fffffffffffffffLL);
    cputch_printf(putch, ":%llu:\n", 0x8fffffffffffffffULL);
    cputch_printf(putch, ":%llx:\n", 0x8fffffffffffffffULL);

#ifdef USE_STRING_CPUTCH
    /* the string sink appends across calls, so got holds all the output */
    fputs(got, stdout);
#endif

    return EXIT_SUCCESS;
}