書式付きアウトプット関数 bprintf

全世界的冗談日連動記事として :-p

[結] 2009年4月 - 結城浩の日記
「まなびや」ではアウトプットを重視します。 せっかく時間を掛けて学ぶのですから、 学んだ結果を「形」に残すところまでやりましょう。

本ダイアリーでは「アウトプット」の中でも、とりわけ下層レベルの書式付きアウトプットを重視し、printf(3) 関数の仕組みを時間をかけて学び、学んだ結果をソースコードという「形」でここに残します。bprintf は文字列、文字、整数にのみ対応している snprintf(3) 関数のサブセットを「車輪の再発明」したものです。

#include <ctype.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Largest accumulated width/precision value that can still absorb one more
 * decimal digit (value * 10 + 9) without exceeding INT32_MAX.
 */
#define INT_SAFE32_DIGIT ((INT32_MAX / 10) - 1)

/* Short aliases for the unsigned and long integer types used in this file. */
typedef unsigned char uchar_t;
typedef unsigned uint_t;
typedef unsigned long uintlong_t;
typedef long intlong_t;

/* Bit flags stored in template_t.justify, one per printf flag character. */
enum {
    PADDING_BLANK = 1 << 0,   /* ' ' flag */
    PADDING_ZERO  = 1 << 1,   /* '0' flag */
    JUSTIFY_LEFT  = 1 << 2,   /* '-' flag */
    ALWAYS_SIGN   = 1 << 3    /* '+' flag */
};

/*
 * Digit tables indexed by (value % base) for bases up to 16.
 * Declared as unsigned char so they can be assigned to the
 * `const unsigned char *` digit pointer of a template without a
 * pointer-signedness mismatch; the duplicated `const` qualifier is dropped.
 */
static const unsigned char LOWER_DIGIT[] = "0123456789abcdef";
static const unsigned char UPPER_DIGIT[] = "0123456789ABCDEF";

typedef struct template_s {
    int justify;
    size_t width;
    size_t prec;
    uchar_t modifier;
    uchar_t fieldtype;
    uchar_t sign;
    unsigned int base;
    const uchar_t *digit;
} template_t;

/* Output target: a caller-supplied byte buffer plus a write cursor. */
typedef struct builder_s {
    uchar_t *buffer;    /* destination storage */
    size_t length;      /* capacity of buffer in bytes */
    size_t pos;         /* index of the next byte to write */
} builder_t;

/*
 * Reset a template to its defaults: no flags, no width or precision,
 * no length modifier, '%' as field type, no sign, decimal lower-case digits.
 */
static void
template_clear(template_t *tmpl)
{
    const template_t defaults = {
        .justify = 0,
        .width = 0,
        .prec = 0,
        .modifier = ' ',
        .fieldtype = '%',
        .sign = ' ',
        .base = 10,
        .digit = (const uchar_t *) LOWER_DIGIT
    };

    *tmpl = defaults;
}

/* Return non-zero when any flag bit selected by mask is set in the template. */
static inline int
isjustify(template_t *tmpl, int mask)
{
    const int flags = tmpl->justify;

    return flags & mask;
}

/*
 * Record one flag character ('0', ' ', '+' or '-') in the template's
 * justify bit set. Any other character is ignored.
 */
static void
template_set_justify(template_t *tmpl, uchar_t ch)
{
    int flag;

    if (ch == '0')
        flag = PADDING_ZERO;
    else if (ch == ' ')
        flag = PADDING_BLANK;
    else if (ch == '+')
        flag = ALWAYS_SIGN;
    else if (ch == '-')
        flag = JUSTIFY_LEFT;
    else
        return;
    tmpl->justify |= flag;
}

/*
 * Append decimal digit ch to the field width. Once the width has grown past
 * INT_SAFE32_DIGIT further digits are dropped.
 */
static inline void
template_push_width(template_t *tmpl, uchar_t ch)
{
    if (tmpl->width > INT_SAFE32_DIGIT)
        return;
    tmpl->width = tmpl->width * 10 + ch - '0';
}

/*
 * Append decimal digit ch to the precision. Once the precision has grown
 * past INT_SAFE32_DIGIT further digits are dropped.
 */
static inline void
template_push_prec(template_t *tmpl, uchar_t ch)
{
    if (tmpl->prec > INT_SAFE32_DIGIT)
        return;
    tmpl->prec = tmpl->prec * 10 + ch - '0';
}

/*
 * Map conversion characters onto the handlers the dispatcher knows:
 *   'i'           -> 'd'
 *   'b' 'o' 'x'   -> 'u' with base 2 / 8 / 16
 *   'X'           -> 'u' with base 16 and upper-case digits
 *   'p'           -> stays 'p', base 16
 * Every other field type is left untouched.
 */
static void
template_unalias(template_t *tmpl)
{
    const uchar_t type = tmpl->fieldtype;

    if (type == 'i') {
        tmpl->fieldtype = 'd';
        return;
    }

    switch (type) {
    case 'b':
        tmpl->base = 2;
        break;
    case 'o':
        tmpl->base = 8;
        break;
    case 'X':
        tmpl->digit = (const uchar_t *) UPPER_DIGIT;
        /* fall through */
    case 'x':
    case 'p':
        tmpl->base = 16;
        break;
    default:
        return;
    }

    /* Pointers keep their own field type; the rest print as unsigned. */
    if (type != 'p')
        tmpl->fieldtype = 'u';
}

/*
 * Append one byte to the builder and keep the buffer NUL-terminated.
 * The byte is silently dropped when there is no room left for it plus the
 * terminator.
 */
static inline void
bputc(uchar_t ch, builder_t *bh)
{
    if (bh->pos + 1 >= bh->length)
        return;
    bh->buffer[bh->pos] = ch;
    bh->pos += 1;
    bh->buffer[bh->pos] = '\0';
}

/* Append the first `length` bytes starting at s to the builder. */
static void
bnputs(uchar_t *s, size_t length, builder_t *bh)
{
    while (length-- > 0)
        bputc(*s++, bh);
}

/*
 * Append `length` copies of byte c to the builder, truncating at the
 * buffer's capacity and keeping the result NUL-terminated.
 *
 * The count is clamped to the remaining room up front so that a very large
 * field width costs at most one pass over the free space instead of
 * `length` no-op iterations.
 */
static void
bnputc(uchar_t c, size_t length, builder_t *bh)
{
    size_t room;

    /* Nothing to write, or no room for one byte plus the terminator. */
    if (length == 0 || bh->pos + 1 >= bh->length)
        return;

    room = bh->length - bh->pos - 1;    /* one byte reserved for '\0' */
    if (length > room)
        length = room;

    memset(bh->buffer + bh->pos, c, length);
    bh->pos += length;
    bh->buffer[bh->pos] = '\0';
}

/*
 * Emit the padding needed to widen a field of `length` bytes to `width`
 * bytes using fill byte c. Emits nothing when the field is already wide
 * enough.
 */
static void
bnputx(uchar_t c, size_t width, size_t length, builder_t *bh)
{
    if (width <= length)
        return;
    bnputc(c, width - length, bh);
}

/*
 * Write string s into the builder according to the template's width,
 * precision and justification flags.
 *
 *   - A NULL s is printed as "(null)" instead of being passed to strlen().
 *   - A non-zero precision limits how many bytes of s are written.
 *   - JUSTIFY_LEFT pads with blanks on the right.
 *   - Otherwise padding goes on the left; with PADDING_ZERO the pad byte is
 *     '0' and, when the template carries a sign, the leading sign byte of s
 *     is emitted before the zeros.
 */
static void
bprints(builder_t *bh, template_t *tmpl, uchar_t *s)
{
    size_t length, width;
    uchar_t pad;

    if (s == NULL)
        s = (uchar_t *) "(null)";

    length = strlen((const char *) s);
    if (tmpl->prec > 0 && tmpl->prec < length)
        length = tmpl->prec;
    width = tmpl->width;
    if (width < length)
        width = length;
    if (isjustify(tmpl, JUSTIFY_LEFT)) {
        bnputs(s, length, bh);
        bnputx(' ', width, length, bh);
    }
    else {
        pad = ' ';
        if (isjustify(tmpl, PADDING_ZERO)) {
            /* length > 0 keeps width/length from wrapping on empty input. */
            if (tmpl->sign != ' ' && length > 0) {
                bputc(*s++, bh);
                width--;
                length--;
            }
            pad = '0';
        }
        bnputx(pad, width, length, bh);
        bnputs(s, length, bh);
    }
}

/*
 * Convert u to text in the template's base using the template's digit
 * table, prefix the template's sign character when one is set, and print
 * the result through bprints(). The precision is reset to zero first, so it
 * has no effect on integer conversions.
 */
static void
bprintu(builder_t *bh, template_t *tmpl, uintmax_t u)
{
    uchar_t text[130];
    size_t at = sizeof(text) - 1;

    /* Digits are produced least-significant first, filling from the end. */
    text[at] = '\0';
    do {
        text[--at] = tmpl->digit[u % tmpl->base];
        u /= tmpl->base;
    } while (u != 0 && at > 0);

    if (tmpl->sign != ' ')
        text[--at] = tmpl->sign;

    tmpl->prec = 0;
    bprints(bh, tmpl, text + at);
}

/*
 * Conversion helpers.
 *
 * Each helper receives the argument list by pointer. If a va_list is passed
 * by value and the callee calls va_arg on it, the caller's list becomes
 * indeterminate (ISO C 7.16), so whether a following conversion sees the
 * next argument depends on the ABI. Passing a pointer to a single list
 * makes every va_arg advance that one list.
 */

/* %s: print a string argument; only the left-justify flag is honoured. */
static void
vbprintf_s_part(builder_t *bh, template_t *tmpl, va_list *ap)
{
    uchar_t *s;

    s = va_arg(*ap, void *);
    tmpl->justify &= JUSTIFY_LEFT;
    bprints(bh, tmpl, s);
}

/* %c: print one character; only the left-justify flag is honoured. */
static void
vbprintf_c_part(builder_t *bh, template_t *tmpl, va_list *ap)
{
    uchar_t s[2];

    /* A char argument arrives promoted to int. */
    s[0] = (uchar_t) va_arg(*ap, int);
    s[1] = '\0';
    tmpl->justify &= JUSTIFY_LEFT;
    bprints(bh, tmpl, s);
}

/*
 * %d: fetch a signed integer of the size selected by the length modifier,
 * split it into sign and magnitude, and print it.
 * Modifier codes: 'c' = hh, 'h', 'l', 'q' = ll, 'j', 'z', 't'.
 */
static void
vbprintf_d_part(builder_t *bh, template_t *tmpl, va_list *ap)
{
    intmax_t d;
    uintmax_t u;
    size_t z;

    switch (tmpl->modifier) {
    case 'c':
        d = (int8_t) va_arg(*ap, int);
        break;
    case 'h':
        d = (int16_t) va_arg(*ap, int);
        break;
    case 'l':
        d = va_arg(*ap, intlong_t);
        break;
    case 'q':
        d = va_arg(*ap, long long);
        break;
    case 'j':
        d = va_arg(*ap, intmax_t);
        break;
    case 't':
        d = va_arg(*ap, ptrdiff_t);
        break;
    case 'z':
        /* Signed counterpart of size_t: reinterpret the upper half of the
         * size_t range as negative values without overflowing. */
        z = va_arg(*ap, size_t);
        if (z > SIZE_MAX / 2)
            d = -(intmax_t) (SIZE_MAX - z) - 1;
        else
            d = (intmax_t) z;
        break;
    default:
        d = va_arg(*ap, int);
        break;
    }
    if (d < 0) {
        /* Negate in unsigned arithmetic: -d overflows for INTMAX_MIN. */
        u = (uintmax_t) 0 - (uintmax_t) d;
        tmpl->sign = '-';
    }
    else {
        u = (uintmax_t) d;
        if (isjustify(tmpl, ALWAYS_SIGN))
            tmpl->sign = '+';
    }
    bprintu(bh, tmpl, u);
}

/*
 * %u (and the %b %o %x %X aliases): fetch an unsigned integer of the size
 * selected by the length modifier and print it.
 * Modifier codes: 'c' = hh, 'h', 'l', 'q' = ll, 'j', 'z', 't'.
 */
static void
vbprintf_u_part(builder_t *bh, template_t *tmpl, va_list *ap)
{
    uintmax_t u;

    switch (tmpl->modifier) {
    case 'c':
        u = (uint8_t) va_arg(*ap, uint_t);
        break;
    case 'h':
        u = (uint16_t) va_arg(*ap, uint_t);
        break;
    case 'l':
        u = va_arg(*ap, uintlong_t);
        break;
    case 'q':
        u = va_arg(*ap, unsigned long long);
        break;
    case 'j':
        u = va_arg(*ap, uintmax_t);
        break;
    case 'z':
        u = va_arg(*ap, size_t);
        break;
    case 't':
        /* Unsigned view of a ptrdiff_t-sized value: mask to its width. */
        u = (uintmax_t) va_arg(*ap, ptrdiff_t)
            & ((((uintmax_t) PTRDIFF_MAX) << 1) | 1u);
        break;
    default:
        u = va_arg(*ap, uint_t);
        break;
    }
    bprintu(bh, tmpl, u);
}

/* %p: print a pointer argument as an unsigned integer (base set by unalias). */
static void
vbprintf_p_part(builder_t *bh, template_t *tmpl, va_list *ap)
{
    uintmax_t u;

    u = (uintptr_t) va_arg(*ap, void *);
    bprintu(bh, tmpl, u);
}

/*
 * Emit one completed conversion. "%%" writes a literal percent sign; every
 * other field type is normalised by template_unalias() and dispatched.
 * Unknown field types produce no output and consume no argument.
 */
static void
vbprintf_part(builder_t *bh, template_t *tmpl, va_list *ap)
{
    if (tmpl->fieldtype == '%') {
        bputc('%', bh);
        return;
    }
    template_unalias(tmpl);
    switch (tmpl->fieldtype) {
    case 's':
        vbprintf_s_part(bh, tmpl, ap);
        break;
    case 'c':
        vbprintf_c_part(bh, tmpl, ap);
        break;
    case 'd':
        vbprintf_d_part(bh, tmpl, ap);
        break;
    case 'u':
        vbprintf_u_part(bh, tmpl, ap);
        break;
    case 'p':
        vbprintf_p_part(bh, tmpl, ap);
        break;
    default:
        break;
    }
}

/*
 * Initialise a builder over caller-owned storage and return it.
 *
 * bh     - builder to initialise
 * length - capacity of buffer in bytes
 * buffer - destination storage
 *
 * The buffer is set to the empty string when it has room for a terminator,
 * so it is a valid C string even before anything is written.
 */
builder_t *
builder(builder_t *bh, size_t length, uchar_t *buffer)
{
    bh->buffer = buffer;
    bh->pos = 0;
    bh->length = length;
    if (buffer != NULL && length > 0)
        buffer[0] = '\0';

    return bh;
}

/* Move the write position back to the start of the buffer. */
void
brewind(builder_t *bh)
{
    bh->pos = 0;
}

/*
 * Format fmt with the arguments in ap and append the result to bh.
 *
 * Parser states:
 *   0  literal text
 *   1  just after '%', reading flags
 *   2  reading the field width
 *   3  reading the precision (after '.')
 *   4  after a first 'h' or 'l' (a second one may follow)
 *   5  length modifier complete
 * Any character that does not continue the current specification is taken
 * as the conversion character.
 */
void
vbprintf(builder_t *bh, const uchar_t *fmt, va_list ap)
{
    int state = 0;
    uchar_t ch;
    template_t tmpl_instance;
    template_t *tmpl = &tmpl_instance;
    va_list args;

    /*
     * Work on a local copy: `ap` is a parameter, and when va_list is an
     * array type &ap would not have type va_list *. A local va_list can
     * always have its address taken.
     */
    va_copy(args, ap);

    while (*fmt != '\0') {
        ch = *fmt++;
        if (state == 0) {
            if (ch == '%') {
                template_clear(tmpl);
                state = 1;
            }
            else {
                bputc(ch, bh);
            }
            continue;
        }
        if (state < 3 && ch == '.') {
            state = 3;
            continue;
        }
        if (state < 4 && strchr("hl", ch)) {
            tmpl->modifier = ch;
            state = 4;
            continue;
        }
        if (state < 4 && strchr("Ljzt", ch)) {
            tmpl->modifier = ch;
            state = 5;
            continue;
        }
        switch (state) {
        case 1:
            if (strchr(" 0+-", ch)) {
                template_set_justify(tmpl, ch);
                continue;
            }
            /* fall through */
        case 2:
            if (isdigit(ch)) {
                template_push_width(tmpl, ch);
                state = 2;
                continue;
            }
            break;
        case 3:
            if (isdigit(ch)) {
                template_push_prec(tmpl, ch);
                continue;
            }
            break;
        case 4:
            if (ch == 'h') {
                tmpl->modifier = 'c';   /* "hh" */
                state = 5;
                continue;
            }
            else if (ch == 'l') {
                tmpl->modifier = 'q';   /* "ll" */
                state = 5;
                continue;
            }
            break;
        default:
            break;
        }
        tmpl->fieldtype = ch;
        vbprintf_part(bh, tmpl, &args);
        state = 0;
    }

    va_end(args);
}

/*
 * Variadic front end of vbprintf(): format fmt with the trailing arguments
 * and append the result to the builder.
 */
void
bprintf(builder_t *bh, const uchar_t *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vbprintf(bh, fmt, args);
    va_end(args);
}

/*
 * Demo driver: formats a series of sample conversions into one buffer and
 * writes the accumulated text to stdout.
 */
int main(int argc, char *argv[])
{
    uchar_t output[4096];
    builder_t storage;
    builder_t *bh = builder(&storage, sizeof(output), output);

    /* strings: width, justification, precision */
    bprintf(bh, ":%s:\n", "hello, world");
    bprintf(bh, ":%10s:\n", "hello, world");
    bprintf(bh, ":%-10s:\n", "hello, world");
    bprintf(bh, ":%20s:\n", "hello, world");
    bprintf(bh, ":%-20s:\n", "hello, world");
    bprintf(bh, ":%020.10s:\n", "hello, world");
    bprintf(bh, ":%-20.10s:\n", "hello, world");
    bprintf(bh, ":%.10s:\n", "hello, world");

    /* single characters */
    bprintf(bh, ":%c:\n", 'a');
    bprintf(bh, ":%10c:\n", 'a');
    bprintf(bh, ":%-10c:\n", 'a');
    bprintf(bh, ":%020.10c:\n", 'a');
    bprintf(bh, ":%-20.10c:\n", 'a');
    bprintf(bh, ":%.10c:\n", 'a');

    /* negative decimals */
    bprintf(bh, ":%d:\n", -1234);
    bprintf(bh, ":%8d:\n", -1234);
    bprintf(bh, ":%08d:\n", -1234);
    bprintf(bh, ":%-8d:\n", -1234);
    bprintf(bh, ":%-08d:\n", -1234);

    /* positive decimals, with and without a forced sign */
    bprintf(bh, ":%d:\n", 1234);
    bprintf(bh, ":%8d:\n", 1234);
    bprintf(bh, ":%08d:\n", 1234);
    bprintf(bh, ":%-8d:\n", 1234);
    bprintf(bh, ":%-08d:\n", 1234);
    bprintf(bh, ":%+8d:\n", 1234);
    bprintf(bh, ":%+08d:\n", 1234);
    bprintf(bh, ":%-+8d:\n", 1234);
    bprintf(bh, ":%-+08d:\n", 1234);
    bprintf(bh, ":%2d:\n", 12345);

    /* 32-bit boundary values */
    bprintf(bh, ":%d:\n", 0x7fffffff);
    bprintf(bh, ":%d:\n", 0x80000000);
    bprintf(bh, ":%d:\n", 0xffffffff);
    bprintf(bh, ":%u:\n", 0x7fffffff);
    bprintf(bh, ":%u:\n", 0x80000000);
    bprintf(bh, ":%u:\n", 0xffffffff);

    /* other bases */
    bprintf(bh, ":%x:\n", 0xfedcba98);
    bprintf(bh, ":%X:\n", 0xfedcba98);
    bprintf(bh, ":%b:\n", 0xfedcba98);

    /* length modifiers and pointers */
    bprintf(bh, ":%hhd:\n", 0x12ff);
    bprintf(bh, ":%hhu:\n", 0x12ff);
    bprintf(bh, ":%08p:\n", main);
    bprintf(bh, ":%lld:\n", 0x7fffffffffffffffLL);
    bprintf(bh, ":%lld:\n", 0x8fffffffffffffffLL);
    bprintf(bh, ":%llu:\n", 0x8fffffffffffffffLLU);
    bprintf(bh, ":%llx:\n", 0x8fffffffffffffffLLU);

    fputs(bh->buffer, stdout);

    return EXIT_SUCCESS;
}