PostScript風なテンプレート・プロセッサの続き(その4)

その3まででは、空白を含むリテラルを属性値に使いたいとき、オペランドをブロックにして、ブロックに HTML リテラルを記述しないといけませんでした。

{ type= /submit value= { %] 投稿 [% } } %input/

もっと手短に記述できるようにと、今回、すべてのリテラル、つまり、HTML リテラル、スラッシュ付きリテラル、数値は同じように扱えるようにしました。言い換えると、オペレータ中の HTML リテラルは、直感的には PostScript の丸括弧で囲んだ文字列と同じように扱えるようになったということです。

{ type= /submit value= %] 投稿 [% } %input/

PostScript の丸括弧文字列とは括弧の向きが逆になっているのが、気持ち悪く感じますが、そこは妥協ということで目をつぶることにします。

以上で、やりたかったことを実装できたので、ついでにコードを整理することにします。まず、インタープリタで、リテラル・要素生成・属性生成オペレータかどうかをその都度判定するのは効率が悪いので、コンパイラで種別判定するようにします。また、特殊オペレータが増えて if 分岐が間延びしてきたので、それぞれ手続きに独立させ、ハッシュ・テーブルで分岐するようにします。

A Template Processor for HTML with Postfix Notation ― Gist

package Text::PostTemplate;
use strict;
use warnings;
use Carp;
use Encode;
use Scalar::Util qw(looks_like_number);

our $VERSION = '0.004';

my %XML_SPECIAL = (
    q(&) => q(&amp;), q(<) => q(&lt;), q(>) => q(&gt;),
    q(") => q(&quot;), q(') => q(&#39;), q(\\) => q(&#92;),
);

my $ALPHA = q(A-Za-z);
my $DIGIT = q(0-9);
my $ALNUM = q(A-Za-z0-9);

my %EMPTY_ELEMENT = map { $_ => 1 } qw(
    meta img link br hr input area param col base
);

my %ATTRIBUTE_TYPE = (
    (map { $_ => 'uri' } qw(href src action)),
    (map { $_ => 'htmlall' } qw(value)),
    (map { $_ => 'bool' } qw(
        compact nowrap ismap declare noshade checked disabled readonly
        multiple selected noresize defer
    )),
);

my %SPECIAL_OPERATOR = (
    'index' => \&_op_index,
    'var' => \&_op_var,
    'def' => \&_op_def,
    'put' => \&_op_put,
    'if' => \&_op_if,
    'ifelse' => \&_op_ifelse,
    'for' => \&_op_for,
    'with' => \&_op_forall,
    'forall' => \&_op_forall,
    'br' => \&_op_br,
    '% ELEM' => \&_op_element,
    '% ATTR' => \&_op_attribute,
);

my %BINARY_OPERATOR = (
    'exch' => sub { ($_[1], $_[0]) },
    'add'  => sub { $_[0] + $_[1] },
    'sub'  => sub { $_[0] - $_[1] },
    'mul'  => sub { $_[0] * $_[1] },
    'div'  => sub { $_[0] / $_[1] },
    'mod'  => sub { $_[0] % $_[1] },
    'eq' => sub { _compare(@_) == 0 ? 1 : 0 },
    'ne' => sub { _compare(@_) != 0 ? 1 : 0 },
    'lt' => sub { _compare(@_) <  0 ? 1 : 0 },
    'le' => sub { _compare(@_) <= 0 ? 1 : 0 },
    'gt' => sub { _compare(@_) >  0 ? 1 : 0 },
    'ge' => sub { _compare(@_) >= 0 ? 1 : 0 },
    'get!'        => sub { _fetch(@_, \&_escape_raw) },
    'raw_get!'    => sub { _fetch(@_, \&_escape_raw) },
    'get'         => sub { _fetch(@_, \&_escape_html) },
    'html_get'    => sub { _fetch(@_, \&_escape_html) },
    'htmlall_get' => sub { _fetch(@_, \&_escape_htmlall) },
    'uri_get'     => sub { _fetch(@_, \&_escape_uri) },
    'uriall_get'  => sub { _fetch(@_, \&_escape_uriall) },
);

my %UNARY_OPERATOR = (
    'not' => sub { ! $_[0] ? 1 : 0 },
    'neg' => sub { -$_[0] },
    'dup' => sub { ($_[0], $_[0]) },
    'pop' => sub { () },
    'length' => sub { length $_[0] },
    'even' => sub { $_[0] % 2 == 0 ? 1 : 0 },
    'odd'  => sub { $_[0] % 2 != 0 ? 1 : 0 },
);

sub compile {
    my($class, $source) = @_;
    my @block = ([]);
    while ($source =~ m{\G(.*?)\[\%\s*(.*?)\s*\%\]\n?}gcmsx) {
        my($lit, $operators) = ($1, $2);
        if ($lit ne q()) {
            push @{$block[-1]}, '% LIT', $lit;
        }
        for my $op (split /\s+/msx, $operators) {
            if ($op eq '{') {
                push @block, [];
                next;
            }
            if ($op eq '}') {
                my $a = pop @block;
                push @{$block[-1]}, $a;
                next;
            }
            if (looks_like_number($op)) {
                push @{$block[-1]}, '% LIT', $op;
                next;
            }
            if (length $op > 1) {
                if (q(/) eq (substr $op, 0, 1)) {
                    push @{$block[-1]}, '% LIT', (substr $op, 1);
                    next;
                }
                if ($op =~ m{\A%([$ALPHA][$ALNUM:_-]*)(/|!?=)?\z}mosx) {
                    my($tag, $mode) = ($1, $2 || q());
                    if ($mode ne q(/) && $EMPTY_ELEMENT{$tag}) {
                        $mode = q(/);
                    }
                    push @{$block[-1]}, '% ELEM', $tag, $mode;
                    next;
                }
                if (q(=) eq (substr $op, -1)
                    && $op =~ m{\A([$ALPHA][$ALNUM:_-]*)=\z}mosx
                ) {
                    push @{$block[-1]}, '% ATTR', $1;
                    next;
                }
            }
            push @{$block[-1]}, $op;
        }
    }
    if (@block != 1) {
        croak 'Template Error';
    }
    if (pos $source) {
        $source = substr $source, pos $source;
    }
    if ($source ne q()) {
        push @{$block[-1]}, '% LIT', $source;
    }
    return $block[0];    
}

sub apply {
    my($class, $template, $param) = @_;
    my $result = _apply_block([q()], $template, $param);
    return join q(), @{$result};
}

sub _apply_block {
    my($data, $block, $param) = @_;
    my $pc = 0;
    while ($pc <= $#{$block}) {
        my $op = $block->[$pc++];
        next if ref $op || $op eq 'show';
        if ($op eq '% LIT') {
            push @{$data}, $block->[$pc++];
            next;
        }
        if (exists $SPECIAL_OPERATOR{$op}) {
            $SPECIAL_OPERATOR{$op}->($data, $block, $param, \$pc);
            next;
        }
        if (exists $BINARY_OPERATOR{$op}) {
            my($a, $b) = splice @{$data}, -2;
            push @{$data}, $BINARY_OPERATOR{$op}->($a, $b);
            next;
        }
        if (exists $UNARY_OPERATOR{$op}) {
            my $a = pop @{$data};
            push @{$data}, $UNARY_OPERATOR{$op}->($a);
            next;
        }
        push @{$data}, _fetch($param, $op, \&_escape_html);
    }
    return $data;
}

sub _op_index {
    my($data, $block, $param, $refpc) = @_;
    my $i = pop @{$data};
    my $r = $data->[-1 - $i];
    push @{$data}, $r;
    return;
}

sub _op_var {
    my($data, $block, $param, $refpc) = @_;
    push @{$data}, $param;
    return;
}

sub _op_def {
    my($data, $block, $param, $refpc) = @_;
    my($key, $value) = splice @{$data}, -2;
    _store($param, $key, $value);
    return;
}

sub _op_put {
    my($data, $block, $param, $refpc) = @_;
    my($value, $obj, $key) = splice @{$data}, -3;
    _store($obj, $key, $value);
    return;
}

sub _op_if {
    my($data, $block, $param, $refpc) = @_;
    my $cond = pop @{$data};
    if ($cond) {
        _apply_block($data, $block->[${$refpc} - 2], $param);
    }
    return;
}

sub _op_ifelse {
    my($data, $block, $param, $refpc) = @_;
    my $cond = pop @{$data};
    if ($cond) {
        _apply_block($data, $block->[${$refpc} - 3], $param);
    }
    else {
        _apply_block($data, $block->[${$refpc} - 2], $param);
    }
    return;
}

sub _op_for {
    my($data, $block, $param, $refpc) = @_;
    my($i, $d, $limit) = splice @{$data}, -3;
    while (($d > 0 && $i <= $limit) || ($d < 0 && $i >= $limit)) {
        push @{$data}, $i;
        _apply_block($data, $block->[${$refpc} - 2], $param);
        $i += $d;
    }
    return;
}

sub _op_forall {
    my($data, $block, $param, $refpc) = @_;
    my $list = pop @{$data};
    if (ref $list ne 'ARRAY') {
        $list = [$list];
    }
    for my $i (0 .. $#{$list}) {
        my $item = $list->[$i];
        _apply_block($data, $block->[${$refpc} - 2], {
            %{$param},
            'i' => $i,
            ref $item eq 'HASH' ? %{$item} : ('item' => $item),
        });
    }
    return;
}

sub _op_br {
    my($data, $block, $param, $refpc) = @_;
    push @{$data}, "\n";
    return;
}

sub _op_element {
    my($data, $block, $param, $refpc) = @_;
    my $attr = [];
    if (${$refpc} >= 2 && ref $block->[${$refpc} - 2]) {
        $attr = _apply_block([], $block->[${$refpc} - 2], $param);
    }
    my $attrstr = join q(), @{$attr};
    my $tag = $block->[${$refpc}++];
    my $mode = $block->[${$refpc}++];
    if ($mode eq q(/)) {
        my $br = $tag eq 'br' || $tag eq 'hr' ? "\n" : q();
        push @{$data}, "<$tag$attrstr />$br";
        return;
    }
    push @{$data}, qq(<$tag$attrstr>);
    if (ref $block->[${$refpc}] && ! $mode) {
        _apply_block($data, $block->[${$refpc}++], $param);
    }
    else {
        $mode ||= q(=);
        my $value = _opland($block, $param, $refpc);
        push @{$data},
              $mode eq q(!=) ? _escape_raw($value)
            : $tag eq 'textarea' ? _escape_htmlall($value)
            : _escape_html($value);
    }
    push @{$data}, qq(</$tag>);
    return;
}

sub _op_attribute {
    my($data, $block, $param, $refpc) = @_;
    my $attribute = $block->[${$refpc}++];
    my $value = _opland($block, $param, $refpc);
    my $type = $ATTRIBUTE_TYPE{$attribute} || 'html';
    if ($type eq 'uri') {
        push @{$data}, qq( $attribute=") . _escape_uri($value) . q("); 
    }
    elsif ($type eq 'htmlall') {
        push @{$data}, qq( $attribute=") . _escape_htmlall($value) . q(");
    }
    elsif ($type eq 'bool') {
        if ($value) {
            push @{$data}, qq( $attribute="$attribute");
        }
    }
    else {
        push @{$data}, qq( $attribute=") . _escape_html($value) . q(");
    }
    return;
}

sub _opland {
    my($block, $param, $refpc) = @_;
    my $opland = $block->[${$refpc}++];
    if (ref $opland) {
        my $a = _apply_block([], $opland, $param);
        return join q(), @{$a};
    }
    elsif ($opland eq '% LIT') {
        return $block->[${$refpc}++];
    }
    return $param->{$opland};
}

sub _compare {
    my($a, $b) = @_;
    if (looks_like_number($a) && looks_like_number($b)) {
        return $a <=> $b;
    }
    else {
        return $a cmp $b;
    }
}

sub _fetch {
    my($obj, $key, $filter) = @_;
    $filter ||= \&_escape_html;
    my $value = ref $obj eq 'HASH' ? $obj->{$key}
        : ref $obj eq 'ARRAY' ? $obj->[$key]
        : eval{ $obj->can($key) } ? $obj->$key
        : q();
    return q() if ! defined $value;
    return $value if ref $value || looks_like_number($value);
    return $filter->($value);
}

sub _store {
    my($obj, $key, $value) = @_;
    if (ref $obj eq 'HASH') {
        $obj->{$key} = $value;
    }
    elsif (ref $obj eq 'ARRAY') {
        $obj->[$key] = $value;
    }
    elsif (eval{ $obj->can($key) }) {
        $obj->$key($value);
    }
    return;
}

sub _escape_htmlall {
    my($t) = @_;
    $t = defined $t ? $t : q();
    $t =~ s{([&<>"'\\])}{ $XML_SPECIAL{$1} }egmosx;
    return $t;
}

sub _escape_html {
    my($t) = @_;
    $t =~ s{
        (?:([<>"'\\])
        |\&(?:([_$ALPHA][_$ALNUM]*|\#(?:[$DIGIT]{1,5}|x[[:xdigit:]]{2,4}));)?
        )
    }{
        $1 ? $XML_SPECIAL{$1} : $2 ? qq(\&$2;) : q(&amp;)
    }egmosx;
    return $t;
}

sub _escape_uriall {
    my($t) = @_;
    if (utf8::is_utf8($t)) {
        $t = Encode::encode('UTF-8', $t);
    }
    $t =~ s{([^$ALNUM\-_~/.,;:])}{ sprintf '%%%02X', ord $1 }egmosx;
    return $t;
}

sub _escape_uri {
    my($t) = @_;
    if (utf8::is_utf8($t)) {
        $t = Encode::encode('UTF-8', $t);
    }
    $t =~ s{
        (%([[:xdigit:]]{2})?)|(&(?:amp;)?)|([^$ALNUM\-_~*+=/.!,;:\@?\#])
    }{
        $2 ? $1 : $1 ? q(%25) : $3 ? q(&amp;) : sprintf '%%%02X', ord $4
    }egmosx;
    return $t;
}

sub _escape_raw { return $_[0] }

1;

__END__

=pod

=head1 NAME

Text::PostTemplate - One of template processors

=head1 VERSION

0.004

=head1 SYNOPSYS

    use Encode;
    use Text::PostTemplate;

    my $template = Text::PostTemplate->compile(<<'EOS');
    <!DOCTYPE html>
    <html>
    <head><title>Example</title></head>
    <body>
    [%
    { id= /preview } %div!= entry.body br

    { action= entry.postlink method= /post } %form { br
        { name= /token type= /hidden value= session.token } %input/ br
        { name= /body } %textarea= entry.body br
        %]<input type="submit" value=" POST " />[% br
    } br
    %]
    </body>
    </html>
    EOS

    my $output = Text::PostTemplate->apply($template, {
        'entry.postlink' => 'post.cgi',
        'session.token' => 'ateEdC39ag2kng9',
        'entry.body' => '<p>Hello,&nbsp;world!</p>',
    });
    print encode('UTF-8', $output);

=head1 DESCRIPTION

=head1 METHODS

=over

=item1 C<compile>

=item1 C<apply>

=back

=head1 DEPENDENCIES

L<Scalar::Util>
L<Encode>

=head1 AUTHOR

MIZUTANI Tociyuki  C<< <tociyuki@gmail.com> >>

=head1 LICENSE AND COPYRIGHT

Copyright (c) 2012, MIZUTANI Tociyuki C<< <tociyuki@gmail.com> >>.
All rights reserved.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut