Perl, デコードされた文字列の文字幅の取得

| コメント(0)

Text::CharWidthはStrawberryPerlではmakeが通らなかった。

$ perl benchmark/visual_width.pl
                     Rate unicode_japanese encode_cp932 east_asian_width visual_width
unicode_japanese   9246/s               --         -63%             -75%         -92%
encode_cp932      24900/s             169%           --             -34%         -78%
east_asian_width  37693/s             308%          51%               --         -66%
visual_width     112233/s            1114%         351%             198%           --

ソース 参考にさせていただきました

use strict;
use warnings;
use utf8;

use Benchmark qw(:all);
use Encode;
use Unicode::EastAsianWidth;
use Text::VisualWidth::UTF8;
use Unicode::Japanese qw(unijp);

# Returns the display width, in columns, of the string passed as the first
# argument.
#
# The string is scanned for maximal runs of characters matching
# \p{InFullwidth} or \p{InHalfwidth}. A fullwidth run adds two columns per
# character, a halfwidth run adds one. Characters matching neither property
# are skipped and add nothing.
#
# NOTE(review): the two properties are presumably the ones provided by
# Unicode::EastAsianWidth, which the file loads -- confirm.
sub east_asian_width {
    my ($text) = @_;
    my $columns = 0;

    while ($text =~ /(?:(\p{InFullwidth}+)|(\p{InHalfwidth}+))/go) {
        my ($wide_run, $narrow_run) = ($1, $2);

        if ($wide_run) {
            $columns += length($wide_run) * 2;
        }
        else {
            $columns += length($narrow_run);
        }
    }

    return $columns;
}

# Thin wrapper: hands the first argument to Text::VisualWidth::UTF8::width
# and returns whatever that function returns.
sub visual_width {
    my ($text) = @_;
    return Text::VisualWidth::UTF8::width($text);
}

# Passes the first argument to unijp() (imported from Unicode::Japanese) and
# returns the result of calling strlen on the value that comes back.
sub unicode_japanese {
    my ($text) = @_;
    my $converter = unijp($text);
    return $converter->strlen;
}

# Returns the number of bytes the first argument occupies after being
# encoded as CP932.
sub encode_cp932 {
    my ($text) = @_;
    my $bytes = encode("cp932", $text);
    return length $bytes;
}

# Sample strings, each paired with its expected display width in columns.
# The benchmark below only uses the string (index 0); the expected width
# (index 1) is consumed by the commented-out test code further down.
my @words = (
    ["hogehoge",   8],
    ["あいうえ",   8],
    ["あ1い2うえ", 10],
    ["0",          1],
);

# Number of times each benchmark entry is executed.
my $count = 100000;

# Compare the four width implementations. One iteration of an entry measures
# every sample string once. The results are discarded, so a statement-modifier
# `for` is used rather than `map` in void context.
cmpthese($count, {
    'east_asian_width' => sub { east_asian_width($_->[0]) for @words },
    'visual_width'     => sub { visual_width($_->[0])     for @words },
    'unicode_japanese' => sub { unicode_japanese($_->[0]) for @words },
    'encode_cp932'     => sub { encode_cp932($_->[0])     for @words },
});

#my @filters = qw/east_asian_width visual_width unicode_japanese encode_cp932/;
#
#use Test::More;
#plan tests => @words*@filters;
#
#for my $filter (@filters) {
#    for my $test (@words) {
#        my $string = $test->[0];
#        my $length = $test->[1];
#        no strict 'refs';
#        is( *{$filter}->($string), $length, "$filter" );
#    }
#}

コメントする