#!/usr/bin/env perl
{
package Lasdec;
# Scraper for the local-government directory pages reachable from $TOP.
# Both subs are written to be called as class methods
# (Lasdec->prefectures, Lasdec->municipalities($link)).
use utf8;
use strict;
use warnings;
use URI;
use Web::Scraper;
use XXX;

# Index page; prefectures() collects its "area" elements.
my $TOP = "http://www.lasdec.nippon-net.ne.jp/cms/1,0,14.html";

# Keys assigned to the first six cells of a table row, in cell order:
# organization code, organization name, kana reading, new postal code,
# address, telephone number.
my @FIELDS = qw(code name kana zipcode address tel);

# Lasdec->prefectures
#
# Fetches $TOP and scrapes every "area" element on it.
# Returns the scraper's 'prefectures' result: an array reference with
# one { title => ..., url => ... } hash per "area" element, taken from
# the element's title and href attributes.
sub prefectures {
    my ($class) = @_;

    # Applied to each matched "area" element to pull out its attributes.
    my $link = scraper {
        process 'area',
            title => '@title',
            url   => '@href';
    };
    my $links = scraper {
        process "area", "prefectures[]" => $link;
        result 'prefectures';
    };

    my $res = $links->scrape( URI->new($TOP) );
    return $res;
}

# Lasdec->municipalities($prefecture_link)
#
# $prefecture_link is a hash reference with a 'url' entry, such as one
# element of the list returned by prefectures().
# Example page: http://www.lasdec.nippon-net.ne.jp/cms/1,22,14,179.html
#
# Scrapes every table row under div.contentBody and returns an array
# reference of hashes keyed by @FIELDS.  The header row (whose first
# cell reads "団体コード") and rows that do not have six element cells
# are left out.  Returns an empty array reference when nothing matched.
sub municipalities {
    my ($class, $prefecture_link) = @_;

    # Web::Scraper only fetches when it is handed a URI object; a plain
    # string would be parsed as HTML content instead of being downloaded.
    my $url = $prefecture_link->{url};
    $url = URI->new($url) unless ref $url;

    my $row = sub {
        my ($tr) = @_;
        my %record;
        for my $i ( 0 .. $#FIELDS ) {
            # ".N" addresses child N relative to this row; no element
            # there means the row is too short to be a data row.
            my $cell = $tr->address(".$i");
            return unless ref $cell;
            $record{ $FIELDS[$i] } = $cell->as_text;
        }
        return if $record{code} eq "団体コード";    # skip header.
        return \%record;
    };

    my $rows = scraper {
        process 'div.contentBody table tr', 'rows[]' => $row;
        result 'rows';
    };

    my $res = $rows->scrape($url);
    return $res || [];
}
}
package main;
use utf8;
use strict;
use warnings;
use Data::Dumper;
use Getopt::Long;
use IO::All;
use Text::CSV::Encoded;
use XXX;
# usage()
#
# Prints a short description of the command-line options to STDOUT.
sub usage {
    print <<"END_USAGE";
usage: $0 --out=FILE [--help]

  --out=FILE   write the scraped municipality list to FILE as CSV
  --help       show this message
END_USAGE
}

# write_csv($fn)
#
# Scrapes every prefecture via Lasdec and writes all municipality rows
# to the file $fn as CSV, one header line followed by one line per row.
# Text goes into the CSV encoder as utf8 and comes out as cp932.
# A progress line per prefecture is printed to STDOUT.
sub write_csv {
    my ($fn) = @_;

    my $out = io->file($fn);
    my $csv = Text::CSV::Encoded->new({
        encoding_in  => "utf8",     # the encoding comes into Perl
        encoding_out => "cp932",    # the encoding comes out of Perl
    });

    # @columns are the record keys; @header_columns are the matching
    # column titles written as the first CSV line.
    my @columns        = qw(code name kana zipcode address tel);
    my @header_columns = qw(団体コード 団体名 ふりがな小書 新郵便番号 住所 電話番号);

    my $prefectures_ref = Lasdec->prefectures;

    $csv->combine(@header_columns);
    $out->print( $csv->string, "\n" );

    my $total = @$prefectures_ref;
    my $done  = 0;
    for my $prefecture (@$prefectures_ref) {
        my $title = $prefecture->{title};
        my $cur   = sprintf( '%02i', ++$done );
        print "($cur/$total) processing $title.\n";

        my $rows = Lasdec->municipalities($prefecture) || [];
        for my $row (@$rows) {
            # Ignore anything that is not a populated record.
            next unless ref $row eq 'HASH' && %$row;
            $csv->combine( @{$row}{@columns} );
            $out->print( $csv->string, "\n" );
        }
    }

    # Close explicitly so buffered output is flushed before reporting.
    $out->close;
    print "wrote $fn. succeeded.\n";
}
############################################################################
# Script entry point: parse the command line, then either show the
# usage text or write the CSV file named by --out.
binmode(STDOUT, ":utf8");    # progress/usage text is emitted as UTF-8
$| = 1;                      # unbuffered, so progress lines appear at once

my $opt_help;
my $opt_out_fn;
my $options_ok = GetOptions(
    "help"  => \$opt_help,      # flag
    "out=s" => \$opt_out_fn,    # string
);

if ( !$options_ok ) {
    # GetOptions has already warned about the offending option.
    usage();
    exit 2;
}
elsif ( $opt_help or !defined $opt_out_fn or $opt_out_fn eq '' ) {
    usage();
}
else {
    write_csv($opt_out_fn);
}