./convert.pl <pukiwiki eucディレクトリ> <保存先ディレクトリ>
#!/usr/bin/perl -w
use strict;
use warnings;
use Encode;
use File::Basename;
use File::Copy;
use IO::File;
use Path::Class::Dir;
use Path::Class::File;
# Pattern handed to File::Basename::fileparse to split a file name into
# base name and suffix. Exactly one of the two definitions should be active.
use constant REGEX_SUFFIX => qw(\..+$); # everything from the FIRST "." onward is the suffix
#use constant REGEX_SUFFIX => qw(\.[^\.]+$); # everything from the LAST "." onward is the suffix
# Run the conversion. The subs are defined further down in the file.
&main();
### Entry point: convert a PukiWiki data tree from EUC-JP to UTF-8.
#
# Expects exactly two command line arguments: the source root directory and
# the target root directory. The sub directories attach, backup, cache, diff
# and wiki are processed in that order:
#   attach - files are copied unchanged; only the hex file name is re-encoded
#   backup - *.gz files are gunzipped, converted and gzipped again
#   cache  - *.ref / *.rel files plus recent.dat are converted
#   diff   - *.txt files are converted
#   wiki   - *.txt files are converted
# Access and modification times of every source file are copied to its target.
# Prints a usage message and exits with status 1 on a wrong argument count.
sub main {
    if ( @ARGV != 2 ) {
        print "error: $0 [source_dir] [target_dir]\n";
        exit 1;
    }
    my ($src_dir_root, $trg_dir_root) = @ARGV;
    my ($src_dir, $trg_dir);

    ####### */attach #######
    ($src_dir, $trg_dir) = set_dir($src_dir_root, $trg_dir_root, "attach");
    foreach my $src_file ($src_dir->children) {
        next if $src_file->is_dir;
        my ($hexname, $suffix) = _split_name($src_file);
        # Files without a suffix are always copied; files with a suffix only
        # when it is exactly ".1" or ".log".
        next if $suffix ne "" && !_suffix_allowed($suffix, ".1", ".log");
        # The name consists of "_"-separated hex parts; convert each part.
        my $utf8name = join("_", map { hexEUCtoUTF8($_) } split(/_/, $hexname));
        my $trg_file = Path::Class::File->new($trg_dir, $utf8name . $suffix);
        if ( !copy("$src_file", "$trg_file") ) {
            warn "WARNING: Can't copy $src_file to $trg_file: $!\n";
            next;
        }
        _copy_times($src_file, $trg_file);
    }

    ####### */backup #######
    ($src_dir, $trg_dir) = set_dir($src_dir_root, $trg_dir_root, "backup");
    foreach my $src_file ($src_dir->children) {
        next if $src_file->is_dir;
        my ($hexname, $suffix) = _split_name($src_file);
        next if !_suffix_allowed($suffix, ".gz");
        my $trg_file = Path::Class::File->new($trg_dir, hexEUCtoUTF8($hexname) . $suffix);
        # The commands run through the shell, so the paths must be quoted.
        my $reader = IO::File->new("gzip -cd " . _shell_quote($src_file) . " 2>/dev/null |");
        my $writer = IO::File->new("| gzip -c - > " . _shell_quote($trg_file));
        _convert_stream($src_file, $trg_file, $reader, $writer);
    }

    ####### */cache #######
    ($src_dir, $trg_dir) = set_dir($src_dir_root, $trg_dir_root, "cache");
    _convert_text_dir($src_dir, $trg_dir, ".ref", ".rel");
    # recent.dat keeps its name; only its contents are converted. It is
    # skipped when the source has none, instead of crashing on the failed open.
    my $recent_src = Path::Class::File->new($src_dir, "recent.dat");
    my $recent_trg = Path::Class::File->new($trg_dir, "recent.dat");
    if ( -e $recent_src ) {
        _convert_stream($recent_src, $recent_trg,
            $recent_src->open('r'), $recent_trg->open('w'));
    }

    ####### */diff #######
    ($src_dir, $trg_dir) = set_dir($src_dir_root, $trg_dir_root, "diff");
    _convert_text_dir($src_dir, $trg_dir, ".txt");

    ####### */wiki #######
    ($src_dir, $trg_dir) = set_dir($src_dir_root, $trg_dir_root, "wiki");
    _convert_text_dir($src_dir, $trg_dir, ".txt");
}

# Split the base name of $src_file into (name, suffix) using REGEX_SUFFIX.
sub _split_name {
    my ($src_file) = @_;
    my ($name, undef, $suffix) = fileparse($src_file->basename, REGEX_SUFFIX);
    return ($name, $suffix);
}

# True when $suffix is exactly one of @allowed. A plain string comparison is
# used on purpose: interpolating the suffix into a regex would treat "." as a
# wildcard and accept substrings such as ".t" for ".txt".
sub _suffix_allowed {
    my ($suffix, @allowed) = @_;
    return scalar grep { $_ eq $suffix } @allowed;
}

# Wrap a path in single quotes so it can be embedded in a shell command line.
sub _shell_quote {
    my ($path) = @_;
    my $quoted = "$path";    # force stringification of Path::Class objects
    $quoted =~ s/'/'\\''/g;
    return "'$quoted'";
}

# Copy access and modification time from $src_file to $trg_file.
sub _copy_times {
    my ($src_file, $trg_file) = @_;
    my $src_stat = $src_file->stat;
    if ( !$src_stat ) {
        warn "WARNING: Can't stat $src_file: $!\n";
        return;
    }
    utime($src_stat->atime, $src_stat->mtime, "$trg_file")
        or warn "WARNING: Can't set times on $trg_file: $!\n";
    return;
}

# Convert the contents read from $reader to UTF-8, write them to $writer and
# copy the time stamps. When either handle failed to open, a warning is
# printed and the file is skipped.
sub _convert_stream {
    my ($src_file, $trg_file, $reader, $writer) = @_;
    if ( !$reader || !$writer ) {
        warn "WARNING: Can't open $src_file for reading or $trg_file for writing\n";
        $reader->close if $reader;
        $writer->close if $writer;
        return;
    }
    fileEUCtoUTF8($reader, $writer);
    _copy_times($src_file, $trg_file);
    return;
}

# Convert every plain file in $src_dir whose suffix is one of @suffixes into
# $trg_dir, re-encoding both the hex file name and the file contents.
sub _convert_text_dir {
    my ($src_dir, $trg_dir, @suffixes) = @_;
    foreach my $src_file ($src_dir->children) {
        next if $src_file->is_dir;
        my ($hexname, $suffix) = _split_name($src_file);
        next if !_suffix_allowed($suffix, @suffixes);
        my $trg_file = Path::Class::File->new($trg_dir, hexEUCtoUTF8($hexname) . $suffix);
        _convert_stream($src_file, $trg_file, $src_file->open('r'), $trg_file->open('w'));
    }
    return;
}
### Build the source and target directory objects for one sub directory.
# Arguments: source root, target root, sub directory name.
# Exits with status 1 when the source directory does not exist; creates the
# target directory when it is missing.
# Returns the list (source dir, target dir) as Path::Class::Dir objects.
sub set_dir {
    my ($src_rootdir, $trg_rootdir, $subdir) = @_;
    my ($src_dir, $trg_dir) =
        map { Path::Class::Dir->new($_, $subdir) } ($src_rootdir, $trg_rootdir);

    # The source side must already be there.
    unless ( -e $src_dir ) {
        print "ERROR: Can't locate source directory.\n";
        exit 1;
    }

    # The target side is created on demand.
    $trg_dir->mkpath unless -e $trg_dir;

    return ($src_dir, $trg_dir);
}
### File contents: EUC-JP => UTF-8
# Arguments:
#   $src_fh - open handle to read EUC-JP text from
#   $trg_fh - open handle object (supporting ->print and ->close) that
#             receives the UTF-8 text
# Every input line is written followed by "\n", so the output always ends
# with a newline even when the input did not.
# Both handles are closed before returning (target first).
# Dies when either handle is undefined, i.e. when the caller's open failed.
sub fileEUCtoUTF8 {
    my ($src_fh, $trg_fh) = @_;
    die "fileEUCtoUTF8: source handle is not open\n" unless defined $src_fh;
    die "fileEUCtoUTF8: target handle is not open\n" unless defined $trg_fh;

    # A lexical line variable is used because "while (<$fh>)" assigns to the
    # global $_ without localizing it and would clobber the caller's $_.
    while ( defined( my $line = <$src_fh> ) ) {
        chomp $line;
        my $converted = encode('utf-8', decode("euc-jp", $line));
        $trg_fh->print($converted . "\n")
            or warn "WARNING: write to target failed: $!\n";
    }

    # Buffered write errors only show up on close, so report them.
    $trg_fh->close or warn "WARNING: closing target failed: $!\n";
    # The result of closing the source is deliberately not checked.
    $src_fh->close;
    return;
}
### Name: hex (EUC-JP bytes) => hex (UTF-8 bytes), upper case
# Takes a string of hex digit pairs, interprets the bytes they encode as
# EUC-JP text and returns the UTF-8 encoding of that text as upper-case hex.
# Characters that are not part of a hex digit pair are kept as literal bytes
# and therefore show up hex-encoded in the result.
sub hexEUCtoUTF8 {
    my ($hex_name) = @_;

    # hex pairs -> raw bytes
    (my $euc_bytes = $hex_name) =~ s{([0-9A-Fa-f]{2})}{chr(hex($1))}ge;

    # EUC-JP bytes -> UTF-8 bytes
    my $utf8_bytes = encode('utf-8', decode("euc-jp", $euc_bytes));

    # raw bytes -> upper-case hex
    return uc unpack("H*", $utf8_bytes);
}
1;