pukiwiki


pukiwiki euc 版から utf 版へ移行(2012/11)

pukiwiki utf 版の準備

  • PHP 5.4.x を利用する場合、以下のソースを変更する。
    • lib/func.php
/**
 * Decode a page name.
 *
 * Thin wrapper that delegates to internal_hex2bin().
 *
 * @param string $key Encoded page name.
 * @return string Whatever internal_hex2bin() returns for $key.
 */
function decode($key)
{
	$decoded = internal_hex2bin($key);

	return $decoded;
}

/**
 * Inversion of bin2hex().
 *
 * Named internal_hex2bin() so it does not collide with the built-in
 * hex2bin() available from PHP 5.4.
 *
 * @param mixed $hex_string Candidate hexadecimal string.
 * @return mixed The decoded binary string when the whole input consists of
 *               hex digits; otherwise the input is returned unchanged.
 */
function internal_hex2bin($hex_string)
{
	// The guard avoids "pack(): Type H: illegal hex digit ..." warnings.
	// \A ... \z anchor the WHOLE string: a plain '$' also matches just before
	// a trailing newline, which would let "4142\n" through to pack().
	if (! preg_match('/\A[0-9a-f]+\z/i', $hex_string)) {
		return $hex_string;
	}

	// (string): always treat as string (not int etc). See BugTrack2/31
	return pack('H*', (string)$hex_string);
}
  • lib/auth.php
hex2bin という文字列をすべて internal_hex2bin に変更する(PHP 5.4 で組み込み関数 hex2bin() が追加され、関数名が衝突するため)。

データの移行

./convert.pl <pukiwiki eucディレクトリ> <保存先ディレクトリ>
  • 保存先ディレクトリに utf 化されたデータディレクトリが作成されるので、上で準備した utf 版 pukiwiki ディレクトリにマージする。
 #!/usr/bin/perl -w
 
 use strict;
 use warnings;
 use Encode;
 use File::Basename;
 use File::Copy;
 use IO::File;
 use Path::Class::Dir;
 use Path::Class::File;
 
 # Suffix pattern passed to fileparse() when splitting a file name into
 # base name and suffix.
 use constant REGEX_SUFFIX => qw(\..+$);		# everything from the FIRST "." onward is the suffix
 #use constant REGEX_SUFFIX => qw(\.[^\.]+$);	# alternative: everything from the LAST "." onward is the suffix
 
 # Run the conversion.
 &main();
 
 ### Entry point: convert a PukiWiki EUC-JP data tree into a UTF-8 one.
 #
 # Expects exactly two command-line arguments: the source root directory
 # and the target root directory.  The sub-directories attach, backup,
 # cache, diff and wiki are processed in that order:
 #   attach : file names are re-encoded, contents are copied unchanged
 #   backup : *.gz files are re-encoded through gzip
 #   cache  : *.ref / *.rel files and recent.dat are re-encoded
 #   diff   : *.txt files are re-encoded
 #   wiki   : *.txt files are re-encoded
 # The access/modification times of each source file are copied to the
 # corresponding target file.
 sub main {
 	if ( @ARGV != 2 ) {
 		print STDERR "error: $0 [source_dir] [target_dir]\n";
 		exit 1;
 	}
 
 	my ($src_dir_root, $trg_dir_root) = @ARGV;
 	my ($src_dir, $trg_dir);
 
 	####### */attach #######
 	($src_dir, $trg_dir) = set_dir($src_dir_root, $trg_dir_root, "attach");
 	foreach my $src_file ($src_dir->children) {
 		next if $src_file->is_dir;
 		my ($hexname, undef, $suffix) = fileparse($src_file->basename, REGEX_SUFFIX);
 		# Files without any suffix are accepted here as well.
 		next unless $suffix eq "" || _suffix_allowed($suffix, ".1", ".log");
 		# The base name is a "_"-separated list of hex strings; convert each part.
 		my $utf8name = join("_", map { hexEUCtoUTF8($_) } split(/_/, $hexname));
 		my $trg_file = Path::Class::File->new($trg_dir, $utf8name . $suffix);
 		unless ( copy("$src_file", "$trg_file") ) {
 			warn "Can't copy $src_file to $trg_file: $!\n";
 			next;
 		}
 		_copy_times($src_file, $trg_file);
 	}
 
 	####### */backup #######
 	($src_dir, $trg_dir) = set_dir($src_dir_root, $trg_dir_root, "backup");
 	_convert_dir($src_dir, $trg_dir, 1, ".gz");
 
 	####### */cache #######
 	($src_dir, $trg_dir) = set_dir($src_dir_root, $trg_dir_root, "cache");
 	_convert_dir($src_dir, $trg_dir, 0, ".ref", ".rel");
 	# recent.dat (skipped when the source tree does not have one)
 	my $recent_src = Path::Class::File->new($src_dir, "recent.dat");
 	if ( -e "$recent_src" ) {
 		_convert_file($recent_src, Path::Class::File->new($trg_dir, "recent.dat"), 0);
 	}
 
 	####### */diff #######
 	($src_dir, $trg_dir) = set_dir($src_dir_root, $trg_dir_root, "diff");
 	_convert_dir($src_dir, $trg_dir, 0, ".txt");
 
 	####### */wiki #######
 	($src_dir, $trg_dir) = set_dir($src_dir_root, $trg_dir_root, "wiki");
 	_convert_dir($src_dir, $trg_dir, 0, ".txt");
 }
 
 # True when $suffix is exactly equal to one of the allowed suffixes.
 # (Exact comparison: the suffix must never be used as a regex pattern.)
 sub _suffix_allowed {
 	my ($suffix, @allowed) = @_;
 	return scalar grep { $_ eq $suffix } @allowed;
 }
 
 # Quote a path for safe interpolation into a POSIX shell command line.
 sub _shell_quote {
 	my $path = "$_[0]";
 	$path =~ s/'/'\\''/g;
 	return "'$path'";
 }
 
 # Copy the access and modification times of $src_file onto $trg_file.
 sub _copy_times {
 	my ($src_file, $trg_file) = @_;
 	my $st = $src_file->stat;
 	unless ( defined $st ) {
 		warn "Can't stat $src_file: $!\n";
 		return;
 	}
 	utime($st->atime, $st->mtime, "$trg_file")
 		or warn "Can't set times on $trg_file: $!\n";
 	return;
 }
 
 # Re-encode one file from EUC-JP to UTF-8 and copy its timestamps.
 # When $gzipped is true the data is read and written through gzip.
 sub _convert_file {
 	my ($src_file, $trg_file, $gzipped) = @_;
 	my ($src_fh, $trg_fh);
 
 	if ($gzipped) {
 		$src_fh = IO::File->new("gzip -cd " . _shell_quote($src_file) . " 2>/dev/null |");
 		$trg_fh = IO::File->new("| gzip -c - > " . _shell_quote($trg_file));
 	}
 	else {
 		$src_fh = $src_file->open('r');
 		$trg_fh = $trg_file->open('w');
 	}
 	die "Can't open $src_file for reading: $!\n" unless defined $src_fh;
 	die "Can't open $trg_file for writing: $!\n" unless defined $trg_fh;
 
 	fileEUCtoUTF8($src_fh, $trg_fh);
 	_copy_times($src_file, $trg_file);
 	return;
 }
 
 # Re-encode every regular file in $src_dir whose suffix is one of
 # @suffixes, writing the result under the converted name into $trg_dir.
 sub _convert_dir {
 	my ($src_dir, $trg_dir, $gzipped, @suffixes) = @_;
 
 	foreach my $src_file ($src_dir->children) {
 		next if $src_file->is_dir;
 		my ($hexname, undef, $suffix) = fileparse($src_file->basename, REGEX_SUFFIX);
 		next unless _suffix_allowed($suffix, @suffixes);
 		my $trg_file = Path::Class::File->new($trg_dir, hexEUCtoUTF8($hexname) . $suffix);
 		_convert_file($src_file, $trg_file, $gzipped);
 	}
 	return;
 }
 
 
 ### Set up the source and target directory pair for one sub-directory.
 #
 # Arguments: source root, target root, sub-directory name.
 # Returns:   (source dir, target dir) as Path::Class::Dir objects.
 # Exits with status 1 when the source sub-directory is not an existing
 # directory; creates the target sub-directory when it is missing.
 sub set_dir {
 	my ($src_rootdir, $trg_rootdir, $subdir) = @_;
 
 	my $src_dir = Path::Class::Dir->new($src_rootdir, $subdir);
 	my $trg_dir = Path::Class::Dir->new($trg_rootdir, $subdir);
 
 	# Check the source: it must be a directory, not merely an existing path,
 	# because the caller lists its children.
 	unless ( -d "$src_dir" ) {
 		print STDERR "ERROR: Can't locate source directory: $src_dir\n";
 		exit 1;
 	}
 
 	# Check the target: create it (including parents) when missing.
 	$trg_dir->mkpath unless -d "$trg_dir";
 
 	return ($src_dir, $trg_dir);
 }
 
 
 ### File EUC-JP => UTF-8
 #
 # Reads $src_fh line by line, re-encodes each line from EUC-JP to UTF-8
 # and prints it to $trg_fh.  Every output line is terminated with "\n",
 # including a final line that had no terminator in the source.
 # Both handles are closed before returning.
 #
 # Arguments: source filehandle (readable), target filehandle (writable).
 # Returns:   the result of closing the source handle.
 # Dies:      when either handle is undefined (e.g. a failed open).
 sub fileEUCtoUTF8 {
 	my ($src_fh, $trg_fh) = @_;
 
 	# A failed open upstream yields undef; fail with a clear message
 	# instead of an obscure "method on undefined value" error later on.
 	die "fileEUCtoUTF8: source filehandle is not open\n" unless defined $src_fh;
 	die "fileEUCtoUTF8: target filehandle is not open\n" unless defined $trg_fh;
 
 	# A lexical line variable keeps the caller's global $_ intact.
 	while ( defined( my $line = <$src_fh> ) ) {
 		chomp $line;
 		my $utf8_line = encode('utf-8', decode("euc-jp", $line));
 		$trg_fh->print("$utf8_line\n");
 	}
 
 	# Buffered write errors (and a failing output pipe) only surface at close.
 	$trg_fh->close
 		or warn "fileEUCtoUTF8: closing target failed (errno: $!, status: $?)\n";
 
 	return $src_fh->close;
 }
 
 
 ### HEX => EUC-JP => UTF-8 => HEX => UPPER CASE
 #
 # Takes a hex-encoded EUC-JP byte string and returns the upper-case hex
 # encoding of the same text in UTF-8.
 sub hexEUCtoUTF8 {
 	my ($hex_name) = @_;
 
 	# hex -> binary: every pair of hex digits becomes one byte
 	(my $euc_bytes = $hex_name) =~ s{([0-9A-Fa-f]{2})}{pack('H2', $1)}eg;
 
 	# EUC-JP -> UTF-8
 	my $utf8_bytes = encode('utf-8', decode('euc-jp', $euc_bytes));
 
 	# binary -> hex, lower case -> upper case
 	return uc(unpack('H*', $utf8_bytes));
 }
 
 1;
pukiwiki.txt · 最終更新: 2012/11/14 14:14 by 127.0.0.1
文書の先頭へ
Driven by DokuWiki Recent changes RSS feed Valid CSS Valid XHTML 1.0