UnihanTag makedbm.pl

This is part of the UnihanTag Extension. You can also view my other MediaWiki extensions.

#!/usr/bin/perl
#
# Build the Unihan.cdb lookup database from Unihan.txt.
#
# Input : "Unihan.txt" in the current directory, read line by line.  Blank
#         lines and lines starting with '#' are ignored; every other line is
#         split on tabs into three fields: code point, property name, value.
# Output: "Unihan.cdb" in the current directory, one CDB record per data line.
#         The record key is the code point field minus its first two
#         characters, a '-', and the property name minus its first character,
#         all lower-cased.  Presumably that turns "U+4E00" / "kMandarin" into
#         "4e00-mandarin" -- confirm against the Unihan.txt in use.
# Dies if the database cannot be created or the input cannot be opened.

use strict;
use warnings;

# Flush STDOUT after every write so the progress message below, which has no
# trailing newline, is visible while the input is being processed.
$| = 1;
use CDB_File;
use Fcntl;
use File::Path;

my $src = "Unihan.txt";
my $dst = "Unihan.cdb";
print "Processing $src...";

# Records are written to a per-process temporary file ("$dst.<pid>"), so an
# existing $dst is not touched while the build is still running.
my $cdb = CDB_File->new($dst, "$dst.$$")
    or die "$dst: $!";

my $n = 0;

# Three-argument open: the file name can never be interpreted as a mode.
open my $fh, '<', $src or die "$src:$!";

LINE:
while (my $line = <$fh>) {
    chomp $line;

    # Filter blank and comment lines before doing any field work.
    next LINE if $line eq '' or $line =~ /^#/;

    my ($uni, $k, $v) = split /\t/, $line;

    # A data line needs all three fields; without this guard a short line
    # would reach substr()/insert() with undefined values.
    unless (defined $v) {
        warn "$src line $.: skipping malformed record\n";
        next LINE;
    }

    # Mandarin readings are stored lower-cased.
    # NOTE(review): the file is read without an encoding layer, so lc()
    # presumably only affects ASCII letters here -- confirm this is intended.
    $v = lc $v if $k eq 'kMandarin';

    $cdb->insert(lc(substr($uni, 2) . "-" . substr($k, 1)), $v);
    $n++;
}
close $fh;

# Completes the database started by CDB_File->new above.
$cdb->finish;

# NOTE: $n counts inserted records (one per data line), not distinct
# characters; the message text is kept unchanged.
print "$n characters processed.\n";

Copyright © Kenny Root. All rights reserved.