#!/usr/bin/perl
#unisharphtml2utf8 -- 2005/12/05 by i16
# Encodes per RFC 2279, which is obsolete but covers a wider range than the
# newer RFC 3629: RFC 2279 maps the whole of UCS-4 (up to 0x7FFFFFFF).
use strict;
use warnings;

# Largest code point the RFC 2279 scheme can express (31 bits, 6 bytes).
use constant MAX_UCS4 => 0x7fffffff;

# encode_ucs4($cp)
#   Encode the non-negative number $cp as an RFC 2279 UTF-8 byte string
#   (1 to 6 bytes).  Returns undef when $cp is above MAX_UCS4 and therefore
#   cannot be encoded.
sub encode_ucs4 {
    my ($cp) = @_;

    return if $cp > MAX_UCS4;
    return chr($cp) if $cp <= 0x7f;

    # Lead-byte marker and number of continuation bytes for each range.
    my ($lead, $trail) =
          $cp <= 0x7ff     ? (0xc0, 1)
        : $cp <= 0xffff    ? (0xe0, 2)
        : $cp <= 0x1fffff  ? (0xf0, 3)
        : $cp <= 0x3ffffff ? (0xf8, 4)
        :                    (0xfc, 5);

    # The lead byte carries the topmost bits; every continuation byte
    # carries the next 6 bits, most significant group first.
    my $bytes = chr($lead | ($cp >> (6 * $trail)));
    for my $shift (map { 6 * $_ } reverse 0 .. $trail - 1) {
        $bytes .= chr(0x80 | (($cp >> $shift) & 0x3f));
    }
    return $bytes;
}

# entity_code_point($dec, $hex)
#   Turn the digits captured from a numeric character reference into a
#   number.  Exactly one of $dec (decimal digits) and $hex (hexadecimal
#   digits) is expected to be defined.  Returns undef for a hexadecimal
#   value that needs more than 32 bits, so hex() is never asked to parse a
#   number it would warn about.
sub entity_code_point {
    my ($dec, $hex) = @_;

    # An over-long decimal string simply numifies to a large float, which
    # encode_ucs4() then rejects.
    return 0 + $dec if defined $dec;

    (my $digits = $hex) =~ s/\A0+(?=.)//;    # ignore leading zeros
    return if length($digits) > 8;
    return hex $digits;
}

# convert_entities($text)
#   Return $text with every numeric character reference -- decimal "&#N;"
#   or hexadecimal "&#xH;" -- replaced by its UTF-8 byte sequence.  A
#   reference whose value cannot be encoded is left exactly as written, and
#   all other text (including any line terminator) is passed through
#   untouched.
sub convert_entities {
    my ($text) = @_;

    $text =~ s{(&\#(?:([0-9]+)|[xX]([0-9A-Fa-f]+));)}{
        my ($entity, $dec, $hex) = ($1, $2, $3);
        my $cp    = entity_code_point($dec, $hex);
        my $bytes = defined $cp ? encode_ucs4($cp) : undef;
        defined $bytes ? $bytes : $entity;
    }ge;

    return $text;
}

# Filter the files named on the command line (or STDIN) to STDOUT.  The
# loop runs only when this file is executed as a script, so the helpers
# above can be loaded from another file without consuming any input.
unless (caller) {
    while (my $line = <>) {
        print convert_entities($line);
    }
}