#!/usr/bin/perl
# unisharphtml2utf8 -- 2005/12/05 by i16
#
# Filter that replaces decimal HTML numeric character references
# ("&#NNN;") with the corresponding UTF-8 byte sequence.  Input is read
# line by line from the files named on the command line (or STDIN) and
# the converted text is written to STDOUT.
#
# The encoding follows RFC 2279.  That RFC is obsolete, but it covers a
# wider range than the newer one: the whole 31-bit UCS-4 space
# (0 .. 0x7fffffff), using sequences of up to six bytes.
#
# Everything that is not a decimal reference is copied through untouched,
# including hexadecimal references ("&#x41;"), named entities and line
# terminators.  References above 0x7fffffff are also left as they are.
# Code points are not validated beyond the range check, so surrogates and
# "&#0;" are encoded like any other value.

use strict;
use warnings;

# encode_rfc2279($code)
#
# Returns the RFC 2279 UTF-8 encoding of the non-negative integer $code
# as a string of bytes, or undef when $code is larger than 0x7fffffff
# and therefore cannot be represented.
sub encode_rfc2279 {
    my ($code) = @_;

    return if $code > 0x7fffffff;
    return chr $code if $code <= 0x7f;

    # Each form: [largest code point, lead-byte marker, continuation bytes].
    my @forms = (
        [ 0x7ff,      0xc0, 1 ],
        [ 0xffff,     0xe0, 2 ],
        [ 0x1fffff,   0xf0, 3 ],
        [ 0x3ffffff,  0xf8, 4 ],
        [ 0x7fffffff, 0xfc, 5 ],
    );

    for my $form (@forms) {
        my ($max, $marker, $trailing) = @{$form};
        next if $code > $max;

        # The lead byte carries the bits above all continuation bytes;
        # every continuation byte then carries the next six bits.
        my $bytes = chr($marker | ($code >> (6 * $trailing)));
        for my $index (reverse 0 .. $trailing - 1) {
            $bytes .= chr(0x80 | (($code >> (6 * $index)) & 0x3f));
        }
        return $bytes;
    }

    return;
}

# _expand_reference($digits)
#
# Returns the UTF-8 bytes for the decimal reference whose digit string is
# $digits, or the original "&#digits;" text when the value is out of range.
sub _expand_reference {
    my ($digits) = @_;

    # "+ 0" forces numeric context so leading zeros ("&#0065;") work and
    # the bitwise operators never see a string operand.
    my $bytes = encode_rfc2279($digits + 0);

    return defined $bytes ? $bytes : '&#' . $digits . ';';
}

# convert_line($line)
#
# Returns $line with every decimal numeric character reference replaced
# by its UTF-8 encoding.  References are matched left to right and do not
# overlap.  All other text, including any trailing newline, is returned
# unchanged.
sub convert_line {
    my ($line) = @_;

    # [0-9] rather than \d: only ASCII digits form a valid reference, and
    # \d may match other digits when the input has been character-decoded.
    $line =~ s/&\#([0-9]+);/_expand_reference($1)/ge;

    return $line;
}

while (my $line = <>) {
    print convert_line($line);
}