File xjdic-utf-8-to-euc-jp of Package xjdic

#!/usr/bin/perl --  # -*- coding: utf-8 -*-
#
# Author: Mike Fabian <mfabian@suse.de>, 2003
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.

require 5.008;
use Encode;
use Unicode::Normalize;
use utf8;
use English;

binmode STDIN,  ":encoding(utf8)";
binmode STDOUT, ":encoding(euc-jp)";

while (<>) {

    # correct but does not map to euc-jp:
    $ARG =~ s/·/・/g;
    $ARG =~ s/¥/¥/g;
    $ARG =~ s/µ/μ/g;
    $ARG =~ s/\x{201A}/\\x{201A}/g; #201A;SINGLE LOW-9 QUOTATION MARK;Ps;0;ON;;;;;N;LOW SINGLE COMMA QUOTATION MARK;;;;
    $ARG =~ s/\x{3094}/\\x{3094}/g; #3094;HIRAGANA LETTER VU;Lo;0;L;3046 3099;;;;N;;;;;
    $ARG =~ s/\x{3004}/\\x{3004}/g; #3004;JAPANESE INDUSTRIAL STANDARD SYMBOL;So;0;ON;;;;;N;;;;;
    $ARG =~ s/\x{3299}/\\x{3299}/g; #3299;CIRCLED IDEOGRAPH SECRET;So;0;L;<circle> 79D8;;;;N;;;;;
    $ARG =~ s/\x{3396}/\\x{3396}/g; #3396;SQUARE ML;So;0;L;<square> 006D 2113;;;;N;SQUARED ML;;;;
    # certain errors in wadukujt:
    $ARG =~ s/¾//g;  
    $ARG =~ s/¼//g;
    $ARG =~ s/\x{00B2}/\\x{00B2}/g; #00B2;SUPERSCRIPT TWO;No;0;EN;<super> 0032;;2;2;N;SUPERSCRIPT DIGIT TWO;;;;
    $ARG =~ s/\x{00B3}/\\x{00B3}/g; #00B3;SUPERSCRIPT THREE;No;0;EN;<super> 0033;;3;3;N;SUPERSCRIPT DIGIT THREE;;;;
    $ARG =~ s/\x{00AB}/\\x{00AB}/g; #00AB;LEFT-POINTING DOUBLE ANGLE QUOTATION MARK;Pi;0;ON;;;;;Y;LEFT POINTING GUILLEMET;*;;;
    $ARG =~ s/\x{00BB}/\\x{00BB}/g; #00BB;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK;Pf;0;ON;;;;;Y;RIGHT POINTING GUILLEMET;*;;;
    $ARG =~ s/\x{F87E}/\\x{F87E}/g;
    $ARG =~ s/\x{3040}/\\x{3040}/g; 
    $ARG =~ s/\x{00B9}/\\x{00B9}/g; #00B9;SUPERSCRIPT ONE;No;0;EN;<super> 0031;;1;1;N;SUPERSCRIPT DIGIT ONE;;;;

    # possible errors in wadokujt:
    $ARG =~ s/\x{00D0}/\\x{00D0}/g; #00D0;LATIN CAPITAL LETTER ETH;Lu;0;L;;;;;N;;Icelandic;;00F0;
    $ARG =~ s/\x{9840}/\\x{9840}/g; 
    
    $ARG =~ s/\x{007F}//g; #007F;<control>;Cc;0;BN;;;;;N;DELETE;;;;
    $ARG =~ s/\x{0080}//g; #0080;<control>;Cc;0;BN;;;;;N;;;;;
    $ARG =~ s/\x{0081}//g; #0081;<control>;Cc;0;BN;;;;;N;;;;;
    $ARG =~ s/\x{0082}//g; #0082;<control>;Cc;0;BN;;;;;N;BREAK PERMITTED HERE;;;;
    $ARG =~ s/\x{0083}//g; #0083;<control>;Cc;0;BN;;;;;N;NO BREAK HERE;;;;
    $ARG =~ s/\x{0084}//g; #0084;<control>;Cc;0;BN;;;;;N;;;;;
    $ARG =~ s/\x{0085}//g; #0085;<control>;Cc;0;B;;;;;N;NEXT LINE (NEL);;;;
    $ARG =~ s/\x{0086}//g; #0086;<control>;Cc;0;BN;;;;;N;START OF SELECTED AREA;;;;
    $ARG =~ s/\x{0087}//g; #0087;<control>;Cc;0;BN;;;;;N;END OF SELECTED AREA;;;;
    $ARG =~ s/\x{0088}//g; #0088;<control>;Cc;0;BN;;;;;N;CHARACTER TABULATION SET;;;;
    $ARG =~ s/\x{0089}//g; #0089;<control>;Cc;0;BN;;;;;N;CHARACTER TABULATION WITH JUSTIFICATION;;;;
    $ARG =~ s/\x{008A}//g; #008A;<control>;Cc;0;BN;;;;;N;LINE TABULATION SET;;;;
    $ARG =~ s/\x{008B}//g; #008B;<control>;Cc;0;BN;;;;;N;PARTIAL LINE FORWARD;;;;
    $ARG =~ s/\x{008C}//g; #008C;<control>;Cc;0;BN;;;;;N;PARTIAL LINE BACKWARD;;;;
    $ARG =~ s/\x{008D}//g; #008D;<control>;Cc;0;BN;;;;;N;REVERSE LINE FEED;;;;
    $ARG =~ s/\x{008E}//g; #008E;<control>;Cc;0;BN;;;;;N;SINGLE SHIFT TWO;;;;
    $ARG =~ s/\x{008F}//g; #008F;<control>;Cc;0;BN;;;;;N;SINGLE SHIFT THREE;;;;
    $ARG =~ s/\x{0090}//g; #0090;<control>;Cc;0;BN;;;;;N;DEVICE CONTROL STRING;;;;
    $ARG =~ s/\x{0091}//g; #0091;<control>;Cc;0;BN;;;;;N;PRIVATE USE ONE;;;;
    $ARG =~ s/\x{0092}//g; #0092;<control>;Cc;0;BN;;;;;N;PRIVATE USE TWO;;;;
    $ARG =~ s/\x{0093}//g; #0093;<control>;Cc;0;BN;;;;;N;SET TRANSMIT STATE;;;;
    $ARG =~ s/\x{0094}//g; #0094;<control>;Cc;0;BN;;;;;N;CANCEL CHARACTER;;;;
    $ARG =~ s/\x{0095}//g; #0095;<control>;Cc;0;BN;;;;;N;MESSAGE WAITING;;;;
    $ARG =~ s/\x{0096}//g; #0096;<control>;Cc;0;BN;;;;;N;START OF GUARDED AREA;;;;
    $ARG =~ s/\x{0097}//g; #0097;<control>;Cc;0;BN;;;;;N;END OF GUARDED AREA;;;;
    $ARG =~ s/\x{0098}//g; #0098;<control>;Cc;0;BN;;;;;N;START OF STRING;;;;
    $ARG =~ s/\x{0099}//g; #0099;<control>;Cc;0;BN;;;;;N;;;;;
    $ARG =~ s/\x{009A}//g; #009A;<control>;Cc;0;BN;;;;;N;SINGLE CHARACTER INTRODUCER;;;;
    $ARG =~ s/\x{009B}//g; #009B;<control>;Cc;0;BN;;;;;N;CONTROL SEQUENCE INTRODUCER;;;;
    $ARG =~ s/\x{009C}//g; #009C;<control>;Cc;0;BN;;;;;N;STRING TERMINATOR;;;;
    $ARG =~ s/\x{009D}//g; #009D;<control>;Cc;0;BN;;;;;N;OPERATING SYSTEM COMMAND;;;;
    $ARG =~ s/\x{009E}//g; #009E;<control>;Cc;0;BN;;;;;N;PRIVACY MESSAGE;;;;
    $ARG =~ s/\x{009F}//g; #009F;<control>;Cc;0;BN;;;;;N;APPLICATION PROGRAM COMMAND;;;;

    
    $ARG =~ s/\x{2fc0}//g; # 2FC0;KANGXI RADICAL CAULDRON;So;0;ON;<compat> 9B32;;;;N;;;;;

    ######################################################################
    # characters from buddhic which don't map to euc-jp:
    $ARG =~ s/\x{00a0}/\\x{00a0}/g;
    $ARG =~ s/\x{014e}/\\x{014e}/g;
    $ARG =~ s/\x{014f}/\\x{014f}/g;
    $ARG =~ s/\x{1e0d}/\\x{1e0d}/g;
    $ARG =~ s/\x{1e25}/\\x{1e25}/g;
    $ARG =~ s/\x{1e37}/\\x{1e37}/g;
    $ARG =~ s/\x{1e41}/\\x{1e41}/g;
    $ARG =~ s/\x{1e42}/\\x{1e42}/g;
    $ARG =~ s/\x{1e43}/\\x{1e43}/g;
    $ARG =~ s/\x{1e45}/\\x{1e45}/g;
    $ARG =~ s/\x{1e47}/\\x{1e47}/g;
    $ARG =~ s/\x{1e5a}/\\x{1e5a}/g;
    $ARG =~ s/\x{1e5b}/\\x{1e5b}/g;
    $ARG =~ s/\x{1e62}/\\x{1e62}/g;
    $ARG =~ s/\x{1e63}/\\x{1e63}/g;
    $ARG =~ s/\x{1e6d}/\\x{1e6d}/g;
    $ARG =~ s/\x{3779}/\\x{3779}/g;
    $ARG =~ s/\x{4ff1}/\\x{4ff1}/g;
    $ARG =~ s/\x{505d}/\\x{505d}/g;
    $ARG =~ s/\x{5167}/\\x{5167}/g;
    $ARG =~ s/\x{5179}/\\x{5179}/g;
    $ARG =~ s/\x{5367}/\\x{5367}/g;
    $ARG =~ s/\x{537d}/\\x{537d}/g;
    $ARG =~ s/\x{5412}/\\x{5412}/g;
    $ARG =~ s/\x{543f}/\\x{543f}/g;
    $ARG =~ s/\x{544c}/\\x{544c}/g;
    $ARG =~ s/\x{546c}/\\x{546c}/g;
    $ARG =~ s/\x{547e}/\\x{547e}/g;
    $ARG =~ s/\x{5527}/\\x{5527}/g;
    $ARG =~ s/\x{589e}/\\x{589e}/g;
    $ARG =~ s/\x{5e77}/\\x{5e77}/g;
    $ARG =~ s/\x{5ec5}/\\x{5ec5}/g;
    $ARG =~ s/\x{5f65}/\\x{5f65}/g;
    $ARG =~ s/\x{5fb5}/\\x{5fb5}/g;
    $ARG =~ s/\x{5fb7}/\\x{5fb7}/g;
    $ARG =~ s/\x{6085}/\\x{6085}/g;
    $ARG =~ s/\x{63ed}/\\x{63ed}/g;
    $ARG =~ s/\x{640b}/\\x{640b}/g;
    $ARG =~ s/\x{654e}/\\x{654e}/g;
    $ARG =~ s/\x{65e3}/\\x{65e3}/g;
    $ARG =~ s/\x{67e5}/\\x{67e5}/g;
    $ARG =~ s/\x{68c3}/\\x{68c3}/g;
    $ARG =~ s/\x{6a6b}/\\x{6a6b}/g;
    $ARG =~ s/\x{6b31}/\\x{6b31}/g;
    $ARG =~ s/\x{6b65}/\\x{6b65}/g;
    $ARG =~ s/\x{6b72}/\\x{6b72}/g;
    $ARG =~ s/\x{6bc1}/\\x{6bc1}/g;
    $ARG =~ s/\x{6bd7}/\\x{6bd7}/g;
    $ARG =~ s/\x{6c61}/\\x{6c61}/g;
    $ARG =~ s/\x{6df8}/\\x{6df8}/g;
    $ARG =~ s/\x{6e34}/\\x{6e34}/g;
    $ARG =~ s/\x{6ebc}/\\x{6ebc}/g;
    $ARG =~ s/\x{714f}/\\x{714f}/g;
    $ARG =~ s/\x{72c0}/\\x{72c0}/g;
    $ARG =~ s/\x{7501}/\\x{7501}/g;
    $ARG =~ s/\x{7d55}/\\x{7d55}/g;
    $ARG =~ s/\x{7da0}/\\x{7da0}/g;
    $ARG =~ s/\x{7de3}/\\x{7de3}/g;
    $ARG =~ s/\x{812b}/\\x{812b}/g;
    $ARG =~ s/\x{85b0}/\\x{85b0}/g;
    $ARG =~ s/\x{865b}/\\x{865b}/g;
    $ARG =~ s/\x{8aaa}/\\x{8aaa}/g;
    $ARG =~ s/\x{8cd2}/\\x{8cd2}/g;
    $ARG =~ s/\x{8cf4}/\\x{8cf4}/g;
    $ARG =~ s/\x{90de}/\\x{90de}/g;
    $ARG =~ s/\x{9304}/\\x{9304}/g;
    $ARG =~ s/\x{934a}/\\x{934a}/g;
    $ARG =~ s/\x{9592}/\\x{9592}/g;
    $ARG =~ s/\x{95b1}/\\x{95b1}/g;
    $ARG =~ s/\x{9751}/\\x{9751}/g;
    $ARG =~ s/\x{9ebd}/\\x{9ebd}/g;
    $ARG =~ s/\x{9ec3}/\\x{9ec3}/g;
    $ARG =~ s/\x{9ed1}/\\x{9ed1}/g;
    ######################################################################
    
    $ARG=NFC($ARG);
    print $ARG;
}