#--------------------------------------------------------------------------

# Generated from ./create_module on Mon Mar 18 10:20:36 2002

package NexTrieve::UTF8;

# Make sure we do everything by the book (strict vars, refs and subs)
# Set modules to inherit from (this package is a subclass of NexTrieve)
# Set version information
# Fully qualified package variables are used for @ISA and $VERSION, so no
# declaration is needed to satisfy "use strict"

use strict;
@NexTrieve::UTF8::ISA = qw(NexTrieve);
$NexTrieve::UTF8::VERSION = '0.30';

# Return true value for use
# NOTE(review): further run-time statements (the conversion tables) follow
# this one, so the value that require() sees is that of the last statement
# executed in the file, not this "1;" -- confirm that the file still ends on
# a true value

1;

#--------------------------------------------------------------------------
#  IN: 1 character number (code point) to convert, a non-negative integer
# OUT: 1 UTF-8 byte sequence (1 to 4 bytes long)
#
# Surrogate values (0xD800-0xDFFF) are not treated specially: they are encoded
# as ordinary 3-byte values.  Values above 0x10FFFF have no UTF-8
# representation and yield the encoding of U+FFFD REPLACEMENT CHARACTER.

sub _utf8 {

# Obtain the codepoint

  my $cp = shift;

# Handle 1-byte codepoints (00000000 0xxxxxxx -> 0xxxxxxx)

  return chr($cp) if $cp < 0x80;

# Handle the 2-byte codepoints (00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx)

  return pack( 'C2',
   0xC0 | ($cp >> 6),
   0x80 | ($cp & 0x3F),
  ) if $cp < 0x800;

# Handle the 3-byte codepoints (zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx)

  return pack( 'C3',
   0xE0 | ($cp >> 12),
   0x80 | (($cp >> 6) & 0x3F),
   0x80 | ($cp & 0x3F),
  ) if $cp < 0x10000;

# Handle the 4-byte codepoints, which can be fed in by the ucs4 converters
#  (000wwwzz zzzzyyyy yyxxxxxx -> 11110www 10zzzzzz 10yyyyyy 10xxxxxx)
# Previously these were silently truncated to a wrong 3-byte sequence

  return pack( 'C4',
   0xF0 | ($cp >> 18),
   0x80 | (($cp >> 12) & 0x3F),
   0x80 | (($cp >> 6) & 0x3F),
   0x80 | ($cp & 0x3F),
  ) if $cp < 0x110000;

# Anything else is outside the Unicode range: emit U+FFFD instead of garbage

  pack( 'C3',0xEF,0xBF,0xBD );
} #_utf8

#--------------------------------------------------------------------------
# Convert from ucs2-be to utf-8

sub illegal_ucs2be {

# Placeholder: ignores its arguments and returns nothing (empty list / undef)
# NOTE(review): presumably reserved for flagging illegal ucs2-be input --
# confirm against the callers

  return;
} #illegal_ucs2be

sub ucs2be {

# Rewrite the caller's string in place (through the $_[0] alias): every
# 2-character unit is read as a big-endian 16-bit number and replaced by its
# UTF-8 sequence; a trailing odd character does not match and is left as is
# The result of the substitution (number of units replaced, or false when
# nothing matched) is handed back to the caller

  return $_[0] =~ s{(.{2})}{ _utf8( unpack( 'n',$1 ) ) }gse;
} #ucs2be

#--------------------------------------------------------------------------
# Convert from ucs4-be to utf-8

sub illegal_ucs4be {

# Placeholder: ignores its arguments and returns nothing (empty list / undef)
# NOTE(review): presumably reserved for flagging illegal ucs4-be input --
# confirm against the callers

  return;
} #illegal_ucs4be

sub ucs4be {

# Rewrite the caller's string in place (through the $_[0] alias): every
# 4-character unit is read as a big-endian 32-bit number and replaced by
# whatever _utf8 returns for it; a trailing group of fewer than 4 characters
# does not match and is left as is
# The result of the substitution (number of units replaced, or false when
# nothing matched) is handed back to the caller

  return $_[0] =~ s{(.{4})}{ _utf8( unpack( 'N',$1 ) ) }gse;
} #ucs4be

#--------------------------------------------------------------------------
# Convert from ucs2-le to utf-8

sub illegal_ucs2le {

# Placeholder: ignores its arguments and returns nothing (empty list / undef)
# NOTE(review): presumably reserved for flagging illegal ucs2-le input --
# confirm against the callers

  return;
} #illegal_ucs2le

sub ucs2le {

# Rewrite the caller's string in place (through the $_[0] alias): every
# 2-character unit is read as a little-endian 16-bit number and replaced by
# its UTF-8 sequence; a trailing odd character does not match and is left as is
# The result of the substitution (number of units replaced, or false when
# nothing matched) is handed back to the caller

  return $_[0] =~ s{(.{2})}{ _utf8( unpack( 'v',$1 ) ) }gse;
} #ucs2le

#--------------------------------------------------------------------------
# Convert from ucs4-le to utf-8

sub illegal_ucs4le {

# Placeholder: ignores its arguments and returns nothing (empty list / undef)
# NOTE(review): presumably reserved for flagging illegal ucs4-le input --
# confirm against the callers

  return;
} #illegal_ucs4le

sub ucs4le {

# Rewrite the caller's string in place (through the $_[0] alias): every
# 4-character unit is read as a little-endian 32-bit number and replaced by
# whatever _utf8 returns for it; a trailing group of fewer than 4 characters
# does not match and is left as is
# The result of the substitution (number of units replaced, or false when
# nothing matched) is handed back to the caller

  return $_[0] =~ s{(.{4})}{ _utf8( unpack( 'V',$1 ) ) }gse;
} #ucs4le

#--------------------------------------------------------------------------
# Convert from iso-8859-1 to UTF-8

my @iso88591 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 '¡',	# 0x00A1 INVERTED EXCLAMATION MARK
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '¤',	# 0x00A4 CURRENCY SIGN
 '¥',	# 0x00A5 YEN SIGN
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 '©',	# 0x00A9 COPYRIGHT SIGN
 'ª',	# 0x00AA FEMININE ORDINAL INDICATOR
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 '¯',	# 0x00AF MACRON
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '´',	# 0x00B4 ACUTE ACCENT
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 '¸',	# 0x00B8 CEDILLA
 '¹',	# 0x00B9 SUPERSCRIPT ONE
 'º',	# 0x00BA MASCULINE ORDINAL INDICATOR
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¼',	# 0x00BC VULGAR FRACTION ONE QUARTER
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 '¾',	# 0x00BE VULGAR FRACTION THREE QUARTERS
 '¿',	# 0x00BF INVERTED QUESTION MARK
 'À',	# 0x00C0 LATIN CAPITAL LETTER A WITH GRAVE
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ã',	# 0x00C3 LATIN CAPITAL LETTER A WITH TILDE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'È',	# 0x00C8 LATIN CAPITAL LETTER E WITH GRAVE
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ê',	# 0x00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ì',	# 0x00CC LATIN CAPITAL LETTER I WITH GRAVE
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ï',	# 0x00CF LATIN CAPITAL LETTER I WITH DIAERESIS
 'Ð',	# 0x00D0 LATIN CAPITAL LETTER ETH (Icelandic)
 'Ñ',	# 0x00D1 LATIN CAPITAL LETTER N WITH TILDE
 'Ò',	# 0x00D2 LATIN CAPITAL LETTER O WITH GRAVE
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Õ',	# 0x00D5 LATIN CAPITAL LETTER O WITH TILDE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 'Ù',	# 0x00D9 LATIN CAPITAL LETTER U WITH GRAVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ý',	# 0x00DD LATIN CAPITAL LETTER Y WITH ACUTE
 'Þ',	# 0x00DE LATIN CAPITAL LETTER THORN (Icelandic)
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S (German)
 'à',	# 0x00E0 LATIN SMALL LETTER A WITH GRAVE
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ã',	# 0x00E3 LATIN SMALL LETTER A WITH TILDE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'è',	# 0x00E8 LATIN SMALL LETTER E WITH GRAVE
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ê',	# 0x00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ì',	# 0x00EC LATIN SMALL LETTER I WITH GRAVE
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 'ð',	# 0x00F0 LATIN SMALL LETTER ETH (Icelandic)
 'ñ',	# 0x00F1 LATIN SMALL LETTER N WITH TILDE
 'ò',	# 0x00F2 LATIN SMALL LETTER O WITH GRAVE
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'õ',	# 0x00F5 LATIN SMALL LETTER O WITH TILDE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 'ù',	# 0x00F9 LATIN SMALL LETTER U WITH GRAVE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ý',	# 0x00FD LATIN SMALL LETTER Y WITH ACUTE
 'þ',	# 0x00FE LATIN SMALL LETTER THORN (Icelandic)
 'ÿ',	# 0x00FF LATIN SMALL LETTER Y WITH DIAERESIS
); #@iso88591

sub illegal_iso88591 {

# Placeholder: ignores its arguments and returns nothing (empty list / undef)
# NOTE(review): presumably reserved for flagging illegal iso-8859-1 input --
# confirm against the callers

  return;
} #illegal_iso88591

sub iso88591 {

# Rewrite the caller's string in place (through the $_[0] alias): each
# character in the range 0x80-0xFF is replaced by the @iso88591 entry at
# index (ordinal - 0x80); characters below 0x80 are not touched
# The result of the substitution (number of characters replaced, or false
# when nothing matched) is handed back to the caller

  return $_[0] =~ s{([\x80-\xFF])}{ $iso88591[ ord($1) - 0x80 ] }gse;
} #iso88591

#--------------------------------------------------------------------------
# Convert from iso-8859-10 to UTF-8

my @iso885910 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 'Ą',	# 0x0104 LATIN CAPITAL LETTER A WITH OGONEK
 'Ē',	# 0x0112 LATIN CAPITAL LETTER E WITH MACRON
 'Ģ',	# 0x0122 LATIN CAPITAL LETTER G WITH CEDILLA
 'Ī',	# 0x012A LATIN CAPITAL LETTER I WITH MACRON
 'Ĩ',	# 0x0128 LATIN CAPITAL LETTER I WITH TILDE
 'Ķ',	# 0x0136 LATIN CAPITAL LETTER K WITH CEDILLA
 '§',	# 0x00A7 SECTION SIGN
 'Ļ',	# 0x013B LATIN CAPITAL LETTER L WITH CEDILLA
 'Đ',	# 0x0110 LATIN CAPITAL LETTER D WITH STROKE
 'Š',	# 0x0160 LATIN CAPITAL LETTER S WITH CARON
 'Ŧ',	# 0x0166 LATIN CAPITAL LETTER T WITH STROKE
 'Ž',	# 0x017D LATIN CAPITAL LETTER Z WITH CARON
 '­',	# 0x00AD SOFT HYPHEN
 'Ū',	# 0x016A LATIN CAPITAL LETTER U WITH MACRON
 'Ŋ',	# 0x014A LATIN CAPITAL LETTER ENG
 '°',	# 0x00B0 DEGREE SIGN
 'ą',	# 0x0105 LATIN SMALL LETTER A WITH OGONEK
 'ē',	# 0x0113 LATIN SMALL LETTER E WITH MACRON
 'ģ',	# 0x0123 LATIN SMALL LETTER G WITH CEDILLA
 'ī',	# 0x012B LATIN SMALL LETTER I WITH MACRON
 'ĩ',	# 0x0129 LATIN SMALL LETTER I WITH TILDE
 'ķ',	# 0x0137 LATIN SMALL LETTER K WITH CEDILLA
 '·',	# 0x00B7 MIDDLE DOT
 'ļ',	# 0x013C LATIN SMALL LETTER L WITH CEDILLA
 'đ',	# 0x0111 LATIN SMALL LETTER D WITH STROKE
 'š',	# 0x0161 LATIN SMALL LETTER S WITH CARON
 'ŧ',	# 0x0167 LATIN SMALL LETTER T WITH STROKE
 'ž',	# 0x017E LATIN SMALL LETTER Z WITH CARON
 '―',	# 0x2015 HORIZONTAL BAR
 'ū',	# 0x016B LATIN SMALL LETTER U WITH MACRON
 'ŋ',	# 0x014B LATIN SMALL LETTER ENG
 'Ā',	# 0x0100 LATIN CAPITAL LETTER A WITH MACRON
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ã',	# 0x00C3 LATIN CAPITAL LETTER A WITH TILDE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 'Į',	# 0x012E LATIN CAPITAL LETTER I WITH OGONEK
 'Č',	# 0x010C LATIN CAPITAL LETTER C WITH CARON
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ę',	# 0x0118 LATIN CAPITAL LETTER E WITH OGONEK
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ė',	# 0x0116 LATIN CAPITAL LETTER E WITH DOT ABOVE
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ï',	# 0x00CF LATIN CAPITAL LETTER I WITH DIAERESIS
 'Ð',	# 0x00D0 LATIN CAPITAL LETTER ETH (Icelandic)
 'Ņ',	# 0x0145 LATIN CAPITAL LETTER N WITH CEDILLA
 'Ō',	# 0x014C LATIN CAPITAL LETTER O WITH MACRON
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Õ',	# 0x00D5 LATIN CAPITAL LETTER O WITH TILDE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 'Ũ',	# 0x0168 LATIN CAPITAL LETTER U WITH TILDE
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 'Ų',	# 0x0172 LATIN CAPITAL LETTER U WITH OGONEK
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ý',	# 0x00DD LATIN CAPITAL LETTER Y WITH ACUTE
 'Þ',	# 0x00DE LATIN CAPITAL LETTER THORN (Icelandic)
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S (German)
 'ā',	# 0x0101 LATIN SMALL LETTER A WITH MACRON
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ã',	# 0x00E3 LATIN SMALL LETTER A WITH TILDE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'į',	# 0x012F LATIN SMALL LETTER I WITH OGONEK
 'č',	# 0x010D LATIN SMALL LETTER C WITH CARON
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ę',	# 0x0119 LATIN SMALL LETTER E WITH OGONEK
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ė',	# 0x0117 LATIN SMALL LETTER E WITH DOT ABOVE
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 'ð',	# 0x00F0 LATIN SMALL LETTER ETH (Icelandic)
 'ņ',	# 0x0146 LATIN SMALL LETTER N WITH CEDILLA
 'ō',	# 0x014D LATIN SMALL LETTER O WITH MACRON
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'õ',	# 0x00F5 LATIN SMALL LETTER O WITH TILDE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 'ũ',	# 0x0169 LATIN SMALL LETTER U WITH TILDE
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 'ų',	# 0x0173 LATIN SMALL LETTER U WITH OGONEK
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ý',	# 0x00FD LATIN SMALL LETTER Y WITH ACUTE
 'þ',	# 0x00FE LATIN SMALL LETTER THORN (Icelandic)
 'ĸ',	# 0x0138 LATIN SMALL LETTER KRA
); #@iso885910

sub illegal_iso885910 {

# Placeholder: ignores its arguments and returns nothing (empty list / undef)
# NOTE(review): presumably reserved for flagging illegal iso-8859-10 input --
# confirm against the callers

  return;
} #illegal_iso885910

sub iso885910 {

# Rewrite the caller's string in place (through the $_[0] alias): each
# character in the range 0x80-0xFF is replaced by the @iso885910 entry at
# index (ordinal - 0x80); characters below 0x80 are not touched
# The result of the substitution (number of characters replaced, or false
# when nothing matched) is handed back to the caller

  return $_[0] =~ s{([\x80-\xFF])}{ $iso885910[ ord($1) - 0x80 ] }gse;
} #iso885910

#--------------------------------------------------------------------------
# Convert from iso-8859-13 to UTF-8

my @iso885913 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '¤',	# 0x00A4 CURRENCY SIGN
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 '©',	# 0x00A9 COPYRIGHT SIGN
 'Ŗ',	# 0x0156 LATIN CAPITAL LETTER R WITH CEDILLA
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 '¹',	# 0x00B9 SUPERSCRIPT ONE
 'ŗ',	# 0x0157 LATIN SMALL LETTER R WITH CEDILLA
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¼',	# 0x00BC VULGAR FRACTION ONE QUARTER
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 '¾',	# 0x00BE VULGAR FRACTION THREE QUARTERS
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'Ą',	# 0x0104 LATIN CAPITAL LETTER A WITH OGONEK
 'Į',	# 0x012E LATIN CAPITAL LETTER I WITH OGONEK
 'Ā',	# 0x0100 LATIN CAPITAL LETTER A WITH MACRON
 'Ć',	# 0x0106 LATIN CAPITAL LETTER C WITH ACUTE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Ę',	# 0x0118 LATIN CAPITAL LETTER E WITH OGONEK
 'Ē',	# 0x0112 LATIN CAPITAL LETTER E WITH MACRON
 'Č',	# 0x010C LATIN CAPITAL LETTER C WITH CARON
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ź',	# 0x0179 LATIN CAPITAL LETTER Z WITH ACUTE
 'Ė',	# 0x0116 LATIN CAPITAL LETTER E WITH DOT ABOVE
 'Ģ',	# 0x0122 LATIN CAPITAL LETTER G WITH CEDILLA
 'Ķ',	# 0x0136 LATIN CAPITAL LETTER K WITH CEDILLA
 'Ī',	# 0x012A LATIN CAPITAL LETTER I WITH MACRON
 'Ļ',	# 0x013B LATIN CAPITAL LETTER L WITH CEDILLA
 'Š',	# 0x0160 LATIN CAPITAL LETTER S WITH CARON
 'Ń',	# 0x0143 LATIN CAPITAL LETTER N WITH ACUTE
 'Ņ',	# 0x0145 LATIN CAPITAL LETTER N WITH CEDILLA
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ō',	# 0x014C LATIN CAPITAL LETTER O WITH MACRON
 'Õ',	# 0x00D5 LATIN CAPITAL LETTER O WITH TILDE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ų',	# 0x0172 LATIN CAPITAL LETTER U WITH OGONEK
 'Ł',	# 0x0141 LATIN CAPITAL LETTER L WITH STROKE
 'Ś',	# 0x015A LATIN CAPITAL LETTER S WITH ACUTE
 'Ū',	# 0x016A LATIN CAPITAL LETTER U WITH MACRON
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ż',	# 0x017B LATIN CAPITAL LETTER Z WITH DOT ABOVE
 'Ž',	# 0x017D LATIN CAPITAL LETTER Z WITH CARON
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S (German)
 'ą',	# 0x0105 LATIN SMALL LETTER A WITH OGONEK
 'į',	# 0x012F LATIN SMALL LETTER I WITH OGONEK
 'ā',	# 0x0101 LATIN SMALL LETTER A WITH MACRON
 'ć',	# 0x0107 LATIN SMALL LETTER C WITH ACUTE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'ę',	# 0x0119 LATIN SMALL LETTER E WITH OGONEK
 'ē',	# 0x0113 LATIN SMALL LETTER E WITH MACRON
 'č',	# 0x010D LATIN SMALL LETTER C WITH CARON
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ź',	# 0x017A LATIN SMALL LETTER Z WITH ACUTE
 'ė',	# 0x0117 LATIN SMALL LETTER E WITH DOT ABOVE
 'ģ',	# 0x0123 LATIN SMALL LETTER G WITH CEDILLA
 'ķ',	# 0x0137 LATIN SMALL LETTER K WITH CEDILLA
 'ī',	# 0x012B LATIN SMALL LETTER I WITH MACRON
 'ļ',	# 0x013C LATIN SMALL LETTER L WITH CEDILLA
 'š',	# 0x0161 LATIN SMALL LETTER S WITH CARON
 'ń',	# 0x0144 LATIN SMALL LETTER N WITH ACUTE
 'ņ',	# 0x0146 LATIN SMALL LETTER N WITH CEDILLA
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ō',	# 0x014D LATIN SMALL LETTER O WITH MACRON
 'õ',	# 0x00F5 LATIN SMALL LETTER O WITH TILDE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ų',	# 0x0173 LATIN SMALL LETTER U WITH OGONEK
 'ł',	# 0x0142 LATIN SMALL LETTER L WITH STROKE
 'ś',	# 0x015B LATIN SMALL LETTER S WITH ACUTE
 'ū',	# 0x016B LATIN SMALL LETTER U WITH MACRON
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ż',	# 0x017C LATIN SMALL LETTER Z WITH DOT ABOVE
 'ž',	# 0x017E LATIN SMALL LETTER Z WITH CARON
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
); #@iso885913

sub illegal_iso885913 {

# Placeholder: ignores its arguments and returns nothing (empty list / undef)
# NOTE(review): presumably reserved for flagging illegal iso-8859-13 input --
# confirm against the callers

  return;
} #illegal_iso885913

sub iso885913 {

# Rewrite the caller's string in place (through the $_[0] alias): each
# character in the range 0x80-0xFF is replaced by the @iso885913 entry at
# index (ordinal - 0x80); characters below 0x80 are not touched
# The result of the substitution (number of characters replaced, or false
# when nothing matched) is handed back to the caller

  return $_[0] =~ s{([\x80-\xFF])}{ $iso885913[ ord($1) - 0x80 ] }gse;
} #iso885913

#--------------------------------------------------------------------------
# Convert from iso-8859-14 to UTF-8

my @iso885914 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 'Ḃ',	# 0x1E02 LATIN CAPITAL LETTER B WITH DOT ABOVE
 'ḃ',	# 0x1E03 LATIN SMALL LETTER B WITH DOT ABOVE
 '£',	# 0x00A3 POUND SIGN
 'Ċ',	# 0x010A LATIN CAPITAL LETTER C WITH DOT ABOVE
 'ċ',	# 0x010B LATIN SMALL LETTER C WITH DOT ABOVE
 'Ḋ',	# 0x1E0A LATIN CAPITAL LETTER D WITH DOT ABOVE
 '§',	# 0x00A7 SECTION SIGN
 'Ẁ',	# 0x1E80 LATIN CAPITAL LETTER W WITH GRAVE
 '©',	# 0x00A9 COPYRIGHT SIGN
 'Ẃ',	# 0x1E82 LATIN CAPITAL LETTER W WITH ACUTE
 'ḋ',	# 0x1E0B LATIN SMALL LETTER D WITH DOT ABOVE
 'Ỳ',	# 0x1EF2 LATIN CAPITAL LETTER Y WITH GRAVE
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 'Ÿ',	# 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
 'Ḟ',	# 0x1E1E LATIN CAPITAL LETTER F WITH DOT ABOVE
 'ḟ',	# 0x1E1F LATIN SMALL LETTER F WITH DOT ABOVE
 'Ġ',	# 0x0120 LATIN CAPITAL LETTER G WITH DOT ABOVE
 'ġ',	# 0x0121 LATIN SMALL LETTER G WITH DOT ABOVE
 'Ṁ',	# 0x1E40 LATIN CAPITAL LETTER M WITH DOT ABOVE
 'ṁ',	# 0x1E41 LATIN SMALL LETTER M WITH DOT ABOVE
 '¶',	# 0x00B6 PILCROW SIGN
 'Ṗ',	# 0x1E56 LATIN CAPITAL LETTER P WITH DOT ABOVE
 'ẁ',	# 0x1E81 LATIN SMALL LETTER W WITH GRAVE
 'ṗ',	# 0x1E57 LATIN SMALL LETTER P WITH DOT ABOVE
 'ẃ',	# 0x1E83 LATIN SMALL LETTER W WITH ACUTE
 'Ṡ',	# 0x1E60 LATIN CAPITAL LETTER S WITH DOT ABOVE
 'ỳ',	# 0x1EF3 LATIN SMALL LETTER Y WITH GRAVE
 'Ẅ',	# 0x1E84 LATIN CAPITAL LETTER W WITH DIAERESIS
 'ẅ',	# 0x1E85 LATIN SMALL LETTER W WITH DIAERESIS
 'ṡ',	# 0x1E61 LATIN SMALL LETTER S WITH DOT ABOVE
 'À',	# 0x00C0 LATIN CAPITAL LETTER A WITH GRAVE
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ã',	# 0x00C3 LATIN CAPITAL LETTER A WITH TILDE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'È',	# 0x00C8 LATIN CAPITAL LETTER E WITH GRAVE
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ê',	# 0x00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ì',	# 0x00CC LATIN CAPITAL LETTER I WITH GRAVE
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ï',	# 0x00CF LATIN CAPITAL LETTER I WITH DIAERESIS
 'Ŵ',	# 0x0174 LATIN CAPITAL LETTER W WITH CIRCUMFLEX
 'Ñ',	# 0x00D1 LATIN CAPITAL LETTER N WITH TILDE
 'Ò',	# 0x00D2 LATIN CAPITAL LETTER O WITH GRAVE
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Õ',	# 0x00D5 LATIN CAPITAL LETTER O WITH TILDE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 'Ṫ',	# 0x1E6A LATIN CAPITAL LETTER T WITH DOT ABOVE
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 'Ù',	# 0x00D9 LATIN CAPITAL LETTER U WITH GRAVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ý',	# 0x00DD LATIN CAPITAL LETTER Y WITH ACUTE
 'Ŷ',	# 0x0176 LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'à',	# 0x00E0 LATIN SMALL LETTER A WITH GRAVE
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ã',	# 0x00E3 LATIN SMALL LETTER A WITH TILDE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'è',	# 0x00E8 LATIN SMALL LETTER E WITH GRAVE
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ê',	# 0x00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ì',	# 0x00EC LATIN SMALL LETTER I WITH GRAVE
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 'ŵ',	# 0x0175 LATIN SMALL LETTER W WITH CIRCUMFLEX
 'ñ',	# 0x00F1 LATIN SMALL LETTER N WITH TILDE
 'ò',	# 0x00F2 LATIN SMALL LETTER O WITH GRAVE
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'õ',	# 0x00F5 LATIN SMALL LETTER O WITH TILDE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 'ṫ',	# 0x1E6B LATIN SMALL LETTER T WITH DOT ABOVE
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 'ù',	# 0x00F9 LATIN SMALL LETTER U WITH GRAVE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ý',	# 0x00FD LATIN SMALL LETTER Y WITH ACUTE
 'ŷ',	# 0x0177 LATIN SMALL LETTER Y WITH CIRCUMFLEX
 'ÿ',	# 0x00FF LATIN SMALL LETTER Y WITH DIAERESIS
); #@iso885914

sub illegal_iso885914 {

# Placeholder: ignores its arguments and returns nothing (empty list / undef)
# NOTE(review): presumably reserved for flagging illegal iso-8859-14 input --
# confirm against the callers

  return;
} #illegal_iso885914

sub iso885914 {

# Rewrite the caller's string in place (through the $_[0] alias): each
# character in the range 0x80-0xFF is replaced by the @iso885914 entry at
# index (ordinal - 0x80); characters below 0x80 are not touched
# The result of the substitution (number of characters replaced, or false
# when nothing matched) is handed back to the caller

  return $_[0] =~ s{([\x80-\xFF])}{ $iso885914[ ord($1) - 0x80 ] }gse;
} #iso885914

#--------------------------------------------------------------------------
# Convert from iso-8859-15 to UTF-8

my @iso885915 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 '¡',	# 0x00A1 INVERTED EXCLAMATION MARK
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '€',	# 0x20AC EURO SIGN
 '¥',	# 0x00A5 YEN SIGN
 'Š',	# 0x0160 LATIN CAPITAL LETTER S WITH CARON
 '§',	# 0x00A7 SECTION SIGN
 'š',	# 0x0161 LATIN SMALL LETTER S WITH CARON
 '©',	# 0x00A9 COPYRIGHT SIGN
 'ª',	# 0x00AA FEMININE ORDINAL INDICATOR
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 '¯',	# 0x00AF MACRON
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 'Ž',	# 0x017D LATIN CAPITAL LETTER Z WITH CARON
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 'ž',	# 0x017E LATIN SMALL LETTER Z WITH CARON
 '¹',	# 0x00B9 SUPERSCRIPT ONE
 'º',	# 0x00BA MASCULINE ORDINAL INDICATOR
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 'Œ',	# 0x0152 LATIN CAPITAL LIGATURE OE
 'œ',	# 0x0153 LATIN SMALL LIGATURE OE
 'Ÿ',	# 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
 '¿',	# 0x00BF INVERTED QUESTION MARK
 'À',	# 0x00C0 LATIN CAPITAL LETTER A WITH GRAVE
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ã',	# 0x00C3 LATIN CAPITAL LETTER A WITH TILDE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'È',	# 0x00C8 LATIN CAPITAL LETTER E WITH GRAVE
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ê',	# 0x00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ì',	# 0x00CC LATIN CAPITAL LETTER I WITH GRAVE
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ï',	# 0x00CF LATIN CAPITAL LETTER I WITH DIAERESIS
 'Ð',	# 0x00D0 LATIN CAPITAL LETTER ETH
 'Ñ',	# 0x00D1 LATIN CAPITAL LETTER N WITH TILDE
 'Ò',	# 0x00D2 LATIN CAPITAL LETTER O WITH GRAVE
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Õ',	# 0x00D5 LATIN CAPITAL LETTER O WITH TILDE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 'Ù',	# 0x00D9 LATIN CAPITAL LETTER U WITH GRAVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ý',	# 0x00DD LATIN CAPITAL LETTER Y WITH ACUTE
 'Þ',	# 0x00DE LATIN CAPITAL LETTER THORN
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'à',	# 0x00E0 LATIN SMALL LETTER A WITH GRAVE
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ã',	# 0x00E3 LATIN SMALL LETTER A WITH TILDE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'è',	# 0x00E8 LATIN SMALL LETTER E WITH GRAVE
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ê',	# 0x00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ì',	# 0x00EC LATIN SMALL LETTER I WITH GRAVE
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 'ð',	# 0x00F0 LATIN SMALL LETTER ETH
 'ñ',	# 0x00F1 LATIN SMALL LETTER N WITH TILDE
 'ò',	# 0x00F2 LATIN SMALL LETTER O WITH GRAVE
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'õ',	# 0x00F5 LATIN SMALL LETTER O WITH TILDE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 'ù',	# 0x00F9 LATIN SMALL LETTER U WITH GRAVE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ý',	# 0x00FD LATIN SMALL LETTER Y WITH ACUTE
 'þ',	# 0x00FE LATIN SMALL LETTER THORN
 'ÿ',	# 0x00FF LATIN SMALL LETTER Y WITH DIAERESIS
); #@iso885915

sub illegal_iso885915 {

# Placeholder: ignores its arguments and returns nothing (empty list / undef)
# NOTE(review): presumably reserved for flagging illegal iso-8859-15 input --
# confirm against the callers

  return;
} #illegal_iso885915

sub iso885915 {

# Rewrite the caller's string in place (through the $_[0] alias): each
# character in the range 0x80-0xFF is replaced by the @iso885915 entry at
# index (ordinal - 0x80); characters below 0x80 are not touched
# The result of the substitution (number of characters replaced, or false
# when nothing matched) is handed back to the caller

  return $_[0] =~ s{([\x80-\xFF])}{ $iso885915[ ord($1) - 0x80 ] }gse;
} #iso885915

#--------------------------------------------------------------------------
# Convert from iso-8859-16 to UTF-8

my @iso885916 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 'Ą',	# 0x0104 LATIN CAPITAL LETTER A WITH OGONEK
 'ą',	# 0x0105 LATIN SMALL LETTER A WITH OGONEK
 'Ł',	# 0x0141 LATIN CAPITAL LETTER L WITH STROKE
 '€',	# 0x20AC EURO SIGN
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 'Š',	# 0x0160 LATIN CAPITAL LETTER S WITH CARON
 '§',	# 0x00A7 SECTION SIGN
 'š',	# 0x0161 LATIN SMALL LETTER S WITH CARON
 '©',	# 0x00A9 COPYRIGHT SIGN
 'Ș',	# 0x0218 LATIN CAPITAL LETTER S WITH COMMA BELOW
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 'Ź',	# 0x0179 LATIN CAPITAL LETTER Z WITH ACUTE
 '­',	# 0x00AD SOFT HYPHEN
 'ź',	# 0x017A LATIN SMALL LETTER Z WITH ACUTE
 'Ż',	# 0x017B LATIN CAPITAL LETTER Z WITH DOT ABOVE
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 'Č',	# 0x010C LATIN CAPITAL LETTER C WITH CARON
 'ł',	# 0x0142 LATIN SMALL LETTER L WITH STROKE
 'Ž',	# 0x017D LATIN CAPITAL LETTER Z WITH CARON
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 'ž',	# 0x017E LATIN SMALL LETTER Z WITH CARON
 'č',	# 0x010D LATIN SMALL LETTER C WITH CARON
 'ș',	# 0x0219 LATIN SMALL LETTER S WITH COMMA BELOW
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 'Œ',	# 0x0152 LATIN CAPITAL LIGATURE OE
 'œ',	# 0x0153 LATIN SMALL LIGATURE OE
 'Ÿ',	# 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
 'ż',	# 0x017C LATIN SMALL LETTER Z WITH DOT ABOVE
 'À',	# 0x00C0 LATIN CAPITAL LETTER A WITH GRAVE
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ă',	# 0x0102 LATIN CAPITAL LETTER A WITH BREVE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Ć',	# 0x0106 LATIN CAPITAL LETTER C WITH ACUTE
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'È',	# 0x00C8 LATIN CAPITAL LETTER E WITH GRAVE
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ê',	# 0x00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ì',	# 0x00CC LATIN CAPITAL LETTER I WITH GRAVE
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ï',	# 0x00CF LATIN CAPITAL LETTER I WITH DIAERESIS
 'Đ',	# 0x0110 LATIN CAPITAL LETTER D WITH STROKE
 'Ń',	# 0x0143 LATIN CAPITAL LETTER N WITH ACUTE
 'Ò',	# 0x00D2 LATIN CAPITAL LETTER O WITH GRAVE
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Ő',	# 0x0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 'Ś',	# 0x015A LATIN CAPITAL LETTER S WITH ACUTE
 'Ű',	# 0x0170 LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
 'Ù',	# 0x00D9 LATIN CAPITAL LETTER U WITH GRAVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ę',	# 0x0118 LATIN CAPITAL LETTER E WITH OGONEK
 'Ț',	# 0x021A LATIN CAPITAL LETTER T WITH COMMA BELOW
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'à',	# 0x00E0 LATIN SMALL LETTER A WITH GRAVE
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ă',	# 0x0103 LATIN SMALL LETTER A WITH BREVE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'ć',	# 0x0107 LATIN SMALL LETTER C WITH ACUTE
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'è',	# 0x00E8 LATIN SMALL LETTER E WITH GRAVE
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ê',	# 0x00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ì',	# 0x00EC LATIN SMALL LETTER I WITH GRAVE
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 'đ',	# 0x0111 LATIN SMALL LETTER D WITH STROKE
 'ń',	# 0x0144 LATIN SMALL LETTER N WITH ACUTE
 'ò',	# 0x00F2 LATIN SMALL LETTER O WITH GRAVE
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'ő',	# 0x0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 'ś',	# 0x015B LATIN SMALL LETTER S WITH ACUTE
 'ű',	# 0x0171 LATIN SMALL LETTER U WITH DOUBLE ACUTE
 'ù',	# 0x00F9 LATIN SMALL LETTER U WITH GRAVE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ę',	# 0x0119 LATIN SMALL LETTER E WITH OGONEK
 'ț',	# 0x021B LATIN SMALL LETTER T WITH COMMA BELOW
 'ÿ',	# 0x00FF LATIN SMALL LETTER Y WITH DIAERESIS
); #@iso885916

#  IN: 1 string to check (left untouched)
# OUT: nothing (empty list, undef in scalar context)

sub illegal_iso885916 {

# No byte is replaced by this check: the argument is not even looked at

 return;
} #illegal_iso885916

#  IN: 1 string whose high bytes are looked up in @iso885916 (changed in place)
# OUT: 1 result of the substitution (number of bytes replaced, false if none)

sub iso885916 {

# Swap every byte in 0x80-0xFF for the table entry at offset (byte - 0x80)

 return $_[0] =~ s{([\x80-\xFF])}{ $iso885916[ ord( $1 ) - 0x80 ] }sge;
} #iso885916

#--------------------------------------------------------------------------
# Convert from iso-8859-2 to UTF-8

my @iso88592 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 'Ą',	# 0x0104 LATIN CAPITAL LETTER A WITH OGONEK
 '˘',	# 0x02D8 BREVE
 'Ł',	# 0x0141 LATIN CAPITAL LETTER L WITH STROKE
 '¤',	# 0x00A4 CURRENCY SIGN
 'Ľ',	# 0x013D LATIN CAPITAL LETTER L WITH CARON
 'Ś',	# 0x015A LATIN CAPITAL LETTER S WITH ACUTE
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 'Š',	# 0x0160 LATIN CAPITAL LETTER S WITH CARON
 'Ş',	# 0x015E LATIN CAPITAL LETTER S WITH CEDILLA
 'Ť',	# 0x0164 LATIN CAPITAL LETTER T WITH CARON
 'Ź',	# 0x0179 LATIN CAPITAL LETTER Z WITH ACUTE
 '­',	# 0x00AD SOFT HYPHEN
 'Ž',	# 0x017D LATIN CAPITAL LETTER Z WITH CARON
 'Ż',	# 0x017B LATIN CAPITAL LETTER Z WITH DOT ABOVE
 '°',	# 0x00B0 DEGREE SIGN
 'ą',	# 0x0105 LATIN SMALL LETTER A WITH OGONEK
 '˛',	# 0x02DB OGONEK
 'ł',	# 0x0142 LATIN SMALL LETTER L WITH STROKE
 '´',	# 0x00B4 ACUTE ACCENT
 'ľ',	# 0x013E LATIN SMALL LETTER L WITH CARON
 'ś',	# 0x015B LATIN SMALL LETTER S WITH ACUTE
 'ˇ',	# 0x02C7 CARON
 '¸',	# 0x00B8 CEDILLA
 'š',	# 0x0161 LATIN SMALL LETTER S WITH CARON
 'ş',	# 0x015F LATIN SMALL LETTER S WITH CEDILLA
 'ť',	# 0x0165 LATIN SMALL LETTER T WITH CARON
 'ź',	# 0x017A LATIN SMALL LETTER Z WITH ACUTE
 '˝',	# 0x02DD DOUBLE ACUTE ACCENT
 'ž',	# 0x017E LATIN SMALL LETTER Z WITH CARON
 'ż',	# 0x017C LATIN SMALL LETTER Z WITH DOT ABOVE
 'Ŕ',	# 0x0154 LATIN CAPITAL LETTER R WITH ACUTE
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ă',	# 0x0102 LATIN CAPITAL LETTER A WITH BREVE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Ĺ',	# 0x0139 LATIN CAPITAL LETTER L WITH ACUTE
 'Ć',	# 0x0106 LATIN CAPITAL LETTER C WITH ACUTE
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'Č',	# 0x010C LATIN CAPITAL LETTER C WITH CARON
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ę',	# 0x0118 LATIN CAPITAL LETTER E WITH OGONEK
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ě',	# 0x011A LATIN CAPITAL LETTER E WITH CARON
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ď',	# 0x010E LATIN CAPITAL LETTER D WITH CARON
 'Đ',	# 0x0110 LATIN CAPITAL LETTER D WITH STROKE
 'Ń',	# 0x0143 LATIN CAPITAL LETTER N WITH ACUTE
 'Ň',	# 0x0147 LATIN CAPITAL LETTER N WITH CARON
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Ő',	# 0x0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ř',	# 0x0158 LATIN CAPITAL LETTER R WITH CARON
 'Ů',	# 0x016E LATIN CAPITAL LETTER U WITH RING ABOVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Ű',	# 0x0170 LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ý',	# 0x00DD LATIN CAPITAL LETTER Y WITH ACUTE
 'Ţ',	# 0x0162 LATIN CAPITAL LETTER T WITH CEDILLA
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'ŕ',	# 0x0155 LATIN SMALL LETTER R WITH ACUTE
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ă',	# 0x0103 LATIN SMALL LETTER A WITH BREVE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'ĺ',	# 0x013A LATIN SMALL LETTER L WITH ACUTE
 'ć',	# 0x0107 LATIN SMALL LETTER C WITH ACUTE
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'č',	# 0x010D LATIN SMALL LETTER C WITH CARON
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ę',	# 0x0119 LATIN SMALL LETTER E WITH OGONEK
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ě',	# 0x011B LATIN SMALL LETTER E WITH CARON
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ď',	# 0x010F LATIN SMALL LETTER D WITH CARON
 'đ',	# 0x0111 LATIN SMALL LETTER D WITH STROKE
 'ń',	# 0x0144 LATIN SMALL LETTER N WITH ACUTE
 'ň',	# 0x0148 LATIN SMALL LETTER N WITH CARON
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'ő',	# 0x0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ř',	# 0x0159 LATIN SMALL LETTER R WITH CARON
 'ů',	# 0x016F LATIN SMALL LETTER U WITH RING ABOVE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'ű',	# 0x0171 LATIN SMALL LETTER U WITH DOUBLE ACUTE
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ý',	# 0x00FD LATIN SMALL LETTER Y WITH ACUTE
 'ţ',	# 0x0163 LATIN SMALL LETTER T WITH CEDILLA
 '˙',	# 0x02D9 DOT ABOVE
); #@iso88592

#  IN: 1 string in iso-8859-2 to check (left untouched)
# OUT: nothing (empty list, undef in scalar context)

sub illegal_iso88592 {

# @iso88592 has no UNDEFINED entries, so there is nothing to blank out here

 return;
} #illegal_iso88592

#  IN: 1 string in iso-8859-2 (converted in place through the alias in $_[0])
# OUT: 1 result of the substitution (number of bytes replaced, false if none)

sub iso88592 {

# Swap every byte in 0x80-0xFF for the @iso88592 entry at offset (byte - 0x80)

 return $_[0] =~ s{([\x80-\xFF])}{ $iso88592[ ord( $1 ) - 0x80 ] }sge;
} #iso88592

#--------------------------------------------------------------------------
# Convert from iso-8859-3 to UTF-8

my @iso88593 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 'Ħ',	# 0x0126 LATIN CAPITAL LETTER H WITH STROKE
 '˘',	# 0x02D8 BREVE
 '£',	# 0x00A3 POUND SIGN
 '¤',	# 0x00A4 CURRENCY SIGN
 '',	# 0xA5 UNDEFINED
 'Ĥ',	# 0x0124 LATIN CAPITAL LETTER H WITH CIRCUMFLEX
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 'İ',	# 0x0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
 'Ş',	# 0x015E LATIN CAPITAL LETTER S WITH CEDILLA
 'Ğ',	# 0x011E LATIN CAPITAL LETTER G WITH BREVE
 'Ĵ',	# 0x0134 LATIN CAPITAL LETTER J WITH CIRCUMFLEX
 '­',	# 0x00AD SOFT HYPHEN
 '',	# 0xAE UNDEFINED
 'Ż',	# 0x017B LATIN CAPITAL LETTER Z WITH DOT ABOVE
 '°',	# 0x00B0 DEGREE SIGN
 'ħ',	# 0x0127 LATIN SMALL LETTER H WITH STROKE
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '´',	# 0x00B4 ACUTE ACCENT
 'µ',	# 0x00B5 MICRO SIGN
 'ĥ',	# 0x0125 LATIN SMALL LETTER H WITH CIRCUMFLEX
 '·',	# 0x00B7 MIDDLE DOT
 '¸',	# 0x00B8 CEDILLA
 'ı',	# 0x0131 LATIN SMALL LETTER DOTLESS I
 'ş',	# 0x015F LATIN SMALL LETTER S WITH CEDILLA
 'ğ',	# 0x011F LATIN SMALL LETTER G WITH BREVE
 'ĵ',	# 0x0135 LATIN SMALL LETTER J WITH CIRCUMFLEX
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 '',	# 0xBE UNDEFINED
 'ż',	# 0x017C LATIN SMALL LETTER Z WITH DOT ABOVE
 'À',	# 0x00C0 LATIN CAPITAL LETTER A WITH GRAVE
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 '',	# 0xC3 UNDEFINED
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Ċ',	# 0x010A LATIN CAPITAL LETTER C WITH DOT ABOVE
 'Ĉ',	# 0x0108 LATIN CAPITAL LETTER C WITH CIRCUMFLEX
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'È',	# 0x00C8 LATIN CAPITAL LETTER E WITH GRAVE
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ê',	# 0x00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ì',	# 0x00CC LATIN CAPITAL LETTER I WITH GRAVE
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ï',	# 0x00CF LATIN CAPITAL LETTER I WITH DIAERESIS
 '',	# 0xD0 UNDEFINED
 'Ñ',	# 0x00D1 LATIN CAPITAL LETTER N WITH TILDE
 'Ò',	# 0x00D2 LATIN CAPITAL LETTER O WITH GRAVE
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Ġ',	# 0x0120 LATIN CAPITAL LETTER G WITH DOT ABOVE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ĝ',	# 0x011C LATIN CAPITAL LETTER G WITH CIRCUMFLEX
 'Ù',	# 0x00D9 LATIN CAPITAL LETTER U WITH GRAVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ŭ',	# 0x016C LATIN CAPITAL LETTER U WITH BREVE
 'Ŝ',	# 0x015C LATIN CAPITAL LETTER S WITH CIRCUMFLEX
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'à',	# 0x00E0 LATIN SMALL LETTER A WITH GRAVE
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 '',	# 0xE3 UNDEFINED
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'ċ',	# 0x010B LATIN SMALL LETTER C WITH DOT ABOVE
 'ĉ',	# 0x0109 LATIN SMALL LETTER C WITH CIRCUMFLEX
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'è',	# 0x00E8 LATIN SMALL LETTER E WITH GRAVE
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ê',	# 0x00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ì',	# 0x00EC LATIN SMALL LETTER I WITH GRAVE
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 '',	# 0xF0 UNDEFINED
 'ñ',	# 0x00F1 LATIN SMALL LETTER N WITH TILDE
 'ò',	# 0x00F2 LATIN SMALL LETTER O WITH GRAVE
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'ġ',	# 0x0121 LATIN SMALL LETTER G WITH DOT ABOVE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ĝ',	# 0x011D LATIN SMALL LETTER G WITH CIRCUMFLEX
 'ù',	# 0x00F9 LATIN SMALL LETTER U WITH GRAVE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ŭ',	# 0x016D LATIN SMALL LETTER U WITH BREVE
 'ŝ',	# 0x015D LATIN SMALL LETTER S WITH CIRCUMFLEX
 '˙',	# 0x02D9 DOT ABOVE
); #@iso88593

#  IN: 1 string in iso-8859-3 (changed in place through the alias in $_[0])
# OUT: 1 result of the substitution (number of bytes blanked, false if none)

sub illegal_iso88593 {

# Each byte that is marked UNDEFINED in @iso88593 is turned into a space

 return $_[0] =~ s{[\xA5\xAE\xBE\xC3\xD0\xE3\xF0]}{ }sg;
} #illegal_iso88593

#  IN: 1 string in iso-8859-3 (converted in place through the alias in $_[0])
# OUT: 1 result of the table substitution (number of bytes replaced, false if none)

sub iso88593 {

# First turn the bytes that are marked UNDEFINED in @iso88593 into spaces

 $_[0] =~ s{[\xA5\xAE\xBE\xC3\xD0\xE3\xF0]}{ }sg;

# Then swap every remaining byte in 0x80-0xFF for its @iso88593 entry

 return $_[0] =~ s{([\x80-\xFF])}{ $iso88593[ ord( $1 ) - 0x80 ] }sge;
} #iso88593

#--------------------------------------------------------------------------
# Convert from iso-8859-4 to UTF-8

my @iso88594 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 'Ą',	# 0x0104 LATIN CAPITAL LETTER A WITH OGONEK
 'ĸ',	# 0x0138 LATIN SMALL LETTER KRA
 'Ŗ',	# 0x0156 LATIN CAPITAL LETTER R WITH CEDILLA
 '¤',	# 0x00A4 CURRENCY SIGN
 'Ĩ',	# 0x0128 LATIN CAPITAL LETTER I WITH TILDE
 'Ļ',	# 0x013B LATIN CAPITAL LETTER L WITH CEDILLA
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 'Š',	# 0x0160 LATIN CAPITAL LETTER S WITH CARON
 'Ē',	# 0x0112 LATIN CAPITAL LETTER E WITH MACRON
 'Ģ',	# 0x0122 LATIN CAPITAL LETTER G WITH CEDILLA
 'Ŧ',	# 0x0166 LATIN CAPITAL LETTER T WITH STROKE
 '­',	# 0x00AD SOFT HYPHEN
 'Ž',	# 0x017D LATIN CAPITAL LETTER Z WITH CARON
 '¯',	# 0x00AF MACRON
 '°',	# 0x00B0 DEGREE SIGN
 'ą',	# 0x0105 LATIN SMALL LETTER A WITH OGONEK
 '˛',	# 0x02DB OGONEK
 'ŗ',	# 0x0157 LATIN SMALL LETTER R WITH CEDILLA
 '´',	# 0x00B4 ACUTE ACCENT
 'ĩ',	# 0x0129 LATIN SMALL LETTER I WITH TILDE
 'ļ',	# 0x013C LATIN SMALL LETTER L WITH CEDILLA
 'ˇ',	# 0x02C7 CARON
 '¸',	# 0x00B8 CEDILLA
 'š',	# 0x0161 LATIN SMALL LETTER S WITH CARON
 'ē',	# 0x0113 LATIN SMALL LETTER E WITH MACRON
 'ģ',	# 0x0123 LATIN SMALL LETTER G WITH CEDILLA
 'ŧ',	# 0x0167 LATIN SMALL LETTER T WITH STROKE
 'Ŋ',	# 0x014A LATIN CAPITAL LETTER ENG
 'ž',	# 0x017E LATIN SMALL LETTER Z WITH CARON
 'ŋ',	# 0x014B LATIN SMALL LETTER ENG
 'Ā',	# 0x0100 LATIN CAPITAL LETTER A WITH MACRON
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ã',	# 0x00C3 LATIN CAPITAL LETTER A WITH TILDE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 'Į',	# 0x012E LATIN CAPITAL LETTER I WITH OGONEK
 'Č',	# 0x010C LATIN CAPITAL LETTER C WITH CARON
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ę',	# 0x0118 LATIN CAPITAL LETTER E WITH OGONEK
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ė',	# 0x0116 LATIN CAPITAL LETTER E WITH DOT ABOVE
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ī',	# 0x012A LATIN CAPITAL LETTER I WITH MACRON
 'Đ',	# 0x0110 LATIN CAPITAL LETTER D WITH STROKE
 'Ņ',	# 0x0145 LATIN CAPITAL LETTER N WITH CEDILLA
 'Ō',	# 0x014C LATIN CAPITAL LETTER O WITH MACRON
 'Ķ',	# 0x0136 LATIN CAPITAL LETTER K WITH CEDILLA
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Õ',	# 0x00D5 LATIN CAPITAL LETTER O WITH TILDE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 'Ų',	# 0x0172 LATIN CAPITAL LETTER U WITH OGONEK
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ũ',	# 0x0168 LATIN CAPITAL LETTER U WITH TILDE
 'Ū',	# 0x016A LATIN CAPITAL LETTER U WITH MACRON
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'ā',	# 0x0101 LATIN SMALL LETTER A WITH MACRON
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ã',	# 0x00E3 LATIN SMALL LETTER A WITH TILDE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'į',	# 0x012F LATIN SMALL LETTER I WITH OGONEK
 'č',	# 0x010D LATIN SMALL LETTER C WITH CARON
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ę',	# 0x0119 LATIN SMALL LETTER E WITH OGONEK
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ė',	# 0x0117 LATIN SMALL LETTER E WITH DOT ABOVE
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ī',	# 0x012B LATIN SMALL LETTER I WITH MACRON
 'đ',	# 0x0111 LATIN SMALL LETTER D WITH STROKE
 'ņ',	# 0x0146 LATIN SMALL LETTER N WITH CEDILLA
 'ō',	# 0x014D LATIN SMALL LETTER O WITH MACRON
 'ķ',	# 0x0137 LATIN SMALL LETTER K WITH CEDILLA
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'õ',	# 0x00F5 LATIN SMALL LETTER O WITH TILDE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 'ų',	# 0x0173 LATIN SMALL LETTER U WITH OGONEK
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ũ',	# 0x0169 LATIN SMALL LETTER U WITH TILDE
 'ū',	# 0x016B LATIN SMALL LETTER U WITH MACRON
 '˙',	# 0x02D9 DOT ABOVE
); #@iso88594

#  IN: 1 string in iso-8859-4 to check (left untouched)
# OUT: nothing (empty list, undef in scalar context)

sub illegal_iso88594 {

# @iso88594 has no UNDEFINED entries, so there is nothing to blank out here

 return;
} #illegal_iso88594

#  IN: 1 string in iso-8859-4 (converted in place through the alias in $_[0])
# OUT: 1 result of the substitution (number of bytes replaced, false if none)

sub iso88594 {

# Swap every byte in 0x80-0xFF for the @iso88594 entry at offset (byte - 0x80)

 return $_[0] =~ s{([\x80-\xFF])}{ $iso88594[ ord( $1 ) - 0x80 ] }sge;
} #iso88594

#--------------------------------------------------------------------------
# Convert from iso-8859-5 to UTF-8

my @iso88595 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 'Ё',	# 0x0401 CYRILLIC CAPITAL LETTER IO
 'Ђ',	# 0x0402 CYRILLIC CAPITAL LETTER DJE
 'Ѓ',	# 0x0403 CYRILLIC CAPITAL LETTER GJE
 'Є',	# 0x0404 CYRILLIC CAPITAL LETTER UKRAINIAN IE
 'Ѕ',	# 0x0405 CYRILLIC CAPITAL LETTER DZE
 'І',	# 0x0406 CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
 'Ї',	# 0x0407 CYRILLIC CAPITAL LETTER YI
 'Ј',	# 0x0408 CYRILLIC CAPITAL LETTER JE
 'Љ',	# 0x0409 CYRILLIC CAPITAL LETTER LJE
 'Њ',	# 0x040A CYRILLIC CAPITAL LETTER NJE
 'Ћ',	# 0x040B CYRILLIC CAPITAL LETTER TSHE
 'Ќ',	# 0x040C CYRILLIC CAPITAL LETTER KJE
 '­',	# 0x00AD SOFT HYPHEN
 'Ў',	# 0x040E CYRILLIC CAPITAL LETTER SHORT U
 'Џ',	# 0x040F CYRILLIC CAPITAL LETTER DZHE
 'А',	# 0x0410 CYRILLIC CAPITAL LETTER A
 'Б',	# 0x0411 CYRILLIC CAPITAL LETTER BE
 'В',	# 0x0412 CYRILLIC CAPITAL LETTER VE
 'Г',	# 0x0413 CYRILLIC CAPITAL LETTER GHE
 'Д',	# 0x0414 CYRILLIC CAPITAL LETTER DE
 'Е',	# 0x0415 CYRILLIC CAPITAL LETTER IE
 'Ж',	# 0x0416 CYRILLIC CAPITAL LETTER ZHE
 'З',	# 0x0417 CYRILLIC CAPITAL LETTER ZE
 'И',	# 0x0418 CYRILLIC CAPITAL LETTER I
 'Й',	# 0x0419 CYRILLIC CAPITAL LETTER SHORT I
 'К',	# 0x041A CYRILLIC CAPITAL LETTER KA
 'Л',	# 0x041B CYRILLIC CAPITAL LETTER EL
 'М',	# 0x041C CYRILLIC CAPITAL LETTER EM
 'Н',	# 0x041D CYRILLIC CAPITAL LETTER EN
 'О',	# 0x041E CYRILLIC CAPITAL LETTER O
 'П',	# 0x041F CYRILLIC CAPITAL LETTER PE
 'Р',	# 0x0420 CYRILLIC CAPITAL LETTER ER
 'С',	# 0x0421 CYRILLIC CAPITAL LETTER ES
 'Т',	# 0x0422 CYRILLIC CAPITAL LETTER TE
 'У',	# 0x0423 CYRILLIC CAPITAL LETTER U
 'Ф',	# 0x0424 CYRILLIC CAPITAL LETTER EF
 'Х',	# 0x0425 CYRILLIC CAPITAL LETTER HA
 'Ц',	# 0x0426 CYRILLIC CAPITAL LETTER TSE
 'Ч',	# 0x0427 CYRILLIC CAPITAL LETTER CHE
 'Ш',	# 0x0428 CYRILLIC CAPITAL LETTER SHA
 'Щ',	# 0x0429 CYRILLIC CAPITAL LETTER SHCHA
 'Ъ',	# 0x042A CYRILLIC CAPITAL LETTER HARD SIGN
 'Ы',	# 0x042B CYRILLIC CAPITAL LETTER YERU
 'Ь',	# 0x042C CYRILLIC CAPITAL LETTER SOFT SIGN
 'Э',	# 0x042D CYRILLIC CAPITAL LETTER E
 'Ю',	# 0x042E CYRILLIC CAPITAL LETTER YU
 'Я',	# 0x042F CYRILLIC CAPITAL LETTER YA
 'а',	# 0x0430 CYRILLIC SMALL LETTER A
 'б',	# 0x0431 CYRILLIC SMALL LETTER BE
 'в',	# 0x0432 CYRILLIC SMALL LETTER VE
 'г',	# 0x0433 CYRILLIC SMALL LETTER GHE
 'д',	# 0x0434 CYRILLIC SMALL LETTER DE
 'е',	# 0x0435 CYRILLIC SMALL LETTER IE
 'ж',	# 0x0436 CYRILLIC SMALL LETTER ZHE
 'з',	# 0x0437 CYRILLIC SMALL LETTER ZE
 'и',	# 0x0438 CYRILLIC SMALL LETTER I
 'й',	# 0x0439 CYRILLIC SMALL LETTER SHORT I
 'к',	# 0x043A CYRILLIC SMALL LETTER KA
 'л',	# 0x043B CYRILLIC SMALL LETTER EL
 'м',	# 0x043C CYRILLIC SMALL LETTER EM
 'н',	# 0x043D CYRILLIC SMALL LETTER EN
 'о',	# 0x043E CYRILLIC SMALL LETTER O
 'п',	# 0x043F CYRILLIC SMALL LETTER PE
 'р',	# 0x0440 CYRILLIC SMALL LETTER ER
 'с',	# 0x0441 CYRILLIC SMALL LETTER ES
 'т',	# 0x0442 CYRILLIC SMALL LETTER TE
 'у',	# 0x0443 CYRILLIC SMALL LETTER U
 'ф',	# 0x0444 CYRILLIC SMALL LETTER EF
 'х',	# 0x0445 CYRILLIC SMALL LETTER HA
 'ц',	# 0x0446 CYRILLIC SMALL LETTER TSE
 'ч',	# 0x0447 CYRILLIC SMALL LETTER CHE
 'ш',	# 0x0448 CYRILLIC SMALL LETTER SHA
 'щ',	# 0x0449 CYRILLIC SMALL LETTER SHCHA
 'ъ',	# 0x044A CYRILLIC SMALL LETTER HARD SIGN
 'ы',	# 0x044B CYRILLIC SMALL LETTER YERU
 'ь',	# 0x044C CYRILLIC SMALL LETTER SOFT SIGN
 'э',	# 0x044D CYRILLIC SMALL LETTER E
 'ю',	# 0x044E CYRILLIC SMALL LETTER YU
 'я',	# 0x044F CYRILLIC SMALL LETTER YA
 '№',	# 0x2116 NUMERO SIGN
 'ё',	# 0x0451 CYRILLIC SMALL LETTER IO
 'ђ',	# 0x0452 CYRILLIC SMALL LETTER DJE
 'ѓ',	# 0x0453 CYRILLIC SMALL LETTER GJE
 'є',	# 0x0454 CYRILLIC SMALL LETTER UKRAINIAN IE
 'ѕ',	# 0x0455 CYRILLIC SMALL LETTER DZE
 'і',	# 0x0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
 'ї',	# 0x0457 CYRILLIC SMALL LETTER YI
 'ј',	# 0x0458 CYRILLIC SMALL LETTER JE
 'љ',	# 0x0459 CYRILLIC SMALL LETTER LJE
 'њ',	# 0x045A CYRILLIC SMALL LETTER NJE
 'ћ',	# 0x045B CYRILLIC SMALL LETTER TSHE
 'ќ',	# 0x045C CYRILLIC SMALL LETTER KJE
 '§',	# 0x00A7 SECTION SIGN
 'ў',	# 0x045E CYRILLIC SMALL LETTER SHORT U
 'џ',	# 0x045F CYRILLIC SMALL LETTER DZHE
); #@iso88595

#  IN: 1 string in iso-8859-5 to check (left untouched)
# OUT: nothing (empty list, undef in scalar context)

sub illegal_iso88595 {

# @iso88595 has no UNDEFINED entries, so there is nothing to blank out here

 return;
} #illegal_iso88595

#  IN: 1 string in iso-8859-5 (converted in place through the alias in $_[0])
# OUT: 1 result of the substitution (number of bytes replaced, false if none)

sub iso88595 {

# Swap every byte in 0x80-0xFF for the @iso88595 entry at offset (byte - 0x80)

 return $_[0] =~ s{([\x80-\xFF])}{ $iso88595[ ord( $1 ) - 0x80 ] }sge;
} #iso88595

#--------------------------------------------------------------------------
# Convert from iso-8859-6 to UTF-8

my @iso88596 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 '',	# 0xA1 UNDEFINED
 '',	# 0xA2 UNDEFINED
 '',	# 0xA3 UNDEFINED
 '¤',	# 0x00A4 CURRENCY SIGN
 '',	# 0xA5 UNDEFINED
 '',	# 0xA6 UNDEFINED
 '',	# 0xA7 UNDEFINED
 '',	# 0xA8 UNDEFINED
 '',	# 0xA9 UNDEFINED
 '',	# 0xAA UNDEFINED
 '',	# 0xAB UNDEFINED
 '،',	# 0x060C ARABIC COMMA
 '­',	# 0x00AD SOFT HYPHEN
 '',	# 0xAE UNDEFINED
 '',	# 0xAF UNDEFINED
 '',	# 0xB0 UNDEFINED
 '',	# 0xB1 UNDEFINED
 '',	# 0xB2 UNDEFINED
 '',	# 0xB3 UNDEFINED
 '',	# 0xB4 UNDEFINED
 '',	# 0xB5 UNDEFINED
 '',	# 0xB6 UNDEFINED
 '',	# 0xB7 UNDEFINED
 '',	# 0xB8 UNDEFINED
 '',	# 0xB9 UNDEFINED
 '',	# 0xBA UNDEFINED
 '؛',	# 0x061B ARABIC SEMICOLON
 '',	# 0xBC UNDEFINED
 '',	# 0xBD UNDEFINED
 '',	# 0xBE UNDEFINED
 '؟',	# 0x061F ARABIC QUESTION MARK
 '',	# 0xC0 UNDEFINED
 'ء',	# 0x0621 ARABIC LETTER HAMZA
 'آ',	# 0x0622 ARABIC LETTER ALEF WITH MADDA ABOVE
 'أ',	# 0x0623 ARABIC LETTER ALEF WITH HAMZA ABOVE
 'ؤ',	# 0x0624 ARABIC LETTER WAW WITH HAMZA ABOVE
 'إ',	# 0x0625 ARABIC LETTER ALEF WITH HAMZA BELOW
 'ئ',	# 0x0626 ARABIC LETTER YEH WITH HAMZA ABOVE
 'ا',	# 0x0627 ARABIC LETTER ALEF
 'ب',	# 0x0628 ARABIC LETTER BEH
 'ة',	# 0x0629 ARABIC LETTER TEH MARBUTA
 'ت',	# 0x062A ARABIC LETTER TEH
 'ث',	# 0x062B ARABIC LETTER THEH
 'ج',	# 0x062C ARABIC LETTER JEEM
 'ح',	# 0x062D ARABIC LETTER HAH
 'خ',	# 0x062E ARABIC LETTER KHAH
 'د',	# 0x062F ARABIC LETTER DAL
 'ذ',	# 0x0630 ARABIC LETTER THAL
 'ر',	# 0x0631 ARABIC LETTER REH
 'ز',	# 0x0632 ARABIC LETTER ZAIN
 'س',	# 0x0633 ARABIC LETTER SEEN
 'ش',	# 0x0634 ARABIC LETTER SHEEN
 'ص',	# 0x0635 ARABIC LETTER SAD
 'ض',	# 0x0636 ARABIC LETTER DAD
 'ط',	# 0x0637 ARABIC LETTER TAH
 'ظ',	# 0x0638 ARABIC LETTER ZAH
 'ع',	# 0x0639 ARABIC LETTER AIN
 'غ',	# 0x063A ARABIC LETTER GHAIN
 '',	# 0xDB UNDEFINED
 '',	# 0xDC UNDEFINED
 '',	# 0xDD UNDEFINED
 '',	# 0xDE UNDEFINED
 '',	# 0xDF UNDEFINED
 'ـ',	# 0x0640 ARABIC TATWEEL
 'ف',	# 0x0641 ARABIC LETTER FEH
 'ق',	# 0x0642 ARABIC LETTER QAF
 'ك',	# 0x0643 ARABIC LETTER KAF
 'ل',	# 0x0644 ARABIC LETTER LAM
 'م',	# 0x0645 ARABIC LETTER MEEM
 'ن',	# 0x0646 ARABIC LETTER NOON
 'ه',	# 0x0647 ARABIC LETTER HEH
 'و',	# 0x0648 ARABIC LETTER WAW
 'ى',	# 0x0649 ARABIC LETTER ALEF MAKSURA
 'ي',	# 0x064A ARABIC LETTER YEH
 'ً',	# 0x064B ARABIC FATHATAN
 'ٌ',	# 0x064C ARABIC DAMMATAN
 'ٍ',	# 0x064D ARABIC KASRATAN
 'َ',	# 0x064E ARABIC FATHA
 'ُ',	# 0x064F ARABIC DAMMA
 'ِ',	# 0x0650 ARABIC KASRA
 'ّ',	# 0x0651 ARABIC SHADDA
 'ْ',	# 0x0652 ARABIC SUKUN
); #@iso88596

#  IN: 1 string in iso-8859-6 (changed in place through the alias in $_[0])
# OUT: 1 result of the substitution (number of bytes blanked, false if none)

sub illegal_iso88596 {

# Each byte that is marked UNDEFINED in @iso88596 is turned into a space.
# The table stops at 0xF2, so 0xF3-0xFF have no mapping either and must be
# blanked the same way (they used to slip through this check unnoticed).

 return $_[0] =~ s{[\xA1-\xA3\xA5-\xAB\xAE-\xBA\xBC-\xBE\xC0\xDB-\xDF\xF3-\xFF]}{ }sg;
} #illegal_iso88596

#  IN: 1 string in iso-8859-6 (converted in place through the alias in $_[0])
# OUT: 1 result of the table substitution (number of bytes replaced, false if none)

sub iso88596 {

# First turn the bytes without a mapping into spaces.  Besides the entries
# marked UNDEFINED in @iso88596 this covers 0xF3-0xFF: the table stops at
# 0xF2, so those bytes would otherwise index past its end, yield undef and
# be silently deleted from the string instead of becoming a space.

 $_[0] =~ s{[\xA1-\xA3\xA5-\xAB\xAE-\xBA\xBC-\xBE\xC0\xDB-\xDF\xF3-\xFF]}{ }sg;

# Then swap every remaining byte in 0x80-0xFF for its @iso88596 entry

 return $_[0] =~ s{([\x80-\xFF])}{ $iso88596[ ord( $1 ) - 0x80 ] }sge;
} #iso88596

#--------------------------------------------------------------------------
# Convert from iso-8859-7 to UTF-8

my @iso88597 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '£',	# 0x00A3 POUND SIGN
 '',	# 0xA4 UNDEFINED
 '',	# 0xA5 UNDEFINED
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 '©',	# 0x00A9 COPYRIGHT SIGN
 '',	# 0xAA UNDEFINED
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '',	# 0xAE UNDEFINED
 '―',	# 0x2015 HORIZONTAL BAR
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '΄',	# 0x0384 GREEK TONOS
 '΅',	# 0x0385 GREEK DIALYTIKA TONOS
 'Ά',	# 0x0386 GREEK CAPITAL LETTER ALPHA WITH TONOS
 '·',	# 0x00B7 MIDDLE DOT
 'Έ',	# 0x0388 GREEK CAPITAL LETTER EPSILON WITH TONOS
 'Ή',	# 0x0389 GREEK CAPITAL LETTER ETA WITH TONOS
 'Ί',	# 0x038A GREEK CAPITAL LETTER IOTA WITH TONOS
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 'Ό',	# 0x038C GREEK CAPITAL LETTER OMICRON WITH TONOS
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 'Ύ',	# 0x038E GREEK CAPITAL LETTER UPSILON WITH TONOS
 'Ώ',	# 0x038F GREEK CAPITAL LETTER OMEGA WITH TONOS
 'ΐ',	# 0x0390 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
 'Α',	# 0x0391 GREEK CAPITAL LETTER ALPHA
 'Β',	# 0x0392 GREEK CAPITAL LETTER BETA
 'Γ',	# 0x0393 GREEK CAPITAL LETTER GAMMA
 'Δ',	# 0x0394 GREEK CAPITAL LETTER DELTA
 'Ε',	# 0x0395 GREEK CAPITAL LETTER EPSILON
 'Ζ',	# 0x0396 GREEK CAPITAL LETTER ZETA
 'Η',	# 0x0397 GREEK CAPITAL LETTER ETA
 'Θ',	# 0x0398 GREEK CAPITAL LETTER THETA
 'Ι',	# 0x0399 GREEK CAPITAL LETTER IOTA
 'Κ',	# 0x039A GREEK CAPITAL LETTER KAPPA
 'Λ',	# 0x039B GREEK CAPITAL LETTER LAMDA
 'Μ',	# 0x039C GREEK CAPITAL LETTER MU
 'Ν',	# 0x039D GREEK CAPITAL LETTER NU
 'Ξ',	# 0x039E GREEK CAPITAL LETTER XI
 'Ο',	# 0x039F GREEK CAPITAL LETTER OMICRON
 'Π',	# 0x03A0 GREEK CAPITAL LETTER PI
 'Ρ',	# 0x03A1 GREEK CAPITAL LETTER RHO
 '',	# 0xD2 UNDEFINED
 'Σ',	# 0x03A3 GREEK CAPITAL LETTER SIGMA
 'Τ',	# 0x03A4 GREEK CAPITAL LETTER TAU
 'Υ',	# 0x03A5 GREEK CAPITAL LETTER UPSILON
 'Φ',	# 0x03A6 GREEK CAPITAL LETTER PHI
 'Χ',	# 0x03A7 GREEK CAPITAL LETTER CHI
 'Ψ',	# 0x03A8 GREEK CAPITAL LETTER PSI
 'Ω',	# 0x03A9 GREEK CAPITAL LETTER OMEGA
 'Ϊ',	# 0x03AA GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
 'Ϋ',	# 0x03AB GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
 'ά',	# 0x03AC GREEK SMALL LETTER ALPHA WITH TONOS
 'έ',	# 0x03AD GREEK SMALL LETTER EPSILON WITH TONOS
 'ή',	# 0x03AE GREEK SMALL LETTER ETA WITH TONOS
 'ί',	# 0x03AF GREEK SMALL LETTER IOTA WITH TONOS
 'ΰ',	# 0x03B0 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
 'α',	# 0x03B1 GREEK SMALL LETTER ALPHA
 'β',	# 0x03B2 GREEK SMALL LETTER BETA
 'γ',	# 0x03B3 GREEK SMALL LETTER GAMMA
 'δ',	# 0x03B4 GREEK SMALL LETTER DELTA
 'ε',	# 0x03B5 GREEK SMALL LETTER EPSILON
 'ζ',	# 0x03B6 GREEK SMALL LETTER ZETA
 'η',	# 0x03B7 GREEK SMALL LETTER ETA
 'θ',	# 0x03B8 GREEK SMALL LETTER THETA
 'ι',	# 0x03B9 GREEK SMALL LETTER IOTA
 'κ',	# 0x03BA GREEK SMALL LETTER KAPPA
 'λ',	# 0x03BB GREEK SMALL LETTER LAMDA
 'μ',	# 0x03BC GREEK SMALL LETTER MU
 'ν',	# 0x03BD GREEK SMALL LETTER NU
 'ξ',	# 0x03BE GREEK SMALL LETTER XI
 'ο',	# 0x03BF GREEK SMALL LETTER OMICRON
 'π',	# 0x03C0 GREEK SMALL LETTER PI
 'ρ',	# 0x03C1 GREEK SMALL LETTER RHO
 'ς',	# 0x03C2 GREEK SMALL LETTER FINAL SIGMA
 'σ',	# 0x03C3 GREEK SMALL LETTER SIGMA
 'τ',	# 0x03C4 GREEK SMALL LETTER TAU
 'υ',	# 0x03C5 GREEK SMALL LETTER UPSILON
 'φ',	# 0x03C6 GREEK SMALL LETTER PHI
 'χ',	# 0x03C7 GREEK SMALL LETTER CHI
 'ψ',	# 0x03C8 GREEK SMALL LETTER PSI
 'ω',	# 0x03C9 GREEK SMALL LETTER OMEGA
 'ϊ',	# 0x03CA GREEK SMALL LETTER IOTA WITH DIALYTIKA
 'ϋ',	# 0x03CB GREEK SMALL LETTER UPSILON WITH DIALYTIKA
 'ό',	# 0x03CC GREEK SMALL LETTER OMICRON WITH TONOS
 'ύ',	# 0x03CD GREEK SMALL LETTER UPSILON WITH TONOS
 'ώ',	# 0x03CE GREEK SMALL LETTER OMEGA WITH TONOS
); #@iso88597

# Blank out the bytes that have no character assigned in iso-8859-7
#  IN: 1 string to clean (changed in place through the @_ alias)
# OUT: 1 number of bytes replaced (empty string if there were none)

sub illegal_iso88597 {

# Replace every unassigned byte by a space
# 0xFF is included as well: it is unassigned, but unlike the other unassigned
# bytes it has no (empty) slot in @iso88597, which ends at the entry for 0xFE

 $_[0] =~ s#[\xA4\xA5\xAA\xAE\xD2\xFF]# #sg;
} #illegal_iso88597

# Convert a string from iso-8859-7 to UTF-8
#  IN: 1 string to convert (changed in place through the @_ alias)
# OUT: 1 number of high bytes converted (empty string if there were none)

sub iso88597 {

# Turn the unassigned bytes into spaces before the table lookup
# 0xFF must be caught here too: @iso88597 stops at the entry for 0xFE, so
# looking up 0xFF would yield undef and silently drop the byte from the string

 $_[0] =~ s#[\xA4\xA5\xAA\xAE\xD2\xFF]# #sg;

# Replace each remaining high byte by its UTF-8 sequence from the table

 $_[0] =~ s#([\x80-\xFF])#$iso88597[ord($1)-0x80]#sge;
} #iso88597

#--------------------------------------------------------------------------
# Convert from iso-8859-8 to UTF-8

my @iso88598 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 '',	# 0xA1 UNDEFINED
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '¤',	# 0x00A4 CURRENCY SIGN
 '¥',	# 0x00A5 YEN SIGN
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 '©',	# 0x00A9 COPYRIGHT SIGN
 '×',	# 0x00D7 MULTIPLICATION SIGN
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 '¯',	# 0x00AF MACRON
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '´',	# 0x00B4 ACUTE ACCENT
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 '¸',	# 0x00B8 CEDILLA
 '¹',	# 0x00B9 SUPERSCRIPT ONE
 '÷',	# 0x00F7 DIVISION SIGN
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¼',	# 0x00BC VULGAR FRACTION ONE QUARTER
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 '¾',	# 0x00BE VULGAR FRACTION THREE QUARTERS
 '',	# 0xBF UNDEFINED
 '',	# 0xC0 UNDEFINED
 '',	# 0xC1 UNDEFINED
 '',	# 0xC2 UNDEFINED
 '',	# 0xC3 UNDEFINED
 '',	# 0xC4 UNDEFINED
 '',	# 0xC5 UNDEFINED
 '',	# 0xC6 UNDEFINED
 '',	# 0xC7 UNDEFINED
 '',	# 0xC8 UNDEFINED
 '',	# 0xC9 UNDEFINED
 '',	# 0xCA UNDEFINED
 '',	# 0xCB UNDEFINED
 '',	# 0xCC UNDEFINED
 '',	# 0xCD UNDEFINED
 '',	# 0xCE UNDEFINED
 '',	# 0xCF UNDEFINED
 '',	# 0xD0 UNDEFINED
 '',	# 0xD1 UNDEFINED
 '',	# 0xD2 UNDEFINED
 '',	# 0xD3 UNDEFINED
 '',	# 0xD4 UNDEFINED
 '',	# 0xD5 UNDEFINED
 '',	# 0xD6 UNDEFINED
 '',	# 0xD7 UNDEFINED
 '',	# 0xD8 UNDEFINED
 '',	# 0xD9 UNDEFINED
 '',	# 0xDA UNDEFINED
 '',	# 0xDB UNDEFINED
 '',	# 0xDC UNDEFINED
 '',	# 0xDD UNDEFINED
 '',	# 0xDE UNDEFINED
 '‗',	# 0x2017 DOUBLE LOW LINE
 'א',	# 0x05D0 HEBREW LETTER ALEF
 'ב',	# 0x05D1 HEBREW LETTER BET
 'ג',	# 0x05D2 HEBREW LETTER GIMEL
 'ד',	# 0x05D3 HEBREW LETTER DALET
 'ה',	# 0x05D4 HEBREW LETTER HE
 'ו',	# 0x05D5 HEBREW LETTER VAV
 'ז',	# 0x05D6 HEBREW LETTER ZAYIN
 'ח',	# 0x05D7 HEBREW LETTER HET
 'ט',	# 0x05D8 HEBREW LETTER TET
 'י',	# 0x05D9 HEBREW LETTER YOD
 'ך',	# 0x05DA HEBREW LETTER FINAL KAF
 'כ',	# 0x05DB HEBREW LETTER KAF
 'ל',	# 0x05DC HEBREW LETTER LAMED
 'ם',	# 0x05DD HEBREW LETTER FINAL MEM
 'מ',	# 0x05DE HEBREW LETTER MEM
 'ן',	# 0x05DF HEBREW LETTER FINAL NUN
 'נ',	# 0x05E0 HEBREW LETTER NUN
 'ס',	# 0x05E1 HEBREW LETTER SAMEKH
 'ע',	# 0x05E2 HEBREW LETTER AYIN
 'ף',	# 0x05E3 HEBREW LETTER FINAL PE
 'פ',	# 0x05E4 HEBREW LETTER PE
 'ץ',	# 0x05E5 HEBREW LETTER FINAL TSADI
 'צ',	# 0x05E6 HEBREW LETTER TSADI
 'ק',	# 0x05E7 HEBREW LETTER QOF
 'ר',	# 0x05E8 HEBREW LETTER RESH
 'ש',	# 0x05E9 HEBREW LETTER SHIN
 'ת',	# 0x05EA HEBREW LETTER TAV
 '',	# 0xFB UNDEFINED
 '',	# 0xFC UNDEFINED
 '‎',	# 0x200E LEFT-TO-RIGHT MARK
 '‏',	# 0x200F RIGHT-TO-LEFT MARK
); #@iso88598

# Blank out the bytes that have no character assigned in iso-8859-8
#  IN: 1 string to clean (changed in place through the @_ alias)
# OUT: 1 number of bytes replaced (empty string if there were none)

sub illegal_iso88598 {

# Replace every unassigned byte by a space: 0xA1, the block 0xBF-0xDE, 0xFB,
# 0xFC and 0xFF
# 0xFF is unassigned as well, but has no (empty) slot in @iso88598, which ends
# at the entry for 0xFE

 $_[0] =~ s#[\xA1\xBF-\xDE\xFB\xFC\xFF]# #sg;
} #illegal_iso88598

# Convert a string from iso-8859-8 to UTF-8
#  IN: 1 string to convert (changed in place through the @_ alias)
# OUT: 1 number of high bytes converted (empty string if there were none)

sub iso88598 {

# Turn the unassigned bytes (0xA1, 0xBF-0xDE, 0xFB, 0xFC, 0xFF) into spaces
# before the table lookup
# 0xFF must be caught here too: @iso88598 stops at the entry for 0xFE, so
# looking up 0xFF would yield undef and silently drop the byte from the string

 $_[0] =~ s#[\xA1\xBF-\xDE\xFB\xFC\xFF]# #sg;

# Replace each remaining high byte by its UTF-8 sequence from the table

 $_[0] =~ s#([\x80-\xFF])#$iso88598[ord($1)-0x80]#sge;
} #iso88598

#--------------------------------------------------------------------------
# Convert from iso-8859-9 to UTF-8

my @iso88599 = (
 '',	# 0x0080 <control>
 '',	# 0x0081 <control>
 '',	# 0x0082 <control>
 '',	# 0x0083 <control>
 '',	# 0x0084 <control>
 '',	# 0x0085 <control>
 '',	# 0x0086 <control>
 '',	# 0x0087 <control>
 '',	# 0x0088 <control>
 '',	# 0x0089 <control>
 '',	# 0x008A <control>
 '',	# 0x008B <control>
 '',	# 0x008C <control>
 '',	# 0x008D <control>
 '',	# 0x008E <control>
 '',	# 0x008F <control>
 '',	# 0x0090 <control>
 '',	# 0x0091 <control>
 '',	# 0x0092 <control>
 '',	# 0x0093 <control>
 '',	# 0x0094 <control>
 '',	# 0x0095 <control>
 '',	# 0x0096 <control>
 '',	# 0x0097 <control>
 '',	# 0x0098 <control>
 '',	# 0x0099 <control>
 '',	# 0x009A <control>
 '',	# 0x009B <control>
 '',	# 0x009C <control>
 '',	# 0x009D <control>
 '',	# 0x009E <control>
 '',	# 0x009F <control>
 ' ',	# 0x00A0 NO-BREAK SPACE
 '¡',	# 0x00A1 INVERTED EXCLAMATION MARK
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '¤',	# 0x00A4 CURRENCY SIGN
 '¥',	# 0x00A5 YEN SIGN
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 '©',	# 0x00A9 COPYRIGHT SIGN
 'ª',	# 0x00AA FEMININE ORDINAL INDICATOR
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 '¯',	# 0x00AF MACRON
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '´',	# 0x00B4 ACUTE ACCENT
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 '¸',	# 0x00B8 CEDILLA
 '¹',	# 0x00B9 SUPERSCRIPT ONE
 'º',	# 0x00BA MASCULINE ORDINAL INDICATOR
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¼',	# 0x00BC VULGAR FRACTION ONE QUARTER
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 '¾',	# 0x00BE VULGAR FRACTION THREE QUARTERS
 '¿',	# 0x00BF INVERTED QUESTION MARK
 'À',	# 0x00C0 LATIN CAPITAL LETTER A WITH GRAVE
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ã',	# 0x00C3 LATIN CAPITAL LETTER A WITH TILDE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'È',	# 0x00C8 LATIN CAPITAL LETTER E WITH GRAVE
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ê',	# 0x00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ì',	# 0x00CC LATIN CAPITAL LETTER I WITH GRAVE
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ï',	# 0x00CF LATIN CAPITAL LETTER I WITH DIAERESIS
 'Ğ',	# 0x011E LATIN CAPITAL LETTER G WITH BREVE
 'Ñ',	# 0x00D1 LATIN CAPITAL LETTER N WITH TILDE
 'Ò',	# 0x00D2 LATIN CAPITAL LETTER O WITH GRAVE
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Õ',	# 0x00D5 LATIN CAPITAL LETTER O WITH TILDE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 'Ù',	# 0x00D9 LATIN CAPITAL LETTER U WITH GRAVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'İ',	# 0x0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
 'Ş',	# 0x015E LATIN CAPITAL LETTER S WITH CEDILLA
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'à',	# 0x00E0 LATIN SMALL LETTER A WITH GRAVE
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ã',	# 0x00E3 LATIN SMALL LETTER A WITH TILDE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'è',	# 0x00E8 LATIN SMALL LETTER E WITH GRAVE
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ê',	# 0x00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ì',	# 0x00EC LATIN SMALL LETTER I WITH GRAVE
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 'ğ',	# 0x011F LATIN SMALL LETTER G WITH BREVE
 'ñ',	# 0x00F1 LATIN SMALL LETTER N WITH TILDE
 'ò',	# 0x00F2 LATIN SMALL LETTER O WITH GRAVE
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'õ',	# 0x00F5 LATIN SMALL LETTER O WITH TILDE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 'ù',	# 0x00F9 LATIN SMALL LETTER U WITH GRAVE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ı',	# 0x0131 LATIN SMALL LETTER DOTLESS I
 'ş',	# 0x015F LATIN SMALL LETTER S WITH CEDILLA
 'ÿ',	# 0x00FF LATIN SMALL LETTER Y WITH DIAERESIS
); #@iso88599

# Nothing to blank out for iso-8859-9: every slot in @iso88599 holds a character
#  IN: 1 string to clean (left untouched)
# OUT: nothing (empty list, undef in scalar context)

sub illegal_iso88599 { return } #illegal_iso88599

# Convert a string from iso-8859-9 to UTF-8
#  IN: 1 string to convert (changed in place through the @_ alias)
# OUT: 1 number of high bytes converted (empty string if there were none)

sub iso88599 {

# No bytes need blanking first: look every high byte up in the table directly

 return $_[0] =~ s{([\x80-\xFF])}{ $iso88599[ ord($1) - 0x80 ] }sge;
} #iso88599

#--------------------------------------------------------------------------
# Convert from windows-1250 to UTF-8

my @windows1250 = (
 '€',	# 0x20AC EURO SIGN
 '',	# 0x81 UNDEFINED
 '‚',	# 0x201A SINGLE LOW-9 QUOTATION MARK
 '',	# 0x83 UNDEFINED
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 '…',	# 0x2026 HORIZONTAL ELLIPSIS
 '†',	# 0x2020 DAGGER
 '‡',	# 0x2021 DOUBLE DAGGER
 '',	# 0x88 UNDEFINED
 '‰',	# 0x2030 PER MILLE SIGN
 'Š',	# 0x0160 LATIN CAPITAL LETTER S WITH CARON
 '‹',	# 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 'Ś',	# 0x015A LATIN CAPITAL LETTER S WITH ACUTE
 'Ť',	# 0x0164 LATIN CAPITAL LETTER T WITH CARON
 'Ž',	# 0x017D LATIN CAPITAL LETTER Z WITH CARON
 'Ź',	# 0x0179 LATIN CAPITAL LETTER Z WITH ACUTE
 '',	# 0x90 UNDEFINED
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '•',	# 0x2022 BULLET
 '–',	# 0x2013 EN DASH
 '—',	# 0x2014 EM DASH
 '',	# 0x98 UNDEFINED
 '™',	# 0x2122 TRADE MARK SIGN
 'š',	# 0x0161 LATIN SMALL LETTER S WITH CARON
 '›',	# 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 'ś',	# 0x015B LATIN SMALL LETTER S WITH ACUTE
 'ť',	# 0x0165 LATIN SMALL LETTER T WITH CARON
 'ž',	# 0x017E LATIN SMALL LETTER Z WITH CARON
 'ź',	# 0x017A LATIN SMALL LETTER Z WITH ACUTE
 ' ',	# 0x00A0 NO-BREAK SPACE
 'ˇ',	# 0x02C7 CARON
 '˘',	# 0x02D8 BREVE
 'Ł',	# 0x0141 LATIN CAPITAL LETTER L WITH STROKE
 '¤',	# 0x00A4 CURRENCY SIGN
 'Ą',	# 0x0104 LATIN CAPITAL LETTER A WITH OGONEK
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 '©',	# 0x00A9 COPYRIGHT SIGN
 'Ş',	# 0x015E LATIN CAPITAL LETTER S WITH CEDILLA
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 'Ż',	# 0x017B LATIN CAPITAL LETTER Z WITH DOT ABOVE
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '˛',	# 0x02DB OGONEK
 'ł',	# 0x0142 LATIN SMALL LETTER L WITH STROKE
 '´',	# 0x00B4 ACUTE ACCENT
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 '¸',	# 0x00B8 CEDILLA
 'ą',	# 0x0105 LATIN SMALL LETTER A WITH OGONEK
 'ş',	# 0x015F LATIN SMALL LETTER S WITH CEDILLA
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 'Ľ',	# 0x013D LATIN CAPITAL LETTER L WITH CARON
 '˝',	# 0x02DD DOUBLE ACUTE ACCENT
 'ľ',	# 0x013E LATIN SMALL LETTER L WITH CARON
 'ż',	# 0x017C LATIN SMALL LETTER Z WITH DOT ABOVE
 'Ŕ',	# 0x0154 LATIN CAPITAL LETTER R WITH ACUTE
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ă',	# 0x0102 LATIN CAPITAL LETTER A WITH BREVE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Ĺ',	# 0x0139 LATIN CAPITAL LETTER L WITH ACUTE
 'Ć',	# 0x0106 LATIN CAPITAL LETTER C WITH ACUTE
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'Č',	# 0x010C LATIN CAPITAL LETTER C WITH CARON
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ę',	# 0x0118 LATIN CAPITAL LETTER E WITH OGONEK
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ě',	# 0x011A LATIN CAPITAL LETTER E WITH CARON
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ď',	# 0x010E LATIN CAPITAL LETTER D WITH CARON
 'Đ',	# 0x0110 LATIN CAPITAL LETTER D WITH STROKE
 'Ń',	# 0x0143 LATIN CAPITAL LETTER N WITH ACUTE
 'Ň',	# 0x0147 LATIN CAPITAL LETTER N WITH CARON
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Ő',	# 0x0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ř',	# 0x0158 LATIN CAPITAL LETTER R WITH CARON
 'Ů',	# 0x016E LATIN CAPITAL LETTER U WITH RING ABOVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Ű',	# 0x0170 LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ý',	# 0x00DD LATIN CAPITAL LETTER Y WITH ACUTE
 'Ţ',	# 0x0162 LATIN CAPITAL LETTER T WITH CEDILLA
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'ŕ',	# 0x0155 LATIN SMALL LETTER R WITH ACUTE
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ă',	# 0x0103 LATIN SMALL LETTER A WITH BREVE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'ĺ',	# 0x013A LATIN SMALL LETTER L WITH ACUTE
 'ć',	# 0x0107 LATIN SMALL LETTER C WITH ACUTE
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'č',	# 0x010D LATIN SMALL LETTER C WITH CARON
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ę',	# 0x0119 LATIN SMALL LETTER E WITH OGONEK
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ě',	# 0x011B LATIN SMALL LETTER E WITH CARON
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ď',	# 0x010F LATIN SMALL LETTER D WITH CARON
 'đ',	# 0x0111 LATIN SMALL LETTER D WITH STROKE
 'ń',	# 0x0144 LATIN SMALL LETTER N WITH ACUTE
 'ň',	# 0x0148 LATIN SMALL LETTER N WITH CARON
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'ő',	# 0x0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ř',	# 0x0159 LATIN SMALL LETTER R WITH CARON
 'ů',	# 0x016F LATIN SMALL LETTER U WITH RING ABOVE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'ű',	# 0x0171 LATIN SMALL LETTER U WITH DOUBLE ACUTE
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ý',	# 0x00FD LATIN SMALL LETTER Y WITH ACUTE
 'ţ',	# 0x0163 LATIN SMALL LETTER T WITH CEDILLA
 '˙',	# 0x02D9 DOT ABOVE
); #@windows1250

# Blank out the bytes that have no character assigned in windows-1250
#  IN: 1 string to clean (changed in place through the @_ alias)
# OUT: 1 number of bytes replaced (empty string if there were none)

sub illegal_windows1250 {

# Each unassigned byte becomes a single space, right in the caller's string

 return $_[0] =~ s{[\x81\x83\x88\x90\x98]}{ }sg;
} #illegal_windows1250

# Convert a string from windows-1250 to UTF-8
#  IN: 1 string to convert (changed in place through the @_ alias)
# OUT: 1 number of high bytes converted (empty string if there were none)

sub windows1250 {

# Unassigned bytes only have an empty table slot: make them spaces first

 $_[0] =~ s{[\x81\x83\x88\x90\x98]}{ }sg;

# Swap each remaining high byte for its UTF-8 sequence from the table

 return $_[0] =~ s{([\x80-\xFF])}{ $windows1250[ ord($1) - 0x80 ] }sge;
} #windows1250

#--------------------------------------------------------------------------
# Convert from windows-1251 to UTF-8

my @windows1251 = (
 'Ђ',	# 0x0402 CYRILLIC CAPITAL LETTER DJE
 'Ѓ',	# 0x0403 CYRILLIC CAPITAL LETTER GJE
 '‚',	# 0x201A SINGLE LOW-9 QUOTATION MARK
 'ѓ',	# 0x0453 CYRILLIC SMALL LETTER GJE
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 '…',	# 0x2026 HORIZONTAL ELLIPSIS
 '†',	# 0x2020 DAGGER
 '‡',	# 0x2021 DOUBLE DAGGER
 '€',	# 0x20AC EURO SIGN
 '‰',	# 0x2030 PER MILLE SIGN
 'Љ',	# 0x0409 CYRILLIC CAPITAL LETTER LJE
 '‹',	# 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 'Њ',	# 0x040A CYRILLIC CAPITAL LETTER NJE
 'Ќ',	# 0x040C CYRILLIC CAPITAL LETTER KJE
 'Ћ',	# 0x040B CYRILLIC CAPITAL LETTER TSHE
 'Џ',	# 0x040F CYRILLIC CAPITAL LETTER DZHE
 'ђ',	# 0x0452 CYRILLIC SMALL LETTER DJE
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '•',	# 0x2022 BULLET
 '–',	# 0x2013 EN DASH
 '—',	# 0x2014 EM DASH
 '',	# 0x98 UNDEFINED
 '™',	# 0x2122 TRADE MARK SIGN
 'љ',	# 0x0459 CYRILLIC SMALL LETTER LJE
 '›',	# 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 'њ',	# 0x045A CYRILLIC SMALL LETTER NJE
 'ќ',	# 0x045C CYRILLIC SMALL LETTER KJE
 'ћ',	# 0x045B CYRILLIC SMALL LETTER TSHE
 'џ',	# 0x045F CYRILLIC SMALL LETTER DZHE
 ' ',	# 0x00A0 NO-BREAK SPACE
 'Ў',	# 0x040E CYRILLIC CAPITAL LETTER SHORT U
 'ў',	# 0x045E CYRILLIC SMALL LETTER SHORT U
 'Ј',	# 0x0408 CYRILLIC CAPITAL LETTER JE
 '¤',	# 0x00A4 CURRENCY SIGN
 'Ґ',	# 0x0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 'Ё',	# 0x0401 CYRILLIC CAPITAL LETTER IO
 '©',	# 0x00A9 COPYRIGHT SIGN
 'Є',	# 0x0404 CYRILLIC CAPITAL LETTER UKRAINIAN IE
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 'Ї',	# 0x0407 CYRILLIC CAPITAL LETTER YI
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 'І',	# 0x0406 CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
 'і',	# 0x0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
 'ґ',	# 0x0491 CYRILLIC SMALL LETTER GHE WITH UPTURN
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 'ё',	# 0x0451 CYRILLIC SMALL LETTER IO
 '№',	# 0x2116 NUMERO SIGN
 'є',	# 0x0454 CYRILLIC SMALL LETTER UKRAINIAN IE
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 'ј',	# 0x0458 CYRILLIC SMALL LETTER JE
 'Ѕ',	# 0x0405 CYRILLIC CAPITAL LETTER DZE
 'ѕ',	# 0x0455 CYRILLIC SMALL LETTER DZE
 'ї',	# 0x0457 CYRILLIC SMALL LETTER YI
 'А',	# 0x0410 CYRILLIC CAPITAL LETTER A
 'Б',	# 0x0411 CYRILLIC CAPITAL LETTER BE
 'В',	# 0x0412 CYRILLIC CAPITAL LETTER VE
 'Г',	# 0x0413 CYRILLIC CAPITAL LETTER GHE
 'Д',	# 0x0414 CYRILLIC CAPITAL LETTER DE
 'Е',	# 0x0415 CYRILLIC CAPITAL LETTER IE
 'Ж',	# 0x0416 CYRILLIC CAPITAL LETTER ZHE
 'З',	# 0x0417 CYRILLIC CAPITAL LETTER ZE
 'И',	# 0x0418 CYRILLIC CAPITAL LETTER I
 'Й',	# 0x0419 CYRILLIC CAPITAL LETTER SHORT I
 'К',	# 0x041A CYRILLIC CAPITAL LETTER KA
 'Л',	# 0x041B CYRILLIC CAPITAL LETTER EL
 'М',	# 0x041C CYRILLIC CAPITAL LETTER EM
 'Н',	# 0x041D CYRILLIC CAPITAL LETTER EN
 'О',	# 0x041E CYRILLIC CAPITAL LETTER O
 'П',	# 0x041F CYRILLIC CAPITAL LETTER PE
 'Р',	# 0x0420 CYRILLIC CAPITAL LETTER ER
 'С',	# 0x0421 CYRILLIC CAPITAL LETTER ES
 'Т',	# 0x0422 CYRILLIC CAPITAL LETTER TE
 'У',	# 0x0423 CYRILLIC CAPITAL LETTER U
 'Ф',	# 0x0424 CYRILLIC CAPITAL LETTER EF
 'Х',	# 0x0425 CYRILLIC CAPITAL LETTER HA
 'Ц',	# 0x0426 CYRILLIC CAPITAL LETTER TSE
 'Ч',	# 0x0427 CYRILLIC CAPITAL LETTER CHE
 'Ш',	# 0x0428 CYRILLIC CAPITAL LETTER SHA
 'Щ',	# 0x0429 CYRILLIC CAPITAL LETTER SHCHA
 'Ъ',	# 0x042A CYRILLIC CAPITAL LETTER HARD SIGN
 'Ы',	# 0x042B CYRILLIC CAPITAL LETTER YERU
 'Ь',	# 0x042C CYRILLIC CAPITAL LETTER SOFT SIGN
 'Э',	# 0x042D CYRILLIC CAPITAL LETTER E
 'Ю',	# 0x042E CYRILLIC CAPITAL LETTER YU
 'Я',	# 0x042F CYRILLIC CAPITAL LETTER YA
 'а',	# 0x0430 CYRILLIC SMALL LETTER A
 'б',	# 0x0431 CYRILLIC SMALL LETTER BE
 'в',	# 0x0432 CYRILLIC SMALL LETTER VE
 'г',	# 0x0433 CYRILLIC SMALL LETTER GHE
 'д',	# 0x0434 CYRILLIC SMALL LETTER DE
 'е',	# 0x0435 CYRILLIC SMALL LETTER IE
 'ж',	# 0x0436 CYRILLIC SMALL LETTER ZHE
 'з',	# 0x0437 CYRILLIC SMALL LETTER ZE
 'и',	# 0x0438 CYRILLIC SMALL LETTER I
 'й',	# 0x0439 CYRILLIC SMALL LETTER SHORT I
 'к',	# 0x043A CYRILLIC SMALL LETTER KA
 'л',	# 0x043B CYRILLIC SMALL LETTER EL
 'м',	# 0x043C CYRILLIC SMALL LETTER EM
 'н',	# 0x043D CYRILLIC SMALL LETTER EN
 'о',	# 0x043E CYRILLIC SMALL LETTER O
 'п',	# 0x043F CYRILLIC SMALL LETTER PE
 'р',	# 0x0440 CYRILLIC SMALL LETTER ER
 'с',	# 0x0441 CYRILLIC SMALL LETTER ES
 'т',	# 0x0442 CYRILLIC SMALL LETTER TE
 'у',	# 0x0443 CYRILLIC SMALL LETTER U
 'ф',	# 0x0444 CYRILLIC SMALL LETTER EF
 'х',	# 0x0445 CYRILLIC SMALL LETTER HA
 'ц',	# 0x0446 CYRILLIC SMALL LETTER TSE
 'ч',	# 0x0447 CYRILLIC SMALL LETTER CHE
 'ш',	# 0x0448 CYRILLIC SMALL LETTER SHA
 'щ',	# 0x0449 CYRILLIC SMALL LETTER SHCHA
 'ъ',	# 0x044A CYRILLIC SMALL LETTER HARD SIGN
 'ы',	# 0x044B CYRILLIC SMALL LETTER YERU
 'ь',	# 0x044C CYRILLIC SMALL LETTER SOFT SIGN
 'э',	# 0x044D CYRILLIC SMALL LETTER E
 'ю',	# 0x044E CYRILLIC SMALL LETTER YU
 'я',	# 0x044F CYRILLIC SMALL LETTER YA
); #@windows1251

# Blank out the one byte (0x98) that has no character assigned in windows-1251
#  IN: 1 string to clean (changed in place through the @_ alias)
# OUT: 1 number of bytes replaced (empty string if there were none)

sub illegal_windows1251 {

# Each unassigned byte becomes a single space, right in the caller's string

 return $_[0] =~ s{[\x98]}{ }sg;
} #illegal_windows1251

# Convert a string from windows-1251 to UTF-8
#  IN: 1 string to convert (changed in place through the @_ alias)
# OUT: 1 number of high bytes converted (empty string if there were none)

sub windows1251 {

# The unassigned byte only has an empty table slot: make it a space first

 $_[0] =~ s{[\x98]}{ }sg;

# Swap each remaining high byte for its UTF-8 sequence from the table

 return $_[0] =~ s{([\x80-\xFF])}{ $windows1251[ ord($1) - 0x80 ] }sge;
} #windows1251

#--------------------------------------------------------------------------
# Convert from windows-1252 to UTF-8

my @windows1252 = (
 '€',	# 0x20AC EURO SIGN
 '',	# 0x81 UNDEFINED
 '‚',	# 0x201A SINGLE LOW-9 QUOTATION MARK
 'ƒ',	# 0x0192 LATIN SMALL LETTER F WITH HOOK
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 '…',	# 0x2026 HORIZONTAL ELLIPSIS
 '†',	# 0x2020 DAGGER
 '‡',	# 0x2021 DOUBLE DAGGER
 'ˆ',	# 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
 '‰',	# 0x2030 PER MILLE SIGN
 'Š',	# 0x0160 LATIN CAPITAL LETTER S WITH CARON
 '‹',	# 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 'Œ',	# 0x0152 LATIN CAPITAL LIGATURE OE
 '',	# 0x8D UNDEFINED
 'Ž',	# 0x017D LATIN CAPITAL LETTER Z WITH CARON
 '',	# 0x8F UNDEFINED
 '',	# 0x90 UNDEFINED
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '•',	# 0x2022 BULLET
 '–',	# 0x2013 EN DASH
 '—',	# 0x2014 EM DASH
 '˜',	# 0x02DC SMALL TILDE
 '™',	# 0x2122 TRADE MARK SIGN
 'š',	# 0x0161 LATIN SMALL LETTER S WITH CARON
 '›',	# 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 'œ',	# 0x0153 LATIN SMALL LIGATURE OE
 '',	# 0x9D UNDEFINED
 'ž',	# 0x017E LATIN SMALL LETTER Z WITH CARON
 'Ÿ',	# 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
 ' ',	# 0x00A0 NO-BREAK SPACE
 '¡',	# 0x00A1 INVERTED EXCLAMATION MARK
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '¤',	# 0x00A4 CURRENCY SIGN
 '¥',	# 0x00A5 YEN SIGN
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 '©',	# 0x00A9 COPYRIGHT SIGN
 'ª',	# 0x00AA FEMININE ORDINAL INDICATOR
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 '¯',	# 0x00AF MACRON
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '´',	# 0x00B4 ACUTE ACCENT
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 '¸',	# 0x00B8 CEDILLA
 '¹',	# 0x00B9 SUPERSCRIPT ONE
 'º',	# 0x00BA MASCULINE ORDINAL INDICATOR
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¼',	# 0x00BC VULGAR FRACTION ONE QUARTER
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 '¾',	# 0x00BE VULGAR FRACTION THREE QUARTERS
 '¿',	# 0x00BF INVERTED QUESTION MARK
 'À',	# 0x00C0 LATIN CAPITAL LETTER A WITH GRAVE
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ã',	# 0x00C3 LATIN CAPITAL LETTER A WITH TILDE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'È',	# 0x00C8 LATIN CAPITAL LETTER E WITH GRAVE
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ê',	# 0x00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ì',	# 0x00CC LATIN CAPITAL LETTER I WITH GRAVE
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ï',	# 0x00CF LATIN CAPITAL LETTER I WITH DIAERESIS
 'Ð',	# 0x00D0 LATIN CAPITAL LETTER ETH
 'Ñ',	# 0x00D1 LATIN CAPITAL LETTER N WITH TILDE
 'Ò',	# 0x00D2 LATIN CAPITAL LETTER O WITH GRAVE
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Õ',	# 0x00D5 LATIN CAPITAL LETTER O WITH TILDE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 'Ù',	# 0x00D9 LATIN CAPITAL LETTER U WITH GRAVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ý',	# 0x00DD LATIN CAPITAL LETTER Y WITH ACUTE
 'Þ',	# 0x00DE LATIN CAPITAL LETTER THORN
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'à',	# 0x00E0 LATIN SMALL LETTER A WITH GRAVE
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ã',	# 0x00E3 LATIN SMALL LETTER A WITH TILDE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'è',	# 0x00E8 LATIN SMALL LETTER E WITH GRAVE
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ê',	# 0x00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ì',	# 0x00EC LATIN SMALL LETTER I WITH GRAVE
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 'ð',	# 0x00F0 LATIN SMALL LETTER ETH
 'ñ',	# 0x00F1 LATIN SMALL LETTER N WITH TILDE
 'ò',	# 0x00F2 LATIN SMALL LETTER O WITH GRAVE
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'õ',	# 0x00F5 LATIN SMALL LETTER O WITH TILDE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 'ù',	# 0x00F9 LATIN SMALL LETTER U WITH GRAVE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ý',	# 0x00FD LATIN SMALL LETTER Y WITH ACUTE
 'þ',	# 0x00FE LATIN SMALL LETTER THORN
 'ÿ',	# 0x00FF LATIN SMALL LETTER Y WITH DIAERESIS
); #@windows1252

# Blank out the bytes that have no character assigned in windows-1252
#  IN: 1 string to clean (changed in place through the @_ alias)
# OUT: 1 number of bytes replaced (empty string if there were none)

sub illegal_windows1252 {

# Each unassigned byte becomes a single space, right in the caller's string

 return $_[0] =~ s{[\x81\x8D\x8F\x90\x9D]}{ }sg;
} #illegal_windows1252

# Convert a string from windows-1252 to UTF-8
#  IN: 1 string to convert (changed in place through the @_ alias)
# OUT: 1 number of high bytes converted (empty string if there were none)

sub windows1252 {

# Unassigned bytes only have an empty table slot: make them spaces first

 $_[0] =~ s{[\x81\x8D\x8F\x90\x9D]}{ }sg;

# Swap each remaining high byte for its UTF-8 sequence from the table

 return $_[0] =~ s{([\x80-\xFF])}{ $windows1252[ ord($1) - 0x80 ] }sge;
} #windows1252

#--------------------------------------------------------------------------
# Convert from windows-1253 to UTF-8

my @windows1253 = (
 '€',	# 0x20AC EURO SIGN
 '',	# 0x81 UNDEFINED
 '‚',	# 0x201A SINGLE LOW-9 QUOTATION MARK
 'ƒ',	# 0x0192 LATIN SMALL LETTER F WITH HOOK
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 '…',	# 0x2026 HORIZONTAL ELLIPSIS
 '†',	# 0x2020 DAGGER
 '‡',	# 0x2021 DOUBLE DAGGER
 '',	# 0x88 UNDEFINED
 '‰',	# 0x2030 PER MILLE SIGN
 '',	# 0x8A UNDEFINED
 '‹',	# 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 '',	# 0x8C UNDEFINED
 '',	# 0x8D UNDEFINED
 '',	# 0x8E UNDEFINED
 '',	# 0x8F UNDEFINED
 '',	# 0x90 UNDEFINED
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '•',	# 0x2022 BULLET
 '–',	# 0x2013 EN DASH
 '—',	# 0x2014 EM DASH
 '',	# 0x98 UNDEFINED
 '™',	# 0x2122 TRADE MARK SIGN
 '',	# 0x9A UNDEFINED
 '›',	# 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 '',	# 0x9C UNDEFINED
 '',	# 0x9D UNDEFINED
 '',	# 0x9E UNDEFINED
 '',	# 0x9F UNDEFINED
 ' ',	# 0x00A0 NO-BREAK SPACE
 '΅',	# 0x0385 GREEK DIALYTIKA TONOS
 'Ά',	# 0x0386 GREEK CAPITAL LETTER ALPHA WITH TONOS
 '£',	# 0x00A3 POUND SIGN
 '¤',	# 0x00A4 CURRENCY SIGN
 '¥',	# 0x00A5 YEN SIGN
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 '©',	# 0x00A9 COPYRIGHT SIGN
 '',	# 0xAA UNDEFINED
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 '―',	# 0x2015 HORIZONTAL BAR
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '΄',	# 0x0384 GREEK TONOS
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 'Έ',	# 0x0388 GREEK CAPITAL LETTER EPSILON WITH TONOS
 'Ή',	# 0x0389 GREEK CAPITAL LETTER ETA WITH TONOS
 'Ί',	# 0x038A GREEK CAPITAL LETTER IOTA WITH TONOS
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 'Ό',	# 0x038C GREEK CAPITAL LETTER OMICRON WITH TONOS
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 'Ύ',	# 0x038E GREEK CAPITAL LETTER UPSILON WITH TONOS
 'Ώ',	# 0x038F GREEK CAPITAL LETTER OMEGA WITH TONOS
 'ΐ',	# 0x0390 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
 'Α',	# 0x0391 GREEK CAPITAL LETTER ALPHA
 'Β',	# 0x0392 GREEK CAPITAL LETTER BETA
 'Γ',	# 0x0393 GREEK CAPITAL LETTER GAMMA
 'Δ',	# 0x0394 GREEK CAPITAL LETTER DELTA
 'Ε',	# 0x0395 GREEK CAPITAL LETTER EPSILON
 'Ζ',	# 0x0396 GREEK CAPITAL LETTER ZETA
 'Η',	# 0x0397 GREEK CAPITAL LETTER ETA
 'Θ',	# 0x0398 GREEK CAPITAL LETTER THETA
 'Ι',	# 0x0399 GREEK CAPITAL LETTER IOTA
 'Κ',	# 0x039A GREEK CAPITAL LETTER KAPPA
 'Λ',	# 0x039B GREEK CAPITAL LETTER LAMDA
 'Μ',	# 0x039C GREEK CAPITAL LETTER MU
 'Ν',	# 0x039D GREEK CAPITAL LETTER NU
 'Ξ',	# 0x039E GREEK CAPITAL LETTER XI
 'Ο',	# 0x039F GREEK CAPITAL LETTER OMICRON
 'Π',	# 0x03A0 GREEK CAPITAL LETTER PI
 'Ρ',	# 0x03A1 GREEK CAPITAL LETTER RHO
 '',	# 0xD2 UNDEFINED
 'Σ',	# 0x03A3 GREEK CAPITAL LETTER SIGMA
 'Τ',	# 0x03A4 GREEK CAPITAL LETTER TAU
 'Υ',	# 0x03A5 GREEK CAPITAL LETTER UPSILON
 'Φ',	# 0x03A6 GREEK CAPITAL LETTER PHI
 'Χ',	# 0x03A7 GREEK CAPITAL LETTER CHI
 'Ψ',	# 0x03A8 GREEK CAPITAL LETTER PSI
 'Ω',	# 0x03A9 GREEK CAPITAL LETTER OMEGA
 'Ϊ',	# 0x03AA GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
 'Ϋ',	# 0x03AB GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
 'ά',	# 0x03AC GREEK SMALL LETTER ALPHA WITH TONOS
 'έ',	# 0x03AD GREEK SMALL LETTER EPSILON WITH TONOS
 'ή',	# 0x03AE GREEK SMALL LETTER ETA WITH TONOS
 'ί',	# 0x03AF GREEK SMALL LETTER IOTA WITH TONOS
 'ΰ',	# 0x03B0 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
 'α',	# 0x03B1 GREEK SMALL LETTER ALPHA
 'β',	# 0x03B2 GREEK SMALL LETTER BETA
 'γ',	# 0x03B3 GREEK SMALL LETTER GAMMA
 'δ',	# 0x03B4 GREEK SMALL LETTER DELTA
 'ε',	# 0x03B5 GREEK SMALL LETTER EPSILON
 'ζ',	# 0x03B6 GREEK SMALL LETTER ZETA
 'η',	# 0x03B7 GREEK SMALL LETTER ETA
 'θ',	# 0x03B8 GREEK SMALL LETTER THETA
 'ι',	# 0x03B9 GREEK SMALL LETTER IOTA
 'κ',	# 0x03BA GREEK SMALL LETTER KAPPA
 'λ',	# 0x03BB GREEK SMALL LETTER LAMDA
 'μ',	# 0x03BC GREEK SMALL LETTER MU
 'ν',	# 0x03BD GREEK SMALL LETTER NU
 'ξ',	# 0x03BE GREEK SMALL LETTER XI
 'ο',	# 0x03BF GREEK SMALL LETTER OMICRON
 'π',	# 0x03C0 GREEK SMALL LETTER PI
 'ρ',	# 0x03C1 GREEK SMALL LETTER RHO
 'ς',	# 0x03C2 GREEK SMALL LETTER FINAL SIGMA
 'σ',	# 0x03C3 GREEK SMALL LETTER SIGMA
 'τ',	# 0x03C4 GREEK SMALL LETTER TAU
 'υ',	# 0x03C5 GREEK SMALL LETTER UPSILON
 'φ',	# 0x03C6 GREEK SMALL LETTER PHI
 'χ',	# 0x03C7 GREEK SMALL LETTER CHI
 'ψ',	# 0x03C8 GREEK SMALL LETTER PSI
 'ω',	# 0x03C9 GREEK SMALL LETTER OMEGA
 'ϊ',	# 0x03CA GREEK SMALL LETTER IOTA WITH DIALYTIKA
 'ϋ',	# 0x03CB GREEK SMALL LETTER UPSILON WITH DIALYTIKA
 'ό',	# 0x03CC GREEK SMALL LETTER OMICRON WITH TONOS
 'ύ',	# 0x03CD GREEK SMALL LETTER UPSILON WITH TONOS
 'ώ',	# 0x03CE GREEK SMALL LETTER OMEGA WITH TONOS
 '',	# 0xFF UNDEFINED
); #@windows1253

# Blank out, in place, every character of the caller's string that this
# module treats as illegal in windows-1253.
#  IN: 1 string to clean (changed through the @_ alias)
# OUT: 1 number of characters replaced (false if there were none)

sub illegal_windows1253 {
 return $_[0] =~ s{[\x81\x88\x8A\x8C\x8D\x8E\x8F\x90\x98\x9A\x9C\x9D\x9E\x9F\xAA\xD2\xFF]}{ }gs;
} #illegal_windows1253

# Convert the caller's windows-1253 string to UTF-8, in place.
#  IN: 1 string to convert (changed through the @_ alias)
# OUT: 1 result of the table substitution (number of characters mapped,
#        false if there were none)

sub windows1253 {

# First turn the characters treated as illegal into spaces

 $_[0] =~ s{[\x81\x88\x8A\x8C\x8D\x8E\x8F\x90\x98\x9A\x9C\x9D\x9E\x9F\xAA\xD2\xFF]}{ }gs;

# Then swap each remaining high character for its @windows1253 entry

 return $_[0] =~ s{([\x80-\xFF])}{$windows1253[ ord($1) - 0x80 ]}gse;
} #windows1253

#--------------------------------------------------------------------------
# Convert from windows-1254 to UTF-8

# Replacement table for the windows-1254 bytes 0x80..0xFF: the entry at
# index (byte - 0x80) is the text that windows1254() substitutes for that
# byte.  Each trailing comment gives the Unicode code point and name of the
# entry, or the source byte for a slot marked UNDEFINED (empty string;
# windows1254() turns those bytes into spaces before consulting the table).
my @windows1254 = (
 '€',	# 0x20AC EURO SIGN
 '',	# 0x81 UNDEFINED
 '‚',	# 0x201A SINGLE LOW-9 QUOTATION MARK
 'ƒ',	# 0x0192 LATIN SMALL LETTER F WITH HOOK
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 '…',	# 0x2026 HORIZONTAL ELLIPSIS
 '†',	# 0x2020 DAGGER
 '‡',	# 0x2021 DOUBLE DAGGER
 'ˆ',	# 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
 '‰',	# 0x2030 PER MILLE SIGN
 'Š',	# 0x0160 LATIN CAPITAL LETTER S WITH CARON
 '‹',	# 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 'Œ',	# 0x0152 LATIN CAPITAL LIGATURE OE
 '',	# 0x8D UNDEFINED
 '',	# 0x8E UNDEFINED
 '',	# 0x8F UNDEFINED
 '',	# 0x90 UNDEFINED
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '•',	# 0x2022 BULLET
 '–',	# 0x2013 EN DASH
 '—',	# 0x2014 EM DASH
 '˜',	# 0x02DC SMALL TILDE
 '™',	# 0x2122 TRADE MARK SIGN
 'š',	# 0x0161 LATIN SMALL LETTER S WITH CARON
 '›',	# 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 'œ',	# 0x0153 LATIN SMALL LIGATURE OE
 '',	# 0x9D UNDEFINED
 '',	# 0x9E UNDEFINED
 'Ÿ',	# 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
 ' ',	# 0x00A0 NO-BREAK SPACE
 '¡',	# 0x00A1 INVERTED EXCLAMATION MARK
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '¤',	# 0x00A4 CURRENCY SIGN
 '¥',	# 0x00A5 YEN SIGN
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 '©',	# 0x00A9 COPYRIGHT SIGN
 'ª',	# 0x00AA FEMININE ORDINAL INDICATOR
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 '¯',	# 0x00AF MACRON
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '´',	# 0x00B4 ACUTE ACCENT
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 '¸',	# 0x00B8 CEDILLA
 '¹',	# 0x00B9 SUPERSCRIPT ONE
 'º',	# 0x00BA MASCULINE ORDINAL INDICATOR
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¼',	# 0x00BC VULGAR FRACTION ONE QUARTER
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 '¾',	# 0x00BE VULGAR FRACTION THREE QUARTERS
 '¿',	# 0x00BF INVERTED QUESTION MARK
 'À',	# 0x00C0 LATIN CAPITAL LETTER A WITH GRAVE
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ã',	# 0x00C3 LATIN CAPITAL LETTER A WITH TILDE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'È',	# 0x00C8 LATIN CAPITAL LETTER E WITH GRAVE
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ê',	# 0x00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'Ì',	# 0x00CC LATIN CAPITAL LETTER I WITH GRAVE
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ï',	# 0x00CF LATIN CAPITAL LETTER I WITH DIAERESIS
 'Ğ',	# 0x011E LATIN CAPITAL LETTER G WITH BREVE
 'Ñ',	# 0x00D1 LATIN CAPITAL LETTER N WITH TILDE
 'Ò',	# 0x00D2 LATIN CAPITAL LETTER O WITH GRAVE
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Õ',	# 0x00D5 LATIN CAPITAL LETTER O WITH TILDE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 'Ù',	# 0x00D9 LATIN CAPITAL LETTER U WITH GRAVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'İ',	# 0x0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
 'Ş',	# 0x015E LATIN CAPITAL LETTER S WITH CEDILLA
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'à',	# 0x00E0 LATIN SMALL LETTER A WITH GRAVE
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ã',	# 0x00E3 LATIN SMALL LETTER A WITH TILDE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'è',	# 0x00E8 LATIN SMALL LETTER E WITH GRAVE
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ê',	# 0x00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ì',	# 0x00EC LATIN SMALL LETTER I WITH GRAVE
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 'ğ',	# 0x011F LATIN SMALL LETTER G WITH BREVE
 'ñ',	# 0x00F1 LATIN SMALL LETTER N WITH TILDE
 'ò',	# 0x00F2 LATIN SMALL LETTER O WITH GRAVE
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'õ',	# 0x00F5 LATIN SMALL LETTER O WITH TILDE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 'ù',	# 0x00F9 LATIN SMALL LETTER U WITH GRAVE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ı',	# 0x0131 LATIN SMALL LETTER DOTLESS I
 'ş',	# 0x015F LATIN SMALL LETTER S WITH CEDILLA
 'ÿ',	# 0x00FF LATIN SMALL LETTER Y WITH DIAERESIS
); #@windows1254

# Blank out, in place, every character of the caller's string that
# windows-1254 leaves undefined.
#  IN: 1 string to clean (changed through the @_ alias)
# OUT: 1 number of characters replaced (false if there were none)

sub illegal_windows1254 {
 return $_[0] =~ s{[\x81\x8D\x8E\x8F\x90\x9D\x9E]}{ }gs;
} #illegal_windows1254

# Convert the caller's windows-1254 string to UTF-8, in place.
#  IN: 1 string to convert (changed through the @_ alias)
# OUT: 1 result of the table substitution (number of characters mapped,
#        false if there were none)

sub windows1254 {

# First turn the undefined code points into spaces

 $_[0] =~ s{[\x81\x8D\x8E\x8F\x90\x9D\x9E]}{ }gs;

# Then swap each remaining high character for its @windows1254 entry

 return $_[0] =~ s{([\x80-\xFF])}{$windows1254[ ord($1) - 0x80 ]}gse;
} #windows1254

#--------------------------------------------------------------------------
# Convert from windows-1255 to UTF-8

# Replacement table for the windows-1255 bytes 0x80..0xFF: the entry at
# index (byte - 0x80) is the text that windows1255() substitutes for that
# byte.  Each trailing comment gives the Unicode code point and name of the
# entry, or the source byte for a slot marked UNDEFINED (empty string;
# windows1255() turns those bytes into spaces before consulting the table).
my @windows1255 = (
 '€',	# 0x20AC EURO SIGN
 '',	# 0x81 UNDEFINED
 '‚',	# 0x201A SINGLE LOW-9 QUOTATION MARK
 'ƒ',	# 0x0192 LATIN SMALL LETTER F WITH HOOK
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 '…',	# 0x2026 HORIZONTAL ELLIPSIS
 '†',	# 0x2020 DAGGER
 '‡',	# 0x2021 DOUBLE DAGGER
 'ˆ',	# 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
 '‰',	# 0x2030 PER MILLE SIGN
 '',	# 0x8A UNDEFINED
 '‹',	# 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 '',	# 0x8C UNDEFINED
 '',	# 0x8D UNDEFINED
 '',	# 0x8E UNDEFINED
 '',	# 0x8F UNDEFINED
 '',	# 0x90 UNDEFINED
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '•',	# 0x2022 BULLET
 '–',	# 0x2013 EN DASH
 '—',	# 0x2014 EM DASH
 '˜',	# 0x02DC SMALL TILDE
 '™',	# 0x2122 TRADE MARK SIGN
 '',	# 0x9A UNDEFINED
 '›',	# 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 '',	# 0x9C UNDEFINED
 '',	# 0x9D UNDEFINED
 '',	# 0x9E UNDEFINED
 '',	# 0x9F UNDEFINED
 ' ',	# 0x00A0 NO-BREAK SPACE
 '¡',	# 0x00A1 INVERTED EXCLAMATION MARK
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '₪',	# 0x20AA NEW SHEQEL SIGN
 '¥',	# 0x00A5 YEN SIGN
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 '©',	# 0x00A9 COPYRIGHT SIGN
 '×',	# 0x00D7 MULTIPLICATION SIGN
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 '¯',	# 0x00AF MACRON
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '´',	# 0x00B4 ACUTE ACCENT
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 '¸',	# 0x00B8 CEDILLA
 '¹',	# 0x00B9 SUPERSCRIPT ONE
 '÷',	# 0x00F7 DIVISION SIGN
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¼',	# 0x00BC VULGAR FRACTION ONE QUARTER
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 '¾',	# 0x00BE VULGAR FRACTION THREE QUARTERS
 '¿',	# 0x00BF INVERTED QUESTION MARK
 'ְ',	# 0x05B0 HEBREW POINT SHEVA
 'ֱ',	# 0x05B1 HEBREW POINT HATAF SEGOL
 'ֲ',	# 0x05B2 HEBREW POINT HATAF PATAH
 'ֳ',	# 0x05B3 HEBREW POINT HATAF QAMATS
 'ִ',	# 0x05B4 HEBREW POINT HIRIQ
 'ֵ',	# 0x05B5 HEBREW POINT TSERE
 'ֶ',	# 0x05B6 HEBREW POINT SEGOL
 'ַ',	# 0x05B7 HEBREW POINT PATAH
 'ָ',	# 0x05B8 HEBREW POINT QAMATS
 'ֹ',	# 0x05B9 HEBREW POINT HOLAM
 '',	# 0xCA UNDEFINED
 'ֻ',	# 0x05BB HEBREW POINT QUBUTS
 'ּ',	# 0x05BC HEBREW POINT DAGESH OR MAPIQ
 'ֽ',	# 0x05BD HEBREW POINT METEG
 '־',	# 0x05BE HEBREW PUNCTUATION MAQAF
 'ֿ',	# 0x05BF HEBREW POINT RAFE
 '׀',	# 0x05C0 HEBREW PUNCTUATION PASEQ
 'ׁ',	# 0x05C1 HEBREW POINT SHIN DOT
 'ׂ',	# 0x05C2 HEBREW POINT SIN DOT
 '׃',	# 0x05C3 HEBREW PUNCTUATION SOF PASUQ
 'װ',	# 0x05F0 HEBREW LIGATURE YIDDISH DOUBLE VAV
 'ױ',	# 0x05F1 HEBREW LIGATURE YIDDISH VAV YOD
 'ײ',	# 0x05F2 HEBREW LIGATURE YIDDISH DOUBLE YOD
 '׳',	# 0x05F3 HEBREW PUNCTUATION GERESH
 '״',	# 0x05F4 HEBREW PUNCTUATION GERSHAYIM
 '',	# 0xD9 UNDEFINED
 '',	# 0xDA UNDEFINED
 '',	# 0xDB UNDEFINED
 '',	# 0xDC UNDEFINED
 '',	# 0xDD UNDEFINED
 '',	# 0xDE UNDEFINED
 '',	# 0xDF UNDEFINED
 'א',	# 0x05D0 HEBREW LETTER ALEF
 'ב',	# 0x05D1 HEBREW LETTER BET
 'ג',	# 0x05D2 HEBREW LETTER GIMEL
 'ד',	# 0x05D3 HEBREW LETTER DALET
 'ה',	# 0x05D4 HEBREW LETTER HE
 'ו',	# 0x05D5 HEBREW LETTER VAV
 'ז',	# 0x05D6 HEBREW LETTER ZAYIN
 'ח',	# 0x05D7 HEBREW LETTER HET
 'ט',	# 0x05D8 HEBREW LETTER TET
 'י',	# 0x05D9 HEBREW LETTER YOD
 'ך',	# 0x05DA HEBREW LETTER FINAL KAF
 'כ',	# 0x05DB HEBREW LETTER KAF
 'ל',	# 0x05DC HEBREW LETTER LAMED
 'ם',	# 0x05DD HEBREW LETTER FINAL MEM
 'מ',	# 0x05DE HEBREW LETTER MEM
 'ן',	# 0x05DF HEBREW LETTER FINAL NUN
 'נ',	# 0x05E0 HEBREW LETTER NUN
 'ס',	# 0x05E1 HEBREW LETTER SAMEKH
 'ע',	# 0x05E2 HEBREW LETTER AYIN
 'ף',	# 0x05E3 HEBREW LETTER FINAL PE
 'פ',	# 0x05E4 HEBREW LETTER PE
 'ץ',	# 0x05E5 HEBREW LETTER FINAL TSADI
 'צ',	# 0x05E6 HEBREW LETTER TSADI
 'ק',	# 0x05E7 HEBREW LETTER QOF
 'ר',	# 0x05E8 HEBREW LETTER RESH
 'ש',	# 0x05E9 HEBREW LETTER SHIN
 'ת',	# 0x05EA HEBREW LETTER TAV
 '',	# 0xFB UNDEFINED
 '',	# 0xFC UNDEFINED
 '‎',	# 0x200E LEFT-TO-RIGHT MARK
 '‏',	# 0x200F RIGHT-TO-LEFT MARK
 '',	# 0xFF UNDEFINED
); #@windows1255

# Blank out, in place, every character of the caller's string that
# windows-1255 leaves undefined.
#  IN: 1 string to clean (changed through the @_ alias)
# OUT: 1 number of characters replaced (false if there were none)

sub illegal_windows1255 {
 return $_[0] =~ s{[\x81\x8A\x8C\x8D\x8E\x8F\x90\x9A\x9C\x9D\x9E\x9F\xCA\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xFB\xFC\xFF]}{ }gs;
} #illegal_windows1255

# Convert the caller's windows-1255 string to UTF-8, in place.
#  IN: 1 string to convert (changed through the @_ alias)
# OUT: 1 result of the table substitution (number of characters mapped,
#        false if there were none)

sub windows1255 {

# First turn the undefined code points into spaces

 $_[0] =~ s{[\x81\x8A\x8C\x8D\x8E\x8F\x90\x9A\x9C\x9D\x9E\x9F\xCA\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xFB\xFC\xFF]}{ }gs;

# Then swap each remaining high character for its @windows1255 entry

 return $_[0] =~ s{([\x80-\xFF])}{$windows1255[ ord($1) - 0x80 ]}gse;
} #windows1255

#--------------------------------------------------------------------------
# Convert from windows-1256 to UTF-8

# Replacement table for the windows-1256 bytes 0x80..0xFF: the entry at
# index (byte - 0x80) is the text that windows1256() substitutes for that
# byte.  Each trailing comment gives the Unicode code point and name of the
# entry.  No slot in this table is marked UNDEFINED, which matches the
# empty illegal_windows1256().
my @windows1256 = (
 '€',	# 0x20AC EURO SIGN
 'پ',	# 0x067E ARABIC LETTER PEH
 '‚',	# 0x201A SINGLE LOW-9 QUOTATION MARK
 'ƒ',	# 0x0192 LATIN SMALL LETTER F WITH HOOK
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 '…',	# 0x2026 HORIZONTAL ELLIPSIS
 '†',	# 0x2020 DAGGER
 '‡',	# 0x2021 DOUBLE DAGGER
 'ˆ',	# 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
 '‰',	# 0x2030 PER MILLE SIGN
 'ٹ',	# 0x0679 ARABIC LETTER TTEH
 '‹',	# 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 'Œ',	# 0x0152 LATIN CAPITAL LIGATURE OE
 'چ',	# 0x0686 ARABIC LETTER TCHEH
 'ژ',	# 0x0698 ARABIC LETTER JEH
 'ڈ',	# 0x0688 ARABIC LETTER DDAL
 'گ',	# 0x06AF ARABIC LETTER GAF
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '•',	# 0x2022 BULLET
 '–',	# 0x2013 EN DASH
 '—',	# 0x2014 EM DASH
 'ک',	# 0x06A9 ARABIC LETTER KEHEH
 '™',	# 0x2122 TRADE MARK SIGN
 'ڑ',	# 0x0691 ARABIC LETTER RREH
 '›',	# 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 'œ',	# 0x0153 LATIN SMALL LIGATURE OE
 '‌',	# 0x200C ZERO WIDTH NON-JOINER
 '‍',	# 0x200D ZERO WIDTH JOINER
 'ں',	# 0x06BA ARABIC LETTER NOON GHUNNA
 ' ',	# 0x00A0 NO-BREAK SPACE
 '،',	# 0x060C ARABIC COMMA
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '¤',	# 0x00A4 CURRENCY SIGN
 '¥',	# 0x00A5 YEN SIGN
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 '©',	# 0x00A9 COPYRIGHT SIGN
 'ھ',	# 0x06BE ARABIC LETTER HEH DOACHASHMEE
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 '¯',	# 0x00AF MACRON
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '´',	# 0x00B4 ACUTE ACCENT
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 '¸',	# 0x00B8 CEDILLA
 '¹',	# 0x00B9 SUPERSCRIPT ONE
 '؛',	# 0x061B ARABIC SEMICOLON
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¼',	# 0x00BC VULGAR FRACTION ONE QUARTER
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 '¾',	# 0x00BE VULGAR FRACTION THREE QUARTERS
 '؟',	# 0x061F ARABIC QUESTION MARK
 'ہ',	# 0x06C1 ARABIC LETTER HEH GOAL
 'ء',	# 0x0621 ARABIC LETTER HAMZA
 'آ',	# 0x0622 ARABIC LETTER ALEF WITH MADDA ABOVE
 'أ',	# 0x0623 ARABIC LETTER ALEF WITH HAMZA ABOVE
 'ؤ',	# 0x0624 ARABIC LETTER WAW WITH HAMZA ABOVE
 'إ',	# 0x0625 ARABIC LETTER ALEF WITH HAMZA BELOW
 'ئ',	# 0x0626 ARABIC LETTER YEH WITH HAMZA ABOVE
 'ا',	# 0x0627 ARABIC LETTER ALEF
 'ب',	# 0x0628 ARABIC LETTER BEH
 'ة',	# 0x0629 ARABIC LETTER TEH MARBUTA
 'ت',	# 0x062A ARABIC LETTER TEH
 'ث',	# 0x062B ARABIC LETTER THEH
 'ج',	# 0x062C ARABIC LETTER JEEM
 'ح',	# 0x062D ARABIC LETTER HAH
 'خ',	# 0x062E ARABIC LETTER KHAH
 'د',	# 0x062F ARABIC LETTER DAL
 'ذ',	# 0x0630 ARABIC LETTER THAL
 'ر',	# 0x0631 ARABIC LETTER REH
 'ز',	# 0x0632 ARABIC LETTER ZAIN
 'س',	# 0x0633 ARABIC LETTER SEEN
 'ش',	# 0x0634 ARABIC LETTER SHEEN
 'ص',	# 0x0635 ARABIC LETTER SAD
 'ض',	# 0x0636 ARABIC LETTER DAD
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'ط',	# 0x0637 ARABIC LETTER TAH
 'ظ',	# 0x0638 ARABIC LETTER ZAH
 'ع',	# 0x0639 ARABIC LETTER AIN
 'غ',	# 0x063A ARABIC LETTER GHAIN
 'ـ',	# 0x0640 ARABIC TATWEEL
 'ف',	# 0x0641 ARABIC LETTER FEH
 'ق',	# 0x0642 ARABIC LETTER QAF
 'ك',	# 0x0643 ARABIC LETTER KAF
 'à',	# 0x00E0 LATIN SMALL LETTER A WITH GRAVE
 'ل',	# 0x0644 ARABIC LETTER LAM
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'م',	# 0x0645 ARABIC LETTER MEEM
 'ن',	# 0x0646 ARABIC LETTER NOON
 'ه',	# 0x0647 ARABIC LETTER HEH
 'و',	# 0x0648 ARABIC LETTER WAW
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'è',	# 0x00E8 LATIN SMALL LETTER E WITH GRAVE
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ê',	# 0x00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'ى',	# 0x0649 ARABIC LETTER ALEF MAKSURA
 'ي',	# 0x064A ARABIC LETTER YEH
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 'ً',	# 0x064B ARABIC FATHATAN
 'ٌ',	# 0x064C ARABIC DAMMATAN
 'ٍ',	# 0x064D ARABIC KASRATAN
 'َ',	# 0x064E ARABIC FATHA
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'ُ',	# 0x064F ARABIC DAMMA
 'ِ',	# 0x0650 ARABIC KASRA
 '÷',	# 0x00F7 DIVISION SIGN
 'ّ',	# 0x0651 ARABIC SHADDA
 'ù',	# 0x00F9 LATIN SMALL LETTER U WITH GRAVE
 'ْ',	# 0x0652 ARABIC SUKUN
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 '‎',	# 0x200E LEFT-TO-RIGHT MARK
 '‏',	# 0x200F RIGHT-TO-LEFT MARK
 'ے',	# 0x06D2 ARABIC LETTER YEH BARREE
); #@windows1256

# Deliberate no-op counterpart of the other illegal_* routines: the
# caller's string is left untouched and nothing is returned.
#  IN: 1 string (not modified)
# OUT: nothing (empty list / undef)

sub illegal_windows1256 {
 return;
} #illegal_windows1256

# Convert the caller's windows-1256 string to UTF-8, in place.
#  IN: 1 string to convert (changed through the @_ alias)
# OUT: 1 result of the table substitution (number of characters mapped,
#        false if there were none)

sub windows1256 {

# Swap each high character for its @windows1256 entry

 return $_[0] =~ s{([\x80-\xFF])}{$windows1256[ ord($1) - 0x80 ]}gse;
} #windows1256

#--------------------------------------------------------------------------
# Convert from windows-1257 to UTF-8

# Replacement table for the windows-1257 bytes 0x80..0xFF: the entry at
# index (byte - 0x80) is the text that windows1257() substitutes for that
# byte.  Each trailing comment gives the Unicode code point and name of the
# entry, or the source byte for a slot marked UNDEFINED (empty string;
# windows1257() turns those bytes into spaces before consulting the table).
my @windows1257 = (
 '€',	# 0x20AC EURO SIGN
 '',	# 0x81 UNDEFINED
 '‚',	# 0x201A SINGLE LOW-9 QUOTATION MARK
 '',	# 0x83 UNDEFINED
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 '…',	# 0x2026 HORIZONTAL ELLIPSIS
 '†',	# 0x2020 DAGGER
 '‡',	# 0x2021 DOUBLE DAGGER
 '',	# 0x88 UNDEFINED
 '‰',	# 0x2030 PER MILLE SIGN
 '',	# 0x8A UNDEFINED
 '‹',	# 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 '',	# 0x8C UNDEFINED
 '¨',	# 0x00A8 DIAERESIS
 'ˇ',	# 0x02C7 CARON
 '¸',	# 0x00B8 CEDILLA
 '',	# 0x90 UNDEFINED
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '•',	# 0x2022 BULLET
 '–',	# 0x2013 EN DASH
 '—',	# 0x2014 EM DASH
 '',	# 0x98 UNDEFINED
 '™',	# 0x2122 TRADE MARK SIGN
 '',	# 0x9A UNDEFINED
 '›',	# 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 '',	# 0x9C UNDEFINED
 '¯',	# 0x00AF MACRON
 '˛',	# 0x02DB OGONEK
 '',	# 0x9F UNDEFINED
 ' ',	# 0x00A0 NO-BREAK SPACE
 '',	# 0xA1 UNDEFINED
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '¤',	# 0x00A4 CURRENCY SIGN
 '',	# 0xA5 UNDEFINED
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 '©',	# 0x00A9 COPYRIGHT SIGN
 'Ŗ',	# 0x0156 LATIN CAPITAL LETTER R WITH CEDILLA
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '´',	# 0x00B4 ACUTE ACCENT
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 '¹',	# 0x00B9 SUPERSCRIPT ONE
 'ŗ',	# 0x0157 LATIN SMALL LETTER R WITH CEDILLA
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¼',	# 0x00BC VULGAR FRACTION ONE QUARTER
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 '¾',	# 0x00BE VULGAR FRACTION THREE QUARTERS
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'Ą',	# 0x0104 LATIN CAPITAL LETTER A WITH OGONEK
 'Į',	# 0x012E LATIN CAPITAL LETTER I WITH OGONEK
 'Ā',	# 0x0100 LATIN CAPITAL LETTER A WITH MACRON
 'Ć',	# 0x0106 LATIN CAPITAL LETTER C WITH ACUTE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Ę',	# 0x0118 LATIN CAPITAL LETTER E WITH OGONEK
 'Ē',	# 0x0112 LATIN CAPITAL LETTER E WITH MACRON
 'Č',	# 0x010C LATIN CAPITAL LETTER C WITH CARON
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ź',	# 0x0179 LATIN CAPITAL LETTER Z WITH ACUTE
 'Ė',	# 0x0116 LATIN CAPITAL LETTER E WITH DOT ABOVE
 'Ģ',	# 0x0122 LATIN CAPITAL LETTER G WITH CEDILLA
 'Ķ',	# 0x0136 LATIN CAPITAL LETTER K WITH CEDILLA
 'Ī',	# 0x012A LATIN CAPITAL LETTER I WITH MACRON
 'Ļ',	# 0x013B LATIN CAPITAL LETTER L WITH CEDILLA
 'Š',	# 0x0160 LATIN CAPITAL LETTER S WITH CARON
 'Ń',	# 0x0143 LATIN CAPITAL LETTER N WITH ACUTE
 'Ņ',	# 0x0145 LATIN CAPITAL LETTER N WITH CEDILLA
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ō',	# 0x014C LATIN CAPITAL LETTER O WITH MACRON
 'Õ',	# 0x00D5 LATIN CAPITAL LETTER O WITH TILDE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ų',	# 0x0172 LATIN CAPITAL LETTER U WITH OGONEK
 'Ł',	# 0x0141 LATIN CAPITAL LETTER L WITH STROKE
 'Ś',	# 0x015A LATIN CAPITAL LETTER S WITH ACUTE
 'Ū',	# 0x016A LATIN CAPITAL LETTER U WITH MACRON
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ż',	# 0x017B LATIN CAPITAL LETTER Z WITH DOT ABOVE
 'Ž',	# 0x017D LATIN CAPITAL LETTER Z WITH CARON
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'ą',	# 0x0105 LATIN SMALL LETTER A WITH OGONEK
 'į',	# 0x012F LATIN SMALL LETTER I WITH OGONEK
 'ā',	# 0x0101 LATIN SMALL LETTER A WITH MACRON
 'ć',	# 0x0107 LATIN SMALL LETTER C WITH ACUTE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'ę',	# 0x0119 LATIN SMALL LETTER E WITH OGONEK
 'ē',	# 0x0113 LATIN SMALL LETTER E WITH MACRON
 'č',	# 0x010D LATIN SMALL LETTER C WITH CARON
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ź',	# 0x017A LATIN SMALL LETTER Z WITH ACUTE
 'ė',	# 0x0117 LATIN SMALL LETTER E WITH DOT ABOVE
 'ģ',	# 0x0123 LATIN SMALL LETTER G WITH CEDILLA
 'ķ',	# 0x0137 LATIN SMALL LETTER K WITH CEDILLA
 'ī',	# 0x012B LATIN SMALL LETTER I WITH MACRON
 'ļ',	# 0x013C LATIN SMALL LETTER L WITH CEDILLA
 'š',	# 0x0161 LATIN SMALL LETTER S WITH CARON
 'ń',	# 0x0144 LATIN SMALL LETTER N WITH ACUTE
 'ņ',	# 0x0146 LATIN SMALL LETTER N WITH CEDILLA
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ō',	# 0x014D LATIN SMALL LETTER O WITH MACRON
 'õ',	# 0x00F5 LATIN SMALL LETTER O WITH TILDE
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ų',	# 0x0173 LATIN SMALL LETTER U WITH OGONEK
 'ł',	# 0x0142 LATIN SMALL LETTER L WITH STROKE
 'ś',	# 0x015B LATIN SMALL LETTER S WITH ACUTE
 'ū',	# 0x016B LATIN SMALL LETTER U WITH MACRON
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ż',	# 0x017C LATIN SMALL LETTER Z WITH DOT ABOVE
 'ž',	# 0x017E LATIN SMALL LETTER Z WITH CARON
 '˙',	# 0x02D9 DOT ABOVE
); #@windows1257

# Blank out, in place, every character of the caller's string that
# windows-1257 leaves undefined.
#  IN: 1 string to clean (changed through the @_ alias)
# OUT: 1 number of characters replaced (false if there were none)

sub illegal_windows1257 {
 return $_[0] =~ s{[\x81\x83\x88\x8A\x8C\x90\x98\x9A\x9C\x9F\xA1\xA5]}{ }gs;
} #illegal_windows1257

# Convert the caller's windows-1257 string to UTF-8, in place.
#  IN: 1 string to convert (changed through the @_ alias)
# OUT: 1 result of the table substitution (number of characters mapped,
#        false if there were none)

sub windows1257 {

# First turn the undefined code points into spaces

 $_[0] =~ s{[\x81\x83\x88\x8A\x8C\x90\x98\x9A\x9C\x9F\xA1\xA5]}{ }gs;

# Then swap each remaining high character for its @windows1257 entry

 return $_[0] =~ s{([\x80-\xFF])}{$windows1257[ ord($1) - 0x80 ]}gse;
} #windows1257

#--------------------------------------------------------------------------
# Convert from windows-1258 to UTF-8

# Replacement table for the windows-1258 bytes 0x80..0xFF: the entry at
# index (byte - 0x80) is the text that windows1258() substitutes for that
# byte.  Each trailing comment gives the Unicode code point and name of the
# entry, or the source byte for a slot marked UNDEFINED (empty string;
# windows1258() turns those bytes into spaces before consulting the table).
my @windows1258 = (
 '€',	# 0x20AC EURO SIGN
 '',	# 0x81 UNDEFINED
 '‚',	# 0x201A SINGLE LOW-9 QUOTATION MARK
 'ƒ',	# 0x0192 LATIN SMALL LETTER F WITH HOOK
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 '…',	# 0x2026 HORIZONTAL ELLIPSIS
 '†',	# 0x2020 DAGGER
 '‡',	# 0x2021 DOUBLE DAGGER
 'ˆ',	# 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
 '‰',	# 0x2030 PER MILLE SIGN
 '',	# 0x8A UNDEFINED
 '‹',	# 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 'Œ',	# 0x0152 LATIN CAPITAL LIGATURE OE
 '',	# 0x8D UNDEFINED
 '',	# 0x8E UNDEFINED
 '',	# 0x8F UNDEFINED
 '',	# 0x90 UNDEFINED
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '•',	# 0x2022 BULLET
 '–',	# 0x2013 EN DASH
 '—',	# 0x2014 EM DASH
 '˜',	# 0x02DC SMALL TILDE
 '™',	# 0x2122 TRADE MARK SIGN
 '',	# 0x9A UNDEFINED
 '›',	# 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 'œ',	# 0x0153 LATIN SMALL LIGATURE OE
 '',	# 0x9D UNDEFINED
 '',	# 0x9E UNDEFINED
 'Ÿ',	# 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
 ' ',	# 0x00A0 NO-BREAK SPACE
 '¡',	# 0x00A1 INVERTED EXCLAMATION MARK
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '¤',	# 0x00A4 CURRENCY SIGN
 '¥',	# 0x00A5 YEN SIGN
 '¦',	# 0x00A6 BROKEN BAR
 '§',	# 0x00A7 SECTION SIGN
 '¨',	# 0x00A8 DIAERESIS
 '©',	# 0x00A9 COPYRIGHT SIGN
 'ª',	# 0x00AA FEMININE ORDINAL INDICATOR
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¬',	# 0x00AC NOT SIGN
 '­',	# 0x00AD SOFT HYPHEN
 '®',	# 0x00AE REGISTERED SIGN
 '¯',	# 0x00AF MACRON
 '°',	# 0x00B0 DEGREE SIGN
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '²',	# 0x00B2 SUPERSCRIPT TWO
 '³',	# 0x00B3 SUPERSCRIPT THREE
 '´',	# 0x00B4 ACUTE ACCENT
 'µ',	# 0x00B5 MICRO SIGN
 '¶',	# 0x00B6 PILCROW SIGN
 '·',	# 0x00B7 MIDDLE DOT
 '¸',	# 0x00B8 CEDILLA
 '¹',	# 0x00B9 SUPERSCRIPT ONE
 'º',	# 0x00BA MASCULINE ORDINAL INDICATOR
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 '¼',	# 0x00BC VULGAR FRACTION ONE QUARTER
 '½',	# 0x00BD VULGAR FRACTION ONE HALF
 '¾',	# 0x00BE VULGAR FRACTION THREE QUARTERS
 '¿',	# 0x00BF INVERTED QUESTION MARK
 'À',	# 0x00C0 LATIN CAPITAL LETTER A WITH GRAVE
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ă',	# 0x0102 LATIN CAPITAL LETTER A WITH BREVE
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'È',	# 0x00C8 LATIN CAPITAL LETTER E WITH GRAVE
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ê',	# 0x00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 '̀',	# 0x0300 COMBINING GRAVE ACCENT
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ï',	# 0x00CF LATIN CAPITAL LETTER I WITH DIAERESIS
 'Đ',	# 0x0110 LATIN CAPITAL LETTER D WITH STROKE
 'Ñ',	# 0x00D1 LATIN CAPITAL LETTER N WITH TILDE
 '̉',	# 0x0309 COMBINING HOOK ABOVE
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 'Ơ',	# 0x01A0 LATIN CAPITAL LETTER O WITH HORN
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 '×',	# 0x00D7 MULTIPLICATION SIGN
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 'Ù',	# 0x00D9 LATIN CAPITAL LETTER U WITH GRAVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'Ư',	# 0x01AF LATIN CAPITAL LETTER U WITH HORN
 '̃',	# 0x0303 COMBINING TILDE
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 'à',	# 0x00E0 LATIN SMALL LETTER A WITH GRAVE
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ă',	# 0x0103 LATIN SMALL LETTER A WITH BREVE
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'è',	# 0x00E8 LATIN SMALL LETTER E WITH GRAVE
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'ê',	# 0x00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 '́',	# 0x0301 COMBINING ACUTE ACCENT
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 'đ',	# 0x0111 LATIN SMALL LETTER D WITH STROKE
 'ñ',	# 0x00F1 LATIN SMALL LETTER N WITH TILDE
 '̣',	# 0x0323 COMBINING DOT BELOW
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'ơ',	# 0x01A1 LATIN SMALL LETTER O WITH HORN
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 '÷',	# 0x00F7 DIVISION SIGN
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 'ù',	# 0x00F9 LATIN SMALL LETTER U WITH GRAVE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 'ư',	# 0x01B0 LATIN SMALL LETTER U WITH HORN
 '₫',	# 0x20AB DONG SIGN
 'ÿ',	# 0x00FF LATIN SMALL LETTER Y WITH DIAERESIS
); #@windows1258

# Blank out, in place, every character of the caller's string that
# windows-1258 leaves undefined.
#  IN: 1 string to clean (changed through the @_ alias)
# OUT: 1 number of characters replaced (false if there were none)

sub illegal_windows1258 {
 return $_[0] =~ s{[\x81\x8A\x8D\x8E\x8F\x90\x9A\x9D\x9E]}{ }gs;
} #illegal_windows1258

# Convert the caller's windows-1258 string to UTF-8, in place.
#  IN: 1 string to convert (changed through the @_ alias)
# OUT: 1 result of the table substitution (number of characters mapped,
#        false if there were none)

sub windows1258 {

# First turn the undefined code points into spaces

 $_[0] =~ s{[\x81\x8A\x8D\x8E\x8F\x90\x9A\x9D\x9E]}{ }gs;

# Then swap each remaining high character for its @windows1258 entry

 return $_[0] =~ s{([\x80-\xFF])}{$windows1258[ ord($1) - 0x80 ]}gse;
} #windows1258

#--------------------------------------------------------------------------
# Convert from windows-874 to UTF-8

# Replacement table for the windows-874 bytes 0x80..0xFF: the entry at
# index (byte - 0x80) is the text that windows874() substitutes for that
# byte.  Each trailing comment gives the Unicode code point and name of the
# entry, or the source byte for a slot marked UNDEFINED (empty string;
# windows874() turns those bytes into spaces before consulting the table).
my @windows874 = (
 '€',	# 0x20AC EURO SIGN
 '',	# 0x81 UNDEFINED
 '',	# 0x82 UNDEFINED
 '',	# 0x83 UNDEFINED
 '',	# 0x84 UNDEFINED
 '…',	# 0x2026 HORIZONTAL ELLIPSIS
 '',	# 0x86 UNDEFINED
 '',	# 0x87 UNDEFINED
 '',	# 0x88 UNDEFINED
 '',	# 0x89 UNDEFINED
 '',	# 0x8A UNDEFINED
 '',	# 0x8B UNDEFINED
 '',	# 0x8C UNDEFINED
 '',	# 0x8D UNDEFINED
 '',	# 0x8E UNDEFINED
 '',	# 0x8F UNDEFINED
 '',	# 0x90 UNDEFINED
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '•',	# 0x2022 BULLET
 '–',	# 0x2013 EN DASH
 '—',	# 0x2014 EM DASH
 '',	# 0x98 UNDEFINED
 '',	# 0x99 UNDEFINED
 '',	# 0x9A UNDEFINED
 '',	# 0x9B UNDEFINED
 '',	# 0x9C UNDEFINED
 '',	# 0x9D UNDEFINED
 '',	# 0x9E UNDEFINED
 '',	# 0x9F UNDEFINED
 ' ',	# 0x00A0 NO-BREAK SPACE
 'ก',	# 0x0E01 THAI CHARACTER KO KAI
 'ข',	# 0x0E02 THAI CHARACTER KHO KHAI
 'ฃ',	# 0x0E03 THAI CHARACTER KHO KHUAT
 'ค',	# 0x0E04 THAI CHARACTER KHO KHWAI
 'ฅ',	# 0x0E05 THAI CHARACTER KHO KHON
 'ฆ',	# 0x0E06 THAI CHARACTER KHO RAKHANG
 'ง',	# 0x0E07 THAI CHARACTER NGO NGU
 'จ',	# 0x0E08 THAI CHARACTER CHO CHAN
 'ฉ',	# 0x0E09 THAI CHARACTER CHO CHING
 'ช',	# 0x0E0A THAI CHARACTER CHO CHANG
 'ซ',	# 0x0E0B THAI CHARACTER SO SO
 'ฌ',	# 0x0E0C THAI CHARACTER CHO CHOE
 'ญ',	# 0x0E0D THAI CHARACTER YO YING
 'ฎ',	# 0x0E0E THAI CHARACTER DO CHADA
 'ฏ',	# 0x0E0F THAI CHARACTER TO PATAK
 'ฐ',	# 0x0E10 THAI CHARACTER THO THAN
 'ฑ',	# 0x0E11 THAI CHARACTER THO NANGMONTHO
 'ฒ',	# 0x0E12 THAI CHARACTER THO PHUTHAO
 'ณ',	# 0x0E13 THAI CHARACTER NO NEN
 'ด',	# 0x0E14 THAI CHARACTER DO DEK
 'ต',	# 0x0E15 THAI CHARACTER TO TAO
 'ถ',	# 0x0E16 THAI CHARACTER THO THUNG
 'ท',	# 0x0E17 THAI CHARACTER THO THAHAN
 'ธ',	# 0x0E18 THAI CHARACTER THO THONG
 'น',	# 0x0E19 THAI CHARACTER NO NU
 'บ',	# 0x0E1A THAI CHARACTER BO BAIMAI
 'ป',	# 0x0E1B THAI CHARACTER PO PLA
 'ผ',	# 0x0E1C THAI CHARACTER PHO PHUNG
 'ฝ',	# 0x0E1D THAI CHARACTER FO FA
 'พ',	# 0x0E1E THAI CHARACTER PHO PHAN
 'ฟ',	# 0x0E1F THAI CHARACTER FO FAN
 'ภ',	# 0x0E20 THAI CHARACTER PHO SAMPHAO
 'ม',	# 0x0E21 THAI CHARACTER MO MA
 'ย',	# 0x0E22 THAI CHARACTER YO YAK
 'ร',	# 0x0E23 THAI CHARACTER RO RUA
 'ฤ',	# 0x0E24 THAI CHARACTER RU
 'ล',	# 0x0E25 THAI CHARACTER LO LING
 'ฦ',	# 0x0E26 THAI CHARACTER LU
 'ว',	# 0x0E27 THAI CHARACTER WO WAEN
 'ศ',	# 0x0E28 THAI CHARACTER SO SALA
 'ษ',	# 0x0E29 THAI CHARACTER SO RUSI
 'ส',	# 0x0E2A THAI CHARACTER SO SUA
 'ห',	# 0x0E2B THAI CHARACTER HO HIP
 'ฬ',	# 0x0E2C THAI CHARACTER LO CHULA
 'อ',	# 0x0E2D THAI CHARACTER O ANG
 'ฮ',	# 0x0E2E THAI CHARACTER HO NOKHUK
 'ฯ',	# 0x0E2F THAI CHARACTER PAIYANNOI
 'ะ',	# 0x0E30 THAI CHARACTER SARA A
 'ั',	# 0x0E31 THAI CHARACTER MAI HAN-AKAT
 'า',	# 0x0E32 THAI CHARACTER SARA AA
 'ำ',	# 0x0E33 THAI CHARACTER SARA AM
 'ิ',	# 0x0E34 THAI CHARACTER SARA I
 'ี',	# 0x0E35 THAI CHARACTER SARA II
 'ึ',	# 0x0E36 THAI CHARACTER SARA UE
 'ื',	# 0x0E37 THAI CHARACTER SARA UEE
 'ุ',	# 0x0E38 THAI CHARACTER SARA U
 'ู',	# 0x0E39 THAI CHARACTER SARA UU
 'ฺ',	# 0x0E3A THAI CHARACTER PHINTHU
 '',	# 0xDB UNDEFINED
 '',	# 0xDC UNDEFINED
 '',	# 0xDD UNDEFINED
 '',	# 0xDE UNDEFINED
 '฿',	# 0x0E3F THAI CURRENCY SYMBOL BAHT
 'เ',	# 0x0E40 THAI CHARACTER SARA E
 'แ',	# 0x0E41 THAI CHARACTER SARA AE
 'โ',	# 0x0E42 THAI CHARACTER SARA O
 'ใ',	# 0x0E43 THAI CHARACTER SARA AI MAIMUAN
 'ไ',	# 0x0E44 THAI CHARACTER SARA AI MAIMALAI
 'ๅ',	# 0x0E45 THAI CHARACTER LAKKHANGYAO
 'ๆ',	# 0x0E46 THAI CHARACTER MAIYAMOK
 '็',	# 0x0E47 THAI CHARACTER MAITAIKHU
 '่',	# 0x0E48 THAI CHARACTER MAI EK
 '้',	# 0x0E49 THAI CHARACTER MAI THO
 '๊',	# 0x0E4A THAI CHARACTER MAI TRI
 '๋',	# 0x0E4B THAI CHARACTER MAI CHATTAWA
 '์',	# 0x0E4C THAI CHARACTER THANTHAKHAT
 'ํ',	# 0x0E4D THAI CHARACTER NIKHAHIT
 '๎',	# 0x0E4E THAI CHARACTER YAMAKKAN
 '๏',	# 0x0E4F THAI CHARACTER FONGMAN
 '๐',	# 0x0E50 THAI DIGIT ZERO
 '๑',	# 0x0E51 THAI DIGIT ONE
 '๒',	# 0x0E52 THAI DIGIT TWO
 '๓',	# 0x0E53 THAI DIGIT THREE
 '๔',	# 0x0E54 THAI DIGIT FOUR
 '๕',	# 0x0E55 THAI DIGIT FIVE
 '๖',	# 0x0E56 THAI DIGIT SIX
 '๗',	# 0x0E57 THAI DIGIT SEVEN
 '๘',	# 0x0E58 THAI DIGIT EIGHT
 '๙',	# 0x0E59 THAI DIGIT NINE
 '๚',	# 0x0E5A THAI CHARACTER ANGKHANKHU
 '๛',	# 0x0E5B THAI CHARACTER KHOMUT
 '',	# 0xFC UNDEFINED
 '',	# 0xFD UNDEFINED
 '',	# 0xFE UNDEFINED
 '',	# 0xFF UNDEFINED
); #@windows874

# Blank out, in place, every character of the caller's string that
# windows-874 leaves undefined.
#  IN: 1 string to clean (changed through the @_ alias)
# OUT: 1 number of characters replaced (false if there were none)

sub illegal_windows874 {
 return $_[0] =~ s{[\x81\x82\x83\x84\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xDB\xDC\xDD\xDE\xFC\xFD\xFE\xFF]}{ }gs;
} #illegal_windows874

# Convert the caller's windows-874 string to UTF-8, in place.
#  IN: 1 string to convert (changed through the @_ alias)
# OUT: 1 result of the table substitution (number of characters mapped,
#        false if there were none)

sub windows874 {

# First turn the undefined code points into spaces

 $_[0] =~ s{[\x81\x82\x83\x84\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xDB\xDC\xDD\xDE\xFC\xFD\xFE\xFF]}{ }gs;

# Then swap each remaining high character for its @windows874 entry

 return $_[0] =~ s{([\x80-\xFF])}{$windows874[ ord($1) - 0x80 ]}gse;
} #windows874

#--------------------------------------------------------------------------
# Convert from x-mac-roman to UTF-8

# Lookup table for x-mac-roman bytes 0x80-0xFF: the entry at index
# (byte - 0x80) is the UTF-8 sequence that replaces that byte.  The hex value
# in each comment is the Unicode codepoint of the entry.
#
# Entries whose character is invisible in an editor (NO-BREAK SPACE and the
# private-use Apple logo) are spelled out as explicit byte escapes so that
# they cannot be lost or mistaken for a plain space / empty string.

my @xmacroman = (
 'Ä',	# 0x00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
 'Å',	# 0x00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
 'Ç',	# 0x00C7 LATIN CAPITAL LETTER C WITH CEDILLA
 'É',	# 0x00C9 LATIN CAPITAL LETTER E WITH ACUTE
 'Ñ',	# 0x00D1 LATIN CAPITAL LETTER N WITH TILDE
 'Ö',	# 0x00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
 'Ü',	# 0x00DC LATIN CAPITAL LETTER U WITH DIAERESIS
 'á',	# 0x00E1 LATIN SMALL LETTER A WITH ACUTE
 'à',	# 0x00E0 LATIN SMALL LETTER A WITH GRAVE
 'â',	# 0x00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
 'ä',	# 0x00E4 LATIN SMALL LETTER A WITH DIAERESIS
 'ã',	# 0x00E3 LATIN SMALL LETTER A WITH TILDE
 'å',	# 0x00E5 LATIN SMALL LETTER A WITH RING ABOVE
 'ç',	# 0x00E7 LATIN SMALL LETTER C WITH CEDILLA
 'é',	# 0x00E9 LATIN SMALL LETTER E WITH ACUTE
 'è',	# 0x00E8 LATIN SMALL LETTER E WITH GRAVE
 'ê',	# 0x00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
 'ë',	# 0x00EB LATIN SMALL LETTER E WITH DIAERESIS
 'í',	# 0x00ED LATIN SMALL LETTER I WITH ACUTE
 'ì',	# 0x00EC LATIN SMALL LETTER I WITH GRAVE
 'î',	# 0x00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
 'ï',	# 0x00EF LATIN SMALL LETTER I WITH DIAERESIS
 'ñ',	# 0x00F1 LATIN SMALL LETTER N WITH TILDE
 'ó',	# 0x00F3 LATIN SMALL LETTER O WITH ACUTE
 'ò',	# 0x00F2 LATIN SMALL LETTER O WITH GRAVE
 'ô',	# 0x00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
 'ö',	# 0x00F6 LATIN SMALL LETTER O WITH DIAERESIS
 'õ',	# 0x00F5 LATIN SMALL LETTER O WITH TILDE
 'ú',	# 0x00FA LATIN SMALL LETTER U WITH ACUTE
 'ù',	# 0x00F9 LATIN SMALL LETTER U WITH GRAVE
 'û',	# 0x00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
 'ü',	# 0x00FC LATIN SMALL LETTER U WITH DIAERESIS
 '†',	# 0x2020 DAGGER
 '°',	# 0x00B0 DEGREE SIGN
 '¢',	# 0x00A2 CENT SIGN
 '£',	# 0x00A3 POUND SIGN
 '§',	# 0x00A7 SECTION SIGN
 '•',	# 0x2022 BULLET
 '¶',	# 0x00B6 PILCROW SIGN
 'ß',	# 0x00DF LATIN SMALL LETTER SHARP S
 '®',	# 0x00AE REGISTERED SIGN
 '©',	# 0x00A9 COPYRIGHT SIGN
 '™',	# 0x2122 TRADE MARK SIGN
 '´',	# 0x00B4 ACUTE ACCENT
 '¨',	# 0x00A8 DIAERESIS
 '≠',	# 0x2260 NOT EQUAL TO
 'Æ',	# 0x00C6 LATIN CAPITAL LETTER AE
 'Ø',	# 0x00D8 LATIN CAPITAL LETTER O WITH STROKE
 '∞',	# 0x221E INFINITY
 '±',	# 0x00B1 PLUS-MINUS SIGN
 '≤',	# 0x2264 LESS-THAN OR EQUAL TO
 '≥',	# 0x2265 GREATER-THAN OR EQUAL TO
 '¥',	# 0x00A5 YEN SIGN
 'µ',	# 0x00B5 MICRO SIGN
 '∂',	# 0x2202 PARTIAL DIFFERENTIAL
 '∑',	# 0x2211 N-ARY SUMMATION
 '∏',	# 0x220F N-ARY PRODUCT
 'π',	# 0x03C0 GREEK SMALL LETTER PI
 '∫',	# 0x222B INTEGRAL
 'ª',	# 0x00AA FEMININE ORDINAL INDICATOR
 'º',	# 0x00BA MASCULINE ORDINAL INDICATOR
 'Ω',	# 0x03A9 GREEK CAPITAL LETTER OMEGA
 'æ',	# 0x00E6 LATIN SMALL LETTER AE
 'ø',	# 0x00F8 LATIN SMALL LETTER O WITH STROKE
 '¿',	# 0x00BF INVERTED QUESTION MARK
 '¡',	# 0x00A1 INVERTED EXCLAMATION MARK
 '¬',	# 0x00AC NOT SIGN
 '√',	# 0x221A SQUARE ROOT
 'ƒ',	# 0x0192 LATIN SMALL LETTER F WITH HOOK
 '≈',	# 0x2248 ALMOST EQUAL TO
 '∆',	# 0x2206 INCREMENT
 '«',	# 0x00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 '»',	# 0x00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 '…',	# 0x2026 HORIZONTAL ELLIPSIS
 "\xC2\xA0",	# 0x00A0 NO-BREAK SPACE (explicit bytes: invisible as a literal)
 'À',	# 0x00C0 LATIN CAPITAL LETTER A WITH GRAVE
 'Ã',	# 0x00C3 LATIN CAPITAL LETTER A WITH TILDE
 'Õ',	# 0x00D5 LATIN CAPITAL LETTER O WITH TILDE
 'Œ',	# 0x0152 LATIN CAPITAL LIGATURE OE
 'œ',	# 0x0153 LATIN SMALL LIGATURE OE
 '–',	# 0x2013 EN DASH
 '—',	# 0x2014 EM DASH
 '“',	# 0x201C LEFT DOUBLE QUOTATION MARK
 '”',	# 0x201D RIGHT DOUBLE QUOTATION MARK
 '‘',	# 0x2018 LEFT SINGLE QUOTATION MARK
 '’',	# 0x2019 RIGHT SINGLE QUOTATION MARK
 '÷',	# 0x00F7 DIVISION SIGN
 '◊',	# 0x25CA LOZENGE
 'ÿ',	# 0x00FF LATIN SMALL LETTER Y WITH DIAERESIS
 'Ÿ',	# 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
 '⁄',	# 0x2044 FRACTION SLASH
 '€',	# 0x20AC EURO SIGN
 '‹',	# 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 '›',	# 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 'ﬁ',	# 0xFB01 LATIN SMALL LIGATURE FI
 'ﬂ',	# 0xFB02 LATIN SMALL LIGATURE FL
 '‡',	# 0x2021 DOUBLE DAGGER
 '·',	# 0x00B7 MIDDLE DOT
 '‚',	# 0x201A SINGLE LOW-9 QUOTATION MARK
 '„',	# 0x201E DOUBLE LOW-9 QUOTATION MARK
 '‰',	# 0x2030 PER MILLE SIGN
 'Â',	# 0x00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 'Ê',	# 0x00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 'Á',	# 0x00C1 LATIN CAPITAL LETTER A WITH ACUTE
 'Ë',	# 0x00CB LATIN CAPITAL LETTER E WITH DIAERESIS
 'È',	# 0x00C8 LATIN CAPITAL LETTER E WITH GRAVE
 'Í',	# 0x00CD LATIN CAPITAL LETTER I WITH ACUTE
 'Î',	# 0x00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 'Ï',	# 0x00CF LATIN CAPITAL LETTER I WITH DIAERESIS
 'Ì',	# 0x00CC LATIN CAPITAL LETTER I WITH GRAVE
 'Ó',	# 0x00D3 LATIN CAPITAL LETTER O WITH ACUTE
 'Ô',	# 0x00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 "\xEF\xA3\xBF",	# 0xF8FF Apple logo (private use; explicit bytes so it is never empty)
 'Ò',	# 0x00D2 LATIN CAPITAL LETTER O WITH GRAVE
 'Ú',	# 0x00DA LATIN CAPITAL LETTER U WITH ACUTE
 'Û',	# 0x00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 'Ù',	# 0x00D9 LATIN CAPITAL LETTER U WITH GRAVE
 'ı',	# 0x0131 LATIN SMALL LETTER DOTLESS I
 'ˆ',	# 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
 '˜',	# 0x02DC SMALL TILDE
 '¯',	# 0x00AF MACRON
 '˘',	# 0x02D8 BREVE
 '˙',	# 0x02D9 DOT ABOVE
 '˚',	# 0x02DA RING ABOVE
 '¸',	# 0x00B8 CEDILLA
 '˝',	# 0x02DD DOUBLE ACUTE ACCENT
 '˛',	# 0x02DB OGONEK
 'ˇ',	# 0x02C7 CARON
); #@xmacroman

sub illegal_xmacroman {

# Deliberately does nothing: the parameter is left exactly as it was passed

 return;
} #illegal_xmacroman

sub xmacroman {

# Swap every high byte (0x80-0xFF) for the entry found at offset
#  (byte - 0x80) in the @xmacroman table, directly in the caller's variable

 my $table = \@xmacroman;
 $_[0] =~ s#([\x80-\xFF])#$table->[ord($1)-0x80]#sge;
} #xmacroman

#------------------------------------------------------------------------

__END__

=head1 NAME

NexTrieve::UTF8 - change encoding to UTF-8

=head1 SYNOPSIS

 use NexTrieve;
 $ntv = NexTrieve->new( | {method => value} );
 $xml = $ntv->recode( $to,$xml,$from );

=head1 DESCRIPTION

The UTF8 package of the Perl support for NexTrieve.  Does not create an object
but is accessed indirectly through the "recode" method of NexTrieve.pm.

=head1 NO METHODS

This is only a support package: hence there is no object and hence there are
no methods to call.

=head1 CONVERT TO UTF-8

The following subroutines are available from this package for performing
conversions to UTF-8.  Each of these subroutines does an in-place conversion
of the input parameter, e.g. B<NexTrieve::UTF8::iso88591( $xml );>.  If any
characters are found that are illegal for the encoding, they will be replaced
by spaces.

 ucs2be	convert from ucs-2be to UTF-8
 ucs2le	convert from ucs-2le to UTF-8
 ucs4be	convert from ucs-4be to UTF-8
 ucs4le	convert from ucs-4le to UTF-8
 iso88591	convert from iso-8859-1 to UTF-8
 iso885910	convert from iso-8859-10 to UTF-8
 iso885913	convert from iso-8859-13 to UTF-8
 iso885914	convert from iso-8859-14 to UTF-8
 iso885915	convert from iso-8859-15 to UTF-8
 iso885916	convert from iso-8859-16 to UTF-8
 iso88592	convert from iso-8859-2 to UTF-8
 iso88593	convert from iso-8859-3 to UTF-8
 iso88594	convert from iso-8859-4 to UTF-8
 iso88595	convert from iso-8859-5 to UTF-8
 iso88596	convert from iso-8859-6 to UTF-8
 iso88597	convert from iso-8859-7 to UTF-8
 iso88598	convert from iso-8859-8 to UTF-8
 iso88599	convert from iso-8859-9 to UTF-8
 windows1250	convert from windows-1250 to UTF-8
 windows1251	convert from windows-1251 to UTF-8
 windows1252	convert from windows-1252 to UTF-8
 windows1253	convert from windows-1253 to UTF-8
 windows1254	convert from windows-1254 to UTF-8
 windows1255	convert from windows-1255 to UTF-8
 windows1256	convert from windows-1256 to UTF-8
 windows1257	convert from windows-1257 to UTF-8
 windows1258	convert from windows-1258 to UTF-8
 windows874	convert from windows-874 to UTF-8
 xmacroman	convert from x-mac-roman to UTF-8

=head1 REMOVE ILLEGAL CHARACTERS

The following subroutines are available from this package for replacing
characters that are illegal in the indicated encoding by spaces.  Each of
these subroutines does an in-place conversion of the input parameter, e.g.
B<NexTrieve::UTF8::illegal_iso88593( $xml )>.

 illegal_iso88593	remove characters illegal for iso-8859-3
 illegal_iso88596	remove characters illegal for iso-8859-6
 illegal_iso88597	remove characters illegal for iso-8859-7
 illegal_iso88598	remove characters illegal for iso-8859-8
 illegal_windows1250	remove characters illegal for windows-1250
 illegal_windows1251	remove characters illegal for windows-1251
 illegal_windows1252	remove characters illegal for windows-1252
 illegal_windows1253	remove characters illegal for windows-1253
 illegal_windows1254	remove characters illegal for windows-1254
 illegal_windows1255	remove characters illegal for windows-1255
 illegal_windows1257	remove characters illegal for windows-1257
 illegal_windows1258	remove characters illegal for windows-1258
 illegal_windows874	remove characters illegal for windows-874

Please note that in some encodings there are no characters that are considered
to be illegal (such as iso-8859-1).  In that case, the subroutine still exists
but does not perform any operation.

=head1 AUTHOR

Elizabeth Mattijsen, <liz@dijkmat.nl>.

Please report bugs to <perlbugs@dijkmat.nl>.

=head1 COPYRIGHT

Copyright (c) 1995-2002 Elizabeth Mattijsen <liz@dijkmat.nl>. All rights
reserved.  This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 SEE ALSO

http://www.nextrieve.com, the NexTrieve.pm and the other NexTrieve::xxx modules.

=cut
