├── .github
    └── workflows
    │   ├── platforms.yml
    │   ├── strawberry.yml
    │   └── versions.yml
├── .gitignore
├── AUTHORS
├── Byte
    ├── Byte.pm
    └── Makefile.PL
├── CN
    ├── CN.pm
    └── Makefile.PL
├── Changes
├── EBCDIC
    ├── EBCDIC.pm
    └── Makefile.PL
├── Encode.pm
├── Encode.xs
├── Encode
    ├── Changes.e2x
    ├── ConfigLocal_PM.e2x
    ├── Makefile_PL.e2x
    ├── README.e2x
    ├── _PM.e2x
    ├── _T.e2x
    └── encode.h
├── JP
    ├── JP.pm
    └── Makefile.PL
├── KR
    ├── KR.pm
    └── Makefile.PL
├── MANIFEST
├── Makefile.PL
├── README
├── README.md
├── Symbol
    ├── Makefile.PL
    └── Symbol.pm
├── TW
    ├── Makefile.PL
    └── TW.pm
├── Unicode
    ├── Makefile.PL
    ├── Unicode.pm
    └── Unicode.xs
├── bin
    ├── enc2xs
    ├── encguess
    ├── piconv
    ├── ucm2table
    ├── ucmlint
    ├── ucmsort
    └── unidump
├── encengine.c
├── encoding.pm
├── lib
    └── Encode
    │   ├── Alias.pm
    │   ├── CJKConstants.pm
    │   ├── CN
    │       └── HZ.pm
    │   ├── Config.pm
    │   ├── Encoder.pm
    │   ├── Encoding.pm
    │   ├── GSM0338.pm
    │   ├── Guess.pm
    │   ├── JP
    │       ├── H2Z.pm
    │       └── JIS7.pm
    │   ├── KR
    │       └── 2022_KR.pm
    │   ├── MIME
    │       ├── Header.pm
    │       ├── Header
    │       │   └── ISO_2022_JP.pm
    │       └── Name.pm
    │   ├── PerlIO.pod
    │   ├── Supported.pod
    │   └── Unicode
    │       └── UTF7.pm
├── t
    ├── Aliases.t
    ├── CJKT.t
    ├── Encode.t
    ├── Encoder.t
    ├── Mod_EUCJP.pm
    ├── Unicode.t
    ├── Unicode_trailing_nul.t
    ├── at-cn.t
    ├── at-tw.t
    ├── big5-eten.enc
    ├── big5-eten.utf
    ├── big5-hkscs.enc
    ├── big5-hkscs.utf
    ├── cow.t
    ├── decode.t
    ├── enc_data.t
    ├── enc_eucjp.t
    ├── enc_module.enc
    ├── enc_module.t
    ├── enc_utf8.t
    ├── encoding-locale.t
    ├── encoding.t
    ├── fallback.t
    ├── from_to.t
    ├── gb2312.enc
    ├── gb2312.utf
    ├── grow.t
    ├── gsm0338.t
    ├── guess.t
    ├── isa.t
    ├── jis7-fallback.t
    ├── jisx0201.enc
    ├── jisx0201.utf
    ├── jisx0208.enc
    ├── jisx0208.utf
    ├── jisx0212.enc
    ├── jisx0212.utf
    ├── jperl.t
    ├── ksc5601.enc
    ├── ksc5601.utf
    ├── magic.t
    ├── mime-header.t
    ├── mime-name.t
    ├── mime_header_iso2022jp.t
    ├── perlio.t
    ├── piconv.t
    ├── rt.pl
    ├── rt113164.t
    ├── rt65541.t
    ├── rt76824.t
    ├── rt85489.t
    ├── rt86327.t
    ├── taint.t
    ├── truncated_utf8.t
    ├── undef.t
    ├── unibench.pl
    ├── use-Encode-Alias.t
    ├── utf32warnings.t
    ├── utf8ref.t
    ├── utf8strict.t
    ├── utf8warnings.t
    ├── whatwg-aliases.json
    ├── whatwg-aliases.t
    └── xml.t
└── ucm
    ├── 8859-1.ucm
    ├── 8859-10.ucm
    ├── 8859-11.ucm
    ├── 8859-13.ucm
    ├── 8859-14.ucm
    ├── 8859-15.ucm
    ├── 8859-16.ucm
    ├── 8859-2.ucm
    ├── 8859-3.ucm
    ├── 8859-4.ucm
    ├── 8859-5.ucm
    ├── 8859-6.ucm
    ├── 8859-7.ucm
    ├── 8859-8.ucm
    ├── 8859-9.ucm
    ├── adobeStdenc.ucm
    ├── adobeSymbol.ucm
    ├── adobeZdingbat.ucm
    ├── ascii.ucm
    ├── big5-eten.ucm
    ├── big5-hkscs.ucm
    ├── cp037.ucm
    ├── cp1006.ucm
    ├── cp1026.ucm
    ├── cp1047.ucm
    ├── cp1250.ucm
    ├── cp1251.ucm
    ├── cp1252.ucm
    ├── cp1253.ucm
    ├── cp1254.ucm
    ├── cp1255.ucm
    ├── cp1256.ucm
    ├── cp1257.ucm
    ├── cp1258.ucm
    ├── cp424.ucm
    ├── cp437.ucm
    ├── cp500.ucm
    ├── cp737.ucm
    ├── cp775.ucm
    ├── cp850.ucm
    ├── cp852.ucm
    ├── cp855.ucm
    ├── cp856.ucm
    ├── cp857.ucm
    ├── cp858.ucm
    ├── cp860.ucm
    ├── cp861.ucm
    ├── cp862.ucm
    ├── cp863.ucm
    ├── cp864.ucm
    ├── cp865.ucm
    ├── cp866.ucm
    ├── cp869.ucm
    ├── cp874.ucm
    ├── cp875.ucm
    ├── cp932.ucm
    ├── cp936.ucm
    ├── cp949.ucm
    ├── cp950.ucm
    ├── ctrl.ucm
    ├── dingbats.ucm
    ├── euc-cn.ucm
    ├── euc-jp.ucm
    ├── euc-kr.ucm
    ├── gb12345.ucm
    ├── gb2312.ucm
    ├── hp-roman8.ucm
    ├── ir-165.ucm
    ├── jis0201.ucm
    ├── jis0208.ucm
    ├── jis0212.ucm
    ├── johab.ucm
    ├── koi8-f.ucm
    ├── koi8-r.ucm
    ├── koi8-u.ucm
    ├── ksc5601.ucm
    ├── macArabic.ucm
    ├── macCentEuro.ucm
    ├── macChinsimp.ucm
    ├── macChintrad.ucm
    ├── macCroatian.ucm
    ├── macCyrillic.ucm
    ├── macDingbats.ucm
    ├── macFarsi.ucm
    ├── macGreek.ucm
    ├── macHebrew.ucm
    ├── macIceland.ucm
    ├── macJapanese.ucm
    ├── macKorean.ucm
    ├── macROMnn.ucm
    ├── macRUMnn.ucm
    ├── macRoman.ucm
    ├── macSami.ucm
    ├── macSymbol.ucm
    ├── macThai.ucm
    ├── macTurkish.ucm
    ├── macUkraine.ucm
    ├── nextstep.ucm
    ├── null.ucm
    ├── posix-bc.ucm
    ├── shiftjis.ucm
    ├── symbol.ucm
    └── viscii.ucm


/.github/workflows/platforms.yml:
--------------------------------------------------------------------------------
 1 | name: CI on Perl 5.{40,38,36} on {Linux,macOS}
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     tags: [ '*.*' ]
 7 |   pull_request:
 8 |     branches: [ main ]
 9 | 
10 | jobs:
11 |   build:
12 |     runs-on: ${{ matrix.os }}
13 |     strategy:
14 |       matrix:
15 |         os: ['ubuntu-latest', 'macos-latest']
16 |         perl: [ '5.40', '5.38', '5.36' ]
17 |     name: Perl ${{ matrix.perl }} on ${{ matrix.os }}
18 |     steps:
19 |       - uses: actions/checkout@v4
20 |       - name: Set up perl
21 |         uses: shogo82148/actions-setup-perl@v1
22 |         with:
23 |           perl-version: ${{ matrix.perl }}
24 |       - run: perl -V
25 |       - run: cpanm --installdeps .
26 |       - run: perl Makefile.PL
27 |       - run: make test
28 | 


--------------------------------------------------------------------------------
/.github/workflows/strawberry.yml:
--------------------------------------------------------------------------------
 1 | name: CI on Perl 5.{38,36} on Strawberry Perl
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     tags: [ '*.*' ]
 7 | 
 8 | jobs:
 9 |   build:
10 |     runs-on: ${{ matrix.os }}
11 |     strategy:
12 |       matrix:
13 |         os: ['windows-latest']
14 |         perl: [ '5.38', '5.36' ]
15 |     name: Perl ${{ matrix.perl }} on ${{ matrix.os }}
16 |     steps:
17 |       - uses: actions/checkout@v4
18 |       - name: Set up perl
19 |         uses: shogo82148/actions-setup-perl@v1
20 |         with:
21 |           perl-version: ${{ matrix.perl }}
22 |           distribution: strawberry
23 |       - run: perl -V
24 |       - run: perl Makefile.PL
25 |       - run: gmake test
26 | 


--------------------------------------------------------------------------------
/.github/workflows/versions.yml:
--------------------------------------------------------------------------------
 1 | name: CI on Perl 5.8-34 on Linux
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     tags: [ '*.*' ]
 7 | 
 8 | jobs:
 9 |   build:
10 |     runs-on: ${{ matrix.os }}
11 |     strategy:
12 |       matrix:
13 |         os:
14 |           - 'ubuntu-latest'
15 |         perl:
16 |           - "5.34"
17 |           - "5.32"
18 |           - "5.30"
19 |           - "5.28"
20 |           - "5.26"
21 |           - "5.24"
22 |           - "5.22"
23 |           - "5.20"
24 |           - "5.18"
25 |           - "5.16"
26 |           - "5.14"
27 |           - "5.12"
28 |           - "5.10"
29 |           - "5.8"
30 |     name: Perl ${{ matrix.perl }} on ${{ matrix.os }}
31 |     steps:
32 |       - uses: actions/checkout@v4
33 |       - name: Set up perl
34 |         uses: shogo82148/actions-setup-perl@v1
35 |         with:
36 |           perl-version: ${{ matrix.perl }}
37 |       - run: perl -V
38 |       - run: cpanm --installdeps .
39 |       - run: perl Makefile.PL
40 |       - run: make test
41 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | /blib/
  2 | /.build/
  3 | _build/
  4 | cover_db/
  5 | inc/
  6 | Build
  7 | !Build/
  8 | Build.bat
  9 | .last_cover_stats
 10 | /Makefile
 11 | /Makefile.old
 12 | /MANIFEST.bak
 13 | /META.yml
 14 | /META.json
 15 | /MYMETA.*
 16 | nytprof.out
 17 | /pm_to_blib
 18 | *.o
 19 | *.bs
 20 | /_eumm/
 21 | Byte/Byte.c
 22 | Byte/Byte.xs
 23 | Byte/MYMETA.json
 24 | Byte/MYMETA.yml
 25 | Byte/Makefile
 26 | Byte/Makefile.old
 27 | Byte/byte_t.c
 28 | Byte/byte_t.exh
 29 | Byte/byte_t.fnm
 30 | Byte/byte_t.h
 31 | Byte/pm_to_blib
 32 | CN/CN.c
 33 | CN/CN.xs
 34 | CN/MYMETA.json
 35 | CN/MYMETA.yml
 36 | CN/Makefile
 37 | CN/Makefile.old
 38 | CN/cp_00_t.c
 39 | CN/cp_00_t.exh
 40 | CN/cp_00_t.fnm
 41 | CN/cp_00_t.h
 42 | CN/eu_01_t.c
 43 | CN/eu_01_t.exh
 44 | CN/eu_01_t.fnm
 45 | CN/eu_01_t.h
 46 | CN/gb_02_t.c
 47 | CN/gb_02_t.exh
 48 | CN/gb_02_t.fnm
 49 | CN/gb_02_t.h
 50 | CN/gb_03_t.c
 51 | CN/gb_03_t.exh
 52 | CN/gb_03_t.fnm
 53 | CN/gb_03_t.h
 54 | CN/ir_04_t.c
 55 | CN/ir_04_t.exh
 56 | CN/ir_04_t.fnm
 57 | CN/ir_04_t.h
 58 | CN/ma_05_t.c
 59 | CN/ma_05_t.exh
 60 | CN/ma_05_t.fnm
 61 | CN/ma_05_t.h
 62 | CN/pm_to_blib
 63 | EBCDIC/EBCDIC.c
 64 | EBCDIC/EBCDIC.xs
 65 | EBCDIC/MYMETA.json
 66 | EBCDIC/MYMETA.yml
 67 | EBCDIC/Makefile
 68 | EBCDIC/Makefile.old
 69 | EBCDIC/ebcdic_t.c
 70 | EBCDIC/ebcdic_t.exh
 71 | EBCDIC/ebcdic_t.fnm
 72 | EBCDIC/ebcdic_t.h
 73 | EBCDIC/pm_to_blib
 74 | Encode.c
 75 | JP/JP.c
 76 | JP/JP.xs
 77 | JP/MYMETA.json
 78 | JP/MYMETA.yml
 79 | JP/Makefile
 80 | JP/Makefile.old
 81 | JP/cp_00_t.c
 82 | JP/cp_00_t.exh
 83 | JP/cp_00_t.fnm
 84 | JP/cp_00_t.h
 85 | JP/eu_01_t.c
 86 | JP/eu_01_t.exh
 87 | JP/eu_01_t.fnm
 88 | JP/eu_01_t.h
 89 | JP/ji_02_t.c
 90 | JP/ji_02_t.exh
 91 | JP/ji_02_t.fnm
 92 | JP/ji_02_t.h
 93 | JP/ji_03_t.c
 94 | JP/ji_03_t.exh
 95 | JP/ji_03_t.fnm
 96 | JP/ji_03_t.h
 97 | JP/ji_04_t.c
 98 | JP/ji_04_t.exh
 99 | JP/ji_04_t.fnm
100 | JP/ji_04_t.h
101 | JP/ma_05_t.c
102 | JP/ma_05_t.exh
103 | JP/ma_05_t.fnm
104 | JP/ma_05_t.h
105 | JP/pm_to_blib
106 | JP/sh_06_t.c
107 | JP/sh_06_t.exh
108 | JP/sh_06_t.fnm
109 | JP/sh_06_t.h
110 | KR/KR.c
111 | KR/KR.xs
112 | KR/MYMETA.json
113 | KR/MYMETA.yml
114 | KR/Makefile
115 | KR/Makefile.old
116 | KR/cp_00_t.c
117 | KR/cp_00_t.exh
118 | KR/cp_00_t.fnm
119 | KR/cp_00_t.h
120 | KR/eu_01_t.c
121 | KR/eu_01_t.exh
122 | KR/eu_01_t.fnm
123 | KR/eu_01_t.h
124 | KR/jo_02_t.c
125 | KR/jo_02_t.exh
126 | KR/jo_02_t.fnm
127 | KR/jo_02_t.h
128 | KR/ks_03_t.c
129 | KR/ks_03_t.exh
130 | KR/ks_03_t.fnm
131 | KR/ks_03_t.h
132 | KR/ma_04_t.c
133 | KR/ma_04_t.exh
134 | KR/ma_04_t.fnm
135 | KR/ma_04_t.h
136 | KR/pm_to_blib
137 | Makefile~
138 | Symbol/MYMETA.json
139 | Symbol/MYMETA.yml
140 | Symbol/Makefile
141 | Symbol/Makefile.old
142 | Symbol/Symbol.c
143 | Symbol/Symbol.xs
144 | Symbol/pm_to_blib
145 | Symbol/symbol_t.c
146 | Symbol/symbol_t.exh
147 | Symbol/symbol_t.fnm
148 | Symbol/symbol_t.h
149 | TW/MYMETA.json
150 | TW/MYMETA.yml
151 | TW/Makefile
152 | TW/Makefile.old
153 | TW/TW.c
154 | TW/TW.xs
155 | TW/bi_00_t.c
156 | TW/bi_00_t.exh
157 | TW/bi_00_t.fnm
158 | TW/bi_00_t.h
159 | TW/bi_01_t.c
160 | TW/bi_01_t.exh
161 | TW/bi_01_t.fnm
162 | TW/bi_01_t.h
163 | TW/cp_02_t.c
164 | TW/cp_02_t.exh
165 | TW/cp_02_t.fnm
166 | TW/cp_02_t.h
167 | TW/ma_03_t.c
168 | TW/ma_03_t.exh
169 | TW/ma_03_t.fnm
170 | TW/ma_03_t.h
171 | TW/pm_to_blib
172 | Unicode/MYMETA.json
173 | Unicode/MYMETA.yml
174 | Unicode/Makefile
175 | Unicode/Makefile.old
176 | Unicode/Unicode.c
177 | Unicode/pm_to_blib
178 | def_t.c
179 | def_t.exh
180 | def_t.fnm
181 | def_t.h
182 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | # To give due honour to those who have made the Encode module what it
 2 | # is today, here are easily-from-changelogs-extractable people and their
 3 | # (hopefully) current and preferred email addresses (as of early 2002,
 4 | # if known).
 5 | #
 6 | # The use of this database for anything else than Encode and/or Perl 
 7 | # development is strictly forbidden.  (Passive distribution with the Perl 
 8 | # source code kit or CPAN is, of course, allowed.)
 9 | #
10 | # This list is in alphabetical order.
11 | --
12 | Alex Davies                     <alex.davies@talktalk.net>
13 | Andreas J. Koenig               <andreas.koenig@anima.de>
14 | Anton Tagunov			<tagunov@motor.ru>
15 | Autrijus Tang			<autrijus@autrijus.org>
16 | Benjamin Goldberg		<goldbb2@earthlink.net>
17 | Bjoern Hoehrmann		<derhoermi@gmx.net>
18 | Bjoern Jacke			<debianbugs@j3e.de>
19 | Chris Nandor			<pudge@pobox.com>
20 | Craig A. Berry			<craigberry@mac.com>
21 | Curtis Jewell			<csjewell@cpan.org>
22 | Dan Kogai			<dankogai@dan.co.jp>
23 | Dave Evans			<dave@rudolf.org.uk>
24 | Deng Liu			<dengliu@ntu.edu.tw>
25 | Dominic Dunlop			<domo@computer.org>
26 | Elizabeth Mattijsen		<liz@dijkmat.nl>
27 | Gerrit P. Haase			<gp@familiehaase.de>
28 | Gisle Aas			<gisle@ActiveState.com>
29 | Graham Barr			<gbarr@pobox.com>
30 | Gurusamy Sarathy		<gsar@activestate.com>
31 | H.Merijn Brand			<h.m.brand@xs4all.nl>
32 | Hugo van der Sanden		<hv@crypt.org>
33 | Inaba Hiroto			<inaba@st.rim.or.jp>
34 | Jarkko Hietaniemi		<jhi@iki.fi>
35 | Jesse Vincent			<jesse@fsck.com>
36 | Jungshik Shin			<jshin@mailaps.org>
37 | KONNO Hiroharu			<hiroharu.konno@bowneglobal.co.jp>
38 | Laszlo Molnar			<ml1050@freemail.hu>
39 | MATSUNO Tokuhiro		<tokuhirom+cpan@gmail.com>
40 | MORIYAMA Masayuki		<msyk@mtg.biglobe.ne.jp>
41 | Makamaka			<makamaka@donzoko.net>
42 | Mark-Jason Dominus		<mjd@plover.com>
43 | Mattia Barbon			<mbarbon@dsi.unive.it>
44 | Michael G Schwern		<schwern@pobox.com>
45 | Miron Cuperman			<miron@hyper.to>
46 | Nicholas Clark			<nick@ccl4.org>
47 | Nick Ing-Simmons		<nick@ing-simmons.net>
48 | Paul Marquess			<paul_marquess@yahoo.co.uk>
49 | Peter Prymmer			<pvhp@best.com>
50 | Philip Newton			<pne@cpan.org>
51 | Piotr Fusik			<pfusik@op.pl>
52 | Rafael Garcia-Suarez		<rgarciasuarez@mandriva.com>
53 | Robin Barker			<rmb1@cise.npl.co.uk>
54 | SADAHIRO Tomoyuki		<SADAHIRO@cpan.org>
55 | SUGAWARA Hajime			<sugawara@hdt.co.jp>
56 | SUZUKI Norio			<ZAP00217@nifty.com>
57 | Simon Cozens			<simon@netthink.co.uk>
58 | Slaven Rezic			<SREZIC@cpan.org>
59 | Spider Boardman			<spider@web.zk3.dec.com>
60 | Steve Hay			<steve.hay@uk.radan.com>
61 | Steve Peters			<steve@fisharerojo.org>
62 | Tatsuhiko Miyagawa		<miyagawa@bulknews.net>
63 | Tels				<perl_dummy@bloodgate.com>
64 | Tony Cook			<tony@develop-help.com>
65 | Vadim Konovalov			<vkonovalov@peterstar.ru>
66 | Yitzchak Scott-Thoennes		<sthoenna@efn.org>
67 | 


--------------------------------------------------------------------------------
/Byte/Byte.pm:
--------------------------------------------------------------------------------
  1 | package Encode::Byte;
  2 | use strict;
  3 | use warnings;
  4 | use Encode;
  5 | our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
  6 | 
  7 | use XSLoader;
  8 | XSLoader::load( __PACKAGE__, $VERSION );
  9 | 
 10 | 1;
 11 | __END__
 12 | 
 13 | =head1 NAME
 14 | 
 15 | Encode::Byte - Single Byte Encodings
 16 | 
 17 | =head1 SYNOPSIS
 18 | 
 19 |     use Encode qw/encode decode/; 
 20 |     $greek = encode("iso-8859-7", $utf8);  # loads Encode::Byte implicitly
 21 |     $utf8  = decode("iso-8859-7", $greek); # ditto
 22 | 
 23 | =head1 ABSTRACT
 24 | 
 25 | This module implements various single byte encodings.  For most cases it uses
 26 | \x80-\xff (upper half) to map non-ASCII characters.  Encodings
 27 | supported are as follows.   
 28 | 
 29 |   Canonical      Alias		                      Description
 30 |   --------------------------------------------------------------------
 31 |   # ISO 8859 series
 32 |   (iso-8859-1	is in built-in)
 33 |   iso-8859-2	latin2					     [ISO]
 34 |   iso-8859-3	latin3					     [ISO]
 35 |   iso-8859-4	latin4					     [ISO]
 36 |   iso-8859-5						     [ISO]
 37 |   iso-8859-6						     [ISO]
 38 |   iso-8859-7						     [ISO]
 39 |   iso-8859-8						     [ISO]
 40 |   iso-8859-9	latin5					     [ISO]
 41 |   iso-8859-10	latin6					     [ISO]
 42 |   iso-8859-11
 43 |   (iso-8859-12 is nonexistent)
 44 |   iso-8859-13   latin7					     [ISO]
 45 |   iso-8859-14	latin8					     [ISO]
 46 |   iso-8859-15	latin9					     [ISO]
 47 |   iso-8859-16	latin10					     [ISO]
 48 | 
 49 |   # Cyrillic
 50 |   koi8-f					
 51 |   koi8-r        cp878					 [RFC1489]
 52 |   koi8-u						 [RFC2319]
 53 | 
 54 |   # Vietnamese
 55 |   viscii
 56 | 
 57 |   # all cp* are also available as ibm-*, ms-*, and windows-*
 58 |   # also see L<http://msdn.microsoft.com/en-us/library/aa752010%28VS.85%29.aspx>
 59 | 
 60 |   cp424  
 61 |   cp437  
 62 |   cp737  
 63 |   cp775  
 64 |   cp850  
 65 |   cp852  
 66 |   cp855  
 67 |   cp856  
 68 |   cp857  
 69 |   cp860  
 70 |   cp861  
 71 |   cp862  
 72 |   cp863  
 73 |   cp864  
 74 |   cp865  
 75 |   cp866  
 76 |   cp869  
 77 |   cp874  
 78 |   cp1006  
 79 |   cp1250	WinLatin2
 80 |   cp1251	WinCyrillic
 81 |   cp1252	WinLatin1
 82 |   cp1253	WinGreek
 83 |   cp1254	WinTurkish
 84 |   cp1255	WinHebrew
 85 |   cp1256	WinArabic
 86 |   cp1257	WinBaltic
 87 |   cp1258	WinVietnamese
 88 | 
 89 |   # Macintosh
 90 |   # Also see L<http://developer.apple.com/technotes/tn/tn1150.html>
 91 |   MacArabic  
 92 |   MacCentralEurRoman  
 93 |   MacCroatian  
 94 |   MacCyrillic  
 95 |   MacFarsi  
 96 |   MacGreek  
 97 |   MacHebrew  
 98 |   MacIcelandic  
 99 |   MacRoman  
100 |   MacRomanian  
101 |   MacRumanian  
102 |   MacSami  
103 |   MacThai  
104 |   MacTurkish  
105 |   MacUkrainian  
106 | 
107 |   # More vendor encodings
108 |   AdobeStandardEncoding
109 |   nextstep
110 |   hp-roman8
111 | 
112 | =head1 DESCRIPTION
113 | 
114 | To find how to use this module in detail, see L<Encode>.
115 | 
116 | =head1 SEE ALSO
117 | 
118 | L<Encode>
119 | 
120 | =cut
121 | 


--------------------------------------------------------------------------------
/Byte/Makefile.PL:
--------------------------------------------------------------------------------
  1 | use 5.7.2;
  2 | use strict;
  3 | use ExtUtils::MakeMaker;
  4 | use File::Spec::Functions;
  5 | 
  6 | my $name = 'Byte';
  7 | my %tables = (
  8 |           byte_t => 
  9 |           [
 10 |            # misc. vendors
 11 |            # 'gsm0338.ucm', now in Encode::GSM0338
 12 |            'nextstep.ucm',
 13 |            'hp-roman8.ucm',
 14 |            'viscii.ucm',
 15 |            'adobeStdenc.ucm',
 16 |            # koi8
 17 |            'koi8-f.ucm', 'koi8-r.ucm', 'koi8-u.ucm', 
 18 |            # Mac
 19 |            qw(
 20 |           macArabic.ucm
 21 |           macCentEuro.ucm
 22 |           macCroatian.ucm
 23 |           macCyrillic.ucm
 24 |           macFarsi.ucm
 25 |           macGreek.ucm
 26 |           macHebrew.ucm
 27 |           macIceland.ucm
 28 |           macRoman.ucm
 29 |           macROMnn.ucm
 30 |           macRUMnn.ucm
 31 |           macSami.ucm
 32 |           macThai.ucm
 33 |           macTurkish.ucm
 34 |           macUkraine.ucm
 35 |           ),
 36 |           ],
 37 |              );
 38 | 
 39 | my %not_here = 
 40 |     map {$_ => 1} 
 41 | (
 42 |  '8859-1.ucm', 'cp1252.ucm', # default
 43 |  qw(cp037.ucm cp1026.ucm cp1047.ucm cp500.ucm cp875.ucm), # EBCDIC
 44 |  qw(cp932.ucm cp936.ucm cp949.ucm cp950.ucm),  # CJK
 45 |  );
 46 | 
 47 | opendir(ENC,catdir(updir(),'ucm')) or die $!;
 48 | while (defined(my $file = readdir(ENC)))
 49 | {
 50 |     $file =~ /^(8859|cp).*\.ucm$/io or next;
 51 |     $not_here{$file} and next;
 52 |     push(@{$tables{byte_t}},$file);
 53 | }
 54 | closedir(ENC);
 55 | 
 56 | WriteMakefile(
 57 |               INC		=> "-I../Encode",
 58 |           NAME		=> 'Encode::'.$name,
 59 |           VERSION_FROM	=> "$name.pm",
 60 |           OBJECT		=> '$(O_FILES)',
 61 |           'dist'		=> {
 62 |           COMPRESS	=> 'gzip -9f',
 63 |           SUFFIX	=> 'gz',
 64 |           DIST_DEFAULT => 'all tardist',
 65 |           },
 66 |           MAN3PODS	=> {},
 67 |           # OS 390 winges about line numbers > 64K ???
 68 |           XSOPT => '-nolinenumbers',
 69 |           );
 70 | 
 71 | package MY;
 72 | 
 73 | sub post_initialize
 74 | {
 75 |     my ($self) = @_;
 76 |     my %o;
 77 |     my $x = $self->{'OBJ_EXT'};
 78 |     # Add the table O_FILES
 79 |     foreach my $e (keys %tables)
 80 |     {
 81 |     $o{$e.$x} = 1;
 82 |     }
 83 |     $o{"$name$x"} = 1;
 84 |     $self->{'O_FILES'} = [sort keys %o];
 85 |     my @files = ("$name.xs");
 86 |     $self->{'C'} = ["$name.c"];
 87 |     $self->{SOURCE} .= " $name.c"
 88 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/;
 89 |     $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
 90 |     my %xs;
 91 |     foreach my $table (sort keys %tables) {
 92 |     push (@{$self->{'C'}},"$table.c");
 93 |     # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
 94 |     # get built.
 95 |     foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
 96 |         push (@files,$table.$ext);
 97 |     }
 98 |     $self->{SOURCE} .= " $table.c"
 99 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
100 |     }
101 |     $self->{'XS'} = { "$name.xs" => "$name.c" };
102 |     $self->{'clean'}{'FILES'} .= join(' ',@files);
103 |     open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
104 |     print XS <<'END';
105 | #define PERL_NO_GET_CONTEXT
106 | #include <EXTERN.h>
107 | #include <perl.h>
108 | #include <XSUB.h>
109 | #include "encode.h"
110 | END
111 |     foreach my $table (sort keys %tables) {
112 |     print XS qq[#include "${table}.h"\n];
113 |     }
114 |     print XS <<"END";
115 | 
116 | static void
117 | Encode_XSEncoding(pTHX_ encode_t *enc)
118 | {
119 |  dSP;
120 |  HV *stash = gv_stashpv("Encode::XS", TRUE);
121 |  SV *iv    = newSViv(PTR2IV(enc));
122 |  SV *sv    = sv_bless(newRV_noinc(iv),stash);
123 |  int i = 0;
124 |  /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
125 |  constness, in the hope that perl won't mess with it. */
126 |  assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
127 |  SvFLAGS(iv) |= SVp_POK;
128 |  SvPVX(iv) = (char*) enc->name[0];
129 |  PUSHMARK(sp);
130 |  XPUSHs(sv);
131 |  while (enc->name[i])
132 |   {
133 |    const char *name = enc->name[i++];
134 |    XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
135 |   }
136 |  PUTBACK;
137 |  call_pv("Encode::define_encoding",G_DISCARD);
138 |  SvREFCNT_dec(sv);
139 | }
140 | 
141 | MODULE = Encode::$name	PACKAGE = Encode::$name
142 | PROTOTYPES: DISABLE
143 | BOOT:
144 | {
145 | END
146 |     foreach my $table (sort keys %tables) {
147 |     print XS qq[#include "${table}.exh"\n];
148 |     }
149 |     print XS "}\n";
150 |     close(XS);
151 |     return "# Built $name.xs\n\n";
152 | }
153 | 
154 | sub postamble
155 | {
156 |     my $self = shift;
157 |     my $dir  = $self->catdir($self->updir,'ucm');
158 |     my $str  = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n";
159 |     $str    .= "$name.c : $name.xs ";
160 |     foreach my $table (sort keys %tables)
161 |     {
162 |     $str .= " $table.c";
163 |     }
164 |     $str .= "\n\n";
165 |     $str .= "$name\$(OBJ_EXT) : $name.c\n\n";
166 | 
167 |     my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
168 |     foreach my $table (sort keys %tables)
169 |     {
170 |     my $numlines = 1;
171 |     my $lengthsofar = length($str);
172 |     my $continuator = '';
173 |     $str .= "$table.c : $enc2xs Makefile.PL";
174 |     foreach my $file (sort (@{$tables{$table}}))
175 |     {
176 |         $str .= $continuator.' '.$self->catfile($dir,$file);
177 |         if ( length($str)-$lengthsofar > 128*$numlines )
178 |         {
179 |         $continuator .= " \\\n\t";
180 |         $numlines++;
181 |         } else {
182 |         $continuator = '';
183 |         }
184 |     }
185 |     my $plib   = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
186 |     $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
187 |     my $ucopts = '-"Q" -"O"';
188 |     $str .=  
189 |         qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
190 |     open (FILELIST, ">$table.fnm")
191 |         || die "Could not open $table.fnm: $!";
192 |     foreach my $file (sort (@{$tables{$table}}))
193 |     {
194 |         print FILELIST $self->catfile($dir,$file) . "\n";
195 |     }
196 |     close(FILELIST);
197 |     }
198 |     return $str;
199 | }
200 | 
201 | 


--------------------------------------------------------------------------------
/CN/CN.pm:
--------------------------------------------------------------------------------
 1 | package Encode::CN;
 2 | BEGIN {
 3 |     if ( ord("A") == 193 ) {
 4 |         die "Encode::CN not supported on EBCDIC\n";
 5 |     }
 6 | }
 7 | use strict;
 8 | use warnings;
 9 | use Encode;
10 | our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
11 | use XSLoader;
12 | XSLoader::load( __PACKAGE__, $VERSION );
13 | 
14 | # Relocated from Encode.pm
15 | 
16 | use Encode::CN::HZ;
17 | 
18 | # use Encode::CN::2022_CN;
19 | 
20 | 1;
21 | __END__
22 | 
23 | =head1 NAME
24 | 
25 | Encode::CN - China-based Chinese Encodings
26 | 
27 | =head1 SYNOPSIS
28 | 
29 |     use Encode qw/encode decode/; 
30 |     $euc_cn = encode("euc-cn", $utf8);   # loads Encode::CN implicitly
31 |     $utf8   = decode("euc-cn", $euc_cn); # ditto
32 | 
33 | =head1 DESCRIPTION
34 | 
35 | This module implements China-based Chinese charset encodings.
36 | Encodings supported are as follows.
37 | 
38 |   Canonical   Alias		Description
39 |   --------------------------------------------------------------------
40 |   euc-cn      /\beuc.*cn$/i	EUC (Extended Unix Character)
41 |           /\bcn.*euc$/i
42 |               /\bGB[-_ ]?2312(?:\D.*$|$)/i (see below)
43 |   gb2312-raw			The raw (low-bit) GB2312 character map
44 |   gb12345-raw			Traditional chinese counterpart to 
45 |                 GB2312 (raw)
46 |   iso-ir-165			GB2312 + GB6345 + GB8565 + additions
47 |   MacChineseSimp                GB2312 + Apple Additions
48 |   cp936				Code Page 936, also known as GBK 
49 |                 (Extended GuoBiao)
50 |   hz				7-bit escaped GB2312 encoding
51 |   --------------------------------------------------------------------
52 | 
53 | To find how to use this module in detail, see L<Encode>.
54 | 
55 | =head1 NOTES
56 | 
57 | Due to size concerns, C<GB 18030> (an extension to C<GBK>) is distributed
58 | separately on CPAN, under the name L<Encode::HanExtra>. That module
59 | also contains extra Taiwan-based encodings.
60 | 
61 | =head1 BUGS
62 | 
63 | When you see C<charset=gb2312> on mails and web pages, they really
64 | mean C<euc-cn> encodings.  To fix that, C<gb2312> is aliased to C<euc-cn>.
65 | Use C<gb2312-raw> when you really mean it.
66 | 
67 | The ASCII region (0x00-0x7f) is preserved for all encodings, even though
68 | this conflicts with mappings by the Unicode Consortium.
69 | 
70 | =head1 SEE ALSO
71 | 
72 | L<Encode>
73 | 
74 | =cut
75 | 


--------------------------------------------------------------------------------
/CN/Makefile.PL:
--------------------------------------------------------------------------------
  1 | use 5.7.2;
  2 | use strict;
  3 | use ExtUtils::MakeMaker;
  4 | use strict;
  5 | 
  6 | my %tables = (euc_cn_t   => ['euc-cn.ucm',
  7 |                  'cp936.ucm',
  8 |                  'macChinsimp.ucm',
  9 |                  ],
 10 |           '2312_t'    => ['gb2312.ucm'],
 11 |           '12345_t'  => ['gb12345.ucm'],
 12 |           ir_165_t   => ['ir-165.ucm'],
 13 |              );
 14 | 
 15 | unless ($ENV{AGGREGATE_TABLES}){
 16 |     my @ucm;
 17 |     for my $k (keys %tables){
 18 |     push @ucm, @{$tables{$k}};
 19 |     }
 20 |     %tables = ();
 21 |     my $seq = 0;
 22 |     for my $ucm (sort @ucm){
 23 |     # 8.3 compliance !
 24 |     my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
 25 |     $tables{$t} = [ $ucm ];
 26 |     }
 27 | }
 28 | 
 29 | my $name = 'CN';
 30 | 
 31 | WriteMakefile(
 32 |               INC		=> "-I../Encode",
 33 |           NAME		=> 'Encode::'.$name,
 34 |           VERSION_FROM	=> "$name.pm",
 35 |           OBJECT		=> '$(O_FILES)',
 36 |           'dist'		=> {
 37 |           COMPRESS	=> 'gzip -9f',
 38 |           SUFFIX	=> 'gz',
 39 |           DIST_DEFAULT => 'all tardist',
 40 |           },
 41 |           MAN3PODS	=> {},
 42 |           # OS 390 winges about line numbers > 64K ???
 43 |           XSOPT => '-nolinenumbers',
 44 |               XSPROTOARG => '-noprototypes',
 45 |           );
 46 | 
 47 | package MY;
 48 | 
 49 | sub post_initialize
 50 | {
 51 |     my ($self) = @_;
 52 |     my %o;
 53 |     my $x = $self->{'OBJ_EXT'};
 54 |     # Add the table O_FILES
 55 |     foreach my $e (keys %tables)
 56 |     {
 57 |     $o{$e.$x} = 1;
 58 |     }
 59 |     $o{"$name$x"} = 1;
 60 |     $self->{'O_FILES'} = [sort keys %o];
 61 |     my @files = ("$name.xs");
 62 |     $self->{'C'} = ["$name.c"];
 63 |     $self->{SOURCE} .= " $name.c"
 64 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/;
 65 |     $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
 66 |     my %xs;
 67 |     foreach my $table (sort keys %tables) {
 68 |     push (@{$self->{'C'}},"$table.c");
 69 |     # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
 70 |     # get built.
 71 |     foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
 72 |         push (@files,$table.$ext);
 73 |     }
 74 |     $self->{SOURCE} .= " $table.c"
 75 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
 76 |     }
 77 |     $self->{'XS'} = { "$name.xs" => "$name.c" };
 78 |     $self->{'clean'}{'FILES'} .= join(' ',@files);
 79 |     open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
 80 |     print XS <<'END';
 81 | #define PERL_NO_GET_CONTEXT
 82 | #include <EXTERN.h>
 83 | #include <perl.h>
 84 | #include <XSUB.h>
 85 | #include "encode.h"
 86 | END
 87 |     foreach my $table (sort keys %tables) {
 88 |     print XS qq[#include "${table}.h"\n];
 89 |     }
 90 |     print XS <<"END";
 91 | 
 92 | static void
 93 | Encode_XSEncoding(pTHX_ encode_t *enc)
 94 | {
 95 |  dSP;
 96 |  HV *stash = gv_stashpv("Encode::XS", TRUE);
 97 |  SV *iv    = newSViv(PTR2IV(enc));
 98 |  SV *sv    = sv_bless(newRV_noinc(iv),stash);
 99 |  int i = 0;
100 |  /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
101 |  constness, in the hope that perl won't mess with it. */
102 |  assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
103 |  SvFLAGS(iv) |= SVp_POK;
104 |  SvPVX(iv) = (char*) enc->name[0];
105 |  PUSHMARK(sp);
106 |  XPUSHs(sv);
107 |  while (enc->name[i])
108 |   {
109 |    const char *name = enc->name[i++];
110 |    XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
111 |   }
112 |  PUTBACK;
113 |  call_pv("Encode::define_encoding",G_DISCARD);
114 |  SvREFCNT_dec(sv);
115 | }
116 | 
117 | MODULE = Encode::$name	PACKAGE = Encode::$name
118 | PROTOTYPES: DISABLE
119 | BOOT:
120 | {
121 | END
122 |     foreach my $table (sort keys %tables) {
123 |     print XS qq[#include "${table}.exh"\n];
124 |     }
125 |     print XS "}\n";
126 |     close(XS);
127 |     return "# Built $name.xs\n\n";
128 | }
129 | 
130 | sub postamble
131 | {
132 |     my $self = shift;
133 |     my $dir  = $self->catdir($self->updir,'ucm');
134 |     my $str  = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n";
135 |     $str    .= "$name.c : $name.xs ";
136 |     foreach my $table (sort keys %tables)
137 |     {
138 |     $str .= " $table.c";
139 |     }
140 |     $str .= "\n\n";
141 |     $str .= "$name\$(OBJ_EXT) : $name.c\n\n";
142 | 
143 |     my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
144 |     foreach my $table (sort keys %tables)
145 |     {
146 |     my $numlines = 1;
147 |     my $lengthsofar = length($str);
148 |     my $continuator = '';
149 |     $str .= "$table.c : $enc2xs Makefile.PL";
150 |     foreach my $file (@{$tables{$table}})
151 |     {
152 |         $str .= $continuator.' '.$self->catfile($dir,$file);
153 |         if ( length($str)-$lengthsofar > 128*$numlines )
154 |         {
155 |         $continuator .= " \\\n\t";
156 |         $numlines++;
157 |         } else {
158 |         $continuator = '';
159 |         }
160 |     }
161 |     my $plib   = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
162 |     $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
163 |     my $ucopts = '-"Q"';
164 |     $str .=  
165 |         qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
166 |     open (FILELIST, ">$table.fnm")
167 |         || die "Could not open $table.fnm: $!";
168 |     foreach my $file (@{$tables{$table}})
169 |     {
170 |         print FILELIST $self->catfile($dir,$file) . "\n";
171 |     }
172 |     close(FILELIST);
173 |     }
174 |     return $str;
175 | }
176 | 
177 | 


--------------------------------------------------------------------------------
/EBCDIC/EBCDIC.pm:
--------------------------------------------------------------------------------
 1 | package Encode::EBCDIC;
 2 | use strict;
 3 | use warnings;
 4 | use Encode;
 5 | our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 6 | 
 7 | use XSLoader;
 8 | XSLoader::load( __PACKAGE__, $VERSION );
 9 | 
10 | 1;
11 | __END__
12 | 
13 | =head1 NAME
14 | 
15 | Encode::EBCDIC - EBCDIC Encodings
16 | 
17 | =head1 SYNOPSIS
18 | 
19 |     use Encode qw/encode decode/; 
20 |     $posix_bc  = encode("posix-bc", $utf8); # loads Encode::EBCDIC implicitly
21 |     $utf8 = decode("", $posix_bc);          # ditto
22 | 
23 | =head1 ABSTRACT
24 | 
25 | This module implements various EBCDIC-Based encodings.  Encodings
26 | supported are as follows.   
27 | 
28 |   Canonical   Alias		Description
29 |   --------------------------------------------------------------------
30 |   cp37  
31 |   cp500  
32 |   cp875  
33 |   cp1026  
34 |   cp1047  
35 |   posix-bc
36 | 
37 | =head1 DESCRIPTION
38 | 
39 | To find how to use this module in detail, see L<Encode>.
40 | 
41 | =head1 SEE ALSO
42 | 
43 | L<Encode>, L<perlebcdic>
44 | 
45 | =cut
46 | 


--------------------------------------------------------------------------------
/EBCDIC/Makefile.PL:
--------------------------------------------------------------------------------
  1 | use 5.7.2;
  2 | use strict;
  3 | use ExtUtils::MakeMaker;
  4 | 
  5 | my $name = 'EBCDIC';
  6 | my %tables = (
  7 |           ebcdic_t  => 
  8 |           ['posix-bc.ucm',
  9 |            qw(cp037.ucm cp1026.ucm cp1047.ucm cp500.ucm cp875.ucm),
 10 |            ],
 11 |           );
 12 | 
 13 | WriteMakefile(
 14 |               INC		=> "-I../Encode",
 15 |           NAME		=> 'Encode::'.$name,
 16 |           VERSION_FROM	=> "$name.pm",
 17 |           OBJECT		=> '$(O_FILES)',
 18 |           'dist'		=> {
 19 |           COMPRESS	=> 'gzip -9f',
 20 |           SUFFIX	=> 'gz',
 21 |           DIST_DEFAULT => 'all tardist',
 22 |           },
 23 |           MAN3PODS	=> {},
 24 |           # OS 390 winges about line numbers > 64K ???
 25 |           XSOPT => '-nolinenumbers',
 26 |           );
 27 | 
 28 | package MY;
 29 | 
 30 | sub post_initialize
 31 | {
 32 |     my ($self) = @_;
 33 |     my %o;
 34 |     my $x = $self->{'OBJ_EXT'};
 35 |     # Add the table O_FILES
 36 |     foreach my $e (keys %tables)
 37 |     {
 38 |     $o{$e.$x} = 1;
 39 |     }
 40 |     $o{"$name$x"} = 1;
 41 |     $self->{'O_FILES'} = [sort keys %o];
 42 |     my @files = ("$name.xs");
 43 |     $self->{'C'} = ["$name.c"];
 44 |     $self->{SOURCE} .= " $name.c"
 45 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/;
 46 |     $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
 47 |     my %xs;
 48 |     foreach my $table (sort keys %tables) {
 49 |     push (@{$self->{'C'}},"$table.c");
 50 |     # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
 51 |     # get built.
 52 |     foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
 53 |         push (@files,$table.$ext);
 54 |     }
 55 |     $self->{SOURCE} .= " $table.c"
 56 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
 57 |     }
 58 |     $self->{'XS'} = { "$name.xs" => "$name.c" };
 59 |     $self->{'clean'}{'FILES'} .= join(' ',@files);
 60 |     open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
 61 |     print XS <<'END';
 62 | #define PERL_NO_GET_CONTEXT
 63 | #include <EXTERN.h>
 64 | #include <perl.h>
 65 | #include <XSUB.h>
 66 | #include "encode.h"
 67 | END
 68 |     foreach my $table (sort keys %tables) {
 69 |     print XS qq[#include "${table}.h"\n];
 70 |     }
 71 |     print XS <<"END";
 72 | 
 73 | static void
 74 | Encode_XSEncoding(pTHX_ encode_t *enc)
 75 | {
 76 |  dSP;
 77 |  HV *stash = gv_stashpv("Encode::XS", TRUE);
 78 |  SV *iv    = newSViv(PTR2IV(enc));
 79 |  SV *sv    = sv_bless(newRV_noinc(iv),stash);
 80 |  int i = 0;
 81 |  /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
 82 |  constness, in the hope that perl won't mess with it. */
 83 |  assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
 84 |  SvFLAGS(iv) |= SVp_POK;
 85 |  SvPVX(iv) = (char*) enc->name[0];
 86 |  PUSHMARK(sp);
 87 |  XPUSHs(sv);
 88 |  while (enc->name[i])
 89 |   {
 90 |    const char *name = enc->name[i++];
 91 |    XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
 92 |   }
 93 |  PUTBACK;
 94 |  call_pv("Encode::define_encoding",G_DISCARD);
 95 |  SvREFCNT_dec(sv);
 96 | }
 97 | 
 98 | MODULE = Encode::$name	PACKAGE = Encode::$name
 99 | PROTOTYPES: DISABLE
100 | BOOT:
101 | {
102 | END
103 |     foreach my $table (sort keys %tables) {
104 |     print XS qq[#include "${table}.exh"\n];
105 |     }
106 |     print XS "}\n";
107 |     close(XS);
108 |     return "# Built $name.xs\n\n";
109 | }
110 | 
111 | sub postamble
112 | {
113 |     my $self = shift;
114 |     my $dir  = $self->catdir($self->updir,'ucm');
115 |     my $str  = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n";
116 |     $str    .= "$name.c : $name.xs ";
117 |     foreach my $table (sort keys %tables)
118 |     {
119 |     $str .= " $table.c";
120 |     }
121 |     $str .= "\n\n";
122 |     $str .= "$name\$(OBJ_EXT) : $name.c\n\n";
123 | 
124 |     my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
125 |     foreach my $table (sort keys %tables)
126 |     {
127 |     my $numlines = 1;
128 |     my $lengthsofar = length($str);
129 |     my $continuator = '';
130 |     $str .= "$table.c : $enc2xs Makefile.PL";
131 |     foreach my $file (@{$tables{$table}})
132 |     {
133 |         $str .= $continuator.' '.$self->catfile($dir,$file);
134 |         if ( length($str)-$lengthsofar > 128*$numlines )
135 |         {
136 |         $continuator .= " \\\n\t";
137 |         $numlines++;
138 |         } else {
139 |         $continuator = '';
140 |         }
141 |     }
142 |     my $plib   = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
143 |     $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
144 |     my $ucopts = '-"Q" -"O"';
145 |     $str .=  
146 |         qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
147 |     open (FILELIST, ">$table.fnm")
148 |         || die "Could not open $table.fnm: $!";
149 |     foreach my $file (@{$tables{$table}})
150 |     {
151 |         print FILELIST $self->catfile($dir,$file) . "\n";
152 |     }
153 |     close(FILELIST);
154 |     }
155 |     return $str;
156 | }
157 | 
158 | 


--------------------------------------------------------------------------------
/Encode/Changes.e2x:
--------------------------------------------------------------------------------
1 | #
2 | # $Id: Changes.e2x,v 2.0 2004/05/16 20:55:15 dankogai Exp $
3 | # Revision history for Perl extension Encode::$_Name_.
4 | #
5 | 
6 | 0.01  $_Now_
7 |   Autogenerated by enc2xs version $_Version_.
8 | 


--------------------------------------------------------------------------------
/Encode/ConfigLocal_PM.e2x:
--------------------------------------------------------------------------------
 1 | #
 2 | # Local demand-load module list
 3 | #
 4 | # You should not edit this file by hand!  use "enc2xs -C"
 5 | # 
 6 | package Encode::ConfigLocal;
 7 | our $VERSION = $_LocalVer_;
 8 | 
 9 | use strict;
10 | 
11 | $_ModLines_
12 | 
13 | 1;
14 | 


--------------------------------------------------------------------------------
/Encode/Makefile_PL.e2x:
--------------------------------------------------------------------------------
  1 | #
  2 | # This file is auto-generated by:
  3 | # enc2xs version $_Version_
  4 | # $_Now_
  5 | #
  6 | use 5.7.2;
  7 | use strict;
  8 | use ExtUtils::MakeMaker;
  9 | use Config;
 10 | 
 11 | # Please edit the following to the taste!
 12 | my $name = '$_Name_';
 13 | my %tables = (
 14 |          $_Name__t   => [ $_TableFiles_ ],
 15 |          );
 16 | 
 17 | #### DO NOT EDIT BEYOND THIS POINT!
 18 | require File::Spec;
 19 | my ($enc2xs, $encode_h) = ();
 20 | my @path_ext = ('');
 21 | @path_ext = split(';', $ENV{PATHEXT}) if $^O eq 'MSWin32';
 22 | PATHLOOP:
 23 | for my $d (@Config{qw/bin sitebin vendorbin/}, 
 24 |        (split /$Config{path_sep}/o, $ENV{PATH})){
 25 |     for my $f (qw/enc2xs enc2xs5.7.3/){
 26 |         my $path = File::Spec->catfile($d, $f);
 27 |         for my $ext (@path_ext) {
 28 |             my $bin = "$path$ext";
 29 |             -r "$bin" and $enc2xs = $bin and last PATHLOOP;
 30 |         }
 31 |     }
 32 | }
 33 | $enc2xs or die "enc2xs not found!";
 34 | print "enc2xs is $enc2xs\n";
 35 | my %encode_h = ();
 36 | for my $d (@INC){
 37 |     my $dir = File::Spec->catfile($d, "Encode");
 38 |     my $file =  File::Spec->catfile($dir, "encode.h");
 39 |     -f $file and $encode_h{$dir} = -M $file;
 40 | }
 41 | %encode_h or die "encode.h not found!";
 42 | # find the latest one
 43 | ($encode_h) = sort {$encode_h{$b} <=> $encode_h{$a}} keys %encode_h;
 44 | print "encode.h is at $encode_h\n";
 45 | 
 46 | WriteMakefile(
 47 |               INC		=> "-I$encode_h",
 48 | #### END_OF_HEADER -- DO NOT EDIT THIS LINE BY HAND! ####
 49 |           NAME		=> 'Encode::'.$name,
 50 |           VERSION_FROM	=> "$name.pm",
 51 |           OBJECT		=> '$(O_FILES)',
 52 |           'dist'		=> {
 53 |           COMPRESS	=> 'gzip -9f',
 54 |           SUFFIX	=> 'gz',
 55 |           DIST_DEFAULT => 'all tardist',
 56 |           },
 57 |           MAN3PODS	=> {},
 58 |           PREREQ_PM => {
 59 |                 'Encode'     => "1.41",
 60 |                            },
 61 |           # OS 390 winges about line numbers > 64K ???
 62 |           XSOPT => '-nolinenumbers',
 63 |           );
 64 | 
 65 | package MY;
 66 | 
 67 | sub post_initialize
 68 | {
 69 |     my ($self) = @_;
 70 |     my %o;
 71 |     my $x = $self->{'OBJ_EXT'};
 72 |     # Add the table O_FILES
 73 |     foreach my $e (keys %tables)
 74 |     {
 75 |     $o{$e.$x} = 1;
 76 |     }
 77 |     $o{"$name$x"} = 1;
 78 |     $self->{'O_FILES'} = [sort keys %o];
 79 |     my @files = ("$name.xs");
 80 |     $self->{'C'} = ["$name.c"];
 81 |     # The next two lines to make MacPerl Happy -- dankogai via pudge
 82 |     $self->{SOURCE} .= " $name.c"
 83 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/;
 84 |     # $self->{'H'} = [$self->catfile($self->updir,'encode.h')];
 85 |     my %xs;
 86 |     foreach my $table (sort keys %tables) {
 87 |     push (@{$self->{'C'}},"$table.c");
 88 |     # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
 89 |     # get built.
 90 |     foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
 91 |         push (@files,$table.$ext);
 92 |     }
 93 |     }
 94 |     $self->{'XS'} = { "$name.xs" => "$name.c" };
 95 |     $self->{'clean'}{'FILES'} .= join(' ',@files);
 96 |     open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
 97 |     print XS <<'END';
 98 | #include <EXTERN.h>
 99 | #include <perl.h>
100 | #include <XSUB.h>
101 | #include "encode.h"
102 | END
103 |     foreach my $table (sort keys %tables) {
104 |     print XS qq[#include "${table}.h"\n];
105 |     }
106 |     print XS <<"END";
107 | 
108 | static void
109 | Encode_XSEncoding(pTHX_ encode_t *enc)
110 | {
111 |  dSP;
112 |  HV *stash = gv_stashpv("Encode::XS", TRUE);
113 |  SV *iv    = newSViv(PTR2IV(enc));
114 |  SV *sv    = sv_bless(newRV_noinc(iv),stash);
115 |  int i = 0;
116 |  /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
117 |  constness, in the hope that perl won't mess with it. */
118 |  assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
119 |  SvFLAGS(iv) |= SVp_POK;
120 |  SvPVX(iv) = (char*) enc->name[0];
121 |  PUSHMARK(sp);
122 |  XPUSHs(sv);
123 |  while (enc->name[i])
124 |   {
125 |    const char *name = enc->name[i++];
126 |    XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
127 |   }
128 |  PUTBACK;
129 |  call_pv("Encode::define_encoding",G_DISCARD);
130 |  SvREFCNT_dec(sv);
131 | }
132 | 
133 | MODULE = Encode::$name	PACKAGE = Encode::$name
134 | PROTOTYPES: DISABLE
135 | BOOT:
136 | {
137 | END
138 |     foreach my $table (sort keys %tables) {
139 |     print XS qq[#include "${table}.exh"\n];
140 |     }
141 |     print XS "}\n";
142 |     close(XS);
143 |     return "# Built $name.xs\n\n";
144 | }
145 | 
146 | sub postamble
147 | {
148 |     my $self = shift;
149 |     my $dir  = "."; # $self->catdir('Encode');
150 |     my $str  = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n";
151 |     $str    .= "$name.c : $name.xs ";
152 |     foreach my $table (sort keys %tables)
153 |     {
154 |     $str .= " $table.c";
155 |     }
156 |     $str .= "\n\n";
157 |     $str .= "$name\$(OBJ_EXT) : $name.c\n\n";
158 | 
159 |     foreach my $table (sort keys %tables)
160 |     {
161 |     my $numlines = 1;
162 |     my $lengthsofar = length($str);
163 |     my $continuator = '';
164 |     $str .= "$table.c : Makefile.PL";
165 |     foreach my $file (@{$tables{$table}})
166 |     {
167 |         $str .= $continuator.' '.$self->catfile($dir,$file);
168 |         if ( length($str)-$lengthsofar > 128*$numlines )
169 |         {
170 |         $continuator .= " \\\n\t";
171 |         $numlines++;
172 |         } else {
173 |         $continuator = '';
174 |         }
175 |     }
176 |     my $plib   = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
177 |     my $ucopts = '-"Q"';
178 |     $str .=  
179 |         qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
180 |     open (FILELIST, ">$table.fnm")
181 |         || die "Could not open $table.fnm: $!";
182 |     foreach my $file (@{$tables{$table}})
183 |     {
184 |         print FILELIST $self->catfile($dir,$file) . "\n";
185 |     }
186 |     close(FILELIST);
187 |     }
188 |     return $str;
189 | }
190 | 
191 | 


--------------------------------------------------------------------------------
/Encode/README.e2x:
--------------------------------------------------------------------------------
 1 | Encode::$_Name_ version 0.1
 2 | ========
 3 | 
 4 | NAME
 5 |        Encode::$_Name_ - <describe encoding>
 6 | 
 7 | SYNOPSIS
 8 |        use Encode::$_Name_;
 9 |        #<put more words here>
10 | ABSTRACT
11 |       <fill this in>
12 | INSTALLATION
13 | 
14 | To install this module type the following:
15 | 
16 |    perl Makefile.PL
17 |    make
18 |    make test
19 |    make install
20 | 
21 | DEPENDENCIES
22 | 
23 | This module requires perl version 5.7.3 or later.
24 | 
25 | COPYRIGHT AND LICENCE
26 | 
27 | Copyright (C) 2002 Your Name <your@address.domain>
28 | 
29 | This library is free software; you can redistribute it and/or modify
30 | it under the same terms as Perl itself. 
31 | 
32 | 


--------------------------------------------------------------------------------
/Encode/_PM.e2x:
--------------------------------------------------------------------------------
 1 | package Encode::$_Name_;
 2 | our $VERSION = "0.01";
 3 |  
 4 | use Encode;
 5 | use XSLoader;
 6 | XSLoader::load(__PACKAGE__,$VERSION);
 7 | 
 8 | 1;
 9 | __END__
10 | 
11 | =head1 NAME
12 |  
13 | Encode::$_Name_ - New Encoding
14 |  
15 | =head1 SYNOPSIS
16 | 
17 | You got to fill this in!
18 | 
19 | =head1 SEE ALSO
20 | 
21 | L<Encode>
22 | 
23 | =cut
24 | 


--------------------------------------------------------------------------------
/Encode/_T.e2x:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | # Adjust the number here!
 3 | use Test::More tests => 2;
 4 | 
 5 | BEGIN {
 6 |     use_ok('Encode');
 7 |     use_ok('Encode::$_Name_');
 8 | }
 9 | # Add more test here!
10 | 


--------------------------------------------------------------------------------
/JP/JP.pm:
--------------------------------------------------------------------------------
 1 | package Encode::JP;
 2 | BEGIN {
 3 |     if ( ord("A") == 193 ) {
 4 |         die "Encode::JP not supported on EBCDIC\n";
 5 |     }
 6 | }
 7 | use strict;
 8 | use warnings;
 9 | use Encode;
10 | our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
11 | 
12 | use XSLoader;
13 | XSLoader::load( __PACKAGE__, $VERSION );
14 | 
15 | use Encode::JP::JIS7;
16 | 
17 | 1;
18 | __END__
19 | 
20 | =head1 NAME
21 | 
22 | Encode::JP - Japanese Encodings
23 | 
24 | =head1 SYNOPSIS
25 | 
26 |     use Encode qw/encode decode/; 
27 |     $euc_jp = encode("euc-jp", $utf8);   # loads Encode::JP implicitly
28 |     $utf8   = decode("euc-jp", $euc_jp); # ditto
29 | 
30 | =head1 ABSTRACT
31 | 
32 | This module implements Japanese charset encodings.  Encodings
33 | supported are as follows.
34 | 
35 |   Canonical   Alias		Description
36 |   --------------------------------------------------------------------
37 |   euc-jp      /\beuc.*jp$/i	EUC (Extended Unix Character)
38 |               /\bjp.*euc/i   
39 |           /\bujis$/i
40 |   shiftjis    /\bshift.*jis$/i	Shift JIS (aka MS Kanji)
41 |           /\bsjis$/i
42 |   7bit-jis    /\bjis$/i		7bit JIS
43 |   iso-2022-jp			ISO-2022-JP                  [RFC1468]
44 |                 = 7bit JIS with all Halfwidth Kana 
45 |                   converted to Fullwidth
46 |   iso-2022-jp-1			ISO-2022-JP-1                [RFC2237]
47 |                                 = ISO-2022-JP with JIS X 0212-1990
48 |                   support.  See below
49 |   MacJapanese	                Shift JIS + Apple vendor mappings
50 |   cp932       /\bwindows-31j$/i Code Page 932
51 |                                 = Shift JIS + MS/IBM vendor mappings
52 |   jis0201-raw                   JIS0201, raw format
53 |   jis0208-raw                   JIS0208, raw format
54 |   jis0212-raw                   JIS0212, raw format
55 |   --------------------------------------------------------------------
56 | 
57 | =head1 DESCRIPTION
58 | 
59 | To find out how to use this module in detail, see L<Encode>.
60 | 
61 | =head1 Note on ISO-2022-JP(-1)?
62 | 
63 | ISO-2022-JP-1 (RFC2237) is a superset of ISO-2022-JP (RFC1468) which
64 | adds support for JIS X 0212-1990.  That means you can use the same
65 | code to decode to utf8 but not vice versa.
66 | 
67 |   $utf8 = decode('iso-2022-jp-1', $stream);
68 | 
69 | and
70 | 
71 |   $utf8 = decode('iso-2022-jp',   $stream);
72 | 
73 | yield the same result but
74 | 
75 |   $with_0212 = encode('iso-2022-jp-1', $utf8);
76 | 
77 | is now different from
78 | 
79 |   $without_0212 = encode('iso-2022-jp', $utf8 );
80 | 
81 | In the latter case, characters that map to 0212 are first converted
82 | to U+3013 (0xA2AE in EUC-JP; a white square also known as 'Tofu' or
83 | 'geta mark') then fed to the decoding engine.  U+FFFD is not used,
84 | in order to preserve text layout as much as possible.
85 | 
86 | =head1 BUGS
87 | 
88 | The ASCII region (0x00-0x7f) is preserved for all encodings, even
89 | though this conflicts with mappings by the Unicode Consortium.
90 | 
91 | =head1 SEE ALSO
92 | 
93 | L<Encode>
94 | 
95 | =cut
96 | 


--------------------------------------------------------------------------------
/JP/Makefile.PL:
--------------------------------------------------------------------------------
  1 | use 5.7.2;
  2 | use strict;
  3 | use ExtUtils::MakeMaker;
  4 | use strict;
  5 | 
  6 | my %tables = (
  7 |           euc_jp_t   => ['euc-jp.ucm'],
  8 |               sjis_t     => ['shiftjis.ucm',
  9 |                  'macJapanese.ucm',
 10 |                  'cp932.ucm'],
 11 |           raw_t      => [
 12 |                  qw(jis0201.ucm jis0208.ucm jis0212.ucm)
 13 |                  ],
 14 |              );
 15 | 
 16 | unless ($ENV{AGGREGATE_TABLES}){
 17 |     my @ucm;
 18 |     for my $k (keys %tables){
 19 |     push @ucm, @{$tables{$k}};
 20 |     }
 21 |     %tables = ();
 22 |     my $seq = 0;
 23 |     for my $ucm (sort @ucm){
 24 |     # 8.3 compliance !
 25 |     my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
 26 |     $tables{$t} = [ $ucm ];
 27 |     }
 28 | }
 29 | 
 30 | my $name = 'JP';
 31 | 
 32 | WriteMakefile(
 33 |               INC		=> "-I../Encode",
 34 |           NAME		=> 'Encode::'.$name,
 35 |           VERSION_FROM	=> "$name.pm",
 36 |           OBJECT		=> '$(O_FILES)',
 37 |           'dist'		=> {
 38 |           COMPRESS	=> 'gzip -9f',
 39 |           SUFFIX	=> 'gz',
 40 |           DIST_DEFAULT => 'all tardist',
 41 |           },
 42 |           MAN3PODS	=> {},
 43 |           # OS 390 winges about line numbers > 64K ???
 44 |           XSOPT => '-nolinenumbers',
 45 |           );
 46 | 
 47 | package MY;
 48 | 
 49 | sub post_initialize
 50 | {
 51 |     my ($self) = @_;
 52 |     my %o;
 53 |     my $x = $self->{'OBJ_EXT'};
 54 |     # Add the table O_FILES
 55 |     foreach my $e (keys %tables)
 56 |     {
 57 |     $o{$e.$x} = 1;
 58 |     }
 59 |     $o{"$name$x"} = 1;
 60 |     $self->{'O_FILES'} = [sort keys %o];
 61 |     my @files = ("$name.xs");
 62 |     $self->{'C'} = ["$name.c"];
 63 |     $self->{SOURCE} .= " $name.c"
 64 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/;
 65 |     $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
 66 |     my %xs;
 67 |     foreach my $table (sort keys %tables) {
 68 |     push (@{$self->{'C'}},"$table.c");
 69 |     # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
 70 |     # get built.
 71 |     foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
 72 |         push (@files,$table.$ext);
 73 |     }
 74 |     $self->{SOURCE} .= " $table.c"
 75 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
 76 |     }
 77 |     $self->{'XS'} = { "$name.xs" => "$name.c" };
 78 |     $self->{'clean'}{'FILES'} .= join(' ',@files);
 79 |     open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
 80 |     print XS <<'END';
 81 | #define PERL_NO_GET_CONTEXT
 82 | #include <EXTERN.h>
 83 | #include <perl.h>
 84 | #include <XSUB.h>
 85 | #include "encode.h"
 86 | END
 87 |     foreach my $table (sort keys %tables) {
 88 |     print XS qq[#include "${table}.h"\n];
 89 |     }
 90 |     print XS <<"END";
 91 | 
 92 | static void
 93 | Encode_XSEncoding(pTHX_ encode_t *enc)
 94 | {
 95 |  dSP;
 96 |  HV *stash = gv_stashpv("Encode::XS", TRUE);
 97 |  SV *iv    = newSViv(PTR2IV(enc));
 98 |  SV *sv    = sv_bless(newRV_noinc(iv),stash);
 99 |  int i = 0;
100 |  /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
101 |  constness, in the hope that perl won't mess with it. */
102 |  assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
103 |  SvFLAGS(iv) |= SVp_POK;
104 |  SvPVX(iv) = (char*) enc->name[0];
105 |  PUSHMARK(sp);
106 |  XPUSHs(sv);
107 |  while (enc->name[i])
108 |   {
109 |    const char *name = enc->name[i++];
110 |    XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
111 |   }
112 |  PUTBACK;
113 |  call_pv("Encode::define_encoding",G_DISCARD);
114 |  SvREFCNT_dec(sv);
115 | }
116 | 
117 | MODULE = Encode::$name	PACKAGE = Encode::$name
118 | PROTOTYPES: DISABLE
119 | BOOT:
120 | {
121 | END
122 |     foreach my $table (sort keys %tables) {
123 |     print XS qq[#include "${table}.exh"\n];
124 |     }
125 |     print XS "}\n";
126 |     close(XS);
127 |     return "# Built $name.xs\n\n";
128 | }
129 | 
130 | sub postamble
131 | {
132 |     my $self = shift;
133 |     my $dir  = $self->catdir($self->updir,'ucm');
134 |     my $str  = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n";
135 |     $str    .= "$name.c : $name.xs ";
136 |     foreach my $table (sort keys %tables)
137 |     {
138 |     $str .= " $table.c";
139 |     }
140 |     $str .= "\n\n";
141 |     $str .= "$name\$(OBJ_EXT) : $name.c\n\n";
142 | 
143 |     my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
144 |     foreach my $table (sort keys %tables)
145 |     {
146 |     my $numlines = 1;
147 |     my $lengthsofar = length($str);
148 |     my $continuator = '';
149 |     $str .= "$table.c : $enc2xs Makefile.PL";
150 |     foreach my $file (@{$tables{$table}})
151 |     {
152 |         $str .= $continuator.' '.$self->catfile($dir,$file);
153 |         if ( length($str)-$lengthsofar > 128*$numlines )
154 |         {
155 |         $continuator .= " \\\n\t";
156 |         $numlines++;
157 |         } else {
158 |         $continuator = '';
159 |         }
160 |     }
161 |     my $plib   = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
162 |     $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
163 |     my $ucopts = '-"Q"';
164 |     $str .=  
165 |         qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
166 |     open (FILELIST, ">$table.fnm")
167 |         || die "Could not open $table.fnm: $!";
168 |     foreach my $file (@{$tables{$table}})
169 |     {
170 |         print FILELIST $self->catfile($dir,$file) . "\n";
171 |     }
172 |     close(FILELIST);
173 |     }
174 |     return $str;
175 | }
176 | 
177 | 


--------------------------------------------------------------------------------
/KR/KR.pm:
--------------------------------------------------------------------------------
 1 | package Encode::KR;
 2 | BEGIN {
 3 |     if ( ord("A") == 193 ) {
 4 |         die "Encode::KR not supported on EBCDIC\n";
 5 |     }
 6 | }
 7 | use strict;
 8 | use warnings;
 9 | use Encode;
10 | our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
11 | use XSLoader;
12 | XSLoader::load( __PACKAGE__, $VERSION );
13 | 
14 | use Encode::KR::2022_KR;
15 | 
16 | 1;
17 | __END__
18 | 
19 | =head1 NAME
20 | 
21 | Encode::KR - Korean Encodings
22 | 
23 | =head1 SYNOPSIS
24 | 
25 |     use Encode qw/encode decode/; 
26 |     $euc_kr = encode("euc-kr", $utf8);   # loads Encode::KR implicitly
27 |     $utf8   = decode("euc-kr", $euc_kr); # ditto
28 | 
29 | =head1 DESCRIPTION
30 | 
31 | This module implements Korean charset encodings.  Encodings supported
32 | are as follows.
33 | 
34 | 
35 |   Canonical   Alias		Description
36 |   --------------------------------------------------------------------
37 |   euc-kr      /\beuc.*kr$/i	EUC (Extended Unix Character)
38 |           /\bkr.*euc$/i
39 |   ksc5601-raw			Korean standard code set (as is)
40 |   cp949	      /(?:x-)?uhc$/i
41 |               /(?:x-)?windows-949$/i
42 |               /\bks_c_5601-1987$/i
43 |                                 Code Page 949 (EUC-KR + 8,822 
44 |                                 (additional Hangul syllables)
45 |   MacKorean			EUC-KR + Apple Vendor Mappings
46 |   johab       JOHAB             A supplementary encoding defined in 
47 |                                              Annex 3 of KS X 1001:1998
48 |   iso-2022-kr                   iso-2022-kr                  [RFC1557]
49 |   --------------------------------------------------------------------
50 | 
51 | To find how to use this module in detail, see L<Encode>.
52 | 
53 | =head1 BUGS
54 | 
55 | When you see C<charset=ks_c_5601-1987> on mails and web pages, they really
56 | mean "cp949" encodings.  To fix that, the following aliases are set;
57 | 
58 |   qr/(?:x-)?uhc$/i         => '"cp949"'
59 |   qr/(?:x-)?windows-949$/i => '"cp949"'
60 |   qr/ks_c_5601-1987$/i     => '"cp949"'
61 | 
62 | The ASCII region (0x00-0x7f) is preserved for all encodings, even
63 | though this conflicts with mappings by the Unicode Consortium.
64 | 
65 | =head1 SEE ALSO
66 | 
67 | L<Encode>
68 | 
69 | =cut
70 | 


--------------------------------------------------------------------------------
/KR/Makefile.PL:
--------------------------------------------------------------------------------
  1 | use 5.7.2;
  2 | use strict;
  3 | use ExtUtils::MakeMaker;
  4 | use strict;
  5 | 
  6 | my %tables = (euc_kr_t   => ['euc-kr.ucm',
  7 |                  'macKorean.ucm',
  8 |                  'cp949.ucm',
  9 |                  ],
 10 |           '5601_t'   => ['ksc5601.ucm'],
 11 |           johab_t    => ['johab.ucm'],
 12 |              );
 13 | 
 14 | unless ($ENV{AGGREGATE_TABLES}){
 15 |     my @ucm;
 16 |     for my $k (keys %tables){
 17 |     push @ucm, @{$tables{$k}};
 18 |     }
 19 |     %tables = ();
 20 |     my $seq = 0;
 21 |     for my $ucm (sort @ucm){
 22 |     # 8.3 compliance !
 23 |     my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
 24 |     $tables{$t} = [ $ucm ];
 25 |     }
 26 | }
 27 | 
 28 | my $name = 'KR';
 29 | 
 30 | WriteMakefile(
 31 |               INC		=> "-I../Encode",
 32 |           NAME		=> 'Encode::'.$name,
 33 |           VERSION_FROM	=> "$name.pm",
 34 |           OBJECT		=> '$(O_FILES)',
 35 |           'dist'		=> {
 36 |           COMPRESS	=> 'gzip -9f',
 37 |           SUFFIX	=> 'gz',
 38 |           DIST_DEFAULT => 'all tardist',
 39 |           },
 40 |           MAN3PODS	=> {},
 41 |           # OS 390 winges about line numbers > 64K ???
 42 |           XSOPT => '-nolinenumbers',
 43 |           );
 44 | 
 45 | package MY;
 46 | 
 47 | sub post_initialize
 48 | {
 49 |     my ($self) = @_;
 50 |     my %o;
 51 |     my $x = $self->{'OBJ_EXT'};
 52 |     # Add the table O_FILES
 53 |     foreach my $e (keys %tables)
 54 |     {
 55 |     $o{$e.$x} = 1;
 56 |     }
 57 |     $o{"$name$x"} = 1;
 58 |     $self->{'O_FILES'} = [sort keys %o];
 59 |     my @files = ("$name.xs");
 60 |     $self->{'C'} = ["$name.c"];
 61 |     $self->{SOURCE} .= " $name.c"
 62 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/;
 63 |     $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
 64 |     my %xs;
 65 |     foreach my $table (sort keys %tables) {
 66 |     push (@{$self->{'C'}},"$table.c");
 67 |     # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
 68 |     # get built.
 69 |     foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
 70 |         push (@files,$table.$ext);
 71 |     }
 72 |     $self->{SOURCE} .= " $table.c"
 73 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
 74 |     }
 75 |     $self->{'XS'} = { "$name.xs" => "$name.c" };
 76 |     $self->{'clean'}{'FILES'} .= join(' ',@files);
 77 |     open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
 78 |     print XS <<'END';
 79 | #define PERL_NO_GET_CONTEXT
 80 | #include <EXTERN.h>
 81 | #include <perl.h>
 82 | #include <XSUB.h>
 83 | #include "encode.h"
 84 | END
 85 |     foreach my $table (sort keys %tables) {
 86 |     print XS qq[#include "${table}.h"\n];
 87 |     }
 88 |     print XS <<"END";
 89 | 
 90 | static void
 91 | Encode_XSEncoding(pTHX_ encode_t *enc)
 92 | {
 93 |  dSP;
 94 |  HV *stash = gv_stashpv("Encode::XS", TRUE);
 95 |  SV *iv    = newSViv(PTR2IV(enc));
 96 |  SV *sv    = sv_bless(newRV_noinc(iv),stash);
 97 |  int i = 0;
 98 |  /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
 99 |  constness, in the hope that perl won't mess with it. */
100 |  assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
101 |  SvFLAGS(iv) |= SVp_POK;
102 |  SvPVX(iv) = (char*) enc->name[0];
103 |  PUSHMARK(sp);
104 |  XPUSHs(sv);
105 |  while (enc->name[i])
106 |   {
107 |    const char *name = enc->name[i++];
108 |    XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
109 |   }
110 |  PUTBACK;
111 |  call_pv("Encode::define_encoding",G_DISCARD);
112 |  SvREFCNT_dec(sv);
113 | }
114 | 
115 | MODULE = Encode::$name	PACKAGE = Encode::$name
116 | PROTOTYPES: DISABLE
117 | BOOT:
118 | {
119 | END
120 |     foreach my $table (sort keys %tables) {
121 |     print XS qq[#include "${table}.exh"\n];
122 |     }
123 |     print XS "}\n";
124 |     close(XS);
125 |     return "# Built $name.xs\n\n";
126 | }
127 | 
128 | sub postamble
129 | {
130 |     my $self = shift;
131 |     my $dir  = $self->catdir($self->updir,'ucm');
132 |     my $str  = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n";
133 |     $str    .= "$name.c : $name.xs ";
134 |     foreach my $table (sort keys %tables)
135 |     {
136 |     $str .= " $table.c";
137 |     }
138 |     $str .= "\n\n";
139 |     $str .= "$name\$(OBJ_EXT) : $name.c\n\n";
140 | 
141 |     my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
142 |     foreach my $table (sort keys %tables)
143 |     {
144 |     my $numlines = 1;
145 |     my $lengthsofar = length($str);
146 |     my $continuator = '';
147 |     $str .= "$table.c : $enc2xs Makefile.PL";
148 |     foreach my $file (@{$tables{$table}})
149 |     {
150 |         $str .= $continuator.' '.$self->catfile($dir,$file);
151 |         if ( length($str)-$lengthsofar > 128*$numlines )
152 |         {
153 |         $continuator .= " \\\n\t";
154 |         $numlines++;
155 |         } else {
156 |         $continuator = '';
157 |         }
158 |     }
159 |     my $plib   = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
160 |     $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
161 |     my $ucopts = '-"Q"';
162 |     $str .=  
163 |         qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
164 |     open (FILELIST, ">$table.fnm")
165 |         || die "Could not open $table.fnm: $!";
166 |     foreach my $file (@{$tables{$table}})
167 |     {
168 |         print FILELIST $self->catfile($dir,$file) . "\n";
169 |     }
170 |     close(FILELIST);
171 |     }
172 |     return $str;
173 | }
174 | 
175 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
 1 | NAME
 2 |        Encode - character encodings
 3 | 
 4 | SYNOPSIS
 5 |            use Encode;
 6 | 
 7 | DESCRIPTION
 8 |        The "Encode" module provides the interfaces between Perl's
 9 |        strings and the rest of the system.  Perl strings are
10 |        sequences of characters.
11 | 
12 |        See "perldoc Encode" for the rest of the story
13 | 
14 | INSTALLATION
15 | 
16 | To install this module, type the following:
17 | 
18 |    perl Makefile.PL
19 |    make
20 |    make test
21 |    make install
22 | 
23 | To install scripts under bin/ directories also,
24 | 
25 |    perl Makefile.PL MORE_SCRIPTS
26 |    make && make test && make install
27 | 
28 | By default, only enc2xs and piconv are installed.
29 | 
30 | To install *.ucm files also, say
31 | 
32 |    perl Makefile.PL INSTALL_UCM
33 |    make && make test && make install
34 | 
35 | By default, *.ucm are not installed.
36 | 
37 | DEPENDENCIES
38 | 
39 | This module requires perl5.7.3 or later.
40 | 
41 | MAINTAINER
42 | 
43 | This project was originated by Nick Ing-Simmons and later maintained by
44 | Dan Kogai <dankogai@dan.co.jp>.  See AUTHORS for the full list of people
45 | involved.
46 | 
47 | QUESTIONS?
48 | 
49 | If you have any questions which "perldoc Encode" does not answer, please
50 | feel free to ask at perl-unicode@perl.org.
51 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![CI on Perl 5.{36,34} on {Linux,macOS,Windows}](https://github.com/dankogai/p5-encode/actions/workflows/platforms.yml/badge.svg)](https://github.com/dankogai/p5-encode/actions/workflows/platforms.yml)
 2 | 
 3 | ## NAME
 4 | 
 5 | Encode - character encodings
 6 | 
 7 | ## SYNOPSIS
 8 | 
 9 |        use Encode;
10 | 
11 | ## COMMAND LINE
12 | 
13 |        $ encguess README.md
14 |        README.md	US-ASCII
15 | 
16 | See `encguess -h` for more options.
17 | 
18 | ## DESCRIPTION
19 | 
20 | The "Encode" module provides the interfaces between Perl's
21 | strings and the rest of the system.  Perl strings are
22 | sequences of characters.
23 | 
24 |        See `perldoc Encode` for the rest of the story
25 | 
26 | ## INSTALLATION
27 | 
28 | To install this module, type the following:
29 | 
30 |        perl Makefile.PL
31 |        make
32 |        make test
33 |        make install
34 | 
35 | To install scripts under bin/ directories also,
36 | 
37 |        perl Makefile.PL MORE_SCRIPTS
38 |        make && make test && make install
39 | 
40 | By default, only enc2xs and piconv are installed.
41 | 
42 | To install *.ucm files also, say
43 | 
44 |        perl Makefile.PL INSTALL_UCM
45 |        make && make test && make install
46 | 
47 | By default, *.ucm are not installed.
48 | 
49 | ## DEPENDENCIES
50 | 
51 | This module requires perl 5.7.3 or later.
52 | 
53 | ## MAINTAINER
54 | 
55 | This project was originated by Nick Ing-Simmons and later maintained by
56 | Dan Kogai `<dankogai at dan.co.jp>`.  See AUTHORS for the full list of people
57 | involved.
58 | 
59 | ## QUESTIONS?
60 | 
61 | If you have any questions which `perldoc Encode` does not answer, please
62 | feel free to ask at `perl-unicode@perl.org`.
63 | 


--------------------------------------------------------------------------------
/Symbol/Makefile.PL:
--------------------------------------------------------------------------------
  1 | use 5.7.2;
  2 | use strict;
  3 | use ExtUtils::MakeMaker;
  4 | 
  5 | my $name = 'Symbol';
  6 | my %tables = (
  7 |           symbol_t => [qw(
  8 |                   symbol.ucm
  9 |                   dingbats.ucm
 10 |                   adobeSymbol.ucm
 11 |                   adobeZdingbat.ucm
 12 |                   macSymbol.ucm
 13 |                   macDingbats.ucm
 14 |                   )
 15 |                   ],
 16 |           );
 17 | 
 18 | WriteMakefile(
 19 |               INC		=> "-I../Encode",
 20 |           NAME		=> 'Encode::'.$name,
 21 |           VERSION_FROM	=> "$name.pm",
 22 |           OBJECT		=> '$(O_FILES)',
 23 |           'dist'		=> {
 24 |           COMPRESS	=> 'gzip -9f',
 25 |           SUFFIX	=> 'gz',
 26 |           DIST_DEFAULT => 'all tardist',
 27 |           },
 28 |           MAN3PODS	=> {},
 29 |           # OS 390 winges about line numbers > 64K ???
 30 |           XSOPT => '-nolinenumbers',
 31 |           );
 32 | 
 33 | package MY;
 34 | 
 35 | sub post_initialize
 36 | {
 37 |     my ($self) = @_;
 38 |     my %o;
 39 |     my $x = $self->{'OBJ_EXT'};
 40 |     # Add the table O_FILES
 41 |     foreach my $e (keys %tables)
 42 |     {
 43 |     $o{$e.$x} = 1;
 44 |     }
 45 |     $o{"$name$x"} = 1;
 46 |     $self->{'O_FILES'} = [sort keys %o];
 47 |     my @files = ("$name.xs");
 48 |     $self->{'C'} = ["$name.c"];
 49 |     $self->{SOURCE} .= " $name.c"
 50 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/;
 51 |     $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
 52 |     my %xs;
 53 |     foreach my $table (sort keys %tables) {
 54 |     push (@{$self->{'C'}},"$table.c");
 55 |     # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
 56 |     # get built.
 57 |     foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
 58 |         push (@files,$table.$ext);
 59 |     }
 60 |     $self->{SOURCE} .= " $table.c"
 61 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
 62 |     }
 63 |     $self->{'XS'} = { "$name.xs" => "$name.c" };
 64 |     $self->{'clean'}{'FILES'} .= join(' ',@files);
 65 |     open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
 66 |     print XS <<'END';
 67 | #define PERL_NO_GET_CONTEXT
 68 | #include <EXTERN.h>
 69 | #include <perl.h>
 70 | #include <XSUB.h>
 71 | #include "encode.h"
 72 | END
 73 |     foreach my $table (sort keys %tables) {
 74 |     print XS qq[#include "${table}.h"\n];
 75 |     }
 76 |     print XS <<"END";
 77 | 
 78 | static void
 79 | Encode_XSEncoding(pTHX_ encode_t *enc)
 80 | {
 81 |  dSP;
 82 |  HV *stash = gv_stashpv("Encode::XS", TRUE);
 83 |  SV *iv    = newSViv(PTR2IV(enc));
 84 |  SV *sv    = sv_bless(newRV_noinc(iv),stash);
 85 |  int i = 0;
 86 |  /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
 87 |  constness, in the hope that perl won't mess with it. */
 88 |  assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
 89 |  SvFLAGS(iv) |= SVp_POK;
 90 |  SvPVX(iv) = (char*) enc->name[0];
 91 |  PUSHMARK(sp);
 92 |  XPUSHs(sv);
 93 |  while (enc->name[i])
 94 |   {
 95 |    const char *name = enc->name[i++];
 96 |    XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
 97 |   }
 98 |  PUTBACK;
 99 |  call_pv("Encode::define_encoding",G_DISCARD);
100 |  SvREFCNT_dec(sv);
101 | }
102 | 
103 | MODULE = Encode::$name	PACKAGE = Encode::$name
104 | PROTOTYPES: DISABLE
105 | BOOT:
106 | {
107 | END
108 |     foreach my $table (sort keys %tables) {
109 |     print XS qq[#include "${table}.exh"\n];
110 |     }
111 |     print XS "}\n";
112 |     close(XS);
113 |     return "# Built $name.xs\n\n";
114 | }
115 | 
116 | sub postamble
117 | {
118 |     my $self = shift;
119 |     my $dir  = $self->catdir($self->updir,'ucm');
120 |     my $str  = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n";
121 |     $str    .= "$name.c : $name.xs ";
122 |     foreach my $table (sort keys %tables)
123 |     {
124 |     $str .= " $table.c";
125 |     }
126 |     $str .= "\n\n";
127 |     $str .= "$name\$(OBJ_EXT) : $name.c\n\n";
128 | 
129 |     my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
130 |     foreach my $table (sort keys %tables)
131 |     {
132 |     my $numlines = 1;
133 |     my $lengthsofar = length($str);
134 |     my $continuator = '';
135 |     $str .= "$table.c : $enc2xs Makefile.PL";
136 |     foreach my $file (@{$tables{$table}})
137 |     {
138 |         $str .= $continuator.' '.$self->catfile($dir,$file);
139 |         if ( length($str)-$lengthsofar > 128*$numlines )
140 |         {
141 |         $continuator .= " \\\n\t";
142 |         $numlines++;
143 |         } else {
144 |         $continuator = '';
145 |         }
146 |     }
147 |     my $plib   = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
148 |     $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
149 |     my $ucopts = '-"Q" -"O"';
150 |     $str .=  
151 |         qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
152 |     open (FILELIST, ">$table.fnm")
153 |         || die "Could not open $table.fnm: $!";
154 |     foreach my $file (@{$tables{$table}})
155 |     {
156 |         print FILELIST $self->catfile($dir,$file) . "\n";
157 |     }
158 |     close(FILELIST);
159 |     }
160 |     return $str;
161 | }
162 | 
163 | 


--------------------------------------------------------------------------------
/Symbol/Symbol.pm:
--------------------------------------------------------------------------------
 1 | package Encode::Symbol;
 2 | use strict;
 3 | use warnings;
 4 | use Encode;
 5 | our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 6 | 
 7 | use XSLoader;
 8 | XSLoader::load( __PACKAGE__, $VERSION );
 9 | 
10 | 1;
11 | __END__
12 | 
13 | =head1 NAME
14 | 
15 | Encode::Symbol - Symbol Encodings
16 | 
17 | =head1 SYNOPSIS
18 | 
19 |     use Encode qw/encode decode/; 
20 |     $symbol  = encode("symbol", $utf8); # loads Encode::Symbol implicitly
21 |     $utf8 = decode("", $symbol);        # ditto
22 | 
23 | =head1 ABSTRACT
24 | 
25 | This module implements symbol and dingbats encodings.  Encodings
26 | supported are as follows.   
27 | 
28 |   Canonical   Alias		Description
29 |   --------------------------------------------------------------------
30 |   symbol
31 |   dingbats
32 |   AdobeZDingbat
33 |   AdobeSymbol
34 |   MacDingbats
35 | 
36 | =head1 DESCRIPTION
37 | 
38 | To find out how to use this module in detail, see L<Encode>.
39 | 
40 | =head1 SEE ALSO
41 | 
42 | L<Encode>
43 | 
44 | =cut
45 | 


--------------------------------------------------------------------------------
/TW/Makefile.PL:
--------------------------------------------------------------------------------
  1 | use 5.7.2;
  2 | use strict;
  3 | use ExtUtils::MakeMaker;
  4 | use strict;
  5 | 
  6 | my %tables = (big5_t	=> ['big5-eten.ucm',
  7 |                 'big5-hkscs.ucm',
  8 |                 'macChintrad.ucm',
  9 |                 'cp950.ucm'],
 10 |              );
 11 | 
 12 | unless ($ENV{AGGREGATE_TABLES}){
 13 |     my @ucm;
 14 |     for my $k (keys %tables){
 15 |     push @ucm, @{$tables{$k}};
 16 |     }
 17 |     %tables = ();
 18 |     my $seq = 0;
 19 |     for my $ucm (sort @ucm){
 20 |     # 8.3 compliance !
 21 |     my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
 22 |     $tables{$t} = [ $ucm ];
 23 |     }
 24 | }
 25 | 
 26 | my $name = 'TW';
 27 | 
 28 | WriteMakefile(
 29 |               INC		=> "-I../Encode",
 30 |           NAME		=> 'Encode::'.$name,
 31 |           VERSION_FROM	=> "$name.pm",
 32 |           OBJECT		=> '$(O_FILES)',
 33 |           'dist'		=> {
 34 |           COMPRESS	=> 'gzip -9f',
 35 |           SUFFIX	=> 'gz',
 36 |           DIST_DEFAULT => 'all tardist',
 37 |           },
 38 |           MAN3PODS	=> {},
 39 |           # OS 390 winges about line numbers > 64K ???
 40 |           XSOPT => '-nolinenumbers',
 41 |           );
 42 | 
 43 | package MY;
 44 | 
 45 | sub post_initialize
 46 | {
 47 |     my ($self) = @_;
 48 |     my %o;
 49 |     my $x = $self->{'OBJ_EXT'};
 50 |     # Add the table O_FILES
 51 |     foreach my $e (keys %tables)
 52 |     {
 53 |     $o{$e.$x} = 1;
 54 |     }
 55 |     $o{"$name$x"} = 1;
 56 |     $self->{'O_FILES'} = [sort keys %o];
 57 |     my @files = ("$name.xs");
 58 |     $self->{'C'} = ["$name.c"];
 59 |     $self->{SOURCE} .= " $name.c"
 60 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/;
 61 |     $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
 62 |     my %xs;
 63 |     foreach my $table (sort keys %tables) {
 64 |     push (@{$self->{'C'}},"$table.c");
 65 |     # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
 66 |     # get built.
 67 |     foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
 68 |         push (@files,$table.$ext);
 69 |     }
 70 |     $self->{SOURCE} .= " $table.c"
 71 |         if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
 72 |     }
 73 |     $self->{'XS'} = { "$name.xs" => "$name.c" };
 74 |     $self->{'clean'}{'FILES'} .= join(' ',@files);
 75 |     open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
 76 |     print XS <<'END';
 77 | #define PERL_NO_GET_CONTEXT
 78 | #include <EXTERN.h>
 79 | #include <perl.h>
 80 | #include <XSUB.h>
 81 | #include "encode.h"
 82 | END
 83 |     foreach my $table (sort keys %tables) {
 84 |     print XS qq[#include "${table}.h"\n];
 85 |     }
 86 |     print XS <<"END";
 87 | 
 88 | static void
 89 | Encode_XSEncoding(pTHX_ encode_t *enc)
 90 | {
 91 |  dSP;
 92 |  HV *stash = gv_stashpv("Encode::XS", TRUE);
 93 |  SV *iv    = newSViv(PTR2IV(enc));
 94 |  SV *sv    = sv_bless(newRV_noinc(iv),stash);
 95 |  int i = 0;
 96 |  /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
 97 |  constness, in the hope that perl won't mess with it. */
 98 |  assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
 99 |  SvFLAGS(iv) |= SVp_POK;
100 |  SvPVX(iv) = (char*) enc->name[0];
101 |  PUSHMARK(sp);
102 |  XPUSHs(sv);
103 |  while (enc->name[i])
104 |   {
105 |    const char *name = enc->name[i++];
106 |    XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
107 |   }
108 |  PUTBACK;
109 |  call_pv("Encode::define_encoding",G_DISCARD);
110 |  SvREFCNT_dec(sv);
111 | }
112 | 
113 | MODULE = Encode::$name	PACKAGE = Encode::$name
114 | PROTOTYPES: DISABLE
115 | BOOT:
116 | {
117 | END
118 |     foreach my $table (sort keys %tables) {
119 |     print XS qq[#include "${table}.exh"\n];
120 |     }
121 |     print XS "}\n";
122 |     close(XS);
123 |     return "# Built $name.xs\n\n";
124 | }
125 | 
126 | sub postamble
127 | {
128 |     my $self = shift;
129 |     my $dir  = $self->catdir($self->updir,'ucm');
130 |     my $str  = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n";
131 |     $str    .= "$name.c : $name.xs ";
132 |     foreach my $table (sort keys %tables)
133 |     {
134 |     $str .= " $table.c";
135 |     }
136 |     $str .= "\n\n";
137 |     $str .= "$name\$(OBJ_EXT) : $name.c\n\n";
138 | 
139 |     my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
140 |     foreach my $table (sort keys %tables)
141 |     {
142 |     my $numlines = 1;
143 |     my $lengthsofar = length($str);
144 |     my $continuator = '';
145 |     $str .= "$table.c : $enc2xs Makefile.PL";
146 |     foreach my $file (@{$tables{$table}})
147 |     {
148 |         $str .= $continuator.' '.$self->catfile($dir,$file);
149 |         if ( length($str)-$lengthsofar > 128*$numlines )
150 |         {
151 |         $continuator .= " \\\n\t";
152 |         $numlines++;
153 |         } else {
154 |         $continuator = '';
155 |         }
156 |     }
157 |     my $plib   = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
158 |     $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
159 |     my $ucopts = '-"Q"';
160 |     $str .=  
161 |         qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
162 |     open (FILELIST, ">$table.fnm")
163 |         || die "Could not open $table.fnm: $!";
164 |     foreach my $file (@{$tables{$table}})
165 |     {
166 |         print FILELIST $self->catfile($dir,$file) . "\n";
167 |     }
168 |     close(FILELIST);
169 |     }
170 |     return $str;
171 | }
172 | 
173 | 


--------------------------------------------------------------------------------
/TW/TW.pm:
--------------------------------------------------------------------------------
 1 | package Encode::TW;
 2 | BEGIN {
 3 |     if ( ord("A") == 193 ) {
 4 |         die "Encode::TW not supported on EBCDIC\n";
 5 |     }
 6 | }
 7 | use strict;
 8 | use warnings;
 9 | use Encode;
10 | our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
11 | use XSLoader;
12 | XSLoader::load( __PACKAGE__, $VERSION );
13 | 
14 | 1;
15 | __END__
16 | 
17 | =head1 NAME
18 | 
19 | Encode::TW - Taiwan-based Chinese Encodings
20 | 
21 | =head1 SYNOPSIS
22 | 
23 |     use Encode qw/encode decode/; 
24 |     $big5 = encode("big5", $utf8); # loads Encode::TW implicitly
25 |     $utf8 = decode("big5", $big5); # ditto
26 | 
27 | =head1 DESCRIPTION
28 | 
29 | This module implements tradition Chinese charset encodings as used
30 | in Taiwan and Hong Kong.
31 | Encodings supported are as follows.
32 | 
33 |   Canonical   Alias		Description
34 |   --------------------------------------------------------------------
35 |   big5-eten   /\bbig-?5$/i	Big5 encoding (with ETen extensions)
36 |           /\bbig5-?et(en)?$/i
37 |           /\btca-?big5$/i
38 |   big5-hkscs  /\bbig5-?hk(scs)?$/i
39 |               /\bhk(scs)?-?big5$/i
40 |                                 Big5 + Cantonese characters in Hong Kong
41 |   MacChineseTrad		Big5 + Apple Vendor Mappings
42 |   cp950		                Code Page 950 
43 |                                 = Big5 + Microsoft vendor mappings
44 |   --------------------------------------------------------------------
45 | 
46 | To find out how to use this module in detail, see L<Encode>.
47 | 
48 | =head1 NOTES
49 | 
50 | Due to size concerns, C<EUC-TW> (Extended Unix Character), C<CCCII>
51 | (Chinese Character Code for Information Interchange), C<BIG5PLUS>
52 | (CMEX's Big5+) and C<BIG5EXT> (CMEX's Big5e) are distributed separately
53 | on CPAN, under the name L<Encode::HanExtra>. That module also contains
54 | extra China-based encodings.
55 | 
56 | =head1 BUGS
57 | 
58 | Since the original C<big5> encoding (1984) is not supported anywhere
59 | (glibc and DOS-based systems uses C<big5> to mean C<big5-eten>; Microsoft
60 | uses C<big5> to mean C<cp950>), a conscious decision was made to alias
61 | C<big5> to C<big5-eten>, which is the de facto superset of the original
62 | big5.
63 | 
64 | The C<CNS11643> encoding files are not complete. For common C<CNS11643>
65 | manipulation, please use C<EUC-TW> in L<Encode::HanExtra>, which contains
66 | planes 1-7.
67 | 
68 | The ASCII region (0x00-0x7f) is preserved for all encodings, even
69 | though this conflicts with mappings by the Unicode Consortium.
70 | 
71 | =head1 SEE ALSO
72 | 
73 | L<Encode>
74 | 
75 | =cut
76 | 


--------------------------------------------------------------------------------
/Unicode/Makefile.PL:
--------------------------------------------------------------------------------
 1 | use 5.7.2;
 2 | use strict;
 3 | use ExtUtils::MakeMaker;
 4 | 
 5 | WriteMakefile(
 6 |           INC		=> "-I../Encode",
 7 |           NAME		=> 'Encode::Unicode',
 8 |           VERSION_FROM	=> "Unicode.pm",
 9 |           MAN3PODS  => {},
10 |           );
11 | 
12 | 


--------------------------------------------------------------------------------
/bin/encguess:
--------------------------------------------------------------------------------
  1 | #!./perl
  2 | use 5.008001;
  3 | BEGIN { pop @INC if $INC[-1] eq '.' }
  4 | use strict;
  5 | use warnings;
  6 | use Encode;
  7 | use Getopt::Std;
  8 | use Carp;
  9 | use Encode::Guess;
 10 | $Getopt::Std::STANDARD_HELP_VERSION = 1;
 11 | 
 12 | my %opt;
 13 | getopts( "huSs:", \%opt );
 14 | my @suspect_list;
 15 | list_valid_suspects() and exit if $opt{S};
 16 | @suspect_list = split /:,/, $opt{s} if $opt{s};
 17 | HELP_MESSAGE() if $opt{h};
 18 | HELP_MESSAGE() unless @ARGV;
 19 | do_guess($_) for @ARGV;
 20 | 
 21 | sub read_file {
 22 |     my $filename = shift;
 23 |     local $/;
 24 |     open my $fh, '<:raw', $filename or croak "$filename:$!";
 25 |     my $content = <$fh>;
 26 |     close $fh;
 27 |     return $content;
 28 | }
 29 | 
 30 | sub do_guess {
 31 |     my $filename = shift;
 32 |     my $data     = read_file($filename);
 33 |     my $enc      = guess_encoding( $data, @suspect_list );
 34 |     if ( !ref($enc) && $opt{u} ) {
 35 |         return 1;
 36 |     }
 37 |     print "$filename\t";
 38 |     if ( ref($enc) ) {
 39 |         print $enc->mime_name();
 40 |     }
 41 |     else {
 42 |         print "unknown";
 43 |     }
 44 |     print "\n";
 45 |     return 1;
 46 | }
 47 | 
 48 | sub list_valid_suspects {
 49 |     print join( "\n", Encode->encodings(":all") );
 50 |     print "\n";
 51 |     return 1;
 52 | }
 53 | 
 54 | sub HELP_MESSAGE {
 55 |     exec 'pod2usage', $0 or die "pod2usage: $!" 
 56 | }
 57 | __END__
 58 | =head1 NAME
 59 | 
 60 | encguess - guess character encodings of files
 61 | 
 62 | =head1 VERSION
 63 | 
 64 | $Id: encguess,v 0.4 2023/11/10 01:10:50 dankogai Exp $
 65 | 
 66 | =head1 SYNOPSIS
 67 | 
 68 |   encguess [switches] filename...
 69 | 
 70 | =head2 SWITCHES
 71 | 
 72 | =over 2
 73 | 
 74 | =item -h
 75 | 
 76 | show this message and exit.
 77 | 
 78 | =item -s
 79 | 
 80 | specify a list of "suspect encoding types" to test, 
 81 | separated by either C<:> or C<,>
 82 | 
 83 | =item -S
 84 | 
 85 | output a list of all acceptable encoding types that can be used with
 86 | the -s param
 87 | 
 88 | =item -u
 89 | 
 90 | suppress display of unidentified types
 91 | 
 92 | =back
 93 | 
 94 | =head2 EXAMPLES:
 95 | 
 96 | =over 2
 97 | 
 98 | =item *
 99 | 
100 | Guess encoding of a file named C<test.txt>, using only the default
101 | suspect types.
102 | 
103 |    encguess test.txt
104 | 
105 | =item *
106 | 
107 | Guess the encoding type of a file named C<test.txt>, using the suspect
108 | types C<euc-jp,shiftjis,7bit-jis>.
109 | 
110 |    encguess -s euc-jp,shiftjis,7bit-jis test.txt
111 |    encguess -s euc-jp:shiftjis:7bit-jis test.txt
112 | 
113 | =item *
114 | 
115 | Guess the encoding type of several files, do not display results for
116 | unidentified files.
117 | 
118 |    encguess -us euc-jp,shiftjis,7bit-jis test*.txt
119 | 
120 | =back
121 | 
122 | =head1 DESCRIPTION
123 | 
124 | The encoding identification is done by checking one encoding type at a
125 | time until all but the right type are eliminated. The set of encoding
126 | types to try is defined by the -s parameter and defaults to ascii,
127 | utf8 and UTF-16/32 with BOM. This can be overridden by passing one or
128 | more encoding types via the -s parameter. If you need to pass in
129 | multiple suspect encoding types, use a quoted string with the a space
130 | separating each value.
131 | 
132 | =head1 SEE ALSO
133 | 
134 | L<Encode::Guess>, L<Encode::Detect>
135 | 
136 | =head1 LICENSE AND COPYRIGHT
137 | 
138 | Copyright 2015 Michael LaGrasta and Dan Kogai.
139 | 
140 | This program is free software; you can redistribute it and/or modify it
141 | under the terms of the Artistic License (2.0). You may obtain a
142 | copy of the full license at:
143 | 
144 | L<http://www.perlfoundation.org/artistic_license_2_0>
145 | 
146 | =cut
147 | 


--------------------------------------------------------------------------------
/bin/ucm2table:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # $Id: ucm2table,v 2.1 2006/05/03 18:24:10 dankogai Exp $
 3 | #
 4 | 
 5 | use 5.006;
 6 | use strict;
 7 | use Getopt::Std;
 8 | my %Opt;
 9 | getopts("aeu", \%Opt);
10 | my %Chartab;
11 | 
12 | my $Hex = '[0-9A-Fa-f]';
13 | while(<>){
14 |     chomp;
15 |     my ($uni, $enc, $fb) = 
16 |         /^<U($Hex+)>\s+(\S+)\s+\|(\d)/o or next;
17 |     $fb eq '0' or next;
18 |     my @byte = ();
19 |     my $ord = 0;
20 |     while($enc =~ /\G\\x($Hex+)/iog){
21 |         my $byte = hex($1);
22 |         push @byte, $byte;
23 |         $ord <<= 8; $ord += $byte;
24 |     };
25 |     # print join('', @byte), " => $ord \n";
26 |     if ($Opt{u}){
27 |         $Chartab{$ord} = pack("U", hex($uni));
28 |     }else{
29 |         $Chartab{$ord} = pack("C*", @byte);
30 |     }
31 | }
32 | 
33 | my $start = $Opt{a} ? 0x20 : 0xa0;
34 | 
35 | for (my $x = $start; $x <= 0xffff; $x += 32) {
36 |     my $line =  '';
37 |     for my $i (0..31){
38 |     my $num = $x+$i; $num eq 0x7f and next; # skip delete
39 |     my $char = $Chartab{$num};
40 |     $line .= !$char ? " " : 
41 |         ($num < 0x7f ) ? " $char" : $char ;
42 |     }
43 |     $line =~ /^\s+$/o and next;
44 |     printf "0x%04x: $line\n", $x;
45 | }
46 | 


--------------------------------------------------------------------------------
/bin/ucmlint:
--------------------------------------------------------------------------------
  1 | #!/usr/local/bin/perl
  2 | #
  3 | # $Id: ucmlint,v 2.4 2017/06/10 17:23:50 dankogai Exp $
  4 | #
  5 | 
  6 | BEGIN { pop @INC if $INC[-1] eq '.' }
  7 | use strict;
  8 | our  $VERSION = do { my @r = (q$Revision: 2.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
  9 | 
 10 | use Getopt::Std;
 11 | our %Opt;
 12 | getopts("Dehfv", \%Opt);
 13 | 
 14 | if ($Opt{e}){
 15 |    eval { require Encode } or die "can't load Encode : $@";
 16 | }
 17 | 
 18 | $Opt{h} and help();
 19 | @ARGV or help();
 20 | 
 21 | sub help{
 22 |     print <<"";
 23 | $0 -[Dehfv] [ucm files ...]
 24 |   -D debug mode on
 25 |   -e test with Encode module also (requires perl 5.7.3 or higher)
 26 |   -h shows this message
 27 |   -f forces roundtrip check even for |[123]
 28 |   -v verbose mode
 29 | 
 30 | }
 31 | 
 32 | $| = 1;
 33 | my (%Hdr, %U2E, %E2U, %Fallback);
 34 | my $in_charmap = 0;
 35 | my $nerror = 0;
 36 | my $nwarning = 0;
 37 | 
 38 | sub nit($;$){
 39 |     my ($msg, $level) = @_;
 40 |     my $lstr;
 41 |     if ($level == 2){
 42 |         $lstr = 'notice';
 43 |     }elsif ($level == 1){
 44 |         $lstr = 'warning'; $nwarning++;
 45 |     }else{
 46 |         $lstr = 'error'; $nerror++;
 47 |     }
 48 |     print "$ARGV:$lstr in line $.: $msg\n";
 49 | }
 50 | 
 51 | for $ARGV (@ARGV){
 52 |     open UCM, $ARGV or die "$ARGV:$!";
 53 |     %Hdr = %U2E = %E2U = %Fallback = ();
 54 |     $in_charmap = $nerror = $nwarning = 0;
 55 |     $. = 0;
 56 |     while(<UCM>){
 57 |         chomp;
 58 |         s/\s*#.*$//o; /^$/ and next;
 59 |         if ($_ eq "CHARMAP"){ 
 60 |             $in_charmap = 1;
 61 |             for my $must (qw/code_set_name mb_cur_min mb_cur_max/){
 62 |                 exists $Hdr{$must} or nit "<$must> nonexistent";
 63 |             }
 64 |             $Hdr{mb_cur_min} > $Hdr{mb_cur_max}
 65 |                 and nit sprintf("mb_cur_min(%d) > mb_cur_max(%d)",
 66 |                                 $Hdr{mb_cur_min},$Hdr{mb_cur_max});
 67 |             $in_charmap = 1;
 68 |             next;
 69 |         }
 70 |         unless ($in_charmap){
 71 |             my($hkey, $hvalue) = /^<(\S+)>\s+[\"\']?([^\"\']+)/o or next;
 72 |             $Opt{D} and warn "$hkey => $hvalue";
 73 |             if ($hkey eq "code_set_name"){ # name check
 74 |                 exists $Hdr{code_set_name} 
 75 |                     and nit "Duplicate <code_set_name>: $hkey";
 76 |             }
 77 |             if ($hkey eq "code_set_alias"){ # alias check
 78 |                 $hvalue eq $Hdr{code_set_name}
 79 |                     and nit qq(alias "$hvalue" is already in <code_set_name>);
 80 |             }
 81 |             $Hdr{$hkey} = $hvalue;
 82 |         }else{
 83 |             my $name = $Hdr{code_set_name};
 84 |             my($unistr, $encstr, $fb) = /^(\S+)\s+(\S+)\s(\S+)/o or next;
 85 |             $Opt{v} and nit $_, 2;
 86 |             my $uni = uniparse($unistr);
 87 |             my $enc = encparse($encstr);
 88 |             $fb =~ /^\|([0123])$/ or nit "malformed fallback: $fb";
 89 |             $fb = $1; 
 90 |             $Opt{f} and $fb = 0;
 91 |             unless ($fb == 3){ # check uni -> enc
 92 |                 if (exists $U2E{$uni}){
 93 |                     nit "dupe encode map: U$uni => $U2E{$uni} and $enc", 1;
 94 |                 }else{
 95 |                     $U2E{$uni} = $enc;
 96 |                     $Fallback{$uni}{$enc} = 1 if $fb == 1;
 97 |                     if ($Opt{e}) {
 98 |                         my $e = hex2enc($enc);
 99 |                         my $u = hex2uni($uni);
100 |                         my $eu = Encode::encode($name, $u);
101 |                         $e eq $eu
102 |                             or nit qq(encode('$name', $uni) != $enc);
103 |                     }
104 |                 }
105 |             }
106 |             unless ($fb == 1){  # check enc -> uni
107 |                 if (exists $E2U{$enc}){
108 |                     nit "dupe decode map: $enc => U$E2U{$enc} and U$uni", 1;
109 |                 }else{
110 |                     $E2U{$enc} = $uni;
111 |                     $Fallback{$enc}{$uni} = 1 if $fb == 3;
112 |                     if ($Opt{e}) {
113 |                         my $e = hex2enc($enc);
114 |                         my $u = hex2uni($uni);
115 |                         $Opt{D} and warn "$uni, $enc";
116 |                         my $de = Encode::decode($name, $e);
117 |                         $de eq $u
118 |                             or nit qq(decode('$name', $enc) != $uni);
119 |                     }
120 |                 }
121 |             }
122 |             # warn "$uni, $enc, $fb";
123 |         }
124 |     }
125 |     $in_charmap or nit "Where is CHARMAP?";
126 |     checkRT();
127 |     printf ("$ARGV: %s error%s found\n", 
128 |             ($nerror == 0 ? 'no' : $nerror),
129 |             ($nerror > 1 ? 's' : ''));
130 | }
131 | 
132 | exit;
133 | 
134 | sub hex2enc{
135 |     pack("C*", map {hex($_)} split(",", shift));
136 | }
137 | sub hex2uni{
138 |     join("", map { chr(hex($_)) } split(",", shift));
139 | }
140 | 
141 | sub checkRT{
142 |     for my $uni (keys %E2U){
143 |         my $enc = $U2E{$uni} or next; # okay
144 |         $E2U{$U2E{$uni}} eq $uni or $Fallback{$uni}{$enc} or
145 |             nit "RT failure: U$uni => $enc =>U$E2U{$U2E{$uni}}";
146 |     }
147 |     for my $enc (keys %E2U){
148 |         my $uni = $E2U{$enc} or next; # okay
149 |         $U2E{$E2U{$enc}} eq $enc or $Fallback{$enc}{$uni} or
150 |             nit "RT failure: $enc => U$uni => $U2E{$E2U{$enc}}";
151 |     }
152 | }
153 | 
154 | 
155 | sub uniparse{
156 |     my $str = shift;
157 |     my @u;
158 |     push @u, $1 while($str =~ /\G<U(.*?)>/ig);
159 |     for my $u (@u){
160 |         $u =~ /^([0-9A-Za-z]+)$/o
161 |             or nit "malformed Unicode character: $u";
162 |     }
163 |     return join(',', @u);
164 | }
165 | 
166 | sub encparse{
167 |     my $str = shift;
168 |     my @e;
169 |     for my $e (split /\\x/io, $str){
170 |         $e or next; # first \x
171 |         $e =~ /^([0-9A-Za-z]{1,2})$/io
172 |             or nit "Hex $e in $str is bogus";
173 |         push @e, $1;
174 |     }
175 |     return join(',', @e);
176 | }
177 | 
178 | 
179 | 
180 | __END__
181 | 
182 | A UCM file looks like this.
183 | 
184 |   #
185 |   # Comments
186 |   #
187 |   <code_set_name> "US-ascii" # Required
188 |   <code_set_alias> "ascii"   # Optional
189 |   <mb_cur_min> 1             # Required; usually 1
190 |   <mb_cur_max> 1             # Max. # of bytes/char
191 |   <subchar> \x3F             # Substitution char
192 |   #
193 |   CHARMAP
194 |   <U0000> \x00 |0 # <control>
195 |   <U0001> \x01 |0 # <control>
196 |   <U0002> \x02 |0 # <control>
197 |   ....
198 |   <U007C> \x7C |0 # VERTICAL LINE
199 |   <U007D> \x7D |0 # RIGHT CURLY BRACKET
200 |   <U007E> \x7E |0 # TILDE
201 |   <U007F> \x7F |0 # <control>
202 |   END CHARMAP
203 | 
204 | 


--------------------------------------------------------------------------------
/bin/ucmsort:
--------------------------------------------------------------------------------
 1 | #!/usr/local/bin/perl
 2 | #
 3 | # $Id: ucmsort,v 2.2 2006/05/03 18:24:10 dankogai Exp $
 4 | #
 5 | use strict;
 6 | my @lines;
 7 | my ($head, $tail);
 8 | while (<>){
 9 |     unless (m/^<U/o){
10 |         unless(@lines){
11 |         $head .= $_;
12 |     }else{ 
13 |         $tail .= $_;
14 |     }
15 |     next;
16 |     }
17 |     chomp;
18 |     my @words = split;
19 |     my $u = shift @words;
20 |     $u =~ s/^<U//o; $u =~ s/>.*//o;
21 |     push @lines,[ $u, @words ];
22 | }
23 | 
24 | print $head;
25 | for (sort {
26 |     hex($a->[0]) <=> hex($b->[0]) # Unicode descending order
27 |     or $a->[2] cmp $b->[2] # fallback descending order
28 |     or $a->[1] cmp $b->[1] # Encoding descending order
29 |     }
30 |      @lines) {
31 |     my $u = shift @$_;
32 |     print join(" " => "<U$u>", @$_), "\n";
33 | }
34 | print $tail;
35 | __END__
36 | 


--------------------------------------------------------------------------------
/encengine.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | Data structures for encoding transformations.
  3 | 
  4 | Perl works internally in either a native 'byte' encoding or
  5 | in UTF-8 encoded Unicode.  We have no immediate need for a "wchar_t"
  6 | representation. When we do we can use utf8_to_uv().
  7 | 
  8 | Most character encodings are either simple byte mappings or
  9 | variable length multi-byte encodings. UTF-8 can be viewed as a
 10 | rather extreme case of the latter.
 11 | 
 12 | So to solve an important part of perl's encode needs we need to solve the
 13 | "multi-byte -> multi-byte" case. The simple byte forms are then just degenerate
 14 | case. (Where one of multi-bytes will usually be UTF-8.)
 15 | 
 16 | The other type of encoding is a shift encoding where a prefix sequence
 17 | determines what subsequent bytes mean. Such encodings have state.
 18 | 
 19 | We also need to handle case where a character in one encoding has to be
 20 | represented as multiple characters in the other. e.g. letter+diacritic.
 21 | 
 22 | The process can be considered as pseudo perl:
 23 | 
 24 | my $dst = '';
 25 | while (length($src))
 26 |  {
 27 |   my $size    = src_count($src);
 28 |   my $in_seq  = substr($src,0,$size,'');
 29 |   my $out_seq = $s2d_hash{$in_seq};
 30 |   if (defined $out_seq)
 31 |    {
 32 |     $dst .= $out_seq;
 33 |    }
 34 |   else
 35 |    {
 36 |     # an error condition
 37 |    }
 38 |  }
 39 | return $dst;
 40 | 
 41 | That has the following components:
 42 |  &src_count - a "rule" for how many bytes make up the next character in the
 43 |               source.
 44 |  %s2d_hash  - a mapping from input sequences to output sequences
 45 | 
 46 | The problem with that scheme is that it does not allow the output
 47 | character repertoire to affect the characters considered from the
 48 | input.
 49 | 
 50 | So we use a "trie" representation which can also be considered
 51 | a state machine:
 52 | 
 53 | my $dst   = '';
 54 | my $seq   = \@s2d_seq;
 55 | my $next  = \@s2d_next;
 56 | while (length($src))
 57 |  {
 58 |   my $byte    = $substr($src,0,1,'');
 59 |   my $out_seq = $seq->[$byte];
 60 |   if (defined $out_seq)
 61 |    {
 62 |     $dst .= $out_seq;
 63 |    }
 64 |   else
 65 |    {
 66 |     # an error condition
 67 |    }
 68 |   ($next,$seq) = @$next->[$byte] if $next;
 69 |  }
 70 | return $dst;
 71 | 
 72 | There is now a pair of data structures to represent everything.
 73 | It is valid for output sequence at a particular point to
 74 | be defined but zero length, that just means "don't know yet".
 75 | For the single byte case there is no 'next' so new tables will be the same as
 76 | the original tables. For a multi-byte case a prefix byte will flip to the tables
 77 | for  the next page (adding nothing to the output), then the tables for the page
 78 | will provide the actual output and set tables back to original base page.
 79 | 
 80 | This scheme can also handle shift encodings.
 81 | 
 82 | A slight enhancement to the scheme also allows for look-ahead - if
 83 | we add a flag to re-add the removed byte to the source we could handle
 84 |   a" -> U+00E4 (LATIN SMALL LETTER A WITH DIAERESIS)
 85 |   ab -> a (and take b back please)
 86 | 
 87 | */
 88 | 
 89 | #define PERL_NO_GET_CONTEXT
 90 | #include <EXTERN.h>
 91 | #include <perl.h>
 92 | #include "encode.h"
 93 | 
 94 | int
 95 | do_encode(const encpage_t * enc, const U8 * src, STRLEN * slen, U8 * dst,
 96 |       STRLEN dlen, STRLEN * dout, int approx, const U8 *term, STRLEN tlen)
 97 | {
 98 |     const U8 *s = src;
 99 |     const U8 *send = s + *slen;
100 |     const U8 *last = s;
101 |     U8 *d = dst;
102 |     U8 *dend = d + dlen, *dlast = d;
103 |     int code = 0;
104 |     if (!dst)
105 |       return ENCODE_NOSPACE;
106 |     while (s < send) {
107 |         const encpage_t *e = enc;
108 |         U8 byte = *s;
109 |         while (byte > e->max)
110 |             e++;
111 |         if (byte >= e->min && e->slen && (approx || !(e->slen & 0x80))) {
112 |             const U8 *cend = s + (e->slen & 0x7f);
113 |             if (cend <= send) {
114 |                 STRLEN n;
115 |                 if ((n = e->dlen)) {
116 |                     const U8 *out = e->seq + n * (byte - e->min);
117 |                     U8 *oend = d + n;
118 |                     if (dst) {
119 |                         if (oend <= dend) {
120 |                             while (d < oend)
121 |                                 *d++ = *out++;
122 |                         }
123 |                         else {
124 |                             /* Out of space */
125 |                             code = ENCODE_NOSPACE;
126 |                             break;
127 |                         }
128 |                     }
129 |                     else
130 |                         d = oend;
131 |                 }
132 |                 enc = e->next;
133 |                 s++;
134 |                 if (s == cend) {
135 |                     if (approx && (e->slen & 0x80))
136 |                         code = ENCODE_FALLBACK;
137 |                     last = s;
138 |                     if (term && (STRLEN)(d-dlast) == tlen && memEQ(dlast, term, tlen)) {
139 |                         code = ENCODE_FOUND_TERM;
140 |                         break;
141 |                     }
142 |                     dlast = d;
143 |                 }
144 |             }
145 |             else {
146 |                 /* partial source character */
147 |                 code = ENCODE_PARTIAL;
148 |                 break;
149 |             }
150 |         }
151 |         else {
152 |             /* Cannot represent */
153 |             code = ENCODE_NOREP;
154 |             break;
155 |         }
156 |     }
157 |     *slen = last - src;
158 |     *dout = d - dst;
159 |     return code;
160 | }
161 | 


--------------------------------------------------------------------------------
/lib/Encode/CJKConstants.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/lib/Encode/CJKConstants.pm


--------------------------------------------------------------------------------
/lib/Encode/Config.pm:
--------------------------------------------------------------------------------
  1 | #
  2 | # Demand-load module list
  3 | #
  4 | package Encode::Config;
  5 | our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
  6 | 
  7 | use strict;
  8 | use warnings;
  9 | 
 10 | our %ExtModule = (
 11 | 
 12 |     # Encode::Byte
 13 |     #iso-8859-1 is in Encode.pm itself
 14 |     'iso-8859-2'            => 'Encode::Byte',
 15 |     'iso-8859-3'            => 'Encode::Byte',
 16 |     'iso-8859-4'            => 'Encode::Byte',
 17 |     'iso-8859-5'            => 'Encode::Byte',
 18 |     'iso-8859-6'            => 'Encode::Byte',
 19 |     'iso-8859-7'            => 'Encode::Byte',
 20 |     'iso-8859-8'            => 'Encode::Byte',
 21 |     'iso-8859-9'            => 'Encode::Byte',
 22 |     'iso-8859-10'           => 'Encode::Byte',
 23 |     'iso-8859-11'           => 'Encode::Byte',
 24 |     'iso-8859-13'           => 'Encode::Byte',
 25 |     'iso-8859-14'           => 'Encode::Byte',
 26 |     'iso-8859-15'           => 'Encode::Byte',
 27 |     'iso-8859-16'           => 'Encode::Byte',
 28 |     'koi8-f'                => 'Encode::Byte',
 29 |     'koi8-r'                => 'Encode::Byte',
 30 |     'koi8-u'                => 'Encode::Byte',
 31 |     'viscii'                => 'Encode::Byte',
 32 |     'cp424'                 => 'Encode::Byte',
 33 |     'cp437'                 => 'Encode::Byte',
 34 |     'cp737'                 => 'Encode::Byte',
 35 |     'cp775'                 => 'Encode::Byte',
 36 |     'cp850'                 => 'Encode::Byte',
 37 |     'cp852'                 => 'Encode::Byte',
 38 |     'cp855'                 => 'Encode::Byte',
 39 |     'cp856'                 => 'Encode::Byte',
 40 |     'cp857'                 => 'Encode::Byte',
 41 |     'cp858'                 => 'Encode::Byte',
 42 |     'cp860'                 => 'Encode::Byte',
 43 |     'cp861'                 => 'Encode::Byte',
 44 |     'cp862'                 => 'Encode::Byte',
 45 |     'cp863'                 => 'Encode::Byte',
 46 |     'cp864'                 => 'Encode::Byte',
 47 |     'cp865'                 => 'Encode::Byte',
 48 |     'cp866'                 => 'Encode::Byte',
 49 |     'cp869'                 => 'Encode::Byte',
 50 |     'cp874'                 => 'Encode::Byte',
 51 |     'cp1006'                => 'Encode::Byte',
 52 |     'cp1250'                => 'Encode::Byte',
 53 |     'cp1251'                => 'Encode::Byte',
 54 |     'cp1252'                => 'Encode::Byte',
 55 |     'cp1253'                => 'Encode::Byte',
 56 |     'cp1254'                => 'Encode::Byte',
 57 |     'cp1255'                => 'Encode::Byte',
 58 |     'cp1256'                => 'Encode::Byte',
 59 |     'cp1257'                => 'Encode::Byte',
 60 |     'cp1258'                => 'Encode::Byte',
 61 |     'AdobeStandardEncoding' => 'Encode::Byte',
 62 |     'MacArabic'             => 'Encode::Byte',
 63 |     'MacCentralEurRoman'    => 'Encode::Byte',
 64 |     'MacCroatian'           => 'Encode::Byte',
 65 |     'MacCyrillic'           => 'Encode::Byte',
 66 |     'MacFarsi'              => 'Encode::Byte',
 67 |     'MacGreek'              => 'Encode::Byte',
 68 |     'MacHebrew'             => 'Encode::Byte',
 69 |     'MacIcelandic'          => 'Encode::Byte',
 70 |     'MacRoman'              => 'Encode::Byte',
 71 |     'MacRomanian'           => 'Encode::Byte',
 72 |     'MacRumanian'           => 'Encode::Byte',
 73 |     'MacSami'               => 'Encode::Byte',
 74 |     'MacThai'               => 'Encode::Byte',
 75 |     'MacTurkish'            => 'Encode::Byte',
 76 |     'MacUkrainian'          => 'Encode::Byte',
 77 |     'nextstep'              => 'Encode::Byte',
 78 |     'hp-roman8'             => 'Encode::Byte',
 79 |     #'gsm0338'               => 'Encode::Byte',
 80 |     'gsm0338'               => 'Encode::GSM0338',
 81 | 
 82 |     # Encode::EBCDIC
 83 |     'cp37'     => 'Encode::EBCDIC',
 84 |     'cp500'    => 'Encode::EBCDIC',
 85 |     'cp875'    => 'Encode::EBCDIC',
 86 |     'cp1026'   => 'Encode::EBCDIC',
 87 |     'cp1047'   => 'Encode::EBCDIC',
 88 |     'posix-bc' => 'Encode::EBCDIC',
 89 | 
 90 |     # Encode::Symbol
 91 |     'dingbats'      => 'Encode::Symbol',
 92 |     'symbol'        => 'Encode::Symbol',
 93 |     'AdobeSymbol'   => 'Encode::Symbol',
 94 |     'AdobeZdingbat' => 'Encode::Symbol',
 95 |     'MacDingbats'   => 'Encode::Symbol',
 96 |     'MacSymbol'     => 'Encode::Symbol',
 97 | 
 98 |     # Encode::Unicode
 99 |     'UCS-2BE'  => 'Encode::Unicode',
100 |     'UCS-2LE'  => 'Encode::Unicode',
101 |     'UTF-16'   => 'Encode::Unicode',
102 |     'UTF-16BE' => 'Encode::Unicode',
103 |     'UTF-16LE' => 'Encode::Unicode',
104 |     'UTF-32'   => 'Encode::Unicode',
105 |     'UTF-32BE' => 'Encode::Unicode',
106 |     'UTF-32LE' => 'Encode::Unicode',
107 |     'UTF-7'    => 'Encode::Unicode::UTF7',
108 | );
109 | 
110 | unless ( ord("A") == 193 ) {
111 |     %ExtModule = (
112 |         %ExtModule,
113 |         'euc-cn'         => 'Encode::CN',
114 |         'gb12345-raw'    => 'Encode::CN',
115 |         'gb2312-raw'     => 'Encode::CN',
116 |         'hz'             => 'Encode::CN',
117 |         'iso-ir-165'     => 'Encode::CN',
118 |         'cp936'          => 'Encode::CN',
119 |         'MacChineseSimp' => 'Encode::CN',
120 | 
121 |         '7bit-jis'      => 'Encode::JP',
122 |         'euc-jp'        => 'Encode::JP',
123 |         'iso-2022-jp'   => 'Encode::JP',
124 |         'iso-2022-jp-1' => 'Encode::JP',
125 |         'jis0201-raw'   => 'Encode::JP',
126 |         'jis0208-raw'   => 'Encode::JP',
127 |         'jis0212-raw'   => 'Encode::JP',
128 |         'cp932'         => 'Encode::JP',
129 |         'MacJapanese'   => 'Encode::JP',
130 |         'shiftjis'      => 'Encode::JP',
131 | 
132 |         'euc-kr'      => 'Encode::KR',
133 |         'iso-2022-kr' => 'Encode::KR',
134 |         'johab'       => 'Encode::KR',
135 |         'ksc5601-raw' => 'Encode::KR',
136 |         'cp949'       => 'Encode::KR',
137 |         'MacKorean'   => 'Encode::KR',
138 | 
139 |         'big5-eten'      => 'Encode::TW',
140 |         'big5-hkscs'     => 'Encode::TW',
141 |         'cp950'          => 'Encode::TW',
142 |         'MacChineseTrad' => 'Encode::TW',
143 | 
144 |         #'big5plus'           => 'Encode::HanExtra',
145 |         #'euc-tw'             => 'Encode::HanExtra',
146 |         #'gb18030'            => 'Encode::HanExtra',
147 | 
148 |         'MIME-Header' => 'Encode::MIME::Header',
149 |         'MIME-B'      => 'Encode::MIME::Header',
150 |         'MIME-Q'      => 'Encode::MIME::Header',
151 | 
152 |         'MIME-Header-ISO_2022_JP' => 'Encode::MIME::Header::ISO_2022_JP',
153 |     );
154 | }
155 | 
156 | #
157 | # Why not export ? to keep ConfigLocal Happy!
158 | #
159 | while ( my ( $enc, $mod ) = each %ExtModule ) {
160 |     $Encode::ExtModule{$enc} = $mod;
161 | }
162 | 
163 | 1;
164 | __END__
165 | 
166 | =head1 NAME
167 | 
168 | Encode::Config -- internally used by Encode
169 | 
170 | =cut
171 | 


--------------------------------------------------------------------------------
/lib/Encode/JP/H2Z.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/lib/Encode/JP/H2Z.pm


--------------------------------------------------------------------------------
/lib/Encode/JP/JIS7.pm:
--------------------------------------------------------------------------------
  1 | package Encode::JP::JIS7;
  2 | use strict;
  3 | use warnings;
  4 | our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
  5 | 
  6 | use Encode qw(:fallbacks);
  7 | 
  8 | for my $name ( '7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1' ) {
  9 |     my $h2z     = ( $name eq '7bit-jis' )    ? 0 : 1;
 10 |     my $jis0212 = ( $name eq 'iso-2022-jp' ) ? 0 : 1;
 11 | 
 12 |     my $obj = bless {
 13 |         Name    => $name,
 14 |         h2z     => $h2z,
 15 |         jis0212 => $jis0212,
 16 |     } => __PACKAGE__;
 17 |     Encode::define_encoding($obj, $name);
 18 | }
 19 | 
 20 | use parent qw(Encode::Encoding);
 21 | 
 22 | # we override this to 1 so PerlIO works
 23 | sub needs_lines { 1 }
 24 | 
 25 | use Encode::CJKConstants qw(:all);
 26 | 
 27 | #
 28 | # decode is identical for all 2022 variants
 29 | #
 30 | 
 31 | sub decode($$;$) {
 32 |     my ( $obj, $str, $chk ) = @_;
 33 |     return undef unless defined $str;
 34 |     my $residue = '';
 35 |     if ($chk) {
 36 |         $str =~ s/([^\x00-\x7f].*)$//so and $residue = $1;
 37 |     }
 38 |     $residue .= jis_euc( \$str );
 39 |     $_[1] = $residue if $chk;
 40 |     return Encode::decode( 'euc-jp', $str, FB_PERLQQ );
 41 | }
 42 | 
 43 | #
 44 | # encode is different
 45 | #
 46 | 
 47 | sub encode($$;$) {
 48 |     require Encode::JP::H2Z;
 49 |     my ( $obj, $utf8, $chk ) = @_;
 50 |     return undef unless defined $utf8;
 51 | 
 52 |     # empty the input string in the stack so perlio is ok
 53 |     $_[1] = '' if $chk;
 54 |     my ( $h2z, $jis0212 ) = @$obj{qw(h2z jis0212)};
 55 |     my $octet = Encode::encode( 'euc-jp', $utf8, $chk || 0 );
 56 |     $h2z and &Encode::JP::H2Z::h2z( \$octet );
 57 |     euc_jis( \$octet, $jis0212 );
 58 |     return $octet;
 59 | }
 60 | 
 61 | #
 62 | # cat_decode
 63 | #
 64 | my $re_scan_jis_g = qr{
 65 |    \G ( ($RE{JIS_0212}) |  $RE{JIS_0208}  |
 66 |         ($RE{ISO_ASC})  | ($RE{JIS_KANA}) | )
 67 |       ([^\e]*)
 68 | }x;
 69 | 
 70 | sub cat_decode {    # ($obj, $dst, $src, $pos, $trm, $chk)
 71 |     my ( $obj, undef, undef, $pos, $trm ) = @_;    # currently ignores $chk
 72 |     my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ];
 73 |     local ${^ENCODING};
 74 |     use bytes;
 75 |     my $opos = pos($$rsrc);
 76 |     pos($$rsrc) = $pos;
 77 |     while ( $$rsrc =~ /$re_scan_jis_g/gc ) {
 78 |         my ( $esc, $esc_0212, $esc_asc, $esc_kana, $chunk ) =
 79 |           ( $1, $2, $3, $4, $5 );
 80 | 
 81 |         unless ($chunk) { $esc or last; next; }
 82 | 
 83 |         if ( $esc && !$esc_asc ) {
 84 |             $chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
 85 |             if ($esc_kana) {
 86 |                 $chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
 87 |             }
 88 |             elsif ($esc_0212) {
 89 |                 $chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
 90 |             }
 91 |             $chunk = Encode::decode( 'euc-jp', $chunk, 0 );
 92 |         }
 93 |         elsif ( ( my $npos = index( $chunk, $trm ) ) >= 0 ) {
 94 |             $$rdst .= substr( $chunk, 0, $npos + length($trm) );
 95 |             $$rpos += length($esc) + $npos + length($trm);
 96 |             pos($$rsrc) = $opos;
 97 |             return 1;
 98 |         }
 99 |         $$rdst .= $chunk;
100 |         $$rpos = pos($$rsrc);
101 |     }
102 |     $$rpos = pos($$rsrc);
103 |     pos($$rsrc) = $opos;
104 |     return '';
105 | }
106 | 
107 | # JIS<->EUC
108 | my $re_scan_jis = qr{
109 |    (?:($RE{JIS_0212})|$RE{JIS_0208}|($RE{ISO_ASC})|($RE{JIS_KANA}))([^\e]*)
110 | }x;
111 | 
112 | sub jis_euc {
113 |     local ${^ENCODING};
114 |     my $r_str = shift;
115 |     $$r_str =~ s($re_scan_jis)
116 |     {
117 |     my ($esc_0212, $esc_asc, $esc_kana, $chunk) =
118 |        ($1, $2, $3, $4);
119 |     if (!$esc_asc) {
120 |         $chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
121 |         if ($esc_kana) {
122 |         $chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
123 |         }
124 |         elsif ($esc_0212) {
125 |         $chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
126 |         }
127 |     }
128 |     $chunk;
129 |     }geox;
130 |     my ($residue) = ( $$r_str =~ s/(\e.*)$//so );
131 |     return $residue;
132 | }
133 | 
134 | sub euc_jis {
135 |     no warnings qw(uninitialized);
136 |     local ${^ENCODING};
137 |     my $r_str   = shift;
138 |     my $jis0212 = shift;
139 |     $$r_str =~ s{
140 |     ((?:$RE{EUC_C})+|(?:$RE{EUC_KANA})+|(?:$RE{EUC_0212})+)
141 |     }{
142 |         my $chunk = $1;
143 |         my $esc =
144 |         ( $chunk =~ tr/\x8E//d ) ? $ESC{KANA} :
145 |             ( $chunk =~ tr/\x8F//d ) ? $ESC{JIS_0212} :
146 |             $ESC{JIS_0208};
147 |         if ($esc eq $ESC{JIS_0212} && !$jis0212){
148 |         # fallback to '?'
149 |         $chunk =~ tr/\xA1-\xFE/\x3F/;
150 |         }else{
151 |         $chunk =~ tr/\xA1-\xFE/\x21-\x7E/;
152 |         }
153 |         $esc . $chunk . $ESC{ASC};
154 |     }geox;
155 |     $$r_str =~ s/\Q$ESC{ASC}\E
156 |         (\Q$ESC{KANA}\E|\Q$ESC{JIS_0212}\E|\Q$ESC{JIS_0208}\E)/$1/gox;
157 |     $$r_str;
158 | }
159 | 
160 | 1;
161 | __END__
162 | 
163 | 
164 | =head1 NAME
165 | 
166 | Encode::JP::JIS7 -- internally used by Encode::JP
167 | 
168 | =cut
169 | 


--------------------------------------------------------------------------------
/lib/Encode/KR/2022_KR.pm:
--------------------------------------------------------------------------------
 1 | package Encode::KR::2022_KR;
 2 | use strict;
 3 | use warnings;
 4 | our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 5 | 
 6 | use Encode qw(:fallbacks);
 7 | 
 8 | use parent qw(Encode::Encoding);
 9 | __PACKAGE__->Define('iso-2022-kr');
10 | 
11 | sub needs_lines { 1 }
12 | 
13 | sub perlio_ok {
14 |     return 0;    # for the time being
15 | }
16 | 
17 | sub decode {
18 |     my ( $obj, $str, $chk ) = @_;
19 |     return undef unless defined $str;
20 |     my $res     = $str;
21 |     my $residue = iso_euc( \$res );
22 | 
23 |     # This is for PerlIO
24 |     $_[1] = $residue if $chk;
25 |     return Encode::decode( 'euc-kr', $res, FB_PERLQQ );
26 | }
27 | 
28 | sub encode {
29 |     my ( $obj, $utf8, $chk ) = @_;
30 |     return undef unless defined $utf8;
31 | 
32 |     # empty the input string in the stack so perlio is ok
33 |     $_[1] = '' if $chk;
34 |     my $octet = Encode::encode( 'euc-kr', $utf8, FB_PERLQQ );
35 |     euc_iso( \$octet );
36 |     return $octet;
37 | }
38 | 
39 | use Encode::CJKConstants qw(:all);
40 | 
41 | # ISO<->EUC
42 | 
43 | sub iso_euc {
44 |     my $r_str = shift;
45 |     $$r_str =~ s/$RE{'2022_KR'}//gox;    # remove the designator
46 |     $$r_str =~ s{                      # replace characters in GL
47 |      \x0e                              # between SO(\x0e) and SI(\x0f)
48 |      ([^\x0f]*)                        # with characters in GR
49 |      \x0f
50 |         }
51 |     {
52 |                         my $out= $1;
53 |       $out =~ tr/\x21-\x7e/\xa1-\xfe/;
54 |       $out;
55 |     }geox;
56 |     my ($residue) = ( $$r_str =~ s/(\e.*)$//so );
57 |     return $residue;
58 | }
59 | 
60 | sub euc_iso {
61 |     no warnings qw(uninitialized);
62 |     my $r_str = shift;
63 |     substr( $$r_str, 0, 0 ) =
64 |       $ESC{'2022_KR'};    # put the designator at the beg.
65 |     $$r_str =~
66 |       s{                         # move KS X 1001 characters in GR to GL
67 |         ($RE{EUC_C}+)                     # and enclose them with SO and SI
68 |         }{
69 |             my $str = $1;
70 |             $str =~ tr/\xA1-\xFE/\x21-\x7E/;
71 |             "\x0e" . $str . "\x0f";
72 |         }geox;
73 |     $$r_str;
74 | }
75 | 
76 | 1;
77 | __END__
78 | 
79 | =head1 NAME
80 | 
81 | Encode::KR::2022_KR -- internally used by Encode::KR
82 | 
83 | =cut
84 | 


--------------------------------------------------------------------------------
/lib/Encode/MIME/Header/ISO_2022_JP.pm:
--------------------------------------------------------------------------------
  1 | package Encode::MIME::Header::ISO_2022_JP;
  2 | 
  3 | use strict;
  4 | use warnings;
  5 | 
  6 | use parent qw(Encode::MIME::Header);
  7 | 
  8 | my $obj =
  9 |   bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } =>
 10 |   __PACKAGE__;
 11 | Encode::define_encoding($obj, 'MIME-Header-ISO_2022_JP');
 12 | 
 13 | use constant HEAD => '=?ISO-2022-JP?B?';
 14 | use constant TAIL => '?=';
 15 | 
 16 | use Encode::CJKConstants qw(%RE);
 17 | 
 18 | our $VERSION = do { my @r = ( q$Revision: 1.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 19 | 
 20 | # I owe the below codes totally to
 21 | #   Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64
 22 | 
 23 | sub encode {
 24 |     my $self = shift;
 25 |     my $str  = shift;
 26 |     return undef unless defined $str;
 27 | 
 28 |     utf8::encode($str) if ( Encode::is_utf8($str) );
 29 |     Encode::from_to( $str, 'utf8', 'euc-jp' );
 30 | 
 31 |     my ($trailing_crlf) = ( $str =~ /(\n|\r|\x0d\x0a)$/o );
 32 | 
 33 |     $str = _mime_unstructured_header( $str, $self->{bpl} );
 34 | 
 35 |     not $trailing_crlf and $str =~ s/(\n|\r|\x0d\x0a)$//o;
 36 | 
 37 |     return $str;
 38 | }
 39 | 
 40 | sub _mime_unstructured_header {
 41 |     my ( $oldheader, $bpl ) = @_;
 42 |     my $crlf = $oldheader =~ /\n$/;
 43 |     my ( $header, @words, @wordstmp, $i ) = ('');
 44 | 
 45 |     $oldheader =~ s/\s+$//;
 46 | 
 47 |     @wordstmp = split /\s+/, $oldheader;
 48 | 
 49 |     for ( $i = 0 ; $i < $#wordstmp ; $i++ ) {
 50 |         if (    $wordstmp[$i] !~ /^[\x21-\x7E]+$/
 51 |             and $wordstmp[ $i + 1 ] !~ /^[\x21-\x7E]+$/ )
 52 |         {
 53 |             $wordstmp[ $i + 1 ] = "$wordstmp[$i] $wordstmp[$i + 1]";
 54 |         }
 55 |         else {
 56 |             push( @words, $wordstmp[$i] );
 57 |         }
 58 |     }
 59 | 
 60 |     push( @words, $wordstmp[-1] );
 61 | 
 62 |     for my $word (@words) {
 63 |         if ( $word =~ /^[\x21-\x7E]+$/ ) {
 64 |             $header =~ /(?:.*\n)*(.*)/;
 65 |             if ( length($1) + length($word) > $bpl ) {
 66 |                 $header .= "\n $word";
 67 |             }
 68 |             else {
 69 |                 $header .= $word;
 70 |             }
 71 |         }
 72 |         else {
 73 |             $header = _add_encoded_word( $word, $header, $bpl );
 74 |         }
 75 | 
 76 |         $header =~ /(?:.*\n)*(.*)/;
 77 | 
 78 |         if ( length($1) == $bpl ) {
 79 |             $header .= "\n ";
 80 |         }
 81 |         else {
 82 |             $header .= ' ';
 83 |         }
 84 |     }
 85 | 
 86 |     $header =~ s/\n? $//mg;
 87 | 
 88 |     $crlf ? "$header\n" : $header;
 89 | }
 90 | 
 91 | sub _add_encoded_word {
 92 |     my ( $str, $line, $bpl ) = @_;
 93 |     my $result = '';
 94 | 
 95 |     while ( length($str) ) {
 96 |         my $target = $str;
 97 |         $str = '';
 98 | 
 99 |         if (
100 |             length($line) + 22 +
101 |             ( $target =~ /^(?:$RE{EUC_0212}|$RE{EUC_C})/o ) * 8 > $bpl )
102 |         {
103 |             $line =~ s/[ \t\n\r]*$/\n/;
104 |             $result .= $line;
105 |             $line = ' ';
106 |         }
107 | 
108 |         while (1) {
109 |             my $iso_2022_jp = $target;
110 |             Encode::from_to( $iso_2022_jp, 'euc-jp', 'iso-2022-jp' );
111 | 
112 |             my $encoded =
113 |               HEAD . MIME::Base64::encode_base64( $iso_2022_jp, '' ) . TAIL;
114 | 
115 |             if ( length($encoded) + length($line) > $bpl ) {
116 |                 $target =~
117 |                   s/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|$RE{ASCII})$//o;
118 |                 $str = $1 . $str;
119 |             }
120 |             else {
121 |                 $line .= $encoded;
122 |                 last;
123 |             }
124 |         }
125 | 
126 |     }
127 | 
128 |     $result . $line;
129 | }
130 | 
131 | 1;
132 | __END__
133 | 
134 | 


--------------------------------------------------------------------------------
/lib/Encode/MIME/Name.pm:
--------------------------------------------------------------------------------
  1 | package Encode::MIME::Name;
  2 | use strict;
  3 | use warnings;
  4 | our $VERSION = do { my @r = ( q$Revision: 1.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
  5 | 
  6 | # NOTE: This table must be 1:1 mapping
  7 | our %MIME_NAME_OF = (
  8 |     'AdobeStandardEncoding' => 'Adobe-Standard-Encoding',
  9 |     'AdobeSymbol'           => 'Adobe-Symbol-Encoding',
 10 |     'ascii'                 => 'US-ASCII',
 11 |     'big5-hkscs'            => 'Big5-HKSCS',
 12 |     'cp1026'                => 'IBM1026',
 13 |     'cp1047'                => 'IBM1047',
 14 |     'cp1250'                => 'windows-1250',
 15 |     'cp1251'                => 'windows-1251',
 16 |     'cp1252'                => 'windows-1252',
 17 |     'cp1253'                => 'windows-1253',
 18 |     'cp1254'                => 'windows-1254',
 19 |     'cp1255'                => 'windows-1255',
 20 |     'cp1256'                => 'windows-1256',
 21 |     'cp1257'                => 'windows-1257',
 22 |     'cp1258'                => 'windows-1258',
 23 |     'cp37'                  => 'IBM037',
 24 |     'cp424'                 => 'IBM424',
 25 |     'cp437'                 => 'IBM437',
 26 |     'cp500'                 => 'IBM500',
 27 |     'cp775'                 => 'IBM775',
 28 |     'cp850'                 => 'IBM850',
 29 |     'cp852'                 => 'IBM852',
 30 |     'cp855'                 => 'IBM855',
 31 |     'cp857'                 => 'IBM857',
 32 |     'cp860'                 => 'IBM860',
 33 |     'cp861'                 => 'IBM861',
 34 |     'cp862'                 => 'IBM862',
 35 |     'cp863'                 => 'IBM863',
 36 |     'cp864'                 => 'IBM864',
 37 |     'cp865'                 => 'IBM865',
 38 |     'cp866'                 => 'IBM866',
 39 |     'cp869'                 => 'IBM869',
 40 |     'cp936'                 => 'GBK',
 41 |     'euc-cn'                => 'EUC-CN',
 42 |     'euc-jp'                => 'EUC-JP',
 43 |     'euc-kr'                => 'EUC-KR',
 44 |     #'gb2312-raw'            => 'GB2312', # no, you're wrong, I18N::Charset
 45 |     'hp-roman8'             => 'hp-roman8',
 46 |     'hz'                    => 'HZ-GB-2312',
 47 |     'iso-2022-jp'           => 'ISO-2022-JP',
 48 |     'iso-2022-jp-1'         => 'ISO-2022-JP-1',
 49 |     'iso-2022-kr'           => 'ISO-2022-KR',
 50 |     'iso-8859-1'            => 'ISO-8859-1',
 51 |     'iso-8859-10'           => 'ISO-8859-10',
 52 |     'iso-8859-13'           => 'ISO-8859-13',
 53 |     'iso-8859-14'           => 'ISO-8859-14',
 54 |     'iso-8859-15'           => 'ISO-8859-15',
 55 |     'iso-8859-16'           => 'ISO-8859-16',
 56 |     'iso-8859-2'            => 'ISO-8859-2',
 57 |     'iso-8859-3'            => 'ISO-8859-3',
 58 |     'iso-8859-4'            => 'ISO-8859-4',
 59 |     'iso-8859-5'            => 'ISO-8859-5',
 60 |     'iso-8859-6'            => 'ISO-8859-6',
 61 |     'iso-8859-7'            => 'ISO-8859-7',
 62 |     'iso-8859-8'            => 'ISO-8859-8',
 63 |     'iso-8859-9'            => 'ISO-8859-9',
 64 |     #'jis0201-raw'           => 'JIS_X0201',
 65 |     #'jis0208-raw'           => 'JIS_C6226-1983',
 66 |     #'jis0212-raw'           => 'JIS_X0212-1990',
 67 |     'koi8-r'                => 'KOI8-R',
 68 |     'koi8-u'                => 'KOI8-U',
 69 |     #'ksc5601-raw'           => 'KS_C_5601-1987',
 70 |     'shiftjis'              => 'Shift_JIS',
 71 |     'UTF-16'                => 'UTF-16',
 72 |     'UTF-16BE'              => 'UTF-16BE',
 73 |     'UTF-16LE'              => 'UTF-16LE',
 74 |     'UTF-32'                => 'UTF-32',
 75 |     'UTF-32BE'              => 'UTF-32BE',
 76 |     'UTF-32LE'              => 'UTF-32LE',
 77 |     'UTF-7'                 => 'UTF-7',
 78 |     'utf-8-strict'          => 'UTF-8',
 79 |     'viscii'                => 'VISCII',
 80 | );
 81 | 
 82 | # NOTE: %MIME_NAME_OF is still 1:1 mapping
 83 | our %ENCODE_NAME_OF = map { uc $MIME_NAME_OF{$_} => $_ } keys %MIME_NAME_OF;
 84 | 
 85 | # Add additional 1:N mapping
 86 | $MIME_NAME_OF{'utf8'} = 'UTF-8';
 87 | 
 88 | sub get_mime_name($) { $MIME_NAME_OF{$_[0]} };
 89 | 
 90 | sub get_encode_name($) { $ENCODE_NAME_OF{uc $_[0]} };
 91 | 
 92 | 1;
 93 | __END__
 94 | 
 95 | =head1 NAME
 96 | 
 97 | Encode::MIME::NAME -- internally used by Encode
 98 | 
 99 | =head1 SEE ALSO
100 | 
101 | L<I18N::Charset>
102 | 
103 | =cut
104 | 


--------------------------------------------------------------------------------
/lib/Encode/PerlIO.pod:
--------------------------------------------------------------------------------
  1 | =head1 NAME
  2 | 
  3 | Encode::PerlIO -- a detailed document on Encode and PerlIO
  4 | 
  5 | =head1 Overview
  6 | 
  7 | It is very common to want to do encoding transformations when
  8 | reading or writing files, network connections, pipes etc.
  9 | If Perl is configured to use the new 'perlio' IO system then
 10 | C<Encode> provides a "layer" (see L<PerlIO>) which can transform
 11 | data as it is read or written.
 12 | 
 13 | Here is how the blind poet would modernise the encoding:
 14 | 
 15 |     use Encode;
 16 |     open(my $iliad,'<:encoding(iso-8859-7)','iliad.greek');
 17 |     open(my $utf8,'>:utf8','iliad.utf8');
 18 |     my @epic = <$iliad>;
 19 |     print $utf8 @epic;
 20 |     close($utf8);
 21 |     close($illiad);
 22 | 
 23 | In addition, the new IO system can also be configured to read/write
 24 | UTF-8 encoded characters (as noted above, this is efficient):
 25 | 
 26 |     open(my $fh,'>:utf8','anything');
 27 |     print $fh "Any \x{0021} string \N{SMILEY FACE}\n";
 28 | 
 29 | Either of the above forms of "layer" specifications can be made the default
 30 | for a lexical scope with the C<use open ...> pragma. See L<open>.
 31 | 
 32 | Once a handle is open, its layers can be altered using C<binmode>.
 33 | 
 34 | Without any such configuration, or if Perl itself is built using the
 35 | system's own IO, then write operations assume that the file handle
 36 | accepts only I<bytes> and will C<die> if a character larger than 255 is
 37 | written to the handle. When reading, each octet from the handle becomes
 38 | a byte-in-a-character. Note that this default is the same behaviour
 39 | as bytes-only languages (including Perl before v5.6) would have,
 40 | and is sufficient to handle native 8-bit encodings e.g. iso-8859-1,
 41 | EBCDIC etc. and any legacy mechanisms for handling other encodings
 42 | and binary data.
 43 | 
 44 | In other cases, it is the program's responsibility to transform
 45 | characters into bytes using the API above before doing writes, and to
 46 | transform the bytes read from a handle into characters before doing
 47 | "character operations" (e.g. C<lc>, C</\W+/>, ...).
 48 | 
 49 | You can also use PerlIO to convert larger amounts of data you don't
 50 | want to bring into memory.  For example, to convert between ISO-8859-1
 51 | (Latin 1) and UTF-8 (or UTF-EBCDIC in EBCDIC machines):
 52 | 
 53 |     open(F, "<:encoding(iso-8859-1)", "data.txt") or die $!;
 54 |     open(G, ">:utf8",                 "data.utf") or die $!;
 55 |     while (<F>) { print G }
 56 | 
 57 |     # Could also do "print G <F>" but that would pull
 58 |     # the whole file into memory just to write it out again.
 59 | 
 60 | More examples:
 61 | 
 62 |     open(my $f, "<:encoding(cp1252)")
 63 |     open(my $g, ">:encoding(iso-8859-2)")
 64 |     open(my $h, ">:encoding(latin9)")       # iso-8859-15
 65 | 
 66 | See also L<encoding> for how to change the default encoding of the
 67 | data in your script.
 68 | 
 69 | =head1 How does it work?
 70 | 
 71 | Here is a crude diagram of how filehandle, PerlIO, and Encode
 72 | interact.
 73 | 
 74 |   filehandle <-> PerlIO        PerlIO <-> scalar (read/printed)
 75 |                        \      /
 76 |                         Encode   
 77 | 
 78 | When PerlIO receives data from either direction, it fills a buffer
 79 | (currently with 1024 bytes) and passes the buffer to Encode.
 80 | Encode tries to convert the valid part and passes it back to PerlIO,
 81 | leaving invalid parts (usually a partial character) in the buffer.
 82 | PerlIO then appends more data to the buffer, calls Encode again,
 83 | and so on until the data stream ends.
 84 | 
 85 | To do so, PerlIO always calls (de|en)code methods with CHECK set to 1.
 86 | This ensures that the method stops at the right place when it
 87 | encounters partial character.  The following is what happens when
 88 | PerlIO and Encode tries to encode (from utf8) more than 1024 bytes
 89 | and the buffer boundary happens to be in the middle of a character.
 90 | 
 91 |    A   B   C   ....   ~     \x{3000}    ....
 92 |   41  42  43   ....  7E   e3   80   80  ....
 93 |   <- buffer --------------->
 94 |   << encoded >>>>>>>>>>
 95 |                        <- next buffer ------
 96 | 
 97 | Encode converts from the beginning to \x7E, leaving \xe3 in the buffer
 98 | because it is invalid (partial character).
 99 | 
100 | Unfortunately, this scheme does not work well with escape-based
101 | encodings such as ISO-2022-JP.
102 | 
103 | =head1 Line Buffering
104 | 
105 | Now let's see what happens when you try to decode from ISO-2022-JP and
106 | the buffer ends in the middle of a character.
107 | 
108 |               JIS208-ESC   \x{5f3e}
109 |    A   B   C   ....   ~   \e   $   B  |DAN | ....
110 |   41  42  43   ....  7E   1b  24  41  43  46 ....
111 |   <- buffer --------------------------->
112 |   << encoded >>>>>>>>>>>>>>>>>>>>>>>
113 | 
114 | As you see, the next buffer begins with \x43.  But \x43 is 'C' in
115 | ASCII, which is wrong in this case because we are now in JISX 0208
116 | area so it has to convert \x43\x46, not \x43.  Unlike utf8 and EUC,
117 | in escape-based encodings you can't tell if a given octet is a whole
118 | character or just part of it.
119 | 
120 | Fortunately PerlIO also supports line buffer if you tell PerlIO to use
121 | one instead of fixed buffer.  Since ISO-2022-JP is guaranteed to revert to ASCII at the end of the line, partial
122 | character will never happen when line buffer is used.
123 | 
124 | To tell PerlIO to use line buffer, implement -E<gt>needs_lines method
125 | for your encoding object.  See  L<Encode::Encoding> for details.
126 | 
127 | Thanks to these efforts most encodings that come with Encode support
128 | PerlIO but that still leaves following encodings.
129 | 
130 |   iso-2022-kr
131 |   MIME-B
132 |   MIME-Header
133 |   MIME-Q
134 | 
135 | Fortunately iso-2022-kr is hardly used (according to Jungshik) and
136 | MIME-* are very unlikely to be fed to PerlIO because they are for mail
137 | headers.  See L<Encode::MIME::Header> for details.
138 | 
139 | =head2 How can I tell whether my encoding fully supports PerlIO ?
140 | 
141 | As of this writing, any encoding whose class belongs to Encode::XS and
142 | Encode::Unicode works.  The Encode module has a C<perlio_ok> method
143 | which you can use before applying PerlIO encoding to the filehandle.
144 | Here is an example:
145 | 
146 |   my $use_perlio = perlio_ok($enc);
147 |   my $layer = $use_perlio ? "<:raw" : "<:encoding($enc)";
148 |   open my $fh, $layer, $file or die "$file : $!";
149 |   while(<$fh>){
150 |     $_ = decode($enc, $_) unless $use_perlio;
151 |     # .... 
152 |   }
153 | 
154 | =head1 SEE ALSO
155 | 
156 | L<Encode::Encoding>,
157 | L<Encode::Supported>,
158 | L<Encode::PerlIO>, 
159 | L<encoding>,
160 | L<perlebcdic>, 
161 | L<perlfunc/open>, 
162 | L<perlunicode>, 
163 | L<utf8>, 
164 | the Perl Unicode Mailing List E<lt>perl-unicode@perl.orgE<gt>
165 | 
166 | =cut
167 | 
168 | 


--------------------------------------------------------------------------------
/lib/Encode/Unicode/UTF7.pm:
--------------------------------------------------------------------------------
  1 | #
  2 | # $Id: UTF7.pm,v 2.10 2017/06/10 17:23:50 dankogai Exp $
  3 | #
  4 | package Encode::Unicode::UTF7;
  5 | use strict;
  6 | use warnings;
  7 | use parent qw(Encode::Encoding);
  8 | __PACKAGE__->Define('UTF-7');
  9 | our $VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 10 | use MIME::Base64;
 11 | use Encode qw(find_encoding);
 12 | 
 13 | #
 14 | # Algorithms taken from Unicode::String by Gisle Aas
 15 | #
 16 | 
 17 | our $OPTIONAL_DIRECT_CHARS = 1;
 18 | my $specials = quotemeta "\'(),-./:?";
 19 | $OPTIONAL_DIRECT_CHARS
 20 |   and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
 21 | 
 22 | # \s will not work because it matches U+3000 DEOGRAPHIC SPACE
 23 | # We use qr/[\n\r\t\ ] instead
 24 | my $re_asis    = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
 25 | my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
 26 | my $e_utf16    = find_encoding("UTF-16BE");
 27 | 
 28 | sub needs_lines { 1 }
 29 | 
 30 | sub encode($$;$) {
 31 |     my ( $obj, $str, $chk ) = @_;
 32 |     return undef unless defined $str;
 33 |     my $len = length($str);
 34 |     pos($str) = 0;
 35 |     my $bytes = substr($str, 0, 0); # to propagate taintedness
 36 |     while ( pos($str) < $len ) {
 37 |         if ( $str =~ /\G($re_asis+)/ogc ) {
 38 | 	    my $octets = $1;
 39 | 	    utf8::downgrade($octets);
 40 | 	    $bytes .= $octets;
 41 |         }
 42 |         elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
 43 |             if ( $1 eq "+" ) {
 44 |                 $bytes .= "+-";
 45 |             }
 46 |             else {
 47 |                 my $s = $1;
 48 |                 my $base64 = encode_base64( $e_utf16->encode($s), '' );
 49 |                 $base64 =~ s/=+$//;
 50 |                 $bytes .= "+$base64-";
 51 |             }
 52 |         }
 53 |         else {
 54 |             die "This should not happen! (pos=" . pos($str) . ")";
 55 |         }
 56 |     }
 57 |     $_[1] = '' if $chk;
 58 |     return $bytes;
 59 | }
 60 | 
 61 | sub decode($$;$) {
 62 |     use re 'taint';
 63 |     my ( $obj, $bytes, $chk ) = @_;
 64 |     return undef unless defined $bytes;
 65 |     my $len = length($bytes);
 66 |     my $str = substr($bytes, 0, 0); # to propagate taintedness;
 67 |     pos($bytes) = 0;
 68 |     no warnings 'uninitialized';
 69 |     while ( pos($bytes) < $len ) {
 70 |         if ( $bytes =~ /\G([^+]+)/ogc ) {
 71 |             $str .= $1;
 72 |         }
 73 |         elsif ( $bytes =~ /\G\+-/ogc ) {
 74 |             $str .= "+";
 75 |         }
 76 |         elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) {
 77 |             my $base64 = $1;
 78 |             my $pad    = length($base64) % 4;
 79 |             $base64 .= "=" x ( 4 - $pad ) if $pad;
 80 |             $str .= $e_utf16->decode( decode_base64($base64) );
 81 |         }
 82 |         elsif ( $bytes =~ /\G\+/ogc ) {
 83 |             $^W and warn "Bad UTF7 data escape";
 84 |             $str .= "+";
 85 |         }
 86 |         else {
 87 |             die "This should not happen " . pos($bytes);
 88 |         }
 89 |     }
 90 |     $_[1] = '' if $chk;
 91 |     return $str;
 92 | }
 93 | 1;
 94 | __END__
 95 | 
 96 | =head1 NAME
 97 | 
 98 | Encode::Unicode::UTF7 -- UTF-7 encoding
 99 | 
100 | =head1 SYNOPSIS
101 | 
102 |     use Encode qw/encode decode/; 
103 |     $utf7 = encode("UTF-7", $utf8);
104 |     $utf8 = decode("UTF-7", $ucs2);
105 | 
106 | =head1 ABSTRACT
107 | 
108 | This module implements UTF-7 encoding documented in RFC 2152.  UTF-7,
109 | as its name suggests, is a 7-bit re-encoded version of UTF-16BE.  It
110 | is designed to be MTA-safe and expected to be a standard way to
111 | exchange Unicoded mails via mails.  But with the advent of UTF-8 and
112 | 8-bit compliant MTAs, UTF-7 is hardly ever used.
113 | 
114 | UTF-7 was not supported by Encode until version 1.95 because of that.
115 | But Unicode::String, a module by Gisle Aas which adds Unicode supports
116 | to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added
117 | so Encode can supersede Unicode::String 100%.
118 | 
119 | =head1 In Practice
120 | 
121 | When you want to encode Unicode for mails and web pages, however, do
122 | not use UTF-7 unless you are sure your recipients and readers can
123 | handle it.  Very few MUAs and WWW Browsers support these days (only
124 | Mozilla seems to support one).  For general cases, use UTF-8 for
125 | message body and MIME-Header for header instead.
126 | 
127 | =head1 SEE ALSO
128 | 
129 | L<Encode>, L<Encode::Unicode>, L<Unicode::String>
130 | 
131 | RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt>
132 | 
133 | =cut
134 | 


--------------------------------------------------------------------------------
/t/Aliases.t:
--------------------------------------------------------------------------------
  1 | #!../perl
  2 | 
  3 | BEGIN {
  4 |     if ($ENV{'PERL_CORE'}){
  5 |     chdir 't';
  6 |     unshift @INC, '../lib';
  7 |     }
  8 |     require Config; Config->import();
  9 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
 10 |     print "1..0 # Skip: Encode was not built\n";
 11 |         exit 0;
 12 |     }
 13 | }
 14 | 
 15 | use strict;
 16 | use Encode;
 17 | use Encode::Alias;
 18 | my %a2c;
 19 | my @override_tests;
 20 | my $ON_EBCDIC;
 21 | 
 22 | sub init_a2c{
 23 |     %a2c = (
 24 |         'US-ascii' => 'ascii',
 25 |         'ISO-646-US' => 'ascii',
 26 |         'UTF-8'    => 'utf-8-strict',
 27 |         'en_US.UTF-8'    => 'utf-8-strict',
 28 |         'UCS-2'    => 'UCS-2BE',
 29 |         'UCS2'     => 'UCS-2BE',
 30 |         'iso-10646-1' => 'UCS-2BE',
 31 |         'ucs2-le'  => 'UCS-2LE',
 32 |         'ucs2-be'  => 'UCS-2BE',
 33 |         'utf16'    => 'UTF-16',
 34 |         'utf32'    => 'UTF-32',
 35 |         'utf16-be'  => 'UTF-16BE',
 36 |         'utf32-be'  => 'UTF-32BE',
 37 |         'utf16-le'  => 'UTF-16LE',
 38 |         'utf32-le'  => 'UTF-32LE',
 39 |         'UCS4-BE'   => 'UTF-32BE',
 40 |         'UCS-4-LE'  => 'UTF-32LE',
 41 |         'cyrillic' => 'iso-8859-5',
 42 |         'arabic'   => 'iso-8859-6',
 43 |         'greek'    => 'iso-8859-7',
 44 |         'hebrew'   => 'iso-8859-8',
 45 |         'iso-8859-8-I' => 'iso-8859-8',
 46 |         'thai'     => 'iso-8859-11',
 47 |         'tis620'   => 'iso-8859-11',
 48 |         'tis-620'   => 'iso-8859-11',
 49 |         'WinLatin1'     => 'cp1252',
 50 |         'WinLatin2'     => 'cp1250',
 51 |         'WinCyrillic'   => 'cp1251',
 52 |         'WinGreek'      => 'cp1253',
 53 |         'WinTurkish'    => 'cp1254',
 54 |         'WinHebrew'     => 'cp1255',
 55 |         'WinArabic'     => 'cp1256',
 56 |         'WinBaltic'     => 'cp1257',
 57 |         'WinVietnamese' => 'cp1258',
 58 | 	'Macintosh'     => 'MacRoman',
 59 |         'koi8r'         => 'koi8-r',
 60 |         'koi8u'         => 'koi8-u',
 61 |         'ja_JP.euc'	    => $ON_EBCDIC ? '' : 'euc-jp',
 62 |         'x-euc-jp'	    => $ON_EBCDIC ? '' : 'euc-jp',
 63 |         'zh_CN.euc'	    => $ON_EBCDIC ? '' : 'euc-cn',
 64 |         'x-euc-cn'	    => $ON_EBCDIC ? '' : 'euc-cn',
 65 |         'ko_KR.euc'	    => $ON_EBCDIC ? '' : 'euc-kr',
 66 |         'x-euc-kr'	    => $ON_EBCDIC ? '' : 'euc-kr',
 67 |         'ujis'	    => $ON_EBCDIC ? '' : 'euc-jp',
 68 |         'Shift_JIS'	    => $ON_EBCDIC ? '' : 'shiftjis',
 69 |         'x-sjis'	    => $ON_EBCDIC ? '' : 'shiftjis',
 70 |         'jis'	    => $ON_EBCDIC ? '' : '7bit-jis',
 71 |         'big-5'	    => $ON_EBCDIC ? '' : 'big5-eten',
 72 |         'zh_TW.Big5'    => $ON_EBCDIC ? '' : 'big5-eten',
 73 |         'tca-big5'	    => $ON_EBCDIC ? '' : 'big5-eten',
 74 |         'big5-hk'	    => $ON_EBCDIC ? '' : 'big5-hkscs',
 75 |         'hkscs-big5'    => $ON_EBCDIC ? '' : 'big5-hkscs',
 76 |         'GB_2312-80'    => $ON_EBCDIC ? '' : 'euc-cn',
 77 |         'KS_C_5601-1987'    => $ON_EBCDIC ? '' : 'cp949',
 78 |         #
 79 |         'gb12345-raw'   => $ON_EBCDIC ? '' : 'gb12345-raw',
 80 |         'gb2312-raw'    => $ON_EBCDIC ? '' : 'gb2312-raw',
 81 |         'jis0201-raw'   => $ON_EBCDIC ? '' : 'jis0201-raw',
 82 |         'jis0208-raw'   => $ON_EBCDIC ? '' : 'jis0208-raw',
 83 |         'jis0212-raw'   => $ON_EBCDIC ? '' : 'jis0212-raw',
 84 |         'ksc5601-raw'   => $ON_EBCDIC ? '' : 'ksc5601-raw',
 85 |         'cp65000' => 'UTF-7',
 86 |         'cp65001' => 'utf-8-strict',
 87 |        );
 88 | 
 89 |     for my $i (1..11,13..16){
 90 |     $a2c{"ISO 8859 $i"} = "iso-8859-$i";
 91 |     }
 92 |     for my $i (1..10){
 93 |     $a2c{"ISO Latin $i"} = "iso-8859-$Encode::Alias::Latin2iso[$i]";
 94 |     }
 95 |     for my $k (keys %Encode::Alias::Winlatin2cp){
 96 |     my $v = $Encode::Alias::Winlatin2cp{$k};
 97 |     $a2c{"Win" . ucfirst($k)} = "cp" . $v;
 98 |     $a2c{"IBM-$v"} = $a2c{"MS-$v"} = "cp" . $v;
 99 |     $a2c{"cp-" . $v} = "cp" . $v;
100 |     }
101 |     my @a2c = keys %a2c;
102 |     for my $k (@a2c){
103 |     $a2c{uc($k)} = $a2c{$k};
104 |     $a2c{lc($k)} = $a2c{$k};
105 |     $a2c{lcfirst($k)} = $a2c{$k};
106 |     $a2c{ucfirst($k)} = $a2c{$k};
107 |     }
108 | }
109 | 
110 | BEGIN{
111 |     $ON_EBCDIC = ord("A") == 193;
112 |     @ARGV and $ON_EBCDIC = $ARGV[0] eq 'EBCDIC';
113 |     $Encode::ON_EBCDIC = $ON_EBCDIC;
114 |     init_a2c();
115 |     @override_tests = qw(
116 |         myascii:cp1252
117 |         mygreek:cp1253
118 |         myhebrew:iso-8859-2
119 |         myarabic:cp1256
120 |         ueightsomething:utf-8-strict
121 |         unknown:
122 |     );
123 | }
124 | 
125 | if ($ON_EBCDIC){
126 |     delete @Encode::ExtModule{
127 |     qw(euc-cn gb2312 gb12345 gbk cp936 iso-ir-165 MacChineseSimp
128 |        euc-jp iso-2022-jp 7bit-jis shiftjis MacJapanese cp932
129 |        euc-kr ksc5601 cp949 MacKorean
130 |        big5	big5-hkscs cp950 MacChineseTrad
131 |        gb18030 big5plus euc-tw)
132 |     };
133 | }
134 | 
135 | use Test::More tests => (scalar keys %a2c) * 3 + @override_tests;
136 | 
137 | print "# alias test;  \$ON_EBCDIC == $ON_EBCDIC\n";
138 | 
139 | foreach my $a (keys %a2c){	
140 |     print "# $a => $a2c{$a}\n";
141 |     my $e = Encode::find_encoding($a);
142 |     is((defined($e) and $e->name), $a2c{$a},$a)
143 |     or warn "alias was $a";;
144 | }
145 | 
146 | # now we override some of the aliases and see if it works fine
147 | 
148 | define_alias(
149 |          qr/ascii/i    => '"WinLatin1"',
150 |          qr/cyrillic/i => '"WinCyrillic"',
151 |          qr/arabic/i   => '"WinArabic"',
152 |          qr/greek/i    => '"WinGreek"',
153 |          qr/hebrew/i   => '"WinHebrew"'
154 |         );
155 | 
156 | Encode::find_encoding("myhebrew");  # polute alias cache
157 | 
158 | define_alias( sub {
159 |     my $enc = shift;
160 |     return "iso-8859-2"     if $enc =~ /hebrew/i;
161 |     return "does-not-exist" if $enc =~ /arabic/i;  # should then use other override alias
162 |     return "utf-8"          if $enc =~ /eight/i;
163 |     return "unknown";
164 | });
165 | 
166 | print "# alias test with alias overrides\n";
167 | 
168 | for my $test (@override_tests) {
169 |     my($a, $c) = split /:/, $test;
170 |     my $e = Encode::find_encoding($a);
171 |     is((defined($e) and $e->name), $c, $a);
172 | }
173 | 
174 | print "# alias undef test\n";
175 | 
176 | Encode::Alias->undef_aliases;
177 | foreach my $a (keys %a2c){	
178 |     my $e = Encode::find_encoding($a);
179 |     ok(!defined($e) || $e->name =~ /-raw$/o,"Undef $a")
180 |     or warn "alias was $a";
181 | }
182 | 
183 | print "# alias reinit test\n";
184 | 
185 | Encode::Alias->init_aliases;
186 | init_a2c();
187 | foreach my $a (keys %a2c){	
188 |     my $e = Encode::find_encoding($a);
189 |     is((defined($e) and $e->name), $a2c{$a}, "Reinit $a")
190 |     or warn "alias was $a";
191 | }
192 | __END__
193 | for my $k (keys %a2c){
194 |     $k =~ /[A-Z]/ and next;
195 |     print "$k => $a2c{$k}\n";
196 | }
197 | 
198 | 
199 | 
200 | 


--------------------------------------------------------------------------------
/t/CJKT.t:
--------------------------------------------------------------------------------
  1 | BEGIN {
  2 |     require Config; Config->import();
  3 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
  4 |       print "1..0 # Skip: Encode was not built\n";
  5 |       exit 0;
  6 |     }
  7 |     if (ord("A") == 193) {
  8 |     print "1..0 # Skip: EBCDIC\n";
  9 |     exit 0;
 10 |     }
 11 | # should work w/o PerlIO now!
 12 | #    unless (PerlIO::Layer->find('perlio')){
 13 | #	print "1..0 # Skip: PerlIO required\n";
 14 | #	exit 0;
 15 | #   }
 16 |     $| = 1;
 17 | }
 18 | use strict;
 19 | use Test::More tests => 60;
 20 | use Encode;
 21 | use File::Basename;
 22 | use File::Spec;
 23 | use File::Compare qw(compare_text);
 24 | our $DEBUG = shift || 0;
 25 | 
 26 | my %Charset =
 27 |     (
 28 |      'big5-eten'  => [qw(big5-eten)],
 29 |      'big5-hkscs' => [qw(big5-hkscs)],
 30 |      gb2312       => [qw(euc-cn hz)],
 31 |      jisx0201     => [qw(euc-jp shiftjis 7bit-jis)],
 32 |      jisx0208     => [qw(euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1)],
 33 |      jisx0212     => [qw(euc-jp 7bit-jis iso-2022-jp-1)],
 34 |      ksc5601      => [qw(euc-kr iso-2022-kr johab)],
 35 |     );
 36 | 
 37 | 
 38 | my $dir = dirname(__FILE__);
 39 | my $seq = 1;
 40 | 
 41 | for my $charset (sort keys %Charset){
 42 |     my ($src, $uni, $dst, $txt);
 43 | 
 44 |     my $transcoder = find_encoding($Charset{$charset}[0]) or die;
 45 | 
 46 |     my $src_enc = File::Spec->catfile($dir,"$charset.enc");
 47 |     my $src_utf = File::Spec->catfile($dir,"$charset.utf");
 48 |     my $dst_enc = File::Spec->catfile($dir,"$$.enc");
 49 |     my $dst_utf = File::Spec->catfile($dir,"$$.utf8");
 50 | 
 51 |     open $src, "<$src_enc" or die "$src_enc : $!";
 52 |     
 53 |     if (PerlIO::Layer->find('perlio')){
 54 |     binmode($src, ":bytes"); # needed when :utf8 in default open layer
 55 |     }
 56 | 
 57 |     $txt = join('',<$src>);
 58 |     close($src);
 59 |     
 60 |     eval { $uni = $transcoder->decode($txt, 1) } or print $@;
 61 |     ok(defined($uni),  "decode $charset"); $seq++;
 62 |     is(length($txt),0, "decode $charset completely"); $seq++;
 63 |     
 64 |     open $dst, ">$dst_utf" or die "$dst_utf : $!";
 65 |     if (PerlIO::Layer->find('perlio')){
 66 |     binmode($dst, ":utf8");
 67 |     print $dst $uni;
 68 |     }else{ # ugh!
 69 |     binmode($dst);
 70 |     my $raw = $uni; Encode::_utf8_off($raw);
 71 |     print $dst $raw;
 72 |     }
 73 | 
 74 |     close($dst); 
 75 |     is(compare_text($dst_utf, $src_utf), 0, "$dst_utf eq $src_utf")
 76 |     or ($DEBUG and rename $dst_utf, "$dst_utf.$seq");
 77 |     $seq++;
 78 |     
 79 |     open $src, "<$src_utf" or die "$src_utf : $!";
 80 |     if (PerlIO::Layer->find('perlio')){
 81 |     binmode($src, ":utf8");
 82 |     $uni = join('', <$src>);
 83 |     }else{ # ugh!
 84 |     binmode($src);
 85 |     $uni = join('', <$src>);
 86 |     Encode::_utf8_on($uni);
 87 |     }
 88 |     close $src;
 89 | 
 90 |     my $unisave = $uni;
 91 |     eval { $txt = $transcoder->encode($uni,1) } or print $@;
 92 |     ok(defined($txt),   "encode $charset"); $seq++;
 93 |     is(length($uni), 0, "encode $charset completely");  $seq++;
 94 |     $uni = $unisave;
 95 | 
 96 |     open $dst,">$dst_enc" or die "$dst_utf : $!";
 97 |     binmode($dst);
 98 |     print $dst $txt;
 99 |     close($dst); 
100 |     is(compare_text($src_enc, $dst_enc), 0 => "$dst_enc eq $src_enc")
101 |     or ($DEBUG and rename $dst_enc, "$dst_enc.$seq");
102 |     $seq++;
103 |     
104 |     unlink($dst_utf, $dst_enc);
105 | 
106 |     for my $encoding (@{$Charset{$charset}}){
107 |     my $rt = decode($encoding, encode($encoding, $uni));
108 |     is ($rt, $uni, "RT $encoding");
109 |     }
110 | }
111 | 


--------------------------------------------------------------------------------
/t/Encoder.t:
--------------------------------------------------------------------------------
 1 | #
 2 | # $Id: Encoder.t,v 2.2 2023/11/10 01:10:50 dankogai Exp $
 3 | #
 4 | 
 5 | BEGIN {
 6 |     require Config; Config->import();
 7 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
 8 |       print "1..0 # Skip: Encode was not built\n";
 9 |       exit 0;
10 |     }
11 |     $| = 1;
12 | }
13 | 
14 | use strict;
15 | #use Test::More 'no_plan';
16 | use Test::More tests => 516;
17 | use Encode::Encoder qw(encoder);
18 | use MIME::Base64;
19 | package Encode::Base64;
20 | use parent 'Encode::Encoding';
21 | __PACKAGE__->Define('base64');
22 | use MIME::Base64;
23 | sub encode{
24 |     my ($obj, $data) = @_;
25 |     return encode_base64($data);
26 | }
27 | sub decode{
28 |     my ($obj, $data) = @_;
29 |     return decode_base64($data);
30 | }
31 | 
32 | package main;
33 | 
34 | my $e = encoder("foo", "ascii");
35 | ok ($e->data("bar"));
36 | is ($e->data, "bar");
37 | ok ($e->encoding("latin1"));
38 | is ($e->encoding, "iso-8859-1");
39 | 
40 | my $data = '';
41 | for my $i (0..255){
42 |     no warnings;
43 |     $data .= chr($i);
44 |     my $base64 = encode_base64($data);
45 |     is(encoder($data)->base64, $base64, "encode");
46 |     is(encoder($base64)->bytes('base64'), $data, "decode");
47 | }
48 | 
49 | 1;
50 | __END__
51 | 


--------------------------------------------------------------------------------
/t/Mod_EUCJP.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/Mod_EUCJP.pm


--------------------------------------------------------------------------------
/t/Unicode.t:
--------------------------------------------------------------------------------
  1 | #
  2 | # $Id: Unicode.t,v 2.5 2023/11/10 01:10:50 dankogai Exp $
  3 | #
  4 | # This script is written entirely in ASCII, even though quoted literals
  5 | # do include non-BMP unicode characters -- Are you happy, jhi?
  6 | #
  7 | 
  8 | BEGIN {
  9 |     require Config; Config->import();
 10 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
 11 |       print "1..0 # Skip: Encode was not built\n";
 12 |       exit 0;
 13 |     }
 14 |     if (ord("A") == 193) {
 15 |         print "1..0 # Skip: EBCDIC\n";
 16 |     exit 0;
 17 |     }
 18 |     $| = 1;
 19 | }
 20 | 
 21 | use strict;
 22 | #use Test::More 'no_plan';
 23 | use Test::More tests => 56;
 24 | use Encode qw(encode decode find_encoding);
 25 | 
 26 | #
 27 | # see
 28 | # http://www.unicode.org/reports/tr19/
 29 | #
 30 | 
 31 | my $dankogai   = "\x{5c0f}\x{98fc}\x{3000}\x{5f3e}";
 32 | my $nasty      = "$dankogai\x{1abcd}";
 33 | my $fallback   = "$dankogai\x{fffd}\x{fffd}";
 34 | 
 35 | #hi: (0x1abcd - 0x10000) / 0x400 + 0xD800 = 0xd82a
 36 | #lo: (0x1abcd - 0x10000) % 0x400 + 0xDC00 = 0xdfcd
 37 | 
 38 | my $n_16be = 
 39 |     pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e  d8 2a df cd>);
 40 | my $n_16le =
 41 |     pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f  2a d8 cd df>);
 42 | my $f_16be = 
 43 |     pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e  ff fd>);
 44 | my $f_16le =
 45 |     pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f  fd ff>);
 46 | my $n_32be =
 47 |     pack("C*", map {hex($_)} 
 48 |      qw<00 00 5c 0f 00 00 98 fc 00 00 30 00 00 00 5f 3e  00 01 ab cd>);
 49 | my $n_32le = 
 50 |     pack("C*", map {hex($_)} 
 51 |      qw<0f 5c 00 00 fc 98 00 00 00 30 00 00 3e 5f 00 00  cd ab 01 00>);
 52 | 
 53 | my $n_16bb = pack('n', 0xFeFF) . $n_16be;
 54 | my $n_16lb = pack('v', 0xFeFF) . $n_16le;
 55 | my $n_32bb = pack('N', 0xFeFF) . $n_32be;
 56 | my $n_32lb = pack('V', 0xFeFF) . $n_32le;
 57 | 
 58 | is($n_16be, encode('UTF-16BE', $nasty),  qq{encode UTF-16BE});
 59 | is($n_16le, encode('UTF-16LE', $nasty),  qq{encode UTF-16LE});
 60 | is($n_32be, encode('UTF-32BE', $nasty),  qq{encode UTF-32BE});
 61 | is($n_32le, encode('UTF-32LE', $nasty),  qq{encode UTF-16LE});
 62 | 
 63 | is($nasty,  decode('UTF-16BE', $n_16be), qq{decode UTF-16BE});
 64 | is($nasty,  decode('UTF-16LE', $n_16le), qq{decode UTF-16LE});
 65 | is($nasty,  decode('UTF-32BE', $n_32be), qq{decode UTF-32BE});
 66 | is($nasty,  decode('UTF-32LE', $n_32le), qq{decode UTF-32LE});
 67 | 
 68 | is($n_16bb, encode('UTF-16',   $nasty),  qq{encode UTF-16});
 69 | is($n_32bb, encode('UTF-32',   $nasty),  qq{encode UTF-32});
 70 | is($nasty,  decode('UTF-16',   $n_16bb), qq{decode UTF-16, bom=be});
 71 | is($nasty,  decode('UTF-16',   $n_16lb), qq{decode UTF-16, bom=le});
 72 | is($nasty,  decode('UTF-32',   $n_32bb), qq{decode UTF-32, bom=be});
 73 | is($nasty,  decode('UTF-32',   $n_32lb), qq{decode UTF-32, bom=le});
 74 | 
 75 | is(decode('UCS-2BE', $n_16be), $fallback, "decode UCS-2BE: fallback");
 76 | is(decode('UCS-2LE', $n_16le), $fallback, "decode UCS-2LE: fallback");
 77 | eval { decode('UCS-2BE', $n_16be, 1) }; 
 78 | is (index($@,'UCS-2BE:'), 0, "decode UCS-2BE: exception");
 79 | eval { decode('UCS-2LE', $n_16le, 1) };
 80 | is (index($@,'UCS-2LE:'), 0, "decode UCS-2LE: exception");
 81 | is(encode('UCS-2BE', $nasty), $f_16be, "encode UCS-2BE: fallback");
 82 | is(encode('UCS-2LE', $nasty), $f_16le, "encode UCS-2LE: fallback");
 83 | eval { encode('UCS-2BE', $nasty, 1) }; 
 84 | is(index($@, 'UCS-2BE'), 0, "encode UCS-2BE: exception");
 85 | eval { encode('UCS-2LE', $nasty, 1) }; 
 86 | is(index($@, 'UCS-2LE'), 0, "encode UCS-2LE: exception");
 87 | 
 88 | {
 89 |     my %tests = (
 90 |         'UCS-2BE'  => 'n*',
 91 |         'UCS-2LE'  => 'v*',
 92 |         'UTF-16BE' => 'n*',
 93 |         'UTF-16LE' => 'v*',
 94 |         'UTF-32BE' => 'N*',
 95 |         'UTF-32LE' => 'V*',
 96 |     );
 97 | 
 98 |     while (my ($enc, $pack) = each(%tests)) {
 99 |         is(decode($enc, pack($pack, 0xD800, 0x263A)), "\x{FFFD}\x{263A}",
100 |           "decode $enc (HI surrogate followed by WHITE SMILING FACE)");
101 |         is(decode($enc, pack($pack, 0xDC00, 0x263A)), "\x{FFFD}\x{263A}", 
102 |           "decode $enc (LO surrogate followed by WHITE SMILING FACE)");
103 |     }
104 | }
105 | 
106 | {
107 |     my %tests = (
108 |         'UTF-16BE' => 'n*',
109 |         'UTF-16LE' => 'v*',
110 |     );
111 | 
112 |     while (my ($enc, $pack) = each(%tests)) {
113 |         is(decode($enc, pack($pack, 0xD800)), "\x{FFFD}",
114 |           "decode $enc (HI surrogate)");
115 |         is(decode($enc, pack($pack, 0x263A, 0xD800)), "\x{263A}\x{FFFD}",
116 |           "decode $enc (WHITE SMILING FACE followed by HI surrogate)");
117 |     }
118 | }
119 | 
120 | {
121 |     my %tests = (
122 |         'UTF-16BE' => 'n*',
123 |         'UTF-16LE' => 'v*',
124 |     );
125 | 
126 |     while (my ($enc, $pack) = each(%tests)) {
127 |         is(encode($enc, "\x{110000}"), pack($pack, 0xFFFD), 
128 |           "ordinals greater than U+10FFFF is replaced with U+FFFD");
129 |     }
130 | }
131 | 
132 | #
133 | # SvGROW test for (en|de)code_xs
134 | #
135 | SKIP: {
136 |     my $utf8 = '';
137 |     for my $j (0,0x10){
138 |     for my $i (0..0xffff){
139 |         $j == 0 and (0xD800 <= $i && $i <= 0xDFFF) and next;
140 |         $utf8 .= ord($j+$i);
141 |     }
142 |     for my $major ('UTF-16', 'UTF-32'){
143 |         for my $minor ('BE', 'LE'){
144 |         my $enc = $major.$minor;
145 |         is(decode($enc, encode($enc, $utf8)), $utf8, "$enc RT");
146 |         }
147 |     }
148 |     }
149 | };
150 | 
151 | #
152 | # CJKT vs. UTF-7
153 | #
154 | 
155 | use File::Spec;
156 | use File::Basename;
157 | 
158 | my $dir =  dirname(__FILE__);
159 | opendir my $dh, $dir or die "$dir:$!";
160 | my @file = sort grep {/\.utf$/o} readdir $dh;
161 | closedir $dh;
162 | for my $file (@file){
163 |     my $path = File::Spec->catfile($dir, $file);
164 |     open my $fh, '<', $path or die "$path:$!";
165 |     my $content;
166 |     if (PerlIO::Layer->find('perlio')){
167 |     binmode $fh => ':utf8';
168 |     $content = join('' => <$fh>);
169 |     }else{ # ugh!
170 |     binmode $fh;
171 |     $content = join('' => <$fh>);
172 |     Encode::_utf8_on($content)
173 |     }
174 |     close $fh;
175 |     is(decode("UTF-7", encode("UTF-7", $content)), $content, 
176 |        "UTF-7 RT:$file");
177 | }
178 | 
179 | # Magic
180 | {
181 |     # see http://rt.perl.org/rt3//Ticket/Display.html?id=60472
182 |     my $work = chr(0x100);
183 |     my $encoding = find_encoding("UTF16-BE");
184 |     my $tied;
185 |     tie $tied, SomeScalar => \$work;
186 |     my $result = $encoding->encode($tied, 1);
187 |     is($work, "", "check set magic was applied");
188 | }
189 | 
190 | package SomeScalar;
191 | use Tie::Scalar;
192 | use vars qw(@ISA);
193 | BEGIN { @ISA = 'Tie::Scalar' }
194 | 
195 | sub TIESCALAR {
196 |     my ($class, $ref) = @_;
197 |     return bless $ref, $class;
198 | }
199 | 
200 | sub FETCH {
201 |     ${$_[0]}
202 | }
203 | 
204 | sub STORE {
205 |     ${$_[0]} = $_[1];
206 | }
207 | 
208 | 1;
209 | __END__
210 | 


--------------------------------------------------------------------------------
/t/Unicode_trailing_nul.t:
--------------------------------------------------------------------------------
 1 | BEGIN {
 2 |     if ( $] < 5.009 ) {
 3 |         print "1..0 # Skip: Perl <= 5.9 or later required\n";
 4 |         exit 0;
 5 |     }
 6 | }
 7 | use strict;
 8 | use Test::More;
 9 | 
10 | use Encode;
11 | use File::Temp;
12 | use File::Spec;
13 | 
14 | # This test relies on https://github.com/Perl/perl5/issues/10623;
15 | # if that bug is ever fixed then this test may never fail again.
16 | 
17 | my $foo = Encode::decode("UTF-16LE", "/\0v\0a\0r\0/\0f\0f\0f\0f\0f\0f\0/\0u\0s\0e\0r\0s\0/\0s\0u\0p\0e\0r\0m\0a\0n\0");
18 | 
19 | my ($fh, $path) = File::Temp::tempfile( UNLINK => 1 );
20 | 
21 | note "temp file: $path";
22 | 
23 | # Perl gives the internal PV to exec .. which is buggy/wrong but
24 | # useful here:
25 | system( $^X, '-e', "open my \$fh, '>>', '$path' or die \$!; print {\$fh} \$ARGV[0]", $foo );
26 | die if $?;
27 | 
28 | my $output = do { local $/; <$fh> };
29 | 
30 | is( $output, "/var/ffffff/users/superman", 'UTF-16 decodes with trailing NUL' );
31 | 
32 | done_testing();
33 | 


--------------------------------------------------------------------------------
/t/at-cn.t:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/at-cn.t


--------------------------------------------------------------------------------
/t/at-tw.t:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/at-tw.t


--------------------------------------------------------------------------------
/t/big5-eten.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/big5-eten.enc


--------------------------------------------------------------------------------
/t/big5-hkscs.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/big5-hkscs.enc


--------------------------------------------------------------------------------
/t/cow.t:
--------------------------------------------------------------------------------
 1 | #
 2 | # $Id: cow.t,v 1.2 2016/08/04 03:15:58 dankogai Exp $
 3 | #
 4 | use strict;
 5 | use Encode ();
 6 | use Test::More tests => 4;
 7 | 
 8 | 
 9 | my %a = ( "L\x{c3}\x{a9}on" => "acme" );
10 | my ($k) = ( keys %a );
11 | Encode::_utf8_on($k);
12 | my %h = ( $k => "acme" );
13 | is $h{"L\x{e9}on"} => 'acme';
14 | ($k) = ( keys %h );
15 | Encode::_utf8_off($k);
16 | %a = ( $k => "acme" );
17 | is $h{"L\x{e9}on"} => 'acme';
18 | # use Devel::Peek;
19 | # Dump(\%h);
20 | 
21 | { # invalid input to encode/decode/from_to should not affect COW-shared scalars
22 | 	my $x = Encode::decode('UTF-8', "\303\244" x 4);
23 | 	my $orig = "$x"; # non-COW copy
24 | 	is($x, $orig, "copy of original string matches");
25 | 	{ my $y = $x; Encode::from_to($y, "UTF-8", "iso-8859-1"); }
26 | 	is($x, $orig, "original scalar unmodified after from_to() call");
27 | }
28 | 


--------------------------------------------------------------------------------
/t/decode.t:
--------------------------------------------------------------------------------
 1 | #
 2 | # $Id: decode.t,v 1.5 2019/01/31 04:26:40 dankogai Exp $
 3 | #
 4 | use strict;
 5 | use Encode qw(decode_utf8 FB_CROAK find_encoding decode);
 6 | use Test::More tests => 17;
 7 | use Test::Builder;
 8 | 
 9 | sub croak_ok(&) {
10 |     local $Test::Builder::Level = $Test::Builder::Level + 1;
11 |     my $code = shift;
12 |     eval { $code->() };
13 |     like $@, qr/does not map/;
14 | }
15 | 
16 | my $bytes = "L\x{e9}on";
17 | my $pad = "\x{30C9}";
18 | 
19 | my $orig = $bytes;
20 | croak_ok { Encode::decode_utf8($orig, FB_CROAK) };
21 | 
22 | my $orig2 = $bytes;
23 | croak_ok { Encode::decode('utf-8', $orig2, FB_CROAK) };
24 | 
25 | chop(my $new = $bytes . $pad);
26 | croak_ok { Encode::decode_utf8($new, FB_CROAK) };
27 | 
28 | my $latin1 = find_encoding('latin1');
29 | $orig = "\N{U+0080}";
30 | $orig =~ /(.)/;
31 | is($latin1->decode($1), $orig, '[cpan #115168] passing magic regex globals to decode');
32 | SKIP: {
33 |     skip "Perl Version ($]) is older than v5.16", 1 if $] < 5.016;
34 |     *a = $orig;
35 |     is($latin1->decode(*a), '*main::'.$orig, '[cpan #115168] passing typeglobs to decode');
36 | }
37 | 
38 | $orig = "\x80";
39 | $orig =~ /(.)/;
40 | is($latin1->decode($1), "\N{U+0080}", 'passing magic regex to latin1 decode');
41 | 
42 | $orig = "\x80";
43 | *a = $orig;
44 | is($latin1->decode(*a), "*main::\N{U+0080}", 'passing typeglob to latin1 decode');
45 | 
46 | $orig = "\N{U+0080}";
47 | $orig =~ /(.)/;
48 | is($latin1->encode($1), "\x80", 'passing magic regex to latin1 encode');
49 | 
50 | $orig = "\xC3\x80";
51 | $orig =~ /(..)/;
52 | is(Encode::decode_utf8($1), "\N{U+C0}", 'passing magic regex to Encode::decode_utf8');
53 | 
54 | SKIP: {
55 |     skip "Perl Version ($]) is older than v5.27.1", 1 if $] < 5.027001;
56 |     $orig = "\xC3\x80";
57 |     *a = $orig;
58 |     is(Encode::decode_utf8(*a), "*main::\N{U+C0}", 'passing typeglob to Encode::decode_utf8');
59 | }
60 | 
61 | $orig = "\N{U+C0}";
62 | $orig =~ /(.)/;
63 | is(Encode::encode_utf8($1), "\xC3\x80", 'passing magic regex to Encode::encode_utf8');
64 | 
65 | $orig = "\xC3\x80";
66 | $orig =~ /(..)/;
67 | is(Encode::decode('utf-8', $1), "\N{U+C0}", 'passing magic regex to UTF-8 decode');
68 | 
69 | $orig = "\xC3\x80";
70 | *a = $orig;
71 | is(Encode::decode('utf-8', *a), "*main::\N{U+C0}", 'passing typeglob to UTF-8 decode');
72 | 
73 | $orig = "\N{U+C0}";
74 | $orig =~ /(.)/;
75 | is(Encode::encode('utf-8', $1), "\xC3\x80", 'passing magic regex to UTF-8 encode');
76 | 
77 | SKIP: {
78 |     skip "Perl Version ($]) is older than v5.16", 3 if $] < 5.016;
79 | 
80 |     $orig = "\N{U+0080}";
81 |     *a = $orig;
82 |     is($latin1->encode(*a), "*main::\x80", 'passing typeglob to latin1 encode');
83 | 
84 |     $orig = "\N{U+C0}";
85 |     *a = $orig;
86 |     is(Encode::encode_utf8(*a), "*main::\xC3\x80", 'passing typeglob to Encode::encode_utf8');
87 | 
88 |     $orig = "\N{U+C0}";
89 |     *a = $orig;
90 |     is(Encode::encode('utf-8', *a), "*main::\xC3\x80", 'passing typeglob to UTF-8 encode');
91 | }
92 | 


--------------------------------------------------------------------------------
/t/enc_data.t:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/enc_data.t


--------------------------------------------------------------------------------
/t/enc_eucjp.t:
--------------------------------------------------------------------------------
  1 | # $Id: enc_eucjp.t,v 2.7 2023/11/10 01:10:50 dankogai Exp $
  2 | # This is the twin of enc_utf8.t .
  3 | 
  4 | BEGIN {
  5 |     require Config; Config->import();
  6 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
  7 |       print "1..0 # Skip: Encode was not built\n";
  8 |       exit 0;
  9 |     }
 10 |     unless (find PerlIO::Layer 'perlio') {
 11 |     print "1..0 # Skip: PerlIO was not built\n";
 12 |     exit 0;
 13 |     }
 14 |     if (ord("A") == 193) {
 15 |     print "1..0 # encoding pragma does not support EBCDIC platforms\n";
 16 |     exit(0);
 17 |     }
 18 |     if ($] <= 5.008 and !$Config{perl_patchlevel}){
 19 |     print "1..0 # Skip: Perl 5.8.1 or later required\n";
 20 |     exit 0;
 21 |     }
 22 |     if ($] >= 5.025003 and !$Config{usecperl}){
 23 |     print "1..0 # Skip: Perl <=5.25.2 or cperl required\n";
 24 |     exit 0;
 25 |     }
 26 | }
 27 | 
 28 | use Encode qw();
 29 | $PerlIO::encoding::fallback |= Encode::ONLY_PRAGMA_WARNINGS;
 30 | use warnings "utf8";
 31 | 
 32 | no warnings "deprecated";
 33 | use encoding 'euc-jp';
 34 | 
 35 | my @c = (127, 128, 255, 256);
 36 | 
 37 | print "1.." . (scalar @c + 2) . "\n";
 38 | 
 39 | my @f;
 40 | 
 41 | for my $i (0..$#c) {
 42 |   no warnings 'pack';
 43 |   my $file = filename("f$i");
 44 |   push @f, $file;
 45 |   open(F, ">$file") or die "$0: failed to open '$file' for writing: $!";
 46 |   binmode(F, ":utf8");
 47 |   print F chr($c[$i]);
 48 |   print F pack("C" => $c[$i]);
 49 |   close F;
 50 | }
 51 | 
 52 | my $t = 1;
 53 | 
 54 | for my $i (0..$#c) {
 55 |   my $file = filename("f$i");
 56 |   open(F, "<$file") or die "$0: failed to open '$file' for reading: $!";
 57 |   binmode(F, ":utf8");
 58 |   my $c = <F>;
 59 |   my $o = ord($c);
 60 |   print $o == $c[$i] ? "ok $t - utf8 I/O $c[$i]\n" : "not ok $t - utf8 I/O $c[$i]: $o != $c[$i]\n";
 61 |   $t++;
 62 | }
 63 | 
 64 | my $f = filename("f" . @f);
 65 | 
 66 | push @f, $f;
 67 | open(F, ">$f") or die "$0: failed to open '$f' for writing: $!";
 68 | binmode(F, ":raw"); # Output raw bytes.
 69 | print F chr(128); # Output illegal UTF-8.
 70 | close F;
 71 | open(F, $f) or die "$0: failed to open '$f' for reading: $!";
 72 | binmode(F, ":encoding(UTF-8)");
 73 | {
 74 |     local $^W = 1;
 75 |     local $SIG{__WARN__} = sub { $a = shift };
 76 |     eval { <F> }; # This should get caught.
 77 | }
 78 | close F;
 79 | print $a =~ qr{^UTF-8 "\\x80" does not map to Unicode} ?
 80 |   "ok $t - illegal UTF-8 input\n" : "not ok $t - illegal UTF-8 input: a = " . unpack("H*", $a) . "\n";
 81 | $t++;
 82 | 
 83 | open(F, $f) or die "$0: failed to open '$f' for reading: $!";
 84 | binmode(F, ":encoding(utf8)");
 85 | {
 86 |     local $^W = 1;
 87 |     local $SIG{__WARN__} = sub { $a = shift };
 88 |     eval { <F> }; # This should get caught.
 89 | }
 90 | close F;
 91 | print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
 92 |   "ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n";
 93 | $t++;
 94 | 
 95 | # On VMS temporary file names like "f0." may be more readable than "f0" since
 96 | # "f0" could be a logical name pointing elsewhere.
 97 | sub filename {
 98 |     my $name = shift;
 99 |     $name .= '.' if $^O eq 'VMS';
100 |     return $name;
101 | }
102 | 
103 | END {
104 |   1 while unlink @f;
105 | }
106 | 


--------------------------------------------------------------------------------
/t/enc_module.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/enc_module.enc


--------------------------------------------------------------------------------
/t/enc_module.t:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/enc_module.t


--------------------------------------------------------------------------------
/t/enc_utf8.t:
--------------------------------------------------------------------------------
 1 | # $Id: enc_utf8.t,v 2.7 2023/11/10 01:10:50 dankogai Exp $
 2 | # This is the twin of enc_eucjp.t .
 3 | 
 4 | BEGIN {
 5 |     require Config; Config->import();
 6 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
 7 |       print "1..0 # Skip: Encode was not built\n";
 8 |       exit 0;
 9 |     }
10 |     unless (find PerlIO::Layer 'perlio') {
11 |       print "1..0 # Skip: PerlIO was not built\n";
12 |       exit 0;
13 |     }
14 |     if (ord("A") == 193) {
15 |       print "1..0 # encoding pragma does not support EBCDIC platforms\n";
16 |       exit(0);
17 |     }
18 |     if ($] >= 5.025003 and !$Config{usecperl}){
19 |       print "1..0 # Skip: Perl <=5.25.2 or cperl required\n";
20 |       exit 0;
21 |     }
22 | }
23 | 
24 | no warnings "deprecated";
25 | use encoding 'utf8';
26 | use warnings;
27 | 
28 | my @c = (127, 128, 255, 256);
29 | 
30 | print "1.." . (scalar @c + 2) . "\n";
31 | 
32 | my @f;
33 | 
34 | for my $i (0..$#c) {
35 |   my $file = filename("f$i");
36 |   push @f, $file;
37 |   open(F, ">$file") or die "$0: failed to open '$file' for writing: $!";
38 |   binmode(F, ":utf8");
39 |   print F chr($c[$i]);
40 |   close F;
41 | }
42 | 
43 | my $t = 1;
44 | 
45 | for my $i (0..$#c) {
46 |   my $file = filename("f$i");
47 |   open(F, "<$file") or die "$0: failed to open '$file' for reading: $!";
48 |   binmode(F, ":utf8");
49 |   my $c = <F>;
50 |   my $o = ord($c);
51 |   print $o == $c[$i] ? "ok $t - utf8 I/O $c[$i]\n" : "not ok $t - utf8 I/O $c[$i]: $o != $c[$i]\n";
52 |   $t++;
53 | }
54 | 
55 | my $f = filename("f" . @f);
56 | 
57 | push @f, $f;
58 | open(F, ">$f") or die "$0: failed to open '$f' for writing: $!";
59 | binmode(F, ":raw"); # Output raw bytes.
60 | print F chr(128); # Output illegal UTF-8.
61 | close F;
62 | open(F, $f) or die "$0: failed to open '$f' for reading: $!";
63 | binmode(F, ":encoding(UTF-8)");
64 | {
65 |     local $^W = 1;
66 |     local $SIG{__WARN__} = sub { $a = shift };
67 |     eval { <F> }; # This should get caught.
68 | }
69 | close F;
70 | print $a =~ qr{^UTF-8 "\\x80" does not map to Unicode} ?
71 |   "ok $t - illegal UTF-8 input\n" : "not ok $t - illegal UTF-8 input: a = " . unpack("H*", $a) . "\n";
72 | $t++;
73 | 
74 | open(F, $f) or die "$0: failed to open '$f' for reading: $!";
75 | binmode(F, ":encoding(utf8)");
76 | {
77 |     local $^W = 1;
78 |     local $SIG{__WARN__} = sub { $a = shift };
79 |     eval { <F> }; # This should get caught.
80 | }
81 | close F;
82 | print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
83 |   "ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n";
84 | $t++;
85 | 
86 | # On VMS temporary file names like "f0." may be more readable than "f0" since
87 | # "f0" could be a logical name pointing elsewhere.
88 | sub filename {
89 |     my $name = shift;
90 |     $name .= '.' if $^O eq 'VMS';
91 |     return $name;
92 | }
93 | 
94 | END {
95 |   1 while unlink @f;
96 | }
97 | 


--------------------------------------------------------------------------------
/t/encoding-locale.t:
--------------------------------------------------------------------------------
 1 | #
 2 | # This test aims to detect (using CPAN Testers) platforms where the locale
 3 | # encoding detection doesn't work.
 4 | #
 5 | 
 6 | use strict;
 7 | use warnings;
 8 | 
 9 | use Test::More tests => 3;
10 | 
11 | use encoding ();
12 | use Encode qw<find_encoding>;
13 | 
14 | my $locale_encoding = encoding::_get_locale_encoding;
15 | 
16 | SKIP: {
17 |     defined $locale_encoding or skip 'no locale encoding found', 3;
18 | 
19 |     is(ref $locale_encoding, '', '_get_locale_encoding returns a scalar value');
20 | 
21 |     my $enc = find_encoding($locale_encoding);
22 |     ok(defined $enc, 'encoding returned is supported')
23 | 	or diag("Encoding: ", explain($locale_encoding));
24 |     isa_ok($enc, 'Encode::Encoding');
25 |     eval { note($locale_encoding, ' => ', $enc->name); };
26 | }
27 | 


--------------------------------------------------------------------------------
/t/encoding.t:
--------------------------------------------------------------------------------
  1 | BEGIN {
  2 |     require Config; Config->import();
  3 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
  4 |       print "1..0 # Skip: Encode was not built\n";
  5 |       exit 0;
  6 |     }
  7 |     unless (find PerlIO::Layer 'perlio') {
  8 |     print "1..0 # Skip: PerlIO was not built\n";
  9 |     exit 0;
 10 |     }
 11 |     if (ord("A") == 193) {
 12 |     print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n";
 13 |     exit(0);
 14 |     }
 15 |     if ($] >= 5.025 and !$Config{usecperl}) {
 16 |     print "1..0 # Skip: encoding pragma not supported in Perl 5.25 or later\n";
 17 |     exit(0);
 18 |     }
 19 | }
 20 | 
 21 | print "1..33\n";
 22 |  
 23 | 
 24 | no warnings "deprecated";
 25 | use encoding "latin1"; # ignored (overwritten by the next line)
 26 | use encoding "greek";  # iso 8859-7 (no "latin" alias, surprise...)
 27 | 
 28 | # "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
 29 | # \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
 30 | # instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
 31 | 
 32 | $a = "\xDF";
 33 | $b = "\x{100}";
 34 | 
 35 | print "not " unless ord($a) == 0x3af;
 36 | print "ok 1\n";
 37 | 
 38 | print "not " unless ord($b) == 0x100;
 39 | print "ok 2\n";
 40 | 
 41 | my $c;
 42 | 
 43 | $c = $a . $b;
 44 | 
 45 | print "not " unless ord($c) == 0x3af;
 46 | print "ok 3\n";
 47 | 
 48 | print "not " unless length($c) == 2;
 49 | print "ok 4\n";
 50 | 
 51 | print "not " unless ord(substr($c, 1, 1)) == 0x100;
 52 | print "ok 5\n";
 53 | 
 54 | print "not " unless ord(chr(0xdf)) == 0x3af; # spooky
 55 | print "ok 6\n";
 56 | 
 57 | print "not " unless ord(pack("C", 0xdf)) == 0x3af;
 58 | print "ok 7\n";
 59 | 
 60 | # we didn't break pack/unpack, I hope
 61 | 
 62 | print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf;
 63 | print "ok 8\n";
 64 | 
 65 | # the first octet of UTF-8 encoded 0x3af 
 66 | print "not " unless unpack("U0 C", chr(0xdf)) == 0xce;
 67 | print "ok 9\n";
 68 | 
 69 | print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
 70 | print "ok 10\n";
 71 | 
 72 | print "not " unless unpack("U", chr(0xdf)) == 0x3af;
 73 | print "ok 11\n";
 74 | 
 75 | # charnames must still work
 76 | use charnames ':full';
 77 | print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
 78 | print "ok 12\n";
 79 | 
 80 | # combine
 81 | 
 82 | $c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);
 83 | 
 84 | print "not " unless ord($c) == 0x3af;
 85 | print "ok 13\n";
 86 | 
 87 | print "not " unless ord(substr($c, 1, 1)) == 0xdf;
 88 | print "ok 14\n";
 89 | 
 90 | print "not " unless ord(substr($c, 2, 1)) == 0x3af;
 91 | print "ok 15\n";
 92 | 
 93 | # regex literals
 94 | 
 95 | print "not " unless "\xDF"    =~ /\x{3AF}/;
 96 | print "ok 16\n";
 97 | 
 98 | print "not " unless "\x{3AF}" =~ /\xDF/;
 99 | print "ok 17\n";
100 | 
101 | print "not " unless "\xDF"    =~ /\xDF/;
102 | print "ok 18\n";
103 | 
104 | print "not " unless "\x{3AF}" =~ /\x{3AF}/;
105 | print "ok 19\n";
106 | 
107 | # eq, cmp
108 | 
109 | my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( 
110 |     pack("C*", 0xDF ),       # byte
111 |     pack("C*", 0xDF, 0x20),  # ($bytes2 cmp $U) > 0
112 |     pack("U*", 0x3AF),       # $U eq $byte
113 |     pack("U*", 0xDF ),       # $Ub would eq $bytev w/o use encoding
114 |     pack("U*", 0x3B1),       # ($g1 cmp $byte) > 0; === chr(0xe1)
115 |     pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0;
116 |     pack("U*", 0x3AB),       # ($l  cmp $byte) < 0; === chr(0xdb)
117 | );
118 | 
119 | # all the tests in this section that compare a byte encoded string 
120 | # ato UTF-8 encoded are run in all possible vairants 
121 | # all of the eq, ne, cmp operations tested,
122 | # $v z $u tested as well as $u z $v
123 | 
124 | sub alleq($$){
125 |     my ($a,$b)    =    (shift, shift);
126 |      $a  eq  $b        &&     $b  eq  $a         && 
127 |   !( $a  ne  $b )      &&  !( $b  ne  $a )       &&
128 |    ( $a  cmp $b ) == 0 &&   ( $b  cmp $a ) == 0;
129 | }
130 |    
131 | sub anyeq($$){
132 |     my ($a,$b)    =    (shift, shift);
133 |      $a  eq  $b        ||     $b  eq  $a         ||
134 |   !( $a  ne  $b )      ||  !( $b  ne  $a )       ||
135 |    ( $a  cmp $b ) == 0 ||   ( $b  cmp $a ) == 0;
136 | }
137 | 
138 | sub allgt($$){
139 |     my ($a,$b)    =    (shift, shift);
140 |     ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1;
141 | }
142 | #match the correct UTF-8 string
143 | print "not " unless  alleq($byte, $U);
144 | print "ok 20\n";
145 | 
146 | #do not match a wrong UTF-8 string
147 | print "not " if anyeq($byte, $Ub);
148 | print "ok 21\n";
149 | 
150 | #string ordering
151 | print "not " unless allgt ( $g1,    $byte  )  &&
152 |                     allgt ( $g2,    $byte  )  &&
153 |                     allgt ( $byte,  $l     )  &&
154 |                     allgt ( $bytes, $U     );
155 | print "ok 22\n";
156 | 
157 | # upgrade, downgrade
158 | 
159 | my ($u,$v,$v2);
160 | $u = $v = $v2 = pack("C*", 0xDF);
161 | utf8::upgrade($v);                   #explicit upgrade
162 | $v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade
163 | 
164 | # implicit upgrade === explicit upgrade
165 | print "not "  if do{{use bytes; $v ne $v2}} || $v ne $v2;
166 | print "ok 23\n";
167 | 
168 | # utf8::upgrade is transparent and does not break equality
169 | print "not " unless alleq( $u, $v );
170 | print "ok 24\n";
171 | 
172 | $u = $v = pack("C*", 0xDF);
173 | utf8::upgrade($v);
174 | #test for a roundtrip, we should get back from where we left
175 | eval {utf8::downgrade( $v )};
176 | print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v;
177 | print "ok 25\n";
178 | 
179 | # some more eq, cmp
180 | 
181 | $byte=pack("C*", 0xDF);
182 | 
183 | print "not " unless pack("U*", 0x3AF) eq $byte;
184 | print "ok 26\n";
185 | 
186 | print "not " if chr(0xDF) cmp $byte;
187 | print "ok 27\n";
188 | 
189 | print "not " unless ((pack("U*", 0x3B0)       cmp $byte) ==  1) &&
190 |                     ((pack("U*", 0x3AE)       cmp $byte) == -1) &&
191 |                     ((pack("U*", 0x3AF, 0x20) cmp $byte) ==  1) &&
192 |                 ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
193 | print "ok 28\n";
194 | 
195 | 
196 | {
197 |     # Used to core dump in 5.7.3
198 |     no warnings; # so test goes noiselessly
199 |     print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n";
200 | }
201 | 
202 | {
203 |     my %h1;
204 |     my %h2;
205 |     $h1{"\xdf"}    = 41;
206 |     $h2{"\x{3af}"} = 42;
207 |     print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n";
208 |     print $h2{"\xdf"}    == 42 ? "ok 31\n" : "not ok 31\n";
209 | }
210 | 
211 | # Order of finding the above-Latin1 code point should not matter: both should
212 | # assume Latin1/Unicode encoding
213 | {
214 |     use bytes;
215 |     print "not " if "\xDF\x{100}" =~ /\x{3af}\x{100}/;
216 |     print "ok 32\n";
217 |     print "not " if "\x{100}\xDF" =~ /\x{100}\x{3af}/;
218 |     print "ok 33\n";
219 | }
220 | 


--------------------------------------------------------------------------------
/t/from_to.t:
--------------------------------------------------------------------------------
 1 | # $Id: from_to.t,v 1.1 2006/01/15 15:06:36 dankogai Exp $
 2 | use strict;
 3 | use Test::More tests => 3;
 4 | use Encode qw(encode from_to);
 5 | 
 6 | my $foo = encode("utf-8", "\x{5abe}");
 7 | from_to($foo, "utf-8" => "latin1", Encode::FB_HTMLCREF);
 8 | ok !Encode::is_utf8($foo);
 9 | is $foo, '&#23230;';
10 | 
11 | my $bar = encode("latin-1", "\x{5abe}", Encode::FB_HTMLCREF);
12 | is $bar, '&#23230;';
13 | 


--------------------------------------------------------------------------------
/t/gb2312.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/gb2312.enc


--------------------------------------------------------------------------------
/t/grow.t:
--------------------------------------------------------------------------------
 1 | #!../perl
 2 | our $POWER;
 3 | BEGIN {
 4 |      if ($ENV{'PERL_CORE'}){
 5 |          chdir 't';
 6 |          unshift @INC, '../lib';
 7 |      }
 8 |      require Config; Config->import();
 9 |      if ($Config{'extensions'} !~ /\bEncode\b/) {
10 |          print "1..0 # Skip: Encode was not built\n";
11 |              exit 0;
12 |      }
13 |      $POWER = 12; # up to 1 MB.  You may adjust the figure here
14 | }
15 | 
16 | use strict;
17 | use Encode;
18 | 
19 | my $seed = "";
20 | for my $i (0x00..0xff){
21 |      my $c = chr($i);
22 |      $seed .= ($c =~ /^\p{IsPrint}/o) ? $c : " ";
23 | }
24 | 
25 | use Test::More tests => $POWER*2;
26 | my $octs = $seed;
27 | use bytes ();
28 | for my $i (1..$POWER){
29 |      $octs .= $octs;
30 |      my $len = bytes::length($octs);
31 |      my $utf8 = Encode::decode('latin1', $octs);
32 |      ok(1, "decode $len bytes");
33 |      is($octs,
34 |         Encode::encode('latin1', $utf8),
35 |         "encode $len bytes");
36 | }
37 | __END__
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/t/gsm0338.t:
--------------------------------------------------------------------------------
 1 | BEGIN {
 2 |     if ($ENV{'PERL_CORE'}){
 3 |         chdir 't';
 4 |         unshift @INC, '../lib';
 5 |     }
 6 |     require Config; Config->import();
 7 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
 8 |       print "1..0 # Skip: Encode was not built\n";
 9 |       exit 0;
10 |     }
11 |     $| = 1;
12 | }
13 | 
14 | use strict;
15 | use utf8;
16 | use Test::More tests => 777;
17 | use Encode;
18 | use Encode::GSM0338;
19 | use PerlIO::encoding;
20 | 
21 | # perl < 5.8.8 didn't enable STOP_AT_PARTIAL by default
22 | $PerlIO::encoding::fallback |= Encode::STOP_AT_PARTIAL;
23 | 
24 | my $chk = Encode::LEAVE_SRC();
25 | 
26 | # escapes
27 | # see https://www.3gpp.org/dynareport/23038.htm
28 | # see https://www.etsi.org/deliver/etsi_ts/123000_123099/123038/15.00.00_60/ts_123038v150000p.pdf (page 22)
29 | my %esc_seq = (
30 | 	       "\x{20ac}" => "\x1b\x65",
31 | 	       "\x0c"     => "\x1b\x0A",
32 | 	       "["        => "\x1b\x3C",
33 | 	       "\\"       => "\x1b\x2F",
34 | 	       "]"        => "\x1b\x3E",
35 | 	       "^"        => "\x1b\x14",
36 | 	       "{"        => "\x1b\x28",
37 | 	       "|"        => "\x1b\x40",
38 | 	       "}"        => "\x1b\x29",
39 | 	       "~"        => "\x1b\x3D",
40 | );
41 | 
42 | my %unesc_seq = reverse %esc_seq;
43 | 
44 | 
45 | sub eu{
46 |     $_[0] =~ /[\x00-\x1f]/ ? 
47 | 	sprintf("\\x{%04X}", ord($_[0])) : encode_utf8($_[0]);
48 |  
49 | }
50 | 
51 | for my $c ( map { chr } 0 .. 127 ) {
52 |     next if $c eq "\x1B"; # escape character, start of multibyte sequence
53 |     my $u = $Encode::GSM0338::GSM2UNI{$c};
54 | 
55 |     # default character set
56 |     is decode( "gsm0338", $c, $chk ), $u,
57 |       sprintf( "decode \\x%02X", ord($c) );
58 |     eval { decode( "gsm0338", $c . "\xff", $chk | Encode::FB_CROAK ) };
59 |     ok( $@, $@ );
60 |     is encode( "gsm0338", $u, $chk ), $c, sprintf( "encode %s", eu($u) );
61 |     eval { encode( "gsm0338", $u . "\x{3000}", $chk | Encode::FB_CROAK ) };
62 |     ok( $@, $@ );
63 | 
64 |         is decode( "gsm0338", "\x00" . $c ), '@' . decode( "gsm0338", $c ),
65 |           sprintf( '@: decode \x00+\x%02X', ord($c) );
66 | 
67 |     # escape seq.
68 |     my $ecs = "\x1b" . $c;
69 |     if ( $unesc_seq{$ecs} ) {
70 |         is decode( "gsm0338", $ecs, $chk ), $unesc_seq{$ecs},
71 |           sprintf( "ESC: decode ESC+\\x%02X", ord($c) );
72 |         is encode( "gsm0338", $unesc_seq{$ecs}, $chk ), $ecs,
73 |           sprintf( "ESC: encode %s ", eu( $unesc_seq{$ecs} ) );
74 |     }
75 |     else {
76 |         is decode( "gsm0338", $ecs, $chk ),
77 |           "\x{FFFD}",
78 |           sprintf( "decode ESC+\\x%02X", ord($c) );
79 |     }
80 | }
81 | 
82 | # https://rt.cpan.org/Ticket/Display.html?id=75670
83 | is decode("gsm0338", "\x09") => chr(0xC7), 'RT75670: decode';
84 | is encode("gsm0338", chr(0xC7)) => "\x09", 'RT75670: encode';
85 | 
86 | # https://rt.cpan.org/Public/Bug/Display.html?id=124571
87 | is decode("gsm0338", encode('gsm0338', '..@@..')), '..@@..';
88 | is decode("gsm0338", encode('gsm0338', '..@€..')), '..@€..';
89 | 
90 | # special GSM sequence, € is at 1024 byte buffer boundary
91 | my $gsm = "\x41" . "\x1B\x65" x 1024;
92 | open my $fh, '<:encoding(gsm0338)', \$gsm or die;
93 | my $uni = <$fh>;
94 | close $fh;
95 | is $uni, "A" . "€" x 1024, 'PerlIO encoding(gsm0338) read works';
96 | 


--------------------------------------------------------------------------------
/t/guess.t:
--------------------------------------------------------------------------------
  1 | BEGIN {
  2 |     require Config; Config->import();
  3 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
  4 |       print "1..0 # Skip: Encode was not built\n";
  5 |       exit 0;
  6 |     }
  7 |     if (ord("A") == 193) {
  8 |     print "1..0 # Skip: EBCDIC\n";
  9 |     exit 0;
 10 |     }
 11 |     $| = 1;
 12 | }
 13 | 
 14 | use strict;
 15 | use File::Basename;
 16 | use File::Spec;
 17 | use Encode qw(decode encode find_encoding _utf8_off);
 18 | 
 19 | #use Test::More qw(no_plan);
 20 | use Test::More tests => 32;
 21 | BEGIN { use_ok("Encode::Guess") }
 22 | 
 23 | my $ascii  = join('' => map {chr($_)}(0x21..0x7e));
 24 | my $latin1 = join('' => map {chr($_)}(0xa1..0xfe));
 25 | my $utf8on  = join('' => map {chr($_)}(0x3000..0x30fe));
 26 | my $utf8off = $utf8on; _utf8_off($utf8off);
 27 | my $utf16 = encode('UTF-16', $utf8on);
 28 | my $utf32 = encode('UTF-32', $utf8on);
 29 | 
 30 | like(guess_encoding(''), qr/empty string/io, 'empty string');
 31 | is(guess_encoding($ascii)->name, 'ascii', 'ascii');
 32 | like(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii');
 33 | is(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1');
 34 | is(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag');
 35 | is(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag');
 36 | is(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16');
 37 | is(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32');
 38 | 
 39 | my $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf');
 40 | my $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf');
 41 | my $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf');
 42 | 
 43 | open my $fh, $jisx0208 or die "$jisx0208: $!";
 44 | binmode($fh);
 45 | $utf8off = join('' => <$fh>);
 46 | close $fh;
 47 | $utf8on = decode('utf8', $utf8off);
 48 | 
 49 | my @jp = qw(7bit-jis shiftjis euc-jp);
 50 | 
 51 | Encode::Guess->set_suspects(@jp);
 52 | 
 53 | for my $jp (@jp){
 54 |     my $test = encode($jp, $utf8on);
 55 |     is(guess_encoding($test)->name, $jp, "JP:$jp");
 56 | }
 57 | 
 58 | is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')");
 59 | eval{ encode('Guess', $utf8on) };
 60 | like($@, qr/not defined/io, "no encode()");
 61 | 
 62 | {
 63 |     my $warning;
 64 |     local $SIG{__WARN__} = sub { $warning = shift };
 65 |     my $euc_jp = my $euc_jp_clone = encode('euc-jp', $utf8on);
 66 |     Encode::from_to($euc_jp, 'Guess', 'euc-jp');
 67 |     is $euc_jp_clone, $euc_jp, "from_to(..., 'Guess')";
 68 |     ok !$warning, "no warning";
 69 |     diag $warning if $warning;
 70 | }
 71 | 
 72 | my %CJKT = 
 73 |     (
 74 |      'euc-cn'    => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'),
 75 |      'euc-jp'    => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'),
 76 |      'euc-kr'    => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'),
 77 |      'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'),
 78 | );
 79 | 
 80 | Encode::Guess->set_suspects(keys %CJKT);
 81 | 
 82 | for my $name (keys %CJKT){
 83 |     open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!";
 84 |     binmode($fh);
 85 |     $utf8off = join('' => <$fh>);
 86 |     close $fh;
 87 | 
 88 |     my $test = encode($name, decode('utf8', $utf8off));
 89 |     is(guess_encoding($test)->name, $name, "CJKT:$name");
 90 | }
 91 | 
 92 | my $ambiguous =  "\x{5c0f}\x{98fc}\x{5f3e}";
 93 | my $english   = "The quick brown fox jumps over the black lazy dog.";
 94 | for my $utf (qw/UTF-16 UTF-32/){
 95 |     for my $bl (qw/BE LE/){
 96 |     my $test = encode("$utf$bl" => $english);
 97 |     is(guess_encoding($test)->name, "$utf$bl", "$utf$bl");
 98 |     }
 99 | }
100 | for my $bl (qw/BE LE/){
101 |     my $test = encode("UTF-16$bl" => $ambiguous);
102 |     my $result = guess_encoding($test);
103 |     ok(! ref($result), "UTF-16$bl:$result");
104 | }
105 | 
106 | 
107 | 
108 | Encode::Guess->set_suspects();
109 | for my $jp (@jp){
110 |     # intentionally set $1 a priori -- see Changes
111 |     my $test = "English";
112 |     '$1' =~ m/^(.*)/o;
113 |     is(guess_encoding($test, ($jp))->name, 'ascii', 
114 |        "ascii vs $jp (\$1 messed)");
115 |     $test = encode($jp, $test . "\n\x{65e5}\x{672c}\x{8a9e}");
116 |     is(guess_encoding($test, ($jp))->name, 
117 |        $jp, "$jp vs ascii (\$1 messed)");
118 | }
119 | 
120 | __END__;
121 | 


--------------------------------------------------------------------------------
/t/isa.t:
--------------------------------------------------------------------------------
 1 | #
 2 | # $Id: isa.t,v 1.1 2015/04/02 12:08:24 dankogai Exp $
 3 | #
 4 | use strict;
 5 | use Encode qw/find_encoding/;
 6 | use Test::More;
 7 | my @enc = Encode->encodings(":all");
 8 | plan tests => 0+@enc;
 9 | isa_ok find_encoding($_), "Encode::Encoding" for @enc;
10 | 
11 | 


--------------------------------------------------------------------------------
/t/jis7-fallback.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use Test::More 'no_plan';
 3 | use Encode ':fallbacks';
 4 | 
 5 | my $str = "\x{0647}";
 6 | my @data = grep length, map { chomp; $_ } <DATA>;
 7 | 
 8 | while (my($in, $out) = splice(@data, 0, 2)) {
 9 |     my $copy = $str;
10 |     is Encode::encode("iso-2022-jp", $copy, eval $in), $out;
11 | }
12 | 
13 | __DATA__
14 | FB_PERLQQ
15 | \x{0647}
16 | 
17 | FB_HTMLCREF
18 | &#1607;
19 | 
20 | FB_XMLCREF
21 | &#x647;
22 | 


--------------------------------------------------------------------------------
/t/jisx0201.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/jisx0201.enc


--------------------------------------------------------------------------------
/t/jisx0201.utf:
--------------------------------------------------------------------------------
1 | 0x00a0:  ｡｢｣､･ｦｧｨｩｪｫｬｭｮｯｰｱｲｳｴｵｶｷｸｹｺｻｼｽｾｿ
2 | 0x00c0: ﾀﾁﾂﾃﾄﾅﾆﾇﾈﾉﾊﾋﾌﾍﾎﾏﾐﾑﾒﾓﾔﾕﾖﾗﾘﾙﾚﾛﾜﾝﾞﾟ
3 | 


--------------------------------------------------------------------------------
/t/jisx0208.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/jisx0208.enc


--------------------------------------------------------------------------------
/t/jisx0212.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/jisx0212.enc


--------------------------------------------------------------------------------
/t/jperl.t:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/jperl.t


--------------------------------------------------------------------------------
/t/ksc5601.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/ksc5601.enc


--------------------------------------------------------------------------------
/t/mime-name.t:
--------------------------------------------------------------------------------
 1 | #
 2 | # $Id: mime-name.t,v 1.4 2023/11/10 01:10:50 dankogai Exp $
 3 | # This script is written in utf8
 4 | #
 5 | BEGIN {
 6 |     if ($ENV{'PERL_CORE'}){
 7 |         chdir 't';
 8 |         unshift @INC, '../lib';
 9 |     }
10 |     require Config; Config->import();
11 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
12 |       print "1..0 # Skip: Encode was not built\n";
13 |       exit 0;
14 |     }
15 |     if (ord("A") == 193) {
16 |     print "1..0 # Skip: EBCDIC\n";
17 |     exit 0;
18 |     }
19 |     $| = 1;
20 | }
21 | 
22 | use strict;
23 | use warnings;
24 | use Encode;
25 | #use Test::More qw(no_plan);
26 | use Test::More tests => 281;
27 | 
28 | BEGIN {
29 |     use_ok("Encode::MIME::Name");
30 | }
31 | 
32 | for my $canon ( sort keys %Encode::MIME::Name::MIME_NAME_OF ) {
33 |     my $enc       = find_encoding($canon);
34 |     my $mime_name = $Encode::MIME::Name::MIME_NAME_OF{$canon};
35 |     is $enc->mime_name, $mime_name,
36 |       qq(find_encoding($canon)->mime_name eq $mime_name);
37 |     is $enc->name, $canon,
38 |       qq(find_encoding($canon)->name eq $canon);
39 | }
40 | for my $mime_name ( sort keys %Encode::MIME::Name::ENCODE_NAME_OF ) {
41 |     my $enc       = find_mime_encoding($mime_name);
42 |     my $canon     = $Encode::MIME::Name::ENCODE_NAME_OF{$mime_name};
43 |     my $mime_name = $Encode::MIME::Name::MIME_NAME_OF{$canon};
44 |     is $enc->mime_name, $mime_name,
45 |       qq(find_mime_encoding($mime_name)->mime_name eq $mime_name);
46 |     is $enc->name, $canon,
47 |       qq(find_mime_encoding($mime_name)->name eq $canon);
48 | }
49 | 
50 | ok find_encoding("utf8");
51 | ok find_encoding("UTF8");
52 | ok find_encoding("utf-8-strict");
53 | ok find_encoding("utf-8");
54 | ok find_encoding("UTF-8");
55 | 
56 | ok not find_mime_encoding("utf8");
57 | ok not find_mime_encoding("UTF8");
58 | ok not find_mime_encoding("utf-8-strict");
59 | ok find_mime_encoding("utf-8");
60 | ok find_mime_encoding("UTF-8");
61 | 
62 | __END__;
63 | 


--------------------------------------------------------------------------------
/t/mime_header_iso2022jp.t:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dankogai/p5-encode/51e8cc56415253dfe27d69204b925b4df74b8a59/t/mime_header_iso2022jp.t


--------------------------------------------------------------------------------
/t/perlio.t:
--------------------------------------------------------------------------------
  1 | BEGIN {
  2 |     require Config; Config->import();
  3 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
  4 |       print "1..0 # Skip: Encode was not built\n";
  5 |       exit 0;
  6 |     }
  7 |     if (ord("A") == 193) {
  8 |     print "1..0 # Skip: EBCDIC\n";
  9 |     exit 0;
 10 |     }
 11 |     unless (PerlIO::Layer->find('perlio')){
 12 |         print "1..0 # Skip: PerlIO required\n";
 13 |         exit 0;
 14 |     }
 15 |     $| = 1;
 16 | }
 17 | 
 18 | use strict;
 19 | use File::Basename;
 20 | use File::Spec;
 21 | use File::Compare qw(compare_text);
 22 | use File::Copy;
 23 | use FileHandle;
 24 | 
 25 | #use Test::More qw(no_plan);
 26 | use Test::More tests => 38;
 27 | 
 28 | our $DEBUG = 0;
 29 | 
 30 | use Encode (":all");
 31 | {
 32 |     no warnings;
 33 |     @ARGV and $DEBUG = shift;
 34 |     #require Encode::JP::JIS7;
 35 |     #require Encode::KR::2022_KR;
 36 |     #$Encode::JP::JIS7::DEBUG = $DEBUG;
 37 | }
 38 | 
 39 | my $seq = 0;
 40 | my $dir = dirname(__FILE__);
 41 | 
 42 | my %e = 
 43 |     (
 44 |      jisx0208 => [ qw/euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1/],
 45 |      ksc5601  => [ qw/euc-kr/],
 46 |      gb2312   => [ qw/euc-cn hz/],
 47 |     );
 48 | 
 49 | $/ = "\x0a"; # may fix VMS problem for test #28 and #29
 50 | 
 51 | for my $src (sort keys %e) {
 52 |     my $ufile = File::Spec->catfile($dir,"$src.utf");
 53 |     open my $fh, "<:utf8", $ufile or die "$ufile : $!";
 54 |     my @uline = <$fh>;
 55 |     my $utext = join('' => @uline);
 56 |     close $fh;
 57 | 
 58 |     for my $e (@{$e{$src}}){
 59 |     my $sfile = File::Spec->catfile($dir,"$$.sio");
 60 |     my $pfile = File::Spec->catfile($dir,"$$.pio");
 61 |     
 62 |     # first create a file without perlio
 63 |     dump2file($sfile, &encode($e, $utext, 0));
 64 |     
 65 |     # then create a file via perlio without autoflush
 66 | 
 67 |     SKIP:{
 68 |         skip "$e: !perlio_ok", 4 unless (perlio_ok($e) or $DEBUG);
 69 |         no warnings 'uninitialized';
 70 |         open $fh, ">:encoding($e)", $pfile or die "$sfile : $!";
 71 |         $fh->autoflush(0);
 72 |         print $fh $utext;
 73 |         close $fh;
 74 |         $seq++;
 75 |         is(compare_text($sfile, $pfile), 0 => ">:encoding($e)");
 76 |         if ($DEBUG){
 77 |         copy $sfile, "$sfile.$seq";
 78 |         copy $pfile, "$pfile.$seq";
 79 |         }
 80 |         
 81 |         # this time print line by line.
 82 |         # works even for ISO-2022 but not ISO-2022-KR
 83 |         open $fh, ">:encoding($e)", $pfile or die "$sfile : $!";
 84 |         $fh->autoflush(1);
 85 |         for my $l (@uline) {
 86 |         print $fh $l;
 87 |         }
 88 |         close $fh;
 89 |         $seq++;
 90 |         is(compare_text($sfile, $pfile), 0 => ">:encoding($e) by lines");
 91 |         if ($DEBUG){
 92 |         copy $sfile, "$sfile.$seq";
 93 |         copy $pfile, "$pfile.$seq";
 94 |         }
 95 |         my $dtext;
 96 |         open $fh, "<:encoding($e)", $pfile or die "$pfile : $!";
 97 |         $fh->autoflush(0);
 98 |         $dtext = join('' => <$fh>);
 99 |         close $fh;
100 |         $seq++;
101 |         ok($utext eq $dtext, "<:encoding($e)");
102 |         if ($DEBUG){
103 |         dump2file("$sfile.$seq", $utext);
104 |         dump2file("$pfile.$seq", $dtext);
105 |         }
106 |         if (perlio_ok($e) or $DEBUG){
107 |         $dtext = '';
108 |         open $fh, "<:encoding($e)", $pfile or die "$pfile : $!";
109 |         while(defined(my $l = <$fh>)) {
110 |             $dtext .= $l;
111 |         }
112 |         close $fh;
113 |         }
114 |         $seq++;
115 |         ok($utext eq $dtext,  "<:encoding($e) by lines");
116 |         if ($DEBUG){
117 |         dump2file("$sfile.$seq", $utext);
118 |         dump2file("$pfile.$seq", $dtext);
119 |         }
120 |     }
121 |      if ( ! $DEBUG ) {
122 |             1 while unlink ($sfile);
123 |             1 while unlink ($pfile);
124 |         }
125 |     }
126 | }
127 | 
128 | # BOM Test
129 | 
130 | SKIP:{
131 |     my $pev = PerlIO::encoding->VERSION;
132 |     skip "PerlIO::encoding->VERSION = $pev <= 0.07 ", 6
133 |     unless ($pev >= 0.07 or $DEBUG);
134 | 
135 |     my $file = File::Spec->catfile($dir,"jisx0208.utf");
136 |     open my $fh, "<:utf8", $file or die "$file : $!";
137 |     my $str = join('' => <$fh>);
138 |     close $fh;
139 |     my %bom = (
140 |            'UTF-16BE' => pack('n', 0xFeFF),
141 |            'UTF-16LE' => pack('v', 0xFeFF),
142 |            'UTF-32BE' => pack('N', 0xFeFF),
143 |            'UTF-32LE' => pack('V', 0xFeFF),
144 |           );
145 |     # reading
146 |     for my $utf (sort keys %bom){
147 |     my $bomed = $bom{$utf} . encode($utf, $str);
148 |     my $sfile = File::Spec->catfile($dir,".${utf}_${seq}_$$");
149 |     dump2file($sfile, $bomed);
150 |     my $utf_nobom = $utf; $utf_nobom =~ s/(LE|BE)$//o;
151 |     # reading
152 |     open $fh, "<:encoding($utf_nobom)", $sfile or die "$sfile : $!";
153 |     my $cmp = join '' => <$fh>;
154 |     close $fh;
155 |     is($str, $cmp, "<:encoding($utf_nobom) eq $utf");
156 |     unlink $sfile;  $seq++;
157 |     }
158 |     # writing
159 |     for my $utf_nobom (qw/UTF-16 UTF-32/){
160 |     my $utf = $utf_nobom . 'BE';
161 |     my $sfile = File::Spec->catfile($dir,".${utf_nobom}_${seq}_$$");
162 |     my $bomed = $bom{$utf} . encode($utf, $str);
163 |     open  $fh, ">:encoding($utf_nobom)", $sfile or die "$sfile : $!";
164 |     print $fh $str;
165 |     close $fh;
166 |     open my $fh, "<:bytes", $sfile or die "$sfile : $!";
167 |     read $fh, my $cmp, -s $sfile;
168 |     close $fh;
169 |     use bytes ();
170 |     ok($bomed eq $cmp, ">:encoding($utf_nobom) eq $utf");
171 |     unlink $sfile; $seq++;
172 |     }
173 | }
174 | sub dump2file{
175 |     no warnings;
176 |     open my $fh, ">", $_[0] or die "$_[0]: $!";
177 |     binmode $fh;
178 |     print $fh $_[1];
179 |     close $fh;
180 | }
181 | 


--------------------------------------------------------------------------------
/t/piconv.t:
--------------------------------------------------------------------------------
  1 | #
  2 | # $Id: piconv.t,v 0.4 2013/02/18 02:23:56 dankogai Exp $
  3 | #
  4 | 
  5 | BEGIN {
  6 |     if ( $ENV{'PERL_CORE'} && $] >= 5.011) {
  7 |         print "1..0 # Skip: Don't know how to test this within perl's core\n";
  8 |         exit 0;
  9 |     }
 10 | }
 11 | 
 12 | use strict;
 13 | use FindBin;
 14 | use File::Spec;
 15 | use IPC::Open3 qw(open3);
 16 | use IO::Select;
 17 | use Test::More;
 18 | 
 19 | my $WIN = $^O eq 'MSWin32';
 20 | 
 21 | if ($WIN) {
 22 |     eval { require IPC::Run; IPC::Run->VERSION(0.83); 1; } or 
 23 |         plan skip_all => 'Win32 environments require IPC::Run 0.83 to complete this test';
 24 | }
 25 | 
 26 | sub run_cmd (;$$);
 27 | 
 28 | my $blib =
 29 |   File::Spec->rel2abs(
 30 |     File::Spec->catdir( $FindBin::RealBin, File::Spec->updir ) );
 31 | my $script = File::Spec->catdir($blib, 'bin', 'piconv');
 32 | my @base_cmd = ( $^X, "-Mblib=$blib", $script );
 33 | 
 34 | plan tests => 5;
 35 | 
 36 | {
 37 |     my ( $st, $out, $err ) = run_cmd;
 38 |     is( $st, 0, 'status for usage call' );
 39 |     is( $out, $WIN ? undef : '' );
 40 |     like( $err, qr{^piconv}, 'usage' );
 41 | }
 42 | 
 43 | {
 44 |     my($st, $out, $err) = run_cmd [qw(-S foobar -f utf-8 -t ascii), $script];
 45 |     like($err, qr{unknown scheme.*fallback}i, 'warning for unknown scheme');
 46 | }
 47 | 
 48 | {
 49 |     my ( $st, $out, $err ) = run_cmd [qw(-f utf-8 -t ascii ./non-existing/file)];
 50 |     like( $err, qr{can't open}i );
 51 | }
 52 | 
 53 | sub run_cmd (;$$) {
 54 |     my ( $args, $in ) = @_;
 55 |     
 56 |     my $out = "x" x 10_000;
 57 |     $out = "";
 58 |     my $err = "x" x 10_000;
 59 |     $err = "";
 60 |         
 61 |     if ($WIN) {
 62 | 		IPC::Run->import(qw(run timeout));
 63 | 		my @cmd;
 64 | 		if (defined $args) {
 65 | 			@cmd = (@base_cmd, @$args);
 66 | 		} else {
 67 | 			@cmd = @base_cmd;
 68 | 		}
 69 |         run(\@cmd, \$in, \$out, \$err, timeout(10));
 70 |         my $st = $?;
 71 | 		$out = undef if ($out eq '');
 72 |         ( $st, $out, $err );
 73 |     } else {
 74 | 		$in ||= '';
 75 |         my ( $in_fh, $out_fh, $err_fh );
 76 |         use Symbol 'gensym';
 77 |         $err_fh =
 78 |           gensym;    # sigh... otherwise stderr gets just to $out_fh, not to $err_fh
 79 |         my $pid = open3( $in_fh, $out_fh, $err_fh, @base_cmd, @$args )
 80 |           or die "Can't run @base_cmd @$args: $!";
 81 |         print $in_fh $in;
 82 |         my $sel = IO::Select->new( $out_fh, $err_fh );
 83 | 
 84 |         while ( my @ready = $sel->can_read ) {
 85 |             for my $fh (@ready) {
 86 |                 if ( eof($fh) ) {
 87 |                     $sel->remove($fh);
 88 |                     last if !$sel->handles;
 89 |                 }
 90 |                 elsif ( $out_fh == $fh ) {
 91 |                     my $line = <$fh>;
 92 |                     $out .= $line;
 93 |                 }
 94 |                 elsif ( $err_fh == $fh ) {
 95 |                     my $line = <$fh>;
 96 |                     $err .= $line;
 97 |                 }
 98 |             }
 99 |         }
100 |         my $st = $?;
101 |         ( $st, $out, $err );
102 |     }
103 | }
104 | 


--------------------------------------------------------------------------------
/t/rt.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/local/bin/perl
 2 | #
 3 | # $Id: rt.pl,v 2.2 2023/11/10 01:10:50 dankogai Exp $
 4 | #
 5 | 
 6 | BEGIN {
 7 |     my $ucmdir  = "ucm";
 8 |     if ($ENV{'PERL_CORE'}){
 9 |         chdir 't';
10 |         unshift @INC, '../lib';
11 |         $ucmdir = "../ext/Encode/ucm";
12 |     }
13 |     require Config; Config->import();
14 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
15 |       print "1..0 # Skip: Encode was not built\n";
16 |       exit 0;
17 |     }
18 |     if (ord("A") == 193) {
19 |     print "1..0 # Skip: EBCDIC\n";
20 |     exit 0;
21 |     }
22 |     use strict;
23 |     require Test::More;
24 |     our $DEBUG;
25 |     our @ucm;
26 |     unless(@ARGV){
27 |     use File::Spec;
28 |     Test::More->import(tests => 103);
29 |     opendir my $dh, $ucmdir or die "$ucmdir:$!";
30 |     @ucm = 
31 |         map {File::Spec->catfile($ucmdir, $_) } 
32 |         sort grep {/\.ucm$/o} readdir($dh);
33 |     closedir $dh;
34 |     }else{
35 |     Test::More->import("no_plan");
36 |     $DEBUG = 1;
37 |     @ucm = @ARGV;
38 |     }
39 | }
40 | 
41 | use strict;
42 | use Encode qw/encode decode/;
43 | our $DEBUG;
44 | our @ucm;
45 | 
46 | for my $ucm (@ucm){
47 |     my ($name, $nchar, $nrt, $nok) = rttest($ucm);
48 |     $nok += 0;
49 |     ok($nok == 0, "$ucm => $name ($nchar, $nrt, $nok)");
50 | }
51 | 
52 | sub rttest{
53 |     my $ucm = shift;
54 |     my ($name, $nchar, $nrt, $nok);
55 |     open my $rfh, "<$ucm" or die "$ucm:$!";
56 |     # <U0000> \x00 |0 # <control>
57 |     while(<$rfh>){
58 |     s/#.*//o; /^$/ and next;
59 |     unless ($name){
60 |         /^<code_set_name>\s+"([^\"]+)"/io or next;
61 |         $name = $1 and next;
62 |     }else{
63 |         /^<U([0-9a-f]+)>\s+(\S+)\s+\|(\d)/io or next;
64 |         $nchar++;
65 |         $3 == 0 or next;
66 |         $nrt++;
67 |         my $uni = chr(hex($1));
68 |         my $enc = eval qq{ "$2" };
69 |         decode($name, $enc) eq $uni or $nok++;
70 |         encode($name, $uni) eq $enc or $nok++;
71 |     }
72 |     }
73 |     return($name, $nchar, $nrt, $nok);
74 | }
75 | __END__
76 | 


--------------------------------------------------------------------------------
/t/rt113164.t:
--------------------------------------------------------------------------------
 1 | BEGIN {
 2 |     if ($ENV{'PERL_CORE'}) {
 3 |         chdir 't';
 4 |         unshift @INC, '../lib';
 5 |     }
 6 |     require Config; Config->import();
 7 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
 8 |       print "1..0 # Skip: Encode was not built\n";
 9 |       exit 0;
10 |     }
11 |     if (ord("A") == 193) {
12 |       print "1..0 # Skip: EBCDIC\n";
13 |       exit 0;
14 |     }
15 |     $| = 1;
16 | }
17 | 
18 | use strict;
19 | use warnings;
20 | 
21 | use Test::More tests => 2;
22 | 
23 | use Encode;
24 | 
25 | my $str = "You" . chr(8217) . "re doomed!";
26 | 
27 | my $data;
28 | 
29 | my $cb = sub {
30 |     $data = [ ('?') x 12_500 ];
31 |     return ";";
32 | };
33 | 
34 | my $octets = encode('iso-8859-1', $str, $cb);
35 | is $octets, "You;re doomed!", "stack was not overwritten";
36 | 
37 | $octets = encode('iso-8859-1', $str, $cb);
38 | is $octets, "You;re doomed!", "stack was not overwritten";
39 | 


--------------------------------------------------------------------------------
/t/rt65541.t:
--------------------------------------------------------------------------------
 1 | BEGIN {
 2 |     if ($ENV{'PERL_CORE'}) {
 3 |         chdir 't';
 4 |         unshift @INC, '../lib';
 5 |     }
 6 |     require Config; Config->import();
 7 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
 8 |       print "1..0 # Skip: Encode was not built\n";
 9 |       exit 0;
10 |     }
11 |     if (ord("A") == 193) {
12 |       print "1..0 # Skip: EBCDIC\n";
13 |       exit 0;
14 |     }
15 |     $| = 1;
16 | }
17 | 
18 | use strict;
19 | use warnings;
20 | 
21 | use Encode;
22 | use PerlIO::encoding;
23 | $PerlIO::encoding::fallback &= ~Encode::WARN_ON_ERR;
24 | 
25 | use Test::More tests => 3;
26 | 
27 | ok open my $fh, ">:encoding(cp1250)", do{\(my $str)};
28 | ok print $fh ("a" x 1023) . "\x{0378}";
29 | ok close $fh;
30 | 


--------------------------------------------------------------------------------
/t/rt76824.t:
--------------------------------------------------------------------------------
 1 | BEGIN {
 2 |     if ($ENV{'PERL_CORE'}) {
 3 |         chdir 't';
 4 |         unshift @INC, '../lib';
 5 |     }
 6 |     require Config; Config->import();
 7 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
 8 |       print "1..0 # Skip: Encode was not built\n";
 9 |       exit 0;
10 |     }
11 |     if (ord("A") == 193) {
12 |       print "1..0 # Skip: EBCDIC\n";
13 |       exit 0;
14 |     }
15 |     $| = 1;
16 | }
17 | 
18 | use strict;
19 | use warnings;
20 | 
21 | use Encode;
22 | use PerlIO::encoding;
23 | $PerlIO::encoding::fallback &= ~Encode::WARN_ON_ERR;
24 | 
25 | use Test::More tests => 2;
26 | 
27 | my $out;
28 | my @arr = (
29 |           "\x{feff}\x{39f}\x{3af} \x{3a3}\x{3c5}\x{3bd}\x{3ad}\x{3bd}\x{3bf}\x{3c7}\x{3bf}\x{3b9}\n",
30 |           "\x{39f}\x{3b9} \x{393}\x{3b5}\x{3bd}\x{3bd}\x{3b1}\x{3af}\x{3bf}\x{3b9} \x{3c4}\x{3b7}\x{3c2} \x{3a3}\x{3b1}\x{3bc}\x{3bf}\x{3b8}\x{3c1}\x{3ac}\x{3ba}\x{3b7}\x{3c2}\n",
31 |           "\x{39f}\x{3b9} \x{393}\x{3b5}\x{3c1}\x{3bc}\x{3b1}\x{3bd}\x{3bf}\x{3af} \x{3be}\x{3b1}\x{3bd}\x{3ac}\x{3c1}\x{3c7}\x{3bf}\x{3bd}\x{3c4}\x{3b1}\x{3b9}...\n",
32 |           "\x{39f}\x{3b9} \x{395}\x{3c1}\x{3b1}\x{3c3}\x{3c4}\x{3ad}\x{3c2} \x{3a4}\x{3bf}\x{3c5} \x{391}\x{3b9}\x{3b3}\x{3b1}\x{3af}\x{3bf}\x{3c5}\n",
33 |           "\x{39f}\x{3b9} \x{39a}\x{3c5}\x{3bd}\x{3b7}\x{3b3}\x{3bf}\x{3af}\n",
34 |           "\x{39f}\x{3b9} \x{3a0}\x{3b1}\x{3bd}\x{3ba}\x{3c2} \x{3a4}\x{3b1} \x{39a}\x{3ac}\x{3bd}\x{3bf}\x{3c5}\x{3bd} \x{38c}\x{3bb}\x{3b1}\n",
35 |           "\x{39f}\x{3b9} \x{3a6}\x{3b1}\x{3bd}\x{3c4}\x{3b1}\x{3c1}\x{3af}\x{3bd}\x{3b5}\x{3c2}\n",
36 |           "\x{39f}\x{3b9}\x{3ba}\x{3bf}\x{3b3}\x{3ad}\x{3bd}\x{3b5}\x{3b9}\x{3b1} \x{3a0}\x{3b1}\x{3bd}\x{3c4}\x{3c1}\x{3b5}\x{3c5}\x{3cc}\x{3bc}\x{3b1}\x{3c3}\x{3c4}\x{3b5}\n",
37 |           "\x{39f}\x{3bb}\x{3b1} \x{3b5}\x{3af}\x{3bd}\x{3b1}\x{3b9} \x{3b4}\x{3c1}\x{3cc}\x{3bc}\x{3bf}\x{3c2}\n",
38 |           "\x{39f}\x{3bc}\x{3b7}\x{3c1}\x{3bf}\x{3c2}\n",
39 |           "\x{39f}\x{3be}\x{3c5}\x{3b3}\x{3cc}\x{3bd}\x{3bf}\n",
40 |           "\x{39f}\x{3c1}\x{3b1}\x{3c4}\x{3cc}\x{3c4}\x{3b7}\x{3c2} \x{3bc}\x{3b7}\x{3b4}\x{3ad}\x{3bd}\n",
41 |           "\x{3c0}\n",
42 |           "\x{3c0}\x{3ac}\x{3bd}\x{3c9}, \x{3ba}\x{3ac}\x{3c4}\x{3c9} \x{3ba}\x{3b1}\x{3b9} \x{3c0}\x{3bb}\x{3b1}\x{3b3}\x{3af}\x{3c9}\x{3c2}\n",
43 |           "\x{3a4}\x{3bf} \x{39a}\x{3b1}\x{3ba}\x{3cc}\n",
44 |           "\x{3a4}\x{3bf} \x{39a}\x{3b1}\x{3ba}\x{3cc} - \x{3a3}\x{3c4}\x{3b7}\x{3bd} \x{395}\x{3c0}\x{3bf}\x{3c7}\x{3ae} \x{3c4}\x{3c9}\x{3bd} \x{397}\x{3c1}\x{3ce}\x{3c9}\x{3bd}\n",
45 |           "\x{3a4}\x{3bf} \x{3ba}\x{3bb}\x{3ac}\x{3bc}\x{3b1} \x{3b2}\x{3b3}\x{3ae}\x{3ba}\x{3b5} \x{3b1}\x{3c0}'\x{3c4}\x{3bf}\x{3bd} \x{3c0}\x{3b1}\x{3c1}\x{3ac}\x{3b4}\x{3b5}\x{3b9}\x{3c3}\x{3bf}\n",
46 |           "\x{3a4}\x{3bf} \x{3ba}\x{3bf}\x{3c1}\x{3af}\x{3c4}\x{3c3}\x{3b9} \x{3bc}\x{3b5} \x{3c4}\x{3b1} \x{3bc}\x{3b1}\x{3cd}\x{3c1}\x{3b1}\n",
47 |           "\x{3a4}\x{3bf} \x{3ba}\x{3bf}\x{3c1}\x{3af}\x{3c4}\x{3c3}\x{3b9} \x{3c4}\x{3bf}\x{3c5} \x{3bb}\x{3bf}\x{3cd}\x{3bd}\x{3b1} \x{3c0}\x{3b1}\x{3c1}\x{3ba}\n",
48 |           "\x{3a4}\x{3bf} \x{39e}\x{3cd}\x{3bb}\x{3bf} \x{3b2}\x{3b3}\x{3ae}\x{3ba}\x{3b5} \x{3b1}\x{3c0}\x{3cc} \x{3c4}\x{3bf}\x{3bd} \x{3c0}\x{3b1}\x{3c1}\x{3ac}\x{3b4}\x{3b5}\x{3b9}\x{3c3}\x{3bf}\n",
49 |           "\x{3a4}\x{3bf} \x{3c0}\x{3b9}\x{3bf} \x{3bb}\x{3b1}\x{3bc}\x{3c0}\x{3c1}\x{3cc} \x{3b1}\x{3c3}\x{3c4}\x{3ad}\x{3c1}\x{3b9}\n",
50 |           "\x{3a4}\x{3bf} \x{3a1}\x{3b5}\x{3bc}\x{3b1}\x{3bb}\x{3b9} \x{3a4}\x{3b7}\x{3c2} \x{391}\x{3b8}\x{3b7}\x{3bd}\x{3b1}\x{3c2}\n",
51 |           "\x{3a4}\x{3bf} \x{3a4}\x{3b1}\x{3bd}\x{3b3}\x{3ba}\x{3cc} \x{3c4}\x{3c9}\x{3bd} \x{3a7}\x{3c1}\x{3b9}\x{3c3}\x{3c4}\x{3bf}\x{3c5}\x{3b3}\x{3ad}\x{3bd}\x{3bd}\x{3c9}\x{3bd}\n",
52 |           "\x{3a4}\x{3bf} \x{3c4}\x{3b5}\x{3bb}\x{3b5}\x{3c5}\x{3c4}\x{3b1}\x{3af}\x{3bf} \x{3c8}\x{3ad}\x{3bc}\x{3bc}\x{3b1}\n",
53 |           "\x{3a4}\x{3bf} \x{3c6}\x{3b9}\x{3bb}\x{3af} \x{3c4}\x{3b7}\x{3c2}... \x{396}\x{3c9}\x{3ae}\x{3c2}\n",
54 |           "\x{3a4}\x{3bf} \x{3c7}\x{3ce}\x{3bc}\x{3b1} \x{3b2}\x{3ac}\x{3c6}\x{3c4}\x{3b7}\x{3ba}\x{3b5} \x{3ba}\x{3cc}\x{3ba}\x{3ba}\x{3b9}\x{3bd}\x{3bf}\n",
55 |           "\x{3a4}\x{3bf}\x{3c0}\x{3af}\x{3bf} \x{3c3}\x{3c4}\x{3b7}\x{3bd} \x{3bf}\x{3bc}\x{3af}\x{3c7}\x{3bb}\x{3b7}\n",
56 |           "\x{3a4}\x{3c1}\x{3b9}\x{3bb}\x{3bf}\x{3b3}\x{3af}\x{3b1} 1: \x{3a4}\x{3bf} \x{39b}\x{3b9}\x{3b2}\x{3ac}\x{3b4}\x{3b9} \x{3c0}\x{3bf}\x{3c5} \x{3b4}\x{3b1}\x{3ba}\x{3c1}\x{3cd}\x{3b6}\x{3b5}\x{3b9}\n"
57 |         );
58 | ok open my $wh, '>:crlf:encoding(ISO-8859-1)', \$out;
59 | print $wh $_ for @arr;
60 | ok close $wh;
61 | 


--------------------------------------------------------------------------------
/t/rt85489.t:
--------------------------------------------------------------------------------
 1 | BEGIN {
 2 |     if ($ENV{'PERL_CORE'}) {
 3 |         chdir 't';
 4 |         unshift @INC, '../lib';
 5 |     }
 6 |     require Config; Config->import();
 7 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
 8 |       print "1..0 # Skip: Encode was not built\n";
 9 |       exit 0;
10 |     }
11 |     if (ord("A") == 193) {
12 |       print "1..0 # Skip: EBCDIC\n";
13 |       exit 0;
14 |     }
15 |     $| = 1;
16 | }
17 | 
18 | use strict;
19 | use warnings;
20 | 
21 | use Test::More tests => 8;
22 | 
23 | use Encode;
24 | 
25 | my $ascii = Encode::find_encoding("ascii");
26 | my $orig = "str";
27 | 
28 | my $str = $orig;
29 | ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before ascii encode";
30 | $ascii->encode($str);
31 | ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after ascii encode";
32 | 
33 | $str = $orig;
34 | ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before Encode::encode ascii";
35 | Encode::encode("ascii", $str);
36 | ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after Encode::encode ascii";
37 | 
38 | $str = $orig;
39 | Encode::_utf8_on($str);
40 | ok Encode::is_utf8($str), "UTF8 flag is set on input string before ascii decode";
41 | $ascii->decode($str);
42 | ok Encode::is_utf8($str), "UTF8 flag is set on input string after ascii decode";
43 | 
44 | $str = $orig;
45 | Encode::_utf8_on($str);
46 | ok Encode::is_utf8($str), "UTF8 flag is set on input string before Encode::decode ascii";
47 | Encode::decode("ascii", $str);
48 | ok Encode::is_utf8($str), "UTF8 flag is set on input string after Encode::decode ascii";
49 | 


--------------------------------------------------------------------------------
/t/rt86327.t:
--------------------------------------------------------------------------------
 1 | BEGIN {
 2 |     if ($ENV{'PERL_CORE'}) {
 3 |         chdir 't';
 4 |         unshift @INC, '../lib';
 5 |     }
 6 |     require Config; Config->import();
 7 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
 8 |       print "1..0 # Skip: Encode was not built\n";
 9 |       exit 0;
10 |     }
11 |     if (ord("A") == 193) {
12 |       print "1..0 # Skip: EBCDIC\n";
13 |       exit 0;
14 |     }
15 |     $| = 1;
16 | }
17 | 
18 | use strict;
19 | use warnings;
20 | 
21 | use Encode;
22 | use PerlIO::encoding;
23 | $PerlIO::encoding::fallback &= ~Encode::WARN_ON_ERR;
24 | 
25 | use Test::More tests => 3;
26 | 
27 | my @t = qw/230 13 90 65 34 239 86 15 8 26 181 25 305 123 22 139 111 6 3
28 | 100 37 1 20 1 166 1 300 19 1 42 153 81 106 114 67 1 32 34/;
29 | my $str;
30 | ok open OUT, '>:encoding(iso-8859-1)', \$str;
31 | my $string = join "\x{fffd}", map { '.'x$_ } @t;
32 | ok print OUT $string;
33 | ok close OUT;
34 | 


--------------------------------------------------------------------------------
/t/taint.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -T
 2 | use strict;
 3 | use Encode qw(encode decode);
 4 | local %Encode::ExtModule = %Encode::Config::ExtModule;
 5 | use Scalar::Util qw(tainted);
 6 | use Test::More;
 7 | use Config;
 8 | my $taint = substr($ENV{PATH},0,0);
 9 | my $str = "dan\x{5f3e}" . $taint;                 # tainted string to encode
10 | my $bin = encode('UTF-8', $str);                  # tainted binary to decode
11 | my $notaint = "";
12 | my $notaint_str = "dan\x{5f3e}" . $notaint;
13 | my $notaint_bin = encode('UTF-8', $notaint_str);
14 | my @names = Encode->encodings(':all');
15 | if (exists($Config{taint_support}) && not $Config{taint_support}) {
16 |     plan skip_all => "your perl was built without taint support";
17 | }
18 | else {
19 |     plan tests => 4 * @names + 2;
20 | }
21 | for my $name (@names) {
22 |     my ($d, $e, $s);
23 |     eval {
24 |         $e = encode($name, $str);
25 |     };
26 |   SKIP: {
27 |       skip $@, 1 if $@;
28 |       ok tainted($e), "encode $name";
29 |     }
30 |     $bin = $e.$taint if $e;
31 |     eval {
32 |         $d = decode($name, $bin);
33 |     };
34 |   SKIP: {
35 |       skip $@, 1 if $@;
36 |       ok tainted($d), "decode $name";
37 |     }
38 | }
39 | for my $name (@names) {
40 |     my ($d, $e, $s);
41 |     eval {
42 |         $e = encode($name, $notaint_str);
43 |     };
44 |   SKIP: {
45 |       skip $@, 1 if $@;
46 |       ok ! tainted($e), "encode $name";
47 |     }
48 |     $notaint_bin = $e.$notaint if $e;
49 |     eval {
50 |         $d = decode($name, $notaint_bin);
51 |     };
52 |   SKIP: {
53 |       skip $@, 1 if $@;
54 |       ok ! tainted($d), "decode $name";
55 |     }
56 | }
57 | Encode::_utf8_on($bin);
58 | ok(!Encode::is_utf8($bin), "Encode::_utf8_on does not work on tainted values");
59 | Encode::_utf8_off($str);
60 | ok(Encode::is_utf8($str), "Encode::_utf8_off does not work on tainted values");
61 | 


--------------------------------------------------------------------------------
/t/truncated_utf8.t:
--------------------------------------------------------------------------------
 1 | BEGIN {
 2 |     if ($ENV{'PERL_CORE'}) {
 3 |         chdir 't';
 4 |         unshift @INC, '../lib';
 5 |     }
 6 |     require Config; Config->import();
 7 |     if ($Config{'extensions'} !~ /\bEncode\b/) {
 8 |       print "1..0 # Skip: Encode was not built\n";
 9 |       exit 0;
10 |     }
11 |     if (ord("A") == 193) {
12 |       print "1..0 # Skip: EBCDIC\n";
13 |       exit 0;
14 |     }
15 |     if ( $] < 5.009 ) {
16 |         print "1..0 # Skip: Perl <= 5.9 or later required\n";
17 |         exit 0;
18 |     }
19 |     $| = 1;
20 | }
21 | 
22 | use strict;
23 | use warnings;
24 | 
25 | use Encode;
26 | use PerlIO::encoding;
27 | $PerlIO::encoding::fallback &= ~(Encode::WARN_ON_ERR|Encode::PERLQQ);
28 | 
29 | use Test::More tests => 9;
30 | 
31 | binmode Test::More->builder->failure_output, ":utf8";
32 | binmode Test::More->builder->todo_output, ":utf8";
33 | 
34 | is(decode("UTF-8", "\xfd\xfe"), "\x{fffd}" x 2);
35 | is(decode("UTF-8", "\xfd\xfe\xff"), "\x{fffd}" x 3);
36 | is(decode("UTF-8", "\xfd\xfe\xff\xe0"), "\x{fffd}" x 4);
37 | is(decode("UTF-8", "\xfd\xfe\xff\xe0\xe1"), "\x{fffd}" x 5);
38 | is(decode("UTF-8", "\xc1\x9f"), "\x{fffd}");
39 | is(decode("UTF-8", "\xFF\x80\x90\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"), "\x{fffd}");
40 | is(decode("UTF-8", "\xF0\x80\x80\x80"), "\x{fffd}");
41 | 
42 | SKIP: {
43 |     # infinite loop due to bug: https://rt.perl.org/Public/Bug/Display.html?id=41442
44 |     skip "Perl Version ($]) is older than v5.8.9", 2 if $] < 5.008009;
45 |     my $str = ("x" x 1023) . "\xfd\xfe\xffx";
46 |     open my $fh, '<:encoding(UTF-8)', \$str;
47 |     my $str2 = <$fh>;
48 |     close $fh;
49 |     is($str2, ("x" x 1023) . ("\x{fffd}" x 3) . "x");
50 | 
51 |     TODO: {
52 |         local $TODO = "bug in perlio" if $] < 5.027009;
53 |         my $str = ("x" x 1023) . "\xfd\xfe\xff";
54 |         open my $fh, '<:encoding(UTF-8)', \$str;
55 |         my $str2 = <$fh>;
56 |         close $fh;
57 |         is($str2, ("x" x 1023) . ("\x{fffd}" x 3));
58 |     }
59 | }
60 | 


--------------------------------------------------------------------------------
/t/undef.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings FATAL => 'all';
 3 | 
 4 | use Test::More;
 5 | 
 6 | use Encode qw(encode decode find_encoding);
 7 | use Encode::Encoder qw(encoder);
 8 | 
 9 | local %Encode::ExtModule = %Encode::Config::ExtModule;
10 | 
11 | my @names = Encode->encodings(':all');
12 | 
13 | plan tests => 1 + 4 * @names;
14 | 
15 | my $emptyutf8;
16 | eval { my $c = encoder($emptyutf8)->utf8; };
17 | ok(!$@,"crashed encoding undef variable ($@)");
18 | 
19 | for my $name (@names) {
20 |     my $enc = find_encoding($name);
21 |     is($enc->encode(undef), undef, "find_encoding('$name')->encode(undef) returns undef");
22 |     is($enc->decode(undef), undef, "find_encoding('$name')->decode(undef) returns undef");
23 |     is(encode($name, undef), undef, "encode('$name', undef) returns undef");
24 |     is(decode($name, undef), undef, "decode('$name', undef) returns undef");
25 | }
26 | 


--------------------------------------------------------------------------------
/t/unibench.pl:
--------------------------------------------------------------------------------
 1 | #!./perl
 2 | 
 3 | use strict;
 4 | use Encode;
 5 | use Benchmark qw(:all);
 6 | 
 7 | my $Count = shift @ARGV;
 8 | $Count ||= 16;
 9 | my @sizes = @ARGV || (1, 4, 16);
10 | 
11 | my %utf8_seed;
12 | for my $i (0x00..0xff){
13 |     my $c = chr($i);
14 |     $utf8_seed{BMP} .= ($c =~ /^\p{IsPrint}/o) ? $c : " ";
15 | }
16 | utf8::upgrade($utf8_seed{BMP});
17 | 
18 | for my $i (0x00..0xff){
19 |     my $c = chr(0x10000+$i);
20 |     $utf8_seed{HIGH} .= ($c =~ /^\p{IsPrint}/o) ? $c : " ";
21 | }
22 | utf8::upgrade($utf8_seed{HIGH});
23 | 
24 | my %S;
25 | for my $i (@sizes){
26 |     my $sz = 256 * $i;
27 |     for my $cp (qw(BMP HIGH)){
28 |     $S{utf8}{$sz}{$cp}  = $utf8_seed{$cp} x $i;
29 |     $S{utf16}{$sz}{$cp} = encode('UTF-16BE', $S{utf8}{$sz}{$cp});
30 |     }
31 | }
32 | 
33 | for my $i (@sizes){
34 |     my $sz = $i * 256;
35 |     my $count = $Count * int(256/$i);
36 |     for my $cp (qw(BMP HIGH)){
37 |     for my $op (qw(encode decode)){
38 |         my ($meth, $from, $to) = ($op eq 'encode') ?
39 |         (\&encode, 'utf8', 'utf16') : (\&decode, 'utf16', 'utf8');
40 |         my $XS = sub {
41 |         Encode::Unicode::set_transcoder("xs");  
42 |         $meth->('UTF-16BE', $S{$from}{$sz}{$cp})
43 |              eq $S{$to}{$sz}{$cp} 
44 |              or die "$op,$from,$to,$sz,$cp";
45 |         };
46 |         my $modern = sub {
47 |         Encode::Unicode::set_transcoder("modern");  
48 |         $meth->('UTF-16BE', $S{$from}{$sz}{$cp})
49 |              eq $S{$to}{$sz}{$cp} 
50 |              or die "$op,$from,$to,$sz,$cp";
51 |         };
52 |         my $classic = sub {
53 |         Encode::Unicode::set_transcoder("classic");  
54 |         $meth->('UTF-16BE', $S{$from}{$sz}{$cp})
55 |              eq $S{$to}{$sz}{$cp} or 
56 |              die "$op,$from,$to,$sz,$cp";
57 |         };
58 |         print "---- $op length=$sz/range=$cp ----\n";
59 |         my $r = timethese($count,
60 |              {
61 |               "XS"      => $XS,
62 |               "Modern"  => $modern,
63 |               "Classic" => $classic,
64 |              },
65 |              'none',
66 |             );
67 |         cmpthese($r);
68 |     }
69 |     }
70 | }
71 | 


--------------------------------------------------------------------------------
/t/use-Encode-Alias.t:
--------------------------------------------------------------------------------
1 | use strict;
2 | use warnings;
3 | 
4 | use Encode::Alias;
5 | use open ":std", ":locale";
6 | 
7 | print "1..1\n";
8 | print "ok 1 - use Encode::Alias works\n";
9 | 


--------------------------------------------------------------------------------
/t/utf8ref.t:
--------------------------------------------------------------------------------
 1 | #
 2 | # $Id: utf8ref.t,v 1.2 2016/10/28 05:03:52 dankogai Exp $
 3 | #
 4 | 
 5 | use strict;
 6 | use warnings;
 7 | use Encode;
 8 | use Test::More;
 9 | plan tests => 12;
10 | #plan 'no_plan';
11 | 
12 | # my $a = find_encoding('ASCII');
13 | my $u = find_encoding('UTF-8');
14 | my $r = [];
15 | no warnings 'uninitialized';
16 | is encode_utf8($r), ''.$r;
17 | is $u->encode($r), ''.$r;
18 | $r = {};
19 | is decode_utf8($r), ''.$r;
20 | is $u->decode($r), ''.$r;
21 | use warnings 'uninitialized';
22 | 
23 | is encode_utf8(undef), undef;
24 | is decode_utf8(undef), undef;
25 | 
26 | is encode_utf8(''), '';
27 | is decode_utf8(''), '';
28 | 
29 | is Encode::encode('utf8', undef), undef;
30 | is Encode::decode('utf8', undef), undef;
31 | 
32 | is Encode::encode('utf8', ''), '';
33 | is Encode::decode('utf8', ''), '';
34 | 


--------------------------------------------------------------------------------
/t/utf8strict.t:
--------------------------------------------------------------------------------
  1 | #!../perl
  2 | our $DEBUG = @ARGV;
  3 | our (%ORD, %SEQ, $NTESTS);
  4 | BEGIN {
  5 |      if ($ENV{'PERL_CORE'}){
  6 |          chdir 't';
  7 |          unshift @INC, '../lib';
  8 |      }
  9 |      require Config; Config->import();
 10 |      if ($Config{'extensions'} !~ /\bEncode\b/) {
 11 |          print "1..0 # Skip: Encode was not built\n";
 12 |      exit 0;
 13 |      }
 14 |      if ($] <= 5.008 and !$Config{perl_patchlevel}){
 15 |      print "1..0 # Skip: Perl 5.8.1 or later required\n";
 16 |      exit 0;
 17 |      }
 18 |      # http://smontagu.damowmow.com/utf8test.html
 19 |      # The numbers below, like 2.1.2 are test numbers on this web page
 20 |      %ORD = (
 21 |          0x00000080 => 0, # 2.1.2
 22 |          0x00000800 => 0, # 2.1.3
 23 |          0x00010000 => 0, # 2.1.4
 24 |          0x00200000 => 1, # 2.1.5
 25 |          0x00400000 => 1, # 2.1.6
 26 |          0x0000007F => 0, # 2.2.1 -- unmapped okay
 27 |          0x000007FF => 0, # 2.2.2
 28 |          0x0000FFFF => 1, # 2.2.3
 29 |          0x001FFFFF => 1, # 2.2.4
 30 |          0x03FFFFFF => 1, # 2.2.5
 31 |          0x7FFFFFFF => 1, # 2.2.6
 32 |          0x0000D800 => 1, # 5.1.1
 33 |          0x0000DB7F => 1, # 5.1.2
 34 |          0x0000D880 => 1, # 5.1.3
 35 |          0x0000DBFF => 1, # 5.1.4
 36 |          0x0000DC00 => 1, # 5.1.5
 37 |          0x0000DF80 => 1, # 5.1.6
 38 |          0x0000DFFF => 1, # 5.1.7
 39 |          # 5.2 "Paird UTF-16 surrogates skipped
 40 |          # because utf-8-strict raises exception at the first one
 41 |          0x0000FFFF => 1, # 5.3.1
 42 |         );
 43 |      $NTESTS +=  scalar keys %ORD;
 44 |      if (ord('A') == 193) {
 45 | 	 %SEQ = (
 46 | 		 qq/dd 64 73 73/    => 0, # 2.3.1
 47 | 		 qq/dd 67 41 41/    => 0, # 2.3.2
 48 | 		 qq/ee 42 73 73 71/ => 0, # 2.3.3
 49 | 		 qq/f4 90 80 80/ => 1, # 2.3.4 -- out of range so NG
 50 | 		 # EBCDIC TODO: "3 Malformed sequences"
 51 | 		 # EBCDIC TODO: "4 Overlong sequences"
 52 | 		 );
 53 |      } else {
 54 | 	 %SEQ = (
 55 | 		 qq/ed 9f bf/    => 0, # 2.3.1
 56 | 		 qq/ee 80 80/    => 0, # 2.3.2
 57 | 		 qq/f4 8f bf bd/ => 0, # 2.3.3
 58 | 		 qq/f4 90 80 80/ => 1, # 2.3.4 -- out of range so NG
 59 | 		 qq/80/          => 1,             # 3.1.1
 60 | 		 qq/bf/          => 1,             # 3.1.2
 61 | 		 qq/80 bf/       => 1,             # 3.1.3
 62 | 		 qq/80 bf 80/    => 1,             # 3.1.4
 63 | 		 qq/80 bf 80 bf/ => 1,             # 3.1.5
 64 | 		 qq/80 bf 80 bf 80/ => 1,          # 3.1.6
 65 | 		 qq/80 bf 80 bf 80 bf/ => 1,       # 3.1.7
 66 | 		 qq/80 bf 80 bf 80 bf 80/ => 1,    # 3.1.8
 67 | 		 qq/80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf/ => 1, # 3.1.9
 68 | 		 qq/c0 20 c1 20 c2 20 c3 20 c4 20 c5 20 c6 20 c7 20 c8 20 c9 20 ca 20 cb 20 cc 20 cd 20 ce 20 cf 20 d0 20 d1 20 d2 20 d3 20 d4 20 d5 20 d6 20 d7 20 d8 20 d9 20 da 20 db 20 dc 20 dd 20 de 20 df 20/ => 1, # 3.2.1
 69 | 		 qq/e0 20 e1 20 e2 20 e3 20 e4 20 e5 20 e6 20 e7 20 e8 20 e9 20 ea 20 eb 20 ec 20 ed 20 ee 20 ef 20/ => 1, # 3.2.2
 70 | 		 qq/f0 20 f1 20 f2 20 f3 20 f4 20 f5 20 f6 20 f7 20/ => 1, # 3.2.3
 71 | 		 qq/f8 20 f9 20 fa 20 fb 20/ => 1, # 3.2.4
 72 | 		 qq/fc 20 fd 20/ => 1,             # 3.2.5
 73 | 		 qq/c0/ => 1,                      # 3.3.1
 74 | 		 qq/e0 80/ => 1,                   # 3.3.2
 75 | 		 qq/f0 80 80/ => 1,                # 3.3.3
 76 | 		 qq/f8 80 80 80/ => 1,             # 3.3.4
 77 | 		 qq/fc 80 80 80 80/ => 1,          # 3.3.5
 78 | 		 qq/df/ => 1,                      # 3.3.6
 79 | 		 qq/ef bf/ => 1,                   # 3.3.7
 80 | 		 qq/f7 bf bf/ => 1,                # 3.3.8
 81 | 		 qq/fb bf bf bf/ => 1,             # 3.3.9
 82 | 		 qq/fd bf bf bf bf/ => 1,          # 3.3.10
 83 | 		 qq/c0 e0 80 f0 80 80 f8 80 80 80 fc 80 80 80 80 df ef bf f7 bf bf fb bf bf bf fd bf bf bf bf/ => 1, # 3.4.1
 84 | 		 qq/fe/ => 1,                      # 3.5.1
 85 | 		 qq/ff/ => 1,                      # 3.5.2
 86 | 		 qq/fe fe ff ff/ => 1,             # 3.5.3
 87 | 		 qq/c0 af/ => 1,                   # 4.1.1
 88 | 		 qq/e0 80 af/ => 1,                # 4.1.2
 89 | 		 qq/f0 80 80 af/ => 1,             # 4.1.3
 90 | 		 qq/f8 80 80 80 af/ => 1,          # 4.1.4
 91 | 		 qq/fc 80 80 80 80 af/ => 1,       # 4.1.5
 92 | 		 qq/c1 bf/ => 1,                   # 4.2.1
 93 | 		 qq/e0 9f bf/ => 1,                # 4.2.2
 94 | 		 qq/f0 8f bf bf/ => 1,             # 4.2.3
 95 | 		 qq/f8 87 bf bf bf/ => 1,          # 4.2.4
 96 | 		 qq/fc 83 bf bf bf bf/ => 1,       # 4.2.5
 97 | 		 qq/c0 80/ => 1,                   # 4.3.1
 98 | 		 qq/e0 80 80/ => 1,                # 4.3.2
 99 | 		 qq/f0 80 80 80/ => 1,             # 4.3.3
100 | 		 qq/f8 80 80 80 80/ => 1,          # 4.3.4
101 | 		 qq/fc 80 80 80 80 80/ => 1,       # 4.3.5
102 | 		 );
103 |      }
104 |      $NTESTS +=  scalar keys %SEQ;
105 | }
106 | use strict;
107 | use Encode;
108 | use utf8;
109 | use Test::More tests => $NTESTS;
110 | 
111 | local($SIG{__WARN__}) = sub { $DEBUG and $@ and print STDERR $@ };
112 | 
113 | my $d = find_encoding("utf-8-strict");
114 | for my $u (sort keys %ORD){
115 |     my $c = chr($u);
116 |     eval { $d->encode($c,1) };
117 |     $DEBUG and $@ and warn $@;
118 |     my $t = $@ ? 1 : 0;
119 |     is($t, $ORD{$u}, sprintf "U+%04X", $u);
120 | }
121 | for my $s (sort keys %SEQ){
122 |     my $o = pack "C*" => map {hex} split /\s+/, $s;
123 |     eval { $d->decode($o,1) };
124 |     $DEBUG and $@ and warn $@;
125 |     my $t = $@ ? 1 : 0;
126 |     is($t, $SEQ{$s}, "sequence: $s");
127 | }
128 | 
129 | __END__
130 | 
131 | 
132 | 


--------------------------------------------------------------------------------
/t/utf8warnings.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | BEGIN { 'warnings'->unimport('utf8') if $] < 5.014 }; # turn off 'UTF-16 surrogate 0xd800' warnings
 4 | 
 5 | use Test::More;
 6 | use Encode qw(encode decode FB_CROAK LEAVE_SRC);
 7 | 
 8 | my $script = quotemeta $0;
 9 | 
10 | plan tests => 12;
11 | 
12 | my @invalid;
13 | 
14 | ok ! defined eval { encode('UTF-8', "\x{D800}", FB_CROAK | LEAVE_SRC) }, 'Surrogate codepoint \x{D800} is not encoded to strict UTF-8';
15 | like $@, qr/^"\\x\{d800\}" does not map to UTF-8 at $script line /, 'Error message contains strict UTF-8 name';
16 | @invalid = ();
17 | encode('UTF-8', "\x{D800}", sub { @invalid = @_; return ""; });
18 | is_deeply \@invalid, [ 0xD800 ], 'Fallback coderef contains invalid codepoint 0xD800';
19 | 
20 | ok ! defined eval { decode('UTF-8', "\xed\xa0\x80", FB_CROAK | LEAVE_SRC) }, 'Surrogate UTF-8 byte sequence \xED\xA0\x80 is decoded with strict UTF-8 decoder';
21 | like $@, qr/^UTF-8 "\\xED\\xA0\\x80" does not map to Unicode at $script line /, 'Error message contains strict UTF-8 name and original (not decoded) invalid sequence';
22 | @invalid = ();
23 | decode('UTF-8', "\xed\xa0\x80", sub { @invalid = @_; return ""; });
24 | is_deeply \@invalid, [ 0xED, 0xA0, 0x80 ], 'Fallback coderef contains invalid byte sequence 0xED, 0xA0, 0x80';
25 | 
26 | ok ! defined eval { decode('UTF-8', "\xed\xa0", FB_CROAK | LEAVE_SRC) }, 'Invalid byte sequence \xED\xA0 is not decoded with strict UTF-8 decoder';
27 | like $@, qr/^UTF-8 "\\xED\\xA0" does not map to Unicode at $script line /, 'Error message contains strict UTF-8 name and original (not decoded) invalid sequence';
28 | @invalid = ();
29 | decode('UTF-8', "\xed\xa0", sub { @invalid = @_; return ""; });
30 | is_deeply \@invalid, [ 0xED, 0xA0 ], 'Fallback coderef contains invalid byte sequence 0xED, 0xA0';
31 | 
32 | ok ! defined eval { decode('utf8', "\xed\xa0", FB_CROAK | LEAVE_SRC) }, 'Invalid byte sequence \xED\xA0 is not decoded with non-strict utf8 decoder';
33 | like $@, qr/^utf8 "\\xED\\xA0" does not map to Unicode at $script line /, 'Error message contains non-strict utf8 name and original (not decoded) invalid sequence';
34 | decode('utf8', "\xed\xa0", sub { @invalid = @_; return ""; });
35 | is_deeply \@invalid, [ 0xED, 0xA0 ], 'Fallback coderef contains invalid byte sequence 0xED, 0xA0';
36 | 


--------------------------------------------------------------------------------
/t/whatwg-aliases.t:
--------------------------------------------------------------------------------
 1 | # This test checks aliases support based on the list in the
 2 | # WHATWG Encoding Living Standard
 3 | #
 4 | # https://encoding.spec.whatwg.org/
 5 | #
 6 | # The input of this test is the file whatwg-aliases.json downloaded from
 7 | # https://encoding.spec.whatwg.org/encodings.json
 8 | #
 9 | # To run:
10 | #   AUTHOR_TESTING=1 prove -l t/whatwg-aliases.t
11 | 
12 | 
13 | use Test::More
14 |     $ENV{AUTHOR_TESTING}
15 |     ? 'no_plan'
16 |     : (skip_all => 'For maintainers only');
17 | use Encode 'find_encoding';
18 | use JSON::PP 'decode_json';
19 | use File::Spec;
20 | use FindBin;
21 | 
22 | my $encodings = decode_json(do {
23 |     # https://encoding.spec.whatwg.org/encodings.json
24 |     open my $f, '<', File::Spec->catdir($FindBin::Bin, 'whatwg-aliases.json');
25 |     local $/;
26 |     <$f>
27 | });
28 | 
29 | my %IGNORE = map { $_ => '' } qw(
30 |     replacement
31 |     utf8
32 | );
33 | 
34 | my %TODO = (
35 |     'ISO-8859-8-I'   => 'Not supported',
36 |     'gb18030'        => 'Not supported',
37 |     '866'            => 'Not supported',
38 |     'x-user-defined' => 'Not supported',
39 |     # ...
40 | );
41 | 
42 | for my $section (@$encodings) {
43 |     for my $enc (@{$section->{encodings}}) {
44 | 
45 | 	my $name = $enc->{name};
46 | 
47 | 	next if exists $IGNORE{$name};
48 | 
49 | 	local $TODO = $TODO{$name} if exists $TODO{$name};
50 | 
51 | 	my $encoding = find_encoding($name);
52 | 	isa_ok($encoding, 'Encode::Encoding', $name);
53 | 
54 | 	for my $label (@{$enc->{labels}}) {
55 | 	    local $TODO = $TODO{$label} if exists $TODO{$label};
56 | 
57 | 	    my $e = find_encoding($label);
58 | 	    if (isa_ok($e, 'Encode::Encoding', $label)) {
59 | 		next if exists $IGNORE{$label};
60 | 		is($e->name, $encoding->name, "$label ->name is $name")
61 | 	    }
62 | 	}
63 |     }
64 | }
65 | 
66 | done_testing;
67 | 


--------------------------------------------------------------------------------
/t/xml.t:
--------------------------------------------------------------------------------
 1 | BEGIN {
 2 |     if ( $] < 5.009 ) {
 3 |         print "1..0 # Skip: Perl <= 5.9 or later required\n";
 4 |         exit 0;
 5 |     }
 6 | }
 7 | use strict;
 8 | use warnings;
 9 | 
10 | use Encode;
11 | use Test::More;
12 | 
13 | my $content = String->new("--\x{30c6}--");
14 | my $text = Encode::encode('latin1', $content, Encode::FB_XMLCREF);
15 | is $text, "--&#x30c6;--";
16 | 
17 | done_testing();
18 | 
19 | package String;
20 | use overload
21 |   '""' => sub { ${$_[0]} }, fallback => 1;
22 | 
23 | sub new {
24 |     my($class, $str) = @_;
25 |     bless \$str, $class;
26 | }
27 | 
28 | 1;
29 | 


--------------------------------------------------------------------------------
/ucm/ascii.ucm:
--------------------------------------------------------------------------------
  1 | #
  2 | # $Id: ascii.ucm,v 2.0 2004/05/16 20:55:19 dankogai Exp $
  3 | #
  4 | <code_set_name>  "ascii"
  5 | <code_set_alias> "US-ascii"
  6 | <mb_cur_min> 1
  7 | <mb_cur_max> 1
  8 | <subchar> \x3F
  9 | #
 10 | CHARMAP
 11 | <U0000> \x00 |0 # <control>
 12 | <U0001> \x01 |0 # <control>
 13 | <U0002> \x02 |0 # <control>
 14 | <U0003> \x03 |0 # <control>
 15 | <U0004> \x04 |0 # <control>
 16 | <U0005> \x05 |0 # <control>
 17 | <U0006> \x06 |0 # <control>
 18 | <U0007> \x07 |0 # <control>
 19 | <U0008> \x08 |0 # <control>
 20 | <U0009> \x09 |0 # <control>
 21 | <U000A> \x0A |0 # <control>
 22 | <U000B> \x0B |0 # <control>
 23 | <U000C> \x0C |0 # <control>
 24 | <U000D> \x0D |0 # <control>
 25 | <U000E> \x0E |0 # <control>
 26 | <U000F> \x0F |0 # <control>
 27 | <U0010> \x10 |0 # <control>
 28 | <U0011> \x11 |0 # <control>
 29 | <U0012> \x12 |0 # <control>
 30 | <U0013> \x13 |0 # <control>
 31 | <U0014> \x14 |0 # <control>
 32 | <U0015> \x15 |0 # <control>
 33 | <U0016> \x16 |0 # <control>
 34 | <U0017> \x17 |0 # <control>
 35 | <U0018> \x18 |0 # <control>
 36 | <U0019> \x19 |0 # <control>
 37 | <U001A> \x1A |0 # <control>
 38 | <U001B> \x1B |0 # <control>
 39 | <U001C> \x1C |0 # <control>
 40 | <U001D> \x1D |0 # <control>
 41 | <U001E> \x1E |0 # <control>
 42 | <U001F> \x1F |0 # <control>
 43 | <U0020> \x20 |0 # SPACE
 44 | <U0021> \x21 |0 # EXCLAMATION MARK
 45 | <U0022> \x22 |0 # QUOTATION MARK
 46 | <U0023> \x23 |0 # NUMBER SIGN
 47 | <U0024> \x24 |0 # DOLLAR SIGN
 48 | <U0025> \x25 |0 # PERCENT SIGN
 49 | <U0026> \x26 |0 # AMPERSAND
 50 | <U0027> \x27 |0 # APOSTROPHE
 51 | <U0028> \x28 |0 # LEFT PARENTHESIS
 52 | <U0029> \x29 |0 # RIGHT PARENTHESIS
 53 | <U002A> \x2A |0 # ASTERISK
 54 | <U002B> \x2B |0 # PLUS SIGN
 55 | <U002C> \x2C |0 # COMMA
 56 | <U002D> \x2D |0 # HYPHEN-MINUS
 57 | <U002E> \x2E |0 # FULL STOP
 58 | <U002F> \x2F |0 # SOLIDUS
 59 | <U0030> \x30 |0 # DIGIT ZERO
 60 | <U0031> \x31 |0 # DIGIT ONE
 61 | <U0032> \x32 |0 # DIGIT TWO
 62 | <U0033> \x33 |0 # DIGIT THREE
 63 | <U0034> \x34 |0 # DIGIT FOUR
 64 | <U0035> \x35 |0 # DIGIT FIVE
 65 | <U0036> \x36 |0 # DIGIT SIX
 66 | <U0037> \x37 |0 # DIGIT SEVEN
 67 | <U0038> \x38 |0 # DIGIT EIGHT
 68 | <U0039> \x39 |0 # DIGIT NINE
 69 | <U003A> \x3A |0 # COLON
 70 | <U003B> \x3B |0 # SEMICOLON
 71 | <U003C> \x3C |0 # LESS-THAN SIGN
 72 | <U003D> \x3D |0 # EQUALS SIGN
 73 | <U003E> \x3E |0 # GREATER-THAN SIGN
 74 | <U003F> \x3F |0 # QUESTION MARK
 75 | <U0040> \x40 |0 # COMMERCIAL AT
 76 | <U0041> \x41 |0 # LATIN CAPITAL LETTER A
 77 | <U0042> \x42 |0 # LATIN CAPITAL LETTER B
 78 | <U0043> \x43 |0 # LATIN CAPITAL LETTER C
 79 | <U0044> \x44 |0 # LATIN CAPITAL LETTER D
 80 | <U0045> \x45 |0 # LATIN CAPITAL LETTER E
 81 | <U0046> \x46 |0 # LATIN CAPITAL LETTER F
 82 | <U0047> \x47 |0 # LATIN CAPITAL LETTER G
 83 | <U0048> \x48 |0 # LATIN CAPITAL LETTER H
 84 | <U0049> \x49 |0 # LATIN CAPITAL LETTER I
 85 | <U004A> \x4A |0 # LATIN CAPITAL LETTER J
 86 | <U004B> \x4B |0 # LATIN CAPITAL LETTER K
 87 | <U004C> \x4C |0 # LATIN CAPITAL LETTER L
 88 | <U004D> \x4D |0 # LATIN CAPITAL LETTER M
 89 | <U004E> \x4E |0 # LATIN CAPITAL LETTER N
 90 | <U004F> \x4F |0 # LATIN CAPITAL LETTER O
 91 | <U0050> \x50 |0 # LATIN CAPITAL LETTER P
 92 | <U0051> \x51 |0 # LATIN CAPITAL LETTER Q
 93 | <U0052> \x52 |0 # LATIN CAPITAL LETTER R
 94 | <U0053> \x53 |0 # LATIN CAPITAL LETTER S
 95 | <U0054> \x54 |0 # LATIN CAPITAL LETTER T
 96 | <U0055> \x55 |0 # LATIN CAPITAL LETTER U
 97 | <U0056> \x56 |0 # LATIN CAPITAL LETTER V
 98 | <U0057> \x57 |0 # LATIN CAPITAL LETTER W
 99 | <U0058> \x58 |0 # LATIN CAPITAL LETTER X
100 | <U0059> \x59 |0 # LATIN CAPITAL LETTER Y
101 | <U005A> \x5A |0 # LATIN CAPITAL LETTER Z
102 | <U005B> \x5B |0 # LEFT SQUARE BRACKET
103 | <U005C> \x5C |0 # REVERSE SOLIDUS
104 | <U005D> \x5D |0 # RIGHT SQUARE BRACKET
105 | <U005E> \x5E |0 # CIRCUMFLEX ACCENT
106 | <U005F> \x5F |0 # LOW LINE
107 | <U0060> \x60 |0 # GRAVE ACCENT
108 | <U0061> \x61 |0 # LATIN SMALL LETTER A
109 | <U0062> \x62 |0 # LATIN SMALL LETTER B
110 | <U0063> \x63 |0 # LATIN SMALL LETTER C
111 | <U0064> \x64 |0 # LATIN SMALL LETTER D
112 | <U0065> \x65 |0 # LATIN SMALL LETTER E
113 | <U0066> \x66 |0 # LATIN SMALL LETTER F
114 | <U0067> \x67 |0 # LATIN SMALL LETTER G
115 | <U0068> \x68 |0 # LATIN SMALL LETTER H
116 | <U0069> \x69 |0 # LATIN SMALL LETTER I
117 | <U006A> \x6A |0 # LATIN SMALL LETTER J
118 | <U006B> \x6B |0 # LATIN SMALL LETTER K
119 | <U006C> \x6C |0 # LATIN SMALL LETTER L
120 | <U006D> \x6D |0 # LATIN SMALL LETTER M
121 | <U006E> \x6E |0 # LATIN SMALL LETTER N
122 | <U006F> \x6F |0 # LATIN SMALL LETTER O
123 | <U0070> \x70 |0 # LATIN SMALL LETTER P
124 | <U0071> \x71 |0 # LATIN SMALL LETTER Q
125 | <U0072> \x72 |0 # LATIN SMALL LETTER R
126 | <U0073> \x73 |0 # LATIN SMALL LETTER S
127 | <U0074> \x74 |0 # LATIN SMALL LETTER T
128 | <U0075> \x75 |0 # LATIN SMALL LETTER U
129 | <U0076> \x76 |0 # LATIN SMALL LETTER V
130 | <U0077> \x77 |0 # LATIN SMALL LETTER W
131 | <U0078> \x78 |0 # LATIN SMALL LETTER X
132 | <U0079> \x79 |0 # LATIN SMALL LETTER Y
133 | <U007A> \x7A |0 # LATIN SMALL LETTER Z
134 | <U007B> \x7B |0 # LEFT CURLY BRACKET
135 | <U007C> \x7C |0 # VERTICAL LINE
136 | <U007D> \x7D |0 # RIGHT CURLY BRACKET
137 | <U007E> \x7E |0 # TILDE
138 | <U007F> \x7F |0 # <control>
139 | END CHARMAP
140 | 


--------------------------------------------------------------------------------
/ucm/ctrl.ucm:
--------------------------------------------------------------------------------
 1 | #
 2 | # $Id: ctrl.ucm,v 2.0 2004/05/16 20:55:23 dankogai Exp $
 3 | #
 4 | <code_set_name>  "ascii-ctrl"
 5 | <mb_cur_min> 1
 6 | <mb_cur_max> 1
 7 | <subchar> \x3F
 8 | #
 9 | CHARMAP
10 | <U0000> \x00 |0 # <control>
11 | <U0001> \x01 |0 # <control>
12 | <U0002> \x02 |0 # <control>
13 | <U0003> \x03 |0 # <control>
14 | <U0004> \x04 |0 # <control>
15 | <U0005> \x05 |0 # <control>
16 | <U0006> \x06 |0 # <control>
17 | <U0007> \x07 |0 # <control>
18 | <U0008> \x08 |0 # <control>
19 | <U0009> \x09 |0 # <control>
20 | <U000A> \x0A |0 # <control>
21 | <U000B> \x0B |0 # <control>
22 | <U000C> \x0C |0 # <control>
23 | <U000D> \x0D |0 # <control>
24 | <U000E> \x0E |0 # <control>
25 | <U000F> \x0F |0 # <control>
26 | <U0010> \x10 |0 # <control>
27 | <U0011> \x11 |0 # <control>
28 | <U0012> \x12 |0 # <control>
29 | <U0013> \x13 |0 # <control>
30 | <U0014> \x14 |0 # <control>
31 | <U0015> \x15 |0 # <control>
32 | <U0016> \x16 |0 # <control>
33 | <U0017> \x17 |0 # <control>
34 | <U0018> \x18 |0 # <control>
35 | <U0019> \x19 |0 # <control>
36 | <U001A> \x1A |0 # <control>
37 | <U001B> \x1B |0 # <control>
38 | <U001C> \x1C |0 # <control>
39 | <U001D> \x1D |0 # <control>
40 | <U001E> \x1E |0 # <control>
41 | <U001F> \x1F |0 # <control>
42 | END CHARMAP
43 | 


--------------------------------------------------------------------------------
/ucm/macUkraine.ucm:
--------------------------------------------------------------------------------
 1 | #
 2 | # $Id: macUkraine.ucm,v 2.0 2004/05/16 20:55:28 dankogai Exp $
 3 | #
 4 | # Original table can be obtained at
 5 | # http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/UKRAINE.TXT
 6 | #
 7 | <code_set_name> "MacUkrainian"
 8 | <code_set_alias> "MacUkraine"
 9 | <mb_cur_min> 1
10 | <mb_cur_max> 1
11 | <subchar> \x3F
12 | CHARMAP
13 | <U0000> \x00 |0 # <control>
14 | <U0001> \x01 |0 # <control>
15 | <U0002> \x02 |0 # <control>
16 | <U0003> \x03 |0 # <control>
17 | <U0004> \x04 |0 # <control>
18 | <U0005> \x05 |0 # <control>
19 | <U0006> \x06 |0 # <control>
20 | <U0007> \x07 |0 # <control>
21 | <U0008> \x08 |0 # <control>
22 | <U0009> \x09 |0 # <control>
23 | <U000A> \x0A |0 # <control>
24 | <U000B> \x0B |0 # <control>
25 | <U000C> \x0C |0 # <control>
26 | <U000D> \x0D |0 # <control>
27 | <U000E> \x0E |0 # <control>
28 | <U000F> \x0F |0 # <control>
29 | <U0010> \x10 |0 # <control>
30 | <U0011> \x11 |0 # <control>
31 | <U0012> \x12 |0 # <control>
32 | <U0013> \x13 |0 # <control>
33 | <U0014> \x14 |0 # <control>
34 | <U0015> \x15 |0 # <control>
35 | <U0016> \x16 |0 # <control>
36 | <U0017> \x17 |0 # <control>
37 | <U0018> \x18 |0 # <control>
38 | <U0019> \x19 |0 # <control>
39 | <U001A> \x1A |0 # <control>
40 | <U001B> \x1B |0 # <control>
41 | <U001C> \x1C |0 # <control>
42 | <U001D> \x1D |0 # <control>
43 | <U001E> \x1E |0 # <control>
44 | <U001F> \x1F |0 # <control>
45 | END CHARMAP
46 | 


--------------------------------------------------------------------------------
/ucm/null.ucm:
--------------------------------------------------------------------------------
 1 | #
 2 | # $Id: null.ucm,v 2.0 2004/05/16 20:55:28 dankogai Exp $
 3 | #
 4 | <code_set_name>  "null"
 5 | <mb_cur_min> 1
 6 | <mb_cur_max> 1
 7 | <subchar> \x3F
 8 | #
 9 | CHARMAP
10 | END CHARMAP
11 | 


--------------------------------------------------------------------------------