├── MANIFEST ├── MANIFEST.SKIP ├── Changes ├── Makefile.PL ├── t └── 00_dude.t ├── README └── lib └── Convert └── DUDE.pm /MANIFEST: -------------------------------------------------------------------------------- 1 | Changes 2 | MANIFEST 3 | Makefile.PL 4 | README 5 | lib/Convert/DUDE.pm 6 | t/00_dude.t 7 | -------------------------------------------------------------------------------- /MANIFEST.SKIP: -------------------------------------------------------------------------------- 1 | \bRCS\b 2 | \bCVS\b 3 | ^MANIFEST\. 4 | ^Makefile$ 5 | ~$ 6 | \.old$ 7 | ^blib/ 8 | ^pm_to_blib 9 | ^MakeMaker-\d 10 | \.gz$ 11 | \.cvsignore 12 | -------------------------------------------------------------------------------- /Changes: -------------------------------------------------------------------------------- 1 | Revision history for Perl extension Convert::DUDE. 2 | 3 | 0.02 Sat Jul 28 04:18:38 JST 2001 4 | - Now Convert::DUDE handles Unicode strings as UTF8. 5 | Requires Unicode::String. 6 | 7 | 0.01 Thu Jul 19 18:17:35 2001 8 | - original version; created by h2xs 1.19 9 | 10 | -------------------------------------------------------------------------------- /Makefile.PL: -------------------------------------------------------------------------------- 1 | use ExtUtils::MakeMaker; 2 | # See lib/ExtUtils/MakeMaker.pm for details of how to influence 3 | # the contents of the Makefile that is written. 4 | WriteMakefile( 5 | 'NAME' => 'Convert::DUDE', 6 | 'VERSION_FROM' => 'lib/Convert/DUDE.pm', # finds $VERSION 7 | 'PREREQ_PM' => { 8 | 'Unicode::String' => 2.06, 9 | }, 10 | ); 11 | -------------------------------------------------------------------------------- /t/00_dude.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use Test; 3 | BEGIN { plan tests => 10 } 4 | 5 | use Convert::DUDE qw(:all); 6 | use Unicode::String qw(utf16); 7 | 8 | # taken from ietf-idn-dude-02.txt, embeded in UTF16 9 | my @todo = ( 10 | "\x00\x33\x5e\x74\x00\x62\x7d\x44\x91\xd1\x51\x6b\x51\x48\x75\x1f" => 'xdx8whx8tgz7ug863f6s5kuduwxh', 11 | "\x5B\x89\x5B\xA4\x59\x48\x7F\x8E\x60\x75\x00\x2D\x00\x77\x00\x69\x00\x74\x00\x68\x00\x2D\x00\x73\x00\x75\x00\x70\x00\x65\x00\x72\x00\x2D\x00\x6D\x00\x6F\x00\x6E\x00\x6B\x00\x65\x00\x79\x00\x73" => 'x58jupu8nuy6gt99m-yssctqtptn-tmgftfth-trcbfqtnk', 12 | "\x00\x6D\x00\x61\x00\x6A\x00\x69\x30\x67\x00\x6B\x00\x6F\x00\x69\x30\x59\x30\x8B\x00\x35\x79\xD2\x52\x4D" => 'pnmdvssqvssnegvsva7cvs5qz38hu53r', 13 | "\x30\xD1\x30\xD5\x30\xA3\x30\xFC\x00\x64\x00\x65\x30\xEB\x30\xF3\x30\xD0" => 'vs5bezgxrvs3ibvs2qtiud', 14 | "\x30\x5D\x30\x6E\x30\xB9\x30\xD4\x30\xFC\x30\xC9\x30\x67" => 'vsvpvd7hypuivf4q', 15 | ); 16 | 17 | while (my($utf16, $dude) = splice(@todo, 0, 2)) { 18 | eval { my $utf8 = utf16($utf16)->utf8; 19 | ok(dude_encode($utf8) eq $dude); 20 | ok(dude_decode($dude) eq $utf8); 21 | }; 22 | 23 | } 24 | 25 | 26 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | NAME 2 | Convert::DUDE - Conversion between Unicode and DUDE 3 | 4 | SYNOPSIS 5 | use Convert::DUDE ':all'; 6 | 7 | # handles 'dq--' prefix 8 | $domain = to_dude($utf8); 9 | $utf8 = from_dude($domain); 10 | 11 | # don't care about 'dq--' prefix 12 | # not exported by default 13 | $dudestr = dude_encode($utf8); 14 | $utf8 = dude_decode($dudestr); 15 | 16 | DESCRIPTION 17 | This module provides functions to convert between DUDE (Differential 18 | Unicode Domain Encoding) and Unicode encodings. 19 | 20 | Quoted from http://www.i-d-n.net/draft/draft-ietf-idn-dude-02.txt 21 | 22 | DUDE is a reversible transformation from a sequence of nonnegative 23 | integer values to a sequence of letters, digits, and hyphens (LDH 24 | characters). DUDE provides a simple and efficient ASCII-Compatible 25 | Encoding (ACE) of Unicode strings for use with Internationalized 26 | Domain Names. 27 | 28 | FUNCTIONS 29 | Following two functions are exported to your package when you use 30 | Convert::DUDE. 31 | 32 | to_dude 33 | $domain = to_dude($utf8); 34 | 35 | takes UTF8-encoded string, encodes it in DUDE and adds 'dq--' prefix 36 | in front. 37 | 38 | from_dude 39 | $utf8 = from_dude($domain); 40 | 41 | takes 'dq--' prefixed DUDE encoded string and decodes it to original 42 | UTF8 strings. 43 | 44 | Following two functions can be exported to your package when you import 45 | them explicitly. 46 | 47 | dude_encode 48 | $dude = dude_encode($utf8); 49 | 50 | takes UTF8-encoded string, encodes it in DUDE. Note that it doesn't 51 | care about 'dq--' prefix. 52 | 53 | dude_decode 54 | $utf8 = dude_decode($dude); 55 | 56 | takes DUDE encoded string and decodes it to original UTF8 strings. 57 | Note that it doesn't care about 'dq--' prefix. 58 | 59 | Those functions above may throw exeptions in case of error. You may have 60 | to catch 'em with eval block. 61 | 62 | CLASS METHODS 63 | prefix 64 | $prefix = Convert::DUDE->prefix; 65 | Convert::DUDE->prefix('xx--'); 66 | 67 | gets/sets DUDE prefix. 'dq--' for default. 68 | 69 | EXAMPLES 70 | Here's a sample code which does RACE-DUDE conversion. 71 | 72 | use Convert::RACE; 73 | use Convert::DUDE; 74 | use Unicode::String qw(utf16); 75 | 76 | my $race = "bq--aewrcsy"; 77 | 78 | eval { 79 | my $utf16 = from_race($race); 80 | my $dude = to_dude(utf16($utf16)->utf8); 81 | print "RACE: $race => DUDE: $dude\n"; 82 | }; 83 | 84 | if ($@) { 85 | warn "Conversion failed: $@"; 86 | } 87 | 88 | CAVEATS 89 | * There's no constraints on the input. See internet draft for nameprep 90 | about IDN input validation. 91 | 92 | TODO 93 | * Consider mixed-case annotation. See internet draft for DUDE for 94 | details. 95 | 96 | AUTHOR 97 | Tatsuhiko Miyagawa 98 | 99 | This library is free software; you can redistribute it and/or modify it 100 | under the same terms as Perl itself. 101 | 102 | This module comes without warranty of any kind. 103 | 104 | SEE ALSO 105 | the Convert::RACE manpage, http://www.i-d-n.net/, 106 | http://www.i-d-n.net/draft/draft-ietf-idn-dude-02.txt, the 107 | Unicode::String manpage, the Jcode manpage 108 | 109 | -------------------------------------------------------------------------------- /lib/Convert/DUDE.pm: -------------------------------------------------------------------------------- 1 | package Convert::DUDE; 2 | 3 | use strict; 4 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 5 | $VERSION = '0.02'; 6 | 7 | use Unicode::String qw(utf8); 8 | 9 | BEGIN { 10 | require Exporter; 11 | @ISA = qw(Exporter); 12 | @EXPORT = qw(to_dude from_dude); 13 | @EXPORT_OK = qw(dude_encode dude_decode); 14 | %EXPORT_TAGS = ( 15 | all => [ @EXPORT, @EXPORT_OK ], 16 | encode => [ @EXPORT_OK ], 17 | ); 18 | } 19 | 20 | { 21 | my $prefix = 'dq--'; # default 22 | sub prefix { 23 | shift; 24 | $prefix = shift if @_; 25 | $prefix; 26 | } 27 | } 28 | 29 | sub _die { require Carp; Carp::croak @_; } 30 | 31 | # XXX don't use Convert::Base32 32 | # XXX because Base32 tables in RACE / DUDE are different ... 33 | use vars qw(%bits2char %char2bits); 34 | 35 | %bits2char = qw@ 36 | 00000 a 37 | 00001 b 38 | 00010 c 39 | 00011 d 40 | 00100 e 41 | 00101 f 42 | 00110 g 43 | 00111 h 44 | 01000 i 45 | 01001 j 46 | 01010 k 47 | 01011 m 48 | 01100 n 49 | 01101 p 50 | 01110 q 51 | 01111 r 52 | 10000 s 53 | 10001 t 54 | 10010 u 55 | 10011 v 56 | 10100 w 57 | 10101 x 58 | 10110 y 59 | 10111 z 60 | 11000 2 61 | 11001 3 62 | 11010 4 63 | 11011 5 64 | 11100 6 65 | 11101 7 66 | 11110 8 67 | 11111 9 68 | @; # End of qw 69 | 70 | %char2bits = reverse %bits2char; 71 | 72 | =begin algorithm 73 | 74 | let prev = 0x60 75 | for each input integer n (in order) do begin 76 | if n == 0x2D then output hyphen-minus 77 | else begin 78 | let diff = prev XOR n 79 | represent diff in base 16 as a sequence of quartets, 80 | as few as are sufficient (but at least one) 81 | prepend 0 to the last quartet and 1 to each of the others 82 | output a base-32 character corresponding to each quintet 83 | let prev = n 84 | end 85 | end 86 | 87 | =end algorithm 88 | 89 | =cut 90 | 91 | sub dude_encode ($) { 92 | my $input = utf8(shift); 93 | 94 | my $output; 95 | my $prev = 0x60; 96 | for my $i (0 .. $input->length-1) { 97 | my $n = $input->substr($i, 1)->ord; 98 | if ($n == 0x2d) { 99 | $output .= '-'; 100 | next; 101 | } 102 | 103 | my $diff = $prev ^ $n; 104 | 105 | my @quartets = unpack('B*', pack('n*', $diff)) =~ m/(.{4})/gs; 106 | shift @quartets while (@quartets && $quartets[0] eq '0000'); 107 | 108 | my @fb_quartets = ((map { '1' . $_ } @quartets[0..$#quartets - 1]), 109 | '0' . $quartets[-1]); 110 | $output .= $bits2char{$_} for (@fb_quartets); 111 | $prev = $n; 112 | } 113 | return $output; 114 | } 115 | 116 | sub to_dude($) { 117 | my $domain = shift; 118 | return __PACKAGE__->prefix . dude_encode($domain); 119 | } 120 | 121 | =begin algorithm 122 | 123 | let prev = 0x60 124 | while the input string is not exhausted do begin 125 | if the next character is hyphen-minus 126 | then consume it and output 0x2D 127 | else begin 128 | consume characters and convert them to quintets until 129 | encountering a quintet whose first bit is 0 130 | fail upon encountering a non-base-32 character or end-of-input 131 | strip the first bit of each quintet 132 | concatenate the resulting quartets to form diff 133 | let prev = prev XOR diff 134 | output prev 135 | end 136 | end 137 | encode the output sequence and compare it to the input string 138 | fail if they do not match (case-insensitively) 139 | 140 | =end algorithm 141 | 142 | =cut 143 | 144 | sub dude_decode ($) { 145 | my $input = lc shift; 146 | 147 | my $prev = 0x60; 148 | my @input = split //, $input; 149 | 150 | my $output = Unicode::String->new; 151 | while (@input) { 152 | if ($input[0] eq '-') { 153 | $output->append(Unicode::String::uchr(0x2d)); 154 | shift @input; 155 | next; 156 | } 157 | 158 | my @quintets; 159 | CONSUME: while (1) { 160 | unless (exists $char2bits{$input[0]}) { 161 | _die "encountered non-base-32 character: $input[0]"; 162 | } 163 | unless (@input) { 164 | _die "reached end-of-input."; 165 | } 166 | 167 | my $quintet = $char2bits{shift @input}; 168 | push @quintets, $quintet; 169 | last CONSUME if substr($quintet, 0, 1) eq '0'; 170 | } 171 | 172 | my $diff = 0; 173 | my $order = 0; 174 | for my $quintet (reverse @quintets) { 175 | $diff += ord(pack('B*', '0000' . substr($quintet, 1))) * (16 ** $order++); 176 | } 177 | $prev = $prev ^ $diff; 178 | $output->append(Unicode::String::uchr($prev)); 179 | } 180 | 181 | unless (dude_encode($output->utf8) eq $input) { 182 | _die "uniqueness check (paranoia) failed."; 183 | } 184 | 185 | return $output->utf8; 186 | } 187 | 188 | sub from_dude ($) { 189 | my $dude = shift; 190 | my $prefix = __PACKAGE__->prefix; 191 | $dude =~ s/^$prefix//o; 192 | return dude_decode($dude); 193 | } 194 | 195 | 196 | 1; 197 | 198 | __END__ 199 | 200 | =head1 NAME 201 | 202 | Convert::DUDE - Conversion between Unicode and DUDE 203 | 204 | =head1 SYNOPSIS 205 | 206 | use Convert::DUDE ':all'; 207 | 208 | # handles 'dq--' prefix 209 | $domain = to_dude($utf8); 210 | $utf8 = from_dude($domain); 211 | 212 | # don't care about 'dq--' prefix 213 | # not exported by default 214 | $dudestr = dude_encode($utf8); 215 | $utf8 = dude_decode($dudestr); 216 | 217 | =head1 DESCRIPTION 218 | 219 | This module provides functions to convert between DUDE (Differential 220 | Unicode Domain Encoding) and Unicode encodings. 221 | 222 | Quoted from http://www.i-d-n.net/draft/draft-ietf-idn-dude-02.txt 223 | 224 | DUDE is a reversible transformation from a sequence of nonnegative 225 | integer values to a sequence of letters, digits, and hyphens (LDH 226 | characters). DUDE provides a simple and efficient ASCII-Compatible 227 | Encoding (ACE) of Unicode strings for use with Internationalized 228 | Domain Names. 229 | 230 | =head1 FUNCTIONS 231 | 232 | Following two functions are exported to your package when you use 233 | Convert::DUDE. 234 | 235 | =over 4 236 | 237 | =item to_dude 238 | 239 | $domain = to_dude($utf8); 240 | 241 | takes UTF8-encoded string, encodes it in DUDE and adds 'dq--' prefix 242 | in front. 243 | 244 | =item from_dude 245 | 246 | $utf8 = from_dude($domain); 247 | 248 | takes 'dq--' prefixed DUDE encoded string and decodes it to original 249 | UTF8 strings. 250 | 251 | =back 252 | 253 | Following two functions can be exported to your package when you 254 | import them explicitly. 255 | 256 | =over 4 257 | 258 | =item dude_encode 259 | 260 | $dude = dude_encode($utf8); 261 | 262 | takes UTF8-encoded string, encodes it in DUDE. Note that it doesn't 263 | care about 'dq--' prefix. 264 | 265 | =item dude_decode 266 | 267 | $utf8 = dude_decode($dude); 268 | 269 | takes DUDE encoded string and decodes it to original UTF8 270 | strings. Note that it doesn't care about 'dq--' prefix. 271 | 272 | =back 273 | 274 | Those functions above may throw exeptions in case of error. You may 275 | have to catch 'em with eval block. 276 | 277 | =head1 CLASS METHODS 278 | 279 | =over 4 280 | 281 | =item prefix 282 | 283 | $prefix = Convert::DUDE->prefix; 284 | Convert::DUDE->prefix('xx--'); 285 | 286 | gets/sets DUDE prefix. 'dq--' for default. 287 | 288 | =back 289 | 290 | =head1 EXAMPLES 291 | 292 | Here's a sample code which does RACE-DUDE conversion. 293 | 294 | use Convert::RACE; 295 | use Convert::DUDE; 296 | use Unicode::String qw(utf16); 297 | 298 | my $race = "bq--aewrcsy"; 299 | 300 | eval { 301 | my $utf16 = from_race($race); 302 | my $dude = to_dude(utf16($utf16)->utf8); 303 | print "RACE: $race => DUDE: $dude\n"; 304 | }; 305 | 306 | if ($@) { 307 | warn "Conversion failed: $@"; 308 | } 309 | 310 | =head1 CAVEATS 311 | 312 | =over 4 313 | 314 | =item * 315 | 316 | There's no constraints on the input. See internet draft for nameprep 317 | about IDN input validation. 318 | 319 | =back 320 | 321 | =head1 TODO 322 | 323 | =over 4 324 | 325 | =item * 326 | 327 | Consider mixed-case annotation. See internet draft for DUDE for 328 | details. 329 | 330 | =back 331 | 332 | =head1 AUTHOR 333 | 334 | Tatsuhiko Miyagawa 335 | 336 | This library is free software; you can redistribute it and/or 337 | modify it under the same terms as Perl itself. 338 | 339 | This module comes without warranty of any kind. 340 | 341 | =head1 SEE ALSO 342 | 343 | L, http://www.i-d-n.net/, http://www.i-d-n.net/draft/draft-ietf-idn-dude-02.txt, L, L 344 | 345 | =cut 346 | --------------------------------------------------------------------------------