1# 2# $Id: UTF7.pm,v 1.3 2004/08/09 18:09:09 millert Exp $ 3# 4package Encode::Unicode::UTF7; 5use strict; 6no warnings 'redefine'; 7use base qw(Encode::Encoding); 8__PACKAGE__->Define('UTF-7'); 9our $VERSION = do { my @r = (q$Revision: 1.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 10use MIME::Base64; 11use Encode; 12 13# 14# Algorithms taken from Unicode::String by Gisle Aas 15# 16 17our $OPTIONAL_DIRECT_CHARS = 1; 18my $specials = quotemeta "\'(),-./:?"; 19$OPTIONAL_DIRECT_CHARS and 20 $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}"; 21# \s will not work because it matches U+3000 DEOGRAPHIC SPACE 22# We use qr/[\n\r\t\ ] instead 23my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/; 24my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/; 25my $e_utf16 = find_encoding("UTF-16BE"); 26 27sub needs_lines { 1 }; 28 29sub encode($$;$){ 30 my ($obj, $str, $chk) = @_; 31 my $len = length($str); 32 pos($str) = 0; 33 my $bytes = ''; 34 while (pos($str) < $len){ 35 if ($str =~ /\G($re_asis+)/ogc){ 36 $bytes .= $1; 37 }elsif($str =~ /\G($re_encoded+)/ogsc){ 38 if ($1 eq "+"){ 39 $bytes .= "+-"; 40 }else{ 41 my $s = $1; 42 my $base64 = encode_base64($e_utf16->encode($s), ''); 43 $base64 =~ s/=+$//; 44 $bytes .= "+$base64-"; 45 } 46 }else{ 47 die "This should not happen! (pos=" . pos($str) . ")"; 48 } 49 } 50 $_[1] = '' if $chk; 51 return $bytes; 52} 53 54sub decode{ 55 my ($obj, $bytes, $chk) = @_; 56 my $len = length($bytes); 57 my $str = ""; 58 while (pos($bytes) < $len) { 59 if ($bytes =~ /\G([^+]+)/ogc) { 60 $str .= $1; 61 }elsif($bytes =~ /\G\+-/ogc) { 62 $str .= "+"; 63 }elsif($bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc) { 64 my $base64 = $1; 65 my $pad = length($base64) % 4; 66 $base64 .= "=" x (4 - $pad) if $pad; 67 $str .= $e_utf16->decode(decode_base64($base64)); 68 }elsif($bytes =~ /\G\+/ogc) { 69 $^W and warn "Bad UTF7 data escape"; 70 $str .= "+"; 71 }else{ 72 die "This should not happen " . pos($bytes); 73 } 74 } 75 $_[1] = '' if $chk; 76 return $str; 77} 781; 79__END__ 80 81=head1 NAME 82 83Encode::Unicode::UTF7 -- UTF-7 encoding 84 85=head1 SYNOPSIS 86 87 use Encode qw/encode decode/; 88 $utf7 = encode("UTF-7", $utf8); 89 $utf8 = decode("UTF-7", $ucs2); 90 91=head1 ABSTRACT 92 93This module implements UTF-7 encoding documented in RFC 2152. UTF-7, 94as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It 95is designed to be MTA-safe and expected to be a standard way to 96exchange Unicoded mails via mails. But with the advent of UTF-8 and 978-bit compliant MTAs, UTF-7 is hardly ever used. 98 99UTF-7 was not supported by Encode until version 1.95 because of that. 100But Unicode::String, a module by Gisle Aas which adds Unicode supports 101to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added 102so Encode can supersede Unicode::String 100%. 103 104=head1 In Practice 105 106When you want to encode Unicode for mails and web pages, however, do 107not use UTF-7 unless you are sure your recipients and readers can 108handle it. Very few MUAs and WWW Browsers support these days (only 109Mozilla seems to support one). For general cases, use UTF-8 for 110message body and MIME-Header for header instead. 111 112=head1 SEE ALSO 113 114L<Encode>, L<Encode::Unicode>, L<Unicode::String> 115 116RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt> 117 118=cut 119