#
# $Id: Encoder.pm,v 2.0 2004/05/16 20:55:17 dankogai Exp $
#
package Encode::Encoder;
use strict;
use warnings;
our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };

require Exporter;
our @ISA       = qw(Exporter);
our @EXPORT_OK = qw ( encoder );

our $AUTOLOAD;

# Compile-time switch: set to a true value to emit debugging warnings.
sub DEBUG () { 0 }
use Encode qw(encode decode find_encoding from_to);
use Carp;

# new($class, [$data, [$encname]])
#
# Builds an encoder object holding $data and the canonical name of its
# current encoding.  When $encname is given it is resolved through
# find_encoding() and the constructor croaks if it is unknown.  When it
# is omitted (or false) the encoding is 'utf8' if $data carries perl's
# UTF-8 flag and '' (empty string) otherwise.
sub new {
    my ( $class, $data, $encname ) = @_;
    if ($encname) {
        my $obj = find_encoding($encname)
          or croak __PACKAGE__, ": unknown encoding: $encname";
        $encname = $obj->name;    # store the canonical name, not the alias
    }
    else {
        $encname = Encode::is_utf8($data) ? 'utf8' : '';
    }
    my $self = {
        data     => $data,
        encoding => $encname,
    };
    return bless $self => $class;
}

# encoder([$data, [$encname]])
#
# Functional shortcut for Encode::Encoder->new(...); exported on demand.
sub encoder { __PACKAGE__->new(@_) }

# data([$data])
#
# With a defined argument, stores it as the instance data and returns
# that same value.  Without one, returns the current instance data.
sub data {
    my ( $self, $data ) = @_;
    if ( defined $data ) {
        $self->{data} = $data;
        return $data;
    }
    else {
        return $self->{data};
    }
}

# encoding([$encname])
#
# With a true argument, resolves it through find_encoding() (confessing
# if it is unknown), records the canonical name as the instance encoding
# and returns the object.  Otherwise returns the current instance
# encoding name.
sub encoding {
    my ( $self, $encname ) = @_;
    if ($encname) {
        my $obj = find_encoding($encname)
          or confess __PACKAGE__, ": unknown encoding: $encname";
        $self->{encoding} = $obj->name;
        return $self;
    }
    else {
        return $self->{encoding};
    }
}

# bytes([$encname])
#
# Decodes the instance data from $encname, falling back to the instance
# encoding when $encname is omitted, then resets the instance encoding
# to ''.  Confesses when the encoding cannot be found.  Returns the
# object so calls can be chained.
sub bytes {
    my ( $self, $encname ) = @_;
    $encname ||= $self->{encoding};
    my $obj = find_encoding($encname)
      or confess __PACKAGE__, ": unknown encoding: $encname";

    # The second argument is Encode's CHECK value (1 == Encode::FB_CROAK).
    $self->{data}     = $obj->decode( $self->{data}, 1 );
    $self->{encoding} = '';
    return $self;
}

sub DESTROY {    # defined so it won't autoload.
    DEBUG and warn shift;
}

# AUTOLOAD
#
# Any method that is not defined above is treated as an encoding name:
# $e->latin1 converts the instance data to latin1.  If the object
# already has an encoding the data is transcoded in place with
# from_to(); otherwise it is encoded directly.  The instance encoding is
# then set to the canonical name of the target.  Confesses when invoked
# on a non-object or when the method name is not a known encoding.
# Returns the object so calls can be chained.
sub AUTOLOAD {
    my $self = shift;
    ref($self)
      or confess "$self is not an object";
    my $myname = $AUTOLOAD;
    $myname =~ s/.*://;    # strip fully-qualified portion
    my $obj = find_encoding($myname)
      or confess __PACKAGE__, ": unknown encoding: $myname";
    DEBUG and warn $self->{encoding}, " => ", $obj->name;
    if ( $self->{encoding} ) {
        from_to( $self->{data}, $self->{encoding}, $obj->name, 1 );
    }
    else {
        $self->{data} = $obj->encode( $self->{data}, 1 );
    }
    $self->{encoding} = $obj->name;
    return $self;
}

# Stringification yields the instance data; numification yields its
# length in bytes.
use overload
  q("") => sub { $_[0]->{data} },
  q(0+) => sub { use bytes (); bytes::length( $_[0]->{data} ) },
  fallback => 1,
  ;

1;
__END__

=head1 NAME

Encode::Encoder -- Object Oriented Encoder

=head1 SYNOPSIS

  use Encode::Encoder;
  # Encode::encode("ISO-8859-1", $data);
  Encode::Encoder->new($data)->iso_8859_1; # OOP way
  # shortcut
  use Encode::Encoder qw(encoder);
  encoder($data)->iso_8859_1;
  # you can stack them!
  encoder($data)->iso_8859_1->base64;  # provided base64() is defined
  # you can use it as a decoder as well
  encoder($base64)->bytes('base64')->latin1;
  # stringified
  print encoder($data)->utf8->latin1;  # prints the string in latin1
  # numified
  encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)

=head1 ABSTRACT

B<Encode::Encoder> allows you to use Encode in an object-oriented
style.  This is not only more intuitive than a functional approach,
but also handier when you want to stack encodings.
Suppose you want
your UTF-8 string converted to Latin1 then Base64: you can simply say

  my $base64 = encoder($utf8)->latin1->base64;

instead of

  my $latin1 = encode("latin1", $utf8);
  my $base64 = encode_base64($latin1);

or the lazier and more convoluted

  my $base64 = encode_base64(encode("latin1", $utf8));

=head1 Description

Here is how to use this module.

=over 4

=item *

There are at least two instance variables stored in a hash reference,
{data} and {encoding}.

=item *

When you call a method that is not defined, the method name is taken
as the name of an encoding and the instance I<data> is encoded with
that encoding.  If successful, the instance I<encoding> is set
accordingly.

=item *

You can retrieve the result via -E<gt>data but usually you don't have to
because the stringify operator ("") is overridden to do exactly that.

=back

=head2 Predefined Methods

This module predefines the methods below:

=over 4

=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);

returns an encoder object.  Its data is initialized with $data if
present, and its encoding is set to $encoding if present.

When $encoding is omitted, it defaults to utf8 if $data is already in
utf8 or "" (empty string) otherwise.

=item encoder()

is an alias of Encode::Encoder-E<gt>new().  This one is exported on demand.

=item $e-E<gt>data([$data])

When $data is present, sets the instance data to $data and returns
$data.  Otherwise, the current instance data is returned.

=item $e-E<gt>encoding([$encoding])

When $encoding is present, sets the instance encoding to $encoding and
returns the object itself.  Otherwise, the current instance encoding is
returned.

=item $e-E<gt>bytes([$encoding])

decodes instance data from $encoding, or the instance encoding if
omitted.
If the conversion is successful, the instance encoding
will be set to "".

The name I<bytes> was deliberately picked to avoid namespace tainting
-- this module may be used as a base class so method names that appear
in Encode::Encoding are avoided.

=back

=head2 Example: base64 transcoder

This module is designed to work with L<Encode::Encoding>.
To make the Base64 transcoder example above really work, you could
write a module like this:

  package Encode::Base64;
  use base 'Encode::Encoding';
  __PACKAGE__->Define('base64');
  use MIME::Base64;
  sub encode{
      my ($obj, $data) = @_;
      return encode_base64($data);
  }
  sub decode{
      my ($obj, $data) = @_;
      return decode_base64($data);
  }
  1;
  __END__

And your caller module would be something like this:

  use Encode::Encoder qw(encoder);
  use Encode::Base64;

  # now you can really do the following

  encoder($data)->iso_8859_1->base64;
  encoder($base64)->bytes('base64')->latin1;

=head2 Operator Overloading

This module overloads two operators, stringify ("") and numify (0+).

Stringify dumps the data inside the object.

Numify returns the number of bytes in the instance data.

They come in handy when you want to print or find the size of data.

=head1 SEE ALSO

L<Encode>,
L<Encode::Encoding>

=cut