#!./perl
# $Id: piconv,v 2.1 2004/10/06 05:07:20 dankogai Exp $
#
# piconv -- convert the character encoding of files (or STDIN, or a literal
# string given with -s) and write the result to STDOUT.  See the POD after
# __END__ for the user documentation.
#
use 5.8.0;
use strict;
use warnings;
use Encode;
use Encode::Alias;

# Conversion schemes that may be selected with -S/--scheme.
my %Scheme = map { $_ => 1 } qw(from_to decode_encode perlio);

use File::Basename;
my $name = basename($0);

use Getopt::Long qw(:config no_ignore_case);

my %Opt;

# GetOptions has already reported what was wrong; passing a message makes
# help() exit with a failure status.
GetOptions(
    \%Opt,
    'from|f=s',
    'to|t=s',
    'list|l',
    'string|s=s',
    'check|C=i',
    'c',
    'perlqq|p',
    'debug|D',
    'scheme|S=s',
    'resolve|r=s',
    'help',
) or help("invalid command line options");

help()           if $Opt{help};
list_encodings() if $Opt{list};

# POSIX precedence: LC_ALL overrides LC_CTYPE, which overrides LANG.
my $locale = $ENV{LC_ALL} || $ENV{LC_CTYPE} || $ENV{LANG};

resolve_encoding($Opt{resolve}) if defined $Opt{resolve};

# At least one of -f / -t must be given; the other one falls back to the
# locale.
help() unless $Opt{from} || $Opt{to};
my $from = $Opt{from} || $locale or help("from_encoding unspecified");
my $to   = $Opt{to}   || $locale or help("to_encoding unspecified");

# Resolve both encodings up front so that an unknown name produces one clear
# message whichever scheme is used (the perlio scheme would otherwise only
# notice inside binmode).
my $efrom = Encode::find_encoding($from)
    or die "$name: unknown encoding '$from'\n";
my $eto = Encode::find_encoding($to)
    or die "$name: unknown encoding '$to'\n";

# -s: convert the literal string and stop.  Tested with defined() so that
# an empty string or "0" is still treated as input instead of silently
# falling through to reading STDIN.
if (defined $Opt{string}) {
    my $string = $Opt{string};
    defined Encode::from_to($string, $from, $to)
        or die "$name: cannot convert string from $from to $to\n";
    print $string;
    exit;
}

# Getopt::Long stores 'scheme|S=s' under the key "scheme".
my $scheme = 'from_to';
if (defined $Opt{scheme}) {
    if (exists $Scheme{ $Opt{scheme} }) {
        $scheme = $Opt{scheme};
    }
    else {
        warn "$name: unknown scheme '$Opt{scheme}', using 'from_to'\n";
    }
}

# -C N gives the check value directly, -c is shorthand for a true value,
# and -p overrides both with Encode::FB_PERLQQ.
my $check = $Opt{check} || $Opt{c} || 0;
$check = Encode::FB_PERLQQ if $Opt{perlqq};

if ($Opt{debug}) {
    my $cfrom = $efrom->name;
    my $cto   = $eto->name;
    print <<"EOT";
Scheme: $scheme
From:   $from => $cfrom
To:     $to => $cto
EOT
}

# we do not use <> (or ARGV) for the sake of binmode()
@ARGV or push @ARGV, \*STDIN;

# Number of inputs that could not be opened; decides the exit status.
my $failures = 0;

if ($scheme eq 'perlio') {
    # NI-S favorite: let the PerlIO layers do the conversion.
    binmode STDOUT => "raw:encoding($to)"
        or die "$name: cannot set encoding $to on STDOUT: $!\n";
    for my $argv (@ARGV) {
        my $ifh = open_input($argv);
        unless ($ifh) {
            warn "$name: cannot open $argv: $!\n";
            $failures++;
            next;
        }
        binmode $ifh => "raw:encoding($from)"
            or die "$name: cannot set encoding $from on input: $!\n";
        print while (<$ifh>);
    }
}
else {
    binmode STDOUT;
    for my $argv (@ARGV) {
        my $ifh = open_input($argv);
        unless ($ifh) {
            warn "$name: cannot open $argv: $!\n";
            $failures++;
            next;
        }
        binmode $ifh;
        if ($scheme eq 'from_to') {    # default
            while (<$ifh>) {
                Encode::from_to($_, $from, $to, $check);
                print;
            }
        }
        elsif ($scheme eq 'decode_encode') {    # step-by-step
            while (<$ifh>) {
                my $decoded = decode($from, $_, $check);
                my $encoded = encode($to, $decoded);
                print $encoded;
            }
        }
        else {    # won't reach
            die "$name: unknown scheme: $scheme";
        }
    }
}

exit($failures ? 1 : 0);

# Return a read handle for one input: a reference (the \*STDIN pushed above)
# is returned as it is, anything else is opened as a file name.  Returns
# nothing when the file cannot be opened, leaving $! set for the caller.
sub open_input {
    my ($source) = @_;
    return $source if ref $source;
    open my $fh, "<", $source or return;
    return $fh;
}

# -l: print every encoding name known to Encode, one per line, and exit.
sub list_encodings {
    print join("\n", Encode->encodings(":all")), "\n";
    exit 0;
}

# -r: print the name Encode resolves the given alias to and exit 0, or
# complain and exit 1 when Encode does not know the alias.
sub resolve_encoding {
    if (my $alias = Encode::resolve_alias($_[0])) {
        print $alias, "\n";
        exit 0;
    }
    else {
        warn "$name: $_[0] is not known to Encode\n";
        exit 1;
    }
}

# Print the usage text to STDERR and exit.  When called with a message the
# message is printed first and the exit status is 1; a plain request for
# help exits with 0.
sub help {
    my $message = shift;
    $message and print STDERR "$name error: $message\n";
    print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
$name -r encoding_alias
  -l,--list
     lists all available encodings
  -r,--resolve encoding_alias
     resolve encoding to its (Encode) canonical name
  -f,--from from_encoding
     when omitted, the current locale will be used
  -t,--to to_encoding
     when omitted, the current locale will be used
  -s,--string string
     "string" will be the input instead of STDIN or files
The following are mainly of interest to Encode hackers:
  -D,--debug          show debug information
  -C N | -c | -p      check the validity of the input
  -S,--scheme scheme  use the scheme for conversion
EOT
    exit($message ? 1 : 0);
}

__END__

=head1 NAME

piconv -- iconv(1), reinvented in perl

=head1 SYNOPSIS

  piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
  piconv -l
  piconv [-C N|-c|-p]
  piconv -S scheme ...
  piconv -r encoding
  piconv -D ...
  piconv -h

=head1 DESCRIPTION

B<piconv> is a Perl version of B<iconv>, a character encoding converter
widely available for various Unixen today.  This script was primarily
a technology demonstrator for Perl 5.8.0, but you can use piconv in the
place of iconv for virtually any case.

piconv converts the character encoding of either STDIN or files
specified in the argument and prints out to STDOUT.

Here is the list of options.  Each option can be in short format (-f)
or long (--from).

=over 4

=item -f,--from from_encoding

Specifies the encoding you are converting from.  Unlike B<iconv>,
this option can be omitted.  In such cases, the current locale is used.

=item -t,--to to_encoding

Specifies the encoding you are converting to.  Unlike B<iconv>,
this option can be omitted.  In such cases, the current locale is used.

Note that at least one of -f and -t must be given: when both are
omitted, B<piconv> prints its usage message instead of converting.

=item -s,--string I<string>

Uses I<string> instead of a file as the source of text.

=item -l,--list

Lists all available encodings, one per line, in case-insensitive
order.  Note that only the canonical names are listed; many aliases
exist.  For example, the names are case-insensitive, and many standard
and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
instead of "cp850", or "winlatin1" for "cp1252".  See L<Encode::Supported>
for a full discussion.

=item -r,--resolve I<encoding_alias>

Resolves I<encoding_alias> to its (Encode) canonical name and prints it.

=item -C,--check I<N>

Check the validity of the stream if I<N> = 1.  When I<N> = -1, something
interesting happens when it encounters an invalid character.  I<N> is
handed to L<Encode> as its I<CHECK> argument by the C<from_to> and
C<decode_encode> schemes.

=item -c

Same as C<-C 1>.

=item -p,--perlqq

Sets the check value to C<Encode::FB_PERLQQ>.  See L<Encode> for how
invalid characters are handled with that value.

=item -h,--help

Show usage.

=item -D,--debug

Invokes debugging mode.  Primarily for Encode hackers.

=item -S,--scheme scheme

Selects which scheme is to be used for conversion.  Available schemes
are as follows:

=over 4

=item from_to

Uses Encode::from_to for conversion.  This is the default.

=item decode_encode

Input strings are decode()d then encode()d.  A straight two-step
implementation.

=item perlio

The new perlIO layer is used.  NI-S' favorite.

=back

Like the I<-D> option, this is also for Encode hackers.

=back

=head1 SEE ALSO

L<iconv(1)>
L<locale(3)>
L<Encode>
L<Encode::Supported>
L<Encode::Alias>
L<PerlIO>

=cut