1package ExtUtils::Constant::Base; 2 3use strict; 4use vars qw($VERSION $is_perl56); 5use Carp; 6use Text::Wrap; 7use ExtUtils::Constant::Utils qw(C_stringify perl_stringify); 8 9$VERSION = '0.01'; 10 11$is_perl56 = ($] < 5.007 && $] > 5.005_50); 12 13 14=head1 NAME 15 16ExtUtils::Constant::Base - base class for ExtUtils::Constant objects 17 18=head1 SYNOPSIS 19 20 require ExtUtils::Constant::Base; 21 @ISA = 'ExtUtils::Constant::Base'; 22 23=head1 DESCRIPTION 24 25ExtUtils::Constant::Base provides a base implementation of methods to 26generate C code to give fast constant value lookup by named string. Currently 27it's mostly used ExtUtils::Constant::XS, which generates the lookup code 28for the constant() subroutine found in many XS modules. 29 30=head1 USAGE 31 32ExtUtils::Constant::Base exports no subroutines. The following methods are 33available 34 35=over 4 36 37=cut 38 39sub valid_type { 40 # Default to assuming that you don't need different types of return data. 41 1; 42} 43sub default_type { 44 ''; 45} 46 47=item header 48 49A method returning a scalar containing definitions needed, typically for a 50C header file. 51 52=cut 53 54sub header { 55 '' 56} 57 58# This might actually be a return statement. Note that you are responsible 59# for any space you might need before your value, as it lets to perform 60# "tricks" such as "return KEY_" and have strings appended. 61sub assignment_clause_for_type; 62# In which case this might be an empty string 63sub return_statement_for_type {undef}; 64sub return_statement_for_notdef; 65sub return_statement_for_notfound; 66 67# "#if 1" is true to a C pre-processor 68sub macro_from_name { 69 1; 70} 71 72sub name_param { 73 'name'; 74} 75 76# This is possibly buggy, in that it's not mandatory (below, in the main 77# C_constant parameters, but is expected to exist here, if it's needed) 78# Buggy because if you're definitely pure 8 bit only, and will never be 79# presented with your constants in utf8, the default form of C_constant can't 80# be told not to do the utf8 version. 81 82sub is_utf8_param { 83 'utf8'; 84} 85 86sub memEQ { 87 "!memcmp"; 88} 89 90=item memEQ_clause args_hashref 91 92A method to return a suitable C C<if> statement to check whether I<name> 93is equal to the C variable C<name>. If I<checked_at> is defined, then it 94is used to avoid C<memEQ> for short names, or to generate a comment to 95highlight the position of the character in the C<switch> statement. 96 97If i<checked_at> is a reference to a scalar, then instead it gives 98the characters pre-checked at the beginning, (and the number of chars by 99which the C variable name has been advanced. These need to be chopped from 100the front of I<name>). 101 102=cut 103 104sub memEQ_clause { 105# if (memEQ(name, "thingy", 6)) { 106 # Which could actually be a character comparison or even "" 107 my ($self, $args) = @_; 108 my ($name, $checked_at, $indent) = @{$args}{qw(name checked_at indent)}; 109 $indent = ' ' x ($indent || 4); 110 my $front_chop; 111 if (ref $checked_at) { 112 # regexp won't work on 5.6.1 without use utf8; in turn that won't work 113 # on 5.005_03. 114 substr ($name, 0, length $$checked_at,) = ''; 115 $front_chop = C_stringify ($$checked_at); 116 undef $checked_at; 117 } 118 my $len = length $name; 119 120 if ($len < 2) { 121 return $indent . "{\n" 122 if (defined $checked_at and $checked_at == 0) or $len == 0; 123 # We didn't switch, drop through to the code for the 2 character string 124 $checked_at = 1; 125 } 126 127 my $name_param = $self->name_param; 128 129 if ($len < 3 and defined $checked_at) { 130 my $check; 131 if ($checked_at == 1) { 132 $check = 0; 133 } elsif ($checked_at == 0) { 134 $check = 1; 135 } 136 if (defined $check) { 137 my $char = C_stringify (substr $name, $check, 1); 138 # Placate 5.005 with a break in the string. I can't see a good way of 139 # getting it to not take [ as introducing an array lookup, even with 140 # ${name_param}[$check] 141 return $indent . "if ($name_param" . "[$check] == '$char') {\n"; 142 } 143 } 144 if (($len == 2 and !defined $checked_at) 145 or ($len == 3 and defined ($checked_at) and $checked_at == 2)) { 146 my $char1 = C_stringify (substr $name, 0, 1); 147 my $char2 = C_stringify (substr $name, 1, 1); 148 return $indent . 149 "if ($name_param" . "[0] == '$char1' && $name_param" . "[1] == '$char2') {\n"; 150 } 151 if (($len == 3 and defined ($checked_at) and $checked_at == 1)) { 152 my $char1 = C_stringify (substr $name, 0, 1); 153 my $char2 = C_stringify (substr $name, 2, 1); 154 return $indent . 155 "if ($name_param" . "[0] == '$char1' && $name_param" . "[2] == '$char2') {\n"; 156 } 157 158 my $pointer = '^'; 159 my $have_checked_last = defined ($checked_at) && $len == $checked_at + 1; 160 if ($have_checked_last) { 161 # Checked at the last character, so no need to memEQ it. 162 $pointer = C_stringify (chop $name); 163 $len--; 164 } 165 166 $name = C_stringify ($name); 167 my $memEQ = $self->memEQ(); 168 my $body = $indent . "if ($memEQ($name_param, \"$name\", $len)) {\n"; 169 # Put a little ^ under the letter we checked at 170 # Screws up for non printable and non-7 bit stuff, but that's too hard to 171 # get right. 172 if (defined $checked_at) { 173 $body .= $indent . "/* " . (' ' x length $memEQ) 174 . (' ' x length $name_param) 175 . (' ' x $checked_at) . $pointer 176 . (' ' x ($len - $checked_at + length $len)) . " */\n"; 177 } elsif (defined $front_chop) { 178 $body .= $indent . "/* $front_chop" 179 . (' ' x ($len + 1 + length $len)) . " */\n"; 180 } 181 return $body; 182} 183 184=item dump_names arg_hashref, ITEM... 185 186An internal function to generate the embedded perl code that will regenerate 187the constant subroutines. I<default_type>, I<types> and I<ITEM>s are the 188same as for C_constant. I<indent> is treated as number of spaces to indent 189by. If C<declare_types> is true a C<$types> is always declared in the perl 190code generated, if defined and false never declared, and if undefined C<$types> 191is only declared if the values in I<types> as passed in cannot be inferred from 192I<default_types> and the I<ITEM>s. 193 194=cut 195 196sub dump_names { 197 my ($self, $args, @items) = @_; 198 my ($default_type, $what, $indent, $declare_types) 199 = @{$args}{qw(default_type what indent declare_types)}; 200 $indent = ' ' x ($indent || 0); 201 202 my $result; 203 my (@simple, @complex, %used_types); 204 foreach (@items) { 205 my $type; 206 if (ref $_) { 207 $type = $_->{type} || $default_type; 208 if ($_->{utf8}) { 209 # For simplicity always skip the bytes case, and reconstitute this entry 210 # from its utf8 twin. 211 next if $_->{utf8} eq 'no'; 212 # Copy the hashref, as we don't want to mess with the caller's hashref. 213 $_ = {%$_}; 214 unless ($is_perl56) { 215 utf8::decode ($_->{name}); 216 } else { 217 $_->{name} = pack 'U*', unpack 'U0U*', $_->{name}; 218 } 219 delete $_->{utf8}; 220 } 221 } else { 222 $_ = {name=>$_}; 223 $type = $default_type; 224 } 225 $used_types{$type}++; 226 if ($type eq $default_type 227 # grr 5.6.1 228 and length $_->{name} 229 and length $_->{name} == ($_->{name} =~ tr/A-Za-z0-9_//) 230 and !defined ($_->{macro}) and !defined ($_->{value}) 231 and !defined ($_->{default}) and !defined ($_->{pre}) 232 and !defined ($_->{post}) and !defined ($_->{def_pre}) 233 and !defined ($_->{def_post}) and !defined ($_->{weight})) { 234 # It's the default type, and the name consists only of A-Za-z0-9_ 235 push @simple, $_->{name}; 236 } else { 237 push @complex, $_; 238 } 239 } 240 241 if (!defined $declare_types) { 242 # Do they pass in any types we weren't already using? 243 foreach (keys %$what) { 244 next if $used_types{$_}; 245 $declare_types++; # Found one in $what that wasn't used. 246 last; # And one is enough to terminate this loop 247 } 248 } 249 if ($declare_types) { 250 $result = $indent . 'my $types = {map {($_, 1)} qw(' 251 . join (" ", sort keys %$what) . ")};\n"; 252 } 253 local $Text::Wrap::huge = 'overflow'; 254 local $Text::Wrap::columns = 80; 255 $result .= wrap ($indent . "my \@names = (qw(", 256 $indent . " ", join (" ", sort @simple) . ")"); 257 if (@complex) { 258 foreach my $item (sort {$a->{name} cmp $b->{name}} @complex) { 259 my $name = perl_stringify $item->{name}; 260 my $line = ",\n$indent {name=>\"$name\""; 261 $line .= ", type=>\"$item->{type}\"" if defined $item->{type}; 262 foreach my $thing (qw (macro value default pre post def_pre def_post)) { 263 my $value = $item->{$thing}; 264 if (defined $value) { 265 if (ref $value) { 266 $line .= ", $thing=>[\"" 267 . join ('", "', map {perl_stringify $_} @$value) . '"]'; 268 } else { 269 $line .= ", $thing=>\"" . perl_stringify($value) . "\""; 270 } 271 } 272 } 273 $line .= "}"; 274 # Ensure that the enclosing C comment doesn't end 275 # by turning */ into *" . "/ 276 $line =~ s!\*\/!\*" . "/!gs; 277 # gcc -Wall doesn't like finding /* inside a comment 278 $line =~ s!\/\*!/" . "\*!gs; 279 $result .= $line; 280 } 281 } 282 $result .= ");\n"; 283 284 $result; 285} 286 287=item assign arg_hashref, VALUE... 288 289A method to return a suitable assignment clause. If I<type> is aggregate 290(eg I<PVN> expects both pointer and length) then there should be multiple 291I<VALUE>s for the components. I<pre> and I<post> if defined give snippets 292of C code to proceed and follow the assignment. I<pre> will be at the start 293of a block, so variables may be defined in it. 294 295=cut 296# Hmm. value undef to to NOTDEF? value () to do NOTFOUND? 297 298sub assign { 299 my $self = shift; 300 my $args = shift; 301 my ($indent, $type, $pre, $post, $item) 302 = @{$args}{qw(indent type pre post item)}; 303 $post ||= ''; 304 my $clause; 305 my $close; 306 if ($pre) { 307 chomp $pre; 308 $close = "$indent}\n"; 309 $clause = $indent . "{\n"; 310 $indent .= " "; 311 $clause .= "$indent$pre"; 312 $clause .= ";" unless $pre =~ /;$/; 313 $clause .= "\n"; 314 } 315 confess "undef \$type" unless defined $type; 316 confess "Can't generate code for type $type" 317 unless $self->valid_type($type); 318 319 $clause .= join '', map {"$indent$_\n"} 320 $self->assignment_clause_for_type({type=>$type,item=>$item}, @_); 321 chomp $post; 322 if (length $post) { 323 $clause .= "$post"; 324 $clause .= ";" unless $post =~ /;$/; 325 $clause .= "\n"; 326 } 327 my $return = $self->return_statement_for_type($type); 328 $clause .= "$indent$return\n" if defined $return; 329 $clause .= $close if $close; 330 return $clause; 331} 332 333=item return_clause arg_hashref, ITEM 334 335A method to return a suitable C<#ifdef> clause. I<ITEM> is a hashref 336(as passed to C<C_constant> and C<match_clause>. I<indent> is the number 337of spaces to indent, defaulting to 6. 338 339=cut 340 341sub return_clause { 342 343##ifdef thingy 344# *iv_return = thingy; 345# return PERL_constant_ISIV; 346##else 347# return PERL_constant_NOTDEF; 348##endif 349 my ($self, $args, $item) = @_; 350 my $indent = $args->{indent}; 351 352 my ($name, $value, $macro, $default, $pre, $post, $def_pre, $def_post, $type) 353 = @$item{qw (name value macro default pre post def_pre def_post type)}; 354 $value = $name unless defined $value; 355 $macro = $self->macro_from_name($item) unless defined $macro; 356 # "#if 1" is true to a C pre-processor 357 $macro = 1 if !defined $macro or $macro eq ''; 358 $indent = ' ' x ($indent || 6); 359 unless (defined $type) { 360 # use Data::Dumper; print STDERR Dumper ($item); 361 confess "undef \$type"; 362 } 363 364 my $clause; 365 366 ##ifdef thingy 367 if (ref $macro) { 368 $clause = $macro->[0]; 369 } elsif ($macro ne "1") { 370 $clause = "#ifdef $macro\n"; 371 } 372 373 # *iv_return = thingy; 374 # return PERL_constant_ISIV; 375 $clause 376 .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, post=>$post, 377 item=>$item}, ref $value ? @$value : $value); 378 379 if (ref $macro or $macro ne "1") { 380 ##else 381 $clause .= "#else\n"; 382 383 # return PERL_constant_NOTDEF; 384 if (!defined $default) { 385 my $notdef = $self->return_statement_for_notdef(); 386 $clause .= "$indent$notdef\n" if defined $notdef; 387 } else { 388 my @default = ref $default ? @$default : $default; 389 $type = shift @default; 390 $clause .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, 391 post=>$post, item=>$item}, @default); 392 } 393 394 ##endif 395 if (ref $macro) { 396 $clause .= $macro->[1]; 397 } else { 398 $clause .= "#endif\n"; 399 } 400 } 401 return $clause; 402} 403 404sub match_clause { 405 # $offset defined if we have checked an offset. 406 my ($self, $args, $item) = @_; 407 my ($offset, $indent) = @{$args}{qw(checked_at indent)}; 408 $indent = ' ' x ($indent || 4); 409 my $body = ''; 410 my ($no, $yes, $either, $name, $inner_indent); 411 if (ref $item eq 'ARRAY') { 412 ($yes, $no) = @$item; 413 $either = $yes || $no; 414 confess "$item is $either expecting hashref in [0] || [1]" 415 unless ref $either eq 'HASH'; 416 $name = $either->{name}; 417 } else { 418 confess "$item->{name} has utf8 flag '$item->{utf8}', should be false" 419 if $item->{utf8}; 420 $name = $item->{name}; 421 $inner_indent = $indent; 422 } 423 424 $body .= $self->memEQ_clause ({name => $name, checked_at => $offset, 425 indent => length $indent}); 426 # If we've been presented with an arrayref for $item, then the user string 427 # contains in the range 128-255, and we need to check whether it was utf8 428 # (or not). 429 # In the worst case we have two named constants, where one's name happens 430 # encoded in UTF8 happens to be the same byte sequence as the second's 431 # encoded in (say) ISO-8859-1. 432 # In this case, $yes and $no both have item hashrefs. 433 if ($yes) { 434 $body .= $indent . " if (" . $self->is_utf8_param . ") {\n"; 435 } elsif ($no) { 436 $body .= $indent . " if (!" . $self->is_utf8_param . ") {\n"; 437 } 438 if ($either) { 439 $body .= $self->return_clause ({indent=>4 + length $indent}, $either); 440 if ($yes and $no) { 441 $body .= $indent . " } else {\n"; 442 $body .= $self->return_clause ({indent=>4 + length $indent}, $no); 443 } 444 $body .= $indent . " }\n"; 445 } else { 446 $body .= $self->return_clause ({indent=>2 + length $indent}, $item); 447 } 448 $body .= $indent . "}\n"; 449} 450 451 452=item switch_clause arg_hashref, NAMELEN, ITEMHASH, ITEM... 453 454An internal method to generate a suitable C<switch> clause, called by 455C<C_constant> I<ITEM>s are in the hash ref format as given in the description 456of C<C_constant>, and must all have the names of the same length, given by 457I<NAMELEN>. I<ITEMHASH> is a reference to a hash, keyed by name, values being 458the hashrefs in the I<ITEM> list. (No parameters are modified, and there can 459be keys in the I<ITEMHASH> that are not in the list of I<ITEM>s without 460causing problems - the hash is passed in to save generating it afresh for 461each call). 462 463=cut 464 465sub switch_clause { 466 my ($self, $args, $namelen, $items, @items) = @_; 467 my ($indent, $comment) = @{$args}{qw(indent comment)}; 468 $indent = ' ' x ($indent || 2); 469 470 local $Text::Wrap::huge = 'overflow'; 471 local $Text::Wrap::columns = 80; 472 473 my @names = sort map {$_->{name}} @items; 474 my $leader = $indent . '/* '; 475 my $follower = ' ' x length $leader; 476 my $body = $indent . "/* Names all of length $namelen. */\n"; 477 if (defined $comment) { 478 $body = wrap ($leader, $follower, $comment) . "\n"; 479 $leader = $follower; 480 } 481 my @safe_names = @names; 482 foreach (@safe_names) { 483 confess sprintf "Name '$_' is length %d, not $namelen", length 484 unless length == $namelen; 485 # Argh. 5.6.1 486 # next unless tr/A-Za-z0-9_//c; 487 next if tr/A-Za-z0-9_// == length; 488 $_ = '"' . perl_stringify ($_) . '"'; 489 # Ensure that the enclosing C comment doesn't end 490 # by turning */ into *" . "/ 491 s!\*\/!\*"."/!gs; 492 # gcc -Wall doesn't like finding /* inside a comment 493 s!\/\*!/"."\*!gs; 494 } 495 $body .= wrap ($leader, $follower, join (" ", @safe_names) . " */") . "\n"; 496 # Figure out what to switch on. 497 # (RMS, Spread of jump table, Position, Hashref) 498 my @best = (1e38, ~0); 499 # Prefer the last character over the others. (As it lets us shorten the 500 # memEQ clause at no cost). 501 foreach my $i ($namelen - 1, 0 .. ($namelen - 2)) { 502 my ($min, $max) = (~0, 0); 503 my %spread; 504 if ($is_perl56) { 505 # Need proper Unicode preserving hash keys for bytes in range 128-255 506 # here too, for some reason. grr 5.6.1 yet again. 507 tie %spread, 'ExtUtils::Constant::Aaargh56Hash'; 508 } 509 foreach (@names) { 510 my $char = substr $_, $i, 1; 511 my $ord = ord $char; 512 confess "char $ord is out of range" if $ord > 255; 513 $max = $ord if $ord > $max; 514 $min = $ord if $ord < $min; 515 push @{$spread{$char}}, $_; 516 # warn "$_ $char"; 517 } 518 # I'm going to pick the character to split on that minimises the root 519 # mean square of the number of names in each case. Normally this should 520 # be the one with the most keys, but it may pick a 7 where the 8 has 521 # one long linear search. I'm not sure if RMS or just sum of squares is 522 # actually better. 523 # $max and $min are for the tie-breaker if the root mean squares match. 524 # Assuming that the compiler may be building a jump table for the 525 # switch() then try to minimise the size of that jump table. 526 # Finally use < not <= so that if it still ties the earliest part of 527 # the string wins. Because if that passes but the memEQ fails, it may 528 # only need the start of the string to bin the choice. 529 # I think. But I'm micro-optimising. :-) 530 # OK. Trump that. Now favour the last character of the string, before the 531 # rest. 532 my $ss; 533 $ss += @$_ * @$_ foreach values %spread; 534 my $rms = sqrt ($ss / keys %spread); 535 if ($rms < $best[0] || ($rms == $best[0] && ($max - $min) < $best[1])) { 536 @best = ($rms, $max - $min, $i, \%spread); 537 } 538 } 539 confess "Internal error. Failed to pick a switch point for @names" 540 unless defined $best[2]; 541 # use Data::Dumper; print Dumper (@best); 542 my ($offset, $best) = @best[2,3]; 543 $body .= $indent . "/* Offset $offset gives the best switch position. */\n"; 544 545 my $do_front_chop = $offset == 0 && $namelen > 2; 546 if ($do_front_chop) { 547 $body .= $indent . "switch (*" . $self->name_param() . "++) {\n"; 548 } else { 549 $body .= $indent . "switch (" . $self->name_param() . "[$offset]) {\n"; 550 } 551 foreach my $char (sort keys %$best) { 552 confess sprintf "'$char' is %d bytes long, not 1", length $char 553 if length ($char) != 1; 554 confess sprintf "char %#X is out of range", ord $char if ord ($char) > 255; 555 $body .= $indent . "case '" . C_stringify ($char) . "':\n"; 556 foreach my $thisone (sort { 557 # Deal with the case of an item actually being an array ref to 1 or 2 558 # hashrefs. Don't assign to $a or $b, as they're aliases to the orignal 559 my $l = ref $a eq 'ARRAY' ? ($a->[0] || $->[1]) : $a; 560 my $r = ref $b eq 'ARRAY' ? ($b->[0] || $->[1]) : $b; 561 # Sort by weight first 562 ($r->{weight} || 0) <=> ($l->{weight} || 0) 563 # Sort equal weights by name 564 or $l->{name} cmp $r->{name}} 565 # If this looks evil, maybe it is. $items is a 566 # hashref, and we're doing a hash slice on it 567 @{$items}{@{$best->{$char}}}) { 568 # warn "You are here"; 569 if ($do_front_chop) { 570 $body .= $self->match_clause ({indent => 2 + length $indent, 571 checked_at => \$char}, $thisone); 572 } else { 573 $body .= $self->match_clause ({indent => 2 + length $indent, 574 checked_at => $offset}, $thisone); 575 } 576 } 577 $body .= $indent . " break;\n"; 578 } 579 $body .= $indent . "}\n"; 580 return $body; 581} 582 583sub C_constant_return_type { 584 "static int"; 585} 586 587sub C_constant_prefix_param { 588 ''; 589} 590 591sub C_constant_prefix_param_defintion { 592 ''; 593} 594 595sub name_param_definition { 596 "const char *" . $_[0]->name_param; 597} 598 599sub namelen_param { 600 'len'; 601} 602 603sub namelen_param_definition { 604 'size_t ' . $_[0]->namelen_param; 605} 606 607sub C_constant_other_params { 608 ''; 609} 610 611sub C_constant_other_params_defintion { 612 ''; 613} 614 615=item params WHAT 616 617An "internal" method, subject to change, currently called to allow an 618overriding class to cache information that will then be passed into all 619the C<*param*> calls. (Yes, having to read the source to make sense of this is 620considered a known bug). I<WHAT> is be a hashref of types the constant 621function will return. In ExtUtils::Constant::XS this method is used to 622returns a hashref keyed IV NV PV SV to show which combination of pointers will 623be needed in the C argument list generated by 624C_constant_other_params_definition and C_constant_other_params 625 626=cut 627 628sub params { 629 ''; 630} 631 632 633=item dogfood arg_hashref, ITEM... 634 635An internal function to generate the embedded perl code that will regenerate 636the constant subroutines. Parameters are the same as for C_constant. 637 638Currently the base class does nothing and returns an empty string. 639 640=cut 641 642sub dogfood { 643 '' 644} 645 646=item C_constant arg_hashref, ITEM... 647 648A function that returns a B<list> of C subroutine definitions that return 649the value and type of constants when passed the name by the XS wrapper. 650I<ITEM...> gives a list of constant names. Each can either be a string, 651which is taken as a C macro name, or a reference to a hash with the following 652keys 653 654=over 8 655 656=item name 657 658The name of the constant, as seen by the perl code. 659 660=item type 661 662The type of the constant (I<IV>, I<NV> etc) 663 664=item value 665 666A C expression for the value of the constant, or a list of C expressions if 667the type is aggregate. This defaults to the I<name> if not given. 668 669=item macro 670 671The C pre-processor macro to use in the C<#ifdef>. This defaults to the 672I<name>, and is mainly used if I<value> is an C<enum>. If a reference an 673array is passed then the first element is used in place of the C<#ifdef> 674line, and the second element in place of the C<#endif>. This allows 675pre-processor constructions such as 676 677 #if defined (foo) 678 #if !defined (bar) 679 ... 680 #endif 681 #endif 682 683to be used to determine if a constant is to be defined. 684 685A "macro" 1 signals that the constant is always defined, so the C<#if>/C<#endif> 686test is omitted. 687 688=item default 689 690Default value to use (instead of C<croak>ing with "your vendor has not 691defined...") to return if the macro isn't defined. Specify a reference to 692an array with type followed by value(s). 693 694=item pre 695 696C code to use before the assignment of the value of the constant. This allows 697you to use temporary variables to extract a value from part of a C<struct> 698and return this as I<value>. This C code is places at the start of a block, 699so you can declare variables in it. 700 701=item post 702 703C code to place between the assignment of value (to a temporary) and the 704return from the function. This allows you to clear up anything in I<pre>. 705Rarely needed. 706 707=item def_pre 708 709=item def_post 710 711Equivalents of I<pre> and I<post> for the default value. 712 713=item utf8 714 715Generated internally. Is zero or undefined if name is 7 bit ASCII, 716"no" if the name is 8 bit (and so should only match if SvUTF8() is false), 717"yes" if the name is utf8 encoded. 718 719The internals automatically clone any name with characters 128-255 but none 720256+ (ie one that could be either in bytes or utf8) into a second entry 721which is utf8 encoded. 722 723=item weight 724 725Optional sorting weight for names, to determine the order of 726linear testing when multiple names fall in the same case of a switch clause. 727Higher comes earlier, undefined defaults to zero. 728 729=back 730 731In the argument hashref, I<package> is the name of the package, and is only 732used in comments inside the generated C code. I<subname> defaults to 733C<constant> if undefined. 734 735I<default_type> is the type returned by C<ITEM>s that don't specify their 736type. It defaults to the value of C<default_type()>. I<types> should be given 737either as a comma separated list of types that the C subroutine I<subname> 738will generate or as a reference to a hash. I<default_type> will be added to 739the list if not present, as will any types given in the list of I<ITEM>s. The 740resultant list should be the same list of types that C<XS_constant> is 741given. [Otherwise C<XS_constant> and C<C_constant> may differ in the number of 742parameters to the constant function. I<indent> is currently unused and 743ignored. In future it may be used to pass in information used to change the C 744indentation style used.] The best way to maintain consistency is to pass in a 745hash reference and let this function update it. 746 747I<breakout> governs when child functions of I<subname> are generated. If there 748are I<breakout> or more I<ITEM>s with the same length of name, then the code 749to switch between them is placed into a function named I<subname>_I<len>, for 750example C<constant_5> for names 5 characters long. The default I<breakout> is 7513. A single C<ITEM> is always inlined. 752 753=cut 754 755# The parameter now BREAKOUT was previously documented as: 756# 757# I<NAMELEN> if defined signals that all the I<name>s of the I<ITEM>s are of 758# this length, and that the constant name passed in by perl is checked and 759# also of this length. It is used during recursion, and should be C<undef> 760# unless the caller has checked all the lengths during code generation, and 761# the generated subroutine is only to be called with a name of this length. 762# 763# As you can see it now performs this function during recursion by being a 764# scalar reference. 765 766sub C_constant { 767 my ($self, $args, @items) = @_; 768 my ($package, $subname, $default_type, $what, $indent, $breakout) = 769 @{$args}{qw(package subname default_type types indent breakout)}; 770 $package ||= 'Foo'; 771 $subname ||= 'constant'; 772 # I'm not using this. But a hashref could be used for full formatting without 773 # breaking this API 774 # $indent ||= 0; 775 776 my ($namelen, $items); 777 if (ref $breakout) { 778 # We are called recursively. We trust @items to be normalised, $what to 779 # be a hashref, and pinch %$items from our parent to save recalculation. 780 ($namelen, $items) = @$breakout; 781 } else { 782 if ($is_perl56) { 783 # Need proper Unicode preserving hash keys. 784 require ExtUtils::Constant::Aaargh56Hash; 785 $items = {}; 786 tie %$items, 'ExtUtils::Constant::Aaargh56Hash'; 787 } 788 $breakout ||= 3; 789 $default_type ||= $self->default_type(); 790 if (!ref $what) { 791 # Convert line of the form IV,UV,NV to hash 792 $what = {map {$_ => 1} split /,\s*/, ($what || '')}; 793 # Figure out what types we're dealing with, and assign all unknowns to the 794 # default type 795 } 796 my @new_items; 797 foreach my $orig (@items) { 798 my ($name, $item); 799 if (ref $orig) { 800 # Make a copy which is a normalised version of the ref passed in. 801 $name = $orig->{name}; 802 my ($type, $macro, $value) = @$orig{qw (type macro value)}; 803 $type ||= $default_type; 804 $what->{$type} = 1; 805 $item = {name=>$name, type=>$type}; 806 807 undef $macro if defined $macro and $macro eq $name; 808 $item->{macro} = $macro if defined $macro; 809 undef $value if defined $value and $value eq $name; 810 $item->{value} = $value if defined $value; 811 foreach my $key (qw(default pre post def_pre def_post weight)) { 812 my $value = $orig->{$key}; 813 $item->{$key} = $value if defined $value; 814 # warn "$key $value"; 815 } 816 } else { 817 $name = $orig; 818 $item = {name=>$name, type=>$default_type}; 819 $what->{$default_type} = 1; 820 } 821 warn +(ref ($self) || $self) 822 . "doesn't know how to handle values of type $_ used in macro $name" 823 unless $self->valid_type ($item->{type}); 824 # tr///c is broken on 5.6.1 for utf8, so my original tr/\0-\177//c 825 # doesn't work. Upgrade to 5.8 826 # if ($name !~ tr/\0-\177//c || $] < 5.005_50) { 827 if ($name =~ tr/\0-\177// == length $name || $] < 5.005_50) { 828 # No characters outside 7 bit ASCII. 829 if (exists $items->{$name}) { 830 die "Multiple definitions for macro $name"; 831 } 832 $items->{$name} = $item; 833 } else { 834 # No characters outside 8 bit. This is hardest. 835 if (exists $items->{$name} and ref $items->{$name} ne 'ARRAY') { 836 confess "Unexpected ASCII definition for macro $name"; 837 } 838 # Again, 5.6.1 tr broken, so s/5\.6.*/5\.8\.0/; 839 # if ($name !~ tr/\0-\377//c) { 840 if ($name =~ tr/\0-\377// == length $name) { 841# if ($] < 5.007) { 842# $name = pack "C*", unpack "U*", $name; 843# } 844 $item->{utf8} = 'no'; 845 $items->{$name}[1] = $item; 846 push @new_items, $item; 847 # Copy item, to create the utf8 variant. 848 $item = {%$item}; 849 } 850 # Encode the name as utf8 bytes. 851 unless ($is_perl56) { 852 utf8::encode($name); 853 } else { 854# warn "Was >$name< " . length ${name}; 855 $name = pack 'C*', unpack 'C*', $name . pack 'U*'; 856# warn "Now '${name}' " . length ${name}; 857 } 858 if ($items->{$name}[0]) { 859 die "Multiple definitions for macro $name"; 860 } 861 $item->{utf8} = 'yes'; 862 $item->{name} = $name; 863 $items->{$name}[0] = $item; 864 # We have need for the utf8 flag. 865 $what->{''} = 1; 866 } 867 push @new_items, $item; 868 } 869 @items = @new_items; 870 # use Data::Dumper; print Dumper @items; 871 } 872 my $params = $self->params ($what); 873 874 # Probably "static int" 875 my ($body, @subs); 876 $body = $self->C_constant_return_type($params) . "\n$subname (" 877 # Eg "pTHX_ " 878 . $self->C_constant_prefix_param_defintion($params) 879 # Probably "const char *name" 880 . $self->name_param_definition($params); 881 # Something like ", STRLEN len" 882 $body .= ", " . $self->namelen_param_definition($params) 883 unless defined $namelen; 884 $body .= $self->C_constant_other_params_defintion($params); 885 $body .= ") {\n"; 886 887 if (defined $namelen) { 888 # We are a child subroutine. Print the simple description 889 my $comment = 'When generated this function returned values for the list' 890 . ' of names given here. However, subsequent manual editing may have' 891 . ' added or removed some.'; 892 $body .= $self->switch_clause ({indent=>2, comment=>$comment}, 893 $namelen, $items, @items); 894 } else { 895 # We are the top level. 896 $body .= " /* Initially switch on the length of the name. */\n"; 897 $body .= $self->dogfood ({package => $package, subname => $subname, 898 default_type => $default_type, what => $what, 899 indent => $indent, breakout => $breakout}, 900 @items); 901 $body .= ' switch ('.$self->namelen_param().") {\n"; 902 # Need to group names of the same length 903 my @by_length; 904 foreach (@items) { 905 push @{$by_length[length $_->{name}]}, $_; 906 } 907 foreach my $i (0 .. $#by_length) { 908 next unless $by_length[$i]; # None of this length 909 $body .= " case $i:\n"; 910 if (@{$by_length[$i]} == 1) { 911 my $only_thing = $by_length[$i]->[0]; 912 if ($only_thing->{utf8}) { 913 if ($only_thing->{utf8} eq 'yes') { 914 # With utf8 on flag item is passed in element 0 915 $body .= $self->match_clause (undef, [$only_thing]); 916 } else { 917 # With utf8 off flag item is passed in element 1 918 $body .= $self->match_clause (undef, [undef, $only_thing]); 919 } 920 } else { 921 $body .= $self->match_clause (undef, $only_thing); 922 } 923 } elsif (@{$by_length[$i]} < $breakout) { 924 $body .= $self->switch_clause ({indent=>4}, 925 $i, $items, @{$by_length[$i]}); 926 } else { 927 # Only use the minimal set of parameters actually needed by the types 928 # of the names of this length. 929 my $what = {}; 930 foreach (@{$by_length[$i]}) { 931 $what->{$_->{type}} = 1; 932 $what->{''} = 1 if $_->{utf8}; 933 } 934 $params = $self->params ($what); 935 push @subs, $self->C_constant ({package=>$package, 936 subname=>"${subname}_$i", 937 default_type => $default_type, 938 types => $what, indent => $indent, 939 breakout => [$i, $items]}, 940 @{$by_length[$i]}); 941 $body .= " return ${subname}_$i (" 942 # Eg "aTHX_ " 943 . $self->C_constant_prefix_param($params) 944 # Probably "name" 945 . $self->name_param($params); 946 $body .= $self->C_constant_other_params($params); 947 $body .= ");\n"; 948 } 949 $body .= " break;\n"; 950 } 951 $body .= " }\n"; 952 } 953 my $notfound = $self->return_statement_for_notfound(); 954 $body .= " $notfound\n" if $notfound; 955 $body .= "}\n"; 956 return (@subs, $body); 957} 958 9591; 960__END__ 961 962=back 963 964=head1 BUGS 965 966Not everything is documented yet. 967 968Probably others. 969 970=head1 AUTHOR 971 972Nicholas Clark <nick@ccl4.org> based on the code in C<h2xs> by Larry Wall and 973others 974