1# DB_File.pm -- Perl 5 interface to Berkeley DB 2# 3# written by Paul Marquess (pmqs@cpan.org) 4# last modified 11th November 2005 5# version 1.814 6# 7# Copyright (c) 1995-2005 Paul Marquess. All rights reserved. 8# This program is free software; you can redistribute it and/or 9# modify it under the same terms as Perl itself. 10 11 12package DB_File::HASHINFO ; 13 14require 5.00404; 15 16use warnings; 17use strict; 18use Carp; 19require Tie::Hash; 20@DB_File::HASHINFO::ISA = qw(Tie::Hash); 21 22sub new 23{ 24 my $pkg = shift ; 25 my %x ; 26 tie %x, $pkg ; 27 bless \%x, $pkg ; 28} 29 30 31sub TIEHASH 32{ 33 my $pkg = shift ; 34 35 bless { VALID => { 36 bsize => 1, 37 ffactor => 1, 38 nelem => 1, 39 cachesize => 1, 40 hash => 2, 41 lorder => 1, 42 }, 43 GOT => {} 44 }, $pkg ; 45} 46 47 48sub FETCH 49{ 50 my $self = shift ; 51 my $key = shift ; 52 53 return $self->{GOT}{$key} if exists $self->{VALID}{$key} ; 54 55 my $pkg = ref $self ; 56 croak "${pkg}::FETCH - Unknown element '$key'" ; 57} 58 59 60sub STORE 61{ 62 my $self = shift ; 63 my $key = shift ; 64 my $value = shift ; 65 66 my $type = $self->{VALID}{$key}; 67 68 if ( $type ) 69 { 70 croak "Key '$key' not associated with a code reference" 71 if $type == 2 && !ref $value && ref $value ne 'CODE'; 72 $self->{GOT}{$key} = $value ; 73 return ; 74 } 75 76 my $pkg = ref $self ; 77 croak "${pkg}::STORE - Unknown element '$key'" ; 78} 79 80sub DELETE 81{ 82 my $self = shift ; 83 my $key = shift ; 84 85 if ( exists $self->{VALID}{$key} ) 86 { 87 delete $self->{GOT}{$key} ; 88 return ; 89 } 90 91 my $pkg = ref $self ; 92 croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ; 93} 94 95sub EXISTS 96{ 97 my $self = shift ; 98 my $key = shift ; 99 100 exists $self->{VALID}{$key} ; 101} 102 103sub NotHere 104{ 105 my $self = shift ; 106 my $method = shift ; 107 108 croak ref($self) . " does not define the method ${method}" ; 109} 110 111sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") } 112sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") } 113sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") } 114 115package DB_File::RECNOINFO ; 116 117use warnings; 118use strict ; 119 120@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ; 121 122sub TIEHASH 123{ 124 my $pkg = shift ; 125 126 bless { VALID => { map {$_, 1} 127 qw( bval cachesize psize flags lorder reclen bfname ) 128 }, 129 GOT => {}, 130 }, $pkg ; 131} 132 133package DB_File::BTREEINFO ; 134 135use warnings; 136use strict ; 137 138@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ; 139 140sub TIEHASH 141{ 142 my $pkg = shift ; 143 144 bless { VALID => { 145 flags => 1, 146 cachesize => 1, 147 maxkeypage => 1, 148 minkeypage => 1, 149 psize => 1, 150 compare => 2, 151 prefix => 2, 152 lorder => 1, 153 }, 154 GOT => {}, 155 }, $pkg ; 156} 157 158 159package DB_File ; 160 161use warnings; 162use strict; 163our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO); 164our ($db_version, $use_XSLoader, $splice_end_array, $Error); 165use Carp; 166 167 168$VERSION = "1.814" ; 169 170{ 171 local $SIG{__WARN__} = sub {$splice_end_array = "@_";}; 172 my @a =(1); splice(@a, 3); 173 $splice_end_array = 174 ($splice_end_array =~ /^splice\(\) offset past end of array at /); 175} 176 177#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; 178$DB_BTREE = new DB_File::BTREEINFO ; 179$DB_HASH = new DB_File::HASHINFO ; 180$DB_RECNO = new DB_File::RECNOINFO ; 181 182require Tie::Hash; 183require Exporter; 184use AutoLoader; 185BEGIN { 186 $use_XSLoader = 1 ; 187 { local $SIG{__DIE__} ; eval { require XSLoader } ; } 188 189 if ($@) { 190 $use_XSLoader = 0 ; 191 require DynaLoader; 192 @ISA = qw(DynaLoader); 193 } 194} 195 196push @ISA, qw(Tie::Hash Exporter); 197@EXPORT = qw( 198 $DB_BTREE $DB_HASH $DB_RECNO 199 200 BTREEMAGIC 201 BTREEVERSION 202 DB_LOCK 203 DB_SHMEM 204 DB_TXN 205 HASHMAGIC 206 HASHVERSION 207 MAX_PAGE_NUMBER 208 MAX_PAGE_OFFSET 209 MAX_REC_NUMBER 210 RET_ERROR 211 RET_SPECIAL 212 RET_SUCCESS 213 R_CURSOR 214 R_DUP 215 R_FIRST 216 R_FIXEDLEN 217 R_IAFTER 218 R_IBEFORE 219 R_LAST 220 R_NEXT 221 R_NOKEY 222 R_NOOVERWRITE 223 R_PREV 224 R_RECNOSYNC 225 R_SETCURSOR 226 R_SNAPSHOT 227 __R_UNUSED 228 229); 230 231sub AUTOLOAD { 232 my($constname); 233 ($constname = $AUTOLOAD) =~ s/.*:://; 234 my ($error, $val) = constant($constname); 235 Carp::croak $error if $error; 236 no strict 'refs'; 237 *{$AUTOLOAD} = sub { $val }; 238 goto &{$AUTOLOAD}; 239} 240 241 242eval { 243 # Make all Fcntl O_XXX constants available for importing 244 require Fcntl; 245 my @O = grep /^O_/, @Fcntl::EXPORT; 246 Fcntl->import(@O); # first we import what we want to export 247 push(@EXPORT, @O); 248}; 249 250if ($use_XSLoader) 251 { XSLoader::load("DB_File", $VERSION)} 252else 253 { bootstrap DB_File $VERSION } 254 255# Preloaded methods go here. Autoload methods go after __END__, and are 256# processed by the autosplit program. 257 258sub tie_hash_or_array 259{ 260 my (@arg) = @_ ; 261 my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ; 262 263 $arg[4] = tied %{ $arg[4] } 264 if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ; 265 266 $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2]; 267 $arg[3] = 0666 if @arg >=4 && ! defined $arg[3]; 268 269 # make recno in Berkeley DB version 2 (or better) work like 270 # recno in version 1. 271 if ($db_version >= 4 and ! $tieHASH) { 272 $arg[2] |= O_CREAT(); 273 } 274 275 if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and 276 $arg[1] and ! -e $arg[1]) { 277 open(FH, ">$arg[1]") or return undef ; 278 close FH ; 279 chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ; 280 } 281 282 DoTie_($tieHASH, @arg) ; 283} 284 285sub TIEHASH 286{ 287 tie_hash_or_array(@_) ; 288} 289 290sub TIEARRAY 291{ 292 tie_hash_or_array(@_) ; 293} 294 295sub CLEAR 296{ 297 my $self = shift; 298 my $key = 0 ; 299 my $value = "" ; 300 my $status = $self->seq($key, $value, R_FIRST()); 301 my @keys; 302 303 while ($status == 0) { 304 push @keys, $key; 305 $status = $self->seq($key, $value, R_NEXT()); 306 } 307 foreach $key (reverse @keys) { 308 my $s = $self->del($key); 309 } 310} 311 312sub EXTEND { } 313 314sub STORESIZE 315{ 316 my $self = shift; 317 my $length = shift ; 318 my $current_length = $self->length() ; 319 320 if ($length < $current_length) { 321 my $key ; 322 for ($key = $current_length - 1 ; $key >= $length ; -- $key) 323 { $self->del($key) } 324 } 325 elsif ($length > $current_length) { 326 $self->put($length-1, "") ; 327 } 328} 329 330 331sub SPLICE 332{ 333 my $self = shift; 334 my $offset = shift; 335 if (not defined $offset) { 336 warnings::warnif('uninitialized', 'Use of uninitialized value in splice'); 337 $offset = 0; 338 } 339 340 my $length = @_ ? shift : 0; 341 # Carping about definedness comes _after_ the OFFSET sanity check. 342 # This is so we get the same error messages as Perl's splice(). 343 # 344 345 my @list = @_; 346 347 my $size = $self->FETCHSIZE(); 348 349 # 'If OFFSET is negative then it start that far from the end of 350 # the array.' 351 # 352 if ($offset < 0) { 353 my $new_offset = $size + $offset; 354 if ($new_offset < 0) { 355 die "Modification of non-creatable array value attempted, " 356 . "subscript $offset"; 357 } 358 $offset = $new_offset; 359 } 360 361 if (not defined $length) { 362 warnings::warnif('uninitialized', 'Use of uninitialized value in splice'); 363 $length = 0; 364 } 365 366 if ($offset > $size) { 367 $offset = $size; 368 warnings::warnif('misc', 'splice() offset past end of array') 369 if $splice_end_array; 370 } 371 372 # 'If LENGTH is omitted, removes everything from OFFSET onward.' 373 if (not defined $length) { 374 $length = $size - $offset; 375 } 376 377 # 'If LENGTH is negative, leave that many elements off the end of 378 # the array.' 379 # 380 if ($length < 0) { 381 $length = $size - $offset + $length; 382 383 if ($length < 0) { 384 # The user must have specified a length bigger than the 385 # length of the array passed in. But perl's splice() 386 # doesn't catch this, it just behaves as for length=0. 387 # 388 $length = 0; 389 } 390 } 391 392 if ($length > $size - $offset) { 393 $length = $size - $offset; 394 } 395 396 # $num_elems holds the current number of elements in the database. 397 my $num_elems = $size; 398 399 # 'Removes the elements designated by OFFSET and LENGTH from an 400 # array,'... 401 # 402 my @removed = (); 403 foreach (0 .. $length - 1) { 404 my $old; 405 my $status = $self->get($offset, $old); 406 if ($status != 0) { 407 my $msg = "error from Berkeley DB on get($offset, \$old)"; 408 if ($status == 1) { 409 $msg .= ' (no such element?)'; 410 } 411 else { 412 $msg .= ": error status $status"; 413 if (defined $! and $! ne '') { 414 $msg .= ", message $!"; 415 } 416 } 417 die $msg; 418 } 419 push @removed, $old; 420 421 $status = $self->del($offset); 422 if ($status != 0) { 423 my $msg = "error from Berkeley DB on del($offset)"; 424 if ($status == 1) { 425 $msg .= ' (no such element?)'; 426 } 427 else { 428 $msg .= ": error status $status"; 429 if (defined $! and $! ne '') { 430 $msg .= ", message $!"; 431 } 432 } 433 die $msg; 434 } 435 436 -- $num_elems; 437 } 438 439 # ...'and replaces them with the elements of LIST, if any.' 440 my $pos = $offset; 441 while (defined (my $elem = shift @list)) { 442 my $old_pos = $pos; 443 my $status; 444 if ($pos >= $num_elems) { 445 $status = $self->put($pos, $elem); 446 } 447 else { 448 $status = $self->put($pos, $elem, $self->R_IBEFORE); 449 } 450 451 if ($status != 0) { 452 my $msg = "error from Berkeley DB on put($pos, $elem, ...)"; 453 if ($status == 1) { 454 $msg .= ' (no such element?)'; 455 } 456 else { 457 $msg .= ", error status $status"; 458 if (defined $! and $! ne '') { 459 $msg .= ", message $!"; 460 } 461 } 462 die $msg; 463 } 464 465 die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE" 466 if $old_pos != $pos; 467 468 ++ $pos; 469 ++ $num_elems; 470 } 471 472 if (wantarray) { 473 # 'In list context, returns the elements removed from the 474 # array.' 475 # 476 return @removed; 477 } 478 elsif (defined wantarray and not wantarray) { 479 # 'In scalar context, returns the last element removed, or 480 # undef if no elements are removed.' 481 # 482 if (@removed) { 483 my $last = pop @removed; 484 return "$last"; 485 } 486 else { 487 return undef; 488 } 489 } 490 elsif (not defined wantarray) { 491 # Void context 492 } 493 else { die } 494} 495sub ::DB_File::splice { &SPLICE } 496 497sub find_dup 498{ 499 croak "Usage: \$db->find_dup(key,value)\n" 500 unless @_ == 3 ; 501 502 my $db = shift ; 503 my ($origkey, $value_wanted) = @_ ; 504 my ($key, $value) = ($origkey, 0); 505 my ($status) = 0 ; 506 507 for ($status = $db->seq($key, $value, R_CURSOR() ) ; 508 $status == 0 ; 509 $status = $db->seq($key, $value, R_NEXT() ) ) { 510 511 return 0 if $key eq $origkey and $value eq $value_wanted ; 512 } 513 514 return $status ; 515} 516 517sub del_dup 518{ 519 croak "Usage: \$db->del_dup(key,value)\n" 520 unless @_ == 3 ; 521 522 my $db = shift ; 523 my ($key, $value) = @_ ; 524 my ($status) = $db->find_dup($key, $value) ; 525 return $status if $status != 0 ; 526 527 $status = $db->del($key, R_CURSOR() ) ; 528 return $status ; 529} 530 531sub get_dup 532{ 533 croak "Usage: \$db->get_dup(key [,flag])\n" 534 unless @_ == 2 or @_ == 3 ; 535 536 my $db = shift ; 537 my $key = shift ; 538 my $flag = shift ; 539 my $value = 0 ; 540 my $origkey = $key ; 541 my $wantarray = wantarray ; 542 my %values = () ; 543 my @values = () ; 544 my $counter = 0 ; 545 my $status = 0 ; 546 547 # iterate through the database until either EOF ($status == 0) 548 # or a different key is encountered ($key ne $origkey). 549 for ($status = $db->seq($key, $value, R_CURSOR()) ; 550 $status == 0 and $key eq $origkey ; 551 $status = $db->seq($key, $value, R_NEXT()) ) { 552 553 # save the value or count number of matches 554 if ($wantarray) { 555 if ($flag) 556 { ++ $values{$value} } 557 else 558 { push (@values, $value) } 559 } 560 else 561 { ++ $counter } 562 563 } 564 565 return ($wantarray ? ($flag ? %values : @values) : $counter) ; 566} 567 568 5691; 570__END__ 571 572=head1 NAME 573 574DB_File - Perl5 access to Berkeley DB version 1.x 575 576=head1 SYNOPSIS 577 578 use DB_File; 579 580 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ; 581 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ; 582 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ; 583 584 $status = $X->del($key [, $flags]) ; 585 $status = $X->put($key, $value [, $flags]) ; 586 $status = $X->get($key, $value [, $flags]) ; 587 $status = $X->seq($key, $value, $flags) ; 588 $status = $X->sync([$flags]) ; 589 $status = $X->fd ; 590 591 # BTREE only 592 $count = $X->get_dup($key) ; 593 @list = $X->get_dup($key) ; 594 %list = $X->get_dup($key, 1) ; 595 $status = $X->find_dup($key, $value) ; 596 $status = $X->del_dup($key, $value) ; 597 598 # RECNO only 599 $a = $X->length; 600 $a = $X->pop ; 601 $X->push(list); 602 $a = $X->shift; 603 $X->unshift(list); 604 @r = $X->splice(offset, length, elements); 605 606 # DBM Filters 607 $old_filter = $db->filter_store_key ( sub { ... } ) ; 608 $old_filter = $db->filter_store_value( sub { ... } ) ; 609 $old_filter = $db->filter_fetch_key ( sub { ... } ) ; 610 $old_filter = $db->filter_fetch_value( sub { ... } ) ; 611 612 untie %hash ; 613 untie @array ; 614 615=head1 DESCRIPTION 616 617B<DB_File> is a module which allows Perl programs to make use of the 618facilities provided by Berkeley DB version 1.x (if you have a newer 619version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>). 620It is assumed that you have a copy of the Berkeley DB manual pages at 621hand when reading this documentation. The interface defined here 622mirrors the Berkeley DB interface closely. 623 624Berkeley DB is a C library which provides a consistent interface to a 625number of database formats. B<DB_File> provides an interface to all 626three of the database types currently supported by Berkeley DB. 627 628The file types are: 629 630=over 5 631 632=item B<DB_HASH> 633 634This database type allows arbitrary key/value pairs to be stored in data 635files. This is equivalent to the functionality provided by other 636hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though, 637the files created using DB_HASH are not compatible with any of the 638other packages mentioned. 639 640A default hashing algorithm, which will be adequate for most 641applications, is built into Berkeley DB. If you do need to use your own 642hashing algorithm it is possible to write your own in Perl and have 643B<DB_File> use it instead. 644 645=item B<DB_BTREE> 646 647The btree format allows arbitrary key/value pairs to be stored in a 648sorted, balanced binary tree. 649 650As with the DB_HASH format, it is possible to provide a user defined 651Perl routine to perform the comparison of keys. By default, though, the 652keys are stored in lexical order. 653 654=item B<DB_RECNO> 655 656DB_RECNO allows both fixed-length and variable-length flat text files 657to be manipulated using the same key/value pair interface as in DB_HASH 658and DB_BTREE. In this case the key will consist of a record (line) 659number. 660 661=back 662 663=head2 Using DB_File with Berkeley DB version 2 or greater 664 665Although B<DB_File> is intended to be used with Berkeley DB version 1, 666it can also be used with version 2, 3 or 4. In this case the interface is 667limited to the functionality provided by Berkeley DB 1.x. Anywhere the 668version 2 or greater interface differs, B<DB_File> arranges for it to work 669like version 1. This feature allows B<DB_File> scripts that were built 670with version 1 to be migrated to version 2 or greater without any changes. 671 672If you want to make use of the new features available in Berkeley DB 6732.x or greater, use the Perl module B<BerkeleyDB> instead. 674 675B<Note:> The database file format has changed multiple times in Berkeley 676DB version 2, 3 and 4. If you cannot recreate your databases, you 677must dump any existing databases with either the C<db_dump> or the 678C<db_dump185> utility that comes with Berkeley DB. 679Once you have rebuilt DB_File to use Berkeley DB version 2 or greater, 680your databases can be recreated using C<db_load>. Refer to the Berkeley DB 681documentation for further details. 682 683Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley 684DB with DB_File. 685 686=head2 Interface to Berkeley DB 687 688B<DB_File> allows access to Berkeley DB files using the tie() mechanism 689in Perl 5 (for full details, see L<perlfunc/tie()>). This facility 690allows B<DB_File> to access Berkeley DB files using either an 691associative array (for DB_HASH & DB_BTREE file types) or an ordinary 692array (for the DB_RECNO file type). 693 694In addition to the tie() interface, it is also possible to access most 695of the functions provided in the Berkeley DB API directly. 696See L<THE API INTERFACE>. 697 698=head2 Opening a Berkeley DB Database File 699 700Berkeley DB uses the function dbopen() to open or create a database. 701Here is the C prototype for dbopen(): 702 703 DB* 704 dbopen (const char * file, int flags, int mode, 705 DBTYPE type, const void * openinfo) 706 707The parameter C<type> is an enumeration which specifies which of the 3 708interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used. 709Depending on which of these is actually chosen, the final parameter, 710I<openinfo> points to a data structure which allows tailoring of the 711specific interface method. 712 713This interface is handled slightly differently in B<DB_File>. Here is 714an equivalent call using B<DB_File>: 715 716 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ; 717 718The C<filename>, C<flags> and C<mode> parameters are the direct 719equivalent of their dbopen() counterparts. The final parameter $DB_HASH 720performs the function of both the C<type> and C<openinfo> parameters in 721dbopen(). 722 723In the example above $DB_HASH is actually a pre-defined reference to a 724hash object. B<DB_File> has three of these pre-defined references. 725Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO. 726 727The keys allowed in each of these pre-defined references is limited to 728the names used in the equivalent C structure. So, for example, the 729$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>, 730C<ffactor>, C<hash>, C<lorder> and C<nelem>. 731 732To change one of these elements, just assign to it like this: 733 734 $DB_HASH->{'cachesize'} = 10000 ; 735 736The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are 737usually adequate for most applications. If you do need to create extra 738instances of these objects, constructors are available for each file 739type. 740 741Here are examples of the constructors and the valid options available 742for DB_HASH, DB_BTREE and DB_RECNO respectively. 743 744 $a = new DB_File::HASHINFO ; 745 $a->{'bsize'} ; 746 $a->{'cachesize'} ; 747 $a->{'ffactor'}; 748 $a->{'hash'} ; 749 $a->{'lorder'} ; 750 $a->{'nelem'} ; 751 752 $b = new DB_File::BTREEINFO ; 753 $b->{'flags'} ; 754 $b->{'cachesize'} ; 755 $b->{'maxkeypage'} ; 756 $b->{'minkeypage'} ; 757 $b->{'psize'} ; 758 $b->{'compare'} ; 759 $b->{'prefix'} ; 760 $b->{'lorder'} ; 761 762 $c = new DB_File::RECNOINFO ; 763 $c->{'bval'} ; 764 $c->{'cachesize'} ; 765 $c->{'psize'} ; 766 $c->{'flags'} ; 767 $c->{'lorder'} ; 768 $c->{'reclen'} ; 769 $c->{'bfname'} ; 770 771The values stored in the hashes above are mostly the direct equivalent 772of their C counterpart. Like their C counterparts, all are set to a 773default values - that means you don't have to set I<all> of the 774values when you only want to change one. Here is an example: 775 776 $a = new DB_File::HASHINFO ; 777 $a->{'cachesize'} = 12345 ; 778 tie %y, 'DB_File', "filename", $flags, 0777, $a ; 779 780A few of the options need extra discussion here. When used, the C 781equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers 782to C functions. In B<DB_File> these keys are used to store references 783to Perl subs. Below are templates for each of the subs: 784 785 sub hash 786 { 787 my ($data) = @_ ; 788 ... 789 # return the hash value for $data 790 return $hash ; 791 } 792 793 sub compare 794 { 795 my ($key, $key2) = @_ ; 796 ... 797 # return 0 if $key1 eq $key2 798 # -1 if $key1 lt $key2 799 # 1 if $key1 gt $key2 800 return (-1 , 0 or 1) ; 801 } 802 803 sub prefix 804 { 805 my ($key, $key2) = @_ ; 806 ... 807 # return number of bytes of $key2 which are 808 # necessary to determine that it is greater than $key1 809 return $bytes ; 810 } 811 812See L<Changing the BTREE sort order> for an example of using the 813C<compare> template. 814 815If you are using the DB_RECNO interface and you intend making use of 816C<bval>, you should check out L<The 'bval' Option>. 817 818=head2 Default Parameters 819 820It is possible to omit some or all of the final 4 parameters in the 821call to C<tie> and let them take default values. As DB_HASH is the most 822common file format used, the call: 823 824 tie %A, "DB_File", "filename" ; 825 826is equivalent to: 827 828 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ; 829 830It is also possible to omit the filename parameter as well, so the 831call: 832 833 tie %A, "DB_File" ; 834 835is equivalent to: 836 837 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ; 838 839See L<In Memory Databases> for a discussion on the use of C<undef> 840in place of a filename. 841 842=head2 In Memory Databases 843 844Berkeley DB allows the creation of in-memory databases by using NULL 845(that is, a C<(char *)0> in C) in place of the filename. B<DB_File> 846uses C<undef> instead of NULL to provide this functionality. 847 848=head1 DB_HASH 849 850The DB_HASH file format is probably the most commonly used of the three 851file formats that B<DB_File> supports. It is also very straightforward 852to use. 853 854=head2 A Simple Example 855 856This example shows how to create a database, add key/value pairs to the 857database, delete keys/value pairs and finally how to enumerate the 858contents of the database. 859 860 use warnings ; 861 use strict ; 862 use DB_File ; 863 our (%h, $k, $v) ; 864 865 unlink "fruit" ; 866 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH 867 or die "Cannot open file 'fruit': $!\n"; 868 869 # Add a few key/value pairs to the file 870 $h{"apple"} = "red" ; 871 $h{"orange"} = "orange" ; 872 $h{"banana"} = "yellow" ; 873 $h{"tomato"} = "red" ; 874 875 # Check for existence of a key 876 print "Banana Exists\n\n" if $h{"banana"} ; 877 878 # Delete a key/value pair. 879 delete $h{"apple"} ; 880 881 # print the contents of the file 882 while (($k, $v) = each %h) 883 { print "$k -> $v\n" } 884 885 untie %h ; 886 887here is the output: 888 889 Banana Exists 890 891 orange -> orange 892 tomato -> red 893 banana -> yellow 894 895Note that the like ordinary associative arrays, the order of the keys 896retrieved is in an apparently random order. 897 898=head1 DB_BTREE 899 900The DB_BTREE format is useful when you want to store data in a given 901order. By default the keys will be stored in lexical order, but as you 902will see from the example shown in the next section, it is very easy to 903define your own sorting function. 904 905=head2 Changing the BTREE sort order 906 907This script shows how to override the default sorting algorithm that 908BTREE uses. Instead of using the normal lexical ordering, a case 909insensitive compare function will be used. 910 911 use warnings ; 912 use strict ; 913 use DB_File ; 914 915 my %h ; 916 917 sub Compare 918 { 919 my ($key1, $key2) = @_ ; 920 "\L$key1" cmp "\L$key2" ; 921 } 922 923 # specify the Perl sub that will do the comparison 924 $DB_BTREE->{'compare'} = \&Compare ; 925 926 unlink "tree" ; 927 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE 928 or die "Cannot open file 'tree': $!\n" ; 929 930 # Add a key/value pair to the file 931 $h{'Wall'} = 'Larry' ; 932 $h{'Smith'} = 'John' ; 933 $h{'mouse'} = 'mickey' ; 934 $h{'duck'} = 'donald' ; 935 936 # Delete 937 delete $h{"duck"} ; 938 939 # Cycle through the keys printing them in order. 940 # Note it is not necessary to sort the keys as 941 # the btree will have kept them in order automatically. 942 foreach (keys %h) 943 { print "$_\n" } 944 945 untie %h ; 946 947Here is the output from the code above. 948 949 mouse 950 Smith 951 Wall 952 953There are a few point to bear in mind if you want to change the 954ordering in a BTREE database: 955 956=over 5 957 958=item 1. 959 960The new compare function must be specified when you create the database. 961 962=item 2. 963 964You cannot change the ordering once the database has been created. Thus 965you must use the same compare function every time you access the 966database. 967 968=item 3 969 970Duplicate keys are entirely defined by the comparison function. 971In the case-insensitive example above, the keys: 'KEY' and 'key' 972would be considered duplicates, and assigning to the second one 973would overwrite the first. If duplicates are allowed for (with the 974R_DUP flag discussed below), only a single copy of duplicate keys 975is stored in the database --- so (again with example above) assigning 976three values to the keys: 'KEY', 'Key', and 'key' would leave just 977the first key: 'KEY' in the database with three values. For some 978situations this results in information loss, so care should be taken 979to provide fully qualified comparison functions when necessary. 980For example, the above comparison routine could be modified to 981additionally compare case-sensitively if two keys are equal in the 982case insensitive comparison: 983 984 sub compare { 985 my($key1, $key2) = @_; 986 lc $key1 cmp lc $key2 || 987 $key1 cmp $key2; 988 } 989 990And now you will only have duplicates when the keys themselves 991are truly the same. (note: in versions of the db library prior to 992about November 1996, such duplicate keys were retained so it was 993possible to recover the original keys in sets of keys that 994compared as equal). 995 996 997=back 998 999=head2 Handling Duplicate Keys 1000 1001The BTREE file type optionally allows a single key to be associated 1002with an arbitrary number of values. This option is enabled by setting 1003the flags element of C<$DB_BTREE> to R_DUP when creating the database. 1004 1005There are some difficulties in using the tied hash interface if you 1006want to manipulate a BTREE database with duplicate keys. Consider this 1007code: 1008 1009 use warnings ; 1010 use strict ; 1011 use DB_File ; 1012 1013 my ($filename, %h) ; 1014 1015 $filename = "tree" ; 1016 unlink $filename ; 1017 1018 # Enable duplicate records 1019 $DB_BTREE->{'flags'} = R_DUP ; 1020 1021 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1022 or die "Cannot open $filename: $!\n"; 1023 1024 # Add some key/value pairs to the file 1025 $h{'Wall'} = 'Larry' ; 1026 $h{'Wall'} = 'Brick' ; # Note the duplicate key 1027 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value 1028 $h{'Smith'} = 'John' ; 1029 $h{'mouse'} = 'mickey' ; 1030 1031 # iterate through the associative array 1032 # and print each key/value pair. 1033 foreach (sort keys %h) 1034 { print "$_ -> $h{$_}\n" } 1035 1036 untie %h ; 1037 1038Here is the output: 1039 1040 Smith -> John 1041 Wall -> Larry 1042 Wall -> Larry 1043 Wall -> Larry 1044 mouse -> mickey 1045 1046As you can see 3 records have been successfully created with key C<Wall> 1047- the only thing is, when they are retrieved from the database they 1048I<seem> to have the same value, namely C<Larry>. The problem is caused 1049by the way that the associative array interface works. Basically, when 1050the associative array interface is used to fetch the value associated 1051with a given key, it will only ever retrieve the first value. 1052 1053Although it may not be immediately obvious from the code above, the 1054associative array interface can be used to write values with duplicate 1055keys, but it cannot be used to read them back from the database. 1056 1057The way to get around this problem is to use the Berkeley DB API method 1058called C<seq>. This method allows sequential access to key/value 1059pairs. See L<THE API INTERFACE> for details of both the C<seq> method 1060and the API in general. 1061 1062Here is the script above rewritten using the C<seq> API method. 1063 1064 use warnings ; 1065 use strict ; 1066 use DB_File ; 1067 1068 my ($filename, $x, %h, $status, $key, $value) ; 1069 1070 $filename = "tree" ; 1071 unlink $filename ; 1072 1073 # Enable duplicate records 1074 $DB_BTREE->{'flags'} = R_DUP ; 1075 1076 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1077 or die "Cannot open $filename: $!\n"; 1078 1079 # Add some key/value pairs to the file 1080 $h{'Wall'} = 'Larry' ; 1081 $h{'Wall'} = 'Brick' ; # Note the duplicate key 1082 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value 1083 $h{'Smith'} = 'John' ; 1084 $h{'mouse'} = 'mickey' ; 1085 1086 # iterate through the btree using seq 1087 # and print each key/value pair. 1088 $key = $value = 0 ; 1089 for ($status = $x->seq($key, $value, R_FIRST) ; 1090 $status == 0 ; 1091 $status = $x->seq($key, $value, R_NEXT) ) 1092 { print "$key -> $value\n" } 1093 1094 undef $x ; 1095 untie %h ; 1096 1097that prints: 1098 1099 Smith -> John 1100 Wall -> Brick 1101 Wall -> Brick 1102 Wall -> Larry 1103 mouse -> mickey 1104 1105This time we have got all the key/value pairs, including the multiple 1106values associated with the key C<Wall>. 1107 1108To make life easier when dealing with duplicate keys, B<DB_File> comes with 1109a few utility methods. 1110 1111=head2 The get_dup() Method 1112 1113The C<get_dup> method assists in 1114reading duplicate values from BTREE databases. The method can take the 1115following forms: 1116 1117 $count = $x->get_dup($key) ; 1118 @list = $x->get_dup($key) ; 1119 %list = $x->get_dup($key, 1) ; 1120 1121In a scalar context the method returns the number of values associated 1122with the key, C<$key>. 1123 1124In list context, it returns all the values which match C<$key>. Note 1125that the values will be returned in an apparently random order. 1126 1127In list context, if the second parameter is present and evaluates 1128TRUE, the method returns an associative array. The keys of the 1129associative array correspond to the values that matched in the BTREE 1130and the values of the array are a count of the number of times that 1131particular value occurred in the BTREE. 1132 1133So assuming the database created above, we can use C<get_dup> like 1134this: 1135 1136 use warnings ; 1137 use strict ; 1138 use DB_File ; 1139 1140 my ($filename, $x, %h) ; 1141 1142 $filename = "tree" ; 1143 1144 # Enable duplicate records 1145 $DB_BTREE->{'flags'} = R_DUP ; 1146 1147 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1148 or die "Cannot open $filename: $!\n"; 1149 1150 my $cnt = $x->get_dup("Wall") ; 1151 print "Wall occurred $cnt times\n" ; 1152 1153 my %hash = $x->get_dup("Wall", 1) ; 1154 print "Larry is there\n" if $hash{'Larry'} ; 1155 print "There are $hash{'Brick'} Brick Walls\n" ; 1156 1157 my @list = sort $x->get_dup("Wall") ; 1158 print "Wall => [@list]\n" ; 1159 1160 @list = $x->get_dup("Smith") ; 1161 print "Smith => [@list]\n" ; 1162 1163 @list = $x->get_dup("Dog") ; 1164 print "Dog => [@list]\n" ; 1165 1166 1167and it will print: 1168 1169 Wall occurred 3 times 1170 Larry is there 1171 There are 2 Brick Walls 1172 Wall => [Brick Brick Larry] 1173 Smith => [John] 1174 Dog => [] 1175 1176=head2 The find_dup() Method 1177 1178 $status = $X->find_dup($key, $value) ; 1179 1180This method checks for the existence of a specific key/value pair. If the 1181pair exists, the cursor is left pointing to the pair and the method 1182returns 0. Otherwise the method returns a non-zero value. 1183 1184Assuming the database from the previous example: 1185 1186 use warnings ; 1187 use strict ; 1188 use DB_File ; 1189 1190 my ($filename, $x, %h, $found) ; 1191 1192 $filename = "tree" ; 1193 1194 # Enable duplicate records 1195 $DB_BTREE->{'flags'} = R_DUP ; 1196 1197 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1198 or die "Cannot open $filename: $!\n"; 1199 1200 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; 1201 print "Larry Wall is $found there\n" ; 1202 1203 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ; 1204 print "Harry Wall is $found there\n" ; 1205 1206 undef $x ; 1207 untie %h ; 1208 1209prints this 1210 1211 Larry Wall is there 1212 Harry Wall is not there 1213 1214 1215=head2 The del_dup() Method 1216 1217 $status = $X->del_dup($key, $value) ; 1218 1219This method deletes a specific key/value pair. It returns 12200 if they exist and have been deleted successfully. 1221Otherwise the method returns a non-zero value. 1222 1223Again assuming the existence of the C<tree> database 1224 1225 use warnings ; 1226 use strict ; 1227 use DB_File ; 1228 1229 my ($filename, $x, %h, $found) ; 1230 1231 $filename = "tree" ; 1232 1233 # Enable duplicate records 1234 $DB_BTREE->{'flags'} = R_DUP ; 1235 1236 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1237 or die "Cannot open $filename: $!\n"; 1238 1239 $x->del_dup("Wall", "Larry") ; 1240 1241 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; 1242 print "Larry Wall is $found there\n" ; 1243 1244 undef $x ; 1245 untie %h ; 1246 1247prints this 1248 1249 Larry Wall is not there 1250 1251=head2 Matching Partial Keys 1252 1253The BTREE interface has a feature which allows partial keys to be 1254matched. This functionality is I<only> available when the C<seq> method 1255is used along with the R_CURSOR flag. 1256 1257 $x->seq($key, $value, R_CURSOR) ; 1258 1259Here is the relevant quote from the dbopen man page where it defines 1260the use of the R_CURSOR flag with seq: 1261 1262 Note, for the DB_BTREE access method, the returned key is not 1263 necessarily an exact match for the specified key. The returned key 1264 is the smallest key greater than or equal to the specified key, 1265 permitting partial key matches and range searches. 1266 1267In the example script below, the C<match> sub uses this feature to find 1268and print the first matching key/value pair given a partial key. 1269 1270 use warnings ; 1271 use strict ; 1272 use DB_File ; 1273 use Fcntl ; 1274 1275 my ($filename, $x, %h, $st, $key, $value) ; 1276 1277 sub match 1278 { 1279 my $key = shift ; 1280 my $value = 0; 1281 my $orig_key = $key ; 1282 $x->seq($key, $value, R_CURSOR) ; 1283 print "$orig_key\t-> $key\t-> $value\n" ; 1284 } 1285 1286 $filename = "tree" ; 1287 unlink $filename ; 1288 1289 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1290 or die "Cannot open $filename: $!\n"; 1291 1292 # Add some key/value pairs to the file 1293 $h{'mouse'} = 'mickey' ; 1294 $h{'Wall'} = 'Larry' ; 1295 $h{'Walls'} = 'Brick' ; 1296 $h{'Smith'} = 'John' ; 1297 1298 1299 $key = $value = 0 ; 1300 print "IN ORDER\n" ; 1301 for ($st = $x->seq($key, $value, R_FIRST) ; 1302 $st == 0 ; 1303 $st = $x->seq($key, $value, R_NEXT) ) 1304 1305 { print "$key -> $value\n" } 1306 1307 print "\nPARTIAL MATCH\n" ; 1308 1309 match "Wa" ; 1310 match "A" ; 1311 match "a" ; 1312 1313 undef $x ; 1314 untie %h ; 1315 1316Here is the output: 1317 1318 IN ORDER 1319 Smith -> John 1320 Wall -> Larry 1321 Walls -> Brick 1322 mouse -> mickey 1323 1324 PARTIAL MATCH 1325 Wa -> Wall -> Larry 1326 A -> Smith -> John 1327 a -> mouse -> mickey 1328 1329=head1 DB_RECNO 1330 1331DB_RECNO provides an interface to flat text files. Both variable and 1332fixed length records are supported. 1333 1334In order to make RECNO more compatible with Perl, the array offset for 1335all RECNO arrays begins at 0 rather than 1 as in Berkeley DB. 1336 1337As with normal Perl arrays, a RECNO array can be accessed using 1338negative indexes. The index -1 refers to the last element of the array, 1339-2 the second last, and so on. Attempting to access an element before 1340the start of the array will raise a fatal run-time error. 1341 1342=head2 The 'bval' Option 1343 1344The operation of the bval option warrants some discussion. Here is the 1345definition of bval from the Berkeley DB 1.85 recno manual page: 1346 1347 The delimiting byte to be used to mark the end of a 1348 record for variable-length records, and the pad charac- 1349 ter for fixed-length records. If no value is speci- 1350 fied, newlines (``\n'') are used to mark the end of 1351 variable-length records and fixed-length records are 1352 padded with spaces. 1353 1354The second sentence is wrong. In actual fact bval will only default to 1355C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL 1356openinfo parameter is used at all, the value that happens to be in bval 1357will be used. That means you always have to specify bval when making 1358use of any of the options in the openinfo parameter. This documentation 1359error will be fixed in the next release of Berkeley DB. 1360 1361That clarifies the situation with regards Berkeley DB itself. What 1362about B<DB_File>? Well, the behavior defined in the quote above is 1363quite useful, so B<DB_File> conforms to it. 1364 1365That means that you can specify other options (e.g. cachesize) and 1366still have bval default to C<"\n"> for variable length records, and 1367space for fixed length records. 1368 1369Also note that the bval option only allows you to specify a single byte 1370as a delimiter. 1371 1372=head2 A Simple Example 1373 1374Here is a simple example that uses RECNO (if you are using a version 1375of Perl earlier than 5.004_57 this example won't work -- see 1376L<Extra RECNO Methods> for a workaround). 1377 1378 use warnings ; 1379 use strict ; 1380 use DB_File ; 1381 1382 my $filename = "text" ; 1383 unlink $filename ; 1384 1385 my @h ; 1386 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO 1387 or die "Cannot open file 'text': $!\n" ; 1388 1389 # Add a few key/value pairs to the file 1390 $h[0] = "orange" ; 1391 $h[1] = "blue" ; 1392 $h[2] = "yellow" ; 1393 1394 push @h, "green", "black" ; 1395 1396 my $elements = scalar @h ; 1397 print "The array contains $elements entries\n" ; 1398 1399 my $last = pop @h ; 1400 print "popped $last\n" ; 1401 1402 unshift @h, "white" ; 1403 my $first = shift @h ; 1404 print "shifted $first\n" ; 1405 1406 # Check for existence of a key 1407 print "Element 1 Exists with value $h[1]\n" if $h[1] ; 1408 1409 # use a negative index 1410 print "The last element is $h[-1]\n" ; 1411 print "The 2nd last element is $h[-2]\n" ; 1412 1413 untie @h ; 1414 1415Here is the output from the script: 1416 1417 The array contains 5 entries 1418 popped black 1419 shifted white 1420 Element 1 Exists with value blue 1421 The last element is green 1422 The 2nd last element is yellow 1423 1424=head2 Extra RECNO Methods 1425 1426If you are using a version of Perl earlier than 5.004_57, the tied 1427array interface is quite limited. In the example script above 1428C<push>, C<pop>, C<shift>, C<unshift> 1429or determining the array length will not work with a tied array. 1430 1431To make the interface more useful for older versions of Perl, a number 1432of methods are supplied with B<DB_File> to simulate the missing array 1433operations. All these methods are accessed via the object returned from 1434the tie call. 1435 1436Here are the methods: 1437 1438=over 5 1439 1440=item B<$X-E<gt>push(list) ;> 1441 1442Pushes the elements of C<list> to the end of the array. 1443 1444=item B<$value = $X-E<gt>pop ;> 1445 1446Removes and returns the last element of the array. 1447 1448=item B<$X-E<gt>shift> 1449 1450Removes and returns the first element of the array. 1451 1452=item B<$X-E<gt>unshift(list) ;> 1453 1454Pushes the elements of C<list> to the start of the array. 1455 1456=item B<$X-E<gt>length> 1457 1458Returns the number of elements in the array. 1459 1460=item B<$X-E<gt>splice(offset, length, elements);> 1461 1462Returns a splice of the array. 1463 1464=back 1465 1466=head2 Another Example 1467 1468Here is a more complete example that makes use of some of the methods 1469described above. It also makes use of the API interface directly (see 1470L<THE API INTERFACE>). 1471 1472 use warnings ; 1473 use strict ; 1474 my (@h, $H, $file, $i) ; 1475 use DB_File ; 1476 use Fcntl ; 1477 1478 $file = "text" ; 1479 1480 unlink $file ; 1481 1482 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO 1483 or die "Cannot open file $file: $!\n" ; 1484 1485 # first create a text file to play with 1486 $h[0] = "zero" ; 1487 $h[1] = "one" ; 1488 $h[2] = "two" ; 1489 $h[3] = "three" ; 1490 $h[4] = "four" ; 1491 1492 1493 # Print the records in order. 1494 # 1495 # The length method is needed here because evaluating a tied 1496 # array in a scalar context does not return the number of 1497 # elements in the array. 1498 1499 print "\nORIGINAL\n" ; 1500 foreach $i (0 .. $H->length - 1) { 1501 print "$i: $h[$i]\n" ; 1502 } 1503 1504 # use the push & pop methods 1505 $a = $H->pop ; 1506 $H->push("last") ; 1507 print "\nThe last record was [$a]\n" ; 1508 1509 # and the shift & unshift methods 1510 $a = $H->shift ; 1511 $H->unshift("first") ; 1512 print "The first record was [$a]\n" ; 1513 1514 # Use the API to add a new record after record 2. 1515 $i = 2 ; 1516 $H->put($i, "Newbie", R_IAFTER) ; 1517 1518 # and a new record before record 1. 1519 $i = 1 ; 1520 $H->put($i, "New One", R_IBEFORE) ; 1521 1522 # delete record 3 1523 $H->del(3) ; 1524 1525 # now print the records in reverse order 1526 print "\nREVERSE\n" ; 1527 for ($i = $H->length - 1 ; $i >= 0 ; -- $i) 1528 { print "$i: $h[$i]\n" } 1529 1530 # same again, but use the API functions instead 1531 print "\nREVERSE again\n" ; 1532 my ($s, $k, $v) = (0, 0, 0) ; 1533 for ($s = $H->seq($k, $v, R_LAST) ; 1534 $s == 0 ; 1535 $s = $H->seq($k, $v, R_PREV)) 1536 { print "$k: $v\n" } 1537 1538 undef $H ; 1539 untie @h ; 1540 1541and this is what it outputs: 1542 1543 ORIGINAL 1544 0: zero 1545 1: one 1546 2: two 1547 3: three 1548 4: four 1549 1550 The last record was [four] 1551 The first record was [zero] 1552 1553 REVERSE 1554 5: last 1555 4: three 1556 3: Newbie 1557 2: one 1558 1: New One 1559 0: first 1560 1561 REVERSE again 1562 5: last 1563 4: three 1564 3: Newbie 1565 2: one 1566 1: New One 1567 0: first 1568 1569Notes: 1570 1571=over 5 1572 1573=item 1. 1574 1575Rather than iterating through the array, C<@h> like this: 1576 1577 foreach $i (@h) 1578 1579it is necessary to use either this: 1580 1581 foreach $i (0 .. $H->length - 1) 1582 1583or this: 1584 1585 for ($a = $H->get($k, $v, R_FIRST) ; 1586 $a == 0 ; 1587 $a = $H->get($k, $v, R_NEXT) ) 1588 1589=item 2. 1590 1591Notice that both times the C<put> method was used the record index was 1592specified using a variable, C<$i>, rather than the literal value 1593itself. This is because C<put> will return the record number of the 1594inserted line via that parameter. 1595 1596=back 1597 1598=head1 THE API INTERFACE 1599 1600As well as accessing Berkeley DB using a tied hash or array, it is also 1601possible to make direct use of most of the API functions defined in the 1602Berkeley DB documentation. 1603 1604To do this you need to store a copy of the object returned from the tie. 1605 1606 $db = tie %hash, "DB_File", "filename" ; 1607 1608Once you have done that, you can access the Berkeley DB API functions 1609as B<DB_File> methods directly like this: 1610 1611 $db->put($key, $value, R_NOOVERWRITE) ; 1612 1613B<Important:> If you have saved a copy of the object returned from 1614C<tie>, the underlying database file will I<not> be closed until both 1615the tied variable is untied and all copies of the saved object are 1616destroyed. 1617 1618 use DB_File ; 1619 $db = tie %hash, "DB_File", "filename" 1620 or die "Cannot tie filename: $!" ; 1621 ... 1622 undef $db ; 1623 untie %hash ; 1624 1625See L<The untie() Gotcha> for more details. 1626 1627All the functions defined in L<dbopen> are available except for 1628close() and dbopen() itself. The B<DB_File> method interface to the 1629supported functions have been implemented to mirror the way Berkeley DB 1630works whenever possible. In particular note that: 1631 1632=over 5 1633 1634=item * 1635 1636The methods return a status value. All return 0 on success. 1637All return -1 to signify an error and set C<$!> to the exact 1638error code. The return code 1 generally (but not always) means that the 1639key specified did not exist in the database. 1640 1641Other return codes are defined. See below and in the Berkeley DB 1642documentation for details. The Berkeley DB documentation should be used 1643as the definitive source. 1644 1645=item * 1646 1647Whenever a Berkeley DB function returns data via one of its parameters, 1648the equivalent B<DB_File> method does exactly the same. 1649 1650=item * 1651 1652If you are careful, it is possible to mix API calls with the tied 1653hash/array interface in the same piece of code. Although only a few of 1654the methods used to implement the tied interface currently make use of 1655the cursor, you should always assume that the cursor has been changed 1656any time the tied hash/array interface is used. As an example, this 1657code will probably not do what you expect: 1658 1659 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE 1660 or die "Cannot tie $filename: $!" ; 1661 1662 # Get the first key/value pair and set the cursor 1663 $X->seq($key, $value, R_FIRST) ; 1664 1665 # this line will modify the cursor 1666 $count = scalar keys %x ; 1667 1668 # Get the second key/value pair. 1669 # oops, it didn't, it got the last key/value pair! 1670 $X->seq($key, $value, R_NEXT) ; 1671 1672The code above can be rearranged to get around the problem, like this: 1673 1674 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE 1675 or die "Cannot tie $filename: $!" ; 1676 1677 # this line will modify the cursor 1678 $count = scalar keys %x ; 1679 1680 # Get the first key/value pair and set the cursor 1681 $X->seq($key, $value, R_FIRST) ; 1682 1683 # Get the second key/value pair. 1684 # worked this time. 1685 $X->seq($key, $value, R_NEXT) ; 1686 1687=back 1688 1689All the constants defined in L<dbopen> for use in the flags parameters 1690in the methods defined below are also available. Refer to the Berkeley 1691DB documentation for the precise meaning of the flags values. 1692 1693Below is a list of the methods available. 1694 1695=over 5 1696 1697=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;> 1698 1699Given a key (C<$key>) this method reads the value associated with it 1700from the database. The value read from the database is returned in the 1701C<$value> parameter. 1702 1703If the key does not exist the method returns 1. 1704 1705No flags are currently defined for this method. 1706 1707=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;> 1708 1709Stores the key/value pair in the database. 1710 1711If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter 1712will have the record number of the inserted key/value pair set. 1713 1714Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and 1715R_SETCURSOR. 1716 1717=item B<$status = $X-E<gt>del($key [, $flags]) ;> 1718 1719Removes all key/value pairs with key C<$key> from the database. 1720 1721A return code of 1 means that the requested key was not in the 1722database. 1723 1724R_CURSOR is the only valid flag at present. 1725 1726=item B<$status = $X-E<gt>fd ;> 1727 1728Returns the file descriptor for the underlying database. 1729 1730See L<Locking: The Trouble with fd> for an explanation for why you should 1731not use C<fd> to lock your database. 1732 1733=item B<$status = $X-E<gt>seq($key, $value, $flags) ;> 1734 1735This interface allows sequential retrieval from the database. See 1736L<dbopen> for full details. 1737 1738Both the C<$key> and C<$value> parameters will be set to the key/value 1739pair read from the database. 1740 1741The flags parameter is mandatory. The valid flag values are R_CURSOR, 1742R_FIRST, R_LAST, R_NEXT and R_PREV. 1743 1744=item B<$status = $X-E<gt>sync([$flags]) ;> 1745 1746Flushes any cached buffers to disk. 1747 1748R_RECNOSYNC is the only valid flag at present. 1749 1750=back 1751 1752=head1 DBM FILTERS 1753 1754A DBM Filter is a piece of code that is be used when you I<always> 1755want to make the same transformation to all keys and/or values in a 1756DBM database. 1757 1758There are four methods associated with DBM Filters. All work identically, 1759and each is used to install (or uninstall) a single DBM Filter. Each 1760expects a single parameter, namely a reference to a sub. The only 1761difference between them is the place that the filter is installed. 1762 1763To summarise: 1764 1765=over 5 1766 1767=item B<filter_store_key> 1768 1769If a filter has been installed with this method, it will be invoked 1770every time you write a key to a DBM database. 1771 1772=item B<filter_store_value> 1773 1774If a filter has been installed with this method, it will be invoked 1775every time you write a value to a DBM database. 1776 1777 1778=item B<filter_fetch_key> 1779 1780If a filter has been installed with this method, it will be invoked 1781every time you read a key from a DBM database. 1782 1783=item B<filter_fetch_value> 1784 1785If a filter has been installed with this method, it will be invoked 1786every time you read a value from a DBM database. 1787 1788=back 1789 1790You can use any combination of the methods, from none, to all four. 1791 1792All filter methods return the existing filter, if present, or C<undef> 1793in not. 1794 1795To delete a filter pass C<undef> to it. 1796 1797=head2 The Filter 1798 1799When each filter is called by Perl, a local copy of C<$_> will contain 1800the key or value to be filtered. Filtering is achieved by modifying 1801the contents of C<$_>. The return code from the filter is ignored. 1802 1803=head2 An Example -- the NULL termination problem. 1804 1805Consider the following scenario. You have a DBM database 1806that you need to share with a third-party C application. The C application 1807assumes that I<all> keys and values are NULL terminated. Unfortunately 1808when Perl writes to DBM databases it doesn't use NULL termination, so 1809your Perl application will have to manage NULL termination itself. When 1810you write to the database you will have to use something like this: 1811 1812 $hash{"$key\0"} = "$value\0" ; 1813 1814Similarly the NULL needs to be taken into account when you are considering 1815the length of existing keys/values. 1816 1817It would be much better if you could ignore the NULL terminations issue 1818in the main application code and have a mechanism that automatically 1819added the terminating NULL to all keys and values whenever you write to 1820the database and have them removed when you read from the database. As I'm 1821sure you have already guessed, this is a problem that DBM Filters can 1822fix very easily. 1823 1824 use warnings ; 1825 use strict ; 1826 use DB_File ; 1827 1828 my %hash ; 1829 my $filename = "filt" ; 1830 unlink $filename ; 1831 1832 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH 1833 or die "Cannot open $filename: $!\n" ; 1834 1835 # Install DBM Filters 1836 $db->filter_fetch_key ( sub { s/\0$// } ) ; 1837 $db->filter_store_key ( sub { $_ .= "\0" } ) ; 1838 $db->filter_fetch_value( sub { s/\0$// } ) ; 1839 $db->filter_store_value( sub { $_ .= "\0" } ) ; 1840 1841 $hash{"abc"} = "def" ; 1842 my $a = $hash{"ABC"} ; 1843 # ... 1844 undef $db ; 1845 untie %hash ; 1846 1847Hopefully the contents of each of the filters should be 1848self-explanatory. Both "fetch" filters remove the terminating NULL, 1849and both "store" filters add a terminating NULL. 1850 1851 1852=head2 Another Example -- Key is a C int. 1853 1854Here is another real-life example. By default, whenever Perl writes to 1855a DBM database it always writes the key and value as strings. So when 1856you use this: 1857 1858 $hash{12345} = "something" ; 1859 1860the key 12345 will get stored in the DBM database as the 5 byte string 1861"12345". If you actually want the key to be stored in the DBM database 1862as a C int, you will have to use C<pack> when writing, and C<unpack> 1863when reading. 1864 1865Here is a DBM Filter that does it: 1866 1867 use warnings ; 1868 use strict ; 1869 use DB_File ; 1870 my %hash ; 1871 my $filename = "filt" ; 1872 unlink $filename ; 1873 1874 1875 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH 1876 or die "Cannot open $filename: $!\n" ; 1877 1878 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ; 1879 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ; 1880 $hash{123} = "def" ; 1881 # ... 1882 undef $db ; 1883 untie %hash ; 1884 1885This time only two filters have been used -- we only need to manipulate 1886the contents of the key, so it wasn't necessary to install any value 1887filters. 1888 1889=head1 HINTS AND TIPS 1890 1891 1892=head2 Locking: The Trouble with fd 1893 1894Until version 1.72 of this module, the recommended technique for locking 1895B<DB_File> databases was to flock the filehandle returned from the "fd" 1896function. Unfortunately this technique has been shown to be fundamentally 1897flawed (Kudos to David Harris for tracking this down). Use it at your own 1898peril! 1899 1900The locking technique went like this. 1901 1902 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644) 1903 || die "dbcreat foo.db $!"; 1904 $fd = $db->fd; 1905 open(DB_FH, "+<&=$fd") || die "dup $!"; 1906 flock (DB_FH, LOCK_EX) || die "flock: $!"; 1907 ... 1908 $db{"Tom"} = "Jerry" ; 1909 ... 1910 flock(DB_FH, LOCK_UN); 1911 undef $db; 1912 untie %db; 1913 close(DB_FH); 1914 1915In simple terms, this is what happens: 1916 1917=over 5 1918 1919=item 1. 1920 1921Use "tie" to open the database. 1922 1923=item 2. 1924 1925Lock the database with fd & flock. 1926 1927=item 3. 1928 1929Read & Write to the database. 1930 1931=item 4. 1932 1933Unlock and close the database. 1934 1935=back 1936 1937Here is the crux of the problem. A side-effect of opening the B<DB_File> 1938database in step 2 is that an initial block from the database will get 1939read from disk and cached in memory. 1940 1941To see why this is a problem, consider what can happen when two processes, 1942say "A" and "B", both want to update the same B<DB_File> database 1943using the locking steps outlined above. Assume process "A" has already 1944opened the database and has a write lock, but it hasn't actually updated 1945the database yet (it has finished step 2, but not started step 3 yet). Now 1946process "B" tries to open the same database - step 1 will succeed, 1947but it will block on step 2 until process "A" releases the lock. The 1948important thing to notice here is that at this point in time both 1949processes will have cached identical initial blocks from the database. 1950 1951Now process "A" updates the database and happens to change some of the 1952data held in the initial buffer. Process "A" terminates, flushing 1953all cached data to disk and releasing the database lock. At this point 1954the database on disk will correctly reflect the changes made by process 1955"A". 1956 1957With the lock released, process "B" can now continue. It also updates the 1958database and unfortunately it too modifies the data that was in its 1959initial buffer. Once that data gets flushed to disk it will overwrite 1960some/all of the changes process "A" made to the database. 1961 1962The result of this scenario is at best a database that doesn't contain 1963what you expect. At worst the database will corrupt. 1964 1965The above won't happen every time competing process update the same 1966B<DB_File> database, but it does illustrate why the technique should 1967not be used. 1968 1969=head2 Safe ways to lock a database 1970 1971Starting with version 2.x, Berkeley DB has internal support for locking. 1972The companion module to this one, B<BerkeleyDB>, provides an interface 1973to this locking functionality. If you are serious about locking 1974Berkeley DB databases, I strongly recommend using B<BerkeleyDB>. 1975 1976If using B<BerkeleyDB> isn't an option, there are a number of modules 1977available on CPAN that can be used to implement locking. Each one 1978implements locking differently and has different goals in mind. It is 1979therefore worth knowing the difference, so that you can pick the right 1980one for your application. Here are the three locking wrappers: 1981 1982=over 5 1983 1984=item B<Tie::DB_Lock> 1985 1986A B<DB_File> wrapper which creates copies of the database file for 1987read access, so that you have a kind of a multiversioning concurrent read 1988system. However, updates are still serial. Use for databases where reads 1989may be lengthy and consistency problems may occur. 1990 1991=item B<Tie::DB_LockFile> 1992 1993A B<DB_File> wrapper that has the ability to lock and unlock the database 1994while it is being used. Avoids the tie-before-flock problem by simply 1995re-tie-ing the database when you get or drop a lock. Because of the 1996flexibility in dropping and re-acquiring the lock in the middle of a 1997session, this can be massaged into a system that will work with long 1998updates and/or reads if the application follows the hints in the POD 1999documentation. 2000 2001=item B<DB_File::Lock> 2002 2003An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile 2004before tie-ing the database and drops the lock after the untie. Allows 2005one to use the same lockfile for multiple databases to avoid deadlock 2006problems, if desired. Use for databases where updates are reads are 2007quick and simple flock locking semantics are enough. 2008 2009=back 2010 2011=head2 Sharing Databases With C Applications 2012 2013There is no technical reason why a Berkeley DB database cannot be 2014shared by both a Perl and a C application. 2015 2016The vast majority of problems that are reported in this area boil down 2017to the fact that C strings are NULL terminated, whilst Perl strings are 2018not. See L<DBM FILTERS> for a generic way to work around this problem. 2019 2020Here is a real example. Netscape 2.0 keeps a record of the locations you 2021visit along with the time you last visited them in a DB_HASH database. 2022This is usually stored in the file F<~/.netscape/history.db>. The key 2023field in the database is the location string and the value field is the 2024time the location was last visited stored as a 4 byte binary value. 2025 2026If you haven't already guessed, the location string is stored with a 2027terminating NULL. This means you need to be careful when accessing the 2028database. 2029 2030Here is a snippet of code that is loosely based on Tom Christiansen's 2031I<ggh> script (available from your nearest CPAN archive in 2032F<authors/id/TOMC/scripts/nshist.gz>). 2033 2034 use warnings ; 2035 use strict ; 2036 use DB_File ; 2037 use Fcntl ; 2038 2039 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ; 2040 $dotdir = $ENV{HOME} || $ENV{LOGNAME}; 2041 2042 $HISTORY = "$dotdir/.netscape/history.db"; 2043 2044 tie %hist_db, 'DB_File', $HISTORY 2045 or die "Cannot open $HISTORY: $!\n" ;; 2046 2047 # Dump the complete database 2048 while ( ($href, $binary_time) = each %hist_db ) { 2049 2050 # remove the terminating NULL 2051 $href =~ s/\x00$// ; 2052 2053 # convert the binary time into a user friendly string 2054 $date = localtime unpack("V", $binary_time); 2055 print "$date $href\n" ; 2056 } 2057 2058 # check for the existence of a specific key 2059 # remember to add the NULL 2060 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) { 2061 $date = localtime unpack("V", $binary_time) ; 2062 print "Last visited mox.perl.com on $date\n" ; 2063 } 2064 else { 2065 print "Never visited mox.perl.com\n" 2066 } 2067 2068 untie %hist_db ; 2069 2070=head2 The untie() Gotcha 2071 2072If you make use of the Berkeley DB API, it is I<very> strongly 2073recommended that you read L<perltie/The untie Gotcha>. 2074 2075Even if you don't currently make use of the API interface, it is still 2076worth reading it. 2077 2078Here is an example which illustrates the problem from a B<DB_File> 2079perspective: 2080 2081 use DB_File ; 2082 use Fcntl ; 2083 2084 my %x ; 2085 my $X ; 2086 2087 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC 2088 or die "Cannot tie first time: $!" ; 2089 2090 $x{123} = 456 ; 2091 2092 untie %x ; 2093 2094 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT 2095 or die "Cannot tie second time: $!" ; 2096 2097 untie %x ; 2098 2099When run, the script will produce this error message: 2100 2101 Cannot tie second time: Invalid argument at bad.file line 14. 2102 2103Although the error message above refers to the second tie() statement 2104in the script, the source of the problem is really with the untie() 2105statement that precedes it. 2106 2107Having read L<perltie> you will probably have already guessed that the 2108error is caused by the extra copy of the tied object stored in C<$X>. 2109If you haven't, then the problem boils down to the fact that the 2110B<DB_File> destructor, DESTROY, will not be called until I<all> 2111references to the tied object are destroyed. Both the tied variable, 2112C<%x>, and C<$X> above hold a reference to the object. The call to 2113untie() will destroy the first, but C<$X> still holds a valid 2114reference, so the destructor will not get called and the database file 2115F<tst.fil> will remain open. The fact that Berkeley DB then reports the 2116attempt to open a database that is already open via the catch-all 2117"Invalid argument" doesn't help. 2118 2119If you run the script with the C<-w> flag the error message becomes: 2120 2121 untie attempted while 1 inner references still exist at bad.file line 12. 2122 Cannot tie second time: Invalid argument at bad.file line 14. 2123 2124which pinpoints the real problem. Finally the script can now be 2125modified to fix the original problem by destroying the API object 2126before the untie: 2127 2128 ... 2129 $x{123} = 456 ; 2130 2131 undef $X ; 2132 untie %x ; 2133 2134 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT 2135 ... 2136 2137 2138=head1 COMMON QUESTIONS 2139 2140=head2 Why is there Perl source in my database? 2141 2142If you look at the contents of a database file created by DB_File, 2143there can sometimes be part of a Perl script included in it. 2144 2145This happens because Berkeley DB uses dynamic memory to allocate 2146buffers which will subsequently be written to the database file. Being 2147dynamic, the memory could have been used for anything before DB 2148malloced it. As Berkeley DB doesn't clear the memory once it has been 2149allocated, the unused portions will contain random junk. In the case 2150where a Perl script gets written to the database, the random junk will 2151correspond to an area of dynamic memory that happened to be used during 2152the compilation of the script. 2153 2154Unless you don't like the possibility of there being part of your Perl 2155scripts embedded in a database file, this is nothing to worry about. 2156 2157=head2 How do I store complex data structures with DB_File? 2158 2159Although B<DB_File> cannot do this directly, there is a module which 2160can layer transparently over B<DB_File> to accomplish this feat. 2161 2162Check out the MLDBM module, available on CPAN in the directory 2163F<modules/by-module/MLDBM>. 2164 2165=head2 What does "Invalid Argument" mean? 2166 2167You will get this error message when one of the parameters in the 2168C<tie> call is wrong. Unfortunately there are quite a few parameters to 2169get wrong, so it can be difficult to figure out which one it is. 2170 2171Here are a couple of possibilities: 2172 2173=over 5 2174 2175=item 1. 2176 2177Attempting to reopen a database without closing it. 2178 2179=item 2. 2180 2181Using the O_WRONLY flag. 2182 2183=back 2184 2185=head2 What does "Bareword 'DB_File' not allowed" mean? 2186 2187You will encounter this particular error message when you have the 2188C<strict 'subs'> pragma (or the full strict pragma) in your script. 2189Consider this script: 2190 2191 use warnings ; 2192 use strict ; 2193 use DB_File ; 2194 my %x ; 2195 tie %x, DB_File, "filename" ; 2196 2197Running it produces the error in question: 2198 2199 Bareword "DB_File" not allowed while "strict subs" in use 2200 2201To get around the error, place the word C<DB_File> in either single or 2202double quotes, like this: 2203 2204 tie %x, "DB_File", "filename" ; 2205 2206Although it might seem like a real pain, it is really worth the effort 2207of having a C<use strict> in all your scripts. 2208 2209=head1 REFERENCES 2210 2211Articles that are either about B<DB_File> or make use of it. 2212 2213=over 5 2214 2215=item 1. 2216 2217I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com), 2218Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41 2219 2220=back 2221 2222=head1 HISTORY 2223 2224Moved to the Changes file. 2225 2226=head1 BUGS 2227 2228Some older versions of Berkeley DB had problems with fixed length 2229records using the RECNO file format. This problem has been fixed since 2230version 1.85 of Berkeley DB. 2231 2232I am sure there are bugs in the code. If you do find any, or can 2233suggest any enhancements, I would welcome your comments. 2234 2235=head1 AVAILABILITY 2236 2237B<DB_File> comes with the standard Perl source distribution. Look in 2238the directory F<ext/DB_File>. Given the amount of time between releases 2239of Perl the version that ships with Perl is quite likely to be out of 2240date, so the most recent version can always be found on CPAN (see 2241L<perlmodlib/CPAN> for details), in the directory 2242F<modules/by-module/DB_File>. 2243 2244This version of B<DB_File> will work with either version 1.x, 2.x or 22453.x of Berkeley DB, but is limited to the functionality provided by 2246version 1. 2247 2248The official web site for Berkeley DB is F<http://www.sleepycat.com>. 2249All versions of Berkeley DB are available there. 2250 2251Alternatively, Berkeley DB version 1 is available at your nearest CPAN 2252archive in F<src/misc/db.1.85.tar.gz>. 2253 2254If you are running IRIX, then get Berkeley DB version 1 from 2255F<http://reality.sgi.com/ariel>. It has the patches necessary to 2256compile properly on IRIX 5.3. 2257 2258=head1 COPYRIGHT 2259 2260Copyright (c) 1995-2005 Paul Marquess. All rights reserved. This program 2261is free software; you can redistribute it and/or modify it under the 2262same terms as Perl itself. 2263 2264Although B<DB_File> is covered by the Perl license, the library it 2265makes use of, namely Berkeley DB, is not. Berkeley DB has its own 2266copyright and its own license. Please take the time to read it. 2267 2268Here are are few words taken from the Berkeley DB FAQ (at 2269F<http://www.sleepycat.com>) regarding the license: 2270 2271 Do I have to license DB to use it in Perl scripts? 2272 2273 No. The Berkeley DB license requires that software that uses 2274 Berkeley DB be freely redistributable. In the case of Perl, that 2275 software is Perl, and not your scripts. Any Perl scripts that you 2276 write are your property, including scripts that make use of 2277 Berkeley DB. Neither the Perl license nor the Berkeley DB license 2278 place any restriction on what you may do with them. 2279 2280If you are in any doubt about the license situation, contact either the 2281Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details. 2282 2283 2284=head1 SEE ALSO 2285 2286L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>, 2287L<perldbmfilter> 2288 2289=head1 AUTHOR 2290 2291The DB_File interface was written by Paul Marquess 2292E<lt>pmqs@cpan.orgE<gt>. 2293Questions about the DB system itself may be addressed to 2294E<lt>db@sleepycat.comE<gt>. 2295 2296=cut 2297