1package Porting::updateAUTHORS; 2use strict; 3use warnings; 4use Data::Dumper; 5use Encode qw(encode_utf8 decode_utf8 decode); 6use Digest::SHA qw(sha256_base64); 7use Text::Wrap qw(wrap); 8use Unicode::Collate; 9use Cwd qw(getcwd); 10use feature 'fc'; 11$Text::Wrap::columns= 80; 12 13# The style of this file is determined by: 14# 15# perltidy -w -ple -bbb -bbc -bbs -nolq -l=80 -noll -nola -nwls='=' \ 16# -isbc -nolc -otr -kis -ci=4 -se -sot -sct -nsbl -pt=2 -fs \ 17# -fsb='#start-no-tidy' -fse='#end-no-tidy' 18 19# Info and config for passing to git log. 20# %an: author name 21# %aN: author name (respecting .mailmap, see git-shortlog(1) or git-blame(1)) 22# %ae: author email 23# %aE: author email (respecting .mailmap, see git-shortlog(1) or git-blame(1)) 24# %cn: committer name 25# %cN: committer name (respecting .mailmap, see git-shortlog(1) or git-blame(1)) 26# %ce: committer email 27# %cE: committer email (respecting .mailmap, see git-shortlog(1) or git-blame(1)) 28# %H: commit hash 29# %h: abbreviated commit hash 30# %s: subject 31# %x00: print a byte from a hex code 32 33my %field_spec= ( 34 "an" => "author_name", 35 "aN" => "author_name_mm", 36 "ae" => "author_email", 37 "aE" => "author_email_mm", 38 "cn" => "committer_name", 39 "cN" => "committer_name_mm", 40 "ce" => "committer_email", 41 "cE" => "committer_email_mm", 42 "H" => "commit_hash", 43 "h" => "abbrev_hash", 44 "s" => "commit_subject", 45); 46 47my $Collate= Unicode::Collate->new(level => 1, indentical => 1); 48my @field_codes= sort keys %field_spec; 49my @field_names= map { $field_spec{$_} } @field_codes; 50my $tformat= "=" . join "%x09", map { "%" . $_ } @field_codes; 51 52sub _make_name_author_info { 53 my ($self, $commit_info, $name_key)= @_; 54 my $author_info= $self->{author_info}; 55 (my $email_key= $name_key) =~ s/name/email/; 56 my $email= $commit_info->{$email_key}; 57 my $name= $commit_info->{$name_key}; 58 59 my $line= $author_info->{"email2line"}{$email} 60 // $author_info->{"name2line"}{ lc($name) }; 61 62 $line //= sprintf "%-31s<%s>", 63 $commit_info->{$name_key}, $commit_info->{$email_key}; 64 $commit_info->{ $name_key . "_canon" }= $line; 65 return $line; 66} 67 68sub _make_name_simple { 69 my ($self, $commit_info, $key)= @_; 70 my $name_key= $key . "_name"; 71 my $email_key= $key . "_email"; 72 return sprintf "%s <%s>", $commit_info->{$name_key}, 73 lc($commit_info->{$email_key}); 74} 75 76sub __fold_trim_ws { 77 my ($munged)= @_; 78 $munged =~ s/\s+/ /g; 79 $munged =~ s/\A\s+//; 80 $munged =~ s/\s+\z//; 81 return $munged; 82} 83 84sub _register_author { 85 my ($self, $name, $type)= @_; 86 87 return if $self->_logical_exclude_author($name); 88 89 my $digest= $self->_keeper_digest($name) 90 or return; 91 92 $self->{who_stats}{$name}{$type}++; 93 94 $self->{author_info}{"lines"}{$name} 95 and return; 96 97 my $munged= __fold_trim_ws($name); 98 if ($self->{exclude_missing}) { 99 $self->_exclude_contrib($name, $digest); 100 } 101 else { 102 $self->{author_info}{"lines"}{$name}++; 103 104 my $munged= __fold_trim_ws($name); 105 warn encode_utf8 sprintf 106 "New %s '%s' (%s) will be added to AUTHORS file.\n", 107 $type, $munged, $digest 108 if $self->{verbose}; 109 } 110} 111 112sub git_conf_get { 113 my ($self, $setting)= @_; 114 chomp(my $value= `git config --get $setting`); 115 return decode_utf8 $value; 116} 117 118sub current_git_user_name { 119 my ($self)= @_; 120 return $self->git_conf_get("user.name"); 121} 122 123sub current_git_user_email { 124 my ($self)= @_; 125 return $self->git_conf_get("user.email"); 126} 127 128sub current_git_name_email { 129 my ($self, $type)= @_; 130 my $name= 131 $ENV{"GIT_\U$type\E_NAME"} 132 || $self->git_conf_get("\L$type\E.name") 133 || $self->current_git_user_name(); 134 my $email= 135 $ENV{"GIT_\U$type\E_EMAIL"} 136 || $self->git_conf_get("\L$type\E.email") 137 || $self->current_git_user_email(); 138 return $name, $email; 139} 140 141sub format_name_email { 142 my ($self, $name, $email)= @_; 143 return sprintf "%s <%s>", $name, $email; 144} 145 146sub current_committer_name_email { 147 my ($self, $full)= @_; 148 my ($n, $e)= $self->current_git_name_email("committer"); 149 return $full ? $self->format_name_email($n, $e) : ($n, $e); 150} 151 152sub current_author_name_email { 153 my ($self, $full)= @_; 154 my ($n, $e)= $self->current_git_name_email("author"); 155 return $full ? $self->format_name_email($n, $e) : ($n, $e); 156} 157 158sub git_status_porcelain { 159 my ($self)= @_; 160 my $status= `git status --porcelain`; 161 return $status // ""; 162} 163 164sub finalize_commit_info { 165 my ($self, $commit_info)= @_; 166 my $author= $commit_info->{author_name_mm_canon}; 167 my $author_stats= $self->{who_stats}{$author} ||= {}; 168 169 my $file_info= $commit_info->{files} ||= {}; 170 foreach my $file (keys %{$file_info}) { 171 if (!$self->{file_stats}{$file}) { 172 $self->{summary_stats}{num_files}++; 173 } 174 my $fs= $self->{file_stats}{$file} ||= {}; 175 my $afs= $author_stats->{file_stats}{$file} ||= {}; 176 my $added= $file_info->{$file}{lines_added}; 177 my $removed= $file_info->{$file}{lines_removed}; 178 my $delta= $file_info->{$file}{lines_delta}; 179 defined $_ and $_ eq "-" and undef $_ for $added, $removed; 180 181 if (defined $added) { 182 for my $h ($author_stats, $fs, $afs) { 183 $h->{lines_delta} += $delta; 184 $h->{lines_added} += $added; 185 $h->{lines_removed} += $removed; 186 } 187 } 188 else { 189 $author_stats->{binary_change}++; 190 $fs->{binary_change}++; 191 $afs->{binary_change}++; 192 } 193 $afs->{commits}++ 194 or $author_stats->{num_files}++; 195 196 $fs->{commits}++ 197 or $self->{summary_stats}{num_files}++; 198 199 $fs->{who}{$author}++ 200 or $self->{summary_stats}{authors}++; 201 } 202} 203 204sub read_commit_log { 205 my ($self)= @_; 206 my $author_info= $self->{author_info} ||= {}; 207 my $mailmap_info= $self->{mailmap_info} ||= {}; 208 209 my $commits_read= 0; 210 my @args= ("--pretty='format:$tformat'"); 211 push @args, "--numstat" if $self->{numstat}; 212 push @args, "'$self->{commit_range}'" if $self->{commit_range}; 213 214 my $last_commit_info; 215 my $cmd= qq(git -c diff.algorithm=myers log @args); 216 $cmd =~ s/'/"/g if $^O =~ /Win/; 217 218 # If we run under -Dmksymlinks git might not have access to the 219 # .mailmap unless we chdir into the git working tree. Ideally we 220 # would prefer to pass in the location of the .mailmap file to git, 221 # but it doesn't support that as a command line option. We can't 222 # just chdir into the source_dir as that would break scalar PerlIO 223 # layer operations which are loaded late. So we chdir before we 224 # read the git log, and then chdir right back after we have opened 225 # the handle. Note there is related code in read_mailmap_file(), 226 # if you change this also change that. 227 228 my $cwd = getcwd(); 229 if ($self->{source_dir}) { 230 chdir $self->{source_dir}; 231 } 232 233 open my $fh, "-|", $cmd 234 or die "Failed to open git log pipe: $!"; 235 236 chdir $cwd; 237 238 binmode($fh); 239 while (defined(my $line= <$fh>)) { 240 chomp $line; 241 $line= decode_utf8($line); 242 if ($line =~ s/^=//) { 243 $self->finalize_commit_info($last_commit_info) 244 if $last_commit_info; 245 } 246 elsif ($line =~ /\S/) { 247 my ($added, $removed, $file)= split /\s+/, $line; 248 if ($added ne "-") { 249 $last_commit_info->{files}{$file}= { 250 lines_added => $added, 251 lines_removed => $removed, 252 lines_delta => $added - $removed, 253 }; 254 } 255 else { 256 $last_commit_info->{files}{$file}{binary_changes}++; 257 } 258 next; 259 } 260 else { 261 # whitspace only or empty line 262 next; 263 } 264 $commits_read++; 265 my $commit_info= {}; 266 $last_commit_info= $commit_info; 267 @{$commit_info}{@field_names}= split /\t/, $line, 0 + @field_names; 268 269 my $author_name_mm_canon= 270 $self->_make_name_author_info($commit_info, "author_name_mm"); 271 272 my $committer_name_mm_canon= 273 $self->_make_name_author_info($commit_info, "committer_name_mm"); 274 275 my $author_name_real= $self->_make_name_simple($commit_info, "author"); 276 277 my $committer_name_real= 278 $self->_make_name_simple($commit_info, "committer"); 279 280 my ($author_good, $committer_good); 281 282 if ( $self->_keeper_digest($author_name_mm_canon) 283 && $self->_keeper_digest($author_name_real)) 284 { 285 $self->_check_name_mailmap($author_name_mm_canon, $author_name_real, 286 $commit_info, "author name"); 287 $self->_register_author($author_name_mm_canon, "author"); 288 $author_good= 1; 289 } 290 291 if ( $self->_keeper_digest($committer_name_mm_canon) 292 && $self->_keeper_digest($committer_name_real)) 293 { 294 $self->_check_name_mailmap($committer_name_mm_canon, 295 $committer_name_real, $commit_info, "committer name"); 296 $self->_register_author($committer_name_mm_canon, "committer"); 297 $committer_good= 1; 298 } 299 if ( $author_good 300 and $committer_good 301 and $committer_name_mm_canon ne $author_name_mm_canon) 302 { 303 $self->{who_stats}{$committer_name_mm_canon}{applied}++; 304 } 305 } 306 $self->finalize_commit_info($last_commit_info) if $last_commit_info; 307 if (!$commits_read) { 308 if ($self->{commit_range}) { 309 die "No commits in range '$self->{commit_range}'\n"; 310 } 311 else { 312 die "Panic! There are no commits!\n"; 313 } 314 } 315 return $author_info; 316} 317 318sub dupe_info { 319 my ($self)= @_; 320 my $msg= ""; 321 foreach my $type (sort keys %{ $self->{dupe} || {} }) { 322 $msg .= "Duplicate \u$type in $self->{authors_file}:\n"; 323 foreach my $key (sort keys %{ $self->{dupe}{$type} }) { 324 $msg .= " \u$type '$key'\n"; 325 foreach my $line (sort keys %{ $self->{dupe}{$type}{$key} }) { 326 $msg .= " $line\n"; 327 } 328 } 329 } 330 return $msg; 331} 332 333sub read_authors_file { 334 my ($self)= @_; 335 my $authors_file= $self->{authors_file}; 336 337 my @authors_preamble; 338 open my $in_fh, "<", $authors_file 339 or die "Failed to open for read '$authors_file': $!"; 340 my $raw_text= ""; 341 my $found_sep= 0; 342 while (defined(my $line= <$in_fh>)) { 343 $raw_text .= $line; 344 $line= decode_utf8($line); 345 chomp $line; 346 push @authors_preamble, $line; 347 if ($line =~ /^--/) { 348 $found_sep= 1; 349 last; 350 } 351 } 352 if (!$found_sep) { 353 die sprintf <<'EOFMT', $authors_file; 354Possibly corrupted authors file '%s'. 355 356There should be a big '#' comment block at the start of the file 357followed by "--" followed by a list of names and email/contact 358details. We couldn't find the separator. Where did it go? 359 360Cowardly refusing to continue until this is fixed. 361EOFMT 362 } 363 my %author_info; 364 while (defined(my $line= <$in_fh>)) { 365 $raw_text .= $line; 366 $line= decode_utf8($line); 367 chomp $line; 368 my ($name, $email); 369 my $copy= $line; 370 $copy =~ s/\s+\z//; 371 if ($copy =~ s/<([^<>]*)>//) { 372 $email= $1; 373 } 374 elsif ($copy =~ s/\s+(\@\w+)\z//) { 375 $email= $1; 376 } 377 $copy =~ s/\s+\z//; 378 $name= $copy; 379 $email //= "unknown"; 380 my $orig_name= $name; 381 my $orig_email= $email; 382 if (my $new_name= $self->{change_name_for_name}{$orig_name}) { 383 $name= $new_name; 384 } 385 if (my $new_name= $self->{change_name_for_email}{$orig_email}) { 386 $name= $new_name; 387 } 388 if (my $new_email= $self->{change_email_for_name}{$orig_name}) { 389 $email= $new_email; 390 } 391 if (my $new_email= $self->{change_email_for_email}{$orig_email}) { 392 $email= $new_email; 393 } 394 $line= sprintf "%-31s%s", $name, $email =~ /^\@/ ? $email : "<$email>"; 395 $line =~ s/\s+<unknown>\z//; 396 $email= lc($email); 397 398 $line =~ s/\s+\z//; 399 $author_info{"lines"}{$line}++; 400 if ($email and $email ne "unknown") { 401 if (my $other= $author_info{"email2line"}{$email}) { 402 $self->{dupe}{email}{$email}{$other}= 1; 403 $self->{dupe}{email}{$email}{$line}= 1; 404 } 405 else { 406 $author_info{"email2line"}{$email}= $line; 407 } 408 } 409 if ($name and $name ne "unknown") { 410 if (my $other= $author_info{"name2line"}{ lc($name) }) { 411 $self->{dupe}{name}{$name}{$other}= 1; 412 $self->{dupe}{name}{$name}{$line}= 1; 413 } 414 else { 415 $author_info{"name2line"}{ lc($name) }= $line; 416 } 417 } 418 $author_info{"email2name"}{$email} //= $name 419 if $email 420 and $name 421 and $email ne "unknown"; 422 $author_info{"name2email"}{$name} //= $email 423 if $name and $name ne "unknown"; 424 $author_info{"clean_full"}{ __fold_trim_ws($line) }= $line; 425 } 426 close $in_fh 427 or die "Failed to close '$authors_file': $!"; 428 429 $self->{author_info}= \%author_info; 430 $self->{authors_preamble}= \@authors_preamble; 431 $self->{authors_raw_text}= $raw_text; 432 return (\%author_info, \@authors_preamble, $raw_text); 433} 434 435sub update_authors_file { 436 my ($self)= @_; 437 438 my $author_info= $self->{author_info}; 439 my $authors_preamble= $self->{authors_preamble}; 440 my $authors_file= $self->{authors_file}; 441 my $old_raw_text= $self->{authors_raw_text}; 442 443 my $authors_file_new= $authors_file . ".new"; 444 my $new_raw_text= ""; 445 { 446 open my $out_fh, ">", \$new_raw_text 447 or die "Failed to open scalar buffer for write: $!"; 448 foreach my $line (@$authors_preamble) { 449 print $out_fh encode_utf8($line), "\n" 450 or die "Failed to print to scalar buffer handle: $!"; 451 } 452 foreach my $author (__sorted_hash_keys($author_info->{"lines"})) { 453 next if $self->_logical_exclude_author($author); 454 my $author_mm= $self->_author_to_mailmap($author); 455 if (!$self->_keeper_digest($author_mm)) { 456 next; 457 } 458 print $out_fh encode_utf8($author), "\n" 459 or die "Failed to print to scalar buffer handle: $!"; 460 } 461 close $out_fh 462 or die "Failed to close scalar buffer handle: $!"; 463 } 464 if ($new_raw_text ne $old_raw_text) { 465 $self->{changed_count}++; 466 $self->_log_file_changes_quick_and_dirty_diff($authors_file, 467 $old_raw_text, $new_raw_text); 468 469 if ($self->{no_update}) { 470 return 1; 471 } 472 473 warn "Updating '$authors_file'\n" if $self->{verbose}; 474 475 open my $out_fh, ">", $authors_file_new 476 or die "Failed to open for write '$authors_file_new': $!"; 477 binmode $out_fh; 478 print $out_fh $new_raw_text; 479 close $out_fh 480 or die "Failed to close '$authors_file_new': $!"; 481 rename $authors_file_new, $authors_file 482 or die 483 "Failed to rename '$authors_file_new' to '$authors_file': $!"; 484 return 1; 485 } 486 else { 487 return 0; 488 } 489} 490 491sub read_mailmap_file { 492 my ($self)= @_; 493 my $mailmap_file= $self->{mailmap_file}; 494 495 # If we run under -Dmksymlinks the .mailmap might actually be 496 # located in a different directory than the one we are running from. 497 # We could munge the $mailmap_file to be relative to source_dir if 498 # it was not already an absolute path, but that would require 499 # loading File::Spec, and doing THAT wouldn't help us when we read 500 # the git log (see the related code for doing that in the 501 # read_commit_log() sub), so we use the same strategy of remembering 502 # our current working directory, chdir'ing into the source dir, 503 # opening the file, and then chdir'ing back in both cases. 504 505 my $cwd = getcwd(); 506 if ($self->{source_dir}) { 507 chdir $self->{source_dir}; 508 } 509 open my $in, "<", $mailmap_file 510 or die "Failed to read '$mailmap_file': $!"; 511 chdir $cwd; 512 my %mailmap_hash; 513 my @mailmap_preamble; 514 my $line_num= 0; 515 my $raw_text= ""; 516 while (defined(my $line= <$in>)) { 517 $raw_text .= $line; 518 $line= decode_utf8($line); 519 ++$line_num; 520 next unless $line =~ /\S/; 521 chomp($line); 522 if ($line =~ /^#/) { 523 if (!keys %mailmap_hash) { 524 push @mailmap_preamble, $line; 525 } 526 else { 527 die encode_utf8 "Not expecting comments after header ", 528 "finished at line $line_num!\nLine: $line\n"; 529 } 530 } 531 else { 532 $mailmap_hash{$line}= $line_num; 533 } 534 } 535 close $in 536 or die "Failed to close '$mailmap_file' after reading: $!"; 537 if (!@mailmap_preamble) { 538 die sprintf <<'EOFMT', $mailmap_file; 539Possibly corrupted mailmap file '%s'. 540 541This file should have a preamble of '#' comments in it. 542 543Where did they go? 544 545Cowardly refusing to continue until this is fixed. 546EOFMT 547 } 548 $self->{orig_mailmap_hash}= \%mailmap_hash; 549 $self->{mailmap_preamble}= \@mailmap_preamble; 550 $self->{mailmap_raw_text}= $raw_text; 551 return (\%mailmap_hash, \@mailmap_preamble, $raw_text); 552} 553 554sub __sorted_hash_keys { 555 my ($hash)= @_; 556 return __sort_names(keys %$hash); 557} 558 559sub __sort_names { 560 my @sorted= sort { fc($a) cmp fc($b) || $a cmp $b } @_; 561 return @sorted; 562} 563 564# Returns 0 if the file needed to be changed, Return 1 if it does not. 565sub update_mailmap_file { 566 my ($self)= @_; 567 my $mailmap_hash= $self->{new_mailmap_hash}; 568 my $mailmap_preamble= $self->{mailmap_preamble}; 569 my $mailmap_file= $self->{mailmap_file}; 570 my $old_raw_text= $self->{mailmap_raw_text}; 571 572 my $new_raw_text= ""; 573 { 574 my $cwd = getcwd(); 575 open my $out, ">", \$new_raw_text 576 or die "Failed to open scalar buffer for write: $!"; 577 foreach 578 my $line (@$mailmap_preamble, __sorted_hash_keys($mailmap_hash),) 579 { 580 next if $line =~ m!\A(.*) \1\z!; 581 print $out encode_utf8($line), "\n" 582 or die "Failed to print to scalar buffer handle: $!"; 583 } 584 close $out 585 or die "Failed to close scalar buffer handle: $!"; 586 } 587 if ($new_raw_text ne $old_raw_text) { 588 $self->{changed_count}++; 589 $self->_log_file_changes_quick_and_dirty_diff($mailmap_file, 590 $old_raw_text, $new_raw_text); 591 592 if ($self->{no_update}) { 593 return 1; 594 } 595 596 warn "Updating '$mailmap_file'\n" 597 if $self->{verbose}; 598 599 my $mailmap_file_new= $mailmap_file . ".new"; 600 open my $out, ">", $mailmap_file_new 601 or die "Failed to write '$mailmap_file_new': $!"; 602 binmode $out 603 or die "Failed to binmode '$mailmap_file_new': $!"; 604 print $out $new_raw_text 605 or die "Failed to print to '$mailmap_file_new': $!"; 606 close $out 607 or die "Failed to close '$mailmap_file_new' after writing: $!"; 608 rename $mailmap_file_new, $mailmap_file 609 or die 610 "Failed to rename '$mailmap_file_new' to '$mailmap_file': $!"; 611 return 1; 612 } 613 else { 614 return 0; 615 } 616} 617 618sub parse_orig_mailmap_hash { 619 my ($self)= @_; 620 my $mailmap_hash= $self->{orig_mailmap_hash}; 621 622 my @recs; 623 foreach my $line (__sorted_hash_keys($mailmap_hash)) { 624 my $line_num= $mailmap_hash->{$line}; 625 $line =~ /^ \s* (?: ( [^<>]*? ) \s+ )? <([^<>]*)> 626 (?: \s+ (?: ( [^<>]*? ) \s+ )? <([^<>]*)> )? \s* \z /x 627 or die encode_utf8 628 "Failed to parse '$self->{mailmap_file}' line num $line_num: '$line'\n"; 629 if (!$1 or !$2) { 630 die encode_utf8 "Both preferred name and email are mandatory ", 631 "in line num $line_num: '$line'"; 632 } 633 my ($name, $email, $other_name, $other_email)= ($1, $2, $3, $4); 634 my ($orig_name, $orig_email)= ($1, $2); 635 if (my $new_name= $self->{change_name_for_name}{$orig_name}) { 636 $name= $new_name; 637 } 638 if (my $new_name= $self->{change_name_for_email}{$orig_email}) { 639 $name= $new_name; 640 } 641 if (my $new_email= $self->{change_email_for_name}{$orig_name}) { 642 $email= $new_email; 643 } 644 if (my $new_email= $self->{change_email_for_email}{$orig_email}) { 645 $email= $new_email; 646 } 647 648 push @recs, [ $name, $email, $other_name, $other_email, $line_num ]; 649 } 650 return \@recs; 651} 652 653sub _safe_set_key { 654 my ($self, $hash, $root_key, $key, $val, $pretty_name)= @_; 655 $hash->{$root_key}{$key} //= $val; 656 my $prev= $hash->{$root_key}{$key}; 657 if ($prev ne $val) { 658 die encode_utf8 "Collision on mapping $root_key: " 659 . " '$key' maps to '$prev' and '$val'\n"; 660 } 661} 662 663my $O2P= "other2preferred"; 664my $O2PN= "other2preferred_name"; 665my $O2PE= "other2preferred_email"; 666my $P2O= "preferred2other"; 667my $N2P= "name2preferred"; 668my $E2P= "email2preferred"; 669 670my $blurb= ""; # FIXME - replace with a nice message 671 672sub known_contributor { 673 my ($self, $name, $email)= @_; 674 if (!$name or !$email) { return 0 } 675 my $combined= "$name <$email>"; 676 return (( 677 $self->{mailmap_info}{$O2P}{$combined} 678 && $self->_keeper_digest($combined) 679 ) ? 1 : 0 680 ); 681} 682 683sub _check_name_mailmap { 684 my ($self, $auth_name, $raw_name, $commit_info, $descr)= @_; 685 my $mailmap_info= $self->{mailmap_info}; 686 687 my $name= $self->_author_to_mailmap($auth_name); 688 689 my $digest= $self->_keeper_digest($name) 690 or return 1; # known but ignore 691 692 my $name_info= $mailmap_info->{$P2O}{$name}; 693 694 if (!$name_info || !$name_info->{$raw_name}) { 695 if ($self->{exclude_missing}) { 696 $self->_exclude_contrib($name, $digest); 697 } 698 else { 699 $mailmap_info->{add}{"$name $raw_name"}++; 700 701 warn encode_utf8 sprintf 702 "Unknown %s '%s' in commit %s '%s'\n%s", 703 $descr, 704 $name, 705 $commit_info->{"abbrev_hash"}, 706 $commit_info->{"commit_subject"}, $blurb 707 if $self->{verbose}; 708 } 709 return 0; 710 } 711 return 1; 712} 713 714sub _author_to_mailmap { 715 my ($self, $name)= @_; 716 $name =~ s/<([^<>]+)>/<\L$1\E>/ 717 or $name =~ s/(\s)(\@\w+)\z/$1<\L$2\E>/ 718 or $name .= " <unknown>"; 719 720 $name= __fold_trim_ws($name); 721 return $name; 722} 723 724sub check_fix_mailmap_hash { 725 my ($self)= @_; 726 my $orig_mailmap_hash= $self->{orig_mailmap_hash}; 727 my $author_info= $self->{author_info}; 728 foreach my $key (keys %{ $author_info->{clean_full} }) { 729 $key .= " <unknown>" 730 unless $key =~ /\s+(?:<[^>]+>|\@\w+)\z/; 731 $key =~ s/\s+(\@\w+)\z/ <$1>/; 732 $orig_mailmap_hash->{"$key $key"} //= -1; 733 } 734 my $parsed= $self->parse_orig_mailmap_hash(); 735 my @fixed; 736 my %seen_map; 737 my %pref_groups; 738 739 my $remove_no_names_with_overlaps= 0; 740 741 # first pass through the data, do any conversions, eg, LC 742 # the email address, decode any MIME-Header style email addresses. 743 # We also correct any preferred name entries so they match what 744 # we already have in AUTHORS, and check that there aren't collisions 745 # or other issues in the data. 746 foreach my $rec (@$parsed) { 747 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec; 748 $pemail= lc($pemail); 749 $oemail= lc($oemail) if defined $oemail; 750 if ($pname =~ /=\?UTF-8\?/) { 751 $pname= decode("MIME-Header", $pname); 752 } 753 my $auth_email= $author_info->{"name2email"}{$pname}; 754 if ($auth_email) { 755 ## this name exists in authors, so use its email data for pemail 756 $pemail= $auth_email; 757 } 758 my $auth_name= $author_info->{"email2name"}{$pemail}; 759 if ($auth_name) { 760 ## this email exists in authors, so use its name data for pname 761 $pname= $auth_name; 762 } 763 764 # neither name nor email exist in authors. 765 if ($pname ne "unknown") { 766 if (my $email= $seen_map{"name"}{$pname}) { 767 ## we have seen this pname before, check the pemail 768 ## is consistent 769 if ($email ne $pemail) { 770 warn encode_utf8 "Inconsistent emails for name '$pname'" 771 . " at line num $line_num: keeping '$email'," 772 . " ignoring '$pemail'\n"; 773 $pemail= $email; 774 } 775 } 776 else { 777 $seen_map{"name"}{$pname}= $pemail; 778 } 779 } 780 if ($pemail ne "unknown") { 781 if (my $name= $seen_map{"email"}{$pemail}) { 782 ## we have seen this preferred_email before, check the preferred_name 783 ## is consistent 784 if ($name ne $pname) { 785 warn encode_utf8 "Inconsistent name for email '$pemail'" 786 . " at line num $line_num: keeping '$name', ignoring" 787 . " '$pname'\n"; 788 $pname= $name; 789 } 790 } 791 else { 792 $seen_map{"email"}{$pemail}= $pname; 793 } 794 } 795 796 my $rec= [ $pname, $pemail, $oname, $oemail, $line_num ]; 797 if ($remove_no_names_with_overlaps) { 798 799 # Build an index of "preferred name/email" to other-email, other name 800 # we use this later to remove redundant entries missing a name. 801 $pref_groups{"$pname $pemail"}{$oemail}{ $oname || "" }= $rec; 802 } 803 else { 804 push @fixed, $rec; 805 } 806 } 807 808 if ($remove_no_names_with_overlaps) { 809 810 # this removes entries like 811 # Joe <blogs> <whatever> 812 # where there is a corresponding 813 # Joe <blogs> Joe X <whatever> 814 foreach my $pref (__sorted_hash_keys(\%pref_groups)) { 815 my $entries= $pref_groups{$pref}; 816 foreach my $email (__sorted_hash_keys($entries)) { 817 my @names= __sorted_hash_keys($entries->{$email}); 818 if (0 and $names[0] eq "" and @names > 1) { 819 shift @names; 820 } 821 foreach my $name (@names) { 822 push @fixed, $entries->{$email}{$name}; 823 } 824 } 825 } 826 } 827 828 # final pass through the dataset, build up a database 829 # we will use later for checks and updates, and reconstruct 830 # the canonical entries. 831 my $new_mailmap_hash= {}; 832 my $mailmap_info= {}; 833 foreach my $rec (@fixed) { 834 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec; 835 my $preferred= "$pname <$pemail>"; 836 my $other; 837 if (defined $oemail) { 838 $other= $oname ? "$oname <$oemail>" : "<$oemail>"; 839 } 840 if (!$self->_keeper_digest($preferred)) { 841 $self->_exclude_contrib($other); 842 next; 843 } 844 elsif (!$self->_keeper_digest($other)) { 845 next; 846 } 847 if ($other and $other ne "<unknown>") { 848 $self->_safe_set_key($mailmap_info, $O2P, $other, $preferred); 849 $self->_safe_set_key($mailmap_info, $O2PN, $other, $pname); 850 $self->_safe_set_key($mailmap_info, $O2PE, $other, $pemail); 851 } 852 $mailmap_info->{$P2O}{$preferred}{$other}++; 853 if ($pname ne "unknown") { 854 $self->_safe_set_key($mailmap_info, $N2P, $pname, $preferred); 855 } 856 if ($pemail ne "unknown") { 857 $self->_safe_set_key($mailmap_info, $E2P, $pemail, $preferred); 858 } 859 my $line= $preferred; 860 $line .= " $other" if $other; 861 $new_mailmap_hash->{$line}= $line_num; 862 } 863 $self->{new_mailmap_hash}= $new_mailmap_hash; 864 $self->{mailmap_info}= $mailmap_info; 865 return ($new_mailmap_hash, $mailmap_info); 866} 867 868sub add_new_mailmap_entries { 869 my ($self)= @_; 870 my $mailmap_hash= $self->{new_mailmap_hash}; 871 my $mailmap_info= $self->{mailmap_info}; 872 my $mailmap_file= $self->{mailmap_file}; 873 874 my $mailmap_add= $mailmap_info->{add} 875 or return 0; 876 877 my $num= 0; 878 for my $new (__sorted_hash_keys($mailmap_add)) { 879 !$mailmap_hash->{$new}++ or next; 880 warn encode_utf8 "Updating '$mailmap_file' with: $new\n" 881 if $self->{verbose}; 882 $num++; 883 } 884 return $num; 885} 886 887sub read_and_update { 888 my ($self)= @_; 889 my ($authors_file, $mailmap_file)= 890 %{$self}{qw(authors_file mailmap_file)}; 891 892 # read the authors file and extract the info it contains 893 $self->read_authors_file(); 894 895 # read the mailmap file. 896 $self->read_mailmap_file(); 897 898 # check and possibly fix the mailmap data, and build a set of precomputed 899 # datasets to work with it. 900 $self->check_fix_mailmap_hash(); 901 902 # update the mailmap based on any check or fixes we just did. 903 $self->update_mailmap_file(); 904 905 # read the commits names using git log, and compares and checks 906 # them against the data we have in authors. 907 $self->read_commit_log(); 908 909 # update the authors file with any changes 910 $self->update_authors_file(); 911 912 # check if we discovered new email data from the commits that 913 # we need to write back to disk. 914 $self->add_new_mailmap_entries() 915 and $self->update_mailmap_file(); 916 917 $self->update_exclude_file(); 918 919 return $self->changed_count(); 920} 921 922sub read_exclude_file { 923 my ($self)= @_; 924 my $exclude_file= $self->{exclude_file}; 925 my $exclude_digest= $self->{exclude_digest} ||= {}; 926 927 open my $in_fh, "<", $exclude_file 928 or do { 929 warn "Failed to open '$exclude_file': $!"; 930 return; 931 }; 932 my $head= ""; 933 my $orig= ""; 934 my $seen_data= 0; 935 while (defined(my $line= <$in_fh>)) { 936 $orig .= $line; 937 if ($line =~ /^\s*#/ || $line !~ /\S/) { 938 $head .= $line unless $seen_data; 939 next; 940 } 941 else { 942 $seen_data= 1; 943 } 944 chomp($line); 945 $line =~ s/\A\s+//; 946 $line =~ s/\s*(?:#.*)?\z//; 947 $exclude_digest->{$line}++ if length($line); 948 } 949 close $in_fh 950 or die "Failed to close '$exclude_file' after reading: $!"; 951 if (!$head) { 952 die sprintf <<'EOFMT', $exclude_file; 953Possibly corrupted exclude file '%s'. 954 955This file should have a header of '#' comments in it. 956 957Where did they go? 958 959Cowardly refusing to continue until this is fixed. 960EOFMT 961 } 962 $self->{exclude_file_text_head}= $head; 963 $self->{exclude_file_text_orig}= $orig; 964 965 return $exclude_digest; 966} 967 968sub update_exclude_file { 969 my ($self)= @_; 970 my $exclude_file= $self->{exclude_file}; 971 my $exclude_text= $self->{exclude_file_text_head}; 972 foreach my $digest (__sorted_hash_keys($self->{exclude_digest})) { 973 $exclude_text .= "$digest\n"; 974 } 975 if ($exclude_text ne $self->{exclude_file_text_orig}) { 976 $self->{changed_count}++; 977 $self->_log_file_changes_quick_and_dirty_diff($exclude_file, 978 $self->{exclude_file_text_orig}, 979 $exclude_text); 980 981 if ($self->{no_update}) { 982 return 1; 983 } 984 985 warn "Updating '$exclude_file'\n" if $self->{verbose}; 986 987 my $tmp_file= "$exclude_file.new"; 988 open my $out_fh, ">", $tmp_file 989 or die "Cant open '$tmp_file' for write $!"; 990 print $out_fh $exclude_text 991 or die "Failed to print to '$tmp_file': $!"; 992 close $out_fh 993 or die "Failed to close '$tmp_file' after writing: $!"; 994 rename $tmp_file, $exclude_file 995 or die "Failed to rename '$tmp_file' to '$exclude_file': $!"; 996 997 return 1; 998 } 999 else { 1000 return 0; 1001 } 1002} 1003 1004sub changed_count { 1005 my ($self)= @_; 1006 return $self->{changed_count}; 1007} 1008 1009sub changed_file { 1010 my ($self, $name)= @_; 1011 return $self->{changed_file}{$name}; 1012} 1013 1014sub unchanged_file { 1015 my ($self, $name)= @_; 1016 return $self->changed_file($name) ? 0 : 1; 1017} 1018 1019sub new { 1020 my ($class, %self)= @_; 1021 $self{changed_count}= 0; 1022 for my $name (qw(authors_file mailmap_file exclude_file)) { 1023 $self{$name} 1024 or die "Property '$name' is mandatory in constructor"; 1025 } 1026 1027 my $self= bless \%self, $class; 1028 1029 if (my $ary= $self->{exclude_contrib}) { 1030 $self->_exclude_contrib($_) for @$ary; 1031 } 1032 1033 $self->read_exclude_file(); 1034 1035 die Dumper(\%self) if $self{dump_opts}; 1036 1037 return $self; 1038} 1039 1040sub __digest { 1041 my $thing= $_[0]; 1042 utf8::encode($thing); 1043 return sha256_base64($thing); 1044} 1045 1046# if this name is a "keeper" then return its digest 1047# (if we know the digest and it is marked for exclusion 1048# then we return 0) 1049sub _keeper_digest { 1050 my ($self, $real_name)= @_; 1051 my $digest; 1052 $digest= $self->{digest_cache}{$real_name}; 1053 1054 if (!$digest) { 1055 my $name= __fold_trim_ws($real_name); 1056 1057 $digest= ($self->{digest_cache}{$name} //= __digest($name)); 1058 $self->{digest_cache}{$real_name}= $digest; 1059 } 1060 1061 return $self->{exclude_digest}{$digest} ? 0 : $digest; 1062} 1063 1064# should we exclude this author from the AUTHORS file 1065# simply because of the form of their details? 1066sub _logical_exclude_author { 1067 my ($self, $author)= @_; 1068 1069 # don't know the persona 1070 return 1 if $author =~ /^unknown/; 1071 1072 # Someone at <unknown> with a single word name. 1073 # Eg, we wont list "Bob <unknown>" 1074 if ($author =~ s/\s*<unknown>\z//) { 1075 return 1 if $author =~ /^\w+$/; 1076 } 1077 return 0; 1078} 1079 1080# exclude this contributor by name, if digest isnt provided 1081# then it is computed using _digest. 1082sub _exclude_contrib { 1083 my ($self, $name, $digest)= @_; 1084 1085 # if we would exclude them anyway due to the logical 1086 # naming rules then we do not need to add them to the exclude 1087 # file. 1088 return if $self->_logical_exclude_author($name); 1089 $name= __fold_trim_ws($name); 1090 $digest //= __digest($name); 1091 $self->{exclude_digest}{$digest}++ 1092 or warn "Excluding '$name' with '$digest'\n"; 1093} 1094 1095sub _log_file_changes_quick_and_dirty_diff { 1096 my ($self, $file, $old_raw_text, $new_raw_text)= @_; 1097 1098 my %old; 1099 $old{$_}++ for split /\n/, $old_raw_text; 1100 my %new; 1101 $new{$_}++ for split /\n/, $new_raw_text; 1102 foreach my $key (keys %new) { 1103 delete $new{$key} if delete $old{$key}; 1104 } 1105 $self->{changed_file}{$file}{add}= \%new if keys %new; 1106 $self->{changed_file}{$file}{del}= \%old if keys %old; 1107 return $self->{changed_file}{$file}; 1108} 1109 1110sub _diff_diag { 1111 my ($self, $want_file)= @_; 1112 my $diag_str= ""; 1113 foreach my $file (sort keys %{ $self->{changed_file} || {} }) { 1114 next if $want_file and $file ne $want_file; 1115 $diag_str .= " File '$file' changes:\n"; 1116 foreach my $action (sort keys %{ $self->{changed_file}{$file} }) { 1117 foreach 1118 my $line (sort keys %{ $self->{changed_file}{$file}{$action} }) 1119 { 1120 $diag_str .= " would $action: $line\n"; 1121 } 1122 } 1123 } 1124 return $diag_str; 1125} 1126 1127my %pretty_name= ( 1128 "author" => "Authored", 1129 "committer" => "Committed", 1130 "applied" => "Applied", 1131 "name" => "Name", 1132 "pos" => "Pos", 1133 "num_files" => "NFiles", 1134 "lines_added" => "L++", 1135 "lines_removed" => "L--", 1136 "lines_delta" => "L+-", 1137 "binary_changed" => "Bin+-", 1138); 1139 1140sub report_stats { 1141 my ($self, $stats_key, @types)= @_; 1142 my @extra= "name"; 1143 my @rows; 1144 my @total; 1145 foreach my $name (__sorted_hash_keys($self->{$stats_key})) { 1146 my @data= map { $self->{$stats_key}{$name}{$_} // 0 } @types; 1147 $total[$_] += $data[$_] for 0 .. $#data; 1148 push @data, $name; 1149 push @rows, \@data if $data[0]; 1150 } 1151 @rows= sort { 1152 my $cmp= 0; 1153 for (0 .. $#$a - 1) { 1154 $cmp= $b->[$_] <=> $a->[$_]; 1155 last if $cmp; 1156 } 1157 $cmp ||= $Collate->cmp($a->[-1], $b->[-1]); 1158 $cmp 1159 } @rows; 1160 @rows= reverse @rows if $self->{in_reverse}; 1161 1162 if ($self->{as_cumulative}) { 1163 my $sum= []; 1164 for my $row (@rows) { 1165 do { 1166 $sum->[$_] += $row->[$_]; 1167 $row->[$_]= $sum->[$_]; 1168 } 1169 for 0 .. $#types; 1170 } 1171 } 1172 1173 if ($self->{as_percentage}) { 1174 for my $row (@rows) { 1175 $row->[$_]= sprintf "%.2f", ($row->[$_] / $total[$_]) * 100 1176 for 0 .. $#types; 1177 } 1178 } 1179 1180 foreach my $row (@rows) { 1181 my $name= $row->[-1]; 1182 $name =~ s/\s+<.*\z//; 1183 $name =~ s/\s+\@.*\z//; 1184 $row->[-1]= $name; 1185 } 1186 my @col_names= map { $pretty_name{$_} // $_ } @types; 1187 if ($self->{as_percentage}) { 1188 $_= "%$_" for @col_names; 1189 } 1190 push @col_names, map { $pretty_name{$_} // $_ } @extra; 1191 1192 if ($self->{as_list} && @types == 1) { 1193 $self->_report_list(\@rows, \@types, \@extra, \@col_names); 1194 } 1195 else { 1196 $self->_report_table(\@rows, \@types, \@extra, \@col_names); 1197 } 1198} 1199 1200sub _report_table { 1201 my ($self, $rows, $types, $extra, $col_names)= @_; 1202 my $pos= 1; 1203 unshift @$_, $pos++ for @$rows; 1204 unshift @$col_names, "Pos"; 1205 my @width= (0) x @$col_names; 1206 foreach my $row ($col_names, @$rows) { 1207 for my $idx (0 .. $#$row) { 1208 $width[$idx] < length($row->[$idx]) 1209 and $width[$idx]= length($row->[$idx]); 1210 } 1211 } 1212 $width[-1]= 40 if $width[-1] > 40; 1213 $width[$_]= -$width[$_] for 0, -1; 1214 my $fmt= "#" . join(" | ", ("%*s") x @$col_names) . "\n"; 1215 my $bar_fmt= "#" . join("-+-", ("%*s") x @$col_names) . "\n"; 1216 printf $fmt, map { $width[$_], $col_names->[$_] } 0 .. $#width; 1217 printf $bar_fmt, map { $width[$_], "-" x abs($width[$_]) } 0 .. $#width; 1218 for my $idx (0 .. $#$rows) { 1219 my $row= $rows->[$idx]; 1220 print encode_utf8 sprintf $fmt, 1221 map { $width[$_], $row->[$_] } 0 .. $#width; 1222 } 1223} 1224 1225sub _report_list { 1226 my ($self, $rows, $types, $extra, $col_names)= @_; 1227 my %hash; 1228 foreach my $row (@$rows) { 1229 $hash{ $row->[0] }{ $row->[-1] }++; 1230 } 1231 my @vals= sort { $b <=> $a } keys %hash; # numeric sort 1232 my $width= length($col_names->[0]); 1233 $width < length($_) and $width= length($_) for @vals; 1234 @vals= reverse @vals if $self->{in_reverse}; 1235 1236 my $hdr_str= sprintf "%*s | %s", $width, $col_names->[0], $col_names->[-1]; 1237 my $sep_str= sprintf "%*s-+-%s", $width, "-" x $width, "-" x 40; 1238 my $fmt= "%*s | %s"; 1239 1240 if ($self->{with_rank_numbers}) { 1241 $hdr_str= sprintf "#%*s | %s", -length(0 + @$rows), "Pos", $hdr_str; 1242 $sep_str= sprintf "#%*s-+-%s", -length(0 + @$rows), 1243 "-" x length(0 + @$rows), $hdr_str; 1244 } 1245 print $hdr_str, "\n"; 1246 print $sep_str, "\n"; 1247 my $pos= 1; 1248 foreach my $val (@vals) { 1249 my $val_f= sprintf "%*s | ", $width, $val; 1250 $val_f= sprintf "#%*d | %s", -length(0 + @$rows), $pos++, $val_f 1251 if $self->{with_rank_numbers}; 1252 print encode_utf8 wrap $val_f, 1253 " " x length($val_f), 1254 join(", ", $Collate->sort(keys %{ $hash{$val} })) . "\n"; 1255 } 1256} 1257 1258sub _filter_sort_who { 1259 my ($self, $hash)= @_; 1260 my @who; 1261 foreach my $name ($Collate->sort(keys %$hash)) { 1262 $name =~ s/\s+<.*\z//; 1263 $name =~ s/\s+\@.*\z//; 1264 push @who, $name if length $name and lc($name) ne "unknown"; 1265 } 1266 return @who; 1267} 1268 1269sub print_who { 1270 my ($self)= @_; 1271 my @who= $self->_filter_sort_who($self->{who_stats}); 1272 print encode_utf8 wrap "", "", join(", ", @who) . ".\n"; 1273} 1274 12751; 1276__END__ 1277 1278=head1 NAME 1279 1280Porting::updateAUTHORS - Library to automatically update AUTHORS and .mailmap based on commit data. 1281 1282=head1 SYNOPSIS 1283 1284 use Porting::updateAUTHORS; 1285 1286 my $updater= Porting::updateAUTHORS->new( 1287 authors_file => "AUTHORS", 1288 mailmap_file => ".mailmap", 1289 exclude_file => "Porting/exclude_contrib.txt", 1290 ); 1291 $updater->read_and_update(); 1292 1293=head1 DESCRIPTION 1294 1295This the brain of the F<Porting/updateAUTHORS.pl> script. It is expected 1296to be used B<from> that script and B<by> that script. Most features and 1297options are documented in the F<Porting/updateAUTHORS.pl> and are not 1298explicitly documented here, read the F<Porting/updateAUTHORS.pl> manpage 1299for more details. 1300 1301=head1 METHODS 1302 1303Porting::updateAUTHORS uses OO as way of managing its internal state. 1304This documents the public methods it exposes. 1305 1306=over 4 1307 1308=item add_new_mailmap_entries() 1309 1310If any additions were identified while reading the commits this will 1311inject them into the mailmap_hash so they can be written out. Returns a 1312count of additions found. 1313 1314=item check_fix_mailmap_hash() 1315 1316Analyzes the data contained the in the .mailmap file and applies any 1317automated fixes which are required and which it can automatically 1318perform. Returns a hash of adjusted entries and a hash with additional 1319metadata about the mailmap entries. 1320 1321=item new(%opts) 1322 1323Create a new object. Required parameters are 1324 1325 authors_file 1326 mailmap_file 1327 exclude_file 1328 1329Other supported parameters are as follows: 1330 1331 verbose 1332 commit_range 1333 1334this list is not exhaustive. See the code implementing the main() 1335function in F<Porting/updateAUTHORS.pl> for an exhaustive list. 1336 1337=item parse_orig_mailmap_hash() 1338 1339Takes a mailmap_hash and parses it and returns it as an array of array 1340records with the contents: 1341 1342 [ $preferred_name, $preferred_email, 1343 $other_name, $other_email, 1344 $line_num ] 1345 1346=item read_and_update() 1347 1348Wraps the other functions in this library and implements the logic and 1349intent of this tool. Takes two arguments, the authors file name, and the 1350mailmap file name. Returns nothing but may modify the AUTHORS file 1351or the .mailmap file. Requires that both files are editable. 1352 1353=item read_commit_log() 1354 1355Read the commit log specified by the property "commit_range" and find 1356any new names it contains. 1357 1358Normally used via C<read_and_update> and not called directly. 1359 1360=item read_authors_file() 1361 1362Read the AUTHORS file into the object, and return data about it. 1363 1364Normally used via C<read_and_update> and not called directly. 1365 1366=item read_mailmap_file() 1367 1368Read the .mailmap file into the object and return data about it. 1369 1370Normally used via C<read_and_update> and not called directly. 1371 1372=item read_exclusion_file() 1373 1374Read the exclusion file into the object and return data about it. 1375 1376Normally used via C<read_and_update> and not called directly. 1377 1378=item update_authors_file() 1379 1380Write out an updated AUTHORS file atomically if it has changed, 1381returns 0 if the file was actually updated, 1 if it was not. 1382 1383Normally used via C<read_and_update> and not called directly. 1384 1385=item update_mailmap_file() 1386 1387Write out an updated .mailmap file atomically if it has changed, 1388returns 0 if the file was actually updated, 1 if it was not. 1389 1390Normally used via C<read_and_update> and not called directly. 1391 1392=item update_exclusion_file() 1393 1394Write out an updated exclusion file atomically if it has changed, 1395returns 0 if the file was actually update, 1 if it was not. 1396 1397Normally used via C<read_and_update> and not called directly. 1398 1399=back 1400 1401=head1 TODO 1402 1403More documentation and testing. 1404 1405=head1 SEE ALSO 1406 1407F<Porting/checkAUTHORS.pl> 1408 1409=head1 AUTHOR 1410 1411Yves Orton <demerphq@gmail.com> 1412 1413=cut 1414