1#!./perl -w
2
3=pod
4
5=head1 TEST FOR B::Assembler.pm AND B::Disassembler.pm
6
7=head2 Description
8
9The general idea is to test by assembling a choice set of assembler
10instructions, then disassemble them, and check that we've completed the
11round trip. Also, error checking of Assembler.pm is tested by feeding
12it assorted errors.
13
14Since Assembler.pm likes to assemble a file, we comply by writing a
15text file. This file contains three sections:
16
17  testing operand categories
18  use each opcode
19  erronous assembler instructions
20
21An "operand category" is identified by the suffix of the PUT_/GET_
22subroutines as shown in the C<%Asmdata::insn_data> initialization, e.g.
23opcode C<ldsv> has operand category C<svindex>:
24
25   insn_data{ldsv} = [1, \&PUT_svindex, "GET_svindex"];
26
27Because Disassembler.pm also assumes input from a file, we write the
28resulting object code to a file. And disassembled output is written to
29yet another text file which is then compared to the original input.
30(Erronous assembler instructions still generate code, but this is not
31written to the object file; therefore disassembly bails out at the first
32instruction in error.)
33
34All files are kept in memory by using TIEHASH.
35
36
37=head2 Caveats
38
39An error where Assembler.pm and Disassembler.pm agree but Assembler.pm
40generates invalid object code will not be detected.
41
42Due to the way this test has been set up, failure of a single test
43could cause all subsequent tests to fail as well: After an unexpected
44assembler error no output is written, and disassembled lines will be
45out of sync for all lines thereafter.
46
47Not all possibilities for writing a valid operand value can be tested
48because disassembly results in a uniform representation.
49
50
51=head2 Maintenance
52
53New opcodes are added automatically.
54
55A new operand category will cause this program to die ("no operand list
56for XXX"). The cure is to add suitable entries to C<%goodlist> and
57C<%badlist>. (Since the data in Asmdata.pm is autogenerated, it may also
58happen that the corresponding assembly or disassembly subroutine is
59missing.) Note that an empty array as a C<%goodlist> entry means that
60opcodes of the operand category do not take an operand (and therefore the
61corresponding entry in C<%badlist> should have one). An C<undef> entry
62in C<%badlist> means that any value is acceptable (and thus there is no
63way to cause an error).
64
65Set C<$dbg> to debug this test.
66
67=cut
68
69package VirtFile;
70use strict;
71
72# Note: This is NOT a general purpose package. It implements
73# sequential text and binary file i/o in a rather simple form.
74
75sub TIEHANDLE($;$){
76    my( $class, $data ) = @_;
77    my $obj = { data => defined( $data ) ? $data : '',
78                pos => 0 };
79    return bless( $obj, $class );
80}
81
82sub PRINT($@){
83    my( $self ) = shift;
84    $self->{data} .= join( '', @_ );
85}
86
87sub WRITE($$;$$){
88    my( $self, $buf, $len, $offset ) = @_;
89    unless( defined( $len ) ){
90	$len = length( $buf );
91        $offset = 0;
92    }
93    unless( defined( $offset ) ){
94        $offset = 0;
95    }
96    $self->{data} .= substr( $buf, $offset, $len );
97    return $len;
98}
99
100
101sub GETC($){
102    my( $self ) = @_;
103    return undef() if $self->{pos} >= length( $self->{data} );
104    return substr( $self->{data}, $self->{pos}++, 1 );
105}
106
107sub READLINE($){
108    my( $self ) = @_;
109    return undef() if $self->{pos} >= length( $self->{data} );
110    my $lfpos = index( $self->{data}, "\n", $self->{pos} );
111    if( $lfpos < 0 ){
112        $lfpos = length( $self->{data} );
113    }
114    my $pos = $self->{pos};
115    $self->{pos} = $lfpos + 1;
116    return substr( $self->{data}, $pos, $self->{pos} - $pos );
117}
118
119sub READ($@){
120    my $self = shift();
121    my $bufref = \$_[0];
122    my( undef, $len, $offset ) = @_;
123    if( $offset ){
124        die( "offset beyond end of buffer\n" )
125          if ! defined( $$bufref ) || $offset > length( $$bufref );
126    } else {
127        $$bufref = '';
128        $offset = 0;
129    }
130    my $remlen = length( $self->{data} ) - $self->{pos};
131    $len = $remlen if $remlen < $len;
132    return 0 unless $len;
133    substr( $$bufref, $offset, $len ) =
134      substr( $self->{data}, $self->{pos}, $len );
135    $self->{pos} += $len;
136    return $len;
137}
138
139sub TELL($){
140    my $self = shift();
141    return $self->{pos};
142}
143
144sub CLOSE($){
145    my( $self ) = @_;
146    $self->{pos} = 0;
147}
148
1491;
150
151package main;
152
153use strict;
154use Test::More;
155use Config qw(%Config);
156
157BEGIN {
158  if (($Config{'extensions'} !~ /\bB\b/) ){
159    print "1..0 # Skip -- Perl configured without B module\n";
160    exit 0;
161  }
162  if (($Config{'extensions'} !~ /\bByteLoader\b/) ){
163    print "1..0 # Skip -- Perl configured without ByteLoader module\n";
164    exit 0;
165  }
166}
167
168use B::Asmdata      qw( %insn_data );
169use B::Assembler    qw( &assemble_fh );
170use B::Disassembler qw( &disassemble_fh &get_header );
171
172my( %opsByType, @code2name );
173my( $lineno, $dbg, $firstbadline, @descr );
174$dbg = 0; # debug switch
175
176# $SIG{__WARN__} handler to catch Assembler error messages
177#
178my $warnmsg;
179sub catchwarn($){
180    $warnmsg = $_[0];
181    print "error: $warnmsg\n" if $dbg;
182}
183
184# Callback for writing assembled bytes. This is where we check
185# that we do get an error.
186#
187sub putobj($){
188    if( ++$lineno >= $firstbadline ){
189        ok( $warnmsg && $warnmsg =~ /^\d+:\s/, $descr[$lineno] );
190        undef( $warnmsg );
191    } else {
192        my $l = syswrite( OBJ, $_[0] );
193    }
194}
195
196# Callback for writing a disassembled statement.
197#
198sub putdis(@){
199    my $line = join( ' ', @_ );
200    ++$lineno;
201    print DIS "$line\n";
202    printf "%5d %s\n", $lineno, $line if $dbg;
203}
204
205# Generate assembler instructions from a hash of operand types: each
206# existing entry contains a list of good or bad operand values. The
207# corresponding opcodes can be found in %opsByType.
208#
209sub gen_type($$$){
210    my( $href, $descref, $text ) = @_;
211    for my $odt ( sort( keys( %opsByType ) ) ){
212        my $opcode = $opsByType{$odt}->[0];
213	my $sel = $odt;
214	$sel =~ s/^GET_//;
215	die( "no operand list for $sel\n" ) unless exists( $href->{$sel} );
216        if( defined( $href->{$sel} ) ){
217            if( @{$href->{$sel}} ){
218		for my $od ( @{$href->{$sel}} ){
219		    ++$lineno;
220                    $descref->[$lineno] = "$text: $code2name[$opcode] $od";
221		    print ASM "$code2name[$opcode] $od\n";
222		    printf "%5d %s %s\n", $lineno, $code2name[$opcode], $od if $dbg;
223		}
224	    } else {
225		++$lineno;
226                $descref->[$lineno] = "$text: $code2name[$opcode]";
227		print ASM "$code2name[$opcode]\n";
228		printf "%5d %s\n", $lineno, $code2name[$opcode] if $dbg;
229	    }
230	}
231    }
232}
233
234# Interesting operand values
235#
236my %goodlist = (
237comment_t   => [ '"a comment"' ],  # no \n
238none        => [],
239svindex     => [ 0x7fffffff, 0 ],
240opindex     => [ 0x7fffffff, 0 ],
241pvindex     => [ 0x7fffffff, 0 ],
242U32         => [ 0xffffffff, 0 ],
243U8          => [ 0xff, 0 ],
244PV          => [ '""', '"a string"', ],
245I32         => [ -0x80000000, 0x7fffffff ],
246IV64        => [ '0x000000000', '0x0ffffffff', '0x000000001' ], # disass formats  0x%09x
247IV          => $Config{ivsize} == 4 ?
248               [ -0x80000000, 0x7fffffff ] :
249               [ '0x000000000', '0x0ffffffff', '0x000000001' ],
250NV          => [ 1.23456789E3 ],
251U16         => [ 0xffff, 0 ],
252pvcontents  => [],
253strconst    => [ '""', '"another string"' ], # no NUL
254op_tr_array => [ join( ',', 256, 0..255 ) ],
255PADOFFSET   => undef,
256long        => undef,
257	      );
258
259# Erronous operand values
260#
261my %badlist = (
262comment_t   => [ '"multi-line\ncomment"' ],  # no \n
263none        => [ '"spurious arg"'  ],
264svindex     => [ 0xffffffff * 2, -1 ],
265opindex     => [ 0xffffffff * 2, -2 ],
266pvindex     => [ 0xffffffff * 2, -3 ],
267U32         => [ 0xffffffff * 2, -4 ],
268U16         => [ 0x5ffff, -5 ],
269U8          => [ 0x6ff, -6 ],
270PV          => [ 'no quote"' ],
271I32         => [ -0x80000001, 0x80000000 ],
272IV64        => undef, # PUT_IV64 doesn't check - no integrity there
273IV          => $Config{ivsize} == 4 ?
274               [ -0x80000001, 0x80000000 ] : undef,
275NV          => undef, # PUT_NV accepts anything - it shouldn't, real-ly
276pvcontents  => [ '"spurious arg"' ],
277strconst    => [  'no quote"',  '"with NUL '."\0".' char"' ], # no NUL
278op_tr_array => undef, # op_pv_tr is no longer exactly 256 shorts
279PADOFFSET   => undef,
280long	     => undef,
281	      );
282
283
284# Determine all operand types from %Asmdata::insn_data
285#
286for my $opname ( keys( %insn_data ) ){
287    my ( $opcode, $put, $getname ) = @{$insn_data{$opname}};
288    push( @{$opsByType{$getname}}, $opcode );
289    $code2name[$opcode] = $opname;
290}
291
292
293# Write instruction(s) for correct operand values each operand type class
294#
295$lineno = 0;
296tie( *ASM, 'VirtFile' );
297gen_type( \%goodlist, \@descr, 'round trip' );
298
299# Write one instruction for each opcode.
300#
301for my $opcode ( 0..$#code2name ){
302    next unless defined( $code2name[$opcode] );
303    my $sel = $insn_data{$code2name[$opcode]}->[2];
304    $sel =~ s/^GET_//;
305    die( "no operand list for $sel\n" ) unless exists( $goodlist{$sel} );
306    if( defined( $goodlist{$sel} ) ){
307        ++$lineno;
308        if( @{$goodlist{$sel}} ){
309            my $od = $goodlist{$sel}[0];
310            $descr[$lineno] = "round trip: $code2name[$opcode] $od";
311            print ASM "$code2name[$opcode] $od\n";
312            printf "%5d %s %s\n", $lineno, $code2name[$opcode], $od if $dbg;
313        } else {
314            $descr[$lineno] = "round trip: $code2name[$opcode]";
315            print ASM "$code2name[$opcode]\n";
316            printf "%5d %s\n", $lineno, $code2name[$opcode] if $dbg;
317	}
318    }
319}
320
321# Write instruction(s) for incorrect operand values each operand type class
322#
323$firstbadline = $lineno + 1;
324gen_type( \%badlist, \@descr, 'asm error' );
325
326# invalid opcode is an odd-man-out ;-)
327#
328++$lineno;
329$descr[$lineno] = "asm error: Gollum";
330print ASM "Gollum\n";
331printf "%5d %s\n", $lineno, 'Gollum' if $dbg;
332
333close( ASM );
334
335# Now that we have defined all of our tests: plan
336#
337plan( tests => $lineno );
338print "firstbadline=$firstbadline\n" if $dbg;
339
340# assemble (guard against warnings and death from assembly errors)
341#
342$SIG{'__WARN__'} = \&catchwarn;
343
344$lineno = -1; # account for the assembly header
345tie( *OBJ, 'VirtFile' );
346eval { assemble_fh( \*ASM, \&putobj ); };
347print "eval: $@" if $dbg;
348close( ASM );
349close( OBJ );
350$SIG{'__WARN__'} = 'DEFAULT';
351
352# disassemble
353#
354print "--- disassembling ---\n" if $dbg;
355$lineno = 0;
356tie( *DIS, 'VirtFile' );
357disassemble_fh( \*OBJ, \&putdis );
358close( OBJ );
359close( DIS );
360
361# get header (for debugging only)
362#
363if( $dbg ){
364    my( $magic, $archname, $blversion, $ivsize, $ptrsize, $byteorder ) =
365        get_header();
366    printf "Magic:        0x%08x\n", $magic;
367    print  "Architecture: $archname\n";
368    print  "Byteloader V: $blversion\n";
369    print  "ivsize:       $ivsize\n";
370    print  "ptrsize:      $ptrsize\n";
371    print  "Byteorder:    $byteorder\n";
372}
373
374# check by comparing files line by line
375#
376print "--- checking ---\n" if $dbg;
377$lineno = 0;
378my( $asmline, $disline );
379while( defined( $asmline = <ASM> ) ){
380    $disline = <DIS>;
381    ++$lineno;
382    last if $lineno eq $firstbadline; # bail out where errors begin
383    ok( $asmline eq $disline, $descr[$lineno] );
384    printf "%5d %s\n", $lineno, $asmline if $dbg;
385}
386close( ASM );
387close( DIS );
388
389__END__
390