1 /* $OpenBSD: optimize.c,v 1.12 2006/04/02 21:38:57 djm Exp $ */
2
3 /*
4 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that: (1) source code distributions
9 * retain the above copyright notice and this paragraph in its entirety, (2)
10 * distributions including binary code include the above copyright notice and
11 * this paragraph in its entirety in the documentation or other materials
12 * provided with the distribution, and (3) all advertising materials mentioning
13 * features or use of this software display the following acknowledgement:
14 * ``This product includes software developed by the University of California,
15 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
16 * the University nor the names of its contributors may be used to endorse
17 * or promote products derived from this software without specific prior
18 * written permission.
19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
20 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 *
23 * Optimization module for tcpdump intermediate representation.
24 */
25
26 #include <sys/types.h>
27 #include <sys/time.h>
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <memory.h>
32
33 #include "pcap-int.h"
34
35 #include "gencode.h"
36
37 #ifdef HAVE_OS_PROTO_H
38 #include "os-proto.h"
39 #endif
40
41 #ifdef BDEBUG
42 extern int dflag;
43 #endif
44
45 #define A_ATOM BPF_MEMWORDS
46 #define X_ATOM (BPF_MEMWORDS+1)
47
48 #define NOP -1
49
50 /*
51 * This define is used to represent *both* the accumulator and
52 * x register in use-def computations.
53 * Currently, the use-def code assumes only one definition per instruction.
54 */
55 #define AX_ATOM N_ATOMS
56
57 /*
58 * A flag to indicate that further optimization is needed.
59 * Iterative passes are continued until a given pass yields no
60 * branch movement.
61 */
62 static int done;
63
64 /*
65 * A block is marked if only if its mark equals the current mark.
66 * Rather than traverse the code array, marking each item, 'cur_mark' is
67 * incremented. This automatically makes each element unmarked.
68 */
69 static int cur_mark;
70 #define isMarked(p) ((p)->mark == cur_mark)
71 #define unMarkAll() cur_mark += 1
72 #define Mark(p) ((p)->mark = cur_mark)
73
74 static void opt_init(struct block *);
75 static void opt_cleanup(void);
76
77 static void make_marks(struct block *);
78 static void mark_code(struct block *);
79
80 static void intern_blocks(struct block *);
81
82 static int eq_slist(struct slist *, struct slist *);
83
84 static void find_levels_r(struct block *);
85
86 static void find_levels(struct block *);
87 static void find_dom(struct block *);
88 static void propedom(struct edge *);
89 static void find_edom(struct block *);
90 static void find_closure(struct block *);
91 static int atomuse(struct stmt *);
92 static int atomdef(struct stmt *);
93 static void compute_local_ud(struct block *);
94 static void find_ud(struct block *);
95 static void init_val(void);
96 static int F(int, int, int);
97 static __inline void vstore(struct stmt *, int *, int, int);
98 static void opt_blk(struct block *, int);
99 static int use_conflict(struct block *, struct block *);
100 static void opt_j(struct edge *);
101 static void or_pullup(struct block *);
102 static void and_pullup(struct block *);
103 static void opt_blks(struct block *, int);
104 static __inline void link_inedge(struct edge *, struct block *);
105 static void find_inedges(struct block *);
106 static void opt_root(struct block **);
107 static void opt_loop(struct block *, int);
108 static void fold_op(struct stmt *, int, int);
109 static __inline struct slist *this_op(struct slist *);
110 static void opt_not(struct block *);
111 static void opt_peep(struct block *);
112 static void opt_stmt(struct stmt *, int[], int);
113 static void deadstmt(struct stmt *, struct stmt *[]);
114 static void opt_deadstores(struct block *);
115 static void opt_blk(struct block *, int);
116 static int use_conflict(struct block *, struct block *);
117 static void opt_j(struct edge *);
118 static struct block *fold_edge(struct block *, struct edge *);
119 static __inline int eq_blk(struct block *, struct block *);
120 static int slength(struct slist *);
121 static int count_blocks(struct block *);
122 static void number_blks_r(struct block *);
123 static int count_stmts(struct block *);
124 static int convert_code_r(struct block *);
125 #ifdef BDEBUG
126 static void opt_dump(struct block *);
127 #endif
128
129 static int n_blocks;
130 struct block **blocks;
131 static int n_edges;
132 struct edge **edges;
133
134 /*
135 * A bit vector set representation of the dominators.
136 * We round up the set size to the next power of two.
137 */
138 static int nodewords;
139 static int edgewords;
140 struct block **levels;
141 bpf_u_int32 *space;
142 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
143 /*
144 * True if a is in uset {p}
145 */
146 #define SET_MEMBER(p, a) \
147 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
148
149 /*
150 * Add 'a' to uset p.
151 */
152 #define SET_INSERT(p, a) \
153 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
154
155 /*
156 * Delete 'a' from uset p.
157 */
158 #define SET_DELETE(p, a) \
159 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
160
161 /*
162 * a := a intersect b
163 */
164 #define SET_INTERSECT(a, b, n)\
165 {\
166 register bpf_u_int32 *_x = a, *_y = b;\
167 register int _n = n;\
168 while (--_n >= 0) *_x++ &= *_y++;\
169 }
170
171 /*
172 * a := a - b
173 */
174 #define SET_SUBTRACT(a, b, n)\
175 {\
176 register bpf_u_int32 *_x = a, *_y = b;\
177 register int _n = n;\
178 while (--_n >= 0) *_x++ &=~ *_y++;\
179 }
180
181 /*
182 * a := a union b
183 */
184 #define SET_UNION(a, b, n)\
185 {\
186 register bpf_u_int32 *_x = a, *_y = b;\
187 register int _n = n;\
188 while (--_n >= 0) *_x++ |= *_y++;\
189 }
190
191 static uset all_dom_sets;
192 static uset all_closure_sets;
193 static uset all_edge_sets;
194
195 #ifndef MAX
196 #define MAX(a,b) ((a)>(b)?(a):(b))
197 #endif
198
199 static void
find_levels_r(b)200 find_levels_r(b)
201 struct block *b;
202 {
203 int level;
204
205 if (isMarked(b))
206 return;
207
208 Mark(b);
209 b->link = 0;
210
211 if (JT(b)) {
212 find_levels_r(JT(b));
213 find_levels_r(JF(b));
214 level = MAX(JT(b)->level, JF(b)->level) + 1;
215 } else
216 level = 0;
217 b->level = level;
218 b->link = levels[level];
219 levels[level] = b;
220 }
221
222 /*
223 * Level graph. The levels go from 0 at the leaves to
224 * N_LEVELS at the root. The levels[] array points to the
225 * first node of the level list, whose elements are linked
226 * with the 'link' field of the struct block.
227 */
228 static void
find_levels(root)229 find_levels(root)
230 struct block *root;
231 {
232 memset((char *)levels, 0, n_blocks * sizeof(*levels));
233 unMarkAll();
234 find_levels_r(root);
235 }
236
237 /*
238 * Find dominator relationships.
239 * Assumes graph has been leveled.
240 */
241 static void
find_dom(root)242 find_dom(root)
243 struct block *root;
244 {
245 int i;
246 struct block *b;
247 bpf_u_int32 *x;
248
249 /*
250 * Initialize sets to contain all nodes.
251 */
252 x = all_dom_sets;
253 i = n_blocks * nodewords;
254 while (--i >= 0)
255 *x++ = ~0;
256 /* Root starts off empty. */
257 for (i = nodewords; --i >= 0;)
258 root->dom[i] = 0;
259
260 /* root->level is the highest level no found. */
261 for (i = root->level; i >= 0; --i) {
262 for (b = levels[i]; b; b = b->link) {
263 SET_INSERT(b->dom, b->id);
264 if (JT(b) == 0)
265 continue;
266 SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
267 SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
268 }
269 }
270 }
271
272 static void
propedom(ep)273 propedom(ep)
274 struct edge *ep;
275 {
276 SET_INSERT(ep->edom, ep->id);
277 if (ep->succ) {
278 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
279 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
280 }
281 }
282
283 /*
284 * Compute edge dominators.
285 * Assumes graph has been leveled and predecessors established.
286 */
287 static void
find_edom(root)288 find_edom(root)
289 struct block *root;
290 {
291 int i;
292 uset x;
293 struct block *b;
294
295 x = all_edge_sets;
296 for (i = n_edges * edgewords; --i >= 0; )
297 x[i] = ~0;
298
299 /* root->level is the highest level no found. */
300 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
301 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
302 for (i = root->level; i >= 0; --i) {
303 for (b = levels[i]; b != 0; b = b->link) {
304 propedom(&b->et);
305 propedom(&b->ef);
306 }
307 }
308 }
309
310 /*
311 * Find the backwards transitive closure of the flow graph. These sets
312 * are backwards in the sense that we find the set of nodes that reach
313 * a given node, not the set of nodes that can be reached by a node.
314 *
315 * Assumes graph has been leveled.
316 */
317 static void
find_closure(root)318 find_closure(root)
319 struct block *root;
320 {
321 int i;
322 struct block *b;
323
324 /*
325 * Initialize sets to contain no nodes.
326 */
327 memset((char *)all_closure_sets, 0,
328 n_blocks * nodewords * sizeof(*all_closure_sets));
329
330 /* root->level is the highest level no found. */
331 for (i = root->level; i >= 0; --i) {
332 for (b = levels[i]; b; b = b->link) {
333 SET_INSERT(b->closure, b->id);
334 if (JT(b) == 0)
335 continue;
336 SET_UNION(JT(b)->closure, b->closure, nodewords);
337 SET_UNION(JF(b)->closure, b->closure, nodewords);
338 }
339 }
340 }
341
342 /*
343 * Return the register number that is used by s. If A and X are both
344 * used, return AX_ATOM. If no register is used, return -1.
345 *
346 * The implementation should probably change to an array access.
347 */
348 static int
atomuse(s)349 atomuse(s)
350 struct stmt *s;
351 {
352 register int c = s->code;
353
354 if (c == NOP)
355 return -1;
356
357 switch (BPF_CLASS(c)) {
358
359 case BPF_RET:
360 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
361 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
362
363 case BPF_LD:
364 case BPF_LDX:
365 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
366 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
367
368 case BPF_ST:
369 return A_ATOM;
370
371 case BPF_STX:
372 return X_ATOM;
373
374 case BPF_JMP:
375 case BPF_ALU:
376 if (BPF_SRC(c) == BPF_X)
377 return AX_ATOM;
378 return A_ATOM;
379
380 case BPF_MISC:
381 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
382 }
383 abort();
384 /* NOTREACHED */
385 }
386
387 /*
388 * Return the register number that is defined by 's'. We assume that
389 * a single stmt cannot define more than one register. If no register
390 * is defined, return -1.
391 *
392 * The implementation should probably change to an array access.
393 */
394 static int
atomdef(s)395 atomdef(s)
396 struct stmt *s;
397 {
398 if (s->code == NOP)
399 return -1;
400
401 switch (BPF_CLASS(s->code)) {
402
403 case BPF_LD:
404 case BPF_ALU:
405 return A_ATOM;
406
407 case BPF_LDX:
408 return X_ATOM;
409
410 case BPF_ST:
411 case BPF_STX:
412 return s->k;
413
414 case BPF_MISC:
415 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
416 }
417 return -1;
418 }
419
420 static void
compute_local_ud(b)421 compute_local_ud(b)
422 struct block *b;
423 {
424 struct slist *s;
425 atomset def = 0, use = 0, kill = 0;
426 int atom;
427
428 for (s = b->stmts; s; s = s->next) {
429 if (s->s.code == NOP)
430 continue;
431 atom = atomuse(&s->s);
432 if (atom >= 0) {
433 if (atom == AX_ATOM) {
434 if (!ATOMELEM(def, X_ATOM))
435 use |= ATOMMASK(X_ATOM);
436 if (!ATOMELEM(def, A_ATOM))
437 use |= ATOMMASK(A_ATOM);
438 }
439 else if (atom < N_ATOMS) {
440 if (!ATOMELEM(def, atom))
441 use |= ATOMMASK(atom);
442 }
443 else
444 abort();
445 }
446 atom = atomdef(&s->s);
447 if (atom >= 0) {
448 if (!ATOMELEM(use, atom))
449 kill |= ATOMMASK(atom);
450 def |= ATOMMASK(atom);
451 }
452 }
453 if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
454 use |= ATOMMASK(A_ATOM);
455
456 b->def = def;
457 b->kill = kill;
458 b->in_use = use;
459 }
460
461 /*
462 * Assume graph is already leveled.
463 */
464 static void
find_ud(root)465 find_ud(root)
466 struct block *root;
467 {
468 int i, maxlevel;
469 struct block *p;
470
471 /*
472 * root->level is the highest level no found;
473 * count down from there.
474 */
475 maxlevel = root->level;
476 for (i = maxlevel; i >= 0; --i)
477 for (p = levels[i]; p; p = p->link) {
478 compute_local_ud(p);
479 p->out_use = 0;
480 }
481
482 for (i = 1; i <= maxlevel; ++i) {
483 for (p = levels[i]; p; p = p->link) {
484 p->out_use |= JT(p)->in_use | JF(p)->in_use;
485 p->in_use |= p->out_use &~ p->kill;
486 }
487 }
488 }
489
490 /*
491 * These data structures are used in a Cocke and Shwarz style
492 * value numbering scheme. Since the flowgraph is acyclic,
493 * exit values can be propagated from a node's predecessors
494 * provided it is uniquely defined.
495 */
496 struct valnode {
497 int code;
498 int v0, v1;
499 int val;
500 struct valnode *next;
501 };
502
503 #define MODULUS 213
504 static struct valnode *hashtbl[MODULUS];
505 static int curval;
506 static int maxval;
507
508 /* Integer constants mapped with the load immediate opcode. */
509 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
510
511 struct vmapinfo {
512 int is_const;
513 bpf_int32 const_val;
514 };
515
516 struct vmapinfo *vmap;
517 struct valnode *vnode_base;
518 struct valnode *next_vnode;
519
520 static void
init_val()521 init_val()
522 {
523 curval = 0;
524 next_vnode = vnode_base;
525 memset((char *)vmap, 0, maxval * sizeof(*vmap));
526 memset((char *)hashtbl, 0, sizeof hashtbl);
527 }
528
529 /* Because we really don't have an IR, this stuff is a little messy. */
530 static int
F(code,v0,v1)531 F(code, v0, v1)
532 int code;
533 int v0, v1;
534 {
535 u_int hash;
536 int val;
537 struct valnode *p;
538
539 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
540 hash %= MODULUS;
541
542 for (p = hashtbl[hash]; p; p = p->next)
543 if (p->code == code && p->v0 == v0 && p->v1 == v1)
544 return p->val;
545
546 val = ++curval;
547 if (BPF_MODE(code) == BPF_IMM &&
548 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
549 vmap[val].const_val = v0;
550 vmap[val].is_const = 1;
551 }
552 p = next_vnode++;
553 p->val = val;
554 p->code = code;
555 p->v0 = v0;
556 p->v1 = v1;
557 p->next = hashtbl[hash];
558 hashtbl[hash] = p;
559
560 return val;
561 }
562
563 static __inline void
vstore(s,valp,newval,alter)564 vstore(s, valp, newval, alter)
565 struct stmt *s;
566 int *valp;
567 int newval;
568 int alter;
569 {
570 if (alter && *valp == newval)
571 s->code = NOP;
572 else
573 *valp = newval;
574 }
575
576 static void
fold_op(s,v0,v1)577 fold_op(s, v0, v1)
578 struct stmt *s;
579 int v0, v1;
580 {
581 bpf_int32 a, b;
582
583 a = vmap[v0].const_val;
584 b = vmap[v1].const_val;
585
586 switch (BPF_OP(s->code)) {
587 case BPF_ADD:
588 a += b;
589 break;
590
591 case BPF_SUB:
592 a -= b;
593 break;
594
595 case BPF_MUL:
596 a *= b;
597 break;
598
599 case BPF_DIV:
600 if (b == 0)
601 bpf_error("division by zero");
602 a /= b;
603 break;
604
605 case BPF_AND:
606 a &= b;
607 break;
608
609 case BPF_OR:
610 a |= b;
611 break;
612
613 case BPF_LSH:
614 a <<= b;
615 break;
616
617 case BPF_RSH:
618 a >>= b;
619 break;
620
621 case BPF_NEG:
622 a = -a;
623 break;
624
625 default:
626 abort();
627 }
628 s->k = a;
629 s->code = BPF_LD|BPF_IMM;
630 done = 0;
631 }
632
633 static __inline struct slist *
this_op(s)634 this_op(s)
635 struct slist *s;
636 {
637 while (s != 0 && s->s.code == NOP)
638 s = s->next;
639 return s;
640 }
641
642 static void
opt_not(b)643 opt_not(b)
644 struct block *b;
645 {
646 struct block *tmp = JT(b);
647
648 JT(b) = JF(b);
649 JF(b) = tmp;
650 }
651
652 static void
opt_peep(b)653 opt_peep(b)
654 struct block *b;
655 {
656 struct slist *s;
657 struct slist *next, *last;
658 int val;
659
660 s = b->stmts;
661 if (s == 0)
662 return;
663
664 last = s;
665 while (1) {
666 s = this_op(s);
667 if (s == 0)
668 break;
669 next = this_op(s->next);
670 if (next == 0)
671 break;
672 last = next;
673
674 /*
675 * st M[k] --> st M[k]
676 * ldx M[k] tax
677 */
678 if (s->s.code == BPF_ST &&
679 next->s.code == (BPF_LDX|BPF_MEM) &&
680 s->s.k == next->s.k) {
681 done = 0;
682 next->s.code = BPF_MISC|BPF_TAX;
683 }
684 /*
685 * ld #k --> ldx #k
686 * tax txa
687 */
688 if (s->s.code == (BPF_LD|BPF_IMM) &&
689 next->s.code == (BPF_MISC|BPF_TAX)) {
690 s->s.code = BPF_LDX|BPF_IMM;
691 next->s.code = BPF_MISC|BPF_TXA;
692 done = 0;
693 }
694 /*
695 * This is an ugly special case, but it happens
696 * when you say tcp[k] or udp[k] where k is a constant.
697 */
698 if (s->s.code == (BPF_LD|BPF_IMM)) {
699 struct slist *add, *tax, *ild;
700
701 /*
702 * Check that X isn't used on exit from this
703 * block (which the optimizer might cause).
704 * We know the code generator won't generate
705 * any local dependencies.
706 */
707 if (ATOMELEM(b->out_use, X_ATOM))
708 break;
709
710 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
711 add = next;
712 else
713 add = this_op(next->next);
714 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
715 break;
716
717 tax = this_op(add->next);
718 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
719 break;
720
721 ild = this_op(tax->next);
722 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
723 BPF_MODE(ild->s.code) != BPF_IND)
724 break;
725 /*
726 * XXX We need to check that X is not
727 * subsequently used. We know we can eliminate the
728 * accumulator modifications since it is defined
729 * by the last stmt of this sequence.
730 *
731 * We want to turn this sequence:
732 *
733 * (004) ldi #0x2 {s}
734 * (005) ldxms [14] {next} -- optional
735 * (006) addx {add}
736 * (007) tax {tax}
737 * (008) ild [x+0] {ild}
738 *
739 * into this sequence:
740 *
741 * (004) nop
742 * (005) ldxms [14]
743 * (006) nop
744 * (007) nop
745 * (008) ild [x+2]
746 *
747 */
748 ild->s.k += s->s.k;
749 s->s.code = NOP;
750 add->s.code = NOP;
751 tax->s.code = NOP;
752 done = 0;
753 }
754 s = next;
755 }
756 /*
757 * If we have a subtract to do a comparison, and the X register
758 * is a known constant, we can merge this value into the
759 * comparison.
760 */
761 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
762 !ATOMELEM(b->out_use, A_ATOM)) {
763 val = b->val[X_ATOM];
764 if (vmap[val].is_const) {
765 int op;
766
767 b->s.k += vmap[val].const_val;
768 op = BPF_OP(b->s.code);
769 if (op == BPF_JGT || op == BPF_JGE) {
770 struct block *t = JT(b);
771 JT(b) = JF(b);
772 JF(b) = t;
773 b->s.k += 0x80000000;
774 }
775 last->s.code = NOP;
776 done = 0;
777 } else if (b->s.k == 0) {
778 /*
779 * sub x -> nop
780 * j #0 j x
781 */
782 last->s.code = NOP;
783 b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
784 BPF_X;
785 done = 0;
786 }
787 }
788 /*
789 * Likewise, a constant subtract can be simplified.
790 */
791 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
792 !ATOMELEM(b->out_use, A_ATOM)) {
793 int op;
794
795 b->s.k += last->s.k;
796 last->s.code = NOP;
797 op = BPF_OP(b->s.code);
798 if (op == BPF_JGT || op == BPF_JGE) {
799 struct block *t = JT(b);
800 JT(b) = JF(b);
801 JF(b) = t;
802 b->s.k += 0x80000000;
803 }
804 done = 0;
805 }
806 /*
807 * and #k nop
808 * jeq #0 -> jset #k
809 */
810 if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
811 !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
812 b->s.k = last->s.k;
813 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
814 last->s.code = NOP;
815 done = 0;
816 opt_not(b);
817 }
818 /*
819 * If the accumulator is a known constant, we can compute the
820 * comparison result.
821 */
822 val = b->val[A_ATOM];
823 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
824 bpf_int32 v = vmap[val].const_val;
825 switch (BPF_OP(b->s.code)) {
826
827 case BPF_JEQ:
828 v = v == b->s.k;
829 break;
830
831 case BPF_JGT:
832 v = (unsigned)v > b->s.k;
833 break;
834
835 case BPF_JGE:
836 v = (unsigned)v >= b->s.k;
837 break;
838
839 case BPF_JSET:
840 v &= b->s.k;
841 break;
842
843 default:
844 abort();
845 }
846 if (JF(b) != JT(b))
847 done = 0;
848 if (v)
849 JF(b) = JT(b);
850 else
851 JT(b) = JF(b);
852 }
853 }
854
855 /*
856 * Compute the symbolic value of expression of 's', and update
857 * anything it defines in the value table 'val'. If 'alter' is true,
858 * do various optimizations. This code would be cleaner if symbolic
859 * evaluation and code transformations weren't folded together.
860 */
861 static void
opt_stmt(s,val,alter)862 opt_stmt(s, val, alter)
863 struct stmt *s;
864 int val[];
865 int alter;
866 {
867 int op;
868 int v;
869
870 switch (s->code) {
871
872 case BPF_LD|BPF_ABS|BPF_W:
873 case BPF_LD|BPF_ABS|BPF_H:
874 case BPF_LD|BPF_ABS|BPF_B:
875 v = F(s->code, s->k, 0L);
876 vstore(s, &val[A_ATOM], v, alter);
877 break;
878
879 case BPF_LD|BPF_IND|BPF_W:
880 case BPF_LD|BPF_IND|BPF_H:
881 case BPF_LD|BPF_IND|BPF_B:
882 v = val[X_ATOM];
883 if (alter && vmap[v].is_const) {
884 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
885 s->k += vmap[v].const_val;
886 v = F(s->code, s->k, 0L);
887 done = 0;
888 }
889 else
890 v = F(s->code, s->k, v);
891 vstore(s, &val[A_ATOM], v, alter);
892 break;
893
894 case BPF_LD|BPF_LEN:
895 v = F(s->code, 0L, 0L);
896 vstore(s, &val[A_ATOM], v, alter);
897 break;
898
899 case BPF_LD|BPF_IMM:
900 v = K(s->k);
901 vstore(s, &val[A_ATOM], v, alter);
902 break;
903
904 case BPF_LDX|BPF_IMM:
905 v = K(s->k);
906 vstore(s, &val[X_ATOM], v, alter);
907 break;
908
909 case BPF_LDX|BPF_MSH|BPF_B:
910 v = F(s->code, s->k, 0L);
911 vstore(s, &val[X_ATOM], v, alter);
912 break;
913
914 case BPF_ALU|BPF_NEG:
915 if (alter && vmap[val[A_ATOM]].is_const) {
916 s->code = BPF_LD|BPF_IMM;
917 s->k = -vmap[val[A_ATOM]].const_val;
918 val[A_ATOM] = K(s->k);
919 }
920 else
921 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
922 break;
923
924 case BPF_ALU|BPF_ADD|BPF_K:
925 case BPF_ALU|BPF_SUB|BPF_K:
926 case BPF_ALU|BPF_MUL|BPF_K:
927 case BPF_ALU|BPF_DIV|BPF_K:
928 case BPF_ALU|BPF_AND|BPF_K:
929 case BPF_ALU|BPF_OR|BPF_K:
930 case BPF_ALU|BPF_LSH|BPF_K:
931 case BPF_ALU|BPF_RSH|BPF_K:
932 op = BPF_OP(s->code);
933 if (alter) {
934 if (s->k == 0) {
935 if (op == BPF_ADD || op == BPF_SUB ||
936 op == BPF_LSH || op == BPF_RSH ||
937 op == BPF_OR) {
938 s->code = NOP;
939 break;
940 }
941 if (op == BPF_MUL || op == BPF_AND) {
942 s->code = BPF_LD|BPF_IMM;
943 val[A_ATOM] = K(s->k);
944 break;
945 }
946 }
947 if (vmap[val[A_ATOM]].is_const) {
948 fold_op(s, val[A_ATOM], K(s->k));
949 val[A_ATOM] = K(s->k);
950 break;
951 }
952 }
953 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
954 break;
955
956 case BPF_ALU|BPF_ADD|BPF_X:
957 case BPF_ALU|BPF_SUB|BPF_X:
958 case BPF_ALU|BPF_MUL|BPF_X:
959 case BPF_ALU|BPF_DIV|BPF_X:
960 case BPF_ALU|BPF_AND|BPF_X:
961 case BPF_ALU|BPF_OR|BPF_X:
962 case BPF_ALU|BPF_LSH|BPF_X:
963 case BPF_ALU|BPF_RSH|BPF_X:
964 op = BPF_OP(s->code);
965 if (alter && vmap[val[X_ATOM]].is_const) {
966 if (vmap[val[A_ATOM]].is_const) {
967 fold_op(s, val[A_ATOM], val[X_ATOM]);
968 val[A_ATOM] = K(s->k);
969 }
970 else {
971 s->code = BPF_ALU|BPF_K|op;
972 s->k = vmap[val[X_ATOM]].const_val;
973 done = 0;
974 val[A_ATOM] =
975 F(s->code, val[A_ATOM], K(s->k));
976 }
977 break;
978 }
979 /*
980 * Check if we're doing something to an accumulator
981 * that is 0, and simplify. This may not seem like
982 * much of a simplification but it could open up further
983 * optimizations.
984 * XXX We could also check for mul by 1, and -1, etc.
985 */
986 if (alter && vmap[val[A_ATOM]].is_const
987 && vmap[val[A_ATOM]].const_val == 0) {
988 if (op == BPF_ADD || op == BPF_OR ||
989 op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
990 s->code = BPF_MISC|BPF_TXA;
991 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
992 break;
993 }
994 else if (op == BPF_MUL || op == BPF_DIV ||
995 op == BPF_AND) {
996 s->code = BPF_LD|BPF_IMM;
997 s->k = 0;
998 vstore(s, &val[A_ATOM], K(s->k), alter);
999 break;
1000 }
1001 else if (op == BPF_NEG) {
1002 s->code = NOP;
1003 break;
1004 }
1005 }
1006 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1007 break;
1008
1009 case BPF_MISC|BPF_TXA:
1010 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1011 break;
1012
1013 case BPF_LD|BPF_MEM:
1014 v = val[s->k];
1015 if (alter && vmap[v].is_const) {
1016 s->code = BPF_LD|BPF_IMM;
1017 s->k = vmap[v].const_val;
1018 done = 0;
1019 }
1020 vstore(s, &val[A_ATOM], v, alter);
1021 break;
1022
1023 case BPF_MISC|BPF_TAX:
1024 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1025 break;
1026
1027 case BPF_LDX|BPF_MEM:
1028 v = val[s->k];
1029 if (alter && vmap[v].is_const) {
1030 s->code = BPF_LDX|BPF_IMM;
1031 s->k = vmap[v].const_val;
1032 done = 0;
1033 }
1034 vstore(s, &val[X_ATOM], v, alter);
1035 break;
1036
1037 case BPF_ST:
1038 vstore(s, &val[s->k], val[A_ATOM], alter);
1039 break;
1040
1041 case BPF_STX:
1042 vstore(s, &val[s->k], val[X_ATOM], alter);
1043 break;
1044 }
1045 }
1046
1047 static void
deadstmt(s,last)1048 deadstmt(s, last)
1049 register struct stmt *s;
1050 register struct stmt *last[];
1051 {
1052 register int atom;
1053
1054 atom = atomuse(s);
1055 if (atom >= 0) {
1056 if (atom == AX_ATOM) {
1057 last[X_ATOM] = 0;
1058 last[A_ATOM] = 0;
1059 }
1060 else
1061 last[atom] = 0;
1062 }
1063 atom = atomdef(s);
1064 if (atom >= 0) {
1065 if (last[atom]) {
1066 done = 0;
1067 last[atom]->code = NOP;
1068 }
1069 last[atom] = s;
1070 }
1071 }
1072
1073 static void
opt_deadstores(b)1074 opt_deadstores(b)
1075 register struct block *b;
1076 {
1077 register struct slist *s;
1078 register int atom;
1079 struct stmt *last[N_ATOMS];
1080
1081 memset((char *)last, 0, sizeof last);
1082
1083 for (s = b->stmts; s != 0; s = s->next)
1084 deadstmt(&s->s, last);
1085 deadstmt(&b->s, last);
1086
1087 for (atom = 0; atom < N_ATOMS; ++atom)
1088 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1089 last[atom]->code = NOP;
1090 done = 0;
1091 }
1092 }
1093
1094 static void
opt_blk(b,do_stmts)1095 opt_blk(b, do_stmts)
1096 struct block *b;
1097 int do_stmts;
1098 {
1099 struct slist *s;
1100 struct edge *p;
1101 int i;
1102 bpf_int32 aval;
1103
1104 #if 0
1105 for (s = b->stmts; s && s->next; s = s->next)
1106 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1107 do_stmts = 0;
1108 break;
1109 }
1110 #endif
1111
1112 /*
1113 * Initialize the atom values.
1114 * If we have no predecessors, everything is undefined.
1115 * Otherwise, we inherent our values from our predecessors.
1116 * If any register has an ambiguous value (i.e. control paths are
1117 * merging) give it the undefined value of 0.
1118 */
1119 p = b->in_edges;
1120 if (p == 0)
1121 memset((char *)b->val, 0, sizeof(b->val));
1122 else {
1123 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1124 while ((p = p->next) != NULL) {
1125 for (i = 0; i < N_ATOMS; ++i)
1126 if (b->val[i] != p->pred->val[i])
1127 b->val[i] = 0;
1128 }
1129 }
1130 aval = b->val[A_ATOM];
1131 for (s = b->stmts; s; s = s->next)
1132 opt_stmt(&s->s, b->val, do_stmts);
1133
1134 /*
1135 * This is a special case: if we don't use anything from this
1136 * block, and we load the accumulator with value that is
1137 * already there, or if this block is a return,
1138 * eliminate all the statements.
1139 */
1140 if (do_stmts &&
1141 ((b->out_use == 0 && aval != 0 &&b->val[A_ATOM] == aval) ||
1142 BPF_CLASS(b->s.code) == BPF_RET)) {
1143 if (b->stmts != 0) {
1144 b->stmts = 0;
1145 done = 0;
1146 }
1147 } else {
1148 opt_peep(b);
1149 opt_deadstores(b);
1150 }
1151 /*
1152 * Set up values for branch optimizer.
1153 */
1154 if (BPF_SRC(b->s.code) == BPF_K)
1155 b->oval = K(b->s.k);
1156 else
1157 b->oval = b->val[X_ATOM];
1158 b->et.code = b->s.code;
1159 b->ef.code = -b->s.code;
1160 }
1161
1162 /*
1163 * Return true if any register that is used on exit from 'succ', has
1164 * an exit value that is different from the corresponding exit value
1165 * from 'b'.
1166 */
1167 static int
use_conflict(b,succ)1168 use_conflict(b, succ)
1169 struct block *b, *succ;
1170 {
1171 int atom;
1172 atomset use = succ->out_use;
1173
1174 if (use == 0)
1175 return 0;
1176
1177 for (atom = 0; atom < N_ATOMS; ++atom)
1178 if (ATOMELEM(use, atom))
1179 if (b->val[atom] != succ->val[atom])
1180 return 1;
1181 return 0;
1182 }
1183
1184 static struct block *
fold_edge(child,ep)1185 fold_edge(child, ep)
1186 struct block *child;
1187 struct edge *ep;
1188 {
1189 int sense;
1190 int aval0, aval1, oval0, oval1;
1191 int code = ep->code;
1192
1193 if (code < 0) {
1194 code = -code;
1195 sense = 0;
1196 } else
1197 sense = 1;
1198
1199 if (child->s.code != code)
1200 return 0;
1201
1202 aval0 = child->val[A_ATOM];
1203 oval0 = child->oval;
1204 aval1 = ep->pred->val[A_ATOM];
1205 oval1 = ep->pred->oval;
1206
1207 if (aval0 != aval1)
1208 return 0;
1209
1210 if (oval0 == oval1)
1211 /*
1212 * The operands are identical, so the
1213 * result is true if a true branch was
1214 * taken to get here, otherwise false.
1215 */
1216 return sense ? JT(child) : JF(child);
1217
1218 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1219 /*
1220 * At this point, we only know the comparison if we
1221 * came down the true branch, and it was an equality
1222 * comparison with a constant. We rely on the fact that
1223 * distinct constants have distinct value numbers.
1224 */
1225 return JF(child);
1226
1227 return 0;
1228 }
1229
1230 static void
opt_j(ep)1231 opt_j(ep)
1232 struct edge *ep;
1233 {
1234 register int i, k;
1235 register struct block *target;
1236
1237 if (JT(ep->succ) == 0)
1238 return;
1239
1240 if (JT(ep->succ) == JF(ep->succ)) {
1241 /*
1242 * Common branch targets can be eliminated, provided
1243 * there is no data dependency.
1244 */
1245 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1246 done = 0;
1247 ep->succ = JT(ep->succ);
1248 }
1249 }
1250 /*
1251 * For each edge dominator that matches the successor of this
1252 * edge, promote the edge successor to the its grandchild.
1253 *
1254 * XXX We violate the set abstraction here in favor a reasonably
1255 * efficient loop.
1256 */
1257 top:
1258 for (i = 0; i < edgewords; ++i) {
1259 register bpf_u_int32 x = ep->edom[i];
1260
1261 while (x != 0) {
1262 k = ffs(x) - 1;
1263 x &=~ (1 << k);
1264 k += i * BITS_PER_WORD;
1265
1266 target = fold_edge(ep->succ, edges[k]);
1267 /*
1268 * Check that there is no data dependency between
1269 * nodes that will be violated if we move the edge.
1270 */
1271 if (target != 0 && !use_conflict(ep->pred, target)) {
1272 done = 0;
1273 ep->succ = target;
1274 if (JT(target) != 0)
1275 /*
1276 * Start over unless we hit a leaf.
1277 */
1278 goto top;
1279 return;
1280 }
1281 }
1282 }
1283 }
1284
1285
1286 static void
or_pullup(b)1287 or_pullup(b)
1288 struct block *b;
1289 {
1290 int val, at_top;
1291 struct block *pull;
1292 struct block **diffp, **samep;
1293 struct edge *ep;
1294
1295 ep = b->in_edges;
1296 if (ep == 0)
1297 return;
1298
1299 /*
1300 * Make sure each predecessor loads the same value.
1301 * XXX why?
1302 */
1303 val = ep->pred->val[A_ATOM];
1304 for (ep = ep->next; ep != 0; ep = ep->next)
1305 if (val != ep->pred->val[A_ATOM])
1306 return;
1307
1308 if (JT(b->in_edges->pred) == b)
1309 diffp = &JT(b->in_edges->pred);
1310 else
1311 diffp = &JF(b->in_edges->pred);
1312
1313 at_top = 1;
1314 while (1) {
1315 if (*diffp == 0)
1316 return;
1317
1318 if (JT(*diffp) != JT(b))
1319 return;
1320
1321 if (!SET_MEMBER((*diffp)->dom, b->id))
1322 return;
1323
1324 if ((*diffp)->val[A_ATOM] != val)
1325 break;
1326
1327 diffp = &JF(*diffp);
1328 at_top = 0;
1329 }
1330 samep = &JF(*diffp);
1331 while (1) {
1332 if (*samep == 0)
1333 return;
1334
1335 if (JT(*samep) != JT(b))
1336 return;
1337
1338 if (!SET_MEMBER((*samep)->dom, b->id))
1339 return;
1340
1341 if ((*samep)->val[A_ATOM] == val)
1342 break;
1343
1344 /* XXX Need to check that there are no data dependencies
1345 between dp0 and dp1. Currently, the code generator
1346 will not produce such dependencies. */
1347 samep = &JF(*samep);
1348 }
1349 #ifdef notdef
1350 /* XXX This doesn't cover everything. */
1351 for (i = 0; i < N_ATOMS; ++i)
1352 if ((*samep)->val[i] != pred->val[i])
1353 return;
1354 #endif
1355 /* Pull up the node. */
1356 pull = *samep;
1357 *samep = JF(pull);
1358 JF(pull) = *diffp;
1359
1360 /*
1361 * At the top of the chain, each predecessor needs to point at the
1362 * pulled up node. Inside the chain, there is only one predecessor
1363 * to worry about.
1364 */
1365 if (at_top) {
1366 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1367 if (JT(ep->pred) == b)
1368 JT(ep->pred) = pull;
1369 else
1370 JF(ep->pred) = pull;
1371 }
1372 }
1373 else
1374 *diffp = pull;
1375
1376 done = 0;
1377 }
1378
1379 static void
and_pullup(b)1380 and_pullup(b)
1381 struct block *b;
1382 {
1383 int val, at_top;
1384 struct block *pull;
1385 struct block **diffp, **samep;
1386 struct edge *ep;
1387
1388 ep = b->in_edges;
1389 if (ep == 0)
1390 return;
1391
1392 /*
1393 * Make sure each predecessor loads the same value.
1394 */
1395 val = ep->pred->val[A_ATOM];
1396 for (ep = ep->next; ep != 0; ep = ep->next)
1397 if (val != ep->pred->val[A_ATOM])
1398 return;
1399
1400 if (JT(b->in_edges->pred) == b)
1401 diffp = &JT(b->in_edges->pred);
1402 else
1403 diffp = &JF(b->in_edges->pred);
1404
1405 at_top = 1;
1406 while (1) {
1407 if (*diffp == 0)
1408 return;
1409
1410 if (JF(*diffp) != JF(b))
1411 return;
1412
1413 if (!SET_MEMBER((*diffp)->dom, b->id))
1414 return;
1415
1416 if ((*diffp)->val[A_ATOM] != val)
1417 break;
1418
1419 diffp = &JT(*diffp);
1420 at_top = 0;
1421 }
1422 samep = &JT(*diffp);
1423 while (1) {
1424 if (*samep == 0)
1425 return;
1426
1427 if (JF(*samep) != JF(b))
1428 return;
1429
1430 if (!SET_MEMBER((*samep)->dom, b->id))
1431 return;
1432
1433 if ((*samep)->val[A_ATOM] == val)
1434 break;
1435
1436 /* XXX Need to check that there are no data dependencies
1437 between diffp and samep. Currently, the code generator
1438 will not produce such dependencies. */
1439 samep = &JT(*samep);
1440 }
1441 #ifdef notdef
1442 /* XXX This doesn't cover everything. */
1443 for (i = 0; i < N_ATOMS; ++i)
1444 if ((*samep)->val[i] != pred->val[i])
1445 return;
1446 #endif
1447 /* Pull up the node. */
1448 pull = *samep;
1449 *samep = JT(pull);
1450 JT(pull) = *diffp;
1451
1452 /*
1453 * At the top of the chain, each predecessor needs to point at the
1454 * pulled up node. Inside the chain, there is only one predecessor
1455 * to worry about.
1456 */
1457 if (at_top) {
1458 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1459 if (JT(ep->pred) == b)
1460 JT(ep->pred) = pull;
1461 else
1462 JF(ep->pred) = pull;
1463 }
1464 }
1465 else
1466 *diffp = pull;
1467
1468 done = 0;
1469 }
1470
1471 static void
opt_blks(root,do_stmts)1472 opt_blks(root, do_stmts)
1473 struct block *root;
1474 int do_stmts;
1475 {
1476 int i, maxlevel;
1477 struct block *p;
1478
1479 init_val();
1480 maxlevel = root->level;
1481 for (i = maxlevel; i >= 0; --i)
1482 for (p = levels[i]; p; p = p->link)
1483 opt_blk(p, do_stmts);
1484
1485 if (do_stmts)
1486 /*
1487 * No point trying to move branches; it can't possibly
1488 * make a difference at this point.
1489 */
1490 return;
1491
1492 for (i = 1; i <= maxlevel; ++i) {
1493 for (p = levels[i]; p; p = p->link) {
1494 opt_j(&p->et);
1495 opt_j(&p->ef);
1496 }
1497 }
1498 for (i = 1; i <= maxlevel; ++i) {
1499 for (p = levels[i]; p; p = p->link) {
1500 or_pullup(p);
1501 and_pullup(p);
1502 }
1503 }
1504 }
1505
1506 static __inline void
link_inedge(parent,child)1507 link_inedge(parent, child)
1508 struct edge *parent;
1509 struct block *child;
1510 {
1511 parent->next = child->in_edges;
1512 child->in_edges = parent;
1513 }
1514
1515 static void
find_inedges(root)1516 find_inedges(root)
1517 struct block *root;
1518 {
1519 int i;
1520 struct block *b;
1521
1522 for (i = 0; i < n_blocks; ++i)
1523 blocks[i]->in_edges = 0;
1524
1525 /*
1526 * Traverse the graph, adding each edge to the predecessor
1527 * list of its successors. Skip the leaves (i.e. level 0).
1528 */
1529 for (i = root->level; i > 0; --i) {
1530 for (b = levels[i]; b != 0; b = b->link) {
1531 link_inedge(&b->et, JT(b));
1532 link_inedge(&b->ef, JF(b));
1533 }
1534 }
1535 }
1536
1537 static void
opt_root(b)1538 opt_root(b)
1539 struct block **b;
1540 {
1541 struct slist *tmp, *s;
1542
1543 s = (*b)->stmts;
1544 (*b)->stmts = 0;
1545 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1546 *b = JT(*b);
1547
1548 tmp = (*b)->stmts;
1549 if (tmp != 0)
1550 sappend(s, tmp);
1551 (*b)->stmts = s;
1552
1553 /*
1554 * If the root node is a return, then there is no
1555 * point executing any statements (since the bpf machine
1556 * has no side effects).
1557 */
1558 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1559 (*b)->stmts = 0;
1560 }
1561
1562 static void
opt_loop(root,do_stmts)1563 opt_loop(root, do_stmts)
1564 struct block *root;
1565 int do_stmts;
1566 {
1567
1568 #ifdef BDEBUG
1569 if (dflag > 1)
1570 opt_dump(root);
1571 #endif
1572 do {
1573 done = 1;
1574 find_levels(root);
1575 find_dom(root);
1576 find_closure(root);
1577 find_inedges(root);
1578 find_ud(root);
1579 find_edom(root);
1580 opt_blks(root, do_stmts);
1581 #ifdef BDEBUG
1582 if (dflag > 1)
1583 opt_dump(root);
1584 #endif
1585 } while (!done);
1586 }
1587
1588 /*
1589 * Optimize the filter code in its dag representation.
1590 */
1591 void
bpf_optimize(rootp)1592 bpf_optimize(rootp)
1593 struct block **rootp;
1594 {
1595 struct block *root;
1596
1597 root = *rootp;
1598
1599 opt_init(root);
1600 opt_loop(root, 0);
1601 opt_loop(root, 1);
1602 intern_blocks(root);
1603 opt_root(rootp);
1604 opt_cleanup();
1605 }
1606
1607 static void
make_marks(p)1608 make_marks(p)
1609 struct block *p;
1610 {
1611 if (!isMarked(p)) {
1612 Mark(p);
1613 if (BPF_CLASS(p->s.code) != BPF_RET) {
1614 make_marks(JT(p));
1615 make_marks(JF(p));
1616 }
1617 }
1618 }
1619
1620 /*
1621 * Mark code array such that isMarked(i) is true
1622 * only for nodes that are alive.
1623 */
1624 static void
mark_code(p)1625 mark_code(p)
1626 struct block *p;
1627 {
1628 cur_mark += 1;
1629 make_marks(p);
1630 }
1631
1632 /*
1633 * True iff the two stmt lists load the same value from the packet into
1634 * the accumulator.
1635 */
1636 static int
eq_slist(x,y)1637 eq_slist(x, y)
1638 struct slist *x, *y;
1639 {
1640 while (1) {
1641 while (x && x->s.code == NOP)
1642 x = x->next;
1643 while (y && y->s.code == NOP)
1644 y = y->next;
1645 if (x == 0)
1646 return y == 0;
1647 if (y == 0)
1648 return x == 0;
1649 if (x->s.code != y->s.code || x->s.k != y->s.k)
1650 return 0;
1651 x = x->next;
1652 y = y->next;
1653 }
1654 }
1655
1656 static __inline int
eq_blk(b0,b1)1657 eq_blk(b0, b1)
1658 struct block *b0, *b1;
1659 {
1660 if (b0->s.code == b1->s.code &&
1661 b0->s.k == b1->s.k &&
1662 b0->et.succ == b1->et.succ &&
1663 b0->ef.succ == b1->ef.succ)
1664 return eq_slist(b0->stmts, b1->stmts);
1665 return 0;
1666 }
1667
1668 static void
intern_blocks(root)1669 intern_blocks(root)
1670 struct block *root;
1671 {
1672 struct block *p;
1673 int i, j;
1674 int done;
1675 top:
1676 done = 1;
1677 for (i = 0; i < n_blocks; ++i)
1678 blocks[i]->link = 0;
1679
1680 mark_code(root);
1681
1682 for (i = n_blocks - 1; --i >= 0; ) {
1683 if (!isMarked(blocks[i]))
1684 continue;
1685 for (j = i + 1; j < n_blocks; ++j) {
1686 if (!isMarked(blocks[j]))
1687 continue;
1688 if (eq_blk(blocks[i], blocks[j])) {
1689 blocks[i]->link = blocks[j]->link ?
1690 blocks[j]->link : blocks[j];
1691 break;
1692 }
1693 }
1694 }
1695 for (i = 0; i < n_blocks; ++i) {
1696 p = blocks[i];
1697 if (JT(p) == 0)
1698 continue;
1699 if (JT(p)->link) {
1700 done = 0;
1701 JT(p) = JT(p)->link;
1702 }
1703 if (JF(p)->link) {
1704 done = 0;
1705 JF(p) = JF(p)->link;
1706 }
1707 }
1708 if (!done)
1709 goto top;
1710 }
1711
1712 static void
opt_cleanup()1713 opt_cleanup()
1714 {
1715 free((void *)vnode_base);
1716 free((void *)vmap);
1717 free((void *)edges);
1718 free((void *)space);
1719 free((void *)levels);
1720 free((void *)blocks);
1721 }
1722
1723 /*
1724 * Return the number of stmts in 's'.
1725 */
1726 static int
slength(s)1727 slength(s)
1728 struct slist *s;
1729 {
1730 int n = 0;
1731
1732 for (; s; s = s->next)
1733 if (s->s.code != NOP)
1734 ++n;
1735 return n;
1736 }
1737
1738 /*
1739 * Return the number of nodes reachable by 'p'.
1740 * All nodes should be initially unmarked.
1741 */
1742 static int
count_blocks(p)1743 count_blocks(p)
1744 struct block *p;
1745 {
1746 if (p == 0 || isMarked(p))
1747 return 0;
1748 Mark(p);
1749 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1750 }
1751
1752 /*
1753 * Do a depth first search on the flow graph, numbering the
1754 * the basic blocks, and entering them into the 'blocks' array.`
1755 */
1756 static void
number_blks_r(p)1757 number_blks_r(p)
1758 struct block *p;
1759 {
1760 int n;
1761
1762 if (p == 0 || isMarked(p))
1763 return;
1764
1765 Mark(p);
1766 n = n_blocks++;
1767 p->id = n;
1768 blocks[n] = p;
1769
1770 number_blks_r(JT(p));
1771 number_blks_r(JF(p));
1772 }
1773
1774 /*
1775 * Return the number of stmts in the flowgraph reachable by 'p'.
1776 * The nodes should be unmarked before calling.
1777 */
1778 static int
count_stmts(p)1779 count_stmts(p)
1780 struct block *p;
1781 {
1782 int n;
1783
1784 if (p == 0 || isMarked(p))
1785 return 0;
1786 Mark(p);
1787 n = count_stmts(JT(p)) + count_stmts(JF(p));
1788 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1789 }
1790
1791 /*
1792 * Allocate memory. All allocation is done before optimization
1793 * is begun. A linear bound on the size of all data structures is computed
1794 * from the total number of blocks and/or statements.
1795 */
1796 static void
opt_init(root)1797 opt_init(root)
1798 struct block *root;
1799 {
1800 bpf_u_int32 *p;
1801 int i, n, max_stmts;
1802
1803 /*
1804 * First, count the blocks, so we can malloc an array to map
1805 * block number to block. Then, put the blocks into the array.
1806 */
1807 unMarkAll();
1808 n = count_blocks(root);
1809 blocks = (struct block **)malloc(n * sizeof(*blocks));
1810 if (blocks == NULL)
1811 bpf_error("malloc");
1812
1813 unMarkAll();
1814 n_blocks = 0;
1815 number_blks_r(root);
1816
1817 n_edges = 2 * n_blocks;
1818 edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1819 if (edges == NULL)
1820 bpf_error("malloc");
1821
1822 /*
1823 * The number of levels is bounded by the number of nodes.
1824 */
1825 levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1826 if (levels == NULL)
1827 bpf_error("malloc");
1828
1829 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1830 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1831
1832 /* XXX */
1833 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1834 + n_edges * edgewords * sizeof(*space));
1835 if (space == NULL)
1836 bpf_error("malloc");
1837
1838 p = space;
1839 all_dom_sets = p;
1840 for (i = 0; i < n; ++i) {
1841 blocks[i]->dom = p;
1842 p += nodewords;
1843 }
1844 all_closure_sets = p;
1845 for (i = 0; i < n; ++i) {
1846 blocks[i]->closure = p;
1847 p += nodewords;
1848 }
1849 all_edge_sets = p;
1850 for (i = 0; i < n; ++i) {
1851 register struct block *b = blocks[i];
1852
1853 b->et.edom = p;
1854 p += edgewords;
1855 b->ef.edom = p;
1856 p += edgewords;
1857 b->et.id = i;
1858 edges[i] = &b->et;
1859 b->ef.id = n_blocks + i;
1860 edges[n_blocks + i] = &b->ef;
1861 b->et.pred = b;
1862 b->ef.pred = b;
1863 }
1864 max_stmts = 0;
1865 for (i = 0; i < n; ++i)
1866 max_stmts += slength(blocks[i]->stmts) + 1;
1867 /*
1868 * We allocate at most 3 value numbers per statement,
1869 * so this is an upper bound on the number of valnodes
1870 * we'll need.
1871 */
1872 maxval = 3 * max_stmts;
1873 vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1874 vnode_base = (struct valnode *)malloc(maxval * sizeof(*vmap));
1875 if (vmap == NULL || vnode_base == NULL)
1876 bpf_error("malloc");
1877 }
1878
1879 /*
1880 * Some pointers used to convert the basic block form of the code,
1881 * into the array form that BPF requires. 'fstart' will point to
1882 * the malloc'd array while 'ftail' is used during the recursive traversal.
1883 */
1884 static struct bpf_insn *fstart;
1885 static struct bpf_insn *ftail;
1886
1887 #ifdef BDEBUG
1888 int bids[1000];
1889 #endif
1890
1891 /*
1892 * Returns true if successful. Returns false if a branch has
1893 * an offset that is too large. If so, we have marked that
1894 * branch so that on a subsequent iteration, it will be treated
1895 * properly.
1896 */
1897 static int
convert_code_r(p)1898 convert_code_r(p)
1899 struct block *p;
1900 {
1901 struct bpf_insn *dst;
1902 struct slist *src;
1903 int slen;
1904 u_int off;
1905 int extrajmps; /* number of extra jumps inserted */
1906 struct slist **offset = NULL;
1907
1908 if (p == 0 || isMarked(p))
1909 return (1);
1910 Mark(p);
1911
1912 if (convert_code_r(JF(p)) == 0)
1913 return (0);
1914 if (convert_code_r(JT(p)) == 0)
1915 return (0);
1916
1917 slen = slength(p->stmts);
1918 dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1919 /* inflate length by any extra jumps */
1920
1921 p->offset = dst - fstart;
1922
1923 /* generate offset[] for convenience */
1924 if (slen) {
1925 offset = calloc(slen, sizeof(struct slist *));
1926 if (!offset) {
1927 bpf_error("not enough core");
1928 /*NOTREACHED*/
1929 }
1930 }
1931 src = p->stmts;
1932 for (off = 0; off < slen && src; off++) {
1933 #if 0
1934 printf("off=%d src=%x\n", off, src);
1935 #endif
1936 offset[off] = src;
1937 src = src->next;
1938 }
1939
1940 off = 0;
1941 for (src = p->stmts; src; src = src->next) {
1942 if (src->s.code == NOP)
1943 continue;
1944 dst->code = (u_short)src->s.code;
1945 dst->k = src->s.k;
1946
1947 /* fill block-local relative jump */
1948 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
1949 #if 0
1950 if (src->s.jt || src->s.jf) {
1951 bpf_error("illegal jmp destination");
1952 /*NOTREACHED*/
1953 }
1954 #endif
1955 goto filled;
1956 }
1957 if (off == slen - 2) /*???*/
1958 goto filled;
1959
1960 {
1961 int i;
1962 int jt, jf;
1963 char *ljerr = "%s for block-local relative jump: off=%d";
1964
1965 #if 0
1966 printf("code=%x off=%d %x %x\n", src->s.code,
1967 off, src->s.jt, src->s.jf);
1968 #endif
1969
1970 if (!src->s.jt || !src->s.jf) {
1971 bpf_error(ljerr, "no jmp destination", off);
1972 /*NOTREACHED*/
1973 }
1974
1975 jt = jf = 0;
1976 for (i = 0; i < slen; i++) {
1977 if (offset[i] == src->s.jt) {
1978 if (jt) {
1979 bpf_error(ljerr, "multiple matches", off);
1980 /*NOTREACHED*/
1981 }
1982
1983 dst->jt = i - off - 1;
1984 jt++;
1985 }
1986 if (offset[i] == src->s.jf) {
1987 if (jf) {
1988 bpf_error(ljerr, "multiple matches", off);
1989 /*NOTREACHED*/
1990 }
1991 dst->jf = i - off - 1;
1992 jf++;
1993 }
1994 }
1995 if (!jt || !jf) {
1996 bpf_error(ljerr, "no destination found", off);
1997 /*NOTREACHED*/
1998 }
1999 }
2000 filled:
2001 ++dst;
2002 ++off;
2003 }
2004 if (offset)
2005 free(offset);
2006
2007 #ifdef BDEBUG
2008 bids[dst - fstart] = p->id + 1;
2009 #endif
2010 dst->code = (u_short)p->s.code;
2011 dst->k = p->s.k;
2012 if (JT(p)) {
2013 extrajmps = 0;
2014 off = JT(p)->offset - (p->offset + slen) - 1;
2015 if (off >= 256) {
2016 /* offset too large for branch, must add a jump */
2017 if (p->longjt == 0) {
2018 /* mark this instruction and retry */
2019 p->longjt++;
2020 return(0);
2021 }
2022 /* branch if T to following jump */
2023 dst->jt = extrajmps;
2024 extrajmps++;
2025 dst[extrajmps].code = BPF_JMP|BPF_JA;
2026 dst[extrajmps].k = off - extrajmps;
2027 }
2028 else
2029 dst->jt = off;
2030 off = JF(p)->offset - (p->offset + slen) - 1;
2031 if (off >= 256) {
2032 /* offset too large for branch, must add a jump */
2033 if (p->longjf == 0) {
2034 /* mark this instruction and retry */
2035 p->longjf++;
2036 return(0);
2037 }
2038 /* branch if F to following jump */
2039 /* if two jumps are inserted, F goes to second one */
2040 dst->jf = extrajmps;
2041 extrajmps++;
2042 dst[extrajmps].code = BPF_JMP|BPF_JA;
2043 dst[extrajmps].k = off - extrajmps;
2044 }
2045 else
2046 dst->jf = off;
2047 }
2048 return (1);
2049 }
2050
2051
2052 /*
2053 * Convert flowgraph intermediate representation to the
2054 * BPF array representation. Set *lenp to the number of instructions.
2055 */
2056 struct bpf_insn *
icode_to_fcode(root,lenp)2057 icode_to_fcode(root, lenp)
2058 struct block *root;
2059 int *lenp;
2060 {
2061 int n;
2062 struct bpf_insn *fp;
2063
2064 /*
2065 * Loop doing convert_codr_r() until no branches remain
2066 * with too-large offsets.
2067 */
2068 while (1) {
2069 unMarkAll();
2070 n = *lenp = count_stmts(root);
2071
2072 fp = calloc(n, sizeof(*fp));
2073 if (fp == NULL)
2074 bpf_error("calloc");
2075
2076 fstart = fp;
2077 ftail = fp + n;
2078
2079 unMarkAll();
2080 if (convert_code_r(root))
2081 break;
2082 free(fp);
2083 }
2084
2085 return fp;
2086 }
2087
2088 #ifdef BDEBUG
2089 static void
opt_dump(root)2090 opt_dump(root)
2091 struct block *root;
2092 {
2093 struct bpf_program f;
2094
2095 memset(bids, 0, sizeof bids);
2096 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2097 bpf_dump(&f, 1);
2098 putchar('\n');
2099 free((char *)f.bf_insns);
2100 }
2101 #endif
2102