source: trunk/src/binutils/gprof/cg_print.c@ 167

Last change on this file since 167 was 10, checked in by bird, 23 years ago

Initial revision

  • Property cvs2svn:cvs-rev set to 1.1
  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 33.0 KB
Line 
1/* cg_print.c - Print routines for displaying call graphs.
2
3 Copyright 2000, 2001 Free Software Foundation, Inc.
4
5 This file is part of GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22
23#include "libiberty.h"
24#include "cg_arcs.h"
25#include "cg_print.h"
26#include "hist.h"
27#include "utils.h"
28
29/* Return value of comparison functions used to sort tables. */
30#define LESSTHAN -1
31#define EQUALTO 0
32#define GREATERTHAN 1
33
34static void order_and_dump_functions_by_arcs PARAMS ((Arc **, unsigned long,
35 int, Arc **,
36 unsigned long *));
37/* Declarations of automatically generated functions to output blurbs. */
38extern void bsd_callg_blurb PARAMS ((FILE * fp));
39extern void fsf_callg_blurb PARAMS ((FILE * fp));
40
41double print_time = 0.0;
42
43
44static void
45DEFUN_VOID (print_header)
46{
47 if (first_output)
48 first_output = FALSE;
49 else
50 printf ("\f\n");
51
52 if (!bsd_style_output)
53 {
54 if (print_descriptions)
55 printf (_("\t\t Call graph (explanation follows)\n\n"));
56 else
57 printf (_("\t\t\tCall graph\n\n"));
58 }
59
60 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
61 (long) hist_scale * sizeof (UNIT));
62
63 if (print_time > 0.0)
64 printf (_(" for %.2f%% of %.2f seconds\n\n"),
65 100.0 / print_time, print_time / hz);
66 else
67 {
68 printf (_(" no time propagated\n\n"));
69
70 /* This doesn't hurt, since all the numerators will be 0.0. */
71 print_time = 1.0;
72 }
73
74 if (bsd_style_output)
75 {
76 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
77 "", "", "", "", _("called"), _("total"), _("parents"));
78 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
79 _("index"), _("%time"), _("self"), _("descendents"),
80 _("called"), _("self"), _("name"), _("index"));
81 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
82 "", "", "", "", _("called"), _("total"), _("children"));
83 printf ("\n");
84 }
85 else
86 {
87 printf (_("index %% time self children called name\n"));
88 }
89}
90
91/* Print a cycle header. */
92
93static void
94DEFUN (print_cycle, (cyc), Sym * cyc)
95{
96 char buf[BUFSIZ];
97
98 sprintf (buf, "[%d]", cyc->cg.index);
99 printf (bsd_style_output
100 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
101 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
102 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
103 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
104
105 if (cyc->cg.self_calls != 0)
106 printf ("+%-7lu", cyc->cg.self_calls);
107 else
108 printf (" %7.7s", "");
109
110 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
111}
112
113/* Compare LEFT and RIGHT membmer. Major comparison key is
114 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
115
116static int
117DEFUN (cmp_member, (left, right), Sym * left AND Sym * right)
118{
119 double left_time = left->cg.prop.self + left->cg.prop.child;
120 double right_time = right->cg.prop.self + right->cg.prop.child;
121 unsigned long left_calls = left->ncalls + left->cg.self_calls;
122 unsigned long right_calls = right->ncalls + right->cg.self_calls;
123
124 if (left_time > right_time)
125 return GREATERTHAN;
126
127 if (left_time < right_time)
128 return LESSTHAN;
129
130 if (left_calls > right_calls)
131 return GREATERTHAN;
132
133 if (left_calls < right_calls)
134 return LESSTHAN;
135
136 return EQUALTO;
137}
138
139/* Sort members of a cycle. */
140
141static void
142DEFUN (sort_members, (cyc), Sym * cyc)
143{
144 Sym *todo, *doing, *prev;
145
146 /* Detach cycle members from cyclehead,
147 and insertion sort them back on. */
148 todo = cyc->cg.cyc.next;
149 cyc->cg.cyc.next = 0;
150
151 for (doing = todo; doing && doing->cg.cyc.next; doing = todo)
152 {
153 todo = doing->cg.cyc.next;
154
155 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
156 {
157 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
158 break;
159 }
160
161 doing->cg.cyc.next = prev->cg.cyc.next;
162 prev->cg.cyc.next = doing;
163 }
164}
165
166/* Print the members of a cycle. */
167
168static void
169DEFUN (print_members, (cyc), Sym * cyc)
170{
171 Sym *member;
172
173 sort_members (cyc);
174
175 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
176 {
177 printf (bsd_style_output
178 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
179 : "%6.6s %5.5s %7.2f %7.2f %7lu",
180 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
181 member->ncalls);
182
183 if (member->cg.self_calls != 0)
184 printf ("+%-7lu", member->cg.self_calls);
185 else
186 printf (" %7.7s", "");
187
188 printf (" ");
189 print_name (member);
190 printf ("\n");
191 }
192}
193
194/* Compare two arcs to/from the same child/parent.
195 - if one arc is a self arc, it's least.
196 - if one arc is within a cycle, it's less than.
197 - if both arcs are within a cycle, compare arc counts.
198 - if neither arc is within a cycle, compare with
199 time + child_time as major key
200 arc count as minor key. */
201
202static int
203DEFUN (cmp_arc, (left, right), Arc * left AND Arc * right)
204{
205 Sym *left_parent = left->parent;
206 Sym *left_child = left->child;
207 Sym *right_parent = right->parent;
208 Sym *right_child = right->child;
209 double left_time, right_time;
210
211 DBG (TIMEDEBUG,
212 printf ("[cmp_arc] ");
213 print_name (left_parent);
214 printf (" calls ");
215 print_name (left_child);
216 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
217 left->count, left_child->ncalls);
218 printf ("[cmp_arc] ");
219 print_name (right_parent);
220 printf (" calls ");
221 print_name (right_child);
222 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
223 right->count, right_child->ncalls);
224 printf ("\n");
225 );
226
227 if (left_parent == left_child)
228 return LESSTHAN; /* Left is a self call. */
229
230 if (right_parent == right_child)
231 return GREATERTHAN; /* Right is a self call. */
232
233 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
234 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
235 {
236 /* Left is a call within a cycle. */
237 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
238 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
239 {
240 /* Right is a call within the cycle, too. */
241 if (left->count < right->count)
242 return LESSTHAN;
243
244 if (left->count > right->count)
245 return GREATERTHAN;
246
247 return EQUALTO;
248 }
249 else
250 {
251 /* Right isn't a call within the cycle. */
252 return LESSTHAN;
253 }
254 }
255 else
256 {
257 /* Left isn't a call within a cycle. */
258 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
259 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
260 {
261 /* Right is a call within a cycle. */
262 return GREATERTHAN;
263 }
264 else
265 {
266 /* Neither is a call within a cycle. */
267 left_time = left->time + left->child_time;
268 right_time = right->time + right->child_time;
269
270 if (left_time < right_time)
271 return LESSTHAN;
272
273 if (left_time > right_time)
274 return GREATERTHAN;
275
276 if (left->count < right->count)
277 return LESSTHAN;
278
279 if (left->count > right->count)
280 return GREATERTHAN;
281
282 return EQUALTO;
283 }
284 }
285}
286
287
288static void
289DEFUN (sort_parents, (child), Sym * child)
290{
291 Arc *arc, *detached, sorted, *prev;
292
293 /* Unlink parents from child, then insertion sort back on to
294 sorted's parents.
295 *arc the arc you have detached and are inserting.
296 *detached the rest of the arcs to be sorted.
297 sorted arc list onto which you insertion sort.
298 *prev arc before the arc you are comparing. */
299 sorted.next_parent = 0;
300
301 for (arc = child->cg.parents; arc; arc = detached)
302 {
303 detached = arc->next_parent;
304
305 /* Consider *arc as disconnected; insert it into sorted. */
306 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
307 {
308 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
309 break;
310 }
311
312 arc->next_parent = prev->next_parent;
313 prev->next_parent = arc;
314 }
315
316 /* Reattach sorted arcs to child. */
317 child->cg.parents = sorted.next_parent;
318}
319
320
321static void
322DEFUN (print_parents, (child), Sym * child)
323{
324 Sym *parent;
325 Arc *arc;
326 Sym *cycle_head;
327
328 if (child->cg.cyc.head != 0)
329 cycle_head = child->cg.cyc.head;
330 else
331 cycle_head = child;
332
333 if (!child->cg.parents)
334 {
335 printf (bsd_style_output
336 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
337 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
338 "", "", "", "", "", "");
339 return;
340 }
341
342 sort_parents (child);
343
344 for (arc = child->cg.parents; arc; arc = arc->next_parent)
345 {
346 parent = arc->parent;
347 if (child == parent || (child->cg.cyc.num != 0
348 && parent->cg.cyc.num == child->cg.cyc.num))
349 {
350 /* Selfcall or call among siblings. */
351 printf (bsd_style_output
352 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
353 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
354 "", "", "", "",
355 arc->count, "");
356 print_name (parent);
357 printf ("\n");
358 }
359 else
360 {
361 /* Regular parent of child. */
362 printf (bsd_style_output
363 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
364 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
365 "", "",
366 arc->time / hz, arc->child_time / hz,
367 arc->count, cycle_head->ncalls);
368 print_name (parent);
369 printf ("\n");
370 }
371 }
372}
373
374
375static void
376DEFUN (sort_children, (parent), Sym * parent)
377{
378 Arc *arc, *detached, sorted, *prev;
379
380 /* Unlink children from parent, then insertion sort back on to
381 sorted's children.
382 *arc the arc you have detached and are inserting.
383 *detached the rest of the arcs to be sorted.
384 sorted arc list onto which you insertion sort.
385 *prev arc before the arc you are comparing. */
386 sorted.next_child = 0;
387
388 for (arc = parent->cg.children; arc; arc = detached)
389 {
390 detached = arc->next_child;
391
392 /* Consider *arc as disconnected; insert it into sorted. */
393 for (prev = &sorted; prev->next_child; prev = prev->next_child)
394 {
395 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
396 break;
397 }
398
399 arc->next_child = prev->next_child;
400 prev->next_child = arc;
401 }
402
403 /* Reattach sorted children to parent. */
404 parent->cg.children = sorted.next_child;
405}
406
407
408static void
409DEFUN (print_children, (parent), Sym * parent)
410{
411 Sym *child;
412 Arc *arc;
413
414 sort_children (parent);
415 arc = parent->cg.children;
416
417 for (arc = parent->cg.children; arc; arc = arc->next_child)
418 {
419 child = arc->child;
420 if (child == parent || (child->cg.cyc.num != 0
421 && child->cg.cyc.num == parent->cg.cyc.num))
422 {
423 /* Self call or call to sibling. */
424 printf (bsd_style_output
425 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
426 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
427 "", "", "", "", arc->count, "");
428 print_name (child);
429 printf ("\n");
430 }
431 else
432 {
433 /* Regular child of parent. */
434 printf (bsd_style_output
435 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
436 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
437 "", "",
438 arc->time / hz, arc->child_time / hz,
439 arc->count, child->cg.cyc.head->ncalls);
440 print_name (child);
441 printf ("\n");
442 }
443 }
444}
445
446
447static void
448DEFUN (print_line, (np), Sym * np)
449{
450 char buf[BUFSIZ];
451
452 sprintf (buf, "[%d]", np->cg.index);
453 printf (bsd_style_output
454 ? "%-6.6s %5.1f %7.2f %11.2f"
455 : "%-6.6s %5.1f %7.2f %7.2f", buf,
456 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
457 np->cg.prop.self / hz, np->cg.prop.child / hz);
458
459 if ((np->ncalls + np->cg.self_calls) != 0)
460 {
461 printf (" %7lu", np->ncalls);
462
463 if (np->cg.self_calls != 0)
464 printf ("+%-7lu ", np->cg.self_calls);
465 else
466 printf (" %7.7s ", "");
467 }
468 else
469 {
470 printf (" %7.7s %7.7s ", "", "");
471 }
472
473 print_name (np);
474 printf ("\n");
475}
476
477
478/* Print dynamic call graph. */
479
480void
481DEFUN (cg_print, (timesortsym), Sym ** timesortsym)
482{
483 unsigned int index;
484 Sym *parent;
485
486 if (print_descriptions && bsd_style_output)
487 bsd_callg_blurb (stdout);
488
489 print_header ();
490
491 for (index = 0; index < symtab.len + num_cycles; ++index)
492 {
493 parent = timesortsym[index];
494
495 if ((ignore_zeros && parent->ncalls == 0
496 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
497 && parent->cg.prop.child == 0)
498 || !parent->cg.print_flag
499 || (line_granularity && ! parent->is_func))
500 continue;
501
502 if (!parent->name && parent->cg.cyc.num != 0)
503 {
504 /* Cycle header. */
505 print_cycle (parent);
506 print_members (parent);
507 }
508 else
509 {
510 print_parents (parent);
511 print_line (parent);
512 print_children (parent);
513 }
514
515 if (bsd_style_output)
516 printf ("\n");
517
518 printf ("-----------------------------------------------\n");
519
520 if (bsd_style_output)
521 printf ("\n");
522 }
523
524 free (timesortsym);
525
526 if (print_descriptions && !bsd_style_output)
527 fsf_callg_blurb (stdout);
528}
529
530
531static int
532DEFUN (cmp_name, (left, right), const PTR left AND const PTR right)
533{
534 const Sym **npp1 = (const Sym **) left;
535 const Sym **npp2 = (const Sym **) right;
536
537 return strcmp ((*npp1)->name, (*npp2)->name);
538}
539
540
541void
542DEFUN_VOID (cg_print_index)
543{
544 unsigned int index;
545 unsigned int nnames, todo, i, j;
546 int col, starting_col;
547 Sym **name_sorted_syms, *sym;
548 const char *filename;
549 char buf[20];
550 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
551
552 /* Now, sort regular function name
553 alphabetically to create an index. */
554 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
555
556 for (index = 0, nnames = 0; index < symtab.len; index++)
557 {
558 if (ignore_zeros && symtab.base[index].ncalls == 0
559 && symtab.base[index].hist.time == 0)
560 continue;
561
562 name_sorted_syms[nnames++] = &symtab.base[index];
563 }
564
565 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
566
567 for (index = 1, todo = nnames; index <= num_cycles; index++)
568 name_sorted_syms[todo++] = &cycle_header[index];
569
570 printf ("\f\n");
571 printf (_("Index by function name\n\n"));
572 index = (todo + 2) / 3;
573
574 for (i = 0; i < index; i++)
575 {
576 col = 0;
577 starting_col = 0;
578
579 for (j = i; j < todo; j += index)
580 {
581 sym = name_sorted_syms[j];
582
583 if (sym->cg.print_flag)
584 sprintf (buf, "[%d]", sym->cg.index);
585 else
586 sprintf (buf, "(%d)", sym->cg.index);
587
588 if (j < nnames)
589 {
590 if (bsd_style_output)
591 {
592 printf ("%6.6s %-19.19s", buf, sym->name);
593 }
594 else
595 {
596 col += strlen (buf);
597
598 for (; col < starting_col + 5; ++col)
599 putchar (' ');
600
601 printf (" %s ", buf);
602 col += print_name_only (sym);
603
604 if (!line_granularity && sym->is_static && sym->file)
605 {
606 filename = sym->file->name;
607
608 if (!print_path)
609 {
610 filename = strrchr (filename, '/');
611
612 if (filename)
613 ++filename;
614 else
615 filename = sym->file->name;
616 }
617
618 printf (" (%s)", filename);
619 col += strlen (filename) + 3;
620 }
621 }
622 }
623 else
624 {
625 if (bsd_style_output)
626 {
627 printf ("%6.6s ", buf);
628 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
629 printf ("%-19.19s", buf);
630 }
631 else
632 {
633 col += strlen (buf);
634 for (; col < starting_col + 5; ++col)
635 putchar (' ');
636 printf (" %s ", buf);
637 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
638 printf ("%s", buf);
639 col += strlen (buf);
640 }
641 }
642
643 starting_col += column_width;
644 }
645
646 printf ("\n");
647 }
648
649 free (name_sorted_syms);
650}
651
652/* Compare two arcs based on their usage counts.
653 We want to sort in descending order. */
654
655static int
656DEFUN (cmp_arc_count, (left, right), const PTR left AND const PTR right)
657{
658 const Arc **npp1 = (const Arc **) left;
659 const Arc **npp2 = (const Arc **) right;
660
661 if ((*npp1)->count > (*npp2)->count)
662 return -1;
663 else if ((*npp1)->count < (*npp2)->count)
664 return 1;
665 else
666 return 0;
667}
668
669/* Compare two funtions based on their usage counts.
670 We want to sort in descending order. */
671
672static int
673DEFUN (cmp_fun_nuses, (left, right), const PTR left AND const PTR right)
674{
675 const Sym **npp1 = (const Sym **) left;
676 const Sym **npp2 = (const Sym **) right;
677
678 if ((*npp1)->nuses > (*npp2)->nuses)
679 return -1;
680 else if ((*npp1)->nuses < (*npp2)->nuses)
681 return 1;
682 else
683 return 0;
684}
685
686/* Print a suggested function ordering based on the profiling data.
687
688 We perform 4 major steps when ordering functions:
689
690 * Group unused functions together and place them at the
691 end of the function order.
692
693 * Search the highest use arcs (those which account for 90% of
694 the total arc count) for functions which have several parents.
695
696 Group those with the most call sites together (currently the
697 top 1.25% which have at least five different call sites).
698
699 These are emitted at the start of the function order.
700
701 * Use a greedy placement algorithm to place functions which
702 occur in the top 99% of the arcs in the profile. Some provisions
703 are made to handle high usage arcs where the parent and/or
704 child has already been placed.
705
706 * Run the same greedy placement algorithm on the remaining
707 arcs to place the leftover functions.
708
709
710 The various "magic numbers" should (one day) be tuneable by command
711 line options. They were arrived at by benchmarking a few applications
712 with various values to see which values produced better overall function
713 orderings.
714
715 Of course, profiling errors, machine limitations (PA long calls), and
716 poor cutoff values for the placement algorithm may limit the usefullness
717 of the resulting function order. Improvements would be greatly appreciated.
718
719 Suggestions:
720
721 * Place the functions with many callers near the middle of the
722 list to reduce long calls.
723
724 * Propagate arc usage changes as functions are placed. Ie if
725 func1 and func2 are placed together, arcs to/from those arcs
726 to the same parent/child should be combined, then resort the
727 arcs to choose the next one.
728
729 * Implement some global positioning algorithm to place the
730 chains made by the greedy local positioning algorithm. Probably
731 by examining arcs which haven't been placed yet to tie two
732 chains together.
733
734 * Take a function's size and time into account in the algorithm;
735 size in particular is important on the PA (long calls). Placing
736 many small functions onto their own page may be wise.
737
738 * Use better profiling information; many published algorithms
739 are based on call sequences through time, rather than just
740 arc counts.
741
742 * Prodecure cloning could improve performance when a small number
743 of arcs account for most of the calls to a particular function.
744
745 * Use relocation information to avoid moving unused functions
746 completely out of the code stream; this would avoid severe lossage
747 when the profile data bears little resemblance to actual runs.
748
749 * Propagation of arc usages should also improve .o link line
750 ordering which shares the same arc placement algorithm with
751 the function ordering code (in fact it is a degenerate case
752 of function ordering). */
753
754void
755DEFUN_VOID (cg_print_function_ordering)
756{
757 unsigned long index, used, unused, scratch_index;
758 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
759#ifdef __GNUC__
760 unsigned long long total_arcs, tmp_arcs_count;
761#else
762 unsigned long total_arcs, tmp_arcs_count;
763#endif
764 Sym **unused_syms, **used_syms, **scratch_syms;
765 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
766
767 index = 0;
768 used = 0;
769 unused = 0;
770 scratch_index = 0;
771 unplaced_arc_count = 0;
772 high_arc_count = 0;
773 scratch_arc_count = 0;
774
775 /* First group all the unused functions together. */
776 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
777 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
778 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
779 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
780 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
781 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
782
783 /* Walk through all the functions; mark those which are never
784 called as placed (we'll emit them as a group later). */
785 for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
786 {
787 if (symtab.base[index].ncalls == 0)
788 {
789 /* Filter out gprof generated names. */
790 if (strcmp (symtab.base[index].name, "<locore>")
791 && strcmp (symtab.base[index].name, "<hicore>"))
792 {
793 unused_syms[unused++] = &symtab.base[index];
794 symtab.base[index].has_been_placed = 1;
795 }
796 }
797 else
798 {
799 used_syms[used++] = &symtab.base[index];
800 symtab.base[index].has_been_placed = 0;
801 symtab.base[index].next = 0;
802 symtab.base[index].prev = 0;
803 symtab.base[index].nuses = 0;
804 }
805 }
806
807 /* Sort the arcs from most used to least used. */
808 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
809
810 /* Compute the total arc count. Also mark arcs as unplaced.
811
812 Note we don't compensate for overflow if that happens!
813 Overflow is much less likely when this file is compiled
814 with GCC as it can double-wide integers via long long. */
815 total_arcs = 0;
816 for (index = 0; index < numarcs; index++)
817 {
818 total_arcs += arcs[index]->count;
819 arcs[index]->has_been_placed = 0;
820 }
821
822 /* We want to pull out those functions which are referenced
823 by many highly used arcs and emit them as a group. This
824 could probably use some tuning. */
825 tmp_arcs_count = 0;
826 for (index = 0; index < numarcs; index++)
827 {
828 tmp_arcs_count += arcs[index]->count;
829
830 /* Count how many times each parent and child are used up
831 to our threshhold of arcs (90%). */
832 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
833 break;
834
835 arcs[index]->child->nuses++;
836 }
837
838 /* Now sort a temporary symbol table based on the number of
839 times each function was used in the highest used arcs. */
840 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
841 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
842
843 /* Now pick out those symbols we're going to emit as
844 a group. We take up to 1.25% of the used symbols. */
845 for (index = 0; index < used / 80; index++)
846 {
847 Sym *sym = scratch_syms[index];
848 Arc *arc;
849
850 /* If we hit symbols that aren't used from many call sites,
851 then we can quit. We choose five as the low limit for
852 no particular reason. */
853 if (sym->nuses == 5)
854 break;
855
856 /* We're going to need the arcs between these functions.
857 Unfortunately, we don't know all these functions
858 until we're done. So we keep track of all the arcs
859 to the functions we care about, then prune out those
860 which are uninteresting.
861
862 An interesting variation would be to quit when we found
863 multi-call site functions which account for some percentage
864 of the arcs. */
865 arc = sym->cg.children;
866
867 while (arc)
868 {
869 if (arc->parent != arc->child)
870 scratch_arcs[scratch_arc_count++] = arc;
871 arc->has_been_placed = 1;
872 arc = arc->next_child;
873 }
874
875 arc = sym->cg.parents;
876
877 while (arc)
878 {
879 if (arc->parent != arc->child)
880 scratch_arcs[scratch_arc_count++] = arc;
881 arc->has_been_placed = 1;
882 arc = arc->next_parent;
883 }
884
885 /* Keep track of how many symbols we're going to place. */
886 scratch_index = index;
887
888 /* A lie, but it makes identifying
889 these functions easier later. */
890 sym->has_been_placed = 1;
891 }
892
893 /* Now walk through the temporary arcs and copy
894 those we care about into the high arcs array. */
895 for (index = 0; index < scratch_arc_count; index++)
896 {
897 Arc *arc = scratch_arcs[index];
898
899 /* If this arc refers to highly used functions, then
900 then we want to keep it. */
901 if (arc->child->has_been_placed
902 && arc->parent->has_been_placed)
903 {
904 high_arcs[high_arc_count++] = scratch_arcs[index];
905
906 /* We need to turn of has_been_placed since we're going to
907 use the main arc placement algorithm on these arcs. */
908 arc->child->has_been_placed = 0;
909 arc->parent->has_been_placed = 0;
910 }
911 }
912
913 /* Dump the multi-site high usage functions which are not
914 going to be ordered by the main ordering algorithm. */
915 for (index = 0; index < scratch_index; index++)
916 {
917 if (scratch_syms[index]->has_been_placed)
918 printf ("%s\n", scratch_syms[index]->name);
919 }
920
921 /* Now we can order the multi-site high use
922 functions based on the arcs between them. */
923 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
924 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
925 unplaced_arcs, &unplaced_arc_count);
926
927 /* Order and dump the high use functions left,
928 these typically have only a few call sites. */
929 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
930 unplaced_arcs, &unplaced_arc_count);
931
932 /* Now place the rarely used functions. */
933 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
934 scratch_arcs, &scratch_arc_count);
935
936 /* Output any functions not emitted by the order_and_dump calls. */
937 for (index = 0; index < used; index++)
938 if (used_syms[index]->has_been_placed == 0)
939 printf("%s\n", used_syms[index]->name);
940
941 /* Output the unused functions. */
942 for (index = 0; index < unused; index++)
943 printf("%s\n", unused_syms[index]->name);
944
945 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
946 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
947 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
948 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
949 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
950 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
951
952 free (unused_syms);
953 free (used_syms);
954 free (scratch_syms);
955 free (high_arcs);
956 free (scratch_arcs);
957 free (unplaced_arcs);
958}
959
960/* Place functions based on the arcs in ARCS with NUMARCS entries;
961 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
962
963 If ALL is nonzero, then place all functions referenced by ARCS,
964 else only place those referenced in the top 99% of the arcs in ARCS. */
965
966#define MOST 0.99
967static void
968order_and_dump_functions_by_arcs (arcs, numarcs, all,
969 unplaced_arcs, unplaced_arc_count)
970 Arc **arcs;
971 unsigned long numarcs;
972 int all;
973 Arc **unplaced_arcs;
974 unsigned long *unplaced_arc_count;
975{
976#ifdef __GNUC__
977 unsigned long long tmp_arcs, total_arcs;
978#else
979 unsigned long tmp_arcs, total_arcs;
980#endif
981 unsigned int index;
982
983 /* If needed, compute the total arc count.
984
985 Note we don't compensate for overflow if that happens! */
986 if (! all)
987 {
988 total_arcs = 0;
989 for (index = 0; index < numarcs; index++)
990 total_arcs += arcs[index]->count;
991 }
992 else
993 total_arcs = 0;
994
995 tmp_arcs = 0;
996
997 for (index = 0; index < numarcs; index++)
998 {
999 Sym *sym1, *sym2;
1000 Sym *child, *parent;
1001
1002 tmp_arcs += arcs[index]->count;
1003
1004 /* Ignore this arc if it's already been placed. */
1005 if (arcs[index]->has_been_placed)
1006 continue;
1007
1008 child = arcs[index]->child;
1009 parent = arcs[index]->parent;
1010
1011 /* If we're not using all arcs, and this is a rarely used
1012 arc, then put it on the unplaced_arc list. Similarly
1013 if both the parent and child of this arc have been placed. */
1014 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1015 || child->has_been_placed || parent->has_been_placed)
1016 {
1017 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1018 continue;
1019 }
1020
1021 /* If all slots in the parent and child are full, then there isn't
1022 anything we can do right now. We'll place this arc on the
1023 unplaced arc list in the hope that a global positioning
1024 algorithm can use it to place function chains. */
1025 if (parent->next && parent->prev && child->next && child->prev)
1026 {
1027 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1028 continue;
1029 }
1030
1031 /* If the parent is unattached, then find the closest
1032 place to attach it onto child's chain. Similarly
1033 for the opposite case. */
1034 if (!parent->next && !parent->prev)
1035 {
1036 int next_count = 0;
1037 int prev_count = 0;
1038 Sym *prev = child;
1039 Sym *next = child;
1040
1041 /* Walk to the beginning and end of the child's chain. */
1042 while (next->next)
1043 {
1044 next = next->next;
1045 next_count++;
1046 }
1047
1048 while (prev->prev)
1049 {
1050 prev = prev->prev;
1051 prev_count++;
1052 }
1053
1054 /* Choose the closest. */
1055 child = next_count < prev_count ? next : prev;
1056 }
1057 else if (! child->next && !child->prev)
1058 {
1059 int next_count = 0;
1060 int prev_count = 0;
1061 Sym *prev = parent;
1062 Sym *next = parent;
1063
1064 while (next->next)
1065 {
1066 next = next->next;
1067 next_count++;
1068 }
1069
1070 while (prev->prev)
1071 {
1072 prev = prev->prev;
1073 prev_count++;
1074 }
1075
1076 parent = prev_count < next_count ? prev : next;
1077 }
1078 else
1079 {
1080 /* Couldn't find anywhere to attach the functions,
1081 put the arc on the unplaced arc list. */
1082 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1083 continue;
1084 }
1085
1086 /* Make sure we don't tie two ends together. */
1087 sym1 = parent;
1088 if (sym1->next)
1089 while (sym1->next)
1090 sym1 = sym1->next;
1091 else
1092 while (sym1->prev)
1093 sym1 = sym1->prev;
1094
1095 sym2 = child;
1096 if (sym2->next)
1097 while (sym2->next)
1098 sym2 = sym2->next;
1099 else
1100 while (sym2->prev)
1101 sym2 = sym2->prev;
1102
1103 if (sym1 == child
1104 && sym2 == parent)
1105 {
1106 /* This would tie two ends together. */
1107 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1108 continue;
1109 }
1110
1111 if (parent->next)
1112 {
1113 /* Must attach to the parent's prev field. */
1114 if (! child->next)
1115 {
1116 /* parent-prev and child-next */
1117 parent->prev = child;
1118 child->next = parent;
1119 arcs[index]->has_been_placed = 1;
1120 }
1121 }
1122 else if (parent->prev)
1123 {
1124 /* Must attach to the parent's next field. */
1125 if (! child->prev)
1126 {
1127 /* parent-next and child-prev */
1128 parent->next = child;
1129 child->prev = parent;
1130 arcs[index]->has_been_placed = 1;
1131 }
1132 }
1133 else
1134 {
1135 /* Can attach to either field in the parent, depends
1136 on where we've got space in the child. */
1137 if (child->prev)
1138 {
1139 /* parent-prev and child-next. */
1140 parent->prev = child;
1141 child->next = parent;
1142 arcs[index]->has_been_placed = 1;
1143 }
1144 else
1145 {
1146 /* parent-next and child-prev. */
1147 parent->next = child;
1148 child->prev = parent;
1149 arcs[index]->has_been_placed = 1;
1150 }
1151 }
1152 }
1153
1154 /* Dump the chains of functions we've made. */
1155 for (index = 0; index < numarcs; index++)
1156 {
1157 Sym *sym;
1158 if (arcs[index]->parent->has_been_placed
1159 || arcs[index]->child->has_been_placed)
1160 continue;
1161
1162 sym = arcs[index]->parent;
1163
1164 /* If this symbol isn't attached to any other
1165 symbols, then we've got a rarely used arc.
1166
1167 Skip it for now, we'll deal with them later. */
1168 if (sym->next == NULL
1169 && sym->prev == NULL)
1170 continue;
1171
1172 /* Get to the start of this chain. */
1173 while (sym->prev)
1174 sym = sym->prev;
1175
1176 while (sym)
1177 {
1178 /* Mark it as placed. */
1179 sym->has_been_placed = 1;
1180 printf ("%s\n", sym->name);
1181 sym = sym->next;
1182 }
1183 }
1184
1185 /* If we want to place all the arcs, then output
1186 those which weren't placed by the main algorithm. */
1187 if (all)
1188 for (index = 0; index < numarcs; index++)
1189 {
1190 Sym *sym;
1191 if (arcs[index]->parent->has_been_placed
1192 || arcs[index]->child->has_been_placed)
1193 continue;
1194
1195 sym = arcs[index]->parent;
1196
1197 sym->has_been_placed = 1;
1198 printf ("%s\n", sym->name);
1199 }
1200}
1201
1202/* Print a suggested .o ordering for files on a link line based
1203 on profiling information. This uses the function placement
1204 code for the bulk of its work. */
1205
1206struct function_map
1207{
1208 char *function_name;
1209 char *file_name;
1210};
1211
1212void
1213DEFUN_VOID (cg_print_file_ordering)
1214{
1215 unsigned long scratch_arc_count, index;
1216 Arc **scratch_arcs;
1217 extern struct function_map *symbol_map;
1218 extern unsigned int symbol_map_count;
1219 char *last;
1220
1221 scratch_arc_count = 0;
1222
1223 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1224 for (index = 0; index < numarcs; index++)
1225 {
1226 if (! arcs[index]->parent->mapped
1227 || ! arcs[index]->child->mapped)
1228 arcs[index]->has_been_placed = 1;
1229 }
1230
1231 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1232 scratch_arcs, &scratch_arc_count);
1233
1234 /* Output .o's not handled by the main placement algorithm. */
1235 for (index = 0; index < symtab.len; index++)
1236 {
1237 if (symtab.base[index].mapped
1238 && ! symtab.base[index].has_been_placed)
1239 printf ("%s\n", symtab.base[index].name);
1240 }
1241
1242 /* Now output any .o's that didn't have any text symbols. */
1243 last = NULL;
1244 for (index = 0; index < symbol_map_count; index++)
1245 {
1246 unsigned int index2;
1247
1248 /* Don't bother searching if this symbol
1249 is the same as the previous one. */
1250 if (last && !strcmp (last, symbol_map[index].file_name))
1251 continue;
1252
1253 for (index2 = 0; index2 < symtab.len; index2++)
1254 {
1255 if (! symtab.base[index2].mapped)
1256 continue;
1257
1258 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1259 break;
1260 }
1261
1262 /* If we didn't find it in the symbol table, then it must
1263 be a .o with no text symbols. Output it last. */
1264 if (index2 == symtab.len)
1265 printf ("%s\n", symbol_map[index].file_name);
1266 last = symbol_map[index].file_name;
1267 }
1268}
Note: See TracBrowser for help on using the repository browser.