| 1 | /* tblcmp - table compression routines */
|
|---|
| 2 |
|
|---|
| 3 | /* Copyright (c) 1990 The Regents of the University of California. */
|
|---|
| 4 | /* All rights reserved. */
|
|---|
| 5 |
|
|---|
| 6 | /* This code is derived from software contributed to Berkeley by */
|
|---|
| 7 | /* Vern Paxson. */
|
|---|
| 8 |
|
|---|
| 9 | /* The United States Government has rights in this work pursuant */
|
|---|
| 10 | /* to contract no. DE-AC03-76SF00098 between the United States */
|
|---|
| 11 | /* Department of Energy and the University of California. */
|
|---|
| 12 |
|
|---|
| 13 | /* This file is part of flex. */
|
|---|
| 14 |
|
|---|
| 15 | /* Redistribution and use in source and binary forms, with or without */
|
|---|
| 16 | /* modification, are permitted provided that the following conditions */
|
|---|
| 17 | /* are met: */
|
|---|
| 18 |
|
|---|
| 19 | /* 1. Redistributions of source code must retain the above copyright */
|
|---|
| 20 | /* notice, this list of conditions and the following disclaimer. */
|
|---|
| 21 | /* 2. Redistributions in binary form must reproduce the above copyright */
|
|---|
| 22 | /* notice, this list of conditions and the following disclaimer in the */
|
|---|
| 23 | /* documentation and/or other materials provided with the distribution. */
|
|---|
| 24 |
|
|---|
| 25 | /* Neither the name of the University nor the names of its contributors */
|
|---|
| 26 | /* may be used to endorse or promote products derived from this software */
|
|---|
| 27 | /* without specific prior written permission. */
|
|---|
| 28 |
|
|---|
| 29 | /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
|
|---|
| 30 | /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
|
|---|
| 31 | /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
|---|
| 32 | /* PURPOSE. */
|
|---|
| 33 |
|
|---|
| 34 | #include "flexdef.h"
|
|---|
| 35 |
|
|---|
| 36 |
|
|---|
| 37 | /* declarations for functions that have forward references */
|
|---|
| 38 |
|
|---|
| 39 | void mkentry PROTO ((register int *, int, int, int, int));
|
|---|
| 40 | void mkprot PROTO ((int[], int, int));
|
|---|
| 41 | void mktemplate PROTO ((int[], int, int));
|
|---|
| 42 | void mv2front PROTO ((int));
|
|---|
| 43 | int tbldiff PROTO ((int[], int, int[]));
|
|---|
| 44 |
|
|---|
| 45 |
|
|---|
| 46 | /* bldtbl - build table entries for dfa state
|
|---|
| 47 | *
|
|---|
| 48 | * synopsis
|
|---|
| 49 | * int state[numecs], statenum, totaltrans, comstate, comfreq;
|
|---|
| 50 | * bldtbl( state, statenum, totaltrans, comstate, comfreq );
|
|---|
| 51 | *
|
|---|
| 52 | * State is the statenum'th dfa state. It is indexed by equivalence class and
|
|---|
| 53 | * gives the number of the state to enter for a given equivalence class.
|
|---|
| 54 | * totaltrans is the total number of transitions out of the state. Comstate
|
|---|
| 55 | * is that state which is the destination of the most transitions out of State.
|
|---|
| 56 | * Comfreq is how many transitions there are out of State to Comstate.
|
|---|
| 57 | *
|
|---|
| 58 | * A note on terminology:
|
|---|
| 59 | * "protos" are transition tables which have a high probability of
|
|---|
| 60 | * either being redundant (a state processed later will have an identical
|
|---|
| 61 | * transition table) or nearly redundant (a state processed later will have
|
|---|
| 62 | * many of the same out-transitions). A "most recently used" queue of
|
|---|
| 63 | * protos is kept around with the hope that most states will find a proto
|
|---|
| 64 | * which is similar enough to be usable, and therefore compacting the
|
|---|
| 65 | * output tables.
|
|---|
| 66 | * "templates" are a special type of proto. If a transition table is
|
|---|
| 67 | * homogeneous or nearly homogeneous (all transitions go to the same
|
|---|
| 68 | * destination) then the odds are good that future states will also go
|
|---|
| 69 | * to the same destination state on basically the same character set.
|
|---|
| 70 | * These homogeneous states are so common when dealing with large rule
|
|---|
| 71 | * sets that they merit special attention. If the transition table were
|
|---|
| 72 | * simply made into a proto, then (typically) each subsequent, similar
|
|---|
| 73 | * state will differ from the proto for two out-transitions. One of these
|
|---|
| 74 | * out-transitions will be that character on which the proto does not go
|
|---|
| 75 | * to the common destination, and one will be that character on which the
|
|---|
| 76 | * state does not go to the common destination. Templates, on the other
|
|---|
| 77 | * hand, go to the common state on EVERY transition character, and therefore
|
|---|
| 78 | * cost only one difference.
|
|---|
| 79 | */
|
|---|
| 80 |
|
|---|
| 81 | void bldtbl (state, statenum, totaltrans, comstate, comfreq)
|
|---|
| 82 | int state[], statenum, totaltrans, comstate, comfreq;
|
|---|
| 83 | {
|
|---|
| 84 | int extptr, extrct[2][CSIZE + 1];
|
|---|
| 85 | int mindiff, minprot, i, d;
|
|---|
| 86 |
|
|---|
| 87 | /* If extptr is 0 then the first array of extrct holds the result
|
|---|
| 88 | * of the "best difference" to date, which is those transitions
|
|---|
| 89 | * which occur in "state" but not in the proto which, to date,
|
|---|
| 90 | * has the fewest differences between itself and "state". If
|
|---|
| 91 | * extptr is 1 then the second array of extrct hold the best
|
|---|
| 92 | * difference. The two arrays are toggled between so that the
|
|---|
| 93 | * best difference to date can be kept around and also a difference
|
|---|
| 94 | * just created by checking against a candidate "best" proto.
|
|---|
| 95 | */
|
|---|
| 96 |
|
|---|
| 97 | extptr = 0;
|
|---|
| 98 |
|
|---|
| 99 | /* If the state has too few out-transitions, don't bother trying to
|
|---|
| 100 | * compact its tables.
|
|---|
| 101 | */
|
|---|
| 102 |
|
|---|
| 103 | if ((totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE))
|
|---|
| 104 | mkentry (state, numecs, statenum, JAMSTATE, totaltrans);
|
|---|
| 105 |
|
|---|
| 106 | else {
|
|---|
| 107 | /* "checkcom" is true if we should only check "state" against
|
|---|
| 108 | * protos which have the same "comstate" value.
|
|---|
| 109 | */
|
|---|
| 110 | int checkcom =
|
|---|
| 111 |
|
|---|
| 112 | comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE;
|
|---|
| 113 |
|
|---|
| 114 | minprot = firstprot;
|
|---|
| 115 | mindiff = totaltrans;
|
|---|
| 116 |
|
|---|
| 117 | if (checkcom) {
|
|---|
| 118 | /* Find first proto which has the same "comstate". */
|
|---|
| 119 | for (i = firstprot; i != NIL; i = protnext[i])
|
|---|
| 120 | if (protcomst[i] == comstate) {
|
|---|
| 121 | minprot = i;
|
|---|
| 122 | mindiff = tbldiff (state, minprot,
|
|---|
| 123 | extrct[extptr]);
|
|---|
| 124 | break;
|
|---|
| 125 | }
|
|---|
| 126 | }
|
|---|
| 127 |
|
|---|
| 128 | else {
|
|---|
| 129 | /* Since we've decided that the most common destination
|
|---|
| 130 | * out of "state" does not occur with a high enough
|
|---|
| 131 | * frequency, we set the "comstate" to zero, assuring
|
|---|
| 132 | * that if this state is entered into the proto list,
|
|---|
| 133 | * it will not be considered a template.
|
|---|
| 134 | */
|
|---|
|
|---|