| 1 | # From arnold Thu May 9 17:27:03 2002
|
|---|
| 2 | # Return-Path: <[email protected]>
|
|---|
| 3 | # Received: (from arnold@localhost)
|
|---|
| 4 | # by skeeve.com (8.11.6/8.11.6) id g49ER3K27925
|
|---|
| 5 | # for arnold; Thu, 9 May 2002 17:27:03 +0300
|
|---|
| 6 | # Date: Thu, 9 May 2002 17:27:03 +0300
|
|---|
| 7 | # From: Aharon Robbins <[email protected]>
|
|---|
| 8 | # Message-Id: <[email protected]>
|
|---|
| 9 | # To: [email protected]
|
|---|
| 10 | # Subject: fixme
|
|---|
| 11 | # X-SpamBouncer: 1.4 (10/07/01)
|
|---|
| 12 | # X-SBRule: Pattern Match (Other Patterns) (Score: 4850)
|
|---|
| 13 | # X-SBRule: Pattern Match (Spam Phone #) (Score: 0)
|
|---|
| 14 | # X-SBClass: Blocked
|
|---|
| 15 | # Status: RO
|
|---|
| 16 | #
|
|---|
| 17 | # Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail
|
|---|
| 18 | # From: [email protected] (laura fairhead)
|
|---|
| 19 | # Newsgroups: comp.lang.awk
|
|---|
| 20 | # Subject: bug in gawk3.1.0 regex code
|
|---|
| 21 | # Date: Wed, 08 May 2002 23:31:40 GMT
|
|---|
| 22 | # Organization: that'll be the daewooo :)
|
|---|
| 23 | # Lines: 211
|
|---|
| 24 | # Message-ID: <[email protected]>
|
|---|
| 25 | # Reply-To: [email protected]
|
|---|
| 26 | # NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48)
|
|---|
| 27 | # X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286])
|
|---|
| 28 | # X-Newsreader: Forte Free Agent 1.21/32.243
|
|---|
| 29 | # Xref: dfw-artgen comp.lang.awk:13059
|
|---|
| 30 | #
|
|---|
| 31 | #
|
|---|
| 32 | # I believe I've just found a bug in gawk3.1.0 implementation of
|
|---|
| 33 | # extended regular expressions. It seems to be down to the alternation
|
|---|
| 34 | # operator; when using an end anchor '$' as a subexpression in an
|
|---|
| 35 | # alternation and the entire matched RE is a nul-string it fails
|
|---|
| 36 | # to match the end of string, for example;
|
|---|
| 37 | #
|
|---|
| 38 | # gsub(/$|2/,"x")
|
|---|
| 39 | # print
|
|---|
| 40 | #
|
|---|
| 41 | # input = 12345
|
|---|
| 42 | # expected output = 1x345x
|
|---|
| 43 | # actual output = 1x345
|
|---|
| 44 | #
|
|---|
| 45 | # The start anchor '^' always works as expected;
|
|---|
| 46 | #
|
|---|
| 47 | # gsub(/^|2/,"x")
|
|---|
| 48 | # print
|
|---|
| 49 | #
|
|---|
| 50 | # input = 12345
|
|---|
| 51 | # expected output = x1x345
|
|---|
| 52 | # actual output = x1x345
|
|---|
| 53 | #
|
|---|
| 54 | # This was with POSIX compliance enabled although that doesn't
|
|---|
| 55 | # affect the result.
|
|---|
| 56 | #
|
|---|
| 57 | # I checked on gawk3.0.6 and got exactly the same results however
|
|---|
| 58 | # gawk2.15.6 gives the expected results.
|
|---|
| 59 | #
|
|---|
| 60 | # I'm about to post a bug report about this into gnu.utils.bug
|
|---|
| 61 | # but I thought I'd post it here first in case anyone has
|
|---|
| 62 | # any input/comments/whatever ....
|
|---|
| 63 | #
|
|---|
| 64 | # Complete test results were as follows;
|
|---|
| 65 | #
|
|---|
| 66 | # input 12345
|
|---|
| 67 | # output gsub(/regex/,"x",input)
|
|---|
| 68 | #
|
|---|
| 69 | # regex output
|
|---|
| 70 | # (^) x12345
|
|---|
| 71 | # ($) 12345x
|
|---|
| 72 | # (^)|($) x12345x
|
|---|
| 73 | # ($)|(^) x12345x
|
|---|
| 74 | # (2) 1x345
|
|---|
| 75 | # (^)|2 x1x345
|
|---|
| 76 | # 2|(^) x1x345
|
|---|
| 77 | # ($)|2 1x345
|
|---|
| 78 | # 2|($) 1x345
|
|---|
| 79 | # (2)|(^) x1x345
|
|---|
| 80 | # (^)|(2) x1x345
|
|---|
| 81 | # (2)|($) 1x345
|
|---|
| 82 | # ($)|(2) 1x345
|
|---|
| 83 | # .((2)|(^)) x345
|
|---|
| 84 | # .((^)|(2)) x345
|
|---|
| 85 | # .((2)|($)) x34x
|
|---|
| 86 | # .(($)|(2)) x34x
|
|---|
| 87 | # x{0}((2)|(^)) x1x345
|
|---|
| 88 | # x{0}((^)|(2)) x1x345
|
|---|
| 89 | # x{0}((2)|($)) 1x345
|
|---|
| 90 | # x{0}(($)|(2)) 1x345
|
|---|
| 91 | # x*((2)|(^)) x1x345
|
|---|
| 92 | # x*((^)|(2)) x1x345
|
|---|
| 93 | # x*((2)|($)) 1x345
|
|---|
| 94 | # x*(($)|(2)) 1x345
|
|---|
| 95 | #
|
|---|
| 96 | # Here's the test program I used, a few of the cases use ERE {n[,[m]]}
|
|---|
| 97 | # operators so that will have to be commented out or have a check
|
|---|
| 98 | # added or something (should have put a conditional in I know... ;-)
|
|---|
| 99 | #
|
|---|
| 100 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|---|
| 101 | #
|
|---|
# Driver for the gsub() anchor/alternation cases described in the report
# above.  Every case starts from the same subject string, applies a single
# gsub() with "x" as the replacement text, and prints the regex label
# followed by the rewritten record.

# show(label) -- print LABEL immediately followed by the current record,
# then load the untouched test string back into $0 for the next case.
function show(label)
{
	print label $0
	$0 = TESTSTR
}

BEGIN {
	TESTSTR = "12345"

	# Report header.
	print "input " TESTSTR
	print "output gsub(/regex/,\"x\",input)"
	print ""
	print "regex output"

	# Prime the record; show() re-primes it after each case.
	$0 = TESTSTR

	# Bare anchors, alone and alternated with each other.
	gsub(/(^)/, "x");		show("(^) ")
	gsub(/($)/, "x");		show("($) ")
	gsub(/(^)|($)/, "x");		show("(^)|($) ")
	gsub(/($)|(^)/, "x");		show("($)|(^) ")

	# Plain literal, as a baseline.
	gsub(/2/, "x");			show("(2) ")

	# Anchor alternated with an ungrouped literal.
	gsub(/(^)|2/, "x");		show("(^)|2 ")
	gsub(/2|(^)/, "x");		show("2|(^) ")
	gsub(/($)|2/, "x");		show("($)|2 ")
	gsub(/2|($)/, "x");		show("2|($) ")

	# Anchor alternated with a grouped literal.
	gsub(/(2)|(^)/, "x");		show("(2)|(^) ")
	gsub(/(^)|(2)/, "x");		show("(^)|(2) ")
	gsub(/(2)|($)/, "x");		show("(2)|($) ")
	gsub(/($)|(2)/, "x");		show("($)|(2) ")

	# Alternation preceded by a mandatory character.
	gsub(/.((2)|(^))/, "x");	show(".((2)|(^)) ")
	gsub(/.((^)|(2))/, "x");	show(".((^)|(2)) ")
	gsub(/.((2)|($))/, "x");	show(".((2)|($)) ")
	gsub(/.(($)|(2))/, "x");	show(".(($)|(2)) ")

	# Alternation preceded by a zero-repetition interval expression.
	gsub(/x{0}((2)|(^))/, "x");	show("x{0}((2)|(^)) ")
	gsub(/x{0}((^)|(2))/, "x");	show("x{0}((^)|(2)) ")
	gsub(/x{0}((2)|($))/, "x");	show("x{0}((2)|($)) ")
	gsub(/x{0}(($)|(2))/, "x");	show("x{0}(($)|(2)) ")

	# Alternation preceded by a starred character.
	gsub(/x*((2)|(^))/, "x");	show("x*((2)|(^)) ")
	gsub(/x*((^)|(2))/, "x");	show("x*((^)|(2)) ")
	gsub(/x*((2)|($))/, "x");	show("x*((2)|($)) ")
	gsub(/x*(($)|(2))/, "x");	show("x*(($)|(2)) ")

	# Zero-repetition interval directly in front of an anchor.
	gsub(/x{0}^/, "x");		show("x{0}^ ")
	gsub(/x{0}$/, "x");		show("x{0}$ ")
	gsub(/(x{0}^)|2/, "x");		show("(x{0}^)|2 ")
	gsub(/(x{0}$)|2/, "x");		show("(x{0}$)|2 ")
}
|
|---|
| 229 | #
|
|---|
| 230 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|---|
| 231 | #
|
|---|
| 232 | # byefrom
|
|---|
| 233 | #
|
|---|
| 234 | # --
|
|---|
| 235 | # laura fairhead # [email protected] http://lf.8k.com
|
|---|
| 236 | # # if you are bored crack my sig.
|
|---|
| 237 | # 1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A
|
|---|
| 238 | # EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E
|
|---|
| 239 | # 630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA
|
|---|
| 240 | # 8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978
|
|---|
| 241 | # 80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492
|
|---|
| 242 | #
|
|---|