source: vendor/gawk/3.1.5/awklib/eg/prog/uniq.awk@ 3076

Last change on this file since 3076 was 3076, checked in by bird, 19 years ago

gawk 3.1.5

File size: 2.6 KB
Line 
1# uniq.awk --- do uniq in awk
2#
3# Requires getopt and join library functions
4#
5# Arnold Robbins, [email protected], Public Domain
6# May 1993
7
# usage --- write the command synopsis to standard error and terminate
# the program with exit status 1.
#
# Takes no arguments from callers; `e` is only an awk-style local slot,
# kept so the function's signature stays the same.
function usage(    e)
{
    print "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]" > "/dev/stderr"
    exit 1
}
14
15# -c count lines. overrides -d and -u
16# -d only repeated lines
17# -u only non-repeated lines
18# -n skip n fields
19# +n skip n characters, skip fields first
20
# Command-line setup.  Parses the options with getopt(), records the
# field/character skip counts, blanks the ARGV entries that were consumed
# (awk skips empty ARGV elements when looking for input files), and
# selects the output file.
BEGIN {
    count = 1
    outputfile = "/dev/stdout"
    opts = "udc0:1:2:3:4:5:6:7:8:9:"

    while ((c = getopt(ARGC, ARGV, opts)) != -1) {
        if (c == "u") {
            non_repeated_only++
            continue
        }
        if (c == "d") {
            repeated_only++
            continue
        }
        if (c == "c") {
            do_count++
            continue
        }

        if (index("0123456789", c) == 0)
            usage()
        else if (Optarg ~ /^[0-9]+$/) {
            # Each digit is declared in opts as taking an argument, so for
            # a multi-digit count such as -15 the remaining digits arrive
            # in Optarg; glue them back onto the leading digit.
            fcount = (c Optarg) + 0
        } else {
            # Single-digit count such as -5: whatever getopt handed back
            # as the "argument" was not part of the number, so use the
            # digit alone and step Optind back by one.
            fcount = c + 0
            Optind--
        }
    }

    # Optional +n operand: number of leading characters to skip.
    if (ARGV[Optind] ~ /^\+[0-9]+$/) {
        charcount = substr(ARGV[Optind], 2) + 0
        Optind++
    }

    # Erase everything consumed so far so awk does not open it as input.
    i = 1
    while (i < Optind) {
        ARGV[i] = ""
        i++
    }

    # With neither -d nor -u given, both kinds of lines are printed.
    if (!repeated_only && !non_repeated_only) {
        repeated_only = 1
        non_repeated_only = 1
    }

    # Exactly two operands left: the second one names the output file.
    if (Optind + 2 == ARGC) {
        outputfile = ARGV[ARGC - 1]
        ARGV[ARGC - 1] = ""
    }
}
# are_equal --- report whether the current record ($0) matches the saved
# previous line (global `last`) after the requested prefix is ignored.
#
# Globals read:
#   last       the line that started the current group
#   fcount     number of leading fields to skip (-n)
#   charcount  number of leading characters to skip (+n), applied after
#              the fields have been skipped
# Returns: 1 when the compared portions are the same string, 0 otherwise.
# Locals:  n, m (field counts), clast, cline (portions being compared),
#          alast, aline (split-up fields).
function are_equal(    n, m, clast, cline, alast, aline)
{
    # Values that come from input ($0, and array elements made by split())
    # carry awk's "numeric string" attribute, which survives assignment.
    # Comparing two such values with == is done NUMERICALLY, so lines like
    # "1" and "1.0" would be reported as duplicates.  Concatenating the
    # empty string yields plain strings and forces a textual comparison.
    if (fcount == 0 && charcount == 0)
        return ((last "") == ($0 ""))

    if (fcount > 0) {
        n = split(last, alast)
        m = split($0, aline)
        clast = join(alast, fcount+1, n)
        cline = join(aline, fcount+1, m)
    } else {
        clast = last
        cline = $0
    }
    if (charcount) {
        clast = substr(clast, charcount + 1)
        cline = substr(cline, charcount + 1)
    }

    # Same precaution as above: when only one field remains, join() may
    # hand back a single split() element, which is still a numeric string.
    return ((clast "") == (cline ""))
}
# The very first record has no predecessor to be compared with: remember
# it as the start of the first group and go on to the next record.
NR == 1 {
    last = $0
    next
}
87
# Every record after the first: either it extends the current group of
# equal lines, or it closes that group (which is then reported according
# to the selected mode) and starts a new one.
{
    equal = are_equal()

    if (equal)
        count++
    else {
        if (do_count)           # -c overrides -d and -u
            printf("%4d %s\n", count, last) > outputfile
        else if ((repeated_only && count > 1) ||
                 (non_repeated_only && count == 1))
            print last > outputfile

        # The current record becomes the head of the next group.
        last = $0
        count = 1
    }

    # In -c mode, processing of this record stops here.
    if (do_count)
        next
}
112
# Report the final group, which the main rule never gets to flush because
# no following record arrives to close it.
END {
    # If no record was ever read there is no pending group.  This covers
    # empty input and also the case where exit was called from BEGIN
    # (e.g. via usage()), since awk still runs END rules after such an
    # exit.  Without this guard a spurious blank line (or "   1 " with -c)
    # would be written.
    if (NR > 0) {
        if (do_count)
            printf("%4d %s\n", count, last) > outputfile
        else if ((repeated_only && count > 1) ||
                 (non_repeated_only && count == 1))
            print last > outputfile
    }
}
Note: See TracBrowser for help on using the repository browser.