Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

fnmatch_loop.c@ 3342

Visit:

Last change on this file since 3342 was 3342, checked in by bird, 19 years ago
tar 1.16.1
File size: 28.8 KB

Line
1	/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006
2	Free Software Foundation, Inc.
3
4	This program is free software; you can redistribute it and/or modify
5	it under the terms of the GNU General Public License as published by
6	the Free Software Foundation; either version 2, or (at your option)
7	any later version.
8
9	This program is distributed in the hope that it will be useful,
10	but WITHOUT ANY WARRANTY; without even the implied warranty of
11	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	GNU General Public License for more details.
13
14	You should have received a copy of the GNU General Public License
15	along with this program; if not, write to the Free Software Foundation,
16	Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18	/* Match STRING against the file name pattern PATTERN, returning zero if
19	it matches, nonzero if not. */
20	static int EXT (INT opt, const CHAR pattern, const CHAR string,
21	const CHAR *string_end, bool no_leading_period, int flags)
22	internal_function;
23	static const CHAR END (const CHAR patternp) internal_function;
24
25	static int
26	internal_function
27	FCT (const CHAR pattern, const CHAR string, const CHAR *string_end,
28	bool no_leading_period, int flags)
29	{
30	register const CHAR p = pattern, n = string;
31	register UCHAR c;
32	#ifdef _LIBC
33	# if WIDE_CHAR_VERSION
34	const char collseq = (const char )
35	_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
36	# else
37	const UCHAR collseq = (const UCHAR )
38	_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
39	# endif
40	#endif
41
42	while ((c = *p++) != L_('\0'))
43	{
44	bool new_no_leading_period = false;
45	c = FOLD (c);
46
47	switch (c)
48	{
49	case L_('?'):
50	if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
51	{
52	int res;
53
54	res = EXT (c, p, n, string_end, no_leading_period,
55	flags);
56	if (res != -1)
57	return res;
58	}
59
60	if (n == string_end)
61	return FNM_NOMATCH;
62	else if (*n == L_('/') && (flags & FNM_FILE_NAME))
63	return FNM_NOMATCH;
64	else if (*n == L_('.') && no_leading_period)
65	return FNM_NOMATCH;
66	break;
67
68	case L_('\\'):
69	if (!(flags & FNM_NOESCAPE))
70	{
71	c = *p++;
72	if (c == L_('\0'))
73	/* Trailing \ loses. */
74	return FNM_NOMATCH;
75	c = FOLD (c);
76	}
77	if (n == string_end \|\| FOLD ((UCHAR) *n) != c)
78	return FNM_NOMATCH;
79	break;
80
81	case L_('*'):
82	if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
83	{
84	int res;
85
86	res = EXT (c, p, n, string_end, no_leading_period,
87	flags);
88	if (res != -1)
89	return res;
90	}
91
92	if (n != string_end && *n == L_('.') && no_leading_period)
93	return FNM_NOMATCH;
94
95	for (c = p++; c == L_('?') \|\| c == L_(''); c = *p++)
96	{
97	if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
98	{
99	const CHAR *endp = END (p);
100	if (endp != p)
101	{
102	/* This is a pattern. Skip over it. */
103	p = endp;
104	continue;
105	}
106	}
107
108	if (c == L_('?'))
109	{
110	/* A ? needs to match one character. */
111	if (n == string_end)
112	/* There isn't another character; no match. */
113	return FNM_NOMATCH;
114	else if (*n == L_('/')
115	&& __builtin_expect (flags & FNM_FILE_NAME, 0))
116	/* A slash does not match a wildcard under
117	FNM_FILE_NAME. */
118	return FNM_NOMATCH;
119	else
120	/* One character of the string is consumed in matching
121	this ? wildcard, so *??? won't match if there are
122	less than three characters. */
123	++n;
124	}
125	}
126
127	if (c == L_('\0'))
128	/* The wildcard(s) is/are the last element of the pattern.
129	If the name is a file name and contains another slash
130	this means it cannot match, unless the FNM_LEADING_DIR
131	flag is set. */
132	{
133	int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
134
135	if (flags & FNM_FILE_NAME)
136	{
137	if (flags & FNM_LEADING_DIR)
138	result = 0;
139	else
140	{
141	if (MEMCHR (n, L_('/'), string_end - n) == NULL)
142	result = 0;
143	}
144	}
145
146	return result;
147	}
148	else
149	{
150	const CHAR *endp;
151
152	endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
153	string_end - n);
154	if (endp == NULL)
155	endp = string_end;
156
157	if (c == L_('[')
158	\|\| (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
159	&& (c == L_('@') \|\| c == L_('+') \|\| c == L_('!'))
160	&& *p == L_('(')))
161	{
162	int flags2 = ((flags & FNM_FILE_NAME)
163	? flags : (flags & ~FNM_PERIOD));
164	bool no_leading_period2 = no_leading_period;
165
166	for (--p; n < endp; ++n, no_leading_period2 = false)
167	if (FCT (p, n, string_end, no_leading_period2, flags2)
168	== 0)
169	return 0;
170	}
171	else if (c == L_('/') && (flags & FNM_FILE_NAME))
172	{
173	while (n < string_end && *n != L_('/'))
174	++n;
175	if (n < string_end && *n == L_('/')
176	&& (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
177	== 0))
178	return 0;
179	}
180	else
181	{
182	int flags2 = ((flags & FNM_FILE_NAME)
183	? flags : (flags & ~FNM_PERIOD));
184	int no_leading_period2 = no_leading_period;
185
186	if (c == L_('\\') && !(flags & FNM_NOESCAPE))
187	c = *p;
188	c = FOLD (c);
189	for (--p; n < endp; ++n, no_leading_period2 = false)
190	if (FOLD ((UCHAR) *n) == c
191	&& (FCT (p, n, string_end, no_leading_period2, flags2)
192	== 0))
193	return 0;
194	}
195	}
196
197	/* If we come here no match is possible with the wildcard. */
198	return FNM_NOMATCH;
199
200	case L_('['):
201	{
202	/* Nonzero if the sense of the character class is inverted. */
203	register bool not;
204	CHAR cold;
205	UCHAR fn;
206
207	if (posixly_correct == 0)
208	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
209
210	if (n == string_end)
211	return FNM_NOMATCH;
212
213	if (*n == L_('.') && no_leading_period)
214	return FNM_NOMATCH;
215
216	if (*n == L_('/') && (flags & FNM_FILE_NAME))
217	/* `/' cannot be matched. */
218	return FNM_NOMATCH;
219
220	not = (p == L_('!') \|\| (posixly_correct < 0 && p == L_('^')));
221	if (not)
222	++p;
223
224	fn = FOLD ((UCHAR) *n);
225
226	c = *p++;
227	for (;;)
228	{
229	if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
230	{
231	if (*p == L_('\0'))
232	return FNM_NOMATCH;
233	c = FOLD ((UCHAR) *p);
234	++p;
235
236	if (c == fn)
237	goto matched;
238	}
239	else if (c == L_('[') && *p == L_(':'))
240	{
241	/* Leave room for the null. */
242	CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
243	size_t c1 = 0;
244	#if defined _LIBC \|\| WIDE_CHAR_SUPPORT
245	wctype_t wt;
246	#endif
247	const CHAR *startp = p;
248
249	for (;;)
250	{
251	if (c1 == CHAR_CLASS_MAX_LENGTH)
252	/* The name is too long and therefore the pattern
253	is ill-formed. */
254	return FNM_NOMATCH;
255
256	c = *++p;
257	if (c == L_(':') && p[1] == L_(']'))
258	{
259	p += 2;
260	break;
261	}
262	if (c < L_('a') \|\| c >= L_('z'))
263	{
264	/* This cannot possibly be a character class name.
265	Match it as a normal range. */
266	p = startp;
267	c = L_('[');
268	goto normal_bracket;
269	}
270	str[c1++] = c;
271	}
272	str[c1] = L_('\0');
273
274	#if defined _LIBC \|\| WIDE_CHAR_SUPPORT
275	wt = IS_CHAR_CLASS (str);
276	if (wt == 0)
277	/* Invalid character class name. */
278	return FNM_NOMATCH;
279
280	# if defined _LIBC && ! WIDE_CHAR_VERSION
281	/* The following code is glibc specific but does
282	there a good job in speeding up the code since
283	we can avoid the btowc() call. */
284	if (_ISCTYPE ((UCHAR) *n, wt))
285	goto matched;
286	# else
287	if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
288	goto matched;
289	# endif
290	#else
291	if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
292	\|\| (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
293	\|\| (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
294	\|\| (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
295	\|\| (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
296	\|\| (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
297	\|\| (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
298	\|\| (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
299	\|\| (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
300	\|\| (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
301	\|\| (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
302	\|\| (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
303	goto matched;
304	#endif
305	c = *p++;
306	}
307	#ifdef _LIBC
308	else if (c == L_('[') && *p == L_('='))
309	{
310	UCHAR str[1];
311	uint32_t nrules =
312	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313	const CHAR *startp = p;
314
315	c = *++p;
316	if (c == L_('\0'))
317	{
318	p = startp;
319	c = L_('[');
320	goto normal_bracket;
321	}
322	str[0] = c;
323
324	c = *++p;
325	if (c != L_('=') \|\| p[1] != L_(']'))
326	{
327	p = startp;
328	c = L_('[');
329	goto normal_bracket;
330	}
331	p += 2;
332
333	if (nrules == 0)
334	{
335	if ((UCHAR) *n == str[0])
336	goto matched;
337	}
338	else
339	{
340	const int32_t *table;
341	# if WIDE_CHAR_VERSION
342	const int32_t *weights;
343	const int32_t *extra;
344	# else
345	const unsigned char *weights;
346	const unsigned char *extra;
347	# endif
348	const int32_t *indirect;
349	int32_t idx;
350	const UCHAR cp = (const UCHAR ) str;
351
352	/* This #include defines a local function! */
353	# if WIDE_CHAR_VERSION
354	# include <locale/weightwc.h>
355	# else
356	# include <locale/weight.h>
357	# endif
358
359	# if WIDE_CHAR_VERSION
360	table = (const int32_t *)
361	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362	weights = (const int32_t *)
363	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364	extra = (const int32_t *)
365	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366	indirect = (const int32_t *)
367	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
368	# else
369	table = (const int32_t *)
370	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371	weights = (const unsigned char *)
372	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373	extra = (const unsigned char *)
374	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375	indirect = (const int32_t *)
376	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
377	# endif
378
379	idx = findidx (&cp);
380	if (idx != 0)
381	{
382	/* We found a table entry. Now see whether the
383	character we are currently at has the same
384	equivalance class value. */
385	int len = weights[idx];
386	int32_t idx2;
387	const UCHAR np = (const UCHAR ) n;
388
389	idx2 = findidx (&np);
390	if (idx2 != 0 && len == weights[idx2])
391	{
392	int cnt = 0;
393
394	while (cnt < len
395	&& (weights[idx + 1 + cnt]
396	== weights[idx2 + 1 + cnt]))
397	++cnt;
398
399	if (cnt == len)
400	goto matched;
401	}
402	}
403	}
404
405	c = *p++;
406	}
407	#endif
408	else if (c == L_('\0'))
409	/* [ (unterminated) loses. */
410	return FNM_NOMATCH;
411	else
412	{
413	bool is_range = false;
414
415	#ifdef _LIBC
416	bool is_seqval = false;
417
418	if (c == L_('[') && *p == L_('.'))
419	{
420	uint32_t nrules =
421	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
422	const CHAR *startp = p;
423	size_t c1 = 0;
424
425	while (1)
426	{
427	c = *++p;
428	if (c == L_('.') && p[1] == L_(']'))
429	{
430	p += 2;
431	break;
432	}
433	if (c == '\0')
434	return FNM_NOMATCH;
435	++c1;
436	}
437
438	/* We have to handling the symbols differently in
439	ranges since then the collation sequence is
440	important. */
441	is_range = *p == L_('-') && p[1] != L_('\0');
442
443	if (nrules == 0)
444	{
445	/* There are no names defined in the collation
446	data. Therefore we only accept the trivial
447	names consisting of the character itself. */
448	if (c1 != 1)
449	return FNM_NOMATCH;
450
451	if (!is_range && *n == startp[1])
452	goto matched;
453
454	cold = startp[1];
455	c = *p++;
456	}
457	else
458	{
459	int32_t table_size;
460	const int32_t *symb_table;
461	# ifdef WIDE_CHAR_VERSION
462	char str[c1];
463	size_t strcnt;
464	# else
465	# define str (startp + 1)
466	# endif
467	const unsigned char *extra;
468	int32_t idx;
469	int32_t elem;
470	int32_t second;
471	int32_t hash;
472
473	# ifdef WIDE_CHAR_VERSION
474	/* We have to convert the name to a single-byte
475	string. This is possible since the names
476	consist of ASCII characters and the internal
477	representation is UCS4. */
478	for (strcnt = 0; strcnt < c1; ++strcnt)
479	str[strcnt] = startp[1 + strcnt];
480	# endif
481
482	table_size =
483	_NL_CURRENT_WORD (LC_COLLATE,
484	_NL_COLLATE_SYMB_HASH_SIZEMB);
485	symb_table = (const int32_t *)
486	_NL_CURRENT (LC_COLLATE,
487	_NL_COLLATE_SYMB_TABLEMB);
488	extra = (const unsigned char *)
489	_NL_CURRENT (LC_COLLATE,
490	_NL_COLLATE_SYMB_EXTRAMB);
491
492	/* Locate the character in the hashing table. */
493	hash = elem_hash (str, c1);
494
495	idx = 0;
496	elem = hash % table_size;
497	second = hash % (table_size - 2);
498	while (symb_table[2 * elem] != 0)
499	{
500	/* First compare the hashing value. */
501	if (symb_table[2 * elem] == hash
502	&& c1 == extra[symb_table[2 * elem + 1]]
503	&& memcmp (str,
504	&extra[symb_table[2 * elem + 1]
505	+ 1], c1) == 0)
506	{
507	/* Yep, this is the entry. */
508	idx = symb_table[2 * elem + 1];
509	idx += 1 + extra[idx];
510	break;
511	}
512
513	/* Next entry. */
514	elem += second;
515	}
516
517	if (symb_table[2 * elem] != 0)
518	{
519	/* Compare the byte sequence but only if
520	this is not part of a range. */
521	# ifdef WIDE_CHAR_VERSION
522	int32_t *wextra;
523
524	idx += 1 + extra[idx];
525	/* Adjust for the alignment. */
526	idx = (idx + 3) & ~3;
527
528	wextra = (int32_t *) &extra[idx + 4];
529	# endif
530
531	if (! is_range)
532	{
533	# ifdef WIDE_CHAR_VERSION
534	for (c1 = 0;
535	(int32_t) c1 < wextra[idx];
536	++c1)
537	if (n[c1] != wextra[1 + c1])
538	break;
539
540	if ((int32_t) c1 == wextra[idx])
541	goto matched;
542	# else
543	for (c1 = 0; c1 < extra[idx]; ++c1)
544	if (n[c1] != extra[1 + c1])
545	break;
546
547	if (c1 == extra[idx])
548	goto matched;
549	# endif
550	}
551
552	/* Get the collation sequence value. */
553	is_seqval = true;
554	# ifdef WIDE_CHAR_VERSION
555	cold = wextra[1 + wextra[idx]];
556	# else
557	/* Adjust for the alignment. */
558	idx += 1 + extra[idx];
559	idx = (idx + 3) & ~4;
560	cold = ((int32_t ) &extra[idx]);
561	# endif
562
563	c = *p++;
564	}
565	else if (c1 == 1)
566	{
567	/* No valid character. Match it as a
568	single byte. */
569	if (!is_range && *n == str[0])
570	goto matched;
571
572	cold = str[0];
573	c = *p++;
574	}
575	else
576	return FNM_NOMATCH;
577	}
578	}
579	else
580	# undef str
581	#endif
582	{
583	c = FOLD (c);
584	normal_bracket:
585
586	/* We have to handling the symbols differently in
587	ranges since then the collation sequence is
588	important. */
589	is_range = (*p == L_('-') && p[1] != L_('\0')
590	&& p[1] != L_(']'));
591
592	if (!is_range && c == fn)
593	goto matched;
594
595	cold = c;
596	c = *p++;
597	}
598
599	if (c == L_('-') && *p != L_(']'))
600	{
601	#if _LIBC
602	/* We have to find the collation sequence
603	value for C. Collation sequence is nothing
604	we can regularly access. The sequence
605	value is defined by the order in which the
606	definitions of the collation values for the
607	various characters appear in the source
608	file. A strange concept, nowhere
609	documented. */
610	uint32_t fcollseq;
611	uint32_t lcollseq;
612	UCHAR cend = *p++;
613
614	# ifdef WIDE_CHAR_VERSION
615	/* Search in the `names' array for the characters. */
616	fcollseq = __collseq_table_lookup (collseq, fn);
617	if (fcollseq == ~((uint32_t) 0))
618	/* XXX We don't know anything about the character
619	we are supposed to match. This means we are
620	failing. */
621	goto range_not_matched;
622
623	if (is_seqval)
624	lcollseq = cold;
625	else
626	lcollseq = __collseq_table_lookup (collseq, cold);
627	# else
628	fcollseq = collseq[fn];
629	lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
630	# endif
631
632	is_seqval = false;
633	if (cend == L_('[') && *p == L_('.'))
634	{
635	uint32_t nrules =
636	_NL_CURRENT_WORD (LC_COLLATE,
637	_NL_COLLATE_NRULES);
638	const CHAR *startp = p;
639	size_t c1 = 0;
640
641	while (1)
642	{
643	c = *++p;
644	if (c == L_('.') && p[1] == L_(']'))
645	{
646	p += 2;
647	break;
648	}
649	if (c == '\0')
650	return FNM_NOMATCH;
651	++c1;
652	}
653
654	if (nrules == 0)
655	{
656	/* There are no names defined in the
657	collation data. Therefore we only
658	accept the trivial names consisting
659	of the character itself. */
660	if (c1 != 1)
661	return FNM_NOMATCH;
662
663	cend = startp[1];
664	}
665	else
666	{
667	int32_t table_size;
668	const int32_t *symb_table;
669	# ifdef WIDE_CHAR_VERSION
670	char str[c1];
671	size_t strcnt;
672	# else
673	# define str (startp + 1)
674	# endif
675	const unsigned char *extra;
676	int32_t idx;
677	int32_t elem;
678	int32_t second;
679	int32_t hash;
680
681	# ifdef WIDE_CHAR_VERSION
682	/* We have to convert the name to a single-byte
683	string. This is possible since the names
684	consist of ASCII characters and the internal
685	representation is UCS4. */
686	for (strcnt = 0; strcnt < c1; ++strcnt)
687	str[strcnt] = startp[1 + strcnt];
688	# endif
689
690	table_size =
691	_NL_CURRENT_WORD (LC_COLLATE,
692	_NL_COLLATE_SYMB_HASH_SIZEMB);
693	symb_table = (const int32_t *)
694	_NL_CURRENT (LC_COLLATE,
695	_NL_COLLATE_SYMB_TABLEMB);
696	extra = (const unsigned char *)
697	_NL_CURRENT (LC_COLLATE,
698	_NL_COLLATE_SYMB_EXTRAMB);
699
700	/* Locate the character in the hashing
701	table. */
702	hash = elem_hash (str, c1);
703
704	idx = 0;
705	elem = hash % table_size;
706	second = hash % (table_size - 2);
707	while (symb_table[2 * elem] != 0)
708	{
709	/* First compare the hashing value. */
710	if (symb_table[2 * elem] == hash
711	&& (c1
712	== extra[symb_table[2 * elem + 1]])
713	&& memcmp (str,
714	&extra[symb_table[2 * elem + 1]
715	+ 1], c1) == 0)
716	{
717	/* Yep, this is the entry. */
718	idx = symb_table[2 * elem + 1];
719	idx += 1 + extra[idx];
720	break;
721	}
722
723	/* Next entry. */
724	elem += second;
725	}
726
727	if (symb_table[2 * elem] != 0)
728	{
729	/* Compare the byte sequence but only if
730	this is not part of a range. */
731	# ifdef WIDE_CHAR_VERSION
732	int32_t *wextra;
733
734	idx += 1 + extra[idx];
735	/* Adjust for the alignment. */
736	idx = (idx + 3) & ~4;
737
738	wextra = (int32_t *) &extra[idx + 4];
739	# endif
740	/* Get the collation sequence value. */
741	is_seqval = true;
742	# ifdef WIDE_CHAR_VERSION
743	cend = wextra[1 + wextra[idx]];
744	# else
745	/* Adjust for the alignment. */
746	idx += 1 + extra[idx];
747	idx = (idx + 3) & ~4;
748	cend = ((int32_t ) &extra[idx]);
749	# endif
750	}
751	else if (symb_table[2 * elem] != 0 && c1 == 1)
752	{
753	cend = str[0];
754	c = *p++;
755	}
756	else
757	return FNM_NOMATCH;
758	}
759	# undef str
760	}
761	else
762	{
763	if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
764	cend = *p++;
765	if (cend == L_('\0'))
766	return FNM_NOMATCH;
767	cend = FOLD (cend);
768	}
769
770	/* XXX It is not entirely clear to me how to handle
771	characters which are not mentioned in the
772	collation specification. */
773	if (
774	# ifdef WIDE_CHAR_VERSION
775	lcollseq == 0xffffffff \|\|
776	# endif
777	lcollseq <= fcollseq)
778	{
779	/* We have to look at the upper bound. */
780	uint32_t hcollseq;
781
782	if (is_seqval)
783	hcollseq = cend;
784	else
785	{
786	# ifdef WIDE_CHAR_VERSION
787	hcollseq =
788	__collseq_table_lookup (collseq, cend);
789	if (hcollseq == ~((uint32_t) 0))
790	{
791	/* Hum, no information about the upper
792	bound. The matching succeeds if the
793	lower bound is matched exactly. */
794	if (lcollseq != fcollseq)
795	goto range_not_matched;
796
797	goto matched;
798	}
799	# else
800	hcollseq = collseq[cend];
801	# endif
802	}
803
804	if (lcollseq <= hcollseq && fcollseq <= hcollseq)
805	goto matched;
806	}
807	# ifdef WIDE_CHAR_VERSION
808	range_not_matched:
809	# endif
810	#else
811	/* We use a boring value comparison of the character
812	values. This is better than comparing using
813	`strcoll' since the latter would have surprising
814	and sometimes fatal consequences. */
815	UCHAR cend = *p++;
816
817	if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
818	cend = *p++;
819	if (cend == L_('\0'))
820	return FNM_NOMATCH;
821
822	/* It is a range. */
823	if (cold <= fn && fn <= cend)
824	goto matched;
825	#endif
826
827	c = *p++;
828	}
829	}
830
831	if (c == L_(']'))
832	break;
833	}
834
835	if (!not)
836	return FNM_NOMATCH;
837	break;
838
839	matched:
840	/* Skip the rest of the [...] that already matched. */
841	do
842	{
843	ignore_next:
844	c = *p++;
845
846	if (c == L_('\0'))
847	/* [... (unterminated) loses. */
848	return FNM_NOMATCH;
849
850	if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
851	{
852	if (*p == L_('\0'))
853	return FNM_NOMATCH;
854	/* XXX 1003.2d11 is unclear if this is right. */
855	++p;
856	}
857	else if (c == L_('[') && *p == L_(':'))
858	{
859	int c1 = 0;
860	const CHAR *startp = p;
861
862	while (1)
863	{
864	c = *++p;
865	if (++c1 == CHAR_CLASS_MAX_LENGTH)
866	return FNM_NOMATCH;
867
868	if (*p == L_(':') && p[1] == L_(']'))
869	break;
870
871	if (c < L_('a') \|\| c >= L_('z'))
872	{
873	p = startp;
874	goto ignore_next;
875	}
876	}
877	p += 2;
878	c = *p++;
879	}
880	else if (c == L_('[') && *p == L_('='))
881	{
882	c = *++p;
883	if (c == L_('\0'))
884	return FNM_NOMATCH;
885	c = *++p;
886	if (c != L_('=') \|\| p[1] != L_(']'))
887	return FNM_NOMATCH;
888	p += 2;
889	c = *p++;
890	}
891	else if (c == L_('[') && *p == L_('.'))
892	{
893	++p;
894	while (1)
895	{
896	c = *++p;
897	if (c == '\0')
898	return FNM_NOMATCH;
899
900	if (*p == L_('.') && p[1] == L_(']'))
901	break;
902	}
903	p += 2;
904	c = *p++;
905	}
906	}
907	while (c != L_(']'));
908	if (not)
909	return FNM_NOMATCH;
910	}
911	break;
912
913	case L_('+'):
914	case L_('@'):
915	case L_('!'):
916	if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
917	{
918	int res;
919
920	res = EXT (c, p, n, string_end, no_leading_period, flags);
921	if (res != -1)
922	return res;
923	}
924	goto normal_match;
925
926	case L_('/'):
927	if (NO_LEADING_PERIOD (flags))
928	{
929	if (n == string_end \|\| c != (UCHAR) *n)
930	return FNM_NOMATCH;
931
932	new_no_leading_period = true;
933	break;
934	}
935	/* FALLTHROUGH */
936	default:
937	normal_match:
938	if (n == string_end \|\| c != FOLD ((UCHAR) *n))
939	return FNM_NOMATCH;
940	}
941
942	no_leading_period = new_no_leading_period;
943	++n;
944	}
945
946	if (n == string_end)
947	return 0;
948
949	if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
950	/* The FNM_LEADING_DIR flag says that "foo" matches "foobar/frobozz". /
951	return 0;
952
953	return FNM_NOMATCH;
954	}
955
956
957	static const CHAR *
958	internal_function
959	END (const CHAR *pattern)
960	{
961	const CHAR *p = pattern;
962
963	while (1)
964	if (*++p == L_('\0'))
965	/* This is an invalid pattern. */
966	return pattern;
967	else if (*p == L_('['))
968	{
969	/* Handle brackets special. */
970	if (posixly_correct == 0)
971	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
972
973	/* Skip the not sign. We have to recognize it because of a possibly
974	following ']'. */
975	if (++p == L_('!') \|\| (posixly_correct < 0 && p == L_('^')))
976	++p;
977	/* A leading ']' is recognized as such. */
978	if (*p == L_(']'))
979	++p;
980	/* Skip over all characters of the list. */
981	while (*p != L_(']'))
982	if (*p++ == L_('\0'))
983	/* This is no valid pattern. */
984	return pattern;
985	}
986	else if ((p == L_('?') \|\| p == L_('') \|\| p == L_('+') \|\| *p == L_('@')
987	\|\| *p == L_('!')) && p[1] == L_('('))
988	p = END (p + 1);
989	else if (*p == L_(')'))
990	break;
991
992	return p + 1;
993	}
994
995
996	static int
997	internal_function
998	EXT (INT opt, const CHAR pattern, const CHAR string, const CHAR *string_end,
999	bool no_leading_period, int flags)
1000	{
1001	const CHAR *startp;
1002	size_t level;
1003	struct patternlist
1004	{
1005	struct patternlist *next;
1006	CHAR str[1];
1007	} *list = NULL;
1008	struct patternlist **lastp = &list;
1009	size_t pattern_len = STRLEN (pattern);
1010	const CHAR *p;
1011	const CHAR *rs;
1012	enum { ALLOCA_LIMIT = 8000 };
1013
1014	/* Parse the pattern. Store the individual parts in the list. */
1015	level = 0;
1016	for (startp = p = pattern + 1; ; ++p)
1017	if (*p == L_('\0'))
1018	/* This is an invalid pattern. */
1019	return -1;
1020	else if (*p == L_('['))
1021	{
1022	/* Handle brackets special. */
1023	if (posixly_correct == 0)
1024	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1025
1026	/* Skip the not sign. We have to recognize it because of a possibly
1027	following ']'. */
1028	if (++p == L_('!') \|\| (posixly_correct < 0 && p == L_('^')))
1029	++p;
1030	/* A leading ']' is recognized as such. */
1031	if (*p == L_(']'))
1032	++p;
1033	/* Skip over all characters of the list. */
1034	while (*p != L_(']'))
1035	if (*p++ == L_('\0'))
1036	/* This is no valid pattern. */
1037	return -1;
1038	}
1039	else if ((p == L_('?') \|\| p == L_('') \|\| p == L_('+') \|\| *p == L_('@')
1040	\|\| *p == L_('!')) && p[1] == L_('('))
1041	/* Remember the nesting level. */
1042	++level;
1043	else if (*p == L_(')'))
1044	{
1045	if (level-- == 0)
1046	{
1047	/* This means we found the end of the pattern. */
1048	#define NEW_PATTERN \
1049	struct patternlist *newp; \
1050	size_t plen; \
1051	size_t plensize; \
1052	size_t newpsize; \
1053	\
1054	plen = (opt == L_('?') \|\| opt == L_('@') \
1055	? pattern_len \
1056	: p - startp + 1); \
1057	plensize = plen * sizeof (CHAR); \
1058	newpsize = offsetof (struct patternlist, str) + plensize; \
1059	if ((size_t) -1 / sizeof (CHAR) < plen \
1060	\|\| newpsize < offsetof (struct patternlist, str) \
1061	\|\| ALLOCA_LIMIT <= newpsize) \
1062	return -1; \
1063	newp = (struct patternlist *) alloca (newpsize); \
1064	((CHAR ) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1065	newp->next = NULL; \
1066	*lastp = newp; \
1067	lastp = &newp->next
1068	NEW_PATTERN;
1069	break;
1070	}
1071	}
1072	else if (*p == L_('\|'))
1073	{
1074	if (level == 0)
1075	{
1076	NEW_PATTERN;
1077	startp = p + 1;
1078	}
1079	}
1080	assert (list != NULL);
1081	assert (p[-1] == L_(')'));
1082	#undef NEW_PATTERN
1083
1084	switch (opt)
1085	{
1086	case L_('*'):
1087	if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1088	return 0;
1089	/* FALLTHROUGH */
1090
1091	case L_('+'):
1092	do
1093	{
1094	for (rs = string; rs <= string_end; ++rs)
1095	/* First match the prefix with the current pattern with the
1096	current pattern. */
1097	if (FCT (list->str, string, rs, no_leading_period,
1098	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1099	/* This was successful. Now match the rest with the rest
1100	of the pattern. */
1101	&& (FCT (p, rs, string_end,
1102	rs == string
1103	? no_leading_period
1104	: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1105	flags & FNM_FILE_NAME
1106	? flags : flags & ~FNM_PERIOD) == 0
1107	/* This didn't work. Try the whole pattern. */
1108	\|\| (rs != string
1109	&& FCT (pattern - 1, rs, string_end,
1110	rs == string
1111	? no_leading_period
1112	: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1113	flags & FNM_FILE_NAME
1114	? flags : flags & ~FNM_PERIOD) == 0)))
1115	/* It worked. Signal success. */
1116	return 0;
1117	}
1118	while ((list = list->next) != NULL);
1119
1120	/* None of the patterns lead to a match. */
1121	return FNM_NOMATCH;
1122
1123	case L_('?'):
1124	if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1125	return 0;
1126	/* FALLTHROUGH */
1127
1128	case L_('@'):
1129	do
1130	/* I cannot believe it but `strcat' is actually acceptable
1131	here. Match the entire string with the prefix from the
1132	pattern list and the rest of the pattern following the
1133	pattern list. */
1134	if (FCT (STRCAT (list->str, p), string, string_end,
1135	no_leading_period,
1136	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1137	/* It worked. Signal success. */
1138	return 0;
1139	while ((list = list->next) != NULL);
1140
1141	/* None of the patterns lead to a match. */
1142	return FNM_NOMATCH;
1143
1144	case L_('!'):
1145	for (rs = string; rs <= string_end; ++rs)
1146	{
1147	struct patternlist *runp;
1148
1149	for (runp = list; runp != NULL; runp = runp->next)
1150	if (FCT (runp->str, string, rs, no_leading_period,
1151	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1152	break;
1153
1154	/* If none of the patterns matched see whether the rest does. */
1155	if (runp == NULL
1156	&& (FCT (p, rs, string_end,
1157	rs == string
1158	? no_leading_period
1159	: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1160	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1161	== 0))
1162	/* This is successful. */
1163	return 0;
1164	}
1165
1166	/* None of the patterns together with the rest of the pattern
1167	lead to a match. */
1168	return FNM_NOMATCH;
1169
1170	default:
1171	assert (! "Invalid extended matching operator");
1172	break;
1173	}
1174
1175	return -1;
1176	}
1177
1178
1179	#undef FOLD
1180	#undef CHAR
1181	#undef UCHAR
1182	#undef INT
1183	#undef FCT
1184	#undef EXT
1185	#undef END
1186	#undef MEMPCPY
1187	#undef MEMCHR
1188	#undef STRCOLL
1189	#undef STRLEN
1190	#undef STRCAT
1191	#undef L_
1192	#undef BTOWC

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vendor/tar/1.16.1/lib/fnmatch_loop.c@ 3342

Download in other formats: