source: branches/libc-0.6/src/emx/include/InnoTekLIBC/locale.h@ 3055

Last change on this file since 3055 was 3055, checked in by bird, 19 years ago

Added a fixed default (C/POSIX) collate locale to avoid unilib strangeness. Made an attempt at adjusting for this strangeness for the non-default locales...

  • Property cvs2svn:cvs-rev set to 1.5
  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.4 KB
Line 
1/* $Id: locale.h 3055 2007-04-08 17:11:36Z bird $ */
2/** @file
3 *
4 * Internal InnoTek LIBC header.
5 * Locale support implementation through OS/2 Unicode API.
6 *
7 * Copyright (c) 2003 InnoTek Systemberatung GmbH
8 * Copyright (c) 2003-2004 knut st. osmundsen <[email protected]>
9 *
10 *
11 * This file is part of InnoTek LIBC.
12 *
13 * InnoTek LIBC is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * InnoTek LIBC is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with InnoTek LIBC; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 *
27 */
28
29#ifndef __InnoTekLIBC_locale_h__
30#define __InnoTekLIBC_locale_h__
31
32#include <sys/cdefs.h>
33#include <sys/_types.h>
34#include <locale.h>
35#include <uconv.h>
36
37__BEGIN_DECLS
38
/**
 * LC_COLLATE information.
 *
 * Layout note: the __OS2__-only members sit between the lookup tables and
 * the trailing 'mbcs' flag, so member order must not be changed.
 */
typedef struct __libc_LocaleCollate
{
    /** Character weight for SBCS codepages, one entry per byte value. */
    unsigned char   auchWeight[256];
    /** MBCS prefixes. Two bits per character, i.e. four characters per byte. */
    unsigned char   au2MBCSPrefixs[256/4];
#ifdef __OS2__
    /** The converter object used to convert to and from the selected
     *  codepage (used with MBCS codepages only). */
    UconvObject     uobj;
    /** The locale object. */
    LocaleObject    lobj;
#endif
    /** Non-zero if there are any MBCS prefix characters in the codepage. */
    char            mbcs;
} __LIBC_LOCALECOLLATE;
/** Pointer to a locale collate structure. */
typedef __LIBC_LOCALECOLLATE *__LIBC_PLOCALECOLLATE;
60
61/**
62 * Multibyte to/from wide character conversion functions.
63 */
64typedef struct __libc_localeCTypeFuncs
65{
66 int (*pfnmbsinit)(const __mbstate_t *);
67 size_t (*pfnmbrtowc)(__wchar_t * __restrict, const char * __restrict, size_t, __mbstate_t * __restrict);
68 size_t (*pfnmbsnrtowcs)(__wchar_t * __restrict, const char ** __restrict, size_t, size_t, __mbstate_t * __restrict);
69 size_t (*pfnwcrtomb)(char * __restrict, __wchar_t, __mbstate_t * __restrict);
70 size_t (*pfnwcsnrtombs)(char * __restrict, const __wchar_t ** __restrict, size_t, size_t, __mbstate_t * __restrict);
71} __LIBC_LOCALECTYPEFUNCS;
72/** Pointer to multibyte/wide character conversion functions. */
73typedef __LIBC_LOCALECTYPEFUNCS *__LIBC_PLOCALECTYPEFUNCS;
74/** Pointer to const multibyte/wide character conversion functions. */
75typedef const __LIBC_LOCALECTYPEFUNCS *__LIBC_PCLOCALECTYPEFUNCS;
76
77/**
78 * This structure contains the flags and uppercase/lowercase tables.
79 */
80typedef struct __libc_LocaleCtype
81{
82 /** All uppercased characters. */
83 unsigned char auchUpper[256];
84 /** All lowercased characters. */
85 unsigned char auchLower[256];
86 /** Bit flags for every character (for isXXX() function series). */
87 unsigned aufType[256];
88
89/* part which we don't 'expose': */
90 /** MBCS prefixes. Two bits per character. */
91 unsigned char au2MBCSPrefixs[256/4];
92 /** Unicode translation. (0xffff means no translation.) */
93 unsigned short aucUnicode[256];
94 /** Unicode -> SBCS conversion: 0..128. */
95 unsigned char auchToSBCS0To128[128];
96 /** Unicode -> SBCS conversion: Custom regions. */
97 struct
98 {
99 /** First unicode code point. */
100 unsigned short usStart;
101 /** Number of entries used. */
102 unsigned short cChars;
103 /** Array SBCS chars corresponding to (wc - usStart). 0 means no conversion. */
104 unsigned char auch[28];
105 } aSBCSs[8];
106 /** Number of aSBCS regions in use. */
107 unsigned cSBCSs;
108 /** Conversion functions. */
109 __LIBC_LOCALECTYPEFUNCS CtypeFuncs;
110#ifdef __OS2__
111 /** The converter object to convert to and from selected codepage
112 (used with MBCS codepages only). */
113 UconvObject uobj;
114 /** The locale object. */
115 LocaleObject lobj;
116#endif
117 /** Non-zero if there are any MBCS prefix characters in codepage. */
118 char mbcs;
119 /** Codeset name. */
120 char szCodeSet[32];
121} __LIBC_LOCALECTYPE;
122/** Pointer to the Ctype locale struct. */
123typedef __LIBC_LOCALECTYPE *__LIBC_PLOCALECTYPE;
124
125
126/**
127 * Unicode CType data.
128 * The structure contains information for the first 256 unicode chars.
129 */
130typedef struct __libc_localeWCType
131{
132 /** All uppercased characters. */
133 __wchar_t awcUpper[256];
134 /** All lowercased characters. */
135 __wchar_t awcLower[256];
136 /** Bit flags for every character (for iswXXX() function series). */
137 unsigned aufType[256];
138 /** Mask used to check if an index is within the above arrays.
139 * This is required because 'C' doesn't do more than 0-127. So,
140 * the mask is either ~0xff or ~0x7f. */
141 unsigned uMask;
142} __LIBC_LOCALEWCTYPE;
143/** Pointer to the Ctype unicode struct. */
144typedef __LIBC_LOCALEWCTYPE *__LIBC_PLOCALEWCTYPE;
145
/**
 * This structure keeps the time formatting rules.
 * The fConsts flag indicates what kind of memory is backing the strings.
 */
typedef struct __libc_LocaleTime
{
    /** Short month names. */
    char   *smonths[12];
    /** Long month names. */
    char   *lmonths[12];
    /** Short weekday names. */
    char   *swdays[7];
    /** Long weekday names. */
    char   *lwdays[7];
    /** Date and time format. */
    char   *date_time_fmt;
    /** Date format. */
    char   *date_fmt;
    /** Time format. */
    char   *time_fmt;
    /** AM string. */
    char   *am;
    /** PM string. */
    char   *pm;
    /** AM/PM format. (T_FMT_AMPM) */
    char   *ampm_fmt;
    /** ERA. */
    char   *era;
    /** ERA_D_FMT. */
    char   *era_date_fmt;
    /** ERA_D_T_FMT. */
    char   *era_date_time_fmt;
    /** ERA_T_FMT. */
    char   *era_time_fmt;
    /** ALT_DIGITS. */
    char   *alt_digits;
    /** DATESEP. */
    char   *datesep;
    /** TIMESEP. */
    char   *timesep;
    /** LISTSEP. */
    char   *listsep;
    /** If set, all the strings are consts and shall not be free()ed. */
    int     fConsts;
} __LIBC_LOCALETIME;
/** Pointer to time locale data. */
typedef __LIBC_LOCALETIME *__LIBC_PLOCALETIME;
193
194
/**
 * Locale information structure.
 *
 * This is the lconv struct with a couple of private fields indicating
 * which parts of it we have updated and assigned heap strings.
 */
typedef struct __libc_localeLconv
{
    /** The lconv structure. Kept as the first member. */
    struct lconv    s;
    /** CRNCYSTR. */
    char           *pszCrncyStr;
    /** Indicates that all the numeric members are readonly const strings. */
    int             fNumericConsts;
    /** Indicates that all the monetary members are readonly const strings. */
    int             fMonetaryConsts;
} __LIBC_LOCALELCONV;
/** Pointer to extended locale information structure. */
typedef __LIBC_LOCALELCONV *__LIBC_PLOCALELCONV;
214
215
/**
 * Message locale information.
 * The content is available through the nl_langinfo() interface only.
 */
typedef struct __libc_localeMsg
{
    /** YESEXPR */
    char   *pszYesExpr;
    /** NOEXPR */
    char   *pszNoExpr;
    /** YESSTR */
    char   *pszYesStr;
    /** NOSTR */
    char   *pszNoStr;
    /** Indicates that all members are readonly const strings. */
    int     fConsts;
} __LIBC_LOCALEMSG;
/** Pointer to the message locale information. */
typedef __LIBC_LOCALEMSG *__LIBC_PLOCALEMSG;
235
236
237/** String collation information. */
238extern __LIBC_LOCALECOLLATE __libc_gLocaleCollate;
239/** String collation information for the default 'C'/'POSIX' locale. */
240extern const __LIBC_LOCALECOLLATE __libc_gLocaleCollateDefault;
241/** Character case conversion tables. */
242extern __LIBC_LOCALECTYPE __libc_GLocaleCtype;
243/** Character case conversion tables for the default 'C'/'POSIX' locale. */
244extern const __LIBC_LOCALECTYPE __libc_GLocaleCtypeDefault;
245/** Cached Unicode (__wchar_t) case conversion tables and flags. */
246extern __LIBC_LOCALEWCTYPE __libc_GLocaleWCtype;
247/** Locale information structure. */
248extern __LIBC_LOCALELCONV __libc_gLocaleLconv;
249/* Locale information structure for the 'C'/'POSIX' locale. */
250extern const __LIBC_LOCALELCONV __libc_gLocaleLconvDefault;
251/** Date / time formatting rules. */
252extern __LIBC_LOCALETIME __libc_gLocaleTime;
253/** Date / time formatting rules for the 'C'/'POSIX' locale. */
254extern const __LIBC_LOCALETIME __libc_gLocaleTimeDefault;
255/** Message locale information. */
256extern __LIBC_LOCALEMSG __libc_gLocaleMsg;
257/** Message locale information for the 'C'/'POSIX' locale. */
258extern const __LIBC_LOCALEMSG __libc_gLocaleMsgDefault;
259
/** Macros to lock the different locale structures.
 *
 * All of them currently expand to an empty statement, i.e. no locking is
 * actually performed.
 *
 * NOTE(review): LOCALE_CTYPE_RW_LOCK/LOCALE_CTYPE_RW_UNLOCK used to be
 * defined twice with identical bodies. The duplicates have been dropped;
 * if the second pair was meant to carry a different name, it still needs
 * to be added.
 * @{
 */
#define LOCALE_LOCK()               do {} while (0)
#define LOCALE_UNLOCK()             do {} while (0)
#define LOCALE_CTYPE_RW_LOCK()      do {} while (0)
#define LOCALE_CTYPE_RW_UNLOCK()    do {} while (0)
/** @} */
270
271/** Convert a string to Unicode, apply some transform and convert back. */
272extern void __libc_ucs2Do(UconvObject *uconv, char *s, void *arg, int (*xform)(UniChar *, void *));
273/** Convert a MBCS character to Unicode; returns number of bytes in MBCS char. */
274extern int __libc_ucs2To(UconvObject, const unsigned char *, size_t, UniChar *);
275/** Convert a Unicode character to MBCS. */
276extern int __libc_ucs2From(UconvObject, UniChar, unsigned char *, size_t);
277/** Converts a codepage string to unichar and something libuni might recognize. */
278extern void __libc_TranslateCodepage(const char *cp, UniChar *ucp);
279
280extern int __libc_localeCreateObjects(const char *pszLocale, const char *pszCodepage, char *pszCodepageActual, LocaleObject *plobj, UconvObject *puobj);
281
282extern void __libc_localeFuncsSBCS(__LIBC_PLOCALECTYPEFUNCS pFuncs);
283extern void __libc_localeFuncsDBCS(__LIBC_PLOCALECTYPEFUNCS pFuncs);
284extern void __libc_localeFuncsMBCS(__LIBC_PLOCALECTYPEFUNCS pFuncs);
285extern void __libc_localeFuncsUCS2(__LIBC_PLOCALECTYPEFUNCS pFuncs);
286extern void __libc_localeFuncsUTF8(__LIBC_PLOCALECTYPEFUNCS pFuncs);
287extern void __libc_localeFuncsDefault(__LIBC_PLOCALECTYPEFUNCS pFuncs);
288
289extern size_t __libc_localeFuncsGeneric_mbsnrtowcs(size_t (*pfnmbrtowc)(__wchar_t * __restrict, const char * __restrict, size_t, __mbstate_t * __restrict),
290 __wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, __mbstate_t * __restrict ps);
291extern size_t __libc_localeFuncsGeneric_wcsnrtombs(size_t (*pfnwcrtomb)(char * __restrict, __wchar_t, __mbstate_t * __restrict),
292 char * __restrict dst, const __wchar_t ** __restrict src, size_t nwc, size_t len, __mbstate_t * __restrict ps);
293
294extern void __libc_localeFuncsNone(__LIBC_PLOCALECTYPEFUNCS pFuncs);
295extern size_t __libc_locale_none_mbrtowc(__wchar_t * __restrict, const char * __restrict, size_t, __mbstate_t * __restrict);
296extern int __libc_locale_none_mbsinit(const __mbstate_t *);
297extern size_t __libc_locale_none_mbsnrtowcs(__wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, __mbstate_t * __restrict ps __unused);
298extern size_t __libc_locale_none_wcrtomb(char * __restrict, __wchar_t, __mbstate_t * __restrict);
299extern size_t __libc_locale_none_wcsnrtombs(char * __restrict, const __wchar_t ** __restrict, size_t, size_t, __mbstate_t * __restrict);
300
301
/** Handy macros for working with the au2MBCSPrefixs members of
 * the locale data structures. The au2MBCSPrefixs members are
 * arrays whose elements are 2 bits long: the entry for character c
 * lives in byte (c >> 2) at bit offset 2 * (c & 3).
 *
 * The setter and the getter MUST use the same bit offset; the getter
 * previously used a mirrored offset and therefore read the entry of a
 * neighbouring character.
 * @{
 */

/** ORs the 2-bit value v into the entry for character c of table a.
 * Only sets bits, so the entry has to be zero beforehand.
 * Evaluates c more than once. */
#define SET_MBCS_PREFIX(a, c, v) \
    ((a)[((unsigned char)(c)) >> 2] |= (v) << (2 * ((c) & 3)))

/** Reads the 2-bit entry for character c from table a.
 * Evaluates c more than once. */
#define LEN_MBCS_PREFIX(a, c) \
    (((a)[((unsigned char)(c)) >> 2] >> (2 * ((c) & 3))) & 3)

/** Non-zero when the entry for c in p->au2MBCSPrefixs is anything but 1.
 * NOTE(review): presumably 1 marks a single byte character - confirm. */
#define IS_MBCS_PREFIX(p, c) \
    (LEN_MBCS_PREFIX((p)->au2MBCSPrefixs, c) != 1)

/** Stores the entry for c in v and is non-zero when it is greater than 1. */
#define CHK_MBCS_PREFIX(p, c, v) \
    ((v = LEN_MBCS_PREFIX((p)->au2MBCSPrefixs, c)) > 1)
/** @} */
319
320#include <ctype.h>
321
322/**
323 * Convert the type info we get from the unicode lib to libc talk.
324 * ASSUMES that none of the locals differs from the unicode spec
325 *
326 * @returns libc ctype flags.
327 * @param pUniType The unicode type info to translate.
328 * @param wc The unicode code point.
329 */
330static inline unsigned ___wctype_uni(const UNICTYPE *pUniType, wchar_t wc)
331{
332 unsigned ufType = 0;
333 /* ASSUMES CT_* << 8 == __* ! */
334 ufType = ((unsigned)pUniType->itype << 8)
335 & (__CT_UPPER | __CT_LOWER | __CT_DIGIT | __CT_SPACE |
336 __CT_PUNCT | __CT_CNTRL | __CT_BLANK | __CT_XDIGIT |
337 __CT_ALPHA | __CT_ALNUM | __CT_GRAPH | __CT_PRINT |
338 __CT_NUMBER | __CT_SYMBOL | __CT_ASCII);
339 if (pUniType->extend & C3_IDEOGRAPH)
340 ufType |= __CT_IDEOGRAM;
341 if (ufType & (__CT_XDIGIT | __CT_DIGIT))
342 {
343 if ( (unsigned)wc - 0x30U <= (0x39 - 0x30))
344 ufType |= (unsigned)wc - 0x30;
345 else if ((unsigned)wc - 0x41U <= (0x46 - 0x41))
346 ufType |= (unsigned)wc - 0x41 + 0xa;
347 else
348 {
349 unsigned uVal = UniQueryNumericValue(wc);
350 if (!(uVal & ~0xffU))
351 ufType |= uVal;
352 }
353 }
354 ufType |= (pUniType->bidi & 0xf << 24);
355
356 /** @todo screen width. */
357 return ufType;
358}
359
360__END_DECLS
361
362#endif /* __InnoTekLIBC_locale_h__ */
363
Note: See TracBrowser for help on using the repository browser.