source: trunk/libc/include/klibc/locale.h

Last change on this file was 3897, checked in by bird, 11 years ago

svn properties.

  • Property cvs2svn:cvs-rev set to 1.5
  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 13.5 KB
Line 
1/* $Id: locale.h 3897 2014-06-29 19:36:55Z bird $ */
2/** @file
3 *
4 * kLIBC - Internal locale header.
5 *
6 * Copyright (c) 2003 InnoTek Systemberatung GmbH
7 * Copyright (c) 2003-2006 knut st. osmundsen <[email protected]>
8 *
9 *
10 * This file is part of kLIBC.
11 *
12 * kLIBC is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License as published
14 * by the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * kLIBC is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public License
23 * along with kLIBC; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 *
26 */
27
28#ifndef __klibc_locale_h__
29#define __klibc_locale_h__
30
31#include <sys/cdefs.h>
32#include <sys/types.h>
33#include <locale.h>
34#ifdef __OS2__
35# include <uconv.h>
36#endif
37#ifdef __CTYPE_H_
38# error "klibc/locale.h must be included *before* ctype.h!"
39#endif
40
41__BEGIN_DECLS
42
/**
 * Collation data (LC_COLLATE) for the selected locale.
 */
struct __libc_LocaleCollate
{
    /** Weight of each character; used with SBCS codepages. */
    unsigned char   auchWeight[256];
    /** MBCS prefix table holding two bits per character (256 / 4 bytes). */
    unsigned char   au2MBCSPrefixs[256/4];
#ifdef __OS2__
    /** Converter object for converting to and from the selected codepage
        (only used with MBCS codepages). */
    UconvObject     uobj;
    /** The locale object. */
    LocaleObject    lobj;
#endif
    /** Non-zero when the codepage contains any MBCS prefix characters. */
    char            mbcs;
};
/** The LC_COLLATE data type. */
typedef struct __libc_LocaleCollate __LIBC_LOCALECOLLATE;
/** Pointer to a locale collate structure. */
typedef __LIBC_LOCALECOLLATE *__LIBC_PLOCALECOLLATE;
64
65/**
66 * Multibyte to/from wide character conversion functions.
67 */
68typedef struct __libc_localeCTypeFuncs
69{
70 int (*pfnmbsinit)(const __mbstate_t *);
71 size_t (*pfnmbrtowc)(__wchar_t * __restrict, const char * __restrict, size_t, __mbstate_t * __restrict);
72 size_t (*pfnmbsnrtowcs)(__wchar_t * __restrict, const char ** __restrict, size_t, size_t, __mbstate_t * __restrict);
73 size_t (*pfnwcrtomb)(char * __restrict, __wchar_t, __mbstate_t * __restrict);
74 size_t (*pfnwcsnrtombs)(char * __restrict, const __wchar_t ** __restrict, size_t, size_t, __mbstate_t * __restrict);
75} __LIBC_LOCALECTYPEFUNCS;
76/** Pointer to multibyte/wide character conversion functions. */
77typedef __LIBC_LOCALECTYPEFUNCS *__LIBC_PLOCALECTYPEFUNCS;
78/** Pointer to const multibyte/wide character conversion functions. */
79typedef const __LIBC_LOCALECTYPEFUNCS *__LIBC_PCLOCALECTYPEFUNCS;
80
81/**
82 * This structure contains the flags and uppercase/lowercase tables.
83 */
84typedef struct __libc_LocaleCtype
85{
86 /** All uppercased characters. */
87 unsigned char auchUpper[256];
88 /** All lowercased characters. */
89 unsigned char auchLower[256];
90 /** Bit flags for every character (for isXXX() function series). */
91 unsigned aufType[256];
92
93/* part which we don't 'expose': */
94 /** MBCS prefixes. Two bits per character. */
95 unsigned char au2MBCSPrefixs[256/4];
96 /** Unicode translation. (0xffff means no translation.) */
97 unsigned short aucUnicode[256];
98 /** Unicode -> SBCS conversion: 0..128. */
99 unsigned char auchToSBCS0To128[128];
100 /** Unicode -> SBCS conversion: Custom regions. */
101 struct
102 {
103 /** First unicode code point. */
104 unsigned short usStart;
105 /** Number of entries used. */
106 unsigned short cChars;
107 /** Array SBCS chars corresponding to (wc - usStart). 0 means no conversion. */
108 unsigned char auch[28];
109 } aSBCSs[8];
110 /** Number of aSBCS regions in use. */
111 unsigned cSBCSs;
112 /** Conversion functions. */
113 __LIBC_LOCALECTYPEFUNCS CtypeFuncs;
114#ifdef __OS2__
115 /** The converter object to convert to and from selected codepage
116 (used with MBCS codepages only). */
117 UconvObject uobj;
118 /** The locale object. */
119 LocaleObject lobj;
120#endif
121 /** Non-zero if there are any MBCS prefix characters in codepage. */
122 char mbcs;
123 /** Codeset name. */
124 char szCodeSet[32];
125} __LIBC_LOCALECTYPE;
126/** Pointer to the Ctype locale struct. */
127typedef __LIBC_LOCALECTYPE *__LIBC_PLOCALECTYPE;
128
129
130/**
131 * Unicode CType data.
132 * The structure contains information for the first 256 unicode chars.
133 */
134typedef struct __libc_localeWCType
135{
136 /** All uppercased characters. */
137 __wchar_t awcUpper[256];
138 /** All lowercased characters. */
139 __wchar_t awcLower[256];
140 /** Bit flags for every character (for iswXXX() function series). */
141 unsigned aufType[256];
142 /** Mask used to check if an index is within the above arrays.
143 * This is required because 'C' doesn't do more than 0-127. So,
144 * the mask is either ~0xff or ~0x7f. */
145 unsigned uMask;
146} __LIBC_LOCALEWCTYPE;
147/** Pointer to the Ctype unicode struct. */
148typedef __LIBC_LOCALEWCTYPE *__LIBC_PLOCALEWCTYPE;
149
/**
 * Time formatting rules (LC_TIME).
 *
 * The fConsts member tells what kind of memory is backing the strings.
 */
struct __libc_LocaleTime
{
    char   *smonths[12];        /**< Short month names. */
    char   *lmonths[12];        /**< Long month names. */
    char   *swdays[7];          /**< Short weekday names. */
    char   *lwdays[7];          /**< Long weekday names. */
    char   *date_time_fmt;      /**< Date and time format. */
    char   *date_fmt;           /**< Date format. */
    char   *time_fmt;           /**< Time format. */
    char   *am;                 /**< AM strings. */
    char   *pm;                 /**< PM strings. */
    char   *ampm_fmt;           /**< AM/PM format. (T_FMT_AMPM) */
    char   *era;                /**< ERA */
    char   *era_date_fmt;       /**< ERA_D_FMT. */
    char   *era_date_time_fmt;  /**< ERA_D_T_FMT. */
    char   *era_time_fmt;       /**< ERA_T_FMT. */
    char   *alt_digits;         /**< ALT_DIGITS. */
    char   *datesep;            /**< DATESEP. */
    char   *timesep;            /**< TIMESEP. */
    char   *listsep;            /**< LISTSEP. */
    /** When set, all the strings are consts and shall not be free()ed. */
    int     fConsts;
};
/** The time locale data type. */
typedef struct __libc_LocaleTime __LIBC_LOCALETIME;
/** Pointer to time locale data. */
typedef __LIBC_LOCALETIME *__LIBC_PLOCALETIME;
197
198
/**
 * Extended locale information structure.
 *
 * Wraps the standard lconv struct and adds a few private fields recording
 * which parts of it have been updated and been assigned heap strings.
 */
struct __libc_localeLconv
{
    /** The lconv structure. */
    struct lconv    s;
    /** CRNCYSTR. */
    char           *pszCrncyStr;
    /** Set when all the numeric members are readonly const strings. */
    int             fNumericConsts;
    /** Set when all the monetary members are readonly const strings. */
    int             fMonetaryConsts;
};
/** The extended locale information type. */
typedef struct __libc_localeLconv __LIBC_LOCALELCONV;
/** Pointer to extended locale information structure. */
typedef __LIBC_LOCALELCONV *__LIBC_PLOCALELCONV;
218
219
/**
 * Message locale information (LC_MESSAGES).
 * The content is only available through the nl_langinfo() interface.
 */
struct __libc_localeMsg
{
    char   *pszYesExpr;     /**< YESEXPR */
    char   *pszNoExpr;      /**< NOEXPR */
    char   *pszYesStr;      /**< YESSTR */
    char   *pszNoStr;       /**< NOSTR */
    /** Set when all members are readonly const strings. */
    int     fConsts;
};
/** The message locale information type. */
typedef struct __libc_localeMsg __LIBC_LOCALEMSG;
/** Pointer to the message locale information. */
typedef __LIBC_LOCALEMSG *__LIBC_PLOCALEMSG;
239
240
241/** String collation information. */
242extern __LIBC_LOCALECOLLATE __libc_gLocaleCollate;
243/** String collation information for the default 'C'/'POSIX' locale. */
244extern const __LIBC_LOCALECOLLATE __libc_gLocaleCollateDefault;
245/** Character case conversion tables. */
246extern __LIBC_LOCALECTYPE __libc_GLocaleCtype;
247/** Character case conversion tables for the default 'C'/'POSIX' locale. */
248extern const __LIBC_LOCALECTYPE __libc_GLocaleCtypeDefault;
249/** Cached Unicode (__wchar_t) case conversion tables and flags. */
250extern __LIBC_LOCALEWCTYPE __libc_GLocaleWCtype;
251/** Locale information structure. */
252extern __LIBC_LOCALELCONV __libc_gLocaleLconv;
253/* Locale information structure for the 'C'/'POSIX' locale. */
254extern const __LIBC_LOCALELCONV __libc_gLocaleLconvDefault;
255/** Date / time formatting rules. */
256extern __LIBC_LOCALETIME __libc_gLocaleTime;
257/** Date / time formatting rules for the 'C'/'POSIX' locale. */
258extern const __LIBC_LOCALETIME __libc_gLocaleTimeDefault;
259/** Message locale information. */
260extern __LIBC_LOCALEMSG __libc_gLocaleMsg;
261/** Message locale information for the 'C'/'POSIX' locale. */
262extern const __LIBC_LOCALEMSG __libc_gLocaleMsgDefault;
263
/** Macros to lock the different locale structures.
 *
 * Every macro currently expands to an empty statement, so no locking is
 * actually performed. The do {} while (0) form keeps each one usable as a
 * single statement, e.g. in an unbraced if/else.
 *
 * NOTE(review): the LOCALE_CTYPE_RW_LOCK / LOCALE_CTYPE_RW_UNLOCK pair used
 * to be defined twice with identical bodies. The redundant copy has been
 * dropped; if a second, differently named pair (e.g. a read-only variant)
 * was intended, it still needs to be added - confirm against the callers.
 * @{
 */
#define LOCALE_LOCK()               do {} while (0)
#define LOCALE_UNLOCK()             do {} while (0)
#define LOCALE_CTYPE_RW_LOCK()      do {} while (0)
#define LOCALE_CTYPE_RW_UNLOCK()    do {} while (0)
/** @} */
274
275extern void __libc_localeFuncsSBCS(__LIBC_PLOCALECTYPEFUNCS pFuncs);
276extern void __libc_localeFuncsDBCS(__LIBC_PLOCALECTYPEFUNCS pFuncs);
277extern void __libc_localeFuncsMBCS(__LIBC_PLOCALECTYPEFUNCS pFuncs);
278extern void __libc_localeFuncsUCS2(__LIBC_PLOCALECTYPEFUNCS pFuncs);
279extern void __libc_localeFuncsUTF8(__LIBC_PLOCALECTYPEFUNCS pFuncs);
280extern void __libc_localeFuncsDefault(__LIBC_PLOCALECTYPEFUNCS pFuncs);
281
282extern size_t __libc_localeFuncsGeneric_mbsnrtowcs(size_t (*pfnmbrtowc)(__wchar_t * __restrict, const char * __restrict, size_t, __mbstate_t * __restrict),
283 __wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, __mbstate_t * __restrict ps);
284extern size_t __libc_localeFuncsGeneric_wcsnrtombs(size_t (*pfnwcrtomb)(char * __restrict, __wchar_t, __mbstate_t * __restrict),
285 char * __restrict dst, const __wchar_t ** __restrict src, size_t nwc, size_t len, __mbstate_t * __restrict ps);
286
287extern void __libc_localeFuncsNone(__LIBC_PLOCALECTYPEFUNCS pFuncs);
288extern size_t __libc_locale_none_mbrtowc(__wchar_t * __restrict, const char * __restrict, size_t, __mbstate_t * __restrict);
289extern int __libc_locale_none_mbsinit(const __mbstate_t *);
290extern size_t __libc_locale_none_mbsnrtowcs(__wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, __mbstate_t * __restrict ps __unused);
291extern size_t __libc_locale_none_wcrtomb(char * __restrict, __wchar_t, __mbstate_t * __restrict);
292extern size_t __libc_locale_none_wcsnrtombs(char * __restrict, const __wchar_t ** __restrict, size_t, size_t, __mbstate_t * __restrict);
293
294
/** Handy macros for working with the au2MBCSPrefixs members of
 * the locale data structures. The au2MBCSPrefixs members are
 * arrays whose elements are 2 bits long.
 *
 * Layout: character c lives in byte (c >> 2), at bit offset 2 * (c & 3).
 * SET_MBCS_PREFIX and LEN_MBCS_PREFIX must agree on this layout; the reader
 * previously used slot ((c & 3) ^ 3), so values were read back from a
 * different slot than the one they were stored in.
 *
 * All macros evaluate c more than once, so don't pass expressions with
 * side effects.
 * @{
 */

/** Stores the 2-bit value v for character c in table a.
 * The value is OR'ed in, so the slot must be zero beforehand. */
#define SET_MBCS_PREFIX(a, c, v) \
    ((a)[((unsigned char)(c)) >> 2] |= (v) << (2 * (((unsigned char)(c)) & 3)))

/** Reads the 2-bit value stored for character c in table a. */
#define LEN_MBCS_PREFIX(a, c) \
    (((a)[((unsigned char)(c)) >> 2] >> (2 * (((unsigned char)(c)) & 3))) & 3)

/** True when the value stored for character c in (p)->au2MBCSPrefixs is not 1. */
#define IS_MBCS_PREFIX(p, c) \
    (LEN_MBCS_PREFIX((p)->au2MBCSPrefixs, c) != 1)

/** Assigns the value stored for character c to v; true when it is above 1. */
#define CHK_MBCS_PREFIX(p, c, v) \
    (((v) = LEN_MBCS_PREFIX((p)->au2MBCSPrefixs, c)) > 1)
/** @} */
312
313#ifdef __OS2__
314#include <ctype.h>
315
316/**
317 * Convert the type info we get from the unicode lib to libc talk.
318 * ASSUMES that none of the locals differs from the unicode spec
319 *
320 * @returns libc ctype flags.
321 * @param pUniType The unicode type info to translate.
322 * @param wc The unicode code point.
323 */
324static inline unsigned ___wctype_uni(const UNICTYPE *pUniType, wchar_t wc)
325{
326 unsigned ufType = 0;
327 /* ASSUMES CT_* << 8 == __* ! */
328 ufType = ((unsigned)pUniType->itype << 8)
329 & (__CT_UPPER | __CT_LOWER | __CT_DIGIT | __CT_SPACE |
330 __CT_PUNCT | __CT_CNTRL | __CT_BLANK | __CT_XDIGIT |
331 __CT_ALPHA | __CT_ALNUM | __CT_GRAPH | __CT_PRINT |
332 __CT_NUMBER | __CT_SYMBOL | __CT_ASCII);
333 if (pUniType->extend & C3_IDEOGRAPH)
334 ufType |= __CT_IDEOGRAM;
335 if (ufType & (__CT_XDIGIT | __CT_DIGIT))
336 {
337 if ( (unsigned)wc - 0x30U <= (0x39 - 0x30))
338 ufType |= (unsigned)wc - 0x30;
339 else if ((unsigned)wc - 0x41U <= (0x46 - 0x41))
340 ufType |= (unsigned)wc - 0x41 + 0xa;
341 else
342 {
343 unsigned uVal = UniQueryNumericValue(wc);
344 if (!(uVal & ~0xffU))
345 ufType |= uVal;
346 }
347 }
348 ufType |= (pUniType->bidi & 0xf << 24);
349
350 /** @todo screen width. */
351 return ufType;
352}
353
354/** Convert a string to Unicode, apply some transform and convert back. */
355extern void __libc_ucs2Do(UconvObject *uconv, char *s, void *arg, int (*xform)(UniChar *, void *));
356/** Convert a MBCS character to Unicode; returns number of bytes in MBCS char. */
357extern int __libc_ucs2To(UconvObject, const unsigned char *, size_t, UniChar *);
358/** Convert a Unicode character to MBCS. */
359extern int __libc_ucs2From(UconvObject, UniChar, unsigned char *, size_t);
360/** Converts a codepage string to unichar and something libuni might recognize. */
361extern void __libc_TranslateCodepage(const char *cp, UniChar *ucp);
362
363extern int __libc_localeCreateObjects(const char *pszLocale, const char *pszCodepage, char *pszCodepageActual, LocaleObject *plobj, UconvObject *puobj);
364#endif /* __OS2__ */
365
366
367__END_DECLS
368
369#endif
370
Note: See TracBrowser for help on using the repository browser.