| 1 | /* $Id: locale.h 3055 2007-04-08 17:11:36Z bird $ */
|
|---|
| 2 | /** @file
|
|---|
| 3 | *
|
|---|
| 4 | * Internal InnoTek LIBC header.
|
|---|
| 5 | * Locale support implementation through OS/2 Unicode API.
|
|---|
| 6 | *
|
|---|
| 7 | * Copyright (c) 2003 InnoTek Systemberatung GmbH
|
|---|
| 8 | * Copyright (c) 2003-2004 knut st. osmundsen <[email protected]>
|
|---|
| 9 | *
|
|---|
| 10 | *
|
|---|
| 11 | * This file is part of InnoTek LIBC.
|
|---|
| 12 | *
|
|---|
| 13 | * InnoTek LIBC is free software; you can redistribute it and/or modify
|
|---|
| 14 | * it under the terms of the GNU General Public License as published by
|
|---|
| 15 | * the Free Software Foundation; either version 2 of the License, or
|
|---|
| 16 | * (at your option) any later version.
|
|---|
| 17 | *
|
|---|
| 18 | * InnoTek LIBC is distributed in the hope that it will be useful,
|
|---|
| 19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 21 | * GNU General Public License for more details.
|
|---|
| 22 | *
|
|---|
| 23 | * You should have received a copy of the GNU General Public License
|
|---|
| 24 | * along with InnoTek LIBC; if not, write to the Free Software
|
|---|
| 25 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|---|
| 26 | *
|
|---|
| 27 | */
|
|---|
| 28 |
|
|---|
| 29 | #ifndef __InnoTekLIBC_locale_h__
|
|---|
| 30 | #define __InnoTekLIBC_locale_h__
|
|---|
| 31 |
|
|---|
| 32 | #include <sys/cdefs.h>
|
|---|
| 33 | #include <sys/_types.h>
|
|---|
| 34 | #include <locale.h>
|
|---|
| 35 | #include <uconv.h>
|
|---|
| 36 |
|
|---|
| 37 | __BEGIN_DECLS
|
|---|
| 38 |
|
|---|
/**
 * Collation (LC_COLLATE) data.
 */
typedef struct __libc_LocaleCollate
{
    /** Per-character collation weight; used for SBCS codepages. */
    unsigned char   auchWeight[256];
    /** MBCS prefixes, packed as two bits per character. */
    unsigned char   au2MBCSPrefixs[256/4];
#ifdef __OS2__
    /** Converter object for converting to and from the selected codepage
     *  (only used with MBCS codepages). */
    UconvObject     uobj;
    /** The locale object. */
    LocaleObject    lobj;
#endif
    /** Non-zero when the codepage has any MBCS prefix characters. */
    char            mbcs;
} __LIBC_LOCALECOLLATE;
/** Pointer to a locale collate structure. */
typedef __LIBC_LOCALECOLLATE *__LIBC_PLOCALECOLLATE;
| 60 |
|
|---|
| 61 | /**
|
|---|
| 62 | * Multibyte to/from wide character conversion functions.
|
|---|
| 63 | */
|
|---|
| 64 | typedef struct __libc_localeCTypeFuncs
|
|---|
| 65 | {
|
|---|
| 66 | int (*pfnmbsinit)(const __mbstate_t *);
|
|---|
| 67 | size_t (*pfnmbrtowc)(__wchar_t * __restrict, const char * __restrict, size_t, __mbstate_t * __restrict);
|
|---|
| 68 | size_t (*pfnmbsnrtowcs)(__wchar_t * __restrict, const char ** __restrict, size_t, size_t, __mbstate_t * __restrict);
|
|---|
| 69 | size_t (*pfnwcrtomb)(char * __restrict, __wchar_t, __mbstate_t * __restrict);
|
|---|
| 70 | size_t (*pfnwcsnrtombs)(char * __restrict, const __wchar_t ** __restrict, size_t, size_t, __mbstate_t * __restrict);
|
|---|
| 71 | } __LIBC_LOCALECTYPEFUNCS;
|
|---|
| 72 | /** Pointer to multibyte/wide character conversion functions. */
|
|---|
| 73 | typedef __LIBC_LOCALECTYPEFUNCS *__LIBC_PLOCALECTYPEFUNCS;
|
|---|
| 74 | /** Pointer to const multibyte/wide character conversion functions. */
|
|---|
| 75 | typedef const __LIBC_LOCALECTYPEFUNCS *__LIBC_PCLOCALECTYPEFUNCS;
|
|---|
| 76 |
|
|---|
| 77 | /**
|
|---|
| 78 | * This structure contains the flags and uppercase/lowercase tables.
|
|---|
| 79 | */
|
|---|
| 80 | typedef struct __libc_LocaleCtype
|
|---|
| 81 | {
|
|---|
| 82 | /** All uppercased characters. */
|
|---|
| 83 | unsigned char auchUpper[256];
|
|---|
| 84 | /** All lowercased characters. */
|
|---|
| 85 | unsigned char auchLower[256];
|
|---|
| 86 | /** Bit flags for every character (for isXXX() function series). */
|
|---|
| 87 | unsigned aufType[256];
|
|---|
| 88 |
|
|---|
| 89 | /* part which we don't 'expose': */
|
|---|
| 90 | /** MBCS prefixes. Two bits per character. */
|
|---|
| 91 | unsigned char au2MBCSPrefixs[256/4];
|
|---|
| 92 | /** Unicode translation. (0xffff means no translation.) */
|
|---|
| 93 | unsigned short aucUnicode[256];
|
|---|
| 94 | /** Unicode -> SBCS conversion: 0..128. */
|
|---|
| 95 | unsigned char auchToSBCS0To128[128];
|
|---|
| 96 | /** Unicode -> SBCS conversion: Custom regions. */
|
|---|
| 97 | struct
|
|---|
| 98 | {
|
|---|
| 99 | /** First unicode code point. */
|
|---|
| 100 | unsigned short usStart;
|
|---|
| 101 | /** Number of entries used. */
|
|---|
| 102 | unsigned short cChars;
|
|---|
| 103 | /** Array SBCS chars corresponding to (wc - usStart). 0 means no conversion. */
|
|---|
| 104 | unsigned char auch[28];
|
|---|
| 105 | } aSBCSs[8];
|
|---|
| 106 | /** Number of aSBCS regions in use. */
|
|---|
| 107 | unsigned cSBCSs;
|
|---|
| 108 | /** Conversion functions. */
|
|---|
| 109 | __LIBC_LOCALECTYPEFUNCS CtypeFuncs;
|
|---|
| 110 | #ifdef __OS2__
|
|---|
| 111 | /** The converter object to convert to and from selected codepage
|
|---|
| 112 | (used with MBCS codepages only). */
|
|---|
| 113 | UconvObject uobj;
|
|---|
| 114 | /** The locale object. */
|
|---|
| 115 | LocaleObject lobj;
|
|---|
| 116 | #endif
|
|---|
| 117 | /** Non-zero if there are any MBCS prefix characters in codepage. */
|
|---|
| 118 | char mbcs;
|
|---|
| 119 | /** Codeset name. */
|
|---|
| 120 | char szCodeSet[32];
|
|---|
| 121 | } __LIBC_LOCALECTYPE;
|
|---|
| 122 | /** Pointer to the Ctype locale struct. */
|
|---|
| 123 | typedef __LIBC_LOCALECTYPE *__LIBC_PLOCALECTYPE;
|
|---|
| 124 |
|
|---|
| 125 |
|
|---|
| 126 | /**
|
|---|
| 127 | * Unicode CType data.
|
|---|
| 128 | * The structure contains information for the first 256 unicode chars.
|
|---|
| 129 | */
|
|---|
| 130 | typedef struct __libc_localeWCType
|
|---|
| 131 | {
|
|---|
| 132 | /** All uppercased characters. */
|
|---|
| 133 | __wchar_t awcUpper[256];
|
|---|
| 134 | /** All lowercased characters. */
|
|---|
| 135 | __wchar_t awcLower[256];
|
|---|
| 136 | /** Bit flags for every character (for iswXXX() function series). */
|
|---|
| 137 | unsigned aufType[256];
|
|---|
| 138 | /** Mask used to check if an index is within the above arrays.
|
|---|
| 139 | * This is required because 'C' doesn't do more than 0-127. So,
|
|---|
| 140 | * the mask is either ~0xff or ~0x7f. */
|
|---|
| 141 | unsigned uMask;
|
|---|
| 142 | } __LIBC_LOCALEWCTYPE;
|
|---|
| 143 | /** Pointer to the Ctype unicode struct. */
|
|---|
| 144 | typedef __LIBC_LOCALEWCTYPE *__LIBC_PLOCALEWCTYPE;
|
|---|
| 145 |
|
|---|
/**
 * Time formatting rules.
 * The fConsts member tells what kind of memory is backing the strings.
 */
typedef struct __libc_LocaleTime
{
    /** Abbreviated month names. */
    char   *smonths[12];
    /** Full month names. */
    char   *lmonths[12];
    /** Abbreviated weekday names. */
    char   *swdays[7];
    /** Full weekday names. */
    char   *lwdays[7];
    /** Combined date and time format. */
    char   *date_time_fmt;
    /** Date format. */
    char   *date_fmt;
    /** Time format. */
    char   *time_fmt;
    /** AM string. */
    char   *am;
    /** PM string. */
    char   *pm;
    /** AM/PM time format. (T_FMT_AMPM) */
    char   *ampm_fmt;
    /** ERA */
    char   *era;
    /** ERA_D_FMT. */
    char   *era_date_fmt;
    /** ERA_D_T_FMT. */
    char   *era_date_time_fmt;
    /** ERA_T_FMT. */
    char   *era_time_fmt;
    /** ALT_DIGITS. */
    char   *alt_digits;
    /** DATESEP. */
    char   *datesep;
    /** TIMESEP. */
    char   *timesep;
    /** LISTSEP. */
    char   *listsep;
    /** When set, every string is a constant and shall not be free()ed. */
    int     fConsts;
} __LIBC_LOCALETIME;
/** Pointer to time locale data. */
typedef __LIBC_LOCALETIME *__LIBC_PLOCALETIME;
| 193 |
|
|---|
| 194 |
|
|---|
/**
 * Locale information structure.
 *
 * Wraps the standard lconv struct and adds a few private fields that record
 * which parts of it have been updated and given heap strings.
 */
typedef struct __libc_localeLconv
{
    /** The lconv structure. */
    struct lconv    s;
    /** CRNCYSTR. */
    char           *pszCrncyStr;
    /** Set when all the numeric members are readonly const strings. */
    int             fNumericConsts;
    /** Set when all the monetary members are readonly const strings. */
    int             fMonetaryConsts;
} __LIBC_LOCALELCONV;
/** Pointer to the extended locale information structure. */
typedef __LIBC_LOCALELCONV *__LIBC_PLOCALELCONV;
| 214 |
|
|---|
| 215 |
|
|---|
/**
 * Message locale information.
 * Only reachable through the nl_langinfo() interface.
 */
typedef struct __libc_localeMsg
{
    /** YESEXPR */
    char   *pszYesExpr;
    /** NOEXPR */
    char   *pszNoExpr;
    /** YESSTR */
    char   *pszYesStr;
    /** NOSTR */
    char   *pszNoStr;
    /** Set when all members are readonly const strings. */
    int     fConsts;
} __LIBC_LOCALEMSG;
/** Pointer to the message locale information. */
typedef __LIBC_LOCALEMSG *__LIBC_PLOCALEMSG;
| 235 |
|
|---|
| 236 |
|
|---|
| 237 | /** String collation information. */
|
|---|
| 238 | extern __LIBC_LOCALECOLLATE __libc_gLocaleCollate;
|
|---|
| 239 | /** String collation information for the default 'C'/'POSIX' locale. */
|
|---|
| 240 | extern const __LIBC_LOCALECOLLATE __libc_gLocaleCollateDefault;
|
|---|
| 241 | /** Character case conversion tables. */
|
|---|
| 242 | extern __LIBC_LOCALECTYPE __libc_GLocaleCtype;
|
|---|
| 243 | /** Character case conversion tables for the default 'C'/'POSIX' locale. */
|
|---|
| 244 | extern const __LIBC_LOCALECTYPE __libc_GLocaleCtypeDefault;
|
|---|
| 245 | /** Cached Unicode (__wchar_t) case conversion tables and flags. */
|
|---|
| 246 | extern __LIBC_LOCALEWCTYPE __libc_GLocaleWCtype;
|
|---|
| 247 | /** Locale information structure. */
|
|---|
| 248 | extern __LIBC_LOCALELCONV __libc_gLocaleLconv;
|
|---|
| 249 | /* Locale information structure for the 'C'/'POSIX' locale. */
|
|---|
| 250 | extern const __LIBC_LOCALELCONV __libc_gLocaleLconvDefault;
|
|---|
| 251 | /** Date / time formatting rules. */
|
|---|
| 252 | extern __LIBC_LOCALETIME __libc_gLocaleTime;
|
|---|
| 253 | /** Date / time formatting rules for the 'C'/'POSIX' locale. */
|
|---|
| 254 | extern const __LIBC_LOCALETIME __libc_gLocaleTimeDefault;
|
|---|
| 255 | /** Message locale information. */
|
|---|
| 256 | extern __LIBC_LOCALEMSG __libc_gLocaleMsg;
|
|---|
| 257 | /** Message locale information for the 'C'/'POSIX' locale. */
|
|---|
| 258 | extern const __LIBC_LOCALEMSG __libc_gLocaleMsgDefault;
|
|---|
| 259 |
|
|---|
/** Macros to lock the different locale structures.
 *
 * Every macro currently expands to an empty statement, i.e. no locking is
 * actually performed.  The CTYPE macros come in a read-write (RW) and a
 * read-only (RO) flavour.
 * @{
 */
#define LOCALE_LOCK()                       do {} while (0)
#define LOCALE_UNLOCK()                     do {} while (0)
#define LOCALE_CTYPE_RW_LOCK()              do {} while (0)
#define LOCALE_CTYPE_RW_UNLOCK()            do {} while (0)
#define LOCALE_CTYPE_RO_LOCK()              do {} while (0)
#define LOCALE_CTYPE_RO_UNLOCK()            do {} while (0)
/** @} */
| 270 |
|
|---|
| 271 | /** Convert a string to Unicode, apply some transform and convert back. */
|
|---|
| 272 | extern void __libc_ucs2Do(UconvObject *uconv, char *s, void *arg, int (*xform)(UniChar *, void *));
|
|---|
| 273 | /** Convert a MBCS character to Unicode; returns number of bytes in MBCS char. */
|
|---|
| 274 | extern int __libc_ucs2To(UconvObject, const unsigned char *, size_t, UniChar *);
|
|---|
| 275 | /** Convert a Unicode character to MBCS. */
|
|---|
| 276 | extern int __libc_ucs2From(UconvObject, UniChar, unsigned char *, size_t);
|
|---|
| 277 | /** Converts a codepage string to unichar and something libuni might recognize. */
|
|---|
| 278 | extern void __libc_TranslateCodepage(const char *cp, UniChar *ucp);
|
|---|
| 279 |
|
|---|
| 280 | extern int __libc_localeCreateObjects(const char *pszLocale, const char *pszCodepage, char *pszCodepageActual, LocaleObject *plobj, UconvObject *puobj);
|
|---|
| 281 |
|
|---|
| 282 | extern void __libc_localeFuncsSBCS(__LIBC_PLOCALECTYPEFUNCS pFuncs);
|
|---|
| 283 | extern void __libc_localeFuncsDBCS(__LIBC_PLOCALECTYPEFUNCS pFuncs);
|
|---|
| 284 | extern void __libc_localeFuncsMBCS(__LIBC_PLOCALECTYPEFUNCS pFuncs);
|
|---|
| 285 | extern void __libc_localeFuncsUCS2(__LIBC_PLOCALECTYPEFUNCS pFuncs);
|
|---|
| 286 | extern void __libc_localeFuncsUTF8(__LIBC_PLOCALECTYPEFUNCS pFuncs);
|
|---|
| 287 | extern void __libc_localeFuncsDefault(__LIBC_PLOCALECTYPEFUNCS pFuncs);
|
|---|
| 288 |
|
|---|
| 289 | extern size_t __libc_localeFuncsGeneric_mbsnrtowcs(size_t (*pfnmbrtowc)(__wchar_t * __restrict, const char * __restrict, size_t, __mbstate_t * __restrict),
|
|---|
| 290 | __wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, __mbstate_t * __restrict ps);
|
|---|
| 291 | extern size_t __libc_localeFuncsGeneric_wcsnrtombs(size_t (*pfnwcrtomb)(char * __restrict, __wchar_t, __mbstate_t * __restrict),
|
|---|
| 292 | char * __restrict dst, const __wchar_t ** __restrict src, size_t nwc, size_t len, __mbstate_t * __restrict ps);
|
|---|
| 293 |
|
|---|
| 294 | extern void __libc_localeFuncsNone(__LIBC_PLOCALECTYPEFUNCS pFuncs);
|
|---|
| 295 | extern size_t __libc_locale_none_mbrtowc(__wchar_t * __restrict, const char * __restrict, size_t, __mbstate_t * __restrict);
|
|---|
| 296 | extern int __libc_locale_none_mbsinit(const __mbstate_t *);
|
|---|
| 297 | extern size_t __libc_locale_none_mbsnrtowcs(__wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, __mbstate_t * __restrict ps __unused);
|
|---|
| 298 | extern size_t __libc_locale_none_wcrtomb(char * __restrict, __wchar_t, __mbstate_t * __restrict);
|
|---|
| 299 | extern size_t __libc_locale_none_wcsnrtombs(char * __restrict, const __wchar_t ** __restrict, size_t, size_t, __mbstate_t * __restrict);
|
|---|
| 300 |
|
|---|
| 301 |
|
|---|
/** Handy macros for working with the au2MBCSPrefixs members of
 * the locale data structures. The au2MBCSPrefixs members are
 * arrays whose elements are 2 bits long: four entries are packed into
 * each byte, entry (c & 3) of byte (c >> 2) belonging to character c.
 *
 * All the macros evaluate their c argument more than once, so do not pass
 * expressions with side effects.
 * @{
 */

/** ORs the 2-bit value v into the entry for character c in table a.
 * The entry is not cleared first, so this expects a zeroed entry. */
#define SET_MBCS_PREFIX(a, c, v) \
    ((a)[((unsigned char)(c)) >> 2] |= (v) << (2 * (((unsigned char)(c)) & 3)))

/** Reads the 2-bit entry for character c from table a.
 * Uses the same bit position as SET_MBCS_PREFIX so that stored values
 * are read back unchanged. */
#define LEN_MBCS_PREFIX(a, c) \
    (((a)[((unsigned char)(c)) >> 2] >> (2 * (((unsigned char)(c)) & 3))) & 3)

/** True when the entry for character c in p->au2MBCSPrefixs is anything but 1. */
#define IS_MBCS_PREFIX(p, c) \
    (LEN_MBCS_PREFIX((p)->au2MBCSPrefixs, c) != 1)

/** Stores the entry for character c in v and yields true when it is above 1. */
#define CHK_MBCS_PREFIX(p, c, v) \
    (((v) = LEN_MBCS_PREFIX((p)->au2MBCSPrefixs, c)) > 1)
/** @} */
| 319 |
|
|---|
| 320 | #include <ctype.h>
|
|---|
| 321 |
|
|---|
| 322 | /**
|
|---|
| 323 | * Convert the type info we get from the unicode lib to libc talk.
|
|---|
| 324 | * ASSUMES that none of the locals differs from the unicode spec
|
|---|
| 325 | *
|
|---|
| 326 | * @returns libc ctype flags.
|
|---|
| 327 | * @param pUniType The unicode type info to translate.
|
|---|
| 328 | * @param wc The unicode code point.
|
|---|
| 329 | */
|
|---|
| 330 | static inline unsigned ___wctype_uni(const UNICTYPE *pUniType, wchar_t wc)
|
|---|
| 331 | {
|
|---|
| 332 | unsigned ufType = 0;
|
|---|
| 333 | /* ASSUMES CT_* << 8 == __* ! */
|
|---|
| 334 | ufType = ((unsigned)pUniType->itype << 8)
|
|---|
| 335 | & (__CT_UPPER | __CT_LOWER | __CT_DIGIT | __CT_SPACE |
|
|---|
| 336 | __CT_PUNCT | __CT_CNTRL | __CT_BLANK | __CT_XDIGIT |
|
|---|
| 337 | __CT_ALPHA | __CT_ALNUM | __CT_GRAPH | __CT_PRINT |
|
|---|
| 338 | __CT_NUMBER | __CT_SYMBOL | __CT_ASCII);
|
|---|
| 339 | if (pUniType->extend & C3_IDEOGRAPH)
|
|---|
| 340 | ufType |= __CT_IDEOGRAM;
|
|---|
| 341 | if (ufType & (__CT_XDIGIT | __CT_DIGIT))
|
|---|
| 342 | {
|
|---|
| 343 | if ( (unsigned)wc - 0x30U <= (0x39 - 0x30))
|
|---|
| 344 | ufType |= (unsigned)wc - 0x30;
|
|---|
| 345 | else if ((unsigned)wc - 0x41U <= (0x46 - 0x41))
|
|---|
| 346 | ufType |= (unsigned)wc - 0x41 + 0xa;
|
|---|
| 347 | else
|
|---|
| 348 | {
|
|---|
| 349 | unsigned uVal = UniQueryNumericValue(wc);
|
|---|
| 350 | if (!(uVal & ~0xffU))
|
|---|
| 351 | ufType |= uVal;
|
|---|
| 352 | }
|
|---|
| 353 | }
|
|---|
| 354 | ufType |= (pUniType->bidi & 0xf << 24);
|
|---|
| 355 |
|
|---|
| 356 | /** @todo screen width. */
|
|---|
| 357 | return ufType;
|
|---|
| 358 | }
|
|---|
| 359 |
|
|---|
| 360 | __END_DECLS
|
|---|
| 361 |
|
|---|
| 362 | #endif /* __InnoTekLIBC_locale_h__ */
|
|---|
| 363 |
|
|---|