27#ifndef __GSCHARACTER_H__
28#define __GSCHARACTER_H__ 1
30#include <CoreFoundation/CFBase.h>
55 return (0x0009 <= c && c <= 0x000D) || (c == 0x0020) || (c == 0x0085)
56 || (c == 0x00A0) || (c == 0x1680) || (0x2000 <= c && c <= 0x200A)
57 || (c == 0x2028) || (c == 0x2029) || (c == 0x202F) || (c == 0x205F)
79 return (c & 0xFFFFF800) == 0xD800;
89 return (c & 0xFFFFFC00) == 0xD800;
99 return (c & 0xFFFFFC00) == 0xDC00;
110#define kGSUTF8CharacterMaximumLength 4
120 return (c < 0xF5) ? (c >= 0xC0) + (c >= 0xE0) + (c >= 0xF0) : 0;
130 return (c & 0xC0) == 0x80;
141 return (c <= 0x10FFFF) ? 1 + (c >= 0x80) + (c >= 0x800) + (c >= 0x10000) : 0;
179 if (*p++ == 0xEF && *p++ == 0xBB && *p++ == 0xBF)
200 static const UTF8Char utf8LeadHeader[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
207 if (l && limit - d > l)
212 d[3] = (c & 0x3F) | 0x80;
215 d[2] = (c & 0x3F) | 0x80;
218 d[1] = (c & 0x3F) | 0x80;
221 d[0] = c | utf8LeadHeader[l - 1];
243 static const UTF32Char utf8LeadMask[4] = { 0x0, 0x1F, 0x0F, 0x07 };
244 const UTF8Char *start;
255 if (limit - s < trail)
257 ch &= utf8LeadMask[trail];
266 ch = (ch << 6) | (*s++ & 0x3F);
273 ch = (ch << 6) | (*s++ & 0x3F);
280 ch = (ch << 6) | (*s++ & 0x3F);
300#define kGSUTF16CharacterMaximumLength 2
303#define kGSUTF16CharacterByteOrderMark 0xFEFF
306#define kGSUTF16CharacterSwappedByteOrderMark 0xFFFE
326 else if (c <= 0x10FFFF)
330 d[0] = (c >> 10) + 0xD7C0;
331 d[1] = (c & 0x3FF) + 0xDC00;
354 const UTF16Char *start;
364 ch = (ch << 10) + (*s++) - ((0xD7C0 << 10) + 0xDC00);
379#define kGSUTF32CharacterByteOrderMark 0x0000FEFF
382#define kGSUTF32CharacterSwappedByteOrderMark 0xFFFE0000
signed long CFIndex
Definition CFBase.h:165
Boolean GSUTF8CharacterIsTrailing(const UTF8Char c)
Determines if the specified UTF-8 code unit is a trailing code unit.
Definition GSCharacter.h:128
CFIndex GSUTF8CharacterGet(const UTF8Char *s, const UTF8Char *limit, UTF32Char *c)
Get a Unicode code point from a UTF-8 string buffer.
Definition GSCharacter.h:241
CFIndex GSUTF16CharacterGet(const UTF16Char *s, const UTF16Char *limit, UTF32Char *c)
Get a Unicode code point from a UTF-16 string buffer.
Definition GSCharacter.h:352
Boolean GSCharacterIsTrailSurrogate(const UTF32Char c)
Determine if character is a trailing surrogate code point.
Definition GSCharacter.h:97
Boolean GSCharacterIsInSupplementaryPlane(const UTF32Char c)
Determine if character is in one of the supplementary planes.
Definition GSCharacter.h:67
CFIndex GSUTF8CharacterAppendByteOrderMark(UTF8Char *d, const UTF8Char *limit)
Append the UTF-8 Byte Order Mark to the string buffer.
Definition GSCharacter.h:151
Boolean GSCharacterIsWhitespace(const UTF32Char c)
Determine if a character is a whitespace character.
Definition GSCharacter.h:53
Boolean GSCharacterIsASCII(const UTF32Char c)
Determine if a character is an ASCII character (less than 128).
Definition GSCharacter.h:43
Boolean GSCharacterIsLeadSurrogate(const UTF32Char c)
Determine if character is a leading surrogate code point.
Definition GSCharacter.h:87
Boolean GSCharacterIsSurrogate(const UTF32Char c)
Determine if character is a surrogate code point.
Definition GSCharacter.h:77
CFIndex GSUTF8CharacterTrailBytesCount(const UTF8Char c)
Determine the number of trailing bytes for a UTF-8 character based on the leading code unit.
Definition GSCharacter.h:118
CFIndex GSUTF8CharacterLength(const UTF32Char c)
Determine the number of UTF-8 code units required to represent the specified Unicode code point.
Definition GSCharacter.h:139
CFIndex GSUTF16CharacterAppend(UTF16Char *d, const UTF16Char *limit, UTF32Char c)
Append a character to a UTF-16 string buffer.
Definition GSCharacter.h:318
CFIndex GSUTF8CharacterAppend(UTF8Char *d, const UTF8Char *limit, UTF32Char c)
Append a character to a UTF-8 string buffer.
Definition GSCharacter.h:198
Boolean GSUTF8CharacterSkipByteOrderMark(const UTF8Char **s, const UTF8Char *limit)
Determine if a UTF-8 string buffer has a Byte Order Mark.
Definition GSCharacter.h:172