The world's most popular open source database
#include <my_global.h>
#include "m_string.h"
#include "m_ctype.h"
#include <errno.h>
#include "my_uctype.h"
Include dependency graph for ctype-utf8.c:

Go to the source code of this file.
Defines | |
| #define | EILSEQ ENOENT |
| #define | HAVE_UNIDATA |
| #define | MY_FILENAME_ESCAPE '@' |
Functions | |
| int | my_wildcmp_unicode (CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many, MY_UNICASE_INFO **weights) |
| static int | bincmp (const uchar *s, const uchar *se, const uchar *t, const uchar *te) |
| static int | my_utf8_uni (CHARSET_INFO *cs __attribute__((unused)), my_wc_t *pwc, const uchar *s, const uchar *e) |
| static int | my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)), my_wc_t wc, uchar *r, uchar *e) |
| static uint | my_caseup_utf8 (CHARSET_INFO *cs, char *src, uint srclen, char *dst, uint dstlen) |
| static void | my_hash_sort_utf8 (CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2) |
| static void | my_caseup_str_utf8 (CHARSET_INFO *cs, char *s) |
| static uint | my_casedn_utf8 (CHARSET_INFO *cs, char *src, uint srclen, char *dst, uint dstlen) |
| static void | my_casedn_str_utf8 (CHARSET_INFO *cs, char *s) |
| static int | my_strnncoll_utf8 (CHARSET_INFO *cs, const uchar *s, uint slen, const uchar *t, uint tlen, my_bool t_is_prefix) |
| static int | my_strnncollsp_utf8 (CHARSET_INFO *cs, const uchar *s, uint slen, const uchar *t, uint tlen, my_bool diff_if_only_endspace_difference) |
| static int | my_strcasecmp_utf8 (CHARSET_INFO *cs, const char *s, const char *t) |
| static int | my_wildcmp_utf8 (CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many) |
| static uint | my_strnxfrmlen_utf8 (CHARSET_INFO *cs __attribute__((unused)), uint len) |
| static int | my_strnxfrm_utf8 (CHARSET_INFO *cs, uchar *dst, uint dstlen, const uchar *src, uint srclen) |
| static int | my_ismbchar_utf8 (CHARSET_INFO *cs, const char *b, const char *e) |
| static int | my_mbcharlen_utf8 (CHARSET_INFO *cs __attribute__((unused)), uint c) |
| static int | hexlo (int x) |
| static int | my_mb_wc_filename (CHARSET_INFO *cs __attribute__((unused)), my_wc_t *pwc, const uchar *s, const uchar *e) |
| static int | my_wc_mb_filename (CHARSET_INFO *cs __attribute__((unused)), my_wc_t wc, unsigned char *s, unsigned char *e) |
Variables | |
| static MY_UNICASE_INFO | plane00 [] |
| static MY_UNICASE_INFO | plane01 [] |
| static MY_UNICASE_INFO | plane02 [] |
| static MY_UNICASE_INFO | plane03 [] |
| static MY_UNICASE_INFO | plane04 [] |
| static MY_UNICASE_INFO | plane05 [] |
| static MY_UNICASE_INFO | plane1E [] |
| static MY_UNICASE_INFO | plane1F [] |
| static MY_UNICASE_INFO | plane21 [] |
| static MY_UNICASE_INFO | plane24 [] |
| static MY_UNICASE_INFO | planeFF [] |
| MY_UNICASE_INFO * | my_unicase_default [256] |
| static MY_UNICASE_INFO | turk00 [] |
| MY_UNICASE_INFO * | my_unicase_turkish [256] |
| static uchar | ctype_utf8 [] |
| static uchar | to_lower_utf8 [] |
| static uchar | to_upper_utf8 [] |
| static MY_COLLATION_HANDLER | my_collation_ci_handler |
| MY_CHARSET_HANDLER | my_charset_utf8_handler |
| CHARSET_INFO | my_charset_utf8_general_ci |
| CHARSET_INFO | my_charset_utf8_bin |
| static uint16 | touni [5994] |
| static uint16 | uni_0C00_05FF [1344] |
| static uint16 | uni_1E00_1FFF [512] |
| static uint16 | uni_2160_217F [32] |
| static uint16 | uni_24B0_24EF [64] |
| static uint16 | uni_FF20_FF5F [64] |
| static char | filename_safe_char [128] |
| static MY_COLLATION_HANDLER | my_collation_filename_handler |
| static MY_CHARSET_HANDLER | my_charset_filename_handler |
| CHARSET_INFO | my_charset_filename |
| #define EILSEQ ENOENT |
Definition at line 27 of file ctype-utf8.c.
| #define HAVE_UNIDATA |
Definition at line 35 of file ctype-utf8.c.
| #define MY_FILENAME_ESCAPE '@' |
Definition at line 3912 of file ctype-utf8.c.
Referenced by my_mb_wc_filename(), and my_wc_mb_filename().
| static int bincmp | ( | const uchar * | s, | |
| const uchar * | se, | |||
| const uchar * | t, | |||
| const uchar * | te | |||
| ) | [inline, static] |
Definition at line 1936 of file ctype-utf8.c.
References cmp, int(), memcmp(), and min.
Referenced by my_strnncoll_utf8(), and my_strnncollsp_utf8().
01938 { 01939 int slen= (int) (se-s), tlen= (int) (te-t); 01940 int len=min(slen,tlen); 01941 int cmp= memcmp(s,t,len); 01942 return cmp ? cmp : slen-tlen; 01943 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int hexlo | ( | int | x | ) | [static] |
Definition at line 3867 of file ctype-utf8.c.
Referenced by my_mb_wc_filename().
/*
  Convert one lower-case hexadecimal digit character to its value.

  SYNOPSIS
    hexlo()
    x    character code to convert

  NOTES
    Only '0'..'9' and 'a'..'f' are digits; upper-case 'A'..'F' are
    rejected. Any value outside the byte range 0..255 is rejected as
    well instead of being used as an array index.

    The result is computed with plain int arithmetic, so the -1 marker
    does not depend on whether "char" is a signed type on the target
    platform.

  RETURN
    0..15  value of the digit
    -1     x is not a lower-case hexadecimal digit
*/
static int hexlo(int x)
{
  if (x >= '0' && x <= '9')
    return x - '0';
  if (x >= 'a' && x <= 'f')
    return x - 'a' + 10;
  return -1;
}
Here is the caller graph for this function:

| static void my_casedn_str_utf8 | ( | CHARSET_INFO * | cs, | |
| char * | s | |||
| ) | [static] |
Definition at line 2175 of file ctype-utf8.c.
References my_casedn_utf8(), and strlen().
02176 { 02177 uint len= (uint) strlen(s); 02178 my_casedn_utf8(cs, s, len, s, len); 02179 }
Here is the call graph for this function:

| static uint my_casedn_utf8 | ( | CHARSET_INFO * | cs, | |
| char * | src, | |||
| uint | srclen, | |||
| char * | dst, | |||
| uint | dstlen | |||
| ) | [static] |
Definition at line 2153 of file ctype-utf8.c.
References charset_info_st::casedn_multiply, charset_info_st::caseinfo, DBUG_ASSERT, my_uni_utf8(), my_utf8_uni(), my_wc_t, and unicase_info_st::tolower.
Referenced by my_casedn_str_utf8().
02155 { 02156 my_wc_t wc; 02157 int srcres, dstres; 02158 char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst; 02159 MY_UNICASE_INFO **uni_plane= cs->caseinfo; 02160 DBUG_ASSERT(src != dst || cs->casedn_multiply == 1); 02161 02162 while ((src < srcend) && 02163 (srcres= my_utf8_uni(cs, &wc, (uchar*) src, (uchar*)srcend)) > 0) 02164 { 02165 int plane= (wc>>8) & 0xFF; 02166 wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc; 02167 if ((dstres= my_uni_utf8(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) 02168 break; 02169 src+= srcres; 02170 dst+= dstres; 02171 } 02172 return (uint) (dst - dst0); 02173 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void my_caseup_str_utf8 | ( | CHARSET_INFO * | cs, | |
| char * | s | |||
| ) | [static] |
Definition at line 2146 of file ctype-utf8.c.
References my_caseup_utf8(), and strlen().
02147 { 02148 uint len= (uint) strlen(s); 02149 my_caseup_utf8(cs, s, len, s, len); 02150 }
Here is the call graph for this function:

| static uint my_caseup_utf8 | ( | CHARSET_INFO * | cs, | |
| char * | src, | |||
| uint | srclen, | |||
| char * | dst, | |||
| uint | dstlen | |||
| ) | [static] |
Definition at line 2096 of file ctype-utf8.c.
References charset_info_st::caseinfo, charset_info_st::caseup_multiply, DBUG_ASSERT, my_uni_utf8(), my_utf8_uni(), my_wc_t, and unicase_info_st::toupper.
Referenced by my_caseup_str_utf8().
02098 { 02099 my_wc_t wc; 02100 int srcres, dstres; 02101 char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst; 02102 MY_UNICASE_INFO **uni_plane= cs->caseinfo; 02103 DBUG_ASSERT(src != dst || cs->caseup_multiply == 1); 02104 02105 while ((src < srcend) && 02106 (srcres= my_utf8_uni(cs, &wc, (uchar *) src, (uchar*) srcend)) > 0) 02107 { 02108 int plane= (wc>>8) & 0xFF; 02109 wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc; 02110 if ((dstres= my_uni_utf8(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) 02111 break; 02112 src+= srcres; 02113 dst+= dstres; 02114 } 02115 return (uint) (dst - dst0); 02116 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void my_hash_sort_utf8 | ( | CHARSET_INFO * | cs, | |
| const uchar * | s, | |||
| uint | slen, | |||
| ulong * | n1, | |||
| ulong * | n2 | |||
| ) | [static] |
Definition at line 2118 of file ctype-utf8.c.
References charset_info_st::caseinfo, e, my_utf8_uni(), my_wc_t, and unicase_info_st::sort.
02120 { 02121 my_wc_t wc; 02122 int res; 02123 const uchar *e=s+slen; 02124 MY_UNICASE_INFO **uni_plane= cs->caseinfo; 02125 02126 /* 02127 Remove end space. We have to do this to be able to compare 02128 'A ' and 'A' as identical 02129 */ 02130 while (e > s && e[-1] == ' ') 02131 e--; 02132 02133 while ((s < e) && (res=my_utf8_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) 02134 { 02135 int plane = (wc>>8) & 0xFF; 02136 wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc; 02137 n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8); 02138 n2[0]+=3; 02139 n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8); 02140 n2[0]+=3; 02141 s+=res; 02142 } 02143 }
Here is the call graph for this function:

| static int my_ismbchar_utf8 | ( | CHARSET_INFO * | cs, | |
| const char * | b, | |||
| const char * | e | |||
| ) | [static] |
Definition at line 2483 of file ctype-utf8.c.
References my_utf8_uni(), and my_wc_t.
02484 { 02485 my_wc_t wc; 02486 int res=my_utf8_uni(cs,&wc, (const uchar*)b, (const uchar*)e); 02487 return (res>1) ? res : 0; 02488 }
Here is the call graph for this function:

| static int my_mb_wc_filename | ( | CHARSET_INFO *cs | __attribute__((unused)), | |
| my_wc_t * | pwc, | |||
| const uchar * | s, | |||
| const uchar * | e | |||
| ) | [static] |
Definition at line 3915 of file ctype-utf8.c.
References hexlo(), MY_CS_ILSEQ, MY_CS_TOOSMALL, MY_CS_TOOSMALL3, MY_CS_TOOSMALL4, and MY_FILENAME_ESCAPE.
03917 { 03918 int byte1, byte2; 03919 if (s >= e) 03920 return MY_CS_TOOSMALL; 03921 03922 if (*s < 128 && filename_safe_char[*s]) 03923 { 03924 *pwc= *s; 03925 return 1; 03926 } 03927 03928 if (*s != MY_FILENAME_ESCAPE) 03929 return MY_CS_ILSEQ; 03930 03931 if (s + 3 > e) 03932 return MY_CS_TOOSMALL3; 03933 03934 byte1= s[1]; 03935 byte2= s[2]; 03936 03937 if (byte1 >= 0x30 && byte1 <= 0x7F && 03938 byte2 >= 0x30 && byte2 <= 0x7F) 03939 { 03940 int code= (byte1 - 0x30) * 80 + byte2 - 0x30; 03941 if (code < 5994 && touni[code]) 03942 { 03943 *pwc= touni[code]; 03944 return 3; 03945 } 03946 if (byte1 == '@' && byte2 == '@') 03947 { 03948 *pwc= 0; 03949 return 3; 03950 } 03951 } 03952 03953 if (s + 4 > e) 03954 return MY_CS_TOOSMALL4; 03955 03956 if ((byte1= hexlo(byte1)) >= 0 && 03957 (byte2= hexlo(byte2)) >= 0) 03958 { 03959 int byte3= hexlo(s[3]); 03960 int byte4= hexlo(s[4]); 03961 if (byte3 >=0 && byte4 >=0) 03962 { 03963 *pwc= (byte1 << 12) + (byte2 << 8) + (byte3 << 4) + byte4; 03964 return 5; 03965 } 03966 } 03967 03968 return MY_CS_ILSEQ; 03969 }
Here is the call graph for this function:

| static int my_mbcharlen_utf8 | ( | CHARSET_INFO *cs | __attribute__((unused)), | |
| uint | c | |||
| ) | [static] |
Definition at line 2490 of file ctype-utf8.c.
02491 { 02492 if (c < 0x80) 02493 return 1; 02494 else if (c < 0xc2) 02495 return 0; /* Illegal mb head */ 02496 else if (c < 0xe0) 02497 return 2; 02498 else if (c < 0xf0) 02499 return 3; 02500 #ifdef UNICODE_32BIT 02501 else if (c < 0xf8) 02502 return 4; 02503 else if (c < 0xfc) 02504 return 5; 02505 else if (c < 0xfe) 02506 return 6; 02507 #endif 02508 return 0; /* Illegal mb head */; 02509 }
| static int my_strcasecmp_utf8 | ( | CHARSET_INFO * | cs, | |
| const char * | s, | |||
| const char * | t | |||
| ) | [static] |
Definition at line 2346 of file ctype-utf8.c.
References charset_info_st::caseinfo, int(), my_utf8_uni(), my_wc_t, plane00, strcmp(), and unicase_info_st::tolower.
02347 { 02348 MY_UNICASE_INFO **uni_plane= cs->caseinfo; 02349 while (s[0] && t[0]) 02350 { 02351 my_wc_t s_wc,t_wc; 02352 02353 if ((uchar) s[0] < 128) 02354 { 02355 /* 02356 s[0] is between 0 and 127. 02357 It represents a single byte character. 02358 Convert it into weight according to collation. 02359 */ 02360 s_wc= plane00[(uchar) s[0]].tolower; 02361 s++; 02362 } 02363 else 02364 { 02365 int plane, res; 02366 02367 /* 02368 Scan a multibyte character. 02369 02370 In the future it is worth to write a special version of my_utf8_uni() 02371 for 0-terminated strings which will not take in account length. Now 02372 we call the regular version of my_utf8_uni() with s+3 in the 02373 last argument. s+3 is enough to scan any multibyte sequence. 02374 02375 Calling the regular version of my_utf8_uni is safe for 0-terminated 02376 strings: we will never lose the end of the string: 02377 If we have 0 character in the middle of a multibyte sequence, 02378 then my_utf8_uni will always return a negative number, so the 02379 loop with finish. 02380 */ 02381 02382 res= my_utf8_uni(cs,&s_wc, (const uchar*)s, (const uchar*) s + 3); 02383 02384 /* 02385 In the case of wrong multibyte sequence we will 02386 call strcmp() for byte-to-byte comparison. 02387 */ 02388 if (res <= 0) 02389 return strcmp(s, t); 02390 s+= res; 02391 02392 /* Convert Unicode code into weight according to collation */ 02393 plane=(s_wc>>8) & 0xFF; 02394 s_wc = uni_plane[plane] ? 
uni_plane[plane][s_wc & 0xFF].tolower : s_wc; 02395 } 02396 02397 02398 /* Do the same for the second string */ 02399 02400 if ((uchar) t[0] < 128) 02401 { 02402 /* Convert single byte character into weight */ 02403 t_wc= plane00[(uchar) t[0]].tolower; 02404 t++; 02405 } 02406 else 02407 { 02408 int plane; 02409 int res=my_utf8_uni(cs,&t_wc, (const uchar*)t, (const uchar*) t + 3); 02410 if (res <= 0) 02411 return strcmp(s, t); 02412 t+= res; 02413 02414 /* Convert code into weight */ 02415 plane=(t_wc>>8) & 0xFF; 02416 t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].tolower : t_wc; 02417 } 02418 02419 /* Now we have two weights, let's compare them */ 02420 if ( s_wc != t_wc ) 02421 return ((int) s_wc) - ((int) t_wc); 02422 } 02423 return ((int)(uchar)s[0]) - ((int) (uchar) t[0]); 02424 }
Here is the call graph for this function:

| static int my_strnncoll_utf8 | ( | CHARSET_INFO * | cs, | |
| const uchar * | s, | |||
| uint | slen, | |||
| const uchar * | t, | |||
| uint | tlen, | |||
| my_bool | t_is_prefix | |||
| ) | [static] |
Definition at line 2182 of file ctype-utf8.c.
References bincmp(), charset_info_st::caseinfo, my_utf8_uni(), my_wc_t, and unicase_info_st::sort.
02186 { 02187 int s_res,t_res; 02188 my_wc_t s_wc,t_wc; 02189 const uchar *se=s+slen; 02190 const uchar *te=t+tlen; 02191 MY_UNICASE_INFO **uni_plane= cs->caseinfo; 02192 02193 while ( s < se && t < te ) 02194 { 02195 int plane; 02196 s_res=my_utf8_uni(cs,&s_wc, s, se); 02197 t_res=my_utf8_uni(cs,&t_wc, t, te); 02198 02199 if ( s_res <= 0 || t_res <= 0 ) 02200 { 02201 /* Incorrect string, compare byte by byte value */ 02202 return bincmp(s, se, t, te); 02203 } 02204 02205 plane=(s_wc>>8) & 0xFF; 02206 s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; 02207 plane=(t_wc>>8) & 0xFF; 02208 t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; 02209 if ( s_wc != t_wc ) 02210 { 02211 return s_wc > t_wc ? 1 : -1; 02212 } 02213 02214 s+=s_res; 02215 t+=t_res; 02216 } 02217 return (int) (t_is_prefix ? t-te : ((se-s) - (te-t))); 02218 }
Here is the call graph for this function:

| static int my_strnncollsp_utf8 | ( | CHARSET_INFO * | cs, | |
| const uchar * | s, | |||
| uint | slen, | |||
| const uchar * | t, | |||
| uint | tlen, | |||
| my_bool | diff_if_only_endspace_difference | |||
| ) | [static] |
Definition at line 2252 of file ctype-utf8.c.
References bincmp(), charset_info_st::caseinfo, my_utf8_uni(), my_wc_t, unicase_info_st::sort, and mySTL::swap().
02256 { 02257 int s_res, t_res, res; 02258 my_wc_t s_wc,t_wc; 02259 const uchar *se= s+slen, *te= t+tlen; 02260 MY_UNICASE_INFO **uni_plane= cs->caseinfo; 02261 02262 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE 02263 diff_if_only_endspace_difference= 0; 02264 #endif 02265 02266 while ( s < se && t < te ) 02267 { 02268 int plane; 02269 s_res=my_utf8_uni(cs,&s_wc, s, se); 02270 t_res=my_utf8_uni(cs,&t_wc, t, te); 02271 02272 if ( s_res <= 0 || t_res <= 0 ) 02273 { 02274 /* Incorrect string, compare byte by byte value */ 02275 return bincmp(s, se, t, te); 02276 } 02277 02278 plane=(s_wc>>8) & 0xFF; 02279 s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; 02280 plane=(t_wc>>8) & 0xFF; 02281 t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; 02282 if ( s_wc != t_wc ) 02283 { 02284 return s_wc > t_wc ? 1 : -1; 02285 } 02286 02287 s+=s_res; 02288 t+=t_res; 02289 } 02290 02291 slen= (uint) (se-s); 02292 tlen= (uint) (te-t); 02293 res= 0; 02294 02295 if (slen != tlen) 02296 { 02297 int swap= 1; 02298 if (diff_if_only_endspace_difference) 02299 res= 1; /* Assume 'a' is bigger */ 02300 if (slen < tlen) 02301 { 02302 slen= tlen; 02303 s= t; 02304 se= te; 02305 swap= -1; 02306 res= -res; 02307 } 02308 /* 02309 This following loop uses the fact that in UTF-8 02310 all multibyte characters are greater than space, 02311 and all multibyte head characters are greater than 02312 space. It means if we meet a character greater 02313 than space, it always means that the longer string 02314 is greater. So we can reuse the same loop from the 02315 8bit version, without having to process full multibute 02316 sequences. 02317 */ 02318 for ( ; s < se; s++) 02319 { 02320 if (*s != ' ') 02321 return (*s < ' ') ? -swap : swap; 02322 } 02323 } 02324 return res; 02325 }
Here is the call graph for this function:

| static int my_strnxfrm_utf8 | ( | CHARSET_INFO * | cs, | |
| uchar * | dst, | |||
| uint | dstlen, | |||
| const uchar * | src, | |||
| uint | srclen | |||
| ) | [static] |
Definition at line 2445 of file ctype-utf8.c.
References charset_info_st::caseinfo, my_utf8_uni(), my_wc_t, and unicase_info_st::sort.
02448 { 02449 my_wc_t wc; 02450 int res; 02451 int plane; 02452 uchar *de= dst + dstlen; 02453 uchar *de_beg= de - 1; 02454 const uchar *se = src + srclen; 02455 MY_UNICASE_INFO **uni_plane= cs->caseinfo; 02456 02457 while (dst < de_beg) 02458 { 02459 if ((res=my_utf8_uni(cs,&wc, src, se)) <= 0) 02460 break; 02461 src+=res; 02462 02463 plane=(wc>>8) & 0xFF; 02464 wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc; 02465 02466 *dst++= (uchar)(wc >> 8); 02467 *dst++= (uchar)(wc & 0xFF); 02468 02469 } 02470 02471 while (dst < de_beg) /* Fill the tail with keys for space character */ 02472 { 02473 *dst++= 0x00; 02474 *dst++= 0x20; 02475 } 02476 02477 if (dst < de) /* Clear the last byte, if "dstlen" was an odd number */ 02478 *dst= 0x00; 02479 02480 return dstlen; 02481 }
Here is the call graph for this function:

| static uint my_strnxfrmlen_utf8 | ( | CHARSET_INFO *cs | __attribute__((unused)), | |
| uint | len | |||
| ) | [static] |
| static int my_uni_utf8 | ( | CHARSET_INFO *cs | __attribute__((unused)), | |
| my_wc_t | wc, | |||
| uchar * | r, | |||
| uchar * | e | |||
| ) | [static] |
Definition at line 2050 of file ctype-utf8.c.
References count, MY_CS_ILUNI, MY_CS_TOOSMALL, and MY_CS_TOOSMALLN.
Referenced by my_casedn_utf8(), and my_caseup_utf8().
02052 { 02053 int count; 02054 02055 if (r >= e) 02056 return MY_CS_TOOSMALL; 02057 02058 if (wc < 0x80) 02059 count = 1; 02060 else if (wc < 0x800) 02061 count = 2; 02062 else if (wc < 0x10000) 02063 count = 3; 02064 #ifdef UNICODE_32BIT 02065 else if (wc < 0x200000) 02066 count = 4; 02067 else if (wc < 0x4000000) 02068 count = 5; 02069 else if (wc <= 0x7fffffff) 02070 count = 6; 02071 #endif 02072 else return MY_CS_ILUNI; 02073 02074 /* 02075 e is a character after the string r, not the last character of it. 02076 Because of it (r+count > e), not (r+count-1 >e ) 02077 */ 02078 if ( r+count > e ) 02079 return MY_CS_TOOSMALLN(count); 02080 02081 switch (count) { 02082 /* Fall through all cases!!! */ 02083 #ifdef UNICODE_32BIT 02084 case 6: r[5] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x4000000; 02085 case 5: r[4] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x200000; 02086 case 4: r[3] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x10000; 02087 #endif 02088 case 3: r[2] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x800; 02089 case 2: r[1] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0xc0; 02090 case 1: r[0] = (uchar) wc; 02091 } 02092 return count; 02093 }
Here is the caller graph for this function:

| static int my_utf8_uni | ( | CHARSET_INFO *cs | __attribute__((unused)), | |
| my_wc_t * | pwc, | |||
| const uchar * | s, | |||
| const uchar * | e | |||
| ) | [static] |
Definition at line 1946 of file ctype-utf8.c.
References MY_CS_ILSEQ, MY_CS_TOOSMALL, MY_CS_TOOSMALL2, MY_CS_TOOSMALL3, MY_CS_TOOSMALL4, MY_CS_TOOSMALL5, MY_CS_TOOSMALL6, and my_wc_t.
Referenced by my_casedn_utf8(), my_caseup_utf8(), my_hash_sort_utf8(), my_ismbchar_utf8(), my_strcasecmp_utf8(), my_strnncoll_utf8(), my_strnncollsp_utf8(), and my_strnxfrm_utf8().
01948 { 01949 unsigned char c; 01950 01951 if (s >= e) 01952 return MY_CS_TOOSMALL; 01953 01954 c= s[0]; 01955 if (c < 0x80) 01956 { 01957 *pwc = c; 01958 return 1; 01959 } 01960 else if (c < 0xc2) 01961 return MY_CS_ILSEQ; 01962 else if (c < 0xe0) 01963 { 01964 if (s+2 > e) /* We need 2 characters */ 01965 return MY_CS_TOOSMALL2; 01966 01967 if (!((s[1] ^ 0x80) < 0x40)) 01968 return MY_CS_ILSEQ; 01969 01970 *pwc = ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80); 01971 return 2; 01972 } 01973 else if (c < 0xf0) 01974 { 01975 if (s+3 > e) /* We need 3 characters */ 01976 return MY_CS_TOOSMALL3; 01977 01978 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >= 0xe1 || s[1] >= 0xa0))) 01979 return MY_CS_ILSEQ; 01980 01981 *pwc = ((my_wc_t) (c & 0x0f) << 12) | 01982 ((my_wc_t) (s[1] ^ 0x80) << 6) | 01983 (my_wc_t) (s[2] ^ 0x80); 01984 01985 return 3; 01986 } 01987 #ifdef UNICODE_32BIT 01988 else if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32) 01989 { 01990 if (s+4 > e) /* We need 4 characters */ 01991 return MY_CS_TOOSMALL4; 01992 01993 if (!((s[1] ^ 0x80) < 0x40 && 01994 (s[2] ^ 0x80) < 0x40 && 01995 (s[3] ^ 0x80) < 0x40 && 01996 (c >= 0xf1 || s[1] >= 0x90))) 01997 return MY_CS_ILSEQ; 01998 01999 *pwc = ((my_wc_t) (c & 0x07) << 18) | 02000 ((my_wc_t) (s[1] ^ 0x80) << 12) | 02001 ((my_wc_t) (s[2] ^ 0x80) << 6) | 02002 (my_wc_t) (s[3] ^ 0x80); 02003 02004 return 4; 02005 } 02006 else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32) 02007 { 02008 if (s+5 >e) /* We need 5 characters */ 02009 return MY_CS_TOOSMALL5; 02010 02011 if (!((s[1] ^ 0x80) < 0x40 && 02012 (s[2] ^ 0x80) < 0x40 && 02013 (s[3] ^ 0x80) < 0x40 && 02014 (s[4] ^ 0x80) < 0x40 && 02015 (c >= 0xf9 || s[1] >= 0x88))) 02016 return MY_CS_ILSEQ; 02017 02018 *pwc = ((my_wc_t) (c & 0x03) << 24) | 02019 ((my_wc_t) (s[1] ^ 0x80) << 18) | 02020 ((my_wc_t) (s[2] ^ 0x80) << 12) | 02021 ((my_wc_t) (s[3] ^ 0x80) << 6) | 02022 (my_wc_t) (s[4] ^ 0x80); 02023 return 5; 02024 } 02025 else if (c < 0xfe && sizeof(my_wc_t)*8 >= 
32) 02026 { 02027 if ( s+6 >e ) /* We need 6 characters */ 02028 return MY_CS_TOOSMALL6; 02029 02030 if (!((s[1] ^ 0x80) < 0x40 && 02031 (s[2] ^ 0x80) < 0x40 && 02032 (s[3] ^ 0x80) < 0x40 && 02033 (s[4] ^ 0x80) < 0x40 && 02034 (s[5] ^ 0x80) < 0x40 && 02035 (c >= 0xfd || s[1] >= 0x84))) 02036 return MY_CS_ILSEQ; 02037 02038 *pwc = ((my_wc_t) (c & 0x01) << 30) 02039 | ((my_wc_t) (s[1] ^ 0x80) << 24) 02040 | ((my_wc_t) (s[2] ^ 0x80) << 18) 02041 | ((my_wc_t) (s[3] ^ 0x80) << 12) 02042 | ((my_wc_t) (s[4] ^ 0x80) << 6) 02043 | (my_wc_t) (s[5] ^ 0x80); 02044 return 6; 02045 } 02046 #endif 02047 return MY_CS_ILSEQ; 02048 }
Here is the caller graph for this function:


