utf8lib.c

   1 #include "quakedef.h"
   2 #include "utf8lib.h"
   3
   4 /*
   5 ================================================================================
   6 Initialization of UTF-8 support and new cvars.
   7 ================================================================================
   8 */
   9 // for compatibility this defaults to 0
  10 cvar_t    utf8_enable = {CVAR_SAVE, "utf8_enable", "0", "Enable UTF-8 support. For compatibility, this is disabled by default in most games."};
  11
  12 void   u8_Init(void)
  13 {
  14         Cvar_RegisterVariable(&utf8_enable);
  15 }
  16
  17 /*
  18 ================================================================================
  19 UTF-8 encoding and decoding functions follow.
  20 ================================================================================
  21 */
  22
  23 /** Analyze the next character and return various information if requested.
  24  * @param _s      An utf-8 string.
  25  * @param _start  Filled with the start byte-offset of the next valid character
  26  * @param _len    Fileed with the length of the next valid character
  27  * @param _ch     Filled with the unicode value of the next character
  28  * @param _maxlen Maximum number of bytes to read from _s
  29  * @return        Whether or not another valid character is in the string
  30  */
  31 #define U8_ANALYZE_INFINITY 7
  32 static qboolean u8_analyze(const char *_s, size_t *_start, size_t *_len, Uchar *_ch, size_t _maxlen)
  33 {
  34         const unsigned char *s = (const unsigned char*)_s;
  35         unsigned char bt, bc;
  36         size_t i;
  37         size_t bits, j;
  38         Uchar ch;
  39
  40         i = 0;
  41 findchar:
  42
  43         // <0xC2 is always an overlong encoding, they're invalid, thus skipped
  44         while (i < _maxlen && s[i] && s[i] >= 0x80 && s[i] < 0xC2) {
  45                 //fprintf(stderr, "skipping\n");
  46                 ++i;
  47         }
  48
  49         //fprintf(stderr, "checking\n");
  50         // If we hit the end, well, we're out and invalid
  51         if(i >= _maxlen || !s[i]) {
  52                 if (_start) *_start = i;
  53                 if (_len) *_len = 0;
  54                 return false;
  55         }
  56
  57         //fprintf(stderr, "checking ascii\n");
  58         // ascii characters
  59         if (s[i] < 0x80)
  60         {
  61                 if (_start) *_start = i;
  62                 if (_len) *_len = 1;
  63                 if (_ch) *_ch = (Uchar)s[i];
  64                 //fprintf(stderr, "valid ascii\n");
  65                 return true;
  66         }
  67         //fprintf(stderr, "checking length\n");
  68
  69         // Figure out the next char's length
  70         bc = s[i];
  71         bits = 1;
  72         // count the 1 bits, they're the # of bytes
  73         for (bt = 0x40; bt && (bc & bt); bt >>= 1, ++bits);
  74         if (!bt)
  75         {
  76                 //fprintf(stderr, "superlong\n");
  77                 ++i;
  78                 goto findchar;
  79         }
  80         if(i + bits > _maxlen) {
  81                 if (_start) *_start = i;
  82                 if (_len) *_len = 0;
  83                 return false;
  84         }
  85         // turn bt into a mask and give ch a starting value
  86         --bt;
  87         ch = (s[i] & bt);
  88         // check the byte sequence for invalid bytes
  89         for (j = 1; j < bits; ++j)
  90         {
  91                 // valid bit value: 10xx xxxx
  92                 //if (s[i+j] < 0x80 || s[i+j] >= 0xC0)
  93                 if ( (s[i+j] & 0xC0) != 0x80 )
  94                 {
  95                         //fprintf(stderr, "sequence of %i f'd at %i by %x\n", bits, j, (unsigned int)s[i+j]);
  96                         // this byte sequence is invalid, skip it
  97                         i += j;
  98                         // find a character after it
  99                         goto findchar;
 100                 }
 101                 // at the same time, decode the character
 102                 ch = (ch << 6) | (s[i+j] & 0x3F);
 103         }
 104
 105         // Now check the decoded byte for an overlong encoding
 106         if ( (bits >= 2 && ch < 0x80) ||
 107              (bits >= 3 && ch < 0x800) ||
 108              (bits >= 4 && ch < 0x10000) ||
 109              ch >= 0x10FFFF // RFC 3629
 110                 )
 111         {
 112                 i += bits;
 113                 //fprintf(stderr, "overlong: %i bytes for %x\n", bits, ch);
 114                 goto findchar;
 115         }
 116
 117         if (_start)
 118                 *_start = i;
 119         if (_len)
 120                 *_len = bits;
 121         if (_ch)
 122                 *_ch = ch;
 123         //fprintf(stderr, "valid utf8\n");
 124         return true;
 125 }
 126
 127 /** Get the number of characters in an UTF-8 string.
 128  * @param _s    An utf-8 encoded null-terminated string.
 129  * @return      The number of unicode characters in the string.
 130  */
 131 size_t u8_strlen(const char *_s)
 132 {
 133         size_t st, ln;
 134         size_t len = 0;
 135         const unsigned char *s = (const unsigned char*)_s;
 136
 137         if (!utf8_enable.integer)
 138                 return strlen(_s);
 139
 140         while (*s)
 141         {
 142                 // ascii char, skip u8_analyze
 143                 if (*s < 0x80)
 144                 {
 145                         ++len;
 146                         ++s;
 147                         continue;
 148                 }
 149
 150                 // invalid, skip u8_analyze
 151                 if (*s < 0xC2)
 152                 {
 153                         ++s;
 154                         continue;
 155                 }
 156
 157                 if (!u8_analyze((const char*)s, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 158                         break;
 159                 // valid character, skip after it
 160                 s += st + ln;
 161                 ++len;
 162         }
 163         return len;
 164 }
 165
 166 /** Get the number of characters in a part of an UTF-8 string.
 167  * @param _s    An utf-8 encoded null-terminated string.
 168  * @param n     The maximum number of bytes.
 169  * @return      The number of unicode characters in the string.
 170  */
 171 size_t u8_strnlen(const char *_s, size_t n)
 172 {
 173         size_t st, ln;
 174         size_t len = 0;
 175         const unsigned char *s = (const unsigned char*)_s;
 176
 177         if (!utf8_enable.integer)
 178         {
 179                 len = strlen(_s);
 180                 return (len < n) ? len : n;
 181         }
 182
 183         while (*s && n)
 184         {
 185                 // ascii char, skip u8_analyze
 186                 if (*s < 0x80)
 187                 {
 188                         ++len;
 189                         ++s;
 190                         --n;
 191                         continue;
 192                 }
 193
 194                 // invalid, skip u8_analyze
 195                 if (*s < 0xC2)
 196                 {
 197                         ++s;
 198                         --n;
 199                         continue;
 200                 }
 201
 202                 if (!u8_analyze((const char*)s, &st, &ln, NULL, n))
 203                         break;
 204                 // valid character, see if it's still inside the range specified by n:
 205                 if (n < st + ln)
 206                         return len;
 207                 ++len;
 208                 n -= st + ln;
 209                 s += st + ln;
 210         }
 211         return len;
 212 }
 213
 214 /** Get the number of bytes used in a string to represent an amount of characters.
 215  * @param _s    An utf-8 encoded null-terminated string.
 216  * @param n     The number of characters we want to know the byte-size for.
 217  * @return      The number of bytes used to represent n characters.
 218  */
 219 size_t u8_bytelen(const char *_s, size_t n)
 220 {
 221         size_t st, ln;
 222         size_t len = 0;
 223         const unsigned char *s = (const unsigned char*)_s;
 224
 225         if (!utf8_enable.integer) {
 226                 len = strlen(_s);
 227                 return (len < n) ? len : n;
 228         }
 229
 230         while (*s && n)
 231         {
 232                 // ascii char, skip u8_analyze
 233                 if (*s < 0x80)
 234                 {
 235                         ++len;
 236                         ++s;
 237                         --n;
 238                         continue;
 239                 }
 240
 241                 // invalid, skip u8_analyze
 242                 if (*s < 0xC2)
 243                 {
 244                         ++s;
 245                         ++len;
 246                         continue;
 247                 }
 248
 249                 if (!u8_analyze((const char*)s, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 250                         break;
 251                 --n;
 252                 s += st + ln;
 253                 len += st + ln;
 254         }
 255         return len;
 256 }
 257
 258 /** Get the byte-index for a character-index.
 259  * @param _s      An utf-8 encoded string.
 260  * @param i       The character-index for which you want the byte offset.
 261  * @param len     If not null, character's length will be stored in there.
 262  * @return        The byte-index at which the character begins, or -1 if the string is too short.
 263  */
 264 int u8_byteofs(const char *_s, size_t i, size_t *len)
 265 {
 266         size_t st, ln;
 267         size_t ofs = 0;
 268         const unsigned char *s = (const unsigned char*)_s;
 269
 270         if (!utf8_enable.integer)
 271         {
 272                 if (strlen(_s) < i)
 273                 {
 274                         if (len) *len = 0;
 275                         return -1;
 276                 }
 277
 278                 if (len) *len = 1;
 279                 return i;
 280         }
 281
 282         st = ln = 0;
 283         do
 284         {
 285                 ofs += ln;
 286                 if (!u8_analyze((const char*)s + ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 287                         return -1;
 288                 ofs += st;
 289         } while(i-- > 0);
 290         if (len)
 291                 *len = ln;
 292         return ofs;
 293 }
 294
 295 /** Get the char-index for a byte-index.
 296  * @param _s      An utf-8 encoded string.
 297  * @param i       The byte offset for which you want the character index.
 298  * @param len     If not null, the offset within the character is stored here.
 299  * @return        The character-index, or -1 if the string is too short.
 300  */
 301 int u8_charidx(const char *_s, size_t i, size_t *len)
 302 {
 303         size_t st, ln;
 304         size_t ofs = 0;
 305         size_t pofs = 0;
 306         int idx = 0;
 307         const unsigned char *s = (const unsigned char*)_s;
 308
 309         if (!utf8_enable.integer)
 310         {
 311                 if (len) *len = 0;
 312                 return i;
 313         }
 314
 315         while (ofs < i && s[ofs])
 316         {
 317                 // ascii character, skip u8_analyze
 318                 if (s[ofs] < 0x80)
 319                 {
 320                         pofs = ofs;
 321                         ++idx;
 322                         ++ofs;
 323                         continue;
 324                 }
 325
 326                 // invalid, skip u8_analyze
 327                 if (s[ofs] < 0xC2)
 328                 {
 329                         ++ofs;
 330                         continue;
 331                 }
 332
 333                 if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 334                         return -1;
 335                 // see if next char is after the bytemark
 336                 if (ofs + st > i)
 337                 {
 338                         if (len)
 339                                 *len = i - pofs;
 340                         return idx;
 341                 }
 342                 ++idx;
 343                 pofs = ofs + st;
 344                 ofs += st + ln;
 345                 // see if bytemark is within the char
 346                 if (ofs > i)
 347                 {
 348                         if (len)
 349                                 *len = i - pofs;
 350                         return idx;
 351                 }
 352         }
 353         if (len) *len = 0;
 354         return idx;
 355 }
 356
 357 /** Get the byte offset of the previous byte.
 358  * The result equals:
 359  * prevchar_pos = u8_byteofs(text, u8_charidx(text, thischar_pos, NULL) - 1, NULL)
 360  * @param _s      An utf-8 encoded string.
 361  * @param i       The current byte offset.
 362  * @return        The byte offset of the previous character
 363  */
 364 size_t u8_prevbyte(const char *_s, size_t i)
 365 {
 366         size_t st, ln;
 367         const unsigned char *s = (const unsigned char*)_s;
 368         size_t lastofs = 0;
 369         size_t ofs = 0;
 370
 371         if (!utf8_enable.integer)
 372         {
 373                 if (i > 0)
 374                         return i-1;
 375                 return 0;
 376         }
 377
 378         while (ofs < i && s[ofs])
 379         {
 380                 // ascii character, skip u8_analyze
 381                 if (s[ofs] < 0x80)
 382                 {
 383                         lastofs = ofs++;
 384                         continue;
 385                 }
 386
 387                 // invalid, skip u8_analyze
 388                 if (s[ofs] < 0xC2)
 389                 {
 390                         ++ofs;
 391                         continue;
 392                 }
 393
 394                 if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 395                         return lastofs;
 396                 if (ofs + st > i)
 397                         return lastofs;
 398                 if (ofs + st + ln >= i)
 399                         return ofs + st;
 400
 401                 lastofs = ofs;
 402                 ofs += st + ln;
 403         }
 404         return lastofs;
 405 }
 406
 407 static int char_usefont[256] = {
 408         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // specials
 409         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // specials
 410         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // shift+digit line
 411         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // digits
 412         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // caps
 413         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // caps
 414         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // small
 415         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // small
 416         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // specials
 417         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // faces
 418         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 419         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 420         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 421         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 422         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 423         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 424 };
 425
 426
 427 /** Fetch a character from an utf-8 encoded string.
 428  * @param _s      The start of an utf-8 encoded multi-byte character.
 429  * @param _end    Will point to after the first multi-byte character.
 430  * @return        The 32-bit integer representation of the first multi-byte character or 0 for invalid characters.
 431  */
 432 Uchar u8_getchar(const char *_s, const char **_end)
 433 {
 434         size_t st, ln;
 435         Uchar ch;
 436
 437         if (!utf8_enable.integer)
 438         {
 439                 if (_end)
 440                         *_end = _s + 1;
 441                 /* Careful: if we disable utf8 but not freetype, we wish to see freetype chars
 442                  * for normal letters. So use E000+x for special chars, but leave the freetype stuff for the
 443                  * rest:
 444                  */
 445                 if (!char_usefont[(unsigned int)*(const unsigned char*)_s])
 446                         return 0xE000 + (Uchar)*(const unsigned char*)_s;
 447                 return (Uchar)*(const unsigned char*)_s;
 448         }
 449
 450         if (!u8_analyze(_s, &st, &ln, &ch, U8_ANALYZE_INFINITY))
 451                 ch = 0;
 452         if (_end)
 453                 *_end = _s + st + ln;
 454         return ch;
 455 }
 456
 457 /** Fetch a character from an utf-8 encoded string.
 458  * @param _s      The start of an utf-8 encoded multi-byte character.
 459  * @param _end    Will point to after the first multi-byte character.
 460  * @return        The 32-bit integer representation of the first multi-byte character or 0 for invalid characters.
 461  */
 462 Uchar u8_getnchar(const char *_s, const char **_end, size_t _maxlen)
 463 {
 464         size_t st, ln;
 465         Uchar ch;
 466
 467         if (!utf8_enable.integer)
 468         {
 469                 if (_end)
 470                         *_end = _s + 1;
 471                 /* Careful: if we disable utf8 but not freetype, we wish to see freetype chars
 472                  * for normal letters. So use E000+x for special chars, but leave the freetype stuff for the
 473                  * rest:
 474                  */
 475                 if (!char_usefont[(unsigned int)*(const unsigned char*)_s])
 476                         return 0xE000 + (Uchar)*(const unsigned char*)_s;
 477                 return (Uchar)*(const unsigned char*)_s;
 478         }
 479
 480         if (!u8_analyze(_s, &st, &ln, &ch, _maxlen))
 481                 ch = 0;
 482         if (_end)
 483                 *_end = _s + st + ln;
 484         return ch;
 485 }
 486
 487 /** Encode a wide-character into utf-8.
 488  * @param w        The wide character to encode.
 489  * @param to       The target buffer the utf-8 encoded string is stored to.
 490  * @param maxlen   The maximum number of bytes that fit into the target buffer.
 491  * @return         Number of bytes written to the buffer not including the terminating null.
 492  *                 Less or equal to 0 if the buffer is too small.
 493  */
 494 int u8_fromchar(Uchar w, char *to, size_t maxlen)
 495 {
 496         if (maxlen < 1)
 497                 return 0;
 498
 499         if (!w)
 500                 return 0;
 501
 502         if (w >= 0xE000 && !utf8_enable.integer)
 503                 w -= 0xE000;
 504
 505         if (w < 0x80 || !utf8_enable.integer)
 506         {
 507                 to[0] = (char)w;
 508                 if (maxlen < 2)
 509                         return -1;
 510                 to[1] = 0;
 511                 return 1;
 512         }
 513         // for a little speedup
 514         if (w < 0x800)
 515         {
 516                 if (maxlen < 3)
 517                 {
 518                         to[0] = 0;
 519                         return -1;
 520                 }
 521                 to[2] = 0;
 522                 to[1] = 0x80 | (w & 0x3F); w >>= 6;
 523                 to[0] = 0xC0 | w;
 524                 return 2;
 525         }
 526         if (w < 0x10000)
 527         {
 528                 if (maxlen < 4)
 529                 {
 530                         to[0] = 0;
 531                         return -1;
 532                 }
 533                 to[3] = 0;
 534                 to[2] = 0x80 | (w & 0x3F); w >>= 6;
 535                 to[1] = 0x80 | (w & 0x3F); w >>= 6;
 536                 to[0] = 0xE0 | w;
 537                 return 3;
 538         }
 539
 540         // RFC 3629
 541         if (w <= 0x10FFFF)
 542         {
 543                 if (maxlen < 5)
 544                 {
 545                         to[0] = 0;
 546                         return -1;
 547                 }
 548                 to[4] = 0;
 549                 to[3] = 0x80 | (w & 0x3F); w >>= 6;
 550                 to[2] = 0x80 | (w & 0x3F); w >>= 6;
 551                 to[1] = 0x80 | (w & 0x3F); w >>= 6;
 552                 to[0] = 0xE0 | w;
 553                 return 4;
 554         }
 555         return 0;
 556 }
 557
 558 /** uses u8_fromchar on a static buffer
 559  * @param ch        The unicode character to convert to encode
 560  * @param l         The number of bytes without the terminating null.
 561  * @return          A statically allocated buffer containing the character's utf8 representation, or NULL if it fails.
 562  */
 563 char *u8_encodech(Uchar ch, size_t *l)
 564 {
 565         static char buf[16];
 566         size_t len;
 567         len = u8_fromchar(ch, buf, sizeof(buf));
 568         if (len > 0)
 569         {
 570                 if (l) *l = len;
 571                 return buf;
 572         }
 573         return NULL;
 574 }
 575
 576 /** Convert a utf-8 multibyte string to a wide character string.
 577  * @param wcs       The target wide-character buffer.
 578  * @param mb        The utf-8 encoded multibyte string to convert.
 579  * @param maxlen    The maximum number of wide-characters that fit into the target buffer.
 580  * @return          The number of characters written to the target buffer.
 581  */
 582 size_t u8_mbstowcs(Uchar *wcs, const char *mb, size_t maxlen)
 583 {
 584         size_t i;
 585         Uchar ch;
 586         if (maxlen < 1)
 587                 return 0;
 588         for (i = 0; *mb && i < maxlen-1; ++i)
 589         {
 590                 ch = u8_getchar(mb, &mb);
 591                 if (!ch)
 592                         break;
 593                 wcs[i] = ch;
 594         }
 595         wcs[i] = 0;
 596         return i;
 597 }
 598
 599 /** Convert a wide-character string to a utf-8 multibyte string.
 600  * @param mb      The target buffer the utf-8 string is written to.
 601  * @param wcs     The wide-character string to convert.
 602  * @param maxlen  The number bytes that fit into the multibyte target buffer.
 603  * @return        The number of bytes written, not including the terminating \0
 604  */
 605 size_t u8_wcstombs(char *mb, const Uchar *wcs, size_t maxlen)
 606 {
 607         size_t i;
 608         const char *start = mb;
 609         if (maxlen < 2)
 610                 return 0;
 611         for (i = 0; wcs[i] && i < maxlen-1; ++i)
 612         {
 613                 /*
 614                 int len;
 615                 if ( (len = u8_fromchar(wcs[i], mb, maxlen - i)) < 0)
 616                         return (mb - start);
 617                 mb += len;
 618                 */
 619                 mb += u8_fromchar(wcs[i], mb, maxlen - i);
 620         }
 621         *mb = 0;
 622         return (mb - start);
 623 }
 624
 625 /*
 626 ============
 627 UTF-8 aware COM_StringLengthNoColors
 628
 629 calculates the visible width of a color coded string.
 630
 631 *valid is filled with TRUE if the string is a valid colored string (that is, if
 632 it does not end with an unfinished color code). If it gets filled with FALSE, a
 633 fix would be adding a STRING_COLOR_TAG at the end of the string.
 634
 635 valid can be set to NULL if the caller doesn't care.
 636
 637 For size_s, specify the maximum number of characters from s to use, or 0 to use
 638 all characters until the zero terminator.
 639 ============
 640 */
 641 size_t
 642 COM_StringLengthNoColors(const char *s, size_t size_s, qboolean *valid);
 643 size_t
 644 u8_COM_StringLengthNoColors(const char *_s, size_t size_s, qboolean *valid)
 645 {
 646         const unsigned char *s = (const unsigned char*)_s;
 647         const unsigned char *end;
 648         size_t len = 0;
 649
 650         if (!utf8_enable.integer)
 651                 return COM_StringLengthNoColors(_s, size_s, valid);
 652
 653         end = size_s ? (s + size_s) : NULL;
 654
 655         for(;;)
 656         {
 657                 switch((s == end) ? 0 : *s)
 658                 {
 659                         case 0:
 660                                 if(valid)
 661                                         *valid = TRUE;
 662                                 return len;
 663                         case STRING_COLOR_TAG:
 664                                 ++s;
 665                                 switch((s == end) ? 0 : *s)
 666                                 {
 667                                         case STRING_COLOR_RGB_TAG_CHAR:
 668                                                 if (s+1 != end && isxdigit(s[1]) &&
 669                                                         s+2 != end && isxdigit(s[2]) &&
 670                                                         s+3 != end && isxdigit(s[3]) )
 671                                                 {
 672                                                         s+=3;
 673                                                         break;
 674                                                 }
 675                                                 ++len; // STRING_COLOR_TAG
 676                                                 ++len; // STRING_COLOR_RGB_TAG_CHAR
 677                                                 break;
 678                                         case 0: // ends with unfinished color code!
 679                                                 ++len;
 680                                                 if(valid)
 681                                                         *valid = FALSE;
 682                                                 return len;
 683                                         case STRING_COLOR_TAG: // escaped ^
 684                                                 ++len;
 685                                                 break;
 686                                         case '0': case '1': case '2': case '3': case '4':
 687                                         case '5': case '6': case '7': case '8': case '9': // color code
 688                                                 break;
 689                                         default: // not a color code
 690                                                 ++len; // STRING_COLOR_TAG
 691                                                 ++len; // the character
 692                                                 break;
 693                                 }
 694                                 break;
 695                         default:
 696                                 ++len;
 697                                 break;
 698                 }
 699
 700                 // start of a wide character
 701                 if (*s & 0xC0)
 702                 {
 703                         for (++s; *s >= 0x80 && *s <= 0xC0; ++s);
 704                         continue;
 705                 }
 706                 // part of a wide character, we ignore that one
 707                 if (*s <= 0xBF)
 708                         --len;
 709                 ++s;
 710         }
 711         // never get here
 712 }
 713
 714 /** Pads a utf-8 string
 715  * @param out     The target buffer the utf-8 string is written to.
 716  * @param outsize The size of the target buffer, including the final NUL
 717  * @param in      The input utf-8 buffer
 718  * @param leftalign Left align the output string (by default right alignment is done)
 719  * @param minwidth The minimum output width
 720  * @param maxwidth The maximum output width
 721  * @return        The number of bytes written, not including the terminating \0
 722  */
 723 size_t u8_strpad(char *out, size_t outsize, const char *in, qboolean leftalign, size_t minwidth, size_t maxwidth)
 724 {
 725         if(!utf8_enable.integer)
 726         {
 727                 return dpsnprintf(out, outsize, "%*.*s", leftalign ? -(int) minwidth : (int) minwidth, (int) maxwidth, in);
 728         }
 729         else
 730         {
 731                 size_t l = u8_bytelen(in, maxwidth);
 732                 size_t actual_width = u8_strnlen(in, l);
 733                 int pad = (actual_width >= minwidth) ? 0 : (minwidth - actual_width);
 734                 int prec = l;
 735                 int lpad = leftalign ? 0 : pad;
 736                 int rpad = leftalign ? pad : 0;
 737                 return dpsnprintf(out, outsize, "%*s%.*s%*s", lpad, "", prec, in, rpad, "");
 738         }
 739 }