/* This file is included!
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000-2017 Expat development team
   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
   following conditions:

   The above copyright  notice and this permission notice  shall be included
   in all copies or substantial portions of the Software.

   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#ifdef XML_TOK_IMPL_C

/* Fallback used when the including file supplies no IS_INVALID_CHAR:
   every n-byte sequence is then treated as valid (the macro is 0). */
#ifndef IS_INVALID_CHAR
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif

/* Switch case for an n-byte lead byte (n is pasted onto BT_LEAD, so it
   must be a literal 2, 3 or 4).  Uses `enc` and `end` from the scope in
   which it is expanded.
   - fewer than n bytes left: return XML_TOK_PARTIAL_CHAR
   - IS_INVALID_CHAR rejects the sequence: store ptr in *nextTokPtr and
     return XML_TOK_INVALID
   - otherwise: advance ptr by n and leave the switch */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (IS_INVALID_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      ptr += n; \
      break;

/* Switch cases shared by the scanners: the three multi-byte lead cases
   above, plus BT_NONXML, BT_MALFORM and BT_TRAIL, which always store ptr
   in *nextTokPtr and return XML_TOK_INVALID. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_NONXML: \
  case BT_MALFORM: \
  case BT_TRAIL: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;

/* Switch case for an n-byte lead byte at a position where a name
   character is required (n is pasted onto BT_LEAD: literal 2, 3 or 4).
   - fewer than n bytes left: return XML_TOK_PARTIAL_CHAR
   - the sequence is rejected by IS_INVALID_CHAR, or is not a name
     character: store ptr in *nextTokPtr and return XML_TOK_INVALID
   - otherwise: advance ptr by n and leave the switch
   The IS_INVALID_CHAR test comes first so that a malformed sequence is
   never handed to IS_NAME_CHAR; with the default IS_INVALID_CHAR (0)
   the behaviour is unchanged. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NAME_CHAR(enc, ptr, n)) { \
       *nextTokPtr = ptr; \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;

/* Switch cases that consume one name character at ptr.
   BT_NONASCII is checked with IS_NAME_CHAR_MINBPC and, when accepted,
   falls through to the single-unit types (BT_NMSTRT, BT_HEX, BT_DIGIT,
   BT_NAME, BT_MINUS), which advance ptr by MINBPC(enc).  Multi-byte
   lead bytes are handled by CHECK_NAME_CASE. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)

/* Switch case for an n-byte lead byte at a position where a name-start
   character is required (n is pasted onto BT_LEAD: literal 2, 3 or 4).
   - fewer than n bytes left: return XML_TOK_PARTIAL_CHAR
   - the sequence is rejected by IS_INVALID_CHAR, or is not a name-start
     character: store ptr in *nextTokPtr and return XML_TOK_INVALID
   - otherwise: advance ptr by n and leave the switch
   The IS_INVALID_CHAR test comes first so that a malformed sequence is
   never handed to IS_NMSTRT_CHAR; with the default IS_INVALID_CHAR (0)
   the behaviour is unchanged. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NMSTRT_CHAR(enc, ptr, n)) { \
       *nextTokPtr = ptr; \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;

/* Switch cases that consume one name-start character at ptr.
   BT_NONASCII is checked with IS_NMSTRT_CHAR_MINBPC and, when accepted,
   falls through to BT_NMSTRT / BT_HEX, which advance ptr by MINBPC(enc).
   Multi-byte lead bytes are handled by CHECK_NMSTRT_CASE. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)

/* Identity fallback: unless the including file has already defined
   PREFIX, PREFIX(x) expands to x itself. */
#ifndef PREFIX
#define PREFIX(name) name
#endif


/* True when at least `count` characters of MINBPC(enc) bytes each lie
   between ptr and end.  All arguments are parenthesised so that an
   expression such as `a + b` passed as count is multiplied as a whole. */
#define HAS_CHARS(enc, ptr, end, count) \
    ((end) - (ptr) >= (count) * MINBPC(enc))

/* True when at least one character lies between ptr and end. */
#define HAS_CHAR(enc, ptr, end) \
    HAS_CHARS(enc, ptr, end, 1)

/* Leave the enclosing function with XML_TOK_PARTIAL unless `count`
   characters are available between ptr and end (see HAS_CHARS). */
#define REQUIRE_CHARS(enc, ptr, end, count) \
    { \
      if (! HAS_CHARS(enc, ptr, end, count)) \
        return XML_TOK_PARTIAL; \
    }

/* Single-character form of REQUIRE_CHARS. */
#define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1)


Martin v. Löwis's avatar
Martin v. Löwis committed
137 138
/* ptr points to character following "<!-" */

139 140 141
static int PTRCALL
PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
                    const char *end, const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
142
{
143
  if (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
144 145 146 147 148
    if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
    ptr += MINBPC(enc);
149
    while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
150 151 152
      switch (BYTE_TYPE(enc, ptr)) {
      INVALID_CASES(ptr, nextTokPtr)
      case BT_MINUS:
153 154
        ptr += MINBPC(enc);
        REQUIRE_CHAR(enc, ptr, end);
155
        if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
156 157
          ptr += MINBPC(enc);
          REQUIRE_CHAR(enc, ptr, end);
158 159 160 161 162 163 164 165
          if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
            *nextTokPtr = ptr;
            return XML_TOK_INVALID;
          }
          *nextTokPtr = ptr + MINBPC(enc);
          return XML_TOK_COMMENT;
        }
        break;
Martin v. Löwis's avatar
Martin v. Löwis committed
166
      default:
167 168
        ptr += MINBPC(enc);
        break;
Martin v. Löwis's avatar
Martin v. Löwis committed
169 170 171 172 173 174 175 176
      }
    }
  }
  return XML_TOK_PARTIAL;
}

/* ptr points to character following "<!" */

177 178 179
static int PTRCALL
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
                 const char *end, const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
180
{
181
  REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
182 183 184 185 186 187 188 189 190 191 192 193 194 195
  switch (BYTE_TYPE(enc, ptr)) {
  case BT_MINUS:
    return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_LSQB:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_COND_SECT_OPEN;
  case BT_NMSTRT:
  case BT_HEX:
    ptr += MINBPC(enc);
    break;
  default:
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  }
196
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
197 198
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_PERCNT:
199
      REQUIRE_CHARS(enc, ptr, end, 2);
Martin v. Löwis's avatar
Martin v. Löwis committed
200 201 202
      /* don't allow <!ENTITY% foo "whatever"> */
      switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
      case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
203 204
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
      }
      /* fall through */
    case BT_S: case BT_CR: case BT_LF:
      *nextTokPtr = ptr;
      return XML_TOK_DECL_OPEN;
    case BT_NMSTRT:
    case BT_HEX:
      ptr += MINBPC(enc);
      break;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return XML_TOK_PARTIAL;
}

222
static int PTRCALL
223
PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
224
                      const char *end, int *tokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
{
  int upper = 0;
  *tokPtr = XML_TOK_PI;
  if (end - ptr != MINBPC(enc)*3)
    return 1;
  switch (BYTE_TO_ASCII(enc, ptr)) {
  case ASCII_x:
    break;
  case ASCII_X:
    upper = 1;
    break;
  default:
    return 1;
  }
  ptr += MINBPC(enc);
  switch (BYTE_TO_ASCII(enc, ptr)) {
  case ASCII_m:
    break;
  case ASCII_M:
    upper = 1;
    break;
  default:
    return 1;
  }
  ptr += MINBPC(enc);
  switch (BYTE_TO_ASCII(enc, ptr)) {
  case ASCII_l:
    break;
  case ASCII_L:
    upper = 1;
    break;
  default:
    return 1;
  }
  if (upper)
    return 0;
  *tokPtr = XML_TOK_XML_DECL;
  return 1;
}

/* ptr points to character following "<?" */

267 268 269
static int PTRCALL
PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
               const char *end, const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
270 271 272
{
  int tok;
  const char *target = ptr;
273
  REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
274 275 276 277 278 279
  switch (BYTE_TYPE(enc, ptr)) {
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  default:
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  }
280
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
281 282 283 284
    switch (BYTE_TYPE(enc, ptr)) {
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
    case BT_S: case BT_CR: case BT_LF:
      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
285 286
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
287 288
      }
      ptr += MINBPC(enc);
289
      while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
290 291
        switch (BYTE_TYPE(enc, ptr)) {
        INVALID_CASES(ptr, nextTokPtr)
292 293
        case BT_QUEST:
          ptr += MINBPC(enc);
294
          REQUIRE_CHAR(enc, ptr, end);
295 296 297 298 299 300 301 302 303
          if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
            *nextTokPtr = ptr + MINBPC(enc);
            return tok;
          }
          break;
        default:
          ptr += MINBPC(enc);
          break;
        }
Martin v. Löwis's avatar
Martin v. Löwis committed
304 305 306 307
      }
      return XML_TOK_PARTIAL;
    case BT_QUEST:
      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
308 309
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
310 311
      }
      ptr += MINBPC(enc);
312
      REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
313
      if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
314 315
        *nextTokPtr = ptr + MINBPC(enc);
        return tok;
Martin v. Löwis's avatar
Martin v. Löwis committed
316 317 318 319 320 321 322 323 324 325
      }
      /* fall through */
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return XML_TOK_PARTIAL;
}

326
static int PTRCALL
327
PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
328
                         const char *end, const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
329
{
330 331
  static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
                                     ASCII_T, ASCII_A, ASCII_LSQB };
Martin v. Löwis's avatar
Martin v. Löwis committed
332 333
  int i;
  /* CDATA[ */
334
  REQUIRE_CHARS(enc, ptr, end, 6);
Martin v. Löwis's avatar
Martin v. Löwis committed
335 336 337 338 339 340 341 342 343 344
  for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
    if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  *nextTokPtr = ptr;
  return XML_TOK_CDATA_SECT_OPEN;
}

345 346 347
static int PTRCALL
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
                        const char *end, const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
348
{
349
  if (ptr >= end)
Martin v. Löwis's avatar
Martin v. Löwis committed
350 351 352 353 354 355
    return XML_TOK_NONE;
  if (MINBPC(enc) > 1) {
    size_t n = end - ptr;
    if (n & (MINBPC(enc) - 1)) {
      n &= ~(MINBPC(enc) - 1);
      if (n == 0)
356
        return XML_TOK_PARTIAL;
Martin v. Löwis's avatar
Martin v. Löwis committed
357 358 359 360 361 362
      end = ptr + n;
    }
  }
  switch (BYTE_TYPE(enc, ptr)) {
  case BT_RSQB:
    ptr += MINBPC(enc);
363
    REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
364 365 366
    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
      break;
    ptr += MINBPC(enc);
367
    REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
368 369 370 371 372 373 374 375
    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
      ptr -= MINBPC(enc);
      break;
    }
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_CDATA_SECT_CLOSE;
  case BT_CR:
    ptr += MINBPC(enc);
376
    REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
377 378 379 380 381 382 383 384 385 386 387 388
    if (BYTE_TYPE(enc, ptr) == BT_LF)
      ptr += MINBPC(enc);
    *nextTokPtr = ptr;
    return XML_TOK_DATA_NEWLINE;
  case BT_LF:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_DATA_NEWLINE;
  INVALID_CASES(ptr, nextTokPtr)
  default:
    ptr += MINBPC(enc);
    break;
  }
389
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
390 391 392 393
    switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
    case BT_LEAD ## n: \
      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
394 395
        *nextTokPtr = ptr; \
        return XML_TOK_DATA_CHARS; \
Martin v. Löwis's avatar
Martin v. Löwis committed
396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419
      } \
      ptr += n; \
      break;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
    case BT_NONXML:
    case BT_MALFORM:
    case BT_TRAIL:
    case BT_CR:
    case BT_LF:
    case BT_RSQB:
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    default:
      ptr += MINBPC(enc);
      break;
    }
  }
  *nextTokPtr = ptr;
  return XML_TOK_DATA_CHARS;
}

/* ptr points to character following "</" */

420 421 422
static int PTRCALL
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
                   const char *end, const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
423
{
424
  REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
425 426 427 428 429 430
  switch (BYTE_TYPE(enc, ptr)) {
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  default:
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  }
431
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
432 433 434
    switch (BYTE_TYPE(enc, ptr)) {
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
    case BT_S: case BT_CR: case BT_LF:
435
      for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
436 437 438 439 440
        switch (BYTE_TYPE(enc, ptr)) {
        case BT_S: case BT_CR: case BT_LF:
          break;
        case BT_GT:
          *nextTokPtr = ptr + MINBPC(enc);
Martin v. Löwis's avatar
Martin v. Löwis committed
441
          return XML_TOK_END_TAG;
442 443 444 445
        default:
          *nextTokPtr = ptr;
          return XML_TOK_INVALID;
        }
Martin v. Löwis's avatar
Martin v. Löwis committed
446 447 448 449
      }
      return XML_TOK_PARTIAL;
#ifdef XML_NS
    case BT_COLON:
450 451
      /* no need to check qname syntax here,
         since end-tag must match exactly */
Martin v. Löwis's avatar
Martin v. Löwis committed
452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
      ptr += MINBPC(enc);
      break;
#endif
    case BT_GT:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_END_TAG;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return XML_TOK_PARTIAL;
}

/* ptr points to character following "&#X" */

468 469 470
static int PTRCALL
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
                       const char *end, const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
471
{
472
  if (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
473 474 475 476 477 478 479 480
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_DIGIT:
    case BT_HEX:
      break;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
481
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
482 483 484
      switch (BYTE_TYPE(enc, ptr)) {
      case BT_DIGIT:
      case BT_HEX:
485
        break;
Martin v. Löwis's avatar
Martin v. Löwis committed
486
      case BT_SEMI:
487 488
        *nextTokPtr = ptr + MINBPC(enc);
        return XML_TOK_CHAR_REF;
Martin v. Löwis's avatar
Martin v. Löwis committed
489
      default:
490 491
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
492 493 494 495 496 497 498 499
      }
    }
  }
  return XML_TOK_PARTIAL;
}

/* ptr points to character following "&#" */

500 501 502
static int PTRCALL
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
                    const char *end, const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
503
{
504
  if (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
505 506 507 508 509 510 511 512 513
    if (CHAR_MATCHES(enc, ptr, ASCII_x))
      return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_DIGIT:
      break;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
514
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
515 516
      switch (BYTE_TYPE(enc, ptr)) {
      case BT_DIGIT:
517
        break;
Martin v. Löwis's avatar
Martin v. Löwis committed
518
      case BT_SEMI:
519 520
        *nextTokPtr = ptr + MINBPC(enc);
        return XML_TOK_CHAR_REF;
Martin v. Löwis's avatar
Martin v. Löwis committed
521
      default:
522 523
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
524 525 526 527 528 529 530 531
      }
    }
  }
  return XML_TOK_PARTIAL;
}

/* ptr points to character following "&" */

532 533 534
static int PTRCALL
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
                const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
535
{
536
  REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
537 538 539 540 541 542 543 544
  switch (BYTE_TYPE(enc, ptr)) {
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  case BT_NUM:
    return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  default:
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  }
545
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
546 547 548 549 550 551 552 553 554 555 556 557 558 559 560
    switch (BYTE_TYPE(enc, ptr)) {
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
    case BT_SEMI:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_ENTITY_REF;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return XML_TOK_PARTIAL;
}

/* ptr points to character following first character of attribute name */

561 562 563
static int PTRCALL
PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
                 const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
564 565 566 567
{
#ifdef XML_NS
  int hadColon = 0;
#endif
568
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
569 570 571 572 573
    switch (BYTE_TYPE(enc, ptr)) {
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
    case BT_COLON:
      if (hadColon) {
574 575
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
576 577 578
      }
      hadColon = 1;
      ptr += MINBPC(enc);
579
      REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
580 581 582
      switch (BYTE_TYPE(enc, ptr)) {
      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
      default:
583 584
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
585 586 587 588 589
      }
      break;
#endif
    case BT_S: case BT_CR: case BT_LF:
      for (;;) {
590
        int t;
Martin v. Löwis's avatar
Martin v. Löwis committed
591

592
        ptr += MINBPC(enc);
593
        REQUIRE_CHAR(enc, ptr, end);
594 595 596 597 598 599 600 601 602 603 604 605
        t = BYTE_TYPE(enc, ptr);
        if (t == BT_EQUALS)
          break;
        switch (t) {
        case BT_S:
        case BT_LF:
        case BT_CR:
          break;
        default:
          *nextTokPtr = ptr;
          return XML_TOK_INVALID;
        }
Martin v. Löwis's avatar
Martin v. Löwis committed
606
      }
607
      /* fall through */
Martin v. Löwis's avatar
Martin v. Löwis committed
608 609
    case BT_EQUALS:
      {
610
        int open;
Martin v. Löwis's avatar
Martin v. Löwis committed
611
#ifdef XML_NS
612
        hadColon = 0;
Martin v. Löwis's avatar
Martin v. Löwis committed
613
#endif
614 615
        for (;;) {
          ptr += MINBPC(enc);
616
          REQUIRE_CHAR(enc, ptr, end);
617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633
          open = BYTE_TYPE(enc, ptr);
          if (open == BT_QUOT || open == BT_APOS)
            break;
          switch (open) {
          case BT_S:
          case BT_LF:
          case BT_CR:
            break;
          default:
            *nextTokPtr = ptr;
            return XML_TOK_INVALID;
          }
        }
        ptr += MINBPC(enc);
        /* in attribute value */
        for (;;) {
          int t;
634
          REQUIRE_CHAR(enc, ptr, end);
635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658
          t = BYTE_TYPE(enc, ptr);
          if (t == open)
            break;
          switch (t) {
          INVALID_CASES(ptr, nextTokPtr)
          case BT_AMP:
            {
              int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
              if (tok <= 0) {
                if (tok == XML_TOK_INVALID)
                  *nextTokPtr = ptr;
                return tok;
              }
              break;
            }
          case BT_LT:
            *nextTokPtr = ptr;
            return XML_TOK_INVALID;
          default:
            ptr += MINBPC(enc);
            break;
          }
        }
        ptr += MINBPC(enc);
659
        REQUIRE_CHAR(enc, ptr, end);
660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675
        switch (BYTE_TYPE(enc, ptr)) {
        case BT_S:
        case BT_CR:
        case BT_LF:
          break;
        case BT_SOL:
          goto sol;
        case BT_GT:
          goto gt;
        default:
          *nextTokPtr = ptr;
          return XML_TOK_INVALID;
        }
        /* ptr points to closing quote */
        for (;;) {
          ptr += MINBPC(enc);
676
          REQUIRE_CHAR(enc, ptr, end);
677 678 679 680 681
          switch (BYTE_TYPE(enc, ptr)) {
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
          case BT_S: case BT_CR: case BT_LF:
            continue;
          case BT_GT:
Martin v. Löwis's avatar
Martin v. Löwis committed
682
          gt:
683 684 685
            *nextTokPtr = ptr + MINBPC(enc);
            return XML_TOK_START_TAG_WITH_ATTS;
          case BT_SOL:
Martin v. Löwis's avatar
Martin v. Löwis committed
686
          sol:
687
            ptr += MINBPC(enc);
688
            REQUIRE_CHAR(enc, ptr, end);
689 690 691 692 693 694 695 696 697 698 699 700 701
            if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
              *nextTokPtr = ptr;
              return XML_TOK_INVALID;
            }
            *nextTokPtr = ptr + MINBPC(enc);
            return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
          default:
            *nextTokPtr = ptr;
            return XML_TOK_INVALID;
          }
          break;
        }
        break;
Martin v. Löwis's avatar
Martin v. Löwis committed
702 703 704 705 706 707 708 709 710 711 712
      }
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return XML_TOK_PARTIAL;
}

/* ptr points to character following "<" */

713 714 715
static int PTRCALL
PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
               const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
716 717 718 719
{
#ifdef XML_NS
  int hadColon;
#endif
720
  REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
721 722 723
  switch (BYTE_TYPE(enc, ptr)) {
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  case BT_EXCL:
724 725
    ptr += MINBPC(enc);
    REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
726 727 728 729
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_MINUS:
      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
    case BT_LSQB:
730 731
      return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
                                      end, nextTokPtr);
Martin v. Löwis's avatar
Martin v. Löwis committed
732 733 734 735 736 737 738 739 740 741 742 743 744 745 746
    }
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  case BT_QUEST:
    return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_SOL:
    return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  default:
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  }
#ifdef XML_NS
  hadColon = 0;
#endif
  /* we have a start-tag */
747
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
748 749 750 751 752
    switch (BYTE_TYPE(enc, ptr)) {
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
    case BT_COLON:
      if (hadColon) {
753 754
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
755 756 757
      }
      hadColon = 1;
      ptr += MINBPC(enc);
758
      REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
759 760 761 762 763 764 765 766 767 768 769
      switch (BYTE_TYPE(enc, ptr)) {
      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
      default:
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      break;
#endif
    case BT_S: case BT_CR: case BT_LF:
      {
        ptr += MINBPC(enc);
770
        while (HAS_CHAR(enc, ptr, end)) {
771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786
          switch (BYTE_TYPE(enc, ptr)) {
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
          case BT_GT:
            goto gt;
          case BT_SOL:
            goto sol;
          case BT_S: case BT_CR: case BT_LF:
            ptr += MINBPC(enc);
            continue;
          default:
            *nextTokPtr = ptr;
            return XML_TOK_INVALID;
          }
          return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
        }
        return XML_TOK_PARTIAL;
Martin v. Löwis's avatar
Martin v. Löwis committed
787 788 789 790 791 792 793 794
      }
    case BT_GT:
    gt:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_START_TAG_NO_ATTS;
    case BT_SOL:
    sol:
      ptr += MINBPC(enc);
795
      REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
796
      if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
797 798
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
799 800 801 802 803 804 805 806 807 808 809
      }
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return XML_TOK_PARTIAL;
}

810 811 812
static int PTRCALL
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
                   const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
813
{
814
  if (ptr >= end)
Martin v. Löwis's avatar
Martin v. Löwis committed
815 816 817 818 819 820
    return XML_TOK_NONE;
  if (MINBPC(enc) > 1) {
    size_t n = end - ptr;
    if (n & (MINBPC(enc) - 1)) {
      n &= ~(MINBPC(enc) - 1);
      if (n == 0)
821
        return XML_TOK_PARTIAL;
Martin v. Löwis's avatar
Martin v. Löwis committed
822 823 824 825 826 827 828 829 830 831
      end = ptr + n;
    }
  }
  switch (BYTE_TYPE(enc, ptr)) {
  case BT_LT:
    return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_AMP:
    return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_CR:
    ptr += MINBPC(enc);
832
    if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwis's avatar
Martin v. Löwis committed
833 834 835 836 837 838 839 840 841 842
      return XML_TOK_TRAILING_CR;
    if (BYTE_TYPE(enc, ptr) == BT_LF)
      ptr += MINBPC(enc);
    *nextTokPtr = ptr;
    return XML_TOK_DATA_NEWLINE;
  case BT_LF:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_DATA_NEWLINE;
  case BT_RSQB:
    ptr += MINBPC(enc);
843
    if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwis's avatar
Martin v. Löwis committed
844 845 846 847
      return XML_TOK_TRAILING_RSQB;
    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
      break;
    ptr += MINBPC(enc);
848
    if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwis's avatar
Martin v. Löwis committed
849 850 851 852 853 854 855 856 857 858 859 860
      return XML_TOK_TRAILING_RSQB;
    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
      ptr -= MINBPC(enc);
      break;
    }
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  INVALID_CASES(ptr, nextTokPtr)
  default:
    ptr += MINBPC(enc);
    break;
  }
861
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
862 863 864 865
    switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
    case BT_LEAD ## n: \
      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
866 867
        *nextTokPtr = ptr; \
        return XML_TOK_DATA_CHARS; \
Martin v. Löwis's avatar
Martin v. Löwis committed
868 869 870 871 872 873
      } \
      ptr += n; \
      break;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
    case BT_RSQB:
874
      if (HAS_CHARS(enc, ptr, end, 2)) {
875 876 877 878
         if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
           ptr += MINBPC(enc);
           break;
         }
879
         if (HAS_CHARS(enc, ptr, end, 3)) {
880 881 882 883 884 885 886
           if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
             ptr += MINBPC(enc);
             break;
           }
           *nextTokPtr = ptr + 2*MINBPC(enc);
           return XML_TOK_INVALID;
         }
Martin v. Löwis's avatar
Martin v. Löwis committed
887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908
      }
      /* fall through */
    case BT_AMP:
    case BT_LT:
    case BT_NONXML:
    case BT_MALFORM:
    case BT_TRAIL:
    case BT_CR:
    case BT_LF:
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    default:
      ptr += MINBPC(enc);
      break;
    }
  }
  *nextTokPtr = ptr;
  return XML_TOK_DATA_CHARS;
}

/* ptr points to character following "%" */

909 910 911
static int PTRCALL
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
                    const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
912
{
913
  REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
914 915 916 917 918 919 920 921 922
  switch (BYTE_TYPE(enc, ptr)) {
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
    *nextTokPtr = ptr;
    return XML_TOK_PERCENT;
  default:
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  }
923
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
924 925 926 927 928 929 930 931 932 933 934 935 936
    switch (BYTE_TYPE(enc, ptr)) {
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
    case BT_SEMI:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_PARAM_ENTITY_REF;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return XML_TOK_PARTIAL;
}

937 938 939
static int PTRCALL
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
                      const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
940
{
941
  REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
942 943 944 945 946 947
  switch (BYTE_TYPE(enc, ptr)) {
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  default:
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  }
948
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
949 950 951 952 953 954 955 956 957 958 959 960 961 962
    switch (BYTE_TYPE(enc, ptr)) {
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
    case BT_CR: case BT_LF: case BT_S:
    case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
      *nextTokPtr = ptr;
      return XML_TOK_POUND_NAME;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return -XML_TOK_POUND_NAME;
}

963 964 965 966
static int PTRCALL
PREFIX(scanLit)(int open, const ENCODING *enc,
                const char *ptr, const char *end,
                const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
967
{
968
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
969 970 971 972 973 974 975
    int t = BYTE_TYPE(enc, ptr);
    switch (t) {
    INVALID_CASES(ptr, nextTokPtr)
    case BT_QUOT:
    case BT_APOS:
      ptr += MINBPC(enc);
      if (t != open)
976
        break;
977
      if (! HAS_CHAR(enc, ptr, end))
978
        return -XML_TOK_LITERAL;
Martin v. Löwis's avatar
Martin v. Löwis committed
979 980 981 982
      *nextTokPtr = ptr;
      switch (BYTE_TYPE(enc, ptr)) {
      case BT_S: case BT_CR: case BT_LF:
      case BT_GT: case BT_PERCNT: case BT_LSQB:
983
        return XML_TOK_LITERAL;
Martin v. Löwis's avatar
Martin v. Löwis committed
984
      default:
985
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
986 987 988 989 990 991 992 993 994
      }
    default:
      ptr += MINBPC(enc);
      break;
    }
  }
  return XML_TOK_PARTIAL;
}

995 996 997
static int PTRCALL
PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
                  const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
998 999
{
  int tok;
1000
  if (ptr >= end)
Martin v. Löwis's avatar
Martin v. Löwis committed
1001 1002 1003 1004 1005 1006
    return XML_TOK_NONE;
  if (MINBPC(enc) > 1) {
    size_t n = end - ptr;
    if (n & (MINBPC(enc) - 1)) {
      n &= ~(MINBPC(enc) - 1);
      if (n == 0)
1007
        return XML_TOK_PARTIAL;
Martin v. Löwis's avatar
Martin v. Löwis committed
1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
      end = ptr + n;
    }
  }
  switch (BYTE_TYPE(enc, ptr)) {
  case BT_QUOT:
    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_APOS:
    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_LT:
    {
      ptr += MINBPC(enc);
1019
      REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
1020 1021
      switch (BYTE_TYPE(enc, ptr)) {
      case BT_EXCL:
1022
        return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
Martin v. Löwis's avatar
Martin v. Löwis committed
1023
      case BT_QUEST:
1024
        return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
Martin v. Löwis's avatar
Martin v. Löwis committed
1025 1026 1027 1028 1029 1030
      case BT_NMSTRT:
      case BT_HEX:
      case BT_NONASCII:
      case BT_LEAD2:
      case BT_LEAD3:
      case BT_LEAD4:
1031 1032
        *nextTokPtr = ptr - MINBPC(enc);
        return XML_TOK_INSTANCE_START;
Martin v. Löwis's avatar
Martin v. Löwis committed
1033 1034 1035 1036 1037
      }
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  case BT_CR:
1038 1039 1040
    if (ptr + MINBPC(enc) == end) {
      *nextTokPtr = end;
      /* indicate that this might be part of a CR/LF pair */
Martin v. Löwis's avatar
Martin v. Löwis committed
1041
      return -XML_TOK_PROLOG_S;
1042
    }
Martin v. Löwis's avatar
Martin v. Löwis committed
1043 1044 1045 1046
    /* fall through */
  case BT_S: case BT_LF:
    for (;;) {
      ptr += MINBPC(enc);
1047
      if (! HAS_CHAR(enc, ptr, end))
1048
        break;
Martin v. Löwis's avatar
Martin v. Löwis committed
1049 1050
      switch (BYTE_TYPE(enc, ptr)) {
      case BT_S: case BT_LF:
1051
        break;
Martin v. Löwis's avatar
Martin v. Löwis committed
1052
      case BT_CR:
1053 1054 1055 1056
        /* don't split CR/LF pair */
        if (ptr + MINBPC(enc) != end)
          break;
        /* fall through */
Martin v. Löwis's avatar
Martin v. Löwis committed
1057
      default:
1058 1059
        *nextTokPtr = ptr;
        return XML_TOK_PROLOG_S;
Martin v. Löwis's avatar
Martin v. Löwis committed
1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073
      }
    }
    *nextTokPtr = ptr;
    return XML_TOK_PROLOG_S;
  case BT_PERCNT:
    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_COMMA:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_COMMA;
  case BT_LSQB:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_OPEN_BRACKET;
  case BT_RSQB:
    ptr += MINBPC(enc);
1074
    if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwis's avatar
Martin v. Löwis committed
1075 1076
      return -XML_TOK_CLOSE_BRACKET;
    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1077
      REQUIRE_CHARS(enc, ptr, end, 2);
Martin v. Löwis's avatar
Martin v. Löwis committed
1078
      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1079 1080
        *nextTokPtr = ptr + 2*MINBPC(enc);
        return XML_TOK_COND_SECT_CLOSE;
Martin v. Löwis's avatar
Martin v. Löwis committed
1081 1082 1083 1084 1085 1086 1087 1088 1089
      }
    }
    *nextTokPtr = ptr;
    return XML_TOK_CLOSE_BRACKET;
  case BT_LPAR:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_OPEN_PAREN;
  case BT_RPAR:
    ptr += MINBPC(enc);
1090
    if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwis's avatar
Martin v. Löwis committed
1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165
      return -XML_TOK_CLOSE_PAREN;
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_AST:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_CLOSE_PAREN_ASTERISK;
    case BT_QUEST:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_CLOSE_PAREN_QUESTION;
    case BT_PLUS:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_CLOSE_PAREN_PLUS;
    case BT_CR: case BT_LF: case BT_S:
    case BT_GT: case BT_COMMA: case BT_VERBAR:
    case BT_RPAR:
      *nextTokPtr = ptr;
      return XML_TOK_CLOSE_PAREN;
    }
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  case BT_VERBAR:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_OR;
  case BT_GT:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_DECL_CLOSE;
  case BT_NUM:
    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
#define LEAD_CASE(n) \
  case BT_LEAD ## n: \
    if (end - ptr < n) \
      return XML_TOK_PARTIAL_CHAR; \
    if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
      ptr += n; \
      tok = XML_TOK_NAME; \
      break; \
    } \
    if (IS_NAME_CHAR(enc, ptr, n)) { \
      ptr += n; \
      tok = XML_TOK_NMTOKEN; \
      break; \
    } \
    *nextTokPtr = ptr; \
    return XML_TOK_INVALID;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
  case BT_NMSTRT:
  case BT_HEX:
    tok = XML_TOK_NAME;
    ptr += MINBPC(enc);
    break;
  case BT_DIGIT:
  case BT_NAME:
  case BT_MINUS:
#ifdef XML_NS
  case BT_COLON:
#endif
    tok = XML_TOK_NMTOKEN;
    ptr += MINBPC(enc);
    break;
  case BT_NONASCII:
    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
      ptr += MINBPC(enc);
      tok = XML_TOK_NAME;
      break;
    }
    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
      ptr += MINBPC(enc);
      tok = XML_TOK_NMTOKEN;
      break;
    }
    /* fall through */
  default:
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  }
1166
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178
    switch (BYTE_TYPE(enc, ptr)) {
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
    case BT_GT: case BT_RPAR: case BT_COMMA:
    case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
    case BT_S: case BT_CR: case BT_LF:
      *nextTokPtr = ptr;
      return tok;
#ifdef XML_NS
    case BT_COLON:
      ptr += MINBPC(enc);
      switch (tok) {
      case XML_TOK_NAME:
1179
        REQUIRE_CHAR(enc, ptr, end);
1180 1181 1182 1183 1184 1185 1186 1187
        tok = XML_TOK_PREFIXED_NAME;
        switch (BYTE_TYPE(enc, ptr)) {
        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
        default:
          tok = XML_TOK_NMTOKEN;
          break;
        }
        break;
Martin v. Löwis's avatar
Martin v. Löwis committed
1188
      case XML_TOK_PREFIXED_NAME:
1189 1190
        tok = XML_TOK_NMTOKEN;
        break;
Martin v. Löwis's avatar
Martin v. Löwis committed
1191 1192 1193 1194 1195
      }
      break;
#endif
    case BT_PLUS:
      if (tok == XML_TOK_NMTOKEN)  {
1196 1197
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
1198 1199 1200 1201 1202
      }
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_NAME_PLUS;
    case BT_AST:
      if (tok == XML_TOK_NMTOKEN)  {
1203 1204
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
1205 1206 1207 1208 1209
      }
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_NAME_ASTERISK;
    case BT_QUEST:
      if (tok == XML_TOK_NMTOKEN)  {
1210 1211
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
Martin v. Löwis's avatar
Martin v. Löwis committed
1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222
      }
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_NAME_QUESTION;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return -tok;
}

1223 1224 1225
static int PTRCALL
PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
                          const char *end, const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
1226 1227
{
  const char *start;
1228
  if (ptr >= end)
Martin v. Löwis's avatar
Martin v. Löwis committed
1229
    return XML_TOK_NONE;
1230 1231 1232 1233 1234 1235 1236 1237
  else if (! HAS_CHAR(enc, ptr, end)) {
    /* This line cannot be executed.  The incoming data has already
     * been tokenized once, so incomplete characters like this have
     * already been eliminated from the input.  Retaining the paranoia
     * check is still valuable, however.
     */
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
  }
Martin v. Löwis's avatar
Martin v. Löwis committed
1238
  start = ptr;
1239
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
1240 1241 1242 1243 1244 1245 1246
    switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
    case BT_LEAD ## n: ptr += n; break;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
    case BT_AMP:
      if (ptr == start)
1247
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
Martin v. Löwis's avatar
Martin v. Löwis committed
1248 1249 1250 1251 1252 1253 1254 1255
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    case BT_LT:
      /* this is for inside entity references */
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    case BT_LF:
      if (ptr == start) {
1256 1257
        *nextTokPtr = ptr + MINBPC(enc);
        return XML_TOK_DATA_NEWLINE;
Martin v. Löwis's avatar
Martin v. Löwis committed
1258 1259 1260 1261 1262
      }
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    case BT_CR:
      if (ptr == start) {
1263
        ptr += MINBPC(enc);
1264
        if (! HAS_CHAR(enc, ptr, end))
1265 1266 1267 1268 1269
          return XML_TOK_TRAILING_CR;
        if (BYTE_TYPE(enc, ptr) == BT_LF)
          ptr += MINBPC(enc);
        *nextTokPtr = ptr;
        return XML_TOK_DATA_NEWLINE;
Martin v. Löwis's avatar
Martin v. Löwis committed
1270 1271 1272 1273 1274
      }
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    case BT_S:
      if (ptr == start) {
1275 1276
        *nextTokPtr = ptr + MINBPC(enc);
        return XML_TOK_ATTRIBUTE_VALUE_S;
Martin v. Löwis's avatar
Martin v. Löwis committed
1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288
      }
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    default:
      ptr += MINBPC(enc);
      break;
    }
  }
  *nextTokPtr = ptr;
  return XML_TOK_DATA_CHARS;
}

1289 1290 1291
static int PTRCALL
PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
                       const char *end, const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
1292 1293
{
  const char *start;
1294
  if (ptr >= end)
Martin v. Löwis's avatar
Martin v. Löwis committed
1295
    return XML_TOK_NONE;
1296 1297 1298 1299 1300 1301 1302 1303
  else if (! HAS_CHAR(enc, ptr, end)) {
    /* This line cannot be executed.  The incoming data has already
     * been tokenized once, so incomplete characters like this have
     * already been eliminated from the input.  Retaining the paranoia
     * check is still valuable, however.
     */
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
  }
Martin v. Löwis's avatar
Martin v. Löwis committed
1304
  start = ptr;
1305
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
1306 1307 1308 1309 1310 1311 1312
    switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
    case BT_LEAD ## n: ptr += n; break;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
    case BT_AMP:
      if (ptr == start)
1313
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
Martin v. Löwis's avatar
Martin v. Löwis committed
1314 1315 1316 1317
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    case BT_PERCNT:
      if (ptr == start) {
1318 1319 1320
        int tok =  PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
                                       end, nextTokPtr);
        return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
Martin v. Löwis's avatar
Martin v. Löwis committed
1321 1322 1323 1324 1325
      }
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    case BT_LF:
      if (ptr == start) {
1326 1327
        *nextTokPtr = ptr + MINBPC(enc);
        return XML_TOK_DATA_NEWLINE;
Martin v. Löwis's avatar
Martin v. Löwis committed
1328 1329 1330 1331 1332
      }
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    case BT_CR:
      if (ptr == start) {
1333
        ptr += MINBPC(enc);
1334
        if (! HAS_CHAR(enc, ptr, end))
1335 1336 1337 1338 1339
          return XML_TOK_TRAILING_CR;
        if (BYTE_TYPE(enc, ptr) == BT_LF)
          ptr += MINBPC(enc);
        *nextTokPtr = ptr;
        return XML_TOK_DATA_NEWLINE;
Martin v. Löwis's avatar
Martin v. Löwis committed
1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353
      }
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    default:
      ptr += MINBPC(enc);
      break;
    }
  }
  *nextTokPtr = ptr;
  return XML_TOK_DATA_CHARS;
}

#ifdef XML_DTD

1354 1355 1356
static int PTRCALL
PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
                         const char *end, const char **nextTokPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
1357 1358 1359 1360 1361 1362 1363 1364 1365
{
  int level = 0;
  if (MINBPC(enc) > 1) {
    size_t n = end - ptr;
    if (n & (MINBPC(enc) - 1)) {
      n &= ~(MINBPC(enc) - 1);
      end = ptr + n;
    }
  }
1366
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
1367 1368 1369
    switch (BYTE_TYPE(enc, ptr)) {
    INVALID_CASES(ptr, nextTokPtr)
    case BT_LT:
1370 1371
      ptr += MINBPC(enc);
      REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
1372
      if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1373 1374
        ptr += MINBPC(enc);
        REQUIRE_CHAR(enc, ptr, end);
1375 1376 1377 1378
        if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
          ++level;
          ptr += MINBPC(enc);
        }
Martin v. Löwis's avatar
Martin v. Löwis committed
1379 1380 1381
      }
      break;
    case BT_RSQB:
1382 1383
      ptr += MINBPC(enc);
      REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis's avatar
Martin v. Löwis committed
1384
      if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1385 1386
        ptr += MINBPC(enc);
        REQUIRE_CHAR(enc, ptr, end);
1387 1388 1389 1390 1391 1392 1393 1394
        if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
          ptr += MINBPC(enc);
          if (level == 0) {
            *nextTokPtr = ptr;
            return XML_TOK_IGNORE_SECT;
          }
          --level;
        }
Martin v. Löwis's avatar
Martin v. Löwis committed
1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406
      }
      break;
    default:
      ptr += MINBPC(enc);
      break;
    }
  }
  return XML_TOK_PARTIAL;
}

#endif /* XML_DTD */

1407 1408 1409
static int PTRCALL
PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
                   const char **badPtr)
Martin v. Löwis's avatar
Martin v. Löwis committed
1410 1411 1412
{
  ptr += MINBPC(enc);
  end -= MINBPC(enc);
1413
  for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_DIGIT:
    case BT_HEX:
    case BT_MINUS:
    case BT_APOS:
    case BT_LPAR:
    case BT_RPAR:
    case BT_PLUS:
    case BT_COMMA:
    case BT_SOL:
    case BT_EQUALS:
    case BT_QUEST:
    case BT_CR:
    case BT_LF:
    case BT_SEMI:
    case BT_EXCL:
    case BT_AST:
    case BT_PERCNT:
    case BT_NUM:
#ifdef XML_NS
    case BT_COLON:
#endif
      break;
    case BT_S:
      if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1439 1440
        *badPtr = ptr;
        return 0;
Martin v. Löwis's avatar
Martin v. Löwis committed
1441 1442 1443 1444 1445
      }
      break;
    case BT_NAME:
    case BT_NMSTRT:
      if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1446
        break;
1447
      /* fall through */
Martin v. Löwis's avatar
Martin v. Löwis committed
1448 1449 1450 1451
    default:
      switch (BYTE_TO_ASCII(enc, ptr)) {
      case 0x24: /* $ */
      case 0x40: /* @ */
1452
        break;
Martin v. Löwis's avatar
Martin v. Löwis committed
1453
      default:
1454 1455
        *badPtr = ptr;
        return 0;
Martin v. Löwis's avatar
Martin v. Löwis committed
1456 1457 1458 1459 1460 1461 1462
      }
      break;
    }
  }
  return 1;
}

1463 1464 1465 1466
/* This must only be called for a well-formed start-tag or empty
   element tag.  Returns the number of attributes.  Pointers to the
   first attsMax attributes are stored in atts.
*/
Martin v. Löwis's avatar
Martin v. Löwis committed
1467

1468 1469 1470
static int PTRCALL
PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
                int attsMax, ATTRIBUTE *atts)
Martin v. Löwis's avatar
Martin v. Löwis committed
1471 1472 1473 1474
{
  enum { other, inName, inValue } state = inName;
  int nAtts = 0;
  int open = 0; /* defined when state == inValue;
1475
                   initialization just to shut up compilers */
Martin v. Löwis's avatar
Martin v. Löwis committed
1476 1477 1478 1479 1480

  for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
    switch (BYTE_TYPE(enc, ptr)) {
#define START_NAME \
      if (state == other) { \
1481 1482 1483 1484 1485
        if (nAtts < attsMax) { \
          atts[nAtts].name = ptr; \
          atts[nAtts].normalized = 1; \
        } \
        state = inName; \
Martin v. Löwis's avatar
Martin v. Löwis committed
1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498
      }
#define LEAD_CASE(n) \
    case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
    case BT_NONASCII:
    case BT_NMSTRT:
    case BT_HEX:
      START_NAME
      break;
#undef START_NAME
    case BT_QUOT:
      if (state != inValue) {
1499 1500
        if (nAtts < attsMax)
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
Martin v. Löwis's avatar
Martin v. Löwis committed
1501 1502 1503 1504 1505
        state = inValue;
        open = BT_QUOT;
      }
      else if (open == BT_QUOT) {
        state = other;
1506 1507 1508
        if (nAtts < attsMax)
          atts[nAtts].valueEnd = ptr;
        nAtts++;
Martin v. Löwis's avatar
Martin v. Löwis committed
1509 1510 1511 1512
      }
      break;
    case BT_APOS:
      if (state != inValue) {
1513 1514
        if (nAtts < attsMax)
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
Martin v. Löwis's avatar
Martin v. Löwis committed
1515 1516 1517 1518 1519
        state = inValue;
        open = BT_APOS;
      }
      else if (open == BT_APOS) {
        state = other;
1520 1521 1522
        if (nAtts < attsMax)
          atts[nAtts].valueEnd = ptr;
        nAtts++;
Martin v. Löwis's avatar
Martin v. Löwis committed
1523 1524 1525 1526
      }
      break;
    case BT_AMP:
      if (nAtts < attsMax)
1527
        atts[nAtts].normalized = 0;
Martin v. Löwis's avatar
Martin v. Löwis committed
1528 1529 1530 1531 1532
      break;
    case BT_S:
      if (state == inName)
        state = other;
      else if (state == inValue
1533 1534 1535 1536 1537 1538 1539
               && nAtts < attsMax
               && atts[nAtts].normalized
               && (ptr == atts[nAtts].valuePtr
                   || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
                   || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
                   || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
        atts[nAtts].normalized = 0;
Martin v. Löwis's avatar
Martin v. Löwis committed
1540 1541 1542 1543 1544 1545 1546
      break;
    case BT_CR: case BT_LF:
      /* This case ensures that the first attribute name is counted
         Apart from that we could just change state on the quote. */
      if (state == inName)
        state = other;
      else if (state == inValue && nAtts < attsMax)
1547
        atts[nAtts].normalized = 0;
Martin v. Löwis's avatar
Martin v. Löwis committed
1548 1549 1550 1551
      break;
    case BT_GT:
    case BT_SOL:
      if (state != inValue)
1552
        return nAtts;
Martin v. Löwis's avatar
Martin v. Löwis committed
1553 1554 1555 1556 1557 1558 1559 1560
      break;
    default:
      break;
    }
  }
  /* not reached */
}

1561
static int PTRFASTCALL
1562
PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
Martin v. Löwis's avatar
Martin v. Löwis committed
1563 1564 1565 1566 1567
{
  int result = 0;
  /* skip &# */
  ptr += 2*MINBPC(enc);
  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1568 1569 1570
    for (ptr += MINBPC(enc);
         !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
         ptr += MINBPC(enc)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
1571 1572 1573 1574
      int c = BYTE_TO_ASCII(enc, ptr);
      switch (c) {
      case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
      case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587
        result <<= 4;
        result |= (c - ASCII_0);
        break;
      case ASCII_A: case ASCII_B: case ASCII_C:
      case ASCII_D: case ASCII_E: case ASCII_F:
        result <<= 4;
        result += 10 + (c - ASCII_A);
        break;
      case ASCII_a: case ASCII_b: case ASCII_c:
      case ASCII_d: case ASCII_e: case ASCII_f:
        result <<= 4;
        result += 10 + (c - ASCII_a);
        break;
Martin v. Löwis's avatar
Martin v. Löwis committed
1588 1589
      }
      if (result >= 0x110000)
1590
        return -1;
Martin v. Löwis's avatar
Martin v. Löwis committed
1591 1592 1593 1594 1595 1596 1597 1598
    }
  }
  else {
    for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
      int c = BYTE_TO_ASCII(enc, ptr);
      result *= 10;
      result += (c - ASCII_0);
      if (result >= 0x110000)
1599
        return -1;
Martin v. Löwis's avatar
Martin v. Löwis committed
1600 1601 1602 1603 1604
    }
  }
  return checkCharRefNumber(result);
}

1605
static int PTRCALL
1606
PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
1607
                             const char *end)
Martin v. Löwis's avatar
Martin v. Löwis committed
1608 1609 1610 1611 1612 1613
{
  switch ((end - ptr)/MINBPC(enc)) {
  case 2:
    if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
      switch (BYTE_TO_ASCII(enc, ptr)) {
      case ASCII_l:
1614
        return ASCII_LT;
Martin v. Löwis's avatar
Martin v. Löwis committed
1615
      case ASCII_g:
1616
        return ASCII_GT;
Martin v. Löwis's avatar
Martin v. Löwis committed
1617 1618 1619 1620 1621 1622 1623
      }
    }
    break;
  case 3:
    if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
      ptr += MINBPC(enc);
      if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1624 1625 1626
        ptr += MINBPC(enc);
        if (CHAR_MATCHES(enc, ptr, ASCII_p))
          return ASCII_AMP;
Martin v. Löwis's avatar
Martin v. Löwis committed
1627 1628 1629 1630 1631 1632 1633 1634
      }
    }
    break;
  case 4:
    switch (BYTE_TO_ASCII(enc, ptr)) {
    case ASCII_q:
      ptr += MINBPC(enc);
      if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1635 1636 1637 1638 1639 1640
        ptr += MINBPC(enc);
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
          ptr += MINBPC(enc);
          if (CHAR_MATCHES(enc, ptr, ASCII_t))
            return ASCII_QUOT;
        }
Martin v. Löwis's avatar
Martin v. Löwis committed
1641 1642 1643 1644 1645
      }
      break;
    case ASCII_a:
      ptr += MINBPC(enc);
      if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1646 1647 1648 1649 1650 1651
        ptr += MINBPC(enc);
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
          ptr += MINBPC(enc);
          if (CHAR_MATCHES(enc, ptr, ASCII_s))
            return ASCII_APOS;
        }
Martin v. Löwis's avatar
Martin v. Löwis committed
1652 1653 1654 1655 1656 1657 1658
      }
      break;
    }
  }
  return 0;
}

1659
static int PTRCALL
1660
PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
1661
                         const char *end1, const char *ptr2)
Martin v. Löwis's avatar
Martin v. Löwis committed
1662 1663
{
  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1664
    if (end1 - ptr1 < MINBPC(enc)) {
1665 1666
      /* This line cannot be executed.  The incoming data has already
       * been tokenized once, so incomplete characters like this have
1667 1668 1669 1670 1671
       * already been eliminated from the input.  Retaining the
       * paranoia check is still valuable, however.
       */
      return 0; /* LCOV_EXCL_LINE */
    }
Martin v. Löwis's avatar
Martin v. Löwis committed
1672 1673 1674 1675 1676 1677
    if (!CHAR_MATCHES(enc, ptr1, *ptr2))
      return 0;
  }
  return ptr1 == end1;
}

1678 1679
static int PTRFASTCALL
PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
Martin v. Löwis's avatar
Martin v. Löwis committed
1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699
{
  const char *start = ptr;
  for (;;) {
    switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
    case BT_LEAD ## n: ptr += n; break;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
    case BT_NONASCII:
    case BT_NMSTRT:
#ifdef XML_NS
    case BT_COLON:
#endif
    case BT_HEX:
    case BT_DIGIT:
    case BT_NAME:
    case BT_MINUS:
      ptr += MINBPC(enc);
      break;
    default:
1700
      return (int)(ptr - start);
Martin v. Löwis's avatar
Martin v. Löwis committed
1701 1702 1703 1704
    }
  }
}

1705 1706
static const char * PTRFASTCALL
PREFIX(skipS)(const ENCODING *enc, const char *ptr)
Martin v. Löwis's avatar
Martin v. Löwis committed
1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720
{
  for (;;) {
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_LF:
    case BT_CR:
    case BT_S:
      ptr += MINBPC(enc);
      break;
    default:
      return ptr;
    }
  }
}

1721 1722 1723 1724 1725
static void PTRCALL
PREFIX(updatePosition)(const ENCODING *enc,
                       const char *ptr,
                       const char *end,
                       POSITION *pos)
Martin v. Löwis's avatar
Martin v. Löwis committed
1726
{
1727
  while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis's avatar
Martin v. Löwis committed
1728 1729 1730 1731 1732 1733 1734 1735
    switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
    case BT_LEAD ## n: \
      ptr += n; \
      break;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
    case BT_LF:
1736
      pos->columnNumber = (XML_Size)-1;
Martin v. Löwis's avatar
Martin v. Löwis committed
1737 1738 1739 1740 1741 1742
      pos->lineNumber++;
      ptr += MINBPC(enc);
      break;
    case BT_CR:
      pos->lineNumber++;
      ptr += MINBPC(enc);
1743
      if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
1744
        ptr += MINBPC(enc);
1745
      pos->columnNumber = (XML_Size)-1;
Martin v. Löwis's avatar
Martin v. Löwis committed
1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761
      break;
    default:
      ptr += MINBPC(enc);
      break;
    }
    pos->columnNumber++;
  }
}

/* Remove the helper macros used by the functions above (presumably so
   that this file can be included again with different definitions). */
#undef DO_LEAD_CASE
#undef MULTIBYTE_CASES
#undef INVALID_CASES
#undef CHECK_NAME_CASE
#undef CHECK_NAME_CASES
#undef CHECK_NMSTRT_CASE
#undef CHECK_NMSTRT_CASES

#endif /* XML_TOK_IMPL_C */