Blame


1 ea5e974d 2024-03-28 op /*
2 ea5e974d 2024-03-28 op * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
3 ea5e974d 2024-03-28 op *
4 ea5e974d 2024-03-28 op * Permission is hereby granted, free of charge, to any person obtaining a
5 ea5e974d 2024-03-28 op * copy of this software and associated documentation files (the "Software"),
6 ea5e974d 2024-03-28 op * to deal in the Software without restriction, including without limitation
7 ea5e974d 2024-03-28 op * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 ea5e974d 2024-03-28 op * and/or sell copies of the Software, and to permit persons to whom the
9 ea5e974d 2024-03-28 op * Software is furnished to do so, subject to the following conditions:
10 ea5e974d 2024-03-28 op *
11 ea5e974d 2024-03-28 op * The above copyright notice and this permission notice shall be included in
12 ea5e974d 2024-03-28 op * all copies or substantial portions of the Software.
13 ea5e974d 2024-03-28 op *
14 ea5e974d 2024-03-28 op * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 ea5e974d 2024-03-28 op * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 ea5e974d 2024-03-28 op * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 ea5e974d 2024-03-28 op * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 ea5e974d 2024-03-28 op * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 ea5e974d 2024-03-28 op * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 ea5e974d 2024-03-28 op * DEALINGS IN THE SOFTWARE.
21 ea5e974d 2024-03-28 op */
22 ea5e974d 2024-03-28 op
23 ea5e974d 2024-03-28 op // See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
24 ea5e974d 2024-03-28 op
/*
 * Decoder states that callers test for.  decode() also passes through
 * intermediate states (2..8) while it is in the middle of a multi-byte
 * sequence.
 */
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1

/*
 * Combined lookup table driving decode():
 *
 *  - entries 0..255 map an input byte to its character class (0..11);
 *  - entries from 256 on hold the transition table, one row of 16
 *    entries per state, indexed as 256 + state*16 + class and yielding
 *    the next state.
 *
 * The row for state 1 (UTF8_REJECT) contains only 1s, so a rejected
 * decoder stays rejected until the caller resets the state.
 */
static const uint8_t utf8d[] = {
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
	0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
	0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
	0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
	1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
	1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
	1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
};

/*
 * Feed one input byte to the UTF-8 decoder.
 *
 * state: in/out decoder state.  Must be UTF8_ACCEPT before the first
 *        byte of a sequence, and otherwise a value previously stored
 *        by this function; any other value indexes outside the table.
 * codep: in/out code point accumulator.  Overwritten when a new
 *        sequence starts, extended by six bits per continuation byte.
 * byte:  next input byte.  Only the low 8 bits are used.
 *
 * Returns the new state, which is also stored in *state:
 * UTF8_ACCEPT when *codep holds a complete code point, UTF8_REJECT when
 * the input is not valid UTF-8, anything else when more bytes are
 * needed.
 */
static inline uint32_t
decode(uint32_t *state, uint32_t *codep, uint32_t byte)
{
	uint32_t type;

	/*
	 * Callers commonly pass a plain (possibly signed) char, which
	 * sign-extends to values above 0xff for non-ASCII bytes.  Keep
	 * only the low octet so the class lookup stays inside the table.
	 */
	byte &= 0xffu;
	type = utf8d[byte];

	if (*state != UTF8_ACCEPT) {
		/* Continuation: append the six payload bits. */
		*codep = (byte & 0x3fu) | (*codep << 6);
	} else {
		/* Lead byte: the class tells how many prefix bits to drop. */
		*codep = (0xffu >> type) & byte;
	}

	*state = utf8d[256 + *state * 16 + type];
	return *state;
}