commit b43957b06122d83db4e48001416e553dd7d51986
parent d78d582eaa99f63feccb08cf92399d921106867a
Author: Bakkeby <bakkeby@gmail.com>
Date: Thu, 7 Mar 2024 23:23:56 +0100
Optimise utf8decode()
Diffstat:
 M st.c | 23 +++++++++++++----------
1 file changed, 13 insertions(+), 10 deletions(-)
diff --git a/st.c b/st.c
@@ -217,9 +217,9 @@ static void selscroll(int, int);
static void selsnap(int *, int *, int);
static size_t utf8decode(const char *, Rune *, size_t);
-static Rune utf8decodebyte(char, size_t *);
-static char utf8encodebyte(Rune, size_t);
-static size_t utf8validate(Rune *, size_t);
+static inline Rune utf8decodebyte(char, size_t *);
+static inline char utf8encodebyte(Rune, size_t);
+static inline size_t utf8validate(Rune *, size_t);
static char *base64dec(const char *);
static char base64dec_getc(const char **);
@@ -297,21 +297,24 @@ xstrdup(const char *s)
size_t
utf8decode(const char *c, Rune *u, size_t clen)
{
- size_t i, j, len, type;
+ size_t i, len;
Rune udecoded;
*u = UTF_INVALID;
if (!clen)
return 0;
udecoded = utf8decodebyte(c[0], &len);
- if (!BETWEEN(len, 1, UTF_SIZ))
+ if (!BETWEEN(len, 2, UTF_SIZ)) {
+ *u = (len == 1) ? udecoded : UTF_INVALID;
return 1;
- for (i = 1, j = 1; i < clen && j < len; ++i, ++j) {
- udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type);
- if (type != 0)
- return j;
}
- if (j < len)
+ clen = MIN(clen, len);
+ for (i = 1; i < clen; ++i) {
+ if ((c[i] & 0xC0) != 0x80)
+ return i;
+ udecoded = (udecoded << 6) | (c[i] & 0x3F);
+ }
+ if (i < len)
return 0;
*u = (!BETWEEN(udecoded, utfmin[len], utfmax[len]) || BETWEEN(udecoded, 0xD800, 0xDFFF))
? UTF_INVALID : udecoded;