From 0231ce074ef34d184b293c4be34b96b264d12287 Mon Sep 17 00:00:00 2001 From: Jonathan Wilkes <jon.w.wilkes@gmail.com> Date: Fri, 7 Jul 2017 21:08:22 -0400 Subject: [PATCH] fix a very nasty utf8 code-point counter/offsetter that can easily dereference garbage thanks to Pd's use of non-null-terminated strings --- pd/src/g_rtext.c | 6 +++-- pd/src/s_utf8.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++- pd/src/s_utf8.h | 2 +- 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/pd/src/g_rtext.c b/pd/src/g_rtext.c index 6c4b9cdfc..04a8f6761 100644 --- a/pd/src/g_rtext.c +++ b/pd/src/g_rtext.c @@ -248,7 +248,8 @@ static void rtext_senditup(t_rtext *x, int action, int *widthp, int *heightp, int inchars_c = x_bufsize_c - inindex_c; int maxindex_c = (inchars_c > widthlimit_c ? widthlimit_c : inchars_c); - int maxindex_b = u8_offset(x->x_buf + inindex_b, maxindex_c); + int maxindex_b = u8_offset(x->x_buf + inindex_b, maxindex_c, + x->x_bufsize - inindex_b); int eatchar = 1; //fprintf(stderr, "firstone <%s> inindex_b=%d maxindex_b=%d\n", x->x_buf + inindex_b, inindex_b, maxindex_b); int foundit_b = firstone(x->x_buf + inindex_b, '\n', maxindex_b); @@ -291,7 +292,8 @@ static void rtext_senditup(t_rtext *x, int action, int *widthp, int *heightp, { int actualx = (findx < 0 ? 0 : (findx > foundit_c ? foundit_c : findx)); - *indexp = inindex_b + u8_offset(x->x_buf + inindex_b, actualx); + *indexp = inindex_b + u8_offset(x->x_buf + inindex_b, actualx, + x->x_bufsize - inindex_b); reportedindex = 1; } strncpy(tempbuf+outchars_b, x->x_buf + inindex_b, foundit_b); diff --git a/pd/src/s_utf8.c b/pd/src/s_utf8.c index 6bc8e230c..8cf20aa27 100644 --- a/pd/src/s_utf8.c +++ b/pd/src/s_utf8.c @@ -185,8 +185,27 @@ int u8_wc_toutf8_nul(char *dest, uint32_t ch) } /* charnum => byte offset */ -int u8_offset(char *str, int charnum) +int u8_offset(char *str, int charnum, int bufsize) { + /* Implementation #1 of a tricky encoding in an unsafe language. 
It + assumes that we're dealing with null-terminated strings, but + x_buf of rtext isn't null-terminated. */ + /* + int offs=0; + + while (charnum > 0 && str[offs]) { + (void)(isutf(str[++offs]) || isutf(str[++offs]) || + isutf(str[++offs]) || ++offs); + charnum--; + } + return offs; + */ + + /* Implementation number 2 apparently tried to fix that. Instead, it + just made a reimplementation that still potentially dereferences + non-existent pointers _if_ we try to get the offset at the last + character _and_ that last character happens to be wide. */ + /* char *string = str; while (charnum > 0 && *string != '\0') { @@ -205,11 +224,36 @@ int u8_offset(char *str, int charnum) } return (int)(string - str); + */ + + /* Here is an _extremely_ conservative implementation that protects + against dereferencing garbage pointers. */ + int offs = 0; + if (isutf(str[offs])) + { + for (offs = 0; offs < bufsize; offs++) + { + if (isutf(str[offs])) + { + if (charnum <= 0) + break; + charnum -= 1; + } + } + } + else + { + bug("u8_offset"); + } + return offs; } /* byte offset => charnum */ int u8_charnum(char *s, int offset) { + /* This has the same problem as the commented-out implementations of u8_offset + above. */ + /* int charnum = 0; char *string = s; char *const end = string + offset; @@ -229,6 +273,29 @@ int u8_charnum(char *s, int offset) ++charnum; } return charnum; + */ + + /* The original implementation which doesn't work well with + strings that aren't null terminated */ + /* + int charnum = 0, offs=0; + + while (offs < offset && s[offs]) { + (void)(isutf(s[++offs]) || isutf(s[++offs]) || + isutf(s[++offs]) || ++offs); + charnum++; + } + return charnum; + */ + + /* An _extremely_ conservative implementation to avoid dereferencing + garbage. 
*/ + int charnum = 0, i; + for (i = 0; i < offset; i++) + { + if (isutf(s[i])) charnum += 1; + } + return charnum; } /* reads the next utf-8 sequence out of a string, updating an index */ diff --git a/pd/src/s_utf8.h b/pd/src/s_utf8.h index c8969e8be..88a324925 100644 --- a/pd/src/s_utf8.h +++ b/pd/src/s_utf8.h @@ -59,7 +59,7 @@ int u8_wc_toutf8(char *dest, uint32_t ch); int u8_wc_toutf8_nul(char *dest, uint32_t ch); /* character number to byte offset */ -int u8_offset(char *str, int charnum); +int u8_offset(char *str, int charnum, int bufsize); /* byte offset to character number */ int u8_charnum(char *s, int offset); -- GitLab