diff --git a/pd/src/g_rtext.c b/pd/src/g_rtext.c
index 6c4b9cdfcf9661771c8bee95f72c23ac3504f677..04a8f6761bb8e89dadeed98aa57df9f0db272a41 100644
--- a/pd/src/g_rtext.c
+++ b/pd/src/g_rtext.c
@@ -248,7 +248,8 @@ static void rtext_senditup(t_rtext *x, int action, int *widthp, int *heightp,
         int inchars_c = x_bufsize_c - inindex_c;
         int maxindex_c =
             (inchars_c > widthlimit_c ? widthlimit_c : inchars_c);
-        int maxindex_b = u8_offset(x->x_buf + inindex_b, maxindex_c);
+        int maxindex_b = u8_offset(x->x_buf + inindex_b, maxindex_c,
+            x->x_bufsize - inindex_b);
         int eatchar = 1;
         //fprintf(stderr, "firstone <%s> inindex_b=%d maxindex_b=%d\n", x->x_buf + inindex_b, inindex_b, maxindex_b);
         int foundit_b = firstone(x->x_buf + inindex_b, '\n', maxindex_b);
@@ -291,7 +292,8 @@ static void rtext_senditup(t_rtext *x, int action, int *widthp, int *heightp,
             {
                 int actualx = (findx < 0 ? 0 :
                     (findx > foundit_c ? foundit_c : findx));
-                *indexp = inindex_b + u8_offset(x->x_buf + inindex_b, actualx);
+                *indexp = inindex_b + u8_offset(x->x_buf + inindex_b, actualx,
+                    x->x_bufsize - inindex_b);
                 reportedindex = 1;
             }
             strncpy(tempbuf+outchars_b, x->x_buf + inindex_b, foundit_b);
diff --git a/pd/src/s_utf8.c b/pd/src/s_utf8.c
index 6bc8e230cca03ab6050fbedc857f161d13158f59..8cf20aa27199e11e7ce82bcdb73c32acce37313b 100644
--- a/pd/src/s_utf8.c
+++ b/pd/src/s_utf8.c
@@ -185,8 +185,33 @@ int u8_wc_toutf8_nul(char *dest, uint32_t ch)
 }

 /* charnum => byte offset */
-int u8_offset(char *str, int charnum)
+/* str must point at the first byte of a character; bufsize is the number
+   of readable bytes at str.  The text need not be null-terminated, and a
+   NUL byte does not stop the scan.  Returns the byte offset of character
+   number charnum, or bufsize if the buffer holds fewer than charnum
+   characters.  Returns 0 if bufsize <= 0, and also (after reporting a
+   bug) if str does not point at the start of a character. */
+int u8_offset(char *str, int charnum, int bufsize)
 {
+    /* Implementation #1 of a tricky encoding in an unsafe language. It
+       assumes that we're dealing with null-terminated strings, but
+       x_buf of rtext isn't null-terminated. */
+    /*
+    int offs=0;
+
+    while (charnum > 0 && str[offs]) {
+        (void)(isutf(str[++offs]) || isutf(str[++offs]) ||
+               isutf(str[++offs]) || ++offs);
+        charnum--;
+    }
+    return offs;
+    */
+
+    /* Implementation number 2 apparently tried to fix that. Instead, it
+       just made a reimplementation that still potentially dereferences
+       non-existent pointers _if_ we try to get the offset at the last
+       character _and_ that last character happens to be wide. */
+    /*
     char *string = str;

     while (charnum > 0 && *string != '\0') {
@@ -205,11 +230,37 @@ int u8_offset(char *str, int charnum)
     }

     return (int)(string - str);
+    */
+
+    /* Here is an _extremely_ conservative implementation that never reads
+       outside str[0 .. bufsize-1]. */
+    int offs;
+    if (bufsize <= 0)
+        return 0; /* nothing is readable, so don't even look at str[0] */
+    if (!isutf(str[0]))
+    {
+        /* str points into the middle of a character */
+        bug("u8_offset");
+        return 0;
+    }
+    for (offs = 0; offs < bufsize; offs++)
+    {
+        if (isutf(str[offs]))
+        {
+            if (charnum <= 0)
+                break;
+            charnum -= 1;
+        }
+    }
+    return offs;
 }

 /* byte offset => charnum */
 int u8_charnum(char *s, int offset)
 {
+    /* This has the same problem as the commented implementations of u8_offset
+       above. */
+    /*
     int charnum = 0;
     char *string = s;
     char *const end = string + offset;
@@ -229,6 +280,30 @@ int u8_charnum(char *s, int offset)
         ++charnum;
     }
     return charnum;
+    */
+
+    /* The original implementation which doesn't work well with
+       strings that aren't null terminated */
+    /*
+    int charnum = 0, offs=0;
+
+    while (offs < offset && s[offs]) {
+        (void)(isutf(s[++offs]) || isutf(s[++offs]) ||
+               isutf(s[++offs]) || ++offs);
+        charnum++;
+    }
+    return charnum;
+    */
+
+    /* An _extremely_ conservative implementation to avoid dereferencing
+       garbage: it reads only s[0 .. offset-1] and counts the bytes that
+       start a character. */
+    int charnum = 0, i;
+    for (i = 0; i < offset; i++)
+    {
+        if (isutf(s[i])) charnum += 1;
+    }
+    return charnum;
 }

 /* reads the next utf-8 sequence out of a string, updating an index */
diff --git a/pd/src/s_utf8.h b/pd/src/s_utf8.h
index c8969e8be886904420c710bdc555de940dfadb47..88a32492511e9d572c9fff0d72c98991c0b229c3 100644
--- a/pd/src/s_utf8.h
+++ b/pd/src/s_utf8.h
@@ -59,7 +59,9 @@ int u8_wc_toutf8(char *dest, uint32_t ch);
 int u8_wc_toutf8_nul(char *dest, uint32_t ch);

 /* character number to byte offset */
-int u8_offset(char *str, int charnum);
+/* bufsize is the number of readable bytes at str; the scan is bounded by
+   bufsize only and does not stop at a NUL byte */
+int u8_offset(char *str, int charnum, int bufsize);

 /* byte offset to character number */
 int u8_charnum(char *s, int offset);