Skip to content
Snippets Groups Projects
Commit 0231ce07 authored by Jonathan Wilkes's avatar Jonathan Wilkes
Browse files

fix a very nasty utf8 code-point counter/offsetter that can easily...

fix a very nasty utf8 code-point counter/offsetter that can easily dereference garbage thanks to Pd's use of non-null-terminated strings
parent 00448223
No related branches found
No related tags found
No related merge requests found
......@@ -248,7 +248,8 @@ static void rtext_senditup(t_rtext *x, int action, int *widthp, int *heightp,
int inchars_c = x_bufsize_c - inindex_c;
int maxindex_c =
(inchars_c > widthlimit_c ? widthlimit_c : inchars_c);
int maxindex_b = u8_offset(x->x_buf + inindex_b, maxindex_c);
int maxindex_b = u8_offset(x->x_buf + inindex_b, maxindex_c,
x->x_bufsize - inindex_b);
int eatchar = 1;
//fprintf(stderr, "firstone <%s> inindex_b=%d maxindex_b=%d\n", x->x_buf + inindex_b, inindex_b, maxindex_b);
int foundit_b = firstone(x->x_buf + inindex_b, '\n', maxindex_b);
......@@ -291,7 +292,8 @@ static void rtext_senditup(t_rtext *x, int action, int *widthp, int *heightp,
{
int actualx = (findx < 0 ? 0 :
(findx > foundit_c ? foundit_c : findx));
*indexp = inindex_b + u8_offset(x->x_buf + inindex_b, actualx);
*indexp = inindex_b + u8_offset(x->x_buf + inindex_b, actualx,
x->x_bufsize - inindex_b);
reportedindex = 1;
}
strncpy(tempbuf+outchars_b, x->x_buf + inindex_b, foundit_b);
......
......@@ -185,8 +185,27 @@ int u8_wc_toutf8_nul(char *dest, uint32_t ch)
}
/* charnum => byte offset */
int u8_offset(char *str, int charnum)
int u8_offset(char *str, int charnum, int bufsize)
{
    /* Convert a character index into a byte offset within a buffer that
       is NOT assumed to be null-terminated (x_buf of rtext is not).

       str      start of the data to scan
       charnum  number of characters to skip from str
       bufsize  number of readable bytes at str; no byte outside
                str[0] .. str[bufsize - 1] is examined

       Returns the byte offset of the byte that begins character number
       charnum (counting from 0), or bufsize when the buffer holds fewer
       characters than that. Returns 0 when bufsize is not positive.
       When str[0] fails isutf(), bug("u8_offset") is reported and 0 is
       returned.

       isutf() and bug() are defined elsewhere; isutf() is presumed to be
       true for a byte that begins a character and false for the trailing
       bytes of a multi-byte sequence.

       History: the first implementation relied on a terminating null
       byte, which x_buf does not have. A second implementation tried to
       fix that but could still potentially dereference non-existent
       memory when asked for the offset at the last character and that
       character happened to be wide. This version is deliberately
       conservative: it inspects one byte at a time and never looks past
       bufsize. */
    int offs;

    /* With no readable bytes there is no str[0] to test; checking this
       first avoids reading past the end of an exhausted buffer. */
    if (bufsize <= 0)
        return 0;

    /* The scan must begin on a character boundary. */
    if (!isutf(str[0]))
    {
        bug("u8_offset");
        return 0;
    }

    for (offs = 0; offs < bufsize; offs++)
    {
        if (isutf(str[offs]))
        {
            /* Reached the start of the requested character. */
            if (charnum <= 0)
                break;
            charnum -= 1;
        }
    }
    return offs;
}
/* byte offset => charnum */
int u8_charnum(char *s, int offset)
{
    /* Convert a byte offset into a character count.

       s       start of the data to scan
       offset  number of bytes, starting at s, to take into account

       Returns how many of the bytes s[0] .. s[offset - 1] satisfy
       isutf() (defined elsewhere). Returns 0 when offset is not
       positive. The caller must guarantee that offset bytes are readable
       at s; nothing beyond s[offset - 1] is touched.

       Earlier versions of this routine stopped at a null byte and
       skipped ahead within multi-byte sequences, which does not work
       well with strings that are not null-terminated. This one is
       deliberately conservative and visits every byte exactly once. */
    int count = 0;
    int pos = 0;

    while (pos < offset)
    {
        if (isutf(s[pos]))
            count++;
        pos++;
    }
    return count;
}
/* reads the next utf-8 sequence out of a string, updating an index */
......
......@@ -59,7 +59,7 @@ int u8_wc_toutf8(char *dest, uint32_t ch);
int u8_wc_toutf8_nul(char *dest, uint32_t ch);
/* character number to byte offset */
int u8_offset(char *str, int charnum);
int u8_offset(char *str, int charnum, int bufsize);
/* byte offset to character number */
int u8_charnum(char *s, int offset);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment