Skip to content

Commit

Permalink
gh-119614: Fix truncation of strings with embedded null characters in Tkinter (GH-120909)
Browse files Browse the repository at this point in the history

Now the null character is always represented as \xc0\x80 for
Tcl_NewStringObj().
  • Loading branch information
serhiy-storchaka authored Jun 24, 2024
1 parent fc297b4 commit c38e2f6
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 7 deletions.
24 changes: 24 additions & 0 deletions Lib/test/test_tcl.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ def testCall(self):
tcl.call('set','a','1')
self.assertEqual(tcl.call('set','a'),'1')

def test_call_passing_null(self):
    """Check that strings with an embedded NUL survive ``call('set', ...)``.

    Each sample is stored in the Tcl variable ``a`` through ``call()`` and
    then read back three different ways (``getvar()``, ``call()`` and
    ``eval()``); every path must return the full string, NUL included.
    """
    interp = self.interp
    samples = (
        ('a\0b', 'a\x00b'),          # ASCII-only
        ('\u20ac\0', '\u20ac\x00'),  # non-ASCII
    )
    for sent, expected in samples:
        interp.call('set', 'a', sent)
        self.assertEqual(interp.getvar('a'), expected)
        self.assertEqual(interp.call('set', 'a'), expected)
        self.assertEqual(interp.eval('set a'), expected)

def testCallException(self):
tcl = self.interp
self.assertRaises(TclError,tcl.call,'set','a')
Expand All @@ -98,6 +110,18 @@ def testSetVar(self):
tcl.setvar('a','1')
self.assertEqual(tcl.eval('set a'),'1')

def test_setvar_passing_null(self):
    """Check that strings with an embedded NUL survive ``setvar()``.

    Each sample is stored in the Tcl variable ``a`` through ``setvar()`` and
    then read back three different ways (``getvar()``, ``call()`` and
    ``eval()``); every path must return the full string, NUL included.
    """
    interp = self.interp
    samples = (
        ('a\0b', 'a\x00b'),          # ASCII-only
        ('\u20ac\0', '\u20ac\x00'),  # non-ASCII
    )
    for sent, expected in samples:
        interp.setvar('a', sent)
        self.assertEqual(interp.getvar('a'), expected)
        self.assertEqual(interp.call('set', 'a'), expected)
        self.assertEqual(interp.eval('set a'), expected)

def testSetVarArray(self):
tcl = self.interp
tcl.setvar('a(1)','1')
Expand Down
9 changes: 9 additions & 0 deletions Lib/test/test_tkinter/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,15 @@ def test_info_patchlevel(self):
self.assertEqual(vi.micro, 0)
self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}'))

def test_embedded_null(self):
    """Check that an Entry widget keeps text containing embedded NULs.

    Text is inserted at index 0 in two steps, so the second sample ends up
    in front of the first. After each insertion the whole content is
    selected and the selection is compared with the expected string.
    """
    entry = tkinter.Entry(self.root)
    steps = (
        # (text inserted at index 0, expected full selection afterwards)
        ('abc\0def', 'abc\x00def'),          # ASCII-only
        ('\u20ac\0', '\u20ac\0abc\x00def'),  # non-ASCII
    )
    for text, expected in steps:
        entry.insert(0, text)
        entry.selection_range(0, 'end')
        self.assertEqual(entry.selection_get(), expected)


class WmTest(AbstractTkTest, unittest.TestCase):

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix truncation of strings with embedded null characters in some internal
operations in :mod:`tkinter`.
40 changes: 33 additions & 7 deletions Modules/_tkinter.c
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ unicodeFromTclObj(TkappObject *tkapp, Tcl_Obj *value)
else
Py_UNREACHABLE();
}
#endif
#endif /* USE_TCL_UNICODE */
const char *s = Tcl_GetStringFromObj(value, &len);
return unicodeFromTclStringAndSize(s, len);
}
Expand Down Expand Up @@ -1018,7 +1018,9 @@ AsObj(PyObject *value)
PyErr_SetString(PyExc_OverflowError, "string is too long");
return NULL;
}
if (PyUnicode_IS_ASCII(value)) {
if (PyUnicode_IS_ASCII(value) &&
strlen(PyUnicode_DATA(value)) == (size_t)PyUnicode_GET_LENGTH(value))
{
return Tcl_NewStringObj((const char *)PyUnicode_DATA(value),
(int)size);
}
Expand All @@ -1033,9 +1035,6 @@ AsObj(PyObject *value)
"surrogatepass", NATIVE_BYTEORDER);
else
Py_UNREACHABLE();
#else
encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
#endif
if (!encoded) {
return NULL;
}
Expand All @@ -1045,12 +1044,39 @@ AsObj(PyObject *value)
PyErr_SetString(PyExc_OverflowError, "string is too long");
return NULL;
}
#if USE_TCL_UNICODE
result = Tcl_NewUnicodeObj((const Tcl_UniChar *)PyBytes_AS_STRING(encoded),
(int)(size / sizeof(Tcl_UniChar)));
#else
encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
if (!encoded) {
return NULL;
}
size = PyBytes_GET_SIZE(encoded);
if (strlen(PyBytes_AS_STRING(encoded)) != (size_t)size) {
/* The string contains embedded null characters.
* Tcl needs a null character to be represented as \xc0\x80 in
* the Modified UTF-8 encoding. Otherwise the string can be
* truncated in some internal operations.
*
* NOTE: stringlib_replace() could be used here, but optimizing
* this obscure case isn't worth it unless stringlib_replace()
* was already exposed in the C API for other reasons. */
Py_SETREF(encoded,
PyObject_CallMethod(encoded, "replace", "y#y#",
"\0", (Py_ssize_t)1,
"\xc0\x80", (Py_ssize_t)2));
if (!encoded) {
return NULL;
}
size = PyBytes_GET_SIZE(encoded);
}
if (size > INT_MAX) {
Py_DECREF(encoded);
PyErr_SetString(PyExc_OverflowError, "string is too long");
return NULL;
}
result = Tcl_NewStringObj(PyBytes_AS_STRING(encoded), (int)size);
#endif
#endif /* USE_TCL_UNICODE */
Py_DECREF(encoded);
return result;
}
Expand Down

0 comments on commit c38e2f6

Please sign in to comment.