pidgin: 5b432252: util: Better validation of the allowed character values in XML 1.0
darkrain42 at pidgin.im
darkrain42 at pidgin.im
Fri Apr 30 21:05:39 EDT 2010
-----------------------------------------------------------------
Revision: 5b4322528c199b000fbfc774bb782dfd6dc0e2f2
Ancestor: c9659a3fecdd575434640e531c3eb61f90de6976
Author: darkrain42 at pidgin.im
Date: 2010-04-29T17:17:00
Branch: im.pidgin.pidgin
URL: http://d.pidgin.im/viewmtn/revision/info/5b4322528c199b000fbfc774bb782dfd6dc0e2f2
Modified files:
libpurple/tests/test_util.c libpurple/util.c
ChangeLog:
util: Better validation of the allowed character values in XML 1.0
From http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char. Refs #11257
This doesn't actually make a difference, because I think all the
invalid ranges aren't valid UTF-8 and so g_utf8_validate catches them.
-------------- next part --------------
============================================================
--- libpurple/tests/test_util.c 97bd0ced6da1a62f501c5640a12649056e037bbb
+++ libpurple/tests/test_util.c a5ecc8ec17d796a22a88277468eb36a51fea231d
@@ -121,6 +121,33 @@ END_TEST
}
END_TEST
+START_TEST(test_utf8_strip_unprintables)
+{
+ fail_unless(NULL == purple_utf8_strip_unprintables(NULL));
+ /* invalid UTF-8 */
+#if 0
+ /* disabled because make check fails on an assertion */
+ fail_unless(NULL == purple_utf8_strip_unprintables("abc\x80\x7f"));
+#endif
+ /* \t, \n, \r, space */
+ assert_string_equal_free("ab \tcd\nef\r ", purple_utf8_strip_unprintables("ab \tcd\nef\r "));
+ /* Basic ASCII */
+ assert_string_equal_free("Foobar", purple_utf8_strip_unprintables("Foobar"));
+ /* 0xE000 - 0xFFFD (UTF-8 encoded) */
+ /* U+F1F7 */
+ assert_string_equal_free("aaaa\xef\x87\xb7", purple_utf8_strip_unprintables("aaaa\xef\x87\xb7"));
+#if 0
+ /* disabled because make check fails on an assertion */
+ /* U+DB80 (Private Use High Surrogate, First) -- should be stripped */
+ assert_string_equal_free("aaaa", purple_utf8_strip_unprintables("aaaa\xed\xa0\x80"));
+ /* U+FFFE (should be stripped) */
+ assert_string_equal_free("aaaa", purple_utf8_strip_unprintables("aaaa\xef\xbf\xbe"));
+#endif
+ /* U+FEFF (should not be stripped) */
+ assert_string_equal_free("aaaa\xef\xbb\xbf", purple_utf8_strip_unprintables("aaaa\xef\xbb\xbf"));
+}
+END_TEST
+
START_TEST(test_mime_decode_field)
{
gchar *result = purple_mime_decode_field("=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=");
@@ -168,6 +195,10 @@ util_suite(void)
tcase_add_test(tc, test_markup_html_to_xhtml);
suite_add_tcase(s, tc);
+ tc = tcase_create("Stripping Unparseables");
+ tcase_add_test(tc, test_utf8_strip_unprintables);
+ suite_add_tcase(s, tc);
+
tc = tcase_create("MIME");
tcase_add_test(tc, test_mime_decode_field);
suite_add_tcase(s, tc);
============================================================
--- libpurple/util.c aa071cb1fe0710e3d75ebebc70d4f20674526d5a
+++ libpurple/util.c 807eb07e5315efd98f8e70facbb71e7928898f59
@@ -4593,12 +4593,22 @@ purple_utf8_strip_unprintables(const gch
}
workstr = iter = g_new(gchar, strlen(str) + 1);
- for ( ; *str; ++str) {
- guchar c = *str;
- if (c >= 0x20 || c == '\t' || c == '\n' || c == '\r') {
- *iter = c;
- ++iter;
+ while (*str) {
+ gunichar ch = g_utf8_get_char(str);
+ gchar *next = g_utf8_next_char(str);
+ /*
+ * Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
+ * [#x10000-#x10FFFF]
+ */
+ if ((ch == '\t' || ch == '\n' || ch == '\r') ||
+ (ch >= 0x20 && ch <= 0xD7FF) ||
+ (ch >= 0xE000 && ch <= 0xFFFD) ||
+ (ch >= 0x10000 && ch <= 0x10FFFF)) {
+ memcpy(iter, str, next - str);
+ iter += (next - str);
}
+
+ str = next;
}
/* nul-terminate the new string */
More information about the Commits
mailing list