Skullheadx's Git Forge - dmenu.git/commitdiff
render invalid utf8 sequences as U+FFFD
author NRK <nrk@disroot.org>
Thu, 4 Jul 2024 21:27:47 +0000 (21:27 +0000)
committer Hiltjo Posthuma <hiltjo@codemadness.org>
Sun, 14 Jul 2024 09:42:58 +0000 (11:42 +0200)
Previously, drw_text would do the width calculations as if
invalid UTF-8 sequences were replaced with U+FFFD, but would still
pass the invalid sequence to Xft to render, where Xft would just
cut the text off at the first invalid byte.

This change makes invalid UTF-8 render as U+FFFD and avoids
sending invalid sequences to Xft. The following can be used to
check the behavior before and after the patch:

$ printf "0\xef1234567\ntest" | dmenu

Ref: https://lists.suckless.org/dev/2407/35646.html

drw.c

diff --git a/drw.c b/drw.c
index eb71da7f08e52722b11c70faf1a486bed2e59bb1..f151ae5e2780f77aaad53ab9d7255efe9e981311 100644 (file)
--- a/drw.c
+++ b/drw.c
@@ -237,7 +237,8 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
        XftResult result;
        int charexists = 0, overflow = 0;
        /* keep track of a couple codepoints for which we have no match. */
-       static unsigned int nomatches[128], ellipsis_width;
+       static unsigned int nomatches[128], ellipsis_width, invalid_width;
+       static const char invalid[] = "�";
 
        if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts)
                return 0;
@@ -257,6 +258,10 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
        usedfont = drw->fonts;
        if (!ellipsis_width && render)
                ellipsis_width = drw_fontset_getwidth(drw, "...");
+       if (!invalid_width) {
+               invalid_width = -1; /* stop infinite recursion */
+               invalid_width = drw_fontset_getwidth(drw, invalid);
+       }
        while (1) {
                ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0;
                utf8str = text;
@@ -284,9 +289,9 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                                                else
                                                        utf8strlen = ellipsis_len;
                                        } else if (curfont == usedfont) {
-                                               utf8strlen += utf8charlen;
                                                text += utf8charlen;
-                                               ew += tmpw;
+                                               utf8strlen += utf8err ? 0 : utf8charlen;
+                                               ew += utf8err ? 0 : tmpw;
                                        } else {
                                                nextfont = curfont;
                                        }
@@ -294,7 +299,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                                }
                        }
 
-                       if (overflow || !charexists || nextfont)
+                       if (overflow || !charexists || nextfont || utf8err)
                                break;
                        else
                                charexists = 0;
@@ -309,6 +314,12 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                        x += ew;
                        w -= ew;
                }
+               if (utf8err && (!render || invalid_width < w)) {
+                       if (render)
+                               drw_text(drw, x, y, w, h, 0, invalid, invert);
+                       x += invalid_width;
+                       w -= invalid_width;
+               }
                if (render && overflow)
                        drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert);