Преглед изворни кода

libintl.h: route stubs through format_arg inlines; fix gettext_noop

uClibc-ng's <libintl.h> ships a set of NLS-disable pass-through macros
so that programs which would normally link against -lintl can build
against the libc-internal stubs.  The current form causes two problems
for strict GNU consumers (observed while building elfutils-0.193 with
GCC 15.2.0), and the obvious fix for the second would introduce a
third:

1) gettext_noop redefinition
   ---------------------------
   The header does

       #undef gettext_noop
       #define gettext_noop(X) X

   so it cleanly replaces any earlier definition.  But many GNU
   programs ship their own *identical* definition *after* including
   <libintl.h>, e.g. elfutils' lib/system.h line 157:

       #define gettext_noop(Str) Str

   Because the parameter name differs (X vs Str), the preprocessor
   treats it as a non-identical redefinition per C99 6.10.3p2
   ("same number and spelling of parameters") and -Werror promotes the
   warning to an error.  Switching libintl.h to "(Str) Str" makes the
   two definitions token-for-token identical and the redefinition is
   accepted silently.  Also gate the definition with #ifndef so a
   consumer that defines gettext_noop *before* including <libintl.h>
   keeps its definition (which #undef would have wiped out).

2) -Werror=format-security
   ------------------------
   The pass-through macros wrap their argument in a comma-expression:

       #define dgettext(dom, X) ((void)(dom), (char*) (X))

   to silence -Wunused-value on the unused 'dom' parameter.  GCC's
   -Wformat-security check, however, cannot see through such comma
   expressions, so

       printf(_("oops: %d"), n)
       // -> printf(dgettext("dom", "oops: %d"), n)
       // -> printf(((void)("dom"), (char*) ("oops: %d")), n)

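   no longer counts as "format string is a literal" and -Werror fails.
   (Strictly, -Wformat-security fires on the argument-less form, e.g.
   printf(_("oops")); with trailing arguments, as above, the same
   non-literal format is reported by -Wformat-nonliteral instead.)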

3) Lost side effects in macro arguments
   --------------------------------------
   A naive fix -- simply dropping the (void)(...) operands from the
   comma expressions -- silences -Wformat-security at the price of
   losing every dropped argument's side effects: dgettext(read_dom(),
   "fmt") would silently stop calling read_dom().  Unlikely to matter in
   practice (real callers pass string literals), but a semantic
   regression nonetheless.

The fix used here mirrors glibc's libintl.h:
__attribute__((format_arg(N))) on each gettext-family function tells
GCC that the return value is a format-string equivalent to argument N.
-Wformat-security follows the attribute through the call and detects
the original string literal.

Concretely, the comma-expression macros are replaced by static-inline
stubs that are decorated with format_arg(N) and then wrapped by
simple, syntactically clean macros (no comma expressions):

    static inline __attribute__((__format_arg__(2)))
    char *__libintl_stub_dgettext(const char *__dom, const char *__msgid)
    {
        (void) __dom;
        return (char *) __msgid;
    }
    #define dgettext(dom, X) __libintl_stub_dgettext((dom), (X))

Properties:
  - function-call semantics evaluate every argument, so side effects
    in callers like dgettext(read_dom(), "fmt") are preserved;
  - the inline body collapses to (char*) msgid at -O1+ for literal
    msgid -- zero runtime overhead in the common case (verified with
    gcc -O2 -S: callsites collapse to a single printf call, no stub
    function call remains);
  - -Wformat-security follows the call through format_arg and accepts
    the literal as a format string;
  - -Wunused-value is no longer raised, because a discarded
    function-call result is not an unused expression value.  The
    previous `#pragma GCC diagnostic ignored "-Wunused-value"` inside
    the macro block is therefore no longer needed and is dropped, which
    restores -Wunused-value coverage for user code that follows the
    #include of <libintl.h>.

The same format_arg annotation is also added to the extern declarations
of the gettext-family functions, so callers with -DLIBINTL_NO_MACROS
(which want to link against the real libintl) benefit from the same
-Wformat-security coverage.

The extern declarations remain intact, so &gettext etc. still resolves
to the libc-internal symbol (the function-like macro only fires on the
foo(...) call form, not on bare foo).  The libintl.c stub
implementation is unchanged and the ABI is preserved.

Cross-verified on the host GCC 13 (Linux) and the gcc-15.2.0 cross
toolchain (arm-bbs-linux-uclibcgnueabihf): printf(dgettext("dom",
"fmt %d"), n) compiles with -Werror=format-security at -O0/-O2, and
a probe with side-effect-bearing dom argument calls read_dom() exactly
once on both -O0 and -O2.

Signed-off-by: Ramin Moussavi <ramin.moussavi@yacoub.de>
Ramin Moussavi пре 2 недеља
родитељ
комит
0eb0bae057
1 измењених фајлова са 130 додато и 28 уклоњено
  1. 130 28
      include/libintl.h

+ 130 - 28
include/libintl.h

@@ -1,19 +1,47 @@
 #ifndef LIBINTL_H
 #define LIBINTL_H
 
-char *gettext(const char *msgid);
-char *dgettext(const char *domainname, const char *msgid);
-char *dcgettext(const char *domainname, const char *msgid, int category);
-char *ngettext(const char *msgid1, const char *msgid2, unsigned long n);
-char *dngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long n);
-char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long n, int category);
+/* BBS: __attribute__((format_arg(N))) tells GCC that this function
+   returns a format-string equivalent to argument N.  -Wformat-security
+   uses this to see through gettext/dgettext/... calls and detect that
+   the msgid argument was a string literal.  glibc's libintl.h ships the
+   same annotation; without it, programs that build with
+   -Werror=format-security (e.g. distro-style hardening flags) would
+   fail on every printf(_("fmt %d"), n).  */
+#if defined __GNUC__ && __GNUC__ >= 3
+# define __libintl_format_arg(N) __attribute__ ((__format_arg__ (N)))
+#else
+# define __libintl_format_arg(N)
+#endif
+
+char *gettext(const char *msgid)
+	__libintl_format_arg(1);
+char *dgettext(const char *domainname, const char *msgid)
+	__libintl_format_arg(2);
+char *dcgettext(const char *domainname, const char *msgid, int category)
+	__libintl_format_arg(2);
+char *ngettext(const char *msgid1, const char *msgid2, unsigned long n)
+	__libintl_format_arg(1) __libintl_format_arg(2);
+char *dngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long n)
+	__libintl_format_arg(2) __libintl_format_arg(3);
+char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long n, int category)
+	__libintl_format_arg(2) __libintl_format_arg(3);
 
 char *textdomain(const char *domainname);
 char *bind_textdomain_codeset(const char *domainname, const char *codeset);
 char *bindtextdomain(const char *domainname, const char *dirname);
 
-#undef gettext_noop
-#define gettext_noop(X) X
+/* BBS: gettext_noop is just a compile-time identity; many GNU programs
+   (e.g. elfutils' lib/system.h line 157) ship
+       #define gettext_noop(Str) Str
+   verbatim.  Use the same token form so a redefinition is recognised
+   as identical by the preprocessor and does not trigger a
+   macro-redefinition diagnostic under -Werror.  Also gate with #ifndef
+   so a consumer that defines it first keeps its definition (which the
+   previous "#undef gettext_noop" would have wiped).  */
+#ifndef gettext_noop
+#define gettext_noop(Str) Str
+#endif
 
 #ifndef LIBINTL_NO_MACROS
 /* if these macros are defined, configure checks will detect libintl as
@@ -22,31 +50,105 @@ char *bindtextdomain(const char *domainname, const char *dirname);
  * checking for ngettext in libc ... yes
  * the consequence is that -lintl will not be added to the LDFLAGS.
  * so if for some reason you want that libintl.a gets linked,
- * add -DLIBINTL_NO_MACROS=1 to your CPPFLAGS. */
-
-#define gettext(X) ((char*) (X))
-#define dgettext(dom, X) ((void)(dom), (char*) (X))
-#define dcgettext(dom, X, cat) ((void)(dom), (void)(cat), (char*) (X))
-#define ngettext(X, Y, N) \
-	((char*) (((N) == 1) ? ((void)(Y), (X)) : ((void)(X), (Y))))
-#define dngettext(dom, X, Y, N) \
-	((dom), (char*) (((N) == 1) ? ((void)(Y), (X)) : ((void)(X), (Y))))
-#define dcngettext(dom, X, Y, N, cat) \
-	((dom), (cat), (char*) (((N) == 1) ? ((void)(Y), (X)) : ((void)(X), (Y))))
-#define bindtextdomain(X, Y) ((void)(X), (void)(Y), (char*) "/")
-#define bind_textdomain_codeset(dom, codeset) \
-	((void)(dom), (void)(codeset), (char*) 0)
-#define textdomain(X) ((void)(X), (char*) "messages")
+ * add -DLIBINTL_NO_MACROS=1 to your CPPFLAGS.
+ *
+ * BBS: the previous implementation expanded to comma expressions like
+ *   #define dgettext(dom, X) ((void)(dom), (char*) (X))
+ * which silenced -Wunused-value on the unused dom argument but defeated
+ * -Wformat-security: GCC cannot see through a comma expression to
+ * decide that the result is still a string literal.  Route through
+ * static-inline stubs decorated with __attribute__((format_arg))
+ * instead:
+ *   - function-call semantics evaluate every argument, so side effects
+ *     in callers like dgettext(read_dom(), "fmt") are preserved;
+ *   - the inline body collapses to (char*) msgid at -O1+ for literal
+ *     msgid, so there is zero runtime overhead in the common case;
+ *   - format_arg tells -Wformat-security how to follow the call back
+ *     to the original literal.
+ * The static-inline stub names match the convention __libintl_stub_*.
+ * The user-visible macros shadow the extern declarations only at call
+ * sites (foo(...)), so taking &gettext still resolves to the extern
+ * function declared above.  */
+
+static inline __libintl_format_arg(1)
+char *__libintl_stub_gettext(const char *__msgid)
+{
+	return (char *) __msgid;
+}
+#define gettext(X) __libintl_stub_gettext(X)
+
+static inline __libintl_format_arg(2)
+char *__libintl_stub_dgettext(const char *__dom, const char *__msgid)
+{
+	(void) __dom;
+	return (char *) __msgid;
+}
+#define dgettext(dom, X) __libintl_stub_dgettext((dom), (X))
+
+static inline __libintl_format_arg(2)
+char *__libintl_stub_dcgettext(const char *__dom, const char *__msgid, int __cat)
+{
+	(void) __dom; (void) __cat;
+	return (char *) __msgid;
+}
+#define dcgettext(dom, X, cat) __libintl_stub_dcgettext((dom), (X), (cat))
+
+static inline __libintl_format_arg(1) __libintl_format_arg(2)
+char *__libintl_stub_ngettext(const char *__s1, const char *__s2, unsigned long __n)
+{
+	return (char *) ((__n == 1) ? __s1 : __s2);
+}
+#define ngettext(X, Y, N) __libintl_stub_ngettext((X), (Y), (N))
+
+static inline __libintl_format_arg(2) __libintl_format_arg(3)
+char *__libintl_stub_dngettext(const char *__dom, const char *__s1, const char *__s2, unsigned long __n)
+{
+	(void) __dom;
+	return (char *) ((__n == 1) ? __s1 : __s2);
+}
+#define dngettext(dom, X, Y, N) __libintl_stub_dngettext((dom), (X), (Y), (N))
+
+static inline __libintl_format_arg(2) __libintl_format_arg(3)
+char *__libintl_stub_dcngettext(const char *__dom, const char *__s1, const char *__s2, unsigned long __n, int __cat)
+{
+	(void) __dom; (void) __cat;
+	return (char *) ((__n == 1) ? __s1 : __s2);
+}
+#define dcngettext(dom, X, Y, N, cat) __libintl_stub_dcngettext((dom), (X), (Y), (N), (cat))
+
+static inline char *__libintl_stub_bindtextdomain(const char *__dom, const char *__dir)
+{
+	(void) __dom; (void) __dir;
+	return (char *) "/";
+}
+#define bindtextdomain(X, Y) __libintl_stub_bindtextdomain((X), (Y))
+
+static inline char *__libintl_stub_bind_textdomain_codeset(const char *__dom, const char *__codeset)
+{
+	(void) __dom; (void) __codeset;
+	return (char *) 0;
+}
+#define bind_textdomain_codeset(dom, codeset) __libintl_stub_bind_textdomain_codeset((dom), (codeset))
+
+static inline char *__libintl_stub_textdomain(const char *__dom)
+{
+	(void) __dom;
+	return (char *) "messages";
+}
+#define textdomain(X) __libintl_stub_textdomain(X)
 
 #undef ENABLE_NLS
 #undef DISABLE_NLS
 #define DISABLE_NLS 1
 
-#if __GNUC__ +0 > 3
-/* most ppl call bindtextdomain() without using its return value
-   thus we get tons of warnings about "statement with no effect" */
-#pragma GCC diagnostic ignored "-Wunused-value"
-#endif
+/* BBS: the previous version had
+ *   #pragma GCC diagnostic ignored "-Wunused-value"
+ * here because the comma-expression macros produced a (char*)"..."
+ * trailing rvalue that callers discarded as a statement.  With
+ * function-call semantics (static-inline stubs) the discarded return
+ * value is a regular function call statement, not an unused expression
+ * value, so -Wunused-value never fires and the pragma is no longer
+ * needed.  */
 
 #endif