123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612 |
- /* msgfmt utility (C) 2012 rofl0r
- * released under the MIT license, see LICENSE for details */
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
- #include <assert.h>
- #include "poparser.h"
- // in DO_NOTHING mode, we simply write the msgid twice, once for msgid, once for msgstr.
- // TODO: maybe make it write "" instead of echoing the msgid.
- //#define DO_NOTHING
/* Print the one-line usage summary to stdout and terminate with exit
 * status 1. Never returns. */
__attribute__((noreturn))
static void syntax(void) {
	static const char usage[] = "Usage: msgfmt [OPTION] filename.po ...\n";

	fputs(usage, stdout);
	exit(1);
}
/* Print the (deliberately fake) version banner to stdout and terminate
 * with exit status 0. Never returns. */
__attribute__((noreturn))
static void version(void) {
	static const char banner[] = "these are not (GNU gettext-tools) 99.9999.9999\n";

	fputs(banner, stdout);
	exit(0);
}
/* streq(A, B): evaluates to 1 when the NUL-terminated strings A and B are
 * equal, 0 otherwise. */
#define streq(A, B) (strcmp((A), (B)) == 0)
/* strstarts(S, W): if S begins with W, evaluates to a pointer to the first
 * character of S after that prefix; otherwise evaluates to NULL.
 * W must be a string literal (its length is taken with sizeof).
 * strncmp is used instead of memcmp so that the comparison stops at the
 * terminator of S and never reads past the end of a string shorter than W. */
#define strstarts(S, W) (strncmp((S), (W), sizeof(W) - 1) ? NULL : ((S) + (sizeof(W) - 1)))
/* Header record written verbatim at the start of the output file. */
struct mo_hdr {
	unsigned magic;         /* file signature; def_hdr uses 0x950412de */
	int rev;                /* format revision; def_hdr uses 0 */
	unsigned numstring;     /* number of entries in each of the two tables */
	unsigned off_tbl_org;   /* file offset of the original-string table */
	unsigned off_tbl_trans; /* file offset of the translation table */
	unsigned hash_tbl_size; /* hash table size; left at 0 by this tool */
	unsigned off_tbl_hash;  /* hash table offset; left at 0 by this tool */
};
- /* file layout:
- header
- strtable (lenghts/offsets)
- transtable (lenghts/offsets)
- [hashtable]
- strings section
- translations section */
- const struct mo_hdr def_hdr = {
- 0x950412de,
- 0,
- 0,
- sizeof(struct mo_hdr),
- 0,
- 0,
- 0,
- };
// The input is parsed twice:
// pass 0: count the strings and add up their sizes, so that table and
//         buffer sizes are known
// pass 1: build the in-memory string tables
enum passes {
	pass_first,
	pass_collect_sizes = pass_first,
	pass_second,
	pass_max,
};
/* One table record as written to the output file: a string's length
 * followed by its offset. */
struct strtbl {
	unsigned len;
	unsigned off;
};
- struct strmap {
- struct strtbl str, *trans;
- };
- struct callbackdata {
- enum passes pass;
- unsigned off;
- FILE* out;
- unsigned msgidbuf1_len;
- unsigned msgidbuf2_len;
- unsigned pluralbuf1_len;
- unsigned pluralbuf2_len;
- unsigned ctxtbuf_len;
- unsigned msgstr1_len;
- unsigned msgstr2_len;
- unsigned pluralstr_count;
- unsigned string_maxlen;
- char* msgidbuf1;
- char* msgidbuf2;
- char* pluralbuf1;
- char* pluralbuf2;
- char* msgctxtbuf;
- char* msgstrbuf1;
- char* msgstrbuf2;
- unsigned priv_type;
- unsigned priv_len;
- unsigned num[pe_maxstr];
- unsigned len[pe_maxstr];
- struct strmap *strlist;
- struct strtbl *translist;
- char *strbuffer[pe_maxstr];
- unsigned stroff[pe_maxstr];
- unsigned curr[pe_maxstr];
- };
- static struct callbackdata *cb_for_qsort;
- int strmap_comp(const void *a_, const void *b_) {
- const struct strmap *a = a_, *b = b_;
- return strcmp(cb_for_qsort->strbuffer[0] + a->str.off, cb_for_qsort->strbuffer[0] + b->str.off);
- }
/* Kinds of system-dependent format placeholders that are recognised. */
enum sysdep_types {
	st_priu32 = 0,
	st_priu64,
	st_priumax,
	st_max
};

/* Placeholder texts. Byte 0 of each row is the length of the placeholder,
 * the placeholder itself follows.
 * The row size must leave room for the terminating NUL of the longest entry
 * (1 length byte + 9 characters + NUL = 11): replace() runs strlen() on
 * these strings, and a row of only 10 bytes leaves "<PRIuMAX>" unterminated. */
static const char sysdep_str[][12]={
	[st_priu32]  = "\x08<PRIu32>",
	[st_priu64]  = "\x08<PRIu64>",
	[st_priumax] = "\x09<PRIuMAX>",
};

/* Replacement texts. Byte 0 of each row is the number of alternative forms,
 * followed by that many NUL-terminated strings stored back to back. */
static const char sysdep_repl[][8]={
	[st_priu32]  = "\x02lu\0u",
	[st_priu64]  = "\x02lu\0llu",
	[st_priumax] = "\x01ju"
};
- static const char *get_repl(enum sysdep_types type, unsigned nr) {
- assert(nr < (unsigned)sysdep_repl[type][0]);
- const char* p = sysdep_repl[type]+1;
- while(nr--) p+=strlen(p)+1;
- return p;
- }
/* Replace, in place, every occurrence of `what` inside the first `textlen`
 * bytes of `text` by `with`.
 *
 * text    - buffer to edit; the byte at text[textlen] (normally the
 *           terminator) is moved along with the tail on every replacement
 * textlen - number of bytes of `text` to scan
 * what    - NUL-terminated pattern to look for
 * with    - NUL-terminated replacement; must not be longer than `what`,
 *           so the text can only shrink
 *
 * An empty `what` is a no-op: it would match everywhere without consuming
 * any input, so the scan could never finish. */
static void replace(char* text, unsigned textlen, const char* what, const char * with) {
	size_t la = strlen(what), li = strlen(with);
	char *p = text;

	assert(la >= li);
	if (la == 0)
		return;

	/* textlen counts the bytes that remain to be scanned from p onwards */
	while (textlen >= la) {
		if (memcmp(p, what, la) == 0) {
			memcpy(p, with, li);
			textlen -= la;
			/* close the gap; +1 carries the trailing byte along */
			memmove(p + li, p + la, textlen + 1);
			p += li;
		} else {
			p++;
			textlen--;
		}
	}
}
- static unsigned get_form(enum sysdep_types type, unsigned no, unsigned occurences[st_max]) {
- unsigned i,divisor = 1;
- for(i=type+1;i<st_max;i++) if(occurences[i]) divisor *= sysdep_repl[i][0];
- return (no/divisor)%sysdep_repl[type][0];
- }
- static char** sysdep_transform(const char* text, unsigned textlen, unsigned *len, unsigned *count, int simulate) {
- unsigned occurences[st_max] = {0};
- const char *p=text,*o;
- unsigned i,j, l = textlen;
- while(l && (o=strchr(p, '<'))) {
- l-=o-p;p=o;
- unsigned f = 0;
- for(i=0;i<st_max;i++)
- if(l>=(unsigned)sysdep_str[i][0] && !memcmp(p,sysdep_str[i]+1,sysdep_str[i][0])) {
- occurences[i]++;
- f=1;
- p+=sysdep_str[i][0];
- l-=sysdep_str[i][0];
- break;
- }
- if(!f) p++,l--;
- }
- *count = 1;
- for(i=0;i<st_max;i++) if(occurences[i]) *count *= sysdep_repl[i][0];
- l = textlen * *count;
- for(i=0;i<*count;i++) for(j=0;j<st_max;j++)
- if(occurences[j]) l-= occurences[j] * (sysdep_str[j][0] - strlen(get_repl(j, get_form(j, i, occurences))));
- *len = l+*count-1;
- char **out = 0;
- if(!simulate) {
- out = malloc((sizeof(char*)+textlen+1) * *count);
- assert(out);
- char *p = (void*)(out+*count);
- for(i=0;i<*count;i++) {
- out[i]=p;
- memcpy(p, text, textlen+1);
- p+=textlen+1;
- }
- for(i=0;i<*count;i++) for(j=0;j<st_max;j++)
- if(occurences[j])
- replace(out[i], textlen, sysdep_str[j]+1, get_repl(j, get_form(j, i, occurences)));
- }
- return out;
- }
- static inline void writemsg(struct callbackdata *d) {
- if(d->msgidbuf1_len != 0) {
- if(!d->strlist[d->curr[pe_msgid]].str.off)
- d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid];
- if(d->ctxtbuf_len != 0) {
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len);
- d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len;
- d->stroff[pe_msgid]+=d->ctxtbuf_len;
- }
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf1, d->msgidbuf1_len);
- d->stroff[pe_msgid]+=d->msgidbuf1_len;
- d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf1_len-1;
- if(d->pluralbuf1_len != 0) {
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf1, d->pluralbuf1_len);
- d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf1_len;
- d->stroff[pe_msgid]+=d->pluralbuf1_len;
- }
- d->curr[pe_msgid]++;
- }
- if(d->msgidbuf2_len != 0) {
- if(!d->strlist[d->curr[pe_msgid]].str.off)
- d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid];
- if(d->ctxtbuf_len != 0) {
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len);
- d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len;
- d->stroff[pe_msgid]+=d->ctxtbuf_len;
- }
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf2, d->msgidbuf2_len);
- d->stroff[pe_msgid]+=d->msgidbuf2_len;
- d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf2_len-1;
- if(d->pluralbuf2_len != 0) {
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf2, d->pluralbuf2_len);
- d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf2_len;
- d->stroff[pe_msgid]+=d->pluralbuf2_len;
- }
- d->curr[pe_msgid]++;
- }
- d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=0;
- }
- static inline void writestr(struct callbackdata *d, struct po_info *info) {
- // msgid xx; msgstr ""; is widely happened, it's invalid
- // https://github.com/sabotage-linux/gettext-tiny/issues/1
- // no invalid, when empty, check d->num[pe_msgid]
- if(!d->pluralstr_count && d->num[pe_msgid] > 0) {
- d->len[pe_msgid]-=d->msgidbuf1_len;
- d->len[pe_msgid]-=d->msgidbuf2_len;
- d->len[pe_plural]-=d->pluralbuf1_len;
- d->len[pe_plural]-=d->pluralbuf2_len;
- d->len[pe_ctxt]-=d->ctxtbuf_len;
- d->len[pe_msgstr]--;
- d->num[pe_msgid]--;
- d->num[pe_msgstr]--;
- d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0;
- return;
- }
- if(d->pluralstr_count && d->pluralstr_count <= info->nplurals) {
- writemsg(d);
- // plural <= nplurals is allowed
- d->translist[d->curr[pe_msgstr]].len=d->msgstr1_len-1;
- d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr];
- d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]];
- memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf1, d->msgstr1_len);
- d->stroff[pe_msgstr]+=d->msgstr1_len;
- d->curr[pe_msgstr]++;
- if(d->msgstr2_len) {
- d->translist[d->curr[pe_msgstr]].len=d->msgstr2_len-1;
- d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr];
- d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]];
- memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf2, d->msgstr2_len);
- d->stroff[pe_msgstr]+=d->msgstr2_len;
- d->curr[pe_msgstr]++;
- }
- d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0;
- }
- }
- int process_line_callback(struct po_info* info, void* user) {
- struct callbackdata *d = (struct callbackdata *) user;
- assert(info->type == pe_msgid || info->type == pe_ctxt || info->type == pe_msgstr || info->type == pe_plural);
- char **sysdeps;
- unsigned len, count, i, l;
- switch(d->pass) {
- case pass_collect_sizes:
- sysdep_transform(info->text, info->textlen, &len, &count, 1);
- d->num[info->type] += count;
- if(info->type == pe_msgid && count == 2 && d->priv_type == pe_ctxt) {
- // ctxt meets msgid with sysdeps, multiply num and len to suit it
- d->len[pe_ctxt] += d->priv_len +1;
- d->num[pe_ctxt]++;
- }
- if(count != 1 && info->type == pe_ctxt) {
- // except msgid, str, plural, all other types should not have sysdeps
- abort();
- }
- d->priv_type = info->type;
- d->priv_len = len;
- d->len[info->type] += len +1;
- if(len+1 > d->string_maxlen)
- d->string_maxlen = len+1;
- break;
- case pass_second:
- sysdeps = sysdep_transform(info->text, info->textlen, &len, &count, 0);
- for(i=0;i<count;i++) {
- l = strlen(sysdeps[i]);
- assert(l+1 <= d->string_maxlen);
- if(info->type == pe_msgid) {
- if(i==0 && d->msgidbuf1_len)
- writestr(d, info);
- // just copy, it's written down when writemsg()
- if(i==0) {
- memcpy(d->msgidbuf1, sysdeps[i], l+1);
- d->msgidbuf1_len = l+1;
- } else {
- memcpy(d->msgidbuf2, sysdeps[i], l+1);
- d->msgidbuf2_len = l+1;
- }
- } else if(info->type == pe_plural) {
- if(i==0) {
- memcpy(d->pluralbuf1, sysdeps[i], l+1);
- d->pluralbuf1_len = l+1;
- } else {
- memcpy(d->pluralbuf2, sysdeps[i], l+1);
- d->pluralbuf2_len = l+1;
- }
- } else if(info->type == pe_ctxt) {
- writestr(d, info);
- d->ctxtbuf_len = l+1;
- memcpy(d->msgctxtbuf, sysdeps[i], l);
- d->msgctxtbuf[l] = 0x4;//EOT
- } else {
- // just copy, it's written down when writestr()
- if(l) {
- if(i==0) {
- memcpy(&d->msgstrbuf1[d->msgstr1_len], sysdeps[i], l+1);
- d->msgstr1_len += l+1;
- d->pluralstr_count++;
- } else {
- // sysdeps exist
- memcpy(&d->msgstrbuf2[d->msgstr2_len], sysdeps[i], l+1);
- d->msgstr2_len += l+1;
- }
- }
- }
- }
- free(sysdeps);
- break;
- default:
- abort();
- }
- return 0;
- }
- int process(FILE *in, FILE *out) {
- struct mo_hdr mohdr = def_hdr;
- char line[4096]; char *lp;
- char convbuf[16384];
- struct callbackdata d = {
- .num = {
- [pe_msgid] = 0,
- [pe_msgstr] = 0,
- [pe_plural] = 0,
- [pe_ctxt] = 0,
- },
- .len = {
- [pe_msgid] = 0,
- [pe_msgstr] = 0,
- [pe_plural] = 0,
- [pe_ctxt] = 0,
- },
- .off = 0,
- .out = out,
- .pass = pass_first,
- .ctxtbuf_len = 0,
- .pluralbuf1_len = 0,
- .pluralbuf2_len = 0,
- .msgidbuf1_len = 0,
- .msgidbuf2_len = 0,
- .msgstr1_len = 0,
- .msgstr2_len = 0,
- .pluralstr_count = 0,
- .string_maxlen = 0,
- };
- struct po_parser pb, *p = &pb;
- mohdr.off_tbl_trans = mohdr.off_tbl_org;
- for(d.pass = pass_first; d.pass <= pass_second; d.pass++) {
- if(d.pass == pass_second) {
- // start of second pass:
- // ensure we dont output when there's no strings at all
- if(d.num[pe_msgid] == 0) {
- return 1;
- }
- // check that data gathered in first pass is consistent
- if((d.num[pe_msgstr] < d.num[pe_msgid]) || (d.num[pe_msgstr] > (d.num[pe_msgid] + d.num[pe_plural] * (p->info.nplurals - 1)))) {
- // one should actually abort here,
- // but gnu gettext simply writes an empty .mo and returns success.
- //abort();
- fprintf(stderr, "warning: mismatch of msgid/msgstr count, writing empty .mo file\n");
- d.num[pe_msgid] = 0;
- return 0;
- }
- d.msgidbuf1 = calloc(d.string_maxlen*5+2*d.string_maxlen*p->info.nplurals, 1);
- d.msgidbuf2 = d.msgidbuf1 + d.string_maxlen;
- d.pluralbuf1 = d.msgidbuf2 + d.string_maxlen;
- d.pluralbuf2 = d.pluralbuf1 + d.string_maxlen;
- d.msgctxtbuf = d.pluralbuf2 + d.string_maxlen;
- d.msgstrbuf1 = d.msgctxtbuf + d.string_maxlen;
- d.msgstrbuf2 = d.msgstrbuf1 + d.string_maxlen*p->info.nplurals;
- d.strlist = calloc(d.num[pe_msgid] * sizeof(struct strmap), 1);
- d.translist = calloc(d.num[pe_msgstr] * sizeof(struct strtbl), 1);
- d.strbuffer[pe_msgid] = calloc(d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1);
- d.strbuffer[pe_msgstr] = calloc(d.len[pe_msgstr], 1);
- d.stroff[pe_msgid] = d.stroff[pe_msgstr] = 0;
- assert(d.msgidbuf1 && d.strlist && d.translist && d.strbuffer[pe_msgid] && d.strbuffer[pe_msgstr]);
- }
- poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d);
- while((lp = fgets(line, sizeof(line), in))) {
- poparser_feed_line(p, lp, sizeof(line));
- }
- poparser_finish(p);
- if(d.pass == pass_second)
- writestr(&d, &p->info);
- if(d.pass == pass_second) {
- // calculate header fields from len and num arrays
- mohdr.numstring = d.num[pe_msgid];
- mohdr.off_tbl_org = sizeof(struct mo_hdr);
- mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2);
- // set offset startvalue
- d.off = mohdr.off_tbl_trans + d.num[pe_msgid] * (sizeof(unsigned)*2);
- }
- fseek(in, 0, SEEK_SET);
- }
- cb_for_qsort = &d;
- qsort(d.strlist, d.num[pe_msgid], sizeof (struct strmap), strmap_comp);
- unsigned i;
- // print header
- fwrite(&mohdr, sizeof(mohdr), 1, out);
- for(i = 0; i < d.num[pe_msgid]; i++) {
- d.strlist[i].str.off += d.off;
- fwrite(&d.strlist[i].str, sizeof(struct strtbl), 1, d.out);
- }
- for(i = 0; i < d.num[pe_msgid]; i++) {
- d.strlist[i].trans->off += d.off + d.len[pe_msgid] + d.len[pe_plural] + d.len[pe_ctxt];
- fwrite(d.strlist[i].trans, sizeof(struct strtbl), 1, d.out);
- }
- fwrite(d.strbuffer[pe_msgid], d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1, d.out);
- fwrite(d.strbuffer[pe_msgstr], d.len[pe_msgstr], 1, d.out);
- return 0;
- }
/* Open the stream named by `fn` and store it in `*dest`.
 *
 * out  - non-zero: open for writing; zero: open for reading
 * fn   - file name; "-" selects stdout when writing. When reading, "-"
 *        copies all of stdin into a temporary file and returns that file
 *        rewound to its start, so the caller can seek in it.
 * dest - receives the opened stream
 *
 * Does not return on failure: a diagnostic is printed with perror() and the
 * program exits with status 1. Output files are opened in binary mode
 * because the data written to them is binary. */
void set_file(int out, char* fn, FILE** dest) {
	if(strcmp(fn, "-") != 0) {
		*dest = fopen(fn, out ? "wb" : "r");
	} else if(out) {
		*dest = stdout;
	} else {
		char b[4096];
		size_t n;
		FILE* tmpf = tmpfile();
		if(!tmpf) {
			/* carrying on would hand a NULL stream to fwrite() */
			perror("tmpfile");
			exit(1);
		}
		while((n = fread(b, sizeof(*b), sizeof(b), stdin)) > 0) {
			if(fwrite(b, sizeof(*b), n, tmpf) != n) {
				perror("fwrite");
				exit(1);
			}
		}
		fseek(tmpf, 0, SEEK_SET);
		*dest = tmpf;
	}

	if(!*dest) {
		perror("fopen");
		exit(1);
	}
}
/* Return non-zero when `name` (a long option without its leading "--") is
 * one of the options that are accepted for compatibility and ignored. */
static int is_ignored_long_option(const char *name) {
	static const char *const exact[] = {
		"java", "java2", "csharp", "csharp-resources", "tcl", "qt",
		"strict", "properties-input", "stringtable-input", "use-fuzzy",
		"check", "check-format", "check-header", "check-domain",
		"check-compatibility", "check-accelerators", "no-hash",
		"verbose", "statistics",
	};
	static const char *const with_value[] = {
		"alignment=", "check-accelerators=", "resource=",
	};
	size_t i;

	for(i = 0; i < sizeof exact / sizeof exact[0]; i++)
		if(strcmp(name, exact[i]) == 0) return 1;
	for(i = 0; i < sizeof with_value / sizeof with_value[0]; i++)
		if(strncmp(name, with_value[i], strlen(with_value[i])) == 0) return 1;
	return 0;
}

/* Command line entry point.
 *
 * Recognised: -o FILE / --output-file=FILE, -l LOCALE / --locale=LOCALE,
 * -d DIR, -V / --version, -h / --help. A number of further options are
 * accepted and ignored. The first argument that is none of these is taken
 * as the input file ("-" reads stdin); later ones are ignored.
 *
 * When both a locale and a destination are known, only an empty file
 * "<dest>/<locale>.msg" is created. Otherwise the input is compiled to the
 * output file, "messages.mo" by default; if process() reports an input
 * without any string, the output file is removed again.
 *
 * An option that needs a value but is the last argument prints the usage
 * text and exits, instead of reading past the end of argv. */
int main(int argc, char**argv) {
	if(argc == 1) syntax();
	int arg;
	FILE *out = NULL;
	FILE *in = NULL;
	int expect_in_fn = 1;
	char* locale = NULL;
	char* dest = NULL;

	for(arg = 1; arg < argc; arg++) {
		char *a = argv[arg];
		/* set when the argument turns out to be a file name candidate */
		int is_operand = 0;

		if(a[0] != '-') {
			is_operand = 1;
		} else if(a[1] == '-') {
			char *name = a + 2;
			/* option values go through `val`, so that a failed match can
			 * never wipe out an earlier -o / -d setting of `dest` */
			char *val;
			if(is_ignored_long_option(name)) {
				/* accepted for compatibility, no effect */
			} else if((val = strstarts(name, "locale="))) {
				locale = val;
			} else if((val = strstarts(name, "output-file="))) {
				dest = val;
				set_file(1, val, &out);
			} else if(streq(name, "version")) {
				version();
			} else if(streq(name, "help")) {
				syntax();
			} else {
				is_operand = 1;
			}
		} else if(streq(a + 1, "o")) {
			if(++arg >= argc) syntax();
			dest = argv[arg];
			set_file(1, dest, &out);
		} else if(a[1] != '\0' && a[2] == '\0' && strchr("jrPfacvC", a[1])) {
			/* accepted for compatibility, no effect */
		} else if(streq(a + 1, "V")) {
			version();
		} else if(streq(a + 1, "h")) {
			syntax();
		} else if(streq(a + 1, "l")) {
			if(++arg >= argc) syntax();
			locale = argv[arg];
		} else if(streq(a + 1, "d")) {
			if(++arg >= argc) syntax();
			dest = argv[arg];
		} else {
			/* includes a lone "-", which set_file() maps to stdin */
			is_operand = 1;
		}

		if(is_operand && expect_in_fn) {
			set_file(0, a, &in);
			expect_in_fn = 0;
		}
	}

	if(locale != NULL && dest != NULL) {
		int sz = snprintf(NULL, 0, "%s/%s.msg", dest, locale);
		if(sz < 0) return 1;
		char msg[sz+1];
		snprintf(msg, sizeof(msg), "%s/%s.msg", dest, locale);
		FILE *fp = fopen(msg, "w");
		if(fp) {
			fclose(fp);
			return 0;
		} else return 1;
	}

	if(out == NULL) {
		dest = "messages.mo";
		set_file(1, "messages.mo", &out);
	}

	if(in == NULL || out == NULL) {
		return 1;
	}

	int ret = process(in, out);
	/* only the output stream is flushed: flushing an input stream is
	 * undefined in ISO C */
	fflush(out);
	if(in != stdin) fclose(in);
	if(out != stdout) fclose(out);

	if(ret == 1) {
		return remove(dest);
	}
	return ret;
}
|