poparser.c 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. #include <ctype.h>
  2. #include <assert.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <iconv.h>
  6. #include "poparser.h"
  7. #include "StringEscape.h"
  /* streq(A, B): nonzero when the two NUL-terminated strings are equal. */
  #define streq(A, B) (!strcmp((A), (B)))
  /* strstarts(S, W): W must be a string literal (its length is taken with
   * sizeof). Evaluates to a pointer to the first character of S after the
   * prefix W, or to NULL when S does not begin with W.
   * strncmp is used instead of memcmp so the comparison stops at the
   * terminator of S and never reads beyond a string shorter than W.
   * Note: S is evaluated twice, so it must be free of side effects. */
  #define strstarts(S, W) (strncmp((S), (W), sizeof(W) - 1) ? NULL : ((S) + (sizeof(W) - 1)))
  /* File-wide counter used to skip entries that follow a "#, fuzzy" comment:
   * raised by get_type_and_start(), lowered and tested by
   * poparser_feed_line(), cleared by poparser_init(). */
  static unsigned fuzzymark = 0;
  11. static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* end, size_t *stringstart) {
  12. enum po_entry result_type;
  13. char *x, *y;
  14. size_t start = (size_t) lp;
  15. while(isspace(*lp) && lp < end) lp++;
  16. if(lp[0] == '#') {
  17. char *s;
  18. if((s = strstr(lp, ", fuzzy"))) {
  19. if(fuzzymark != 0) fuzzymark++;
  20. else fuzzymark=2;
  21. }
  22. inv:
  23. *stringstart = 0;
  24. return pe_invalid;
  25. } else if((y = strstarts(lp, "msg"))) {
  26. if((x = strstarts(y, "id")) && isspace(*x))
  27. result_type = pe_msgid;
  28. else if ((x = strstarts(y, "id_plural")) && isspace(*x))
  29. result_type = pe_plural;
  30. else if ((x = strstarts(y, "ctxt")) && isspace(*x))
  31. result_type = pe_ctxt;
  32. else if ((x = strstarts(y, "str")) && (isspace(*x) ||
  33. (x[0] == '[' && (x[1]-'0') < info->nplurals && x[2] == ']' && (x += 3) && isspace(*x))))
  34. result_type = pe_msgstr;
  35. else
  36. goto inv;
  37. while(isspace(*x) && x < end) x++;
  38. if(*x != '"') abort();
  39. conv:
  40. *stringstart = ((size_t) x - start) + 1;
  41. } else if(lp[0] == '"') {
  42. if(!(*info->charset)) {
  43. if((x = strstr(lp, "charset="))) {
  44. // charset=xxx\\n
  45. int len = strlen(x+=8) - 4;
  46. assert(len <= 11);
  47. if(strncmp(x, "UTF-8", 5) && strncmp(x, "utf-8", 5)) {
  48. memcpy(info->charset, x, len);
  49. info->charset[len] = 0;
  50. }
  51. }
  52. }
  53. if((x = strstr(lp, "nplurals=")))
  54. if(*(x+9) - '0')
  55. info->nplurals = *(x+9) - '0';
  56. result_type = pe_str;
  57. x = lp;
  58. goto conv;
  59. } else {
  60. goto inv;
  61. }
  62. return result_type;
  63. }
  64. /* expects a pointer to the first char after a opening " in a string,
  65. * converts the string into convbuf, and returns the length of that string */
  66. static size_t get_length_and_convert(struct po_info *info, char* x, char* end, char* convbuf, size_t convbuflen) {
  67. size_t result = 0;
  68. char* e = x + strlen(x);
  69. assert(e > x && e < end && *e == 0);
  70. e--;
  71. while(isspace(*e)) e--;
  72. if(*e != '"') abort();
  73. *e = 0;
  74. char *s;
  75. if(*info->charset) {
  76. iconv_t ret = iconv_open("UTF-8", info->charset);
  77. if(ret != (iconv_t)-1) {
  78. size_t a=end-x, b=a*4;
  79. char mid[b], *midp=mid;
  80. iconv(iconv_open("UTF-8", info->charset), &x, &a, &midp, &b);
  81. if((s = strstr(mid, "charset=")))
  82. memcpy(s+8, "UTF-8\\n\0", 8);
  83. result = unescape(mid, convbuf, convbuflen);
  84. // iconv doesnt recognize the encoding
  85. } else result = unescape(x, convbuf, convbuflen);
  86. } else result = unescape(x, convbuf, convbuflen);
  87. return result;
  88. }
  89. void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata) {
  90. p->buf = workbuf;
  91. p->bufsize = bufsize;
  92. p->cb = cb;
  93. p->prev_type = pe_invalid;
  94. p->prev_rtype = pe_invalid;
  95. p->curr_len = 0;
  96. p->cbdata = cbdata;
  97. *(p->info.charset) = 0;
  98. // nplurals = 2 by default
  99. p->info.nplurals = 2;
  100. fuzzymark = 0;
  101. }
  /* What poparser_feed_line() does for a (previous type, current type) pair. */
  enum lineactions {
      la_incr  = 0, /* append the line's text to the accumulated entry; also the
                       value of table cells that have no explicit initialiser */
      la_proc  = 1, /* hand the accumulated entry to the callback, then start over */
      la_abort = 2, /* invalid sequence: abort() */
      la_nop   = 3, /* nothing to do */
      la_max   = 4, /* number of actions */
  };
  109. /* return 0 on success */
  110. int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) {
  111. char *convbuf = p->buf;
  112. size_t convbuflen = p->bufsize;
  113. size_t strstart;
  114. static const enum lineactions action_tbl[pe_max][pe_max] = {
  115. // pe_str will never be set as curr_type
  116. [pe_str] = {
  117. [pe_str] = la_abort,
  118. [pe_msgid] = la_abort,
  119. [pe_ctxt] = la_abort,
  120. [pe_plural] = la_abort,
  121. [pe_msgstr] = la_abort,
  122. [pe_invalid] = la_abort,
  123. },
  124. [pe_msgid] = {
  125. [pe_str] = la_incr,
  126. [pe_msgid] = la_abort,
  127. [pe_ctxt] = la_abort,
  128. [pe_plural] = la_proc,
  129. [pe_msgstr] = la_proc,
  130. [pe_invalid] = la_proc,
  131. },
  132. [pe_ctxt] = {
  133. [pe_str] = la_incr,
  134. [pe_msgid] = la_proc,
  135. [pe_ctxt] = la_abort,
  136. [pe_plural] = la_abort,
  137. [pe_msgstr] = la_abort,
  138. [pe_invalid] = la_proc,
  139. },
  140. [pe_plural] = {
  141. [pe_str] = la_incr,
  142. [pe_msgid] = la_abort,
  143. [pe_ctxt] = la_abort,
  144. [pe_plural] = la_abort,
  145. [pe_msgstr] = la_proc,
  146. [pe_invalid] = la_proc,
  147. },
  148. [pe_msgstr] = {
  149. [pe_str] = la_incr,
  150. [pe_msgid] = la_proc,
  151. [pe_ctxt] = la_proc,
  152. [pe_plural] = la_abort,
  153. [pe_msgstr] = la_proc,
  154. [pe_invalid] = la_proc,
  155. },
  156. [pe_invalid] = {
  157. [pe_str] = la_nop,
  158. [pe_msgid] = la_incr,
  159. [pe_ctxt] = la_incr,
  160. [pe_plural] = la_nop,
  161. [pe_msgstr] = la_nop,
  162. [pe_invalid] = la_nop,
  163. },
  164. };
  165. enum po_entry type;
  166. type = get_type_and_start(&p->info, line, line + buflen, &strstart);
  167. if(p->prev_rtype != pe_invalid && action_tbl[p->prev_rtype][type] == la_abort)
  168. abort();
  169. if(type != pe_invalid && type != pe_str)
  170. p->prev_rtype = type;
  171. if(fuzzymark) {
  172. if(type == pe_ctxt && fuzzymark == 1) fuzzymark--;
  173. if(type == pe_msgid) fuzzymark--;
  174. if(fuzzymark > 0) return 0;
  175. }
  176. switch(action_tbl[p->prev_type][type]) {
  177. case la_incr:
  178. assert(type == pe_msgid || type == pe_msgstr || type == pe_str || type == pe_plural || pe_ctxt);
  179. p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len);
  180. break;
  181. case la_proc:
  182. assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr || p->prev_type == pe_plural || p->prev_type == pe_ctxt);
  183. p->info.text = convbuf;
  184. p->info.textlen = p->curr_len;
  185. p->info.type = p->prev_type;
  186. p->cb(&p->info, p->cbdata);
  187. if(type != pe_invalid)
  188. p->curr_len = get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf, convbuflen);
  189. else
  190. p->curr_len = 0;
  191. break;
  192. case la_nop:
  193. break;
  194. case la_abort:
  195. default:
  196. abort();
  197. // todo : return error code
  198. }
  199. if(type != pe_str) {
  200. p->prev_type = type;
  201. }
  202. return 0;
  203. }
  /* Signal the end of input by feeding one empty line to the parser.
   * Returns whatever poparser_feed_line() returns for that line. */
  int poparser_finish(struct po_parser *p) {
      char blank[4] = { 0 };
      int rc = poparser_feed_line(p, blank, sizeof blank);
      return rc;
  }