/* memcmp.c -- compare two blocks of memory.  */

/* Copyright (C) 1991,1993,1995,1997,1998,2003,2004
   Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Torbjorn Granlund (tege@sics.se).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

  17. #include <string.h>
  18. #undef memcmp
  19. #include "memcopy.h"
  20. #include <endian.h>
  21. #if __BYTE_ORDER == __BIG_ENDIAN
  22. # define WORDS_BIGENDIAN
  23. #endif
  24. #ifdef WORDS_BIGENDIAN
  25. # define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
  26. #else
  27. # define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b))
  28. #endif

/* BE VERY CAREFUL IF YOU CHANGE THIS CODE!  */

/* The strategy of this memcmp is:

   1. Compare bytes until one of the block pointers is aligned.

   2. Compare using memcmp_common_alignment or
      memcmp_not_common_alignment, depending on the alignment of the
      other block after the initial byte operations.  The maximum number
      of full words (of type op_t) are compared in this way.

   3. Compare the few remaining bytes.  */

  37. #ifndef WORDS_BIGENDIAN
  38. /* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine.
  39. A and B are known to be different.
  40. This is needed only on little-endian machines. */
  41. static int memcmp_bytes __P((op_t, op_t));
  42. # ifdef __GNUC__
  43. __inline
  44. # endif
  45. static int
  46. memcmp_bytes (a, b)
  47. op_t a, b;
  48. {
  49. long int srcp1 = (long int) &a;
  50. long int srcp2 = (long int) &b;
  51. op_t a0, b0;
  52. do
  53. {
  54. a0 = ((byte *) srcp1)[0];
  55. b0 = ((byte *) srcp2)[0];
  56. srcp1 += 1;
  57. srcp2 += 1;
  58. }
  59. while (a0 == b0);
  60. return a0 - b0;
  61. }
  62. #endif
  63. static int memcmp_common_alignment __P((long, long, size_t));
  64. /* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t'
  65. objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for
  66. memory operations on `op_t's. */
  67. static int
  68. memcmp_common_alignment (srcp1, srcp2, len)
  69. long int srcp1;
  70. long int srcp2;
  71. size_t len;
  72. {
  73. op_t a0, a1;
  74. op_t b0, b1;
  75. switch (len % 4)
  76. {
  77. default: /* Avoid warning about uninitialized local variables. */
  78. case 2:
  79. a0 = ((op_t *) srcp1)[0];
  80. b0 = ((op_t *) srcp2)[0];
  81. srcp1 -= 2 * OPSIZ;
  82. srcp2 -= 2 * OPSIZ;
  83. len += 2;
  84. goto do1;
  85. case 3:
  86. a1 = ((op_t *) srcp1)[0];
  87. b1 = ((op_t *) srcp2)[0];
  88. srcp1 -= OPSIZ;
  89. srcp2 -= OPSIZ;
  90. len += 1;
  91. goto do2;
  92. case 0:
  93. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  94. return 0;
  95. a0 = ((op_t *) srcp1)[0];
  96. b0 = ((op_t *) srcp2)[0];
  97. goto do3;
  98. case 1:
  99. a1 = ((op_t *) srcp1)[0];
  100. b1 = ((op_t *) srcp2)[0];
  101. srcp1 += OPSIZ;
  102. srcp2 += OPSIZ;
  103. len -= 1;
  104. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  105. goto do0;
  106. /* Fall through. */
  107. }
  108. do
  109. {
  110. a0 = ((op_t *) srcp1)[0];
  111. b0 = ((op_t *) srcp2)[0];
  112. if (a1 != b1)
  113. return CMP_LT_OR_GT (a1, b1);
  114. do3:
  115. a1 = ((op_t *) srcp1)[1];
  116. b1 = ((op_t *) srcp2)[1];
  117. if (a0 != b0)
  118. return CMP_LT_OR_GT (a0, b0);
  119. do2:
  120. a0 = ((op_t *) srcp1)[2];
  121. b0 = ((op_t *) srcp2)[2];
  122. if (a1 != b1)
  123. return CMP_LT_OR_GT (a1, b1);
  124. do1:
  125. a1 = ((op_t *) srcp1)[3];
  126. b1 = ((op_t *) srcp2)[3];
  127. if (a0 != b0)
  128. return CMP_LT_OR_GT (a0, b0);
  129. srcp1 += 4 * OPSIZ;
  130. srcp2 += 4 * OPSIZ;
  131. len -= 4;
  132. }
  133. while (len != 0);
  134. /* This is the right position for do0. Please don't move
  135. it into the loop. */
  136. do0:
  137. if (a1 != b1)
  138. return CMP_LT_OR_GT (a1, b1);
  139. return 0;
  140. }
  141. static int memcmp_not_common_alignment __P((long, long, size_t));
  142. /* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
  143. `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory
  144. operations on `op_t', but SRCP1 *should be unaligned*. */
  145. static int
  146. memcmp_not_common_alignment (srcp1, srcp2, len)
  147. long int srcp1;
  148. long int srcp2;
  149. size_t len;
  150. {
  151. op_t a0, a1, a2, a3;
  152. op_t b0, b1, b2, b3;
  153. op_t x;
  154. int shl, shr;
  155. /* Calculate how to shift a word read at the memory operation
  156. aligned srcp1 to make it aligned for comparison. */
  157. shl = 8 * (srcp1 % OPSIZ);
  158. shr = 8 * OPSIZ - shl;
  159. /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t'
  160. it points in the middle of. */
  161. srcp1 &= -OPSIZ;
  162. switch (len % 4)
  163. {
  164. default: /* Avoid warning about uninitialized local variables. */
  165. case 2:
  166. a1 = ((op_t *) srcp1)[0];
  167. a2 = ((op_t *) srcp1)[1];
  168. b2 = ((op_t *) srcp2)[0];
  169. srcp1 -= 1 * OPSIZ;
  170. srcp2 -= 2 * OPSIZ;
  171. len += 2;
  172. goto do1;
  173. case 3:
  174. a0 = ((op_t *) srcp1)[0];
  175. a1 = ((op_t *) srcp1)[1];
  176. b1 = ((op_t *) srcp2)[0];
  177. srcp2 -= 1 * OPSIZ;
  178. len += 1;
  179. goto do2;
  180. case 0:
  181. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  182. return 0;
  183. a3 = ((op_t *) srcp1)[0];
  184. a0 = ((op_t *) srcp1)[1];
  185. b0 = ((op_t *) srcp2)[0];
  186. srcp1 += 1 * OPSIZ;
  187. goto do3;
  188. case 1:
  189. a2 = ((op_t *) srcp1)[0];
  190. a3 = ((op_t *) srcp1)[1];
  191. b3 = ((op_t *) srcp2)[0];
  192. srcp1 += 2 * OPSIZ;
  193. srcp2 += 1 * OPSIZ;
  194. len -= 1;
  195. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  196. goto do0;
  197. /* Fall through. */
  198. }
  199. do
  200. {
  201. a0 = ((op_t *) srcp1)[0];
  202. b0 = ((op_t *) srcp2)[0];
  203. x = MERGE(a2, shl, a3, shr);
  204. if (x != b3)
  205. return CMP_LT_OR_GT (x, b3);
  206. do3:
  207. a1 = ((op_t *) srcp1)[1];
  208. b1 = ((op_t *) srcp2)[1];
  209. x = MERGE(a3, shl, a0, shr);
  210. if (x != b0)
  211. return CMP_LT_OR_GT (x, b0);
  212. do2:
  213. a2 = ((op_t *) srcp1)[2];
  214. b2 = ((op_t *) srcp2)[2];
  215. x = MERGE(a0, shl, a1, shr);
  216. if (x != b1)
  217. return CMP_LT_OR_GT (x, b1);
  218. do1:
  219. a3 = ((op_t *) srcp1)[3];
  220. b3 = ((op_t *) srcp2)[3];
  221. x = MERGE(a1, shl, a2, shr);
  222. if (x != b2)
  223. return CMP_LT_OR_GT (x, b2);
  224. srcp1 += 4 * OPSIZ;
  225. srcp2 += 4 * OPSIZ;
  226. len -= 4;
  227. }
  228. while (len != 0);
  229. /* This is the right position for do0. Please don't move
  230. it into the loop. */
  231. do0:
  232. x = MERGE(a2, shl, a3, shr);
  233. if (x != b3)
  234. return CMP_LT_OR_GT (x, b3);
  235. return 0;
  236. }
  237. int
  238. attribute_hidden __memcmp (const __ptr_t s1, const __ptr_t s2, size_t len)
  239. {
  240. op_t a0;
  241. op_t b0;
  242. long int srcp1 = (long int) s1;
  243. long int srcp2 = (long int) s2;
  244. op_t res;
  245. if (len >= OP_T_THRES)
  246. {
  247. /* There are at least some bytes to compare. No need to test
  248. for LEN == 0 in this alignment loop. */
  249. while (srcp2 % OPSIZ != 0)
  250. {
  251. a0 = ((byte *) srcp1)[0];
  252. b0 = ((byte *) srcp2)[0];
  253. srcp1 += 1;
  254. srcp2 += 1;
  255. res = a0 - b0;
  256. if (res != 0)
  257. return res;
  258. len -= 1;
  259. }
  260. /* SRCP2 is now aligned for memory operations on `op_t'.
  261. SRCP1 alignment determines if we can do a simple,
  262. aligned compare or need to shuffle bits. */
  263. if (srcp1 % OPSIZ == 0)
  264. res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ);
  265. else
  266. res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ);
  267. if (res != 0)
  268. return res;
  269. /* Number of bytes remaining in the interval [0..OPSIZ-1]. */
  270. srcp1 += len & -OPSIZ;
  271. srcp2 += len & -OPSIZ;
  272. len %= OPSIZ;
  273. }
  274. /* There are just a few bytes to compare. Use byte memory operations. */
  275. while (len != 0)
  276. {
  277. a0 = ((byte *) srcp1)[0];
  278. b0 = ((byte *) srcp2)[0];
  279. srcp1 += 1;
  280. srcp2 += 1;
  281. res = a0 - b0;
  282. if (res != 0)
  283. return res;
  284. len -= 1;
  285. }
  286. return 0;
  287. }
  288. strong_alias(__memcmp, memcmp)
  289. weak_alias(memcmp, bcmp)