memcmp.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
/* Copyright (C) 1991,1993,1995,1997,1998,2003,2004
   Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Torbjorn Granlund (tege@sics.se).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
  16. #include <string.h>
  17. #include "memcopy.h"
  18. #include <endian.h>
  19. #if __BYTE_ORDER == __BIG_ENDIAN
  20. # define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
  21. #else
  22. # define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b))
  23. #endif
/* BE VERY CAREFUL IF YOU CHANGE THIS CODE!  */

/* The strategy of this memcmp is:

   1. Compare bytes until one of the block pointers is aligned.

   2. Compare using memcmp_common_alignment or
      memcmp_not_common_alignment, depending on the alignment of the
      other block after the initial byte operations.  The maximum
      number of full words (of type op_t) are compared in this way.

   3. Compare the few remaining bytes.  */
  32. #if __BYTE_ORDER != __BIG_ENDIAN
  33. /* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine.
  34. A and B are known to be different.
  35. This is needed only on little-endian machines. */
  36. static __inline__ int
  37. memcmp_bytes (op_t a, op_t b)
  38. {
  39. long int srcp1 = (long int) &a;
  40. long int srcp2 = (long int) &b;
  41. op_t a0, b0;
  42. do
  43. {
  44. a0 = ((byte *) srcp1)[0];
  45. b0 = ((byte *) srcp2)[0];
  46. srcp1 += 1;
  47. srcp2 += 1;
  48. }
  49. while (a0 == b0);
  50. return a0 - b0;
  51. }
  52. #endif
  53. /* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t'
  54. objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for
  55. memory operations on `op_t's. */
  56. static int
  57. memcmp_common_alignment (long int srcp1, long int srcp2, size_t len)
  58. {
  59. op_t a0, a1;
  60. op_t b0, b1;
  61. switch (len % 4)
  62. {
  63. default: /* Avoid warning about uninitialized local variables. */
  64. case 2:
  65. a0 = ((op_t *) srcp1)[0];
  66. b0 = ((op_t *) srcp2)[0];
  67. srcp1 -= 2 * OPSIZ;
  68. srcp2 -= 2 * OPSIZ;
  69. len += 2;
  70. goto do1;
  71. case 3:
  72. a1 = ((op_t *) srcp1)[0];
  73. b1 = ((op_t *) srcp2)[0];
  74. srcp1 -= OPSIZ;
  75. srcp2 -= OPSIZ;
  76. len += 1;
  77. goto do2;
  78. case 0:
  79. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  80. return 0;
  81. a0 = ((op_t *) srcp1)[0];
  82. b0 = ((op_t *) srcp2)[0];
  83. goto do3;
  84. case 1:
  85. a1 = ((op_t *) srcp1)[0];
  86. b1 = ((op_t *) srcp2)[0];
  87. srcp1 += OPSIZ;
  88. srcp2 += OPSIZ;
  89. len -= 1;
  90. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  91. goto do0;
  92. /* Fall through. */
  93. }
  94. do
  95. {
  96. a0 = ((op_t *) srcp1)[0];
  97. b0 = ((op_t *) srcp2)[0];
  98. if (a1 != b1)
  99. return CMP_LT_OR_GT (a1, b1);
  100. do3:
  101. a1 = ((op_t *) srcp1)[1];
  102. b1 = ((op_t *) srcp2)[1];
  103. if (a0 != b0)
  104. return CMP_LT_OR_GT (a0, b0);
  105. do2:
  106. a0 = ((op_t *) srcp1)[2];
  107. b0 = ((op_t *) srcp2)[2];
  108. if (a1 != b1)
  109. return CMP_LT_OR_GT (a1, b1);
  110. do1:
  111. a1 = ((op_t *) srcp1)[3];
  112. b1 = ((op_t *) srcp2)[3];
  113. if (a0 != b0)
  114. return CMP_LT_OR_GT (a0, b0);
  115. srcp1 += 4 * OPSIZ;
  116. srcp2 += 4 * OPSIZ;
  117. len -= 4;
  118. }
  119. while (len != 0);
  120. /* This is the right position for do0. Please don't move
  121. it into the loop. */
  122. do0:
  123. if (a1 != b1)
  124. return CMP_LT_OR_GT (a1, b1);
  125. return 0;
  126. }
  127. /* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
  128. `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory
  129. operations on `op_t', but SRCP1 *should be unaligned*. */
  130. static int
  131. memcmp_not_common_alignment (long int srcp1, long int srcp2, size_t len)
  132. {
  133. op_t a0, a1, a2, a3;
  134. op_t b0, b1, b2, b3;
  135. op_t x;
  136. int shl, shr;
  137. /* Calculate how to shift a word read at the memory operation
  138. aligned srcp1 to make it aligned for comparison. */
  139. shl = 8 * (srcp1 % OPSIZ);
  140. shr = 8 * OPSIZ - shl;
  141. /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t'
  142. it points in the middle of. */
  143. srcp1 &= -OPSIZ;
  144. switch (len % 4)
  145. {
  146. default: /* Avoid warning about uninitialized local variables. */
  147. case 2:
  148. a1 = ((op_t *) srcp1)[0];
  149. a2 = ((op_t *) srcp1)[1];
  150. b2 = ((op_t *) srcp2)[0];
  151. srcp1 -= 1 * OPSIZ;
  152. srcp2 -= 2 * OPSIZ;
  153. len += 2;
  154. goto do1;
  155. case 3:
  156. a0 = ((op_t *) srcp1)[0];
  157. a1 = ((op_t *) srcp1)[1];
  158. b1 = ((op_t *) srcp2)[0];
  159. srcp2 -= 1 * OPSIZ;
  160. len += 1;
  161. goto do2;
  162. case 0:
  163. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  164. return 0;
  165. a3 = ((op_t *) srcp1)[0];
  166. a0 = ((op_t *) srcp1)[1];
  167. b0 = ((op_t *) srcp2)[0];
  168. srcp1 += 1 * OPSIZ;
  169. goto do3;
  170. case 1:
  171. a2 = ((op_t *) srcp1)[0];
  172. a3 = ((op_t *) srcp1)[1];
  173. b3 = ((op_t *) srcp2)[0];
  174. srcp1 += 2 * OPSIZ;
  175. srcp2 += 1 * OPSIZ;
  176. len -= 1;
  177. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  178. goto do0;
  179. /* Fall through. */
  180. }
  181. do
  182. {
  183. a0 = ((op_t *) srcp1)[0];
  184. b0 = ((op_t *) srcp2)[0];
  185. x = MERGE(a2, shl, a3, shr);
  186. if (x != b3)
  187. return CMP_LT_OR_GT (x, b3);
  188. do3:
  189. a1 = ((op_t *) srcp1)[1];
  190. b1 = ((op_t *) srcp2)[1];
  191. x = MERGE(a3, shl, a0, shr);
  192. if (x != b0)
  193. return CMP_LT_OR_GT (x, b0);
  194. do2:
  195. a2 = ((op_t *) srcp1)[2];
  196. b2 = ((op_t *) srcp2)[2];
  197. x = MERGE(a0, shl, a1, shr);
  198. if (x != b1)
  199. return CMP_LT_OR_GT (x, b1);
  200. do1:
  201. a3 = ((op_t *) srcp1)[3];
  202. b3 = ((op_t *) srcp2)[3];
  203. x = MERGE(a1, shl, a2, shr);
  204. if (x != b2)
  205. return CMP_LT_OR_GT (x, b2);
  206. srcp1 += 4 * OPSIZ;
  207. srcp2 += 4 * OPSIZ;
  208. len -= 4;
  209. }
  210. while (len != 0);
  211. /* This is the right position for do0. Please don't move
  212. it into the loop. */
  213. do0:
  214. x = MERGE(a2, shl, a3, shr);
  215. if (x != b3)
  216. return CMP_LT_OR_GT (x, b3);
  217. return 0;
  218. }
  219. int
  220. memcmp (const __ptr_t s1, const __ptr_t s2, size_t len)
  221. {
  222. op_t a0;
  223. op_t b0;
  224. long int srcp1 = (long int) s1;
  225. long int srcp2 = (long int) s2;
  226. op_t res;
  227. if (len >= OP_T_THRES)
  228. {
  229. /* There are at least some bytes to compare. No need to test
  230. for LEN == 0 in this alignment loop. */
  231. while (srcp2 % OPSIZ != 0)
  232. {
  233. a0 = ((byte *) srcp1)[0];
  234. b0 = ((byte *) srcp2)[0];
  235. srcp1 += 1;
  236. srcp2 += 1;
  237. res = a0 - b0;
  238. if (res != 0)
  239. return res;
  240. len -= 1;
  241. }
  242. /* SRCP2 is now aligned for memory operations on `op_t'.
  243. SRCP1 alignment determines if we can do a simple,
  244. aligned compare or need to shuffle bits. */
  245. if (srcp1 % OPSIZ == 0)
  246. res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ);
  247. else
  248. res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ);
  249. if (res != 0)
  250. return res;
  251. /* Number of bytes remaining in the interval [0..OPSIZ-1]. */
  252. srcp1 += len & -OPSIZ;
  253. srcp2 += len & -OPSIZ;
  254. len %= OPSIZ;
  255. }
  256. /* There are just a few bytes to compare. Use byte memory operations. */
  257. while (len != 0)
  258. {
  259. a0 = ((byte *) srcp1)[0];
  260. b0 = ((byte *) srcp2)[0];
  261. srcp1 += 1;
  262. srcp2 += 1;
  263. res = a0 - b0;
  264. if (res != 0)
  265. return res;
  266. len -= 1;
  267. }
  268. return 0;
  269. }
  270. libc_hidden_weak(memcmp)
  271. #ifdef __UCLIBC_SUSV3_LEGACY__
  272. strong_alias(memcmp,bcmp)
  273. #endif