_memcpy_fwd.c 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  /* NOTE(review): the leading "N." on every line below looks like a line
     number left over from a paste/export, not C source.  It would have to
     be stripped before this compiles -- confirm against the upstream file.  */
  1. /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
  2. block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
  3. Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
  /* DSTP and SRCP are addresses carried as `long int'; they are cast to
     `op_t *' at each access.  `op_t', OPSIZ and OP_T_THRES are not defined
     in this block.

     How it works: the loop body is eight steps (do8 .. do1).  Each step
     loads the next source word into one of a0/a1 and then stores the word
     that the previous step loaded into the other one.  The switch on
     LEN % 8 performs the very first load and jumps to the step that leaves
     the right number of steps in the first, partial pass.  So that the
     steps can use the fixed indices [0] .. [7], SRCP and DSTP are moved
     back by the number of skipped steps and LEN is rounded up to a
     multiple of 8, which lets the `len != 0' test end the loop exactly.
     The word loaded last is still pending when the loop exits and is
     written at do0.

     Within each step the load precedes the store; presumably this order
     matters when the two regions overlap -- TODO confirm with callers.  */
  4. static void _wordcopy_fwd_aligned (long int dstp, long int srcp, size_t len)
  5. {
  /* The two alternating holding registers for words in flight.  */
  6. op_t a0, a1;
  7. switch (len % 8)
  8. {
  /* LEN % 8 == 2: enter at do1 (one step in the first pass).  After the
     adjustments srcp[7] is the second source word and dstp[7] is the
     first destination word.  */
  9. case 2:
  10. a0 = ((op_t *) srcp)[0];
  11. srcp -= 6 * OPSIZ;
  12. dstp -= 7 * OPSIZ;
  13. len += 6;
  14. goto do1;
  /* LEN % 8 == 3: enter at do2 (two steps in the first pass).  */
  15. case 3:
  16. a1 = ((op_t *) srcp)[0];
  17. srcp -= 5 * OPSIZ;
  18. dstp -= 6 * OPSIZ;
  19. len += 5;
  20. goto do2;
  /* LEN % 8 == 4: enter at do3 (three steps in the first pass).  */
  21. case 4:
  22. a0 = ((op_t *) srcp)[0];
  23. srcp -= 4 * OPSIZ;
  24. dstp -= 5 * OPSIZ;
  25. len += 4;
  26. goto do3;
  /* LEN % 8 == 5: enter at do4 (four steps in the first pass).  */
  27. case 5:
  28. a1 = ((op_t *) srcp)[0];
  29. srcp -= 3 * OPSIZ;
  30. dstp -= 4 * OPSIZ;
  31. len += 3;
  32. goto do4;
  /* LEN % 8 == 6: enter at do5 (five steps in the first pass).  */
  33. case 6:
  34. a0 = ((op_t *) srcp)[0];
  35. srcp -= 2 * OPSIZ;
  36. dstp -= 3 * OPSIZ;
  37. len += 2;
  38. goto do5;
  /* LEN % 8 == 7: enter at do6 (six steps in the first pass).  */
  39. case 7:
  40. a1 = ((op_t *) srcp)[0];
  41. srcp -= 1 * OPSIZ;
  42. dstp -= 2 * OPSIZ;
  43. len += 1;
  44. goto do6;
  /* LEN % 8 == 0: enter at do7 (seven steps in the first pass).  LEN is
     already a multiple of 8, so it is left alone.  LEN == 0 is only
     tested when OP_T_THRES <= 3 * OPSIZ; otherwise a zero LEN would
     proceed into the loop -- presumably callers never pass one in that
     configuration (TODO confirm).  */
  45. case 0:
  46. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  47. return;
  48. a0 = ((op_t *) srcp)[0];
  49. srcp -= 0 * OPSIZ;
  50. dstp -= 1 * OPSIZ;
  51. goto do7;
  /* LEN % 8 == 1: the first word is loaded here and SRCP is advanced past
     it (`-=-1' adds one word).  LEN drops by one to a multiple of 8.  If
     that leaves nothing (and the threshold test is enabled) only the
     final store at do0 is needed; otherwise run full passes from do8.  */
  52. case 1:
  53. a1 = ((op_t *) srcp)[0];
  54. srcp -=-1 * OPSIZ;
  55. dstp -= 0 * OPSIZ;
  56. len -= 1;
  57. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  58. goto do0;
  /* do8 is the first statement after the switch, hence "No-op".  */
  59. goto do8; /* No-op. */
  60. }
  /* Unrolled body: a0 and a1 alternate between "being loaded" and "being
     stored" from one step to the next.  */
  61. do
  62. {
  63. do8:
  64. a0 = ((op_t *) srcp)[0];
  65. ((op_t *) dstp)[0] = a1;
  66. do7:
  67. a1 = ((op_t *) srcp)[1];
  68. ((op_t *) dstp)[1] = a0;
  69. do6:
  70. a0 = ((op_t *) srcp)[2];
  71. ((op_t *) dstp)[2] = a1;
  72. do5:
  73. a1 = ((op_t *) srcp)[3];
  74. ((op_t *) dstp)[3] = a0;
  75. do4:
  76. a0 = ((op_t *) srcp)[4];
  77. ((op_t *) dstp)[4] = a1;
  78. do3:
  79. a1 = ((op_t *) srcp)[5];
  80. ((op_t *) dstp)[5] = a0;
  81. do2:
  82. a0 = ((op_t *) srcp)[6];
  83. ((op_t *) dstp)[6] = a1;
  84. do1:
  85. a1 = ((op_t *) srcp)[7];
  86. ((op_t *) dstp)[7] = a0;
  /* Advance both addresses by the eight words covered by one pass.  */
  87. srcp += 8 * OPSIZ;
  88. dstp += 8 * OPSIZ;
  89. len -= 8;
  90. }
  91. while (len != 0);
  92. /* This is the right position for do0. Please don't move
  93. it into the loop. */
  /* Flush the word loaded by the last step (or by case 1 when it jumps
     here directly); it is always held in a1 at this point.  */
  94. do0:
  95. ((op_t *) dstp)[0] = a1;
  96. }
  /* NOTE(review): the leading "N." on every line below looks like a line
     number left over from a paste/export, not C source.  It would have to
     be stripped before this compiles -- confirm against the upstream file.  */
  97. /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
  98. block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
  99. DSTP should be aligned for memory operations on `op_t's, but SRCP must
  100. *not* be aligned. */
  /* DSTP and SRCP are addresses carried as `long int'; they are cast to
     `op_t *' at each access.  `op_t', OPSIZ, OP_T_THRES and MERGE are not
     defined in this block.

     How it works: SRCP is rounded down to a word boundary and only whole
     words are read from it.  Every destination word is produced by
     MERGE from two consecutive source words together with the shift
     counts sh_1 and sh_2.  The loop body is four steps (do4 .. do1); the
     registers a0 .. a3 rotate so that each step loads one new source word
     and stores the merge of the two words loaded before it.  The switch
     on LEN % 4 preloads two words and jumps to the step that leaves the
     right number of steps in the first, partial pass.  SRCP and DSTP are
     biased so the steps can use the fixed indices [0] .. [3], and LEN is
     rounded up to a multiple of 4 so that `len != 0' ends the loop
     exactly.  The last merge is still pending at loop exit and is written
     at do0.  In total LEN + 1 source words are read, starting at the
     rounded-down SRCP.  */
  101. static void _wordcopy_fwd_dest_aligned (long int dstp, long int srcp, size_t len)
  102. {
  103. op_t a0, a1, a2, a3;
  104. int sh_1, sh_2;
  105. /* Calculate how to shift a word read at the memory operation
  106. aligned srcp to make it aligned for copy. */
  /* sh_1 is eight times SRCP's byte offset within its word (presumably a
     bit count, assuming 8-bit bytes); sh_2 is the rest of the word width.
     NOTE(review): for an aligned SRCP sh_1 would be 0 and sh_2 the full
     width of `op_t'.  If MERGE shifts by sh_2 that would be an undefined
     shift, which is presumably why SRCP "must *not* be aligned" --
     confirm against the MERGE definition.  */
  107. sh_1 = 8 * (srcp % OPSIZ);
  108. sh_2 = 8 * OPSIZ - sh_1;
  109. /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
  110. it points in the middle of. */
  111. srcp &= -OPSIZ;
  112. switch (len % 4)
  113. {
  /* LEN % 4 == 2: preload a1/a2, enter at do1 (one step in the first
     pass).  After the adjustments srcp[3] is the third source word and
     dstp[3] is the first destination word.  */
  114. case 2:
  115. a1 = ((op_t *) srcp)[0];
  116. a2 = ((op_t *) srcp)[1];
  117. srcp -= 1 * OPSIZ;
  118. dstp -= 3 * OPSIZ;
  119. len += 2;
  120. goto do1;
  /* LEN % 4 == 3: preload a0/a1, enter at do2 (two steps in the first
     pass).  */
  121. case 3:
  122. a0 = ((op_t *) srcp)[0];
  123. a1 = ((op_t *) srcp)[1];
  124. srcp -= 0 * OPSIZ;
  125. dstp -= 2 * OPSIZ;
  126. len += 1;
  127. goto do2;
  /* LEN % 4 == 0: preload a3/a0, enter at do3 (three steps in the first
     pass).  `-=-1' advances SRCP by one word.  LEN == 0 is only tested
     when OP_T_THRES <= 3 * OPSIZ; otherwise a zero LEN would proceed
     into the loop -- presumably callers never pass one in that
     configuration (TODO confirm).  */
  128. case 0:
  129. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  130. return;
  131. a3 = ((op_t *) srcp)[0];
  132. a0 = ((op_t *) srcp)[1];
  133. srcp -=-1 * OPSIZ;
  134. dstp -= 1 * OPSIZ;
  135. len += 0;
  136. goto do3;
  /* LEN % 4 == 1: preload a2/a3 and advance SRCP past both words
     (`-=-2').  LEN drops by one to a multiple of 4.  If that leaves
     nothing (and the threshold test is enabled) only the final store at
     do0 is needed; otherwise run full passes from do4.  */
  137. case 1:
  138. a2 = ((op_t *) srcp)[0];
  139. a3 = ((op_t *) srcp)[1];
  140. srcp -=-2 * OPSIZ;
  141. dstp -= 0 * OPSIZ;
  142. len -= 1;
  143. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  144. goto do0;
  /* do4 is the first statement after the switch, hence "No-op".  */
  145. goto do4; /* No-op. */
  146. }
  /* Unrolled body: each step loads one new source word, then stores the
     merge of the two words loaded immediately before it.  */
  147. do
  148. {
  149. do4:
  150. a0 = ((op_t *) srcp)[0];
  151. ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
  152. do3:
  153. a1 = ((op_t *) srcp)[1];
  154. ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
  155. do2:
  156. a2 = ((op_t *) srcp)[2];
  157. ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
  158. do1:
  159. a3 = ((op_t *) srcp)[3];
  160. ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
  /* Advance both addresses by the four words covered by one pass.  */
  161. srcp += 4 * OPSIZ;
  162. dstp += 4 * OPSIZ;
  163. len -= 4;
  164. }
  165. while (len != 0);
  166. /* This is the right position for do0. Please don't move
  167. it into the loop. */
  /* Flush the last pending destination word; on every path that reaches
     this label the two most recently loaded source words are in a2 and
     a3.  */
  168. do0:
  169. ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
  170. }