memset.S 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. /* Set a block of memory to some byte value.
  2. For UltraSPARC.
  3. Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
  4. This file is part of the GNU C Library.
  5. Contributed by David S. Miller (davem@caip.rutgers.edu) and
  6. Jakub Jelinek (jj@ultra.linux.cz).
  7. The GNU C Library is free software; you can redistribute it and/or
  8. modify it under the terms of the GNU Lesser General Public
  9. License as published by the Free Software Foundation; either
  10. version 2.1 of the License, or (at your option) any later version.
  11. The GNU C Library is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. Lesser General Public License for more details.
  15. You should have received a copy of the GNU Lesser General Public
  16. License along with the GNU C Library; if not, write to the Free
  17. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18. 02111-1307 USA. */
  19. #include <features.h>
  20. #include <asm/asi.h>
  /*
   * Defaults used when XCC has not been defined before this point:
   * XCC becomes xcc (it is spelled %XCC as a branch condition-code
   * operand below) and USE_BPR is defined.  In this file USE_BPR only
   * decides whether the "srl reg, 0, reg" length adjustments are
   * assembled; they are left out when it is defined.
   * FPRS_FEF is the value written to %fprs after the block-store loops.
   */
  21. #ifndef XCC
  22. #define XCC xcc
  23. #define USE_BPR
  24. #endif
  25. #define FPRS_FEF 4
  /*
   * SET_BLOCKS(base, offset, source): four stx stores of `source' at
   * base - offset - 0x18, - 0x10, - 0x08 and - 0x00, in that order
   * (ascending addresses, 8 bytes apart).  memset jumps into the middle
   * of a run of these expansions, so the number and order of the stx
   * instructions must not change.
   */
  26. #define SET_BLOCKS(base, offset, source) \
  27. stx source, [base - offset - 0x18]; \
  28. stx source, [base - offset - 0x10]; \
  29. stx source, [base - offset - 0x08]; \
  30. stx source, [base - offset - 0x00];
  31. /* Well, memset is a lot easier to get right than bcopy... */
  /*
   * memset: fill a block of memory with one byte value.
   *
   * Register usage, as read from the code below:
   *   %o0  destination pointer, advanced as bytes are written
   *   %o1  fill value: masked to 8 bits, then replicated to 64 bits
   *   %o2  number of bytes still to write
   *   %o5  copy of the incoming %o0, moved back to %o0 before each retl
   *   %g1, %g5, %o3, %o4, %f0-%f14  scratch
   * (presumably the C prototype is memset(void *s, int c, size_t n);
   * the prototype itself is not visible in this file.)
   *
   * A zero fill byte is handed to the zeroing code at label 50 further
   * down the file, with the length moved into %o1.
   *
   * Reading aid: the instruction after a branch is its delay slot.  It
   * runs whether or not the branch is taken, except after a conditional
   * ",a" branch, where it runs only when the branch is taken.  Several
   * branches test condition codes set a few instructions earlier; the
   * stores, adds without "cc" and shifts in between leave them alone.
   */
  32. .text
  33. .align 32
  34. ENTRY(memset)
  35. andcc %o1, 0xff, %o1  /* keep the low 8 bits of the fill value; sets cc */
  36. mov %o0, %o5  /* remember the incoming pointer for the return value */
  37. be,a,pt %icc, 50f  /* fill byte is 0: use the zeroing code at 50 */
  38. #ifndef USE_BPR
  39. srl %o2, 0, %o1  /* annulled delay slot: %o1 = length, upper 32 bits cleared */
  40. #else
  41. mov %o2, %o1  /* annulled delay slot: %o1 = length */
  42. #endif
  43. cmp %o2, 7
  44. #ifndef USE_BPR
  45. srl %o2, 0, %o2  /* clear the upper 32 bits of the length */
  46. #endif
  47. bleu,pn %XCC, 17f  /* 7 bytes or fewer: byte loop at 17 */
  48. andcc %o0, 3, %g5  /* delay slot: %g5 = misalignment within a 4-byte word */
  49. be,pt %xcc, 4f  /* already 4-byte aligned */
  50. and %o1, 0xff, %o1  /* delay slot */
  51. cmp %g5, 3
  52. be,pn %xcc, 2f  /* misaligned by 3: one byte is enough */
  53. stb %o1, [%o0 + 0x00]  /* delay slot: always stored */
  54. cmp %g5, 2
  55. be,pt %xcc, 2f  /* misaligned by 2: two bytes */
  56. stb %o1, [%o0 + 0x01]  /* delay slot */
  57. stb %o1, [%o0 + 0x02]  /* misaligned by 1: third byte */
  58. 2: sub %g5, 4, %g5  /* %g5 = -(number of bytes just stored) */
  59. sub %o0, %g5, %o0  /* advance the pointer to the 4-byte boundary */
  60. add %o2, %g5, %o2  /* shrink the remaining length by the same amount */
  61. 4: sllx %o1, 8, %g1
  62. andcc %o0, 4, %g0  /* cc: is the pointer 8-byte aligned? (tested below) */
  63. or %o1, %g1, %o1  /* fill byte now in the low 2 bytes */
  64. sllx %o1, 16, %g1
  65. or %o1, %g1, %o1  /* fill byte now in the low 4 bytes */
  66. be,pt %xcc, 2f  /* 8-byte aligned: skip the word store */
  67. sllx %o1, 32, %g1  /* delay slot: upper half of the 64-bit pattern */
  68. stw %o1, [%o0]  /* one word to reach 8-byte alignment */
  69. sub %o2, 4, %o2
  70. add %o0, 4, %o0
  71. 2: cmp %o2, 128
  72. or %o1, %g1, %o1  /* %o1 = fill byte repeated in all 8 bytes */
  73. blu,pn %xcc, 9f  /* fewer than 128 bytes left: no block stores */
  74. andcc %o0, 0x38, %g5  /* delay slot: %g5 = offset inside a 64-byte block */
  75. be,pn %icc, 6f  /* already 64-byte aligned */
  76. mov 64, %o4  /* delay slot */
  77. andcc %o0, 8, %g0
  78. be,pn %icc, 1f
  79. sub %o4, %g5, %o4  /* delay slot: %o4 = bytes up to the next 64-byte boundary */
  80. stx %o1, [%o0]
  81. add %o0, 8, %o0
  82. 1: andcc %o4, 16, %g0
  83. be,pn %icc, 1f
  84. sub %o2, %o4, %o2  /* delay slot: deduct all alignment bytes from the length now */
  85. stx %o1, [%o0]
  86. stx %o1, [%o0 + 8]
  87. add %o0, 16, %o0
  88. 1: andcc %o4, 32, %g0
  89. be,pn %icc, 7f
  90. andncc %o2, 0x3f, %o3  /* delay slot: %o3 = length rounded down to 64; cc tested at 7 */
  91. stw %o1, [%o0]  /* 32 alignment bytes, written as eight words */
  92. stw %o1, [%o0 + 4]
  93. stw %o1, [%o0 + 8]
  94. stw %o1, [%o0 + 12]
  95. stw %o1, [%o0 + 16]
  96. stw %o1, [%o0 + 20]
  97. stw %o1, [%o0 + 24]
  98. stw %o1, [%o0 + 28]
  99. add %o0, 32, %o0
  100. 7: be,pn %xcc, 9f  /* %o3 == 0 (cc from the andncc above): no whole 64-byte block */
  101. nop
  102. ldd [%o0 - 8], %f0  /* %f0 = the 8 pattern bytes stored just below %o0 */
  103. 18: wr %g0, ASI_BLK_P, %asi  /* %asi = ASI_BLK_P for the stda stores below */
  104. membar #StoreStore | #LoadStore  /* order earlier loads and stores before the following stores */
  105. andcc %o3, 0xc0, %g5  /* %g5 = 0, 64, 128 or 192: bytes beyond a multiple of 256 */
  106. and %o2, 0x3f, %o2  /* %o2 = tail bytes left once the 64-byte blocks are done */
  107. fmovd %f0, %f2  /* copy the pattern into %f2..%f14 (interleaved with integer work) */
  108. fmovd %f0, %f4
  109. andn %o3, 0xff, %o3  /* %o3 = bytes handled by the 256-byte loop */
  110. fmovd %f0, %f6
  111. cmp %g5, 64
  112. fmovd %f0, %f8
  113. fmovd %f0, %f10
  114. fmovd %f0, %f12
  115. brz,pn %g5, 10f  /* no odd blocks: go straight to the loop */
  116. fmovd %f0, %f14  /* delay slot */
  117. be,pn %icc, 2f  /* exactly one odd block */
  118. stda %f0, [%o0 + 0x00] %asi  /* delay slot; presumably a 64-byte block store of %f0-%f14 (offsets step by 0x40) -- ASI_BLK_P comes from <asm/asi.h> */
  119. cmp %g5, 128
  120. be,pn %icc, 2f  /* two odd blocks */
  121. stda %f0, [%o0 + 0x40] %asi  /* delay slot */
  122. stda %f0, [%o0 + 0x80] %asi  /* third odd block */
  123. 2: brz,pn %o3, 12f  /* nothing left for the 256-byte loop */
  124. add %o0, %g5, %o0  /* delay slot: step over the odd blocks */
  125. 10: stda %f0, [%o0 + 0x00] %asi  /* loop body: four stores, 0x40 apart */
  126. stda %f0, [%o0 + 0x40] %asi
  127. stda %f0, [%o0 + 0x80] %asi
  128. stda %f0, [%o0 + 0xc0] %asi
  129. 11: subcc %o3, 256, %o3
  130. bne,pt %xcc, 10b
  131. add %o0, 256, %o0  /* delay slot */
  132. 12: wr %g0, FPRS_FEF, %fprs  /* %fprs = FPRS_FEF */
  133. membar #StoreLoad | #StoreStore  /* order the stores above before later loads and stores */
  134. 9: andcc %o2, 0x78, %g5  /* %g5 = bytes to write as whole 8-byte words */
  135. be,pn %xcc, 13f  /* none */
  136. andcc %o2, 7, %o2  /* delay slot: %o2 = final 0-7 bytes; cc tested at 13 */
  137. 14: rd %pc, %o4  /* %o4 = address of this instruction (label 14) */
  138. srl %g5, 1, %o3  /* each stx is 4 bytes of code and stores 8 bytes */
  139. sub %o4, %o3, %o4
  140. jmpl %o4 + (13f - 14b), %g0  /* land %g5/2 bytes before 13, so %g5/8 stx execute */
  141. add %o0, %g5, %o0  /* delay slot: the stx below use negative offsets from the new %o0 */
  142. 12: SET_BLOCKS (%o0, 0x68, %o1)  /* 16 stx in total, ending at %o0 - 8 */
  143. SET_BLOCKS (%o0, 0x48, %o1)
  144. SET_BLOCKS (%o0, 0x28, %o1)
  145. SET_BLOCKS (%o0, 0x08, %o1)
  146. 13: be,pn %xcc, 8f  /* no trailing bytes */
  147. andcc %o2, 4, %g0  /* delay slot */
  148. be,pn %xcc, 1f
  149. andcc %o2, 2, %g0  /* delay slot */
  150. stw %o1, [%o0]  /* 4 trailing bytes */
  151. add %o0, 4, %o0
  152. 1: be,pn %xcc, 1f
  153. andcc %o2, 1, %g0  /* delay slot */
  154. sth %o1, [%o0]  /* 2 trailing bytes */
  155. add %o0, 2, %o0
  156. 1: bne,a,pn %xcc, 8f
  157. stb %o1, [%o0]  /* annulled delay slot: last byte, only when bit 0 is set */
  158. 8: retl
  159. mov %o5, %o0  /* delay slot: return the incoming pointer */
  160. 17: brz,pn %o2, 0f  /* zero length: nothing to store */
  161. 8: add %o0, 1, %o0  /* delay slot of the brz above and top of the byte loop */
  162. subcc %o2, 1, %o2
  163. bne,pt %xcc, 8b
  164. stb %o1, [%o0 - 1]  /* delay slot: store at the address before the increment */
  165. 0: retl
  166. mov %o5, %o0  /* delay slot: return the incoming pointer */
  167. 6: stx %o1, [%o0]  /* 64-byte aligned case: put the pattern in memory for the ldd below */
  168. andncc %o2, 0x3f, %o3  /* %o3 = length rounded down to 64 */
  169. be,pn %xcc, 9b  /* no whole 64-byte block */
  170. nop
  171. ba,pt %xcc, 18b
  172. ldd [%o0], %f0  /* delay slot: %f0 = pattern */
  173. END(memset)
  174. libc_hidden_def(memset)
  /*
   * ZERO_BLOCKS(base, offset, source): eight stx stores of `source' at
   * base - offset - 0x38 through base - offset - 0x00, in ascending
   * address order, 8 bytes apart.  The zeroing code jumps into the
   * middle of a run of these expansions, so the number and order of
   * the stx instructions must not change.
   */
  175. #define ZERO_BLOCKS(base, offset, source) \
  176. stx source, [base - offset - 0x38]; \
  177. stx source, [base - offset - 0x30]; \
  178. stx source, [base - offset - 0x28]; \
  179. stx source, [base - offset - 0x20]; \
  180. stx source, [base - offset - 0x18]; \
  181. stx source, [base - offset - 0x10]; \
  182. stx source, [base - offset - 0x08]; \
  183. stx source, [base - offset - 0x00];
  /*
   * bzero, and the zero-fill path that memset branches to at label 50.
   *
   * State at label 50, as read from the code:
   *   %o0  destination pointer, advanced as bytes are written
   *   %o1  number of bytes still to clear
   *   %o5  value moved back into %o0 before each retl
   *   %o2, %o3, %o4, %f0-%f14  scratch
   *
   * ENTRY(bzero) and END(bzero) are emitted only under
   * __UCLIBC_SUSV3_LEGACY__; the code from label 50 onward is assembled
   * either way.
   *
   * Reading aid: the instruction after a branch is its delay slot.  It
   * runs whether or not the branch is taken, except after a conditional
   * ",a" branch, where it runs only when the branch is taken.
   */
  184. .text
  185. .align 32
  186. #ifdef __UCLIBC_SUSV3_LEGACY__
  187. ENTRY(bzero)
  188. #ifndef USE_BPR
  189. srl %o1, 0, %o1  /* clear the upper 32 bits of the length */
  190. #endif
  191. mov %o0, %o5  /* the shared exit code moves %o5 back into %o0 */
  192. #endif
  193. 50: cmp %o1, 7
  194. bleu,pn %xcc, 17f  /* 7 bytes or fewer: short path at 17 */
  195. andcc %o0, 3, %o2  /* delay slot: %o2 = misalignment in a 4-byte word; cc also read at 17 */
  196. be,a,pt %xcc, 4f  /* already 4-byte aligned */
  197. andcc %o0, 4, %g0  /* annulled delay slot: 8-byte alignment test used at 4 */
  198. cmp %o2, 3
  199. be,pn %xcc, 2f  /* misaligned by 3: one byte is enough */
  200. stb %g0, [%o0 + 0x00]  /* delay slot: always stored */
  201. cmp %o2, 2
  202. be,pt %xcc, 2f  /* misaligned by 2: two bytes */
  203. stb %g0, [%o0 + 0x01]  /* delay slot */
  204. stb %g0, [%o0 + 0x02]  /* misaligned by 1: third byte */
  205. 2: sub %o2, 4, %o2  /* %o2 = -(number of bytes just stored) */
  206. sub %o0, %o2, %o0  /* advance the pointer to the 4-byte boundary */
  207. add %o1, %o2, %o1  /* shrink the remaining length by the same amount */
  208. andcc %o0, 4, %g0  /* cc: is the pointer 8-byte aligned? */
  209. 4: be,pt %xcc, 2f  /* 8-byte aligned: skip the word store */
  210. cmp %o1, 128  /* delay slot: cc for the blu at 2, computed before the 4-byte adjustment below */
  211. stw %g0, [%o0]  /* one word to reach 8-byte alignment */
  212. sub %o1, 4, %o1
  213. add %o0, 4, %o0
  214. 2: blu,pn %xcc, 9f  /* length was below 128 at the cmp: no block stores */
  215. andcc %o0, 0x38, %o2  /* delay slot: %o2 = offset inside a 64-byte block */
  216. be,pn %icc, 6f  /* already 64-byte aligned */
  217. mov 64, %o4  /* delay slot */
  218. andcc %o0, 8, %g0
  219. be,pn %icc, 1f
  220. sub %o4, %o2, %o4  /* delay slot: %o4 = bytes up to the next 64-byte boundary */
  221. stx %g0, [%o0]
  222. add %o0, 8, %o0
  223. 1: andcc %o4, 16, %g0
  224. be,pn %icc, 1f
  225. sub %o1, %o4, %o1  /* delay slot: deduct all alignment bytes from the length now */
  226. stx %g0, [%o0]
  227. stx %g0, [%o0 + 8]
  228. add %o0, 16, %o0
  229. 1: andcc %o4, 32, %g0
  230. be,pn %icc, 7f
  231. andncc %o1, 0x3f, %o3  /* delay slot: %o3 = length rounded down to 64; cc tested at 7 */
  232. stx %g0, [%o0]  /* 32 alignment bytes */
  233. stx %g0, [%o0 + 8]
  234. stx %g0, [%o0 + 16]
  235. stx %g0, [%o0 + 24]
  236. add %o0, 32, %o0
  237. 6: andncc %o1, 0x3f, %o3  /* %o3 = length rounded down to 64 (also the aligned entry point) */
  238. 7: be,pn %xcc, 9f  /* %o3 == 0: no whole 64-byte block */
  239. wr %g0, ASI_BLK_P, %asi  /* delay slot (runs on both paths): %asi = ASI_BLK_P */
  240. membar #StoreLoad | #StoreStore | #LoadStore
  241. fzero %f0
  242. andcc %o3, 0xc0, %o2  /* %o2 = 0, 64, 128 or 192: bytes beyond a multiple of 256 */
  243. and %o1, 0x3f, %o1  /* %o1 = tail bytes left once the 64-byte blocks are done */
  244. fzero %f2
  245. andn %o3, 0xff, %o3  /* %o3 = bytes handled by the 256-byte loop */
  246. faddd %f0, %f2, %f4  /* %f4..%f14 are zeroed by sums/products of the two zero registers */
  247. fmuld %f0, %f2, %f6
  248. cmp %o2, 64
  249. faddd %f0, %f2, %f8
  250. fmuld %f0, %f2, %f10
  251. faddd %f0, %f2, %f12
  252. brz,pn %o2, 10f  /* no odd blocks: go straight to the loop */
  253. fmuld %f0, %f2, %f14  /* delay slot */
  254. be,pn %icc, 2f  /* exactly one odd block */
  255. stda %f0, [%o0 + 0x00] %asi  /* delay slot; presumably a 64-byte block store of %f0-%f14 (offsets step by 0x40) -- ASI_BLK_P comes from <asm/asi.h> */
  256. cmp %o2, 128
  257. be,pn %icc, 2f  /* two odd blocks */
  258. stda %f0, [%o0 + 0x40] %asi  /* delay slot */
  259. stda %f0, [%o0 + 0x80] %asi  /* third odd block */
  260. 2: brz,pn %o3, 12f  /* nothing left for the 256-byte loop */
  261. add %o0, %o2, %o0  /* delay slot: step over the odd blocks */
  262. 10: stda %f0, [%o0 + 0x00] %asi  /* loop body: four stores, 0x40 apart */
  263. stda %f0, [%o0 + 0x40] %asi
  264. stda %f0, [%o0 + 0x80] %asi
  265. stda %f0, [%o0 + 0xc0] %asi
  266. 11: subcc %o3, 256, %o3
  267. bne,pt %xcc, 10b
  268. add %o0, 256, %o0  /* delay slot */
  269. 12: wr %g0, FPRS_FEF, %fprs  /* %fprs = FPRS_FEF */
  270. membar #StoreLoad | #StoreStore  /* order the stores above before later loads and stores */
  271. 9: andcc %o1, 0xf8, %o2  /* %o2 = bytes to write as whole 8-byte words */
  272. be,pn %xcc, 13f  /* none */
  273. andcc %o1, 7, %o1  /* delay slot: %o1 = final 0-7 bytes; cc tested at 13 */
  274. 14: rd %pc, %o4  /* %o4 = address of this instruction (label 14) */
  275. srl %o2, 1, %o3  /* each stx is 4 bytes of code and stores 8 bytes */
  276. sub %o4, %o3, %o4
  277. jmpl %o4 + (13f - 14b), %g0  /* land %o2/2 bytes before 13, so %o2/8 stx execute */
  278. add %o0, %o2, %o0  /* delay slot: the stx below use negative offsets from the new %o0 */
  279. 12: ZERO_BLOCKS (%o0, 0xc8, %g0)  /* 32 stx in total, ending at %o0 - 8 */
  280. ZERO_BLOCKS (%o0, 0x88, %g0)
  281. ZERO_BLOCKS (%o0, 0x48, %g0)
  282. ZERO_BLOCKS (%o0, 0x08, %g0)
  283. 13: be,pn %xcc, 8f  /* no trailing bytes */
  284. andcc %o1, 4, %g0  /* delay slot */
  285. be,pn %xcc, 1f
  286. andcc %o1, 2, %g0  /* delay slot */
  287. stw %g0, [%o0]  /* 4 trailing bytes */
  288. add %o0, 4, %o0
  289. 1: be,pn %xcc, 1f
  290. andcc %o1, 1, %g0  /* delay slot */
  291. sth %g0, [%o0]  /* 2 trailing bytes */
  292. add %o0, 2, %o0
  293. 1: bne,a,pn %xcc, 8f
  294. stb %g0, [%o0]  /* annulled delay slot: last byte, only when bit 0 is set */
  295. 8: retl
  296. mov %o5, %o0  /* delay slot: %o0 = saved pointer */
  297. 17: be,pn %xcc, 13b  /* pointer is 4-byte aligned (cc from "andcc %o0, 3" after label 50): reuse the tail code at 13 */
  298. orcc %o1, 0, %g0  /* delay slot: set cc from the length */
  299. be,pn %xcc, 0f  /* zero length: nothing to store */
  300. 8: add %o0, 1, %o0  /* delay slot of the be above and top of the byte loop */
  301. subcc %o1, 1, %o1
  302. bne,pt %xcc, 8b
  303. stb %g0, [%o0 - 1]  /* delay slot: store at the address before the increment */
  304. 0: retl
  305. mov %o5, %o0  /* delay slot: %o0 = saved pointer */
  306. #ifdef __UCLIBC_SUSV3_LEGACY__
  307. END(bzero)
  308. #endif