memset.S 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. /* Set a block of memory to some byte value.
  2. For UltraSPARC.
  3. Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
  4. This file is part of the GNU C Library.
  5. Contributed by David S. Miller (davem@caip.rutgers.edu) and
  6. Jakub Jelinek (jj@ultra.linux.cz).
  7. The GNU C Library is free software; you can redistribute it and/or
  8. modify it under the terms of the GNU Lesser General Public
  9. License as published by the Free Software Foundation; either
  10. version 2.1 of the License, or (at your option) any later version.
  11. The GNU C Library is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. Lesser General Public License for more details.
  15. You should have received a copy of the GNU Lesser General Public
  16. License along with the GNU C Library; if not, write to the Free
  17. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18. 02111-1307 USA. */
  19. #include <asm/asi.h>
  /* Condition-code selection.  Unless XCC was defined before this point,
     branches written with %XCC test the %xcc condition codes and USE_BPR
     is defined.  When USE_BPR is not defined, memset and bzero zero-extend
     their length argument with "srl reg, 0, reg" before using it.  */
  20. #ifndef XCC
  21. #define XCC xcc
  22. #define USE_BPR
  23. #endif
  /* Value written to the %fprs register once the floating-point block
     stores in memset and bzero are finished.  */
  24. #define FPRS_FEF 4
  /* SET_BLOCKS(base, offset, source): emit four 8-byte stores of SOURCE, in
     ascending address order, to base-offset-0x18 ... base-offset-0x00.
     memset places four of these back to back and jumps into the middle of
     the run with a computed jmpl, so the number and order of the stx
     instructions generated here must not change.  */
  25. #define SET_BLOCKS(base, offset, source) \
  26. stx source, [base - offset - 0x18]; \
  27. stx source, [base - offset - 0x10]; \
  28. stx source, [base - offset - 0x08]; \
  29. stx source, [base - offset - 0x00];
  30. /* Well, memset is a lot easier to get right than bcopy... */
  31. .text
  32. .align 32
  /* memset: fill a block of memory with a byte value.

     Register use, as read from the code below:
       %o0   destination pointer on entry; advanced while storing and
             reloaded from %o5 in the delay slot of every retl
       %o1   fill value; masked to 8 bits, later replicated to 64 bits
       %o2   number of bytes still to be stored
       %o5   copy of the entry value of %o0 (the return value)
       %g1, %g5, %o3, %o4   scratch
       %f0-%f14   fill pattern used by the 64-byte block stores

     Reading aid: the instruction following a branch is its delay slot and
     runs before control reaches the branch target.  For a conditional
     branch with the ",a" (annul) suffix the delay slot runs only when the
     branch is taken.  Many condition codes are set several instructions
     before the branch that consumes them.

     ENTRY, END and libc_hidden_def are macros that are not defined in
     this file.

     NOTE(review): only the first length test uses %XCC; the remaining
     branches name %xcc or %icc directly.  If a build predefines XCC to
     something other than xcc those branches do not follow it.  This looks
     harmless because %o2 is zero-extended first in that configuration,
     but confirm it is intended.  */
  33. ENTRY(memset)
  /* Keep only the low byte of the fill value and remember the original
     destination.  If the byte is zero, hand over to label 50 inside bzero;
     the annulled delay slot moves the length into %o1, the register bzero
     uses for it (zero-extended with srl when USE_BPR is not defined).  */
  34. andcc %o1, 0xff, %o1
  35. mov %o0, %o5
  36. be,a,pt %icc, 50f
  37. #ifndef USE_BPR
  38. srl %o2, 0, %o1
  39. #else
  40. mov %o2, %o1
  41. #endif
  /* Lengths of 7 or less go to the byte loop at 17.  The compare is done
     before the optional srl; the delay slot leaves %o0 & 3 in %g5 and sets
     the condition codes for the alignment test that follows.  */
  42. cmp %o2, 7
  43. #ifndef USE_BPR
  44. srl %o2, 0, %o2
  45. #endif
  46. bleu,pn %XCC, 17f
  47. andcc %o0, 3, %g5
  /* Destination already 4-byte aligned: skip to 4.  */
  48. be,pt %xcc, 4f
  49. and %o1, 0xff, %o1
  /* Store 1, 2 or 3 bytes to reach 4-byte alignment.  The stb
     instructions in the delay slots always execute, so %g5 == 3 stores one
     byte, %g5 == 2 stores two and %g5 == 1 stores three.  */
  50. cmp %g5, 3
  51. be,pn %xcc, 2f
  52. stb %o1, [%o0 + 0x00]
  53. cmp %g5, 2
  54. be,pt %xcc, 2f
  55. stb %o1, [%o0 + 0x01]
  56. stb %o1, [%o0 + 0x02]
  /* %g5 becomes minus the number of bytes just stored; advance %o0 and
     reduce %o2 by that amount.  */
  57. 2: sub %g5, 4, %g5
  58. sub %o0, %g5, %o0
  59. add %o2, %g5, %o2
  /* Replicate the byte into the low 32 bits of %o1 and prepare the upper
     half in %g1.  The andcc in the middle tests bit 2 of %o0 for the be
     below; if the bit is set, one word store brings %o0 to 8-byte
     alignment.  */
  60. 4: sllx %o1, 8, %g1
  61. andcc %o0, 4, %g0
  62. or %o1, %g1, %o1
  63. sllx %o1, 16, %g1
  64. or %o1, %g1, %o1
  65. be,pt %xcc, 2f
  66. sllx %o1, 32, %g1
  67. stw %o1, [%o0]
  68. sub %o2, 4, %o2
  69. add %o0, 4, %o0
  /* Complete the 64-bit pattern in %o1.  Fewer than 128 bytes left: go
     straight to the tail code at 9.  The delay slot extracts bits 3-5 of
     %o0 into %g5.  */
  70. 2: cmp %o2, 128
  71. or %o1, %g1, %o1
  72. blu,pn %xcc, 9f
  73. andcc %o0, 0x38, %g5
  /* %g5 == 0 means %o0 is already 64-byte aligned: continue at 6.
     Otherwise %o4 = 64 - %g5 is the distance to the next 64-byte boundary;
     it is covered with an 8-, a 16- and a 32-byte step as needed, and %o2
     is reduced by %o4 in the delay slot at listing line 83.  */
  74. be,pn %icc, 6f
  75. mov 64, %o4
  76. andcc %o0, 8, %g0
  77. be,pn %icc, 1f
  78. sub %o4, %g5, %o4
  79. stx %o1, [%o0]
  80. add %o0, 8, %o0
  81. 1: andcc %o4, 16, %g0
  82. be,pn %icc, 1f
  83. sub %o2, %o4, %o2
  84. stx %o1, [%o0]
  85. stx %o1, [%o0 + 8]
  86. add %o0, 16, %o0
  /* The andncc in the delay slot sets %o3 = %o2 rounded down to a multiple
     of 64; its condition codes are consumed by the be at label 7.  */
  87. 1: andcc %o4, 32, %g0
  88. be,pn %icc, 7f
  89. andncc %o2, 0x3f, %o3
  90. stw %o1, [%o0]
  91. stw %o1, [%o0 + 4]
  92. stw %o1, [%o0 + 8]
  93. stw %o1, [%o0 + 12]
  94. stw %o1, [%o0 + 16]
  95. stw %o1, [%o0 + 20]
  96. stw %o1, [%o0 + 24]
  97. stw %o1, [%o0 + 28]
  98. add %o0, 32, %o0
  /* No whole 64-byte block left (%o3 == 0): go to the tail code.
     Otherwise load the pattern into %f0 from the 8 bytes just stored
     immediately below %o0.  */
  99. 7: be,pn %xcc, 9f
  100. nop
  101. ldd [%o0 - 8], %f0
  /* Block-store section.  %asi is set to ASI_BLK_P (defined in asm/asi.h)
     and the pattern is copied into %f2-%f14; the stda offsets below step
     by 0x40 bytes.  Meanwhile:
       %g5 = %o3 & 0xc0   bytes handled by 0-3 single block stores
       %o2 = %o2 & 0x3f   bytes left for the tail code
       %o3 = %o3 & ~0xff  bytes handled by the 256-byte loop
     The cmp of %g5 with 64 is consumed by the be after the brz, which
     itself tests the register and not the condition codes.  */
  102. 18: wr %g0, ASI_BLK_P, %asi
  103. membar #StoreStore | #LoadStore
  104. andcc %o3, 0xc0, %g5
  105. and %o2, 0x3f, %o2
  106. fmovd %f0, %f2
  107. fmovd %f0, %f4
  108. andn %o3, 0xff, %o3
  109. fmovd %f0, %f6
  110. cmp %g5, 64
  111. fmovd %f0, %f8
  112. fmovd %f0, %f10
  113. fmovd %f0, %f12
  /* %g5 == 0: only full 256-byte groups remain, enter the loop at 10.  */
  114. brz,pn %g5, 10f
  115. fmovd %f0, %f14
  /* One, two or three block stores for %g5 == 64, 128 or 192; the stda
     instructions in the delay slots always execute.  */
  116. be,pn %icc, 2f
  117. stda %f0, [%o0 + 0x00] %asi
  118. cmp %g5, 128
  119. be,pn %icc, 2f
  120. stda %f0, [%o0 + 0x40] %asi
  121. stda %f0, [%o0 + 0x80] %asi
  /* Advance past those blocks; leave the section if no 256-byte group
     remains.  */
  122. 2: brz,pn %o3, 12f
  123. add %o0, %g5, %o0
  /* Main loop: four block stores per iteration, %o3 counts down by 256.  */
  124. 10: stda %f0, [%o0 + 0x00] %asi
  125. stda %f0, [%o0 + 0x40] %asi
  126. stda %f0, [%o0 + 0x80] %asi
  127. stda %f0, [%o0 + 0xc0] %asi
  128. 11: subcc %o3, 256, %o3
  129. bne,pt %xcc, 10b
  130. add %o0, 256, %o0
  /* Block stores finished: write FPRS_FEF to %fprs and issue a barrier.  */
  131. 12: wr %g0, FPRS_FEF, %fprs
  132. membar #StoreLoad | #StoreStore
  /* Tail: %g5 = number of bytes to write as whole 8-byte words.  The delay
     slot reduces %o2 to the final 0-7 bytes and sets the condition codes
     that label 13 tests.  */
  133. 9: andcc %o2, 0x78, %g5
  134. be,pn %xcc, 13f
  135. andcc %o2, 7, %o2
  /* Computed jump into the run of 16 stx instructions below.  Each stx is
     4 bytes long and stores 8 bytes, so the target is label 13 minus
     %g5 / 2; that executes exactly %g5 / 8 stores, ending just below the
     advanced %o0.  Nothing between here and label 13 changes the
     condition codes.  */
  136. 14: rd %pc, %o4
  137. srl %g5, 1, %o3
  138. sub %o4, %o3, %o4
  139. jmpl %o4 + (13f - 14b), %g0
  140. add %o0, %g5, %o0
  141. 12: SET_BLOCKS (%o0, 0x68, %o1)
  142. SET_BLOCKS (%o0, 0x48, %o1)
  143. SET_BLOCKS (%o0, 0x28, %o1)
  144. SET_BLOCKS (%o0, 0x08, %o1)
  /* Last 0-7 bytes: return at once if none remain, otherwise store a word,
     a halfword and a byte according to bits 2, 1 and 0 of %o2.  The final
     stb sits in an annulled delay slot and runs only when bit 0 is set.  */
  145. 13: be,pn %xcc, 8f
  146. andcc %o2, 4, %g0
  147. be,pn %xcc, 1f
  148. andcc %o2, 2, %g0
  149. stw %o1, [%o0]
  150. add %o0, 4, %o0
  151. 1: be,pn %xcc, 1f
  152. andcc %o2, 1, %g0
  153. sth %o1, [%o0]
  154. add %o0, 2, %o0
  155. 1: bne,a,pn %xcc, 8f
  156. stb %o1, [%o0]
  /* Return the original destination pointer.  */
  157. 8: retl
  158. mov %o5, %o0
  /* Short case (length <= 7): plain byte loop.  A zero length returns
     immediately; the add at label 8 doubles as the delay slot of the brz
     and the stb in the loop's delay slot writes to the pre-increment
     address.  */
  159. 17: brz,pn %o2, 0f
  160. 8: add %o0, 1, %o0
  161. subcc %o2, 1, %o2
  162. bne,pt %xcc, 8b
  163. stb %o1, [%o0 - 1]
  164. 0: retl
  165. mov %o5, %o0
  /* Destination was already 64-byte aligned: store the pattern once at
     %o0 so it can be loaded into %f0, compute %o3 as at listing line 89,
     then join the block-store section at 18 (or the tail code at 9 when
     %o3 is zero).  */
  166. 6: stx %o1, [%o0]
  167. andncc %o2, 0x3f, %o3
  168. be,pn %xcc, 9b
  169. nop
  170. ba,pt %xcc, 18b
  171. ldd [%o0], %f0
  172. END(memset)
  173. libc_hidden_def(memset)
  /* ZERO_BLOCKS(base, offset, source): emit eight 8-byte stores of SOURCE,
     in ascending address order, to base-offset-0x38 ... base-offset-0x00.
     bzero places four of these back to back and jumps into the middle of
     the run with a computed jmpl, so the number and order of the stx
     instructions generated here must not change.  */
  174. #define ZERO_BLOCKS(base, offset, source) \
  175. stx source, [base - offset - 0x38]; \
  176. stx source, [base - offset - 0x30]; \
  177. stx source, [base - offset - 0x28]; \
  178. stx source, [base - offset - 0x20]; \
  179. stx source, [base - offset - 0x18]; \
  180. stx source, [base - offset - 0x10]; \
  181. stx source, [base - offset - 0x08]; \
  182. stx source, [base - offset - 0x00];
  183. .text
  184. .align 32
  /* bzero: store zero bytes over a block of memory.

     Register use, as read from the code below:
       %o0   destination pointer; advanced while storing and reloaded from
             %o5 in the delay slot of every retl
       %o1   number of bytes still to be stored
       %o5   copy of the entry value of %o0
       %o2, %o3, %o4   scratch
       %f0-%f14   zeros used by the 64-byte block stores
     All integer stores use %g0 as the zero source.

     Label 50 is a second entry point: memset branches to it when its fill
     byte is zero, after setting %o5 and moving its length into %o1.

     Reading aid: the instruction following a branch is its delay slot and
     runs before control reaches the branch target; with the ",a" suffix on
     a conditional branch it runs only when the branch is taken.

     ENTRY and END are macros that are not defined in this file.  */
  185. ENTRY(bzero)
  /* Zero-extend the length when USE_BPR is not defined, and remember the
     original destination.  */
  186. #ifndef USE_BPR
  187. srl %o1, 0, %o1
  188. #endif
  189. mov %o0, %o5
  /* Lengths of 7 or less go to 17.  The delay slot leaves %o0 & 3 in %o2
     and sets the condition codes that both the next branch and label 17
     test.  */
  190. 50: cmp %o1, 7
  191. bleu,pn %xcc, 17f
  192. andcc %o0, 3, %o2
  /* Already 4-byte aligned: go to 4, testing bit 2 of %o0 in the annulled
     delay slot on the way.  */
  193. be,a,pt %xcc, 4f
  194. andcc %o0, 4, %g0
  /* Store 1, 2 or 3 zero bytes to reach 4-byte alignment; the stb
     instructions in the delay slots always execute.  */
  195. cmp %o2, 3
  196. be,pn %xcc, 2f
  197. stb %g0, [%o0 + 0x00]
  198. cmp %o2, 2
  199. be,pt %xcc, 2f
  200. stb %g0, [%o0 + 0x01]
  201. stb %g0, [%o0 + 0x02]
  /* %o2 becomes minus the number of bytes just stored; advance %o0, reduce
     %o1, then test bit 2 of the new %o0.  */
  202. 2: sub %o2, 4, %o2
  203. sub %o0, %o2, %o0
  204. add %o1, %o2, %o1
  205. andcc %o0, 4, %g0
  /* If bit 2 of %o0 is set, one word store brings it to 8-byte alignment.
     The comparison with 128 is made in the delay slot, that is before the
     optional 4-byte adjustment of %o1, and is consumed by the blu at the
     following label 2.  */
  206. 4: be,pt %xcc, 2f
  207. cmp %o1, 128
  208. stw %g0, [%o0]
  209. sub %o1, 4, %o1
  210. add %o0, 4, %o0
  /* Fewer than 128 bytes: go to the tail code at 9.  The delay slot
     extracts bits 3-5 of %o0 into %o2.  */
  211. 2: blu,pn %xcc, 9f
  212. andcc %o0, 0x38, %o2
  /* %o2 == 0 means %o0 is already 64-byte aligned: continue at 6.
     Otherwise %o4 = 64 - %o2 is the distance to the next 64-byte boundary;
     it is covered with an 8-, a 16- and a 32-byte step as needed, and %o1
     is reduced by %o4 in the delay slot at listing line 222.  */
  213. be,pn %icc, 6f
  214. mov 64, %o4
  215. andcc %o0, 8, %g0
  216. be,pn %icc, 1f
  217. sub %o4, %o2, %o4
  218. stx %g0, [%o0]
  219. add %o0, 8, %o0
  220. 1: andcc %o4, 16, %g0
  221. be,pn %icc, 1f
  222. sub %o1, %o4, %o1
  223. stx %g0, [%o0]
  224. stx %g0, [%o0 + 8]
  225. add %o0, 16, %o0
  226. 1: andcc %o4, 32, %g0
  227. be,pn %icc, 7f
  228. andncc %o1, 0x3f, %o3
  229. stx %g0, [%o0]
  230. stx %g0, [%o0 + 8]
  231. stx %g0, [%o0 + 16]
  232. stx %g0, [%o0 + 24]
  233. add %o0, 32, %o0
  /* %o3 = %o1 rounded down to a multiple of 64.  If it is zero go to the
     tail code; the write of ASI_BLK_P to %asi sits in the delay slot and
     therefore happens on both paths.  */
  234. 6: andncc %o1, 0x3f, %o3
  235. 7: be,pn %xcc, 9f
  236. wr %g0, ASI_BLK_P, %asi
  /* Block-store section.  %f0 and %f2 are cleared with fzero and the
     remaining registers up to %f14 are produced as 0 + 0 or 0 * 0.
     Meanwhile:
       %o2 = %o3 & 0xc0   bytes handled by 0-3 single block stores
       %o1 = %o1 & 0x3f   bytes left for the tail code
       %o3 = %o3 & ~0xff  bytes handled by the 256-byte loop
     The cmp of %o2 with 64 is consumed by the be after the brz, which
     itself tests the register and not the condition codes.  */
  237. membar #StoreLoad | #StoreStore | #LoadStore
  238. fzero %f0
  239. andcc %o3, 0xc0, %o2
  240. and %o1, 0x3f, %o1
  241. fzero %f2
  242. andn %o3, 0xff, %o3
  243. faddd %f0, %f2, %f4
  244. fmuld %f0, %f2, %f6
  245. cmp %o2, 64
  246. faddd %f0, %f2, %f8
  247. fmuld %f0, %f2, %f10
  248. faddd %f0, %f2, %f12
  /* %o2 == 0: only full 256-byte groups remain, enter the loop at 10.  */
  249. brz,pn %o2, 10f
  250. fmuld %f0, %f2, %f14
  /* One, two or three block stores for %o2 == 64, 128 or 192; the stda
     instructions in the delay slots always execute.  */
  251. be,pn %icc, 2f
  252. stda %f0, [%o0 + 0x00] %asi
  253. cmp %o2, 128
  254. be,pn %icc, 2f
  255. stda %f0, [%o0 + 0x40] %asi
  256. stda %f0, [%o0 + 0x80] %asi
  /* Advance past those blocks; leave the section if no 256-byte group
     remains.  */
  257. 2: brz,pn %o3, 12f
  258. add %o0, %o2, %o0
  /* Main loop: four block stores per iteration, %o3 counts down by 256.  */
  259. 10: stda %f0, [%o0 + 0x00] %asi
  260. stda %f0, [%o0 + 0x40] %asi
  261. stda %f0, [%o0 + 0x80] %asi
  262. stda %f0, [%o0 + 0xc0] %asi
  263. 11: subcc %o3, 256, %o3
  264. bne,pt %xcc, 10b
  265. add %o0, 256, %o0
  /* Block stores finished: write FPRS_FEF to %fprs and issue a barrier.  */
  266. 12: wr %g0, FPRS_FEF, %fprs
  267. membar #StoreLoad | #StoreStore
  /* Tail: %o2 = number of bytes to write as whole 8-byte words.  The delay
     slot reduces %o1 to the final 0-7 bytes and sets the condition codes
     that label 13 tests.  */
  268. 9: andcc %o1, 0xf8, %o2
  269. be,pn %xcc, 13f
  270. andcc %o1, 7, %o1
  /* Computed jump into the run of 32 stx instructions below.  Each stx is
     4 bytes long and stores 8 bytes, so the target is label 13 minus
     %o2 / 2; that executes exactly %o2 / 8 stores, ending just below the
     advanced %o0.  Nothing between here and label 13 changes the
     condition codes.  */
  271. 14: rd %pc, %o4
  272. srl %o2, 1, %o3
  273. sub %o4, %o3, %o4
  274. jmpl %o4 + (13f - 14b), %g0
  275. add %o0, %o2, %o0
  276. 12: ZERO_BLOCKS (%o0, 0xc8, %g0)
  277. ZERO_BLOCKS (%o0, 0x88, %g0)
  278. ZERO_BLOCKS (%o0, 0x48, %g0)
  279. ZERO_BLOCKS (%o0, 0x08, %g0)
  /* Last 0-7 bytes: return at once if none remain, otherwise store a word,
     a halfword and a byte according to bits 2, 1 and 0 of %o1.  The final
     stb sits in an annulled delay slot and runs only when bit 0 is set.  */
  280. 13: be,pn %xcc, 8f
  281. andcc %o1, 4, %g0
  282. be,pn %xcc, 1f
  283. andcc %o1, 2, %g0
  284. stw %g0, [%o0]
  285. add %o0, 4, %o0
  286. 1: be,pn %xcc, 1f
  287. andcc %o1, 1, %g0
  288. sth %g0, [%o0]
  289. add %o0, 2, %o0
  290. 1: bne,a,pn %xcc, 8f
  291. stb %g0, [%o0]
  /* Return the original destination pointer.  */
  292. 8: retl
  293. mov %o5, %o0
  /* Short case (length <= 7).  The condition codes still reflect %o0 & 3:
     a 4-byte aligned destination reuses the word/halfword/byte code at 13,
     with the orcc in the delay slot setting the zero flag from %o1 as that
     code expects.  Otherwise a zero length returns and anything else is
     written by the byte loop; the add at label 8 doubles as the delay slot
     of the be, and the stb in the loop's delay slot writes to the
     pre-increment address.  */
  294. 17: be,pn %xcc, 13b
  295. orcc %o1, 0, %g0
  296. be,pn %xcc, 0f
  297. 8: add %o0, 1, %o0
  298. subcc %o1, 1, %o1
  299. bne,pt %xcc, 8b
  300. stb %g0, [%o0 - 1]
  301. 0: retl
  302. mov %o5, %o0
  303. END(bzero)