memset.S 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. /* Set a block of memory to some byte value.
  2. For UltraSPARC.
  3. Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
  4. This file is part of the GNU C Library.
  5. Contributed by David S. Miller (davem@caip.rutgers.edu) and
  6. Jakub Jelinek (jj@ultra.linux.cz).
  7. The GNU C Library is free software; you can redistribute it and/or
  8. modify it under the terms of the GNU Lesser General Public
  9. License as published by the Free Software Foundation; either
  10. version 2.1 of the License, or (at your option) any later version.
  11. The GNU C Library is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. Lesser General Public License for more details.
  15. You should have received a copy of the GNU Lesser General Public
  16. License along with the GNU C Library; if not, write to the Free
  17. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18. 02111-1307 USA. */
  19. #include <asm/asi.h>
  /* Defaults used when XCC has not been defined before this point:
     condition-code operands written as %XCC expand to %xcc, and USE_BPR
     is defined, which removes the `srl reg, 0, reg' instructions guarded
     by `#ifndef USE_BPR' below.  */
  20. #ifndef XCC
  21. #define XCC xcc
  22. #define USE_BPR
  23. #endif
  /* Value written to %fprs once the block-store loops have finished.  */
  24. #define FPRS_FEF 4
  /* SET_BLOCKS (base, offset, source): four 8-byte stores of `source'
     covering the 32 bytes from base - offset - 0x18 up to (but not
     including) base - offset + 8, in ascending address order.
     memset performs a computed jump into the middle of a run of these
     expansions so that only the trailing stores execute; the number and
     order of the stx instructions must therefore stay exactly as is.  */
  25. #define SET_BLOCKS(base, offset, source) \
  26. stx source, [base - offset - 0x18]; \
  27. stx source, [base - offset - 0x10]; \
  28. stx source, [base - offset - 0x08]; \
  29. stx source, [base - offset - 0x00];
  30. /* Well, memset is a lot easier to get right than bcopy... */
  /* memset: fill a buffer with a byte value.
     In:   %o0 = destination, %o1 = fill value (only the low 8 bits are
           used), %o2 = length in bytes.
     Out:  %o0 = the original destination (held in %o5 throughout).
     Uses: %o1-%o5, %g1, %g5, the condition codes and, on the block-store
           path, %asi, %fprs and %f0-%f14.
     A zero fill value is handed over to label 50 inside __bzero.
     Note that many instructions below sit in branch delay slots and that
     condition codes are often set several instructions before the branch
     that consumes them; do not reorder anything.  */
  31. .text
  32. .align 32
  33. ENTRY(memset)
  /* Keep the low byte of the fill value; Z records whether it is zero.  */
  34. andcc %o1, 0xff, %o1
  /* Remember the destination for the return value.  */
  35. mov %o0, %o5
  /* Zero fill: continue at label 50 in __bzero.  The delay slot is
     annulled, so it runs only when the branch is taken; it moves the
     length into %o1, which is where the code at label 50 reads it.  */
  36. be,a,pt %icc, 50f
  37. #ifndef USE_BPR
  38. srl %o2, 0, %o1
  39. #else
  40. mov %o2, %o1
  41. #endif
  /* Lengths of 7 bytes or fewer are handled by the byte loop at 17.  */
  42. cmp %o2, 7
  43. #ifndef USE_BPR
  44. srl %o2, 0, %o2
  45. #endif
  46. bleu,pn %XCC, 17f
  /* Delay slot: %g5 = destination misalignment within a 4-byte word.  */
  47. andcc %o0, 3, %g5
  /* Already 4-byte aligned: skip the leading byte stores.  */
  48. be,pt %xcc, 4f
  49. and %o1, 0xff, %o1
  /* Store 1, 2 or 3 leading bytes for a misalignment of 3, 2 or 1.  The
     stb instructions in the delay slots execute on both branch outcomes.  */
  50. cmp %g5, 3
  51. be,pn %xcc, 2f
  52. stb %o1, [%o0 + 0x00]
  53. cmp %g5, 2
  54. be,pt %xcc, 2f
  55. stb %o1, [%o0 + 0x01]
  56. stb %o1, [%o0 + 0x02]
  /* %g5 = misalignment - 4 (negative).  Subtracting it from %o0 advances
     to the next 4-byte boundary; adding it to %o2 removes the bytes just
     stored from the length.  */
  57. 2: sub %g5, 4, %g5
  58. sub %o0, %g5, %o0
  59. add %o2, %g5, %o2
  /* Replicate the fill byte through %o1: 8 -> 16 -> 32 bits.  %g1 ends up
     holding the copy shifted into the upper 32 bits, merged in at the
     next label 2.  The andcc in between tests for 8-byte alignment; its
     flags are consumed by the be below.  */
  60. 4: sllx %o1, 8, %g1
  61. andcc %o0, 4, %g0
  62. or %o1, %g1, %o1
  63. sllx %o1, 16, %g1
  64. or %o1, %g1, %o1
  65. be,pt %xcc, 2f
  66. sllx %o1, 32, %g1
  /* Not 8-byte aligned: one 32-bit store gets there.  */
  67. stw %o1, [%o0]
  68. sub %o2, 4, %o2
  69. add %o0, 4, %o0
  /* Complete the 64-bit pattern in %o1.  With fewer than 128 bytes left,
     skip the block-store path and go straight to the tail at 9.  */
  70. 2: cmp %o2, 128
  71. or %o1, %g1, %o1
  72. blu,pn %xcc, 9f
  /* Delay slot: %g5 = destination offset within its 64-byte block (a
     multiple of 8, since %o0 is 8-byte aligned here).  */
  73. andcc %o0, 0x38, %g5
  /* Already 64-byte aligned: handled at label 6.  */
  74. be,pn %icc, 6f
  75. mov 64, %o4
  /* %o4 = 64 - %g5 = bytes needed to reach 64-byte alignment.  They are
     stored in an optional 8-byte, 16-byte and 32-byte step.  */
  76. andcc %o0, 8, %g0
  77. be,pn %icc, 1f
  78. sub %o4, %g5, %o4
  79. stx %o1, [%o0]
  80. add %o0, 8, %o0
  81. 1: andcc %o4, 16, %g0
  82. be,pn %icc, 1f
  /* Delay slot (always executed): take the alignment bytes off the length.  */
  83. sub %o2, %o4, %o2
  84. stx %o1, [%o0]
  85. stx %o1, [%o0 + 8]
  86. add %o0, 16, %o0
  87. 1: andcc %o4, 32, %g0
  88. be,pn %icc, 7f
  /* Delay slot: %o3 = remaining length rounded down to a multiple of 64.
     The flags it sets are tested at label 7; the stores in between do
     not touch the condition codes.  */
  89. andncc %o2, 0x3f, %o3
  90. stw %o1, [%o0]
  91. stw %o1, [%o0 + 4]
  92. stw %o1, [%o0 + 8]
  93. stw %o1, [%o0 + 12]
  94. stw %o1, [%o0 + 16]
  95. stw %o1, [%o0 + 20]
  96. stw %o1, [%o0 + 24]
  97. stw %o1, [%o0 + 28]
  98. add %o0, 32, %o0
  /* %o3 == 0: no whole 64-byte block remains, go to the tail.  */
  99. 7: be,pn %xcc, 9f
  100. nop
  /* Load the 8 bytes stored just below %o0, i.e. the fill pattern, into
     %f0.  */
  101. ldd [%o0 - 8], %f0
  /* Block-store path.  Select ASI_BLK_P for the stda instructions, copy
     the pattern from %f0 into %f2-%f14, and split %o3 into
       %g5 = %o3 & 0xc0  (0, 64, 128 or 192 bytes stored before the loop)
       %o3 = %o3 & ~0xff (bytes handled by the 256-byte loop).
     %o2 is reduced to the remainder below 64 for the tail code.  */
  102. 18: wr %g0, ASI_BLK_P, %asi
  103. membar #StoreStore | #LoadStore
  104. andcc %o3, 0xc0, %g5
  105. and %o2, 0x3f, %o2
  106. fmovd %f0, %f2
  107. fmovd %f0, %f4
  108. andn %o3, 0xff, %o3
  109. fmovd %f0, %f6
  110. cmp %g5, 64
  111. fmovd %f0, %f8
  112. fmovd %f0, %f10
  113. fmovd %f0, %f12
  /* %g5 == 0: nothing to store before the loop, enter it directly.  */
  114. brz,pn %g5, 10f
  115. fmovd %f0, %f14
  /* Store 1, 2 or 3 blocks at 0x40 strides for %g5 == 64, 128 or 192.
     The be instructions use the flags of the preceding cmp; the stda in
     each delay slot executes on both outcomes.  */
  116. be,pn %icc, 2f
  117. stda %f0, [%o0 + 0x00] %asi
  118. cmp %g5, 128
  119. be,pn %icc, 2f
  120. stda %f0, [%o0 + 0x40] %asi
  121. stda %f0, [%o0 + 0x80] %asi
  /* Skip the loop when no multiple of 256 remains.  The delay slot steps
     %o0 past the blocks just stored.  */
  122. 2: brz,pn %o3, 12f
  123. add %o0, %g5, %o0
  /* Main loop: four stda at 0x40 strides, 256 bytes per iteration.  */
  124. 10: stda %f0, [%o0 + 0x00] %asi
  125. stda %f0, [%o0 + 0x40] %asi
  126. stda %f0, [%o0 + 0x80] %asi
  127. stda %f0, [%o0 + 0xc0] %asi
  128. 11: subcc %o3, 256, %o3
  129. bne,pt %xcc, 10b
  130. add %o0, 256, %o0
  /* Block stores done: write FPRS_FEF to %fprs, then a membar before the
     ordinary stores of the tail.  */
  131. 12: wr %g0, FPRS_FEF, %fprs
  132. membar #StoreLoad | #StoreStore
  /* Tail.  %g5 = bytes to store as whole 8-byte words (0..0x78);
     %o2 = final 0..7 bytes.  The andcc in the delay slot sets the flags
     consumed by the be at label 13.  */
  133. 9: andcc %o2, 0x78, %g5
  134. be,pn %xcc, 13f
  135. andcc %o2, 7, %o2
  /* Computed jump into the run of stx instructions below.  Each stx is
     4 bytes of code and stores 8 bytes, so entering %g5 / 2 bytes before
     label 13 executes exactly %g5 / 8 stores.  The delay slot advances
     %o0 past the region; the stores address it with negative offsets.  */
  136. 14: rd %pc, %o4
  137. srl %g5, 1, %o3
  138. sub %o4, %o3, %o4
  139. jmpl %o4 + (13f - 14b), %g0
  140. add %o0, %g5, %o0
  141. 12: SET_BLOCKS (%o0, 0x68, %o1)
  142. SET_BLOCKS (%o0, 0x48, %o1)
  143. SET_BLOCKS (%o0, 0x28, %o1)
  144. SET_BLOCKS (%o0, 0x08, %o1)
  /* Last 0..7 bytes: return at once if none, otherwise an optional word,
     halfword and byte selected by bits 2, 1 and 0 of %o2.  Each andcc in
     a delay slot sets the flags for the following branch.  */
  145. 13: be,pn %xcc, 8f
  146. andcc %o2, 4, %g0
  147. be,pn %xcc, 1f
  148. andcc %o2, 2, %g0
  149. stw %o1, [%o0]
  150. add %o0, 4, %o0
  151. 1: be,pn %xcc, 1f
  152. andcc %o2, 1, %g0
  153. sth %o1, [%o0]
  154. add %o0, 2, %o0
  /* Annulled delay slot: the byte is stored only when bit 0 is set.  */
  155. 1: bne,a,pn %xcc, 8f
  156. stb %o1, [%o0]
  /* Return the original destination.  */
  157. 8: retl
  158. mov %o5, %o0
  /* Short fill (length <= 7): return for a zero length, otherwise one
     byte per iteration.  The add at label 8 is both the delay slot of
     the brz and the first instruction of the loop.  */
  159. 17: brz,pn %o2, 0f
  160. 8: add %o0, 1, %o0
  161. subcc %o2, 1, %o2
  162. bne,pt %xcc, 8b
  163. stb %o1, [%o0 - 1]
  164. 0: retl
  165. mov %o5, %o0
  /* Destination already 64-byte aligned: store one pattern word so it can
     be loaded into %f0 (delay slot of the ba), compute %o3 as at label 7
     and join the block path at 18, or the tail at 9 when %o3 is zero.  */
  166. 6: stx %o1, [%o0]
  167. andncc %o2, 0x3f, %o3
  168. be,pn %xcc, 9b
  169. nop
  170. ba,pt %xcc, 18b
  171. ldd [%o0], %f0
  172. END(memset)
  /* ZERO_BLOCKS (base, offset, source): eight 8-byte stores of `source'
     covering the 64 bytes from base - offset - 0x38 up to (but not
     including) base - offset + 8, in ascending address order.
     __bzero performs a computed jump into the middle of a run of these
     expansions so that only the trailing stores execute; the number and
     order of the stx instructions must therefore stay exactly as is.  */
  173. #define ZERO_BLOCKS(base, offset, source) \
  174. stx source, [base - offset - 0x38]; \
  175. stx source, [base - offset - 0x30]; \
  176. stx source, [base - offset - 0x28]; \
  177. stx source, [base - offset - 0x20]; \
  178. stx source, [base - offset - 0x18]; \
  179. stx source, [base - offset - 0x10]; \
  180. stx source, [base - offset - 0x08]; \
  181. stx source, [base - offset - 0x00];
  /* __bzero: clear a buffer to zero.
     In:   %o0 = destination, %o1 = length in bytes.
     Out:  %o0 = the original destination (held in %o5 throughout).
     Uses: %o1-%o5, the condition codes and, on the block-store path,
           %asi, %fprs and %f0-%f14.
     memset branches to label 50 for a zero fill value, with the length
     already in %o1 and the destination already saved in %o5.
     The layout mirrors memset, with %g0 as the source of every integer
     store.  Many instructions sit in branch delay slots and flags are
     often set well before the branch that uses them; do not reorder.  */
  182. .text
  183. .align 32
  184. ENTRY(__bzero)
  185. #ifndef USE_BPR
  186. srl %o1, 0, %o1
  187. #endif
  /* Remember the destination for the return value.  */
  188. mov %o0, %o5
  /* Lengths of 7 bytes or fewer are handled at label 17.  The delay slot
     computes %o2 = destination misalignment within a 4-byte word; its
     flags are also what label 17 tests first.  */
  189. 50: cmp %o1, 7
  190. bleu,pn %xcc, 17f
  191. andcc %o0, 3, %o2
  /* 4-byte aligned: go to 4.  The annulled delay slot runs only when the
     branch is taken and performs the 8-byte alignment test that label 4
     consumes.  */
  192. be,a,pt %xcc, 4f
  193. andcc %o0, 4, %g0
  /* Clear 1, 2 or 3 leading bytes for a misalignment of 3, 2 or 1.  */
  194. cmp %o2, 3
  195. be,pn %xcc, 2f
  196. stb %g0, [%o0 + 0x00]
  197. cmp %o2, 2
  198. be,pt %xcc, 2f
  199. stb %g0, [%o0 + 0x01]
  200. stb %g0, [%o0 + 0x02]
  /* %o2 = misalignment - 4 (negative): advance %o0 to the next 4-byte
     boundary and take the bytes just cleared off the length, then test
     for 8-byte alignment.  */
  201. 2: sub %o2, 4, %o2
  202. sub %o0, %o2, %o0
  203. add %o1, %o2, %o1
  204. andcc %o0, 4, %g0
  /* NOTE: the cmp in this delay slot runs before the optional 4-byte
     store below adjusts %o1, so the 128-byte threshold tested at the
     next label 2 is compared against the length before that adjustment.  */
  205. 4: be,pt %xcc, 2f
  206. cmp %o1, 128
  /* Not 8-byte aligned: one 32-bit store gets there.  */
  207. stw %g0, [%o0]
  208. sub %o1, 4, %o1
  209. add %o0, 4, %o0
  /* Below the 128-byte threshold: skip the block-store path.  The delay
     slot computes %o2 = destination offset within its 64-byte block.  */
  210. 2: blu,pn %xcc, 9f
  211. andcc %o0, 0x38, %o2
  /* Already 64-byte aligned: continue at label 6.  */
  212. be,pn %icc, 6f
  213. mov 64, %o4
  /* %o4 = 64 - %o2 = bytes needed to reach 64-byte alignment, cleared in
     an optional 8-byte, 16-byte and 32-byte step.  */
  214. andcc %o0, 8, %g0
  215. be,pn %icc, 1f
  216. sub %o4, %o2, %o4
  217. stx %g0, [%o0]
  218. add %o0, 8, %o0
  219. 1: andcc %o4, 16, %g0
  220. be,pn %icc, 1f
  /* Delay slot (always executed): take the alignment bytes off the length.  */
  221. sub %o1, %o4, %o1
  222. stx %g0, [%o0]
  223. stx %g0, [%o0 + 8]
  224. add %o0, 16, %o0
  225. 1: andcc %o4, 32, %g0
  226. be,pn %icc, 7f
  /* Delay slot: %o3 = remaining length rounded down to a multiple of 64;
     the flags are tested at label 7.  */
  227. andncc %o1, 0x3f, %o3
  228. stx %g0, [%o0]
  229. stx %g0, [%o0 + 8]
  230. stx %g0, [%o0 + 16]
  231. stx %g0, [%o0 + 24]
  232. add %o0, 32, %o0
  /* Entry for an already aligned destination; on the fall-through path
     this recomputes the same %o3 and flags.  */
  233. 6: andncc %o1, 0x3f, %o3
  /* %o3 == 0: no whole 64-byte block remains, go to the tail.  The wr in
     the delay slot executes on both outcomes.  */
  234. 7: be,pn %xcc, 9f
  235. wr %g0, ASI_BLK_P, %asi
  /* Block-store path.  Zero %f0-%f14: %f0 and %f2 with fzero, the others
     as sums and products of those two zeros.  Interleaved with that,
     split %o3 into
       %o2 = %o3 & 0xc0  (0, 64, 128 or 192 bytes cleared before the loop)
       %o3 = %o3 & ~0xff (bytes handled by the 256-byte loop)
     and reduce %o1 to the remainder below 64 for the tail code.  */
  236. membar #StoreLoad | #StoreStore | #LoadStore
  237. fzero %f0
  238. andcc %o3, 0xc0, %o2
  239. and %o1, 0x3f, %o1
  240. fzero %f2
  241. andn %o3, 0xff, %o3
  242. faddd %f0, %f2, %f4
  243. fmuld %f0, %f2, %f6
  244. cmp %o2, 64
  245. faddd %f0, %f2, %f8
  246. fmuld %f0, %f2, %f10
  247. faddd %f0, %f2, %f12
  /* %o2 == 0: nothing to clear before the loop, enter it directly.  */
  248. brz,pn %o2, 10f
  249. fmuld %f0, %f2, %f14
  /* Clear 1, 2 or 3 blocks at 0x40 strides for %o2 == 64, 128 or 192.
     The be instructions use the flags of the preceding cmp; the stda in
     each delay slot executes on both outcomes.  */
  250. be,pn %icc, 2f
  251. stda %f0, [%o0 + 0x00] %asi
  252. cmp %o2, 128
  253. be,pn %icc, 2f
  254. stda %f0, [%o0 + 0x40] %asi
  255. stda %f0, [%o0 + 0x80] %asi
  /* Skip the loop when no multiple of 256 remains.  The delay slot steps
     %o0 past the blocks just cleared.  */
  256. 2: brz,pn %o3, 12f
  257. add %o0, %o2, %o0
  /* Main loop: four stda at 0x40 strides, 256 bytes per iteration.  */
  258. 10: stda %f0, [%o0 + 0x00] %asi
  259. stda %f0, [%o0 + 0x40] %asi
  260. stda %f0, [%o0 + 0x80] %asi
  261. stda %f0, [%o0 + 0xc0] %asi
  262. 11: subcc %o3, 256, %o3
  263. bne,pt %xcc, 10b
  264. add %o0, 256, %o0
  /* Block stores done: write FPRS_FEF to %fprs, then a membar before the
     ordinary stores of the tail.  */
  265. 12: wr %g0, FPRS_FEF, %fprs
  266. membar #StoreLoad | #StoreStore
  /* Tail.  %o2 = bytes to clear as whole 8-byte words (mask 0xf8);
     %o1 = final 0..7 bytes.  The andcc in the delay slot sets the flags
     consumed by the be at label 13.  */
  267. 9: andcc %o1, 0xf8, %o2
  268. be,pn %xcc, 13f
  269. andcc %o1, 7, %o1
  /* Computed jump into the run of stx instructions below.  Each stx is
     4 bytes of code and clears 8 bytes, so entering %o2 / 2 bytes before
     label 13 executes exactly %o2 / 8 stores.  The delay slot advances
     %o0 past the region; the stores address it with negative offsets.  */
  270. 14: rd %pc, %o4
  271. srl %o2, 1, %o3
  272. sub %o4, %o3, %o4
  273. jmpl %o4 + (13f - 14b), %g0
  274. add %o0, %o2, %o0
  275. 12: ZERO_BLOCKS (%o0, 0xc8, %g0)
  276. ZERO_BLOCKS (%o0, 0x88, %g0)
  277. ZERO_BLOCKS (%o0, 0x48, %g0)
  278. ZERO_BLOCKS (%o0, 0x08, %g0)
  /* Last 0..7 bytes: return at once if none, otherwise an optional word,
     halfword and byte selected by bits 2, 1 and 0 of %o1.  Each andcc in
     a delay slot sets the flags for the following branch.  */
  279. 13: be,pn %xcc, 8f
  280. andcc %o1, 4, %g0
  281. be,pn %xcc, 1f
  282. andcc %o1, 2, %g0
  283. stw %g0, [%o0]
  284. add %o0, 4, %o0
  285. 1: be,pn %xcc, 1f
  286. andcc %o1, 1, %g0
  287. sth %g0, [%o0]
  288. add %o0, 2, %o0
  /* Annulled delay slot: the byte is cleared only when bit 0 is set.  */
  289. 1: bne,a,pn %xcc, 8f
  290. stb %g0, [%o0]
  /* Return the original destination.  */
  291. 8: retl
  292. mov %o5, %o0
  /* Short clear (length <= 7).  The flags on entry come from
     `andcc %o0, 3': a 4-byte-aligned destination reuses the
     word/halfword/byte tail at 13, and the orcc in the delay slot sets Z
     from the length, which is the flag label 13 tests first.  */
  293. 17: be,pn %xcc, 13b
  294. orcc %o1, 0, %g0
  /* Unaligned destination: return for a zero length, otherwise clear one
     byte per iteration.  The add at label 8 is both the delay slot of
     the be and the first instruction of the loop.  */
  295. be,pn %xcc, 0f
  296. 8: add %o0, 1, %o0
  297. subcc %o1, 1, %o1
  298. bne,pt %xcc, 8b
  299. stb %g0, [%o0 - 1]
  300. 0: retl
  301. mov %o5, %o0
  302. END(__bzero)
  /* Also make the routine available under the name bzero (weak_alias is
     defined outside this file; presumably it emits a weak symbol).  */
  303. weak_alias (__bzero, bzero)