memset.S 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. /* Set a block of memory to some byte value.
  2. For UltraSPARC.
  3. Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
  4. This file is part of the GNU C Library.
  5. Contributed by David S. Miller (davem@caip.rutgers.edu) and
  6. Jakub Jelinek (jj@ultra.linux.cz).
  7. The GNU C Library is free software; you can redistribute it and/or
  8. modify it under the terms of the GNU Lesser General Public
  9. License as published by the Free Software Foundation; either
  10. version 2.1 of the License, or (at your option) any later version.
  11. The GNU C Library is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. Lesser General Public License for more details.
  15. You should have received a copy of the GNU Lesser General Public
  16. License along with the GNU C Library; if not, write to the Free
  17. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18. 02111-1307 USA. */
  /* ASI_BLK_P, used by the stda block stores below, is not defined in this
     file; it is expected to come from this header.  */
  19. #include <asm/asi.h>
  /* Unless the includer already defined XCC, use xcc and define USE_BPR.
     When USE_BPR is NOT defined, the code below zero-extends the length
     argument to 32 bits with "srl reg, 0, reg" before using it.  */
  20. #ifndef XCC
  21. #define XCC xcc
  22. #define USE_BPR
  23. #endif
  /* Value written to %fprs after the block-store loops have finished.
     NOTE(review): per SPARC V9 this is the FEF (FPU enable) bit with the
     dirty bits left clear -- confirm against the architecture manual.  */
  24. #define FPRS_FEF 4
  /* SET_BLOCKS(base, offset, source): store the 64-bit register `source`
     four times, at base-offset-0x18, -0x10, -0x08 and -0x00, i.e. 32
     consecutive bytes ending 8 bytes past (base - offset).
     The expansion must remain exactly four stx instructions: the computed
     jump at label 14 in __memset enters this ladder assuming one stx per
     8 bytes still to be written.  */
  25. #define SET_BLOCKS(base, offset, source) \
  26. stx source, [base - offset - 0x18]; \
  27. stx source, [base - offset - 0x10]; \
  28. stx source, [base - offset - 0x08]; \
  29. stx source, [base - offset - 0x00];
  30. /* Well, memset is a lot easier to get right than bcopy... */
  /* __memset, also exported as the global symbol memset via .set below.
     Register use, as read from the code:
       %o0  destination pointer, advanced as bytes are stored
       %o1  fill value: masked to its low byte, later replicated to 64 bits
       %o2  number of bytes still to store
       %o5  copy of the original %o0; moved back into %o0 at every retl
       %g1, %g5, %o3, %o4  scratch
       %f0-%f14  hold the fill pattern for the stda block stores
     A zero fill byte is handed over to label 50 inside __bzero.
     ENTRY and END are macros defined outside the visible source.
     Instructions directly after a branch sit in its delay slot; with the
     ",a" (annul) form they run only when the branch is taken.  */
  31. .text
  32. .align 32
  33. .globl memset
  34. .set memset,__memset
  35. .hidden __memset
  36. ENTRY(__memset)
  /* Keep only the low byte of the fill value; Z is set when it is zero.  */
  37. andcc %o1, 0xff, %o1
  /* Remember the original destination for the return value.  */
  38. mov %o0, %o5
  /* Fill byte is zero: continue at label 50 in __bzero.  The annulled delay
     slot moves the length into %o1, which is where that code expects it.  */
  39. be,a,pt %icc, 50f
  40. #ifndef USE_BPR
  41. srl %o2, 0, %o1
  42. #else
  43. mov %o2, %o1
  44. #endif
  /* 7 bytes or fewer: use the simple byte loop at label 17.  */
  45. cmp %o2, 7
  46. #ifndef USE_BPR
  47. srl %o2, 0, %o2
  48. #endif
  49. bleu,pn %XCC, 17f
  /* Delay slot: %g5 = destination misalignment within a 4-byte word.  */
  50. andcc %o0, 3, %g5
  /* Already 4-byte aligned: skip the leading byte stores.  */
  51. be,pt %xcc, 4f
  52. and %o1, 0xff, %o1
  /* Store 1, 2 or 3 leading bytes (misalignment 3, 2 or 1 respectively).
     The stb instructions in the delay slots run whether or not the branch
     is taken.  */
  53. cmp %g5, 3
  54. be,pn %xcc, 2f
  55. stb %o1, [%o0 + 0x00]
  56. cmp %g5, 2
  57. be,pt %xcc, 2f
  58. stb %o1, [%o0 + 0x01]
  59. stb %o1, [%o0 + 0x02]
  /* %g5 = misalignment - 4 (negative count of bytes just stored):
     advance the pointer and shrink the length by that amount.  */
  60. 2: sub %g5, 4, %g5
  61. sub %o0, %g5, %o0
  62. add %o2, %g5, %o2
  /* Replicate the fill byte: 8 -> 16 -> 32 bits in %o1, with the upper
     32-bit copy prepared in %g1.  Interleaved with that, test bit 2 of the
     destination to see whether it is already 8-byte aligned.  */
  63. 4: sllx %o1, 8, %g1
  64. andcc %o0, 4, %g0
  65. or %o1, %g1, %o1
  66. sllx %o1, 16, %g1
  67. or %o1, %g1, %o1
  68. be,pt %xcc, 2f
  69. sllx %o1, 32, %g1
  /* Not 8-byte aligned: store one 32-bit word to get there.  */
  70. stw %o1, [%o0]
  71. sub %o2, 4, %o2
  72. add %o0, 4, %o0
  /* Finish the 64-bit pattern.  Fewer than 128 bytes left: go to the tail
     code at label 9.  */
  73. 2: cmp %o2, 128
  74. or %o1, %g1, %o1
  75. blu,pn %xcc, 9f
  /* Delay slot: %g5 = offset of the destination inside its 64-byte block
     (a multiple of 8).  */
  76. andcc %o0, 0x38, %g5
  /* Already 64-byte aligned: label 6 loads the pattern into %f0 directly.  */
  77. be,pn %icc, 6f
  78. mov 64, %o4
  /* %o4 = 64 - %g5 = bytes needed to reach 64-byte alignment; they are
     written as an optional 8-, 16- and 32-byte step.  */
  79. andcc %o0, 8, %g0
  80. be,pn %icc, 1f
  81. sub %o4, %g5, %o4
  82. stx %o1, [%o0]
  83. add %o0, 8, %o0
  84. 1: andcc %o4, 16, %g0
  85. be,pn %icc, 1f
  /* Delay slot: the whole alignment amount is subtracted from the length
     here, once.  */
  86. sub %o2, %o4, %o2
  87. stx %o1, [%o0]
  88. stx %o1, [%o0 + 8]
  89. add %o0, 16, %o0
  90. 1: andcc %o4, 32, %g0
  91. be,pn %icc, 7f
  /* Delay slot: %o3 = length rounded down to a multiple of 64; the
     condition codes set here are the ones tested at label 7.  */
  92. andncc %o2, 0x3f, %o3
  93. stw %o1, [%o0]
  94. stw %o1, [%o0 + 4]
  95. stw %o1, [%o0 + 8]
  96. stw %o1, [%o0 + 12]
  97. stw %o1, [%o0 + 16]
  98. stw %o1, [%o0 + 20]
  99. stw %o1, [%o0 + 24]
  100. stw %o1, [%o0 + 28]
  101. add %o0, 32, %o0
  /* No complete 64-byte block remains: go to the tail code.  */
  102. 7: be,pn %xcc, 9f
  103. nop
  /* Fetch the 8 pattern bytes just written below the pointer into %f0.  */
  104. ldd [%o0 - 8], %f0
  /* Block-store section.  Each stda below writes from %f0 using the ASI
     selected here; successive stores are 0x40 (64) bytes apart.  */
  105. 18: wr %g0, ASI_BLK_P, %asi
  106. membar #StoreStore | #LoadStore
  /* %g5 = 0, 64, 128 or 192: bytes in the 0-3 blocks that do not fill a
     256-byte group.  */
  107. andcc %o3, 0xc0, %g5
  /* %o2 = bytes left over after all 64-byte blocks.  */
  108. and %o2, 0x3f, %o2
  /* Copy the pattern from %f0 into %f2 .. %f14 (interleaved with the
     integer bookkeeping).  */
  109. fmovd %f0, %f2
  110. fmovd %f0, %f4
  /* %o3 = bytes to be written by the 256-byte loop at label 10.  */
  111. andn %o3, 0xff, %o3
  112. fmovd %f0, %f6
  113. cmp %g5, 64
  114. fmovd %f0, %f8
  115. fmovd %f0, %f10
  116. fmovd %f0, %f12
  /* No odd blocks: enter the 256-byte loop directly.  */
  117. brz,pn %g5, 10f
  118. fmovd %f0, %f14
  /* Write one, two or three 64-byte blocks according to %g5.  */
  119. be,pn %icc, 2f
  120. stda %f0, [%o0 + 0x00] %asi
  121. cmp %g5, 128
  122. be,pn %icc, 2f
  123. stda %f0, [%o0 + 0x40] %asi
  124. stda %f0, [%o0 + 0x80] %asi
  /* Nothing for the 256-byte loop: finish at label 12.  The delay slot
     steps the pointer past the odd blocks in either case.  */
  125. 2: brz,pn %o3, 12f
  126. add %o0, %g5, %o0
  /* Main loop: four block stores, 256 bytes per iteration.  */
  127. 10: stda %f0, [%o0 + 0x00] %asi
  128. stda %f0, [%o0 + 0x40] %asi
  129. stda %f0, [%o0 + 0x80] %asi
  130. stda %f0, [%o0 + 0xc0] %asi
  131. 11: subcc %o3, 256, %o3
  132. bne,pt %xcc, 10b
  133. add %o0, 256, %o0
  /* Block stores finished: write FPRS_FEF to %fprs and order the stores.  */
  134. 12: wr %g0, FPRS_FEF, %fprs
  135. membar #StoreLoad | #StoreStore
  /* Tail: %g5 = remaining bytes that can be written as 8-byte stores.
     The delay slot reduces %o2 to the final 0-7 bytes and sets the
     condition codes tested at label 13 (nothing in between changes them).  */
  136. 9: andcc %o2, 0x78, %g5
  137. be,pn %xcc, 13f
  138. andcc %o2, 7, %o2
  /* Computed jump into the stx ladder: %o4 = address of the rd at label 14,
     minus %g5/2, plus the distance (13f - 14b).  The target is therefore
     %g5/2 bytes of code before label 13, i.e. one 4-byte stx for every
     8 bytes in %g5.  The delay slot advances the pointer by %g5 so the
     negative offsets in SET_BLOCKS address exactly those bytes.  */
  139. 14: rd %pc, %o4
  140. srl %g5, 1, %o3
  141. sub %o4, %o3, %o4
  142. jmpl %o4 + (13f - 14b), %g0
  143. add %o0, %g5, %o0
  /* 16 stx instructions covering the 128 bytes below the pointer.  */
  144. 12: SET_BLOCKS (%o0, 0x68, %o1)
  145. SET_BLOCKS (%o0, 0x48, %o1)
  146. SET_BLOCKS (%o0, 0x28, %o1)
  147. SET_BLOCKS (%o0, 0x08, %o1)
  /* Final 0-7 bytes: return if none, otherwise store 4, 2 and 1 bytes
     according to bits 2, 1 and 0 of %o2.  Each test is done in the delay
     slot of the previous branch.  */
  148. 13: be,pn %xcc, 8f
  149. andcc %o2, 4, %g0
  150. be,pn %xcc, 1f
  151. andcc %o2, 2, %g0
  152. stw %o1, [%o0]
  153. add %o0, 4, %o0
  154. 1: be,pn %xcc, 1f
  155. andcc %o2, 1, %g0
  156. sth %o1, [%o0]
  157. add %o0, 2, %o0
  /* Annulled delay slot: the last byte is stored only when bit 0 is set.  */
  158. 1: bne,a,pn %xcc, 8f
  159. stb %o1, [%o0]
  /* Return the original destination pointer.  */
  160. 8: retl
  161. mov %o5, %o0
  /* Short case (7 bytes or fewer): plain byte loop.  For a zero length the
     delay slot still bumps %o0, which is harmless because %o0 is reloaded
     from %o5 on return.  */
  162. 17: brz,pn %o2, 0f
  163. 8: add %o0, 1, %o0
  164. subcc %o2, 1, %o2
  165. bne,pt %xcc, 8b
  166. stb %o1, [%o0 - 1]
  167. 0: retl
  168. mov %o5, %o0
  /* Destination was already 64-byte aligned: write the pattern once at the
     pointer (without advancing) so it can be loaded into %f0, then join
     the block-store section at label 18, or the tail at label 9 when no
     full 64-byte block remains.  */
  169. 6: stx %o1, [%o0]
  170. andncc %o2, 0x3f, %o3
  171. be,pn %xcc, 9b
  172. nop
  173. ba,pt %xcc, 18b
  174. ldd [%o0], %f0
  175. END(__memset)
  /* ZERO_BLOCKS(base, offset, source): store the 64-bit register `source`
     eight times, at base-offset-0x38 down to base-offset-0x00 in steps of
     8, i.e. 64 consecutive bytes ending 8 bytes past (base - offset).
     The expansion must remain exactly eight stx instructions: the computed
     jump at label 14 in __bzero enters this ladder assuming one stx per
     8 bytes still to be written.  */
  176. #define ZERO_BLOCKS(base, offset, source) \
  177. stx source, [base - offset - 0x38]; \
  178. stx source, [base - offset - 0x30]; \
  179. stx source, [base - offset - 0x28]; \
  180. stx source, [base - offset - 0x20]; \
  181. stx source, [base - offset - 0x18]; \
  182. stx source, [base - offset - 0x10]; \
  183. stx source, [base - offset - 0x08]; \
  184. stx source, [base - offset - 0x00];
  /* __bzero: zero a block of memory.
     Register use, as read from the code:
       %o0  destination pointer, advanced as bytes are cleared
       %o1  number of bytes still to clear
       %o5  copy of the original %o0; moved back into %o0 at every retl
       %o2, %o3, %o4  scratch;  %f0-%f14 are zeroed for the block stores
     Label 50 is a second entry point: __memset branches to it when the
     fill byte is zero, with %o0, %o1 and %o5 already set up as above.
     ENTRY and END are macros defined outside the visible source.
     Instructions directly after a branch sit in its delay slot; with the
     ",a" (annul) form they run only when the branch is taken.  */
  185. .text
  186. .align 32
  187. ENTRY(__bzero)
  188. #ifndef USE_BPR
  189. srl %o1, 0, %o1
  190. #endif
  /* Remember the original destination for the return value.  */
  191. mov %o0, %o5
  /* 7 bytes or fewer: handled at label 17.  */
  192. 50: cmp %o1, 7
  193. bleu,pn %xcc, 17f
  /* Delay slot: %o2 = destination misalignment within a 4-byte word.  */
  194. andcc %o0, 3, %o2
  /* Already 4-byte aligned: go to label 4; the annulled delay slot performs
     the 8-byte alignment test that label 4 branches on.  */
  195. be,a,pt %xcc, 4f
  196. andcc %o0, 4, %g0
  /* Clear 1, 2 or 3 leading bytes (misalignment 3, 2 or 1 respectively).
     The stb instructions in the delay slots run whether or not the branch
     is taken.  */
  197. cmp %o2, 3
  198. be,pn %xcc, 2f
  199. stb %g0, [%o0 + 0x00]
  200. cmp %o2, 2
  201. be,pt %xcc, 2f
  202. stb %g0, [%o0 + 0x01]
  203. stb %g0, [%o0 + 0x02]
  /* %o2 = misalignment - 4 (negative count of bytes just cleared):
     advance the pointer and shrink the length by that amount.  */
  204. 2: sub %o2, 4, %o2
  205. sub %o0, %o2, %o0
  206. add %o1, %o2, %o1
  207. andcc %o0, 4, %g0
  /* 8-byte aligned: skip the word store.  The delay slot compares the
     length with 128; that comparison (made before the optional 4-byte
     adjustment below) is what label 2 branches on.  */
  208. 4: be,pt %xcc, 2f
  209. cmp %o1, 128
  210. stw %g0, [%o0]
  211. sub %o1, 4, %o1
  212. add %o0, 4, %o0
  /* Fewer than 128 bytes: go to the tail code at label 9.  */
  213. 2: blu,pn %xcc, 9f
  /* Delay slot: %o2 = offset of the destination inside its 64-byte block
     (a multiple of 8).  */
  214. andcc %o0, 0x38, %o2
  /* Already 64-byte aligned: skip the alignment stores.  */
  215. be,pn %icc, 6f
  216. mov 64, %o4
  /* %o4 = 64 - %o2 = bytes needed to reach 64-byte alignment; they are
     written as an optional 8-, 16- and 32-byte step.  */
  217. andcc %o0, 8, %g0
  218. be,pn %icc, 1f
  219. sub %o4, %o2, %o4
  220. stx %g0, [%o0]
  221. add %o0, 8, %o0
  222. 1: andcc %o4, 16, %g0
  223. be,pn %icc, 1f
  /* Delay slot: the whole alignment amount is subtracted from the length
     here, once.  */
  224. sub %o1, %o4, %o1
  225. stx %g0, [%o0]
  226. stx %g0, [%o0 + 8]
  227. add %o0, 16, %o0
  228. 1: andcc %o4, 32, %g0
  229. be,pn %icc, 7f
  /* Delay slot: %o3 = length rounded down to a multiple of 64; sets the
     condition codes tested at label 7.  */
  230. andncc %o1, 0x3f, %o3
  231. stx %g0, [%o0]
  232. stx %g0, [%o0 + 8]
  233. stx %g0, [%o0 + 16]
  234. stx %g0, [%o0 + 24]
  235. add %o0, 32, %o0
  /* Entry for the already-aligned case: compute %o3 and the condition
     codes for label 7.  */
  236. 6: andncc %o1, 0x3f, %o3
  /* No complete 64-byte block remains: go to the tail code.  The delay
     slot selects ASI_BLK_P for the stda stores whether or not the branch
     is taken.  */
  237. 7: be,pn %xcc, 9f
  238. wr %g0, ASI_BLK_P, %asi
  239. membar #StoreLoad | #StoreStore | #LoadStore
  /* Zero %f0 and %f2 with fzero, then derive %f4 .. %f14 from them with
     faddd/fmuld (0+0 and 0*0), interleaved with the integer bookkeeping.
     NOTE(review): alternating add and multiply presumably spreads the work
     over both FP pipelines -- confirm.  */
  240. fzero %f0
  /* %o2 = 0, 64, 128 or 192: bytes in the 0-3 blocks that do not fill a
     256-byte group.  */
  241. andcc %o3, 0xc0, %o2
  /* %o1 = bytes left over after all 64-byte blocks.  */
  242. and %o1, 0x3f, %o1
  243. fzero %f2
  /* %o3 = bytes to be written by the 256-byte loop at label 10.  */
  244. andn %o3, 0xff, %o3
  245. faddd %f0, %f2, %f4
  246. fmuld %f0, %f2, %f6
  247. cmp %o2, 64
  248. faddd %f0, %f2, %f8
  249. fmuld %f0, %f2, %f10
  250. faddd %f0, %f2, %f12
  /* No odd blocks: enter the 256-byte loop directly.  */
  251. brz,pn %o2, 10f
  252. fmuld %f0, %f2, %f14
  /* Write one, two or three 64-byte blocks according to %o2.  */
  253. be,pn %icc, 2f
  254. stda %f0, [%o0 + 0x00] %asi
  255. cmp %o2, 128
  256. be,pn %icc, 2f
  257. stda %f0, [%o0 + 0x40] %asi
  258. stda %f0, [%o0 + 0x80] %asi
  /* Nothing for the 256-byte loop: finish at label 12.  The delay slot
     steps the pointer past the odd blocks in either case.  */
  259. 2: brz,pn %o3, 12f
  260. add %o0, %o2, %o0
  /* Main loop: four block stores, 256 bytes per iteration.  */
  261. 10: stda %f0, [%o0 + 0x00] %asi
  262. stda %f0, [%o0 + 0x40] %asi
  263. stda %f0, [%o0 + 0x80] %asi
  264. stda %f0, [%o0 + 0xc0] %asi
  265. 11: subcc %o3, 256, %o3
  266. bne,pt %xcc, 10b
  267. add %o0, 256, %o0
  /* Block stores finished: write FPRS_FEF to %fprs and order the stores.  */
  268. 12: wr %g0, FPRS_FEF, %fprs
  269. membar #StoreLoad | #StoreStore
  /* Tail: %o2 = remaining bytes that can be written as 8-byte stores (the
     0xf8 mask allows up to 248).  The delay slot reduces %o1 to the final
     0-7 bytes and sets the condition codes tested at label 13.  */
  270. 9: andcc %o1, 0xf8, %o2
  271. be,pn %xcc, 13f
  272. andcc %o1, 7, %o1
  /* Computed jump into the stx ladder: %o4 = address of the rd at label 14,
     minus %o2/2, plus the distance (13f - 14b).  The target is therefore
     %o2/2 bytes of code before label 13, i.e. one 4-byte stx for every
     8 bytes in %o2.  The delay slot advances the pointer by %o2 so the
     negative offsets in ZERO_BLOCKS address exactly those bytes.  */
  273. 14: rd %pc, %o4
  274. srl %o2, 1, %o3
  275. sub %o4, %o3, %o4
  276. jmpl %o4 + (13f - 14b), %g0
  277. add %o0, %o2, %o0
  /* 32 stx instructions covering the 256 bytes below the pointer.  */
  278. 12: ZERO_BLOCKS (%o0, 0xc8, %g0)
  279. ZERO_BLOCKS (%o0, 0x88, %g0)
  280. ZERO_BLOCKS (%o0, 0x48, %g0)
  281. ZERO_BLOCKS (%o0, 0x08, %g0)
  /* Final 0-7 bytes: return if none, otherwise clear 4, 2 and 1 bytes
     according to bits 2, 1 and 0 of %o1.  Each test is done in the delay
     slot of the previous branch.  */
  282. 13: be,pn %xcc, 8f
  283. andcc %o1, 4, %g0
  284. be,pn %xcc, 1f
  285. andcc %o1, 2, %g0
  286. stw %g0, [%o0]
  287. add %o0, 4, %o0
  288. 1: be,pn %xcc, 1f
  289. andcc %o1, 1, %g0
  290. sth %g0, [%o0]
  291. add %o0, 2, %o0
  /* Annulled delay slot: the last byte is cleared only when bit 0 is set.  */
  292. 1: bne,a,pn %xcc, 8f
  293. stb %g0, [%o0]
  /* Return the original destination pointer.  */
  294. 8: retl
  295. mov %o5, %o0
  /* Short case (7 bytes or fewer).  The condition codes on entry still come
     from "andcc %o0, 3": a 4-byte-aligned destination reuses the 4/2/1
     tail at label 13, with the delay slot setting the codes from the
     length.  Otherwise fall into a byte loop, returning at once for a
     zero length.  */
  296. 17: be,pn %xcc, 13b
  297. orcc %o1, 0, %g0
  298. be,pn %xcc, 0f
  /* The add below is also the delay slot of the branch above; for a zero
     length it is harmless because %o0 is reloaded from %o5 on return.  */
  299. 8: add %o0, 1, %o0
  300. subcc %o1, 1, %o1
  301. bne,pt %xcc, 8b
  302. stb %g0, [%o0 - 1]
  303. 0: retl
  304. mov %o5, %o0
  305. END(__bzero)
  /* weak_alias is a macro defined outside the visible source; presumably it
     makes bzero a weak alias of __bzero -- verify against its definition.  */
  306. weak_alias (__bzero, bzero)