memset.S 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. /* Set a block of memory to some byte value.
  2. For UltraSPARC.
  3. Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
  4. This file is part of the GNU C Library.
  5. Contributed by David S. Miller (davem@caip.rutgers.edu) and
  6. Jakub Jelinek (jj@ultra.linux.cz).
  7. The GNU C Library is free software; you can redistribute it and/or
  8. modify it under the terms of the GNU Lesser General Public
  9. License as published by the Free Software Foundation; either
  10. version 2.1 of the License, or (at your option) any later version.
  11. The GNU C Library is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. Lesser General Public License for more details.
  15. You should have received a copy of the GNU Lesser General Public
  16. License along with the GNU C Library; if not, write to the Free
  17. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18. 02111-1307 USA. */
  19. #include <features.h>
  20. #include <asm/asi.h>
  21. #ifndef XCC
  22. #define XCC xcc
  23. #define USE_BPR
  24. #endif
  25. #define FPRS_FEF 4
  /* SET_BLOCKS (ptr, disp, val): emit four 8-byte stores of VAL that fill
     the 32 bytes ending at PTR - DISP + 8, lowest address first.
     memset enters the chain of expansions part-way through with a computed
     jump, so every expansion must stay exactly four stx instructions in
     this order.  */
#define SET_BLOCKS(ptr, disp, val)              \
        stx     val, [ptr - disp - 0x18];       \
        stx     val, [ptr - disp - 0x10];       \
        stx     val, [ptr - disp - 0x08];       \
        stx     val, [ptr - disp - 0x00];
  31. /* Well, memset is a lot easier to get right than bcopy... */
  /* memset (dst = %o0, c = %o1, len = %o2).  Returns with the original dst
     in %o0.

     Register roles inside the routine:
       %o0  running store pointer
       %o1  fill value, widened step by step to a 64-bit pattern
       %o2  bytes still to store
       %o5  copy of the original dst, moved back to %o0 on return
       %g1, %g5, %o3, %o4 and %f0-%f14 are scratch

     The instruction that follows a branch occupies its delay slot.  Unless
     the branch carries the ",a" (annul) suffix, that instruction runs
     whether or not the branch is taken.  */
  32. .text
  33. .align 32
  34. ENTRY(memset)
  /* Reduce c to its low byte; the flags now tell whether that byte is 0.  */
  35. andcc %o1, 0xff, %o1
  /* Keep the original dst for the return value.  */
  36. mov %o0, %o5
  /* Zero fill byte: continue at label 50 in the zero-fill code further down
     this file, which expects the length in %o1.  The branch is annulled, so
     the delay-slot copy of len into %o1 runs only when the branch is taken
     (the srl form additionally clears the upper 32 bits).  */
  37. be,a,pt %icc, 50f
  38. #ifndef USE_BPR
  39. srl %o2, 0, %o1
  40. #else
  41. mov %o2, %o1
  42. #endif
  43. cmp %o2, 7
  44. #ifndef USE_BPR
  /* Without USE_BPR only the low 32 bits of len are used from here on.
     srl does not disturb the flags set by the cmp above.  */
  45. srl %o2, 0, %o2
  46. #endif
  /* len <= 7: plain byte loop at 17.  Delay slot: %g5 = dst & 3.  */
  47. bleu,pn %XCC, 17f
  48. andcc %o0, 3, %g5
  /* dst already 4-byte aligned: skip the leading byte stores.  */
  49. be,pt %xcc, 4f
  50. and %o1, 0xff, %o1
  /* Store 4 - %g5 leading bytes to reach 4-byte alignment; the stb in each
     delay slot executes before control leaves for label 2.  */
  51. cmp %g5, 3
  52. be,pn %xcc, 2f
  53. stb %o1, [%o0 + 0x00]
  54. cmp %g5, 2
  55. be,pt %xcc, 2f
  56. stb %o1, [%o0 + 0x01]
  57. stb %o1, [%o0 + 0x02]
  /* %g5 becomes minus the number of bytes just stored; use it to advance
     dst and to shrink len.  */
  58. 2: sub %g5, 4, %g5
  59. sub %o0, %g5, %o0
  60. add %o2, %g5, %o2
  /* Replicate the fill byte to 16 and then 32 bits while testing bit 2 of
     dst.  The shifts and ors leave the andcc flags intact.  */
  61. 4: sllx %o1, 8, %g1
  62. andcc %o0, 4, %g0
  63. or %o1, %g1, %o1
  64. sllx %o1, 16, %g1
  65. or %o1, %g1, %o1
  /* dst already 8-byte aligned: skip the single word store.  Delay slot:
     %g1 = pattern shifted into the upper 32 bits.  */
  66. be,pt %xcc, 2f
  67. sllx %o1, 32, %g1
  68. stw %o1, [%o0]
  69. sub %o2, 4, %o2
  70. add %o0, 4, %o0
  /* dst is 8-byte aligned here.  Finish the 64-bit pattern and choose
     between the tail code (len < 128) and the 64-byte block path.  */
  71. 2: cmp %o2, 128
  72. or %o1, %g1, %o1
  73. blu,pn %xcc, 9f
  /* %g5 = offset of dst inside its 64-byte line (a multiple of 8).  */
  74. andcc %o0, 0x38, %g5
  /* Already 64-byte aligned: use the entry at label 6.  */
  75. be,pn %icc, 6f
  76. mov 64, %o4
  /* Store 8, 16 and/or 32 bytes as needed to reach a 64-byte boundary.
     %o4 = 64 - %g5 is the total number of such bytes.  */
  77. andcc %o0, 8, %g0
  78. be,pn %icc, 1f
  79. sub %o4, %g5, %o4
  80. stx %o1, [%o0]
  81. add %o0, 8, %o0
  82. 1: andcc %o4, 16, %g0
  83. be,pn %icc, 1f
  /* All alignment bytes are subtracted from len in one go.  */
  84. sub %o2, %o4, %o2
  85. stx %o1, [%o0]
  86. stx %o1, [%o0 + 8]
  87. add %o0, 16, %o0
  88. 1: andcc %o4, 32, %g0
  89. be,pn %icc, 7f
  /* %o3 = len rounded down to a multiple of 64; the flags of this andncc
     are the ones tested at label 7 (stw and add do not change them).  */
  90. andncc %o2, 0x3f, %o3
  91. stw %o1, [%o0]
  92. stw %o1, [%o0 + 4]
  93. stw %o1, [%o0 + 8]
  94. stw %o1, [%o0 + 12]
  95. stw %o1, [%o0 + 16]
  96. stw %o1, [%o0 + 20]
  97. stw %o1, [%o0 + 24]
  98. stw %o1, [%o0 + 28]
  99. add %o0, 32, %o0
  /* No whole 64-byte block left: go straight to the tail code.  */
  100. 7: be,pn %xcc, 9f
  101. nop
  /* Fetch the pattern into %f0 from the eight bytes stored just below dst.  */
  102. ldd [%o0 - 8], %f0
  /* Block-store path.  The stda instructions below go through %asi, set
     here to ASI_BLK_P (from <asm/asi.h>), and are issued 0x40 bytes apart
     with %f0-%f14 all holding the pattern.  */
  103. 18: wr %g0, ASI_BLK_P, %asi
  104. membar #StoreStore | #LoadStore
  /* %g5 = bytes in the 1-3 blocks that precede the 256-byte loop
     (0, 64, 128 or 192).  */
  105. andcc %o3, 0xc0, %g5
  /* len = bytes left over once all whole blocks are written.  */
  106. and %o2, 0x3f, %o2
  107. fmovd %f0, %f2
  108. fmovd %f0, %f4
  /* %o3 = bytes handled by the 256-byte loop.  */
  109. andn %o3, 0xff, %o3
  110. fmovd %f0, %f6
  111. cmp %g5, 64
  112. fmovd %f0, %f8
  113. fmovd %f0, %f10
  114. fmovd %f0, %f12
  /* No odd blocks: enter the loop directly.  */
  115. brz,pn %g5, 10f
  116. fmovd %f0, %f14
  /* One, two or three leading blocks, selected by %g5.  */
  117. be,pn %icc, 2f
  118. stda %f0, [%o0 + 0x00] %asi
  119. cmp %g5, 128
  120. be,pn %icc, 2f
  121. stda %f0, [%o0 + 0x40] %asi
  122. stda %f0, [%o0 + 0x80] %asi
  /* Skip the loop when no 256-byte group remains.  Delay slot: step dst
     past the leading blocks.  */
  123. 2: brz,pn %o3, 12f
  124. add %o0, %g5, %o0
  /* Main loop: four block stores (256 bytes) per iteration.  */
  125. 10: stda %f0, [%o0 + 0x00] %asi
  126. stda %f0, [%o0 + 0x40] %asi
  127. stda %f0, [%o0 + 0x80] %asi
  128. stda %f0, [%o0 + 0xc0] %asi
  129. 11: subcc %o3, 256, %o3
  130. bne,pt %xcc, 10b
  131. add %o0, 256, %o0
  /* Block stores finished: set %fprs to FPRS_FEF and order the stores.  */
  132. 12: wr %g0, FPRS_FEF, %fprs
  133. membar #StoreLoad | #StoreStore
  /* Tail, len < 128.  %g5 = bytes that form whole 8-byte words.  */
  134. 9: andcc %o2, 0x78, %g5
  135. be,pn %xcc, 13f
  /* len = final 0-7 bytes; these flags are the ones tested at label 13.  */
  136. andcc %o2, 7, %o2
  /* Computed jump into the stx table.  Each stx is 4 bytes of code and
     stores 8 bytes, so stepping back %g5 / 2 bytes from label 13 lands on
     the first of exactly %g5 / 8 stores.  rd %pc yields the address of
     label 14; the delay slot moves dst past the words about to be stored.  */
  137. 14: rd %pc, %o4
  138. srl %g5, 1, %o3
  139. sub %o4, %o3, %o4
  140. jmpl %o4 + (13f - 14b), %g0
  141. add %o0, %g5, %o0
  /* Sixteen stx covering [%o0 - 0x80, %o0 - 0x08], lowest address first.  */
  142. 12: SET_BLOCKS (%o0, 0x68, %o1)
  143. SET_BLOCKS (%o0, 0x48, %o1)
  144. SET_BLOCKS (%o0, 0x28, %o1)
  145. SET_BLOCKS (%o0, 0x08, %o1)
  /* Final 0-7 bytes: one word, one halfword and one byte as selected by
     bits 2, 1 and 0 of len.  Each test sits in the delay slot of the branch
     before the one that consumes it.  */
  146. 13: be,pn %xcc, 8f
  147. andcc %o2, 4, %g0
  148. be,pn %xcc, 1f
  149. andcc %o2, 2, %g0
  150. stw %o1, [%o0]
  151. add %o0, 4, %o0
  152. 1: be,pn %xcc, 1f
  153. andcc %o2, 1, %g0
  154. sth %o1, [%o0]
  155. add %o0, 2, %o0
  /* Annulled branch: the stb runs only when bit 0 of len is set.  */
  156. 1: bne,a,pn %xcc, 8f
  157. stb %o1, [%o0]
  /* Return the original dst.  */
  158. 8: retl
  159. mov %o5, %o0
  /* Short case, len <= 7: byte-at-a-time loop.  The add at label 8 doubles
     as the delay slot of the brz, so it runs once before the first subcc.  */
  160. 17: brz,pn %o2, 0f
  161. 8: add %o0, 1, %o0
  162. subcc %o2, 1, %o2
  163. bne,pt %xcc, 8b
  164. stb %o1, [%o0 - 1]
  165. 0: retl
  166. mov %o5, %o0
  /* Entry for a dst that was already 64-byte aligned: store one doubleword
     so the pattern can be loaded from memory, then either drop to the tail
     code (no whole 64-byte block) or join the block path at 18 with the
     pattern loaded into %f0 in the delay slot.  */
  167. 6: stx %o1, [%o0]
  168. andncc %o2, 0x3f, %o3
  169. be,pn %xcc, 9b
  170. nop
  171. ba,pt %xcc, 18b
  172. ldd [%o0], %f0
  173. END(memset)
  174. libc_hidden_def(memset)
  /* ZERO_BLOCKS (ptr, disp, val): emit eight 8-byte stores of VAL that fill
     the 64 bytes ending at PTR - DISP + 8, lowest address first.
     The zero-fill tail enters the chain of expansions part-way through with
     a computed jump, so every expansion must stay exactly eight stx
     instructions in this order.  */
#define ZERO_BLOCKS(ptr, disp, val)             \
        stx     val, [ptr - disp - 0x38];       \
        stx     val, [ptr - disp - 0x30];       \
        stx     val, [ptr - disp - 0x28];       \
        stx     val, [ptr - disp - 0x20];       \
        stx     val, [ptr - disp - 0x18];       \
        stx     val, [ptr - disp - 0x10];       \
        stx     val, [ptr - disp - 0x08];       \
        stx     val, [ptr - disp - 0x00];
  /* Zero-fill engine: bzero (dst = %o0, len = %o1), plus label 50, the
     landing point memset branches to when its fill byte is zero.

     Only the bzero symbol (its entry prologue and END) depends on
     __UCLIBC_SUSV3_LEGACY__.  Everything from label 50 onwards is assembled
     unconditionally: memset refers to 50f however that option is set, so
     the label must not sit inside the conditional region or memset fails to
     assemble when the legacy interfaces are configured out.  In that
     configuration the code below simply follows memset in .text without a
     symbol of its own.

     State on arrival at label 50:
       %o0  store pointer
       %o1  bytes still to clear
       %o5  original dst, moved back into %o0 before returning
     Scratch: %o2, %o3, %o4 and %f0-%f14.

     An instruction indented by one extra column sits in the delay slot of
     the branch above it.  Unless that branch carries the ",a" (annul)
     suffix, the instruction runs whether or not the branch is taken.  */
        .text
        .align  32
#ifdef __UCLIBC_SUSV3_LEGACY__
ENTRY(bzero)
#ifndef USE_BPR
        srl     %o1, 0, %o1             /* use only the low 32 bits of len */
#endif
        mov     %o0, %o5                /* keep dst for the return value */
#endif /* __UCLIBC_SUSV3_LEGACY__ */

50:     cmp     %o1, 7
        bleu,pn %xcc, 17f               /* at most 7 bytes: short path */
         andcc  %o0, 3, %o2             /* %o2 = dst & 3 */
        be,a,pt %xcc, 4f                /* dst already 4-byte aligned */
         andcc  %o0, 4, %g0             /* annulled: runs only if taken */

        /* Store 4 - %o2 leading bytes to reach 4-byte alignment.  */
        cmp     %o2, 3
        be,pn   %xcc, 2f
         stb    %g0, [%o0 + 0x00]
        cmp     %o2, 2
        be,pt   %xcc, 2f
         stb    %g0, [%o0 + 0x01]
        stb     %g0, [%o0 + 0x02]
2:      sub     %o2, 4, %o2             /* %o2 = -(bytes just stored) */
        sub     %o0, %o2, %o0           /* dst += bytes stored */
        add     %o1, %o2, %o1           /* len -= bytes stored */
        andcc   %o0, 4, %g0
4:      be,pt   %xcc, 2f                /* dst already 8-byte aligned */
         cmp    %o1, 128
        stw     %g0, [%o0]
        sub     %o1, 4, %o1
        add     %o0, 4, %o0

        /* The flags tested next are those of the cmp in the delay slot
           above; stw, sub and add leave them untouched.  */
2:      blu,pn  %xcc, 9f                /* below 128: no block stores */
         andcc  %o0, 0x38, %o2          /* %o2 = dst offset in 64-byte line */
        be,pn   %icc, 6f                /* already 64-byte aligned */
         mov    64, %o4

        /* Store 8, 16 and/or 32 bytes to reach a 64-byte boundary.  */
        andcc   %o0, 8, %g0
        be,pn   %icc, 1f
         sub    %o4, %o2, %o4           /* %o4 = bytes up to the boundary */
        stx     %g0, [%o0]
        add     %o0, 8, %o0
1:      andcc   %o4, 16, %g0
        be,pn   %icc, 1f
         sub    %o1, %o4, %o1           /* charge all of them to len now */
        stx     %g0, [%o0]
        stx     %g0, [%o0 + 8]
        add     %o0, 16, %o0
1:      andcc   %o4, 32, %g0
        be,pn   %icc, 7f
         andncc %o1, 0x3f, %o3          /* %o3 = len rounded down to 64 */
        stx     %g0, [%o0]
        stx     %g0, [%o0 + 8]
        stx     %g0, [%o0 + 16]
        stx     %g0, [%o0 + 24]
        add     %o0, 32, %o0
6:      andncc  %o1, 0x3f, %o3
7:      be,pn   %xcc, 9f                /* no whole 64-byte block left */
         wr     %g0, ASI_BLK_P, %asi    /* ASI used by the stda below */

        /* Block-store path: %f0-%f14 are all made zero (fzero, then 0 + 0
           and 0 * 0) and written 0x40 bytes at a time.  */
        membar  #StoreLoad | #StoreStore | #LoadStore
        fzero   %f0
        andcc   %o3, 0xc0, %o2          /* 0, 64, 128 or 192 leading bytes */
        and     %o1, 0x3f, %o1          /* len = tail after whole blocks */
        fzero   %f2
        andn    %o3, 0xff, %o3          /* bytes for the 256-byte loop */
        faddd   %f0, %f2, %f4
        fmuld   %f0, %f2, %f6
        cmp     %o2, 64
        faddd   %f0, %f2, %f8
        fmuld   %f0, %f2, %f10
        faddd   %f0, %f2, %f12
        brz,pn  %o2, 10f                /* no odd blocks: enter the loop */
         fmuld  %f0, %f2, %f14
        be,pn   %icc, 2f                /* exactly one leading block */
         stda   %f0, [%o0 + 0x00] %asi
        cmp     %o2, 128
        be,pn   %icc, 2f                /* exactly two leading blocks */
         stda   %f0, [%o0 + 0x40] %asi
        stda    %f0, [%o0 + 0x80] %asi
2:      brz,pn  %o3, 12f                /* no 256-byte group remains */
         add    %o0, %o2, %o0           /* step past the leading blocks */
10:     stda    %f0, [%o0 + 0x00] %asi
        stda    %f0, [%o0 + 0x40] %asi
        stda    %f0, [%o0 + 0x80] %asi
        stda    %f0, [%o0 + 0xc0] %asi
11:     subcc   %o3, 256, %o3
        bne,pt  %xcc, 10b
         add    %o0, 256, %o0
12:     wr      %g0, FPRS_FEF, %fprs    /* set %fprs to FPRS_FEF */
        membar  #StoreLoad | #StoreStore

        /* Tail: %o2 = bytes that form whole 8-byte words.  */
9:      andcc   %o1, 0xf8, %o2
        be,pn   %xcc, 13f
         andcc  %o1, 7, %o1             /* final 0-7 bytes; flags used at 13 */

        /* Computed jump into the stx table.  Each stx is 4 bytes of code
           and clears 8 bytes, so stepping back %o2 / 2 bytes from label 13
           lands on the first of exactly %o2 / 8 stores.  rd %pc yields the
           address of label 14.  */
14:     rd      %pc, %o4
        srl     %o2, 1, %o3
        sub     %o4, %o3, %o4
        jmpl    %o4 + (13f - 14b), %g0
         add    %o0, %o2, %o0           /* dst moves past those words */
        /* Thirty-two stx covering [%o0 - 0x100, %o0 - 0x08].  */
12:     ZERO_BLOCKS (%o0, 0xc8, %g0)
        ZERO_BLOCKS (%o0, 0x88, %g0)
        ZERO_BLOCKS (%o0, 0x48, %g0)
        ZERO_BLOCKS (%o0, 0x08, %g0)

        /* Final 0-7 bytes: word, halfword and byte selected by bits 2, 1
           and 0 of len.  Each test sits in the delay slot of the branch
           before the one that consumes it.  */
13:     be,pn   %xcc, 8f
         andcc  %o1, 4, %g0
        be,pn   %xcc, 1f
         andcc  %o1, 2, %g0
        stw     %g0, [%o0]
        add     %o0, 4, %o0
1:      be,pn   %xcc, 1f
         andcc  %o1, 1, %g0
        sth     %g0, [%o0]
        add     %o0, 2, %o0
1:      bne,a,pn %xcc, 8f
         stb    %g0, [%o0]              /* annulled: only when bit 0 is set */
8:      retl
         mov    %o5, %o0                /* return the original dst */

        /* Short case, len <= 7.  On arrival the flags reflect dst & 3: an
           aligned dst reuses the word/halfword/byte tail at 13 with flags
           reloaded from len; otherwise clear one byte at a time.  The add
           at label 8 doubles as a delay slot, so it runs once before the
           first subcc.  */
17:     be,pn   %xcc, 13b
         orcc   %o1, 0, %g0
        be,pn   %xcc, 0f                /* len == 0: nothing to do */
8:       add    %o0, 1, %o0
        subcc   %o1, 1, %o1
        bne,pt  %xcc, 8b
         stb    %g0, [%o0 - 1]
0:      retl
         mov    %o5, %o0
#ifdef __UCLIBC_SUSV3_LEGACY__
END(bzero)
#endif