dl-tlsdesc.S 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. /*
  2. * Copyright (C) 2017 Waldemar Brodkorb <wbx@uclibc-ng.org>
  3. * Ported from GNU C Library
  4. * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
  5. */
  6. /* Thread-local storage handling in the ELF dynamic linker.
  7. AArch64 version.
  8. Copyright (C) 2011-2017 Free Software Foundation, Inc.
  9. The GNU C Library is free software; you can redistribute it and/or
  10. modify it under the terms of the GNU Lesser General Public
  11. License as published by the Free Software Foundation; either
  12. version 2.1 of the License, or (at your option) any later version.
  13. The GNU C Library is distributed in the hope that it will be useful,
  14. but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. Lesser General Public License for more details.
  17. You should have received a copy of the GNU Lesser General Public
  18. License along with the GNU C Library; if not, see
  19. <http://www.gnu.org/licenses/>. */
  20. #include <sysdep.h>
  21. #if defined __UCLIBC_HAS_TLS__
  22. #include <tls.h>
  23. #include "tlsdesc.h"
  /* Pointer-sized register and size helpers used throughout this file.
     PTR_REG (n) pastes its argument onto "x" to name register xn.
     PTR_LOG_SIZE is log2 of the pointer size in bytes; PTR_SIZE is
     derived from it (1 << 3 == 8).  */
#define PTR_REG(n)		x##n
#define PTR_LOG_SIZE		3
#define PTR_SIZE		(1 << PTR_LOG_SIZE)

/* Number of 32-byte Q-register pair slots in the area that
   SAVE_Q_REGISTERS allocates and RESTORE_Q_REGISTERS releases.  */
#define NSAVEDQREGPAIRS		16
  /* SAVE_Q_REGISTERS: store q0-q31 on the stack.
     The first store pre-decrements sp by the full 32*NSAVEDQREGPAIRS
     byte area and the CFA offset is adjusted by the same amount; each
     remaining pair is then stored into its own 32-byte slot at a fixed
     offset from the new sp (pair k lives at sp + 32*k).  */
#define SAVE_Q_REGISTERS					\
	stp	q0,  q1,  [sp, #-(32 * NSAVEDQREGPAIRS)]!;	\
	cfi_adjust_cfa_offset (32 * NSAVEDQREGPAIRS);		\
	stp	q2,  q3,  [sp, #(32 * 1)];			\
	stp	q4,  q5,  [sp, #(32 * 2)];			\
	stp	q6,  q7,  [sp, #(32 * 3)];			\
	stp	q8,  q9,  [sp, #(32 * 4)];			\
	stp	q10, q11, [sp, #(32 * 5)];			\
	stp	q12, q13, [sp, #(32 * 6)];			\
	stp	q14, q15, [sp, #(32 * 7)];			\
	stp	q16, q17, [sp, #(32 * 8)];			\
	stp	q18, q19, [sp, #(32 * 9)];			\
	stp	q20, q21, [sp, #(32 * 10)];			\
	stp	q22, q23, [sp, #(32 * 11)];			\
	stp	q24, q25, [sp, #(32 * 12)];			\
	stp	q26, q27, [sp, #(32 * 13)];			\
	stp	q28, q29, [sp, #(32 * 14)];			\
	stp	q30, q31, [sp, #(32 * 15)];
  /* RESTORE_Q_REGISTERS: reload q0-q31 from the stack.
     Pairs q2/q3 .. q30/q31 are loaded from their 32-byte slots at
     sp + 32*k; the final load of q0/q1 post-increments sp by the full
     32*NSAVEDQREGPAIRS byte area, and the CFA offset is adjusted back
     by the same amount.  */
#define RESTORE_Q_REGISTERS					\
	ldp	q2,  q3,  [sp, #(32 * 1)];			\
	ldp	q4,  q5,  [sp, #(32 * 2)];			\
	ldp	q6,  q7,  [sp, #(32 * 3)];			\
	ldp	q8,  q9,  [sp, #(32 * 4)];			\
	ldp	q10, q11, [sp, #(32 * 5)];			\
	ldp	q12, q13, [sp, #(32 * 6)];			\
	ldp	q14, q15, [sp, #(32 * 7)];			\
	ldp	q16, q17, [sp, #(32 * 8)];			\
	ldp	q18, q19, [sp, #(32 * 9)];			\
	ldp	q20, q21, [sp, #(32 * 10)];			\
	ldp	q22, q23, [sp, #(32 * 11)];			\
	ldp	q24, q25, [sp, #(32 * 12)];			\
	ldp	q26, q27, [sp, #(32 * 13)];			\
	ldp	q28, q29, [sp, #(32 * 14)];			\
	ldp	q30, q31, [sp, #(32 * 15)];			\
	ldp	q0,  q1,  [sp], #(32 * NSAVEDQREGPAIRS);	\
	cfi_adjust_cfa_offset (-(32 * NSAVEDQREGPAIRS));
  64. .text
  /* Compute the thread pointer offset for symbols in the static
     TLS block.  The offset is the same for all threads.

     Prototype:
       _dl_tlsdesc_return (tlsdesc *) ;

     In:   x0 = address of the TLS descriptor.
     Out:  x0 = the descriptor's second word (the word at byte offset
	   PTR_SIZE from the descriptor address).
     No other register is written and the stack is not touched.  */
	.hidden	_dl_tlsdesc_return
	.global	_dl_tlsdesc_return
	.type	_dl_tlsdesc_return,%function
	cfi_startproc
	.align	2
_dl_tlsdesc_return:
	/* PTR_SIZE (8) replaces the former literal offset; the encoded
	   instruction is unchanged.  */
	ldr	PTR_REG (0), [x0, #PTR_SIZE]
	ret
	cfi_endproc
	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
  78. #ifdef SHARED
  /* Handler for dynamic TLS symbols.

     Prototype:
       _dl_tlsdesc_dynamic (tlsdesc *) ;

     The second word of the descriptor points to a
     tlsdesc_dynamic_arg structure.

     Returns the offset between the thread pointer and the
     object referenced by the argument.

     C equivalent:

       ptrdiff_t
       __attribute__ ((__regparm__ (1)))
       _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
       {
	 struct tlsdesc_dynamic_arg *td = tdp->arg;
	 dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
	 if (__builtin_expect (td->gen_count <= dtv[0].counter
			       && (dtv[td->tlsinfo.ti_module].pointer.val
				   != TLS_DTV_UNALLOCATED),
			       1))
	   return dtv[td->tlsinfo.ti_module].pointer.val
		  + td->tlsinfo.ti_offset
		  - __thread_pointer;

	 return __tls_get_addr (&td->tlsinfo) - __thread_pointer;
       }

     Register use:
       x0  in: descriptor address; then dtv, the DTV entry value and
	   finally the result
       x1  td (loaded from the descriptor at TLSDESC_ARG); later the
	   module offset, or the thread pointer in the slow path
       x2  dtv counter, then the module id
       x3  td generation count
       x4  thread pointer (tpidr_el0)

     x1-x4, x29 and x30 are saved on entry and reloaded at label 1.  The
     slow path additionally saves and reloads x5-x18 and q0-q31 around
     the call.  The condition flags are modified by the cmp
     instructions.  */
	.hidden	_dl_tlsdesc_dynamic
	.global	_dl_tlsdesc_dynamic
	.type	_dl_tlsdesc_dynamic,%function
	cfi_startproc
	.align	2
_dl_tlsdesc_dynamic:
# define NSAVEXREGPAIRS 2
	/* Fast-path frame: 32 bytes (x29/x30 stored at its base) plus
	   16 bytes per saved register pair.  */
	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
	mov	x29, sp
	/* NOTE(review): only CFA-offset adjustments are emitted in this
	   function; the save slots of x29/x30 are not described to the
	   unwinder.  */

	/* Save just enough registers to support fast path, if we fall
	   into slow path we will save additional registers.  */
	stp	x1, x2, [sp, #32+16*0]
	stp	x3, x4, [sp, #32+16*1]

	mrs	x4, tpidr_el0
	/* The ldar here happens after the load from [x0] at the call site
	   (that is generated by the compiler as part of the TLS access ABI),
	   so it reads the same value (this function is the final value of
	   td->entry) and thus it synchronizes with the release store to
	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
	   from [x0,#PTR_SIZE] here happens after the initialization of td->arg.  */
	ldar	PTR_REG (zr), [x0]
	ldr	PTR_REG (1), [x0,#TLSDESC_ARG]		/* x1 = td */
	ldr	PTR_REG (0), [x4,#TCBHEAD_DTV]		/* x0 = dtv */
	ldr	PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]
	ldr	PTR_REG (2), [x0,#DTV_COUNTER]
	/* Unsigned compare: take the slow path when the descriptor's
	   generation count is above the dtv counter.  */
	cmp	PTR_REG (3), PTR_REG (2)
	b.hi	2f
	ldr	PTR_REG (2), [x1,#TLSDESC_MODID]
	/* Index the dtv by module id; the shift scales the id by
	   2*PTR_SIZE bytes per entry.  */
	add	PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1)
	ldr	PTR_REG (0), [x0]	/* Load val member of DTV entry.  */
	cmp	PTR_REG (0), #TLS_DTV_UNALLOCATED
	b.eq	2f
	/* result = val + module offset - thread pointer.  */
	ldr	PTR_REG (1), [x1,#TLSDESC_MODOFF]
	add	PTR_REG (0), PTR_REG (0), PTR_REG (1)
	sub	PTR_REG (0), PTR_REG (0), PTR_REG (4)
1:
	/* Common exit, also reached from the slow path with the result
	   already in x0.  */
	ldp	x1, x2, [sp, #32+16*0]
	ldp	x3, x4, [sp, #32+16*1]

	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
	ret
2:
	/* This label is only reached by the branches above, i.e. while the
	   fast-path frame is still allocated.  The epilogue just before it
	   in address order removed that frame from the CFA offset, so add
	   it back here; otherwise the unwind information for the whole
	   slow path would be short by the size of that frame.  */
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
# undef NSAVEXREGPAIRS

	/* This is the slow path.  We need to call __tls_get_addr() which
	   means we need to save and restore all the registers that the
	   callee will trash.  */

	/* Save the remaining registers that we must treat as caller save.  */
# define NSAVEXREGPAIRS 7
	stp	x5, x6, [sp, #-16*NSAVEXREGPAIRS]!
	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
	stp	x7, x8, [sp, #16*1]
	stp	x9, x10, [sp, #16*2]
	stp	x11, x12, [sp, #16*3]
	stp	x13, x14, [sp, #16*4]
	stp	x15, x16, [sp, #16*5]
	stp	x17, x18, [sp, #16*6]

	SAVE_Q_REGISTERS

	/* x1 still holds td on both branches into this path; pass it as
	   the argument.  */
	mov	x0, x1
	bl	__tls_get_addr

	/* Convert the returned address into an offset from the thread
	   pointer.  */
	mrs	x1, tpidr_el0
	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)

	RESTORE_Q_REGISTERS

	ldp	x7, x8, [sp, #16*1]
	ldp	x9, x10, [sp, #16*2]
	ldp	x11, x12, [sp, #16*3]
	ldp	x13, x14, [sp, #16*4]
	ldp	x15, x16, [sp, #16*5]
	ldp	x17, x18, [sp, #16*6]
	ldp	x5, x6, [sp], #16*NSAVEXREGPAIRS
	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
	b	1b
	cfi_endproc
	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
# undef NSAVEXREGPAIRS
  177. #endif // SHARED
  178. #endif // __UCLIBC_HAS_TLS__