resolve.S 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. /*
  2. *
  3. * add ip, pc, #0xNN00000
  4. * add ip, ip, #0xNN000
  5. * ldr pc, [ip, #0xNNN]!
  6. *
  7. * So that, effectively, causes the following to happen:
  8. *
  9. * ip : = pc+0x0NNNNNNN
  10. * pc : = *ip
  11. *
  12. * For thumb the above fragment is preceded by "bx pc, nop" to switch to ARM
  13. * mode and the thumb 'bl' must go to PLT-4 - the PLT entry is expanded by
  14. * four bytes to accommodate the trampoline code.
  15. *
  16. * 0x0NNNNNNN is the offset of the GOT entry for this function relative to
  17. * the PLT entry for this function (where the code is). So the code in the
  18. * PLT causes a branch to whatever is in the GOT, leaving the actual address
  19. * of the GOT entry in ip. (Note that the GOT must follow the PLT - the
  20. * added value is 28 bit unsigned).
  21. *
  22. * ip is a pointer to the GOT entry for this function, the first time round
  23. * *ip points to this code:
  24. *
  25. * str lr, [sp, #-4]! @ save lr
  26. * ldr lr, [pc, #4] @ lr : = *dat (&GOT_TABLE[0]-.)
  27. * add lr, pc, lr @ lr += &dat (so lr == &GOT_TABLE[0])
  28. * ldr pc, [lr, #8]! @ pc : = GOT_TABLE[2]
  29. *dat: *.long &GOT_TABLE[0] - .
  30. *
  31. * (this code is actually held in the first entry of the PLT). The code
  32. * preserves lr then uses it as a scratch register (this preserves the ip
  33. * value calculated above). GOT_TABLE[2] is initialized by INIT_GOT in
  34. * dl-sysdep.h to point to _dl_linux_resolve - this function. The first
  35. * three entries in the GOT are reserved, then they are followed by the
  36. * entries for the PLT entries, in order.
  37. *
  38. * The linker initialises the following (non-reserved) GOT entries to
  39. * the offset of the PLT with an associated relocation so that on load
  40. * the entry is relocated to point to the PLT - the above code.
  41. *
  42. * The net effect of all this is that on the first call to an external (as
  43. * yet unresolved) function all seven of the above instructions are
  44. * executed in sequence and the program ends up executing _dl_linux_resolve
  45. * with the following important values in registers:
  46. *
  47. * ip - a pointer to the GOT entry for the as yet unresolved function
  48. * lr - &GOT_TABLE[2]
  49. *
  50. * GOT_TABLE[2] has already been initialised to _dl_linux_resolve, and
  51. * GOT_TABLE[1] is a pointer to the (elf_resolve*) from INIT_GOT.
  52. * _dl_linux_resolve unfrobnicates the ip and lr values to obtain arguments
  53. * for a call to _dl_linux_resolver (note the additional 'r' on the end) -
  54. * this is in elfinterp.c in this directory. The call takes arguments:
  55. *
  56. * _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
  57. *
  58. * And returns the address of the function, it also overwrites the GOT
  59. * table entry so that the next time round only the first code fragment will
  60. * be executed - it will call the function directly.
  61. *
  62. * [[Of course, this simply doesn't work on ARM 4T with a thumb target - because
  63. * 4T did not do the thumb/arm change on ldr pc! It can be made to work by
  64. * changing _dl_linux_resolver to return __%s_from_arm for an STT_TFUNC, but
  65. * this hasn't been done, and there is no guarantee that the linker generated
  66. * that glue anyway.]]
  67. *
  68. * _dl_linux_resolve gets the arguments to call the resolver as follows:
  69. *
  70. * tpnt *GOT_TABLE[1], [lr-4]
  71. * reloc-entry &GOT-&GOT_TABLE[3], (ip - lr - 4)/4
  72. *
  73. * (I.e. 'GOT' means the table entry for this function, the thing for which
  74. * ip holds the address.) The reloc-entry is passed as an index; since
  75. * the GOT table has 4 byte entries the code needs to divide this by 4
  76. * to get the actual index.
  77. *
  78. * John Bowler, August 13, 2005 - determined by experiment and examination
  79. * of generated ARM code (there was no documentation...)
  80. *
  81. * This code is all ARM code - not thumb - _dl_linux_resolver may, itself,
  82. * be thumb, in which case the linker will insert the appropriate glue. A
  83. * call from thumb to the PLT hits the trampoline code described above.
  84. * This code (now) builds a proper stack frame.
  85. *
  86. * The code does *not* set sb (r9,v6) - to do that the basic PLT instructions
  87. * would need to save sb and load the new value and that would require
  88. * support in the linker since it generates those instructions. (Also note
  89. * that linux/uclibc seems to be using r10 - sl - as a PIC base register - see
  90. * dl-startup.c).
  91. */
  92. #include <features.h>
  93. #include <bits/arm_asm.h>
  94. #include <bits/arm_bx.h>
  95. #define sl r10 /* register alias: sl is r10 */
  96. #define fp r11 /* register alias: fp is r11 */
  97. #define ip r12 /* register alias: ip is r12, which the PLT code leaves pointing at the GOT entry */
  98. .text
  99. .align 4 @ 16 byte boundary and there are 32 bytes below (arm case)
  100. #if 1 /* always true, so the #else branch further down is never assembled; the former condition was: (!defined(__thumb__) || defined __THUMB_INTERWORK__) || defined(__thumb2__) */
  101. /* On Thumb-only processors, force thumb encoding. These
  102. processors support Thumb-2, so the same source code can be
  103. used as in ARM mode. */
  104. #if !defined(__ARM_ARCH_ISA_ARM) /* the Thumb-only case described above */
  105. .thumb @ assemble the following code as Thumb
  106. .thumb_func @ mark the next symbol as a Thumb function
  107. #else
  108. .arm @ assemble the following code as ARM
  109. #endif
  110. .hidden _dl_linux_resolve @ give the symbol hidden ELF visibility
  111. .globl _dl_linux_resolve @ make the symbol global so other objects in the link can reference it
  112. .type _dl_linux_resolve,%function @ mark the symbol as a function
  113. .align 4; @ start the entry point on a 16 byte boundary
  114. #if __FDPIC__
  115. /*
  116. * _dl_linux_resolve() FDPIC version receives the following parameters from
  117. * lazy PLT entry:
  118. * R12: GOT address for the resolver GOT
  119. * SP[0]: funcdesc_value_reloc_offset(foo)
  120. * R9: GOT address for the caller GOT
  121. * _dl_linux_resolver() will return a function descriptor address in R0.
  122. * Word 0 of that descriptor is used as the branch target and word 1 is loaded into R9. */
  123. _dl_linux_resolve:
  124. push {r0, r1, r2, r3, r14} @ save the argument registers and lr (5 words, 20 bytes)
  125. ldr r0, [r9, #8] @ arg 1 := word at caller GOT + 8 (presumably the struct elf_resolve pointer; confirm against the FDPIC GOT layout)
  126. ldr r1, [sp, #20] @ arg 2 := the SP[0] value from entry, now 20 bytes up because of the push
  127. mov r9, r12 @ r9 := resolver GOT address for the duration of the call
  128. blx _dl_linux_resolver @ r0 := address of the function descriptor
  129. ldr r9, [r0, #4] @ r9 := descriptor word 1 (presumably the GOT address of the target; confirm)
  130. ldr r12, [r0] @ r12 := descriptor word 0, the branch target used below
  131. pop {r0, r1, r2, r3, r14} @ restore the original arguments and lr
  132. add sp, sp, #4 @ discard the SP[0] word supplied by the PLT entry
  133. bx r12 @ jump to the resolved function
  134. #else
  135. _dl_linux_resolve: @ lazy-binding entry: calls _dl_linux_resolver and then branches to the address it returns
  136. @ _dl_linux_resolver is a standard subroutine call, therefore it
  137. @ preserves everything except r0-r3 (a1-a4), ip and lr. This
  138. @ function must branch to the real function, and that expects
  139. @ r0-r3 and lr to be as they were before the whole PLT stuff -
  140. @ ip can be trashed.
  141. @ This routine is called after pushing lr, so we must push an odd
  142. @ number of words to keep the stack correctly aligned.
  143. stmdb sp!, {r0, r1, r2, r3, r4} @ save r0-r3; r4 is the fifth word that makes the count odd
  144. ldr r0, [lr, #-4] @ r0 := [lr-4] (GOT_TABLE[1])
  145. sub r1, lr, ip @ r1 := (lr-ip) (a multiple of 4)
  146. mvn r1, r1, ASR #2 @ r1 := ~((lr-ip)>>2), since -x = (1+~x)
  147. @ ~x = -x-1, therefore ~(r1>>2) = (-((lr-ip)>>2)-1)
  148. @ = - ((lr-ip)/4) - 1 = (ip - lr - 4)/4, as required
  149. bl _dl_linux_resolver @ r0 := address of the resolved function
  150. mov ip, r0 @ keep the target in ip so that r0 can be restored
  151. ldmia sp!, {r0, r1, r2, r3, r4, lr} @ restore the five saved words plus the lr that was pushed before entry
  152. BX(ip) @ branch to the resolved function (BX is a macro, presumably from bits/arm_bx.h)
  153. #endif /* __FDPIC__ */
  154. #else
  155. @ In the thumb case _dl_linux_resolver is thumb. If a bl is used
  156. @ from arm code the linker will insert a stub call which, with
  157. @ binutils 2.16, is not PIC. Since this code is accessed by an
  158. @ ldr pc the reasonable fix is to make _dl_linux_resolve thumb too. NOTE: the enclosing condition is currently #if 1, so this #else branch is not assembled.
  159. .thumb
  160. .globl _dl_linux_resolve
  161. .thumb_func
  162. .type _dl_linux_resolve,%function
  163. _dl_linux_resolve:
  164. @ _dl_linux_resolver is a standard subroutine call, therefore it
  165. @ preserves everything except r0-r3 (a1-a4), ip and lr. This
  166. @ function must branch to the real function, and that expects
  167. @ r0-r3 and lr to be as they were before the whole PLT stuff -
  168. @ ip can be trashed.
  169. @ This routine is called after pushing lr, so we must push an odd
  170. @ number of words to keep the stack correctly aligned.
  171. push {r0-r4} @ save r0-r3 plus r4 (five words, the odd count described above)
  172. mov r1, lr @ &GOT_TABLE[2]
  173. sub r0, r1, #4 @ r0 := &GOT_TABLE[1]
  174. mov r2, ip @ &GOT[n]
  175. ldr r0, [r0] @ r0 := GOT_TABLE[1]
  176. @ for the function call r1 := n-3
  177. sub r1, r2 @ r1 := lr - ip
  178. asr r1, r1, #2 @ r1 := (lr - ip) >> 2
  179. mvn r1, r1 @ exactly as in the arm code above
  180. bl _dl_linux_resolver @ r0 := address of the resolved function
  181. @ r0 contains the branch address, the return address is above
  182. @ the saved r0..r4 (five words, hence the offset of 20 below)
  183. mov ip, r0 @ keep the target in ip while r0 is restored
  184. ldr r1, [sp, #20] @ r1 := the lr value that was pushed before entry
  185. mov lr, r1 @ lr := that original return address
  186. pop {r0-r4} @ restore the saved registers
  187. add sp, #4 @ drop the stack slot that held the saved lr
  188. bx ip @ branch to the resolved function
  189. #endif
  190. .size _dl_linux_resolve, .-_dl_linux_resolve @ symbol size := bytes from the label to this point