|
@@ -1,49 +1,164 @@
|
|
|
/*
|
|
|
- * This function is _not_ called directly. It is jumped to (so no return
|
|
|
- * address is on the stack) when attempting to use a symbol that has not yet
|
|
|
- * been resolved. The first time a jump symbol (such as a function call inside
|
|
|
- * a shared library) is used (before it gets resolved) it will jump here to
|
|
|
- * _dl_linux_resolve. When we get called the stack looks like this:
|
|
|
- * reloc_entry
|
|
|
- * tpnt
|
|
|
- *
|
|
|
- * This function saves all the registers, puts a copy of reloc_entry and tpnt
|
|
|
- * on the stack (as function arguments) then make the function call
|
|
|
- * _dl_linux_resolver(tpnt, reloc_entry). _dl_linux_resolver() figures out
|
|
|
- * where the jump symbol is _really_ supposed to have jumped to and returns
|
|
|
- * that to us. Once we have that, we overwrite tpnt with this fixed up
|
|
|
- * address. We then clean up after ourselves, put all the registers back how we
|
|
|
- * found them, then we jump to the fixed up address, which is where the jump
|
|
|
- * symbol that got us here really wanted to jump to in the first place.
|
|
|
- * -Erik Andersen
|
|
|
+ * On ARM the PLT contains the following three instructions (for ARM calls):
|
|
|
+ *
|
|
|
+ * add ip, pc, #0xNN00000
|
|
|
+ * add ip, ip, #0xNN000
|
|
|
+ * ldr pc, [ip, #0xNNN]!
|
|
|
+ *
|
|
|
+ * So that, effectively, causes the following to happen:
|
|
|
+ *
|
|
|
+ * ip : = pc+0x0NNNNNNN
|
|
|
+ * pc : = *ip
|
|
|
+ *
|
|
|
+ * For thumb the above fragment is preceded by "bx pc, nop" to switch to ARM
|
|
|
+ * mode and the thumb 'bl' must go to PLT-4 - the PLT entry is expanded by
|
|
|
+ * four bytes to accommodate the trampoline code.
|
|
|
+ *
|
|
|
+ * 0x0NNNNNNN is the offset of the GOT entry for this function relative to
|
|
|
+ * the PLT entry for this function (where the code is). So the code in the
|
|
|
+ * PLT causes a branch to whatever is in the GOT, leaving the actual address
|
|
|
+ * of the GOT entry in ip. (Note that the GOT must follow the PLT - the
|
|
|
+ * added value is 28 bit unsigned).
|
|
|
+ *
|
|
|
+ * ip is a pointer to the GOT entry for this function, the first time round
|
|
|
+ * *ip points to this code:
|
|
|
+ *
|
|
|
+ * str lr, [sp, #-4]! @ save lr
|
|
|
+ * ldr lr, [pc, #4] @ lr : = *dat (&GOT_TABLE[0]-.)
|
|
|
+ * add lr, pc, lr @ lr += &dat (so lr == &GOT_TABLE[0])
|
|
|
+ * ldr pc, [lr, #8]! @ pc : = GOT_TABLE[2]
|
|
|
+ *dat: *.long &GOT_TABLE[0] - .
|
|
|
+ *
|
|
|
+ * (this code is actually held in the first entry of the PLT). The code
|
|
|
+ * preserves lr then uses it as a scratch register (this preserves the ip
|
|
|
+ * value calculated above). GOT_TABLE[2] is initialized by INIT_GOT in
|
|
|
+ * dl-sysdep.h to point to _dl_linux_resolve - this function. The first
|
|
|
+ * three entries in the GOT are reserved, then they are followed by the
|
|
|
+ * entries for the PLT entries, in order.
|
|
|
+ *
|
|
|
+ * The linker initialises the following (non-reserved) GOT entries to
|
|
|
+ * the offset of the PLT with an associated relocation so that on load
|
|
|
+ * the entry is relocated to point to the PLT - the above code.
|
|
|
+ *
|
|
|
+ * The net effect of all this is that on the first call to an external (as
|
|
|
+ * yet unresolved) function all seven of the above instructions are
|
|
|
+ * executed in sequence and the program ends up executing _dl_linux_resolve
|
|
|
+ * with the following important values in registers:
|
|
|
+ *
|
|
|
+ * ip - a pointer to the GOT entry for the as yet unresolved function
|
|
|
+ * lr - &GOT_TABLE[2]
|
|
|
+ *
|
|
|
+ * GOT_TABLE[2] has already been initialised to _dl_linux_resolve, and
|
|
|
+ * GOT_TABLE[1] is a pointer to the (elf_resolve*) from INIT_GOT.
|
|
|
+ * _dl_linux_resolve unfrobnicates the ip and lr values to obtain arguments
|
|
|
+ * for a call to _dl_linux_resolver (note the additional 'r' on the end) -
|
|
|
+ * this is in elfinterp.c in this directory. The call takes arguments:
|
|
|
+ *
|
|
|
+ * _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
|
|
|
+ *
|
|
|
+ * And returns the address of the function, it also overwrites the GOT
|
|
|
+ * table entry so that the next time round only the first code fragment will
|
|
|
+ * be executed - it will call the function directly.
|
|
|
+ *
|
|
|
+ * [[Of course, this simply doesn't work on ARM 4T with a thumb target - because
|
|
|
+ * 4T did not do the thumb/arm change on ldr pc! It can be made to work by
|
|
|
+ * changing _dl_linux_resolver to return __%s_from_arm for an STT_TFUNC, but
|
|
|
+ * this hasn't been done, and there is no guarantee that the linker generated
|
|
|
+ * that glue anyway.]]
|
|
|
+ *
|
|
|
+ * _dl_linux_resolve gets the arguments to call the resolver as follows:
|
|
|
+ *
|
|
|
+ * tpnt *GOT_TABLE[1], [lr-4]
|
|
|
+ * reloc-entry &GOT-&GOT_TABLE[3], (ip - lr - 4)/4
|
|
|
+ *
|
|
|
+ * (I.e. 'GOT' means the table entry for this function, the thing for which
|
|
|
+ * ip holds the address.) The reloc-entry is passed as an index;
|
|
|
+ * since the GOT table has 4 byte entries the code needs to divide this by 4
|
|
|
+ * to get the actual index.
|
|
|
+ *
|
|
|
+ * John Bowler, August 13, 2005 - determined by experiment and examination
|
|
|
+ * of generated ARM code (there was no documentation...)
|
|
|
+ *
|
|
|
+ * This code is all ARM code - not thumb - _dl_linux_resolver may, itself,
|
|
|
+ * be thumb, in which case the linker will insert the appropriate glue. A
|
|
|
+ * call from thumb to the PLT hits the trampoline code described above.
|
|
|
+ * This code (now) builds a proper stack frame.
|
|
|
+ *
|
|
|
+ * The code does *not* set sb (r9,v6) - to do that the basic PLT instructions
|
|
|
+ * would need to save sb and load the new value and that would require
|
|
|
+ * support in the linker since it generates those instructions. (Also note
|
|
|
+ * that linux/uclibc seems to be using r10 - sl - as a PIC base register - see
|
|
|
+ * dl-startup.c).
|
|
|
*/
|
|
|
|
|
|
-#include <features.h>
|
|
|
+#include <sys/syscall.h>
|
|
|
|
|
|
-#define sl r10
|
|
|
-#define fp r11
|
|
|
-#define ip r12
|
|
|
+#include <features.h>
|
|
|
|
|
|
-.text
|
|
|
-.globl _dl_linux_resolve
|
|
|
-.type _dl_linux_resolve,%function
|
|
|
-.align 4;
|
|
|
+ .text
|
|
|
+ .align 4 @ 16 byte boundary and there are 32 bytes below (arm case)
|
|
|
+ #if !defined(__thumb__)
|
|
|
+ .arm
|
|
|
+ .globl _dl_linux_resolve
|
|
|
+ .type _dl_linux_resolve,%function
|
|
|
+ .align 4;
|
|
|
|
|
|
_dl_linux_resolve:
|
|
|
- stmdb sp!, {r0, r1, r2, r3, sl, fp}
|
|
|
- sub r1, ip, lr
|
|
|
- sub r1, r1, #4
|
|
|
- add r1, r1, r1
|
|
|
- ldr r0, [lr, #-4]
|
|
|
- mov r3,r0
|
|
|
+ @ _dl_linux_resolver is a standard subroutine call, therefore it
|
|
|
+ @ preserves everything except r0-r3 (a1-a4), ip and lr. This
|
|
|
+ @ function must branch to the real function, and that expects
|
|
|
+ @ r0-r3 and lr to be as they were before the whole PLT stuff -
|
|
|
+ @ ip can be trashed.
|
|
|
+ @
|
|
|
+ @ On entry: ip = address of the GOT entry being resolved,
|
|
|
+ @ lr = &GOT_TABLE[2], and the caller's lr is on top of the stack
|
|
|
+ @ (pushed by the first PLT entry). Exactly r0-r3 are pushed here so
|
|
|
+ @ that the ldmia below reloads r0-r3 and then that saved lr.
|
|
|
+
|
|
|
+ stmdb sp!, {r0-r3} @ save the caller's argument registers (must mirror the ldmia below)
|
|
|
+ ldr r0, [lr, #-4] @ r0 := [lr-4] (GOT_TABLE[1])
|
|
|
+ sub r1, lr, ip @ r1 := (lr-ip) (a multiple of 4)
|
|
|
+ mvn r1, r1, ASR #2 @ r1 := ~((lr-ip)>>2), since -x = (1+~x)
|
|
|
+ @ ~x = -x-1, therefore ~(r1>>2) = (-((lr-ip)>>2)-1)
|
|
|
+ @ = - ((lr-ip)/4) - 1 = (ip - lr - 4)/4, as required
|
|
|
|
|
|
bl _dl_linux_resolver
|
|
|
|
|
|
mov ip, r0
|
|
|
- ldmia sp!, {r0, r1, r2, r3, sl, fp, lr}
|
|
|
+ ldmia sp!, {r0-r3, lr} @ restore the arguments, then the lr saved by the first PLT entry
|
|
|
+
|
|
|
#if defined(__USE_BX__)
|
|
|
bx ip
|
|
|
#else
|
|
|
mov pc,ip
|
|
|
#endif
|
|
|
+#else
|
|
|
+ @ In the thumb case _dl_linux_resolver is thumb. If a bl is used
|
|
|
+ @ from arm code the linker will insert a stub call which, with
|
|
|
+ @ binutils 2.16, is not PIC. Since this code is accessed by an
|
|
|
+ @ ldr pc the reasonable fix is to make _dl_linux_resolve thumb too.
|
|
|
+ .thumb
|
|
|
+ .globl _dl_linux_resolve
|
|
|
+ .thumb_func
|
|
|
+ .type _dl_linux_resolve,%function
|
|
|
+ _dl_linux_resolve:
|
|
|
+ @ _dl_linux_resolver is a standard subroutine call, therefore it
|
|
|
+ @ preserves everything except r0-r3 (a1-a4), ip and lr. This
|
|
|
+ @ function must branch to the real function, and that expects
|
|
|
+ @ r0-r3 and lr to be as they were before the whole PLT stuff -
|
|
|
+ @ ip can be trashed.
|
|
|
+ push {r0-r3} @ save the caller's argument registers (four words)
|
|
|
+ mov r1, lr @ &GOT_TABLE[2]
|
|
|
+ sub r0, r1, #4 @ r0 := &GOT_TABLE[1]
|
|
|
+ mov r2, ip @ &GOT[n]
|
|
|
+ ldr r0, [r0] @ r0 := GOT_TABLE[1]
|
|
|
+ @ for the function call r1 := n-3
|
|
|
+ sub r1, r2 @ r1 := &GOT_TABLE[2] - &GOT[n] (byte difference)
|
|
|
+ asr r1, r1, #2 @ arithmetic shift: bytes -> 4-byte entries, sign kept
|
|
|
+ mvn r1, r1 @ exactly as in the arm code above
|
|
|
+ bl _dl_linux_resolver @ call with r0 and r1 as computed above
|
|
|
+ @ r0 contains the branch address, the return address is above
|
|
|
+ @ the saved r0..r3
|
|
|
+ mov ip, r0 @ park the branch address in ip so r0 can be restored
|
|
|
+ ldr r1, [sp, #16] @ word just above the four saved registers
|
|
|
+ mov lr, r1 @ lr := that return address
|
|
|
+ pop {r0-r3} @ restore the caller's argument registers
|
|
|
+ add sp, #4 @ drop the return-address slot already copied into lr
|
|
|
+ bx ip @ branch to the address returned in r0 by the resolver
|
|
|
+
|
|
|
+#endif
|
|
|
.size _dl_linux_resolve, .-_dl_linux_resolve
|