Browse Source

Joakim Tjernlund writes:

    > Very interesting.  Do you have any suggestions for how
    > we could fix our powerpc shared library loader

    Removing those instructions comes with a very big performance
    penalty. To flush the dcache you will have to read up to 8KB
    of dummy data, and to invalidate the icache you will have to
    execute up to 16KB of nops. I don't know of any other way from
    user space.

    Hmm, actually I think it will work reliably to perform a
    store to the same page(s) that the dcbst/icbi will act on. That
    way you will make the DTLB Error happen (if any) prior to the
    dcbst/icbi. The worst thing that can happen then is a regular
    DTLB Miss, and that works for dcbst/icbi.

    You will have to look out for a dcbst/icbi that crosses a page
    boundary. In that case you will have to perform a store to both
    pages.

     Jocke

# And again later writes:

    Hi again

    I think I know what the problem is. The
    PPC_DCBST;PPC_SYNC;PPC_ICBI;PPC_ISYNC sequence is executed
    even if no modification has been done in some cases:

     _dl_linux_resolver(), the last else has no store for insns[0].
			   There is an insns[1] = OPCODE_B(delta - 4) that
			   does not have a PPC_DCBST.

     _dl_do_lazy_reloc(), for R_PPC_NONE there is no store.
			  For R_PPC_JMP_SLOT there is an
			  insns[1] = OPCODE_B(delta) that does not
			  have a PPC_DCBST.

     _dl_do_reloc(), for R_PPC_COPY there is no store.
		     for R_PPC_JMP_SLOT there is a
		     reloc_addr[1] = OPCODE_B(delta) that does not
		     have a PPC_DCBST.

     _dl_init_got(), I THINK that the
	    PPC_DCBST(plt);
	    PPC_DCBST(plt+4);
	    PPC_DCBST(plt+8);
	    PPC_SYNC;
	    PPC_ICBI(plt);
	    PPC_ICBI(plt+4);
	    PPC_ICBI(plt+8);
	    PPC_ISYNC;
	    is off a bit. The address range does not match the sum
	    of the plt[] and tramp[] address range.

       Jocke


# And then later added the comment:


    I think that the tramp[] part should be included in the
    PPC_DCBST/PPC_ICBI sequence. Then you have to add entries for
    plt+12 and plt+16. If the tramp[] part should be excluded,
    then all is well.

   Jocke
Eric Andersen 21 years ago
parent
commit
2a01fa1548
1 changed files with 25 additions and 2 deletions
  1. 25 2
      ldso/ldso/powerpc/elfinterp.c

+ 25 - 2
ldso/ldso/powerpc/elfinterp.c

@@ -152,10 +152,14 @@ void _dl_init_got(unsigned long *plt,struct elf_resolve *tpnt)
 	PPC_DCBST(plt);
 	PPC_DCBST(plt+4);
 	PPC_DCBST(plt+8);
+	PPC_DCBST(plt+12);
+	PPC_DCBST(plt+16-1);
 	PPC_SYNC;
 	PPC_ICBI(plt);
-	PPC_ICBI(plt+4);
-	PPC_ICBI(plt+8);
+	PPC_ICBI(plt+4); /* glibc thinks this is not needed */
+	PPC_ICBI(plt+8); /* glibc thinks this is not needed */
+	PPC_ICBI(plt+12); /* glibc thinks this is not needed */
+	PPC_ICBI(plt+16-1);
 	PPC_ISYNC;
 }
 
@@ -245,7 +249,15 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
 		//PPC_SYNC;
 		//PPC_ICBI(ptr+index);
 		//PPC_ISYNC;
+
+		/* instructions were modified */
 		insns[1] = OPCODE_B(delta - 4);
+		PPC_DCBST(insn_addr+1);
+		PPC_SYNC;
+		PPC_ICBI(insn_addr+1);
+		PPC_ISYNC;
+		
+		return new_addr;
 	}
 
 	/* instructions were modified */
@@ -344,6 +356,7 @@ _dl_do_lazy_reloc (struct elf_resolve *tpnt, struct dyn_elf *scope,
 
 	switch (reloc_type) {
 		case R_PPC_NONE:
+			return 0;
 			break;
 		case R_PPC_JMP_SLOT:
 			{
@@ -380,8 +393,11 @@ _dl_do_lazy_reloc (struct elf_resolve *tpnt, struct dyn_elf *scope,
 
 	/* instructions were modified */
 	PPC_DCBST(reloc_addr);
+	PPC_DCBST(reloc_addr+1);
 	PPC_SYNC;
 	PPC_ICBI(reloc_addr);
+	PPC_ICBI(reloc_addr+1);
+	PPC_ISYNC;
 
 #if defined (__SUPPORT_LD_DEBUG__)
 	if(_dl_debug_reloc && _dl_debug_detail)
@@ -435,6 +451,7 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 #endif
 		switch (reloc_type) {
 			case R_PPC_NONE:
+				return 0;
 				break;
 			case R_PPC_REL24:
 #if 0
@@ -494,6 +511,10 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 							//DPRINTF("        index %x delta %x\n",index,delta);
 							reloc_addr[0] = OPCODE_LI(11,index*4);
 							reloc_addr[1] = OPCODE_B(delta);
+
+							/* instructions were modified */
+							PPC_DCBST(reloc_addr+1);
+							PPC_ICBI(reloc_addr+1);
 						}
 					}
 					break;
@@ -503,6 +524,7 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 				break;
 			case R_PPC_COPY:
 				// handled later
+				return 0;
 				break;
 			default:
 #if 0
@@ -521,6 +543,7 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 		PPC_DCBST(reloc_addr);
 		PPC_SYNC;
 		PPC_ICBI(reloc_addr);
+		PPC_ISYNC;
 
 #if defined (__SUPPORT_LD_DEBUG__)
 	if(_dl_debug_reloc && _dl_debug_detail)