123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366 |
- From: Felix Fietkau <nbd@nbd.name>
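- Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules; to achieve this, try to load kernel modules into KSEG0 and, if that does not work, use vmalloc and fix up relocations with a jump table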
- lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c
- Signed-off-by: Felix Fietkau <nbd@nbd.name>
- ---
- arch/mips/Makefile | 5 +
- arch/mips/include/asm/module.h | 5 +
- arch/mips/kernel/module.c | 279 ++++++++++++++++++++++++++++++++++++++++-
- 3 files changed, 284 insertions(+), 5 deletions(-)
- --- a/arch/mips/Makefile
- +++ b/arch/mips/Makefile
- @@ -93,8 +93,13 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin
- cflags-y += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
- cflags-y += -msoft-float
- LDFLAGS_vmlinux += -G 0 -static -n -nostdlib
- +ifdef CONFIG_64BIT # 64-bit kernels keep building modules with -mlong-calls
- KBUILD_AFLAGS_MODULE += -mlong-calls
- KBUILD_CFLAGS_MODULE += -mlong-calls
- +else # every other configuration builds modules with -mno-long-calls
- +KBUILD_AFLAGS_MODULE += -mno-long-calls
- +KBUILD_CFLAGS_MODULE += -mno-long-calls
- +endif
-
- ifeq ($(CONFIG_RELOCATABLE),y)
- LDFLAGS_vmlinux += --emit-relocs
- --- a/arch/mips/include/asm/module.h
- +++ b/arch/mips/include/asm/module.h
- @@ -12,6 +12,11 @@ struct mod_arch_specific {
- const struct exception_table_entry *dbe_start;
- const struct exception_table_entry *dbe_end;
- struct mips_hi16 *r_mips_hi16_list;
- +
- + void *phys_plt_tbl; /* trampoline table obtained with kmalloc() in module.c */
- + void *virt_plt_tbl; /* trampoline table obtained with vmalloc() in module.c */
- + unsigned int phys_plt_offset; /* bytes already handed out from phys_plt_tbl */
- + unsigned int virt_plt_offset; /* bytes already handed out from virt_plt_tbl */
- };
-
- typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. */
- --- a/arch/mips/kernel/module.c
- +++ b/arch/mips/kernel/module.c
- @@ -44,14 +44,221 @@ struct mips_hi16 {
- static LIST_HEAD(dbe_list);
- static DEFINE_SPINLOCK(dbe_lock);
-
- -#ifdef MODULE_START
- +/*
- + * Upper bound, in bytes, on the trampoline space needed for either the
- + * init (is_init) or the non-init relocation sections of a module:
- + * 4 * sizeof(int) bytes are reserved per counted R_MIPS_26 relocation.
- + */
- +static unsigned int
- +get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
- + const char *secstrings, unsigned int symindex, bool is_init)
- +{
- + unsigned long ret = 0;
- + unsigned int i, j;
- + Elf_Sym *syms;
- +
- + /* Walk every REL/RELA section that applies to an ALLOC section */
- + for (i = 1; i < hdr->e_shnum; ++i) {
- + unsigned int info = sechdrs[i].sh_info;
- +
- + if (sechdrs[i].sh_type != SHT_REL
- + && sechdrs[i].sh_type != SHT_RELA)
- + continue;
- +
- + /* Not a valid relocation section? */
- + if (info >= hdr->e_shnum)
- + continue;
- +
- + /* Don't bother with non-allocated sections */
- + if (!(sechdrs[info].sh_flags & SHF_ALLOC))
- + continue;
- +
- + /* Only keep relocation sections whose name contains
- + ".init" exactly when is_init is set */
- + if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
- + != is_init)
- + continue;
- +
- + syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
- + if (sechdrs[i].sh_type == SHT_REL) {
- + Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
- + unsigned int size = sechdrs[i].sh_size / sizeof(*rel); /* number of entries */
- +
- + for (j = 0; j < size; ++j) {
- + Elf_Sym *sym;
- +
- + if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
- + continue;
- +
- + sym = syms + ELF_MIPS_R_SYM(rel[j]);
- + if (!is_init && sym->st_shndx != SHN_UNDEF) /* non-init: count only undefined symbols */
- + continue;
- +
- + ret += 4 * sizeof(int); /* one four-word trampoline slot */
- + }
- + } else {
- + Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
- + unsigned int size = sechdrs[i].sh_size / sizeof(*rela); /* number of entries */
- +
- + for (j = 0; j < size; ++j) {
- + Elf_Sym *sym;
- +
- + if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
- + continue;
- +
- + sym = syms + ELF_MIPS_R_SYM(rela[j]);
- + if (!is_init && sym->st_shndx != SHN_UNDEF) /* non-init: count only undefined symbols */
- + continue;
- +
- + ret += 4 * sizeof(int); /* one four-word trampoline slot */
- + }
- + }
- + }
- +
- + return ret;
- +}
- +/* Allocate page-aligned "size" bytes via alloc_pages(); returns NULL on failure */
- +#ifndef MODULE_START
- +static void *alloc_phys(unsigned long size)
- +{
- + unsigned order;
- + struct page *page;
- + struct page *p;
- +
- + size = PAGE_ALIGN(size);
- + order = get_order(size);
- +
- + page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
- + __GFP_THISNODE, order);
- + if (!page)
- + return NULL;
- +
- + split_page(page, order); /* pages are freed one at a time below and in free_phys() */
- +
- + /* tag every kept page except the last; free_phys() stops after the first untagged page */
- + for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p)
- + set_bit(PG_owner_priv_1, &p->flags);
- +
- + for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p) /* give back the unused tail of the 2^order block */
- + __free_page(p);
- +
- + return page_address(page);
- +}
- +#endif
- +/* Free a region from alloc_phys(): frees each tagged page plus the first untagged one */
- +static void free_phys(void *ptr)
- +{
- + struct page *page;
- + bool free;
- +
- + page = virt_to_page(ptr);
- + do {
- + free = test_and_clear_bit(PG_owner_priv_1, &page->flags); /* tag set => another page follows */
- + __free_page(page);
- + page++;
- + } while (free);
- +}
- +
- +/* Without MODULE_START: try alloc_phys() first and fall back to vmalloc() */
- void *module_alloc(unsigned long size)
- {
- +#ifdef MODULE_START
- return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
- GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
- +#else
- + void *ptr;
- +
- + if (size == 0)
- + return NULL;
- +
- + ptr = alloc_phys(size);
- +
- + /* If we failed to allocate physically contiguous memory,
- + * fall back to regular vmalloc. The module loader code will
- + * create jump tables to handle long jumps */
- + if (!ptr)
- + return vmalloc(size);
- +
- + return ptr;
- +#endif
- }
- +/* True when KSEGX(ptr) places ptr in KSEG0 (CKSEG0 on 64-bit builds) */
- +static inline bool is_phys_addr(void *ptr)
- +{
- +#ifdef CONFIG_64BIT
- + return (KSEGX((unsigned long)ptr) == CKSEG0);
- +#else
- + return (KSEGX(ptr) == KSEG0);
- #endif
- +}
- +
- +/* Free memory returned from module_alloc: free_phys() for KSEG0 addresses, vfree() otherwise */
- +void module_memfree(void *module_region)
- +{
- + if (is_phys_addr(module_region))
- + free_phys(module_region);
- + else
- + vfree(module_region);
- +}
- +/* Allocate "size" bytes for a trampoline table: kmalloc() when phys is set, vmalloc() otherwise */
- +static void *__module_alloc(int size, bool phys)
- +{
- + void *ptr;
- +
- + if (phys)
- + ptr = kmalloc(size, GFP_KERNEL);
- + else
- + ptr = vmalloc(size);
- + return ptr;
- +}
- +/* Free a table from __module_alloc(), choosing kfree() or vfree() by its address */
- +static void __module_free(void *ptr)
- +{
- + if (is_phys_addr(ptr))
- + kfree(ptr);
- + else
- + vfree(ptr);
- +}
- +/* Reset the trampoline state and, on non-64-bit builds, allocate both tables; returns 0 or -ENOMEM */
- +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
- + char *secstrings, struct module *mod)
- +{
- + unsigned int symindex = 0;
- + unsigned int core_size, init_size;
- + int i;
- +
- + mod->arch.phys_plt_offset = 0;
- + mod->arch.virt_plt_offset = 0;
- + mod->arch.phys_plt_tbl = NULL;
- + mod->arch.virt_plt_tbl = NULL;
- +
- + if (IS_ENABLED(CONFIG_64BIT)) /* 64-bit modules are built with -mlong-calls: no tables allocated */
- + return 0;
- +
- + for (i = 1; i < hdr->e_shnum; i++)
- + if (sechdrs[i].sh_type == SHT_SYMTAB)
- + symindex = i; /* the last SHT_SYMTAB section found is used */
- +
- + core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
- + init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
- +
- + if ((core_size + init_size) == 0) /* nothing to reserve */
- + return 0;
- +
- + mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1); /* each table is sized for every counted relocation */
- + if (!mod->arch.phys_plt_tbl)
- + return -ENOMEM;
- +
- + mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0);
- + if (!mod->arch.virt_plt_tbl) {
- + __module_free(mod->arch.phys_plt_tbl); /* undo the first allocation */
- + mod->arch.phys_plt_tbl = NULL;
- + return -ENOMEM;
- + }
- +
- + return 0;
- +}
-
- static int apply_r_mips_none(struct module *me, u32 *location,
- u32 base, Elf_Addr v, bool rela)
- @@ -67,9 +274,40 @@ static int apply_r_mips_32(struct module
- return 0;
- }
-
- +static Elf_Addr add_plt_entry_to(unsigned *plt_offset, /* in/out: bytes already used in the table at start */
- + void *start, Elf_Addr v) /* writes a 4-word jump to v and returns the trampoline's address */
- +{
- + unsigned *tramp = start + *plt_offset; /* NOTE(review): start is not NULL-checked and no table bound is enforced here - confirm callers guarantee both */
- + *plt_offset += 4 * sizeof(int); /* consume one four-word slot */
- +
- + /* addiu sign-extends its 16-bit immediate: bump the high half when bit 15 is set */
- + if (v & 0x00008000)
- + v += 0x10000;
- +
- + tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */
- + tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */
- + tramp[2] = 0x03200008; /* jr t9 */
- + tramp[3] = 0x00000000; /* nop */
- +
- + return (Elf_Addr) tramp;
- +}
- +/* Add a trampoline to v in the phys or virt table, chosen by is_phys_addr(location) */
- +static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
- +{
- + if (is_phys_addr(location))
- + return add_plt_entry_to(&me->arch.phys_plt_offset,
- + me->arch.phys_plt_tbl, v);
- + else
- + return add_plt_entry_to(&me->arch.virt_plt_offset,
- + me->arch.virt_plt_tbl, v);
- +
- +}
- +
- static int apply_r_mips_26(struct module *me, u32 *location,
- u32 base, Elf_Addr v, bool rela)
- {
- + u32 ofs = base & 0x03ffffff;
- +
- if (v % 4) {
- pr_err("module %s: dangerous R_MIPS_26 relocation\n",
- me->name);
- @@ -77,13 +315,17 @@ static int apply_r_mips_26(struct module
- }
-
- if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
- - pr_err("module %s: relocation overflow\n",
- - me->name);
- - return -ENOEXEC;
- + v = add_plt_entry(me, location, v + (ofs << 2));
- + if (!v) {
- + pr_err("module %s: relocation overflow\n",
- + me->name);
- + return -ENOEXEC;
- + }
- + ofs = 0;
- }
-
- *location = (*location & ~0x03ffffff) |
- - ((base + (v >> 2)) & 0x03ffffff);
- + ((ofs + (v >> 2)) & 0x03ffffff);
-
- return 0;
- }
- @@ -459,9 +701,36 @@ int module_finalize(const Elf_Ehdr *hdr,
- list_add(&me->arch.dbe_list, &dbe_list);
- spin_unlock_irq(&dbe_lock);
- }
- +
- + /* Get rid of the fixup trampoline if we're running the module
- + * from physically mapped address space */
- + if (me->arch.phys_plt_offset == 0) {
- + __module_free(me->arch.phys_plt_tbl);
- + me->arch.phys_plt_tbl = NULL;
- + }
- + if (me->arch.virt_plt_offset == 0) {
- + __module_free(me->arch.virt_plt_tbl);
- + me->arch.virt_plt_tbl = NULL;
- + }
- +
- return 0;
- }
-
- +void module_arch_freeing_init(struct module *mod)
- +{
- + if (mod->state == MODULE_STATE_LIVE) /* a live module keeps its trampoline tables */
- + return;
- +
- + if (mod->arch.phys_plt_tbl) { /* otherwise release whichever tables are still allocated */
- + __module_free(mod->arch.phys_plt_tbl);
- + mod->arch.phys_plt_tbl = NULL;
- + }
- + if (mod->arch.virt_plt_tbl) {
- + __module_free(mod->arch.virt_plt_tbl);
- + mod->arch.virt_plt_tbl = NULL;
- + }
- +}
- +
- void module_arch_cleanup(struct module *mod)
- {
- spin_lock_irq(&dbe_lock);
|