305-mips_module_reloc.patch 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. From: Felix Fietkau <nbd@nbd.name>
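  2. Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules. To achieve this, try to load kernel modules into KSEG0 and, if that does not work, fall back to vmalloc and fix up out-of-range calls with a jump table.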
  3. lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c
  4. Signed-off-by: Felix Fietkau <nbd@nbd.name>
  5. ---
  6. arch/mips/Makefile | 5 +
  7. arch/mips/include/asm/module.h | 5 +
  8. arch/mips/kernel/module.c | 279 ++++++++++++++++++++++++++++++++++++++++-
  9. 3 files changed, 284 insertions(+), 5 deletions(-)
  10. --- a/arch/mips/Makefile
  11. +++ b/arch/mips/Makefile
  12. @@ -93,8 +93,18 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin
  13. cflags-y += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
  14. cflags-y += -msoft-float
  15. LDFLAGS_vmlinux += -G 0 -static -n -nostdlib
  16. +ifdef CONFIG_64BIT
  17. KBUILD_AFLAGS_MODULE += -mlong-calls
  18. KBUILD_CFLAGS_MODULE += -mlong-calls
  19. +else
  20. + ifdef CONFIG_DYNAMIC_FTRACE
  21. + KBUILD_AFLAGS_MODULE += -mlong-calls
  22. + KBUILD_CFLAGS_MODULE += -mlong-calls
  23. + else
  24. + KBUILD_AFLAGS_MODULE += -mno-long-calls
  25. + KBUILD_CFLAGS_MODULE += -mno-long-calls
  26. + endif
  27. +endif
  28. ifeq ($(CONFIG_RELOCATABLE),y)
  29. LDFLAGS_vmlinux += --emit-relocs
  30. --- a/arch/mips/include/asm/module.h
  31. +++ b/arch/mips/include/asm/module.h
  32. @@ -11,6 +11,11 @@ struct mod_arch_specific {
  33. const struct exception_table_entry *dbe_start;
  34. const struct exception_table_entry *dbe_end;
  35. struct mips_hi16 *r_mips_hi16_list;
  36. +
  37. + void *phys_plt_tbl;
  38. + void *virt_plt_tbl;
  39. + unsigned int phys_plt_offset;
  40. + unsigned int virt_plt_offset;
  41. };
  42. typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. */
  43. --- a/arch/mips/kernel/module.c
  44. +++ b/arch/mips/kernel/module.c
  45. @@ -44,14 +44,221 @@ struct mips_hi16 {
  46. static LIST_HEAD(dbe_list);
  47. static DEFINE_SPINLOCK(dbe_lock);
  48. -#ifdef MODULE_START
  49. +/*
  50. + * Worst-case trampoline table size, in bytes, for the init (is_init) or
  51. + * the non-init sections: 4 words per counted R_MIPS_26 relocation. Only
  52. + * used if no contiguous physically mapped memory fits the module.
  53. + */
  54. +static unsigned int
  55. +get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
  56. + const char *secstrings, unsigned int symindex, bool is_init)
  57. +{
  58. + unsigned long ret = 0;
  59. + unsigned int i, j;
  60. + Elf_Sym *syms;
  61. +
  62. + /* Walk the REL/RELA sections that apply to SHF_ALLOC sections */
  63. + for (i = 1; i < hdr->e_shnum; ++i) {
  64. + unsigned int info = sechdrs[i].sh_info;
  65. +
  66. + if (sechdrs[i].sh_type != SHT_REL
  67. + && sechdrs[i].sh_type != SHT_RELA)
  68. + continue;
  69. +
  70. + /* Not a valid relocation section? */
  71. + if (info >= hdr->e_shnum)
  72. + continue;
  73. +
  74. + /* Don't bother with non-allocated sections */
  75. + if (!(sechdrs[info].sh_flags & SHF_ALLOC))
  76. + continue;
  77. +
  78. + /* Skip relocation sections whose .init naming does not
  79. + match the pass (is_init) we are sizing */
  80. + if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
  81. + != is_init)
  82. + continue;
  83. +
  84. + syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
  85. + if (sechdrs[i].sh_type == SHT_REL) {
  86. + Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
  87. + unsigned int size = sechdrs[i].sh_size / sizeof(*rel);
  88. +
  89. + for (j = 0; j < size; ++j) {
  90. + Elf_Sym *sym;
  91. +
  92. + if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
  93. + continue;
  94. +
  95. + sym = syms + ELF_MIPS_R_SYM(rel[j]);
  96. + if (!is_init && sym->st_shndx != SHN_UNDEF) /* non-init pass counts undefined symbols only */
  97. + continue;
  98. +
  99. + ret += 4 * sizeof(int); /* room for one 4-word trampoline */
  100. + }
  101. + } else {
  102. + Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
  103. + unsigned int size = sechdrs[i].sh_size / sizeof(*rela);
  104. +
  105. + for (j = 0; j < size; ++j) {
  106. + Elf_Sym *sym;
  107. +
  108. + if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
  109. + continue;
  110. +
  111. + sym = syms + ELF_MIPS_R_SYM(rela[j]);
  112. + if (!is_init && sym->st_shndx != SHN_UNDEF) /* non-init pass counts undefined symbols only */
  113. + continue;
  114. +
  115. + ret += 4 * sizeof(int); /* room for one 4-word trampoline */
  116. + }
  117. + }
  118. + }
  119. +
  120. + return ret;
  121. +}
  122. +
  123. +#ifndef MODULE_START
  124. +static void *alloc_phys(unsigned long size)
  125. +{
  126. + unsigned order;
  127. + struct page *page;
  128. + struct page *p;
  129. + /* Get page-rounded, physically contiguous memory; NULL if none */
  130. + size = PAGE_ALIGN(size);
  131. + order = get_order(size);
  132. +
  133. + page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
  134. + __GFP_THISNODE, order);
  135. + if (!page)
  136. + return NULL;
  137. + /* Split the block so that its pages can be freed one at a time */
  138. + split_page(page, order);
  139. +
  140. + /* mark all pages except for the last one; free_phys() stops at the first unmarked page */
  141. + for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p)
  142. + set_bit(PG_owner_priv_1, &p->flags);
  143. + /* Give back the pages of the 2^order block that lie beyond size */
  144. + for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p)
  145. + __free_page(p);
  146. +
  147. + return page_address(page);
  148. +}
  149. +#endif
  150. +
  151. +static void free_phys(void *ptr)
  152. +{
  153. + struct page *page;
  154. + bool free;
  155. + /* Free page by page; alloc_phys() set PG_owner_priv_1 on every page but the last */
  156. + page = virt_to_page(ptr);
  157. + do {
  158. + free = test_and_clear_bit(PG_owner_priv_1, &page->flags); /* was this page marked as not-the-last? */
  159. + __free_page(page);
  160. + page++;
  161. + } while (free);
  162. +}
  163. +
  164. +
  165. void *module_alloc(unsigned long size)
  166. {
  167. +#ifdef MODULE_START /* allocate from the MODULE_START..MODULE_END range */
  168. return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
  169. GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
  170. __builtin_return_address(0));
  171. +#else
  172. + void *ptr;
  173. +
  174. + if (size == 0)
  175. + return NULL;
  176. + /* First choice: physically contiguous pages from alloc_phys() */
  177. + ptr = alloc_phys(size);
  178. +
  179. + /* If we failed to allocate physically contiguous memory,
  180. + * fall back to regular vmalloc. The module loader code will
  181. + * create jump tables to handle long jumps */
  182. + if (!ptr)
  183. + return vmalloc(size);
  184. +
  185. + return ptr;
  186. +#endif
  187. }
  188. +
  189. +static inline bool is_phys_addr(void *ptr)
  190. +{ /* true if ptr is a CKSEG0 (64-bit) or KSEG0 (32-bit) address */
  191. +#ifdef CONFIG_64BIT
  192. + return (KSEGX((unsigned long)ptr) == CKSEG0);
  193. +#else
  194. + return (KSEGX(ptr) == KSEG0);
  195. #endif
  196. +}
  197. +
  198. +/* Free memory returned from module_alloc: free_phys() for KSEG0/CKSEG0 addresses, vfree() otherwise */
  199. +void module_memfree(void *module_region)
  200. +{
  201. + if (is_phys_addr(module_region))
  202. + free_phys(module_region);
  203. + else
  204. + vfree(module_region);
  205. +}
  206. +
  207. +static void *__module_alloc(int size, bool phys)
  208. +{
  209. + void *ptr;
  210. + /* Trampoline table memory: kmalloc when phys is set, vmalloc otherwise */
  211. + if (phys)
  212. + ptr = kmalloc(size, GFP_KERNEL);
  213. + else
  214. + ptr = vmalloc(size);
  215. + return ptr;
  216. +}
  217. +
  218. +static void __module_free(void *ptr)
  219. +{ /* counterpart of __module_alloc: kfree for KSEG0/CKSEG0 pointers, vfree otherwise */
  220. + if (is_phys_addr(ptr))
  221. + kfree(ptr);
  222. + else
  223. + vfree(ptr);
  224. +}
  225. +
  226. +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
  227. + char *secstrings, struct module *mod)
  228. +{
  229. + unsigned int symindex = 0;
  230. + unsigned int core_size, init_size;
  231. + int i;
  232. + /* Size and allocate both trampoline tables of mod; returns 0 or -ENOMEM */
  233. + mod->arch.phys_plt_offset = 0;
  234. + mod->arch.virt_plt_offset = 0;
  235. + mod->arch.phys_plt_tbl = NULL;
  236. + mod->arch.virt_plt_tbl = NULL;
  237. + /* On 64-bit kernels both tables stay NULL */
  238. + if (IS_ENABLED(CONFIG_64BIT))
  239. + return 0;
  240. + /* Locate the symbol table section (the last SHT_SYMTAB wins) */
  241. + for (i = 1; i < hdr->e_shnum; i++)
  242. + if (sechdrs[i].sh_type == SHT_SYMTAB)
  243. + symindex = i;
  244. + /* Worst-case trampoline bytes for the non-init and the init sections */
  245. + core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
  246. + init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
  247. +
  248. + if ((core_size + init_size) == 0)
  249. + return 0;
  250. + /* One table from kmalloc (phys = 1), then one from vmalloc (phys = 0) */
  251. + mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1);
  252. + if (!mod->arch.phys_plt_tbl)
  253. + return -ENOMEM;
  254. +
  255. + mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0);
  256. + if (!mod->arch.virt_plt_tbl) {
  257. + __module_free(mod->arch.phys_plt_tbl); /* do not leak the first table */
  258. + mod->arch.phys_plt_tbl = NULL;
  259. + return -ENOMEM;
  260. + }
  261. +
  262. + return 0;
  263. +}
  264. int apply_r_mips_none(struct module *me, u32 *location, Elf_Addr v)
  265. {
  266. @@ -65,8 +272,39 @@ static int apply_r_mips_32_rel(struct mo
  267. return 0;
  268. }
  269. +static Elf_Addr add_plt_entry_to(unsigned *plt_offset,
  270. + void *start, Elf_Addr v)
  271. +{ /* emit a 4-word trampoline at start + *plt_offset that jumps to v; returns its address */
  272. + unsigned *tramp = start + *plt_offset;
  273. + *plt_offset += 4 * sizeof(int);
  274. + /* NOTE(review): start is not NULL-checked and *plt_offset is not bounded here */
  275. + /* addiu sign-extends lo16, so carry into hi16 when bit 15 of v is set */
  276. + if (v & 0x00008000)
  277. + v += 0x10000;
  278. +
  279. + tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */
  280. + tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */
  281. + tramp[2] = 0x03200008; /* jr t9 */
  282. + tramp[3] = 0x00000000; /* nop (fills the jr delay slot) */
  283. +
  284. + return (Elf_Addr) tramp;
  285. +}
  286. +
  287. +static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
  288. +{ /* use the phys table when the patched instruction is in KSEG0/CKSEG0, else the virt table */
  289. + if (is_phys_addr(location))
  290. + return add_plt_entry_to(&me->arch.phys_plt_offset,
  291. + me->arch.phys_plt_tbl, v);
  292. + else
  293. + return add_plt_entry_to(&me->arch.virt_plt_offset,
  294. + me->arch.virt_plt_tbl, v);
  295. +
  296. +}
  297. +
  298. static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v)
  299. {
  300. + u32 ofs = *location & 0x03ffffff;
  301. +
  302. if (v % 4) {
  303. pr_err("module %s: dangerous R_MIPS_26 REL relocation\n",
  304. me->name);
  305. @@ -74,13 +312,17 @@ static int apply_r_mips_26_rel(struct mo
  306. }
  307. if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
  308. - pr_err("module %s: relocation overflow\n",
  309. - me->name);
  310. - return -ENOEXEC;
  311. + v = add_plt_entry(me, location, v + (ofs << 2));
  312. + if (!v) {
  313. + pr_err("module %s: relocation overflow\n",
  314. + me->name);
  315. + return -ENOEXEC;
  316. + }
  317. + ofs = 0;
  318. }
  319. *location = (*location & ~0x03ffffff) |
  320. - ((*location + (v >> 2)) & 0x03ffffff);
  321. + ((ofs + (v >> 2)) & 0x03ffffff);
  322. return 0;
  323. }
  324. @@ -349,9 +591,36 @@ int module_finalize(const Elf_Ehdr *hdr,
  325. list_add(&me->arch.dbe_list, &dbe_list);
  326. spin_unlock_irq(&dbe_lock);
  327. }
  328. +
  329. + /* Free each fixup trampoline table that relocation left unused, e.g.
  330. + * when the module runs from physically mapped address space */
  331. + if (me->arch.phys_plt_offset == 0) {
  332. + __module_free(me->arch.phys_plt_tbl);
  333. + me->arch.phys_plt_tbl = NULL;
  334. + }
  335. + if (me->arch.virt_plt_offset == 0) {
  336. + __module_free(me->arch.virt_plt_tbl);
  337. + me->arch.virt_plt_tbl = NULL;
  338. + }
  339. +
  340. return 0;
  341. }
  342. +void module_arch_freeing_init(struct module *mod)
  343. +{ /* release the trampoline tables unless the module is live */
  344. + if (mod->state == MODULE_STATE_LIVE) /* a live module keeps its tables */
  345. + return;
  346. +
  347. + if (mod->arch.phys_plt_tbl) {
  348. + __module_free(mod->arch.phys_plt_tbl);
  349. + mod->arch.phys_plt_tbl = NULL; /* guard against a second free */
  350. + }
  351. + if (mod->arch.virt_plt_tbl) {
  352. + __module_free(mod->arch.virt_plt_tbl);
  353. + mod->arch.virt_plt_tbl = NULL; /* guard against a second free */
  354. + }
  355. +}
  356. +
  357. void module_arch_cleanup(struct module *mod)
  358. {
  359. spin_lock_irq(&dbe_lock);