- From: James Hogan <james.hogan@imgtec.com>
- Date: Mon, 25 Jan 2016 21:30:00 +0000
- Subject: [PATCH] MIPS: c-r4k: Use IPI calls for CM indexed cache ops
- The Coherence Manager (CM) can propagate address-based ("hit") cache
- operations to other cores in the coherent system, relieving software of
- the need to use IPI calls for them. However, indexed cache operations
- are not propagated, since doing so makes no sense for separate caches.
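- To illustrate (snippet for illustration only, not part of this patch; it
- uses the existing cache_op() helper from <asm/r4kcache.h> and the op
- encodings from <asm/cacheops.h>): a "hit" op names a virtual address and
- can be globalized by the CM, while an "index" op names a set/way in the
- local cache only, so propagating it would just hit unrelated lines in
- another core's cache:
-
-	#include <asm/cacheops.h>
-	#include <asm/r4kcache.h>
-
-	static void hit_vs_index_example(unsigned long vaddr)
-	{
-		/* Hit op: acts on whichever line currently holds vaddr */
-		cache_op(Hit_Writeback_Inv_D, vaddr);
-
-		/* Index op: acts on a fixed line of this core's D-cache */
-		cache_op(Index_Writeback_Inv_D, INDEX_BASE);
-	}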
- r4k_on_each_cpu() previously had a special case for CONFIG_MIPS_MT_SMP,
- intended to avoid the IPIs when the only other CPUs in the system were
- other VPEs in the same core, and hence sharing the same caches. This was
- changed by commit cccf34e9411c ("MIPS: c-r4k: Fix cache flushing for MT
- cores") to apparently handle multi-core multi-VPE systems, but it
- focused mainly on hit cache ops, so the IPI calls were still disabled
- entirely for CM systems.
- This doesn't normally cause problems, but tests can be written to hit
- these corner cases by using multiple threads, or by changing task
- affinities to force the process to migrate cores. For example, the
- failure of mprotect RW->RX to globally sync icaches (via
- flush_cache_range) can be detected by modifying and mprotecting a code
- page on one core, then migrating to a different core to execute from it.
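- A minimal userspace sketch of that test (illustrative only, not part of
- this patch; it assumes at least two online CPUs, MIPS32 instruction
- encodings, and omits error checking):
-
-	#define _GNU_SOURCE
-	#include <sched.h>
-	#include <stdint.h>
-	#include <stdio.h>
-	#include <sys/mman.h>
-	#include <unistd.h>
-
-	static void run_on(int cpu)
-	{
-		cpu_set_t set;
-
-		CPU_ZERO(&set);
-		CPU_SET(cpu, &set);
-		sched_setaffinity(0, sizeof(set), &set);
-	}
-
-	int main(void)
-	{
-		long pagesz = sysconf(_SC_PAGESIZE);
-		uint32_t *code = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
-				      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-		int (*fn)(void) = (int (*)(void))code;
-
-		/* On CPU 0, write a tiny routine into the RW page */
-		run_on(0);
-		code[0] = 0x03e00008;	/* jr   $ra                    */
-		code[1] = 0x3402002a;	/* ori  $v0, $zero, 42 (delay) */
-
-		/* RW -> RX: flush_cache_range() must sync icaches globally */
-		mprotect(code, pagesz, PROT_READ | PROT_EXEC);
-
-		/* Migrate to CPU 1 and execute from the fresh mapping */
-		run_on(1);
-		printf("returned %d (expected 42)\n", fn());
-		return 0;
-	}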
- Most of the functions called by r4k_on_each_cpu() perform cache
- operations exclusively with a single addressing-type (virtual address vs
- indexed), so add a type argument and modify the callers to pass in
- R4K_USER (user virtual addressing), R4K_KERN (global kernel virtual
- addressing) or R4K_INDEX (index into cache).
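- For example (taken from the hunks below), an index-only flush and a
- user-address flush become:
-
-	r4k_on_each_cpu(R4K_INDEX, local_r4k___flush_cache_all, NULL);
-	r4k_on_each_cpu(R4K_USER, local_r4k_flush_cache_sigtramp, (void *)addr);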
- local_r4k_flush_icache_range() is split up, to allow it to be called
- from the rest of the kernel, or from r4k_flush_icache_range() where it
- will choose either indexed or hit cache operations based on the size of
- the range and the cache sizes.
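- For instance, with (hypothetical) 32KB primary I- and D-caches and no
- cpu_has_ic_fills_f_dc, a flush of up to 32KB stays hit-based
- (2 * 32KB <= 32KB + 32KB), while anything larger falls back to indexed
- ops and hence to an IPI call when the CM cannot globalize them.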
- local_r4k_flush_kernel_vmap_range() is split into two functions, each of
- which uses cache operations with a single addressing-type, with
- r4k_flush_kernel_vmap_range() making the decision whether to use indexed
- cache ops or not.
- Signed-off-by: James Hogan <james.hogan@imgtec.com>
- Cc: Ralf Baechle <ralf@linux-mips.org>
- Cc: Paul Burton <paul.burton@imgtec.com>
- Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com>
- Cc: linux-mips@linux-mips.org
- ---
- --- a/arch/mips/mm/c-r4k.c
- +++ b/arch/mips/mm/c-r4k.c
- @@ -40,6 +40,50 @@
- #include <asm/mips-cm.h>
-
- /*
- + * Bits describing what cache ops an IPI callback function may perform.
- + *
- + * R4K_USER - Virtual user address based cache operations.
- + * Ineffective on other CPUs.
- + * R4K_KERN - Virtual kernel address based cache operations (including kmap).
- + * Effective on other CPUs.
- + * R4K_INDEX - Index based cache operations.
- + * Effective on other CPUs.
- + */
- +
- +#define R4K_USER BIT(0)
- +#define R4K_KERN BIT(1)
- +#define R4K_INDEX BIT(2)
- +
- +#ifdef CONFIG_SMP
- +/* The Coherence Manager propagates address-based cache ops to other cores */
- +#define r4k_hit_globalized mips_cm_present()
- +#define r4k_index_globalized 0
- +#else
- +/* If there's only 1 CPU, then all cache ops are globalized to that 1 CPU */
- +#define r4k_hit_globalized 1
- +#define r4k_index_globalized 1
- +#endif
- +
- +/**
- + * r4k_op_needs_ipi() - Decide if a cache op needs to be done on every core.
- + * @type: Type of cache operations (R4K_USER, R4K_KERN or R4K_INDEX).
- + *
- + * Returns: 1 if the cache operation @type should be done on every core in
- + * the system.
- + * 0 if the cache operation @type is globalized and only needs to
- + * be performed on a single CPU.
- + */
- +static inline bool r4k_op_needs_ipi(unsigned int type)
- +{
- + /*
- + * If hardware doesn't globalize the required cache ops we must use IPIs
- + * to do so.
- + */
- + return (type & R4K_KERN && !r4k_hit_globalized) ||
- + (type & R4K_INDEX && !r4k_index_globalized);
- +}
- +
- +/*
- * Special Variant of smp_call_function for use by cache functions:
- *
- * o No return value
- @@ -48,19 +92,11 @@
- * primary cache.
- * o doesn't disable interrupts on the local CPU
- */
- -static inline void r4k_on_each_cpu(void (*func) (void *info), void *info)
- +static inline void r4k_on_each_cpu(unsigned int type,
- + void (*func) (void *info), void *info)
- {
- preempt_disable();
- -
- - /*
- - * The Coherent Manager propagates address-based cache ops to other
- - * cores but not index-based ops. However, r4k_on_each_cpu is used
- - * in both cases so there is no easy way to tell what kind of op is
- - * executed to the other cores. The best we can probably do is
- - * to restrict that call when a CM is not present because both
- - * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops.
- - */
- - if (!mips_cm_present())
- + if (r4k_op_needs_ipi(type))
- smp_call_function_many(&cpu_foreign_map, func, info, 1);
- func(info);
- preempt_enable();
- @@ -456,7 +492,7 @@ static inline void local_r4k___flush_cac
-
- static void r4k___flush_cache_all(void)
- {
- - r4k_on_each_cpu(local_r4k___flush_cache_all, NULL);
- + r4k_on_each_cpu(R4K_INDEX, local_r4k___flush_cache_all, NULL);
- }
-
- static inline int has_valid_asid(const struct mm_struct *mm)
- @@ -503,7 +539,7 @@ static void r4k_flush_cache_range(struct
- int exec = vma->vm_flags & VM_EXEC;
-
- if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc))
- - r4k_on_each_cpu(local_r4k_flush_cache_range, vma);
- + r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_range, vma);
- }
-
- static inline void local_r4k_flush_cache_mm(void * args)
- @@ -535,7 +571,7 @@ static void r4k_flush_cache_mm(struct mm
- if (!cpu_has_dc_aliases)
- return;
-
- - r4k_on_each_cpu(local_r4k_flush_cache_mm, mm);
- + r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_mm, mm);
- }
-
- struct flush_cache_page_args {
- @@ -629,7 +665,7 @@ static void r4k_flush_cache_page(struct
- args.addr = addr;
- args.pfn = pfn;
-
- - r4k_on_each_cpu(local_r4k_flush_cache_page, &args);
- + r4k_on_each_cpu(R4K_KERN, local_r4k_flush_cache_page, &args);
- }
-
- static inline void local_r4k_flush_data_cache_page(void * addr)
- @@ -642,18 +678,23 @@ static void r4k_flush_data_cache_page(un
- if (in_atomic())
- local_r4k_flush_data_cache_page((void *)addr);
- else
- - r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr);
- + r4k_on_each_cpu(R4K_KERN, local_r4k_flush_data_cache_page,
- + (void *) addr);
- }
-
- struct flush_icache_range_args {
- unsigned long start;
- unsigned long end;
- + unsigned int type;
- };
-
- -static inline void local_r4k_flush_icache_range(unsigned long start, unsigned long end)
- +static inline void __local_r4k_flush_icache_range(unsigned long start,
- + unsigned long end,
- + unsigned int type)
- {
- if (!cpu_has_ic_fills_f_dc) {
- - if (end - start >= dcache_size) {
- + if (type == R4K_INDEX ||
- + (type & R4K_INDEX && end - start >= dcache_size)) {
- r4k_blast_dcache();
- } else {
- R4600_HIT_CACHEOP_WAR_IMPL;
- @@ -661,7 +702,8 @@ static inline void local_r4k_flush_icach
- }
- }
-
- - if (end - start > icache_size)
- + if (type == R4K_INDEX ||
- + (type & R4K_INDEX && end - start > icache_size))
- r4k_blast_icache();
- else {
- switch (boot_cpu_type()) {
- @@ -687,23 +729,59 @@ static inline void local_r4k_flush_icach
- #endif
- }
-
- +static inline void local_r4k_flush_icache_range(unsigned long start,
- + unsigned long end)
- +{
- + __local_r4k_flush_icache_range(start, end, R4K_KERN | R4K_INDEX);
- +}
- +
- static inline void local_r4k_flush_icache_range_ipi(void *args)
- {
- struct flush_icache_range_args *fir_args = args;
- unsigned long start = fir_args->start;
- unsigned long end = fir_args->end;
- + unsigned int type = fir_args->type;
-
- - local_r4k_flush_icache_range(start, end);
- + __local_r4k_flush_icache_range(start, end, type);
- }
-
- static void r4k_flush_icache_range(unsigned long start, unsigned long end)
- {
- struct flush_icache_range_args args;
- + unsigned long size, cache_size;
-
- args.start = start;
- args.end = end;
- + args.type = R4K_KERN | R4K_INDEX;
-
- - r4k_on_each_cpu(local_r4k_flush_icache_range_ipi, &args);
- + if (in_atomic()) {
- + /*
- + * We can't do blocking IPI calls from atomic context, so fall
- + * back to pure address-based cache ops if they globalize.
- + */
- + if (!r4k_index_globalized && r4k_hit_globalized) {
- + args.type &= ~R4K_INDEX;
- + } else {
- + /* Just do it locally instead. */
- + local_r4k_flush_icache_range(start, end);
- + instruction_hazard();
- + return;
- + }
- + } else if (!r4k_index_globalized && r4k_hit_globalized) {
- + /*
- + * If address-based cache ops are globalized, then we may be
- + * able to avoid the IPI for small flushes.
- + */
- + size = end - start;
- + cache_size = icache_size;
- + if (!cpu_has_ic_fills_f_dc) {
- + size *= 2;
- + cache_size += dcache_size;
- + }
- + if (size <= cache_size)
- + args.type &= ~R4K_INDEX;
- + }
- + r4k_on_each_cpu(args.type, local_r4k_flush_icache_range_ipi, &args);
- instruction_hazard();
- }
-
- @@ -823,7 +901,12 @@ static void local_r4k_flush_cache_sigtra
-
- static void r4k_flush_cache_sigtramp(unsigned long addr)
- {
- - r4k_on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr);
- + /*
- + * FIXME this is a bit broken when !r4k_hit_globalized, since the user
- + * code probably won't be mapped on other CPUs, so if the process is
- + * migrated, it could end up hitting stale icache lines.
- + */
- + r4k_on_each_cpu(R4K_USER, local_r4k_flush_cache_sigtramp, (void *)addr);
- }
-
- static void r4k_flush_icache_all(void)
- @@ -837,6 +920,15 @@ struct flush_kernel_vmap_range_args {
- int size;
- };
-
- +static inline void local_r4k_flush_kernel_vmap_range_index(void *args)
- +{
- + /*
- + * Aliases only affect the primary caches so don't bother with
- + * S-caches or T-caches.
- + */
- + r4k_blast_dcache();
- +}
- +
- static inline void local_r4k_flush_kernel_vmap_range(void *args)
- {
- struct flush_kernel_vmap_range_args *vmra = args;
- @@ -847,12 +939,8 @@ static inline void local_r4k_flush_kerne
- * Aliases only affect the primary caches so don't bother with
- * S-caches or T-caches.
- */
- - if (cpu_has_safe_index_cacheops && size >= dcache_size)
- - r4k_blast_dcache();
- - else {
- - R4600_HIT_CACHEOP_WAR_IMPL;
- - blast_dcache_range(vaddr, vaddr + size);
- - }
- + R4600_HIT_CACHEOP_WAR_IMPL;
- + blast_dcache_range(vaddr, vaddr + size);
- }
-
- static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size)
- @@ -862,7 +950,12 @@ static void r4k_flush_kernel_vmap_range(
- args.vaddr = (unsigned long) vaddr;
- args.size = size;
-
- - r4k_on_each_cpu(local_r4k_flush_kernel_vmap_range, &args);
- + if (cpu_has_safe_index_cacheops && size >= dcache_size)
- + r4k_on_each_cpu(R4K_INDEX,
- + local_r4k_flush_kernel_vmap_range_index, NULL);
- + else
- + r4k_on_each_cpu(R4K_KERN, local_r4k_flush_kernel_vmap_range,
- + &args);
- }
-
- static inline void rm7k_erratum31(void)
|