5fb7dc37dc
The per cpu data section contains two types of data: one set which is exclusively accessed by the local cpu, and another set which is per cpu but also shared by remote cpus. In the current kernel, these two sets are not clearly separated out. This can potentially cause the same data cacheline to be shared between the two sets of data, which will result in unnecessary bouncing of the cacheline between cpus. One way to fix the problem is to cacheline align the remotely accessed per cpu data, both at the beginning and at the end. Because of the padding at both ends, this will likely cause some memory wastage, and the interface to achieve this is also not clean. This patch: Moves the remotely accessed per cpu data (which is currently marked as ____cacheline_aligned_in_smp) into a different section, where all the data elements are cacheline aligned. As such, this cleanly differentiates the local-only data from the remotely accessed data. Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Acked-by: Suresh Siddha <suresh.b.siddha@intel.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Christoph Lameter <clameter@sgi.com> Cc: <linux-arch@vger.kernel.org> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
84 lines
2.7 KiB
C
84 lines
2.7 KiB
C
#ifndef _ASM_IA64_PERCPU_H
|
|
#define _ASM_IA64_PERCPU_H
|
|
|
|
/*
|
|
* Copyright (C) 2002-2003 Hewlett-Packard Co
|
|
* David Mosberger-Tang <davidm@hpl.hp.com>
|
|
*/
|
|
|
|
/* Set to PERCPU_PAGE_SIZE, which this header does not define itself. */
#define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE
|
|
|
|
#ifdef __ASSEMBLY__
|
|
/* Assembly-only (__ASSEMBLY__ builds): expands to the per_cpu__-prefixed symbol for `var`. */
# define THIS_CPU(var) (per_cpu__##var) /* use this to mark accesses to per-CPU variables... */
|
|
#else /* !__ASSEMBLY__ */
|
|
|
|
|
|
#include <linux/threads.h>
|
|
|
|
/*
 * __SMALL_ADDR_AREA: when HAVE_MODEL_SMALL_ATTRIBUTE is defined this expands
 * to the __model__(__small__) attribute; otherwise it expands to nothing.
 * The per-CPU declaration/definition macros in this header prepend it to the
 * variable's type.
 */
#ifdef HAVE_MODEL_SMALL_ATTRIBUTE
|
|
# define __SMALL_ADDR_AREA __attribute__((__model__ (__small__)))
|
|
#else
|
|
# define __SMALL_ADDR_AREA
|
|
#endif
|
|
|
|
/*
 * Emit an extern declaration for the per-CPU variable `name` of type `type`.
 * The declared symbol is per_cpu__<name>, tagged with __SMALL_ADDR_AREA.
 */
#define DECLARE_PER_CPU(type, name) \
|
|
extern __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name
|
|
|
|
/*
 * Define the per-CPU variable `name` of type `type`.  The symbol is
 * per_cpu__<name>, placed in the ".data.percpu" section and tagged with
 * __SMALL_ADDR_AREA.
 * The type is passed separately and wrapped in __typeof__, so that
 * (int[3], foo) works.
 */
|
|
#define DEFINE_PER_CPU(type, name) \
|
|
__attribute__((__section__(".data.percpu"))) \
|
|
__SMALL_ADDR_AREA __typeof__(type) per_cpu__##name
|
|
|
|
/*
 * Define a per-CPU variable that is also accessed by remote CPUs.
 *
 * With CONFIG_SMP the symbol per_cpu__<name> goes into the separate
 * ".data.percpu.shared_aligned" section and is marked
 * ____cacheline_aligned_in_smp.  Without CONFIG_SMP this is just
 * DEFINE_PER_CPU(type, name).
 */
#ifdef CONFIG_SMP
|
|
#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
|
|
__attribute__((__section__(".data.percpu.shared_aligned"))) \
|
|
__SMALL_ADDR_AREA __typeof__(type) per_cpu__##name \
|
|
____cacheline_aligned_in_smp
|
|
#else
|
|
#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
|
|
DEFINE_PER_CPU(type, name)
|
|
#endif
|
|
|
|
/*
|
|
* Pretty much a literal copy of asm-generic/percpu.h, except that percpu_modcopy() is an
|
|
* external routine, to avoid include-hell.
|
|
*/
|
|
#ifdef CONFIG_SMP
|
|
|
|
/*
 * One unsigned long per CPU slot (NR_CPUS entries), indexed by CPU number.
 * per_cpu() passes the selected entry to RELOC_HIDE() together with the
 * address of the per_cpu__ symbol.
 */
extern unsigned long __per_cpu_offset[NR_CPUS];
|
|
/*
 * Per-CPU offset for CPU number `x`.
 * __per_cpu_offset is an array, so it has to be indexed; the previous
 * function-call spelling `__per_cpu_offset(x)` could not compile when used.
 */
#define per_cpu_offset(x) (__per_cpu_offset[x])
|
|
|
|
/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */
|
|
DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
|
|
|
|
/*
 * SMP accessors.  Each one hands the address of per_cpu__<var> and an offset
 * to RELOC_HIDE() and dereferences the result, so it can be used as an lvalue.
 *
 *   per_cpu(var, cpu)       - offset is __per_cpu_offset[cpu].
 *   __get_cpu_var(var)      - offset is the local_per_cpu_offset per-CPU
 *                             variable, read through __ia64_per_cpu_var().
 *   __raw_get_cpu_var(var)  - same expansion as __get_cpu_var() in this header.
 */
#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
|
|
#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset)))
|
|
#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset)))
|
|
|
|
/*
 * SMP-only routines implemented outside this header.
 * percpu_modcopy() is kept external to avoid include-hell; judging by its
 * parameters it copies `size` bytes from `src` into per-CPU storage at
 * `pcpudst` -- NOTE(review): confirm against the implementation.
 */
extern void percpu_modcopy(void *pcpudst, const void *src, unsigned long size);
|
|
/* NOTE(review): presumably sets up the per-CPU areas; the body is not visible here. */
extern void setup_per_cpu_areas (void);
|
|
/* Returns a pointer; the !SMP build replaces this call with __phys_per_cpu_start. */
extern void *per_cpu_init(void);
|
|
|
|
#else /* ! SMP */
|
|
|
|
/*
 * Uniprocessor accessors: there is a single copy of each variable, so no
 * offset is applied.
 *
 * per_cpu() still evaluates `cpu` (cast to void and discarded through the
 * comma operator) before yielding per_cpu__<var>.
 */
#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var))
|
|
/* Both forms expand directly to the per_cpu__<var> symbol. */
#define __get_cpu_var(var) per_cpu__##var
|
|
#define __raw_get_cpu_var(var) per_cpu__##var
|
|
/* No function call on !SMP: the expression is just __phys_per_cpu_start. */
#define per_cpu_init() (__phys_per_cpu_start)
|
|
|
|
#endif /* SMP */
|
|
|
|
/*
 * Export a per-CPU variable: forwards the per_cpu__-prefixed symbol for `var`
 * to EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() respectively.
 */
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
|
|
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
|
|
|
|
/*
|
|
* Be extremely careful when taking the address of this variable! Due to virtual
|
|
* remapping, it is different from the canonical address returned by __get_cpu_var(var)!
|
|
* On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
|
|
* more efficient.
|
|
*/
|
|
/* Expands to the bare per_cpu__<var> symbol; no offset or RELOC_HIDE() is applied. */
#define __ia64_per_cpu_var(var) (per_cpu__##var)
|
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#endif /* _ASM_IA64_PERCPU_H */
|