Merge branch 'x86/mpparse' into x86/devel

Conflicts:

	arch/x86/Kconfig
	arch/x86/kernel/io_apic_32.c
	arch/x86/kernel/setup_64.c
	arch/x86/mm/init_32.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar 2008-07-08 11:14:58 +02:00
commit 3de352bbd8
90 changed files with 3899 additions and 2993 deletions

View file

@ -610,6 +610,29 @@ and is between 256 and 4096 characters. It is defined in the file
See drivers/char/README.epca and
Documentation/digiepca.txt.
disable_mtrr_cleanup [X86]
enable_mtrr_cleanup [X86]
The kernel tries to adjust MTRR layout from continuous
to discrete, to make X server driver able to add WB
entry later. This parameter enables/disables that.
mtrr_chunk_size=nn[KMG] [X86]
used for mtrr cleanup. It is largest continous chunk
that could hold holes aka. UC entries.
mtrr_gran_size=nn[KMG] [X86]
Used for mtrr cleanup. It is granularity of mtrr block.
Default is 1.
Large value could prevent small alignment from
using up MTRRs.
mtrr_spare_reg_nr=n [X86]
Format: <integer>
Range: 0,7 : spare reg number
Default : 1
Used for mtrr cleanup. It is spare mtrr entries number.
Set to 2 or more if your graphical card needs more.
disable_mtrr_trim [X86, Intel and AMD only]
By default the kernel will trim any uncacheable
memory out of your available memory pool based on

View file

@ -230,6 +230,27 @@ config SMP
If you don't know what to do here, say N.
config X86_FIND_SMP_CONFIG
def_bool y
depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS
depends on X86_32
if ACPI
config X86_MPPARSE
def_bool y
bool "Enable MPS table"
depends on X86_LOCAL_APIC && !X86_VISWS
help
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
endif
if !ACPI
config X86_MPPARSE
def_bool y
depends on X86_LOCAL_APIC && !X86_VISWS
endif
choice
prompt "Subarchitecture Type"
default X86_PC
@ -261,36 +282,6 @@ config X86_VOYAGER
If you do not specifically know you have a Voyager based machine,
say N here, otherwise the kernel you build will not be bootable.
config X86_NUMAQ
bool "NUMAQ (IBM/Sequent)"
depends on SMP && X86_32 && PCI
select NUMA
help
This option is used for getting Linux to run on a (IBM/Sequent) NUMA
multiquad box. This changes the way that processors are bootstrapped,
and uses Clustered Logical APIC addressing mode instead of Flat Logical.
You will need a new lynxer.elf file to flash your firmware with - send
email to <Martin.Bligh@us.ibm.com>.
config X86_SUMMIT
bool "Summit/EXA (IBM x440)"
depends on X86_32 && SMP
help
This option is needed for IBM systems that use the Summit/EXA chipset.
In particular, it is needed for the x440.
If you don't have one of these computers, you should say N here.
If you want to build a NUMA kernel, you must select ACPI.
config X86_BIGSMP
bool "Support for other sub-arch SMP systems with more than 8 CPUs"
depends on X86_32 && SMP
help
This option is needed for the systems that have more than 8 CPUs
and if the system is not of any sub-arch type above.
If you don't have such a system, you should say N here.
config X86_VISWS
bool "SGI 320/540 (Visual Workstation)"
depends on X86_32 && !PCI
@ -304,12 +295,33 @@ config X86_VISWS
and vice versa. See <file:Documentation/sgi-visws.txt> for details.
config X86_GENERICARCH
bool "Generic architecture (Summit, bigsmp, ES7000, default)"
bool "Generic architecture"
depends on X86_32
help
This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
It is intended for a generic binary kernel.
If you want a NUMA kernel, select ACPI. We need SRAT for NUMA.
This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
subarchitectures. It is intended for a generic binary kernel.
if you select them all, kernel will probe it one by one. and will
fallback to default.
if X86_GENERICARCH
config X86_NUMAQ
bool "NUMAQ (IBM/Sequent)"
depends on SMP && X86_32 && PCI && X86_MPPARSE
select NUMA
help
This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
NUMA multiquad box. This changes the way that processors are
bootstrapped, and uses Clustered Logical APIC addressing mode instead
of Flat Logical. You will need a new lynxer.elf file to flash your
firmware with - send email to <Martin.Bligh@us.ibm.com>.
config X86_SUMMIT
bool "Summit/EXA (IBM x440)"
depends on X86_32 && SMP
help
This option is needed for IBM systems that use the Summit/EXA chipset.
In particular, it is needed for the x440.
config X86_ES7000
bool "Support for Unisys ES7000 IA32 series"
@ -317,8 +329,15 @@ config X86_ES7000
help
Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
supposed to run on an IA32-based Unisys ES7000 system.
Only choose this option if you have such a system, otherwise you
should say N here.
config X86_BIGSMP
bool "Support for big SMP systems with more than 8 CPUs"
depends on X86_32 && SMP
help
This option is needed for the systems that have more than 8 CPUs
and if the system is not of any sub-arch type above.
endif
config X86_RDC321X
bool "RDC R-321x SoC"
@ -432,7 +451,7 @@ config MEMTEST
config ACPI_SRAT
def_bool y
depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH)
depends on X86_32 && ACPI && NUMA && X86_GENERICARCH
select ACPI_NUMA
config HAVE_ARCH_PARSE_SRAT
@ -441,11 +460,11 @@ config HAVE_ARCH_PARSE_SRAT
config X86_SUMMIT_NUMA
def_bool y
depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH)
depends on X86_32 && NUMA && X86_GENERICARCH
config X86_CYCLONE_TIMER
def_bool y
depends on X86_32 && X86_SUMMIT || X86_GENERICARCH
depends on X86_GENERICARCH
config ES7000_CLUSTERED_APIC
def_bool y
@ -910,9 +929,9 @@ config X86_PAE
config NUMA
bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
depends on SMP
depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL)
depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
default n if X86_PC
default y if (X86_NUMAQ || X86_SUMMIT)
default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
help
Enable NUMA (Non Uniform Memory Access) support.
The kernel will try to allocate memory used by a CPU on the
@ -1089,6 +1108,40 @@ config MTRR
See <file:Documentation/mtrr.txt> for more information.
config MTRR_SANITIZER
def_bool y
prompt "MTRR cleanup support"
depends on MTRR
help
Convert MTRR layout from continuous to discrete, so some X driver
could add WB entries.
Say N here if you see bootup problems (boot crash, boot hang,
spontaneous reboots).
Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size
could be used to send largest mtrr entry size for continuous block
to hold holes (aka. UC entries)
If unsure, say Y.
config MTRR_SANITIZER_ENABLE_DEFAULT
int "MTRR cleanup enable value (0-1)"
range 0 1
default "0"
depends on MTRR_SANITIZER
help
Enable mtrr cleanup default value
config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
int "MTRR cleanup spare reg num (0-7)"
range 0 7
default "1"
depends on MTRR_SANITIZER
help
mtrr cleanup spare entries default, it can be changed via
mtrr_spare_reg_nr=
config X86_PAT
bool
prompt "x86 PAT support"

View file

@ -137,15 +137,6 @@ config 4KSTACKS
on the VM subsystem for higher order allocations. This option
will also use IRQ stacks to compensate for the reduced stackspace.
config X86_FIND_SMP_CONFIG
def_bool y
depends on X86_LOCAL_APIC || X86_VOYAGER
depends on X86_32
config X86_MPPARSE
def_bool y
depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64
config DOUBLEFAULT
default y
bool "Enable doublefault exception handler" if EMBEDDED

View file

@ -117,29 +117,11 @@ mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/
mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws
mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/
# NUMAQ subarch support
mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq
mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default/
# BIGSMP subarch support
mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp
mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default/
#Summit subarch support
mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit
mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default/
# generic subarchitecture
mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/
mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/
# ES7000 subarch support
mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000
fcore-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/
mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default/
# RDC R-321x subarch support
mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x
mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/
@ -160,6 +142,7 @@ KBUILD_AFLAGS += $(mflags-y)
head-y := arch/x86/kernel/head_$(BITS).o
head-y += arch/x86/kernel/head$(BITS).o
head-y += arch/x86/kernel/head.o
head-y += arch/x86/kernel/init_task.o
libs-y += arch/x86/lib/

View file

@ -218,10 +218,6 @@ static char *vidmem;
static int vidport;
static int lines, cols;
#ifdef CONFIG_X86_NUMAQ
void *xquad_portio;
#endif
#include "../../../../lib/inflate.c"
static void *malloc(int size)

View file

@ -13,6 +13,7 @@
*/
#include "boot.h"
#include <linux/kernel.h>
#define SMAP 0x534d4150 /* ASCII "SMAP" */
@ -53,7 +54,7 @@ static int detect_memory_e820(void)
count++;
desc++;
} while (next && count < E820MAX);
} while (next && count < ARRAY_SIZE(boot_params.e820_map));
return boot_params.e820_entries = count;
}

View file

@ -2,7 +2,7 @@
# Makefile for the linux kernel.
#
extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
@ -22,7 +22,7 @@ obj-y += setup_$(BITS).o i8259.o irqinit_$(BITS).o setup.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o
obj-y += bootflag.o e820_$(BITS).o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
obj-y += alternative.o i8253.o pci-nommu.o
obj-y += tsc_$(BITS).o io_delay.o rtc.o

View file

@ -83,6 +83,8 @@ int acpi_lapic;
int acpi_ioapic;
int acpi_strict;
static int disable_irq0_through_ioapic __initdata;
u8 acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
int acpi_skip_timer_override __initdata;
@ -338,8 +340,6 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
#ifdef CONFIG_X86_IO_APIC
struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
@ -858,6 +858,372 @@ static int __init acpi_parse_madt_lapic_entries(void)
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
#ifdef CONFIG_X86_ES7000
extern int es7000_plat;
#endif
static struct {
int apic_id;
int gsi_base;
int gsi_end;
DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
} mp_ioapic_routing[MAX_IO_APICS];
static int mp_find_ioapic(int gsi)
{
int i = 0;
/* Find the IOAPIC that manages this GSI. */
for (i = 0; i < nr_ioapics; i++) {
if ((gsi >= mp_ioapic_routing[i].gsi_base)
&& (gsi <= mp_ioapic_routing[i].gsi_end))
return i;
}
printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
return -1;
}
static u8 __init uniq_ioapic_id(u8 id)
{
#ifdef CONFIG_X86_32
if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
!APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
return io_apic_get_unique_id(nr_ioapics, id);
else
return id;
#else
int i;
DECLARE_BITMAP(used, 256);
bitmap_zero(used, 256);
for (i = 0; i < nr_ioapics; i++) {
struct mp_config_ioapic *ia = &mp_ioapics[i];
__set_bit(ia->mp_apicid, used);
}
if (!test_bit(id, used))
return id;
return find_first_zero_bit(used, 256);
#endif
}
static int bad_ioapic(unsigned long address)
{
if (nr_ioapics >= MAX_IO_APICS) {
printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
"(found %d)\n", MAX_IO_APICS, nr_ioapics);
panic("Recompile kernel with bigger MAX_IO_APICS!\n");
}
if (!address) {
printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
" found in table, skipping!\n");
return 1;
}
return 0;
}
void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
{
int idx = 0;
if (bad_ioapic(address))
return;
idx = nr_ioapics;
mp_ioapics[idx].mp_type = MP_IOAPIC;
mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
mp_ioapics[idx].mp_apicaddr = address;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
#ifdef CONFIG_X86_32
mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
#else
mp_ioapics[idx].mp_apicver = 0;
#endif
/*
* Build basic GSI lookup table to facilitate gsi->io_apic lookups
* and to prevent reprogramming of IOAPIC pins (PCI GSIs).
*/
mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid;
mp_ioapic_routing[idx].gsi_base = gsi_base;
mp_ioapic_routing[idx].gsi_end = gsi_base +
io_apic_get_redir_entries(idx);
printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
"GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid,
mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr,
mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
nr_ioapics++;
}
static void assign_to_mp_irq(struct mp_config_intsrc *m,
struct mp_config_intsrc *mp_irq)
{
memcpy(mp_irq, m, sizeof(struct mp_config_intsrc));
}
static int mp_irq_cmp(struct mp_config_intsrc *mp_irq,
struct mp_config_intsrc *m)
{
return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc));
}
static void save_mp_irq(struct mp_config_intsrc *m)
{
int i;
for (i = 0; i < mp_irq_entries; i++) {
if (!mp_irq_cmp(&mp_irqs[i], m))
return;
}
assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
if (++mp_irq_entries == MAX_IRQ_SOURCES)
panic("Max # of irq sources exceeded!!\n");
}
void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
{
int ioapic;
int pin;
struct mp_config_intsrc mp_irq;
/* Skip the 8254 timer interrupt (IRQ 0) if requested. */
if (bus_irq == 0 && disable_irq0_through_ioapic)
return;
/*
* Convert 'gsi' to 'ioapic.pin'.
*/
ioapic = mp_find_ioapic(gsi);
if (ioapic < 0)
return;
pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
/*
* TBD: This check is for faulty timer entries, where the override
* erroneously sets the trigger to level, resulting in a HUGE
* increase of timer interrupts!
*/
if ((bus_irq == 0) && (trigger == 3))
trigger = 1;
mp_irq.mp_type = MP_INTSRC;
mp_irq.mp_irqtype = mp_INT;
mp_irq.mp_irqflag = (trigger << 2) | polarity;
mp_irq.mp_srcbus = MP_ISA_BUS;
mp_irq.mp_srcbusirq = bus_irq; /* IRQ */
mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */
mp_irq.mp_dstirq = pin; /* INTIN# */
save_mp_irq(&mp_irq);
}
void __init mp_config_acpi_legacy_irqs(void)
{
int i;
int ioapic;
unsigned int dstapic;
struct mp_config_intsrc mp_irq;
#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
/*
* Fabricate the legacy ISA bus (bus #31).
*/
mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
#endif
set_bit(MP_ISA_BUS, mp_bus_not_pci);
Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
#ifdef CONFIG_X86_ES7000
/*
* Older generations of ES7000 have no legacy identity mappings
*/
if (es7000_plat == 1)
return;
#endif
/*
* Locate the IOAPIC that manages the ISA IRQs (0-15).
*/
ioapic = mp_find_ioapic(0);
if (ioapic < 0)
return;
dstapic = mp_ioapics[ioapic].mp_apicid;
/*
* Use the default configuration for the IRQs 0-15. Unless
* overridden by (MADT) interrupt source override entries.
*/
for (i = 0; i < 16; i++) {
int idx;
/* Skip the 8254 timer interrupt (IRQ 0) if requested. */
if (i == 0 && disable_irq0_through_ioapic)
continue;
for (idx = 0; idx < mp_irq_entries; idx++) {
struct mp_config_intsrc *irq = mp_irqs + idx;
/* Do we already have a mapping for this ISA IRQ? */
if (irq->mp_srcbus == MP_ISA_BUS
&& irq->mp_srcbusirq == i)
break;
/* Do we already have a mapping for this IOAPIC pin */
if (irq->mp_dstapic == dstapic &&
irq->mp_dstirq == i)
break;
}
if (idx != mp_irq_entries) {
printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
continue; /* IRQ already used */
}
mp_irq.mp_type = MP_INTSRC;
mp_irq.mp_irqflag = 0; /* Conforming */
mp_irq.mp_srcbus = MP_ISA_BUS;
mp_irq.mp_dstapic = dstapic;
mp_irq.mp_irqtype = mp_INT;
mp_irq.mp_srcbusirq = i; /* Identity mapped */
mp_irq.mp_dstirq = i;
save_mp_irq(&mp_irq);
}
}
int mp_register_gsi(u32 gsi, int triggering, int polarity)
{
int ioapic;
int ioapic_pin;
#ifdef CONFIG_X86_32
#define MAX_GSI_NUM 4096
#define IRQ_COMPRESSION_START 64
static int pci_irq = IRQ_COMPRESSION_START;
/*
* Mapping between Global System Interrupts, which
* represent all possible interrupts, and IRQs
* assigned to actual devices.
*/
static int gsi_to_irq[MAX_GSI_NUM];
#else
if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
return gsi;
#endif
/* Don't set up the ACPI SCI because it's already set up */
if (acpi_gbl_FADT.sci_interrupt == gsi)
return gsi;
ioapic = mp_find_ioapic(gsi);
if (ioapic < 0) {
printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
return gsi;
}
ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
#ifdef CONFIG_X86_32
if (ioapic_renumber_irq)
gsi = ioapic_renumber_irq(ioapic, gsi);
#endif
/*
* Avoid pin reprogramming. PRTs typically include entries
* with redundant pin->gsi mappings (but unique PCI devices);
* we only program the IOAPIC on the first.
*/
if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
printk(KERN_ERR "Invalid reference to IOAPIC pin "
"%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
ioapic_pin);
return gsi;
}
if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
#ifdef CONFIG_X86_32
return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
#else
return gsi;
#endif
}
set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
#ifdef CONFIG_X86_32
/*
* For GSI >= 64, use IRQ compression
*/
if ((gsi >= IRQ_COMPRESSION_START)
&& (triggering == ACPI_LEVEL_SENSITIVE)) {
/*
* For PCI devices assign IRQs in order, avoiding gaps
* due to unused I/O APIC pins.
*/
int irq = gsi;
if (gsi < MAX_GSI_NUM) {
/*
* Retain the VIA chipset work-around (gsi > 15), but
* avoid a problem where the 8254 timer (IRQ0) is setup
* via an override (so it's not on pin 0 of the ioapic),
* and at the same time, the pin 0 interrupt is a PCI
* type. The gsi > 15 test could cause these two pins
* to be shared as IRQ0, and they are not shareable.
* So test for this condition, and if necessary, avoid
* the pin collision.
*/
gsi = pci_irq++;
/*
* Don't assign IRQ used by ACPI SCI
*/
if (gsi == acpi_gbl_FADT.sci_interrupt)
gsi = pci_irq++;
gsi_to_irq[irq] = gsi;
} else {
printk(KERN_ERR "GSI %u is too high\n", gsi);
return gsi;
}
}
#endif
io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
return gsi;
}
int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
u32 gsi, int triggering, int polarity)
{
#ifdef CONFIG_X86_MPPARSE
struct mp_config_intsrc mp_irq;
int ioapic;
if (!acpi_ioapic)
return 0;
/* print the entry should happen on mptable identically */
mp_irq.mp_type = MP_INTSRC;
mp_irq.mp_irqtype = mp_INT;
mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
mp_irq.mp_srcbus = number;
mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
ioapic = mp_find_ioapic(gsi);
mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id;
mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
save_mp_irq(&mp_irq);
#endif
return 0;
}
/*
* Parse IOAPIC related entries in MADT
* returns 0 on success, < 0 on error
@ -1058,6 +1424,17 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
return 0;
}
/*
* Don't register any I/O APIC entries for the 8254 timer IRQ.
*/
static int __init
dmi_disable_irq0_through_ioapic(const struct dmi_system_id *d)
{
pr_notice("%s detected: disabling IRQ 0 through I/O APIC\n", d->ident);
disable_irq0_through_ioapic = 1;
return 0;
}
/*
* If your system is blacklisted here, but you find that acpi=force
* works for you, please contact acpi-devel@sourceforge.net
@ -1225,6 +1602,32 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
},
},
/*
* HP laptops which use a DSDT reporting as HP/SB400/10000,
* which includes some code which overrides all temperature
* trip points to 16C if the INTIN2 input of the I/O APIC
* is enabled. This input is incorrectly designated the
* ISA IRQ 0 via an interrupt source override even though
* it is wired to the output of the master 8259A and INTIN0
* is not connected at all. Abandon any attempts to route
* IRQ 0 through the I/O APIC therefore.
*/
{
.callback = dmi_disable_irq0_through_ioapic,
.ident = "HP NX6125 laptop",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6125"),
},
},
{
.callback = dmi_disable_irq0_through_ioapic,
.ident = "HP NX6325 laptop",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
},
},
{}
};

View file

@ -328,7 +328,7 @@ void __init early_gart_iommu_check(void)
E820_RAM)) {
/* reserve it, so we can reuse it in second kernel */
printk(KERN_INFO "update e820 for GART\n");
add_memory_region(aper_base, aper_size, E820_RESERVED);
e820_add_region(aper_base, aper_size, E820_RESERVED);
update_e820();
}
}

View file

@ -79,6 +79,11 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
*/
int apic_verbosity;
int pic_mode;
/* Have we found an MP table */
int smp_found_config;
static unsigned int calibration_result;
static int lapic_next_event(unsigned long delta,
@ -1202,7 +1207,7 @@ void __init init_apic_mappings(void)
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
ioapic_phys = mp_ioapics[i].mpc_apicaddr;
ioapic_phys = mp_ioapics[i].mp_apicaddr;
if (!ioapic_phys) {
printk(KERN_ERR
"WARNING: bogus zero IO-APIC "
@ -1517,6 +1522,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
*/
cpu = 0;
if (apicid > max_physical_apicid)
max_physical_apicid = apicid;
/*
* Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
* but we need to work other dependencies like SMP_SUSPEND etc
@ -1524,7 +1532,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
* if (CPU_HOTPLUG_ENABLED || num_processors > 8)
* - Ashok Raj <ashok.raj@intel.com>
*/
if (num_processors > 8) {
if (max_physical_apicid >= 8) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
if (!APIC_XAPIC(version)) {

View file

@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
*/
int apic_verbosity;
/* Have we found an MP table */
int smp_found_config;
static struct resource lapic_resource = {
.name = "Local APIC",
.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
@ -1068,6 +1071,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
*/
cpu = 0;
}
if (apicid > max_physical_apicid)
max_physical_apicid = apicid;
/* are we being called early in kernel startup? */
if (x86_cpu_to_apicid_early_ptr) {
u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;

View file

@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = {
static unsigned long smp_changes_mask;
static struct mtrr_state mtrr_state = {};
static int mtrr_state_set;
static u64 tom2;
u64 mtrr_tom2;
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "mtrr."
@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
}
}
if (tom2) {
if (start >= (1ULL<<32) && (end < tom2))
if (mtrr_tom2) {
if (start >= (1ULL<<32) && (end < mtrr_tom2))
return MTRR_TYPE_WRBACK;
}
@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
/* fill the MSR pair relating to a var range */
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
{
struct mtrr_var_range *vr;
vr = mtrr_state.var_ranges;
vr[index].base_lo = base_lo;
vr[index].base_hi = base_hi;
vr[index].mask_lo = mask_lo;
vr[index].mask_hi = mask_hi;
}
static void
get_fixed_ranges(mtrr_type * frs)
{
@ -213,13 +227,13 @@ void __init get_mtrr_state(void)
mtrr_state.enabled = (lo & 0xc00) >> 10;
if (amd_special_default_mtrr()) {
unsigned lo, hi;
unsigned low, high;
/* TOP_MEM2 */
rdmsr(MSR_K8_TOP_MEM2, lo, hi);
tom2 = hi;
tom2 <<= 32;
tom2 |= lo;
tom2 &= 0xffffff8000000ULL;
rdmsr(MSR_K8_TOP_MEM2, low, high);
mtrr_tom2 = high;
mtrr_tom2 <<= 32;
mtrr_tom2 |= low;
mtrr_tom2 &= 0xffffff800000ULL;
}
if (mtrr_show) {
int high_width;
@ -251,9 +265,9 @@ void __init get_mtrr_state(void)
else
printk(KERN_INFO "MTRR %u disabled\n", i);
}
if (tom2) {
if (mtrr_tom2) {
printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
tom2, tom2>>20);
mtrr_tom2, mtrr_tom2>>20);
}
}
mtrr_state_set = 1;
@ -328,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
if (lo != msrwords[0] || hi != msrwords[1]) {
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86 == 15 &&
(boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
k8_enable_fixed_iorrs();
mtrr_wrmsr(msr, msrwords[0], msrwords[1]);

View file

@ -37,6 +37,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
@ -609,6 +610,787 @@ static struct sysdev_driver mtrr_sysdev_driver = {
.resume = mtrr_restore,
};
/* should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256
struct res_range {
unsigned long start;
unsigned long end;
};
static int __init
add_range(struct res_range *range, int nr_range, unsigned long start,
unsigned long end)
{
/* out of slots */
if (nr_range >= RANGE_NUM)
return nr_range;
range[nr_range].start = start;
range[nr_range].end = end;
nr_range++;
return nr_range;
}
static int __init
add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
unsigned long end)
{
int i;
/* try to merge it with old one */
for (i = 0; i < nr_range; i++) {
unsigned long final_start, final_end;
unsigned long common_start, common_end;
if (!range[i].end)
continue;
common_start = max(range[i].start, start);
common_end = min(range[i].end, end);
if (common_start > common_end + 1)
continue;
final_start = min(range[i].start, start);
final_end = max(range[i].end, end);
range[i].start = final_start;
range[i].end = final_end;
return nr_range;
}
/* need to add that */
return add_range(range, nr_range, start, end);
}
static void __init
subtract_range(struct res_range *range, unsigned long start, unsigned long end)
{
int i, j;
for (j = 0; j < RANGE_NUM; j++) {
if (!range[j].end)
continue;
if (start <= range[j].start && end >= range[j].end) {
range[j].start = 0;
range[j].end = 0;
continue;
}
if (start <= range[j].start && end < range[j].end &&
range[j].start < end + 1) {
range[j].start = end + 1;
continue;
}
if (start > range[j].start && end >= range[j].end &&
range[j].end > start - 1) {
range[j].end = start - 1;
continue;
}
if (start > range[j].start && end < range[j].end) {
/* find the new spare */
for (i = 0; i < RANGE_NUM; i++) {
if (range[i].end == 0)
break;
}
if (i < RANGE_NUM) {
range[i].end = range[j].end;
range[i].start = end + 1;
} else {
printk(KERN_ERR "run of slot in ranges\n");
}
range[j].end = start - 1;
continue;
}
}
}
static int __init cmp_range(const void *x1, const void *x2)
{
const struct res_range *r1 = x1;
const struct res_range *r2 = x2;
long start1, start2;
start1 = r1->start;
start2 = r2->start;
return start1 - start2;
}
struct var_mtrr_range_state {
unsigned long base_pfn;
unsigned long size_pfn;
mtrr_type type;
};
struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;
static int __init
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
unsigned long extra_remove_base,
unsigned long extra_remove_size)
{
unsigned long i, base, size;
mtrr_type type;
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
if (type != MTRR_TYPE_WRBACK)
continue;
base = range_state[i].base_pfn;
size = range_state[i].size_pfn;
nr_range = add_range_with_merge(range, nr_range, base,
base + size - 1);
}
if (debug_print) {
printk(KERN_DEBUG "After WB checking\n");
for (i = 0; i < nr_range; i++)
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
range[i].start, range[i].end + 1);
}
/* take out UC ranges */
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
if (type != MTRR_TYPE_UNCACHABLE)
continue;
size = range_state[i].size_pfn;
if (!size)
continue;
base = range_state[i].base_pfn;
subtract_range(range, base, base + size - 1);
}
if (extra_remove_size)
subtract_range(range, extra_remove_base,
extra_remove_base + extra_remove_size - 1);
/* get new range num */
nr_range = 0;
for (i = 0; i < RANGE_NUM; i++) {
if (!range[i].end)
continue;
nr_range++;
}
if (debug_print) {
printk(KERN_DEBUG "After UC checking\n");
for (i = 0; i < nr_range; i++)
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
range[i].start, range[i].end + 1);
}
/* sort the ranges */
sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
if (debug_print) {
printk(KERN_DEBUG "After sorting\n");
for (i = 0; i < nr_range; i++)
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
range[i].start, range[i].end + 1);
}
/* clear those is not used */
for (i = nr_range; i < RANGE_NUM; i++)
memset(&range[i], 0, sizeof(range[i]));
return nr_range;
}
static struct res_range __initdata range[RANGE_NUM];
#ifdef CONFIG_MTRR_SANITIZER
static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
{
unsigned long sum;
int i;
sum = 0;
for (i = 0; i < nr_range; i++)
sum += range[i].end + 1 - range[i].start;
return sum;
}
static int enable_mtrr_cleanup __initdata =
CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
static int __init disable_mtrr_cleanup_setup(char *str)
{
if (enable_mtrr_cleanup != -1)
enable_mtrr_cleanup = 0;
return 0;
}
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
static int __init enable_mtrr_cleanup_setup(char *str)
{
if (enable_mtrr_cleanup != -1)
enable_mtrr_cleanup = 1;
return 0;
}
early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
struct var_mtrr_state {
unsigned long range_startk;
unsigned long range_sizek;
unsigned long chunk_sizek;
unsigned long gran_sizek;
unsigned int reg;
};
static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
unsigned char type, unsigned int address_bits)
{
u32 base_lo, base_hi, mask_lo, mask_hi;
u64 base, mask;
if (!sizek) {
fill_mtrr_var_range(reg, 0, 0, 0, 0);
return;
}
mask = (1ULL << address_bits) - 1;
mask &= ~((((u64)sizek) << 10) - 1);
base = ((u64)basek) << 10;
base |= type;
mask |= 0x800;
base_lo = base & ((1ULL<<32) - 1);
base_hi = base >> 32;
mask_lo = mask & ((1ULL<<32) - 1);
mask_hi = mask >> 32;
fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
}
static void __init
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
unsigned char type)
{
range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
range_state[reg].type = type;
}
static void __init
set_var_mtrr_all(unsigned int address_bits)
{
unsigned long basek, sizek;
unsigned char type;
unsigned int reg;
for (reg = 0; reg < num_var_ranges; reg++) {
basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
type = range_state[reg].type;
set_var_mtrr(reg, basek, sizek, type, address_bits);
}
}
static unsigned int __init
range_to_mtrr(unsigned int reg, unsigned long range_startk,
unsigned long range_sizek, unsigned char type)
{
if (!range_sizek || (reg >= num_var_ranges))
return reg;
while (range_sizek) {
unsigned long max_align, align;
unsigned long sizek;
/* Compute the maximum size I can make a range */
if (range_startk)
max_align = ffs(range_startk) - 1;
else
max_align = 32;
align = fls(range_sizek) - 1;
if (align > max_align)
align = max_align;
sizek = 1 << align;
if (debug_print)
printk(KERN_DEBUG "Setting variable MTRR %d, "
"base: %ldMB, range: %ldMB, type %s\n",
reg, range_startk >> 10, sizek >> 10,
(type == MTRR_TYPE_UNCACHABLE)?"UC":
((type == MTRR_TYPE_WRBACK)?"WB":"Other")
);
save_var_mtrr(reg++, range_startk, sizek, type);
range_startk += sizek;
range_sizek -= sizek;
if (reg >= num_var_ranges)
break;
}
return reg;
}
static unsigned __init
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
unsigned long sizek)
{
unsigned long hole_basek, hole_sizek;
unsigned long second_basek, second_sizek;
unsigned long range0_basek, range0_sizek;
unsigned long range_basek, range_sizek;
unsigned long chunk_sizek;
unsigned long gran_sizek;
hole_basek = 0;
hole_sizek = 0;
second_basek = 0;
second_sizek = 0;
chunk_sizek = state->chunk_sizek;
gran_sizek = state->gran_sizek;
/* align with gran size, prevent small block used up MTRRs */
range_basek = ALIGN(state->range_startk, gran_sizek);
if ((range_basek > basek) && basek)
return second_sizek;
state->range_sizek -= (range_basek - state->range_startk);
range_sizek = ALIGN(state->range_sizek, gran_sizek);
while (range_sizek > state->range_sizek) {
range_sizek -= gran_sizek;
if (!range_sizek)
return 0;
}
state->range_sizek = range_sizek;
/* try to append some small hole */
range0_basek = state->range_startk;
range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
if (range0_sizek == state->range_sizek) {
if (debug_print)
printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
range0_basek<<10,
(range0_basek + state->range_sizek)<<10);
state->reg = range_to_mtrr(state->reg, range0_basek,
state->range_sizek, MTRR_TYPE_WRBACK);
return 0;
}
range0_sizek -= chunk_sizek;
if (range0_sizek && sizek) {
while (range0_basek + range0_sizek > (basek + sizek)) {
range0_sizek -= chunk_sizek;
if (!range0_sizek)
break;
}
}
if (range0_sizek) {
if (debug_print)
printk(KERN_DEBUG "range0: %016lx - %016lx\n",
range0_basek<<10,
(range0_basek + range0_sizek)<<10);
state->reg = range_to_mtrr(state->reg, range0_basek,
range0_sizek, MTRR_TYPE_WRBACK);
}
range_basek = range0_basek + range0_sizek;
range_sizek = chunk_sizek;
if (range_basek + range_sizek > basek &&
range_basek + range_sizek <= (basek + sizek)) {
/* one hole */
second_basek = basek;
second_sizek = range_basek + range_sizek - basek;
}
/* if last piece, only could one hole near end */
if ((second_basek || !basek) &&
range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
(chunk_sizek >> 1)) {
/*
* one hole in middle (second_sizek is 0) or at end
* (second_sizek is 0 )
*/
hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
- second_sizek;
hole_basek = range_basek + range_sizek - hole_sizek
- second_sizek;
} else {
/* fallback for big hole, or several holes */
range_sizek = state->range_sizek - range0_sizek;
second_basek = 0;
second_sizek = 0;
}
if (debug_print)
printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
(range_basek + range_sizek)<<10);
state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
MTRR_TYPE_WRBACK);
if (hole_sizek) {
if (debug_print)
printk(KERN_DEBUG "hole: %016lx - %016lx\n",
hole_basek<<10, (hole_basek + hole_sizek)<<10);
state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
MTRR_TYPE_UNCACHABLE);
}
return second_sizek;
}
static void __init
set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
unsigned long size_pfn)
{
unsigned long basek, sizek;
unsigned long second_sizek = 0;
if (state->reg >= num_var_ranges)
return;
basek = base_pfn << (PAGE_SHIFT - 10);
sizek = size_pfn << (PAGE_SHIFT - 10);
/* See if I can merge with the last range */
if ((basek <= 1024) ||
(state->range_startk + state->range_sizek == basek)) {
unsigned long endk = basek + sizek;
state->range_sizek = endk - state->range_startk;
return;
}
/* Write the range mtrrs */
if (state->range_sizek != 0)
second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
/* Allocate an msr */
state->range_startk = basek + second_sizek;
state->range_sizek = sizek - second_sizek;
}
/* mininum size of mtrr block that can take hole */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);
static int __init parse_mtrr_chunk_size_opt(char *p)
{
if (!p)
return -EINVAL;
mtrr_chunk_size = memparse(p, &p);
return 0;
}
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
/* granity of mtrr of block */
static u64 mtrr_gran_size __initdata;
static int __init parse_mtrr_gran_size_opt(char *p)
{
if (!p)
return -EINVAL;
mtrr_gran_size = memparse(p, &p);
return 0;
}
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
static int nr_mtrr_spare_reg __initdata =
CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
static int __init parse_mtrr_spare_reg(char *arg)
{
if (arg)
nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
return 0;
}
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
static int __init
x86_setup_var_mtrrs(struct res_range *range, int nr_range,
u64 chunk_size, u64 gran_size)
{
struct var_mtrr_state var_state;
int i;
int num_reg;
var_state.range_startk = 0;
var_state.range_sizek = 0;
var_state.reg = 0;
var_state.chunk_sizek = chunk_size >> 10;
var_state.gran_sizek = gran_size >> 10;
memset(range_state, 0, sizeof(range_state));
/* Write the range etc */
for (i = 0; i < nr_range; i++)
set_var_mtrr_range(&var_state, range[i].start,
range[i].end - range[i].start + 1);
/* Write the last range */
if (var_state.range_sizek != 0)
range_to_mtrr_with_hole(&var_state, 0, 0);
num_reg = var_state.reg;
/* Clear out the extra MTRR's */
while (var_state.reg < num_var_ranges) {
save_var_mtrr(var_state.reg, 0, 0, 0);
var_state.reg++;
}
return num_reg;
}
struct mtrr_cleanup_result {
unsigned long gran_sizek;
unsigned long chunk_sizek;
unsigned long lose_cover_sizek;
unsigned int num_reg;
int bad;
};
/*
* gran_size: 1M, 2M, ..., 2G
* chunk size: gran_size, ..., 4G
* so we need (2+13)*6
*/
#define NUM_RESULT 90
#define PSHIFT (PAGE_SHIFT - 10)
static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
static struct res_range __initdata range_new[RANGE_NUM];
static unsigned long __initdata min_loss_pfn[RANGE_NUM];
static int __init mtrr_cleanup(unsigned address_bits)
{
unsigned long extra_remove_base, extra_remove_size;
unsigned long i, base, size, def, dummy;
mtrr_type type;
int nr_range, nr_range_new;
u64 chunk_size, gran_size;
unsigned long range_sums, range_sums_new;
int index_good;
int num_reg_good;
/* extra one for all 0 */
int num[MTRR_NUM_TYPES + 1];
if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
return 0;
rdmsr(MTRRdefType_MSR, def, dummy);
def &= 0xff;
if (def != MTRR_TYPE_UNCACHABLE)
return 0;
/* get it and store it aside */
memset(range_state, 0, sizeof(range_state));
for (i = 0; i < num_var_ranges; i++) {
mtrr_if->get(i, &base, &size, &type);
range_state[i].base_pfn = base;
range_state[i].size_pfn = size;
range_state[i].type = type;
}
/* check entries number */
memset(num, 0, sizeof(num));
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
size = range_state[i].size_pfn;
if (type >= MTRR_NUM_TYPES)
continue;
if (!size)
type = MTRR_NUM_TYPES;
num[type]++;
}
/* check if we got UC entries */
if (!num[MTRR_TYPE_UNCACHABLE])
return 0;
/* check if we only had WB and UC */
if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
num_var_ranges - num[MTRR_NUM_TYPES])
return 0;
memset(range, 0, sizeof(range));
extra_remove_size = 0;
if (mtrr_tom2) {
extra_remove_base = 1 << (32 - PAGE_SHIFT);
extra_remove_size =
(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
}
nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
extra_remove_size);
range_sums = sum_ranges(range, nr_range);
printk(KERN_INFO "total RAM coverred: %ldM\n",
range_sums >> (20 - PAGE_SHIFT));
if (mtrr_chunk_size && mtrr_gran_size) {
int num_reg;
debug_print = 1;
/* convert ranges to var ranges state */
num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
mtrr_gran_size);
/* we got new setting in range_state, check it */
memset(range_new, 0, sizeof(range_new));
nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
extra_remove_base,
extra_remove_size);
range_sums_new = sum_ranges(range_new, nr_range_new);
i = 0;
result[i].chunk_sizek = mtrr_chunk_size >> 10;
result[i].gran_sizek = mtrr_gran_size >> 10;
result[i].num_reg = num_reg;
if (range_sums < range_sums_new) {
result[i].lose_cover_sizek =
(range_sums_new - range_sums) << PSHIFT;
result[i].bad = 1;
} else
result[i].lose_cover_sizek =
(range_sums - range_sums_new) << PSHIFT;
printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
result[i].chunk_sizek >> 10);
printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n",
result[i].num_reg, result[i].bad?"-":"",
result[i].lose_cover_sizek >> 10);
if (!result[i].bad) {
set_var_mtrr_all(address_bits);
return 1;
}
printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
"will find optimal one\n");
debug_print = 0;
memset(result, 0, sizeof(result[0]));
}
i = 0;
memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
memset(result, 0, sizeof(result));
for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
for (chunk_size = gran_size; chunk_size < (1ULL<<33);
chunk_size <<= 1) {
int num_reg;
if (debug_print)
printk(KERN_INFO
"\ngran_size: %lldM chunk_size_size: %lldM\n",
gran_size >> 20, chunk_size >> 20);
if (i >= NUM_RESULT)
continue;
/* convert ranges to var ranges state */
num_reg = x86_setup_var_mtrrs(range, nr_range,
chunk_size, gran_size);
/* we got new setting in range_state, check it */
memset(range_new, 0, sizeof(range_new));
nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
extra_remove_base, extra_remove_size);
range_sums_new = sum_ranges(range_new, nr_range_new);
result[i].chunk_sizek = chunk_size >> 10;
result[i].gran_sizek = gran_size >> 10;
result[i].num_reg = num_reg;
if (range_sums < range_sums_new) {
result[i].lose_cover_sizek =
(range_sums_new - range_sums) << PSHIFT;
result[i].bad = 1;
} else
result[i].lose_cover_sizek =
(range_sums - range_sums_new) << PSHIFT;
/* double check it */
if (!result[i].bad && !result[i].lose_cover_sizek) {
if (nr_range_new != nr_range ||
memcmp(range, range_new, sizeof(range)))
result[i].bad = 1;
}
if (!result[i].bad && (range_sums - range_sums_new <
min_loss_pfn[num_reg])) {
min_loss_pfn[num_reg] =
range_sums - range_sums_new;
}
i++;
}
}
/* print out all */
for (i = 0; i < NUM_RESULT; i++) {
printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
result[i].chunk_sizek >> 10);
printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
result[i].num_reg, result[i].bad?"-":"",
result[i].lose_cover_sizek >> 10);
}
/* try to find the optimal index */
if (nr_mtrr_spare_reg >= num_var_ranges)
nr_mtrr_spare_reg = num_var_ranges - 1;
num_reg_good = -1;
for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
if (!min_loss_pfn[i]) {
num_reg_good = i;
break;
}
}
index_good = -1;
if (num_reg_good != -1) {
for (i = 0; i < NUM_RESULT; i++) {
if (!result[i].bad &&
result[i].num_reg == num_reg_good &&
!result[i].lose_cover_sizek) {
index_good = i;
break;
}
}
}
if (index_good != -1) {
printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
i = index_good;
printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
result[i].gran_sizek >> 10,
result[i].chunk_sizek >> 10);
printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
result[i].num_reg,
result[i].lose_cover_sizek >> 10);
/* convert ranges to var ranges state */
chunk_size = result[i].chunk_sizek;
chunk_size <<= 10;
gran_size = result[i].gran_sizek;
gran_size <<= 10;
debug_print = 1;
x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
set_var_mtrr_all(address_bits);
return 1;
}
printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
return 0;
}
#else
static int __init mtrr_cleanup(unsigned address_bits)
{
return 0;
}
#endif
static int __initdata changed_by_mtrr_cleanup;
static int disable_mtrr_trim;
static int __init disable_mtrr_trim_setup(char *str)
@ -648,6 +1430,19 @@ int __init amd_special_default_mtrr(void)
return 0;
}
static u64 __init real_trim_memory(unsigned long start_pfn,
unsigned long limit_pfn)
{
u64 trim_start, trim_size;
trim_start = start_pfn;
trim_start <<= PAGE_SHIFT;
trim_size = limit_pfn;
trim_size <<= PAGE_SHIFT;
trim_size -= trim_start;
return e820_update_range(trim_start, trim_size, E820_RAM,
E820_RESERVED);
}
/**
* mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
* @end_pfn: ending page frame number
@ -663,8 +1458,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
unsigned long i, base, size, highest_pfn = 0, def, dummy;
mtrr_type type;
u64 trim_start, trim_size;
int nr_range;
u64 total_trim_size;
/* extra one for all 0 */
int num[MTRR_NUM_TYPES + 1];
/*
* Make sure we only trim uncachable memory on machines that
* support the Intel MTRR architecture:
@ -676,14 +1474,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
if (def != MTRR_TYPE_UNCACHABLE)
return 0;
if (amd_special_default_mtrr())
return 0;
/* get it and store it aside */
memset(range_state, 0, sizeof(range_state));
for (i = 0; i < num_var_ranges; i++) {
mtrr_if->get(i, &base, &size, &type);
range_state[i].base_pfn = base;
range_state[i].size_pfn = size;
range_state[i].type = type;
}
/* Find highest cached pfn */
for (i = 0; i < num_var_ranges; i++) {
mtrr_if->get(i, &base, &size, &type);
type = range_state[i].type;
if (type != MTRR_TYPE_WRBACK)
continue;
base = range_state[i].base_pfn;
size = range_state[i].size_pfn;
if (highest_pfn < base + size)
highest_pfn = base + size;
}
@ -698,22 +1504,65 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
return 0;
}
if (highest_pfn < end_pfn) {
printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
" all of memory, losing %luMB of RAM.\n",
(end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));
/* check entries number */
memset(num, 0, sizeof(num));
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
if (type >= MTRR_NUM_TYPES)
continue;
size = range_state[i].size_pfn;
if (!size)
type = MTRR_NUM_TYPES;
num[type]++;
}
WARN_ON(1);
/* no entry for WB? */
if (!num[MTRR_TYPE_WRBACK])
return 0;
/* check if we only had WB and UC */
if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
num_var_ranges - num[MTRR_NUM_TYPES])
return 0;
memset(range, 0, sizeof(range));
nr_range = 0;
if (mtrr_tom2) {
range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
if (highest_pfn < range[nr_range].end + 1)
highest_pfn = range[nr_range].end + 1;
nr_range++;
}
nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
total_trim_size = 0;
/* check the head */
if (range[0].start)
total_trim_size += real_trim_memory(0, range[0].start);
/* check the holes */
for (i = 0; i < nr_range - 1; i++) {
if (range[i].end + 1 < range[i+1].start)
total_trim_size += real_trim_memory(range[i].end + 1,
range[i+1].start);
}
/* check the top */
i = nr_range - 1;
if (range[i].end + 1 < end_pfn)
total_trim_size += real_trim_memory(range[i].end + 1,
end_pfn);
if (total_trim_size) {
printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
" all of memory, losing %lluMB of RAM.\n",
total_trim_size >> 20);
if (!changed_by_mtrr_cleanup)
WARN_ON(1);
printk(KERN_INFO "update e820 for mtrr\n");
trim_start = highest_pfn;
trim_start <<= PAGE_SHIFT;
trim_size = end_pfn;
trim_size <<= PAGE_SHIFT;
trim_size -= trim_start;
update_memory_range(trim_start, trim_size, E820_RAM,
E820_RESERVED);
update_e820();
return 1;
}
@ -729,18 +1578,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
*/
void __init mtrr_bp_init(void)
{
u32 phys_addr;
init_ifs();
phys_addr = 32;
if (cpu_has_mtrr) {
mtrr_if = &generic_mtrr_ops;
size_or_mask = 0xff000000; /* 36 bits */
size_and_mask = 0x00f00000;
phys_addr = 36;
/* This is an AMD specific MSR, but we assume(hope?) that
Intel will implement it to when they extend the address
bus of the Xeon. */
if (cpuid_eax(0x80000000) >= 0x80000008) {
u32 phys_addr;
phys_addr = cpuid_eax(0x80000008) & 0xff;
/* CPUID workaround for Intel 0F33/0F34 CPU */
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@ -758,6 +1610,7 @@ void __init mtrr_bp_init(void)
don't support PAE */
size_or_mask = 0xfff00000; /* 32 bits */
size_and_mask = 0;
phys_addr = 32;
}
} else {
switch (boot_cpu_data.x86_vendor) {
@ -791,8 +1644,15 @@ void __init mtrr_bp_init(void)
if (mtrr_if) {
set_num_var_ranges();
init_table();
if (use_intel())
if (use_intel()) {
get_mtrr_state();
if (mtrr_cleanup(phys_addr)) {
changed_by_mtrr_cleanup = 1;
mtrr_if->set_all();
}
}
}
}
@ -829,9 +1689,10 @@ static int __init mtrr_init_finialize(void)
{
if (!mtrr_if)
return 0;
if (use_intel())
mtrr_state_warn();
else {
if (use_intel()) {
if (!changed_by_mtrr_cleanup)
mtrr_state_warn();
} else {
/* The CPUs haven't MTRR and seem to not support SMP. They have
* specific drivers, we use a tricky method to support
* suspend/resume for them.

View file

@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt);
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
void get_mtrr_state(void);
extern void set_mtrr_ops(struct mtrr_ops * ops);
@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if;
#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
extern unsigned int num_var_ranges;
extern u64 mtrr_tom2;
void mtrr_state_warn(void);
const char *mtrr_attrib_to_str(int x);

File diff suppressed because it is too large Load diff

View file

@ -1,775 +0,0 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/string.h>
#include <linux/kexec.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/uaccess.h>
#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/e820.h>
#include <asm/setup.h>
struct e820map e820;
struct change_member {
struct e820entry *pbios; /* pointer to original bios entry */
unsigned long long addr; /* address for this change point */
};
static struct change_member change_point_list[2*E820MAX] __initdata;
static struct change_member *change_point[2*E820MAX] __initdata;
static struct e820entry *overlap_list[E820MAX] __initdata;
static struct e820entry new_bios[E820MAX] __initdata;
/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0x10000000;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
extern int user_defined_memmap;
static struct resource system_rom_resource = {
.name = "System ROM",
.start = 0xf0000,
.end = 0xfffff,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};
static struct resource extension_rom_resource = {
.name = "Extension ROM",
.start = 0xe0000,
.end = 0xeffff,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};
static struct resource adapter_rom_resources[] = { {
.name = "Adapter ROM",
.start = 0xc8000,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
.name = "Adapter ROM",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
.name = "Adapter ROM",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
.name = "Adapter ROM",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
.name = "Adapter ROM",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
.name = "Adapter ROM",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
} };
static struct resource video_rom_resource = {
.name = "Video ROM",
.start = 0xc0000,
.end = 0xc7fff,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};
#define ROMSIGNATURE 0xaa55
static int __init romsignature(const unsigned char *rom)
{
const unsigned short * const ptr = (const unsigned short *)rom;
unsigned short sig;
return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
}
static int __init romchecksum(const unsigned char *rom, unsigned long length)
{
unsigned char sum, c;
for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
sum += c;
return !length && !sum;
}
static void __init probe_roms(void)
{
const unsigned char *rom;
unsigned long start, length, upper;
unsigned char c;
int i;
/* video rom */
upper = adapter_rom_resources[0].start;
for (start = video_rom_resource.start; start < upper; start += 2048) {
rom = isa_bus_to_virt(start);
if (!romsignature(rom))
continue;
video_rom_resource.start = start;
if (probe_kernel_address(rom + 2, c) != 0)
continue;
/* 0 < length <= 0x7f * 512, historically */
length = c * 512;
/* if checksum okay, trust length byte */
if (length && romchecksum(rom, length))
video_rom_resource.end = start + length - 1;
request_resource(&iomem_resource, &video_rom_resource);
break;
}
start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
if (start < upper)
start = upper;
/* system rom */
request_resource(&iomem_resource, &system_rom_resource);
upper = system_rom_resource.start;
/* check for extension rom (ignore length byte!) */
rom = isa_bus_to_virt(extension_rom_resource.start);
if (romsignature(rom)) {
length = extension_rom_resource.end - extension_rom_resource.start + 1;
if (romchecksum(rom, length)) {
request_resource(&iomem_resource, &extension_rom_resource);
upper = extension_rom_resource.start;
}
}
/* check for adapter roms on 2k boundaries */
for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
rom = isa_bus_to_virt(start);
if (!romsignature(rom))
continue;
if (probe_kernel_address(rom + 2, c) != 0)
continue;
/* 0 < length <= 0x7f * 512, historically */
length = c * 512;
/* but accept any length that fits if checksum okay */
if (!length || start + length > upper || !romchecksum(rom, length))
continue;
adapter_rom_resources[i].start = start;
adapter_rom_resources[i].end = start + length - 1;
request_resource(&iomem_resource, &adapter_rom_resources[i]);
start = adapter_rom_resources[i++].end & ~2047UL;
}
}
/*
* Request address space for all standard RAM and ROM resources
* and also for regions reported as reserved by the e820.
*/
void __init init_iomem_resources(struct resource *code_resource,
struct resource *data_resource,
struct resource *bss_resource)
{
int i;
probe_roms();
for (i = 0; i < e820.nr_map; i++) {
struct resource *res;
#ifndef CONFIG_RESOURCES_64BIT
if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
continue;
#endif
res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
switch (e820.map[i].type) {
case E820_RAM: res->name = "System RAM"; break;
case E820_ACPI: res->name = "ACPI Tables"; break;
case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
default: res->name = "reserved";
}
res->start = e820.map[i].addr;
res->end = res->start + e820.map[i].size - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
if (request_resource(&iomem_resource, res)) {
kfree(res);
continue;
}
if (e820.map[i].type == E820_RAM) {
/*
* We don't know which RAM region contains kernel data,
* so we try it repeatedly and let the resource manager
* test it.
*/
request_resource(res, code_resource);
request_resource(res, data_resource);
request_resource(res, bss_resource);
#ifdef CONFIG_KEXEC
if (crashk_res.start != crashk_res.end)
request_resource(res, &crashk_res);
#endif
}
}
}
#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
/**
* e820_mark_nosave_regions - Find the ranges of physical addresses that do not
* correspond to e820 RAM areas and mark the corresponding pages as nosave for
* hibernation.
*
* This function requires the e820 map to be sorted and without any
* overlapping entries and assumes the first e820 area to be RAM.
*/
void __init e820_mark_nosave_regions(void)
{
int i;
unsigned long pfn;
pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
for (i = 1; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
if (pfn < PFN_UP(ei->addr))
register_nosave_region(pfn, PFN_UP(ei->addr));
pfn = PFN_DOWN(ei->addr + ei->size);
if (ei->type != E820_RAM)
register_nosave_region(PFN_UP(ei->addr), pfn);
if (pfn >= max_low_pfn)
break;
}
}
#endif
void __init add_memory_region(unsigned long long start,
unsigned long long size, int type)
{
int x;
x = e820.nr_map;
if (x == E820MAX) {
printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
return;
}
e820.map[x].addr = start;
e820.map[x].size = size;
e820.map[x].type = type;
e820.nr_map++;
} /* add_memory_region */
/*
* Sanitize the BIOS e820 map.
*
* Some e820 responses include overlapping entries. The following
* replaces the original e820 map with a new one, removing overlaps.
*
*/
int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
struct change_member *change_tmp;
unsigned long current_type, last_type;
unsigned long long last_addr;
int chgidx, still_changing;
int overlap_entries;
int new_bios_entry;
int old_nr, new_nr, chg_nr;
int i;
/*
Visually we're performing the following (1,2,3,4 = memory types)...
Sample memory map (w/overlaps):
____22__________________
______________________4_
____1111________________
_44_____________________
11111111________________
____________________33__
___________44___________
__________33333_________
______________22________
___________________2222_
_________111111111______
_____________________11_
_________________4______
Sanitized equivalent (no overlap):
1_______________________
_44_____________________
___1____________________
____22__________________
______11________________
_________1______________
__________3_____________
___________44___________
_____________33_________
_______________2________
________________1_______
_________________4______
___________________2____
____________________33__
______________________4_
*/
/* if there's only one memory region, don't bother */
if (*pnr_map < 2) {
return -1;
}
old_nr = *pnr_map;
/* bail out if we find any unreasonable addresses in bios map */
for (i=0; i<old_nr; i++)
if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
return -1;
}
/* create pointers for initial change-point information (for sorting) */
for (i=0; i < 2*old_nr; i++)
change_point[i] = &change_point_list[i];
/* record all known change-points (starting and ending addresses),
omitting those that are for empty memory regions */
chgidx = 0;
for (i=0; i < old_nr; i++) {
if (biosmap[i].size != 0) {
change_point[chgidx]->addr = biosmap[i].addr;
change_point[chgidx++]->pbios = &biosmap[i];
change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
change_point[chgidx++]->pbios = &biosmap[i];
}
}
chg_nr = chgidx; /* true number of change-points */
/* sort change-point list by memory addresses (low -> high) */
still_changing = 1;
while (still_changing) {
still_changing = 0;
for (i=1; i < chg_nr; i++) {
/* if <current_addr> > <last_addr>, swap */
/* or, if current=<start_addr> & last=<end_addr>, swap */
if ((change_point[i]->addr < change_point[i-1]->addr) ||
((change_point[i]->addr == change_point[i-1]->addr) &&
(change_point[i]->addr == change_point[i]->pbios->addr) &&
(change_point[i-1]->addr != change_point[i-1]->pbios->addr))
)
{
change_tmp = change_point[i];
change_point[i] = change_point[i-1];
change_point[i-1] = change_tmp;
still_changing=1;
}
}
}
/* create a new bios memory map, removing overlaps */
overlap_entries=0; /* number of entries in the overlap table */
new_bios_entry=0; /* index for creating new bios map entries */
last_type = 0; /* start with undefined memory type */
last_addr = 0; /* start with 0 as last starting address */
/* loop through change-points, determining affect on the new bios map */
for (chgidx=0; chgidx < chg_nr; chgidx++)
{
/* keep track of all overlapping bios entries */
if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
{
/* add map entry to overlap list (> 1 entry implies an overlap) */
overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
}
else
{
/* remove entry from list (order independent, so swap with last) */
for (i=0; i<overlap_entries; i++)
{
if (overlap_list[i] == change_point[chgidx]->pbios)
overlap_list[i] = overlap_list[overlap_entries-1];
}
overlap_entries--;
}
/* if there are overlapping entries, decide which "type" to use */
/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
current_type = 0;
for (i=0; i<overlap_entries; i++)
if (overlap_list[i]->type > current_type)
current_type = overlap_list[i]->type;
/* continue building up new bios map based on this information */
if (current_type != last_type) {
if (last_type != 0) {
new_bios[new_bios_entry].size =
change_point[chgidx]->addr - last_addr;
/* move forward only if the new size was non-zero */
if (new_bios[new_bios_entry].size != 0)
if (++new_bios_entry >= E820MAX)
break; /* no more space left for new bios entries */
}
if (current_type != 0) {
new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
new_bios[new_bios_entry].type = current_type;
last_addr=change_point[chgidx]->addr;
}
last_type = current_type;
}
}
new_nr = new_bios_entry; /* retain count for new bios entries */
/* copy new bios mapping into original location */
memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
*pnr_map = new_nr;
return 0;
}
/*
* Copy the BIOS e820 map into a safe place.
*
* Sanity-check it while we're at it..
*
* If we're lucky and live on a modern system, the setup code
* will have given us a memory map that we can use to properly
* set up memory. If we aren't, we'll fake a memory map.
*
* We check to see that the memory map contains at least 2 elements
* before we'll use it, because the detection code in setup.S may
* not be perfect and most every PC known to man has two memory
* regions: one from 0 to 640k, and one from 1mb up. (The IBM
* thinkpad 560x, for example, does not cooperate with the memory
* detection code.)
*/
int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
{
/* Only one memory region (or negative)? Ignore it */
if (nr_map < 2)
return -1;
do {
u64 start = biosmap->addr;
u64 size = biosmap->size;
u64 end = start + size;
u32 type = biosmap->type;
/* Overflow in 64 bits? Ignore the memory map. */
if (start > end)
return -1;
add_memory_region(start, size, type);
} while (biosmap++, --nr_map);
return 0;
}
/*
* Find the highest page frame number we have available
*/
void __init propagate_e820_map(void)
{
int i;
max_pfn = 0;
for (i = 0; i < e820.nr_map; i++) {
unsigned long start, end;
/* RAM? */
if (e820.map[i].type != E820_RAM)
continue;
start = PFN_UP(e820.map[i].addr);
end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
if (start >= end)
continue;
if (end > max_pfn)
max_pfn = end;
memory_present(0, start, end);
}
}
/*
* Register fully available low RAM pages with the bootmem allocator.
*/
void __init register_bootmem_low_pages(unsigned long max_low_pfn)
{
int i;
for (i = 0; i < e820.nr_map; i++) {
unsigned long curr_pfn, last_pfn, size;
/*
* Reserve usable low memory
*/
if (e820.map[i].type != E820_RAM)
continue;
/*
* We are rounding up the start address of usable memory:
*/
curr_pfn = PFN_UP(e820.map[i].addr);
if (curr_pfn >= max_low_pfn)
continue;
/*
* ... and at the end of the usable range downwards:
*/
last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
if (last_pfn > max_low_pfn)
last_pfn = max_low_pfn;
/*
* .. finally, did all the rounding and playing
* around just make the area go away?
*/
if (last_pfn <= curr_pfn)
continue;
size = last_pfn - curr_pfn;
free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
}
}
void __init e820_register_memory(void)
{
unsigned long gapstart, gapsize, round;
unsigned long long last;
int i;
/*
* Search for the biggest gap in the low 32 bits of the e820
* memory space.
*/
last = 0x100000000ull;
gapstart = 0x10000000;
gapsize = 0x400000;
i = e820.nr_map;
while (--i >= 0) {
unsigned long long start = e820.map[i].addr;
unsigned long long end = start + e820.map[i].size;
/*
* Since "last" is at most 4GB, we know we'll
* fit in 32 bits if this condition is true
*/
if (last > end) {
unsigned long gap = last - end;
if (gap > gapsize) {
gapsize = gap;
gapstart = end;
}
}
if (start < last)
last = start;
}
/*
* See how much we want to round up: start off with
* rounding to the next 1MB area.
*/
round = 0x100000;
while ((gapsize >> 4) > round)
round += round;
/* Fun with two's complement */
pci_mem_start = (gapstart + round) & -round;
printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
pci_mem_start, gapstart, gapsize);
}
void __init print_memory_map(char *who)
{
int i;
for (i = 0; i < e820.nr_map; i++) {
printk(" %s: %016Lx - %016Lx ", who,
e820.map[i].addr,
e820.map[i].addr + e820.map[i].size);
switch (e820.map[i].type) {
case E820_RAM: printk("(usable)\n");
break;
case E820_RESERVED:
printk("(reserved)\n");
break;
case E820_ACPI:
printk("(ACPI data)\n");
break;
case E820_NVS:
printk("(ACPI NVS)\n");
break;
default: printk("type %u\n", e820.map[i].type);
break;
}
}
}
void __init limit_regions(unsigned long long size)
{
unsigned long long current_addr;
int i;
print_memory_map("limit_regions start");
for (i = 0; i < e820.nr_map; i++) {
current_addr = e820.map[i].addr + e820.map[i].size;
if (current_addr < size)
continue;
if (e820.map[i].type != E820_RAM)
continue;
if (e820.map[i].addr >= size) {
/*
* This region starts past the end of the
* requested size, skip it completely.
*/
e820.nr_map = i;
} else {
e820.nr_map = i + 1;
e820.map[i].size -= current_addr - size;
}
print_memory_map("limit_regions endfor");
return;
}
print_memory_map("limit_regions endfunc");
}
/*
* This function checks if any part of the range <start,end> is mapped
* with type.
*/
int
e820_any_mapped(u64 start, u64 end, unsigned type)
{
int i;
for (i = 0; i < e820.nr_map; i++) {
const struct e820entry *ei = &e820.map[i];
if (type && ei->type != type)
continue;
if (ei->addr >= end || ei->addr + ei->size <= start)
continue;
return 1;
}
return 0;
}
EXPORT_SYMBOL_GPL(e820_any_mapped);
/*
* This function checks if the entire range <start,end> is mapped with type.
*
* Note: this function only works correct if the e820 table is sorted and
* not-overlapping, which is the case
*/
int __init
e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
{
u64 start = s;
u64 end = e;
int i;
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
if (type && ei->type != type)
continue;
/* is the region (part) in overlap with the current region ?*/
if (ei->addr >= end || ei->addr + ei->size <= start)
continue;
/* if the region is at the beginning of <start,end> we move
* start to the end of the region since it's ok until there
*/
if (ei->addr <= start)
start = ei->addr + ei->size;
/* if start is now at or beyond end, we're done, full
* coverage */
if (start >= end)
return 1; /* we're done */
}
return 0;
}
static int __init parse_memmap(char *arg)
{
if (!arg)
return -EINVAL;
if (strcmp(arg, "exactmap") == 0) {
#ifdef CONFIG_CRASH_DUMP
/* If we are doing a crash dump, we
* still need to know the real mem
* size before original memory map is
* reset.
*/
propagate_e820_map();
saved_max_pfn = max_pfn;
#endif
e820.nr_map = 0;
user_defined_memmap = 1;
} else {
/* If the user specifies memory size, we
* limit the BIOS-provided memory map to
* that size. exactmap can be used to specify
* the exact map. mem=number can be used to
* trim the existing memory map.
*/
unsigned long long start_at, mem_size;
mem_size = memparse(arg, &arg);
if (*arg == '@') {
start_at = memparse(arg+1, &arg);
add_memory_region(start_at, mem_size, E820_RAM);
} else if (*arg == '#') {
start_at = memparse(arg+1, &arg);
add_memory_region(start_at, mem_size, E820_ACPI);
} else if (*arg == '$') {
start_at = memparse(arg+1, &arg);
add_memory_region(start_at, mem_size, E820_RESERVED);
} else {
limit_regions(mem_size);
user_defined_memmap = 1;
}
}
return 0;
}
early_param("memmap", parse_memmap);
void __init update_memory_range(u64 start, u64 size, unsigned old_type,
unsigned new_type)
{
int i;
BUG_ON(old_type == new_type);
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
u64 final_start, final_end;
if (ei->type != old_type)
continue;
/* totally covered? */
if (ei->addr >= start && ei->size <= size) {
ei->type = new_type;
continue;
}
/* partially covered */
final_start = max(start, ei->addr);
final_end = min(start + size, ei->addr + ei->size);
if (final_start >= final_end)
continue;
add_memory_region(final_start, final_end - final_start,
new_type);
}
}
void __init update_e820(void)
{
u8 nr_map;
nr_map = e820.nr_map;
if (sanitize_e820_map(e820.map, &nr_map))
return;
e820.nr_map = nr_map;
printk(KERN_INFO "modified physical RAM map:\n");
print_memory_map("modified");
}

View file

@ -213,6 +213,48 @@ unsigned long efi_get_time(void)
eft.minute, eft.second);
}
/*
* Tell the kernel about the EFI memory map. This might include
* more than the max 128 entries that can fit in the e820 legacy
* (zeropage) memory map.
*/
static void __init add_efi_memmap(void)
{
void *p;
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
efi_memory_desc_t *md = p;
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
int e820_type;
if (md->attribute & EFI_MEMORY_WB)
e820_type = E820_RAM;
else
e820_type = E820_RESERVED;
e820_add_region(start, size, e820_type);
}
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
void __init efi_reserve_early(void)
{
unsigned long pmap;
pmap = boot_params.efi_info.efi_memmap;
#ifdef CONFIG_X86_64
pmap += (__u64)boot_params.efi_info.efi_memmap_hi << 32;
#endif
memmap.phys_map = (void *)pmap;
memmap.nr_map = boot_params.efi_info.efi_memmap_size /
boot_params.efi_info.efi_memdesc_size;
memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size,
"EFI memmap");
}
#if EFI_DEBUG
static void __init print_efi_memmap(void)
{
@ -242,21 +284,11 @@ void __init efi_init(void)
int i = 0;
void *tmp;
#ifdef CONFIG_X86_32
efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
memmap.phys_map = (void *)boot_params.efi_info.efi_memmap;
#else
efi_phys.systab = (efi_system_table_t *)
(boot_params.efi_info.efi_systab |
((__u64)boot_params.efi_info.efi_systab_hi<<32));
memmap.phys_map = (void *)
(boot_params.efi_info.efi_memmap |
((__u64)boot_params.efi_info.efi_memmap_hi<<32));
#ifdef CONFIG_X86_64
efi_phys.systab = (void *)efi_phys.systab +
((__u64)boot_params.efi_info.efi_systab_hi<<32);
#endif
memmap.nr_map = boot_params.efi_info.efi_memmap_size /
boot_params.efi_info.efi_memdesc_size;
memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
efi.systab = early_ioremap((unsigned long)efi_phys.systab,
sizeof(efi_system_table_t));
@ -370,6 +402,7 @@ void __init efi_init(void)
if (memmap.desc_size != sizeof(efi_memory_desc_t))
printk(KERN_WARNING "Kernel-defined memdesc"
"doesn't match the one from EFI!\n");
add_efi_memmap();
/* Setup for EFI runtime service */
reboot_type = BOOT_EFI;

View file

@ -97,13 +97,7 @@ void __init efi_call_phys_epilog(void)
early_runtime_code_mapping_set_exec(0);
}
void __init efi_reserve_bootmem(void)
{
reserve_bootmem_generic((unsigned long)memmap.phys_map,
memmap.nr_map * memmap.desc_size);
}
void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
{
static unsigned pages_mapped __initdata;
unsigned i, pages;

View file

@ -51,7 +51,7 @@ void __init setup_apic_routing(void)
else
#endif
if (num_possible_cpus() <= 8)
if (max_physical_apicid < 8)
genapic = &apic_flat;
else
genapic = &apic_physflat;

73
arch/x86/kernel/head.c Normal file
View file

@ -0,0 +1,73 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <asm/setup.h>
#include <asm/bios_ebda.h>
#define BIOS_LOWMEM_KILOBYTES 0x413
/*
* The BIOS places the EBDA/XBDA at the top of conventional
* memory, and usually decreases the reported amount of
* conventional memory (int 0x12) too. This also contains a
* workaround for Dell systems that neglect to reserve EBDA.
* The same workaround also avoids a problem with the AMD768MPX
* chipset: reserve a page before VGA to prevent PCI prefetch
* into it (errata #56). Usually the page is reserved anyways,
* unless you have no PS/2 mouse plugged in.
*/
void __init reserve_ebda_region(void)
{
unsigned int lowmem, ebda_addr;
/* To determine the position of the EBDA and the */
/* end of conventional memory, we need to look at */
/* the BIOS data area. In a paravirtual environment */
/* that area is absent. We'll just have to assume */
/* that the paravirt case can handle memory setup */
/* correctly, without our help. */
if (paravirt_enabled())
return;
/* end of low (conventional) memory */
lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
lowmem <<= 10;
/* start of EBDA area */
ebda_addr = get_bios_ebda();
/* Fixup: bios puts an EBDA in the top 64K segment */
/* of conventional memory, but does not adjust lowmem. */
if ((lowmem - ebda_addr) <= 0x10000)
lowmem = ebda_addr;
/* Fixup: bios does not report an EBDA at all. */
/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
if ((ebda_addr == 0) && (lowmem >= 0x9f000))
lowmem = 0x9f000;
/* Paranoia: should never happen, but... */
if ((lowmem == 0) || (lowmem >= 0x100000))
lowmem = 0x9f000;
/* reserve all memory between lowmem and the 1MB mark */
reserve_early(lowmem, 0x100000, "BIOS reserved");
}
void __init reserve_setup_data(void)
{
struct setup_data *data;
u64 pa_data;
char buf[32];
if (boot_params.hdr.version < 0x0209)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_ioremap(pa_data, sizeof(*data));
sprintf(buf, "setup data %x", data->type);
reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
pa_data = data->next;
early_iounmap(data, sizeof(*data));
}
}

View file

@ -8,7 +8,34 @@
#include <linux/init.h>
#include <linux/start_kernel.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/e820.h>
#include <asm/bios_ebda.h>
void __init i386_start_kernel(void)
{
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = ramdisk_image + ramdisk_size;
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
reserve_early(init_pg_tables_start, init_pg_tables_end,
"INIT_PG_TABLE");
reserve_ebda_region();
/*
* At this point everything still needed from the boot loader
* or BIOS or kernel text should be early reserved or marked not
* RAM in e820. All other memory is free game.
*/
start_kernel();
}

View file

@ -51,74 +51,6 @@ static void __init copy_bootdata(char *real_mode_data)
}
}
#define BIOS_LOWMEM_KILOBYTES 0x413
/*
* The BIOS places the EBDA/XBDA at the top of conventional
* memory, and usually decreases the reported amount of
* conventional memory (int 0x12) too. This also contains a
* workaround for Dell systems that neglect to reserve EBDA.
* The same workaround also avoids a problem with the AMD768MPX
* chipset: reserve a page before VGA to prevent PCI prefetch
* into it (errata #56). Usually the page is reserved anyways,
* unless you have no PS/2 mouse plugged in.
*/
static void __init reserve_ebda_region(void)
{
unsigned int lowmem, ebda_addr;
/* To determine the position of the EBDA and the */
/* end of conventional memory, we need to look at */
/* the BIOS data area. In a paravirtual environment */
/* that area is absent. We'll just have to assume */
/* that the paravirt case can handle memory setup */
/* correctly, without our help. */
if (paravirt_enabled())
return;
/* end of low (conventional) memory */
lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
lowmem <<= 10;
/* start of EBDA area */
ebda_addr = get_bios_ebda();
/* Fixup: bios puts an EBDA in the top 64K segment */
/* of conventional memory, but does not adjust lowmem. */
if ((lowmem - ebda_addr) <= 0x10000)
lowmem = ebda_addr;
/* Fixup: bios does not report an EBDA at all. */
/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
if ((ebda_addr == 0) && (lowmem >= 0x9f000))
lowmem = 0x9f000;
/* Paranoia: should never happen, but... */
if ((lowmem == 0) || (lowmem >= 0x100000))
lowmem = 0x9f000;
/* reserve all memory between lowmem and the 1MB mark */
reserve_early(lowmem, 0x100000, "BIOS reserved");
}
static void __init reserve_setup_data(void)
{
struct setup_data *data;
unsigned long pa_data;
char buf[32];
if (boot_params.hdr.version < 0x0209)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_ioremap(pa_data, sizeof(*data));
sprintf(buf, "setup data %x", data->type);
reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
pa_data = data->next;
early_iounmap(data, sizeof(*data));
}
}
void __init x86_64_start_kernel(char * real_mode_data)
{
int i;

View file

@ -194,6 +194,7 @@ default_entry:
xorl %ebx,%ebx /* %ebx is kept at zero */
movl $pa(pg0), %edi
movl %edi, pa(init_pg_tables_start)
movl $pa(swapper_pg_pmd), %edx
movl $PTE_ATTR, %eax
10:
@ -219,6 +220,8 @@ default_entry:
jb 10b
1:
movl %edi,pa(init_pg_tables_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
/* Do early initialization of the fixmap area */
movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
@ -228,6 +231,7 @@ default_entry:
page_pde_offset = (__PAGE_OFFSET >> 20);
movl $pa(pg0), %edi
movl %edi, pa(init_pg_tables_start)
movl $pa(swapper_pg_dir), %edx
movl $PTE_ATTR, %eax
10:
@ -249,6 +253,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
cmpl %ebp,%eax
jb 10b
movl %edi,pa(init_pg_tables_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
/* Do early initialization of the fixmap area */
movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax

View file

@ -72,15 +72,21 @@ int sis_apic_bug = -1;
int nr_ioapic_registers[MAX_IO_APICS];
/* I/O APIC entries */
struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics;
/* MP IRQ source entries */
struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* # of MP IRQ source entries */
int mp_irq_entries;
#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif
DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
static int disable_timer_pin_1 __initdata;
/*
@ -110,7 +116,7 @@ struct io_apic {
static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
{
return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
}
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@ -802,10 +808,10 @@ static int find_irq_entry(int apic, int pin, int type)
int i;
for (i = 0; i < mp_irq_entries; i++)
if (mp_irqs[i].mpc_irqtype == type &&
(mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
mp_irqs[i].mpc_dstirq == pin)
if (mp_irqs[i].mp_irqtype == type &&
(mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
mp_irqs[i].mp_dstirq == pin)
return i;
return -1;
@ -819,13 +825,13 @@ static int __init find_isa_irq_pin(int irq, int type)
int i;
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
int lbus = mp_irqs[i].mp_srcbus;
if (test_bit(lbus, mp_bus_not_pci) &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
(mp_irqs[i].mp_irqtype == type) &&
(mp_irqs[i].mp_srcbusirq == irq))
return mp_irqs[i].mpc_dstirq;
return mp_irqs[i].mp_dstirq;
}
return -1;
}
@ -835,17 +841,17 @@ static int __init find_isa_irq_apic(int irq, int type)
int i;
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
int lbus = mp_irqs[i].mp_srcbus;
if (test_bit(lbus, mp_bus_not_pci) &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
(mp_irqs[i].mp_irqtype == type) &&
(mp_irqs[i].mp_srcbusirq == irq))
break;
}
if (i < mp_irq_entries) {
int apic;
for (apic = 0; apic < nr_ioapics; apic++) {
if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
return apic;
}
}
@ -865,28 +871,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
"slot:%d, pin:%d.\n", bus, slot, pin);
if (mp_bus_id_to_pci_bus[bus] == -1) {
if (test_bit(bus, mp_bus_not_pci)) {
printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
return -1;
}
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
int lbus = mp_irqs[i].mp_srcbus;
for (apic = 0; apic < nr_ioapics; apic++)
if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
mp_irqs[i].mp_dstapic == MP_APIC_ALL)
break;
if (!test_bit(lbus, mp_bus_not_pci) &&
!mp_irqs[i].mpc_irqtype &&
!mp_irqs[i].mp_irqtype &&
(bus == lbus) &&
(slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
int irq = pin_2_irq(i, apic, mp_irqs[i].mpc_dstirq);
(slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
if (!(apic || IO_APIC_IRQ(irq)))
continue;
if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
if (pin == (mp_irqs[i].mp_srcbusirq & 3))
return irq;
/*
* Use the first all-but-pin matching entry as a
@ -953,7 +959,7 @@ static int EISA_ELCR(unsigned int irq)
* EISA conforming in the MP table, that means its trigger type must
* be read in from the ELCR */
#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
#define default_EISA_polarity(idx) default_ISA_polarity(idx)
/* PCI interrupts are always polarity one level triggered,
@ -970,13 +976,13 @@ static int EISA_ELCR(unsigned int irq)
static int MPBIOS_polarity(int idx)
{
int bus = mp_irqs[idx].mpc_srcbus;
int bus = mp_irqs[idx].mp_srcbus;
int polarity;
/*
* Determine IRQ line polarity (high active or low active):
*/
switch (mp_irqs[idx].mpc_irqflag & 3) {
switch (mp_irqs[idx].mp_irqflag & 3) {
case 0: /* conforms, ie. bus-type dependent polarity */
{
polarity = test_bit(bus, mp_bus_not_pci)?
@ -1012,13 +1018,13 @@ static int MPBIOS_polarity(int idx)
static int MPBIOS_trigger(int idx)
{
int bus = mp_irqs[idx].mpc_srcbus;
int bus = mp_irqs[idx].mp_srcbus;
int trigger;
/*
* Determine IRQ trigger mode (edge or level sensitive):
*/
switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) {
switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
case 0: /* conforms, ie. bus-type dependent */
{
trigger = test_bit(bus, mp_bus_not_pci)?
@ -1095,16 +1101,16 @@ static inline int irq_trigger(int idx)
static int pin_2_irq(int idx, int apic, int pin)
{
int irq, i;
int bus = mp_irqs[idx].mpc_srcbus;
int bus = mp_irqs[idx].mp_srcbus;
/*
* Debugging check, we are in big trouble if this message pops up!
*/
if (mp_irqs[idx].mpc_dstirq != pin)
if (mp_irqs[idx].mp_dstirq != pin)
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
if (test_bit(bus, mp_bus_not_pci))
irq = mp_irqs[idx].mpc_srcbusirq;
irq = mp_irqs[idx].mp_srcbusirq;
else {
/*
* PCI IRQs are mapped in order
@ -1248,12 +1254,12 @@ static void __init setup_IO_APIC_irqs(void)
if (first_notcon) {
apic_printk(APIC_VERBOSE, KERN_DEBUG
" IO-APIC (apicid-pin) %d-%d",
mp_ioapics[apic].mpc_apicid,
mp_ioapics[apic].mp_apicid,
pin);
first_notcon = 0;
} else
apic_printk(APIC_VERBOSE, ", %d-%d",
mp_ioapics[apic].mpc_apicid, pin);
mp_ioapics[apic].mp_apicid, pin);
continue;
}
@ -1348,7 +1354,7 @@ void __init print_IO_APIC(void)
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
for (i = 0; i < nr_ioapics; i++)
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
/*
* We are a bit conservative about what we expect. We have to
@ -1367,7 +1373,7 @@ void __init print_IO_APIC(void)
reg_03.raw = io_apic_read(apic, 3);
spin_unlock_irqrestore(&ioapic_lock, flags);
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@ -1708,7 +1714,6 @@ void disable_IO_APIC(void)
* by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
*/
#ifndef CONFIG_X86_NUMAQ
static void __init setup_ioapic_ids_from_mpc(void)
{
union IO_APIC_reg_00 reg_00;
@ -1718,6 +1723,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
unsigned char old_id;
unsigned long flags;
#ifdef CONFIG_X86_NUMAQ
if (found_numaq)
return;
#endif
/*
* Don't check I/O APIC IDs for xAPIC systems. They have
* no meaning without the serial APIC bus.
@ -1741,14 +1751,14 @@ static void __init setup_ioapic_ids_from_mpc(void)
reg_00.raw = io_apic_read(apic, 0);
spin_unlock_irqrestore(&ioapic_lock, flags);
old_id = mp_ioapics[apic].mpc_apicid;
old_id = mp_ioapics[apic].mp_apicid;
if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
apic, mp_ioapics[apic].mpc_apicid);
apic, mp_ioapics[apic].mp_apicid);
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
reg_00.bits.ID);
mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
}
/*
@ -1757,9 +1767,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
if (check_apicid_used(phys_id_present_map,
mp_ioapics[apic].mpc_apicid)) {
mp_ioapics[apic].mp_apicid)) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
apic, mp_ioapics[apic].mpc_apicid);
apic, mp_ioapics[apic].mp_apicid);
for (i = 0; i < get_physical_broadcast(); i++)
if (!physid_isset(i, phys_id_present_map))
break;
@ -1768,13 +1778,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
i);
physid_set(i, phys_id_present_map);
mp_ioapics[apic].mpc_apicid = i;
mp_ioapics[apic].mp_apicid = i;
} else {
physid_mask_t tmp;
tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
apic_printk(APIC_VERBOSE, "Setting %d in the "
"phys_id_present_map\n",
mp_ioapics[apic].mpc_apicid);
mp_ioapics[apic].mp_apicid);
physids_or(phys_id_present_map, phys_id_present_map, tmp);
}
@ -1783,11 +1793,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
* We need to adjust the IRQ routing table
* if the ID changed.
*/
if (old_id != mp_ioapics[apic].mpc_apicid)
if (old_id != mp_ioapics[apic].mp_apicid)
for (i = 0; i < mp_irq_entries; i++)
if (mp_irqs[i].mpc_dstapic == old_id)
mp_irqs[i].mpc_dstapic
= mp_ioapics[apic].mpc_apicid;
if (mp_irqs[i].mp_dstapic == old_id)
mp_irqs[i].mp_dstapic
= mp_ioapics[apic].mp_apicid;
/*
* Read the right value from the MPC table and
@ -1795,9 +1805,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
*/
apic_printk(APIC_VERBOSE, KERN_INFO
"...changing IO-APIC physical APIC ID to %d ...",
mp_ioapics[apic].mpc_apicid);
mp_ioapics[apic].mp_apicid);
reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0, reg_00.raw);
spin_unlock_irqrestore(&ioapic_lock, flags);
@ -1808,15 +1818,12 @@ static void __init setup_ioapic_ids_from_mpc(void)
spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(apic, 0);
spin_unlock_irqrestore(&ioapic_lock, flags);
if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
printk("could not set ID!\n");
else
apic_printk(APIC_VERBOSE, " ok.\n");
}
}
#else
static void __init setup_ioapic_ids_from_mpc(void) { }
#endif
int no_timer_check __initdata;
@ -2352,8 +2359,8 @@ static int ioapic_resume(struct sys_device *dev)
spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(dev->id, 0);
if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
io_apic_write(dev->id, 0, reg_00.raw);
}
spin_unlock_irqrestore(&ioapic_lock, flags);
@ -2785,7 +2792,7 @@ int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int ac
apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
edge_level, active_high_low);
ioapic_register_intr(irq, entry.vector, edge_level);
@ -2806,8 +2813,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
return -1;
for (i = 0; i < mp_irq_entries; i++)
if (mp_irqs[i].mpc_irqtype == mp_INT &&
mp_irqs[i].mpc_srcbusirq == bus_irq)
if (mp_irqs[i].mp_irqtype == mp_INT &&
mp_irqs[i].mp_srcbusirq == bus_irq)
break;
if (i >= mp_irq_entries)
return -1;

View file

@ -108,15 +108,17 @@ DEFINE_SPINLOCK(vector_lock);
int nr_ioapic_registers[MAX_IO_APICS];
/* I/O APIC entries */
struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics;
/* MP IRQ source entries */
struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* # of MP IRQ source entries */
int mp_irq_entries;
DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
/*
* Rough estimation of how many shared IRQs there are, can
* be changed anytime.
@ -144,7 +146,7 @@ struct io_apic {
static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
{
return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
}
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@ -464,10 +466,10 @@ static int find_irq_entry(int apic, int pin, int type)
int i;
for (i = 0; i < mp_irq_entries; i++)
if (mp_irqs[i].mpc_irqtype == type &&
(mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
mp_irqs[i].mpc_dstirq == pin)
if (mp_irqs[i].mp_irqtype == type &&
(mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
mp_irqs[i].mp_dstirq == pin)
return i;
return -1;
@ -481,13 +483,13 @@ static int __init find_isa_irq_pin(int irq, int type)
int i;
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
int lbus = mp_irqs[i].mp_srcbus;
if (test_bit(lbus, mp_bus_not_pci) &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
(mp_irqs[i].mp_irqtype == type) &&
(mp_irqs[i].mp_srcbusirq == irq))
return mp_irqs[i].mpc_dstirq;
return mp_irqs[i].mp_dstirq;
}
return -1;
}
@ -497,17 +499,17 @@ static int __init find_isa_irq_apic(int irq, int type)
int i;
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
int lbus = mp_irqs[i].mp_srcbus;
if (test_bit(lbus, mp_bus_not_pci) &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
(mp_irqs[i].mp_irqtype == type) &&
(mp_irqs[i].mp_srcbusirq == irq))
break;
}
if (i < mp_irq_entries) {
int apic;
for(apic = 0; apic < nr_ioapics; apic++) {
if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
return apic;
}
}
@ -527,28 +529,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
bus, slot, pin);
if (mp_bus_id_to_pci_bus[bus] == -1) {
if (test_bit(bus, mp_bus_not_pci)) {
apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
return -1;
}
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
int lbus = mp_irqs[i].mp_srcbus;
for (apic = 0; apic < nr_ioapics; apic++)
if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
mp_irqs[i].mp_dstapic == MP_APIC_ALL)
break;
if (!test_bit(lbus, mp_bus_not_pci) &&
!mp_irqs[i].mpc_irqtype &&
!mp_irqs[i].mp_irqtype &&
(bus == lbus) &&
(slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
(slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
if (!(apic || IO_APIC_IRQ(irq)))
continue;
if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
if (pin == (mp_irqs[i].mp_srcbusirq & 3))
return irq;
/*
* Use the first all-but-pin matching entry as a
@ -576,13 +578,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
static int MPBIOS_polarity(int idx)
{
int bus = mp_irqs[idx].mpc_srcbus;
int bus = mp_irqs[idx].mp_srcbus;
int polarity;
/*
* Determine IRQ line polarity (high active or low active):
*/
switch (mp_irqs[idx].mpc_irqflag & 3)
switch (mp_irqs[idx].mp_irqflag & 3)
{
case 0: /* conforms, ie. bus-type dependent polarity */
if (test_bit(bus, mp_bus_not_pci))
@ -618,13 +620,13 @@ static int MPBIOS_polarity(int idx)
static int MPBIOS_trigger(int idx)
{
int bus = mp_irqs[idx].mpc_srcbus;
int bus = mp_irqs[idx].mp_srcbus;
int trigger;
/*
* Determine IRQ trigger mode (edge or level sensitive):
*/
switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
{
case 0: /* conforms, ie. bus-type dependent */
if (test_bit(bus, mp_bus_not_pci))
@ -671,16 +673,16 @@ static inline int irq_trigger(int idx)
static int pin_2_irq(int idx, int apic, int pin)
{
int irq, i;
int bus = mp_irqs[idx].mpc_srcbus;
int bus = mp_irqs[idx].mp_srcbus;
/*
* Debugging check, we are in big trouble if this message pops up!
*/
if (mp_irqs[idx].mpc_dstirq != pin)
if (mp_irqs[idx].mp_dstirq != pin)
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
if (test_bit(bus, mp_bus_not_pci)) {
irq = mp_irqs[idx].mpc_srcbusirq;
irq = mp_irqs[idx].mp_srcbusirq;
} else {
/*
* PCI IRQs are mapped in order
@ -857,7 +859,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
apic_printk(APIC_VERBOSE,KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
"IRQ %d Mode:%i Active:%i)\n",
apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
irq, trigger, polarity);
/*
@ -898,10 +900,10 @@ static void __init setup_IO_APIC_irqs(void)
idx = find_irq_entry(apic,pin,mp_INT);
if (idx == -1) {
if (first_notcon) {
apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
first_notcon = 0;
} else
apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
continue;
}
if (!first_notcon) {
@ -969,7 +971,7 @@ void __apicdebuginit print_IO_APIC(void)
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
for (i = 0; i < nr_ioapics; i++)
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
/*
* We are a bit conservative about what we expect. We have to
@ -987,7 +989,7 @@ void __apicdebuginit print_IO_APIC(void)
spin_unlock_irqrestore(&ioapic_lock, flags);
printk("\n");
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
@ -1873,8 +1875,8 @@ static int ioapic_resume(struct sys_device *dev)
spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(dev->id, 0);
if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
io_apic_write(dev->id, 0, reg_00.raw);
}
spin_unlock_irqrestore(&ioapic_lock, flags);
@ -2274,8 +2276,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
return -1;
for (i = 0; i < mp_irq_entries; i++)
if (mp_irqs[i].mpc_irqtype == mp_INT &&
mp_irqs[i].mpc_srcbusirq == bus_irq)
if (mp_irqs[i].mp_irqtype == mp_INT &&
mp_irqs[i].mp_srcbusirq == bus_irq)
break;
if (i >= mp_irq_entries)
return -1;
@ -2368,7 +2370,7 @@ void __init ioapic_init_mappings(void)
ioapic_res = ioapic_setup_resources();
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
ioapic_phys = mp_ioapics[i].mpc_apicaddr;
ioapic_phys = mp_ioapics[i].mp_apicaddr;
} else {
ioapic_phys = (unsigned long)
alloc_bootmem_pages(PAGE_SIZE);

File diff suppressed because it is too large Load diff

View file

@ -31,6 +31,8 @@
#include <asm/numaq.h>
#include <asm/topology.h>
#include <asm/processor.h>
#include <asm/mpspec.h>
#include <asm/e820.h>
#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
@ -58,6 +60,8 @@ static void __init smp_dump_qct(void)
node_end_pfn[node] = MB_TO_PAGES(
eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
e820_register_active_regions(node, node_start_pfn[node],
node_end_pfn[node]);
memory_present(node,
node_start_pfn[node], node_end_pfn[node]);
node_remap_size[node] = node_memmap_size_bytes(node,
@ -67,13 +71,24 @@ static void __init smp_dump_qct(void)
}
}
/*
* Unlike Summit, we don't really care to let the NUMA-Q
* fall back to flat mode. Don't compile for NUMA-Q
* unless you really need it!
*/
static __init void early_check_numaq(void)
{
/*
* Find possible boot-time SMP configuration:
*/
early_find_smp_config();
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
early_get_smp_config();
}
int __init get_memcfg_numaq(void)
{
early_check_numaq();
if (!found_numaq)
return 0;
smp_dump_qct();
return 1;
}

View file

@ -17,6 +17,7 @@ unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
unsigned int max_physical_apicid;
EXPORT_SYMBOL(boot_cpu_physical_apicid);
DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
@ -137,3 +138,28 @@ void __init setup_per_cpu_areas(void)
}
#endif
void __init parse_setup_data(void)
{
struct setup_data *data;
u64 pa_data;
if (boot_params.hdr.version < 0x0209)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_ioremap(pa_data, PAGE_SIZE);
switch (data->type) {
case SETUP_E820_EXT:
parse_e820_ext(data, pa_data);
break;
default:
break;
}
#ifndef CONFIG_DEBUG_BOOT_PARAMS
free_early(pa_data, pa_data+sizeof(*data)+data->len);
#endif
pa_data = data->next;
early_iounmap(data, PAGE_SIZE);
}
}

View file

@ -59,6 +59,7 @@
#include <asm/setup.h>
#include <asm/arch_hooks.h>
#include <asm/sections.h>
#include <asm/dmi.h>
#include <asm/io_apic.h>
#include <asm/ist.h>
#include <asm/io.h>
@ -67,10 +68,13 @@
#include <asm/bios_ebda.h>
#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/efi.h>
#include <asm/bugs.h>
/* This value is set up by the early boot code to point to the value
immediately after the boot time page tables. It contains a *physical*
address, and must not be in the .bss segment! */
unsigned long init_pg_tables_start __initdata = ~0UL;
unsigned long init_pg_tables_end __initdata = ~0UL;
/*
@ -181,6 +185,12 @@ int bootloader_type;
/* user-defined highmem size */
static unsigned int highmem_pages = -1;
/*
* Early DMI memory
*/
int dmi_alloc_index;
char dmi_alloc_data[DMI_MAX_DATA];
/*
* Setup options
*/
@ -237,42 +247,6 @@ static inline void copy_edd(void)
}
#endif
int __initdata user_defined_memmap;
/*
* "mem=nopentium" disables the 4MB page tables.
* "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
* to <mem>, overriding the bios size.
* "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
* <start> to <start>+<mem>, overriding the bios size.
*
* HPA tells me bootloaders need to parse mem=, so no new
* option should be mem= [also see Documentation/i386/boot.txt]
*/
static int __init parse_mem(char *arg)
{
if (!arg)
return -EINVAL;
if (strcmp(arg, "nopentium") == 0) {
setup_clear_cpu_cap(X86_FEATURE_PSE);
} else {
/* If the user specifies memory size, we
* limit the BIOS-provided memory map to
* that size. exactmap can be used to specify
* the exact map. mem=number can be used to
* trim the existing memory map.
*/
unsigned long long mem_size;
mem_size = memparse(arg, &arg);
limit_regions(mem_size);
user_defined_memmap = 1;
}
return 0;
}
early_param("mem", parse_mem);
#ifdef CONFIG_PROC_VMCORE
/* elfcorehdr= specifies the location of elf core header
* stored by the crashed kernel.
@ -395,56 +369,6 @@ unsigned long __init find_max_low_pfn(void)
return max_low_pfn;
}
#define BIOS_LOWMEM_KILOBYTES 0x413
/*
* The BIOS places the EBDA/XBDA at the top of conventional
* memory, and usually decreases the reported amount of
* conventional memory (int 0x12) too. This also contains a
* workaround for Dell systems that neglect to reserve EBDA.
* The same workaround also avoids a problem with the AMD768MPX
* chipset: reserve a page before VGA to prevent PCI prefetch
* into it (errata #56). Usually the page is reserved anyways,
* unless you have no PS/2 mouse plugged in.
*/
static void __init reserve_ebda_region(void)
{
unsigned int lowmem, ebda_addr;
/* To determine the position of the EBDA and the */
/* end of conventional memory, we need to look at */
/* the BIOS data area. In a paravirtual environment */
/* that area is absent. We'll just have to assume */
/* that the paravirt case can handle memory setup */
/* correctly, without our help. */
if (paravirt_enabled())
return;
/* end of low (conventional) memory */
lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
lowmem <<= 10;
/* start of EBDA area */
ebda_addr = get_bios_ebda();
/* Fixup: bios puts an EBDA in the top 64K segment */
/* of conventional memory, but does not adjust lowmem. */
if ((lowmem - ebda_addr) <= 0x10000)
lowmem = ebda_addr;
/* Fixup: bios does not report an EBDA at all. */
/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
if ((ebda_addr == 0) && (lowmem >= 0x9f000))
lowmem = 0x9f000;
/* Paranoia: should never happen, but... */
if ((lowmem == 0) || (lowmem >= 0x100000))
lowmem = 0x9f000;
/* reserve all memory between lowmem and the 1MB mark */
reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
static void __init setup_bootmem_allocator(void);
static unsigned long __init setup_memory(void)
@ -462,11 +386,13 @@ static unsigned long __init setup_memory(void)
if (max_pfn > max_low_pfn) {
highstart_pfn = max_low_pfn;
}
memory_present(0, 0, highend_pfn);
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
num_physpages = highend_pfn;
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
memory_present(0, 0, max_low_pfn);
num_physpages = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
@ -488,11 +414,12 @@ static void __init zone_sizes_init(void)
max_zone_pfns[ZONE_DMA] =
virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
remove_all_active_ranges();
#ifdef CONFIG_HIGHMEM
max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
add_active_range(0, 0, highend_pfn);
e820_register_active_regions(0, 0, highend_pfn);
#else
add_active_range(0, 0, max_low_pfn);
e820_register_active_regions(0, 0, max_low_pfn);
#endif
free_area_init_nodes(max_zone_pfns);
@ -526,25 +453,28 @@ static void __init reserve_crashkernel(void)
ret = parse_crashkernel(boot_command_line, total_mem,
&crash_size, &crash_base);
if (ret == 0 && crash_size > 0) {
if (crash_base > 0) {
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
"for crashkernel (System RAM: %ldMB)\n",
(unsigned long)(crash_size >> 20),
(unsigned long)(crash_base >> 20),
(unsigned long)(total_mem >> 20));
if (reserve_bootmem(crash_base, crash_size,
BOOTMEM_EXCLUSIVE) < 0) {
printk(KERN_INFO "crashkernel reservation "
"failed - memory is in use\n");
return;
}
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
} else
if (crash_base <= 0) {
printk(KERN_INFO "crashkernel reservation failed - "
"you have to specify a base address\n");
return;
}
if (reserve_bootmem_generic(crash_base, crash_size,
BOOTMEM_EXCLUSIVE) < 0) {
printk(KERN_INFO "crashkernel reservation failed - "
"memory is in use\n");
return;
}
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
"for crashkernel (System RAM: %ldMB)\n",
(unsigned long)(crash_size >> 20),
(unsigned long)(crash_base >> 20),
(unsigned long)(total_mem >> 20));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
insert_resource(&iomem_resource, &crashk_res);
}
}
#else
@ -558,44 +488,57 @@ static bool do_relocate_initrd = false;
static void __init reserve_initrd(void)
{
unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
unsigned long ramdisk_here;
initrd_start = 0;
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = ramdisk_image + ramdisk_size;
u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
u64 ramdisk_here;
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
return; /* No initrd provided by bootloader */
if (ramdisk_end < ramdisk_image) {
printk(KERN_ERR "initrd wraps around end of memory, "
"disabling initrd\n");
return;
}
initrd_start = 0;
if (ramdisk_size >= end_of_lowmem/2) {
free_early(ramdisk_image, ramdisk_end);
printk(KERN_ERR "initrd too large to handle, "
"disabling initrd\n");
return;
}
printk(KERN_INFO "old RAMDISK: %08llx - %08llx\n", ramdisk_image,
ramdisk_end);
if (ramdisk_end <= end_of_lowmem) {
/* All in lowmem, easy case */
reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT);
/*
* don't need to reserve again, already reserved early
* in i386_start_kernel
*/
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start+ramdisk_size;
return;
}
/* We need to move the initrd down into lowmem */
ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK;
ramdisk_here = find_e820_area(min_low_pfn<<PAGE_SHIFT,
end_of_lowmem, ramdisk_size,
PAGE_SIZE);
if (ramdisk_here == -1ULL)
panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size);
/* Note: this includes all the lowmem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT);
reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
"NEW RAMDISK");
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
ramdisk_here, ramdisk_here + ramdisk_size);
do_relocate_initrd = true;
}
@ -604,10 +547,10 @@ static void __init reserve_initrd(void)
static void __init relocate_initrd(void)
{
unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
unsigned long ramdisk_here;
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
u64 ramdisk_here;
unsigned long slop, clen, mapaddr;
char *p, *q;
@ -624,6 +567,10 @@ static void __init relocate_initrd(void)
p = (char *)__va(ramdisk_image);
memcpy(q, p, clen);
q += clen;
/* need to free these low pages...*/
printk(KERN_INFO "Freeing old partial RAMDISK %08llx-%08llx\n",
ramdisk_image, ramdisk_image + clen - 1);
free_bootmem(ramdisk_image, clen);
ramdisk_image += clen;
ramdisk_size -= clen;
}
@ -642,66 +589,47 @@ static void __init relocate_initrd(void)
ramdisk_image += clen;
ramdisk_size -= clen;
}
/* high pages is not converted by early_res_to_bootmem */
ramdisk_image = boot_params.hdr.ramdisk_image;
ramdisk_size = boot_params.hdr.ramdisk_size;
printk(KERN_INFO "Copied RAMDISK from %016llx - %016llx to %08llx - %08llx\n",
ramdisk_image, ramdisk_image + ramdisk_size - 1,
ramdisk_here, ramdisk_here + ramdisk_size - 1);
/* need to free that, otherwise init highmem will reserve it again */
free_early(ramdisk_image, ramdisk_image+ramdisk_size);
}
#endif /* CONFIG_BLK_DEV_INITRD */
void __init setup_bootmem_allocator(void)
{
unsigned long bootmap_size;
int i;
unsigned long bootmap_size, bootmap;
/*
* Initialize the boot-time allocator (with low memory only):
*/
bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
register_bootmem_low_pages(max_low_pfn);
/*
* Reserve the bootmem bitmap itself as well. We do this in two
* steps (first step was init_bootmem()) because this catches
* the (very unlikely) case of us accidentally initializing the
* bootmem allocator with an invalid RAM area.
*/
reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
BOOTMEM_DEFAULT);
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
* enabling clean reboots, SMP operation, laptop functions.
*/
reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
/* reserve EBDA region */
reserve_ebda_region();
#ifdef CONFIG_SMP
/*
* But first pinch a few for the stack/trampoline stuff
* FIXME: Don't need the extra page at 4K, but need to fix
* trampoline before removing it. (see the GDT stuff)
*/
reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
#endif
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
*/
acpi_reserve_bootmem();
#endif
#ifdef CONFIG_X86_FIND_SMP_CONFIG
/*
* Find and reserve possible boot-time SMP configuration:
*/
find_smp_config();
#endif
bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
PAGE_SIZE);
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
#ifdef CONFIG_BLK_DEV_INITRD
reserve_initrd();
#endif
numa_kva_reserve();
reserve_crashkernel();
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
printk(KERN_INFO " low ram: %08lx - %08lx\n",
min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
printk(KERN_INFO " bootmap %08lx - %08lx\n",
bootmap, bootmap + bootmap_size);
for_each_online_node(i)
free_bootmem_with_active_regions(i, max_low_pfn);
early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
reserve_ibft_region();
}
/*
@ -731,12 +659,6 @@ static void set_mca_bus(int x)
static void set_mca_bus(int x) { }
#endif
/* Overridden in paravirt.c if CONFIG_PARAVIRT */
char * __init __attribute__((weak)) memory_setup(void)
{
return machine_specific_memory_setup();
}
#ifdef CONFIG_NUMA
/*
* In the golden day, when everything among i386 and x86_64 will be
@ -749,6 +671,8 @@ int x86_cpu_to_node_map_init[NR_CPUS] = {
DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
#endif
static void probe_roms(void);
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@ -758,17 +682,21 @@ DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
*/
void __init setup_arch(char **cmdline_p)
{
int i;
unsigned long max_low_pfn;
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
pre_setup_arch_hook();
early_cpu_init();
early_ioremap_init();
reserve_setup_data();
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL32", 4))
"EL32", 4)) {
efi_enabled = 1;
efi_reserve_early();
}
#endif
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
@ -792,8 +720,7 @@ void __init setup_arch(char **cmdline_p)
#endif
ARCH_SETUP
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
print_memory_map(memory_setup());
setup_memory_map();
copy_edd();
@ -811,12 +738,18 @@ void __init setup_arch(char **cmdline_p)
bss_resource.start = virt_to_phys(&__bss_start);
bss_resource.end = virt_to_phys(&__bss_stop)-1;
parse_setup_data();
parse_early_param();
if (user_defined_memmap) {
printk(KERN_INFO "user-defined physical RAM map:\n");
print_memory_map("user");
}
finish_e820_parsing();
probe_roms();
/* after parse_early_param, so could debug it */
insert_resource(&iomem_resource, &code_resource);
insert_resource(&iomem_resource, &data_resource);
insert_resource(&iomem_resource, &bss_resource);
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
@ -824,14 +757,67 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled)
efi_init();
if (ppro_with_ram_bug()) {
e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
E820_RESERVED);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
printk(KERN_INFO "fixed physical RAM map:\n");
e820_print_map("bad_ppro");
}
e820_register_active_regions(0, 0, -1UL);
/*
* partially used pages are not usable - thus
* we are rounding upwards:
*/
max_pfn = e820_end_of_ram();
/* preallocate 4k for mptable mpc */
early_reserve_e820_mpc_new();
/* update e820 for memory not covered by WB MTRRs */
propagate_e820_map();
mtrr_bp_init();
if (mtrr_trim_uncached_memory(max_pfn))
propagate_e820_map();
if (mtrr_trim_uncached_memory(max_pfn)) {
remove_all_active_ranges();
e820_register_active_regions(0, 0, -1UL);
max_pfn = e820_end_of_ram();
}
dmi_scan_machine();
io_delay_init();
#ifdef CONFIG_ACPI
/*
* Parse the ACPI tables for possible boot-time SMP configuration.
*/
acpi_boot_table_init();
#endif
#ifdef CONFIG_ACPI_NUMA
/*
* Parse SRAT to discover nodes.
*/
acpi_numa_init();
#endif
max_low_pfn = setup_memory();
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
*/
acpi_reserve_bootmem();
#endif
#ifdef CONFIG_X86_FIND_SMP_CONFIG
/*
* Find and reserve possible boot-time SMP configuration:
*/
find_smp_config();
#endif
reserve_crashkernel();
reserve_ibft_region();
#ifdef CONFIG_KVM_CLOCK
kvmclock_init();
#endif
@ -855,9 +841,6 @@ void __init setup_arch(char **cmdline_p)
* not to exceed the 8Mb limit.
*/
#ifdef CONFIG_SMP
smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif
paging_init();
/*
@ -869,10 +852,6 @@ void __init setup_arch(char **cmdline_p)
init_ohci1394_dma_on_all_controllers();
#endif
remapped_pgdat_init();
sparse_init();
zone_sizes_init();
/*
* NOTE: at this point the bootmem allocator is fully available.
*/
@ -881,12 +860,12 @@ void __init setup_arch(char **cmdline_p)
relocate_initrd();
#endif
remapped_pgdat_init();
sparse_init();
zone_sizes_init();
paravirt_post_allocator_init();
dmi_scan_machine();
io_delay_init();
#ifdef CONFIG_X86_SMP
/*
* setup to use the early static init tables during kernel startup
@ -903,32 +882,31 @@ void __init setup_arch(char **cmdline_p)
generic_apic_probe();
#endif
#ifdef CONFIG_ACPI
/*
* Parse the ACPI tables for possible boot-time SMP configuration.
*/
acpi_boot_table_init();
#endif
early_quirks();
#ifdef CONFIG_ACPI
acpi_boot_init();
#endif
#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
if (smp_found_config)
get_smp_config();
#endif
#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
if (def_to_bigsmp)
printk(KERN_WARNING "More than 8 CPUs detected and "
"CONFIG_X86_PC cannot handle it.\nUse "
"CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
#endif
#endif
#ifdef CONFIG_X86_LOCAL_APIC
if (smp_found_config)
get_smp_config();
#endif
e820_register_memory();
e820_mark_nosave_regions();
e820_reserve_resources();
e820_mark_nosave_regions(max_low_pfn);
request_resource(&iomem_resource, &video_ram_resource);
/* request I/O space for devices used on all i[345]86 PCs */
for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
request_resource(&ioport_resource, &standard_io_resources[i]);
e820_setup_gap();
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
@ -940,25 +918,147 @@ void __init setup_arch(char **cmdline_p)
#endif
}
/*
* Request address space for all standard resources
*
* This is called just before pcibios_init(), which is also a
* subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
*/
static int __init request_standard_resources(void)
static struct resource system_rom_resource = {
.name = "System ROM",
.start = 0xf0000,
.end = 0xfffff,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};
static struct resource extension_rom_resource = {
.name = "Extension ROM",
.start = 0xe0000,
.end = 0xeffff,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};
static struct resource adapter_rom_resources[] = { {
.name = "Adapter ROM",
.start = 0xc8000,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
.name = "Adapter ROM",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
.name = "Adapter ROM",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
.name = "Adapter ROM",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
.name = "Adapter ROM",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
.name = "Adapter ROM",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
} };
static struct resource video_rom_resource = {
.name = "Video ROM",
.start = 0xc0000,
.end = 0xc7fff,
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};
#define ROMSIGNATURE 0xaa55
static int __init romsignature(const unsigned char *rom)
{
int i;
const unsigned short * const ptr = (const unsigned short *)rom;
unsigned short sig;
printk(KERN_INFO "Setting up standard PCI resources\n");
init_iomem_resources(&code_resource, &data_resource, &bss_resource);
request_resource(&iomem_resource, &video_ram_resource);
/* request I/O space for devices used on all i[345]86 PCs */
for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
request_resource(&ioport_resource, &standard_io_resources[i]);
return 0;
return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
}
static int __init romchecksum(const unsigned char *rom, unsigned long length)
{
unsigned char sum, c;
for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
sum += c;
return !length && !sum;
}
static void __init probe_roms(void)
{
const unsigned char *rom;
unsigned long start, length, upper;
unsigned char c;
int i;
/* video rom */
upper = adapter_rom_resources[0].start;
for (start = video_rom_resource.start; start < upper; start += 2048) {
rom = isa_bus_to_virt(start);
if (!romsignature(rom))
continue;
video_rom_resource.start = start;
if (probe_kernel_address(rom + 2, c) != 0)
continue;
/* 0 < length <= 0x7f * 512, historically */
length = c * 512;
/* if checksum okay, trust length byte */
if (length && romchecksum(rom, length))
video_rom_resource.end = start + length - 1;
request_resource(&iomem_resource, &video_rom_resource);
break;
}
start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
if (start < upper)
start = upper;
/* system rom */
request_resource(&iomem_resource, &system_rom_resource);
upper = system_rom_resource.start;
/* check for extension rom (ignore length byte!) */
rom = isa_bus_to_virt(extension_rom_resource.start);
if (romsignature(rom)) {
length = extension_rom_resource.end - extension_rom_resource.start + 1;
if (romchecksum(rom, length)) {
request_resource(&iomem_resource, &extension_rom_resource);
upper = extension_rom_resource.start;
}
}
/* check for adapter roms on 2k boundaries */
for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
rom = isa_bus_to_virt(start);
if (!romsignature(rom))
continue;
if (probe_kernel_address(rom + 2, c) != 0)
continue;
/* 0 < length <= 0x7f * 512, historically */
length = c * 512;
/* but accept any length that fits if checksum okay */
if (!length || start + length > upper || !romchecksum(rom, length))
continue;
adapter_rom_resources[i].start = start;
adapter_rom_resources[i].end = start + length - 1;
request_resource(&iomem_resource, &adapter_rom_resources[i]);
start = adapter_rom_resources[i++].end & ~2047UL;
}
}
subsys_initcall(request_standard_resources);

View file

@ -56,6 +56,7 @@
#include <asm/desc.h>
#include <video/edid.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/dma.h>
#include <asm/gart.h>
#include <asm/mpspec.h>
@ -245,7 +246,7 @@ static void __init reserve_crashkernel(void)
return;
}
if (reserve_bootmem(crash_base, crash_size,
if (reserve_bootmem_generic(crash_base, crash_size,
BOOTMEM_EXCLUSIVE) < 0) {
printk(KERN_INFO "crashkernel reservation failed - "
"memory is in use\n");
@ -267,34 +268,6 @@ static inline void __init reserve_crashkernel(void)
{}
#endif
/* Overridden in paravirt.c if CONFIG_PARAVIRT */
void __attribute__((weak)) __init memory_setup(void)
{
machine_specific_memory_setup();
}
static void __init parse_setup_data(void)
{
struct setup_data *data;
unsigned long pa_data;
if (boot_params.hdr.version < 0x0209)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_ioremap(pa_data, PAGE_SIZE);
switch (data->type) {
default:
break;
}
#ifndef CONFIG_DEBUG_BOOT_PARAMS
free_early(pa_data, pa_data+sizeof(*data)+data->len);
#endif
pa_data = data->next;
early_iounmap(data, PAGE_SIZE);
}
}
/*
* setup_arch - architecture-specific boot-time initializations
*
@ -319,13 +292,15 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL64", 4))
"EL64", 4)) {
efi_enabled = 1;
efi_reserve_early();
}
#endif
ARCH_SETUP
memory_setup();
setup_memory_map();
copy_edd();
if (!boot_params.hdr.root_flags)
@ -372,9 +347,13 @@ void __init setup_arch(char **cmdline_p)
* we are rounding upwards:
*/
end_pfn = e820_end_of_ram();
/* pre allocte 4k for mptable mpc */
early_reserve_e820_mpc_new();
/* update e820 for memory not covered by WB MTRRs */
mtrr_bp_init();
if (mtrr_trim_uncached_memory(end_pfn)) {
remove_all_active_ranges();
e820_register_active_regions(0, 0, -1UL);
end_pfn = e820_end_of_ram();
}
@ -383,7 +362,7 @@ void __init setup_arch(char **cmdline_p)
check_efer();
max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
max_pfn_mapped = init_memory_mapping(0, (end_pfn << PAGE_SHIFT));
if (efi_enabled)
efi_init();
@ -444,13 +423,12 @@ void __init setup_arch(char **cmdline_p)
acpi_reserve_bootmem();
#endif
if (efi_enabled)
efi_reserve_bootmem();
#ifdef CONFIG_X86_MPPARSE
/*
* Find and reserve possible boot-time SMP configuration:
*/
find_smp_config();
#endif
#ifdef CONFIG_BLK_DEV_INITRD
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
@ -493,11 +471,13 @@ void __init setup_arch(char **cmdline_p)
init_cpu_to_node();
#ifdef CONFIG_X86_MPPARSE
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
get_smp_config();
#endif
init_apic_mappings();
ioapic_init_mappings();
@ -507,7 +487,7 @@ void __init setup_arch(char **cmdline_p)
* We trust e820 completely. No explicit ROM probing in memory.
*/
e820_reserve_resources();
e820_mark_nosave_regions();
e820_mark_nosave_regions(end_pfn);
/* request I/O space for devices used on all i[345]86 PCs */
for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)

View file

@ -554,23 +554,6 @@ cpumask_t cpu_coregroup_map(int cpu)
return c->llc_shared_map;
}
#ifdef CONFIG_X86_32
/*
* We are called very early to get the low memory for the
* SMP bootup trampoline page.
*/
void __init smp_alloc_memory(void)
{
trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
/*
* Has to be in very low memory so we can execute
* real-mode AP code.
*/
if (__pa(trampoline_base) >= 0x9F000)
BUG();
}
#endif
static void impress_friends(void)
{
int cpu;

View file

@ -31,6 +31,7 @@
#include <asm/srat.h>
#include <asm/topology.h>
#include <asm/smp.h>
#include <asm/e820.h>
/*
* proximity macros and definitions
@ -41,7 +42,7 @@
#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
/* bitmap length; _PXM is at most 255 */
#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
#define MAX_CHUNKS_PER_NODE 3
#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
@ -52,16 +53,37 @@ struct node_memory_chunk_s {
u8 nid; // which cnode contains this chunk?
u8 bank; // which mem bank on this node
};
static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
static int num_memory_chunks; /* total number of memory chunks */
static int __initdata num_memory_chunks; /* total number of memory chunks */
static u8 __initdata apicid_to_pxm[MAX_APICID];
/* Identify CPU proximity domains */
static void __init parse_cpu_affinity_structure(char *p)
int numa_off __initdata;
int acpi_numa __initdata;
static __init void bad_srat(void)
{
struct acpi_srat_cpu_affinity *cpu_affinity =
(struct acpi_srat_cpu_affinity *) p;
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
num_memory_chunks = 0;
}
static __init inline int srat_disabled(void)
{
return numa_off || acpi_numa < 0;
}
/* Identify CPU proximity domains */
void __init
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
{
if (srat_disabled())
return;
if (cpu_affinity->header.length !=
sizeof(struct acpi_srat_cpu_affinity)) {
bad_srat();
return;
}
if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
return; /* empty entry */
@ -79,14 +101,21 @@ static void __init parse_cpu_affinity_structure(char *p)
* Identify memory proximity domains and hot-remove capabilities.
* Fill node memory chunk list structure.
*/
static void __init parse_memory_affinity_structure (char *sratp)
void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
{
unsigned long long paddr, size;
unsigned long start_pfn, end_pfn;
u8 pxm;
struct node_memory_chunk_s *p, *q, *pend;
struct acpi_srat_mem_affinity *memory_affinity =
(struct acpi_srat_mem_affinity *) sratp;
if (srat_disabled())
return;
if (memory_affinity->header.length !=
sizeof(struct acpi_srat_mem_affinity)) {
bad_srat();
return;
}
if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
return; /* empty entry */
@ -134,6 +163,14 @@ static void __init parse_memory_affinity_structure (char *sratp)
"enabled and removable" : "enabled" ) );
}
/* Callback for SLIT parsing */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
}
void acpi_numa_arch_fixup(void)
{
}
/*
* The SRAT table always lists ascending addresses, so can always
* assume that the first "start" address that you see is the real
@ -166,39 +203,13 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
node_end_pfn[nid] = memory_chunk->end_pfn;
}
/* Parse the ACPI Static Resource Affinity Table */
static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
int __init get_memcfg_from_srat(void)
{
u8 *start, *end, *p;
int i, j, nid;
start = (u8 *)(&(sratp->reserved) + 1); /* skip header */
p = start;
end = (u8 *)sratp + sratp->header.length;
memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */
memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
num_memory_chunks = 0;
while (p < end) {
switch (*p) {
case ACPI_SRAT_TYPE_CPU_AFFINITY:
parse_cpu_affinity_structure(p);
break;
case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
parse_memory_affinity_structure(p);
break;
default:
printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]);
break;
}
p += p[1];
if (p[1] == 0) {
printk("acpi20_parse_srat: Entry length value is zero;"
" can't parse any further!\n");
break;
}
}
if (srat_disabled())
goto out_fail;
if (num_memory_chunks == 0) {
printk("could not finy any ACPI SRAT memory areas.\n");
@ -244,115 +255,19 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
node_read_chunk(chunk->nid, chunk);
add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
e820_register_active_regions(chunk->nid, chunk->start_pfn,
min(chunk->end_pfn, max_pfn));
}
for_each_online_node(nid) {
unsigned long start = node_start_pfn[nid];
unsigned long end = node_end_pfn[nid];
unsigned long end = min(node_end_pfn[nid], max_pfn);
memory_present(nid, start, end);
node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
}
return 1;
out_fail:
return 0;
}
struct acpi_static_rsdt {
struct acpi_table_rsdt table;
u32 padding[7]; /* Allow for 7 more table entries */
};
int __init get_memcfg_from_srat(void)
{
struct acpi_table_header *header = NULL;
struct acpi_table_rsdp *rsdp = NULL;
struct acpi_table_rsdt *rsdt = NULL;
acpi_native_uint rsdp_address = 0;
struct acpi_static_rsdt saved_rsdt;
int tables = 0;
int i = 0;
rsdp_address = acpi_os_get_root_pointer();
if (!rsdp_address) {
printk("%s: System description tables not found\n",
__func__);
goto out_err;
}
printk("%s: assigning address to rsdp\n", __func__);
rsdp = (struct acpi_table_rsdp *)(u32)rsdp_address;
if (!rsdp) {
printk("%s: Didn't find ACPI root!\n", __func__);
goto out_err;
}
printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
rsdp->oem_id);
if (strncmp(rsdp->signature, ACPI_SIG_RSDP,strlen(ACPI_SIG_RSDP))) {
printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __func__);
goto out_err;
}
rsdt = (struct acpi_table_rsdt *)
early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
if (!rsdt) {
printk(KERN_WARNING
"%s: ACPI: Invalid root system description tables (RSDT)\n",
__func__);
goto out_err;
}
header = &rsdt->header;
if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) {
printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
goto out_err;
}
/*
* The number of tables is computed by taking the
* size of all entries (header size minus total
* size of RSDT) divided by the size of each entry
* (4-byte table pointers).
*/
tables = (header->length - sizeof(struct acpi_table_header)) / 4;
if (!tables)
goto out_err;
memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) {
printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n",
saved_rsdt.table.header.length);
goto out_err;
}
printk("Begin SRAT table scan....\n");
for (i = 0; i < tables; i++) {
/* Map in header, then map in full table length. */
header = (struct acpi_table_header *)
early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
if (!header)
break;
header = (struct acpi_table_header *)
early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
if (!header)
break;
if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4))
continue;
/* we've found the srat table. don't need to look at any more tables */
return acpi20_parse_srat((struct acpi_table_srat *)header);
}
out_err:
remove_all_active_ranges();
printk("failed to get NUMA memory information from SRAT table\n");
return 0;
}

View file

@ -36,7 +36,9 @@ static struct rio_table_hdr *rio_table_hdr __initdata;
static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
#ifndef CONFIG_X86_NUMAQ
static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
#endif
static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
{

View file

@ -2,7 +2,7 @@
#include <asm/trampoline.h>
/* ready for x86_64, no harm for x86, since it will overwrite after alloc */
/* ready for x86_64 and x86 */
unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
/*

View file

@ -835,7 +835,7 @@ static __init char *lguest_memory_setup(void)
/* The Linux bootloader header contains an "e820" memory map: the
* Launcher populated the first entry with our memory limit. */
add_memory_region(boot_params.e820_map[0].addr,
e820_add_region(boot_params.e820_map[0].addr,
boot_params.e820_map[0].size,
boot_params.e820_map[0].type);
@ -1012,6 +1012,7 @@ __init void lguest_init(void)
* clobbered. The Launcher places our initial pagetables somewhere at
* the top of our physical memory, so we don't need extra space: set
* init_pg_tables_end to the end of the kernel. */
init_pg_tables_start = __pa(pg0);
init_pg_tables_end = __pa(pg0);
/* Load the %fs segment register (the per-cpu segment register) with
@ -1065,9 +1066,9 @@ __init void lguest_init(void)
pm_power_off = lguest_power_off;
machine_ops.restart = lguest_restart;
/* Now we're set up, call start_kernel() in init/main.c and we proceed
/* Now we're set up, call i386_start_kernel() in head32.c and we proceed
* to boot as normal. It never returns. */
start_kernel();
i386_start_kernel();
}
/*
* This marks the end of stage II of our journey, The Guest.

View file

@ -142,45 +142,3 @@ static int __init print_ipi_mode(void)
late_initcall(print_ipi_mode);
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
*
* Description:
* This is included late in kernel/setup.c so that it can make
* use of all of the static functions.
**/
char * __init machine_specific_memory_setup(void)
{
char *who;
who = "BIOS-e820";
/*
* Try to copy the BIOS-supplied E820-map.
*
* Otherwise fake a memory map; one section from 0k->640k,
* the next section from 1mb->appropriate_mem_k
*/
sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
< 0) {
unsigned long mem_size;
/* compare results from other methods and take the greater */
if (boot_params.alt_mem_k
< boot_params.screen_info.ext_mem_k) {
mem_size = boot_params.screen_info.ext_mem_k;
who = "BIOS-88";
} else {
mem_size = boot_params.alt_mem_k;
who = "BIOS-e801";
}
e820.nr_map = 0;
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
}
return who;
}

View file

@ -3,4 +3,3 @@
#
obj-$(CONFIG_X86_ES7000) := es7000plat.o
obj-$(CONFIG_X86_GENERICARCH) := es7000plat.o

View file

@ -52,6 +52,8 @@ static struct mip_reg *host_reg;
static int mip_port;
static unsigned long mip_addr, host_addr;
int es7000_plat;
/*
* GSI override for ES7000 platforms.
*/
@ -175,53 +177,6 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr)
}
#endif
/*
* This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic
* arch already has got following function definitions (asm-generic/es7000.c)
* hence no need to define these for that case.
*/
#ifndef CONFIG_X86_GENERICARCH
void es7000_sw_apic(void);
void __init enable_apic_mode(void)
{
es7000_sw_apic();
return;
}
__init int mps_oem_check(struct mp_config_table *mpc, char *oem,
char *productid)
{
if (mpc->mpc_oemptr) {
struct mp_config_oemtable *oem_table =
(struct mp_config_oemtable *)mpc->mpc_oemptr;
if (!strncmp(oem, "UNISYS", 6))
return parse_unisys_oem((char *)oem_table);
}
return 0;
}
#ifdef CONFIG_ACPI
/* Hook from generic ACPI tables.c */
int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
unsigned long oem_addr;
if (!find_unisys_acpi_oem_table(&oem_addr)) {
if (es7000_check_dsdt())
return parse_unisys_oem((char *)oem_addr);
else {
setup_unisys();
return 1;
}
}
return 0;
}
#else
int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
return 0;
}
#endif
#endif /* COFIG_X86_GENERICARCH */
static void
es7000_spin(int n)
{

View file

@ -2,7 +2,11 @@
# Makefile for the generic architecture
#
EXTRA_CFLAGS := -Iarch/x86/kernel
EXTRA_CFLAGS := -Iarch/x86/kernel
obj-y := probe.o summit.o bigsmp.o es7000.o default.o
obj-y += ../../x86/mach-es7000/
obj-y := probe.o default.o
obj-$(CONFIG_X86_NUMAQ) += numaq.o
obj-$(CONFIG_X86_SUMMIT) += summit.o
obj-$(CONFIG_X86_BIGSMP) += bigsmp.o
obj-$(CONFIG_X86_ES7000) += es7000.o
obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/

View file

@ -23,10 +23,8 @@ static int dmi_bigsmp; /* can be set by dmi scanners */
static int hp_ht_bigsmp(const struct dmi_system_id *d)
{
#ifdef CONFIG_X86_GENERICARCH
printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
dmi_bigsmp = 1;
#endif
return 0;
}
@ -48,7 +46,7 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
static int probe_bigsmp(void)
{
if (def_to_bigsmp)
dmi_bigsmp = 1;
dmi_bigsmp = 1;
else
dmi_check_system(bigsmp_dmi_table);
return dmi_bigsmp;

View file

@ -0,0 +1,41 @@
/*
* APIC driver for the IBM NUMAQ chipset.
*/
#define APIC_DEFINITION 1
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/fixmap.h>
#include <asm/apicdef.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
#include <asm/mach-numaq/mach_apic.h>
#include <asm/mach-numaq/mach_apicdef.h>
#include <asm/mach-numaq/mach_ipi.h>
#include <asm/mach-numaq/mach_mpparse.h>
#include <asm/mach-numaq/mach_wakecpu.h>
#include <asm/numaq.h>
static int mps_oem_check(struct mp_config_table *mpc, char *oem,
char *productid)
{
numaq_mps_oem_check(mpc, oem, productid);
return found_numaq;
}
static int probe_numaq(void)
{
/* already know from get_memcfg_numaq() */
return found_numaq;
}
/* Hook from generic ACPI tables.c */
static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
return 0;
}
struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);

View file

@ -16,6 +16,7 @@
#include <asm/apicdef.h>
#include <asm/genapic.h>
extern struct genapic apic_numaq;
extern struct genapic apic_summit;
extern struct genapic apic_bigsmp;
extern struct genapic apic_es7000;
@ -24,9 +25,18 @@ extern struct genapic apic_default;
struct genapic *genapic = &apic_default;
static struct genapic *apic_probe[] __initdata = {
#ifdef CONFIG_X86_NUMAQ
&apic_numaq,
#endif
#ifdef CONFIG_X86_SUMMIT
&apic_summit,
#endif
#ifdef CONFIG_X86_BIGSMP
&apic_bigsmp,
#endif
#ifdef CONFIG_X86_ES7000
&apic_es7000,
#endif
&apic_default, /* must be last */
NULL,
};
@ -54,6 +64,7 @@ early_param("apic", parse_apic);
void __init generic_bigsmp_probe(void)
{
#ifdef CONFIG_X86_BIGSMP
/*
* This routine is used to switch to bigsmp mode when
* - There is no apic= option specified by the user
@ -67,6 +78,7 @@ void __init generic_bigsmp_probe(void)
printk(KERN_INFO "Overriding APIC driver with %s\n",
genapic->name);
}
#endif
}
void __init generic_apic_probe(void)
@ -88,7 +100,8 @@ void __init generic_apic_probe(void)
/* These functions can switch the APIC even after the initial ->probe() */
int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid)
int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
char *productid)
{
int i;
for (i = 0; apic_probe[i]; ++i) {

View file

@ -8,11 +8,6 @@
#include "cobalt.h"
#include "mach_apic.h"
/* Have we found an MP table */
int smp_found_config;
int pic_mode;
extern unsigned int __cpuinitdata maxcpus;
/*
@ -76,7 +71,9 @@ void __init find_smp_config(void)
if (ncpus > maxcpus)
ncpus = maxcpus;
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 1;
#endif
while (ncpus--)
MP_processor_info(mp++);

View file

@ -175,9 +175,9 @@ char * __init machine_specific_memory_setup(void)
sgivwfb_mem_size &= ~((1 << 20) - 1);
sgivwfb_mem_phys = mem_size - gfx_mem_size;
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
add_memory_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
add_memory_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
e820_add_region(0, LOWMEMSIZE(), E820_RAM);
e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
return "PROM";
}

View file

@ -62,6 +62,7 @@ void __init time_init_hook(void)
char *__init machine_specific_memory_setup(void)
{
char *who;
int new_nr;
who = "NOT VOYAGER";
@ -73,7 +74,7 @@ char *__init machine_specific_memory_setup(void)
e820.nr_map = 0;
for (i = 0; voyager_memory_detect(i, &addr, &length); i++) {
add_memory_region(addr, length, E820_RAM);
e820_add_region(addr, length, E820_RAM);
}
return who;
} else if (voyager_level == 4) {
@ -91,43 +92,17 @@ char *__init machine_specific_memory_setup(void)
tom = (boot_params.screen_info.ext_mem_k) << 10;
}
who = "Voyager-TOM";
add_memory_region(0, 0x9f000, E820_RAM);
e820_add_region(0, 0x9f000, E820_RAM);
/* map from 1M to top of memory */
add_memory_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024,
e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024,
E820_RAM);
/* FIXME: Should check the ASICs to see if I need to
* take out the 8M window. Just do it at the moment
* */
add_memory_region(8 * 1024 * 1024, 8 * 1024 * 1024,
e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024,
E820_RESERVED);
return who;
}
who = "BIOS-e820";
/*
* Try to copy the BIOS-supplied E820-map.
*
* Otherwise fake a memory map; one section from 0k->640k,
* the next section from 1mb->appropriate_mem_k
*/
sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
< 0) {
unsigned long mem_size;
/* compare results from other methods and take the greater */
if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) {
mem_size = boot_params.screen_info.ext_mem_k;
who = "BIOS-88";
} else {
mem_size = boot_params.alt_mem_k;
who = "BIOS-e801";
}
e820.nr_map = 0;
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
}
return who;
return default_machine_specific_memory_setup();
}

View file

@ -59,11 +59,6 @@ __u32 voyager_quad_processors = 0;
* activity count. Finally exported by i386_ksyms.c */
static int voyager_extended_cpus = 1;
/* Have we found an SMP box - used by time.c to do the profiling
interrupt for timeslicing; do not set to 1 until the per CPU timer
interrupt is active */
int smp_found_config = 0;
/* Used for the invalidate map that's also checked in the spinlock */
static volatile unsigned long smp_invalidate_needed;
@ -1137,15 +1132,6 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, 0, 1, 1);
}
/* used to set up the trampoline for other CPUs when the memory manager
* is sorted out */
void __init smp_alloc_memory(void)
{
trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
if (__pa(trampoline_base) >= 0x93000)
BUG();
}
/* send a reschedule CPI to one CPU by physical CPU number*/
static void voyager_smp_send_reschedule(int cpu)
{

View file

@ -38,6 +38,7 @@
#include <asm/setup.h>
#include <asm/mmzone.h>
#include <asm/bios_ebda.h>
#include <asm/proto.h>
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
@ -59,14 +60,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
/*
* 4) physnode_map - the mapping between a pfn and owning node
* physnode_map keeps track of the physical memory layout of a generic
* numa node on a 256Mb break (each element of the array will
* represent 256Mb of memory and will be marked by the node id. so,
* numa node on a 64Mb break (each element of the array will
* represent 64Mb of memory and will be marked by the node id. so,
* if the first gig is on node 0, and the second gig is on node 1
* physnode_map will contain:
*
* physnode_map[0-3] = 0;
* physnode_map[4-7] = 1;
* physnode_map[8- ] = -1;
* physnode_map[0-15] = 0;
* physnode_map[16-31] = 1;
* physnode_map[32- ] = -1;
*/
s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1};
EXPORT_SYMBOL(physnode_map);
@ -81,9 +82,9 @@ void memory_present(int nid, unsigned long start, unsigned long end)
printk(KERN_DEBUG " ");
for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
printk("%ld ", pfn);
printk(KERN_CONT "%ld ", pfn);
}
printk("\n");
printk(KERN_CONT "\n");
}
unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
@ -99,7 +100,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
#endif
extern unsigned long find_max_low_pfn(void);
extern void add_one_highpage_init(struct page *, int, int);
extern unsigned long highend_pfn, highstart_pfn;
#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
@ -119,11 +119,11 @@ int __init get_memcfg_numa_flat(void)
{
printk("NUMA - single node, flat memory mode\n");
/* Run the memory configuration and find the top of memory. */
propagate_e820_map();
node_start_pfn[0] = 0;
node_end_pfn[0] = max_pfn;
e820_register_active_regions(0, 0, max_pfn);
memory_present(0, 0, max_pfn);
node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
/* Indicate there is one node available. */
nodes_clear(node_online_map);
@ -159,9 +159,17 @@ static void __init allocate_pgdat(int nid)
if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid])
NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
else {
NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn));
min_low_pfn += PFN_UP(sizeof(pg_data_t));
unsigned long pgdat_phys;
pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT,
(nid ? max_low_pfn:max_pfn_mapped)<<PAGE_SHIFT,
sizeof(pg_data_t),
PAGE_SIZE);
NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t),
"NODE_DATA");
}
printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
nid, (unsigned long)NODE_DATA(nid));
}
/*
@ -199,8 +207,12 @@ void __init remap_numa_kva(void)
int node;
for_each_online_node(node) {
printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
(unsigned long)vaddr,
node_remap_start_pfn[node] + pfn);
set_pmd_pfn((ulong) vaddr,
node_remap_start_pfn[node] + pfn,
PAGE_KERNEL_LARGE);
@ -212,17 +224,21 @@ static unsigned long calculate_numa_remap_pages(void)
{
int nid;
unsigned long size, reserve_pages = 0;
unsigned long pfn;
for_each_online_node(nid) {
unsigned old_end_pfn = node_end_pfn[nid];
u64 node_kva_target;
u64 node_kva_final;
/*
* The acpi/srat node info can show hot-add memroy zones
* where memory could be added but not currently present.
*/
printk("node %d pfn: [%lx - %lx]\n",
nid, node_start_pfn[nid], node_end_pfn[nid]);
if (node_start_pfn[nid] > max_pfn)
continue;
if (!node_end_pfn[nid])
continue;
if (node_end_pfn[nid] > max_pfn)
node_end_pfn[nid] = max_pfn;
@ -234,39 +250,45 @@ static unsigned long calculate_numa_remap_pages(void)
/* now the roundup is correct, convert to PAGE_SIZE pages */
size = size * PTRS_PER_PTE;
/*
* Validate the region we are allocating only contains valid
* pages.
*/
for (pfn = node_end_pfn[nid] - size;
pfn < node_end_pfn[nid]; pfn++)
if (!page_is_ram(pfn))
break;
node_kva_target = round_down(node_end_pfn[nid] - size,
PTRS_PER_PTE);
node_kva_target <<= PAGE_SHIFT;
do {
node_kva_final = find_e820_area(node_kva_target,
((u64)node_end_pfn[nid])<<PAGE_SHIFT,
((u64)size)<<PAGE_SHIFT,
LARGE_PAGE_BYTES);
node_kva_target -= LARGE_PAGE_BYTES;
} while (node_kva_final == -1ULL &&
(node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
if (pfn != node_end_pfn[nid])
size = 0;
if (node_kva_final == -1ULL)
panic("Can not get kva ram\n");
printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
size, nid);
node_remap_size[nid] = size;
node_remap_offset[nid] = reserve_pages;
reserve_pages += size;
printk("Shrinking node %d from %ld pages to %ld pages\n",
nid, node_end_pfn[nid], node_end_pfn[nid] - size);
printk("Reserving %ld pages of KVA for lmem_map of node %d at %llx\n",
size, nid, node_kva_final>>PAGE_SHIFT);
if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) {
/*
* Align node_end_pfn[] and node_remap_start_pfn[] to
* pmd boundary. remap_numa_kva will barf otherwise.
*/
printk("Shrinking node %d further by %ld pages for proper alignment\n",
nid, node_end_pfn[nid] & (PTRS_PER_PTE-1));
size += node_end_pfn[nid] & (PTRS_PER_PTE-1);
}
/*
* prevent kva address below max_low_pfn want it on system
* with less memory later.
* layout will be: KVA address , KVA RAM
*
* we are supposed to only record the one less then max_low_pfn
* but we could have some hole in high memory, and it will only
* check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
* to use it as free.
* So reserve_early here, hope we don't run out of that array
*/
reserve_early(node_kva_final,
node_kva_final+(((u64)size)<<PAGE_SHIFT),
"KVA RAM");
node_end_pfn[nid] -= size;
node_remap_start_pfn[nid] = node_end_pfn[nid];
shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
remove_active_range(nid, node_remap_start_pfn[nid],
node_remap_start_pfn[nid] + size);
}
printk("Reserving total of %ld pages for numa KVA remap\n",
reserve_pages);
@ -284,8 +306,7 @@ static void init_remap_allocator(int nid)
printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
(ulong) node_remap_start_vaddr[nid],
(ulong) pfn_to_kaddr(highstart_pfn
+ node_remap_offset[nid] + node_remap_size[nid]));
(ulong) node_remap_end_vaddr[nid]);
}
extern void setup_bootmem_allocator(void);
@ -293,7 +314,7 @@ unsigned long __init setup_memory(void)
{
int nid;
unsigned long system_start_pfn, system_max_low_pfn;
unsigned long wasted_pages;
long kva_target_pfn;
/*
* When mapping a NUMA machine we allocate the node_mem_map arrays
@ -302,34 +323,38 @@ unsigned long __init setup_memory(void)
* this space and use it to adjust the boundary between ZONE_NORMAL
* and ZONE_HIGHMEM.
*/
/* call find_max_low_pfn at first, it could update max_pfn */
system_max_low_pfn = max_low_pfn = find_max_low_pfn();
remove_all_active_ranges();
get_memcfg_numa();
kva_pages = calculate_numa_remap_pages();
kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
/* partially used pages are not usable - thus round upwards */
system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
kva_start_pfn = find_max_low_pfn() - kva_pages;
kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
do {
kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT,
max_low_pfn<<PAGE_SHIFT,
kva_pages<<PAGE_SHIFT,
PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
kva_target_pfn -= PTRS_PER_PTE;
} while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn);
#ifdef CONFIG_BLK_DEV_INITRD
/* Numa kva area is below the initrd */
if (initrd_start)
kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET)
- kva_pages;
#endif
if (kva_start_pfn == -1UL)
panic("Can not get kva space\n");
/*
* We waste pages past at the end of the KVA for no good reason other
* than how it is located. This is bad.
*/
wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1);
kva_start_pfn -= wasted_pages;
kva_pages += wasted_pages;
system_max_low_pfn = max_low_pfn = find_max_low_pfn();
printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
kva_start_pfn, max_low_pfn);
printk("max_pfn = %ld\n", max_pfn);
/* avoid clash with initrd */
reserve_early(kva_start_pfn<<PAGE_SHIFT,
(kva_start_pfn + kva_pages)<<PAGE_SHIFT,
"KVA PG");
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > system_max_low_pfn)
@ -365,16 +390,8 @@ unsigned long __init setup_memory(void)
return max_low_pfn;
}
void __init numa_kva_reserve(void)
{
if (kva_pages)
reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages),
BOOTMEM_DEFAULT);
}
void __init zone_sizes_init(void)
{
int nid;
unsigned long max_zone_pfns[MAX_NR_ZONES];
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] =
@ -384,27 +401,18 @@ void __init zone_sizes_init(void)
max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
#endif
/* If SRAT has not registered memory, register it now */
if (find_max_pfn_with_active_regions() == 0) {
for_each_online_node(nid) {
if (node_has_online_mem(nid))
add_active_range(nid, node_start_pfn[nid],
node_end_pfn[nid]);
}
}
free_area_init_nodes(max_zone_pfns);
return;
}
void __init set_highmem_pages_init(int bad_ppro)
void __init set_highmem_pages_init(void)
{
#ifdef CONFIG_HIGHMEM
struct zone *zone;
struct page *page;
int nid;
for_each_zone(zone) {
unsigned long node_pfn, zone_start_pfn, zone_end_pfn;
unsigned long zone_start_pfn, zone_end_pfn;
if (!is_highmem(zone))
continue;
@ -412,16 +420,12 @@ void __init set_highmem_pages_init(int bad_ppro)
zone_start_pfn = zone->zone_start_pfn;
zone_end_pfn = zone_start_pfn + zone->spanned_pages;
nid = zone_to_nid(zone);
printk("Initializing %s for node %d (%08lx:%08lx)\n",
zone->name, zone_to_nid(zone),
zone_start_pfn, zone_end_pfn);
zone->name, nid, zone_start_pfn, zone_end_pfn);
for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) {
if (!pfn_valid(node_pfn))
continue;
page = pfn_to_page(node_pfn);
add_one_highpage_init(page, node_pfn, bad_ppro);
}
add_highpages_with_active_regions(nid, zone_start_pfn,
zone_end_pfn);
}
totalram_pages += totalhigh_pages;
#endif

View file

@ -225,13 +225,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
update_page_count(PG_LEVEL_4K, pages_4k);
}
static inline int page_kills_ppro(unsigned long pagenr)
{
if (pagenr >= 0x70000 && pagenr <= 0x7003F)
return 1;
return 0;
}
/*
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
* is valid. The argument is a physical page number.
@ -292,29 +285,60 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pkmap_page_table = pte;
}
void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
static void __init add_one_highpage_init(struct page *page, int pfn)
{
if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
totalhigh_pages++;
} else
SetPageReserved(page);
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
struct add_highpages_data {
unsigned long start_pfn;
unsigned long end_pfn;
};
static void __init add_highpages_work_fn(unsigned long start_pfn,
unsigned long end_pfn, void *datax)
{
int node_pfn;
struct page *page;
unsigned long final_start_pfn, final_end_pfn;
struct add_highpages_data *data;
data = (struct add_highpages_data *)datax;
final_start_pfn = max(start_pfn, data->start_pfn);
final_end_pfn = min(end_pfn, data->end_pfn);
if (final_start_pfn >= final_end_pfn)
return;
for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
node_pfn++) {
if (!pfn_valid(node_pfn))
continue;
page = pfn_to_page(node_pfn);
add_one_highpage_init(page, node_pfn);
}
}
void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
unsigned long end_pfn)
{
struct add_highpages_data data;
data.start_pfn = start_pfn;
data.end_pfn = end_pfn;
work_with_active_regions(nid, add_highpages_work_fn, &data);
}
#ifndef CONFIG_NUMA
static void __init set_highmem_pages_init(int bad_ppro)
static void __init set_highmem_pages_init(void)
{
int pfn;
add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) {
/*
* Holes under sparsemem might not have no mem_map[]:
*/
if (pfn_valid(pfn))
add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
}
totalram_pages += totalhigh_pages;
}
#endif /* !CONFIG_NUMA */
@ -322,7 +346,7 @@ static void __init set_highmem_pages_init(int bad_ppro)
#else
# define kmap_init() do { } while (0)
# define permanent_kmaps_init(pgd_base) do { } while (0)
# define set_highmem_pages_init(bad_ppro) do { } while (0)
# define set_highmem_pages_init() do { } while (0)
#endif /* CONFIG_HIGHMEM */
pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
@ -569,13 +593,11 @@ static struct kcore_list kcore_mem, kcore_vmalloc;
void __init mem_init(void)
{
int codesize, reservedpages, datasize, initsize;
int tmp, bad_ppro;
int tmp;
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif
bad_ppro = ppro_with_ram_bug();
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
@ -587,7 +609,7 @@ void __init mem_init(void)
if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
reservedpages++;
set_highmem_pages_init(bad_ppro);
set_highmem_pages_init();
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
@ -776,3 +798,9 @@ void free_initrd_mem(unsigned long start, unsigned long end)
free_init_pages("initrd memory", start, end);
}
#endif
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
int flags)
{
return reserve_bootmem(phys, len, flags);
}

View file

@ -48,6 +48,18 @@
#include <asm/numa.h>
#include <asm/cacheflush.h>
/*
* PFN of last memory page.
*/
unsigned long end_pfn;
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
* The direct mapping extends to max_pfn_mapped, so that we can directly access
* apertures, ACPI and other tables without having to play with fixmaps.
*/
unsigned long max_pfn_mapped;
static unsigned long dma_reserve __initdata;
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@ -808,12 +820,14 @@ void free_initrd_mem(unsigned long start, unsigned long end)
}
#endif
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
int flags)
{
#ifdef CONFIG_NUMA
int nid, next_nid;
#endif
unsigned long pfn = phys >> PAGE_SHIFT;
int ret;
if (pfn >= end_pfn) {
/*
@ -821,11 +835,11 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
* firmware tables:
*/
if (pfn < max_pfn_mapped)
return;
return -EFAULT;
printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
phys, len);
return;
return -EFAULT;
}
/* Should check here against the e820 map to avoid double free */
@ -833,9 +847,13 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
nid = phys_to_nid(phys);
next_nid = phys_to_nid(phys + len - 1);
if (nid == next_nid)
reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
else
reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
ret = reserve_bootmem(phys, len, flags);
if (ret != 0)
return ret;
#else
reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
#endif
@ -844,6 +862,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
dma_reserve += len / PAGE_SIZE;
set_dma_reserve(dma_reserve);
}
return 0;
}
int kern_addr_valid(unsigned long addr)

View file

@ -57,18 +57,22 @@ static __init void early_get_boot_cpu_id(void)
/*
* Find possible boot-time SMP configuration:
*/
#ifdef CONFIG_X86_MPPARSE
early_find_smp_config();
#endif
#ifdef CONFIG_ACPI
/*
* Read APIC information from ACPI tables.
*/
early_acpi_boot_init();
#endif
#ifdef CONFIG_X86_MPPARSE
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
early_get_smp_config();
#endif
early_init_lapic_mapping();
}

View file

@ -233,7 +233,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
else
bootmap_start = round_up(start, PAGE_SIZE);
/*
* SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
* SMP_CACHE_BYTES could be enough, but init_bootmem_node like
* to use that to align to PAGE_SIZE
*/
bootmap = early_node_mem(nodeid, bootmap_start, end,

View file

@ -13,10 +13,11 @@ pci-y := fixup.o
pci-$(CONFIG_ACPI) += acpi.o
pci-y += legacy.o irq.o
# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are
# Careful: VISWS overrule the pci-y above. The colons are
# therefor correct. This needs a proper fix by distangling the code.
pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o
pci-$(CONFIG_X86_NUMAQ) += numa.o
# Necessary for NUMAQ as well
pci-$(CONFIG_NUMA) += mp_bus_to_node.o

View file

@ -388,7 +388,7 @@ static int __init early_fill_mp_bus_info(void)
/* need to take out [0, TOM) for RAM*/
address = MSR_K8_TOP_MEM1;
rdmsrl(address, val);
end = (val & 0xffffff8000000ULL);
end = (val & 0xffffff800000ULL);
printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20);
if (end < (1ULL<<32))
update_range(range, 0, end - 1);
@ -482,7 +482,7 @@ static int __init early_fill_mp_bus_info(void)
/* TOP_MEM2 */
address = MSR_K8_TOP_MEM2;
rdmsrl(address, val);
end = (val & 0xffffff8000000ULL);
end = (val & 0xffffff800000ULL);
printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20);
update_range(range, 1ULL<<32, end - 1);
}

View file

@ -6,45 +6,21 @@
#include <linux/init.h>
#include <linux/nodemask.h>
#include <mach_apic.h>
#include <asm/mpspec.h>
#include "pci.h"
#define XQUAD_PORTIO_BASE 0xfe400000
#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
int mp_bus_id_to_node[MAX_MP_BUSSES];
#define BUS2QUAD(global) (mp_bus_id_to_node[global])
int mp_bus_id_to_local[MAX_MP_BUSSES];
#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
struct mpc_config_translation *translation)
{
int quad = translation->trans_quad;
int local = translation->trans_local;
mp_bus_id_to_node[m->mpc_busid] = quad;
mp_bus_id_to_local[m->mpc_busid] = local;
printk(KERN_INFO "Bus #%d is %s (node %d)\n",
m->mpc_busid, name, quad);
}
int quad_local_to_mp_bus_id [NR_CPUS/4][4];
#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
void mpc_oem_pci_bus(struct mpc_config_bus *m,
struct mpc_config_translation *translation)
{
int quad = translation->trans_quad;
int local = translation->trans_local;
quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
}
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;
#ifdef CONFIG_X86_NUMAQ
EXPORT_SYMBOL(xquad_portio);
#endif
#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
@ -179,6 +155,9 @@ static int __init pci_numa_init(void)
{
int quad;
if (!found_numaq)
return 0;
raw_pci_ops = &pci_direct_conf1_mq;
if (pcibios_scanned++)

View file

@ -1273,6 +1273,7 @@ asmlinkage void __init xen_start_kernel(void)
pgd = (pgd_t *)xen_start_info->pt_base;
init_pg_tables_start = __pa(pgd);
init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
init_mm.pgd = pgd; /* use the Xen pagetables to start */
@ -1316,5 +1317,5 @@ asmlinkage void __init xen_start_kernel(void)
}
/* Start the world */
start_kernel();
i386_start_kernel();
}

View file

@ -40,8 +40,8 @@ char * __init xen_memory_setup(void)
max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
e820.nr_map = 0;
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
e820_add_region(0, LOWMEMSIZE(), E820_RAM);
e820_add_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
return "Xen";
}

View file

@ -4,7 +4,6 @@
menuconfig ACPI
bool "ACPI (Advanced Configuration and Power Interface) Support"
depends on !X86_NUMAQ
depends on !X86_VISWS
depends on !IA64_HP_SIM
depends on IA64 || X86

View file

@ -8,6 +8,11 @@
#include <linux/slab.h>
#include <asm/dmi.h>
/*
* DMI stands for "Desktop Management Interface". It is part
* of and an antecedent to, SMBIOS, which stands for System
* Management BIOS. See further: http://www.dmtf.org/standards
*/
static char dmi_empty_string[] = " ";
static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s)

View file

@ -28,6 +28,7 @@
#include <asm/numa.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mpspec.h>
#define COMPILER_DEPENDENT_INT64 long long
#define COMPILER_DEPENDENT_UINT64 unsigned long long
@ -160,9 +161,7 @@ struct bootnode;
#ifdef CONFIG_ACPI_NUMA
extern int acpi_numa;
extern int acpi_scan_nodes(unsigned long start, unsigned long end);
#ifdef CONFIG_X86_64
# define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
#endif
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
int num_nodes);
#else

View file

@ -14,4 +14,6 @@ static inline unsigned int get_bios_ebda(void)
return address; /* 0 means none */
}
void reserve_ebda_region(void);
#endif /* _MACH_BIOS_EBDA_H */

View file

@ -11,6 +11,7 @@
/* setup data types */
#define SETUP_NONE 0
#define SETUP_E820_EXT 1
/* extensible setup data list node */
struct setup_data {
@ -107,4 +108,7 @@ struct boot_params {
__u8 _pad9[276]; /* 0xeec */
} __attribute__((packed));
void reserve_setup_data(void);
void parse_setup_data(void);
#endif /* _ASM_BOOTPARAM_H */

View file

@ -3,12 +3,6 @@
#include <asm/io.h>
#ifdef CONFIG_X86_32
#define dmi_alloc alloc_bootmem
#else /* CONFIG_X86_32 */
#define DMI_MAX_DATA 2048
extern int dmi_alloc_index;
@ -25,8 +19,6 @@ static inline void *dmi_alloc(unsigned len)
return dmi_alloc_data + idx;
}
#endif
/* Use early IO mappings for DMI because it's initialized early */
#define dmi_ioremap early_ioremap
#define dmi_iounmap early_iounmap

View file

@ -2,6 +2,41 @@
#define __ASM_E820_H
#define E820MAP 0x2d0 /* our map */
#define E820MAX 128 /* number of entries in E820MAP */
/*
* Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the
* constrained space in the zeropage. If we have more nodes than
* that, and if we've booted off EFI firmware, then the EFI tables
* passed us from the EFI firmware can list more nodes. Size our
* internal memory map tables to have room for these additional
* nodes, based on up to three entries per node for which the
* kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT),
* plus E820MAX, allowing space for the possible duplicate E820
* entries that might need room in the same arrays, prior to the
* call to sanitize_e820_map() to remove duplicates. The allowance
* of three memory map entries per node is "enough" entries for
* the initial hardware platform motivating this mechanism to make
* use of additional EFI map entries. Future platforms may want
* to allow more than three entries per node or otherwise refine
* this size.
*/
/*
* Odd: 'make headers_check' complains about numa.h if I try
* to collapse the next two #ifdef lines to a single line:
* #if defined(__KERNEL__) && defined(CONFIG_EFI)
*/
#ifdef __KERNEL__
#ifdef CONFIG_EFI
#include <linux/numa.h>
#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
#else /* ! CONFIG_EFI */
#define E820_X_MAX E820MAX
#endif
#else /* ! __KERNEL__ */
#define E820_X_MAX E820MAX
#endif
#define E820NR 0x1e8 /* # entries in E820MAP */
#define E820_RAM 1
@ -18,8 +53,59 @@ struct e820entry {
struct e820map {
__u32 nr_map;
struct e820entry map[E820MAX];
struct e820entry map[E820_X_MAX];
};
extern struct e820map e820;
extern int e820_any_mapped(u64 start, u64 end, unsigned type);
extern int e820_all_mapped(u64 start, u64 end, unsigned type);
extern void e820_add_region(u64 start, u64 size, int type);
extern void e820_print_map(char *who);
extern int
sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map);
extern int copy_e820_map(struct e820entry *biosmap, int nr_map);
extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
unsigned new_type);
extern void update_e820(void);
extern void e820_setup_gap(void);
struct setup_data;
extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data);
#if defined(CONFIG_X86_64) || \
(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
extern void e820_mark_nosave_regions(unsigned long limit_pfn);
#else
static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
{
}
#endif
extern unsigned long end_user_pfn;
extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
extern void reserve_early(u64 start, u64 end, char *name);
extern void free_early(u64 start, u64 end);
extern void early_res_to_bootmem(u64 start, u64 end);
extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
extern unsigned long e820_end_of_ram(void);
extern int e820_find_active_region(const struct e820entry *ei,
unsigned long start_pfn,
unsigned long last_pfn,
unsigned long *ei_startpfn,
unsigned long *ei_endpfn);
extern void e820_register_active_regions(int nid, unsigned long start_pfn,
unsigned long end_pfn);
extern u64 e820_hole_size(u64 start, u64 end);
extern void finish_e820_parsing(void);
extern void e820_reserve_resources(void);
extern void setup_memory_map(void);
extern char *default_machine_specific_memory_setup(void);
extern char *machine_specific_memory_setup(void);
extern char *memory_setup(void);
#endif /* __ASSEMBLY__ */
#define ISA_START_ADDRESS 0xa0000
@ -30,11 +116,9 @@ struct e820map {
#define BIOS_END 0x00100000
#ifdef __KERNEL__
#ifdef CONFIG_X86_32
# include "e820_32.h"
#else
# include "e820_64.h"
#endif
#include <linux/ioport.h>
#define HIGH_MEMORY (1024*1024)
#endif /* __KERNEL__ */
#endif /* __ASM_E820_H */

View file

@ -1,50 +0,0 @@
/*
* structures and definitions for the int 15, ax=e820 memory map
* scheme.
*
* In a nutshell, arch/i386/boot/setup.S populates a scratch table
* in the empty_zero_block that contains a list of usable address/size
* duples. In arch/i386/kernel/setup.c, this information is
* transferred into the e820map, and in arch/i386/mm/init.c, that
* new information is used to mark pages reserved or not.
*
*/
#ifndef __E820_HEADER
#define __E820_HEADER
#include <linux/ioport.h>
#define HIGH_MEMORY (1024*1024)
#ifndef __ASSEMBLY__
extern struct e820map e820;
extern void update_e820(void);
extern int e820_all_mapped(unsigned long start, unsigned long end,
unsigned type);
extern int e820_any_mapped(u64 start, u64 end, unsigned type);
extern void propagate_e820_map(void);
extern void register_bootmem_low_pages(unsigned long max_low_pfn);
extern void add_memory_region(unsigned long long start,
unsigned long long size, int type);
extern void update_memory_range(u64 start, u64 size, unsigned old_type,
unsigned new_type);
extern void e820_register_memory(void);
extern void limit_regions(unsigned long long size);
extern void print_memory_map(char *who);
extern void init_iomem_resources(struct resource *code_resource,
struct resource *data_resource,
struct resource *bss_resource);
#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
extern void e820_mark_nosave_regions(void);
#else
static inline void e820_mark_nosave_regions(void)
{
}
#endif
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/

View file

@ -1,56 +0,0 @@
/*
* structures and definitions for the int 15, ax=e820 memory map
* scheme.
*
* In a nutshell, setup.S populates a scratch table in the
* empty_zero_block that contains a list of usable address/size
* duples. setup.c, this information is transferred into the e820map,
* and in init.c/numa.c, that new information is used to mark pages
* reserved or not.
*/
#ifndef __E820_HEADER
#define __E820_HEADER
#include <linux/ioport.h>
#ifndef __ASSEMBLY__
extern unsigned long find_e820_area(unsigned long start, unsigned long end,
unsigned long size, unsigned long align);
extern unsigned long find_e820_area_size(unsigned long start,
unsigned long *sizep,
unsigned long align);
extern void add_memory_region(unsigned long start, unsigned long size,
int type);
extern void update_memory_range(u64 start, u64 size, unsigned old_type,
unsigned new_type);
extern void setup_memory_region(void);
extern void contig_e820_setup(void);
extern unsigned long e820_end_of_ram(void);
extern void e820_reserve_resources(void);
extern void e820_mark_nosave_regions(void);
extern int e820_any_mapped(unsigned long start, unsigned long end,
unsigned type);
extern int e820_all_mapped(unsigned long start, unsigned long end,
unsigned type);
extern int e820_any_non_reserved(unsigned long start, unsigned long end);
extern int is_memory_any_valid(unsigned long start, unsigned long end);
extern int e820_all_non_reserved(unsigned long start, unsigned long end);
extern int is_memory_all_valid(unsigned long start, unsigned long end);
extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
extern void e820_setup_gap(void);
extern void e820_register_active_regions(int nid, unsigned long start_pfn,
unsigned long end_pfn);
extern void finish_e820_parsing(void);
extern struct e820map e820;
extern void update_e820(void);
extern void reserve_early(unsigned long start, unsigned long end, char *name);
extern void free_early(unsigned long start, unsigned long end);
extern void early_res_to_bootmem(unsigned long start, unsigned long end);
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/

View file

@ -90,7 +90,7 @@ extern void *efi_ioremap(unsigned long addr, unsigned long size);
#endif /* CONFIG_X86_32 */
extern void efi_reserve_bootmem(void);
extern void efi_reserve_early(void);
extern void efi_call_phys_prelog(void);
extern void efi_call_phys_epilog(void);

View file

@ -74,6 +74,9 @@ struct page *kmap_atomic_to_page(void *ptr);
#define flush_cache_kmaps() do { } while (0)
extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
unsigned long end_pfn);
#endif /* __KERNEL__ */
#endif /* _ASM_HIGHMEM_H */

View file

@ -121,21 +121,32 @@ extern int nr_ioapic_registers[MAX_IO_APICS];
#define MP_MAX_IOAPIC_PIN 127
struct mp_ioapic_routing {
int apic_id;
int gsi_base;
int gsi_end;
DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
struct mp_config_ioapic {
unsigned long mp_apicaddr;
unsigned int mp_apicid;
unsigned char mp_type;
unsigned char mp_apicver;
unsigned char mp_flags;
};
struct mp_config_intsrc {
unsigned int mp_dstapic;
unsigned char mp_type;
unsigned char mp_irqtype;
unsigned short mp_irqflag;
unsigned char mp_srcbus;
unsigned char mp_srcbusirq;
unsigned char mp_dstirq;
};
/* I/O APIC entries */
extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
/* # of MP IRQ source entries */
extern int mp_irq_entries;
/* MP IRQ source entries */
extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* non-0 if default (table-less) MP configuration */
extern int mpc_default_type;

View file

@ -20,6 +20,7 @@
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/smp.h>
/*
* the following functions deal with sending IPIs between CPUs.

View file

@ -1,8 +0,0 @@
#ifndef __ASM_MACH_MPSPEC_H
#define __ASM_MACH_MPSPEC_H
#define MAX_IRQ_SOURCES 256
#define MAX_MP_BUSSES 32
#endif /* __ASM_MACH_MPSPEC_H */

View file

@ -1,8 +0,0 @@
#ifndef __ASM_MACH_MPSPEC_H
#define __ASM_MACH_MPSPEC_H
#define MAX_IRQ_SOURCES 256
#define MAX_MP_BUSSES 256
#endif /* __ASM_MACH_MPSPEC_H */

View file

@ -1,7 +1,10 @@
#ifndef _MACH_MPPARSE_H
#define _MACH_MPPARSE_H 1
int mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid);
int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
extern int mps_oem_check(struct mp_config_table *mpc, char *oem,
char *productid);
extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
#endif

View file

@ -20,8 +20,14 @@ static inline cpumask_t target_cpus(void)
#define INT_DELIVERY_MODE dest_LowestPrio
#define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */
#define check_apicid_used(bitmap, apicid) physid_isset(apicid, bitmap)
#define check_apicid_present(bit) physid_isset(bit, phys_cpu_present_map)
static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
{
return physid_isset(apicid, bitmap);
}
static inline unsigned long check_apicid_present(int bit)
{
return physid_isset(bit, phys_cpu_present_map);
}
#define apicid_cluster(apicid) (apicid & 0xF0)
static inline int apic_id_registered(void)
@ -77,11 +83,6 @@ static inline int cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
static inline int generate_logical_apicid(int quad, int phys_apicid)
{
return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
}
static inline int apicid_to_node(int logical_apicid)
{
return logical_apicid >> 4;
@ -95,30 +96,6 @@ static inline physid_mask_t apicid_to_cpu_present(int logical_apicid)
return physid_mask_of_physid(cpu + 4*node);
}
struct mpc_config_translation {
unsigned char mpc_type;
unsigned char trans_len;
unsigned char trans_type;
unsigned char trans_quad;
unsigned char trans_global;
unsigned char trans_local;
unsigned short trans_reserved;
};
static inline int mpc_apic_id(struct mpc_config_processor *m,
struct mpc_config_translation *translation_record)
{
int quad = translation_record->trans_quad;
int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
printk("Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
m->mpc_apicid,
(m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
m->mpc_apicver, quad, logical_apicid);
return logical_apicid;
}
extern void *xquad_portio;
static inline void setup_portio_remap(void)

View file

@ -1,14 +1,7 @@
#ifndef __ASM_MACH_MPPARSE_H
#define __ASM_MACH_MPPARSE_H
extern void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
struct mpc_config_translation *translation);
extern void mpc_oem_pci_bus(struct mpc_config_bus *m,
struct mpc_config_translation *translation);
/* Hook from generic ACPI tables.c */
static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
}
extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
char *productid);
#endif /* __ASM_MACH_MPPARSE_H */

View file

@ -1,8 +0,0 @@
#ifndef __ASM_MACH_MPSPEC_H
#define __ASM_MACH_MPSPEC_H
#define MAX_IRQ_SOURCES 512
#define MAX_MP_BUSSES 32
#endif /* __ASM_MACH_MPSPEC_H */

View file

@ -1,9 +0,0 @@
#ifndef __ASM_MACH_MPSPEC_H
#define __ASM_MACH_MPSPEC_H
#define MAX_IRQ_SOURCES 256
/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */
#define MAX_MP_BUSSES 260
#endif /* __ASM_MACH_MPSPEC_H */

View file

@ -12,11 +12,9 @@
extern struct pglist_data *node_data[];
#define NODE_DATA(nid) (node_data[nid])
#ifdef CONFIG_X86_NUMAQ
#include <asm/numaq.h>
#elif defined(CONFIG_ACPI_SRAT)/* summit or generic arch */
#include <asm/srat.h>
#endif
#include <asm/numaq.h>
/* summit or generic arch */
#include <asm/srat.h>
extern int get_memcfg_numa_flat(void);
/*
@ -26,28 +24,20 @@ extern int get_memcfg_numa_flat(void);
*/
static inline void get_memcfg_numa(void)
{
#ifdef CONFIG_X86_NUMAQ
if (get_memcfg_numaq())
return;
#elif defined(CONFIG_ACPI_SRAT)
if (get_memcfg_from_srat())
return;
#endif
get_memcfg_numa_flat();
}
extern int early_pfn_to_nid(unsigned long pfn);
extern void numa_kva_reserve(void);
#else /* !CONFIG_NUMA */
#define get_memcfg_numa get_memcfg_numa_flat
#define get_zholes_size(n) (0)
static inline void numa_kva_reserve(void)
{
}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DISCONTIGMEM
@ -55,14 +45,14 @@ static inline void numa_kva_reserve(void)
/*
* generic node memory support, the following assumptions apply:
*
* 1) memory comes in 256Mb contigious chunks which are either present or not
* 1) memory comes in 64Mb contigious chunks which are either present or not
* 2) we will not have more than 64Gb in total
*
* for now assume that 64Gb is max amount of RAM for whole system
* 64Gb / 4096bytes/page = 16777216 pages
*/
#define MAX_NR_PAGES 16777216
#define MAX_ELEMENTS 256
#define MAX_ELEMENTS 1024
#define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS)
extern s8 physnode_map[];
@ -87,9 +77,6 @@ static inline int pfn_to_nid(unsigned long pfn)
__pgdat->node_start_pfn + __pgdat->node_spanned_pages; \
})
#ifdef CONFIG_X86_NUMAQ /* we have contiguous memory on NUMA-Q */
#define pfn_valid(pfn) ((pfn) < num_physpages)
#else
static inline int pfn_valid(int pfn)
{
int nid = pfn_to_nid(pfn);
@ -98,7 +85,6 @@ static inline int pfn_valid(int pfn)
return (pfn < node_end_pfn(nid));
return 0;
}
#endif /* CONFIG_X86_NUMAQ */
#endif /* CONFIG_DISCONTIGMEM */

View file

@ -13,6 +13,12 @@ extern int apic_version[MAX_APICS];
extern u8 apicid_2_node[];
extern int pic_mode;
#ifdef CONFIG_X86_NUMAQ
extern int mp_bus_id_to_node[MAX_MP_BUSSES];
extern int mp_bus_id_to_local[MAX_MP_BUSSES];
extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
#endif
#define MAX_APICID 256
#else
@ -21,26 +27,30 @@ extern int pic_mode;
/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */
#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
#endif
extern void early_find_smp_config(void);
extern void early_get_smp_config(void);
#endif
#if defined(CONFIG_MCA) || defined(CONFIG_EISA)
extern int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif
extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
extern unsigned int boot_cpu_physical_apicid;
extern unsigned int max_physical_apicid;
extern int smp_found_config;
extern int mpc_default_type;
extern unsigned long mp_lapic_addr;
extern void find_smp_config(void);
extern void get_smp_config(void);
#ifdef CONFIG_X86_MPPARSE
extern void early_reserve_e820_mpc_new(void);
#else
static inline void early_reserve_e820_mpc_new(void) { }
#endif
void __cpuinit generic_processor_info(int apicid, int version);
#ifdef CONFIG_ACPI
@ -49,6 +59,17 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi);
extern void mp_config_acpi_legacy_irqs(void);
extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
#ifdef CONFIG_X86_IO_APIC
extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
u32 gsi, int triggering, int polarity);
#else
static inline int
mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
u32 gsi, int triggering, int polarity)
{
return 0;
}
#endif
#endif /* CONFIG_ACPI */
#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS)

View file

@ -5,7 +5,7 @@ extern int pxm_to_nid(int pxm);
#ifdef CONFIG_NUMA
extern void __init remap_numa_kva(void);
extern void set_highmem_pages_init(int);
extern void set_highmem_pages_init(void);
#else
static inline void remap_numa_kva(void)
{

View file

@ -28,6 +28,7 @@
#ifdef CONFIG_X86_NUMAQ
extern int found_numaq;
extern int get_memcfg_numaq(void);
/*
@ -156,9 +157,10 @@ struct sys_cfg_data {
struct eachquadmem eq[MAX_NUMNODES]; /* indexed by quad id */
};
static inline unsigned long *get_zholes_size(int nid)
#else
static inline int get_memcfg_numaq(void)
{
return NULL;
return 0;
}
#endif /* CONFIG_X86_NUMAQ */
#endif /* NUMAQ_H */

View file

@ -14,8 +14,6 @@ extern void ia32_syscall(void);
extern void ia32_cstar_target(void);
extern void ia32_sysenter_target(void);
extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
extern void syscall32_cpu_init(void);
extern void check_efer(void);

View file

@ -8,7 +8,6 @@
/* Interrupt control for vSMPowered x86_64 systems */
void vsmp_init(void);
char *machine_specific_memory_setup(void);
#ifndef CONFIG_PARAVIRT
#define paravirt_post_allocator_init() do {} while (0)
#endif
@ -43,27 +42,19 @@ char *machine_specific_memory_setup(void);
*/
extern struct boot_params boot_params;
#ifdef __i386__
/*
* Do NOT EVER look at the BIOS memory size location.
* It does not work on many machines.
*/
#define LOWMEMSIZE() (0x9f000)
struct e820entry;
#ifdef __i386__
char * __init machine_specific_memory_setup(void);
char *memory_setup(void);
int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map);
void __init add_memory_region(unsigned long long start,
unsigned long long size, int type);
void __init i386_start_kernel(void);
extern unsigned long init_pg_tables_start;
extern unsigned long init_pg_tables_end;
#endif /* __i386__ */
#endif /* _SETUP */
#endif /* __ASSEMBLY__ */

View file

@ -201,7 +201,6 @@ extern void cpu_exit_clear(void);
extern void cpu_uninit(void);
#endif
extern void smp_alloc_memory(void);
extern void lock_ipi_call_lock(void);
extern void unlock_ipi_call_lock(void);
#endif /* __ASSEMBLY__ */

View file

@ -27,11 +27,13 @@
#ifndef _ASM_SRAT_H_
#define _ASM_SRAT_H_
#ifndef CONFIG_ACPI_SRAT
#error CONFIG_ACPI_SRAT not defined, and srat.h header has been included
#ifdef CONFIG_ACPI_SRAT
extern int get_memcfg_from_srat(void);
#else
static inline int get_memcfg_from_srat(void)
{
return 0;
}
#endif
extern int get_memcfg_from_srat(void);
extern unsigned long *get_zholes_size(int);
#endif /* _ASM_SRAT_H_ */

View file

@ -303,7 +303,6 @@ static inline void clflush(volatile void *__p)
void disable_hlt(void);
void enable_hlt(void);
extern int es7000_plat;
void cpu_idle_wait(void);
extern unsigned long arch_align_stack(unsigned long sp);

View file

@ -84,6 +84,8 @@ extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags);
__alloc_bootmem_low(x, PAGE_SIZE, 0)
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
int flags);
extern unsigned long free_all_bootmem(void);
extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
extern void *__alloc_bootmem_node(pg_data_t *pgdat,

View file

@ -287,7 +287,6 @@ efi_guid_unparse(efi_guid_t *guid, char *out)
extern void efi_init (void);
extern void *efi_get_pal_addr (void);
extern void efi_map_pal_code (void);
extern void efi_map_memmap(void);
extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
extern void efi_gettimeofday (struct timespec *ts);
extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */
@ -295,14 +294,11 @@ extern u64 efi_get_iobase (void);
extern u32 efi_mem_type (unsigned long phys_addr);
extern u64 efi_mem_attributes (unsigned long phys_addr);
extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size,
u64 attr);
extern int __init efi_uart_console_only (void);
extern void efi_initialize_iomem_resources(struct resource *code_resource,
struct resource *data_resource, struct resource *bss_resource);
extern unsigned long efi_get_time(void);
extern int efi_set_rtc_mmss(unsigned long nowtime);
extern int is_available_memory(efi_memory_desc_t * md);
extern struct efi_memory_map memmap;
/**

View file

@ -998,8 +998,8 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat,
extern void free_area_init_nodes(unsigned long *max_zone_pfn);
extern void add_active_range(unsigned int nid, unsigned long start_pfn,
unsigned long end_pfn);
extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
unsigned long new_end_pfn);
extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
unsigned long end_pfn);
extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
unsigned long end_pfn);
extern void remove_all_active_ranges(void);
@ -1011,6 +1011,8 @@ extern unsigned long find_min_pfn_with_active_regions(void);
extern unsigned long find_max_pfn_with_active_regions(void);
extern void free_bootmem_with_active_regions(int nid,
unsigned long max_low_pfn);
typedef void (*work_fn_t)(unsigned long, unsigned long, void *);
extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
extern void sparse_memory_present_with_active_regions(int nid);
#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
extern int early_pfn_to_nid(unsigned long pfn);

View file

@ -25,13 +25,11 @@
#include <linux/types.h>
/* Macro to aid the definition of ranges of bits */
#define PB_range(name, required_bits) \
name, name ## _end = (name + required_bits) - 1
/* Bit indices that affect a whole block of pages */
enum pageblock_bits {
PB_range(PB_migrate, 3), /* 3 bits required for migrate types */
PB_migrate,
PB_migrate_end = PB_migrate + 3 - 1,
/* 3 bits required for migrate types */
NR_PAGEBLOCK_BITS
};

View file

@ -2929,6 +2929,14 @@ void __init free_bootmem_with_active_regions(int nid,
}
}
void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
{
int i;
for_each_active_range_index_in_nid(i, nid)
work_fn(early_node_map[i].start_pfn, early_node_map[i].end_pfn,
data);
}
/**
* sparse_memory_present_with_active_regions - Call memory_present for each active range
* @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@ -3461,6 +3469,11 @@ void __paginginit free_area_init_node(int nid, struct pglist_data *pgdat,
calculate_node_totalpages(pgdat, zones_size, zholes_size);
alloc_node_mem_map(pgdat);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
nid, (unsigned long)pgdat,
(unsigned long)pgdat->node_mem_map);
#endif
free_area_init_core(pgdat, zones_size, zholes_size);
}
@ -3547,27 +3560,68 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
}
/**
* shrink_active_range - Shrink an existing registered range of PFNs
* remove_active_range - Shrink an existing registered range of PFNs
* @nid: The node id the range is on that should be shrunk
* @old_end_pfn: The old end PFN of the range
* @new_end_pfn: The new PFN of the range
* @start_pfn: The new PFN of the range
* @end_pfn: The new PFN of the range
*
* i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
* The map is kept at the end physical page range that has already been
* registered with add_active_range(). This function allows an arch to shrink
* an existing registered range.
* The map is kept near the end physical page range that has already been
* registered. This function allows an arch to shrink an existing registered
* range.
*/
void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
unsigned long new_end_pfn)
void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
unsigned long end_pfn)
{
int i;
int i, j;
int removed = 0;
printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
nid, start_pfn, end_pfn);
/* Find the old active region end and shrink */
for_each_active_range_index_in_nid(i, nid)
if (early_node_map[i].end_pfn == old_end_pfn) {
early_node_map[i].end_pfn = new_end_pfn;
break;
for_each_active_range_index_in_nid(i, nid) {
if (early_node_map[i].start_pfn >= start_pfn &&
early_node_map[i].end_pfn <= end_pfn) {
/* clear it */
early_node_map[i].start_pfn = 0;
early_node_map[i].end_pfn = 0;
removed = 1;
continue;
}
if (early_node_map[i].start_pfn < start_pfn &&
early_node_map[i].end_pfn > start_pfn) {
unsigned long temp_end_pfn = early_node_map[i].end_pfn;
early_node_map[i].end_pfn = start_pfn;
if (temp_end_pfn > end_pfn)
add_active_range(nid, end_pfn, temp_end_pfn);
continue;
}
if (early_node_map[i].start_pfn >= start_pfn &&
early_node_map[i].end_pfn > end_pfn &&
early_node_map[i].start_pfn < end_pfn) {
early_node_map[i].start_pfn = end_pfn;
continue;
}
}
if (!removed)
return;
/* remove the blank ones */
for (i = nr_nodemap_entries - 1; i > 0; i--) {
if (early_node_map[i].nid != nid)
continue;
if (early_node_map[i].end_pfn)
continue;
/* we found it, get rid of it */
for (j = i; j < nr_nodemap_entries - 1; j++)
memcpy(&early_node_map[j], &early_node_map[j+1],
sizeof(early_node_map[j]));
j = nr_nodemap_entries - 1;
memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
nr_nodemap_entries--;
}
}
/**