d158cbdf39
Make it possible to restore a hibernation image on x86_64 with the help of a kernel different from the one in the image. The idea is to split the core restoration code into two separate parts and to place each of them in a different page. The first part belongs to the boot kernel and is executed as the last step of the image kernel's memory restoration procedure. Before being executed, it is relocated to a safe page that won't be overwritten while copying the image kernel pages. The final operation performed by it is a jump to the second part of the core restoration code that belongs to the image kernel and has just been restored. This code makes the CPU switch to the image kernel's page tables and restores the state of general purpose registers (including the stack pointer) from before the hibernation. The main issue with this idea is that in order to jump to the second part of the core restoration code the boot kernel needs to know its address. However, this address may be passed to it in the image header. Namely, the part of the image header previously used for checking if the version of the image kernel is correct can be replaced with some architecture specific data that will allow the boot kernel to jump to the right address within the image kernel. These data should also be used for checking if the image kernel is compatible with the boot kernel (as far as the memory restoration procedure is concerned). It can be done, for example, with the help of a "magic" value that has to be equal in both kernels, so that they can be regarded as compatible. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl> Acked-by: Pavel Machek <pavel@ucw.cz> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
291 lines
7.3 KiB
C
291 lines
7.3 KiB
C
/*
 * Suspend support specific for x86_64.
 *
 * Distribute under GPLv2
 *
 * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
 */
|
|
|
|
#include <linux/smp.h>
|
|
#include <linux/suspend.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/page.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/mtrr.h>
|
|
|
|
/* References to section boundaries */
|
|
extern const void __nosave_begin, __nosave_end;
|
|
|
|
struct saved_context saved_context;
|
|
|
|
/*
 * Storage for the general purpose registers and the flags register.
 * Nothing in this file reads or writes these variables.
 * NOTE(review): presumably they are accessed from the assembly suspend/resume
 * code -- confirm against suspend_asm.S.
 */
unsigned long saved_context_eax, saved_context_ebx, saved_context_ecx, saved_context_edx;
unsigned long saved_context_esp, saved_context_ebp, saved_context_esi, saved_context_edi;
unsigned long saved_context_r08, saved_context_r09, saved_context_r10, saved_context_r11;
unsigned long saved_context_r12, saved_context_r13, saved_context_r14, saved_context_r15;
unsigned long saved_context_eflags;
|
|
|
|
void __save_processor_state(struct saved_context *ctxt)
|
|
{
|
|
kernel_fpu_begin();
|
|
|
|
/*
|
|
* descriptor tables
|
|
*/
|
|
asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
|
|
asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
|
|
asm volatile ("str %0" : "=m" (ctxt->tr));
|
|
|
|
/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
|
|
/*
|
|
* segment registers
|
|
*/
|
|
asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
|
|
asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
|
|
asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
|
|
asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
|
|
asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
|
|
|
|
rdmsrl(MSR_FS_BASE, ctxt->fs_base);
|
|
rdmsrl(MSR_GS_BASE, ctxt->gs_base);
|
|
rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
|
|
mtrr_save_fixed_ranges(NULL);
|
|
|
|
/*
|
|
* control registers
|
|
*/
|
|
rdmsrl(MSR_EFER, ctxt->efer);
|
|
ctxt->cr0 = read_cr0();
|
|
ctxt->cr2 = read_cr2();
|
|
ctxt->cr3 = read_cr3();
|
|
ctxt->cr4 = read_cr4();
|
|
ctxt->cr8 = read_cr8();
|
|
}
|
|
|
|
void save_processor_state(void)
|
|
{
|
|
__save_processor_state(&saved_context);
|
|
}
|
|
|
|
/*
 * do_fpu_end - thin wrapper around kernel_fpu_end(), used on the restore
 * path as the counterpart of the kernel_fpu_begin() call made in
 * __save_processor_state().
 */
static void do_fpu_end(void)
{
	/*
	 * Restore FPU regs if necessary
	 */
	kernel_fpu_end();
}
|
|
|
|
void __restore_processor_state(struct saved_context *ctxt)
|
|
{
|
|
/*
|
|
* control registers
|
|
*/
|
|
wrmsrl(MSR_EFER, ctxt->efer);
|
|
write_cr8(ctxt->cr8);
|
|
write_cr4(ctxt->cr4);
|
|
write_cr3(ctxt->cr3);
|
|
write_cr2(ctxt->cr2);
|
|
write_cr0(ctxt->cr0);
|
|
|
|
/*
|
|
* now restore the descriptor tables to their proper values
|
|
* ltr is done i fix_processor_context().
|
|
*/
|
|
asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
|
|
asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
|
|
|
|
/*
|
|
* segment registers
|
|
*/
|
|
asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
|
|
asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
|
|
asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
|
|
load_gs_index(ctxt->gs);
|
|
asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
|
|
|
|
wrmsrl(MSR_FS_BASE, ctxt->fs_base);
|
|
wrmsrl(MSR_GS_BASE, ctxt->gs_base);
|
|
wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
|
|
|
|
fix_processor_context();
|
|
|
|
do_fpu_end();
|
|
mtrr_ap_init();
|
|
}
|
|
|
|
void restore_processor_state(void)
|
|
{
|
|
__restore_processor_state(&saved_context);
|
|
}
|
|
|
|
void fix_processor_context(void)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
struct tss_struct *t = &per_cpu(init_tss, cpu);
|
|
|
|
set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
|
|
|
|
cpu_gdt(cpu)[GDT_ENTRY_TSS].type = 9;
|
|
|
|
syscall_init(); /* This sets MSR_*STAR and related */
|
|
load_TR_desc(); /* This does ltr */
|
|
load_LDT(¤t->active_mm->context); /* This does lldt */
|
|
|
|
/*
|
|
* Now maybe reload the debug registers
|
|
*/
|
|
if (current->thread.debugreg7){
|
|
loaddebug(¤t->thread, 0);
|
|
loaddebug(¤t->thread, 1);
|
|
loaddebug(¤t->thread, 2);
|
|
loaddebug(¤t->thread, 3);
|
|
/* no 4 and 5 */
|
|
loaddebug(¤t->thread, 6);
|
|
loaddebug(¤t->thread, 7);
|
|
}
|
|
|
|
}
|
|
|
|
#ifdef CONFIG_HIBERNATION
|
|
/* Defined in arch/x86_64/kernel/suspend_asm.S */
|
|
extern int restore_image(void);
|
|
|
|
/*
|
|
* Address to jump to in the last phase of restore in order to get to the image
|
|
* kernel's text (this value is passed in the image header).
|
|
*/
|
|
unsigned long restore_jump_address;
|
|
|
|
pgd_t *temp_level4_pgt;
|
|
|
|
void *relocated_restore_code;
|
|
|
|
/*
 * res_phys_pud_init - populate a PUD page with large-page kernel mappings
 * @pud: PUD page to fill in
 * @address: first physical address to map
 * @end: physical address at which mapping stops (exclusive)
 *
 * Starting at slot pud_index(@address), allocates one PMD page per PUD slot
 * with get_safe_page(), installs it in the PUD and fills it with
 * __PAGE_KERNEL_LARGE_EXEC entries (masked with __supported_pte_mask) that
 * map consecutive PMD_SIZE chunks until @end is reached.
 *
 * Returns 0 on success or -ENOMEM if a PMD page cannot be allocated.
 *
 * NOTE(review): paddr is computed as @address + i * PUD_SIZE with i starting
 * at pud_index(@address), so it matches the slot being filled only when that
 * index is 0.  The only caller in this file passes __pa() of addresses that
 * advance in PGDIR_SIZE steps from pfn_to_kaddr(0), for which the index is
 * presumably 0 -- confirm before reusing this helper elsewhere.
 */
static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
	long i, j;

	i = pud_index(address);
	pud = pud + i;
	for (; i < PTRS_PER_PUD; pud++, i++) {
		unsigned long paddr;
		pmd_t *pmd;

		paddr = address + i*PUD_SIZE;
		if (paddr >= end)
			break;

		pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
		if (!pmd)
			return -ENOMEM;
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
			unsigned long pe;

			if (paddr >= end)
				break;
			pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
			pe &= __supported_pte_mask;
			set_pmd(pmd, __pmd(pe));
		}
	}
	return 0;
}
|
|
|
|
static int set_up_temporary_mappings(void)
|
|
{
|
|
unsigned long start, end, next;
|
|
int error;
|
|
|
|
temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
|
|
if (!temp_level4_pgt)
|
|
return -ENOMEM;
|
|
|
|
/* It is safe to reuse the original kernel mapping */
|
|
set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
|
|
init_level4_pgt[pgd_index(__START_KERNEL_map)]);
|
|
|
|
/* Set up the direct mapping from scratch */
|
|
start = (unsigned long)pfn_to_kaddr(0);
|
|
end = (unsigned long)pfn_to_kaddr(end_pfn);
|
|
|
|
for (; start < end; start = next) {
|
|
pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
|
|
if (!pud)
|
|
return -ENOMEM;
|
|
next = start + PGDIR_SIZE;
|
|
if (next > end)
|
|
next = end;
|
|
if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
|
|
return error;
|
|
set_pgd(temp_level4_pgt + pgd_index(start),
|
|
mk_kernel_pgd(__pa(pud)));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int swsusp_arch_resume(void)
|
|
{
|
|
int error;
|
|
|
|
/* We have got enough memory and from now on we cannot recover */
|
|
if ((error = set_up_temporary_mappings()))
|
|
return error;
|
|
|
|
relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
|
|
if (!relocated_restore_code)
|
|
return -ENOMEM;
|
|
memcpy(relocated_restore_code, &core_restore_code,
|
|
&restore_registers - &core_restore_code);
|
|
|
|
restore_image();
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* pfn_is_nosave - check if given pfn is in the 'nosave' section
|
|
*/
|
|
|
|
int pfn_is_nosave(unsigned long pfn)
|
|
{
|
|
unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
|
|
unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
|
|
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
|
|
}
|
|
|
|
/*
 * Architecture specific data stored in the hibernation image header by
 * arch_hibernation_header_save() and checked by
 * arch_hibernation_header_restore().
 */
struct restore_data_record {
	unsigned long jump_address;	/* copy of restore_jump_address */
	unsigned long control;		/* jump_address ^ RESTORE_MAGIC */
};

/* Value XORed with the jump address to form the control word */
#define RESTORE_MAGIC	0x0123456789ABCDEFUL
|
|
|
|
/**
|
|
* arch_hibernation_header_save - populate the architecture specific part
|
|
* of a hibernation image header
|
|
* @addr: address to save the data at
|
|
*/
|
|
int arch_hibernation_header_save(void *addr, unsigned int max_size)
|
|
{
|
|
struct restore_data_record *rdr = addr;
|
|
|
|
if (max_size < sizeof(struct restore_data_record))
|
|
return -EOVERFLOW;
|
|
rdr->jump_address = restore_jump_address;
|
|
rdr->control = (restore_jump_address ^ RESTORE_MAGIC);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* arch_hibernation_header_restore - read the architecture specific data
|
|
* from the hibernation image header
|
|
* @addr: address to read the data from
|
|
*/
|
|
int arch_hibernation_header_restore(void *addr)
|
|
{
|
|
struct restore_data_record *rdr = addr;
|
|
|
|
restore_jump_address = rdr->jump_address;
|
|
return (rdr->control == (restore_jump_address ^ RESTORE_MAGIC)) ?
|
|
0 : -EINVAL;
|
|
}
|
|
#endif /* CONFIG_HIBERNATION */
|