1da177e4c3
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
77 lines
1.9 KiB
ArmAsm
77 lines
1.9 KiB
ArmAsm
/*
 * Copyright (C) 1999-2002 Hewlett-Packard Co
 *	Stephane Eranian <eranian@hpl.hp.com>
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
 *
 * 1/06/01 davidm	Tuned for Itanium.
 * 2/12/02 kchen	Tuned for both Itanium and McKinley
 * 3/08/02 davidm	Some more tweaking
 */
#include <linux/config.h>

#include <asm/asmmacro.h>
#include <asm/page.h>

/*
 * CPU-specific tuning for clear_page():
 *
 *   L3_LINE_SIZE	bytes per L3 cache line; the main loop of clear_page()
 *			runs PAGE_SIZE/L3_LINE_SIZE times, one line per pass.
 *   PREFETCH_LINES	number of lines by which the dst_fetch store stream is
 *			started ahead of the other store streams (tuning value).
 */
#ifdef CONFIG_ITANIUM
# define L3_LINE_SIZE	64	// Itanium L3 line size
# define PREFETCH_LINES	9	// run-ahead distance in lines (tuning value)
#else
# define L3_LINE_SIZE	128	// McKinley L3 line size
# define PREFETCH_LINES	12	// run-ahead distance in lines (tuning value)
#endif

/* Register roles inside clear_page(). */
#define saved_lc	r2	// caller's ar.lc, restored before returning
#define dst_fetch	r3	// run-ahead store pointer, steps one L3 line at a time
#define dst1		r8	// store stream that starts at page + 16
#define dst2		r9	// store stream that starts at page + 32
#define dst3		r10	// store stream that starts at page + 48
#define dst4		r11	// page + 64; stored through only in the non-Itanium loop

#define dst_last	r31	// bound compared against dst_fetch to stop the run-ahead stores

/*
 * clear_page - fill one PAGE_SIZE-byte page with zeroes.
 *
 * In:		in0 = address of the page (the single input register declared
 *		by ".regstk 1,0,0,0").
 *		NOTE(review): presumably page-aligned -- confirm against callers.
 * Out:		nothing
 * Clobbers:	r2, r3, r8-r11, r16, r31, p8.  ar.lc is used as the loop
 *		counter; the caller's value is parked in saved_lc and restored.
 *
 * All stores are "stf.spill.nta [ptr] = f0, inc": a 16-byte spill of f0
 * (architecturally +0.0, i.e. zero bytes) with post-increment of the pointer.
 *
 * Layout of the work:
 *   1. A short loop stores through dst_fetch to the first 16 bytes of each of
 *	the first PREFETCH_LINES cache lines (judging by the names, this is
 *	meant to act as a prefetch of those lines).
 *   2. The main loop runs once per L3 line.  dst1..dst3 (plus dst4 in the
 *	non-Itanium variant) fill bytes 16..L3_LINE_SIZE-1 of the current
 *	line, while dst_fetch keeps writing the first 16 bytes of the line
 *	PREFETCH_LINES ahead until it reaches dst_last (= in0 + PAGE_SIZE).
 */
GLOBAL_ENTRY(clear_page)
	.prologue
	.regstk 1,0,0,0
	mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until
	.save ar.lc, saved_lc
	mov saved_lc = ar.lc

	.body
	mov ar.lc = (PREFETCH_LINES - 1)	// run-ahead loop executes PREFETCH_LINES times
	mov dst_fetch = in0
	adds dst1 = 16, in0
	adds dst2 = 32, in0
	;;
	// Run-ahead loop: zero the first 16 bytes of each of the first
	// PREFETCH_LINES lines.
.fetch:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
	adds dst3 = 48, in0		// executing this multiple times is harmless
	br.cloop.sptk.few .fetch
	;;
	// dst_fetch is now in0 + PREFETCH_LINES*L3_LINE_SIZE, so the sum below
	// is in0 + PAGE_SIZE.
	addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
	mov ar.lc = r16			// one L3 line per iteration
	adds dst4 = 64, in0		// only stored through in the non-Itanium loop below
	;;
#ifdef CONFIG_ITANIUM
	// Optimized for Itanium
	// 64-byte line: bytes 16..47 here, bytes 48..63 after the #endif.
1:	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
	cmp.lt p8,p0=dst_fetch, dst_last	// p8: run-ahead pointer still below dst_last
	;;
#else
	// Optimized for McKinley
	// 128-byte line: bytes 16..79 in the first group, bytes 80..111 in
	// the second group, bytes 112..127 after the #endif.
1:	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
	stf.spill.nta [dst3] = f0, 64
	stf.spill.nta [dst4] = f0, 128
	cmp.lt p8,p0=dst_fetch, dst_last	// p8: run-ahead pointer still below dst_last
	;;
	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
#endif
	// Common tail of both variants: last 16 bytes of the current line, and,
	// while p8 holds, the first 16 bytes of the line PREFETCH_LINES ahead.
	stf.spill.nta [dst3] = f0, 64
(p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
	br.cloop.sptk.few 1b
	;;
	mov ar.lc = saved_lc		// restore lc
	br.ret.sptk.many rp
END(clear_page)