70674f95c0
Optimize select and poll by using stack space for small fd sets. This brings back an old optimization from Linux 2.0. Using the stack is faster than kmalloc. On an Intel P4 system it speeds up a select of a single pty fd by about 13% (~4000 cycles -> ~3500). It also saves memory because a daemon hanging in select or poll will usually use one or two fewer pages. This can add up - e.g. if you have 10 daemons blocking in poll/select you save 40KB of memory. I did a patch for this long ago, but it was never applied. This version is a reimplementation of the old patch that tries to be less intrusive. I only did the minimal changes needed for the stack allocation. The cut-off point before external memory is allocated is currently at 832 bytes. The system calls always allocate this much memory on the stack. These 832 bytes are divided into 256 bytes of frontend data (for the select bitmaps or the pollfds) and the rest of the space for the wait queues used by the low-level drivers. There are some extreme cases where this won't work out for select and it falls back to allocating memory too early - especially with very sparse large select bitmaps - but the majority of processes, which only have a small number of file descriptors, should be ok. [TBD: 832/256 might not be the best split for select or poll] I suspect more optimizations might be possible, but they would be more complicated. One way would be to cache the select/poll context over multiple system calls because typically the input values should be similar. The problem is when to flush the file descriptors out, though. Signed-off-by: Andi Kleen <ak@suse.de> Cc: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
120 lines
2.9 KiB
C
120 lines
2.9 KiB
C
#ifndef _LINUX_POLL_H
|
|
#define _LINUX_POLL_H
|
|
|
|
#include <asm/poll.h>
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
#include <linux/compiler.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/string.h>
|
|
#include <linux/mm.h>
|
|
#include <asm/uaccess.h>
|
|
|
|
/*
 * ~832 bytes of stack space used max in sys_select/sys_poll before
 * allocating additional memory.
 *
 * FRONTEND_STACK_ALLOC bytes of that budget are set aside for the
 * select/poll front end (SELECT_STACK_ALLOC and POLL_STACK_ALLOC are both
 * aliases for it); the remainder, WQUEUES_STACK_ALLOC, is divided into
 * N_INLINE_POLL_ENTRIES whole struct poll_table_entry slots.
 *
 * N_INLINE_POLL_ENTRIES needs the complete struct poll_table_entry type at
 * the point where the macro is expanded, not where it is defined.
 */
#define MAX_STACK_ALLOC		832
#define FRONTEND_STACK_ALLOC	256
#define SELECT_STACK_ALLOC	FRONTEND_STACK_ALLOC
#define POLL_STACK_ALLOC	FRONTEND_STACK_ALLOC
#define WQUEUES_STACK_ALLOC	(MAX_STACK_ALLOC - FRONTEND_STACK_ALLOC)
#define N_INLINE_POLL_ENTRIES	(WQUEUES_STACK_ALLOC / sizeof(struct poll_table_entry))
|
|
|
|
struct poll_table_struct;
|
|
|
|
/*
|
|
* structures and helpers for f_op->poll implementations
|
|
*/
|
|
typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *);
|
|
|
|
typedef struct poll_table_struct {
|
|
poll_queue_proc qproc;
|
|
} poll_table;
|
|
|
|
/*
 * Pass @wait_address to the callback registered in @p.
 *
 * @filp:         file forwarded unchanged to the callback
 * @wait_address: wait queue head forwarded to the callback; may be NULL
 * @p:            poll table holding the callback; may be NULL
 *
 * Does nothing when either @p or @wait_address is NULL; otherwise invokes
 * p->qproc(filp, wait_address, p).
 */
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
	if (p && wait_address)
		p->qproc(filp, wait_address, p);
}
|
|
|
|
/*
 * Install @qproc as the queueing callback of @pt.
 *
 * @pt must be non-NULL; it is dereferenced unconditionally.
 */
static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
{
	pt->qproc = qproc;
}
|
|
|
|
struct poll_table_entry {
|
|
struct file * filp;
|
|
wait_queue_t wait;
|
|
wait_queue_head_t * wait_address;
|
|
};
|
|
|
|
/*
|
|
 * Structures and helpers for sys_select/sys_poll
|
|
*/
|
|
struct poll_wqueues {
|
|
poll_table pt;
|
|
struct poll_table_page * table;
|
|
int error;
|
|
int inline_index;
|
|
struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES];
|
|
};
|
|
|
|
/*
 * Set-up and tear-down routines for a struct poll_wqueues; both are defined
 * outside this header.
 */
extern void poll_initwait(struct poll_wqueues *pwq);
extern void poll_freewait(struct poll_wqueues *pwq);
|
|
|
|
/*
|
|
* Scaleable version of the fd_set.
|
|
*/
|
|
|
|
/*
 * Six pointers to unsigned long bitmaps.
 * NOTE(review): by their names, in/out/ex are presumably the requested sets
 * and res_in/res_out/res_ex the matching result sets - confirm against
 * do_select().
 */
typedef struct {
	unsigned long *in, *out, *ex;
	unsigned long *res_in, *res_out, *res_ex;
} fd_set_bits;
|
|
|
|
/*
|
|
* How many longwords for "nr" bits?
|
|
*/
|
|
/*
 * FDS_BITPERLONG: number of bits in one long.
 * FDS_LONGS(nr):  longs needed to hold "nr" bits, rounded up.
 * FDS_BYTES(nr):  the same amount expressed in bytes.
 *
 * "nr" is evaluated exactly once by each macro.
 */
#define FDS_BITPERLONG	(8*sizeof(long))
#define FDS_LONGS(nr)	(((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG)
#define FDS_BYTES(nr)	(FDS_LONGS(nr)*sizeof(long))
|
|
|
|
/*
|
|
* We do a VERIFY_WRITE here even though we are only reading this time:
|
|
* we'll write to it eventually..
|
|
*
|
|
* Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
|
|
*/
|
|
static inline
|
|
int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
|
|
{
|
|
nr = FDS_BYTES(nr);
|
|
if (ufdset)
|
|
return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0;
|
|
|
|
memset(fdset, 0, nr);
|
|
return 0;
|
|
}
|
|
|
|
static inline unsigned long __must_check
|
|
set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
|
|
{
|
|
if (ufdset)
|
|
return __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Clear the first FDS_BYTES(@nr) bytes of @fdset, i.e. enough whole longs
 * to cover @nr bits.
 */
static inline
void zero_fd_set(unsigned long nr, unsigned long *fdset)
{
	memset(fdset, 0, FDS_BYTES(nr));
}
|
|
|
|
/* An all-ones u64 shifted right by one bit, as s64, divided by HZ, minus one. */
#define MAX_INT64_SECONDS	((s64)(~(u64)0 >> 1) / HZ - 1)
|
|
|
|
extern int do_select(int n, fd_set_bits *fds, s64 *timeout);
|
|
extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds,
|
|
s64 *timeout);
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
#endif /* _LINUX_POLL_H */
|