2005-04-17 00:20:36 +02:00
|
|
|
#ifndef _IPV6_H
|
|
|
|
#define _IPV6_H
|
|
|
|
|
|
|
|
#include <linux/config.h>
|
|
|
|
#include <linux/in6.h>
|
|
|
|
#include <asm/byteorder.h>
|
|
|
|
|
|
|
|
/* Minimum IPv6 MTU: the latest drafts raised the minimal MTU to 1280. */
#define IPV6_MIN_MTU	1280
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Advanced API
|
|
|
|
* source interface/address selection, source routing, etc...
|
|
|
|
* *under construction*
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
struct in6_pktinfo {
|
|
|
|
struct in6_addr ipi6_addr;
|
|
|
|
int ipi6_ifindex;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
struct in6_ifreq {
|
|
|
|
struct in6_addr ifr6_addr;
|
|
|
|
__u32 ifr6_prefixlen;
|
|
|
|
int ifr6_ifindex;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Source routing constants. */
#define IPV6_SRCRT_STRICT	0x01	/* this hop must be a neighbor	*/
#define IPV6_SRCRT_TYPE_0	0	/* IPv6 type 0 Routing Header	*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* routing header
|
|
|
|
*/
|
|
|
|
struct ipv6_rt_hdr {
|
|
|
|
__u8 nexthdr;
|
|
|
|
__u8 hdrlen;
|
|
|
|
__u8 type;
|
|
|
|
__u8 segments_left;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* type specific data
|
|
|
|
* variable length field
|
|
|
|
*/
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
struct ipv6_opt_hdr {
|
|
|
|
__u8 nexthdr;
|
|
|
|
__u8 hdrlen;
|
|
|
|
/*
|
|
|
|
* TLV encoded option data follows.
|
|
|
|
*/
|
|
|
|
};
|
|
|
|
|
|
|
|
/* The destination and hop-by-hop option headers reuse struct ipv6_opt_hdr. */
#define ipv6_destopt_hdr ipv6_opt_hdr
#define ipv6_hopopt_hdr  ipv6_opt_hdr

#ifdef __KERNEL__
/*
 * ipv6_optlen - (hdrlen + 1) shifted left by 3, i.e. multiplied by 8.
 * @p: pointer to any structure that has an hdrlen member.
 */
#define ipv6_optlen(p)  (((p)->hdrlen+1) << 3)
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* routing header type 0 (used in cmsghdr struct)
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct rt0_hdr {
|
|
|
|
struct ipv6_rt_hdr rt_hdr;
|
2005-09-10 09:15:06 +02:00
|
|
|
__u32 reserved;
|
2005-04-17 00:20:36 +02:00
|
|
|
struct in6_addr addr[0];
|
|
|
|
|
|
|
|
#define rt0_type rt_hdr.type
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ipv6_auth_hdr {
|
|
|
|
__u8 nexthdr;
|
|
|
|
__u8 hdrlen; /* This one is measured in 32 bit units! */
|
|
|
|
__u16 reserved;
|
|
|
|
__u32 spi;
|
|
|
|
__u32 seq_no; /* Sequence number */
|
|
|
|
__u8 auth_data[0]; /* Length variable but >=4. Mind the 64 bit alignment! */
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ipv6_esp_hdr {
|
|
|
|
__u32 spi;
|
|
|
|
__u32 seq_no; /* Sequence number */
|
|
|
|
__u8 enc_data[0]; /* Length variable but >=8. Mind the 64 bit alignment! */
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ipv6_comp_hdr {
|
|
|
|
__u8 nexthdr;
|
|
|
|
__u8 flags;
|
|
|
|
__u16 cpi;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* IPv6 fixed header
|
|
|
|
*
|
|
|
|
* BEWARE, it is incorrect. The first 4 bits of flow_lbl
|
|
|
|
* are glued to priority now, forming "class".
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct ipv6hdr {
|
|
|
|
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
|
|
|
__u8 priority:4,
|
|
|
|
version:4;
|
|
|
|
#elif defined(__BIG_ENDIAN_BITFIELD)
|
|
|
|
__u8 version:4,
|
|
|
|
priority:4;
|
|
|
|
#else
|
|
|
|
#error "Please fix <asm/byteorder.h>"
|
|
|
|
#endif
|
|
|
|
__u8 flow_lbl[3];
|
|
|
|
|
|
|
|
__u16 payload_len;
|
|
|
|
__u8 nexthdr;
|
|
|
|
__u8 hop_limit;
|
|
|
|
|
|
|
|
struct in6_addr saddr;
|
|
|
|
struct in6_addr daddr;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This structure contains configuration options per IPv6 link.
|
|
|
|
*/
|
|
|
|
struct ipv6_devconf {
|
|
|
|
__s32 forwarding;
|
|
|
|
__s32 hop_limit;
|
|
|
|
__s32 mtu6;
|
|
|
|
__s32 accept_ra;
|
|
|
|
__s32 accept_redirects;
|
|
|
|
__s32 autoconf;
|
|
|
|
__s32 dad_transmits;
|
|
|
|
__s32 rtr_solicits;
|
|
|
|
__s32 rtr_solicit_interval;
|
|
|
|
__s32 rtr_solicit_delay;
|
|
|
|
__s32 force_mld_version;
|
|
|
|
#ifdef CONFIG_IPV6_PRIVACY
|
|
|
|
__s32 use_tempaddr;
|
|
|
|
__s32 temp_valid_lft;
|
|
|
|
__s32 temp_prefered_lft;
|
|
|
|
__s32 regen_max_retry;
|
|
|
|
__s32 max_desync_factor;
|
|
|
|
#endif
|
|
|
|
__s32 max_addresses;
|
|
|
|
void *sysctl;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * index values for the variables in ipv6_devconf
 *
 * NOTE: the order here is not the member order of struct ipv6_devconf
 * (DEVCONF_FORCE_MLD_VERSION comes last here), and the temporary-address
 * indices exist regardless of CONFIG_IPV6_PRIVACY. DEVCONF_MAX is the
 * number of indices.
 */
enum {
	DEVCONF_FORWARDING = 0,
	DEVCONF_HOPLIMIT,
	DEVCONF_MTU6,
	DEVCONF_ACCEPT_RA,
	DEVCONF_ACCEPT_REDIRECTS,
	DEVCONF_AUTOCONF,
	DEVCONF_DAD_TRANSMITS,
	DEVCONF_RTR_SOLICITS,
	DEVCONF_RTR_SOLICIT_INTERVAL,
	DEVCONF_RTR_SOLICIT_DELAY,
	DEVCONF_USE_TEMPADDR,
	DEVCONF_TEMP_VALID_LFT,
	DEVCONF_TEMP_PREFERED_LFT,
	DEVCONF_REGEN_MAX_RETRY,
	DEVCONF_MAX_DESYNC_FACTOR,
	DEVCONF_MAX_ADDRESSES,
	DEVCONF_FORCE_MLD_VERSION,
	DEVCONF_MAX
};
|
|
|
|
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
#include <linux/in6.h> /* struct sockaddr_in6 */
|
|
|
|
#include <linux/icmpv6.h>
|
|
|
|
#include <net/if_inet6.h> /* struct ipv6_mc_socklist */
|
|
|
|
#include <linux/tcp.h>
|
|
|
|
#include <linux/udp.h>
|
|
|
|
|
|
|
|
/*
|
|
|
|
This structure contains results of exthdrs parsing
|
|
|
|
as offsets from skb->nh.
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct inet6_skb_parm {
|
|
|
|
int iif;
|
|
|
|
__u16 ra;
|
|
|
|
__u16 hop;
|
|
|
|
__u16 dst0;
|
|
|
|
__u16 srcrt;
|
|
|
|
__u16 dst1;
|
[IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542).
Support several new socket options / ancillary data:
IPV6_RECVPKTINFO, IPV6_PKTINFO,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS,
IPV6_RECVDSTOPTS, IPV6_DSTOPTS, IPV6_RTHDRDSTOPTS,
IPV6_RECVRTHDR, IPV6_RTHDR,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS
Old semantics are preserved as IPV6_2292xxxx so that
we can maintain backward compatibility.
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
2005-09-08 02:59:17 +02:00
|
|
|
__u16 lastopt;
|
2005-04-17 00:20:36 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/* View the cb member of @skb as a struct inet6_skb_parm. */
#define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb))
|
|
|
|
|
2005-08-12 14:19:38 +02:00
|
|
|
static inline int inet6_iif(const struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
return IP6CB(skb)->iif;
|
|
|
|
}
|
|
|
|
|
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-19 07:46:52 +02:00
|
|
|
struct tcp6_request_sock {
|
|
|
|
struct tcp_request_sock req;
|
|
|
|
struct in6_addr loc_addr;
|
|
|
|
struct in6_addr rmt_addr;
|
|
|
|
struct sk_buff *pktopts;
|
|
|
|
int iif;
|
|
|
|
};
|
|
|
|
|
2005-06-19 07:47:21 +02:00
|
|
|
/*
 * Reinterpret a request sock pointer as a tcp6_request_sock pointer.
 * This is a plain cast (it also drops the const qualifier); no check is
 * made that @sk really is embedded in a struct tcp6_request_sock.
 */
static inline struct tcp6_request_sock *tcp6_rsk(const struct request_sock *sk)
{
	return (struct tcp6_request_sock *)sk;
}
|
|
|
|
|
2005-04-17 00:20:36 +02:00
|
|
|
/**
|
|
|
|
* struct ipv6_pinfo - ipv6 private area
|
|
|
|
*
|
|
|
|
* In the struct sock hierarchy (tcp6_sock, upd6_sock, etc)
|
|
|
|
* this _must_ be the last member, so that inet6_sk_generic
|
|
|
|
* is able to calculate its offset from the base struct sock
|
|
|
|
* by using the struct proto->slab_obj_size member. -acme
|
|
|
|
*/
|
|
|
|
struct ipv6_pinfo {
|
|
|
|
struct in6_addr saddr;
|
|
|
|
struct in6_addr rcv_saddr;
|
|
|
|
struct in6_addr daddr;
|
|
|
|
struct in6_addr *daddr_cache;
|
|
|
|
|
|
|
|
__u32 flow_label;
|
|
|
|
__u32 frag_size;
|
|
|
|
__s16 hop_limit;
|
|
|
|
__s16 mcast_hops;
|
|
|
|
int mcast_oif;
|
|
|
|
|
|
|
|
/* pktoption flags */
|
|
|
|
union {
|
|
|
|
struct {
|
[IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542).
Support several new socket options / ancillary data:
IPV6_RECVPKTINFO, IPV6_PKTINFO,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS,
IPV6_RECVDSTOPTS, IPV6_DSTOPTS, IPV6_RTHDRDSTOPTS,
IPV6_RECVRTHDR, IPV6_RTHDR,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS
Old semantics are preserved as IPV6_2292xxxx so that
we can maintain backward compatibility.
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
2005-09-08 02:59:17 +02:00
|
|
|
__u16 srcrt:2,
|
|
|
|
osrcrt:2,
|
2005-04-17 00:20:36 +02:00
|
|
|
rxinfo:1,
|
[IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542).
Support several new socket options / ancillary data:
IPV6_RECVPKTINFO, IPV6_PKTINFO,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS,
IPV6_RECVDSTOPTS, IPV6_DSTOPTS, IPV6_RTHDRDSTOPTS,
IPV6_RECVRTHDR, IPV6_RTHDR,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS
Old semantics are preserved as IPV6_2292xxxx so that
we can maintain backward compatibility.
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
2005-09-08 02:59:17 +02:00
|
|
|
rxoinfo:1,
|
2005-04-17 00:20:36 +02:00
|
|
|
rxhlim:1,
|
[IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542).
Support several new socket options / ancillary data:
IPV6_RECVPKTINFO, IPV6_PKTINFO,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS,
IPV6_RECVDSTOPTS, IPV6_DSTOPTS, IPV6_RTHDRDSTOPTS,
IPV6_RECVRTHDR, IPV6_RTHDR,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS
Old semantics are preserved as IPV6_2292xxxx so that
we can maintain backward compatibility.
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
2005-09-08 02:59:17 +02:00
|
|
|
rxohlim:1,
|
2005-04-17 00:20:36 +02:00
|
|
|
hopopts:1,
|
[IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542).
Support several new socket options / ancillary data:
IPV6_RECVPKTINFO, IPV6_PKTINFO,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS,
IPV6_RECVDSTOPTS, IPV6_DSTOPTS, IPV6_RTHDRDSTOPTS,
IPV6_RECVRTHDR, IPV6_RTHDR,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS
Old semantics are preserved as IPV6_2292xxxx so that
we can maintain backward compatibility.
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
2005-09-08 02:59:17 +02:00
|
|
|
ohopopts:1,
|
2005-04-17 00:20:36 +02:00
|
|
|
dstopts:1,
|
[IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542).
Support several new socket options / ancillary data:
IPV6_RECVPKTINFO, IPV6_PKTINFO,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS,
IPV6_RECVDSTOPTS, IPV6_DSTOPTS, IPV6_RTHDRDSTOPTS,
IPV6_RECVRTHDR, IPV6_RTHDR,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS
Old semantics are preserved as IPV6_2292xxxx so that
we can maintain backward compatibility.
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
2005-09-08 02:59:17 +02:00
|
|
|
odstopts:1,
|
2005-09-08 03:19:03 +02:00
|
|
|
rxflow:1,
|
|
|
|
rxtclass:1;
|
2005-04-17 00:20:36 +02:00
|
|
|
} bits;
|
[IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542).
Support several new socket options / ancillary data:
IPV6_RECVPKTINFO, IPV6_PKTINFO,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS,
IPV6_RECVDSTOPTS, IPV6_DSTOPTS, IPV6_RTHDRDSTOPTS,
IPV6_RECVRTHDR, IPV6_RTHDR,
IPV6_RECVHOPOPTS, IPV6_HOPOPTS
Old semantics are preserved as IPV6_2292xxxx so that
we can maintain backward compatibility.
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
2005-09-08 02:59:17 +02:00
|
|
|
__u16 all;
|
2005-04-17 00:20:36 +02:00
|
|
|
} rxopt;
|
|
|
|
|
|
|
|
/* sockopt flags */
|
|
|
|
__u8 mc_loop:1,
|
|
|
|
recverr:1,
|
|
|
|
sndflow:1,
|
|
|
|
pmtudisc:2,
|
|
|
|
ipv6only:1;
|
2005-09-08 03:19:03 +02:00
|
|
|
__u8 tclass;
|
2005-04-17 00:20:36 +02:00
|
|
|
|
|
|
|
__u32 dst_cookie;
|
|
|
|
|
|
|
|
struct ipv6_mc_socklist *ipv6_mc_list;
|
|
|
|
struct ipv6_ac_socklist *ipv6_ac_list;
|
|
|
|
struct ipv6_fl_socklist *ipv6_fl_list;
|
|
|
|
|
|
|
|
struct ipv6_txoptions *opt;
|
|
|
|
struct sk_buff *pktoptions;
|
|
|
|
struct {
|
|
|
|
struct ipv6_txoptions *opt;
|
|
|
|
struct rt6_info *rt;
|
|
|
|
int hop_limit;
|
2005-09-08 03:19:03 +02:00
|
|
|
int tclass;
|
2005-04-17 00:20:36 +02:00
|
|
|
} cork;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */
|
|
|
|
struct raw6_sock {
|
|
|
|
/* inet_sock has to be the first member of raw6_sock */
|
|
|
|
struct inet_sock inet;
|
|
|
|
__u32 checksum; /* perform checksum */
|
|
|
|
__u32 offset; /* checksum offset */
|
|
|
|
struct icmp6_filter filter;
|
|
|
|
/* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */
|
|
|
|
struct ipv6_pinfo inet6;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct udp6_sock {
|
|
|
|
struct udp_sock udp;
|
|
|
|
/* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */
|
|
|
|
struct ipv6_pinfo inet6;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct tcp6_sock {
|
|
|
|
struct tcp_sock tcp;
|
|
|
|
/* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */
|
|
|
|
struct ipv6_pinfo inet6;
|
|
|
|
};
|
|
|
|
|
|
|
|
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
|
|
|
|
static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
|
|
|
|
{
|
|
|
|
return inet_sk(__sk)->pinet6;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Reinterpret @sk as a raw6_sock pointer. This is a plain cast (it also
 * drops the const qualifier); the caller is responsible for @sk really
 * being a raw6_sock.
 */
static inline struct raw6_sock *raw6_sk(const struct sock *sk)
{
	return (struct raw6_sock *)sk;
}
|
|
|
|
|
|
|
|
static inline void inet_sk_copy_descendant(struct sock *sk_to,
|
|
|
|
const struct sock *sk_from)
|
|
|
|
{
|
|
|
|
int ancestor_size = sizeof(struct inet_sock);
|
|
|
|
|
|
|
|
if (sk_from->sk_family == PF_INET6)
|
|
|
|
ancestor_size += sizeof(struct ipv6_pinfo);
|
|
|
|
|
|
|
|
__inet_sk_copy_descendant(sk_to, sk_from, ancestor_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * __ipv6_only_sock() reads the ipv6only flag without looking at the family;
 * ipv6_only_sock() first checks that the socket family is PF_INET6.
 */
#define __ipv6_only_sock(sk)	(inet6_sk(sk)->ipv6only)
#define ipv6_only_sock(sk)	((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk))
|
2005-08-10 05:09:30 +02:00
|
|
|
|
|
|
|
#include <linux/tcp.h>
|
|
|
|
|
|
|
|
struct tcp6_timewait_sock {
|
|
|
|
struct tcp_timewait_sock tw_v6_sk;
|
|
|
|
struct in6_addr tw_v6_daddr;
|
|
|
|
struct in6_addr tw_v6_rcv_saddr;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Reinterpret @sk as a tcp6_timewait_sock pointer. This is a plain cast
 * (it also drops the const qualifier); no state check is done here.
 */
static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk)
{
	return (struct tcp6_timewait_sock *)sk;
}
|
|
|
|
|
|
|
|
static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
|
|
|
|
{
|
|
|
|
return likely(sk->sk_state != TCP_TIME_WAIT) ?
|
|
|
|
&inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
|
|
|
|
{
|
|
|
|
return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
|
|
|
|
}
|
|
|
|
|
2005-08-10 05:10:42 +02:00
|
|
|
static inline int inet_v6_ipv6only(const struct sock *sk)
|
2005-08-10 05:09:30 +02:00
|
|
|
{
|
|
|
|
return likely(sk->sk_state != TCP_TIME_WAIT) ?
|
2005-08-10 05:11:08 +02:00
|
|
|
ipv6_only_sock(sk) : inet_twsk(sk)->tw_ipv6only;
|
2005-08-10 05:09:30 +02:00
|
|
|
}
|
2005-04-17 00:20:36 +02:00
|
|
|
#else
|
|
|
|
/* Without CONFIG_IPV6 / CONFIG_IPV6_MODULE both checks are constant 0. */
#define __ipv6_only_sock(sk)	0
#define ipv6_only_sock(sk)	0
|
|
|
|
|
|
|
|
/* Stub for builds without CONFIG_IPV6 / CONFIG_IPV6_MODULE: always NULL. */
static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
{
	return NULL;
}
|
|
|
|
|
|
|
|
/* Stub for builds without CONFIG_IPV6 / CONFIG_IPV6_MODULE: always NULL. */
static inline struct raw6_sock *raw6_sk(const struct sock *sk)
{
	return NULL;
}
|
|
|
|
|
2005-08-10 05:09:30 +02:00
|
|
|
/*
 * Constant fallbacks for builds without CONFIG_IPV6 / CONFIG_IPV6_MODULE.
 * NOTE(review): tcp_twsk_ipv6only has no counterpart in the IPv6-enabled
 * branch of this header; it is kept for existing users.
 */
#define __tcp_v6_rcv_saddr(__sk)	NULL
#define tcp_v6_rcv_saddr(__sk)		NULL
#define tcp_twsk_ipv6only(__sk)		0
#define inet_v6_ipv6only(__sk)		0
|
2005-08-10 05:09:30 +02:00
|
|
|
#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
|
2005-04-17 00:20:36 +02:00
|
|
|
|
[INET]: speedup inet (tcp/dccp) lookups
Arnaldo and I agreed it could be applied now, because I have other
pending patches depending on this one (Thank you Arnaldo)
(The other important patch moves skc_refcnt in a separate cache line,
so that the SMP/NUMA performance doesnt suffer from cache line ping pongs)
1) First some performance data :
--------------------------------
tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established()
The most time critical code is :
sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
The sk_for_each() does use prefetch() hints but only the begining of
"struct sock" is prefetched.
As INET_MATCH first comparison uses inet_sk(__sk)->daddr, wich is far
away from the begining of "struct sock", it has to bring into CPU
cache cold cache line. Each iteration has to use at least 2 cache
lines.
This can be problematic if some chains are very long.
2) The goal
-----------
The idea I had is to change things so that INET_MATCH() may return
FALSE in 99% of cases only using the data already in the CPU cache,
using one cache line per iteration.
3) Description of the patch
---------------------------
Adds a new 'unsigned int skc_hash' field in 'struct sock_common',
filling a 32 bits hole on 64 bits platform.
struct sock_common {
unsigned short skc_family;
volatile unsigned char skc_state;
unsigned char skc_reuse;
int skc_bound_dev_if;
struct hlist_node skc_node;
struct hlist_node skc_bind_node;
atomic_t skc_refcnt;
+ unsigned int skc_hash;
struct proto *skc_prot;
};
Store in this 32 bits field the full hash, not masked by (ehash_size -
1) Using this full hash as the first comparison done in INET_MATCH
permits us immediatly skip the element without touching a second cache
line in case of a miss.
Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to
sk_hash and tw_hash) already contains the slot number if we mask with
(ehash_size - 1)
File include/net/inet_hashtables.h
64 bits platforms :
#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash))
((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
32bits platforms:
#define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash)) && \
(inet_sk(__sk)->daddr == (__saddr)) && \
(inet_sk(__sk)->rcv_saddr == (__daddr)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
- Adds a prefetch(head->chain.first) in
__inet_lookup_established()/__tcp_v4_check_established() and
__inet6_lookup_established()/__tcp_v6_check_established() and
__dccp_v4_check_established() to bring into cache the first element of the
list, before the {read|write}_lock(&head->lock);
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-10-03 23:13:38 +02:00
|
|
|
/*
 * INET6_MATCH - test whether socket @__sk matches a lookup key.
 *
 * The conditions are evaluated in this order and short-circuit:
 *   1. sk_hash equals @__hash;
 *   2. the 32 bits read at the address of dport equal @__ports;
 *   3. the family is AF_INET6;
 *   4. the socket's daddr equals @__saddr and its rcv_saddr equals
 *      @__daddr (note the swap between socket fields and arguments);
 *   5. the socket is not bound to a device, or is bound to @__dif.
 *
 * @__sk is expanded several times, so it must not have side effects.
 */
#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\
	(((__sk)->sk_hash == (__hash))				&& \
	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))  	&& \
	 ((__sk)->sk_family		== AF_INET6)		&& \
	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&& \
	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&& \
	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
|
2005-04-17 00:20:36 +02:00
|
|
|
|
2005-08-10 05:09:30 +02:00
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
|
|
|
|
#endif /* _IPV6_H */
|