diff --git a/include/net/tcp.h b/include/net/tcp.h index 92b06c6e7732..9c9b3768b350 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -511,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ -u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); @@ -981,8 +979,8 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* suggest number of segments for each skb to transmit (optional) */ - u32 (*tso_segs_goal)(struct sock *sk); + /* override sysctl_tcp_min_tso_segs */ + u32 (*min_tso_segs)(struct sock *sk); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ u32 (*sndbuf_expand)(struct sock *sk); /* call when packets are delivered to update cwnd and pacing rate, diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index a471f696e13c..c92014cb1e16 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -97,10 +97,9 @@ struct bbr { packet_conservation:1, /* use packet conservation? */ restore_cwnd:1, /* decided to revert cwnd to old value */ round_start:1, /* start of packet-timed tx->ack round? */ - tso_segs_goal:7, /* segments we want in each skb we send */ idle_restart:1, /* restarting after idle? */ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ - unused:5, + unused:12, lt_is_sampling:1, /* taking long-term ("LT") samples now? */ lt_rtt_cnt:7, /* round trips in long-term interval */ lt_use_bw:1; /* use lt_bw as our bw estimate? */ @@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) sk->sk_pacing_rate = rate; } -/* Return count of segments we want in the skbs we send, or 0 for default. */ -static u32 bbr_tso_segs_goal(struct sock *sk) +/* override sysctl_tcp_min_tso_segs */ +static u32 bbr_min_tso_segs(struct sock *sk) { - struct bbr *bbr = inet_csk_ca(sk); - - return bbr->tso_segs_goal; + return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; } -static void bbr_set_tso_segs_goal(struct sock *sk) +static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - u32 min_segs; + u32 segs, bytes; - min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; - bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs), - 0x7FU); + /* Sort of tcp_tso_autosize() but ignoring + * driver provided sk_gso_max_size. + */ + bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift, + GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); + + return min(segs, 0x7FU); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ @@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; /* Allow enough full-sized skbs in flight to utilize end systems. */ - cwnd += 3 * bbr->tso_segs_goal; + cwnd += 3 * bbr_tso_segs_goal(sk); /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ cwnd = (cwnd + 1) & ~1U; @@ -824,7 +825,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs) bw = bbr_bw(sk); bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); - bbr_set_tso_segs_goal(sk); bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); } @@ -834,7 +834,6 @@ static void bbr_init(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->prior_cwnd = 0; - bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ bbr->rtt_cnt = 0; bbr->next_rtt_delivered = 0; bbr->prev_ca_state = TCP_CA_Open; @@ -936,7 +935,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, - .tso_segs_goal = bbr_tso_segs_goal, + .min_tso_segs = bbr_min_tso_segs, .get_info = bbr_get_info, .set_state = bbr_set_state, }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 49d043de3476..383cac0ff0ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1703,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, /* Return how many segs we'd like on a TSO packet, * to send one TSO packet per ms */ -u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - int min_tso_segs) +static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs) { u32 bytes, segs; @@ -1720,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, return segs; } -EXPORT_SYMBOL(tcp_tso_autosize); /* Return the number of segments we want in the skb we are transmitting. * See if congestion control module wants to decide; otherwise, autosize. @@ -1728,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize); static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; + u32 min_tso, tso_segs; - if (!tso_segs) - tso_segs = tcp_tso_autosize(sk, mss_now, - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + min_tso = ca_ops->min_tso_segs ? + ca_ops->min_tso_segs(sk) : + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; + + tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); return min_t(u32, tso_segs, sk->sk_gso_max_segs); }