Flow 1.0.2
Flow project: Full implementation reference.
cong_ctl_classic_bw.cpp
Go to the documentation of this file.
1/* Flow
2 * Copyright 2023 Akamai Technologies, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the
5 * "License"); you may not use this file except in
6 * compliance with the License. You may obtain a copy
7 * of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in
12 * writing, software distributed under the License is
13 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
14 * CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing
16 * permissions and limitations under the License. */
17
18/// @file
20
21namespace flow::net_flow
22{
23// Static initializations.
24
25// Use conservative start value which will be overridden with the next ACK. (Value taken from Linux tcp_westwood.c.)
27 = boost::chrono::seconds(20);
28
29// Implementations.
30
32 (log::Logger* logger_ptr, Peer_socket::Const_ptr sock) :
33
34 Congestion_control_strategy(logger_ptr, sock),
35 // Initialize the basics (CWND, SSTHRESH, etc.) in the usual way.
36 m_classic_data(logger_ptr, sock),
37 // See comment on S_INIT_RTT_MIN declaration inside class.
38 m_rtt_min(S_INIT_RTT_MIN),
39 // Trigger S_INIT_RTT_MIN to be disregarded as soon as we get an ACK.
40 m_reset_rtt_min(true)
41{
42 // Nothing.
43}
44
46{
48}
49
51 (const Fine_duration& packet_rtt,
52 [[maybe_unused]] const size_t bytes, [[maybe_unused]] const size_t sent_cwnd_bytes) // Virtual.
53{
54 using boost::chrono::ceil;
55 using boost::chrono::round;
56 using boost::chrono::milliseconds;
57
58 const Peer_socket::Const_ptr sock = socket();
59
60 /* As in the reference implementation (Linux's tcp_westwood.c), trivially maintain the lowest
61 * known individual RTT. The only deviation from that is certain events can cause the existing
62 * minimum RTT to be invalidated, starting a new sequence.
63 *
64 * Another caveat concerns units. Due to the math in congestion_window_adjust(), we store
65 * m_rtt_min in Send_bandwidth_estimator-friendly units, likely less fine than those used in
66 * Fine_duration but fine enough (see that alias's doc header for discussion). In converting,
67 * round up as a convention, if only to try to avoid 0. @todo Why is that really needed?
68 * There's no division by this number.... */
69
70 const Send_bandwidth_estimator::Time_unit packet_rtt_conv = ceil<Send_bandwidth_estimator::Time_unit>(packet_rtt);
71
73 {
74 m_reset_rtt_min = false;
75 m_rtt_min = packet_rtt_conv;
76
77 FLOW_LOG_TRACE("cong_ctl|bw_est [" << sock << "] update: RTT event; reset rtt_min "
78 "to [" << round<milliseconds>(m_rtt_min) << "].");
79 }
80 else
81 {
82 const milliseconds prev_rtt_min = round<milliseconds>(m_rtt_min);
83
84 if (packet_rtt_conv < m_rtt_min)
85 {
86 m_rtt_min = packet_rtt_conv;
87 }
88
89 FLOW_LOG_TRACE("cong_ctl|bw_est [" << sock << "] update: RTT event; set rtt_min = "
90 "min[" << prev_rtt_min
91 << ", " << round<milliseconds>(packet_rtt_conv) << "] "
92 "= [" << round<milliseconds>(m_rtt_min) << "].");
93 }
94} // Congestion_control_classic_with_bandwidth_est::on_individual_ack()
95
97 [[maybe_unused]] size_t packets) // Virtual.
98{
99 // Same as Congestion_control_classic; we are only different upon loss detection.
100 m_classic_data.on_acks(bytes); // Will log.
101}
102
104 [[maybe_unused]] size_t packets) // Virtual.
105{
106 /* Node has detected at least some lost bytes (at least one dropped packet), or at least so it
107 * thinks, but it's not a "major" (Drop Timeout-based) loss. At its core, we follow classic Reno,
108 * reset the available pipe (CWND) to some probably lower value; and set SSTHRESH to that same
109 * value, so that we immediately begin congestion avoidance (slow linear increase in CWND), and so
110 * that if we later enter slow start for whatever reason, it will end at this CWND value (unless
111 * something else changes SSTHRESH before then of course).
112 *
113 * Of course this is not Reno but instead "classic with bandwidth estimation." The difference is
114 * that we set CWND not to 1/2 (or any other constant) of anything but rather to our best guess on
115 * what the pipe can currently take. The standard formula is CWND = B * RTT, where B is the
116 * available bandwidth, and RTT is the delay of the pipe. RTT is taken to be the minimum RTT
117 * observed on the pipe, which should represent the "true" delay. B is estimated using
118 * Send_bandwidth_estimator. Altogether this is the Westwood+ algorithm, but the main part of it
119 * is the B estimation, which has been separated into Send_bandwidth_estimator, because its
120 * results may be useful outside of congestion control. For more info on Westwood+ see that
121 * class's doc header.
122 *
123 * As a reference implementation of the CWND = B * RTT formula, we use Linux kernel's
124 * tcp_westwood.c, which was implemented by the inventors of the algorithm. */
125
126 const size_t new_wnd_bytes = congestion_window_adjusted_bytes();
127
128 const Peer_socket::Const_ptr sock = socket();
129 FLOW_LOG_TRACE("cong_ctl|bw_est [" << sock << "] update: loss event; "
130 "set sl_st_thresh=cong_wnd to [" << sock->bytes_blocks_str(new_wnd_bytes) << "].");
131
132 m_classic_data.on_congestion_event(new_wnd_bytes, new_wnd_bytes); // Will log but not enough (so log the above).
133
134 // Beyond that, nothing else. See Congestion_control_classic comments in same spot which apply equally.
135} // Congestion_control_classic_with_bandwidth_est::on_loss_event()
136
138 [[maybe_unused]] size_t packets) // Virtual.
139{
140 /* Node has detected a major loss event in the form of a Drop Timeout. As in on_loss_event(), do
141 * the same thing as Congestion_control_classic does except that SSTHRESH is not 1/2 of anything
142 * but rather selected based on the bandwidth estimation.
143 *
144 * Additionally, a major loss event implies that the properties of the pipe may have changed
145 * (e.g., different IP route). Therefore we (as the reference implementation, Linux's
146 * tcp_westwood.c) also cause the obsolescence of m_rtt_min. We don't invalidate it now, since we
147 * don't have a better guess yet, but we do set m_reset_rtt_min, which will cause the next RTT
148 * measurement to override any existing minimum.
149 *
150 * Should we also reset the bandwidth estimator somehow? The reference implementation does not,
151 * so I won't either. As data transfer picks up, the estimator should pick up any changes in
152 * bandwidth gradually anyway, it seems.
153 *
154 * Other comments from Congestion_control_classic's same spot apply here. */
155
156 // Determine SSTHRESH.
157
158 const size_t new_slow_start_thresh_bytes = congestion_window_adjusted_bytes();
159
160 const Peer_socket::Const_ptr sock = socket();
161 FLOW_LOG_TRACE("cong_ctl|bw_est [" << sock << "] update: DTO event; set sl_st_thresh "
162 "to [" << sock->bytes_blocks_str(new_slow_start_thresh_bytes) << "]; "
163 "cong_wnd to minimal value; schedule rtt_min reset.");
164
165 // Now set SSTHRESH and let m_classic_data set CWND to a low value (common to most congestion control strategies).
166
167 // Will log but not enough (so log the above).
168 m_classic_data.on_drop_timeout(new_slow_start_thresh_bytes);
169
170 m_reset_rtt_min = true; // As explained above.
171} // Congestion_control_classic_with_bandwidth_est::on_drop_timeout()
172
174{
175 using std::max;
176
177 using Time_unit = Send_bandwidth_estimator::Time_unit;
178 using n_bytes_t = Send_bandwidth_estimator::n_bytes_t;
179
180 const Peer_socket::Const_ptr sock = socket();
181
182 /* The basic formula is CWND = B * RTTmin. Units and arithmetic are as follows. B is in units of
183 * bytes per Time_unit(1). (The selection of Time_unit(1) is discussed in detail in that
184 * alias's doc header.) RTTmin, accordingly, is in units of Time_unit. Therefore we can simply
185 * multiply the two values.
186 *
187 * Range issues: B is returned in n_bytes_t, which is potentially larger than size_t due to
188 * Send_bandwidth_estimator choosing to store safer internal values that way and return them in
189 * that format also. After the multiplication we convert the result to size_t; assuming
190 * reasonable bandwidths, this number should not overflow even a 32-bit size_t. However B can
191 * also be zero (either unavailable or just a very low bandwidth). As is standard, put a floor of
192 * 2 max-block-sizes on it to avoid craziness. */
193
194 const n_bytes_t bytes_per_time = sock->m_snd_bandwidth_estimator->bandwidth_bytes_per_time();
195
196 const size_t floor_wnd_bytes = 2 * sock->max_block_size();
197 const size_t new_wnd_bytes = max(size_t(bytes_per_time * m_rtt_min.count()),
198 floor_wnd_bytes);
199
200 FLOW_LOG_TRACE("cong_ctl|bw_est [" << sock << "] info: window calculation: wnd "
201 "= bw x rtt_min = [" << sock->bytes_blocks_str(size_t(bytes_per_time)) << "] bytes "
202 "per [" << Time_unit(1) << "] x [" << m_rtt_min << "] "
203 "(subject to floor [" << sock->bytes_blocks_str(floor_wnd_bytes) << "]) "
204 "= [" << sock->bytes_blocks_str(new_wnd_bytes) << "].");
205
206 return new_wnd_bytes;
207}
208
210{
211 /* Node has detected that nothing has been sent out by us for a while. (Note that this is
212 * different from a Drop Timeout. A Drop Timeout causes data to be sent (either retransmitted [if
213 * used] or new data), which resets the state back to non-idle for a while.)
214 *
215 * Do the same thing as Congestion_control_classic.
216 *
217 * It might actually seem like we should perhaps reset m_rtt_min here or reset the bandwidth
218 * estimation, but the reference implementation (tcp_westwood.c in Linux) does no such thing on
219 * idle timeout. For m_rtt_min it does make sense to me; there's no reason to necessarily think
220 * that just because the user hasn't sent anything in a while that the properties of the pipe
221 * have changed. For the bandwidth estimator, it also seems like some guess is better than no
222 * guess, and as traffic picks up the data should correct itself gradually, but it's iffier.
223 * Anyway, go with the reference implementation for now. */
224
226} // Congestion_control_classic_with_bandwidth_est::on_idle_timeout()
227
228} // namespace flow::net_flow
Interface that the user should implement, passing the implementing Logger into logging classes (Flow'...
Definition: log.hpp:1291
void on_drop_timeout(size_t new_slow_start_thresh_bytes)
Adjust state, including CWND and SSTHRESH, assuming a Drop Timeout just occurred.
void on_congestion_event(size_t new_slow_start_thresh_bytes, size_t new_cong_wnd_bytes)
Sets internally stored SSHTRESH and CWND to the given values; appropriately resets internal state so ...
void on_acks(size_t bytes)
Adjusts state, including potentially CWND, based on either "congestion avoidance" or "slow start" alg...
size_t congestion_window_bytes() const
Return current stored CWND value in bytes.
void on_idle_timeout()
Adjust state, namely CWND, assuming an Idle Timeout just occurred.
static const Send_bandwidth_estimator::Time_unit S_INIT_RTT_MIN
The initial value for m_rtt_min, until one RTT measurement comes in.
void on_drop_timeout(size_t bytes, size_t packets) override
Implements Congestion_control_strategy::on_drop_timeout() API.
void on_idle_timeout() override
Implements Congestion_control_strategy::on_idle_timeout() API.
bool m_reset_rtt_min
If true, then when the next RTT measurement comes in, any past measurement should be disregarded,...
void on_acks(size_t bytes, size_t packets) override
Implements Congestion_control_strategy::on_acks() API.
void on_individual_ack(const Fine_duration &packet_rtt, const size_t bytes, const size_t sent_cwnd_bytes) override
Implements Congestion_control_strategy::on_individual_ack() API.
void on_loss_event(size_t bytes, size_t packets) override
Implements Congestion_control_strategy::on_loss_event() API.
size_t congestion_window_adjusted_bytes() const
Returns the best bandwidth/latency-based guess for what the current window (pipe) size should be.
Send_bandwidth_estimator::Time_unit m_rtt_min
The minimum individual packet's RTT observed so far; it should be reset to the next sample if m_reset...
Congestion_control_classic_with_bandwidth_est(log::Logger *logger_ptr, Peer_socket::Const_ptr sock)
Constructs object by setting up logging and saving a pointer to the containing Peer_socket.
Congestion_control_classic_data m_classic_data
The Reno CWND/SSTHRESH-changing engine and CWND/SSTHRESH storage.
size_t congestion_window_bytes() const override
Implements Congestion_control_strategy::congestion_window_bytes() API.
The abstract interface for a per-socket module that determines the socket's congestion control behavi...
Definition: cong_ctl.hpp:180
Peer_socket::Const_ptr socket() const
Utility for subclasses that returns a handle to the containing Peer_socket.
Definition: cong_ctl.cpp:63
boost::chrono::milliseconds Time_unit
The primary time unit over which this class reports bandwidth.
Definition: bandwidth.hpp:167
uint64_t n_bytes_t
Type that represents the number of bytes, either as an absolute amount or over some time period.
Definition: bandwidth.hpp:134
Const_target_ptr Const_ptr
Short-hand for ref-counted pointer to immutable values of type Target_type::element_type (a-la T cons...
#define FLOW_LOG_TRACE(ARG_stream_fragment)
Logs a TRACE message into flow::log::Logger *get_logger() with flow::log::Component get_log_component...
Definition: log.hpp:227
Flow module containing the API and implementation of the Flow network protocol, a TCP-inspired stream...
Definition: node.cpp:25
Fine_clock::duration Fine_duration
A high-res time duration as computed from two Fine_time_pts.
Definition: common.hpp:411