aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAndrew Hauck <[email protected]>2024-08-20 10:44:20 -0700
committerEdward Wang <[email protected]>2024-09-06 13:43:53 -0700
commitd1d7a87b761eeb4f71fcaa3f7c4ae8e32f1d93c8 (patch)
tree1f71e461b7b922cc19e69eadff6059ae8d2d4394
parentacffb8aaf2a76e3ab8a4db698ed2f151cfb64566 (diff)
downloadpingora-d1d7a87b761eeb4f71fcaa3f7c4ae8e32f1d93c8.tar.gz
pingora-d1d7a87b761eeb4f71fcaa3f7c4ae8e32f1d93c8.zip
Add support for binding to local port ranges and retrying on EADDRNOTAVAIL
-rw-r--r--.bleep2
-rw-r--r--pingora-core/src/connectors/l4.rs206
-rw-r--r--pingora-core/src/connectors/mod.rs6
-rw-r--r--pingora-core/src/protocols/l4/ext.rs98
-rw-r--r--pingora-core/src/upstreams/peer.rs12
5 files changed, 286 insertions, 38 deletions
diff --git a/.bleep b/.bleep
index ac4d790..f403528 100644
--- a/.bleep
+++ b/.bleep
@@ -1 +1 @@
-28f94f2a402bbf66341bdac8fa670caf5b7311e9 \ No newline at end of file
+90c70086397a4708a4dadfed6e6915ce6dc33481 \ No newline at end of file
diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs
index 0162034..58bd209 100644
--- a/pingora-core/src/connectors/l4.rs
+++ b/pingora-core/src/connectors/l4.rs
@@ -33,8 +33,59 @@ pub trait Connect: std::fmt::Debug {
async fn connect(&self, addr: &SocketAddr) -> Result<Stream>;
}
+/// Settings for binding on connect
+#[derive(Clone, Debug, Default)]
+pub struct BindTo {
+ // local ip address
+ pub addr: Option<InetSocketAddr>,
+ // port range
+ port_range: Option<(u16, u16)>,
+ // whether we fallback and try again on bind errors when a port range is set
+ fallback: bool,
+}
+
+impl BindTo {
+ /// Sets the port range we will bind to where the first item in the tuple is the lower bound
+ /// and the second item is the upper bound.
+ ///
+ /// Note this bind option is only supported on Linux since 6.3, this is a no-op on other systems.
+ /// To reset the range, pass a `None` or `Some((0,0))`, more information can be found [here](https://man7.org/linux/man-pages/man7/ip.7.html)
+ pub fn set_port_range(&mut self, range: Option<(u16, u16)>) -> Result<()> {
+ if range.is_none() && self.port_range.is_none() {
+ // nothing to do
+ return Ok(());
+ }
+
+ match range {
+ // 0,0 is valid for resets
+ None | Some((0, 0)) => self.port_range = Some((0, 0)),
+ // set the port range if valid
+ Some((low, high)) if low > 0 && low < high => {
+ self.port_range = Some((low, high));
+ }
+ _ => return Error::e_explain(SocketError, "invalid port range: {range}"),
+ }
+ Ok(())
+ }
+
+ /// Set whether we fallback on no address available if a port range is set
+ pub fn set_fallback(&mut self, fallback: bool) {
+ self.fallback = fallback
+ }
+
+ /// Configured bind port range
+ pub fn port_range(&self) -> Option<(u16, u16)> {
+ self.port_range
+ }
+
+ /// Whether we attempt to fallback on no address available
+ pub fn will_fallback(&self) -> bool {
+ self.fallback && self.port_range.is_some()
+ }
+}
+
/// Establish a connection (l4) to the given peer using its settings and an optional bind address.
-pub(crate) async fn connect<P>(peer: &P, bind_to: Option<InetSocketAddr>) -> Result<Stream>
+pub(crate) async fn connect<P>(peer: &P, bind_to: Option<BindTo>) -> Result<Stream>
where
P: Peer + Send + Sync,
{
@@ -142,12 +193,8 @@ pub(crate) fn bind_to_random<P: Peer>(
peer: &P,
v4_list: &[InetSocketAddr],
v6_list: &[InetSocketAddr],
-) -> Option<InetSocketAddr> {
- let selected = peer.get_peer_options().and_then(|o| o.bind_to);
- if selected.is_some() {
- return selected;
- }
-
+) -> Option<BindTo> {
+ // helper function for randomly picking address
fn bind_to_ips(ips: &[InetSocketAddr]) -> Option<InetSocketAddr> {
match ips.len() {
0 => None,
@@ -159,13 +206,31 @@ pub(crate) fn bind_to_random<P: Peer>(
}
}
- match peer.address() {
+ let mut bind_to = peer.get_peer_options().and_then(|o| o.bind_to.clone());
+ if bind_to.as_ref().map(|b| b.addr).is_some() {
+ // already have a bind address selected
+ return bind_to;
+ }
+
+ let addr = match peer.address() {
SocketAddr::Inet(sockaddr) => match sockaddr {
InetSocketAddr::V4(_) => bind_to_ips(v4_list),
InetSocketAddr::V6(_) => bind_to_ips(v6_list),
},
SocketAddr::Unix(_) => None,
+ };
+
+ if addr.is_some() {
+ if let Some(bind_to) = bind_to.as_mut() {
+ bind_to.addr = addr;
+ } else {
+ bind_to = Some(BindTo {
+ addr,
+ ..Default::default()
+ });
+ }
}
+ bind_to
}
use crate::protocols::raw_connect;
@@ -238,16 +303,25 @@ mod tests {
#[tokio::test]
async fn test_conn_error_addr_not_avail() {
let peer = HttpPeer::new("127.0.0.1:121".to_string(), false, "".to_string());
- let new_session = connect(&peer, Some("192.0.2.2:0".parse().unwrap())).await;
+ let addr = "192.0.2.2:0".parse().ok();
+ let bind_to = BindTo {
+ addr,
+ ..Default::default()
+ };
+ let new_session = connect(&peer, Some(bind_to)).await;
assert_eq!(new_session.unwrap_err().etype(), &InternalError)
}
#[tokio::test]
async fn test_conn_error_other() {
let peer = HttpPeer::new("240.0.0.1:80".to_string(), false, "".to_string()); // non localhost
-
+ let addr = "127.0.0.1:0".parse().ok();
// create an error: cannot send from src addr: localhost to dst addr: a public IP
- let new_session = connect(&peer, Some("127.0.0.1:0".parse().unwrap())).await;
+ let bind_to = BindTo {
+ addr,
+ ..Default::default()
+ };
+ let new_session = connect(&peer, Some(bind_to)).await;
let error = new_session.unwrap_err();
// XXX: some system will allow the socket to bind and connect without error, only to timeout
assert!(error.etype() == &ConnectError || error.etype() == &ConnectTimedout)
@@ -371,4 +445,114 @@ mod tests {
assert_eq!(err.etype(), &ConnectionClosed);
assert!(!err.retry());
}
+
+ #[cfg(target_os = "linux")]
+ #[tokio::test(flavor = "multi_thread")]
+ async fn test_bind_to_port_range_on_connect() {
+ fn get_ip_local_port_range() -> (u16, u16) {
+ let path = "/proc/sys/net/ipv4/ip_local_port_range";
+ let file = std::fs::read_to_string(path).unwrap();
+ let mut parts = file.split_whitespace();
+ (
+ parts.next().unwrap().parse().unwrap(),
+ parts.next().unwrap().parse().unwrap(),
+ )
+ }
+
+ // one-off mock server
+ async fn mock_inet_connect_server() {
+ use tokio::net::TcpListener;
+ let listener = TcpListener::bind("127.0.0.1:10020").await.unwrap();
+ if let Ok((mut stream, _addr)) = listener.accept().await {
+ stream.write_all(b"HTTP/1.1 200 OK\r\n\r\n").await.unwrap();
+ // wait a bit so that the client can read
+ tokio::time::sleep(std::time::Duration::from_millis(100)).await;
+ }
+ }
+
+ fn in_port_range(session: Stream, lower: u16, upper: u16) -> bool {
+ let digest = session.get_socket_digest();
+ let local_addr = digest
+ .as_ref()
+ .and_then(|s| s.local_addr())
+ .unwrap()
+ .as_inet()
+ .unwrap();
+
+ // assert range
+ local_addr.port() >= lower && local_addr.port() <= upper
+ }
+
+ tokio::spawn(async {
+ mock_inet_connect_server().await;
+ });
+ // wait for the server to start
+ tokio::time::sleep(std::time::Duration::from_millis(100)).await;
+
+ // need to read /proc/sys/net/ipv4/ip_local_port_range for this test to work
+ // IP_LOCAL_PORT_RANGE clamp only works on ports in /proc/sys/net/ipv4/ip_local_port_range
+ let (low, _) = get_ip_local_port_range();
+ let high = low + 1;
+
+ let peer = HttpPeer::new("127.0.0.1:10020".to_string(), false, "".to_string());
+ let mut bind_to = BindTo {
+ addr: "127.0.0.1:0".parse().ok(),
+ ..Default::default()
+ };
+ bind_to.set_port_range(Some((low, high))).unwrap();
+
+ let session1 = connect(&peer, Some(bind_to.clone())).await.unwrap();
+ assert!(in_port_range(session1, low, high));
+
+ // execute more connect()
+ let session2 = connect(&peer, Some(bind_to.clone())).await.unwrap();
+ assert!(in_port_range(session2, low, high));
+ let session3 = connect(&peer, Some(bind_to.clone())).await.unwrap();
+ assert!(in_port_range(session3, low, high));
+
+ // disabled fallback, should be AddrNotAvailable error
+ let err = connect(&peer, Some(bind_to.clone())).await.unwrap_err();
+ assert_eq!(err.etype(), &InternalError);
+
+ // enable fallback, assert not in port range but successful
+ bind_to.set_fallback(true);
+ let session4 = connect(&peer, Some(bind_to.clone())).await.unwrap();
+ assert!(!in_port_range(session4, low, high));
+
+ // works without bind IP, shift up to use new ports
+ let low = low + 2;
+ let high = low + 1;
+ let mut bind_to = BindTo::default();
+ bind_to.set_port_range(Some((low, high))).unwrap();
+ let session5 = connect(&peer, Some(bind_to.clone())).await.unwrap();
+ assert!(in_port_range(session5, low, high));
+ }
+
+ #[test]
+ fn test_bind_to_port_ranges() {
+ let addr = "127.0.0.1:0".parse().ok();
+ let mut bind_to = BindTo {
+ addr,
+ ..Default::default()
+ };
+
+ // None because the previous value was None
+ bind_to.set_port_range(None).unwrap();
+ assert!(bind_to.port_range.is_none());
+
+ // zeroes are handled
+ bind_to.set_port_range(Some((0, 0))).unwrap();
+ assert_eq!(bind_to.port_range, Some((0, 0)));
+
+ // zeroes because the previous value was Some
+ bind_to.set_port_range(None).unwrap();
+ assert_eq!(bind_to.port_range, Some((0, 0)));
+
+ // low > high is error
+ assert!(bind_to.set_port_range(Some((2000, 1000))).is_err());
+
+ // low < high success
+ bind_to.set_port_range(Some((1000, 2000))).unwrap();
+ assert_eq!(bind_to.port_range, Some((1000, 2000)));
+ }
}
diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs
index 2d4584c..cbe299f 100644
--- a/pingora-core/src/connectors/mod.rs
+++ b/pingora-core/src/connectors/mod.rs
@@ -24,8 +24,8 @@ use crate::server::configuration::ServerConf;
use crate::tls::ssl::SslConnector;
use crate::upstreams::peer::{Peer, ALPN};
-use l4::connect as l4_connect;
pub use l4::Connect as L4Connect;
+use l4::{connect as l4_connect, BindTo};
use log::{debug, error, warn};
use offload::OffloadRuntime;
use parking_lot::RwLock;
@@ -273,7 +273,7 @@ impl TransportConnector {
// connection timeout if there is one
async fn do_connect<P: Peer + Send + Sync>(
peer: &P,
- bind_to: Option<SocketAddr>,
+ bind_to: Option<BindTo>,
alpn_override: Option<ALPN>,
tls_ctx: &SslConnector,
) -> Result<Stream> {
@@ -296,7 +296,7 @@ async fn do_connect<P: Peer + Send + Sync>(
// Perform the actual L4 and tls connection steps with no timeout
async fn do_connect_inner<P: Peer + Send + Sync>(
peer: &P,
- bind_to: Option<SocketAddr>,
+ bind_to: Option<BindTo>,
alpn_override: Option<ALPN>,
tls_ctx: &SslConnector,
) -> Result<Stream> {
diff --git a/pingora-core/src/protocols/l4/ext.rs b/pingora-core/src/protocols/l4/ext.rs
index 5123bdf..4b65f84 100644
--- a/pingora-core/src/protocols/l4/ext.rs
+++ b/pingora-core/src/protocols/l4/ext.rs
@@ -27,6 +27,8 @@ use std::os::unix::io::{AsRawFd, RawFd};
use std::time::Duration;
use tokio::net::{TcpSocket, TcpStream, UnixStream};
+use crate::connectors::l4::BindTo;
+
/// The (copy of) the kernel struct tcp_info returns
#[repr(C)]
#[derive(Copy, Clone, Debug)]
@@ -160,9 +162,12 @@ fn cvt_linux_error(t: i32) -> io::Result<i32> {
#[cfg(target_os = "linux")]
fn ip_bind_addr_no_port(fd: RawFd, val: bool) -> io::Result<()> {
- const IP_BIND_ADDRESS_NO_PORT: i32 = 24;
-
- set_opt(fd, libc::IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, val as c_int)
+ set_opt(
+ fd,
+ libc::IPPROTO_IP,
+ libc::IP_BIND_ADDRESS_NO_PORT,
+ val as c_int,
+ )
}
#[cfg(not(target_os = "linux"))]
@@ -170,6 +175,26 @@ fn ip_bind_addr_no_port(_fd: RawFd, _val: bool) -> io::Result<()> {
Ok(())
}
+/// IP_LOCAL_PORT_RANGE is only supported on Linux 6.3 and higher,
+/// ip_local_port_range() is a no-op on unsupported versions.
+/// See the [man page](https://man7.org/linux/man-pages/man7/ip.7.html) for more details.
+#[cfg(target_os = "linux")]
+fn ip_local_port_range(fd: RawFd, low: u16, high: u16) -> io::Result<()> {
+ const IP_LOCAL_PORT_RANGE: i32 = 51;
+ let range: u32 = (low as u32) | ((high as u32) << 16);
+
+ let result = set_opt(fd, libc::IPPROTO_IP, IP_LOCAL_PORT_RANGE, range as c_int);
+ match result {
+ Err(e) if e.raw_os_error() != Some(libc::ENOPROTOOPT) => Err(e),
+ _ => Ok(()), // no error or ENOPROTOOPT
+ }
+}
+
+#[cfg(not(target_os = "linux"))]
+fn ip_local_port_range(_fd: RawFd, _low: u16, _high: u16) -> io::Result<()> {
+ Ok(())
+}
+
#[cfg(target_os = "linux")]
fn set_so_keepalive(fd: RawFd, val: bool) -> io::Result<()> {
set_opt(fd, libc::SOL_SOCKET, libc::SO_KEEPALIVE, val as c_int)
@@ -310,14 +335,42 @@ pub fn get_socket_cookie(_fd: RawFd) -> io::Result<u64> {
Ok(0) // SO_COOKIE is a Linux concept
}
-/// connect() to the given address while optionally binding to the specific source address.
+/// connect() to the given address while optionally binding to the specific source address and port range.
///
/// The `set_socket` callback can be used to tune the socket before `connect()` is called.
///
+/// If a [`BindTo`] is set with a port range and fallback setting enabled this function will retry
+/// on EADDRNOTAVAIL ignoring the port range.
+///
/// `IP_BIND_ADDRESS_NO_PORT` is used.
-pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
+/// `IP_LOCAL_PORT_RANGE` is used if a port range is set on [`BindTo`].
+pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()> + Clone>(
addr: &SocketAddr,
- bind_to: Option<&SocketAddr>,
+ bind_to: Option<&BindTo>,
+ set_socket: F,
+) -> Result<TcpStream> {
+ if bind_to.as_ref().map_or(false, |b| b.will_fallback()) {
+ // if we see an EADDRNOTAVAIL error clear the port range and try again
+ let connect_result = inner_connect_with(addr, bind_to, set_socket.clone()).await;
+ if let Err(e) = connect_result.as_ref() {
+ if matches!(e.etype(), BindError) {
+ let mut new_bind_to = BindTo::default();
+ new_bind_to.addr = bind_to.as_ref().and_then(|b| b.addr);
+ // reset the port range
+ new_bind_to.set_port_range(None).unwrap();
+ return inner_connect_with(addr, Some(&new_bind_to), set_socket).await;
+ }
+ }
+ connect_result
+ } else {
+ // not retryable
+ inner_connect_with(addr, bind_to, set_socket).await
+ }
+}
+
+async fn inner_connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
+ addr: &SocketAddr,
+ bind_to: Option<&BindTo>,
set_socket: F,
) -> Result<TcpStream> {
let socket = if addr.is_ipv4() {
@@ -328,14 +381,23 @@ pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
.or_err(SocketError, "failed to create socket")?;
if cfg!(target_os = "linux") {
- ip_bind_addr_no_port(socket.as_raw_fd(), true)
- .or_err(SocketError, "failed to set socket opts")?;
-
- if let Some(baddr) = bind_to {
- socket
- .bind(*baddr)
- .or_err_with(BindError, || format!("failed to bind to socket {}", *baddr))?;
- };
+ ip_bind_addr_no_port(socket.as_raw_fd(), true).or_err(
+ SocketError,
+ "failed to set socket opts IP_BIND_ADDRESS_NO_PORT",
+ )?;
+
+ if let Some(bind_to) = bind_to {
+ if let Some((low, high)) = bind_to.port_range() {
+ ip_local_port_range(socket.as_raw_fd(), low, high)
+ .or_err(SocketError, "failed to set socket opts IP_LOCAL_PORT_RANGE")?;
+ }
+
+ if let Some(baddr) = bind_to.addr {
+ socket
+ .bind(baddr)
+ .or_err_with(BindError, || format!("failed to bind to socket {}", baddr))?;
+ }
+ }
}
// TODO: add support for bind on other platforms
@@ -349,8 +411,9 @@ pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
/// connect() to the given address while optionally binding to the specific source address.
///
-/// `IP_BIND_ADDRESS_NO_PORT` is used.
-pub async fn connect(addr: &SocketAddr, bind_to: Option<&SocketAddr>) -> Result<TcpStream> {
+/// `IP_BIND_ADDRESS_NO_PORT` is used
+/// `IP_LOCAL_PORT_RANGE` is used if a port range is set on [`BindTo`].
+pub async fn connect(addr: &SocketAddr, bind_to: Option<&BindTo>) -> Result<TcpStream> {
connect_with(addr, bind_to, |_| Ok(())).await
}
@@ -365,7 +428,8 @@ fn wrap_os_connect_error(e: std::io::Error, context: String) -> Box<Error> {
match e.kind() {
ErrorKind::ConnectionRefused => Error::because(ConnectRefused, context, e),
ErrorKind::TimedOut => Error::because(ConnectTimedout, context, e),
- ErrorKind::PermissionDenied | ErrorKind::AddrInUse | ErrorKind::AddrNotAvailable => {
+ ErrorKind::AddrNotAvailable => Error::because(BindError, context, e),
+ ErrorKind::PermissionDenied | ErrorKind::AddrInUse => {
Error::because(InternalError, context, e)
}
_ => match e.raw_os_error() {
diff --git a/pingora-core/src/upstreams/peer.rs b/pingora-core/src/upstreams/peer.rs
index fad8471..832791d 100644
--- a/pingora-core/src/upstreams/peer.rs
+++ b/pingora-core/src/upstreams/peer.rs
@@ -29,7 +29,7 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
-use crate::connectors::L4Connect;
+use crate::connectors::{l4::BindTo, L4Connect};
use crate::protocols::l4::socket::SocketAddr;
use crate::protocols::ConnFdReusable;
use crate::protocols::TcpKeepalive;
@@ -110,8 +110,8 @@ pub trait Peer: Display + Clone {
None => None,
}
}
- /// Which local source address this connection should be bind to.
- fn bind_to(&self) -> Option<&InetSocketAddr> {
+ /// Information about the local source address this connection should be bound to.
+ fn bind_to(&self) -> Option<&BindTo> {
match self.get_peer_options() {
Some(opt) => opt.bind_to.as_ref(),
None => None,
@@ -243,7 +243,7 @@ impl Peer for BasicPeer {
!self.sni.is_empty()
}
- fn bind_to(&self) -> Option<&InetSocketAddr> {
+ fn bind_to(&self) -> Option<&BindTo> {
None
}
@@ -294,7 +294,7 @@ impl Scheme {
/// See [`Peer`] for the meaning of the fields
#[derive(Clone, Debug)]
pub struct PeerOptions {
- pub bind_to: Option<InetSocketAddr>,
+ pub bind_to: Option<BindTo>,
pub connection_timeout: Option<Duration>,
pub total_connection_timeout: Option<Duration>,
pub read_timeout: Option<Duration>,
@@ -365,7 +365,7 @@ impl PeerOptions {
impl Display for PeerOptions {
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
- if let Some(b) = self.bind_to {
+ if let Some(b) = self.bind_to.as_ref() {
write!(f, "bind_to: {:?},", b)?;
}
if let Some(t) = self.connection_timeout {