aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: ewang <[email protected]> 2024-03-29 17:18:48 -0700
committer: Andrew Hauck <[email protected]> 2024-04-05 11:46:20 -0700
commit: f54d17a3392e26900dc861f015d820d4dfa86d7f (patch)
tree: c4e49f54d1dd00c3b40d985d1a30aaa2bfe6afd2
parent: dfcd3d1d9f493b830e97757ba8d43c07ee81c030 (diff)
download: pingora-f54d17a3392e26900dc861f015d820d4dfa86d7f.tar.gz
pingora-f54d17a3392e26900dc861f015d820d4dfa86d7f.zip
Clarify cache-control regex comments
Also update a few comment links for the newest HTTP RFCs.
-rw-r--r-- .bleep 2
-rw-r--r-- pingora-cache/src/cache_control.rs 32
2 files changed, 21 insertions, 13 deletions
diff --git a/.bleep b/.bleep
index 4251f76..e317cd1 100644
--- a/.bleep
+++ b/.bleep
@@ -1 +1 @@
-3ca77e7ac5f665afa94f77ef9d3852db3195470d \ No newline at end of file
+a1e399e99a67e743d00ba50ba7c3999c25136fe5 \ No newline at end of file
diff --git a/pingora-cache/src/cache_control.rs b/pingora-cache/src/cache_control.rs
index f7aa69f..3934a93 100644
--- a/pingora-cache/src/cache_control.rs
+++ b/pingora-cache/src/cache_control.rs
@@ -26,7 +26,7 @@ use std::num::IntErrorKind;
use std::slice;
use std::str;
-/// The max delta-second per [RFC 7234](https://datatracker.ietf.org/doc/html/rfc7234#section-1.2.1)
+/// The max delta-second per [RFC 9111](https://datatracker.ietf.org/doc/html/rfc9111#section-1.2.2)
// "If a cache receives a delta-seconds
// value greater than the greatest integer it can represent, or if any
// of its subsequent calculations overflows, the cache MUST consider the
@@ -111,7 +111,7 @@ impl<'a> ListValueIter<'a> {
}
}
-// https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.3
+// https://datatracker.ietf.org/doc/html/rfc9110#name-whitespace
// optional whitespace OWS = *(SP / HTAB); SP = 0x20, HTAB = 0x09
fn trim_ows(bytes: &[u8]) -> &[u8] {
fn not_ows(b: &u8) -> bool {
@@ -135,23 +135,31 @@ impl<'a> Iterator for ListValueIter<'a> {
}
}
-/*
- Originally from https://github.com/hapijs/wreck:
- Cache-Control = 1#cache-directive
- cache-directive = token [ "=" ( token / quoted-string ) ]
- token = [^\x00-\x20\(\)<>@\,;\:\\"\/\[\]\?\=\{\}\x7F]+
- quoted-string = "(?:[^"\\]|\\.)*"
-*/
+// Originally from https://github.com/hapijs/wreck which has the following comments:
+// Cache-Control = 1#cache-directive
+// cache-directive = token [ "=" ( token / quoted-string ) ]
+// token = [^\x00-\x20\(\)<>@\,;\:\\"\/\[\]\?\=\{\}\x7F]+
+// quoted-string = "(?:[^"\\]|\\.)*"
+//
+// note the `token` implementation excludes disallowed ASCII ranges
+// and disallowed delimiters: https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2
+// though it does not forbid `obs-text`: %x80-FF
static RE_CACHE_DIRECTIVE: Lazy<Regex> =
- // unicode support disabled, allow ; or , delimiter | capture groups: 1: directive = 2: token OR quoted-string
+ // to break our version down further:
+ // `(?-u)`: unicode support disabled, which puts the regex into "ASCII compatible mode" for specifying literal bytes like \x7F: https://docs.rs/regex/1.10.4/regex/bytes/index.html#syntax
+ // `(?:^|(?:\s*[,;]\s*))`: match at the start of input, or allow either , or ; as a delimiter
+ // `([^\x00-\x20\(\)<>@,;:\\"/\[\]\?=\{\}\x7F]+)`: token (directive name capture group)
+ // `(?:=((?:[^\x00-\x20\(\)<>@,;:\\"/\[\]\?=\{\}\x7F]+|(?:"(?:[^"\\]|\\.)*"))))?`: optional `=` followed by token OR quoted-string (directive value capture-group)
Lazy::new(|| {
Regex::new(r#"(?-u)(?:^|(?:\s*[,;]\s*))([^\x00-\x20\(\)<>@,;:\\"/\[\]\?=\{\}\x7F]+)(?:=((?:[^\x00-\x20\(\)<>@,;:\\"/\[\]\?=\{\}\x7F]+|(?:"(?:[^"\\]|\\.)*"))))?"#).unwrap()
});
impl CacheControl {
// Our parsing strategy is more permissive than the RFC in a few ways:
- // - Allows semicolons as delimiters (in addition to commas).
- // - Allows octets outside of visible ASCII in tokens.
+ // - Allows semicolons as delimiters (in addition to commas). See the regex above.
+ // - Allows octets outside of visible ASCII in `token`s, and in later RFCs, octets outside of
+ // the `quoted-string` range: https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2
+ // See the regex above.
// - Doesn't require no-value for "boolean directives," such as must-revalidate
// - Allows quoted-string format for numeric values.
fn from_headers(headers: http::header::GetAll<HeaderValue>) -> Option<Self> {