diff options
author | Yuchen Wu <[email protected]> | 2024-02-27 20:25:44 -0800 |
---|---|---|
committer | Yuchen Wu <[email protected]> | 2024-02-27 20:25:44 -0800 |
commit | 8797329225018c4d0ab990166dd020338ae292dc (patch) | |
tree | 1e8d0bf6f3c27e987559f52319d91ff75e4da5cb /pingora-header-serde | |
parent | 0bca116c1027a878469b72352e1e9e3916e85dde (diff) | |
download | pingora-8797329225018c4d0ab990166dd020338ae292dc.tar.gz pingora-8797329225018c4d0ab990166dd020338ae292dc.zip |
Release Pingora version 0.1.0 (tag: v0.1.0)
Co-authored-by: Andrew Hauck <[email protected]>
Co-authored-by: Edward Wang <[email protected]>
Diffstat (limited to 'pingora-header-serde')
-rw-r--r-- | pingora-header-serde/Cargo.toml | 32 | ||||
-rw-r--r-- | pingora-header-serde/LICENSE | 202 | ||||
-rw-r--r-- | pingora-header-serde/samples/test/1 | 15 | ||||
-rw-r--r-- | pingora-header-serde/samples/test/2 | 15 | ||||
-rw-r--r-- | pingora-header-serde/samples/test/3 | 15 | ||||
-rw-r--r-- | pingora-header-serde/samples/test/4 | 15 | ||||
-rw-r--r-- | pingora-header-serde/samples/test/5 | 15 | ||||
-rw-r--r-- | pingora-header-serde/samples/test/6 | 15 | ||||
-rw-r--r-- | pingora-header-serde/samples/test/7 | 14 | ||||
-rw-r--r-- | pingora-header-serde/src/dict.rs | 88 | ||||
-rw-r--r-- | pingora-header-serde/src/lib.rs | 203 | ||||
-rw-r--r-- | pingora-header-serde/src/thread_zstd.rs | 79 | ||||
-rw-r--r-- | pingora-header-serde/src/trainer.rs | 23 |
13 files changed, 731 insertions, 0 deletions
diff --git a/pingora-header-serde/Cargo.toml b/pingora-header-serde/Cargo.toml new file mode 100644 index 0000000..2968cae --- /dev/null +++ b/pingora-header-serde/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "pingora-header-serde" +version = "0.1.0" +authors = ["Yuchen Wu <[email protected]>"] +license = "Apache-2.0" +edition = "2021" +repository = "https://github.com/cloudflare/pingora" +categories = ["compression"] +keywords = ["http", "compression", "pingora"] +exclude = ["samples/*"] +description = """ +HTTP header (de)serialization and compression for Pingora. +""" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "pingora_header_serde" +path = "src/lib.rs" + +[[bin]] +name = "trainer" +path = "src/trainer.rs" + +[dependencies] +zstd = "0.9.0" +zstd-safe = "4.1.1" +http = { workspace = true } +bytes = { workspace = true } +httparse = { workspace = true } +pingora-error = { version = "0.1.0", path = "../pingora-error" } +pingora-http = { version = "0.1.0", path = "../pingora-http" } +thread_local = "1.0" diff --git a/pingora-header-serde/LICENSE b/pingora-header-serde/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/pingora-header-serde/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/pingora-header-serde/samples/test/1 b/pingora-header-serde/samples/test/1 new file mode 100644 index 0000000..9d4c680 --- /dev/null +++ b/pingora-header-serde/samples/test/1 @@ -0,0 +1,15 @@ +HTTP/1.1 200 OK +Server: nginx +Date: Wed, 22 Dec 2021 06:30:29 GMT +Content-Type: application/javascript +Last-Modified: Mon, 29 Nov 2021 10:13:32 GMT +Transfer-Encoding: chunked +Connection: keep-alive +Vary: Accept-Encoding +ETag: W/"61a4a7cc-21df8" +Access-Control-Allow-Origin: * +Access-Control-Allow-Credentials: true +Access-Control-Expose-Headers: Content-Length,Content-Range +Access-Control-Allow-Headers: Range +Content-Encoding: gzip + diff --git a/pingora-header-serde/samples/test/2 b/pingora-header-serde/samples/test/2 new file mode 100644 index 0000000..5cd4026 --- /dev/null +++ b/pingora-header-serde/samples/test/2 @@ -0,0 +1,15 @@ +HTTP/1.1 200 OK +Server: nginx +Date: Thu, 23 Dec 2021 15:12:32 GMT +Content-Type: application/javascript +Last-Modified: Mon, 09 Sep 2019 12:47:14 GMT +Transfer-Encoding: chunked +Connection: keep-alive +Vary: Accept-Encoding +ETag: W/"5d7649d2-16ec64" +Access-Control-Allow-Origin: * +Access-Control-Allow-Credentials: true +Access-Control-Expose-Headers: Content-Length,Content-Range +Access-Control-Allow-Headers: Range +Content-Encoding: gzip + diff --git a/pingora-header-serde/samples/test/3 b/pingora-header-serde/samples/test/3 new file mode 
100644 index 0000000..b02aadd --- /dev/null +++ b/pingora-header-serde/samples/test/3 @@ -0,0 +1,15 @@ +HTTP/1.1 200 OK +Server: nginx +Date: Wed, 22 Dec 2021 12:29:00 GMT +Content-Type: application/javascript +Last-Modified: Mon, 09 Sep 2019 07:47:37 GMT +Transfer-Encoding: chunked +Connection: keep-alive +Vary: Accept-Encoding +ETag: W/"5d760399-52868" +Access-Control-Allow-Origin: * +Access-Control-Allow-Credentials: true +Access-Control-Expose-Headers: Content-Length,Content-Range +Access-Control-Allow-Headers: Range +Content-Encoding: gzip + diff --git a/pingora-header-serde/samples/test/4 b/pingora-header-serde/samples/test/4 new file mode 100644 index 0000000..8215d6e --- /dev/null +++ b/pingora-header-serde/samples/test/4 @@ -0,0 +1,15 @@ +HTTP/1.1 200 OK +Server: nginx +Date: Wed, 22 Dec 2021 06:11:09 GMT +Content-Type: application/javascript +Last-Modified: Mon, 20 Dec 2021 01:23:10 GMT +Transfer-Encoding: chunked +Connection: keep-alive +Vary: Accept-Encoding +ETag: W/"61bfdafe-21bc4" +Access-Control-Allow-Origin: * +Access-Control-Allow-Credentials: true +Access-Control-Expose-Headers: Content-Length,Content-Range +Access-Control-Allow-Headers: Range +Content-Encoding: gzip + diff --git a/pingora-header-serde/samples/test/5 b/pingora-header-serde/samples/test/5 new file mode 100644 index 0000000..4bae598 --- /dev/null +++ b/pingora-header-serde/samples/test/5 @@ -0,0 +1,15 @@ +HTTP/1.1 200 OK +Server: nginx +Date: Thu, 23 Dec 2021 15:23:29 GMT +Content-Type: application/javascript +Last-Modified: Sat, 09 Oct 2021 23:41:34 GMT +Transfer-Encoding: chunked +Connection: keep-alive +Vary: Accept-Encoding +ETag: W/"616228ae-52054" +Access-Control-Allow-Origin: * +Access-Control-Allow-Credentials: true +Access-Control-Expose-Headers: Content-Length,Content-Range +Access-Control-Allow-Headers: Range +Content-Encoding: gzip + diff --git a/pingora-header-serde/samples/test/6 b/pingora-header-serde/samples/test/6 new file mode 100644 index 0000000..9d4c680 --- 
/dev/null +++ b/pingora-header-serde/samples/test/6 @@ -0,0 +1,15 @@ +HTTP/1.1 200 OK +Server: nginx +Date: Wed, 22 Dec 2021 06:30:29 GMT +Content-Type: application/javascript +Last-Modified: Mon, 29 Nov 2021 10:13:32 GMT +Transfer-Encoding: chunked +Connection: keep-alive +Vary: Accept-Encoding +ETag: W/"61a4a7cc-21df8" +Access-Control-Allow-Origin: * +Access-Control-Allow-Credentials: true +Access-Control-Expose-Headers: Content-Length,Content-Range +Access-Control-Allow-Headers: Range +Content-Encoding: gzip + diff --git a/pingora-header-serde/samples/test/7 b/pingora-header-serde/samples/test/7 new file mode 100644 index 0000000..b57e5c0 --- /dev/null +++ b/pingora-header-serde/samples/test/7 @@ -0,0 +1,14 @@ +HTTP/1.1 200 OK +server: nginx +date: Sat, 25 Dec 2021 03:05:35 GMT +content-type: application/javascript +last-modified: Fri, 24 Dec 2021 04:20:01 GMT +transfer-encoding: chunked +connection: keep-alive +vary: Accept-Encoding +etag: W/"61c54a71-2d590" +access-control-allow-origin: * +access-control-allow-credentials: true +access-control-expose-headers: Content-Length,Content-Range +access-control-allow-headers: Range +content-encoding: gzip diff --git a/pingora-header-serde/src/dict.rs b/pingora-header-serde/src/dict.rs new file mode 100644 index 0000000..bc50ada --- /dev/null +++ b/pingora-header-serde/src/dict.rs @@ -0,0 +1,88 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
Training to generate the zstd dictionary. + +use std::fs; +use zstd::dict; + +/// Train the zstd dictionary from all the files under the given `dir_path` +/// +/// The output will be the trained dictionary +pub fn train<P: AsRef<std::path::Path>>(dir_path: P) -> Vec<u8> { + // TODO: check f is file, it can be dir + let files = fs::read_dir(dir_path) + .unwrap() + .filter_map(|entry| entry.ok().map(|f| f.path())); + dict::from_files(files, 64 * 1024 * 1024).unwrap() +} + +#[cfg(test)] +mod test { + use super::*; + use crate::resp_header_to_buf; + use pingora_http::ResponseHeader; + + fn gen_test_dict() -> Vec<u8> { + let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("samples/test"); + train(path) + } + + fn gen_test_header() -> ResponseHeader { + let mut header = ResponseHeader::build(200, None).unwrap(); + header + .append_header("Date", "Thu, 23 Dec 2021 11:23:29 GMT") + .unwrap(); + header + .append_header("Last-Modified", "Sat, 09 Oct 2021 22:41:34 GMT") + .unwrap(); + header.append_header("Connection", "keep-alive").unwrap(); + header.append_header("Vary", "Accept-encoding").unwrap(); + header.append_header("Content-Encoding", "gzip").unwrap(); + header + .append_header("Access-Control-Allow-Origin", "*") + .unwrap(); + header + } + + #[test] + fn test_ser_with_dict() { + let dict = gen_test_dict(); + let serde = crate::HeaderSerde::new(Some(dict)); + let serde_no_dict = crate::HeaderSerde::new(None); + let header = gen_test_header(); + + let compressed = serde.serialize(&header).unwrap(); + let compressed_no_dict = serde_no_dict.serialize(&header).unwrap(); + let mut buf = vec![]; + let uncompressed = resp_header_to_buf(&header, &mut buf); + + assert!(compressed.len() < uncompressed); + assert!(compressed.len() < compressed_no_dict.len()); + } + + #[test] + fn test_ser_de_with_dict() { + let dict = gen_test_dict(); + let serde = crate::HeaderSerde::new(Some(dict)); + let header = gen_test_header(); + + let compressed = 
serde.serialize(&header).unwrap(); + let header2 = serde.deserialize(&compressed).unwrap(); + + assert_eq!(header.status, header2.status); + assert_eq!(header.headers, header2.headers); + } +} diff --git a/pingora-header-serde/src/lib.rs b/pingora-header-serde/src/lib.rs new file mode 100644 index 0000000..73b9b29 --- /dev/null +++ b/pingora-header-serde/src/lib.rs @@ -0,0 +1,203 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! HTTP Response header serialization with compression +//! +//! This crate is able to serialize http response header to about 1/3 of its original size (HTTP/1.1 wire format) +//! with trained dictionary. + +#![warn(clippy::all)] +#![allow(clippy::new_without_default)] +#![allow(clippy::type_complexity)] + +pub mod dict; +mod thread_zstd; + +use bytes::BufMut; +use http::Version; +use pingora_error::{Error, ErrorType, Result}; +use pingora_http::ResponseHeader; +use std::cell::RefCell; +use std::ops::DerefMut; +use thread_local::ThreadLocal; + +/// HTTP Response header serialization +/// +/// This struct provides the APIs to convert HTTP response header into compressed wired format for +/// storage. 
+pub struct HeaderSerde { + compression: thread_zstd::Compression, + level: i32, + // internal buffer for uncompressed data to be compressed and vice versa + buf: ThreadLocal<RefCell<Vec<u8>>>, +} + +const MAX_HEADER_SIZE: usize = 64 * 1024; +const COMPRESS_LEVEL: i32 = 3; + +impl HeaderSerde { + /// Create a new [HeaderSerde] + /// + /// An optional zstd compression dictionary can be provided to improve the compression ratio + /// and speed. See [dict] for more details. + pub fn new(dict: Option<Vec<u8>>) -> Self { + if let Some(dict) = dict { + HeaderSerde { + compression: thread_zstd::Compression::with_dict(dict), + level: COMPRESS_LEVEL, + buf: ThreadLocal::new(), + } + } else { + HeaderSerde { + compression: thread_zstd::Compression::new(), + level: COMPRESS_LEVEL, + buf: ThreadLocal::new(), + } + } + } + + /// Serialize the given response header + pub fn serialize(&self, header: &ResponseHeader) -> Result<Vec<u8>> { + // for now we use HTTP 1.1 wire format for that + // TODO: should convert to h1 if the incoming header is for h2 + let mut buf = self + .buf + .get_or(|| RefCell::new(Vec::with_capacity(MAX_HEADER_SIZE))) + .borrow_mut(); + buf.clear(); // reset the buf + resp_header_to_buf(header, &mut buf); + self.compression + .compress(&buf, self.level) + .map_err(|e| into_error(e, "compress header")) + } + + /// Deserialize the given response header + pub fn deserialize(&self, data: &[u8]) -> Result<ResponseHeader> { + let mut buf = self + .buf + .get_or(|| RefCell::new(Vec::with_capacity(MAX_HEADER_SIZE))) + .borrow_mut(); + buf.clear(); // reset the buf + self.compression + .decompress_to_buffer(data, buf.deref_mut()) + .map_err(|e| into_error(e, "decompress header"))?; + buf_to_http_header(&buf) + } +} + +#[inline] +fn into_error(e: &'static str, context: &'static str) -> Box<Error> { + Error::because(ErrorType::InternalError, context, e) +} + +const CRLF: &[u8; 2] = b"\r\n"; + +// Borrowed from pingora http1 +#[inline] +fn resp_header_to_buf(resp: 
&ResponseHeader, buf: &mut Vec<u8>) -> usize { + // Status-Line + let version = match resp.version { + Version::HTTP_10 => "HTTP/1.0 ", + Version::HTTP_11 => "HTTP/1.1 ", + _ => "HTTP/1.1 ", // store everything else (including h2) in http 1.1 format + }; + buf.put_slice(version.as_bytes()); + let status = resp.status; + buf.put_slice(status.as_str().as_bytes()); + buf.put_u8(b' '); + let reason = status.canonical_reason(); + if let Some(reason_buf) = reason { + buf.put_slice(reason_buf.as_bytes()); + } + buf.put_slice(CRLF); + + // headers + resp.header_to_h1_wire(buf); + + buf.put_slice(CRLF); + + buf.len() +} + +// Should match pingora http1 setting +const MAX_HEADERS: usize = 160; + +#[inline] +fn buf_to_http_header(buf: &[u8]) -> Result<ResponseHeader> { + let mut headers = vec![httparse::EMPTY_HEADER; MAX_HEADERS]; + let mut resp = httparse::Response::new(&mut headers); + + match resp.parse(buf) { + Ok(s) => match s { + httparse::Status::Complete(_size) => parsed_to_header(&resp), + // we always feed the but that contains the entire header to parse + _ => Error::e_explain(ErrorType::InternalError, "incomplete uncompressed header"), + }, + Err(e) => Error::e_because( + ErrorType::InternalError, + format!( + "parsing failed on uncompressed header, {}", + String::from_utf8_lossy(buf) + ), + e, + ), + } +} + +#[inline] +fn parsed_to_header(parsed: &httparse::Response) -> Result<ResponseHeader> { + // code should always be there + let mut resp = ResponseHeader::build(parsed.code.unwrap(), Some(parsed.headers.len()))?; + + for header in parsed.headers.iter() { + resp.append_header(header.name.to_string(), header.value)?; + } + + Ok(resp) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ser_wo_dict() { + let serde = HeaderSerde::new(None); + let mut header = ResponseHeader::build(200, None).unwrap(); + header.append_header("foo", "bar").unwrap(); + header.append_header("foo", "barbar").unwrap(); + header.append_header("foo", 
"barbarbar").unwrap(); + header.append_header("Server", "Pingora").unwrap(); + + let compressed = serde.serialize(&header).unwrap(); + let mut buf = vec![]; + let uncompressed = resp_header_to_buf(&header, &mut buf); + assert!(compressed.len() < uncompressed); + } + + #[test] + fn test_ser_de_no_dict() { + let serde = HeaderSerde::new(None); + let mut header = ResponseHeader::build(200, None).unwrap(); + header.append_header("foo1", "bar1").unwrap(); + header.append_header("foo2", "barbar2").unwrap(); + header.append_header("foo3", "barbarbar3").unwrap(); + header.append_header("Server", "Pingora").unwrap(); + + let compressed = serde.serialize(&header).unwrap(); + let header2 = serde.deserialize(&compressed).unwrap(); + assert_eq!(header.status, header2.status); + assert_eq!(header.headers, header2.headers); + } +} diff --git a/pingora-header-serde/src/thread_zstd.rs b/pingora-header-serde/src/thread_zstd.rs new file mode 100644 index 0000000..5c6406e --- /dev/null +++ b/pingora-header-serde/src/thread_zstd.rs @@ -0,0 +1,79 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::cell::RefCell; +use thread_local::ThreadLocal; + +/// Each thread will own its compression and decompression CTXes, and they share a single dict +/// https://facebook.github.io/zstd/zstd_manual.html recommends to reuse ctx per thread + +#[derive(Default)] +pub struct Compression { + com_context: ThreadLocal<RefCell<zstd_safe::CCtx<'static>>>, + de_context: ThreadLocal<RefCell<zstd_safe::DCtx<'static>>>, + dict: Vec<u8>, +} + +// these codes are inspired by zstd crate + +impl Compression { + pub fn new() -> Self { + Compression { + com_context: ThreadLocal::new(), + de_context: ThreadLocal::new(), + dict: vec![], + } + } + pub fn with_dict(dict: Vec<u8>) -> Self { + Compression { + com_context: ThreadLocal::new(), + de_context: ThreadLocal::new(), + dict, + } + } + + pub fn compress_to_buffer<C: zstd_safe::WriteBuf + ?Sized>( + &self, + source: &[u8], + destination: &mut C, + level: i32, + ) -> Result<usize, &'static str> { + self.com_context + .get_or(|| RefCell::new(zstd_safe::create_cctx())) + .borrow_mut() + .compress_using_dict(destination, source, &self.dict[..], level) + .map_err(zstd_safe::get_error_name) + } + + pub fn compress(&self, data: &[u8], level: i32) -> Result<Vec<u8>, &'static str> { + let buffer_len = zstd_safe::compress_bound(data.len()); + let mut buffer = Vec::with_capacity(buffer_len); + + self.compress_to_buffer(data, &mut buffer, level)?; + + Ok(buffer) + } + + pub fn decompress_to_buffer<C: zstd_safe::WriteBuf + ?Sized>( + &self, + source: &[u8], + destination: &mut C, + ) -> Result<usize, &'static str> { + self.de_context + .get_or(|| RefCell::new(zstd_safe::create_dctx())) + .borrow_mut() + .decompress_using_dict(destination, source, &self.dict) + .map_err(zstd_safe::get_error_name) + } +} diff --git a/pingora-header-serde/src/trainer.rs b/pingora-header-serde/src/trainer.rs new file mode 100644 index 0000000..36308e5 --- /dev/null +++ b/pingora-header-serde/src/trainer.rs @@ -0,0 +1,23 @@ +// Copyright 2024 Cloudflare, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use pingora_header_serde::dict::train; +use std::env; +use std::io::{self, Write}; + +pub fn main() { + let args: Vec<String> = env::args().collect(); + let dict = train(&args[1]); + io::stdout().write_all(&dict).unwrap(); +} |