1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
// Copyright 2024 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Training to generate the zstd dictionary.
use std::fs;
use zstd::dict;
/// Train the zstd dictionary from all the files under the given `dir_path`
///
/// The output will be the trained dictionary
pub fn train<P: AsRef<std::path::Path>>(dir_path: P) -> Vec<u8> {
// TODO: check f is file, it can be dir
let files = fs::read_dir(dir_path)
.unwrap()
.filter_map(|entry| entry.ok().map(|f| f.path()));
dict::from_files(files, 64 * 1024 * 1024).unwrap()
}
#[cfg(test)]
mod test {
use super::*;
use crate::resp_header_to_buf;
use pingora_http::ResponseHeader;
fn gen_test_dict() -> Vec<u8> {
let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("samples/test");
train(path)
}
fn gen_test_header() -> ResponseHeader {
let mut header = ResponseHeader::build(200, None).unwrap();
header
.append_header("Date", "Thu, 23 Dec 2021 11:23:29 GMT")
.unwrap();
header
.append_header("Last-Modified", "Sat, 09 Oct 2021 22:41:34 GMT")
.unwrap();
header.append_header("Connection", "keep-alive").unwrap();
header.append_header("Vary", "Accept-encoding").unwrap();
header.append_header("Content-Encoding", "gzip").unwrap();
header
.append_header("Access-Control-Allow-Origin", "*")
.unwrap();
header
}
#[test]
fn test_ser_with_dict() {
let dict = gen_test_dict();
let serde = crate::HeaderSerde::new(Some(dict));
let serde_no_dict = crate::HeaderSerde::new(None);
let header = gen_test_header();
let compressed = serde.serialize(&header).unwrap();
let compressed_no_dict = serde_no_dict.serialize(&header).unwrap();
let mut buf = vec![];
let uncompressed = resp_header_to_buf(&header, &mut buf);
assert!(compressed.len() < uncompressed);
assert!(compressed.len() < compressed_no_dict.len());
}
#[test]
fn test_deserialize_with_dict() {
let dict = gen_test_dict();
let serde = crate::HeaderSerde::new(Some(dict));
let serde_no_dict = crate::HeaderSerde::new(None);
let header = gen_test_header();
let compressed = serde.serialize(&header).unwrap();
let compressed_no_dict = serde_no_dict.serialize(&header).unwrap();
let from_dict_header = serde.deserialize(&compressed).unwrap();
let from_no_dict_header = serde_no_dict.deserialize(&compressed_no_dict).unwrap();
assert_eq!(from_dict_header.status, from_no_dict_header.status);
assert_eq!(from_dict_header.headers, from_no_dict_header.headers);
}
#[test]
fn test_ser_de_with_dict() {
let dict = gen_test_dict();
let serde = crate::HeaderSerde::new(Some(dict));
let header = gen_test_header();
let compressed = serde.serialize(&header).unwrap();
let header2 = serde.deserialize(&compressed).unwrap();
assert_eq!(header.status, header2.status);
assert_eq!(header.headers, header2.headers);
}
}
|