// Source path: modules/caddyhttp/encode/encode.go
// Blob: bea86083a6704701c07b9bd46f2da7902981b442

// Copyright 2015 Matthew Holt and The Caddy Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package encode implements an encoder middleware for Caddy. The initial
// enhancements related to Accept-Encoding, minimum content length, and
// buffer/writer pools were adapted from https://github.com/xi2/httpgzip
// then modified heavily to accommodate modular encoders and fix bugs.
// Code borrowed from that repository is Copyright (c) 2015 The Httpgzip Authors.
package encode

import (
	"fmt"
	"io"
	"math"
	"net/http"
	"slices"
	"sort"
	"strconv"
	"strings"
	"sync"

	"github.com/caddyserver/caddy/v2"
	"github.com/caddyserver/caddy/v2/modules/caddyhttp"
)

// init registers the Encode handler module with Caddy when the
// package is loaded.
func init() {
	caddy.RegisterModule(Encode{})
}

// Encode is a middleware which can encode responses.
type Encode struct {
	// Selection of compression algorithms to choose from. The best one
	// will be chosen based on the client's Accept-Encoding header.
	EncodingsRaw caddy.ModuleMap `json:"encodings,omitempty" caddy:"namespace=http.encoders"`

	// If the client has no strong preference, choose these encodings in order.
	// Every entry must name an enabled encoding and may appear only once.
	Prefer []string `json:"prefer,omitempty"`

	// Only encode responses that are at least this many bytes long.
	// If left at zero, a default of 512 is used.
	MinLength int `json:"minimum_length,omitempty"`

	// Only encode responses that match against this ResponseMatcher.
	// The default is a collection of text-based Content-Type headers.
	Matcher *caddyhttp.ResponseMatcher `json:"match,omitempty"`

	// writerPools holds one pool of reusable encoders per encoding,
	// keyed by the encoding's Accept-Encoding name.
	writerPools map[string]*sync.Pool // TODO: these pools do not get reused through config reloads...
}

// CaddyModule describes this module to Caddy: its ID and a
// constructor that yields a fresh, unconfigured *Encode.
func (Encode) CaddyModule() caddy.ModuleInfo {
	constructor := func() caddy.Module {
		return new(Encode)
	}
	return caddy.ModuleInfo{
		ID:  "http.handlers.encode",
		New: constructor,
	}
}

// Provision sets up enc: it loads the configured encoder modules and
// registers a writer pool for each, then applies defaults for any
// settings left unset (MinLength and Matcher).
//
// It returns an error if the encoder modules cannot be loaded, if a
// loaded module does not implement Encoding, or if an encoding cannot
// be added (for example, because it was already registered). The
// underlying errors are wrapped so callers can inspect them with
// errors.Is / errors.As.
func (enc *Encode) Provision(ctx caddy.Context) error {
	mods, err := ctx.LoadModule(enc, "EncodingsRaw")
	if err != nil {
		return fmt.Errorf("loading encoder modules: %w", err)
	}
	for modName, modIface := range mods.(map[string]any) {
		// report a misbehaving module as a config error instead of panicking
		encoding, ok := modIface.(Encoding)
		if !ok {
			return fmt.Errorf("adding encoding %s: module of type %T is not an Encoding", modName, modIface)
		}
		if err := enc.addEncoding(encoding); err != nil {
			return fmt.Errorf("adding encoding %s: %w", modName, err)
		}
	}
	if enc.MinLength == 0 {
		enc.MinLength = defaultMinLength
	}

	if enc.Matcher == nil {
		// common text-based content types
		// list based on https://developers.cloudflare.com/speed/optimization/content/brotli/content-compression/#compression-between-cloudflare-and-website-visitors
		enc.Matcher = &caddyhttp.ResponseMatcher{
			Headers: http.Header{
				"Content-Type": []string{
					"application/atom+xml*",
					"application/eot*",
					"application/font*",
					"application/geo+json*",
					"application/graphql+json*",
					"application/javascript*",
					"application/json*",
					"application/ld+json*",
					"application/manifest+json*",
					"application/opentype*",
					"application/otf*",
					"application/rss+xml*",
					"application/truetype*",
					"application/ttf*",
					"application/vnd.api+json*",
					"application/vnd.ms-fontobject*",
					"application/wasm*",
					"application/x-httpd-cgi*",
					"application/x-javascript*",
					"application/x-opentype*",
					"application/x-otf*",
					"application/x-perl*",
					"application/x-protobuf*",
					"application/x-ttf*",
					"application/xhtml+xml*",
					"application/xml*",
					"font/ttf*",
					"font/otf*",
					"image/svg+xml*",
					"image/vnd.microsoft.icon*",
					"image/x-icon*",
					"multipart/bag*",
					"multipart/mixed*",
					"text/*",
				},
			},
		}
	}

	return nil
}

// Validate checks the Prefer list: every entry has to refer to an
// encoding that has a writer pool (i.e. is enabled), and no entry may
// be listed more than once. The first violation found is returned.
func (enc *Encode) Validate() error {
	seen := make(map[string]struct{}, len(enc.Prefer))
	for _, name := range enc.Prefer {
		if _, enabled := enc.writerPools[name]; !enabled {
			return fmt.Errorf("encoding %s not enabled", name)
		}
		if _, duplicate := seen[name]; duplicate {
			return fmt.Errorf("encoding %s is duplicated in prefer", name)
		}
		seen[name] = struct{}{}
	}
	return nil
}

// isEncodeAllowed reports whether the message carrying headers h may be
// encoded, i.e. whether its Cache-Control header is free of the
// "no-transform" directive.
//
// All Cache-Control field lines are inspected (a header may be split
// across several lines), and the directive is matched without regard to
// case, since cache directives are case-insensitive.
func isEncodeAllowed(h http.Header) bool {
	for _, cacheControl := range h.Values("Cache-Control") {
		if strings.Contains(strings.ToLower(cacheControl), "no-transform") {
			return false
		}
	}
	return true
}

// ServeHTTP wraps w in an encoding response writer for the most
// preferred encoding that is both accepted by the client and offered by
// enc, then invokes the next handler. If the request forbids
// transformation, or no acceptable encoding is offered, the response
// writer is passed through untouched.
func (enc *Encode) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error {
	if !isEncodeAllowed(r.Header) {
		return next.ServeHTTP(w, r)
	}

	for _, name := range AcceptedEncodings(r, enc.Prefer) {
		if _, offered := enc.writerPools[name]; !offered {
			continue // encoding not offered
		}

		encW := enc.openResponseWriter(name, w, r.Method == http.MethodConnect)
		w = encW
		// runs when ServeHTTP returns; the loop is left right below, so
		// at most one writer is ever opened and deferred
		defer encW.Close()

		// RFC 9110 section 8.8.3(.3): when encoding, the Etag gets a
		// "-<encoding>" suffix appended. Clients echo that value in
		// If-None-Match, but the upstream handler that produced the
		// original Etag knows nothing about the suffix, so remove it
		// again before the request continues down the chain.
		ifNoneMatch := r.Header.Get("If-None-Match")
		if ifNoneMatch != "" && !strings.HasPrefix(ifNoneMatch, "W/") {
			suffix := "-" + name + `"`
			if strings.HasSuffix(ifNoneMatch, suffix) {
				r.Header.Set("If-None-Match", strings.TrimSuffix(ifNoneMatch, suffix)+`"`)
			}
		}

		break
	}

	return next.ServeHTTP(w, r)
}

// addEncoding registers e under its Accept-Encoding name by creating a
// pool of encoders for it. It fails if e reports an empty name or if an
// encoding with the same name has been registered before.
func (enc *Encode) addEncoding(e Encoding) error {
	name := e.AcceptEncoding()
	if name == "" {
		return fmt.Errorf("encoder does not specify an Accept-Encoding value")
	}

	if enc.writerPools == nil {
		// first encoding: nothing can be registered yet
		enc.writerPools = make(map[string]*sync.Pool)
	} else if _, exists := enc.writerPools[name]; exists {
		return fmt.Errorf("encoder already added: %s", name)
	}

	pool := &sync.Pool{
		New: func() any { return e.NewEncoder() },
	}
	enc.writerPools[name] = pool
	return nil
}

// openResponseWriter creates a new response writer that may (or may not)
// encode the response with encodingName. The returned response writer MUST
// be closed after the handler completes.
//
// isConnect tells the writer that the request is a CONNECT request.
// The function only declares the responseWriter and hands it to
// initResponseWriter for setup; it is kept this small on purpose
// (see initResponseWriter regarding mid-stack inlining).
func (enc *Encode) openResponseWriter(encodingName string, w http.ResponseWriter, isConnect bool) *responseWriter {
	var rw responseWriter
	return enc.initResponseWriter(&rw, encodingName, w, isConnect)
}

// initResponseWriter fills in the responseWriter allocated by
// openResponseWriter (the split enables mid-stack inlining) and
// returns it. The wrapped writer is used as-is when it already is a
// *caddyhttp.ResponseWriterWrapper; otherwise it is wrapped in one.
func (enc *Encode) initResponseWriter(rw *responseWriter, encodingName string, wrappedRW http.ResponseWriter, isConnect bool) *responseWriter {
	wrapper, alreadyWrapped := wrappedRW.(*caddyhttp.ResponseWriterWrapper)
	if !alreadyWrapped {
		wrapper = &caddyhttp.ResponseWriterWrapper{ResponseWriter: wrappedRW}
	}

	rw.ResponseWriter = wrapper
	rw.config = enc
	rw.encodingName = encodingName
	rw.isConnect = isConnect
	return rw
}

// responseWriter writes to an underlying response writer
// using the encoding represented by encodingName and
// configured by config.
type responseWriter struct {
	http.ResponseWriter
	// encodingName is the Content-Encoding value to use and the key of
	// the encoder pool in config.writerPools.
	encodingName string
	// w is the active encoder; it stays nil unless init decides that the
	// response is to be encoded, and is reset to nil by Close.
	w Encoder
	// config is the Encode handler that created this writer.
	config *Encode
	// statusCode is the status passed to WriteHeader; zero means that
	// WriteHeader has not been called.
	statusCode int
	// wroteHeader is set once the decision about the header has been
	// made and the header must not be written by this type again.
	wroteHeader bool
	// isConnect is true when the request method is CONNECT.
	isConnect bool
}

// WriteHeader records status so it can be sent later, once it is known
// whether the response gets encoded. Two kinds of status are forwarded
// to the underlying writer right away: informational (1xx) ones, and
// successful (2xx) ones on CONNECT requests.
func (rw *responseWriter) WriteHeader(status int) {
	rw.statusCode = status

	// See #5849 and RFC 9110 section 15.4.5 (https://www.rfc-editor.org/rfc/rfc9110.html#section-15.4.5):
	// a 304 Not Modified must carry certain headers as if it were a 200
	// response. Vary is normally added in responseWriter.init(), but
	// that only runs when a body is written, so add it here for 304s.
	if status == http.StatusNotModified {
		if hdr := rw.Header(); !hasVaryValue(hdr, "Accept-Encoding") {
			hdr.Add("Vary", "Accept-Encoding")
		}
	}

	switch {
	case status >= 100 && status <= 199:
		// informational responses are sent immediately
		// see: https://caddy.community/t/disappear-103-early-hints-response-with-encode-enable-caddy-v2-7-6/23081/5
		rw.ResponseWriter.WriteHeader(status)

	case rw.isConnect && status >= 200 && status <= 299:
		// a 2xx answer to CONNECT means success, so send it immediately
		// see: https://github.com/caddyserver/caddy/issues/6733#issuecomment-2525058845
		rw.ResponseWriter.WriteHeader(status)
		rw.wroteHeader = true
	}
}

// Match determines whether encoding should be done, by evaluating
// enc.Matcher against the status code stored in rw and rw's response
// headers. enc.Matcher must be non-nil; Provision installs a default
// matcher when none is configured.
func (enc *Encode) Match(rw *responseWriter) bool {
	return enc.Matcher.Match(rw.statusCode, rw.Header())
}

// FlushError is a Flush variant that reports errors. Flushing is a
// no-op until the header has been written; after that it flushes the
// encoder (if one is active) and then the underlying response writer.
func (rw *responseWriter) FlushError() error {
	// a CONNECT request for which WriteHeader was never called is
	// treated as successful
	if rw.statusCode == 0 && rw.isConnect && !rw.wroteHeader {
		rw.WriteHeader(http.StatusOK)
	}

	// Flushing the underlying ResponseWriter would send status and
	// header, but that has to wait until it is decided whether the
	// response is encoded (and so needs a Content-Encoding header),
	// which happens in the first call to rw.Write (see bug in #4314).
	if !rw.wroteHeader {
		return nil
	}

	// the encoder buffers data, so flush it first
	// see: https://github.com/jjiang-stripe/caddy-slow-gzip
	if encoder := rw.w; encoder != nil {
		if err := encoder.Flush(); err != nil {
			return err
		}
	}

	//nolint:bodyclose
	return http.NewResponseController(rw.ResponseWriter).Flush()
}

// Write writes p to the response. On the first non-empty write it
// decides whether the response qualifies for encoding: the payload (or
// the declared Content-Length) must be strictly greater than the
// configured minimum length, and init must accept the response. From
// then on, data goes through the encoder if one was set up, or directly
// to the underlying writer otherwise.
func (rw *responseWriter) Write(p []byte) (int, error) {
	// nothing to write, probably a HEAD request
	if len(p) == 0 {
		return 0, nil
	}

	// a CONNECT request for which WriteHeader was never called is
	// treated as successful
	if rw.statusCode == 0 && rw.isConnect && !rw.wroteHeader {
		rw.WriteHeader(http.StatusOK)
	}

	// decide about encoding: check the length, sniff the content type
	if minLen := rw.config.MinLength; !rw.wroteHeader && minLen > 0 {
		longEnough := len(p) > minLen
		if !longEnough {
			declared, err := strconv.Atoi(rw.Header().Get("Content-Length"))
			longEnough = err == nil && declared > minLen
		}

		if longEnough {
			hdr := rw.Header()
			if hdr.Get("Content-Type") == "" {
				hdr.Set("Content-Type", http.DetectContentType(p))
			}
			rw.init()
		}
	}

	// The header must be written exactly once before any body data.
	// With no status code recorded, nothing is written here and the
	// standard library emits its default status on the first write.
	if !rw.wroteHeader {
		if code := rw.statusCode; code != 0 {
			rw.ResponseWriter.WriteHeader(code)
		}
		rw.wroteHeader = true
	}

	if rw.w == nil {
		return rw.ResponseWriter.Write(p)
	}
	return rw.w.Write(p)
}

// writerOnly wraps an io.Writer so that only its Write method is
// exposed; it is used to mask a ReadFrom method on the wrapped value.
type writerOnly struct {
	io.Writer
}

// sniffLen is the number of bytes ReadFrom copies through Write first,
// so that Write can decide whether the response should be compressed.
// The value is copied from the standard library.
const sniffLen = 512

// ReadFrom will try to use sendfile to copy from the reader to the response writer.
// It's only used if the response writer implements io.ReaderFrom and the data can't be compressed.
// It's based on the stdlib HTTP/1.1 response writer implementation.
// https://github.com/golang/go/blob/f4e3ec3dbe3b8e04a058d266adf8e048bab563f2/src/net/http/server.go#L586
//
// It returns the total number of bytes read from r, including the
// bytes consumed while sniffing, and the first error encountered.
func (rw *responseWriter) ReadFrom(r io.Reader) (int64, error) {
	rf, ok := rw.ResponseWriter.(io.ReaderFrom)
	// sendfile can't be used anyway
	if !ok {
		// mask ReadFrom to avoid infinite recursion
		return io.Copy(writerOnly{rw}, r)
	}

	// ns counts the bytes consumed by the sniffing step below
	var ns int64
	// try to sniff the content type and determine if the response should be compressed
	if !rw.wroteHeader && rw.config.MinLength > 0 {
		var (
			err error
			buf [sniffLen]byte
		)
		// mask ReadFrom to let Write determine if the response should be compressed
		ns, err = io.CopyBuffer(writerOnly{rw}, io.LimitReader(r, sniffLen), buf[:])
		// on error, or when r held fewer than sniffLen bytes, there is
		// nothing left to copy
		if err != nil || ns < sniffLen {
			return ns, err
		}
	}

	// the response will be compressed, no sendfile support
	if rw.w != nil {
		nr, err := io.Copy(rw.w, r)
		return nr + ns, err
	}
	// not compressed: hand the rest of r to the underlying writer's ReadFrom
	nr, err := rf.ReadFrom(r)
	return nr + ns, err
}

// Close finishes the response: if nothing was written yet it settles
// the header, and if an encoder is active it closes the encoder and
// returns it to its pool. The error from closing the encoder, if any,
// is returned.
func (rw *responseWriter) Close() error {
	// nothing was written, probably a HEAD request
	if !rw.wroteHeader {
		// still run init if the declared length qualifies for encoding
		if declared, err := strconv.Atoi(rw.Header().Get("Content-Length")); err == nil && declared > rw.config.MinLength {
			rw.init()
		}

		// issue #5059: only write a status code that was set explicitly
		if code := rw.statusCode; code != 0 {
			rw.ResponseWriter.WriteHeader(code)
		}
		rw.wroteHeader = true
	}

	if rw.w == nil {
		return nil
	}

	closeErr := rw.w.Close()
	// detach the encoder from this response before pooling it
	rw.w.Reset(nil)
	rw.config.writerPools[rw.encodingName].Put(rw.w)
	rw.w = nil
	return closeErr
}

// Unwrap returns the underlying ResponseWriter, i.e. the writer that
// rw embeds and forwards to.
func (rw *responseWriter) Unwrap() http.ResponseWriter {
	return rw.ResponseWriter
}

// init decides whether the response is encoded and, if so, prepares
// for it. It must run before the header is written. The response is
// encoded only if it has no Content-Encoding yet, its Cache-Control
// permits transformation, and it satisfies the configured matcher. In
// that case an encoder is taken from the pool and the response headers
// are adjusted to describe the encoded representation.
func (rw *responseWriter) init() {
	hdr := rw.Header()
	if hdr.Get("Content-Encoding") != "" || !isEncodeAllowed(hdr) || !rw.config.Match(rw) {
		return
	}

	rw.w = rw.config.writerPools[rw.encodingName].Get().(Encoder)
	rw.w.Reset(rw.ResponseWriter)

	hdr.Del("Content-Length") // https://github.com/golang/go/issues/14975
	hdr.Set("Content-Encoding", rw.encodingName)
	if !hasVaryValue(hdr, "Accept-Encoding") {
		hdr.Add("Vary", "Accept-Encoding")
	}
	hdr.Del("Accept-Ranges") // we don't know ranges for dynamically-encoded content

	// Strong ETags have to differ per encoding ("selected representation"),
	// see RFC 9110 section 8.8.3.3:
	// https://www.rfc-editor.org/rfc/rfc9110.html#name-example-entity-tags-varying
	// This is done by appending "-<encoding>" inside the quotes. The
	// suffix has to be stripped from If-None-Match again before a later
	// request reaches the upstream handlers, because they know nothing
	// about it.
	etag := hdr.Get("Etag")
	if etag == "" || strings.HasPrefix(etag, "W/") {
		return
	}
	hdr.Set("Etag", strings.TrimSuffix(etag, `"`)+"-"+rw.encodingName+`"`)
}

// hasVaryValue reports whether any Vary header line in hdr lists
// target. Each line is treated as a comma-separated list; elements are
// compared with surrounding whitespace removed and without regard to
// case.
func hasVaryValue(hdr http.Header, target string) bool {
	isTarget := func(element string) bool {
		return strings.EqualFold(strings.TrimSpace(element), target)
	}
	for _, line := range hdr.Values("Vary") {
		if slices.ContainsFunc(strings.Split(line, ","), isTarget) {
			return true
		}
	}
	return false
}

// AcceptedEncodings returns the list of encodings that the
// client supports, in descending order of preference.
// The client preference via q-factor and the server
// preference via preferredOrder are taken into account:
// encodings are ordered by q-factor first; among encodings
// with (nearly) equal q-factor, those listed in preferredOrder
// come first, in the order given there; remaining ties keep
// the order in which the client listed them.
//
// Encodings with a q-factor of 0 and empty list elements are
// left out. A missing or malformed q-factor counts as 1. If
// the Sec-WebSocket-Key header is present then non-identity
// encodings are not considered. The result is never nil.
// See RFC 9110 section 12.5.3:
// https://www.rfc-editor.org/rfc/rfc9110.html#section-12.5.3
func AcceptedEncodings(r *http.Request, preferredOrder []string) []string {
	acceptEncHeader := r.Header.Get("Accept-Encoding")
	if acceptEncHeader == "" {
		return []string{}
	}
	isWebSocket := r.Header.Get("Sec-WebSocket-Key") != ""

	elements := strings.Split(acceptEncHeader, ",")
	prefs := make([]encodingPreference, 0, len(elements))

	for _, accepted := range elements {
		parts := strings.Split(accepted, ";")
		encName := strings.ToLower(strings.TrimSpace(parts[0]))

		// empty list elements (e.g. "gzip,,br") carry no encoding
		if encName == "" {
			continue
		}

		// determine q-factor
		qFactor := 1.0
		if len(parts) > 1 {
			qFactorStr := strings.ToLower(strings.TrimSpace(parts[1]))
			if strings.HasPrefix(qFactorStr, "q=") {
				if qFactorFloat, err := strconv.ParseFloat(qFactorStr[2:], 32); err == nil {
					if qFactorFloat >= 0 && qFactorFloat <= 1 {
						qFactor = qFactorFloat
					}
				}
			}
		}

		// encodings with q-factor of 0 are not accepted;
		// use a small threshold to account for float precision
		if qFactor < 0.00001 {
			continue
		}

		// don't encode WebSocket handshakes
		if isWebSocket && encName != "identity" {
			continue
		}

		// set server preference: earlier entries in preferredOrder get a
		// higher value, encodings not listed there keep -1
		prefOrder := slices.Index(preferredOrder, encName)
		if prefOrder > -1 {
			prefOrder = len(preferredOrder) - prefOrder
		}

		prefs = append(prefs, encodingPreference{
			encoding:    encName,
			q:           qFactor,
			preferOrder: prefOrder,
		})
	}

	// sort preferences by descending q-factor first, then by preferOrder;
	// the sort is stable so that full ties keep the client's order
	sort.SliceStable(prefs, func(i, j int) bool {
		if math.Abs(prefs[i].q-prefs[j].q) < 0.00001 {
			return prefs[i].preferOrder > prefs[j].preferOrder
		}
		return prefs[i].q > prefs[j].q
	})

	prefEncNames := make([]string, len(prefs))
	for i := range prefs {
		prefEncNames[i] = prefs[i].encoding
	}

	return prefEncNames
}

// encodingPreference pairs an encoding with its q-factor and
// its rank in the server's preference list.
type encodingPreference struct {
	// encoding is the lower-cased encoding name.
	encoding string
	// q is the client's q-factor for the encoding.
	q float64
	// preferOrder is higher the earlier the encoding appears in the
	// server's preference list, and -1 if it is not listed.
	preferOrder int
}

// Encoder is a type which can encode a stream of data.
// Encoders are pooled and reused: Reset points an encoder at a new
// destination writer (or at nil when it is returned to the pool).
type Encoder interface {
	io.WriteCloser
	Reset(io.Writer)
	Flush() error // encoder by default buffers data to maximize compressing rate
}

// Encoding is a type which can create encoders of its kind
// and return the name used in the Accept-Encoding header.
// The name must not be empty; it is also used as the
// Content-Encoding value of encoded responses.
type Encoding interface {
	AcceptEncoding() string
	NewEncoder() Encoder
}

// Precompressed is a type which returns the filename suffix of a
// precompressed file and the Accept-Encoding header value to use when
// serving this file.
type Precompressed interface {
	AcceptEncoding() string
	Suffix() string
}

// defaultMinLength is the minimum length at which to compress content.
// Provision applies it when MinLength is left at zero.
const defaultMinLength = 512

// Interface guards: compile-time checks that *Encode implements
// the interfaces it is expected to satisfy.
var (
	_ caddy.Provisioner           = (*Encode)(nil)
	_ caddy.Validator             = (*Encode)(nil)
	_ caddyhttp.MiddlewareHandler = (*Encode)(nil)
)