From 113b6641ea3e4aca7d2e97e48e4e393461713e7f Mon Sep 17 00:00:00 2001 From: Laura Date: Wed, 13 Aug 2025 21:32:29 +0200 Subject: [PATCH] better sniff and dng --- README.md | 2 +- go.mod | 1 + go.sum | 2 + integration/integration_test.go | 2 + internal/codec/detect.go | 80 ++++++++++---- internal/codec/dng/dng.go | 169 +++++++++++++++++++++++++++++ internal/codec/svg/svg.go | 2 +- internal/codec/tga/tga.go | 85 ++++++++++----- internal/codec/tiff/tiff.go | 36 +++--- internal/codec/{xpm => xbm}/xbm.go | 2 +- internal/codec/{xbm => xpm}/xpm.go | 4 +- 11 files changed, 317 insertions(+), 68 deletions(-) create mode 100644 internal/codec/dng/dng.go rename internal/codec/{xpm => xbm}/xbm.go (98%) rename internal/codec/{xbm => xpm}/xpm.go (96%) diff --git a/README.md b/README.md index 3a25b97..9668f44 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ FFWebP is a small, single-binary CLI for converting images between formats, thin - Single binary: no external tools required - Auto-detects input codec and infers output from the file extension -- Supports AVIF, BMP, Farbfeld, GIF, HEIF/HEIC (decode-only), ICO/CUR, JPEG, JPEG XL, PCX, PNG, PNM (PBM/PGM/PPM/PAM), PSD (decode-only), QOI, SVG (decode-only), TGA, TIFF, WebP, XBM, XCF (decode-only) and XPM +- Supports AVIF, BMP, DNG (decode-only), Farbfeld, GIF, HEIF/HEIC (decode-only), ICO/CUR, JPEG, JPEG XL, PCX, PNG, PNM (PBM/PGM/PPM/PAM), PSD (decode-only), QOI, SVG (decode-only), TGA, TIFF, WebP, XBM, XCF (decode-only) and XPM - Lossy or lossless output with configurable quality - Thumbnail generation via Lanczos3 resampling - Per-codec flags for fine-grained control (see `ffwebp --help`) diff --git a/go.mod b/go.mod index fd4b1e4..f856a64 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/gonutz/xcf v0.0.0-20180404091035-c002b9533d97 github.com/hullerob/go.farbfeld v0.0.0-20181222022525-3661193c725f github.com/kriticalflare/qoi v0.0.0-20240815192827-34f66f23bcef + github.com/mdouchement/dng v0.0.0-20230730131840-4066c9106942 github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 github.com/oov/psd v0.0.0-20220121172623-5db5eafcecbb github.com/samuel/go-pcx v0.0.0-20210515040514-6a5ce4d132f7 diff --git a/go.sum b/go.sum index 3f00f2b..3888c46 100644 --- a/go.sum +++ b/go.sum @@ -28,6 +28,8 @@ github.com/hullerob/go.farbfeld v0.0.0-20181222022525-3661193c725f h1:1LkiAnH6Rh github.com/hullerob/go.farbfeld v0.0.0-20181222022525-3661193c725f/go.mod h1:mQEoc766DxPTAwQ54neWTK/lFqIeSO7OU6bqZsceglw= github.com/kriticalflare/qoi v0.0.0-20240815192827-34f66f23bcef h1:XHb/eK43B8XuqAO5jHILCXzZP3pBamGmn5PcGjTZTuE= github.com/kriticalflare/qoi v0.0.0-20240815192827-34f66f23bcef/go.mod h1:skc5Zgfi3XE//1zgGGPC1abynJwsZhFxOiwkCrwL4Z8= +github.com/mdouchement/dng v0.0.0-20230730131840-4066c9106942 h1:UA97jLO4tz9u69BhytirCXKwQhce4FoaUEYj0Sgp/HQ= +github.com/mdouchement/dng v0.0.0-20230730131840-4066c9106942/go.mod h1:pui8xMvtvG4x7qr6cUrto3et8w2np6n9cTLZFpN/ELY= github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ= github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= github.com/oov/psd v0.0.0-20220121172623-5db5eafcecbb h1:JF9kOhBBk4WPF7luXFu5yR+WgaFm9L/KiHJHhU9vDwA= diff --git a/integration/integration_test.go b/integration/integration_test.go index 2ad1729..9810bab 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -15,10 +15,12 @@ import ( var ( executable = "ffwebp" encodeOnly = map[string]bool{ + "dng": true, "heic": true, "heif": true, "psd": true, "svg": true, + "xcf": true, } ) diff --git a/internal/codec/detect.go b/internal/codec/detect.go index 071e6eb..8ca7ca2 100644 --- a/internal/codec/detect.go +++ b/internal/codec/detect.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "path/filepath" + "sort" "strings" ) @@ -30,17 +31,15 @@ func (s *Sniffed) String() string { } func Sniff(reader io.Reader, input string, ignoreExtension bool) (*Sniffed, io.Reader, error) { + var ( + hintedExt string + hintedCodec Codec + ) + if !ignoreExtension { - ext := strings.ToLower(strings.TrimPrefix(filepath.Ext(input), ".")) - if ext != "" { - codec, _ := FindCodec(ext, false) - if codec != nil { - return &Sniffed{ - Header: []byte("." + ext), - Confidence: 100, - Codec: codec, - }, reader, nil - } + hintedExt = strings.ToLower(strings.TrimPrefix(filepath.Ext(input), ".")) + if hintedExt != "" { + hintedCodec, _ = FindCodec(hintedExt, false) } } @@ -51,10 +50,15 @@ func Sniff(reader io.Reader, input string, ignoreExtension bool) (*Sniffed, io.R ra := bytes.NewReader(buf) + type candidate struct { + codec Codec + confidence int + header []byte + } + var ( - best int - magic []byte - guess Codec + best int + list []candidate ) for _, codec := range codecs { @@ -69,21 +73,59 @@ func Sniff(reader io.Reader, input string, ignoreExtension bool) (*Sniffed, io.R return nil, nil, err } + fmt.Println(codec.String(), confidence) + + if confidence <= 0 { + continue + } + + list = append(list, candidate{ + codec: codec, + confidence: confidence, + header: header, + }) + if confidence > best { best = confidence - magic = header - guess = codec } } - if guess == nil { + if len(list) == 0 || best <= 0 { return nil, nil, errors.New("unknown input format") } + var top []candidate + + for _, cand := range list { + if cand.confidence == best { + top = append(top, cand) + } + } + + if hintedCodec != nil { + for _, cand := range top { + if cand.codec != hintedCodec { + continue + } + + return &Sniffed{ + Header: cand.header, + Confidence: cand.confidence, + Codec: cand.codec, + }, bytes.NewReader(buf), nil + } + } + + sort.Slice(top, func(i, j int) bool { + return top[i].codec.String() < top[j].codec.String() + }) + + chosen := top[0] + return &Sniffed{ - Header: magic, - Confidence: best, - Codec: guess, + Header: chosen.header, + Confidence: chosen.confidence, + Codec: chosen.codec, }, bytes.NewReader(buf), nil } diff --git a/internal/codec/dng/dng.go b/internal/codec/dng/dng.go new file mode 100644 index 0000000..f4cd63e --- /dev/null +++ b/internal/codec/dng/dng.go @@ -0,0 +1,169 @@ +package dng + +import ( + "bytes" + "encoding/binary" + "errors" + "image" + "io" + + "github.com/coalaura/ffwebp/internal/codec" + "github.com/coalaura/ffwebp/internal/opts" + "github.com/urfave/cli/v3" + + // pure-go DNG preview extractor + "github.com/mdouchement/dng" +) + +func init() { + codec.Register(impl{}) +} + +type impl struct{} + +func (impl) String() string { + return "dng" +} + +func (impl) Extensions() []string { + return []string{"dng"} +} + +func (impl) CanEncode() bool { + return false +} + +func (impl) Flags(flags []cli.Flag) []cli.Flag { + return flags +} + +func (impl) Sniff(reader io.ReaderAt) (int, []byte, error) { + header := make([]byte, 16) + if _, err := reader.ReadAt(header, 0); err != nil && err != io.EOF { + return 0, nil, err + } + + if len(header) < 8 { + return 0, nil, nil + } + + isLE := bytes.Equal(header[0:2], []byte{'I', 'I'}) + isBE := bytes.Equal(header[0:2], []byte{'M', 'M'}) + + if !isLE && !isBE { + return 0, nil, nil + } + + var ord binary.ByteOrder + + if isLE { + ord = binary.LittleEndian + } else { + ord = binary.BigEndian + } + + sig := ord.Uint16(header[2:4]) + + const ( + tiffClassic = 42 + tiffBig = 43 + tagDNGVersion = 0xC612 + ) + + switch sig { + case tiffClassic: + ifd0, err := readU32(reader, ord, 4) + if err != nil || ifd0 == 0 { + return 0, nil, nil + } + + n, err := readU16(reader, ord, int64(ifd0)) + if err != nil { + return 0, nil, nil + } + + for i := 0; i < int(n); i++ { + off := int64(ifd0) + 2 + int64(i)*12 + + tag, err := readU16(reader, ord, off) + if err != nil { + return 0, nil, nil + } + + if uint16(tag) == uint16(tagDNGVersion) { + return 110, header[:8], nil + } + } + case tiffBig: + ifd0, err := readU64(reader, ord, 8) + if err != nil || ifd0 == 0 { + return 0, nil, nil + } + + var bcnt [8]byte + + if _, err := reader.ReadAt(bcnt[:], int64(ifd0)); err != nil { + return 0, nil, nil + } + + n := ord.Uint64(bcnt[:]) + max := n + + if max > 1024 { + max = 1024 + } + + for i := uint64(0); i < max; i++ { + off := int64(ifd0) + 8 + int64(i)*20 + + tag, err := readU16(reader, ord, off) + if err != nil { + return 0, nil, nil + } + + if uint16(tag) == uint16(tagDNGVersion) { + return 110, header[:8], nil + } + } + } + + return 0, nil, nil +} + +func (impl) Decode(r io.Reader) (image.Image, error) { + return dng.Decode(r) +} + +func (impl) Encode(w io.Writer, img image.Image, _ opts.Common) error { + return errors.New("dng: encode not supported") +} + +func readU16(reader io.ReaderAt, ord binary.ByteOrder, off int64) (uint16, error) { + var b [2]byte + + if _, err := reader.ReadAt(b[:], off); err != nil { + return 0, err + } + + return ord.Uint16(b[:]), nil +} + +func readU32(reader io.ReaderAt, ord binary.ByteOrder, off int64) (uint32, error) { + var b [4]byte + + if _, err := reader.ReadAt(b[:], off); err != nil { + return 0, err + } + + return ord.Uint32(b[:]), nil +} + +func readU64(reader io.ReaderAt, ord binary.ByteOrder, off int64) (uint64, error) { + var b [8]byte + + if _, err := reader.ReadAt(b[:], off); err != nil { + return 0, err + } + + return ord.Uint64(b[:]), nil +} diff --git a/internal/codec/svg/svg.go b/internal/codec/svg/svg.go index 2c8784a..c8a3e00 100644 --- a/internal/codec/svg/svg.go +++ b/internal/codec/svg/svg.go @@ -64,7 +64,7 @@ func (impl) Flags(flags []cli.Flag) []cli.Flag { } func (impl) Sniff(reader io.ReaderAt) (int, []byte, error) { - buf := make([]byte, 128) + buf := make([]byte, 256) n, err := reader.ReadAt(buf, 0) if err != nil && err != io.EOF { diff --git a/internal/codec/tga/tga.go b/internal/codec/tga/tga.go index 781a01a..a06364f 100644 --- a/internal/codec/tga/tga.go +++ b/internal/codec/tga/tga.go @@ -1,14 +1,15 @@ package tga import ( - "image" - "io" + "encoding/binary" + "image" + "io" - "github.com/ftrvxmtrx/tga" + "github.com/ftrvxmtrx/tga" - "github.com/coalaura/ffwebp/internal/codec" - "github.com/coalaura/ffwebp/internal/opts" - "github.com/urfave/cli/v3" + "github.com/coalaura/ffwebp/internal/codec" + "github.com/coalaura/ffwebp/internal/opts" + "github.com/urfave/cli/v3" ) func init() { @@ -34,38 +35,62 @@ func (impl) Flags(flags []cli.Flag) []cli.Flag { } func (impl) Sniff(reader io.ReaderAt) (int, []byte, error) { - buf := make([]byte, 3) + // Validate full 18-byte TGA header to reduce false positives. + // Ref: https://www.fileformat.info/format/tga/egff.htm + hdr := make([]byte, 18) + if _, err := reader.ReadAt(hdr, 0); err != nil && err != io.EOF { + return 0, nil, err + } + if len(hdr) < 18 { + return 0, nil, nil + } - if _, err := reader.ReadAt(buf, 0); err != nil { - return 0, nil, err - } + idLength := hdr[0] + colorMapType := hdr[1] + imageType := hdr[2] - colorMapType := buf[1] + if colorMapType > 1 { + return 0, nil, nil + } - if colorMapType > 1 { - return 0, nil, nil - } + switch imageType { + case 1, 2, 3, 9, 10, 11: + // valid image types + default: + // Exclude type 0 (no image data) to avoid matching random files like ISO BMFF. + return 0, nil, nil + } - validImageTypes := map[byte]bool{ - 0: true, // no image data - 1: true, // colormapped, uncompressed - 2: true, // truecolor, uncompressed - 3: true, // grayscale, uncompressed - 9: true, // colormapped, RLE - 10: true, // truecolor, RLE - 11: true, // grayscale, RLE - } + // Width/height must be > 0 + width := binary.LittleEndian.Uint16(hdr[12:14]) + height := binary.LittleEndian.Uint16(hdr[14:16]) + if width == 0 || height == 0 { + return 0, nil, nil + } - imageType := buf[2] + // Pixel depth must be one of common values + bpp := hdr[16] + switch bpp { + case 8, 15, 16, 24, 32: + // ok + default: + return 0, nil, nil + } - if !validImageTypes[imageType] { - return 0, nil, nil - } + // If color map is present, validate that the length is non-zero + if colorMapType == 1 { + colorMapLength := binary.LittleEndian.Uint16(hdr[5:7]) + if colorMapLength == 0 { + return 0, nil, nil + } + } - header := make([]byte, 3) - copy(header, buf) + // Basic sanity: idLength must not push us past file start (not strictly necessary for sniff) + _ = idLength - return 100, header, nil + header := make([]byte, 18) + copy(header, hdr) + return 100, header, nil } func (impl) Decode(reader io.Reader) (image.Image, error) { diff --git a/internal/codec/tiff/tiff.go b/internal/codec/tiff/tiff.go index c12e034..b951715 100644 --- a/internal/codec/tiff/tiff.go +++ b/internal/codec/tiff/tiff.go @@ -62,24 +62,32 @@ func (impl) Flags(flags []cli.Flag) []cli.Flag { } func (impl) Sniff(reader io.ReaderAt) (int, []byte, error) { - magicLE := []byte{0x49, 0x49, 0x2A, 0x00} - magicBE := []byte{0x4D, 0x4D, 0x00, 0x2A} + // Recognize classic TIFF (II*\0 or MM\0*) and BigTIFF (signature 43) + buf := make([]byte, 16) + if _, err := reader.ReadAt(buf, 0); err != nil && err != io.EOF { + return 0, nil, err + } - buf := make([]byte, 4) + if len(buf) < 8 { + return 0, nil, nil + } - if _, err := reader.ReadAt(buf, 0); err != nil { - return 0, nil, err - } + if bytes.Equal(buf[0:4], []byte{0x49, 0x49, 0x2A, 0x00}) || + bytes.Equal(buf[0:4], []byte{0x4D, 0x4D, 0x00, 0x2A}) { + return 100, buf[:8], nil + } - if bytes.Equal(buf, magicLE) { - return 100, magicLE, nil - } + // BigTIFF: byte order + 43 marker, bytesize=8 + isLE := bytes.Equal(buf[0:2], []byte{'I', 'I'}) + isBE := bytes.Equal(buf[0:2], []byte{'M', 'M'}) + if isLE || isBE { + if (isLE && buf[2] == 0x2B && buf[3] == 0x00 && buf[4] == 0x08 && buf[5] == 0x00) || + (isBE && buf[2] == 0x00 && buf[3] == 0x2B && buf[4] == 0x00 && buf[5] == 0x08) { + return 100, buf[:8], nil + } + } - if bytes.Equal(buf, magicBE) { - return 100, magicBE, nil - } - - return 0, nil, nil + return 0, nil, nil } func (impl) Decode(reader io.Reader) (image.Image, error) { diff --git a/internal/codec/xpm/xbm.go b/internal/codec/xbm/xbm.go similarity index 98% rename from internal/codec/xpm/xbm.go rename to internal/codec/xbm/xbm.go index cd7f75b..47c5d29 100644 --- a/internal/codec/xpm/xbm.go +++ b/internal/codec/xbm/xbm.go @@ -54,7 +54,7 @@ func (impl) Sniff(reader io.ReaderAt) (int, []byte, error) { buf = buf[:n] if bytes.Contains(buf, []byte("#define")) && bytes.Contains(buf, []byte("bits[]")) { - return 90, buf, nil + return 80, buf, nil } return 0, nil, nil diff --git a/internal/codec/xbm/xpm.go b/internal/codec/xpm/xpm.go similarity index 96% rename from internal/codec/xbm/xpm.go rename to internal/codec/xpm/xpm.go index 17ff5ee..aa9b3dd 100644 --- a/internal/codec/xbm/xpm.go +++ b/internal/codec/xpm/xpm.go @@ -53,8 +53,8 @@ func (impl) Sniff(reader io.ReaderAt) (int, []byte, error) { buf = buf[:n] - if bytes.Contains(buf, []byte("/* XPM */")) && bytes.Contains(buf, []byte("bits[]")) { - return 90, buf, nil + if bytes.Contains(buf, []byte("/* XPM */")) && bytes.Contains(buf, []byte("[] = {")) { + return 80, buf, nil } return 0, nil, nil