80 Commits
rle ... main

Author SHA1 Message Date
f90c16e34b Implement CPM support 2024-01-10 16:23:09 -05:00
b6046e67dd Implement CPM support 2024-01-10 16:13:30 -05:00
df2a649cd3 Implement CPM support 2024-01-10 16:01:47 -05:00
3475cb379c Implement CPM support 2024-01-10 15:50:34 -05:00
9f2ddd5f76 Implement CPM support 2024-01-10 15:43:15 -05:00
1148e5c916 Update CMakeLists.txt 2024-01-10 15:25:22 -05:00
Rich Geldreich
7298d34590 Update README.md 2023-12-05 01:27:30 -05:00
Rich Geldreich
163d5714a0 Update README.md 2023-04-20 16:48:44 -04:00
Rich Geldreich
1039f5aa9c Update README.md 2023-04-20 16:47:54 -04:00
Rich Geldreich
b49d1d4d5a Update README.md 2023-04-20 16:46:09 -04:00
Rich Geldreich
da6991f598 Update README.md 2023-04-20 16:45:04 -04:00
Rich Geldreich
970d9290eb Update README.md 2023-04-20 16:44:26 -04:00
Richard Geldreich
6ed6544cc9 Changing the # of times to encode to 3, instead of 1, when CSV files aren't being generated. 2023-04-20 16:40:25 -04:00
Richard Geldreich
a77506cd2f Upgrading lodepng, QOI, and stb_image.h/stb_image_write.h, and adding the pvpng reader from basisu for benchmarking/comparison purposes 2023-04-20 16:21:37 -04:00
Richard Geldreich
a2b4e1bdf6 new files 2023-04-20 16:20:48 -04:00
Rich Geldreich
357d3a6b73 Update README.md 2023-03-30 20:01:28 -04:00
Rich Geldreich
6926f5a0a7 Update README.md 2022-01-11 16:01:37 -05:00
Rich Geldreich
75c8f930ef Update README.md 2022-01-10 14:56:56 -05:00
Rich Geldreich
a05746ac46 Update README.md 2022-01-10 14:55:02 -05:00
Richard Geldreich
e3834907b9 Merge remote-tracking branch 'origin/main' 2022-01-10 14:54:17 -05:00
Richard Geldreich
645d49cf6b Disabling Wuff's CRC checking
Minor printf() fixes
2022-01-10 14:53:52 -05:00
Rich Geldreich
85a734e9f2 Update README.md 2022-01-10 14:48:40 -05:00
Rich Geldreich
6fea592540 Update README.md 2022-01-05 19:39:26 -05:00
Rich Geldreich
32d7f3a388 Update README.md 2022-01-05 17:53:36 -05:00
Rich Geldreich
01296df391 Bumping version to 1.0.6 2022-01-05 17:14:20 -05:00
Rich Geldreich
a80ccdd937 New file 2022-01-05 17:14:20 -05:00
Rich Geldreich
c8ea38f3ee Update README.md 2022-01-05 17:11:43 -05:00
Rich Geldreich
3bd9c4dbda Adding workaround to the encoder to work around a bug in wuff's distance table decoder
Adding -t Huffman table training option to test harness - set FPNG_TRAIN_HUFFMAN_TABLES to 1 to use it
Re-trained the single pass mode's Huffman tables
2022-01-05 17:08:49 -05:00
Rich Geldreich
42303b97e2 Update README.md 2022-01-05 08:34:25 -05:00
Rich Geldreich
f813c4dfdb fixing cpuid code so it's only compiled on x86 2022-01-04 16:59:30 -05:00
Rich Geldreich
49f3505062 Update README.md 2022-01-02 13:18:01 -05:00
Rich Geldreich
1c9d03942c Fixing comment 2021-12-31 19:22:33 -05:00
Rich Geldreich
66b531956b Update README.md 2021-12-31 18:54:48 -05:00
Rich Geldreich
c471ebef9d Changing SSE adler32 to iterate 16 bytes at a time vs. 8.
Changing adler32 function types to match the crc32 function.
2021-12-31 18:52:55 -05:00
Rich Geldreich
c83e17b38a Adding SSE to the filter code, for another 10-15% compression perf gain 2021-12-31 18:22:36 -05:00
Rich Geldreich
3a3f22f968 fixing comment 2021-12-31 16:46:09 -05:00
Rich Geldreich
583b75c986 Fixing check here so it uses a uint64_t multiply 2021-12-31 16:41:33 -05:00
Rich Geldreich
b864f3324f Fixing typo in remark 2021-12-31 14:12:32 -05:00
Rich Geldreich
6bd0b8eef1 Ensuring temp buffer is always a little larger than it's needed, because the compresssors read 24bpp pixels as DWORD's 2021-12-31 07:24:23 -05:00
Rich Geldreich
53baa00cd9 Update README.md 2021-12-31 05:57:24 -05:00
Rich Geldreich
97fd32be9b Update README.md 2021-12-31 05:56:05 -05:00
Rich Geldreich
ec5d9b9bdb Update README.md 2021-12-31 05:55:14 -05:00
Rich Geldreich
4dae96d787 Update README.md 2021-12-30 19:40:18 -05:00
Rich Geldreich
0411071b71 Update README.md 2021-12-30 19:39:57 -05:00
Rich Geldreich
bfe5f9c69e Update README.md 2021-12-30 19:36:04 -05:00
Rich Geldreich
eb114d1ec4 Update README.md 2021-12-30 19:35:39 -05:00
Rich Geldreich
35ea4a1d0a Update README.md 2021-12-30 14:46:21 -05:00
Rich Geldreich
e73c9e85be Update README.md 2021-12-30 14:45:19 -05:00
Rich Geldreich
f365a60261 Update README.md 2021-12-30 14:44:18 -05:00
Rich Geldreich
cf3628e1cb Update README.md 2021-12-30 14:42:33 -05:00
Rich Geldreich
e38fec84ae Update README.md 2021-12-30 14:42:09 -05:00
Rich Geldreich
6f2c18c87c Update README.md 2021-12-30 14:41:47 -05:00
Rich Geldreich
510ee73d50 Removing temp variable from SSE CRC32 2021-12-30 14:12:48 -05:00
Rich Geldreich
e41442adba Update README.md 2021-12-30 13:30:59 -05:00
Rich Geldreich
1d9dd78754 Update README.md 2021-12-30 13:30:07 -05:00
Rich Geldreich
2b6fe20287 Update README.md 2021-12-30 13:28:42 -05:00
Rich Geldreich
fb92ce3441 Update README.md 2021-12-30 13:28:33 -05:00
Rich Geldreich
638515db72 Update README.md 2021-12-30 13:28:17 -05:00
Rich Geldreich
f5092dc912 Update README.md 2021-12-30 13:27:53 -05:00
Rich Geldreich
f2bfbe346b Update README.md 2021-12-30 13:27:32 -05:00
Rich Geldreich
08510bda69 Update README.md 2021-12-30 13:26:39 -05:00
Rich Geldreich
19ce397228 Update README.md 2021-12-30 13:26:18 -05:00
Rich Geldreich
1d95fa5283 Update README.md 2021-12-30 13:25:53 -05:00
Rich Geldreich
3d52195ee4 Update README.md 2021-12-30 13:25:06 -05:00
Rich Geldreich
dc5b3c8d97 Update README.md 2021-12-30 13:24:35 -05:00
Rich Geldreich
5a46a8d0fd Update README.md 2021-12-30 13:21:58 -05:00
Rich Geldreich
8de2ebaffa Update README.md 2021-12-30 13:21:03 -05:00
Rich Geldreich
c3266d15f6 Update README.md 2021-12-30 13:19:59 -05:00
Rich Geldreich
19f7736d9e Update README.md 2021-12-30 13:18:58 -05:00
Rich Geldreich
a046613061 Update README.md 2021-12-30 13:18:29 -05:00
Rich Geldreich
145ae411f7 Update README.md 2021-12-30 13:17:57 -05:00
Rich Geldreich
042482f62f Update README.md 2021-12-30 13:17:01 -05:00
Rich Geldreich
ab871b3b27 Update README.md 2021-12-30 13:16:12 -05:00
Rich Geldreich
6a9f1873b9 Update README.md 2021-12-30 13:15:33 -05:00
Rich Geldreich
1d8221cfcf Update README.md 2021-12-30 13:14:28 -05:00
Rich Geldreich
f95df78874 Adding SSE4.1 optimized CRC32 and Adler32 - 15% faster for 24bpp with MSVC 2022
Switching back to unlicense
Exposing some utility API's
Adding "SSE" option to CMakeLists.txt file
2021-12-30 12:57:21 -05:00
Rich Geldreich
6326cc844e Adding unaligned load/store macros, defaulting to unaligned on non-x86/x64, adding runtime endian_check() function to detect misconfiguration. 2021-12-28 04:24:28 -05:00
Rich Geldreich
c72b410573 big endian fix 2021-12-28 03:31:26 -05:00
Rich Geldreich
36686c0195 Inner loop simplifications, one minor bugfix to code which checks if a match would be larger than 4 literals 2021-12-27 16:46:16 -05:00
Rich Geldreich
3cb9b931c2 OSX fixes, some compile warning fixes 2021-12-27 15:53:44 -05:00
17 changed files with 51520 additions and 2012 deletions

View File

@@ -1,72 +1,99 @@
project(fpng_test)
cmake_minimum_required(VERSION 3.0)
option(BUILD_X64 "build 64-bit" TRUE)
message("Initial BUILD_X64=${BUILD_X64}")
if( NOT CMAKE_BUILD_TYPE )
set( CMAKE_BUILD_TYPE Release )
endif()
message( ${PROJECT_NAME} " build type: " ${CMAKE_BUILD_TYPE} )
if (BUILD_X64)
message("Building 64-bit")
else()
message("Building 32-bit")
endif()
if (NOT MSVC)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
set(CMAKE_CXX_FLAGS -std=c++11)
set(GCC_COMPILE_FLAGS "-fvisibility=hidden -fPIC -fno-strict-aliasing -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 -Wall -Wextra -Isrc")
if (NOT BUILD_X64)
set(GCC_COMPILE_FLAGS "${GCC_COMPILE_FLAGS} -m32")
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_LINK_FLAGS} -Wl,-rpath .")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GCC_COMPILE_FLAGS}")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${GCC_COMPILE_FLAGS}")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${GCC_COMPILE_FLAGS} -D_DEBUG")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_COMPILE_FLAGS}")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${GCC_COMPILE_FLAGS}")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${GCC_COMPILE_FLAGS} -D_DEBUG")
else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
set(FPNG_SRC_LIST ${COMMON_SRC_LIST}
src/fpng.cpp
src/Crc32.cpp
src/fpng_test.cpp
src/lodepng.cpp
)
if (APPLE)
set(BIN_DIRECTORY "bin_osx")
else()
set(BIN_DIRECTORY "bin")
endif()
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${BIN_DIRECTORY})
add_executable(fpng_test ${FPNG_SRC_LIST})
if (NOT MSVC)
target_link_libraries(fpng_test m pthread)
endif()
install(TARGETS fpng_test DESTINATION bin)
project(fpng)
cmake_minimum_required(VERSION 3.0)
option(BUILD_X64 "build 64-bit" TRUE)
option(SSE "SSE 4.1 support" FALSE)
message("Initial BUILD_X64=${BUILD_X64}")
message("Initial SSE=${SSE}")
if( NOT CMAKE_BUILD_TYPE )
set( CMAKE_BUILD_TYPE Release )
endif()
message( ${PROJECT_NAME} " build type: " ${CMAKE_BUILD_TYPE} )
if (BUILD_X64)
message("Building 64-bit")
else()
message("Building 32-bit")
endif()
if (SSE)
message("SSE enabled")
else()
message("SSE disabled")
endif()
if (NOT MSVC)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
set(CMAKE_CXX_FLAGS -std=c++11)
set(GCC_COMPILE_FLAGS "-fvisibility=hidden -fPIC -fno-strict-aliasing -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 -Wall -Wextra -Isrc")
if (NOT BUILD_X64)
set(GCC_COMPILE_FLAGS "${GCC_COMPILE_FLAGS} -m32")
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_LINK_FLAGS} -Wl,-rpath .")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GCC_COMPILE_FLAGS}")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${GCC_COMPILE_FLAGS}")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${GCC_COMPILE_FLAGS} -D_DEBUG")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_COMPILE_FLAGS}")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${GCC_COMPILE_FLAGS}")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${GCC_COMPILE_FLAGS} -D_DEBUG")
if (SSE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DFPNG_NO_SSE=0 -msse4.1 -mpclmul")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFPNG_NO_SSE=0 -msse4.1 -mpclmul")
else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DFPNG_NO_SSE=1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFPNG_NO_SSE=1")
endif()
else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
set(FPNG_SRC_LIST ${COMMON_SRC_LIST}
src/fpng.cpp
src/lodepng.cpp
src/pvpngreader.cpp
)
file(GLOB_RECURSE fpng_HEADERS "src/*.h")
add_library(fpng SHARED ${FPNG_SRC_LIST})
set_target_properties(fpng PROPERTIES LINKER_LANGUAGE CXX)
install(TARGETS ${PROJECT_NAME} DESTINATION lib/${PROJECT_NAME})
install(FILES ${fpng_HEADERS} DESTINATION include/${PROJECT_NAME})
include_directories("src")
if (APPLE)
set(BIN_DIRECTORY "bin_osx")
else()
set(BIN_DIRECTORY "bin")
endif()
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${BIN_DIRECTORY})
add_executable(fpng_test "src/fpng_test.cpp")
if (NOT MSVC)
target_link_libraries(fpng_test fpng m pthread)
endif()
install(TARGETS fpng_test DESTINATION bin)

141
README.md
View File

@@ -1,8 +1,13 @@
# fpng
fpng is a very fast C++ .PNG image reader/writer for 24/32bpp images. fpng.cpp was written to see just how fast you can write .PNG's without sacrificing too much compression relative to QOI and stb_image_write. The files written by fpng follow the [PNG standard](https://www.w3.org/TR/PNG/), are readable using any PNG decoder, and validate successfully using [pngcheck](https://www.w3.org/TR/PNG/).
fpng is a very fast C++ .PNG image reader/writer for 24/32bpp images. It's a [single source file](src/fpng.h) with no dependencies on any other library. fpng.cpp was written to see just how fast you can write .PNG's without sacrificing too much compression. The files written by fpng conform to the [PNG standard](https://www.w3.org/TR/PNG/), are readable using any PNG decoder, and load or validate successfully using libpng, wuffs, lodepng, stb_image, and [pngcheck](http://www.libpng.org/pub/png/apps/pngcheck.html). PNG files written using fpng can also be read using fpng faster than other PNG libraries, due to its explicit use of [Length-Limited Prefix Codes](https://create.stephan-brumme.com/length-limited-prefix-codes/) and an [optimized decoder](https://fastcompression.blogspot.com/2015/10/huffman-revisited-part-4-multi-bytes.html) that exploits the properties of these codes.
fpng.cpp compression compared to stb_image_write.h: 12-19x faster with roughly 5-11% avg. smaller files.
fpng.cpp decompression compared to stb_image_write.h: ~3x faster
fpng.cpp compression compared to stb_image_write.h: 12-19x faster with roughly 5-11% avg. smaller files.
fpng.cpp decompression compared to stb_image.h: 2.5-3x faster (on fpng compressed PNG's)
fpng.cpp compared to libpng: ~23x faster compression, 2.5-3x faster decompression (on fpng compressed PNG's)
fpng.cpp compared to Wuffs decompression: roughly 10% faster decompression (on fpng compressed PNG's - note Wuffs decompression is in general *extremely* fast)
Here's an example image encoded by fpng (a downsampled version of "bridge" from [here](http://imagecompression.info/test_images/)):
![fpng encoded "bridge" image](https://github.com/richgel999/fpng/blob/main/example.png)
@@ -42,41 +47,20 @@ lodepng: 352.10 MB 4.25 mps 28.84 mps
A well-behaved lossless compressor should output files roughly up to 1/3rd larger in this test. QOI's compressed output files are 1.94x larger vs. the 24bpp variants (697.20MB vs. 359.55MB), which is significantly more expansion than I would expect.
Benchmarks were made using the included fpng_test tool to generate .CSV files, MSVC 2019, on a Xeon E5-2690 3.00 GHz
## Notes
This version of FPNG always uses PNG filter #2 and is limited to only RLE matches (i.e. LZ matches with a match distance of either 3 or 4). It's around 5% weaker than the original release, which used LZRW1 parsing. (I'll eventually add back in the original parser as an option, but doing that will add more code/complexity to the project.)
Importantly, the fpng decoder can explicitly/purposely only decode PNG files written by fpng, otherwise it returns fpng::FPNG_DECODE_NOT_FPNG (so you can fall back to a general purpose PNG decoder).
Note fpng's built-in (precomputed) dynamic Huffman tables (used in the default one pass mode) were generated from the 6,600 images I use for testing texture compressors.
fpng's compressor places a special private ancillary chunk in its output files, which other PNG decompressors will ignore. The decompressor uses this chunk to determine if the file was written by fpng (enabling fast decompression). This chunk's definition is [here](https://github.com/richgel999/fpng/wiki/fdEC-PNG-chunk-definition).
lodepng v20210627 fetched 12/18/2021
stb_image_write.h v1.16 fetched 12/18/2021
qoi.h fetched 12/18/2021
## Low-level description
fpng's compressor uses a custom pixel-wise Deflate compressor which was optimized for simplicity over high ratios. The "parser" only supports RLE matches using a match distance of 3/4 bytes, all literals (except the PNG filter bytes) are output in groups of 3 or 4, all matches are multiples of 3/4 bytes, and it only utilizes a single dynamic Huffman block within a single PNG IDAT chunk. It utilizes 64-bit registers and exploits unaligned little endian reads/writes. (On big endian CPU's it'll use 32/64bpp byteswaps.)
There are two compressor variants in this release: a faster single pass compressor that utilizes a set of precomputed Huffman tables, or a slightly better two pass compressor that results in smaller files (enabled by passing FPNG_ENCODE_SLOWER flag to the compressor). fpng will fall back to using uncompressed Deflate blocks if the image fails to compress.
The fast decompressor included in fpng.cpp can explicitly only handle PNG files created by fpng. To detect these files, it looks for a PNG private ancillary chunk named "fdEC", which other readers will ignore because it's not marked as a "critical" PNG chunk. If this chunk isn't found, or the file doesn't conform to fpng's single IDAT and zlib constraints, the decompressor returns FPNG_DECODE_NOT_FPNG. The decompressor itself has numerous checks to ensure the PNG file was written by fpng (i.e. even if the fdEC chunk is present we don't blindly assume the Deflate data follows the right constraints).
The decompressor's memory usage is low relative to other PNG decompressors, because it doesn't need to make any temporary allocations to temporarily hold the decompressed zlib data. (This is one side benefit of always using LZ matches with a distance of only 3 or 4 bytes.) The only large allocation is the one used to hold the output image buffer, which it directly decompresses into. This property is useful on memory-constrained embedded platforms. It's possible for a fpng decompressor to only need to hold 2 scanlines in memory.
Passes over the input image and dynamic allocations are minimized, although it does use ```std::vector``` internally. The first scanline always uses filter #0, and the rest use filter #2 (previous scanline). It uses the fast CRC-32 code described by Brumme [here](https://create.stephan-brumme.com/crc32/). The original high-level PNG function (that code that writes the headers) was written by [Alex Evans](https://gist.github.com/908299).
Benchmarks were made using the included fpng_test tool to generate .CSV files, MSVC 2019, on a Xeon E5-2690 3.00 GHz. The above benchmarks were made before SSE adler32/crc32 functions were added to the encoder. With 24bpp images and MSVC2022 the encoder is now around 15% faster.
## Building
To build, compile from the included .SLN with Visual Studio 2019/2022 or use cmake to generate a .SLN file. For Linux/OSX, use "cmake ." then "make". Tested with MSVC 2019/gcc/clang.
To build, compile from the included .SLN with Visual Studio 2019/2022 or use cmake to generate a .SLN file. For Linux/OSX, use
I have only tested fpng.cpp on little endian systems. The code is there for big endian, and it should work, but it needs testing.
```
cmake -DSSE=1 .
make
```
Remove "-DSSE=1" on non-x86/x64 systems. The test executable will be in the "bin" or "bin_osx" subdirectory.
Tested with MSVC 2022/2019/gcc 7.5.0/clang 6.0 and 10.0. I have only tested fpng.cpp on little endian systems. The code is there for big endian, and it should work, but it needs testing.
## Testing
@@ -98,26 +82,36 @@ The test app decompresses fpng's output using lodepng, stb_image, and the fpng d
## Using fpng
To use fpng.cpp in other programs, copy fpng.cpp/.h and Crc32.cpp/.h into your project. No other configuration or files are needed. Computing the CRC-32 of buffers is a substantial proportion of overall compression time in fpng, so if you have a faster CRC-32 function you can modify `fpng_crc()` in fpng.cpp to call that instead. The one included in Crc32.cpp doesn't utilize any special CPU instruction sets, so it could be faster.
To use fpng.cpp in other programs, copy fpng.cpp/.h into your project. Alternatively, `#include "fpng.cpp"` and `#include "fpng.h"` in one place, and then `#include "fpng.h"` everywhere else.
`#include "fpng.h"` then call one of these C-style functions in the "fpng" namespace to encode an image:
There are a few optional compile-time defines you can use to configure fpng, particularly `FPNG_NO_SSE`. With gcc/clang on x86/x64, to get SSE you must compile with "-msse4.1 -mpclmul". Also, the code has only been tested with `-fno-strict-aliasing` (same as the Linux kernel, and MSVC's default). See the top of fpng.cpp for a list of the optional defines.
### Initialization
**Call `fpng::fpng_init()` once before using fpng** so it can detect if the CPU supports SSE 4.1+pclmul (for fast CRC-32 and Adler32). Otherwise, it'll always use the slower scalar fallbacks.
### Encoding
Call one of these C-style functions in the "fpng" namespace:
```
namespace fpng {
bool fpng_encode_image_to_memory(const void* pImage, uint32_t w, uint32_t h, uint32_t num_chans, std::vector<uint8_t>& out_buf, uint32_t flags = 0);
bool fpng_encode_image_to_file(const char* pFilename, const void* pImage, uint32_t w, uint32_t h, uint32_t num_chans, uint32_t flags = 0);
bool fpng_encode_image_to_memory(const void* pImage, uint32_t w, uint32_t h, uint32_t num_chans, std::vector<uint8_t>& out_buf, uint32_t flags = 0);
bool fpng_encode_image_to_file(const char* pFilename, const void* pImage, uint32_t w, uint32_t h, uint32_t num_chans, uint32_t flags = 0);
}
```
`num_chans` must be 3 or 4. There must be ```w*3*h``` or ```w*4*h``` bytes pointed to by ```pImage```. The image row pitch is always ```w*3``` or ```w*4``` bytes. There is no automatic determination if the image actually uses an alpha channel, so if you call it with 4 you will always get a 32bpp .PNG file.
The included fast decoder will only decode PNG files created by fpng. However, it has a full PNG chunk parser, and when it detects PNG files not written by fpng it returns the error code `FPNG_DECODE_NOT_FPNG` so you can fall back to a general purpose PNG reader. Also, the decompressor validates the compressed data during decompression and will immediately stop and return `FPNG_DECODE_NOT_FPNG` whenever any of the fpng constraints (implied by the fdEC marker's presence) are violated. You can use ```fpng_get_info()``` to quickly detect if a PNG file can be decoded using fpng.
### Decoding
**The included fast decoder will only decode PNG files created by fpng.** However, it has a full PNG chunk parser, and when it detects PNG files not written by fpng it returns the error code `FPNG_DECODE_NOT_FPNG` so you can fall back to a general purpose PNG reader. Also, the decompressor validates the compressed data during decompression and will immediately stop and return `FPNG_DECODE_NOT_FPNG` whenever any of the fpng constraints (implied by the fdEC marker's presence) are violated. You can use ```fpng_get_info()``` to quickly detect if a PNG file can be decoded using fpng.
```
namespace fpng {
int fpng_get_info(const void* pImage, uint32_t image_size, uint32_t& width, uint32_t& height, uint32_t& channels_in_file);
int fpng_decode_memory(const void* pImage, uint32_t image_size, std::vector<uint8_t>& out, uint32_t& width, uint32_t& height, uint32_t& channels_in_file, uint32_t desired_channels);
int fpng_decode_file(const char* pFilename, std::vector<uint8_t>& out, uint32_t& width, uint32_t& height, uint32_t& channels_in_file, uint32_t desired_channels);
int fpng_get_info(const void* pImage, uint32_t image_size, uint32_t& width, uint32_t& height, uint32_t& channels_in_file);
int fpng_decode_memory(const void* pImage, uint32_t image_size, std::vector<uint8_t>& out, uint32_t& width, uint32_t& height, uint32_t& channels_in_file, uint32_t desired_channels);
int fpng_decode_file(const char* pFilename, std::vector<uint8_t>& out, uint32_t& width, uint32_t& height, uint32_t& channels_in_file, uint32_t desired_channels);
}
```
@@ -125,10 +119,63 @@ int fpng_decode_file(const char* pFilename, std::vector<uint8_t>& out, uint32_t&
`width`, `height`, `channels_in_file` will be set to the image's dimensions and number of channels, which will always be 3 or 4.
`desired_channels` must be 3 or 4. If the input PNG file is 32bpp and you request 24bpp, the alpha channel be discarded. If the input is 24bpp and you request 32bpp, the alpha channel will be set to 0xFF.
`desired_channels` must be 3 or 4. If the input PNG file is 32bpp and you request 24bpp, the alpha channel will be discarded. If the input is 24bpp and you request 32bpp, the alpha channel will be set to 0xFF.
The return code will be `fpng::FPNG_DECODE_SUCCESS` on success, `fpng::FPNG_DECODE_NOT_FPNG` if the PNG file should be decoded with a general purpose decoder, or one of the other error values.
### Utility Functions
For convenience some of the lib's internal functionality is exposed through these API's:
```
namespace fpng {
bool fpng_cpu_supports_sse41();
uint32_t fpng_crc32(const void* pData, size_t size, uint32_t prev_crc32 = FPNG_CRC32_INIT);
uint32_t fpng_adler32(const void* pData, size_t size, uint32_t adler = FPNG_ADLER32_INIT);
}
```
## Python Bindings
They are [here](https://github.com/qrmt/fpng-python). Thanks [Oskar!](https://github.com/qrmt).
## Notes
- 4/20/2023: I upgraded lodepng, stb_image, and qoi to the latest versions. I also added pvpngreader.cpp/.h for benchmarking, which uses miniz internally for decompression. The relative encoding/decoding performance of QOI vs. PNG in general seems quite dependent on the C/C++ compiler you use.
pvpngreader.cpp relies on miniz.h for zlib decompression. It's been fuzzed using zzuf and is used in the [Basis Universal repo](https://github.com/binomialLLC/basis_universal) for PNG reading.
lodepng v20230410 fetched 4/20/2023
stb_image.h v2.28 fetched 4/20/2023
stb_image_write.h v1.16 fetched 12/18/2021 (still latest as of 4/20/2023)
qoi.h fetched 4/20/2023
- This version of FPNG always uses PNG filter #2 and is limited to only RLE matches (i.e. LZ matches with a match distance of either 3 or 4). It's around 5% weaker than the original release, which used LZRW1 parsing. (I'll eventually add back in the original parser as an option, but doing that will add more code/complexity to the project.)
Importantly, the fpng decoder can explicitly/purposely only decode PNG files written by fpng, otherwise it returns fpng::FPNG_DECODE_NOT_FPNG (so you can fall back to a general purpose PNG decoder).
fpng's compressor places a special private ancillary chunk in its output files, which other PNG decompressors will ignore. The decompressor uses this chunk to determine if the file was written by fpng (enabling fast decompression). This chunk's definition is [here](https://github.com/richgel999/fpng/wiki/fdEC-PNG-chunk-definition).
In single pass mode (the default), fpng uses a set of precomputed Deflate dynamic Huffman tables. Here's [how to use the fpng_test tool to compute custom tables](https://github.com/richgel999/fpng/wiki/How-to-train-new-Huffman-tables-for-custom-content).
Earlier versions of fpng (before 1.0.5) wrote valid PNG's that wuffs wouldn't accept. As far as I can tell this is a [bug in wuffs](https://github.com/google/wuffs/issues/66). I've added a workaround to fpng's encoder and re-trained its single pass Huffman tables, and I've also added the wuffs decoder to the fpng_test app.
## Low-level description
fpng's compressor uses a custom pixel-wise Deflate compressor which was optimized for simplicity over high ratios. The "parser" only supports RLE matches using a match distance of 3/4 bytes, all literals (except the PNG filter bytes) are output in groups of 3 or 4, all matches are multiples of 3/4 bytes, and it only utilizes a single dynamic Huffman block within a single PNG IDAT chunk. It utilizes 64-bit registers and exploits unaligned little endian reads/writes. (On big endian CPU's it'll use 32/64bpp byteswaps.)
There are two compressor variants in this release: a faster single pass compressor that utilizes a set of precomputed Huffman tables, or a slightly better two pass compressor that results in smaller files (enabled by passing FPNG_ENCODE_SLOWER flag to the compressor). fpng will fall back to using uncompressed Deflate blocks if the image fails to compress.
The fast decompressor included in fpng.cpp can explicitly only handle PNG files created by fpng. To detect these files, it looks for a PNG private ancillary chunk named "fdEC", which other readers will ignore because it's not marked as a "critical" PNG chunk. If this chunk isn't found, or the file doesn't conform to fpng's single IDAT and zlib constraints, the decompressor returns FPNG_DECODE_NOT_FPNG. The decompressor itself has numerous checks to ensure the PNG file was written by fpng (i.e. even if the fdEC chunk is present we don't blindly assume the Deflate data follows the right constraints).
The decompressor's memory usage is low relative to other PNG decompressors, because it doesn't need to make any temporary allocations to hold the decompressed zlib data. (This is one side benefit of always using LZ matches with a distance of only 3 or 4 bytes.) The only large allocation is the one used to hold the output image buffer, which it directly decompresses into. This property is useful on memory-constrained embedded platforms. It's possible for a fpng decompressor to only need to hold 2 scanlines in memory.
Passes over the input image and dynamic allocations are minimized, although it does use ```std::vector``` internally. The first scanline always uses filter #0, and the rest use filter #2 (previous scanline). It uses the fast "slice by 4" CRC-32 algorithm described by Brumme [here](https://create.stephan-brumme.com/crc32/). The original high-level PNG function (the code that writes the headers) was written by [Alex Evans](https://gist.github.com/908299).
## Fuzzing
fpng's encoder and decoder have been fuzzed to check for failures or crashes with random/corrupted input images and random image dimensions. The -e and -E options are used for this sort of fuzzing.
@@ -139,10 +186,8 @@ The fpng decoder's parser has been fuzzed to check for crashes with [zzuf](http:
zzuf -s 1:1000000 ./fpng_test -f fpng.png
```
## License
## License for fpng.cpp/.h
fpng.cpp/.h: Apache 2.0 license. See the end of fpng.cpp.
See the [unlicense](https://unlicense.org/)
Crc32.cpp/.h: Copyright (c) 2011-2019 Stephan Brumme. zlib license:
https://github.com/stbrumme/crc32
At least in the US, no license is necessary, as this code is not Intellectual Property, and not copyrighted. It has been explicitly and purposely placed into the Public Domain.

View File

@@ -134,6 +134,7 @@
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -143,20 +144,21 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="src/Crc32.cpp" />
<ClCompile Include="src/fpng.cpp" />
<ClCompile Include="src/fpng_test.cpp" />
<ClCompile Include="src/lodepng.cpp" />
<ClCompile Include="src\pvpngreader.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="src/Crc32.h" />
<ClInclude Include="src/fpng.h" />
<ClInclude Include="src/lodepng.h" />
<ClInclude Include="src/qoi.h" />
<ClInclude Include="src/stb_image.h" />
<ClInclude Include="src/stb_image_write.h" />
<ClInclude Include="src\basisu_miniz.h" />
<ClInclude Include="src\pvpngreader.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@@ -18,20 +18,17 @@
<ClCompile Include="src/fpng.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src/Crc32.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src/fpng_test.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src/lodepng.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\pvpngreader.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="src/Crc32.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="src/fpng.h">
<Filter>Source Files</Filter>
</ClInclude>
@@ -47,5 +44,11 @@
<ClInclude Include="src/lodepng.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="src\pvpngreader.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="src\basisu_miniz.h">
<Filter>Source Files</Filter>
</ClInclude>
</ItemGroup>
</Project>
</Project>

File diff suppressed because it is too large Load Diff

View File

@@ -1,69 +0,0 @@
// //////////////////////////////////////////////////////////
// Crc32.h
// Copyright (c) 2011-2019 Stephan Brumme. All rights reserved.
// Slicing-by-16 contributed by Bulat Ziganshin
// Tableless bytewise CRC contributed by Hagai Gold
// see http://create.stephan-brumme.com/disclaimer.html
//
// if running on an embedded system, you might consider shrinking the
// big Crc32Lookup table by undefining these lines:
//#define CRC32_USE_LOOKUP_TABLE_BYTE
//#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_4
#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_8
//#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_16
// - crc32_bitwise doesn't need it at all
// - crc32_halfbyte has its own small lookup table
// - crc32_1byte_tableless and crc32_1byte_tableless2 don't need it at all
// - crc32_1byte needs only Crc32Lookup[0]
// - crc32_4bytes needs only Crc32Lookup[0..3]
// - crc32_8bytes needs only Crc32Lookup[0..7]
// - crc32_4x8bytes needs only Crc32Lookup[0..7]
// - crc32_16bytes needs all of Crc32Lookup
// using the aforementioned #defines the table is automatically fitted to your needs
// uint8_t, uint32_t, int32_t
#include <stdint.h>
// size_t
#include <cstddef>
// crc32_fast selects the fastest algorithm depending on flags (CRC32_USE_LOOKUP_...)
/// compute CRC32 using the fastest algorithm for large datasets on modern CPUs
uint32_t crc32_fast (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// merge two CRC32 such that result = crc32(dataB, lengthB, crc32(dataA, lengthA))
uint32_t crc32_combine (uint32_t crcA, uint32_t crcB, size_t lengthB);
/// compute CRC32 (bitwise algorithm)
uint32_t crc32_bitwise (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (half-byte algorithm)
uint32_t crc32_halfbyte(const void* data, size_t length, uint32_t previousCrc32 = 0);
#ifdef CRC32_USE_LOOKUP_TABLE_BYTE
/// compute CRC32 (standard algorithm)
uint32_t crc32_1byte (const void* data, size_t length, uint32_t previousCrc32 = 0);
#endif
/// compute CRC32 (byte algorithm) without lookup tables
uint32_t crc32_1byte_tableless (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (byte algorithm) without lookup tables
uint32_t crc32_1byte_tableless2(const void* data, size_t length, uint32_t previousCrc32 = 0);
#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_4
/// compute CRC32 (Slicing-by-4 algorithm)
uint32_t crc32_4bytes (const void* data, size_t length, uint32_t previousCrc32 = 0);
#endif
#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_8
/// compute CRC32 (Slicing-by-8 algorithm)
uint32_t crc32_8bytes (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (Slicing-by-8 algorithm), unroll inner loop 4 times
uint32_t crc32_4x8bytes(const void* data, size_t length, uint32_t previousCrc32 = 0);
#endif
#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_16
/// compute CRC32 (Slicing-by-16 algorithm)
uint32_t crc32_16bytes (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (Slicing-by-16 algorithm, prefetch upcoming data blocks)
uint32_t crc32_16bytes_prefetch(const void* data, size_t length, uint32_t previousCrc32 = 0, size_t prefetchAhead = 256);
#endif

2533
src/basisu_miniz.h Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,12 +1,35 @@
// fpng.h - Copyright (C) 2021 Richard Geldreich, Jr. - Apache 2.0 license (see end of fpng.cpp)
// fpng.h - unlicense (see end of fpng.cpp)
#pragma once
#include <stdlib.h>
#include <stdint.h>
#include <vector>
#ifndef FPNG_TRAIN_HUFFMAN_TABLES
// Set to 1 when using the -t (training) option in fpng_test to generate new opaque/alpha Huffman tables for the single pass encoder.
#define FPNG_TRAIN_HUFFMAN_TABLES (0)
#endif
namespace fpng
{
// ---- Library initialization - call once to identify if the processor supports SSE.
// Otherwise you'll only get scalar fallbacks.
void fpng_init();
// ---- Useful Utilities
// Returns true if the CPU supports SSE 4.1, and SSE support wasn't disabled by setting FPNG_NO_SSE=1.
// fpng_init() must have been called first, or it'll assert and return false.
bool fpng_cpu_supports_sse41();
// Fast CRC-32 SSE4.1+pclmul or a scalar fallback (slice by 4)
const uint32_t FPNG_CRC32_INIT = 0;
uint32_t fpng_crc32(const void* pData, size_t size, uint32_t prev_crc32 = FPNG_CRC32_INIT);
// Fast Adler32 SSE4.1 Adler-32 with a scalar fallback.
const uint32_t FPNG_ADLER32_INIT = 1;
uint32_t fpng_adler32(const void* pData, size_t size, uint32_t adler = FPNG_ADLER32_INIT);
// ---- Compression
enum
{
@@ -88,4 +111,12 @@ namespace fpng
int fpng_decode_file(const char* pFilename, std::vector<uint8_t>& out, uint32_t& width, uint32_t& height, uint32_t& channels_in_file, uint32_t desired_channels);
#endif
// ---- Internal API used for Huffman table training purposes
#if FPNG_TRAIN_HUFFMAN_TABLES
const uint32_t HUFF_COUNTS_SIZE = 288;
extern uint64_t g_huff_counts[HUFF_COUNTS_SIZE];
bool create_dynamic_block_prefix(uint64_t* pFreq, uint32_t num_chans, std::vector<uint8_t>& prefix, uint64_t& bit_buf, int& bit_buf_size, uint32_t *pCodes, uint8_t *pCodesizes);
#endif
} // namespace fpng

View File

@@ -23,6 +23,13 @@
#define QOI_IMPLEMENTATION
#include "qoi.h"
#define WUFFS_IMPLEMENTATION
#define WUFFS_CONFIG__STATIC_FUNCTIONS
#include "wuffs-v0.3.c"
#include "basisu_miniz.h"
#include "pvpngreader.h"
typedef std::vector<uint8_t> uint8_vec;
typedef uint64_t timer_ticks;
@@ -272,7 +279,11 @@ static void write_func_stbi(void* context, void* data, int size)
static bool load_listing_file(const std::string& f, std::vector<std::string>& filenames)
{
std::string filename(f);
//filename.erase(0, 1);
if (filename.size() == 0)
return false;
if (filename[0] == '@')
filename.erase(0, 1);
FILE* pFile = nullptr;
#ifdef _WIN32
@@ -670,8 +681,301 @@ static int fuzz_test_encoder2(uint32_t fpng_flags)
return EXIT_SUCCESS;
}
static void* wuffs_decode(void* pData, size_t data_len, uint32_t &width, uint32_t &height)
{
wuffs_png__decoder* pDec = wuffs_png__decoder__alloc();
if (!pDec)
return nullptr;
wuffs_png__decoder__set_quirk_enabled(pDec, WUFFS_BASE__QUIRK_IGNORE_CHECKSUM, true);
wuffs_base__image_config ic;
wuffs_base__io_buffer src = wuffs_base__ptr_u8__reader((uint8_t *)pData, data_len, true);
wuffs_base__status status = wuffs_png__decoder__decode_image_config(pDec, &ic, &src);
if (status.repr)
{
free(pDec);
return nullptr;
}
width = wuffs_base__pixel_config__width(&ic.pixcfg);
height = wuffs_base__pixel_config__height(&ic.pixcfg);
wuffs_base__pixel_config__set(&ic.pixcfg, WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL, WUFFS_BASE__PIXEL_SUBSAMPLING__NONE, width, height);
uint64_t workbuf_len = wuffs_png__decoder__workbuf_len(pDec).max_incl;
if (workbuf_len > SIZE_MAX)
{
free(pDec);
return nullptr;
}
wuffs_base__slice_u8 workbuf_slice = wuffs_base__make_slice_u8( (uint8_t *)malloc((size_t)workbuf_len), (size_t)workbuf_len);
if (!workbuf_slice.ptr)
{
free(pDec);
return nullptr;
}
const uint64_t total_pixels = (uint64_t)width * (uint64_t)height;
if (total_pixels > (SIZE_MAX >> 2U))
{
free(workbuf_slice.ptr);
free(pDec);
return nullptr;
}
void* pDecode_buf = malloc((size_t)(total_pixels * sizeof(uint32_t)));
if (!pDecode_buf)
{
free(workbuf_slice.ptr);
free(pDec);
return nullptr;
}
wuffs_base__slice_u8 pixbuf_slice = wuffs_base__make_slice_u8((uint8_t*)pDecode_buf, (size_t)(total_pixels * sizeof(uint32_t)));
wuffs_base__pixel_buffer pb;
status = wuffs_base__pixel_buffer__set_from_slice(&pb, &ic.pixcfg, pixbuf_slice);
if (status.repr)
{
free(workbuf_slice.ptr);
free(pDecode_buf);
free(pDec);
return nullptr;
}
status = wuffs_png__decoder__decode_frame(pDec, &pb, &src, WUFFS_BASE__PIXEL_BLEND__SRC, workbuf_slice, NULL);
if (status.repr)
{
free(workbuf_slice.ptr);
free(pDecode_buf);
free(pDec);
return nullptr;
}
free(workbuf_slice.ptr);
free(pDec);
return pDecode_buf;
}
#if FPNG_TRAIN_HUFFMAN_TABLES
// Builds the dynamic Huffman block prefix for the accumulated symbol frequencies in pFreq
// (fpng::HUFF_COUNTS_SIZE entries) and prints it to stdout as C source, followed by the
// bit buffer state and the per-symbol code size/code table. num_chans (3 or 4) is passed to
// fpng::create_dynamic_block_prefix() and is also used as the suffix of the emitted identifiers.
// Returns false (after printing an error to stderr) if the prefix could not be created.
static bool print_trained_huffman_tables(uint64_t* pFreq, uint32_t num_chans)
{
	std::vector<uint8_t> dyn_prefix;
	uint64_t bit_buf = 0;
	int bit_buf_size = 0;
	uint32_t codes[fpng::HUFF_COUNTS_SIZE];
	uint8_t codesizes[fpng::HUFF_COUNTS_SIZE];

	if (!fpng::create_dynamic_block_prefix(pFreq, num_chans, dyn_prefix, bit_buf, bit_buf_size, codes, codesizes))
	{
		fprintf(stderr, "fpng::create_dynamic_block_prefix() failed!\n");
		return false;
	}

	printf("\n");
	printf("static const uint8_t g_dyn_huff_%u[] = {\n", num_chans);
	for (size_t i = 0; i < dyn_prefix.size(); i++)
	{
		printf("%u%c ", dyn_prefix[i], ((i + 1) != dyn_prefix.size()) ? ',' : ' ');
		// Wrap the emitted array every 32 entries.
		if ((i & 31) == 31)
			printf("\n");
	}
	printf("};\n");

	printf("const uint32_t DYN_HUFF_%u_BITBUF = %u, DYN_HUFF_%u_BITBUF_SIZE = %u;\n", num_chans, (uint32_t)bit_buf, num_chans, (uint32_t)bit_buf_size);

	printf("static const struct { uint8_t m_code_size; uint16_t m_code; } g_dyn_huff_%u_codes[%u] = {\n", num_chans, fpng::HUFF_COUNTS_SIZE);
	for (uint32_t i = 0; i < fpng::HUFF_COUNTS_SIZE; i++)
	{
		printf("{%u,%u}%c", codesizes[i], codes[i], (i != (fpng::HUFF_COUNTS_SIZE - 1)) ? ',' : ' ');
		if ((i & 31) == 31)
			printf("\n");
	}
	printf("};\n");

	return true;
}

// Huffman table training mode (-t). pFilename must be of the form "@filelist.txt"; every file in
// that listing is decoded with lodepng, re-encoded with FPNG (FPNG_ENCODE_SLOWER) so that
// fpng::g_huff_counts is filled in, verified by decoding FPNG's output with lodepng, and its symbol
// counts are accumulated separately for opaque (3 channel) and alpha (4 channel) images.
// The trained tables are then printed to stdout as C source.
//
// Files that lodepng cannot decode (or whose pixel count is too large for 32-bit sizes) are skipped
// with a warning; any other failure aborts training.
// Returns EXIT_SUCCESS or EXIT_FAILURE.
static int training_mode(const char* pFilename)
{
	if (!pFilename || (pFilename[0] != '@'))
	{
		fprintf(stderr, "Must specify list of files to read using @filelist.txt\n");
		return EXIT_FAILURE;
	}

	std::vector<std::string> files_to_process;
	if (!load_listing_file(std::string(pFilename), files_to_process))
		return EXIT_FAILURE;

	uint64_t opaque_freq[fpng::HUFF_COUNTS_SIZE], alpha_freq[fpng::HUFF_COUNTS_SIZE];
	memset(opaque_freq, 0, sizeof(opaque_freq));
	memset(alpha_freq, 0, sizeof(alpha_freq));

	uint32_t total_alpha_files = 0, total_opaque_files = 0, total_failed_loading = 0;

	for (size_t file_index = 0; file_index < files_to_process.size(); file_index++)
	{
		const char* pSrc_filename = files_to_process[file_index].c_str();

		printf("Processing file \"%s\"\n", pSrc_filename);

		uint8_vec source_file_data;
		if (!read_file_to_vec(pSrc_filename, source_file_data))
		{
			fprintf(stderr, "Failed reading source file data \"%s\"\n", pSrc_filename);
			return EXIT_FAILURE;
		}

		uint32_t source_width = 0, source_height = 0;
		uint8_t* pSource_image_buffer = nullptr;
		unsigned error = lodepng_decode_memory(&pSource_image_buffer, &source_width, &source_height, source_file_data.data(), source_file_data.size(), LCT_RGBA, 8);
		if (error != 0)
		{
			fprintf(stderr, "WARNING: Failed unpacking source file \"%s\" using lodepng! Skipping.\n", pSrc_filename);
			total_failed_loading++;
			continue;
		}

		// All sizes below are computed in 32 bits (up to total pixels * 4), so reject anything that would wrap.
		const uint64_t total_source_pixels64 = (uint64_t)source_width * (uint64_t)source_height;
		if (total_source_pixels64 > (UINT32_MAX / 4U))
		{
			fprintf(stderr, "WARNING: Source file \"%s\" is too large! Skipping.\n", pSrc_filename);
			free(pSource_image_buffer);
			total_failed_loading++;
			continue;
		}

		const color_rgba* pSource_pixels32 = (const color_rgba*)pSource_image_buffer;
		const uint32_t total_source_pixels = (uint32_t)total_source_pixels64;

		// The image is treated as having alpha if any pixel is not fully opaque.
		bool has_alpha = false;
		for (uint32_t i = 0; i < total_source_pixels; i++)
		{
			if (pSource_pixels32[i].m_c[3] < 255)
			{
				has_alpha = true;
				break;
			}
		}

		const uint32_t source_chans = has_alpha ? 4 : 3;

		printf("Dimensions: %ux%u, Has Alpha: %u, Total Pixels: %u, bytes: %u (%f MB)\n", source_width, source_height, (uint32_t)has_alpha, total_source_pixels, total_source_pixels * source_chans, total_source_pixels * source_chans / (1024.0f * 1024.0f));

		// 24bpp copy of the image, used as the encoder input for opaque images.
		uint8_vec source_image_buffer24((size_t)total_source_pixels * 3);
		for (uint32_t i = 0; i < total_source_pixels; i++)
		{
			source_image_buffer24[(size_t)i * 3 + 0] = pSource_pixels32[i].m_c[0];
			source_image_buffer24[(size_t)i * 3 + 1] = pSource_pixels32[i].m_c[1];
			source_image_buffer24[(size_t)i * 3 + 2] = pSource_pixels32[i].m_c[2];
		}
		const uint8_t* pSource_pixels24 = source_image_buffer24.data();

		// Reset the per-file symbol counts before encoding; they are read back after the encode below.
		memset(fpng::g_huff_counts, 0, sizeof(fpng::g_huff_counts));

		std::vector<uint8_t> fpng_file_buf;
		bool status = fpng::fpng_encode_image_to_memory((source_chans == 4) ? (const void*)pSource_pixels32 : (const void*)pSource_pixels24, source_width, source_height, source_chans, fpng_file_buf, fpng::FPNG_ENCODE_SLOWER);
		if (!status)
		{
			fprintf(stderr, "fpng_encode_image_to_memory() failed!\n");
			free(pSource_image_buffer);
			return EXIT_FAILURE;
		}

		// Sanity check the PNG file using lodepng
		{
			uint32_t lodepng_decoded_w = 0, lodepng_decoded_h = 0;
			uint8_t* lodepng_decoded_buffer = nullptr;

			unsigned verify_error = lodepng_decode_memory(&lodepng_decoded_buffer, &lodepng_decoded_w, &lodepng_decoded_h, (uint8_t*)fpng_file_buf.data(), fpng_file_buf.size(), LCT_RGBA, 8);
			if (verify_error != 0)
			{
				fprintf(stderr, "lodepng_decode_memory() failed!\n");
				free(pSource_image_buffer);
				return EXIT_FAILURE;
			}

			// Compare dimensions first so the memcmp() below can never read past the decoded buffer.
			if ((lodepng_decoded_w != source_width) || (lodepng_decoded_h != source_height) ||
				(memcmp(lodepng_decoded_buffer, pSource_pixels32, (size_t)total_source_pixels * 4) != 0))
			{
				fprintf(stderr, "FPNG decode verification failed (using lodepng)!\n");
				free(lodepng_decoded_buffer);
				free(pSource_image_buffer);
				return EXIT_FAILURE;
			}

			free(lodepng_decoded_buffer);
		}

		if (source_chans == 4)
		{
			for (uint32_t i = 0; i < fpng::HUFF_COUNTS_SIZE; i++)
				alpha_freq[i] += fpng::g_huff_counts[i];

			total_alpha_files++;
		}
		else
		{
			for (uint32_t i = 0; i < fpng::HUFF_COUNTS_SIZE; i++)
				opaque_freq[i] += fpng::g_huff_counts[i];

			total_opaque_files++;
		}

		// Release the decoded source image; it is no longer needed once its counts are accumulated.
		free(pSource_image_buffer);

	} // file_index

	printf("Total alpha files: %u\n", total_alpha_files);
	printf("Total opaque files: %u\n", total_opaque_files);
	printf("Total failed loading: %u\n", total_failed_loading);

	if (!total_alpha_files && !total_opaque_files)
	{
		fprintf(stderr, "No files were loaded!\n");
		return EXIT_FAILURE;
	}

	if (total_opaque_files && !print_trained_huffman_tables(opaque_freq, 3))
		return EXIT_FAILURE;

	if (total_alpha_files && !print_trained_huffman_tables(alpha_freq, 4))
		return EXIT_FAILURE;

	return EXIT_SUCCESS;
}
#else
// Stand-in for training mode in builds where FPNG_TRAIN_HUFFMAN_TABLES is 0:
// the argument is ignored, an explanatory message goes to stderr, and the call always fails.
static int training_mode(const char* pFilename)
{
	// Training support is compiled out, so the file list is deliberately unused.
	static_cast<void>(pFilename);

	fputs("Must compile with FPNG_TRAIN_HUFFMAN_TABLES set to 1\n", stderr);

	return EXIT_FAILURE;
}
#endif
int main(int arg_c, char **arg_v)
{
fpng::fpng_init();
if (arg_c < 2)
{
printf("Usage: fpng_test [filename.png] <alpha_filename.png>\n");
@@ -684,6 +988,7 @@ int main(int arg_c, char **arg_v)
printf("-e: Fuzz encoder/decoder by randomly modifying an input image's pixels\n");
printf("-f: Decompress specified PNG image using FPNG, then exit\n");
printf("-a: Swizzle input image's green to alpha, for testing 32bpp correlation alpha\n");
printf("-t: Train Huffman tables on @filelist.txt (must compile with FPNG_TRAIN_HUFFMAN_TABLES=1)\n");
return EXIT_FAILURE;
}
@@ -696,6 +1001,7 @@ int main(int arg_c, char **arg_v)
bool fuzz_encoder2 = false;
bool fuzz_decoder = false;
bool swizzle_green_to_alpha = false;
bool training_mode_flag = false;
for (int i = 1; i < arg_c; i++)
{
@@ -730,6 +1036,10 @@ int main(int arg_c, char **arg_v)
{
swizzle_green_to_alpha = true;
}
else if (pArg[1] == 't')
{
training_mode_flag = true;
}
else
{
fprintf(stderr, "Unrecognized option: %s\n", pArg);
@@ -760,8 +1070,13 @@ int main(int arg_c, char **arg_v)
if (fuzz_encoder2)
return fuzz_test_encoder2(fpng_flags);
if (training_mode_flag)
return training_mode(pFilename);
if (!csv_flag)
{
printf("SSE 4.1 supported: %u\n", fpng::fpng_cpu_supports_sse41());
printf("Filename: %s\n", pFilename);
if (pAlpha_filename)
printf("Alpha filename: %s\n", pFilename);
@@ -863,7 +1178,7 @@ int main(int arg_c, char **arg_v)
const uint8_t* pSource_pixels24 = source_image_buffer24.data();
const uint32_t NUM_TIMES_TO_ENCODE = csv_flag ? 3 : 1;
const uint32_t NUM_TIMES_TO_ENCODE = csv_flag ? 3 : 3;
const uint32_t NUM_TIMES_TO_DECODE = 5;
interval_timer tm;
@@ -916,7 +1231,7 @@ int main(int arg_c, char **arg_v)
#endif
}
double fpng_decode_time = 0.0f, lodepng_decode_time = 0.0f, stbi_decode_time = 0.0f, qoi_decode_time = 0.0f;
double fpng_decode_time = 0.0f, lodepng_decode_time = 0.0f, stbi_decode_time = 0.0f, qoi_decode_time = 0.0f, wuffs_decode_time = 0.0f, pvpng_decode_time = 0.0f;
// Decode the file using our decompressor
{
@@ -1083,6 +1398,51 @@ int main(int arg_c, char **arg_v)
}
free(p);
}
// Verify FPNG's output data using wuffs
{
void* p = nullptr;
//static void*
wuffs_decode_time = 1e+9f;
for (uint32_t i = 0; i < NUM_TIMES_TO_DECODE; i++)
{
if (p)
{
free(p);
p = nullptr;
}
tm.start();
uint32_t w, h;
p = wuffs_decode(fpng_file_buf.data(), fpng_file_buf.size(), w, h);
if (!p)
break;
if ((w != source_width) || (h != source_height))
{
fprintf(stderr, "wuffs failed decompressing FPNG's output PNG file!\n");
return EXIT_FAILURE;
}
wuffs_decode_time = minimum(wuffs_decode_time, tm.get_elapsed_secs());
}
if (!p)
{
fprintf(stderr, "wuffs failed decompressing FPNG's output PNG file!\n");
return EXIT_FAILURE;
}
if (memcmp(p, pSource_pixels32, total_source_pixels * 4) != 0)
{
fprintf(stderr, "FPNG decode verification failed (using wuffs)!\n");
return EXIT_FAILURE;
}
free(p);
}
// Compress with lodepng
@@ -1137,7 +1497,7 @@ int main(int arg_c, char **arg_v)
}
if (!csv_flag)
printf("stbi: %4.6f secs, %u bytes, %4.3f MB, %4.3f MP/s\n", stbi_best_time, (uint32_t)stbi_file_buf.size(), (double)stbi_file_buf.size() / (1024.0f * 1024.0f), (total_source_pixels / (1024.0f * 1024.0f)) / stbi_best_time);
printf("stbi: %4.6f secs, %u bytes, %4.3f MB, %4.3f MP/sec\n", stbi_best_time, (uint32_t)stbi_file_buf.size(), (double)stbi_file_buf.size() / (1024.0f * 1024.0f), (total_source_pixels / (1024.0f * 1024.0f)) / stbi_best_time);
if (!csv_flag)
{
@@ -1185,29 +1545,75 @@ int main(int arg_c, char **arg_v)
// Validate QOI's output file
{
qoi_decode_time = 1e+9f;
qoi_desc qddesc;
tm.start();
void* pQOI_decomp_data = qoi_decode(pQOI_data, qoi_len, &qddesc, 4);
qoi_decode_time = tm.get_elapsed_secs();
if (memcmp(pQOI_decomp_data, pSource_pixels32, total_source_pixels * 4) != 0)
for (uint32_t i = 0; i < NUM_TIMES_TO_ENCODE; i++)
{
fprintf(stderr, "QOI verification failure!\n");
return EXIT_FAILURE;
tm.start();
void* pQOI_decomp_data = qoi_decode(pQOI_data, qoi_len, &qddesc, 4);
qoi_decode_time = minimum(qoi_decode_time, tm.get_elapsed_secs());
if (memcmp(pQOI_decomp_data, pSource_pixels32, total_source_pixels * 4) != 0)
{
fprintf(stderr, "QOI verification failure!\n");
return EXIT_FAILURE;
}
free(pQOI_decomp_data);
}
free(pQOI_decomp_data);
}
free(pQOI_data);
pQOI_data = nullptr;
{
// Decode the PNG file using pvpng, which ships with BasisU and uses miniz for decompression.
pvpng_decode_time = 1e+9f;
for (uint32_t i = 0; i < NUM_TIMES_TO_ENCODE; i++)
{
uint32_t width = 0, height = 0, num_chans = 0;
tm.start();
void* pImage_data = pv_png::load_png(fpng_file_buf.data(), fpng_file_buf.size(), source_chans, width, height, num_chans);
pvpng_decode_time = minimum(pvpng_decode_time, tm.get_elapsed_secs());
if (!pImage_data)
{
fprintf(stderr, "Failed decoding using pvpng! (1)\n");
return EXIT_FAILURE;
}
if ((num_chans != source_chans) || (width != source_width) || (height != source_height))
{
fprintf(stderr, "Failed decoding using pvpng! (2)\n");
return EXIT_FAILURE;
}
if (memcmp((source_chans == 3) ? (const void*)pSource_pixels24 : (const void*)pSource_pixels32, pImage_data, width * height * source_chans) != 0)
{
fprintf(stderr, "Failed decoding using pvpng! (3)\n");
return EXIT_FAILURE;
}
free(pImage_data);
}
}
if (!csv_flag)
{
printf("** Decoding:\n");
printf("FPNG: %3.6f secs, %4.3f MP/s\n", fpng_decode_time, (total_source_pixels / (1024.0f * 1024.0f)) / fpng_decode_time);
printf("lodepng: %3.6f secs, %4.3f MP/s\n", lodepng_decode_time, (total_source_pixels / (1024.0f * 1024.0f)) / lodepng_decode_time);
printf("stbi: %3.6f secs, %4.3f MP/s\n", stbi_decode_time, (total_source_pixels / (1024.0f * 1024.0f)) / stbi_decode_time);
printf("qoi: %3.6f secs, %4.3f MP/s\n", qoi_decode_time, (total_source_pixels / (1024.0f * 1024.0f)) / qoi_decode_time);
printf("FPNG: %3.6f secs, %4.3f MP/sec\n", fpng_decode_time, (total_source_pixels / (1024.0f * 1024.0f)) / fpng_decode_time);
printf("lodepng: %3.6f secs, %4.3f MP/sec\n", lodepng_decode_time, (total_source_pixels / (1024.0f * 1024.0f)) / lodepng_decode_time);
printf("stbi: %3.6f secs, %4.3f MP/sec\n", stbi_decode_time, (total_source_pixels / (1024.0f * 1024.0f)) / stbi_decode_time);
printf("wuffs: %3.6f secs, %4.3f MP/sec\n", wuffs_decode_time, (total_source_pixels / (1024.0f * 1024.0f)) / wuffs_decode_time);
printf("pvpng: %3.6f secs, %4.3f MP/sec\n", pvpng_decode_time, (total_source_pixels / (1024.0f * 1024.0f)) / pvpng_decode_time);
printf("qoi: %3.6f secs, %4.3f MP/sec\n", qoi_decode_time, (total_source_pixels / (1024.0f * 1024.0f)) / qoi_decode_time);
}
if (csv_flag)
@@ -1216,12 +1622,13 @@ int main(int arg_c, char **arg_v)
const double source_megapixels = total_source_pixels / (1024.0f * 1024.0f);
printf("%s, %u, %u, %u, %f, %f, %f, %4.1f, %4.1f, %f, %f, %f, %4.1f, %4.1f, %f, %f, %f, %4.1f, %4.1f, %f, %f, %f, %4.1f, %4.1f\n",
printf("%s, %u, %u, %u, %f, %f, %f, %4.3f, %4.3f, %f, %f, %f, %4.3f, %4.3f, %f, %f, %f, %4.3f, %4.3f, %f, %f, %f, %4.3f, %4.3f, %4.3f, %4.3f\n",
pFilename, source_width, source_height, source_chans,
qoi_best_time, (double)qoi_len / MB, qoi_decode_time, source_megapixels / qoi_best_time, source_megapixels / qoi_decode_time,
fpng_best_time, (double)fpng_file_buf.size() / MB, fpng_decode_time, source_megapixels / fpng_best_time, source_megapixels / fpng_decode_time,
lodepng_best_time, (double)lodepng_file_buf.size() / MB, lodepng_decode_time, source_megapixels / lodepng_best_time, source_megapixels / lodepng_decode_time,
stbi_best_time, (double)stbi_file_buf.size() / MB, stbi_decode_time, source_megapixels / stbi_best_time, source_megapixels / stbi_decode_time
stbi_best_time, (double)stbi_file_buf.size() / MB, stbi_decode_time, source_megapixels / stbi_best_time, source_megapixels / stbi_decode_time,
pvpng_decode_time, source_megapixels / pvpng_decode_time
);
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
/*
LodePNG version 20210627
LodePNG version 20230410
Copyright (c) 2005-2021 Lode Vandevenne
Copyright (c) 2005-2023 Lode Vandevenne
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
@@ -35,43 +35,50 @@ The following #defines are used to create code sections. They can be disabled
to disable code sections, which can give faster compile time and smaller binary.
The "NO_COMPILE" defines are designed to be used to pass as defines to the
compiler command to disable them without modifying this header, e.g.
-DLODEPNG_NO_COMPILE_ZLIB for gcc.
In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to
allow implementing a custom lodepng_crc32.
-DLODEPNG_NO_COMPILE_ZLIB for gcc or clang.
*/
/*deflate & zlib. If disabled, you must specify alternative zlib functions in
the custom_zlib field of the compress and decompress settings*/
#ifndef LODEPNG_NO_COMPILE_ZLIB
/*pass -DLODEPNG_NO_COMPILE_ZLIB to the compiler to disable this, or comment out LODEPNG_COMPILE_ZLIB below*/
#define LODEPNG_COMPILE_ZLIB
#endif
/*png encoder and png decoder*/
#ifndef LODEPNG_NO_COMPILE_PNG
/*pass -DLODEPNG_NO_COMPILE_PNG to the compiler to disable this, or comment out LODEPNG_COMPILE_PNG below*/
#define LODEPNG_COMPILE_PNG
#endif
/*deflate&zlib decoder and png decoder*/
#ifndef LODEPNG_NO_COMPILE_DECODER
/*pass -DLODEPNG_NO_COMPILE_DECODER to the compiler to disable this, or comment out LODEPNG_COMPILE_DECODER below*/
#define LODEPNG_COMPILE_DECODER
#endif
/*deflate&zlib encoder and png encoder*/
#ifndef LODEPNG_NO_COMPILE_ENCODER
/*pass -DLODEPNG_NO_COMPILE_ENCODER to the compiler to disable this, or comment out LODEPNG_COMPILE_ENCODER below*/
#define LODEPNG_COMPILE_ENCODER
#endif
/*the optional built in harddisk file loading and saving functions*/
#ifndef LODEPNG_NO_COMPILE_DISK
/*pass -DLODEPNG_NO_COMPILE_DISK to the compiler to disable this, or comment out LODEPNG_COMPILE_DISK below*/
#define LODEPNG_COMPILE_DISK
#endif
/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/
#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS
/*pass -DLODEPNG_NO_COMPILE_ANCILLARY_CHUNKS to the compiler to disable this,
or comment out LODEPNG_COMPILE_ANCILLARY_CHUNKS below*/
#define LODEPNG_COMPILE_ANCILLARY_CHUNKS
#endif
/*ability to convert error numerical codes to English text string*/
#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT
/*pass -DLODEPNG_NO_COMPILE_ERROR_TEXT to the compiler to disable this,
or comment out LODEPNG_COMPILE_ERROR_TEXT below*/
#define LODEPNG_COMPILE_ERROR_TEXT
#endif
@@ -79,12 +86,27 @@ the custom_zlib field of the compress and decompress settings*/
you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your
source files with custom allocators.*/
#ifndef LODEPNG_NO_COMPILE_ALLOCATORS
/*pass -DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler to disable the built-in ones,
or comment out LODEPNG_COMPILE_ALLOCATORS below*/
#define LODEPNG_COMPILE_ALLOCATORS
#endif
/*Disable built-in CRC function, in that case a custom implementation of
lodepng_crc32 must be defined externally so that it can be linked in.
The default built-in CRC code comes with 8KB of lookup tables, so for memory constrained environment you may want it
disabled and provide a much smaller implementation externally as said above. You can find such an example implementation
in a comment in the lodepng.c(pp) file in the 'else' case of the searchable LODEPNG_COMPILE_CRC section.*/
#ifndef LODEPNG_NO_COMPILE_CRC
/*pass -DLODEPNG_NO_COMPILE_CRC to the compiler to disable the built-in one,
or comment out LODEPNG_COMPILE_CRC below*/
#define LODEPNG_COMPILE_CRC
#endif
/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/
#ifdef __cplusplus
#ifndef LODEPNG_NO_COMPILE_CPP
/*pass -DLODEPNG_NO_COMPILE_CPP to the compiler to disable C++ (not needed if a C-only compiler),
or comment out LODEPNG_COMPILE_CPP below*/
#define LODEPNG_COMPILE_CPP
#endif
#endif
@@ -374,8 +396,10 @@ typedef struct LodePNGColorMode {
The alpha channels must be set as well, set them to 255 for opaque images.
When decoding, by default you can ignore this palette, since LodePNG already
fills the palette colors in the pixels of the raw RGBA output.
When decoding, with the default settings you can ignore this palette, since
LodePNG already fills the palette colors in the pixels of the raw RGBA output,
but when decoding to the original PNG color mode it is needed to reconstruct
the colors.
The palette is only supported for color type 3.
*/
@@ -465,10 +489,12 @@ typedef struct LodePNGInfo {
with values truncated to the bit depth in the unsigned integer.
For grayscale and palette PNGs, the value is stored in background_r. The values
in background_g and background_b are then unused.
in background_g and background_b are then unused. The decoder will set them
equal to background_r, the encoder ignores them in this case.
So when decoding, you may get these in a different color mode than the one you requested
for the raw pixels.
When decoding, you may get these in a different color mode than the one you requested
for the raw pixels: the colortype and bitdepth defined by info_png.color, that is the
ones defined in the header of the PNG image, are used.
When encoding with auto_convert, you must use the color model defined in info_png.color for
these values. The encoder normally ignores info_png.color when auto_convert is on, but will
@@ -535,7 +561,7 @@ typedef struct LodePNGInfo {
unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/
/*
Color profile related chunks: gAMA, cHRM, sRGB, iCPP
Color profile related chunks: gAMA, cHRM, sRGB, iCCP, sBIT
LodePNG does not apply any color conversions on pixels in the encoder or decoder and does not interpret these color
profile values. It merely passes on the information. If you wish to use color profiles and convert colors, please
@@ -598,6 +624,45 @@ typedef struct LodePNGInfo {
unsigned char* iccp_profile;
unsigned iccp_profile_size; /* The size of iccp_profile in bytes */
/*
sBIT chunk: significant bits. Optional metadata, only set this if needed.
If defined, these values give the bit depth of the original data. Since PNG only stores 1, 2, 4, 8 or 16-bit
per channel data, the significant bits value can be used to indicate the original encoded data has another
sample depth, such as 10 or 12.
Encoders using this value, when storing the pixel data, should use the most significant bits
of the data to store the original bits, and use a good sample depth scaling method such as
"left bit replication" to fill in the least significant bits, rather than fill zeroes.
Decoders using this value, if able to work with data that's e.g. 10-bit or 12-bit, should right
shift the data to go back to the original bit depth, but decoders are also allowed to ignore
sbit and work e.g. with the 8-bit or 16-bit data from the PNG directly, since thanks
to the encoder contract, the values encoded in PNG are in valid range for the PNG bit depth.
For grayscale images, sbit_g and sbit_b are not used, and for images that don't use color
type RGBA or grayscale+alpha, sbit_a is not used (it's not used even for palette images with
translucent palette values, or images with color key). The values that are used must be
greater than zero and smaller than or equal to the PNG bit depth.
The color type from the header in the PNG image defines these used and unused fields: if
decoding with a color mode conversion, such as always decoding to RGBA, this metadata still
only uses the color type of the original PNG, and may e.g. lack the alpha channel info
if the PNG was RGB. When encoding with auto_convert (as well as without), also always the
color model defined in info_png.color determines this.
NOTE: enabling sbit can hurt compression, because the encoder can then not always use
auto_convert to choose a more optimal color mode for the data, because the PNG format has
strict requirements for the allowed sbit values in combination with color modes.
For example, setting these fields to 10-bit will force the encoder to keep using a 16-bit per channel
color mode, even if the pixel data would in fact fit in a more efficient 8-bit mode.
*/
unsigned sbit_defined; /*is significant bits given? if not, the values below are unused*/
unsigned sbit_r; /*red or gray component of significant bits*/
unsigned sbit_g; /*green component of significant bits*/
unsigned sbit_b; /*blue component of significant bits*/
unsigned sbit_a; /*alpha component of significant bits*/
/* End of color profile related chunks */
@@ -770,7 +835,11 @@ typedef struct LodePNGEncoderSettings {
const unsigned char* predefined_filters;
/*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette).
If colortype is 3, PLTE is _always_ created.*/
If colortype is 3, PLTE is always created. If color type is explicitly set
to a grayscale type (1 or 4), this is not done and is ignored. If enabling this,
a palette must be present in the info_png.
NOTE: enabling this may worsen compression if auto_convert is used to choose
optimal color mode, because it cannot use grayscale color modes in this case*/
unsigned force_palette;
#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
/*add LodePNG identifier and version as a text chunk, for debugging*/
@@ -824,8 +893,8 @@ unsigned lodepng_inspect(unsigned* w, unsigned* h,
#endif /*LODEPNG_COMPILE_DECODER*/
/*
Reads one metadata chunk (other than IHDR) of the PNG file and outputs what it
read in the state. Returns error code on failure.
Reads one metadata chunk (other than IHDR, which is handled by lodepng_inspect)
of the PNG file and outputs what it read in the state. Returns error code on failure.
Use lodepng_inspect first with a new state, then e.g. lodepng_chunk_find_const
to find the desired chunk type, and if non null use lodepng_inspect_chunk (with
chunk_pointer - start_of_file as pos).
@@ -1103,7 +1172,7 @@ TODO:
[.] check compatibility with various compilers - done but needs to be redone for every newer version
[X] converting color to 16-bit per channel types
[X] support color profile chunk types (but never let them touch RGB values by default)
[ ] support all public PNG chunk types (almost done except sBIT, sPLT and hIST)
[ ] support all public PNG chunk types (almost done except sPLT and hIST)
[ ] make sure encoder generates no chunks with size > (2^31)-1
[ ] partial decoding (stream processing)
[X] let the "isFullyOpaque" function check color keys and transparent palettes too
@@ -1230,18 +1299,16 @@ The following features are supported by the decoder:
gAMA: RGB gamma correction
iCCP: ICC color profile
sRGB: rendering intent
sBIT: significant bits
1.2. features not supported
---------------------------
The following features are _not_ supported:
The following features are not (yet) supported:
*) some features needed to make a conformant PNG-Editor might be still missing.
*) partial loading/stream processing. All data must be available and is processed in one call.
*) The following public chunks are not (yet) supported but treated as unknown chunks by LodePNG:
sBIT
hIST
sPLT
*) The hIST and sPLT public chunks are not (yet) supported but treated as unknown chunks
2. C and C++ version
@@ -1845,6 +1912,9 @@ symbol.
Not all changes are listed here, the commit history in github lists more:
https://github.com/lvandeve/lodepng
*) 10 apr 2023: faster CRC32 implementation, but with larger lookup table.
*) 13 jun 2022: added support for the sBIT chunk.
*) 09 jan 2022: minor decoder speed improvements.
*) 27 jun 2021: added warnings that file reading/writing functions don't support
wide-character filenames (support for this is not planned, opening files is
not the core part of PNG decoding/encoding and is platform dependent).
@@ -2015,5 +2085,5 @@ Domain: gmail dot com.
Account: lode dot vandevenne.
Copyright (c) 2005-2021 Lode Vandevenne
Copyright (c) 2005-2022 Lode Vandevenne
*/

2662
src/pvpngreader.cpp Normal file

File diff suppressed because it is too large Load Diff

48
src/pvpngreader.h Normal file
View File

@@ -0,0 +1,48 @@
// pvpngreader.h - Public Domain - see unlicense at bottom of pvpngreader.cpp
#pragma once
#include <stddef.h> // size_t, used by get_png_info() and load_png() below
#include <stdint.h>

namespace pv_png
{
	// PNG color types
	enum
	{
		PNG_COLOR_TYPE_GREYSCALE = 0,
		PNG_COLOR_TYPE_TRUECOLOR = 2,
		PNG_COLOR_TYPE_PALETTIZED = 3,
		PNG_COLOR_TYPE_GREYSCALE_ALPHA = 4,
		PNG_COLOR_TYPE_TRUECOLOR_ALPHA = 6
	};

	// PNG file description
	struct png_info
	{
		uint32_t m_width;
		uint32_t m_height;

		uint32_t m_num_chans; // The number of channels, factoring in transparency. Ranges from [1-4].

		uint32_t m_bit_depth; // PNG ihdr bit depth: 1, 2, 4, 8 or 16
		uint32_t m_color_type; // PNG ihdr color type, PNG_COLOR_TYPE_GREYSCALE etc.

		bool m_has_gamma; // true if the PNG file had a GAMA chunk
		uint32_t m_gamma_value; // PNG GAMA chunk value, scaled by 100000

		bool m_has_trns; // true if the PNG file used colorkey transparency
	};

	// Retrieves information about the PNG file.
	// Input parameters:
	// pImage_buf, buf_size - pointer to PNG image data and its size
	//
	// Output parameters:
	// info - description of the PNG file
	//
	// Returns false on any errors.
	bool get_png_info(const void* pImage_buf, size_t buf_size, png_info& info);

	// Input parameters:
	// pImage_buf, buf_size - pointer to PNG image data
	// desired_chans - desired number of output channels. 0=auto, 1=grayscale, 2=grayscale alpha, 3=24bpp RGB, 4=32bpp RGBA
	//
	// Output parameters:
	// width, height - PNG image resolution
	// num_chans - actual number of channels in PNG, from [1,4] (factoring in transparency)
	//
	// Returns nullptr on any errors.
	void* load_png(const void* pImage_buf, size_t buf_size, uint32_t desired_chans, uint32_t &width, uint32_t &height, uint32_t& num_chans);
}

215
src/qoi.h
View File

@@ -1,39 +1,16 @@
/*
Copyright (c) 2021, Dominic Szablewski - https://phoboslab.org
SPDX-License-Identifier: MIT
QOI - The "Quite OK Image" format for fast, lossless image compression
Dominic Szablewski - https://phoboslab.org
-- LICENSE: The MIT License(MIT)
Copyright(c) 2021 Dominic Szablewski
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files(the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions :
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-- About
QOI encodes and decodes images in a lossless format. An encoded QOI image is
usually around 10--30% larger than a decently optimized PNG image.
QOI outperforms simpler PNG encoders in compression ratio and performance. QOI
images are typically 20% smaller than PNGs written with stbi_image. Encoding is
25-50x faster and decoding is 3-4x faster than stbi_image or libpng.
QOI encodes and decodes images in a lossless format. Compared to stb_image and
stb_image_write QOI offers 20x-50x faster encoding, 3x-4x faster decoding and
20% better compression.
-- Synopsis
@@ -48,7 +25,7 @@ images are typically 20% smaller than PNGs written with stbi_image. Encoding is
// the input pixel data.
qoi_write("image_new.qoi", rgba_pixels, &(qoi_desc){
.width = 1920,
.height = 1080,
.height = 1080,
.channels = 4,
.colorspace = QOI_SRGB
});
@@ -77,14 +54,14 @@ QOI_NO_STDIO before including this library.
This library uses malloc() and free(). To supply your own malloc implementation
you can define QOI_MALLOC and QOI_FREE before including this library.
This library uses memset() to zero-initialize the index. To supply your own
This library uses memset() to zero-initialize the index. To supply your own
implementation you can define QOI_ZEROARR before including this library.
-- Data Format
A QOI file has a 14 byte header, followed by any number of data "chunks" and 8
zero-bytes to mark the end of the data stream.
A QOI file has a 14 byte header, followed by any number of data "chunks" and an
8-byte end marker.
struct qoi_header_t {
char magic[4]; // magic bytes "qoif"
@@ -94,33 +71,36 @@ struct qoi_header_t {
uint8_t colorspace; // 0 = sRGB with linear alpha, 1 = all channels linear
};
The decoder and encoder start with {r: 0, g: 0, b: 0, a: 255} as the previous
pixel value. Pixels are either encoded as
Images are encoded row by row, left to right, top to bottom. The decoder and
encoder start with {r: 0, g: 0, b: 0, a: 255} as the previous pixel value. An
image is complete when all pixels specified by width * height have been covered.
Pixels are encoded as
- a run of the previous pixel
- an index into an array of previously seen pixels
- a difference to the previous pixel value in r,g,b
- full r,g,b or r,g,b,a values
The color channels are assumed to not be premultiplied with the alpha channel
The color channels are assumed to not be premultiplied with the alpha channel
("un-premultiplied alpha").
A running array[64] (zero-initialized) of previously seen pixel values is
A running array[64] (zero-initialized) of previously seen pixel values is
maintained by the encoder and decoder. Each pixel that is seen by the encoder
and decoder is put into this array at the position formed by a hash function of
the color value. In the encoder, if the pixel value at the index matches the
current pixel, this index position is written to the stream as QOI_OP_INDEX.
current pixel, this index position is written to the stream as QOI_OP_INDEX.
The hash function for the index is:
index_position = (r * 3 + g * 5 + b * 7 + a * 11) % 64
Each chunk starts with a 2- or 8-bit tag, followed by a number of data bits. The
bit length of chunks is divisible by 8 - i.e. all chunks are byte aligned. All
Each chunk starts with a 2- or 8-bit tag, followed by a number of data bits. The
bit length of chunks is divisible by 8 - i.e. all chunks are byte aligned. All
values encoded in these data bits have the most significant bit on the left.
The 8-bit tags have precedence over the 2-bit tags. A decoder must check for the
presence of an 8-bit tag first.
The byte stream is padded with 8 zero-bytes at the end.
The byte stream's end is marked with 7 0x00 bytes followed by a single 0x01 byte.
The possible chunks are:
@@ -135,8 +115,11 @@ The possible chunks are:
2-bit tag b00
6-bit index into the color index array: 0..63
A valid encoder must not issue 2 or more consecutive QOI_OP_INDEX chunks to the
same index. QOI_OP_RUN should be used instead.
.- QOI_OP_DIFF -----------.
.- QOI_OP_DIFF -----------.
| Byte[0] |
| 7 6 5 4 3 2 1 0 |
|-------+-----+-----+-----|
@@ -147,14 +130,16 @@ The possible chunks are:
2-bit green channel difference from the previous pixel between -2..1
2-bit blue channel difference from the previous pixel between -2..1
The difference to the current channel values are using a wraparound operation,
The difference to the current channel values are using a wraparound operation,
so "1 - 2" will result in 255, while "255 + 1" will result in 0.
Values are stored as unsigned integers with a bias of 2. E.g. -2 is stored as
Values are stored as unsigned integers with a bias of 2. E.g. -2 is stored as
0 (b00). 1 is stored as 3 (b11).
The alpha value remains unchanged from the previous pixel.
.- QOI_OP_LUMA -------------------------------------.
.- QOI_OP_LUMA -------------------------------------.
| Byte[0] | Byte[1] |
| 7 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0 |
|-------+-----------------+-------------+-----------|
@@ -165,18 +150,20 @@ Values are stored as unsigned integers with a bias of 2. E.g. -2 is stored as
4-bit red channel difference minus green channel difference -8..7
4-bit blue channel difference minus green channel difference -8..7
The green channel is used to indicate the general direction of change and is
encoded in 6 bits. The red and green channels (dr and db) base their diffs off
The green channel is used to indicate the general direction of change and is
encoded in 6 bits. The red and blue channels (dr and db) base their diffs off
of the green channel difference and are encoded in 4 bits. I.e.:
dr_dg = (last_px.r - cur_px.r) - (last_px.g - cur_px.g)
db_dg = (last_px.b - cur_px.b) - (last_px.g - cur_px.g)
dr_dg = (cur_px.r - prev_px.r) - (cur_px.g - prev_px.g)
db_dg = (cur_px.b - prev_px.b) - (cur_px.g - prev_px.g)
The difference to the current channel values are using a wraparound operation,
The difference to the current channel values are using a wraparound operation,
so "10 - 13" will result in 253, while "250 + 7" will result in 1.
Values are stored as unsigned integers with a bias of 32 for the green channel
Values are stored as unsigned integers with a bias of 32 for the green channel
and a bias of 8 for the red and blue channel.
The alpha value remains unchanged from the previous pixel.
.- QOI_OP_RUN ------------.
| Byte[0] |
@@ -187,8 +174,8 @@ and a bias of 8 for the red and blue channel.
2-bit tag b11
6-bit run-length repeating the previous pixel: 1..62
The run-length is stored with a bias of 1. Note that the run-lengths 63 and 64
(b111110 and b111111) are illegal as they are occupied by the QOI_OP_RGB and
The run-length is stored with a bias of -1. Note that the run-lengths 63 and 64
(b111110 and b111111) are illegal as they are occupied by the QOI_OP_RGB and
QOI_OP_RGBA tags.
@@ -203,6 +190,8 @@ QOI_OP_RGBA tags.
8-bit green channel value
8-bit blue channel value
The alpha value remains unchanged from the previous pixel.
.- QOI_OP_RGBA ---------------------------------------------------.
| Byte[0] | Byte[1] | Byte[2] | Byte[3] | Byte[4] |
@@ -216,13 +205,6 @@ QOI_OP_RGBA tags.
8-bit blue channel value
8-bit alpha channel value
The byte stream is padded at the end with 8 zero bytes. Since the longest legal
chunk is 5 bytes (QOI_OP_RGBA), with this padding it is possible to check for an
overrun only once per decode loop iteration. These 0x00 bytes also mark the end
of the data stream, as an encoder should never produce 8 consecutive zero bytes
within the stream.
*/
@@ -236,17 +218,17 @@ Header - Public functions */
extern "C" {
#endif
/* A pointer to a qoi_desc struct has to be supplied to all of qoi's functions.
It describes either the input format (for qoi_write and qoi_encode), or is
/* A pointer to a qoi_desc struct has to be supplied to all of qoi's functions.
It describes either the input format (for qoi_write and qoi_encode), or is
filled with the description read from the file header (for qoi_read and
qoi_decode).
The colorspace in this qoi_desc is an enum where
The colorspace in this qoi_desc is an enum where
0 = sRGB, i.e. gamma scaled RGB channels and a linear alpha channel
1 = all channels are linear
You may use the constants QOI_SRGB or QOI_LINEAR. The colorspace is purely
You may use the constants QOI_SRGB or QOI_LINEAR. The colorspace is purely
informative. It will be saved to the file header, but does not affect
en-/decoding in any way. */
how chunks are en-/decoded. */
#define QOI_SRGB 0
#define QOI_LINEAR 1
@@ -260,11 +242,11 @@ typedef struct {
#ifndef QOI_NO_STDIO
/* Encode raw RGB or RGBA pixels into a QOI image and write it to the file
system. The qoi_desc struct must be filled with the image width, height,
number of channels (3 = RGB, 4 = RGBA) and the colorspace.
/* Encode raw RGB or RGBA pixels into a QOI image and write it to the file
system. The qoi_desc struct must be filled with the image width, height,
number of channels (3 = RGB, 4 = RGBA) and the colorspace.
The function returns 0 on failure (invalid parameters, or fopen or malloc
The function returns 0 on failure (invalid parameters, or fopen or malloc
failed) or the number of bytes written on success. */
int qoi_write(const char *filename, const void *data, const qoi_desc *desc);
@@ -275,7 +257,7 @@ number of channels from the file header is used. If channels is 3 or 4 the
output format will be forced into this number of channels.
The function either returns NULL on failure (invalid data, or malloc or fopen
failed) or a pointer to the decoded pixels. On success, the qoi_desc struct
failed) or a pointer to the decoded pixels. On success, the qoi_desc struct
will be filled with the description from the file header.
The returned pixel data should be free()d after use. */
@@ -287,8 +269,8 @@ void *qoi_read(const char *filename, qoi_desc *desc, int channels);
/* Encode raw RGB or RGBA pixels into a QOI image in memory.
The function either returns NULL on failure (invalid parameters or malloc
failed) or a pointer to the encoded data on success. On success the out_len
The function either returns NULL on failure (invalid parameters or malloc
failed) or a pointer to the encoded data on success. On success the out_len
is set to the size in bytes of the encoded data.
The returned qoi data should be free()d after use. */
@@ -298,8 +280,8 @@ void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len);
/* Decode a QOI image from memory.
The function either returns NULL on failure (invalid parameters or malloc
failed) or a pointer to the decoded pixels. On success, the qoi_desc struct
The function either returns NULL on failure (invalid parameters or malloc
failed) or a pointer to the decoded pixels. On success, the qoi_desc struct
is filled with the description from the file header.
The returned pixel data should be free()d after use. */
@@ -342,21 +324,28 @@ Implementation */
(((unsigned int)'q') << 24 | ((unsigned int)'o') << 16 | \
((unsigned int)'i') << 8 | ((unsigned int)'f'))
#define QOI_HEADER_SIZE 14
#define QOI_PADDING 8
/* 2GB is the max file size that this implementation can safely handle. We guard
against anything larger than that, assuming the worst case with 5 bytes per
pixel, rounded down to a nice clean value. 400 million pixels ought to be
enough for anybody. */
#define QOI_PIXELS_MAX ((unsigned int)400000000)
typedef union {
struct { unsigned char r, g, b, a; } rgba;
unsigned int v;
} qoi_rgba_t;
void qoi_write_32(unsigned char *bytes, int *p, unsigned int v) {
static const unsigned char qoi_padding[8] = {0,0,0,0,0,0,0,1};
/* Store the low 32 bits of v at bytes[*p], most significant byte first,
and advance *p by the four bytes written. */
static void qoi_write_32(unsigned char *bytes, int *p, unsigned int v) {
	int shift;
	for (shift = 24; shift >= 0; shift -= 8) {
		bytes[*p] = (unsigned char)((v >> shift) & 0xffu);
		*p += 1;
	}
}
unsigned int qoi_read_32(const unsigned char *bytes, int *p) {
static unsigned int qoi_read_32(const unsigned char *bytes, int *p) {
unsigned int a = bytes[(*p)++];
unsigned int b = bytes[(*p)++];
unsigned int c = bytes[(*p)++];
@@ -376,14 +365,15 @@ void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len) {
data == NULL || out_len == NULL || desc == NULL ||
desc->width == 0 || desc->height == 0 ||
desc->channels < 3 || desc->channels > 4 ||
desc->colorspace > 2
desc->colorspace > 1 ||
desc->height >= QOI_PIXELS_MAX / desc->width
) {
return NULL;
}
max_size =
desc->width * desc->height * (desc->channels + 1) +
QOI_HEADER_SIZE + QOI_PADDING;
max_size =
desc->width * desc->height * (desc->channels + 1) +
QOI_HEADER_SIZE + sizeof(qoi_padding);
p = 0;
bytes = (unsigned char *) QOI_MALLOC(max_size);
@@ -408,19 +398,18 @@ void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len) {
px_prev.rgba.b = 0;
px_prev.rgba.a = 255;
px = px_prev;
px_len = desc->width * desc->height * desc->channels;
px_end = px_len - desc->channels;
channels = desc->channels;
for (px_pos = 0; px_pos < px_len; px_pos += channels) {
px.rgba.r = pixels[px_pos + 0];
px.rgba.g = pixels[px_pos + 1];
px.rgba.b = pixels[px_pos + 2];
if (channels == 4) {
px = *(qoi_rgba_t *)(pixels + px_pos);
}
else {
px.rgba.r = pixels[px_pos + 0];
px.rgba.g = pixels[px_pos + 1];
px.rgba.b = pixels[px_pos + 2];
px.rgba.a = pixels[px_pos + 3];
}
if (px.v == px_prev.v) {
@@ -456,14 +445,14 @@ void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len) {
if (
vr > -3 && vr < 2 &&
vg > -3 && vg < 2 &&
vg > -3 && vg < 2 &&
vb > -3 && vb < 2
) {
bytes[p++] = QOI_OP_DIFF | (vr + 2) << 4 | (vg + 2) << 2 | (vb + 2);
}
else if (
vg_r > -9 && vg_r < 8 &&
vg > -33 && vg < 32 &&
vg_r > -9 && vg_r < 8 &&
vg > -33 && vg < 32 &&
vg_b > -9 && vg_b < 8
) {
bytes[p++] = QOI_OP_LUMA | (vg + 32);
@@ -488,8 +477,8 @@ void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len) {
px_prev = px;
}
for (i = 0; i < QOI_PADDING; i++) {
bytes[p++] = 0;
for (i = 0; i < (int)sizeof(qoi_padding); i++) {
bytes[p++] = qoi_padding[i];
}
*out_len = p;
@@ -502,13 +491,13 @@ void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels) {
unsigned char *pixels;
qoi_rgba_t index[64];
qoi_rgba_t px;
int px_len, chunks_len, px_pos;
int px_len, chunks_len, px_pos;
int p = 0, run = 0;
if (
data == NULL || desc == NULL ||
(channels != 0 && channels != 3 && channels != 4) ||
size < QOI_HEADER_SIZE + QOI_PADDING
size < QOI_HEADER_SIZE + (int)sizeof(qoi_padding)
) {
return NULL;
}
@@ -522,10 +511,11 @@ void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels) {
desc->colorspace = bytes[p++];
if (
desc->width == 0 || desc->height == 0 ||
desc->width == 0 || desc->height == 0 ||
desc->channels < 3 || desc->channels > 4 ||
desc->colorspace > 2 ||
header_magic != QOI_MAGIC
desc->colorspace > 1 ||
header_magic != QOI_MAGIC ||
desc->height >= QOI_PIXELS_MAX / desc->width
) {
return NULL;
}
@@ -546,7 +536,7 @@ void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels) {
px.rgba.b = 0;
px.rgba.a = 255;
chunks_len = size - QOI_PADDING;
chunks_len = size - (int)sizeof(qoi_padding);
for (px_pos = 0; px_pos < px_len; px_pos += channels) {
if (run > 0) {
run--;
@@ -587,13 +577,12 @@ void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels) {
index[QOI_COLOR_HASH(px) % 64] = px;
}
if (channels == 4) {
*(qoi_rgba_t*)(pixels + px_pos) = px;
}
else {
pixels[px_pos + 0] = px.rgba.r;
pixels[px_pos + 1] = px.rgba.g;
pixels[px_pos + 2] = px.rgba.b;
pixels[px_pos + 0] = px.rgba.r;
pixels[px_pos + 1] = px.rgba.g;
pixels[px_pos + 2] = px.rgba.b;
if (channels == 4) {
pixels[px_pos + 3] = px.rgba.a;
}
}
@@ -616,11 +605,11 @@ int qoi_write(const char *filename, const void *data, const qoi_desc *desc) {
if (!encoded) {
fclose(f);
return 0;
}
}
fwrite(encoded, 1, size, f);
fclose(f);
QOI_FREE(encoded);
return size;
}
@@ -636,6 +625,10 @@ void *qoi_read(const char *filename, qoi_desc *desc, int channels) {
fseek(f, 0, SEEK_END);
size = ftell(f);
if (size <= 0) {
fclose(f);
return NULL;
}
fseek(f, 0, SEEK_SET);
data = QOI_MALLOC(size);
@@ -653,4 +646,4 @@ void *qoi_read(const char *filename, qoi_desc *desc, int channels) {
}
#endif /* QOI_NO_STDIO */
#endif /* QOI_IMPLEMENTATION */
#endif /* QOI_IMPLEMENTATION */

View File

@@ -1,4 +1,4 @@
/* stb_image - v2.27 - public domain image loader - http://nothings.org/stb
/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb
no warranty implied; use at your own risk
Do this:
@@ -48,6 +48,7 @@ LICENSE
RECENT REVISION HISTORY:
2.28 (2023-01-29) many error fixes, security errors, just tons of stuff
2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
2.26 (2020-07-13) many minor fixes
2.25 (2020-02-02) fix warnings
@@ -108,7 +109,7 @@ RECENT REVISION HISTORY:
Cass Everitt Ryamond Barbiero github:grim210
Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw
Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus
Josh Tobin Matthew Gregan github:poppolopoppo
Josh Tobin Neil Bickford Matthew Gregan github:poppolopoppo
Julian Raschke Gregory Mullen Christian Floisand github:darealshinji
Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007
Brad Weinberger Matvey Cherevko github:mosra
@@ -140,7 +141,7 @@ RECENT REVISION HISTORY:
// // ... x = width, y = height, n = # 8-bit components per pixel ...
// // ... replace '0' with '1'..'4' to force that many components per pixel
// // ... but 'n' will always be the number that it would have been if you said 0
// stbi_image_free(data)
// stbi_image_free(data);
//
// Standard parameters:
// int *x -- outputs image width in pixels
@@ -635,7 +636,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch
#endif
#endif
#ifdef _MSC_VER
#if defined(_MSC_VER) || defined(__SYMBIAN32__)
typedef unsigned short stbi__uint16;
typedef signed short stbi__int16;
typedef unsigned int stbi__uint32;
@@ -1063,6 +1064,23 @@ static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
}
#endif
// Overflow pre-check for signed int addition.
// returns 1 if a + b is representable as an int (between INT_MIN and INT_MAX inclusive), 0 if it would overflow.
static int stbi__addints_valid(int a, int b)
{
   int a_negative = (a < 0);
   int b_negative = (b < 0);

   // operands of opposite sign pull toward zero, so the sum always fits
   if (a_negative != b_negative) return 1;

   // both negative: a + b >= INT_MIN, rearranged so that INT_MIN - b cannot itself overflow (b < 0)
   if (a_negative) return a >= INT_MIN - b;

   // both non-negative: a + b <= INT_MAX, rearranged so that INT_MAX - b cannot overflow (b >= 0)
   return a <= INT_MAX - b;
}
// returns 1 if the product of two signed shorts is valid (lies in
// [SHRT_MIN, SHRT_MAX]), 0 on overflow.
//
// The check is done with divisions instead of forming the product, so no
// intermediate value can overflow. Integer division truncates toward zero,
// which is exactly the rounding each comparison below needs.
//
// Fixes relative to the previous version:
//  - two negative operands used the comparison direction meant for two
//    positive operands, so valid products such as -100 * -2 were rejected;
//  - b == -1 was unconditionally accepted, although SHRT_MIN * -1 does not
//    fit in a short.
static int stbi__mul2shorts_valid(short a, short b)
{
   if (a == 0 || b == 0) return 1;               // product is 0
   if (b == -1) return a >= -SHRT_MAX;           // -a fits unless a is below -SHRT_MAX; also keeps SHRT_MIN / b below away from b == -1
   if (a > 0 && b > 0) return a <= SHRT_MAX / b; // positive product: a * b <= SHRT_MAX
   if (a < 0 && b < 0) return a >= SHRT_MAX / b; // positive product: dividing by negative b flips the inequality
   if (b < 0) return a <= SHRT_MIN / b;          // a > 0, b < -1: same as a * b >= SHRT_MIN
   return a >= SHRT_MIN / b;                     // a < 0, b > 0: same as a * b >= SHRT_MIN
}
// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char
@@ -1985,9 +2003,12 @@ static int stbi__build_huffman(stbi__huffman *h, int *count)
int i,j,k=0;
unsigned int code;
// build size list for each symbol (from JPEG spec)
for (i=0; i < 16; ++i)
for (j=0; j < count[i]; ++j)
for (i=0; i < 16; ++i) {
for (j=0; j < count[i]; ++j) {
h->size[k++] = (stbi_uc) (i+1);
if(k >= 257) return stbi__err("bad size list","Corrupt JPEG");
}
}
h->size[k] = 0;
// compute actual symbols (from jpeg spec)
@@ -2112,6 +2133,8 @@ stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
// convert the huffman code to the symbol id
c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
if(c < 0 || c >= 256) // symbol id out of bounds!
return -1;
STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
// convert the id to a symbol
@@ -2130,6 +2153,7 @@ stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
unsigned int k;
int sgn;
if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
k = stbi_lrot(j->code_buffer, n);
@@ -2144,6 +2168,7 @@ stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
{
unsigned int k;
if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
k = stbi_lrot(j->code_buffer, n);
j->code_buffer = k & ~stbi__bmask[n];
k &= stbi__bmask[n];
@@ -2155,6 +2180,7 @@ stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
{
unsigned int k;
if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s intead of continuing
k = j->code_buffer;
j->code_buffer <<= 1;
--j->code_bits;
@@ -2192,8 +2218,10 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman
memset(data,0,64*sizeof(data[0]));
diff = t ? stbi__extend_receive(j, t) : 0;
if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG");
dc = j->img_comp[b].dc_pred + diff;
j->img_comp[b].dc_pred = dc;
if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
data[0] = (short) (dc * dequant[0]);
// decode AC components, see JPEG spec
@@ -2207,6 +2235,7 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman
if (r) { // fast-AC path
k += (r >> 4) & 15; // run
s = r & 15; // combined length
if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
j->code_buffer <<= s;
j->code_bits -= s;
// decode into unzigzag'd location
@@ -2246,8 +2275,10 @@ static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__
if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
diff = t ? stbi__extend_receive(j, t) : 0;
if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG");
dc = j->img_comp[b].dc_pred + diff;
j->img_comp[b].dc_pred = dc;
if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
data[0] = (short) (dc * (1 << j->succ_low));
} else {
// refinement scan for DC coefficient
@@ -2282,6 +2313,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
if (r) { // fast-AC path
k += (r >> 4) & 15; // run
s = r & 15; // combined length
if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
j->code_buffer <<= s;
j->code_bits -= s;
zig = stbi__jpeg_dezigzag[k++];
@@ -3102,6 +3134,7 @@ static int stbi__process_marker(stbi__jpeg *z, int m)
sizes[i] = stbi__get8(z->s);
n += sizes[i];
}
if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values!
L -= 17;
if (tc == 0) {
if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
@@ -3351,6 +3384,28 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
return 1;
}
// Scans forward through trailing junk after the entropy-coded data.
// Returns the marker byte if something that looks like a real marker is
// found (0xff followed by a byte that is neither 0x00 nor 0xff), otherwise
// STBI__MARKER_none once the stream reports end-of-file.
static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j)
{
   for (;;) {
      int byte;

      if (stbi__at_eof(j->s)) break;
      byte = stbi__get8(j->s);

      // 0xff may introduce a marker; inspect what follows it
      while (byte == 0xff) {
         if (stbi__at_eof(j->s)) return STBI__MARKER_none;
         byte = stbi__get8(j->s);

         // 0x00 is a stuffed zero: the loop condition fails and plain
         // scanning resumes. 0xff is fill before a marker: keep reading.
         if (byte == 0x00 || byte == 0xff) continue;

         // anything else looks like an actual marker, hand it back
         return byte;
      }
   }
   return STBI__MARKER_none;
}
// decode image to YCbCr format
static int stbi__decode_jpeg_image(stbi__jpeg *j)
{
@@ -3367,25 +3422,22 @@ static int stbi__decode_jpeg_image(stbi__jpeg *j)
if (!stbi__process_scan_header(j)) return 0;
if (!stbi__parse_entropy_coded_data(j)) return 0;
if (j->marker == STBI__MARKER_none ) {
// handle 0s at the end of image data from IP Kamera 9060
while (!stbi__at_eof(j->s)) {
int x = stbi__get8(j->s);
if (x == 255) {
j->marker = stbi__get8(j->s);
break;
}
}
j->marker = stbi__skip_jpeg_junk_at_end(j);
// if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
}
m = stbi__get_marker(j);
if (STBI__RESTART(m))
m = stbi__get_marker(j);
} else if (stbi__DNL(m)) {
int Ld = stbi__get16be(j->s);
stbi__uint32 NL = stbi__get16be(j->s);
if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
m = stbi__get_marker(j);
} else {
if (!stbi__process_marker(j, m)) return 0;
if (!stbi__process_marker(j, m)) return 1;
m = stbi__get_marker(j);
}
m = stbi__get_marker(j);
}
if (j->progressive)
stbi__jpeg_finish(j);
@@ -3976,6 +4028,7 @@ static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int re
unsigned char* result;
stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
if (!j) return stbi__errpuc("outofmem", "Out of memory");
memset(j, 0, sizeof(stbi__jpeg));
STBI_NOTUSED(ri);
j->s = s;
stbi__setup_jpeg(j);
@@ -3989,6 +4042,7 @@ static int stbi__jpeg_test(stbi__context *s)
int r;
stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
if (!j) return stbi__err("outofmem", "Out of memory");
memset(j, 0, sizeof(stbi__jpeg));
j->s = s;
stbi__setup_jpeg(j);
r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
@@ -4014,6 +4068,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
int result;
stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
if (!j) return stbi__err("outofmem", "Out of memory");
memset(j, 0, sizeof(stbi__jpeg));
j->s = s;
result = stbi__jpeg_info_raw(j, x, y, comp);
STBI_FREE(j);
@@ -4256,11 +4311,12 @@ static int stbi__parse_huffman_block(stbi__zbuf *a)
a->zout = zout;
return 1;
}
if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
z -= 257;
len = stbi__zlength_base[z];
if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
z = stbi__zhuffman_decode(a, &a->z_distance);
if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data
dist = stbi__zdist_base[z];
if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
@@ -4955,7 +5011,7 @@ STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;
STBIDEF void stbi__unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
{
stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
stbi__unpremultiply_on_load_set = 1;
@@ -5064,14 +5120,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
if (!pal_img_n) {
s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
if (scan == STBI__SCAN_header) return 1;
} else {
// if paletted, then pal_n is our final components, and
// img_n is # components to decompress/filter.
s->img_n = 1;
if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
// if SCAN_header, have to scan to see if we have a tRNS
}
// even with SCAN_header, have to scan to see if we have a tRNS
break;
}
@@ -5103,6 +5158,8 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
has_trans = 1;
// non-paletted with tRNS = constant alpha. if header-scanning, we can stop now.
if (scan == STBI__SCAN_header) { ++s->img_n; return 1; }
if (z->depth == 16) {
for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
} else {
@@ -5115,7 +5172,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
case STBI__PNG_TYPE('I','D','A','T'): {
if (first) return stbi__err("first not IHDR", "Corrupt PNG");
if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
if (scan == STBI__SCAN_header) {
// header scan definitely stops at first IDAT
if (pal_img_n)
s->img_n = pal_img_n;
return 1;
}
if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes");
if ((int)(ioff + c.length) < (int)ioff) return 0;
if (ioff + c.length > idata_limit) {
stbi__uint32 idata_limit_old = idata_limit;
@@ -5498,8 +5561,22 @@ static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req
psize = (info.offset - info.extra_read - info.hsz) >> 2;
}
if (psize == 0) {
if (info.offset != s->callback_already_read + (s->img_buffer - s->img_buffer_original)) {
return stbi__errpuc("bad offset", "Corrupt BMP");
// accept some number of extra bytes after the header, but if the offset points either to before
// the header ends or implies a large amount of extra data, reject the file as malformed
int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original);
int header_limit = 1024; // max we actually read is below 256 bytes currently.
int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size.
if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) {
return stbi__errpuc("bad header", "Corrupt BMP");
}
// we established that bytes_read_so_far is positive and sensible.
// the first half of this test rejects offsets that are either too small positives, or
// negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn
// ensures the number computed in the second half of the test can't overflow.
if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) {
return stbi__errpuc("bad offset", "Corrupt BMP");
} else {
stbi__skip(s, info.offset - bytes_read_so_far);
}
}
@@ -7187,12 +7264,12 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
// Run
value = stbi__get8(s);
count -= 128;
if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
for (z = 0; z < count; ++z)
scanline[i++ * 4 + k] = value;
} else {
// Dump
if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
for (z = 0; z < count; ++z)
scanline[i++ * 4 + k] = stbi__get8(s);
}
@@ -7446,10 +7523,17 @@ static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req
out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
if (!out) return stbi__errpuc("outofmem", "Out of memory");
stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8));
if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) {
STBI_FREE(out);
return stbi__errpuc("bad PNM", "PNM file truncated");
}
if (req_comp && req_comp != s->img_n) {
out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
if (ri->bits_per_channel == 16) {
out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y);
} else {
out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
}
if (out == NULL) return out; // stbi__convert_format frees input on failure
}
return out;
@@ -7486,6 +7570,8 @@ static int stbi__pnm_getinteger(stbi__context *s, char *c)
while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
value = value*10 + (*c - '0');
*c = (char) stbi__get8(s);
if((value > 214748364) || (value == 214748364 && *c > '7'))
return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int");
}
return value;
@@ -7516,9 +7602,13 @@ static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
stbi__pnm_skip_whitespace(s, &c);
*x = stbi__pnm_getinteger(s, &c); // read width
if(*x == 0)
return stbi__err("invalid width", "PPM image header had zero or overflowing width");
stbi__pnm_skip_whitespace(s, &c);
*y = stbi__pnm_getinteger(s, &c); // read height
if (*y == 0)
return stbi__err("invalid width", "PPM image header had zero or overflowing width");
stbi__pnm_skip_whitespace(s, &c);
maxv = stbi__pnm_getinteger(s, &c); // read max value
@@ -7894,4 +7984,4 @@ AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
------------------------------------------------------------------------------
*/
*/

43920
src/wuffs-v0.3.c Normal file

File diff suppressed because it is too large Load Diff