diff --git a/.github/workflows/on_PR_linux_fuzz.yml b/.github/workflows/on_PR_linux_fuzz.yml index 9fb1e5b2..21b3bc71 100644 --- a/.github/workflows/on_PR_linux_fuzz.yml +++ b/.github/workflows/on_PR_linux_fuzz.yml @@ -27,4 +27,4 @@ jobs: run: | cd build mkdir corpus - ./bin/fuzz-read-print-write corpus ../test/data/ -jobs=$(nproc) -workers=$(nproc) -max_total_time=120 -max_len=4096 + ./bin/fuzz-read-print-write corpus ../test/data/ -dict=../fuzz/exiv2.dict -jobs=$(nproc) -workers=$(nproc) -max_total_time=120 -max_len=4096 diff --git a/README.md b/README.md index a8c0a6f3..b47755fa 100644 --- a/README.md +++ b/README.md @@ -1040,12 +1040,31 @@ $ cd /build $ make python_tests 2>&1 | grep FAIL ``` +[TOC](#TOC) +
+ +### 4.5 Test Summary + +| *Tests* | Unix Style Platforms _(bash)_ | Visual Studio _(cmd.exe)_ | +|:-- |:--- |:-- | +| | $ cd \<exiv2dir\>/build | \> cd \<exiv2dir\>/build | +| tests | $ make tests | \> cmake --build . --config Release --target tests | +| bash_tests | $ make bash_tests | \> cmake --build . --config Release --target bash_tests | +| python_tests | $ make python_tests | \> cmake --build . --config Release --target python_tests | +| unit_test | $ make unit_test | \> cmake --build . --config Release --target unit_test | +| version_test | $ make version_test | \> cmake --build . --config Release --target version_test | + +The name **bash_tests** is historical. The tests are implemented in Python. + +[TOC](#TOC) +
+ ### 4.6 Fuzzing The code for the fuzzers is in `exiv2dir/fuzz` To build the fuzzers, use the *cmake* option `-DEXIV2_BUILD_FUZZ_TESTS=ON` and `-DEXIV2_TEAM_USE_SANITIZERS=ON`. -Note that it only works with clang compiler as libFuzzer is integrate with clang > 6.0 +Note that this only works with the clang compiler, as libFuzzer is integrated with clang > 6.0. To build the fuzzers: @@ -1064,21 +1083,7 @@ mkdir corpus ./bin/fuzz-read-print-write corpus ../test/data/ -jobs=$(nproc) -workers=$(nproc) -max_len=4096 ``` -[TOC](#TOC) -
- -### 4.5 Test Summary - -| *Tests* | Unix Style Platforms _(bash)_ | Visual Studio _(cmd.exe)_ | -|:-- |:--- |:-- | -| | $ cd \/build | \> cd \/build | -| tests | $ make tests | \> cmake --build . --config Release --target tests | -| bash_tests | $ make bash_tests | \> cmake --build . --config Release --target bash_tests | -| python_tests | $ make python_tests | \> cmake --build . --config Release --target python_tests | -| unit_test | $ make unit_test | \> cmake --build . --config Release --target unit_test | -| version_test | $ make version_test | \> cmake --build . --config Release --target version_test | - -The name **bash_tests** is historical. They are implemented in python. +For more information about fuzzing see [`fuzz/README.md`](fuzz/README.md). [TOC](#TOC)
diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 00000000..326c71cf --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,63 @@ +# Exiv2 fuzzing + +This directory contains a [libFuzzer](https://llvm.org/docs/LibFuzzer.html) fuzzing target for Exiv2. The fuzzer is run for a short period of time on every pull request by the [`on_PR_linux_fuzz`](/.github/workflows/on_PR_linux_fuzz.yml) Action. + +## Running the fuzzer + +To run the fuzzer locally, first build it: + +```bash +cd <exiv2dir> +mkdir build-fuzz +cd build-fuzz +cmake -DEXIV2_ENABLE_PNG=ON -DEXIV2_ENABLE_WEBREADY=ON -DEXIV2_ENABLE_CURL=ON -DEXIV2_ENABLE_BMFF=ON -DEXIV2_TEAM_WARNINGS_AS_ERRORS=ON -DCMAKE_CXX_COMPILER=$(which clang++) -DEXIV2_BUILD_FUZZ_TESTS=ON -DEXIV2_TEAM_USE_SANITIZERS=ON .. +make -j $(nproc) +``` + +This is the command to run the fuzzer for 2 minutes: + +```bash +cd <exiv2dir>/build-fuzz +mkdir corpus +./bin/fuzz-read-print-write corpus ../test/data/ -dict=../fuzz/exiv2.dict -jobs=$(nproc) -workers=$(nproc) -max_total_time=120 +``` + +Alternatively, a simple script is provided for running the fuzzer in a continuous loop: + +```bash +../fuzz/fuzzloop.sh +``` + +## Generating a dictionary + +Fuzzers perform better with a [dictionary](https://llvm.org/docs/LibFuzzer.html#dictionaries).
For example, suppose the code contains a condition like [this](https://github.com/Exiv2/exiv2/blob/15098f4ef50cc721ad0018218acab2ff06e60beb/src/xmpsidecar.cpp#L177-L179): + +```c +if (xmpPacket_.substr(0, 5) != " +codeql database create --language=cpp exiv2db +``` + +Then run the query and convert the results to JSON: + +```bash +codeql query run --database=exiv2db --output=dict.bqrs fuzz/mkdictionary.ql +codeql bqrs decode --format=json --output dict.json dict.bqrs +``` + +Finally, use [`mkdictionary.py`](mkdictionary.py) to convert the JSON to libFuzzer's dictionary format: + +```bash +./fuzz/mkdictionary.py dict.json | sort > ./fuzz/exiv2.dict +``` diff --git a/fuzz/exiv2.dict b/fuzz/exiv2.dict new file mode 100644 index 00000000..197649a1 --- /dev/null +++ b/fuzz/exiv2.dict @@ -0,0 +1,398 @@ +" " +"#" +"*" +"-" +"--" +"/" +"://" +"=:" +"[]" +"\\" +"\\/" +"]" +"%0" +"0" +"0002" +"0x" +"0x1234" +"0xabcd" +"%1" +"1" +"1000" +"100mm F2.8 Macro" +"%2" +"2000" +"20D" +"24 24 1" +"%3" +"3000" +"30D" +"3100" +"3200" +"3300" +"3310" +"3320" +"350D" +"400D" +"8" +"95" +"9999:99:99 00:00:00" +"about" +"ad" +"adjust" +"%ADOBeginClientInjection: PageSetup End \"AI11EPS\"" +"%ADOBeginClientInjection: PageTrailer Start \"AI11EPS\"" +"%ADO_ContainsXMP:" +"%ADO_ContainsXMP: MainFirst" +"%ADO_ContainsXMP:MainFirst" +"%ADO_ContainsXMP: NoMain" +"%ADO_ContainsXMP:NoMain" +"AF-A " +"AF-C " +"AF-S " +"%AI7_Thumbnail:" +"all" +"ALL" +"Alt" +"AOC\x00" +"ascii" +"Bag" +"base64," +"%%BeginData:" +"%%BeginDefaults" +"%%BeginDocument:" +"%%BeginPageSetup" +"%BeginPhotoshop:" +"%%BeginPreview:" +"%%BeginProlog" +"%%BeginResource: procset wCorel" +"%%BeginSetup" +"%begin_xml_code" +"%begin_xml_packet:" +"cano" +"Canon EOS 30D" +"charset=" +"Collection" +"Content-Length" +"%%Creator: Adobe Illustrator" +"crs:RawFileName" +"crs:ToneCurveName" +"crs:Version" +"--curl" +"D50" +"D70" +"Date" +"dc:contributor" +"dc:creator" +"dc:date" +"dc:description" +"dc:format" +"dc:language" +"dc:publisher" 
+"dc:relation" +"dc:rights" +"dc:subject" +"dc:title" +"dc:type" +"delete" +"delta" +"Description" +"Destination" +"_dflt_" +"_dflt_:" +"dryrun" +"DSLR-A330" +"DSLR-A380" +"dst" +"DT 11-18mm F4.5-5.6" +"E-3 " +"E-30 " +"ele" +"[/EMC pdfmark" +"%%EndComments" +"%%EndData" +"%%EndDefaults" +"%%EndDocument" +"%%EndPageSetup" +"%EndPhotoshop" +"%%EndPreview" +"%%EndProlog" +"%%EndSetup" +"% &&end XMP packet marker&&" +"% &&end XMP packet marker&&" +"Envelope" +"%%EOF" +"ex" +"eXIf" +"Exif" +"Exif." +"Exif.CanonCs.Lens" +"Exif.CanonCs.MaxAperture" +"/exif:Fired" +"/exif:Function" +"Exif.GPSInfo.GPSDateStamp" +"Exif.GPSInfo.GPSTimeStamp" +"Exif.Image.DateTime" +"Exif.Image.ImageLength" +"Exif.Image.ImageWidth" +"Exif.Image.Model" +"Exif.MakerNote.ByteOrder" +"/exif:Mode" +"Exif.Nikon3.Focus" +"Exif.Nikon3.LensType" +"Exif.PanasonicRaw.SensorHeight" +"Exif.PanasonicRaw.SensorWidth" +"Exif.Pentax.Date" +"Exif.PentaxDng.Date" +"Exif.PentaxDng.LensInfo" +"Exif.PentaxDng.Time" +"Exif.Pentax.LensInfo" +"Exif.Pentax.Time" +"Exif.Photo.DateTimeDigitized" +"Exif.Photo.DateTimeOriginal" +"Exif.Photo.ISOSpeedRatings" +"Exif.Photo.PixelXDimension" +"Exif.Photo.PixelYDimension" +"Exif.Photo.SubSecTime" +"Exif.Photo.SubSecTimeDigitized" +"Exif.Photo.SubSecTimeOriginal" +"Exif.Photo.UserComment" +"/exif:RedEyeMode" +"/exif:Return" +"exif:UserComment" +"%Exiv2BeginXMP: After %%PageTrailer" +"%Exiv2BeginXMP: Before %%EndPageSetup" +"%Exiv2Version:" +"%Exiv2Website:" +"extract" +"f" +"false" +"FALSE" +"fc" +"FFF" +"fi" +"fixcom" +"fixcomment" +"fixiso" +"FLIR" +"Focus" +":format" +"FUJIFILM" +"--group" +"Groups" +"HasExtendedXMP" +":height" +"--help" +"help" +"Hex" +"hex-ai7thumbnail-pnm" +"hex-irb" +"http" +"http://" +"-http1_0" +"http://ns.adobe.com/camera-raw-settings/1.0/" +"http://ns.adobe.com/exif/1.0/" +"http://ns.adobe.com/exif/1.0/aux/" +"http://ns.adobe.com/pdf/1.3/" +"http://ns.adobe.com/photoshop/1.0/" +"http://ns.adobe.com/png/1.0/" +"http://ns.adobe.com/StockPhoto/1.0/" 
+"http://ns.adobe.com/tiff/1.0/" +"http://ns.adobe.com/x" +"http://ns.adobe.com/xap/1.0/" +"http://ns.adobe.com/xap/1.0/g/" +"http://ns.adobe.com/xap/1.0/g/img/" +"http://ns.adobe.com/xap/1.0/mm/" +"http://ns.adobe.com/xap/1.0/sType/Font#" +"http://ns.adobe.com/xap/1.0/t/" +"http://ns.adobe.com/xap/1.0/t/pg/" +"http://ns.adobe.com/xmp/note/" +"http://purl.org/dc/1.1/" +"http://purl.org/dc/elements/1.1/" +"iCCP" +"ID" +"IEND" +"IHDR" +"II" +"ILCE-6000" +":image" +"Image" +"Image2" +"image/jpeg" +".ImageLength" +"image/tiff" +".ImageWidth" +"image/x-canon-cr2" +"image/x-portable-anymap" +"image/x-wmf" +"in" +"%%IncludeDocument:" +"insert" +"Interoperability" +"InteroperabilityVersion" +"Iop" +"Iptc" +"Iptc.0x1234.0xabcd" +"Iptc.Envelope.CharacterSet" +"Iptc.Envelope.Destination" +"iTXt" +"iX:changes" +"JPEG" +"K236" +"Kiss Digital N" +"Kiss Digital X" +"lang=" +"%%LanguageLevel:" +"%%LanguageLevel: 1" +"%%LanguageLevel:1" +"[last()" +"lat" +"--lint" +"Literal" +"lon" +"Makernote" +"MakerNote" +"mime\x00application/rdf+xml" +"mime\x00xmp" +"MM" +"mo" +"Model" +"modify" +"MPF" +"mv" +"[/NamespacePop pdfmark" +"[{nextImage} 1 dict begin /Metadata {photoshop_metadata_stream} def currentdict end /PUT pdfmark" +"Nikon" +"NIKON D" +"Nikon\x00" +"no" +"--nocurl" +"NULL" +"off" +"OLYMPUS\x00II" +"on" +"%%Page:" +"%%Pages:" +"%%PageTrailer" +"pdf:BaseURL" +"pdf:Creator" +"pdf:ModDate" +"pdf:PDFVersion" +"pdf:Producer" +"PENTAX *ist DL" +"PENTAX K100D" +"PENTAX K-3" +"PENTAX \x00" +"Photoshop 3.0" +"photoshop:ICCProfile" +"pr" +"print" +"prof" +"%!PS-Adobe-3.0 EPSF-3.0" +"QVC\x00\x00\x00" +"Raw profile type APP1" +"Raw profile type exif" +"Raw profile type iptc" +"Raw profile type xmp" +"rdf:" +"rdf:about" +"rdf:aboutEach" +"rdf:aboutEachPrefix" +"rdf:Alt" +"rdf:Bag" +"rdf:bagID" +"rdf:datatype" +"rdf:Description" +"rdf:ID" +"rdf:li" +"rdf:nodeID" +"rdf:parseType" +"rdf:RDF" +"rdf:resource" +"rdf:Seq" +"rdf:type" +"rdf:value" +"REBEL XT" +"REBEL XTi" +"Ref" +"rename" 
+"Resource" +"rICC" +"rm" +"Seq" +"Sigma" +"SLT-A77V" +"Sony1" +"SONY DSC \x00\x00\x00" +".StripOffsets" +"Struct" +"t" +"tEXt" +".tga" +".TGA" +"this is a little teAppending this on the end" +"this is a little test of MemIo" +"Thumbnail" +"tiff:Artist" +"tiff:Copyright" +"tiff:ImageDescription" +".TileOffsets" +"time" +"%%Trailer" +"trkpt" +"trkseg" +"true" +"TRUE" +"TRUEVISION-XFILE" +"TTW" +"type=" +"type=\"Alt\"" +"type=\"Bag\"" +"type=\"Seq\"" +"type=\"Struct\"" +"tz" +"UserComment" +"uuid:" +"verbose" +"--version" +"version" +"--version-test" +":width" +"&#x" +"\x00MRM" +" \x09" +"\x1B%G" +"\xC2\x85\xC3\x80\xC2\xB6\xC2\x87\xC2\x82\x0F\x11\xC3\xA0\xC2\x81\x11\xC3\xB4\xC3\x8EF+jH" +"\xC2\xBEz\xC3\x8F\xC3\x8B\xC2\x97\xC2\xA9B\xC3\xA8\xC2\x9Cq\xC2\x99\xC2\x94\xC2\x91\xC3\xA3\xC2\xAF\xC2\xAC" +"\xC3\xAA\xC3\xB4+^\x1C\xC2\x98K\xC2\x88\xC2\xB9\xC3\xBB\xC2\xB7\xC3\x9C@nM\x16" +"\xC3\xAF\xC2\xBB\xC2\xBF" +"\xC3\xBE\xC3\xBF" +"\xC3\xBF\xC3\xBE" +"x-default" +"printStructure(buffer, Exiv2::kpsNone); + image->printStructure(buffer, Exiv2::kpsBasic); + image->printStructure(buffer, Exiv2::kpsXMP); + image->printStructure(buffer, Exiv2::kpsRecursive); + image->printStructure(buffer, Exiv2::kpsIccProfile); + image->printStructure(buffer, Exiv2::kpsIptcErase); image->writeMetadata(); diff --git a/fuzz/fuzzloop.sh b/fuzz/fuzzloop.sh new file mode 100755 index 00000000..07a57102 --- /dev/null +++ b/fuzz/fuzzloop.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Stop when an error is found +set -e + +# The memory and disk usage grows over time, so this loop restarts the +# fuzzer every 4 hours. The `-merge=1` option is used to minimize the +# corpus on each iteration. 
+while :
+do
+  date
+  echo restarting loop
+
+  # Minimize the corpus
+  mv corpus/ corpus2
+  mkdir corpus
+  echo minimizing corpus
+  ./bin/fuzz-read-print-write -merge=1 corpus ../test/data/ corpus2/
+  rm -r corpus2
+
+  # Run the fuzzer for 4 hours
+  date
+  echo start fuzzer
+  ./bin/fuzz-read-print-write corpus -dict=../fuzz/exiv2.dict -jobs=$(nproc) -workers=$(nproc) -max_total_time=14400
+done
diff --git a/fuzz/mkdictionary.py b/fuzz/mkdictionary.py
new file mode 100755
index 00000000..9a8000ab
--- /dev/null
+++ b/fuzz/mkdictionary.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+
+# Utility for generating a fuzzing dictionary for Exiv2.
+# See README.md (in this directory) for more information.
+
+import sys
+import json
+
+# Escape a single character for a double-quoted dictionary entry:
+# backslash and double quote get a backslash prefix, other printable
+# ASCII characters are kept, and anything else becomes a hex escape.
+# For example, a zero byte is converted to '\x00'.
+def escapeChar(c):
+    if c == '\\':
+        return '\\\\'
+    elif c == '"':
+        return '\\"'
+    elif c.isascii() and c.isprintable():
+        return c
+    else:
+        return '\\x{:02X}'.format(ord(c))
+
+# Escape a whole string. The string is encoded as UTF-8 first, so every
+# non-ASCII character is emitted as one '\xNN' escape per UTF-8 byte.
+# NOTE(review): entries such as "\xC3\xAF\xC2\xBB\xC2\xBF" in exiv2.dict look
+# like raw-byte literals (probably the BOM EF BB BF) that were re-encoded
+# here; confirm how the query output represents such bytes.
+def escapeString(str):
+    return ''.join(map(lambda c: escapeChar(chr(c)), bytes(str, 'utf-8')))
+
+if len(sys.argv) < 2:
+    # Report on stderr: stdout is normally piped into the dictionary file.
+    print("usage: mkdictionary.py dict.json", file=sys.stderr)
+    sys.exit(1)
+
+# The first column of each result tuple is the string selected by the query.
+with open(sys.argv[1], 'r', encoding='utf-8') as f:
+    dict_json = json.load(f)
+tuples = dict_json["#select"]["tuples"]
+for r in tuples:
+    print('"' + escapeString(r[0]) + '"')
diff --git a/fuzz/mkdictionary.ql b/fuzz/mkdictionary.ql
new file mode 100644
index 00000000..fff8b1d7
--- /dev/null
+++ b/fuzz/mkdictionary.ql
@@ -0,0 +1,23 @@
+// Utility for generating a fuzzing dictionary for Exiv2.
+// See README.md (in this directory) for more information.
+ +import cpp +import semmle.code.cpp.dataflow.DataFlow + +predicate parser_string(string s, StringLiteral l) { + s = l.getValue() and + exists(FunctionCall call, string fcnName | + DataFlow::localExprFlow(l, call.getAChild+()) and + fcnName = call.getTarget().getName() + | + fcnName.matches("%cmp%") or + fcnName.matches("%find%") or + fcnName = "startsWith" or + fcnName = "operator==" or + fcnName = "operator!=" + ) +} + +from string s +where parser_string(s, _) +select s diff --git a/fuzz/qlpack.yml b/fuzz/qlpack.yml new file mode 100644 index 00000000..6b3a0aa3 --- /dev/null +++ b/fuzz/qlpack.yml @@ -0,0 +1,3 @@ +name: exiv2-cpp-fuzz-queries +version: 0.0.0 +libraryPathDependencies: codeql-cpp diff --git a/src/image.cpp b/src/image.cpp index cde7d4a0..e5a182c2 100644 --- a/src/image.cpp +++ b/src/image.cpp @@ -377,7 +377,7 @@ namespace Exiv2 { // Break for unknown tag types else we may segfault. if ( !typeValid(type) ) { - std::cerr << "invalid type in tiff structure" << type << std::endl; + EXV_ERROR << "invalid type in tiff structure" << type << std::endl; start = 0; // break from do loop throw Error(kerInvalidTypeValue); } diff --git a/src/jpgimage.cpp b/src/jpgimage.cpp index 5b89409e..d111e45a 100644 --- a/src/jpgimage.cpp +++ b/src/jpgimage.cpp @@ -752,9 +752,11 @@ namespace Exiv2 { break; start++; } +#ifdef EXIV2_DEBUG_MESSAGES if (start < max) std::cout << " FFF start = " << start << std::endl; // << " index = " << pFFF->dwIndexOff << std::endl; +#endif } if (bPS) {