From 5b948d1cc49e944521ebb7e0b2df5eb529d4a714 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 18 Apr 2024 17:03:52 +0200 Subject: [PATCH] GTiff: MultiThreadedRead(): make it take into account AdviseRead() limit to reduce the number of I/O requests Fixes #9682 --- autotest/gcore/tiff_read.py | 86 +++++++++++++++++++++++++++++++ frmts/gtiff/gtiffdataset_read.cpp | 45 ++++++++++++++++ 2 files changed, 131 insertions(+) diff --git a/autotest/gcore/tiff_read.py b/autotest/gcore/tiff_read.py index 01e1d2b0a471..d580b6f70e15 100755 --- a/autotest/gcore/tiff_read.py +++ b/autotest/gcore/tiff_read.py @@ -4447,6 +4447,92 @@ def method(request): gdal.GetDriverByName("GTIFF").Delete(cog_filename) +############################################################################### +# Test GTiffDataset::MultiThreadedRead() when the amount of requested bytes +# exceeds the allowed limit. + + +@pytest.mark.require_curl() +@pytest.mark.skipif( + not check_libtiff_internal_or_at_least(4, 0, 11), + reason="libtiff >= 4.0.11 required", +) +def test_tiff_read_vsicurl_multi_threaded_beyond_advise_read_limit(tmp_path): + + webserver_process = None + webserver_port = 0 + + (webserver_process, webserver_port) = webserver.launch( + handler=webserver.DispatcherHttpHandler + ) + if webserver_port == 0: + pytest.skip() + + gdal.VSICurlClearCache() + + tmp_filename = str(tmp_path / "tmp.tif") + gdal.Translate( + tmp_filename, + "data/utmsmall.tif", + options="-co TILED=YES -co COMPRESS=LZW -outsize 1024 0", + ) + ds = gdal.Open(tmp_filename) + expected_data = ds.ReadRaster() + ds = None + + try: + filesize = os.stat(tmp_filename).st_size + handler = webserver.SequentialHandler() + handler.add("HEAD", "/test.tif", 200, {"Content-Length": "%d" % filesize}) + + def method(request): + # sys.stderr.write('%s\n' % str(request.headers)) + + if request.headers["Range"].startswith("bytes="): + rng = request.headers["Range"][len("bytes=") :] + assert len(rng.split("-")) == 2 + start = 
int(rng.split("-")[0]) + end = int(rng.split("-")[1]) + + request.protocol_version = "HTTP/1.1" + request.send_response(206) + request.send_header("Content-type", "application/octet-stream") + request.send_header( + "Content-Range", "bytes %d-%d/%d" % (start, end, filesize) + ) + request.send_header("Content-Length", end - start + 1) + request.send_header("Connection", "close") + request.end_headers() + with open(tmp_filename, "rb") as f: + f.seek(start, 0) + request.wfile.write(f.read(end - start + 1)) + + for i in range(3): + handler.add("GET", "/test.tif", custom_method=method) + + with webserver.install_http_handler(handler): + with gdaltest.config_options( + { + "GDAL_NUM_THREADS": "2", + "CPL_VSIL_CURL_ALLOWED_EXTENSIONS": ".tif", + "GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR", + "CPL_VSIL_CURL_ADVISE_READ_TOTAL_BYTES_LIMIT": str( + 2 * filesize // 3 + ), + } + ): + ds = gdal.Open("/vsicurl/http://127.0.0.1:%d/test.tif" % webserver_port) + assert ds is not None, "could not open dataset" + + got_data = ds.ReadRaster() + assert got_data == expected_data + + finally: + webserver.server_stop(webserver_process, webserver_port) + + gdal.VSICurlClearCache() + + ############################################################################### # Check that GetMetadataDomainList() works properly diff --git a/frmts/gtiff/gtiffdataset_read.cpp b/frmts/gtiff/gtiffdataset_read.cpp index d4f4226527a4..9d2dcb8e2931 100644 --- a/frmts/gtiff/gtiffdataset_read.cpp +++ b/frmts/gtiff/gtiffdataset_read.cpp @@ -1274,6 +1274,9 @@ CPLErr GTiffDataset::MultiThreadedRead(int nXOff, int nYOff, int nXSize, std::vector anSizes(nBlocks); int iJob = 0; int nAdviseReadRanges = 0; + const size_t nAdviseReadTotalBytesLimit = + sContext.poHandle->GetAdviseReadTotalBytesLimit(); + size_t nAdviseReadAccBytes = 0; for (int y = 0; y < nYBlocks; ++y) { for (int x = 0; x < nXBlocks; ++x) @@ -1383,6 +1386,48 @@ CPLErr GTiffDataset::MultiThreadedRead(int nXOff, int nYOff, int nXSize, static_cast(std::min( 
std::numeric_limits::max(), asJobs[iJob].nSize)); + + // If the total number of bytes we must read exceeds the + // capacity of AdviseRead(), then split the RasterIO() + // request in 2 halves. + if (nAdviseReadTotalBytesLimit > 0 && + anSizes[nAdviseReadRanges] < + nAdviseReadTotalBytesLimit && + anSizes[nAdviseReadRanges] > + nAdviseReadTotalBytesLimit - nAdviseReadAccBytes && + nYBlocks >= 2) + { + const int nYOff2 = + (nBlockYStart + nYBlocks / 2) * m_nBlockYSize; + CPLDebugOnly("GTiff", + "Splitting request (%d,%d,%dx%d) into " + "(%d,%d,%dx%d) and (%d,%d,%dx%d)", + nXOff, nYOff, nXSize, nYSize, nXOff, nYOff, + nXSize, nYOff2 - nYOff, nXOff, nYOff2, + nXSize, nYOff + nYSize - nYOff2); + + asJobs.clear(); + anOffsets.clear(); + anSizes.clear(); + poQueue.reset(); + + CPLErr eErr = MultiThreadedRead( + nXOff, nYOff, nXSize, nYOff2 - nYOff, pData, + eBufType, nBandCount, panBandMap, nPixelSpace, + nLineSpace, nBandSpace); + if (eErr == CE_None) + { + eErr = MultiThreadedRead( + nXOff, nYOff2, nXSize, nYOff + nYSize - nYOff2, + static_cast(pData) + + (nYOff2 - nYOff) * nLineSpace, + eBufType, nBandCount, panBandMap, nPixelSpace, + nLineSpace, nBandSpace); + } + return eErr; + } + nAdviseReadAccBytes += anSizes[nAdviseReadRanges]; + ++nAdviseReadRanges; }