diff --git a/autotest/gcore/tiff_read.py b/autotest/gcore/tiff_read.py index 01e1d2b0a471..d580b6f70e15 100755 --- a/autotest/gcore/tiff_read.py +++ b/autotest/gcore/tiff_read.py @@ -4447,6 +4447,92 @@ def method(request): gdal.GetDriverByName("GTIFF").Delete(cog_filename) +############################################################################### +# Test GTiffDataset::MultiThreadedRead() when the amount of requested bytes +# exceed the allowed limit. + + +@pytest.mark.require_curl() +@pytest.mark.skipif( + not check_libtiff_internal_or_at_least(4, 0, 11), + reason="libtiff >= 4.0.11 required", +) +def test_tiff_read_vsicurl_multi_threaded_beyond_advise_read_limit(tmp_path): + + webserver_process = None + webserver_port = 0 + + (webserver_process, webserver_port) = webserver.launch( + handler=webserver.DispatcherHttpHandler + ) + if webserver_port == 0: + pytest.skip() + + gdal.VSICurlClearCache() + + tmp_filename = str(tmp_path / "tmp.tif") + gdal.Translate( + tmp_filename, + "data/utmsmall.tif", + options="-co TILED=YES -co COMPRESS=LZW -outsize 1024 0", + ) + ds = gdal.Open(tmp_filename) + expected_data = ds.ReadRaster() + ds = None + + try: + filesize = os.stat(tmp_filename).st_size + handler = webserver.SequentialHandler() + handler.add("HEAD", "/test.tif", 200, {"Content-Length": "%d" % filesize}) + + def method(request): + # sys.stderr.write('%s\n' % str(request.headers)) + + if request.headers["Range"].startswith("bytes="): + rng = request.headers["Range"][len("bytes=") :] + assert len(rng.split("-")) == 2 + start = int(rng.split("-")[0]) + end = int(rng.split("-")[1]) + + request.protocol_version = "HTTP/1.1" + request.send_response(206) + request.send_header("Content-type", "application/octet-stream") + request.send_header( + "Content-Range", "bytes %d-%d/%d" % (start, end, filesize) + ) + request.send_header("Content-Length", end - start + 1) + request.send_header("Connection", "close") + request.end_headers() + with open(tmp_filename, "rb") as f: + f.seek(start, 0) + request.wfile.write(f.read(end - start + 1)) + + for i in range(3): + handler.add("GET", "/test.tif", custom_method=method) + + with webserver.install_http_handler(handler): + with gdaltest.config_options( + { + "GDAL_NUM_THREADS": "2", + "CPL_VSIL_CURL_ALLOWED_EXTENSIONS": ".tif", + "GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR", + "CPL_VSIL_CURL_ADVISE_READ_TOTAL_BYTES_LIMIT": str( + 2 * filesize // 3 + ), + } + ): + ds = gdal.Open("/vsicurl/http://127.0.0.1:%d/test.tif" % webserver_port) + assert ds is not None, "could not open dataset" + + got_data = ds.ReadRaster() + assert got_data == expected_data + + finally: + webserver.server_stop(webserver_process, webserver_port) + + gdal.VSICurlClearCache() + + ############################################################################### # Check that GetMetadataDomainList() works properly diff --git a/frmts/gtiff/gtiffdataset_read.cpp b/frmts/gtiff/gtiffdataset_read.cpp index e9f52d11f706..9d2dcb8e2931 100644 --- a/frmts/gtiff/gtiffdataset_read.cpp +++ b/frmts/gtiff/gtiffdataset_read.cpp @@ -349,7 +349,10 @@ CPLErr GTiffDataset::ReadCompressedData(const char *pszFormat, int nXOff, struct GTiffDecompressContext { - std::mutex oMutex{}; + // The mutex must be recursive because ThreadDecompressionFuncErrorHandler() + // which acquires the mutex can be called from a section where the mutex is + // already acquired. + std::recursive_mutex oMutex{}; bool bSuccess = true; std::vector aoErrors{}; @@ -416,7 +419,7 @@ static void CPL_STDCALL ThreadDecompressionFuncErrorHandler( { GTiffDecompressContext *psContext = static_cast(CPLGetErrorHandlerUserData()); - std::lock_guard oLock(psContext->oMutex); + std::lock_guard oLock(psContext->oMutex); psContext->aoErrors.emplace_back(eErr, eErrorNum, pszMsg); } @@ -495,7 +498,7 @@ static void CPL_STDCALL ThreadDecompressionFuncErrorHandler( if (psJob->nSize == 0) { { - std::lock_guard oLock(psContext->oMutex); + std::lock_guard oLock(psContext->oMutex); if (!psContext->bSuccess) return; } @@ -616,7 +619,7 @@ static void CPL_STDCALL ThreadDecompressionFuncErrorHandler( if (psContext->bHasPRead) { { - std::lock_guard oLock(psContext->oMutex); + std::lock_guard oLock(psContext->oMutex); if (!psContext->bSuccess) return; @@ -634,7 +637,7 @@ static void CPL_STDCALL ThreadDecompressionFuncErrorHandler( { if (!AllocInputBuffer()) { - std::lock_guard oLock(psContext->oMutex); + std::lock_guard oLock(psContext->oMutex); psContext->bSuccess = false; return; } @@ -647,7 +650,7 @@ static void CPL_STDCALL ThreadDecompressionFuncErrorHandler( static_cast(psJob->nSize), static_cast(psJob->nOffset)); - std::lock_guard oLock(psContext->oMutex); + std::lock_guard oLock(psContext->oMutex); psContext->bSuccess = false; return; } @@ -655,7 +658,7 @@ static void CPL_STDCALL ThreadDecompressionFuncErrorHandler( } else { - std::lock_guard oLock(psContext->oMutex); + std::lock_guard oLock(psContext->oMutex); if (!psContext->bSuccess) return; @@ -808,7 +811,7 @@ static void CPL_STDCALL ThreadDecompressionFuncErrorHandler( if (!bRet) { - std::lock_guard oLock(psContext->oMutex); + std::lock_guard oLock(psContext->oMutex); psContext->bSuccess = false; return; } @@ -1271,6 +1274,9 @@ CPLErr GTiffDataset::MultiThreadedRead(int nXOff, int nYOff, int nXSize, std::vector anSizes(nBlocks); int iJob = 0; int nAdviseReadRanges = 0; + const size_t nAdviseReadTotalBytesLimit = + sContext.poHandle->GetAdviseReadTotalBytesLimit(); + size_t nAdviseReadAccBytes = 0; for (int y = 0; y < nYBlocks; ++y) { for (int x = 0; x < nXBlocks; ++x) @@ -1298,7 +1304,8 @@ CPLErr GTiffDataset::MultiThreadedRead(int nXOff, int nYOff, int nXSize, // false since we could have concurrent uses of the handle, // when when reading the TIFF TileOffsets / TileByteCounts // array - std::lock_guard oLock(sContext.oMutex); + std::lock_guard oLock( + sContext.oMutex); IsBlockAvailable(nBlockId, &asJobs[iJob].nOffset, &asJobs[iJob].nSize); @@ -1314,7 +1321,8 @@ CPLErr GTiffDataset::MultiThreadedRead(int nXOff, int nYOff, int nXSize, { if (nFileSize == 0) { - std::lock_guard oLock(sContext.oMutex); + std::lock_guard oLock( + sContext.oMutex); sContext.poHandle->Seek(0, SEEK_END); nFileSize = sContext.poHandle->Tell(); } @@ -1326,7 +1334,8 @@ CPLErr GTiffDataset::MultiThreadedRead(int nXOff, int nYOff, int nXSize, static_cast(asJobs[iJob].nSize), static_cast(asJobs[iJob].nOffset)); - std::lock_guard oLock(sContext.oMutex); + std::lock_guard oLock( + sContext.oMutex); sContext.bSuccess = false; break; } @@ -1377,6 +1386,48 @@ CPLErr GTiffDataset::MultiThreadedRead(int nXOff, int nYOff, int nXSize, static_cast(std::min( std::numeric_limits::max(), asJobs[iJob].nSize)); + + // If the total number of bytes we must read excess the + // capacity of AdviseRead(), then split the RasterIO() + // request in 2 halves. + if (nAdviseReadTotalBytesLimit > 0 && + anSizes[nAdviseReadRanges] < + nAdviseReadTotalBytesLimit && + anSizes[nAdviseReadRanges] > + nAdviseReadTotalBytesLimit - nAdviseReadAccBytes && + nYBlocks >= 2) + { + const int nYOff2 = + (nBlockYStart + nYBlocks / 2) * m_nBlockYSize; + CPLDebugOnly("GTiff", + "Splitting request (%d,%d,%dx%d) into " + "(%d,%d,%dx%d) and (%d,%d,%dx%d)", + nXOff, nYOff, nXSize, nYSize, nXOff, nYOff, + nXSize, nYOff2 - nYOff, nXOff, nYOff2, + nXSize, nYOff + nYSize - nYOff2); + + asJobs.clear(); + anOffsets.clear(); + anSizes.clear(); + poQueue.reset(); + + CPLErr eErr = MultiThreadedRead( + nXOff, nYOff, nXSize, nYOff2 - nYOff, pData, + eBufType, nBandCount, panBandMap, nPixelSpace, + nLineSpace, nBandSpace); + if (eErr == CE_None) + { + eErr = MultiThreadedRead( + nXOff, nYOff2, nXSize, nYOff + nYSize - nYOff2, + static_cast(pData) + + (nYOff2 - nYOff) * nLineSpace, + eBufType, nBandCount, panBandMap, nPixelSpace, + nLineSpace, nBandSpace); + } + return eErr; + } + nAdviseReadAccBytes += anSizes[nAdviseReadRanges]; + ++nAdviseReadRanges; } diff --git a/port/cpl_vsi_virtual.h b/port/cpl_vsi_virtual.h index e2458e37371e..225e2eded076 100644 --- a/port/cpl_vsi_virtual.h +++ b/port/cpl_vsi_virtual.h @@ -87,6 +87,23 @@ struct CPL_DLL VSIVirtualHandle { } + /** Return the total maximum number of bytes that AdviseRead() can handle + * at once. + * + * Some AdviseRead() implementations may give up if the sum of the values + * in the panSizes[] array provided to AdviseRead() exceeds a limit. + * + * Callers might use that threshold to optimize the efficiency of + * AdviseRead(). + * + * A returned value of 0 indicates a unknown limit. + * @since GDAL 3.9 + */ + virtual size_t GetAdviseReadTotalBytesLimit() const + { + return 0; + } + virtual size_t Write(const void *pBuffer, size_t nSize, size_t nCount) = 0; int Printf(CPL_FORMAT_STRING(const char *pszFormat), ...) diff --git a/port/cpl_vsil_curl.cpp b/port/cpl_vsil_curl.cpp index c8d6640a7100..38d6140529bb 100644 --- a/port/cpl_vsil_curl.cpp +++ b/port/cpl_vsil_curl.cpp @@ -32,9 +32,10 @@ #include #include -#include +#include #include #include +#include #include "cpl_aws.h" #include "cpl_json.h" @@ -3153,6 +3154,21 @@ size_t VSICurlHandle::PRead(void *pBuffer, size_t nSize, return nRet; } +/************************************************************************/ +/* GetAdviseReadTotalBytesLimit() */ +/************************************************************************/ + +size_t VSICurlHandle::GetAdviseReadTotalBytesLimit() const +{ + return static_cast(std::min( + std::numeric_limits::max(), + // 100 MB + std::strtoull( + CPLGetConfigOption("CPL_VSIL_CURL_ADVISE_READ_TOTAL_BYTES_LIMIT", + "104857600"), + nullptr, 10))); +} + /************************************************************************/ /* AdviseRead() */ /************************************************************************/ @@ -3171,9 +3187,10 @@ void VSICurlHandle::AdviseRead(int nRanges, const vsi_l_offset *panOffsets, // Give up if we need to allocate too much memory vsi_l_offset nMaxSize = 0; + const size_t nLimit = GetAdviseReadTotalBytesLimit(); for (int i = 0; i < nRanges; ++i) { - if (panSizes[i] > 100 * 1024 * 1024 - nMaxSize) + if (panSizes[i] > nLimit - nMaxSize) { CPLDebug(poFS->GetDebugKey(), "Trying to request too many bytes in AdviseRead()"); @@ -3994,7 +4011,10 @@ const char *VSICurlFilesystemHandlerBase::GetActualURL(const char *pszFilename) "default='16384000'/>" \ "