Commit 1d32c56f, authored by Michael Meeks

sot: re-work OLE2 offset-to-page computation

The gotcha here is that if we get ahead of ourselves and read to the
end of the stream, we detect bad chains too early. Instead, build the
page chain cache incrementally, which is also quicker and behaves more
like the previous code.
parent c948e655
...@@ -340,38 +340,40 @@ void StgStrm::SetEntry( StgDirEntry& r ) ...@@ -340,38 +340,40 @@ void StgStrm::SetEntry( StgDirEntry& r )
* for this each time build a simple flat in-memory vector list * for this each time build a simple flat in-memory vector list
* of pages. * of pages.
*/ */
bool StgStrm::buildPageChainCache() void StgStrm::scanBuildPageChainCache(sal_Int32 *pOptionalCalcSize)
{ {
if (nSize > 0) if (nSize > 0)
m_aPagesCache.reserve(nSize/nPageSize); m_aPagesCache.reserve(nSize/nPageSize);
bool bError = false;
sal_Int32 nBgn = nStart; sal_Int32 nBgn = nStart;
while (nBgn >= 0) sal_Int32 nOldBgn = -1;
sal_Int32 nOptSize = 0;
while( nBgn >= 0 && nBgn != nOldBgn )
{ {
m_aPagesCache.push_back(nBgn); if( nBgn >= 0 )
sal_Int32 nOldBgn = nBgn; m_aPagesCache.push_back(nBgn);
nBgn = pFat->GetNextPage(nBgn); nOldBgn = nBgn;
if (nBgn == nOldBgn) nBgn = pFat->GetNextPage( nBgn );
return false; if( nBgn == nOldBgn )
bError = true;
nOptSize += nPageSize;
} }
if (bError)
return true; {
if (pOptionalCalcSize)
rIo.SetError( ERRCODE_IO_WRONGFORMAT );
m_aPagesCache.clear();
}
if (pOptionalCalcSize)
*pOptionalCalcSize = nOptSize;
} }
//See fdo#47644 for a .doc with a vast amount of pages where seeking around the
//document takes a colossal amount of time
//
//There's a cost to building a page cache, so only build one if the number of
//pages to seek through hits some sufficiently high value where it's worth it.
#define ARBITRARY_LARGE_AMOUNT_OF_PAGES 8 * 512
// Compute page number and offset for the given byte position. // Compute page number and offset for the given byte position.
// If the position is behind the size, set the stream right // If the position is behind the size, set the stream right
// behind the EOF. // behind the EOF.
sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos ) sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos )
{ {
sal_Int32 nRel, nBgn;
// Values < 0 seek to the end // Values < 0 seek to the end
if( nBytePos < 0 || nBytePos >= nSize ) if( nBytePos < 0 || nBytePos >= nSize )
nBytePos = nSize; nBytePos = nSize;
...@@ -385,69 +387,59 @@ sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos ) ...@@ -385,69 +387,59 @@ sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos )
if( nOld == nNew ) if( nOld == nNew )
return sal_True; return sal_True;
if (m_aPagesCache.empty() && nNew > ARBITRARY_LARGE_AMOUNT_OF_PAGES) // See fdo#47644 for a .doc with a vast amount of pages where seeking around the
// document takes a colossal amount of time
//
// Please Note: we build the pagescache incrementally as we go if necessary,
// so that a corrupted FAT doesn't poison the stream state for earlier reads
size_t nIdx = nNew / nPageSize;
if( nIdx >= m_aPagesCache.size() )
{ {
SAL_WARN("sot", "kicking off large seek helper\n"); // Extend the FAT cache ! ...
buildPageChainCache(); size_t nToAdd = nIdx + 1;
}
if (!m_aPagesCache.empty()) if (m_aPagesCache.empty())
{ m_aPagesCache.push_back( nStart );
size_t nIdx = nNew / nPageSize;
// special case: seek to 1st byte of new, unallocated page nToAdd -= m_aPagesCache.size();
// (in case the file size is a multiple of the page size)
if( nBytePos == nSize && !nOffset && nIdx == m_aPagesCache.size() ) sal_Int32 nBgn = m_aPagesCache.back();
{
nIdx--;
nOffset = nPageSize;
}
if (nIdx < m_aPagesCache.size()) // Start adding pages while we can
while( nToAdd > 0 && nBgn >= 0 )
{ {
nPage = m_aPagesCache[ nIdx ]; nBgn = pFat->GetNextPage( nBgn );
return sal_Bool( nPage >= 0 ); if( nBgn >= 0 )
{
m_aPagesCache.push_back( nBgn );
nToAdd--;
}
} }
} }
if( nNew > nOld ) if ( nIdx > m_aPagesCache.size() )
{ {
// the new position is after the current, so an incremental rIo.SetError( SVSTREAM_FILEFORMAT_ERROR );
// positioning is OK. Set the page relative position nPage = STG_EOF;
nRel = nNew - nOld; nOffset = nPageSize;
nBgn = nPage; return sal_False;
} }
else // special case: seek to 1st byte of new, unallocated page
// (in case the file size is a multiple of the page size)
if( nBytePos == nSize && !nOffset && nIdx > 0 && nIdx == m_aPagesCache.size() )
{ {
// the new position is before the current, so we have to scan nIdx--;
// the entire chain. nOffset = nPageSize;
nRel = nNew;
nBgn = nStart;
} }
// now, traverse the FAT chain. else if ( nIdx == m_aPagesCache.size() )
nRel /= nPageSize;
sal_Int32 nLast = STG_EOF;
while (nRel && nBgn >= 0)
{ {
nLast = nBgn; nPage = STG_EOF;
nBgn = pFat->GetNextPage( nBgn ); return sal_False;
nRel--;
} }
// special case: seek to 1st byte of new, unallocated page nPage = m_aPagesCache[ nIdx ];
// (in case the file size is a multiple of the page size)
if( nBytePos == nSize && nBgn == STG_EOF && !nRel && !nOffset )
nBgn = nLast, nOffset = nPageSize;
if( nBgn < 0 && nBgn != STG_EOF ) return nPage >= 0;
{
rIo.SetError( SVSTREAM_FILEFORMAT_ERROR );
nBgn = STG_EOF;
nOffset = nPageSize;
}
nPage = nBgn;
return sal_Bool( nRel == 0 && nPage >= 0 );
} }
// Retrieve the physical page for a given byte offset. // Retrieve the physical page for a given byte offset.
...@@ -817,10 +809,7 @@ void StgDataStrm::Init( sal_Int32 nBgn, sal_Int32 nLen ) ...@@ -817,10 +809,7 @@ void StgDataStrm::Init( sal_Int32 nBgn, sal_Int32 nLen )
{ {
// determine the actual size of the stream by scanning // determine the actual size of the stream by scanning
// the FAT chain and counting the # of pages allocated // the FAT chain and counting the # of pages allocated
bool bOk = buildPageChainCache(); scanBuildPageChainCache( &nSize );
if (!bOk)
rIo.SetError( ERRCODE_IO_WRONGFORMAT );
nSize = m_aPagesCache.size() * nPageSize;
} }
} }
......
...@@ -79,7 +79,7 @@ protected: ...@@ -79,7 +79,7 @@ protected:
short nOffset; // offset into current page short nOffset; // offset into current page
short nPageSize; // logical page size short nPageSize; // logical page size
std::vector<sal_Int32> m_aPagesCache; std::vector<sal_Int32> m_aPagesCache;
bool buildPageChainCache(); void scanBuildPageChainCache(sal_Int32 *pOptionalCalcSize = NULL);
sal_Bool Copy( sal_Int32 nFrom, sal_Int32 nBytes ); sal_Bool Copy( sal_Int32 nFrom, sal_Int32 nBytes );
StgStrm( StgIo& ); StgStrm( StgIo& );
public: public:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment