Commit f15a69bd, authored by Miklos Vajna

vcl PDF import: don't assume larger offset -> newer trailer

Usually, when a PDF file contains incremental updates, the updates are
appended at the end of the document. But this is not required: the
various trailers can be in any order. Make sure that we look at the last
trailer (logically last, not the one with the largest file offset) when
looking for pages.

Change-Id: Idcd85a7c6bbf08c9436dd73933d79cdb683f482c
Reviewed-on: https://gerrit.libreoffice.org/36527
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
Tested-by: Jenkins <ci@libreoffice.org>
parent df556aa4
...@@ -308,6 +308,10 @@ class VCL_DLLPUBLIC PDFDocument ...@@ -308,6 +308,10 @@ class VCL_DLLPUBLIC PDFDocument
std::map<size_t, PDFObjectElement*> m_aIDObjects; std::map<size_t, PDFObjectElement*> m_aIDObjects;
/// List of xref offsets we know. /// List of xref offsets we know.
std::vector<size_t> m_aStartXRefs; std::vector<size_t> m_aStartXRefs;
/// Offsets of trailers, from latest to oldest.
std::vector<size_t> m_aTrailerOffsets;
/// Trailer offset <-> Trailer pointer map.
std::map<size_t, PDFTrailerElement*> m_aOffsetTrailers;
/// List of EOF offsets we know. /// List of EOF offsets we know.
std::vector<size_t> m_aEOFs; std::vector<size_t> m_aEOFs;
PDFTrailerElement* m_pTrailer; PDFTrailerElement* m_pTrailer;
......
...@@ -112,11 +112,14 @@ class PDFTrailerElement : public PDFElement ...@@ -112,11 +112,14 @@ class PDFTrailerElement : public PDFElement
{ {
PDFDocument& m_rDoc; PDFDocument& m_rDoc;
std::map<OString, PDFElement*> m_aDictionary; std::map<OString, PDFElement*> m_aDictionary;
/// Location of the end of the trailer token.
sal_uInt64 m_nOffset = 0;
public: public:
explicit PDFTrailerElement(PDFDocument& rDoc); explicit PDFTrailerElement(PDFDocument& rDoc);
bool Read(SvStream& rStream) override; bool Read(SvStream& rStream) override;
PDFElement* Lookup(const OString& rDictionaryKey); PDFElement* Lookup(const OString& rDictionaryKey);
sal_uInt64 GetLocation() const;
}; };
XRefEntry::XRefEntry() XRefEntry::XRefEntry()
...@@ -1176,6 +1179,11 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s ...@@ -1176,6 +1179,11 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
else if (aKeyword == "trailer") else if (aKeyword == "trailer")
{ {
auto pTrailer = new PDFTrailerElement(*this); auto pTrailer = new PDFTrailerElement(*this);
// Make it possible to find this trailer later by offset.
pTrailer->Read(rStream);
m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
// When reading till the first EOF token only, remember // When reading till the first EOF token only, remember
// just the first trailer token. // just the first trailer token.
if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer) if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
...@@ -1261,7 +1269,13 @@ bool PDFDocument::Read(SvStream& rStream) ...@@ -1261,7 +1269,13 @@ bool PDFDocument::Read(SvStream& rStream)
PDFNumberElement* pPrev = nullptr; PDFNumberElement* pPrev = nullptr;
if (m_pTrailer) if (m_pTrailer)
{
pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev")); pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
// Remember the offset of this trailer in the correct order. It's
// possible that newer trailers don't have a larger offset.
m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
}
else if (m_pXRefStream) else if (m_pXRefStream)
pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev")); pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
if (pPrev) if (pPrev)
...@@ -1788,8 +1802,20 @@ std::vector<PDFObjectElement*> PDFDocument::GetPages() ...@@ -1788,8 +1802,20 @@ std::vector<PDFObjectElement*> PDFDocument::GetPages()
std::vector<PDFObjectElement*> aRet; std::vector<PDFObjectElement*> aRet;
PDFReferenceElement* pRoot = nullptr; PDFReferenceElement* pRoot = nullptr;
if (m_pTrailer)
pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
PDFTrailerElement* pTrailer = nullptr;
if (!m_aTrailerOffsets.empty())
{
// Get access to the latest trailer, and work with the keys of that
// one.
auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
if (it != m_aOffsetTrailers.end())
pTrailer = it->second;
}
if (pTrailer)
pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
else if (m_pXRefStream) else if (m_pXRefStream)
pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root")); pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
...@@ -2085,8 +2111,9 @@ PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc) ...@@ -2085,8 +2111,9 @@ PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc)
{ {
} }
bool PDFTrailerElement::Read(SvStream& /*rStream*/) bool PDFTrailerElement::Read(SvStream& rStream)
{ {
m_nOffset = rStream.Tell();
return true; return true;
} }
...@@ -2098,6 +2125,10 @@ PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey) ...@@ -2098,6 +2125,10 @@ PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey); return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
} }
/// Returns the stream offset recorded by Read(), i.e. the location of the end
/// of the trailer token. The value is 0 until Read() has stored a position.
/// PDFDocument uses this offset as the key to find a trailer again later.
sal_uInt64 PDFTrailerElement::GetLocation() const
{
return m_nOffset;
}
double PDFNumberElement::GetValue() const double PDFNumberElement::GetValue() const
{ {
......
...@@ -395,6 +395,7 @@ void PDFSigningTest::testTokenize() ...@@ -395,6 +395,7 @@ void PDFSigningTest::testTokenize()
"noeol.pdf", "noeol.pdf",
// File that's intentionally smaller than 1024 bytes. // File that's intentionally smaller than 1024 bytes.
"small.pdf", "small.pdf",
"tdf107149.pdf",
}; };
for (const auto& rName : aNames) for (const auto& rName : aNames)
...@@ -403,6 +404,11 @@ void PDFSigningTest::testTokenize() ...@@ -403,6 +404,11 @@ void PDFSigningTest::testTokenize()
vcl::filter::PDFDocument aDocument; vcl::filter::PDFDocument aDocument;
// Just make sure the tokenizer finishes without an error, don't look at the signature. // Just make sure the tokenizer finishes without an error, don't look at the signature.
CPPUNIT_ASSERT(aDocument.Read(aStream)); CPPUNIT_ASSERT(aDocument.Read(aStream));
OUString aNoPages("tdf107149.pdf");
if (aNoPages == rName)
// This failed, page list was empty.
CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aDocument.GetPages().size());
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment