Kaydet (Commit) 9612bdbf authored tarafından Matúš Kukan's avatar Matúš Kukan

fastparser: implementation using two threads

Instead of calling the handler methods directly, generate EventLists (vectors
of Event objects) in which the arguments for each callee are stored.

Change-Id: I227a0ef3038566664ac8f294770152c8b445997b
üst 2df047bf
...@@ -28,6 +28,7 @@ $(eval $(call gb_Library_use_libraries,fastsax,\ ...@@ -28,6 +28,7 @@ $(eval $(call gb_Library_use_libraries,fastsax,\
cppu \ cppu \
cppuhelper \ cppuhelper \
sal \ sal \
salhelper \
sax \ sax \
$(gb_UWINAPI) \ $(gb_UWINAPI) \
)) ))
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <osl/diagnose.h> #include <osl/diagnose.h>
#include <rtl/ustrbuf.hxx> #include <rtl/ustrbuf.hxx>
#include <salhelper/thread.hxx>
#include <com/sun/star/lang/DisposedException.hpp> #include <com/sun/star/lang/DisposedException.hpp>
#include <com/sun/star/xml/sax/SAXParseException.hpp> #include <com/sun/star/xml/sax/SAXParseException.hpp>
...@@ -61,6 +62,25 @@ struct NamespaceDefine ...@@ -61,6 +62,25 @@ struct NamespaceDefine
NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {} NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {}
}; };
/**
 * Worker thread that runs FastSaxParser::parse() and thereby produces the
 * events which FastSaxParser::parseStream() consumes.
 *
 * The consuming loop only stops once it has seen a DONE or an EXCEPTION
 * event, so this thread must always queue one of the two before it ends.
 */
class ParserThread: public salhelper::Thread
{
    /// Parser whose parse() is executed; not owned by the thread.
    FastSaxParser *mpParser;
public:
    /// @param pParser parser to drive; NOTE(review): presumably has to outlive
    ///                the thread, since only the raw pointer is stored.
    ParserThread(FastSaxParser *pParser): Thread("Parser"), mpParser(pParser) {}
private:
    /// Thread body: run the parser and report a failure as an EXCEPTION event.
    virtual void execute()
    {
        try
        {
            mpParser->parse();
        }
        // Catch the UNO base class instead of SAXParseException only: any
        // exception leaving parse() would otherwise end the thread without a
        // terminating event being queued, and the consumer would wait forever.
        catch (const Exception&)
        {
            mpParser->produce(Event( CallbackType::EXCEPTION ));
        }
    }
};
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// FastLocatorImpl // FastLocatorImpl
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -167,31 +187,65 @@ OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException) ...@@ -167,31 +187,65 @@ OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException)
// -------------------------------------------------------------------- // --------------------------------------------------------------------
ParserData::ParserData() Event::Event(const CallbackType& t): maType(t)
{}
Event::Event(const CallbackType& t, const OUString& sChars): Event(t)
{ {
msChars = sChars;
} }
ParserData::~ParserData() Event::Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace,
const OUString& aElementName, FastAttributeList *pAttributes): Event(t)
{ {
mnElementToken = nElementToken;
maNamespace = aNamespace;
maElementName = aElementName;
mpAttributes = rtl::Reference< FastAttributeList >(pAttributes);
} }
// Nothing to release explicitly; the destructor body is intentionally empty.
Event::~Event()
{}
// --------------------------------------------------------------------
// Default construction: no explicit initialisation is performed here.
ParserData::ParserData()
{}
// Destruction: no explicit cleanup is performed here.
ParserData::~ParserData()
{}
// -------------------------------------------------------------------- // --------------------------------------------------------------------
Entity::Entity( const ParserData& rData ) : Entity::Entity( const ParserData& rData ) :
ParserData( rData ) ParserData( rData )
{ {
// performance-improvement. Reference is needed when calling the startTag callback. mpProducedEvents = 0;
// Handing out the same object with every call is allowed (see sax-specification) }
mxAttributes.set( new FastAttributeList( mxTokenHandler ) );
Entity::Entity( const Entity& e ) :
ParserData( e )
,maStructSource(e.maStructSource)
,mpParser(e.mpParser)
,maConverter(e.maConverter)
,maSavedException(e.maSavedException)
,maNamespaceStack(e.maNamespaceStack)
,maContextStack(e.maContextStack)
,maNamespaceCount(e.maNamespaceCount)
,maNamespaceDefines(e.maNamespaceDefines)
{
mpProducedEvents = 0;
} }
Entity::~Entity() Entity::~Entity()
{ {
} }
void Entity::startElement( sal_Int32 nElementToken, const OUString& aNamespace, void Entity::startElement( Event *pEvent )
const OUString& aElementName, FastAttributeList *pAttributes )
{ {
const sal_Int32& nElementToken = pEvent->mnElementToken.get();
const OUString& aNamespace = pEvent->maNamespace.get();
const OUString& aElementName = pEvent->maElementName.get();
Reference< XFastContextHandler > xParentContext; Reference< XFastContextHandler > xParentContext;
if( !maContextStack.empty() ) if( !maContextStack.empty() )
{ {
...@@ -207,7 +261,7 @@ void Entity::startElement( sal_Int32 nElementToken, const OUString& aNamespace, ...@@ -207,7 +261,7 @@ void Entity::startElement( sal_Int32 nElementToken, const OUString& aNamespace,
try try
{ {
Reference< XFastAttributeList > xAttr( pAttributes ); Reference< XFastAttributeList > xAttr( pEvent->mpAttributes.get().get() );
Reference< XFastContextHandler > xContext; Reference< XFastContextHandler > xContext;
if( nElementToken == FastToken::DONTKNOW ) if( nElementToken == FastToken::DONTKNOW )
{ {
...@@ -495,6 +549,7 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx ...@@ -495,6 +549,7 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef ); XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef );
pushEntity( entity ); pushEntity( entity );
Entity& rEntity = getEntity();
try try
{ {
// start the document // start the document
...@@ -505,7 +560,24 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx ...@@ -505,7 +560,24 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
entity.mxDocumentHandler->startDocument(); entity.mxDocumentHandler->startDocument();
} }
parse(); rtl::Reference<ParserThread> xParser;
xParser = new ParserThread(this);
xParser->launch();
bool done = false;
do {
rEntity.maEventsPushed.wait();
rEntity.maEventsPushed.reset();
MutexGuard aGuard(rEntity.maEventProtector);
while (!rEntity.maPendingEvents.empty())
{
EventList *pEventList = rEntity.maPendingEvents.front();
rEntity.maPendingEvents.pop();
if (!consume(pEventList))
done = true;
}
} while (!done);
xParser->join();
deleteUsedEvents();
// finish document // finish document
if( entity.mxDocumentHandler.is() ) if( entity.mxDocumentHandler.is() )
...@@ -673,6 +745,88 @@ OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int3 ...@@ -673,6 +745,88 @@ OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int3
} // namespace } // namespace
void FastSaxParser::deleteUsedEvents()
{
Entity& rEntity = getEntity();
while (!rEntity.maUsedEvents.empty())
{
EventList *pEventList = rEntity.maUsedEvents.front();
rEntity.maUsedEvents.pop();
delete pEventList;
}
}
void FastSaxParser::produce(const Event& aEvent)
{
Entity& rEntity = getEntity();
if (!rEntity.mpProducedEvents)
{
rEntity.mpProducedEvents = new EventList();
rEntity.mpProducedEvents->reserve(rEntity.mnEventListSize);
}
rEntity.mpProducedEvents->push_back( aEvent );
if (aEvent->maType == CallbackType::DONE ||
aEvent->maType == CallbackType::EXCEPTION ||
rEntity.mpProducedEvents->size() == rEntity.mnEventListSize)
{
MutexGuard aGuard(rEntity.maEventProtector);
rEntity.maPendingEvents.push(rEntity.mpProducedEvents);
rEntity.mpProducedEvents = 0;
deleteUsedEvents();
rEntity.maEventsPushed.set();
}
}
/**
 * Replays one batch of events on the current entity, i.e. performs the
 * handler calls that the parser thread recorded.
 *
 * On normal completion the batch is moved to Entity::maUsedEvents so that it
 * can be freed later by deleteUsedEvents(). If the batch ends in an EXCEPTION
 * event, or a handler call throws, the batch is freed here instead, because
 * it would otherwise never reach maUsedEvents and be leaked.
 *
 * @param pEventList batch to process; ownership is taken over by this call.
 * @return true if further batches are expected, false once the DONE event
 *         has been processed.
 * @throws SAXParseException when the batch contains an EXCEPTION event (the
 *         error handler, if set, is notified first and may throw itself).
 */
bool FastSaxParser::consume(EventList *pEventList)
{
    Entity& rEntity = getEntity();
    bool bIsParserFinished = false;
    bool bParseFailed = false;
    try
    {
        // Stop replaying as soon as an EXCEPTION event has been seen.
        for (EventList::iterator aEventIt = pEventList->begin();
                aEventIt != pEventList->end() && !bParseFailed; ++aEventIt)
        {
            switch ((*aEventIt).maType)
            {
                case CallbackType::START_ELEMENT:
                    rEntity.startElement( &(*aEventIt) );
                    break;
                case CallbackType::END_ELEMENT:
                    rEntity.endElement();
                    break;
                case CallbackType::CHARACTERS:
                    rEntity.characters( (*aEventIt).msChars.get() );
                    break;
                case CallbackType::DONE:
                    bIsParserFinished = true;
                    // DONE has to be the very last event of the last batch.
                    assert(aEventIt+1 == pEventList->end());
                    break;
                case CallbackType::EXCEPTION:
                    bParseFailed = true;
                    break;
            }
        }
    }
    catch (...)
    {
        // A handler threw: the batch will not be queued as used, free it now.
        delete pEventList;
        throw;
    }

    if (!bParseFailed)
    {
        rEntity.maUsedEvents.push(pEventList);
        return !bIsParserFinished;
    }

    // The error report below needs nothing from the batch, so release it
    // before anything can throw.
    delete pEventList;

    assert( rEntity.maSavedException.hasValue() );
    // Error during parsing !
    XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser );
    OUString sSystemId = mxDocumentLocator->getSystemId();
    sal_Int32 nLine = mxDocumentLocator->getLineNumber();
    SAXParseException aExcept(
        lclGetErrorMessage( xmlE, sSystemId, nLine ),
        Reference< XInterface >(),
        Any( &rEntity.maSavedException, getCppuType( &rEntity.maSavedException ) ),
        mxDocumentLocator->getPublicId(),
        mxDocumentLocator->getSystemId(),
        mxDocumentLocator->getLineNumber(),
        mxDocumentLocator->getColumnNumber()
    );
    // error handler is set, it may throw the exception
    if( rEntity.mxErrorHandler.is() )
        rEntity.mxErrorHandler->fatalError( Any( aExcept ) );
    throw aExcept;
}
// starts parsing with actual parser ! // starts parsing with actual parser !
void FastSaxParser::parse() void FastSaxParser::parse()
{ {
...@@ -720,6 +874,7 @@ void FastSaxParser::parse() ...@@ -720,6 +874,7 @@ void FastSaxParser::parse()
} }
} }
while( nRead > 0 ); while( nRead > 0 );
produce(Event( CallbackType::DONE ));
} }
//------------------------------------------ //------------------------------------------
...@@ -741,14 +896,13 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char ...@@ -741,14 +896,13 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() ); rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
} }
rEntity.mxAttributes->clear();
// create attribute map and process namespace instructions // create attribute map and process namespace instructions
sal_Int32 nNameLen, nPrefixLen; sal_Int32 nNameLen, nPrefixLen;
const XML_Char *pName; const XML_Char *pName;
const XML_Char *pPrefix; const XML_Char *pPrefix;
OUString sNamespace; OUString sNamespace;
sal_Int32 nNamespaceToken = FastToken::DONTKNOW; sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
FastAttributeList *pAttributes = new FastAttributeList( rEntity.mxTokenHandler );
if (!rEntity.maNamespaceStack.empty()) if (!rEntity.maNamespaceStack.empty())
{ {
sNamespace = rEntity.maNamespaceStack.top().msName; sNamespace = rEntity.maNamespaceStack.top().msName;
...@@ -796,9 +950,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char ...@@ -796,9 +950,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{ {
sal_Int32 nAttributeToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen ); sal_Int32 nAttributeToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
if( nAttributeToken != FastToken::DONTKNOW ) if( nAttributeToken != FastToken::DONTKNOW )
rEntity.mxAttributes->add( nAttributeToken, awAttributes[i+1] ); pAttributes->add( nAttributeToken, awAttributes[i+1] );
else else
rEntity.mxAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ), pAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ),
OString(pName, nNameLen), awAttributes[i+1] ); OString(pName, nNameLen), awAttributes[i+1] );
} }
} }
...@@ -808,9 +962,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char ...@@ -808,9 +962,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{ {
sal_Int32 nAttributeToken = GetToken( pName, nNameLen ); sal_Int32 nAttributeToken = GetToken( pName, nNameLen );
if( nAttributeToken != FastToken::DONTKNOW ) if( nAttributeToken != FastToken::DONTKNOW )
rEntity.mxAttributes->add( nAttributeToken, awAttributes[i+1] ); pAttributes->add( nAttributeToken, awAttributes[i+1] );
else else
rEntity.mxAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] ); pAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] );
} }
} }
} }
...@@ -832,8 +986,8 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char ...@@ -832,8 +986,8 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
} }
rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) ); rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
rEntity.startElement( nElementToken, sNamespace, produce(Event( CallbackType::START_ELEMENT, nElementToken, sNamespace,
OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), rEntity.mxAttributes.get() ); OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), pAttributes ));
} }
catch (const Exception& e) catch (const Exception& e)
{ {
...@@ -852,13 +1006,13 @@ void FastSaxParser::callbackEndElement( SAL_UNUSED_PARAMETER const XML_Char* ) ...@@ -852,13 +1006,13 @@ void FastSaxParser::callbackEndElement( SAL_UNUSED_PARAMETER const XML_Char* )
if( !rEntity.maNamespaceStack.empty() ) if( !rEntity.maNamespaceStack.empty() )
rEntity.maNamespaceStack.pop(); rEntity.maNamespaceStack.pop();
rEntity.endElement(); produce(Event( CallbackType::END_ELEMENT ));
} }
void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen ) void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen )
{ {
getEntity().characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) ); produce(Event( CallbackType::CHARACTERS, OUString(s, nLen, RTL_TEXTENCODING_UTF8) ));
} }
void FastSaxParser::callbackEntityDecl( void FastSaxParser::callbackEntityDecl(
......
...@@ -20,11 +20,13 @@ ...@@ -20,11 +20,13 @@
#ifndef _SAX_FASTPARSER_HXX_ #ifndef _SAX_FASTPARSER_HXX_
#define _SAX_FASTPARSER_HXX_ #define _SAX_FASTPARSER_HXX_
#include <queue>
#include <vector> #include <vector>
#include <stack> #include <stack>
#include <boost/optional.hpp> #include <boost/optional.hpp>
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include <boost/unordered_map.hpp> #include <boost/unordered_map.hpp>
#include <osl/conditn.hxx>
#include <rtl/ref.hxx> #include <rtl/ref.hxx>
#include <com/sun/star/xml/sax/XFastContextHandler.hpp> #include <com/sun/star/xml/sax/XFastContextHandler.hpp>
#include <com/sun/star/xml/sax/XFastDocumentHandler.hpp> #include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
...@@ -43,6 +45,7 @@ ...@@ -43,6 +45,7 @@
namespace sax_fastparser { namespace sax_fastparser {
struct Event;
class FastLocatorImpl; class FastLocatorImpl;
struct NamespaceDefine; struct NamespaceDefine;
...@@ -59,6 +62,24 @@ struct NameWithToken ...@@ -59,6 +62,24 @@ struct NameWithToken
msName(sName), mnToken(nToken) {} msName(sName), mnToken(nToken) {}
}; };
// A batch of events that is handed from the producing side to the consuming
// side in one piece.
typedef std::vector<Event> EventList;
// Kind of parser callback an Event stands for. DONE marks the regular end of
// parsing, EXCEPTION reports that parsing failed.
enum CallbackType { START_ELEMENT, END_ELEMENT, CHARACTERS, DONE, EXCEPTION };
// One recorded parser callback together with the arguments needed to replay
// it later. Which of the optional members are filled depends on the
// constructor that was used.
struct Event {
// Text content; set by the (type, chars) constructor.
boost::optional< OUString > msChars;
// Element token; set by the element constructor.
boost::optional< sal_Int32 > mnElementToken;
// Element namespace; set by the element constructor.
boost::optional< OUString > maNamespace;
// Element name; set by the element constructor.
boost::optional< OUString > maElementName;
// Attributes of the element; set by the element constructor, which wraps the
// passed pointer in a reference.
boost::optional< rtl::Reference< FastAttributeList > > mpAttributes;
// Which callback this event represents.
CallbackType maType;
// Event without arguments.
Event(const CallbackType& t);
// Event carrying character data.
Event(const CallbackType& t, const OUString& sChars);
// Event carrying the data of an element start.
Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace,
const OUString& aElementName, FastAttributeList *pAttributes);
~Event();
};
// -------------------------------------------------------------------- // --------------------------------------------------------------------
struct SaxContext struct SaxContext
...@@ -86,13 +107,24 @@ struct ParserData ...@@ -86,13 +107,24 @@ struct ParserData
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// Entity binds all information needed for a single file // Entity binds all information needed for a single file | single call of parseStream
struct Entity : public ParserData struct Entity : public ParserData
{ {
// Amount of work producer sends to consumer in one iteration:
static const size_t mnEventListSize = 1000;
// unique for each Entity instance:
EventList *mpProducedEvents;
std::queue< EventList * > maPendingEvents;
std::queue< EventList * > maUsedEvents;
osl::Mutex maEventProtector;
osl::Condition maEventsPushed;
// copied in copy constructor:
::com::sun::star::xml::sax::InputSource maStructSource; ::com::sun::star::xml::sax::InputSource maStructSource;
XML_Parser mpParser; XML_Parser mpParser;
::sax_expatwrap::XMLFile2UTFConverter maConverter; ::sax_expatwrap::XMLFile2UTFConverter maConverter;
::rtl::Reference< FastAttributeList > mxAttributes;
// Exceptions cannot be thrown through the C-XmlParser (possible resource leaks), // Exceptions cannot be thrown through the C-XmlParser (possible resource leaks),
// therefore the exception must be saved somewhere. // therefore the exception must be saved somewhere.
...@@ -108,9 +140,9 @@ struct Entity : public ParserData ...@@ -108,9 +140,9 @@ struct Entity : public ParserData
::std::vector< NamespaceDefineRef > maNamespaceDefines; ::std::vector< NamespaceDefineRef > maNamespaceDefines;
explicit Entity( const ParserData& rData ); explicit Entity( const ParserData& rData );
Entity( const Entity& rEntity );
~Entity(); ~Entity();
void startElement( sal_Int32 nElementToken, const OUString& aNamespace, void startElement( Event *pEvent );
const OUString& aElementName, FastAttributeList *pAttributes );
void characters( const OUString& sChars ); void characters( const OUString& sChars );
void endElement(); void endElement();
}; };
...@@ -155,9 +187,12 @@ public: ...@@ -155,9 +187,12 @@ public:
inline void pushEntity( const Entity& rEntity ) { maEntities.push( rEntity ); } inline void pushEntity( const Entity& rEntity ) { maEntities.push( rEntity ); }
inline void popEntity() { maEntities.pop(); } inline void popEntity() { maEntities.pop(); }
Entity& getEntity() { return maEntities.top(); } Entity& getEntity() { return maEntities.top(); }
void parse();
void produce( const Event& );
private: private:
void parse(); bool consume(EventList *);
void deleteUsedEvents();
sal_Int32 GetToken( const sal_Char* pToken, sal_Int32 nTokenLen = 0 ); sal_Int32 GetToken( const sal_Char* pToken, sal_Int32 nTokenLen = 0 );
sal_Int32 GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (::com::sun::star::xml::sax::SAXException); sal_Int32 GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (::com::sun::star::xml::sax::SAXException);
...@@ -173,8 +208,7 @@ private: ...@@ -173,8 +208,7 @@ private:
void splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen ); void splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen );
private: private:
::osl::Mutex maMutex; osl::Mutex maMutex; ///< Protecting whole parseStream() execution
::rtl::Reference< FastLocatorImpl > mxDocumentLocator; ::rtl::Reference< FastLocatorImpl > mxDocumentLocator;
NamespaceMap maNamespaceMap; NamespaceMap maNamespaceMap;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment