Commit f577a422, authored by Daniel Sikeler, committed by Eike Rathke

GSoC - implement global tokenhandler for odf-tokens

This generates a perfect hash for the ODF tokens and uses it in the token handler.
A test case is added to check the mapping between tokens and their names in both directions.
This is taken from Daniel's work in feature/fastparser branch.

Change-Id: I7cf77c1eb6c9dd68fd78108c6e0726507c7672e1
Reviewed-on: https://gerrit.libreoffice.org/28073
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Eike Rathke <erack@redhat.com>
parent 10c6bef3
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef INCLUDED_XMLOFF_FASTTOKENHANDLER_HXX
#define INCLUDED_XMLOFF_FASTTOKENHANDLER_HXX
#include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
#include <cppuhelper/implbase1.hxx>
#include <sax/fastattribs.hxx>
#include <xmloff/token/tokens.hxx>
#include <rtl/instance.hxx>
#include <xmloff/dllapi.h>
namespace xmloff {
namespace token {
/** Lookup table between token identifiers and their UTF-8 names.

    Names are stored per identifier in maTokenNames; the reverse direction
    (name to identifier) goes through getTokenPerfectHash().
 */
class TokenMap
{
public:
    explicit TokenMap();
    ~TokenMap();

    /** Looks up the token identifier that belongs to a Unicode token name. */
    sal_Int32 getTokenFromUnicode( const OUString& rUnicodeName ) const;

    /** Yields the UTF-8 name of a token identifier as a byte sequence.

        An identifier outside [0, XML_TOKEN_COUNT) is reported through
        SAL_WARN_IF (area "xmloff") and produces an empty sequence.
     */
    css::uno::Sequence< sal_Int8 > getUtf8TokenName( sal_Int32 nToken ) const
    {
        const bool bOutOfRange = nToken < 0 || nToken >= XML_TOKEN_COUNT;
        SAL_WARN_IF(bOutOfRange, "xmloff", "Wrong nToken parameter");
        if( bOutOfRange )
            return css::uno::Sequence< sal_Int8 >();
        return maTokenNames[ nToken ];
    }

    /** Looks up the token identifier for a UTF-8 name given as byte sequence. */
    sal_Int32 getTokenFromUtf8( const css::uno::Sequence< sal_Int8 >& rUtf8Name ) const
    {
        const char* const pBytes =
            reinterpret_cast< const char* >( rUtf8Name.getConstArray() );
        return getTokenFromUTF8( pBytes, rUtf8Name.getLength() );
    }

    /** Looks up the token identifier for a UTF-8 name given as a character
        buffer pToken of nLength bytes. */
    sal_Int32 getTokenFromUTF8( const char *pToken, sal_Int32 nLength ) const
    {
        return getTokenPerfectHash( pToken, nLength );
    }

private:
    /** Name-to-identifier lookup; defined in the implementation file. */
    sal_Int32 getTokenPerfectHash( const char *pToken, sal_Int32 nLength ) const;

    /** UTF-8 token names, indexed by token identifier. */
    std::vector< css::uno::Sequence< sal_Int8 > > maTokenNames;
};
/** Holder type that exposes a TokenMap through rtl::Static; the instance is
    reached with StaticTokenMap::get(). */
struct StaticTokenMap : public rtl::Static< TokenMap, StaticTokenMap > {};
/** XFastTokenHandler implementation that answers its token queries from a
    TokenMap held by reference (mrTokenMap).

    It also derives from sax_fastparser::FastTokenHandlerBase and overrides
    getTokenDirect(), which takes a raw character buffer instead of a UNO
    byte sequence.
 */
class XMLOFF_DLLPUBLIC FastTokenHandler : public cppu::WeakImplHelper1<
css::xml::sax::XFastTokenHandler >,
public sax_fastparser::FastTokenHandlerBase
{
public:
explicit FastTokenHandler();
virtual ~FastTokenHandler();
// XFastTokenHandler
/** Returns the UTF-8 name belonging to the token identifier nToken. */
virtual css::uno::Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 nToken )
throw (css::uno::RuntimeException, std::exception) SAL_OVERRIDE;
/** Returns the token identifier belonging to the UTF-8 name Identifier. */
virtual sal_Int32 SAL_CALL getTokenFromUTF8( const css::uno::Sequence< sal_Int8 >& Identifier )
throw (css::uno::RuntimeException, std::exception) SAL_OVERRIDE;
// Much faster direct C++ shortcut to the method that matters
/** Returns the token identifier for the nLength bytes starting at pToken. */
virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const SAL_OVERRIDE;
private:
// Token table that all lookups are forwarded to.
TokenMap& mrTokenMap;
};
} // namespace token
} // namespace xmloff
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
#
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Declare the CppUnit test target xmloff_tokenmap.
$(eval $(call gb_CppunitTest_CppunitTest,xmloff_tokenmap))
# Test source, given as a path without file extension.
$(eval $(call gb_CppunitTest_add_exception_objects,xmloff_tokenmap, \
xmloff/qa/unit/tokenmap-test \
))
# Use the headers of the xmloff/generated custom target (the test includes
# xmloff/token/tokens.hxx).
$(eval $(call gb_CppunitTest_use_custom_headers,xmloff_tokenmap, \
xmloff/generated \
))
# Use the objects of library xo (presumably built into the test directly so
# that non-exported classes such as TokenMap are reachable -- confirm).
$(eval $(call gb_CppunitTest_use_library_objects,xmloff_tokenmap,xo))
# UNO API sets used by the test.
$(eval $(call gb_CppunitTest_use_api,xmloff_tokenmap, \
offapi \
udkapi \
))
# Libraries used by the test.
$(eval $(call gb_CppunitTest_use_libraries,xmloff_tokenmap, \
basegfx \
comphelper \
cppu \
cppuhelper \
i18nlangtag \
sal \
salhelper \
sax \
svl \
tl \
utl \
vcl \
$(gb_UWINAPI) \
))
# vim: set noet sw=4 ts=4:
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Declare the custom target that holds the generated token sources.
$(eval $(call gb_CustomTarget_CustomTarget,xmloff/generated))

#Generates a hashtable for the odf-tags
# Locations used by the token generation rules below.
xmloff_SRC := $(SRCDIR)/xmloff/source/token
xmloff_MISC := $(call gb_CustomTarget_get_workdir,xmloff/generated)/misc
xmloff_INC := $(call gb_CustomTarget_get_workdir,xmloff/generated)
xmloff_GENHEADERPATH := $(xmloff_INC)/xmloff/token

$(eval $(call gb_CustomTarget_token_hash,xmloff/generated,tokenhash.inc,tokenhash.gperf))
$(eval $(call gb_CustomTarget_generate_tokens,xmloff/generated,xmloff,xmloff/source/token,tokens,token,tokenhash.gperf))

# The custom target depends on every generated file.
# The last prerequisite deliberately has no trailing line continuation, so the
# rule cannot absorb whatever line happens to follow it.
$(call gb_CustomTarget_get_target,xmloff/generated) : \
	$(xmloff_INC)/tokenhash.inc \
	$(xmloff_INC)/tokennames.inc \
	$(xmloff_GENHEADERPATH)/tokens.hxx
# vim: set noet sw=4 ts=4:
...@@ -36,6 +36,7 @@ $(eval $(call gb_Library_use_external,xo,boost_headers)) ...@@ -36,6 +36,7 @@ $(eval $(call gb_Library_use_external,xo,boost_headers))
$(eval $(call gb_Library_use_custom_headers,xo,\ $(eval $(call gb_Library_use_custom_headers,xo,\
officecfg/registry \ officecfg/registry \
xmloff/generated \
)) ))
$(eval $(call gb_Library_use_sdk_api,xo)) $(eval $(call gb_Library_use_sdk_api,xo))
...@@ -90,6 +91,7 @@ $(eval $(call gb_Library_add_exception_objects,xo,\ ...@@ -90,6 +91,7 @@ $(eval $(call gb_Library_add_exception_objects,xo,\
xmloff/source/core/DocumentSettingsContext \ xmloff/source/core/DocumentSettingsContext \
xmloff/source/core/DomBuilderContext \ xmloff/source/core/DomBuilderContext \
xmloff/source/core/DomExport \ xmloff/source/core/DomExport \
xmloff/source/core/fasttokenhandler \
xmloff/source/core/ProgressBarHelper \ xmloff/source/core/ProgressBarHelper \
xmloff/source/core/PropertySetMerger \ xmloff/source/core/PropertySetMerger \
xmloff/source/core/RDFaExportHelper \ xmloff/source/core/RDFaExportHelper \
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
$(eval $(call gb_Module_Module,xmloff)) $(eval $(call gb_Module_Module,xmloff))
$(eval $(call gb_Module_add_targets,xmloff,\ $(eval $(call gb_Module_add_targets,xmloff,\
CustomTarget_generated \
Library_xo \ Library_xo \
Library_xof \ Library_xof \
Package_dtd \ Package_dtd \
...@@ -28,6 +29,7 @@ $(eval $(call gb_Module_add_targets,xmloff,\ ...@@ -28,6 +29,7 @@ $(eval $(call gb_Module_add_targets,xmloff,\
$(eval $(call gb_Module_add_check_targets,xmloff,\ $(eval $(call gb_Module_add_check_targets,xmloff,\
$(if $(MERGELIBS),, \ $(if $(MERGELIBS),, \
CppunitTest_xmloff_uxmloff) \ CppunitTest_xmloff_uxmloff) \
CppunitTest_xmloff_tokenmap \
)) ))
$(eval $(call gb_Module_add_subsequentcheck_targets,xmloff,\ $(eval $(call gb_Module_add_subsequentcheck_targets,xmloff,\
......
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <cppunit/TestAssert.h>
#include <cppunit/TestFixture.h>
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/plugin/TestPlugIn.h>
#include "xmloff/fasttokenhandler.hxx"
#include "xmloff/token/tokens.hxx"
using namespace std;
using namespace com::sun::star::uno;
namespace xmloff {
/** CppUnit fixture for token::TokenMap; it owns one TokenMap instance as a
    member and registers the single test test_roundTrip. */
class TokenmapTest: public CppUnit::TestFixture
{
public:
// Checks the identifier -> name -> identifier mapping for all tokens.
void test_roundTrip();
// Test suite declaration listing the tests of this fixture.
CPPUNIT_TEST_SUITE(TokenmapTest);
CPPUNIT_TEST(test_roundTrip);
CPPUNIT_TEST_SUITE_END();
private:
// Token table under test.
token::TokenMap tokenMap;
};
void TokenmapTest::test_roundTrip()
{
for ( sal_Int32 nToken = 0; nToken < XML_TOKEN_COUNT; ++nToken )
{
// check that the getIdentifier <-> getToken roundtrip works
Sequence< sal_Int8 > rUtf8Name = tokenMap.getUtf8TokenName(nToken);
sal_Int32 ret = tokenMap.getTokenFromUTF8(
reinterpret_cast< const char * >(rUtf8Name.getConstArray()),
rUtf8Name.getLength() );
CPPUNIT_ASSERT_EQUAL(ret, nToken);
}
}
// Register the fixture's suite with CppUnit.
CPPUNIT_TEST_SUITE_REGISTRATION(TokenmapTest);
}
// CppUnit plug-in boilerplate for this test library.
CPPUNIT_PLUGIN_IMPLEMENT();
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include "xmloff/fasttokenhandler.hxx"
#include <xmloff/token/tokens.hxx>
namespace xmloff {
// File-local scope around the included tokenhash.inc.
namespace {
// clang only: save the diagnostic state and silence -Wdeprecated-register for
// the include below (presumably the generated code uses `register` -- confirm).
#if defined __clang__
#if __has_warning("-Wdeprecated-register")
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-register"
#endif
#endif
// Presumably provides Perfect_Hash::in_word_set and struct xmltoken, which
// the lookups further down rely on -- confirm against the generated file.
#include "tokenhash.inc"
// clang only: restore the diagnostic state saved above.
#if defined __clang__
#if __has_warning("-Wdeprecated-register")
#pragma GCC diagnostic pop
#endif
#endif
} // namespace
namespace token {
using namespace css;
/** Builds the identifier -> UTF-8 name table.

    maTokenNames is sized to XML_TOKEN_COUNT and entry i receives the bytes
    of the i-th string from tokennames.inc.
 */
TokenMap::TokenMap() :
    maTokenNames( static_cast< size_t >( XML_TOKEN_COUNT ) )
{
    // The extra "" entry follows the included list (presumably because every
    // generated entry ends with a comma -- confirm against tokennames.inc).
    static const sal_Char* sppcTokenNames[] =
    {
#include "tokennames.inc"
        ""
    };

    const size_t nCount = maTokenNames.size();
    for( size_t nIndex = 0; nIndex < nCount; ++nIndex )
    {
        const OString aName( sppcTokenNames[ nIndex ] );
        maTokenNames[ nIndex ] = uno::Sequence< sal_Int8 >(
            reinterpret_cast< const sal_Int8* >( aName.getStr() ),
            aName.getLength() );
    }
}
/** No explicit cleanup is performed here. */
TokenMap::~TokenMap() {}
/** Converts rUnicodeName to UTF-8 and looks the result up in the perfect hash.

    @return the token identifier, or XML_TOKEN_INVALID when the name is not
            part of the word set.
 */
sal_Int32 TokenMap::getTokenFromUnicode( const OUString& rUnicodeName ) const
{
    const OString aEncoded( OUStringToOString( rUnicodeName, RTL_TEXTENCODING_UTF8 ) );
    // Same lookup as for raw UTF-8 buffers, so reuse the private helper.
    return getTokenPerfectHash( aEncoded.getStr(), aEncoded.getLength() );
}
/** Looks up the nLength bytes starting at pStr via Perfect_Hash::in_word_set.

    @return the nToken value of the matching entry, or XML_TOKEN_INVALID when
            in_word_set finds no entry.
 */
sal_Int32 TokenMap::getTokenPerfectHash( const char *pStr, sal_Int32 nLength ) const
{
    const struct xmltoken* const pEntry = Perfect_Hash::in_word_set( pStr, nLength );
    if( !pEntry )
        return XML_TOKEN_INVALID;
    return pEntry->nToken;
}
/** Binds the handler to the TokenMap returned by StaticTokenMap::get(). */
FastTokenHandler::FastTokenHandler()
    : mrTokenMap( StaticTokenMap::get() )
{}
/** No explicit cleanup; mrTokenMap is only a reference. */
FastTokenHandler::~FastTokenHandler() {}
// XFastTokenHandler
/** Returns the UTF-8 name of nToken as stored in the token map. */
uno::Sequence< sal_Int8 > FastTokenHandler::getUTF8Identifier( sal_Int32 nToken )
    throw (uno::RuntimeException, std::exception)
{
    uno::Sequence< sal_Int8 > aName = mrTokenMap.getUtf8TokenName( nToken );
    return aName;
}
/** Returns the token identifier for the UTF-8 name held in rIdentifier. */
sal_Int32 FastTokenHandler::getTokenFromUTF8( const uno::Sequence< sal_Int8 >& rIdentifier )
    throw (uno::RuntimeException, std::exception)
{
    // Hand the sequence's bytes to the buffer-based lookup of the token map.
    const char* const pBytes =
        reinterpret_cast< const char* >( rIdentifier.getConstArray() );
    return mrTokenMap.getTokenFromUTF8( pBytes, rIdentifier.getLength() );
}
// Much faster direct C++ shortcut
/** Returns the token identifier for the nLength bytes starting at pToken,
    without wrapping them in a UNO sequence first. */
sal_Int32 FastTokenHandler::getTokenDirect( const char* pToken, sal_Int32 nLength ) const
{
    const sal_Int32 nFound = mrTokenMap.getTokenFromUTF8( pToken, nLength );
    return nFound;
}
} // namespace token
} // namespace xmloff
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef XMLOFF_TOKEN_TOKENS_HXX
#define XMLOFF_TOKEN_TOKENS_HXX
#include <com/sun/star/xml/sax/FastToken.hpp>
namespace xmloff {
/** Token identifier meaning "no such token"; it carries the value of
    css::xml::sax::FastToken::DONTKNOW. */
const sal_Int32 XML_TOKEN_INVALID = css::xml::sax::FastToken::DONTKNOW;
} // namespace xmloff
#endif
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment