Kaydet (Commit) e5345f62 authored tarafından Tobias Lippert's avatar Tobias Lippert Kaydeden (comit) Caolán McNamara

Unittest the whitespace stripping logic for tox text generation

Moving the logic into a separate class allows this functionality to be unit tested in isolation.

Change-Id: I1e5eddfb455ca85a662ea38c03302883decc5d58
Reviewed-on: https://gerrit.libreoffice.org/9608
Tested-by: Caolán McNamara <caolanm@redhat.com>
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
üst 60272bb3
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# This file contains the unit test definition for classes in the sw/source/core/tox subfolder.
# The macro which defines the main method is contained in test_ToxWhitespaceStripper.cxx.
# Declare the CppUnit test target.
# NOTE(review): the name registered here is `sw_tox_test`, while the module
# check-target list refers to `CppunitTest_sw_tox` -- confirm that the two
# names (and this file's name) agree.
$(eval $(call gb_CppunitTest_CppunitTest,sw_tox_test))
# Source files of the test (paths relative to the source root, no extension).
$(eval $(call gb_CppunitTest_add_exception_objects,sw_tox_test, \
sw/qa/cppunit/tox/test_ToxWhitespaceStripper \
))
# Libraries used by the test; `sw` provides the class under test.
$(eval $(call gb_CppunitTest_use_libraries,sw_tox_test, \
comphelper \
cppu \
cppuhelper \
sal \
svt \
sw \
test \
unotest \
vcl \
tl \
utl \
$(gb_UWINAPI) \
))
# External projects used by the test.
$(eval $(call gb_CppunitTest_use_externals,sw_tox_test, \
boost_headers \
libxml2 \
))
# UNO API sets used by the test.
$(eval $(call gb_CppunitTest_use_api,sw_tox_test,\
offapi \
udkapi \
))
# Include paths: sw/source/core/inc is where ToxWhitespaceStripper.hxx is
# expected to be found -- presumably; verify against the header's location.
$(eval $(call gb_CppunitTest_set_include,sw_tox_test,\
-I$(SRCDIR)/sw/inc \
-I$(SRCDIR)/sw/source/core/inc \
$$(INCLUDE) \
))
# vim: set noet sw=4 ts=4:
...@@ -389,6 +389,7 @@ $(eval $(call gb_Library_add_exception_objects,sw,\ ...@@ -389,6 +389,7 @@ $(eval $(call gb_Library_add_exception_objects,sw,\
sw/source/core/tox/toxhlp \ sw/source/core/tox/toxhlp \
sw/source/core/tox/txmsrt \ sw/source/core/tox/txmsrt \
sw/source/core/tox/ToxTextGenerator \ sw/source/core/tox/ToxTextGenerator \
sw/source/core/tox/ToxWhitespaceStripper \
sw/source/core/txtnode/SwGrammarContact \ sw/source/core/txtnode/SwGrammarContact \
sw/source/core/txtnode/atrfld \ sw/source/core/txtnode/atrfld \
sw/source/core/txtnode/atrflyin \ sw/source/core/txtnode/atrflyin \
......
...@@ -46,6 +46,10 @@ $(eval $(call gb_Module_add_targets,sw,\ ...@@ -46,6 +46,10 @@ $(eval $(call gb_Module_add_targets,sw,\
endif endif
$(eval $(call gb_Module_add_check_targets,sw,\
CppunitTest_sw_tox \
))
$(eval $(call gb_Module_add_slowcheck_targets,sw,\ $(eval $(call gb_Module_add_slowcheck_targets,sw,\
CppunitTest_sw_uwriter \ CppunitTest_sw_uwriter \
CppunitTest_sw_htmlexport \ CppunitTest_sw_htmlexport \
......
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef TOXWHITESPACESTRIPPER_HXX_
#define TOXWHITESPACESTRIPPER_HXX_
#include "rtl/ustring.hxx"
#include <vector>
namespace sw {
/** Removes unwanted whitespace from a string which is to be used in a Tox.
 *
 * The stripped string has
 * - newlines changed to spaces,
 * - consecutive spaces merged into a single space, and
 * - a trailing space removed.
 *
 * The class also maps positions of the input string to the corresponding positions in the
 * stripped string. This is important for attributes which might have to be imported, e.g., it
 * helps to answer the question: the 3rd character of the input string is subscript, which
 * character of the output string is that?
 *
 * @note One leading whitespace is preserved.
 */
class SAL_DLLPUBLIC ToxWhitespaceStripper {
public:
    /** Strips @p inputString and records the mapping of its positions.
     *
     * The constructor is explicit so that a string is never converted into a stripper
     * by accident.
     *
     * @param inputString the text whose whitespace is to be normalized.
     */
    explicit ToxWhitespaceStripper(const OUString& inputString);

    /** Maps a position of the input string to the stripped string.
     *
     * @param pos a position in the input string; the position directly behind the last
     *            character of the input string may be requested as well.
     * @return the corresponding position in the stripped string.
     */
    sal_Int32
    GetPositionInStrippedString(sal_Int32 pos) const;

    /** @return the input string with its whitespace stripped as described for the class. */
    OUString
    GetStrippedString() const;

private:
    /// The result of the stripping.
    OUString mStripped;

    /// Entry i holds the position in the stripped string for position i of the input string.
    std::vector<sal_Int32> mNewPositions;
};
} // end namespace sw
#endif /* TOXWHITESPACESTRIPPER_HXX_ */
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <stdexcept>
#include <sal/types.h>
#include <rtl/ustring.hxx>
#include <ToxWhitespaceStripper.hxx>
#include <cppunit/TestAssert.h>
#include <cppunit/TestFixture.h>
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/plugin/TestPlugIn.h>
using namespace sw;
/// CppUnit fixture with the unit tests for sw::ToxWhitespaceStripper.
class ToxWhitespaceStripperTest : public CppUnit::TestFixture
{
/// Checks the mapping from input positions to positions in the stripped string.
void
MappingCharactersToVariousStrippedStringsWorks();
/// Checks the stripped string produced for several inputs.
void
StrippingWhitespacesFromVariousStringsWorks();
/// Checks that the position directly behind the input string can be mapped as well.
void
PositionAfterStringCanBeRequested();
// Register the test methods above with the suite of this fixture.
CPPUNIT_TEST_SUITE(ToxWhitespaceStripperTest);
CPPUNIT_TEST(MappingCharactersToVariousStrippedStringsWorks);
CPPUNIT_TEST(StrippingWhitespacesFromVariousStringsWorks);
CPPUNIT_TEST(PositionAfterStringCanBeRequested);
CPPUNIT_TEST_SUITE_END();
};
/** Verifies the mapping from positions in the input string to positions in the stripped string.
 *
 * Every scope below builds a stripper for one input and checks, position by position, where
 * the input characters end up. Characters of a merged whitespace run share one position.
 */
void
ToxWhitespaceStripperTest::MappingCharactersToVariousStrippedStringsWorks()
{
    {
        // A single trailing newline: it maps to the position behind "abc".
        OUString test("abc\n");
        ToxWhitespaceStripper sut(test);
        CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
        CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
        CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
        CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
    }
    {
        // Two trailing newlines are merged, so both map to the same position.
        OUString test("abc\n\n");
        ToxWhitespaceStripper sut(test);
        CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
        CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
        CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
        CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
        CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(4));
    }
    {
        // An inner newline becomes a space; no position shifts.
        OUString test("abc\ndef");
        ToxWhitespaceStripper sut(test);
        CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
        CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
        CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
        CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
        CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(4));
        CPPUNIT_ASSERT_EQUAL(5, sut.GetPositionInStrippedString(5));
        CPPUNIT_ASSERT_EQUAL(6, sut.GetPositionInStrippedString(6));
    }
    {
        // Two leading spaces and a space followed by a newline: both runs are merged.
        // The input needs TWO leading spaces; with a single one the expectations for
        // positions 1..9 below would not hold.
        // input positions:
        //             0123456 789
        OUString test("  abc \ndef");
        // stripped positions:
        //             01234567
        //            " abc def"
        ToxWhitespaceStripper sut(test);
        CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
        CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(1));
        CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(2));
        CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(3));
        CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(4));
        CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(5));
        CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(6));
        CPPUNIT_ASSERT_EQUAL(5, sut.GetPositionInStrippedString(7));
        CPPUNIT_ASSERT_EQUAL(6, sut.GetPositionInStrippedString(8));
        CPPUNIT_ASSERT_EQUAL(7, sut.GetPositionInStrippedString(9));
    }
}
void
ToxWhitespaceStripperTest::StrippingWhitespacesFromVariousStringsWorks()
{
{
OUString test("abc\n");
OUString expected("abc");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
{
OUString test("abc\n\n");
OUString expected("abc");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
{
OUString test("abc\ndef");
OUString expected("abc def");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
{
OUString test(" abc \ndef");
OUString expected(" abc def");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
{
OUString test(" ");
OUString expected("");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
{
OUString test("d ");
OUString expected("d");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
}
void
ToxWhitespaceStripperTest::PositionAfterStringCanBeRequested()
{
OUString test("abc");
ToxWhitespaceStripper sut(test);
sal_Int32 expected = test.getLength();
CPPUNIT_ASSERT_EQUAL(expected, sut.GetPositionInStrippedString(test.getLength()));
}
// Put the test suite in the registry
CPPUNIT_TEST_SUITE_REGISTRATION(ToxWhitespaceStripperTest);
CPPUNIT_PLUGIN_IMPLEMENT();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include "fmtpdsc.hxx" #include "fmtpdsc.hxx"
#include "DocumentSettingManager.hxx" #include "DocumentSettingManager.hxx"
#include "SwStyleNameMapper.hxx" #include "SwStyleNameMapper.hxx"
#include "ToxWhitespaceStripper.hxx"
#include "editeng/tstpitem.hxx" #include "editeng/tstpitem.hxx"
#include "editeng/lrspitem.hxx" #include "editeng/lrspitem.hxx"
...@@ -49,32 +50,6 @@ struct LinkStruct ...@@ -49,32 +50,6 @@ struct LinkStruct
nEndTextPos(nEnd) {} nEndTextPos(nEnd) {}
}; };
/// Generate String with newlines changed to spaces, consecutive spaces changed
/// to a single space, and trailing space removed.
///
/// @param rRet the text to clean up; an empty string is returned unchanged.
/// @return the cleaned-up copy of rRet.
OUString lcl_RemoveLineBreaks(const OUString &rRet)
{
if (rRet.isEmpty())
return rRet;
// Number of characters dropped so far; kept characters are shifted left by this amount.
sal_Int32 nOffset = 0;
// Work on a copy in which every newline has already been turned into a space.
OUStringBuffer sRet(rRet.replace('\n', ' '));
for (sal_Int32 i = 1; i < sRet.getLength(); ++i)
{
if ( sRet[i - 1] == ' ' && sRet[i] == ' ' )
{
// Second or later space of a run: drop it.
nOffset += 1;
}
else
{
// Keep the character, compacting the buffer in place.
sRet[i - nOffset] = sRet[i];
}
}
// A space in the last buffer position means the text ends in whitespace; drop one more character.
if (sRet[sRet.getLength() - 1] == ' ')
{
nOffset += 1;
}
// Only the compacted prefix is valid; cut off the stale tail.
return sRet.copy(0, sRet.getLength() - nOffset).toString();
}
/// Generate String according to the Form and remove the /// Generate String according to the Form and remove the
/// special characters 0-31 and 255. /// special characters 0-31 and 255.
static OUString lcl_GetNumString( const SwTOXSortTabBase& rBase, bool bUsePrefix, sal_uInt8 nLevel ) static OUString lcl_GetNumString( const SwTOXSortTabBase& rBase, bool bUsePrefix, sal_uInt8 nLevel )
...@@ -144,8 +119,8 @@ void ToxTextGenerator::GenerateText(SwDoc* pDoc, const std::vector<SwTOXSortTabB ...@@ -144,8 +119,8 @@ void ToxTextGenerator::GenerateText(SwDoc* pDoc, const std::vector<SwTOXSortTabB
case TOKEN_ENTRY_TEXT: case TOKEN_ENTRY_TEXT:
{ {
SwIndex aIdx( pTOXNd, std::min(pTOXNd->GetTxt().getLength(),rTxt.getLength()) ); SwIndex aIdx( pTOXNd, std::min(pTOXNd->GetTxt().getLength(),rTxt.getLength()) );
rBase.FillText( *pTOXNd, aIdx ); ToxWhitespaceStripper stripper(rBase.GetTxt().sText);
rTxt = lcl_RemoveLineBreaks(rTxt); pTOXNd->InsertText(stripper.GetStrippedString(), aIdx);
} }
break; break;
...@@ -153,10 +128,9 @@ void ToxTextGenerator::GenerateText(SwDoc* pDoc, const std::vector<SwTOXSortTabB ...@@ -153,10 +128,9 @@ void ToxTextGenerator::GenerateText(SwDoc* pDoc, const std::vector<SwTOXSortTabB
{ {
// for TOC numbering // for TOC numbering
rTxt += lcl_GetNumString( rBase, true, MAXLEVEL ); rTxt += lcl_GetNumString( rBase, true, MAXLEVEL );
SwIndex aIdx( pTOXNd, rTxt.getLength() ); SwIndex aIdx( pTOXNd, rTxt.getLength() );
rBase.FillText( *pTOXNd, aIdx ); ToxWhitespaceStripper stripper(rBase.GetTxt().sText);
rTxt = lcl_RemoveLineBreaks(rTxt); pTOXNd->InsertText(stripper.GetStrippedString(), aIdx);
} }
break; break;
......
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include "ToxWhitespaceStripper.hxx"
#include "rtl/ustrbuf.hxx"
#include <boost/numeric/conversion/cast.hpp>
namespace sw {
/** Builds the stripped string and the table which maps input positions to stripped positions.
 *
 * Spaces and newlines count as whitespace. A run of whitespace is replaced by one space, and
 * a space at the very end of the result is removed. All characters of a run map to the
 * position of the single space which replaces them.
 *
 * @param inputString the text to strip.
 */
ToxWhitespaceStripper::ToxWhitespaceStripper(const OUString& inputString)
{
    const sal_Int32 inputLength = inputString.getLength();
    // One entry per input character plus one for the position behind the string.
    mNewPositions.reserve(static_cast<size_t>(inputLength) + 1);

    OUStringBuffer buffer;
    bool lastCharacterWasWhitespace = false;
    for (sal_Int32 pos = 0; pos < inputLength; ++pos) {
        const sal_Unicode cur = inputString[pos];
        if (cur == ' ' || cur == '\n') {
            // merge consecutive whitespaces (and translate them to spaces)
            if (!lastCharacterWasWhitespace) {
                buffer.append(' ');
            }
            lastCharacterWasWhitespace = true;
        }
        else {
            buffer.append(cur);
            lastCharacterWasWhitespace = false;
        }
        // The character (or the space standing in for it) is the last one in the buffer.
        mNewPositions.push_back(buffer.getLength() - 1);
    }

    // strip the last whitespace (if there was one)
    if (lastCharacterWasWhitespace) {
        buffer.truncate(buffer.getLength() - 1);
    }

    // Add one position for requests of the position after the string, e.g., for attributes
    // which extend beyond the string. It is recorded only after the trailing space has been
    // removed, so that it can never point past the end of the stripped string.
    mNewPositions.push_back(buffer.getLength());

    // Hand over the buffer contents with their real length; converting via getStr() would
    // re-scan for a terminating NUL and cut the text at an embedded U+0000.
    mStripped = buffer.makeStringAndClear();
}
/** Looks up where a position of the input string ended up in the stripped string.
 *
 * @param pos position in the input string.
 * @return the entry of the position table for @p pos.
 * @throws boost::numeric::bad_numeric_cast (or a subclass) if @p pos is negative.
 * @throws std::out_of_range if @p pos lies beyond the recorded positions.
 */
sal_Int32
ToxWhitespaceStripper::GetPositionInStrippedString(sal_Int32 pos) const
{
    // The checked cast rejects negative values, at() rejects indices behind the table.
    return mNewPositions.at(boost::numeric_cast<size_t>(pos));
}
/** @return the stripped string which was computed by the constructor. */
OUString
ToxWhitespaceStripper::GetStrippedString() const
{
return mStripped;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment