Commit 042725a5, authored by Stephan Bergmann

Stick to a single O[U]String hash function

8f8bc0dc "Move string hash function into String
class" had introduced a new getHash64 that, besides returning sal_uInt64 instead
of just sal_Int32, didn't do sampling of only a handful of characters, but
always computed the hash over all characters (as the usage in SfxItemSet and
SdPage appears to require for either performance or approximated correctness).

However, it would be advantageous to keep the stable URE interface as small as
possible.  Now, O(1) sampling was apparently considered state of the art when
the rtl string classes were first created, closely copying java.lang.String,
which at that time demanded sampling for hashCode(), too -- but never sampling
more than 15 characters, with the obvious (in hindsight, at least) performance
catastrophes, so Java changed it to O(n) somewhere along the way.

Based on that, this commit changes the existing hash functions to not do
sampling any more, and removes the newly introduced -64 variants again.  (The
extended value range of sal_uInt64 compared to sal_Int32 was hopefully not vital
to the existing uses.)

The old implementation used sampling only for strings of length >= 256, so I did
a "make check" build with an instrumented hash function that flagged all uses
with inputs of length >= 256, and grepped workdir/{Cppunit,Junit,Python}Test for
hits.  Of the 2849 hits encountered, 2845 were in the range from 256 to 295
characters, and only the remaining four were of 2472 characters.  Those four
were from CppunitTest_sc_subsequent_filters_test, importing long text into a
cell, causing ScDocumentImport::setStringCell to call
svl::SharedStringPool::intern, which internally uses an unordered_set.  These
results appear to justify the change.

Change-Id: I78fcc3b0f07389bdf36a21701b95a1ff0a0d970f
parent f2529fc7
...@@ -277,24 +277,6 @@ SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_str_hashCode( ...@@ -277,24 +277,6 @@ SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_str_hashCode(
SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_str_hashCode_WithLength( SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_str_hashCode_WithLength(
const sal_Char * str, sal_Int32 len ) SAL_THROW_EXTERN_C(); const sal_Char * str, sal_Int32 len ) SAL_THROW_EXTERN_C();
/** Return a hash code (64bit) for a string.
It is not allowed to store the hash code persistently, because later
versions could return other hash codes.
@param str
a string. Need not be null-terminated, but must be at least as long as
the specified len.
@param len
the length of the string.
@return
a hash code for the given string.
*/
SAL_DLLPUBLIC sal_uInt64 SAL_CALL rtl_str_hashCode64_WithLength(
const sal_Char * str, sal_Int32 len ) SAL_THROW_EXTERN_C();
/** Search for the first occurrence of a character within a string. /** Search for the first occurrence of a character within a string.
The string must be null-terminated. The string must be null-terminated.
......
...@@ -891,21 +891,6 @@ public: ...@@ -891,21 +891,6 @@ public:
return !( literal == rStr ); return !( literal == rStr );
} }
/**
Returns a 64bit hash of the string data.
This hashes the entire data, while hashCode would do sampling for larger string sizes.
@return a hash code value of the string data
@see hashCode() for simple hashes
@since LibreOffice 4.3
*/
sal_uInt64 hashCode64() const SAL_THROW(())
{
return rtl_str_hashCode64_WithLength( pData->buffer, pData->length );
}
/** /**
Returns a hashcode for this string. Returns a hashcode for this string.
......
...@@ -551,24 +551,6 @@ SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_ustr_hashCode( ...@@ -551,24 +551,6 @@ SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_ustr_hashCode(
SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_ustr_hashCode_WithLength( SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_ustr_hashCode_WithLength(
const sal_Unicode * str, sal_Int32 len ) SAL_THROW_EXTERN_C(); const sal_Unicode * str, sal_Int32 len ) SAL_THROW_EXTERN_C();
/** Return a hash code (64bit) for a string.
It is not allowed to store the hash code persistently, because later
versions could return other hash codes.
@param str
a string. Need not be null-terminated, but must be at least as long as
the specified len.
@param len
the length of the string.
@return
a hash code for the given string.
*/
SAL_DLLPUBLIC sal_uInt64 SAL_CALL rtl_ustr_hashCode64_WithLength(
const sal_Unicode * str, sal_Int32 len ) SAL_THROW_EXTERN_C();
/** Search for the first occurrence of a character within a string. /** Search for the first occurrence of a character within a string.
The string must be null-terminated. The string must be null-terminated.
......
...@@ -1226,21 +1226,6 @@ public: ...@@ -1226,21 +1226,6 @@ public:
return !string.equalsAsciiL( literal, internal::ConstCharArrayDetector< T, void >::size - 1 ); return !string.equalsAsciiL( literal, internal::ConstCharArrayDetector< T, void >::size - 1 );
} }
/**
Returns a 64bit hash of the string data.
This hashes the entire data, while hashCode would do sampling for larger string sizes.
@return a hash code value of the string data
@see hashCode() for simple hashes
@since LibreOffice 4.3
*/
sal_uInt64 hashCode64() const SAL_THROW(())
{
return rtl_ustr_hashCode64_WithLength( pData->buffer, pData->length );
}
/** /**
Returns a hashcode for this string. Returns a hashcode for this string.
......
...@@ -144,7 +144,7 @@ public: ...@@ -144,7 +144,7 @@ public:
virtual SvStream & Store( SvStream &, bool bDirect = false ) const; virtual SvStream & Store( SvStream &, bool bDirect = false ) const;
bool operator==(const SfxItemSet &) const; bool operator==(const SfxItemSet &) const;
virtual sal_uInt64 getHash() const; sal_Int32 getHash() const;
virtual OString stringify() const; virtual OString stringify() const;
}; };
......
...@@ -252,68 +252,17 @@ sal_Int32 SAL_CALL IMPL_RTL_STRNAME( hashCode )( const IMPL_RTL_STRCODE* pStr ) ...@@ -252,68 +252,17 @@ sal_Int32 SAL_CALL IMPL_RTL_STRNAME( hashCode )( const IMPL_RTL_STRCODE* pStr )
/* ----------------------------------------------------------------------- */ /* ----------------------------------------------------------------------- */
sal_uInt64 SAL_CALL IMPL_RTL_STRNAME( hashCode64_WithLength )( const IMPL_RTL_STRCODE* pStr,
sal_Int32 nLen )
SAL_THROW_EXTERN_C()
{
sal_uInt64 nHash = 0;
for( sal_Int32 i = 0; i < nLen; i++ )
nHash = (nHash << 5) - nHash + *pStr++;
return nHash;
}
/* ----------------------------------------------------------------------- */
sal_Int32 SAL_CALL IMPL_RTL_STRNAME( hashCode_WithLength )( const IMPL_RTL_STRCODE* pStr, sal_Int32 SAL_CALL IMPL_RTL_STRNAME( hashCode_WithLength )( const IMPL_RTL_STRCODE* pStr,
sal_Int32 nLen ) sal_Int32 nLen )
SAL_THROW_EXTERN_C() SAL_THROW_EXTERN_C()
{ {
sal_uInt32 h = static_cast<sal_uInt32>(nLen); sal_uInt32 h = static_cast<sal_uInt32>(nLen);
while ( nLen > 0 )
if ( nLen < 256 )
{
while ( nLen > 0 )
{
h = (h*37U) + IMPL_RTL_USTRCODE( *pStr );
pStr++;
nLen--;
}
}
else
{ {
sal_Int32 nSkip; h = (h*37U) + IMPL_RTL_USTRCODE( *pStr );
const IMPL_RTL_STRCODE* pEndStr = pStr+nLen-5;
/* only sample some characters */
/* the first 3, some characters between, and the last 5 */
h = (h*39U) + IMPL_RTL_USTRCODE( *pStr );
pStr++;
h = (h*39U) + IMPL_RTL_USTRCODE( *pStr );
pStr++;
h = (h*39U) + IMPL_RTL_USTRCODE( *pStr );
pStr++; pStr++;
nLen--;
nSkip = nLen / 8;
nLen -= 8;
while ( nLen > 0 )
{
h = (h*39U) + IMPL_RTL_USTRCODE( *pStr );
pStr += nSkip;
nLen -= nSkip;
}
h = (h*39U) + IMPL_RTL_USTRCODE( *pEndStr );
pEndStr++;
h = (h*39U) + IMPL_RTL_USTRCODE( *pEndStr );
pEndStr++;
h = (h*39U) + IMPL_RTL_USTRCODE( *pEndStr );
pEndStr++;
h = (h*39U) + IMPL_RTL_USTRCODE( *pEndStr );
pEndStr++;
h = (h*39U) + IMPL_RTL_USTRCODE( *pEndStr );
} }
return static_cast<sal_Int32>(h); return static_cast<sal_Int32>(h);
} }
......
...@@ -670,12 +670,6 @@ LIBO_UDK_4.2 { # symbols available in >= LibO 4.2 ...@@ -670,12 +670,6 @@ LIBO_UDK_4.2 { # symbols available in >= LibO 4.2
rtl_ustr_toUInt32; rtl_ustr_toUInt32;
} LIBO_UDK_4.1; } LIBO_UDK_4.1;
LIBO_UDK_4.3 { #symbols available in >= LibO 4.3
global:
rtl_str_hashCode64_WithLength;
rtl_ustr_hashCode64_WithLength;
} LIBO_UDK_4.2;
PRIVATE_1.0 { PRIVATE_1.0 {
global: global:
osl_detail_ObjectRegistry_storeAddresses; osl_detail_ObjectRegistry_storeAddresses;
......
...@@ -378,7 +378,7 @@ public: ...@@ -378,7 +378,7 @@ public:
void removeAnnotation( const ::com::sun::star::uno::Reference< ::com::sun::star::office::XAnnotation >& xAnnotation ); void removeAnnotation( const ::com::sun::star::uno::Reference< ::com::sun::star::office::XAnnotation >& xAnnotation );
const sd::AnnotationVector& getAnnotations() const { return maAnnotations; } const sd::AnnotationVector& getAnnotations() const { return maAnnotations; }
bool hasAnnotations() const { return !maAnnotations.empty(); } bool hasAnnotations() const { return !maAnnotations.empty(); }
sal_uInt64 getHash() const; sal_Int32 getHash() const;
virtual OString stringify() const; virtual OString stringify() const;
......
...@@ -603,9 +603,9 @@ OString SdPage::stringify() const ...@@ -603,9 +603,9 @@ OString SdPage::stringify() const
return aString.makeStringAndClear(); return aString.makeStringAndClear();
} }
sal_uInt64 SdPage::getHash() const sal_Int32 SdPage::getHash() const
{ {
return stringify().hashCode64(); return stringify().hashCode();
} }
......
...@@ -650,7 +650,7 @@ void SdStyleSheetPool::CopySheets(SdStyleSheetPool& rSourcePool, SfxStyleFamily ...@@ -650,7 +650,7 @@ void SdStyleSheetPool::CopySheets(SdStyleSheetPool& rSourcePool, SfxStyleFamily
SfxStyleSheetBase* pExistingSheet = Find(aName, eFamily); SfxStyleSheetBase* pExistingSheet = Find(aName, eFamily);
if( pExistingSheet && !rRenameSuffix.isEmpty() ) if( pExistingSheet && !rRenameSuffix.isEmpty() )
{ {
sal_uInt64 nHash = xSheet->GetItemSet().getHash(); sal_Int32 nHash = xSheet->GetItemSet().getHash();
if( pExistingSheet->GetItemSet().getHash() != nHash ) if( pExistingSheet->GetItemSet().getHash() != nHash )
{ {
OUString aTmpName = aName + rRenameSuffix; OUString aTmpName = aName + rRenameSuffix;
......
...@@ -2032,9 +2032,9 @@ SfxItemSet *SfxAllItemSet::Clone(sal_Bool bItems, SfxItemPool *pToPool ) const ...@@ -2032,9 +2032,9 @@ SfxItemSet *SfxAllItemSet::Clone(sal_Bool bItems, SfxItemPool *pToPool ) const
// ----------------------------------------------------------------------- // -----------------------------------------------------------------------
sal_uInt64 SfxItemSet::getHash() const sal_Int32 SfxItemSet::getHash() const
{ {
return stringify().hashCode64(); return stringify().hashCode();
} }
// ----------------------------------------------------------------------- // -----------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment