Commit 2fd86ba7, authored by Pedro Giffuni

Update Apache Lucene core to version 2.9.4.

The new version brings in many bugfixes and performance
improvements. Unfortunately, this also means we lose the
Windows-specific enhancements in long_path.patch, but
it is to be expected that the newer versions of Lucene
will have much better portability.

We also bring in support for using a preinstalled Lucene
version 3, from BZ i115241 (author: bero@arklinux).
parent f7087b0e
......@@ -4102,7 +4102,22 @@ if test -n "$with_system_lucene" -o -n "$with_system_libs" && \
}'; then
AC_MSG_RESULT([yes])
else
AC_MSG_ERROR([no, you need lucene 2])
AC_MSG_RESULT([no])
AC_MSG_CHECKING([whether lucene is version 3.x])
if $PERL -e 'use Archive::Zip;
my $file = "$ENV{'LUCENE_CORE_JAR'}";
my $zip = Archive::Zip->new( $file );
my $mf = $zip->contents ( "META-INF/MANIFEST.MF" );
if ( $mf =~ m/Specification-Version: 3.*/ ) {
exit 0;
} else {
exit 1;
}'; then
AC_MSG_RESULT([yes])
else
AC_MSG_ERROR([no, you need lucene 2 or 3])
fi
fi
else
......
......@@ -63,7 +63,7 @@ public class HelpFileDocument
if( aFile != null )
{
String aPath = "#HLP#" + aModule + "/" + aFile.getName();
doc.add(new Field("path", aPath, Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field("path", aPath, Field.Store.YES, Field.Index.NOT_ANALYZED));
}
// Add the caption of the file to a field named "caption". Specify a Reader,
......
......@@ -35,21 +35,14 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.Version;
import org.apache.lucene.store.NIOFSDirectory;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;
/**
When this tool is used with long path names on Windows, that is paths which start
with \\?\, then the caller must make sure that the path is unique. This is achieved
by removing '.' and '..' from the path. Paths which are created by
osl_getSystemPathFromFileURL fulfill this requirement. This is necessary because
lucene is patched to not use File.getCanonicalPath. See long_path.patch in the lucene
module.
*/
public class HelpIndexerTool
{
public HelpIndexerTool()
......@@ -182,8 +175,8 @@ public class HelpIndexerTool
try
{
Date start = new Date();
Analyzer analyzer = aLanguageStr.equals("ja") ? (Analyzer)new CJKAnalyzer() : (Analyzer)new StandardAnalyzer();
IndexWriter writer = new IndexWriter( aIndexDir, analyzer, true );
Analyzer analyzer = aLanguageStr.equals("ja") ? (Analyzer)new CJKAnalyzer(Version.LUCENE_29) : (Analyzer)new StandardAnalyzer(Version.LUCENE_29);
IndexWriter writer = new IndexWriter( NIOFSDirectory.open(aIndexDir), analyzer, true, IndexWriter.MaxFieldLength.LIMITED );
if( !bExtensionMode )
System.out.println( "Lucene: Indexing to directory '" + aIndexDir + "'..." );
int nRet = indexDocs( writer, aModule, bExtensionMode, aCaptionFilesDir, aContentFilesDir );
......
......@@ -95,7 +95,7 @@ JAVACLASSFILES = \
.IF "$(SYSTEM_LUCENE)" == "YES"
EXTRAJARFILES += $(LUCENE_CORE_JAR) $(LUCENE_ANALYZERS_JAR)
.ELSE
JARFILES += lucene-core-2.3.jar lucene-analyzers-2.3.jar
JARFILES += lucene-core-2.9.4-dev.jar lucene-analyzers-2.9.4-dev.jar
.ENDIF
JAVAFILES = $(subst,$(CLASSDIR)$/$(PACKAGE)$/, $(subst,.class,.java $(JAVACLASSFILES)))
......
--- misc/lucene-2.3.2/src/java/org/apache/lucene/store/FSDirectory.java 2008-05-01 22:27:58.000000000 +0200
+++ misc/build/lucene-2.3.2/src/java/org/apache/lucene/store/FSDirectory.java 2011-02-23 16:36:26.249515000 +0100
@@ -165,7 +165,15 @@
public static FSDirectory getDirectory(File file, LockFactory lockFactory)
throws IOException
{
- file = new File(file.getCanonicalPath());
+ String path = file.getPath();
+ //File.getCanonicalPath fails on Windows with long path names
+ //Tested with Java SE 6u23
+ //Long path names created by osl_getSystemPathFromFileURL are already
+ //unique because its implementation replaces the occurrences of .. and .
+ //That is using the com.sun.star.help.HelpIndexer service from c++ is
+ //relatively safe.
+ if (!path.startsWith("\\\\?\\"))
+ file = new File(file.getCanonicalPath());
if (file.exists() && !file.isDirectory())
throw new IOException(file + " not a directory");
@@ -455,7 +463,16 @@
public String getLockID() {
String dirName; // name to be hashed
try {
- dirName = directory.getCanonicalPath();
+ //File.getCanonicalPath fails on Windows with long path names
+ //Tested with Java SE 6u23
+ //Long path names created by osl_getSystemPathFromFileURL are already
+ //unique because its implementation replaces the occurrences of .. and .
+ //That is using the com.sun.star.help.HelpIndexer service from c++ is
+ //relatively safe.
+ if (!directory.getPath().startsWith("\\\\?\\"))
+ dirName = directory.getCanonicalPath();
+ else
+ dirName = directory.getPath();
} catch (IOException e) {
throw new RuntimeException(e.toString(), e);
}
--- lucene-2.3.2/common-build.xml 2008-05-01 22:28:56.000000000 +0200
+++ misc/build/lucene-2.3.2/common-build.xml 2008-06-16 16:01:28.812500000 +0200
@@ -280 +280 @@
- value="${version} ${svnversion} - ${user.name} - ${DSTAMP} ${TSTAMP}"/>
--- lucene-2.9.4/common-build.xml 2012-05-11 17:34:48.000000000 -0500
+++ misc/build/lucene-2.9.4/common-build.xml 2012-05-11 17:37:20.000000000 -0500
@@ -304,7 +304,7 @@
<attribute name="Implementation-Title" value="org.apache.lucene"/>
<!-- impl version can be any string -->
<attribute name="Implementation-Version"
- value="${version} ${svnversion} - ${DSTAMP} ${TSTAMP}"/>
+ value="${version}"/>
@@ -297,6 +296,0 @@
<attribute name="Implementation-Vendor"
value="The Apache Software Foundation"/>
<attribute name="X-Compile-Source-JDK"
@@ -321,12 +321,6 @@
<element name="manifest-attributes" optional="yes"/>
<element name="metainf-includes" optional="yes"/>
<sequential>
- <!-- If possible, include the svnversion -->
- <exec dir="." executable="svnversion"
- <exec dir="." executable="${svnversion.exe}"
- outputproperty="svnversion" failifexecutionfails="false">
- <arg line="."/>
- </exec>
-
<build-manifest/>
<jar
......@@ -36,27 +36,24 @@ TARGET=so_lucene
.IF "$(SOLAR_JAVA)" != ""
LUCENE_MAJOR=2
LUCENE_MINOR=3
LUCENE_MICRO=2
LUCENE_MINOR=9
LUCENE_MICRO=4
LUCENE_NAME=lucene-$(LUCENE_MAJOR).$(LUCENE_MINOR).$(LUCENE_MICRO)
# NOTE that the jar names do not contain the micro version
LUCENE_CORE_JAR=lucene-core-$(LUCENE_MAJOR).$(LUCENE_MINOR).jar
LUCENE_ANALYZERS_JAR=lucene-analyzers-$(LUCENE_MAJOR).$(LUCENE_MINOR).jar
LUCENE_CORE_JAR=lucene-core-$(LUCENE_MAJOR).$(LUCENE_MINOR).$(LUCENE_MICRO)-dev.jar
LUCENE_ANALYZERS_JAR=lucene-analyzers-$(LUCENE_MAJOR).$(LUCENE_MINOR).$(LUCENE_MICRO)-dev.jar
TARFILE_NAME=$(LUCENE_NAME)
TARFILE_MD5=48d8169acc35f97e05d8dcdfd45be7f2
PATCH_FILES=lucene.patch
.IF "$(OS)" == "WNT"
PATCH_FILES+= long_path.patch
.ENDIF
TARFILE_NAME=$(LUCENE_NAME)-src
TARFILE_MD5=17960f35b2239654ba608cf1f3e256b3
TARFILE_ROOTDIR=$(LUCENE_NAME)
PATCH_FILES=lucene.patch
BUILD_DIR=.
BUILD_ACTION= ${ANT} -buildfile .$/contrib$/analyzers$/build.xml
OUT2BIN=.$/build$/$(LUCENE_CORE_JAR) .$/build$/contrib$/analyzers$/$(LUCENE_ANALYZERS_JAR)
OUT2BIN=.$/build$/$(LUCENE_CORE_JAR) .$/build$/contrib$/analyzers/common$/$(LUCENE_ANALYZERS_JAR)
.ENDIF
......
......@@ -373,8 +373,8 @@ STD_JAR_FILE( gid_File_Jar_Bsh, bsh )
#endif
#if defined SOLAR_JAVA && ! defined SYSTEM_LUCENE
STD_JAR_FILE( gid_File_Jar_Lucene_Core, lucene-core-2.3 )
STD_JAR_FILE( gid_File_Jar_Lucene_Analyzers, lucene-analyzers-2.3 )
STD_JAR_FILE( gid_File_Jar_Lucene_Core, lucene-core-2.9.4-dev )
STD_JAR_FILE( gid_File_Jar_Lucene_Analyzers, lucene-analyzers-2.9.4-dev )
#endif
......
......@@ -26,7 +26,7 @@ my_cp:=$(CLASSPATH)$(PATH_SEPERATOR)$(SOLARBINDIR)/jaxp.jar$(PATH_SEPERATOR)$(SO
.IF "$(SYSTEM_LUCENE)" == "YES"
my_cp!:=$(my_cp)$(PATH_SEPERATOR)$(LUCENE_CORE_JAR)$(PATH_SEPERATOR)$(LUCENE_ANALYZERS_JAR)
.ELSE
my_cp!:=$(my_cp)$(PATH_SEPERATOR)$(SOLARBINDIR)/lucene-core-2.3.jar$(PATH_SEPERATOR)$(SOLARBINDIR)/lucene-analyzers-2.3.jar
my_cp!:=$(my_cp)$(PATH_SEPERATOR)$(SOLARBINDIR)/lucene-core-2.9.4-dev.jar$(PATH_SEPERATOR)$(SOLARBINDIR)/lucene-analyzers-2.9.4-dev.jar
.ENDIF
#aux_alllangiso*:=$(foreach,i,$(alllangiso) $(foreach,j,$(aux_langdirs) $(eq,$i,$j $i $(NULL))))
......
......@@ -36,14 +36,6 @@ import java.io.IOException;
import java.util.Date;
import java.util.zip.ZipOutputStream;
/**
When this tool is used with long path names on Windows, that is paths which start
with \\?\, then the caller must make sure that the path is unique. This is achieved
by removing '.' and '..' from the path. Paths which are created by
osl_getSystemPathFromFileURL fulfill this requirement. This is necessary because
lucene is patched to not use File.getCanonicalPath. See long_path.patch in the lucene
module.
*/
public class HelpIndexer extends WeakBase
implements XServiceInfo, XInvocation
{
......
......@@ -40,16 +40,21 @@ import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version;
import org.apache.lucene.store.NIOFSDirectory;
import com.sun.star.script.XInvocation;
import com.sun.star.beans.XIntrospectionAccess;
import java.io.File;
/** This class capsulates the class, that implements the minimal component and a
* factory for creating the service (<CODE>__getComponentFactory</CODE>).
*/
......@@ -249,9 +254,10 @@ public class HelpSearch
private static String[] queryImpl( String aLanguageStr, String aIndexStr, String aQueryStr,
boolean bCaptionOnly, Object[] aScoreOutArray ) throws Exception
{
IndexReader reader = IndexReader.open( aIndexStr );
File aIndexFile = new File( aIndexStr );
IndexReader reader = IndexReader.open( NIOFSDirectory.open( aIndexFile ), true );
Searcher searcher = new IndexSearcher( reader );
Analyzer analyzer = aLanguageStr.equals("ja") ? (Analyzer)new CJKAnalyzer() : (Analyzer)new StandardAnalyzer();
Analyzer analyzer = aLanguageStr.equals("ja") ? (Analyzer)new CJKAnalyzer(Version.LUCENE_29) : (Analyzer)new StandardAnalyzer(Version.LUCENE_29);
String aField;
if( bCaptionOnly )
......@@ -266,18 +272,18 @@ public class HelpSearch
aQuery = new TermQuery( new Term( aField, aQueryStr ) );
// Perform search
Hits aHits = searcher.search( aQuery );
int nHitCount = aHits.length();
TopDocs aHits = searcher.search( aQuery, 100 );
int nHitCount = aHits.totalHits;
String aDocs[] = new String[nHitCount];
float aScores[] = null;
aScores = new float[nHitCount];
for( int iHit = 0 ; iHit < nHitCount ; iHit++ )
{
Document aDoc = aHits.doc( iHit );
String aPath = aDoc.get( "path" );
ScoreDoc aDoc = aHits.scoreDocs[iHit];
String aPath = searcher.doc(aDoc.doc).get( "path" );
aDocs[iHit] = ( aPath != null ) ? aPath : "";
aScores[iHit] = aHits.score( iHit );
aScores[iHit] = aDoc.score;
}
aScoreOutArray[0] = aScores;
......
......@@ -47,8 +47,8 @@ JARFILES = ridl.jar jurt.jar unoil.jar juh.jar
EXTRAJARFILES = $(LUCENE_CORE_JAR) $(LUCENE_ANALYZERS_JAR)
JARCLASSPATH = $(EXTRAJARFILES)
.ELSE
JARFILES += lucene-core-2.3.jar lucene-analyzers-2.3.jar
JARCLASSPATH = lucene-core-2.3.jar lucene-analyzers-2.3.jar
JARFILES += lucene-core-2.9.4-dev.jar lucene-analyzers-2.9.4-dev.jar
JARCLASSPATH = lucene-core-2.9.4-dev.jar lucene-analyzers-2.9.4-dev.jar
.ENDIF
JARTARGET = LuceneHelpWrapper.jar
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment