[Yanel-dev] Patch for LuceneIndexer
Balz Schreier
balz.schreier at gmail.com
Wed Mar 23 15:46:11 CET 2011
Hi Michael,
Attached is a patch for the current LuceneIndexer.
Some of the points that I raised in my last mail are fixed in this patch.
If you want an HG version to pull from my repo, let me know and I can submit it
there too.
Cheers
Balz
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.wyona.org/pipermail/yanel-development/attachments/20110323/cd90d2d5/attachment.html>
-------------- next part --------------
Index: trunk/src/impl/java/org/wyona/yarep/impl/search/lucene/LuceneIndexer.java
===================================================================
--- trunk/src/impl/java/org/wyona/yarep/impl/search/lucene/LuceneIndexer.java (revision 57447)
+++ trunk/src/impl/java/org/wyona/yarep/impl/search/lucene/LuceneIndexer.java (working copy)
@@ -14,7 +14,6 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.WriteOutContentHandler;
import org.wyona.yarep.core.Node;
import org.wyona.yarep.core.Property;
import org.wyona.yarep.core.Repository;
@@ -23,6 +22,7 @@
import org.wyona.yarep.core.search.SearchException;
import org.wyona.yarep.impl.repo.vfs.VirtualFileSystemNode;
import org.wyona.yarep.impl.repo.vfs.VirtualFileSystemRepository;
+import org.wyona.yarep.impl.search.lucene.LuceneConfig;
/**
* Lucene implementation of indexer
@@ -30,6 +30,11 @@
public class LuceneIndexer implements Indexer {
static Logger log = Logger.getLogger(LuceneIndexer.class);
+ private static final String LOCK = "lock";
+ private static enum INDEX_TYPE {FULLTEXT, PROPERTIES};
+ public static final String INDEX_PROPERTY_FULL = "_FULLTEXT";
+ public static final String INDEX_PROPERTY_YAREPPATH = "_PATH";
+
protected LuceneConfig config;
/**
@@ -82,7 +87,7 @@
//luceneDoc.add(new Field("_FULLTEXT", fullText, Field.Store.NO, Field.Index.TOKENIZED));
try {
- updateDocument(createFulltextIndexWriter(), node.getPath(), luceneDoc);
+ updateDocument(INDEX_TYPE.FULLTEXT, node.getPath(), luceneDoc);
} catch(org.apache.lucene.store.LockObtainFailedException e) {
log.warn("Could not init fulltext IndexWriter (maybe because of existing lock), hence content of node '" + node.getPath() + "' will not be indexed!");
}
@@ -105,23 +110,26 @@
IndexWriter indexWriter = null;
VirtualFileSystemRepository vfsRepo = ((VirtualFileSystemNode) node).getRepository();
String nodePath = "Could not get Path of node.";
- try {
- nodePath = node.getPath();
- indexWriter = createFulltextIndexWriter();
- indexWriter.deleteDocuments(new org.apache.lucene.index.Term("_PATH", node.getPath()));
- indexWriter.close();
- } catch(Exception e) {
- log.warn("Could not init IndexWriter, because of existing lock, hence content of node '" + nodePath + "' will not be deleted from the index!");
+ synchronized (LOCK) {
try {
+ nodePath = node.getPath();
+ indexWriter = createFulltextIndexWriter();
+ indexWriter.deleteDocuments(new org.apache.lucene.index.Term("_PATH", node.getPath()));
indexWriter.close();
- } catch (Exception e2) {
- log.warn("Could not close indexWriter. Exception: " + e2.getMessage());
+
+ } catch(Throwable e) {
+ log.warn("Could not init IndexWriter, because of existing lock, hence content of node '" + nodePath + "' will not be deleted from the index!");
+ } finally {
+ try {
+ indexWriter.close();
+ } catch (Throwable e2) {
+ }
}
}
}
/**
- * Get index writer
+ * Get index writer: PUBLIC REALLY NEEDED???
*/
public IndexWriter createFulltextIndexWriter() throws Exception {
log.debug("Fulltext search index directory: " + config.getFulltextSearchIndexFile());
@@ -131,7 +139,7 @@
}
/**
- *
+ * Get index writer: PUBLIC REALLY NEEDED???
*/
public IndexWriter createPropertiesIndexWriter() throws Exception {
return createIndexWriter(config.getPropertiesSearchIndexFile(), config.getPropertyAnalyzer());
@@ -183,13 +191,13 @@
public void index(Node node, Property property) throws SearchException {
try {
String path = node.getPath();
- if (config.doIndexRevisions() && org.wyona.yarep.util.YarepUtil.isRevision(node)) {
- String revisionName = ((org.wyona.yarep.core.Revision)node).getRevisionName();
- log.debug("Index property '" + property.getName() + " of revision: " + path + " (" + revisionName + "), " + node.getClass().getName());
- path = path + "#revision=" + revisionName; // TODO: Discuss the separator
- } else {
- log.debug("Index property '" + property.getName() + " of node: " + path);
- }
+// if (config.doIndexRevisions() && org.wyona.yarep.util.YarepUtil.isRevision(node)) {
+// String revisionName = ((org.wyona.yarep.core.Revision)node).getRevisionName();
+// log.debug("Index property '" + property.getName() + " of revision: " + path + " (" + revisionName + "), " + node.getClass().getName());
+// path = path + "#revision=" + revisionName; // TODO: Discuss the separator
+// } else {
+// log.debug("Index property '" + property.getName() + " of node: " + path);
+// }
Document luceneDoc = getDocument(path);
@@ -234,7 +242,7 @@
// INFO: Now add lucene document containing all properties to index
try {
- updateDocument(createPropertiesIndexWriter(), path, luceneDoc);
+ updateDocument(INDEX_TYPE.PROPERTIES, path, luceneDoc);
} catch(org.apache.lucene.store.LockObtainFailedException e) {
log.warn("Could not init properties IndexWriter (maybe because of existing lock), hence properties of node '" + path + "' will not be indexed!");
}
@@ -268,15 +276,34 @@
* @param path Path of node with which the fields and values are related to
* @param document Lucene document containing the fields and values
*/
- private void updateDocument(IndexWriter indexWriter, String path, Document document) throws Exception {
- if (indexWriter != null) {
- if (log.isDebugEnabled()) log.debug("Node will be indexed: " + path);
- indexWriter.updateDocument(new org.apache.lucene.index.Term("_PATH", path), document);
- indexWriter.close();
- //indexWriter.flush();
- } else {
- throw new Exception("IndexWriter is null and hence node will not be indexed: " + path);
- //log.warn("IndexWriter is null and hence node will not be indexed: " + path);
+ private void updateDocument(INDEX_TYPE type, String path, Document document) throws Exception {
+ IndexWriter indexWriter = null;
+ synchronized (LOCK) {
+ try {
+ if (type == INDEX_TYPE.FULLTEXT) {
+ indexWriter = createFulltextIndexWriter();
+ } else {
+ indexWriter = createPropertiesIndexWriter();
+ }
+ if (indexWriter != null) {
+ if (log.isDebugEnabled()) log.debug("Node will be indexed: " + path);
+ indexWriter.updateDocument(new org.apache.lucene.index.Term("_PATH", path), document);
+ indexWriter.close();
+ //indexWriter.flush();
+ } else {
+ log.fatal("IndexWriter is null!");
+ throw new Exception("IndexWriter is null and hence node will not be indexed: " + path);
+ //log.warn("IndexWriter is null and hence node will not be indexed: " + path);
+ }
+ } catch (Throwable t) {
+ log.error(t,t);
+
+ } finally {
+ try {
+ indexWriter.close();
+ } catch (Throwable t) {
+ }
+ }
}
}
More information about the Yanel-development
mailing list