From 9486e7937dc5e8614d4fe0fe73c004decc76d9b3 Mon Sep 17 00:00:00 2001 From: _Bastler Date: Sat, 4 Dec 2021 15:33:51 +0100 Subject: [PATCH] fix indexing --- .gitignore | 3 ++- src/main/java/de/bstly/board/LuceneConfig.java | 18 +++++++++++++++--- .../java/de/bstly/board/model/Comment.java | 6 +++--- src/main/java/de/bstly/board/model/Entry.java | 10 +++++----- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 613c139..ca82d20 100755 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ target/ .classpath hs_err*.log application.properties -usernames.txt \ No newline at end of file +usernames.txt +lucene \ No newline at end of file diff --git a/src/main/java/de/bstly/board/LuceneConfig.java b/src/main/java/de/bstly/board/LuceneConfig.java index ab5d39a..ec86821 100644 --- a/src/main/java/de/bstly/board/LuceneConfig.java +++ b/src/main/java/de/bstly/board/LuceneConfig.java @@ -3,9 +3,9 @@ */ package de.bstly.board; -import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory; import org.apache.lucene.analysis.core.LowerCaseFilterFactory; import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory; +import org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory; import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory; import org.apache.lucene.analysis.standard.StandardTokenizerFactory; import org.hibernate.search.backend.lucene.analysis.LuceneAnalysisConfigurationContext; @@ -27,13 +27,25 @@ public class LuceneConfig implements LuceneAnalysisConfigurer { @Override public void configure(LuceneAnalysisConfigurationContext context) { context.analyzer("english").custom().tokenizer(StandardTokenizerFactory.class) - .charFilter(HTMLStripCharFilterFactory.class) + .tokenFilter(LowerCaseFilterFactory.class) + .tokenFilter(SnowballPorterFilterFactory.class).param("language", "English") + .tokenFilter(ASCIIFoldingFilterFactory.class) + .tokenFilter(EdgeNGramFilterFactory.class).param("minGramSize", "3") + .param("maxGramSize", "7"); + + context.analyzer("english_search").custom().tokenizer(StandardTokenizerFactory.class) .tokenFilter(LowerCaseFilterFactory.class) .tokenFilter(SnowballPorterFilterFactory.class).param("language", "English") .tokenFilter(ASCIIFoldingFilterFactory.class); context.analyzer("german").custom().tokenizer(StandardTokenizerFactory.class) - .charFilter(HTMLStripCharFilterFactory.class) + .tokenFilter(LowerCaseFilterFactory.class) + .tokenFilter(SnowballPorterFilterFactory.class).param("language", "German") + .tokenFilter(ASCIIFoldingFilterFactory.class) + .tokenFilter(EdgeNGramFilterFactory.class).param("minGramSize", "3") + .param("maxGramSize", "7"); + + context.analyzer("german_search").custom().tokenizer(StandardTokenizerFactory.class) .tokenFilter(LowerCaseFilterFactory.class) .tokenFilter(SnowballPorterFilterFactory.class).param("language", "German") .tokenFilter(ASCIIFoldingFilterFactory.class); diff --git a/src/main/java/de/bstly/board/model/Comment.java b/src/main/java/de/bstly/board/model/Comment.java index 9a81834..fbfcd18 100644 --- a/src/main/java/de/bstly/board/model/Comment.java +++ b/src/main/java/de/bstly/board/model/Comment.java @@ -39,7 +39,7 @@ import de.bstly.board.model.support.Types; @Entity @Table(name = "comments") @EntityListeners({ AuditingEntityListener.class }) -@Indexed +@Indexed(index = "lucene/comment") public class Comment { @Id @@ -57,8 +57,8 @@ public class Comment { private Long parent; @Lob @Column(name = "text", nullable = false) - @FullTextField(name = "text", searchable = Searchable.YES, analyzer = "english") - @FullTextField(name = "text_de", searchable = Searchable.YES, analyzer = "german") + @FullTextField(name = "text", searchable = Searchable.YES, analyzer = "english", searchAnalyzer = "english_search") + @FullTextField(name = "text_de", searchable = Searchable.YES, analyzer = "german", searchAnalyzer = "german_search") private String text; @Enumerated(EnumType.STRING) @Column(name = "flagged_status", nullable = false, columnDefinition = "varchar(255) default 'NORMAL'") diff --git a/src/main/java/de/bstly/board/model/Entry.java b/src/main/java/de/bstly/board/model/Entry.java index 99dbaa1..6907fc1 100644 --- a/src/main/java/de/bstly/board/model/Entry.java +++ b/src/main/java/de/bstly/board/model/Entry.java @@ -43,7 +43,7 @@ import de.bstly.board.model.support.Types; @Entity @Table(name = "entries") @EntityListeners({ AuditingEntityListener.class }) -@Indexed +@Indexed(index = "lucene/entry") public class Entry { @Id @@ -68,13 +68,13 @@ public class Entry { @Column(name = "url") private String url; @Column(name = "title", nullable = false) - @FullTextField(name = "title", searchable = Searchable.YES, analyzer = "english") - @FullTextField(name = "title_de", searchable = Searchable.YES, analyzer = "german") + @FullTextField(name = "title", searchable = Searchable.YES, analyzer = "english", searchAnalyzer = "english_search") + @FullTextField(name = "title_de", searchable = Searchable.YES, analyzer = "german", searchAnalyzer = "german_search") private String title; @Lob @Column(name = "text") - @FullTextField(name = "text", searchable = Searchable.YES, analyzer = "english") - @FullTextField(name = "text_de", searchable = Searchable.YES, analyzer = "german") + @FullTextField(name = "text", searchable = Searchable.YES, analyzer = "english", searchAnalyzer = "english_search") + @FullTextField(name = "text_de", searchable = Searchable.YES, analyzer = "german", searchAnalyzer = "german_search") private String text; @Transient private List tags;