fix indexing

This commit is contained in:
_Bastler 2021-12-04 15:33:51 +01:00
parent 8b20596398
commit 9486e7937d
4 changed files with 25 additions and 12 deletions

1
.gitignore vendored
View File

@@ -6,3 +6,4 @@ target/
hs_err*.log hs_err*.log
application.properties application.properties
usernames.txt usernames.txt
lucene

View File

@@ -3,9 +3,9 @@
*/ */
package de.bstly.board; package de.bstly.board;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory; import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory; import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
import org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory;
import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory; import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
import org.apache.lucene.analysis.standard.StandardTokenizerFactory; import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
import org.hibernate.search.backend.lucene.analysis.LuceneAnalysisConfigurationContext; import org.hibernate.search.backend.lucene.analysis.LuceneAnalysisConfigurationContext;
@@ -27,13 +27,25 @@ public class LuceneConfig implements LuceneAnalysisConfigurer {
@Override @Override
public void configure(LuceneAnalysisConfigurationContext context) { public void configure(LuceneAnalysisConfigurationContext context) {
context.analyzer("english").custom().tokenizer(StandardTokenizerFactory.class) context.analyzer("english").custom().tokenizer(StandardTokenizerFactory.class)
.charFilter(HTMLStripCharFilterFactory.class) .tokenFilter(LowerCaseFilterFactory.class)
.tokenFilter(SnowballPorterFilterFactory.class).param("language", "English")
.tokenFilter(ASCIIFoldingFilterFactory.class)
.tokenFilter(EdgeNGramFilterFactory.class).param("minGramSize", "3")
.param("maxGramSize", "7");
context.analyzer("english_search").custom().tokenizer(StandardTokenizerFactory.class)
.tokenFilter(LowerCaseFilterFactory.class) .tokenFilter(LowerCaseFilterFactory.class)
.tokenFilter(SnowballPorterFilterFactory.class).param("language", "English") .tokenFilter(SnowballPorterFilterFactory.class).param("language", "English")
.tokenFilter(ASCIIFoldingFilterFactory.class); .tokenFilter(ASCIIFoldingFilterFactory.class);
context.analyzer("german").custom().tokenizer(StandardTokenizerFactory.class) context.analyzer("german").custom().tokenizer(StandardTokenizerFactory.class)
.charFilter(HTMLStripCharFilterFactory.class) .tokenFilter(LowerCaseFilterFactory.class)
.tokenFilter(SnowballPorterFilterFactory.class).param("language", "German")
.tokenFilter(ASCIIFoldingFilterFactory.class)
.tokenFilter(EdgeNGramFilterFactory.class).param("minGramSize", "3")
.param("maxGramSize", "7");
context.analyzer("german_search").custom().tokenizer(StandardTokenizerFactory.class)
.tokenFilter(LowerCaseFilterFactory.class) .tokenFilter(LowerCaseFilterFactory.class)
.tokenFilter(SnowballPorterFilterFactory.class).param("language", "German") .tokenFilter(SnowballPorterFilterFactory.class).param("language", "German")
.tokenFilter(ASCIIFoldingFilterFactory.class); .tokenFilter(ASCIIFoldingFilterFactory.class);

View File

@@ -39,7 +39,7 @@ import de.bstly.board.model.support.Types;
@Entity @Entity
@Table(name = "comments") @Table(name = "comments")
@EntityListeners({ AuditingEntityListener.class }) @EntityListeners({ AuditingEntityListener.class })
@Indexed @Indexed(index = "lucene/comment")
public class Comment { public class Comment {
@Id @Id
@@ -57,8 +57,8 @@ public class Comment {
private Long parent; private Long parent;
@Lob @Lob
@Column(name = "text", nullable = false) @Column(name = "text", nullable = false)
@FullTextField(name = "text", searchable = Searchable.YES, analyzer = "english") @FullTextField(name = "text", searchable = Searchable.YES, analyzer = "english", searchAnalyzer = "english_search")
@FullTextField(name = "text_de", searchable = Searchable.YES, analyzer = "german") @FullTextField(name = "text_de", searchable = Searchable.YES, analyzer = "german", searchAnalyzer = "german_search")
private String text; private String text;
@Enumerated(EnumType.STRING) @Enumerated(EnumType.STRING)
@Column(name = "flagged_status", nullable = false, columnDefinition = "varchar(255) default 'NORMAL'") @Column(name = "flagged_status", nullable = false, columnDefinition = "varchar(255) default 'NORMAL'")

View File

@@ -43,7 +43,7 @@ import de.bstly.board.model.support.Types;
@Entity @Entity
@Table(name = "entries") @Table(name = "entries")
@EntityListeners({ AuditingEntityListener.class }) @EntityListeners({ AuditingEntityListener.class })
@Indexed @Indexed(index = "lucene/entry")
public class Entry { public class Entry {
@Id @Id
@@ -68,13 +68,13 @@ public class Entry {
@Column(name = "url") @Column(name = "url")
private String url; private String url;
@Column(name = "title", nullable = false) @Column(name = "title", nullable = false)
@FullTextField(name = "title", searchable = Searchable.YES, analyzer = "english") @FullTextField(name = "title", searchable = Searchable.YES, analyzer = "english", searchAnalyzer = "english_search")
@FullTextField(name = "title_de", searchable = Searchable.YES, analyzer = "german") @FullTextField(name = "title_de", searchable = Searchable.YES, analyzer = "german", searchAnalyzer = "german_search")
private String title; private String title;
@Lob @Lob
@Column(name = "text") @Column(name = "text")
@FullTextField(name = "text", searchable = Searchable.YES, analyzer = "english") @FullTextField(name = "text", searchable = Searchable.YES, analyzer = "english", searchAnalyzer = "english_search")
@FullTextField(name = "text_de", searchable = Searchable.YES, analyzer = "german") @FullTextField(name = "text_de", searchable = Searchable.YES, analyzer = "german", searchAnalyzer = "german_search")
private String text; private String text;
@Transient @Transient
private List<String> tags; private List<String> tags;