一 创建并配置文件
1.1 pom.xml配置
创建一个maven项目,项目的pom的大概结构如下
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.demo.aidoc</groupId>
  <artifactId>es-plugin</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>es-plugin</name>
  <!-- Substituted into plugin-descriptor.properties as ${project.description}. -->
  <description>Demo analysis plugin for Elasticsearch</description>
  <url>http://maven.apache.org</url>

  <repositories>
    <repository>
      <id>spring</id>
      <url>https://maven.aliyun.com/repository/spring</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>true</enabled>
      </snapshots>
    </repository>
  </repositories>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Source and target levels are declared once each (target used to be declared twice). -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <!-- Must match the version of the Elasticsearch node the plugin is installed into. -->
    <elasticsearch.version>7.16.0</elasticsearch.version>
    <hanlp.version>portable-1.7.3</hanlp.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>com.hankcs</groupId>
      <artifactId>hanlp</artifactId>
      <version>${hanlp.version}</version>
    </dependency>
    <dependency>
      <groupId>org.elasticsearch</groupId>
      <artifactId>elasticsearch</artifactId>
      <version>${elasticsearch.version}</version>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpclient</artifactId>
      <version>4.5.2</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.79</version>
    </dependency>
  </dependencies>

  <build>
    <finalName>${project.name}</finalName>
    <resources>
      <resource>
        <directory>src/main/resources</directory>
        <filtering>false</filtering>
        <excludes>
          <!-- Kept out of the jar; the assembly descriptor adds it to the zip root instead. -->
          <exclude>plugin-descriptor.properties</exclude>
        </excludes>
      </resource>
    </resources>
    <plugins>
      <!-- Builds the installable plugin zip during the package phase. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.6</version>
        <configuration>
          <appendAssemblyId>false</appendAssemblyId>
          <outputDirectory>${project.build.directory}/releases/</outputDirectory>
          <descriptors>
            <descriptor>${basedir}/src/main/resources/plugin.xml</descriptor>
          </descriptors>
        </configuration>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.5.1</version>
        <configuration>
          <source>${maven.compiler.source}</source>
          <target>${maven.compiler.target}</target>
        </configuration>
      </plugin>
      <!-- Copies ${basedir}/config into target/config during the validate phase. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-resources-plugin</artifactId>
        <version>3.2.0</version>
        <configuration>
          <encoding>${project.build.sourceEncoding}</encoding>
        </configuration>
        <executions>
          <execution>
            <id>copy-spring-boot-resources</id>
            <phase>validate</phase>
            <goals>
              <goal>copy-resources</goal>
            </goals>
            <configuration>
              <encoding>utf-8</encoding>
              <outputDirectory>${basedir}/target/config</outputDirectory>
              <resources>
                <resource>
                  <directory>${basedir}/config</directory>
                  <includes>
                    <include>*</include>
                  </includes>
                </resource>
              </resources>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
注意其中依赖的elasticsearch的版本要与目标elasticsearch的版本保持一致。
1.2 assembly
插件配置
文件路径src/main/resources/plugin.xml(必须与pom.xml中maven-assembly-plugin的<descriptor>配置的路径保持一致)
<?xml version="1.0"?>
<!-- Assembly descriptor: lays out the contents of the installable plugin zip. -->
<assembly>
  <id>analysis-gridsum</id>
  <formats>
    <format>zip</format>
  </formats>
  <!-- Entries are placed at the zip root, without a wrapping base directory. -->
  <includeBaseDirectory>false</includeBaseDirectory>

  <fileSets>
    <fileSet>
      <directory>${project.basedir}/config</directory>
      <outputDirectory>config</outputDirectory>
    </fileSet>
  </fileSets>

  <!-- Descriptor and policy files go to the zip root; filtering resolves the ${...} placeholders. -->
  <files>
    <file>
      <source>${project.basedir}/src/main/resources/plugin-descriptor.properties</source>
      <outputDirectory />
      <filtered>true</filtered>
    </file>
    <file>
      <source>${project.basedir}/src/main/resources/plugin-security.policy</source>
      <outputDirectory />
      <filtered>true</filtered>
    </file>
  </files>

  <dependencySets>
    <!-- Project jar plus dependencies, with the Elasticsearch artifact excluded. -->
    <dependencySet>
      <outputDirectory />
      <useProjectArtifact>true</useProjectArtifact>
      <useTransitiveFiltering>true</useTransitiveFiltering>
      <excludes>
        <exclude>org.elasticsearch:elasticsearch</exclude>
      </excludes>
    </dependencySet>
    <!-- Explicit include for httpclient. -->
    <dependencySet>
      <outputDirectory />
      <useProjectArtifact>true</useProjectArtifact>
      <useTransitiveFiltering>true</useTransitiveFiltering>
      <includes>
        <include>org.apache.httpcomponents:httpclient</include>
      </includes>
    </dependencySet>
  </dependencySets>
</assembly>
1.3 插件描述文件
在 resources 目录下创建 plugin-descriptor.properties 文件。
所有插件都必须包含一个名为 plugin-descriptor.properties 的文件。 此文件的格式在此示例中详细描述:https://www.elastic.co/guide/en/elasticsearch/plugins/master/plugin-authors.html#_plugin_descriptor_file
官方示例如下
# Elasticsearch plugin descriptor file
# This file must exist as 'plugin-descriptor.properties' inside a plugin.
#
### example plugin for "foo"
#
# foo.zip <-- zip file for the plugin, with this structure:
# |____   <arbitrary name1>.jar <-- classes, resources, dependencies
# |____   <arbitrary nameN>.jar <-- any number of jars
# |____   plugin-descriptor.properties <-- example contents below:
#
# classname=foo.bar.BazPlugin
# description=My cool plugin
# version=6.0
# elasticsearch.version=6.0
# java.version=1.8
#
### mandatory elements for all plugins:
#
# 'type': the type of this plugin. 'isolated' indicated a typical sandboxed plugin,
# whereas 'bootstrap' indicates a plugin whose jars are added to the JVM's boot
# classpath.
type=${type}
#
# 'description': simple summary of the plugin
description=${description}
#
# 'version': plugin's version
version=${version}
#
# 'name': the plugin name
name=${name}
<% if (type != "bootstrap") { %>
#
# 'classname': the name of the class to load, fully-qualified. Only applies to
# "isolated" plugins
classname=${classname}
<% } %>
#
# 'java.version': version of java the code is built against
# use the system property java.specification.version
# version string must be a sequence of nonnegative decimal integers
# separated by "."'s and may have leading zeros
java.version=${javaVersion}
#
# 'elasticsearch.version': version of elasticsearch compiled against
elasticsearch.version=${elasticsearchVersion}
### optional elements for plugins:
#
# 'extended.plugins': other plugins this plugin extends through SPI
extended.plugins=${extendedPlugins}
#
# 'has.native.controller': whether or not the plugin has a native controller
has.native.controller=${hasNativeController}
<% if (type == "bootstrap") { %>
#
# 'java.opts': any additional command line parameters to pass to the JVM when
# Elasticsearch starts. Only applies to "bootstrap" plugins.
java.opts=${javaOpts}
<% } %>
<% if (licensed) { %>
# This plugin requires that a license agreement be accepted before installation
licensed=${licensed}
<% } %>
解释如下
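| Element | Type | Description |
| --- | --- | --- |
| description | String | simple summary of the plugin |
| version | String | plugin’s version |
| name | String | the plugin name |
| classname | String | 要加载的类的名称,完全限定。 |
| java.version | String | 构建代码的java版本。 使用系统属性 java.specification.version。 版本字符串必须是由“.”分隔的非负十进制整数序列,并且可以有前导零。 |
| elasticsearch.version | String | 编译该插件时所针对的 Elasticsearch 版本。 |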
请注意,只有插件根目录下的 jar 文件才会添加到插件的类路径中! 如果您需要其他资源,请将它们打包到资源 jar 中。
插件发布生命周期:您必须为每个新的 Elasticsearch 版本发布一个新版本的插件。 加载插件时会检查此版本,因此 Elasticsearch 将拒绝在存在具有不正确 elasticsearch.version 的插件时启动。
本项目中实际的描述文件如下:
# Plugin descriptor for this project. Each key must be on its own line.
# The ${...} placeholders are resolved from pom.xml when the assembly is built.

# Requires <description> to be set in pom.xml; otherwise the placeholder is left unresolved.
description=${project.description}
version=${project.version}
name=${project.name}
# Fully-qualified name of the class that extends Plugin.
classname=com.demo.aidoc.es_plugin.DemoPlugin
java.version=${maven.compiler.target}
elasticsearch.version=${elasticsearch.version}
**注意:**上述配置中的 classname 的值一定要按照实际情况修改为插件主类(继承 Plugin 的类)的全限定类名
plugin-descriptor.properties 中的占位符取自 pom.xml 中的 properties 配置,这样维护更方便,在打包时占位符会被替换为实际的值
1.4 权限声明文件
在 resources 目录下创建文件,文件名为 plugin-security.policy。
jdk的安全策略限制,必须声明项目使用的权限
一些插件可能需要额外的安全权限。 插件可以包含可选的 plugin-security.policy 文件,其中包含用于附加权限的授权语句。 任何额外的权限都会以醒目的警告形式展示给用户,并且用户在交互式安装插件时必须逐一确认。 因此,如果可能,最好避免申请任何不必要的权限!
如果您使用的是 Elasticsearch Gradle 构建系统,请将这个文件放在 src/main/plugin-metadata 中,它也会在单元测试期间应用。
请记住,Java 安全模型是基于堆栈的,额外的权限只会授予插件中的 jar,因此您必须围绕需要提升权限的操作编写适当的安全代码。 建议添加检查以防止非特权代码(例如脚本)获得提升后的权限。 例如:
// ES permission you should check before doPrivileged() blocks
import org.elasticsearch.SpecialPermission;

SecurityManager sm = System.getSecurityManager();
if (sm != null) {
  // unprivileged code such as scripts do not have SpecialPermission
  sm.checkPermission(new SpecialPermission());
}
AccessController.doPrivileged(
  // sensitive operation goes here
);
以ik分词器中权限检查部分的代码为例:
// Permission check performed before entering the privileged block.
SpecialPermission.check();
AccessController.doPrivileged((PrivilegedAction<Void>) () -> {
    // The privileged work goes here; this excerpt has none and just returns null.
    return null;
});
本例中的plugin-security.policy的内容如下
// Permissions requested by this plugin, one per line.
// NOTE(review): the "okhttp" markers are kept from the original, where their exact
// placement is ambiguous; the pom.xml shown earlier declares httpclient, not okhttp.
// Confirm which permissions are really needed, since each one is shown to the user at install time.
grant {
  permission java.util.PropertyPermission "*", "read,write";
  permission java.net.SocketPermission "*", "connect,resolve"; // okhttp
  permission java.lang.RuntimePermission "getClassLoader"; // okhttp
  permission java.net.NetPermission "getProxySelector"; // okhttp
  permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
  permission java.lang.RuntimePermission "accessDeclaredMembers";
  permission java.lang.RuntimePermission "setContextClassLoader";
  permission java.lang.RuntimePermission "createClassLoader";
};
二 编写插件核心代码
开发插件只需要继承Plugin实现AnalysisPlugin就可以了
DemoTokenizer是分词器,继承Tokenizer,通过重写incrementToken方法来实现自己的分词程序
DemoAnalyzer是分析器,继承Analyzer,里面需要塞一个分词器
DemoAnalyzerProvider是分析器提供程序,继承AbstractIndexAnalyzerProvider,通过重写get方法返回自定义分析器
DemoTokenizerFactory是分词器工厂,继承AbstractTokenizerFactory,通过重写create方法返回自定义的分词器
DemoPlugin自定义插件的主要实现,继承Plugin实现AnalysisPlugin,通过重写getTokenizers将分词器工厂放入map,通过重写getAnalyzers将分析器放入map(这里的key后面会用到)
2.1 自定义Tokenizer
先实现一个自定义Tokenizer,该类是项目中重要的代码部分,主要是在这里实现自己的分词逻辑。
本例中的代码如下
package com.demo.aidoc.es_plugin.tokenizer;import com.alibaba.fastjson.JSONArray;import com.alibaba.fastjson.JSONObject;import com.hankcs.hanlp.corpus.tag.Nature;import com.hankcs.hanlp.seg.common.Term;import org.apache.lucene.analysis.Tokenizer;import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;import org.apache.lucene.analysis.tokenattributes.TypeAttribute;import java.io.*;import java.net.URL;import java.util.List;import java.util.Properties;import java.util.concurrent.atomic.AtomicInteger;import java.util.stream.Collectors;public class DemoTokenizer extends Tokenizer { private final static String PUNCTION = " -()/" ; private final StringBuilder buffer = new StringBuilder(); private int suffixOffset; private int tokenStart = 0 , tokenEnd = 0 ; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class ) ; private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class ) ; private final PositionIncrementAttribute positionAttr = addAttribute(PositionIncrementAttribute.class ) ; private TypeAttribute typeAtt = addAttribute(TypeAttribute.class ) ; private List<Term> list = null ; private final AtomicInteger offset = new AtomicInteger(0 ); @Override public final boolean incrementToken () throws IOException { clearAttributes(); buffer.setLength(0 ); if (null == list || offset.get() >= list.size()) { termAtt.setEmpty(); return false ; } Term term = list.get(offset.get()); tokenStart = term.offset; termAtt.setEmpty().append(term.word); offsetAtt.setOffset(term.offset, term.offset + term.length()); positionAttr.setPositionIncrement(term.length()); typeAtt.setType(term.nature.toString()); offset.incrementAndGet(); return true ; } @Override public final void end () { final int finalOffset = correctOffset(suffixOffset); this .offsetAtt.setOffset(finalOffset, finalOffset); } @Override 
public void reset () throws IOException { super .reset(); tokenStart = tokenEnd = 0 ; this .offset.set(0 ); } } }
2.2 自定义Tokenizer工厂
本例中的代码如下
package com.demo.aidoc.es_plugin.factory;import org.apache.lucene.analysis.Tokenizer;import org.elasticsearch.common.settings.Settings;import org.elasticsearch.env.Environment;import org.elasticsearch.index.IndexSettings;import org.elasticsearch.index.analysis.AbstractTokenizerFactory;import com.demo.aidoc.es_plugin.tokenizer.DemoTokenizer;public class DemoTokenizerFactory extends AbstractTokenizerFactory { public DemoTokenizerFactory (IndexSettings indexSettings, Environment env, String name, Settings settings) { super (indexSettings, settings, name); } @Override public Tokenizer create () { return new DemoTokenizer(); } }
2.3 自定义Analyzer
本例中的代码如下
package com.demo.aidoc.es_plugin.analyzer;

import org.apache.lucene.analysis.Analyzer;
import com.demo.aidoc.es_plugin.tokenizer.DemoTokenizer;

/**
 * Analyzer whose analysis chain consists of a {@link DemoTokenizer} only, with no token filters.
 */
public class DemoAnalyzer extends Analyzer {

    /**
     * Builds the components for a field; the field name does not influence the result.
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        DemoTokenizer tokenizer = new DemoTokenizer();
        return new TokenStreamComponents(tokenizer);
    }
}
2.4 自定义Plugin
本例中的代码如下
package com.demo.aidoc.es_plugin;import java.util.HashMap;import java.util.Map;import org.apache.lucene.analysis.Analyzer;import org.elasticsearch.index.analysis.AnalyzerProvider;import org.elasticsearch.index.analysis.TokenizerFactory;import org.elasticsearch.indices.analysis.AnalysisModule;import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;import org.elasticsearch.plugins.AnalysisPlugin;import org.elasticsearch.plugins.Plugin;import com.demo.aidoc.es_plugin.factory.DemoTokenizerFactory;import com.demo.aidoc.es_plugin.provider.DemoAnalyzerProvider;public class DemoPlugin extends Plugin implements AnalysisPlugin { private final static String PLUGIN_NAME="demo" ; @Override public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() { Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> extra = new HashMap<>(); extra.put(PLUGIN_NAME, DemoTokenizerFactory::new ); return extra; } @Override public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() { Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> extra = new HashMap<>(); extra.put(PLUGIN_NAME, DemoAnalyzerProvider::new ); return extra; } }
注意:上述代码中的名字 demo(即 PLUGIN_NAME)很重要,它是分词器和分析器注册时使用的 key;在使用自定义分词器时(例如 _analyze 请求中的 analyzer 参数),填写的就是这个名字。
2.5 测试自定义插件
/**
 * Stand-alone smoke test: runs {@code DemoAnalyzer} over a sample sentence and prints each term.
 */
public class DemoMain {

    public static void main(String[] args) throws Exception {
        // try-with-resources closes the stream and then the analyzer, even if tokenization throws.
        try (DemoAnalyzer analyzer = new DemoAnalyzer();
             TokenStream ts = analyzer.tokenStream("text", "中华人民共和国车船税法")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            // Call order follows the TokenStream workflow: reset, incrementToken loop, end, close.
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term.toString());
            }
            ts.end();
        }
    }
}
三 部署测试
测试 Java 插件时,插件只有位于 plugins/ 目录下才会被自动加载。 使用 bin/elasticsearch-plugin install file:///path/to/your/plugin 安装插件进行测试。
你也可以在集成测试的测试框架中加载你的插件。
测试请求如下
POST /_analyze
{
  "analyzer": "demo",
  "text": "我爱北京 天安门"
}