1. Displaying the tokens produced by an analyzer
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public static void displayToken(String str, Analyzer analyzer) {
    try {
        /*
         * TokenStream tokenStream(String fieldName, Reader reader):
         * obtains a TokenStream (the stream of tokens).
         * Parameter 1: the field name (has no real significance here)
         * Parameter 2: the input Reader
         */
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(str));
        // Add an attribute that exposes the current token's text; it is refreshed
        // each time the TokenStream advances to the next token.
        CharTermAttribute cta = tokenStream.addAttribute(CharTermAttribute.class);
        // The TokenStream contract requires reset() before the first incrementToken().
        tokenStream.reset();
        // Walk through the TokenStream, printing each token.
        while (tokenStream.incrementToken()) {
            System.out.print("[" + cta + "]");
        }
        tokenStream.end();
        tokenStream.close();
        System.out.println();
        System.out.println("--------------------------------");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
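A minimal usage sketch for comparison purposes, assuming a reasonably recent Lucene version in which StandardAnalyzer, SimpleAnalyzer and WhitespaceAnalyzer have no-arg constructors (the exact package names and constructor signatures vary between Lucene releases):

import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

public static void main(String[] args) {
    String text = "how are you thank you";
    // Each call prints the tokens produced by that analyzer on its own line,
    // so the tokenization rules can be compared side by side.
    displayToken(text, new StandardAnalyzer());
    displayToken(text, new SimpleAnalyzer());
    displayToken(text, new WhitespaceAnalyzer());
}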
2. Displaying all token information
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

public static void displayAllTokenInfo(String str, Analyzer analyzer) {
    try {
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(str));
        // Position increment: the distance between this token and the previous one.
        PositionIncrementAttribute pia = tokenStream.addAttribute(PositionIncrementAttribute.class);
        // Start and end character offsets of each token within the original text.
        OffsetAttribute oa = tokenStream.addAttribute(OffsetAttribute.class);
        // The token's type as reported by the tokenizer (e.g. <ALPHANUM>).
        TypeAttribute ta = tokenStream.addAttribute(TypeAttribute.class);
        // The token's text itself.
        CharTermAttribute ca = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.println("position increment: " + pia.getPositionIncrement()
                    + "  offsets: " + oa.startOffset() + "--" + oa.endOffset()
                    + "  type: " + ta.type()
                    + "  token: " + ca);
        }
        tokenStream.end();
        tokenStream.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
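A usage sketch along the same lines (again assuming a recent Lucene version with a no-arg StandardAnalyzer constructor), printing the full metadata for every token of a single analyzer:

// Print position increment, offsets, type and text for each token.
displayAllTokenInfo("how are you thank you", new StandardAnalyzer());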