RPackage005---Rwordseg-CFANZ编程社区

直接上代码；如有问题请查阅包的帮助文档，大部分问题都能在其中找到解答。
# Installation notes -----------------------------------------------------------
## Download the package from https://r-forge.r-project.org/R/?group_id=1054
## and install it manually.
## Setting up rJava is not covered here; tutorials are available online.

library(rJava)
library(Rwordseg)
# Turn off person-name recognition.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, while FALSE is a reserved word.
segment.options(isNameRecognition = FALSE)
# segmentCN parameters ---------------------------------------------------------

#### Parameters:
###### 1. strwords:   a Chinese sentence in UTF-8, or the path of a text file.
###### 2. analyzer:   a Java analyzer object (the segmentation engine).
###### 3. nature:     whether to recognise the nature (part of speech) of the
######                words; not returned by default.
###### 4. nosymbol:   controls whether symbols/punctuation are kept in the
######                sentence.
######                NOTE(review): the name suggests TRUE drops them -- confirm
######                against the package documentation.
###### 5. returnType: format of the result. The default is a character vector;
######                "tm" returns a single space-separated string that can be
######                used by Corpus directly.
###### 6. isfast:     whether to run the fast analyzer; in that mode the nature
######                of the words cannot be returned.
###### 7. outfile:    path of the output when strwords is a file.
###### 8. blocklines: (maximal) number of lines to read at one time when
######                strwords is a file.

#### Usage -- the signature with its defaults, kept for reference only.
#### It is intentionally commented out: `strwords` has no value at this point,
#### so running it as a statement fails. `.RwordsegEnv` also looks like a
#### package-internal object (TODO confirm). See the Demo section for real calls.
# segmentCN(strwords,
#           analyzer = get("Analyzer", envir = .RwordsegEnv),
#           nature = FALSE, nosymbol = TRUE,
#           returnType = c("vector", "tm"), isfast = FALSE,
#           outfile = "", blocklines = 1000)

# Dictionary management --------------------------------------------------------
## List the dictionaries that are currently installed
listDict()

## Install a dictionary
#### Plain-text dictionaries and Sogou .scel cell dictionaries are supported.
#### Arguments:
###### 1. dictpath: path of the dictionary file (the original note asks for an
######              absolute path)
###### 2. dictname: name of the dictionary
###### 3. dicttype: type of the dictionary; the original note gives txt as the
######              default -- NOTE(review): confirm the exact default value
###### 4. load:     whether to load the dictionary immediately
installDict(dictpath = "./Rwordseg/Dict/chinese-surname.scel",
            dictname = "chinese-surname",
            dicttype = "scel",
            load = TRUE)

## Remove a dictionary by name
uninstallDict(removedict = "chinese-surname")

# Custom text dictionaries -----------------------------------------------------
### Location: %R_HOME%\library\Rwordseg\dict
### Any file with the suffix .dic can be added there; enter the custom words
### one per line.
### After editing, call loadDict() to import the dictionary.
# Other options ----------------------------------------------------------------
## Person-name recognition (switched off here)
segment.options(isNameRecognition = FALSE)

# Demo -------------------------------------------------------------------------
## Without the area dictionary
segmentCN("重庆市涪陵区皮家街14号1-1")
# [1] "重庆市" "涪陵"   "区"     "皮"     "家"     "街"     "14号"  "1"      "1"  
segmentCN("贵州省纳雍县曙光乡鼠场村大树脚组")
# [1] "贵州省" "纳"     "雍"     "县"     "曙光"   "乡"     "鼠"    
# [8] "场"     "村"     "大树"   "脚"     "组" 
## With the area dictionary installed
## The dictionary name matches the file being installed (chinese-area.scel);
## reusing the name "chinese-surname" here would mislabel it in listDict().
installDict(
  dictpath = "./Rwordseg/Dict/chinese-area.scel",
  dictname = "chinese-area",
  dicttype = "scel",
  load = TRUE
)
segmentCN("重庆市涪陵区皮家街14号1-1")
# [1] "重庆市" "涪陵区" "皮"     "家"     "街"     "14号"   "1"  "1"
segmentCN("贵州省纳雍县曙光乡鼠场村大树脚组")
# [1] "贵州省" "纳雍县" "曙光乡" "鼠"     "场"     "村"     "大树" "脚"     "组"
## Clean up: remove only the dictionary this demo installed.
## NOTE(review): a bare uninstallDict() presumably removes every installed
## dictionary -- confirm against the package documentation.
uninstallDict(removedict = "chinese-area")