
Requirement: keyword queries for a hot-search feature, with word-segmented (tokenized) matching.
Data environment: MySQL holds the crawled data in table t_bj (id, title, content, publishtime); Logstash syncs it into Elasticsearch, and a Spring Boot service connects to Elasticsearch to expose REST endpoints for the frontend pages.

Spring Boot

pom.xml dependencies:

    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch</artifactId>
        <version>7.17.3</version>
    </dependency>

application.yml:

spring:
  elasticsearch:
    uris: 192.168.0.1:9200
    connection-timeout: 1s
    socket-timeout: 30s

Java files:
RsController.java
@RestController
@RequestMapping("/rs")
public class RsController {
    @Autowired
    private TBjService tBjService;

    @PostMapping("/bjKey")
    public PageInfo<TBj> getTBj(@RequestBody ReqKey reqKey) {
        return tBjService.getTBj(reqKey.getKeyValue(), reqKey.getPageNum(), reqKey.getPageSize());
    }
}
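
A quick way to exercise this endpoint is a plain JSON POST. The sketch below uses the JDK 11 HttpClient and assumes the service runs on localhost:8080 (host, port, and the sample keyword are assumptions, not part of the original setup):

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class RsControllerSmokeTest {
    public static void main(String[] args) throws Exception {
        // Request body matching ReqKey: keyValue, pageNum, pageSize
        String body = "{\"keyValue\":\"保健\",\"pageNum\":1,\"pageSize\":10}";
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8080/rs/bjKey"))   // assumed host/port
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(body))
                .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        // Expect a PageInfo payload: pageNum, pageSize, total and the list of TBj hits
        System.out.println(response.statusCode());
        System.out.println(response.body());
    }
}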

ReqKey.java
@Data
public class ReqKey {
    private String keyValue;
    private int pageNum;
    private int pageSize;
}

TBj.java
@Data
@Document(indexName = "t_bj_index")
public class TBj {
    private String id;
    private String title;
    private String url;
    private String content;
    private String publishtime;
    private String createtime;
}

RsConfig.java
@EnableElasticsearchRepositories(basePackages = {"cn.rs.elastic.repository"})
@Configuration
public class RsConfig {
    @Value("${spring.elasticsearch.uris}")
    private String hostAndPort;

    @Bean
    public RestHighLevelClient elasticsearchClient() {
        ClientConfiguration clientConfiguration = ClientConfiguration.builder()
                .connectedTo(hostAndPort)
                .build();
        return RestClients.create(clientConfiguration).rest();
    }
}

TBjRepository.java
public interface TBjRepository extends ElasticsearchRepository<TBj, String> {
}
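
The repository is left empty because searching goes through the RestHighLevelClient instead; if simple lookups are ever needed, Spring Data Elasticsearch can also derive queries from method names. The finder methods below are hypothetical examples, not part of the original code:

import java.util.List;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;

public interface TBjRepository extends ElasticsearchRepository<TBj, String> {
    // Derived queries: Spring Data builds the Elasticsearch query from the method name
    List<TBj> findByTitle(String title);
    Page<TBj> findByContentContaining(String keyword, Pageable pageable);
}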

TBjService.java
@Service
public class TBjService {

    private final TBjRepository tBjRepository;

    @Autowired
    private EsUtils esUtils;

    @Autowired
    public TBjService(TBjRepository tBjRepository) {
        this.tBjRepository = tBjRepository;
    }

    public PageInfo<TBj> getTBj(String keywords, Integer pageNum, Integer pageSize) {
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        String[] queryFields = { "title" };
        // multi_match on title (default boost) and content (boost 2); the tie breaker adds
        // 30% of the scores of the non-best fields to the best field's score
        QueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(keywords, queryFields)
                .field("content", 2F)
                .tieBreaker(0.3F);
        searchSourceBuilder.query(queryBuilder);
        List<String> highFields = ListUtil.toList(queryFields);
        highFields.add("content");
        return esUtils.page("t_bj_index", searchSourceBuilder, TBj.class, pageNum, pageSize, highFields);
    }
}
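
When tuning the boosts and the tie breaker it helps to look at the query DSL that SearchSourceBuilder generates; its toString() renders the request body as JSON. A standalone sketch (the keyword is just an example):

import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;

public class QueryDslPreview {
    public static void main(String[] args) {
        SearchSourceBuilder builder = new SearchSourceBuilder();
        // Same multi_match as TBjService: title at the default boost, content boosted to 2
        builder.query(QueryBuilders.multiMatchQuery("保健", "title")
                .field("content", 2F)
                .tieBreaker(0.3F));
        builder.from(0).size(10);
        // Prints the JSON body that would be sent to /t_bj_index/_search
        System.out.println(builder);
    }
}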

EsUtils.java
package cn.rs.elastic.utils;

import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.json.JSONUtil;
import com.github.pagehelper.PageInfo;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequest;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;
import org.springframework.util.ReflectionUtils;
import javax.annotation.Resource;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;

@Slf4j
@Component
public class EsUtils {

@Resource
private RestHighLevelClient restHighLevelClient;

public <T> PageInfo<T> page(String index, SearchSourceBuilder searchSourceBuilder, Class<T> resultClass,
                            int currentPage, int size, List<String> highFields) {
    SearchRequest request = new SearchRequest(index);
    if (CollectionUtil.isNotEmpty(highFields)) {
        buildHighLight(searchSourceBuilder, highFields);
    }
    // from/size paging: offset = (page - 1) * pageSize
    int from = (currentPage - 1) * size;
    searchSourceBuilder.from(from).size(size);
    request.source(searchSourceBuilder);
    SearchResponse response;
    try {
        response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
    } catch (IOException e) {
        log.error("elasticsearch search failed, index={}", index, e);
        throw new IllegalStateException("elasticsearch search failed", e);
    }
    return analysisResponse(response, resultClass, currentPage, size, highFields);
}

private <T> PageInfo<T> analysisResponse(SearchResponse response, Class<T> resultClass, int currentPage, int size, List<String> highFields) {
    SearchHit[] searchHits = response.getHits().getHits();
    List<T> retList = new ArrayList<>(searchHits.length);
    for (SearchHit searchHit : searchHits) {
        String strJson = searchHit.getSourceAsString();
        T t = JSONUtil.toBean(strJson, resultClass);
        try {
            setId(resultClass, t, String.valueOf(searchHit.getId()));
        } catch (Exception e) {
            log.info("rs 查询数据设置主键id值异常", e);
        }
        if (!CollectionUtils.isEmpty(highFields)) {
            Map<String, HighlightField> highlightFieldMap = searchHit.getHighlightFields();
            HighlightField highlightField;
            for (String field : highFields) {
                highlightField = highlightFieldMap.get(field);
                if (highlightField != null) {
                    Text[] fragments = highlightField.getFragments();
                    StringBuilder builder = new StringBuilder();
                    for (Text text : fragments) {
                        builder.append(text);
                    }
                    setValue(resultClass, t, builder.toString(), field);
                }
            }
        }
        retList.add(t);
    }
    long totalNum = response.getHits().getTotalHits().value;
    PageInfo<T> pageVo = new PageInfo<>();
    pageVo.setPageNum(currentPage);
    pageVo.setPageSize(size);
    pageVo.setTotal(totalNum);
    pageVo.setList(retList);
    return pageVo;
}

@SneakyThrows
private <T> void setId(Class<T> resultClass, T t, Object id) {
    Field field = ReflectionUtils.findField(resultClass, "id");
    if (null != field) {
        field.setAccessible(true);
        Object object = ReflectionUtils.getField(field, t);
        if (object == null) {
            Method method = resultClass.getMethod("setId", String.class);
            ReflectionUtils.invokeMethod(method, t, id);
        }
    }
}

@SneakyThrows
private <T> void setValue(Class<T> resultClass, T t, Object fieldValue, String fieldName) {
    Field field = ReflectionUtils.findField(resultClass, fieldName);
    if (null != field) {
        field.setAccessible(true);
        String methodName = "set".concat(captureName(fieldName));
        Method method = resultClass.getMethod(methodName, String.class);
        ReflectionUtils.invokeMethod(method, t, fieldValue);
    }
}

private String captureName(String str) {
    // Capitalize the first character so "title" becomes "Title" for the setter name "setTitle"
    return Character.toUpperCase(str.charAt(0)) + str.substring(1);
}

private void buildHighLight(SearchSourceBuilder searchSourceBuilder, List<String> fields) {
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    fields.forEach(highlightBuilder::field);
    highlightBuilder.preTags("<em>");
    highlightBuilder.postTags("</em>");
    searchSourceBuilder.highlighter(highlightBuilder);
}

@AllArgsConstructor
@Data
public class ScrollPageBean<T> {
    private String scrollId;
    private PageInfo<T> scrollPage;
}

}

Elasticsearch

Role: the search engine that serves the hot-search queries.
1. Download and install.
2. Configure conf/elasticsearch.yml:
cluster.name: rs-single-node-cluster
node.name: rs-single-node
node.roles: ["master", "data"]
path.data: /path/to/data
path.logs: /path/to/logs
network.host: 0.0.0.0
http.port: 9200
cluster.initial_master_nodes: ["rs-single-node"]
xpack.security.enabled: false
xpack.security.enrollment.enabled: false
xpack.security.http.ssl:
  enabled: false
  keystore.path: certs/http.p12
xpack.security.transport.ssl:
  enabled: false
  verification_mode: certificate
  keystore.path: certs/transport.p12
  truststore.path: certs/transport.p12
http.host: 0.0.0.0
transport.port: 9300
xpack.ml.enabled: false
http.cors.enabled: true
http.cors.allow-origin: "*"

3. Start: bin/elasticsearch.bat
4. On success Elasticsearch listens on port 9200 by default; open http://127.0.0.1:9200 to check the node info.
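
Connectivity can also be checked from the application side with the same RestHighLevelClient bean that RsConfig creates; a minimal sketch (the EsHealthCheck class is illustrative, not part of the original project):

import java.io.IOException;
import javax.annotation.Resource;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.stereotype.Component;

@Component
public class EsHealthCheck {
    @Resource
    private RestHighLevelClient restHighLevelClient;

    // Returns true when the node configured in spring.elasticsearch.uris answers
    public boolean esIsUp() throws IOException {
        return restHighLevelClient.ping(RequestOptions.DEFAULT);
    }
}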

Logstash

Role: syncs MySQL data into Elasticsearch; the sync policy (e.g. the schedule interval) is configurable.
1. Download and install.
2. Configure the MySQL and Elasticsearch connections.
Under the Logstash home directory, create mysql_rs_conf/mysql_rs.conf and copy the MySQL JDBC driver jar into that directory. Configuration file contents:
input {
  jdbc {
    jdbc_connection_string => "jdbc:mysql://192.168.1.1:3306/dbBj?useUnicode=true&useSSL=false&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&rewriteBatchedStatements=true"
    jdbc_user => "bj"
    jdbc_password => "123456"
    jdbc_driver_library => "D:/logstash/logstash-8.12.2/mysql_rs_conf/mysql-connector-j-8.0.33.jar"
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    jdbc_paging_enabled => "true"
    jdbc_page_size => "10000"
    statement => "select * from t_bj"
    schedule => "* * * * *"
    type => "t_bj"
  }
}

output {
  if [type] == "t_bj" {
    elasticsearch {
      hosts => "192.168.1.36:9200"
      index => "t_bj_index"
      document_id => "%{id}"
    }
  }

  stdout {
    codec => json_lines
  }
}

3. Start. The Logstash monitoring API listens on port 9600 by default (5601 is Kibana's port, not Logstash's).
A startup script lg.cmd can be created by hand with the following contents:
chcp 65001
logstash -f ./mysql_rs_conf/mysql_rs.conf
4. Verify and access. Each scheduled run logs the synced rows as JSON lines on stdout; see the sketch below for a programmatic check.
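
A simple programmatic check is to count the documents in t_bj_index and compare the number with select count(*) from t_bj in MySQL. The sketch below uses the high-level client's count API; the host mirrors the elasticsearch output in mysql_rs.conf and is an assumption about the environment:

import java.io.IOException;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.core.CountRequest;

public class SyncCheck {
    public static void main(String[] args) throws IOException {
        // Assumed ES host/port; adjust to your deployment
        try (RestHighLevelClient client = new RestHighLevelClient(
                RestClient.builder(new HttpHost("192.168.1.36", 9200, "http")))) {
            long docs = client.count(new CountRequest("t_bj_index"), RequestOptions.DEFAULT).getCount();
            System.out.println("t_bj_index documents: " + docs);
        }
    }
}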

Kibana

Role: view the synced data, run test queries, and monitor the cluster.
1. Download and install.
2. Point it at Elasticsearch (elasticsearch.hosts in config/kibana.yml).
3. Start. Default port 5601.
4. Verify and access. URL: 127.0.0.1:5601/app/integrations/browse

MySQL

Role: intermediate storage for the hot-search pipeline, holding the crawled data.
Table DDL:
CREATE TABLE t_bj (
  id int(11) NOT NULL AUTO_INCREMENT,
  title varchar(1000) NOT NULL COMMENT 'article title',
  url varchar(1000) DEFAULT NULL COMMENT 'article url',
  content longtext COMMENT 'article content',
  publishtime varchar(30) DEFAULT NULL COMMENT 'publish time',
  createtime datetime DEFAULT CURRENT_TIMESTAMP COMMENT 'creation time',
  PRIMARY KEY (id, title) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=492 DEFAULT CHARSET=utf8 COMMENT='health care articles';
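
For completeness, the crawler only needs to insert rows into this table; createtime fills itself from the column DEFAULT and Logstash picks new rows up on its next scheduled run. A minimal JDBC sketch, where the connection string, credentials, and sample values are placeholders mirroring the Logstash config rather than verified settings:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class TBjWriter {
    public static void main(String[] args) throws Exception {
        // Assumed connection details, copied from mysql_rs.conf for illustration
        String url = "jdbc:mysql://192.168.1.1:3306/dbBj?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai";
        try (Connection conn = DriverManager.getConnection(url, "bj", "123456");
             PreparedStatement ps = conn.prepareStatement(
                     "insert into t_bj (title, url, content, publishtime) values (?, ?, ?, ?)")) {
            ps.setString(1, "sample title");
            ps.setString(2, "https://example.com/article/1");
            ps.setString(3, "sample content");
            ps.setString(4, "2024-03-30 21:31:00");
            ps.executeUpdate();  // the next Logstash run syncs this row into t_bj_index
        }
    }
}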
