0
点赞
收藏
分享

微信扫一扫

12、Flink source和sink 的 clickhouse 详细示例



文章目录

  • Flink 系列文章
  • 一、Flink sink Clickhouse
  • ClickHouse系列文章
  • 1、maven依赖
  • 2、创建clickhouse表
  • 3、验证clickhouse web页面是否正常
  • 4、实现
  • 1)、user bean
  • 2)、sink实现
  • 5、验证
  • 1)、nc 输入
  • 2)、启动应用程序
  • 3)、观察应用程序控制台输出
  • 4)、查看clickhouse表中的数据
  • 二、Flink source Clickhouse
  • 1、maven依赖
  • 2、实现
  • 1)、user bean
  • 2)、source实现
  • 3、验证


本文简单的介绍了Flink 以clickhouse为数据源或者将数据sink到clickhouse的实现示例。
本文依赖clickhouse的环境好用
本文分为2个部分,即sink到clickhouse和以clickhouse作为数据源。

一、Flink sink Clickhouse

关于clickhouse的基本知识详见该系列文章,关于该部分不再赘述。

1、maven依赖

<dependency>
  <groupId>ru.ivi.opensource</groupId>
  <artifactId>flink-clickhouse-sink</artifactId>
  <version>1.3.1</version>
</dependency>

2、创建clickhouse表

-- 1、创建数据库 tutorial
略
-- 2、创建表
CREATE TABLE t_flink_sink_clickhouse (    
id UInt16 COMMENT '员工id',    
name String COMMENT '员工姓名',     
age UInt8 COMMENT '员工年龄' ) 
ENGINE = MergeTree 
ORDER BY id;

3、验证clickhouse web页面是否正常

http://192.168.10.42:8123/12、Flink source和sink 的 clickhouse 详细示例_flink

4、实现

1)、user bean

import lombok.Data;

@Data
public class User {
	private int id;
	private String name;
	private int age;

	public User(int id, String name, int age) {
		this.id = id;
		this.name = name;
		this.age = age;
	}

	// Java Bean 必须实现的方法,信息通过字符串进行拼接
	public static String convertToCsv(User user) {
		StringBuilder builder = new StringBuilder();
		builder.append("(");

		// add user.id
		builder.append(user.id);
		builder.append(", ");

		// add user.name
		builder.append("'");
		builder.append(String.valueOf(user.name));
		builder.append("', ");

		// add user.age
		builder.append(user.age);

		builder.append(" )");
		return builder.toString();
	}
}

2)、sink实现

package org.clickhouse.test;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.clickhouse.ClickHouseSink;
import org.clickhouse.model.ClickHouseClusterSettings;
import org.clickhouse.model.ClickHouseSinkConst;

/**
 * @author alanchan
 *
 */
public class TestFinkSinkClickhouse {
	public static void main(String[] args) throws Exception {
		// env
		StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();

		// source
		// nc
		DataStream<String> inputStream = env.socketTextStream("192.168.10.42", 9999);

		// Transform
		SingleOutputStreamOperator<String> dataStream = inputStream.map(new MapFunction<String, String>() {
			@Override
			public String map(String data) throws Exception {
				String[] split = data.split(",");
				User user = new User(Integer.parseInt(split[0]), split[1], Integer.parseInt(split[2]));
				return User.convertToCsv(user);
			}
		});

		// create props for sink
		Map<String, String> globalParameters = new HashMap<>();
		// clickhouse 的服务地址,该链接访问返回ok
		globalParameters.put(ClickHouseClusterSettings.CLICKHOUSE_HOSTS, "http://192.168.10.42:8123/");
		// common
		globalParameters.put(ClickHouseSinkConst.TIMEOUT_SEC, "1");
		globalParameters.put(ClickHouseSinkConst.FAILED_RECORDS_PATH, "/usr/local/bigdata/testdata/clickhouse_failpath");
		globalParameters.put(ClickHouseSinkConst.NUM_WRITERS, "2");
		globalParameters.put(ClickHouseSinkConst.NUM_RETRIES, "2");
		globalParameters.put(ClickHouseSinkConst.QUEUE_MAX_CAPACITY, "10");
		globalParameters.put(ClickHouseSinkConst.IGNORING_CLICKHOUSE_SENDING_EXCEPTION_ENABLED, "false");

		// set global paramaters
		ParameterTool parameters = ParameterTool.fromMap(globalParameters);
		env.getConfig().setGlobalJobParameters(parameters);

//		env.setParallelism(1);
		Properties props = new Properties();
		// 数据库tutorial和表名称t_flink_sink_clickhouse
		// 需要先创建数据库和表
		// CREATE TABLE t_flink_sink_clickhouse (id UInt16 COMMENT '员工id',name String
		// COMMENT '员工姓名',age UInt8 COMMENT '员工年龄' ) ENGINE = MergeTree ORDER BY id;
		props.put(ClickHouseSinkConst.TARGET_TABLE_NAME, "tutorial.t_flink_sink_clickhouse");
		props.put(ClickHouseSinkConst.MAX_BUFFER_SIZE, "10000");
		ClickHouseSink sink = new ClickHouseSink(props);
		dataStream.addSink(sink);
		dataStream.print();

		env.execute();

	}
}

5、验证

1)、nc 输入

[root@server2 etc]# nc -lk 9999
1,alanchan,19
2,alan,20
3,chan,21

2)、启动应用程序

3)、观察应用程序控制台输出

4)、查看clickhouse表中的数据

server2 :) select * from t_flink_sink_clickhouse;

SELECT *
FROM t_flink_sink_clickhouse

Query id: aea358e8-8d9d-4caa-98b1-54903356a7d0

┌─id─┬─name─┬─age─┐
│  2 │ alan │  20 │
└────┴──────┴─────┘
┌─id─┬─name─┬─age─┐
│  3 │ chan │  21 │
└────┴──────┴─────┘
┌─id─┬─name─────┬─age─┐
│  1 │ alanchan │  19 │
└────┴──────────┴─────┘

3 rows in set. Elapsed: 0.003 sec.

以上,与预期一致。

二、Flink source Clickhouse

1、maven依赖

本处依赖与本文上一个示例中的依赖不一样,另外,如果该示例中的依赖如果不够,则需要自己去maven上找到相关的即可。

<dependency>
			<groupId>ru.yandex.clickhouse</groupId>
			<artifactId>clickhouse-jdbc</artifactId>
			<version>0.1.40</version>

		</dependency>

2、实现

1)、user bean

import lombok.Data;

/**
 * @author alanchan
 *
 */
@Data
public class UserSource {
	private int id;
	private String name;
	private int age;

	public UserSource(int id, String name, int age) {
		this.id = id;
		this.name = name;
		this.age = age;
	}

	public UserSource() {
	}
}

2)、source实现

import java.sql.ResultSet;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;

import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.ClickHouseDataSource;
import ru.yandex.clickhouse.ClickHouseStatement;
import ru.yandex.clickhouse.settings.ClickHouseProperties;
import ru.yandex.clickhouse.settings.ClickHouseQueryParam;

/**
 * @author alanchan
 *
 */
public class Source_Clickhouse {

	/**
	 * @param args
	 * @throws Exception
	 */
	public static void main(String[] args) throws Exception {
		// env
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);
		// source
		DataStream<UserSource> users = env.addSource(new ClickhouseSource());

		// transformation

		// sink
		users.print();

		// execute
		env.execute();

	}

	private static class ClickhouseSource extends RichParallelSourceFunction<UserSource> {
		private boolean flag = true;
		private ClickHouseConnection conn = null;
		private ClickHouseStatement stmt = null;
		private ResultSet rs = null;
		private Map<ClickHouseQueryParam, String> additionalDBParams = new HashMap<>();

		UserSource user = null;
		private String sql = "select id,name,age from t_flink_sink_clickhouse";

		// open只执行一次,适合开启资源
		@Override
		public void open(Configuration parameters) throws Exception {
			ClickHouseProperties properties = new ClickHouseProperties();
			String url = "jdbc:clickhouse://192.168.10.42:8123/tutorial";

			properties.setSessionId(UUID.randomUUID().toString());
//			properties.setDatabase("tutorial");
//			properties.setHost("192.168.10.42");

			ClickHouseDataSource dataSource = new ClickHouseDataSource(url, properties);
			// ClickHouseProperties
			additionalDBParams.put(ClickHouseQueryParam.SESSION_ID, UUID.randomUUID().toString());
			conn = dataSource.getConnection();
			stmt = conn.createStatement();
		}

		@Override
		public void run(SourceContext<UserSource> ctx) throws Exception {
			while (flag) {
				rs = stmt.executeQuery(sql, additionalDBParams);
				while (rs.next()) {
					user = new UserSource(rs.getInt(1), rs.getString(2), rs.getInt(3));
					ctx.collect(user);
				}
			}
		}

		// 接收到cancel命令时取消数据生成
		@Override
		public void cancel() {
			flag = false;
		}

		@Override
		public void close() throws Exception {
			if (conn != null)
				conn.close();
			if (stmt != null)
				stmt.close();
			if (rs != null)
				rs.close();
		}

	}
}

3、验证

启动应用程序,查看应用程序控制台输出是不是与期望的一致即可。

以上,本文简单的介绍了Flink 以clickhouse为数据源或者将数据sink到clickhouse的实现示例。


举报

相关推荐

0 条评论