写在前面
项目数据原先存储于虚拟机的hadoop集群上,由于机器需要报废处理,数据需要进行转移,特别是hbase中的数据,备份和转移过程相对较为繁琐,特写此文章以记录。
查看hbase中的数据信息
进入hbase shell查看所有表格
hadoop@master:~$ hbase shell
hbase(main):002:0> list
TABLE
dataset
diagResult
fault
realData
records
testData
testDataTest
trainData
trainDataTest
zdsTest
10 row(s) in 0.2450 seconds
=> ["dataset", "diagResult", "fault", "realData", "records", "testData", "testDataTest", "trainData", "trainDataTest", "zdsTest"]
依次查看表格信息
hbase(main):008:0> describe "dataset"
Table dataset is ENABLED
dataset
COLUMN FAMILIES DESCRIPTION
{NAME => 'info', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VERS
IONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
1 row(s) in 0.2740 seconds
hbase(main):009:0> describe "diagResult"
Table diagResult is ENABLED
diagResult
COLUMN FAMILIES DESCRIPTION
{NAME => 'dataList', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_
VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
1 row(s) in 0.0310 seconds
hbase(main):010:0> describe "fault"
Table fault is ENABLED
fault
COLUMN FAMILIES DESCRIPTION
{NAME => 'dataList', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_
VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
1 row(s) in 0.0220 seconds
hbase(main):011:0> describe "realData"
Table realData is ENABLED
realData
COLUMN FAMILIES DESCRIPTION
{NAME => 'dataList', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_
VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
1 row(s) in 0.0250 seconds
hbase(main):012:0> describe "records"
Table records is ENABLED
records
COLUMN FAMILIES DESCRIPTION
{NAME => 'record', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VE
RSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
1 row(s) in 0.0110 seconds
hbase(main):013:0> describe "testData"
Table testData is ENABLED
testData
COLUMN FAMILIES DESCRIPTION
{NAME => 'dataList', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_
VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
1 row(s) in 0.0170 seconds
hbase(main):014:0> describe "testDataTest"
Table testDataTest is ENABLED
testDataTest
COLUMN FAMILIES DESCRIPTION
{NAME => 'actionData', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MI
N_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
{NAME => 'gapData', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_V
ERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
{NAME => 'showData', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_
VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
3 row(s) in 0.0440 seconds
hbase(main):015:0> describe "trainData"
Table trainData is ENABLED
trainData
COLUMN FAMILIES DESCRIPTION
{NAME => 'dataList', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_
VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
1 row(s) in 0.0210 seconds
hbase(main):016:0> describe "trainDataTest"
Table trainDataTest is ENABLED
trainDataTest
COLUMN FAMILIES DESCRIPTION
{NAME => 'actionData', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MI
N_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
{NAME => 'gapData', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_V
ERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
{NAME => 'showData', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_
VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
3 row(s) in 0.0340 seconds
hbase(main):017:0> describe "zdsTest"
Table zdsTest is ENABLED
zdsTest
COLUMN FAMILIES DESCRIPTION
{NAME => 'actionData', BLOOMFILTER => 'ROW', VERSIONS => '3', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MI
N_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
{NAME => 'gapData', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_V
ERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
2 row(s) in 0.0340 seconds
记录各表格列族名
表名 | 列族名 |
---|---|
dataset | info |
diagResult | dataList |
fault | dataList |
realData | dataList |
records | record |
testData | dataList |
testDataTest | actionData、gapData、showData |
trainData | dataList |
trainDataTest | actionData、gapData、showData |
zdsTest | actionData、gapData |
将hbase中的数据备份到本地文件系统
逐个执行备份命令
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver export dataset file:///home/hadoop/hbase-data-back/dataset_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver export diagResult file:///home/hadoop/hbase-data-back/diagResult_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver export fault file:///home/hadoop/hbase-data-back/fault_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver export realData file:///home/hadoop/hbase-data-back/realData_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver export records file:///home/hadoop/hbase-data-back/records_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver export testData file:///home/hadoop/hbase-data-back/testData_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver export testDataTest file:///home/hadoop/hbase-data-back/testDataTest_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver export trainData file:///home/hadoop/hbase-data-back/trainData_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver export trainDataTest file:///home/hadoop/hbase-data-back/trainDataTest_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver export zdsTest file:///home/hadoop/hbase-data-back/zdsTest_back
使用xftp查看是否备份完成
可以看到所有表格都已备份到本地
传输到本地
将hbase-data-back文件夹整个拖拽到本地文件系统的指定路径保存
新集群环境配置
对应记录的表格名与列族在新集群中创建
hadoop@master:~$ hbase shell
hbase(main):001:0> create 'dataset', 'info'
0 row(s) in 2.5490 seconds
=> Hbase::Table - dataset
hbase(main):002:0> create 'diagResult', 'dataList'
0 row(s) in 2.4090 seconds
=> Hbase::Table - diagResult
hbase(main):002:0> create "fault", "dataList"
0 row(s) in 2.4090 seconds
=> Hbase::Table - fault
hbase(main):002:0> create "realData", "dataList"
0 row(s) in 2.4090 seconds
=> Hbase::Table - realData
hbase(main):002:0> create "records", "record"
0 row(s) in 2.4090 seconds
=> Hbase::Table - records
hbase(main):002:0> create "testData", "dataList"
0 row(s) in 2.4090 seconds
=> Hbase::Table - testData
hbase(main):002:0> create "testDataTest", "actionData", "gapData", "showData"
0 row(s) in 2.4090 seconds
=> Hbase::Table - testDataTest
hbase(main):002:0> create "trainData", "dataList"
0 row(s) in 2.4090 seconds
=> Hbase::Table - trainData
hbase(main):002:0> create "trainDataTest", "actionData", "gapData", "showData"
0 row(s) in 2.4090 seconds
=> Hbase::Table - trainDataTest
hbase(main):002:0> create "zdsTest", "actionData", "gapData"
0 row(s) in 2.4090 seconds
=> Hbase::Table - zdsTest
本地数据导入新集群
hdfs中建立文件夹
hadoop@master:~$ hdfs dfs -mkdir -p /home/hadoop
上传数据到hdfs
hadoop@master:~$ hdfs dfs -put /home/hadoop/hbase-data-back /home/hadoop/hbase-data-back
导入数据到hbase
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver import dataset /home/hadoop/hbase-data-back/dataset_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver import diagResult /home/hadoop/hbase-data-back/diagResult_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver import fault /home/hadoop/hbase-data-back/fault_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver import realData /home/hadoop/hbase-data-back/realData_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver import records /home/hadoop/hbase-data-back/records_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver import testData /home/hadoop/hbase-data-back/testData_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver import testDataTest /home/hadoop/hbase-data-back/testDataTest_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver import trainData /home/hadoop/hbase-data-back/trainData_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver import trainDataTest /home/hadoop/hbase-data-back/trainDataTest_back
hadoop@master:~$ hbase org.apache.hadoop.hbase.mapreduce.Driver import zdsTest /home/hadoop/hbase-data-back/zdsTest_back
mysql数据备份与转移
使用Navicat连接旧集群MySQL数据库服务器,将要转移的表格转储为sql文件,再连接新集群MySQL服务器运行sql文件导入即可。
数据转移后的工程配置与测试
概要配置信息
写在连接前
连接该集群使用各项服务时请先连接机房VPN
集群节点(host文件配置):
master:192.168.100.98
slave1:192.168.100.99
slave2:192.168.100.100
slave3:192.168.100.101
xshell与xftp连接(ssh,操作hbase,管理文件等)
主机:192.168.100.98
端口:22
用户名密码与之前一致
MySQL连接
主机:192.168.100.98
端口:3306
用户名密码与之前一致
远程桌面连接
主机:211.71.65.193
端口:3392
用户名密码与之前一致
关键目录
- source:存储算法源文件
- model:存储算法训练生成的模型
目录树形结构如下:
————————————————
├── home
│   └── hadoop
│       └── alg
│           ├── CNN
│           │   ├── source
│           │   └── model
│           ├── LSTM
│           │   ├── source
│           │   └── model
│           └── SVM
│               ├── source
│               └── model
————————————————
工程配置
修改本地hosts文件
注释旧的集群节点配置,将新集群节点配置取消注释
# The IP address and the host name should be separated by at least one
# space.
#
# Additionally, comments (such as these) may be inserted on individual
# lines or following the machine name denoted by a '#' symbol.
#
# For example:
#
# 102.54.94.97 rhino.acme.com # source server
# 38.25.63.10 x.acme.com # x client host
# localhost name resolution is handled within DNS itself.
# 127.0.0.1 localhost
# ::1 localhost
127.0.0.1 activate.navicat.com
#192.168.1.72 master
#192.168.1.202 slave1
#192.168.1.78 slave3
#192.168.1.218 slave4
#192.168.1.76 slave5
192.168.100.98 master
192.168.100.99 slave1
192.168.100.100 slave2
192.168.100.101 slave3
#192.168.3.91 master
#192.168.3.92 slave1
#192.168.3.93 slave2
修改配置文件application.properties
#111远程计算机quorum配置
#spring.data.hbase.quorum=master:2181,slave1:2181,slave2:2181
#津航北京集群quorum配置
#spring.data.hbase.quorum=master:2181,slave1:2181,slave3:2181,slave4:2181,slave5:2181
#数字孪生98集群quorum配置
spring.data.hbase.quorum=master:2181,slave1:2181,slave2:2181,slave3:2181
#津航北京集群rootDir配置
#spring.data.hbase.rootDir=hdfs://master:9000/hbase/hbase_db
#其余集群rootDir配置
spring.data.hbase.rootDir=hdfs://master:9000/hbase
#所有集群nodeParent配置
spring.data.hbase.nodeParent=/hbase
#111集群局域网ip、port
#server.address=192.168.1.111
#server.port=8085
#其余集群端口
server.port=8083
#mysql相关配置
#津航
#spring.datasource.url=jdbc:mysql://192.168.1.72:3306/jh_db?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&allowMultiQueries=true&useSSL=false
#111集群
#spring.datasource.url=jdbc:mysql://192.168.3.91:3306/mysql?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&allowMultiQueries=true&useSSL=false&serverTimezone=UTC
#98集群
spring.datasource.url=jdbc:mysql://192.168.100.98:3306/mysql?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&allowMultiQueries=true&useSSL=false&serverTimezone=UTC
spring.datasource.username=root
spring.datasource.password=123456
spring.jpa.database=mysql
spring.jpa.database-platform=org.hibernate.dialect.MySQL5InnoDBDialect
spring.jpa.show-sql=true
spring.jpa.hibernate.ddl-auto=update
spring.jpa.hibernate.naming.physical-strategy = org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl