!242 feat: 优化 StarRocks 数据的批量写入

* Merge branch 'master' of gitee.com:dromara/dbswitch into zsj
* feat: 优化 StarRocks 数据写入，将写入、更新操作从 SQL 模式切换成 Stream Load 模式，提高更新、新增效率
This commit is contained in:
Kingkazuma
2025-06-03 13:02:55 +00:00
committed by inrgihc
parent 9d221dee60
commit 50ecc06f70
8 changed files with 323 additions and 21 deletions

View File

@@ -33,6 +33,11 @@
<artifactId>spring-boot-starter-jdbc</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,13 @@
package org.dromara.dbswitch.product.sr;
import lombok.Data;
/**
 * Row holder for the result of the StarRocks {@code SHOW FRONTENDS} statement.
 *
 * <p>Instances are produced by bean-copying the query result rows (see
 * {@code StarRocksUtils#getHttpPort}), so the field names here are what the bean
 * mapper matches against the result columns. Getters and setters are generated by
 * Lombok's {@code @Data}.
 */
@Data
public class FrontendEntity {
// Frontend address. NOTE(review): presumably the "IP" column of SHOW FRONTENDS - confirm.
String ip;
// HTTP port of the frontend; StarRocksUtils uses the leader's value as the Stream Load port.
String httpport;
// NOTE(review): presumably the "Alive" column of SHOW FRONTENDS - not read in the visible code.
Boolean alive;
// NOTE(review): presumably the "Join" column of SHOW FRONTENDS - not read in the visible code.
Boolean join;
// Frontend role; StarRocksUtils selects the entry whose role equals "LEADER".
String role;
}

View File

@@ -9,25 +9,148 @@
/////////////////////////////////////////////////////////////
package org.dromara.dbswitch.product.sr;
import org.dromara.dbswitch.core.provider.meta.MetadataProvider;
import org.dromara.dbswitch.core.schema.ColumnDescription;
import org.dromara.dbswitch.core.util.GenerateSqlUtils;
import cn.hutool.core.bean.BeanUtil;
import cn.hutool.core.util.ReUtil;
import cn.hutool.db.Db;
import cn.hutool.db.Entity;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpHeaders;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.dromara.dbswitch.common.entity.CloseableDataSource;
import java.sql.Connection;
import java.util.List;
import javax.sql.DataSource;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.util.*;
import java.util.stream.Collectors;
@Slf4j
public final class StarRocksUtils {
public static String getTableDDL(MetadataProvider provider, Connection connection, String schema,
String table) {
List<ColumnDescription> columnDescriptions = provider.queryTableColumnMeta(connection, schema, table);
List<String> pks = provider.queryTablePrimaryKeys(connection, schema, table);
return GenerateSqlUtils.getDDLCreateTableSQL(
provider, columnDescriptions, pks, schema, table, false);
}
private StarRocksUtils() {
throw new IllegalStateException();
}
private String indexName;
private volatile String dbName;
private volatile String tbName;
private volatile String host;
private volatile String username;
private volatile String password;
private volatile CloseableDataSource dataSource;
private volatile String httpPort;
public void init(String schemaName, String tableName, DataSource dataSource) {
this.getHttpPort(dataSource);
this.dataSource = (CloseableDataSource) dataSource;
this.indexName = tableName;
this.host = ReUtil.extractMulti("jdbc:mysql://(.*):[0-9]{2,8}/", this.dataSource.getJdbcUrl(), "$1");
this.username = this.dataSource.getUserName();
this.password = this.dataSource.getPassword();
this.tbName = tableName;
this.dbName = schemaName;
}
public void getHttpPort(DataSource dataSource) {
Db use = Db.use(dataSource);
try {
List<Entity> frontends = use.query("SHOW FRONTENDS");
List<FrontendEntity> frontendEntities = BeanUtil.copyToList(frontends, FrontendEntity.class);
List<FrontendEntity> leader = frontendEntities.stream().filter(i -> i.getRole().equals("LEADER")).collect(Collectors.toList());
FrontendEntity frontendEntity = leader.get(0);
this.httpPort = frontendEntity.getHttpport();
} catch (Exception e) {
log.error(e.getMessage());
}
}
public long addOrUpdateData(List<String> fieldNames, List<Object[]> recordValues) {
List<Object> objectList = asObjectList(fieldNames, recordValues);
JSONArray array = JSONUtil.parseArray(objectList);
JSONObject jsonObject = JSONUtil.createObj()
.set("data", array);
try {
sendData(jsonObject.toString());
return recordValues.size();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private void sendData(String content) throws Exception {
final String loadUrl = String.format("http://%s:%s/api/%s/%s/_stream_load",
this.host,
this.httpPort,
this.dbName,
this.tbName);
final HttpClientBuilder httpClientBuilder = HttpClients
.custom()
.setRedirectStrategy(new DefaultRedirectStrategy() {
@Override
protected boolean isRedirectable(String method) {
return true;
}
});
try (CloseableHttpClient client = httpClientBuilder.build()) {
HttpPut put = new HttpPut(loadUrl);
StringEntity entity = new StringEntity(content, "UTF-8");
put.setHeader(HttpHeaders.EXPECT, "100-continue");
put.setHeader(HttpHeaders.AUTHORIZATION, basicAuthHeader(this.username, this.password));
put.setHeader("strip_outer_array", "true");
put.setHeader("format", "JSON");
put.setHeader("json_root", "$.data");
put.setHeader("ignore_json_size", "true");
put.setHeader("Content-Type", "application/json");
put.setEntity(entity);
try (CloseableHttpResponse response = client.execute(put)) {
String loadResult = "";
if (response.getEntity() != null) {
loadResult = EntityUtils.toString(response.getEntity());
}
final int statusCode = response.getStatusLine().getStatusCode();
// statusCode 200 just indicates that starrocks be service is ok, not stream load
// you should see the output content to find whether stream load is success
if (statusCode != 200) {
throw new IOException(
String.format("Stream load failed, statusCode=%s load result=%s", statusCode, loadResult));
}
}
}
}
private String basicAuthHeader(String username, String password) {
final String tobeEncode = username + ":" + password;
byte[] encoded = Base64.getEncoder().encode(tobeEncode.getBytes(StandardCharsets.UTF_8));
return "Basic " + new String(encoded);
}
private List<Object> asObjectList(List<String> fieldNames, List<Object[]> recordValues) {
int fieldCount = Math.min(fieldNames.size(), recordValues.get(0).length);
List<Object> rows = new ArrayList<>(recordValues.size());
for (Object[] row : recordValues) {
Map<String, Object> columns = new LinkedHashMap<>(fieldCount);
for (int i = 0; i < fieldCount; ++i) {
Object rowValue = row[i];
if (row[i] instanceof Timestamp) {
rowValue = String.valueOf(rowValue);
}
columns.put(fieldNames.get(i), rowValue);
}
rows.add(columns);
}
return rows;
}
}

View File

@@ -39,12 +39,12 @@ public class StarrocksFactoryProvider extends AbstractFactoryProvider {
/**
 * Creates the table writer for StarRocks.
 *
 * <p>Returns the StarRocks-specific provider; the stale second {@code return}
 * (unreachable, and therefore a compile error) has been removed.
 *
 * @param useInsert not used by this implementation
 * @return a new {@code StarrocksTableDataWriteProvider} bound to this factory
 */
@Override
public TableDataWriteProvider createTableDataWriteProvider(boolean useInsert) {
  return new StarrocksTableDataWriteProvider(this);
}
/**
 * Creates the table synchronizer for StarRocks.
 *
 * <p>Returns the StarRocks-specific synchronizer; the stale second {@code return}
 * (unreachable, and therefore a compile error) has been removed.
 *
 * @return a new {@code StarrocksTableDataSynchronizer} bound to this factory
 */
@Override
public TableDataSynchronizeProvider createTableDataSynchronizeProvider() {
  return new StarrocksTableDataSynchronizer(this);
}

View File

@@ -323,10 +323,7 @@ public class StarrocksMetadataQueryProvider extends AbstractMetadataProvider {
break;
case ColumnMetaData.TYPE_STRING:
//see: https://docs.starrocks.io/zh/docs/category/string/
long newLength = length * 3;
if (newLength < 255) {
retval += "VARCHAR(" + newLength + ")";
} else if (newLength <= 65533) {
if (length <= 65533) {
retval += "STRING";
} else if (newLength <= 1048576) {
retval += "VARCHAR(" + newLength + ")";

View File

@@ -0,0 +1,108 @@
// Copyright tang. All rights reserved.
// https://gitee.com/inrgihc/dbswitch
//
// Use of this source code is governed by a BSD-style license
//
// Author: tang (inrgihc@126.com)
// Date : 2020/1/2
// Location: beijing , china
/////////////////////////////////////////////////////////////
package org.dromara.dbswitch.product.sr;
import org.apache.commons.collections4.CollectionUtils;
import org.dromara.dbswitch.common.entity.CloseableDataSource;
import org.dromara.dbswitch.core.provider.ProductFactoryProvider;
import org.dromara.dbswitch.core.provider.sync.DefaultTableDataSynchronizeProvider;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Table synchronizer for StarRocks that sends inserts and updates through
 * {@link StarRocksUtils#addOrUpdateData(List, List)} instead of prepared SQL statements.
 *
 * <p>Delete handling is not overridden here; the SQL statements and argument types the
 * superclass fields expect are still prepared in {@link #prepare}.
 */
public class StarrocksTableDataSynchronizer extends DefaultTableDataSynchronizeProvider {

  /** Column names of the current table, in the order record arrays are laid out. */
  private volatile List<String> fieldNames;
  private final CloseableDataSource dataSource;
  private final StarRocksUtils starRocksUtils = new StarRocksUtils();

  /**
   * @param factoryProvider factory whose data source must be a {@link CloseableDataSource}
   */
  public StarrocksTableDataSynchronizer(ProductFactoryProvider factoryProvider) {
    super(factoryProvider);
    dataSource = (CloseableDataSource) factoryProvider.getDataSource();
  }

  /**
   * Validates the column/primary-key lists, binds the stream-load helper to the table and
   * prepares the SQL statements and JDBC argument types kept in the superclass fields.
   *
   * @param schemaName target database name
   * @param tableName  target table name
   * @param fieldNames all synchronized columns; must contain every primary key
   * @param pks        primary key columns; must not be empty
   * @throws IllegalArgumentException when the lists are null/empty/inconsistent or a column
   *                                  has no type metadata
   */
  @Override
  public void prepare(String schemaName, String tableName, List<String> fieldNames, List<String> pks) {
    // Validate first so that invalid input does not trigger a database round trip in init().
    if (fieldNames == null || pks == null
        || fieldNames.isEmpty() || pks.isEmpty() || fieldNames.size() < pks.size()) {
      throw new IllegalArgumentException("字段列表和主键列表不能为空,或者字段总个数应不小于主键总个数");
    }
    if (!fieldNames.containsAll(pks)) {
      throw new IllegalArgumentException("字段列表必须包含主键列表");
    }

    starRocksUtils.init(schemaName, tableName, dataSource);
    // Snapshot the list so later changes by the caller cannot alter the column layout.
    this.fieldNames = new ArrayList<>(fieldNames);

    Map<String, Integer> columnType = getTableColumnMetaData(schemaName, tableName, fieldNames);
    this.fieldOrders = new ArrayList<>(fieldNames);
    this.pksOrders = new ArrayList<>(pks);
    this.insertStatementSql = getInsertPrepareStatementSql(schemaName, tableName, fieldNames);
    this.updateStatementSql = getUpdatePrepareStatementSql(schemaName, tableName, fieldNames, pks);
    this.deleteStatementSql = getDeletePrepareStatementSql(schemaName, tableName, pks);

    insertArgsType = new int[fieldNames.size()];
    for (int k = 0; k < fieldNames.size(); ++k) {
      insertArgsType[k] = requireColumnType(columnType, fieldNames.get(k));
    }

    // UPDATE arguments: non-key columns first (SET clause), then the keys (WHERE clause).
    updateArgsType = new int[fieldNames.size()];
    int idx = 0;
    for (String field : fieldNames) {
      if (!pks.contains(field)) {
        updateArgsType[idx++] = requireColumnType(columnType, field);
      }
    }
    for (String pk : pks) {
      updateArgsType[idx++] = requireColumnType(columnType, pk);
    }

    deleteArgsType = new int[pks.size()];
    for (int j = 0; j < pks.size(); ++j) {
      deleteArgsType[j] = requireColumnType(columnType, pks.get(j));
    }
  }

  /**
   * Sends new records through the stream-load helper.
   *
   * @param recordValues records laid out in {@code fieldNames} order
   * @return number of records sent, {@code 0} when nothing was sent
   */
  @Override
  public long executeInsert(List<Object[]> recordValues) {
    return upsert(recordValues);
  }

  /**
   * Sends changed records through the same stream-load path as inserts.
   *
   * @param recordValues records laid out in {@code fieldNames} order
   * @return number of records sent, {@code 0} when nothing was sent
   */
  @Override
  public long executeUpdate(List<Object[]> recordValues) {
    return upsert(recordValues);
  }

  /** Shared insert/update path; a no-op before {@link #prepare} or for an empty batch. */
  private long upsert(List<Object[]> recordValues) {
    List<String> columns = this.fieldNames;
    if (CollectionUtils.isEmpty(columns) || CollectionUtils.isEmpty(recordValues)) {
      return 0L;
    }
    return starRocksUtils.addOrUpdateData(columns, recordValues);
  }

  /** Looks up a column's JDBC type, failing with the column name instead of an unboxing NPE. */
  private static int requireColumnType(Map<String, Integer> columnType, String field) {
    Integer type = columnType.get(field);
    if (type == null) {
      throw new IllegalArgumentException("No column type metadata found for field: " + field);
    }
    return type;
  }
}

View File

@@ -0,0 +1,50 @@
// Copyright tang. All rights reserved.
// https://gitee.com/inrgihc/dbswitch
//
// Use of this source code is governed by a BSD-style license
//
// Author: tang (inrgihc@126.com)
// Date : 2020/1/2
// Location: beijing , china
/////////////////////////////////////////////////////////////
package org.dromara.dbswitch.product.sr;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.dromara.dbswitch.common.entity.CloseableDataSource;
import org.dromara.dbswitch.core.provider.ProductFactoryProvider;
import org.dromara.dbswitch.core.provider.write.DefaultTableDataWriteProvider;
import java.util.List;
/**
 * Table writer for StarRocks that hands every batch to
 * {@link StarRocksUtils#addOrUpdateData(List, List)}.
 */
@Slf4j
public class StarrocksTableDataWriteProvider extends DefaultTableDataWriteProvider {

  private final CloseableDataSource dataSource;
  private final StarRocksUtils starRocksUtils = new StarRocksUtils();

  /**
   * @param factoryProvider factory whose data source must be a {@link CloseableDataSource}
   */
  public StarrocksTableDataWriteProvider(ProductFactoryProvider factoryProvider) {
    super(factoryProvider);
    this.dataSource = (CloseableDataSource) factoryProvider.getDataSource();
  }

  /**
   * Binds the helper to the target table. The superclass implementation is not invoked
   * and {@code fieldNames} is not used here.
   *
   * @param schemaName target database name
   * @param tableName  target table name
   * @param fieldNames columns to be written (unused)
   */
  @Override
  public void prepareWrite(String schemaName, String tableName, List<String> fieldNames) {
    starRocksUtils.init(schemaName, tableName, dataSource);
  }

  /**
   * Writes one batch.
   *
   * @param fieldNames   column names, positionally matching each record array
   * @param recordValues records to write
   * @return {@code 0} when either list is empty or null, otherwise the helper's result
   */
  @Override
  public long write(List<String> fieldNames, List<Object[]> recordValues) {
    boolean nothingToWrite =
        CollectionUtils.isEmpty(fieldNames) || CollectionUtils.isEmpty(recordValues);
    return nothingToWrite ? 0L : starRocksUtils.addOrUpdateData(fieldNames, recordValues);
  }
}

View File

@@ -79,6 +79,12 @@
<version>5.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.13</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>