Skip to content

Commit

Permalink
[FLINK-18072][hbase] Fix HBaseLookupFunction can not work with new in…
Browse files Browse the repository at this point in the history
…ternal data structure RowData

This closes apache#12594
  • Loading branch information
leonardBang authored Jun 17, 2020
1 parent 204aa6d commit 03a8ef6
Show file tree
Hide file tree
Showing 7 changed files with 408 additions and 273 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public LookupRuntimeProvider getLookupRuntimeProvider(LookupContext context) {
.isPresent(),
"Currently, HBase table only supports lookup by rowkey field.");

return TableFunctionProvider.of(new HBaseLookupFunction(conf, tableName, hbaseSchema));
return TableFunctionProvider.of(new HBaseRowDataLookupFunction(conf, tableName, hbaseSchema, nullStringLiteral));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@

/**
* The HBaseLookupFunction is a standard user-defined table function, it can be used in tableAPI
* and also useful for temporal table join plan in SQL.
* and also useful for temporal table join plan in SQL. It looks up the result as {@link Row}.
*/
@Internal
public class HBaseLookupFunction extends TableFunction<Row> {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.connector.hbase.source;

import org.apache.flink.annotation.Internal;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.connector.hbase.util.HBaseConfigurationUtil;
import org.apache.flink.connector.hbase.util.HBaseSerde;
import org.apache.flink.connector.hbase.util.HBaseTableSchema;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.functions.FunctionContext;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.util.StringUtils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
* The HBaseRowDataLookupFunction is a standard user-defined table function, it can be used in tableAPI
* and also useful for temporal table join plan in SQL. It looks up the result as {@link RowData}.
*/
@Internal
public class HBaseRowDataLookupFunction extends TableFunction<RowData> {

private static final Logger LOG = LoggerFactory.getLogger(HBaseRowDataLookupFunction.class);
private static final long serialVersionUID = 1L;

private final String hTableName;
private final byte[] serializedConfig;
private final HBaseTableSchema hbaseTableSchema;
private final String nullStringLiteral;

private transient Connection hConnection;
private transient HTable table;
private transient HBaseSerde serde;

public HBaseRowDataLookupFunction(
Configuration configuration,
String hTableName,
HBaseTableSchema hbaseTableSchema,
String nullStringLiteral) {
this.serializedConfig = HBaseConfigurationUtil.serializeConfiguration(configuration);
this.hTableName = hTableName;
this.hbaseTableSchema = hbaseTableSchema;
this.nullStringLiteral = nullStringLiteral;
}

/**
* The invoke entry point of lookup function.
* @param rowKey the lookup key. Currently only support single rowkey.
*/
public void eval(Object rowKey) throws IOException {
// fetch result
Get get = serde.createGet(rowKey);
if (get != null) {
Result result = table.get(get);
if (!result.isEmpty()) {
// parse and collect
collect(serde.convertToRow(result));
}
}
}

private Configuration prepareRuntimeConfiguration() {
// create default configuration from current runtime env (`hbase-site.xml` in classpath) first,
// and overwrite configuration using serialized configuration from client-side env (`hbase-site.xml` in classpath).
// user params from client-side have the highest priority
Configuration runtimeConfig = HBaseConfigurationUtil.deserializeConfiguration(
serializedConfig,
HBaseConfigurationUtil.getHBaseConfiguration());

// do validation: check key option(s) in final runtime configuration
if (StringUtils.isNullOrWhitespaceOnly(runtimeConfig.get(HConstants.ZOOKEEPER_QUORUM))) {
LOG.error("can not connect to HBase without {} configuration", HConstants.ZOOKEEPER_QUORUM);
throw new IllegalArgumentException("check HBase configuration failed, lost: '" + HConstants.ZOOKEEPER_QUORUM + "'!");
}

return runtimeConfig;
}

@Override
public void open(FunctionContext context) {
LOG.info("start open ...");
Configuration config = prepareRuntimeConfiguration();
try {
hConnection = ConnectionFactory.createConnection(config);
table = (HTable) hConnection.getTable(TableName.valueOf(hTableName));
} catch (TableNotFoundException tnfe) {
LOG.error("Table '{}' not found ", hTableName, tnfe);
throw new RuntimeException("HBase table '" + hTableName + "' not found.", tnfe);
} catch (IOException ioe) {
LOG.error("Exception while creating connection to HBase.", ioe);
throw new RuntimeException("Cannot create connection to HBase.", ioe);
}
this.serde = new HBaseSerde(hbaseTableSchema, nullStringLiteral);
LOG.info("end open.");
}

@Override
public void close() {
LOG.info("start close ...");
if (null != table) {
try {
table.close();
table = null;
} catch (IOException e) {
// ignore exception when close.
LOG.warn("exception when close table", e);
}
}
if (null != hConnection) {
try {
hConnection.close();
hConnection = null;
} catch (IOException e) {
// ignore exception when close.
LOG.warn("exception when close connection", e);
}
}
LOG.info("end close.");
}

@VisibleForTesting
public String getHTableName() {
return hTableName;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.flink.table.types.logical.LogicalTypeFamily;

import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
Expand Down Expand Up @@ -76,6 +77,7 @@ public class HBaseSerde {
private final @Nullable FieldDecoder keyDecoder;
private final FieldEncoder[][] qualifierEncoders;
private final FieldDecoder[][] qualifierDecoders;
private final GenericRowData rowWithRowKey;

public HBaseSerde(HBaseTableSchema hbaseSchema, final String nullStringLiteral) {
this.families = hbaseSchema.getFamilyKeys();
Expand Down Expand Up @@ -115,6 +117,7 @@ public HBaseSerde(HBaseTableSchema hbaseSchema, final String nullStringLiteral)
.toArray(FieldDecoder[]::new);
this.reusedFamilyRows[f] = new GenericRowData(dataTypes.length);
}
this.rowWithRowKey = new GenericRowData(1);
}

/**
Expand Down Expand Up @@ -195,6 +198,29 @@ public Scan createScan() {
return scan;
}

/**
* Returns an instance of Get that retrieves the matches records from the HBase table.
*
* @return The appropriate instance of Get for this use case.
*/
public Get createGet(Object rowKey) {
checkArgument(keyEncoder != null, "row key is not set.");
rowWithRowKey.setField(0, rowKey);
byte[] rowkey = keyEncoder.encode(rowWithRowKey, 0);
if (rowkey.length == 0) {
// drop dirty records, rowkey shouldn't be zero length
return null;
}
Get get = new Get(rowkey);
for (int f = 0; f < families.length; f++) {
byte[] family = families[f];
for (byte[] qualifier : qualifiers[f]) {
get.addColumn(family, qualifier);
}
}
return get;
}

/**
* Converts HBase {@link Result} into {@link RowData}.
*/
Expand Down
Loading

0 comments on commit 03a8ef6

Please sign in to comment.