
Requantize scale fix (#21100)
* Requantize scale fix and refactor

* Comment fix

* Review suggestions

* Formatting fix
DominikaJedynak committed Jul 19, 2022
1 parent cca8f4e commit 7b1daf9
Showing 3 changed files with 17 additions and 15 deletions.
14 changes: 7 additions & 7 deletions src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc
@@ -167,9 +167,9 @@ static void DNNLQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs,
   float output_min = 0;
   float output_max = 0;
   float output_scale = 0;
   if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
-    output_min = params.min_calib_range.value();
-    output_max = params.max_calib_range.value();
-    output_scale = output_data_range / MaxAbs(output_min, output_max);
+    output_min = params.min_calib_range.value();
+    output_max = params.max_calib_range.value();
+    output_scale = output_data_range / MaxAbs(output_min, output_max);
   } else {
     output_max = A_absmax + B_absmax;
     output_min = -output_max;
@@ -189,8 +189,8 @@ static void DNNLQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs,
       // rescale uint8 to int8 by reorder to temporary memory
       auto s8_desc = is_A_int8 ? A_mem->get_desc() : B_mem->get_desc();
       rescaled_mem = TmpMemMgr::Get()->Alloc(s8_desc);
-      const float u8_reorder_scale = 0.5;
-      std::vector<float> reorder_scale = {u8_reorder_scale};
+      const float u8_to_s8_scale = 0.5;
+      std::vector<float> reorder_scale = {u8_to_s8_scale};
       auto engine = CpuEngine::Get()->get_engine();
       dnnl::primitive_attr reorder_attr;
       reorder_attr.set_output_scales(0, reorder_scale);
@@ -202,10 +202,10 @@ static void DNNLQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs,
       // Modify scale to restore original uint8 values:
       if (is_A_int8) {
         B_mem = rescaled_mem;
-        scales[1] *= 1.0 / u8_reorder_scale;
+        scales[1] *= 1.0 / u8_to_s8_scale;
       } else {
         A_mem = rescaled_mem;
-        scales[0] *= 1.0 / u8_reorder_scale;
+        scales[0] *= 1.0 / u8_to_s8_scale;
       }
     }
   }
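The rename above (u8_reorder_scale → u8_to_s8_scale) doesn't change behavior, but the constant's role is worth spelling out: a uint8 input is halved so it fits the int8 range, and the corresponding elementwise scale is multiplied by 1/0.5 to compensate. A minimal standalone sketch of that arithmetic (values are illustrative, not taken from the operator):

```cpp
// Sketch only: why a 0.5 reorder scale moves uint8 data into int8 range,
// and why the per-input scale must then be multiplied by 1 / 0.5.
#include <cmath>
#include <cstdint>
#include <iostream>

int main() {
  const float u8_to_s8_scale = 0.5f;  // same constant as in the diff
  const uint8_t u8_val = 250;         // arbitrary uint8 input value

  // Reorder: 250 * 0.5 = 125, which fits in int8's [-128, 127] range.
  const auto s8_val = static_cast<int8_t>(std::lround(u8_val * u8_to_s8_scale));

  // Compensation: multiplying the input's scale by 1 / 0.5 = 2 keeps the
  // dequantized value unchanged: s8_val * adjusted == u8_val * original.
  const float original_scale = 0.1f;  // hypothetical dequantization scale
  const float adjusted_scale = original_scale * (1.0f / u8_to_s8_scale);

  std::cout << u8_val * original_scale << " == "
            << s8_val * adjusted_scale << '\n';  // prints 25 == 25
  return 0;
}
```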
12 changes: 7 additions & 5 deletions src/operator/quantization/dnnl/dnnl_requantize-inl.h
@@ -131,11 +131,13 @@ static void DNNLRequantizeForward(const nnvm::NodeAttrs& attrs,
       if (data_mins[i] < data_min)
         data_min = data_mins[i];
     }
-    float src_range = MinAbs(MinValue<SrcDType>(), MaxValue<SrcDType>());
-    // MaxAbs is not used here as it converts data to float, which could cause overflow errors.
-    SrcDType data_range = std::max(std::abs(data_min), std::abs(data_max));
-    float data_scale = MaxAbs(*inputs[1].data().dptr<float>(), *inputs[2].data().dptr<float>());
-    real_range = data_range * data_scale / src_range;
+    float src_range = MinAbs(MinValue<SrcDType>(), MaxValue<SrcDType>());
+    // MaxAbs is not used here as it converts data to float, which could cause errors.
+    // int64 is used because std::abs(INT32_MIN) would otherwise overflow.
+    int64_t data_range = std::max(std::abs(static_cast<int64_t>(data_min)),
+                                  std::abs(static_cast<int64_t>(data_max)));
+    float data_scale = MaxAbs(*inputs[1].data().dptr<float>(), *inputs[2].data().dptr<float>());
+    real_range = data_range * data_scale / src_range;
   }
   auto out_type = GetQuantizeOutputType(param);
   if (out_type == mshadow::kUint8) {
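The core of this fix is easiest to see in isolation: for an int32 input, std::abs(data_min) overflows when data_min is INT32_MIN, because +2147483648 is not representable in int32. A minimal sketch of the fixed computation, assuming SrcDType = int32 and a made-up calibration scale:

```cpp
// Sketch only: the overflow this hunk fixes, and the resulting real_range.
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iostream>

int main() {
  const int32_t data_min = INT32_MIN;  // worst case: |INT32_MIN| > INT32_MAX
  const int32_t data_max = 100;

  // Old pattern: std::abs(data_min) on int32 is undefined behavior for
  // INT32_MIN. Fixed pattern, as in the diff: cast to int64_t first.
  const int64_t data_range = std::max(std::abs(static_cast<int64_t>(data_min)),
                                      std::abs(static_cast<int64_t>(data_max)));

  const float src_range = 2147483647.0f;  // MinAbs(MinValue<int32>(), MaxValue<int32>())
  const float data_scale = 0.05f;         // hypothetical max(|calib_min|, |calib_max|)
  const float real_range = data_range * data_scale / src_range;

  std::cout << data_range << " -> real_range = " << real_range << '\n';
  return 0;
}
```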
6 changes: 3 additions & 3 deletions src/operator/subgraph/dnnl/dnnl_fc.cc
@@ -67,6 +67,7 @@ class SgDNNLFCOp {
  private:
   enum { kDataMin = 0, kDataMax, kWeightMin, kWeightMax, kBiasMin, kBiasMax, kSumMin, kSumMax };
   const size_t MIN_MAX_COUNT = 8;
+  const float u8_to_s8_scale = 0.5;

   NDArray PrepareOutputWithSum(const NDArray& sum_input, const NDArray& output);
   bool CheckInitializationConditions(const std::vector<NDArray>& inputs,
@@ -275,8 +276,7 @@ NDArray SgDNNLFCOp::PrepareOutputWithSum(const NDArray& sum_input, const NDArray
     dnnl_mem_ptr tmp_mem(new dnnl::memory(
         sum_mem_desc, CpuEngine::Get()->get_engine(), out_dnnl_mem->get_data_handle()));
     DNNLStream::Get()->RegisterMem(tmp_mem);
-    const float u8_reorder_scale = 0.5;
-    std::vector<float> reorder_scale = {u8_reorder_scale};
+    std::vector<float> reorder_scale = {u8_to_s8_scale};
     dnnl::primitive_attr reorder_attr;
     reorder_attr.set_output_scales(0, reorder_scale);
     const auto reorder_pd = dnnl::reorder::primitive_desc(CpuEngine::Get()->get_engine(),
@@ -498,7 +498,7 @@ bool SgDNNLFCOp::PrepareQuantization(const OpContext& ctx,
     if (in_data[idx.sum].dtype() == mshadow::kUint8 && output.dtype() == mshadow::kInt8) {
       // In this case, reorder with scale 0.5 is used on in_data[idx.sum] to
       // scale it to s8 range, so sum_scale has to be rescaled as well
-      full_param_.sum_scale *= 2.0;
+      full_param_.sum_scale *= 1.0 / u8_to_s8_scale;
     }
   }
   return support_channelwise_scale;
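For context, the reorder-with-output-scale pattern that PrepareOutputWithSum builds looks roughly like this in the oneDNN 2.x API (a sketch with illustrative names, not the operator's actual interface; set_output_scales was removed in oneDNN 3.x):

```cpp
// Sketch only: a scaled u8 -> s8 reorder via oneDNN 2.x.
#include <dnnl.hpp>
#include <vector>

dnnl::memory rescale_u8_to_s8(const dnnl::memory& src_u8,
                              const dnnl::memory::desc& s8_desc,
                              const dnnl::engine& eng,
                              dnnl::stream& strm) {
  const float u8_to_s8_scale = 0.5f;
  dnnl::memory dst_s8(s8_desc, eng);

  dnnl::primitive_attr attr;
  attr.set_output_scales(0, {u8_to_s8_scale});  // mask 0: one scale for the whole tensor

  dnnl::reorder::primitive_desc pd(eng, src_u8.get_desc(), eng, s8_desc, attr);
  dnnl::reorder(pd).execute(strm, {{DNNL_ARG_FROM, src_u8}, {DNNL_ARG_TO, dst_s8}});
  return dst_s8;
}
```

Hoisting the constant into the class (the first hunk of this file) ties this reorder and the sum_scale compensation in PrepareQuantization to the same value, replacing the hard-coded 2.0 that could silently drift out of sync.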
