Skip to content

Commit

Permalink
Project import generated by Copybara.
Browse files Browse the repository at this point in the history
GitOrigin-RevId: 4c7ab18dc782f03e88a12b9eba658848e53b6f75
  • Loading branch information
Myscale authored and MscaleDB committed Jun 12, 2024
1 parent 7b35ff2 commit cb4a6b2
Show file tree
Hide file tree
Showing 66 changed files with 735 additions and 558 deletions.
6 changes: 3 additions & 3 deletions cmake/autogenerated_myscale_versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(MYSCALE_VERSION_MAJOR 1)
SET(MYSCALE_VERSION_MINOR 5)
SET(MYSCALE_VERSION_MINOR 6)
SET(MYSCALE_VERSION_PATCH 0)
SET(MYSCALE_VERSION_DESCRIBE myscale-v1.5.0)
SET(MYSCALE_VERSION_STRING 1.5.0)
SET(MYSCALE_VERSION_DESCRIBE myscale-v1.6.0)
SET(MYSCALE_VERSION_STRING 1.6.0)
# end of autochange

2 changes: 1 addition & 1 deletion programs/keeper/Keeper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ try
config().getString("path", ""),
std::move(unused_cache),
unused_event,
[&](ConfigurationPtr config, bool /* initial_loading */)
[&](ConfigurationPtr config, XMLDocumentPtr /* preprocessed_xml */, bool /* initial_loading */)
{
if (config->has("keeper_server"))
tiny_context->updateKeeperConfiguration(*config);
Expand Down
4 changes: 3 additions & 1 deletion programs/server/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1264,7 +1264,7 @@ try
config().getString("path", ""),
std::move(main_config_zk_node_cache),
main_config_zk_changed_event,
[&](ConfigurationPtr config, bool initial_loading)
[&](ConfigurationPtr config, [[maybe_unused]] XMLDocumentPtr preprocessed_xml, bool initial_loading)
{
Settings::checkNoSettingNamesAtTopLevel(*config, config_path);

Expand Down Expand Up @@ -1641,6 +1641,8 @@ try
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");

SCOPE_EXIT({


async_metrics.stop();

/** Ask to cancel background jobs all table engines,
Expand Down
2 changes: 0 additions & 2 deletions programs/server/Server.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ class Server : public BaseDaemon, public IServer

std::string getDefaultCorePath() const override;


private:
ContextMutablePtr global_context;
/// Updated/recent config, to compare http_handlers
Expand All @@ -90,7 +89,6 @@ class Server : public BaseDaemon, public IServer
AsynchronousMetrics & async_metrics,
bool & is_secure);


using CreateServerFunc = std::function<ProtocolServerAdapter(UInt16)>;
void createServer(
Poco::Util::AbstractConfiguration & config,
Expand Down
2 changes: 1 addition & 1 deletion src/Access/UsersConfigAccessStorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,7 @@ void UsersConfigAccessStorage::load(
preprocessed_dir,
zkutil::ZooKeeperNodeCache(get_zookeeper_function),
std::make_shared<Poco::Event>(),
[&](Poco::AutoPtr<Poco::Util::AbstractConfiguration> new_config, bool /*initial_loading*/)
[&](Poco::AutoPtr<Poco::Util::AbstractConfiguration> new_config, XMLDocumentPtr /*preprocessed_xml*/, bool /*initial_loading*/)
{
Settings::checkNoSettingNamesAtTopLevel(*new_config, users_config_path);
parseFromConfig(*new_config);
Expand Down
2 changes: 1 addition & 1 deletion src/Common/AsynchronousMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ void AsynchronousMetrics::update(TimePoint update_time)
Int64 rss = data.resident;
#if defined(OS_LINUX)
// To obtain a more precise memory usage, we deduct the shared
// memory utilized by MSTG mmap files.
// memory utilized by MyScale vector index mmap files.
if (first_run)
base_shared = data.shared;
Int64 shared = data.shared - base_shared;
Expand Down
2 changes: 1 addition & 1 deletion src/Common/Config/ConfigReloader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac

try
{
updater(loaded_config.configuration, initial_loading);
updater(loaded_config.configuration, loaded_config.preprocessed_xml, initial_loading);
}
catch (...)
{
Expand Down
2 changes: 1 addition & 1 deletion src/Common/Config/ConfigReloader.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Context;
class ConfigReloader
{
public:
using Updater = std::function<void(ConfigurationPtr, bool)>;
using Updater = std::function<void(ConfigurationPtr, XMLDocumentPtr, bool)>;

/** include_from_path is usually /etc/metrika.xml (i.e. value of <include_from> tag)
*/
Expand Down
3 changes: 2 additions & 1 deletion src/Common/ErrorCodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,8 @@
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \
M(1001, STD_EXCEPTION) \
M(1002, UNKNOWN_EXCEPTION) \
M(1003, UNKNOWN_EXCEPTION)

/* See END */

namespace DB
Expand Down
1 change: 1 addition & 0 deletions src/Core/Settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,7 @@ class IColumn;
M(Bool, enable_brute_force_vector_search, false, "Enable brute-force search for data parts without vector indexes.", 0) \
M(Float, hybrid_search_fusion_weight, 0.5f, "Default fusion_weight for hybrid search Relative Score Fusion (RSF) function. Valid value is in interval [0.0f, 1.0f]", 0) \
M(UInt64, hybrid_search_fusion_k, 60, "Default fusion_k for hybrid search Reciprocal Rank Fusion (RRF) function", 0) \
M(Bool, optimize_prefilter_in_search, true, "Enable prewhere optimization for vector or text search if some partition columns in prewhere condition.", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.

Expand Down
6 changes: 6 additions & 0 deletions src/DataTypes/DataTypeObjectToFetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,14 @@ static inline IColumn & extractElementColumn(IColumn & column, size_t idx)
template <typename F>
static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl)
{
/// We use the assumption that tuples of zero size do not exist.
size_t old_size = column.size();

try
{
impl();

// Check that all columns now have the same size.
size_t new_size = column.size();

for (auto i : collections::range(0, elems.size()))
Expand Down Expand Up @@ -150,6 +152,10 @@ MutableColumnPtr DataTypeObjectToFetch::createColumn() const

MutableColumnPtr DataTypeObjectToFetch::createColumn(const ISerialization & serialization) const
{
/// If we read subcolumn of nested Tuple, it may be wrapped to SerializationNamed
/// several times to allow to reconstruct the substream path name.
/// Here we don't need substream path name, so we drop first several wrapper serializations.

const auto * current_serialization = &serialization;
while (const auto * serialization_named = typeid_cast<const SerializationNamed *>(current_serialization))
current_serialization = serialization_named->getNested().get();
Expand Down
6 changes: 6 additions & 0 deletions src/DataTypes/DataTypeTuple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,14 @@ static inline IColumn & extractElementColumn(IColumn & column, size_t idx)
template <typename F>
static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl)
{
/// We use the assumption that tuples of zero size do not exist.
size_t old_size = column.size();

try
{
impl();

// Check that all columns now have the same size.
size_t new_size = column.size();

for (auto i : collections::range(0, elems.size()))
Expand Down Expand Up @@ -148,6 +150,10 @@ MutableColumnPtr DataTypeTuple::createColumn() const

MutableColumnPtr DataTypeTuple::createColumn(const ISerialization & serialization) const
{
/// If we read subcolumn of nested Tuple, it may be wrapped to SerializationNamed
/// several times to allow to reconstruct the substream path name.
/// Here we don't need substream path name, so we drop first several wrapper serializations.

const auto * current_serialization = &serialization;
while (const auto * serialization_named = typeid_cast<const SerializationNamed *>(current_serialization))
current_serialization = serialization_named->getNested().get();
Expand Down
8 changes: 8 additions & 0 deletions src/DataTypes/Serializations/SerializationObjectToFetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,25 @@ void SerializationObjectToFetch::serializeBinary(const IColumn & column, size_t
template <typename F>
static void addElementSafe(size_t num_elems, IColumn & column, F && impl)
{
/// We use the assumption that ObjectToFetchs of zero size do not exist.
size_t old_size = column.size();

try
{
impl();

// Check that all columns now have the same size.
size_t new_size = column.size();
for (size_t i = 1; i < num_elems; ++i)
{
const auto & element_column = extractElementColumn(column, i);
if (element_column.size() != new_size)
{
// This is not a logical error because it may work with
// user-supplied data.
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH,
"Cannot read a ObjectToFetch because not all elements are present");
}
}
}
catch (...)
Expand Down Expand Up @@ -131,9 +137,11 @@ void SerializationObjectToFetch::deserializeText(IColumn & column, ReadBuffer &
}
});

// Special format for one element objecttofetch (1,)
if (1 == elems.size())
{
skipWhitespaceIfAny(istr);
// Allow both (1) and (1,)
checkChar(',', istr);
}
skipWhitespaceIfAny(istr);
Expand Down
2 changes: 2 additions & 0 deletions src/DataTypes/Serializations/SerializationTuple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,14 @@ void SerializationTuple::serializeBinary(const IColumn & column, size_t row_num,
template <typename F>
static void addElementSafe(size_t num_elems, IColumn & column, F && impl)
{
/// We use the assumption that tuples of zero size do not exist.
size_t old_size = column.size();

try
{
impl();

// Check that all columns now have the same size.
size_t new_size = column.size();
for (size_t i = 1; i < num_elems; ++i)
{
Expand Down
5 changes: 4 additions & 1 deletion src/Disks/ObjectStorages/S3/S3ObjectStorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ class S3ObjectStorage : public IObjectStorage
String version_id_,
const S3Capabilities & s3_capabilities_,
String bucket_,
String connection_string)
String connection_string,
ContextPtr context = nullptr)
: bucket(bucket_)
, client(std::move(client_))
, s3_settings(std::move(s3_settings_))
Expand All @@ -61,6 +62,8 @@ class S3ObjectStorage : public IObjectStorage
data_source_description.description = connection_string;
data_source_description.is_cached = false;
data_source_description.is_encrypted = false;
if (context)
applyRemoteThrottlingSettings(context);

log = &Poco::Logger::get(logger_name);
}
Expand Down
2 changes: 1 addition & 1 deletion src/Disks/ObjectStorages/S3/registerDiskS3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
}
else
{
s3_storage = std::make_shared<S3ObjectStorage>(std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint);
s3_storage = std::make_shared<S3ObjectStorage>(std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint, context);
auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context);
metadata_storage = std::make_shared<MetadataStorageFromDisk>(metadata_disk, uri.key);
}
Expand Down
2 changes: 1 addition & 1 deletion src/Interpreters/Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ struct ContextSharedPart : boost::noncopyable
String user_files_path; /// Path to the directory with user provided files, usable by 'file' table function.
String dictionaries_lib_path; /// Path to the directory with user provided binaries and libraries for external dictionaries.
String user_scripts_path; /// Path to the directory with user provided scripts.
String vector_index_cache_path; /// Path to the directory of vector index cache for MSTG disk mode.
String vector_index_cache_path; /// Path to the directory of vector index cache for MyScale vector index disk mode.
String tantivy_index_cache_path; /// Path to the directory of tantivy index cache.
ConfigurationPtr config; /// Global configuration settings.

Expand Down
1 change: 1 addition & 0 deletions src/Interpreters/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -1173,6 +1173,7 @@ class Context: public std::enable_shared_from_this<Context>
HybridSearchInfoPtr getHybridSearchInfo() const;
void setHybridSearchInfo(HybridSearchInfoPtr hybrid_search_info) const;
void resetHybridSearchInfo() const;

private:
std::unique_lock<std::recursive_mutex> getLock() const;

Expand Down
13 changes: 8 additions & 5 deletions src/Interpreters/ExpressionAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,12 @@ inline void checkTantivyIndex([[maybe_unused]]const StorageSnapshotPtr & storage

std::pair<String, bool> getVectorIndexTypeAndParameterCheck(const StorageMetadataPtr & metadata_snapshot, ContextPtr context, String & search_column_name)
{
auto log = getLogger();
String index_type = "";
/// Obtain the default value of the `use_parameter_check` in the MergeTreeSetting.
std::unique_ptr<MergeTreeSettings> storage_settings = std::make_unique<MergeTreeSettings>(context->getMergeTreeSettings());
bool use_parameter_check = storage_settings->vector_index_parameter_check;
LOG_TRACE(getLogger(), "vector_index_parameter_check value in MergeTreeSetting: {}", use_parameter_check);
LOG_TRACE(log, "vector_index_parameter_check value in MergeTreeSetting: {}", use_parameter_check);

/// Obtain the type of the vector index recorded in the meta_data.
if (metadata_snapshot)
Expand All @@ -175,7 +176,7 @@ std::pair<String, bool> getVectorIndexTypeAndParameterCheck(const StorageMetadat
if (vec_index_desc.column == search_column_name)
{
index_type = vec_index_desc.type;
LOG_TRACE(getLogger(), "The vector index type used for the query is `{}`", Poco::toUpper(index_type));
LOG_TRACE(log, "The vector index type used for the query is `{}`", Poco::toUpper(index_type));

break;
}
Expand All @@ -196,7 +197,7 @@ std::pair<String, bool> getVectorIndexTypeAndParameterCheck(const StorageMetadat
{
use_parameter_check = new_value.get<bool>();
LOG_TRACE(
getLogger(), "vector_index_parameter_check value in sql definition: {}", use_parameter_check);
log, "vector_index_parameter_check value in sql definition: {}", use_parameter_check);
break;
}
}
Expand Down Expand Up @@ -570,7 +571,7 @@ void ExpressionAnalyzer::analyzeVectorScan(ActionsDAGPtr & temp_actions)
has_vector_scan = true;
}
}

/// Fill in dim and recognize VectorSearchType from metadata
if (has_vector_scan)
{
getAndCheckVectorScanInfoFromMetadata(syntax->storage_snapshot, vector_scan_descriptions[0], getContext());
Expand Down Expand Up @@ -1837,6 +1838,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(

const auto & node = step.actions()->findInOutputs(prewhere_column_name);
auto filter_type = node.result_type;
LOG_DEBUG(getLogger(), "[appendPrewhere] filter_type: {}", filter_type->getName());
if (!filter_type->canBeUsedInBooleanContext())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}",
filter_type->getName());
Expand All @@ -1846,6 +1848,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
/// Remove unused source_columns from prewhere actions.
auto tmp_actions_dag = std::make_shared<ActionsDAG>(sourceColumns());
getRootActions(select_query->prewhere(), only_types, tmp_actions_dag);
/// Constants cannot be removed since they can be used in other parts of the query.
/// And if they are not used anywhere, except PREWHERE, they will be removed on the next step.
tmp_actions_dag->removeUnusedActions(
NameSet{prewhere_column_name},
/* allow_remove_inputs= */ true,
Expand Down Expand Up @@ -2116,7 +2120,6 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain,
const auto * select_query = getSelectQuery();

ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns);

getRootActions(select_query->select(), only_types, step.actions());

for (const auto & child : select_query->select()->children)
Expand Down
1 change: 0 additions & 1 deletion src/Interpreters/InterpreterInsertQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ namespace Search
enum class DataType;
}


namespace DB
{

Expand Down
7 changes: 0 additions & 7 deletions src/Parsers/ParserCreateQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,13 +185,6 @@ bool ParserVectorIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected
type = function_node;
}


// if (!s_granularity.ignore(pos, expected))
// return false;

// if (!granularity_p.parse(pos, granularity, expected))
// return false;

auto index = std::make_shared<ASTVIDeclaration>();
index->name = name->as<ASTIdentifier &>().name();
index->column = column->as<ASTIdentifier &>().name();
Expand Down
41 changes: 0 additions & 41 deletions src/Processors/QueryPlan/ReadWithVectorScan.cpp.rej

This file was deleted.

Loading

0 comments on commit cb4a6b2

Please sign in to comment.