Skip to content

Commit

Permalink
Support creating a default vector index
Browse files Browse the repository at this point in the history
  • Loading branch information
Libao Yang authored and Shanfeng Pang committed Apr 30, 2024
1 parent 06087e4 commit 72ddda3
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/Parsers/ASTCreateIndexQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace DB
{

/** CREATE INDEX [IF NOT EXISTS] name ON [db].name (expression) TYPE type GRANULARITY value
* CREATE VECTOR INDEX [IF NOT EXISTS] name on [db].name column TYPE typename(args)
* CREATE VECTOR INDEX [IF NOT EXISTS] name on [db].name column [TYPE typename(args)]
*/

class ASTCreateIndexQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
Expand Down
18 changes: 13 additions & 5 deletions src/Parsers/ParserCreateIndexQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,19 @@ bool ParserCreateVectorIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Exp
if (!column_p.parse(pos, column, expected))
return false;

if (!s_type.ignore(pos, expected))
return false;

if (!data_type_p.parse(pos, type, expected))
return false;
if (s_type.ignore(pos, expected))
{
if (!data_type_p.parse(pos, type, expected))
return false;
}
else
{
/// The "TYPE typename(args)" field in "CREATE VECTOR INDEX" query is omitted, creating a default vector index
auto function_node = std::make_shared<ASTFunction>();
function_node->name = "DEFAULT";
function_node->no_empty_args = true;
type = function_node;
}

auto index = std::make_shared<ASTVIDeclaration>();
index->std_create = true;
Expand Down
2 changes: 1 addition & 1 deletion src/Parsers/ParserCreateIndexQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ namespace DB

/** Query like this:
* CREATE INDEX [IF NOT EXISTS] name ON [db].name (expression) TYPE type GRANULARITY value
* CREATE VECTOR INDEX [IF NOT EXISTS] name on [db].name column TYPE typename(args)
* CREATE VECTOR INDEX [IF NOT EXISTS] name on [db].name column [TYPE typename(args)]
*/

class ParserCreateIndexQuery : public IParserBase
Expand Down
17 changes: 13 additions & 4 deletions src/Parsers/ParserCreateQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,20 @@ bool ParserVectorIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected
if (!column_p.parse(pos, column, expected))
return false;

if (!s_type.ignore(pos, expected))
return false;
if (s_type.ignore(pos, expected))
{
if (!data_type_p.parse(pos, type, expected))
return false;
}
else
{
/// The "TYPE typename(args)" field in "ADD VECTOR INDEX" query is omitted, creating a default vector index
auto function_node = std::make_shared<ASTFunction>();
function_node->name = "DEFAULT";
function_node->no_empty_args = true;
type = function_node;
}

if (!data_type_p.parse(pos, type, expected))
return false;

// if (!s_granularity.ignore(pos, expected))
// return false;
Expand Down
13 changes: 8 additions & 5 deletions src/VectorIndex/Storages/VIDescriptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
namespace Search
{
enum class DataType;
std::string getDefaultIndexType(const DataType & search_type);
}
namespace DB
{
Expand Down Expand Up @@ -129,7 +130,12 @@ VIDescription VIDescription::getVectorIndexFromAST(
result.name = vec_index_definition->name;
result.column = vec_index_definition->column;
result.data_type = columns.get(result.column).type;
result.type = vec_index_definition->type->name;
result.vector_search_type = getSearchIndexDataType(result.data_type);
result.type = Poco::toUpper(vec_index_definition->type->name) == "DEFAULT" ? Search::getDefaultIndexType(result.vector_search_type)
: vec_index_definition->type->name;

/// check the validity of vector column type
Search::getVectorIndexType(result.type, result.vector_search_type);

/// currently not used
const auto & definition_arguments = vec_index_definition->type->arguments;
Expand All @@ -139,14 +145,11 @@ VIDescription VIDescription::getVectorIndexFromAST(
{
const auto * argument = definition_arguments->children[i]->as<ASTLiteral>();
if (!argument)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Only literals can be skip index arguments");
throw Exception(ErrorCodes::INCORRECT_QUERY, "Only literals can be search index arguments");
result.arguments.emplace_back(argument->value);
}
}

result.vector_search_type = getSearchIndexDataType(result.data_type);
Search::getVectorIndexType(result.type, result.vector_search_type);

if (result.vector_search_type == Search::DataType::FloatVector && !constraints.empty())
{
result.dim = static_cast<int>(constraints.getArrayLengthByColumnName(result.column).first);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Create default vector indices when creating table
vec_ind_1 MSTG vec_ind_1 vector_1 TYPE DEFAULT
vec_ind_2 MSTG vec_ind_2 vector_2 TYPE default(\'metric_type=IP\')
vec_ind_7 BinaryMSTG vec_ind_7 vector_7 TYPE DEFAULT
vec_ind_8 BinaryMSTG vec_ind_8 vector_8 TYPE default(\'metric_type=Jaccard\')
Create default vector indices in ALTER TABLE ADD VECTOR INDEX query
vec_ind_1 MSTG vec_ind_1 vector_1 TYPE DEFAULT
vec_ind_2 MSTG vec_ind_2 vector_2 TYPE default(\'metric_type=IP\')
vec_ind_7 BinaryMSTG vec_ind_7 vector_7 TYPE DEFAULT
vec_ind_8 BinaryMSTG vec_ind_8 vector_8 TYPE default(\'metric_type=Jaccard\')
vec_ind_3 MSTG vec_ind_3 vector_3 TYPE DEFAULT
vec_ind_4 MSTG vec_ind_4 vector_4 TYPE default
vec_ind_9 BinaryMSTG vec_ind_9 vector_9 TYPE DEFAULT
vec_ind_10 BinaryMSTG vec_ind_10 vector_10 TYPE default
Create default vector indices in CREATE VECTOR INDEX query
vec_ind_1 MSTG vec_ind_1 vector_1 TYPE DEFAULT
vec_ind_2 MSTG vec_ind_2 vector_2 TYPE default(\'metric_type=IP\')
vec_ind_7 BinaryMSTG vec_ind_7 vector_7 TYPE DEFAULT
vec_ind_8 BinaryMSTG vec_ind_8 vector_8 TYPE default(\'metric_type=Jaccard\')
vec_ind_3 MSTG vec_ind_3 vector_3 TYPE DEFAULT
vec_ind_4 MSTG vec_ind_4 vector_4 TYPE default
vec_ind_9 BinaryMSTG vec_ind_9 vector_9 TYPE DEFAULT
vec_ind_10 BinaryMSTG vec_ind_10 vector_10 TYPE default
vec_ind_5 MSTG vec_ind_5 vector_5 TYPE DEFAULT
vec_ind_6 MSTG vec_ind_6 vector_6 TYPE default
vec_ind_11 BinaryMSTG vec_ind_11 vector_11 TYPE DEFAULT
vec_ind_12 BinaryMSTG vec_ind_12 vector_12 TYPE default
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
-- Tags: no-parallel

-- Purpose: exercise creation of "default" vector indices, i.e. vector indices
-- declared either without a TYPE clause or with the explicit type name
-- `default` (optionally with arguments). Three entry points are covered:
--   1. VECTOR INDEX declarations inside CREATE TABLE,
--   2. ALTER TABLE ... ADD VECTOR INDEX,
--   3. CREATE VECTOR INDEX ... ON table.
-- After each stage the contents of system.vector_indices for this table are
-- printed, so the output must match the accompanying reference file line by line.

-- Start from a clean state in case a previous run left the table behind.
DROP TABLE IF EXISTS t_create_default_index;

-- Create the table together with four default vector indices.
-- vector_1 .. vector_6 are Array(Float32) columns, each with a CHECK constraint
-- fixing its length to 3; vector_7 .. vector_12 are FixedString(3) columns and
-- have no length constraint in this script.
-- Only columns 1, 2, 7 and 8 get an index here; the remaining columns are
-- indexed by the ALTER TABLE and CREATE VECTOR INDEX statements further down.
CREATE TABLE t_create_default_index(
id UInt64,
vector_1 Array(Float32),
vector_2 Array(Float32),
vector_3 Array(Float32),
vector_4 Array(Float32),
vector_5 Array(Float32),
vector_6 Array(Float32),
vector_7 FixedString(3),
vector_8 FixedString(3),
vector_9 FixedString(3),
vector_10 FixedString(3),
vector_11 FixedString(3),
vector_12 FixedString(3),
CONSTRAINT vector_len_1 CHECK length(vector_1) = 3,
CONSTRAINT vector_len_2 CHECK length(vector_2) = 3,
CONSTRAINT vector_len_3 CHECK length(vector_3) = 3,
CONSTRAINT vector_len_4 CHECK length(vector_4) = 3,
CONSTRAINT vector_len_5 CHECK length(vector_5) = 3,
CONSTRAINT vector_len_6 CHECK length(vector_6) = 3,
-- TYPE clause omitted entirely (Array(Float32) column).
VECTOR INDEX vec_ind_1 vector_1,
-- Explicit `default` type name with an argument (Array(Float32) column).
VECTOR INDEX vec_ind_2 vector_2 TYPE default('metric_type=IP'),
-- TYPE clause omitted entirely (FixedString(3) column).
VECTOR INDEX vec_ind_7 vector_7,
-- Explicit `default` type name with an argument (FixedString(3) column).
VECTOR INDEX vec_ind_8 vector_8 TYPE default('metric_type=Jaccard'),
) ENGINE = MergeTree ORDER BY id;

-- Insert 10 rows (number = 0..9). Every Array(Float32) column receives the
-- 3-element array [number, number, number], which satisfies the length
-- constraints above; every FixedString(3) column receives
-- char(number, number, number).
INSERT INTO t_create_default_index SELECT
number,
[number, number, number],
[number, number, number],
[number, number, number],
[number, number, number],
[number, number, number],
[number, number, number],
char(number, number, number),
char(number, number, number),
char(number, number, number),
char(number, number, number),
char(number, number, number),
char(number, number, number)
FROM numbers(10);

-- Stage 1 check: list the indices declared in CREATE TABLE.
-- The label row separates the stages in the test output.
SELECT 'Create default vector indices when creating table';
SELECT name, type, expr FROM system.vector_indices WHERE table = 't_create_default_index';


-- Stage 2: add default vector indices to the existing table via ALTER TABLE.
-- Each column type is covered twice: once with the TYPE clause omitted and
-- once with a bare `TYPE default` (no arguments).
ALTER TABLE t_create_default_index ADD VECTOR INDEX vec_ind_3 vector_3;
ALTER TABLE t_create_default_index ADD VECTOR INDEX vec_ind_4 vector_4 TYPE default;

ALTER TABLE t_create_default_index ADD VECTOR INDEX vec_ind_9 vector_9;
ALTER TABLE t_create_default_index ADD VECTOR INDEX vec_ind_10 vector_10 TYPE default;

-- Stage 2 check: the listing now also contains the four indices added above.
SELECT 'Create default vector indices in ALTER TABLE ADD VECTOR INDEX query';
SELECT name, type, expr FROM system.vector_indices WHERE table = 't_create_default_index';

-- Stage 3: create default vector indices via the standalone CREATE VECTOR INDEX
-- statement, again with the TYPE clause omitted and with a bare `TYPE default`,
-- for both an Array(Float32) pair and a FixedString(3) pair of columns.
CREATE VECTOR INDEX vec_ind_5 ON t_create_default_index vector_5;
CREATE VECTOR INDEX vec_ind_6 ON t_create_default_index vector_6 TYPE default;

CREATE VECTOR INDEX vec_ind_11 ON t_create_default_index vector_11;
CREATE VECTOR INDEX vec_ind_12 ON t_create_default_index vector_12 TYPE default;

-- Stage 3 check: all twelve indices (vec_ind_1 .. vec_ind_12) should be listed.
SELECT 'Create default vector indices in CREATE VECTOR INDEX query';
SELECT name, type, expr FROM system.vector_indices WHERE table = 't_create_default_index';

-- Clean up so the test leaves no table behind.
DROP TABLE IF EXISTS t_create_default_index;

0 comments on commit 72ddda3

Please sign in to comment.