Skip to content

Commit

Permalink
MDEV-31340 Remove MY_COLLATION_HANDLER::strcasecmp()
Browse files Browse the repository at this point in the history
This patch also fixes:
  MDEV-33050 Built-in schemas like oracle_schema are accent insensitive
  MDEV-33084 LASTVAL(t1) and LASTVAL(T1) do not work well with lower-case-table-names=0
  MDEV-33085 Tables T1 and t1 do not work well with ENGINE=CSV and lower-case-table-names=0
  MDEV-33086 SHOW OPEN TABLES IN DB1 -- is case insensitive with lower-case-table-names=0
  MDEV-33088 Cannot create triggers in the database `MYSQL`
  MDEV-33103 LOCK TABLE t1 AS t2 -- alias is not case sensitive with lower-case-table-names=0
  MDEV-33109 DROP DATABASE MYSQL -- does not drop SP with lower-case-table-names=0
  MDEV-33110 HANDLER commands are case insensitive with lower-case-table-names=0
  MDEV-33119 User is case insensitive in INFORMATION_SCHEMA.VIEWS
  MDEV-33120 System log table names are case insensitive with lower-case-table-names=0

- Removing the virtual function strcasecmp() from MY_COLLATION_HANDLER

- Adding a wrapper function CHARSET_INFO::streq(), to compare
  two strings for equality. For now it calls strnncoll() internally.
  In the future it will turn into a virtual function.

- Adding new accent sensitive case insensitive collations:
    - utf8mb4_general1400_as_ci
    - utf8mb3_general1400_as_ci
  They implement accent sensitive case insensitive comparison.
  The weight of a character is equal to the code point of its
  upper case variant. These collations use Unicode-14.0.0 casefolding data.

  The result of
     my_charset_utf8mb3_general1400_as_ci.strcoll()
  is very close to the former
     my_charset_utf8mb3_general_ci.strcasecmp()

  There is only a difference in a couple dozen rare characters, because:
    - the switch from "tolower" to "toupper" comparison, to make
      utf8mb3_general1400_as_ci closer to utf8mb3_general_ci
    - the switch from Unicode-3.0.0 to Unicode-14.0.0
  This difference should be tolerable. See the list of affected
  characters in the MDEV description.

  Note, utf8mb4_general1400_as_ci correctly handles non-BMP characters!
  Unlike utf8mb4_general_ci, it does not treat all non-BMP characters
  as equal.

- Adding classes representing names of the file based database objects:

    Lex_ident_db
    Lex_ident_table
    Lex_ident_trigger

  Their comparison collation depends on the underlying
  file system case sensitivity and on --lower-case-table-names
  and can be either my_charset_bin or my_charset_utf8mb3_general1400_as_ci.

- Adding classes representing names of other database objects,
  whose names have case insensitive comparison style,
  using my_charset_utf8mb3_general1400_as_ci:

  Lex_ident_column
  Lex_ident_sys_var
  Lex_ident_user_var
  Lex_ident_sp_var
  Lex_ident_ps
  Lex_ident_i_s_table
  Lex_ident_window
  Lex_ident_func
  Lex_ident_partition
  Lex_ident_with_element
  Lex_ident_rpl_filter
  Lex_ident_master_info
  Lex_ident_host
  Lex_ident_locale
  Lex_ident_plugin
  Lex_ident_engine
  Lex_ident_server
  Lex_ident_savepoint
  Lex_ident_charset
  engine_option_value::Name

- All the mentioned Lex_ident_xxx classes implement a method streq():

  if (ident1.streq(ident2))
     do_equal();

  This method works as a wrapper for CHARSET_INFO::streq().

- Changing a lot of "LEX_CSTRING name" to "Lex_ident_xxx name"
  in class members and in function/method parameters.

- Replacing all calls like
    system_charset_info->coll->strcasecmp(ident1, ident2)
  with
    ident1.streq(ident2)

- Taking advantage of the C++11 user defined literal operator
  for LEX_CSTRING (see m_strings.h) and Lex_ident_xxx (see lex_ident.h)
  data types. Use example:

  const Lex_ident_column primary_key_name= "PRIMARY"_Lex_ident_column;

  is now a shorter version of:

  const Lex_ident_column primary_key_name=
    Lex_ident_column({STRING_WITH_LEN("PRIMARY")});
  • Loading branch information
abarkov committed Apr 18, 2024
1 parent 159b7ca commit fd247cc
Show file tree
Hide file tree
Showing 204 changed files with 8,969 additions and 3,200 deletions.
6 changes: 3 additions & 3 deletions client/mysql.cc
Expand Up @@ -207,9 +207,9 @@ static void my_vidattr(chtype attrs)
#endif

#ifdef FN_NO_CASE_SENSE
#define cmp_database(cs,A,B) my_strcasecmp((cs), (A), (B))
#define cmp_database(A,B) my_strcasecmp_latin1((A), (B))
#else
#define cmp_database(cs,A,B) strcmp((A),(B))
#define cmp_database(A,B) strcmp((A),(B))
#endif

#include "completion_hash.h"
Expand Down Expand Up @@ -4764,7 +4764,7 @@ com_use(String *buffer __attribute__((unused)), char *line)
*/
get_current_db();

if (!current_db || cmp_database(charset_info, current_db,tmp))
if (!current_db || cmp_database(current_db, tmp))
{
if (one_database)
{
Expand Down
10 changes: 8 additions & 2 deletions client/mysqlcheck.c
Expand Up @@ -247,6 +247,12 @@ static char *fix_table_name(char *dest, char *src);
int what_to_do = 0;


/*
  Compare two database names.
  The comparison is delegated to my_strcasecmp_latin1(); callers treat
  a zero result as "the names match".
*/
static inline int cmp_database(const char *a, const char *b)
{
  const int diff= my_strcasecmp_latin1(a, b);
  return diff;
}


static void usage(void)
{
DBUG_ENTER("usage");
Expand Down Expand Up @@ -869,10 +875,10 @@ static int use_db(char *database)
DBUG_ENTER("use_db");

if (mysql_get_server_version(sock) >= FIRST_INFORMATION_SCHEMA_VERSION &&
!my_strcasecmp(&my_charset_latin1, database, INFORMATION_SCHEMA_DB_NAME))
!cmp_database(database, INFORMATION_SCHEMA_DB_NAME))
DBUG_RETURN(1);
if (mysql_get_server_version(sock) >= FIRST_PERFORMANCE_SCHEMA_VERSION &&
!my_strcasecmp(&my_charset_latin1, database, PERFORMANCE_SCHEMA_DB_NAME))
!cmp_database(database, PERFORMANCE_SCHEMA_DB_NAME))
DBUG_RETURN(1);
if (mysql_select_db(sock, database))
{
Expand Down
45 changes: 29 additions & 16 deletions client/mysqldump.cc
Expand Up @@ -658,6 +658,19 @@ static int dump_tablespaces_for_databases(char** databases);
static int dump_tablespaces(char* ts_where);
static void print_comment(FILE *, my_bool, const char *, ...);


/*
  Compare two database names.
  The comparison is delegated to my_strcasecmp_latin1(); callers treat
  a zero result as "the names match".
*/
static inline int cmp_database(const char *a, const char *b)
{
  const int diff= my_strcasecmp_latin1(a, b);
  return diff;
}


/*
  Compare two table names.
  The comparison is delegated to my_strcasecmp_latin1(); callers treat
  a zero result as "the names match".
*/
static inline int cmp_table(const char *a, const char *b)
{
  const int diff= my_strcasecmp_latin1(a, b);
  return diff;
}


/*
Print the supplied message if in verbose mode
Expand Down Expand Up @@ -2993,10 +3006,10 @@ static uint dump_routines_for_db(char *db)
static inline my_bool general_log_or_slow_log_tables(const char *db,
const char *table)
{
return (!my_strcasecmp(charset_info, db, "mysql")) &&
(!my_strcasecmp(charset_info, table, "general_log") ||
!my_strcasecmp(charset_info, table, "slow_log") ||
!my_strcasecmp(charset_info, table, "transaction_registry"));
return (!cmp_database(db, "mysql")) &&
(!cmp_table(table, "general_log") ||
!cmp_table(table, "slow_log") ||
!cmp_table(table, "transaction_registry"));
}
/*
get_sequence_structure-- retrieves sequence structure, prints out corresponding
Expand Down Expand Up @@ -4165,8 +4178,8 @@ static void dump_table(const char *table, const char *db, const uchar *hash_key,
discarding SHOW CREATE EVENT statements generation. The mysql.event
table data should be skipped too.
*/
if (!opt_events && !my_strcasecmp(&my_charset_latin1, db, "mysql") &&
!my_strcasecmp(&my_charset_latin1, table, "event"))
if (!opt_events && !cmp_database(db, "mysql") &&
!cmp_table(table, "event"))
{
verbose_msg("-- Skipping data table mysql.event, --skip-events was used\n");
DBUG_VOID_RETURN;
Expand Down Expand Up @@ -5430,15 +5443,15 @@ static int dump_all_databases()
while ((row= mysql_fetch_row(tableres)))
{
if (mysql_get_server_version(mysql) >= FIRST_INFORMATION_SCHEMA_VERSION &&
!my_strcasecmp(&my_charset_latin1, row[0], INFORMATION_SCHEMA_DB_NAME))
!cmp_database(row[0], INFORMATION_SCHEMA_DB_NAME))
continue;

if (mysql_get_server_version(mysql) >= FIRST_PERFORMANCE_SCHEMA_VERSION &&
!my_strcasecmp(&my_charset_latin1, row[0], PERFORMANCE_SCHEMA_DB_NAME))
!cmp_database(row[0], PERFORMANCE_SCHEMA_DB_NAME))
continue;

if (mysql_get_server_version(mysql) >= FIRST_SYS_SCHEMA_VERSION &&
!my_strcasecmp(&my_charset_latin1, row[0], SYS_SCHEMA_DB_NAME))
!cmp_database(row[0], SYS_SCHEMA_DB_NAME))
continue;

if (include_database(row[0]))
Expand All @@ -5458,15 +5471,15 @@ static int dump_all_databases()
while ((row= mysql_fetch_row(tableres)))
{
if (mysql_get_server_version(mysql) >= FIRST_INFORMATION_SCHEMA_VERSION &&
!my_strcasecmp(&my_charset_latin1, row[0], INFORMATION_SCHEMA_DB_NAME))
!cmp_database(row[0], INFORMATION_SCHEMA_DB_NAME))
continue;

if (mysql_get_server_version(mysql) >= FIRST_PERFORMANCE_SCHEMA_VERSION &&
!my_strcasecmp(&my_charset_latin1, row[0], PERFORMANCE_SCHEMA_DB_NAME))
!cmp_database(row[0], PERFORMANCE_SCHEMA_DB_NAME))
continue;

if (mysql_get_server_version(mysql) >= FIRST_SYS_SCHEMA_VERSION &&
!my_strcasecmp(&my_charset_latin1, row[0], SYS_SCHEMA_DB_NAME))
!cmp_database(row[0], SYS_SCHEMA_DB_NAME))
continue;

if (include_database(row[0]))
Expand Down Expand Up @@ -5676,7 +5689,7 @@ static int dump_all_tables_in_db(char *database)
char hash_key[2*NAME_LEN+2]; /* "db.tablename" */
char *afterdot;
my_bool transaction_registry_table_exists= 0;
int using_mysql_db= !my_strcasecmp(charset_info, database, "mysql");
int using_mysql_db= !cmp_database(database, "mysql");
DBUG_ENTER("dump_all_tables_in_db");

afterdot= strmov(hash_key, database);
Expand Down Expand Up @@ -5787,7 +5800,7 @@ static int dump_all_tables_in_db(char *database)
after 'UNLOCK TABLES' query is executed on the session, get the table
structure from server and dump it in the file.
*/
if (using_mysql_db && !my_strcasecmp(charset_info, table, "transaction_registry"))
if (using_mysql_db && !cmp_table(table, "transaction_registry"))
transaction_registry_table_exists= 1;
}
}
Expand Down Expand Up @@ -6070,9 +6083,9 @@ static int dump_selected_tables(char *db, char **table_names, int tables)
/* Can't LOCK TABLES in I_S / P_S, so don't try. */
if (lock_tables &&
!(mysql_get_server_version(mysql) >= FIRST_INFORMATION_SCHEMA_VERSION &&
!my_strcasecmp(&my_charset_latin1, db, INFORMATION_SCHEMA_DB_NAME)) &&
!cmp_database(db, INFORMATION_SCHEMA_DB_NAME)) &&
!(mysql_get_server_version(mysql) >= FIRST_PERFORMANCE_SCHEMA_VERSION &&
!my_strcasecmp(&my_charset_latin1, db, PERFORMANCE_SCHEMA_DB_NAME)))
!cmp_database(db, PERFORMANCE_SCHEMA_DB_NAME)))
{
if (mysql_real_query(mysql, lock_tables_query.str,
(ulong)lock_tables_query.length-1))
Expand Down
2 changes: 1 addition & 1 deletion client/mysqlshow.c
Expand Up @@ -413,7 +413,7 @@ list_dbs(MYSQL *mysql,const char *wild)
if (wild && mysql_num_rows(result) == 1)
{
row= mysql_fetch_row(result);
if (!my_strcasecmp(&my_charset_latin1, row[0], wild))
if (!my_strcasecmp_latin1(row[0], wild))
{
mysql_free_result(result);
if (opt_status)
Expand Down
2 changes: 0 additions & 2 deletions include/ft_global.h
Expand Up @@ -56,8 +56,6 @@ struct _ft_vft_ext
#define FTS_ORDERED_RESULT (1LL << 1)
#define FTS_DOCID_IN_RESULT (1LL << 2)

#define FTS_DOC_ID_COL_NAME "FTS_DOC_ID"

#ifndef FT_CORE
struct st_ft_info
{
Expand Down
50 changes: 43 additions & 7 deletions include/m_ctype.h
Expand Up @@ -547,8 +547,6 @@ struct my_collation_handler_st
const char *wildstr,const char *wildend,
int escape,int w_one, int w_many);

int (*strcasecmp)(CHARSET_INFO *, const char *, const char *);

uint (*instr)(CHARSET_INFO *,
const char *b, size_t b_length,
const char *s, size_t s_length,
Expand Down Expand Up @@ -804,6 +802,17 @@ struct charset_info_st

#ifdef __cplusplus
/* Character set routines */

/*
  Check that a comparison operand is well formed.

  LEX_CSTRING::str is allowed to be NULL, but only together with
  LEX_CSTRING::length equal to 0. The string does not have to be
  0-terminated.

  @param str  the operand to check
  @return     true if the operand is valid, false otherwise
*/
static bool is_valid_string(const LEX_CSTRING &str)
{
  /* The only invalid combination: no buffer, yet a non-zero length */
  const bool null_with_length= str.str == NULL && str.length != 0;
  return !null_with_length;
}

bool use_mb() const
{
return mbmaxlen > 1;
Expand Down Expand Up @@ -1027,6 +1036,26 @@ struct charset_info_st
return state & MY_CS_COMPILED;
}

/*
  Test two strings for equality.

  For now this is only a wrapper around strnncoll(). Eventually
  MY_COLLATION_HANDLER may get a separate, more optimized virtual
  function streq().

  @param a  the first string
  @param b  the second string
  @return   non-zero if strnncoll() reports the strings as equal, else 0
*/
my_bool streq(const LEX_CSTRING a, const LEX_CSTRING b) const
{
  const int cmp= strnncoll(a, b, FALSE);
  return cmp == 0;
}

int strnncoll(const LEX_CSTRING a, const LEX_CSTRING b,
my_bool b_is_prefix= FALSE) const
{
DBUG_ASSERT(is_valid_string(a));
DBUG_ASSERT(is_valid_string(b));
return (coll->strnncoll)(this,
(const uchar *) a.str, a.length,
(const uchar *) b.str, b.length, b_is_prefix);
}

int strnncoll(const uchar *a, size_t alen,
const uchar *b, size_t blen, my_bool b_is_prefix= FALSE) const
{
Expand Down Expand Up @@ -1392,6 +1421,10 @@ extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1;
extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1_nopad;
extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_filename;
extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_utf8mb3_general_ci;
extern MYSQL_PLUGIN_IMPORT struct charset_info_st
my_charset_utf8mb3_general1400_as_ci;
extern MYSQL_PLUGIN_IMPORT struct charset_info_st
my_charset_utf8mb4_general1400_as_ci;

extern struct charset_info_st my_charset_big5_bin;
extern struct charset_info_st my_charset_big5_chinese_ci;
Expand Down Expand Up @@ -1658,7 +1691,6 @@ extern size_t my_caseup_ujis(CHARSET_INFO *,
extern size_t my_casedn_ujis(CHARSET_INFO *,
const char *src, size_t srclen,
char *dst, size_t dstlen);
extern int my_strcasecmp_mb(CHARSET_INFO * cs,const char *, const char *);

int my_wildcmp_mb(CHARSET_INFO *,
const char *str,const char *str_end,
Expand All @@ -1677,9 +1709,6 @@ int my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many);

int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const char *s, const char *t);

void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2);

Expand Down Expand Up @@ -1838,7 +1867,6 @@ size_t my_convert_fix(CHARSET_INFO *dstcs, char *dst, size_t dst_length,
#define my_binary_compare(s) ((s)->state & MY_CS_BINSORT)
#define use_strnxfrm(s) ((s)->state & MY_CS_STRNXFRM)
#define my_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d), 0))
#define my_strcasecmp(s, a, b) ((s)->coll->strcasecmp((s), (a), (b)))

/**
Detect if the leftmost character in a string is a valid multi-byte character
Expand Down Expand Up @@ -1886,6 +1914,14 @@ my_well_formed_length(CHARSET_INFO *cs, const char *b, const char *e,
}


/*
  Compare two strings with my_strcasecmp_8bit(), always using
  my_charset_latin1 as the character set.

  @param a  the first string
  @param b  the second string
  @return   the value returned by my_strcasecmp_8bit()
*/
static inline int
my_strcasecmp_latin1(const char *a, const char *b)
{
  const int diff= my_strcasecmp_8bit(&my_charset_latin1, a, b);
  return diff;
}



/* XXX: still need to take care of this one */
#ifdef MY_CHARSET_TIS620
#error The TIS620 charset is broken at the moment. Tell tim to fix it.
Expand Down
9 changes: 9 additions & 0 deletions include/m_string.h
Expand Up @@ -220,6 +220,15 @@ template<typename T> inline constexpr const char *_swl_check(T s)

typedef struct st_mysql_const_lex_string LEX_CSTRING;

#ifdef __cplusplus
/*
  User defined literal operator producing a LEX_CSTRING from a string
  literal, e.g.:

    "abc"_LEX_CSTRING    is the same as    LEX_CSTRING{"abc", 3}

  The suffix is attached directly to "" (no whitespace in between).
  Written as a separate token, _LEX_CSTRING would be an identifier
  reserved for the implementation (underscore followed by an upper case
  letter), and that spelling of a literal operator declaration is
  deprecated. Uses of the literal are not affected.

  @param str     the characters of the string literal
  @param length  the length of the literal, as supplied by the compiler
  @return        LEX_CSTRING{str, length}
*/
static inline constexpr
LEX_CSTRING operator""_LEX_CSTRING(const char *str, size_t length)
{
  return LEX_CSTRING{str, length};
}
#endif /* __cplusplus */


/* A variant with const and unsigned */
struct st_mysql_const_unsigned_lex_string
{
Expand Down
15 changes: 15 additions & 0 deletions mysql-test/main/ctype_like_range.result
Expand Up @@ -4477,3 +4477,18 @@ DROP TABLE t1;
#
# End of 10.2 tests
#
#
# Start of 11.5 tests
#
#
# MDEV-33806 Server crashes when executing Admin SQL/DML after setting character_set_collations to utf8mb3_general1400_as_ci
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb3 COLLATE utf8mb3_general1400_as_ci);
INSERT INTO t1 VALUES ('111%');
SELECT a, HEX(LIKE_RANGE_MAX(a,40)) FROM t1 ORDER BY a;
a HEX(LIKE_RANGE_MAX(a,40))
111% 313131EFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBF20
DROP TABLE t1;
#
# End of 11.5 tests
#
18 changes: 18 additions & 0 deletions mysql-test/main/ctype_like_range.test
Expand Up @@ -197,3 +197,21 @@ DROP TABLE t1;
--echo #
--echo # End of 10.2 tests
--echo #


--echo #
--echo # Start of 11.5 tests
--echo #

--echo #
--echo # MDEV-33806 Server crashes when executing Admin SQL/DML after setting character_set_collations to utf8mb3_general1400_as_ci
--echo #

CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb3 COLLATE utf8mb3_general1400_as_ci);
INSERT INTO t1 VALUES ('111%');
SELECT a, HEX(LIKE_RANGE_MAX(a,40)) FROM t1 ORDER BY a;
DROP TABLE t1;

--echo #
--echo # End of 11.5 tests
--echo #
25 changes: 25 additions & 0 deletions mysql-test/main/ctype_utf8mb3_geeral1400_as_ci.result
@@ -0,0 +1,25 @@
#
# Start of 11.5 tests
#
#
# MDEV-33806 Server crashes when executing Admin SQL/DML after setting character_set_collations to utf8mb3_general1400_as_ci
#
CREATE TABLE t1(a CHAR (32),KEY (a)) DEFAULT CHARSET=utf8mb3 COLLATE utf8mb3_general1400_as_ci;
SELECT * FROM t1 WHERE a LIKE 'a%';
a
INSERT INTO t1 VALUES ('a');
SELECT * FROM t1 WHERE a LIKE 'a%';
a
a
FOR i IN 0..32
DO
INSERT INTO t1 VALUES (CONCAT('b', i));
END FOR;
$$
SELECT * FROM t1 WHERE a LIKE 'a%';
a
a
DROP TABLE t1;
#
# End of 11.5 tests
#
25 changes: 25 additions & 0 deletions mysql-test/main/ctype_utf8mb3_geeral1400_as_ci.test
@@ -0,0 +1,25 @@
--echo #
--echo # Start of 11.5 tests
--echo #

--echo #
--echo # MDEV-33806 Server crashes when executing Admin SQL/DML after setting character_set_collations to utf8mb3_general1400_as_ci
--echo #

CREATE TABLE t1(a CHAR (32),KEY (a)) DEFAULT CHARSET=utf8mb3 COLLATE utf8mb3_general1400_as_ci;
SELECT * FROM t1 WHERE a LIKE 'a%';
INSERT INTO t1 VALUES ('a');
SELECT * FROM t1 WHERE a LIKE 'a%';
DELIMITER $$;
FOR i IN 0..32
DO
INSERT INTO t1 VALUES (CONCAT('b', i));
END FOR;
$$
DELIMITER ;$$
SELECT * FROM t1 WHERE a LIKE 'a%';
DROP TABLE t1;

--echo #
--echo # End of 11.5 tests
--echo #

0 comments on commit fd247cc

Please sign in to comment.