Skip to content

Commit

Permalink
MDEV-33095 innodb_flush_method=O_DIRECT creates excessive errors on S…
Browse files Browse the repository at this point in the history
…olaris

The directio(3C) function on Solaris is supported on NFS and UFS
while the majority of users should be on ZFS, which is a copy-on-write
file system that implements transparent compression and therefore
cannot support unbuffered I/O.

Let us remove the call to directio() and simply treat
innodb_flush_method=O_DIRECT in the same way as the previous
default value innodb_flush_method=fsync on Solaris. Also, let us
remove some dead code around calls to os_file_set_nocache() on
platforms where fcntl(2) is not usable with O_DIRECT.

On IBM AIX, O_DIRECT is not documented for fcntl(2), only for open(2).
  • Loading branch information
dr-m authored and grooverdan committed Jan 19, 2024
1 parent f63045b commit a6290a5
Show file tree
Hide file tree
Showing 13 changed files with 50 additions and 39 deletions.
3 changes: 3 additions & 0 deletions cmake/os/AIX.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,8 @@ ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -maix64 -pthread -mcmodel=large")
ENDIF()

# fcntl(fd, F_SETFL, O_DIRECT) is not supported; O_DIRECT is an open(2) flag
SET(HAVE_FCNTL_DIRECT 0 CACHE INTERNAL "")

# make it WARN by default, not AUTO (that implies -Werror)
SET(MYSQL_MAINTAINER_MODE "WARN" CACHE STRING "Enable MariaDB maintainer-specific warnings. One of: NO (warnings are disabled) WARN (warnings are enabled) ERR (warnings are errors) AUTO (warnings are errors in Debug only)")
4 changes: 4 additions & 0 deletions cmake/os/SunOS.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ INCLUDE(CheckSymbolExists)
INCLUDE(CheckCSourceRuns)
INCLUDE(CheckCSourceCompiles)

# fcntl(fd, F_SETFL, O_DIRECT) is not supported,
# and directio(3C) would only work on UFS or NFS, not ZFS.
SET(HAVE_FCNTL_DIRECT 0 CACHE INTERNAL "")

# Enable 64 bit file offsets
SET(_FILE_OFFSET_BITS 64)

Expand Down
1 change: 1 addition & 0 deletions cmake/os/WindowsCache.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ SET(HAVE_EXECINFO_H CACHE INTERNAL "")
SET(HAVE_FCHMOD CACHE INTERNAL "")
SET(HAVE_FCNTL CACHE INTERNAL "")
SET(HAVE_FCNTL_H 1 CACHE INTERNAL "")
SET(HAVE_FCNTL_DIRECT 0 CACHE INTERNAL "")
SET(HAVE_FCNTL_NONBLOCK CACHE INTERNAL "")
SET(HAVE_FDATASYNC CACHE INTERNAL "")
SET(HAVE_DECL_FDATASYNC CACHE INTERNAL "")
Expand Down
1 change: 1 addition & 0 deletions config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#cmakedefine HAVE_DLFCN_H 1
#cmakedefine HAVE_EXECINFO_H 1
#cmakedefine HAVE_FCNTL_H 1
#cmakedefine HAVE_FCNTL_DIRECT 1
#cmakedefine HAVE_FENV_H 1
#cmakedefine HAVE_FLOAT_H 1
#cmakedefine HAVE_FNMATCH_H 1
Expand Down
1 change: 1 addition & 0 deletions configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,7 @@ CHECK_SYMBOL_EXISTS(O_NONBLOCK "unistd.h;fcntl.h" HAVE_FCNTL_NONBLOCK)
IF(NOT HAVE_FCNTL_NONBLOCK)
SET(NO_FCNTL_NONBLOCK 1)
ENDIF()
CHECK_SYMBOL_EXISTS(O_DIRECT "fcntl.h" HAVE_FCNTL_DIRECT)

#
# Test for how the C compiler does inline, if at all
Expand Down
2 changes: 2 additions & 0 deletions extra/mariabackup/fil_cur.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,13 @@ xb_fil_cur_open(
return(XB_FIL_CUR_SKIP);
}

#ifdef HAVE_FCNTL_DIRECT
if (srv_file_flush_method == SRV_O_DIRECT
|| srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC) {

os_file_set_nocache(cursor->file, node->name, "OPEN");
}
#endif

posix_fadvise(cursor->file, 0, 0, POSIX_FADV_SEQUENTIAL);

Expand Down
2 changes: 1 addition & 1 deletion mysql-test/mariadb-test-run.pl
Original file line number Diff line number Diff line change
Expand Up @@ -4506,7 +4506,7 @@ ($$)
qr|InnoDB: io_setup\(\) failed with EAGAIN|,
qr|io_uring_queue_init\(\) failed with|,
qr|InnoDB: liburing disabled|,
qr/InnoDB: Failed to set (O_DIRECT|DIRECTIO_ON) on file/,
qr/InnoDB: Failed to set O_DIRECT on file/,
qr|setrlimit could not change the size of core files to 'infinity';|,
qr|feedback plugin: failed to retrieve the MAC address|,
qr|Plugin 'FEEDBACK' init function returned error|,
Expand Down
12 changes: 10 additions & 2 deletions storage/innobase/fil/fil0fil.cc
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,9 @@ static bool fil_node_open_file_low(fil_node_t *node)
ut_ad(!node->is_open());
ut_ad(node->space->is_closing());
mysql_mutex_assert_owner(&fil_system.mutex);
ulint type;
static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096, "compatibility");
#if defined _WIN32 || defined HAVE_FCNTL_DIRECT
ulint type;
switch (FSP_FLAGS_GET_ZIP_SSIZE(node->space->flags)) {
case 1:
case 2:
Expand All @@ -367,6 +368,9 @@ static bool fil_node_open_file_low(fil_node_t *node)
default:
type= OS_DATA_FILE;
}
#else
constexpr auto type= OS_DATA_FILE;
#endif

for (;;)
{
Expand Down Expand Up @@ -1937,9 +1941,10 @@ fil_ibd_create(
mtr.commit();
log_write_up_to(mtr.commit_lsn(), true);

ulint type;
static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096,
"compatibility");
#if defined _WIN32 || defined HAVE_FCNTL_DIRECT
ulint type;
switch (FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
case 1:
case 2:
Expand All @@ -1948,6 +1953,9 @@ fil_ibd_create(
default:
type = OS_DATA_FILE;
}
#else
constexpr auto type = OS_DATA_FILE;
#endif

file = os_file_create(
innodb_data_file_key, path,
Expand Down
5 changes: 0 additions & 5 deletions storage/innobase/fil/fil0pagecompress.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,6 @@ Updated 14/02/2015
#include "buf0lru.h"
#include "ibuf0ibuf.h"
#include "zlib.h"
#ifdef __linux__
#include <linux/fs.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#endif
#include "row0mysql.h"
#ifdef HAVE_LZ4
#include "lz4.h"
Expand Down
8 changes: 4 additions & 4 deletions storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4102,7 +4102,7 @@ static int innodb_init_params()

data_mysql_default_charset_coll = (ulint) default_charset_info->number;

#ifndef _WIN32
#ifdef HAVE_FCNTL_DIRECT
if (srv_use_atomic_writes && my_may_have_atomic_write) {
/*
Force O_DIRECT on Unixes (on Windows writes are always
Expand Down Expand Up @@ -4134,9 +4134,7 @@ static int innodb_init_params()
}
#endif

#ifndef _WIN32
ut_ad(srv_file_flush_method <= SRV_O_DIRECT_NO_FSYNC);
#else
#ifdef _WIN32
switch (srv_file_flush_method) {
case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */:
srv_file_flush_method = SRV_ALL_O_DIRECT_FSYNC;
Expand All @@ -4147,6 +4145,8 @@ static int innodb_init_params()
default:
ut_ad(srv_file_flush_method <= SRV_ALL_O_DIRECT_FSYNC);
}
#else
ut_ad(srv_file_flush_method <= SRV_O_DIRECT_NO_FSYNC);
#endif
innodb_buffer_pool_size_init();

Expand Down
10 changes: 6 additions & 4 deletions storage/innobase/include/os0file.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,11 @@ static const ulint OS_FILE_NORMAL = 62;
/* @} */

/** Types for file create @{ */
static const ulint OS_DATA_FILE = 100;
static const ulint OS_LOG_FILE = 101;
static const ulint OS_DATA_FILE_NO_O_DIRECT = 103;
static constexpr ulint OS_DATA_FILE = 100;
static constexpr ulint OS_LOG_FILE = 101;
#if defined _WIN32 || defined HAVE_FCNTL_DIRECT
static constexpr ulint OS_DATA_FILE_NO_O_DIRECT = 103;
#endif
/* @} */

/** Error codes from os_file_get_last_error @{ */
Expand Down Expand Up @@ -382,7 +384,7 @@ os_file_create_simple_no_error_handling_func(
bool* success)
MY_ATTRIBUTE((warn_unused_result));

#ifdef _WIN32
#ifndef HAVE_FCNTL_DIRECT
#define os_file_set_nocache(fd, file_name, operation_name) do{}while(0)
#else
/** Tries to disable OS caching on an opened file descriptor.
Expand Down
37 changes: 15 additions & 22 deletions storage/innobase/os/os0file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,7 @@ os_file_create_simple_func(
*success = false;

int create_flag;
const char* mode_str = NULL;
const char* mode_str __attribute__((unused));

ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
Expand Down Expand Up @@ -1046,12 +1046,14 @@ os_file_create_simple_func(

} while (retry);

#ifdef HAVE_FCNTL_DIRECT
/* This function is always called for data files, we should disable
OS caching (O_DIRECT) here as we do in os_file_create_func(), so
we open the same file in the same mode, see man page of open(2). */
if (!srv_read_only_mode && *success) {
os_file_set_nocache(file, name, mode_str);
}
#endif

#ifndef _WIN32
if (!read_only
Expand Down Expand Up @@ -1137,7 +1139,7 @@ os_file_create_func(
);

int create_flag;
const char* mode_str = NULL;
const char* mode_str __attribute__((unused));

on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT
? true : false;
Expand Down Expand Up @@ -1179,9 +1181,13 @@ os_file_create_func(
return(OS_FILE_CLOSED);
}

#if defined _WIN32 || defined HAVE_FCNTL_DIRECT
ut_a(type == OS_LOG_FILE
|| type == OS_DATA_FILE
|| type == OS_DATA_FILE_NO_O_DIRECT);
#else
ut_a(type == OS_LOG_FILE || type == OS_DATA_FILE);
#endif

ut_a(purpose == OS_FILE_AIO || purpose == OS_FILE_NORMAL);

Expand Down Expand Up @@ -1224,13 +1230,15 @@ os_file_create_func(

} while (retry);

#ifdef HAVE_FCNTL_DIRECT
/* We disable OS caching (O_DIRECT) only on data files */
if (!read_only
&& *success
&& type != OS_LOG_FILE
&& type != OS_DATA_FILE_NO_O_DIRECT) {
os_file_set_nocache(file, name, mode_str);
}
#endif

#ifndef _WIN32
if (!read_only
Expand Down Expand Up @@ -2147,12 +2155,14 @@ os_file_create_func(
}
break;

#if defined _WIN32 || defined HAVE_FCNTL_DIRECT
case SRV_O_DIRECT_NO_FSYNC:
case SRV_O_DIRECT:
if (type != OS_DATA_FILE) {
break;
}
/* fall through */
#endif
case SRV_ALL_O_DIRECT_FSYNC:
/*Traditional Windows behavior, no buffering for any files.*/
if (type != OS_DATA_FILE_NO_O_DIRECT) {
Expand Down Expand Up @@ -3037,17 +3047,14 @@ os_file_handle_error_cond_exit(
return(false);
}

#ifndef _WIN32
#ifdef HAVE_FCNTL_DIRECT
/** Tries to disable OS caching on an opened file descriptor.
@param[in] fd file descriptor to alter
@param[in] file_name file name, used in the diagnostic message
@param[in] name "open" or "create"; used in the diagnostic
message */
void
os_file_set_nocache(
int fd MY_ATTRIBUTE((unused)),
const char* file_name MY_ATTRIBUTE((unused)),
const char* operation_name MY_ATTRIBUTE((unused)))
os_file_set_nocache(int fd, const char *file_name, const char *operation_name)
{
const auto innodb_flush_method = srv_file_flush_method;
switch (innodb_flush_method) {
Expand All @@ -3058,18 +3065,6 @@ os_file_set_nocache(
return;
}

/* some versions of Solaris may not have DIRECTIO_ON */
#if defined(__sun__) && defined(DIRECTIO_ON)
if (directio(fd, DIRECTIO_ON) == -1) {
int errno_save = errno;

ib::error()
<< "Failed to set DIRECTIO_ON on file "
<< file_name << "; " << operation_name << ": "
<< strerror(errno_save) << ","
" continuing anyway.";
}
#elif defined(O_DIRECT)
if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
int errno_save = errno;
static bool warning_message_printed = false;
Expand All @@ -3088,10 +3083,8 @@ os_file_set_nocache(
<< ", continuing anyway.";
}
}
#endif /* defined(__sun__) && defined(DIRECTIO_ON) */
}

#endif /* _WIN32 */
#endif /* HAVE_FCNTL_DIRECT */

/** Check if the file system supports sparse files.
@param fh file handle
Expand Down
3 changes: 2 additions & 1 deletion storage/innobase/row/row0merge.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4193,13 +4193,14 @@ row_merge_file_create(
merge_file->fd = row_merge_file_create_low(path);
merge_file->offset = 0;
merge_file->n_rec = 0;

#ifdef HAVE_FCNTL_DIRECT
if (merge_file->fd != OS_FILE_CLOSED) {
if (srv_disable_sort_file_cache) {
os_file_set_nocache(merge_file->fd,
"row0merge.cc", "sort");
}
}
#endif
return(merge_file->fd);
}

Expand Down

0 comments on commit a6290a5

Please sign in to comment.