Skip to content
This repository has been archived by the owner on Nov 7, 2023. It is now read-only.

Commit

Permalink
Add regular expressions module and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jchristopherson committed Jun 16, 2020
1 parent 3caff9b commit fb8616e
Show file tree
Hide file tree
Showing 8 changed files with 255 additions and 0 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ endif()
# set(nonlin_LIBRARIES nonlin)
# endif()

# Locate the include directory
include_directories(${PROJECT_SOURCE_DIR}/include)

# Locate the source directory
add_subdirectory(src)

Expand Down
32 changes: 32 additions & 0 deletions include/regular_expressions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef REGULAR_EXPRESSIONS_H_
#define REGULAR_EXPRESSIONS_H_

#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
* Matches the whole of a string against a regular expression and returns the
* overall match followed by one entry per capture group of the pattern.
*
* @param src The null-terminated string to search.
* @param pattern The null-terminated regular expression to match.
* @param numbuff The number of strings that can be stored in the buffer.
* @param buffsizes The capacity, in characters, of each string in the buffer.
* @param buffer An array of numbuff pointers, each referencing storage for
* buffsizes characters, to which the matched strings will be written.
* @param itemsizes An array of size numbuff that will be used to return the
* actual number of characters (not including the null character) written to
* each buffer string.
*
* @return The number of matches found. This number may be greater than the
* capacity of the buffer. If so, only the first numbuff values are written to
* the buffer.
*/
int c_regex_match(const char *src, const char *pattern, int numbuff,
int buffsizes, char **buffer, int *itemsizes);

#ifdef __cplusplus
}
#endif
#endif
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ set(fcore_sources
strings.f90
strings_string_builder.f90
strings_ops.f90
regular_expressions.f90
regular_expressions.cpp
)

# Build the library
Expand Down
40 changes: 40 additions & 0 deletions src/regular_expressions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// regular_expressions.cpp

#include "regular_expressions.h"
#include <regex>
#include <string>
#include <cstring>

using namespace std;

// Evaluates to the smaller of a and b.  Each argument may be evaluated twice,
// so do not pass expressions that have side effects.
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif

// References:
// - https://www.cplusplus.com/reference/regex/regex_match/


/**
 * Matches the whole of src against the regular expression in pattern and
 * copies the overall match, followed by each capture group, into buffer.
 *
 * @param src The null-terminated string to search.
 * @param pattern The null-terminated regular expression.
 * @param numbuff The number of strings available in buffer.
 * @param buffsizes The capacity, in characters, of each string in buffer.
 * @param buffer An array of numbuff pointers to caller-owned storage.
 * @param itemsizes An array of size numbuff that receives the number of
 *  characters actually written to each buffer string (never more than
 *  buffsizes, and not counting any terminating null character).
 *
 * @return The number of matches found (overall match + capture groups).
 *  This may exceed numbuff, in which case only the first numbuff matches are
 *  stored.  Zero is returned if nothing matched, if src or pattern is null,
 *  or if the pattern is not a valid regular expression.
 */
int c_regex_match(const char *src, const char *pattern, int numbuff,
    int buffsizes, char **buffer, int *itemsizes)
{
    // Nothing can be matched without both a subject and a pattern.
    if (src == nullptr || pattern == nullptr) return 0;

    // This routine is called through a C ABI (e.g. from Fortran); a C++
    // exception must never propagate past this frame.
    try
    {
        // Perform the match process
        std::cmatch cm;
        const std::regex ex(pattern);
        std::regex_match(src, cm, ex);

        const int count = static_cast<int>(cm.size());
        if (buffer == nullptr || itemsizes == nullptr) return count;

        // A non-positive capacity means that no characters can be stored.
        const std::size_t capacity =
            buffsizes > 0 ? static_cast<std::size_t>(buffsizes) : 0;
        const int nstore = count < numbuff ? count : numbuff;

        // Process each match
        for (int i = 0; i < nstore; ++i)
        {
            const std::string rst = cm[i].str();
            std::size_t ncopy = rst.size() < capacity ? rst.size() : capacity;
            if (buffer[i] == nullptr) ncopy = 0;

            if (ncopy > 0) std::memcpy(buffer[i], rst.data(), ncopy);

            // Terminate the string whenever there is room to do so.
            if (buffer[i] != nullptr && ncopy < capacity)
                buffer[i][ncopy] = '\0';

            // Report what was actually written so that the caller never
            // reads beyond the end of its own buffer.
            itemsizes[i] = static_cast<int>(ncopy);
        }

        // End
        return count;
    }
    catch (...)
    {
        // Invalid pattern (std::regex_error), allocation failure, etc.
        return 0;
    }
}
118 changes: 118 additions & 0 deletions src/regular_expressions.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
! regular_expressions.f90

! https://docs.microsoft.com/en-us/cpp/standard-library/regex?view=vs-2019

!> @brief A collection of routines providing regular expression support.
module regular_expressions
use iso_fortran_env
use iso_c_binding
use strings
implicit none
private
public :: regex_match

! ******************************************************************************
! C INTERFACE
! ------------------------------------------------------------------------------
interface
    !> @brief Interface to the C routine that performs the regular expression
    !! match.
    !!
    !! @param[in] src The null-terminated string to search.
    !! @param[in] pattern The null-terminated pattern to match.
    !! @param[in] numbuff The number of strings available in @p buffer.
    !! @param[in] buffsizes The capacity of each string in @p buffer.
    !! @param[in] buffer An array of C pointers, each referencing the storage
    !!  for one output string.  The pointer values are only read by the C
    !!  routine (the storage they reference is written), so the array itself
    !!  is an input.
    !! @param[out] itemsizes The number of characters in each output string.
    !!
    !! @return The number of matches found.
    function c_regex_match(src, pattern, numbuff, buffsizes, buffer, &
        itemsizes) result(rst) bind(C, name = "c_regex_match")
        ! Arguments
        use iso_c_binding, only : c_char, c_int, c_ptr
        character(kind = c_char), intent(in) :: src(*), pattern(*)
        integer(c_int), intent(in), value :: numbuff, buffsizes
        type(c_ptr), intent(in) :: buffer(numbuff)
        integer(c_int), intent(out) :: itemsizes(numbuff)
        integer(c_int) :: rst
    end function
end interface

! ******************************************************************************
! FORTRAN INTERFACES
! ------------------------------------------------------------------------------
!> @brief Looks for sequences that match the requested pattern.
!!
!! @par Overloads
!! - regex_match_char: accepts character(len = *) arguments.
!! - regex_match_str: accepts string-type arguments and forwards their
!!  character contents to regex_match_char.
interface regex_match
module procedure :: regex_match_char
module procedure :: regex_match_str
end interface

! ------------------------------------------------------------------------------
contains
! ------------------------------------------------------------------------------
!> @brief Looks for sequences that match the requested pattern.
!!
!! @param[in] src The string to search.
!! @param[in] pattern The pattern to match.
!!
!! @return A list of all matching sequences in @p src.  At most 1024
!!  sequences are returned, and each sequence is limited to 1024 characters;
!!  anything beyond those limits is discarded.
function regex_match_char(src, pattern) result(rst)
    ! Arguments
    character(len = *), intent(in) :: src, pattern
    type(string), allocatable, dimension(:) :: rst

    ! Parameters
    integer(c_int), parameter :: bufferSize = 1024
    integer(c_int), parameter :: numBuffers = 1024

    ! Local Variables
    integer(int32) :: i, nsrc, nptrn
    integer(c_int) :: nfound, nitems, n
    type(c_ptr), allocatable, dimension(:) :: buffer
    character(kind = c_char, len = bufferSize), allocatable, target, &
        dimension(:) :: bufferStrings
    character(kind = c_char), allocatable, dimension(:) :: csrc, cpattern
    integer(c_int), allocatable, dimension(:) :: sizeList

    ! Get the location of each buffer string
    allocate(buffer(numBuffers))
    allocate(bufferStrings(numBuffers))
    do i = 1, numBuffers
        buffer(i) = c_loc(bufferStrings(i))
    end do

    ! Convert the input strings into something useful for C
    nsrc = len(src) + 1
    nptrn = len(pattern) + 1    ! +1 allows for null character
    allocate(csrc(nsrc))
    allocate(cpattern(nptrn))
    call to_c_string(src, csrc, nsrc)
    call to_c_string(pattern, cpattern, nptrn)

    ! Perform the operation
    allocate(sizeList(numBuffers), source = 0_c_int)
    nfound = c_regex_match(csrc, cpattern, numBuffers, bufferSize, &
        buffer, sizeList)

    ! The C routine reports the total number of matches, which may exceed
    ! the number of buffers actually filled.  Only the filled buffers can be
    ! read back.
    nitems = max(0_c_int, min(nfound, numBuffers))

    ! Define output
    allocate(rst(nitems))
    do i = 1, nitems
        ! Never index beyond the storage of an individual buffer string
        n = max(0_c_int, min(sizeList(i), bufferSize))
        rst(i)%str = bufferStrings(i)(1:n)
    end do
end function

! --------------------
!> @brief Looks for sequences that match the requested pattern.
!!
!! This overload accepts string objects and defers to the character-based
!! implementation.
!!
!! @param[in] src The string to search.
!! @param[in] pattern The pattern to match.
!!
!! @return A list of all matching sequences in @p src.
function regex_match_str(src, pattern) result(rst)
    ! Arguments
    class(string), intent(in) :: src, pattern
    type(string), allocatable, dimension(:) :: rst

    ! Forward the underlying character data to the character overload
    associate (text => src%str, expr => pattern%str)
        rst = regex_match_char(text, expr)
    end associate
end function

! ------------------------------------------------------------------------------

! ------------------------------------------------------------------------------

! ------------------------------------------------------------------------------

! ------------------------------------------------------------------------------

! ------------------------------------------------------------------------------
end module
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
set(test_sources
test_fcore.f90
test_fcore_strings.f90
test_fcore_regex.f90
)

# Build the executable
Expand Down
3 changes: 3 additions & 0 deletions tests/test_fcore.f90
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
program main
use iso_fortran_env
use test_fcore_strings
use test_fcore_regex
implicit none

! Local Variables
Expand Down Expand Up @@ -33,6 +34,8 @@ program main
local = test_to_lower_1()
if (.not.local) overall = .false.

local = test_regex_match()
if (.not.local) overall = .false.

! End
if (overall) then
Expand Down
56 changes: 56 additions & 0 deletions tests/test_fcore_regex.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
! test_fcore_regex.f90

module test_fcore_regex
use iso_fortran_env
use strings
use regular_expressions
implicit none
contains
! ------------------------------------------------------------------------------
!> @brief Tests regex_match using a pattern with two capture groups.
!!
!! The pattern "(sub)(.*)" applied to "subject" is expected to yield three
!! items: the overall match followed by the contents of each capture group.
!!
!! @return Returns true if every check passes; else, false.
function test_regex_match() result(rst)
    ! Variables
    logical :: rst
    character(len = *), parameter :: test_string = &
        "subject"
    character(len = *), parameter :: pattern = "(sub)(.*)"

    type(string), allocatable, dimension(:) :: matches

    ! Initialization
    rst = .true.

    ! Test 1
    matches = regex_match(test_string, pattern)

    ! The remaining checks index matches(1:3), so stop here if the count is
    ! not as expected.
    if (size(matches) /= 3) then
        rst = .false.
        print '(A, I0, A)', &
            "TEST_REGEX_MATCH (Test 1a): Expected: 3, but found: ", &
            size(matches), "."
        return
    end if

    if (matches(1) /= "subject") then
        rst = .false.
        print '(A)', &
            "TEST_REGEX_MATCH (Test 1b): Expected: subject, but found: " &
            // matches(1)%str // "."
    end if

    if (matches(2) /= "sub") then
        rst = .false.
        print '(A)', &
            "TEST_REGEX_MATCH (Test 1c): Expected: sub, but found: " &
            // matches(2)%str // "."
    end if

    if (matches(3) /= "ject") then
        rst = .false.
        print '(A)', &
            "TEST_REGEX_MATCH (Test 1d): Expected: ject, but found: " &
            // matches(3)%str // "."
    end if
end function

! ------------------------------------------------------------------------------
end module

0 comments on commit fb8616e

Please sign in to comment.