This repository has been archived by the owner on Nov 7, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add regular expressions module and tests
- Loading branch information
1 parent
3caff9b
commit fb8616e
Showing
8 changed files
with
255 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#ifndef REGULAR_EXPRESSIONS_H_ | ||
#define REGULAR_EXPRESSIONS_H_ | ||
|
||
#include <stdbool.h> | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
/** | ||
* Looks for sequences that match the requested pattern. | ||
* | ||
* @param src The string to search. | ||
* @param pattern The pattern to match. | ||
* @param numbuff The number of strings that can be stored in the buffer. | ||
* @param buffsizes The capacity of each buffer string in the buffer. | ||
* @param buffer The buffer to which the matched strings will be written. | ||
* @param itemsizes An array of size numbuff that will be used to return the | ||
* actual number of characters (not including the null character) to each | ||
* buffer string. | ||
* | ||
* @return The number of matches found. This number may be greater than the | ||
* capacity of the buffer. If so, only the first numbuff values are written to | ||
* the buffer. | ||
*/ | ||
int c_regex_match(const char *src, const char *pattern, int numbuff, | ||
int buffsizes, char **buffer, int *itemsizes); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
// regular_expressions.cpp | ||
|
||
#include "regular_expressions.h" | ||
#include <regex> | ||
#include <string> | ||
#include <cstring> | ||
|
||
using namespace std; | ||
|
||
#ifndef MIN | ||
#define MIN(a, b) ((a) < (b) ? (a) : (b)) | ||
#endif | ||
|
||
// References: | ||
// - https://www.cplusplus.com/reference/regex/regex_match/ | ||
|
||
|
||
/**
 * Matches the whole of @p src against @p pattern and copies the resulting
 * sub-matches into caller-supplied buffers.
 *
 * The match is performed with std::regex_match, so the pattern has to match
 * the entire input string. Entry 0 receives the full match and entry i
 * receives capture group i.
 *
 * @param src       Null-terminated string to match.
 * @param pattern   Null-terminated regular expression (default std::regex
 *                  grammar).
 * @param numbuff   Number of entries available in @p buffer and @p itemsizes.
 * @param buffsizes Capacity, in characters, of each string in @p buffer.
 * @param buffer    Array of @p numbuff writable strings. A terminating null
 *                  character is appended only when the stored text is shorter
 *                  than @p buffsizes.
 * @param itemsizes Array of @p numbuff integers receiving the number of
 *                  characters actually stored in the matching buffer entry
 *                  (never more than @p buffsizes).
 *
 * @return The number of sub-matches found (full match plus capture groups).
 *  This may exceed @p numbuff, in which case only the first @p numbuff
 *  entries are written. Returns 0 when there is no match, when @p src or
 *  @p pattern is null, or when the pattern cannot be compiled or evaluated.
 */
int c_regex_match(const char *src, const char *pattern, int numbuff,
    int buffsizes, char **buffer, int *itemsizes)
{
    if (src == nullptr || pattern == nullptr)
    {
        return 0;
    }

    // Perform the match process. This function is called from C and Fortran,
    // so no C++ exception (bad pattern, complexity limit, out of memory) may
    // be allowed to escape.
    std::cmatch groups;
    try
    {
        const std::regex expression(pattern);
        std::regex_match(src, groups, expression);
    }
    catch (...)
    {
        return 0;
    }

    const int count = static_cast<int>(groups.size());

    // Without both output arrays there is nowhere to store results; the
    // caller still learns how many sub-matches exist.
    int stored = 0;
    if (buffer != nullptr && itemsizes != nullptr)
    {
        stored = count < numbuff ? count : numbuff;
    }

    // A negative capacity is treated as "no room" rather than being
    // reinterpreted as a huge unsigned value.
    const std::size_t capacity =
        buffsizes > 0 ? static_cast<std::size_t>(buffsizes) : 0;

    // Process each match
    for (int i = 0; i < stored; ++i)
    {
        if (buffer[i] == nullptr)
        {
            itemsizes[i] = 0;
            continue;
        }

        const std::csub_match &group = groups[static_cast<std::size_t>(i)];
        const std::size_t length = static_cast<std::size_t>(group.length());
        const std::size_t copied = length < capacity ? length : capacity;

        if (copied > 0)
        {
            std::memcpy(buffer[i], group.first, copied);
        }
        if (copied < capacity)
        {
            buffer[i][copied] = '\0';
        }

        // Report what was stored, not the untruncated length, so callers can
        // index their buffer with this value safely.
        itemsizes[i] = static_cast<int>(copied);
    }

    // End
    return count;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
! regular_expressions.f90 | ||
|
||
! https://docs.microsoft.com/en-us/cpp/standard-library/regex?view=vs-2019 | ||
|
||
!> @brief A collection of routines providing regular expression support.
!!
!! The matching work is delegated to the C-callable routine c_regex_match;
!! this module supplies the binding and Fortran-friendly wrappers around it.
module regular_expressions
    use iso_fortran_env
    use iso_c_binding
    use strings
    implicit none
    private
    public :: regex_match

! ******************************************************************************
! C INTERFACE
! ------------------------------------------------------------------------------
    interface
        !> @brief Binding to the C routine c_regex_match.
        !!
        !! @param[in] src The null-terminated string to match.
        !! @param[in] pattern The null-terminated pattern.
        !! @param[in] numbuff The number of entries in @p buffer and
        !!  @p itemsizes.
        !! @param[in] buffsizes The capacity of each buffer string.
        !! @param[in] buffer The addresses of the strings that receive the
        !!  results.
        !! @param[out] itemsizes The number of characters reported for each
        !!  buffer string.
        !!
        !! @return The number of matches found, which may exceed @p numbuff.
        function c_regex_match(src, pattern, numbuff, buffsizes, buffer, &
            itemsizes) result(rst) bind(C, name = "c_regex_match")
            ! Arguments
            use iso_c_binding
            character(kind = c_char), intent(in) :: src(*), pattern(*)
            integer(c_int), intent(in), value :: numbuff, buffsizes
            ! The C routine reads these addresses and writes through them; it
            ! never assigns the pointers themselves, so they are inputs.
            type(c_ptr), intent(in) :: buffer(numbuff)
            integer(c_int), intent(out) :: itemsizes(numbuff)
            integer(c_int) :: rst
        end function
    end interface

! ******************************************************************************
! FORTRAN INTERFACES
! ------------------------------------------------------------------------------
    !> @brief Matches a string against a pattern and returns the full match
    !! followed by each captured group.
    interface regex_match
        module procedure :: regex_match_char
        module procedure :: regex_match_str
    end interface

! ------------------------------------------------------------------------------
contains
! ------------------------------------------------------------------------------
    !> @brief Matches a string against a pattern and returns the full match
    !! followed by each captured group.
    !!
    !! @param[in] src The string to match.
    !! @param[in] pattern The pattern to match.
    !!
    !! @return The strings reported by c_regex_match: item 1 is the full
    !!  match and the remaining items are the captured groups.  The array is
    !!  empty if nothing matched.  At most 1024 items of at most 1024
    !!  characters each are returned; anything beyond that is dropped.
    function regex_match_char(src, pattern) result(rst)
        ! Arguments
        character(len = *), intent(in) :: src, pattern
        type(string), allocatable, dimension(:) :: rst

        ! Parameters
        integer(c_int), parameter :: bufferSize = 1024
        integer(c_int), parameter :: numBuffers = 1024

        ! Local Variables
        integer(int32) :: i, nsrc, nptrn
        integer(c_int) :: nitems, nstored, n
        type(c_ptr), allocatable, dimension(:) :: buffer
        character(kind = c_char, len = bufferSize), allocatable, target, &
            dimension(:) :: bufferStrings
        character(kind = c_char), allocatable, dimension(:) :: csrc, cpattern
        integer(c_int), allocatable, dimension(:) :: sizeList

        ! Get the location of each buffer string
        allocate(buffer(numBuffers))
        allocate(bufferStrings(numBuffers))
        do i = 1, numBuffers
            buffer(i) = c_loc(bufferStrings(i))
        end do

        ! Convert the input strings into something useful for C
        nsrc = len(src) + 1
        nptrn = len(pattern) + 1    ! +1 allows for null character
        allocate(csrc(nsrc))
        allocate(cpattern(nptrn))
        call to_c_string(src, csrc, nsrc)
        call to_c_string(pattern, cpattern, nptrn)

        ! Perform the operation
        allocate(sizeList(numBuffers))
        nitems = c_regex_match(csrc, cpattern, numBuffers, bufferSize, &
            buffer, sizeList)

        ! The returned count may exceed the number of buffers supplied, and a
        ! reported length may exceed the buffer capacity.  Clamp both so that
        ! only storage that was actually provided is read.
        nstored = max(0_c_int, min(nitems, numBuffers))

        ! Define output
        allocate(rst(nstored))
        do i = 1, nstored
            n = max(0_c_int, min(sizeList(i), bufferSize))
            rst(i)%str = bufferStrings(i)(1:n)
        end do
    end function

! --------------------
    !> @brief Matches a string against a pattern and returns the full match
    !! followed by each captured group.
    !!
    !! @param[in] src The string to match.
    !! @param[in] pattern The pattern to match.
    !!
    !! @return The result of regex_match_char applied to the character
    !!  contents of @p src and @p pattern.
    function regex_match_str(src, pattern) result(rst)
        ! Arguments
        class(string), intent(in) :: src, pattern
        type(string), allocatable, dimension(:) :: rst

        ! Process
        rst = regex_match_char(src%str, pattern%str)
    end function

! ------------------------------------------------------------------------------
end module
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
set(test_sources | ||
test_fcore.f90 | ||
test_fcore_strings.f90 | ||
test_fcore_regex.f90 | ||
) | ||
|
||
# Build the executable | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
! test_fcore_regex.f90 | ||
|
||
!> @brief Tests for the regular_expressions module.
module test_fcore_regex
    use iso_fortran_env
    use strings
    use regular_expressions
    implicit none
contains
! ------------------------------------------------------------------------------
    !> @brief Checks that regex_match returns the full match followed by each
    !! captured group for the pattern "(sub)(.*)" applied to "subject".
    !!
    !! @return Returns true if every check passes; else, false.
    function test_regex_match() result(rst)
        ! Variables
        logical :: rst
        character(len = *), parameter :: test_string = &
            "subject"
        character(len = *), parameter :: pattern = "(sub)(.*)"

        type(string), allocatable, dimension(:) :: matches

        ! Initialization
        rst = .true.

        ! Test 1
        matches = regex_match(test_string, pattern)

        ! The remaining checks index matches(1:3), so stop here on a count
        ! mismatch.
        if (size(matches) /= 3) then
            rst = .false.
            print '(A, I0, A)', &
                "TEST_REGEX_MATCH (Test 1a): Expected: 3, but found: ", &
                size(matches), "."
            return
        end if

        if (matches(1) /= "subject") then
            rst = .false.
            print '(A)', &
                "TEST_REGEX_MATCH (Test 1b): Expected: subject, but found: " &
                // matches(1)%str // "."
        end if

        if (matches(2) /= "sub") then
            rst = .false.
            print '(A)', &
                "TEST_REGEX_MATCH (Test 1c): Expected: sub, but found: " &
                // matches(2)%str // "."
        end if

        if (matches(3) /= "ject") then
            rst = .false.
            print '(A)', &
                "TEST_REGEX_MATCH (Test 1d): Expected: ject, but found: " &
                // matches(3)%str // "."
        end if
    end function

! ------------------------------------------------------------------------------
end module