-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
10,468 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
" | ||
Perl-Style Regular Expressions in Smalltalk | ||
Documentation | ||
The documentation category of this method contains substantial documentation on the operation of this Class. | ||
Re aGeneralComment | ||
Re aGlobalSearchComment | ||
Re aRegexComment | ||
Re aRegexGoryDetailsComment | ||
Re aVersionsComment | ||
Re anReComment | ||
Re anReOverviewComment | ||
Re aLicenseComment | ||
Examples: | ||
(Re on: 'a.*y') search: 'Candy is dandy.' | ||
'a.*y' asRe search: 'Candy is dandy.' | ||
'Candy is dandy' reMatch: 'a.*y' | ||
(Re on: '\w+') searchAndCollect: 'Candy is dandy.' | ||
'\w+' asRe searchAndCollect: 'Candy is dandy.' | ||
'Candy is dandy.' reMatch: '\w+' andCollect: [:m | m match] | ||
Structure: | ||
pattern String -- the string with the regular expression source code | ||
compiledPattern RePlugin representing a compiled pattern | ||
isAnchored Boolean -- representing an option setting | ||
is ... Booleans -- for the other options below | ||
List of common public methods: | ||
#opt: | ||
sets options using Perl-style string | ||
#beAnchored #beNotAnchored #isAnchored #isAnchored: | ||
#beBeginningOfLine #beNotBeginningOfLine #isBeginningOfLine #isBeginningOfLine: | ||
#beCaseSensitive #beNotCaseSensitive #isCaseSensitive #isCaseSensitive: | ||
#beDollarEndOnly #beNotDollarEndOnly #isDollarEndOnly #isDollarEndOnly: | ||
#beDotIncludesNewline #beNotDotIncludesNewline #isDotIncludesNewline #isDotIncludesNewline: | ||
#beEndOfLine #beNotEndOfLine #isEndOfLine #isEndOfLine: | ||
#beExtended #beNotExtended #isExtended #isExtended: | ||
#beExtra #beNotExtra #isExtra #isNotExtra: | ||
#beGreedy #beNotGreedy #isGreedy #isGreedy: | ||
#beMultiline #beNotMultiline #isMultiline #isMultiline: | ||
Getters and setters for options in traditional Smalltalk style | ||
search: aTargetString | ||
search: aTargetString from: startInteger to: endInteger | ||
Compiling the pattern, if necessary, search a string (or substring) using the pattern. Answers nil if no match. | ||
searchAndCollect: aTargetString | ||
search: aTargetString andCollect: aBlock | ||
search: aTargetString andCollect: aBlock matchCount: anInteger | ||
Compiling the pattern, if necessary, gather all (or, if specified, the first anInteger) non-overlapping matches to me in aTargetString. Answer a collection of the results of applying aBlock to each ReMatch result. | ||
search: aTargetString andReplace: aBlock | ||
search: aTargetString andReplace: aBlock matchCount: anInteger | ||
Compiling the pattern, if necessary, find all (or, if specified, the first anInteger) non-overlapping matches to me in aTargetString. Answer a new string, created by substituting the results of applying aBlock to each ReMatch result for the matched substring. | ||
(44 16 109 1 1 18 2 23 2 16 2 27 2 19 2 14 2 22 3 1 19 1 12 280 11 236 30 1 6 40 687 66 8 13 8 13 7 12 5 10 1 118 18 13 9 13 13 6 9 13 13 6 13 9 1 217 8 13 13 6 9 13 13 6 13 9 1 266)bf3,bf1,f1,bf1,f1,f1LRe aGeneralComment;,f1,f1LRe aGlobalSearchComment;,f1,f1LRe aRegexComment;,f1,f1LRe aRegexGoryDetailsComment;,f1,f1LRe aVersionsComment;,f1,f1LRe anReComment;,f1,f1LRe anReOverviewComment;,bf1,f1,f1LRe aLicenseComment;,f1,bf1,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1,f1b,f1 | ||
" | ||
Class { | ||
#name : #Re, | ||
#superclass : #Object, | ||
#instVars : [ | ||
'pattern', | ||
'compiledPattern', | ||
'isAnchored', | ||
'isCaseSensitive', | ||
'isDollarEndOnly', | ||
'isDotIncludesNewline', | ||
'isExtended', | ||
'isExtra', | ||
'isMultiline', | ||
'isBeginningOfLine', | ||
'isEndOfLine', | ||
'isGreedy' | ||
], | ||
#category : #RePCRE | ||
} | ||
|
||
{ #category : #documentation } | ||
Re >> anOptionsComment [ " | ||
Compilation and Matching Options | ||
Message Name Code Explanation | ||
beCaseSensitive -i Case sensitive matching | ||
beNotCaseSensitive i Ignore case during matching | ||
beNotMultiline -m Anchor chars don't match line ending | ||
beMultiline m Anchor chars match on line ending | ||
beNotDotIncludesNewline -s '.' does not match line ending | ||
beDotIncludesNewline s '.' matches line endings | ||
beNotExtended -x extended mode off (see below) | ||
beExtended x extended mode on (see below) | ||
beNotDollarEndOnly -E $ matches \n before end of line | ||
beDollarEndOnly E $ does not match \n before end of line | ||
beGreedy -U quantifiers have ordinary meaning | ||
beNotGreedy U reverses meaning of * and *?, also + and +? | ||
beNotExtra -X PCRE Extra mode off (see below) | ||
beExtra X PCRE Extra mode on (see below) | ||
beNotAnchored -A Matches may begin anywhere | ||
beAnchored A Matches must start with first character | ||
beBeginningOfLine -B subject starts at beginning of a line | ||
beNotBeginningOfLine B subject start not at beginning of a line | ||
beEndOfLine -Z subject end may be at end of line | ||
beNotEndOfLine Z subject end may not be at end of a line | ||
In extended mode (beExtended), whitespace is ignored unless escaped, and # precedes comment to next newline. PCRE Extra mode is described in detail in the accompanying documentation. Options may be changed at any time, but a pattern recompile occurs after changing the value of any option other than anchored (A), beginning of line (B) or end of line (Z). | ||
Options may be specified using messages or by Perl-style option codes: | ||
'a.*y' asRe | ||
beNotCaseSensitive; | ||
beDotIncludesNewline; | ||
search: 'CANDY IS ', Character cr asString, 'DANDY, BUT LIQUOR IS QUICKER' | ||
'a.*y' asRe | ||
opt: 'is'; | ||
search: 'CANDY IS ', Character cr asString, 'DANDY, BUT LIQUOR IS QUICKER' | ||
" | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
" | ||
ReMatch: Perl-Style Regular Expression Search Results | ||
I. Introduction | ||
This Class is part of a package of classes providing a Smalltalk wrapper to Philip Hazel's excellent PCRE library. The Plugin interface and Smalltalk wrapper were written by Andrew C. Greenberg. As discussed in RePattern aGeneralComment, the functionality is essentially embodied in this class, Class RePattern and certain new messages in Class String. A summary of the regular expression syntax can be found in RePattern aRegexComment and a summary of the compile option codes available can be found in RePattern anOptionsComment. | ||
II. Principal Match Results | ||
The substring of searchString matched by re is given by: | ||
m match | ||
which can be derived from searchString as follows: | ||
m searchString | ||
copyFrom: (m from) | ||
to: (m to) | ||
III. Captured Groups (and Collections of Captured Groups) | ||
The number of substrings capturable by a parenthetical grouping in an re (regardless of the number actually matched to create m) is given by: | ||
m numGroups | ||
The string captured by parenthetical grouping i, where 1<=i<=(m numGroups) is given by | ||
m matchAt: i | ||
and this can be generated as follows: | ||
m searchString | ||
copyFrom: (m fromAt: i) | ||
to: (m toAt: i) | ||
And an array of size (m numGroups) can be generated from strings and indices accordingly: | ||
m matches | ||
m froms | ||
m tos | ||
! | ||
ReMatch class | ||
instanceVariableNames: '' | ||
" | ||
Class { | ||
#name : #ReMatch, | ||
#superclass : #Object, | ||
#instVars : [ | ||
'matchArray', | ||
're', | ||
'searchString', | ||
'pos', | ||
'endpos' | ||
], | ||
#category : #RePCRE | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
" | ||
RePattern: Compiled Perl-Style Regular Expressions | ||
I. Introduction. | ||
This Smalltalk implementation of modern Perl-Style regular expressions was compiled by Andrew Greenberg <[email protected]> and contributors, based upon the excellent PCRE library by Philip Hazel. As discussed in RePattern aGeneralComment, the functionality is essentially embodied in this class, Class ReMatch and certain new messages in Class String. A summary of the regular expression syntax can be found in RePattern aRegexComment and a summary of the compile option codes available can be found in RePattern anOptionsComment. | ||
A substantially more detailed description of RePlugin is available by downloading the file ""RePluginDoco,"" which can be obtained from http://www.gate.net/~werdna/RePlugin.html, into your default directory, and then executing | ||
Utilities reconstructTextWindowsFromFileNamed: 'RePluginDoco' | ||
II. To Search a String or Substring For Pattern Matches (Once Only): | ||
Examples: | ||
'Squeak or Squawk!' reMatch: '^Squ(ea|aw)k' | ||
'Squeak or Squawk!' reMatch: '^Squ(ea|aw)k' opt: 'imsxABEXZ' | ||
'Squeak or Squawk!' reMatch: '^Squ(ea|aw)k!' from: 11 | ||
more generally, | ||
srchStr reMatch: patStr [opt: oStr] [from: start] [to: stop] | ||
For a one-time search of a string (or substring) for occurrences of a match pattern. The message will be answered with nil (if there is no match) or an instance of ReMatch, which can then be queried for further details about the match. | ||
III. Global Searching and Replacing | ||
The re package provides rudimentary facilities for global searches and replacements on a string. The following expressions | ||
'\w+' reMatch: 'this is a test' collect: [:m | m] | ||
(RePattern on: '\w+') search: 'this is a test' collect: [:m | m] | ||
return an ordered collection of the results of repeated non-overlapping applications of the pattern to the string, or nil if there are no matches in the string. To produce a list of matched strings, you can for example execute the following: | ||
'\w+' reMatch: 'this is a test' collect: [:m| m match] | ||
(RePattern on: '\w+') search: 'this is a test' collect: [:m | m match] | ||
You can also perform global search and string replacements, where the answer is a string with unmatched text left alone, and matched text replaced by the result of a call to a Block passed the ReMatch object as a single parameter. For example, | ||
'\w+' reMatch: 'this is a test' sub: [:m| '<', (m match), '>'] | ||
and | ||
(RePattern on: '\w+') search: 'this is a test' sub: [:m| '<', (m match), '>'] | ||
return a string with each nonblank word surrounded by angle brackets. For more details, see RePattern aGlobalSearchComment. | ||
IV. To Create Compiled Regular Expression Objects (For Repeated Matching): | ||
'^Squ(ea|aw)k!$' asRePattern | ||
'^Squ(ea|aw)k!$' asRePatternOpt: 'imsxAEX' | ||
'^Squ(ea|aw)k!$' asRePatternOpt: 'imsxAEX' onErrorRun: aBlock | ||
RePattern on: '^Squ(ea|aw)k!$' | ||
RePattern on: '^Squ(ea|aw)k!$' opt: 'imsxAEX' | ||
RePattern | ||
on: '^Squ(ea|aw)k!$' | ||
opt: 'imsxAEX' | ||
onErrorRun: [:pat :offset :message | ""your code here"" ] | ||
Each of the preceding expressions returns an instance of RePattern, compiled for efficient matching when the pattern is repeatedly searched against different strings. RePattern ordinarily caches a dozen or so of the most recently compiled patterns, but nevertheless incurs a cost for the table lookup. To avoid compile and lookup costs, use the above messages. To perform a one-time search, see above. | ||
V. To Search a Compiled Regexp Against A String or Substring for Matches: | ||
searchString reMatch: re [from: from] [to: to] [opt: optStr] | ||
or | ||
re search: searchString [from: from] [to: to] [opt: optStr] | ||
Examples: | ||
'Squeak or Squawk' reMatch: re. | ||
re search: 'Squeak or Squawk!'. | ||
re search: 'Squeak or Squawk!' opt: 'ABZ'. | ||
If no match is found, these messages answer nil. Otherwise, they answer with a corresponding instance of ReMatch.! | ||
RePattern class | ||
instanceVariableNames: 'Patterns Options CompileObjects Front ' | ||
" | ||
Class { | ||
#name : #RePattern, | ||
#superclass : #Object, | ||
#instVars : [ | ||
'pattern', | ||
'compileOptions', | ||
'pcrePointer', | ||
'extraPointer', | ||
'errorString', | ||
'offset', | ||
'matchOptions', | ||
'matchSpace', | ||
'lastMatchResult' | ||
], | ||
#classInstVars : [ | ||
'Patterns', | ||
'Options', | ||
'CompileObjects', | ||
'Front' | ||
], | ||
#category : #RePCRE | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
" | ||
/* Regular Expression Plugin (This class comment becomes part of rePlugin.c) | ||
RePlugin translate: 'RePlugin.c' doInlining: true. | ||
See documentation and source code for the PCRE C Library Code. This plugin is designed to serve an object such as RePattern: | ||
patternStr A 0-terminated string comprising the pattern to be compiled. | ||
compileFlags An Integer representing re compiler options | ||
PCREBuffer A ByteArray of regular expression bytecodes | ||
extraPtr A ByteArray of match optimization data (or nil) | ||
errorString A String Object For Holding an Error Message (when compile failed) | ||
errorOffset The index in patternStr (0-based) where the error occurred (when compile failed) | ||
matchFlags An Integer representing re matcher options | ||
matchSpaceObj An Integer array for match results and workspace during matching. | ||
The instance variables must appear in the preceding order. MatchSpaceObj must be allocated by the calling routine and contain at least 6*(numGroups+1) bytes. | ||
*/ | ||
#include ""pcre.h"" | ||
#include ""internal.h"" | ||
/* Slight machine-specific hack for MacOS Memory Management */ | ||
#ifdef TARGET_OS_MAC | ||
#define malloc(ptr) NewPtr(ptr) | ||
#define free(ptr) DisposePtr(ptr) | ||
#endif | ||
/* Adjust malloc and free routines as used by PCRE */ | ||
void rePluginFree(void * aPointer); | ||
void * rePluginMalloc(size_t anInteger); | ||
void *(*pcre_malloc)(size_t) = rePluginMalloc; | ||
void (*pcre_free)(void *) = rePluginFree; | ||
" | ||
Class { | ||
#name : #RePlugin, | ||
#superclass : #Object, | ||
#instVars : [ | ||
'netMemory', | ||
'numAllocs', | ||
'numFrees', | ||
'lastAlloc', | ||
'patternStr', | ||
'rcvr', | ||
'compileFlags', | ||
'pcrePtr', | ||
'extraPtr', | ||
'errorStr', | ||
'errorOffset', | ||
'matchFlags', | ||
'patternStrPtr', | ||
'errorStrBuffer' | ||
], | ||
#category : #RePCRE | ||
} |
Oops, something went wrong.