IDA C++ SDK 9.2
Loading...
Searching...
No Matches
regex.h
Go to the documentation of this file.
1/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE2 is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016 University of Cambridge
11
12-----------------------------------------------------------------------------
13Redistribution and use in source and binary forms, with or without
14modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37POSSIBILITY OF SUCH DAMAGE.
38-----------------------------------------------------------------------------
39*/
40
41
42#ifndef _REGEX_H_
43#define _REGEX_H_
44#ifdef __cplusplus
45#include <kernwin.hpp>
46#endif
47
48#include <pro.h>
49
50#ifdef __GNUC__
51#undef __P
52#endif
53
/* Offset type used for the rm_so / rm_eo match positions. */
typedef off_t regoff_t;
55/* The structure representing a compiled regular expression. */
56
struct regex_t
{
  int re_magic;         /* NOTE(review): purpose is not documented in this header */
  size_t re_nsub;       /* number of parenthesized subexpressions */
  const char *re_endp;  /* end pointer for REG_PEND */
  void *re_g;           /* none of your business :-) */
};
64
65/* The structure in which a captured offset is returned. */
66
68{
69 regoff_t rm_so; /* start of match */
70 regoff_t rm_eo; /* end of match */
71};
72#ifndef REG_ICASE
73/* Options, mostly defined by POSIX, but with some extras. */
74
#define REG_ICASE     0x0001  /* Maps to PCRE2_CASELESS */
#define REG_NEWLINE   0x0002  /* Maps to PCRE2_MULTILINE */
#define REG_NOTBOL    0x0004  /* Maps to PCRE2_NOTBOL */
#define REG_NOTEOL    0x0008  /* Maps to PCRE2_NOTEOL */
#define REG_DOTALL    0x0010  /* NOT defined by POSIX; maps to PCRE2_DOTALL */
#define REG_NOSUB     0x0020  /* Maps to PCRE2_NO_AUTO_CAPTURE */
#define REG_UTF       0x0040  /* NOT defined by POSIX; maps to PCRE2_UTF */
#define REG_STARTEND  0x0080  /* BSD feature: pass subject string by so,eo */
#define REG_NOTEMPTY  0x0100  /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */
#define REG_UNGREEDY  0x0200  /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */
#define REG_UCP       0x0400  /* NOT defined by POSIX; maps to PCRE2_UCP */
#define REG_PEND      0x0800  /* GNU feature: pass end pattern by re_endp */
#define REG_NOSPEC    0x1000  /* Maps to PCRE2_LITERAL */

/* This is not used by PCRE2, but by defining it we make it easier
to slot PCRE2 into existing programs that make POSIX calls. */

#define REG_EXTENDED  0
#define REG_TRACE     0       /* unsupported by PCRE2 */
94
95/* Error values. Not all these are relevant or used by the wrapper. */
96
97
/* Error codes; the first one is 1 and the rest follow consecutively. */
enum
{
  REG_ASSERT = 1,  /* internal error ? */
  REG_BADBR,       /* invalid repeat counts in {} */
  REG_BADPAT,      /* pattern error */
  REG_BADRPT,      /* ? * + invalid */
  REG_EBRACE,      /* unbalanced {} */
  REG_EBRACK,      /* unbalanced [] */
  REG_ECOLLATE,    /* collation error - not relevant */
  REG_ECTYPE,      /* bad class */
  REG_EESCAPE,     /* bad escape sequence */
  REG_EMPTY,       /* empty expression */
  REG_EPAREN,      /* unbalanced () */
  REG_ERANGE,      /* bad range inside [] */
  REG_ESIZE,       /* expression too big */
  REG_ESPACE,      /* failed to get memory */
  REG_ESUBREG,     /* bad back reference */
  REG_INVARG,      /* bad argument */
  REG_NOMATCH      /* match failed */
};
118#endif //REG_ICASE
119
120/* The functions */
121
122// compile the regular expression
123idaman THREAD_SAFE int ida_export qregcomp(
124 struct regex_t *preg,
125 const char *pattern,
126 int cflags);
127
128// mapping from error codes returned by qregcomp() and qregexec() to a string
129idaman THREAD_SAFE size_t ida_export qregerror(
130 int errcode,
131 const struct regex_t *preg,
132 char *errbuf,
133 size_t errbuf_size);
134
135// match regex against a string
136idaman THREAD_SAFE int ida_export qregexec(
137 const struct regex_t *preg,
138 const char *str,
139 size_t nmatch,
140 struct regmatch_t pmatch[],
141 int eflags);
142
143// free any memory allocated by qregcomp
144idaman THREAD_SAFE void ida_export qregfree(struct regex_t *preg);
145
146
147#ifdef __cplusplus
148
149//-------------------------------------------------------------------------
150class refcnted_regex_t : public qrefcnt_obj_t
151{
152 regex_t regex;
153
154 refcnted_regex_t()
155 {
156 regex = {};
157 }
158 virtual ~refcnted_regex_t()
159 {
160 qregfree(&regex);
161 }
162public:
163 virtual void idaapi release(void) override
164 {
165 delete this;
166 }
167 int exec(const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
168 {
169 return qregexec(&regex, string, nmatch, pmatch, eflags);
170 }
171 int process_errors(int code, qstring *errmsg)
172 {
173 if ( code != 0 && errmsg != nullptr )
174 {
175 char errbuf[MAXSTR];
176 qregerror(code, &regex, errbuf, sizeof(errbuf));
177 *errmsg = errbuf;
178 }
179 return code;
180 }
181 static refcnted_regex_t *create(
182 const qstring &text,
183 bool case_insensitive,
184 qstring *errmsg)
185 {
186 if ( text.empty() )
187 return nullptr;
188 refcnted_regex_t *p = new refcnted_regex_t();
189 int rflags = REG_EXTENDED;
190 if ( case_insensitive )
191 rflags |= REG_ICASE;
192 int code = qregcomp(&p->regex, text.begin(), rflags);
193 if ( p->process_errors(code, errmsg) != 0 )
194 {
195 // It is unnecessary to qregfree() here: the deletion of 'p' will
196 // call qregfree (but anyway, even that is unnecessary, because
197 // if we end up here, it means qregcomp() failed, and when that
198 // happens, qregcomp() frees the regex itself.)
199 delete p;
200 p = nullptr;
201 }
202 return p;
203 }
204 size_t nsub(void)
205 {
206 /* number of parenthesized subexpressions */
207 return regex.re_nsub;
208 }
209 DECLARE_UNCOPYABLE(refcnted_regex_t);
210};
212
213//---------------------------------------------------------------------------
215{
216 bool _find_or_create(regex_ptr_t **out, const qstring &str, qstring *errbuf=nullptr)
217 {
218 regex_cache_map_t::iterator it = cache.find(str);
219 if ( it == cache.end() )
220 {
221 qstring errmsg;
222 regex_ptr_t rx = regex_ptr_t(refcnted_regex_t::create(str, false, errbuf));
223 if ( rx == nullptr )
224 return false;
225 it = cache.insert(regex_cache_map_t::value_type(str, rx)).first;
226 }
227 *out = &it->second;
228 return true;
229 }
231 {
232 regex_ptr_t *ptr;
233 qstring errbuf;
234 if ( !_find_or_create(&ptr, str, &errbuf) )
235 error("%s", errbuf.c_str());
236 return *ptr;
237 }
238
239private:
240 typedef std::map<qstring, regex_ptr_t> regex_cache_map_t;
241 regex_cache_map_t cache;
242};
243
244#endif //__cplusplus
245#endif /* !_REGEX_H_ */
bool empty(void) const
Does the qstring have 0 non-null elements?
Definition pro.h:3168
iterator begin(void)
Get a pointer to the beginning of the qstring.
Definition pro.h:3177
const qchar * c_str(void) const
Convert the qstring to a char *.
Definition pro.h:3170
qrefcnt_obj_t(void)
Constructor.
Definition pro.h:2972
Smart pointer to objects derived from qrefcnt_obj_t.
Definition pro.h:2920
static refcnted_regex_t * create(const qstring &text, bool case_insensitive, qstring *errmsg)
Definition regex.h:181
DECLARE_UNCOPYABLE(refcnted_regex_t)
virtual void idaapi release(void) override
Call destructor.
Definition regex.h:163
int process_errors(int code, qstring *errmsg)
Definition regex.h:171
int exec(const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
Definition regex.h:167
size_t nsub(void)
Definition regex.h:204
int code
Definition fpro.h:88
Defines the interface between the kernel and the UI.
This is the first header included in the IDA project.
_qstring< char > qstring
regular string
Definition pro.h:3694
@ REG_INVARG
Definition regex.h:115
@ REG_ERANGE
Definition regex.h:111
@ REG_EMPTY
Definition regex.h:109
@ REG_ESIZE
Definition regex.h:112
@ REG_ASSERT
Definition regex.h:100
@ REG_NOMATCH
Definition regex.h:116
@ REG_BADPAT
Definition regex.h:102
@ REG_ECOLLATE
Definition regex.h:106
@ REG_EBRACK
Definition regex.h:105
@ REG_ECTYPE
Definition regex.h:107
@ REG_ESPACE
Definition regex.h:113
@ REG_EESCAPE
Definition regex.h:108
@ REG_EPAREN
Definition regex.h:110
@ REG_ESUBREG
Definition regex.h:114
@ REG_BADBR
Definition regex.h:101
@ REG_BADRPT
Definition regex.h:103
@ REG_EBRACE
Definition regex.h:104
qrefcnt_t< refcnted_regex_t > regex_ptr_t
Definition regex.h:211
idaman THREAD_SAFE int ida_export qregexec(const struct regex_t *preg, const char *str, size_t nmatch, struct regmatch_t pmatch[], int eflags)
idaman THREAD_SAFE size_t ida_export qregerror(int errcode, const struct regex_t *preg, char *errbuf, size_t errbuf_size)
idaman THREAD_SAFE int ida_export qregcomp(struct regex_t *preg, const char *pattern, int cflags)
idaman THREAD_SAFE void ida_export qregfree(struct regex_t *preg)
off_t regoff_t
Definition regex.h:54
regex_cache_t
Definition regex.h:215
regex_ptr_t & find_or_create(const qstring &str)
Definition regex.h:230
bool _find_or_create(regex_ptr_t **out, const qstring &str, qstring *errbuf=nullptr)
Definition regex.h:216
regex_t
Definition regex.h:58
size_t re_nsub
Definition regex.h:60
int re_magic
Definition regex.h:59
const char * re_endp
Definition regex.h:61
void * re_g
Definition regex.h:62
regmatch_t
Definition regex.h:68
regoff_t rm_eo
Definition regex.h:70
regoff_t rm_so
Definition regex.h:69