Ruby 2.0.0p353 (2013-11-22 revision 43784)
regenc.h
Go to the documentation of this file.
1 #ifndef ONIGURUMA_REGENC_H
2 #define ONIGURUMA_REGENC_H
3 /**********************************************************************
4  regenc.h - Onigmo (Oniguruma-mod) (regular expression library)
5 **********************************************************************/
6 /*-
7  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8  * Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in the
18  * documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 #ifndef REGINT_H
33 #ifndef RUBY_EXTERN
34 #include "ruby/config.h"
35 #include "ruby/defines.h"
36 #endif
37 #ifdef ONIG_ESCAPE_UCHAR_COLLISION
38 #undef ONIG_ESCAPE_UCHAR_COLLISION
39 #endif
40 #endif
41 #include "ruby/oniguruma.h"
42 
43 #if defined __GNUC__ && __GNUC__ >= 4
44 #pragma GCC visibility push(default)
45 #endif
46 
47 typedef struct {
51 
52 
/* Fallback definitions: each one is supplied only when the including
 * translation unit has not already defined the name. */
53 #ifndef NULL
54 #define NULL ((void* )0)
55 #endif
56 
/* Boolean constants as plain ints. */
57 #ifndef TRUE
58 #define TRUE 1
59 #endif
60 
61 #ifndef FALSE
62 #define FALSE 0
63 #endif
64 
/* ARG_UNUSED marks a parameter as intentionally unused.  Under GCC-compatible
 * compilers it expands to __attribute__((unused)); elsewhere it expands to
 * nothing. */
65 #ifndef ARG_UNUSED
66 #if defined(__GNUC__)
67 # define ARG_UNUSED __attribute__ ((unused))
68 #else
69 # define ARG_UNUSED
70 #endif
71 #endif
72 
/* Null-pointer tests: p is cast to void* and compared against (void*)0. */
73 #define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
74 #define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
/* Early-return helpers.  Both expand to a bare `if (...) return ...`
 * statement with no trailing semicolon and no do/while wrapper, so:
 *   - the use site must supply the `;`
 *   - an `else` written right after the use binds to this hidden `if`
 * ONIG_CHECK_NULL_RETURN returns NULL; the _VAL variant returns (val). */
75 #define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
76 #define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
77 
/*
 * enclen(enc, p, e)
 *
 * When the encoding's max_enc_len equals its min_enc_len, yields
 * min_enc_len directly; otherwise defers to ONIGENC_MBC_ENC_LEN(enc, p, e).
 *
 * `enc` is parenthesized at every member access so that an expression
 * argument (e.g. `base + 1` or `&table[i]`) parses as intended.  Note that
 * `enc` is still evaluated more than once, so it must not have side effects.
 */
#define enclen(enc,p,e) (((enc)->max_enc_len == (enc)->min_enc_len) ? (enc)->min_enc_len : ONIGENC_MBC_ENC_LEN(enc,p,e))
79 
80 /* Character-type bit flags: each BIT_CTYPE_x is the int value 1 shifted left by the matching ONIGENC_CTYPE_x index. */
81 #define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)
82 #define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA)
83 #define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK)
84 #define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL)
85 #define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT)
86 #define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH)
87 #define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER)
88 #define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT)
89 #define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT)
90 #define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE)
91 #define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER)
92 #define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT)
93 #define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD)
94 #define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM)
95 #define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII)
96 
/* Converts a ctype index into its single-bit mask (int 1 shifted left by
 * ctype).  The shift is done on a signed int, so ctype must be smaller than
 * the bit width of int. */
97 #define CTYPE_TO_BIT(ctype) (1<<(ctype))
/* True when ctype is one of ONIGENC_CTYPE_WORD, ONIGENC_CTYPE_GRAPH or
 * ONIGENC_CTYPE_PRINT.  ctype is evaluated up to three times. */
98 #define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
99  ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\
100  (ctype) == ONIGENC_CTYPE_PRINT)
101 
102 
103 typedef struct {
104  const UChar *name;
105  int ctype;
106  short int len;
108 
/* Brace initializer for the {name, ctype, len} struct declared just above.
 * len is computed as sizeof(name) - 1, which equals the string length only
 * when `name` is a string literal or a char array; passing a pointer would
 * yield sizeof(pointer) - 1 instead. */
109 #define PosixBracketEntryInit(name, ctype) {(const UChar *)name, ctype, (short int)(sizeof(name) - 1)}
110 
111 #define USE_CRNL_AS_LINE_TERMINATOR
112 #define USE_UNICODE_PROPERTIES
113 /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
114 /* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
115 
116 
117 #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
118 
119 /* for encoding system implementation (internal) */
122 ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
123 ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
125 ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end, OnigEncoding enc));
126 
127 
128 /* methods for single byte encoding */
129 ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc));
137 ONIG_EXTERN int onigenc_ascii_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc));
138 
139 /* methods for multi byte encoding */
141 ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
145 ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
146 ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
149 ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
150 
151 
152 /* in enc/unicode.c */
153 ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc));
154 ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc));
155 ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
156 ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
157 ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
158 ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc));
159 
160 
/* Byte-level surrogate tests.  c is masked with 0xfc (the low two bits and
 * anything above bit 7 are ignored) and compared with 0xd8 / 0xdc, so
 * FIRST matches a low byte in 0xd8..0xdb and SECOND matches 0xdc..0xdf.
 * NOTE(review): presumably c is the high-order byte of a UTF-16 code unit;
 * confirm against the callers. */
161 #define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
162 #define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
163 
/* Case conversion by direct table lookup.  c is used as an unchecked array
 * index, so the caller must keep it within the table's bounds.
 * NOTE(review): the table sizes are not visible in this header; presumably
 * one entry per byte value. Confirm against the table definitions. */
164 #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
165  OnigEncISO_8859_1_ToLowerCaseTable[c]
166 #define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
167  OnigEncISO_8859_1_ToUpperCaseTable[c]
168 
171 
172 ONIG_EXTERN int
173 onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
174 ONIG_EXTERN int
175 onigenc_with_ascii_strnicmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
177 onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
178 
179 /* defined in regexec.c, but used in enc/xxx.c */
180 extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
181 
185 ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
186 
/* True when code is below 0x80.  There is no lower-bound check. */
187 #define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80)
/* ASCII case conversion by direct, unchecked table lookup on c. */
188 #define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
189 #define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
/* Tests whether the CTYPE_TO_BIT(ctype) bit is set in the
 * OnigEncAsciiCtypeTable entry for code.  code is an unchecked index, so
 * callers must ensure it is within the table. */
190 #define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
191  ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/* True when code is classified as upper-case or lower-case by the table
 * above.  code may be evaluated twice. */
192 #define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
193  (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
194  ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
195 
196 /* Check whether code lies in the inclusive range [from, to], i.e. (from <= code && code <= to). */
/* Implemented as a single comparison: both differences are converted to
 * OnigCodePoint (an unsigned int), so a code below `from` wraps around to a
 * large value and fails the test.  This relies on from <= to.  `from` is
 * evaluated twice. */
197 #define ONIGENC_IS_IN_RANGE(code, from, to) \
198  ((OnigCodePoint )((code) - (from)) <= (OnigCodePoint )((to) - (from)))
199 
200 
/* Helpers for naming, declaring and defining an OnigEncodingType object.
 * Two variants are selected by whether ONIG_ENC_REGISTER is defined. */
201 #ifdef ONIG_ENC_REGISTER
/* Prototype of the registration function named by ONIG_ENC_REGISTER. */
202 extern int ONIG_ENC_REGISTER(const char *, OnigEncodingType*);
/* With registration: the object is named encoding_<n> and has internal
 * linkage (static). */
203 #define OnigEncodingName(n) encoding_##n
204 #define OnigEncodingDeclare(n) static OnigEncodingType OnigEncodingName(n)
/* Expands to three pieces:
 *   1. a declaration of the encoding object
 *   2. a function `void Init_<f>(void)` that passes the object's `name`
 *      member and its address to ONIG_ENC_REGISTER, discarding the int
 *      result
 *   3. a second declaration of the object with no terminating `;`, so the
 *      use site completes it (presumably with `= { ... };` - confirm
 *      against the encoding sources). */
205 #define OnigEncodingDefine(f,n) \
206  OnigEncodingDeclare(n); \
207  void Init_##f(void) { \
208  ONIG_ENC_REGISTER(OnigEncodingName(n).name, \
209  &OnigEncodingName(n)); \
210  } \
211  OnigEncodingDeclare(n)
212 #else
/* Without registration: the object is named OnigEncoding<n>, is not static,
 * and OnigEncodingDefine reduces to the plain declaration (f is unused). */
213 #define OnigEncodingName(n) OnigEncoding##n
214 #define OnigEncodingDeclare(n) OnigEncodingType OnigEncodingName(n)
215 #define OnigEncodingDefine(f,n) OnigEncodingDeclare(n)
216 #endif
217 
218 /* Macros for defining a replica encoding, an encoding alias, and a dummy encoding. */
/* All three expand to nothing in this header, so a use of them produces no
 * code here.
 * NOTE(review): presumably they are markers consumed by an external tool;
 * confirm. */
219 #define ENC_REPLICATE(name, orig)
220 #define ENC_ALIAS(name, orig)
221 #define ENC_DUMMY(name)
222 
223 #if defined __GNUC__ && __GNUC__ >= 4
224 #pragma GCC visibility pop
225 #endif
226 
227 #endif /* ONIGURUMA_REGENC_H */
unsigned int OnigCodePoint
Definition: oniguruma.h:114
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding
Definition: regenc.h:182
int onigenc_always_true_is_allowed_reverse_match(const UChar *s ARG_UNUSED, const UChar *end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:650
OnigCodePoint from
Definition: regenc.h:48
OnigCodePoint onigenc_single_byte_mbc_to_code(const UChar *p, const UChar *end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:620
unsigned int OnigCaseFoldType
Definition: oniguruma.h:121
int onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:664
int onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, const UChar **pp, const UChar *end ARG_UNUSED, UChar *lower)
Definition: regenc.c:692
int onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:734
int onig_is_in_code_range(const UChar *p, OnigCodePoint code)
Definition: regcomp.c:6024
int onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
Definition: regenc.c:751
int onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, OnigApplyAllCaseFoldFunc f, void *arg, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:410
ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]
Definition: regenc.h:184
unsigned char OnigUChar
Definition: oniguruma.h:113
int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype)
Definition: regenc.c:824
Win32OLEIDispatch * p
Definition: win32ole.c:786
ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]
Definition: regenc.h:185
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]
Definition: regenc.h:169
int onigenc_always_false_is_allowed_reverse_match(const UChar *s ARG_UNUSED, const UChar *end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:657
int onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar *p, const UChar *end, const UChar *sascii, int n)
Definition: regenc.c:873
ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void *arg, OnigEncoding enc))
int onigenc_apply_all_case_fold_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void *arg)
Definition: regenc.c:463
int onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:627
const UChar * name
Definition: regenc.h:104
UChar * onigenc_single_byte_left_adjust_char_head(const UChar *start ARG_UNUSED, const UChar *s, const UChar *end, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:642
int onigenc_is_mbc_newline_0x0a(const UChar *p, const UChar *end, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:581
#define ONIG_EXTERN
Definition: oniguruma.h:98
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]
Definition: regenc.h:170
int(* OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint *to, int to_len, void *arg)
Definition: oniguruma.h:158
ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]
Definition: regenc.h:183
int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype)
Definition: regenc.c:839
int ctype
Definition: regenc.h:105
unsigned int OnigCtype
Definition: oniguruma.h:115
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
int onigenc_single_byte_mbc_enc_len(const UChar *p ARG_UNUSED, const UChar *e ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:613
int onigenc_not_support_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint *ranges[], OnigEncoding enc)
Definition: regenc.c:573
int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, const OnigUChar *p, const OnigUChar *end ARG_UNUSED, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:433
#define UChar
Definition: oniguruma.h:110
UChar * onigenc_step(OnigEncoding enc, const UChar *p, const UChar *end, int n)
Definition: regenc.c:113
int onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
Definition: regenc.c:768
#define f
int onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:742
int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:633
Definition: regenc.h:103
int onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar *p, const UChar *end, const UChar *sascii, int n)
Definition: regenc.c:854
OnigCodePoint onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar *p, const UChar *end)
Definition: regenc.c:674
short int len
Definition: regenc.h:106
OnigCodePoint to
Definition: regenc.h:49
int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar *p, UChar *end)
Definition: regenc.c:791
int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar **p, const UChar *end, UChar *lower, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:591
int onigenc_get_case_fold_codes_by_str_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, const OnigUChar *p, const OnigUChar *end, OnigCaseFoldCodeItem items[])
Definition: regenc.c:491