Ruby 2.0.0p353 (2013-11-22 revision 43784)
string.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  string.c -
4 
5  $Author: nagachika $
6  created at: Mon Aug 9 17:12:58 JST 1993
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9  Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
10  Copyright (C) 2000 Information-technology Promotion Agency, Japan
11 
12 **********************************************************************/
13 
14 #include "ruby/ruby.h"
15 #include "ruby/re.h"
16 #include "ruby/encoding.h"
17 #include "vm_core.h"
18 #include "internal.h"
19 #include "probes.h"
20 #include <assert.h>
21 
22 #define BEG(no) (regs->beg[(no)])
23 #define END(no) (regs->end[(no)])
24 
25 #include <math.h>
26 #include <ctype.h>
27 
28 #ifdef HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 
32 #define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
33 
34 #undef rb_str_new_cstr
35 #undef rb_tainted_str_new_cstr
36 #undef rb_usascii_str_new_cstr
37 #undef rb_external_str_new_cstr
38 #undef rb_locale_str_new_cstr
39 #undef rb_str_new2
40 #undef rb_str_new3
41 #undef rb_str_new4
42 #undef rb_str_new5
43 #undef rb_tainted_str_new2
44 #undef rb_usascii_str_new2
45 #undef rb_str_dup_frozen
46 #undef rb_str_buf_new_cstr
47 #undef rb_str_buf_new2
48 #undef rb_str_buf_cat2
49 #undef rb_str_cat2
50 
51 static VALUE rb_str_clear(VALUE str);
52 
55 
56 #define RUBY_MAX_CHAR_LEN 16
57 #define STR_TMPLOCK FL_USER7
58 #define STR_NOEMBED FL_USER1
59 #define STR_SHARED FL_USER2 /* = ELTS_SHARED */
60 #define STR_ASSOC FL_USER3
61 #define STR_SHARED_P(s) FL_ALL((s), STR_NOEMBED|ELTS_SHARED)
62 #define STR_ASSOC_P(s) FL_ALL((s), STR_NOEMBED|STR_ASSOC)
63 #define STR_NOCAPA (STR_NOEMBED|ELTS_SHARED|STR_ASSOC)
64 #define STR_NOCAPA_P(s) (FL_TEST((s),STR_NOEMBED) && FL_ANY((s),ELTS_SHARED|STR_ASSOC))
65 #define STR_UNSET_NOCAPA(s) do {\
66  if (FL_TEST((s),STR_NOEMBED)) FL_UNSET((s),(ELTS_SHARED|STR_ASSOC));\
67 } while (0)
68 
69 
70 #define STR_SET_NOEMBED(str) do {\
71  FL_SET((str), STR_NOEMBED);\
72  STR_SET_EMBED_LEN((str), 0);\
73 } while (0)
74 #define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED)
75 #define STR_EMBED_P(str) (!FL_TEST((str), STR_NOEMBED))
76 #define STR_SET_EMBED_LEN(str, n) do { \
77  long tmp_n = (n);\
78  RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
79  RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
80 } while (0)
81 
82 #define STR_SET_LEN(str, n) do { \
83  if (STR_EMBED_P(str)) {\
84  STR_SET_EMBED_LEN((str), (n));\
85  }\
86  else {\
87  RSTRING(str)->as.heap.len = (n);\
88  }\
89 } while (0)
90 
91 #define STR_DEC_LEN(str) do {\
92  if (STR_EMBED_P(str)) {\
93  long n = RSTRING_LEN(str);\
94  n--;\
95  STR_SET_EMBED_LEN((str), n);\
96  }\
97  else {\
98  RSTRING(str)->as.heap.len--;\
99  }\
100 } while (0)
101 
102 #define RESIZE_CAPA(str,capacity) do {\
103  if (STR_EMBED_P(str)) {\
104  if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
105  char *tmp = ALLOC_N(char, (capacity)+1);\
106  memcpy(tmp, RSTRING_PTR(str), RSTRING_LEN(str));\
107  RSTRING(str)->as.heap.ptr = tmp;\
108  RSTRING(str)->as.heap.len = RSTRING_LEN(str);\
109  STR_SET_NOEMBED(str);\
110  RSTRING(str)->as.heap.aux.capa = (capacity);\
111  }\
112  }\
113  else {\
114  REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+1);\
115  if (!STR_NOCAPA_P(str))\
116  RSTRING(str)->as.heap.aux.capa = (capacity);\
117  }\
118 } while (0)
119 
120 #define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
121 #define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN)
122 
123 #define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
124 
125 static inline int
127 {
128  rb_encoding *enc;
129 
130  /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
131  if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
132  return 1;
133 
134  enc = STR_ENC_GET(str);
135  if (rb_enc_mbmaxlen(enc) == 1)
136  return 1;
137 
138  /* Conservative. Possibly single byte.
139  * "\xa1" in Shift_JIS for example. */
140  return 0;
141 }
142 
144 
/*
 * Find the first non-ASCII byte in the range [p, e).
 *
 * Returns a pointer to that byte, or NULL when every byte in the range
 * satisfies ISASCII().  When NONASCII_MASK can be defined for this
 * platform, the aligned middle part of a long range is tested one
 * VALUE-sized word at a time; the unaligned head and the tail are
 * always tested byte by byte.
 */
static inline const char *
search_nonascii(const char *p, const char *e)
{
#if SIZEOF_VALUE == 8
# define NONASCII_MASK 0x8080808080808080ULL
#elif SIZEOF_VALUE == 4
# define NONASCII_MASK 0x80808080UL
#endif
#ifdef NONASCII_MASK
    if ((int)sizeof(VALUE) * 2 < e - p) {
        const VALUE lowbits = sizeof(VALUE) - 1;
        /* p rounded up to a multiple of sizeof(VALUE) */
        const VALUE *word = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
        /* e rounded down to a multiple of sizeof(VALUE) */
        const VALUE *word_end = (const VALUE*)(~lowbits & (VALUE)e);

        /* head: bytes before the first aligned word */
        for (; p < (const char *)word; p++) {
            if (!ISASCII(*p))
                return p;
        }
        /* middle: stop on the first word that has any high bit set */
        for (; word < word_end; word++) {
            if (*word & NONASCII_MASK)
                break;
        }
        /* resume the byte scan at the suspicious word (or at the tail) */
        p = (const char *)word;
    }
#endif
    for (; p < e; p++) {
        if (!ISASCII(*p))
            return p;
    }
    return NULL;
}
181 
182 static int
183 coderange_scan(const char *p, long len, rb_encoding *enc)
184 {
185  const char *e = p + len;
186 
187  if (rb_enc_to_index(enc) == 0) {
188  /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
189  p = search_nonascii(p, e);
191  }
192 
193  if (rb_enc_asciicompat(enc)) {
194  p = search_nonascii(p, e);
195  if (!p) {
196  return ENC_CODERANGE_7BIT;
197  }
198  while (p < e) {
199  int ret = rb_enc_precise_mbclen(p, e, enc);
200  if (!MBCLEN_CHARFOUND_P(ret)) {
201  return ENC_CODERANGE_BROKEN;
202  }
203  p += MBCLEN_CHARFOUND_LEN(ret);
204  if (p < e) {
205  p = search_nonascii(p, e);
206  if (!p) {
207  return ENC_CODERANGE_VALID;
208  }
209  }
210  }
211  if (e < p) {
212  return ENC_CODERANGE_BROKEN;
213  }
214  return ENC_CODERANGE_VALID;
215  }
216 
217  while (p < e) {
218  int ret = rb_enc_precise_mbclen(p, e, enc);
219 
220  if (!MBCLEN_CHARFOUND_P(ret)) {
221  return ENC_CODERANGE_BROKEN;
222  }
223  p += MBCLEN_CHARFOUND_LEN(ret);
224  }
225  if (e < p) {
226  return ENC_CODERANGE_BROKEN;
227  }
228  return ENC_CODERANGE_VALID;
229 }
230 
231 long
232 rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
233 {
234  const char *p = s;
235 
236  if (*cr == ENC_CODERANGE_BROKEN)
237  return e - s;
238 
239  if (rb_enc_to_index(enc) == 0) {
240  /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
241  p = search_nonascii(p, e);
243  return e - s;
244  }
245  else if (rb_enc_asciicompat(enc)) {
246  p = search_nonascii(p, e);
247  if (!p) {
248  if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
249  return e - s;
250  }
251  while (p < e) {
252  int ret = rb_enc_precise_mbclen(p, e, enc);
253  if (!MBCLEN_CHARFOUND_P(ret)) {
255  return p - s;
256  }
257  p += MBCLEN_CHARFOUND_LEN(ret);
258  if (p < e) {
259  p = search_nonascii(p, e);
260  if (!p) {
261  *cr = ENC_CODERANGE_VALID;
262  return e - s;
263  }
264  }
265  }
267  return p - s;
268  }
269  else {
270  while (p < e) {
271  int ret = rb_enc_precise_mbclen(p, e, enc);
272  if (!MBCLEN_CHARFOUND_P(ret)) {
274  return p - s;
275  }
276  p += MBCLEN_CHARFOUND_LEN(ret);
277  }
279  return p - s;
280  }
281 }
282 
283 static inline void
285 {
286  rb_enc_set_index(str1, ENCODING_GET(str2));
287 }
288 
289 static void
291 {
292  /* this function is designed for copying encoding and coderange
293  * from src to new string "dest" which is made from the part of src.
294  */
295  str_enc_copy(dest, src);
296  if (RSTRING_LEN(dest) == 0) {
297  if (!rb_enc_asciicompat(STR_ENC_GET(src)))
299  else
301  return;
302  }
303  switch (ENC_CODERANGE(src)) {
304  case ENC_CODERANGE_7BIT:
306  break;
307  case ENC_CODERANGE_VALID:
308  if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
311  else
313  break;
314  default:
315  break;
316  }
317 }
318 
319 static void
321 {
322  str_enc_copy(dest, src);
323  ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
324 }
325 
326 int
328 {
329  int cr = ENC_CODERANGE(str);
330 
331  if (cr == ENC_CODERANGE_UNKNOWN) {
332  rb_encoding *enc = STR_ENC_GET(str);
333  cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
334  ENC_CODERANGE_SET(str, cr);
335  }
336  return cr;
337 }
338 
339 int
341 {
342  rb_encoding *enc = STR_ENC_GET(str);
343 
344  if (!rb_enc_asciicompat(enc))
345  return FALSE;
346  else if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
347  return TRUE;
348  return FALSE;
349 }
350 
351 static inline void
352 str_mod_check(VALUE s, const char *p, long len)
353 {
354  if (RSTRING_PTR(s) != p || RSTRING_LEN(s) != len){
355  rb_raise(rb_eRuntimeError, "string modified");
356  }
357 }
358 
359 size_t
361 {
362  if (STR_EMBED_P(str)) {
363  return RSTRING_EMBED_LEN_MAX;
364  }
365  else if (STR_NOCAPA_P(str)) {
366  return RSTRING(str)->as.heap.len;
367  }
368  else {
369  return RSTRING(str)->as.heap.aux.capa;
370  }
371 }
372 
373 static inline VALUE
375 {
376  NEWOBJ_OF(str, struct RString, klass, T_STRING);
377 
378  str->as.heap.ptr = 0;
379  str->as.heap.len = 0;
380  str->as.heap.aux.capa = 0;
381 
382  return (VALUE)str;
383 }
384 
385 static inline VALUE
387 {
390  }
391  return str_alloc(klass);
392 }
393 
394 static VALUE
395 str_new(VALUE klass, const char *ptr, long len)
396 {
397  VALUE str;
398 
399  if (len < 0) {
400  rb_raise(rb_eArgError, "negative string size (or size too big)");
401  }
402 
405  }
406 
407  str = str_alloc(klass);
408  if (len > RSTRING_EMBED_LEN_MAX) {
409  RSTRING(str)->as.heap.aux.capa = len;
410  RSTRING(str)->as.heap.ptr = ALLOC_N(char,len+1);
411  STR_SET_NOEMBED(str);
412  }
413  else if (len == 0) {
415  }
416  if (ptr) {
417  memcpy(RSTRING_PTR(str), ptr, len);
418  }
419  STR_SET_LEN(str, len);
420  RSTRING_PTR(str)[len] = '\0';
421  return str;
422 }
423 
424 VALUE
425 rb_str_new(const char *ptr, long len)
426 {
427  return str_new(rb_cString, ptr, len);
428 }
429 
430 VALUE
431 rb_usascii_str_new(const char *ptr, long len)
432 {
433  VALUE str = rb_str_new(ptr, len);
435  return str;
436 }
437 
438 VALUE
439 rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
440 {
441  VALUE str = rb_str_new(ptr, len);
442  rb_enc_associate(str, enc);
443  return str;
444 }
445 
446 VALUE
447 rb_str_new_cstr(const char *ptr)
448 {
449  if (!ptr) {
450  rb_raise(rb_eArgError, "NULL pointer given");
451  }
452  return rb_str_new(ptr, strlen(ptr));
453 }
454 
456 #define rb_str_new2 rb_str_new_cstr
457 
458 VALUE
459 rb_usascii_str_new_cstr(const char *ptr)
460 {
461  VALUE str = rb_str_new2(ptr);
463  return str;
464 }
465 
467 #define rb_usascii_str_new2 rb_usascii_str_new_cstr
468 
469 VALUE
470 rb_tainted_str_new(const char *ptr, long len)
471 {
472  VALUE str = rb_str_new(ptr, len);
473 
474  OBJ_TAINT(str);
475  return str;
476 }
477 
478 VALUE
479 rb_tainted_str_new_cstr(const char *ptr)
480 {
481  VALUE str = rb_str_new2(ptr);
482 
483  OBJ_TAINT(str);
484  return str;
485 }
486 
488 #define rb_tainted_str_new2 rb_tainted_str_new_cstr
489 
490 VALUE
491 rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
492 {
494  rb_econv_t *ec;
495  rb_econv_result_t ret;
496  long len, olen;
497  VALUE econv_wrapper;
498  VALUE newstr;
499  const unsigned char *start, *sp;
500  unsigned char *dest, *dp;
501  size_t converted_output = 0;
502 
503  if (!to) return str;
504  if (!from) from = rb_enc_get(str);
505  if (from == to) return str;
506  if ((rb_enc_asciicompat(to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) ||
507  to == rb_ascii8bit_encoding()) {
508  if (STR_ENC_GET(str) != to) {
509  str = rb_str_dup(str);
510  rb_enc_associate(str, to);
511  }
512  return str;
513  }
514 
515  len = RSTRING_LEN(str);
516  newstr = rb_str_new(0, len);
517  olen = len;
518 
519  econv_wrapper = rb_obj_alloc(rb_cEncodingConverter);
520  RBASIC(econv_wrapper)->klass = 0;
521  ec = rb_econv_open_opts(from->name, to->name, ecflags, ecopts);
522  if (!ec) return str;
523  DATA_PTR(econv_wrapper) = ec;
524 
525  sp = (unsigned char*)RSTRING_PTR(str);
526  start = sp;
527  while ((dest = (unsigned char*)RSTRING_PTR(newstr)),
528  (dp = dest + converted_output),
529  (ret = rb_econv_convert(ec, &sp, start + len, &dp, dest + olen, 0)),
531  /* destination buffer short */
532  size_t converted_input = sp - start;
533  size_t rest = len - converted_input;
534  converted_output = dp - dest;
535  rb_str_set_len(newstr, converted_output);
536  if (converted_input && converted_output &&
537  rest < (LONG_MAX / converted_output)) {
538  rest = (rest * converted_output) / converted_input;
539  }
540  else {
541  rest = olen;
542  }
543  olen += rest < 2 ? 2 : rest;
544  rb_str_resize(newstr, olen);
545  }
546  DATA_PTR(econv_wrapper) = 0;
547  rb_econv_close(ec);
548  rb_gc_force_recycle(econv_wrapper);
549  switch (ret) {
550  case econv_finished:
551  len = dp - (unsigned char*)RSTRING_PTR(newstr);
552  rb_str_set_len(newstr, len);
553  rb_enc_associate(newstr, to);
554  return newstr;
555 
556  default:
557  /* some error, return original */
558  return str;
559  }
560 }
561 
562 VALUE
564 {
565  return rb_str_conv_enc_opts(str, from, to, 0, Qnil);
566 }
567 
568 VALUE
569 rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
570 {
571  VALUE str;
572 
573  str = rb_tainted_str_new(ptr, len);
574  if (eenc == rb_usascii_encoding() &&
577  return str;
578  }
579  rb_enc_associate(str, eenc);
580  return rb_str_conv_enc(str, eenc, rb_default_internal_encoding());
581 }
582 
583 VALUE
584 rb_external_str_new(const char *ptr, long len)
585 {
587 }
588 
589 VALUE
590 rb_external_str_new_cstr(const char *ptr)
591 {
593 }
594 
595 VALUE
596 rb_locale_str_new(const char *ptr, long len)
597 {
599 }
600 
601 VALUE
602 rb_locale_str_new_cstr(const char *ptr)
603 {
605 }
606 
607 VALUE
608 rb_filesystem_str_new(const char *ptr, long len)
609 {
611 }
612 
613 VALUE
615 {
617 }
618 
619 VALUE
621 {
623 }
624 
625 VALUE
627 {
628  return rb_str_conv_enc(str, STR_ENC_GET(str), rb_locale_encoding());
629 }
630 
631 VALUE
633 {
634  return rb_str_conv_enc(str, STR_ENC_GET(str), enc);
635 }
636 
637 static VALUE
639 {
640  if (RSTRING_LEN(str) <= RSTRING_EMBED_LEN_MAX) {
641  STR_SET_EMBED(str2);
642  memcpy(RSTRING_PTR(str2), RSTRING_PTR(str), RSTRING_LEN(str)+1);
643  STR_SET_EMBED_LEN(str2, RSTRING_LEN(str));
644  }
645  else {
646  str = rb_str_new_frozen(str);
647  FL_SET(str2, STR_NOEMBED);
648  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
649  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
650  RSTRING(str2)->as.heap.aux.shared = str;
651  FL_SET(str2, ELTS_SHARED);
652  }
653  return str2;
654 }
655 
656 static VALUE
658 {
660  rb_enc_cr_str_exact_copy(str2, str);
661  return str2;
662 }
663 
664 static VALUE
666 {
667  return str_replace_shared(str_alloc(klass), str);
668 }
669 
670 static VALUE
671 str_new3(VALUE klass, VALUE str)
672 {
673  return str_new_shared(klass, str);
674 }
675 
676 VALUE
678 {
679  VALUE str2 = str_new3(rb_obj_class(str), str);
680 
681  OBJ_INFECT(str2, str);
682  return str2;
683 }
684 
686 #define rb_str_new3 rb_str_new_shared
687 
688 static VALUE
689 str_new4(VALUE klass, VALUE str)
690 {
691  VALUE str2;
692 
693  str2 = str_alloc(klass);
694  STR_SET_NOEMBED(str2);
695  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
696  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
697  if (STR_SHARED_P(str)) {
698  VALUE shared = RSTRING(str)->as.heap.aux.shared;
699  assert(OBJ_FROZEN(shared));
700  FL_SET(str2, ELTS_SHARED);
701  RSTRING(str2)->as.heap.aux.shared = shared;
702  }
703  else {
704  FL_SET(str, ELTS_SHARED);
705  RSTRING(str)->as.heap.aux.shared = str2;
706  }
707  rb_enc_cr_str_exact_copy(str2, str);
708  OBJ_INFECT(str2, str);
709  return str2;
710 }
711 
712 VALUE
714 {
715  VALUE klass, str;
716 
717  if (OBJ_FROZEN(orig)) return orig;
718  klass = rb_obj_class(orig);
719  if (STR_SHARED_P(orig) && (str = RSTRING(orig)->as.heap.aux.shared)) {
720  long ofs;
721  assert(OBJ_FROZEN(str));
722  ofs = RSTRING_LEN(str) - RSTRING_LEN(orig);
723  if ((ofs > 0) || (klass != RBASIC(str)->klass) ||
724  ((RBASIC(str)->flags ^ RBASIC(orig)->flags) & (FL_TAINT|FL_UNTRUSTED)) ||
725  ENCODING_GET(str) != ENCODING_GET(orig)) {
726  str = str_new3(klass, str);
727  RSTRING(str)->as.heap.ptr += ofs;
728  RSTRING(str)->as.heap.len -= ofs;
729  rb_enc_cr_str_exact_copy(str, orig);
730  OBJ_INFECT(str, orig);
731  }
732  }
733  else if (STR_EMBED_P(orig)) {
734  str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
735  rb_enc_cr_str_exact_copy(str, orig);
736  OBJ_INFECT(str, orig);
737  }
738  else if (STR_ASSOC_P(orig)) {
739  VALUE assoc = RSTRING(orig)->as.heap.aux.shared;
740  FL_UNSET(orig, STR_ASSOC);
741  str = str_new4(klass, orig);
742  FL_SET(str, STR_ASSOC);
743  RSTRING(str)->as.heap.aux.shared = assoc;
744  }
745  else {
746  str = str_new4(klass, orig);
747  }
748  OBJ_FREEZE(str);
749  return str;
750 }
751 
753 #define rb_str_new4 rb_str_new_frozen
754 
755 VALUE
756 rb_str_new_with_class(VALUE obj, const char *ptr, long len)
757 {
758  return str_new(rb_obj_class(obj), ptr, len);
759 }
760 
761 RUBY_ALIAS_FUNCTION(rb_str_new5(VALUE obj, const char *ptr, long len),
762  rb_str_new_with_class, (obj, ptr, len))
763 #define rb_str_new5 rb_str_new_with_class
764 
765 static VALUE
766 str_new_empty(VALUE str)
767 {
768  VALUE v = rb_str_new5(str, 0, 0);
769  rb_enc_copy(v, str);
770  OBJ_INFECT(v, str);
771  return v;
772 }
773 
774 #define STR_BUF_MIN_SIZE 128
775 
776 VALUE
777 rb_str_buf_new(long capa)
778 {
779  VALUE str = str_alloc(rb_cString);
780 
781  if (capa < STR_BUF_MIN_SIZE) {
782  capa = STR_BUF_MIN_SIZE;
783  }
784  FL_SET(str, STR_NOEMBED);
785  RSTRING(str)->as.heap.aux.capa = capa;
786  RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1);
787  RSTRING(str)->as.heap.ptr[0] = '\0';
788 
789  return str;
790 }
791 
792 VALUE
793 rb_str_buf_new_cstr(const char *ptr)
794 {
795  VALUE str;
796  long len = strlen(ptr);
797 
798  str = rb_str_buf_new(len);
799  rb_str_buf_cat(str, ptr, len);
800 
801  return str;
802 }
803 
805 #define rb_str_buf_new2 rb_str_buf_new_cstr
806 
807 VALUE
808 rb_str_tmp_new(long len)
809 {
810  return str_new(0, 0, len);
811 }
812 
813 void *
814 rb_alloc_tmp_buffer(volatile VALUE *store, long len)
815 {
816  VALUE s = rb_str_tmp_new(len);
817  *store = s;
818  return RSTRING_PTR(s);
819 }
820 
821 void
822 rb_free_tmp_buffer(volatile VALUE *store)
823 {
824  VALUE s = *store;
825  *store = 0;
826  if (s) rb_str_clear(s);
827 }
828 
829 void
831 {
832  if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
833  xfree(RSTRING(str)->as.heap.ptr);
834  }
835 }
836 
837 RUBY_FUNC_EXPORTED size_t
839 {
840  if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
841  return RSTRING(str)->as.heap.aux.capa;
842  }
843  else {
844  return 0;
845  }
846 }
847 
848 VALUE
850 {
851  return rb_convert_type(str, T_STRING, "String", "to_str");
852 }
853 
854 static inline void str_discard(VALUE str);
855 
856 void
858 {
859  rb_encoding *enc;
860  int cr;
861  if (str == str2) return;
862  enc = STR_ENC_GET(str2);
863  cr = ENC_CODERANGE(str2);
864  str_discard(str);
865  OBJ_INFECT(str, str2);
866  if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) {
867  STR_SET_EMBED(str);
868  memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1);
869  STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
870  rb_enc_associate(str, enc);
871  ENC_CODERANGE_SET(str, cr);
872  return;
873  }
874  STR_SET_NOEMBED(str);
875  STR_UNSET_NOCAPA(str);
876  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
877  RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
878  if (STR_NOCAPA_P(str2)) {
879  FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA);
880  RSTRING(str)->as.heap.aux.shared = RSTRING(str2)->as.heap.aux.shared;
881  }
882  else {
883  RSTRING(str)->as.heap.aux.capa = RSTRING(str2)->as.heap.aux.capa;
884  }
885  STR_SET_EMBED(str2); /* abandon str2 */
886  RSTRING_PTR(str2)[0] = 0;
887  STR_SET_EMBED_LEN(str2, 0);
888  rb_enc_associate(str, enc);
889  ENC_CODERANGE_SET(str, cr);
890 }
891 
892 static ID id_to_s;
893 
894 VALUE
896 {
897  VALUE str;
898 
899  if (RB_TYPE_P(obj, T_STRING)) {
900  return obj;
901  }
902  str = rb_funcall(obj, id_to_s, 0);
903  if (!RB_TYPE_P(str, T_STRING))
904  return rb_any_to_s(obj);
905  if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
906  return str;
907 }
908 
909 static VALUE
911 {
912  long len;
913 
914  len = RSTRING_LEN(str2);
915  if (STR_ASSOC_P(str2)) {
916  str2 = rb_str_new4(str2);
917  }
918  if (STR_SHARED_P(str2)) {
919  VALUE shared = RSTRING(str2)->as.heap.aux.shared;
920  assert(OBJ_FROZEN(shared));
921  STR_SET_NOEMBED(str);
922  RSTRING(str)->as.heap.len = len;
923  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
924  FL_SET(str, ELTS_SHARED);
925  FL_UNSET(str, STR_ASSOC);
926  RSTRING(str)->as.heap.aux.shared = shared;
927  }
928  else {
929  str_replace_shared(str, str2);
930  }
931 
932  OBJ_INFECT(str, str2);
933  rb_enc_cr_str_exact_copy(str, str2);
934  return str;
935 }
936 
937 static VALUE
939 {
940  VALUE dup = str_alloc(klass);
941  str_replace(dup, str);
942  return dup;
943 }
944 
945 VALUE
947 {
948  return str_duplicate(rb_obj_class(str), str);
949 }
950 
951 VALUE
953 {
957  }
958  return str_replace(str_alloc(rb_cString), str);
959 }
960 
961 /*
962  * call-seq:
963  * String.new(str="") -> new_str
964  *
965  * Returns a new string object containing a copy of <i>str</i>.
966  */
967 
968 static VALUE
970 {
971  VALUE orig;
972 
973  if (argc > 0 && rb_scan_args(argc, argv, "01", &orig) == 1)
974  rb_str_replace(str, orig);
975  return str;
976 }
977 
978 static inline long
979 enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
980 {
981  long c;
982  const char *q;
983 
984  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
985  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
986  }
987  else if (rb_enc_asciicompat(enc)) {
988  c = 0;
989  if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) {
990  while (p < e) {
991  if (ISASCII(*p)) {
992  q = search_nonascii(p, e);
993  if (!q)
994  return c + (e - p);
995  c += q - p;
996  p = q;
997  }
998  p += rb_enc_fast_mbclen(p, e, enc);
999  c++;
1000  }
1001  }
1002  else {
1003  while (p < e) {
1004  if (ISASCII(*p)) {
1005  q = search_nonascii(p, e);
1006  if (!q)
1007  return c + (e - p);
1008  c += q - p;
1009  p = q;
1010  }
1011  p += rb_enc_mbclen(p, e, enc);
1012  c++;
1013  }
1014  }
1015  return c;
1016  }
1017 
1018  for (c=0; p<e; c++) {
1019  p += rb_enc_mbclen(p, e, enc);
1020  }
1021  return c;
1022 }
1023 
1024 long
1025 rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
1026 {
1027  return enc_strlen(p, e, enc, ENC_CODERANGE_UNKNOWN);
1028 }
1029 
1030 long
1031 rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
1032 {
1033  long c;
1034  const char *q;
1035  int ret;
1036 
1037  *cr = 0;
1038  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1039  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
1040  }
1041  else if (rb_enc_asciicompat(enc)) {
1042  c = 0;
1043  while (p < e) {
1044  if (ISASCII(*p)) {
1045  q = search_nonascii(p, e);
1046  if (!q) {
1047  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1048  return c + (e - p);
1049  }
1050  c += q - p;
1051  p = q;
1052  }
1053  ret = rb_enc_precise_mbclen(p, e, enc);
1054  if (MBCLEN_CHARFOUND_P(ret)) {
1055  *cr |= ENC_CODERANGE_VALID;
1056  p += MBCLEN_CHARFOUND_LEN(ret);
1057  }
1058  else {
1059  *cr = ENC_CODERANGE_BROKEN;
1060  p++;
1061  }
1062  c++;
1063  }
1064  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1065  return c;
1066  }
1067 
1068  for (c=0; p<e; c++) {
1069  ret = rb_enc_precise_mbclen(p, e, enc);
1070  if (MBCLEN_CHARFOUND_P(ret)) {
1071  *cr |= ENC_CODERANGE_VALID;
1072  p += MBCLEN_CHARFOUND_LEN(ret);
1073  }
1074  else {
1075  *cr = ENC_CODERANGE_BROKEN;
1076  if (p + rb_enc_mbminlen(enc) <= e)
1077  p += rb_enc_mbminlen(enc);
1078  else
1079  p = e;
1080  }
1081  }
1082  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1083  return c;
1084 }
1085 
1086 #ifdef NONASCII_MASK
1087 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1088 
1089 /*
1090  * UTF-8 leading bytes have either 0xxxxxxx or 11xxxxxx
1091  * bit represention. (see http://en.wikipedia.org/wiki/UTF-8)
1092  * Therefore, following pseudo code can detect UTF-8 leading byte.
1093  *
1094  * if (!(byte & 0x80))
1095  * byte |= 0x40; // turn on bit6
1096  * return ((byte>>6) & 1); // bit6 represent it's leading byte or not.
1097  *
1098  * This function calculate every bytes in the argument word `s'
1099  * using the above logic concurrently. and gather every bytes result.
1100  */
1101 static inline VALUE
1102 count_utf8_lead_bytes_with_word(const VALUE *s)
1103 {
1104  VALUE d = *s;
1105 
1106  /* Transform into bit0 represent UTF-8 leading or not. */
1107  d |= ~(d>>1);
1108  d >>= 6;
1109  d &= NONASCII_MASK >> 7;
1110 
1111  /* Gather every bytes. */
1112  d += (d>>8);
1113  d += (d>>16);
1114 #if SIZEOF_VALUE == 8
1115  d += (d>>32);
1116 #endif
1117  return (d&0xF);
1118 }
1119 #endif
1120 
1121 static long
1123 {
1124  const char *p, *e;
1125  long n;
1126  int cr;
1127 
1128  if (single_byte_optimizable(str)) return RSTRING_LEN(str);
1129  if (!enc) enc = STR_ENC_GET(str);
1130  p = RSTRING_PTR(str);
1131  e = RSTRING_END(str);
1132  cr = ENC_CODERANGE(str);
1133 #ifdef NONASCII_MASK
1134  if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1135  enc == rb_utf8_encoding()) {
1136 
1137  VALUE len = 0;
1138  if ((int)sizeof(VALUE) * 2 < e - p) {
1139  const VALUE *s, *t;
1140  const VALUE lowbits = sizeof(VALUE) - 1;
1141  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1142  t = (const VALUE*)(~lowbits & (VALUE)e);
1143  while (p < (const char *)s) {
1144  if (is_utf8_lead_byte(*p)) len++;
1145  p++;
1146  }
1147  while (s < t) {
1148  len += count_utf8_lead_bytes_with_word(s);
1149  s++;
1150  }
1151  p = (const char *)s;
1152  }
1153  while (p < e) {
1154  if (is_utf8_lead_byte(*p)) len++;
1155  p++;
1156  }
1157  return (long)len;
1158  }
1159 #endif
1160  n = rb_enc_strlen_cr(p, e, enc, &cr);
1161  if (cr) {
1162  ENC_CODERANGE_SET(str, cr);
1163  }
1164  return n;
1165 }
1166 
1167 long
1169 {
1170  return str_strlen(str, STR_ENC_GET(str));
1171 }
1172 
1173 /*
1174  * call-seq:
1175  * str.length -> integer
1176  * str.size -> integer
1177  *
1178  * Returns the character length of <i>str</i>.
1179  */
1180 
1181 VALUE
1183 {
1184  long len;
1185 
1186  len = str_strlen(str, STR_ENC_GET(str));
1187  return LONG2NUM(len);
1188 }
1189 
1190 /*
1191  * call-seq:
1192  * str.bytesize -> integer
1193  *
1194  * Returns the length of +str+ in bytes.
1195  *
1196  * "\x80\u3042".bytesize #=> 4
1197  * "hello".bytesize #=> 5
1198  */
1199 
1200 static VALUE
1202 {
1203  return LONG2NUM(RSTRING_LEN(str));
1204 }
1205 
1206 /*
1207  * call-seq:
1208  * str.empty? -> true or false
1209  *
1210  * Returns <code>true</code> if <i>str</i> has a length of zero.
1211  *
1212  * "hello".empty? #=> false
1213  * " ".empty? #=> false
1214  * "".empty? #=> true
1215  */
1216 
1217 static VALUE
1219 {
1220  if (RSTRING_LEN(str) == 0)
1221  return Qtrue;
1222  return Qfalse;
1223 }
1224 
1225 /*
1226  * call-seq:
1227  * str + other_str -> new_str
1228  *
1229  * Concatenation---Returns a new <code>String</code> containing
1230  * <i>other_str</i> concatenated to <i>str</i>.
1231  *
1232  * "Hello from " + self.to_s #=> "Hello from main"
1233  */
1234 
1235 VALUE
1237 {
1238  VALUE str3;
1239  rb_encoding *enc;
1240 
1241  StringValue(str2);
1242  enc = rb_enc_check(str1, str2);
1243  str3 = rb_str_new(0, RSTRING_LEN(str1)+RSTRING_LEN(str2));
1244  memcpy(RSTRING_PTR(str3), RSTRING_PTR(str1), RSTRING_LEN(str1));
1245  memcpy(RSTRING_PTR(str3) + RSTRING_LEN(str1),
1246  RSTRING_PTR(str2), RSTRING_LEN(str2));
1247  RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0';
1248 
1249  if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
1250  OBJ_TAINT(str3);
1253  return str3;
1254 }
1255 
1256 /*
1257  * call-seq:
1258  * str * integer -> new_str
1259  *
1260  * Copy --- Returns a new String containing +integer+ copies of the receiver.
1261  * +integer+ must be greater than or equal to 0.
1262  *
1263  * "Ho! " * 3 #=> "Ho! Ho! Ho! "
1264  * "Ho! " * 0 #=> ""
1265  */
1266 
1267 VALUE
1269 {
1270  VALUE str2;
1271  long n, len;
1272  char *ptr2;
1273 
1274  len = NUM2LONG(times);
1275  if (len < 0) {
1276  rb_raise(rb_eArgError, "negative argument");
1277  }
1278  if (len && LONG_MAX/len < RSTRING_LEN(str)) {
1279  rb_raise(rb_eArgError, "argument too big");
1280  }
1281 
1282  str2 = rb_str_new5(str, 0, len *= RSTRING_LEN(str));
1283  ptr2 = RSTRING_PTR(str2);
1284  if (len) {
1285  n = RSTRING_LEN(str);
1286  memcpy(ptr2, RSTRING_PTR(str), n);
1287  while (n <= len/2) {
1288  memcpy(ptr2 + n, ptr2, n);
1289  n *= 2;
1290  }
1291  memcpy(ptr2 + n, ptr2, len-n);
1292  }
1293  ptr2[RSTRING_LEN(str2)] = '\0';
1294  OBJ_INFECT(str2, str);
1295  rb_enc_cr_str_copy_for_substr(str2, str);
1296 
1297  return str2;
1298 }
1299 
1300 /*
1301  * call-seq:
1302  * str % arg -> new_str
1303  *
1304  * Format---Uses <i>str</i> as a format specification, and returns the result
1305  * of applying it to <i>arg</i>. If the format specification contains more than
1306  * one substitution, then <i>arg</i> must be an <code>Array</code> or <code>Hash</code>
1307  * containing the values to be substituted. See <code>Kernel::sprintf</code> for
1308  * details of the format string.
1309  *
1310  * "%05d" % 123 #=> "00123"
1311  * "%-5s: %08x" % [ "ID", self.object_id ] #=> "ID : 200e14d6"
1312  * "foo = %{foo}" % { :foo => 'bar' } #=> "foo = bar"
1313  */
1314 
1315 static VALUE
1317 {
1318  volatile VALUE tmp = rb_check_array_type(arg);
1319 
1320  if (!NIL_P(tmp)) {
1321  return rb_str_format(RARRAY_LENINT(tmp), RARRAY_PTR(tmp), str);
1322  }
1323  return rb_str_format(1, &arg, str);
1324 }
1325 
1326 static inline void
1328 {
1329  if (FL_TEST(str, STR_TMPLOCK)) {
1330  rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
1331  }
1332  rb_check_frozen(str);
1333  if (!OBJ_UNTRUSTED(str) && rb_safe_level() >= 4)
1334  rb_raise(rb_eSecurityError, "Insecure: can't modify string");
1335 }
1336 
1337 static inline int
1339 {
1340  str_modifiable(str);
1341  if (!STR_SHARED_P(str)) return 1;
1342  if (STR_EMBED_P(str)) return 1;
1343  return 0;
1344 }
1345 
1346 static void
1348 {
1349  char *ptr;
1350  long len = RSTRING_LEN(str);
1351  long capa = len + expand;
1352 
1353  if (len > capa) len = capa;
1354  ptr = ALLOC_N(char, capa + 1);
1355  if (RSTRING_PTR(str)) {
1356  memcpy(ptr, RSTRING_PTR(str), len);
1357  }
1358  STR_SET_NOEMBED(str);
1359  STR_UNSET_NOCAPA(str);
1360  ptr[len] = 0;
1361  RSTRING(str)->as.heap.ptr = ptr;
1362  RSTRING(str)->as.heap.len = len;
1363  RSTRING(str)->as.heap.aux.capa = capa;
1364 }
1365 
1366 #define str_make_independent(str) str_make_independent_expand((str), 0L)
1367 
1368 void
1370 {
1371  if (!str_independent(str))
1372  str_make_independent(str);
1373  ENC_CODERANGE_CLEAR(str);
1374 }
1375 
1376 void
1377 rb_str_modify_expand(VALUE str, long expand)
1378 {
1379  if (expand < 0) {
1380  rb_raise(rb_eArgError, "negative expanding string size");
1381  }
1382  if (!str_independent(str)) {
1383  str_make_independent_expand(str, expand);
1384  }
1385  else if (expand > 0) {
1386  long len = RSTRING_LEN(str);
1387  long capa = len + expand;
1388  if (!STR_EMBED_P(str)) {
1389  REALLOC_N(RSTRING(str)->as.heap.ptr, char, capa+1);
1390  RSTRING(str)->as.heap.aux.capa = capa;
1391  }
1392  else if (capa > RSTRING_EMBED_LEN_MAX) {
1393  str_make_independent_expand(str, expand);
1394  }
1395  }
1396  ENC_CODERANGE_CLEAR(str);
1397 }
1398 
1399 /* As rb_str_modify(), but don't clear coderange */
1400 static void
1402 {
1403  if (!str_independent(str))
1404  str_make_independent(str);
1405  if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN)
1406  /* Force re-scan later */
1407  ENC_CODERANGE_CLEAR(str);
1408 }
1409 
/*
 * Releases the string's heap buffer: after the str_modifiable() check, a
 * string that is neither shared nor embedded has its buffer freed and its heap
 * pointer and length reset to 0.  Shared and embedded strings are left alone.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (presumably str_discard(VALUE str)).
 */
1410 static inline void
1412 {
1413  str_modifiable(str);
1414  if (!STR_SHARED_P(str) && !STR_EMBED_P(str)) {
1415  xfree(RSTRING_PTR(str));
1416  RSTRING(str)->as.heap.ptr = 0;
1417  RSTRING(str)->as.heap.len = 0;
1418  }
1419 }
1420 
/*
 * Associates the array `add` with the string.  If the string already carries
 * STR_ASSOC, `add` is concatenated onto the existing associated array.
 * Otherwise the string is converted to a non-embedded, non-shared form (or its
 * capacity is trimmed to its length), STR_ASSOC is set, the klass of `add` is
 * cleared and `add` is stored in aux.shared.
 * NOTE(review): aux.shared presumably overlays aux.capa, which would explain
 * why the capacity is forced equal to the length first -- confirm against the
 * RString definition.  The line carrying the function name and parameter list
 * is missing from this listing (presumably rb_str_associate(VALUE str, VALUE add)).
 */
1421 void
1423 {
1424  /* sanity check */
1425  rb_check_frozen(str);
1426  if (STR_ASSOC_P(str)) {
1427  /* already associated */
1428  rb_ary_concat(RSTRING(str)->as.heap.aux.shared, add);
1429  }
1430  else {
1431  if (STR_SHARED_P(str)) {
1432  VALUE assoc = RSTRING(str)->as.heap.aux.shared;
1433  str_make_independent(str);
1434  if (STR_ASSOC_P(assoc)) {
1435  assoc = RSTRING(assoc)->as.heap.aux.shared;
1436  rb_ary_concat(assoc, add);
1437  add = assoc;
1438  }
1439  }
1440  else if (STR_EMBED_P(str)) {
1441  str_make_independent(str);
1442  }
1443  else if (RSTRING(str)->as.heap.aux.capa != RSTRING_LEN(str)) {
1444  RESIZE_CAPA(str, RSTRING_LEN(str));
1445  }
1446  FL_SET(str, STR_ASSOC);
1447  RBASIC(add)->klass = 0;
1448  RSTRING(str)->as.heap.aux.shared = add;
1449  }
1450 }
1451 
/*
 * Returns the value stored in aux.shared of an STR_ASSOC string, following one
 * level of sharing first; returns Qfalse when no association is present.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (presumably rb_str_associated(VALUE str)).
 */
1452 VALUE
1454 {
1455  if (STR_SHARED_P(str)) str = RSTRING(str)->as.heap.aux.shared;
1456  if (STR_ASSOC_P(str)) {
1457  return RSTRING(str)->as.heap.aux.shared;
1458  }
1459  return Qfalse;
1460 }
1461 
/*
 * Raises EncCompatError, naming the encoding, unless the string's encoding is
 * ASCII compatible.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (presumably rb_must_asciicompat(VALUE str)).
 */
1462 void
1464 {
1465  rb_encoding *enc = rb_enc_get(str);
1466  if (!rb_enc_asciicompat(enc)) {
1467  rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
1468  }
1469 }
1470 
1471 VALUE
1472 rb_string_value(volatile VALUE *ptr)
1473 {
1474  VALUE s = *ptr;
1475  if (!RB_TYPE_P(s, T_STRING)) {
1476  s = rb_str_to_str(s);
1477  *ptr = s;
1478  }
1479  return s;
1480 }
1481 
/*
 * Converts *ptr to a String via rb_string_value() and returns a pointer to its
 * bytes.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (presumably rb_string_value_ptr(volatile VALUE *ptr)).
 */
1482 char *
1484 {
1485  VALUE str = rb_string_value(ptr);
1486  return RSTRING_PTR(str);
1487 }
1488 
/*
 * Converts *ptr to a String and returns its bytes as a NUL-terminated C string.
 * Raises ArgumentError when the byte pointer is null or the contents include a
 * NUL byte.  If the byte just past the contents is not already 0, the string is
 * made modifiable and a terminator is written there.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (presumably rb_string_value_cstr(volatile VALUE *ptr)).
 */
1489 char *
1491 {
1492  VALUE str = rb_string_value(ptr);
1493  char *s = RSTRING_PTR(str);
1494  long len = RSTRING_LEN(str);
1495 
1496  if (!s || memchr(s, 0, len)) {
1497  rb_raise(rb_eArgError, "string contains null byte");
1498  }
1499  if (s[len]) {
1500  rb_str_modify(str); /* may replace the buffer, so the pointer is re-read below */
1501  s = RSTRING_PTR(str);
1502  s[RSTRING_LEN(str)] = 0;
1503  }
1504  return s;
1505 }
1506 
/*
 * Returns the result of rb_check_convert_type(str, T_STRING, "String", "to_str").
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (presumably rb_check_string_type(VALUE str), which
 * other functions in this file call).
 */
1507 VALUE
1509 {
1510  str = rb_check_convert_type(str, T_STRING, "String", "to_str");
1511  return str;
1512 }
1513 
1514 /*
1515  * call-seq:
1516  * String.try_convert(obj) -> string or nil
1517  *
1518  * Try to convert <i>obj</i> into a String, using to_str method.
1519  * Returns converted string or nil if <i>obj</i> cannot be converted
1520  * for any reason.
1521  *
1522  * String.try_convert("str") #=> "str"
1523  * String.try_convert(/re/) #=> nil
1524  */
/*
 * Implementation of String.try_convert: delegates to rb_check_string_type().
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing.
 */
1525 static VALUE
1527 {
1528  return rb_check_string_type(str);
1529 }
1530 
/*
 * Advances p by up to *nthp characters of encoding enc within [p, e) and
 * returns the resulting position, never past e.
 *
 * p, e  - start and end of the byte range.
 * nthp  - in: number of characters to skip; out: the counter value left when
 *         the scan stopped (the two fixed-width branches do not decrement it).
 * enc   - encoding used to measure characters.
 *
 * Four strategies are used: single-byte encodings, fixed-width encodings,
 * ASCII-compatible encodings (runs of ASCII bytes are skipped in bulk via
 * search_nonascii), and a generic character-by-character walk.
 */
1531 static char*
1532 str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
1533 {
1534  long nth = *nthp;
1535  if (rb_enc_mbmaxlen(enc) == 1) {
1536  p += nth;
1537  }
1538  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1539  p += nth * rb_enc_mbmaxlen(enc);
1540  }
1541  else if (rb_enc_asciicompat(enc)) {
1542  const char *p2, *e2;
1543  int n;
1544 
1545  while (p < e && 0 < nth) {
1546  e2 = p + nth; /* furthest position reachable if every remaining char were one byte */
1547  if (e < e2) {
1548  *nthp = nth;
1549  return (char *)e;
1550  }
1551  if (ISASCII(*p)) {
1552  p2 = search_nonascii(p, e2);
1553  if (!p2) {
1554  nth -= e2 - p;
1555  *nthp = nth;
1556  return (char *)e2;
1557  }
1558  nth -= p2 - p;
1559  p = p2;
1560  }
1561  n = rb_enc_mbclen(p, e, enc);
1562  p += n;
1563  nth--;
1564  }
1565  *nthp = nth;
1566  if (nth != 0) {
1567  return (char *)e;
1568  }
1569  return (char *)p;
1570  }
1571  else {
1572  while (p < e && nth--) {
1573  p += rb_enc_mbclen(p, e, enc);
1574  }
1575  }
1576  if (p > e) p = e; /* clamp: the arithmetic branches above may overshoot */
1577  *nthp = nth;
1578  return (char*)p;
1579 }
1580 
1581 char*
1582 rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
1583 {
1584  return str_nth_len(p, e, &nth, enc);
1585 }
1586 
1587 static char*
1588 str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1589 {
1590  if (singlebyte)
1591  p += nth;
1592  else {
1593  p = str_nth_len(p, e, &nth, enc);
1594  }
1595  if (!p) return 0;
1596  if (p > e) p = e;
1597  return (char *)p;
1598 }
1599 
1600 /* char offset to byte offset */
1601 static long
1602 str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1603 {
1604  const char *pp = str_nth(p, e, nth, enc, singlebyte);
1605  if (!pp) return e - p;
1606  return pp - p;
1607 }
1608 
/*
 * Converts the character position pos in str into a byte offset by calling
 * str_offset() over the string's bytes.
 * NOTE(review): listing line 1613, which held the remaining arguments of the
 * str_offset() call, is missing here, so the statement below is incomplete.
 */
1609 long
1610 rb_str_offset(VALUE str, long pos)
1611 {
1612  return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
1614 }
1615 
1616 #ifdef NONASCII_MASK
/*
 * Finds the position of the (*nthp)-th character in [p, e) by counting bytes
 * for which is_utf8_lead_byte() is true.  On return *nthp holds the count that
 * was still outstanding when the scan stopped.
 *
 * When both the byte range and the requested count exceed 2 * SIZEOF_VALUE,
 * bytes up to the next VALUE-aligned address are scanned one at a time, then
 * whole VALUE-sized words are consumed with count_utf8_lead_bytes_with_word()
 * while at least sizeof(VALUE) characters remain to be skipped; the final
 * byte-wise loop finishes the job.
 */
1617 static char *
1618 str_utf8_nth(const char *p, const char *e, long *nthp)
1619 {
1620  long nth = *nthp;
1621  if ((int)SIZEOF_VALUE * 2 < e - p && (int)SIZEOF_VALUE * 2 < nth) {
1622  const VALUE *s, *t;
1623  const VALUE lowbits = sizeof(VALUE) - 1;
1624  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits)); /* p rounded up to a VALUE boundary */
1625  t = (const VALUE*)(~lowbits & (VALUE)e); /* e rounded down to a VALUE boundary */
1626  while (p < (const char *)s) {
1627  if (is_utf8_lead_byte(*p)) nth--;
1628  p++;
1629  }
1630  do {
1631  nth -= count_utf8_lead_bytes_with_word(s);
1632  s++;
1633  } while (s < t && (int)sizeof(VALUE) <= nth);
1634  p = (char *)s;
1635  }
1636  while (p < e) {
1637  if (is_utf8_lead_byte(*p)) {
1638  if (nth == 0) break; /* p now sits on the lead byte of the wanted character */
1639  nth--;
1640  }
1641  p++;
1642  }
1643  *nthp = nth;
1644  return (char *)p;
1645 }
1646 
/*
 * Returns the byte distance from p to the position str_utf8_nth() reports for
 * the nth character of [p, e).
 */
static long
str_utf8_offset(const char *p, const char *e, long nth)
{
    long remaining = nth;
    const char *target = str_utf8_nth(p, e, &remaining);

    return target - p;
}
1653 #endif
1654 
1655 /* byte offset to char offset */
1656 long
1657 rb_str_sublen(VALUE str, long pos)
1658 {
1659  if (single_byte_optimizable(str) || pos < 0)
1660  return pos;
1661  else {
1662  char *p = RSTRING_PTR(str);
1663  return enc_strlen(p, p + pos, STR_ENC_GET(str), ENC_CODERANGE(str));
1664  }
1665 }
1666 
1667 VALUE
1668 rb_str_subseq(VALUE str, long beg, long len)
1669 {
1670  VALUE str2;
1671 
1672  if (RSTRING_LEN(str) == beg + len &&
1673  RSTRING_EMBED_LEN_MAX < len) {
1674  str2 = rb_str_new_shared(rb_str_new_frozen(str));
1675  rb_str_drop_bytes(str2, beg);
1676  }
1677  else {
1678  str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len);
1679  RB_GC_GUARD(str);
1680  }
1681 
1682  rb_enc_cr_str_copy_for_substr(str2, str);
1683  OBJ_INFECT(str2, str);
1684 
1685  return str2;
1686 }
1687 
1688 static char *
1689 rb_str_subpos(VALUE str, long beg, long *lenp)
1690 {
1691  long len = *lenp;
1692  long slen = -1L;
1693  long blen = RSTRING_LEN(str);
1694  rb_encoding *enc = STR_ENC_GET(str);
1695  char *p, *s = RSTRING_PTR(str), *e = s + blen;
1696 
1697  if (len < 0) return 0;
1698  if (!blen) {
1699  len = 0;
1700  }
1701  if (single_byte_optimizable(str)) {
1702  if (beg > blen) return 0;
1703  if (beg < 0) {
1704  beg += blen;
1705  if (beg < 0) return 0;
1706  }
1707  if (beg + len > blen)
1708  len = blen - beg;
1709  if (len < 0) return 0;
1710  p = s + beg;
1711  goto end;
1712  }
1713  if (beg < 0) {
1714  if (len > -beg) len = -beg;
1715  if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
1716  beg = -beg;
1717  while (beg-- > len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
1718  p = e;
1719  if (!p) return 0;
1720  while (len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
1721  if (!p) return 0;
1722  len = e - p;
1723  goto end;
1724  }
1725  else {
1726  slen = str_strlen(str, enc);
1727  beg += slen;
1728  if (beg < 0) return 0;
1729  p = s + beg;
1730  if (len == 0) goto end;
1731  }
1732  }
1733  else if (beg > 0 && beg > RSTRING_LEN(str)) {
1734  return 0;
1735  }
1736  if (len == 0) {
1737  if (beg > str_strlen(str, enc)) return 0;
1738  p = s + beg;
1739  }
1740 #ifdef NONASCII_MASK
1741  else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1742  enc == rb_utf8_encoding()) {
1743  p = str_utf8_nth(s, e, &beg);
1744  if (beg > 0) return 0;
1745  len = str_utf8_offset(p, e, len);
1746  }
1747 #endif
1748  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1749  int char_sz = rb_enc_mbmaxlen(enc);
1750 
1751  p = s + beg * char_sz;
1752  if (p > e) {
1753  return 0;
1754  }
1755  else if (len * char_sz > e - p)
1756  len = e - p;
1757  else
1758  len *= char_sz;
1759  }
1760  else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
1761  if (beg > 0) return 0;
1762  len = 0;
1763  }
1764  else {
1765  len = str_offset(p, e, len, enc, 0);
1766  }
1767  end:
1768  *lenp = len;
1769  RB_GC_GUARD(str);
1770  return p;
1771 }
1772 
1773 VALUE
1774 rb_str_substr(VALUE str, long beg, long len)
1775 {
1776  VALUE str2;
1777  char *p = rb_str_subpos(str, beg, &len);
1778 
1779  if (!p) return Qnil;
1780  if (len > RSTRING_EMBED_LEN_MAX && p + len == RSTRING_END(str)) {
1781  str2 = rb_str_new4(str);
1782  str2 = str_new3(rb_obj_class(str2), str2);
1783  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
1784  RSTRING(str2)->as.heap.len = len;
1785  }
1786  else {
1787  str2 = rb_str_new5(str, p, len);
1788  rb_enc_cr_str_copy_for_substr(str2, str);
1789  OBJ_INFECT(str2, str);
1790  RB_GC_GUARD(str);
1791  }
1792 
1793  return str2;
1794 }
1795 
/*
 * Freezes the string; when the string carries STR_ASSOC, the value stored in
 * aux.shared is frozen first.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (presumably rb_str_freeze(VALUE str)).
 */
1796 VALUE
1798 {
1799  if (STR_ASSOC_P(str)) {
1800  VALUE ary = RSTRING(str)->as.heap.aux.shared;
1801  OBJ_FREEZE(ary);
1802  }
1803  return rb_obj_freeze(str);
1804 }
1805 
/* From here on, rb_str_dup_frozen expands to rb_str_new_frozen. */
/* NOTE(review): listing line 1806 is missing just above this definition. */
1807 #define rb_str_dup_frozen rb_str_new_frozen
1808 
1809 VALUE
1810 rb_str_locktmp(VALUE str)
1811 {
1812  if (FL_TEST(str, STR_TMPLOCK)) {
1813  rb_raise(rb_eRuntimeError, "temporal locking already locked string");
1814  }
1815  FL_SET(str, STR_TMPLOCK);
1816  return str;
1817 }
1818 
/*
 * Clears STR_TMPLOCK on the string and returns it; raises RuntimeError when the
 * flag is not currently set.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing; the function below passes it to rb_ensure() as
 * rb_str_unlocktmp.
 */
1819 VALUE
1821 {
1822  if (!FL_TEST(str, STR_TMPLOCK)) {
1823  rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string");
1824  }
1825  FL_UNSET(str, STR_TMPLOCK);
1826  return str;
1827 }
1828 
/*
 * Locks str, then runs func(arg) through rb_ensure() with rb_str_unlocktmp(str)
 * registered as the ensure handler; returns rb_ensure()'s result.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (presumably rb_str_locktmp_ensure(str, func, arg)).
 */
1829 VALUE
1831 {
1832  rb_str_locktmp(str);
1833  return rb_ensure(func, arg, rb_str_unlocktmp, str);
1834 }
1835 
1836 void
1837 rb_str_set_len(VALUE str, long len)
1838 {
1839  long capa;
1840 
1841  str_modifiable(str);
1842  if (STR_SHARED_P(str)) {
1843  rb_raise(rb_eRuntimeError, "can't set length of shared string");
1844  }
1845  if (len > (capa = (long)rb_str_capacity(str))) {
1846  rb_bug("probable buffer overflow: %ld for %ld", len, capa);
1847  }
1848  STR_SET_LEN(str, len);
1849  RSTRING_PTR(str)[len] = '\0';
1850 }
1851 
/*
 * Changes the byte length of str to len and returns str.
 *
 * Raises ArgumentError for a negative len.  The coderange is cleared even when
 * the length does not change.  Depending on the current and target
 * representation the string stays embedded, moves from embedded to heap, moves
 * from heap back to embedded (freeing the old buffer only when this string
 * owned it), gets a private heap copy, or has its heap buffer reallocated
 * (always when growing; when shrinking only if more than 1024 bytes are saved).
 */
1852 VALUE
1853 rb_str_resize(VALUE str, long len)
1854 {
1855  long slen;
1856  int independent;
1857 
1858  if (len < 0) {
1859  rb_raise(rb_eArgError, "negative string size (or size too big)");
1860  }
1861 
1862  independent = str_independent(str);
1863  ENC_CODERANGE_CLEAR(str);
1864  slen = RSTRING_LEN(str);
1865  if (len != slen) {
1866  if (STR_EMBED_P(str)) {
1867  if (len <= RSTRING_EMBED_LEN_MAX) {
1868  STR_SET_EMBED_LEN(str, len);
1869  RSTRING(str)->as.ary[len] = '\0';
1870  return str;
1871  }
1872  str_make_independent_expand(str, len - slen);
1873  STR_SET_NOEMBED(str);
1874  }
1875  else if (len <= RSTRING_EMBED_LEN_MAX) {
1876  char *ptr = RSTRING(str)->as.heap.ptr; /* saved before the embed flag changes the layout in use */
1877  STR_SET_EMBED(str);
1878  if (slen > len) slen = len;
1879  if (slen > 0) MEMCPY(RSTRING(str)->as.ary, ptr, char, slen);
1880  RSTRING(str)->as.ary[len] = '\0';
1881  STR_SET_EMBED_LEN(str, len);
1882  if (independent) xfree(ptr); /* a shared buffer belongs to another string */
1883  return str;
1884  }
1885  else if (!independent) {
1886  str_make_independent_expand(str, len - slen);
1887  }
1888  else if (slen < len || slen - len > 1024) {
1889  REALLOC_N(RSTRING(str)->as.heap.ptr, char, len+1);
1890  }
1891  if (!STR_NOCAPA_P(str)) {
1892  RSTRING(str)->as.heap.aux.capa = len;
1893  }
1894  RSTRING(str)->as.heap.len = len;
1895  RSTRING(str)->as.heap.ptr[len] = '\0'; /* sentinel */
1896  }
1897  return str;
1898 }
1899 
1900 static VALUE
1901 str_buf_cat(VALUE str, const char *ptr, long len)
1902 {
1903  long capa, total, off = -1;
1904 
1905  if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) {
1906  off = ptr - RSTRING_PTR(str);
1907  }
1908  rb_str_modify(str);
1909  if (len == 0) return 0;
1910  if (STR_ASSOC_P(str)) {
1911  FL_UNSET(str, STR_ASSOC);
1912  capa = RSTRING(str)->as.heap.aux.capa = RSTRING_LEN(str);
1913  }
1914  else if (STR_EMBED_P(str)) {
1915  capa = RSTRING_EMBED_LEN_MAX;
1916  }
1917  else {
1918  capa = RSTRING(str)->as.heap.aux.capa;
1919  }
1920  if (RSTRING_LEN(str) >= LONG_MAX - len) {
1921  rb_raise(rb_eArgError, "string sizes too big");
1922  }
1923  total = RSTRING_LEN(str)+len;
1924  if (capa <= total) {
1925  while (total > capa) {
1926  if (capa + 1 >= LONG_MAX / 2) {
1927  capa = (total + 4095) / 4096;
1928  break;
1929  }
1930  capa = (capa + 1) * 2;
1931  }
1932  RESIZE_CAPA(str, capa);
1933  }
1934  if (off != -1) {
1935  ptr = RSTRING_PTR(str) + off;
1936  }
1937  memcpy(RSTRING_PTR(str) + RSTRING_LEN(str), ptr, len);
1938  STR_SET_LEN(str, total);
1939  RSTRING_PTR(str)[total] = '\0'; /* sentinel */
1940 
1941  return str;
1942 }
1943 
/* Appends a NUL-terminated C string; note that ptr is evaluated twice. */
1944 #define str_buf_cat2(str, ptr) str_buf_cat((str), (ptr), strlen(ptr))
1945 
1946 VALUE
1947 rb_str_buf_cat(VALUE str, const char *ptr, long len)
1948 {
1949  if (len == 0) return str;
1950  if (len < 0) {
1951  rb_raise(rb_eArgError, "negative string size (or size too big)");
1952  }
1953  return str_buf_cat(str, ptr, len);
1954 }
1955 
1956 VALUE
1957 rb_str_buf_cat2(VALUE str, const char *ptr)
1958 {
1959  return rb_str_buf_cat(str, ptr, strlen(ptr));
1960 }
1961 
1962 VALUE
1963 rb_str_cat(VALUE str, const char *ptr, long len)
1964 {
1965  if (len < 0) {
1966  rb_raise(rb_eArgError, "negative string size (or size too big)");
1967  }
1968  if (STR_ASSOC_P(str)) {
1969  char *p;
1970  rb_str_modify_expand(str, len);
1971  p = RSTRING(str)->as.heap.ptr;
1972  memcpy(p + RSTRING(str)->as.heap.len, ptr, len);
1973  len = RSTRING(str)->as.heap.len += len;
1974  p[len] = '\0'; /* sentinel */
1975  return str;
1976  }
1977 
1978  return rb_str_buf_cat(str, ptr, len);
1979 }
1980 
1981 VALUE
1982 rb_str_cat2(VALUE str, const char *ptr)
1983 {
1984  return rb_str_cat(str, ptr, strlen(ptr));
1985 }
1986 
/*
 * Appends len bytes at ptr (encoding index ptr_encindex, coderange ptr_cr) to
 * str, checking encoding compatibility and computing the encoding index and
 * coderange of the result.
 *
 * ptr_cr_ret - when non-null, receives the coderange determined for the
 *              appended bytes (it may have been scanned here).
 *
 * Raises EncCompatError when the encodings differ and neither side is 7-bit,
 * or when one encoding is not ASCII compatible and both sides are non-empty.
 * Raises ArgumentError for a negative len.  Returns str.
 */
1987 static VALUE
1988 rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len,
1989  int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
1990 {
1991  int str_encindex = ENCODING_GET(str);
1992  int res_encindex;
1993  int str_cr, res_cr;
1994 
1995  str_cr = ENC_CODERANGE(str);
1996 
1997  if (str_encindex == ptr_encindex) {
1998  if (str_cr == ENC_CODERANGE_UNKNOWN)
1999  ptr_cr = ENC_CODERANGE_UNKNOWN;
2000  else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
2001  ptr_cr = coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
2002  }
2003  }
2004  else {
2005  rb_encoding *str_enc = rb_enc_from_index(str_encindex);
2006  rb_encoding *ptr_enc = rb_enc_from_index(ptr_encindex);
2007  if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
2008  if (len == 0)
2009  return str;
2010  if (RSTRING_LEN(str) == 0) {
2011  rb_str_buf_cat(str, ptr, len);
2012  ENCODING_CODERANGE_SET(str, ptr_encindex, ptr_cr);
2013  return str;
2014  }
2015  goto incompatible;
2016  }
2017  if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
2018  ptr_cr = coderange_scan(ptr, len, ptr_enc);
2019  }
2020  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2021  if (ENCODING_IS_ASCII8BIT(str) || ptr_cr != ENC_CODERANGE_7BIT) {
2022  str_cr = rb_enc_str_coderange(str);
2023  }
2024  }
2025  }
2026  if (ptr_cr_ret)
2027  *ptr_cr_ret = ptr_cr;
2028 
2029  if (str_encindex != ptr_encindex &&
2030  str_cr != ENC_CODERANGE_7BIT &&
2031  ptr_cr != ENC_CODERANGE_7BIT) {
2032  incompatible: /* also the target of the goto in the non-ASCII-compatible branch above */
2033  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
2034  rb_enc_name(rb_enc_from_index(str_encindex)),
2035  rb_enc_name(rb_enc_from_index(ptr_encindex)));
2036  }
2037 
2038  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2039  res_encindex = str_encindex;
2040  res_cr = ENC_CODERANGE_UNKNOWN;
2041  }
2042  else if (str_cr == ENC_CODERANGE_7BIT) {
2043  if (ptr_cr == ENC_CODERANGE_7BIT) {
2044  res_encindex = str_encindex;
2045  res_cr = ENC_CODERANGE_7BIT;
2046  }
2047  else {
2048  res_encindex = ptr_encindex; /* a 7-bit receiver adopts the appended data's encoding */
2049  res_cr = ptr_cr;
2050  }
2051  }
2052  else if (str_cr == ENC_CODERANGE_VALID) {
2053  res_encindex = str_encindex;
2054  if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID)
2055  res_cr = str_cr;
2056  else
2057  res_cr = ptr_cr;
2058  }
2059  else { /* str_cr == ENC_CODERANGE_BROKEN */
2060  res_encindex = str_encindex;
2061  res_cr = str_cr;
2062  if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN;
2063  }
2064 
2065  if (len < 0) {
2066  rb_raise(rb_eArgError, "negative string size (or size too big)");
2067  }
2068  str_buf_cat(str, ptr, len);
2069  ENCODING_CODERANGE_SET(str, res_encindex, res_cr);
2070  return str;
2071 }
2072 
/*
 * Appends len bytes at ptr, interpreted in encoding ptr_enc, to str by
 * delegating to rb_enc_cr_str_buf_cat().
 * NOTE(review): listing line 2077, which held the remaining arguments of the
 * call, is missing here, so the statement below is incomplete.
 */
2073 VALUE
2074 rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
2075 {
2076  return rb_enc_cr_str_buf_cat(str, ptr, len,
2078 }
2079 
2080 VALUE
2081 rb_str_buf_cat_ascii(VALUE str, const char *ptr)
2082 {
2083  /* ptr must reference NUL terminated ASCII string. */
2084  int encindex = ENCODING_GET(str);
2085  rb_encoding *enc = rb_enc_from_index(encindex);
2086  if (rb_enc_asciicompat(enc)) {
2087  return rb_enc_cr_str_buf_cat(str, ptr, strlen(ptr),
2088  encindex, ENC_CODERANGE_7BIT, 0);
2089  }
2090  else {
2091  char *buf = ALLOCA_N(char, rb_enc_mbmaxlen(enc));
2092  while (*ptr) {
2093  unsigned int c = (unsigned char)*ptr;
2094  int len = rb_enc_codelen(c, enc);
2095  rb_enc_mbcput(c, buf, enc);
2096  rb_enc_cr_str_buf_cat(str, buf, len,
2097  encindex, ENC_CODERANGE_VALID, 0);
2098  ptr++;
2099  }
2100  return str;
2101  }
2102 }
2103 
/*
 * Appends the bytes of str2 to str with encoding checks, propagates taint from
 * str2 to str, and stores the coderange determined for str2 back on str2.
 * Returns str.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing; the next function calls it as
 * rb_str_buf_append(str, str2).
 */
2104 VALUE
2106 {
2107  int str2_cr;
2108 
2109  str2_cr = ENC_CODERANGE(str2);
2110 
2111  rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
2112  ENCODING_GET(str2), str2_cr, &str2_cr);
2113 
2114  OBJ_INFECT(str, str2);
2115  ENC_CODERANGE_SET(str2, str2_cr);
2116 
2117  return str;
2118 }
2119 
/*
 * Appends str2 (converted with StringValue) to str and returns str.
 * A non-empty str2 appended to a string carrying STR_ASSOC is copied directly
 * into the expanded heap buffer; everything else goes through
 * rb_str_buf_append().
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing; rb_str_concat below calls it as
 * rb_str_append(str1, str2).
 */
2120 VALUE
2122 {
2123  rb_encoding *enc;
2124  int cr, cr2;
2125  long len2;
2126 
2127  StringValue(str2);
2128  if ((len2 = RSTRING_LEN(str2)) > 0 && STR_ASSOC_P(str)) {
2129  long len = RSTRING_LEN(str) + len2;
2130  enc = rb_enc_check(str, str2);
2131  cr = ENC_CODERANGE(str);
2132  if ((cr2 = ENC_CODERANGE(str2)) > cr) cr = cr2; /* keep the numerically larger coderange value */
2133  rb_str_modify_expand(str, len2);
2134  memcpy(RSTRING(str)->as.heap.ptr + RSTRING(str)->as.heap.len,
2135  RSTRING_PTR(str2), len2+1); /* +1 also copies the byte following str2's contents */
2136  RSTRING(str)->as.heap.len = len;
2137  rb_enc_associate(str, enc);
2138  ENC_CODERANGE_SET(str, cr);
2139  OBJ_INFECT(str, str2);
2140  return str;
2141  }
2142  return rb_str_buf_append(str, str2);
2143 }
2144 
2145 /*
2146  * call-seq:
2147  * str << integer -> str
2148  * str.concat(integer) -> str
2149  * str << obj -> str
2150  * str.concat(obj) -> str
2151  *
2152  * Append---Concatenates the given object to <i>str</i>. If the object is a
2153  * <code>Integer</code>, it is considered as a codepoint, and is converted
2154  * to a character before concatenation.
2155  *
2156  * a = "hello "
2157  * a << "world" #=> "hello world"
2158  * a.concat(33) #=> "hello world!"
2159  */
2160 
/*
 * Implements String#<< / String#concat.  A Fixnum or Bignum argument is
 * treated as a codepoint, encoded in str1's encoding and appended; any other
 * argument is appended with rb_str_append().  Raises RangeError for values
 * that do not fit or are not valid codepoints in the encoding.
 * NOTE(review): this listing is missing the line with the function name and
 * parameters, as well as listing lines 2190-2191, 2201 and 2204 (the numbering
 * skips them), so the `if (code > 127)` body and the first labels of the
 * switch are not visible here.
 */
2161 VALUE
2163 {
2164  unsigned int code;
2165  rb_encoding *enc = STR_ENC_GET(str1);
2166 
2167  if (FIXNUM_P(str2) || RB_TYPE_P(str2, T_BIGNUM)) {
2168  if (rb_num_to_uint(str2, &code) == 0) {
2169  }
2170  else if (FIXNUM_P(str2)) {
2171  rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
2172  }
2173  else {
2174  rb_raise(rb_eRangeError, "bignum out of char range");
2175  }
2176  }
2177  else {
2178  return rb_str_append(str1, str2);
2179  }
2180 
2181  if (enc == rb_usascii_encoding()) {
2182  /* US-ASCII automatically extended to ASCII-8BIT */
2183  char buf[1];
2184  buf[0] = (char)code;
2185  if (code > 0xFF) {
2186  rb_raise(rb_eRangeError, "%u out of char range", code);
2187  }
2188  rb_str_cat(str1, buf, 1);
2189  if (code > 127) {
2192  }
2193  }
2194  else {
2195  long pos = RSTRING_LEN(str1);
2196  int cr = ENC_CODERANGE(str1);
2197  int len;
2198  char *buf;
2199 
2200  switch (len = rb_enc_codelen(code, enc)) {
2202  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2203  break;
2205  case 0:
2206  rb_raise(rb_eRangeError, "%u out of char range", code);
2207  break;
2208  }
2209  buf = ALLOCA_N(char, len + 1);
2210  rb_enc_mbcput(code, buf, enc);
2211  if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
2212  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2213  }
2214  rb_str_resize(str1, pos+len);
2215  memcpy(RSTRING_PTR(str1) + pos, buf, len);
2216  if (cr == ENC_CODERANGE_7BIT && code > 127)
2217  cr = ENC_CODERANGE_VALID;
2218  ENC_CODERANGE_SET(str1, cr); /* rb_str_resize cleared the coderange; restore the value computed above */
2219  }
2220  return str1;
2221 }
2222 
2223 /*
2224  * call-seq:
2225  * str.prepend(other_str) -> str
2226  *
2227  * Prepend---Prepend the given string to <i>str</i>.
2228  *
2229  * a = "world"
2230  * a.prepend("hello ") #=> "hello world"
2231  * a #=> "hello world"
2232  */
2233 
/*
 * Implements String#prepend: inserts str2 at position 0 of str by calling
 * rb_str_update() with a zero-length range, then returns str.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing.
 */
2234 static VALUE
2236 {
2237  StringValue(str2);
2238  StringValue(str);
2239  rb_str_update(str, 0L, 0L, str2);
2240  return str;
2241 }
2242 
/*
 * Hashes the string's bytes with rb_memhash() and XORs in the encoding index.
 * The index is treated as 0 when the string's coderange is 7-bit, so 7-bit
 * strings hash the same regardless of encoding.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing; rb_str_hash(str) is called further below.
 */
2243 st_index_t
2245 {
2246  int e = ENCODING_GET(str);
2247  if (e && rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
2248  e = 0;
2249  }
2250  return rb_memhash((const void *)RSTRING_PTR(str), RSTRING_LEN(str)) ^ e;
2251 }
2252 
/*
 * Equality test in comparator form: returns 0 when the two strings are
 * comparable, have the same byte length and identical bytes; returns 1
 * otherwise.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (presumably rb_str_hash_cmp(VALUE str1, VALUE str2)).
 */
2253 int
2255 {
2256  long len;
2257 
2258  if (!rb_str_comparable(str1, str2)) return 1;
2259  if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) &&
2260  memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) {
2261  return 0;
2262  }
2263  return 1;
2264 }
2265 
2266 /*
2267  * call-seq:
2268  * str.hash -> fixnum
2269  *
2270  * Return a hash based on the string's length and content.
2271  */
2272 
/*
 * Implements String#hash: wraps the value of rb_str_hash() with INT2FIX().
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing.
 */
2273 static VALUE
2275 {
2276  st_index_t hval = rb_str_hash(str);
2277  return INT2FIX(hval);
2278 }
2279 
/* Smaller of two values; each argument may be evaluated twice. */
2280 #define lesser(a,b) (((a)>(b))?(b):(a))
2281 
/*
 * Reports whether two strings can be compared byte-wise: TRUE when either is
 * empty, when both have the same encoding index, or when both are 7-bit.
 * NOTE(review): this listing is missing the line with the function name and
 * parameters, and listing lines 2297 and 2301 (the numbering skips them).
 * Those lines presumably held the conditions guarding the two `return TRUE;`
 * statements that appear unconditional below -- confirm against the real
 * source.  Other functions in this file call rb_str_comparable(str1, str2).
 */
2282 int
2284 {
2285  int idx1, idx2;
2286  int rc1, rc2;
2287 
2288  if (RSTRING_LEN(str1) == 0) return TRUE;
2289  if (RSTRING_LEN(str2) == 0) return TRUE;
2290  idx1 = ENCODING_GET(str1);
2291  idx2 = ENCODING_GET(str2);
2292  if (idx1 == idx2) return TRUE;
2293  rc1 = rb_enc_str_coderange(str1);
2294  rc2 = rb_enc_str_coderange(str2);
2295  if (rc1 == ENC_CODERANGE_7BIT) {
2296  if (rc2 == ENC_CODERANGE_7BIT) return TRUE;
2298  return TRUE;
2299  }
2300  if (rc2 == ENC_CODERANGE_7BIT) {
2302  return TRUE;
2303  }
2304  return FALSE;
2305 }
2306 
/*
 * Three-way comparison of two strings, returning -1, 0 or 1.
 * The common prefix is compared with memcmp(); if it is equal, the longer
 * string is greater.  For equal bytes and lengths the result is 0 unless the
 * strings are not comparable, in which case the encoding indexes decide.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing; rb_str_cmp(str1, ...) is called further below.
 */
2307 int
2309 {
2310  long len1, len2;
2311  const char *ptr1, *ptr2;
2312  int retval;
2313 
2314  if (str1 == str2) return 0;
2315  RSTRING_GETMEM(str1, ptr1, len1);
2316  RSTRING_GETMEM(str2, ptr2, len2);
2317  if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
2318  if (len1 == len2) {
2319  if (!rb_str_comparable(str1, str2)) {
2320  if (ENCODING_GET(str1) > ENCODING_GET(str2))
2321  return 1;
2322  return -1;
2323  }
2324  return 0;
2325  }
2326  if (len1 > len2) return 1;
2327  return -1;
2328  }
2329  if (retval > 0) return 1; /* retval was assigned: this point is reached only when memcmp ran */
2330  return -1;
2331 }
2332 
2333 /* expect tail call optimization */
2334 static VALUE
2335 str_eql(const VALUE str1, const VALUE str2)
2336 {
2337  const long len = RSTRING_LEN(str1);
2338  const char *ptr1, *ptr2;
2339 
2340  if (len != RSTRING_LEN(str2)) return Qfalse;
2341  if (!rb_str_comparable(str1, str2)) return Qfalse;
2342  if ((ptr1 = RSTRING_PTR(str1)) == (ptr2 = RSTRING_PTR(str2)))
2343  return Qtrue;
2344  if (memcmp(ptr1, ptr2, len) == 0)
2345  return Qtrue;
2346  return Qfalse;
2347 }
2348 
2349 /*
2350  * call-seq:
2351  * str == obj -> true or false
2352  *
2353  * Equality---If <i>obj</i> is not a <code>String</code>, returns
2354  * <code>false</code>. Otherwise, returns <code>true</code> if <i>str</i>
2355  * <code><=></code> <i>obj</i> returns zero.
2356  */
2357 
/*
 * Implements String#==.  Identical objects are equal.  A non-String argument
 * that responds to to_str is compared by calling rb_equal(str2, str1); one
 * that does not yields Qfalse.  Two strings are compared with str_eql().
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing (presumably rb_str_equal(VALUE str1, VALUE str2)).
 */
2358 VALUE
2360 {
2361  if (str1 == str2) return Qtrue;
2362  if (!RB_TYPE_P(str2, T_STRING)) {
2363  if (!rb_respond_to(str2, rb_intern("to_str"))) {
2364  return Qfalse;
2365  }
2366  return rb_equal(str2, str1);
2367  }
2368  return str_eql(str1, str2);
2369 }
2370 
2371 /*
2372  * call-seq:
2373  * str.eql?(other) -> true or false
2374  *
2375  * Two strings are equal if they have the same length and content.
2376  */
2377 
/*
 * Implements String#eql?: Qtrue for the identical object, Qfalse for any
 * non-String argument (no to_str conversion), otherwise str_eql().
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing.
 */
2378 static VALUE
2380 {
2381  if (str1 == str2) return Qtrue;
2382  if (!RB_TYPE_P(str2, T_STRING)) return Qfalse;
2383  return str_eql(str1, str2);
2384 }
2385 
2386 /*
2387  * call-seq:
2388  * string <=> other_string -> -1, 0, +1 or nil
2389  *
2390  *
2391  * Comparison---Returns -1, 0, +1 or nil depending on whether +string+ is less
2392  * than, equal to, or greater than +other_string+.
2393  *
2394  * +nil+ is returned if the two values are incomparable.
2395  *
2396  * If the strings are of different lengths, and the strings are equal when
2397  * compared up to the shortest length, then the longer string is considered
2398  * greater than the shorter one.
2399  *
2400  * <code><=></code> is the basis for the methods <code><</code>,
2401  * <code><=</code>, <code>></code>, <code>>=</code>, and
2402  * <code>between?</code>, included from module Comparable. The method
2403  * String#== does not use Comparable#==.
2404  *
2405  * "abcdef" <=> "abcde" #=> 1
2406  * "abcdef" <=> "abcdef" #=> 0
2407  * "abcdef" <=> "abcdefg" #=> -1
2408  * "abcdef" <=> "ABCDEF" #=> 1
2409  */
2410 
/*
 * Implements String#<=>.  A String argument is compared with rb_str_cmp().
 * A non-String argument is first asked for to_str; if that yields a String the
 * comparison uses it, otherwise the result of rb_invcmp(str1, str2) is
 * returned as is.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing.
 */
2411 static VALUE
2413 {
2414  int result;
2415 
2416  if (!RB_TYPE_P(str2, T_STRING)) {
2417  VALUE tmp = rb_check_funcall(str2, rb_intern("to_str"), 0, 0);
2418  if (RB_TYPE_P(tmp, T_STRING)) {
2419  result = rb_str_cmp(str1, tmp);
2420  }
2421  else {
2422  return rb_invcmp(str1, str2);
2423  }
2424  }
2425  else {
2426  result = rb_str_cmp(str1, str2);
2427  }
2428  return INT2FIX(result);
2429 }
2430 
2431 /*
2432  * call-seq:
2433  * str.casecmp(other_str) -> -1, 0, +1 or nil
2434  *
2435  * Case-insensitive version of <code>String#<=></code>.
2436  *
2437  * "abcdef".casecmp("abcde") #=> 1
2438  * "aBcDeF".casecmp("abcdef") #=> 0
2439  * "abcdef".casecmp("abcdefg") #=> -1
2440  * "abcdef".casecmp("ABCDEF") #=> 0
2441  */
2442 
/*
 * Implements String#casecmp.  Returns nil when rb_enc_compatible() finds no
 * common encoding.  Otherwise the strings are compared character by character
 * after folding with TOUPPER(); characters for which rb_enc_ascget() reports a
 * negative value are compared as raw bytes with memcmp().  When one string is
 * a prefix of the other, the byte lengths decide the result.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing.
 */
2443 static VALUE
2445 {
2446  long len;
2447  rb_encoding *enc;
2448  char *p1, *p1end, *p2, *p2end;
2449 
2450  StringValue(str2);
2451  enc = rb_enc_compatible(str1, str2);
2452  if (!enc) {
2453  return Qnil;
2454  }
2455 
2456  p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
2457  p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
2458  if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
2459  while (p1 < p1end && p2 < p2end) {
2460  if (*p1 != *p2) {
2461  unsigned int c1 = TOUPPER(*p1 & 0xff);
2462  unsigned int c2 = TOUPPER(*p2 & 0xff);
2463  if (c1 != c2)
2464  return INT2FIX(c1 < c2 ? -1 : 1);
2465  }
2466  p1++;
2467  p2++;
2468  }
2469  }
2470  else {
2471  while (p1 < p1end && p2 < p2end) {
2472  int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
2473  int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
2474 
2475  if (0 <= c1 && 0 <= c2) {
2476  c1 = TOUPPER(c1);
2477  c2 = TOUPPER(c2);
2478  if (c1 != c2)
2479  return INT2FIX(c1 < c2 ? -1 : 1);
2480  }
2481  else {
2482  int r;
2483  l1 = rb_enc_mbclen(p1, p1end, enc);
2484  l2 = rb_enc_mbclen(p2, p2end, enc);
2485  len = l1 < l2 ? l1 : l2;
2486  r = memcmp(p1, p2, len);
2487  if (r != 0)
2488  return INT2FIX(r < 0 ? -1 : 1);
2489  if (l1 != l2)
2490  return INT2FIX(l1 < l2 ? -1 : 1);
2491  }
2492  p1 += l1;
2493  p2 += l2;
2494  }
2495  }
2496  if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return INT2FIX(0);
2497  if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return INT2FIX(1);
2498  return INT2FIX(-1);
2499 }
2500 
/*
 * Searches str for sub starting at character index offset (negative values
 * count from the end).  Returns -1 when sub is a broken string, when the
 * offset is out of range, or when no match exists.
 *
 * The returned position is a byte offset into str: offset is converted from
 * characters to bytes with str_offset() before the search, and the caller in
 * this file converts the result back with rb_str_sublen().
 */
2501 static long
2502 rb_str_index(VALUE str, VALUE sub, long offset)
2503 {
2504  long pos;
2505  char *s, *sptr, *e;
2506  long len, slen;
2507  rb_encoding *enc;
2508 
2509  enc = rb_enc_check(str, sub);
2510  if (is_broken_string(sub)) {
2511  return -1;
2512  }
2513  len = str_strlen(str, enc);
2514  slen = str_strlen(sub, enc);
2515  if (offset < 0) {
2516  offset += len;
2517  if (offset < 0) return -1;
2518  }
2519  if (len - offset < slen) return -1;
2520  s = RSTRING_PTR(str);
2521  e = s + RSTRING_LEN(str);
2522  if (offset) {
2523  offset = str_offset(s, RSTRING_END(str), offset, enc, single_byte_optimizable(str));
2524  s += offset;
2525  }
2526  if (slen == 0) return offset;
2527  /* need proceed one character at a time */
2528  sptr = RSTRING_PTR(sub);
2529  slen = RSTRING_LEN(sub); /* from here on slen and len are byte counts */
2530  len = RSTRING_LEN(str) - offset;
2531  for (;;) {
2532  char *t;
2533  pos = rb_memsearch(sptr, slen, s, len, enc);
2534  if (pos < 0) return pos;
2535  t = rb_enc_right_char_head(s, s+pos, e, enc);
2536  if (t == s + pos) break; /* match starts on a character boundary */
2537  if ((len -= t - s) <= 0) return -1;
2538  offset += t - s;
2539  s = t;
2540  }
2541  return pos + offset;
2542 }
2543 
2544 
2545 /*
2546  * call-seq:
2547  * str.index(substring [, offset]) -> fixnum or nil
2548  * str.index(regexp [, offset]) -> fixnum or nil
2549  *
2550  * Returns the index of the first occurrence of the given <i>substring</i> or
2551  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2552  * found. If the second parameter is present, it specifies the position in the
2553  * string to begin the search.
2554  *
2555  * "hello".index('e') #=> 1
2556  * "hello".index('lo') #=> 3
2557  * "hello".index('a') #=> nil
2558  * "hello".index(?e) #=> 1
2559  * "hello".index(/[aeiou]/, -3) #=> 4
2560  */
2561 
/*
 * Implements String#index.  Accepts a pattern and an optional start position
 * (default 0; negative values count from the end).  Regexp patterns are
 * searched with rb_reg_search(); anything else is converted to a String (a
 * TypeError is raised if that fails) and searched with rb_str_index().
 * Returns the character index of the match, or nil.
 * NOTE(review): this listing is missing the line with the function name and
 * parameters, and listing line 2579 (the body of the
 * `if (RB_TYPE_P(sub, T_REGEXP))` block below).
 */
2562 static VALUE
2564 {
2565  VALUE sub;
2566  VALUE initpos;
2567  long pos;
2568 
2569  if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
2570  pos = NUM2LONG(initpos);
2571  }
2572  else {
2573  pos = 0;
2574  }
2575  if (pos < 0) {
2576  pos += str_strlen(str, STR_ENC_GET(str));
2577  if (pos < 0) {
2578  if (RB_TYPE_P(sub, T_REGEXP)) {
2580  }
2581  return Qnil;
2582  }
2583  }
2584 
2585  if (SPECIAL_CONST_P(sub)) goto generic;
2586  switch (BUILTIN_TYPE(sub)) {
2587  case T_REGEXP:
2588  if (pos > str_strlen(str, STR_ENC_GET(str)))
2589  return Qnil;
2590  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2591  rb_enc_check(str, sub), single_byte_optimizable(str));
2592 
2593  pos = rb_reg_search(sub, str, pos, 0);
2594  pos = rb_str_sublen(str, pos);
2595  break;
2596 
2597  generic:
2598  default: {
2599  VALUE tmp;
2600 
2601  tmp = rb_check_string_type(sub);
2602  if (NIL_P(tmp)) {
2603  rb_raise(rb_eTypeError, "type mismatch: %s given",
2604  rb_obj_classname(sub));
2605  }
2606  sub = tmp;
2607  }
2608  /* fall through */
2609  case T_STRING:
2610  pos = rb_str_index(str, sub, pos);
2611  pos = rb_str_sublen(str, pos); /* byte offset back to character index; -1 passes through */
2612  break;
2613  }
2614 
2615  if (pos == -1) return Qnil;
2616  return LONG2NUM(pos);
2617 }
2618 
/*
 * Searches str backwards for sub, starting at character index pos.
 * Returns the character index of the match, or -1 when sub is a broken
 * string, is longer than str, or does not occur at or before pos.
 * The scan compares sub's bytes at each candidate position and steps back one
 * character at a time with rb_enc_prev_char().
 */
2619 static long
2620 rb_str_rindex(VALUE str, VALUE sub, long pos)
2621 {
2622  long len, slen;
2623  char *s, *sbeg, *e, *t;
2624  rb_encoding *enc;
2625  int singlebyte = single_byte_optimizable(str);
2626 
2627  enc = rb_enc_check(str, sub);
2628  if (is_broken_string(sub)) {
2629  return -1;
2630  }
2631  len = str_strlen(str, enc);
2632  slen = str_strlen(sub, enc);
2633  /* substring longer than string */
2634  if (len < slen) return -1;
2635  if (len - pos < slen) {
2636  pos = len - slen; /* latest start at which sub can still fit */
2637  }
2638  if (len == 0) {
2639  return pos;
2640  }
2641  sbeg = RSTRING_PTR(str);
2642  e = RSTRING_END(str);
2643  t = RSTRING_PTR(sub);
2644  slen = RSTRING_LEN(sub); /* slen is a byte count from here on */
2645  s = str_nth(sbeg, e, pos, enc, singlebyte);
2646  while (s) {
2647  if (memcmp(s, t, slen) == 0) {
2648  return pos;
2649  }
2650  if (pos == 0) break;
2651  pos--;
2652  s = rb_enc_prev_char(sbeg, s, e, enc);
2653  }
2654  return -1;
2655 }
2656 
2657 
2658 /*
2659  * call-seq:
2660  * str.rindex(substring [, fixnum]) -> fixnum or nil
2661  * str.rindex(regexp [, fixnum]) -> fixnum or nil
2662  *
2663  * Returns the index of the last occurrence of the given <i>substring</i> or
2664  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2665  * found. If the second parameter is present, it specifies the position in the
2666  * string to end the search---characters beyond this point will not be
2667  * considered.
2668  *
2669  * "hello".rindex('e') #=> 1
2670  * "hello".rindex('l') #=> 3
2671  * "hello".rindex('a') #=> nil
2672  * "hello".rindex(?e) #=> 1
2673  * "hello".rindex(/[aeiou]/, -2) #=> 1
2674  */
2675 
/*
 * Argument-handling front end for the reverse search described in the
 * call-seq comment above.  Takes a pattern plus an optional position,
 * normalizes the position against the string length, then dispatches on the
 * pattern's type.  Returns the index as a Ruby integer, or Qnil when there is
 * no match.  Raises TypeError when the pattern is neither a Regexp nor
 * convertible by rb_check_string_type.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses argc, argv and str.
 */
2676 static VALUE
2678 {
2679  VALUE sub;
2680  VALUE vpos;
2681  rb_encoding *enc = STR_ENC_GET(str);
2682  long pos, len = str_strlen(str, enc);
2683 
 /* "11": one required argument (pattern) and one optional (position) */
2684  if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
2685  pos = NUM2LONG(vpos);
2686  if (pos < 0) {
 /* a negative position is taken relative to the end of the string */
2687  pos += len;
2688  if (pos < 0) {
 /* still negative: the position lies before the start, nothing to find */
 /* NOTE(review): the statement inside the Regexp branch below is missing
  * from this listing. */
2689  if (RB_TYPE_P(sub, T_REGEXP)) {
2691  }
2692  return Qnil;
2693  }
2694  }
 /* positions past the end are clamped to the string length */
2695  if (pos > len) pos = len;
2696  }
2697  else {
 /* no position given: start from the end of the string */
2698  pos = len;
2699  }
2700 
 /* immediates (special constants) have no builtin type; send them straight
  * to the string-conversion path */
2701  if (SPECIAL_CONST_P(sub)) goto generic;
2702  switch (BUILTIN_TYPE(sub)) {
2703  case T_REGEXP:
2704  /* enc = rb_get_check(str, sub); */
 /* NOTE(review): the continuation line of the str_offset call below is
  * missing from this listing. */
2705  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2707 
 /* the search is skipped only when the regexp has a compiled pattern and a
  * zero-length source; pos as computed above is then used as the result */
2708  if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
2709  pos = rb_reg_search(sub, str, pos, 1);
 /* presumably converts the byte offset of the match into a character
  * index -- confirm against rb_str_sublen */
2710  pos = rb_str_sublen(str, pos);
2711  }
2712  if (pos >= 0) return LONG2NUM(pos);
2713  break;
2714 
2715  generic:
2716  default: {
2717  VALUE tmp;
2718 
 /* anything else must be convertible to a String */
2719  tmp = rb_check_string_type(sub);
2720  if (NIL_P(tmp)) {
2721  rb_raise(rb_eTypeError, "type mismatch: %s given",
2722  rb_obj_classname(sub));
2723  }
2724  sub = tmp;
2725  }
2726  /* fall through */
2727  case T_STRING:
2728  pos = rb_str_rindex(str, sub, pos);
2729  if (pos >= 0) return LONG2NUM(pos);
2730  break;
2731  }
 /* every path that found nothing ends up here */
2732  return Qnil;
2733 }
2734 
2735 /*
2736  * call-seq:
2737  * str =~ obj -> fixnum or nil
2738  *
2739  * Match---If <i>obj</i> is a <code>Regexp</code>, use it as a pattern to match
2740  * against <i>str</i>, and returns the position where the match starts, or
2741  * <code>nil</code> if there is no match. Otherwise, invokes
2742  * <i>obj.=~</i>, passing <i>str</i> as an argument. The default
2743  * <code>=~</code> in <code>Object</code> returns <code>nil</code>.
2744  *
2745  * Note: <code>str =~ regexp</code> is not the same as
2746  * <code>regexp =~ str</code>. Strings captured from named capture groups
2747  * are assigned to local variables only in the second case.
2748  *
2749  * "cat o' 9 tails" =~ /\d/ #=> 7
2750  * "cat o' 9 tails" =~ 9 #=> nil
2751  */
2752 
/*
 * Implements the match operator described in the comment above: dispatches
 * on the type of the right-hand operand y, with x being the string.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses x and y.
 */
2753 static VALUE
2755 {
 /* immediates have no builtin type; treat them like any other object */
2756  if (SPECIAL_CONST_P(y)) goto generic;
2757  switch (BUILTIN_TYPE(y)) {
2758  case T_STRING:
 /* matching a String against a String is rejected outright */
2759  rb_raise(rb_eTypeError, "type mismatch: String given");
2760 
2761  case T_REGEXP:
 /* note the swapped order: the regexp is the receiver */
2762  return rb_reg_match(y, x);
2763 
2764  generic:
2765  default:
 /* otherwise call y's own =~ method with the string as its argument */
2766  return rb_funcall(y, rb_intern("=~"), 1, x);
2767  }
2768 }
2769 
2770 
2771 static VALUE get_pat(VALUE, int);
2772 
2773 
2774 /*
2775  * call-seq:
2776  * str.match(pattern) -> matchdata or nil
2777  * str.match(pattern, pos) -> matchdata or nil
2778  *
2779  * Converts <i>pattern</i> to a <code>Regexp</code> (if it isn't already one),
2780  * then invokes its <code>match</code> method on <i>str</i>. If the second
2781  * parameter is present, it specifies the position in the string to begin the
2782  * search.
2783  *
2784  * 'hello'.match('(.)\1') #=> #<MatchData "ll" 1:"l">
2785  * 'hello'.match('(.)\1')[0] #=> "ll"
2786  * 'hello'.match(/(.)\1/)[0] #=> "ll"
2787  * 'hello'.match('xx') #=> nil
2788  *
2789  * If a block is given, invokes the block with the MatchData if the match succeeds, so
2790  * that you can write
2791  *
2792  * str.match(pat) {|m| ...}
2793  *
2794  * instead of
2795  *
2796  * if m = str.match(pat)
2797  * ...
2798  * end
2799  *
2800  * The return value is a value from block execution in this case.
2801  */
2802 
/*
 * Converts the first argument to a Regexp with get_pat (unquoted), calls its
 * "match" method with the string substituted as first argument, and yields a
 * non-nil result to the block when one is given.
 * Returns the block's value when it was yielded to, otherwise the result of
 * the "match" call.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses argc, argv and str.
 */
2803 static VALUE
2805 {
2806  VALUE re, result;
 /* arity is only checked here when no argument was passed; any further
  * arguments are forwarded unchanged to the "match" call below */
2807  if (argc < 1)
2808  rb_check_arity(argc, 1, 2);
2809  re = argv[0];
 /* reuse argv as the argument vector: slot 0 now carries the string */
2810  argv[0] = str;
2811  result = rb_funcall2(get_pat(re, 0), rb_intern("match"), argc, argv);
2812  if (!NIL_P(result) && rb_block_given_p()) {
2813  return rb_yield(result);
2814  }
2815  return result;
2816 }
2817 
2822 };
2823 
2824 static enum neighbor_char
2825 enc_succ_char(char *p, long len, rb_encoding *enc)
2826 {
2827  long i;
2828  int l;
2829  while (1) {
2830  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--)
2831  p[i] = '\0';
2832  if (i < 0)
2833  return NEIGHBOR_WRAPPED;
2834  ++((unsigned char*)p)[i];
2835  l = rb_enc_precise_mbclen(p, p+len, enc);
2836  if (MBCLEN_CHARFOUND_P(l)) {
2837  l = MBCLEN_CHARFOUND_LEN(l);
2838  if (l == len) {
2839  return NEIGHBOR_FOUND;
2840  }
2841  else {
2842  memset(p+l, 0xff, len-l);
2843  }
2844  }
2845  if (MBCLEN_INVALID_P(l) && i < len-1) {
2846  long len2;
2847  int l2;
2848  for (len2 = len-1; 0 < len2; len2--) {
2849  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
2850  if (!MBCLEN_INVALID_P(l2))
2851  break;
2852  }
2853  memset(p+len2+1, 0xff, len-(len2+1));
2854  }
2855  }
2856 }
2857 
2858 static enum neighbor_char
2859 enc_pred_char(char *p, long len, rb_encoding *enc)
2860 {
2861  long i;
2862  int l;
2863  while (1) {
2864  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--)
2865  p[i] = '\xff';
2866  if (i < 0)
2867  return NEIGHBOR_WRAPPED;
2868  --((unsigned char*)p)[i];
2869  l = rb_enc_precise_mbclen(p, p+len, enc);
2870  if (MBCLEN_CHARFOUND_P(l)) {
2871  l = MBCLEN_CHARFOUND_LEN(l);
2872  if (l == len) {
2873  return NEIGHBOR_FOUND;
2874  }
2875  else {
2876  memset(p+l, 0, len-l);
2877  }
2878  }
2879  if (MBCLEN_INVALID_P(l) && i < len-1) {
2880  long len2;
2881  int l2;
2882  for (len2 = len-1; 0 < len2; len2--) {
2883  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
2884  if (!MBCLEN_INVALID_P(l2))
2885  break;
2886  }
2887  memset(p+len2+1, 0, len-(len2+1));
2888  }
2889  }
2890 }
2891 
2892 /*
2893  overwrite +p+ by succeeding letter in +enc+ and returns
2894  NEIGHBOR_FOUND or NEIGHBOR_WRAPPED.
2895  When NEIGHBOR_WRAPPED, carried-out letter is stored into carry.
2896  assuming each ranges are successive, and mbclen
2897  never change in each ranges.
2898  NEIGHBOR_NOT_CHAR is returned if invalid character or the range has only one
2899  character.
2900  */
2901 static enum neighbor_char
2902 enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
2903 {
2904  enum neighbor_char ret;
2905  unsigned int c;
2906  int ctype;
2907  int range;
2908  char save[ONIGENC_CODE_TO_MBC_MAXLEN];
2909 
2910  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2911  if (rb_enc_isctype(c, ONIGENC_CTYPE_DIGIT, enc))
2912  ctype = ONIGENC_CTYPE_DIGIT;
2913  else if (rb_enc_isctype(c, ONIGENC_CTYPE_ALPHA, enc))
2914  ctype = ONIGENC_CTYPE_ALPHA;
2915  else
2916  return NEIGHBOR_NOT_CHAR;
2917 
2918  MEMCPY(save, p, char, len);
2919  ret = enc_succ_char(p, len, enc);
2920  if (ret == NEIGHBOR_FOUND) {
2921  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2922  if (rb_enc_isctype(c, ctype, enc))
2923  return NEIGHBOR_FOUND;
2924  }
2925  MEMCPY(p, save, char, len);
2926  range = 1;
2927  while (1) {
2928  MEMCPY(save, p, char, len);
2929  ret = enc_pred_char(p, len, enc);
2930  if (ret == NEIGHBOR_FOUND) {
2931  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2932  if (!rb_enc_isctype(c, ctype, enc)) {
2933  MEMCPY(p, save, char, len);
2934  break;
2935  }
2936  }
2937  else {
2938  MEMCPY(p, save, char, len);
2939  break;
2940  }
2941  range++;
2942  }
2943  if (range == 1) {
2944  return NEIGHBOR_NOT_CHAR;
2945  }
2946 
2947  if (ctype != ONIGENC_CTYPE_DIGIT) {
2948  MEMCPY(carry, p, char, len);
2949  return NEIGHBOR_WRAPPED;
2950  }
2951 
2952  MEMCPY(carry, p, char, len);
2953  enc_succ_char(carry, len, enc);
2954  return NEIGHBOR_WRAPPED;
2955 }
2956 
2957 
2958 /*
2959  * call-seq:
2960  * str.succ -> new_str
2961  * str.next -> new_str
2962  *
2963  * Returns the successor to <i>str</i>. The successor is calculated by
2964  * incrementing characters starting from the rightmost alphanumeric (or
2965  * the rightmost character if there are no alphanumerics) in the
2966  * string. Incrementing a digit always results in another digit, and
2967  * incrementing a letter results in another letter of the same case.
2968  * Incrementing nonalphanumerics uses the underlying character set's
2969  * collating sequence.
2970  *
2971  * If the increment generates a ``carry,'' the character to the left of
2972  * it is incremented. This process repeats until there is no carry,
2973  * adding an additional character if necessary.
2974  *
2975  * "abcd".succ #=> "abce"
2976  * "THX1138".succ #=> "THX1139"
2977  * "<<koala>>".succ #=> "<<koalb>>"
2978  * "1999zzz".succ #=> "2000aaa"
2979  * "ZZZ9999".succ #=> "AAAA0000"
2980  * "***".succ #=> "**+"
2981  */
2982 
/*
 * Builds and returns the successor of orig as a new string (see the call-seq
 * comment above).  Works on a copy: characters are incremented from the right
 * with enc_succ_alnum_char, and when the carry runs off the left end of the
 * alphanumeric run, the carried character is inserted at that position.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses orig.
 */
2983 VALUE
2985 {
2986  rb_encoding *enc;
2987  VALUE str;
2988  char *sbeg, *s, *e, *last_alnum = 0;
2989  int c = -1;
2990  long l;
 /* default carry is the single byte 0x01, used when no other carry is set */
2991  char carry[ONIGENC_CODE_TO_MBC_MAXLEN] = "\1";
2992  long carry_pos = 0, carry_len = 1;
2993  enum neighbor_char neighbor = NEIGHBOR_FOUND;
2994 
 /* copy orig's bytes into a new string and propagate orig's taint state */
2995  str = rb_str_new5(orig, RSTRING_PTR(orig), RSTRING_LEN(orig));
2996  rb_enc_cr_str_copy_for_substr(str, orig);
2997  OBJ_INFECT(str, orig);
 /* the successor of an empty string is an empty string */
2998  if (RSTRING_LEN(str) == 0) return str;
2999 
3000  enc = STR_ENC_GET(orig);
3001  sbeg = RSTRING_PTR(str);
3002  s = e = sbeg + RSTRING_LEN(str);
3003 
 /* first pass: walk the characters right to left, incrementing
  * alphanumerics and propagating the carry */
3004  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
 /* a non-alphanumeric was just skipped after an alnum had wrapped: stop
  * carrying when the class flips between letter and digit across it, and
  * insert the carry at the last wrapped character instead */
3005  if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
3006  if (ISALPHA(*last_alnum) ? ISDIGIT(*s) :
3007  ISDIGIT(*last_alnum) ? ISALPHA(*s) : 0) {
3008  s = last_alnum;
3009  break;
3010  }
3011  }
 /* skip positions that do not hold a complete character */
3012  if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue;
3013  neighbor = enc_succ_alnum_char(s, l, enc, carry);
3014  switch (neighbor) {
3015  case NEIGHBOR_NOT_CHAR:
 /* not alphanumeric: leave it alone and keep moving left */
3016  continue;
3017  case NEIGHBOR_FOUND:
 /* incremented without carry: done */
3018  return str;
3019  case NEIGHBOR_WRAPPED:
 /* wrapped around: remember where, and carry into the next character */
3020  last_alnum = s;
3021  break;
3022  }
 /* c == 1 records that at least one alphanumeric was processed */
3023  c = 1;
3024  carry_pos = s - sbeg;
3025  carry_len = l;
3026  }
3027  if (c == -1) { /* str contains no alnum */
 /* second pass: increment raw characters right to left instead */
3028  s = e;
3029  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
3030  enum neighbor_char neighbor;
3031  if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue;
3032  neighbor = enc_succ_char(s, l, enc);
3033  if (neighbor == NEIGHBOR_FOUND)
3034  return str;
3035  if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
3036  /* wrapped to \0...\0. search next valid char. */
3037  enc_succ_char(s, l, enc);
3038  }
 /* for non-ASCII-compatible encodings the carry is the wrapped character
  * itself rather than the default byte */
3039  if (!rb_enc_asciicompat(enc)) {
3040  MEMCPY(carry, s, char, l);
3041  carry_len = l;
3042  }
3043  carry_pos = s - sbeg;
3044  }
3045  }
 /* every character wrapped: grow the string and insert the carry at
  * carry_pos, shifting the tail right by carry_len bytes */
3046  RESIZE_CAPA(str, RSTRING_LEN(str) + carry_len);
 /* re-read the pointer: it is fetched again after RESIZE_CAPA */
3047  s = RSTRING_PTR(str) + carry_pos;
3048  memmove(s + carry_len, s, RSTRING_LEN(str) - carry_pos);
3049  memmove(s, carry, carry_len);
3050  STR_SET_LEN(str, RSTRING_LEN(str) + carry_len);
3051  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3052  rb_enc_str_coderange(str);
3053  return str;
3054 }
3055 
3056 
3057 /*
3058  * call-seq:
3059  * str.succ! -> str
3060  * str.next! -> str
3061  *
3062  * Equivalent to <code>String#succ</code>, but modifies the receiver in
3063  * place.
3064  */
3065 
/*
 * In-place variant described in the comment above; returns the receiver str.
 * NOTE(review): the declarator line and the statement that performs the
 * update are missing from this listing; only the final return is visible.
 */
3066 static VALUE
3068 {
3070 
3071  return str;
3072 }
3073 
3074 
3075 /*
3076  * call-seq:
3077  * str.upto(other_str, exclusive=false) {|s| block } -> str
3078  * str.upto(other_str, exclusive=false) -> an_enumerator
3079  *
3080  * Iterates through successive values, starting at <i>str</i> and
3081  * ending at <i>other_str</i> inclusive, passing each value in turn to
3082  * the block. The <code>String#succ</code> method is used to generate
3083  * each value. If optional second argument exclusive is omitted or is false,
3084  * the last value will be included; otherwise it will be excluded.
3085  *
3086  * If no block is given, an enumerator is returned instead.
3087  *
3088  * "a8".upto("b6") {|s| print s, ' ' }
3089  * for s in "a8".."b6"
3090  * print s, ' '
3091  * end
3092  *
3093  * <em>produces:</em>
3094  *
3095  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3096  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3097  *
3098  * If <i>str</i> and <i>other_str</i> contains only ascii numeric characters,
3099  * both are recognized as decimal numbers. In addition, the width of
3100  * string (e.g. leading zeros) is handled appropriately.
3101  *
3102  * "9".upto("11").to_a #=> ["9", "10", "11"]
3103  * "25".upto("5").to_a #=> []
3104  * "07".upto("11").to_a #=> ["07", "08", "09", "10", "11"]
3105  */
3106 
/*
 * Yields successive strings from beg up to end (see the call-seq comment
 * above) and returns beg.  Three strategies are used:
 *   1. both ends are single ASCII characters: iterate over byte values;
 *   2. both ends consist only of ASCII digits: iterate numerically and
 *      format each value with the width of beg;
 *   3. otherwise: repeatedly call the "succ" method.
 * Without a block an enumerator is returned (RETURN_ENUMERATOR).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses argc, argv and beg.
 */
3107 static VALUE
3109 {
3110  VALUE end, exclusive;
3111  VALUE current, after_end;
3112  ID succ;
3113  int n, excl, ascii;
3114  rb_encoding *enc;
3115 
 /* "11": required end string, optional exclusive flag */
3116  rb_scan_args(argc, argv, "11", &end, &exclusive);
3117  RETURN_ENUMERATOR(beg, argc, argv);
3118  excl = RTEST(exclusive);
3119  CONST_ID(succ, "succ");
3120  StringValue(end);
3121  enc = rb_enc_check(beg, end);
3122  ascii = (is_ascii_string(beg) && is_ascii_string(end));
3123  /* single character */
3124  if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
3125  char c = RSTRING_PTR(beg)[0];
3126  char e = RSTRING_PTR(end)[0];
3127 
 /* empty range: start is past the end, or equal to an excluded end */
3128  if (c > e || (excl && c == e)) return beg;
3129  for (;;) {
3130  rb_yield(rb_enc_str_new(&c, 1, enc));
 /* inclusive range stops after yielding e; exclusive stops before it */
3131  if (!excl && c == e) break;
3132  c++;
3133  if (excl && c == e) break;
3134  }
3135  return beg;
3136  }
3137  /* both edges are all digits */
3138  if (ascii && ISDIGIT(RSTRING_PTR(beg)[0]) && ISDIGIT(RSTRING_PTR(end)[0])) {
3139  char *s, *send;
3140  VALUE b, e;
3141  int width;
3142 
 /* the output width is the byte length of beg, which preserves leading
  * zeros */
3143  s = RSTRING_PTR(beg); send = RSTRING_END(beg);
3144  width = rb_long2int(send - s);
 /* any non-digit in either string falls back to the generic path */
3145  while (s < send) {
3146  if (!ISDIGIT(*s)) goto no_digits;
3147  s++;
3148  }
3149  s = RSTRING_PTR(end); send = RSTRING_END(end);
3150  while (s < send) {
3151  if (!ISDIGIT(*s)) goto no_digits;
3152  s++;
3153  }
3154  b = rb_str_to_inum(beg, 10, FALSE);
3155  e = rb_str_to_inum(end, 10, FALSE);
3156  if (FIXNUM_P(b) && FIXNUM_P(e)) {
 /* both bounds fit in a Fixnum: count with C longs */
3157  long bi = FIX2LONG(b);
3158  long ei = FIX2LONG(e);
3159  rb_encoding *usascii = rb_usascii_encoding();
3160 
3161  while (bi <= ei) {
3162  if (excl && bi == ei) break;
3163  rb_yield(rb_enc_sprintf(usascii, "%.*ld", width, bi));
3164  bi++;
3165  }
3166  }
3167  else {
 /* at least one bound is not a Fixnum: compare and step through method
  * calls on the integer objects */
3168  ID op = excl ? '<' : rb_intern("<=");
3169  VALUE args[2], fmt = rb_obj_freeze(rb_usascii_str_new_cstr("%.*d"));
3170 
3171  args[0] = INT2FIX(width);
 /* NOTE(review): the comparison result is tested as a raw VALUE rather
  * than through RTEST -- confirm this is intended */
3172  while (rb_funcall(b, op, 1, e)) {
3173  args[1] = b;
3174  rb_yield(rb_str_format(numberof(args), args, fmt));
3175  b = rb_funcall(b, succ, 0, 0);
3176  }
3177  }
3178  return beg;
3179  }
3180  /* normal case */
3181  no_digits:
3182  n = rb_str_cmp(beg, end);
 /* empty range: beg sorts after end, or equals an excluded end */
3183  if (n > 0 || (excl && n == 0)) return beg;
3184 
 /* the loop stops once current reaches the successor of end */
3185  after_end = rb_funcall(end, succ, 0, 0);
3186  current = rb_str_dup(beg);
3187  while (!rb_str_equal(current, after_end)) {
3188  VALUE next = Qnil;
 /* compute the successor before yielding; it stays nil when current
  * already equals an inclusive end, which ends the loop below */
3189  if (excl || !rb_str_equal(current, end))
3190  next = rb_funcall(current, succ, 0, 0);
3191  rb_yield(current);
3192  if (NIL_P(next)) break;
3193  current = next;
 /* succ is a method call and may return a non-String */
3194  StringValue(current);
3195  if (excl && rb_str_equal(current, end)) break;
 /* stop when the successor grew longer than end or became empty */
3196  if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
3197  break;
3198  }
3199 
3200  return beg;
3201 }
3202 
/*
 * Searches str with the regexp re from offset 0 and, on success, returns the
 * capture selected by backref (resolved through rb_reg_backref_number).
 * Returns Qnil when the regexp does not match.
 * NOTE(review): the line that declares and assigns `match` is missing from
 * this listing.
 */
3203 static VALUE
3204 rb_str_subpat(VALUE str, VALUE re, VALUE backref)
3205 {
3206  if (rb_reg_search(re, str, 0, 0) >= 0) {
 /* translate the backref value into a numeric group index */
3208  int nth = rb_reg_backref_number(match, backref);
3209  return rb_reg_nth_match(nth, match);
3210  }
3211  return Qnil;
3212 }
3213 
/*
 * Single-argument element reference: dispatches on the type of indx.
 *   Fixnum  -> one-character substring at that index, Qnil if out of range
 *   Regexp  -> the whole match (group 0) via rb_str_subpat, or Qnil
 *   String  -> a copy of indx when it occurs in str, otherwise Qnil
 *   other   -> tried as a Range first, then converted with NUM2LONG
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses str and indx.
 */
3214 static VALUE
3216 {
3217  long idx;
3218 
3219  if (FIXNUM_P(indx)) {
3220  idx = FIX2LONG(indx);
3221 
3222  num_index:
3223  str = rb_str_substr(str, idx, 1);
 /* an empty one-character slice is reported as nil */
3224  if (!NIL_P(str) && RSTRING_LEN(str) == 0) return Qnil;
3225  return str;
3226  }
3227 
 /* remaining immediates have no builtin type */
3228  if (SPECIAL_CONST_P(indx)) goto generic;
3229  switch (BUILTIN_TYPE(indx)) {
3230  case T_REGEXP:
3231  return rb_str_subpat(str, indx, INT2FIX(0));
3232 
3233  case T_STRING:
3234  if (rb_str_index(str, indx, 0) != -1)
3235  return rb_str_dup(indx);
3236  return Qnil;
3237 
3238  generic:
3239  default:
3240  /* check if indx is Range */
3241  {
3242  long beg, len;
3243  VALUE tmp;
3244 
3245  len = str_strlen(str, STR_ENC_GET(str));
 /* Qfalse: not a Range, fall out to the numeric conversion below;
  * Qnil: a Range that is out of bounds; anything else: beg/len are set */
3246  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
3247  case Qfalse:
3248  break;
3249  case Qnil:
3250  return Qnil;
3251  default:
3252  tmp = rb_str_substr(str, beg, len);
3253  return tmp;
3254  }
3255  }
 /* last resort: convert to an integer and take the numeric path */
3256  idx = NUM2LONG(indx);
3257  goto num_index;
3258  }
3259 
3260  UNREACHABLE;
3261 }
3262 
3263 
3264 /*
3265  * call-seq:
3266  * str[index] -> new_str or nil
3267  * str[start, length] -> new_str or nil
3268  * str[range] -> new_str or nil
3269  * str[regexp] -> new_str or nil
3270  * str[regexp, capture] -> new_str or nil
3271  * str[match_str] -> new_str or nil
3272  * str.slice(index) -> new_str or nil
3273  * str.slice(start, length) -> new_str or nil
3274  * str.slice(range) -> new_str or nil
3275  * str.slice(regexp) -> new_str or nil
3276  * str.slice(regexp, capture) -> new_str or nil
3277  * str.slice(match_str) -> new_str or nil
3278  *
3279  * Element Reference --- If passed a single +index+, returns a substring of
3280  * one character at that index. If passed a +start+ index and a +length+,
3281  * returns a substring containing +length+ characters starting at the
3282  * +index+. If passed a +range+, its beginning and end are interpreted as
3283  * offsets delimiting the substring to be returned.
3284  *
3285  * In these three cases, if an index is negative, it is counted from the end
3286  * of the string. For the +start+ and +range+ cases the starting index
3287  * is just before a character and an index matching the string's size.
3288  * Additionally, an empty string is returned when the starting index for a
3289  * character range is at the end of the string.
3290  *
3291  * Returns +nil+ if the initial index falls outside the string or the length
3292  * is negative.
3293  *
3294  * If a +Regexp+ is supplied, the matching portion of the string is
3295  * returned. If a +capture+, which may be a capture group index or name,
3296  * follows the regular expression, that component of the MatchData is
3297  * returned instead.
3298  *
3299  * If a +match_str+ is given, that string is returned if it occurs in
3300  * the string.
3301  *
3302  * Returns +nil+ if the regular expression does not match or the match string
3303  * cannot be found.
3304  *
3305  * a = "hello there"
3306  *
3307  * a[1] #=> "e"
3308  * a[2, 3] #=> "llo"
3309  * a[2..3] #=> "ll"
3310  *
3311  * a[-3, 2] #=> "er"
3312  * a[7..-2] #=> "her"
3313  * a[-4..-2] #=> "her"
3314  * a[-2..-4] #=> ""
3315  *
3316  * a[11, 0] #=> ""
3317  * a[11] #=> nil
3318  * a[12, 0] #=> nil
3319  * a[12..-1] #=> nil
3320  *
3321  * a[/[aeiou](.)\1/] #=> "ell"
3322  * a[/[aeiou](.)\1/, 0] #=> "ell"
3323  * a[/[aeiou](.)\1/, 1] #=> "l"
3324  * a[/[aeiou](.)\1/, 2] #=> nil
3325  *
3326  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "non_vowel"] #=> "l"
3327  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "vowel"] #=> "e"
3328  *
3329  * a["lo"] #=> "lo"
3330  * a["bye"] #=> nil
3331  */
3332 
/*
 * Argument-count front end for element reference.  With two arguments it
 * handles (regexp, capture) and (start, length) directly; with one argument
 * it defers to rb_str_aref.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses argc, argv and str.
 */
3333 static VALUE
3335 {
3336  if (argc == 2) {
3337  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3338  return rb_str_subpat(str, argv[0], argv[1]);
3339  }
3340  return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
3341  }
 /* argc == 2 was handled above, so this only lets argc == 1 through */
3342  rb_check_arity(argc, 1, 2);
3343  return rb_str_aref(str, argv[0]);
3344 }
3345 
3346 VALUE
3347 rb_str_drop_bytes(VALUE str, long len)
3348 {
3349  char *ptr = RSTRING_PTR(str);
3350  long olen = RSTRING_LEN(str), nlen;
3351 
3352  str_modifiable(str);
3353  if (len > olen) len = olen;
3354  nlen = olen - len;
3355  if (nlen <= RSTRING_EMBED_LEN_MAX) {
3356  char *oldptr = ptr;
3357  int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|ELTS_SHARED));
3358  STR_SET_EMBED(str);
3359  STR_SET_EMBED_LEN(str, nlen);
3360  ptr = RSTRING(str)->as.ary;
3361  memmove(ptr, oldptr + len, nlen);
3362  if (fl == STR_NOEMBED) xfree(oldptr);
3363  }
3364  else {
3365  if (!STR_SHARED_P(str)) rb_str_new4(str);
3366  ptr = RSTRING(str)->as.heap.ptr += len;
3367  RSTRING(str)->as.heap.len = nlen;
3368  }
3369  ptr[nlen] = 0;
3370  ENC_CODERANGE_CLEAR(str);
3371  return str;
3372 }
3373 
3374 static void
3375 rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
3376 {
3377  if (beg == 0 && RSTRING_LEN(val) == 0) {
3378  rb_str_drop_bytes(str, len);
3379  OBJ_INFECT(str, val);
3380  return;
3381  }
3382 
3383  rb_str_modify(str);
3384  if (len < RSTRING_LEN(val)) {
3385  /* expand string */
3386  RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len + 1);
3387  }
3388 
3389  if (RSTRING_LEN(val) != len) {
3390  memmove(RSTRING_PTR(str) + beg + RSTRING_LEN(val),
3391  RSTRING_PTR(str) + beg + len,
3392  RSTRING_LEN(str) - (beg + len));
3393  }
3394  if (RSTRING_LEN(val) < beg && len < 0) {
3395  MEMZERO(RSTRING_PTR(str) + RSTRING_LEN(str), char, -len);
3396  }
3397  if (RSTRING_LEN(val) > 0) {
3398  memmove(RSTRING_PTR(str)+beg, RSTRING_PTR(val), RSTRING_LEN(val));
3399  }
3400  STR_SET_LEN(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len);
3401  if (RSTRING_PTR(str)) {
3402  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3403  }
3404  OBJ_INFECT(str, val);
3405 }
3406 
/*
 * Character-level splice: replaces len characters of str starting at
 * character index beg with val.  Validates the range, converts it to byte
 * offsets and delegates to rb_str_splice_0.
 * Raises IndexError for a negative length or a start index outside the
 * string; rb_enc_check raises for incompatible encodings.
 * NOTE(review): the statement that assigns `cr` before its use at the end
 * is missing from this listing.
 */
3407 static void
3408 rb_str_splice(VALUE str, long beg, long len, VALUE val)
3409 {
3410  long slen;
3411  char *p, *e;
3412  rb_encoding *enc;
3413  int singlebyte = single_byte_optimizable(str);
3414  int cr;
3415 
3416  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
3417 
3418  StringValue(val);
3419  enc = rb_enc_check(str, val);
3420  slen = str_strlen(str, enc);
3421 
 /* beg == slen is allowed: it addresses the position just past the end */
3422  if (slen < beg) {
3423  out_of_range:
3424  rb_raise(rb_eIndexError, "index %ld out of string", beg);
3425  }
3426  if (beg < 0) {
 /* negative start counts from the end of the string */
3427  if (-beg > slen) {
3428  goto out_of_range;
3429  }
3430  beg += slen;
3431  }
 /* clip the length so the range ends at the end of the string */
3432  if (slen < len || slen < beg + len) {
3433  len = slen - beg;
3434  }
3435  str_modify_keep_cr(str);
 /* locate the byte positions of the first and one-past-last character;
  * a null result from str_nth is treated as the end of the string */
3436  p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte);
3437  if (!p) p = RSTRING_END(str);
3438  e = str_nth(p, RSTRING_END(str), len, enc, singlebyte);
3439  if (!e) e = RSTRING_END(str);
3440  /* error check */
3441  beg = p - RSTRING_PTR(str); /* physical position */
3442  len = e - p; /* physical length */
3443  rb_str_splice_0(str, beg, len, val);
3444  rb_enc_associate(str, enc);
 /* a BROKEN code range is never stored on the result */
3446  if (cr != ENC_CODERANGE_BROKEN)
3447  ENC_CODERANGE_SET(str, cr);
3448 }
3449 
3450 void
3451 rb_str_update(VALUE str, long beg, long len, VALUE val)
3452 {
3453  rb_str_splice(str, beg, len, val);
3454 }
3455 
/*
 * Replaces the part of str matched by capture group backref of the regexp re
 * with val.  Raises IndexError when the regexp does not match, when the group
 * index is out of range, or when the selected group did not take part in the
 * match.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses str, re, backref and val.
 */
3456 static void
3458 {
3459  int nth;
3460  VALUE match;
3461  long start, end, len;
3462  rb_encoding *enc;
3463  struct re_registers *regs;
3464 
3465  if (rb_reg_search(re, str, 0, 0) < 0) {
3466  rb_raise(rb_eIndexError, "regexp not matched");
3467  }
3468  match = rb_backref_get();
3469  nth = rb_reg_backref_number(match, backref);
3470  regs = RMATCH_REGS(match);
3471  if (nth >= regs->num_regs) {
3472  out_of_range:
3473  rb_raise(rb_eIndexError, "index %d out of regexp", nth);
3474  }
3475  if (nth < 0) {
 /* negative group numbers count back from the number of registers */
3476  if (-nth >= regs->num_regs) {
3477  goto out_of_range;
3478  }
3479  nth += regs->num_regs;
3480  }
3481 
 /* BEG/END read the selected group's bounds from regs; -1 marks a group
  * that did not participate in the match */
3482  start = BEG(nth);
3483  if (start == -1) {
3484  rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
3485  }
3486  end = END(nth);
3487  len = end - start;
3488  StringValue(val);
3489  enc = rb_enc_check(str, val);
 /* start and len are passed straight to the byte-level splice */
3490  rb_str_splice_0(str, start, len, val);
3491  rb_enc_associate(str, enc);
3492 }
3493 
/*
 * Two-argument element assignment: dispatches on the type of indx and always
 * returns val.
 *   Fixnum  -> replace the single character at that index
 *   Regexp  -> replace the whole match (group 0)
 *   String  -> replace the first occurrence; IndexError when absent
 *   other   -> tried as a Range first, then converted with NUM2LONG
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses str, indx and val.
 */
3494 static VALUE
3496 {
3497  long idx, beg;
3498 
3499  if (FIXNUM_P(indx)) {
3500  idx = FIX2LONG(indx);
3501  num_index:
3502  rb_str_splice(str, idx, 1, val);
3503  return val;
3504  }
3505 
3506  if (SPECIAL_CONST_P(indx)) goto generic;
3507  switch (TYPE(indx)) {
3508  case T_REGEXP:
3509  rb_str_subpat_set(str, indx, INT2FIX(0), val);
3510  return val;
3511 
3512  case T_STRING:
3513  beg = rb_str_index(str, indx, 0);
3514  if (beg < 0) {
3515  rb_raise(rb_eIndexError, "string not matched");
3516  }
 /* presumably converts the byte offset to a character index, which is
  * what rb_str_splice expects -- confirm against rb_str_sublen */
3517  beg = rb_str_sublen(str, beg);
3518  rb_str_splice(str, beg, str_strlen(indx, 0), val);
3519  return val;
3520 
3521  generic:
3522  default:
3523  /* check if indx is Range */
3524  {
3525  long beg, len;
 /* a true result means indx was a Range and beg/len have been filled in */
3526  if (rb_range_beg_len(indx, &beg, &len, str_strlen(str, 0), 2)) {
3527  rb_str_splice(str, beg, len, val);
3528  return val;
3529  }
3530  }
 /* last resort: convert to an integer and take the numeric path */
3531  idx = NUM2LONG(indx);
3532  goto num_index;
3533  }
3534 }
3535 
3536 /*
3537  * call-seq:
3538  * str[fixnum] = new_str
3539  * str[fixnum, fixnum] = new_str
3540  * str[range] = aString
3541  * str[regexp] = new_str
3542  * str[regexp, fixnum] = new_str
3543  * str[regexp, name] = new_str
3544  * str[other_str] = new_str
3545  *
3546  * Element Assignment---Replaces some or all of the content of <i>str</i>. The
3547  * portion of the string affected is determined using the same criteria as
3548  * <code>String#[]</code>. If the replacement string is not the same length as
3549  * the text it is replacing, the string will be adjusted accordingly. If the
3550  * regular expression or string is used as the index doesn't match a position
3551  * in the string, <code>IndexError</code> is raised. If the regular expression
3552  * form is used, the optional second <code>Fixnum</code> allows you to specify
3553  * which portion of the match to replace (effectively using the
3554  * <code>MatchData</code> indexing rules). The forms that take a
3555  * <code>Fixnum</code> will raise an <code>IndexError</code> if the value is
3556  * out of range; the <code>Range</code> form will raise a
3557  * <code>RangeError</code>, and the <code>Regexp</code> and <code>String</code>
3558  * will raise an <code>IndexError</code> on negative match.
3559  */
3560 
/*
 * Argument-count front end for element assignment.  With three arguments it
 * handles (regexp, capture, value) and (start, length, value) directly and
 * returns the value; with two arguments it defers to rb_str_aset.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses argc, argv and str.
 */
3561 static VALUE
3563 {
3564  if (argc == 3) {
3565  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3566  rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
3567  }
3568  else {
3569  rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
3570  }
3571  return argv[2];
3572  }
 /* argc == 3 was handled above, so this only lets argc == 2 through */
3573  rb_check_arity(argc, 2, 3);
3574  return rb_str_aset(str, argv[0], argv[1]);
3575 }
3576 
3577 /*
3578  * call-seq:
3579  * str.insert(index, other_str) -> str
3580  *
3581  * Inserts <i>other_str</i> before the character at the given
3582  * <i>index</i>, modifying <i>str</i>. Negative indices count from the
3583  * end of the string, and insert <em>after</em> the given character.
3584  * The intent is to insert <i>other_str</i> so that it starts at the given
3585  * <i>index</i>.
3586  *
3587  * "abcd".insert(0, 'X') #=> "Xabcd"
3588  * "abcd".insert(3, 'X') #=> "abcXd"
3589  * "abcd".insert(4, 'X') #=> "abcdX"
3590  * "abcd".insert(-3, 'X') #=> "abXcd"
3591  * "abcd".insert(-1, 'X') #=> "abcdX"
3592  */
3593 
/*
 * Inserts str2 into str at the position given by idx and returns str.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses str, idx and str2.
 */
3594 static VALUE
3596 {
3597  long pos = NUM2LONG(idx);
3598 
3599  if (pos == -1) {
 /* -1 means "after the last character": a plain append */
3600  return rb_str_append(str, str2);
3601  }
3602  else if (pos < 0) {
 /* other negative indices insert after the addressed character, so move
  * one position to the right before splicing */
3603  pos++;
3604  }
 /* zero-length splice: nothing is removed, str2 is inserted at pos */
3605  rb_str_splice(str, pos, 0, str2);
3606  return str;
3607 }
3608 
3609 
3610 /*
3611  * call-seq:
3612  * str.slice!(fixnum) -> new_str or nil
3613  * str.slice!(fixnum, fixnum) -> new_str or nil
3614  * str.slice!(range) -> new_str or nil
3615  * str.slice!(regexp) -> new_str or nil
3616  * str.slice!(other_str) -> new_str or nil
3617  *
3618  * Deletes the specified portion from <i>str</i>, and returns the portion
3619  * deleted.
3620  *
3621  * string = "this is a string"
3622  * string.slice!(2) #=> "i"
3623  * string.slice!(3..6) #=> " is "
3624  * string.slice!(/s.*t/) #=> "sa st"
3625  * string.slice!("r") #=> "r"
3626  * string #=> "thing"
3627  */
3628 
/*
 * Removes the portion of str selected by the arguments and returns it.
 * Implemented as an element reference followed, when something was found,
 * by an element assignment of an empty string to the same selector.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; the body uses argc, argv and str.
 */
3629 static VALUE
3631 {
3632  VALUE result;
 /* room for up to two selector arguments plus the replacement value */
3633  VALUE buf[3];
3634  int i;
3635 
3636  rb_check_arity(argc, 1, 2);
3637  for (i=0; i<argc; i++) {
3638  buf[i] = argv[i];
3639  }
3640  str_modify_keep_cr(str);
3641  result = rb_str_aref_m(argc, buf, str);
3642  if (!NIL_P(result)) {
 /* i == argc after the copy loop: append an empty string as the value */
3643  buf[i] = rb_str_new(0,0);
3644  rb_str_aset_m(argc+1, buf, str);
3645  }
3646  return result;
3647 }
3648 
3649 static VALUE
3650 get_pat(VALUE pat, int quote)
3651 {
3652  VALUE val;
3653 
3654  switch (TYPE(pat)) {
3655  case T_REGEXP:
3656  return pat;
3657 
3658  case T_STRING:
3659  break;
3660 
3661  default:
3662  val = rb_check_string_type(pat);
3663  if (NIL_P(val)) {
3664  Check_Type(pat, T_REGEXP);
3665  }
3666  pat = val;
3667  }
3668 
3669  if (quote) {
3670  pat = rb_reg_quote(pat);
3671  }
3672 
3673  return rb_reg_regcomp(pat);
3674 }
3675 
3676 
3677 /*
3678  * call-seq:
3679  * str.sub!(pattern, replacement) -> str or nil
3680  * str.sub!(pattern) {|match| block } -> str or nil
3681  *
3682  * Performs the same substitution as String#sub in-place.
3683  *
3684  * Returns +str+ if a substitution was performed or +nil+ if no substitution
3685  * was performed.
3686  */
3687 
/*
 * In-place single substitution (see the call-seq comment above).  Replaces
 * the first match of the pattern with a replacement taken from a string
 * argument, a hash lookup, or the block's return value.
 * Returns str when a substitution happened, Qnil otherwise.
 * NOTE(review): the declarator line (function name and parameter list) and
 * the line that declares `match` are missing from this listing; the body uses
 * argc, argv and str.
 */
3688 static VALUE
3690 {
3691  VALUE pat, repl, hash = Qnil;
3692  int iter = 0;
3693  int tainted = 0;
3694  int untrusted = 0;
3695  long plen;
 /* the replacement argument may be omitted only when a block is given */
3696  int min_arity = rb_block_given_p() ? 1 : 2;
3697 
3698  rb_check_arity(argc, min_arity, 2);
3699  if (argc == 1) {
 /* block form */
3700  iter = 1;
3701  }
3702  else {
3703  repl = argv[1];
 /* the second argument is either a Hash or must convert to a String */
3704  hash = rb_check_hash_type(argv[1]);
3705  if (NIL_P(hash)) {
3706  StringValue(repl);
3707  }
3708  if (OBJ_TAINTED(repl)) tainted = 1;
3709  if (OBJ_UNTRUSTED(repl)) untrusted = 1;
3710  }
3711 
 /* quote == 1: a String pattern is escaped before being compiled */
3712  pat = get_pat(argv[0], 1);
3713  str_modifiable(str);
3714  if (rb_reg_search(pat, str, 0, 0) >= 0) {
3715  rb_encoding *enc;
3716  int cr = ENC_CODERANGE(str);
3718  struct re_registers *regs = RMATCH_REGS(match);
 /* bounds of the whole match (group 0) */
3719  long beg0 = BEG(0);
3720  long end0 = END(0);
3721  char *p, *rp;
3722  long len, rlen;
3723 
3724  if (iter || !NIL_P(hash)) {
 /* remember pointer and length so a modification of str during the
  * callback can be checked for afterwards */
3725  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
3726 
3727  if (iter) {
3728  repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
3729  }
3730  else {
3731  repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
3732  repl = rb_obj_as_string(repl);
3733  }
3734  str_mod_check(str, p, len);
3735  rb_check_frozen(str);
3736  }
3737  else {
 /* string replacement: built by rb_reg_regsub from repl and the match
  * registers */
3738  repl = rb_reg_regsub(repl, str, regs, pat);
3739  }
3740  enc = rb_enc_compatible(str, repl);
3741  if (!enc) {
 /* incompatible encodings are tolerated only if everything outside the
  * matched region is 7-bit; the result then takes repl's encoding */
3742  rb_encoding *str_enc = STR_ENC_GET(str);
3743  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
3744  if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
3745  coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
3746  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
3747  rb_enc_name(str_enc),
3748  rb_enc_name(STR_ENC_GET(repl)));
3749  }
3750  enc = STR_ENC_GET(repl);
3751  }
3752  rb_str_modify(str);
3753  rb_enc_associate(str, enc);
3754  if (OBJ_TAINTED(repl)) tainted = 1;
3755  if (OBJ_UNTRUSTED(repl)) untrusted = 1;
 /* when str's code range lies strictly between UNKNOWN and BROKEN, derive
  * the new one from repl's: reset it to UNKNOWN if repl is broken or if
  * str was VALID and repl is 7BIT, otherwise take repl's */
3756  if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
3757  int cr2 = ENC_CODERANGE(repl);
3758  if (cr2 == ENC_CODERANGE_BROKEN ||
3759  (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
3760  cr = ENC_CODERANGE_UNKNOWN;
3761  else
3762  cr = cr2;
3763  }
 /* plen: byte length of the matched text; rlen: of the replacement */
3764  plen = end0 - beg0;
3765  rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl);
3766  len = RSTRING_LEN(str);
3767  if (rlen > plen) {
3768  RESIZE_CAPA(str, len + rlen - plen);
3769  }
 /* fetch the pointer only after the possible resize */
3770  p = RSTRING_PTR(str);
3771  if (rlen != plen) {
 /* shift the text after the match to make the gap rlen bytes wide */
3772  memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
3773  }
3774  memcpy(p + beg0, rp, rlen);
3775  len += rlen - plen;
3776  STR_SET_LEN(str, len);
3777  RSTRING_PTR(str)[len] = '\0';
3778  ENC_CODERANGE_SET(str, cr);
3779  if (tainted) OBJ_TAINT(str);
3780  if (untrusted) OBJ_UNTRUST(str);
3781 
3782  return str;
3783  }
 /* no match: str is left untouched */
3784  return Qnil;
3785 }
3786 
3787 
3788 /*
3789  * call-seq:
3790  * str.sub(pattern, replacement) -> new_str
3791  * str.sub(pattern, hash) -> new_str
3792  * str.sub(pattern) {|match| block } -> new_str
3793  *
3794  * Returns a copy of +str+ with the _first_ occurrence of +pattern+
3795  * replaced by the second argument. The +pattern+ is typically a Regexp; if
3796  * given as a String, any regular expression metacharacters it contains will
3797  * be interpreted literally, e.g. <code>'\\\d'</code> will match a backslash
3798  * followed by 'd', instead of a digit.
3799  *
3800  * If +replacement+ is a String it will be substituted for the matched text.
3801  * It may contain back-references to the pattern's capture groups of the form
3802  * <code>"\\d"</code>, where <i>d</i> is a group number, or
3803  * <code>"\\k<n>"</code>, where <i>n</i> is a group name. If it is a
3804  * double-quoted string, both back-references must be preceded by an
3805  * additional backslash. However, within +replacement+ the special match
3806  * variables, such as <code>$&</code>, will not refer to the current match.
3807  *
3808  * If the second argument is a Hash, and the matched text is one of its keys,
3809  * the corresponding value is the replacement string.
3810  *
3811  * In the block form, the current match string is passed in as a parameter,
3812  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
3813  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
3814  * returned by the block will be substituted for the match on each call.
3815  *
3816  * The result inherits any tainting in the original string or any supplied
3817  * replacement string.
3818  *
3819  * "hello".sub(/[aeiou]/, '*') #=> "h*llo"
3820  * "hello".sub(/([aeiou])/, '<\1>') #=> "h<e>llo"
3821  * "hello".sub(/./) {|s| s.ord.to_s + ' ' } #=> "104 ello"
3822  * "hello".sub(/(?<foo>[aeiou])/, '*\k<foo>*') #=> "h*e*llo"
3823  * 'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV)
3824  * #=> "Is /bin/bash your preferred shell?"
3825  */
3826 
3827 static VALUE
/* NOTE(review): listing line 3828 is missing from this extract (numbering
 * jumps 3827 -> 3829); presumably the function name and parameter list --
 * confirm against upstream. */
3829 {
/* Operate on a duplicate so the receiver is left untouched.  The return
 * value of the in-place routine is ignored: the copy is returned whether or
 * not a substitution took place. */
3830  str = rb_str_dup(str);
3831  rb_str_sub_bang(argc, argv, str);
3832  return str;
3833 }
3834 
/*
 * Shared implementation behind the global-substitution methods.
 *
 * argc/argv: one argument (the pattern; replacements then come from the
 *            block, after RETURN_ENUMERATOR has had its chance to return)
 *            or two arguments (pattern plus a replacement that is either a
 *            Hash or is coerced with StringValue).  Any other count is
 *            handed to rb_check_arity.
 * str:       the string that is searched.
 * bang:      non-zero -> the result is moved into +str+ with
 *            rb_str_shared_replace, and Qnil is returned when nothing
 *            matched; zero -> a new string is returned (a dup of +str+ when
 *            nothing matched).
 */
3835 static VALUE
3836 str_gsub(int argc, VALUE *argv, VALUE str, int bang)
3837 {
3838  VALUE pat, val, repl, match, dest, hash = Qnil;
3839  struct re_registers *regs;
3840  long beg, n;
3841  long beg0, end0;
3842  long offset, blen, slen, len, last;
3843  int iter = 0;
3844  char *sp, *cp;
3845  int tainted = 0;
3846  rb_encoding *str_enc;
3847 
3848  switch (argc) {
3849  case 1:
3850  RETURN_ENUMERATOR(str, argc, argv);
3851  iter = 1;
3852  break;
3853  case 2:
3854  repl = argv[1];
3855  hash = rb_check_hash_type(argv[1]);
/* Only coerce the replacement to a String when it is not a Hash. */
3856  if (NIL_P(hash)) {
3857  StringValue(repl);
3858  }
3859  if (OBJ_TAINTED(repl)) tainted = 1;
3860  break;
3861  default:
3862  rb_check_arity(argc, 1, 2);
3863  }
3864 
3865  pat = get_pat(argv[0], 1);
3866  beg = rb_reg_search(pat, str, 0, 0);
3867  if (beg < 0) {
3868  if (bang) return Qnil; /* no match, no substitution */
3869  return rb_str_dup(str);
3870  }
3871 
3872  offset = 0;
3873  n = 0;
3874  blen = RSTRING_LEN(str) + 30; /* len + margin */
3875  dest = rb_str_buf_new(blen);
/* sp/slen snapshot the source buffer; they are passed to str_mod_check
 * after user code (block or hash lookup) has run. */
3876  sp = RSTRING_PTR(str);
3877  slen = RSTRING_LEN(str);
3878  cp = sp;
3879  str_enc = STR_ENC_GET(str);
3880  rb_enc_associate(dest, str_enc);
/* NOTE(review): listing line 3881 is missing from this extract (numbering
 * jumps 3880 -> 3882); a statement may have been dropped here -- confirm
 * against upstream. */
3882 
/* One iteration per match: copy the text between the previous match and
 * this one, then append the replacement. */
3883  do {
3884  n++;
3885  match = rb_backref_get();
3886  regs = RMATCH_REGS(match);
3887  beg0 = BEG(0);
3888  end0 = END(0);
3889  if (iter || !NIL_P(hash)) {
3890  if (iter) {
3891  val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
3892  }
3893  else {
3894  val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
3895  val = rb_obj_as_string(val);
3896  }
/* presumably raises if the block or hash lookup modified +str+ -- TODO
 * confirm against str_mod_check */
3897  str_mod_check(str, sp, slen);
3898  if (val == dest) { /* paranoid check [ruby-dev:24827] */
3899  rb_raise(rb_eRuntimeError, "block should not cheat");
3900  }
3901  }
3902  else {
3903  val = rb_reg_regsub(repl, str, regs, pat);
3904  }
3905 
3906  if (OBJ_TAINTED(val)) tainted = 1;
3907 
3908  len = beg0 - offset; /* copy pre-match substr */
3909  if (len) {
3910  rb_enc_str_buf_cat(dest, cp, len, str_enc);
3911  }
3912 
3913  rb_str_buf_append(dest, val);
3914 
/* Remember where this search started; it is used for the final
 * rb_reg_search call after the loop. */
3915  last = offset;
3916  offset = end0;
3917  if (beg0 == end0) {
3918  /*
3919  * Always consume at least one character of the input string
3920  * in order to prevent infinite loops.
3921  */
3922  if (RSTRING_LEN(str) <= end0) break;
3923  len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
3924  rb_enc_str_buf_cat(dest, RSTRING_PTR(str)+end0, len, str_enc);
3925  offset = end0 + len;
3926  }
3927  cp = RSTRING_PTR(str) + offset;
3928  if (offset > RSTRING_LEN(str)) break;
3929  beg = rb_reg_search(pat, str, offset, 0);
3930  } while (beg >= 0);
/* Copy whatever follows the last match. */
3931  if (RSTRING_LEN(str) > offset) {
3932  rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc);
3933  }
/* NOTE(review): the result of this search is discarded; presumably it is
 * re-run so the match data refers to the last successful match -- confirm. */
3934  rb_reg_search(pat, str, last, 0);
3935  if (bang) {
3936  rb_str_shared_replace(str, dest);
3937  }
3938  else {
/* Give the new string the receiver's class and propagate its taint state. */
3939  RBASIC(dest)->klass = rb_obj_class(str);
3940  OBJ_INFECT(dest, str);
3941  str = dest;
3942  }
3943 
3944  if (tainted) OBJ_TAINT(str);
3945  return str;
3946 }
3947 
3948 
3949 /*
3950  * call-seq:
3951  * str.gsub!(pattern, replacement) -> str or nil
3952  * str.gsub!(pattern) {|match| block } -> str or nil
3953  * str.gsub!(pattern) -> an_enumerator
3954  *
3955  * Performs the substitutions of <code>String#gsub</code> in place, returning
3956  * <i>str</i>, or <code>nil</code> if no substitutions were performed.
3957  * If no block and no <i>replacement</i> is given, an enumerator is returned instead.
3958  */
3959 
3960 static VALUE
/* NOTE(review): listing line 3961 is missing from this extract (numbering
 * jumps 3960 -> 3962); presumably the function name and parameter list --
 * confirm against upstream. */
3962 {
/* str_modify_keep_cr runs before the substitution even if nothing will
 * match; the trailing 1 selects str_gsub's in-place ("bang") mode. */
3963  str_modify_keep_cr(str);
3964  return str_gsub(argc, argv, str, 1);
3965 }
3966 
3967 
3968 /*
3969  * call-seq:
3970  * str.gsub(pattern, replacement) -> new_str
3971  * str.gsub(pattern, hash) -> new_str
3972  * str.gsub(pattern) {|match| block } -> new_str
3973  * str.gsub(pattern) -> enumerator
3974  *
3975  * Returns a copy of <i>str</i> with <em>all</em> occurrences of
3976  * <i>pattern</i> replaced by the second argument. The <i>pattern</i> is
3977  * typically a <code>Regexp</code>; if given as a <code>String</code>, any
3978  * regular expression metacharacters it contains will be interpreted
3979  * literally, e.g. <code>'\\\d'</code> will match a backslash followed by 'd',
3980  * instead of a digit.
3981  *
3982  * If <i>replacement</i> is a <code>String</code> it will be substituted for
3983  * the matched text. It may contain back-references to the pattern's capture
3984  * groups of the form <code>\\\d</code>, where <i>d</i> is a group number, or
3985  * <code>\\\k<n></code>, where <i>n</i> is a group name. If it is a
3986  * double-quoted string, both back-references must be preceded by an
3987  * additional backslash. However, within <i>replacement</i> the special match
3988  * variables, such as <code>$&</code>, will not refer to the current match.
3989  *
3990  * If the second argument is a <code>Hash</code>, and the matched text is one
3991  * of its keys, the corresponding value is the replacement string.
3992  *
3993  * In the block form, the current match string is passed in as a parameter,
3994  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
3995  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
3996  * returned by the block will be substituted for the match on each call.
3997  *
3998  * The result inherits any tainting in the original string or any supplied
3999  * replacement string.
4000  *
4001  * When neither a block nor a second argument is supplied, an
4002  * <code>Enumerator</code> is returned.
4003  *
4004  * "hello".gsub(/[aeiou]/, '*') #=> "h*ll*"
4005  * "hello".gsub(/([aeiou])/, '<\1>') #=> "h<e>ll<o>"
4006  * "hello".gsub(/./) {|s| s.ord.to_s + ' '} #=> "104 101 108 108 111 "
4007  * "hello".gsub(/(?<foo>[aeiou])/, '{\k<foo>}') #=> "h{e}ll{o}"
4008  * 'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*') #=> "h3ll*"
4009  */
4010 
4011 static VALUE
/* NOTE(review): listing line 4012 is missing from this extract (numbering
 * jumps 4011 -> 4013); presumably the function name and parameter list --
 * confirm against upstream. */
4013 {
/* The trailing 0 selects str_gsub's non-destructive mode: a new string is
 * returned and the receiver is not replaced. */
4014  return str_gsub(argc, argv, str, 0);
4015 }
4016 
4017 
4018 /*
4019  * call-seq:
4020  * str.replace(other_str) -> str
4021  *
4022  * Replaces the contents and taintedness of <i>str</i> with the corresponding
4023  * values in <i>other_str</i>.
4024  *
4025  * s = "hello" #=> "hello"
4026  * s.replace "world" #=> "world"
4027  */
4028 
4029 VALUE
/* NOTE(review): listing line 4030 is missing from this extract (numbering
 * jumps 4029 -> 4031); presumably the function name and parameter list --
 * confirm against upstream. */
4031 {
/* The modifiability check runs first, so it applies even when the
 * self-replacement shortcut below returns early. */
4032  str_modifiable(str);
4033  if (str == str2) return str;
4034 
/* Coerce the argument to a String before the receiver's current contents
 * are discarded. */
4035  StringValue(str2);
4036  str_discard(str);
4037  return str_replace(str, str2);
4038 }
4039 
4040 /*
4041  * call-seq:
4042  * string.clear -> string
4043  *
4044  * Makes string empty.
4045  *
4046  * a = "abcde"
4047  * a.clear #=> ""
4048  */
4049 
4050 static VALUE
/* NOTE(review): listing line 4051 is missing from this extract (numbering
 * jumps 4050 -> 4052); presumably the function name and parameter list --
 * confirm against upstream. */
4052 {
/* Drop the current contents and switch to the embedded representation with
 * length zero and a terminating NUL byte. */
4053  str_discard(str);
4054  STR_SET_EMBED(str);
4055  STR_SET_EMBED_LEN(str, 0);
4056  RSTRING_PTR(str)[0] = 0;
/* NOTE(review): listing lines 4058 and 4060 (the bodies of the two branches
 * below) are missing from this extract; as shown the if/else has no
 * statements -- confirm against upstream. */
4057  if (rb_enc_asciicompat(STR_ENC_GET(str)))
4059  else
4061  return str;
4062 }
4063 
4064 /*
4065  * call-seq:
4066  * string.chr -> string
4067  *
4068  * Returns a one-character string at the beginning of the string.
4069  *
4070  * a = "abcde"
4071  * a.chr #=> "a"
4072  */
4073 
4074 static VALUE
/* NOTE(review): listing line 4075 is missing from this extract (numbering
 * jumps 4074 -> 4076); presumably the function name and parameter list --
 * confirm against upstream. */
4076 {
/* Delegates to rb_str_substr with start 0 and length 1. */
4077  return rb_str_substr(str, 0, 1);
4078 }
4079 
4080 /*
4081  * call-seq:
4082  * str.getbyte(index) -> 0 .. 255
4083  *
4084  * returns the <i>index</i>th byte as an integer.
4085  */
4086 static VALUE
/* NOTE(review): listing line 4087 is missing from this extract (numbering
 * jumps 4086 -> 4088); presumably the function name and parameter list --
 * confirm against upstream. */
4088 {
4089  long pos = NUM2LONG(index);
4090 
/* A negative index counts back from the end of the string. */
4091  if (pos < 0)
4092  pos += RSTRING_LEN(str);
/* An out-of-range index yields nil rather than raising. */
4093  if (pos < 0 || RSTRING_LEN(str) <= pos)
4094  return Qnil;
4095 
/* The cast to unsigned char keeps the result in 0..255. */
4096  return INT2FIX((unsigned char)RSTRING_PTR(str)[pos]);
4097 }
4098 
4099 /*
4100  * call-seq:
4101  * str.setbyte(index, integer) -> integer
4102  *
4103  * modifies the <i>index</i>th byte as <i>integer</i>.
4104  */
4105 static VALUE
4106 rb_str_setbyte(VALUE str, VALUE index, VALUE value)
4107 {
4108  long pos = NUM2LONG(index);
4109  int byte = NUM2INT(value);
4110 
4111  rb_str_modify(str);
4112 
4113  if (pos < -RSTRING_LEN(str) || RSTRING_LEN(str) <= pos)
4114  rb_raise(rb_eIndexError, "index %ld out of string", pos);
4115  if (pos < 0)
4116  pos += RSTRING_LEN(str);
4117 
4118  RSTRING_PTR(str)[pos] = byte;
4119 
4120  return value;
4121 }
4122 
/*
 * Returns the byte-wise substring of +str+ that starts at byte offset +beg+
 * and spans at most +len+ bytes, or Qnil when +beg+ lies outside the string
 * or +len+ is negative.  A negative +beg+ counts from the end.
 */
4123 static VALUE
4124 str_byte_substr(VALUE str, long beg, long len)
4125 {
4126  char *p, *s = RSTRING_PTR(str);
4127  long n = RSTRING_LEN(str);
4128  VALUE str2;
4129 
4130  if (beg > n || len < 0) return Qnil;
4131  if (beg < 0) {
4132  beg += n;
4133  if (beg < 0) return Qnil;
4134  }
/* Clamp the requested length to what is available after beg. */
4135  if (beg + len > n)
4136  len = n - beg;
4137  if (len <= 0) {
4138  len = 0;
4139  p = 0;
4140  }
4141  else
4142  p = s + beg;
4143 
/* A slice too long to embed that runs to the end of str is built from
 * rb_str_new4/str_new3 and then narrowed by adjusting the heap pointer and
 * length (presumably so the buffer is shared instead of copied -- confirm);
 * every other slice is created with rb_str_new5. */
4144  if (len > RSTRING_EMBED_LEN_MAX && beg + len == n) {
4145  str2 = rb_str_new4(str);
4146  str2 = str_new3(rb_obj_class(str2), str2);
4147  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
4148  RSTRING(str2)->as.heap.len = len;
4149  }
4150  else {
4151  str2 = rb_str_new5(str, p, len);
4152  }
4153 
4154  str_enc_copy(str2, str);
4155 
/* NOTE(review): listing lines 4158, 4160, 4165 and 4168 are missing from
 * this extract, so the branches and case labels below appear without their
 * statements -- confirm against upstream. */
4156  if (RSTRING_LEN(str2) == 0) {
4157  if (!rb_enc_asciicompat(STR_ENC_GET(str)))
4159  else
4161  }
4162  else {
4163  switch (ENC_CODERANGE(str)) {
4164  case ENC_CODERANGE_7BIT:
4166  break;
4167  default:
4169  break;
4170  }
4171  }
4172 
4173  OBJ_INFECT(str2, str);
4174 
4175  return str2;
4176 }
4177 
4178 static VALUE
/* NOTE(review): listing line 4179 is missing from this extract (numbering
 * jumps 4178 -> 4180); presumably the function name and parameter list --
 * confirm against upstream. */
4180 {
4181  long idx;
4182  switch (TYPE(indx)) {
4183  case T_FIXNUM:
4184  idx = FIX2LONG(indx);
4185 
/* Shared tail for numeric indexes: take a one-byte slice; an empty or nil
 * result is reported as nil. */
4186  num_index:
4187  str = str_byte_substr(str, idx, 1);
4188  if (NIL_P(str) || RSTRING_LEN(str) == 0) return Qnil;
4189  return str;
4190 
4191  default:
4192  /* check if indx is Range */
4193  {
4194  long beg, len = RSTRING_LEN(str);
4195 
/* Qfalse: fall through to the numeric conversion below; Qnil: return nil;
 * anything else: beg/len have been filled in and describe the slice. */
4196  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
4197  case Qfalse:
4198  break;
4199  case Qnil:
4200  return Qnil;
4201  default:
4202  return str_byte_substr(str, beg, len);
4203  }
4204  }
4205  idx = NUM2LONG(indx);
4206  goto num_index;
4207  }
4208 
4209  UNREACHABLE;
4210 }
4211 
4212 /*
4213  * call-seq:
4214  * str.byteslice(fixnum) -> new_str or nil
4215  * str.byteslice(fixnum, fixnum) -> new_str or nil
4216  * str.byteslice(range) -> new_str or nil
4217  *
4218  * Byte Reference---If passed a single <code>Fixnum</code>, returns a
4219  * substring of one byte at that position. If passed two <code>Fixnum</code>
4220  * objects, returns a substring starting at the offset given by the first, and
4221  * a length given by the second. If given a <code>Range</code>, a substring containing
4222  * bytes at offsets given by the range is returned. In all three cases, if
4223  * an offset is negative, it is counted from the end of <i>str</i>. Returns
4224  * <code>nil</code> if the initial offset falls outside the string, the length
4225  * is negative, or the beginning of the range is greater than the end.
4226  * The resulting string keeps the encoding of the original string.
4227  *
4228  * "hello".byteslice(1) #=> "e"
4229  * "hello".byteslice(-1) #=> "o"
4230  * "hello".byteslice(1, 2) #=> "el"
4231  * "\x80\u3042".byteslice(1, 3) #=> "\u3042"
4232  * "\x03\u3042\xff".byteslice(1..3) #=> "\u3042"
4233  */
4234 
4235 static VALUE
/* NOTE(review): listing line 4236 is missing from this extract (numbering
 * jumps 4235 -> 4237); presumably the function name and parameter list --
 * confirm against upstream. */
4237 {
/* Two arguments: explicit byte offset and length. */
4238  if (argc == 2) {
4239  return str_byte_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
4240  }
/* Otherwise the argument count is checked with rb_check_arity and the
 * single argument is passed on as an index or Range. */
4241  rb_check_arity(argc, 1, 2);
4242  return str_byte_aref(str, argv[0]);
4243 }
4244 
4245 /*
4246  * call-seq:
4247  * str.reverse -> new_str
4248  *
4249  * Returns a new string with the characters from <i>str</i> in reverse order.
4250  *
4251  * "stressed".reverse #=> "desserts"
4252  */
4253 
4254 static VALUE
/* NOTE(review): listing line 4255 is missing from this extract (numbering
 * jumps 4254 -> 4256); presumably the function name and parameter list --
 * confirm against upstream. */
4256 {
4257  rb_encoding *enc;
4258  VALUE rev;
4259  char *s, *e, *p;
/* Cleared as soon as a multi-byte character or a byte with the high bit set
 * is seen. */
4260  int single = 1;
4261 
/* Strings of length 0 or 1 are their own reverse. */
4262  if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
4263  enc = STR_ENC_GET(str);
4264  rev = rb_str_new5(str, 0, RSTRING_LEN(str));
/* s walks forward through the source; p walks backward from the end of the
 * destination. */
4265  s = RSTRING_PTR(str); e = RSTRING_END(str);
4266  p = RSTRING_END(rev);
4267 
4268  if (RSTRING_LEN(str) > 1) {
4269  if (single_byte_optimizable(str)) {
/* Byte-by-byte copy. */
4270  while (s < e) {
4271  *--p = *s++;
4272  }
4273  }
4274  else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID) {
/* Character-by-character copy using rb_enc_fast_mbclen. */
4275  while (s < e) {
4276  int clen = rb_enc_fast_mbclen(s, e, enc);
4277 
4278  if (clen > 1 || (*s & 0x80)) single = 0;
4279  p -= clen;
4280  memcpy(p, s, clen);
4281  s += clen;
4282  }
4283  }
4284  else {
/* Same copy, using rb_enc_mbclen for every other code range. */
4285  while (s < e) {
4286  int clen = rb_enc_mbclen(s, e, enc);
4287 
4288  if (clen > 1 || (*s & 0x80)) single = 0;
4289  p -= clen;
4290  memcpy(p, s, clen);
4291  s += clen;
4292  }
4293  }
4294  }
4295  STR_SET_LEN(rev, RSTRING_LEN(str));
4296  OBJ_INFECT(rev, str);
/* NOTE(review): listing lines 4299, 4302 and 4305 are missing from this
 * extract, so both branches below are empty and a statement before the
 * return may have been dropped -- confirm against upstream. */
4297  if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) {
4298  if (single) {
4300  }
4301  else {
4303  }
4304  }
4306 
4307  return rev;
4308 }
4309 
4310 
4311 /*
4312  * call-seq:
4313  * str.reverse! -> str
4314  *
4315  * Reverses <i>str</i> in place.
4316  */
4317 
4318 static VALUE
/* NOTE(review): listing line 4319 is missing from this extract (numbering
 * jumps 4318 -> 4320); presumably the function name and parameter list --
 * confirm against upstream. */
4320 {
4321  if (RSTRING_LEN(str) > 1) {
4322  if (single_byte_optimizable(str)) {
4323  char *s, *e, c;
4324 
/* Swap bytes in place, working inward from both ends. */
4325  str_modify_keep_cr(str);
4326  s = RSTRING_PTR(str);
4327  e = RSTRING_END(str) - 1;
4328  while (s < e) {
4329  c = *s;
4330  *s++ = *e;
4331  *e-- = c;
4332  }
4333  }
4334  else {
/* NOTE(review): listing line 4335 (the body of this branch) is missing from
 * this extract -- confirm against upstream. */
4336  }
4337  }
4338  else {
/* Nothing to reverse for length 0 or 1; str_modify_keep_cr is still
 * called. */
4339  str_modify_keep_cr(str);
4340  }
4341  return str;
4342 }
4343 
4344 
4345 /*
4346  * call-seq:
4347  * str.include? other_str -> true or false
4348  *
4349  * Returns <code>true</code> if <i>str</i> contains the given string or
4350  * character.
4351  *
4352  * "hello".include? "lo" #=> true
4353  * "hello".include? "ol" #=> false
4354  * "hello".include? ?h #=> true
4355  */
4356 
4357 static VALUE
/* NOTE(review): listing line 4358 is missing from this extract (numbering
 * jumps 4357 -> 4359); presumably the function name and parameter list --
 * confirm against upstream. */
4359 {
4360  long i;
4361 
/* The argument is coerced to a String, then searched for from offset 0. */
4362  StringValue(arg);
4363  i = rb_str_index(str, arg, 0);
4364 
/* A result of -1 is treated as "not found". */
4365  if (i == -1) return Qfalse;
4366  return Qtrue;
4367 }
4368 
4369 
4370 /*
4371  * call-seq:
4372  * str.to_i(base=10) -> integer
4373  *
4374  * Returns the result of interpreting leading characters in <i>str</i> as an
4375  * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
4376  * end of a valid number are ignored. If there is not a valid number at the
4377  * start of <i>str</i>, <code>0</code> is returned. This method never raises an
4378  * exception when <i>base</i> is valid.
4379  *
4380  * "12345".to_i #=> 12345
4381  * "99 red balloons".to_i #=> 99
4382  * "0a".to_i #=> 0
4383  * "0a".to_i(16) #=> 10
4384  * "hello".to_i #=> 0
4385  * "1100101".to_i(2) #=> 101
4386  * "1100101".to_i(8) #=> 294977
4387  * "1100101".to_i(10) #=> 1100101
4388  * "1100101".to_i(16) #=> 17826049
4389  */
4390 
4391 static VALUE
/* NOTE(review): listing line 4392 is missing from this extract (numbering
 * jumps 4391 -> 4393); presumably the function name and parameter list --
 * confirm against upstream. */
4393 {
4394  int base;
4395 
/* The base defaults to 10 when no argument is given. */
4396  if (argc == 0) base = 10;
4397  else {
4398  VALUE b;
4399 
4400  rb_scan_args(argc, argv, "01", &b);
4401  base = NUM2INT(b);
4402  }
/* Only negative bases are rejected here; any further range checking
 * presumably happens in rb_str_to_inum -- confirm. */
4403  if (base < 0) {
4404  rb_raise(rb_eArgError, "invalid radix %d", base);
4405  }
4406  return rb_str_to_inum(str, base, FALSE);
4407 }
4408 
4409 
4410 /*
4411  * call-seq:
4412  * str.to_f -> float
4413  *
4414  * Returns the result of interpreting leading characters in <i>str</i> as a
4415  * floating point number. Extraneous characters past the end of a valid number
4416  * are ignored. If there is not a valid number at the start of <i>str</i>,
4417  * <code>0.0</code> is returned. This method never raises an exception.
4418  *
4419  * "123.45e1".to_f #=> 1234.5
4420  * "45.67 degrees".to_f #=> 45.67
4421  * "thx1138".to_f #=> 0.0
4422  */
4423 
4424 static VALUE
/* NOTE(review): listing line 4425 is missing from this extract (numbering
 * jumps 4424 -> 4426); presumably the function name and parameter list --
 * confirm against upstream. */
4426 {
/* Converts with rb_str_to_dbl (second argument FALSE) and wraps the double
 * with DBL2NUM. */
4427  return DBL2NUM(rb_str_to_dbl(str, FALSE));
4428 }
4429 
4430 
4431 /*
4432  * call-seq:
4433  * str.to_s -> str
4434  * str.to_str -> str
4435  *
4436  * Returns the receiver.
4437  */
4438 
4439 static VALUE
/* NOTE(review): listing line 4440 is missing from this extract (numbering
 * jumps 4439 -> 4441); presumably the function name and parameter list --
 * confirm against upstream. */
4441 {
/* When the receiver's class is not exactly String, a duplicate of class
 * String is returned; otherwise the receiver itself is returned. */
4442  if (rb_obj_class(str) != rb_cString) {
4443  return str_duplicate(rb_cString, str);
4444  }
4445  return str;
4446 }
4447 
#if 0
/*
 * Compiled out.  Encodes code point +c+ in +enc+ into a scratch buffer and
 * appends the encoded bytes to +str+.
 */
static void
str_cat_char(VALUE str, unsigned int c, rb_encoding *enc)
{
    char encoded[RUBY_MAX_CHAR_LEN];
    int width;

    width = rb_enc_codelen(c, enc);
    rb_enc_mbcput(c, encoded, enc);
    rb_enc_str_buf_cat(str, encoded, width, enc);
}
#endif
4459 
4460 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
4461 
4462 int
4463 rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
4464 {
4465  char buf[CHAR_ESC_LEN + 1];
4466  int l;
4467 
4468 #if SIZEOF_INT > 4
4469  c &= 0xffffffff;
4470 #endif
4471  if (unicode_p) {
4472  if (c < 0x7F && ISPRINT(c)) {
4473  snprintf(buf, CHAR_ESC_LEN, "%c", c);
4474  }
4475  else if (c < 0x10000) {
4476  snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c);
4477  }
4478  else {
4479  snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c);
4480  }
4481  }
4482  else {
4483  if (c < 0x100) {
4484  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c);
4485  }
4486  else {
4487  snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c);
4488  }
4489  }
4490  l = (int)strlen(buf); /* CHAR_ESC_LEN cannot exceed INT_MAX */
4491  rb_str_buf_cat(result, buf, l);
4492  return l;
4493 }
4494 
4495 /*
4496  * call-seq:
4497  * str.inspect -> string
4498  *
4499  * Returns a printable version of _str_, surrounded by quote marks,
4500  * with special characters escaped.
4501  *
4502  * str = "hello"
4503  * str[3] = "\b"
4504  * str.inspect #=> "\"hel\\bo\""
4505  */
4506 
4507 VALUE
/* NOTE(review): listing line 4508 is missing from this extract (numbering
 * jumps 4507 -> 4509); presumably the function name and parameter list --
 * confirm against upstream. */
4509 {
4510  rb_encoding *enc = STR_ENC_GET(str);
/* prev marks the start of the run of source bytes that has not yet been
 * copied to the result; p is the scan position. */
4511  const char *p, *pend, *prev;
4512  char buf[CHAR_ESC_LEN + 1];
/* NOTE(review): listing lines 4513 and 4514 are missing from this extract;
 * `result` and `resenc` are used below but their declarations are not
 * visible here -- confirm against upstream. */
4515  int unicode_p = rb_enc_unicode_p(enc);
4516  int asciicompat = rb_enc_asciicompat(enc);
4517  static rb_encoding *utf16, *utf32;
4518 
/* Looked up on first use and cached in the function-local statics. */
4519  if (!utf16) utf16 = rb_enc_find("UTF-16");
4520  if (!utf32) utf32 = rb_enc_find("UTF-32");
/* The result encoding falls back to the default external encoding, and to
 * US-ASCII when that is not ASCII-compatible. */
4521  if (resenc == NULL) resenc = rb_default_external_encoding();
4522  if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
4523  rb_enc_associate(result, resenc);
4524  str_buf_cat2(result, "\"");
4525 
4526  p = RSTRING_PTR(str); pend = RSTRING_END(str);
4527  prev = p;
/* For UTF-16 / UTF-32 the leading bytes select the big- or little-endian
 * encoding; without a recognised mark, unicode_p is cleared.
 * NOTE(review): q[0]..q[3] are read without a visible check that the string
 * holds that many bytes -- confirm this is safe for short strings. */
4528  if (enc == utf16) {
4529  const unsigned char *q = (const unsigned char *)p;
4530  if (q[0] == 0xFE && q[1] == 0xFF)
4531  enc = rb_enc_find("UTF-16BE");
4532  else if (q[0] == 0xFF && q[1] == 0xFE)
4533  enc = rb_enc_find("UTF-16LE");
4534  else
4535  unicode_p = 0;
4536  }
4537  else if (enc == utf32) {
4538  const unsigned char *q = (const unsigned char *)p;
4539  if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF)
4540  enc = rb_enc_find("UTF-32BE");
4541  else if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF)
4542  enc = rb_enc_find("UTF-32LE");
4543  else
4544  unicode_p = 0;
4545  }
4546  while (p < pend) {
4547  unsigned int c, cc;
4548  int n;
4549 
4550  n = rb_enc_precise_mbclen(p, pend, enc);
/* No complete character here: flush the pending run, then emit up to
 * rb_enc_mbminlen(enc) bytes as \xNN escapes. */
4551  if (!MBCLEN_CHARFOUND_P(n)) {
4552  if (p > prev) str_buf_cat(result, prev, p - prev);
4553  n = rb_enc_mbminlen(enc);
4554  if (pend < p + n)
4555  n = (int)(pend - p);
4556  while (n--) {
4557  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
4558  str_buf_cat(result, buf, strlen(buf));
4559  prev = ++p;
4560  }
4561  continue;
4562  }
4563  n = MBCLEN_CHARFOUND_LEN(n);
4564  c = rb_enc_mbc_to_codepoint(p, pend, enc);
4565  p += n;
/* A double quote, a backslash, or a '#' that is followed by '$', '@' or '{'
 * gets a backslash in front of it.
 * NOTE(review): listing line 4570 is missing from this extract (numbering
 * jumps 4569 -> 4571); part of this condition may have been dropped --
 * confirm against upstream. */
4566  if ((asciicompat || unicode_p) &&
4567  (c == '"'|| c == '\\' ||
4568  (c == '#' &&
4569  p < pend &&
4571  (cc = rb_enc_codepoint(p,pend,enc),
4572  (cc == '$' || cc == '@' || cc == '{'))))) {
4573  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4574  str_buf_cat2(result, "\\");
/* prev is rewound to the character itself so it is copied verbatim with the
 * next run. */
4575  if (asciicompat || enc == resenc) {
4576  prev = p - n;
4577  continue;
4578  }
4579  }
/* Control characters that have a single-letter escape. */
4580  switch (c) {
4581  case '\n': cc = 'n'; break;
4582  case '\r': cc = 'r'; break;
4583  case '\t': cc = 't'; break;
4584  case '\f': cc = 'f'; break;
4585  case '\013': cc = 'v'; break;
4586  case '\010': cc = 'b'; break;
4587  case '\007': cc = 'a'; break;
4588  case 033: cc = 'e'; break;
4589  default: cc = 0; break;
4590  }
4591  if (cc) {
4592  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4593  buf[0] = '\\';
4594  buf[1] = (char)cc;
4595  str_buf_cat(result, buf, 2);
4596  prev = p;
4597  continue;
4598  }
/* Printable characters stay in the pending run; everything else is flushed
 * and written in escaped form. */
4599  if ((enc == resenc && rb_enc_isprint(c, enc)) ||
4600  (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
4601  continue;
4602  }
4603  else {
4604  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4605  rb_str_buf_cat_escaped_char(result, c, unicode_p);
4606  prev = p;
4607  continue;
4608  }
4609  }
/* Flush the final run and close the quote. */
4610  if (p > prev) str_buf_cat(result, prev, p - prev);
4611  str_buf_cat2(result, "\"");
4612 
4613  OBJ_INFECT(result, str);
4614  return result;
4615 }
4616 
/* True when p lies before e and the byte at p is '$', '@' or '{'.  The
 * macro evaluates p more than once, so the argument must be free of side
 * effects. */
4617 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
4618 
4619 /*
4620  * call-seq:
4621  * str.dump -> new_str
4622  *
4623  * Produces a version of +str+ with all non-printing characters replaced by
4624  * <code>\nnn</code> notation and all special characters escaped.
4625  *
4626  * "hello \n ''".dump #=> "\"hello \\n ''\""
4627  */
4628 
4629 VALUE
/* NOTE(review): listing line 4630 is missing from this extract (numbering
 * jumps 4629 -> 4631); presumably the function name and parameter list --
 * confirm against upstream. */
4631 {
4632  rb_encoding *enc = rb_enc_get(str);
4633  long len;
4634  const char *p, *pend;
4635  char *q, *qend;
4636  VALUE result;
4637  int u8 = (enc == rb_utf8_encoding());
4638 
/* Pass 1: compute the exact length of the dumped form so the result can be
 * allocated in one go. */
4639  len = 2; /* "" */
4640  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
4641  while (p < pend) {
4642  unsigned char c = *p++;
4643  switch (c) {
4644  case '"': case '\\':
4645  case '\n': case '\r':
4646  case '\t': case '\f':
4647  case '\013': case '\010': case '\007': case '\033':
4648  len += 2;
4649  break;
4650 
4651  case '#':
4652  len += IS_EVSTR(p, pend) ? 2 : 1;
4653  break;
4654 
4655  default:
4656  if (ISPRINT(c)) {
4657  len++;
4658  }
4659  else {
4660  if (u8) { /* \u{NN} */
4661  int n = rb_enc_precise_mbclen(p-1, pend, enc);
/* The test is applied to n-1, matching the second pass below. */
4662  if (MBCLEN_CHARFOUND_P(n-1)) {
4663  unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
/* One byte per hex digit beyond the first, plus 5 for \u{X}. */
4664  while (cc >>= 4) len++;
4665  len += 5;
4666  p += MBCLEN_CHARFOUND_LEN(n)-1;
4667  break;
4668  }
4669  }
4670  len += 4; /* \xNN */
4671  }
4672  break;
4673  }
4674  }
4675  if (!rb_enc_asciicompat(enc)) {
4676  len += 19; /* ".force_encoding('')" */
4677  len += strlen(enc->name);
4678  }
4679 
/* Pass 2: write the escaped text into the preallocated result. */
4680  result = rb_str_new5(str, 0, len);
4681  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
/* qend is one past len so the snprintf calls below have room for their
 * terminating NUL. */
4682  q = RSTRING_PTR(result); qend = q + len + 1;
4683 
4684  *q++ = '"';
4685  while (p < pend) {
4686  unsigned char c = *p++;
4687 
4688  if (c == '"' || c == '\\') {
4689  *q++ = '\\';
4690  *q++ = c;
4691  }
4692  else if (c == '#') {
4693  if (IS_EVSTR(p, pend)) *q++ = '\\';
4694  *q++ = '#';
4695  }
4696  else if (c == '\n') {
4697  *q++ = '\\';
4698  *q++ = 'n';
4699  }
4700  else if (c == '\r') {
4701  *q++ = '\\';
4702  *q++ = 'r';
4703  }
4704  else if (c == '\t') {
4705  *q++ = '\\';
4706  *q++ = 't';
4707  }
4708  else if (c == '\f') {
4709  *q++ = '\\';
4710  *q++ = 'f';
4711  }
4712  else if (c == '\013') {
4713  *q++ = '\\';
4714  *q++ = 'v';
4715  }
4716  else if (c == '\010') {
4717  *q++ = '\\';
4718  *q++ = 'b';
4719  }
4720  else if (c == '\007') {
4721  *q++ = '\\';
4722  *q++ = 'a';
4723  }
4724  else if (c == '\033') {
4725  *q++ = '\\';
4726  *q++ = 'e';
4727  }
4728  else if (ISPRINT(c)) {
4729  *q++ = c;
4730  }
4731  else {
4732  *q++ = '\\';
4733  if (u8) {
4734  int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
4735  if (MBCLEN_CHARFOUND_P(n)) {
4736  int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
4737  p += n;
4738  snprintf(q, qend-q, "u{%x}", cc);
4739  q += strlen(q);
4740  continue;
4741  }
4742  }
/* Advance past the three characters "xNN" just written. */
4743  snprintf(q, qend-q, "x%02X", c);
4744  q += 3;
4745  }
4746  }
4747  *q++ = '"';
4748  *q = '\0';
/* For a non-ASCII-compatible source encoding, the suffix naming that
 * encoding is appended and the result is tagged ASCII-8BIT instead. */
4749  if (!rb_enc_asciicompat(enc)) {
4750  snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name);
4751  enc = rb_ascii8bit_encoding();
4752  }
4753  OBJ_INFECT(result, str);
4754  /* result from dump is ASCII */
4755  rb_enc_associate(result, enc);
/* NOTE(review): listing line 4756 is missing from this extract (numbering
 * jumps 4755 -> 4757); a statement may have been dropped here -- confirm
 * against upstream. */
4757  return result;
4758 }
4759 
4760 
4761 static void
/* NOTE(review): listing line 4762 is missing from this extract (numbering
 * jumps 4761 -> 4763); presumably the function name and parameter list --
 * confirm against upstream. */
4763 {
/* Raises Encoding::CompatibilityError, naming the encoding, when +enc+ is a
 * dummy encoding; otherwise does nothing. */
4764  if (rb_enc_dummy_p(enc)) {
4765  rb_raise(rb_eEncCompatError, "incompatible encoding with this operation: %s",
4766  rb_enc_name(enc));
4767  }
4768 }
4769 
4770 /*
4771  * call-seq:
4772  * str.upcase! -> str or nil
4773  *
4774  * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
4775  * were made.
4776  * Note: case replacement is effective only in ASCII region.
4777  */
4778 
4779 static VALUE
/* NOTE(review): listing line 4780 is missing from this extract (numbering
 * jumps 4779 -> 4781); presumably the function name and parameter list --
 * confirm against upstream. */
4781 {
4782  rb_encoding *enc;
4783  char *s, *send;
/* Set as soon as any character is changed; decides the return value. */
4784  int modify = 0;
4785  int n;
4786 
4787  str_modify_keep_cr(str);
4788  enc = STR_ENC_GET(str);
/* NOTE(review): listing line 4789 is missing from this extract (numbering
 * jumps 4788 -> 4790); a statement may have been dropped here -- confirm
 * against upstream. */
4790  s = RSTRING_PTR(str); send = RSTRING_END(str);
4791  if (single_byte_optimizable(str)) {
/* Byte-wise scan: only ASCII 'a'..'z' is converted. */
4792  while (s < send) {
4793  unsigned int c = *(unsigned char*)s;
4794 
4795  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
4796  *s = 'A' + (c - 'a');
4797  modify = 1;
4798  }
4799  s++;
4800  }
4801  }
4802  else {
4803  int ascompat = rb_enc_asciicompat(enc);
4804 
4805  while (s < send) {
4806  unsigned int c;
4807 
/* In an ASCII-compatible encoding, bytes below 0x80 are handled directly
 * without decoding a code point. */
4808  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
4809  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
4810  *s = 'A' + (c - 'a');
4811  modify = 1;
4812  }
4813  s++;
4814  }
4815  else {
4816  c = rb_enc_codepoint_len(s, send, &n, enc);
4817  if (rb_enc_islower(c, enc)) {
4818  /* assuming toupper returns codepoint with same size */
4819  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
4820  modify = 1;
4821  }
4822  s += n;
4823  }
4824  }
4825  }
4826 
/* nil signals that nothing was changed. */
4827  if (modify) return str;
4828  return Qnil;
4829 }
4830 
4831 
4832 /*
4833  * call-seq:
4834  * str.upcase -> new_str
4835  *
4836  * Returns a copy of <i>str</i> with all lowercase letters replaced with their
4837  * uppercase counterparts. The operation is locale insensitive---only
4838  * characters ``a'' to ``z'' are affected.
4839  * Note: case replacement is effective only in ASCII region.
4840  *
4841  * "hEllO".upcase #=> "HELLO"
4842  */
4843 
4844 static VALUE
/* NOTE(review): listing line 4845 is missing from this extract (numbering
 * jumps 4844 -> 4846); presumably the function name and parameter list --
 * confirm against upstream. */
4846 {
/* Upcase a duplicate in place; the duplicate is returned even when the
 * in-place call reports no change. */
4847  str = rb_str_dup(str);
4848  rb_str_upcase_bang(str);
4849  return str;
4850 }
4851 
4852 
4853 /*
4854  * call-seq:
4855  * str.downcase! -> str or nil
4856  *
4857  * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
4858  * changes were made.
4859  * Note: case replacement is effective only in ASCII region.
4860  */
4861 
4862 static VALUE
/* NOTE(review): listing line 4863 is missing from this extract (numbering
 * jumps 4862 -> 4864); presumably the function name and parameter list --
 * confirm against upstream. */
4864 {
4865  rb_encoding *enc;
4866  char *s, *send;
/* Set as soon as any character is changed; decides the return value. */
4867  int modify = 0;
4868 
4869  str_modify_keep_cr(str);
4870  enc = STR_ENC_GET(str);
/* NOTE(review): listing line 4871 is missing from this extract (numbering
 * jumps 4870 -> 4872); a statement may have been dropped here -- confirm
 * against upstream. */
4872  s = RSTRING_PTR(str); send = RSTRING_END(str);
4873  if (single_byte_optimizable(str)) {
/* Byte-wise scan: only ASCII 'A'..'Z' is converted. */
4874  while (s < send) {
4875  unsigned int c = *(unsigned char*)s;
4876 
4877  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
4878  *s = 'a' + (c - 'A');
4879  modify = 1;
4880  }
4881  s++;
4882  }
4883  }
4884  else {
4885  int ascompat = rb_enc_asciicompat(enc);
4886 
4887  while (s < send) {
4888  unsigned int c;
4889  int n;
4890 
/* In an ASCII-compatible encoding, bytes below 0x80 are handled directly
 * without decoding a code point. */
4891  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
4892  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
4893  *s = 'a' + (c - 'A');
4894  modify = 1;
4895  }
4896  s++;
4897  }
4898  else {
4899  c = rb_enc_codepoint_len(s, send, &n, enc);
4900  if (rb_enc_isupper(c, enc)) {
4901  /* assuming tolower returns codepoint with same size */
4902  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
4903  modify = 1;
4904  }
4905  s += n;
4906  }
4907  }
4908  }
4909 
/* nil signals that nothing was changed. */
4910  if (modify) return str;
4911  return Qnil;
4912 }
4913 
4914 
4915 /*
4916  * call-seq:
4917  * str.downcase -> new_str
4918  *
4919  * Returns a copy of <i>str</i> with all uppercase letters replaced with their
4920  * lowercase counterparts. The operation is locale insensitive---only
4921  * characters ``A'' to ``Z'' are affected.
4922  * Note: case replacement is effective only in ASCII region.
4923  *
4924  * "hEllO".downcase #=> "hello"
4925  */
4926 
4927 static VALUE
/* NOTE(review): listing line 4928 is missing from this extract (numbering
 * jumps 4927 -> 4929); presumably the function name and parameter list --
 * confirm against upstream. */
4929 {
/* Downcase a duplicate in place; the duplicate is returned even when the
 * in-place call reports no change. */
4930  str = rb_str_dup(str);
4931  rb_str_downcase_bang(str);
4932  return str;
4933 }
4934 
4935 
4936 /*
4937  * call-seq:
4938  * str.capitalize! -> str or nil
4939  *
4940  * Modifies <i>str</i> by converting the first character to uppercase and the
4941  * remainder to lowercase. Returns <code>nil</code> if no changes are made.
4942  * Note: case conversion is effective only in ASCII region.
4943  *
4944  * a = "hello"
4945  * a.capitalize! #=> "Hello"
4946  * a #=> "Hello"
4947  * a.capitalize! #=> nil
4948  */
4949 
4950 static VALUE
/* NOTE(review): listing line 4951 is missing from this extract (numbering
 * jumps 4950 -> 4952); presumably the function name and parameter list --
 * confirm against upstream. */
4952 {
4953  rb_encoding *enc;
4954  char *s, *send;
/* Set as soon as any character is changed; decides the return value. */
4955  int modify = 0;
4956  unsigned int c;
4957  int n;
4958 
4959  str_modify_keep_cr(str);
4960  enc = STR_ENC_GET(str);
/* NOTE(review): listing line 4961 is missing from this extract (numbering
 * jumps 4960 -> 4962); a statement may have been dropped here -- confirm
 * against upstream. */
/* An empty string has nothing to change. */
4962  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
4963  s = RSTRING_PTR(str); send = RSTRING_END(str);
4964 
/* First character: converted to upper case when it is lower case. */
4965  c = rb_enc_codepoint_len(s, send, &n, enc);
4966  if (rb_enc_islower(c, enc)) {
4967  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
4968  modify = 1;
4969  }
4970  s += n;
/* Remaining characters: converted to lower case when they are upper case. */
4971  while (s < send) {
4972  c = rb_enc_codepoint_len(s, send, &n, enc);
4973  if (rb_enc_isupper(c, enc)) {
4974  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
4975  modify = 1;
4976  }
4977  s += n;
4978  }
4979 
/* nil signals that nothing was changed. */
4980  if (modify) return str;
4981  return Qnil;
4982 }
4983 
4984 
4985 /*
4986  * call-seq:
4987  * str.capitalize -> new_str
4988  *
4989  * Returns a copy of <i>str</i> with the first character converted to uppercase
4990  * and the remainder to lowercase.
4991  * Note: case conversion is effective only in ASCII region.
4992  *
4993  * "hello".capitalize #=> "Hello"
4994  * "HELLO".capitalize #=> "Hello"
4995  * "123ABC".capitalize #=> "123abc"
4996  */
4997 
4998 static VALUE
/* NOTE(review): listing line 4999 is missing from this extract (numbering
 * jumps 4998 -> 5000); presumably the function name and parameter list --
 * confirm against upstream. */
5000 {
5001  str = rb_str_dup(str);
/* NOTE(review): listing line 5002 is missing from this extract (numbering
 * jumps 5001 -> 5003); as shown the duplicate is returned unchanged --
 * presumably the in-place capitalize call belongs here; confirm against
 * upstream. */
5003  return str;
5004 }
5005 
5006 
5007 /*
5008  * call-seq:
5009  * str.swapcase! -> str or nil
5010  *
5011  * Equivalent to <code>String#swapcase</code>, but modifies the receiver in
5012  * place, returning <i>str</i>, or <code>nil</code> if no changes were made.
5013  * Note: case conversion is effective only in ASCII region.
5014  */
5015 
5016 static VALUE
/* NOTE(review): listing line 5017 is missing from this extract (numbering
 * jumps 5016 -> 5018); presumably the function name and parameter list --
 * confirm against upstream. */
5018 {
5019  rb_encoding *enc;
5020  char *s, *send;
/* Set as soon as any character is changed; decides the return value. */
5021  int modify = 0;
5022  int n;
5023 
5024  str_modify_keep_cr(str);
5025  enc = STR_ENC_GET(str);
/* NOTE(review): listing line 5026 is missing from this extract (numbering
 * jumps 5025 -> 5027); a statement may have been dropped here -- confirm
 * against upstream. */
5027  s = RSTRING_PTR(str); send = RSTRING_END(str);
/* Decode one code point at a time and flip its case in place. */
5028  while (s < send) {
5029  unsigned int c = rb_enc_codepoint_len(s, send, &n, enc);
5030 
5031  if (rb_enc_isupper(c, enc)) {
5032  /* assuming tolower returns codepoint with same size */
5033  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
5034  modify = 1;
5035  }
5036  else if (rb_enc_islower(c, enc)) {
5037  /* assuming toupper returns codepoint with same size */
5038  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
5039  modify = 1;
5040  }
5041  s += n;
5042  }
5043 
/* nil signals that nothing was changed. */
5044  if (modify) return str;
5045  return Qnil;
5046 }
5047 
5048 
5049 /*
5050  * call-seq:
5051  * str.swapcase -> new_str
5052  *
5053  * Returns a copy of <i>str</i> with uppercase alphabetic characters converted
5054  * to lowercase and lowercase characters converted to uppercase.
5055  * Note: case conversion is effective only in ASCII region.
5056  *
5057  * "Hello".swapcase #=> "hELLO"
5058  * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11"
5059  */
5060 
5061 static VALUE
5063 {
5064  str = rb_str_dup(str);
5065  rb_str_swapcase_bang(str);
5066  return str;
5067 }
5068 
typedef unsigned char *USTR;

/*
 * Cursor over a tr-style character specification (e.g. "a-z", "^0-9").
 * Advanced one code point at a time by trnext().
 */
struct tr {
    int gen;                /* nonzero while a "c1-c2" range is being expanded */
    unsigned int now, max;  /* current code point / last code point of the range */
    char *p, *pend;         /* read position in the specification and its end */
};
5076 
5077 static unsigned int
5078 trnext(struct tr *t, rb_encoding *enc)
5079 {
5080  int n;
5081 
5082  for (;;) {
5083  if (!t->gen) {
5084 nextpart:
5085  if (t->p == t->pend) return -1;
5086  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '\\' && t->p + n < t->pend) {
5087  t->p += n;
5088  }
5089  t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5090  t->p += n;
5091  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '-' && t->p + n < t->pend) {
5092  t->p += n;
5093  if (t->p < t->pend) {
5094  unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5095  t->p += n;
5096  if (t->now > c) {
5097  if (t->now < 0x80 && c < 0x80) {
5099  "invalid range \"%c-%c\" in string transliteration",
5100  t->now, c);
5101  }
5102  else {
5103  rb_raise(rb_eArgError, "invalid range in string transliteration");
5104  }
5105  continue; /* not reached */
5106  }
5107  t->gen = 1;
5108  t->max = c;
5109  }
5110  }
5111  return t->now;
5112  }
5113  else {
5114  while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
5115  if (t->now == t->max) {
5116  t->gen = 0;
5117  goto nextpart;
5118  }
5119  }
5120  if (t->now < t->max) {
5121  return t->now;
5122  }
5123  else {
5124  t->gen = 0;
5125  return t->max;
5126  }
5127  }
5128  }
5129 }
5130 
5131 static VALUE rb_str_delete_bang(int,VALUE*,VALUE);
5132 
5133 static VALUE
5134 tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
5135 {
5136  const unsigned int errc = -1;
5137  unsigned int trans[256];
5138  rb_encoding *enc, *e1, *e2;
5139  struct tr trsrc, trrepl;
5140  int cflag = 0;
5141  unsigned int c, c0, last = 0;
5142  int modify = 0, i, l;
5143  char *s, *send;
5144  VALUE hash = 0;
5145  int singlebyte = single_byte_optimizable(str);
5146  int cr;
5147 
5148 #define CHECK_IF_ASCII(c) \
5149  (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
5150  (cr = ENC_CODERANGE_VALID) : 0)
5151 
5152  StringValue(src);
5153  StringValue(repl);
5154  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5155  if (RSTRING_LEN(repl) == 0) {
5156  return rb_str_delete_bang(1, &src, str);
5157  }
5158 
5159  cr = ENC_CODERANGE(str);
5160  e1 = rb_enc_check(str, src);
5161  e2 = rb_enc_check(str, repl);
5162  if (e1 == e2) {
5163  enc = e1;
5164  }
5165  else {
5166  enc = rb_enc_check(src, repl);
5167  }
5168  trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
5169  if (RSTRING_LEN(src) > 1 &&
5170  rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) == '^' &&
5171  trsrc.p + l < trsrc.pend) {
5172  cflag = 1;
5173  trsrc.p += l;
5174  }
5175  trrepl.p = RSTRING_PTR(repl);
5176  trrepl.pend = trrepl.p + RSTRING_LEN(repl);
5177  trsrc.gen = trrepl.gen = 0;
5178  trsrc.now = trrepl.now = 0;
5179  trsrc.max = trrepl.max = 0;
5180 
5181  if (cflag) {
5182  for (i=0; i<256; i++) {
5183  trans[i] = 1;
5184  }
5185  while ((c = trnext(&trsrc, enc)) != errc) {
5186  if (c < 256) {
5187  trans[c] = errc;
5188  }
5189  else {
5190  if (!hash) hash = rb_hash_new();
5191  rb_hash_aset(hash, UINT2NUM(c), Qtrue);
5192  }
5193  }
5194  while ((c = trnext(&trrepl, enc)) != errc)
5195  /* retrieve last replacer */;
5196  last = trrepl.now;
5197  for (i=0; i<256; i++) {
5198  if (trans[i] != errc) {
5199  trans[i] = last;
5200  }
5201  }
5202  }
5203  else {
5204  unsigned int r;
5205 
5206  for (i=0; i<256; i++) {
5207  trans[i] = errc;
5208  }
5209  while ((c = trnext(&trsrc, enc)) != errc) {
5210  r = trnext(&trrepl, enc);
5211  if (r == errc) r = trrepl.now;
5212  if (c < 256) {
5213  trans[c] = r;
5214  if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
5215  }
5216  else {
5217  if (!hash) hash = rb_hash_new();
5218  rb_hash_aset(hash, UINT2NUM(c), UINT2NUM(r));
5219  }
5220  }
5221  }
5222 
5223  if (cr == ENC_CODERANGE_VALID)
5224  cr = ENC_CODERANGE_7BIT;
5225  str_modify_keep_cr(str);
5226  s = RSTRING_PTR(str); send = RSTRING_END(str);
5227  if (sflag) {
5228  int clen, tlen;
5229  long offset, max = RSTRING_LEN(str);
5230  unsigned int save = -1;
5231  char *buf = ALLOC_N(char, max), *t = buf;
5232 
5233  while (s < send) {
5234  int may_modify = 0;
5235 
5236  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5237  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5238 
5239  s += clen;
5240  if (c < 256) {
5241  c = trans[c];
5242  }
5243  else if (hash) {
5244  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5245  if (NIL_P(tmp)) {
5246  if (cflag) c = last;
5247  else c = errc;
5248  }
5249  else if (cflag) c = errc;
5250  else c = NUM2INT(tmp);
5251  }
5252  else {
5253  c = errc;
5254  }
5255  if (c != (unsigned int)-1) {
5256  if (save == c) {
5257  CHECK_IF_ASCII(c);
5258  continue;
5259  }
5260  save = c;
5261  tlen = rb_enc_codelen(c, enc);
5262  modify = 1;
5263  }
5264  else {
5265  save = -1;
5266  c = c0;
5267  if (enc != e1) may_modify = 1;
5268  }
5269  while (t - buf + tlen >= max) {
5270  offset = t - buf;
5271  max *= 2;
5272  REALLOC_N(buf, char, max);
5273  t = buf + offset;
5274  }
5275  rb_enc_mbcput(c, t, enc);
5276  if (may_modify && memcmp(s, t, tlen) != 0) {
5277  modify = 1;
5278  }
5279  CHECK_IF_ASCII(c);
5280  t += tlen;
5281  }
5282  if (!STR_EMBED_P(str)) {
5283  xfree(RSTRING(str)->as.heap.ptr);
5284  }
5285  *t = '\0';
5286  RSTRING(str)->as.heap.ptr = buf;
5287  RSTRING(str)->as.heap.len = t - buf;
5288  STR_SET_NOEMBED(str);
5289  RSTRING(str)->as.heap.aux.capa = max;
5290  }
5291  else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) {
5292  while (s < send) {
5293  c = (unsigned char)*s;
5294  if (trans[c] != errc) {
5295  if (!cflag) {
5296  c = trans[c];
5297  *s = c;
5298  modify = 1;
5299  }
5300  else {
5301  *s = last;
5302  modify = 1;
5303  }
5304  }
5305  CHECK_IF_ASCII(c);
5306  s++;
5307  }
5308  }
5309  else {
5310  int clen, tlen, max = (int)(RSTRING_LEN(str) * 1.2);
5311  long offset;
5312  char *buf = ALLOC_N(char, max), *t = buf;
5313 
5314  while (s < send) {
5315  int may_modify = 0;
5316  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5317  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5318 
5319  if (c < 256) {
5320  c = trans[c];
5321  }
5322  else if (hash) {
5323  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5324  if (NIL_P(tmp)) {
5325  if (cflag) c = last;
5326  else c = errc;
5327  }
5328  else if (cflag) c = errc;
5329  else c = NUM2INT(tmp);
5330  }
5331  else {
5332  c = cflag ? last : errc;
5333  }
5334  if (c != errc) {
5335  tlen = rb_enc_codelen(c, enc);
5336  modify = 1;
5337  }
5338  else {
5339  c = c0;
5340  if (enc != e1) may_modify = 1;
5341  }
5342  while (t - buf + tlen >= max) {
5343  offset = t - buf;
5344  max *= 2;
5345  REALLOC_N(buf, char, max);
5346  t = buf + offset;
5347  }
5348  if (s != t) {
5349  rb_enc_mbcput(c, t, enc);
5350  if (may_modify && memcmp(s, t, tlen) != 0) {
5351  modify = 1;
5352  }
5353  }
5354  CHECK_IF_ASCII(c);
5355  s += clen;
5356  t += tlen;
5357  }
5358  if (!STR_EMBED_P(str)) {
5359  xfree(RSTRING(str)->as.heap.ptr);
5360  }
5361  *t = '\0';
5362  RSTRING(str)->as.heap.ptr = buf;
5363  RSTRING(str)->as.heap.len = t - buf;
5364  STR_SET_NOEMBED(str);
5365  RSTRING(str)->as.heap.aux.capa = max;
5366  }
5367 
5368  if (modify) {
5369  if (cr != ENC_CODERANGE_BROKEN)
5370  ENC_CODERANGE_SET(str, cr);
5371  rb_enc_associate(str, enc);
5372  return str;
5373  }
5374  return Qnil;
5375 }
5376 
5377 
5378 /*
5379  * call-seq:
5380  * str.tr!(from_str, to_str) -> str or nil
5381  *
5382  * Translates <i>str</i> in place, using the same rules as
5383  * <code>String#tr</code>. Returns <i>str</i>, or <code>nil</code> if no
5384  * changes were made.
5385  */
5386 
5387 static VALUE
5389 {
5390  return tr_trans(str, src, repl, 0);
5391 }
5392 
5393 
5394 /*
5395  * call-seq:
5396  * str.tr(from_str, to_str) -> new_str
5397  *
5398  * Returns a copy of +str+ with the characters in +from_str+ replaced by the
5399  * corresponding characters in +to_str+. If +to_str+ is shorter than
5400  * +from_str+, it is padded with its last character in order to maintain the
5401  * correspondence.
5402  *
5403  * "hello".tr('el', 'ip') #=> "hippo"
5404  * "hello".tr('aeiou', '*') #=> "h*ll*"
5405  * "hello".tr('aeiou', 'AA*') #=> "hAll*"
5406  *
5407  * Both strings may use the <code>c1-c2</code> notation to denote ranges of
5408  * characters, and +from_str+ may start with a <code>^</code>, which denotes
5409  * all characters except those listed.
5410  *
5411  * "hello".tr('a-y', 'b-z') #=> "ifmmp"
5412  * "hello".tr('^aeiou', '*') #=> "*e**o"
5413  *
5414  * The backslash character <code>\</code> can be used to escape
5415  * <code>^</code> or <code>-</code> and is otherwise ignored unless it
5416  * appears at the end of a range or the end of the +from_str+ or +to_str+:
5417  *
5418  * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld"
5419  * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld"
5420  *
5421  * "hello\r\nworld".tr("\r", "") #=> "hello\nworld"
5422  * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold"
5423  * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld"
5424  *
5425  * "X['\\b']".tr("X\\", "") #=> "['b']"
5426  * "X['\\b']".tr("X-\\]", "") #=> "'b'"
5427  */
5428 
5429 static VALUE
5430 rb_str_tr(VALUE str, VALUE src, VALUE repl)
5431 {
5432  str = rb_str_dup(str);
5433  tr_trans(str, src, repl, 0);
5434  return str;
5435 }
5436 
5437 #define TR_TABLE_SIZE 257
5438 static void
5439 tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first,
5440  VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
5441 {
5442  const unsigned int errc = -1;
5443  char buf[256];
5444  struct tr tr;
5445  unsigned int c;
5446  VALUE table = 0, ptable = 0;
5447  int i, l, cflag = 0;
5448 
5449  tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
5450  tr.gen = tr.now = tr.max = 0;
5451 
5452  if (RSTRING_LEN(str) > 1 && rb_enc_ascget(tr.p, tr.pend, &l, enc) == '^') {
5453  cflag = 1;
5454  tr.p += l;
5455  }
5456  if (first) {
5457  for (i=0; i<256; i++) {
5458  stable[i] = 1;
5459  }
5460  stable[256] = cflag;
5461  }
5462  else if (stable[256] && !cflag) {
5463  stable[256] = 0;
5464  }
5465  for (i=0; i<256; i++) {
5466  buf[i] = cflag;
5467  }
5468 
5469  while ((c = trnext(&tr, enc)) != errc) {
5470  if (c < 256) {
5471  buf[c & 0xff] = !cflag;
5472  }
5473  else {
5474  VALUE key = UINT2NUM(c);
5475 
5476  if (!table && (first || *tablep || stable[256])) {
5477  if (cflag) {
5478  ptable = *ctablep;
5479  table = ptable ? ptable : rb_hash_new();
5480  *ctablep = table;
5481  }
5482  else {
5483  table = rb_hash_new();
5484  ptable = *tablep;
5485  *tablep = table;
5486  }
5487  }
5488  if (table && (!ptable || (cflag ^ !NIL_P(rb_hash_aref(ptable, key))))) {
5489  rb_hash_aset(table, key, Qtrue);
5490  }
5491  }
5492  }
5493  for (i=0; i<256; i++) {
5494  stable[i] = stable[i] && buf[i];
5495  }
5496  if (!table && !cflag) {
5497  *tablep = 0;
5498  }
5499 }
5500 
5501 
5502 static int
5503 tr_find(unsigned int c, char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
5504 {
5505  if (c < 256) {
5506  return table[c] != 0;
5507  }
5508  else {
5509  VALUE v = UINT2NUM(c);
5510 
5511  if (del) {
5512  if (!NIL_P(rb_hash_lookup(del, v)) &&
5513  (!nodel || NIL_P(rb_hash_lookup(nodel, v)))) {
5514  return TRUE;
5515  }
5516  }
5517  else if (nodel && !NIL_P(rb_hash_lookup(nodel, v))) {
5518  return FALSE;
5519  }
5520  return table[256] ? TRUE : FALSE;
5521  }
5522 }
5523 
5524 /*
5525  * call-seq:
5526  * str.delete!([other_str]+) -> str or nil
5527  *
5528  * Performs a <code>delete</code> operation in place, returning <i>str</i>, or
5529  * <code>nil</code> if <i>str</i> was not modified.
5530  */
5531 
5532 static VALUE
5534 {
5535  char squeez[TR_TABLE_SIZE];
5536  rb_encoding *enc = 0;
5537  char *s, *send, *t;
5538  VALUE del = 0, nodel = 0;
5539  int modify = 0;
5540  int i, ascompat, cr;
5541 
5542  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5544  for (i=0; i<argc; i++) {
5545  VALUE s = argv[i];
5546 
5547  StringValue(s);
5548  enc = rb_enc_check(str, s);
5549  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5550  }
5551 
5552  str_modify_keep_cr(str);
5553  ascompat = rb_enc_asciicompat(enc);
5554  s = t = RSTRING_PTR(str);
5555  send = RSTRING_END(str);
5556  cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
5557  while (s < send) {
5558  unsigned int c;
5559  int clen;
5560 
5561  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5562  if (squeez[c]) {
5563  modify = 1;
5564  }
5565  else {
5566  if (t != s) *t = c;
5567  t++;
5568  }
5569  s++;
5570  }
5571  else {
5572  c = rb_enc_codepoint_len(s, send, &clen, enc);
5573 
5574  if (tr_find(c, squeez, del, nodel)) {
5575  modify = 1;
5576  }
5577  else {
5578  if (t != s) rb_enc_mbcput(c, t, enc);
5579  t += clen;
5580  if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
5581  }
5582  s += clen;
5583  }
5584  }
5585  *t = '\0';
5586  STR_SET_LEN(str, t - RSTRING_PTR(str));
5587  ENC_CODERANGE_SET(str, cr);
5588 
5589  if (modify) return str;
5590  return Qnil;
5591 }
5592 
5593 
5594 /*
5595  * call-seq:
5596  * str.delete([other_str]+) -> new_str
5597  *
5598  * Returns a copy of <i>str</i> with all characters in the intersection of its
5599  * arguments deleted. Uses the same rules for building the set of characters as
5600  * <code>String#count</code>.
5601  *
5602  * "hello".delete "l","lo" #=> "heo"
5603  * "hello".delete "lo" #=> "he"
5604  * "hello".delete "aeiou", "^e" #=> "hell"
5605  * "hello".delete "ej-m" #=> "ho"
5606  */
5607 
5608 static VALUE
5610 {
5611  str = rb_str_dup(str);
5612  rb_str_delete_bang(argc, argv, str);
5613  return str;
5614 }
5615 
5616 
5617 /*
5618  * call-seq:
5619  * str.squeeze!([other_str]*) -> str or nil
5620  *
5621  * Squeezes <i>str</i> in place, returning either <i>str</i>, or
5622  * <code>nil</code> if no changes were made.
5623  */
5624 
5625 static VALUE
5627 {
5628  char squeez[TR_TABLE_SIZE];
5629  rb_encoding *enc = 0;
5630  VALUE del = 0, nodel = 0;
5631  char *s, *send, *t;
5632  int i, modify = 0;
5633  int ascompat, singlebyte = single_byte_optimizable(str);
5634  unsigned int save;
5635 
5636  if (argc == 0) {
5637  enc = STR_ENC_GET(str);
5638  }
5639  else {
5640  for (i=0; i<argc; i++) {
5641  VALUE s = argv[i];
5642 
5643  StringValue(s);
5644  enc = rb_enc_check(str, s);
5645  if (singlebyte && !single_byte_optimizable(s))
5646  singlebyte = 0;
5647  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5648  }
5649  }
5650 
5651  str_modify_keep_cr(str);
5652  s = t = RSTRING_PTR(str);
5653  if (!s || RSTRING_LEN(str) == 0) return Qnil;
5654  send = RSTRING_END(str);
5655  save = -1;
5656  ascompat = rb_enc_asciicompat(enc);
5657 
5658  if (singlebyte) {
5659  while (s < send) {
5660  unsigned int c = *(unsigned char*)s++;
5661  if (c != save || (argc > 0 && !squeez[c])) {
5662  *t++ = save = c;
5663  }
5664  }
5665  } else {
5666  while (s < send) {
5667  unsigned int c;
5668  int clen;
5669 
5670  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5671  if (c != save || (argc > 0 && !squeez[c])) {
5672  *t++ = save = c;
5673  }
5674  s++;
5675  }
5676  else {
5677  c = rb_enc_codepoint_len(s, send, &clen, enc);
5678 
5679  if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
5680  if (t != s) rb_enc_mbcput(c, t, enc);
5681  save = c;
5682  t += clen;
5683  }
5684  s += clen;
5685  }
5686  }
5687  }
5688 
5689  *t = '\0';
5690  if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
5691  STR_SET_LEN(str, t - RSTRING_PTR(str));
5692  modify = 1;
5693  }
5694 
5695  if (modify) return str;
5696  return Qnil;
5697 }
5698 
5699 
5700 /*
5701  * call-seq:
5702  * str.squeeze([other_str]*) -> new_str
5703  *
5704  * Builds a set of characters from the <i>other_str</i> parameter(s) using the
5705  * procedure described for <code>String#count</code>. Returns a new string
5706  * where runs of the same character that occur in this set are replaced by a
5707  * single character. If no arguments are given, all runs of identical
5708  * characters are replaced by a single character.
5709  *
5710  * "yellow moon".squeeze #=> "yelow mon"
5711  * "  now   is  the".squeeze(" ") #=> " now is the"
5712  * "putters shoot balls".squeeze("m-z") #=> "puters shot balls"
5713  */
5714 
5715 static VALUE
5717 {
5718  str = rb_str_dup(str);
5719  rb_str_squeeze_bang(argc, argv, str);
5720  return str;
5721 }
5722 
5723 
5724 /*
5725  * call-seq:
5726  * str.tr_s!(from_str, to_str) -> str or nil
5727  *
5728  * Performs <code>String#tr_s</code> processing on <i>str</i> in place,
5729  * returning <i>str</i>, or <code>nil</code> if no changes were made.
5730  */
5731 
5732 static VALUE
5734 {
5735  return tr_trans(str, src, repl, 1);
5736 }
5737 
5738 
5739 /*
5740  * call-seq:
5741  * str.tr_s(from_str, to_str) -> new_str
5742  *
5743  * Processes a copy of <i>str</i> as described under <code>String#tr</code>,
5744  * then removes duplicate characters in regions that were affected by the
5745  * translation.
5746  *
5747  * "hello".tr_s('l', 'r') #=> "hero"
5748  * "hello".tr_s('el', '*') #=> "h*o"
5749  * "hello".tr_s('el', 'hx') #=> "hhxo"
5750  */
5751 
5752 static VALUE
5753 rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
5754 {
5755  str = rb_str_dup(str);
5756  tr_trans(str, src, repl, 1);
5757  return str;
5758 }
5759 
5760 
5761 /*
5762  * call-seq:
5763  * str.count([other_str]+) -> fixnum
5764  *
5765  * Each +other_str+ parameter defines a set of characters to count. The
5766  * intersection of these sets defines the characters to count in +str+. Any
5767  * +other_str+ that starts with a caret <code>^</code> is negated. The
5768  * sequence <code>c1-c2</code> means all characters between c1 and c2. The
5769  * backslash character <code>\</code> can be used to escape <code>^</code> or
5770  * <code>-</code> and is otherwise ignored unless it appears at the end of a
5771  * sequence or the end of a +other_str+.
5772  *
5773  * a = "hello world"
5774  * a.count "lo" #=> 5
5775  * a.count "lo", "o" #=> 2
5776  * a.count "hello", "^l" #=> 4
5777  * a.count "ej-m" #=> 4
5778  *
5779  * "hello^world".count "\\^aeiou" #=> 4
5780  * "hello-world".count "a\\-eo" #=> 4
5781  *
5782  * c = "hello world\\r\\n"
5783  * c.count "\\" #=> 2
5784  * c.count "\\A" #=> 0
5785  * c.count "X-\\w" #=> 3
5786  */
5787 
5788 static VALUE
5790 {
5791  char table[TR_TABLE_SIZE];
5792  rb_encoding *enc = 0;
5793  VALUE del = 0, nodel = 0;
5794  char *s, *send;
5795  int i;
5796  int ascompat;
5797 
5799  for (i=0; i<argc; i++) {
5800  VALUE tstr = argv[i];
5801  unsigned char c;
5802 
5803  StringValue(tstr);
5804  enc = rb_enc_check(str, tstr);
5805  if (argc == 1 && RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
5806  (c = RSTRING_PTR(tstr)[0]) < 0x80 && !is_broken_string(str)) {
5807  int n = 0;
5808 
5809  s = RSTRING_PTR(str);
5810  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
5811  send = RSTRING_END(str);
5812  while (s < send) {
5813  if (*(unsigned char*)s++ == c) n++;
5814  }
5815  return INT2NUM(n);
5816  }
5817  tr_setup_table(tstr, table, i==0, &del, &nodel, enc);
5818  }
5819 
5820  s = RSTRING_PTR(str);
5821  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
5822  send = RSTRING_END(str);
5823  ascompat = rb_enc_asciicompat(enc);
5824  i = 0;
5825  while (s < send) {
5826  unsigned int c;
5827 
5828  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5829  if (table[c]) {
5830  i++;
5831  }
5832  s++;
5833  }
5834  else {
5835  int clen;
5836  c = rb_enc_codepoint_len(s, send, &clen, enc);
5837  if (tr_find(c, table, del, nodel)) {
5838  i++;
5839  }
5840  s += clen;
5841  }
5842  }
5843 
5844  return INT2NUM(i);
5845 }
5846 
/* Byte-indexed whitespace table: nonzero for '\t', '\n', '\v', '\f', '\r'
 * (9..13) and ' ' (32), zero for every other byte value. */
static const char isspacetable[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/* The cast keeps the index in 0..255 even where plain char is signed. */
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
5867 
5868 /*
5869  * call-seq:
5870  * str.split(pattern=$;, [limit]) -> anArray
5871  *
5872  * Divides <i>str</i> into substrings based on a delimiter, returning an array
5873  * of these substrings.
5874  *
5875  * If <i>pattern</i> is a <code>String</code>, then its contents are used as
5876  * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
5877  * space, <i>str</i> is split on whitespace, with leading whitespace and runs
5878  * of contiguous whitespace characters ignored.
5879  *
5880  * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
5881  * pattern matches. Whenever the pattern matches a zero-length string,
5882  * <i>str</i> is split into individual characters. If <i>pattern</i> contains
5883  * groups, the respective matches will be returned in the array as well.
5884  *
5885  * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
5886  * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
5887  * split on whitespace as if ` ' were specified.
5888  *
5889  * If the <i>limit</i> parameter is omitted, trailing null fields are
5890  * suppressed. If <i>limit</i> is a positive number, at most that number of
5891  * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
5892  * string is returned as the only entry in an array). If negative, there is no
5893  * limit to the number of fields returned, and trailing null fields are not
5894  * suppressed.
5895  *
5896  * When the input +str+ is empty an empty Array is returned as the string is
5897  * considered to have no fields to split.
5898  *
5899  * " now's  the time".split #=> ["now's", "the", "time"]
5900  * " now's  the time".split(' ') #=> ["now's", "the", "time"]
5901  * " now's  the time".split(/ /) #=> ["", "now's", "", "the", "time"]
5902  * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
5903  * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
5904  * "hello".split(//, 3) #=> ["h", "e", "llo"]
5905  * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
5906  *
5907  * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
5908  * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
5909  * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
5910  * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
5911  *
5912  * "".split(',', -1) #=> []
5913  */
5914 
5915 static VALUE
5917 {
5918  rb_encoding *enc;
5919  VALUE spat;
5920  VALUE limit;
5921  enum {awk, string, regexp} split_type;
5922  long beg, end, i = 0;
5923  int lim = 0;
5924  VALUE result, tmp;
5925 
5926  if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
5927  lim = NUM2INT(limit);
5928  if (lim <= 0) limit = Qnil;
5929  else if (lim == 1) {
5930  if (RSTRING_LEN(str) == 0)
5931  return rb_ary_new2(0);
5932  return rb_ary_new3(1, str);
5933  }
5934  i = 1;
5935  }
5936 
5937  enc = STR_ENC_GET(str);
5938  if (NIL_P(spat)) {
5939  if (!NIL_P(rb_fs)) {
5940  spat = rb_fs;
5941  goto fs_set;
5942  }
5943  split_type = awk;
5944  }
5945  else {
5946  fs_set:
5947  if (RB_TYPE_P(spat, T_STRING)) {
5948  rb_encoding *enc2 = STR_ENC_GET(spat);
5949 
5950  split_type = string;
5951  if (RSTRING_LEN(spat) == 0) {
5952  /* Special case - split into chars */
5953  spat = rb_reg_regcomp(spat);
5954  split_type = regexp;
5955  }
5956  else if (rb_enc_asciicompat(enc2) == 1) {
5957  if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
5958  split_type = awk;
5959  }
5960  }
5961  else {
5962  int l;
5963  if (rb_enc_ascget(RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' &&
5964  RSTRING_LEN(spat) == l) {
5965  split_type = awk;
5966  }
5967  }
5968  }
5969  else {
5970  spat = get_pat(spat, 1);
5971  split_type = regexp;
5972  }
5973  }
5974 
5975  result = rb_ary_new();
5976  beg = 0;
5977  if (split_type == awk) {
5978  char *ptr = RSTRING_PTR(str);
5979  char *eptr = RSTRING_END(str);
5980  char *bptr = ptr;
5981  int skip = 1;
5982  unsigned int c;
5983 
5984  end = beg;
5985  if (is_ascii_string(str)) {
5986  while (ptr < eptr) {
5987  c = (unsigned char)*ptr++;
5988  if (skip) {
5989  if (ascii_isspace(c)) {
5990  beg = ptr - bptr;
5991  }
5992  else {
5993  end = ptr - bptr;
5994  skip = 0;
5995  if (!NIL_P(limit) && lim <= i) break;
5996  }
5997  }
5998  else if (ascii_isspace(c)) {
5999  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6000  skip = 1;
6001  beg = ptr - bptr;
6002  if (!NIL_P(limit)) ++i;
6003  }
6004  else {
6005  end = ptr - bptr;
6006  }
6007  }
6008  }
6009  else {
6010  while (ptr < eptr) {
6011  int n;
6012 
6013  c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
6014  ptr += n;
6015  if (skip) {
6016  if (rb_isspace(c)) {
6017  beg = ptr - bptr;
6018  }
6019  else {
6020  end = ptr - bptr;
6021  skip = 0;
6022  if (!NIL_P(limit) && lim <= i) break;
6023  }
6024  }
6025  else if (rb_isspace(c)) {
6026  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6027  skip = 1;
6028  beg = ptr - bptr;
6029  if (!NIL_P(limit)) ++i;
6030  }
6031  else {
6032  end = ptr - bptr;
6033  }
6034  }
6035  }
6036  }
6037  else if (split_type == string) {
6038  char *ptr = RSTRING_PTR(str);
6039  char *temp = ptr;
6040  char *eptr = RSTRING_END(str);
6041  char *sptr = RSTRING_PTR(spat);
6042  long slen = RSTRING_LEN(spat);
6043 
6044  if (is_broken_string(str)) {
6045  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
6046  }
6047  if (is_broken_string(spat)) {
6048  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(spat)));
6049  }
6050  enc = rb_enc_check(str, spat);
6051  while (ptr < eptr &&
6052  (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
6053  /* Check we are at the start of a char */
6054  char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
6055  if (t != ptr + end) {
6056  ptr = t;
6057  continue;
6058  }
6059  rb_ary_push(result, rb_str_subseq(str, ptr - temp, end));
6060  ptr += end + slen;
6061  if (!NIL_P(limit) && lim <= ++i) break;
6062  }
6063  beg = ptr - temp;
6064  }
6065  else {
6066  char *ptr = RSTRING_PTR(str);
6067  long len = RSTRING_LEN(str);
6068  long start = beg;
6069  long idx;
6070  int last_null = 0;
6071  struct re_registers *regs;
6072 
6073  while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
6074  regs = RMATCH_REGS(rb_backref_get());
6075  if (start == end && BEG(0) == END(0)) {
6076  if (!ptr) {
6077  rb_ary_push(result, str_new_empty(str));
6078  break;
6079  }
6080  else if (last_null == 1) {
6081  rb_ary_push(result, rb_str_subseq(str, beg,
6082  rb_enc_fast_mbclen(ptr+beg,
6083  ptr+len,
6084  enc)));
6085  beg = start;
6086  }
6087  else {
6088  if (ptr+start == ptr+len)
6089  start++;
6090  else
6091  start += rb_enc_fast_mbclen(ptr+start,ptr+len,enc);
6092  last_null = 1;
6093  continue;
6094  }
6095  }
6096  else {
6097  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6098  beg = start = END(0);
6099  }
6100  last_null = 0;
6101 
6102  for (idx=1; idx < regs->num_regs; idx++) {
6103  if (BEG(idx) == -1) continue;
6104  if (BEG(idx) == END(idx))
6105  tmp = str_new_empty(str);
6106  else
6107  tmp = rb_str_subseq(str, BEG(idx), END(idx)-BEG(idx));
6108  rb_ary_push(result, tmp);
6109  }
6110  if (!NIL_P(limit) && lim <= ++i) break;
6111  }
6112  }
6113  if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
6114  if (RSTRING_LEN(str) == beg)
6115  tmp = str_new_empty(str);
6116  else
6117  tmp = rb_str_subseq(str, beg, RSTRING_LEN(str)-beg);
6118  rb_ary_push(result, tmp);
6119  }
6120  if (NIL_P(limit) && lim == 0) {
6121  long len;
6122  while ((len = RARRAY_LEN(result)) > 0 &&
6123  (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
6124  rb_ary_pop(result);
6125  }
6126 
6127  return result;
6128 }
6129 
6130 VALUE
6131 rb_str_split(VALUE str, const char *sep0)
6132 {
6133  VALUE sep;
6134 
6135  StringValue(str);
6136  sep = rb_str_new2(sep0);
6137  return rb_str_split_m(1, &sep, str);
6138 }
6139 
6140 
6141 static VALUE
6142 rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
6143 {
6144  rb_encoding *enc;
6145  VALUE rs;
6146  unsigned int newline;
6147  const char *p, *pend, *s, *ptr;
6148  long len, rslen;
6149  VALUE line;
6150  int n;
6151  VALUE orig = str;
6152  VALUE UNINITIALIZED_VAR(ary);
6153 
6154  if (argc == 0) {
6155  rs = rb_rs;
6156  }
6157  else {
6158  rb_scan_args(argc, argv, "01", &rs);
6159  }
6160 
6161  if (rb_block_given_p()) {
6162  if (wantarray) {
6163 #if 0 /* next major */
6164  rb_warn("given block not used");
6165  ary = rb_ary_new();
6166 #else
6167  rb_warning("passing a block to String#lines is deprecated");
6168  wantarray = 0;
6169 #endif
6170  }
6171  }
6172  else {
6173  if (wantarray)
6174  ary = rb_ary_new();
6175  else
6176  RETURN_ENUMERATOR(str, argc, argv);
6177  }
6178 
6179  if (NIL_P(rs)) {
6180  if (wantarray) {
6181  rb_ary_push(ary, str);
6182  return ary;
6183  }
6184  else {
6185  rb_yield(str);
6186  return orig;
6187  }
6188  }
6189  str = rb_str_new4(str);
6190  ptr = p = s = RSTRING_PTR(str);
6191  pend = p + RSTRING_LEN(str);
6192  len = RSTRING_LEN(str);
6193  StringValue(rs);
6194  if (rs == rb_default_rs) {
6195  enc = rb_enc_get(str);
6196  while (p < pend) {
6197  char *p0;
6198 
6199  p = memchr(p, '\n', pend - p);
6200  if (!p) break;
6201  p0 = rb_enc_left_char_head(s, p, pend, enc);
6202  if (!rb_enc_is_newline(p0, pend, enc)) {
6203  p++;
6204  continue;
6205  }
6206  p = p0 + rb_enc_mbclen(p0, pend, enc);
6207  line = rb_str_subseq(str, s - ptr, p - s);
6208  if (wantarray)
6209  rb_ary_push(ary, line);
6210  else
6211  rb_yield(line);
6212  str_mod_check(str, ptr, len);
6213  s = p;
6214  }
6215  goto finish;
6216  }
6217 
6218  enc = rb_enc_check(str, rs);
6219  rslen = RSTRING_LEN(rs);
6220  if (rslen == 0) {
6221  newline = '\n';
6222  }
6223  else {
6224  newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
6225  }
6226 
6227  while (p < pend) {
6228  unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
6229 
6230  again:
6231  if (rslen == 0 && c == newline) {
6232  p += n;
6233  if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) {
6234  goto again;
6235  }
6236  while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) {
6237  p += n;
6238  }
6239  p -= n;
6240  }
6241  if (c == newline &&
6242  (rslen <= 1 ||
6243  (pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) {
6244  const char *pp = p + (rslen ? rslen : n);
6245  line = rb_str_subseq(str, s - ptr, pp - s);
6246  if (wantarray)
6247  rb_ary_push(ary, line);
6248  else
6249  rb_yield(line);
6250  str_mod_check(str, ptr, len);
6251  s = pp;
6252  }
6253  p += n;
6254  }
6255 
6256  finish:
6257  if (s != pend) {
6258  line = rb_str_subseq(str, s - ptr, pend - s);
6259  if (wantarray)
6260  rb_ary_push(ary, line);
6261  else
6262  rb_yield(line);
6263  RB_GC_GUARD(str);
6264  }
6265 
6266  if (wantarray)
6267  return ary;
6268  else
6269  return orig;
6270 }
6271 
6272 /*
6273  * call-seq:
6274  * str.each_line(separator=$/) {|substr| block } -> str
6275  * str.each_line(separator=$/) -> an_enumerator
6276  *
6277  * Splits <i>str</i> using the supplied parameter as the record
6278  * separator (<code>$/</code> by default), passing each substring in
6279  * turn to the supplied block. If a zero-length record separator is
6280  * supplied, the string is split into paragraphs delimited by
6281  * multiple successive newlines.
6282  *
6283  * If no block is given, an enumerator is returned instead.
6284  *
6285  * print "Example one\n"
6286  * "hello\nworld".each_line {|s| p s}
6287  * print "Example two\n"
6288  * "hello\nworld".each_line('l') {|s| p s}
6289  * print "Example three\n"
6290  * "hello\n\n\nworld".each_line('') {|s| p s}
6291  *
6292  * <em>produces:</em>
6293  *
6294  * Example one
6295  * "hello\n"
6296  * "world"
6297  * Example two
6298  * "hel"
6299  * "l"
6300  * "o\nworl"
6301  * "d"
6302  * Example three
6303  * "hello\n\n\n"
6304  * "world"
6305  */
6306 
6307 static VALUE
6309 {
6310  return rb_str_enumerate_lines(argc, argv, str, 0);
6311 }
6312 
6313 /*
6314  * call-seq:
6315  * str.lines(separator=$/) -> an_array
6316  *
6317  * Returns an array of lines in <i>str</i> split using the supplied
6318  * record separator (<code>$/</code> by default). This is a
6319  * shorthand for <code>str.each_line(separator).to_a</code>.
6320  *
6321  * If a block is given, which is a deprecated form, works the same as
6322  * <code>each_line</code>.
6323  */
6324 
6325 static VALUE
6327 {
6328  return rb_str_enumerate_lines(argc, argv, str, 1);
6329 }
6330 
6331 static VALUE
6333 {
6334  return LONG2FIX(RSTRING_LEN(str));
6335 }
6336 
6337 static VALUE
6338 rb_str_enumerate_bytes(VALUE str, int wantarray)
6339 {
6340  long i;
6341  VALUE UNINITIALIZED_VAR(ary);
6342 
6343  if (rb_block_given_p()) {
6344  if (wantarray) {
6345 #if 0 /* next major */
6346  rb_warn("given block not used");
6347  ary = rb_ary_new();
6348 #else
6349  rb_warning("passing a block to String#bytes is deprecated");
6350  wantarray = 0;
6351 #endif
6352  }
6353  }
6354  else {
6355  if (wantarray)
6356  ary = rb_ary_new2(RSTRING_LEN(str));
6357  else
6359  }
6360 
6361  for (i=0; i<RSTRING_LEN(str); i++) {
6362  if (wantarray)
6363  rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6364  else
6365  rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6366  }
6367  if (wantarray)
6368  return ary;
6369  else
6370  return str;
6371 }
6372 
6373 /*
6374  * call-seq:
6375  * str.each_byte {|fixnum| block } -> str
6376  * str.each_byte -> an_enumerator
6377  *
6378  * Passes each byte in <i>str</i> to the given block, or returns an
6379  * enumerator if no block is given.
6380  *
6381  * "hello".each_byte {|c| print c, ' ' }
6382  *
6383  * <em>produces:</em>
6384  *
6385  * 104 101 108 108 111
6386  */
6387 
6388 static VALUE
6390 {
6391  return rb_str_enumerate_bytes(str, 0);
6392 }
6393 
6394 /*
6395  * call-seq:
6396  * str.bytes -> an_array
6397  *
6398  * Returns an array of bytes in <i>str</i>. This is a shorthand for
6399  * <code>str.each_byte.to_a</code>.
6400  *
6401  * If a block is given, which is a deprecated form, works the same as
6402  * <code>each_byte</code>.
6403  */
6404 
6405 static VALUE
6407 {
6408  return rb_str_enumerate_bytes(str, 1);
6409 }
6410 
6411 static VALUE
6413 {
6414  long len = RSTRING_LEN(str);
6415  if (!single_byte_optimizable(str)) {
6416  const char *ptr = RSTRING_PTR(str);
6417  rb_encoding *enc = rb_enc_get(str);
6418  const char *end_ptr = ptr + len;
6419  for (len = 0; ptr < end_ptr; ++len) {
6420  ptr += rb_enc_mbclen(ptr, end_ptr, enc);
6421  }
6422  }
6423  return LONG2FIX(len);
6424 }
6425 
6426 static VALUE
6427 rb_str_enumerate_chars(VALUE str, int wantarray)
6428 {
6429  VALUE orig = str;
6430  VALUE substr;
6431  long i, len, n;
6432  const char *ptr;
6433  rb_encoding *enc;
6434  VALUE UNINITIALIZED_VAR(ary);
6435 
6436  if (rb_block_given_p()) {
6437  if (wantarray) {
6438 #if 0 /* next major */
6439  rb_warn("given block not used");
6440  ary = rb_ary_new();
6441 #else
6442  rb_warning("passing a block to String#chars is deprecated");
6443  wantarray = 0;
6444 #endif
6445  }
6446  }
6447  else {
6448  if (wantarray)
6449  ary = rb_ary_new();
6450  else
6452  }
6453 
6454  str = rb_str_new4(str);
6455  ptr = RSTRING_PTR(str);
6456  len = RSTRING_LEN(str);
6457  enc = rb_enc_get(str);
6458  switch (ENC_CODERANGE(str)) {
6459  case ENC_CODERANGE_VALID:
6460  case ENC_CODERANGE_7BIT:
6461  for (i = 0; i < len; i += n) {
6462  n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
6463  substr = rb_str_subseq(str, i, n);
6464  if (wantarray)
6465  rb_ary_push(ary, substr);
6466  else
6467  rb_yield(substr);
6468  }
6469  break;
6470  default:
6471  for (i = 0; i < len; i += n) {
6472  n = rb_enc_mbclen(ptr + i, ptr + len, enc);
6473  substr = rb_str_subseq(str, i, n);
6474  if (wantarray)
6475  rb_ary_push(ary, substr);
6476  else
6477  rb_yield(substr);
6478  }
6479  }
6480  RB_GC_GUARD(str);
6481  if (wantarray)
6482  return ary;
6483  else
6484  return orig;
6485 }
6486 
6487 /*
6488  * call-seq:
6489  * str.each_char {|cstr| block } -> str
6490  * str.each_char -> an_enumerator
6491  *
6492  * Passes each character in <i>str</i> to the given block, or returns
6493  * an enumerator if no block is given.
6494  *
6495  * "hello".each_char {|c| print c, ' ' }
6496  *
6497  * <em>produces:</em>
6498  *
6499  * h e l l o
6500  */
6501 
6502 static VALUE
6504 {
6505  return rb_str_enumerate_chars(str, 0);
6506 }
6507 
6508 /*
6509  * call-seq:
6510  * str.chars -> an_array
6511  *
6512  * Returns an array of characters in <i>str</i>. This is a shorthand
6513  * for <code>str.each_char.to_a</code>.
6514  *
6515  * If a block is given, which is a deprecated form, works the same as
6516  * <code>each_char</code>.
6517  */
6518 
6519 static VALUE
6521 {
6522  return rb_str_enumerate_chars(str, 1);
6523 }
6524 
6525 
6526 static VALUE
6528 {
6529  VALUE orig = str;
6530  int n;
6531  unsigned int c;
6532  const char *ptr, *end;
6533  rb_encoding *enc;
6534  VALUE UNINITIALIZED_VAR(ary);
6535 
6536  if (single_byte_optimizable(str))
6537  return rb_str_enumerate_bytes(str, wantarray);
6538 
6539  if (rb_block_given_p()) {
6540  if (wantarray) {
6541 #if 0 /* next major */
6542  rb_warn("given block not used");
6543  ary = rb_ary_new();
6544 #else
6545  rb_warning("passing a block to String#codepoints is deprecated");
6546  wantarray = 0;
6547 #endif
6548  }
6549  }
6550  else {
6551  if (wantarray)
6552  ary = rb_ary_new();
6553  else
6555  }
6556 
6557  str = rb_str_new4(str);
6558  ptr = RSTRING_PTR(str);
6559  end = RSTRING_END(str);
6560  enc = STR_ENC_GET(str);
6561  while (ptr < end) {
6562  c = rb_enc_codepoint_len(ptr, end, &n, enc);
6563  if (wantarray)
6564  rb_ary_push(ary, UINT2NUM(c));
6565  else
6566  rb_yield(UINT2NUM(c));
6567  ptr += n;
6568  }
6569  RB_GC_GUARD(str);
6570  if (wantarray)
6571  return ary;
6572  else
6573  return orig;
6574 }
6575 
6576 /*
6577  * call-seq:
6578  * str.each_codepoint {|integer| block } -> str
6579  * str.each_codepoint -> an_enumerator
6580  *
6581  * Passes the <code>Integer</code> ordinal of each character in <i>str</i>,
6582  * also known as a <i>codepoint</i> when applied to Unicode strings to the
6583  * given block.
6584  *
6585  * If no block is given, an enumerator is returned instead.
6586  *
6587  * "hello\u0639".each_codepoint {|c| print c, ' ' }
6588  *
6589  * <em>produces:</em>
6590  *
6591  * 104 101 108 108 111 1593
6592  */
6593 
6594 static VALUE
6596 {
6597  return rb_str_enumerate_codepoints(str, 0);
6598 }
6599 
6600 /*
6601  * call-seq:
6602  * str.codepoints -> an_array
6603  *
6604  * Returns an array of the <code>Integer</code> ordinals of the
6605  * characters in <i>str</i>. This is a shorthand for
6606  * <code>str.each_codepoint.to_a</code>.
6607  *
6608  * If a block is given, which is a deprecated form, works the same as
6609  * <code>each_codepoint</code>.
6610  */
6611 
6612 static VALUE
6614 {
6615  return rb_str_enumerate_codepoints(str, 1);
6616 }
6617 
6618 
6619 static long
6621 {
6622  rb_encoding *enc = STR_ENC_GET(str);
6623  const char *p, *p2, *beg, *end;
6624 
6625  beg = RSTRING_PTR(str);
6626  end = beg + RSTRING_LEN(str);
6627  if (beg > end) return 0;
6628  p = rb_enc_prev_char(beg, end, end, enc);
6629  if (!p) return 0;
6630  if (p > beg && rb_enc_ascget(p, end, 0, enc) == '\n') {
6631  p2 = rb_enc_prev_char(beg, p, end, enc);
6632  if (p2 && rb_enc_ascget(p2, end, 0, enc) == '\r') p = p2;
6633  }
6634  return p - beg;
6635 }
6636 
6637 /*
6638  * call-seq:
6639  * str.chop! -> str or nil
6640  *
6641  * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
6642  * or <code>nil</code> if <i>str</i> is the empty string. See also
6643  * <code>String#chomp!</code>.
6644  */
6645 
6646 static VALUE
6648 {
6649  str_modify_keep_cr(str);
6650  if (RSTRING_LEN(str) > 0) {
6651  long len;
6652  len = chopped_length(str);
6653  STR_SET_LEN(str, len);
6654  RSTRING_PTR(str)[len] = '\0';
6655  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
6656  ENC_CODERANGE_CLEAR(str);
6657  }
6658  return str;
6659  }
6660  return Qnil;
6661 }
6662 
6663 
6664 /*
6665  * call-seq:
6666  * str.chop -> new_str
6667  *
6668  * Returns a new <code>String</code> with the last character removed. If the
6669  * string ends with <code>\r\n</code>, both characters are removed. Applying
6670  * <code>chop</code> to an empty string returns an empty
6671  * string. <code>String#chomp</code> is often a safer alternative, as it leaves
6672  * the string unchanged if it doesn't end in a record separator.
6673  *
6674  * "string\r\n".chop #=> "string"
6675  * "string\n\r".chop #=> "string\n"
6676  * "string\n".chop #=> "string"
6677  * "string".chop #=> "strin"
6678  * "x".chop.chop #=> ""
6679  */
6680 
6681 static VALUE
6683 {
6684  return rb_str_subseq(str, 0, chopped_length(str));
6685 }
6686 
6687 
6688 /*
6689  * call-seq:
6690  * str.chomp!(separator=$/) -> str or nil
6691  *
6692  * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
6693  * returning <i>str</i>, or <code>nil</code> if no modifications were made.
6694  */
6695 
6696 static VALUE
6698 {
6699  rb_encoding *enc;
6700  VALUE rs;
6701  int newline;
6702  char *p, *pp, *e;
6703  long len, rslen;
6704 
6705  str_modify_keep_cr(str);
6706  len = RSTRING_LEN(str);
6707  if (len == 0) return Qnil;
6708  p = RSTRING_PTR(str);
6709  e = p + len;
6710  if (argc == 0) {
6711  rs = rb_rs;
6712  if (rs == rb_default_rs) {
6713  smart_chomp:
6714  enc = rb_enc_get(str);
6715  if (rb_enc_mbminlen(enc) > 1) {
6716  pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
6717  if (rb_enc_is_newline(pp, e, enc)) {
6718  e = pp;
6719  }
6720  pp = e - rb_enc_mbminlen(enc);
6721  if (pp >= p) {
6722  pp = rb_enc_left_char_head(p, pp, e, enc);
6723  if (rb_enc_ascget(pp, e, 0, enc) == '\r') {
6724  e = pp;
6725  }
6726  }
6727  if (e == RSTRING_END(str)) {
6728  return Qnil;
6729  }
6730  len = e - RSTRING_PTR(str);
6731  STR_SET_LEN(str, len);
6732  }
6733  else {
6734  if (RSTRING_PTR(str)[len-1] == '\n') {
6735  STR_DEC_LEN(str);
6736  if (RSTRING_LEN(str) > 0 &&
6737  RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
6738  STR_DEC_LEN(str);
6739  }
6740  }
6741  else if (RSTRING_PTR(str)[len-1] == '\r') {
6742  STR_DEC_LEN(str);
6743  }
6744  else {
6745  return Qnil;
6746  }
6747  }
6748  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6749  return str;
6750  }
6751  }
6752  else {
6753  rb_scan_args(argc, argv, "01", &rs);
6754  }
6755  if (NIL_P(rs)) return Qnil;
6756  StringValue(rs);
6757  rslen = RSTRING_LEN(rs);
6758  if (rslen == 0) {
6759  while (len>0 && p[len-1] == '\n') {
6760  len--;
6761  if (len>0 && p[len-1] == '\r')
6762  len--;
6763  }
6764  if (len < RSTRING_LEN(str)) {
6765  STR_SET_LEN(str, len);
6766  RSTRING_PTR(str)[len] = '\0';
6767  return str;
6768  }
6769  return Qnil;
6770  }
6771  if (rslen > len) return Qnil;
6772  newline = RSTRING_PTR(rs)[rslen-1];
6773  if (rslen == 1 && newline == '\n')
6774  goto smart_chomp;
6775 
6776  enc = rb_enc_check(str, rs);
6777  if (is_broken_string(rs)) {
6778  return Qnil;
6779  }
6780  pp = e - rslen;
6781  if (p[len-1] == newline &&
6782  (rslen <= 1 ||
6783  memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
6784  if (rb_enc_left_char_head(p, pp, e, enc) != pp)
6785  return Qnil;
6786  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
6787  ENC_CODERANGE_CLEAR(str);
6788  }
6789  STR_SET_LEN(str, RSTRING_LEN(str) - rslen);
6790  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6791  return str;
6792  }
6793  return Qnil;
6794 }
6795 
6796 
6797 /*
6798  * call-seq:
6799  * str.chomp(separator=$/) -> new_str
6800  *
6801  * Returns a new <code>String</code> with the given record separator removed
6802  * from the end of <i>str</i> (if present). If <code>$/</code> has not been
6803  * changed from the default Ruby record separator, then <code>chomp</code> also
6804  * removes carriage return characters (that is it will remove <code>\n</code>,
6805  * <code>\r</code>, and <code>\r\n</code>).
6806  *
6807  * "hello".chomp #=> "hello"
6808  * "hello\n".chomp #=> "hello"
6809  * "hello\r\n".chomp #=> "hello"
6810  * "hello\n\r".chomp #=> "hello\n"
6811  * "hello\r".chomp #=> "hello"
6812  * "hello \n there".chomp #=> "hello \n there"
6813  * "hello".chomp("llo") #=> "he"
6814  */
6815 
6816 static VALUE
6818 {
6819  str = rb_str_dup(str);
6820  rb_str_chomp_bang(argc, argv, str);
6821  return str;
6822 }
6823 
6824 /*
6825  * call-seq:
6826  * str.lstrip! -> self or nil
6827  *
6828  * Removes leading whitespace from <i>str</i>, returning <code>nil</code> if no
6829  * change was made. See also <code>String#rstrip!</code> and
6830  * <code>String#strip!</code>.
6831  *
6832  * " hello ".lstrip! #=> "hello "
6833  * "hello".lstrip! #=> nil
6834  */
6835 
6836 static VALUE
6838 {
6839  rb_encoding *enc;
6840  char *s, *t, *e;
6841 
6842  str_modify_keep_cr(str);
6843  enc = STR_ENC_GET(str);
6844  s = RSTRING_PTR(str);
6845  if (!s || RSTRING_LEN(str) == 0) return Qnil;
6846  e = t = RSTRING_END(str);
6847  /* remove spaces at head */
6848  while (s < e) {
6849  int n;
6850  unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
6851 
6852  if (!rb_isspace(cc)) break;
6853  s += n;
6854  }
6855 
6856  if (s > RSTRING_PTR(str)) {
6857  STR_SET_LEN(str, t-s);
6858  memmove(RSTRING_PTR(str), s, RSTRING_LEN(str));
6859  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6860  return str;
6861  }
6862  return Qnil;
6863 }
6864 
6865 
6866 /*
6867  * call-seq:
6868  * str.lstrip -> new_str
6869  *
6870  * Returns a copy of <i>str</i> with leading whitespace removed. See also
6871  * <code>String#rstrip</code> and <code>String#strip</code>.
6872  *
6873  * " hello ".lstrip #=> "hello "
6874  * "hello".lstrip #=> "hello"
6875  */
6876 
6877 static VALUE
6879 {
6880  str = rb_str_dup(str);
6881  rb_str_lstrip_bang(str);
6882  return str;
6883 }
6884 
6885 
6886 /*
6887  * call-seq:
6888  * str.rstrip! -> self or nil
6889  *
6890  * Removes trailing whitespace from <i>str</i>, returning <code>nil</code> if
6891  * no change was made. See also <code>String#lstrip!</code> and
6892  * <code>String#strip!</code>.
6893  *
6894  * " hello ".rstrip! #=> " hello"
6895  * "hello".rstrip! #=> nil
6896  */
6897 
6898 static VALUE
6900 {
6901  rb_encoding *enc;
6902  char *s, *t, *e;
6903 
6904  str_modify_keep_cr(str);
6905  enc = STR_ENC_GET(str);
6907  s = RSTRING_PTR(str);
6908  if (!s || RSTRING_LEN(str) == 0) return Qnil;
6909  t = e = RSTRING_END(str);
6910 
6911  /* remove trailing spaces or '\0's */
6912  if (single_byte_optimizable(str)) {
6913  unsigned char c;
6914  while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--;
6915  }
6916  else {
6917  char *tp;
6918 
6919  while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
6920  unsigned int c = rb_enc_codepoint(tp, e, enc);
6921  if (c && !rb_isspace(c)) break;
6922  t = tp;
6923  }
6924  }
6925  if (t < e) {
6926  long len = t-RSTRING_PTR(str);
6927 
6928  STR_SET_LEN(str, len);
6929  RSTRING_PTR(str)[len] = '\0';
6930  return str;
6931  }
6932  return Qnil;
6933 }
6934 
6935 
6936 /*
6937  * call-seq:
6938  * str.rstrip -> new_str
6939  *
6940  * Returns a copy of <i>str</i> with trailing whitespace removed. See also
6941  * <code>String#lstrip</code> and <code>String#strip</code>.
6942  *
6943  * " hello ".rstrip #=> " hello"
6944  * "hello".rstrip #=> "hello"
6945  */
6946 
6947 static VALUE
6949 {
6950  str = rb_str_dup(str);
6951  rb_str_rstrip_bang(str);
6952  return str;
6953 }
6954 
6955 
6956 /*
6957  * call-seq:
6958  * str.strip! -> str or nil
6959  *
6960  * Removes leading and trailing whitespace from <i>str</i>. Returns
6961  * <code>nil</code> if <i>str</i> was not altered.
6962  */
6963 
6964 static VALUE
6966 {
6967  VALUE l = rb_str_lstrip_bang(str);
6968  VALUE r = rb_str_rstrip_bang(str);
6969 
6970  if (NIL_P(l) && NIL_P(r)) return Qnil;
6971  return str;
6972 }
6973 
6974 
6975 /*
6976  * call-seq:
6977  * str.strip -> new_str
6978  *
6979  * Returns a copy of <i>str</i> with leading and trailing whitespace removed.
6980  *
6981  * " hello ".strip #=> "hello"
6982  * "\tgoodbye\r\n".strip #=> "goodbye"
6983  */
6984 
6985 static VALUE
6987 {
6988  str = rb_str_dup(str);
6989  rb_str_strip_bang(str);
6990  return str;
6991 }
6992 
6993 static VALUE
6994 scan_once(VALUE str, VALUE pat, long *start)
6995 {
6996  VALUE result, match;
6997  struct re_registers *regs;
6998  int i;
6999 
7000  if (rb_reg_search(pat, str, *start, 0) >= 0) {
7001  match = rb_backref_get();
7002  regs = RMATCH_REGS(match);
7003  if (BEG(0) == END(0)) {
7004  rb_encoding *enc = STR_ENC_GET(str);
7005  /*
7006  * Always consume at least one character of the input string
7007  */
7008  if (RSTRING_LEN(str) > END(0))
7009  *start = END(0)+rb_enc_fast_mbclen(RSTRING_PTR(str)+END(0),
7010  RSTRING_END(str), enc);
7011  else
7012  *start = END(0)+1;
7013  }
7014  else {
7015  *start = END(0);
7016  }
7017  if (regs->num_regs == 1) {
7018  return rb_reg_nth_match(0, match);
7019  }
7020  result = rb_ary_new2(regs->num_regs);
7021  for (i=1; i < regs->num_regs; i++) {
7022  rb_ary_push(result, rb_reg_nth_match(i, match));
7023  }
7024 
7025  return result;
7026  }
7027  return Qnil;
7028 }
7029 
7030 
7031 /*
7032  * call-seq:
7033  * str.scan(pattern) -> array
7034  * str.scan(pattern) {|match, ...| block } -> str
7035  *
7036  * Both forms iterate through <i>str</i>, matching the pattern (which may be a
7037  * <code>Regexp</code> or a <code>String</code>). For each match, a result is
7038  * generated and either added to the result array or passed to the block. If
7039  * the pattern contains no groups, each individual result consists of the
7040  * matched string, <code>$&</code>. If the pattern contains groups, each
7041  * individual result is itself an array containing one entry per group.
7042  *
7043  * a = "cruel world"
7044  * a.scan(/\w+/) #=> ["cruel", "world"]
7045  * a.scan(/.../) #=> ["cru", "el ", "wor"]
7046  * a.scan(/(...)/) #=> [["cru"], ["el "], ["wor"]]
7047  * a.scan(/(..)(..)/) #=> [["cr", "ue"], ["l ", "wo"]]
7048  *
7049  * And the block form:
7050  *
7051  * a.scan(/\w+/) {|w| print "<<#{w}>> " }
7052  * print "\n"
7053  * a.scan(/(.)(.)/) {|x,y| print y, x }
7054  * print "\n"
7055  *
7056  * <em>produces:</em>
7057  *
7058  * <<cruel>> <<world>>
7059  * rceu lowlr
7060  */
7061 
7062 static VALUE
7064 {
7065  VALUE result;
7066  long start = 0;
7067  long last = -1, prev = 0;
7068  char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);
7069 
7070  pat = get_pat(pat, 1);
7071  if (!rb_block_given_p()) {
7072  VALUE ary = rb_ary_new();
7073 
7074  while (!NIL_P(result = scan_once(str, pat, &start))) {
7075  last = prev;
7076  prev = start;
7077  rb_ary_push(ary, result);
7078  }
7079  if (last >= 0) rb_reg_search(pat, str, last, 0);
7080  return ary;
7081  }
7082 
7083  while (!NIL_P(result = scan_once(str, pat, &start))) {
7084  last = prev;
7085  prev = start;
7086  rb_yield(result);
7087  str_mod_check(str, p, len);
7088  }
7089  if (last >= 0) rb_reg_search(pat, str, last, 0);
7090  return str;
7091 }
7092 
7093 
7094 /*
7095  * call-seq:
7096  * str.hex -> integer
7097  *
7098  * Treats leading characters from <i>str</i> as a string of hexadecimal digits
7099  * (with an optional sign and an optional <code>0x</code>) and returns the
7100  * corresponding number. Zero is returned on error.
7101  *
7102  * "0x0a".hex #=> 10
7103  * "-1234".hex #=> -4660
7104  * "0".hex #=> 0
7105  * "wombat".hex #=> 0
7106  */
7107 
7108 static VALUE
7110 {
7111  return rb_str_to_inum(str, 16, FALSE);
7112 }
7113 
7114 
7115 /*
7116  * call-seq:
7117  * str.oct -> integer
7118  *
7119  * Treats leading characters of <i>str</i> as a string of octal digits (with an
7120  * optional sign) and returns the corresponding number. Returns 0 if the
7121  * conversion fails.
7122  *
7123  * "123".oct #=> 83
7124  * "-377".oct #=> -255
7125  * "bad".oct #=> 0
7126  * "0377bad".oct #=> 255
7127  */
7128 
7129 static VALUE
7131 {
7132  return rb_str_to_inum(str, -8, FALSE);
7133 }
7134 
7135 
7136 /*
7137  * call-seq:
7138  * str.crypt(salt_str) -> new_str
7139  *
7140  * Applies a one-way cryptographic hash to <i>str</i> by invoking the
7141  * standard library function <code>crypt(3)</code> with the given
7142  * salt string. While the format and the result are system and
7143  * implementation dependent, using a salt matching the regular
7144  * expression <code>\A[a-zA-Z0-9./]{2}</code> should be valid and
7145  * safe on any platform, in which only the first two characters are
7146  * significant.
7147  *
7148  * This method is for use in system specific scripts, so if you want
7149  * a cross-platform hash function consider using Digest or OpenSSL
7150  * instead.
7151  */
7152 
7153 static VALUE
7155 {
7156  extern char *crypt(const char *, const char *);
7157  VALUE result;
7158  const char *s, *saltp;
7159  char *res;
7160 #ifdef BROKEN_CRYPT
7161  char salt_8bit_clean[3];
7162 #endif
7163 
7164  StringValue(salt);
7165  if (RSTRING_LEN(salt) < 2)
7166  rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
7167 
7168  s = RSTRING_PTR(str);
7169  if (!s) s = "";
7170  saltp = RSTRING_PTR(salt);
7171 #ifdef BROKEN_CRYPT
7172  if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
7173  salt_8bit_clean[0] = saltp[0] & 0x7f;
7174  salt_8bit_clean[1] = saltp[1] & 0x7f;
7175  salt_8bit_clean[2] = '\0';
7176  saltp = salt_8bit_clean;
7177  }
7178 #endif
7179  res = crypt(s, saltp);
7180  if (!res) {
7181  rb_sys_fail("crypt");
7182  }
7183  result = rb_str_new2(res);
7184  OBJ_INFECT(result, str);
7185  OBJ_INFECT(result, salt);
7186  return result;
7187 }
7188 
7189 
7190 /*
7191  * call-seq:
7192  * str.intern -> symbol
7193  * str.to_sym -> symbol
7194  *
7195  * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
7196  * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
7197  *
7198  * "Koala".intern #=> :Koala
7199  * s = 'cat'.to_sym #=> :cat
7200  * s == :cat #=> true
7201  * s = '@cat'.to_sym #=> :@cat
7202  * s == :@cat #=> true
7203  *
7204  * This can also be used to create symbols that cannot be represented using the
7205  * <code>:xxx</code> notation.
7206  *
7207  * 'cat and dog'.to_sym #=> :"cat and dog"
7208  */
7209 
7210 VALUE
7212 {
7213  VALUE str = RB_GC_GUARD(s);
7214  ID id;
7215 
7216  id = rb_intern_str(str);
7217  return ID2SYM(id);
7218 }
7219 
7220 
7221 /*
7222  * call-seq:
7223  * str.ord -> integer
7224  *
7225  * Return the <code>Integer</code> ordinal of a one-character string.
7226  *
7227  * "a".ord #=> 97
7228  */
7229 
7230 VALUE
7232 {
7233  unsigned int c;
7234 
7236  return UINT2NUM(c);
7237 }
7238 /*
7239  * call-seq:
7240  * str.sum(n=16) -> integer
7241  *
7242  * Returns a basic <em>n</em>-bit checksum of the characters in <i>str</i>,
7243  * where <em>n</em> is the optional <code>Fixnum</code> parameter, defaulting
7244  * to 16. The result is simply the sum of the binary value of each character in
7245  * <i>str</i> modulo <code>2**n - 1</code>. This is not a particularly good
7246  * checksum.
7247  */
7248 
7249 static VALUE
7251 {
7252  VALUE vbits;
7253  int bits;
7254  char *ptr, *p, *pend;
7255  long len;
7256  VALUE sum = INT2FIX(0);
7257  unsigned long sum0 = 0;
7258 
7259  if (argc == 0) {
7260  bits = 16;
7261  }
7262  else {
7263  rb_scan_args(argc, argv, "01", &vbits);
7264  bits = NUM2INT(vbits);
7265  }
7266  ptr = p = RSTRING_PTR(str);
7267  len = RSTRING_LEN(str);
7268  pend = p + len;
7269 
7270  while (p < pend) {
7271  if (FIXNUM_MAX - UCHAR_MAX < sum0) {
7272  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7273  str_mod_check(str, ptr, len);
7274  sum0 = 0;
7275  }
7276  sum0 += (unsigned char)*p;
7277  p++;
7278  }
7279 
7280  if (bits == 0) {
7281  if (sum0) {
7282  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7283  }
7284  }
7285  else {
7286  if (sum == INT2FIX(0)) {
7287  if (bits < (int)sizeof(long)*CHAR_BIT) {
7288  sum0 &= (((unsigned long)1)<<bits)-1;
7289  }
7290  sum = LONG2FIX(sum0);
7291  }
7292  else {
7293  VALUE mod;
7294 
7295  if (sum0) {
7296  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7297  }
7298 
7299  mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits));
7300  mod = rb_funcall(mod, '-', 1, INT2FIX(1));
7301  sum = rb_funcall(sum, '&', 1, mod);
7302  }
7303  }
7304  return sum;
7305 }
7306 
7307 static VALUE
7308 rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
7309 {
7310  rb_encoding *enc;
7311  VALUE w;
7312  long width, len, flen = 1, fclen = 1;
7313  VALUE res;
7314  char *p;
7315  const char *f = " ";
7316  long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
7317  volatile VALUE pad;
7318  int singlebyte = 1, cr;
7319 
7320  rb_scan_args(argc, argv, "11", &w, &pad);
7321  enc = STR_ENC_GET(str);
7322  width = NUM2LONG(w);
7323  if (argc == 2) {
7324  StringValue(pad);
7325  enc = rb_enc_check(str, pad);
7326  f = RSTRING_PTR(pad);
7327  flen = RSTRING_LEN(pad);
7328  fclen = str_strlen(pad, enc);
7329  singlebyte = single_byte_optimizable(pad);
7330  if (flen == 0 || fclen == 0) {
7331  rb_raise(rb_eArgError, "zero width padding");
7332  }
7333  }
7334  len = str_strlen(str, enc);
7335  if (width < 0 || len >= width) return rb_str_dup(str);
7336  n = width - len;
7337  llen = (jflag == 'l') ? 0 : ((jflag == 'r') ? n : n/2);
7338  rlen = n - llen;
7339  cr = ENC_CODERANGE(str);
7340  if (flen > 1) {
7341  llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
7342  rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
7343  }
7344  size = RSTRING_LEN(str);
7345  if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
7346  (len *= flen) >= LONG_MAX - llen2 - rlen2 ||
7347  (len += llen2 + rlen2) >= LONG_MAX - size) {
7348  rb_raise(rb_eArgError, "argument too big");
7349  }
7350  len += size;
7351  res = rb_str_new5(str, 0, len);
7352  p = RSTRING_PTR(res);
7353  if (flen <= 1) {
7354  memset(p, *f, llen);
7355  p += llen;
7356  }
7357  else {
7358  while (llen >= fclen) {
7359  memcpy(p,f,flen);
7360  p += flen;
7361  llen -= fclen;
7362  }
7363  if (llen > 0) {
7364  memcpy(p, f, llen2);
7365  p += llen2;
7366  }
7367  }
7368  memcpy(p, RSTRING_PTR(str), size);
7369  p += size;
7370  if (flen <= 1) {
7371  memset(p, *f, rlen);
7372  p += rlen;
7373  }
7374  else {
7375  while (rlen >= fclen) {
7376  memcpy(p,f,flen);
7377  p += flen;
7378  rlen -= fclen;
7379  }
7380  if (rlen > 0) {
7381  memcpy(p, f, rlen2);
7382  p += rlen2;
7383  }
7384  }
7385  *p = '\0';
7386  STR_SET_LEN(res, p-RSTRING_PTR(res));
7387  OBJ_INFECT(res, str);
7388  if (!NIL_P(pad)) OBJ_INFECT(res, pad);
7389  rb_enc_associate(res, enc);
7390  if (argc == 2)
7391  cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(pad));
7392  if (cr != ENC_CODERANGE_BROKEN)
7393  ENC_CODERANGE_SET(res, cr);
7394  return res;
7395 }
7396 
7397 
7398 /*
7399  * call-seq:
7400  * str.ljust(integer, padstr=' ') -> new_str
7401  *
7402  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7403  * <code>String</code> of length <i>integer</i> with <i>str</i> left justified
7404  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7405  *
7406  * "hello".ljust(4) #=> "hello"
7407  * "hello".ljust(20) #=> "hello "
7408  * "hello".ljust(20, '1234') #=> "hello123412341234123"
7409  */
7410 
7411 static VALUE
7413 {
7414  return rb_str_justify(argc, argv, str, 'l');
7415 }
7416 
7417 
7418 /*
7419  * call-seq:
7420  * str.rjust(integer, padstr=' ') -> new_str
7421  *
7422  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7423  * <code>String</code> of length <i>integer</i> with <i>str</i> right justified
7424  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7425  *
7426  * "hello".rjust(4) #=> "hello"
7427  * "hello".rjust(20) #=> " hello"
7428  * "hello".rjust(20, '1234') #=> "123412341234123hello"
7429  */
7430 
7431 static VALUE
7433 {
7434  return rb_str_justify(argc, argv, str, 'r');
7435 }
7436 
7437 
7438 /*
7439  * call-seq:
7440  * str.center(width, padstr=' ') -> new_str
7441  *
7442  * Centers +str+ in +width+. If +width+ is greater than the length of +str+,
7443  * returns a new String of length +width+ with +str+ centered and padded with
7444  * +padstr+; otherwise, returns +str+.
7445  *
7446  * "hello".center(4) #=> "hello"
7447  * "hello".center(20) #=> " hello "
7448  * "hello".center(20, '123') #=> "1231231hello12312312"
7449  */
7450 
7451 static VALUE
7453 {
7454  return rb_str_justify(argc, argv, str, 'c');
7455 }
7456 
7457 /*
7458  * call-seq:
7459  * str.partition(sep) -> [head, sep, tail]
7460  * str.partition(regexp) -> [head, match, tail]
7461  *
7462  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string
7463  * and returns the part before it, the match, and the part
7464  * after it.
7465  * If it is not found, returns <i>str</i> and two empty strings.
7466  *
7467  * "hello".partition("l") #=> ["he", "l", "lo"]
7468  * "hello".partition("x") #=> ["hello", "", ""]
7469  * "hello".partition(/.l/) #=> ["h", "el", "lo"]
7470  */
7471 
7472 static VALUE
7474 {
7475  long pos;
7476  int regex = FALSE;
7477 
7478  if (RB_TYPE_P(sep, T_REGEXP)) {
7479  pos = rb_reg_search(sep, str, 0, 0);
7480  regex = TRUE;
7481  }
7482  else {
7483  VALUE tmp;
7484 
7485  tmp = rb_check_string_type(sep);
7486  if (NIL_P(tmp)) {
7487  rb_raise(rb_eTypeError, "type mismatch: %s given",
7488  rb_obj_classname(sep));
7489  }
7490  sep = tmp;
7491  pos = rb_str_index(str, sep, 0);
7492  }
7493  if (pos < 0) {
7494  failed:
7495  return rb_ary_new3(3, str, str_new_empty(str), str_new_empty(str));
7496  }
7497  if (regex) {
7498  sep = rb_str_subpat(str, sep, INT2FIX(0));
7499  if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
7500  }
7501  return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
7502  sep,
7503  rb_str_subseq(str, pos+RSTRING_LEN(sep),
7504  RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
7505 }
7506 
7507 /*
7508  * call-seq:
7509  * str.rpartition(sep) -> [head, sep, tail]
7510  * str.rpartition(regexp) -> [head, match, tail]
7511  *
7512  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string from the end
7513  * of the string, and returns the part before it, the match, and the part
7514  * after it.
7515  * If it is not found, returns two empty strings and <i>str</i>.
7516  *
7517  * "hello".rpartition("l") #=> ["hel", "l", "o"]
7518  * "hello".rpartition("x") #=> ["", "", "hello"]
7519  * "hello".rpartition(/.l/) #=> ["he", "ll", "o"]
7520  */
7521 
7522 static VALUE
7524 {
7525  long pos = RSTRING_LEN(str);
7526  int regex = FALSE;
7527 
7528  if (RB_TYPE_P(sep, T_REGEXP)) {
7529  pos = rb_reg_search(sep, str, pos, 1);
7530  regex = TRUE;
7531  }
7532  else {
7533  VALUE tmp;
7534 
7535  tmp = rb_check_string_type(sep);
7536  if (NIL_P(tmp)) {
7537  rb_raise(rb_eTypeError, "type mismatch: %s given",
7538  rb_obj_classname(sep));
7539  }
7540  sep = tmp;
7541  pos = rb_str_sublen(str, pos);
7542  pos = rb_str_rindex(str, sep, pos);
7543  }
7544  if (pos < 0) {
7545  return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), str);
7546  }
7547  if (regex) {
7548  sep = rb_reg_nth_match(0, rb_backref_get());
7549  }
7550  return rb_ary_new3(3, rb_str_substr(str, 0, pos),
7551  sep,
7552  rb_str_substr(str,pos+str_strlen(sep,STR_ENC_GET(sep)),RSTRING_LEN(str)));
7553 }
7554 
7555 /*
7556  * call-seq:
7557  * str.start_with?([prefixes]+) -> true or false
7558  *
7559  * Returns true if +str+ starts with one of the +prefixes+ given.
7560  *
7561  * "hello".start_with?("hell") #=> true
7562  *
7563  * # returns true if one of the prefixes matches.
7564  * "hello".start_with?("heaven", "hell") #=> true
7565  * "hello".start_with?("heaven", "paradise") #=> false
7566  */
7567 
7568 static VALUE
7570 {
7571  int i;
7572 
7573  for (i=0; i<argc; i++) {
7574  VALUE tmp = argv[i];
7575  StringValue(tmp);
7576  rb_enc_check(str, tmp);
7577  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7578  if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7579  return Qtrue;
7580  }
7581  return Qfalse;
7582 }
7583 
7584 /*
7585  * call-seq:
7586  * str.end_with?([suffixes]+) -> true or false
7587  *
7588  * Returns true if +str+ ends with one of the +suffixes+ given.
7589  */
7590 
7591 static VALUE
7593 {
7594  int i;
7595  char *p, *s, *e;
7596  rb_encoding *enc;
7597 
7598  for (i=0; i<argc; i++) {
7599  VALUE tmp = argv[i];
7600  StringValue(tmp);
7601  enc = rb_enc_check(str, tmp);
7602  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7603  p = RSTRING_PTR(str);
7604  e = p + RSTRING_LEN(str);
7605  s = e - RSTRING_LEN(tmp);
7606  if (rb_enc_left_char_head(p, s, e, enc) != s)
7607  continue;
7608  if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7609  return Qtrue;
7610  }
7611  return Qfalse;
7612 }
7613 
7614 void
7616 {
7617  if (!NIL_P(val) && !RB_TYPE_P(val, T_STRING)) {
7618  rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
7619  }
7620  *var = val;
7621 }
7622 
7623 
7624 /*
7625  * call-seq:
7626  * str.force_encoding(encoding) -> str
7627  *
7628  * Changes the encoding to +encoding+ and returns self.
7629  */
7630 
7631 static VALUE
7633 {
7634  str_modifiable(str);
7635  rb_enc_associate(str, rb_to_encoding(enc));
7636  ENC_CODERANGE_CLEAR(str);
7637  return str;
7638 }
7639 
7640 /*
7641  * call-seq:
7642  * str.b -> str
7643  *
7644  * Returns a copied string whose encoding is ASCII-8BIT.
7645  */
7646 
7647 static VALUE
7649 {
7650  VALUE str2 = str_alloc(rb_cString);
7651  str_replace_shared_without_enc(str2, str);
7652  OBJ_INFECT(str2, str);
7654  return str2;
7655 }
7656 
7657 /*
7658  * call-seq:
7659  * str.valid_encoding? -> true or false
7660  *
7661  * Returns true for a string which is encoded correctly.
7662  *
7663  * "\xc2\xa1".force_encoding("UTF-8").valid_encoding? #=> true
7664  * "\xc2".force_encoding("UTF-8").valid_encoding? #=> false
7665  * "\x80".force_encoding("UTF-8").valid_encoding? #=> false
7666  */
7667 
7668 static VALUE
7670 {
7671  int cr = rb_enc_str_coderange(str);
7672 
7673  return cr == ENC_CODERANGE_BROKEN ? Qfalse : Qtrue;
7674 }
7675 
7676 /*
7677  * call-seq:
7678  * str.ascii_only? -> true or false
7679  *
7680  * Returns true for a string which has only ASCII characters.
7681  *
7682  * "abc".force_encoding("UTF-8").ascii_only? #=> true
7683  * "abc\u{6666}".force_encoding("UTF-8").ascii_only? #=> false
7684  */
7685 
7686 static VALUE
7688 {
7689  int cr = rb_enc_str_coderange(str);
7690 
7691  return cr == ENC_CODERANGE_7BIT ? Qtrue : Qfalse;
7692 }
7693 
7708 VALUE
7709 rb_str_ellipsize(VALUE str, long len)
7710 {
7711  static const char ellipsis[] = "...";
7712  const long ellipsislen = sizeof(ellipsis) - 1;
7713  rb_encoding *const enc = rb_enc_get(str);
7714  const long blen = RSTRING_LEN(str);
7715  const char *const p = RSTRING_PTR(str), *e = p + blen;
7716  VALUE estr, ret = 0;
7717 
7718  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
7719  if (len * rb_enc_mbminlen(enc) >= blen ||
7720  (e = rb_enc_nth(p, e, len, enc)) - p == blen) {
7721  ret = str;
7722  }
7723  else if (len <= ellipsislen ||
7724  !(e = rb_enc_step_back(p, e, e, len = ellipsislen, enc))) {
7725  if (rb_enc_asciicompat(enc)) {
7726  ret = rb_str_new_with_class(str, ellipsis, len);
7727  rb_enc_associate(ret, enc);
7728  }
7729  else {
7730  estr = rb_usascii_str_new(ellipsis, len);
7731  ret = rb_str_encode(estr, rb_enc_from_encoding(enc), 0, Qnil);
7732  }
7733  }
7734  else if (ret = rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
7735  rb_str_cat(ret, ellipsis, ellipsislen);
7736  }
7737  else {
7738  estr = rb_str_encode(rb_usascii_str_new(ellipsis, ellipsislen),
7739  rb_enc_from_encoding(enc), 0, Qnil);
7740  rb_str_append(ret, estr);
7741  }
7742  return ret;
7743 }
7744 
7745 /**********************************************************************
7746  * Document-class: Symbol
7747  *
7748  * <code>Symbol</code> objects represent names and some strings
7749  * inside the Ruby
7750  * interpreter. They are generated using the <code>:name</code> and
7751  * <code>:"string"</code> literals
7752  * syntax, and by the various <code>to_sym</code> methods. The same
7753  * <code>Symbol</code> object will be created for a given name or string
7754  * for the duration of a program's execution, regardless of the context
7755  * or meaning of that name. Thus if <code>Fred</code> is a constant in
7756  * one context, a method in another, and a class in a third, the
7757  * <code>Symbol</code> <code>:Fred</code> will be the same object in
7758  * all three contexts.
7759  *
7760  * module One
7761  * class Fred
7762  * end
7763  * $f1 = :Fred
7764  * end
7765  * module Two
7766  * Fred = 1
7767  * $f2 = :Fred
7768  * end
7769  * def Fred()
7770  * end
7771  * $f3 = :Fred
7772  * $f1.object_id #=> 2514190
7773  * $f2.object_id #=> 2514190
7774  * $f3.object_id #=> 2514190
7775  *
7776  */
7777 
7778 
7779 /*
7780  * call-seq:
7781  * sym == obj -> true or false
7782  *
7783  * Equality---If <i>sym</i> and <i>obj</i> are exactly the same
7784  * symbol, returns <code>true</code>.
7785  */
7786 
7787 static VALUE
7788 sym_equal(VALUE sym1, VALUE sym2)
7789 {
7790  if (sym1 == sym2) return Qtrue;
7791  return Qfalse;
7792 }
7793 
7794 
7795 static int
7796 sym_printable(const char *s, const char *send, rb_encoding *enc)
7797 {
7798  while (s < send) {
7799  int n;
7800  int c = rb_enc_codepoint_len(s, send, &n, enc);
7801 
7802  if (!rb_enc_isprint(c, enc)) return FALSE;
7803  s += n;
7804  }
7805  return TRUE;
7806 }
7807 
7808 int
7810 {
7811  rb_encoding *enc;
7812  const char *ptr;
7813  long len;
7815 
7816  if (resenc == NULL) resenc = rb_default_external_encoding();
7817  enc = STR_ENC_GET(sym);
7818  ptr = RSTRING_PTR(sym);
7819  len = RSTRING_LEN(sym);
7820  if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (long)strlen(ptr) ||
7821  !rb_enc_symname_p(ptr, enc) || !sym_printable(ptr, ptr + len, enc)) {
7822  return FALSE;
7823  }
7824  return TRUE;
7825 }
7826 
7827 VALUE
7829 {
7830  rb_encoding *enc;
7831  const char *ptr;
7832  long len;
7833  rb_encoding *resenc;
7834 
7835  Check_Type(str, T_STRING);
7836  resenc = rb_default_internal_encoding();
7837  if (resenc == NULL) resenc = rb_default_external_encoding();
7838  enc = STR_ENC_GET(str);
7839  ptr = RSTRING_PTR(str);
7840  len = RSTRING_LEN(str);
7841  if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
7842  !sym_printable(ptr, ptr + len, enc)) {
7843  return rb_str_inspect(str);
7844  }
7845  return str;
7846 }
7847 
7848 VALUE
7850 {
7851  return rb_str_quote_unprintable(rb_id2str(id));
7852 }
7853 
7854 /*
7855  * call-seq:
7856  * sym.inspect -> string
7857  *
7858  * Returns the representation of <i>sym</i> as a symbol literal.
7859  *
7860  * :fred.inspect #=> ":fred"
7861  */
7862 
7863 static VALUE
7865 {
7866  VALUE str;
7867  const char *ptr;
7868  long len;
7869  ID id = SYM2ID(sym);
7870  char *dest;
7871 
7872  sym = rb_id2str(id);
7873  if (!rb_str_symname_p(sym)) {
7874  str = rb_str_inspect(sym);
7875  len = RSTRING_LEN(str);
7876  rb_str_resize(str, len + 1);
7877  dest = RSTRING_PTR(str);
7878  memmove(dest + 1, dest, len);
7879  dest[0] = ':';
7880  }
7881  else {
7882  rb_encoding *enc = STR_ENC_GET(sym);
7883  ptr = RSTRING_PTR(sym);
7884  len = RSTRING_LEN(sym);
7885  str = rb_enc_str_new(0, len + 1, enc);
7886  dest = RSTRING_PTR(str);
7887  dest[0] = ':';
7888  memcpy(dest + 1, ptr, len);
7889  }
7890  return str;
7891 }
7892 
7893 
7894 /*
7895  * call-seq:
7896  * sym.id2name -> string
7897  * sym.to_s -> string
7898  *
7899  * Returns the name or string corresponding to <i>sym</i>.
7900  *
7901  * :fred.id2name #=> "fred"
7902  */
7903 
7904 
7905 VALUE
7907 {
7908  ID id = SYM2ID(sym);
7909 
7910  return str_new3(rb_cString, rb_id2str(id));
7911 }
7912 
7913 
7914 /*
7915  * call-seq:
7916  * sym.to_sym -> sym
7917  * sym.intern -> sym
7918  *
7919  * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
7920  * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
7921  * in this case.
7922  */
7923 
7924 static VALUE
7926 {
7927  return sym;
7928 }
7929 
7930 static VALUE
7931 sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
7932 {
7933  VALUE obj;
7934 
7935  if (argc < 1) {
7936  rb_raise(rb_eArgError, "no receiver given");
7937  }
7938  obj = argv[0];
7939  return rb_funcall_with_block(obj, (ID)sym, argc - 1, argv + 1, passed_proc);
7940 }
7941 
7942 /*
7943  * call-seq:
7944  * sym.to_proc
7945  *
7946  * Returns a _Proc_ object which responds to the given method by _sym_.
7947  *
7948  * (1..3).collect(&:to_s) #=> ["1", "2", "3"]
7949  */
7950 
7951 static VALUE
7953 {
7954  static VALUE sym_proc_cache = Qfalse;
7955  enum {SYM_PROC_CACHE_SIZE = 67};
7956  VALUE proc;
7957  long id, index;
7958  VALUE *aryp;
7959 
7960  if (!sym_proc_cache) {
7961  sym_proc_cache = rb_ary_tmp_new(SYM_PROC_CACHE_SIZE * 2);
7962  rb_gc_register_mark_object(sym_proc_cache);
7963  rb_ary_store(sym_proc_cache, SYM_PROC_CACHE_SIZE*2 - 1, Qnil);
7964  }
7965 
7966  id = SYM2ID(sym);
7967  index = (id % SYM_PROC_CACHE_SIZE) << 1;
7968 
7969  aryp = RARRAY_PTR(sym_proc_cache);
7970  if (aryp[index] == sym) {
7971  return aryp[index + 1];
7972  }
7973  else {
7974  proc = rb_proc_new(sym_call, (VALUE)id);
7975  aryp[index] = sym;
7976  aryp[index + 1] = proc;
7977  return proc;
7978  }
7979 }
7980 
7981 /*
7982  * call-seq:
7983  *
7984  * sym.succ
7985  *
7986  * Same as <code>sym.to_s.succ.intern</code>.
7987  */
7988 
7989 static VALUE
7991 {
7992  return rb_str_intern(rb_str_succ(rb_sym_to_s(sym)));
7993 }
7994 
7995 /*
7996  * call-seq:
7997  *
7998  * symbol <=> other_symbol -> -1, 0, +1 or nil
7999  *
8000  * Compares +symbol+ with +other_symbol+ after calling #to_s on each of the
8001  * symbols. Returns -1, 0, +1 or nil depending on whether +symbol+ is less
8002  * than, equal to, or greater than +other_symbol+.
8003  *
8004  * +nil+ is returned if the two values are incomparable.
8005  *
8006  * See String#<=> for more information.
8007  */
8008 
8009 static VALUE
8011 {
8012  if (!SYMBOL_P(other)) {
8013  return Qnil;
8014  }
8015  return rb_str_cmp_m(rb_sym_to_s(sym), rb_sym_to_s(other));
8016 }
8017 
8018 /*
8019  * call-seq:
8020  *
8021  * sym.casecmp(other) -> -1, 0, +1 or nil
8022  *
8023  * Case-insensitive version of <code>Symbol#<=></code>.
8024  */
8025 
8026 static VALUE
8028 {
8029  if (!SYMBOL_P(other)) {
8030  return Qnil;
8031  }
8032  return rb_str_casecmp(rb_sym_to_s(sym), rb_sym_to_s(other));
8033 }
8034 
8035 /*
8036  * call-seq:
8037  * sym =~ obj -> fixnum or nil
8038  *
8039  * Returns <code>sym.to_s =~ obj</code>.
8040  */
8041 
8042 static VALUE
8044 {
8045  return rb_str_match(rb_sym_to_s(sym), other);
8046 }
8047 
8048 /*
8049  * call-seq:
8050  * sym[idx] -> char
8051  * sym[b, n] -> string
8052  *
8053  * Returns <code>sym.to_s[]</code>.
8054  */
8055 
8056 static VALUE
8058 {
8059  return rb_str_aref_m(argc, argv, rb_sym_to_s(sym));
8060 }
8061 
8062 /*
8063  * call-seq:
8064  * sym.length -> integer
8065  *
8066  * Same as <code>sym.to_s.length</code>.
8067  */
8068 
8069 static VALUE
8071 {
8072  return rb_str_length(rb_id2str(SYM2ID(sym)));
8073 }
8074 
8075 /*
8076  * call-seq:
8077  * sym.empty? -> true or false
8078  *
8079  * Returns whether _sym_ is :"" or not.
8080  */
8081 
8082 static VALUE
8084 {
8085  return rb_str_empty(rb_id2str(SYM2ID(sym)));
8086 }
8087 
8088 /*
8089  * call-seq:
8090  * sym.upcase -> symbol
8091  *
8092  * Same as <code>sym.to_s.upcase.intern</code>.
8093  */
8094 
8095 static VALUE
8097 {
8099 }
8100 
8101 /*
8102  * call-seq:
8103  * sym.downcase -> symbol
8104  *
8105  * Same as <code>sym.to_s.downcase.intern</code>.
8106  */
8107 
8108 static VALUE
8110 {
8112 }
8113 
8114 /*
8115  * call-seq:
8116  * sym.capitalize -> symbol
8117  *
8118  * Same as <code>sym.to_s.capitalize.intern</code>.
8119  */
8120 
8121 static VALUE
8123 {
8125 }
8126 
8127 /*
8128  * call-seq:
8129  * sym.swapcase -> symbol
8130  *
8131  * Same as <code>sym.to_s.swapcase.intern</code>.
8132  */
8133 
8134 static VALUE
8136 {
8138 }
8139 
8140 /*
8141  * call-seq:
8142  * sym.encoding -> encoding
8143  *
8144  * Returns the Encoding object that represents the encoding of _sym_.
8145  */
8146 
8147 static VALUE
8149 {
8150  return rb_obj_encoding(rb_id2str(SYM2ID(sym)));
8151 }
8152 
8153 ID
8155 {
8156  VALUE tmp;
8157 
8158  switch (TYPE(name)) {
8159  default:
8160  tmp = rb_check_string_type(name);
8161  if (NIL_P(tmp)) {
8162  tmp = rb_inspect(name);
8163  rb_raise(rb_eTypeError, "%s is not a symbol",
8164  RSTRING_PTR(tmp));
8165  }
8166  name = tmp;
8167  /* fall through */
8168  case T_STRING:
8169  name = rb_str_intern(name);
8170  /* fall through */
8171  case T_SYMBOL:
8172  return SYM2ID(name);
8173  }
8174 
8175  UNREACHABLE;
8176 }
8177 
8178 /*
8179  * A <code>String</code> object holds and manipulates an arbitrary sequence of
8180  * bytes, typically representing characters. String objects may be created
8181  * using <code>String::new</code> or as literals.
8182  *
8183  * Because of aliasing issues, users of strings should be aware of the methods
8184  * that modify the contents of a <code>String</code> object. Typically,
8185  * methods with names ending in ``!'' modify their receiver, while those
8186  * without a ``!'' return a new <code>String</code>. However, there are
8187  * exceptions, such as <code>String#[]=</code>.
8188  *
8189  */
8190 
8191 void
8193 {
8194 #undef rb_intern
8195 #define rb_intern(str) rb_intern_const(str)
8196 
8197  rb_cString = rb_define_class("String", rb_cObject);
8201  rb_define_method(rb_cString, "initialize", rb_str_init, -1);
8202  rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);
8206  rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
8208  rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
8214  rb_define_method(rb_cString, "insert", rb_str_insert, 2);
8215  rb_define_method(rb_cString, "length", rb_str_length, 0);
8217  rb_define_method(rb_cString, "bytesize", rb_str_bytesize, 0);
8218  rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
8225  rb_define_method(rb_cString, "upto", rb_str_upto, -1);
8228  rb_define_method(rb_cString, "replace", rb_str_replace, 1);
8231  rb_define_method(rb_cString, "getbyte", rb_str_getbyte, 1);
8232  rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
8233  rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
8234 
8235  rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
8238  rb_define_method(rb_cString, "to_str", rb_str_to_s, 0);
8239  rb_define_method(rb_cString, "inspect", rb_str_inspect, 0);
8241 
8242  rb_define_method(rb_cString, "upcase", rb_str_upcase, 0);
8243  rb_define_method(rb_cString, "downcase", rb_str_downcase, 0);
8244  rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0);
8245  rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0);
8246 
8251 
8255  rb_define_method(rb_cString, "lines", rb_str_lines, -1);
8258  rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0);
8259  rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
8261  rb_define_method(rb_cString, "concat", rb_str_concat, 1);
8263  rb_define_method(rb_cString, "prepend", rb_str_prepend, 1);
8265  rb_define_method(rb_cString, "intern", rb_str_intern, 0);
8266  rb_define_method(rb_cString, "to_sym", rb_str_intern, 0);
8268 
8269  rb_define_method(rb_cString, "include?", rb_str_include, 1);
8270  rb_define_method(rb_cString, "start_with?", rb_str_start_with, -1);
8271  rb_define_method(rb_cString, "end_with?", rb_str_end_with, -1);
8272 
8274 
8275  rb_define_method(rb_cString, "ljust", rb_str_ljust, -1);
8276  rb_define_method(rb_cString, "rjust", rb_str_rjust, -1);
8277  rb_define_method(rb_cString, "center", rb_str_center, -1);
8278 
8279  rb_define_method(rb_cString, "sub", rb_str_sub, -1);
8280  rb_define_method(rb_cString, "gsub", rb_str_gsub, -1);
8282  rb_define_method(rb_cString, "chomp", rb_str_chomp, -1);
8284  rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
8285  rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
8286 
8294 
8297  rb_define_method(rb_cString, "delete", rb_str_delete, -1);
8298  rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1);
8299  rb_define_method(rb_cString, "count", rb_str_count, -1);
8300 
8305 
8306  rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
8307  rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
8308  rb_define_method(rb_cString, "each_char", rb_str_each_char, 0);
8309  rb_define_method(rb_cString, "each_codepoint", rb_str_each_codepoint, 0);
8310 
8311  rb_define_method(rb_cString, "sum", rb_str_sum, -1);
8312 
8313  rb_define_method(rb_cString, "slice", rb_str_aref_m, -1);
8315 
8316  rb_define_method(rb_cString, "partition", rb_str_partition, 1);
8317  rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
8318 
8319  rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
8320  rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
8322  rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
8324 
8325  id_to_s = rb_intern("to_s");
8326 
8327  rb_fs = Qnil;
8328  rb_define_variable("$;", &rb_fs);
8329  rb_define_variable("$-F", &rb_fs);
8330 
8331  rb_cSymbol = rb_define_class("Symbol", rb_cObject);
8335  rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
8336 
8339  rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
8341  rb_define_method(rb_cSymbol, "id2name", rb_sym_to_s, 0);
8342  rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0);
8343  rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
8344  rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
8345  rb_define_method(rb_cSymbol, "succ", sym_succ, 0);
8346  rb_define_method(rb_cSymbol, "next", sym_succ, 0);
8347 
8348  rb_define_method(rb_cSymbol, "<=>", sym_cmp, 1);
8349  rb_define_method(rb_cSymbol, "casecmp", sym_casecmp, 1);
8351 
8352  rb_define_method(rb_cSymbol, "[]", sym_aref, -1);
8353  rb_define_method(rb_cSymbol, "slice", sym_aref, -1);
8354  rb_define_method(rb_cSymbol, "length", sym_length, 0);
8355  rb_define_method(rb_cSymbol, "size", sym_length, 0);
8356  rb_define_method(rb_cSymbol, "empty?", sym_empty, 0);
8357  rb_define_method(rb_cSymbol, "match", sym_match, 1);
8358 
8359  rb_define_method(rb_cSymbol, "upcase", sym_upcase, 0);
8360  rb_define_method(rb_cSymbol, "downcase", sym_downcase, 0);
8361  rb_define_method(rb_cSymbol, "capitalize", sym_capitalize, 0);
8362  rb_define_method(rb_cSymbol, "swapcase", sym_swapcase, 0);
8363 
8364  rb_define_method(rb_cSymbol, "encoding", sym_encoding, 0);
8365 }
static int str_independent(VALUE str)
Definition: string.c:1338
#define ELTS_SHARED
Definition: ruby.h:839
#define rb_usascii_str_new2
int rb_enc_str_asciionly_p(VALUE str)
Definition: string.c:340
static VALUE sym_upcase(VALUE sym)
Definition: string.c:8096
#define T_SYMBOL
Definition: ruby.h:502
static long chopped_length(VALUE str)
Definition: string.c:6620
VALUE rb_str_resize(VALUE str, long len)
Definition: string.c:1853
static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str)
Definition: string.c:638
Definition: string.c:5071
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:954
static VALUE rb_str_bytesize(VALUE str)
Definition: string.c:1201
int rb_reg_backref_number(VALUE match, VALUE backref)
Definition: re.c:1075
#define rb_tainted_str_new2
static VALUE str_buf_cat(VALUE str, const char *ptr, long len)
Definition: string.c:1901
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:138
static long rb_str_rindex(VALUE str, VALUE sub, long pos)
Definition: string.c:2620
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:778
VALUE rb_ary_pop(VALUE ary)
Definition: array.c:879
rb_econv_result_t
Definition: encoding.h:242
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:139
#define RESIZE_CAPA(str, capacity)
Definition: string.c:102
#define RARRAY_LEN(a)
Definition: ruby.h:899
void rb_bug(const char *fmt,...)
Definition: error.c:290
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:155
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:856
#define FALSE
Definition: nkf.h:174
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
Definition: oniguruma.h:589
#define rb_hash_lookup
Definition: tcltklib.c:268
#define RSTRING(obj)
Definition: ruby.h:1099
#define rb_intern(str)
size_t strlen(const char *)
#define INT2NUM(x)
Definition: ruby.h:1178
int i
Definition: win32ole.c:784
#define CHECK_IF_ASCII(c)
void rb_backref_set(VALUE)
Definition: vm.c:768
#define T_FIXNUM
Definition: ruby.h:497
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Definition: encoding.h:73
VALUE rb_id2str(ID id)
Definition: ripper.c:16944
static int sym_printable(const char *s, const char *send, rb_encoding *enc)
Definition: string.c:7796
VALUE rb_str_equal(VALUE str1, VALUE str2)
Definition: string.c:2359
#define NUM2INT(x)
Definition: ruby.h:622
static int max(int a, int b)
Definition: strftime.c:141
VALUE rb_locale_str_new_cstr(const char *ptr)
Definition: string.c:602
VALUE rb_sym_to_s(VALUE sym)
Definition: string.c:7906
#define ascii_isspace(c)
Definition: string.c:5866
static int coderange_scan(const char *p, long len, rb_encoding *enc)
Definition: string.c:183
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
Definition: string.c:569
void rb_undef_alloc_func(VALUE)
Definition: vm_method.c:482
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1497
static VALUE rb_str_to_f(VALUE str)
Definition: string.c:4425
VALUE rb_str_new_frozen(VALUE orig)
Definition: string.c:713
static VALUE rb_str_oct(VALUE str)
Definition: string.c:7130
st_index_t rb_str_hash(VALUE str)
Definition: string.c:2244
#define FL_TAINT
Definition: ruby.h:1115
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
Definition: string.c:2074
#define CLASS_OF(v)
Definition: ruby.h:448
#define STR_NOCAPA
Definition: string.c:63
static VALUE rb_str_scan(VALUE str, VALUE pat)
Definition: string.c:7063
VALUE rb_locale_str_new(const char *ptr, long len)
Definition: string.c:596
static VALUE rb_str_gsub(int argc, VALUE *argv, VALUE str)
Definition: string.c:4012
static VALUE rb_str_match(VALUE x, VALUE y)
Definition: string.c:2754
#define FIXNUM_MAX
Definition: ruby.h:238
#define Qtrue
Definition: ruby.h:434
RUBY_ALIAS_FUNCTION(rb_str_new2(const char *ptr), rb_str_new_cstr,(ptr))
Definition: string.c:455
void rb_str_set_len(VALUE str, long len)
Definition: string.c:1837
static void rb_enc_cr_str_copy_for_substr(VALUE dest, VALUE src)
Definition: string.c:290
unsigned char * USTR
Definition: string.c:5069
static unsigned int trnext(struct tr *t, rb_encoding *enc)
Definition: string.c:5078
#define ENC_CODERANGE_SET(obj, cr)
Definition: encoding.h:63
#define is_broken_string(str)
Definition: string.c:121
static VALUE sym_swapcase(VALUE sym)
Definition: string.c:8135
static VALUE rb_str_b(VALUE str)
Definition: string.c:7648
char * pend
Definition: string.c:5074
const int id
Definition: nkf.c:209
void Init_String(void)
Definition: string.c:8192
static VALUE rb_str_clear(VALUE str)
Definition: string.c:4051
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:194
#define ENC_CODERANGE_CLEAR(obj)
Definition: encoding.h:65
void rb_econv_close(rb_econv_t *ec)
Definition: transcode.c:1702
#define STR_UNSET_NOCAPA(s)
Definition: string.c:65
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:103
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:970
VALUE rb_str_new_with_class(VALUE, const char *, long)
VALUE rb_eTypeError
Definition: error.c:511
static VALUE str_gsub(int argc, VALUE *argv, VALUE str, int bang)
Definition: string.c:3836
#define UNREACHABLE
Definition: ruby.h:40
VALUE rb_reg_match(VALUE, VALUE)
Definition: re.c:2746
long rb_memsearch(const void *, long, const void *, long, rb_encoding *)
Definition: re.c:227
static VALUE rb_str_succ_bang(VALUE str)
Definition: string.c:3067
static VALUE rb_str_enumerate_bytes(VALUE str, int wantarray)
Definition: string.c:6338
static VALUE rb_str_each_line(int argc, VALUE *argv, VALUE str)
Definition: string.c:6308
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1373
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:822
#define rb_long2int(n)
Definition: ruby.h:325
static VALUE str_new3(VALUE klass, VALUE str)
Definition: string.c:671
SSL_METHOD *(* func)(void)
Definition: ossl_ssl.c:108
VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE)
Definition: re.c:3286
#define SYM2ID(x)
Definition: ruby.h:364
RUBY_EXTERN char * crypt(const char *, const char *)
Definition: crypt.c:500
st_index_t rb_memhash(const void *ptr, long len)
Definition: random.c:1422
int rb_usascii_encindex(void)
Definition: encoding.c:1192
VALUE rb_str_split(VALUE str, const char *sep0)
Definition: string.c:6131
static VALUE rb_str_prepend(VALUE str, VALUE str2)
Definition: string.c:2235
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:789
static VALUE rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3961
VALUE rb_ary_tmp_new(long capa)
Definition: array.c:465
VALUE rb_str_export_to_enc(VALUE str, rb_encoding *enc)
Definition: string.c:632
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
Definition: vm_eval.c:774
static VALUE rb_str_codepoints(VALUE str)
Definition: string.c:6613
#define str_buf_cat2(str, ptr)
Definition: string.c:1944
static VALUE rb_str_swapcase_bang(VALUE str)
Definition: string.c:5017
static VALUE rb_str_rstrip(VALUE str)
Definition: string.c:6948
VALUE rb_filesystem_str_new(const char *ptr, long len)
Definition: string.c:608
VALUE rb_str_export(VALUE str)
Definition: string.c:620
static VALUE rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
Definition: string.c:7308
static VALUE rb_str_include(VALUE str, VALUE arg)
Definition: string.c:4358
static void rb_str_check_dummy_enc(rb_encoding *enc)
Definition: string.c:4762
VALUE rb_backref_get(void)
Definition: vm.c:762
#define str_make_independent(str)
Definition: string.c:1366
VALUE rb_str_freeze(VALUE str)
Definition: string.c:1797
long rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
Definition: string.c:1025
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:933
#define Check_Type(v, t)
Definition: ruby.h:539
long rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
Definition: string.c:1031
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1780
char * p
Definition: string.c:5074
static VALUE sym_downcase(VALUE sym)
Definition: string.c:8109
static VALUE str_replace(VALUE str, VALUE str2)
Definition: string.c:910
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Definition: ruby.h:875
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:766
VALUE rb_funcall_with_block(VALUE, ID, int, const VALUE *, VALUE)
Definition: vm_eval.c:834
static VALUE rb_str_to_i(int argc, VALUE *argv, VALUE str)
Definition: string.c:4392
char * rb_string_value_ptr(volatile VALUE *ptr)
Definition: string.c:1483
VALUE rb_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2368
static VALUE rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3630
VALUE rb_str_intern(VALUE s)
Definition: string.c:7211
#define STR_NOEMBED
Definition: string.c:58
#define RB_GC_GUARD(v)
Definition: ruby.h:530
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:886
VALUE rb_ary_new3(long n,...)
Definition: array.c:432
static VALUE rb_str_empty(VALUE str)
Definition: string.c:1218
static VALUE rb_str_chars(VALUE str)
Definition: string.c:6520
VALUE rb_eSecurityError
Definition: error.c:520
static VALUE rb_str_reverse_bang(VALUE str)
Definition: string.c:4319
#define DATA_PTR(dta)
Definition: ruby.h:985
void rb_include_module(VALUE klass, VALUE module)
Definition: class.c:695
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: oniguruma.h:191
static VALUE rb_str_center(int argc, VALUE *argv, VALUE str)
Definition: string.c:7452
static VALUE rb_str_each_char_size(VALUE str)
Definition: string.c:6412
st_data_t st_index_t
Definition: st.h:63
VALUE rb_range_beg_len(VALUE, long *, long *, long, int)
Definition: range.c:987
double rb_str_to_dbl(VALUE, int)
Definition: object.c:2619
#define rb_enc_islower(c, enc)
Definition: encoding.h:176
RUBY_FUNC_EXPORTED size_t rb_str_memsize(VALUE str)
Definition: string.c:838
static VALUE rb_str_subpat(VALUE str, VALUE re, VALUE backref)
Definition: string.c:3204
VALUE rb_str_new(const char *ptr, long len)
Definition: string.c:425
static VALUE rb_str_aset_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3562
#define rb_enc_mbmaxlen(enc)
Definition: encoding.h:128
static VALUE rb_str_upcase_bang(VALUE str)
Definition: string.c:4780
#define ISDIGIT(c)
unsigned int last
Definition: nkf.c:4310
static VALUE rb_str_format_m(VALUE str, VALUE arg)
Definition: string.c:1316
#define STR_SET_NOEMBED(str)
Definition: string.c:70
#define STR_DEC_LEN(str)
Definition: string.c:91
#define numberof(array)
Definition: string.c:32
static long str_strlen(VALUE str, rb_encoding *enc)
Definition: string.c:1122
#define FIXNUM_P(f)
Definition: ruby.h:355
static VALUE rb_str_chomp(int argc, VALUE *argv, VALUE str)
Definition: string.c:6817
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1168
VALUE rb_str_export_locale(VALUE str)
Definition: string.c:626
#define BEG(no)
Definition: string.c:22
VALUE rb_str_tmp_new(long)
static VALUE sym_length(VALUE sym)
Definition: string.c:8070
VALUE rb_str_new_shared(VALUE str)
Definition: string.c:677
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1362
#define CHAR_ESC_LEN
Definition: string.c:4460
VALUE rb_sym_all_symbols(void)
Definition: ripper.c:17037
static VALUE empty_str_alloc(VALUE klass)
Definition: string.c:386
static VALUE rb_str_upcase(VALUE str)
Definition: string.c:4845
#define ONIGENC_CTYPE_ALPHA
Definition: oniguruma.h:197
static VALUE rb_str_hash_m(VALUE str)
Definition: string.c:2274
static int tr_find(unsigned int c, char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
Definition: string.c:5503
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
VALUE rb_cString
Definition: string.c:53
static VALUE rb_str_aset(VALUE str, VALUE indx, VALUE val)
Definition: string.c:3495
#define OBJ_TAINTED(x)
Definition: ruby.h:1153
#define rb_str_new5
#define ENC_CODERANGE_7BIT
Definition: encoding.h:58
VALUE rb_eRangeError
Definition: error.c:515
const char * rb_obj_classname(VALUE)
Definition: variable.c:396
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
Definition: sprintf.c:1251
void rb_gc_force_recycle(VALUE p)
Definition: gc.c:2961
#define rb_str_buf_new2
RUBY_EXTERN void * memmove(void *, const void *, size_t)
Definition: memmove.c:7
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:964
#define sym(x)
Definition: date_core.c:3715
static VALUE rb_str_insert(VALUE str, VALUE idx, VALUE str2)
Definition: string.c:3595
VALUE rb_str_append(VALUE str, VALUE str2)
Definition: string.c:2121
#define NEWOBJ_OF(obj, type, klass, flags)
Definition: ruby.h:683
Win32OLEIDispatch * p
Definition: win32ole.c:786
#define ISALPHA(c)
Definition: ruby.h:1636
static VALUE sym_equal(VALUE sym1, VALUE sym2)
Definition: string.c:7788
static VALUE sym_inspect(VALUE sym)
Definition: string.c:7864
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Definition: string.c:2254
int args
Definition: win32ole.c:785
static VALUE rb_str_partition(VALUE str, VALUE sep)
Definition: string.c:7473
static long str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1602
#define rb_enc_isctype(c, t, enc)
Definition: encoding.h:173
static VALUE rb_str_ljust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7412
#define RB_TYPE_P(obj, type)
Definition: ruby.h:1537
int rb_enc_str_coderange(VALUE str)
Definition: string.c:327
#define FL_UNTRUSTED
Definition: ruby.h:1116
#define MEMZERO(p, type, n)
Definition: ruby.h:1241
Definition: ruby.h:842
#define STR_SHARED_P(s)
Definition: string.c:61
VALUE rb_str_plus(VALUE str1, VALUE str2)
Definition: string.c:1236
static VALUE rb_str_setbyte(VALUE str, VALUE index, VALUE value)
Definition: string.c:4106
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1288
#define FL_TEST(x, f)
Definition: ruby.h:1146
#define ONIGENC_CTYPE_DIGIT
Definition: oniguruma.h:200
VALUE rb_mComparable
Definition: compar.c:14
neighbor_char
Definition: string.c:2818
static VALUE rb_str_capitalize_bang(VALUE str)
Definition: string.c:4951
static VALUE rb_str_strip(VALUE str)
Definition: string.c:6986
#define rb_intern_str(string)
Definition: generator.h:17
unsigned int now
Definition: string.c:5073
#define ALLOC_N(type, n)
Definition: ruby.h:1223
int rb_block_given_p(void)
Definition: eval.c:672
static VALUE rb_str_split_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:5916
#define val
static int single_byte_optimizable(VALUE str)
Definition: string.c:126
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:880
static void rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3375
RUBY_EXTERN VALUE rb_cObject
Definition: ruby.h:1426
VALUE rb_eRuntimeError
Definition: error.c:510
static VALUE sym_to_sym(VALUE sym)
Definition: string.c:7925
#define rb_enc_isascii(c, enc)
Definition: encoding.h:174
void * rb_alloc_tmp_buffer(volatile VALUE *store, long len)
Definition: string.c:814
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
Definition: bignum.c:777
static VALUE str_new_shared(VALUE klass, VALUE str)
Definition: string.c:665
VALUE rb_str_length(VALUE str)
Definition: string.c:1182
#define RSTRING_END(str)
Definition: ruby.h:870
static VALUE rb_str_rpartition(VALUE str, VALUE sep)
Definition: string.c:7523
int rb_isspace(int c)
Definition: encoding.c:1893
static VALUE rb_str_crypt(VALUE str, VALUE salt)
Definition: string.c:7154
static VALUE rb_str_cmp_m(VALUE str1, VALUE str2)
Definition: string.c:2412
int rb_str_symname_p(VALUE sym)
Definition: string.c:7809
VALUE rb_ary_new(void)
Definition: array.c:424
VALUE rb_str_new_cstr(const char *ptr)
Definition: string.c:447
static void str_modify_keep_cr(VALUE str)
Definition: string.c:1401
#define dp(v)
Definition: vm_debug.h:23
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Definition: transcode.c:2570
#define UINT2NUM(x)
Definition: ruby.h:1188
#define STR_BUF_MIN_SIZE
Definition: string.c:774
#define rb_str_new3
#define STR_SET_EMBED(str)
Definition: string.c:74
static VALUE rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5753
#define snprintf
Definition: subst.h:6
#define NIL_P(v)
Definition: ruby.h:446
#define ISASCII(c)
Definition: ruby.h:1629
#define add(x, y)
Definition: date_strftime.c:23
static VALUE rb_str_delete(int argc, VALUE *argv, VALUE str)
Definition: string.c:5609
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:499
#define rb_enc_step_back(s, p, e, n, enc)
Definition: encoding.h:168
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:741
static VALUE rb_str_each_byte_size(VALUE str, VALUE args)
Definition: string.c:6332
static VALUE rb_str_enumerate_chars(VALUE str, int wantarray)
Definition: string.c:6427
static VALUE rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
Definition: string.c:6142
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:719
static VALUE rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5733
#define RUBY_DTRACE_STRING_CREATE_ENABLED()
Definition: probes.h:63
VALUE rb_str_concat(VALUE str1, VALUE str2)
Definition: string.c:2162
#define TOUPPER(c)
Definition: ruby.h:1640
#define END(no)
Definition: string.c:23
#define OBJ_FROZEN(x)
Definition: ruby.h:1163
#define OBJ_UNTRUST(x)
Definition: ruby.h:1156
#define STR_ENC_GET(str)
Definition: string.c:123
static VALUE rb_str_strip_bang(VALUE str)
Definition: string.c:6965
#define TYPE(x)
Definition: ruby.h:513
int argc
Definition: ruby.c:130
#define Qfalse
Definition: ruby.h:433
VALUE rb_cEncodingConverter
Definition: transcode.c:25
long rb_str_offset(VALUE str, long pos)
Definition: string.c:1610
#define rb_sourcefile()
Definition: tcltklib.c:97
#define STR_SET_EMBED_LEN(str, n)
Definition: string.c:76
#define ALLOCA_N(type, n)
Definition: ruby.h:1227
#define T_BIGNUM
Definition: ruby.h:495
#define range(low, item, hi)
Definition: date_strftime.c:21
#define ENC_CODERANGE_UNKNOWN
Definition: encoding.h:57
#define LONG_MAX
Definition: ruby.h:201
void rb_gc_register_mark_object(VALUE obj)
Definition: gc.c:2980
static VALUE rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5388
#define rb_enc_isprint(c, enc)
Definition: encoding.h:180
#define RUBY_FUNC_EXPORTED
Definition: defines.h:184
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1242
#define ENC_CODERANGE_BROKEN
Definition: encoding.h:60
#define rb_enc_isupper(c, enc)
Definition: encoding.h:177
VALUE rb_eEncCompatError
Definition: error.c:518
VALUE rb_obj_alloc(VALUE)
Definition: object.c:1721
#define rb_enc_codepoint(p, e, enc)
Definition: encoding.h:154
#define OBJ_FREEZE(x)
Definition: ruby.h:1164
void rb_str_update(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3451
#define rb_enc_mbminlen(enc)
Definition: encoding.h:127
unsigned int max
Definition: string.c:5073
static VALUE sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
Definition: string.c:7931
VALUE rb_eIndexError
Definition: error.c:513
static VALUE rb_str_rjust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7432
#define ENC_CODERANGE_VALID
Definition: encoding.h:59
long rb_str_sublen(VALUE str, long pos)
Definition: string.c:1657
static VALUE sym_capitalize(VALUE sym)
Definition: string.c:8122
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: oniguruma.h:269
VALUE rb_str_times(VALUE str, VALUE times)
Definition: string.c:1268
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:910
static VALUE sym_cmp(VALUE sym, VALUE other)
Definition: string.c:8010
void rb_str_modify_expand(VALUE str, long expand)
Definition: string.c:1377
#define sub(x, y)
Definition: date_strftime.c:24
static void rb_str_splice(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3408
static VALUE str_eql(const VALUE str1, const VALUE str2)
Definition: string.c:2335
#define RSTRING_LEN(str)
Definition: ruby.h:862
static VALUE sym_encoding(VALUE sym)
Definition: string.c:8148
VALUE rb_yield(VALUE)
Definition: vm_eval.c:934
static VALUE rb_str_swapcase(VALUE str)
Definition: string.c:5062
VALUE rb_obj_as_string(VALUE obj)
Definition: string.c:895
VALUE rb_str_subseq(VALUE str, long beg, long len)
Definition: string.c:1668
#define REALLOC_N(var, type, n)
Definition: ruby.h:1225
char * rb_string_value_cstr(volatile VALUE *ptr)
Definition: string.c:1490
#define RUBY_MAX_CHAR_LEN
Definition: string.c:56
#define TRUE
Definition: nkf.h:175
static VALUE rb_str_byteslice(int argc, VALUE *argv, VALUE str)
Definition: string.c:4236
VALUE rb_check_funcall(VALUE, ID, int, VALUE *)
Definition: vm_eval.c:408
VALUE rb_funcall2(VALUE, ID, int, const VALUE *)
Calls a method.
Definition: vm_eval.c:805
#define OBJ_UNTRUSTED(x)
Definition: ruby.h:1155
VALUE rb_str_format(int, const VALUE *, VALUE)
Definition: sprintf.c:439
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:898
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:459
#define STR_TMPLOCK
Definition: string.c:57
#define rb_enc_name(enc)
Definition: encoding.h:124
#define RSTRING_EMBED_LEN_MAX
Definition: ruby.h:841
int rb_enc_symname_p(const char *name, rb_encoding *enc)
Definition: ripper.c:16640
static VALUE rb_str_tr(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5430
static VALUE rb_str_chop_bang(VALUE str)
Definition: string.c:6647
VALUE rb_hash_new(void)
Definition: hash.c:234
#define STR_ASSOC
Definition: string.c:60
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1570
static VALUE rb_str_enumerate_codepoints(VALUE str, int wantarray)
Definition: string.c:6527
static VALUE rb_str_squeeze(int argc, VALUE *argv, VALUE str)
Definition: string.c:5716
long rb_reg_search(VALUE, VALUE, long, int)
Definition: re.c:1352
static VALUE str_duplicate(VALUE klass, VALUE str)
Definition: string.c:938
VALUE rb_check_hash_type(VALUE hash)
Definition: hash.c:461
int rb_str_cmp(VALUE str1, VALUE str2)
Definition: string.c:2308
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
#define no_digits()
unsigned long ID
Definition: ruby.h:105
VALUE rb_str_buf_new_cstr(const char *ptr)
Definition: string.c:793
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1183
static VALUE rb_str_aref_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3334
static VALUE sym_to_proc(VALUE sym)
Definition: string.c:7952
#define Qnil
Definition: ruby.h:435
#define rb_str_new2
static VALUE rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:5626
static VALUE get_pat(VALUE, int)
Definition: string.c:3650
const char * name
Definition: oniguruma.h:162
VALUE rb_str_buf_cat(VALUE str, const char *ptr, long len)
Definition: string.c:1947
#define BUILTIN_TYPE(x)
Definition: ruby.h:510
#define OBJ_TAINT(x)
Definition: ruby.h:1154
unsigned long VALUE
Definition: ruby.h:104
static enum neighbor_char enc_pred_char(char *p, long len, rb_encoding *enc)
Definition: string.c:2859
VALUE rb_cSymbol
Definition: string.c:54
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1214
static VALUE result
Definition: nkf.c:40
VALUE rb_str_replace(VALUE str, VALUE str2)
Definition: string.c:4030
static VALUE rb_str_lstrip_bang(VALUE str)
Definition: string.c:6837
#define rb_enc_is_newline(p, end, enc)
Definition: encoding.h:171
static VALUE str_new(VALUE klass, const char *ptr, long len)
Definition: string.c:395
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
Definition: intern.h:215
static VALUE str_alloc(VALUE klass)
Definition: string.c:374
#define UNINITIALIZED_VAR(x)
Definition: vm_core.h:121
#define RBASIC(obj)
Definition: ruby.h:1094
static VALUE rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3689
VALUE rb_str_buf_cat2(VALUE str, const char *ptr)
Definition: string.c:1957
#define ENC_CODERANGE_AND(a, b)
Definition: encoding.h:68
static VALUE rb_str_is_ascii_only_p(VALUE str)
Definition: string.c:7687
void rb_str_shared_replace(VALUE str, VALUE str2)
Definition: string.c:857
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:870
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: encoding.h:235
#define RUBY_DTRACE_STRING_CREATE(arg0, arg1, arg2)
Definition: probes.h:64
#define rb_enc_asciicompat(enc)
Definition: encoding.h:184
static VALUE rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:6697
VALUE rb_ensure(VALUE(*b_proc)(ANYARGS), VALUE data1, VALUE(*e_proc)(ANYARGS), VALUE data2)
Definition: eval.c:804
VALUE rb_str_buf_cat_ascii(VALUE str, const char *ptr)
Definition: string.c:2081
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
VALUE rb_str_quote_unprintable(VALUE str)
Definition: string.c:7828
static VALUE sym_casecmp(VALUE sym, VALUE other)
Definition: string.c:8027
long rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
Definition: string.c:232
static char * str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
Definition: string.c:1532
#define RARRAY_LENINT(ary)
Definition: ruby.h:908
RUBY_EXTERN VALUE rb_rs
Definition: intern.h:489
static VALUE rb_str_getbyte(VALUE str, VALUE index)
Definition: string.c:4087
static void rb_enc_cr_str_exact_copy(VALUE dest, VALUE src)
Definition: string.c:320
void rb_sys_fail(const char *mesg)
Definition: error.c:1899
static VALUE rb_str_chr(VALUE str)
Definition: string.c:4075
#define ENCODING_IS_ASCII8BIT(obj)
Definition: encoding.h:52
#define rb_str_new4
static const char * search_nonascii(const char *p, const char *e)
Definition: string.c:146
static void str_modifiable(VALUE str)
Definition: string.c:1327
static VALUE rb_str_bytes(VALUE str)
Definition: string.c:6406
static VALUE rb_str_index_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2563
#define CHAR_BIT
Definition: ruby.h:208
VALUE rb_str_to_str(VALUE str)
Definition: string.c:849
static VALUE rb_str_match_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2804
void xfree(void *)
#define FL_UNSET(x, f)
Definition: ruby.h:1150
static void str_mod_check(VALUE s, const char *p, long len)
Definition: string.c:352
VALUE rb_string_value(volatile VALUE *ptr)
Definition: string.c:1472
static VALUE rb_str_lines(int argc, VALUE *argv, VALUE str)
Definition: string.c:6326
VALUE rb_tainted_str_new_cstr(const char *ptr)
Definition: string.c:479
#define LONG2NUM(x)
Definition: ruby.h:1199
static const char isspacetable[256]
Definition: string.c:5847
int rb_respond_to(VALUE, ID)
Definition: vm_method.c:1564
static VALUE scan_once(VALUE str, VALUE pat, long *start)
Definition: string.c:6994
static VALUE rb_str_sub(int argc, VALUE *argv, VALUE str)
Definition: string.c:3828
VALUE rb_usascii_str_new(const char *ptr, long len)
Definition: string.c:431
VALUE rb_str_buf_append(VALUE str, VALUE str2)
Definition: string.c:2105
static VALUE rb_str_s_try_convert(VALUE dummy, VALUE str)
Definition: string.c:1526
#define RMATCH_REGS(obj)
Definition: re.h:54
RUBY_EXTERN VALUE rb_default_rs
Definition: intern.h:490
static VALUE sym_succ(VALUE sym)
Definition: string.c:7990
void rb_str_free(VALUE str)
Definition: string.c:830
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Definition: string.c:614
static VALUE rb_str_end_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7592
#define RSTRING_PTR(str)
Definition: ruby.h:866
#define rb_enc_right_char_head(s, p, e, enc)
Definition: encoding.h:167
static void str_enc_copy(VALUE str1, VALUE str2)
Definition: string.c:284
#define ENCODING_GET(obj)
Definition: encoding.h:47
VALUE rb_equal(VALUE, VALUE)
Definition: object.c:56
static ID id_to_s
Definition: string.c:892
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:772
int size
Definition: encoding.c:52
static VALUE rb_str_hex(VALUE str)
Definition: string.c:7109
char * rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
Definition: string.c:1582
static char * str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1588
#define f
static VALUE rb_str_reverse(VALUE str)
Definition: string.c:4255
#define rb_check_arity(argc, min, max)
Definition: intern.h:277
#define INT2FIX(i)
Definition: ruby.h:241
static VALUE rb_str_downcase(VALUE str)
Definition: string.c:4928
#define UNLIMITED_ARGUMENTS
Definition: intern.h:54
int rb_sourceline(void)
Definition: vm.c:816
VALUE rb_str_unlocktmp(VALUE str)
Definition: string.c:1820
#define MBCLEN_INVALID_P(ret)
Definition: encoding.h:140
static VALUE rb_str_valid_encoding_p(VALUE str)
Definition: string.c:7669
static VALUE rb_str_each_byte(VALUE str)
Definition: string.c:6389
static VALUE rb_str_chop(VALUE str)
Definition: string.c:6682
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Definition: transcode.c:1446
VALUE rb_check_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2383
static VALUE rb_str_count(int argc, VALUE *argv, VALUE str)
Definition: string.c:5789
#define STR_SET_LEN(str, n)
Definition: string.c:82
static VALUE rb_str_eql(VALUE str1, VALUE str2)
Definition: string.c:2379
static void rb_str_subpat_set(VALUE str, VALUE re, VALUE backref, VALUE val)
Definition: string.c:3457
static long enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
Definition: string.c:979
static VALUE rb_str_lstrip(VALUE str)
Definition: string.c:6878
#define lesser(a, b)
Definition: string.c:2280
VALUE rb_check_array_type(VALUE ary)
Definition: array.c:557
VALUE rb_hash_aref(VALUE hash, VALUE key)
Definition: hash.c:560
static enum neighbor_char enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
Definition: string.c:2902
#define RARRAY_PTR(a)
Definition: ruby.h:904
static VALUE sym_match(VALUE sym, VALUE other)
Definition: string.c:8043
VALUE rb_reg_quote(VALUE)
Definition: re.c:2965
static long rb_str_index(VALUE str, VALUE sub, long offset)
Definition: string.c:2502
#define ENC_CODERANGE(obj)
Definition: encoding.h:61
static VALUE rb_str_upto(int argc, VALUE *argv, VALUE beg)
Definition: string.c:3108
static VALUE str_byte_substr(VALUE str, long beg, long len)
Definition: string.c:4124
uint8_t key[16]
Definition: random.c:1370
VALUE rb_str_cat(VALUE str, const char *ptr, long len)
Definition: string.c:1963
VALUE rb_any_to_s(VALUE)
Definition: object.c:384
long rb_str_strlen(VALUE str)
Definition: string.c:1168
#define STR_EMBED_P(str)
Definition: string.c:75
#define LONG2FIX(i)
Definition: ruby.h:242
#define SIZEOF_VALUE
Definition: ruby.h:107
static VALUE tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
Definition: string.c:5134
#define STR_NOCAPA_P(s)
Definition: string.c:64
#define RTEST(v)
Definition: ruby.h:445
int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
Definition: string.c:4463
VALUE rb_proc_new(VALUE(*)(ANYARGS), VALUE)
Definition: proc.c:2018
#define T_STRING
Definition: ruby.h:490
VALUE rb_str_locktmp(VALUE)
#define OBJ_INFECT(x, s)
Definition: ruby.h:1157
#define RREGEXP(obj)
Definition: ruby.h:1100
static VALUE rb_str_capitalize(VALUE str)
Definition: string.c:4999
VALUE rb_str_drop_bytes(VALUE str, long len)
Definition: string.c:3347
size_t rb_str_capacity(VALUE str)
Definition: string.c:360
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1248
static VALUE rb_str_init(int argc, VALUE *argv, VALUE str)
Definition: string.c:969
void rb_define_variable(const char *, VALUE *)
Definition: variable.c:594
v
Definition: win32ole.c:798
void rb_str_setter(VALUE val, ID id, VALUE *var)
Definition: string.c:7615
static VALUE rb_str_rstrip_bang(VALUE str)
Definition: string.c:6899
static VALUE rb_str_each_char(VALUE str)
Definition: string.c:6503
VALUE rb_fs
Definition: string.c:143
#define ISPRINT(c)
Definition: ruby.h:1631
#define rb_enc_left_char_head(s, p, e, enc)
Definition: encoding.h:166
static VALUE str_replace_shared(VALUE str2, VALUE str)
Definition: string.c:657
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Definition: string.c:563
static void str_make_independent_expand(VALUE str, long expand)
Definition: string.c:1347
VALUE rb_ary_concat(VALUE x, VALUE y)
Definition: array.c:3382
static unsigned int hash(const char *str, unsigned int len)
Definition: lex.c:56
#define RETURN_ENUMERATOR(obj, argc, argv)
Definition: intern.h:220
static VALUE rb_str_start_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7569
VALUE rb_ary_new2(long capa)
Definition: array.c:417
#define rb_safe_level()
Definition: tcltklib.c:94
VALUE rb_str_substr(VALUE str, long beg, long len)
Definition: string.c:1774
static void str_discard(VALUE str)
Definition: string.c:1411
#define RREGEXP_SRC_LEN(r)
Definition: ruby.h:918
void rb_must_asciicompat(VALUE str)
Definition: string.c:1463
#define assert(condition)
Definition: ossl.h:45
const char * name
Definition: nkf.c:208
#define FL_SET(x, f)
Definition: ruby.h:1149
VALUE rb_str_associated(VALUE str)
Definition: string.c:1453
#define ID2SYM(x)
Definition: ruby.h:363
const char * rb_id2name(ID id)
Definition: ripper.c:17005
int gen
Definition: string.c:5072
static VALUE sym_empty(VALUE sym)
Definition: string.c:8083
static VALUE rb_str_to_s(VALUE str)
Definition: string.c:4440
static VALUE str_byte_aref(VALUE str, VALUE indx)
Definition: string.c:4179
VALUE rb_external_str_new(const char *ptr, long len)
Definition: string.c:584
void rb_str_associate(VALUE str, VALUE add)
Definition: string.c:1422
VALUE rb_inspect(VALUE)
Definition: object.c:402
#define rb_enc_to_index(enc)
Definition: encoding.h:86
VALUE rb_str_succ(VALUE orig)
Definition: string.c:2984
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1153
static VALUE rb_str_downcase_bang(VALUE str)
Definition: string.c:4863
static VALUE rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len, int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
Definition: string.c:1988
void rb_warning(const char *fmt,...)
Definition: error.c:229
#define rb_check_frozen(obj)
Definition: intern.h:258
#define CONST_ID(var, str)
Definition: ruby.h:1318
static VALUE rb_str_sum(int argc, VALUE *argv, VALUE str)
Definition: string.c:7250
VALUE rb_str_inspect(VALUE str)
Definition: string.c:4508
void rb_free_tmp_buffer(volatile VALUE *store)
Definition: string.c:822
static void tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
Definition: string.c:5439
VALUE rb_obj_freeze(VALUE)
Definition: object.c:989
#define SPECIAL_CONST_P(x)
Definition: ruby.h:1143
#define is_ascii_string(str)
Definition: string.c:120
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Definition: transcode.c:2867
VALUE rb_str_buf_new(long capa)
Definition: string.c:777
int rb_num_to_uint(VALUE val, unsigned int *ret)
Definition: numeric.c:122
VALUE rb_tainted_str_new(const char *, long)
static VALUE rb_str_casecmp(VALUE str1, VALUE str2)
Definition: string.c:2444
#define ONIGERR_INVALID_CODE_POINT_VALUE
Definition: oniguruma.h:587
int rb_str_comparable(VALUE str1, VALUE str2)
Definition: string.c:2283
#define rb_enc_mbcput(c, buf, enc)
Definition: encoding.h:161
VALUE rb_str_cat2(VALUE str, const char *ptr)
Definition: string.c:1982
#define SYMBOL_P(x)
Definition: ruby.h:362
#define mod(x, y)
Definition: date_strftime.c:28
static char * rb_str_subpos(VALUE str, long beg, long *lenp)
Definition: string.c:1689
VALUE rb_str_ord(VALUE s)
Definition: string.c:7231
VALUE rb_str_locktmp_ensure(VALUE str, VALUE(*func)(VALUE), VALUE arg)
Definition: string.c:1830
#define rb_str_dup_frozen
#define STR_ASSOC_P(s)
Definition: string.c:62
static VALUE sym_aref(int argc, VALUE *argv, VALUE sym)
Definition: string.c:8057
#define NULL
Definition: _sdbm.c:103
#define FIX2LONG(x)
Definition: ruby.h:353
VALUE rb_hash_aset(VALUE, VALUE, VALUE)
VALUE rb_invcmp(VALUE x, VALUE y)
Definition: compar.c:42
VALUE rb_str_resurrect(VALUE str)
Definition: string.c:952
static VALUE rb_str_aref(VALUE str, VALUE indx)
Definition: string.c:3215
VALUE rb_check_string_type(VALUE str)
Definition: string.c:1508
VALUE rb_id_quote_unprintable(ID id)
Definition: string.c:7849
VALUE rb_reg_regcomp(VALUE)
Definition: re.c:2547
static int match(VALUE str, VALUE pat, VALUE hash, int(*cb)(VALUE, VALUE))
Definition: date_parse.c:273
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1344
static VALUE rb_str_delete_bang(int, VALUE *, VALUE)
Definition: string.c:5533
void rb_warn(const char *fmt,...)
Definition: error.c:216
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
Definition: string.c:7709
VALUE rb_eArgError
Definition: error.c:512
#define rb_enc_prev_char(s, p, e, enc)
Definition: encoding.h:164
static VALUE rb_str_force_encoding(VALUE str, VALUE enc)
Definition: string.c:7632
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:659
#define T_REGEXP
Definition: ruby.h:491
#define IS_EVSTR(p, e)
Definition: string.c:4617
VALUE rb_str_dump(VALUE str)
Definition: string.c:4630
#define NUM2LONG(x)
Definition: ruby.h:592
static VALUE rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2677
#define TR_TABLE_SIZE
Definition: string.c:5437
VALUE rb_reg_nth_match(int, VALUE)
Definition: re.c:1457
static VALUE rb_str_each_codepoint(VALUE str)
Definition: string.c:6595
void rb_str_modify(VALUE str)
Definition: string.c:1369
VALUE rb_usascii_str_new_cstr(const char *)
char ** argv
Definition: ruby.c:131
ID rb_to_id(VALUE name)
Definition: string.c:8154
#define DBL2NUM(dbl)
Definition: ruby.h:837
#define StringValue(v)
Definition: ruby.h:546
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Definition: string.c:439
static enum neighbor_char enc_succ_char(char *p, long len, rb_encoding *enc)
Definition: string.c:2825
VALUE rb_external_str_new_cstr(const char *ptr)
Definition: string.c:590
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:548
VALUE rb_obj_class(VALUE)
Definition: object.c:194
VALUE rb_str_dup(VALUE str)
Definition: string.c:946