2 ** string.c - String class
4 ** See Copyright Notice in mruby.h
13 #include "mruby/array.h"
14 #include "mruby/class.h"
15 #include "mruby/range.h"
16 #include "mruby/string.h"
/*
 * Digit lookup table: index i holds the character used for digit value i
 * ('0'-'9' for 0..9, then 'a'-'z' for 10..35).  Number-to-text code in
 * this file indexes it with a remainder, e.g. mrb_digitmap[n % 16].
 */
const char mrb_digitmap[] =
  "0123456789"
  "abcdefghijklmnopqrstuvwxyz";
21 typedef struct mrb_shared_string
{
28 static mrb_value
str_replace(mrb_state
*mrb
, struct RString
*s1
, struct RString
*s2
);
29 static mrb_value
mrb_str_subseq(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
);
31 #define RESIZE_CAPA(s,capacity) do {\
32 s->ptr = (char *)mrb_realloc(mrb, s->ptr, (capacity)+1);\
33 s->aux.capa = capacity;\
37 str_decref(mrb_state
*mrb
, mrb_shared_string
*shared
)
40 if (shared
->refcnt
== 0) {
41 if (!shared
->nofree
) {
42 mrb_free(mrb
, shared
->ptr
);
44 mrb_free(mrb
, shared
);
49 mrb_str_modify(mrb_state
*mrb
, struct RString
*s
)
51 if (s
->flags
& MRB_STR_SHARED
) {
52 mrb_shared_string
*shared
= s
->aux
.shared
;
54 if (shared
->refcnt
== 1 && s
->ptr
== shared
->ptr
) {
56 s
->aux
.capa
= shared
->len
;
57 s
->ptr
[s
->len
] = '\0';
58 mrb_free(mrb
, shared
);
66 ptr
= (char *)mrb_malloc(mrb
, (size_t)len
+ 1);
73 str_decref(mrb
, shared
);
75 s
->flags
&= ~MRB_STR_SHARED
;
78 if (s
->flags
& MRB_STR_NOFREE
) {
81 s
->ptr
= (char *)mrb_malloc(mrb
, (size_t)s
->len
+1);
83 memcpy(s
->ptr
, p
, s
->len
);
85 s
->ptr
[s
->len
] = '\0';
87 s
->flags
&= ~MRB_STR_NOFREE
;
93 mrb_str_resize(mrb_state
*mrb
, mrb_value str
, mrb_int len
)
96 struct RString
*s
= mrb_str_ptr(str
);
98 mrb_str_modify(mrb
, s
);
101 if (slen
< len
|| slen
- len
> 256) {
105 s
->ptr
[len
] = '\0'; /* sentinel */
/*
 * Allocate a String object: calls mrb_obj_alloc() with the type tag
 * MRB_TT_STRING and the state's string_class, and casts the result to
 * struct RString*.  Callers in this file set ptr/len/aux on the returned
 * object themselves.
 */
#define mrb_obj_alloc_string(mrb) \
  ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
112 /* char offset to byte offset */
114 mrb_str_offset(mrb_state
*mrb
, mrb_value str
, int pos
)
119 static struct RString
*
120 str_new(mrb_state
*mrb
, const char *p
, mrb_int len
)
124 s
= mrb_obj_alloc_string(mrb
);
127 s
->ptr
= (char *)mrb_malloc(mrb
, (size_t)len
+1);
129 memcpy(s
->ptr
, p
, len
);
136 str_with_class(mrb_state
*mrb
, struct RString
*s
, mrb_value obj
)
138 s
->c
= mrb_str_ptr(obj
)->c
;
142 mrb_str_new_empty(mrb_state
*mrb
, mrb_value str
)
144 struct RString
*s
= str_new(mrb
, 0, 0);
146 str_with_class(mrb
, s
, str
);
147 return mrb_obj_value(s
);
150 #ifndef MRB_STR_BUF_MIN_SIZE
151 # define MRB_STR_BUF_MIN_SIZE 128
155 mrb_str_buf_new(mrb_state
*mrb
, mrb_int capa
)
159 s
= mrb_obj_alloc_string(mrb
);
161 if (capa
< MRB_STR_BUF_MIN_SIZE
) {
162 capa
= MRB_STR_BUF_MIN_SIZE
;
166 s
->ptr
= (char *)mrb_malloc(mrb
, capa
+1);
169 return mrb_obj_value(s
);
173 str_buf_cat(mrb_state
*mrb
, struct RString
*s
, const char *ptr
, size_t len
)
179 mrb_str_modify(mrb
, s
);
180 if (ptr
>= s
->ptr
&& ptr
<= s
->ptr
+ s
->len
) {
183 if (len
== 0) return;
185 if (s
->len
>= MRB_INT_MAX
- (mrb_int
)len
) {
186 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string sizes too big");
190 while (total
> capa
) {
191 if (capa
+ 1 >= MRB_INT_MAX
/ 2) {
192 capa
= (total
+ 4095) / 4096;
195 capa
= (capa
+ 1) * 2;
197 RESIZE_CAPA(s
, capa
);
202 memcpy(s
->ptr
+ s
->len
, ptr
, len
);
204 s
->ptr
[total
] = '\0'; /* sentinel */
208 mrb_str_buf_cat(mrb_state
*mrb
, mrb_value str
, const char *ptr
, size_t len
)
210 if (len
== 0) return str
;
211 str_buf_cat(mrb
, mrb_str_ptr(str
), ptr
, len
);
216 mrb_str_new(mrb_state
*mrb
, const char *p
, size_t len
)
220 s
= str_new(mrb
, p
, len
);
221 return mrb_obj_value(s
);
225 * call-seq: (Caution! NULL string)
226 * String.new(str="") => new_str
228 * Returns a new string object containing a copy of <i>str</i>.
232 mrb_str_new_cstr(mrb_state
*mrb
, const char *p
)
239 if ((mrb_int
)len
< 0) {
240 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "argument too big");
247 s
= str_new(mrb
, p
, len
);
249 return mrb_obj_value(s
);
253 mrb_str_new_static(mrb_state
*mrb
, const char *p
, size_t len
)
257 s
= mrb_obj_alloc_string(mrb
);
259 s
->aux
.capa
= 0; /* nofree */
261 s
->flags
= MRB_STR_NOFREE
;
262 return mrb_obj_value(s
);
266 mrb_gc_free_str(mrb_state
*mrb
, struct RString
*str
)
268 if (str
->flags
& MRB_STR_SHARED
)
269 str_decref(mrb
, str
->aux
.shared
);
270 else if ((str
->flags
& MRB_STR_NOFREE
) == 0)
271 mrb_free(mrb
, str
->ptr
);
275 mrb_str_to_cstr(mrb_state
*mrb
, mrb_value str0
)
279 if (!mrb_string_p(str0
)) {
280 mrb_raise(mrb
, E_TYPE_ERROR
, "expected String");
283 s
= str_new(mrb
, RSTRING_PTR(str0
), RSTRING_LEN(str0
));
284 if ((strlen(s
->ptr
) ^ s
->len
) != 0) {
285 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string contains null byte");
291 str_make_shared(mrb_state
*mrb
, struct RString
*s
)
293 if (!(s
->flags
& MRB_STR_SHARED
)) {
294 mrb_shared_string
*shared
= (mrb_shared_string
*)mrb_malloc(mrb
, sizeof(mrb_shared_string
));
297 if (s
->flags
& MRB_STR_NOFREE
) {
298 shared
->nofree
= TRUE
;
299 shared
->ptr
= s
->ptr
;
300 s
->flags
&= ~MRB_STR_NOFREE
;
303 shared
->nofree
= FALSE
;
304 if (s
->aux
.capa
> s
->len
) {
305 s
->ptr
= shared
->ptr
= (char *)mrb_realloc(mrb
, s
->ptr
, s
->len
+1);
308 shared
->ptr
= s
->ptr
;
311 shared
->len
= s
->len
;
312 s
->aux
.shared
= shared
;
313 s
->flags
|= MRB_STR_SHARED
;
319 * char* str = String("abcd"), len=strlen("abcd")
321 * Returns a new string object containing a copy of <i>str</i>.
324 mrb_str_body(mrb_value str
, int *len_p
)
326 struct RString
*s
= mrb_str_ptr(str
);
333 * call-seq: (Caution! String("abcd") change)
334 * String("abcdefg") = String("abcd") + String("efg")
336 * Appends the contents of <i>other</i> to <i>self</i> in place.
339 mrb_str_concat(mrb_state
*mrb
, mrb_value self
, mrb_value other
)
341 struct RString
*s1
= mrb_str_ptr(self
), *s2
;
344 mrb_str_modify(mrb
, s1
);
345 if (!mrb_string_p(other
)) {
346 other
= mrb_str_to_str(mrb
, other
);
348 s2
= mrb_str_ptr(other
);
349 len
= s1
->len
+ s2
->len
;
351 if (s1
->aux
.capa
< len
) {
353 s1
->ptr
= (char *)mrb_realloc(mrb
, s1
->ptr
, len
+1);
355 memcpy(s1
->ptr
+s1
->len
, s2
->ptr
, s2
->len
);
361 * call-seq: (Caution! String("abcd") remain)
362 * String("abcdefg") = String("abcd") + String("efg")
364 * Returns a new string object containing the concatenation of the two strings.
367 mrb_str_plus(mrb_state
*mrb
, mrb_value a
, mrb_value b
)
369 struct RString
*s
= mrb_str_ptr(a
);
370 struct RString
*s2
= mrb_str_ptr(b
);
373 t
= str_new(mrb
, 0, s
->len
+ s2
->len
);
374 memcpy(t
->ptr
, s
->ptr
, s
->len
);
375 memcpy(t
->ptr
+ s
->len
, s2
->ptr
, s2
->len
);
377 return mrb_obj_value(t
);
383 * call-seq: (Caution! String("abcd") remain) for stack_argument
384 * String("abcdefg") = String("abcd") + String("efg")
386 * Returns a new string object containing <i>self</i> followed by the argument string.
389 mrb_str_plus_m(mrb_state
*mrb
, mrb_value self
)
393 mrb_get_args(mrb
, "S", &str
);
394 return mrb_str_plus(mrb
, self
, str
);
399 * len = strlen(String("abcd"))
401 * Returns the length of <i>str</i> in bytes.
404 mrb_str_bytesize(mrb_state
*mrb
, mrb_value self
)
406 struct RString
*s
= mrb_str_ptr(self
);
407 return mrb_fixnum_value(s
->len
);
414 * len = strlen(String("abcd"))
416 * Returns the length of <i>str</i>.
419 mrb_str_size(mrb_state
*mrb
, mrb_value self
)
421 struct RString
*s
= mrb_str_ptr(self
);
422 return mrb_fixnum_value(s
->len
);
428 * str * integer => new_str
430 * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
433 * "Ho! " * 3 #=> "Ho! Ho! Ho! "
436 mrb_str_times(mrb_state
*mrb
, mrb_value self
)
439 struct RString
*str2
;
442 mrb_get_args(mrb
, "i", ×
);
444 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "negative argument");
446 if (times
&& MRB_INT_MAX
/ times
< RSTRING_LEN(self
)) {
447 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "argument too big");
450 len
= RSTRING_LEN(self
)*times
;
451 str2
= str_new(mrb
, 0, len
);
452 str_with_class(mrb
, str2
, self
);
455 n
= RSTRING_LEN(self
);
456 memcpy(p
, RSTRING_PTR(self
), n
);
461 memcpy(p
+ n
, p
, len
-n
);
465 return mrb_obj_value(str2
);
467 /* -------------------------------------------------------------- */
/*
 * Yields the smaller of a and b; when they compare equal the result is a.
 * This is a function-like macro, so each argument may be evaluated more
 * than once -- do not pass expressions with side effects.
 */
#define lesser(a,b) (((b)<(a))?(b):(a))
471 /* ---------------------------*/
474 * mrb_value str1 <=> mrb_value str2 => int
480 mrb_str_cmp(mrb_state
*mrb
, mrb_value str1
, mrb_value str2
)
484 struct RString
*s1
= mrb_str_ptr(str1
);
485 struct RString
*s2
= mrb_str_ptr(str2
);
487 len
= lesser(s1
->len
, s2
->len
);
488 retval
= memcmp(s1
->ptr
, s2
->ptr
, len
);
490 if (s1
->len
== s2
->len
) return 0;
491 if (s1
->len
> s2
->len
) return 1;
494 if (retval
> 0) return 1;
502 * str <=> other_str => -1, 0, +1
504 * Comparison---Returns -1 if <i>str</i> is less than, 0 if
505 * <i>str</i> is equal to, and +1 if <i>str</i> is greater than
506 * <i>other_str</i>. If the strings are of different lengths, and the strings are
507 * equal when compared up to the shortest length, then the longer string is
508 * considered greater than the shorter one. If the variable <code>$=</code> is
509 * <code>false</code>, the comparison is based on comparing the binary values
510 * of each character in the string. In older versions of Ruby, setting
511 * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
512 * in favor of using <code>String#casecmp</code>.
514 * <code><=></code> is the basis for the methods <code><</code>,
515 * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
516 * included from module <code>Comparable</code>. The method
517 * <code>String#==</code> does not use <code>Comparable#==</code>.
519 * "abcdef" <=> "abcde" #=> 1
520 * "abcdef" <=> "abcdef" #=> 0
521 * "abcdef" <=> "abcdefg" #=> -1
522 * "abcdef" <=> "ABCDEF" #=> 1
525 mrb_str_cmp_m(mrb_state
*mrb
, mrb_value str1
)
530 mrb_get_args(mrb
, "o", &str2
);
531 if (!mrb_string_p(str2
)) {
532 if (!mrb_respond_to(mrb
, str2
, mrb_intern_lit(mrb
, "to_s"))) {
533 return mrb_nil_value();
535 else if (!mrb_respond_to(mrb
, str2
, mrb_intern_lit(mrb
, "<=>"))) {
536 return mrb_nil_value();
539 mrb_value tmp
= mrb_funcall(mrb
, str2
, "<=>", 1, str1
);
541 if (mrb_nil_p(tmp
)) return mrb_nil_value();
542 if (!mrb_fixnum(tmp
)) {
543 return mrb_funcall(mrb
, mrb_fixnum_value(0), "-", 1, tmp
);
545 result
= -mrb_fixnum(tmp
);
549 result
= mrb_str_cmp(mrb
, str1
, str2
);
551 return mrb_fixnum_value(result
);
555 str_eql(mrb_state
*mrb
, const mrb_value str1
, const mrb_value str2
)
557 const mrb_int len
= RSTRING_LEN(str1
);
559 if (len
!= RSTRING_LEN(str2
)) return FALSE
;
560 if (memcmp(RSTRING_PTR(str1
), RSTRING_PTR(str2
), (size_t)len
) == 0)
566 mrb_str_equal(mrb_state
*mrb
, mrb_value str1
, mrb_value str2
)
568 if (mrb_obj_equal(mrb
, str1
, str2
)) return TRUE
;
569 if (!mrb_string_p(str2
)) {
570 if (mrb_nil_p(str2
)) return FALSE
;
571 if (!mrb_respond_to(mrb
, str2
, mrb_intern_lit(mrb
, "to_str"))) {
574 str2
= mrb_funcall(mrb
, str2
, "to_str", 0);
575 return mrb_equal(mrb
, str2
, str1
);
577 return str_eql(mrb
, str1
, str2
);
583 * str == obj => true or false
586 * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
587 * Otherwise, returns <code>false</code> or <code>true</code>
589 * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero.
592 mrb_str_equal_m(mrb_state
*mrb
, mrb_value str1
)
597 mrb_get_args(mrb
, "o", &str2
);
598 equal_p
= mrb_str_equal(mrb
, str1
, str2
);
600 return mrb_bool_value(equal_p
);
602 /* ---------------------------------- */
604 mrb_str_to_str(mrb_state
*mrb
, mrb_value str
)
608 if (!mrb_string_p(str
)) {
609 s
= mrb_check_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_str");
611 s
= mrb_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_s");
619 mrb_string_value_ptr(mrb_state
*mrb
, mrb_value ptr
)
621 mrb_value str
= mrb_str_to_str(mrb
, ptr
);
622 return RSTRING_PTR(str
);
626 noregexp(mrb_state
*mrb
, mrb_value self
)
628 mrb_raise(mrb
, E_NOTIMP_ERROR
, "Regexp class not implemented");
629 return mrb_nil_value();
633 regexp_check(mrb_state
*mrb
, mrb_value obj
)
635 if (!memcmp(mrb_obj_classname(mrb
, obj
), REGEXP_CLASS
, sizeof(REGEXP_CLASS
) - 1)) {
640 static inline mrb_int
641 mrb_memsearch_qs(const unsigned char *xs
, mrb_int m
, const unsigned char *ys
, mrb_int n
)
643 const unsigned char *x
= xs
, *xe
= xs
+ m
;
644 const unsigned char *y
= ys
;
648 for (i
= 0; i
< 256; ++i
)
651 qstable
[*x
] = xe
- x
;
653 for (; y
+ m
<= ys
+ n
; y
+= *(qstable
+ y
[m
])) {
654 if (*xs
== *y
&& memcmp(xs
, y
, m
) == 0)
661 mrb_memsearch(const void *x0
, mrb_int m
, const void *y0
, mrb_int n
)
663 const unsigned char *x
= (const unsigned char *)x0
, *y
= (const unsigned char *)y0
;
665 if (m
> n
) return -1;
667 return memcmp(x0
, y0
, m
) == 0 ? 0 : -1;
673 const unsigned char *ys
= y
, *ye
= ys
+ n
;
674 for (; y
< ye
; ++y
) {
680 return mrb_memsearch_qs((const unsigned char *)x0
, m
, (const unsigned char *)y0
, n
);
684 mrb_str_index(mrb_state
*mrb
, mrb_value str
, mrb_value sub
, mrb_int offset
)
690 len
= RSTRING_LEN(str
);
691 slen
= RSTRING_LEN(sub
);
694 if (offset
< 0) return -1;
696 if (len
- offset
< slen
) return -1;
697 s
= RSTRING_PTR(str
);
701 if (slen
== 0) return offset
;
702 /* need proceed one character at a time */
703 sptr
= RSTRING_PTR(sub
);
704 slen
= RSTRING_LEN(sub
);
705 len
= RSTRING_LEN(str
) - offset
;
706 pos
= mrb_memsearch(sptr
, slen
, s
, len
);
707 if (pos
< 0) return pos
;
712 mrb_str_dup(mrb_state
*mrb
, mrb_value str
)
714 /* should return shared string */
715 struct RString
*s
= mrb_str_ptr(str
);
717 return mrb_str_new(mrb
, s
->ptr
, s
->len
);
721 mrb_str_aref(mrb_state
*mrb
, mrb_value str
, mrb_value indx
)
725 regexp_check(mrb
, indx
);
726 switch (mrb_type(indx
)) {
728 idx
= mrb_fixnum(indx
);
731 str
= mrb_str_substr(mrb
, str
, idx
, 1);
732 if (!mrb_nil_p(str
) && RSTRING_LEN(str
) == 0) return mrb_nil_value();
736 if (mrb_str_index(mrb
, str
, indx
, 0) != -1)
737 return mrb_str_dup(mrb
, indx
);
738 return mrb_nil_value();
741 /* check if indx is Range */
745 len
= RSTRING_LEN(str
);
746 if (mrb_range_beg_len(mrb
, indx
, &beg
, &len
, len
)) {
747 return mrb_str_subseq(mrb
, str
, beg
, len
);
750 return mrb_nil_value();
754 idx
= mrb_fixnum(indx
);
757 return mrb_nil_value(); /* not reached */
764 * str[fixnum] => fixnum or nil
765 * str[fixnum, fixnum] => new_str or nil
766 * str[range] => new_str or nil
767 * str[regexp] => new_str or nil
768 * str[regexp, fixnum] => new_str or nil
769 * str[other_str] => new_str or nil
770 * str.slice(fixnum) => fixnum or nil
771 * str.slice(fixnum, fixnum) => new_str or nil
772 * str.slice(range) => new_str or nil
773 * str.slice(regexp) => new_str or nil
774 * str.slice(regexp, fixnum) => new_str or nil
775 * str.slice(other_str) => new_str or nil
777 * Element Reference---If passed a single <code>Fixnum</code>, returns the code
778 * of the character at that position. If passed two <code>Fixnum</code>
779 * objects, returns a substring starting at the offset given by the first, and
780 * a length given by the second. If given a range, a substring containing
781 * characters at offsets given by the range is returned. In all three cases, if
782 * an offset is negative, it is counted from the end of <i>str</i>. Returns
783 * <code>nil</code> if the initial offset falls outside the string, the length
784 * is negative, or the beginning of the range is greater than the end.
786 * If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
787 * returned. If a numeric parameter follows the regular expression, that
788 * component of the <code>MatchData</code> is returned instead. If a
789 * <code>String</code> is given, that string is returned if it occurs in
790 * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
794 * a[1] #=> 101(1.8.7) "e"(1.9.2)
798 * a[-4..-2] #=> "her"
801 * a[/[aeiou](.)\1/] #=> "ell"
802 * a[/[aeiou](.)\1/, 0] #=> "ell"
803 * a[/[aeiou](.)\1/, 1] #=> "l"
804 * a[/[aeiou](.)\1/, 2] #=> nil
809 mrb_str_aref_m(mrb_state
*mrb
, mrb_value str
)
814 argc
= mrb_get_args(mrb
, "o|o", &a1
, &a2
);
816 regexp_check(mrb
, a1
);
817 return mrb_str_substr(mrb
, str
, mrb_fixnum(a1
), mrb_fixnum(a2
));
820 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc
));
822 return mrb_str_aref(mrb
, str
, a1
);
828 * str.capitalize! => str or nil
830 * Modifies <i>str</i> by converting the first character to uppercase and the
831 * remainder to lowercase. Returns <code>nil</code> if no changes are made.
834 * a.capitalize! #=> "Hello"
836 * a.capitalize! #=> nil
839 mrb_str_capitalize_bang(mrb_state
*mrb
, mrb_value str
)
843 struct RString
*s
= mrb_str_ptr(str
);
845 mrb_str_modify(mrb
, s
);
846 if (s
->len
== 0 || !s
->ptr
) return mrb_nil_value();
847 p
= s
->ptr
; pend
= s
->ptr
+ s
->len
;
858 if (modify
) return str
;
859 return mrb_nil_value();
865 * str.capitalize => new_str
867 * Returns a copy of <i>str</i> with the first character converted to uppercase
868 * and the remainder to lowercase.
870 * "hello".capitalize #=> "Hello"
871 * "HELLO".capitalize #=> "Hello"
872 * "123ABC".capitalize #=> "123abc"
875 mrb_str_capitalize(mrb_state
*mrb
, mrb_value self
)
879 str
= mrb_str_dup(mrb
, self
);
880 mrb_str_capitalize_bang(mrb
, str
);
887 * str.chomp!(separator=$/) => str or nil
889 * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
890 * returning <i>str</i>, or <code>nil</code> if no modifications were made.
893 mrb_str_chomp_bang(mrb_state
*mrb
, mrb_value str
)
900 struct RString
*s
= mrb_str_ptr(str
);
902 mrb_str_modify(mrb
, s
);
904 if (mrb_get_args(mrb
, "|S", &rs
) == 0) {
905 if (len
== 0) return mrb_nil_value();
907 if (s
->ptr
[len
-1] == '\n') {
910 s
->ptr
[s
->len
-1] == '\r') {
914 else if (s
->ptr
[len
-1] == '\r') {
918 return mrb_nil_value();
920 s
->ptr
[s
->len
] = '\0';
924 if (len
== 0 || mrb_nil_p(rs
)) return mrb_nil_value();
926 rslen
= RSTRING_LEN(rs
);
928 while (len
>0 && p
[len
-1] == '\n') {
930 if (len
>0 && p
[len
-1] == '\r')
938 return mrb_nil_value();
940 if (rslen
> len
) return mrb_nil_value();
941 newline
= RSTRING_PTR(rs
)[rslen
-1];
942 if (rslen
== 1 && newline
== '\n')
943 newline
= RSTRING_PTR(rs
)[rslen
-1];
944 if (rslen
== 1 && newline
== '\n')
947 pp
= p
+ len
- rslen
;
948 if (p
[len
-1] == newline
&&
950 memcmp(RSTRING_PTR(rs
), pp
, rslen
) == 0)) {
951 s
->len
= len
- rslen
;
955 return mrb_nil_value();
961 * str.chomp(separator=$/) => new_str
963 * Returns a new <code>String</code> with the given record separator removed
964 * from the end of <i>str</i> (if present). If <code>$/</code> has not been
965 * changed from the default Ruby record separator, then <code>chomp</code> also
966 * removes carriage return characters (that is it will remove <code>\n</code>,
967 * <code>\r</code>, and <code>\r\n</code>).
969 * "hello".chomp #=> "hello"
970 * "hello\n".chomp #=> "hello"
971 * "hello\r\n".chomp #=> "hello"
972 * "hello\n\r".chomp #=> "hello\n"
973 * "hello\r".chomp #=> "hello"
974 * "hello \n there".chomp #=> "hello \n there"
975 * "hello".chomp("llo") #=> "he"
978 mrb_str_chomp(mrb_state
*mrb
, mrb_value self
)
982 str
= mrb_str_dup(mrb
, self
);
983 mrb_str_chomp_bang(mrb
, str
);
990 * str.chop! => str or nil
992 * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
993 * or <code>nil</code> if <i>str</i> is the empty string. See also
994 * <code>String#chomp!</code>.
997 mrb_str_chop_bang(mrb_state
*mrb
, mrb_value str
)
999 struct RString
*s
= mrb_str_ptr(str
);
1001 mrb_str_modify(mrb
, s
);
1005 if (s
->ptr
[len
] == '\n') {
1007 s
->ptr
[len
-1] == '\r') {
1015 return mrb_nil_value();
1021 * str.chop => new_str
1023 * Returns a new <code>String</code> with the last character removed. If the
1024 * string ends with <code>\r\n</code>, both characters are removed. Applying
1025 * <code>chop</code> to an empty string returns an empty
1026 * string. <code>String#chomp</code> is often a safer alternative, as it leaves
1027 * the string unchanged if it doesn't end in a record separator.
1029 * "string\r\n".chop #=> "string"
1030 * "string\n\r".chop #=> "string\n"
1031 * "string\n".chop #=> "string"
1032 * "string".chop #=> "strin"
1036 mrb_str_chop(mrb_state
*mrb
, mrb_value self
)
1039 str
= mrb_str_dup(mrb
, self
);
1040 mrb_str_chop_bang(mrb
, str
);
1047 * str.downcase! => str or nil
1049 * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
1050 * changes were made.
1053 mrb_str_downcase_bang(mrb_state
*mrb
, mrb_value str
)
1057 struct RString
*s
= mrb_str_ptr(str
);
1059 mrb_str_modify(mrb
, s
);
1061 pend
= s
->ptr
+ s
->len
;
1070 if (modify
) return str
;
1071 return mrb_nil_value();
1077 * str.downcase => new_str
1079 * Returns a copy of <i>str</i> with all uppercase letters replaced with their
1080 * lowercase counterparts. The operation is locale insensitive---only
1081 * characters ``A'' to ``Z'' are affected.
1083 * "hEllO".downcase #=> "hello"
1086 mrb_str_downcase(mrb_state
*mrb
, mrb_value self
)
1090 str
= mrb_str_dup(mrb
, self
);
1091 mrb_str_downcase_bang(mrb
, str
);
1098 * str.empty? => true or false
1100 * Returns <code>true</code> if <i>str</i> has a length of zero.
1102 * "hello".empty? #=> false
1103 * "".empty? #=> true
1106 mrb_str_empty_p(mrb_state
*mrb
, mrb_value self
)
1108 struct RString
*s
= mrb_str_ptr(self
);
1110 return mrb_bool_value(s
->len
== 0);
1116 * str.eql?(other) => true or false
1118 * Two strings are equal if they have the same length and content.
1121 mrb_str_eql(mrb_state
*mrb
, mrb_value self
)
1126 mrb_get_args(mrb
, "o", &str2
);
1127 eql_p
= (mrb_type(str2
) == MRB_TT_STRING
) && str_eql(mrb
, self
, str2
);
1129 return mrb_bool_value(eql_p
);
1133 mrb_str_subseq(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
)
1135 struct RString
*orig
, *s
;
1136 mrb_shared_string
*shared
;
1138 orig
= mrb_str_ptr(str
);
1139 str_make_shared(mrb
, orig
);
1140 shared
= orig
->aux
.shared
;
1141 s
= mrb_obj_alloc_string(mrb
);
1142 s
->ptr
= orig
->ptr
+ beg
;
1144 s
->aux
.shared
= shared
;
1145 s
->flags
|= MRB_STR_SHARED
;
1148 return mrb_obj_value(s
);
1152 mrb_str_substr(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
)
1156 if (len
< 0) return mrb_nil_value();
1157 if (!RSTRING_LEN(str
)) {
1160 if (beg
> RSTRING_LEN(str
)) return mrb_nil_value();
1162 beg
+= RSTRING_LEN(str
);
1163 if (beg
< 0) return mrb_nil_value();
1165 if (beg
+ len
> RSTRING_LEN(str
))
1166 len
= RSTRING_LEN(str
) - beg
;
1170 str2
= mrb_str_subseq(mrb
, str
, beg
, len
);
1176 mrb_str_buf_append(mrb_state
*mrb
, mrb_value str
, mrb_value str2
)
1178 mrb_str_cat(mrb
, str
, RSTRING_PTR(str2
), RSTRING_LEN(str2
));
1183 mrb_str_hash(mrb_state
*mrb
, mrb_value str
)
1186 struct RString
*s
= mrb_str_ptr(str
);
1187 mrb_int len
= s
->len
;
1192 key
= key
*65599 + *p
;
1195 key
= key
+ (key
>>5);
1202 * str.hash => fixnum
1204 * Return a hash based on the string's length and content.
1207 mrb_str_hash_m(mrb_state
*mrb
, mrb_value self
)
1209 mrb_int key
= mrb_str_hash(mrb
, self
);
1210 return mrb_fixnum_value(key
);
1216 * str.include? other_str => true or false
1217 * str.include? fixnum => true or false
1219 * Returns <code>true</code> if <i>str</i> contains the given string or
1222 * "hello".include? "lo" #=> true
1223 * "hello".include? "ol" #=> false
1224 * "hello".include? ?h #=> true
1227 mrb_str_include(mrb_state
*mrb
, mrb_value self
)
1233 mrb_get_args(mrb
, "o", &str2
);
1234 if (mrb_fixnum_p(str2
)) {
1235 include_p
= (memchr(RSTRING_PTR(self
), mrb_fixnum(str2
), RSTRING_LEN(self
)) != NULL
);
1238 str2
= mrb_str_to_str(mrb
, str2
);
1239 i
= mrb_str_index(mrb
, self
, str2
, 0);
1241 include_p
= (i
!= -1);
1244 return mrb_bool_value(include_p
);
1250 * str.index(substring [, offset]) => fixnum or nil
1251 * str.index(fixnum [, offset]) => fixnum or nil
1252 * str.index(regexp [, offset]) => fixnum or nil
1254 * Returns the index of the first occurrence of the given
1256 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
1258 * <code>nil</code> if not found.
1259 * If the second parameter is present, it
1260 * specifies the position in the string to begin the search.
1262 * "hello".index('e') #=> 1
1263 * "hello".index('lo') #=> 3
1264 * "hello".index('a') #=> nil
1265 * "hello".index(101) #=> 1(101=0x65='e')
1266 * "hello".index(/[aeiou]/, -3) #=> 4
1269 mrb_str_index_m(mrb_state
*mrb
, mrb_value str
)
1277 mrb_get_args(mrb
, "*", &argv
, &argc
);
1279 pos
= mrb_fixnum(argv
[1]);
1287 sub
= mrb_nil_value();
1290 regexp_check(mrb
, sub
);
1292 pos
+= RSTRING_LEN(str
);
1294 return mrb_nil_value();
1298 switch (mrb_type(sub
)) {
1299 case MRB_TT_FIXNUM
: {
1300 int c
= mrb_fixnum(sub
);
1301 mrb_int len
= RSTRING_LEN(str
);
1302 unsigned char *p
= (unsigned char*)RSTRING_PTR(str
);
1304 for (;pos
<len
;pos
++) {
1305 if (p
[pos
] == c
) return mrb_fixnum_value(pos
);
1307 return mrb_nil_value();
1313 tmp
= mrb_check_string_type(mrb
, sub
);
1314 if (mrb_nil_p(tmp
)) {
1315 mrb_raisef(mrb
, E_TYPE_ERROR
, "type mismatch: %S given", sub
);
1321 pos
= mrb_str_index(mrb
, str
, sub
, pos
);
1325 if (pos
== -1) return mrb_nil_value();
1326 return mrb_fixnum_value(pos
);
/*
 * Length threshold consulted by str_replace(): when the source string is
 * not already shared and its len is greater than this value, the source
 * is converted with str_make_shared().
 * NOTE(review): shorter sources appear to be copied byte-for-byte
 * instead -- confirm against the full str_replace() body.
 */
#define STR_REPLACE_SHARED_MIN (10)
1332 str_replace(mrb_state
*mrb
, struct RString
*s1
, struct RString
*s2
)
1334 if (s2
->flags
& MRB_STR_SHARED
) {
1336 if (s1
->flags
& MRB_STR_SHARED
){
1337 str_decref(mrb
, s1
->aux
.shared
);
1340 mrb_free(mrb
, s1
->ptr
);
1344 s1
->aux
.shared
= s2
->aux
.shared
;
1345 s1
->flags
|= MRB_STR_SHARED
;
1346 s1
->aux
.shared
->refcnt
++;
1348 else if (s2
->len
> STR_REPLACE_SHARED_MIN
) {
1349 str_make_shared(mrb
, s2
);
1353 if (s1
->flags
& MRB_STR_SHARED
) {
1354 str_decref(mrb
, s1
->aux
.shared
);
1355 s1
->flags
&= ~MRB_STR_SHARED
;
1356 s1
->ptr
= (char *)mrb_malloc(mrb
, s2
->len
+1);
1359 s1
->ptr
= (char *)mrb_realloc(mrb
, s1
->ptr
, s2
->len
+1);
1361 memcpy(s1
->ptr
, s2
->ptr
, s2
->len
);
1362 s1
->ptr
[s2
->len
] = 0;
1364 s1
->aux
.capa
= s2
->len
;
1366 return mrb_obj_value(s1
);
1373 * str.replace(other_str) => str
1375 * s = "hello" #=> "hello"
1376 * s.replace "world" #=> "world"
1379 mrb_str_replace(mrb_state
*mrb
, mrb_value str
)
1383 mrb_get_args(mrb
, "S", &str2
);
1384 return str_replace(mrb
, mrb_str_ptr(str
), mrb_str_ptr(str2
));
1390 * String.new(str="") => new_str
1392 * Returns a new string object containing a copy of <i>str</i>.
1395 mrb_str_init(mrb_state
*mrb
, mrb_value self
)
1399 if (mrb_get_args(mrb
, "|S", &str2
) == 1) {
1400 str_replace(mrb
, mrb_str_ptr(self
), mrb_str_ptr(str2
));
1409 * str.intern => symbol
1410 * str.to_sym => symbol
1412 * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
1413 * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
1415 * "Koala".intern #=> :Koala
1416 * s = 'cat'.to_sym #=> :cat
1417 * s == :cat #=> true
1418 * s = '@cat'.to_sym #=> :@cat
1419 * s == :@cat #=> true
1421 * This can also be used to create symbols that cannot be represented using the
1422 * <code>:xxx</code> notation.
1424 * 'cat and dog'.to_sym #=> :"cat and dog"
1427 mrb_str_intern(mrb_state
*mrb
, mrb_value self
)
1431 id
= mrb_intern_str(mrb
, self
);
1432 return mrb_symbol_value(id
);
1435 /* ---------------------------------- */
1437 mrb_obj_as_string(mrb_state
*mrb
, mrb_value obj
)
1441 if (mrb_string_p(obj
)) {
1444 str
= mrb_funcall(mrb
, obj
, "to_s", 0);
1445 if (!mrb_string_p(str
))
1446 return mrb_any_to_s(mrb
, obj
);
1451 mrb_ptr_to_str(mrb_state
*mrb
, void *p
)
1453 struct RString
*p_str
;
1456 uintptr_t n
= (uintptr_t)p
;
1458 p_str
= str_new(mrb
, NULL
, 2 + sizeof(uintptr_t) * CHAR_BIT
/ 4);
1465 *p2
++ = mrb_digitmap
[n
% 16];
1469 p_str
->len
= (mrb_int
)(p2
- p_str
->ptr
);
1477 return mrb_obj_value(p_str
);
1481 mrb_string_type(mrb_state
*mrb
, mrb_value str
)
1483 return mrb_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_str");
1487 mrb_check_string_type(mrb_state
*mrb
, mrb_value str
)
1489 return mrb_check_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_str");
1492 /* ---------------------------------- */
1496 * str.reverse => new_str
1498 * Returns a new string with the characters from <i>str</i> in reverse order.
1500 * "stressed".reverse #=> "desserts"
1503 mrb_str_reverse(mrb_state
*mrb
, mrb_value str
)
1508 if (RSTRING(str
)->len
<= 1) return mrb_str_dup(mrb
, str
);
1510 s2
= str_new(mrb
, 0, RSTRING(str
)->len
);
1511 str_with_class(mrb
, s2
, str
);
1512 s
= RSTRING_PTR(str
); e
= RSTRING_END(str
) - 1;
1518 return mrb_obj_value(s2
);
1524 * str.reverse! => str
1526 * Reverses <i>str</i> in place.
1529 mrb_str_reverse_bang(mrb_state
*mrb
, mrb_value str
)
1531 struct RString
*s
= mrb_str_ptr(str
);
1535 mrb_str_modify(mrb
, s
);
1550 * str.rindex(substring [, fixnum]) => fixnum or nil
1551 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1552 * str.rindex(regexp [, fixnum]) => fixnum or nil
1554 * Returns the index of the last occurrence of the given <i>substring</i>,
1555 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1556 * <code>nil</code> if not found. If the second parameter is present, it
1557 * specifies the position in the string to end the search---characters beyond
1558 * this point will not be considered.
1560 * "hello".rindex('e') #=> 1
1561 * "hello".rindex('l') #=> 3
1562 * "hello".rindex('a') #=> nil
1563 * "hello".rindex(101) #=> 1
1564 * "hello".rindex(/[aeiou]/, -2) #=> 1
1567 mrb_str_rindex(mrb_state
*mrb
, mrb_value str
, mrb_value sub
, mrb_int pos
)
1570 struct RString
*ps
= mrb_str_ptr(str
);
1571 struct RString
*psub
= mrb_str_ptr(sub
);
1572 mrb_int len
= psub
->len
;
1574 /* substring longer than string */
1575 if (ps
->len
< len
) return -1;
1576 if (ps
->len
- pos
< len
) {
1577 pos
= ps
->len
- len
;
1584 if (memcmp(s
, t
, len
) == 0) {
1599 * str.rindex(substring [, fixnum]) => fixnum or nil
1600 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1601 * str.rindex(regexp [, fixnum]) => fixnum or nil
1603 * Returns the index of the last occurrence of the given <i>substring</i>,
1604 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1605 * <code>nil</code> if not found. If the second parameter is present, it
1606 * specifies the position in the string to end the search---characters beyond
1607 * this point will not be considered.
1609 * "hello".rindex('e') #=> 1
1610 * "hello".rindex('l') #=> 3
1611 * "hello".rindex('a') #=> nil
1612 * "hello".rindex(101) #=> 1
1613 * "hello".rindex(/[aeiou]/, -2) #=> 1
1616 mrb_str_rindex_m(mrb_state
*mrb
, mrb_value str
)
1622 int pos
, len
= RSTRING_LEN(str
);
1624 mrb_get_args(mrb
, "*", &argv
, &argc
);
1628 pos
= mrb_fixnum(vpos
);
1632 regexp_check(mrb
, sub
);
1633 return mrb_nil_value();
1636 if (pos
> len
) pos
= len
;
1643 sub
= mrb_nil_value();
1645 regexp_check(mrb
, sub
);
1647 switch (mrb_type(sub
)) {
1648 case MRB_TT_FIXNUM
: {
1649 int c
= mrb_fixnum(sub
);
1650 mrb_int len
= RSTRING_LEN(str
);
1651 unsigned char *p
= (unsigned char*)RSTRING_PTR(str
);
1653 for (pos
=len
;pos
>=0;pos
--) {
1654 if (p
[pos
] == c
) return mrb_fixnum_value(pos
);
1656 return mrb_nil_value();
1662 tmp
= mrb_check_string_type(mrb
, sub
);
1663 if (mrb_nil_p(tmp
)) {
1664 mrb_raisef(mrb
, E_TYPE_ERROR
, "type mismatch: %S given", sub
);
1670 pos
= mrb_str_rindex(mrb
, str
, sub
, pos
);
1671 if (pos
>= 0) return mrb_fixnum_value(pos
);
1674 } /* end of switch (TYPE(sub)) */
1675 return mrb_nil_value();
1678 static const char isspacetable
[256] = {
1679 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
1680 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1681 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1683 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1686 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1687 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1697 #define ascii_isspace(c) isspacetable[(unsigned char)(c)]
1703 * str.split(pattern=$;, [limit]) => anArray
1705 * Divides <i>str</i> into substrings based on a delimiter, returning an array
1706 * of these substrings.
1708 * If <i>pattern</i> is a <code>String</code>, then its contents are used as
1709 * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
1710 * space, <i>str</i> is split on whitespace, with leading whitespace and runs
1711 * of contiguous whitespace characters ignored.
1713 * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
1714 * pattern matches. Whenever the pattern matches a zero-length string,
1715 * <i>str</i> is split into individual characters.
1717 * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
1718 * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
1719 * split on whitespace as if ` ' were specified.
1721 * If the <i>limit</i> parameter is omitted, trailing null fields are
1722 * suppressed. If <i>limit</i> is a positive number, at most that number of
1723 * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
1724 * string is returned as the only entry in an array). If negative, there is no
1725 * limit to the number of fields returned, and trailing null fields are not suppressed.
1728 * " now's  the time".split #=> ["now's", "the", "time"]
1729 * " now's  the time".split(' ') #=> ["now's", "the", "time"]
1730 * " now's  the time".split(/ /) #=> ["", "now's", "", "the", "time"]
1731 * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
1732 * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
1733 * "hello".split(//, 3) #=> ["h", "e", "llo"]
1734 * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
1736 * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
1737 * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
1738 * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
1739 * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
1743 mrb_str_split_m(mrb_state
*mrb
, mrb_value str
)
1746 mrb_value spat
= mrb_nil_value();
1747 enum {awk
, string
, regexp
} split_type
= string
;
1752 mrb_value result
, tmp
;
1754 argc
= mrb_get_args(mrb
, "|oi", &spat
, &lim
);
1755 lim_p
= (lim
> 0 && argc
== 2);
1758 if (RSTRING_LEN(str
) == 0)
1759 return mrb_ary_new_capa(mrb
, 0);
1760 return mrb_ary_new_from_values(mrb
, 1, &str
);
1765 if (argc
== 0 || mrb_nil_p(spat
)) {
1769 if (mrb_string_p(spat
)) {
1770 split_type
= string
;
1771 if (RSTRING_LEN(spat
) == 1 && RSTRING_PTR(spat
)[0] == ' '){
1780 result
= mrb_ary_new(mrb
);
1782 if (split_type
== awk
) {
1783 char *ptr
= RSTRING_PTR(str
);
1784 char *eptr
= RSTRING_END(str
);
1790 while (ptr
< eptr
) {
1791 int ai
= mrb_gc_arena_save(mrb
);
1792 c
= (unsigned char)*ptr
++;
1794 if (ascii_isspace(c
)) {
1800 if (lim_p
&& lim
<= i
) break;
1803 else if (ascii_isspace(c
)) {
1804 mrb_ary_push(mrb
, result
, mrb_str_subseq(mrb
, str
, beg
, end
-beg
));
1805 mrb_gc_arena_restore(mrb
, ai
);
1815 else if (split_type
== string
) {
1816 char *ptr
= RSTRING_PTR(str
);
1818 char *eptr
= RSTRING_END(str
);
1819 mrb_int slen
= RSTRING_LEN(spat
);
1822 int ai
= mrb_gc_arena_save(mrb
);
1823 while (ptr
< eptr
) {
1824 mrb_ary_push(mrb
, result
, mrb_str_subseq(mrb
, str
, ptr
-temp
, 1));
1825 mrb_gc_arena_restore(mrb
, ai
);
1827 if (lim_p
&& lim
<= ++i
) break;
1831 char *sptr
= RSTRING_PTR(spat
);
1832 int ai
= mrb_gc_arena_save(mrb
);
1834 while (ptr
< eptr
&&
1835 (end
= mrb_memsearch(sptr
, slen
, ptr
, eptr
- ptr
)) >= 0) {
1836 mrb_ary_push(mrb
, result
, mrb_str_subseq(mrb
, str
, ptr
- temp
, end
));
1837 mrb_gc_arena_restore(mrb
, ai
);
1839 if (lim_p
&& lim
<= ++i
) break;
1847 if (RSTRING_LEN(str
) > 0 && (lim_p
|| RSTRING_LEN(str
) > beg
|| lim
< 0)) {
1848 if (RSTRING_LEN(str
) == beg
) {
1849 tmp
= mrb_str_new_empty(mrb
, str
);
1852 tmp
= mrb_str_subseq(mrb
, str
, beg
, RSTRING_LEN(str
)-beg
);
1854 mrb_ary_push(mrb
, result
, tmp
);
1856 if (!lim_p
&& lim
== 0) {
1858 while ((len
= RARRAY_LEN(result
)) > 0 &&
1859 (tmp
= RARRAY_PTR(result
)[len
-1], RSTRING_LEN(tmp
) == 0))
1860 mrb_ary_pop(mrb
, result
);
1867 mrb_cstr_to_inum(mrb_state
*mrb
, const char *str
, int base
, int badcheck
)
1876 #define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
1877 #define conv_digit(c) \
1878 (!ISASCII(c) ? -1 : \
1879 isdigit(c) ? ((c) - '0') : \
1880 islower(c) ? ((c) - 'a' + 10) : \
1881 isupper(c) ? ((c) - 'A' + 10) : \
1885 if (badcheck
) goto bad
;
1886 return mrb_fixnum_value(0);
1888 while (ISSPACE(*str
)) str
++;
1890 if (str
[0] == '+') {
1893 else if (str
[0] == '-') {
1897 if (str
[0] == '+' || str
[0] == '-') {
1898 if (badcheck
) goto bad
;
1899 return mrb_fixnum_value(0);
1902 if (str
[0] == '0') {
1920 else if (base
< -1) {
1929 if (str
[0] == '0' && (str
[1] == 'b'||str
[1] == 'B')) {
1936 if (str
[0] == '0' && (str
[1] == 'o'||str
[1] == 'O')) {
1939 case 4: case 5: case 6: case 7:
1942 if (str
[0] == '0' && (str
[1] == 'd'||str
[1] == 'D')) {
1945 case 9: case 11: case 12: case 13: case 14: case 15:
1948 if (str
[0] == '0' && (str
[1] == 'x'||str
[1] == 'X')) {
1953 if (base
< 2 || 36 < base
) {
1954 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "illegal radix %S", mrb_fixnum_value(base
));
1957 } /* end of switch (base) { */
1958 if (*str
== '0') { /* squeeze preceding 0s */
1960 while ((c
= *++str
) == '0' || c
== '_') {
1968 if (!(c
= *str
) || ISSPACE(c
)) --str
;
1972 if (c
< 0 || c
>= base
) {
1973 if (badcheck
) goto bad
;
1974 return mrb_fixnum_value(0);
1977 n
= strtoul((char*)str
, &end
, base
);
1978 if (n
> MRB_INT_MAX
) {
1979 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "string (%S) too big for integer", mrb_str_new_cstr(mrb
, str
));
1983 if (end
== str
) goto bad
; /* no number */
1984 while (*end
&& ISSPACE(*end
)) end
++;
1985 if (*end
) goto bad
; /* trailing garbage */
1988 return mrb_fixnum_value(sign
? val
: -val
);
1990 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "invalid string for number(%S)", mrb_str_new_cstr(mrb
, str
));
1992 return mrb_fixnum_value(0);
1996 mrb_string_value_cstr(mrb_state
*mrb
, mrb_value
*ptr
)
1998 struct RString
*ps
= mrb_str_ptr(*ptr
);
2001 if (!s
|| ps
->len
!= strlen(s
)) {
2002 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string contains null byte");
2008 mrb_str_to_inum(mrb_state
*mrb
, mrb_value str
, int base
, int badcheck
)
2013 str
= mrb_str_to_str(mrb
, str
);
2015 s
= mrb_string_value_cstr(mrb
, &str
);
2018 s
= RSTRING_PTR(str
);
2021 len
= RSTRING_LEN(str
);
2022 if (s
[len
]) { /* no sentinel somehow */
2023 struct RString
*temp_str
= str_new(mrb
, s
, len
);
2027 return mrb_cstr_to_inum(mrb
, s
, base
, badcheck
);
2033 * str.to_i(base=10) => integer
2035 * Returns the result of interpreting leading characters in <i>str</i> as an
2036 * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
2037 * end of a valid number are ignored. If there is not a valid number at the
2038 * start of <i>str</i>, <code>0</code> is returned. This method never raises an exception.
2041 * "12345".to_i #=> 12345
2042 * "99 red balloons".to_i #=> 99
2044 * "0a".to_i(16) #=> 10
2045 * "hello".to_i #=> 0
2046 * "1100101".to_i(2) #=> 101
2047 * "1100101".to_i(8) #=> 294977
2048 * "1100101".to_i(10) #=> 1100101
2049 * "1100101".to_i(16) #=> 17826049
2052 mrb_str_to_i(mrb_state
*mrb
, mrb_value self
)
2058 mrb_get_args(mrb
, "*", &argv
, &argc
);
2062 base
= mrb_fixnum(argv
[0]);
2065 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "illegal radix %S", mrb_fixnum_value(base
));
2067 return mrb_str_to_inum(mrb
, self
, base
, 0/*Qfalse*/);
2071 mrb_cstr_to_dbl(mrb_state
*mrb
, const char * p
, int badcheck
)
2075 #if !defined(DBL_DIG)
2079 enum {max_width
= 20};
2080 #define OutOfRange() (((w = end - p) > max_width) ? \
2081 (w = max_width, ellipsis = "...") : \
2082 (w = (int)(end - p), ellipsis = ""))
2085 while (ISSPACE(*p
)) p
++;
2087 if (!badcheck
&& p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2090 d
= strtod(p
, &end
);
2094 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "invalid string for float(%S)", mrb_str_new_cstr(mrb
, p
));
2100 char buf
[DBL_DIG
* 4 + 10];
2102 char *e
= buf
+ sizeof(buf
) - 1;
2105 while (p
< end
&& n
< e
) prev
= *n
++ = *p
++;
2108 /* remove underscores between digits */
2110 if (n
== buf
|| !ISDIGIT(prev
)) goto bad
;
2112 if (!ISDIGIT(*p
)) goto bad
;
2115 while (*++p
== '_');
2120 if (n
< e
) *n
++ = prev
;
2125 if (!badcheck
&& p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2129 d
= strtod(p
, &end
);
2131 if (!end
|| p
== end
) goto bad
;
2132 while (*end
&& ISSPACE(*end
)) end
++;
2140 mrb_str_to_dbl(mrb_state
*mrb
, mrb_value str
, int badcheck
)
2145 str
= mrb_str_to_str(mrb
, str
);
2146 s
= RSTRING_PTR(str
);
2147 len
= RSTRING_LEN(str
);
2149 if (badcheck
&& memchr(s
, '\0', len
)) {
2150 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string for Float contains null byte");
2152 if (s
[len
]) { /* no sentinel somehow */
2153 struct RString
*temp_str
= str_new(mrb
, s
, len
);
2157 return mrb_cstr_to_dbl(mrb
, s
, badcheck
);
2165 * Returns the result of interpreting leading characters in <i>str</i> as a
2166 * floating point number. Extraneous characters past the end of a valid number
2167 * are ignored. If there is not a valid number at the start of <i>str</i>,
2168 * <code>0.0</code> is returned. This method never raises an exception.
2170 * "123.45e1".to_f #=> 1234.5
2171 * "45.67 degrees".to_f #=> 45.67
2172 * "thx1138".to_f #=> 0.0
2175 mrb_str_to_f(mrb_state
*mrb
, mrb_value self
)
2177 return mrb_float_value(mrb
, mrb_str_to_dbl(mrb
, self
, 0/*Qfalse*/));
2186 * Returns the receiver.
2189 mrb_str_to_s(mrb_state
*mrb
, mrb_value self
)
2191 if (mrb_obj_class(mrb
, self
) != mrb
->string_class
) {
2192 return mrb_str_dup(mrb
, self
);
2200 * str.upcase! => str or nil
2202 * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes were made.
2206 mrb_str_upcase_bang(mrb_state
*mrb
, mrb_value str
)
2208 struct RString
*s
= mrb_str_ptr(str
);
2212 mrb_str_modify(mrb
, s
);
2213 p
= RSTRING_PTR(str
);
2214 pend
= RSTRING_END(str
);
2223 if (modify
) return str
;
2224 return mrb_nil_value();
2230 * str.upcase => new_str
2232 * Returns a copy of <i>str</i> with all lowercase letters replaced with their
2233 * uppercase counterparts. The operation is locale insensitive---only
2234 * characters ``a'' to ``z'' are affected.
2236 * "hEllO".upcase #=> "HELLO"
2239 mrb_str_upcase(mrb_state
*mrb
, mrb_value self
)
2243 str
= mrb_str_dup(mrb
, self
);
2244 mrb_str_upcase_bang(mrb
, str
);
2250 * str.dump -> new_str
2252 * Produces a version of <i>str</i> with all nonprinting characters replaced by
2253 * <code>\nnn</code> notation and all special characters escaped.
2256 mrb_str_dump(mrb_state
*mrb
, mrb_value str
)
2259 const char *p
, *pend
;
2261 struct RString
*result
;
2264 p
= RSTRING_PTR(str
); pend
= p
+ RSTRING_LEN(str
);
2266 unsigned char c
= *p
++;
2268 case '"': case '\\':
2269 case '\n': case '\r':
2270 case '\t': case '\f':
2271 case '\013': case '\010': case '\007': case '\033':
2276 len
+= IS_EVSTR(p
, pend
) ? 2 : 1;
2284 len
+= 4; /* \NNN */
2290 result
= str_new(mrb
, 0, len
);
2291 str_with_class(mrb
, result
, str
);
2292 p
= RSTRING_PTR(str
); pend
= p
+ RSTRING_LEN(str
);
2297 unsigned char c
= *p
++;
2347 if (IS_EVSTR(p
, pend
)) *q
++ = '\\';
2357 q
[2] = '0' + c
% 8; c
/= 8;
2358 q
[1] = '0' + c
% 8; c
/= 8;
2365 return mrb_obj_value(result
);
2369 mrb_str_cat(mrb_state
*mrb
, mrb_value str
, const char *ptr
, size_t len
)
2371 if ((mrb_int
)len
< 0) {
2372 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "negative string size (or size too big)");
2374 str_buf_cat(mrb
, mrb_str_ptr(str
), ptr
, len
);
2379 mrb_str_cat_cstr(mrb_state
*mrb
, mrb_value str
, const char *ptr
)
2381 return mrb_str_cat(mrb
, str
, ptr
, strlen(ptr
));
2385 mrb_str_append(mrb_state
*mrb
, mrb_value str
, mrb_value str2
)
2387 str2
= mrb_str_to_str(mrb
, str2
);
2388 return mrb_str_buf_append(mrb
, str
, str2
);
2391 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
2395 * str.inspect -> string
2397 * Returns a printable version of _str_, surrounded by quote marks,
2398 * with special characters escaped.
2402 * str.inspect #=> "\"hel\\bo\""
2405 mrb_str_inspect(mrb_state
*mrb
, mrb_value str
)
2407 const char *p
, *pend
;
2408 char buf
[CHAR_ESC_LEN
+ 1];
2409 mrb_value result
= mrb_str_new(mrb
, "\"", 1);
2411 p
= RSTRING_PTR(str
); pend
= RSTRING_END(str
);
2412 for (;p
< pend
; p
++) {
2413 unsigned char c
, cc
;
2416 if (c
== '"'|| c
== '\\' || (c
== '#' && IS_EVSTR(p
, pend
))) {
2417 buf
[0] = '\\'; buf
[1] = c
;
2418 mrb_str_buf_cat(mrb
, result
, buf
, 2);
2423 mrb_str_buf_cat(mrb
, result
, buf
, 1);
2427 case '\n': cc
= 'n'; break;
2428 case '\r': cc
= 'r'; break;
2429 case '\t': cc
= 't'; break;
2430 case '\f': cc
= 'f'; break;
2431 case '\013': cc
= 'v'; break;
2432 case '\010': cc
= 'b'; break;
2433 case '\007': cc
= 'a'; break;
2434 case 033: cc
= 'e'; break;
2435 default: cc
= 0; break;
2440 mrb_str_buf_cat(mrb
, result
, buf
, 2);
2445 buf
[3] = '0' + c
% 8; c
/= 8;
2446 buf
[2] = '0' + c
% 8; c
/= 8;
2447 buf
[1] = '0' + c
% 8;
2448 mrb_str_buf_cat(mrb
, result
, buf
, 4);
2452 mrb_str_buf_cat(mrb
, result
, "\"", 1);
2459 * str.bytes -> array of fixnums
2461 * Returns an array of bytes in _str_.
2464 * str.bytes #=> [104, 101, 108, 108, 111]
2467 mrb_str_bytes(mrb_state
*mrb
, mrb_value str
)
2469 struct RString
*s
= mrb_str_ptr(str
);
2470 mrb_value a
= mrb_ary_new_capa(mrb
, s
->len
);
2471 unsigned char *p
= (unsigned char *)(s
->ptr
), *pend
= p
+ s
->len
;
2474 mrb_ary_push(mrb
, a
, mrb_fixnum_value(p
[0]));
2480 /* ---------------------------*/
2482 mrb_init_string(mrb_state
*mrb
)
2486 s
= mrb
->string_class
= mrb_define_class(mrb
, "String", mrb
->object_class
);
2487 MRB_SET_INSTANCE_TT(s
, MRB_TT_STRING
);
2488 mrb_include_module(mrb
, s
, mrb_class_get(mrb
, "Comparable"));
2491 mrb_define_method(mrb
, s
, "bytesize", mrb_str_bytesize
, MRB_ARGS_NONE());
2493 mrb_define_method(mrb
, s
, "<=>", mrb_str_cmp_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
2494 mrb_define_method(mrb
, s
, "==", mrb_str_equal_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
2495 mrb_define_method(mrb
, s
, "+", mrb_str_plus_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
2496 mrb_define_method(mrb
, s
, "*", mrb_str_times
, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
2497 mrb_define_method(mrb
, s
, "[]", mrb_str_aref_m
, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
2498 mrb_define_method(mrb
, s
, "capitalize", mrb_str_capitalize
, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
2499 mrb_define_method(mrb
, s
, "capitalize!", mrb_str_capitalize_bang
, MRB_ARGS_REQ(1)); /* 15.2.10.5.8 */
2500 mrb_define_method(mrb
, s
, "chomp", mrb_str_chomp
, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
2501 mrb_define_method(mrb
, s
, "chomp!", mrb_str_chomp_bang
, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
2502 mrb_define_method(mrb
, s
, "chop", mrb_str_chop
, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */
2503 mrb_define_method(mrb
, s
, "chop!", mrb_str_chop_bang
, MRB_ARGS_REQ(1)); /* 15.2.10.5.12 */
2504 mrb_define_method(mrb
, s
, "downcase", mrb_str_downcase
, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
2505 mrb_define_method(mrb
, s
, "downcase!", mrb_str_downcase_bang
, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
2506 mrb_define_method(mrb
, s
, "empty?", mrb_str_empty_p
, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
2507 mrb_define_method(mrb
, s
, "eql?", mrb_str_eql
, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
2509 mrb_define_method(mrb
, s
, "hash", mrb_str_hash_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.20 */
2510 mrb_define_method(mrb
, s
, "include?", mrb_str_include
, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
2511 mrb_define_method(mrb
, s
, "index", mrb_str_index_m
, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
2512 mrb_define_method(mrb
, s
, "initialize", mrb_str_init
, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
2513 mrb_define_method(mrb
, s
, "initialize_copy", mrb_str_replace
, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
2514 mrb_define_method(mrb
, s
, "intern", mrb_str_intern
, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
2515 mrb_define_method(mrb
, s
, "length", mrb_str_size
, MRB_ARGS_NONE()); /* 15.2.10.5.26 */
2516 mrb_define_method(mrb
, s
, "replace", mrb_str_replace
, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
2517 mrb_define_method(mrb
, s
, "reverse", mrb_str_reverse
, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
2518 mrb_define_method(mrb
, s
, "reverse!", mrb_str_reverse_bang
, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
2519 mrb_define_method(mrb
, s
, "rindex", mrb_str_rindex_m
, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
2520 mrb_define_method(mrb
, s
, "size", mrb_str_size
, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
2521 mrb_define_method(mrb
, s
, "slice", mrb_str_aref_m
, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
2522 mrb_define_method(mrb
, s
, "split", mrb_str_split_m
, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
2524 mrb_define_method(mrb
, s
, "to_f", mrb_str_to_f
, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
2525 mrb_define_method(mrb
, s
, "to_i", mrb_str_to_i
, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
2526 mrb_define_method(mrb
, s
, "to_s", mrb_str_to_s
, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
2527 mrb_define_method(mrb
, s
, "to_str", mrb_str_to_s
, MRB_ARGS_NONE());
2528 mrb_define_method(mrb
, s
, "to_sym", mrb_str_intern
, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
2529 mrb_define_method(mrb
, s
, "upcase", mrb_str_upcase
, MRB_ARGS_REQ(1)); /* 15.2.10.5.42 */
2530 mrb_define_method(mrb
, s
, "upcase!", mrb_str_upcase_bang
, MRB_ARGS_REQ(1)); /* 15.2.10.5.43 */
2531 mrb_define_method(mrb
, s
, "inspect", mrb_str_inspect
, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
2532 mrb_define_method(mrb
, s
, "bytes", mrb_str_bytes
, MRB_ARGS_NONE());