2 ** string.c - String class
4 ** See Copyright Notice in mruby.h
13 #include "mruby/array.h"
14 #include "mruby/class.h"
15 #include "mruby/range.h"
16 #include "mruby/string.h"
19 const char mrb_digitmap
[] = "0123456789abcdefghijklmnopqrstuvwxyz";
21 typedef struct mrb_shared_string
{
28 static mrb_value
str_replace(mrb_state
*mrb
, struct RString
*s1
, struct RString
*s2
);
29 static mrb_value
mrb_str_subseq(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
);
31 #define RESIZE_CAPA(s,capacity) do {\
32 s->ptr = (char *)mrb_realloc(mrb, s->ptr, (capacity)+1);\
33 s->aux.capa = capacity;\
37 str_decref(mrb_state
*mrb
, mrb_shared_string
*shared
)
40 if (shared
->refcnt
== 0) {
41 if (!shared
->nofree
) {
42 mrb_free(mrb
, shared
->ptr
);
44 mrb_free(mrb
, shared
);
49 mrb_str_modify(mrb_state
*mrb
, struct RString
*s
)
51 if (s
->flags
& MRB_STR_SHARED
) {
52 mrb_shared_string
*shared
= s
->aux
.shared
;
54 if (shared
->refcnt
== 1 && s
->ptr
== shared
->ptr
) {
56 s
->aux
.capa
= shared
->len
;
57 s
->ptr
[s
->len
] = '\0';
58 mrb_free(mrb
, shared
);
66 ptr
= (char *)mrb_malloc(mrb
, (size_t)len
+ 1);
73 str_decref(mrb
, shared
);
75 s
->flags
&= ~MRB_STR_SHARED
;
78 if (s
->flags
& MRB_STR_NOFREE
) {
81 s
->ptr
= (char *)mrb_malloc(mrb
, (size_t)s
->len
+1);
83 memcpy(s
->ptr
, p
, s
->len
);
85 s
->ptr
[s
->len
] = '\0';
87 s
->flags
&= ~MRB_STR_NOFREE
;
93 mrb_str_resize(mrb_state
*mrb
, mrb_value str
, mrb_int len
)
96 struct RString
*s
= mrb_str_ptr(str
);
98 mrb_str_modify(mrb
, s
);
101 if (slen
< len
|| slen
- len
> 256) {
105 s
->ptr
[len
] = '\0'; /* sentinel */
110 #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
112 /* char offset to byte offset */
114 mrb_str_offset(mrb_state
*mrb
, mrb_value str
, int pos
)
119 static struct RString
*
120 str_new(mrb_state
*mrb
, const char *p
, mrb_int len
)
124 s
= mrb_obj_alloc_string(mrb
);
127 s
->ptr
= (char *)mrb_malloc(mrb
, (size_t)len
+1);
129 memcpy(s
->ptr
, p
, len
);
136 str_with_class(mrb_state
*mrb
, struct RString
*s
, mrb_value obj
)
138 s
->c
= mrb_str_ptr(obj
)->c
;
142 mrb_str_new_empty(mrb_state
*mrb
, mrb_value str
)
144 struct RString
*s
= str_new(mrb
, 0, 0);
146 str_with_class(mrb
, s
, str
);
147 return mrb_obj_value(s
);
150 #ifndef MRB_STR_BUF_MIN_SIZE
151 # define MRB_STR_BUF_MIN_SIZE 128
155 mrb_str_buf_new(mrb_state
*mrb
, mrb_int capa
)
159 s
= mrb_obj_alloc_string(mrb
);
161 if (capa
< MRB_STR_BUF_MIN_SIZE
) {
162 capa
= MRB_STR_BUF_MIN_SIZE
;
166 s
->ptr
= (char *)mrb_malloc(mrb
, capa
+1);
169 return mrb_obj_value(s
);
173 str_buf_cat(mrb_state
*mrb
, struct RString
*s
, const char *ptr
, size_t len
)
179 mrb_str_modify(mrb
, s
);
180 if (ptr
>= s
->ptr
&& ptr
<= s
->ptr
+ s
->len
) {
183 if (len
== 0) return;
185 if (s
->len
>= MRB_INT_MAX
- (mrb_int
)len
) {
186 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string sizes too big");
190 while (total
> capa
) {
191 if (capa
+ 1 >= MRB_INT_MAX
/ 2) {
192 capa
= (total
+ 4095) / 4096;
195 capa
= (capa
+ 1) * 2;
197 RESIZE_CAPA(s
, capa
);
202 memcpy(s
->ptr
+ s
->len
, ptr
, len
);
204 s
->ptr
[total
] = '\0'; /* sentinel */
208 mrb_str_buf_cat(mrb_state
*mrb
, mrb_value str
, const char *ptr
, size_t len
)
210 if (len
== 0) return str
;
211 str_buf_cat(mrb
, mrb_str_ptr(str
), ptr
, len
);
216 mrb_str_new(mrb_state
*mrb
, const char *p
, size_t len
)
220 s
= str_new(mrb
, p
, len
);
221 return mrb_obj_value(s
);
225 * call-seq: (Caution! NULL string)
226 * String.new(str="") => new_str
228 * Returns a new string object containing a copy of <i>str</i>.
232 mrb_str_new_cstr(mrb_state
*mrb
, const char *p
)
239 if ((mrb_int
)len
< 0) {
240 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "argument too big");
247 s
= str_new(mrb
, p
, len
);
249 return mrb_obj_value(s
);
253 mrb_str_new_static(mrb_state
*mrb
, const char *p
, size_t len
)
257 s
= mrb_obj_alloc_string(mrb
);
259 s
->aux
.capa
= 0; /* nofree */
261 s
->flags
= MRB_STR_NOFREE
;
262 return mrb_obj_value(s
);
266 mrb_gc_free_str(mrb_state
*mrb
, struct RString
*str
)
268 if (str
->flags
& MRB_STR_SHARED
)
269 str_decref(mrb
, str
->aux
.shared
);
270 else if ((str
->flags
& MRB_STR_NOFREE
) == 0)
271 mrb_free(mrb
, str
->ptr
);
275 mrb_str_to_cstr(mrb_state
*mrb
, mrb_value str0
)
279 if (!mrb_string_p(str0
)) {
280 mrb_raise(mrb
, E_TYPE_ERROR
, "expected String");
283 s
= str_new(mrb
, RSTRING_PTR(str0
), RSTRING_LEN(str0
));
284 if ((strlen(s
->ptr
) ^ s
->len
) != 0) {
285 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string contains null byte");
291 str_make_shared(mrb_state
*mrb
, struct RString
*s
)
293 if (!(s
->flags
& MRB_STR_SHARED
)) {
294 mrb_shared_string
*shared
= (mrb_shared_string
*)mrb_malloc(mrb
, sizeof(mrb_shared_string
));
297 if (s
->flags
& MRB_STR_NOFREE
) {
298 shared
->nofree
= TRUE
;
299 shared
->ptr
= s
->ptr
;
300 s
->flags
&= ~MRB_STR_NOFREE
;
303 shared
->nofree
= FALSE
;
304 if (s
->aux
.capa
> s
->len
) {
305 s
->ptr
= shared
->ptr
= (char *)mrb_realloc(mrb
, s
->ptr
, s
->len
+1);
308 shared
->ptr
= s
->ptr
;
311 shared
->len
= s
->len
;
312 s
->aux
.shared
= shared
;
313 s
->flags
|= MRB_STR_SHARED
;
319 * char* str = String("abcd"), len=strlen("abcd")
321 * Returns a new string object containing a copy of <i>str</i>.
324 mrb_str_body(mrb_value str
, int *len_p
)
326 struct RString
*s
= mrb_str_ptr(str
);
333 * call-seq: (Caution! String("abcd") change)
334 * String("abcdefg") = String("abcd") + String("efg")
336 * Appends <i>other</i> (converted to a string first if necessary) to the end of <i>self</i> in place.
339 mrb_str_concat(mrb_state
*mrb
, mrb_value self
, mrb_value other
)
341 struct RString
*s1
= mrb_str_ptr(self
), *s2
;
344 mrb_str_modify(mrb
, s1
);
345 if (!mrb_string_p(other
)) {
346 other
= mrb_str_to_str(mrb
, other
);
348 s2
= mrb_str_ptr(other
);
349 len
= s1
->len
+ s2
->len
;
351 if (s1
->aux
.capa
< len
) {
353 s1
->ptr
= (char *)mrb_realloc(mrb
, s1
->ptr
, len
+1);
355 memcpy(s1
->ptr
+s1
->len
, s2
->ptr
, s2
->len
);
361 * call-seq: (Caution! String("abcd") remain)
362 * String("abcdefg") = String("abcd") + String("efg")
364 * Returns a new string object containing the contents of both operands joined in order.
367 mrb_str_plus(mrb_state
*mrb
, mrb_value a
, mrb_value b
)
369 struct RString
*s
= mrb_str_ptr(a
);
370 struct RString
*s2
= mrb_str_ptr(b
);
373 t
= str_new(mrb
, 0, s
->len
+ s2
->len
);
374 memcpy(t
->ptr
, s
->ptr
, s
->len
);
375 memcpy(t
->ptr
+ s
->len
, s2
->ptr
, s2
->len
);
377 return mrb_obj_value(t
);
383 * call-seq: (Caution! String("abcd") remain) for stack_argument
384 * String("abcdefg") = String("abcd") + String("efg")
386 * Returns a new string object containing the receiver followed by the argument.
389 mrb_str_plus_m(mrb_state
*mrb
, mrb_value self
)
393 mrb_get_args(mrb
, "S", &str
);
394 return mrb_str_plus(mrb
, self
, str
);
399 * len = strlen(String("abcd"))
401 * Returns the length of <i>str</i> in bytes.
404 mrb_str_bytesize(mrb_state
*mrb
, mrb_value self
)
406 struct RString
*s
= mrb_str_ptr(self
);
407 return mrb_fixnum_value(s
->len
);
414 * len = strlen(String("abcd"))
416 * Returns the length of <i>str</i>.
419 mrb_str_size(mrb_state
*mrb
, mrb_value self
)
421 struct RString
*s
= mrb_str_ptr(self
);
422 return mrb_fixnum_value(s
->len
);
428 * str * integer => new_str
430 * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
433 * "Ho! " * 3 #=> "Ho! Ho! Ho! "
436 mrb_str_times(mrb_state
*mrb
, mrb_value self
)
439 struct RString
*str2
;
442 mrb_get_args(mrb
, "i", ×
);
444 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "negative argument");
446 if (times
&& MRB_INT_MAX
/ times
< RSTRING_LEN(self
)) {
447 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "argument too big");
450 len
= RSTRING_LEN(self
)*times
;
451 str2
= str_new(mrb
, 0, len
);
452 str_with_class(mrb
, str2
, self
);
455 n
= RSTRING_LEN(self
);
456 memcpy(p
, RSTRING_PTR(self
), n
);
461 memcpy(p
+ n
, p
, len
-n
);
465 return mrb_obj_value(str2
);
467 /* -------------------------------------------------------------- */
469 #define lesser(a,b) (((a)>(b))?(b):(a))
471 /* ---------------------------*/
474 * mrb_value str1 <=> mrb_value str2 => int
480 mrb_str_cmp(mrb_state
*mrb
, mrb_value str1
, mrb_value str2
)
484 struct RString
*s1
= mrb_str_ptr(str1
);
485 struct RString
*s2
= mrb_str_ptr(str2
);
487 len
= lesser(s1
->len
, s2
->len
);
488 retval
= memcmp(s1
->ptr
, s2
->ptr
, len
);
490 if (s1
->len
== s2
->len
) return 0;
491 if (s1
->len
> s2
->len
) return 1;
494 if (retval
> 0) return 1;
502 * str <=> other_str => -1, 0, +1
504 * Comparison---Returns -1 if <i>str</i> is less than, 0 if
505 * <i>str</i> is equal to, and +1 if <i>str</i> is greater than
506 * <i>other_str</i>. If the strings are of different lengths, and the strings are
507 * equal when compared up to the shortest length, then the longer string is
508 * considered greater than the shorter one. If the variable <code>$=</code> is
509 * <code>false</code>, the comparison is based on comparing the binary values
510 * of each character in the string. In older versions of Ruby, setting
511 * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
512 * in favor of using <code>String#casecmp</code>.
514 * <code><=></code> is the basis for the methods <code><</code>,
515 * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
516 * included from module <code>Comparable</code>. The method
517 * <code>String#==</code> does not use <code>Comparable#==</code>.
519 * "abcdef" <=> "abcde" #=> 1
520 * "abcdef" <=> "abcdef" #=> 0
521 * "abcdef" <=> "abcdefg" #=> -1
522 * "abcdef" <=> "ABCDEF" #=> 1
525 mrb_str_cmp_m(mrb_state
*mrb
, mrb_value str1
)
530 mrb_get_args(mrb
, "o", &str2
);
531 if (!mrb_string_p(str2
)) {
532 if (!mrb_respond_to(mrb
, str2
, mrb_intern_lit(mrb
, "to_s"))) {
533 return mrb_nil_value();
535 else if (!mrb_respond_to(mrb
, str2
, mrb_intern_lit(mrb
, "<=>"))) {
536 return mrb_nil_value();
539 mrb_value tmp
= mrb_funcall(mrb
, str2
, "<=>", 1, str1
);
541 if (mrb_nil_p(tmp
)) return mrb_nil_value();
542 if (!mrb_fixnum(tmp
)) {
543 return mrb_funcall(mrb
, mrb_fixnum_value(0), "-", 1, tmp
);
545 result
= -mrb_fixnum(tmp
);
549 result
= mrb_str_cmp(mrb
, str1
, str2
);
551 return mrb_fixnum_value(result
);
555 str_eql(mrb_state
*mrb
, const mrb_value str1
, const mrb_value str2
)
557 const mrb_int len
= RSTRING_LEN(str1
);
559 if (len
!= RSTRING_LEN(str2
)) return FALSE
;
560 if (memcmp(RSTRING_PTR(str1
), RSTRING_PTR(str2
), (size_t)len
) == 0)
566 mrb_str_equal(mrb_state
*mrb
, mrb_value str1
, mrb_value str2
)
568 if (mrb_obj_equal(mrb
, str1
, str2
)) return TRUE
;
569 if (!mrb_string_p(str2
)) {
570 if (mrb_nil_p(str2
)) return FALSE
;
571 if (!mrb_respond_to(mrb
, str2
, mrb_intern_lit(mrb
, "to_str"))) {
574 str2
= mrb_funcall(mrb
, str2
, "to_str", 0);
575 return mrb_equal(mrb
, str2
, str1
);
577 return str_eql(mrb
, str1
, str2
);
583 * str == obj => true or false
586 * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
587 * Otherwise, returns <code>false</code> or <code>true</code>
589 * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero.
592 mrb_str_equal_m(mrb_state
*mrb
, mrb_value str1
)
597 mrb_get_args(mrb
, "o", &str2
);
598 equal_p
= mrb_str_equal(mrb
, str1
, str2
);
600 return mrb_bool_value(equal_p
);
602 /* ---------------------------------- */
604 mrb_str_to_str(mrb_state
*mrb
, mrb_value str
)
608 if (!mrb_string_p(str
)) {
609 s
= mrb_check_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_str");
611 s
= mrb_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_s");
619 mrb_string_value_ptr(mrb_state
*mrb
, mrb_value ptr
)
621 mrb_value str
= mrb_str_to_str(mrb
, ptr
);
622 return RSTRING_PTR(str
);
626 noregexp(mrb_state
*mrb
, mrb_value self
)
628 mrb_raise(mrb
, E_NOTIMP_ERROR
, "Regexp class not implemented");
629 return mrb_nil_value();
633 regexp_check(mrb_state
*mrb
, mrb_value obj
)
635 if (!memcmp(mrb_obj_classname(mrb
, obj
), REGEXP_CLASS
, sizeof(REGEXP_CLASS
) - 1)) {
640 static inline mrb_int
641 mrb_memsearch_qs(const unsigned char *xs
, mrb_int m
, const unsigned char *ys
, mrb_int n
)
643 const unsigned char *x
= xs
, *xe
= xs
+ m
;
644 const unsigned char *y
= ys
;
648 for (i
= 0; i
< 256; ++i
)
651 qstable
[*x
] = xe
- x
;
653 for (; y
+ m
<= ys
+ n
; y
+= *(qstable
+ y
[m
])) {
654 if (*xs
== *y
&& memcmp(xs
, y
, m
) == 0)
661 mrb_memsearch(const void *x0
, mrb_int m
, const void *y0
, mrb_int n
)
663 const unsigned char *x
= (const unsigned char *)x0
, *y
= (const unsigned char *)y0
;
665 if (m
> n
) return -1;
667 return memcmp(x0
, y0
, m
) == 0 ? 0 : -1;
673 const unsigned char *ys
= y
, *ye
= ys
+ n
;
674 for (; y
< ye
; ++y
) {
680 return mrb_memsearch_qs((const unsigned char *)x0
, m
, (const unsigned char *)y0
, n
);
684 mrb_str_index(mrb_state
*mrb
, mrb_value str
, mrb_value sub
, mrb_int offset
)
690 len
= RSTRING_LEN(str
);
691 slen
= RSTRING_LEN(sub
);
694 if (offset
< 0) return -1;
696 if (len
- offset
< slen
) return -1;
697 s
= RSTRING_PTR(str
);
701 if (slen
== 0) return offset
;
702 /* need to proceed one character at a time */
703 sptr
= RSTRING_PTR(sub
);
704 slen
= RSTRING_LEN(sub
);
705 len
= RSTRING_LEN(str
) - offset
;
706 pos
= mrb_memsearch(sptr
, slen
, s
, len
);
707 if (pos
< 0) return pos
;
712 mrb_str_dup(mrb_state
*mrb
, mrb_value str
)
714 /* should return shared string */
715 struct RString
*s
= mrb_str_ptr(str
);
717 return mrb_str_new(mrb
, s
->ptr
, s
->len
);
721 mrb_str_aref(mrb_state
*mrb
, mrb_value str
, mrb_value indx
)
725 regexp_check(mrb
, indx
);
726 switch (mrb_type(indx
)) {
728 idx
= mrb_fixnum(indx
);
731 str
= mrb_str_substr(mrb
, str
, idx
, 1);
732 if (!mrb_nil_p(str
) && RSTRING_LEN(str
) == 0) return mrb_nil_value();
736 if (mrb_str_index(mrb
, str
, indx
, 0) != -1)
737 return mrb_str_dup(mrb
, indx
);
738 return mrb_nil_value();
741 /* check if indx is Range */
746 len
= RSTRING_LEN(str
);
747 if (mrb_range_beg_len(mrb
, indx
, &beg
, &len
, len
)) {
748 tmp
= mrb_str_subseq(mrb
, str
, beg
, len
);
752 return mrb_nil_value();
756 idx
= mrb_fixnum(indx
);
759 return mrb_nil_value(); /* not reached */
766 * str[fixnum] => new_str or nil
767 * str[fixnum, fixnum] => new_str or nil
768 * str[range] => new_str or nil
769 * str[regexp] => new_str or nil
770 * str[regexp, fixnum] => new_str or nil
771 * str[other_str] => new_str or nil
772 * str.slice(fixnum) => new_str or nil
773 * str.slice(fixnum, fixnum) => new_str or nil
774 * str.slice(range) => new_str or nil
775 * str.slice(regexp) => new_str or nil
776 * str.slice(regexp, fixnum) => new_str or nil
777 * str.slice(other_str) => new_str or nil
779 * Element Reference---If passed a single <code>Fixnum</code>, returns a
780 * one-character substring at that position. If passed two <code>Fixnum</code>
781 * objects, returns a substring starting at the offset given by the first, and
782 * a length given by the second. If given a range, a substring containing
783 * characters at offsets given by the range is returned. In all three cases, if
784 * an offset is negative, it is counted from the end of <i>str</i>. Returns
785 * <code>nil</code> if the initial offset falls outside the string, the length
786 * is negative, or the beginning of the range is greater than the end.
788 * If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
789 * returned. If a numeric parameter follows the regular expression, that
790 * component of the <code>MatchData</code> is returned instead. If a
791 * <code>String</code> is given, that string is returned if it occurs in
792 * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
796 * a[1] #=> 101(1.8.7) "e"(1.9.2)
800 * a[-4..-2] #=> "her"
803 * a[/[aeiou](.)\1/] #=> "ell"
804 * a[/[aeiou](.)\1/, 0] #=> "ell"
805 * a[/[aeiou](.)\1/, 1] #=> "l"
806 * a[/[aeiou](.)\1/, 2] #=> nil
811 mrb_str_aref_m(mrb_state
*mrb
, mrb_value str
)
816 argc
= mrb_get_args(mrb
, "o|o", &a1
, &a2
);
818 regexp_check(mrb
, a1
);
819 return mrb_str_substr(mrb
, str
, mrb_fixnum(a1
), mrb_fixnum(a2
));
822 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc
));
824 return mrb_str_aref(mrb
, str
, a1
);
830 * str.capitalize! => str or nil
832 * Modifies <i>str</i> by converting the first character to uppercase and the
833 * remainder to lowercase. Returns <code>nil</code> if no changes are made.
836 * a.capitalize! #=> "Hello"
838 * a.capitalize! #=> nil
841 mrb_str_capitalize_bang(mrb_state
*mrb
, mrb_value str
)
845 struct RString
*s
= mrb_str_ptr(str
);
847 mrb_str_modify(mrb
, s
);
848 if (s
->len
== 0 || !s
->ptr
) return mrb_nil_value();
849 p
= s
->ptr
; pend
= s
->ptr
+ s
->len
;
860 if (modify
) return str
;
861 return mrb_nil_value();
867 * str.capitalize => new_str
869 * Returns a copy of <i>str</i> with the first character converted to uppercase
870 * and the remainder to lowercase.
872 * "hello".capitalize #=> "Hello"
873 * "HELLO".capitalize #=> "Hello"
874 * "123ABC".capitalize #=> "123abc"
877 mrb_str_capitalize(mrb_state
*mrb
, mrb_value self
)
881 str
= mrb_str_dup(mrb
, self
);
882 mrb_str_capitalize_bang(mrb
, str
);
889 * str.chomp!(separator=$/) => str or nil
891 * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
892 * returning <i>str</i>, or <code>nil</code> if no modifications were made.
895 mrb_str_chomp_bang(mrb_state
*mrb
, mrb_value str
)
902 struct RString
*s
= mrb_str_ptr(str
);
904 mrb_str_modify(mrb
, s
);
906 if (mrb_get_args(mrb
, "|S", &rs
) == 0) {
907 if (len
== 0) return mrb_nil_value();
909 if (s
->ptr
[len
-1] == '\n') {
912 s
->ptr
[s
->len
-1] == '\r') {
916 else if (s
->ptr
[len
-1] == '\r') {
920 return mrb_nil_value();
922 s
->ptr
[s
->len
] = '\0';
926 if (len
== 0 || mrb_nil_p(rs
)) return mrb_nil_value();
928 rslen
= RSTRING_LEN(rs
);
930 while (len
>0 && p
[len
-1] == '\n') {
932 if (len
>0 && p
[len
-1] == '\r')
940 return mrb_nil_value();
942 if (rslen
> len
) return mrb_nil_value();
943 newline
= RSTRING_PTR(rs
)[rslen
-1];
944 if (rslen
== 1 && newline
== '\n')
945 newline
= RSTRING_PTR(rs
)[rslen
-1];
946 if (rslen
== 1 && newline
== '\n')
949 pp
= p
+ len
- rslen
;
950 if (p
[len
-1] == newline
&&
952 memcmp(RSTRING_PTR(rs
), pp
, rslen
) == 0)) {
953 s
->len
= len
- rslen
;
957 return mrb_nil_value();
963 * str.chomp(separator=$/) => new_str
965 * Returns a new <code>String</code> with the given record separator removed
966 * from the end of <i>str</i> (if present). If <code>$/</code> has not been
967 * changed from the default Ruby record separator, then <code>chomp</code> also
968 * removes carriage return characters (that is it will remove <code>\n</code>,
969 * <code>\r</code>, and <code>\r\n</code>).
971 * "hello".chomp #=> "hello"
972 * "hello\n".chomp #=> "hello"
973 * "hello\r\n".chomp #=> "hello"
974 * "hello\n\r".chomp #=> "hello\n"
975 * "hello\r".chomp #=> "hello"
976 * "hello \n there".chomp #=> "hello \n there"
977 * "hello".chomp("llo") #=> "he"
980 mrb_str_chomp(mrb_state
*mrb
, mrb_value self
)
984 str
= mrb_str_dup(mrb
, self
);
985 mrb_str_chomp_bang(mrb
, str
);
992 * str.chop! => str or nil
994 * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
995 * or <code>nil</code> if <i>str</i> is the empty string. See also
996 * <code>String#chomp!</code>.
999 mrb_str_chop_bang(mrb_state
*mrb
, mrb_value str
)
1001 struct RString
*s
= mrb_str_ptr(str
);
1003 mrb_str_modify(mrb
, s
);
1007 if (s
->ptr
[len
] == '\n') {
1009 s
->ptr
[len
-1] == '\r') {
1017 return mrb_nil_value();
1023 * str.chop => new_str
1025 * Returns a new <code>String</code> with the last character removed. If the
1026 * string ends with <code>\r\n</code>, both characters are removed. Applying
1027 * <code>chop</code> to an empty string returns an empty
1028 * string. <code>String#chomp</code> is often a safer alternative, as it leaves
1029 * the string unchanged if it doesn't end in a record separator.
1031 * "string\r\n".chop #=> "string"
1032 * "string\n\r".chop #=> "string\n"
1033 * "string\n".chop #=> "string"
1034 * "string".chop #=> "strin"
1038 mrb_str_chop(mrb_state
*mrb
, mrb_value self
)
1041 str
= mrb_str_dup(mrb
, self
);
1042 mrb_str_chop_bang(mrb
, str
);
1049 * str.downcase! => str or nil
1051 * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
1052 * changes were made.
1055 mrb_str_downcase_bang(mrb_state
*mrb
, mrb_value str
)
1059 struct RString
*s
= mrb_str_ptr(str
);
1061 mrb_str_modify(mrb
, s
);
1063 pend
= s
->ptr
+ s
->len
;
1072 if (modify
) return str
;
1073 return mrb_nil_value();
1079 * str.downcase => new_str
1081 * Returns a copy of <i>str</i> with all uppercase letters replaced with their
1082 * lowercase counterparts. The operation is locale insensitive---only
1083 * characters ``A'' to ``Z'' are affected.
1085 * "hEllO".downcase #=> "hello"
1088 mrb_str_downcase(mrb_state
*mrb
, mrb_value self
)
1092 str
= mrb_str_dup(mrb
, self
);
1093 mrb_str_downcase_bang(mrb
, str
);
1100 * str.empty? => true or false
1102 * Returns <code>true</code> if <i>str</i> has a length of zero.
1104 * "hello".empty? #=> false
1105 * "".empty? #=> true
1108 mrb_str_empty_p(mrb_state
*mrb
, mrb_value self
)
1110 struct RString
*s
= mrb_str_ptr(self
);
1112 return mrb_bool_value(s
->len
== 0);
1118 * str.eql?(other) => true or false
1120 * Two strings are equal if they have the same length and content.
1123 mrb_str_eql(mrb_state
*mrb
, mrb_value self
)
1128 mrb_get_args(mrb
, "o", &str2
);
1129 eql_p
= (mrb_type(str2
) == MRB_TT_STRING
) && str_eql(mrb
, self
, str2
);
1131 return mrb_bool_value(eql_p
);
1135 mrb_str_subseq(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
)
1137 struct RString
*orig
, *s
;
1138 mrb_shared_string
*shared
;
1140 orig
= mrb_str_ptr(str
);
1141 str_make_shared(mrb
, orig
);
1142 shared
= orig
->aux
.shared
;
1143 s
= mrb_obj_alloc_string(mrb
);
1144 s
->ptr
= orig
->ptr
+ beg
;
1146 s
->aux
.shared
= shared
;
1147 s
->flags
|= MRB_STR_SHARED
;
1150 return mrb_obj_value(s
);
1154 mrb_str_substr(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
)
1158 if (len
< 0) return mrb_nil_value();
1159 if (!RSTRING_LEN(str
)) {
1162 if (beg
> RSTRING_LEN(str
)) return mrb_nil_value();
1164 beg
+= RSTRING_LEN(str
);
1165 if (beg
< 0) return mrb_nil_value();
1167 if (beg
+ len
> RSTRING_LEN(str
))
1168 len
= RSTRING_LEN(str
) - beg
;
1172 str2
= mrb_str_subseq(mrb
, str
, beg
, len
);
1178 mrb_str_buf_append(mrb_state
*mrb
, mrb_value str
, mrb_value str2
)
1180 mrb_str_cat(mrb
, str
, RSTRING_PTR(str2
), RSTRING_LEN(str2
));
1185 mrb_str_hash(mrb_state
*mrb
, mrb_value str
)
1188 struct RString
*s
= mrb_str_ptr(str
);
1189 mrb_int len
= s
->len
;
1194 key
= key
*65599 + *p
;
1197 key
= key
+ (key
>>5);
1204 * str.hash => fixnum
1206 * Return a hash based on the string's length and content.
1209 mrb_str_hash_m(mrb_state
*mrb
, mrb_value self
)
1211 mrb_int key
= mrb_str_hash(mrb
, self
);
1212 return mrb_fixnum_value(key
);
1218 * str.include? other_str => true or false
1219 * str.include? fixnum => true or false
1221 * Returns <code>true</code> if <i>str</i> contains the given string or
1224 * "hello".include? "lo" #=> true
1225 * "hello".include? "ol" #=> false
1226 * "hello".include? ?h #=> true
1229 mrb_str_include(mrb_state
*mrb
, mrb_value self
)
1235 mrb_get_args(mrb
, "o", &str2
);
1236 if (mrb_fixnum_p(str2
)) {
1237 include_p
= (memchr(RSTRING_PTR(self
), mrb_fixnum(str2
), RSTRING_LEN(self
)) != NULL
);
1240 str2
= mrb_str_to_str(mrb
, str2
);
1241 i
= mrb_str_index(mrb
, self
, str2
, 0);
1243 include_p
= (i
!= -1);
1246 return mrb_bool_value(include_p
);
1252 * str.index(substring [, offset]) => fixnum or nil
1253 * str.index(fixnum [, offset]) => fixnum or nil
1254 * str.index(regexp [, offset]) => fixnum or nil
1256 * Returns the index of the first occurrence of the given
1258 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
1260 * <code>nil</code> if not found.
1261 * If the second parameter is present, it
1262 * specifies the position in the string to begin the search.
1264 * "hello".index('e') #=> 1
1265 * "hello".index('lo') #=> 3
1266 * "hello".index('a') #=> nil
1267 * "hello".index(101) #=> 1(101=0x65='e')
1268 * "hello".index(/[aeiou]/, -3) #=> 4
1271 mrb_str_index_m(mrb_state
*mrb
, mrb_value str
)
1279 mrb_get_args(mrb
, "*", &argv
, &argc
);
1281 pos
= mrb_fixnum(argv
[1]);
1289 sub
= mrb_nil_value();
1292 regexp_check(mrb
, sub
);
1294 pos
+= RSTRING_LEN(str
);
1296 return mrb_nil_value();
1300 switch (mrb_type(sub
)) {
1301 case MRB_TT_FIXNUM
: {
1302 int c
= mrb_fixnum(sub
);
1303 mrb_int len
= RSTRING_LEN(str
);
1304 unsigned char *p
= (unsigned char*)RSTRING_PTR(str
);
1306 for (;pos
<len
;pos
++) {
1307 if (p
[pos
] == c
) return mrb_fixnum_value(pos
);
1309 return mrb_nil_value();
1315 tmp
= mrb_check_string_type(mrb
, sub
);
1316 if (mrb_nil_p(tmp
)) {
1317 mrb_raisef(mrb
, E_TYPE_ERROR
, "type mismatch: %S given", sub
);
1323 pos
= mrb_str_index(mrb
, str
, sub
, pos
);
1327 if (pos
== -1) return mrb_nil_value();
1328 return mrb_fixnum_value(pos
);
1331 #define STR_REPLACE_SHARED_MIN 10
1334 str_replace(mrb_state
*mrb
, struct RString
*s1
, struct RString
*s2
)
1336 if (s2
->flags
& MRB_STR_SHARED
) {
1338 if (s1
->flags
& MRB_STR_SHARED
){
1339 str_decref(mrb
, s1
->aux
.shared
);
1342 mrb_free(mrb
, s1
->ptr
);
1346 s1
->aux
.shared
= s2
->aux
.shared
;
1347 s1
->flags
|= MRB_STR_SHARED
;
1348 s1
->aux
.shared
->refcnt
++;
1350 else if (s2
->len
> STR_REPLACE_SHARED_MIN
) {
1351 str_make_shared(mrb
, s2
);
1355 if (s1
->flags
& MRB_STR_SHARED
) {
1356 str_decref(mrb
, s1
->aux
.shared
);
1357 s1
->flags
&= ~MRB_STR_SHARED
;
1358 s1
->ptr
= (char *)mrb_malloc(mrb
, s2
->len
+1);
1361 s1
->ptr
= (char *)mrb_realloc(mrb
, s1
->ptr
, s2
->len
+1);
1363 memcpy(s1
->ptr
, s2
->ptr
, s2
->len
);
1364 s1
->ptr
[s2
->len
] = 0;
1366 s1
->aux
.capa
= s2
->len
;
1368 return mrb_obj_value(s1
);
1375 * str.replace(other_str) => str
1377 * s = "hello" #=> "hello"
1378 * s.replace "world" #=> "world"
1381 mrb_str_replace(mrb_state
*mrb
, mrb_value str
)
1385 mrb_get_args(mrb
, "S", &str2
);
1386 return str_replace(mrb
, mrb_str_ptr(str
), mrb_str_ptr(str2
));
1392 * String.new(str="") => new_str
1394 * Returns a new string object containing a copy of <i>str</i>.
1397 mrb_str_init(mrb_state
*mrb
, mrb_value self
)
1401 if (mrb_get_args(mrb
, "|S", &str2
) == 1) {
1402 str_replace(mrb
, mrb_str_ptr(self
), mrb_str_ptr(str2
));
1411 * str.intern => symbol
1412 * str.to_sym => symbol
1414 * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
1415 * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
1417 * "Koala".intern #=> :Koala
1418 * s = 'cat'.to_sym #=> :cat
1419 * s == :cat #=> true
1420 * s = '@cat'.to_sym #=> :@cat
1421 * s == :@cat #=> true
1423 * This can also be used to create symbols that cannot be represented using the
1424 * <code>:xxx</code> notation.
1426 * 'cat and dog'.to_sym #=> :"cat and dog"
1429 mrb_str_intern(mrb_state
*mrb
, mrb_value self
)
1433 id
= mrb_intern_str(mrb
, self
);
1434 return mrb_symbol_value(id
);
1437 /* ---------------------------------- */
1439 mrb_obj_as_string(mrb_state
*mrb
, mrb_value obj
)
1443 if (mrb_string_p(obj
)) {
1446 str
= mrb_funcall(mrb
, obj
, "to_s", 0);
1447 if (!mrb_string_p(str
))
1448 return mrb_any_to_s(mrb
, obj
);
1453 mrb_ptr_to_str(mrb_state
*mrb
, void *p
)
1455 struct RString
*p_str
;
1458 uintptr_t n
= (uintptr_t)p
;
1460 p_str
= str_new(mrb
, NULL
, 2 + sizeof(uintptr_t) * CHAR_BIT
/ 4);
1467 *p2
++ = mrb_digitmap
[n
% 16];
1471 p_str
->len
= (mrb_int
)(p2
- p_str
->ptr
);
1479 return mrb_obj_value(p_str
);
1483 mrb_string_type(mrb_state
*mrb
, mrb_value str
)
1485 return mrb_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_str");
1489 mrb_check_string_type(mrb_state
*mrb
, mrb_value str
)
1491 return mrb_check_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_str");
1494 /* ---------------------------------- */
1498 * str.reverse => new_str
1500 * Returns a new string with the characters from <i>str</i> in reverse order.
1502 * "stressed".reverse #=> "desserts"
1505 mrb_str_reverse(mrb_state
*mrb
, mrb_value str
)
1510 if (RSTRING(str
)->len
<= 1) return mrb_str_dup(mrb
, str
);
1512 s2
= str_new(mrb
, 0, RSTRING(str
)->len
);
1513 str_with_class(mrb
, s2
, str
);
1514 s
= RSTRING_PTR(str
); e
= RSTRING_END(str
) - 1;
1520 return mrb_obj_value(s2
);
1526 * str.reverse! => str
1528 * Reverses <i>str</i> in place.
1531 mrb_str_reverse_bang(mrb_state
*mrb
, mrb_value str
)
1533 struct RString
*s
= mrb_str_ptr(str
);
1537 mrb_str_modify(mrb
, s
);
1552 * str.rindex(substring [, fixnum]) => fixnum or nil
1553 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1554 * str.rindex(regexp [, fixnum]) => fixnum or nil
1556 * Returns the index of the last occurrence of the given <i>substring</i>,
1557 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1558 * <code>nil</code> if not found. If the second parameter is present, it
1559 * specifies the position in the string to end the search---characters beyond
1560 * this point will not be considered.
1562 * "hello".rindex('e') #=> 1
1563 * "hello".rindex('l') #=> 3
1564 * "hello".rindex('a') #=> nil
1565 * "hello".rindex(101) #=> 1
1566 * "hello".rindex(/[aeiou]/, -2) #=> 1
1569 mrb_str_rindex(mrb_state
*mrb
, mrb_value str
, mrb_value sub
, mrb_int pos
)
1572 struct RString
*ps
= mrb_str_ptr(str
);
1573 struct RString
*psub
= mrb_str_ptr(sub
);
1574 mrb_int len
= psub
->len
;
1576 /* substring longer than string */
1577 if (ps
->len
< len
) return -1;
1578 if (ps
->len
- pos
< len
) {
1579 pos
= ps
->len
- len
;
1586 if (memcmp(s
, t
, len
) == 0) {
1601 * str.rindex(substring [, fixnum]) => fixnum or nil
1602 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1603 * str.rindex(regexp [, fixnum]) => fixnum or nil
1605 * Returns the index of the last occurrence of the given <i>substring</i>,
1606 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1607 * <code>nil</code> if not found. If the second parameter is present, it
1608 * specifies the position in the string to end the search---characters beyond
1609 * this point will not be considered.
1611 * "hello".rindex('e') #=> 1
1612 * "hello".rindex('l') #=> 3
1613 * "hello".rindex('a') #=> nil
1614 * "hello".rindex(101) #=> 1
1615 * "hello".rindex(/[aeiou]/, -2) #=> 1
1618 mrb_str_rindex_m(mrb_state
*mrb
, mrb_value str
)
1624 int pos
, len
= RSTRING_LEN(str
);
1626 mrb_get_args(mrb
, "*", &argv
, &argc
);
1630 pos
= mrb_fixnum(vpos
);
1634 regexp_check(mrb
, sub
);
1635 return mrb_nil_value();
1638 if (pos
> len
) pos
= len
;
1645 sub
= mrb_nil_value();
1647 regexp_check(mrb
, sub
);
1649 switch (mrb_type(sub
)) {
1650 case MRB_TT_FIXNUM
: {
1651 int c
= mrb_fixnum(sub
);
1652 mrb_int len
= RSTRING_LEN(str
);
1653 unsigned char *p
= (unsigned char*)RSTRING_PTR(str
);
1655 for (pos
=len
;pos
>=0;pos
--) {
1656 if (p
[pos
] == c
) return mrb_fixnum_value(pos
);
1658 return mrb_nil_value();
1664 tmp
= mrb_check_string_type(mrb
, sub
);
1665 if (mrb_nil_p(tmp
)) {
1666 mrb_raisef(mrb
, E_TYPE_ERROR
, "type mismatch: %S given", sub
);
1672 pos
= mrb_str_rindex(mrb
, str
, sub
, pos
);
1673 if (pos
>= 0) return mrb_fixnum_value(pos
);
1676 } /* end of switch (TYPE(sub)) */
1677 return mrb_nil_value();
/*
 * Byte-indexed lookup table for ASCII whitespace.  Only the slots listed
 * below are set; every other entry of the 256-element array is zero.
 */
static const char isspacetable[256] = {
  [9]  = 1,  /* horizontal tab */
  [10] = 1,  /* line feed */
  [11] = 1,  /* vertical tab */
  [12] = 1,  /* form feed */
  [13] = 1,  /* carriage return */
  [32] = 1   /* space */
};

/* Table-driven whitespace test; the cast keeps negative char values in range. */
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
1705 * str.split(pattern=$;, [limit]) => anArray
1707 * Divides <i>str</i> into substrings based on a delimiter, returning an array
1708 * of these substrings.
1710 * If <i>pattern</i> is a <code>String</code>, then its contents are used as
1711 * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
1712 * space, <i>str</i> is split on whitespace, with leading whitespace and runs
1713 * of contiguous whitespace characters ignored.
1715 * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
1716 * pattern matches. Whenever the pattern matches a zero-length string,
1717 * <i>str</i> is split into individual characters.
1719 * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
1720 * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
1721 * split on whitespace as if ` ' were specified.
1723 * If the <i>limit</i> parameter is omitted, trailing null fields are
1724 * suppressed. If <i>limit</i> is a positive number, at most that number of
1725 * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
1726 * string is returned as the only entry in an array). If negative, there is no
1727 * limit to the number of fields returned, and trailing null fields are not suppressed.
1730 * " now's  the time".split #=> ["now's", "the", "time"]
1731 * " now's  the time".split(' ') #=> ["now's", "the", "time"]
1732 * " now's  the time".split(/ /) #=> ["", "now's", "", "the", "time"]
1733 * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
1734 * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
1735 * "hello".split(//, 3) #=> ["h", "e", "llo"]
1736 * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
1738 * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
1739 * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
1740 * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
1741 * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
/*
 * String#split implementation.
 *
 * Reads an optional pattern (spat, any object) and an optional integer limit
 * (lim) through mrb_get_args("|oi").  The work is dispatched on split_type,
 * which is one of awk / string / regexp and starts out as string.
 * The pieces are collected in `result` with mrb_ary_push(); each piece is a
 * mrb_str_subseq() of the receiver.
 *
 * NOTE(review): several statements of this function are not visible in this
 * listing; comments below describe only the lines that are shown.
 */
1745 mrb_str_split_m(mrb_state
*mrb
, mrb_value str
)
1748 mrb_value spat
= mrb_nil_value();
1749 enum {awk
, string
, regexp
} split_type
= string
;
1754 mrb_value result
, tmp
;
1756 argc
= mrb_get_args(mrb
, "|oi", &spat
, &lim
);
/* the limit is only honoured when it is positive and was passed explicitly */
1757 lim_p
= (lim
> 0 && argc
== 2);
/* early exits: empty array for an empty receiver, otherwise a one-element array holding str */
/* NOTE(review): the guarding condition (presumably lim == 1) is not visible here -- confirm */
1760 if (RSTRING_LEN(str
) == 0)
1761 return mrb_ary_new_capa(mrb
, 0);
1762 return mrb_ary_new_from_values(mrb
, 1, &str
);
/* pattern omitted or nil */
1767 if (argc
== 0 || mrb_nil_p(spat
)) {
1771 if (mrb_string_p(spat
)) {
1772 split_type
= string
;
/* a pattern consisting of exactly one space character is special-cased */
1773 if (RSTRING_LEN(spat
) == 1 && RSTRING_PTR(spat
)[0] == ' '){
1782 result
= mrb_ary_new(mrb
);
/* awk mode: walk the bytes of str and classify each with ascii_isspace() */
1784 if (split_type
== awk
) {
1785 char *ptr
= RSTRING_PTR(str
);
1786 char *eptr
= RSTRING_END(str
);
1792 while (ptr
< eptr
) {
/* the GC arena index is saved here and restored after a piece is pushed */
1793 int ai
= mrb_gc_arena_save(mrb
);
1794 c
= (unsigned char)*ptr
++;
1796 if (ascii_isspace(c
)) {
1802 if (lim_p
&& lim
<= i
) break;
1805 else if (ascii_isspace(c
)) {
1806 mrb_ary_push(mrb
, result
, mrb_str_subseq(mrb
, str
, beg
, end
-beg
));
1807 mrb_gc_arena_restore(mrb
, ai
);
/* string mode: split on the bytes of spat */
1817 else if (split_type
== string
) {
1818 char *ptr
= RSTRING_PTR(str
);
1820 char *eptr
= RSTRING_END(str
);
1821 mrb_int slen
= RSTRING_LEN(spat
);
/* first loop: push one-byte substrings */
/* NOTE(review): presumably the empty-separator case; the guarding condition is not visible */
1824 int ai
= mrb_gc_arena_save(mrb
);
1825 while (ptr
< eptr
) {
1826 mrb_ary_push(mrb
, result
, mrb_str_subseq(mrb
, str
, ptr
-temp
, 1));
1827 mrb_gc_arena_restore(mrb
, ai
);
1829 if (lim_p
&& lim
<= ++i
) break;
/* second loop: locate each occurrence of the separator with mrb_memsearch() */
1833 char *sptr
= RSTRING_PTR(spat
);
1834 int ai
= mrb_gc_arena_save(mrb
);
1836 while (ptr
< eptr
&&
1837 (end
= mrb_memsearch(sptr
, slen
, ptr
, eptr
- ptr
)) >= 0) {
1838 mrb_ary_push(mrb
, result
, mrb_str_subseq(mrb
, str
, ptr
- temp
, end
));
1839 mrb_gc_arena_restore(mrb
, ai
);
1841 if (lim_p
&& lim
<= ++i
) break;
/* push the remaining tail: an empty string when beg is already at the end, else the rest of str */
1849 if (RSTRING_LEN(str
) > 0 && (lim_p
|| RSTRING_LEN(str
) > beg
|| lim
< 0)) {
1850 if (RSTRING_LEN(str
) == beg
) {
1851 tmp
= mrb_str_new_empty(mrb
, str
);
1854 tmp
= mrb_str_subseq(mrb
, str
, beg
, RSTRING_LEN(str
)-beg
);
1856 mrb_ary_push(mrb
, result
, tmp
);
/* with no effective limit and lim == 0, pop trailing empty strings from the result */
1858 if (!lim_p
&& lim
== 0) {
1860 while ((len
= RARRAY_LEN(result
)) > 0 &&
1861 (tmp
= RARRAY_PTR(result
)[len
-1], RSTRING_LEN(tmp
) == 0))
1862 mrb_ary_pop(mrb
, result
);
/*
 * Convert the C string `str` to a Fixnum in the given `base`.
 *
 * badcheck selects the failure mode: when non-zero, malformed input jumps to
 * the `bad` path, which raises ArgumentError ("invalid string for number");
 * when zero, the visible failure paths return Fixnum 0 instead.
 * A base outside 2..36 raises ArgumentError ("illegal radix"), and a parsed
 * value above MRB_INT_MAX raises ArgumentError ("too big for integer").
 *
 * NOTE(review): several statements of this function are not visible in this
 * listing; comments below describe only the lines that are shown.
 */
1869 mrb_cstr_to_inum(mrb_state
*mrb
, const char *str
, int base
, int badcheck
)
/* local helper macros: ASCII digit test and digit-character to value mapping */
1878 #define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
1879 #define conv_digit(c) \
1880 (!ISASCII(c) ? -1 : \
1881 isdigit(c) ? ((c) - '0') : \
1882 islower(c) ? ((c) - 'a' + 10) : \
1883 isupper(c) ? ((c) - 'A' + 10) : \
1887 if (badcheck
) goto bad
;
1888 return mrb_fixnum_value(0);
/* skip leading whitespace */
1890 while (ISSPACE(*str
)) str
++;
/* optional sign */
1892 if (str
[0] == '+') {
1895 else if (str
[0] == '-') {
/* a second sign character is malformed input */
1899 if (str
[0] == '+' || str
[0] == '-') {
1900 if (badcheck
) goto bad
;
1901 return mrb_fixnum_value(0);
1904 if (str
[0] == '0') {
1922 else if (base
< -1) {
/* radix prefixes recognised per base: 0b/0B, 0o/0O, 0d/0D, 0x/0X */
1931 if (str
[0] == '0' && (str
[1] == 'b'||str
[1] == 'B')) {
1938 if (str
[0] == '0' && (str
[1] == 'o'||str
[1] == 'O')) {
1941 case 4: case 5: case 6: case 7:
1944 if (str
[0] == '0' && (str
[1] == 'd'||str
[1] == 'D')) {
1947 case 9: case 11: case 12: case 13: case 14: case 15:
1950 if (str
[0] == '0' && (str
[1] == 'x'||str
[1] == 'X')) {
1955 if (base
< 2 || 36 < base
) {
1956 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "illegal radix %S", mrb_fixnum_value(base
));
1959 } /* end of switch (base) { */
1960 if (*str
== '0') { /* squeeze preceding 0s */
/* also steps over '_' characters mixed into the run of zeros */
1962 while ((c
= *++str
) == '0' || c
== '_') {
/* step back one character when the zeros were followed by end of string or whitespace */
1970 if (!(c
= *str
) || ISSPACE(c
)) --str
;
/* the first digit must be valid for the base */
1974 if (c
< 0 || c
>= base
) {
1975 if (badcheck
) goto bad
;
1976 return mrb_fixnum_value(0);
/* the numeric conversion itself is delegated to strtoul() */
1979 n
= strtoul((char*)str
, &end
, base
);
1980 if (n
> MRB_INT_MAX
) {
1981 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "string (%S) too big for integer", mrb_str_new_cstr(mrb
, str
));
1985 if (end
== str
) goto bad
; /* no number */
/* trailing whitespace is tolerated; anything else after the number is an error */
1986 while (*end
&& ISSPACE(*end
)) end
++;
1987 if (*end
) goto bad
; /* trailing garbage */
/* a non-zero `sign` yields the value as is, zero yields its negation */
1990 return mrb_fixnum_value(sign
? val
: -val
);
/* bad: report the offending input */
1992 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "invalid string for number(%S)", mrb_str_new_cstr(mrb
, str
));
1994 return mrb_fixnum_value(0);
/*
 * Validate that the String referenced by *ptr can be used as a C string.
 *
 * Raises ArgumentError ("string contains null byte") when `s` is NULL or when
 * the stored length differs from strlen(s), i.e. a NUL byte occurs before the
 * recorded end of the string.
 *
 * NOTE(review): the declaration of `s` and the return statement are not
 * visible in this listing; presumably `s` is the string's byte buffer and is
 * what gets returned -- confirm.
 */
1998 mrb_string_value_cstr(mrb_state
*mrb
, mrb_value
*ptr
)
2000 struct RString
*ps
= mrb_str_ptr(*ptr
);
2003 if (!s
|| ps
->len
!= strlen(s
)) {
2004 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string contains null byte");
/*
 * Convert a string value to an integer in `base` by handing its bytes to
 * mrb_cstr_to_inum().  `base` and `badcheck` are passed through unchanged.
 *
 * The argument is first passed through mrb_str_to_str().  The byte pointer is
 * obtained either via mrb_string_value_cstr() or via RSTRING_PTR().
 * NOTE(review): the condition choosing between the two is not visible here
 * (presumably badcheck) -- confirm.
 */
2010 mrb_str_to_inum(mrb_state
*mrb
, mrb_value str
, int base
, int badcheck
)
2015 str
= mrb_str_to_str(mrb
, str
);
2017 s
= mrb_string_value_cstr(mrb
, &str
);
2020 s
= RSTRING_PTR(str
);
2023 len
= RSTRING_LEN(str
);
/* if the byte after the last character is not NUL, build a fresh string from the same bytes */
2024 if (s
[len
]) { /* no sentinel somehow */
2025 struct RString
*temp_str
= str_new(mrb
, s
, len
);
2029 return mrb_cstr_to_inum(mrb
, s
, base
, badcheck
);
2035 * str.to_i(base=10) => integer
2037 * Returns the result of interpreting leading characters in <i>str</i> as an
2038 * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
2039 * end of a valid number are ignored. If there is not a valid number at the
2040 * start of <i>str</i>, <code>0</code> is returned. This method never raises an exception.
2043 * "12345".to_i #=> 12345
2044 * "99 red balloons".to_i #=> 99
2046 * "0a".to_i(16) #=> 10
2047 * "hello".to_i #=> 0
2048 * "1100101".to_i(2) #=> 101
2049 * "1100101".to_i(8) #=> 294977
2050 * "1100101".to_i(10) #=> 1100101
2051 * "1100101".to_i(16) #=> 17826049
/*
 * String#to_i: method entry point that parses the receiver as an integer.
 *
 * Arguments are collected with mrb_get_args("*"); when a base is supplied it
 * is read from argv[0] with mrb_fixnum().  The conversion is delegated to
 * mrb_str_to_inum() with badcheck = 0.
 *
 * NOTE(review): the default base and the condition guarding the
 * "illegal radix" ArgumentError are not visible in this listing.
 */
2054 mrb_str_to_i(mrb_state
*mrb
, mrb_value self
)
2060 mrb_get_args(mrb
, "*", &argv
, &argc
);
/* NOTE(review): argv[0] is read with mrb_fixnum() and no type check is visible here */
2064 base
= mrb_fixnum(argv
[0]);
2067 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "illegal radix %S", mrb_fixnum_value(base
));
2069 return mrb_str_to_inum(mrb
, self
, base
, 0/*Qfalse*/);
/*
 * Convert the C string `p` to a double using strtod().
 *
 * Leading whitespace is skipped.  When the first conversion stops at an
 * underscore (see the "remove underscores between digits" section), the input
 * is copied into a bounded stack buffer with the underscores dropped and is
 * converted again.  Malformed input is routed to the `bad` label.
 *
 * NOTE(review): several statements of this function are not visible in this
 * listing, including the body of the `bad` path and the return statements;
 * comments below describe only the lines that are shown.
 */
2073 mrb_cstr_to_dbl(mrb_state
*mrb
, const char * p
, int badcheck
)
2077 #if !defined(DBL_DIG)
2081 enum {max_width
= 20};
/* helper macro: clamps a width `w` to max_width and selects an ellipsis string */
2082 #define OutOfRange() (((w = end - p) > max_width) ? \
2083 (w = max_width, ellipsis = "...") : \
2084 (w = (int)(end - p), ellipsis = ""))
/* skip leading whitespace */
2087 while (ISSPACE(*p
)) p
++;
/* "0x"/"0X" prefixed input is special-cased when badcheck is off (body not visible) */
2089 if (!badcheck
&& p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2092 d
= strtod(p
, &end
);
2096 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "invalid string for float(%S)", mrb_str_new_cstr(mrb
, p
));
/* scratch buffer for the underscore-free copy; `e` points at its last byte */
2102 char buf
[DBL_DIG
* 4 + 10];
2104 char *e
= buf
+ sizeof(buf
) - 1;
/* copy the already-parsed prefix, remembering the last character copied */
2107 while (p
< end
&& n
< e
) prev
= *n
++ = *p
++;
2110 /* remove underscores between digits */
/* an underscore must have a digit before it ... */
2112 if (n
== buf
|| !ISDIGIT(prev
)) goto bad
;
/* ... and a digit after it */
2114 if (!ISDIGIT(*p
)) goto bad
;
/* skip a run of consecutive underscores */
2117 while (*++p
== '_');
2122 if (n
< e
) *n
++ = prev
;
2127 if (!badcheck
&& p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2131 d
= strtod(p
, &end
);
/* nothing was consumed: not a number */
2133 if (!end
|| p
== end
) goto bad
;
/* skip trailing whitespace */
2134 while (*end
&& ISSPACE(*end
)) end
++;
/*
 * Convert a string value to a double by handing its bytes to
 * mrb_cstr_to_dbl().  `badcheck` is passed through unchanged.
 *
 * The argument is first passed through mrb_str_to_str().  With badcheck set,
 * an embedded NUL byte raises ArgumentError
 * ("string for Float contains null byte").
 */
2142 mrb_str_to_dbl(mrb_state
*mrb
, mrb_value str
, int badcheck
)
2147 str
= mrb_str_to_str(mrb
, str
);
2148 s
= RSTRING_PTR(str
);
2149 len
= RSTRING_LEN(str
);
/* memchr() scans exactly len bytes, so only NULs inside the string are detected */
2151 if (badcheck
&& memchr(s
, '\0', len
)) {
2152 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string for Float contains null byte");
/* if the byte after the last character is not NUL, build a fresh string from the same bytes */
2154 if (s
[len
]) { /* no sentinel somehow */
2155 struct RString
*temp_str
= str_new(mrb
, s
, len
);
2159 return mrb_cstr_to_dbl(mrb
, s
, badcheck
);
2167 * Returns the result of interpreting leading characters in <i>str</i> as a
2168 * floating point number. Extraneous characters past the end of a valid number
2169 * are ignored. If there is not a valid number at the start of <i>str</i>,
2170 * <code>0.0</code> is returned. This method never raises an exception.
2172 * "123.45e1".to_f #=> 1234.5
2173 * "45.67 degrees".to_f #=> 45.67
2174 * "thx1138".to_f #=> 0.0
/*
 * String#to_f: convert the receiver with mrb_str_to_dbl() (badcheck = 0) and
 * wrap the resulting double with mrb_float_value().
 */
2177 mrb_str_to_f(mrb_state
*mrb
, mrb_value self
)
2179 return mrb_float_value(mrb
, mrb_str_to_dbl(mrb
, self
, 0/*Qfalse*/));
2188 * Returns the receiver.
/*
 * String#to_s / String#to_str.
 *
 * When the receiver's class is not exactly mrb->string_class (i.e. it is an
 * instance of a subclass), a copy made with mrb_str_dup() is returned.
 * NOTE(review): the other return path is not visible in this listing;
 * presumably it returns the receiver itself -- confirm.
 */
2191 mrb_str_to_s(mrb_state
*mrb
, mrb_value self
)
2193 if (mrb_obj_class(mrb
, self
) != mrb
->string_class
) {
2194 return mrb_str_dup(mrb
, self
);
2202 * str.upcase! => str or nil
2204 * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes were made.
/*
 * String#upcase!: upcase the receiver in place.
 *
 * Returns str when the `modify` flag is set, nil otherwise.
 * NOTE(review): the loop that converts the bytes and sets `modify` is not
 * visible in this listing.
 */
2208 mrb_str_upcase_bang(mrb_state
*mrb
, mrb_value str
)
2210 struct RString
*s
= mrb_str_ptr(str
);
/* mrb_str_modify() is called before the byte pointers are fetched */
2214 mrb_str_modify(mrb
, s
);
2215 p
= RSTRING_PTR(str
);
2216 pend
= RSTRING_END(str
);
2225 if (modify
) return str
;
2226 return mrb_nil_value();
2232 * str.upcase => new_str
2234 * Returns a copy of <i>str</i> with all lowercase letters replaced with their
2235 * uppercase counterparts. The operation is locale insensitive---only
2236 * characters ``a'' to ``z'' are affected.
2238 * "hEllO".upcase #=> "HELLO"
/*
 * String#upcase: non-destructive variant.  Duplicates the receiver with
 * mrb_str_dup() and applies mrb_str_upcase_bang() to the copy.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the copy is returned -- confirm.
 */
2241 mrb_str_upcase(mrb_state
*mrb
, mrb_value self
)
2245 str
= mrb_str_dup(mrb
, self
);
2246 mrb_str_upcase_bang(mrb
, str
);
2252 * str.dump -> new_str
2254 * Produces a version of <i>str</i> with all nonprinting characters replaced by
2255 * <code>\nnn</code> notation and all special characters escaped.
/*
 * String#dump: build an escaped copy of str.
 *
 * Works in two passes over the bytes of str: the first pass accumulates the
 * output length in `len`, then a result string of that size is allocated with
 * str_new() and tagged via str_with_class(), and the second pass writes the
 * escaped bytes through `q`.
 *
 * NOTE(review): several statements of this function are not visible in this
 * listing; comments below describe only the lines that are shown.
 */
2258 mrb_str_dump(mrb_state
*mrb
, mrb_value str
)
2261 const char *p
, *pend
;
2263 struct RString
*result
;
/* pass 1: measure */
2266 p
= RSTRING_PTR(str
); pend
= p
+ RSTRING_LEN(str
);
2268 unsigned char c
= *p
++;
/* characters that have a dedicated escape sequence */
2270 case '"': case '\\':
2271 case '\n': case '\r':
2272 case '\t': case '\f':
2273 case '\013': case '\010': case '\007': case '\033':
/* one extra byte is counted when IS_EVSTR(p, pend) holds, matching the backslash emitted in pass 2 */
2278 len
+= IS_EVSTR(p
, pend
) ? 2 : 1;
2286 len
+= 4; /* \NNN */
/* allocate the result, then pass 2: emit */
2292 result
= str_new(mrb
, 0, len
);
2293 str_with_class(mrb
, result
, str
);
2294 p
= RSTRING_PTR(str
); pend
= p
+ RSTRING_LEN(str
);
2299 unsigned char c
= *p
++;
2349 if (IS_EVSTR(p
, pend
)) *q
++ = '\\';
/* octal escape: digits are produced from least to most significant */
2359 q
[2] = '0' + c
% 8; c
/= 8;
2360 q
[1] = '0' + c
% 8; c
/= 8;
2367 return mrb_obj_value(result
);
/*
 * Append `len` bytes starting at `ptr` to the string `str`.
 *
 * Raises ArgumentError when `len`, reinterpreted as mrb_int, is negative,
 * which also catches sizes too large to represent as mrb_int.
 * The append itself is done by str_buf_cat().
 * NOTE(review): the return statement is not visible in this listing.
 */
2371 mrb_str_cat(mrb_state
*mrb
, mrb_value str
, const char *ptr
, size_t len
)
2373 if ((mrb_int
)len
< 0) {
2374 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "negative string size (or size too big)");
2376 str_buf_cat(mrb
, mrb_str_ptr(str
), ptr
, len
);
/*
 * Append the NUL-terminated C string `ptr` to `str`.  The length is taken
 * with strlen(), so `ptr` must not be NULL.  Returns whatever mrb_str_cat()
 * returns.
 */
2381 mrb_str_cat_cstr(mrb_state
*mrb
, mrb_value str
, const char *ptr
)
2383 return mrb_str_cat(mrb
, str
, ptr
, strlen(ptr
));
/*
 * Append `str2` to `str`.  `str2` is first passed through mrb_str_to_str(),
 * then the append is delegated to mrb_str_buf_append(), whose result is
 * returned.
 */
2387 mrb_str_append(mrb_state
*mrb
, mrb_value str
, mrb_value str2
)
2389 str2
= mrb_str_to_str(mrb
, str2
);
2390 return mrb_str_buf_append(mrb
, str
, str2
);
2393 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
2397 * str.inspect -> string
2399 * Returns a printable version of _str_, surrounded by quote marks,
2400 * with special characters escaped.
2404 * str.inspect #=> "\"hel\\bo\""
/*
 * String#inspect: build a double-quoted, escaped representation of str.
 *
 * The result starts as a single '"' and each input byte is appended through
 * mrb_str_buf_cat(), using the scratch buffer `buf` for escape sequences;
 * a closing '"' is appended at the end.
 *
 * NOTE(review): several statements of this function are not visible in this
 * listing (including how `c` is loaded and the return statement); comments
 * below describe only the lines that are shown.
 */
2407 mrb_str_inspect(mrb_state
*mrb
, mrb_value str
)
2409 const char *p
, *pend
;
2410 char buf
[CHAR_ESC_LEN
+ 1];
2411 mrb_value result
= mrb_str_new(mrb
, "\"", 1);
2413 p
= RSTRING_PTR(str
); pend
= RSTRING_END(str
);
2414 for (;p
< pend
; p
++) {
/* quote, backslash, and '#' when IS_EVSTR(p, pend) holds, get a backslash prefix */
2418 if (c
== '"'|| c
== '\\' || (c
== '#' && IS_EVSTR(p
, pend
))) {
2419 buf
[0] = '\\'; buf
[1] = c
;
2420 mrb_str_buf_cat(mrb
, result
, buf
, 2);
2425 mrb_str_buf_cat(mrb
, result
, buf
, 1);
/* control characters with a mnemonic escape letter; cc stays 0 when there is none */
2429 case '\n': cc
= 'n'; break;
2430 case '\r': cc
= 'r'; break;
2431 case '\t': cc
= 't'; break;
2432 case '\f': cc
= 'f'; break;
2433 case '\013': cc
= 'v'; break;
2434 case '\010': cc
= 'b'; break;
2435 case '\007': cc
= 'a'; break;
2436 case 033: cc
= 'e'; break;
2437 default: cc
= 0; break;
2442 mrb_str_buf_cat(mrb
, result
, buf
, 2);
/* three octal digits are written into buf[1..3], least significant first, and 4 bytes are emitted */
2447 buf
[3] = '0' + c
% 8; c
/= 8;
2448 buf
[2] = '0' + c
% 8; c
/= 8;
2449 buf
[1] = '0' + c
% 8;
2450 mrb_str_buf_cat(mrb
, result
, buf
, 4);
/* closing quote */
2454 mrb_str_buf_cat(mrb
, result
, "\"", 1);
2461 * str.bytes -> array of fixnums
2463 * Returns an array of bytes in _str_.
2466 * str.bytes #=> [104, 101, 108, 108, 111]
/*
 * String#bytes: collect the bytes of str into an array of Fixnums.
 *
 * The array is created with capacity s->len, and bytes are read as unsigned
 * char so each pushed value is in 0..255.
 * NOTE(review): the loop header and the return statement are not visible in
 * this listing.
 */
2469 mrb_str_bytes(mrb_state
*mrb
, mrb_value str
)
2471 struct RString
*s
= mrb_str_ptr(str
);
2472 mrb_value a
= mrb_ary_new_capa(mrb
, s
->len
);
2473 unsigned char *p
= (unsigned char *)(s
->ptr
), *pend
= p
+ s
->len
;
2476 mrb_ary_push(mrb
, a
, mrb_fixnum_value(p
[0]));
2482 /* ---------------------------*/
2484 mrb_init_string(mrb_state
*mrb
)
2488 s
= mrb
->string_class
= mrb_define_class(mrb
, "String", mrb
->object_class
);
2489 MRB_SET_INSTANCE_TT(s
, MRB_TT_STRING
);
2490 mrb_include_module(mrb
, s
, mrb_class_get(mrb
, "Comparable"));
2493 mrb_define_method(mrb
, s
, "bytesize", mrb_str_bytesize
, MRB_ARGS_NONE());
2495 mrb_define_method(mrb
, s
, "<=>", mrb_str_cmp_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
2496 mrb_define_method(mrb
, s
, "==", mrb_str_equal_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
2497 mrb_define_method(mrb
, s
, "+", mrb_str_plus_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
2498 mrb_define_method(mrb
, s
, "*", mrb_str_times
, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
2499 mrb_define_method(mrb
, s
, "[]", mrb_str_aref_m
, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
2500 mrb_define_method(mrb
, s
, "capitalize", mrb_str_capitalize
, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
2501 mrb_define_method(mrb
, s
, "capitalize!", mrb_str_capitalize_bang
, MRB_ARGS_REQ(1)); /* 15.2.10.5.8 */
2502 mrb_define_method(mrb
, s
, "chomp", mrb_str_chomp
, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
2503 mrb_define_method(mrb
, s
, "chomp!", mrb_str_chomp_bang
, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
2504 mrb_define_method(mrb
, s
, "chop", mrb_str_chop
, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */
2505 mrb_define_method(mrb
, s
, "chop!", mrb_str_chop_bang
, MRB_ARGS_REQ(1)); /* 15.2.10.5.12 */
2506 mrb_define_method(mrb
, s
, "downcase", mrb_str_downcase
, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
2507 mrb_define_method(mrb
, s
, "downcase!", mrb_str_downcase_bang
, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
2508 mrb_define_method(mrb
, s
, "empty?", mrb_str_empty_p
, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
2509 mrb_define_method(mrb
, s
, "eql?", mrb_str_eql
, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
2511 mrb_define_method(mrb
, s
, "hash", mrb_str_hash_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.20 */
2512 mrb_define_method(mrb
, s
, "include?", mrb_str_include
, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
2513 mrb_define_method(mrb
, s
, "index", mrb_str_index_m
, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
2514 mrb_define_method(mrb
, s
, "initialize", mrb_str_init
, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
2515 mrb_define_method(mrb
, s
, "initialize_copy", mrb_str_replace
, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
2516 mrb_define_method(mrb
, s
, "intern", mrb_str_intern
, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
2517 mrb_define_method(mrb
, s
, "length", mrb_str_size
, MRB_ARGS_NONE()); /* 15.2.10.5.26 */
2518 mrb_define_method(mrb
, s
, "replace", mrb_str_replace
, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
2519 mrb_define_method(mrb
, s
, "reverse", mrb_str_reverse
, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
2520 mrb_define_method(mrb
, s
, "reverse!", mrb_str_reverse_bang
, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
2521 mrb_define_method(mrb
, s
, "rindex", mrb_str_rindex_m
, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
2522 mrb_define_method(mrb
, s
, "size", mrb_str_size
, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
2523 mrb_define_method(mrb
, s
, "slice", mrb_str_aref_m
, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
2524 mrb_define_method(mrb
, s
, "split", mrb_str_split_m
, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
2526 mrb_define_method(mrb
, s
, "to_f", mrb_str_to_f
, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
2527 mrb_define_method(mrb
, s
, "to_i", mrb_str_to_i
, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
2528 mrb_define_method(mrb
, s
, "to_s", mrb_str_to_s
, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
2529 mrb_define_method(mrb
, s
, "to_str", mrb_str_to_s
, MRB_ARGS_NONE());
2530 mrb_define_method(mrb
, s
, "to_sym", mrb_str_intern
, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
2531 mrb_define_method(mrb
, s
, "upcase", mrb_str_upcase
, MRB_ARGS_REQ(1)); /* 15.2.10.5.42 */
2532 mrb_define_method(mrb
, s
, "upcase!", mrb_str_upcase_bang
, MRB_ARGS_REQ(1)); /* 15.2.10.5.43 */
2533 mrb_define_method(mrb
, s
, "inspect", mrb_str_inspect
, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
2534 mrb_define_method(mrb
, s
, "bytes", mrb_str_bytes
, MRB_ARGS_NONE());