2 ** string.c - String class
4 ** See Copyright Notice in mruby.h
9 /* Some versions of VC++
10 * have SIZE_MAX in stdint.h
18 #include "mruby/array.h"
19 #include "mruby/class.h"
20 #include "mruby/range.h"
21 #include "mruby/string.h"
24 const char mrb_digitmap
[] = "0123456789abcdefghijklmnopqrstuvwxyz";
26 typedef struct mrb_shared_string
{
33 #define MRB_STR_SHARED 1
34 #define MRB_STR_NOFREE 2
36 static mrb_value
str_replace(mrb_state
*mrb
, struct RString
*s1
, struct RString
*s2
);
37 static mrb_value
mrb_str_subseq(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
);
39 #define RESIZE_CAPA(s,capacity) do {\
40 s->ptr = (char *)mrb_realloc(mrb, s->ptr, (capacity)+1);\
41 s->aux.capa = capacity;\
/*
 * str_decref (excerpt): release logic for a shared string record.
 *
 *   mrb    - interpreter state, passed through to mrb_free
 *   shared - the shared record being released
 *
 * When shared->refcnt is 0 the character buffer (shared->ptr) is freed
 * unless the record is flagged nofree, and then the record itself is freed.
 * NOTE(review): the statement that decrements refcnt is not visible in this
 * excerpt -- presumably it precedes the test below; confirm against the
 * full file.
 */
45 str_decref(mrb_state
*mrb
, mrb_shared_string
*shared
)
48 if (shared
->refcnt
== 0) {
/* nofree marks a buffer this code must not release */
49 if (!shared
->nofree
) {
50 mrb_free(mrb
, shared
->ptr
);
/* the record itself is always released once the count is zero */
52 mrb_free(mrb
, shared
);
/*
 * mrb_str_modify (excerpt): detach `s` from shared or no-free storage so
 * that its buffer can be written.
 *
 * Shared case (MRB_STR_SHARED set):
 *   - if this string is the only holder (refcnt == 1) and points at the
 *     start of the shared buffer, it adopts that buffer: capa is taken
 *     from shared->len, the buffer is NUL-terminated at s->len and only
 *     the shared record is freed;
 *   - the excerpt also shows a fresh allocation of len+1 bytes followed by
 *     str_decref() on the shared record (presumably the "else" path that
 *     copies the bytes; the copy itself is not visible here);
 *   - MRB_STR_SHARED is then cleared.
 * No-free case (MRB_STR_NOFREE set):
 *   - a private buffer of s->len+1 bytes is allocated, s->len bytes are
 *     copied from `p` and the result is NUL-terminated.
 *     NOTE(review): `p` is presumably the previous s->ptr; its assignment
 *     is not visible in this excerpt.
 */
57 mrb_str_modify(mrb_state
*mrb
, struct RString
*s
)
59 if (s
->flags
& MRB_STR_SHARED
) {
60 mrb_shared_string
*shared
= s
->aux
.shared
;
/* sole owner starting at the head of the buffer: take it over in place */
62 if (shared
->refcnt
== 1 && s
->ptr
== shared
->ptr
) {
64 s
->aux
.capa
= shared
->len
;
65 s
->ptr
[s
->len
] = '\0';
/* only the record is freed; the character buffer now belongs to s */
66 mrb_free(mrb
, shared
);
/* otherwise a private buffer (len bytes + terminator) is allocated */
74 ptr
= (char *)mrb_malloc(mrb
, (size_t)len
+ 1);
81 str_decref(mrb
, shared
);
83 s
->flags
&= ~MRB_STR_SHARED
;
/* no-free storage must be copied before it can be modified */
86 if (s
->flags
& MRB_STR_NOFREE
) {
89 s
->ptr
= (char *)mrb_malloc(mrb
, (size_t)s
->len
+1);
91 memcpy(s
->ptr
, p
, s
->len
);
93 s
->ptr
[s
->len
] = '\0';
100 mrb_str_resize(mrb_state
*mrb
, mrb_value str
, mrb_int len
)
103 struct RString
*s
= mrb_str_ptr(str
);
105 mrb_str_modify(mrb
, s
);
108 if (slen
< len
|| slen
- len
> 256) {
112 s
->ptr
[len
] = '\0'; /* sentinel */
118 str_mod_check(mrb_state
*mrb
, mrb_value str
, char *p
, mrb_int len
)
120 struct RString
*s
= mrb_str_ptr(str
);
122 if (s
->ptr
!= p
|| s
->len
!= len
) {
123 mrb_raise(mrb
, E_RUNTIME_ERROR
, "string modified");
127 #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
129 /* char offset to byte offset */
131 mrb_str_offset(mrb_state
*mrb
, mrb_value str
, int pos
)
136 static struct RString
*
137 str_new(mrb_state
*mrb
, const char *p
, mrb_int len
)
141 s
= mrb_obj_alloc_string(mrb
);
144 s
->ptr
= (char *)mrb_malloc(mrb
, (size_t)len
+1);
146 memcpy(s
->ptr
, p
, len
);
153 str_with_class(mrb_state
*mrb
, struct RString
*s
, mrb_value obj
)
155 s
->c
= mrb_str_ptr(obj
)->c
;
/*
 * mrb_str_new_empty: create a zero-length string.
 *
 *   mrb - interpreter state
 *   str - existing string object handed to str_with_class()
 *
 * Builds the string with str_new(mrb, 0, 0), passes it together with `str`
 * to str_with_class(), and returns it wrapped as an mrb_value.
 */
159 mrb_str_new_empty(mrb_state
*mrb
, mrb_value str
)
161 struct RString
*s
= str_new(mrb
, 0, 0);
163 str_with_class(mrb
, s
, str
);
164 return mrb_obj_value(s
);
167 #ifndef MRB_STR_BUF_MIN_SIZE
168 # define MRB_STR_BUF_MIN_SIZE 128
172 mrb_str_buf_new(mrb_state
*mrb
, mrb_int capa
)
176 s
= mrb_obj_alloc_string(mrb
);
178 if (capa
< MRB_STR_BUF_MIN_SIZE
) {
179 capa
= MRB_STR_BUF_MIN_SIZE
;
183 s
->ptr
= (char *)mrb_malloc(mrb
, capa
+1);
186 return mrb_obj_value(s
);
/*
 * str_buf_cat (excerpt): append `len` bytes from `ptr` to the string `s`,
 * growing its buffer when needed.
 *
 *   mrb - interpreter state
 *   s   - destination string; made modifiable via mrb_str_modify() first
 *   ptr - source bytes
 *   len - number of bytes to append; 0 returns immediately
 *
 * Raises E_ARGUMENT_ERROR ("string sizes too big") when s->len plus len
 * would reach MRB_INT_MAX.
 */
190 str_buf_cat(mrb_state
*mrb
, struct RString
*s
, const char *ptr
, size_t len
)
196 mrb_str_modify(mrb
, s
);
/* Detect a source that points into s's own buffer.  The branch body is
 * not visible here; presumably it records the offset so the source can be
 * re-derived after reallocation -- confirm against the full file. */
197 if (ptr
>= s
->ptr
&& ptr
<= s
->ptr
+ s
->len
) {
200 if (len
== 0) return;
/* guard the length sum against exceeding MRB_INT_MAX */
202 if (s
->len
>= MRB_INT_MAX
- (mrb_int
)len
) {
203 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string sizes too big");
/* grow capa until it can hold `total` */
207 while (total
> capa
) {
208 if (capa
+ 1 >= MRB_INT_MAX
/ 2) {
/* NOTE(review): (total + 4095) / 4096 yields a count of 4096-byte units,
 * not a byte capacity; this looks like a missing "* 4096" -- confirm. */
209 capa
= (total
+ 4095) / 4096;
212 capa
= (capa
+ 1) * 2;
214 RESIZE_CAPA(s
, capa
);
/* copy the new bytes directly after the existing content */
219 memcpy(s
->ptr
+ s
->len
, ptr
, len
);
221 s
->ptr
[total
] = '\0'; /* sentinel */
225 mrb_str_buf_cat(mrb_state
*mrb
, mrb_value str
, const char *ptr
, size_t len
)
227 if (len
== 0) return str
;
228 str_buf_cat(mrb
, mrb_str_ptr(str
), ptr
, len
);
233 mrb_str_new(mrb_state
*mrb
, const char *p
, size_t len
)
237 s
= str_new(mrb
, p
, len
);
238 return mrb_obj_value(s
);
242 * call-seq: (Caution! NULL string)
243 * String.new(str="") => new_str
245 * Returns a new string object containing a copy of <i>str</i>.
249 mrb_str_new_cstr(mrb_state
*mrb
, const char *p
)
256 if ((mrb_int
)len
< 0) {
257 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "argument too big");
264 s
= str_new(mrb
, p
, len
);
266 return mrb_obj_value(s
);
270 mrb_str_new_static(mrb_state
*mrb
, const char *p
, size_t len
)
274 s
= mrb_obj_alloc_string(mrb
);
276 s
->aux
.capa
= 0; /* nofree */
278 s
->flags
= MRB_STR_NOFREE
;
279 return mrb_obj_value(s
);
/*
 * mrb_gc_free_str: release the storage behind a string object.
 *
 *   mrb - interpreter state
 *   str - string whose buffer is being released
 *
 * A shared string hands its shared record to str_decref(); a string that
 * is neither shared nor flagged MRB_STR_NOFREE frees its own buffer.
 * Nothing is freed for a non-shared MRB_STR_NOFREE string.
 */
283 mrb_gc_free_str(mrb_state
*mrb
, struct RString
*str
)
285 if (str
->flags
& MRB_STR_SHARED
)
286 str_decref(mrb
, str
->aux
.shared
);
287 else if ((str
->flags
& MRB_STR_NOFREE
) == 0)
288 mrb_free(mrb
, str
->ptr
);
/*
 * mrb_str_to_cstr (excerpt): produce a C-string view of a String value.
 *
 *   mrb  - interpreter state
 *   str0 - value that must be a String
 *
 * Raises E_TYPE_ERROR ("expected String") when str0 is not a String, and
 * E_ARGUMENT_ERROR ("string contains null byte") when the copied content
 * has an embedded NUL.  The return statement is not visible in this
 * excerpt.
 */
292 mrb_str_to_cstr(mrb_state
*mrb
, mrb_value str0
)
296 if (!mrb_string_p(str0
)) {
297 mrb_raise(mrb
, E_TYPE_ERROR
, "expected String");
/* work on a fresh copy made by str_new() */
300 s
= str_new(mrb
, RSTRING_PTR(str0
), RSTRING_LEN(str0
));
/* the XOR is non-zero exactly when strlen() and the stored length differ,
 * i.e. a NUL byte occurs before the end of the content */
301 if ((strlen(s
->ptr
) ^ s
->len
) != 0) {
302 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string contains null byte");
308 str_make_shared(mrb_state
*mrb
, struct RString
*s
)
310 if (!(s
->flags
& MRB_STR_SHARED
)) {
311 mrb_shared_string
*shared
= (mrb_shared_string
*)mrb_malloc(mrb
, sizeof(mrb_shared_string
));
314 if (s
->flags
& MRB_STR_NOFREE
) {
315 shared
->nofree
= TRUE
;
316 shared
->ptr
= s
->ptr
;
317 s
->flags
&= ~MRB_STR_NOFREE
;
320 shared
->nofree
= FALSE
;
321 if (s
->aux
.capa
> s
->len
) {
322 s
->ptr
= shared
->ptr
= (char *)mrb_realloc(mrb
, s
->ptr
, s
->len
+1);
325 shared
->ptr
= s
->ptr
;
328 shared
->len
= s
->len
;
329 s
->aux
.shared
= shared
;
330 s
->flags
|= MRB_STR_SHARED
;
336 * char* str = String("abcd"), len=strlen("abcd")
338 * Returns a new string object containing a copy of <i>str</i>.
341 mrb_str_body(mrb_value str
, int *len_p
)
343 struct RString
*s
= mrb_str_ptr(str
);
350 * call-seq: (Caution! String("abcd") change)
351 * String("abcdefg") = String("abcd") + String("efg")
353 * Returns a new string object containing a copy of <i>str</i>.
356 mrb_str_concat(mrb_state
*mrb
, mrb_value self
, mrb_value other
)
358 struct RString
*s1
= mrb_str_ptr(self
), *s2
;
361 mrb_str_modify(mrb
, s1
);
362 if (!mrb_string_p(other
)) {
363 other
= mrb_str_to_str(mrb
, other
);
365 s2
= mrb_str_ptr(other
);
366 len
= s1
->len
+ s2
->len
;
368 if (s1
->aux
.capa
< len
) {
370 s1
->ptr
= (char *)mrb_realloc(mrb
, s1
->ptr
, len
+1);
372 memcpy(s1
->ptr
+s1
->len
, s2
->ptr
, s2
->len
);
378 * call-seq: (Caution! String("abcd") remain)
379 * String("abcdefg") = String("abcd") + String("efg")
381 * Returns a new string object containing <i>a</i> followed by <i>b</i>.
/*
 * mrb_str_plus: build a new string holding `a` followed by `b`.
 *
 *   mrb - interpreter state
 *   a   - left operand (read via mrb_str_ptr)
 *   b   - right operand (read via mrb_str_ptr)
 *
 * Returns the new string as an mrb_value; neither operand is written to.
 * NOTE(review): no check on the sum of the two lengths is visible here --
 * confirm whether overflow is guarded elsewhere.
 */
384 mrb_str_plus(mrb_state
*mrb
, mrb_value a
, mrb_value b
)
386 struct RString
*s
= mrb_str_ptr(a
);
387 struct RString
*s2
= mrb_str_ptr(b
);
/* allocate the result with the combined length, no initial content */
390 t
= str_new(mrb
, 0, s
->len
+ s2
->len
);
/* first operand at the front, second operand right behind it */
391 memcpy(t
->ptr
, s
->ptr
, s
->len
);
392 memcpy(t
->ptr
+ s
->len
, s2
->ptr
, s2
->len
);
394 return mrb_obj_value(t
);
400 * call-seq: (Caution! String("abcd") remain) for stack_argument
401 * String("abcdefg") = String("abcd") + String("efg")
403 * Returns a new string object containing a copy of <i>str</i>.
406 mrb_str_plus_m(mrb_state
*mrb
, mrb_value self
)
410 mrb_get_args(mrb
, "S", &str
);
411 return mrb_str_plus(mrb
, self
, str
);
416 * len = strlen(String("abcd"))
418 * Returns the length of <i>str</i> in bytes.
421 mrb_str_bytesize(mrb_state
*mrb
, mrb_value self
)
423 struct RString
*s
= mrb_str_ptr(self
);
424 return mrb_fixnum_value(s
->len
);
431 * len = strlen(String("abcd"))
433 * Returns the length of <i>str</i>.
436 mrb_str_size(mrb_state
*mrb
, mrb_value self
)
438 struct RString
*s
= mrb_str_ptr(self
);
439 return mrb_fixnum_value(s
->len
);
445 * str * integer => new_str
447 * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
450 * "Ho! " * 3 #=> "Ho! Ho! Ho! "
/*
 * mrb_str_times (excerpt): String#* -- build a string made of repeated
 * copies of `self`.
 *
 *   mrb  - interpreter state
 *   self - the string being repeated
 *
 * The repeat count is read as an integer argument ("i").
 * Raises E_ARGUMENT_ERROR "negative argument" (presumably when the count is
 * negative; the guard is not visible here) and "argument too big" when the
 * count times the length of self would exceed MRB_INT_MAX.
 * Returns the new string, which is passed through str_with_class() with
 * `self`.
 */
453 mrb_str_times(mrb_state
*mrb
, mrb_value self
)
456 struct RString
*str2
;
/* NOTE(review): the last argument below reads as a multiplication sign;
 * it looks like "&times" mangled by HTML-entity decoding, since `times`
 * is the variable tested further down -- confirm against the original. */
459 mrb_get_args(mrb
, "i", ×
);
461 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "negative argument");
/* the division-based test avoids computing an overflowing product */
463 if (times
&& MRB_INT_MAX
/ times
< RSTRING_LEN(self
)) {
464 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "argument too big");
467 len
= RSTRING_LEN(self
)*times
;
468 str2
= str_new(mrb
, 0, len
);
469 str_with_class(mrb
, str2
, self
);
/* seed the result with one copy of self ... */
472 n
= RSTRING_LEN(self
);
473 memcpy(p
, RSTRING_PTR(self
), n
);
/* ... then fill the remaining len-n bytes from the already-written part */
478 memcpy(p
+ n
, p
, len
-n
);
482 return mrb_obj_value(str2
);
484 /* -------------------------------------------------------------- */
486 #define lesser(a,b) (((a)>(b))?(b):(a))
488 /* ---------------------------*/
491 * mrb_value str1 <=> mrb_value str2 => int
/*
 * mrb_str_cmp (excerpt): three-way byte comparison of two strings.
 *
 *   mrb  - interpreter state
 *   str1 - left operand
 *   str2 - right operand
 *
 * The common prefix (the shorter of the two lengths) is compared with
 * memcmp().  The visible returns are 0 for equal lengths, 1 when str1 is
 * longer, and 1 when memcmp() is positive.
 * NOTE(review): the length tests are presumably guarded by a
 * "memcmp result == 0" check, and the -1 returns are not visible in this
 * excerpt -- confirm against the full file.
 */
497 mrb_str_cmp(mrb_state
*mrb
, mrb_value str1
, mrb_value str2
)
501 struct RString
*s1
= mrb_str_ptr(str1
);
502 struct RString
*s2
= mrb_str_ptr(str2
);
/* compare only as many bytes as both strings have */
504 len
= lesser(s1
->len
, s2
->len
);
505 retval
= memcmp(s1
->ptr
, s2
->ptr
, len
);
/* length decides the order between strings with the same prefix */
507 if (s1
->len
== s2
->len
) return 0;
508 if (s1
->len
> s2
->len
) return 1;
511 if (retval
> 0) return 1;
519 * str <=> other_str => -1, 0, +1
521 * Comparison---Returns -1 if <i>str</i> is less than, 0 if <i>str</i> is
522 * equal to, and +1 if <i>str</i> is greater than <i>other_str</i>.
523 * If the strings are of different lengths, and the strings are
524 * equal when compared up to the shortest length, then the longer string is
525 * considered greater than the shorter one. If the variable <code>$=</code> is
526 * <code>false</code>, the comparison is based on comparing the binary values
527 * of each character in the string. In older versions of Ruby, setting
528 * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
529 * in favor of using <code>String#casecmp</code>.
531 * <code><=></code> is the basis for the methods <code><</code>,
532 * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
533 * included from module <code>Comparable</code>. The method
534 * <code>String#==</code> does not use <code>Comparable#==</code>.
536 * "abcdef" <=> "abcde" #=> 1
537 * "abcdef" <=> "abcdef" #=> 0
538 * "abcdef" <=> "abcdefg" #=> -1
539 * "abcdef" <=> "ABCDEF" #=> 1
/*
 * mrb_str_cmp_m (excerpt): method entry for the String comparison operator.
 *
 *   mrb  - interpreter state
 *   str1 - receiver
 *
 * Reads one argument of any type ("o").
 *   - Non-String argument: returns nil unless the argument responds to both
 *     "to_s" and "<=>"; otherwise calls the argument's own "<=>" with the
 *     receiver and negates the outcome (nil is passed through as nil).
 *   - String argument: the result comes from mrb_str_cmp().
 * Returns the result as a Fixnum value.
 */
542 mrb_str_cmp_m(mrb_state
*mrb
, mrb_value str1
)
547 mrb_get_args(mrb
, "o", &str2
);
548 if (!mrb_string_p(str2
)) {
549 if (!mrb_respond_to(mrb
, str2
, mrb_intern(mrb
, "to_s", 4))) {
550 return mrb_nil_value();
552 else if (!mrb_respond_to(mrb
, str2
, mrb_intern(mrb
, "<=>", 3))) {
553 return mrb_nil_value();
/* delegate to the other object's comparison, operands swapped */
556 mrb_value tmp
= mrb_funcall(mrb
, str2
, "<=>", 1, str1
);
558 if (mrb_nil_p(tmp
)) return mrb_nil_value();
/* NOTE(review): mrb_fixnum() is used a few statements below to extract
 * the integer value, so this condition is true when that value is 0
 * rather than when tmp is not a Fixnum; a type predicate looks intended
 * -- confirm. */
559 if (!mrb_fixnum(tmp
)) {
/* negate via 0 - tmp using the object's own "-" method */
560 return mrb_funcall(mrb
, mrb_fixnum_value(0), "-", 1, tmp
);
/* swapped operands, so flip the sign */
562 result
= -mrb_fixnum(tmp
);
566 result
= mrb_str_cmp(mrb
, str1
, str2
);
568 return mrb_fixnum_value(result
);
/*
 * str_eql (excerpt): byte-wise equality of two string values.
 *
 *   mrb  - interpreter state (not used in the visible statements)
 *   str1 - first string
 *   str2 - second string
 *
 * Returns FALSE as soon as the lengths differ; otherwise the contents are
 * compared with memcmp() over the full length.  The statements that follow
 * the memcmp() test are not visible in this excerpt.
 */
572 str_eql(mrb_state
*mrb
, const mrb_value str1
, const mrb_value str2
)
574 const mrb_int len
= RSTRING_LEN(str1
);
/* different lengths can never be equal; skips the memcmp entirely */
576 if (len
!= RSTRING_LEN(str2
)) return FALSE
;
577 if (memcmp(RSTRING_PTR(str1
), RSTRING_PTR(str2
), (size_t)len
) == 0)
583 mrb_str_equal(mrb_state
*mrb
, mrb_value str1
, mrb_value str2
)
585 if (mrb_obj_equal(mrb
, str1
, str2
)) return TRUE
;
586 if (!mrb_string_p(str2
)) {
587 if (mrb_nil_p(str2
)) return FALSE
;
588 if (!mrb_respond_to(mrb
, str2
, mrb_intern(mrb
, "to_str", 6))) {
591 str2
= mrb_funcall(mrb
, str2
, "to_str", 0);
592 return mrb_equal(mrb
, str2
, str1
);
594 return str_eql(mrb
, str1
, str2
);
600 * str == obj => true or false
603 * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
604 * Otherwise, returns <code>false</code> or <code>true</code>
606 * Caution: returns <code>true</code> if <i>str</i> <code><=></code> <i>obj</i> returns zero.
609 mrb_str_equal_m(mrb_state
*mrb
, mrb_value str1
)
614 mrb_get_args(mrb
, "o", &str2
);
615 equal_p
= mrb_str_equal(mrb
, str1
, str2
);
617 return mrb_bool_value(equal_p
);
619 /* ---------------------------------- */
621 mrb_str_to_str(mrb_state
*mrb
, mrb_value str
)
625 if (!mrb_string_p(str
)) {
626 s
= mrb_check_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_str");
628 s
= mrb_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_s");
636 mrb_string_value_ptr(mrb_state
*mrb
, mrb_value ptr
)
638 mrb_value str
= mrb_str_to_str(mrb
, ptr
);
639 return RSTRING_PTR(str
);
643 noregexp(mrb_state
*mrb
, mrb_value self
)
645 mrb_raise(mrb
, E_NOTIMP_ERROR
, "Regexp class not implemented");
646 return mrb_nil_value();
650 regexp_check(mrb_state
*mrb
, mrb_value obj
)
652 if (!memcmp(mrb_obj_classname(mrb
, obj
), REGEXP_CLASS
, sizeof(REGEXP_CLASS
) - 1)) {
657 static inline mrb_int
658 mrb_memsearch_qs(const unsigned char *xs
, mrb_int m
, const unsigned char *ys
, mrb_int n
)
660 const unsigned char *x
= xs
, *xe
= xs
+ m
;
661 const unsigned char *y
= ys
;
665 for (i
= 0; i
< 256; ++i
)
668 qstable
[*x
] = xe
- x
;
670 for (; y
+ m
<= ys
+ n
; y
+= *(qstable
+ y
[m
])) {
671 if (*xs
== *y
&& memcmp(xs
, y
, m
) == 0)
678 mrb_memsearch(const void *x0
, mrb_int m
, const void *y0
, mrb_int n
)
680 const unsigned char *x
= (const unsigned char *)x0
, *y
= (const unsigned char *)y0
;
682 if (m
> n
) return -1;
684 return memcmp(x0
, y0
, m
) == 0 ? 0 : -1;
690 const unsigned char *ys
= y
, *ye
= ys
+ n
;
691 for (; y
< ye
; ++y
) {
697 return mrb_memsearch_qs((const unsigned char *)x0
, m
, (const unsigned char *)y0
, n
);
701 mrb_str_index(mrb_state
*mrb
, mrb_value str
, mrb_value sub
, mrb_int offset
)
707 len
= RSTRING_LEN(str
);
708 slen
= RSTRING_LEN(sub
);
711 if (offset
< 0) return -1;
713 if (len
- offset
< slen
) return -1;
714 s
= RSTRING_PTR(str
);
718 if (slen
== 0) return offset
;
719 /* need proceed one character at a time */
720 sptr
= RSTRING_PTR(sub
);
721 slen
= RSTRING_LEN(sub
);
722 len
= RSTRING_LEN(str
) - offset
;
723 pos
= mrb_memsearch(sptr
, slen
, s
, len
);
724 if (pos
< 0) return pos
;
729 mrb_str_dup(mrb_state
*mrb
, mrb_value str
)
731 /* should return shared string */
732 struct RString
*s
= mrb_str_ptr(str
);
734 return mrb_str_new(mrb
, s
->ptr
, s
->len
);
738 mrb_str_aref(mrb_state
*mrb
, mrb_value str
, mrb_value indx
)
742 regexp_check(mrb
, indx
);
743 switch (mrb_type(indx
)) {
745 idx
= mrb_fixnum(indx
);
748 str
= mrb_str_substr(mrb
, str
, idx
, 1);
749 if (!mrb_nil_p(str
) && RSTRING_LEN(str
) == 0) return mrb_nil_value();
753 if (mrb_str_index(mrb
, str
, indx
, 0) != -1)
754 return mrb_str_dup(mrb
, indx
);
755 return mrb_nil_value();
758 /* check if indx is Range */
763 len
= RSTRING_LEN(str
);
764 if (mrb_range_beg_len(mrb
, indx
, &beg
, &len
, len
)) {
765 tmp
= mrb_str_subseq(mrb
, str
, beg
, len
);
769 return mrb_nil_value();
773 idx
= mrb_fixnum(indx
);
776 return mrb_nil_value(); /* not reached */
783 * str[fixnum] => new_str or nil
784 * str[fixnum, fixnum] => new_str or nil
785 * str[range] => new_str or nil
786 * str[regexp] => new_str or nil
787 * str[regexp, fixnum] => new_str or nil
788 * str[other_str] => new_str or nil
789 * str.slice(fixnum) => new_str or nil
790 * str.slice(fixnum, fixnum) => new_str or nil
791 * str.slice(range) => new_str or nil
792 * str.slice(regexp) => new_str or nil
793 * str.slice(regexp, fixnum) => new_str or nil
794 * str.slice(other_str) => new_str or nil
796 * Element Reference---If passed a single <code>Fixnum</code>, returns a
797 * one-character substring at that position. If passed two <code>Fixnum</code>
798 * objects, returns a substring starting at the offset given by the first, and
799 * a length given by the second. If given a range, a substring containing
800 * characters at offsets given by the range is returned. In all three cases, if
801 * an offset is negative, it is counted from the end of <i>str</i>. Returns
802 * <code>nil</code> if the initial offset falls outside the string, the length
803 * is negative, or the beginning of the range is greater than the end.
805 * If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
806 * returned. If a numeric parameter follows the regular expression, that
807 * component of the <code>MatchData</code> is returned instead. If a
808 * <code>String</code> is given, that string is returned if it occurs in
809 * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
813 * a[1] #=> 101(1.8.7) "e"(1.9.2)
817 * a[-4..-2] #=> "her"
820 * a[/[aeiou](.)\1/] #=> "ell"
821 * a[/[aeiou](.)\1/, 0] #=> "ell"
822 * a[/[aeiou](.)\1/, 1] #=> "l"
823 * a[/[aeiou](.)\1/, 2] #=> nil
/*
 * mrb_str_aref_m (excerpt): method entry for element reference with one or
 * two arguments.
 *
 *   mrb - interpreter state
 *   str - receiver
 *
 * Reads one required and one optional argument of any type ("o|o").
 *   - With the second argument: after regexp_check() on the first, returns
 *     mrb_str_substr(str, a1, a2).
 *   - With one argument: returns mrb_str_aref(str, a1).
 * Raises E_ARGUMENT_ERROR "wrong number of arguments (%S for 1)" (the
 * condition guarding this raise is not visible in this excerpt).
 */
828 mrb_str_aref_m(mrb_state
*mrb
, mrb_value str
)
833 argc
= mrb_get_args(mrb
, "o|o", &a1
, &a2
);
835 regexp_check(mrb
, a1
);
/* NOTE(review): a1 and a2 were fetched with "o" (any object) and are
 * passed to mrb_fixnum() with no type check visible here -- confirm that
 * non-Fixnum arguments are rejected somewhere. */
836 return mrb_str_substr(mrb
, str
, mrb_fixnum(a1
), mrb_fixnum(a2
));
839 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc
));
841 return mrb_str_aref(mrb
, str
, a1
);
847 * str.capitalize! => str or nil
849 * Modifies <i>str</i> by converting the first character to uppercase and the
850 * remainder to lowercase. Returns <code>nil</code> if no changes are made.
853 * a.capitalize! #=> "Hello"
855 * a.capitalize! #=> nil
858 mrb_str_capitalize_bang(mrb_state
*mrb
, mrb_value str
)
862 struct RString
*s
= mrb_str_ptr(str
);
864 mrb_str_modify(mrb
, s
);
865 if (s
->len
== 0 || !s
->ptr
) return mrb_nil_value();
866 p
= s
->ptr
; pend
= s
->ptr
+ s
->len
;
877 if (modify
) return str
;
878 return mrb_nil_value();
884 * str.capitalize => new_str
886 * Returns a copy of <i>str</i> with the first character converted to uppercase
887 * and the remainder to lowercase.
889 * "hello".capitalize #=> "Hello"
890 * "HELLO".capitalize #=> "Hello"
891 * "123ABC".capitalize #=> "123abc"
894 mrb_str_capitalize(mrb_state
*mrb
, mrb_value self
)
898 str
= mrb_str_dup(mrb
, self
);
899 mrb_str_capitalize_bang(mrb
, str
);
906 * str.chomp!(separator=$/) => str or nil
908 * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
909 * returning <i>str</i>, or <code>nil</code> if no modifications were made.
912 mrb_str_chomp_bang(mrb_state
*mrb
, mrb_value str
)
919 struct RString
*s
= mrb_str_ptr(str
);
921 mrb_str_modify(mrb
, s
);
923 if (mrb_get_args(mrb
, "|S", &rs
) == 0) {
924 if (len
== 0) return mrb_nil_value();
926 if (s
->ptr
[len
-1] == '\n') {
929 s
->ptr
[s
->len
-1] == '\r') {
933 else if (s
->ptr
[len
-1] == '\r') {
937 return mrb_nil_value();
939 s
->ptr
[s
->len
] = '\0';
943 if (len
== 0 || mrb_nil_p(rs
)) return mrb_nil_value();
945 rslen
= RSTRING_LEN(rs
);
947 while (len
>0 && p
[len
-1] == '\n') {
949 if (len
>0 && p
[len
-1] == '\r')
957 return mrb_nil_value();
959 if (rslen
> len
) return mrb_nil_value();
960 newline
= RSTRING_PTR(rs
)[rslen
-1];
961 if (rslen
== 1 && newline
== '\n')
962 newline
= RSTRING_PTR(rs
)[rslen
-1];
963 if (rslen
== 1 && newline
== '\n')
966 pp
= p
+ len
- rslen
;
967 if (p
[len
-1] == newline
&&
969 memcmp(RSTRING_PTR(rs
), pp
, rslen
) == 0)) {
970 s
->len
= len
- rslen
;
974 return mrb_nil_value();
980 * str.chomp(separator=$/) => new_str
982 * Returns a new <code>String</code> with the given record separator removed
983 * from the end of <i>str</i> (if present). If <code>$/</code> has not been
984 * changed from the default Ruby record separator, then <code>chomp</code> also
985 * removes carriage return characters (that is it will remove <code>\n</code>,
986 * <code>\r</code>, and <code>\r\n</code>).
988 * "hello".chomp #=> "hello"
989 * "hello\n".chomp #=> "hello"
990 * "hello\r\n".chomp #=> "hello"
991 * "hello\n\r".chomp #=> "hello\n"
992 * "hello\r".chomp #=> "hello"
993 * "hello \n there".chomp #=> "hello \n there"
994 * "hello".chomp("llo") #=> "he"
997 mrb_str_chomp(mrb_state
*mrb
, mrb_value self
)
1001 str
= mrb_str_dup(mrb
, self
);
1002 mrb_str_chomp_bang(mrb
, str
);
1009 * str.chop! => str or nil
1011 * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
1012 * or <code>nil</code> if <i>str</i> is the empty string. See also
1013 * <code>String#chomp!</code>.
1016 mrb_str_chop_bang(mrb_state
*mrb
, mrb_value str
)
1018 struct RString
*s
= mrb_str_ptr(str
);
1020 mrb_str_modify(mrb
, s
);
1024 if (s
->ptr
[len
] == '\n') {
1026 s
->ptr
[len
-1] == '\r') {
1034 return mrb_nil_value();
1040 * str.chop => new_str
1042 * Returns a new <code>String</code> with the last character removed. If the
1043 * string ends with <code>\r\n</code>, both characters are removed. Applying
1044 * <code>chop</code> to an empty string returns an empty
1045 * string. <code>String#chomp</code> is often a safer alternative, as it leaves
1046 * the string unchanged if it doesn't end in a record separator.
1048 * "string\r\n".chop #=> "string"
1049 * "string\n\r".chop #=> "string\n"
1050 * "string\n".chop #=> "string"
1051 * "string".chop #=> "strin"
1055 mrb_str_chop(mrb_state
*mrb
, mrb_value self
)
1058 str
= mrb_str_dup(mrb
, self
);
1059 mrb_str_chop_bang(mrb
, str
);
1066 * str.downcase! => str or nil
1068 * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
1069 * changes were made.
1072 mrb_str_downcase_bang(mrb_state
*mrb
, mrb_value str
)
1076 struct RString
*s
= mrb_str_ptr(str
);
1078 mrb_str_modify(mrb
, s
);
1080 pend
= s
->ptr
+ s
->len
;
1089 if (modify
) return str
;
1090 return mrb_nil_value();
1096 * str.downcase => new_str
1098 * Returns a copy of <i>str</i> with all uppercase letters replaced with their
1099 * lowercase counterparts. The operation is locale insensitive---only
1100 * characters ``A'' to ``Z'' are affected.
1102 * "hEllO".downcase #=> "hello"
1105 mrb_str_downcase(mrb_state
*mrb
, mrb_value self
)
1109 str
= mrb_str_dup(mrb
, self
);
1110 mrb_str_downcase_bang(mrb
, str
);
1117 * str.empty? => true or false
1119 * Returns <code>true</code> if <i>str</i> has a length of zero.
1121 * "hello".empty? #=> false
1122 * "".empty? #=> true
1125 mrb_str_empty_p(mrb_state
*mrb
, mrb_value self
)
1127 struct RString
*s
= mrb_str_ptr(self
);
1129 return mrb_bool_value(s
->len
== 0);
1135 * str.eql?(other) => true or false
1137 * Two strings are equal if they have the same length and content.
1140 mrb_str_eql(mrb_state
*mrb
, mrb_value self
)
1145 mrb_get_args(mrb
, "o", &str2
);
1146 eql_p
= (mrb_type(str2
) == MRB_TT_STRING
) && str_eql(mrb
, self
, str2
);
1148 return mrb_bool_value(eql_p
);
/*
 * mrb_str_subseq (excerpt): create a substring object that shares the
 * original string's buffer instead of copying it.
 *
 *   mrb - interpreter state
 *   str - source string; converted to shared storage by str_make_shared()
 *   beg - byte offset of the substring inside the source buffer
 *   len - substring length (its use is not visible in this excerpt)
 *
 * Returns the new string as an mrb_value.  No range checking of beg or len
 * is visible in this function.
 * NOTE(review): the statements that set the new string's length and bump
 * the shared record's reference count are not visible here -- confirm.
 */
1152 mrb_str_subseq(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
)
1154 struct RString
*orig
, *s
;
1155 mrb_shared_string
*shared
;
1157 orig
= mrb_str_ptr(str
);
/* make sure the source owns a shared record before aliasing its buffer */
1158 str_make_shared(mrb
, orig
);
1159 shared
= orig
->aux
.shared
;
1160 s
= mrb_obj_alloc_string(mrb
);
/* the substring points into the source buffer at offset beg */
1161 s
->ptr
= orig
->ptr
+ beg
;
1163 s
->aux
.shared
= shared
;
1164 s
->flags
|= MRB_STR_SHARED
;
1167 return mrb_obj_value(s
);
1171 mrb_str_substr(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
)
1175 if (len
< 0) return mrb_nil_value();
1176 if (!RSTRING_LEN(str
)) {
1179 if (beg
> RSTRING_LEN(str
)) return mrb_nil_value();
1181 beg
+= RSTRING_LEN(str
);
1182 if (beg
< 0) return mrb_nil_value();
1184 if (beg
+ len
> RSTRING_LEN(str
))
1185 len
= RSTRING_LEN(str
) - beg
;
1189 str2
= mrb_str_subseq(mrb
, str
, beg
, len
);
1195 mrb_str_buf_append(mrb_state
*mrb
, mrb_value str
, mrb_value str2
)
1197 mrb_str_cat(mrb
, str
, RSTRING_PTR(str2
), RSTRING_LEN(str2
));
1202 mrb_str_hash(mrb_state
*mrb
, mrb_value str
)
1205 struct RString
*s
= mrb_str_ptr(str
);
1206 mrb_int len
= s
->len
;
1211 key
= key
*65599 + *p
;
1214 key
= key
+ (key
>>5);
1221 * str.hash => fixnum
1223 * Return a hash based on the string's length and content.
1226 mrb_str_hash_m(mrb_state
*mrb
, mrb_value self
)
1228 mrb_int key
= mrb_str_hash(mrb
, self
);
1229 return mrb_fixnum_value(key
);
1235 * str.include? other_str => true or false
1236 * str.include? fixnum => true or false
1238 * Returns <code>true</code> if <i>str</i> contains the given string or
1241 * "hello".include? "lo" #=> true
1242 * "hello".include? "ol" #=> false
1243 * "hello".include? ?h #=> true
1246 mrb_str_include(mrb_state
*mrb
, mrb_value self
)
1252 mrb_get_args(mrb
, "o", &str2
);
1253 if (mrb_type(str2
) == MRB_TT_FIXNUM
) {
1254 include_p
= (memchr(RSTRING_PTR(self
), mrb_fixnum(str2
), RSTRING_LEN(self
)) != NULL
);
1257 str2
= mrb_str_to_str(mrb
, str2
);
1258 i
= mrb_str_index(mrb
, self
, str2
, 0);
1260 include_p
= (i
!= -1);
1263 return mrb_bool_value(include_p
);
1269 * str.index(substring [, offset]) => fixnum or nil
1270 * str.index(fixnum [, offset]) => fixnum or nil
1271 * str.index(regexp [, offset]) => fixnum or nil
1273 * Returns the index of the first occurrence of the given
1275 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
1277 * <code>nil</code> if not found.
1278 * If the second parameter is present, it
1279 * specifies the position in the string to begin the search.
1281 * "hello".index('e') #=> 1
1282 * "hello".index('lo') #=> 3
1283 * "hello".index('a') #=> nil
1284 * "hello".index(101) #=> 1(101=0x65='e')
1285 * "hello".index(/[aeiou]/, -3) #=> 4
1288 mrb_str_index_m(mrb_state
*mrb
, mrb_value str
)
1296 mrb_get_args(mrb
, "*", &argv
, &argc
);
1298 pos
= mrb_fixnum(argv
[1]);
1306 sub
= mrb_nil_value();
1309 regexp_check(mrb
, sub
);
1311 pos
+= RSTRING_LEN(str
);
1313 return mrb_nil_value();
1317 switch (mrb_type(sub
)) {
1318 case MRB_TT_FIXNUM
: {
1319 int c
= mrb_fixnum(sub
);
1320 mrb_int len
= RSTRING_LEN(str
);
1321 unsigned char *p
= (unsigned char*)RSTRING_PTR(str
);
1323 for (;pos
<len
;pos
++) {
1324 if (p
[pos
] == c
) return mrb_fixnum_value(pos
);
1326 return mrb_nil_value();
1332 tmp
= mrb_check_string_type(mrb
, sub
);
1333 if (mrb_nil_p(tmp
)) {
1334 mrb_raisef(mrb
, E_TYPE_ERROR
, "type mismatch: %S given", sub
);
1340 pos
= mrb_str_index(mrb
, str
, sub
, pos
);
1344 if (pos
== -1) return mrb_nil_value();
1345 return mrb_fixnum_value(pos
);
1348 #define STR_REPLACE_SHARED_MIN 10
1351 str_replace(mrb_state
*mrb
, struct RString
*s1
, struct RString
*s2
)
1353 if (s2
->flags
& MRB_STR_SHARED
) {
1355 if (s1
->flags
& MRB_STR_SHARED
){
1356 str_decref(mrb
, s1
->aux
.shared
);
1359 mrb_free(mrb
, s1
->ptr
);
1363 s1
->aux
.shared
= s2
->aux
.shared
;
1364 s1
->flags
|= MRB_STR_SHARED
;
1365 s1
->aux
.shared
->refcnt
++;
1367 else if (s2
->len
> STR_REPLACE_SHARED_MIN
) {
1368 str_make_shared(mrb
, s2
);
1372 if (s1
->flags
& MRB_STR_SHARED
) {
1373 str_decref(mrb
, s1
->aux
.shared
);
1374 s1
->flags
&= ~MRB_STR_SHARED
;
1375 s1
->ptr
= (char *)mrb_malloc(mrb
, s2
->len
+1);
1378 s1
->ptr
= (char *)mrb_realloc(mrb
, s1
->ptr
, s2
->len
+1);
1380 memcpy(s1
->ptr
, s2
->ptr
, s2
->len
);
1381 s1
->ptr
[s2
->len
] = 0;
1383 s1
->aux
.capa
= s2
->len
;
1385 return mrb_obj_value(s1
);
1392 * str.replace(other_str) => str
1394 * s = "hello" #=> "hello"
1395 * s.replace "world" #=> "world"
1398 mrb_str_replace(mrb_state
*mrb
, mrb_value str
)
1402 mrb_get_args(mrb
, "S", &str2
);
1403 return str_replace(mrb
, mrb_str_ptr(str
), mrb_str_ptr(str2
));
1409 * String.new(str="") => new_str
1411 * Returns a new string object containing a copy of <i>str</i>.
1414 mrb_str_init(mrb_state
*mrb
, mrb_value self
)
1418 if (mrb_get_args(mrb
, "|S", &str2
) == 1) {
1419 str_replace(mrb
, mrb_str_ptr(self
), mrb_str_ptr(str2
));
1428 * str.intern => symbol
1429 * str.to_sym => symbol
1431 * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
1432 * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
1434 * "Koala".intern #=> :Koala
1435 * s = 'cat'.to_sym #=> :cat
1436 * s == :cat #=> true
1437 * s = '@cat'.to_sym #=> :@cat
1438 * s == :@cat #=> true
1440 * This can also be used to create symbols that cannot be represented using the
1441 * <code>:xxx</code> notation.
1443 * 'cat and dog'.to_sym #=> :"cat and dog"
1446 mrb_str_intern(mrb_state
*mrb
, mrb_value self
)
1450 id
= mrb_intern_str(mrb
, self
);
1451 return mrb_symbol_value(id
);
1454 /* ---------------------------------- */
1456 mrb_obj_as_string(mrb_state
*mrb
, mrb_value obj
)
1460 if (mrb_string_p(obj
)) {
1463 str
= mrb_funcall(mrb
, obj
, "to_s", 0);
1464 if (!mrb_string_p(str
))
1465 return mrb_any_to_s(mrb
, obj
);
1470 mrb_ptr_to_str(mrb_state
*mrb
, void *p
)
1472 struct RString
*p_str
;
1475 uintptr_t n
= (uintptr_t)p
;
1477 p_str
= str_new(mrb
, NULL
, 2 + sizeof(uintptr_t) * CHAR_BIT
/ 4);
1484 *p2
++ = mrb_digitmap
[n
% 16];
1488 p_str
->len
= (mrb_int
)(p2
- p_str
->ptr
);
1496 return mrb_obj_value(p_str
);
1500 mrb_string_type(mrb_state
*mrb
, mrb_value str
)
1502 return mrb_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_str");
1506 mrb_check_string_type(mrb_state
*mrb
, mrb_value str
)
1508 return mrb_check_convert_type(mrb
, str
, MRB_TT_STRING
, "String", "to_str");
1511 /* ---------------------------------- */
1515 * str.reverse => new_str
1517 * Returns a new string with the characters from <i>str</i> in reverse order.
1519 * "stressed".reverse #=> "desserts"
1522 mrb_str_reverse(mrb_state
*mrb
, mrb_value str
)
1527 if (RSTRING(str
)->len
<= 1) return mrb_str_dup(mrb
, str
);
1529 s2
= str_new(mrb
, 0, RSTRING(str
)->len
);
1530 str_with_class(mrb
, s2
, str
);
1531 s
= RSTRING_PTR(str
); e
= RSTRING_END(str
) - 1;
1537 return mrb_obj_value(s2
);
1543 * str.reverse! => str
1545 * Reverses <i>str</i> in place.
1548 mrb_str_reverse_bang(mrb_state
*mrb
, mrb_value str
)
1550 struct RString
*s
= mrb_str_ptr(str
);
1554 mrb_str_modify(mrb
, s
);
1569 * str.rindex(substring [, fixnum]) => fixnum or nil
1570 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1571 * str.rindex(regexp [, fixnum]) => fixnum or nil
1573 * Returns the index of the last occurrence of the given <i>substring</i>,
1574 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1575 * <code>nil</code> if not found. If the second parameter is present, it
1576 * specifies the position in the string to end the search---characters beyond
1577 * this point will not be considered.
1579 * "hello".rindex('e') #=> 1
1580 * "hello".rindex('l') #=> 3
1581 * "hello".rindex('a') #=> nil
1582 * "hello".rindex(101) #=> 1
1583 * "hello".rindex(/[aeiou]/, -2) #=> 1
1586 mrb_str_rindex(mrb_state
*mrb
, mrb_value str
, mrb_value sub
, mrb_int pos
)
1589 struct RString
*ps
= mrb_str_ptr(str
);
1590 struct RString
*psub
= mrb_str_ptr(sub
);
1591 mrb_int len
= psub
->len
;
1593 /* substring longer than string */
1594 if (ps
->len
< len
) return -1;
1595 if (ps
->len
- pos
< len
) {
1596 pos
= ps
->len
- len
;
1603 if (memcmp(s
, t
, len
) == 0) {
1618 * str.rindex(substring [, fixnum]) => fixnum or nil
1619 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1620 * str.rindex(regexp [, fixnum]) => fixnum or nil
1622 * Returns the index of the last occurrence of the given <i>substring</i>,
1623 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1624 * <code>nil</code> if not found. If the second parameter is present, it
1625 * specifies the position in the string to end the search---characters beyond
1626 * this point will not be considered.
1628 * "hello".rindex('e') #=> 1
1629 * "hello".rindex('l') #=> 3
1630 * "hello".rindex('a') #=> nil
1631 * "hello".rindex(101) #=> 1
1632 * "hello".rindex(/[aeiou]/, -2) #=> 1
/*
 * mrb_str_rindex_m (excerpt): method entry for String#rindex.
 *
 *   mrb - interpreter state
 *   str - receiver
 *
 * Arguments are read as a raw argument vector ("*").  The optional end
 * position is taken via mrb_fixnum() and capped at the receiver's length.
 * Dispatches on the type of the search argument:
 *   - Fixnum: scans the receiver's bytes backwards for that value;
 *   - otherwise: the argument must convert via mrb_check_string_type(), or
 *     E_TYPE_ERROR "type mismatch: %S given" is raised; the search itself
 *     is done by mrb_str_rindex().
 * Returns the found index as a Fixnum, or nil.
 */
1635 mrb_str_rindex_m(mrb_state
*mrb
, mrb_value str
)
1641 int pos
, len
= RSTRING_LEN(str
);
1643 mrb_get_args(mrb
, "*", &argv
, &argc
);
1647 pos
= mrb_fixnum(vpos
);
1651 regexp_check(mrb
, sub
);
1652 return mrb_nil_value();
/* never start beyond the end of the receiver */
1655 if (pos
> len
) pos
= len
;
1662 sub
= mrb_nil_value();
1664 regexp_check(mrb
, sub
);
1666 switch (mrb_type(sub
)) {
1667 case MRB_TT_FIXNUM
: {
1668 int c
= mrb_fixnum(sub
);
1669 mrb_int len
= RSTRING_LEN(str
);
1670 unsigned char *p
= (unsigned char*)RSTRING_PTR(str
);
/* NOTE(review): the loop resets pos to len, discarding the position
 * computed above, and its first comparison reads p[len], one past the
 * last content byte -- confirm whether starting at the caller's pos
 * (and below len) was intended. */
1672 for (pos
=len
;pos
>=0;pos
--) {
1673 if (p
[pos
] == c
) return mrb_fixnum_value(pos
);
1675 return mrb_nil_value();
1681 tmp
= mrb_check_string_type(mrb
, sub
);
1682 if (mrb_nil_p(tmp
)) {
1683 mrb_raisef(mrb
, E_TYPE_ERROR
, "type mismatch: %S given", sub
);
1689 pos
= mrb_str_rindex(mrb
, str
, sub
, pos
);
1690 if (pos
>= 0) return mrb_fixnum_value(pos
);
1693 } /* end of switch (TYPE(sub)) */
1694 return mrb_nil_value();
/*
 * Byte-indexed lookup table for ASCII whitespace.
 * Only the leading entries are written out; every element without an
 * explicit initializer is zero-initialized by the language rules, so the
 * table still answers 0 for bytes 33..255.
 */
static const char isspacetable[256] = {
  /* 0x00-0x0f: HT, LF, VT, FF, CR */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
  /* 0x10-0x1f */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20: SPACE */
  1
};

/* Non-zero when c (taken as an unsigned byte) is ASCII whitespace. */
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
1722 * str.split(pattern=$;, [limit]) => anArray
1724 * Divides <i>str</i> into substrings based on a delimiter, returning an array
1725 * of these substrings.
1727 * If <i>pattern</i> is a <code>String</code>, then its contents are used as
1728 * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
1729 * space, <i>str</i> is split on whitespace, with leading whitespace and runs
1730 * of contiguous whitespace characters ignored.
1732 * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
1733 * pattern matches. Whenever the pattern matches a zero-length string,
1734 * <i>str</i> is split into individual characters.
1736 * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
1737 * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
1738 * split on whitespace as if ` ' were specified.
1740 * If the <i>limit</i> parameter is omitted, trailing null fields are
1741 * suppressed. If <i>limit</i> is a positive number, at most that number of
1742 * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
1743 * string is returned as the only entry in an array). If negative, there is no
 *  limit to the number of fields returned, and trailing null fields are not suppressed.
1747 * " now's the time".split #=> ["now's", "the", "time"]
1748 * " now's the time".split(' ') #=> ["now's", "the", "time"]
1749 * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
1750 * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
1751 * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
1752 * "hello".split(//, 3) #=> ["h", "e", "llo"]
1753 * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
1755 * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
1756 * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
1757 * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
1758 * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
1762 mrb_str_split_m(mrb_state
*mrb
, mrb_value str
)
1765 mrb_value spat
= mrb_nil_value();
1766 enum {awk
, string
, regexp
} split_type
= string
;
1771 mrb_value result
, tmp
;
1773 argc
= mrb_get_args(mrb
, "|oi", &spat
, &lim
);
1774 lim_p
= (lim
> 0 && argc
== 2);
1777 if (RSTRING_LEN(str
) == 0)
1778 return mrb_ary_new_capa(mrb
, 0);
1779 return mrb_ary_new_from_values(mrb
, 1, &str
);
1784 if (argc
== 0 || mrb_nil_p(spat
)) {
1788 if (mrb_string_p(spat
)) {
1789 split_type
= string
;
1790 if (RSTRING_LEN(spat
) == 1 && RSTRING_PTR(spat
)[0] == ' '){
1799 result
= mrb_ary_new(mrb
);
1801 if (split_type
== awk
) {
1802 char *ptr
= RSTRING_PTR(str
);
1803 char *eptr
= RSTRING_END(str
);
1809 while (ptr
< eptr
) {
1810 int ai
= mrb_gc_arena_save(mrb
);
1811 c
= (unsigned char)*ptr
++;
1813 if (ascii_isspace(c
)) {
1819 if (lim_p
&& lim
<= i
) break;
1822 else if (ascii_isspace(c
)) {
1823 mrb_ary_push(mrb
, result
, mrb_str_subseq(mrb
, str
, beg
, end
-beg
));
1824 mrb_gc_arena_restore(mrb
, ai
);
1834 else if (split_type
== string
) {
1835 char *ptr
= RSTRING_PTR(str
);
1837 char *eptr
= RSTRING_END(str
);
1838 mrb_int slen
= RSTRING_LEN(spat
);
1841 int ai
= mrb_gc_arena_save(mrb
);
1842 while (ptr
< eptr
) {
1843 mrb_ary_push(mrb
, result
, mrb_str_subseq(mrb
, str
, ptr
-temp
, 1));
1844 mrb_gc_arena_restore(mrb
, ai
);
1846 if (lim_p
&& lim
<= ++i
) break;
1850 char *sptr
= RSTRING_PTR(spat
);
1851 int ai
= mrb_gc_arena_save(mrb
);
1853 while (ptr
< eptr
&&
1854 (end
= mrb_memsearch(sptr
, slen
, ptr
, eptr
- ptr
)) >= 0) {
1855 mrb_ary_push(mrb
, result
, mrb_str_subseq(mrb
, str
, ptr
- temp
, end
));
1856 mrb_gc_arena_restore(mrb
, ai
);
1858 if (lim_p
&& lim
<= ++i
) break;
1866 if (RSTRING_LEN(str
) > 0 && (lim_p
|| RSTRING_LEN(str
) > beg
|| lim
< 0)) {
1867 if (RSTRING_LEN(str
) == beg
) {
1868 tmp
= mrb_str_new_empty(mrb
, str
);
1871 tmp
= mrb_str_subseq(mrb
, str
, beg
, RSTRING_LEN(str
)-beg
);
1873 mrb_ary_push(mrb
, result
, tmp
);
1875 if (!lim_p
&& lim
== 0) {
1877 while ((len
= RARRAY_LEN(result
)) > 0 &&
1878 (tmp
= RARRAY_PTR(result
)[len
-1], RSTRING_LEN(tmp
) == 0))
1879 mrb_ary_pop(mrb
, result
);
1886 mrb_cstr_to_inum(mrb_state
*mrb
, const char *str
, int base
, int badcheck
)
1895 #define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
1896 #define conv_digit(c) \
1897 (!ISASCII(c) ? -1 : \
1898 isdigit(c) ? ((c) - '0') : \
1899 islower(c) ? ((c) - 'a' + 10) : \
1900 isupper(c) ? ((c) - 'A' + 10) : \
1904 if (badcheck
) goto bad
;
1905 return mrb_fixnum_value(0);
1907 while (ISSPACE(*str
)) str
++;
1909 if (str
[0] == '+') {
1912 else if (str
[0] == '-') {
1916 if (str
[0] == '+' || str
[0] == '-') {
1917 if (badcheck
) goto bad
;
1918 return mrb_fixnum_value(0);
1921 if (str
[0] == '0') {
1939 else if (base
< -1) {
1948 if (str
[0] == '0' && (str
[1] == 'b'||str
[1] == 'B')) {
1955 if (str
[0] == '0' && (str
[1] == 'o'||str
[1] == 'O')) {
1958 case 4: case 5: case 6: case 7:
1961 if (str
[0] == '0' && (str
[1] == 'd'||str
[1] == 'D')) {
1964 case 9: case 11: case 12: case 13: case 14: case 15:
1967 if (str
[0] == '0' && (str
[1] == 'x'||str
[1] == 'X')) {
1972 if (base
< 2 || 36 < base
) {
1973 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "illegal radix %S", mrb_fixnum_value(base
));
1976 } /* end of switch (base) { */
1977 if (*str
== '0') { /* squeeze preceeding 0s */
1979 while ((c
= *++str
) == '0' || c
== '_') {
1987 if (!(c
= *str
) || ISSPACE(c
)) --str
;
1991 if (c
< 0 || c
>= base
) {
1992 if (badcheck
) goto bad
;
1993 return mrb_fixnum_value(0);
1996 n
= strtoul((char*)str
, &end
, base
);
1997 if (n
> MRB_INT_MAX
) {
1998 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "string (%S) too big for integer", mrb_str_new_cstr(mrb
, str
));
2002 if (end
== str
) goto bad
; /* no number */
2003 while (*end
&& ISSPACE(*end
)) end
++;
2004 if (*end
) goto bad
; /* trailing garbage */
2007 return mrb_fixnum_value(sign
? val
: -val
);
2009 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "invalid string for number(%S)", mrb_str_new_cstr(mrb
, str
));
2011 return mrb_fixnum_value(0);
2015 mrb_string_value_cstr(mrb_state
*mrb
, mrb_value
*ptr
)
2017 struct RString
*ps
= mrb_str_ptr(*ptr
);
2020 if (!s
|| ps
->len
!= strlen(s
)) {
2021 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string contains null byte");
2027 mrb_str_to_inum(mrb_state
*mrb
, mrb_value str
, int base
, int badcheck
)
2032 str
= mrb_str_to_str(mrb
, str
);
2034 s
= mrb_string_value_cstr(mrb
, &str
);
2037 s
= RSTRING_PTR(str
);
2040 len
= RSTRING_LEN(str
);
2041 if (s
[len
]) { /* no sentinel somehow */
2042 struct RString
*temp_str
= str_new(mrb
, s
, len
);
2046 return mrb_cstr_to_inum(mrb
, s
, base
, badcheck
);
2052 * str.to_i(base=10) => integer
2054 * Returns the result of interpreting leading characters in <i>str</i> as an
2055 * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
2056 * end of a valid number are ignored. If there is not a valid number at the
 *  start of <i>str</i>, <code>0</code> is returned. This method never raises an exception.
2060 * "12345".to_i #=> 12345
2061 * "99 red balloons".to_i #=> 99
2063 * "0a".to_i(16) #=> 10
2064 * "hello".to_i #=> 0
2065 * "1100101".to_i(2) #=> 101
2066 * "1100101".to_i(8) #=> 294977
2067 * "1100101".to_i(10) #=> 1100101
2068 * "1100101".to_i(16) #=> 17826049
2071 mrb_str_to_i(mrb_state
*mrb
, mrb_value self
)
2077 mrb_get_args(mrb
, "*", &argv
, &argc
);
2081 base
= mrb_fixnum(argv
[0]);
2084 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "illegal radix %S", mrb_fixnum_value(base
));
2086 return mrb_str_to_inum(mrb
, self
, base
, 0/*Qfalse*/);
2090 mrb_cstr_to_dbl(mrb_state
*mrb
, const char * p
, int badcheck
)
2094 #if !defined(DBL_DIG)
2098 enum {max_width
= 20};
2099 #define OutOfRange() (((w = end - p) > max_width) ? \
2100 (w = max_width, ellipsis = "...") : \
2101 (w = (int)(end - p), ellipsis = ""))
2104 while (ISSPACE(*p
)) p
++;
2106 if (!badcheck
&& p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2109 d
= strtod(p
, &end
);
2113 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "invalid string for float(%S)", mrb_str_new_cstr(mrb
, p
));
2119 char buf
[DBL_DIG
* 4 + 10];
2121 char *e
= buf
+ sizeof(buf
) - 1;
2124 while (p
< end
&& n
< e
) prev
= *n
++ = *p
++;
2127 /* remove underscores between digits */
2129 if (n
== buf
|| !ISDIGIT(prev
)) goto bad
;
2131 if (!ISDIGIT(*p
)) goto bad
;
2134 while (*++p
== '_');
2139 if (n
< e
) *n
++ = prev
;
2144 if (!badcheck
&& p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2148 d
= strtod(p
, &end
);
2150 if (!end
|| p
== end
) goto bad
;
2151 while (*end
&& ISSPACE(*end
)) end
++;
2159 mrb_str_to_dbl(mrb_state
*mrb
, mrb_value str
, int badcheck
)
2164 str
= mrb_str_to_str(mrb
, str
);
2165 s
= RSTRING_PTR(str
);
2166 len
= RSTRING_LEN(str
);
2168 if (badcheck
&& memchr(s
, '\0', len
)) {
2169 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string for Float contains null byte");
2171 if (s
[len
]) { /* no sentinel somehow */
2172 struct RString
*temp_str
= str_new(mrb
, s
, len
);
2176 return mrb_cstr_to_dbl(mrb
, s
, badcheck
);
2184 * Returns the result of interpreting leading characters in <i>str</i> as a
2185 * floating point number. Extraneous characters past the end of a valid number
2186 * are ignored. If there is not a valid number at the start of <i>str</i>,
2187 * <code>0.0</code> is returned. This method never raises an exception.
2189 * "123.45e1".to_f #=> 1234.5
2190 * "45.67 degrees".to_f #=> 45.67
2191 * "thx1138".to_f #=> 0.0
2194 mrb_str_to_f(mrb_state
*mrb
, mrb_value self
)
2196 return mrb_float_value(mrb
, mrb_str_to_dbl(mrb
, self
, 0/*Qfalse*/));
2205 * Returns the receiver.
2208 mrb_str_to_s(mrb_state
*mrb
, mrb_value self
)
2210 if (mrb_obj_class(mrb
, self
) != mrb
->string_class
) {
2211 return mrb_str_dup(mrb
, self
);
2219 * str.upcase! => str or nil
 *  Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes were made.
2225 mrb_str_upcase_bang(mrb_state
*mrb
, mrb_value str
)
2227 struct RString
*s
= mrb_str_ptr(str
);
2231 mrb_str_modify(mrb
, s
);
2232 p
= RSTRING_PTR(str
);
2233 pend
= RSTRING_END(str
);
2242 if (modify
) return str
;
2243 return mrb_nil_value();
2249 * str.upcase => new_str
2251 * Returns a copy of <i>str</i> with all lowercase letters replaced with their
2252 * uppercase counterparts. The operation is locale insensitive---only
2253 * characters ``a'' to ``z'' are affected.
2255 * "hEllO".upcase #=> "HELLO"
2258 mrb_str_upcase(mrb_state
*mrb
, mrb_value self
)
2262 str
= mrb_str_dup(mrb
, self
);
2263 mrb_str_upcase_bang(mrb
, str
);
2269 * str.dump -> new_str
2271 * Produces a version of <i>str</i> with all nonprinting characters replaced by
2272 * <code>\nnn</code> notation and all special characters escaped.
2275 mrb_str_dump(mrb_state
*mrb
, mrb_value str
)
2278 const char *p
, *pend
;
2280 struct RString
*result
;
2283 p
= RSTRING_PTR(str
); pend
= p
+ RSTRING_LEN(str
);
2285 unsigned char c
= *p
++;
2287 case '"': case '\\':
2288 case '\n': case '\r':
2289 case '\t': case '\f':
2290 case '\013': case '\010': case '\007': case '\033':
2295 len
+= IS_EVSTR(p
, pend
) ? 2 : 1;
2303 len
+= 4; /* \NNN */
2309 result
= str_new(mrb
, 0, len
);
2310 str_with_class(mrb
, result
, str
);
2311 p
= RSTRING_PTR(str
); pend
= p
+ RSTRING_LEN(str
);
2316 unsigned char c
= *p
++;
2366 if (IS_EVSTR(p
, pend
)) *q
++ = '\\';
2376 q
[2] = '0' + c
% 8; c
/= 8;
2377 q
[1] = '0' + c
% 8; c
/= 8;
2384 return mrb_obj_value(result
);
2388 mrb_str_cat(mrb_state
*mrb
, mrb_value str
, const char *ptr
, size_t len
)
2390 if ((mrb_int
)len
< 0) {
2391 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "negative string size (or size too big)");
2393 str_buf_cat(mrb
, mrb_str_ptr(str
), ptr
, len
);
2398 mrb_str_cat_cstr(mrb_state
*mrb
, mrb_value str
, const char *ptr
)
2400 return mrb_str_cat(mrb
, str
, ptr
, strlen(ptr
));
2404 mrb_str_append(mrb_state
*mrb
, mrb_value str
, mrb_value str2
)
2406 str2
= mrb_str_to_str(mrb
, str2
);
2407 return mrb_str_buf_append(mrb
, str
, str2
);
2410 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
2414 * str.inspect -> string
2416 * Returns a printable version of _str_, surrounded by quote marks,
2417 * with special characters escaped.
2421 * str.inspect #=> "\"hel\\bo\""
2424 mrb_str_inspect(mrb_state
*mrb
, mrb_value str
)
2426 const char *p
, *pend
;
2427 char buf
[CHAR_ESC_LEN
+ 1];
2428 mrb_value result
= mrb_str_new(mrb
, "\"", 1);
2430 p
= RSTRING_PTR(str
); pend
= RSTRING_END(str
);
2431 for (;p
< pend
; p
++) {
2435 if (c
== '"'|| c
== '\\' || (c
== '#' && IS_EVSTR(p
, pend
))) {
2436 buf
[0] = '\\'; buf
[1] = c
;
2437 mrb_str_buf_cat(mrb
, result
, buf
, 2);
2442 mrb_str_buf_cat(mrb
, result
, buf
, 1);
2446 case '\n': cc
= 'n'; break;
2447 case '\r': cc
= 'r'; break;
2448 case '\t': cc
= 't'; break;
2449 case '\f': cc
= 'f'; break;
2450 case '\013': cc
= 'v'; break;
2451 case '\010': cc
= 'b'; break;
2452 case '\007': cc
= 'a'; break;
2453 case 033: cc
= 'e'; break;
2454 default: cc
= 0; break;
2459 mrb_str_buf_cat(mrb
, result
, buf
, 2);
2464 buf
[3] = '0' + c
% 8; c
/= 8;
2465 buf
[2] = '0' + c
% 8; c
/= 8;
2466 buf
[1] = '0' + c
% 8;
2467 mrb_str_buf_cat(mrb
, result
, buf
, 4);
2471 mrb_str_buf_cat(mrb
, result
, "\"", 1);
2478 * str.bytes -> array of fixnums
2480 * Returns an array of bytes in _str_.
2483 * str.bytes #=> [104, 101, 108, 108, 111]
2486 mrb_str_bytes(mrb_state
*mrb
, mrb_value str
)
2488 struct RString
*s
= mrb_str_ptr(str
);
2489 mrb_value a
= mrb_ary_new_capa(mrb
, s
->len
);
2490 unsigned char *p
= (unsigned char *)(s
->ptr
), *pend
= p
+ s
->len
;
2493 mrb_ary_push(mrb
, a
, mrb_fixnum_value(p
[0]));
2499 /* ---------------------------*/
2501 mrb_init_string(mrb_state
*mrb
)
2505 s
= mrb
->string_class
= mrb_define_class(mrb
, "String", mrb
->object_class
);
2506 MRB_SET_INSTANCE_TT(s
, MRB_TT_STRING
);
2507 mrb_include_module(mrb
, s
, mrb_class_get(mrb
, "Comparable"));
2510 mrb_define_method(mrb
, s
, "bytesize", mrb_str_bytesize
, MRB_ARGS_NONE());
2512 mrb_define_method(mrb
, s
, "<=>", mrb_str_cmp_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
2513 mrb_define_method(mrb
, s
, "==", mrb_str_equal_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
2514 mrb_define_method(mrb
, s
, "+", mrb_str_plus_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
2515 mrb_define_method(mrb
, s
, "*", mrb_str_times
, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
2516 mrb_define_method(mrb
, s
, "[]", mrb_str_aref_m
, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
2517 mrb_define_method(mrb
, s
, "capitalize", mrb_str_capitalize
, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
2518 mrb_define_method(mrb
, s
, "capitalize!", mrb_str_capitalize_bang
, MRB_ARGS_REQ(1)); /* 15.2.10.5.8 */
2519 mrb_define_method(mrb
, s
, "chomp", mrb_str_chomp
, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
2520 mrb_define_method(mrb
, s
, "chomp!", mrb_str_chomp_bang
, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
2521 mrb_define_method(mrb
, s
, "chop", mrb_str_chop
, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */
2522 mrb_define_method(mrb
, s
, "chop!", mrb_str_chop_bang
, MRB_ARGS_REQ(1)); /* 15.2.10.5.12 */
2523 mrb_define_method(mrb
, s
, "downcase", mrb_str_downcase
, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
2524 mrb_define_method(mrb
, s
, "downcase!", mrb_str_downcase_bang
, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
2525 mrb_define_method(mrb
, s
, "empty?", mrb_str_empty_p
, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
2526 mrb_define_method(mrb
, s
, "eql?", mrb_str_eql
, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
2528 mrb_define_method(mrb
, s
, "hash", mrb_str_hash_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.20 */
2529 mrb_define_method(mrb
, s
, "include?", mrb_str_include
, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
2530 mrb_define_method(mrb
, s
, "index", mrb_str_index_m
, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
2531 mrb_define_method(mrb
, s
, "initialize", mrb_str_init
, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
2532 mrb_define_method(mrb
, s
, "initialize_copy", mrb_str_replace
, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
2533 mrb_define_method(mrb
, s
, "intern", mrb_str_intern
, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
2534 mrb_define_method(mrb
, s
, "length", mrb_str_size
, MRB_ARGS_NONE()); /* 15.2.10.5.26 */
2535 mrb_define_method(mrb
, s
, "replace", mrb_str_replace
, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
2536 mrb_define_method(mrb
, s
, "reverse", mrb_str_reverse
, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
2537 mrb_define_method(mrb
, s
, "reverse!", mrb_str_reverse_bang
, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
2538 mrb_define_method(mrb
, s
, "rindex", mrb_str_rindex_m
, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
2539 mrb_define_method(mrb
, s
, "size", mrb_str_size
, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
2540 mrb_define_method(mrb
, s
, "slice", mrb_str_aref_m
, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
2541 mrb_define_method(mrb
, s
, "split", mrb_str_split_m
, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
2543 mrb_define_method(mrb
, s
, "to_f", mrb_str_to_f
, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
2544 mrb_define_method(mrb
, s
, "to_i", mrb_str_to_i
, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
2545 mrb_define_method(mrb
, s
, "to_s", mrb_str_to_s
, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
2546 mrb_define_method(mrb
, s
, "to_str", mrb_str_to_s
, MRB_ARGS_NONE());
2547 mrb_define_method(mrb
, s
, "to_sym", mrb_str_intern
, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
2548 mrb_define_method(mrb
, s
, "upcase", mrb_str_upcase
, MRB_ARGS_REQ(1)); /* 15.2.10.5.42 */
2549 mrb_define_method(mrb
, s
, "upcase!", mrb_str_upcase_bang
, MRB_ARGS_REQ(1)); /* 15.2.10.5.43 */
2550 mrb_define_method(mrb
, s
, "inspect", mrb_str_inspect
, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
2551 mrb_define_method(mrb
, s
, "bytes", mrb_str_bytes
, MRB_ARGS_NONE());