rename mrb_intern2() to mrb_intern(); huge API incompatibility; close #1513
[mruby.git] / src / string.c
blob42f0353f2b7a1b404c10a53443bbbc9fe97dfdf6
1 /*
2 ** string.c - String class
3 **
4 ** See Copyright Notice in mruby.h
5 */
7 #include <ctype.h>
8 #ifndef SIZE_MAX
9 /* Some versions of VC++
10 * has SIZE_MAX in stdint.h
12 # include <limits.h>
13 #endif
14 #include <stddef.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include "mruby.h"
18 #include "mruby/array.h"
19 #include "mruby/class.h"
20 #include "mruby/range.h"
21 #include "mruby/string.h"
22 #include "re.h"
24 const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
26 typedef struct mrb_shared_string {
27 mrb_bool nofree;
28 int refcnt;
29 char *ptr;
30 mrb_int len;
31 } mrb_shared_string;
33 #define MRB_STR_SHARED 1
34 #define MRB_STR_NOFREE 2
36 static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2);
37 static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
/*
 * Reallocate the buffer of string `s` so it can hold `capacity` bytes plus
 * a terminating NUL, and record the new capacity.
 *
 * Requires a variable named `mrb` (the interpreter state) in the calling
 * scope.  Both arguments are parenthesized so that arbitrary expressions can
 * be passed; note that each argument is still evaluated more than once.
 */
#define RESIZE_CAPA(s,capacity) do {\
  (s)->ptr = (char *)mrb_realloc(mrb, (s)->ptr, (capacity)+1);\
  (s)->aux.capa = (capacity);\
} while(0)
44 static void
45 str_decref(mrb_state *mrb, mrb_shared_string *shared)
47 shared->refcnt--;
48 if (shared->refcnt == 0) {
49 if (!shared->nofree) {
50 mrb_free(mrb, shared->ptr);
52 mrb_free(mrb, shared);
56 void
57 mrb_str_modify(mrb_state *mrb, struct RString *s)
59 if (s->flags & MRB_STR_SHARED) {
60 mrb_shared_string *shared = s->aux.shared;
62 if (shared->refcnt == 1 && s->ptr == shared->ptr) {
63 s->ptr = shared->ptr;
64 s->aux.capa = shared->len;
65 s->ptr[s->len] = '\0';
66 mrb_free(mrb, shared);
68 else {
69 char *ptr, *p;
70 mrb_int len;
72 p = s->ptr;
73 len = s->len;
74 ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
75 if (p) {
76 memcpy(ptr, p, len);
78 ptr[len] = '\0';
79 s->ptr = ptr;
80 s->aux.capa = len;
81 str_decref(mrb, shared);
83 s->flags &= ~MRB_STR_SHARED;
84 return;
86 if (s->flags & MRB_STR_NOFREE) {
87 char *p = s->ptr;
89 s->ptr = (char *)mrb_malloc(mrb, (size_t)s->len+1);
90 if (p) {
91 memcpy(s->ptr, p, s->len);
93 s->ptr[s->len] = '\0';
94 s->aux.capa = s->len;
95 return;
99 mrb_value
100 mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
102 int slen;
103 struct RString *s = mrb_str_ptr(str);
105 mrb_str_modify(mrb, s);
106 slen = s->len;
107 if (len != slen) {
108 if (slen < len || slen - len > 256) {
109 RESIZE_CAPA(s, len);
111 s->len = len;
112 s->ptr[len] = '\0'; /* sentinel */
114 return str;
117 static inline void
118 str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len)
120 struct RString *s = mrb_str_ptr(str);
122 if (s->ptr != p || s->len != len) {
123 mrb_raise(mrb, E_RUNTIME_ERROR, "string modified");
127 #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
129 /* char offset to byte offset */
131 mrb_str_offset(mrb_state *mrb, mrb_value str, int pos)
133 return pos;
136 static struct RString*
137 str_new(mrb_state *mrb, const char *p, mrb_int len)
139 struct RString *s;
141 s = mrb_obj_alloc_string(mrb);
142 s->len = len;
143 s->aux.capa = len;
144 s->ptr = (char *)mrb_malloc(mrb, (size_t)len+1);
145 if (p) {
146 memcpy(s->ptr, p, len);
148 s->ptr[len] = '\0';
149 return s;
152 void
153 str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj)
155 s->c = mrb_str_ptr(obj)->c;
158 static mrb_value
159 mrb_str_new_empty(mrb_state *mrb, mrb_value str)
161 struct RString *s = str_new(mrb, 0, 0);
163 str_with_class(mrb, s, str);
164 return mrb_obj_value(s);
167 #ifndef MRB_STR_BUF_MIN_SIZE
168 # define MRB_STR_BUF_MIN_SIZE 128
169 #endif
171 mrb_value
172 mrb_str_buf_new(mrb_state *mrb, mrb_int capa)
174 struct RString *s;
176 s = mrb_obj_alloc_string(mrb);
178 if (capa < MRB_STR_BUF_MIN_SIZE) {
179 capa = MRB_STR_BUF_MIN_SIZE;
181 s->len = 0;
182 s->aux.capa = capa;
183 s->ptr = (char *)mrb_malloc(mrb, capa+1);
184 s->ptr[0] = '\0';
186 return mrb_obj_value(s);
189 static void
190 str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len)
192 mrb_int capa;
193 mrb_int total;
194 ptrdiff_t off = -1;
196 mrb_str_modify(mrb, s);
197 if (ptr >= s->ptr && ptr <= s->ptr + s->len) {
198 off = ptr - s->ptr;
200 if (len == 0) return;
201 capa = s->aux.capa;
202 if (s->len >= MRB_INT_MAX - (mrb_int)len) {
203 mrb_raise(mrb, E_ARGUMENT_ERROR, "string sizes too big");
205 total = s->len+len;
206 if (capa <= total) {
207 while (total > capa) {
208 if (capa + 1 >= MRB_INT_MAX / 2) {
209 capa = (total + 4095) / 4096;
210 break;
212 capa = (capa + 1) * 2;
214 RESIZE_CAPA(s, capa);
216 if (off != -1) {
217 ptr = s->ptr + off;
219 memcpy(s->ptr + s->len, ptr, len);
220 s->len = total;
221 s->ptr[total] = '\0'; /* sentinel */
224 mrb_value
225 mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
227 if (len == 0) return str;
228 str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
229 return str;
232 mrb_value
233 mrb_str_new(mrb_state *mrb, const char *p, size_t len)
235 struct RString *s;
237 s = str_new(mrb, p, len);
238 return mrb_obj_value(s);
242 * call-seq: (Caution! NULL string)
243 * String.new(str="") => new_str
245 * Returns a new string object containing a copy of <i>str</i>.
248 mrb_value
249 mrb_str_new_cstr(mrb_state *mrb, const char *p)
251 struct RString *s;
252 size_t len;
254 if (p) {
255 len = strlen(p);
256 if ((mrb_int)len < 0) {
257 mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
260 else {
261 len = 0;
264 s = str_new(mrb, p, len);
266 return mrb_obj_value(s);
269 mrb_value
270 mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
272 struct RString *s;
274 s = mrb_obj_alloc_string(mrb);
275 s->len = len;
276 s->aux.capa = 0; /* nofree */
277 s->ptr = (char *)p;
278 s->flags = MRB_STR_NOFREE;
279 return mrb_obj_value(s);
282 void
283 mrb_gc_free_str(mrb_state *mrb, struct RString *str)
285 if (str->flags & MRB_STR_SHARED)
286 str_decref(mrb, str->aux.shared);
287 else if ((str->flags & MRB_STR_NOFREE) == 0)
288 mrb_free(mrb, str->ptr);
291 char *
292 mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
294 struct RString *s;
296 if (!mrb_string_p(str0)) {
297 mrb_raise(mrb, E_TYPE_ERROR, "expected String");
300 s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
301 if ((strlen(s->ptr) ^ s->len) != 0) {
302 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
304 return s->ptr;
307 static void
308 str_make_shared(mrb_state *mrb, struct RString *s)
310 if (!(s->flags & MRB_STR_SHARED)) {
311 mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
313 shared->refcnt = 1;
314 if (s->flags & MRB_STR_NOFREE) {
315 shared->nofree = TRUE;
316 shared->ptr = s->ptr;
317 s->flags &= ~MRB_STR_NOFREE;
319 else {
320 shared->nofree = FALSE;
321 if (s->aux.capa > s->len) {
322 s->ptr = shared->ptr = (char *)mrb_realloc(mrb, s->ptr, s->len+1);
324 else {
325 shared->ptr = s->ptr;
328 shared->len = s->len;
329 s->aux.shared = shared;
330 s->flags |= MRB_STR_SHARED;
335 * call-seq:
336 * char* str = String("abcd"), len=strlen("abcd")
338 * Returns a pointer to the bytes of <i>str</i> and stores its length in <i>*len_p</i>.
340 const char*
341 mrb_str_body(mrb_value str, int *len_p)
343 struct RString *s = mrb_str_ptr(str);
345 *len_p = s->len;
346 return s->ptr;
350 * call-seq: (Caution! String("abcd") change)
351 * String("abcdefg") = String("abcd") + String("efg")
353 * Appends <i>other</i> to <i>self</i> in place; <i>self</i> is modified and no new string is created.
355 void
356 mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
358 struct RString *s1 = mrb_str_ptr(self), *s2;
359 mrb_int len;
361 mrb_str_modify(mrb, s1);
362 if (!mrb_string_p(other)) {
363 other = mrb_str_to_str(mrb, other);
365 s2 = mrb_str_ptr(other);
366 len = s1->len + s2->len;
368 if (s1->aux.capa < len) {
369 s1->aux.capa = len;
370 s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, len+1);
372 memcpy(s1->ptr+s1->len, s2->ptr, s2->len);
373 s1->len = len;
374 s1->ptr[len] = '\0';
378 * call-seq: (Caution! String("abcd") remain)
379 * String("abcdefg") = String("abcd") + String("efg")
381 * Returns a new string containing <i>a</i> followed by <i>b</i>; neither argument is modified.
383 mrb_value
384 mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
386 struct RString *s = mrb_str_ptr(a);
387 struct RString *s2 = mrb_str_ptr(b);
388 struct RString *t;
390 t = str_new(mrb, 0, s->len + s2->len);
391 memcpy(t->ptr, s->ptr, s->len);
392 memcpy(t->ptr + s->len, s2->ptr, s2->len);
394 return mrb_obj_value(t);
397 /* 15.2.10.5.2 */
400 * call-seq: (Caution! String("abcd") remain) for stack_argument
401 * String("abcdefg") = String("abcd") + String("efg")
403 * Returns a new string containing the receiver followed by the String argument; the receiver is not modified.
405 static mrb_value
406 mrb_str_plus_m(mrb_state *mrb, mrb_value self)
408 mrb_value str;
410 mrb_get_args(mrb, "S", &str);
411 return mrb_str_plus(mrb, self, str);
415 * call-seq:
416 * len = strlen(String("abcd"))
418 * Returns the length of <i>str</i> in bytes.
420 static mrb_value
421 mrb_str_bytesize(mrb_state *mrb, mrb_value self)
423 struct RString *s = mrb_str_ptr(self);
424 return mrb_fixnum_value(s->len);
427 /* 15.2.10.5.26 */
428 /* 15.2.10.5.33 */
430 * call-seq:
431 * len = strlen(String("abcd"))
433 * Returns the length of <i>str</i>.
435 mrb_value
436 mrb_str_size(mrb_state *mrb, mrb_value self)
438 struct RString *s = mrb_str_ptr(self);
439 return mrb_fixnum_value(s->len);
442 /* 15.2.10.5.1 */
444 * call-seq:
445 * str * integer => new_str
447 * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
448 * the receiver.
450 * "Ho! " * 3 #=> "Ho! Ho! Ho! "
452 static mrb_value
453 mrb_str_times(mrb_state *mrb, mrb_value self)
455 mrb_int n,len,times;
456 struct RString *str2;
457 char *p;
459 mrb_get_args(mrb, "i", &times);
460 if (times < 0) {
461 mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
463 if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) {
464 mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
467 len = RSTRING_LEN(self)*times;
468 str2 = str_new(mrb, 0, len);
469 str_with_class(mrb, str2, self);
470 p = str2->ptr;
471 if (len > 0) {
472 n = RSTRING_LEN(self);
473 memcpy(p, RSTRING_PTR(self), n);
474 while (n <= len/2) {
475 memcpy(p + n, p, n);
476 n *= 2;
478 memcpy(p + n, p, len-n);
480 p[str2->len] = '\0';
482 return mrb_obj_value(str2);
484 /* -------------------------------------------------------------- */
486 #define lesser(a,b) (((a)>(b))?(b):(a))
488 /* ---------------------------*/
490 * call-seq:
491 * mrb_value str1 <=> mrb_value str2 => int
492 * > 1
493 * = 0
494 * < -1
497 mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
499 mrb_int len;
500 mrb_int retval;
501 struct RString *s1 = mrb_str_ptr(str1);
502 struct RString *s2 = mrb_str_ptr(str2);
504 len = lesser(s1->len, s2->len);
505 retval = memcmp(s1->ptr, s2->ptr, len);
506 if (retval == 0) {
507 if (s1->len == s2->len) return 0;
508 if (s1->len > s2->len) return 1;
509 return -1;
511 if (retval > 0) return 1;
512 return -1;
515 /* 15.2.10.5.3 */
518 * call-seq:
519 * str <=> other_str => -1, 0, +1
521 * Comparison---Returns -1 if <i>other_str</i> is less than, 0 if
522 * <i>other_str</i> is equal to, and +1 if <i>other_str</i> is greater than
523 * <i>str</i>. If the strings are of different lengths, and the strings are
524 * equal when compared up to the shortest length, then the longer string is
525 * considered greater than the shorter one. If the variable <code>$=</code> is
526 * <code>false</code>, the comparison is based on comparing the binary values
527 * of each character in the string. In older versions of Ruby, setting
528 * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
529 * in favor of using <code>String#casecmp</code>.
531 * <code><=></code> is the basis for the methods <code><</code>,
532 * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
533 * included from module <code>Comparable</code>. The method
534 * <code>String#==</code> does not use <code>Comparable#==</code>.
536 * "abcdef" <=> "abcde" #=> 1
537 * "abcdef" <=> "abcdef" #=> 0
538 * "abcdef" <=> "abcdefg" #=> -1
539 * "abcdef" <=> "ABCDEF" #=> 1
541 static mrb_value
542 mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
544 mrb_value str2;
545 mrb_int result;
547 mrb_get_args(mrb, "o", &str2);
548 if (!mrb_string_p(str2)) {
549 if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_s", 4))) {
550 return mrb_nil_value();
552 else if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "<=>", 3))) {
553 return mrb_nil_value();
555 else {
556 mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1);
558 if (mrb_nil_p(tmp)) return mrb_nil_value();
559 if (!mrb_fixnum(tmp)) {
560 return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp);
562 result = -mrb_fixnum(tmp);
565 else {
566 result = mrb_str_cmp(mrb, str1, str2);
568 return mrb_fixnum_value(result);
571 static mrb_bool
572 str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
574 const mrb_int len = RSTRING_LEN(str1);
576 if (len != RSTRING_LEN(str2)) return FALSE;
577 if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0)
578 return TRUE;
579 return FALSE;
582 mrb_bool
583 mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
585 if (mrb_obj_equal(mrb, str1, str2)) return TRUE;
586 if (!mrb_string_p(str2)) {
587 if (mrb_nil_p(str2)) return FALSE;
588 if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_str", 6))) {
589 return FALSE;
591 str2 = mrb_funcall(mrb, str2, "to_str", 0);
592 return mrb_equal(mrb, str2, str1);
594 return str_eql(mrb, str1, str2);
597 /* 15.2.10.5.4 */
599 * call-seq:
600 * str == obj => true or false
602 * Equality---
603 * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
604 * Otherwise, returns <code>false</code> or <code>true</code>
606 * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero.
608 static mrb_value
609 mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
611 mrb_value str2;
612 mrb_bool equal_p;
614 mrb_get_args(mrb, "o", &str2);
615 equal_p = mrb_str_equal(mrb, str1, str2);
617 return mrb_bool_value(equal_p);
619 /* ---------------------------------- */
620 mrb_value
621 mrb_str_to_str(mrb_state *mrb, mrb_value str)
623 mrb_value s;
625 if (!mrb_string_p(str)) {
626 s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
627 if (mrb_nil_p(s)) {
628 s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
630 return s;
632 return str;
635 char *
636 mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
638 mrb_value str = mrb_str_to_str(mrb, ptr);
639 return RSTRING_PTR(str);
642 static mrb_value
643 noregexp(mrb_state *mrb, mrb_value self)
645 mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
646 return mrb_nil_value();
649 static void
650 regexp_check(mrb_state *mrb, mrb_value obj)
652 if (!memcmp(mrb_obj_classname(mrb, obj), REGEXP_CLASS, sizeof(REGEXP_CLASS) - 1)) {
653 noregexp(mrb, obj);
657 static inline mrb_int
658 mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
660 const unsigned char *x = xs, *xe = xs + m;
661 const unsigned char *y = ys;
662 int i, qstable[256];
664 /* Preprocessing */
665 for (i = 0; i < 256; ++i)
666 qstable[i] = m + 1;
667 for (; x < xe; ++x)
668 qstable[*x] = xe - x;
669 /* Searching */
670 for (; y + m <= ys + n; y += *(qstable + y[m])) {
671 if (*xs == *y && memcmp(xs, y, m) == 0)
672 return y - ys;
674 return -1;
677 static mrb_int
678 mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
680 const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
682 if (m > n) return -1;
683 else if (m == n) {
684 return memcmp(x0, y0, m) == 0 ? 0 : -1;
686 else if (m < 1) {
687 return 0;
689 else if (m == 1) {
690 const unsigned char *ys = y, *ye = ys + n;
691 for (; y < ye; ++y) {
692 if (*x == *y)
693 return y - ys;
695 return -1;
697 return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
700 static mrb_int
701 mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
703 mrb_int pos;
704 char *s, *sptr;
705 mrb_int len, slen;
707 len = RSTRING_LEN(str);
708 slen = RSTRING_LEN(sub);
709 if (offset < 0) {
710 offset += len;
711 if (offset < 0) return -1;
713 if (len - offset < slen) return -1;
714 s = RSTRING_PTR(str);
715 if (offset) {
716 s += offset;
718 if (slen == 0) return offset;
719 /* need proceed one character at a time */
720 sptr = RSTRING_PTR(sub);
721 slen = RSTRING_LEN(sub);
722 len = RSTRING_LEN(str) - offset;
723 pos = mrb_memsearch(sptr, slen, s, len);
724 if (pos < 0) return pos;
725 return pos + offset;
728 mrb_value
729 mrb_str_dup(mrb_state *mrb, mrb_value str)
731 /* should return shared string */
732 struct RString *s = mrb_str_ptr(str);
734 return mrb_str_new(mrb, s->ptr, s->len);
737 static mrb_value
738 mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
740 mrb_int idx;
742 regexp_check(mrb, indx);
743 switch (mrb_type(indx)) {
744 case MRB_TT_FIXNUM:
745 idx = mrb_fixnum(indx);
747 num_index:
748 str = mrb_str_substr(mrb, str, idx, 1);
749 if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
750 return str;
752 case MRB_TT_STRING:
753 if (mrb_str_index(mrb, str, indx, 0) != -1)
754 return mrb_str_dup(mrb, indx);
755 return mrb_nil_value();
757 case MRB_TT_RANGE:
758 /* check if indx is Range */
760 mrb_int beg, len;
761 mrb_value tmp;
763 len = RSTRING_LEN(str);
764 if (mrb_range_beg_len(mrb, indx, &beg, &len, len)) {
765 tmp = mrb_str_subseq(mrb, str, beg, len);
766 return tmp;
768 else {
769 return mrb_nil_value();
772 default:
773 idx = mrb_fixnum(indx);
774 goto num_index;
776 return mrb_nil_value(); /* not reached */
779 /* 15.2.10.5.6 */
780 /* 15.2.10.5.34 */
782 * call-seq:
783 * str[fixnum] => fixnum or nil
784 * str[fixnum, fixnum] => new_str or nil
785 * str[range] => new_str or nil
786 * str[regexp] => new_str or nil
787 * str[regexp, fixnum] => new_str or nil
788 * str[other_str] => new_str or nil
789 * str.slice(fixnum) => fixnum or nil
790 * str.slice(fixnum, fixnum) => new_str or nil
791 * str.slice(range) => new_str or nil
792 * str.slice(regexp) => new_str or nil
793 * str.slice(regexp, fixnum) => new_str or nil
794 * str.slice(other_str) => new_str or nil
796 * Element Reference---If passed a single <code>Fixnum</code>, returns the code
797 * of the character at that position. If passed two <code>Fixnum</code>
798 * objects, returns a substring starting at the offset given by the first, and
799 * a length given by the second. If given a range, a substring containing
800 * characters at offsets given by the range is returned. In all three cases, if
801 * an offset is negative, it is counted from the end of <i>str</i>. Returns
802 * <code>nil</code> if the initial offset falls outside the string, the length
803 * is negative, or the beginning of the range is greater than the end.
805 * If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
806 * returned. If a numeric parameter follows the regular expression, that
807 * component of the <code>MatchData</code> is returned instead. If a
808 * <code>String</code> is given, that string is returned if it occurs in
809 * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
810 * match.
812 * a = "hello there"
813 * a[1] #=> 101(1.8.7) "e"(1.9.2)
814 * a[1,3] #=> "ell"
815 * a[1..3] #=> "ell"
816 * a[-3,2] #=> "er"
817 * a[-4..-2] #=> "her"
818 * a[12..-1] #=> nil
819 * a[-2..-4] #=> ""
820 * a[/[aeiou](.)\1/] #=> "ell"
821 * a[/[aeiou](.)\1/, 0] #=> "ell"
822 * a[/[aeiou](.)\1/, 1] #=> "l"
823 * a[/[aeiou](.)\1/, 2] #=> nil
824 * a["lo"] #=> "lo"
825 * a["bye"] #=> nil
827 static mrb_value
828 mrb_str_aref_m(mrb_state *mrb, mrb_value str)
830 mrb_value a1, a2;
831 int argc;
833 argc = mrb_get_args(mrb, "o|o", &a1, &a2);
834 if (argc == 2) {
835 regexp_check(mrb, a1);
836 return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
838 if (argc != 1) {
839 mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
841 return mrb_str_aref(mrb, str, a1);
844 /* 15.2.10.5.8 */
846 * call-seq:
847 * str.capitalize! => str or nil
849 * Modifies <i>str</i> by converting the first character to uppercase and the
850 * remainder to lowercase. Returns <code>nil</code> if no changes are made.
852 * a = "hello"
853 * a.capitalize! #=> "Hello"
854 * a #=> "Hello"
855 * a.capitalize! #=> nil
857 static mrb_value
858 mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
860 char *p, *pend;
861 int modify = 0;
862 struct RString *s = mrb_str_ptr(str);
864 mrb_str_modify(mrb, s);
865 if (s->len == 0 || !s->ptr) return mrb_nil_value();
866 p = s->ptr; pend = s->ptr + s->len;
867 if (ISLOWER(*p)) {
868 *p = TOUPPER(*p);
869 modify = 1;
871 while (++p < pend) {
872 if (ISUPPER(*p)) {
873 *p = TOLOWER(*p);
874 modify = 1;
877 if (modify) return str;
878 return mrb_nil_value();
881 /* 15.2.10.5.7 */
883 * call-seq:
884 * str.capitalize => new_str
886 * Returns a copy of <i>str</i> with the first character converted to uppercase
887 * and the remainder to lowercase.
889 * "hello".capitalize #=> "Hello"
890 * "HELLO".capitalize #=> "Hello"
891 * "123ABC".capitalize #=> "123abc"
893 static mrb_value
894 mrb_str_capitalize(mrb_state *mrb, mrb_value self)
896 mrb_value str;
898 str = mrb_str_dup(mrb, self);
899 mrb_str_capitalize_bang(mrb, str);
900 return str;
903 /* 15.2.10.5.10 */
905 * call-seq:
906 * str.chomp!(separator=$/) => str or nil
908 * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
909 * returning <i>str</i>, or <code>nil</code> if no modifications were made.
911 static mrb_value
912 mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
914 mrb_value rs;
915 mrb_int newline;
916 char *p, *pp;
917 mrb_int rslen;
918 mrb_int len;
919 struct RString *s = mrb_str_ptr(str);
921 mrb_str_modify(mrb, s);
922 len = s->len;
923 if (mrb_get_args(mrb, "|S", &rs) == 0) {
924 if (len == 0) return mrb_nil_value();
925 smart_chomp:
926 if (s->ptr[len-1] == '\n') {
927 s->len--;
928 if (s->len > 0 &&
929 s->ptr[s->len-1] == '\r') {
930 s->len--;
933 else if (s->ptr[len-1] == '\r') {
934 s->len--;
936 else {
937 return mrb_nil_value();
939 s->ptr[s->len] = '\0';
940 return str;
943 if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value();
944 p = s->ptr;
945 rslen = RSTRING_LEN(rs);
946 if (rslen == 0) {
947 while (len>0 && p[len-1] == '\n') {
948 len--;
949 if (len>0 && p[len-1] == '\r')
950 len--;
952 if (len < s->len) {
953 s->len = len;
954 p[len] = '\0';
955 return str;
957 return mrb_nil_value();
959 if (rslen > len) return mrb_nil_value();
960 newline = RSTRING_PTR(rs)[rslen-1];
961 if (rslen == 1 && newline == '\n')
962 newline = RSTRING_PTR(rs)[rslen-1];
963 if (rslen == 1 && newline == '\n')
964 goto smart_chomp;
966 pp = p + len - rslen;
967 if (p[len-1] == newline &&
968 (rslen <= 1 ||
969 memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
970 s->len = len - rslen;
971 p[s->len] = '\0';
972 return str;
974 return mrb_nil_value();
977 /* 15.2.10.5.9 */
979 * call-seq:
980 * str.chomp(separator=$/) => new_str
982 * Returns a new <code>String</code> with the given record separator removed
983 * from the end of <i>str</i> (if present). If <code>$/</code> has not been
984 * changed from the default Ruby record separator, then <code>chomp</code> also
985 * removes carriage return characters (that is it will remove <code>\n</code>,
986 * <code>\r</code>, and <code>\r\n</code>).
988 * "hello".chomp #=> "hello"
989 * "hello\n".chomp #=> "hello"
990 * "hello\r\n".chomp #=> "hello"
991 * "hello\n\r".chomp #=> "hello\n"
992 * "hello\r".chomp #=> "hello"
993 * "hello \n there".chomp #=> "hello \n there"
994 * "hello".chomp("llo") #=> "he"
996 static mrb_value
997 mrb_str_chomp(mrb_state *mrb, mrb_value self)
999 mrb_value str;
1001 str = mrb_str_dup(mrb, self);
1002 mrb_str_chomp_bang(mrb, str);
1003 return str;
1006 /* 15.2.10.5.12 */
1008 * call-seq:
1009 * str.chop! => str or nil
1011 * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
1012 * or <code>nil</code> if <i>str</i> is the empty string. See also
1013 * <code>String#chomp!</code>.
1015 static mrb_value
1016 mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
1018 struct RString *s = mrb_str_ptr(str);
1020 mrb_str_modify(mrb, s);
1021 if (s->len > 0) {
1022 int len;
1023 len = s->len - 1;
1024 if (s->ptr[len] == '\n') {
1025 if (len > 0 &&
1026 s->ptr[len-1] == '\r') {
1027 len--;
1030 s->len = len;
1031 s->ptr[len] = '\0';
1032 return str;
1034 return mrb_nil_value();
1037 /* 15.2.10.5.11 */
1039 * call-seq:
1040 * str.chop => new_str
1042 * Returns a new <code>String</code> with the last character removed. If the
1043 * string ends with <code>\r\n</code>, both characters are removed. Applying
1044 * <code>chop</code> to an empty string returns an empty
1045 * string. <code>String#chomp</code> is often a safer alternative, as it leaves
1046 * the string unchanged if it doesn't end in a record separator.
1048 * "string\r\n".chop #=> "string"
1049 * "string\n\r".chop #=> "string\n"
1050 * "string\n".chop #=> "string"
1051 * "string".chop #=> "strin"
1052 * "x".chop #=> ""
1054 static mrb_value
1055 mrb_str_chop(mrb_state *mrb, mrb_value self)
1057 mrb_value str;
1058 str = mrb_str_dup(mrb, self);
1059 mrb_str_chop_bang(mrb, str);
1060 return str;
1063 /* 15.2.10.5.14 */
1065 * call-seq:
1066 * str.downcase! => str or nil
1068 * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
1069 * changes were made.
1071 static mrb_value
1072 mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
1074 char *p, *pend;
1075 int modify = 0;
1076 struct RString *s = mrb_str_ptr(str);
1078 mrb_str_modify(mrb, s);
1079 p = s->ptr;
1080 pend = s->ptr + s->len;
1081 while (p < pend) {
1082 if (ISUPPER(*p)) {
1083 *p = TOLOWER(*p);
1084 modify = 1;
1086 p++;
1089 if (modify) return str;
1090 return mrb_nil_value();
1093 /* 15.2.10.5.13 */
1095 * call-seq:
1096 * str.downcase => new_str
1098 * Returns a copy of <i>str</i> with all uppercase letters replaced with their
1099 * lowercase counterparts. The operation is locale insensitive---only
1100 * characters ``A'' to ``Z'' are affected.
1102 * "hEllO".downcase #=> "hello"
1104 static mrb_value
1105 mrb_str_downcase(mrb_state *mrb, mrb_value self)
1107 mrb_value str;
1109 str = mrb_str_dup(mrb, self);
1110 mrb_str_downcase_bang(mrb, str);
1111 return str;
1114 /* 15.2.10.5.16 */
1116 * call-seq:
1117 * str.empty? => true or false
1119 * Returns <code>true</code> if <i>str</i> has a length of zero.
1121 * "hello".empty? #=> false
1122 * "".empty? #=> true
1124 static mrb_value
1125 mrb_str_empty_p(mrb_state *mrb, mrb_value self)
1127 struct RString *s = mrb_str_ptr(self);
1129 return mrb_bool_value(s->len == 0);
1132 /* 15.2.10.5.17 */
1134 * call-seq:
1135 * str.eql?(other) => true or false
1137 * Two strings are equal if they have the same length and content.
1139 static mrb_value
1140 mrb_str_eql(mrb_state *mrb, mrb_value self)
1142 mrb_value str2;
1143 mrb_bool eql_p;
1145 mrb_get_args(mrb, "o", &str2);
1146 eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2);
1148 return mrb_bool_value(eql_p);
1151 static mrb_value
1152 mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
1154 struct RString *orig, *s;
1155 mrb_shared_string *shared;
1157 orig = mrb_str_ptr(str);
1158 str_make_shared(mrb, orig);
1159 shared = orig->aux.shared;
1160 s = mrb_obj_alloc_string(mrb);
1161 s->ptr = orig->ptr + beg;
1162 s->len = len;
1163 s->aux.shared = shared;
1164 s->flags |= MRB_STR_SHARED;
1165 shared->refcnt++;
1167 return mrb_obj_value(s);
1170 mrb_value
1171 mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
1173 mrb_value str2;
1175 if (len < 0) return mrb_nil_value();
1176 if (!RSTRING_LEN(str)) {
1177 len = 0;
1179 if (beg > RSTRING_LEN(str)) return mrb_nil_value();
1180 if (beg < 0) {
1181 beg += RSTRING_LEN(str);
1182 if (beg < 0) return mrb_nil_value();
1184 if (beg + len > RSTRING_LEN(str))
1185 len = RSTRING_LEN(str) - beg;
1186 if (len <= 0) {
1187 len = 0;
1189 str2 = mrb_str_subseq(mrb, str, beg, len);
1191 return str2;
1194 mrb_value
1195 mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2)
1197 mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
1198 return str;
1201 mrb_int
1202 mrb_str_hash(mrb_state *mrb, mrb_value str)
1204 /* 1-8-7 */
1205 struct RString *s = mrb_str_ptr(str);
1206 mrb_int len = s->len;
1207 char *p = s->ptr;
1208 mrb_int key = 0;
1210 while (len--) {
1211 key = key*65599 + *p;
1212 p++;
1214 key = key + (key>>5);
1215 return key;
1218 /* 15.2.10.5.20 */
1220 * call-seq:
1221 * str.hash => fixnum
1223 * Return a hash based on the string's length and content.
1225 static mrb_value
1226 mrb_str_hash_m(mrb_state *mrb, mrb_value self)
1228 mrb_int key = mrb_str_hash(mrb, self);
1229 return mrb_fixnum_value(key);
1232 /* 15.2.10.5.21 */
1234 * call-seq:
1235 * str.include? other_str => true or false
1236 * str.include? fixnum => true or false
1238 * Returns <code>true</code> if <i>str</i> contains the given string or
1239 * character.
1241 * "hello".include? "lo" #=> true
1242 * "hello".include? "ol" #=> false
1243 * "hello".include? ?h #=> true
1245 static mrb_value
1246 mrb_str_include(mrb_state *mrb, mrb_value self)
1248 mrb_int i;
1249 mrb_value str2;
1250 mrb_bool include_p;
1252 mrb_get_args(mrb, "o", &str2);
1253 if (mrb_type(str2) == MRB_TT_FIXNUM) {
1254 include_p = (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self)) != NULL);
1256 else {
1257 str2 = mrb_str_to_str(mrb, str2);
1258 i = mrb_str_index(mrb, self, str2, 0);
1260 include_p = (i != -1);
1263 return mrb_bool_value(include_p);
1266 /* 15.2.10.5.22 */
1268 * call-seq:
1269 * str.index(substring [, offset]) => fixnum or nil
1270 * str.index(fixnum [, offset]) => fixnum or nil
1271 * str.index(regexp [, offset]) => fixnum or nil
1273 * Returns the index of the first occurrence of the given
1274 * <i>substring</i>,
1275 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
1276 * Returns
1277 * <code>nil</code> if not found.
1278 * If the second parameter is present, it
1279 * specifies the position in the string to begin the search.
1281 * "hello".index('e') #=> 1
1282 * "hello".index('lo') #=> 3
1283 * "hello".index('a') #=> nil
1284 * "hello".index(101) #=> 1(101=0x65='e')
1285 * "hello".index(/[aeiou]/, -3) #=> 4
1287 static mrb_value
1288 mrb_str_index_m(mrb_state *mrb, mrb_value str)
1290 mrb_value *argv;
1291 int argc;
1293 mrb_value sub;
1294 mrb_int pos;
1296 mrb_get_args(mrb, "*", &argv, &argc);
1297 if (argc == 2) {
1298 pos = mrb_fixnum(argv[1]);
1299 sub = argv[0];
1301 else {
1302 pos = 0;
1303 if (argc > 0)
1304 sub = argv[0];
1305 else
1306 sub = mrb_nil_value();
1309 regexp_check(mrb, sub);
1310 if (pos < 0) {
1311 pos += RSTRING_LEN(str);
1312 if (pos < 0) {
1313 return mrb_nil_value();
1317 switch (mrb_type(sub)) {
1318 case MRB_TT_FIXNUM: {
1319 int c = mrb_fixnum(sub);
1320 mrb_int len = RSTRING_LEN(str);
1321 unsigned char *p = (unsigned char*)RSTRING_PTR(str);
1323 for (;pos<len;pos++) {
1324 if (p[pos] == c) return mrb_fixnum_value(pos);
1326 return mrb_nil_value();
1329 default: {
1330 mrb_value tmp;
1332 tmp = mrb_check_string_type(mrb, sub);
1333 if (mrb_nil_p(tmp)) {
1334 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
1336 sub = tmp;
1338 /* fall through */
1339 case MRB_TT_STRING:
1340 pos = mrb_str_index(mrb, str, sub, pos);
1341 break;
1344 if (pos == -1) return mrb_nil_value();
1345 return mrb_fixnum_value(pos);
1348 #define STR_REPLACE_SHARED_MIN 10
1350 static mrb_value
1351 str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
1353 if (s2->flags & MRB_STR_SHARED) {
1354 L_SHARE:
1355 if (s1->flags & MRB_STR_SHARED){
1356 str_decref(mrb, s1->aux.shared);
1358 else {
1359 mrb_free(mrb, s1->ptr);
1361 s1->ptr = s2->ptr;
1362 s1->len = s2->len;
1363 s1->aux.shared = s2->aux.shared;
1364 s1->flags |= MRB_STR_SHARED;
1365 s1->aux.shared->refcnt++;
1367 else if (s2->len > STR_REPLACE_SHARED_MIN) {
1368 str_make_shared(mrb, s2);
1369 goto L_SHARE;
1371 else {
1372 if (s1->flags & MRB_STR_SHARED) {
1373 str_decref(mrb, s1->aux.shared);
1374 s1->flags &= ~MRB_STR_SHARED;
1375 s1->ptr = (char *)mrb_malloc(mrb, s2->len+1);
1377 else {
1378 s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, s2->len+1);
1380 memcpy(s1->ptr, s2->ptr, s2->len);
1381 s1->ptr[s2->len] = 0;
1382 s1->len = s2->len;
1383 s1->aux.capa = s2->len;
1385 return mrb_obj_value(s1);
1388 /* 15.2.10.5.24 */
1389 /* 15.2.10.5.28 */
1391 * call-seq:
1392 * str.replace(other_str) => str
1394 * s = "hello" #=> "hello"
1395 * s.replace "world" #=> "world"
1397 static mrb_value
1398 mrb_str_replace(mrb_state *mrb, mrb_value str)
1400 mrb_value str2;
1402 mrb_get_args(mrb, "S", &str2);
1403 return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2));
1406 /* 15.2.10.5.23 */
1408 * call-seq:
1409 * String.new(str="") => new_str
1411 * Returns a new string object containing a copy of <i>str</i>.
1413 static mrb_value
1414 mrb_str_init(mrb_state *mrb, mrb_value self)
1416 mrb_value str2;
1418 if (mrb_get_args(mrb, "|S", &str2) == 1) {
1419 str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));
1421 return self;
1424 /* 15.2.10.5.25 */
1425 /* 15.2.10.5.41 */
1427 * call-seq:
1428 * str.intern => symbol
1429 * str.to_sym => symbol
1431 * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
1432 * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
1434 * "Koala".intern #=> :Koala
1435 * s = 'cat'.to_sym #=> :cat
1436 * s == :cat #=> true
1437 * s = '@cat'.to_sym #=> :@cat
1438 * s == :@cat #=> true
1440 * This can also be used to create symbols that cannot be represented using the
1441 * <code>:xxx</code> notation.
1443 * 'cat and dog'.to_sym #=> :"cat and dog"
1445 mrb_value
1446 mrb_str_intern(mrb_state *mrb, mrb_value self)
1448 mrb_sym id;
1450 id = mrb_intern_str(mrb, self);
1451 return mrb_symbol_value(id);
1454 /* ---------------------------------- */
1455 mrb_value
1456 mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
1458 mrb_value str;
1460 if (mrb_string_p(obj)) {
1461 return obj;
1463 str = mrb_funcall(mrb, obj, "to_s", 0);
1464 if (!mrb_string_p(str))
1465 return mrb_any_to_s(mrb, obj);
1466 return str;
1469 mrb_value
1470 mrb_ptr_to_str(mrb_state *mrb, void *p)
1472 struct RString *p_str;
1473 char *p1;
1474 char *p2;
1475 uintptr_t n = (uintptr_t)p;
1477 p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4);
1478 p1 = p_str->ptr;
1479 *p1++ = '0';
1480 *p1++ = 'x';
1481 p2 = p1;
1483 do {
1484 *p2++ = mrb_digitmap[n % 16];
1485 n /= 16;
1486 } while (n > 0);
1487 *p2 = '\0';
1488 p_str->len = (mrb_int)(p2 - p_str->ptr);
1490 while (p1 < p2) {
1491 const char c = *p1;
1492 *p1++ = *--p2;
1493 *p2 = c;
1496 return mrb_obj_value(p_str);
1499 mrb_value
1500 mrb_string_type(mrb_state *mrb, mrb_value str)
1502 return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
1505 mrb_value
1506 mrb_check_string_type(mrb_state *mrb, mrb_value str)
1508 return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
1511 /* ---------------------------------- */
1512 /* 15.2.10.5.29 */
1514 * call-seq:
1515 * str.reverse => new_str
1517 * Returns a new string with the characters from <i>str</i> in reverse order.
1519 * "stressed".reverse #=> "desserts"
1521 static mrb_value
1522 mrb_str_reverse(mrb_state *mrb, mrb_value str)
1524 struct RString *s2;
1525 char *s, *e, *p;
1527 if (RSTRING(str)->len <= 1) return mrb_str_dup(mrb, str);
1529 s2 = str_new(mrb, 0, RSTRING(str)->len);
1530 str_with_class(mrb, s2, str);
1531 s = RSTRING_PTR(str); e = RSTRING_END(str) - 1;
1532 p = s2->ptr;
1534 while (e >= s) {
1535 *p++ = *e--;
1537 return mrb_obj_value(s2);
1540 /* 15.2.10.5.30 */
1542 * call-seq:
1543 * str.reverse! => str
1545 * Reverses <i>str</i> in place.
1547 static mrb_value
1548 mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
1550 struct RString *s = mrb_str_ptr(str);
1551 char *p, *e;
1552 char c;
1554 mrb_str_modify(mrb, s);
1555 if (s->len > 1) {
1556 p = s->ptr;
1557 e = p + s->len - 1;
1558 while (p < e) {
1559 c = *p;
1560 *p++ = *e;
1561 *e-- = c;
1564 return str;
1568 * call-seq:
1569 * str.rindex(substring [, fixnum]) => fixnum or nil
1570 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1571 * str.rindex(regexp [, fixnum]) => fixnum or nil
1573 * Returns the index of the last occurrence of the given <i>substring</i>,
1574 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1575 * <code>nil</code> if not found. If the second parameter is present, it
1576 * specifies the position in the string to end the search---characters beyond
1577 * this point will not be considered.
1579 * "hello".rindex('e') #=> 1
1580 * "hello".rindex('l') #=> 3
1581 * "hello".rindex('a') #=> nil
1582 * "hello".rindex(101) #=> 1
1583 * "hello".rindex(/[aeiou]/, -2) #=> 1
1585 static mrb_int
1586 mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
1588 char *s, *sbeg, *t;
1589 struct RString *ps = mrb_str_ptr(str);
1590 struct RString *psub = mrb_str_ptr(sub);
1591 mrb_int len = psub->len;
1593 /* substring longer than string */
1594 if (ps->len < len) return -1;
1595 if (ps->len - pos < len) {
1596 pos = ps->len - len;
1598 sbeg = ps->ptr;
1599 s = ps->ptr + pos;
1600 t = psub->ptr;
1601 if (len) {
1602 while (sbeg <= s) {
1603 if (memcmp(s, t, len) == 0) {
1604 return s - ps->ptr;
1606 s--;
1608 return -1;
1610 else {
1611 return pos;
1615 /* 15.2.10.5.31 */
1617 * call-seq:
1618 * str.rindex(substring [, fixnum]) => fixnum or nil
1619 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1620 * str.rindex(regexp [, fixnum]) => fixnum or nil
1622 * Returns the index of the last occurrence of the given <i>substring</i>,
1623 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1624 * <code>nil</code> if not found. If the second parameter is present, it
1625 * specifies the position in the string to end the search---characters beyond
1626 * this point will not be considered.
1628 * "hello".rindex('e') #=> 1
1629 * "hello".rindex('l') #=> 3
1630 * "hello".rindex('a') #=> nil
1631 * "hello".rindex(101) #=> 1
1632 * "hello".rindex(/[aeiou]/, -2) #=> 1
1634 static mrb_value
1635 mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
1637 mrb_value *argv;
1638 int argc;
1639 mrb_value sub;
1640 mrb_value vpos;
1641 int pos, len = RSTRING_LEN(str);
1643 mrb_get_args(mrb, "*", &argv, &argc);
1644 if (argc == 2) {
1645 sub = argv[0];
1646 vpos = argv[1];
1647 pos = mrb_fixnum(vpos);
1648 if (pos < 0) {
1649 pos += len;
1650 if (pos < 0) {
1651 regexp_check(mrb, sub);
1652 return mrb_nil_value();
1655 if (pos > len) pos = len;
1657 else {
1658 pos = len;
1659 if (argc > 0)
1660 sub = argv[0];
1661 else
1662 sub = mrb_nil_value();
1664 regexp_check(mrb, sub);
1666 switch (mrb_type(sub)) {
1667 case MRB_TT_FIXNUM: {
1668 int c = mrb_fixnum(sub);
1669 mrb_int len = RSTRING_LEN(str);
1670 unsigned char *p = (unsigned char*)RSTRING_PTR(str);
1672 for (pos=len;pos>=0;pos--) {
1673 if (p[pos] == c) return mrb_fixnum_value(pos);
1675 return mrb_nil_value();
1678 default: {
1679 mrb_value tmp;
1681 tmp = mrb_check_string_type(mrb, sub);
1682 if (mrb_nil_p(tmp)) {
1683 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
1685 sub = tmp;
1687 /* fall through */
1688 case MRB_TT_STRING:
1689 pos = mrb_str_rindex(mrb, str, sub, pos);
1690 if (pos >= 0) return mrb_fixnum_value(pos);
1691 break;
1693 } /* end of switch (TYPE(sub)) */
1694 return mrb_nil_value();
/*
 * Byte classification table: entry is 1 for the six ASCII whitespace
 * bytes (\t \n \v \f \r and space) and 0 for every other byte value.
 */
static const char isspacetable[256] = {
  /* 0x00 */ 0,0,0,0, 0,0,0,0, 0,1,1,1, 1,1,0,0,  /* 0x09..0x0d: \t \n \v \f \r */
  /* 0x10 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x20 */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 0x20: space */
  /* 0x30 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x40 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x50 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x60 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x70 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x80 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x90 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0xa0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0xb0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0xc0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0xd0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0xe0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0xf0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
};

/* Table lookup; the cast keeps negative char values inside the table. */
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
1718 /* 15.2.10.5.35 */
1721 * call-seq:
1722 * str.split(pattern=$;, [limit]) => anArray
1724 * Divides <i>str</i> into substrings based on a delimiter, returning an array
1725 * of these substrings.
1727 * If <i>pattern</i> is a <code>String</code>, then its contents are used as
1728 * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
1729 * space, <i>str</i> is split on whitespace, with leading whitespace and runs
1730 * of contiguous whitespace characters ignored.
1732 * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
1733 * pattern matches. Whenever the pattern matches a zero-length string,
1734 * <i>str</i> is split into individual characters.
1736 * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
1737 * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
1738 * split on whitespace as if ` ' were specified.
1740 * If the <i>limit</i> parameter is omitted, trailing null fields are
1741 * suppressed. If <i>limit</i> is a positive number, at most that number of
1742 * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
1743 * string is returned as the only entry in an array). If negative, there is no
1744 * limit to the number of fields returned, and trailing null fields are not
1745 * suppressed.
1747 * " now's the time".split #=> ["now's", "the", "time"]
1748 * " now's the time".split(' ') #=> ["now's", "the", "time"]
1749 * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
1750 * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
1751 * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
1752 * "hello".split(//, 3) #=> ["h", "e", "llo"]
1753 * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
1755 * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
1756 * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
1757 * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
1758 * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
1761 static mrb_value
1762 mrb_str_split_m(mrb_state *mrb, mrb_value str)
1764 int argc;
1765 mrb_value spat = mrb_nil_value();
1766 enum {awk, string, regexp} split_type = string;
1767 long i = 0, lim_p;
1768 mrb_int beg;
1769 mrb_int end;
1770 mrb_int lim = 0;
1771 mrb_value result, tmp;
1773 argc = mrb_get_args(mrb, "|oi", &spat, &lim);
1774 lim_p = (lim > 0 && argc == 2);
1775 if (argc == 2) {
1776 if (lim == 1) {
1777 if (RSTRING_LEN(str) == 0)
1778 return mrb_ary_new_capa(mrb, 0);
1779 return mrb_ary_new_from_values(mrb, 1, &str);
1781 i = 1;
1784 if (argc == 0 || mrb_nil_p(spat)) {
1785 split_type = awk;
1787 else {
1788 if (mrb_string_p(spat)) {
1789 split_type = string;
1790 if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
1791 split_type = awk;
1794 else {
1795 noregexp(mrb, str);
1799 result = mrb_ary_new(mrb);
1800 beg = 0;
1801 if (split_type == awk) {
1802 char *ptr = RSTRING_PTR(str);
1803 char *eptr = RSTRING_END(str);
1804 char *bptr = ptr;
1805 int skip = 1;
1806 unsigned int c;
1808 end = beg;
1809 while (ptr < eptr) {
1810 int ai = mrb_gc_arena_save(mrb);
1811 c = (unsigned char)*ptr++;
1812 if (skip) {
1813 if (ascii_isspace(c)) {
1814 beg = ptr - bptr;
1816 else {
1817 end = ptr - bptr;
1818 skip = 0;
1819 if (lim_p && lim <= i) break;
1822 else if (ascii_isspace(c)) {
1823 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
1824 mrb_gc_arena_restore(mrb, ai);
1825 skip = 1;
1826 beg = ptr - bptr;
1827 if (lim_p) ++i;
1829 else {
1830 end = ptr - bptr;
1834 else if (split_type == string) {
1835 char *ptr = RSTRING_PTR(str);
1836 char *temp = ptr;
1837 char *eptr = RSTRING_END(str);
1838 mrb_int slen = RSTRING_LEN(spat);
1840 if (slen == 0) {
1841 int ai = mrb_gc_arena_save(mrb);
1842 while (ptr < eptr) {
1843 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1));
1844 mrb_gc_arena_restore(mrb, ai);
1845 ptr++;
1846 if (lim_p && lim <= ++i) break;
1849 else {
1850 char *sptr = RSTRING_PTR(spat);
1851 int ai = mrb_gc_arena_save(mrb);
1853 while (ptr < eptr &&
1854 (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) {
1855 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
1856 mrb_gc_arena_restore(mrb, ai);
1857 ptr += end + slen;
1858 if (lim_p && lim <= ++i) break;
1861 beg = ptr - temp;
1863 else {
1864 noregexp(mrb, str);
1866 if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
1867 if (RSTRING_LEN(str) == beg) {
1868 tmp = mrb_str_new_empty(mrb, str);
1870 else {
1871 tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
1873 mrb_ary_push(mrb, result, tmp);
1875 if (!lim_p && lim == 0) {
1876 mrb_int len;
1877 while ((len = RARRAY_LEN(result)) > 0 &&
1878 (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
1879 mrb_ary_pop(mrb, result);
1882 return result;
1885 mrb_value
1886 mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
1888 char *end;
1889 char sign = 1;
1890 int c;
1891 unsigned long n;
1892 mrb_int val;
1894 #undef ISDIGIT
1895 #define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
1896 #define conv_digit(c) \
1897 (!ISASCII(c) ? -1 : \
1898 isdigit(c) ? ((c) - '0') : \
1899 islower(c) ? ((c) - 'a' + 10) : \
1900 isupper(c) ? ((c) - 'A' + 10) : \
1903 if (!str) {
1904 if (badcheck) goto bad;
1905 return mrb_fixnum_value(0);
1907 while (ISSPACE(*str)) str++;
1909 if (str[0] == '+') {
1910 str++;
1912 else if (str[0] == '-') {
1913 str++;
1914 sign = 0;
1916 if (str[0] == '+' || str[0] == '-') {
1917 if (badcheck) goto bad;
1918 return mrb_fixnum_value(0);
1920 if (base <= 0) {
1921 if (str[0] == '0') {
1922 switch (str[1]) {
1923 case 'x': case 'X':
1924 base = 16;
1925 break;
1926 case 'b': case 'B':
1927 base = 2;
1928 break;
1929 case 'o': case 'O':
1930 base = 8;
1931 break;
1932 case 'd': case 'D':
1933 base = 10;
1934 break;
1935 default:
1936 base = 8;
1939 else if (base < -1) {
1940 base = -base;
1942 else {
1943 base = 10;
1946 switch (base) {
1947 case 2:
1948 if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
1949 str += 2;
1951 break;
1952 case 3:
1953 break;
1954 case 8:
1955 if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
1956 str += 2;
1958 case 4: case 5: case 6: case 7:
1959 break;
1960 case 10:
1961 if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
1962 str += 2;
1964 case 9: case 11: case 12: case 13: case 14: case 15:
1965 break;
1966 case 16:
1967 if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
1968 str += 2;
1970 break;
1971 default:
1972 if (base < 2 || 36 < base) {
1973 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
1975 break;
1976 } /* end of switch (base) { */
1977 if (*str == '0') { /* squeeze preceeding 0s */
1978 int us = 0;
1979 while ((c = *++str) == '0' || c == '_') {
1980 if (c == '_') {
1981 if (++us >= 2)
1982 break;
1984 else
1985 us = 0;
1987 if (!(c = *str) || ISSPACE(c)) --str;
1989 c = *str;
1990 c = conv_digit(c);
1991 if (c < 0 || c >= base) {
1992 if (badcheck) goto bad;
1993 return mrb_fixnum_value(0);
1996 n = strtoul((char*)str, &end, base);
1997 if (n > MRB_INT_MAX) {
1998 mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer", mrb_str_new_cstr(mrb, str));
2000 val = n;
2001 if (badcheck) {
2002 if (end == str) goto bad; /* no number */
2003 while (*end && ISSPACE(*end)) end++;
2004 if (*end) goto bad; /* trailing garbage */
2007 return mrb_fixnum_value(sign ? val : -val);
2008 bad:
2009 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)", mrb_str_new_cstr(mrb, str));
2010 /* not reached */
2011 return mrb_fixnum_value(0);
2014 char *
2015 mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
2017 struct RString *ps = mrb_str_ptr(*ptr);
2018 char *s = ps->ptr;
2020 if (!s || ps->len != strlen(s)) {
2021 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
2023 return s;
2026 mrb_value
2027 mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck)
2029 char *s;
2030 int len;
2032 str = mrb_str_to_str(mrb, str);
2033 if (badcheck) {
2034 s = mrb_string_value_cstr(mrb, &str);
2036 else {
2037 s = RSTRING_PTR(str);
2039 if (s) {
2040 len = RSTRING_LEN(str);
2041 if (s[len]) { /* no sentinel somehow */
2042 struct RString *temp_str = str_new(mrb, s, len);
2043 s = temp_str->ptr;
2046 return mrb_cstr_to_inum(mrb, s, base, badcheck);
2049 /* 15.2.10.5.38 */
2051 * call-seq:
2052 * str.to_i(base=10) => integer
2054 * Returns the result of interpreting leading characters in <i>str</i> as an
2055 * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
2056 * end of a valid number are ignored. If there is not a valid number at the
2057 * start of <i>str</i>, <code>0</code> is returned. This method never raises an
2058 * exception.
2060 * "12345".to_i #=> 12345
2061 * "99 red balloons".to_i #=> 99
2062 * "0a".to_i #=> 0
2063 * "0a".to_i(16) #=> 10
2064 * "hello".to_i #=> 0
2065 * "1100101".to_i(2) #=> 101
2066 * "1100101".to_i(8) #=> 294977
2067 * "1100101".to_i(10) #=> 1100101
2068 * "1100101".to_i(16) #=> 17826049
2070 static mrb_value
2071 mrb_str_to_i(mrb_state *mrb, mrb_value self)
2073 mrb_value *argv;
2074 int argc;
2075 int base;
2077 mrb_get_args(mrb, "*", &argv, &argc);
2078 if (argc == 0)
2079 base = 10;
2080 else
2081 base = mrb_fixnum(argv[0]);
2083 if (base < 0) {
2084 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
2086 return mrb_str_to_inum(mrb, self, base, 0/*Qfalse*/);
2089 double
2090 mrb_cstr_to_dbl(mrb_state *mrb, const char * p, int badcheck)
2092 char *end;
2093 double d;
2094 #if !defined(DBL_DIG)
2095 # define DBL_DIG 16
2096 #endif
2098 enum {max_width = 20};
2099 #define OutOfRange() (((w = end - p) > max_width) ? \
2100 (w = max_width, ellipsis = "...") : \
2101 (w = (int)(end - p), ellipsis = ""))
2103 if (!p) return 0.0;
2104 while (ISSPACE(*p)) p++;
2106 if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2107 return 0.0;
2109 d = strtod(p, &end);
2110 if (p == end) {
2111 if (badcheck) {
2112 bad:
2113 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p));
2114 /* not reached */
2116 return d;
2118 if (*end) {
2119 char buf[DBL_DIG * 4 + 10];
2120 char *n = buf;
2121 char *e = buf + sizeof(buf) - 1;
2122 char prev = 0;
2124 while (p < end && n < e) prev = *n++ = *p++;
2125 while (*p) {
2126 if (*p == '_') {
2127 /* remove underscores between digits */
2128 if (badcheck) {
2129 if (n == buf || !ISDIGIT(prev)) goto bad;
2130 ++p;
2131 if (!ISDIGIT(*p)) goto bad;
2133 else {
2134 while (*++p == '_');
2135 continue;
2138 prev = *p++;
2139 if (n < e) *n++ = prev;
2141 *n = '\0';
2142 p = buf;
2144 if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2145 return 0.0;
2148 d = strtod(p, &end);
2149 if (badcheck) {
2150 if (!end || p == end) goto bad;
2151 while (*end && ISSPACE(*end)) end++;
2152 if (*end) goto bad;
2155 return d;
2158 double
2159 mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck)
2161 char *s;
2162 int len;
2164 str = mrb_str_to_str(mrb, str);
2165 s = RSTRING_PTR(str);
2166 len = RSTRING_LEN(str);
2167 if (s) {
2168 if (badcheck && memchr(s, '\0', len)) {
2169 mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
2171 if (s[len]) { /* no sentinel somehow */
2172 struct RString *temp_str = str_new(mrb, s, len);
2173 s = temp_str->ptr;
2176 return mrb_cstr_to_dbl(mrb, s, badcheck);
2179 /* 15.2.10.5.39 */
2181 * call-seq:
2182 * str.to_f => float
2184 * Returns the result of interpreting leading characters in <i>str</i> as a
2185 * floating point number. Extraneous characters past the end of a valid number
2186 * are ignored. If there is not a valid number at the start of <i>str</i>,
2187 * <code>0.0</code> is returned. This method never raises an exception.
2189 * "123.45e1".to_f #=> 1234.5
2190 * "45.67 degrees".to_f #=> 45.67
2191 * "thx1138".to_f #=> 0.0
2193 static mrb_value
2194 mrb_str_to_f(mrb_state *mrb, mrb_value self)
2196 return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, 0/*Qfalse*/));
2199 /* 15.2.10.5.40 */
2201 * call-seq:
2202 * str.to_s => str
2203 * str.to_str => str
2205 * Returns the receiver.
2207 static mrb_value
2208 mrb_str_to_s(mrb_state *mrb, mrb_value self)
2210 if (mrb_obj_class(mrb, self) != mrb->string_class) {
2211 return mrb_str_dup(mrb, self);
2213 return self;
2216 /* 15.2.10.5.43 */
2218 * call-seq:
2219 * str.upcase! => str or nil
2221 * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
2222 * were made.
2224 static mrb_value
2225 mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
2227 struct RString *s = mrb_str_ptr(str);
2228 char *p, *pend;
2229 int modify = 0;
2231 mrb_str_modify(mrb, s);
2232 p = RSTRING_PTR(str);
2233 pend = RSTRING_END(str);
2234 while (p < pend) {
2235 if (ISLOWER(*p)) {
2236 *p = TOUPPER(*p);
2237 modify = 1;
2239 p++;
2242 if (modify) return str;
2243 return mrb_nil_value();
2246 /* 15.2.10.5.42 */
2248 * call-seq:
2249 * str.upcase => new_str
2251 * Returns a copy of <i>str</i> with all lowercase letters replaced with their
2252 * uppercase counterparts. The operation is locale insensitive---only
2253 * characters ``a'' to ``z'' are affected.
2255 * "hEllO".upcase #=> "HELLO"
2257 static mrb_value
2258 mrb_str_upcase(mrb_state *mrb, mrb_value self)
2260 mrb_value str;
2262 str = mrb_str_dup(mrb, self);
2263 mrb_str_upcase_bang(mrb, str);
2264 return str;
2268 * call-seq:
2269 * str.dump -> new_str
2271 * Produces a version of <i>str</i> with all nonprinting characters replaced by
2272 * <code>\nnn</code> notation and all special characters escaped.
2274 mrb_value
2275 mrb_str_dump(mrb_state *mrb, mrb_value str)
2277 mrb_int len;
2278 const char *p, *pend;
2279 char *q;
2280 struct RString *result;
2282 len = 2; /* "" */
2283 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
2284 while (p < pend) {
2285 unsigned char c = *p++;
2286 switch (c) {
2287 case '"': case '\\':
2288 case '\n': case '\r':
2289 case '\t': case '\f':
2290 case '\013': case '\010': case '\007': case '\033':
2291 len += 2;
2292 break;
2294 case '#':
2295 len += IS_EVSTR(p, pend) ? 2 : 1;
2296 break;
2298 default:
2299 if (ISPRINT(c)) {
2300 len++;
2302 else {
2303 len += 4; /* \NNN */
2305 break;
2309 result = str_new(mrb, 0, len);
2310 str_with_class(mrb, result, str);
2311 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
2312 q = result->ptr;
2314 *q++ = '"';
2315 while (p < pend) {
2316 unsigned char c = *p++;
2318 switch (c) {
2319 case '"':
2320 case '\\':
2321 *q++ = '\\';
2322 *q++ = c;
2323 break;
2325 case '\n':
2326 *q++ = '\\';
2327 *q++ = 'n';
2328 break;
2330 case '\r':
2331 *q++ = '\\';
2332 *q++ = 'r';
2333 break;
2335 case '\t':
2336 *q++ = '\\';
2337 *q++ = 't';
2338 break;
2340 case '\f':
2341 *q++ = '\\';
2342 *q++ = 'f';
2343 break;
2345 case '\013':
2346 *q++ = '\\';
2347 *q++ = 'v';
2348 break;
2350 case '\010':
2351 *q++ = '\\';
2352 *q++ = 'b';
2353 break;
2355 case '\007':
2356 *q++ = '\\';
2357 *q++ = 'a';
2358 break;
2360 case '\033':
2361 *q++ = '\\';
2362 *q++ = 'e';
2363 break;
2365 case '#':
2366 if (IS_EVSTR(p, pend)) *q++ = '\\';
2367 *q++ = '#';
2368 break;
2370 default:
2371 if (ISPRINT(c)) {
2372 *q++ = c;
2374 else {
2375 *q++ = '\\';
2376 q[2] = '0' + c % 8; c /= 8;
2377 q[1] = '0' + c % 8; c /= 8;
2378 q[0] = '0' + c % 8;
2379 q += 3;
2383 *q++ = '"';
2384 return mrb_obj_value(result);
2387 mrb_value
2388 mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
2390 if ((mrb_int)len < 0) {
2391 mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
2393 str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
2394 return str;
2397 mrb_value
2398 mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
2400 return mrb_str_cat(mrb, str, ptr, strlen(ptr));
2403 mrb_value
2404 mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
2406 str2 = mrb_str_to_str(mrb, str2);
2407 return mrb_str_buf_append(mrb, str, str2);
2410 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
2413 * call-seq:
2414 * str.inspect -> string
2416 * Returns a printable version of _str_, surrounded by quote marks,
2417 * with special characters escaped.
2419 * str = "hello"
2420 * str[3] = "\b"
2421 * str.inspect #=> "\"hel\\bo\""
2423 mrb_value
2424 mrb_str_inspect(mrb_state *mrb, mrb_value str)
2426 const char *p, *pend;
2427 char buf[CHAR_ESC_LEN + 1];
2428 mrb_value result = mrb_str_new(mrb, "\"", 1);
2430 p = RSTRING_PTR(str); pend = RSTRING_END(str);
2431 for (;p < pend; p++) {
2432 unsigned int c, cc;
2434 c = *p;
2435 if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
2436 buf[0] = '\\'; buf[1] = c;
2437 mrb_str_buf_cat(mrb, result, buf, 2);
2438 continue;
2440 if (ISPRINT(c)) {
2441 buf[0] = c;
2442 mrb_str_buf_cat(mrb, result, buf, 1);
2443 continue;
2445 switch (c) {
2446 case '\n': cc = 'n'; break;
2447 case '\r': cc = 'r'; break;
2448 case '\t': cc = 't'; break;
2449 case '\f': cc = 'f'; break;
2450 case '\013': cc = 'v'; break;
2451 case '\010': cc = 'b'; break;
2452 case '\007': cc = 'a'; break;
2453 case 033: cc = 'e'; break;
2454 default: cc = 0; break;
2456 if (cc) {
2457 buf[0] = '\\';
2458 buf[1] = (char)cc;
2459 mrb_str_buf_cat(mrb, result, buf, 2);
2460 continue;
2462 else {
2463 buf[0] = '\\';
2464 buf[3] = '0' + c % 8; c /= 8;
2465 buf[2] = '0' + c % 8; c /= 8;
2466 buf[1] = '0' + c % 8;
2467 mrb_str_buf_cat(mrb, result, buf, 4);
2468 continue;
2471 mrb_str_buf_cat(mrb, result, "\"", 1);
2473 return result;
2477 * call-seq:
2478 * str.bytes -> array of fixnums
2480 * Returns an array of bytes in _str_.
2482 * str = "hello"
2483 * str.bytes #=> [104, 101, 108, 108, 111]
2485 static mrb_value
2486 mrb_str_bytes(mrb_state *mrb, mrb_value str)
2488 struct RString *s = mrb_str_ptr(str);
2489 mrb_value a = mrb_ary_new_capa(mrb, s->len);
2490 unsigned char *p = (unsigned char *)(s->ptr), *pend = p + s->len;
2492 while (p < pend) {
2493 mrb_ary_push(mrb, a, mrb_fixnum_value(p[0]));
2494 p++;
2496 return a;
2499 /* ---------------------------*/
2500 void
2501 mrb_init_string(mrb_state *mrb)
2503 struct RClass *s;
2505 s = mrb->string_class = mrb_define_class(mrb, "String", mrb->object_class);
2506 MRB_SET_INSTANCE_TT(s, MRB_TT_STRING);
2507 mrb_include_module(mrb, s, mrb_class_get(mrb, "Comparable"));
2510 mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE());
2512 mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
2513 mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
2514 mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
2515 mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
2516 mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
2517 mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
2518 mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.8 */
2519 mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
2520 mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
2521 mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */
2522 mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.12 */
2523 mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
2524 mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
2525 mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
2526 mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
2528 mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.20 */
2529 mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
2530 mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
2531 mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
2532 mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
2533 mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
2534 mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.26 */
2535 mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
2536 mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
2537 mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
2538 mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
2539 mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
2540 mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
2541 mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
2543 mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
2544 mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
2545 mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
2546 mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE());
2547 mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
2548 mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_REQ(1)); /* 15.2.10.5.42 */
2549 mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.43 */
2550 mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
2551 mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE());