Merge branch 'master' of github.com:mruby/mruby
[mruby.git] / src / string.c
blob7d0cdaf0d9bff29572bb7462e58dec47247b051b
1 /*
2 ** string.c - String class
3 **
4 ** See Copyright Notice in mruby.h
5 */
7 #include <ctype.h>
8 #include <limits.h>
9 #include <stddef.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include "mruby.h"
13 #include "mruby/array.h"
14 #include "mruby/class.h"
15 #include "mruby/range.h"
16 #include "mruby/string.h"
17 #include "re.h"
/* Digit characters for bases up to 36: '0'-'9' followed by 'a'-'z'. */
const char mrb_digitmap[] =
  "0123456789"
  "abcdefghijklmnopqrstuvwxyz";
21 typedef struct mrb_shared_string {
22 mrb_bool nofree;
23 int refcnt;
24 char *ptr;
25 mrb_int len;
26 } mrb_shared_string;
28 static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2);
29 static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
/*
 * Reallocate the buffer of RString `s` so it can hold `capacity` bytes plus
 * the terminating sentinel, and record the new capacity.
 * Expects a variable named `mrb` (the mrb_state*) in the calling scope.
 * Both parameters are parenthesized so that arbitrary expressions can be
 * passed safely; note that each is still evaluated twice.
 */
#define RESIZE_CAPA(s,capacity) do {\
  (s)->ptr = (char *)mrb_realloc(mrb, (s)->ptr, (capacity)+1);\
  (s)->aux.capa = (capacity);\
} while(0)
36 static void
37 str_decref(mrb_state *mrb, mrb_shared_string *shared)
39 shared->refcnt--;
40 if (shared->refcnt == 0) {
41 if (!shared->nofree) {
42 mrb_free(mrb, shared->ptr);
44 mrb_free(mrb, shared);
48 void
49 mrb_str_modify(mrb_state *mrb, struct RString *s)
51 if (s->flags & MRB_STR_SHARED) {
52 mrb_shared_string *shared = s->aux.shared;
54 if (shared->refcnt == 1 && s->ptr == shared->ptr) {
55 s->ptr = shared->ptr;
56 s->aux.capa = shared->len;
57 s->ptr[s->len] = '\0';
58 mrb_free(mrb, shared);
60 else {
61 char *ptr, *p;
62 mrb_int len;
64 p = s->ptr;
65 len = s->len;
66 ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
67 if (p) {
68 memcpy(ptr, p, len);
70 ptr[len] = '\0';
71 s->ptr = ptr;
72 s->aux.capa = len;
73 str_decref(mrb, shared);
75 s->flags &= ~MRB_STR_SHARED;
76 return;
78 if (s->flags & MRB_STR_NOFREE) {
79 char *p = s->ptr;
81 s->ptr = (char *)mrb_malloc(mrb, (size_t)s->len+1);
82 if (p) {
83 memcpy(s->ptr, p, s->len);
85 s->ptr[s->len] = '\0';
86 s->aux.capa = s->len;
87 s->flags &= ~MRB_STR_NOFREE;
88 return;
92 mrb_value
93 mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
95 int slen;
96 struct RString *s = mrb_str_ptr(str);
98 mrb_str_modify(mrb, s);
99 slen = s->len;
100 if (len != slen) {
101 if (slen < len || slen - len > 256) {
102 RESIZE_CAPA(s, len);
104 s->len = len;
105 s->ptr[len] = '\0'; /* sentinel */
107 return str;
/* Allocate a new object of type MRB_TT_STRING whose class is mrb->string_class. */
#define mrb_obj_alloc_string(mrb) \
  ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
112 /* char offset to byte offset */
114 mrb_str_offset(mrb_state *mrb, mrb_value str, int pos)
116 return pos;
119 static struct RString*
120 str_new(mrb_state *mrb, const char *p, mrb_int len)
122 struct RString *s;
124 s = mrb_obj_alloc_string(mrb);
125 s->len = len;
126 s->aux.capa = len;
127 s->ptr = (char *)mrb_malloc(mrb, (size_t)len+1);
128 if (p) {
129 memcpy(s->ptr, p, len);
131 s->ptr[len] = '\0';
132 return s;
135 void
136 str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj)
138 s->c = mrb_str_ptr(obj)->c;
141 static mrb_value
142 mrb_str_new_empty(mrb_state *mrb, mrb_value str)
144 struct RString *s = str_new(mrb, 0, 0);
146 str_with_class(mrb, s, str);
147 return mrb_obj_value(s);
150 #ifndef MRB_STR_BUF_MIN_SIZE
151 # define MRB_STR_BUF_MIN_SIZE 128
152 #endif
154 mrb_value
155 mrb_str_buf_new(mrb_state *mrb, mrb_int capa)
157 struct RString *s;
159 s = mrb_obj_alloc_string(mrb);
161 if (capa < MRB_STR_BUF_MIN_SIZE) {
162 capa = MRB_STR_BUF_MIN_SIZE;
164 s->len = 0;
165 s->aux.capa = capa;
166 s->ptr = (char *)mrb_malloc(mrb, capa+1);
167 s->ptr[0] = '\0';
169 return mrb_obj_value(s);
172 static void
173 str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len)
175 mrb_int capa;
176 mrb_int total;
177 ptrdiff_t off = -1;
179 mrb_str_modify(mrb, s);
180 if (ptr >= s->ptr && ptr <= s->ptr + s->len) {
181 off = ptr - s->ptr;
183 if (len == 0) return;
184 capa = s->aux.capa;
185 if (s->len >= MRB_INT_MAX - (mrb_int)len) {
186 mrb_raise(mrb, E_ARGUMENT_ERROR, "string sizes too big");
188 total = s->len+len;
189 if (capa <= total) {
190 while (total > capa) {
191 if (capa + 1 >= MRB_INT_MAX / 2) {
192 capa = (total + 4095) / 4096;
193 break;
195 capa = (capa + 1) * 2;
197 RESIZE_CAPA(s, capa);
199 if (off != -1) {
200 ptr = s->ptr + off;
202 memcpy(s->ptr + s->len, ptr, len);
203 s->len = total;
204 s->ptr[total] = '\0'; /* sentinel */
207 mrb_value
208 mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
210 if (len == 0) return str;
211 str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
212 return str;
215 mrb_value
216 mrb_str_new(mrb_state *mrb, const char *p, size_t len)
218 struct RString *s;
220 s = str_new(mrb, p, len);
221 return mrb_obj_value(s);
225 * call-seq: (Caution! NULL string)
226 * String.new(str="") => new_str
228 * Returns a new string object containing a copy of <i>str</i>.
231 mrb_value
232 mrb_str_new_cstr(mrb_state *mrb, const char *p)
234 struct RString *s;
235 size_t len;
237 if (p) {
238 len = strlen(p);
239 if ((mrb_int)len < 0) {
240 mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
243 else {
244 len = 0;
247 s = str_new(mrb, p, len);
249 return mrb_obj_value(s);
252 mrb_value
253 mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
255 struct RString *s;
257 s = mrb_obj_alloc_string(mrb);
258 s->len = len;
259 s->aux.capa = 0; /* nofree */
260 s->ptr = (char *)p;
261 s->flags = MRB_STR_NOFREE;
262 return mrb_obj_value(s);
265 void
266 mrb_gc_free_str(mrb_state *mrb, struct RString *str)
268 if (str->flags & MRB_STR_SHARED)
269 str_decref(mrb, str->aux.shared);
270 else if ((str->flags & MRB_STR_NOFREE) == 0)
271 mrb_free(mrb, str->ptr);
274 char *
275 mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
277 struct RString *s;
279 if (!mrb_string_p(str0)) {
280 mrb_raise(mrb, E_TYPE_ERROR, "expected String");
283 s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
284 if ((strlen(s->ptr) ^ s->len) != 0) {
285 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
287 return s->ptr;
290 static void
291 str_make_shared(mrb_state *mrb, struct RString *s)
293 if (!(s->flags & MRB_STR_SHARED)) {
294 mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
296 shared->refcnt = 1;
297 if (s->flags & MRB_STR_NOFREE) {
298 shared->nofree = TRUE;
299 shared->ptr = s->ptr;
300 s->flags &= ~MRB_STR_NOFREE;
302 else {
303 shared->nofree = FALSE;
304 if (s->aux.capa > s->len) {
305 s->ptr = shared->ptr = (char *)mrb_realloc(mrb, s->ptr, s->len+1);
307 else {
308 shared->ptr = s->ptr;
311 shared->len = s->len;
312 s->aux.shared = shared;
313 s->flags |= MRB_STR_SHARED;
318 * call-seq:
319 * char* str = String("abcd"), len=strlen("abcd")
321 * Returns a new string object containing a copy of <i>str</i>.
323 const char*
324 mrb_str_body(mrb_value str, int *len_p)
326 struct RString *s = mrb_str_ptr(str);
328 *len_p = s->len;
329 return s->ptr;
333 * call-seq: (Caution! String("abcd") change)
334 * String("abcdefg") = String("abcd") + String("efg")
336 * Returns a new string object containing a copy of <i>str</i>.
338 void
339 mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
341 struct RString *s1 = mrb_str_ptr(self), *s2;
342 mrb_int len;
344 mrb_str_modify(mrb, s1);
345 if (!mrb_string_p(other)) {
346 other = mrb_str_to_str(mrb, other);
348 s2 = mrb_str_ptr(other);
349 len = s1->len + s2->len;
351 if (s1->aux.capa < len) {
352 s1->aux.capa = len;
353 s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, len+1);
355 memcpy(s1->ptr+s1->len, s2->ptr, s2->len);
356 s1->len = len;
357 s1->ptr[len] = '\0';
361 * call-seq: (Caution! String("abcd") remain)
362 * String("abcdefg") = String("abcd") + String("efg")
364 * Returns a new string object containing a copy of <i>str</i>.
366 mrb_value
367 mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
369 struct RString *s = mrb_str_ptr(a);
370 struct RString *s2 = mrb_str_ptr(b);
371 struct RString *t;
373 t = str_new(mrb, 0, s->len + s2->len);
374 memcpy(t->ptr, s->ptr, s->len);
375 memcpy(t->ptr + s->len, s2->ptr, s2->len);
377 return mrb_obj_value(t);
380 /* 15.2.10.5.2 */
383 * call-seq: (Caution! String("abcd") remain) for stack_argument
384 * String("abcdefg") = String("abcd") + String("efg")
386 * Returns a new string object containing a copy of <i>str</i>.
388 static mrb_value
389 mrb_str_plus_m(mrb_state *mrb, mrb_value self)
391 mrb_value str;
393 mrb_get_args(mrb, "S", &str);
394 return mrb_str_plus(mrb, self, str);
398 * call-seq:
399 * len = strlen(String("abcd"))
401 * Returns a new string object containing a copy of <i>str</i>.
403 static mrb_value
404 mrb_str_bytesize(mrb_state *mrb, mrb_value self)
406 struct RString *s = mrb_str_ptr(self);
407 return mrb_fixnum_value(s->len);
410 /* 15.2.10.5.26 */
411 /* 15.2.10.5.33 */
413 * call-seq:
414 * len = strlen(String("abcd"))
416 * Returns a new string object containing a copy of <i>str</i>.
418 mrb_value
419 mrb_str_size(mrb_state *mrb, mrb_value self)
421 struct RString *s = mrb_str_ptr(self);
422 return mrb_fixnum_value(s->len);
425 /* 15.2.10.5.1 */
427 * call-seq:
428 * str * integer => new_str
430 * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
431 * the receiver.
433 * "Ho! " * 3 #=> "Ho! Ho! Ho! "
435 static mrb_value
436 mrb_str_times(mrb_state *mrb, mrb_value self)
438 mrb_int n,len,times;
439 struct RString *str2;
440 char *p;
442 mrb_get_args(mrb, "i", &times);
443 if (times < 0) {
444 mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
446 if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) {
447 mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
450 len = RSTRING_LEN(self)*times;
451 str2 = str_new(mrb, 0, len);
452 str_with_class(mrb, str2, self);
453 p = str2->ptr;
454 if (len > 0) {
455 n = RSTRING_LEN(self);
456 memcpy(p, RSTRING_PTR(self), n);
457 while (n <= len/2) {
458 memcpy(p + n, p, n);
459 n *= 2;
461 memcpy(p + n, p, len-n);
463 p[str2->len] = '\0';
465 return mrb_obj_value(str2);
467 /* -------------------------------------------------------------- */
469 #define lesser(a,b) (((a)>(b))?(b):(a))
471 /* ---------------------------*/
473 * call-seq:
474 * mrb_value str1 <=> mrb_value str2 => int
475 * > 1
476 * = 0
477 * < -1
480 mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
482 mrb_int len;
483 mrb_int retval;
484 struct RString *s1 = mrb_str_ptr(str1);
485 struct RString *s2 = mrb_str_ptr(str2);
487 len = lesser(s1->len, s2->len);
488 retval = memcmp(s1->ptr, s2->ptr, len);
489 if (retval == 0) {
490 if (s1->len == s2->len) return 0;
491 if (s1->len > s2->len) return 1;
492 return -1;
494 if (retval > 0) return 1;
495 return -1;
498 /* 15.2.10.5.3 */
501 * call-seq:
502 * str <=> other_str => -1, 0, +1
504 * Comparison---Returns -1 if <i>other_str</i> is less than, 0 if
505 * <i>other_str</i> is equal to, and +1 if <i>other_str</i> is greater than
506 * <i>str</i>. If the strings are of different lengths, and the strings are
507 * equal when compared up to the shortest length, then the longer string is
508 * considered greater than the shorter one. If the variable <code>$=</code> is
509 * <code>false</code>, the comparison is based on comparing the binary values
510 * of each character in the string. In older versions of Ruby, setting
511 * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
512 * in favor of using <code>String#casecmp</code>.
514 * <code><=></code> is the basis for the methods <code><</code>,
515 * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
516 * included from module <code>Comparable</code>. The method
517 * <code>String#==</code> does not use <code>Comparable#==</code>.
519 * "abcdef" <=> "abcde" #=> 1
520 * "abcdef" <=> "abcdef" #=> 0
521 * "abcdef" <=> "abcdefg" #=> -1
522 * "abcdef" <=> "ABCDEF" #=> 1
524 static mrb_value
525 mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
527 mrb_value str2;
528 mrb_int result;
530 mrb_get_args(mrb, "o", &str2);
531 if (!mrb_string_p(str2)) {
532 if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_s"))) {
533 return mrb_nil_value();
535 else if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "<=>"))) {
536 return mrb_nil_value();
538 else {
539 mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1);
541 if (mrb_nil_p(tmp)) return mrb_nil_value();
542 if (!mrb_fixnum(tmp)) {
543 return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp);
545 result = -mrb_fixnum(tmp);
548 else {
549 result = mrb_str_cmp(mrb, str1, str2);
551 return mrb_fixnum_value(result);
554 static mrb_bool
555 str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
557 const mrb_int len = RSTRING_LEN(str1);
559 if (len != RSTRING_LEN(str2)) return FALSE;
560 if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0)
561 return TRUE;
562 return FALSE;
565 mrb_bool
566 mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
568 if (mrb_obj_equal(mrb, str1, str2)) return TRUE;
569 if (!mrb_string_p(str2)) {
570 if (mrb_nil_p(str2)) return FALSE;
571 if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) {
572 return FALSE;
574 str2 = mrb_funcall(mrb, str2, "to_str", 0);
575 return mrb_equal(mrb, str2, str1);
577 return str_eql(mrb, str1, str2);
580 /* 15.2.10.5.4 */
582 * call-seq:
583 * str == obj => true or false
585 * Equality---
586 * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
587 * Otherwise, returns <code>false</code> or <code>true</code>
589 * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero.
591 static mrb_value
592 mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
594 mrb_value str2;
595 mrb_bool equal_p;
597 mrb_get_args(mrb, "o", &str2);
598 equal_p = mrb_str_equal(mrb, str1, str2);
600 return mrb_bool_value(equal_p);
602 /* ---------------------------------- */
603 mrb_value
604 mrb_str_to_str(mrb_state *mrb, mrb_value str)
606 mrb_value s;
608 if (!mrb_string_p(str)) {
609 s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
610 if (mrb_nil_p(s)) {
611 s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
613 return s;
615 return str;
618 char *
619 mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
621 mrb_value str = mrb_str_to_str(mrb, ptr);
622 return RSTRING_PTR(str);
625 static mrb_value
626 noregexp(mrb_state *mrb, mrb_value self)
628 mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
629 return mrb_nil_value();
632 static void
633 regexp_check(mrb_state *mrb, mrb_value obj)
635 if (!memcmp(mrb_obj_classname(mrb, obj), REGEXP_CLASS, sizeof(REGEXP_CLASS) - 1)) {
636 noregexp(mrb, obj);
640 static inline mrb_int
641 mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
643 const unsigned char *x = xs, *xe = xs + m;
644 const unsigned char *y = ys;
645 int i, qstable[256];
647 /* Preprocessing */
648 for (i = 0; i < 256; ++i)
649 qstable[i] = m + 1;
650 for (; x < xe; ++x)
651 qstable[*x] = xe - x;
652 /* Searching */
653 for (; y + m <= ys + n; y += *(qstable + y[m])) {
654 if (*xs == *y && memcmp(xs, y, m) == 0)
655 return y - ys;
657 return -1;
660 static mrb_int
661 mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
663 const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
665 if (m > n) return -1;
666 else if (m == n) {
667 return memcmp(x0, y0, m) == 0 ? 0 : -1;
669 else if (m < 1) {
670 return 0;
672 else if (m == 1) {
673 const unsigned char *ys = y, *ye = ys + n;
674 for (; y < ye; ++y) {
675 if (*x == *y)
676 return y - ys;
678 return -1;
680 return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
683 static mrb_int
684 mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
686 mrb_int pos;
687 char *s, *sptr;
688 mrb_int len, slen;
690 len = RSTRING_LEN(str);
691 slen = RSTRING_LEN(sub);
692 if (offset < 0) {
693 offset += len;
694 if (offset < 0) return -1;
696 if (len - offset < slen) return -1;
697 s = RSTRING_PTR(str);
698 if (offset) {
699 s += offset;
701 if (slen == 0) return offset;
702 /* need proceed one character at a time */
703 sptr = RSTRING_PTR(sub);
704 slen = RSTRING_LEN(sub);
705 len = RSTRING_LEN(str) - offset;
706 pos = mrb_memsearch(sptr, slen, s, len);
707 if (pos < 0) return pos;
708 return pos + offset;
711 mrb_value
712 mrb_str_dup(mrb_state *mrb, mrb_value str)
714 /* should return shared string */
715 struct RString *s = mrb_str_ptr(str);
717 return mrb_str_new(mrb, s->ptr, s->len);
720 static mrb_value
721 mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
723 mrb_int idx;
725 regexp_check(mrb, indx);
726 switch (mrb_type(indx)) {
727 case MRB_TT_FIXNUM:
728 idx = mrb_fixnum(indx);
730 num_index:
731 str = mrb_str_substr(mrb, str, idx, 1);
732 if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
733 return str;
735 case MRB_TT_STRING:
736 if (mrb_str_index(mrb, str, indx, 0) != -1)
737 return mrb_str_dup(mrb, indx);
738 return mrb_nil_value();
740 case MRB_TT_RANGE:
741 /* check if indx is Range */
743 mrb_int beg, len;
744 mrb_value tmp;
746 len = RSTRING_LEN(str);
747 if (mrb_range_beg_len(mrb, indx, &beg, &len, len)) {
748 tmp = mrb_str_subseq(mrb, str, beg, len);
749 return tmp;
751 else {
752 return mrb_nil_value();
755 default:
756 idx = mrb_fixnum(indx);
757 goto num_index;
759 return mrb_nil_value(); /* not reached */
762 /* 15.2.10.5.6 */
763 /* 15.2.10.5.34 */
765 * call-seq:
766 * str[fixnum] => fixnum or nil
767 * str[fixnum, fixnum] => new_str or nil
768 * str[range] => new_str or nil
769 * str[regexp] => new_str or nil
770 * str[regexp, fixnum] => new_str or nil
771 * str[other_str] => new_str or nil
772 * str.slice(fixnum) => fixnum or nil
773 * str.slice(fixnum, fixnum) => new_str or nil
774 * str.slice(range) => new_str or nil
775 * str.slice(regexp) => new_str or nil
776 * str.slice(regexp, fixnum) => new_str or nil
777 * str.slice(other_str) => new_str or nil
779 * Element Reference---If passed a single <code>Fixnum</code>, returns the code
780 * of the character at that position. If passed two <code>Fixnum</code>
781 * objects, returns a substring starting at the offset given by the first, and
782 * a length given by the second. If given a range, a substring containing
783 * characters at offsets given by the range is returned. In all three cases, if
784 * an offset is negative, it is counted from the end of <i>str</i>. Returns
785 * <code>nil</code> if the initial offset falls outside the string, the length
786 * is negative, or the beginning of the range is greater than the end.
788 * If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
789 * returned. If a numeric parameter follows the regular expression, that
790 * component of the <code>MatchData</code> is returned instead. If a
791 * <code>String</code> is given, that string is returned if it occurs in
792 * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
793 * match.
795 * a = "hello there"
796 * a[1] #=> 101(1.8.7) "e"(1.9.2)
797 * a[1,3] #=> "ell"
798 * a[1..3] #=> "ell"
799 * a[-3,2] #=> "er"
800 * a[-4..-2] #=> "her"
801 * a[12..-1] #=> nil
802 * a[-2..-4] #=> ""
803 * a[/[aeiou](.)\1/] #=> "ell"
804 * a[/[aeiou](.)\1/, 0] #=> "ell"
805 * a[/[aeiou](.)\1/, 1] #=> "l"
806 * a[/[aeiou](.)\1/, 2] #=> nil
807 * a["lo"] #=> "lo"
808 * a["bye"] #=> nil
810 static mrb_value
811 mrb_str_aref_m(mrb_state *mrb, mrb_value str)
813 mrb_value a1, a2;
814 int argc;
816 argc = mrb_get_args(mrb, "o|o", &a1, &a2);
817 if (argc == 2) {
818 regexp_check(mrb, a1);
819 return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
821 if (argc != 1) {
822 mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
824 return mrb_str_aref(mrb, str, a1);
827 /* 15.2.10.5.8 */
829 * call-seq:
830 * str.capitalize! => str or nil
832 * Modifies <i>str</i> by converting the first character to uppercase and the
833 * remainder to lowercase. Returns <code>nil</code> if no changes are made.
835 * a = "hello"
836 * a.capitalize! #=> "Hello"
837 * a #=> "Hello"
838 * a.capitalize! #=> nil
840 static mrb_value
841 mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
843 char *p, *pend;
844 int modify = 0;
845 struct RString *s = mrb_str_ptr(str);
847 mrb_str_modify(mrb, s);
848 if (s->len == 0 || !s->ptr) return mrb_nil_value();
849 p = s->ptr; pend = s->ptr + s->len;
850 if (ISLOWER(*p)) {
851 *p = TOUPPER(*p);
852 modify = 1;
854 while (++p < pend) {
855 if (ISUPPER(*p)) {
856 *p = TOLOWER(*p);
857 modify = 1;
860 if (modify) return str;
861 return mrb_nil_value();
864 /* 15.2.10.5.7 */
866 * call-seq:
867 * str.capitalize => new_str
869 * Returns a copy of <i>str</i> with the first character converted to uppercase
870 * and the remainder to lowercase.
872 * "hello".capitalize #=> "Hello"
873 * "HELLO".capitalize #=> "Hello"
874 * "123ABC".capitalize #=> "123abc"
876 static mrb_value
877 mrb_str_capitalize(mrb_state *mrb, mrb_value self)
879 mrb_value str;
881 str = mrb_str_dup(mrb, self);
882 mrb_str_capitalize_bang(mrb, str);
883 return str;
886 /* 15.2.10.5.10 */
888 * call-seq:
889 * str.chomp!(separator=$/) => str or nil
891 * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
892 * returning <i>str</i>, or <code>nil</code> if no modifications were made.
894 static mrb_value
895 mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
897 mrb_value rs;
898 mrb_int newline;
899 char *p, *pp;
900 mrb_int rslen;
901 mrb_int len;
902 struct RString *s = mrb_str_ptr(str);
904 mrb_str_modify(mrb, s);
905 len = s->len;
906 if (mrb_get_args(mrb, "|S", &rs) == 0) {
907 if (len == 0) return mrb_nil_value();
908 smart_chomp:
909 if (s->ptr[len-1] == '\n') {
910 s->len--;
911 if (s->len > 0 &&
912 s->ptr[s->len-1] == '\r') {
913 s->len--;
916 else if (s->ptr[len-1] == '\r') {
917 s->len--;
919 else {
920 return mrb_nil_value();
922 s->ptr[s->len] = '\0';
923 return str;
926 if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value();
927 p = s->ptr;
928 rslen = RSTRING_LEN(rs);
929 if (rslen == 0) {
930 while (len>0 && p[len-1] == '\n') {
931 len--;
932 if (len>0 && p[len-1] == '\r')
933 len--;
935 if (len < s->len) {
936 s->len = len;
937 p[len] = '\0';
938 return str;
940 return mrb_nil_value();
942 if (rslen > len) return mrb_nil_value();
943 newline = RSTRING_PTR(rs)[rslen-1];
944 if (rslen == 1 && newline == '\n')
945 newline = RSTRING_PTR(rs)[rslen-1];
946 if (rslen == 1 && newline == '\n')
947 goto smart_chomp;
949 pp = p + len - rslen;
950 if (p[len-1] == newline &&
951 (rslen <= 1 ||
952 memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
953 s->len = len - rslen;
954 p[s->len] = '\0';
955 return str;
957 return mrb_nil_value();
960 /* 15.2.10.5.9 */
962 * call-seq:
963 * str.chomp(separator=$/) => new_str
965 * Returns a new <code>String</code> with the given record separator removed
966 * from the end of <i>str</i> (if present). If <code>$/</code> has not been
967 * changed from the default Ruby record separator, then <code>chomp</code> also
968 * removes carriage return characters (that is it will remove <code>\n</code>,
969 * <code>\r</code>, and <code>\r\n</code>).
971 * "hello".chomp #=> "hello"
972 * "hello\n".chomp #=> "hello"
973 * "hello\r\n".chomp #=> "hello"
974 * "hello\n\r".chomp #=> "hello\n"
975 * "hello\r".chomp #=> "hello"
976 * "hello \n there".chomp #=> "hello \n there"
977 * "hello".chomp("llo") #=> "he"
979 static mrb_value
980 mrb_str_chomp(mrb_state *mrb, mrb_value self)
982 mrb_value str;
984 str = mrb_str_dup(mrb, self);
985 mrb_str_chomp_bang(mrb, str);
986 return str;
989 /* 15.2.10.5.12 */
991 * call-seq:
992 * str.chop! => str or nil
994 * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
995 * or <code>nil</code> if <i>str</i> is the empty string. See also
996 * <code>String#chomp!</code>.
998 static mrb_value
999 mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
1001 struct RString *s = mrb_str_ptr(str);
1003 mrb_str_modify(mrb, s);
1004 if (s->len > 0) {
1005 int len;
1006 len = s->len - 1;
1007 if (s->ptr[len] == '\n') {
1008 if (len > 0 &&
1009 s->ptr[len-1] == '\r') {
1010 len--;
1013 s->len = len;
1014 s->ptr[len] = '\0';
1015 return str;
1017 return mrb_nil_value();
1020 /* 15.2.10.5.11 */
1022 * call-seq:
1023 * str.chop => new_str
1025 * Returns a new <code>String</code> with the last character removed. If the
1026 * string ends with <code>\r\n</code>, both characters are removed. Applying
1027 * <code>chop</code> to an empty string returns an empty
1028 * string. <code>String#chomp</code> is often a safer alternative, as it leaves
1029 * the string unchanged if it doesn't end in a record separator.
1031 * "string\r\n".chop #=> "string"
1032 * "string\n\r".chop #=> "string\n"
1033 * "string\n".chop #=> "string"
1034 * "string".chop #=> "strin"
1035 * "x".chop #=> ""
1037 static mrb_value
1038 mrb_str_chop(mrb_state *mrb, mrb_value self)
1040 mrb_value str;
1041 str = mrb_str_dup(mrb, self);
1042 mrb_str_chop_bang(mrb, str);
1043 return str;
1046 /* 15.2.10.5.14 */
1048 * call-seq:
1049 * str.downcase! => str or nil
1051 * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
1052 * changes were made.
1054 static mrb_value
1055 mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
1057 char *p, *pend;
1058 int modify = 0;
1059 struct RString *s = mrb_str_ptr(str);
1061 mrb_str_modify(mrb, s);
1062 p = s->ptr;
1063 pend = s->ptr + s->len;
1064 while (p < pend) {
1065 if (ISUPPER(*p)) {
1066 *p = TOLOWER(*p);
1067 modify = 1;
1069 p++;
1072 if (modify) return str;
1073 return mrb_nil_value();
1076 /* 15.2.10.5.13 */
1078 * call-seq:
1079 * str.downcase => new_str
1081 * Returns a copy of <i>str</i> with all uppercase letters replaced with their
1082 * lowercase counterparts. The operation is locale insensitive---only
1083 * characters ``A'' to ``Z'' are affected.
1085 * "hEllO".downcase #=> "hello"
1087 static mrb_value
1088 mrb_str_downcase(mrb_state *mrb, mrb_value self)
1090 mrb_value str;
1092 str = mrb_str_dup(mrb, self);
1093 mrb_str_downcase_bang(mrb, str);
1094 return str;
1097 /* 15.2.10.5.16 */
1099 * call-seq:
1100 * str.empty? => true or false
1102 * Returns <code>true</code> if <i>str</i> has a length of zero.
1104 * "hello".empty? #=> false
1105 * "".empty? #=> true
1107 static mrb_value
1108 mrb_str_empty_p(mrb_state *mrb, mrb_value self)
1110 struct RString *s = mrb_str_ptr(self);
1112 return mrb_bool_value(s->len == 0);
1115 /* 15.2.10.5.17 */
1117 * call-seq:
1118 * str.eql?(other) => true or false
1120 * Two strings are equal if the have the same length and content.
1122 static mrb_value
1123 mrb_str_eql(mrb_state *mrb, mrb_value self)
1125 mrb_value str2;
1126 mrb_bool eql_p;
1128 mrb_get_args(mrb, "o", &str2);
1129 eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2);
1131 return mrb_bool_value(eql_p);
1134 static mrb_value
1135 mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
1137 struct RString *orig, *s;
1138 mrb_shared_string *shared;
1140 orig = mrb_str_ptr(str);
1141 str_make_shared(mrb, orig);
1142 shared = orig->aux.shared;
1143 s = mrb_obj_alloc_string(mrb);
1144 s->ptr = orig->ptr + beg;
1145 s->len = len;
1146 s->aux.shared = shared;
1147 s->flags |= MRB_STR_SHARED;
1148 shared->refcnt++;
1150 return mrb_obj_value(s);
1153 mrb_value
1154 mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
1156 mrb_value str2;
1158 if (len < 0) return mrb_nil_value();
1159 if (!RSTRING_LEN(str)) {
1160 len = 0;
1162 if (beg > RSTRING_LEN(str)) return mrb_nil_value();
1163 if (beg < 0) {
1164 beg += RSTRING_LEN(str);
1165 if (beg < 0) return mrb_nil_value();
1167 if (beg + len > RSTRING_LEN(str))
1168 len = RSTRING_LEN(str) - beg;
1169 if (len <= 0) {
1170 len = 0;
1172 str2 = mrb_str_subseq(mrb, str, beg, len);
1174 return str2;
1177 mrb_value
1178 mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2)
1180 mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
1181 return str;
1184 mrb_int
1185 mrb_str_hash(mrb_state *mrb, mrb_value str)
1187 /* 1-8-7 */
1188 struct RString *s = mrb_str_ptr(str);
1189 mrb_int len = s->len;
1190 char *p = s->ptr;
1191 mrb_int key = 0;
1193 while (len--) {
1194 key = key*65599 + *p;
1195 p++;
1197 key = key + (key>>5);
1198 return key;
1201 /* 15.2.10.5.20 */
1203 * call-seq:
1204 * str.hash => fixnum
1206 * Return a hash based on the string's length and content.
1208 static mrb_value
1209 mrb_str_hash_m(mrb_state *mrb, mrb_value self)
1211 mrb_int key = mrb_str_hash(mrb, self);
1212 return mrb_fixnum_value(key);
1215 /* 15.2.10.5.21 */
1217 * call-seq:
1218 * str.include? other_str => true or false
1219 * str.include? fixnum => true or false
1221 * Returns <code>true</code> if <i>str</i> contains the given string or
1222 * character.
1224 * "hello".include? "lo" #=> true
1225 * "hello".include? "ol" #=> false
1226 * "hello".include? ?h #=> true
1228 static mrb_value
1229 mrb_str_include(mrb_state *mrb, mrb_value self)
1231 mrb_int i;
1232 mrb_value str2;
1233 mrb_bool include_p;
1235 mrb_get_args(mrb, "o", &str2);
1236 if (mrb_fixnum_p(str2)) {
1237 include_p = (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self)) != NULL);
1239 else {
1240 str2 = mrb_str_to_str(mrb, str2);
1241 i = mrb_str_index(mrb, self, str2, 0);
1243 include_p = (i != -1);
1246 return mrb_bool_value(include_p);
1249 /* 15.2.10.5.22 */
1251 * call-seq:
1252 * str.index(substring [, offset]) => fixnum or nil
1253 * str.index(fixnum [, offset]) => fixnum or nil
1254 * str.index(regexp [, offset]) => fixnum or nil
1256 * Returns the index of the first occurrence of the given
1257 * <i>substring</i>,
1258 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
1259 * Returns
1260 * <code>nil</code> if not found.
1261 * If the second parameter is present, it
1262 * specifies the position in the string to begin the search.
1264 * "hello".index('e') #=> 1
1265 * "hello".index('lo') #=> 3
1266 * "hello".index('a') #=> nil
1267 * "hello".index(101) #=> 1(101=0x65='e')
1268 * "hello".index(/[aeiou]/, -3) #=> 4
1270 static mrb_value
1271 mrb_str_index_m(mrb_state *mrb, mrb_value str)
1273 mrb_value *argv;
1274 int argc;
1276 mrb_value sub;
1277 mrb_int pos;
1279 mrb_get_args(mrb, "*", &argv, &argc);
1280 if (argc == 2) {
1281 pos = mrb_fixnum(argv[1]);
1282 sub = argv[0];
1284 else {
1285 pos = 0;
1286 if (argc > 0)
1287 sub = argv[0];
1288 else
1289 sub = mrb_nil_value();
1292 regexp_check(mrb, sub);
1293 if (pos < 0) {
1294 pos += RSTRING_LEN(str);
1295 if (pos < 0) {
1296 return mrb_nil_value();
1300 switch (mrb_type(sub)) {
1301 case MRB_TT_FIXNUM: {
1302 int c = mrb_fixnum(sub);
1303 mrb_int len = RSTRING_LEN(str);
1304 unsigned char *p = (unsigned char*)RSTRING_PTR(str);
1306 for (;pos<len;pos++) {
1307 if (p[pos] == c) return mrb_fixnum_value(pos);
1309 return mrb_nil_value();
1312 default: {
1313 mrb_value tmp;
1315 tmp = mrb_check_string_type(mrb, sub);
1316 if (mrb_nil_p(tmp)) {
1317 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
1319 sub = tmp;
1321 /* fall through */
1322 case MRB_TT_STRING:
1323 pos = mrb_str_index(mrb, str, sub, pos);
1324 break;
1327 if (pos == -1) return mrb_nil_value();
1328 return mrb_fixnum_value(pos);
1331 #define STR_REPLACE_SHARED_MIN 10
1333 static mrb_value
1334 str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
1336 if (s2->flags & MRB_STR_SHARED) {
1337 L_SHARE:
1338 if (s1->flags & MRB_STR_SHARED){
1339 str_decref(mrb, s1->aux.shared);
1341 else {
1342 mrb_free(mrb, s1->ptr);
1344 s1->ptr = s2->ptr;
1345 s1->len = s2->len;
1346 s1->aux.shared = s2->aux.shared;
1347 s1->flags |= MRB_STR_SHARED;
1348 s1->aux.shared->refcnt++;
1350 else if (s2->len > STR_REPLACE_SHARED_MIN) {
1351 str_make_shared(mrb, s2);
1352 goto L_SHARE;
1354 else {
1355 if (s1->flags & MRB_STR_SHARED) {
1356 str_decref(mrb, s1->aux.shared);
1357 s1->flags &= ~MRB_STR_SHARED;
1358 s1->ptr = (char *)mrb_malloc(mrb, s2->len+1);
1360 else {
1361 s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, s2->len+1);
1363 memcpy(s1->ptr, s2->ptr, s2->len);
1364 s1->ptr[s2->len] = 0;
1365 s1->len = s2->len;
1366 s1->aux.capa = s2->len;
1368 return mrb_obj_value(s1);
1371 /* 15.2.10.5.24 */
1372 /* 15.2.10.5.28 */
1374 * call-seq:
1375 * str.replace(other_str) => str
1377 * s = "hello" #=> "hello"
1378 * s.replace "world" #=> "world"
1380 static mrb_value
1381 mrb_str_replace(mrb_state *mrb, mrb_value str)
1383 mrb_value str2;
1385 mrb_get_args(mrb, "S", &str2);
1386 return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2));
1389 /* 15.2.10.5.23 */
1391 * call-seq:
1392 * String.new(str="") => new_str
1394 * Returns a new string object containing a copy of <i>str</i>.
1396 static mrb_value
1397 mrb_str_init(mrb_state *mrb, mrb_value self)
1399 mrb_value str2;
1401 if (mrb_get_args(mrb, "|S", &str2) == 1) {
1402 str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));
1404 return self;
1407 /* 15.2.10.5.25 */
1408 /* 15.2.10.5.41 */
1410 * call-seq:
1411 * str.intern => symbol
1412 * str.to_sym => symbol
1414 * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
1415 * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
1417 * "Koala".intern #=> :Koala
1418 * s = 'cat'.to_sym #=> :cat
1419 * s == :cat #=> true
1420 * s = '@cat'.to_sym #=> :@cat
1421 * s == :@cat #=> true
1423 * This can also be used to create symbols that cannot be represented using the
1424 * <code>:xxx</code> notation.
1426 * 'cat and dog'.to_sym #=> :"cat and dog"
1428 mrb_value
1429 mrb_str_intern(mrb_state *mrb, mrb_value self)
1431 mrb_sym id;
1433 id = mrb_intern_str(mrb, self);
1434 return mrb_symbol_value(id);
1437 /* ---------------------------------- */
1438 mrb_value
1439 mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
1441 mrb_value str;
1443 if (mrb_string_p(obj)) {
1444 return obj;
1446 str = mrb_funcall(mrb, obj, "to_s", 0);
1447 if (!mrb_string_p(str))
1448 return mrb_any_to_s(mrb, obj);
1449 return str;
1452 mrb_value
1453 mrb_ptr_to_str(mrb_state *mrb, void *p)
1455 struct RString *p_str;
1456 char *p1;
1457 char *p2;
1458 uintptr_t n = (uintptr_t)p;
1460 p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4);
1461 p1 = p_str->ptr;
1462 *p1++ = '0';
1463 *p1++ = 'x';
1464 p2 = p1;
1466 do {
1467 *p2++ = mrb_digitmap[n % 16];
1468 n /= 16;
1469 } while (n > 0);
1470 *p2 = '\0';
1471 p_str->len = (mrb_int)(p2 - p_str->ptr);
1473 while (p1 < p2) {
1474 const char c = *p1;
1475 *p1++ = *--p2;
1476 *p2 = c;
1479 return mrb_obj_value(p_str);
1482 mrb_value
1483 mrb_string_type(mrb_state *mrb, mrb_value str)
1485 return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
1488 mrb_value
1489 mrb_check_string_type(mrb_state *mrb, mrb_value str)
1491 return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
1494 /* ---------------------------------- */
1495 /* 15.2.10.5.29 */
1497 * call-seq:
1498 * str.reverse => new_str
1500 * Returns a new string with the characters from <i>str</i> in reverse order.
1502 * "stressed".reverse #=> "desserts"
1504 static mrb_value
1505 mrb_str_reverse(mrb_state *mrb, mrb_value str)
1507 struct RString *s2;
1508 char *s, *e, *p;
1510 if (RSTRING(str)->len <= 1) return mrb_str_dup(mrb, str);
1512 s2 = str_new(mrb, 0, RSTRING(str)->len);
1513 str_with_class(mrb, s2, str);
1514 s = RSTRING_PTR(str); e = RSTRING_END(str) - 1;
1515 p = s2->ptr;
1517 while (e >= s) {
1518 *p++ = *e--;
1520 return mrb_obj_value(s2);
1523 /* 15.2.10.5.30 */
1525 * call-seq:
1526 * str.reverse! => str
1528 * Reverses <i>str</i> in place.
1530 static mrb_value
1531 mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
1533 struct RString *s = mrb_str_ptr(str);
1534 char *p, *e;
1535 char c;
1537 mrb_str_modify(mrb, s);
1538 if (s->len > 1) {
1539 p = s->ptr;
1540 e = p + s->len - 1;
1541 while (p < e) {
1542 c = *p;
1543 *p++ = *e;
1544 *e-- = c;
1547 return str;
1551 * call-seq:
1552 * str.rindex(substring [, fixnum]) => fixnum or nil
1553 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1554 * str.rindex(regexp [, fixnum]) => fixnum or nil
1556 * Returns the index of the last occurrence of the given <i>substring</i>,
1557 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1558 * <code>nil</code> if not found. If the second parameter is present, it
1559 * specifies the position in the string to end the search---characters beyond
1560 * this point will not be considered.
1562 * "hello".rindex('e') #=> 1
1563 * "hello".rindex('l') #=> 3
1564 * "hello".rindex('a') #=> nil
1565 * "hello".rindex(101) #=> 1
1566 * "hello".rindex(/[aeiou]/, -2) #=> 1
1568 static mrb_int
1569 mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
1571 char *s, *sbeg, *t;
1572 struct RString *ps = mrb_str_ptr(str);
1573 struct RString *psub = mrb_str_ptr(sub);
1574 mrb_int len = psub->len;
1576 /* substring longer than string */
1577 if (ps->len < len) return -1;
1578 if (ps->len - pos < len) {
1579 pos = ps->len - len;
1581 sbeg = ps->ptr;
1582 s = ps->ptr + pos;
1583 t = psub->ptr;
1584 if (len) {
1585 while (sbeg <= s) {
1586 if (memcmp(s, t, len) == 0) {
1587 return s - ps->ptr;
1589 s--;
1591 return -1;
1593 else {
1594 return pos;
1598 /* 15.2.10.5.31 */
1600 * call-seq:
1601 * str.rindex(substring [, fixnum]) => fixnum or nil
1602 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1603 * str.rindex(regexp [, fixnum]) => fixnum or nil
1605 * Returns the index of the last occurrence of the given <i>substring</i>,
1606 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1607 * <code>nil</code> if not found. If the second parameter is present, it
1608 * specifies the position in the string to end the search---characters beyond
1609 * this point will not be considered.
1611 * "hello".rindex('e') #=> 1
1612 * "hello".rindex('l') #=> 3
1613 * "hello".rindex('a') #=> nil
1614 * "hello".rindex(101) #=> 1
1615 * "hello".rindex(/[aeiou]/, -2) #=> 1
1617 static mrb_value
1618 mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
1620 mrb_value *argv;
1621 int argc;
1622 mrb_value sub;
1623 mrb_value vpos;
1624 int pos, len = RSTRING_LEN(str);
1626 mrb_get_args(mrb, "*", &argv, &argc);
1627 if (argc == 2) {
1628 sub = argv[0];
1629 vpos = argv[1];
1630 pos = mrb_fixnum(vpos);
1631 if (pos < 0) {
1632 pos += len;
1633 if (pos < 0) {
1634 regexp_check(mrb, sub);
1635 return mrb_nil_value();
1638 if (pos > len) pos = len;
1640 else {
1641 pos = len;
1642 if (argc > 0)
1643 sub = argv[0];
1644 else
1645 sub = mrb_nil_value();
1647 regexp_check(mrb, sub);
1649 switch (mrb_type(sub)) {
1650 case MRB_TT_FIXNUM: {
1651 int c = mrb_fixnum(sub);
1652 mrb_int len = RSTRING_LEN(str);
1653 unsigned char *p = (unsigned char*)RSTRING_PTR(str);
1655 for (pos=len;pos>=0;pos--) {
1656 if (p[pos] == c) return mrb_fixnum_value(pos);
1658 return mrb_nil_value();
1661 default: {
1662 mrb_value tmp;
1664 tmp = mrb_check_string_type(mrb, sub);
1665 if (mrb_nil_p(tmp)) {
1666 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
1668 sub = tmp;
1670 /* fall through */
1671 case MRB_TT_STRING:
1672 pos = mrb_str_rindex(mrb, str, sub, pos);
1673 if (pos >= 0) return mrb_fixnum_value(pos);
1674 break;
1676 } /* end of switch (TYPE(sub)) */
1677 return mrb_nil_value();
/*
 * Lookup table for ascii_isspace(): non-zero for the six ASCII whitespace
 * bytes (HT, LF, VT, FF, CR and space), zero for every other byte value.
 */
static const char isspacetable[256] = {
  [9]  = 1,   /* \t */
  [10] = 1,   /* \n */
  [11] = 1,   /* \v */
  [12] = 1,   /* \f */
  [13] = 1,   /* \r */
  [32] = 1    /* ' ' */
};

#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
1701 /* 15.2.10.5.35 */
1704 * call-seq:
1705 * str.split(pattern=$;, [limit]) => anArray
1707 * Divides <i>str</i> into substrings based on a delimiter, returning an array
1708 * of these substrings.
1710 * If <i>pattern</i> is a <code>String</code>, then its contents are used as
1711 * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
1712 * space, <i>str</i> is split on whitespace, with leading whitespace and runs
1713 * of contiguous whitespace characters ignored.
1715 * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
1716 * pattern matches. Whenever the pattern matches a zero-length string,
1717 * <i>str</i> is split into individual characters.
1719 * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
1720 * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
1721 * split on whitespace as if ` ' were specified.
1723 * If the <i>limit</i> parameter is omitted, trailing null fields are
1724 * suppressed. If <i>limit</i> is a positive number, at most that number of
1725 * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
1726 * string is returned as the only entry in an array). If negative, there is no
1727 * limit to the number of fields returned, and trailing null fields are not
1728 * suppressed.
1730 * " now's the time".split #=> ["now's", "the", "time"]
1731 * " now's the time".split(' ') #=> ["now's", "the", "time"]
1732 * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
1733 * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
1734 * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
1735 * "hello".split(//, 3) #=> ["h", "e", "llo"]
1736 * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
1738 * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
1739 * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
1740 * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
1741 * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
1744 static mrb_value
1745 mrb_str_split_m(mrb_state *mrb, mrb_value str)
1747 int argc;
1748 mrb_value spat = mrb_nil_value();
1749 enum {awk, string, regexp} split_type = string;
1750 long i = 0, lim_p;
1751 mrb_int beg;
1752 mrb_int end;
1753 mrb_int lim = 0;
1754 mrb_value result, tmp;
1756 argc = mrb_get_args(mrb, "|oi", &spat, &lim);
1757 lim_p = (lim > 0 && argc == 2);
1758 if (argc == 2) {
1759 if (lim == 1) {
1760 if (RSTRING_LEN(str) == 0)
1761 return mrb_ary_new_capa(mrb, 0);
1762 return mrb_ary_new_from_values(mrb, 1, &str);
1764 i = 1;
1767 if (argc == 0 || mrb_nil_p(spat)) {
1768 split_type = awk;
1770 else {
1771 if (mrb_string_p(spat)) {
1772 split_type = string;
1773 if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
1774 split_type = awk;
1777 else {
1778 noregexp(mrb, str);
1782 result = mrb_ary_new(mrb);
1783 beg = 0;
1784 if (split_type == awk) {
1785 char *ptr = RSTRING_PTR(str);
1786 char *eptr = RSTRING_END(str);
1787 char *bptr = ptr;
1788 int skip = 1;
1789 unsigned int c;
1791 end = beg;
1792 while (ptr < eptr) {
1793 int ai = mrb_gc_arena_save(mrb);
1794 c = (unsigned char)*ptr++;
1795 if (skip) {
1796 if (ascii_isspace(c)) {
1797 beg = ptr - bptr;
1799 else {
1800 end = ptr - bptr;
1801 skip = 0;
1802 if (lim_p && lim <= i) break;
1805 else if (ascii_isspace(c)) {
1806 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
1807 mrb_gc_arena_restore(mrb, ai);
1808 skip = 1;
1809 beg = ptr - bptr;
1810 if (lim_p) ++i;
1812 else {
1813 end = ptr - bptr;
1817 else if (split_type == string) {
1818 char *ptr = RSTRING_PTR(str);
1819 char *temp = ptr;
1820 char *eptr = RSTRING_END(str);
1821 mrb_int slen = RSTRING_LEN(spat);
1823 if (slen == 0) {
1824 int ai = mrb_gc_arena_save(mrb);
1825 while (ptr < eptr) {
1826 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1));
1827 mrb_gc_arena_restore(mrb, ai);
1828 ptr++;
1829 if (lim_p && lim <= ++i) break;
1832 else {
1833 char *sptr = RSTRING_PTR(spat);
1834 int ai = mrb_gc_arena_save(mrb);
1836 while (ptr < eptr &&
1837 (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) {
1838 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
1839 mrb_gc_arena_restore(mrb, ai);
1840 ptr += end + slen;
1841 if (lim_p && lim <= ++i) break;
1844 beg = ptr - temp;
1846 else {
1847 noregexp(mrb, str);
1849 if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
1850 if (RSTRING_LEN(str) == beg) {
1851 tmp = mrb_str_new_empty(mrb, str);
1853 else {
1854 tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
1856 mrb_ary_push(mrb, result, tmp);
1858 if (!lim_p && lim == 0) {
1859 mrb_int len;
1860 while ((len = RARRAY_LEN(result)) > 0 &&
1861 (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
1862 mrb_ary_pop(mrb, result);
1865 return result;
1868 mrb_value
1869 mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
1871 char *end;
1872 char sign = 1;
1873 int c;
1874 unsigned long n;
1875 mrb_int val;
1877 #undef ISDIGIT
1878 #define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
1879 #define conv_digit(c) \
1880 (!ISASCII(c) ? -1 : \
1881 isdigit(c) ? ((c) - '0') : \
1882 islower(c) ? ((c) - 'a' + 10) : \
1883 isupper(c) ? ((c) - 'A' + 10) : \
1886 if (!str) {
1887 if (badcheck) goto bad;
1888 return mrb_fixnum_value(0);
1890 while (ISSPACE(*str)) str++;
1892 if (str[0] == '+') {
1893 str++;
1895 else if (str[0] == '-') {
1896 str++;
1897 sign = 0;
1899 if (str[0] == '+' || str[0] == '-') {
1900 if (badcheck) goto bad;
1901 return mrb_fixnum_value(0);
1903 if (base <= 0) {
1904 if (str[0] == '0') {
1905 switch (str[1]) {
1906 case 'x': case 'X':
1907 base = 16;
1908 break;
1909 case 'b': case 'B':
1910 base = 2;
1911 break;
1912 case 'o': case 'O':
1913 base = 8;
1914 break;
1915 case 'd': case 'D':
1916 base = 10;
1917 break;
1918 default:
1919 base = 8;
1922 else if (base < -1) {
1923 base = -base;
1925 else {
1926 base = 10;
1929 switch (base) {
1930 case 2:
1931 if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
1932 str += 2;
1934 break;
1935 case 3:
1936 break;
1937 case 8:
1938 if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
1939 str += 2;
1941 case 4: case 5: case 6: case 7:
1942 break;
1943 case 10:
1944 if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
1945 str += 2;
1947 case 9: case 11: case 12: case 13: case 14: case 15:
1948 break;
1949 case 16:
1950 if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
1951 str += 2;
1953 break;
1954 default:
1955 if (base < 2 || 36 < base) {
1956 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
1958 break;
1959 } /* end of switch (base) { */
1960 if (*str == '0') { /* squeeze preceeding 0s */
1961 int us = 0;
1962 while ((c = *++str) == '0' || c == '_') {
1963 if (c == '_') {
1964 if (++us >= 2)
1965 break;
1967 else
1968 us = 0;
1970 if (!(c = *str) || ISSPACE(c)) --str;
1972 c = *str;
1973 c = conv_digit(c);
1974 if (c < 0 || c >= base) {
1975 if (badcheck) goto bad;
1976 return mrb_fixnum_value(0);
1979 n = strtoul((char*)str, &end, base);
1980 if (n > MRB_INT_MAX) {
1981 mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer", mrb_str_new_cstr(mrb, str));
1983 val = n;
1984 if (badcheck) {
1985 if (end == str) goto bad; /* no number */
1986 while (*end && ISSPACE(*end)) end++;
1987 if (*end) goto bad; /* trailing garbage */
1990 return mrb_fixnum_value(sign ? val : -val);
1991 bad:
1992 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)", mrb_str_new_cstr(mrb, str));
1993 /* not reached */
1994 return mrb_fixnum_value(0);
1997 char *
1998 mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
2000 struct RString *ps = mrb_str_ptr(*ptr);
2001 char *s = ps->ptr;
2003 if (!s || ps->len != strlen(s)) {
2004 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
2006 return s;
2009 mrb_value
2010 mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck)
2012 char *s;
2013 int len;
2015 str = mrb_str_to_str(mrb, str);
2016 if (badcheck) {
2017 s = mrb_string_value_cstr(mrb, &str);
2019 else {
2020 s = RSTRING_PTR(str);
2022 if (s) {
2023 len = RSTRING_LEN(str);
2024 if (s[len]) { /* no sentinel somehow */
2025 struct RString *temp_str = str_new(mrb, s, len);
2026 s = temp_str->ptr;
2029 return mrb_cstr_to_inum(mrb, s, base, badcheck);
2032 /* 15.2.10.5.38 */
2034 * call-seq:
2035 * str.to_i(base=10) => integer
2037 * Returns the result of interpreting leading characters in <i>str</i> as an
2038 * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
2039 * end of a valid number are ignored. If there is not a valid number at the
2040 * start of <i>str</i>, <code>0</code> is returned. This method never raises an
2041 * exception.
2043 * "12345".to_i #=> 12345
2044 * "99 red balloons".to_i #=> 99
2045 * "0a".to_i #=> 0
2046 * "0a".to_i(16) #=> 10
2047 * "hello".to_i #=> 0
2048 * "1100101".to_i(2) #=> 101
2049 * "1100101".to_i(8) #=> 294977
2050 * "1100101".to_i(10) #=> 1100101
2051 * "1100101".to_i(16) #=> 17826049
2053 static mrb_value
2054 mrb_str_to_i(mrb_state *mrb, mrb_value self)
2056 mrb_value *argv;
2057 int argc;
2058 int base;
2060 mrb_get_args(mrb, "*", &argv, &argc);
2061 if (argc == 0)
2062 base = 10;
2063 else
2064 base = mrb_fixnum(argv[0]);
2066 if (base < 0) {
2067 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
2069 return mrb_str_to_inum(mrb, self, base, 0/*Qfalse*/);
2072 double
2073 mrb_cstr_to_dbl(mrb_state *mrb, const char * p, int badcheck)
2075 char *end;
2076 double d;
2077 #if !defined(DBL_DIG)
2078 # define DBL_DIG 16
2079 #endif
2081 enum {max_width = 20};
2082 #define OutOfRange() (((w = end - p) > max_width) ? \
2083 (w = max_width, ellipsis = "...") : \
2084 (w = (int)(end - p), ellipsis = ""))
2086 if (!p) return 0.0;
2087 while (ISSPACE(*p)) p++;
2089 if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2090 return 0.0;
2092 d = strtod(p, &end);
2093 if (p == end) {
2094 if (badcheck) {
2095 bad:
2096 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p));
2097 /* not reached */
2099 return d;
2101 if (*end) {
2102 char buf[DBL_DIG * 4 + 10];
2103 char *n = buf;
2104 char *e = buf + sizeof(buf) - 1;
2105 char prev = 0;
2107 while (p < end && n < e) prev = *n++ = *p++;
2108 while (*p) {
2109 if (*p == '_') {
2110 /* remove underscores between digits */
2111 if (badcheck) {
2112 if (n == buf || !ISDIGIT(prev)) goto bad;
2113 ++p;
2114 if (!ISDIGIT(*p)) goto bad;
2116 else {
2117 while (*++p == '_');
2118 continue;
2121 prev = *p++;
2122 if (n < e) *n++ = prev;
2124 *n = '\0';
2125 p = buf;
2127 if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2128 return 0.0;
2131 d = strtod(p, &end);
2132 if (badcheck) {
2133 if (!end || p == end) goto bad;
2134 while (*end && ISSPACE(*end)) end++;
2135 if (*end) goto bad;
2138 return d;
2141 double
2142 mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck)
2144 char *s;
2145 int len;
2147 str = mrb_str_to_str(mrb, str);
2148 s = RSTRING_PTR(str);
2149 len = RSTRING_LEN(str);
2150 if (s) {
2151 if (badcheck && memchr(s, '\0', len)) {
2152 mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
2154 if (s[len]) { /* no sentinel somehow */
2155 struct RString *temp_str = str_new(mrb, s, len);
2156 s = temp_str->ptr;
2159 return mrb_cstr_to_dbl(mrb, s, badcheck);
2162 /* 15.2.10.5.39 */
2164 * call-seq:
2165 * str.to_f => float
2167 * Returns the result of interpreting leading characters in <i>str</i> as a
2168 * floating point number. Extraneous characters past the end of a valid number
2169 * are ignored. If there is not a valid number at the start of <i>str</i>,
2170 * <code>0.0</code> is returned. This method never raises an exception.
2172 * "123.45e1".to_f #=> 1234.5
2173 * "45.67 degrees".to_f #=> 45.67
2174 * "thx1138".to_f #=> 0.0
2176 static mrb_value
2177 mrb_str_to_f(mrb_state *mrb, mrb_value self)
2179 return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, 0/*Qfalse*/));
2182 /* 15.2.10.5.40 */
2184 * call-seq:
2185 * str.to_s => str
2186 * str.to_str => str
2188 * Returns the receiver.
2190 static mrb_value
2191 mrb_str_to_s(mrb_state *mrb, mrb_value self)
2193 if (mrb_obj_class(mrb, self) != mrb->string_class) {
2194 return mrb_str_dup(mrb, self);
2196 return self;
2199 /* 15.2.10.5.43 */
2201 * call-seq:
2202 * str.upcase! => str or nil
2204 * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
2205 * were made.
2207 static mrb_value
2208 mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
2210 struct RString *s = mrb_str_ptr(str);
2211 char *p, *pend;
2212 int modify = 0;
2214 mrb_str_modify(mrb, s);
2215 p = RSTRING_PTR(str);
2216 pend = RSTRING_END(str);
2217 while (p < pend) {
2218 if (ISLOWER(*p)) {
2219 *p = TOUPPER(*p);
2220 modify = 1;
2222 p++;
2225 if (modify) return str;
2226 return mrb_nil_value();
2229 /* 15.2.10.5.42 */
2231 * call-seq:
2232 * str.upcase => new_str
2234 * Returns a copy of <i>str</i> with all lowercase letters replaced with their
2235 * uppercase counterparts. The operation is locale insensitive---only
2236 * characters ``a'' to ``z'' are affected.
2238 * "hEllO".upcase #=> "HELLO"
2240 static mrb_value
2241 mrb_str_upcase(mrb_state *mrb, mrb_value self)
2243 mrb_value str;
2245 str = mrb_str_dup(mrb, self);
2246 mrb_str_upcase_bang(mrb, str);
2247 return str;
2251 * call-seq:
2252 * str.dump -> new_str
2254 * Produces a version of <i>str</i> with all nonprinting characters replaced by
2255 * <code>\nnn</code> notation and all special characters escaped.
2257 mrb_value
2258 mrb_str_dump(mrb_state *mrb, mrb_value str)
2260 mrb_int len;
2261 const char *p, *pend;
2262 char *q;
2263 struct RString *result;
2265 len = 2; /* "" */
2266 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
2267 while (p < pend) {
2268 unsigned char c = *p++;
2269 switch (c) {
2270 case '"': case '\\':
2271 case '\n': case '\r':
2272 case '\t': case '\f':
2273 case '\013': case '\010': case '\007': case '\033':
2274 len += 2;
2275 break;
2277 case '#':
2278 len += IS_EVSTR(p, pend) ? 2 : 1;
2279 break;
2281 default:
2282 if (ISPRINT(c)) {
2283 len++;
2285 else {
2286 len += 4; /* \NNN */
2288 break;
2292 result = str_new(mrb, 0, len);
2293 str_with_class(mrb, result, str);
2294 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
2295 q = result->ptr;
2297 *q++ = '"';
2298 while (p < pend) {
2299 unsigned char c = *p++;
2301 switch (c) {
2302 case '"':
2303 case '\\':
2304 *q++ = '\\';
2305 *q++ = c;
2306 break;
2308 case '\n':
2309 *q++ = '\\';
2310 *q++ = 'n';
2311 break;
2313 case '\r':
2314 *q++ = '\\';
2315 *q++ = 'r';
2316 break;
2318 case '\t':
2319 *q++ = '\\';
2320 *q++ = 't';
2321 break;
2323 case '\f':
2324 *q++ = '\\';
2325 *q++ = 'f';
2326 break;
2328 case '\013':
2329 *q++ = '\\';
2330 *q++ = 'v';
2331 break;
2333 case '\010':
2334 *q++ = '\\';
2335 *q++ = 'b';
2336 break;
2338 case '\007':
2339 *q++ = '\\';
2340 *q++ = 'a';
2341 break;
2343 case '\033':
2344 *q++ = '\\';
2345 *q++ = 'e';
2346 break;
2348 case '#':
2349 if (IS_EVSTR(p, pend)) *q++ = '\\';
2350 *q++ = '#';
2351 break;
2353 default:
2354 if (ISPRINT(c)) {
2355 *q++ = c;
2357 else {
2358 *q++ = '\\';
2359 q[2] = '0' + c % 8; c /= 8;
2360 q[1] = '0' + c % 8; c /= 8;
2361 q[0] = '0' + c % 8;
2362 q += 3;
2366 *q++ = '"';
2367 return mrb_obj_value(result);
2370 mrb_value
2371 mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
2373 if ((mrb_int)len < 0) {
2374 mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
2376 str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
2377 return str;
2380 mrb_value
2381 mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
2383 return mrb_str_cat(mrb, str, ptr, strlen(ptr));
2386 mrb_value
2387 mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
2389 str2 = mrb_str_to_str(mrb, str2);
2390 return mrb_str_buf_append(mrb, str, str2);
2393 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
2396 * call-seq:
2397 * str.inspect -> string
2399 * Returns a printable version of _str_, surrounded by quote marks,
2400 * with special characters escaped.
2402 * str = "hello"
2403 * str[3] = "\b"
2404 * str.inspect #=> "\"hel\\bo\""
2406 mrb_value
2407 mrb_str_inspect(mrb_state *mrb, mrb_value str)
2409 const char *p, *pend;
2410 char buf[CHAR_ESC_LEN + 1];
2411 mrb_value result = mrb_str_new(mrb, "\"", 1);
2413 p = RSTRING_PTR(str); pend = RSTRING_END(str);
2414 for (;p < pend; p++) {
2415 unsigned int c, cc;
2417 c = *p;
2418 if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
2419 buf[0] = '\\'; buf[1] = c;
2420 mrb_str_buf_cat(mrb, result, buf, 2);
2421 continue;
2423 if (ISPRINT(c)) {
2424 buf[0] = c;
2425 mrb_str_buf_cat(mrb, result, buf, 1);
2426 continue;
2428 switch (c) {
2429 case '\n': cc = 'n'; break;
2430 case '\r': cc = 'r'; break;
2431 case '\t': cc = 't'; break;
2432 case '\f': cc = 'f'; break;
2433 case '\013': cc = 'v'; break;
2434 case '\010': cc = 'b'; break;
2435 case '\007': cc = 'a'; break;
2436 case 033: cc = 'e'; break;
2437 default: cc = 0; break;
2439 if (cc) {
2440 buf[0] = '\\';
2441 buf[1] = (char)cc;
2442 mrb_str_buf_cat(mrb, result, buf, 2);
2443 continue;
2445 else {
2446 buf[0] = '\\';
2447 buf[3] = '0' + c % 8; c /= 8;
2448 buf[2] = '0' + c % 8; c /= 8;
2449 buf[1] = '0' + c % 8;
2450 mrb_str_buf_cat(mrb, result, buf, 4);
2451 continue;
2454 mrb_str_buf_cat(mrb, result, "\"", 1);
2456 return result;
2460 * call-seq:
2461 * str.bytes -> array of fixnums
2463 * Returns an array of bytes in _str_.
2465 * str = "hello"
2466 * str.bytes #=> [104, 101, 108, 108, 111]
2468 static mrb_value
2469 mrb_str_bytes(mrb_state *mrb, mrb_value str)
2471 struct RString *s = mrb_str_ptr(str);
2472 mrb_value a = mrb_ary_new_capa(mrb, s->len);
2473 unsigned char *p = (unsigned char *)(s->ptr), *pend = p + s->len;
2475 while (p < pend) {
2476 mrb_ary_push(mrb, a, mrb_fixnum_value(p[0]));
2477 p++;
2479 return a;
2482 /* ---------------------------*/
2483 void
2484 mrb_init_string(mrb_state *mrb)
2486 struct RClass *s;
2488 s = mrb->string_class = mrb_define_class(mrb, "String", mrb->object_class);
2489 MRB_SET_INSTANCE_TT(s, MRB_TT_STRING);
2490 mrb_include_module(mrb, s, mrb_class_get(mrb, "Comparable"));
2493 mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE());
2495 mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
2496 mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
2497 mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
2498 mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
2499 mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
2500 mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
2501 mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.8 */
2502 mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
2503 mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
2504 mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */
2505 mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.12 */
2506 mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
2507 mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
2508 mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
2509 mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
2511 mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.20 */
2512 mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
2513 mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
2514 mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
2515 mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
2516 mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
2517 mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.26 */
2518 mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
2519 mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
2520 mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
2521 mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
2522 mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
2523 mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
2524 mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
2526 mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
2527 mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
2528 mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
2529 mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE());
2530 mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
2531 mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_REQ(1)); /* 15.2.10.5.42 */
2532 mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.43 */
2533 mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
2534 mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE());