move range aware aget to array.c from mruby-array-ext gem
[mruby.git] / src / string.c
blob 6e5f91e3d0d825ff93b4d8b58b0d6861a0551aac
1 /*
2 ** string.c - String class
3 **
4 ** See Copyright Notice in mruby.h
5 */
7 #include <ctype.h>
8 #include <limits.h>
9 #include <stddef.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include "mruby.h"
13 #include "mruby/array.h"
14 #include "mruby/class.h"
15 #include "mruby/range.h"
16 #include "mruby/string.h"
17 #include "re.h"
/* Digit characters indexed by digit value: '0'-'9' followed by 'a'-'z'. */
const char mrb_digitmap[] =
  "0123456789abcdefghijklmnopqrstuvwxyz";
21 typedef struct mrb_shared_string {
22 mrb_bool nofree;
23 int refcnt;
24 char *ptr;
25 mrb_int len;
26 } mrb_shared_string;
28 static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2);
29 static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
/*
 * Reallocate the buffer of RString `s` to hold `capacity` bytes plus the
 * terminating NUL, and record the new capacity.
 *
 * Relies on a variable named `mrb` being in scope at the call site.
 * NOTE: `s` and `capacity` are each evaluated more than once, so do not
 * pass expressions with side effects.  Both are parenthesized so that
 * compound expressions expand correctly.
 */
#define RESIZE_CAPA(s,capacity) do {\
      (s)->ptr = (char *)mrb_realloc(mrb, (s)->ptr, (capacity)+1);\
      (s)->aux.capa = (capacity);\
    } while(0)
36 static void
37 str_decref(mrb_state *mrb, mrb_shared_string *shared)
39 shared->refcnt--;
40 if (shared->refcnt == 0) {
41 if (!shared->nofree) {
42 mrb_free(mrb, shared->ptr);
44 mrb_free(mrb, shared);
48 void
49 mrb_str_modify(mrb_state *mrb, struct RString *s)
51 if (s->flags & MRB_STR_SHARED) {
52 mrb_shared_string *shared = s->aux.shared;
54 if (shared->refcnt == 1 && s->ptr == shared->ptr) {
55 s->ptr = shared->ptr;
56 s->aux.capa = shared->len;
57 s->ptr[s->len] = '\0';
58 mrb_free(mrb, shared);
60 else {
61 char *ptr, *p;
62 mrb_int len;
64 p = s->ptr;
65 len = s->len;
66 ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
67 if (p) {
68 memcpy(ptr, p, len);
70 ptr[len] = '\0';
71 s->ptr = ptr;
72 s->aux.capa = len;
73 str_decref(mrb, shared);
75 s->flags &= ~MRB_STR_SHARED;
76 return;
78 if (s->flags & MRB_STR_NOFREE) {
79 char *p = s->ptr;
81 s->ptr = (char *)mrb_malloc(mrb, (size_t)s->len+1);
82 if (p) {
83 memcpy(s->ptr, p, s->len);
85 s->ptr[s->len] = '\0';
86 s->aux.capa = s->len;
87 s->flags &= ~MRB_STR_NOFREE;
88 return;
92 mrb_value
93 mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
95 int slen;
96 struct RString *s = mrb_str_ptr(str);
98 mrb_str_modify(mrb, s);
99 slen = s->len;
100 if (len != slen) {
101 if (slen < len || slen - len > 256) {
102 RESIZE_CAPA(s, len);
104 s->len = len;
105 s->ptr[len] = '\0'; /* sentinel */
107 return str;
/* Allocate a new object of type MRB_TT_STRING whose class is mrb->string_class. */
#define mrb_obj_alloc_string(mrb) \
  ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
112 /* char offset to byte offset */
114 mrb_str_offset(mrb_state *mrb, mrb_value str, int pos)
116 return pos;
119 static struct RString*
120 str_new(mrb_state *mrb, const char *p, mrb_int len)
122 struct RString *s;
124 s = mrb_obj_alloc_string(mrb);
125 s->len = len;
126 s->aux.capa = len;
127 s->ptr = (char *)mrb_malloc(mrb, (size_t)len+1);
128 if (p) {
129 memcpy(s->ptr, p, len);
131 s->ptr[len] = '\0';
132 return s;
135 void
136 str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj)
138 s->c = mrb_str_ptr(obj)->c;
141 static mrb_value
142 mrb_str_new_empty(mrb_state *mrb, mrb_value str)
144 struct RString *s = str_new(mrb, 0, 0);
146 str_with_class(mrb, s, str);
147 return mrb_obj_value(s);
150 #ifndef MRB_STR_BUF_MIN_SIZE
151 # define MRB_STR_BUF_MIN_SIZE 128
152 #endif
154 mrb_value
155 mrb_str_buf_new(mrb_state *mrb, mrb_int capa)
157 struct RString *s;
159 s = mrb_obj_alloc_string(mrb);
161 if (capa < MRB_STR_BUF_MIN_SIZE) {
162 capa = MRB_STR_BUF_MIN_SIZE;
164 s->len = 0;
165 s->aux.capa = capa;
166 s->ptr = (char *)mrb_malloc(mrb, capa+1);
167 s->ptr[0] = '\0';
169 return mrb_obj_value(s);
172 static void
173 str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len)
175 mrb_int capa;
176 mrb_int total;
177 ptrdiff_t off = -1;
179 mrb_str_modify(mrb, s);
180 if (ptr >= s->ptr && ptr <= s->ptr + s->len) {
181 off = ptr - s->ptr;
183 if (len == 0) return;
184 capa = s->aux.capa;
185 if (s->len >= MRB_INT_MAX - (mrb_int)len) {
186 mrb_raise(mrb, E_ARGUMENT_ERROR, "string sizes too big");
188 total = s->len+len;
189 if (capa <= total) {
190 while (total > capa) {
191 if (capa + 1 >= MRB_INT_MAX / 2) {
192 capa = (total + 4095) / 4096;
193 break;
195 capa = (capa + 1) * 2;
197 RESIZE_CAPA(s, capa);
199 if (off != -1) {
200 ptr = s->ptr + off;
202 memcpy(s->ptr + s->len, ptr, len);
203 s->len = total;
204 s->ptr[total] = '\0'; /* sentinel */
207 mrb_value
208 mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
210 if (len == 0) return str;
211 str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
212 return str;
215 mrb_value
216 mrb_str_new(mrb_state *mrb, const char *p, size_t len)
218 struct RString *s;
220 s = str_new(mrb, p, len);
221 return mrb_obj_value(s);
225 * call-seq: (Caution! NULL string)
226 * String.new(str="") => new_str
228 * Returns a new string object containing a copy of <i>str</i>.
231 mrb_value
232 mrb_str_new_cstr(mrb_state *mrb, const char *p)
234 struct RString *s;
235 size_t len;
237 if (p) {
238 len = strlen(p);
239 if ((mrb_int)len < 0) {
240 mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
243 else {
244 len = 0;
247 s = str_new(mrb, p, len);
249 return mrb_obj_value(s);
252 mrb_value
253 mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
255 struct RString *s;
257 s = mrb_obj_alloc_string(mrb);
258 s->len = len;
259 s->aux.capa = 0; /* nofree */
260 s->ptr = (char *)p;
261 s->flags = MRB_STR_NOFREE;
262 return mrb_obj_value(s);
265 void
266 mrb_gc_free_str(mrb_state *mrb, struct RString *str)
268 if (str->flags & MRB_STR_SHARED)
269 str_decref(mrb, str->aux.shared);
270 else if ((str->flags & MRB_STR_NOFREE) == 0)
271 mrb_free(mrb, str->ptr);
274 char *
275 mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
277 struct RString *s;
279 if (!mrb_string_p(str0)) {
280 mrb_raise(mrb, E_TYPE_ERROR, "expected String");
283 s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
284 if ((strlen(s->ptr) ^ s->len) != 0) {
285 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
287 return s->ptr;
290 static void
291 str_make_shared(mrb_state *mrb, struct RString *s)
293 if (!(s->flags & MRB_STR_SHARED)) {
294 mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
296 shared->refcnt = 1;
297 if (s->flags & MRB_STR_NOFREE) {
298 shared->nofree = TRUE;
299 shared->ptr = s->ptr;
300 s->flags &= ~MRB_STR_NOFREE;
302 else {
303 shared->nofree = FALSE;
304 if (s->aux.capa > s->len) {
305 s->ptr = shared->ptr = (char *)mrb_realloc(mrb, s->ptr, s->len+1);
307 else {
308 shared->ptr = s->ptr;
311 shared->len = s->len;
312 s->aux.shared = shared;
313 s->flags |= MRB_STR_SHARED;
318 * call-seq:
319 * char* str = String("abcd"), len=strlen("abcd")
321 * Returns a new string object containing a copy of <i>str</i>.
323 const char*
324 mrb_str_body(mrb_value str, int *len_p)
326 struct RString *s = mrb_str_ptr(str);
328 *len_p = s->len;
329 return s->ptr;
333 * call-seq: (Caution! String("abcd") change)
334 * String("abcdefg") = String("abcd") + String("efg")
336 * Returns a new string object containing a copy of <i>str</i>.
338 void
339 mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
341 struct RString *s1 = mrb_str_ptr(self), *s2;
342 mrb_int len;
344 mrb_str_modify(mrb, s1);
345 if (!mrb_string_p(other)) {
346 other = mrb_str_to_str(mrb, other);
348 s2 = mrb_str_ptr(other);
349 len = s1->len + s2->len;
351 if (s1->aux.capa < len) {
352 s1->aux.capa = len;
353 s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, len+1);
355 memcpy(s1->ptr+s1->len, s2->ptr, s2->len);
356 s1->len = len;
357 s1->ptr[len] = '\0';
361 * call-seq: (Caution! String("abcd") remain)
362 * String("abcdefg") = String("abcd") + String("efg")
364 * Returns a new string object containing a copy of <i>str</i>.
366 mrb_value
367 mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
369 struct RString *s = mrb_str_ptr(a);
370 struct RString *s2 = mrb_str_ptr(b);
371 struct RString *t;
373 t = str_new(mrb, 0, s->len + s2->len);
374 memcpy(t->ptr, s->ptr, s->len);
375 memcpy(t->ptr + s->len, s2->ptr, s2->len);
377 return mrb_obj_value(t);
380 /* 15.2.10.5.2 */
383 * call-seq: (Caution! String("abcd") remain) for stack_argument
384 * String("abcdefg") = String("abcd") + String("efg")
386 * Returns a new string object containing a copy of <i>str</i>.
388 static mrb_value
389 mrb_str_plus_m(mrb_state *mrb, mrb_value self)
391 mrb_value str;
393 mrb_get_args(mrb, "S", &str);
394 return mrb_str_plus(mrb, self, str);
398 * call-seq:
399 * len = strlen(String("abcd"))
401 * Returns a new string object containing a copy of <i>str</i>.
403 static mrb_value
404 mrb_str_bytesize(mrb_state *mrb, mrb_value self)
406 struct RString *s = mrb_str_ptr(self);
407 return mrb_fixnum_value(s->len);
410 /* 15.2.10.5.26 */
411 /* 15.2.10.5.33 */
413 * call-seq:
414 * len = strlen(String("abcd"))
416 * Returns a new string object containing a copy of <i>str</i>.
418 mrb_value
419 mrb_str_size(mrb_state *mrb, mrb_value self)
421 struct RString *s = mrb_str_ptr(self);
422 return mrb_fixnum_value(s->len);
425 /* 15.2.10.5.1 */
427 * call-seq:
428 * str * integer => new_str
430 * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
431 * the receiver.
433 * "Ho! " * 3 #=> "Ho! Ho! Ho! "
435 static mrb_value
436 mrb_str_times(mrb_state *mrb, mrb_value self)
438 mrb_int n,len,times;
439 struct RString *str2;
440 char *p;
442 mrb_get_args(mrb, "i", &times);
443 if (times < 0) {
444 mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
446 if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) {
447 mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
450 len = RSTRING_LEN(self)*times;
451 str2 = str_new(mrb, 0, len);
452 str_with_class(mrb, str2, self);
453 p = str2->ptr;
454 if (len > 0) {
455 n = RSTRING_LEN(self);
456 memcpy(p, RSTRING_PTR(self), n);
457 while (n <= len/2) {
458 memcpy(p + n, p, n);
459 n *= 2;
461 memcpy(p + n, p, len-n);
463 p[str2->len] = '\0';
465 return mrb_obj_value(str2);
467 /* -------------------------------------------------------------- */
/* Smaller of two values; yields `a` when they compare equal.  Arguments may be evaluated twice. */
#define lesser(a,b) (((b)<(a))?(b):(a))
471 /* ---------------------------*/
473 * call-seq:
474 * mrb_value str1 <=> mrb_value str2 => int
475 * > 1
476 * = 0
477 * < -1
480 mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
482 mrb_int len;
483 mrb_int retval;
484 struct RString *s1 = mrb_str_ptr(str1);
485 struct RString *s2 = mrb_str_ptr(str2);
487 len = lesser(s1->len, s2->len);
488 retval = memcmp(s1->ptr, s2->ptr, len);
489 if (retval == 0) {
490 if (s1->len == s2->len) return 0;
491 if (s1->len > s2->len) return 1;
492 return -1;
494 if (retval > 0) return 1;
495 return -1;
498 /* 15.2.10.5.3 */
501 * call-seq:
502 * str <=> other_str => -1, 0, +1
504 * Comparison---Returns -1 if <i>other_str</i> is less than, 0 if
505 * <i>other_str</i> is equal to, and +1 if <i>other_str</i> is greater than
506 * <i>str</i>. If the strings are of different lengths, and the strings are
507 * equal when compared up to the shortest length, then the longer string is
508 * considered greater than the shorter one. If the variable <code>$=</code> is
509 * <code>false</code>, the comparison is based on comparing the binary values
510 * of each character in the string. In older versions of Ruby, setting
511 * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
512 * in favor of using <code>String#casecmp</code>.
514 * <code><=></code> is the basis for the methods <code><</code>,
515 * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
516 * included from module <code>Comparable</code>. The method
517 * <code>String#==</code> does not use <code>Comparable#==</code>.
519 * "abcdef" <=> "abcde" #=> 1
520 * "abcdef" <=> "abcdef" #=> 0
521 * "abcdef" <=> "abcdefg" #=> -1
522 * "abcdef" <=> "ABCDEF" #=> 1
524 static mrb_value
525 mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
527 mrb_value str2;
528 mrb_int result;
530 mrb_get_args(mrb, "o", &str2);
531 if (!mrb_string_p(str2)) {
532 if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_s"))) {
533 return mrb_nil_value();
535 else if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "<=>"))) {
536 return mrb_nil_value();
538 else {
539 mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1);
541 if (mrb_nil_p(tmp)) return mrb_nil_value();
542 if (!mrb_fixnum(tmp)) {
543 return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp);
545 result = -mrb_fixnum(tmp);
548 else {
549 result = mrb_str_cmp(mrb, str1, str2);
551 return mrb_fixnum_value(result);
554 static mrb_bool
555 str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
557 const mrb_int len = RSTRING_LEN(str1);
559 if (len != RSTRING_LEN(str2)) return FALSE;
560 if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0)
561 return TRUE;
562 return FALSE;
565 mrb_bool
566 mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
568 if (mrb_obj_equal(mrb, str1, str2)) return TRUE;
569 if (!mrb_string_p(str2)) {
570 if (mrb_nil_p(str2)) return FALSE;
571 if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) {
572 return FALSE;
574 str2 = mrb_funcall(mrb, str2, "to_str", 0);
575 return mrb_equal(mrb, str2, str1);
577 return str_eql(mrb, str1, str2);
580 /* 15.2.10.5.4 */
582 * call-seq:
583 * str == obj => true or false
585 * Equality---
586 * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
587 * Otherwise, returns <code>false</code> or <code>true</code>
589 * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero.
591 static mrb_value
592 mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
594 mrb_value str2;
595 mrb_bool equal_p;
597 mrb_get_args(mrb, "o", &str2);
598 equal_p = mrb_str_equal(mrb, str1, str2);
600 return mrb_bool_value(equal_p);
602 /* ---------------------------------- */
603 mrb_value
604 mrb_str_to_str(mrb_state *mrb, mrb_value str)
606 mrb_value s;
608 if (!mrb_string_p(str)) {
609 s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
610 if (mrb_nil_p(s)) {
611 s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
613 return s;
615 return str;
618 char *
619 mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
621 mrb_value str = mrb_str_to_str(mrb, ptr);
622 return RSTRING_PTR(str);
625 static mrb_value
626 noregexp(mrb_state *mrb, mrb_value self)
628 mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
629 return mrb_nil_value();
632 static void
633 regexp_check(mrb_state *mrb, mrb_value obj)
635 if (!memcmp(mrb_obj_classname(mrb, obj), REGEXP_CLASS, sizeof(REGEXP_CLASS) - 1)) {
636 noregexp(mrb, obj);
640 static inline mrb_int
641 mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
643 const unsigned char *x = xs, *xe = xs + m;
644 const unsigned char *y = ys;
645 int i, qstable[256];
647 /* Preprocessing */
648 for (i = 0; i < 256; ++i)
649 qstable[i] = m + 1;
650 for (; x < xe; ++x)
651 qstable[*x] = xe - x;
652 /* Searching */
653 for (; y + m <= ys + n; y += *(qstable + y[m])) {
654 if (*xs == *y && memcmp(xs, y, m) == 0)
655 return y - ys;
657 return -1;
660 static mrb_int
661 mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
663 const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
665 if (m > n) return -1;
666 else if (m == n) {
667 return memcmp(x0, y0, m) == 0 ? 0 : -1;
669 else if (m < 1) {
670 return 0;
672 else if (m == 1) {
673 const unsigned char *ys = y, *ye = ys + n;
674 for (; y < ye; ++y) {
675 if (*x == *y)
676 return y - ys;
678 return -1;
680 return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
683 static mrb_int
684 mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
686 mrb_int pos;
687 char *s, *sptr;
688 mrb_int len, slen;
690 len = RSTRING_LEN(str);
691 slen = RSTRING_LEN(sub);
692 if (offset < 0) {
693 offset += len;
694 if (offset < 0) return -1;
696 if (len - offset < slen) return -1;
697 s = RSTRING_PTR(str);
698 if (offset) {
699 s += offset;
701 if (slen == 0) return offset;
702 /* need proceed one character at a time */
703 sptr = RSTRING_PTR(sub);
704 slen = RSTRING_LEN(sub);
705 len = RSTRING_LEN(str) - offset;
706 pos = mrb_memsearch(sptr, slen, s, len);
707 if (pos < 0) return pos;
708 return pos + offset;
711 mrb_value
712 mrb_str_dup(mrb_state *mrb, mrb_value str)
714 /* should return shared string */
715 struct RString *s = mrb_str_ptr(str);
717 return mrb_str_new(mrb, s->ptr, s->len);
720 static mrb_value
721 mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
723 mrb_int idx;
725 regexp_check(mrb, indx);
726 switch (mrb_type(indx)) {
727 case MRB_TT_FIXNUM:
728 idx = mrb_fixnum(indx);
730 num_index:
731 str = mrb_str_substr(mrb, str, idx, 1);
732 if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
733 return str;
735 case MRB_TT_STRING:
736 if (mrb_str_index(mrb, str, indx, 0) != -1)
737 return mrb_str_dup(mrb, indx);
738 return mrb_nil_value();
740 case MRB_TT_RANGE:
741 /* check if indx is Range */
743 mrb_int beg, len;
745 len = RSTRING_LEN(str);
746 if (mrb_range_beg_len(mrb, indx, &beg, &len, len)) {
747 return mrb_str_subseq(mrb, str, beg, len);
749 else {
750 return mrb_nil_value();
753 default:
754 idx = mrb_fixnum(indx);
755 goto num_index;
757 return mrb_nil_value(); /* not reached */
760 /* 15.2.10.5.6 */
761 /* 15.2.10.5.34 */
763 * call-seq:
764 * str[fixnum] => fixnum or nil
765 * str[fixnum, fixnum] => new_str or nil
766 * str[range] => new_str or nil
767 * str[regexp] => new_str or nil
768 * str[regexp, fixnum] => new_str or nil
769 * str[other_str] => new_str or nil
770 * str.slice(fixnum) => fixnum or nil
771 * str.slice(fixnum, fixnum) => new_str or nil
772 * str.slice(range) => new_str or nil
773 * str.slice(regexp) => new_str or nil
774 * str.slice(regexp, fixnum) => new_str or nil
775 * str.slice(other_str) => new_str or nil
777 * Element Reference---If passed a single <code>Fixnum</code>, returns the code
778 * of the character at that position. If passed two <code>Fixnum</code>
779 * objects, returns a substring starting at the offset given by the first, and
780 * a length given by the second. If given a range, a substring containing
781 * characters at offsets given by the range is returned. In all three cases, if
782 * an offset is negative, it is counted from the end of <i>str</i>. Returns
783 * <code>nil</code> if the initial offset falls outside the string, the length
784 * is negative, or the beginning of the range is greater than the end.
786 * If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
787 * returned. If a numeric parameter follows the regular expression, that
788 * component of the <code>MatchData</code> is returned instead. If a
789 * <code>String</code> is given, that string is returned if it occurs in
790 * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
791 * match.
793 * a = "hello there"
794 * a[1] #=> 101(1.8.7) "e"(1.9.2)
795 * a[1,3] #=> "ell"
796 * a[1..3] #=> "ell"
797 * a[-3,2] #=> "er"
798 * a[-4..-2] #=> "her"
799 * a[12..-1] #=> nil
800 * a[-2..-4] #=> ""
801 * a[/[aeiou](.)\1/] #=> "ell"
802 * a[/[aeiou](.)\1/, 0] #=> "ell"
803 * a[/[aeiou](.)\1/, 1] #=> "l"
804 * a[/[aeiou](.)\1/, 2] #=> nil
805 * a["lo"] #=> "lo"
806 * a["bye"] #=> nil
808 static mrb_value
809 mrb_str_aref_m(mrb_state *mrb, mrb_value str)
811 mrb_value a1, a2;
812 int argc;
814 argc = mrb_get_args(mrb, "o|o", &a1, &a2);
815 if (argc == 2) {
816 regexp_check(mrb, a1);
817 return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
819 if (argc != 1) {
820 mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
822 return mrb_str_aref(mrb, str, a1);
825 /* 15.2.10.5.8 */
827 * call-seq:
828 * str.capitalize! => str or nil
830 * Modifies <i>str</i> by converting the first character to uppercase and the
831 * remainder to lowercase. Returns <code>nil</code> if no changes are made.
833 * a = "hello"
834 * a.capitalize! #=> "Hello"
835 * a #=> "Hello"
836 * a.capitalize! #=> nil
838 static mrb_value
839 mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
841 char *p, *pend;
842 int modify = 0;
843 struct RString *s = mrb_str_ptr(str);
845 mrb_str_modify(mrb, s);
846 if (s->len == 0 || !s->ptr) return mrb_nil_value();
847 p = s->ptr; pend = s->ptr + s->len;
848 if (ISLOWER(*p)) {
849 *p = TOUPPER(*p);
850 modify = 1;
852 while (++p < pend) {
853 if (ISUPPER(*p)) {
854 *p = TOLOWER(*p);
855 modify = 1;
858 if (modify) return str;
859 return mrb_nil_value();
862 /* 15.2.10.5.7 */
864 * call-seq:
865 * str.capitalize => new_str
867 * Returns a copy of <i>str</i> with the first character converted to uppercase
868 * and the remainder to lowercase.
870 * "hello".capitalize #=> "Hello"
871 * "HELLO".capitalize #=> "Hello"
872 * "123ABC".capitalize #=> "123abc"
874 static mrb_value
875 mrb_str_capitalize(mrb_state *mrb, mrb_value self)
877 mrb_value str;
879 str = mrb_str_dup(mrb, self);
880 mrb_str_capitalize_bang(mrb, str);
881 return str;
884 /* 15.2.10.5.10 */
886 * call-seq:
887 * str.chomp!(separator=$/) => str or nil
889 * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
890 * returning <i>str</i>, or <code>nil</code> if no modifications were made.
892 static mrb_value
893 mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
895 mrb_value rs;
896 mrb_int newline;
897 char *p, *pp;
898 mrb_int rslen;
899 mrb_int len;
900 struct RString *s = mrb_str_ptr(str);
902 mrb_str_modify(mrb, s);
903 len = s->len;
904 if (mrb_get_args(mrb, "|S", &rs) == 0) {
905 if (len == 0) return mrb_nil_value();
906 smart_chomp:
907 if (s->ptr[len-1] == '\n') {
908 s->len--;
909 if (s->len > 0 &&
910 s->ptr[s->len-1] == '\r') {
911 s->len--;
914 else if (s->ptr[len-1] == '\r') {
915 s->len--;
917 else {
918 return mrb_nil_value();
920 s->ptr[s->len] = '\0';
921 return str;
924 if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value();
925 p = s->ptr;
926 rslen = RSTRING_LEN(rs);
927 if (rslen == 0) {
928 while (len>0 && p[len-1] == '\n') {
929 len--;
930 if (len>0 && p[len-1] == '\r')
931 len--;
933 if (len < s->len) {
934 s->len = len;
935 p[len] = '\0';
936 return str;
938 return mrb_nil_value();
940 if (rslen > len) return mrb_nil_value();
941 newline = RSTRING_PTR(rs)[rslen-1];
942 if (rslen == 1 && newline == '\n')
943 newline = RSTRING_PTR(rs)[rslen-1];
944 if (rslen == 1 && newline == '\n')
945 goto smart_chomp;
947 pp = p + len - rslen;
948 if (p[len-1] == newline &&
949 (rslen <= 1 ||
950 memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
951 s->len = len - rslen;
952 p[s->len] = '\0';
953 return str;
955 return mrb_nil_value();
958 /* 15.2.10.5.9 */
960 * call-seq:
961 * str.chomp(separator=$/) => new_str
963 * Returns a new <code>String</code> with the given record separator removed
964 * from the end of <i>str</i> (if present). If <code>$/</code> has not been
965 * changed from the default Ruby record separator, then <code>chomp</code> also
966 * removes carriage return characters (that is it will remove <code>\n</code>,
967 * <code>\r</code>, and <code>\r\n</code>).
969 * "hello".chomp #=> "hello"
970 * "hello\n".chomp #=> "hello"
971 * "hello\r\n".chomp #=> "hello"
972 * "hello\n\r".chomp #=> "hello\n"
973 * "hello\r".chomp #=> "hello"
974 * "hello \n there".chomp #=> "hello \n there"
975 * "hello".chomp("llo") #=> "he"
977 static mrb_value
978 mrb_str_chomp(mrb_state *mrb, mrb_value self)
980 mrb_value str;
982 str = mrb_str_dup(mrb, self);
983 mrb_str_chomp_bang(mrb, str);
984 return str;
987 /* 15.2.10.5.12 */
989 * call-seq:
990 * str.chop! => str or nil
992 * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
993 * or <code>nil</code> if <i>str</i> is the empty string. See also
994 * <code>String#chomp!</code>.
996 static mrb_value
997 mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
999 struct RString *s = mrb_str_ptr(str);
1001 mrb_str_modify(mrb, s);
1002 if (s->len > 0) {
1003 int len;
1004 len = s->len - 1;
1005 if (s->ptr[len] == '\n') {
1006 if (len > 0 &&
1007 s->ptr[len-1] == '\r') {
1008 len--;
1011 s->len = len;
1012 s->ptr[len] = '\0';
1013 return str;
1015 return mrb_nil_value();
1018 /* 15.2.10.5.11 */
1020 * call-seq:
1021 * str.chop => new_str
1023 * Returns a new <code>String</code> with the last character removed. If the
1024 * string ends with <code>\r\n</code>, both characters are removed. Applying
1025 * <code>chop</code> to an empty string returns an empty
1026 * string. <code>String#chomp</code> is often a safer alternative, as it leaves
1027 * the string unchanged if it doesn't end in a record separator.
1029 * "string\r\n".chop #=> "string"
1030 * "string\n\r".chop #=> "string\n"
1031 * "string\n".chop #=> "string"
1032 * "string".chop #=> "strin"
1033 * "x".chop #=> ""
1035 static mrb_value
1036 mrb_str_chop(mrb_state *mrb, mrb_value self)
1038 mrb_value str;
1039 str = mrb_str_dup(mrb, self);
1040 mrb_str_chop_bang(mrb, str);
1041 return str;
1044 /* 15.2.10.5.14 */
1046 * call-seq:
1047 * str.downcase! => str or nil
1049 * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
1050 * changes were made.
1052 static mrb_value
1053 mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
1055 char *p, *pend;
1056 int modify = 0;
1057 struct RString *s = mrb_str_ptr(str);
1059 mrb_str_modify(mrb, s);
1060 p = s->ptr;
1061 pend = s->ptr + s->len;
1062 while (p < pend) {
1063 if (ISUPPER(*p)) {
1064 *p = TOLOWER(*p);
1065 modify = 1;
1067 p++;
1070 if (modify) return str;
1071 return mrb_nil_value();
1074 /* 15.2.10.5.13 */
1076 * call-seq:
1077 * str.downcase => new_str
1079 * Returns a copy of <i>str</i> with all uppercase letters replaced with their
1080 * lowercase counterparts. The operation is locale insensitive---only
1081 * characters ``A'' to ``Z'' are affected.
1083 * "hEllO".downcase #=> "hello"
1085 static mrb_value
1086 mrb_str_downcase(mrb_state *mrb, mrb_value self)
1088 mrb_value str;
1090 str = mrb_str_dup(mrb, self);
1091 mrb_str_downcase_bang(mrb, str);
1092 return str;
1095 /* 15.2.10.5.16 */
1097 * call-seq:
1098 * str.empty? => true or false
1100 * Returns <code>true</code> if <i>str</i> has a length of zero.
1102 * "hello".empty? #=> false
1103 * "".empty? #=> true
1105 static mrb_value
1106 mrb_str_empty_p(mrb_state *mrb, mrb_value self)
1108 struct RString *s = mrb_str_ptr(self);
1110 return mrb_bool_value(s->len == 0);
1113 /* 15.2.10.5.17 */
1115 * call-seq:
1116 * str.eql?(other) => true or false
1118 * Two strings are equal if the have the same length and content.
1120 static mrb_value
1121 mrb_str_eql(mrb_state *mrb, mrb_value self)
1123 mrb_value str2;
1124 mrb_bool eql_p;
1126 mrb_get_args(mrb, "o", &str2);
1127 eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2);
1129 return mrb_bool_value(eql_p);
1132 static mrb_value
1133 mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
1135 struct RString *orig, *s;
1136 mrb_shared_string *shared;
1138 orig = mrb_str_ptr(str);
1139 str_make_shared(mrb, orig);
1140 shared = orig->aux.shared;
1141 s = mrb_obj_alloc_string(mrb);
1142 s->ptr = orig->ptr + beg;
1143 s->len = len;
1144 s->aux.shared = shared;
1145 s->flags |= MRB_STR_SHARED;
1146 shared->refcnt++;
1148 return mrb_obj_value(s);
1151 mrb_value
1152 mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
1154 mrb_value str2;
1156 if (len < 0) return mrb_nil_value();
1157 if (!RSTRING_LEN(str)) {
1158 len = 0;
1160 if (beg > RSTRING_LEN(str)) return mrb_nil_value();
1161 if (beg < 0) {
1162 beg += RSTRING_LEN(str);
1163 if (beg < 0) return mrb_nil_value();
1165 if (beg + len > RSTRING_LEN(str))
1166 len = RSTRING_LEN(str) - beg;
1167 if (len <= 0) {
1168 len = 0;
1170 str2 = mrb_str_subseq(mrb, str, beg, len);
1172 return str2;
1175 mrb_value
1176 mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2)
1178 mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
1179 return str;
1182 mrb_int
1183 mrb_str_hash(mrb_state *mrb, mrb_value str)
1185 /* 1-8-7 */
1186 struct RString *s = mrb_str_ptr(str);
1187 mrb_int len = s->len;
1188 char *p = s->ptr;
1189 mrb_int key = 0;
1191 while (len--) {
1192 key = key*65599 + *p;
1193 p++;
1195 key = key + (key>>5);
1196 return key;
1199 /* 15.2.10.5.20 */
1201 * call-seq:
1202 * str.hash => fixnum
1204 * Return a hash based on the string's length and content.
1206 static mrb_value
1207 mrb_str_hash_m(mrb_state *mrb, mrb_value self)
1209 mrb_int key = mrb_str_hash(mrb, self);
1210 return mrb_fixnum_value(key);
1213 /* 15.2.10.5.21 */
1215 * call-seq:
1216 * str.include? other_str => true or false
1217 * str.include? fixnum => true or false
1219 * Returns <code>true</code> if <i>str</i> contains the given string or
1220 * character.
1222 * "hello".include? "lo" #=> true
1223 * "hello".include? "ol" #=> false
1224 * "hello".include? ?h #=> true
1226 static mrb_value
1227 mrb_str_include(mrb_state *mrb, mrb_value self)
1229 mrb_int i;
1230 mrb_value str2;
1231 mrb_bool include_p;
1233 mrb_get_args(mrb, "o", &str2);
1234 if (mrb_fixnum_p(str2)) {
1235 include_p = (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self)) != NULL);
1237 else {
1238 str2 = mrb_str_to_str(mrb, str2);
1239 i = mrb_str_index(mrb, self, str2, 0);
1241 include_p = (i != -1);
1244 return mrb_bool_value(include_p);
1247 /* 15.2.10.5.22 */
1249 * call-seq:
1250 * str.index(substring [, offset]) => fixnum or nil
1251 * str.index(fixnum [, offset]) => fixnum or nil
1252 * str.index(regexp [, offset]) => fixnum or nil
1254 * Returns the index of the first occurrence of the given
1255 * <i>substring</i>,
1256 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
1257 * Returns
1258 * <code>nil</code> if not found.
1259 * If the second parameter is present, it
1260 * specifies the position in the string to begin the search.
1262 * "hello".index('e') #=> 1
1263 * "hello".index('lo') #=> 3
1264 * "hello".index('a') #=> nil
1265 * "hello".index(101) #=> 1(101=0x65='e')
1266 * "hello".index(/[aeiou]/, -3) #=> 4
1268 static mrb_value
1269 mrb_str_index_m(mrb_state *mrb, mrb_value str)
1271 mrb_value *argv;
1272 int argc;
1274 mrb_value sub;
1275 mrb_int pos;
1277 mrb_get_args(mrb, "*", &argv, &argc);
1278 if (argc == 2) {
1279 pos = mrb_fixnum(argv[1]);
1280 sub = argv[0];
1282 else {
1283 pos = 0;
1284 if (argc > 0)
1285 sub = argv[0];
1286 else
1287 sub = mrb_nil_value();
1290 regexp_check(mrb, sub);
1291 if (pos < 0) {
1292 pos += RSTRING_LEN(str);
1293 if (pos < 0) {
1294 return mrb_nil_value();
1298 switch (mrb_type(sub)) {
1299 case MRB_TT_FIXNUM: {
1300 int c = mrb_fixnum(sub);
1301 mrb_int len = RSTRING_LEN(str);
1302 unsigned char *p = (unsigned char*)RSTRING_PTR(str);
1304 for (;pos<len;pos++) {
1305 if (p[pos] == c) return mrb_fixnum_value(pos);
1307 return mrb_nil_value();
1310 default: {
1311 mrb_value tmp;
1313 tmp = mrb_check_string_type(mrb, sub);
1314 if (mrb_nil_p(tmp)) {
1315 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
1317 sub = tmp;
1319 /* fall through */
1320 case MRB_TT_STRING:
1321 pos = mrb_str_index(mrb, str, sub, pos);
1322 break;
1325 if (pos == -1) return mrb_nil_value();
1326 return mrb_fixnum_value(pos);
1329 #define STR_REPLACE_SHARED_MIN 10
1331 static mrb_value
1332 str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
1334 if (s2->flags & MRB_STR_SHARED) {
1335 L_SHARE:
1336 if (s1->flags & MRB_STR_SHARED){
1337 str_decref(mrb, s1->aux.shared);
1339 else {
1340 mrb_free(mrb, s1->ptr);
1342 s1->ptr = s2->ptr;
1343 s1->len = s2->len;
1344 s1->aux.shared = s2->aux.shared;
1345 s1->flags |= MRB_STR_SHARED;
1346 s1->aux.shared->refcnt++;
1348 else if (s2->len > STR_REPLACE_SHARED_MIN) {
1349 str_make_shared(mrb, s2);
1350 goto L_SHARE;
1352 else {
1353 if (s1->flags & MRB_STR_SHARED) {
1354 str_decref(mrb, s1->aux.shared);
1355 s1->flags &= ~MRB_STR_SHARED;
1356 s1->ptr = (char *)mrb_malloc(mrb, s2->len+1);
1358 else {
1359 s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, s2->len+1);
1361 memcpy(s1->ptr, s2->ptr, s2->len);
1362 s1->ptr[s2->len] = 0;
1363 s1->len = s2->len;
1364 s1->aux.capa = s2->len;
1366 return mrb_obj_value(s1);
1369 /* 15.2.10.5.24 */
1370 /* 15.2.10.5.28 */
1372 * call-seq:
1373 * str.replace(other_str) => str
1375 * s = "hello" #=> "hello"
1376 * s.replace "world" #=> "world"
1378 static mrb_value
1379 mrb_str_replace(mrb_state *mrb, mrb_value str)
1381 mrb_value str2;
1383 mrb_get_args(mrb, "S", &str2);
1384 return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2));
1387 /* 15.2.10.5.23 */
1389 * call-seq:
1390 * String.new(str="") => new_str
1392 * Returns a new string object containing a copy of <i>str</i>.
1394 static mrb_value
1395 mrb_str_init(mrb_state *mrb, mrb_value self)
1397 mrb_value str2;
1399 if (mrb_get_args(mrb, "|S", &str2) == 1) {
1400 str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));
1402 return self;
1405 /* 15.2.10.5.25 */
1406 /* 15.2.10.5.41 */
1408 * call-seq:
1409 * str.intern => symbol
1410 * str.to_sym => symbol
1412 * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
1413 * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
1415 * "Koala".intern #=> :Koala
1416 * s = 'cat'.to_sym #=> :cat
1417 * s == :cat #=> true
1418 * s = '@cat'.to_sym #=> :@cat
1419 * s == :@cat #=> true
1421 * This can also be used to create symbols that cannot be represented using the
1422 * <code>:xxx</code> notation.
1424 * 'cat and dog'.to_sym #=> :"cat and dog"
1426 mrb_value
1427 mrb_str_intern(mrb_state *mrb, mrb_value self)
1429 mrb_sym id;
1431 id = mrb_intern_str(mrb, self);
1432 return mrb_symbol_value(id);
1435 /* ---------------------------------- */
1436 mrb_value
1437 mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
1439 mrb_value str;
1441 if (mrb_string_p(obj)) {
1442 return obj;
1444 str = mrb_funcall(mrb, obj, "to_s", 0);
1445 if (!mrb_string_p(str))
1446 return mrb_any_to_s(mrb, obj);
1447 return str;
1450 mrb_value
1451 mrb_ptr_to_str(mrb_state *mrb, void *p)
1453 struct RString *p_str;
1454 char *p1;
1455 char *p2;
1456 uintptr_t n = (uintptr_t)p;
1458 p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4);
1459 p1 = p_str->ptr;
1460 *p1++ = '0';
1461 *p1++ = 'x';
1462 p2 = p1;
1464 do {
1465 *p2++ = mrb_digitmap[n % 16];
1466 n /= 16;
1467 } while (n > 0);
1468 *p2 = '\0';
1469 p_str->len = (mrb_int)(p2 - p_str->ptr);
1471 while (p1 < p2) {
1472 const char c = *p1;
1473 *p1++ = *--p2;
1474 *p2 = c;
1477 return mrb_obj_value(p_str);
1480 mrb_value
1481 mrb_string_type(mrb_state *mrb, mrb_value str)
1483 return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
1486 mrb_value
1487 mrb_check_string_type(mrb_state *mrb, mrb_value str)
1489 return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
1492 /* ---------------------------------- */
1493 /* 15.2.10.5.29 */
1495 * call-seq:
1496 * str.reverse => new_str
1498 * Returns a new string with the characters from <i>str</i> in reverse order.
1500 * "stressed".reverse #=> "desserts"
1502 static mrb_value
1503 mrb_str_reverse(mrb_state *mrb, mrb_value str)
1505 struct RString *s2;
1506 char *s, *e, *p;
1508 if (RSTRING(str)->len <= 1) return mrb_str_dup(mrb, str);
1510 s2 = str_new(mrb, 0, RSTRING(str)->len);
1511 str_with_class(mrb, s2, str);
1512 s = RSTRING_PTR(str); e = RSTRING_END(str) - 1;
1513 p = s2->ptr;
1515 while (e >= s) {
1516 *p++ = *e--;
1518 return mrb_obj_value(s2);
1521 /* 15.2.10.5.30 */
1523 * call-seq:
1524 * str.reverse! => str
1526 * Reverses <i>str</i> in place.
1528 static mrb_value
1529 mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
1531 struct RString *s = mrb_str_ptr(str);
1532 char *p, *e;
1533 char c;
1535 mrb_str_modify(mrb, s);
1536 if (s->len > 1) {
1537 p = s->ptr;
1538 e = p + s->len - 1;
1539 while (p < e) {
1540 c = *p;
1541 *p++ = *e;
1542 *e-- = c;
1545 return str;
1549 * call-seq:
1550 * str.rindex(substring [, fixnum]) => fixnum or nil
1551 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1552 * str.rindex(regexp [, fixnum]) => fixnum or nil
1554 * Returns the index of the last occurrence of the given <i>substring</i>,
1555 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1556 * <code>nil</code> if not found. If the second parameter is present, it
1557 * specifies the position in the string to end the search---characters beyond
1558 * this point will not be considered.
1560 * "hello".rindex('e') #=> 1
1561 * "hello".rindex('l') #=> 3
1562 * "hello".rindex('a') #=> nil
1563 * "hello".rindex(101) #=> 1
1564 * "hello".rindex(/[aeiou]/, -2) #=> 1
1566 static mrb_int
1567 mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
1569 char *s, *sbeg, *t;
1570 struct RString *ps = mrb_str_ptr(str);
1571 struct RString *psub = mrb_str_ptr(sub);
1572 mrb_int len = psub->len;
1574 /* substring longer than string */
1575 if (ps->len < len) return -1;
1576 if (ps->len - pos < len) {
1577 pos = ps->len - len;
1579 sbeg = ps->ptr;
1580 s = ps->ptr + pos;
1581 t = psub->ptr;
1582 if (len) {
1583 while (sbeg <= s) {
1584 if (memcmp(s, t, len) == 0) {
1585 return s - ps->ptr;
1587 s--;
1589 return -1;
1591 else {
1592 return pos;
1596 /* 15.2.10.5.31 */
1598 * call-seq:
1599 * str.rindex(substring [, fixnum]) => fixnum or nil
1600 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1601 * str.rindex(regexp [, fixnum]) => fixnum or nil
1603 * Returns the index of the last occurrence of the given <i>substring</i>,
1604 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1605 * <code>nil</code> if not found. If the second parameter is present, it
1606 * specifies the position in the string to end the search---characters beyond
1607 * this point will not be considered.
1609 * "hello".rindex('e') #=> 1
1610 * "hello".rindex('l') #=> 3
1611 * "hello".rindex('a') #=> nil
1612 * "hello".rindex(101) #=> 1
1613 * "hello".rindex(/[aeiou]/, -2) #=> 1
1615 static mrb_value
1616 mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
1618 mrb_value *argv;
1619 int argc;
1620 mrb_value sub;
1621 mrb_value vpos;
1622 int pos, len = RSTRING_LEN(str);
1624 mrb_get_args(mrb, "*", &argv, &argc);
1625 if (argc == 2) {
1626 sub = argv[0];
1627 vpos = argv[1];
1628 pos = mrb_fixnum(vpos);
1629 if (pos < 0) {
1630 pos += len;
1631 if (pos < 0) {
1632 regexp_check(mrb, sub);
1633 return mrb_nil_value();
1636 if (pos > len) pos = len;
1638 else {
1639 pos = len;
1640 if (argc > 0)
1641 sub = argv[0];
1642 else
1643 sub = mrb_nil_value();
1645 regexp_check(mrb, sub);
1647 switch (mrb_type(sub)) {
1648 case MRB_TT_FIXNUM: {
1649 int c = mrb_fixnum(sub);
1650 mrb_int len = RSTRING_LEN(str);
1651 unsigned char *p = (unsigned char*)RSTRING_PTR(str);
1653 for (pos=len;pos>=0;pos--) {
1654 if (p[pos] == c) return mrb_fixnum_value(pos);
1656 return mrb_nil_value();
1659 default: {
1660 mrb_value tmp;
1662 tmp = mrb_check_string_type(mrb, sub);
1663 if (mrb_nil_p(tmp)) {
1664 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
1666 sub = tmp;
1668 /* fall through */
1669 case MRB_TT_STRING:
1670 pos = mrb_str_rindex(mrb, str, sub, pos);
1671 if (pos >= 0) return mrb_fixnum_value(pos);
1672 break;
1674 } /* end of switch (TYPE(sub)) */
1675 return mrb_nil_value();
/*
 * Lookup table indexed by byte value: 1 for the ASCII whitespace bytes
 * 0x09-0x0d (tab, newline, vertical tab, form feed, carriage return) and
 * 0x20 (space), 0 for every other byte.
 */
static const char isspacetable[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/* the cast keeps negative char values from indexing before the table */
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
1699 /* 15.2.10.5.35 */
1702 * call-seq:
1703 * str.split(pattern=$;, [limit]) => anArray
1705 * Divides <i>str</i> into substrings based on a delimiter, returning an array
1706 * of these substrings.
1708 * If <i>pattern</i> is a <code>String</code>, then its contents are used as
1709 * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
1710 * space, <i>str</i> is split on whitespace, with leading whitespace and runs
1711 * of contiguous whitespace characters ignored.
1713 * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
1714 * pattern matches. Whenever the pattern matches a zero-length string,
1715 * <i>str</i> is split into individual characters.
1717 * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
1718 * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
1719 * split on whitespace as if ` ' were specified.
1721 * If the <i>limit</i> parameter is omitted, trailing null fields are
1722 * suppressed. If <i>limit</i> is a positive number, at most that number of
1723 * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
1724 * string is returned as the only entry in an array). If negative, there is no
1725 * limit to the number of fields returned, and trailing null fields are not
1726 * suppressed.
1728 * " now's the time".split #=> ["now's", "the", "time"]
1729 * " now's the time".split(' ') #=> ["now's", "the", "time"]
1730 * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
1731 * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
1732 * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
1733 * "hello".split(//, 3) #=> ["h", "e", "llo"]
1734 * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
1736 * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
1737 * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
1738 * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
1739 * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
/*
 * String#split (registered under "split" in mrb_init_string).
 *
 * Reads an optional pattern (spat) and an optional integer limit (lim) with
 * mrb_get_args("|oi") and returns an array of substrings of str.
 *
 *  - No pattern, a nil pattern, or a String pattern that is exactly one
 *    space selects the "awk" mode: fields are separated by bytes for which
 *    ascii_isspace() is true.
 *  - Any other String pattern is located literally with mrb_memsearch(); an
 *    empty pattern produces one-byte substrings.
 *  - A pattern that is neither nil nor a String is handed to noregexp().
 *
 * NOTE(review): brace-only and blank lines are absent from this listing, so
 * the nesting of the statements below has to be inferred from their order.
 */
1742 static mrb_value
1743 mrb_str_split_m(mrb_state *mrb, mrb_value str)
1745 int argc;
1746 mrb_value spat = mrb_nil_value();
1747 enum {awk, string, regexp} split_type = string;
/* i counts against lim in the loops below; lim_p is set just after the arguments are read */
1748 long i = 0, lim_p;
/* beg/end are byte offsets into str */
1749 mrb_int beg;
1750 mrb_int end;
1751 mrb_int lim = 0;
1752 mrb_value result, tmp;
1754 argc = mrb_get_args(mrb, "|oi", &spat, &lim);
/* lim_p: a positive limit was passed explicitly */
1755 lim_p = (lim > 0 && argc == 2);
1756 if (argc == 2) {
/* limit 1 means no splitting: [] for an empty receiver, [str] otherwise */
1757 if (lim == 1) {
1758 if (RSTRING_LEN(str) == 0)
1759 return mrb_ary_new_capa(mrb, 0);
1760 return mrb_ary_new_from_values(mrb, 1, &str);
1762 i = 1;
/* choose the splitting mode from the pattern */
1765 if (argc == 0 || mrb_nil_p(spat)) {
1766 split_type = awk;
1768 else {
1769 if (mrb_string_p(spat)) {
1770 split_type = string;
1771 if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
1772 split_type = awk;
1775 else {
1776 noregexp(mrb, str);
1780 result = mrb_ary_new(mrb);
1781 beg = 0;
/* awk mode: skip is non-zero while separator bytes in front of a field are
   being consumed; beg..end delimit the field currently being scanned */
1782 if (split_type == awk) {
1783 char *ptr = RSTRING_PTR(str);
1784 char *eptr = RSTRING_END(str);
1785 char *bptr = ptr;
1786 int skip = 1;
1787 unsigned int c;
1789 end = beg;
1790 while (ptr < eptr) {
1791 int ai = mrb_gc_arena_save(mrb);
1792 c = (unsigned char)*ptr++;
1793 if (skip) {
1794 if (ascii_isspace(c)) {
1795 beg = ptr - bptr;
1797 else {
1798 end = ptr - bptr;
1799 skip = 0;
/* limit reached: stop here so the remainder is appended after the loop */
1800 if (lim_p && lim <= i) break;
1803 else if (ascii_isspace(c)) {
/* a separator ends the current field: push it and start skipping again */
1804 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
1805 mrb_gc_arena_restore(mrb, ai);
1806 skip = 1;
1807 beg = ptr - bptr;
1808 if (lim_p) ++i;
1810 else {
1811 end = ptr - bptr;
/* literal string mode */
1815 else if (split_type == string) {
1816 char *ptr = RSTRING_PTR(str);
1817 char *temp = ptr;
1818 char *eptr = RSTRING_END(str);
1819 mrb_int slen = RSTRING_LEN(spat);
/* empty pattern: push one-byte substrings */
1821 if (slen == 0) {
1822 int ai = mrb_gc_arena_save(mrb);
1823 while (ptr < eptr) {
1824 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1));
1825 mrb_gc_arena_restore(mrb, ai);
1826 ptr++;
1827 if (lim_p && lim <= ++i) break;
1830 else {
1831 char *sptr = RSTRING_PTR(spat);
1832 int ai = mrb_gc_arena_save(mrb);
/* end receives the offset of the next match relative to ptr; the loop ends
   when mrb_memsearch() returns a negative value */
1834 while (ptr < eptr &&
1835 (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) {
1836 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
1837 mrb_gc_arena_restore(mrb, ai);
1838 ptr += end + slen;
1839 if (lim_p && lim <= ++i) break;
/* beg: offset of whatever has not been consumed yet */
1842 beg = ptr - temp;
1844 else {
1845 noregexp(mrb, str);
/* append the unconsumed tail of str (possibly as an empty string) */
1847 if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
1848 if (RSTRING_LEN(str) == beg) {
1849 tmp = mrb_str_new_empty(mrb, str);
1851 else {
1852 tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
1854 mrb_ary_push(mrb, result, tmp);
/* no explicit positive limit and lim == 0: drop trailing empty strings */
1856 if (!lim_p && lim == 0) {
1857 mrb_int len;
1858 while ((len = RARRAY_LEN(result)) > 0 &&
1859 (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
1860 mrb_ary_pop(mrb, result);
1863 return result;
/*
 * Parse the C string str as an integer and return it as a Fixnum value.
 *
 * str      - input text; leading ISSPACE() characters and one optional sign
 *            are skipped.  A NULL str yields 0 (or jumps to `bad` when
 *            badcheck is set).
 * base     - radix.  When base <= 0 and the text starts with '0', the radix
 *            is taken from the second character (x/X 16, b/B 2, o/O 8,
 *            d/D 10, anything else 8); otherwise a base below -1 is negated
 *            and 0 or -1 become 10.  A radix outside 2..36 raises
 *            E_ARGUMENT_ERROR ("illegal radix").
 * badcheck - when non-zero, malformed input (no digits, a second sign,
 *            trailing non-space characters) jumps to `bad`, which raises
 *            E_ARGUMENT_ERROR; when zero such input yields 0 or the value
 *            parsed so far.
 *
 * A value above MRB_INT_MAX raises E_ARGUMENT_ERROR ("too big for integer").
 *
 * NOTE(review): brace-only and blank lines are absent from this listing, and
 * the final line of the conv_digit macro is not visible either.
 */
1866 mrb_value
1867 mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
1869 char *end;
/* sign: 1 for non-negative, 0 after a leading '-' */
1870 char sign = 1;
1871 int c;
1872 unsigned long n;
1873 mrb_int val;
1875 #undef ISDIGIT
1876 #define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
1877 #define conv_digit(c) \
1878 (!ISASCII(c) ? -1 : \
1879 isdigit(c) ? ((c) - '0') : \
1880 islower(c) ? ((c) - 'a' + 10) : \
1881 isupper(c) ? ((c) - 'A' + 10) : \
1884 if (!str) {
1885 if (badcheck) goto bad;
1886 return mrb_fixnum_value(0);
1888 while (ISSPACE(*str)) str++;
1890 if (str[0] == '+') {
1891 str++;
1893 else if (str[0] == '-') {
1894 str++;
1895 sign = 0;
/* a second sign character is never accepted */
1897 if (str[0] == '+' || str[0] == '-') {
1898 if (badcheck) goto bad;
1899 return mrb_fixnum_value(0);
/* base <= 0: derive the radix from the text or from -base */
1901 if (base <= 0) {
1902 if (str[0] == '0') {
1903 switch (str[1]) {
1904 case 'x': case 'X':
1905 base = 16;
1906 break;
1907 case 'b': case 'B':
1908 base = 2;
1909 break;
1910 case 'o': case 'O':
1911 base = 8;
1912 break;
1913 case 'd': case 'D':
1914 base = 10;
1915 break;
1916 default:
1917 base = 8;
1920 else if (base < -1) {
1921 base = -base;
1923 else {
1924 base = 10;
/* skip the radix prefix that belongs to the chosen base, if present */
1927 switch (base) {
1928 case 2:
1929 if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
1930 str += 2;
1932 break;
1933 case 3:
1934 break;
1935 case 8:
1936 if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
1937 str += 2;
/* no break is visible after case 8: control continues into the labels below */
1939 case 4: case 5: case 6: case 7:
1940 break;
1941 case 10:
1942 if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
1943 str += 2;
/* no break is visible after case 10: control continues into the labels below */
1945 case 9: case 11: case 12: case 13: case 14: case 15:
1946 break;
1947 case 16:
1948 if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
1949 str += 2;
1951 break;
1952 default:
1953 if (base < 2 || 36 < base) {
1954 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
1956 break;
1957 } /* end of switch (base) { */
1958 if (*str == '0') { /* squeeze preceding 0s */
/* us counts consecutive underscores; two in a row stop the squeeze */
1959 int us = 0;
1960 while ((c = *++str) == '0' || c == '_') {
1961 if (c == '_') {
1962 if (++us >= 2)
1963 break;
1965 else
1966 us = 0;
/* if only zeros were present, step back so one '0' is left to parse */
1968 if (!(c = *str) || ISSPACE(c)) --str;
/* the first remaining character must be a valid digit in this base */
1970 c = *str;
1971 c = conv_digit(c);
1972 if (c < 0 || c >= base) {
1973 if (badcheck) goto bad;
1974 return mrb_fixnum_value(0);
1977 n = strtoul((char*)str, &end, base);
1978 if (n > MRB_INT_MAX) {
1979 mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer", mrb_str_new_cstr(mrb, str));
1981 val = n;
1982 if (badcheck) {
1983 if (end == str) goto bad; /* no number */
1984 while (*end && ISSPACE(*end)) end++;
1985 if (*end) goto bad; /* trailing garbage */
1988 return mrb_fixnum_value(sign ? val : -val);
1989 bad:
1990 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)", mrb_str_new_cstr(mrb, str));
1991 /* not reached */
1992 return mrb_fixnum_value(0);
1995 char *
1996 mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
1998 struct RString *ps = mrb_str_ptr(*ptr);
1999 char *s = ps->ptr;
2001 if (!s || ps->len != strlen(s)) {
2002 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
2004 return s;
2007 mrb_value
2008 mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck)
2010 char *s;
2011 int len;
2013 str = mrb_str_to_str(mrb, str);
2014 if (badcheck) {
2015 s = mrb_string_value_cstr(mrb, &str);
2017 else {
2018 s = RSTRING_PTR(str);
2020 if (s) {
2021 len = RSTRING_LEN(str);
2022 if (s[len]) { /* no sentinel somehow */
2023 struct RString *temp_str = str_new(mrb, s, len);
2024 s = temp_str->ptr;
2027 return mrb_cstr_to_inum(mrb, s, base, badcheck);
/* 15.2.10.5.39 */
2032 * call-seq:
2033 * str.to_i(base=10) => integer
2035 * Returns the result of interpreting leading characters in <i>str</i> as an
2036 * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
2037 * end of a valid number are ignored. If there is not a valid number at the
2038 * start of <i>str</i>, <code>0</code> is returned. This method never raises an
2039 * exception.
2041 * "12345".to_i #=> 12345
2042 * "99 red balloons".to_i #=> 99
2043 * "0a".to_i #=> 0
2044 * "0a".to_i(16) #=> 10
2045 * "hello".to_i #=> 0
2046 * "1100101".to_i(2) #=> 101
2047 * "1100101".to_i(8) #=> 294977
2048 * "1100101".to_i(10) #=> 1100101
2049 * "1100101".to_i(16) #=> 17826049
2051 static mrb_value
2052 mrb_str_to_i(mrb_state *mrb, mrb_value self)
2054 mrb_value *argv;
2055 int argc;
2056 int base;
2058 mrb_get_args(mrb, "*", &argv, &argc);
2059 if (argc == 0)
2060 base = 10;
2061 else
2062 base = mrb_fixnum(argv[0]);
2064 if (base < 0) {
2065 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
2067 return mrb_str_to_inum(mrb, self, base, 0/*Qfalse*/);
/*
 * Parse the C string p as a floating point number using strtod().
 *
 * p        - input text; NULL yields 0.0.  Leading ISSPACE() characters are
 *            skipped.
 * badcheck - when non-zero, input that strtod() cannot fully consume (apart
 *            from trailing whitespace) jumps to `bad`, which raises
 *            E_ARGUMENT_ERROR.  When zero, a "0x"/"0X" prefix yields 0.0 and
 *            otherwise the value of the leading numeric part is returned.
 *
 * When strtod() stops before the end of the text, the text is copied into a
 * local buffer with '_' characters removed and parsed a second time.
 *
 * NOTE(review): brace-only and blank lines are absent from this listing, so
 * nesting has to be inferred from statement order.
 */
2070 double
2071 mrb_cstr_to_dbl(mrb_state *mrb, const char * p, int badcheck)
2073 char *end;
2074 double d;
2075 #if !defined(DBL_DIG)
2076 # define DBL_DIG 16
2077 #endif
/* NOTE(review): max_width and OutOfRange() are not referenced in the visible
   part of this function, and w / ellipsis are not declared in it */
2079 enum {max_width = 20};
2080 #define OutOfRange() (((w = end - p) > max_width) ? \
2081 (w = max_width, ellipsis = "...") : \
2082 (w = (int)(end - p), ellipsis = ""))
2084 if (!p) return 0.0;
2085 while (ISSPACE(*p)) p++;
/* hexadecimal notation is not accepted in lenient mode */
2087 if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2088 return 0.0;
2090 d = strtod(p, &end);
/* p == end: strtod() consumed nothing */
2091 if (p == end) {
2092 if (badcheck) {
2093 bad:
2094 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p));
2095 /* not reached */
2097 return d;
/* unparsed text remains: rebuild the number in buf without underscores */
2099 if (*end) {
2100 char buf[DBL_DIG * 4 + 10];
2101 char *n = buf;
/* e: last writable position, leaving room for the terminator */
2102 char *e = buf + sizeof(buf) - 1;
2103 char prev = 0;
/* copy the part strtod() already accepted */
2105 while (p < end && n < e) prev = *n++ = *p++;
2106 while (*p) {
2107 if (*p == '_') {
2108 /* remove underscores between digits */
2109 if (badcheck) {
/* strict mode: an underscore must have a digit on both sides */
2110 if (n == buf || !ISDIGIT(prev)) goto bad;
2111 ++p;
2112 if (!ISDIGIT(*p)) goto bad;
2114 else {
/* lenient mode: skip the whole run of underscores */
2115 while (*++p == '_');
2116 continue;
2119 prev = *p++;
/* characters that no longer fit in buf are dropped */
2120 if (n < e) *n++ = prev;
2122 *n = '\0';
2123 p = buf;
2125 if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2126 return 0.0;
2129 d = strtod(p, &end);
2130 if (badcheck) {
2131 if (!end || p == end) goto bad;
2132 while (*end && ISSPACE(*end)) end++;
2133 if (*end) goto bad;
2136 return d;
2139 double
2140 mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck)
2142 char *s;
2143 int len;
2145 str = mrb_str_to_str(mrb, str);
2146 s = RSTRING_PTR(str);
2147 len = RSTRING_LEN(str);
2148 if (s) {
2149 if (badcheck && memchr(s, '\0', len)) {
2150 mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
2152 if (s[len]) { /* no sentinel somehow */
2153 struct RString *temp_str = str_new(mrb, s, len);
2154 s = temp_str->ptr;
2157 return mrb_cstr_to_dbl(mrb, s, badcheck);
/* 15.2.10.5.38 */
2162 * call-seq:
2163 * str.to_f => float
2165 * Returns the result of interpreting leading characters in <i>str</i> as a
2166 * floating point number. Extraneous characters past the end of a valid number
2167 * are ignored. If there is not a valid number at the start of <i>str</i>,
2168 * <code>0.0</code> is returned. This method never raises an exception.
2170 * "123.45e1".to_f #=> 1234.5
2171 * "45.67 degrees".to_f #=> 45.67
2172 * "thx1138".to_f #=> 0.0
2174 static mrb_value
2175 mrb_str_to_f(mrb_state *mrb, mrb_value self)
2177 return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, 0/*Qfalse*/));
2180 /* 15.2.10.5.40 */
2182 * call-seq:
2183 * str.to_s => str
2184 * str.to_str => str
2186 * Returns the receiver.
2188 static mrb_value
2189 mrb_str_to_s(mrb_state *mrb, mrb_value self)
2191 if (mrb_obj_class(mrb, self) != mrb->string_class) {
2192 return mrb_str_dup(mrb, self);
2194 return self;
2197 /* 15.2.10.5.43 */
2199 * call-seq:
2200 * str.upcase! => str or nil
2202 * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
2203 * were made.
2205 static mrb_value
2206 mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
2208 struct RString *s = mrb_str_ptr(str);
2209 char *p, *pend;
2210 int modify = 0;
2212 mrb_str_modify(mrb, s);
2213 p = RSTRING_PTR(str);
2214 pend = RSTRING_END(str);
2215 while (p < pend) {
2216 if (ISLOWER(*p)) {
2217 *p = TOUPPER(*p);
2218 modify = 1;
2220 p++;
2223 if (modify) return str;
2224 return mrb_nil_value();
2227 /* 15.2.10.5.42 */
2229 * call-seq:
2230 * str.upcase => new_str
2232 * Returns a copy of <i>str</i> with all lowercase letters replaced with their
2233 * uppercase counterparts. The operation is locale insensitive---only
2234 * characters ``a'' to ``z'' are affected.
2236 * "hEllO".upcase #=> "HELLO"
2238 static mrb_value
2239 mrb_str_upcase(mrb_state *mrb, mrb_value self)
2241 mrb_value str;
2243 str = mrb_str_dup(mrb, self);
2244 mrb_str_upcase_bang(mrb, str);
2245 return str;
2249 * call-seq:
2250 * str.dump -> new_str
2252 * Produces a version of <i>str</i> with all nonprinting characters replaced by
2253 * <code>\nnn</code> notation and all special characters escaped.
2255 mrb_value
2256 mrb_str_dump(mrb_state *mrb, mrb_value str)
2258 mrb_int len;
2259 const char *p, *pend;
2260 char *q;
2261 struct RString *result;
2263 len = 2; /* "" */
2264 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
2265 while (p < pend) {
2266 unsigned char c = *p++;
2267 switch (c) {
2268 case '"': case '\\':
2269 case '\n': case '\r':
2270 case '\t': case '\f':
2271 case '\013': case '\010': case '\007': case '\033':
2272 len += 2;
2273 break;
2275 case '#':
2276 len += IS_EVSTR(p, pend) ? 2 : 1;
2277 break;
2279 default:
2280 if (ISPRINT(c)) {
2281 len++;
2283 else {
2284 len += 4; /* \NNN */
2286 break;
2290 result = str_new(mrb, 0, len);
2291 str_with_class(mrb, result, str);
2292 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
2293 q = result->ptr;
2295 *q++ = '"';
2296 while (p < pend) {
2297 unsigned char c = *p++;
2299 switch (c) {
2300 case '"':
2301 case '\\':
2302 *q++ = '\\';
2303 *q++ = c;
2304 break;
2306 case '\n':
2307 *q++ = '\\';
2308 *q++ = 'n';
2309 break;
2311 case '\r':
2312 *q++ = '\\';
2313 *q++ = 'r';
2314 break;
2316 case '\t':
2317 *q++ = '\\';
2318 *q++ = 't';
2319 break;
2321 case '\f':
2322 *q++ = '\\';
2323 *q++ = 'f';
2324 break;
2326 case '\013':
2327 *q++ = '\\';
2328 *q++ = 'v';
2329 break;
2331 case '\010':
2332 *q++ = '\\';
2333 *q++ = 'b';
2334 break;
2336 case '\007':
2337 *q++ = '\\';
2338 *q++ = 'a';
2339 break;
2341 case '\033':
2342 *q++ = '\\';
2343 *q++ = 'e';
2344 break;
2346 case '#':
2347 if (IS_EVSTR(p, pend)) *q++ = '\\';
2348 *q++ = '#';
2349 break;
2351 default:
2352 if (ISPRINT(c)) {
2353 *q++ = c;
2355 else {
2356 *q++ = '\\';
2357 q[2] = '0' + c % 8; c /= 8;
2358 q[1] = '0' + c % 8; c /= 8;
2359 q[0] = '0' + c % 8;
2360 q += 3;
2364 *q++ = '"';
2365 return mrb_obj_value(result);
2368 mrb_value
2369 mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
2371 if ((mrb_int)len < 0) {
2372 mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
2374 str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
2375 return str;
2378 mrb_value
2379 mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
2381 return mrb_str_cat(mrb, str, ptr, strlen(ptr));
2384 mrb_value
2385 mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
2387 str2 = mrb_str_to_str(mrb, str2);
2388 return mrb_str_buf_append(mrb, str, str2);
2391 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
2394 * call-seq:
2395 * str.inspect -> string
2397 * Returns a printable version of _str_, surrounded by quote marks,
2398 * with special characters escaped.
2400 * str = "hello"
2401 * str[3] = "\b"
2402 * str.inspect #=> "\"hel\\bo\""
2404 mrb_value
2405 mrb_str_inspect(mrb_state *mrb, mrb_value str)
2407 const char *p, *pend;
2408 char buf[CHAR_ESC_LEN + 1];
2409 mrb_value result = mrb_str_new(mrb, "\"", 1);
2411 p = RSTRING_PTR(str); pend = RSTRING_END(str);
2412 for (;p < pend; p++) {
2413 unsigned char c, cc;
2415 c = *p;
2416 if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
2417 buf[0] = '\\'; buf[1] = c;
2418 mrb_str_buf_cat(mrb, result, buf, 2);
2419 continue;
2421 if (ISPRINT(c)) {
2422 buf[0] = c;
2423 mrb_str_buf_cat(mrb, result, buf, 1);
2424 continue;
2426 switch (c) {
2427 case '\n': cc = 'n'; break;
2428 case '\r': cc = 'r'; break;
2429 case '\t': cc = 't'; break;
2430 case '\f': cc = 'f'; break;
2431 case '\013': cc = 'v'; break;
2432 case '\010': cc = 'b'; break;
2433 case '\007': cc = 'a'; break;
2434 case 033: cc = 'e'; break;
2435 default: cc = 0; break;
2437 if (cc) {
2438 buf[0] = '\\';
2439 buf[1] = (char)cc;
2440 mrb_str_buf_cat(mrb, result, buf, 2);
2441 continue;
2443 else {
2444 buf[0] = '\\';
2445 buf[3] = '0' + c % 8; c /= 8;
2446 buf[2] = '0' + c % 8; c /= 8;
2447 buf[1] = '0' + c % 8;
2448 mrb_str_buf_cat(mrb, result, buf, 4);
2449 continue;
2452 mrb_str_buf_cat(mrb, result, "\"", 1);
2454 return result;
2458 * call-seq:
2459 * str.bytes -> array of fixnums
2461 * Returns an array of bytes in _str_.
2463 * str = "hello"
2464 * str.bytes #=> [104, 101, 108, 108, 111]
2466 static mrb_value
2467 mrb_str_bytes(mrb_state *mrb, mrb_value str)
2469 struct RString *s = mrb_str_ptr(str);
2470 mrb_value a = mrb_ary_new_capa(mrb, s->len);
2471 unsigned char *p = (unsigned char *)(s->ptr), *pend = p + s->len;
2473 while (p < pend) {
2474 mrb_ary_push(mrb, a, mrb_fixnum_value(p[0]));
2475 p++;
2477 return a;
2480 /* ---------------------------*/
2481 void
2482 mrb_init_string(mrb_state *mrb)
2484 struct RClass *s;
2486 s = mrb->string_class = mrb_define_class(mrb, "String", mrb->object_class);
2487 MRB_SET_INSTANCE_TT(s, MRB_TT_STRING);
2488 mrb_include_module(mrb, s, mrb_class_get(mrb, "Comparable"));
2491 mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE());
2493 mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
2494 mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
2495 mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
2496 mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
2497 mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
2498 mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
2499 mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.8 */
2500 mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
2501 mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
2502 mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */
2503 mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.12 */
2504 mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
2505 mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
2506 mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
2507 mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
2509 mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.20 */
2510 mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
2511 mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
2512 mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
2513 mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
2514 mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
2515 mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.26 */
2516 mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
2517 mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
2518 mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
2519 mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
2520 mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
2521 mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
2522 mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
2524 mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
2525 mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
2526 mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
2527 mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE());
2528 mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
2529 mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_REQ(1)); /* 15.2.10.5.42 */
2530 mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.43 */
2531 mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
2532 mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE());