2 ** string.c - String class
4 ** See Copyright Notice in mruby.h
8 # define _CRT_NONSTDC_NO_DEPRECATE
9 # define WIN32_LEAN_AND_MEAN
13 #include <mruby/array.h>
14 #include <mruby/class.h>
15 #include <mruby/range.h>
16 #include <mruby/string.h>
17 #include <mruby/numeric.h>
18 #include <mruby/internal.h>
19 #include <mruby/presym.h>
22 typedef struct mrb_shared_string
{
28 const char mrb_digitmap
[] = "0123456789abcdefghijklmnopqrstuvwxyz";
30 #define mrb_obj_alloc_string(mrb) MRB_OBJ_ALLOC((mrb), MRB_TT_STRING, (mrb)->string_class)
32 #ifndef MRB_STR_LENGTH_MAX
33 #if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__)
34 #define MRB_STR_LENGTH_MAX 0
36 #define MRB_STR_LENGTH_MAX 1048576
41 str_check_length(mrb_state
*mrb
, mrb_int len
)
44 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "negative (or overflowed) string size");
46 #if MRB_STR_LENGTH_MAX != 0
47 if (len
> MRB_STR_LENGTH_MAX
-1) {
48 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "string too long (len=%i max=" MRB_STRINGIZE(MRB_STR_LENGTH_MAX
) ")", len
);
53 static struct RString
*
54 str_init_normal_capa(mrb_state
*mrb
, struct RString
*s
,
55 const char *p
, mrb_int len
, mrb_int capa
)
57 str_check_length(mrb
, capa
);
58 char *dst
= (char*)mrb_malloc(mrb
, capa
+ 1);
59 if (p
) memcpy(dst
, p
, len
);
63 s
->as
.heap
.aux
.capa
= capa
;
64 RSTR_UNSET_TYPE_FLAG(s
);
68 static struct RString
*
69 str_init_normal(mrb_state
*mrb
, struct RString
*s
, const char *p
, mrb_int len
)
71 return str_init_normal_capa(mrb
, s
, p
, len
, len
);
74 static struct RString
*
75 str_init_embed(struct RString
*s
, const char *p
, mrb_int len
)
78 if (p
) memcpy(RSTR_EMBED_PTR(s
), p
, len
);
79 RSTR_EMBED_PTR(s
)[len
] = '\0';
80 RSTR_SET_TYPE_FLAG(s
, EMBED
);
81 RSTR_SET_EMBED_LEN(s
, len
);
85 static struct RString
*
86 str_init_nofree(struct RString
*s
, const char *p
, mrb_int len
)
88 s
->as
.heap
.ptr
= (char*)p
;
90 s
->as
.heap
.aux
.capa
= 0; /* nofree */
91 RSTR_SET_TYPE_FLAG(s
, NOFREE
);
95 static struct RString
*
96 str_init_shared(mrb_state
*mrb
, const struct RString
*orig
, struct RString
*s
, mrb_shared_string
*shared
)
102 shared
= (mrb_shared_string
*)mrb_malloc(mrb
, sizeof(mrb_shared_string
));
104 shared
->ptr
= orig
->as
.heap
.ptr
;
105 shared
->capa
= orig
->as
.heap
.aux
.capa
;
107 s
->as
.heap
.ptr
= orig
->as
.heap
.ptr
;
108 s
->as
.heap
.len
= orig
->as
.heap
.len
;
109 s
->as
.heap
.aux
.shared
= shared
;
110 RSTR_SET_TYPE_FLAG(s
, SHARED
);
114 static struct RString
*
115 str_init_fshared(const struct RString
*orig
, struct RString
*s
, struct RString
*fshared
)
117 s
->as
.heap
.ptr
= orig
->as
.heap
.ptr
;
118 s
->as
.heap
.len
= orig
->as
.heap
.len
;
119 s
->as
.heap
.aux
.fshared
= fshared
;
120 RSTR_SET_TYPE_FLAG(s
, FSHARED
);
124 static struct RString
*
125 str_init_modifiable(mrb_state
*mrb
, struct RString
*s
, const char *p
, mrb_int len
)
127 if (RSTR_EMBEDDABLE_P(len
)) {
128 return str_init_embed(s
, p
, len
);
130 return str_init_normal(mrb
, s
, p
, len
);
133 static struct RString
*
134 str_new_static(mrb_state
*mrb
, const char *p
, mrb_int len
)
136 if (RSTR_EMBEDDABLE_P(len
)) {
137 return str_init_embed(mrb_obj_alloc_string(mrb
), p
, len
);
139 return str_init_nofree(mrb_obj_alloc_string(mrb
), p
, len
);
142 static struct RString
*
143 str_new(mrb_state
*mrb
, const char *p
, mrb_int len
)
145 str_check_length(mrb
, len
);
146 if (RSTR_EMBEDDABLE_P(len
)) {
147 return str_init_embed(mrb_obj_alloc_string(mrb
), p
, len
);
149 if (p
&& mrb_ro_data_p(p
)) {
150 return str_init_nofree(mrb_obj_alloc_string(mrb
), p
, len
);
152 return str_init_normal(mrb
, mrb_obj_alloc_string(mrb
), p
, len
);
156 mrb_str_new_capa(mrb_state
*mrb
, mrb_int capa
)
158 struct RString
*s
= mrb_obj_alloc_string(mrb
);
160 if (RSTR_EMBEDDABLE_P(capa
)) {
161 s
= str_init_embed(s
, NULL
, 0);
164 s
= str_init_normal_capa(mrb
, s
, NULL
, 0, capa
);
166 return mrb_obj_value(s
);
170 resize_capa(mrb_state
*mrb
, struct RString
*s
, mrb_int capacity
)
172 if (RSTR_EMBED_P(s
)) {
173 if (!RSTR_EMBEDDABLE_P(capacity
)) {
174 str_init_normal_capa(mrb
, s
, RSTR_EMBED_PTR(s
), RSTR_EMBED_LEN(s
), capacity
);
178 str_check_length(mrb
, capacity
);
179 s
->as
.heap
.ptr
= (char*)mrb_realloc(mrb
, RSTR_PTR(s
), capacity
+1);
180 s
->as
.heap
.aux
.capa
= (mrb_ssize
)capacity
;
185 mrb_str_new(mrb_state
*mrb
, const char *p
, mrb_int len
)
187 return mrb_obj_value(str_new(mrb
, p
, len
));
191 mrb_str_new_cstr(mrb_state
*mrb
, const char *p
)
203 s
= str_new(mrb
, p
, len
);
205 return mrb_obj_value(s
);
209 mrb_str_new_static(mrb_state
*mrb
, const char *p
, mrb_int len
)
211 struct RString
*s
= str_new_static(mrb
, p
, len
);
212 return mrb_obj_value(s
);
216 str_decref(mrb_state
*mrb
, mrb_shared_string
*shared
)
219 if (shared
->refcnt
== 0) {
220 mrb_free(mrb
, shared
->ptr
);
221 mrb_free(mrb
, shared
);
226 str_modify_keep_ascii(mrb_state
*mrb
, struct RString
*s
)
228 if (RSTR_SHARED_P(s
)) {
229 mrb_shared_string
*shared
= s
->as
.heap
.aux
.shared
;
231 if (shared
->refcnt
== 1 && s
->as
.heap
.ptr
== shared
->ptr
) {
232 s
->as
.heap
.aux
.capa
= shared
->capa
;
233 s
->as
.heap
.ptr
[s
->as
.heap
.len
] = '\0';
234 RSTR_UNSET_SHARED_FLAG(s
);
235 mrb_free(mrb
, shared
);
238 str_init_modifiable(mrb
, s
, s
->as
.heap
.ptr
, s
->as
.heap
.len
);
239 str_decref(mrb
, shared
);
242 else if (RSTR_NOFREE_P(s
) || RSTR_FSHARED_P(s
)) {
243 str_init_modifiable(mrb
, s
, s
->as
.heap
.ptr
, s
->as
.heap
.len
);
248 check_null_byte(mrb_state
*mrb
, struct RString
*str
)
250 const char *p
= RSTR_PTR(str
);
251 if (p
&& memchr(p
, '\0', RSTR_LEN(str
))) {
252 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string contains null byte");
257 mrb_gc_free_str(mrb_state
*mrb
, struct RString
*str
)
259 if (RSTR_EMBED_P(str
))
261 else if (RSTR_SHARED_P(str
))
262 str_decref(mrb
, str
->as
.heap
.aux
.shared
);
263 else if (!RSTR_NOFREE_P(str
) && !RSTR_FSHARED_P(str
))
264 mrb_free(mrb
, str
->as
.heap
.ptr
);
267 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
268 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
269 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
271 # define ALIGNED_WORD_ACCESS 0
273 # define ALIGNED_WORD_ACCESS 1
277 #define bitint uint64_t
278 #define MASK01 0x0101010101010101ull
280 #define bitint uint32_t
281 #define MASK01 0x01010101ul
284 #ifdef MRB_UTF8_STRING
286 #define NOASCII(c) ((c) & 0x80)
288 #ifdef SIMPLE_SEARCH_NONASCII
289 /* the naive implementation. define SIMPLE_SEARCH_NONASCII, */
290 /* if you need it for any constraint (e.g. code size). */
292 search_nonascii(const char* p
, const char *e
)
295 if (NOASCII(*p
)) return p
;
300 #elif defined(__SSE2__)
301 # include <emmintrin.h>
303 static inline const char *
304 search_nonascii(const char *p
, const char *e
)
306 if (sizeof(__m128i
) < (size_t)(e
- p
)) {
307 if (!_mm_movemask_epi8(_mm_loadu_si128((__m128i
const*)p
))) {
308 const intptr_t lowbits
= sizeof(__m128i
) - 1;
309 const __m128i
*s
, *t
;
310 s
= (const __m128i
*)(~lowbits
& ((intptr_t)p
+ lowbits
));
311 t
= (const __m128i
*)(~lowbits
& (intptr_t)e
);
313 if (_mm_movemask_epi8(_mm_load_si128(s
))) break;
320 case 15: if (NOASCII(*p
)) return p
; ++p
;
321 case 14: if (NOASCII(*p
)) return p
; ++p
;
322 case 13: if (NOASCII(*p
)) return p
; ++p
;
323 case 12: if (NOASCII(*p
)) return p
; ++p
;
324 case 11: if (NOASCII(*p
)) return p
; ++p
;
325 case 10: if (NOASCII(*p
)) return p
; ++p
;
326 case 9: if (NOASCII(*p
)) return p
; ++p
;
327 case 8: if (NOASCII(*p
)) return p
; ++p
;
328 case 7: if (NOASCII(*p
)) return p
; ++p
;
329 case 6: if (NOASCII(*p
)) return p
; ++p
;
330 case 5: if (NOASCII(*p
)) return p
; ++p
;
331 case 4: if (NOASCII(*p
)) return p
; ++p
;
332 case 3: if (NOASCII(*p
)) return p
; ++p
;
333 case 2: if (NOASCII(*p
)) return p
; ++p
;
334 case 1: if (NOASCII(*p
)) return p
; ++p
;
335 if (NOASCII(*p
)) return p
;
344 search_nonascii(const char *p
, const char *e
)
346 ptrdiff_t byte_len
= e
- p
;
348 const char *be
= p
+ sizeof(bitint
) * (byte_len
/ sizeof(bitint
));
349 for (; p
< be
; p
+=sizeof(bitint
)) {
352 memcpy(&t0
, p
, sizeof(bitint
));
353 const bitint t1
= t0
& (MASK01
*0x80);
355 e
= p
+ sizeof(bitint
)-1;
356 byte_len
= sizeof(bitint
)-1;
361 switch (byte_len
% sizeof(bitint
)) {
363 case 7: if (e
[-7]&0x80) return e
-7;
364 case 6: if (e
[-6]&0x80) return e
-6;
365 case 5: if (e
[-5]&0x80) return e
-5;
366 case 4: if (e
[-4]&0x80) return e
-4;
368 case 3: if (e
[-3]&0x80) return e
-3;
369 case 2: if (e
[-2]&0x80) return e
-2;
370 case 1: if (e
[-1]&0x80) return e
-1;
375 #endif /* SIMPLE_SEARCH_NONASCII */
377 #define utf8_islead(c) ((unsigned char)((c)&0xc0) != 0x80)
379 extern const char mrb_utf8len_table
[];
380 const char mrb_utf8len_table
[] = {
381 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
382 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0
386 mrb_utf8len(const char* p
, const char* e
)
388 mrb_int len
= mrb_utf8len_table
[(unsigned char)p
[0] >> 3];
389 if (len
> e
- p
) return 1;
394 if (utf8_islead(p
[3])) return 1;
396 if (utf8_islead(p
[2])) return 1;
398 if (utf8_islead(p
[1])) return 1;
403 #if defined(__GNUC__) || __has_builtin(__builtin_popcount)
405 # define popcount(x) __builtin_popcountll(x)
407 # define popcount(x) __builtin_popcountl(x)
410 static inline uint32_t popcount(bitint x
)
412 x
= (x
& (MASK01
*0x55)) + ((x
>> 1) & (MASK01
*0x55));
413 x
= (x
& (MASK01
*0x33)) + ((x
>> 2) & (MASK01
*0x33));
414 x
= (x
& (MASK01
*0x0F)) + ((x
>> 4) & (MASK01
*0x0F));
415 return (x
* MASK01
) >> 56;
420 mrb_utf8_strlen(const char *str
, mrb_int byte_len
)
423 const char *e
= str
+ byte_len
;
427 const char *np
= search_nonascii(p
, e
);
432 while (NOASCII(*p
)) {
433 p
+= mrb_utf8len(p
, e
);
441 utf8_strlen(mrb_value str
)
443 struct RString
*s
= mrb_str_ptr(str
);
444 mrb_int byte_len
= RSTR_LEN(s
);
446 if (RSTR_SINGLE_BYTE_P(s
)) {
450 mrb_int utf8_len
= mrb_utf8_strlen(RSTR_PTR(s
), byte_len
);
451 mrb_assert(utf8_len
<= byte_len
);
452 if (byte_len
== utf8_len
) RSTR_SET_SINGLE_BYTE_FLAG(s
);
457 #define RSTRING_CHAR_LEN(s) utf8_strlen(s)
459 /* map character index to byte offset index */
461 chars2bytes(mrb_value s
, mrb_int off
, mrb_int idx
)
463 if (RSTR_SINGLE_BYTE_P(mrb_str_ptr(s
))) {
467 const char *p0
= RSTRING_PTR(s
) + off
;
469 const char *e
= RSTRING_END(s
);
472 while (p
<e
&& i
<idx
) {
473 if ((*p
& 0x80) == 0) {
474 const char *np
= search_nonascii(p
, e
);
475 ptrdiff_t alen
= np
- p
;
486 p
+= mrb_utf8len(p
, e
);
491 mrb_int len
= (mrb_int
)(p
-p0
);
496 /* map byte offset to character index */
498 bytes2chars(mrb_value s
, mrb_int bi
)
500 if (RSTR_SINGLE_BYTE_P(mrb_str_ptr(s
))) {
504 const char *p
= RSTRING_PTR(s
);
505 const char *e
= p
+ RSTRING_LEN(s
);
506 const char *pivot
= p
+ bi
;
509 if (e
< pivot
) return -1;
511 if ((*p
& 0x80) == 0) {
512 const char *np
= search_nonascii(p
, pivot
);
517 p
+= mrb_utf8len(p
, e
);
521 if (p
!= pivot
) return -1;
526 char_adjust(const char *beg
, const char *end
, const char *ptr
)
528 ptrdiff_t len
= end
- ptr
;
529 if (len
< 1 || utf8_islead(ptr
[0])) return ptr
;
530 if (len
> 1 && utf8_islead(ptr
[1])) return ptr
+1;
531 if (len
> 2 && utf8_islead(ptr
[2])) return ptr
+2;
532 if (len
> 3 && utf8_islead(ptr
[3])) return ptr
+3;
537 char_backtrack(const char *ptr
, const char *end
)
539 ptrdiff_t len
= end
- ptr
;
540 if (len
< 1 || utf8_islead(end
[-1])) return end
-1;
541 if (len
> 1 && utf8_islead(end
[-2])) return end
-2;
542 if (len
> 2 && utf8_islead(end
[-3])) return end
-3;
543 if (len
> 3 && utf8_islead(end
[-4])) return end
-4;
548 str_index_str_by_char(mrb_state
*mrb
, mrb_value str
, mrb_value sub
, mrb_int pos
)
550 const char *ptr
= RSTRING_PTR(sub
);
551 mrb_int len
= RSTRING_LEN(sub
);
554 pos
= chars2bytes(str
, 0, pos
);
557 pos
= mrb_str_index(mrb
, str
, ptr
, len
, pos
);
560 pos
= bytes2chars(str
, pos
);
566 #define RSTRING_CHAR_LEN(s) RSTRING_LEN(s)
567 #define chars2bytes(s, off, ci) (ci)
568 #define bytes2chars(s, bi) (bi)
569 #define char_adjust(beg, end, ptr) (ptr)
570 #define char_backtrack(ptr, end) ((end) - 1)
571 #define str_index_str_by_char(mrb, str, sub, pos) str_index_str((mrb), (str), (sub), (pos))
574 /* memsearch_swar (SWAR stands for SIMD within a register) */
575 /* See https://en.wikipedia.org/wiki/SWAR */
576 /* The function is taken from http://0x80.pl/articles/simd-strfind.html */
577 /* The original source code is under 2-clause BSD license; see LEGAL file. */
578 /* The modifications:
581 * remove alignment issue
582 * support bigendian CPU
583 * fixed potential buffer overflow
585 static inline mrb_int
586 memsearch_swar(const char *xs
, mrb_int m
, const char *ys
, mrb_int n
)
588 #define MASK7f (MASK01*0x7f)
589 #define MASK80 (MASK01*0x80)
590 #if defined(MRB_ENDIAN_BIG)
592 #define MASKtop 0x8000000000000000ull
594 #define MASKtop 0x80000000ul
600 const bitint first
= MASK01
* (uint8_t)xs
[0];
601 const bitint last
= MASK01
* (uint8_t)xs
[m
-1];
604 const char *s1
= ys
+m
-1;
606 const mrb_int lim
= n
- m
- (mrb_int
)sizeof(bitint
);
609 for (i
=0; i
< lim
; i
+=sizeof(bitint
)) {
612 memcpy(&t0
, s0
+i
, sizeof(bitint
));
613 memcpy(&t1
, s1
+i
, sizeof(bitint
));
615 const bitint eq
= (t0
^ first
) | (t1
^ last
);
616 bitint zeros
= ((~eq
& MASK7f
) + MASK01
) & (~eq
& MASK80
);
618 for (size_t j
= 0; zeros
; j
++) {
619 if (zeros
& MASKtop
) {
620 const mrb_int idx
= i
+ j
;
621 const char* p
= s0
+ idx
+ 1;
622 if (memcmp(p
, xs
+ 1, m
- 2) == 0) {
627 #if defined(MRB_ENDIAN_BIG)
637 const char *e
= ys
+ n
;
639 p
= (const char*)memchr(p
, *xs
, e
- p
);
640 if (p
== NULL
|| (e
- p
) < m
) break;
641 if (memcmp(p
+1, xs
+1, m
-1) == 0) return (mrb_int
)(p
- ys
);
650 mrb_memsearch(const char *x
, mrb_int m
, const char *y
, mrb_int n
)
652 if (m
> n
) return -1;
654 return memcmp(x
, y
, m
) == 0 ? 0 : -1;
660 const char *p
= (const char*)memchr(y
, *x
, n
);
662 if (p
) return (mrb_int
)(p
- y
);
665 return memsearch_swar(x
, m
, y
, n
);
669 str_share(mrb_state
*mrb
, struct RString
*orig
, struct RString
*s
)
671 size_t len
= (size_t)orig
->as
.heap
.len
;
673 mrb_assert(!RSTR_EMBED_P(orig
));
674 if (RSTR_NOFREE_P(orig
)) {
675 str_init_nofree(s
, orig
->as
.heap
.ptr
, len
);
677 else if (RSTR_SHARED_P(orig
)) {
678 str_init_shared(mrb
, orig
, s
, orig
->as
.heap
.aux
.shared
);
680 else if (RSTR_FSHARED_P(orig
)) {
681 str_init_fshared(orig
, s
, orig
->as
.heap
.aux
.fshared
);
684 if (orig
->as
.heap
.aux
.capa
> orig
->as
.heap
.len
) {
685 orig
->as
.heap
.ptr
= (char*)mrb_realloc(mrb
, orig
->as
.heap
.ptr
, len
+1);
686 orig
->as
.heap
.aux
.capa
= (mrb_ssize
)len
;
688 str_init_shared(mrb
, orig
, s
, NULL
);
689 str_init_shared(mrb
, orig
, orig
, s
->as
.heap
.aux
.shared
);
694 mrb_str_byte_subseq(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
)
696 struct RString
*orig
= mrb_str_ptr(str
);
697 struct RString
*s
= mrb_obj_alloc_string(mrb
);
699 if (RSTR_EMBEDDABLE_P(len
)) {
700 str_init_embed(s
, RSTR_PTR(orig
)+beg
, len
);
703 str_share(mrb
, orig
, s
);
704 s
->as
.heap
.ptr
+= (mrb_ssize
)beg
;
705 s
->as
.heap
.len
= (mrb_ssize
)len
;
707 RSTR_COPY_SINGLE_BYTE_FLAG(s
, orig
);
708 return mrb_obj_value(s
);
711 #ifdef MRB_UTF8_STRING
712 static inline mrb_value
713 str_subseq(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
)
715 beg
= chars2bytes(str
, 0, beg
);
716 len
= chars2bytes(str
, beg
, len
);
717 return mrb_str_byte_subseq(mrb
, str
, beg
, len
);
720 #define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len)
724 mrb_str_beg_len(mrb_int str_len
, mrb_int
*begp
, mrb_int
*lenp
)
726 if (str_len
< *begp
|| *lenp
< 0) return FALSE
;
729 if (*begp
< 0) return FALSE
;
731 if (*lenp
> str_len
- *begp
)
732 *lenp
= str_len
- *begp
;
740 str_substr(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
)
742 return mrb_str_beg_len(RSTRING_CHAR_LEN(str
), &beg
, &len
) ?
743 str_subseq(mrb
, str
, beg
, len
) : mrb_nil_value();
747 mrb_str_index(mrb_state
*mrb
, mrb_value str
, const char *sptr
, mrb_int slen
, mrb_int offset
)
749 mrb_int len
= RSTRING_LEN(str
);
753 if (offset
< 0) return -1;
755 if (len
- offset
< slen
) return -1;
757 char *s
= RSTRING_PTR(str
);
761 if (slen
== 0) return offset
;
762 /* need proceed one character at a time */
763 len
= RSTRING_LEN(str
) - offset
;
765 mrb_int pos
= mrb_memsearch(sptr
, slen
, s
, len
);
766 if (pos
< 0) return pos
;
771 str_index_str(mrb_state
*mrb
, mrb_value str
, mrb_value str2
, mrb_int offset
)
773 const char *ptr
= RSTRING_PTR(str2
);
774 mrb_int len
= RSTRING_LEN(str2
);
776 return mrb_str_index(mrb
, str
, ptr
, len
, offset
);
780 str_replace(mrb_state
*mrb
, struct RString
*s1
, struct RString
*s2
)
782 mrb_check_frozen(mrb
, s1
);
783 if (s1
== s2
) return mrb_obj_value(s1
);
784 RSTR_COPY_SINGLE_BYTE_FLAG(s1
, s2
);
785 if (RSTR_SHARED_P(s1
)) {
786 str_decref(mrb
, s1
->as
.heap
.aux
.shared
);
788 else if (!RSTR_EMBED_P(s1
) && !RSTR_NOFREE_P(s1
) && !RSTR_FSHARED_P(s1
)) {
789 mrb_free(mrb
, s1
->as
.heap
.ptr
);
792 size_t len
= (size_t)RSTR_LEN(s2
);
793 if (RSTR_EMBEDDABLE_P(len
)) {
794 str_init_embed(s1
, RSTR_PTR(s2
), len
);
797 str_share(mrb
, s2
, s1
);
800 return mrb_obj_value(s1
);
804 str_rindex(mrb_state
*mrb
, mrb_value str
, mrb_value sub
, mrb_int pos
)
806 const char *s
, *sbeg
, *send
, *t
;
807 struct RString
*ps
= mrb_str_ptr(str
);
808 mrb_int len
= RSTRING_LEN(sub
);
809 mrb_int slen
= RSTR_LEN(ps
);
811 /* substring longer than string */
812 if (slen
< len
) return -1;
813 if (slen
- pos
< len
) {
819 t
= RSTRING_PTR(sub
);
821 s
= char_adjust(sbeg
, send
, s
);
823 if ((mrb_int
)(send
- s
) >= len
&& memcmp(s
, t
, len
) == 0) {
824 return (mrb_int
)(s
- sbeg
);
826 s
= char_backtrack(sbeg
, s
);
841 mrb_utf8_from_locale(const char *str
, int len
)
845 int mbssize
, wcssize
;
850 len
= (int)strlen(str
);
851 wcssize
= MultiByteToWideChar(GetACP(), 0, str
, len
, NULL
, 0);
852 wcsp
= (wchar_t*) malloc((wcssize
+ 1) * sizeof(wchar_t));
855 wcssize
= MultiByteToWideChar(GetACP(), 0, str
, len
, wcsp
, wcssize
+ 1);
858 mbssize
= WideCharToMultiByte(CP_UTF8
, 0, (LPCWSTR
) wcsp
, -1, NULL
, 0, NULL
, NULL
);
859 mbsp
= (char*) malloc((mbssize
+ 1));
864 mbssize
= WideCharToMultiByte(CP_UTF8
, 0, (LPCWSTR
) wcsp
, -1, mbsp
, mbssize
, NULL
, NULL
);
871 mrb_locale_from_utf8(const char *utf8
, int len
)
875 int mbssize
, wcssize
;
880 len
= (int)strlen(utf8
);
881 wcssize
= MultiByteToWideChar(CP_UTF8
, 0, utf8
, len
, NULL
, 0);
882 wcsp
= (wchar_t*) malloc((wcssize
+ 1) * sizeof(wchar_t));
885 wcssize
= MultiByteToWideChar(CP_UTF8
, 0, utf8
, len
, wcsp
, wcssize
+ 1);
887 mbssize
= WideCharToMultiByte(GetACP(), 0, (LPCWSTR
) wcsp
, -1, NULL
, 0, NULL
, NULL
);
888 mbsp
= (char*) malloc((mbssize
+ 1));
893 mbssize
= WideCharToMultiByte(GetACP(), 0, (LPCWSTR
) wcsp
, -1, mbsp
, mbssize
, NULL
, NULL
);
901 mrb_str_modify_keep_ascii(mrb_state
*mrb
, struct RString
*s
)
903 mrb_check_frozen(mrb
, s
);
904 str_modify_keep_ascii(mrb
, s
);
908 mrb_str_modify(mrb_state
*mrb
, struct RString
*s
)
910 mrb_str_modify_keep_ascii(mrb
, s
);
911 RSTR_UNSET_SINGLE_BYTE_FLAG(s
);
915 mrb_str_resize(mrb_state
*mrb
, mrb_value str
, mrb_int len
)
918 struct RString
*s
= mrb_str_ptr(str
);
920 str_check_length(mrb
, len
);
921 mrb_str_modify(mrb
, s
);
924 if (slen
< len
|| slen
- len
> 256) {
925 resize_capa(mrb
, s
, len
);
927 RSTR_SET_LEN(s
, len
);
928 RSTR_PTR(s
)[len
] = '\0'; /* sentinel */
934 mrb_str_to_cstr(mrb_state
*mrb
, mrb_value str0
)
938 const char *p
= RSTRING_PTR(str0
);
939 mrb_int len
= RSTRING_LEN(str0
);
940 check_null_byte(mrb
, RSTRING(str0
));
941 s
= str_init_modifiable(mrb
, mrb_obj_alloc_string(mrb
), p
, len
);
946 mrb_str_concat(mrb_state
*mrb
, mrb_value self
, mrb_value other
)
948 other
= mrb_obj_as_string(mrb
, other
);
949 mrb_str_cat_str(mrb
, self
, other
);
953 mrb_str_plus(mrb_state
*mrb
, mrb_value a
, mrb_value b
)
955 struct RString
*s
= mrb_str_ptr(a
);
956 struct RString
*s2
= mrb_str_ptr(b
);
958 mrb_int slen
= RSTR_LEN(s
);
959 mrb_int s2len
= RSTR_LEN(s2
);
960 const char *p
= RSTR_PTR(s
);
961 const char *p2
= RSTR_PTR(s2
);
963 t
= str_new(mrb
, 0, slen
+ s2len
);
964 char *pt
= RSTR_PTR(t
);
966 memcpy(pt
+ slen
, p2
, s2len
);
968 return mrb_obj_value(t
);
975 * str + other_str -> new_str
977 * Concatenation---Returns a new <code>String</code> containing
978 * <i>other_str</i> concatenated to <i>str</i>.
980 * "Hello from " + self.to_s #=> "Hello from main"
983 mrb_str_plus_m(mrb_state
*mrb
, mrb_value self
)
987 mrb_get_args(mrb
, "S", &str
);
988 return mrb_str_plus(mrb
, self
, str
);
997 * Returns the length of string.
1000 mrb_str_size(mrb_state
*mrb
, mrb_value self
)
1002 mrb_int len
= RSTRING_CHAR_LEN(self
);
1003 return mrb_int_value(mrb
, len
);
1007 mrb_str_bytesize(mrb_state
*mrb
, mrb_value self
)
1009 return mrb_int_value(mrb
, RSTRING_LEN(self
));
1015 * str * integer => new_str
1017 * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
1020 * "Ho! " * 3 #=> "Ho! Ho! Ho! "
1023 mrb_str_times(mrb_state
*mrb
, mrb_value self
)
1027 mrb_get_args(mrb
, "i", ×
);
1029 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "negative argument");
1031 if (mrb_int_mul_overflow(RSTRING_LEN(self
), times
, &len
)) {
1032 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "argument too big");
1035 struct RString
*str2
= str_new(mrb
, 0, len
);
1036 char *p
= RSTR_PTR(str2
);
1038 mrb_int n
= RSTRING_LEN(self
);
1039 memcpy(p
, RSTRING_PTR(self
), n
);
1040 while (n
<= len
/2) {
1041 memcpy(p
+ n
, p
, n
);
1044 memcpy(p
+ n
, p
, len
-n
);
1046 p
[RSTR_LEN(str2
)] = '\0';
1047 RSTR_COPY_SINGLE_BYTE_FLAG(str2
, mrb_str_ptr(self
));
1049 return mrb_obj_value(str2
);
1051 /* -------------------------------------------------------------- */
1053 #define lesser(a,b) (((a)>(b))?(b):(a))
1055 /* ---------------------------*/
1058 * mrb_value str1 <=> mrb_value str2 => int
1064 mrb_str_cmp(mrb_state
*mrb
, mrb_value str1
, mrb_value str2
)
1066 struct RString
*s1
= mrb_str_ptr(str1
);
1067 struct RString
*s2
= mrb_str_ptr(str2
);
1069 mrb_int len1
= RSTR_LEN(s1
);
1070 mrb_int len2
= RSTR_LEN(s2
);
1071 mrb_int len
= lesser(len1
, len2
);
1072 mrb_int retval
= memcmp(RSTR_PTR(s1
), RSTR_PTR(s2
), len
);
1074 if (len1
== len2
) return 0;
1075 if (len1
> len2
) return 1;
1078 if (retval
> 0) return 1;
1086 * str <=> other_str => -1, 0, +1
1088 * Comparison---Returns -1 if <i>str</i> is less than, 0 if
1089 * <i>str</i> is equal to, and +1 if <i>str</i> is greater than
1090 * <i>other_str</i>. If the strings are of different lengths, and the strings are
1091 * equal when compared up to the shortest length, then the longer string is
1092 * considered greater than the shorter one. If the variable <code>$=</code> is
1093 * <code>false</code>, the comparison is based on comparing the binary values
1094 * of each character in the string. In older versions of Ruby, setting
1095 * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
1096 * in favor of using <code>String#casecmp</code>.
1098 * <code><=></code> is the basis for the methods <code><</code>,
1099 * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
1100 * included from module <code>Comparable</code>. The method
1101 * <code>String#==</code> does not use <code>Comparable#==</code>.
1103 * "abcdef" <=> "abcde" #=> 1
1104 * "abcdef" <=> "abcdef" #=> 0
1105 * "abcdef" <=> "abcdefg" #=> -1
1106 * "abcdef" <=> "ABCDEF" #=> 1
1109 mrb_str_cmp_m(mrb_state
*mrb
, mrb_value str1
)
1111 mrb_value str2
= mrb_get_arg1(mrb
);
1114 if (!mrb_string_p(str2
)) {
1115 return mrb_nil_value();
1118 result
= mrb_str_cmp(mrb
, str1
, str2
);
1120 return mrb_int_value(mrb
, result
);
1124 str_eql(mrb_state
*mrb
, const mrb_value str1
, const mrb_value str2
)
1126 const mrb_int len
= RSTRING_LEN(str1
);
1128 if (len
!= RSTRING_LEN(str2
)) return FALSE
;
1129 if (memcmp(RSTRING_PTR(str1
), RSTRING_PTR(str2
), (size_t)len
) == 0)
1135 mrb_str_equal(mrb_state
*mrb
, mrb_value str1
, mrb_value str2
)
1137 if (!mrb_string_p(str2
)) return FALSE
;
1138 return str_eql(mrb
, str1
, str2
);
1144 * str == obj => true or false
1147 * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
1148 * Otherwise, returns <code>true</code> if <i>str</i> <code><=></code>
1150 * <i>obj</i> returns zero.
1153 mrb_str_equal_m(mrb_state
*mrb
, mrb_value str1
)
1155 mrb_value str2
= mrb_get_arg1(mrb
);
1157 return mrb_bool_value(mrb_str_equal(mrb
, str1
, str2
));
1159 /* ---------------------------------- */
1162 mrb_str_dup(mrb_state
*mrb
, mrb_value str
)
1164 struct RString
*s
= mrb_str_ptr(str
);
1165 struct RString
*dup
= str_new(mrb
, 0, 0);
1167 return str_replace(mrb
, dup
, s
);
1170 enum str_convert_range
{
1171 /* `beg` and `len` are byte unit in `0 ... str.bytesize` */
1172 STR_BYTE_RANGE_CORRECTED
= 1,
1174 /* `beg` and `len` are char unit in any range */
1177 /* `beg` and `len` are char unit in `0 ... str.size` */
1178 STR_CHAR_RANGE_CORRECTED
= 3,
1180 /* `beg` is out of range */
1181 STR_OUT_OF_RANGE
= -1
1184 static enum str_convert_range
1185 str_convert_range(mrb_state
*mrb
, mrb_value str
, mrb_value indx
, mrb_value alen
, mrb_int
*beg
, mrb_int
*len
)
1187 if (!mrb_undef_p(alen
)) {
1188 *beg
= mrb_as_int(mrb
, indx
);
1189 *len
= mrb_as_int(mrb
, alen
);
1190 return STR_CHAR_RANGE
;
1193 switch (mrb_type(indx
)) {
1195 indx
= mrb_ensure_int_type(mrb
, indx
);
1197 case MRB_TT_INTEGER
:
1198 *beg
= mrb_integer(indx
);
1200 return STR_CHAR_RANGE
;
1203 *beg
= str_index_str(mrb
, str
, indx
, 0);
1204 if (*beg
< 0) { break; }
1205 *len
= RSTRING_LEN(indx
);
1206 return STR_BYTE_RANGE_CORRECTED
;
1209 *len
= RSTRING_CHAR_LEN(str
);
1210 switch (mrb_range_beg_len(mrb
, indx
, beg
, len
, *len
, TRUE
)) {
1212 return STR_CHAR_RANGE_CORRECTED
;
1214 return STR_OUT_OF_RANGE
;
1220 return STR_OUT_OF_RANGE
;
1224 mrb_str_aref(mrb_state
*mrb
, mrb_value str
, mrb_value indx
, mrb_value alen
)
1228 switch (str_convert_range(mrb
, str
, indx
, alen
, &beg
, &len
)) {
1229 case STR_CHAR_RANGE_CORRECTED
:
1230 return str_subseq(mrb
, str
, beg
, len
);
1231 case STR_CHAR_RANGE
:
1232 str
= str_substr(mrb
, str
, beg
, len
);
1233 if (mrb_undef_p(alen
) && !mrb_nil_p(str
) && RSTRING_LEN(str
) == 0) return mrb_nil_value();
1235 case STR_BYTE_RANGE_CORRECTED
:
1236 if (mrb_string_p(indx
)) {
1237 return mrb_str_dup(mrb
, indx
);
1240 return mrb_str_byte_subseq(mrb
, str
, beg
, len
);
1242 case STR_OUT_OF_RANGE
:
1244 return mrb_nil_value();
1252 * str[int] => new_str or nil
1253 * str[int, int] => new_str or nil
1254 * str[range] => new_str or nil
1255 * str[other_str] => new_str or nil
1256 * str.slice(int) => new_str or nil
1257 * str.slice(int, int) => new_str or nil
1258 * str.slice(range) => new_str or nil
1259 * str.slice(other_str) => new_str or nil
1261 * Element Reference---If passed a single <code>Integer</code>, returns a
1262 * substring of one character at that position. If passed two <code>Integer</code>
1263 * objects, returns a substring starting at the offset given by the first, and
1264 * a length given by the second. If given a range, a substring containing
1265 * characters at offsets given by the range is returned. In all three cases, if
1266 * an offset is negative, it is counted from the end of <i>str</i>. Returns
1267 * <code>nil</code> if the initial offset falls outside the string, the length
1268 * is negative, or the beginning of the range is greater than the end.
1270 * If a <code>String</code> is given, that string is returned if it occurs in
1271 * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
1275 * a[1] #=> 101(1.8.7) "e"(1.9.2)
1276 * a[1.1] #=> "e"(1.9.2)
1280 * a[-4..-2] #=> "her"
1287 mrb_str_aref_m(mrb_state
*mrb
, mrb_value str
)
1291 if (mrb_get_args(mrb
, "o|o", &a1
, &a2
) == 1) {
1292 a2
= mrb_undef_value();
1295 return mrb_str_aref(mrb
, str
, a1
, a2
);
/*
 * Report an out-of-range string index: raises E_INDEX_ERROR through
 * mrb_raisef() with `index` formatted into the message.  Declared
 * mrb_noreturn, so control does not come back to the caller.
 */
1298 static mrb_noreturn
void
1299 str_out_of_index(mrb_state
*mrb
, mrb_value index
)
1301 mrb_raisef(mrb
, E_INDEX_ERROR
, "index %v out of string", index
);
1305 str_replace_partial(mrb_state
*mrb
, mrb_value src
, mrb_int pos
, mrb_int end
, mrb_value rep
)
1307 const mrb_int shrink_threshold
= 256;
1308 struct RString
*str
= mrb_str_ptr(src
);
1309 mrb_int len
= RSTR_LEN(str
);
1310 mrb_int replen
, newlen
;
1313 if (end
> len
) { end
= len
; }
1315 if (pos
< 0 || pos
> len
) {
1316 str_out_of_index(mrb
, mrb_int_value(mrb
, pos
));
1319 replen
= (mrb_nil_p(rep
) ? 0 : RSTRING_LEN(rep
));
1320 if (mrb_int_add_overflow(replen
, len
- (end
- pos
), &newlen
)) {
1321 mrb_raise(mrb
, E_RUNTIME_ERROR
, "string size too big");
1324 mrb_str_modify(mrb
, str
);
1327 resize_capa(mrb
, str
, newlen
);
1330 strp
= RSTR_PTR(str
);
1332 memmove(strp
+ newlen
- (len
- end
), strp
+ end
, len
- end
);
1333 if (!mrb_nil_p(rep
)) {
1334 memmove(strp
+ pos
, RSTRING_PTR(rep
), replen
);
1336 RSTR_SET_LEN(str
, newlen
);
1337 strp
[newlen
] = '\0';
1339 if (len
- newlen
>= shrink_threshold
) {
1340 resize_capa(mrb
, str
, newlen
);
1346 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
1349 str_escape(mrb_state
*mrb
, mrb_value str
, mrb_bool inspect
)
1351 const char *p
, *pend
;
1352 char buf
[4]; /* `\x??` or UTF-8 character */
1353 mrb_value result
= mrb_str_new_lit(mrb
, "\"");
1354 #ifdef MRB_UTF8_STRING
1355 uint32_t sb_flag
= MRB_STR_SINGLE_BYTE
;
1358 p
= RSTRING_PTR(str
); pend
= RSTRING_END(str
);
1359 for (;p
< pend
; p
++) {
1360 unsigned char c
, cc
;
1361 #ifdef MRB_UTF8_STRING
1363 mrb_int clen
= mrb_utf8len(p
, pend
);
1365 mrb_str_cat(mrb
, result
, p
, clen
);
1373 if (c
== '"'|| c
== '\\' || (c
== '#' && IS_EVSTR(p
+1, pend
))) {
1374 buf
[0] = '\\'; buf
[1] = c
;
1375 mrb_str_cat(mrb
, result
, buf
, 2);
1380 mrb_str_cat(mrb
, result
, buf
, 1);
1384 case '\n': cc
= 'n'; break;
1385 case '\r': cc
= 'r'; break;
1386 case '\t': cc
= 't'; break;
1387 case '\f': cc
= 'f'; break;
1388 case '\013': cc
= 'v'; break;
1389 case '\010': cc
= 'b'; break;
1390 case '\007': cc
= 'a'; break;
1391 case 033: cc
= 'e'; break;
1392 default: cc
= 0; break;
1397 mrb_str_cat(mrb
, result
, buf
, 2);
1401 buf
[3] = mrb_digitmap
[c
% 16]; c
/= 16;
1402 buf
[2] = mrb_digitmap
[c
% 16];
1403 mrb_str_cat(mrb
, result
, buf
, 4);
1406 mrb_str_cat_lit(mrb
, result
, "\"");
1407 #ifdef MRB_UTF8_STRING
1409 mrb_str_ptr(str
)->flags
|= sb_flag
;
1410 mrb_str_ptr(result
)->flags
|= sb_flag
;
1413 RSTR_SET_SINGLE_BYTE_FLAG(mrb_str_ptr(result
));
1421 mrb_str_aset(mrb_state
*mrb
, mrb_value str
, mrb_value indx
, mrb_value alen
, mrb_value replace
)
1423 mrb_int beg
, len
, charlen
;
1425 mrb_ensure_string_type(mrb
, replace
);
1426 switch (str_convert_range(mrb
, str
, indx
, alen
, &beg
, &len
)) {
1427 case STR_OUT_OF_RANGE
:
1429 mrb_raise(mrb
, E_INDEX_ERROR
, "string not matched");
1430 case STR_CHAR_RANGE
:
1432 mrb_raisef(mrb
, E_INDEX_ERROR
, "negative length %v", alen
);
1434 charlen
= RSTRING_CHAR_LEN(str
);
1435 if (beg
< 0) { beg
+= charlen
; }
1436 if (beg
< 0 || beg
> charlen
) { str_out_of_index(mrb
, indx
); }
1438 case STR_CHAR_RANGE_CORRECTED
:
1439 beg
= chars2bytes(str
, 0, beg
);
1440 len
= chars2bytes(str
, beg
, len
);
1442 case STR_BYTE_RANGE_CORRECTED
:
1443 if (mrb_int_add_overflow(beg
, len
, &len
)) {
1444 mrb_raise(mrb
, E_RUNTIME_ERROR
, "string index too big");
1446 str_replace_partial(mrb
, str
, beg
, len
, replace
);
1452 * str[int] = replace
1453 * str[int, int] = replace
1454 * str[range] = replace
1455 * str[other_str] = replace
1457 * Modify +self+ by replacing the content of +self+.
1458 * The portion of the string affected is determined using the same criteria as +String#[]+.
1461 mrb_str_aset_m(mrb_state
*mrb
, mrb_value str
)
1463 mrb_value indx
, alen
, replace
;
1465 switch (mrb_get_args(mrb
, "oo|S!", &indx
, &alen
, &replace
)) {
1468 alen
= mrb_undef_value();
1473 mrb_str_aset(mrb
, str
, indx
, alen
, replace
);
1480 * str.capitalize! => str or nil
1482 * Modifies <i>str</i> by converting the first character to uppercase and the
1483 * remainder to lowercase. Returns <code>nil</code> if no changes are made.
1486 * a.capitalize! #=> "Hello"
1488 * a.capitalize! #=> nil
1491 mrb_str_capitalize_bang(mrb_state
*mrb
, mrb_value str
)
1493 mrb_bool modify
= FALSE
;
1494 struct RString
*s
= mrb_str_ptr(str
);
1495 mrb_int len
= RSTR_LEN(s
);
1497 mrb_str_modify_keep_ascii(mrb
, s
);
1498 char *p
= RSTR_PTR(s
);
1499 char *pend
= RSTR_PTR(s
) + len
;
1500 if (len
== 0 || p
== NULL
) return mrb_nil_value();
1505 while (++p
< pend
) {
1511 if (modify
) return str
;
1512 return mrb_nil_value();
1518 * str.capitalize => new_str
1520 * Returns a copy of <i>str</i> with the first character converted to uppercase
1521 * and the remainder to lowercase.
1523 * "hello".capitalize #=> "Hello"
1524 * "HELLO".capitalize #=> "Hello"
1525 * "123ABC".capitalize #=> "123abc"
1528 mrb_str_capitalize(mrb_state
*mrb
, mrb_value self
)
1532 str
= mrb_str_dup(mrb
, self
);
1533 mrb_str_capitalize_bang(mrb
, str
);
1540 * str.chomp!(separator="\n") => str or nil
1542 * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
1543 * returning <i>str</i>, or <code>nil</code> if no modifications were made.
1546 mrb_str_chomp_bang(mrb_state
*mrb
, mrb_value str
)
1554 struct RString
*s
= mrb_str_ptr(str
);
1556 argc
= mrb_get_args(mrb
, "|S", &rs
);
1557 mrb_str_modify_keep_ascii(mrb
, s
);
1560 if (len
== 0) return mrb_nil_value();
1562 if (RSTR_PTR(s
)[len
-1] == '\n') {
1563 RSTR_SET_LEN(s
, RSTR_LEN(s
) - 1);
1564 if (RSTR_LEN(s
) > 0 &&
1565 RSTR_PTR(s
)[RSTR_LEN(s
)-1] == '\r') {
1566 RSTR_SET_LEN(s
, RSTR_LEN(s
) - 1);
1569 else if (RSTR_PTR(s
)[len
-1] == '\r') {
1570 RSTR_SET_LEN(s
, RSTR_LEN(s
) - 1);
1573 return mrb_nil_value();
1575 RSTR_PTR(s
)[RSTR_LEN(s
)] = '\0';
1579 if (len
== 0 || mrb_nil_p(rs
)) return mrb_nil_value();
1581 rslen
= RSTRING_LEN(rs
);
1583 while (len
>0 && p
[len
-1] == '\n') {
1585 if (len
>0 && p
[len
-1] == '\r')
1588 if (len
< RSTR_LEN(s
)) {
1589 RSTR_SET_LEN(s
, len
);
1593 return mrb_nil_value();
1595 if (rslen
> len
) return mrb_nil_value();
1596 newline
= RSTRING_PTR(rs
)[rslen
-1];
1597 if (rslen
== 1 && newline
== '\n')
1598 newline
= RSTRING_PTR(rs
)[rslen
-1];
1599 if (rslen
== 1 && newline
== '\n')
1602 pp
= p
+ len
- rslen
;
1603 if (p
[len
-1] == newline
&&
1605 memcmp(RSTRING_PTR(rs
), pp
, rslen
) == 0)) {
1606 RSTR_SET_LEN(s
, len
- rslen
);
1607 p
[RSTR_LEN(s
)] = '\0';
1610 return mrb_nil_value();
1616 * str.chomp(separator="\n") => new_str
1618 * Returns a new <code>String</code> with the given record separator removed
1619 * from the end of <i>str</i> (if present). <code>chomp</code> also removes
1620 * carriage return characters (that is it will remove <code>\n</code>,
1621 * <code>\r</code>, and <code>\r\n</code>).
1623 * "hello".chomp #=> "hello"
1624 * "hello\n".chomp #=> "hello"
1625 * "hello\r\n".chomp #=> "hello"
1626 * "hello\n\r".chomp #=> "hello\n"
1627 * "hello\r".chomp #=> "hello"
1628 * "hello \n there".chomp #=> "hello \n there"
1629 * "hello".chomp("llo") #=> "he"
1632 mrb_str_chomp(mrb_state
*mrb
, mrb_value self
)
1636 str
= mrb_str_dup(mrb
, self
);
1637 mrb_str_chomp_bang(mrb
, str
);
1644 * str.chop! => str or nil
1646 * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
1647 * or <code>nil</code> if <i>str</i> is the empty string. See also
1648 * <code>String#chomp!</code>.
1651 mrb_str_chop_bang(mrb_state
*mrb
, mrb_value str
)
1653 struct RString
*s
= mrb_str_ptr(str
);
1655 mrb_str_modify_keep_ascii(mrb
, s
);
1656 if (RSTR_LEN(s
) > 0) {
1658 #ifdef MRB_UTF8_STRING
1659 const char* t
= RSTR_PTR(s
), *p
= t
;
1660 const char* e
= p
+ RSTR_LEN(s
);
1662 mrb_int clen
= mrb_utf8len(p
, e
);
1663 if (p
+ clen
>=e
) break;
1668 len
= RSTR_LEN(s
) - 1;
1670 if (RSTR_PTR(s
)[len
] == '\n') {
1672 RSTR_PTR(s
)[len
-1] == '\r') {
1676 RSTR_SET_LEN(s
, len
);
1677 RSTR_PTR(s
)[len
] = '\0';
1680 return mrb_nil_value();
1686 * str.chop => new_str
1688 * Returns a new <code>String</code> with the last character removed. If the
1689 * string ends with <code>\r\n</code>, both characters are removed. Applying
1690 * <code>chop</code> to an empty string returns an empty
1691 * string. <code>String#chomp</code> is often a safer alternative, as it leaves
1692 * the string unchanged if it doesn't end in a record separator.
1694 * "string\r\n".chop #=> "string"
1695 * "string\n\r".chop #=> "string\n"
1696 * "string\n".chop #=> "string"
1697 * "string".chop #=> "strin"
1701 mrb_str_chop(mrb_state
*mrb
, mrb_value self
)
1704 str
= mrb_str_dup(mrb
, self
);
1705 mrb_str_chop_bang(mrb
, str
);
1712 * str.downcase! => str or nil
1714 * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
1715 * changes were made.
1718 mrb_str_downcase_bang(mrb_state
*mrb
, mrb_value str
)
1721 mrb_bool modify
= FALSE
;
1722 struct RString
*s
= mrb_str_ptr(str
);
1724 mrb_str_modify_keep_ascii(mrb
, s
);
1726 pend
= RSTR_PTR(s
) + RSTR_LEN(s
);
1735 if (modify
) return str
;
1736 return mrb_nil_value();
1742 * str.downcase => new_str
1744 * Returns a copy of <i>str</i> with all uppercase letters replaced with their
1745 * lowercase counterparts. The operation is locale insensitive---only
1746 * characters 'A' to 'Z' are affected.
1748 * "hEllO".downcase #=> "hello"
1751 mrb_str_downcase(mrb_state
*mrb
, mrb_value self
)
1755 str
= mrb_str_dup(mrb
, self
);
1756 mrb_str_downcase_bang(mrb
, str
);
1763 * str.empty? => true or false
1765 * Returns <code>true</code> if <i>str</i> has a length of zero.
1767 * "hello".empty? #=> false
1768 * "".empty? #=> true
1771 mrb_str_empty_p(mrb_state
*mrb
, mrb_value self
)
1773 struct RString
*s
= mrb_str_ptr(self
);
1775 return mrb_bool_value(RSTR_LEN(s
) == 0);
1781 * str.eql?(other) => true or false
1783 * Two strings are equal if the have the same length and content.
1786 mrb_str_eql(mrb_state
*mrb
, mrb_value self
)
1788 mrb_value str2
= mrb_get_arg1(mrb
);
1791 eql_p
= (mrb_string_p(str2
)) && str_eql(mrb
, self
, str2
);
1793 return mrb_bool_value(eql_p
);
1797 mrb_str_substr(mrb_state
*mrb
, mrb_value str
, mrb_int beg
, mrb_int len
)
1799 return str_substr(mrb
, str
, beg
, len
);
1803 * 32 bit magic FNV-0 and FNV-1 prime
1805 #define FNV_32_PRIME ((uint32_t)0x01000193)
1806 #define FNV1_32_INIT ((uint32_t)0x811c9dc5)
1809 mrb_byte_hash_step(const uint8_t *s
, mrb_int len
, uint32_t hval
)
1811 const uint8_t *send
= s
+ len
;
1814 * FNV-1 hash each octet in the buffer
1817 /* multiply by the 32 bit FNV magic prime mod 2^32 */
1818 #if defined(NO_FNV_GCC_OPTIMIZATION)
1819 hval
*= FNV_32_PRIME
;
1821 hval
+= (hval
<<1) + (hval
<<4) + (hval
<<7) + (hval
<<8) + (hval
<<24);
1824 /* xor the bottom with the current octet */
1825 hval
^= (uint32_t)*s
++;
1828 /* return our new hash value */
1833 mrb_byte_hash(const uint8_t *s
, mrb_int len
)
1835 return mrb_byte_hash_step(s
, len
, FNV1_32_INIT
);
1839 mrb_str_hash(mrb_state
*mrb
, mrb_value str
)
1841 struct RString
*s
= mrb_str_ptr(str
);
1842 return mrb_byte_hash((uint8_t*)RSTR_PTR(s
), RSTR_LEN(s
));
1850 * Return a hash based on the string's length and content.
1853 mrb_str_hash_m(mrb_state
*mrb
, mrb_value self
)
1855 mrb_int key
= mrb_str_hash(mrb
, self
);
1856 return mrb_int_value(mrb
, key
);
1862 * str.include? other_str => true or false
1863 * str.include? int => true or false
1865 * Returns <code>true</code> if <i>str</i> contains the given string or
1868 * "hello".include? "lo" #=> true
1869 * "hello".include? "ol" #=> false
1870 * "hello".include? ?h #=> true
1873 mrb_str_include(mrb_state
*mrb
, mrb_value self
)
1877 mrb_get_args(mrb
, "S", &str2
);
1878 if (str_index_str(mrb
, self
, str2
, 0) < 0)
1879 return mrb_bool_value(FALSE
);
1880 return mrb_bool_value(TRUE
);
1885 * str.byteindex(substring, offset = 0) -> integer or nil
1887 * Returns the \Integer byte-based index of the first occurrence of the given +substring+,
1888 * or +nil+ if none found:
1890 * 'foo'.byteindex('f') # => 0
1891 * 'foo'.byteindex('oo') # => 1
1892 * 'foo'.byteindex('ooo') # => nil
1895 mrb_str_byteindex_m(mrb_state
*mrb
, mrb_value str
)
1900 if (mrb_get_args(mrb
, "S|i", &sub
, &pos
) == 1) {
1904 pos
+= RSTRING_LEN(str
);
1906 return mrb_nil_value();
1909 pos
= str_index_str(mrb
, str
, sub
, pos
);
1911 if (pos
== -1) return mrb_nil_value();
1912 return mrb_int_value(mrb
, pos
);
1918 * str.index(substring [, offset]) => int or nil
1920 * Returns the index of the first occurrence of the given
1921 * <i>substring</i>. Returns <code>nil</code> if not found.
1922 * If the second parameter is present, it
1923 * specifies the position in the string to begin the search.
1925 * "hello".index('l') #=> 2
1926 * "hello".index('lo') #=> 3
1927 * "hello".index('a') #=> nil
1928 * "hello".index('l', -2) #=> 3
1930 #ifdef MRB_UTF8_STRING
1932 mrb_str_index_m(mrb_state
*mrb
, mrb_value str
)
1934 if (RSTR_SINGLE_BYTE_P(mrb_str_ptr(str
))) {
1935 return mrb_str_byteindex_m(mrb
, str
);
1941 if (mrb_get_args(mrb
, "S|i", &sub
, &pos
) == 1) {
1945 mrb_int clen
= RSTRING_CHAR_LEN(str
);
1948 return mrb_nil_value();
1951 pos
= str_index_str_by_char(mrb
, str
, sub
, pos
);
1953 if (pos
== -1) return mrb_nil_value();
1954 return mrb_int_value(mrb
, pos
);
1957 #define mrb_str_index_m mrb_str_byteindex_m
1964 * str.replace(other_str) => str
1966 * s = "hello" #=> "hello"
1967 * s.replace "world" #=> "world"
1970 mrb_str_replace(mrb_state
*mrb
, mrb_value str
)
1974 mrb_get_args(mrb
, "S", &str2
);
1975 return str_replace(mrb
, mrb_str_ptr(str
), mrb_str_ptr(str2
));
1981 * String.new(str="") => new_str
1983 * Returns a new string object containing a copy of <i>str</i>.
1986 mrb_str_init(mrb_state
*mrb
, mrb_value self
)
1990 if (mrb_get_args(mrb
, "|S", &str2
) == 0) {
1991 struct RString
*s
= str_new(mrb
, 0, 0);
1992 str2
= mrb_obj_value(s
);
1994 str_replace(mrb
, mrb_str_ptr(self
), mrb_str_ptr(str2
));
2002 * str.intern => symbol
2003 * str.to_sym => symbol
2005 * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
2006 * symbol if it did not previously exist.
2008 * "Koala".intern #=> :Koala
2009 * s = 'cat'.to_sym #=> :cat
2010 * s == :cat #=> true
2011 * s = '@cat'.to_sym #=> :@cat
2012 * s == :@cat #=> true
2014 * This can also be used to create symbols that cannot be represented using the
2015 * <code>:xxx</code> notation.
2017 * 'cat and dog'.to_sym #=> :"cat and dog"
2020 mrb_str_intern(mrb_state
*mrb
, mrb_value self
)
2022 return mrb_symbol_value(mrb_intern_str(mrb
, self
));
2024 /* ---------------------------------- */
2026 mrb_obj_as_string(mrb_state
*mrb
, mrb_value obj
)
2028 switch (mrb_type(obj
)) {
2032 return mrb_sym_str(mrb
, mrb_symbol(obj
));
2033 case MRB_TT_INTEGER
:
2034 return mrb_integer_to_str(mrb
, obj
, 10);
2038 return mrb_mod_to_s(mrb
, obj
);
2040 return mrb_type_convert(mrb
, obj
, MRB_TT_STRING
, MRB_SYM(to_s
));
2045 mrb_ptr_to_str(mrb_state
*mrb
, void *p
)
2047 struct RString
*p_str
;
2050 uintptr_t n
= (uintptr_t)p
;
2052 p_str
= str_new(mrb
, NULL
, 2 + sizeof(uintptr_t) * CHAR_BIT
/ 4);
2053 p1
= RSTR_PTR(p_str
);
2059 *p2
++ = mrb_digitmap
[n
% 16];
2063 RSTR_SET_LEN(p_str
, (mrb_int
)(p2
- RSTR_PTR(p_str
)));
2071 return mrb_obj_value(p_str
);
2075 str_reverse(char *p
, char *e
)
2089 * str.reverse! => str
2091 * Reverses <i>str</i> in place.
2094 mrb_str_reverse_bang(mrb_state
*mrb
, mrb_value str
)
2096 struct RString
*s
= mrb_str_ptr(str
);
2099 #ifdef MRB_UTF8_STRING
2100 mrb_int utf8_len
= RSTRING_CHAR_LEN(str
);
2101 mrb_int len
= RSTR_LEN(s
);
2103 if (utf8_len
< 2) return str
;
2104 if (utf8_len
< len
) {
2105 mrb_str_modify(mrb
, s
);
2107 e
= p
+ RSTR_LEN(s
);
2109 mrb_int clen
= mrb_utf8len(p
, e
);
2110 str_reverse(p
, p
+ clen
- 1);
2117 if (RSTR_LEN(s
) > 1) {
2118 mrb_str_modify(mrb
, s
);
2125 e
= p
+ RSTR_LEN(s
) - 1;
2130 /* ---------------------------------- */
2134 * str.reverse => new_str
2136 * Returns a new string with the characters from <i>str</i> in reverse order.
2138 * "stressed".reverse #=> "desserts"
2141 mrb_str_reverse(mrb_state
*mrb
, mrb_value str
)
2143 mrb_value str2
= mrb_str_dup(mrb
, str
);
2144 mrb_str_reverse_bang(mrb
, str2
);
2150 * byterindex(substring, offset = self.bytesize) -> integer or nil
2152 * Returns the \Integer byte-based index of the _last_ occurrence of the given +substring+,
2153 * or +nil+ if none found:
2155 * 'foo'.byterindex('f') # => 0
2156 * 'foo'.byterindex('o') # => 2
2157 * 'foo'.byterindex('oo') # => 1
2158 * 'foo'.byterindex('ooo') # => nil
2161 mrb_str_byterindex_m(mrb_state
*mrb
, mrb_value str
)
2165 mrb_int len
= RSTRING_LEN(str
);
2167 if (mrb_get_args(mrb
, "S|i", &sub
, &pos
) == 1) {
2174 return mrb_nil_value();
2177 if (pos
> len
) pos
= len
;
2179 pos
= str_rindex(mrb
, str
, sub
, pos
);
2181 return mrb_nil_value();
2183 return mrb_int_value(mrb
, pos
);
2189 * str.rindex(substring [, offset]) => int or nil
2191 * Returns the index of the last occurrence of the given <i>substring</i>.
2192 * Returns <code>nil</code> if not found. If the second parameter is
2193 * present, it specifies the position in the string to end the
2194 * search---characters beyond this point will not be considered.
2196 * "hello".rindex('e') #=> 1
2197 * "hello".rindex('l') #=> 3
2198 * "hello".rindex('a') #=> nil
2199 * "hello".rindex('l', 2) #=> 2
2201 #ifdef MRB_UTF8_STRING
2203 mrb_str_rindex_m(mrb_state
*mrb
, mrb_value str
)
2205 if (RSTR_SINGLE_BYTE_P(mrb_str_ptr(str
))) {
2206 return mrb_str_byterindex_m(mrb
, str
);
2212 if (mrb_get_args(mrb
, "S|i", &sub
, &pos
) == 1) {
2213 pos
= RSTRING_LEN(str
);
2215 else if (pos
>= 0) {
2216 pos
= chars2bytes(str
, 0, pos
);
2219 const char *p
= RSTRING_PTR(str
);
2220 const char *e
= RSTRING_END(str
);
2221 while (pos
++ < 0 && p
< e
) {
2222 e
= char_backtrack(p
, e
);
2224 if (p
== e
) return mrb_nil_value();
2225 pos
= (mrb_int
)(e
- p
);
2227 pos
= str_rindex(mrb
, str
, sub
, pos
);
2229 pos
= bytes2chars(str
, pos
);
2230 if (pos
< 0) return mrb_nil_value();
2231 return mrb_int_value(mrb
, pos
);
2233 return mrb_nil_value();
2236 #define mrb_str_rindex_m mrb_str_byterindex_m
2243 * str.split(separator=nil, [limit]) => anArray
2245 * Divides <i>str</i> into substrings based on a delimiter, returning an array
2246 * of these substrings.
2248 * If <i>separator</i> is a <code>String</code>, then its contents are used as
2249 * the delimiter when splitting <i>str</i>. If <i>separator</i> is a single
2250 * space, <i>str</i> is split on whitespace, with leading whitespace and runs
2251 * of contiguous whitespace characters ignored.
2253 * If <i>separator</i> is omitted or <code>nil</code> (which is the default),
2254 * <i>str</i> is split on whitespace as if ' ' were specified.
2256 * If the <i>limit</i> parameter is omitted, trailing null fields are
2257 * suppressed. If <i>limit</i> is a positive number, at most that number of
2258 * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
2259 * string is returned as the only entry in an array). If negative, there is no
2260 * limit to the number of fields returned, and trailing null fields are not
2263 * " now's the time".split #=> ["now's", "the", "time"]
2264 * " now's the time".split(' ') #=> ["now's", "the", "time"]
2266 * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
2267 * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
2268 * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
2269 * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
2273 mrb_str_split_m(mrb_state
*mrb
, mrb_value str
)
2276 mrb_value spat
= mrb_nil_value();
2277 enum {awk
, string
} split_type
= string
;
2283 mrb_value result
, tmp
;
2285 argc
= mrb_get_args(mrb
, "|oi", &spat
, &lim
);
2286 lim_p
= (lim
> 0 && argc
== 2);
2289 if (RSTRING_LEN(str
) == 0)
2290 return mrb_ary_new_capa(mrb
, 0);
2291 return mrb_ary_new_from_values(mrb
, 1, &str
);
2296 if (argc
== 0 || mrb_nil_p(spat
)) {
2299 else if (!mrb_string_p(spat
)) {
2300 mrb_raise(mrb
, E_TYPE_ERROR
, "expected String");
2302 else if (RSTRING_LEN(spat
) == 1 && RSTRING_PTR(spat
)[0] == ' ') {
2306 result
= mrb_ary_new(mrb
);
2308 if (split_type
== awk
) {
2309 mrb_bool skip
= TRUE
;
2311 mrb_int str_len
= RSTRING_LEN(str
);
2313 int ai
= mrb_gc_arena_save(mrb
);
2316 while (idx
< str_len
) {
2317 c
= (unsigned char)RSTRING_PTR(str
)[idx
++];
2325 if (lim_p
&& lim
<= i
) break;
2328 else if (ISSPACE(c
)) {
2329 mrb_ary_push(mrb
, result
, mrb_str_byte_subseq(mrb
, str
, beg
, end
-beg
));
2330 mrb_gc_arena_restore(mrb
, ai
);
2340 else { /* split_type == string */
2341 mrb_int str_len
= RSTRING_LEN(str
);
2342 mrb_int pat_len
= RSTRING_LEN(spat
);
2344 int ai
= mrb_gc_arena_save(mrb
);
2346 while (idx
< str_len
) {
2348 end
= mrb_memsearch(RSTRING_PTR(spat
), pat_len
, RSTRING_PTR(str
)+idx
, str_len
- idx
);
2352 end
= chars2bytes(str
, idx
, 1);
2354 mrb_ary_push(mrb
, result
, mrb_str_byte_subseq(mrb
, str
, idx
, end
));
2355 mrb_gc_arena_restore(mrb
, ai
);
2356 idx
+= end
+ pat_len
;
2357 if (lim_p
&& lim
<= ++i
) break;
2361 if (RSTRING_LEN(str
) > 0 && (lim_p
|| RSTRING_LEN(str
) > beg
|| lim
< 0)) {
2362 if (RSTRING_LEN(str
) == beg
) {
2363 tmp
= mrb_str_new(mrb
, 0, 0);
2366 tmp
= mrb_str_byte_subseq(mrb
, str
, beg
, RSTRING_LEN(str
)-beg
);
2368 mrb_ary_push(mrb
, result
, tmp
);
2370 if (!lim_p
&& lim
== 0) {
2372 while ((len
= RARRAY_LEN(result
)) > 0 &&
2373 (tmp
= RARRAY_PTR(result
)[len
-1], RSTRING_LEN(tmp
) == 0))
2374 mrb_ary_pop(mrb
, result
);
2381 trailingbad(const char *str
, const char *p
, const char *pend
)
2383 if (p
== str
) return TRUE
; /* no number */
2384 if (*(p
- 1) == '_') return TRUE
; /* trailing '_' */
2385 while (p
<pend
&& ISSPACE(*p
)) p
++;
2386 if (p
<pend
) return TRUE
; /* trailing garbage */
2391 mrb_str_len_to_integer(mrb_state
*mrb
, const char *str
, size_t len
, mrb_int base
, int badcheck
)
2393 const char *p
= str
;
2394 const char *pend
= str
+ len
;
2395 #ifdef MRB_USE_BIGINT
2396 const char *p2
= NULL
;
2403 #define conv_digit(c) \
2404 (ISDIGIT(c) ? ((c) - '0') : \
2405 ISLOWER(c) ? ((c) - 'a' + 10) : \
2406 ISUPPER(c) ? ((c) - 'A' + 10) : \
2410 if (badcheck
) goto bad
;
2411 return mrb_fixnum_value(0);
2413 while (p
<pend
&& ISSPACE(*p
))
2419 else if (p
[0] == '-') {
2443 else if (base
< -1) {
2452 if (p
[0] == '0' && (p
[1] == 'b'||p
[1] == 'B')) {
2459 if (p
[0] == '0' && (p
[1] == 'o'||p
[1] == 'O')) {
2462 case 4: case 5: case 6: case 7:
2465 if (p
[0] == '0' && (p
[1] == 'd'||p
[1] == 'D')) {
2468 case 9: case 11: case 12: case 13: case 14: case 15:
2471 if (p
[0] == '0' && (p
[1] == 'x'||p
[1] == 'X')) {
2476 if (base
< 2 || 36 < base
) {
2477 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "illegal radix %i", base
);
2480 } /* end of switch (base) { */
2482 if (badcheck
) goto bad
;
2483 return mrb_fixnum_value(0);
2485 if (*p
== '0') { /* squeeze preceding 0s */
2490 if (p
<pend
&& *p
== '_') {
2491 if (badcheck
) goto bad
;
2501 if (*(p
- 1) == '0')
2504 if (p
== pend
|| *p
== '_') {
2505 if (badcheck
) goto bad
;
2506 return mrb_fixnum_value(0);
2508 #ifdef MRB_USE_BIGINT
2511 for (;p
<pend
; p
++) {
2515 if (badcheck
) goto bad
;
2519 if (badcheck
) goto bad
;
2523 if (badcheck
&& *p
== '\0') {
2524 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string contains null byte");
2527 if (c
< 0 || c
>= base
) {
2530 if (mrb_int_mul_overflow(n
, base
, &n
)) goto overflow
;
2531 if (MRB_INT_MAX
- c
< n
) {
2532 if (sign
== 0 && MRB_INT_MAX
- n
== c
- 1) {
2538 #ifdef MRB_USE_BIGINT
2540 const char *p3
= p2
;
2542 char c
= TOLOWER(*p3
);
2543 const char *p4
= strchr(mrb_digitmap
, c
);
2544 if (p4
== NULL
&& c
!= '_') break;
2545 if (p4
- mrb_digitmap
>= base
) break;
2548 if (badcheck
&& trailingbad(str
, p
, pend
)) goto bad
;
2549 return mrb_bint_new_str(mrb
, p2
, (mrb_int
)(p3
-p2
), sign
? base
: -base
);
2551 mrb_raisef(mrb
, E_RANGE_ERROR
, "string (%l) too big for integer", str
, pend
-str
);
2557 if (badcheck
&& trailingbad(str
, p
, pend
)) goto bad
;
2558 return mrb_int_value(mrb
, sign
? val
: -val
);
2560 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "invalid string for number(%!l)", str
, pend
-str
);
2562 return mrb_fixnum_value(0);
2565 /* obsolete: use RSTRING_CSTR() or mrb_string_cstr() */
2567 mrb_string_value_cstr(mrb_state
*mrb
, mrb_value
*ptr
)
2573 mrb_ensure_string_type(mrb
, *ptr
);
2574 ps
= mrb_str_ptr(*ptr
);
2575 check_null_byte(mrb
, ps
);
2578 if (p
== NULL
) return "";
2579 if (p
[len
] == '\0') {
2584 * Even after str_modify_keep_ascii(), NULL termination is not ensured if
2585 * RSTR_SET_LEN() is used explicitly (e.g. String#delete_suffix!).
2587 str_modify_keep_ascii(mrb
, ps
);
2588 RSTR_PTR(ps
)[len
] = '\0';
2589 return RSTR_PTR(ps
);
2593 mrb_string_cstr(mrb_state
*mrb
, mrb_value str
)
2595 return mrb_string_value_cstr(mrb
, &str
);
2599 mrb_str_to_integer(mrb_state
*mrb
, mrb_value str
, mrb_int base
, mrb_bool badcheck
)
2604 mrb_ensure_string_type(mrb
, str
);
2605 s
= RSTRING_PTR(str
);
2606 len
= RSTRING_LEN(str
);
2607 return mrb_str_len_to_integer(mrb
, s
, len
, base
, badcheck
);
2613 * str.to_i(base=10) => integer
2615 * Returns the result of interpreting leading characters in <i>str</i> as an
2616 * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
2617 * end of a valid number are ignored. If there is not a valid number at the
2618 * start of <i>str</i>, <code>0</code> is returned. This method never raises an
2621 * "12345".to_i #=> 12345
2622 * "99 red balloons".to_i #=> 99
2624 * "0a".to_i(16) #=> 10
2625 * "hello".to_i #=> 0
2626 * "1100101".to_i(2) #=> 101
2627 * "1100101".to_i(8) #=> 294977
2628 * "1100101".to_i(10) #=> 1100101
2629 * "1100101".to_i(16) #=> 17826049
2632 mrb_str_to_i(mrb_state
*mrb
, mrb_value self
)
2636 mrb_get_args(mrb
, "|i", &base
);
2637 if (base
< 0 || 36 < base
) {
2638 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "illegal radix %i", base
);
2640 return mrb_str_to_integer(mrb
, self
, base
, FALSE
);
2643 #ifndef MRB_NO_FLOAT
2645 mrb_str_len_to_dbl(mrb_state
*mrb
, const char *s
, size_t len
, mrb_bool badcheck
)
2647 char buf
[DBL_DIG
* 4 + 20];
2648 const char *p
= s
, *p2
;
2649 const char *pend
= p
+ len
;
2654 mrb_bool dot
= FALSE
;
2657 while (p
<pend
&& ISSPACE(*p
)) p
++;
2660 if (pend
- p
> 2 && p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2663 if (!badcheck
) return 0.0;
2664 x
= mrb_str_len_to_integer(mrb
, p
, pend
-p
, 0, badcheck
);
2665 if (mrb_integer_p(x
))
2666 d
= (double)mrb_integer(x
);
2667 else /* if (mrb_float_p(x)) */
2674 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string for Float contains null byte");
2681 if (!badcheck
&& *p
== ' ') {
2686 if (*p
== '_') break;
2693 if (c
== '.') dot
= TRUE
;
2695 /* remove an underscore between digits */
2696 if (n
== buf
|| !ISDIGIT(prev
) || p
== pend
) {
2697 if (badcheck
) goto bad
;
2701 else if (badcheck
&& prev
== '_' && !ISDIGIT(c
)) goto bad
;
2703 const char *bend
= buf
+sizeof(buf
)-1;
2704 if (n
==bend
) { /* buffer overflow */
2705 if (dot
) break; /* cut off remaining fractions */
2716 if (mrb_read_float(p
, &end
, &d
) == FALSE
) {
2719 mrb_raisef(mrb
, E_ARGUMENT_ERROR
, "invalid string for float(%!s)", s
);
2725 if (!end
|| p
== end
) goto bad
;
2726 while (end
<pend
&& ISSPACE(*end
)) end
++;
2727 if (end
<pend
) goto bad
;
2733 mrb_str_to_dbl(mrb_state
*mrb
, mrb_value str
, mrb_bool badcheck
)
2735 return mrb_str_len_to_dbl(mrb
, RSTRING_PTR(str
), RSTRING_LEN(str
), badcheck
);
2743 * Returns the result of interpreting leading characters in <i>str</i> as a
2744 * floating-point number. Extraneous characters past the end of a valid number
2745 * are ignored. If there is not a valid number at the start of <i>str</i>,
2746 * <code>0.0</code> is returned. This method never raises an exception.
2748 * "123.45e1".to_f #=> 1234.5
2749 * "45.67 degrees".to_f #=> 45.67
2750 * "thx1138".to_f #=> 0.0
2753 mrb_str_to_f(mrb_state
*mrb
, mrb_value self
)
2755 return mrb_float_value(mrb
, mrb_str_to_dbl(mrb
, self
, FALSE
));
2764 * Returns the receiver.
2767 mrb_str_to_s(mrb_state
*mrb
, mrb_value self
)
2769 if (mrb_obj_class(mrb
, self
) != mrb
->string_class
) {
2770 return mrb_str_dup(mrb
, self
);
2778 * str.upcase! => str or nil
2780 * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
2784 mrb_str_upcase_bang(mrb_state
*mrb
, mrb_value str
)
2786 struct RString
*s
= mrb_str_ptr(str
);
2788 mrb_bool modify
= FALSE
;
2790 mrb_str_modify_keep_ascii(mrb
, s
);
2791 p
= RSTRING_PTR(str
);
2792 pend
= RSTRING_END(str
);
2801 if (modify
) return str
;
2802 return mrb_nil_value();
2808 * str.upcase => new_str
2810 * Returns a copy of <i>str</i> with all lowercase letters replaced with their
2811 * uppercase counterparts. The operation is locale insensitive---only
2812 * characters 'a' to 'z' are affected.
2814 * "hEllO".upcase #=> "HELLO"
2817 mrb_str_upcase(mrb_state
*mrb
, mrb_value self
)
2821 str
= mrb_str_dup(mrb
, self
);
2822 mrb_str_upcase_bang(mrb
, str
);
2828 * str.dump -> new_str
2830 * Produces a version of <i>str</i> with all nonprinting characters replaced by
2831 * <code>\nnn</code> notation and all special characters escaped.
2834 mrb_str_dump(mrb_state
*mrb
, mrb_value str
)
2836 return str_escape(mrb
, str
, FALSE
);
2840 mrb_str_cat(mrb_state
*mrb
, mrb_value str
, const char *ptr
, size_t len
)
2842 struct RString
*s
= mrb_str_ptr(str
);
2847 if (len
== 0) return str
;
2848 mrb_str_modify(mrb
, s
);
2849 if (ptr
>= RSTR_PTR(s
) && ptr
<= RSTR_PTR(s
) + (size_t)RSTR_LEN(s
)) {
2850 off
= ptr
- RSTR_PTR(s
);
2853 capa
= RSTR_CAPA(s
);
2854 if (mrb_int_add_overflow(RSTR_LEN(s
), len
, &total
)) {
2856 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "string size too big");
2858 if (capa
<= total
) {
2859 if (capa
== 0) capa
= 1;
2860 while (capa
<= total
) {
2861 if (mrb_int_mul_overflow(capa
, 2, &capa
)) goto size_error
;
2863 resize_capa(mrb
, s
, capa
);
2866 ptr
= RSTR_PTR(s
) + off
;
2868 memcpy(RSTR_PTR(s
) + RSTR_LEN(s
), ptr
, len
);
2869 RSTR_SET_LEN(s
, total
);
2870 RSTR_PTR(s
)[total
] = '\0'; /* sentinel */
2875 mrb_str_cat_cstr(mrb_state
*mrb
, mrb_value str
, const char *ptr
)
2877 return mrb_str_cat(mrb
, str
, ptr
, ptr
? strlen(ptr
) : 0);
2881 mrb_str_cat_str(mrb_state
*mrb
, mrb_value str
, mrb_value str2
)
2883 if (mrb_str_ptr(str
) == mrb_str_ptr(str2
)) {
2884 mrb_str_modify(mrb
, mrb_str_ptr(str
));
2886 return mrb_str_cat(mrb
, str
, RSTRING_PTR(str2
), RSTRING_LEN(str2
));
2890 mrb_str_append(mrb_state
*mrb
, mrb_value str1
, mrb_value str2
)
2892 mrb_ensure_string_type(mrb
, str2
);
2893 return mrb_str_cat_str(mrb
, str1
, str2
);
2898 * str.inspect -> string
2900 * Returns a printable version of _str_, surrounded by quote marks,
2901 * with special characters escaped.
2905 * str.inspect #=> "\"hel\\bo\""
2908 mrb_str_inspect(mrb_state
*mrb
, mrb_value str
)
2910 return str_escape(mrb
, str
, TRUE
);
2915 * str.bytes -> array of int
2917 * Returns an array of bytes in _str_.
2920 * str.bytes #=> [104, 101, 108, 108, 111]
2923 mrb_str_bytes(mrb_state
*mrb
, mrb_value str
)
2925 struct RString
*s
= mrb_str_ptr(str
);
2926 mrb_value a
= mrb_ary_new_capa(mrb
, RSTR_LEN(s
));
2927 unsigned char *p
= (unsigned char*)(RSTR_PTR(s
)), *pend
= p
+ RSTR_LEN(s
);
2930 mrb_ary_push(mrb
, a
, mrb_fixnum_value(p
[0]));
2938 * str.getbyte(index) -> 0 .. 255
2940 * returns the <i>index</i>th byte as an integer.
2943 mrb_str_getbyte(mrb_state
*mrb
, mrb_value str
)
2946 mrb_get_args(mrb
, "i", &pos
);
2949 pos
+= RSTRING_LEN(str
);
2950 if (pos
< 0 || RSTRING_LEN(str
) <= pos
)
2951 return mrb_nil_value();
2953 return mrb_fixnum_value((unsigned char)RSTRING_PTR(str
)[pos
]);
2958 * str.setbyte(index, integer) -> integer
2960 * modifies the <i>index</i>th byte as <i>integer</i>.
2963 mrb_str_setbyte(mrb_state
*mrb
, mrb_value str
)
2968 mrb_get_args(mrb
, "ii", &pos
, &byte
);
2970 len
= RSTRING_LEN(str
);
2971 if (pos
< -len
|| len
<= pos
)
2972 mrb_raisef(mrb
, E_INDEX_ERROR
, "index %i out of string", pos
);
2976 mrb_str_modify(mrb
, mrb_str_ptr(str
));
2978 RSTRING_PTR(str
)[pos
] = (unsigned char)byte
;
2979 return mrb_fixnum_value((unsigned char)byte
);
2984 * str.byteslice(integer) -> new_str or nil
2985 * str.byteslice(integer, integer) -> new_str or nil
2986 * str.byteslice(range) -> new_str or nil
2988 * Byte Reference---If passed a single Integer, returns a
2989 * substring of one byte at that position. If passed two Integer
2990 * objects, returns a substring starting at the offset given by the first, and
2991 * a length given by the second. If given a Range, a substring containing
2992 * bytes at offsets given by the range is returned. In all three cases, if
2993 * an offset is negative, it is counted from the end of <i>str</i>. Returns
2994 * <code>nil</code> if the initial offset falls outside the string, the length
2995 * is negative, or the beginning of the range is greater than the end.
2996 * The encoding of the resulted string keeps original encoding.
2998 * "hello".byteslice(1) #=> "e"
2999 * "hello".byteslice(-1) #=> "o"
3000 * "hello".byteslice(1, 2) #=> "el"
3001 * "\x80\u3042".byteslice(1, 3) #=> "\u3042"
3002 * "\x03\u3042\xff".byteslice(1..3) #=> "\u3042"
3005 mrb_str_byteslice(mrb_state
*mrb
, mrb_value str
)
3008 mrb_int str_len
, beg
, len
;
3009 mrb_bool empty
= TRUE
;
3011 len
= mrb_get_argc(mrb
);
3014 mrb_get_args(mrb
, "ii", &beg
, &len
);
3015 str_len
= RSTRING_LEN(str
);
3018 a1
= mrb_get_arg1(mrb
);
3019 str_len
= RSTRING_LEN(str
);
3020 if (mrb_range_p(a1
)) {
3021 if (mrb_range_beg_len(mrb
, a1
, &beg
, &len
, str_len
, TRUE
) != MRB_RANGE_OK
) {
3022 return mrb_nil_value();
3026 beg
= mrb_as_int(mrb
, a1
);
3032 mrb_argnum_error(mrb
, len
, 1, 2);
3035 if (mrb_str_beg_len(str_len
, &beg
, &len
) && (empty
|| len
!= 0)) {
3036 return mrb_str_byte_subseq(mrb
, str
, beg
, len
);
3039 return mrb_nil_value();
3044 sub_replace(mrb_state
*mrb
, mrb_value self
)
3048 mrb_int found
, offset
;
3051 mrb_get_args(mrb
, "ssi", &p
, &plen
, &match
, &mlen
, &found
);
3052 result
= mrb_str_new(mrb
, 0, 0);
3053 for (mrb_int i
=0; i
<plen
; i
++) {
3054 if (p
[i
] != '\\' || i
+1==plen
) {
3055 mrb_str_cat(mrb
, result
, p
+i
, 1);
3061 mrb_str_cat(mrb
, result
, "\\", 1);
3064 mrb_str_cat(mrb
, result
, RSTRING_PTR(self
), found
);
3067 mrb_str_cat(mrb
, result
, match
, mlen
);
3070 offset
= found
+ mlen
;
3071 if (RSTRING_LEN(self
) > offset
) {
3072 mrb_str_cat(mrb
, result
, RSTRING_PTR(self
)+offset
, RSTRING_LEN(self
)-offset
);
3075 case '1': case '2': case '3':
3076 case '4': case '5': case '6':
3077 case '7': case '8': case '9':
3078 /* ignore sub-group match (no Regexp supported) */
3081 mrb_str_cat(mrb
, result
, &p
[i
-1], 2);
3090 str_bytesplice(mrb_state
*mrb
, mrb_value str
, mrb_int idx1
, mrb_int len1
, mrb_value replace
, mrb_int idx2
, mrb_int len2
)
3092 struct RString
*s
= RSTRING(str
);
3094 idx1
+= RSTR_LEN(s
);
3097 idx2
+= RSTRING_LEN(replace
);
3099 if (RSTR_LEN(s
) < idx1
|| idx1
< 0 || RSTRING_LEN(replace
) < idx2
|| idx2
< 0) {
3100 mrb_raise(mrb
, E_INDEX_ERROR
, "index out of string");
3102 if (len1
< 0 || len2
< 0) {
3103 mrb_raise(mrb
, E_INDEX_ERROR
, "negative length");
3106 if (mrb_int_add_overflow(idx1
, len1
, &n
) || RSTR_LEN(s
) < n
) {
3107 len1
= RSTR_LEN(s
) - idx1
;
3109 if (mrb_int_add_overflow(idx2
, len2
, &n
) || RSTRING_LEN(replace
) < n
) {
3110 len2
= RSTRING_LEN(replace
) - idx2
;
3112 mrb_str_modify(mrb
, s
);
3114 memmove(RSTR_PTR(s
)+idx1
, RSTRING_PTR(replace
)+idx2
, len2
);
3116 memmove(RSTR_PTR(s
)+idx1
+len2
, RSTR_PTR(s
)+idx1
+len1
, RSTR_LEN(s
)-(idx1
+len1
));
3117 RSTR_SET_LEN(s
, RSTR_LEN(s
)-(len1
-len2
));
3120 else { /* len1 < len2 */
3121 mrb_int slen
= RSTR_LEN(s
);
3122 mrb_str_resize(mrb
, str
, slen
+len2
-len1
);
3123 memmove(RSTR_PTR(s
)+idx1
+len2
, RSTR_PTR(s
)+idx1
+len1
, slen
-(idx1
+len1
));
3124 memmove(RSTR_PTR(s
)+idx1
, RSTRING_PTR(replace
)+idx2
, len2
);
3131 * bytesplice(index, length, str) -> string
3132 * bytesplice(index, length, str, str_index, str_length) -> string
3133 * bytesplice(range, str) -> string
3134 * bytesplice(range, str, str_range) -> string
3136 * Replaces some or all of the content of +self+ with +str+, and returns +self+.
3137 * The portion of the string affected is determined using
3138 * the same criteria as String#byteslice, except that +length+ cannot be omitted.
3139 * If the replacement string is not the same length as the text it is replacing,
3140 * the string will be adjusted accordingly.
3142 * If +str_index+ and +str_length+, or +str_range+ are given, the content of +self+ is replaced by str.byteslice(str_index, str_length) or str.byteslice(str_range); however the substring of +str+ is not allocated as a new string.
3144 * The form that take an Integer will raise an IndexError if the value is out
3145 * of range; the Range form will raise a RangeError.
3146 * If the beginning or ending offset does not land on character (codepoint)
3147 * boundary, an IndexError will be raised.
3150 mrb_str_bytesplice(mrb_state
*mrb
, mrb_value str
)
3152 mrb_int idx1
, len1
, idx2
, len2
;
3153 mrb_value range1
, range2
, replace
;
3154 switch (mrb_get_argc(mrb
)) {
3156 mrb_get_args(mrb
, "ooo", &range1
, &replace
, &range2
);
3157 if (mrb_integer_p(range1
)) {
3158 mrb_get_args(mrb
, "iiS", &idx1
, &len1
, &replace
);
3159 return str_bytesplice(mrb
, str
, idx1
, len1
, replace
, 0, RSTRING_LEN(replace
));
3161 mrb_ensure_string_type(mrb
, replace
);
3162 if (mrb_range_beg_len(mrb
, range1
, &idx1
, &len1
, RSTRING_LEN(str
), FALSE
) != MRB_RANGE_OK
) break;
3163 if (mrb_range_beg_len(mrb
, range2
, &idx2
, &len2
, RSTRING_LEN(replace
), FALSE
) != MRB_RANGE_OK
) break;
3164 return str_bytesplice(mrb
, str
, idx1
, len1
, replace
, idx2
, len2
);
3166 mrb_get_args(mrb
, "iiSii", &idx1
, &len1
, &replace
, &idx2
, &len2
);
3167 return str_bytesplice(mrb
, str
, idx1
, len1
, replace
, idx2
, len2
);
3169 mrb_get_args(mrb
, "oS", &range1
, &replace
);
3170 if (mrb_range_beg_len(mrb
, range1
, &idx1
, &len1
, RSTRING_LEN(str
), FALSE
) == MRB_RANGE_OK
) {
3171 return str_bytesplice(mrb
, str
, idx1
, len1
, replace
, 0, RSTRING_LEN(replace
));
3176 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "wrong number of arumgnts");
3180 mrb_encoding(mrb_state
*mrb
, mrb_value self
)
3182 mrb_get_args(mrb
, "");
3183 #ifdef MRB_UTF8_STRING
3184 return mrb_str_new_lit(mrb
, "UTF-8");
3186 return mrb_str_new_lit(mrb
, "ASCII-8BIT");
3190 /* ---------------------------*/
3192 mrb_init_string(mrb_state
*mrb
)
3196 mrb_static_assert(RSTRING_EMBED_LEN_MAX
< (1 << MRB_STR_EMBED_LEN_BIT
),
3197 "pointer size too big for embedded string");
3199 mrb
->string_class
= s
= mrb_define_class_id(mrb
, MRB_SYM(String
), mrb
->object_class
); /* 15.2.10 */
3200 MRB_SET_INSTANCE_TT(s
, MRB_TT_STRING
);
3202 mrb_define_method_id(mrb
, s
, MRB_SYM(bytesize
), mrb_str_bytesize
, MRB_ARGS_NONE());
3204 mrb_define_method_id(mrb
, s
, MRB_OPSYM(cmp
), mrb_str_cmp_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
3205 mrb_define_method_id(mrb
, s
, MRB_OPSYM(eq
), mrb_str_equal_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
3206 mrb_define_method_id(mrb
, s
, MRB_OPSYM(add
), mrb_str_plus_m
, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
3207 mrb_define_method_id(mrb
, s
, MRB_OPSYM(mul
), mrb_str_times
, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
3208 mrb_define_method_id(mrb
, s
, MRB_OPSYM(aref
), mrb_str_aref_m
, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
3209 mrb_define_method_id(mrb
, s
, MRB_OPSYM(aset
), mrb_str_aset_m
, MRB_ARGS_ANY());
3210 mrb_define_method_id(mrb
, s
, MRB_SYM(capitalize
), mrb_str_capitalize
, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
3211 mrb_define_method_id(mrb
, s
, MRB_SYM_B(capitalize
), mrb_str_capitalize_bang
, MRB_ARGS_NONE()); /* 15.2.10.5.8 */
3212 mrb_define_method_id(mrb
, s
, MRB_SYM(chomp
), mrb_str_chomp
, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
3213 mrb_define_method_id(mrb
, s
, MRB_SYM_B(chomp
), mrb_str_chomp_bang
, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
3214 mrb_define_method_id(mrb
, s
, MRB_SYM(chop
), mrb_str_chop
, MRB_ARGS_NONE()); /* 15.2.10.5.11 */
3215 mrb_define_method_id(mrb
, s
, MRB_SYM_B(chop
), mrb_str_chop_bang
, MRB_ARGS_NONE()); /* 15.2.10.5.12 */
3216 mrb_define_method_id(mrb
, s
, MRB_SYM(downcase
), mrb_str_downcase
, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
3217 mrb_define_method_id(mrb
, s
, MRB_SYM_B(downcase
), mrb_str_downcase_bang
, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
3218 mrb_define_method_id(mrb
, s
, MRB_SYM_Q(empty
), mrb_str_empty_p
, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
3219 mrb_define_method_id(mrb
, s
, MRB_SYM_Q(eql
), mrb_str_eql
, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
3221 mrb_define_method_id(mrb
, s
, MRB_SYM(hash
), mrb_str_hash_m
, MRB_ARGS_NONE()); /* 15.2.10.5.20 */
3222 mrb_define_method_id(mrb
, s
, MRB_SYM_Q(include
), mrb_str_include
, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
3223 mrb_define_method_id(mrb
, s
, MRB_SYM(index
), mrb_str_index_m
, MRB_ARGS_ARG(1,1)); /* 15.2.10.5.22 */
3224 mrb_define_method_id(mrb
, s
, MRB_SYM(initialize
), mrb_str_init
, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
3225 mrb_define_method_id(mrb
, s
, MRB_SYM(initialize_copy
), mrb_str_replace
, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
3226 mrb_define_method_id(mrb
, s
, MRB_SYM(intern
), mrb_str_intern
, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
3227 mrb_define_method_id(mrb
, s
, MRB_SYM(length
), mrb_str_size
, MRB_ARGS_NONE()); /* 15.2.10.5.26 */
3228 mrb_define_method_id(mrb
, s
, MRB_SYM(replace
), mrb_str_replace
, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
3229 mrb_define_method_id(mrb
, s
, MRB_SYM(reverse
), mrb_str_reverse
, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
3230 mrb_define_method_id(mrb
, s
, MRB_SYM_B(reverse
), mrb_str_reverse_bang
, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
3231 mrb_define_method_id(mrb
, s
, MRB_SYM(rindex
), mrb_str_rindex_m
, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
3232 mrb_define_method_id(mrb
, s
, MRB_SYM(size
), mrb_str_size
, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
3233 mrb_define_method_id(mrb
, s
, MRB_SYM(slice
), mrb_str_aref_m
, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
3234 mrb_define_method_id(mrb
, s
, MRB_SYM(split
), mrb_str_split_m
, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
3236 #ifndef MRB_NO_FLOAT
3237 mrb_define_method_id(mrb
, s
, MRB_SYM(to_f
), mrb_str_to_f
, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
3239 mrb_define_method_id(mrb
, s
, MRB_SYM(to_i
), mrb_str_to_i
, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
3240 mrb_define_method_id(mrb
, s
, MRB_SYM(to_s
), mrb_str_to_s
, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
3241 mrb_define_method_id(mrb
, s
, MRB_SYM(to_str
), mrb_str_to_s
, MRB_ARGS_NONE());
3242 mrb_define_method_id(mrb
, s
, MRB_SYM(to_sym
), mrb_str_intern
, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
3243 mrb_define_method_id(mrb
, s
, MRB_SYM(upcase
), mrb_str_upcase
, MRB_ARGS_NONE()); /* 15.2.10.5.42 */
3244 mrb_define_method_id(mrb
, s
, MRB_SYM_B(upcase
), mrb_str_upcase_bang
, MRB_ARGS_NONE()); /* 15.2.10.5.43 */
3245 mrb_define_method_id(mrb
, s
, MRB_SYM(inspect
), mrb_str_inspect
, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
3246 mrb_define_method_id(mrb
, s
, MRB_SYM(bytes
), mrb_str_bytes
, MRB_ARGS_NONE());
3248 mrb_define_method_id(mrb
, s
, MRB_SYM(getbyte
), mrb_str_getbyte
, MRB_ARGS_REQ(1));
3249 mrb_define_method_id(mrb
, s
, MRB_SYM(setbyte
), mrb_str_setbyte
, MRB_ARGS_REQ(2));
3250 mrb_define_method_id(mrb
, s
, MRB_SYM(byteindex
), mrb_str_byteindex_m
, MRB_ARGS_ARG(1,1));
3251 mrb_define_method_id(mrb
, s
, MRB_SYM(byterindex
), mrb_str_byterindex_m
, MRB_ARGS_ARG(1,1));
3252 mrb_define_method_id(mrb
, s
, MRB_SYM(byteslice
), mrb_str_byteslice
, MRB_ARGS_ARG(1,1));
3253 mrb_define_method_id(mrb
, s
, MRB_SYM(bytesplice
), mrb_str_bytesplice
, MRB_ARGS_ANY());
3255 mrb_define_method_id(mrb
, s
, MRB_SYM(__sub_replace
), sub_replace
, MRB_ARGS_REQ(3)); /* internal */
3257 mrb_define_method_id(mrb
, mrb
->kernel_module
, MRB_SYM(__ENCODING__
), mrb_encoding
, MRB_ARGS_NONE());