/*
** symbol.c - Symbol class
**
** See Copyright Notice in mruby.h
*/
9 #include <mruby/khash.h>
10 #include <mruby/string.h>
11 #include <mruby/dump.h>
12 #include <mruby/class.h>
13 #include <mruby/internal.h>
14 #include <mruby/presym.h>
18 #ifndef MRB_PRESYM_SCANNING
19 /* const uint16_t presym_length_table[] */
20 /* const char * const presym_name_table[] */
21 # include <mruby/presym/table.h>
25 presym_find(const char *name
, size_t len
)
27 if (presym_length_table
[MRB_PRESYM_MAX
-1] < len
) return 0;
29 mrb_sym presym_size
= MRB_PRESYM_MAX
;
30 for (mrb_sym start
= 0; presym_size
!= 0; presym_size
/=2) {
31 mrb_sym idx
= start
+presym_size
/2;
32 int cmp
= (int)len
-(int)presym_length_table
[idx
];
34 cmp
= memcmp(name
, presym_name_table
[idx
], len
);
35 if (cmp
== 0) return idx
+1;
46 presym_sym2name(mrb_sym sym
, mrb_int
*lenp
)
48 if (sym
> MRB_PRESYM_MAX
) return NULL
;
49 if (lenp
) *lenp
= presym_length_table
[sym
-1];
50 return presym_name_table
[sym
-1];
53 #endif /* MRB_NO_PRESYM */
55 /* ------------------------------------------------------ */
57 sym_validate_len(mrb_state
*mrb
, size_t len
)
59 if (len
>= UINT16_MAX
) {
60 mrb_raise(mrb
, E_ARGUMENT_ERROR
, "symbol length too long");
64 #ifdef MRB_USE_ALL_SYMBOLS
65 # define SYMBOL_INLINE_P(sym) FALSE
66 # define sym_inline_pack(name, len) 0
67 # define sym_inline_unpack(sym, buf, lenp) NULL
69 # define SYMBOL_INLINE_P(sym) ((sym) >= (1<<24))
71 static const char pack_table
[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
74 sym_inline_pack(const char *name
, size_t len
)
76 #if defined(MRB_WORD_BOXING) && defined(MRB_32BIT) && !defined(MRB_WORDBOX_NO_FLOAT_TRUNCATE)
77 const size_t pack_length_max
= 4;
79 const size_t pack_length_max
= 5;
84 if (len
> pack_length_max
) return 0; /* too long */
85 if (len
== 0) return 0; /* empty string */
86 for (size_t i
=0; i
<len
; i
++) {
88 if (c
== 0) return 0; /* NUL in name */
89 const char *p
= strchr(pack_table
, (int)c
);
90 if (p
== 0) return 0; /* non alnum char */
92 uint32_t bits
= (uint32_t)(p
- pack_table
)+1;
93 sym
|= bits
<<(24-i
*6);
95 mrb_assert(SYMBOL_INLINE_P(sym
));
100 sym_inline_unpack(mrb_sym sym
, char *buf
, mrb_int
*lenp
)
104 mrb_assert(SYMBOL_INLINE_P(sym
));
106 for (i
=0; i
<5; i
++) {
107 uint32_t bits
= sym
>>(24-i
*6) & 0x3f;
108 if (bits
== 0) break;
109 buf
[i
] = pack_table
[bits
-1];
/*
 * Per-symbol "literal" flag, one bit per symbol table index, stored in
 * mrb->symflags (byte i>>3, bit i&7).  Arguments and expansions are fully
 * parenthesized so that expression arguments index the intended bit.
 */
#define sym_lit_p(mrb, i) ((mrb)->symflags[(i)>>3]&(1<<((i)&7)))
#define sym_lit_set(mrb, i) ((mrb)->symflags[(i)>>3]|=(1<<((i)&7)))
#define sym_flags_clear(mrb, i) ((mrb)->symflags[(i)>>3]&=~(1<<((i)&7)))
122 sym_check(mrb_state
*mrb
, const char *name
, size_t len
, mrb_sym i
)
124 const char *symname
= mrb
->symtbl
[i
];
127 if (sym_lit_p(mrb
, i
)) {
128 symlen
= strlen(symname
);
132 symlen
= mrb_packed_int_decode((const uint8_t*)symname
, (const uint8_t**)&symname
);
134 if (len
== symlen
&& memcmp(symname
, name
, len
) == 0) {
141 find_symbol(mrb_state
*mrb
, const char *name
, size_t len
, uint8_t *hashp
)
146 #ifndef MRB_NO_PRESYM
148 i
= presym_find(name
, len
);
153 i
= sym_inline_pack(name
, len
);
156 hash
= mrb_byte_hash((const uint8_t*)name
, len
);
157 if (hashp
) *hashp
= hash
;
159 i
= mrb
->symhash
[hash
];
160 if (i
== 0) return 0;
162 if (sym_check(mrb
, name
, len
, i
)) {
163 return (i
+MRB_PRESYM_MAX
);
165 uint8_t diff
= mrb
->symlink
[i
];
169 if (sym_check(mrb
, name
, len
, i
)) {
170 return (i
+MRB_PRESYM_MAX
);
176 if (diff
== 0) return 0;
183 sym_intern(mrb_state
*mrb
, const char *name
, size_t len
, mrb_bool lit
)
188 sym_validate_len(mrb
, len
);
189 sym
= find_symbol(mrb
, name
, len
, &hash
);
190 if (sym
> 0) return sym
;
192 /* registering a new symbol */
193 sym
= mrb
->symidx
+ 1;
194 if (mrb
->symcapa
<= sym
) {
195 size_t symcapa
= mrb
->symcapa
;
196 if (symcapa
== 0) symcapa
= 100;
197 else symcapa
= (size_t)(symcapa
* 6 / 5);
198 mrb
->symtbl
= (const char**)mrb_realloc(mrb
, (void*)mrb
->symtbl
, sizeof(char*)*symcapa
);
199 mrb
->symflags
= (uint8_t*)mrb_realloc(mrb
, mrb
->symflags
, symcapa
/8+1);
200 memset(mrb
->symflags
+mrb
->symcapa
/8+1, 0, (symcapa
-mrb
->symcapa
)/8);
201 mrb
->symlink
= (uint8_t*)mrb_realloc(mrb
, mrb
->symlink
, symcapa
);
202 mrb
->symcapa
= symcapa
;
204 sym_flags_clear(mrb
, sym
);
205 if ((lit
|| mrb_ro_data_p(name
)) && name
[len
] == 0 && strlen(name
) == len
) {
206 sym_lit_set(mrb
, sym
);
207 mrb
->symtbl
[sym
] = name
;
210 uint32_t ulen
= (uint32_t)len
;
211 size_t ilen
= mrb_packed_int_len(ulen
);
212 char *p
= (char*)mrb_malloc(mrb
, len
+ilen
+1);
213 mrb_packed_int_encode(ulen
, (uint8_t*)p
);
214 memcpy(p
+ilen
, name
, len
);
216 mrb
->symtbl
[sym
] = p
;
218 if (mrb
->symhash
[hash
]) {
219 mrb_sym i
= sym
- mrb
->symhash
[hash
];
221 mrb
->symlink
[sym
] = 0xff;
223 mrb
->symlink
[sym
] = i
;
226 mrb
->symlink
[sym
] = 0;
228 mrb
->symhash
[hash
] = mrb
->symidx
= sym
;
230 return (sym
+MRB_PRESYM_MAX
);
234 mrb_intern(mrb_state
*mrb
, const char *name
, size_t len
)
236 return sym_intern(mrb
, name
, len
, FALSE
);
240 mrb_intern_static(mrb_state
*mrb
, const char *name
, size_t len
)
242 return sym_intern(mrb
, name
, len
, TRUE
);
246 mrb_intern_cstr(mrb_state
*mrb
, const char *name
)
248 return mrb_intern(mrb
, name
, strlen(name
));
252 mrb_intern_str(mrb_state
*mrb
, mrb_value str
)
254 return mrb_intern(mrb
, RSTRING_PTR(str
), RSTRING_LEN(str
));
258 mrb_intern_check(mrb_state
*mrb
, const char *name
, size_t len
)
262 sym_validate_len(mrb
, len
);
263 sym
= find_symbol(mrb
, name
, len
, NULL
);
264 if (sym
> 0) return sym
;
269 mrb_check_intern(mrb_state
*mrb
, const char *name
, size_t len
)
271 mrb_sym sym
= mrb_intern_check(mrb
, name
, len
);
272 if (sym
== 0) return mrb_nil_value();
273 return mrb_symbol_value(sym
);
277 mrb_intern_check_cstr(mrb_state
*mrb
, const char *name
)
279 return mrb_intern_check(mrb
, name
, strlen(name
));
283 mrb_check_intern_cstr(mrb_state
*mrb
, const char *name
)
285 mrb_sym sym
= mrb_intern_check_cstr(mrb
, name
);
286 if (sym
== 0) return mrb_nil_value();
287 return mrb_symbol_value(sym
);
291 mrb_intern_check_str(mrb_state
*mrb
, mrb_value str
)
293 return mrb_intern_check(mrb
, RSTRING_PTR(str
), RSTRING_LEN(str
));
297 mrb_check_intern_str(mrb_state
*mrb
, mrb_value str
)
299 mrb_sym sym
= mrb_intern_check_str(mrb
, str
);
300 if (sym
== 0) return mrb_nil_value();
301 return mrb_symbol_value(sym
);
305 sym2name_len(mrb_state
*mrb
, mrb_sym sym
, char *buf
, mrb_int
*lenp
)
307 if (sym
== 0) goto outofsym
;
308 if (SYMBOL_INLINE_P(sym
)) return sym_inline_unpack(sym
, buf
, lenp
);
310 #ifndef MRB_NO_PRESYM
312 const char *name
= presym_sym2name(sym
, lenp
);
313 if (name
) return name
;
316 sym
-= MRB_PRESYM_MAX
;
318 if (mrb
->symidx
< sym
) {
324 const char *symname
= mrb
->symtbl
[sym
];
325 if (!sym_lit_p(mrb
, sym
)) {
326 uint32_t len
= mrb_packed_int_decode((const uint8_t*)symname
, (const uint8_t**)&symname
);
327 if (lenp
) *lenp
= (mrb_int
)len
;
330 *lenp
= (mrb_int
)strlen(symname
);
336 mrb_sym_name_len(mrb_state
*mrb
, mrb_sym sym
, mrb_int
*lenp
)
338 #ifdef MRB_USE_ALL_SYMBOLS
339 return sym2name_len(mrb
, sym
, NULL
, lenp
);
341 return sym2name_len(mrb
, sym
, mrb
->symbuf
, lenp
);
346 mrb_free_symtbl(mrb_state
*mrb
)
350 for (i
=1,lim
=mrb
->symidx
+1; i
<lim
; i
++) {
351 if (!sym_lit_p(mrb
, i
)) {
352 mrb_free(mrb
, (char*)mrb
->symtbl
[i
]);
355 mrb_free(mrb
, (void*)mrb
->symtbl
);
356 mrb_free(mrb
, (void*)mrb
->symlink
);
357 mrb_free(mrb
, (void*)mrb
->symflags
);
361 mrb_init_symtbl(mrb_state
*mrb
)
/**********************************************************************
 * Document-class: Symbol
 *
 *  <code>Symbol</code> objects represent names and some strings
 *  inside the Ruby interpreter. They are generated using the
 *  <code>:name</code> and <code>:"string"</code> literals
 *  syntax, and by the various <code>to_sym</code> methods. The same
 *  <code>Symbol</code> object will be created for a given name or string
 *  for the duration of a program's execution, regardless of the context
 *  or meaning of that name. Thus if <code>Fred</code> is a constant in
 *  one context, a method in another, and a class in a third, the
 *  <code>Symbol</code> <code>:Fred</code> will be the same object in
 *  all three contexts.
 *
 *     module One
 *       class Fred
 *       end
 *       $f1 = :Fred
 *     end
 *     module Two
 *       Fred = 1
 *       $f2 = :Fred
 *     end
 *     def Fred()
 *     end
 *     $f3 = :Fred
 *     $f1.object_id   #=> 2514190
 *     $f2.object_id   #=> 2514190
 *     $f3.object_id   #=> 2514190
 *
 */
404 * Returns the name or string corresponding to <i>sym</i>.
406 * :fred.to_s #=> "fred"
409 sym_to_s(mrb_state
*mrb
, mrb_value sym
)
411 return mrb_sym_str(mrb
, mrb_symbol(sym
));
418 * Returns the name or string corresponding to <i>sym</i>. Unlike #to_s, the
419 * returned string is frozen.
421 * :fred.name #=> "fred"
422 * :fred.name.frozen? #=> true
425 sym_name(mrb_state
*mrb
, mrb_value vsym
)
427 mrb_sym sym
= mrb_symbol(vsym
);
429 const char *name
= mrb_sym_name_len(mrb
, sym
, &len
);
431 mrb_assert(name
!= NULL
);
432 if (SYMBOL_INLINE_P(sym
)) {
433 return mrb_str_new_frozen(mrb
, name
, len
);
435 return mrb_str_new_static_frozen(mrb
, name
, len
);
/* 15.2.11.3.4  */
/*
 * Document-method: Symbol#to_sym
 *
 *  call-seq:
 *     sym.to_sym   -> sym
 *
 *  In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
 *  to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
 *  in this case.
 */

/* 15.2.11.3.5(x)  */
/*
 *  call-seq:
 *     sym.inspect    -> string
 *
 *  Returns the representation of <i>sym</i> as a symbol literal.
 *
 *     :fred.inspect   #=> ":fred"
 */
#if __STDC__
# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#else  /* not __STDC__ */
/* As in Harbison and Steele.  */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif
/* true for alphanumerics and '_'; the byte 0xff (-1 once sign-extended) is always rejected */
#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_'))
470 is_special_global_name(const char* m
)
473 case '~': case '*': case '$': case '?': case '!': case '@':
474 case '/': case '\\': case ';': case ',': case '.': case '=':
475 case ':': case '<': case '>': case '\"':
476 case '&': case '`': case '\'': case '+':
482 if (is_identchar(*m
)) m
+= 1;
485 if (!ISDIGIT(*m
)) return FALSE
;
486 do m
++; while (ISDIGIT(*m
));
493 symname_p(const char *name
)
495 const char *m
= name
;
496 mrb_bool localid
= FALSE
;
498 if (!m
) return FALSE
;
504 if (is_special_global_name(++m
)) return TRUE
;
508 if (*++m
== '@') ++m
;
513 case '<': ++m
; break;
514 case '=': if (*++m
== '>') m
++; break;
521 case '>': case '=': ++m
; break;
528 case '~': m
++; break;
529 case '=': if (*++m
== '=') m
++; break;
530 default: return FALSE
;
535 if (*++m
== '*') m
++;
539 case '=': case '~': m
++;
543 if (*++m
== '@') m
++;
546 if (*++m
== '|') m
++;
549 if (*++m
== '&') m
++;
552 case '^': case '/': case '%': case '~': case '`':
557 if (*++m
!= ']') return FALSE
;
558 if (*++m
== '=') m
++;
562 localid
= !ISUPPER(*m
);
564 if (*m
!= '_' && !ISALPHA(*m
)) return FALSE
;
565 while (is_identchar(*m
)) m
+= 1;
568 case '!': case '?': case '=': m
++;
574 return *m
? FALSE
: TRUE
;
578 sym_inspect(mrb_state
*mrb
, mrb_value sym
)
583 mrb_sym id
= mrb_symbol(sym
);
586 name
= mrb_sym_name_len(mrb
, id
, &len
);
587 str
= mrb_str_new(mrb
, NULL
, len
+1);
588 sp
= RSTRING_PTR(str
);
590 memcpy(sp
+1, name
, len
);
591 mrb_assert_int_fit(mrb_int
, len
, size_t, SIZE_MAX
);
592 if (!symname_p(name
) || strlen(name
) != (size_t)len
) {
593 str
= mrb_str_inspect(mrb
, str
);
594 sp
= RSTRING_PTR(str
);
598 #ifdef MRB_UTF8_STRING
599 if (SYMBOL_INLINE_P(id
)) RSTR_SET_ASCII_FLAG(mrb_str_ptr(str
));
605 mrb_sym_str(mrb_state
*mrb
, mrb_sym sym
)
608 const char *name
= mrb_sym_name_len(mrb
, sym
, &len
);
610 if (!name
) return mrb_undef_value(); /* can't happen */
611 if (SYMBOL_INLINE_P(sym
)) {
612 mrb_value str
= mrb_str_new(mrb
, name
, len
);
613 RSTR_SET_ASCII_FLAG(mrb_str_ptr(str
));
616 return mrb_str_new_static(mrb
, name
, len
);
620 sym_cstr(mrb_state
*mrb
, mrb_sym sym
, mrb_bool dump
)
623 const char *name
= mrb_sym_name_len(mrb
, sym
, &len
);
625 if (!name
) return NULL
;
626 if (strlen(name
) == (size_t)len
&& (!dump
|| symname_p(name
))) {
630 mrb_value str
= mrb_str_new_static(mrb
, name
, len
);
631 str
= mrb_str_dump(mrb
, str
);
632 return RSTRING_PTR(str
);
637 mrb_sym_name(mrb_state
*mrb
, mrb_sym sym
)
639 return sym_cstr(mrb
, sym
, FALSE
);
643 mrb_sym_dump(mrb_state
*mrb
, mrb_sym sym
)
645 return sym_cstr(mrb
, sym
, TRUE
);
648 #define lesser(a,b) (((a)>(b))?(b):(a))
651 sym_cmp(mrb_state
*mrb
, mrb_value s1
)
653 mrb_value s2
= mrb_get_arg1(mrb
);
656 if (!mrb_symbol_p(s2
)) return mrb_nil_value();
657 sym1
= mrb_symbol(s1
);
658 sym2
= mrb_symbol(s2
);
659 if (sym1
== sym2
) return mrb_fixnum_value(0);
663 mrb_int len
, len1
, len2
;
664 char buf1
[8], buf2
[8];
666 p1
= sym2name_len(mrb
, sym1
, buf1
, &len1
);
667 p2
= sym2name_len(mrb
, sym2
, buf2
, &len2
);
668 len
= lesser(len1
, len2
);
669 retval
= memcmp(p1
, p2
, len
);
671 if (len1
== len2
) return mrb_fixnum_value(0);
672 if (len1
> len2
) return mrb_fixnum_value(1);
673 return mrb_fixnum_value(-1);
675 if (retval
> 0) return mrb_fixnum_value(1);
676 return mrb_fixnum_value(-1);
681 mrb_init_symbol(mrb_state
*mrb
)
685 mrb
->symbol_class
= sym
= mrb_define_class_id(mrb
, MRB_SYM(Symbol
), mrb
->object_class
); /* 15.2.11 */
686 MRB_SET_INSTANCE_TT(sym
, MRB_TT_SYMBOL
);
687 mrb_undef_class_method_id(mrb
, sym
, MRB_SYM(new));
689 mrb_define_method_id(mrb
, sym
, MRB_SYM(to_s
), sym_to_s
, MRB_ARGS_NONE()); /* 15.2.11.3.3 */
690 mrb_define_method_id(mrb
, sym
, MRB_SYM(name
), sym_name
, MRB_ARGS_NONE());
691 mrb_define_method_id(mrb
, sym
, MRB_SYM(to_sym
), mrb_obj_itself
, MRB_ARGS_NONE()); /* 15.2.11.3.4 */
692 mrb_define_method_id(mrb
, sym
, MRB_SYM(inspect
), sym_inspect
, MRB_ARGS_NONE()); /* 15.2.11.3.5(x) */
693 mrb_define_method_id(mrb
, sym
, MRB_OPSYM(cmp
), sym_cmp
, MRB_ARGS_REQ(1));
694 mrb_define_method_id(mrb
, sym
, MRB_OPSYM(eq
), mrb_obj_equal_m
, MRB_ARGS_REQ(1));