Handle mutating of array passed to Set.new during iteration
[ruby.git] / set.c
blob6fb04d8788adc0576047b03371fdf49f501818f1
1 /* This implements sets using the same hash table implementation as in
2 st.c, but without a value for each hash entry. This results in the
3 same basic performance characteristics as when using an st table,
4 but uses 1/3 less memory.
5 */
7 #include "id.h"
8 #include "internal.h"
9 #include "internal/bits.h"
10 #include "internal/hash.h"
11 #include "internal/proc.h"
12 #include "internal/sanitizers.h"
13 #include "internal/set_table.h"
14 #include "internal/symbol.h"
15 #include "internal/variable.h"
16 #include "ruby_assert.h"
18 #include <stdio.h>
19 #ifdef HAVE_STDLIB_H
20 #include <stdlib.h>
21 #endif
22 #include <string.h>
24 #ifndef SET_DEBUG
25 #define SET_DEBUG 0
26 #endif
28 #if SET_DEBUG
29 #include "internal/gc.h"
30 #endif
32 static st_index_t
33 dbl_to_index(double d)
35 union {double d; st_index_t i;} u;
36 u.d = d;
37 return u.i;
40 static const uint64_t prime1 = ((uint64_t)0x2e0bb864 << 32) | 0xe9ea7df5;
41 static const uint32_t prime2 = 0x830fcab9;
43 static inline uint64_t
44 mult_and_mix(uint64_t m1, uint64_t m2)
46 #if defined HAVE_UINT128_T
47 uint128_t r = (uint128_t) m1 * (uint128_t) m2;
48 return (uint64_t) (r >> 64) ^ (uint64_t) r;
49 #else
50 uint64_t hm1 = m1 >> 32, hm2 = m2 >> 32;
51 uint64_t lm1 = m1, lm2 = m2;
52 uint64_t v64_128 = hm1 * hm2;
53 uint64_t v32_96 = hm1 * lm2 + lm1 * hm2;
54 uint64_t v1_32 = lm1 * lm2;
56 return (v64_128 + (v32_96 >> 32)) ^ ((v32_96 << 32) + v1_32);
57 #endif
60 static inline uint64_t
61 key64_hash(uint64_t key, uint32_t seed)
63 return mult_and_mix(key + seed, prime1);
66 /* Should cast down the result for each purpose */
67 #define set_index_hash(index) key64_hash(rb_hash_start(index), prime2)
69 static st_index_t
70 set_ident_hash(st_data_t n)
72 #ifdef USE_FLONUM /* RUBY */
74 * - flonum (on 64-bit) is pathologically bad, mix the actual
75 * float value in, but do not use the float value as-is since
76 * many integers get interpreted as 2.0 or -2.0 [Bug #10761]
78 if (FLONUM_P(n)) {
79 n ^= dbl_to_index(rb_float_value(n));
81 #endif
83 return (st_index_t)set_index_hash((st_index_t)n);
86 static const struct st_hash_type identhash = {
87 rb_st_numcmp,
88 set_ident_hash,
91 static const struct st_hash_type objhash = {
92 rb_any_cmp,
93 rb_any_hash,
96 VALUE rb_cSet;
98 #define id_each idEach
99 static ID id_each_entry;
100 static ID id_any_p;
101 static ID id_new;
102 static ID id_i_hash;
103 static ID id_set_iter_lev;
105 #define RSET_INITIALIZED FL_USER1
106 #define RSET_LEV_MASK (FL_USER13 | FL_USER14 | FL_USER15 | /* FL 13..19 */ \
107 FL_USER16 | FL_USER17 | FL_USER18 | FL_USER19)
108 #define RSET_LEV_SHIFT (FL_USHIFT + 13)
109 #define RSET_LEV_MAX 127 /* 7 bits */
111 #define SET_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(SET_DEBUG, expr, #expr)
113 #define RSET_SIZE(set) set_table_size(RSET_TABLE(set))
114 #define RSET_EMPTY(set) (RSET_SIZE(set) == 0)
115 #define RSET_SIZE_NUM(set) SIZET2NUM(RSET_SIZE(set))
116 #define RSET_IS_MEMBER(sobj, item) set_lookup(RSET_TABLE(set), (st_data_t)(item))
117 #define RSET_COMPARE_BY_IDENTITY(set) (RSET_TABLE(set)->type == &identhash)
119 struct set_object {
120 set_table table;
123 static int
124 mark_key(st_data_t key, st_data_t data)
126 rb_gc_mark_movable((VALUE)key);
128 return ST_CONTINUE;
131 static void
132 set_mark(void *ptr)
134 struct set_object *sobj = ptr;
135 if (sobj->table.entries) set_foreach(&sobj->table, mark_key, 0);
138 static void
139 set_free_embedded(struct set_object *sobj)
141 free((&sobj->table)->bins);
142 free((&sobj->table)->entries);
145 static void
146 set_free(void *ptr)
148 struct set_object *sobj = ptr;
149 set_free_embedded(sobj);
150 memset(&sobj->table, 0, sizeof(sobj->table));
153 static size_t
154 set_size(const void *ptr)
156 const struct set_object *sobj = ptr;
157 /* Do not count the table size twice, as it is embedded */
158 return (unsigned long)set_memsize(&sobj->table) - sizeof(sobj->table);
161 static int
162 set_foreach_replace(st_data_t key, st_data_t argp, int error)
164 if (rb_gc_location((VALUE)key) != (VALUE)key) {
165 return ST_REPLACE;
168 return ST_CONTINUE;
171 static int
172 set_replace_ref(st_data_t *key, st_data_t argp, int existing)
174 if (rb_gc_location((VALUE)*key) != (VALUE)*key) {
175 *key = rb_gc_location((VALUE)*key);
178 return ST_CONTINUE;
181 static void
182 set_update_references(void *ptr)
184 struct set_object *sobj = ptr;
185 set_foreach_with_replace(&sobj->table, set_foreach_replace, set_replace_ref, 0);
188 static const rb_data_type_t set_data_type = {
189 .wrap_struct_name = "set",
190 .function = {
191 .dmark = set_mark,
192 .dfree = set_free,
193 .dsize = set_size,
194 .dcompact = set_update_references,
196 .flags = RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE
199 static inline set_table *
200 RSET_TABLE(VALUE set)
202 struct set_object *sobj;
203 TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj);
204 return &sobj->table;
207 static unsigned long
208 iter_lev_in_ivar(VALUE set)
210 VALUE levval = rb_ivar_get(set, id_set_iter_lev);
211 SET_ASSERT(FIXNUM_P(levval));
212 long lev = FIX2LONG(levval);
213 SET_ASSERT(lev >= 0);
214 return (unsigned long)lev;
217 void rb_ivar_set_internal(VALUE obj, ID id, VALUE val);
219 static void
220 iter_lev_in_ivar_set(VALUE set, unsigned long lev)
222 SET_ASSERT(lev >= RSET_LEV_MAX);
223 SET_ASSERT(POSFIXABLE(lev)); /* POSFIXABLE means fitting to long */
224 rb_ivar_set_internal(set, id_set_iter_lev, LONG2FIX((long)lev));
227 static inline unsigned long
228 iter_lev_in_flags(VALUE set)
230 return (unsigned long)((RBASIC(set)->flags >> RSET_LEV_SHIFT) & RSET_LEV_MAX);
233 static inline void
234 iter_lev_in_flags_set(VALUE set, unsigned long lev)
236 SET_ASSERT(lev <= RSET_LEV_MAX);
237 RBASIC(set)->flags = ((RBASIC(set)->flags & ~RSET_LEV_MASK) | ((VALUE)lev << RSET_LEV_SHIFT));
240 static inline bool
241 set_iterating_p(VALUE set)
243 return iter_lev_in_flags(set) > 0;
246 static void
247 set_iter_lev_inc(VALUE set)
249 unsigned long lev = iter_lev_in_flags(set);
250 if (lev == RSET_LEV_MAX) {
251 lev = iter_lev_in_ivar(set) + 1;
252 if (!POSFIXABLE(lev)) { /* paranoiac check */
253 rb_raise(rb_eRuntimeError, "too much nested iterations");
256 else {
257 lev += 1;
258 iter_lev_in_flags_set(set, lev);
259 if (lev < RSET_LEV_MAX) return;
261 iter_lev_in_ivar_set(set, lev);
264 static void
265 set_iter_lev_dec(VALUE set)
267 unsigned long lev = iter_lev_in_flags(set);
268 if (lev == RSET_LEV_MAX) {
269 lev = iter_lev_in_ivar(set);
270 if (lev > RSET_LEV_MAX) {
271 iter_lev_in_ivar_set(set, lev-1);
272 return;
274 rb_attr_delete(set, id_set_iter_lev);
276 else if (lev == 0) {
277 rb_raise(rb_eRuntimeError, "iteration level underflow");
279 iter_lev_in_flags_set(set, lev - 1);
282 static VALUE
283 set_foreach_ensure(VALUE set)
285 set_iter_lev_dec(set);
286 return 0;
289 typedef int set_foreach_func(VALUE, VALUE);
291 struct set_foreach_arg {
292 VALUE set;
293 set_foreach_func *func;
294 VALUE arg;
297 static int
298 set_iter_status_check(int status)
300 if (status == ST_CONTINUE) {
301 return ST_CHECK;
304 return status;
307 static int
308 set_foreach_iter(st_data_t key, st_data_t argp, int error)
310 struct set_foreach_arg *arg = (struct set_foreach_arg *)argp;
312 if (error) return ST_STOP;
314 set_table *tbl = RSET_TABLE(arg->set);
315 int status = (*arg->func)((VALUE)key, arg->arg);
317 if (RSET_TABLE(arg->set) != tbl) {
318 rb_raise(rb_eRuntimeError, "reset occurred during iteration");
321 return set_iter_status_check(status);
324 static VALUE
325 set_foreach_call(VALUE arg)
327 VALUE set = ((struct set_foreach_arg *)arg)->set;
328 int ret = 0;
329 ret = set_foreach_check(RSET_TABLE(set), set_foreach_iter,
330 (st_data_t)arg, (st_data_t)Qundef);
331 if (ret) {
332 rb_raise(rb_eRuntimeError, "ret: %d, set modified during iteration", ret);
334 return Qnil;
337 static void
338 set_iter(VALUE set, set_foreach_func *func, VALUE farg)
340 struct set_foreach_arg arg;
342 if (RSET_EMPTY(set))
343 return;
344 arg.set = set;
345 arg.func = func;
346 arg.arg = farg;
347 if (RB_OBJ_FROZEN(set)) {
348 set_foreach_call((VALUE)&arg);
350 else {
351 set_iter_lev_inc(set);
352 rb_ensure(set_foreach_call, (VALUE)&arg, set_foreach_ensure, set);
356 NORETURN(static void no_new_item(void));
357 static void
358 no_new_item(void)
360 rb_raise(rb_eRuntimeError, "can't add a new item into set during iteration");
363 static void
364 set_compact_after_delete(VALUE set)
366 if (!set_iterating_p(set)) {
367 set_compact_table(RSET_TABLE(set));
371 static int
372 set_table_insert_wb(set_table *tab, VALUE set, VALUE key, VALUE *key_addr)
374 if (tab->type != &identhash && rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) {
375 key = rb_hash_key_str(key);
376 if (key_addr) *key_addr = key;
378 int ret = set_insert(tab, (st_data_t)key);
379 if (ret == 0) RB_OBJ_WRITTEN(set, Qundef, key);
380 return ret;
383 static int
384 set_insert_wb(VALUE set, VALUE key, VALUE *key_addr)
386 return set_table_insert_wb(RSET_TABLE(set), set, key, key_addr);
389 static VALUE
390 set_alloc_with_size(VALUE klass, st_index_t size)
392 VALUE set;
393 struct set_object *sobj;
395 set = TypedData_Make_Struct(klass, struct set_object, &set_data_type, sobj);
396 set_init_table_with_size(&sobj->table, &objhash, size);
398 return set;
402 static VALUE
403 set_s_alloc(VALUE klass)
405 return set_alloc_with_size(klass, 0);
408 static VALUE
409 set_s_create(int argc, VALUE *argv, VALUE klass)
411 VALUE set = set_alloc_with_size(klass, argc);
412 set_table *table = RSET_TABLE(set);
413 int i;
415 for (i=0; i < argc; i++) {
416 set_table_insert_wb(table, set, argv[i], NULL);
419 return set;
422 static void
423 check_set(VALUE arg)
425 if (!rb_obj_is_kind_of(arg, rb_cSet)) {
426 rb_raise(rb_eArgError, "value must be a set");
430 static ID
431 enum_method_id(VALUE other)
433 if (rb_respond_to(other, id_each_entry)) {
434 return id_each_entry;
436 else if (rb_respond_to(other, id_each)) {
437 return id_each;
439 else {
440 rb_raise(rb_eArgError, "value must be enumerable");
444 static VALUE
445 set_enum_size(VALUE set, VALUE args, VALUE eobj)
447 return RSET_SIZE_NUM(set);
450 static VALUE
451 set_initialize_without_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set))
453 VALUE element = i;
454 set_insert_wb(set, element, &element);
455 return element;
458 static VALUE
459 set_initialize_with_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set))
461 VALUE element = rb_yield(i);
462 set_insert_wb(set, element, &element);
463 return element;
467 * call-seq:
468 * Set.new -> new_set
469 * Set.new(enum) -> new_set
470 * Set.new(enum) { |elem| ... } -> new_set
472 * Creates a new set containing the elements of the given enumerable
473 * object.
475 * If a block is given, the elements of enum are preprocessed by the
476 * given block.
478 * Set.new([1, 2]) #=> #<Set: {1, 2}>
479 * Set.new([1, 2, 1]) #=> #<Set: {1, 2}>
480 * Set.new([1, 'c', :s]) #=> #<Set: {1, "c", :s}>
481 * Set.new(1..5) #=> #<Set: {1, 2, 3, 4, 5}>
482 * Set.new([1, 2, 3]) { |x| x * x } #=> #<Set: {1, 4, 9}>
484 static VALUE
485 set_i_initialize(int argc, VALUE *argv, VALUE set)
487 if (RBASIC(set)->flags & RSET_INITIALIZED) {
488 rb_raise(rb_eRuntimeError, "cannot reinitialize set");
490 RBASIC(set)->flags |= RSET_INITIALIZED;
492 VALUE other;
493 rb_check_arity(argc, 0, 1);
495 if (argc > 0 && (other = argv[0]) != Qnil) {
496 if (RB_TYPE_P(other, T_ARRAY)) {
497 long i;
498 int block_given = rb_block_given_p();
499 set_table *into = RSET_TABLE(set);
500 for (i=0; i<RARRAY_LEN(other); i++) {
501 VALUE key = RARRAY_AREF(other, i);
502 if (block_given) key = rb_yield(key);
503 set_table_insert_wb(into, set, key, NULL);
506 else {
507 rb_block_call(other, enum_method_id(other), 0, 0,
508 rb_block_given_p() ? set_initialize_with_block : set_initialize_without_block,
509 set);
513 return set;
516 static VALUE
517 set_i_initialize_copy(VALUE set, VALUE other)
519 if (set == other) return set;
521 if (set_iterating_p(set)) {
522 rb_raise(rb_eRuntimeError, "cannot replace set during iteration");
525 struct set_object *sobj;
526 TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj);
528 set_free_embedded(sobj);
529 set_copy(&sobj->table, RSET_TABLE(other));
531 return set;
534 static int
535 set_inspect_i(st_data_t key, st_data_t arg)
537 VALUE str = (VALUE)arg;
538 if (RSTRING_LEN(str) > 8) {
539 rb_str_buf_cat_ascii(str, ", ");
541 rb_str_buf_append(str, rb_inspect((VALUE)key));
543 return ST_CONTINUE;
546 static VALUE
547 set_inspect(VALUE set, VALUE dummy, int recur)
549 VALUE str;
551 if (recur) return rb_usascii_str_new2("#<Set: {...}>");
552 str = rb_str_buf_new2("#<Set: {");
553 set_iter(set, set_inspect_i, str);
554 rb_str_buf_cat2(str, "}>");
556 return str;
560 * call-seq:
561 * inspect -> new_string
563 * Returns a new string containing the set entries:
565 * s = Set.new
566 * s.inspect # => "#<Set: {}>"
567 * s.add(1)
568 * s.inspect # => "#<Set: {1}>"
569 * s.add(2)
570 * s.inspect # => "#<Set: {1, 2}>"
572 * Related: see {Methods for Converting}[rdoc-ref:Set@Methods+for+Converting].
574 static VALUE
575 set_i_inspect(VALUE set)
577 return rb_exec_recursive(set_inspect, set, 0);
580 static int
581 set_to_a_i(st_data_t key, st_data_t arg)
583 rb_ary_push((VALUE)arg, (VALUE)key);
584 return ST_CONTINUE;
588 * call-seq:
589 * to_a -> array
591 * Returns an array containing all elements in the set.
593 * Set[1, 2].to_a #=> [1, 2]
594 * Set[1, 'c', :s].to_a #=> [1, "c", :s]
596 static VALUE
597 set_i_to_a(VALUE set)
599 st_index_t size = RSET_SIZE(set);
600 VALUE ary = rb_ary_new_capa(size);
602 if (size == 0) return ary;
604 if (ST_DATA_COMPATIBLE_P(VALUE)) {
605 RARRAY_PTR_USE(ary, ptr, {
606 size = set_keys(RSET_TABLE(set), ptr, size);
608 rb_gc_writebarrier_remember(ary);
609 rb_ary_set_len(ary, size);
611 else {
612 set_iter(set, set_to_a_i, (st_data_t)ary);
614 return ary;
618 * call-seq:
619 * to_set(klass = Set, *args, &block) -> self or new_set
621 * Returns self if receiver is an instance of +Set+ and no arguments or
622 * block are given. Otherwise, converts the set to another with
623 * <tt>klass.new(self, *args, &block)</tt>.
625 * In subclasses, returns `klass.new(self, *args, &block)` unless overridden.
627 static VALUE
628 set_i_to_set(int argc, VALUE *argv, VALUE set)
630 VALUE klass;
632 if (argc == 0) {
633 klass = rb_cSet;
634 argv = &set;
635 argc = 1;
637 else {
638 klass = argv[0];
639 argv[0] = set;
642 if (klass == rb_cSet && rb_obj_is_instance_of(set, rb_cSet) &&
643 argc == 1 && !rb_block_given_p()) {
644 return set;
647 return rb_funcall_passing_block(klass, id_new, argc, argv);
651 * call-seq:
652 * join(separator=nil)-> new_string
654 * Returns a string created by converting each element of the set to a string.
656 static VALUE
657 set_i_join(int argc, VALUE *argv, VALUE set)
659 rb_check_arity(argc, 0, 1);
660 return rb_ary_join(set_i_to_a(set), argc == 0 ? Qnil : argv[0]);
664 * call-seq:
665 * add(obj) -> self
667 * Adds the given object to the set and returns self. Use `merge` to
668 * add many elements at once.
670 * Set[1, 2].add(3) #=> #<Set: {1, 2, 3}>
671 * Set[1, 2].add([3, 4]) #=> #<Set: {1, 2, [3, 4]}>
672 * Set[1, 2].add(2) #=> #<Set: {1, 2}>
674 static VALUE
675 set_i_add(VALUE set, VALUE item)
677 rb_check_frozen(set);
678 if (set_iterating_p(set)) {
679 if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) {
680 no_new_item();
683 else {
684 set_insert_wb(set, item, NULL);
686 return set;
690 * call-seq:
691 * add?(obj) -> self or nil
693 * Adds the given object to the set and returns self. If the object is
694 * already in the set, returns nil.
696 * Set[1, 2].add?(3) #=> #<Set: {1, 2, 3}>
697 * Set[1, 2].add?([3, 4]) #=> #<Set: {1, 2, [3, 4]}>
698 * Set[1, 2].add?(2) #=> nil
700 static VALUE
701 set_i_add_p(VALUE set, VALUE item)
703 rb_check_frozen(set);
704 if (set_iterating_p(set)) {
705 if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) {
706 no_new_item();
708 return Qnil;
710 else {
711 return set_insert_wb(set, item, NULL) ? Qnil : set;
716 * call-seq:
717 * delete(obj) -> self
719 * Deletes the given object from the set and returns self. Use subtract
720 * to delete many items at once.
722 static VALUE
723 set_i_delete(VALUE set, VALUE item)
725 rb_check_frozen(set);
726 if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) {
727 set_compact_after_delete(set);
729 return set;
733 * call-seq:
734 * delete?(obj) -> self or nil
736 * Deletes the given object from the set and returns self. If the
737 * object is not in the set, returns nil.
739 static VALUE
740 set_i_delete_p(VALUE set, VALUE item)
742 rb_check_frozen(set);
743 if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) {
744 set_compact_after_delete(set);
745 return set;
747 return Qnil;
750 static int
751 set_delete_if_i(st_data_t key, st_data_t dummy)
753 return RTEST(rb_yield((VALUE)key)) ? ST_DELETE : ST_CONTINUE;
757 * call-seq:
758 * delete_if { |o| ... } -> self
759 * delete_if -> enumerator
761 * Deletes every element of the set for which block evaluates to
762 * true, and returns self. Returns an enumerator if no block is given.
764 static VALUE
765 set_i_delete_if(VALUE set)
767 RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
768 rb_check_frozen(set);
769 set_iter(set, set_delete_if_i, 0);
770 set_compact_after_delete(set);
771 return set;
775 * call-seq:
776 * reject! { |o| ... } -> self
777 * reject! -> enumerator
779 * Equivalent to Set#delete_if, but returns nil if no changes were made.
780 * Returns an enumerator if no block is given.
782 static VALUE
783 set_i_reject(VALUE set)
785 RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
786 rb_check_frozen(set);
788 set_table *table = RSET_TABLE(set);
789 size_t n = set_table_size(table);
790 set_iter(set, set_delete_if_i, 0);
792 if (n == set_table_size(table)) return Qnil;
794 set_compact_after_delete(set);
795 return set;
798 static int
799 set_classify_i(st_data_t key, st_data_t tmp)
801 VALUE* args = (VALUE*)tmp;
802 VALUE hash = args[0];
803 VALUE hash_key = rb_yield(key);
804 VALUE set = rb_hash_lookup2(hash, hash_key, Qundef);
805 if (set == Qundef) {
806 set = set_s_alloc(args[1]);
807 rb_hash_aset(hash, hash_key, set);
809 set_i_add(set, key);
811 return ST_CONTINUE;
815 * call-seq:
816 * classify { |o| ... } -> hash
817 * classify -> enumerator
819 * Classifies the set by the return value of the given block and
820 * returns a hash of {value => set of elements} pairs. The block is
821 * called once for each element of the set, passing the element as
822 * parameter.
824 * files = Set.new(Dir.glob("*.rb"))
825 * hash = files.classify { |f| File.mtime(f).year }
826 * hash #=> {2000 => #<Set: {"a.rb", "b.rb"}>,
827 * # 2001 => #<Set: {"c.rb", "d.rb", "e.rb"}>,
828 * # 2002 => #<Set: {"f.rb"}>}
830 * Returns an enumerator if no block is given.
832 static VALUE
833 set_i_classify(VALUE set)
835 RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
836 VALUE args[2];
837 args[0] = rb_hash_new();
838 args[1] = rb_obj_class(set);
839 set_iter(set, set_classify_i, (st_data_t)args);
840 return args[0];
843 struct set_divide_args {
844 VALUE self;
845 VALUE set_class;
846 VALUE final_set;
847 VALUE hash;
848 VALUE current_set;
849 VALUE current_item;
850 unsigned long ni;
851 unsigned long nj;
854 static VALUE
855 set_divide_block0(RB_BLOCK_CALL_FUNC_ARGLIST(j, arg))
857 struct set_divide_args *args = (struct set_divide_args *)arg;
858 if (args->nj > args->ni) {
859 VALUE i = args->current_item;
860 if (RTEST(rb_yield_values(2, i, j)) && RTEST(rb_yield_values(2, j, i))) {
861 VALUE hash = args->hash;
862 if (args->current_set == Qnil) {
863 VALUE set = rb_hash_aref(hash, j);
864 if (set == Qnil) {
865 VALUE both[2] = {i, j};
866 set = set_s_create(2, both, args->set_class);
867 rb_hash_aset(hash, i, set);
868 rb_hash_aset(hash, j, set);
869 set_i_add(args->final_set, set);
871 else {
872 set_i_add(set, i);
873 rb_hash_aset(hash, i, set);
875 args->current_set = set;
877 else {
878 set_i_add(args->current_set, j);
879 rb_hash_aset(hash, j, args->current_set);
883 args->nj++;
884 return j;
887 static VALUE
888 set_divide_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, arg))
890 struct set_divide_args *args = (struct set_divide_args *)arg;
891 VALUE hash = args->hash;
892 args->current_set = rb_hash_aref(hash, i);
893 args->current_item = i;
894 args->nj = 0;
895 rb_block_call(args->self, id_each, 0, 0, set_divide_block0, arg);
896 if (args->current_set == Qnil) {
897 VALUE set = set_s_create(1, &i, args->set_class);
898 rb_hash_aset(hash, i, set);
899 set_i_add(args->final_set, set);
901 args->ni++;
902 return i;
905 static void set_merge_enum_into(VALUE set, VALUE arg);
908 * call-seq:
909 * divide { |o1, o2| ... } -> set
910 * divide { |o| ... } -> set
911 * divide -> enumerator
913 * Divides the set into a set of subsets according to the commonality
914 * defined by the given block.
916 * If the arity of the block is 2, elements o1 and o2 are in common
917 * if both block.call(o1, o2) and block.call(o2, o1) are true.
918 * Otherwise, elements o1 and o2 are in common if
919 * block.call(o1) == block.call(o2).
921 * numbers = Set[1, 3, 4, 6, 9, 10, 11]
922 * set = numbers.divide { |i,j| (i - j).abs == 1 }
923 * set #=> #<Set: {#<Set: {1}>,
924 * # #<Set: {3, 4}>,
925 * # #<Set: {6}>}>
926 * # #<Set: {9, 10, 11}>,
928 * Returns an enumerator if no block is given.
930 static VALUE
931 set_i_divide(VALUE set)
933 RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
935 if (rb_block_arity() == 2) {
936 VALUE final_set = set_s_create(0, 0, rb_cSet);
937 struct set_divide_args args = {
938 .self = set,
939 .set_class = rb_obj_class(set),
940 .final_set = final_set,
941 .hash = rb_hash_new(),
942 .current_set = 0,
943 .current_item = 0,
944 .ni = 0,
945 .nj = 0
947 rb_block_call(set, id_each, 0, 0, set_divide_block, (VALUE)&args);
948 return final_set;
951 VALUE values = rb_hash_values(set_i_classify(set));
952 set = set_alloc_with_size(rb_cSet, RARRAY_LEN(values));
953 set_merge_enum_into(set, values);
954 return set;
957 static int
958 set_clear_i(st_data_t key, st_data_t dummy)
960 return ST_DELETE;
964 * call-seq:
965 * clear -> self
967 * Removes all elements and returns self.
969 * set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}>
970 * set.clear #=> #<Set: {}>
971 * set #=> #<Set: {}>
973 static VALUE
974 set_i_clear(VALUE set)
976 rb_check_frozen(set);
977 if (RSET_SIZE(set) == 0) return set;
978 if (set_iterating_p(set)) {
979 set_iter(set, set_clear_i, 0);
981 else {
982 set_clear(RSET_TABLE(set));
983 set_compact_after_delete(set);
985 return set;
988 struct set_intersection_data {
989 VALUE set;
990 set_table *into;
991 set_table *other;
994 static int
995 set_intersection_i(st_data_t key, st_data_t tmp)
997 struct set_intersection_data *data = (struct set_intersection_data *)tmp;
998 if (set_lookup(data->other, key)) {
999 set_table_insert_wb(data->into, data->set, key, NULL);
1002 return ST_CONTINUE;
1005 static VALUE
1006 set_intersection_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, data))
1008 set_intersection_i((st_data_t)i, (st_data_t)data);
1009 return i;
1013 * call-seq:
1014 * set & enum -> new_set
1016 * Returns a new set containing elements common to the set and the given
1017 * enumerable object.
1019 * Set[1, 3, 5] & Set[3, 2, 1] #=> #<Set: {3, 1}>
1020 * Set['a', 'b', 'z'] & ['a', 'b', 'c'] #=> #<Set: {"a", "b"}>
1022 static VALUE
1023 set_i_intersection(VALUE set, VALUE other)
1025 VALUE new_set = set_s_alloc(rb_obj_class(set));
1026 set_table *stable = RSET_TABLE(set);
1027 set_table *ntable = RSET_TABLE(new_set);
1029 if (rb_obj_is_kind_of(other, rb_cSet)) {
1030 set_table *otable = RSET_TABLE(other);
1031 if (set_table_size(stable) >= set_table_size(otable)) {
1032 /* Swap so we iterate over the smaller set */
1033 otable = stable;
1034 set = other;
1037 struct set_intersection_data data = {
1038 .set = new_set,
1039 .into = ntable,
1040 .other = otable
1042 set_iter(set, set_intersection_i, (st_data_t)&data);
1044 else {
1045 struct set_intersection_data data = {
1046 .set = new_set,
1047 .into = ntable,
1048 .other = stable
1050 rb_block_call(other, enum_method_id(other), 0, 0, set_intersection_block, (VALUE)&data);
1053 return new_set;
1057 * call-seq:
1058 * include?(item) -> true or false
1060 * Returns true if the set contains the given object:
1062 * Set[1, 2, 3].include? 2 #=> true
1063 * Set[1, 2, 3].include? 4 #=> false
1065 * Note that <code>include?</code> and <code>member?</code> do not test member
1066 * equality using <code>==</code> as do other Enumerables.
1068 * This is aliased to #===, so it is usable in +case+ expressions:
1070 * case :apple
1071 * when Set[:potato, :carrot]
1072 * "vegetable"
1073 * when Set[:apple, :banana]
1074 * "fruit"
1075 * end
1076 * # => "fruit"
1078 * See also Enumerable#include?
1080 static VALUE
1081 set_i_include(VALUE set, VALUE item)
1083 return RBOOL(RSET_IS_MEMBER(set, item));
1086 struct set_merge_args {
1087 VALUE set;
1088 set_table *into;
1091 static int
1092 set_merge_i(st_data_t key, st_data_t data)
1094 struct set_merge_args *args = (struct set_merge_args *)data;
1095 set_table_insert_wb(args->into, args->set, key, NULL);
1096 return ST_CONTINUE;
1099 static VALUE
1100 set_merge_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set))
1102 VALUE element = key;
1103 set_insert_wb(set, element, &element);
1104 return element;
1107 static void
1108 set_merge_enum_into(VALUE set, VALUE arg)
1110 if (rb_obj_is_kind_of(arg, rb_cSet)) {
1111 struct set_merge_args args = {
1112 .set = set,
1113 .into = RSET_TABLE(set)
1115 set_iter(arg, set_merge_i, (st_data_t)&args);
1117 else if (RB_TYPE_P(arg, T_ARRAY)) {
1118 long i;
1119 set_table *into = RSET_TABLE(set);
1120 for (i=0; i<RARRAY_LEN(arg); i++) {
1121 set_table_insert_wb(into, set, RARRAY_AREF(arg, i), NULL);
1124 else {
1125 rb_block_call(arg, enum_method_id(arg), 0, 0, set_merge_block, (VALUE)set);
1130 * call-seq:
1131 * merge(*enums, **nil) -> self
1133 * Merges the elements of the given enumerable objects to the set and
1134 * returns self.
1136 static VALUE
1137 set_i_merge(int argc, VALUE *argv, VALUE set)
1139 if (rb_keyword_given_p()) {
1140 rb_raise(rb_eArgError, "no keywords accepted");
1142 rb_check_frozen(set);
1144 int i;
1146 for (i=0; i < argc; i++) {
1147 set_merge_enum_into(set, argv[i]);
1150 return set;
1153 static VALUE
1154 set_reset_table_with_type(VALUE set, const struct st_hash_type *type)
1156 rb_check_frozen(set);
1158 struct set_object *sobj;
1159 TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj);
1160 set_table *old = &sobj->table;
1162 size_t size = set_table_size(old);
1163 if (size > 0) {
1164 set_table *new = set_init_table_with_size(NULL, type, size);
1165 struct set_merge_args args = {
1166 .set = set,
1167 .into = new
1169 set_iter(set, set_merge_i, (st_data_t)&args);
1170 set_free_embedded(sobj);
1171 memcpy(&sobj->table, new, sizeof(*new));
1172 free(new);
1174 else {
1175 sobj->table.type = type;
1178 return set;
1182 * call-seq:
1183 * compare_by_identity -> self
1185 * Makes the set compare its elements by their identity and returns self.
1187 static VALUE
1188 set_i_compare_by_identity(VALUE set)
1190 if (RSET_COMPARE_BY_IDENTITY(set)) return set;
1192 if (set_iterating_p(set)) {
1193 rb_raise(rb_eRuntimeError, "compare_by_identity during iteration");
1196 return set_reset_table_with_type(set, &identhash);
1200 * call-seq:
1201 * compare_by_identity? -> true or false
1203 * Returns true if the set will compare its elements by their
1204 * identity. Also see Set#compare_by_identity.
1206 static VALUE
1207 set_i_compare_by_identity_p(VALUE set)
1209 return RBOOL(RSET_COMPARE_BY_IDENTITY(set));
1213 * call-seq:
1214 * size -> integer
1216 * Returns the number of elements.
1218 static VALUE
1219 set_i_size(VALUE set)
1221 return RSET_SIZE_NUM(set);
1225 * call-seq:
1226 * empty? -> true or false
1228 * Returns true if the set contains no elements.
1230 static VALUE
1231 set_i_empty(VALUE set)
1233 return RBOOL(RSET_EMPTY(set));
1236 static int
1237 set_xor_i(st_data_t key, st_data_t data)
1239 VALUE element = (VALUE)key;
1240 VALUE set = (VALUE)data;
1241 set_table *table = RSET_TABLE(set);
1242 if (set_table_insert_wb(table, set, element, &element)) {
1243 set_delete(table, &element);
1245 return ST_CONTINUE;
1249 * call-seq:
1250 * set ^ enum -> new_set
1252 * Returns a new set containing elements exclusive between the set and the
1253 * given enumerable object. <tt>(set ^ enum)</tt> is equivalent to
1254 * <tt>((set | enum) - (set & enum))</tt>.
1256 * Set[1, 2] ^ Set[2, 3] #=> #<Set: {3, 1}>
1257 * Set[1, 'b', 'c'] ^ ['b', 'd'] #=> #<Set: {"d", 1, "c"}>
1259 static VALUE
1260 set_i_xor(VALUE set, VALUE other)
1262 VALUE new_set;
1263 if (rb_obj_is_kind_of(other, rb_cSet)) {
1264 new_set = other;
1266 else {
1267 new_set = set_s_alloc(rb_obj_class(set));
1268 set_merge_enum_into(new_set, other);
1270 set_iter(set, set_xor_i, (st_data_t)new_set);
1271 return new_set;
1275 * call-seq:
1276 * set | enum -> new_set
1278 * Returns a new set built by merging the set and the elements of the
1279 * given enumerable object.
1281 * Set[1, 2, 3] | Set[2, 4, 5] #=> #<Set: {1, 2, 3, 4, 5}>
1282 * Set[1, 5, 'z'] | (1..6) #=> #<Set: {1, 5, "z", 2, 3, 4, 6}>
1284 static VALUE
1285 set_i_union(VALUE set, VALUE other)
1287 set = rb_obj_dup(set);
1288 set_merge_enum_into(set, other);
1289 return set;
1292 static int
1293 set_remove_i(st_data_t key, st_data_t from)
1295 set_delete((struct set_table *)from, (st_data_t *)&key);
1296 return ST_CONTINUE;
1299 static VALUE
1300 set_remove_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set))
1302 rb_check_frozen(set);
1303 set_delete(RSET_TABLE(set), (st_data_t *)&key);
1304 return key;
1307 static void
1308 set_remove_enum_from(VALUE set, VALUE arg)
1310 if (rb_obj_is_kind_of(arg, rb_cSet)) {
1311 set_iter(arg, set_remove_i, (st_data_t)RSET_TABLE(set));
1313 else {
1314 rb_block_call(arg, enum_method_id(arg), 0, 0, set_remove_block, (VALUE)set);
1319 * call-seq:
1320 * subtract(enum) -> self
1322 * Deletes every element that appears in the given enumerable object
1323 * and returns self.
1325 static VALUE
1326 set_i_subtract(VALUE set, VALUE other)
1328 rb_check_frozen(set);
1329 set_remove_enum_from(set, other);
1330 return set;
1334 * call-seq:
1335 * set - enum -> new_set
1337 * Returns a new set built by duplicating the set, removing every
1338 * element that appears in the given enumerable object.
1340 * Set[1, 3, 5] - Set[1, 5] #=> #<Set: {3}>
1341 * Set['a', 'b', 'z'] - ['a', 'c'] #=> #<Set: {"b", "z"}>
1343 static VALUE
1344 set_i_difference(VALUE set, VALUE other)
1346 return set_i_subtract(rb_obj_dup(set), other);
1349 static int
1350 set_each_i(st_data_t key, st_data_t dummy)
1352 rb_yield(key);
1353 return ST_CONTINUE;
1357 * call-seq:
1358 * each { |o| ... } -> self
1359 * each -> enumerator
1361 * Calls the given block once for each element in the set, passing
1362 * the element as parameter. Returns an enumerator if no block is
1363 * given.
1365 static VALUE
1366 set_i_each(VALUE set)
1368 RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
1369 set_iter(set, set_each_i, 0);
1370 return set;
1373 static int
1374 set_collect_i(st_data_t key, st_data_t data)
1376 set_insert_wb((VALUE)data, rb_yield((VALUE)key), NULL);
1377 return ST_CONTINUE;
1381 * call-seq:
1382 * collect! { |o| ... } -> self
1383 * collect! -> enumerator
1385 * Replaces the elements with ones returned by +collect+.
1386 * Returns an enumerator if no block is given.
1388 static VALUE
1389 set_i_collect(VALUE set)
1391 RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
1392 rb_check_frozen(set);
1394 VALUE new_set = set_s_alloc(rb_obj_class(set));
1395 set_iter(set, set_collect_i, (st_data_t)new_set);
1396 set_i_initialize_copy(set, new_set);
1398 return set;
1401 static int
1402 set_keep_if_i(st_data_t key, st_data_t into)
1404 if (!RTEST(rb_yield((VALUE)key))) {
1405 set_delete((set_table *)into, &key);
1407 return ST_CONTINUE;
1411 * call-seq:
1412 * keep_if { |o| ... } -> self
1413 * keep_if -> enumerator
1415 * Deletes every element of the set for which block evaluates to false, and
1416 * returns self. Returns an enumerator if no block is given.
1418 static VALUE
1419 set_i_keep_if(VALUE set)
1421 RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
1422 rb_check_frozen(set);
1424 set_iter(set, set_keep_if_i, (st_data_t)RSET_TABLE(set));
1426 return set;
1430 * call-seq:
1431 * select! { |o| ... } -> self
1432 * select! -> enumerator
1434 * Equivalent to Set#keep_if, but returns nil if no changes were made.
1435 * Returns an enumerator if no block is given.
1437 static VALUE
1438 set_i_select(VALUE set)
1440 RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
1441 rb_check_frozen(set);
1443 set_table *table = RSET_TABLE(set);
1444 size_t n = set_table_size(table);
1445 set_iter(set, set_keep_if_i, (st_data_t)table);
1447 return (n == set_table_size(table)) ? Qnil : set;
1451 * call-seq:
1452 * replace(enum) -> self
1454 * Replaces the contents of the set with the contents of the given
1455 * enumerable object and returns self.
1457 * set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}>
1458 * set.replace([1, 2]) #=> #<Set: {1, 2}>
1459 * set #=> #<Set: {1, 2}>
1461 static VALUE
1462 set_i_replace(VALUE set, VALUE other)
1464 rb_check_frozen(set);
1466 if (rb_obj_is_kind_of(other, rb_cSet)) {
1467 set_i_initialize_copy(set, other);
1469 else {
1470 if (set_iterating_p(set)) {
1471 rb_raise(rb_eRuntimeError, "cannot replace set during iteration");
1474 // make sure enum is enumerable before calling clear
1475 enum_method_id(other);
1477 set_clear(RSET_TABLE(set));
1478 set_merge_enum_into(set, other);
1481 return set;
1485 * call-seq:
1486 * reset -> self
1488 * Resets the internal state after modification to existing elements
1489 * and returns self. Elements will be reindexed and deduplicated.
1491 static VALUE
1492 set_i_reset(VALUE set)
1494 if (set_iterating_p(set)) {
1495 rb_raise(rb_eRuntimeError, "reset during iteration");
1498 return set_reset_table_with_type(set, RSET_TABLE(set)->type);
1501 static void set_flatten_merge(VALUE set, VALUE from, VALUE seen);
1503 static int
1504 set_flatten_merge_i(st_data_t item, st_data_t arg)
1506 VALUE *args = (VALUE *)arg;
1507 VALUE set = args[0];
1508 if (rb_obj_is_kind_of(item, rb_cSet)) {
1509 VALUE e_id = rb_obj_id(item);
1510 VALUE hash = args[2];
1511 switch(rb_hash_aref(hash, e_id)) {
1512 case Qfalse:
1513 return ST_CONTINUE;
1514 case Qtrue:
1515 rb_raise(rb_eArgError, "tried to flatten recursive Set");
1516 default:
1517 break;
1520 rb_hash_aset(hash, e_id, Qtrue);
1521 set_flatten_merge(set, item, hash);
1522 rb_hash_aset(hash, e_id, Qfalse);
1524 else {
1525 set_i_add(set, item);
1527 return ST_CONTINUE;
1530 static void
1531 set_flatten_merge(VALUE set, VALUE from, VALUE hash)
1533 VALUE args[3] = {set, from, hash};
1534 set_iter(from, set_flatten_merge_i, (st_data_t)args);
1538 * call-seq:
1539 * flatten -> set
1541 * Returns a new set that is a copy of the set, flattening each
1542 * containing set recursively.
1544 static VALUE
1545 set_i_flatten(VALUE set)
1547 VALUE new_set = set_s_alloc(rb_obj_class(set));
1548 set_flatten_merge(new_set, set, rb_hash_new());
1549 return new_set;
1552 static int
1553 set_contains_set_i(st_data_t item, st_data_t arg)
1555 if (rb_obj_is_kind_of(item, rb_cSet)) {
1556 *(bool *)arg = true;
1557 return ST_STOP;
1559 return ST_CONTINUE;
1563 * call-seq:
1564 * flatten! -> self
1566 * Equivalent to Set#flatten, but replaces the receiver with the
1567 * result in place. Returns nil if no modifications were made.
1569 static VALUE
1570 set_i_flatten_bang(VALUE set)
1572 bool contains_set = false;
1573 set_iter(set, set_contains_set_i, (st_data_t)&contains_set);
1574 if (!contains_set) return Qnil;
1575 rb_check_frozen(set);
1576 return set_i_replace(set, set_i_flatten(set));
1579 struct set_subset_data {
1580 set_table *table;
1581 VALUE result;
1584 static int
1585 set_le_i(st_data_t key, st_data_t arg)
1587 struct set_subset_data *data = (struct set_subset_data *)arg;
1588 if (set_lookup(data->table, key)) return ST_CONTINUE;
1589 data->result = Qfalse;
1590 return ST_STOP;
1593 static VALUE
1594 set_le(VALUE set, VALUE other)
1596 struct set_subset_data data = {
1597 .table = RSET_TABLE(other),
1598 .result = Qtrue
1600 set_iter(set, set_le_i, (st_data_t)&data);
1601 return data.result;
1605 * call-seq:
1606 * proper_subset?(set) -> true or false
1608 * Returns true if the set is a proper subset of the given set.
1610 static VALUE
1611 set_i_proper_subset(VALUE set, VALUE other)
1613 check_set(other);
1614 if (RSET_SIZE(set) >= RSET_SIZE(other)) return Qfalse;
1615 return set_le(set, other);
1619 * call-seq:
1620 * subset?(set) -> true or false
1622 * Returns true if the set is a subset of the given set.
1624 static VALUE
1625 set_i_subset(VALUE set, VALUE other)
1627 check_set(other);
1628 if (RSET_SIZE(set) > RSET_SIZE(other)) return Qfalse;
1629 return set_le(set, other);
1633 * call-seq:
1634 * proper_superset?(set) -> true or false
1636 * Returns true if the set is a proper superset of the given set.
1638 static VALUE
1639 set_i_proper_superset(VALUE set, VALUE other)
1641 check_set(other);
1642 if (RSET_SIZE(set) <= RSET_SIZE(other)) return Qfalse;
1643 return set_le(other, set);
1647 * call-seq:
1648 * superset?(set) -> true or false
1650 * Returns true if the set is a superset of the given set.
1652 static VALUE
1653 set_i_superset(VALUE set, VALUE other)
1655 check_set(other);
1656 if (RSET_SIZE(set) < RSET_SIZE(other)) return Qfalse;
1657 return set_le(other, set);
1660 static int
1661 set_intersect_i(st_data_t key, st_data_t arg)
1663 VALUE *args = (VALUE *)arg;
1664 if (set_lookup((set_table *)args[0], key)) {
1665 args[1] = Qtrue;
1666 return ST_STOP;
1668 return ST_CONTINUE;
1672 * call-seq:
1673 * intersect?(set) -> true or false
1675 * Returns true if the set and the given enumerable have at least one
1676 * element in common.
1678 * Set[1, 2, 3].intersect? Set[4, 5] #=> false
1679 * Set[1, 2, 3].intersect? Set[3, 4] #=> true
1680 * Set[1, 2, 3].intersect? 4..5 #=> false
1681 * Set[1, 2, 3].intersect? [3, 4] #=> true
1683 static VALUE
1684 set_i_intersect(VALUE set, VALUE other)
1686 if (rb_obj_is_kind_of(other, rb_cSet)) {
1687 size_t set_size = RSET_SIZE(set);
1688 size_t other_size = RSET_SIZE(other);
1689 VALUE args[2];
1690 args[1] = Qfalse;
1691 VALUE iter_arg;
1693 if (set_size < other_size) {
1694 iter_arg = set;
1695 args[0] = (VALUE)RSET_TABLE(other);
1697 else {
1698 iter_arg = other;
1699 args[0] = (VALUE)RSET_TABLE(set);
1701 set_iter(iter_arg, set_intersect_i, (st_data_t)args);
1702 return args[1];
1704 else if (rb_obj_is_kind_of(other, rb_mEnumerable)) {
1705 return rb_funcall(other, id_any_p, 1, set);
1707 else {
1708 rb_raise(rb_eArgError, "value must be enumerable");
1713 * call-seq:
1714 * disjoint?(set) -> true or false
1716 * Returns true if the set and the given enumerable have no
1717 * element in common. This method is the opposite of +intersect?+.
1719 * Set[1, 2, 3].disjoint? Set[3, 4] #=> false
1720 * Set[1, 2, 3].disjoint? Set[4, 5] #=> true
1721 * Set[1, 2, 3].disjoint? [3, 4] #=> false
1722 * Set[1, 2, 3].disjoint? 4..5 #=> true
1724 static VALUE
1725 set_i_disjoint(VALUE set, VALUE other)
1727 return RBOOL(!RTEST(set_i_intersect(set, other)));
1731 * call-seq:
1732 * set <=> other -> -1, 0, 1, or nil
1734 * Returns 0 if the set are equal, -1 / 1 if the set is a
1735 * proper subset / superset of the given set, or or nil if
1736 * they both have unique elements.
1738 static VALUE
1739 set_i_compare(VALUE set, VALUE other)
1741 if (rb_obj_is_kind_of(other, rb_cSet)) {
1742 size_t set_size = RSET_SIZE(set);
1743 size_t other_size = RSET_SIZE(other);
1745 if (set_size < other_size) {
1746 if (set_le(set, other) == Qtrue) {
1747 return INT2NUM(-1);
1750 else if (set_size > other_size) {
1751 if (set_le(other, set) == Qtrue) {
1752 return INT2NUM(1);
1755 else if (set_le(set, other) == Qtrue) {
1756 return INT2NUM(0);
1760 return Qnil;
1763 struct set_equal_data {
1764 VALUE result;
1765 VALUE set;
1768 static int
1769 set_eql_i(st_data_t item, st_data_t arg)
1771 struct set_equal_data *data = (struct set_equal_data *)arg;
1773 if (!set_lookup(RSET_TABLE(data->set), item)) {
1774 data->result = Qfalse;
1775 return ST_STOP;
1777 return ST_CONTINUE;
1780 static VALUE
1781 set_recursive_eql(VALUE set, VALUE dt, int recur)
1783 if (recur) return Qtrue;
1784 struct set_equal_data *data = (struct set_equal_data*)dt;
1785 data->result = Qtrue;
1786 set_iter(set, set_eql_i, dt);
1787 return data->result;
1791 * call-seq:
1792 * set == other -> true or false
1794 * Returns true if two sets are equal.
1796 static VALUE
1797 set_i_eq(VALUE set, VALUE other)
1799 if (!rb_obj_is_kind_of(other, rb_cSet)) return Qfalse;
1800 if (set == other) return Qtrue;
1802 set_table *stable = RSET_TABLE(set);
1803 set_table *otable = RSET_TABLE(other);
1804 size_t ssize = set_table_size(stable);
1805 size_t osize = set_table_size(otable);
1807 if (ssize != osize) return Qfalse;
1808 if (ssize == 0 && osize == 0) return Qtrue;
1809 if (stable->type != otable->type) return Qfalse;
1811 struct set_equal_data data;
1812 data.set = other;
1813 return rb_exec_recursive_paired(set_recursive_eql, set, other, (VALUE)&data);
1816 static int
1817 set_hash_i(st_data_t item, st_data_t(arg))
1819 st_index_t *hval = (st_index_t *)arg;
1820 st_index_t ival = rb_hash(item);
1821 *hval ^= rb_st_hash(&ival, sizeof(st_index_t), 0);
1822 return ST_CONTINUE;
1826 * call-seq:
1827 * hash -> integer
1829 * Returns hash code for set.
1831 static VALUE
1832 set_i_hash(VALUE set)
1834 st_index_t size = RSET_SIZE(set);
1835 st_index_t hval = rb_st_hash_start(size);
1836 hval = rb_hash_uint(hval, (st_index_t)set_i_hash);
1837 if (size) {
1838 set_iter(set, set_hash_i, (VALUE)&hval);
1840 hval = rb_st_hash_end(hval);
1841 return ST2FIX(hval);
1844 /* :nodoc: */
1845 static int
1846 set_to_hash_i(st_data_t key, st_data_t arg)
1848 rb_hash_aset((VALUE)arg, (VALUE)key, Qtrue);
1849 return ST_CONTINUE;
1852 static VALUE
1853 set_i_to_h(VALUE set)
1855 st_index_t size = RSET_SIZE(set);
1856 VALUE hash;
1857 if (RSET_COMPARE_BY_IDENTITY(set)) {
1858 hash = rb_ident_hash_new_with_size(size);
1860 else {
1861 hash = rb_hash_new_with_size(size);
1863 rb_hash_set_default(hash, Qfalse);
1865 if (size == 0) return hash;
1867 set_iter(set, set_to_hash_i, (st_data_t)hash);
1868 return hash;
1871 static VALUE
1872 compat_dumper(VALUE set)
1874 VALUE dumper = rb_class_new_instance(0, 0, rb_cObject);
1875 rb_ivar_set(dumper, id_i_hash, set_i_to_h(set));
1876 return dumper;
1879 static int
1880 set_i_from_hash_i(st_data_t key, st_data_t val, st_data_t set)
1882 if ((VALUE)val != Qtrue) {
1883 rb_raise(rb_eRuntimeError, "expect true as Set value: %"PRIsVALUE, rb_obj_class((VALUE)val));
1885 set_i_add((VALUE)set, (VALUE)key);
1886 return ST_CONTINUE;
1889 static VALUE
1890 set_i_from_hash(VALUE set, VALUE hash)
1892 Check_Type(hash, T_HASH);
1893 if (rb_hash_compare_by_id_p(hash)) set_i_compare_by_identity(set);
1894 rb_hash_stlike_foreach(hash, set_i_from_hash_i, (st_data_t)set);
1895 return set;
1898 static VALUE
1899 compat_loader(VALUE self, VALUE a)
1901 return set_i_from_hash(self, rb_ivar_get(a, id_i_hash));
1905 * Document-class: Set
1907 * Copyright (c) 2002-2024 Akinori MUSHA <knu@iDaemons.org>
1909 * Documentation by Akinori MUSHA and Gavin Sinclair.
1911 * All rights reserved. You can redistribute and/or modify it under the same
1912 * terms as Ruby.
1914 * The Set class implements a collection of unordered values with no
1915 * duplicates. It is a hybrid of Array's intuitive inter-operation
1916 * facilities and Hash's fast lookup.
1918 * Set is easy to use with Enumerable objects (implementing `each`).
1919 * Most of the initializer methods and binary operators accept generic
1920 * Enumerable objects besides sets and arrays. An Enumerable object
1921 * can be converted to Set using the `to_set` method.
1923 * Set uses a data structure similar to Hash for storage, except that
1924 * it only has keys and no values.
1926 * * Equality of elements is determined according to Object#eql? and
1927 * Object#hash. Use Set#compare_by_identity to make a set compare
1928 * its elements by their identity.
1929 * * Set assumes that the identity of each element does not change
1930 * while it is stored. Modifying an element of a set will render the
1931 * set to an unreliable state.
1932 * * When a string is to be stored, a frozen copy of the string is
1933 * stored instead unless the original string is already frozen.
1935 * == Comparison
1937 * The comparison operators <tt><</tt>, <tt>></tt>, <tt><=</tt>, and
1938 * <tt>>=</tt> are implemented as shorthand for the
1939 * {proper_,}{subset?,superset?} methods. The <tt><=></tt>
1940 * operator reflects this order, or returns +nil+ for sets that both
1941 * have distinct elements (<tt>{x, y}</tt> vs. <tt>{x, z}</tt> for example).
1943 * == Example
1945 * s1 = Set[1, 2] #=> #<Set: {1, 2}>
1946 * s2 = [1, 2].to_set #=> #<Set: {1, 2}>
1947 * s1 == s2 #=> true
1948 * s1.add("foo") #=> #<Set: {1, 2, "foo"}>
1949 * s1.merge([2, 6]) #=> #<Set: {1, 2, "foo", 6}>
1950 * s1.subset?(s2) #=> false
1951 * s2.subset?(s1) #=> true
1953 * == Contact
1955 * - Akinori MUSHA <knu@iDaemons.org> (current maintainer)
1957 * == What's Here
1959 * First, what's elsewhere. \Class \Set:
1961 * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here].
1962 * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here],
1963 * which provides dozens of additional methods.
1965 * In particular, class \Set does not have many methods of its own
1966 * for fetching or for iterating.
1967 * Instead, it relies on those in \Enumerable.
1969 * Here, class \Set provides methods that are useful for:
1971 * - {Creating an Array}[rdoc-ref:Array@Methods+for+Creating+an+Array]
1972 * - {Creating a Set}[rdoc-ref:Array@Methods+for+Creating+a+Set]
1973 * - {Set Operations}[rdoc-ref:Array@Methods+for+Set+Operations]
1974 * - {Comparing}[rdoc-ref:Array@Methods+for+Comparing]
1975 * - {Querying}[rdoc-ref:Array@Methods+for+Querying]
1976 * - {Assigning}[rdoc-ref:Array@Methods+for+Assigning]
1977 * - {Deleting}[rdoc-ref:Array@Methods+for+Deleting]
1978 * - {Converting}[rdoc-ref:Array@Methods+for+Converting]
1979 * - {Iterating}[rdoc-ref:Array@Methods+for+Iterating]
1980 * - {And more....}[rdoc-ref:Array@Other+Methods]
1982 * === Methods for Creating a \Set
1984 * - ::[]:
1985 * Returns a new set containing the given objects.
1986 * - ::new:
1987 * Returns a new set containing either the given objects
1988 * (if no block given) or the return values from the called block
1989 * (if a block given).
1991 * === Methods for \Set Operations
1993 * - #| (aliased as #union and #+):
1994 * Returns a new set containing all elements from +self+
1995 * and all elements from a given enumerable (no duplicates).
1996 * - #& (aliased as #intersection):
1997 * Returns a new set containing all elements common to +self+
1998 * and a given enumerable.
1999 * - #- (aliased as #difference):
2000 * Returns a copy of +self+ with all elements
2001 * in a given enumerable removed.
2002 * - #^: Returns a new set containing all elements from +self+
2003 * and a given enumerable except those common to both.
2005 * === Methods for Comparing
2007 * - #<=>: Returns -1, 0, or 1 as +self+ is less than, equal to,
2008 * or greater than a given object.
2009 * - #==: Returns whether +self+ and a given enumerable are equal,
2010 * as determined by Object#eql?.
2011 * - #compare_by_identity?:
2012 * Returns whether the set considers only identity
2013 * when comparing elements.
2015 * === Methods for Querying
2017 * - #length (aliased as #size):
2018 * Returns the count of elements.
2019 * - #empty?:
2020 * Returns whether the set has no elements.
2021 * - #include? (aliased as #member? and #===):
2022 * Returns whether a given object is an element in the set.
2023 * - #subset? (aliased as #<=):
2024 * Returns whether a given object is a subset of the set.
2025 * - #proper_subset? (aliased as #<):
2026 * Returns whether a given enumerable is a proper subset of the set.
2027 * - #superset? (aliased as #>=):
2028 * Returns whether a given enumerable is a superset of the set.
2029 * - #proper_superset? (aliased as #>):
2030 * Returns whether a given enumerable is a proper superset of the set.
2031 * - #disjoint?:
2032 * Returns +true+ if the set and a given enumerable
2033 * have no common elements, +false+ otherwise.
2034 * - #intersect?:
2035 * Returns +true+ if the set and a given enumerable:
2036 * have any common elements, +false+ otherwise.
2037 * - #compare_by_identity?:
2038 * Returns whether the set considers only identity
2039 * when comparing elements.
2041 * === Methods for Assigning
2043 * - #add (aliased as #<<):
2044 * Adds a given object to the set; returns +self+.
2045 * - #add?:
2046 * If the given object is not an element in the set,
2047 * adds it and returns +self+; otherwise, returns +nil+.
2048 * - #merge:
2049 * Merges the elements of each given enumerable object to the set; returns +self+.
2050 * - #replace:
2051 * Replaces the contents of the set with the contents
2052 * of a given enumerable.
2054 * === Methods for Deleting
2056 * - #clear:
2057 * Removes all elements in the set; returns +self+.
2058 * - #delete:
2059 * Removes a given object from the set; returns +self+.
2060 * - #delete?:
2061 * If the given object is an element in the set,
2062 * removes it and returns +self+; otherwise, returns +nil+.
2063 * - #subtract:
2064 * Removes each given object from the set; returns +self+.
2065 * - #delete_if - Removes elements specified by a given block.
2066 * - #select! (aliased as #filter!):
2067 * Removes elements not specified by a given block.
2068 * - #keep_if:
2069 * Removes elements not specified by a given block.
2070 * - #reject!
2071 * Removes elements specified by a given block.
2073 * === Methods for Converting
2075 * - #classify:
2076 * Returns a hash that classifies the elements,
2077 * as determined by the given block.
2078 * - #collect! (aliased as #map!):
2079 * Replaces each element with a block return-value.
2080 * - #divide:
2081 * Returns a hash that classifies the elements,
2082 * as determined by the given block;
2083 * differs from #classify in that the block may accept
2084 * either one or two arguments.
2085 * - #flatten:
2086 * Returns a new set that is a recursive flattening of +self+.
2087 * - #flatten!:
2088 * Replaces each nested set in +self+ with the elements from that set.
2089 * - #inspect (aliased as #to_s):
2090 * Returns a string displaying the elements.
2091 * - #join:
2092 * Returns a string containing all elements, converted to strings
2093 * as needed, and joined by the given record separator.
2094 * - #to_a:
2095 * Returns an array containing all set elements.
2096 * - #to_set:
2097 * Returns +self+ if given no arguments and no block;
2098 * with a block given, returns a new set consisting of block
2099 * return values.
2101 * === Methods for Iterating
2103 * - #each:
2104 * Calls the block with each successive element; returns +self+.
2106 * === Other Methods
2108 * - #reset:
2109 * Resets the internal state; useful if an object
2110 * has been modified while an element in the set.
2113 void
2114 Init_Set(void)
2116 rb_cSet = rb_define_class("Set", rb_cObject);
2117 rb_include_module(rb_cSet, rb_mEnumerable);
2119 id_each_entry = rb_intern_const("each_entry");
2120 id_any_p = rb_intern_const("any?");
2121 id_new = rb_intern_const("new");
2122 id_i_hash = rb_intern_const("@hash");
2123 id_set_iter_lev = rb_make_internal_id();
2125 rb_define_alloc_func(rb_cSet, set_s_alloc);
2126 rb_define_singleton_method(rb_cSet, "[]", set_s_create, -1);
2128 rb_define_method(rb_cSet, "initialize", set_i_initialize, -1);
2129 rb_define_method(rb_cSet, "initialize_copy", set_i_initialize_copy, 1);
2131 rb_define_method(rb_cSet, "&", set_i_intersection, 1);
2132 rb_define_alias(rb_cSet, "intersection", "&");
2133 rb_define_method(rb_cSet, "-", set_i_difference, 1);
2134 rb_define_alias(rb_cSet, "difference", "-");
2135 rb_define_method(rb_cSet, "^", set_i_xor, 1);
2136 rb_define_method(rb_cSet, "|", set_i_union, 1);
2137 rb_define_alias(rb_cSet, "+", "|");
2138 rb_define_alias(rb_cSet, "union", "|");
2139 rb_define_method(rb_cSet, "<=>", set_i_compare, 1);
2140 rb_define_method(rb_cSet, "==", set_i_eq, 1);
2141 rb_define_alias(rb_cSet, "eql?", "==");
2142 rb_define_method(rb_cSet, "add", set_i_add, 1);
2143 rb_define_alias(rb_cSet, "<<", "add");
2144 rb_define_method(rb_cSet, "add?", set_i_add_p, 1);
2145 rb_define_method(rb_cSet, "classify", set_i_classify, 0);
2146 rb_define_method(rb_cSet, "clear", set_i_clear, 0);
2147 rb_define_method(rb_cSet, "collect!", set_i_collect, 0);
2148 rb_define_alias(rb_cSet, "map!", "collect!");
2149 rb_define_method(rb_cSet, "compare_by_identity", set_i_compare_by_identity, 0);
2150 rb_define_method(rb_cSet, "compare_by_identity?", set_i_compare_by_identity_p, 0);
2151 rb_define_method(rb_cSet, "delete", set_i_delete, 1);
2152 rb_define_method(rb_cSet, "delete?", set_i_delete_p, 1);
2153 rb_define_method(rb_cSet, "delete_if", set_i_delete_if, 0);
2154 rb_define_method(rb_cSet, "disjoint?", set_i_disjoint, 1);
2155 rb_define_method(rb_cSet, "divide", set_i_divide, 0);
2156 rb_define_method(rb_cSet, "each", set_i_each, 0);
2157 rb_define_method(rb_cSet, "empty?", set_i_empty, 0);
2158 rb_define_method(rb_cSet, "flatten", set_i_flatten, 0);
2159 rb_define_method(rb_cSet, "flatten!", set_i_flatten_bang, 0);
2160 rb_define_method(rb_cSet, "hash", set_i_hash, 0);
2161 rb_define_method(rb_cSet, "include?", set_i_include, 1);
2162 rb_define_alias(rb_cSet, "member?", "include?");
2163 rb_define_alias(rb_cSet, "===", "include?");
2164 rb_define_method(rb_cSet, "inspect", set_i_inspect, 0);
2165 rb_define_alias(rb_cSet, "to_s", "inspect");
2166 rb_define_method(rb_cSet, "intersect?", set_i_intersect, 1);
2167 rb_define_method(rb_cSet, "join", set_i_join, -1);
2168 rb_define_method(rb_cSet, "keep_if", set_i_keep_if, 0);
2169 rb_define_method(rb_cSet, "merge", set_i_merge, -1);
2170 rb_define_method(rb_cSet, "proper_subset?", set_i_proper_subset, 1);
2171 rb_define_alias(rb_cSet, "<", "proper_subset?");
2172 rb_define_method(rb_cSet, "proper_superset?", set_i_proper_superset, 1);
2173 rb_define_alias(rb_cSet, ">", "proper_superset?");
2174 rb_define_method(rb_cSet, "reject!", set_i_reject, 0);
2175 rb_define_method(rb_cSet, "replace", set_i_replace, 1);
2176 rb_define_method(rb_cSet, "reset", set_i_reset, 0);
2177 rb_define_method(rb_cSet, "size", set_i_size, 0);
2178 rb_define_alias(rb_cSet, "length", "size");
2179 rb_define_method(rb_cSet, "select!", set_i_select, 0);
2180 rb_define_alias(rb_cSet, "filter!", "select!");
2181 rb_define_method(rb_cSet, "subset?", set_i_subset, 1);
2182 rb_define_alias(rb_cSet, "<=", "subset?");
2183 rb_define_method(rb_cSet, "subtract", set_i_subtract, 1);
2184 rb_define_method(rb_cSet, "superset?", set_i_superset, 1);
2185 rb_define_alias(rb_cSet, ">=", "superset?");
2186 rb_define_method(rb_cSet, "to_a", set_i_to_a, 0);
2187 rb_define_method(rb_cSet, "to_h", set_i_to_h, 0);
2188 rb_define_method(rb_cSet, "to_set", set_i_to_set, -1);
2190 /* :nodoc: */
2191 VALUE compat = rb_define_class_under(rb_cSet, "compatible", rb_cObject);
2192 rb_marshal_define_compat(rb_cSet, compat, compat_dumper, compat_loader);
2194 rb_provide("set.rb");