diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..76fdc30666 --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +# / +*.bak +*.dylib +*.inc +*.o +*.orig +*.rej +*.sav +*.swp +*.d +*~ +.DS_Store +.ccmalloc +.svn +/.git +cscope.out +mruby.exe +y.tab.c diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..fdf54c21df --- /dev/null +++ b/Makefile @@ -0,0 +1,120 @@ +# makefile description. +# basic build file for Rite-VM(mruby) +# 11.Apr.2011 coded by Kenji Yoshimoto. +# 17.Jan.2012 coded by Hiroshi Mimaki. + +# project-specific macros +# extension of the executable-file is modifiable(.exe .out ...) +TARGET := bin/mrubysample +RITEVM := lib/ritevm +MRUBY := tools/mruby/mruby +ifeq ($(OS),Windows_NT) +EXE := $(TARGET).exe +LIB := $(RITEVM).lib +MRB := $(MRUBY).exe +else +EXE := $(TARGET) +LIB := $(RITEVM).a +MRB := $(MRUBY) +endif +MSRC := src/minimain.c +YSRC := src/parse.y +YC := src/y.tab.c +EXCEPT1 := $(YC) $(MSRC) +OBJM := $(patsubst %.c,%.o,$(MSRC)) +OBJY := $(patsubst %.c,%.o,$(YC)) +OBJ1 := $(patsubst %.c,%.o,$(filter-out $(EXCEPT1),$(wildcard src/*.c))) +#OBJ2 := $(patsubst %.c,%.o,$(wildcard ext/regex/*.c)) +#OBJ3 := $(patsubst %.c,%.o,$(wildcard ext/enc/*.c)) +OBJS := $(OBJ1) $(OBJ2) $(OBJ3) +# mruby libraries +EXTC := mrblib/mrblib.c +EXTRB := $(wildcard mrblib/*.rb) +EXT0 := $(patsubst %.c,%.o,src/$(EXTC)) +# ext libraries +EXTS := $(EXT0) + +# libraries, includes +LIBS = $(LIB) -lm +INCLUDES = -I./src -I./include + +# library for iOS +IOSLIB := $(RITEVM)-ios.a +IOSSIMLIB := $(RITEVM)-iossim.a +IOSDEVLIB := $(RITEVM)-iosdev.a +IOSSIMCC := xcrun -sdk iphoneos llvm-gcc-4.2 -arch i386 -isysroot "/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator5.0.sdk/" +IOSDEVCC := xcrun -sdk iphoneos llvm-gcc-4.2 -arch armv7 -isysroot "/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS5.0.sdk/" + +# compiler, linker (gcc) +CC = gcc +LL = gcc +YACC = bison +DEBUG_MODE = 1 +ifeq ($(DEBUG_MODE),1) 
+CFLAGS = -g +else +CFLAGS = -O3 +endif +ALL_CFLAGS = -Wall -Werror-implicit-function-declaration $(CFLAGS) +MAKE_FLAGS = --no-print-directory CC="$(CC)" LL="$(LL)" + +############################## +# generic build targets, rules + +.PHONY : all +all : $(LIB) $(MRB) $(EXE) + @echo "make: built targets of `pwd`" + +############################## +# make library for iOS +.PHONY : ios +ios : $(IOSLIB) + +$(IOSLIB) : $(IOSSIMLIB) $(IOSDEVLIB) + lipo -arch i386 $(IOSSIMLIB) -arch armv7 $(IOSDEVLIB) -create -output $(IOSLIB) + +$(IOSSIMLIB) : + $(MAKE) clean -C src $(MAKE_FLAGS) + $(MAKE) -C src $(MAKE_FLAGS) CC="$(IOSSIMCC)" LL="$(IOSSIMCC)" + cp $(LIB) $(IOSSIMLIB) + +$(IOSDEVLIB) : + $(MAKE) clean -C src $(MAKE_FLAGS) + $(MAKE) -C src $(MAKE_FLAGS) CC="$(IOSDEVCC)" LL="$(IOSDEVCC)" + cp $(LIB) $(IOSDEVLIB) + +# executable constructed using linker from object files +$(EXE) : $(OBJM) $(LIB) + $(LL) -o $@ $(OBJM) $(LIBS) + +-include $(OBJS:.o=.d) + +# src compile +$(LIB) : $(EXTS) $(OBJS) $(OBJY) + $(MAKE) -C src $(MAKE_FLAGS) + +# mruby interpreter compile +$(MRB) : $(EXTS) $(OBJS) $(OBJY) + $(MAKE) -C tools/mruby $(MAKE_FLAGS) + +# objects compiled from source +$(OBJS) : + $(MAKE) -C src $(MAKE_FLAGS) && $(MAKE) -C tools/mruby $(MAKE_FLAGS) + +# extend libraries compile +$(EXTS) : $(EXTRB) + $(MAKE) -C mrblib $(MAKE_FLAGS) + +# test module compile +$(OBJM) : $(MSRC) + $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $(MSRC) -o $(OBJM) + +# clean up +.PHONY : clean +clean : + $(MAKE) clean -C src $(MAKE_FLAGS) + $(MAKE) clean -C tools/mruby $(MAKE_FLAGS) + -rm -f $(EXE) $(OBJM) + -rm -f $(OBJM:.o=.d) + -rm -f $(IOSLIB) $(IOSSIMLIB) $(IOSDEVLIB) + @echo "make: removing targets, objects and depend files of `pwd`" diff --git a/README.md b/README.md deleted file mode 100644 index bc094bb1b2..0000000000 --- a/README.md +++ /dev/null @@ -1,4 +0,0 @@ -mruby -===== - -Lightweight Ruby \ No newline at end of file diff --git a/Todo.txt b/Todo.txt new file mode 100644 index 
0000000000..a143583413 --- /dev/null +++ b/Todo.txt @@ -0,0 +1,14 @@ +やること(まだできてないこと) / not yet complete + +* ヒアドキュメント / here document +* 特殊変数 ($1,$2..) / special variables +* super in aliased methods +* BEGIN/END (対応しないんだっけ?) +* const_missing +* respond_to_missing + +改善すること(できているが直すこと) + +* Hash (サイズを減らす。khashを使うか、順序を保存するか) +* stringEx (encoding削除、CODERANGE削除、UTF-8 or ASCII以外削除) +* 気づいたら書き加える diff --git a/bin/.gitkeep b/bin/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/doc/.gitkeep b/doc/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/.gitkeep b/ext/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/include/mrbconf.h b/include/mrbconf.h new file mode 100644 index 0000000000..d4802a5e7e --- /dev/null +++ b/include/mrbconf.h @@ -0,0 +1,42 @@ +#ifndef MRUBYCONF_H +#define MRUBYCONF_H + +#include <stdint.h> /* int32_t, intptr_t used by the typedefs below */ +typedef double mrb_float; +typedef int32_t mrb_int; +typedef intptr_t mrb_sym; + +#define readint(p,base) strtol((p),NULL,(base)) +#define readfloat(p) strtod((p),NULL) + +#undef INCLUDE_ENCODING /* not use encoding classes (ascii only) */ +#define INCLUDE_ENCODING /* use UTF-8 encoding classes */ + +#undef INCLUDE_REGEXP /* not use regular expression classes */ +#define INCLUDE_REGEXP /* use regular expression classes */ + +#ifdef INCLUDE_REGEXP +# define INCLUDE_ENCODING /* Regexp depends Encoding */ +#endif + +#undef HAVE_UNISTD_H /* WINDOWS */ +#define HAVE_UNISTD_H /* LINUX */ + +#define SIZEOF_INT 4 +#define SIZEOF_SHORT 2 +#define SIZEOF_LONG 4 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF___INT64 0 +#define SIZEOF_VOIDP 4 +#define SIZEOF_FLOAT 4 +#define SIZEOF_DOUBLE 8 + +#ifndef FALSE +# define FALSE 0 +#endif + +#ifndef TRUE +# define TRUE 1 +#endif + +#endif /* MRUBYCONF_H */ diff --git a/include/mruby.h b/include/mruby.h new file mode 100644 index 0000000000..506158d3ce --- /dev/null +++ b/include/mruby.h @@ -0,0 +1,623 @@ +#ifndef MRUBY_H +#define MRUBY_H + +#include <stdlib.h> /* size_t; malloc/realloc/calloc/free behind the x* macros; strtol/strtod for readint/readfloat */ +#include "mrbconf.h" 
+ +enum mrb_vtype { + MRB_TT_FALSE = 0, /* 0 */ + MRB_TT_FREE, /* 1 */ + MRB_TT_TRUE, /* 2 */ + MRB_TT_FIXNUM, /* 3 */ + MRB_TT_SYMBOL, /* 4 */ + MRB_TT_UNDEF, /* 5 */ + MRB_TT_FLOAT, /* 6 */ + MRB_TT_OBJECT, /* 7 */ + MRB_TT_CLASS, /* 8 */ + MRB_TT_MODULE, /* 9 */ + MRB_TT_ICLASS, /* 10 */ + MRB_TT_SCLASS, /* 11 */ + MRB_TT_PROC, /* 12 */ + MRB_TT_ARRAY, /* 13 */ + MRB_TT_HASH, /* 14 */ + MRB_TT_STRING, /* 15 */ + MRB_TT_RANGE, /* 16 */ + MRB_TT_REGEX, /* 17 */ + MRB_TT_STRUCT, /* 18 */ + MRB_TT_EXCEPTION, /* 19 */ + MRB_TT_MATCH, /* 20 */ + MRB_TT_FILE, /* 21 */ + MRB_TT_ENV, /* 22 */ + MRB_TT_DATA, /* 23 */ + MRB_TT_THREAD, /* 24 */ + MRB_TT_THREADGRP, /* 25 */ + MRB_TT_MAXDEFINE /* 26 */ +}; + +typedef struct mrb_value { + union { + mrb_float f; + void *p; + mrb_int i; + mrb_sym sym; + } value; + enum mrb_vtype tt:8; +} mrb_value; + +#define mrb_type(o) (o).tt +#define mrb_nil_p(o) ((o).tt == MRB_TT_FALSE && !(o).value.i) +#define mrb_test(o) ((o).tt != MRB_TT_FALSE) +#define mrb_fixnum(o) (o).value.i +#define mrb_float(o) (o).value.f +#define mrb_symbol(o) (o).value.sym +#define mrb_object(o) (o).value.p +#define FIXNUM_P(o) ((o).tt == MRB_TT_FIXNUM) +#define UNDEF_P(o) ((o).tt == MRB_TT_UNDEF) + +#include "mruby/object.h" + +#define IMMEDIATE_P(x) ((mrb_type(x) >= MRB_TT_FALSE) && (mrb_type(x) <= MRB_TT_FLOAT)) +#define SPECIAL_CONST_P(x) IMMEDIATE_P(x) +#define SYMBOL_P(o) (mrb_type(o) == MRB_TT_SYMBOL) +#define RTEST(o) mrb_test(o) + +#define FL_ABLE(x) (!SPECIAL_CONST_P(x)) +#define FL_TEST(x,f) (FL_ABLE(x)?(RBASIC(x)->flags&(f)):0) +#define FL_ANY(x,f) FL_TEST(x,f) +#define FL_ALL(x,f) (FL_TEST(x,f) == (f)) +#define FL_SET(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags |= (f);} while (0) +#define FL_UNSET(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags &= ~(f);} while (0) + +static inline mrb_int +mrb_special_const_p(mrb_value obj) +{ + if (SPECIAL_CONST_P(obj)) return 1; + return 0; +} +static inline mrb_value +mrb_fixnum_value(mrb_int i) +{ + mrb_value v; + + v.tt 
= MRB_TT_FIXNUM; + v.value.i = i; + return v; +} + +static inline mrb_value +mrb_float_value(mrb_float f) +{ + mrb_value v; + + v.tt = MRB_TT_FLOAT; + v.value.f = f; + return v; +} + +static inline mrb_value +mrb_symbol_value(mrb_sym i) +{ + mrb_value v; + + v.tt = MRB_TT_SYMBOL; + v.value.sym = i; + return v; +} + +static inline mrb_value +mrb_obj_value(void *p) +{ + mrb_value v; + struct RBasic *b = p; + + v.tt = b->tt; + v.value.p = p; + return v; +} + +static inline mrb_value +mrb_false_value() +{ + mrb_value v; + + v.tt = MRB_TT_FALSE; + v.value.i = 1; + return v; +} + +static inline mrb_value +mrb_nil_value() +{ + mrb_value v; + + v.tt = MRB_TT_FALSE; + v.value.p = 0; + return v; +} + +static inline mrb_value +mrb_true_value() +{ + mrb_value v; + + v.tt = MRB_TT_TRUE; + v.value.i = 1; + return v; +} + +static inline mrb_value +mrb_undef_value() +{ + mrb_value v; + + v.tt = MRB_TT_UNDEF; + v.value.i = 0; + return v; +} + +typedef int32_t mrb_code; + +struct mrb_state; + +typedef void* (*mrb_allocf) (struct mrb_state *mrb, void*, size_t); + +#define MRB_ARENA_SIZE 1024 //256 up kusuda 2011/04/30 +#define ruby_debug (mrb_nil_value()) +#define ruby_verbose (mrb_nil_value()) + +typedef struct { + mrb_sym mid; + struct RProc *proc; + int stackidx; + int nregs; + int argc; + mrb_code *pc; + int acc; + struct RClass *target_class; + int ridx; + int eidx; + struct REnv *env; +} mrb_callinfo; + +enum gc_state { + GC_STATE_NONE = 0, + GC_STATE_MARK, + GC_STATE_SWEEP +}; + +typedef struct mrb_state { + void *jmp; + + mrb_allocf allocf; + + mrb_value *stack; + mrb_value *stbase, *stend; + + mrb_callinfo *ci; + mrb_callinfo *cibase, *ciend; + + mrb_code **rescue; + int rsize; + struct RProc **ensure; + int esize; + + struct RObject *exc; + struct kh_iv *globals; + + struct mrb_irep **irep; + size_t irep_len, irep_capa; + + struct RClass *object_class; + struct RClass *class_class; + struct RClass *module_class; + struct RClass *proc_class; + struct RClass *string_class; + 
struct RClass *array_class; + struct RClass *hash_class; + struct RClass *range_class; +#ifdef INCLUDE_REGEXP + struct RClass *regex_class; + struct RClass *match_class; +#endif +#ifdef INCLUDE_ENCODING + struct RClass *encode_class; + struct RClass *converter_class; +#endif + + struct RClass *float_class; + struct RClass *fixnum_class; + struct RClass *true_class; + struct RClass *false_class; + struct RClass *nil_class; + struct RClass *symbol_class; + + struct RClass *kernel_module; + struct heap_page *heaps; + struct heap_page *sweeps; + struct heap_page *free_heaps; + size_t live; /* count of live objects */ + struct RBasic *arena[MRB_ARENA_SIZE]; + int arena_idx; + + enum gc_state gc_state; /* state of gc */ + int current_white_part; /* make white object by white_part */ + struct RBasic *gray_list; /* list of gray objects */ + struct RBasic *variable_gray_list; /* list of objects to be traversed atomically */ + size_t gc_live_after_mark; + size_t gc_threshold; + mrb_int gc_interval_ratio; + mrb_int gc_step_ratio; + + mrb_sym symidx; + struct kh_n2s *name2sym; /* symbol table */ + struct kh_s2n *sym2name; /* reverse symbol table */ + struct RNode *local_svar;/* regexp */ + + struct RClass *eException_class; + struct RClass *eStandardError_class; + struct RClass *eRuntimeError_class; +} mrb_state; + +typedef mrb_value (*mrb_func_t)(mrb_state *mrb, mrb_value); +typedef mrb_value (*mrb_funcargv_t)(mrb_state *mrb, mrb_value, int argc, mrb_value* argv); +struct RClass *mrb_define_class(mrb_state *, const char*, struct RClass*); +struct RClass *mrb_define_module(mrb_state *, const char*); +mrb_value mrb_singleton_class(mrb_state*, mrb_value); +void mrb_include_module(mrb_state*, struct RClass*, struct RClass*); + +void mrb_define_method(mrb_state*, struct RClass*, const char*, mrb_func_t,int); +void mrb_define_class_method(mrb_state *, struct RClass *, const char *, mrb_func_t, int); +void mrb_define_singleton_method(mrb_state*, void*, const char*, mrb_func_t,int); 
+void mrb_define_const(mrb_state*, struct RClass*, const char *name, mrb_value); +mrb_value mrb_instance_new(mrb_state *mrb, mrb_value cv); +struct RClass * mrb_class_new(mrb_state *mrb, struct RClass *super); +struct RClass * mrb_module_new(mrb_state *mrb); +struct RClass * mrb_class_from_sym(mrb_state *mrb, struct RClass *klass, mrb_sym name); +struct RClass * mrb_class_get(mrb_state *mrb, char *name); +struct RClass * mrb_class_obj_get(mrb_state *mrb, char *name); + +mrb_value mrb_obj_dup(mrb_state *mrb, mrb_value obj); +mrb_value mrb_check_to_integer(mrb_state *mrb, mrb_value val, const char *method); +int mrb_obj_respond_to(struct RClass* c, mrb_sym mid); +struct RClass * mrb_define_class_under(mrb_state *mrb, struct RClass *outer, const char *name, struct RClass *super); +struct RClass * mrb_define_module_under(mrb_state *mrb, struct RClass *outer, const char *name); + +/* required arguments */ +#define ARGS_REQ(n) (((n)&0x1f) << 19) +/* optional arguments */ +#define ARGS_OPT(n) (((n)&0x1f) << 14) +/* rest argument */ +#define ARGS_REST() (1 << 13) +/* required arguments after rest */ +#define ARGS_POST(n) (((n)&0x1f) << 8) +/* keyword arguments (n of keys, kdict) */ +#define ARGS_KEY(n1,n2) ((((n1)&0x1f) << 3) | ((n2)?(1<<2):0)) +/* block argument */ +#define ARGS_BLOCK() (1 << 1) + +/* accept any number of arguments */ +#define ARGS_ANY() ARGS_REST() +/* accept no arguments */ +#define ARGS_NONE() 0 + +int mrb_get_args(mrb_state *mrb, const char *format, ...); + +mrb_value mrb_funcall(mrb_state*, mrb_value, const char*, int,...); +mrb_value mrb_funcall_argv(mrb_state*, mrb_value, const char*, int, mrb_value*); +mrb_value mrb_funcall_with_block(mrb_state*, mrb_value, const char*, int, mrb_value*, struct RProc*); +mrb_sym mrb_intern(mrb_state*,const char*); +const char *mrb_sym2name(mrb_state*,mrb_sym); +mrb_value mrb_str_format(mrb_state *, int, const mrb_value *, mrb_value); + +void *mrb_malloc(mrb_state*, size_t); +void *mrb_calloc(mrb_state*, size_t, 
size_t); +void *mrb_realloc(mrb_state*, void*, size_t); +void *mrb_obj_alloc(mrb_state*, enum mrb_vtype, struct RClass*); +void *mrb_free(mrb_state*, void*); + +mrb_value mrb_str_new_cstr(mrb_state*, const char*); + +mrb_state* mrb_open(void); +mrb_state* mrb_open_allocf(mrb_allocf); +void mrb_close(mrb_state*); +int mrb_checkstack(mrb_state*,int); + +mrb_value mrb_run(mrb_state*, struct RProc*, mrb_value); + +mrb_value mrb_p(mrb_state*, mrb_value); +int mrb_obj_id(mrb_value obj); +mrb_sym mrb_to_id(mrb_state *mrb, mrb_value name); + +int mrb_obj_equal(mrb_state*, mrb_value, mrb_value); +int mrb_equal(mrb_state *mrb, mrb_value obj1, mrb_value obj2); +mrb_value mrb_Integer(mrb_state *mrb, mrb_value val); +mrb_value mrb_Float(mrb_state *mrb, mrb_value val); +mrb_value mrb_inspect(mrb_state *mrb, mrb_value obj); +int mrb_eql(mrb_state *mrb, mrb_value obj1, mrb_value obj2); + +void mrb_garbage_collect(mrb_state*); +void mrb_incremental_gc(mrb_state *); +int mrb_gc_arena_save(mrb_state*); +void mrb_gc_arena_restore(mrb_state*,int); +void mrb_gc_mark(mrb_state*,struct RBasic*); +#define mrb_gc_mark_value(mrb,val) do { /* calls mrb_gc_mark only for values tagged MRB_TT_OBJECT or above; no trailing ';' so the macro is a single statement inside if/else */\ + if ((val).tt >= MRB_TT_OBJECT) mrb_gc_mark((mrb), mrb_object(val));\ +} while (0) +void mrb_gc_mark_gv(mrb_state*); +void mrb_gc_free_gv(mrb_state*); +void mrb_gc_mark_iv(mrb_state*, struct RObject*); +size_t mrb_gc_mark_iv_size(mrb_state*, struct RObject*); +void mrb_gc_free_iv(mrb_state*, struct RObject*); +void mrb_gc_mark_mt(mrb_state*, struct RClass*); +size_t mrb_gc_mark_mt_size(mrb_state*, struct RClass*); +void mrb_gc_free_mt(mrb_state*, struct RClass*); +void mrb_gc_mark_ht(mrb_state*, struct RClass*); +size_t mrb_gc_mark_ht_size(mrb_state*, struct RClass*); +void mrb_gc_free_ht(mrb_state*, struct RClass*); +void mrb_field_write_barrier(mrb_state *, struct RBasic*, struct RBasic*); +#define mrb_field_write_barrier_value(mrb, obj, val) do{ /* calls mrb_field_write_barrier only for values tagged MRB_TT_OBJECT or above; val is parenthesized so any expression may be passed */\ + if ((val).tt >= MRB_TT_OBJECT) mrb_field_write_barrier((mrb), (obj), mrb_object(val));\ +} while (0) 
+void mrb_write_barrier(mrb_state *, struct RBasic*); + +#define MRUBY_VERSION "Rite" + +#if 0 +#define DEBUG(x) x +#else +#define DEBUG(x) +#endif + +mrb_value mrb_check_convert_type(mrb_state *mrb, mrb_value val, mrb_int type, const char *tname, const char *method); +mrb_value mrb_any_to_s(mrb_state *mrb, mrb_value obj); +const char * mrb_obj_classname(mrb_state *mrb, mrb_value obj); +struct RClass* mrb_obj_class(mrb_state *mrb, mrb_value obj); +mrb_value mrb_class_path(mrb_state *mrb, struct RClass *c); +mrb_value mrb_convert_type(mrb_state *mrb, mrb_value val, mrb_int type, const char *tname, const char *method); +mrb_int mrb_obj_is_kind_of(mrb_state *mrb, mrb_value obj, struct RClass *c); +mrb_value mrb_obj_inspect(mrb_state *mrb, mrb_value self); +mrb_value mrb_obj_clone(mrb_state *mrb, mrb_value self); +mrb_value mrb_check_funcall(mrb_state *mrb, mrb_value recv, mrb_sym mid, int argc, mrb_value *argv); + +/* need to include <ctype.h> to use these macros */ +#ifndef ISPRINT +//#define ISASCII(c) isascii((int)(unsigned char)(c)) +#define ISASCII(c) 1 +#undef ISPRINT +#define ISPRINT(c) (ISASCII(c) && isprint((int)(unsigned char)(c))) +#define ISSPACE(c) (ISASCII(c) && isspace((int)(unsigned char)(c))) +#define ISUPPER(c) (ISASCII(c) && isupper((int)(unsigned char)(c))) +#define ISLOWER(c) (ISASCII(c) && islower((int)(unsigned char)(c))) +#define ISALNUM(c) (ISASCII(c) && isalnum((int)(unsigned char)(c))) +#define ISALPHA(c) (ISASCII(c) && isalpha((int)(unsigned char)(c))) +#define ISDIGIT(c) (ISASCII(c) && isdigit((int)(unsigned char)(c))) +#define ISXDIGIT(c) (ISASCII(c) && isxdigit((int)(unsigned char)(c))) +#endif + +extern mrb_value mrb_rs; +extern mrb_value mrb_default_rs; + +int mrb_block_given_p(void); +void mrb_raise(mrb_state *mrb, struct RClass *c, const char *fmt, ...); +void rb_raise(struct RClass *c, const char *fmt, ...); +void mrb_warn(const char *fmt, ...); +void mrb_warning(const char *fmt, ...); +void mrb_bug(const char *fmt, ...); + +#define 
E_TYPE_ERROR (mrb_class_obj_get(mrb, "TypeError")) +#define E_ARGUMENT_ERROR (mrb_class_obj_get(mrb, "ArgumentError")) +#define E_INDEX_ERROR (mrb_class_obj_get(mrb, "IndexError")) +#define E_RANGE_ERROR (mrb_class_obj_get(mrb, "RangeError")) +#define E_NAME_ERROR (mrb_class_obj_get(mrb, "NameError")) +#define E_NOMETHOD_ERROR (mrb_class_obj_get(mrb, "NoMethodError")) +#define E_SCRIPT_ERROR (mrb_class_obj_get(mrb, "ScriptError")) +#define E_SYNTAX_ERROR (mrb_class_obj_get(mrb, "SyntaxError")) +#define E_LOAD_ERROR (mrb_class_obj_get(mrb, "LoadError")) +#define E_SYSTEMCALL_ERROR (mrb_class_obj_get(mrb, "SystemCallError")) +#define E_LOCALJUMP_ERROR (mrb_class_obj_get(mrb, "LocalJumpError")) +#define E_REGEXP_ERROR (mrb_class_obj_get(mrb, "RegexpError")) +#define E_ZERODIVISION_ERROR (mrb_class_obj_get(mrb, "ZeroDivisionError")) + +#define E_ENCODING_ERROR (mrb_class_obj_get(mrb, "EncodingError")) +#define E_NOTIMP_ERROR (mrb_class_obj_get(mrb, "NotImplementedError")) +#define E_FLOATDOMAIN_ERROR (mrb_class_obj_get(mrb, "FloatDomainError")) + +#define E_KEY_ERROR (mrb_class_obj_get(mrb, "KeyError")) + +#define SYM2ID(x) ((x).value.sym) + +#define CONST_ID_CACHE(mrb, result, str) \ + { \ + static mrb_sym mrb_intern_id_cache;\ + if (!mrb_intern_id_cache) \ + mrb_intern_id_cache = mrb_intern(mrb, str); \ + result mrb_intern_id_cache; \ + } +#define CONST_ID(mrb, var, str) \ + do CONST_ID_CACHE(mrb, var =, str) while (0) + +#define NUM2CHR_internal(x) (((mrb_type(x) == MRB_TT_STRING)&&(RSTRING_LEN(x)>=1))?\ + RSTRING_PTR(x)[0]:(char)(mrb_fixnum_number(x)&0xff)) +#ifdef __GNUC__ +# define NUM2CHR(x) __extension__ ({mrb_value num2chr_x = (x); NUM2CHR_internal(num2chr_x);}) +#else +static inline char +NUM2CHR(mrb_value x) +{ + return NUM2CHR_internal(x); +} +#endif +mrb_value mrb_io_gets(mrb_state *mrb, mrb_value); +mrb_value mrb_io_getbyte(mrb_state *mrb, mrb_value); +mrb_value mrb_io_ungetc(mrb_state *, mrb_value, mrb_value); +mrb_value mrb_io_ungetbyte(mrb_state *mrb, 
mrb_value, mrb_value); +mrb_value mrb_io_close(mrb_state *mrb, mrb_value); +mrb_value mrb_io_flush(mrb_state *mrb, mrb_value); +mrb_value mrb_io_eof(mrb_value); +mrb_value mrb_io_binmode(mrb_state *mrb, mrb_value); +mrb_value mrb_io_ascii8bit_binmode(mrb_value); +mrb_value mrb_io_addstr(mrb_state *mrb, mrb_value, mrb_value); +mrb_value mrb_io_printf(mrb_state *mrb, int, mrb_value*, mrb_value); +mrb_value mrb_io_print(mrb_state *mrb, int, mrb_value*, mrb_value); +mrb_value mrb_io_puts(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value); +mrb_value mrb_io_fdopen(int, int, const char*); +mrb_value mrb_io_get_io(mrb_state *mrb, mrb_value); +mrb_value mrb_file_open(mrb_state *mrb, const char*, const char*); +mrb_value mrb_file_open_str(mrb_value, const char*); +mrb_value mrb_gets(mrb_state *mrb); + +mrb_value mrb_yield(mrb_state *mrb, mrb_value v, mrb_value blk); +mrb_value mrb_yield_argv(mrb_state *mrb, mrb_value b, int argc, mrb_value *argv); +mrb_value mrb_yield_with_self(mrb_state *mrb, mrb_value b, int argc, mrb_value *argv, mrb_value self); +mrb_value mrb_class_new_instance(mrb_state *mrb, int, mrb_value*, struct RClass *); +mrb_value mrb_class_new_instance_m(mrb_state *mrb, mrb_value klass); + +mrb_value mrb_exec_recursive(mrb_state *mrb, mrb_value(*)(mrb_state *, mrb_value, mrb_value, int),mrb_value,void *); + +#ifndef xmalloc +#define xmalloc malloc +#define xrealloc realloc +#define xcalloc calloc +#define xfree free +#endif + +void mrb_gc(void); +#define thread_debug if(0)printf + +#define RUBY_VM 1 /* YARV */ +#define HAVE_NATIVETHREAD +int ruby_native_thread_p(void); + +#define RUBY_EVENT_NONE 0x0000 +#define RUBY_EVENT_LINE 0x0001 +#define RUBY_EVENT_CLASS 0x0002 +#define RUBY_EVENT_END 0x0004 +#define RUBY_EVENT_CALL 0x0008 +#define RUBY_EVENT_RETURN 0x0010 +#define RUBY_EVENT_C_CALL 0x0020 +#define RUBY_EVENT_C_RETURN 0x0040 +#define RUBY_EVENT_RAISE 0x0080 +#define RUBY_EVENT_ALL 0xffff +#define RUBY_EVENT_VM 0x10000 +#define RUBY_EVENT_SWITCH 
0x20000 +#define RUBY_EVENT_COVERAGE 0x40000 + +typedef unsigned int mrb_event_flag_t; +typedef void (*mrb_event_hook_func_t)(mrb_state *mrb, mrb_event_flag_t, mrb_value data, mrb_value, mrb_sym, mrb_value klass); + +typedef struct mrb_event_hook_struct { + mrb_event_flag_t flag; + mrb_event_hook_func_t func; + mrb_value data; + struct mrb_event_hook_struct *next; +} mrb_event_hook_t; + +#define RB_EVENT_HOOKS_HAVE_CALLBACK_DATA 1 +void mrb_add_event_hook(mrb_state *mrb, mrb_event_hook_func_t func, mrb_event_flag_t events, + mrb_value data); +int mrb_remove_event_hook(mrb_event_hook_func_t func); +mrb_value mrb_to_int(mrb_state *mrb, mrb_value val); +void mrb_check_type(mrb_state *mrb, mrb_value x, enum mrb_vtype t); + +//#define RUBY_SETJMP(env) ${setjmp_prefix}setjmp(env${setjmp_sigmask+,0}) +//#define RUBY_LONGJMP(env,val) ${setjmp_prefix}longjmp(env,val) +//#define RUBY_JMP_BUF ${setjmp_sigmask+${setjmp_prefix}}jmp_buf +#define RUBY_SETJMP(env) __builtin_setjmp(env) +#define RUBY_LONGJMP(env,val) __builtin_longjmp(env,1)//(env,val) +//#define RUBY_JMP_BUF ${setjmp_sigmask+${setjmp_prefix}}jmp_buf +#define select(n, r, w, e, t) select_large_fdset(n, r, w, e, t) + +//int RUBY_SETJMP(mrb_jmpbuf_t env); /* add kusuda */ +#define ruby_setjmp(env) RUBY_SETJMP(env) +#define ruby_longjmp(env,val) RUBY_LONGJMP(env,val) + +#if defined PRIdPTR && !defined PRI_VALUE_PREFIX +#define PRIdVALUE PRIdPTR +#define PRIiVALUE PRIiPTR +#define PRIoVALUE PRIoPTR +#define PRIuVALUE PRIuPTR +#define PRIxVALUE PRIxPTR +#define PRIXVALUE PRIXPTR +#else +#define PRIdVALUE PRI_VALUE_PREFIX"d" +#define PRIiVALUE PRI_VALUE_PREFIX"i" +#define PRIoVALUE PRI_VALUE_PREFIX"o" +#define PRIuVALUE PRI_VALUE_PREFIX"u" +#define PRIxVALUE PRI_VALUE_PREFIX"x" +#define PRIXVALUE PRI_VALUE_PREFIX"X" +#endif +#ifndef PRI_VALUE_PREFIX +# define PRI_VALUE_PREFIX "" +#endif + +#if defined PRIdPTR +# define PRI_PTRDIFF_PREFIX "t" +#elif SIZEOF_PTRDIFF_T == SIZEOF_INT +# define PRI_PTRDIFF_PREFIX +#elif 
SIZEOF_PTRDIFF_T == SIZEOF_LONG +# define PRI_PTRDIFF_PREFIX "l" +#elif SIZEOF_PTRDIFF_T == SIZEOF_LONG_LONG +# define PRI_PTRDIFF_PREFIX "ll" +#else +# define PRI_PTRDIFF_PREFIX +#endif +#define PRIdPTRDIFF PRI_PTRDIFF_PREFIX"d" +#define PRIiPTRDIFF PRI_PTRDIFF_PREFIX"i" +#define PRIoPTRDIFF PRI_PTRDIFF_PREFIX"o" +#define PRIuPTRDIFF PRI_PTRDIFF_PREFIX"u" +#define PRIxPTRDIFF PRI_PTRDIFF_PREFIX"x" +#define PRIXPTRDIFF PRI_PTRDIFF_PREFIX"X" + +#if defined PRIdPTR +# define PRI_SIZE_PREFIX "z" +#elif SIZEOF_SIZE_T == SIZEOF_INT +# define PRI_SIZE_PREFIX +#elif SIZEOF_SIZE_T == SIZEOF_LONG +# define PRI_SIZE_PREFIX "l" +#elif SIZEOF_SIZE_T == SIZEOF_LONG_LONG +# define PRI_SIZE_PREFIX "ll" +#endif +#define PRIdSIZE PRI_SIZE_PREFIX"d" +#define PRIiSIZE PRI_SIZE_PREFIX"i" +#define PRIoSIZE PRI_SIZE_PREFIX"o" +#define PRIuSIZE PRI_SIZE_PREFIX"u" +#define PRIxSIZE PRI_SIZE_PREFIX"x" +#define PRIXSIZE PRI_SIZE_PREFIX"X" +#define PRIdPTRDIFF PRI_PTRDIFF_PREFIX"d" + +#define KHASH 0 +#define STHASH 1 +#define BASICHASH 2 +#define HASH_CLASS_METHOD BASICHASH + +typedef enum call_type { + CALL_PUBLIC, + CALL_FCALL, + CALL_VCALL, + CALL_TYPE_MAX +} call_type; + +/* compar.c */ +void mrb_cmperr(mrb_state *mrb, mrb_value x, mrb_value y); +int mrb_cmpint(mrb_state *mrb, mrb_value val, mrb_value a, mrb_value b); + +#define REALLOC_N(mrb,var,type,n) (var)=(type*)mrb_realloc(mrb, (char*)(var), sizeof(type)*(n)) + +#ifndef ANYARGS +# ifdef __cplusplus +# define ANYARGS ... 
+# else +# define ANYARGS +# endif +#endif +void st_foreach_safe(mrb_state *mrb, void *table, int (*func)(ANYARGS), void * a); +void mrb_define_alias(mrb_state *mrb, struct RClass *klass, const char *name1, const char *name2); +const char *mrb_class_name(mrb_state *mrb, struct RClass* klass); +void mrb_define_global_const(mrb_state *mrb, const char *name, mrb_value val); + +mrb_value mrb_block_proc(void); +int mrb_sourceline(void); +void ruby_default_signal(int sig); +mrb_value mrb_attr_get(mrb_state *mrb, mrb_value obj, mrb_sym id); + +#endif /* MRUBY_H */ diff --git a/include/mruby/array.h b/include/mruby/array.h new file mode 100644 index 0000000000..6f382ef6ac --- /dev/null +++ b/include/mruby/array.h @@ -0,0 +1,42 @@ +#ifndef MRUBY_ARRAY_H +#define MRUBY_ARRAY_H + +struct RArray { + MRUBY_OBJECT_HEADER; + size_t len; + size_t capa; + mrb_value *buf; +}; + +#define mrb_ary_ptr(v) ((struct RArray*)((v).value.p)) +#define mrb_ary_value(p) mrb_obj_value((void*)(p)) +#define RARRAY(v) ((struct RArray*)((v).value.p)) + +#define RARRAY_LEN(a) (RARRAY(a)->len) +#define RARRAY_PTR(a) (RARRAY(a)->buf) + +mrb_value mrb_ary_new_capa(mrb_state*, size_t); +mrb_value mrb_ary_new(mrb_state *mrb); +mrb_value mrb_ary_new_elts(mrb_state *mrb, long n, const mrb_value *elts); +void mrb_ary_concat(mrb_state*, mrb_value, mrb_value); +mrb_value mrb_ary_splat(mrb_state*, mrb_value); +void mrb_ary_push(mrb_state*, mrb_value, mrb_value); +mrb_value mrb_ary_pop(mrb_state *mrb, mrb_value ary); +mrb_value mrb_ary_new_from_values(mrb_state *mrb, mrb_value *vals, size_t size); +mrb_value mrb_ary_aget(mrb_state *mrb, mrb_value self); +mrb_value mrb_ary_ref(mrb_state *mrb, mrb_value ary, mrb_int n); +void mrb_ary_set(mrb_state *mrb, mrb_value ary, mrb_int n, mrb_value val); +int mrb_ary_len(mrb_state *mrb, mrb_value ary); +mrb_value mrb_ary_replace_m(mrb_state *mrb, mrb_value self); +void mrb_ary_replace(mrb_state *mrb, struct RArray *a, mrb_value *argv, size_t len); +mrb_value 
mrb_check_array_type(mrb_state *mrb, mrb_value self); +mrb_value mrb_ary_unshift(mrb_state *mrb, mrb_value self, mrb_value item); +mrb_value mrb_ary_new4(mrb_state *mrb, long n, const mrb_value *elts); +mrb_value mrb_assoc_new(mrb_state *mrb, mrb_value car, mrb_value cdr); +mrb_value mrb_ary_entry(mrb_value ary, long offset); +void mrb_mem_clear(mrb_value *mem, long size); +mrb_value mrb_ary_tmp_new(mrb_state *mrb, long capa); +mrb_value mrb_ary_sort(mrb_state *mrb, mrb_value ary); +mrb_value mrb_ary_shift(mrb_state *mrb, mrb_value self); + +#endif /* MRUBY_ARRAY_H */ diff --git a/include/mruby/class.h b/include/mruby/class.h new file mode 100644 index 0000000000..470f517c3e --- /dev/null +++ b/include/mruby/class.h @@ -0,0 +1,71 @@ +#ifndef MRUBY_CLASS_H +#define MRUBY_CLASS_H + +struct RClass { + MRUBY_OBJECT_HEADER; + struct kh_iv *iv; + struct kh_mt *mt; + struct RClass *super; +}; + +#define mrb_class_ptr(v) ((struct RClass*)((v).value.p)) +#define RCLASS_SUPER(v) (((struct RClass*)((v).value.p))->super) +#define RCLASS_IV_TBL(v) (((struct RClass*)((v).value.p))->iv) +#define RCLASS_M_TBL(v) (((struct RClass*)((v).value.p))->mt) + +static inline struct RClass* +mrb_class(mrb_state *mrb, mrb_value v) +{ + switch (mrb_type(v)) { + case MRB_TT_FALSE: + if (v.value.p) + return mrb->false_class; + return mrb->nil_class; + case MRB_TT_TRUE: + return mrb->true_class; + case MRB_TT_SYMBOL: + return mrb->symbol_class; + case MRB_TT_FIXNUM: + return mrb->fixnum_class; + case MRB_TT_FLOAT: + return mrb->float_class; + +#ifdef INCLUDE_REGEXP +// case MRB_TT_REGEX: +// return mrb->regex_class; +// case MRB_TT_MATCH: +// return mrb->match_class; +// case MRB_TT_DATA: +// return mrb->encode_class; +#else + case MRB_TT_REGEX: + case MRB_TT_MATCH: + mrb_raise(mrb, E_TYPE_ERROR, "type mismatch: %s given", + mrb_obj_classname(mrb, v)); + return mrb->nil_class; /* not reach */ +#endif + default: + return ((struct RBasic*)mrb_object(v))->c; + } +} + +#define MRB_SET_INSTANCE_TT(c, 
tt) c->flags = ((c->flags & ~0xff) | (char)tt) +#define MRB_INSTANCE_TT(c) (enum mrb_vtype)(c->flags & 0xff) + +struct RClass* mrb_define_class_id(mrb_state*, mrb_sym, struct RClass*); +struct RClass* mrb_define_module_id(mrb_state*, mrb_sym); +struct RClass *mrb_vm_define_class(mrb_state*, mrb_value, mrb_value, mrb_sym); +struct RClass *mrb_vm_define_module(mrb_state*, mrb_value, mrb_sym); +void mrb_define_method_vm(mrb_state*, struct RClass*, mrb_sym, mrb_value); +void mrb_define_method_raw(mrb_state*, struct RClass*, mrb_sym, struct RProc *); + +struct RClass *mrb_class_outer_module(mrb_state*, struct RClass *); +struct RProc *mrb_method_search_vm(mrb_state*, struct RClass**, mrb_sym); +struct RProc *mrb_method_search(mrb_state*, struct RClass*, mrb_sym); + +int mrb_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym mid); +void mrb_define_method_id(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_func_t func, int aspec); + +void mrb_obj_call_init(mrb_state *mrb, mrb_value obj, int argc, mrb_value *argv); + +#endif /* MRUBY_CLASS_H */ diff --git a/include/mruby/hash.h b/include/mruby/hash.h new file mode 100644 index 0000000000..b2c3d69c8a --- /dev/null +++ b/include/mruby/hash.h @@ -0,0 +1,59 @@ +#ifndef MRUBY_HASH_H +#define MRUBY_HASH_H + +struct RHash { + MRUBY_OBJECT_HEADER; + struct kh_ht *ht; + mrb_value ifnone; +}; + +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0dfU /* constant vector a */ +#define UMASK 0x80000000U /* most significant w-r bits */ +#define LMASK 0x7fffffffU /* least significant r bits */ +#define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) ) +#define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1U ? 
MATRIX_A : 0U)) +enum {MT_MAX_STATE = N}; + +struct MT { + /* assume int is enough to store 32bits */ + unsigned int state[N]; /* the array for the state vector */ + unsigned int *next; + int left; +}; + +#define mrb_hash_end(h) st_hash_end(h) +#define mrb_hash_uint(h, i) st_hash_uint(h, i) + +#define mrb_hash_ptr(v) ((struct RHash*)((v).value.p)) +#define mrb_hash_value(p) mrb_obj_value((void*)(p)) + +mrb_value mrb_hash_new_capa(mrb_state*, size_t); +mrb_value mrb_hash_new(mrb_state *mrb, int capa); + +void mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val); +mrb_value mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key); +mrb_value mrb_hash_getWithDef(mrb_state *mrb, mrb_value hash, mrb_value vkey, mrb_value def); +mrb_value mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key); +mrb_value mrb_hash(mrb_state *mrb, mrb_value obj); +void ruby_setenv(mrb_state *mrb, const char *name, const char *value); + +/* RHASH_TBL allocates st_table if not available. 
*/ +#define RHASH(obj) ((struct RHash*)((obj).value.p)) +#define RHASH_TBL(h) mrb_hash_tbl(h) +#define RHASH_H_TBL(h) (RHASH(h)->ht) +#define RHASH_SIZE(h) (RHASH_H_TBL(h)->size) +#define RHASH_EMPTY_P(h) (RHASH_SIZE(h) == 0) +#define RHASH_IFNONE(h) (RHASH(h)->ifnone) +#define RHASH_PROCDEFAULT(h) (RHASH(h)->ifnone) +struct kh_ht * mrb_hash_tbl(mrb_state *mrb, mrb_value hash); + +#define MRB_HASH_PROC_DEFAULT 256 +#define MRB_RHASH_PROCDEFAULT_P(h) (RHASH(h)->flags & MRB_HASH_PROC_DEFAULT) + +char * ruby_strdup(const char *str); +void mrb_reset_random_seed(void); +mrb_value mrb_obj_is_proc(mrb_value proc); + +#endif /* MRUBY_HASH_H */ diff --git a/include/mruby/numeric.h b/include/mruby/numeric.h new file mode 100644 index 0000000000..ee559fa1d2 --- /dev/null +++ b/include/mruby/numeric.h @@ -0,0 +1,17 @@ +#ifndef MRUBY_NUMERIC_H +#define MRUBY_NUMERIC_H + +#include <limits.h> + +#define RSHIFT(x,y) ((x)>>(int)(y)) +#define FIXNUM_MAX (LONG_MAX>>1) +#define FIXNUM_MIN RSHIFT((long)LONG_MIN,1) +#define POSFIXABLE(f) ((f) < FIXNUM_MAX+1) +#define NEGFIXABLE(f) ((f) >= FIXNUM_MIN) +#define FIXABLE(f) (POSFIXABLE(f) && NEGFIXABLE(f)) + +mrb_value mrb_dbl2big(mrb_state *mrb, float d); +void mrb_num_zerodiv(mrb_state *mrb); +mrb_value mrb_fix2str(mrb_state *mrb, mrb_value x, int base); + +#endif /* MRUBY_NUMERIC_H */ diff --git a/include/mruby/object.h b/include/mruby/object.h new file mode 100644 index 0000000000..151e36b968 --- /dev/null +++ b/include/mruby/object.h @@ -0,0 +1,46 @@ +#ifndef MRUBY_OBJECT_H +#define MRUBY_OBJECT_H + +#define MRUBY_OBJECT_HEADER \ + enum mrb_vtype tt:8;\ + int color:3;\ + unsigned int flags:21;\ + struct RClass *c;\ + struct RBasic *gcnext; + + +/* white: 011, black: 100, gray: 000 */ +#define MRB_GC_GRAY 0 +#define MRB_GC_WHITE_A 1 +#define MRB_GC_WHITE_B (1 << 1) +#define MRB_GC_BLACK (1 << 2) +#define MRB_GC_WHITES (MRB_GC_WHITE_A | MRB_GC_WHITE_B) +#define MRB_GC_COLOR_MASK 7 + +#define paint_gray(o) ((o)->color = MRB_GC_GRAY) +#define 
paint_black(o) ((o)->color = MRB_GC_BLACK) +#define paint_white(o) ((o)->color = MRB_GC_WHITES) +#define paint_partial_white(s, o) ((o)->color = (s)->current_white_part) +#define is_gray(o) ((o)->color == MRB_GC_GRAY) +#define is_white(o) ((o)->color & MRB_GC_WHITES) +#define is_black(o) ((o)->color & MRB_GC_BLACK) +#define is_dead(s, o) (((o)->color & other_white_part(s) & MRB_GC_WHITES) || (o)->tt == MRB_TT_FREE) +#define flip_white_part(s) ((s)->current_white_part = other_white_part(s)) +#define other_white_part(s) ((s)->current_white_part ^ MRB_GC_WHITES) + +struct RBasic { + MRUBY_OBJECT_HEADER; +}; + +struct RObject { + MRUBY_OBJECT_HEADER; + struct kh_iv *iv; +}; + +#define mrb_obj_ptr(v) ((struct RObject*)((v).value.p)) +#define RBASIC(obj) ((struct RBasic*)((obj).value.p)) +#define RBASIC_KLASS(v) ((struct RClass *)(((struct RBasic*)((v).value.p))->c)) +#define ROBJECT(v) ((struct RObject*)((v).value.p)) +#define ROBJECT_IVPTR(v) (((struct RObject*)((v).value.p))->iv) +#define ROBJECT_NUMIV(v) (ROBJECT_IVPTR(v) ? 
ROBJECT_IVPTR(v)->size : 0) +#endif /* MRUBY_OBJECT_H */ diff --git a/include/mruby/proc.h b/include/mruby/proc.h new file mode 100644 index 0000000000..eafe54cf9a --- /dev/null +++ b/include/mruby/proc.h @@ -0,0 +1,44 @@ +#ifndef MRUBY_PROC_H +#define MRUBY_PROC_H + +#include "mruby.h" +#include "irep.h" + +struct REnv { + MRUBY_OBJECT_HEADER; + mrb_value *stack; + mrb_sym mid; + int cioff; +}; + +struct RProc { + MRUBY_OBJECT_HEADER; + union { + mrb_irep *irep; + mrb_func_t func; + } body; + struct RClass *target_class; + struct REnv *env; +}; + +/* aspec access */ +#define ARGS_GETREQ(a) (((a) >> 19) & 0x1f) +#define ARGS_GETOPT(a) (((a) >> 14) & 0x1f) +#define ARGS_GETREST(a) ((a) & (1<<13)) +#define ARGS_GETPOST(a) (((a) >> 8) & 0x1f) +#define ARGS_GETKEY(a) (((a) >> 3) & 0x1f) +#define ARGS_GETKDICT(a) ((a) & (1<<2)) +#define ARGS_GETBLOCK(a) ((a) & (1<<1)) + +#define MRB_PROC_CFUNC 128 +#define MRB_PROC_CFUNC_P(p) ((p)->flags & MRB_PROC_CFUNC) +#define MRB_PROC_STRICT 256 +#define MRB_PROC_STRICT_P(p) ((p)->flags & MRB_PROC_STRICT) + +#define mrb_proc_ptr(v) ((struct RProc*)((v).value.p)) + +struct RProc *mrb_proc_new(mrb_state*, mrb_irep*); +struct RProc *mrb_proc_new_cfunc(mrb_state*, mrb_func_t); +struct RProc *mrb_closure_new(mrb_state*, mrb_irep*); + +#endif /* MRUBY_PROC_H */ diff --git a/include/mruby/range.h b/include/mruby/range.h new file mode 100644 index 0000000000..d25690ab38 --- /dev/null +++ b/include/mruby/range.h @@ -0,0 +1,21 @@ +#ifndef MRUBY_RANGE_H +#define MRUBY_RANGE_H + +struct RRange { + MRUBY_OBJECT_HEADER; + struct mrb_range_edges { + mrb_value beg; + mrb_value end; + } *edges; + int excl; +}; + +#define mrb_range_ptr(v) ((struct RRange*)((v).value.p)) +#define mrb_range_value(p) mrb_obj_value((void*)(p)) + +mrb_value mrb_range_new(mrb_state*, mrb_value, mrb_value, int); +mrb_int mrb_range_beg_len(mrb_state *mrb, mrb_value range, mrb_int *begp, mrb_int *lenp, mrb_int len, mrb_int err); +int mrb_obj_is_instance_of(mrb_state *mrb, 
mrb_value obj, struct RClass* c); +struct RClass* mrb_class_real(struct RClass* cl); + +#endif /* MRUBY_RANGE_H */ diff --git a/include/mruby/string.h b/include/mruby/string.h new file mode 100644 index 0000000000..e889d8447a --- /dev/null +++ b/include/mruby/string.h @@ -0,0 +1,133 @@ +#ifndef MRUBY_STRING_H +#define MRUBY_STRING_H + +#ifdef INCLUDE_ENCODING +#include "encoding.h" +#endif + +#ifndef RB_GC_GUARD +#define RB_GC_GUARD(v) v +#endif + +#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{')) + +#define mrb_str_new4 mrb_str_new_frozen + +#define STR_BUF_MIN_SIZE 128 +//#define RSTRING_EMBED_LEN_MAX STR_BUF_MIN_SIZE + +extern const char ruby_digitmap[]; + +struct RString { + MRUBY_OBJECT_HEADER; + size_t len; + union { + size_t capa; + mrb_value shared; + } aux; + char *buf; +}; + +extern struct SCOPE { + struct RBasic super; + mrb_sym *local_tbl; + mrb_value *local_vars; + int flags; +} *ruby_scope; + +struct RVarmap { + struct RBasic super; + mrb_sym id; + mrb_value val; + struct RVarmap *next; +}; +extern struct RVarmap *ruby_dyna_vars; + +//struct st_hash_type { +// int (*compare)(); +// int (*hash)(); +//}; + +#define mrb_str_ptr(s) ((struct RString*)((s).value.p)) +#define RSTRING(s) ((struct RString*)((s).value.p)) +#define RSTRING_PTR(s) (RSTRING(s)->buf) +#define RSTRING_LEN(s) (RSTRING(s)->len) +#define RSTRING_CAPA(s) (RSTRING(s)->aux.capa) +#define RSTRING_SHARED(s) (RSTRING(s)->aux.shared) +#define RSTRING_END(s) (RSTRING(s)->buf + RSTRING(s)->len) + +#define MRB_STR_SHARED 256 +#define MRB_STR_SHARED_P(s) (FL_ALL(s, MRB_STR_SHARED)) +#define MRB_STR_NOCAPA (MRB_STR_SHARED) +#define MRB_STR_NOCAPA_P(s) (FL_ANY(s, MRB_STR_NOCAPA)) +#define MRB_STR_UNSET_NOCAPA(s) do {\ + FL_UNSET(s, MRB_STR_NOCAPA);\ +} while (0) + +mrb_value mrb_str_literal(mrb_state*, mrb_value); +void mrb_str_concat(mrb_state*, mrb_value, mrb_value); +mrb_value mrb_obj_to_str(mrb_state*, mrb_value); +mrb_value mrb_str_plus(mrb_state*, mrb_value, 
mrb_value); +mrb_value mrb_obj_as_string(mrb_state *mrb, mrb_value obj); +mrb_value mrb_str_new(mrb_state *mrb, const char *p, size_t len); /* mrb_str_new */ +mrb_value mrb_str_resize(mrb_state *mrb, mrb_value str, size_t len); /* mrb_str_resize */ +mrb_value mrb_string_value(mrb_state *mrb, mrb_value *ptr); /* StringValue */ +mrb_value mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len); +mrb_value mrb_check_string_type(mrb_state *mrb, mrb_value str); +mrb_value mrb_str_buf_new(mrb_state *mrb, size_t capa); +mrb_value mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len); +mrb_value str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len); + +char * mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr); +char * mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr); +mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len); +size_t mrb_str_sublen(mrb_state *mrb, mrb_value str, long pos); +mrb_value mrb_str_size(mrb_state *mrb, mrb_value self); +long mrb_str_offset(mrb_state *mrb, mrb_value str, long pos); +mrb_value mrb_str_new2(mrb_state *mrb, const char *p); +mrb_value mrb_str_dup(mrb_state *mrb, mrb_value str); /* mrb_str_dup */ +mrb_value mrb_str_new_frozen(mrb_state *mrb, mrb_value orig); +mrb_value mrb_lastline_get(mrb_state *mrb); +mrb_value mrb_usascii_str_new(mrb_state *mrb, const char *ptr, long len); +void mrb_lastline_set(mrb_value val); +mrb_value mrb_str_buf_cat_ascii(mrb_state *mrb, mrb_value str, const char *ptr); +void mrb_str_modify(mrb_state *mrb, mrb_value str); +void mrb_str_set_len(mrb_state *mrb, mrb_value str, long len); +mrb_value mrb_str_intern(mrb_state *mrb, mrb_value self); +void mrb_str_shared_replace(mrb_state *mrb, mrb_value str, mrb_value str2); +mrb_value mrb_str_cat2(mrb_state *mrb, mrb_value str, const char *ptr); +mrb_value mrb_str_catf(mrb_state *mrb, mrb_value str, const char *format, ...); +mrb_value mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, 
int badcheck); +double mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck); +mrb_value mrb_str_to_str(mrb_state *mrb, mrb_value str); +mrb_value mrb_locale_str_new(mrb_state *mrb, const char *ptr, long len); +mrb_value mrb_filesystem_str_new_cstr(mrb_state *mrb, const char *ptr); +mrb_int mrb_str_hash(mrb_state *mrb, mrb_value str); +int mrb_str_hash_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2); +mrb_value str_new3(mrb_state *mrb, struct RClass* klass, mrb_value str); +mrb_value mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2); +void mrb_str_setter(mrb_state *mrb, mrb_value val, mrb_sym id, mrb_value *var); +int mrb_str_is_ascii_only_p(mrb_state *mrb, mrb_value str); +mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str); +int mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2); +mrb_value str_new4(mrb_state *mrb, enum mrb_vtype ttype, mrb_value str); +mrb_value * mrb_svar(mrb_int cnt); +mrb_value mrb_str_drop_bytes(mrb_state *mrb, mrb_value str, long len); +mrb_value mrb_str_dump(mrb_state *mrb, mrb_value str); +mrb_value mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len); +mrb_value mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2); +size_t mrb_str_capacity(mrb_value str); + +#ifdef INCLUDE_ENCODING +int sym_printable(mrb_state *mrb, const char *s, const char *send, mrb_encoding *enc); +mrb_value mrb_str_conv_enc(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to); +mrb_value mrb_str_conv_enc_opts(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to, int ecflags, mrb_value ecopts); +mrb_value mrb_enc_str_new(mrb_state *mrb, const char *ptr, long len, mrb_encoding *enc); +#else +int mrb_symname_p(const char *name); +#endif + +mrb_value mrb_tainted_str_new(mrb_state *mrb, const char *ptr, long len); +int mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2); + +#endif /* MRUBY_STRING_H */ diff --git a/include/mruby/struct.h b/include/mruby/struct.h new 
file mode 100644 index 0000000000..b8244045d5 --- /dev/null +++ b/include/mruby/struct.h @@ -0,0 +1,16 @@ +#ifndef MSTRUCT_H +#define MSTRUCT_H + +struct RStruct { + struct RBasic basic; + long len; + mrb_value *ptr; +}; +#define RSTRUCT(st) ((struct RStruct*)((st).value.p)) +#define RSTRUCT_LEN(st) ((int)(RSTRUCT(st)->len)) +#define RSTRUCT_PTR(st) (RSTRUCT(st)->ptr) + +mrb_value mrb_yield_values(int n, ...); +mrb_value mrb_mod_module_eval(mrb_state *mrb, int argc, mrb_value *argv, mrb_value mod); + +#endif //MSTRUCT_H diff --git a/lib/.gitkeep b/lib/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mrblib/Makefile b/mrblib/Makefile new file mode 100644 index 0000000000..91dfe4c649 --- /dev/null +++ b/mrblib/Makefile @@ -0,0 +1,62 @@ +# makefile discription. +# basic build file for RiteVM library +# 11.Oct.2011 coded by Hiroshi Mimaki. + +# project-specific macros +# extension of the executable-file is modifiable(.exe .out ...) +BASEDIR = . +TARGET := mrblib +MLIB := $(TARGET).o +CLIB := $(TARGET).c +DLIB := $(TARGET).ctmp +RLIB := $(TARGET).rbtmp +MRB1 := $(BASEDIR)/*.rb +MRBS := $(MRB1) + +# C compiler (gcc) +CC = gcc +DEBUG_MODE = 1 +ifeq ($(DEBUG_MODE),1) +CFLAGS = -g +else +CFLAGS = -O3 +endif +INCLUDES = -I../src -I../include +ALL_CFLAGS = -Wall -Werror-implicit-function-declaration $(CFLAGS) +MAKE_FLAGS = --no-print-directory CC="$(CC)" LL="$(LL)" + +# mruby compiler +ifeq ($(OS),Windows_NT) +MRBC = ../bin/mrbc.exe +else +MRBC = ../bin/mrbc +endif + +############################## +# generic build targets, rules + +.PHONY : all +all : $(MRBC) $(MLIB) + @echo "make: built targets of `pwd`" + +# Compile mrblib source +$(MLIB) : $(CLIB) + $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $(CLIB) -o $(MLIB) + +# Compile C source from merged mruby source +$(CLIB) : $(RLIB) $(MRBC) + $(MRBC) -Bmrblib_irep -o$(DLIB) $(RLIB); cat init_$(TARGET).c $(DLIB) > $@ + +$(MRBC) : ../src/opcode.h ../src/codegen.c ../src/parse.y + $(MAKE) -C ../tools/mrbc 
$(MAKE_FLAGS) + +# merge mruby sources +$(RLIB) : $(MRBS) + cat $? > $@ + +# clean up +.PHONY : clean +clean : + -rm -f $(MRBC) $(MLIB) $(CLIB) $(RLIB) $(DLIB) + @echo "make: removing targets, objects and depend files of `pwd`" + diff --git a/mrblib/array.rb b/mrblib/array.rb new file mode 100644 index 0000000000..a708323992 --- /dev/null +++ b/mrblib/array.rb @@ -0,0 +1,79 @@ +# +# Array +# +class Array + # 15.2.12.5.10 + def each(&block) + idx = 0 + while(idx < length) + block.call(self[idx]) + idx += 1 + end + self + end + + # 15.2.12.5.11 + def each_index(&block) + idx = 0 + while(idx < length) + block.call(idx) + idx += 1 + end + self + end + + # 15.2.12.5.7 + def collect!(&block) + self.each_index{|idx| + self[idx] = block.call(self[idx]) + } + self + end + + # 15.2.12.5.20 + # map!(&block) + alias map! collect! + + # 15.2.12.5.15 + def initialize(size=0, obj=nil, &block) + raise TypeError, "expected Integer for 1st argument" unless size.kind_of? Integer + raise ArgumentError, "negative array size" if size < 0 + + self.clear + if size > 0 + self[size - 1] = nil # allocate + + idx = 0 + while(idx < size) + self[idx] = (block)? block.call(idx): obj + idx += 1 + end + end + + self + end + + def delete(key, &block) + while i = self.index(key) + self.delete_at(i) + ret = key + end + if ret == nil && block + block.call + else + ret + end + end +end + +# include modules +module Enumerable; end +module Comparable; end +class Array + include Enumerable + include Comparable + + def sort!(&block) + self.replace(self.sort(&block)) + end +end diff --git a/mrblib/compar.rb b/mrblib/compar.rb new file mode 100644 index 0000000000..974ad5036c --- /dev/null +++ b/mrblib/compar.rb @@ -0,0 +1,63 @@ +### move to compar.c +# module Comparable + # def == other + # cmp = self <=> other + # if cmp == 0 + # true + # else + # false + # end + # end + + # def < other + # cmp = self <=> other + # if cmp.nil? 
+ # false + # elsif cmp < 0 + # true + # else + # false + # end + # end + + # def <= other + # cmp = self <=> other + # if cmp.nil? + # false + # elsif cmp <= 0 + # true + # else + # false + # end + # end + + # def > other + # cmp = self <=> other + # if cmp.nil? + # false + # elsif cmp > 0 + # true + # else + # false + # end + # end + + # def >= other + # cmp = self <=> other + # if cmp.nil? + # false + # elsif cmp >= 0 + # true + # else + # false + # end + # end + + # def between?(min,max) + # if self < min or self > max + # false + # else + # true + # end + # end +# end diff --git a/mrblib/enum.rb b/mrblib/enum.rb new file mode 100644 index 0000000000..b5a387f432 --- /dev/null +++ b/mrblib/enum.rb @@ -0,0 +1,266 @@ +# +# Enumerable +# +module Enumerable + # 15.3.2.2.1 + def all?(&block) + st = true + if block + self.each{|val| + unless block.call(val) + st = false + break + end + } + else + self.each{|val| + unless val + st = false + break + end + } + end + st + end + + # 15.3.2.2.2 + def any?(&block) + st = false + if block + self.each{|val| + if block.call(val) + st = true + break + end + } + else + self.each{|val| + if val + st = true + break + end + } + end + st + end + + # 15.3.2.2.3 + def collect(&block) + ary = [] + self.each{|val| + ary.push(block.call(val)) + } + ary + end + + # 15.3.2.2.4 + def detect(ifnone=nil, &block) + ret = ifnone + self.each{|val| + if block.call(val) + ret = val + break + end + } + ret + end + + # 15.3.2.2.5 + def each_with_index(&block) + i = 0 + self.each{|val| + block.call(val, i) + i += 1 + } + self + end + + # 15.3.2.2.6 + def entries + ary = [] + self.each{|val| + ary.push val + } + ary + end + + # 15.3.2.2.7 + # find(ifnone=nil, &block) + alias find detect + + # 15.3.2.2.8 + def find_all(&block) + ary = [] + self.each{|val| + ary.push(val) if block.call(val) + } + ary + end + + # 15.3.2.2.9 + def grep(pattern, &block) + ary = [] + self.each{|val| + if pattern === val + ary.push((block)? 
block.call(val): val) + end + } + ary + end + + # 15.3.2.2.10 + def include?(obj) + st = false + self.each{|val| + if val == obj + st = true + break + end + } + st + end + + # 15.3.2.2.11 + def inject(*args, &block) + raise ArgumentError, "too many arguments" if args.size > 2 + flag = true # 1st element? + result = nil + self.each{|val| + if flag + # 1st element + result = (args.empty?)? val: block.call(args[0], val) + flag = false + else + result = block.call(result, val) + end + } + result + end + + # 15.3.2.2.12 + # map(&block) + alias map collect + + # 15.3.2.2.13 + def max(&block) + flag = true # 1st element? + result = nil + self.each{|val| + if flag + # 1st element + result = val + flag = false + else + if block + result = val if block.call(val, result) > 0 + else + result = val if (val <=> result) > 0 + end + end + } + result + end + + # 15.3.2.2.14 + def min(&block) + flag = true # 1st element? + result = nil + self.each{|val| + if flag + # 1st element + result = val + flag = false + else + if block + result = val if block.call(val, result) < 0 + else + result = val if (val <=> result) < 0 + end + end + } + result + end + + # 15.3.2.2.15 + # member?(obj) + alias member? include? + + # 15.3.2.2.16 + def partition(&block) + ary_T = [] + ary_F = [] + self.each{|val| + if block.call(val) + ary_T.push(val) + else + ary_F.push(val) + end + } + [ary_T, ary_F] + end + + # 15.3.2.2.17 + def reject(&block) + ary = [] + self.each{|val| + ary.push(val) unless block.call(val) + } + ary + end + + # 15.3.2.2.18 + # select(&block) + alias select find_all + + + # Does this OK? Please test it. + def __sort_sub__(sorted, work, src_ary, head, tail, &block) + if head == tail + sorted[head] = work[head] if src_ary == 1 + return + end + + # on current step, which is a src ary? 
+ if src_ary == 0 + src, dst = sorted, work + else + src, dst = work, sorted + end + + key = src[head] # key value for dividing values + i, j = head, tail # position to store on the dst ary + + (head + 1).upto(tail){|idx| + if ((block)? block.call(src[idx], key): (src[idx] <=> key)) > 0 + # larger than key + dst[j] = src[idx] + j -= 1 + else + dst[i] = src[idx] + i += 1 + end + } + + sorted[i] = key + + # sort each sub-array + src_ary = (src_ary + 1) % 2 # exchange a src ary + __sort_sub__(sorted, work, src_ary, head, i - 1, &block) if i > head + __sort_sub__(sorted, work, src_ary, i + 1, tail, &block) if i < tail + end +# private :__sort_sub__ + + # 15.3.2.2.19 + def sort(&block) + ary = [] + self.each{|val| ary.push(val)} + unless ary.empty? + __sort_sub__(ary, ::Array.new(ary.size), 0, 0, ary.size - 1, &block) + end + ary + end + + # 15.3.2.2.20 + # to_a + alias to_a entries +end diff --git a/mrblib/error.rb b/mrblib/error.rb new file mode 100644 index 0000000000..88da1825c8 --- /dev/null +++ b/mrblib/error.rb @@ -0,0 +1,9 @@ +# +# Exception +# +class Exception + # 15.2.22.4.1 + def self.exception(*args, &block) + self.new(*args, &block) + end +end diff --git a/mrblib/hash.rb b/mrblib/hash.rb new file mode 100644 index 0000000000..7157684f8a --- /dev/null +++ b/mrblib/hash.rb @@ -0,0 +1,58 @@ +# +# Hash +# +class Hash + # 15.2.13.4.8 + def delete(key, &block) + if block && ! 
self.has_key?(key) + block.call(key) + else + self.__delete(key) + end + end + + # 15.2.13.4.9 + def each(&block) + self.keys.each{|k| block.call([k, self[k]])} + self + end + + # 15.2.13.4.10 + def each_key(&block) + self.keys.each{|k| block.call(k)} + self + end + + # 15.2.13.4.11 + def each_value(&block) + self.keys.each{|k| block.call(self[k])} + self + end + + # 15.2.13.4.16 + def initialize(*args, &block) + self.__init_core(block, *args) + end + + # 15.2.13.4.22 + def merge(other, &block) + h = {} + raise "can't convert argument into Hash" unless other.respond_to?(:to_hash) + other = other.to_hash + self.each_key{|k| h[k] = self[k]} + if block + other.each_key{|k| + h[k] = (self.has_key?(k))? block.call(k, self[k], other[k]): other[k] + } + else + other.each_key{|k| h[k] = other[k]} + end + h + end +end + +# include modules +module Enumerable; end +class Hash + include Enumerable +end diff --git a/mrblib/init_mrblib.c b/mrblib/init_mrblib.c new file mode 100644 index 0000000000..c44d28f940 --- /dev/null +++ b/mrblib/init_mrblib.c @@ -0,0 +1,17 @@ +#include "mruby.h" +#include "irep.h" +#include "dump.h" +#include "mruby/string.h" +#include "mruby/proc.h" + +extern const char mrblib_irep[]; + +void +mrb_init_mrblib(mrb_state *mrb) +{ + int n = mrb_read_irep(mrb, mrblib_irep); + + extern mrb_value mrb_top_self(mrb_state *mrb); + mrb_run(mrb, mrb_proc_new(mrb, mrb->irep[n]), mrb_top_self(mrb)); +} + diff --git a/mrblib/kernel.rb b/mrblib/kernel.rb new file mode 100644 index 0000000000..c09755d6c5 --- /dev/null +++ b/mrblib/kernel.rb @@ -0,0 +1,45 @@ +# +# Kernel +# +module Kernel + # 15.3.1.2.6 + def self.lambda(&block) + ### *** TODO *** ### + block # dummy + end + + # 15.3.1.2.8 + def self.loop #(&block) + while(true) + yield + end + end + + # 15.3.1.3.4 + def __send__(symbol, *args, &block) + ### *** TODO *** ### + end + + # 15.3.1.3.18 + def instance_eval(string=nil, &block) + ### *** TODO *** ### + end + + # 15.3.1.3.27 + def lambda(&block) + ### *** TODO 
*** ### + block # dummy + end + + # 15.3.1.3.29 + def loop #(&block) + while(true) + yield + end + end + + # 15.3.1.3.44 + def send(symbol, *args, &block) + ### *** TODO *** ### + end +end diff --git a/mrblib/numeric.rb b/mrblib/numeric.rb new file mode 100644 index 0000000000..ee5bdcb56c --- /dev/null +++ b/mrblib/numeric.rb @@ -0,0 +1,42 @@ +# +# Integer +# +class Integer + # 15.2.8.3.15 + def downto(num, &block) + raise TypeError, "expected Integer" unless num.kind_of? Integer + i = self + while(i >= num) + block.call(i) + i -= 1 + end + self + end + + # 15.2.8.3.22 + def times(&block) + i = 0 + while(i < self) + block.call(i) + i += 1 + end + self + end + + # 15.2.8.3.27 + def upto(num, &block) + raise TypeError, "expected Integer" unless num.kind_of? Integer + i = self + while(i <= num) + block.call(i) + i += 1 + end + self + end +end + +# include modules +module Comparable; end +class Numeric + include Comparable +end diff --git a/mrblib/print.rb b/mrblib/print.rb new file mode 100644 index 0000000000..cb1fad75de --- /dev/null +++ b/mrblib/print.rb @@ -0,0 +1,20 @@ +module Kernel + def print(*args) + i = 0 + len = args.size + while i < len + __printstr__ args[i].to_s + i += 1 + end + end + def puts(*args) + i = 0 + len = args.size + while i < len + __printstr__ args[i].to_s + __printstr__ "\n" + i += 1 + end + __printstr__ "\n" if len == 0 + end +end diff --git a/mrblib/range.rb b/mrblib/range.rb new file mode 100644 index 0000000000..79bc40ecd7 --- /dev/null +++ b/mrblib/range.rb @@ -0,0 +1,30 @@ +# +# Range +# +class Range + # 15.2.14.4.4 + def each(&block) + val = self.first + unless val.respond_to? :succ + raise TypeError, "can't iterate" + end + + last = self.last + return self if (val <=> last) > 0 + + while((val <=> last) < 0) + block.call(val) + val = val.succ + end + + block.call(val) unless exclude_end? 
+ + self + end +end + +# include modules +module Enumerable; end +class Range + include Enumerable +end diff --git a/mrblib/string.rb b/mrblib/string.rb new file mode 100644 index 0000000000..78f2bea9d3 --- /dev/null +++ b/mrblib/string.rb @@ -0,0 +1,93 @@ +# +# String +# +class String + # 15.2.10.5.15 + def each_line(&block) + # expect that str.index accepts an Integer for 1st argument as a byte data + offset = 0 + while(pos = self.index(0x0a, offset)) + block.call(self[offset, pos + 1 - offset]) + offset = pos + 1 + end + block.call(self[offset, self.size - offset]) if self.size > offset + self + end + + # 15.2.10.5.18 + def gsub(*args, &block) + unless (args.size == 1 && block) || args.size == 2 + raise ArgumentError, "wrong number of arguments" + end + + ### *** TODO *** ### + end + + # 15.2.10.5.19 + def gsub!(*args, &block) + str = self.gsub(*args, &block) + if str != self + self.replace(str) + self + else + nil + end + end + + # 15.2.10.5.32 + def scan(reg, &block) + ### *** TODO *** ### + end + + # 15.2.10.5.36 + def sub(*args, &block) + unless (args.size == 1 && block) || args.size == 2 + raise ArgumentError, "wrong number of arguments" + end + + ### *** TODO *** ### + end + + # 15.2.10.5.37 + def sub!(*args, &block) + str = self.sub(*args, &block) + if str != self + self.replace(str) + self + else + nil + end + end + + def each_char(&block) + pos = 0 + while(pos < self.size) + block.call(self[pos]) + pos += 1 + end + self + end + + def each_byte(&block) + bytes = self.unpack("C*") + pos = 0 + while(pos < bytes.size) + block.call(bytes[pos]) + pos += 1 + end + self + end + + def []=(pos, value) + b = self[0, pos] + a = self[pos+1..-1] + p [b, value, a].join('') + self.replace([b, value, a].join('')) + end +end + +# include modules +module Comparable; end +class String + include Comparable +end diff --git a/mrblib/struct.rb b/mrblib/struct.rb new file mode 100644 index 0000000000..b11f59f2aa --- /dev/null +++ b/mrblib/struct.rb @@ -0,0 +1,30 @@ +# +# Struct 
+# +class Struct + # 15.2.18.4.4 + def each(&block) + self.class.members.each{|field| + block.call(self[field]) + } + self + end + + # 15.2.18.4.5 + def each_pair(&block) + self.class.members.each{|field| + block.call(field.to_sym, self[field]) + } + self + end + + # 15.2.18.4.7 + def select(&block) + ary = [] + self.class.members.each{|field| + val = self[field] + ary.push(val) if block.call(val) + } + ary + end +end diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000000..41a2c83a0b --- /dev/null +++ b/src/Makefile @@ -0,0 +1,89 @@ +# makefile discription. +# basic build file for RiteVM library +# 11.Apr.2011 coded by Kenji Yoshimoto. +# 31.Aug.2011 coded by Hiroshi Mimaki. + +# project-specific macros +# extension of the executable-file is modifiable(.exe .out ...) +BASEDIR = . +TARGET := ../lib/ritevm +ifeq ($(OS),Windows_NT) +LIB := $(TARGET).lib +else +LIB := $(TARGET).a +endif +YSRC := $(BASEDIR)/parse.y +YC := $(BASEDIR)/y.tab.c +EXCEPT1 := $(YC) $(BASEDIR)/minimain.c $(BASEDIR)/compile.c $(BASEDIR)/dump.c $(BASEDIR)/cdump.c +OBJY := $(patsubst %.c,%.o,$(YC)) +OBJ1 := $(patsubst %.c,%.o,$(filter-out $(EXCEPT1),$(wildcard $(BASEDIR)/*.c))) +#OBJ2 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/regex/*.c)) +#OBJ3 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/enc/*.c)) +OBJS := $(OBJ1) $(OBJ2) $(OBJ3) +# mruby libraries +EXTC := $(BASEDIR)/../mrblib/mrblib.c +EXTRB := $(wildcard $(BASEDIR)/../mrblib/*.rb) +EXTM := $(patsubst %.c,%.o,$(EXTC)) +# extend libraries +#EXT1 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/socket/*.c)) +EXTS := $(EXT1) + +# libraries, includes +INCLUDES = -I$(BASEDIR) -I$(BASEDIR)/../include +#INCLUDES = -I$(RITEVM_ROOT) + +# compiler, linker (gcc) +CC = gcc +AR = ar +YACC = bison + +DEBUG_MODE = 1 +ifeq ($(DEBUG_MODE),1) +CFLAGS = -g +else +CFLAGS = -O3 +endif +ALL_CFLAGS = -Wall -Werror-implicit-function-declaration $(CFLAGS) +MAKE_FLAGS = --no-print-directory CC="$(CC)" LL="$(LL)" + 
+############################## +# generic build targets, rules + +.PHONY : all +all : $(EXTM) $(LIB) + @echo "make: built targets of `pwd`" + +# executable constructed using linker from object files +$(LIB) : $(OBJS) $(OBJY) $(EXTM) $(EXTS) + $(AR) r $@ $(OBJS) $(OBJY) $(EXTM) $(EXTS) + +-include $(OBJS:.o=.d) $(OBJY:.o=.d) + +# objects compiled from source +$(OBJS) : %.o : %.c + $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $< -o $@ + +# mruby library compile +$(EXTM) : $(EXTRB) $(OBJS) $(OBJY) + $(MAKE) -C ../mrblib $(MAKE_FLAGS) + +# extend libraries compile +$(EXTS) : %.o : %.c + $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $< -o $@ + +# parser compile +$(OBJY) : $(YC) + $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $(YC) -o $(OBJY) + +# yacc compile +$(YC) : $(YSRC) + $(YACC) -o $(YC) $(YSRC) + +# clean up +.PHONY : clean #cleandep +clean : + $(MAKE) clean -C ../mrblib $(MAKE_FLAGS) + -rm -f $(LIB) $(OBJS) $(OBJY) $(YC) + -rm -f $(OBJS:.o=.d) $(OBJY:.o=.d) + @echo "make: removing targets, objects and depend files of `pwd`" + diff --git a/src/array.c b/src/array.c new file mode 100644 index 0000000000..855a45aba5 --- /dev/null +++ b/src/array.c @@ -0,0 +1,1458 @@ +#include "mruby.h" +#include "mruby/array.h" +#include <string.h> +#include "mruby/string.h" +#include "mdata.h" +#include "mruby/class.h" + +#ifdef INCLUDE_REGEXP + #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr +#else + #define mrb_usascii_str_new2 mrb_str_new_cstr + #define mrb_usascii_str_new mrb_str_new +#endif +mrb_value mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int), + mrb_value obj, mrb_value paired_obj, void* arg); + +//#define ARY_DEFAULT_LEN 16 +#define ARY_DEFAULT_LEN 4 +#define ARY_SHRINK_RATIO 5 /* must be larger than 2 */ +#ifdef LONG_MAX +# define ARY_MAX_SIZE (LONG_MAX / sizeof(mrb_value)) +#endif + +static inline mrb_value +ary_elt(mrb_value ary, long offset) +{ + if (RARRAY_LEN(ary) == 0) return mrb_nil_value(); + if (offset < 0 || RARRAY_LEN(ary) 
<= offset) { + return mrb_nil_value(); + } + return RARRAY_PTR(ary)[offset]; +} + +mrb_value +mrb_ary_new_capa(mrb_state *mrb, size_t capa) +{ + struct RArray *a; + + if (capa < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative ary size (or size too big)"); + } +#ifdef LONG_MAX + if (capa > ARY_MAX_SIZE) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "ary size too big"); + } +#endif + if (capa < ARY_DEFAULT_LEN) { + capa = ARY_DEFAULT_LEN; + } + + a = mrb_obj_alloc(mrb, MRB_TT_ARRAY, mrb->array_class); + a->buf = mrb_malloc(mrb, sizeof(mrb_value) * capa); + memset(a->buf, 0, sizeof(mrb_value) * capa); + a->capa = capa; + a->len = 0; + + return mrb_obj_value(a); +} + +mrb_value +mrb_ary_new(mrb_state *mrb) +{ + return mrb_ary_new_capa(mrb, 0); +} + +mrb_value +mrb_ary_new_from_values(mrb_state *mrb, mrb_value *vals, size_t size) +{ + mrb_value ary; + struct RArray *a; + + ary = mrb_ary_new_capa(mrb, size); + a = mrb_ary_ptr(ary); + memcpy(a->buf, vals, sizeof(mrb_value)*size); + a->len = size; + + return ary; +} + +mrb_value +mrb_assoc_new(mrb_state *mrb, mrb_value car, mrb_value cdr) +{ + mrb_value arv[2]; + arv[0] = car; + arv[1] = cdr; + return mrb_ary_new_from_values(mrb, arv, 2); +} + +void +ary_fill_with_nil(mrb_value *buf, size_t size) +{ + mrb_value nil = mrb_nil_value(); + + while((int)(size--)) { + *buf++ = nil; + } +} + +void +mrb_ary_expand_capa(mrb_state *mrb, struct RArray *a, size_t len) +{ + size_t capa = a->capa; + +#ifdef LONG_MAX + if (len > ARY_MAX_SIZE) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "array size too big"); + } +#endif + + while(capa < len) { + if (capa == 0) { + capa = ARY_DEFAULT_LEN; + } + else { + capa *= 2; + } + } + +#ifdef LONG_MAX + if (capa > ARY_MAX_SIZE) capa = ARY_MAX_SIZE; /* len <= capa <= ARY_MAX_SIZE */ +#endif + + if (capa > a->capa) { + a->capa = capa; + a->buf = mrb_realloc(mrb, a->buf, sizeof(mrb_value)*capa); + } +} + +void +mrb_ary_shrink_capa(mrb_state *mrb, struct RArray *a) +{ + size_t capa = a->capa; + + if (capa < 
ARY_DEFAULT_LEN * 2) return; + if (capa <= a->len * ARY_SHRINK_RATIO) return; + + do { + capa /= 2; + if (capa < ARY_DEFAULT_LEN) { + capa = ARY_DEFAULT_LEN; + break; + } + } while(capa > a->len * ARY_SHRINK_RATIO); + + if (capa > a->len && capa < a->capa) { + a->capa = capa; + a->buf = mrb_realloc(mrb, a->buf, sizeof(mrb_value)*capa); + } +} + +mrb_value +mrb_ary_s_create(mrb_state *mrb, mrb_value self) +{ + mrb_value *vals; + int len; + + mrb_get_args(mrb, "*", &vals, &len); + return mrb_ary_new_from_values(mrb, vals, (size_t)len); +} + +void +mrb_ary_concat(mrb_state *mrb, mrb_value self, mrb_value other) +{ + struct RArray *a1 = mrb_ary_ptr(self); + struct RArray *a2 = mrb_ary_ptr(other); + size_t len = a1->len + a2->len; + + if (a1->capa < len) mrb_ary_expand_capa(mrb, a1, len); + memcpy(a1->buf+a1->len, a2->buf, sizeof(mrb_value)*a2->len); + mrb_write_barrier(mrb, (struct RBasic*)a1); + a1->len = len; +} + +mrb_value +mrb_ary_concat_m(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + + mrb_get_args(mrb, "o", &other); + if (mrb_type(other) != MRB_TT_ARRAY) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "expected Array"); + } + mrb_ary_concat(mrb, self, other); + return self; +} + +mrb_value +mrb_ary_plus(mrb_state *mrb, mrb_value self) +{ + struct RArray *a1 = mrb_ary_ptr(self); + struct RArray *a2; + mrb_value other; + mrb_value ary; + + mrb_get_args(mrb, "o", &other); + if (mrb_type(other) != MRB_TT_ARRAY) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "expected Array"); + } + + ary = mrb_ary_new_capa(mrb, a1->len + RARRAY_LEN(other)); + a2 = mrb_ary_ptr(ary); + memcpy(a2->buf, a1->buf, sizeof(mrb_value)*a1->len); + memcpy(a2->buf + a1->len, RARRAY_PTR(other), sizeof(mrb_value)*RARRAY_LEN(other)); + a2->len = a1->len + RARRAY_LEN(other); + + return ary; +} + +static mrb_value +recursive_cmp(mrb_state *mrb, mrb_value ary1, mrb_value ary2, int recur) +{ + long i, len; + + if (recur) return mrb_undef_value(); /* Subtle! 
*/ + len = RARRAY_LEN(ary1); + if (len > RARRAY_LEN(ary2)) { + len = RARRAY_LEN(ary2); + } + + for (i=0; i", 1, ary_elt(ary2, i)); + if (mrb_type(r) != MRB_TT_FIXNUM || mrb_fixnum(r) != 0) return r; + } + + return mrb_undef_value(); +} + +/* + * call-seq: + * ary <=> other_ary -> -1, 0, +1 or nil + * + * Comparison---Returns an integer (-1, 0, or +1) + * if this array is less than, equal to, or greater than other_ary. + * Each object in each array is compared (using <=>). If any value isn't + * equal, then that inequality is the return value. If all the + * values found are equal, then the return is based on a + * comparison of the array lengths. Thus, two arrays are + * ``equal'' according to Array#<=> if and only if they have + * the same length and the value of each element is equal to the + * value of the corresponding element in the other array. + * + * [ "a", "a", "c" ] <=> [ "a", "b", "c" ] #=> -1 + * [ 1, 2, 3, 4, 5, 6 ] <=> [ 1, 2 ] #=> +1 + * + */ +mrb_value +mrb_ary_cmp(mrb_state *mrb, mrb_value ary1) +{ + mrb_value ary2; + struct RArray *a1, *a2; + mrb_value r; + long len; + + mrb_get_args(mrb, "o", &ary2); + if (mrb_type(ary2) != MRB_TT_ARRAY) return mrb_nil_value(); + a1 = RARRAY(ary1); a2 = RARRAY(ary2); + if (a1->len == a2->len && a1->buf == a2->buf) return mrb_fixnum_value(0); + r = mrb_exec_recursive_paired(mrb, recursive_cmp, ary1, ary2, &ary2); + if (mrb_type(r) != MRB_TT_UNDEF) return r; + len = a1->len - a2->len; + return mrb_fixnum_value((len == 0)? 0: (len > 0)? 
1: -1); +} + +void +mrb_ary_replace(mrb_state *mrb, struct RArray *a, mrb_value *argv, size_t len) +{ + if (a->capa < len) mrb_ary_expand_capa(mrb, a, len); + memcpy(a->buf, argv, sizeof(mrb_value)*len); + mrb_write_barrier(mrb, (struct RBasic*)a); + a->len = len; +} + +mrb_value +mrb_ary_replace_m(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + + mrb_get_args(mrb, "o", &other); + mrb_ary_replace(mrb, mrb_ary_ptr(self), RARRAY_PTR(other), RARRAY_LEN(other)); + + return self; +} + +mrb_value +mrb_ary_times(mrb_state *mrb, mrb_value self) +{ + struct RArray *a1 = mrb_ary_ptr(self); + struct RArray *a2; + mrb_value ary; + mrb_value *buf; + mrb_int times; + //size_t len; + + mrb_get_args(mrb, "i", ×); + if (times < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument"); + } + if (times == 0) return mrb_ary_new(mrb); + + ary = mrb_ary_new_capa(mrb, a1->len * times); + a2 = mrb_ary_ptr(ary); + buf = a2->buf; + while(times--) { + memcpy(buf, a1->buf, sizeof(mrb_value)*(a1->len)); + buf += a1->len; + a2->len += a1->len; + } + + return ary; +} + +static void +ary_reverse(struct RArray *a) +{ + mrb_value *p1, *p2; + + p1 = a->buf; + p2 = a->buf + a->len - 1; + + while(p1 < p2) { + mrb_value tmp = *p1; + *p1++ = *p2; + *p2-- = tmp; + } +} + +mrb_value +mrb_ary_reverse_bang(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + + if (a->len > 1) { + ary_reverse(a); + } + return self; +} + +mrb_value +mrb_ary_reverse(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + mrb_value ary; + + ary = mrb_ary_new_capa(mrb, a->len); + if (a->len > 0) { + mrb_ary_replace(mrb, mrb_ary_ptr(ary), a->buf, a->len); + ary_reverse(mrb_ary_ptr(ary)); + } + return ary; +} + +mrb_value +mrb_ary_new4(mrb_state *mrb, long n, const mrb_value *elts) +{ + mrb_value ary; + + ary = mrb_ary_new_capa(mrb, n);//mrb_ary_new2(n); + if (n > 0 && elts) { + memcpy(RARRAY_PTR(ary), elts, sizeof(mrb_value)*n); + RARRAY_LEN(ary) = n; //ARY_SET_LEN(ary, n); 
+ } + + return ary; +} + +mrb_value +mrb_ary_new_elts(mrb_state *mrb, long n, const mrb_value *elts) +{ + return mrb_ary_new4(mrb, n, elts); +} + +void +mrb_ary_push(mrb_state *mrb, mrb_value ary, mrb_value elem) /* mrb_ary_push */ +{ + struct RArray *a = mrb_ary_ptr(ary); + + if (a->len == a->capa) mrb_ary_expand_capa(mrb, a, a->len + 1); + a->buf[a->len++] = elem; + mrb_write_barrier(mrb, (struct RBasic*)a); +} + +mrb_value +mrb_ary_pop(mrb_state *mrb, mrb_value ary) +{ + struct RArray *a = mrb_ary_ptr(ary); + + if (a->len == 0) return mrb_nil_value(); + + return a->buf[--a->len]; +} + +mrb_value +mrb_ary_push_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int len; + + mrb_get_args(mrb, "*", &argv, &len); + while(len--) { + mrb_ary_push(mrb, self, *argv++); + } + + return self; +} + +mrb_value +mrb_ary_pop_m(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + + return ((a->len == 0)? mrb_nil_value(): mrb_ary_pop(mrb, self)); +} + +mrb_value +mrb_ary_shift(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + mrb_value *buf = a->buf; + size_t size = a->len; + mrb_value val; + + if (size == 0) return mrb_nil_value(); + + val = *buf; + while((int)(--size)) { + *buf = *(buf+1); + ++buf; + } + --a->len; + + return val; +} + +/* self = [1,2,3] + item = 0 + self.unshift item + p self #=> [0, 1, 2, 3] */ +mrb_value +mrb_ary_unshift(mrb_state *mrb, mrb_value self, mrb_value item) +{ + struct RArray *a = mrb_ary_ptr(self); + + if (a->capa < a->len + 1) mrb_ary_expand_capa(mrb, a, a->len + 1); + memmove(a->buf + 1, a->buf, sizeof(mrb_value)*a->len); + memcpy(a->buf, &item, sizeof(mrb_value)); + a->len += 1; + mrb_write_barrier(mrb, (struct RBasic*)a); + + return self; +} + +mrb_value +mrb_ary_unshift_m(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + mrb_value *vals; + int len; + + mrb_get_args(mrb, "*", &vals, &len); + if (len == 0) return self; + if (a->capa < a->len + len) 
mrb_ary_expand_capa(mrb, a, a->len + len); + memmove(a->buf + len, a->buf, sizeof(mrb_value)*a->len); + memcpy(a->buf, vals, sizeof(mrb_value)*len); + a->len += len; + mrb_write_barrier(mrb, (struct RBasic*)a); + + return self; +} + +mrb_value +mrb_ary_ref(mrb_state *mrb, mrb_value ary, mrb_int n) +{ + struct RArray *a = mrb_ary_ptr(ary); + + /* range check */ + if (n < 0) n += a->len; + if (n < 0 || a->len <= (size_t)n) return mrb_nil_value(); + + return a->buf[n]; +} + +void +mrb_ary_set(mrb_state *mrb, mrb_value ary, mrb_int n, mrb_value val) /* rb_ary_store */ +{ + struct RArray *a = mrb_ary_ptr(ary); + + /* range check */ + if (n < 0) n += a->len; + if (n < 0) { + mrb_raise(mrb, E_INDEX_ERROR, "index %ld out of array", n - a->len); + } + if (a->len <= (size_t)n) { + if (a->capa <= (size_t)n) mrb_ary_expand_capa(mrb, a, n + 1); + ary_fill_with_nil(a->buf + a->len, n + 1 - a->len); + a->len = n + 1; + } + + a->buf[n] = val; + mrb_write_barrier(mrb, (struct RBasic*)a); +} + +mrb_value +mrb_ary_splice(mrb_state *mrb, mrb_value ary, mrb_int head, mrb_int len, mrb_value rpl) +{ + struct RArray *a = mrb_ary_ptr(ary); + mrb_int tail; + size_t size; + mrb_value *argv; + int i, argc; + + /* range check */ + if (head < 0) head += a->len; + if (head < 0) { + mrb_raise(mrb, E_INDEX_ERROR, "index is out of array"); + } + tail = head + len; + + /* size check */ + if (mrb_type(rpl) == MRB_TT_ARRAY) { + argc = RARRAY_LEN(rpl); + argv = RARRAY_PTR(rpl); + } + else { + argc = 1; + argv = &rpl; + } + size = head + argc; + + if (tail < a->len) size += a->len - tail; + + if (size > a->capa) mrb_ary_expand_capa(mrb, a, size); + + if (head > a->len) { + ary_fill_with_nil(a->buf + a->len, (size_t)(head - a->len)); + } + else if (head < a->len) { + memmove(a->buf + head + argc, a->buf + tail, sizeof(mrb_value)*(a->len - tail)); + } + + for(i = 0; i < argc; i++) { + *(a->buf + head + i) = *(argv + i); + } + + a->len = size; + + return ary; +} + +int +mrb_ary_alen(mrb_state *mrb, 
mrb_value ary) +{ + return RARRAY_LEN(ary); +} + +mrb_value +mrb_ary_aget(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + mrb_int index, len; + mrb_value *argv; + int size; + + mrb_get_args(mrb, "i*", &index, &argv, &size); + switch(size) { + case 0: + return mrb_ary_ref(mrb, self, index); + + case 1: + if (mrb_type(argv[0]) != MRB_TT_FIXNUM) { + mrb_raise(mrb, E_TYPE_ERROR, "expected Fixnum"); + } + len = mrb_fixnum(argv[0]); + if (index < 0) index += a->len; + if (index < 0 || a->len < (size_t)index) return mrb_nil_value(); + if ((len = mrb_fixnum(argv[0])) < 0) return mrb_nil_value(); + if (a->len == (size_t)index) return mrb_ary_new(mrb); + if ((size_t)len > a->len - index) len = a->len - index; + return mrb_ary_new_from_values(mrb, a->buf + index, len); + + default: + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments"); + } + + return mrb_nil_value(); /* dummy to avoid warning : not reach here */ +} + +mrb_value +mrb_ary_aset(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + switch(argc) { + case 2: + if (FIXNUM_P(argv[0])) { + mrb_ary_set(mrb, self, mrb_fixnum(argv[0]), argv[1]); + } + else { + /* Should we support Range object for 1st arg ? 
*/ + mrb_raise(mrb, E_TYPE_ERROR, "expected Fixnum for 1st argument"); + } + break; + + case 3: + mrb_ary_splice(mrb, self, mrb_fixnum(argv[0]), mrb_fixnum(argv[1]), argv[2]); + break; + + default: + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments"); + } + + return self; +} + +mrb_value +mrb_ary_delete_at(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + mrb_int index; + mrb_value val; + mrb_value *buf; + size_t len; + + mrb_get_args(mrb, "i", &index); + if (index < 0) index += a->len; + if (index < 0 || a->len <= (size_t)index) return mrb_nil_value(); + + val = a->buf[index]; + + buf = a->buf + index; + len = a->len - index; + while((int)(--len)) { + *buf = *(buf+1); + ++buf; + } + --a->len; + + mrb_ary_shrink_capa(mrb, a); + + return val; +} + +mrb_value +mrb_ary_first(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + //mrb_value ary; + size_t size; + mrb_value *vals; + int len; + + mrb_get_args(mrb, "*", &vals, &len); + if (len > 1) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments"); + } + + if (len == 0) return (a->len > 0)? a->buf[0]: mrb_nil_value(); + + /* len == 1 */ + size = mrb_fixnum(*vals); + if (size > a->len) size = a->len; + return mrb_ary_new_from_values(mrb, a->buf, size); +} + +mrb_value +mrb_ary_last(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + //mrb_value ary; + size_t size; + mrb_value *vals; + int len; + + mrb_get_args(mrb, "*", &vals, &len); + if (len > 1) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments"); + } + + if (len == 0) return (a->len > 0)? 
a->buf[a->len - 1]: mrb_nil_value(); + + /* len == 1 */ + size = mrb_fixnum(*vals); + if (size > a->len) size = a->len; + return mrb_ary_new_from_values(mrb, a->buf + a->len - size, size); +} + +mrb_value +mrb_ary_index_m(mrb_state *mrb, mrb_value self) +{ + mrb_value obj; + long i; + + mrb_get_args(mrb, "o", &obj); + for (i = 0; i < RARRAY_LEN(self); i++) { + if (mrb_equal(mrb, RARRAY_PTR(self)[i], obj)) { + return mrb_fixnum_value(i); + } + } + return mrb_nil_value(); +} + +mrb_value +mrb_ary_rindex_m(mrb_state *mrb, mrb_value self) +{ + mrb_value obj; + long i; + + mrb_get_args(mrb, "o", &obj); + for (i = RARRAY_LEN(self) - 1; i >= 0; i--) { + if (mrb_equal(mrb, RARRAY_PTR(self)[i], obj)) { + return mrb_fixnum_value(i); + } + } + return mrb_nil_value(); +} + +mrb_value +mrb_ary_splat(mrb_state *mrb, mrb_value v) +{ + return v; +} + +static mrb_value +mrb_ary_size(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + + return mrb_fixnum_value(a->len); +} + +#if 0 /* --> implement with ruby code */ +mrb_value +mrb_ary_each(mrb_state *mrb, mrb_value self) +{ + long i; + + for (i = 0; i < RARRAY_LEN(self); i++) { + mrb_yield(RARRAY_PTR(self)[i]); + } + + return self; +} +#endif + +#if 0 /* --> implement with ruby code */ +mrb_value +mrb_ary_each_index(mrb_state *mrb, mrb_value self) +{ + long i; + + for (i = 0; i < RARRAY_LEN(self); i++) { + mrb_yield(mrb_fixnum_value(i)); + } + + return self; +} +#endif + +#if 0 /* --> implement with ruby code */ +mrb_value +mrb_ary_collect_bang(mrb_state *mrb, mrb_value self) +{ + long i; + + for (i = 0; i < RARRAY_LEN(self); i++) { + RARRAY_PTR(self)[i] = mrb_yield(RARRAY_PTR(self)[i]); + } + + return self; +} +#endif + +mrb_value +mrb_ary_clear(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + + a->len = 0; + mrb_ary_shrink_capa(mrb, a); + + return self; +} + +mrb_value +mrb_ary_empty_p(mrb_state *mrb, mrb_value self) +{ + struct RArray *a = mrb_ary_ptr(self); + + return ((a->len 
== 0)? mrb_true_value(): mrb_false_value()); +} + +mrb_value +mrb_check_array_type(mrb_state *mrb, mrb_value ary) +{ + return mrb_check_convert_type(mrb, ary, MRB_TT_ARRAY, "Array", "to_ary"); +} + +mrb_value +mrb_ary_entry(mrb_value ary, long offset) +{ + if (offset < 0) { + offset += RARRAY_LEN(ary); + } + return ary_elt(ary, offset); +} + +void +mrb_mem_clear(mrb_value *mem, long size) +{ + while (size--) { + *mem++ = mrb_nil_value(); + } +} + +mrb_value +mrb_ary_tmp_new(mrb_state *mrb, long capa) +{ + return mrb_ary_new_capa(mrb, capa);//ary_new(0, capa); +} + +#if 0 +/* + * call-seq: + * ary.sort! -> ary + * ary.sort! {| a,b | block } -> ary + * + * Sorts +self+. Comparisons for + * the sort will be done using the <=> operator or using + * an optional code block. The block implements a comparison between + * a and b, returning -1, 0, or +1. See also + * Enumerable#sort_by. + * + * a = [ "d", "a", "e", "c", "b" ] + * a.sort #=> ["a", "b", "c", "d", "e"] + * a.sort {|x,y| y <=> x } #=> ["e", "d", "c", "b", "a"] + */ + +mrb_value +mrb_ary_sort_bang(mrb_value ary) +{ +#if 0 + mrb_ary_modify(ary); + //assert(!ARY_SHARED_P(ary)); + if (RARRAY_LEN(ary) > 1) { + mrb_value tmp = ary_make_substitution(ary); /* only ary refers tmp */ + struct ary_sort_data data; + + RBASIC(tmp)->klass = 0; + data.ary = tmp; + data.opt_methods = 0; + data.opt_inited = 0; + ruby_qsort(RARRAY_PTR(tmp), RARRAY_LEN(tmp), sizeof(VALUE), + mrb_block_given_p()?sort_1:sort_2, &data); + + if (ARY_EMBED_P(tmp)) { + assert(ARY_EMBED_P(tmp)); + if (ARY_SHARED_P(ary)) { /* ary might be destructively operated in the given block */ + mrb_ary_unshare(ary); + } + FL_SET_EMBED(ary); + MEMCPY(RARRAY_PTR(ary), ARY_EMBED_PTR(tmp), VALUE, ARY_EMBED_LEN(tmp)); + ARY_SET_LEN(ary, ARY_EMBED_LEN(tmp)); + } + else { + assert(!ARY_EMBED_P(tmp)); + if (ARY_HEAP_PTR(ary) == ARY_HEAP_PTR(tmp)) { + assert(!ARY_EMBED_P(ary)); + FL_UNSET_SHARED(ary); + ARY_SET_CAPA(ary, ARY_CAPA(tmp)); + } + else { + 
assert(!ARY_SHARED_P(tmp)); + if (ARY_EMBED_P(ary)) { + FL_UNSET_EMBED(ary); + } + else if (ARY_SHARED_P(ary)) { + /* ary might be destructively operated in the given block */ + mrb_ary_unshare(ary); + } + else { + xfree(ARY_HEAP_PTR(ary)); + } + ARY_SET_PTR(ary, RARRAY_PTR(tmp)); + ARY_SET_HEAP_LEN(ary, RARRAY_LEN(tmp)); + ARY_SET_CAPA(ary, ARY_CAPA(tmp)); + } + /* tmp was lost ownership for the ptr */ + FL_UNSET(tmp, FL_FREEZE); + FL_SET_EMBED(tmp); + ARY_SET_EMBED_LEN(tmp, 0); + FL_SET(tmp, FL_FREEZE); + } + /* tmp will be GC'ed. */ + RBASIC(tmp)->c = mrb->array_class; + } +#endif + return ary; +} +#endif + +mrb_value +mrb_ary_dup(mrb_state *mrb, mrb_value self) +{ + struct RArray *a1 = mrb_ary_ptr(self); + struct RArray *a2; + mrb_value ary; + mrb_value *buf; + mrb_int times; + //size_t len; + + ary = mrb_ary_new_capa(mrb, a1->len); + a2 = mrb_ary_ptr(ary); + buf = a2->buf; + while(times--) { + memcpy(buf, a1->buf, sizeof(mrb_value)*a1->len); + buf += a1->len; + } + a2->len = a1->len; + + return ary; +} + +#if 0 +/* + * call-seq: + * ary.sort -> new_ary + * ary.sort {| a,b | block } -> new_ary + * + * Returns a new array created by sorting +self+. Comparisons for + * the sort will be done using the <=> operator or using + * an optional code block. The block implements a comparison between + * a and b, returning -1, 0, or +1. See also + * Enumerable#sort_by. 
+ * + * a = [ "d", "a", "e", "c", "b" ] + * a.sort #=> ["a", "b", "c", "d", "e"] + * a.sort {|x,y| y <=> x } #=> ["e", "d", "c", "b", "a"] + */ + +mrb_value +mrb_ary_sort(mrb_state *mrb, mrb_value ary) +{ + ary = mrb_ary_dup(mrb, ary); + mrb_ary_sort_bang(ary); + return ary; +} +#endif + +static mrb_value +inspect_ary(mrb_state *mrb, mrb_value ary, mrb_value list) +{ + long i; + mrb_value s, arystr; + char *head = "["; + char *sep = ", "; + char *tail = "]"; + + /* check recursive */ + for(i=0; i 0) { + mrb_str_buf_cat(mrb, arystr, sep, strlen(sep)); + } + if (mrb_type(RARRAY_PTR(ary)[i]) == MRB_TT_ARRAY) { + s = inspect_ary(mrb, RARRAY_PTR(ary)[i], list); + } else { + s = mrb_inspect(mrb, RARRAY_PTR(ary)[i]); + } + //mrb_str_buf_append(mrb, arystr, s); + mrb_str_buf_cat(mrb, arystr, RSTRING_PTR(s), RSTRING_LEN(s)); + mrb_gc_arena_restore(mrb, ai); + } + + mrb_str_buf_cat(mrb, arystr, tail, strlen(tail)); + mrb_ary_pop(mrb, list); + + return arystr; +} + +#if 0 +static mrb_value +inspect_ary_r(mrb_state *mrb, mrb_value ary, mrb_value dummy, int recur) +{ + //int tainted = OBJ_TAINTED(ary); + //int untrust = OBJ_UNTRUSTED(ary); + long i; + mrb_value s, arystr; + //if (recur) return mrb_tainted_str_new2("[...]"); + arystr = mrb_str_buf_new(mrb, 128); + mrb_str_buf_cat(mrb, arystr, "[", strlen("[")); /* for capa */ + //arystr = mrb_str_new_cstr(mrb, "[");//mrb_str_buf_new2("["); + for (i=0; i 0) mrb_str_buf_cat(mrb, arystr, ", ", strlen(", "));//mrb_str_buf_cat2(str, ", "); + mrb_str_buf_append(mrb, arystr, s); + } + mrb_str_buf_cat(mrb, arystr, "]", strlen("]"));// mrb_str_buf_cat2(str, "]"); + //if (tainted) OBJ_TAINT(str); + //if (untrust) OBJ_UNTRUST(str); + return arystr; +} +#endif + +/* 15.2.12.5.31 (x) */ +/* + * call-seq: + * ary.to_s -> string + * ary.inspect -> string + * + * Creates a string representation of +self+. 
+ */ + +static mrb_value +mrb_ary_inspect(mrb_state *mrb, mrb_value ary) +{ + if (RARRAY_LEN(ary) == 0) return mrb_str_new2(mrb, "[]"); + #if 0 /* THREAD */ + return mrb_exec_recursive(inspect_ary_r, ary, 0); + #else + return inspect_ary(mrb, ary, mrb_ary_new(mrb)); + #endif +} + +static mrb_value +join_ary(mrb_state *mrb, mrb_value ary, mrb_value sep, mrb_value list) +{ + long i; + mrb_value result, val, tmp; + + /* check recursive */ + for(i=0; i 0 && !mrb_nil_p(sep)) { + //mrb_str_buf_append(mrb, result, sep); // segv (encoding error?) + mrb_str_buf_cat(mrb, result, RSTRING_PTR(sep), RSTRING_LEN(sep)); + } + + val = RARRAY_PTR(ary)[i]; + switch(mrb_type(val)) { + case MRB_TT_ARRAY: + ary_join: + val = join_ary(mrb, val, sep, list); + /* fall through */ + + case MRB_TT_STRING: + str_join: + //mrb_str_buf_append(mrb, result, val); + mrb_str_buf_cat(mrb, result, RSTRING_PTR(val), RSTRING_LEN(val)); + break; + + default: + tmp = mrb_check_string_type(mrb, val); + if (!mrb_nil_p(tmp)) { + val = tmp; + goto str_join; + } + tmp = mrb_check_convert_type(mrb, val, MRB_TT_ARRAY, "Array", "to_ary"); + if (!mrb_nil_p(tmp)) { + val = tmp; + goto ary_join; + } + val = mrb_obj_as_string(mrb, val); + goto str_join; + } + } + + mrb_ary_pop(mrb, list); + + return result; +} + +mrb_value +mrb_ary_join(mrb_state *mrb, mrb_value ary, mrb_value sep) +{ + sep = mrb_obj_as_string(mrb, sep); + return join_ary(mrb, ary, sep, mrb_ary_new(mrb)); +} + +#if 0 +static void ary_join_1(mrb_state *mrb, mrb_value obj, mrb_value ary, mrb_value sep, long i, mrb_value result, mrb_value first); + +static mrb_value +recursive_join(mrb_state *mrb, mrb_value obj, mrb_value args, int recur) +{ + mrb_value ary = mrb_ary_ref(mrb, args, 0); + mrb_value sep = mrb_ary_ref(mrb, args, 1); + mrb_value result = mrb_ary_ref(mrb, args, 2); + mrb_value first = mrb_ary_ref(mrb, args, 3); + + if (recur) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "recursive array join"); + } + else { + ary_join_1(mrb, obj, ary, sep, 0, 
result, first); + } + return mrb_nil_value(); +} + +static void +ary_join_0(mrb_state *mrb, mrb_value ary, mrb_value sep, long max, mrb_value result) +{ + long i; + mrb_value val; + + for (i=0; i 0 && !mrb_nil_p(sep)) + mrb_str_buf_append(mrb, result, sep); + mrb_str_buf_append(mrb, result, val); + //if (OBJ_TAINTED(val)) OBJ_TAINT(result); + //if (OBJ_UNTRUSTED(val)) OBJ_TAINT(result); + } +} + +static void +ary_join_1(mrb_state *mrb, mrb_value obj, mrb_value ary, mrb_value sep, long i, mrb_value result, mrb_value first) +{ + mrb_value val, tmp; + + for (; i 0 && !mrb_nil_p(sep)) { + mrb_str_buf_append(mrb, result, sep); + } + + val = RARRAY_PTR(ary)[i]; + switch (mrb_type(val)) { + case MRB_TT_STRING: + str_join: + mrb_str_buf_append(mrb, result, val); + break; + case MRB_TT_ARRAY: + obj = val; + ary_join: + if (mrb_obj_equal(mrb, val, ary)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "recursive array join"); + } + else { + //struct recursive_join_arg args; + mrb_value args = mrb_ary_new(mrb); + + mrb_ary_set(mrb, args, 0, val); + mrb_ary_set(mrb, args, 1, sep); + mrb_ary_set(mrb, args, 2, result); + mrb_ary_set(mrb, args, 3, first); + + mrb_exec_recursive(mrb, recursive_join, obj, &args); + } + break; + default: + tmp = mrb_check_string_type(mrb, val); + if (!mrb_nil_p(tmp)) { + val = tmp; + goto str_join; + } + tmp = mrb_check_convert_type(mrb, val, MRB_TT_ARRAY, "Array", "to_ary"); + if (!mrb_nil_p(tmp)) { + obj = val; + val = tmp; + goto ary_join; + } + val = mrb_obj_as_string(mrb, val); + if (mrb_test(first)) { +#ifdef INCLUDE_REGEXP /* include "encoding.h" */ + mrb_enc_copy(mrb, result, val); +#endif + first = mrb_false_value(); + } + goto str_join; + } + } +} + +mrb_value +mrb_ary_join(mrb_state *mrb, mrb_value ary, mrb_value sep) +{ + long len = 1, i; + //int taint = FALSE; + //int untrust = FALSE; + mrb_value val, tmp, result; + + if (RARRAY_LEN(ary) == 0) return mrb_str_new2(mrb, ""); + //if (OBJ_TAINTED(ary) || OBJ_TAINTED(sep)) taint = TRUE; + //if 
(OBJ_UNTRUSTED(ary) || OBJ_UNTRUSTED(sep)) untrust = TRUE; + + if (!mrb_nil_p(sep)) { + //StringValue(sep); + mrb_string_value(mrb, &sep); + len += RSTRING_LEN(sep) * (RARRAY_LEN(ary) - 1); + } + + for (i=0; ibuf, ++(RSTRING(result)->capa)); + ary_join_0(mrb, ary, sep, i, result); +mrb_realloc(mrb, RSTRING(result)->buf, ++(RSTRING(result)->capa)); + ary_join_1(mrb, ary, ary, sep, i, result, first); +mrb_realloc(mrb, RSTRING(result)->buf, ++(RSTRING(result)->capa)); + return result; + } + + len += RSTRING_LEN(tmp); + } + + result = mrb_str_buf_new(mrb, len); + //if (taint) OBJ_TAINT(result); + //if (untrust) OBJ_UNTRUST(result); + ary_join_0(mrb, ary, sep, RARRAY_LEN(ary), result); + + return result; +} +#endif + +/* + * call-seq: + * ary.join(sep=nil) -> str + * + * Returns a string created by converting each element of the array to + * a string, separated by sep. + * + * [ "a", "b", "c" ].join #=> "abc" + * [ "a", "b", "c" ].join("-") #=> "a-b-c" + */ + +static mrb_value +mrb_ary_join_m(mrb_state *mrb, mrb_value ary) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + switch(argc) { + case 0: + return mrb_ary_join(mrb, ary, mrb_nil_value()); + + case 1: + return mrb_ary_join(mrb, ary, argv[0]); + + default: + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments"); + } + + return mrb_nil_value(); /* dummy */ +} + +static mrb_value +recursive_equal(mrb_state *mrb, mrb_value ary1, mrb_value ary2, int recur) +{ + long i; + + if (recur) return mrb_true_value(); /* Subtle! */ + for (i=0; i bool + * + * Equality---Two arrays are equal if they contain the same number + * of elements and if each element is equal to (according to + * Object.==) the corresponding element in the other array. 
+ * + * [ "a", "c" ] == [ "a", "c", 7 ] #=> false + * [ "a", "c", 7 ] == [ "a", "c", 7 ] #=> true + * [ "a", "c", 7 ] == [ "a", "d", "f" ] #=> false + * + */ + +static mrb_value +mrb_ary_equal(mrb_state *mrb, mrb_value ary1) +{ + mrb_value ary2; + + mrb_get_args(mrb, "o", &ary2); + if (mrb_obj_equal(mrb, ary1,ary2)) return mrb_true_value(); + if (mrb_type(ary2) != MRB_TT_ARRAY) { + if (!mrb_respond_to(mrb, ary2, mrb_intern(mrb, "to_ary"))) { + return mrb_false_value(); + } + if (mrb_equal(mrb, ary2, ary1)){ + return mrb_true_value(); + } + else { + return mrb_false_value(); + } + } + if (RARRAY_LEN(ary1) != RARRAY_LEN(ary2)) return mrb_false_value(); + return mrb_exec_recursive_paired(mrb, recursive_equal, ary1, ary2, &ary2); +} + +static mrb_value +recursive_eql(mrb_state *mrb, mrb_value ary1, mrb_value ary2, int recur) +{ + long i; + + if (recur) return mrb_true_value(); /* Subtle! */ + for (i=0; i true or false + * + * Returns true if +self+ and _other_ are the same object, + * or are both arrays with the same content. + */ + +static mrb_value +mrb_ary_eql(mrb_state *mrb, mrb_value ary1) +{ + mrb_value ary2; + + mrb_get_args(mrb, "o", &ary2); + if (mrb_obj_equal(mrb, ary1,ary2)) return mrb_true_value(); + if (mrb_type(ary2) != MRB_TT_ARRAY) return mrb_false_value(); + if (RARRAY_LEN(ary1) != RARRAY_LEN(ary2)) return mrb_false_value(); + return mrb_exec_recursive_paired(mrb, recursive_eql, ary1, ary2, &ary2); +} + +#if 0 +static mrb_value +recursive_hash(mrb_value ary, mrb_value dummy, int recur) +{ + long i; + st_index_t h; + mrb_value n; + + h = mrb_hash_start(RARRAY_LEN(ary)); + if (recur) { + h = mrb_hash_uint(h, NUM2LONG(mrb_hash(mrb_cArray))); + } + else { + for (i=0; i fixnum + * + * Compute a hash-code for this array. Two arrays with the same content + * will have the same hash code (and will compare using eql?). 
+ */ + +static mrb_value +mrb_ary_hash(mrb_state *mrb, mrb_value ary) +{ + return mrb_exec_recursive_outer(mrb, recursive_hash, ary, mrb_fixnum_value(0)); +} +#endif + +void +mrb_init_array(mrb_state *mrb) +{ + struct RClass *a; + + a = mrb->array_class = mrb_define_class(mrb, "Array", mrb->object_class); + MRB_SET_INSTANCE_TT(a, MRB_TT_ARRAY); + mrb_include_module(mrb, a, mrb_class_get(mrb, "Enumerable")); + + mrb_define_class_method(mrb, a, "[]", mrb_ary_s_create, ARGS_ANY()); /* 15.2.12.4.1 */ + + mrb_define_method(mrb, a, "*", mrb_ary_times, ARGS_REQ(1)); /* 15.2.12.5.1 */ + mrb_define_method(mrb, a, "+", mrb_ary_plus, ARGS_REQ(1)); /* 15.2.12.5.2 */ + mrb_define_method(mrb, a, "<<", mrb_ary_push_m, ARGS_REQ(1)); /* 15.2.12.5.3 */ + mrb_define_method(mrb, a, "[]", mrb_ary_aget, ARGS_ANY()); /* 15.2.12.5.4 */ + mrb_define_method(mrb, a, "[]=", mrb_ary_aset, ARGS_ANY()); /* 15.2.12.5.5 */ + mrb_define_method(mrb, a, "clear", mrb_ary_clear, ARGS_NONE()); /* 15.2.12.5.6 */ +#if 0 /* --> implement with ruby code */ + mrb_define_method(mrb, a, "collect!", mrb_ary_collect_bang, ARGS_NONE()); /* 15.2.12.5.7 */ +#endif + mrb_define_method(mrb, a, "concat", mrb_ary_concat_m, ARGS_REQ(1)); /* 15.2.12.5.8 */ + mrb_define_method(mrb, a, "delete_at", mrb_ary_delete_at, ARGS_REQ(1)); /* 15.2.12.5.9 */ +#if 0 /* --> implement with ruby code */ + mrb_define_method(mrb, a, "each", mrb_ary_each, ARGS_NONE()); /* 15.2.12.5.10 */ +#endif +#if 0 /* --> implement with ruby code */ + mrb_define_method(mrb, a, "each_index", mrb_ary_each_index, ARGS_NONE()); /* 15.2.12.5.11 */ +#endif + mrb_define_method(mrb, a, "empty?", mrb_ary_empty_p, ARGS_NONE()); /* 15.2.12.5.12 */ + mrb_define_method(mrb, a, "first", mrb_ary_first, ARGS_ANY()); /* 15.2.12.5.13 */ + mrb_define_method(mrb, a, "index", mrb_ary_index_m, ARGS_REQ(1)); /* 15.2.12.5.14 */ +#if 0 /* --> implement with ruby code */ + mrb_define_method(mrb, a, "initialize", mrb_ary_initialize, ARGS_ANY()); /* 15.2.12.5.15 */ +#endif + 
mrb_define_method(mrb, a, "initialize_copy", mrb_ary_replace_m, ARGS_REQ(1)); /* 15.2.12.5.16 */ + mrb_define_method(mrb, a, "join", mrb_ary_join_m, ARGS_ANY()); /* 15.2.12.5.17 */ + mrb_define_method(mrb, a, "last", mrb_ary_last, ARGS_ANY()); /* 15.2.12.5.18 */ + mrb_define_method(mrb, a, "length", mrb_ary_size, ARGS_NONE()); /* 15.2.12.5.19 */ +#if 0 /* --> implement with ruby code */ + mrb_define_method(mrb, a, "map!", mrb_ary_collect_bang, ARGS_NONE()); /* 15.2.12.5.20 */ +#endif + mrb_define_method(mrb, a, "pop", mrb_ary_pop_m, ARGS_NONE()); /* 15.2.12.5.21 */ + mrb_define_method(mrb, a, "push", mrb_ary_push_m, ARGS_ANY()); /* 15.2.12.5.22 */ + mrb_define_method(mrb, a, "replace", mrb_ary_replace_m, ARGS_REQ(1)); /* 15.2.12.5.23 */ + mrb_define_method(mrb, a, "reverse", mrb_ary_reverse, ARGS_NONE()); /* 15.2.12.5.24 */ + mrb_define_method(mrb, a, "reverse!", mrb_ary_reverse_bang, ARGS_NONE()); /* 15.2.12.5.25 */ + mrb_define_method(mrb, a, "rindex", mrb_ary_rindex_m, ARGS_REQ(1)); /* 15.2.12.5.26 */ + mrb_define_method(mrb, a, "shift", mrb_ary_shift, ARGS_NONE()); /* 15.2.12.5.27 */ + mrb_define_method(mrb, a, "size", mrb_ary_size, ARGS_NONE()); /* 15.2.12.5.28 */ + mrb_define_method(mrb, a, "slice", mrb_ary_aget, ARGS_ANY()); /* 15.2.12.5.29 */ + mrb_define_method(mrb, a, "unshift", mrb_ary_unshift_m, ARGS_ANY()); /* 15.2.12.5.30 */ + + mrb_define_method(mrb, a, "inspect", mrb_ary_inspect, ARGS_NONE()); /* 15.2.12.5.31 (x) */ + mrb_define_alias(mrb, a, "to_s", "inspect"); /* 15.2.12.5.32 (x) */ + mrb_define_method(mrb, a, "==", mrb_ary_equal, ARGS_REQ(1)); /* 15.2.12.5.33 (x) */ + mrb_define_method(mrb, a, "eql?", mrb_ary_eql, ARGS_REQ(1)); /* 15.2.12.5.34 (x) */ + //mrb_define_method(mrb, a, "hash", mrb_ary_hash, ARGS_NONE()); /* 15.2.12.5.35 (x) */ + mrb_define_method(mrb, a, "<=>", mrb_ary_cmp, ARGS_REQ(1)); /* 15.2.12.5.36 (x) */ +} diff --git a/src/ascii.c b/src/ascii.c new file mode 100644 index 0000000000..91bd540730 --- /dev/null +++ b/src/ascii.c @@ 
-0,0 +1,96 @@ +/********************************************************************** + ascii.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "mruby.h" +#ifdef INCLUDE_ENCODING +#include "regenc.h" + +OnigEncodingDefine(ascii, ASCII) = { + onigenc_single_byte_mbc_enc_len, + "ASCII-8BIT",/* name */ + 1, /* max byte length */ + 1, /* min byte length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + onigenc_ascii_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("BINARY", "ASCII-8BIT") +ENC_REPLICATE("IBM437", "ASCII-8BIT") +ENC_ALIAS("CP437", "IBM437") +ENC_REPLICATE("IBM737", "ASCII-8BIT") +ENC_ALIAS("CP737", "IBM737") +ENC_REPLICATE("IBM775", "ASCII-8BIT") +ENC_ALIAS("CP775", "IBM775") +ENC_REPLICATE("CP850", "ASCII-8BIT") +ENC_ALIAS("IBM850", "CP850") +ENC_REPLICATE("IBM852", "ASCII-8BIT") +ENC_REPLICATE("CP852", "IBM852") +ENC_REPLICATE("IBM855", "ASCII-8BIT") +ENC_REPLICATE("CP855", "IBM855") +ENC_REPLICATE("IBM857", "ASCII-8BIT") +ENC_ALIAS("CP857", "IBM857") +ENC_REPLICATE("IBM860", "ASCII-8BIT") +ENC_ALIAS("CP860", "IBM860") +ENC_REPLICATE("IBM861", "ASCII-8BIT") +ENC_ALIAS("CP861", "IBM861") +ENC_REPLICATE("IBM862", "ASCII-8BIT") +ENC_ALIAS("CP862", "IBM862") +ENC_REPLICATE("IBM863", "ASCII-8BIT") +ENC_ALIAS("CP863", "IBM863") +ENC_REPLICATE("IBM864", "ASCII-8BIT") +ENC_ALIAS("CP864", "IBM864") +ENC_REPLICATE("IBM865", "ASCII-8BIT") +ENC_ALIAS("CP865", "IBM865") +ENC_REPLICATE("IBM866", "ASCII-8BIT") +ENC_ALIAS("CP866", "IBM866") +ENC_REPLICATE("IBM869", "ASCII-8BIT") +ENC_ALIAS("CP869", "IBM869") +ENC_REPLICATE("Windows-1258", "ASCII-8BIT") +ENC_ALIAS("CP1258", "Windows-1258") +ENC_REPLICATE("GB1988", "ASCII-8BIT") +ENC_REPLICATE("macCentEuro", "ASCII-8BIT") +ENC_REPLICATE("macCroatian", "ASCII-8BIT") 
+ENC_REPLICATE("macCyrillic", "ASCII-8BIT") +ENC_REPLICATE("macGreek", "ASCII-8BIT") +ENC_REPLICATE("macIceland", "ASCII-8BIT") +ENC_REPLICATE("macRoman", "ASCII-8BIT") +ENC_REPLICATE("macRomania", "ASCII-8BIT") +ENC_REPLICATE("macThai", "ASCII-8BIT") +ENC_REPLICATE("macTurkish", "ASCII-8BIT") +ENC_REPLICATE("macUkraine", "ASCII-8BIT") +#endif //INCLUDE_ENCODING diff --git a/src/cdump.c b/src/cdump.c new file mode 100644 index 0000000000..74365b0906 --- /dev/null +++ b/src/cdump.c @@ -0,0 +1,197 @@ +#include "cdump.h" + +#include + +#include "irep.h" +#include "mruby/string.h" +#include "re.h" + +#define MRB_CDUMP_LINE_LEN 128 + +#define SOURCE_CODE(fmt, ...) fprintf(f, fmt"\n", __VA_ARGS__) +#define SOURCE_CODE0(str) do {fputs(str, f); putc('\n', f);} while (0) + +static int +make_cdump_isec(mrb_state *mrb, int irep_no, FILE *f) +{ + int i; + mrb_irep *irep = mrb->irep[irep_no]; + + if (irep == 0) + return -1; + + /* dump isec struct*/ + if (irep->ilen > 0) { + SOURCE_CODE ("static mrb_code iseq_%d[] = {", irep_no); + for (i=0; iilen; i++) + SOURCE_CODE(" 0x%08x," , irep->iseq[i]); + SOURCE_CODE0 ("};"); + SOURCE_CODE0 (""); + } + + return 0; +} + +static size_t +str_format_len(mrb_value str) +{ + size_t dump_len = 0; + + char *src; + + for (src = RSTRING_PTR(str); src < RSTRING_END(str);) { + switch (*src) { + case 0x07:/* BEL */ /* fall through */ + case 0x08:/* BS */ /* fall through */ + case 0x09:/* HT */ /* fall through */ + case 0x0A:/* LF */ /* fall through */ + case 0x0B:/* VT */ /* fall through */ + case 0x0C:/* FF */ /* fall through */ + case 0x0D:/* CR */ /* fall through */ + case 0x22:/* " */ /* fall through */ + case 0x27:/* ' */ /* fall through */ + case 0x3F:/* ? 
*/ /* fall through */ + case 0x5C:/* \ */ /* fall through */ + dump_len += 2; src += 2; + break; + + default: + dump_len++; src++; + break; + } + } + + return dump_len; +} + +static char* +str_to_format(mrb_value str, char *buf) +{ + char *src, *dst; + + for (src = RSTRING_PTR(str), dst = buf; src < RSTRING_END(str);) { + switch (*src) { + case 0x07:/* BEL */ memcpy(dst, "\\a", 2); dst+=2; src+=2; break; + case 0x08:/* BS */ memcpy(dst, "\\b", 2); dst+=2; src+=2; break; + case 0x09:/* HT */ memcpy(dst, "\\t", 2); dst+=2; src+=2; break; + case 0x0A:/* LF */ memcpy(dst, "\\n", 2); dst+=2; src+=2; break; + case 0x0B:/* VT */ memcpy(dst, "\\v", 2); dst+=2; src+=2; break; + case 0x0C:/* FF */ memcpy(dst, "\\f", 2); dst+=2; src+=2; break; + case 0x0D:/* CR */ memcpy(dst, "\\r", 2); dst+=2; src+=2; break; + case 0x22:/* " */ memcpy(dst, "\\\"", 2); dst+=2; src+=2; break; + case 0x27:/* ' */ memcpy(dst, "\\\'", 2); dst+=2; src+=2; break; + case 0x3F:/* ? */ memcpy(dst, "\\\?", 2); dst+=2; src+=2; break; + case 0x5C:/* \ */ memcpy(dst, "\\\\", 2); dst+=2; src+=2; break; + default: *dst++ = *src++; break; + } + } + + return buf; +} + +int +make_cdump_irep(mrb_state *mrb, int irep_no, FILE *f) +{ + mrb_irep *irep = mrb->irep[irep_no]; + int n; + char *buf = 0; + size_t buf_len, str_len; + + if (irep == 0) + return -1; + + buf_len = MRB_CDUMP_LINE_LEN; + if ((buf = mrb_malloc(mrb, buf_len)) == 0 ) { + return MRB_CDUMP_GENERAL_FAILURE; + } + + SOURCE_CODE0 (" irep = mrb->irep[idx] = mrb_malloc(mrb, sizeof(mrb_irep));"); + SOURCE_CODE0 (" irep->idx = idx++;"); + SOURCE_CODE (" irep->flags = %d | MRB_ISEQ_NOFREE;", irep->flags); + SOURCE_CODE (" irep->nlocals = %d;", irep->nlocals); + SOURCE_CODE (" irep->nregs = %d;", irep->nregs); + SOURCE_CODE (" irep->ilen = %d;", irep->ilen); + SOURCE_CODE (" irep->iseq = iseq_%d;", irep_no); + + SOURCE_CODE (" irep->slen = %d;", irep->slen); + if(irep->slen > 0) { + SOURCE_CODE (" irep->syms = mrb_malloc(mrb, sizeof(mrb_sym)*%d);", 
irep->slen); + for (n=0; nslen; n++) + if (irep->syms[n]) { + SOURCE_CODE (" irep->syms[%d] = mrb_intern(mrb, \"%s\");", n, mrb_sym2name(mrb, irep->syms[n])); + } + } + else + SOURCE_CODE0 (" irep->syms = NULL;"); + + SOURCE_CODE (" irep->plen = %d;", irep->plen); + if(irep->plen > 0) { + SOURCE_CODE (" irep->pool = mrb_malloc(mrb, sizeof(mrb_value)*%d);", irep->plen); + for (n=0; nplen; n++) { + switch (irep->pool[n].tt) { + case MRB_TT_FLOAT: + SOURCE_CODE(" irep->pool[%d] = mrb_float_value(%.16e);", n, irep->pool[n].value.f); break; + case MRB_TT_STRING: + str_len = str_format_len(irep->pool[n]) + 1; + if ( str_len > buf_len ) { + buf_len = str_len; + if ((buf = mrb_realloc(mrb, buf, buf_len)) == 0 ) { + return MRB_CDUMP_GENERAL_FAILURE; + } + } + memset(buf, 0, buf_len); + SOURCE_CODE(" irep->pool[%d] = mrb_str_new(mrb, \"%s\", %d);", n, str_to_format(irep->pool[n], buf), RSTRING_LEN(irep->pool[n])); break; + /* TODO MRB_TT_REGEX */ + default: break; + } + } + } + else + SOURCE_CODE0 (" irep->pool = NULL;"); + SOURCE_CODE0(""); + return MRB_CDUMP_OK; +} + +int +mrb_cdump_irep(mrb_state *mrb, int n, FILE *f,const char *initname) +{ + int irep_no, irep_num; + + if (mrb == 0 || n < 0 || n >= mrb->irep_len || f == 0 || initname == 0) + return -1; + + irep_num = mrb->irep_len - n; + + SOURCE_CODE0("#include \"mruby.h\""); + SOURCE_CODE0("#include \"irep.h\""); + SOURCE_CODE0("#include \"mruby/string.h\""); + SOURCE_CODE0("#include \"mruby/proc.h\""); + SOURCE_CODE0(""); + + for (irep_no=n; irep_noirep_len; irep_no++) { + if (make_cdump_isec(mrb, irep_no, f) != 0) + return -1; + } + + SOURCE_CODE0("void"); + SOURCE_CODE ("%s(mrb_state *mrb)", initname); + SOURCE_CODE0("{"); + SOURCE_CODE0(" int n = mrb->irep_len;"); + SOURCE_CODE0(" int idx = n;"); + SOURCE_CODE0(" mrb_irep *irep;"); + SOURCE_CODE0(""); + SOURCE_CODE (" mrb_add_irep(mrb, idx+%d);", irep_num); + SOURCE_CODE0(""); + for (irep_no=n; irep_noirep_len; irep_no++) { + if (make_cdump_irep(mrb, irep_no, f) != 
0) + return -1; + } + + SOURCE_CODE0(" mrb->irep_len = idx;"); + SOURCE_CODE0(""); + SOURCE_CODE0(" extern mrb_value mrb_top_self(mrb_state *mrb);"); + SOURCE_CODE0(" mrb_run(mrb, mrb_proc_new(mrb, mrb->irep[n]), mrb_top_self(mrb));"); + SOURCE_CODE0("}"); + + return 0; +} diff --git a/src/cdump.h b/src/cdump.h new file mode 100644 index 0000000000..82fc442a6f --- /dev/null +++ b/src/cdump.h @@ -0,0 +1,8 @@ +#include "mruby.h" +#include + +int mrb_cdump_irep(mrb_state *mrb, int n, FILE *f,const char *initname); + +/* error code */ +#define MRB_CDUMP_OK 0 +#define MRB_CDUMP_GENERAL_FAILURE -1 diff --git a/src/class.c b/src/class.c new file mode 100644 index 0000000000..aa2485cb1e --- /dev/null +++ b/src/class.c @@ -0,0 +1,1049 @@ +#include "mruby.h" +#include +#include +#include "mruby/class.h" +#include "mruby/proc.h" +#include "mruby/string.h" +#include "mruby/numeric.h" +#include "variable.h" +#include "mruby/array.h" +#include "error.h" + +#include "ritehash.h" + +#ifdef INCLUDE_REGEXP + #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr +#else + #define mrb_usascii_str_new2 mrb_str_new_cstr +#endif + +KHASH_MAP_INIT_INT(mt, struct RProc*); +KHASH_MAP_INIT_INT(iv, mrb_value); + +typedef struct fc_result { + mrb_sym name; + struct RClass * klass; + mrb_value path; + struct RClass * track; + struct fc_result *prev; +} fcresult_t; + +struct RClass* mrb_class_real(struct RClass* cl); +int kiv_lookup(khash_t(iv) *table, mrb_sym key, mrb_value *value); +extern struct kh_iv *mrb_class_tbl; + +void +mrb_gc_mark_mt(mrb_state *mrb, struct RClass *c) +{ + khiter_t k; + khash_t(mt) *h = c->mt; + + if (!h) return; + for (k = kh_begin(h); k != kh_end(h); k++) { + if (kh_exist(h, k)){ + struct RProc *m = kh_value(h, k); + if (m) { + paint_black(m); + } + } + } +} + +size_t +mrb_gc_mark_mt_size(mrb_state *mrb, struct RClass *c) +{ + khash_t(mt) *h = c->mt; + + if (!h) return 0; + return kh_size(h); +} + +void +mrb_gc_free_mt(mrb_state *mrb, struct RClass *c) +{ + 
kh_destroy(mt, c->mt); +} +/* Records name as the class id of c by storing it in c's "__classid__" instance variable. */ +void +mrb_name_class(mrb_state *mrb, struct RClass *c, mrb_sym name) +{ + mrb_obj_iv_set(mrb, (struct RObject*)c, + mrb_intern(mrb, "__classid__"), mrb_symbol_value(name)); +} +/* Returns the symbol c is known by: its "__classid__" when set, otherwise the key of the first entry in outer's iv table (Object's when outer is NULL) whose value is c. NOTE(review): when neither is found the result is SYM2ID() applied to a nil value; mrb_class_path() treats 0 as "no name" - confirm that is what SYM2ID yields for nil. */ +static mrb_sym +class_sym(mrb_state *mrb, struct RClass *c, struct RClass *outer) +{ + mrb_value name; + + name = mrb_obj_iv_get(mrb, (struct RObject*)c, mrb_intern(mrb, "__classid__")); + if (mrb_nil_p(name)) { + khash_t(iv)* h; + khiter_t k; + mrb_value v; + + if (!outer) outer = mrb->object_class; + h = outer->iv; + for (k = kh_begin(h); k != kh_end(h); k++) { + if (!kh_exist(h,k)) continue; + v = kh_value(h,k); + if (mrb_type(v) == c->tt && mrb_class_ptr(v) == c) { + return kh_key(h,k); + } + } + } + return SYM2ID(name); +} +/* Gives c a singleton class, unless it already has one. The new singleton's superclass is the class of c's superclass, or Class when c has no superclass. */ +static void +make_metaclass(mrb_state *mrb, struct RClass *c) +{ + struct RClass *sc; + + if (c->c->tt == MRB_TT_SCLASS) { + return; + } + sc = mrb_obj_alloc(mrb, MRB_TT_SCLASS, mrb->class_class); + sc->mt = 0; + if (!c->super) { + sc->super = mrb->class_class; + } + else { + sc->super = c->super->c; + } + c->c = sc; + mrb_field_write_barrier(mrb, (struct RBasic*)c, (struct RBasic*)sc); + mrb_field_write_barrier(mrb, (struct RBasic*)sc, (struct RBasic*)sc->super); +} +/* Creates a module, stores it in Object's iv table under name and records name as its class id. The method table comes from mrb_module_new(); allocating a second one here would leak the first. */ +struct RClass* +mrb_define_module_id(mrb_state *mrb, mrb_sym name) +{ + struct RClass *m = mrb_module_new(mrb); + + mrb_obj_iv_set(mrb, (struct RObject*)mrb->object_class, + name, mrb_obj_value(m)); + mrb_name_class(mrb, m, name); + + return m; +} +/* C-string convenience wrapper around mrb_define_module_id(). */ +struct RClass* +mrb_define_module(mrb_state *mrb, const char *name) +{ + return mrb_define_module_id(mrb, mrb_intern(mrb, name)); +} +/* Names c, binds it to constant id in outer and remembers outer in c's "__outer__" instance variable. */ +static void +setup_class(mrb_state *mrb, mrb_value outer, struct RClass *c, mrb_sym id) +{ + mrb_name_class(mrb, c, id); + mrb_const_set(mrb, outer, id, mrb_obj_value(c)); + mrb_obj_iv_set(mrb, (struct RObject*)c, + mrb_intern(mrb, "__outer__"), outer); +} +/* Returns the class or module recorded in c's "__outer__" instance variable, or 0 when none is recorded. */ +struct RClass* +mrb_class_outer_module(mrb_state *mrb, struct RClass *c) +{ + mrb_value outer; + + outer 
= mrb_obj_iv_get(mrb, (struct RObject*)c, mrb_intern(mrb, "__outer__")); + if (mrb_nil_p(outer)) return 0; + return mrb_class_ptr(outer); +} +/* Returns whatever is already bound to constant id in outer, or creates a new module and registers it there via setup_class(). NOTE(review): an existing constant is returned without checking that it is a module. */ +struct RClass* +mrb_vm_define_module(mrb_state *mrb, mrb_value outer, mrb_sym id) +{ + struct RClass *c; + mrb_value v; + + if (mrb_const_defined(mrb, outer, id)) { + v = mrb_const_get(mrb, outer, id); + c = mrb_class_ptr(v); + } + else { + c = mrb_module_new(mrb); + setup_class(mrb, outer, c, id); + } + return c; +} +/* Creates a class derived from super, stores it in Object's iv table under name and records name as its class id. */ +struct RClass* +mrb_define_class_id(mrb_state *mrb, mrb_sym name, struct RClass *super) +{ + struct RClass *c = mrb_class_new(mrb, super); + + mrb_obj_iv_set(mrb, (struct RObject*)mrb->object_class, + name, mrb_obj_value(c)); + mrb_name_class(mrb, c, name); + + return c; +} +/* C-string convenience wrapper around mrb_define_class_id(). */ +struct RClass* +mrb_define_class(mrb_state *mrb, const char *name, struct RClass *super) +{ + struct RClass *c; + c = mrb_define_class_id(mrb, mrb_intern(mrb, name), super); + return c; +} +/* Reuses the value bound to id in outer when super is nil, or when that value is an MRB_TT_CLASS whose direct superclass is exactly super. In every other case a new class is created and registered through setup_class(). NOTE(review): a superclass mismatch therefore rebinds the constant silently instead of raising. */ +struct RClass* +mrb_vm_define_class(mrb_state *mrb, mrb_value outer, mrb_value super, mrb_sym id) +{ + struct RClass *c = 0; + + if (mrb_const_defined(mrb, outer, id)) { + mrb_value v = mrb_const_get(mrb, outer, id); + + c = mrb_class_ptr(v); + if (!mrb_nil_p(super) && (c->tt != MRB_TT_CLASS || c->super != mrb_class_ptr(super))) { + c = 0; + } + } + if (!c) { + struct RClass *s = 0; + + if (!mrb_nil_p(super)) s = mrb_class_ptr(super); + c = mrb_class_new(mrb, s); + setup_class(mrb, outer, c, id); + } + + return c; +} + +/*! + * Defines a class under the namespace of \a outer. + * \param outer a class which contains the new class. + * \param id name of the new class + * \param super a class from which the new class will derive. + * NULL means \c Object class. + * \return the created class + * \throw TypeError if the constant name \a name is already taken but + * the constant is not a \c Class. + * \throw NameError if the class is already defined but the class can not + * be reopened because its superclass is not \a super. 
+ * \post the constant named \a name under \a outer refers to the returned class. + * + * \note if a class named \a name is already defined and its superclass is + * \a super, the function just returns the defined class. + */ +struct RClass * +mrb_define_class_under(mrb_state *mrb, struct RClass *outer, const char *name, struct RClass *super) +{ + struct RClass * c; + mrb_sym id = mrb_intern(mrb, name); + + if (mrb_const_defined_at(mrb, outer, id)) { + c = mrb_class_from_sym(mrb, outer, id); + if (c->tt != MRB_TT_CLASS) { + mrb_raise(mrb, E_TYPE_ERROR, "%s is not a class", mrb_sym2name(mrb, id)); + } + if (mrb_class_real(c->super) != super) { + mrb_name_error(mrb, id, "%s is already defined", mrb_sym2name(mrb, id)); + } + return c; + } + if (!super) { + mrb_warn("no super class for `%s::%s', Object assumed", + mrb_obj_classname(mrb, mrb_obj_value(outer)), mrb_sym2name(mrb, id)); + } + c = mrb_class_new(mrb, super); + setup_class(mrb, mrb_obj_value(outer), c, id); /* names c and binds the constant in outer */ + + return c; +} +/* Returns the module bound to name in outer (raising TypeError if that constant is not a module), or creates a new module and registers it under outer. */ +struct RClass * +mrb_define_module_under(mrb_state *mrb, struct RClass *outer, const char *name) +{ + struct RClass * c; + mrb_sym id = mrb_intern(mrb, name); + + if (mrb_const_defined_at(mrb, outer, id)) { + c = mrb_class_from_sym(mrb, outer, id); + if (c->tt != MRB_TT_MODULE) { + mrb_raise(mrb, E_TYPE_ERROR, "%s is not a module", mrb_sym2name(mrb, id)); + } + return c; + } + c = mrb_module_new(mrb); + setup_class(mrb, mrb_obj_value(outer), c, id); /* names c and binds the constant in outer */ + + return c; +} +/* Stores p as method mid in c's method table, creating the table on first use. */ +void +mrb_define_method_raw(mrb_state *mrb, struct RClass *c, mrb_sym mid, struct RProc *p) +{ + khash_t(mt) *h = c->mt; + khiter_t k; + int ret; + + if (!h) h = c->mt = kh_init(mt, mrb); + k = kh_put(mt, h, mid, &ret); + kh_value(h, k) = p; +} +/* Wraps the C function func in a proc whose target class is c and installs it as method mid; aspec is currently unused. */ +void +mrb_define_method_id(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_func_t func, int aspec) +{ + struct RProc *p; + + p = mrb_proc_new_cfunc(mrb, 
func); + p->target_class = c; + mrb_define_method_raw(mrb, c, mid, p); +} +/* C-string convenience wrapper around mrb_define_method_id(). */ +void +mrb_define_method(mrb_state *mrb, struct RClass *c, const char *name, mrb_func_t func, int apec) +{ + mrb_define_method_id(mrb, c, mrb_intern(mrb, name), func, apec); +} +/* Installs the proc wrapped in body as method name of c. */ +void +mrb_define_method_vm(mrb_state *mrb, struct RClass *c, mrb_sym name, mrb_value body) +{ + mrb_define_method_raw(mrb, c, name, mrb_proc_ptr(body)); +} +/* Unpacks the arguments of the current method call according to format, one conversion per character: 'o' mrb_value, 'i' mrb_int, 'f' mrb_float, 's' char* plus size_t length, 'a' copy of the remaining values plus their count, 'b' block as struct RProc*, '&' block as mrb_value, '*' pointer to the remaining values plus their count. 'a' and '*' end processing. When the call frame's argc is negative the arguments are read from the array in stack[1]. Always returns 0. */ +int +mrb_get_args(mrb_state *mrb, const char *format, ...) +{ + char c; + int i=0; + mrb_value *sp = mrb->stack + 1; + va_list ap; + int argc = mrb->ci->argc; + int *argcp; + + va_start(ap, format); + if (argc < 0) { + struct RArray *a = mrb_ary_ptr(mrb->stack[1]); + + argc = a->len; + sp = a->buf; + } + while ((c = *format++)) { + switch (c) { + case 'o': + { + mrb_value *p; + p = va_arg(ap, mrb_value*); + *p = (argc > i) ? *sp : mrb_nil_value(); + i++; sp++; + } + break; + case 'i': + { + mrb_int *p; + + p = va_arg(ap, mrb_int*); + *p = (argc > i) ? mrb_fixnum(*sp) : 0; + i++; sp++; + } + break; + case 'f': + { + mrb_float *p; + + p = va_arg(ap, mrb_float*); + switch (sp->tt) { + case MRB_TT_FLOAT: + *p = (argc > i) ? mrb_float(*sp) : 0; + break; + case MRB_TT_FIXNUM: + *p = (argc > i) ? 
(mrb_float)mrb_fixnum(*sp) : 0; + break; + default: + // error + break; + } + i++; sp++; + } + break; + case 's': + { + char **ps; + size_t *pl; + struct RString *s; + + if (argc > i) { + s = mrb_str_ptr(*sp); + ps = va_arg(ap, char**); + *ps = s->buf; + pl = va_arg(ap, size_t*); + *pl = s->len; + } + else { + *ps = ""; + *pl = 0; + } + i++; sp++; + } + break; + case 'a': + { + mrb_value *var; + var = va_arg(ap, mrb_value*); + if (argc > i) { + if (var) { + memcpy(var, sp, sizeof(mrb_value)*(argc-i)); + } + //i = mrb->argc; + } + else { + if (var) *var = mrb_ary_new(mrb); + } + argcp = va_arg(ap, int*); + *argcp = argc-i; + goto last_var; + } + break; + case 'b': + { + struct RProc **p; + mrb_value *bp = mrb->stack + 1; + + p = va_arg(ap, struct RProc**); + if (mrb->ci->argc > 0) { + bp += mrb->ci->argc; + } + if (mrb_nil_p(*bp)) *p = 0; + else *p = mrb_proc_ptr(*bp); + } + break; + case '&': + { + mrb_value *p, *bp = mrb->stack + 1; + + p = va_arg(ap, mrb_value*); + if (mrb->ci->argc > 0) { + bp += mrb->ci->argc; + } + *p = *bp; + } + break; + case '*': + { + mrb_value **var; + var = va_arg(ap, mrb_value**); + argcp = va_arg(ap, int*); + if (argc > i) { + *argcp = argc-i; + if (*argcp > 0) { + if (var) { + *var = sp; + } + i += *argcp; + } + } + else { + *argcp = 0; + *var = NULL; + } + goto last_var; + } + break; + } + } +last_var: + va_end(ap); + return 0; +} + +static struct RClass* +boot_defclass(mrb_state *mrb, struct RClass *super) +{ + struct RClass *c; + + c = mrb_obj_alloc(mrb, MRB_TT_CLASS, mrb->class_class); + c->super = super ? 
super : mrb->object_class; + mrb_field_write_barrier(mrb, (struct RBasic*)c, (struct RBasic*)super); + c->mt = kh_init(mt, mrb); + return c; +} + +void +mrb_include_module(mrb_state *mrb, struct RClass *c, struct RClass *m) +{ + struct RClass *ic; + + ic = mrb_obj_alloc(mrb, MRB_TT_ICLASS, mrb->class_class); + ic->c = m; + ic->mt = m->mt; + ic->iv = m->iv; + ic->super = c->super; + c->super = ic; + mrb_field_write_barrier(mrb, (struct RBasic*)c, (struct RBasic*)ic); +} + +static mrb_value +mrb_mod_include(mrb_state *mrb, mrb_value klass) +{ + mrb_value mod; + + mrb_get_args(mrb, "o", &mod); + mrb_include_module(mrb, mrb_class_ptr(klass), mrb_class_ptr(mod)); + return mod; +} + +static struct RClass * +mrb_singleton_class_ptr(mrb_state *mrb, struct RClass *c) +{ + struct RClass *sc; + + if (c->tt == MRB_TT_SCLASS) { + return c; + } + sc = mrb_obj_alloc(mrb, MRB_TT_SCLASS, mrb->class_class); + sc->mt = 0; + sc->super = c; + mrb_field_write_barrier(mrb, (struct RBasic*)sc, (struct RBasic*)c); + + return sc; +} + +mrb_value +mrb_singleton_class(mrb_state *mrb, mrb_value v) +{ + struct RBasic *obj; + + switch (mrb_type(v)) { + case MRB_TT_FALSE: + case MRB_TT_TRUE: + case MRB_TT_SYMBOL: + case MRB_TT_FIXNUM: + case MRB_TT_FLOAT: + return mrb_nil_value(); /* should raise TypeError */ + default: + break; + } + obj = (struct RBasic*)mrb_object(v); + obj->c = mrb_singleton_class_ptr(mrb, obj->c); + return mrb_obj_value(obj->c); +} + +void +mrb_define_class_method(mrb_state *mrb, struct RClass *c, const char *name, mrb_func_t func, int aspec) +{ + return mrb_define_method_id(mrb, c->c, mrb_intern(mrb, name), func, aspec); +} + +struct RProc* +mrb_method_search_vm(mrb_state *mrb, struct RClass **cp, mrb_sym mid) +{ + khiter_t k; + struct RProc *m; + struct RClass *c = *cp; + + while (c) { + khash_t(mt) *h = c->mt; + + if (h) { + k = kh_get(mt, h, mid); + if (k != kh_end(h)) { + m = kh_value(h, k); + if (!m) break; + *cp = c; + return m; + } + } + c = c->super; + } + return 0; 
/* no method */ +} +/* Like mrb_method_search_vm(), but raises E_NOMETHOD_ERROR when nothing is found. */ +struct RProc* +mrb_method_search(mrb_state *mrb, struct RClass* c, mrb_sym mid) +{ + struct RProc *m; + + m = mrb_method_search_vm(mrb, &c, mid); + if (!m) { + mrb_raise(mrb, E_NOMETHOD_ERROR, "no method named %s\n", mrb_sym2name(mrb, mid)); + } + return m; +} +/* NOTE(review): the text of mrb_funcall() is damaged in this copy - everything from the argument-copy loop up to the doc comment of mrb_class_new_instance() is missing. Restore it from the upstream source before building. */ +mrb_value +mrb_funcall(mrb_state *mrb, mrb_value self, const char *name, int argc,...) +{ + mrb_value args[16]; + va_list ap; + int i; + + if (argc == 0) { + for (i=0; i<5; i++) { + args[i] = mrb_nil_value(); + } + } + else { + va_start(ap, argc); + // assert(argc < 16); + for (i=0; i obj + * + * Calls allocate to create a new object of + * class's class, then invokes that object's + * initialize method, passing it args. + * This is the method that ends up getting called whenever + * an object is constructed using .new. + * + */ +mrb_value +mrb_class_new_instance(mrb_state *mrb, int argc, mrb_value *argv, struct RClass * klass) +{ + mrb_value obj; + struct RClass * c = mrb_obj_alloc(mrb, klass->tt, klass); + c->super = klass; + obj = mrb_obj_value(c); + mrb_obj_call_init(mrb, obj, argc, argv); + return obj; +} +/* Method-style variant of the above: takes the arguments and block of the current call via mrb_get_args("b*") and invokes "initialize" with them. */ +mrb_value +mrb_class_new_instance_m(mrb_state *mrb, mrb_value klass) +{ + mrb_value *argv; + struct RProc *b; + struct RClass *k = mrb_class_ptr(klass); + struct RClass *c; + int argc; + mrb_value obj; + + mrb_get_args(mrb, "b*", &b, &argv, &argc); + c = mrb_obj_alloc(mrb, k->tt, k); + c->super = k; + obj = mrb_obj_value(c); + mrb_funcall_with_block(mrb, obj, "initialize", argc, argv, b); + + return obj; +} +/* Allocates an object of the class's instance type (MRB_INSTANCE_TT, falling back to MRB_TT_OBJECT when that is 0) and calls "initialize" on it with the current call's arguments and block. */ +mrb_value +mrb_instance_new(mrb_state *mrb, mrb_value cv) +{ + struct RClass *c = mrb_class_ptr(cv); + struct RProc *b; + struct RObject *o; + enum mrb_vtype ttype = MRB_INSTANCE_TT(c); + mrb_value obj; + mrb_value *argv; + int argc; + + if (ttype == 0) ttype = MRB_TT_OBJECT; + o = mrb_obj_alloc(mrb, ttype, c); + obj = mrb_obj_value(o); + mrb_get_args(mrb, "b*", &b, &argv, &argc); + mrb_funcall_with_block(mrb, obj, "initialize", argc, argv, b); + + return obj; +} + 
+static mrb_value +mrb_bob_init(mrb_state *mrb, mrb_value cv) +{ + return mrb_nil_value(); +} + +static mrb_value +mrb_bob_not(mrb_state *mrb, mrb_value cv) +{ + if (mrb_test(cv)) + return mrb_false_value(); + return mrb_true_value(); +} + +/* 15.3.1.3.30 */ +/* + * call-seq: + * obj.method_missing(symbol [, *args] ) -> result + * + * Invoked by Ruby when obj is sent a message it cannot handle. + * symbol is the symbol for the method called, and args + * are any arguments that were passed to it. By default, the interpreter + * raises an error when this method is called. However, it is possible + * to override the method to provide more dynamic behavior. + * If it is decided that a particular method should not be handled, then + * super should be called, so that ancestors can pick up the + * missing method. + * The example below creates + * a class Roman, which responds to methods with names + * consisting of roman numerals, returning the corresponding integer + * values. + * + * class Roman + * def romanToInt(str) + * # ... 
+ * end + * def method_missing(methId) + * str = methId.id2name + * romanToInt(str) + * end + * end + * + * r = Roman.new + * r.iv #=> 4 + * r.xxiii #=> 23 + * r.mm #=> 2000 + */ +static mrb_value +mrb_bob_missing(mrb_state *mrb, mrb_value mod) +{ + mrb_value name, *a; + int alen; + + mrb_get_args(mrb, "o*", &name, &a, &alen); + if (!SYMBOL_P(name)) { + mrb_raise(mrb, E_TYPE_ERROR, "name should be a symbol"); + } + mrb_raise(mrb, E_NOMETHOD_ERROR, "no method named %s", mrb_sym2name(mrb, mrb_symbol(name))); + /* not reached */ + return mrb_nil_value(); +} + +int +mrb_obj_respond_to(struct RClass* c, mrb_sym mid) +{ + khiter_t k; + + while (c) { + khash_t(mt) *h = c->mt; + + if (h) { + k = kh_get(mt, h, mid); + if (k != kh_end(h)) + return 1; /* exist method */ + } + c = c->super; + } + return 0; /* no method */ +} + +int +mrb_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym mid) +{ + return mrb_obj_respond_to(mrb_class(mrb, obj), mid); +} + +mrb_value +mrb_class_path(mrb_state *mrb, struct RClass *c) +{ + mrb_value path; + + path = mrb_obj_iv_get(mrb, (struct RObject*)c, mrb_intern(mrb, "__classpath__")); + if (mrb_nil_p(path)) { + struct RClass *outer = mrb_class_outer_module(mrb, c); + mrb_sym sym = class_sym(mrb, c, outer); + if (outer && outer != mrb->object_class) { + mrb_value base = mrb_class_path(mrb, outer); + path = mrb_str_plus(mrb, base, mrb_str_new_cstr(mrb, "::")); + mrb_str_concat(mrb, path, mrb_str_new_cstr(mrb, mrb_sym2name(mrb, sym))); + } + else if (sym == 0) { + return mrb_nil_value(); + } + else { + path = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, sym)); + } + mrb_obj_iv_set(mrb, (struct RObject*)c, mrb_intern(mrb, "__classpath__"), path); + } + return path; +} + +struct RClass * +mrb_class_real(struct RClass* cl) +{ + while ((cl->tt == MRB_TT_SCLASS) || (cl->tt == MRB_TT_ICLASS)) { + cl = cl->super; + } + return cl; +} + +const char* +mrb_class_name(mrb_state *mrb, struct RClass* c) +{ + mrb_value path = mrb_class_path(mrb, c); + if 
(mrb_nil_p(path)) return 0; + return mrb_str_ptr(path)->buf; +} +/* Returns the class name of obj's class, or 0 when that class has no name. */ +const char* +mrb_obj_classname(mrb_state *mrb, mrb_value obj) +{ + return mrb_class_name(mrb, mrb_class(mrb, obj)); +} + +/*! + * Ensures a class can be derived from super. + * + * \param super a reference to an object. + * \exception TypeError if \a super is not a Class or \a super is a singleton class. + */ +void +mrb_check_inheritable(mrb_state *mrb, struct RClass *super) +{ + /* test the singleton case first: a singleton class also fails the generic "is a Class" test below, which used to make this message unreachable */ + if (super->tt == MRB_TT_SCLASS) { + mrb_raise(mrb, E_TYPE_ERROR, "can't make subclass of singleton class"); + } + if (super->tt != MRB_TT_CLASS) { + mrb_raise(mrb, E_TYPE_ERROR, "superclass must be a Class (%s given)", + mrb_obj_classname(mrb, mrb_obj_value(super))); + } + if (super == mrb->class_class) { + mrb_raise(mrb, E_TYPE_ERROR, "can't make subclass of Class"); + } +} + +/*! + * Creates a new class. + * \param super a class from which the new class derives. + * \exception TypeError \a super is not inheritable. + * \exception TypeError \a super is the Class class. + */ +struct RClass * +mrb_class_new(mrb_state *mrb, struct RClass *super) +{ + struct RClass *c; + + if (super) { + mrb_check_inheritable(mrb, super); + } + c = boot_defclass(mrb, super); + make_metaclass(mrb, c); + + return c; +} + +/*! + * Creates a new module. + */ +struct RClass * +mrb_module_new(mrb_state *mrb) +{ + struct RClass *m = mrb_obj_alloc(mrb, MRB_TT_MODULE, mrb->module_class); + m->mt = kh_init(mt, mrb); + make_metaclass(mrb, m); + + return m; +} + +/* + * call-seq: + * obj.class => class + * + * Returns the class of obj, now preferred over + * Object#type, as an object's type in Ruby is only + * loosely tied to that object's class. This method must always be + * called with an explicit receiver, as class is also a + * reserved word in Ruby. 
+ * + * 1.class #=> Fixnum + * self.class #=> Object + */ + +struct RClass* +mrb_obj_class(mrb_state *mrb, mrb_value obj) +{ + return mrb_class_real(mrb_class(mrb, obj)); +} +/* Makes a an additional name for method b of c; raises (through mrb_method_search) when b cannot be found. */ +void +mrb_alias_method(mrb_state *mrb, struct RClass *c, mrb_sym a, mrb_sym b) +{ + struct RProc *m = mrb_method_search(mrb, c, b); + + mrb_define_method_vm(mrb, c, a, mrb_obj_value(m)); +} + +/*! + * Defines an alias of a method. + * \param klass the class which the original method belongs to + * \param name1 a new name for the method + * \param name2 the original name of the method + */ +void +mrb_define_alias(mrb_state *mrb, struct RClass *klass, const char *name1, const char *name2) +{ + mrb_alias_method(mrb, klass, mrb_intern(mrb, name1), mrb_intern(mrb, name2)); +} + +/* + * call-seq: + * mod.to_s -> string + * + * Return a string representing this module or class. For basic + * classes and modules, this is the name. For singletons, we + * show information on the thing we're attached to as well. + */ + +static mrb_value +mrb_mod_to_s(mrb_state *mrb, mrb_value klass) +{ + if (mrb_type(klass) == MRB_TT_SCLASS) { + mrb_value s = mrb_usascii_str_new2(mrb, "#<"); + mrb_value v = mrb_iv_get(mrb, klass, mrb_intern(mrb, "__attached__")); + + mrb_str_cat2(mrb, s, "Class:"); + switch (mrb_type(v)) { + case MRB_TT_CLASS: + case MRB_TT_MODULE: + mrb_str_append(mrb, s, mrb_inspect(mrb, v)); + break; + default: + mrb_str_append(mrb, s, mrb_any_to_s(mrb, v)); + break; + } + mrb_str_cat2(mrb, s, ">"); + + return s; + } + else { + struct RClass *c = mrb_class_ptr(klass); + const char *cn = mrb_class_name(mrb, c); + + if (!cn) { + /* anonymous class or module: identify it by its address */ + char buf[256]; + + buf[0] = '\0'; /* stays empty for any other type instead of being read uninitialised */ + switch (mrb_type(klass)) { + case MRB_TT_CLASS: + snprintf(buf, sizeof(buf), "#<Class:%p>", (void*)c); + break; + case MRB_TT_MODULE: + snprintf(buf, sizeof(buf), "#<Module:%p>", (void*)c); + break; + default: + break; + } + return mrb_str_dup(mrb, mrb_str_new_cstr(mrb, buf)); + } + else { + return mrb_str_dup(mrb, mrb_str_new_cstr(mrb, cn)); + } + } +} + +mrb_value 
+mrb_mod_alias(mrb_state *mrb, mrb_value mod) +{ + /* takes two arguments (new name, old name) and aliases the method in this module; both are read with mrb_symbol() without a type check */ + struct RClass *c = mrb_class_ptr(mod); + mrb_value new, old; + + mrb_get_args(mrb, "oo", &new, &old); + mrb_alias_method(mrb, c, mrb_symbol(new), mrb_symbol(old)); + return mrb_nil_value(); +} +/* Marks method a as undefined in c by storing a NULL proc under that name; mrb_method_search_vm() stops its lookup when it meets such an entry. */ + +void +mrb_undef_method(mrb_state *mrb, struct RClass *c, mrb_sym a) +{ + mrb_value m; + + m.tt = MRB_TT_PROC; + m.value.p = 0; + mrb_define_method_vm(mrb, c, a, m); +} +/* Undefines, in this module, every method whose symbol is passed as an argument; returns nil. */ +mrb_value +mrb_mod_undef(mrb_state *mrb, mrb_value mod) +{ + struct RClass *c = mrb_class_ptr(mod); + int argc; + mrb_value *argv; + + mrb_get_args(mrb, "*", &argv, &argc); + while (argc--) { + mrb_undef_method(mrb, c, mrb_symbol(*argv)); + argv++; + } + return mrb_nil_value(); +} +/* Returns true when the argument is a kind of this module or class, false otherwise. */ + +static mrb_value +mrb_mod_eqq(mrb_state *mrb, mrb_value mod) +{ + mrb_value obj; + + mrb_get_args(mrb, "o", &obj); + if (!mrb_obj_is_kind_of(mrb, obj, mrb_class_ptr(mod))) + return mrb_false_value(); + return mrb_true_value(); +} +/* Bootstraps BasicObject, Object, Module and Class: allocates them, points all four at Class as their class, attaches metaclasses, binds and names the constants and registers the built-in methods defined in this file. */ +void +mrb_init_class(mrb_state *mrb) +{ + struct RClass *bob; /* BasicObject */ + struct RClass *obj; /* Object */ + struct RClass *mod; /* Module */ + struct RClass *cls; /* Class */ + //struct RClass *krn; /* Kernel */ + + /* boot class hierarchy */ + bob = boot_defclass(mrb, 0); + obj = boot_defclass(mrb, bob); mrb->object_class = obj; + mod = boot_defclass(mrb, obj); mrb->module_class = mod;/* obj -> mod */ + cls = boot_defclass(mrb, mod); mrb->class_class = cls; /* obj -> cls */ + /* fix-up loose ends */ + bob->c = obj->c = mod->c = cls->c = cls; + make_metaclass(mrb, bob); + make_metaclass(mrb, obj); + make_metaclass(mrb, mod); + make_metaclass(mrb, cls); + + /* name basic classes */ + mrb_define_const(mrb, obj, "BasicObject", mrb_obj_value(bob)); + mrb_define_const(mrb, obj, "Object", mrb_obj_value(obj)); + mrb_define_const(mrb, obj, "Module", mrb_obj_value(mod)); + mrb_define_const(mrb, obj, "Class", mrb_obj_value(cls)); + + /* name each classes */ + mrb_name_class(mrb, bob, mrb_intern(mrb, "BasicObject")); + mrb_name_class(mrb, obj, 
mrb_intern(mrb, "Object")); + mrb_name_class(mrb, mod, mrb_intern(mrb, "Module")); + mrb_name_class(mrb, cls, mrb_intern(mrb, "Class")); + + MRB_SET_INSTANCE_TT(mod, MRB_TT_MODULE); + MRB_SET_INSTANCE_TT(cls, MRB_TT_CLASS); + mrb_define_method(mrb, bob, "initialize", mrb_bob_init, ARGS_NONE()); + mrb_define_method(mrb, bob, "!", mrb_bob_not, ARGS_NONE()); + mrb_define_method(mrb, bob, "method_missing", mrb_bob_missing, ARGS_ANY()); /* 15.3.1.3.30 */ + mrb_define_method(mrb, cls, "new", mrb_instance_new, ARGS_ANY()); + mrb_define_method(mrb, mod, "include", mrb_mod_include, ARGS_REQ(1)); + + mrb_define_method(mrb, mod, "to_s", mrb_mod_to_s, ARGS_NONE()); + mrb_define_method(mrb, mod, "alias_method", mrb_mod_alias, ARGS_ANY()); + mrb_define_method(mrb, mod, "undef_method", mrb_mod_undef, ARGS_ANY()); + + mrb_define_method(mrb, mod, "===", mrb_mod_eqq, ARGS_REQ(1)); +} diff --git a/src/codegen.c b/src/codegen.c new file mode 100644 index 0000000000..c44e619e97 --- /dev/null +++ b/src/codegen.c @@ -0,0 +1,2273 @@ +#undef CODEGEN_TEST +#define CODEGEN_DUMP + +#include "mruby.h" +#include "irep.h" +#include "compile.h" +#include "mruby/proc.h" +#include "opcode.h" +#include "mruby/string.h" +#include <string.h> +#include <stdlib.h> + +typedef mrb_ast_node node; +typedef struct mrb_parser_state parser_state; +/* One entry of the stack of enclosing loop-like constructs, linked through prev. */ +struct loopinfo { + enum looptype { + LOOP_NORMAL, + LOOP_BLOCK, + LOOP_FOR, + LOOP_BEGIN, + LOOP_RESCUE, + } type; + int pc1, pc2, pc3, acc; + int ensure_level; + struct loopinfo *prev; +}; +/* Code generation state for one scope; prev links to the enclosing scope. iseq/icapa, pool/plen/pcapa and syms/slen hold the instruction, literal and symbol buffers being built. */ +typedef struct scope { + mrb_state *mrb; + mrb_pool *mpool; + jmp_buf jmp; + + struct scope *prev; + + node *lv; + + int sp; + int pc; + int lastlabel; + int ainfo; + + struct loopinfo *loop; + int ensure_level; + + mrb_code *iseq; + int icapa; + + mrb_value *pool; + int plen; + int pcapa; + + mrb_sym *syms; + int slen; + + int nlocals; + int nregs; + + int idx; +} codegen_scope; + +static codegen_scope* scope_new(mrb_state *mrb, codegen_scope *prev, node *lv); +static void 
scope_finish(codegen_scope *s, int idx); +static struct loopinfo *loop_push(codegen_scope *s, enum looptype t); +static void loop_break(codegen_scope *s, node *tree); +static void loop_pop(codegen_scope *s, int val); + +static void gen_assignment(codegen_scope *s, node *node, int sp, int val); +static void gen_vmassignment(codegen_scope *s, node *tree, int rhs, int val); + +static void codegen(codegen_scope *s, node *tree, int val); +/* Aborts code generation: closes the memory pool of s and of every enclosing scope, prints message to stderr and longjmp()s to the jump buffer of the outermost scope. Does nothing when s is NULL. NOTE(review): the scope is still dereferenced after its pool has been closed - confirm scopes are not allocated from that pool. */ +static void +codegen_error(codegen_scope *s, const char *message) +{ + if (!s) return; + while (s->prev) { + mrb_pool_close(s->mpool); + s = s->prev; + } + mrb_pool_close(s->mpool); + fprintf(stderr, "codegen error: %s\n", message); + longjmp(s->jmp, 1); +} +/* Allocates len bytes from the scope's pool; failure is reported through codegen_error(). */ +static void* +codegen_palloc(codegen_scope *s, size_t len) +{ + void *p = mrb_pool_alloc(s->mpool, len); + + if (!p) codegen_error(s, "pool memory allocation"); + return p; +} +/* mrb_malloc() wrapper that reports failure through codegen_error(). */ +void* +codegen_malloc(codegen_scope *s, size_t len) +{ + void *p = mrb_malloc(s->mrb, len); + + if (!p) codegen_error(s, "mrb_malloc"); + return p; +} +/* mrb_realloc() wrapper; a NULL result is treated as an error only when len is greater than 0. */ +void* +codegen_realloc(codegen_scope *s, void *p, size_t len) +{ + p = mrb_realloc(s->mrb, p, len); + + if (!p && len > 0) codegen_error(s, "mrb_realloc"); + return p; +} +/* Records the current pc as the most recent label and returns it; genop_peep() does not fold an instruction into its predecessor when pc equals that label. */ +static int +new_label(codegen_scope *s) +{ + s->lastlabel = s->pc; + return s->pc; +} +/* Appends instruction i to the scope's iseq buffer, doubling the buffer first when it is full. */ +static inline void +genop(codegen_scope *s, mrb_code i) +{ + if (s->pc == s->icapa) { + s->icapa *= 2; + s->iseq = codegen_realloc(s, s->iseq, sizeof(mrb_code)*s->icapa); + } + s->iseq[s->pc] = i; + s->pc++; +} +/* Appends i like genop(), but when val is 0 and the previous instruction is not a label target it first tries to merge i into that previous instruction. */ +static void +genop_peep(codegen_scope *s, mrb_code i, int val) +{ + // peephole optimization + if (!val && s->lastlabel != s->pc && s->pc > 0) { + mrb_code i0 = s->iseq[s->pc-1]; + int c1 = GET_OPCODE(i); + int c0 = GET_OPCODE(i0); + + switch (c1) { + case OP_MOVE: + switch (c0) { + case OP_MOVE: + if (GETARG_B(i) == GETARG_A(i0) && GETARG_A(i) == GETARG_B(i0) && GETARG_A(i) >= s->nlocals) { + // skip swapping OP_MOVE + return; + } + break; + case OP_LOADI: + if (GETARG_B(i) == 
GETARG_A(i0) && GETARG_A(i0) >= s->nlocals) { + s->iseq[s->pc-1] = MKOP_AsBx(OP_LOADI, GETARG_A(i), GETARG_sBx(i0)); + return; + } + break; + case OP_ARRAY: + case OP_HASH: + case OP_RANGE: + case OP_AREF: + case OP_GETUPVAR: + if (GETARG_B(i) == GETARG_A(i0) && GETARG_A(i0) >= s->nlocals) { + s->iseq[s->pc-1] = MKOP_ABC(c0, GETARG_A(i), GETARG_B(i0), GETARG_C(i0)); + return; + } + break; + case OP_LOADSYM: + case OP_GETGLOBAL: + case OP_GETIV: + case OP_GETCV: + case OP_GETCONST: + case OP_GETSPECIAL: + case OP_LOADL: + case OP_STRING: + case OP_GETMCNST: + if (GETARG_B(i) == GETARG_A(i0) && GETARG_A(i0) >= s->nlocals) { + s->iseq[s->pc-1] = MKOP_ABx(c0, GETARG_A(i), GETARG_Bx(i0)); + return; + } + break; + case OP_SCLASS: + if (GETARG_B(i) == GETARG_A(i0) && GETARG_A(i0) >= s->nlocals) { + s->iseq[s->pc-1] = MKOP_AB(c0, GETARG_A(i), GETARG_B(i0)); + return; + } + break; + case OP_LOADNIL: + case OP_LOADSELF: + case OP_LOADT: + case OP_LOADF: + case OP_OCLASS: + if (GETARG_B(i) == GETARG_A(i0) && GETARG_A(i0) >= s->nlocals) { + s->iseq[s->pc-1] = MKOP_A(c0, GETARG_A(i)); + return; + } + break; + } + break; + case OP_SETIV: + case OP_SETCV: + case OP_SETCONST: + case OP_SETMCNST: + switch (c0) { + case OP_MOVE: + if (GETARG_A(i) == GETARG_A(i0)) { + s->iseq[s->pc-1] = MKOP_ABx(c1, GETARG_B(i0), GETARG_Bx(i)); + return; + } + break; + } + break; + case OP_SETUPVAR: + switch (c0) { + case OP_MOVE: + if (GETARG_A(i) == GETARG_A(i0)) { + s->iseq[s->pc-1] = MKOP_ABC(c1, GETARG_B(i0), GETARG_B(i), GETARG_C(i)); + return; + } + break; + } + break; + case OP_EPOP: + if (c0 == OP_EPOP) { + s->iseq[s->pc-1] = MKOP_A(OP_EPOP, GETARG_A(i0)+GETARG_A(i)); + return; + } + break; + case OP_POPERR: + if (c0 == OP_POPERR) { + s->iseq[s->pc-1] = MKOP_A(OP_POPERR, GETARG_A(i0)+GETARG_A(i)); + return; + } + break; + } + } + genop(s, i); +} + +static void +scope_error(codegen_scope *s) +{ + exit(1); +} + +static inline void +dispatch(codegen_scope *s, int pc) +{ + int diff = s->pc - pc; 
+ mrb_code i = s->iseq[pc]; + int c = GET_OPCODE(i); + + s->lastlabel = s->pc; + switch (c) { + case OP_JMP: + case OP_JMPIF: + case OP_JMPNOT: + case OP_ONERR: + break; + default: + fprintf(stderr, "bug: dispatch on non JMP op\n"); + scope_error(s); + } + s->iseq[pc] = MKOP_AsBx(c, GETARG_A(i), diff); +} + +static void +dispatch_linked(codegen_scope *s, int pc) +{ + mrb_code i; + int pos; + + if (!pc) return; + for (;;) { + i = s->iseq[pc]; + pos = GETARG_sBx(i); + dispatch(s, pc); + if (!pos) break; + pc = pos; + } +} + +#define nregs_update do {if (s->sp > s->nregs) s->nregs = s->sp;} while (0) +static void +push_(codegen_scope *s) +{ + if (s->sp > 511) { + codegen_error(s, "too complex expression"); + } + s->sp++; + nregs_update; +} +#if 0 +static void +push_n_(codegen_scope *s, int n) +{ + if (s->sp + n > 511) { + codegen_error(s, "too complex expression"); + } + s->sp += n; + nregs_update; +} +#endif + +#define push() push_(s) +#define push_n(n) push_n_(s, n) +#define pop() (s->sp--) +#define pop_n(n) (s->sp-=(n)) +#define cursp() (s->sp) + +static inline int +new_lit(codegen_scope *s, mrb_value val) +{ + int i; + + for (i=0; iplen; i++) { + if (memcmp(&s->pool[i], &val, sizeof(mrb_value)) == 0) return i; + } + if (s->plen == s->pcapa) { + s->pcapa *= 2; + s->pool = codegen_realloc(s, s->pool, sizeof(mrb_value)*s->pcapa); + } + s->pool[s->plen] = val; + return s->plen++; +} + +static inline int +new_msym(codegen_scope *s, mrb_sym sym) +{ + int i, len; + + len = s->slen; + if (len > 255) len = 255; + for (i=0; isyms[i] == sym) return i; + if (s->syms[i] == 0) break; + } + if (i > 255) { + codegen_error(s, "too many symbols (max 256)"); + } + s->syms[i] = sym; + if (i == s->slen) s->slen++; + return i; +} + +static inline int +new_sym(codegen_scope *s, mrb_sym sym) +{ + int i; + + for (i=0; islen; i++) { + if (s->syms[i] == sym) return i; + } + if (s->slen > 125 && s->slen < 256) { + s->syms = codegen_realloc(s, s->syms, sizeof(mrb_sym)*65536); + 
memset(s->syms+s->slen, 0, sizeof(mrb_sym)*(256-s->slen)); + s->slen = 256; + } + s->syms[s->slen] = sym; + return s->slen++; +} + +static int +node_len(node *tree) +{ + int n = 0; + + while (tree) { + n++; + tree = tree->cdr; + } + return n; +} + +#define lv_name(lv) ((mrb_sym)(lv)->car) +static int +lv_idx(codegen_scope *s, mrb_sym id) +{ + node *lv = s->lv; + int n = 1; + + while (lv) { + if (lv_name(lv) == id) return n; + n++; + lv = lv->cdr; + } + return 0; +} + +#define NOVAL 0 +#define VAL 1 + +static void +for_body(codegen_scope *s, node *tree) +{ + codegen_scope *prev = s; + int idx, base = s->idx; + struct loopinfo *lp; + node *n2; + mrb_code c; + + // generate receiver + codegen(s, tree->cdr->car, VAL); + // generate loop-block + s = scope_new(s->mrb, s, tree->car); + idx = s->idx; + + lp = loop_push(s, LOOP_FOR); + lp->pc1 = new_label(s); + + // generate loop variable + n2 = tree->car; + if (n2->car && !n2->car->cdr && !n2->cdr) { + genop(s, MKOP_Ax(OP_ENTER, 1<<18)); + gen_assignment(s, n2->car->car, 1, NOVAL); + } + else { + genop(s, MKOP_Ax(OP_ENTER, 1<<18)); + gen_vmassignment(s, n2, 1, VAL); + } + codegen(s, tree->cdr->cdr->car, VAL); + pop(); + c = s->iseq[s->pc-1]; + if (GET_OPCODE(c) != OP_RETURN || GETARG_B(c) != OP_R_NORMAL || s->pc == s->lastlabel) { + genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL)); + } + loop_pop(s, NOVAL); + scope_finish(s, idx); + s = prev; + genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx - base, OP_L_BLOCK)); + pop(); + idx = new_msym(s, mrb_intern(s->mrb, "each")); + genop(s, MKOP_ABC(OP_SEND, cursp(), idx, 0)); +} + +static int +lambda_body(codegen_scope *s, node *tree, int blk) +{ + int idx, base = s->idx; + mrb_code c; + + s = scope_new(s->mrb, s, tree->car); + idx = s->idx; + + if (blk) { + struct loopinfo *lp = loop_push(s, LOOP_BLOCK); + lp->pc1 = new_label(s); + } + tree = tree->cdr; + if (tree->car) { + int ma, oa, ra, pa, ka, kd, ba, a; + int pos, i; + node *n, *opt; + + ma = node_len(tree->car->car); + n = 
tree->car->car; + while (n) { + n = n->cdr; + } + oa = node_len(tree->car->cdr->car); + ra = tree->car->cdr->cdr->car ? 1 : 0; + pa = node_len(tree->car->cdr->cdr->cdr->car); + ka = kd = 0; + ba = tree->car->cdr->cdr->cdr->cdr ? 1 : 0; + + a = ((ma & 0x1f) << 18) + | ((oa & 0x1f) << 13) + | ((ra & 1) << 12) + | ((pa & 0x1f) << 7) + | ((ka & 0x1f) << 2) + | ((kd & 1)<< 1) + | (ba & 1); + s->ainfo = (((ma+oa) & 0x3f) << 6) /* (12bits = 6:1:5) */ + | ((ra & 1) << 5) + | (pa & 0x1f); + genop(s, MKOP_Ax(OP_ENTER, a)); + pos = new_label(s); + for (i=0; i 0) { + genop(s, MKOP_Ax(OP_JMP, 0)); + } + opt = tree->car->cdr->car; + i = 0; + while (opt) { + int idx; + + dispatch(s, pos+i); + codegen(s, opt->car->cdr, VAL); + idx = lv_idx(s, (mrb_sym)opt->car->car); + pop(); + genop_peep(s, MKOP_AB(OP_MOVE, idx, cursp()), NOVAL); + i++; + opt = opt->cdr; + } + if (oa > 0) { + dispatch(s, pos+i); + } + } + codegen(s, tree->cdr->car, VAL); + pop(); + c = s->iseq[s->pc-1]; + if (GET_OPCODE(c) != OP_RETURN || GETARG_B(c) != OP_R_NORMAL || s->pc == s->lastlabel) { + genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL)); + } + if (blk) { + loop_pop(s, NOVAL); + } + scope_finish(s, idx); + + return idx - base; +} + +static int +scope_body(codegen_scope *s, node *tree) +{ + codegen_scope *scope = scope_new(s->mrb, s, tree->car); + int idx = scope->idx; + + if (!s->iseq) { + codegen(scope, tree->cdr, NOVAL); + genop(scope, MKOP_A(OP_STOP, 0)); + } + else { + codegen(scope, tree->cdr, VAL); + genop(scope, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL)); + } + scope_finish(scope, idx); + + return idx - s->idx; +} + +static int +nosplat(node *t) +{ + while (t) { + if ((intptr_t)t->car->car == NODE_SPLAT) return 0; + t = t->cdr; + } + return 1; +} + +static mrb_sym +attrsym(codegen_scope *s, mrb_sym a) +{ + const char *name = mrb_sym2name(s->mrb, a); + char *name2; + size_t len = strlen(name); + + name2 = codegen_palloc(s, len+1); + strcpy(name2, name); + name2[len] = '='; + name2[len+1] = '\0'; + + 
return mrb_intern(s->mrb, name2); +} + +static int +gen_values(codegen_scope *s, node *t) +{ + int n = 0; + + while (t) { + if ((intptr_t)t->car->car == NODE_SPLAT) { // splat mode + pop_n(n); + genop(s, MKOP_ABC(OP_ARRAY, cursp(), cursp(), n)); + push(); + codegen(s, t->car, VAL); + pop(); pop(); + genop(s, MKOP_AB(OP_ARYCAT, cursp(), cursp()+1)); + t = t->cdr; + while (t) { + push(); + codegen(s, t->car, VAL); + pop(); pop(); + if ((intptr_t)t->car->car == NODE_SPLAT) { + genop(s, MKOP_AB(OP_ARYCAT, cursp(), cursp()+1)); + } + else { + genop(s, MKOP_AB(OP_ARYPUSH, cursp(), cursp()+1)); + } + t = t->cdr; + } + return -1; + } + // normal (no splat) mode + codegen(s, t->car, VAL); + n++; + t = t->cdr; + } + return n; +} + +#define CALL_MAXARGS 127 + +static void +gen_call(codegen_scope *s, node *tree, mrb_sym name, int sp, int val) +{ + mrb_sym sym = name ? name : (mrb_sym)tree->cdr->car; + int idx; + int n = 0, noop = 0, sendv = 0; + + codegen(s, tree->car, VAL); /* receiver */ + idx = new_msym(s, sym); + tree = tree->cdr->cdr->car; + if (tree) { + n = gen_values(s, tree->car); + if (n < 0) { + n = noop = sendv = 1; + push(); + } + } + if (sp) { + if (sendv) { + pop(); + genop(s, MKOP_AB(OP_ARYPUSH, cursp(), sp)); + push(); + } + else { + genop(s, MKOP_AB(OP_MOVE, cursp(), sp)); + push(); + n++; + } + } + if (tree && tree->cdr) { + noop = 1; + codegen(s, tree->cdr, VAL); + pop(); + } + else { + genop(s, MKOP_A(OP_LOADNIL, cursp())); + } + pop_n(n+1); + { + const char *name = mrb_sym2name(s->mrb, sym); + + if (!noop && name[0] == '+' && strlen(name) == 1) { + genop(s, MKOP_ABC(OP_ADD, cursp(), idx, n)); + } + else if (!noop && name[0] == '-' && strlen(name) == 1) { + genop(s, MKOP_ABC(OP_SUB, cursp(), idx, n)); + } + else if (!noop && name[0] == '<' && strlen(name) == 1) { + genop(s, MKOP_ABC(OP_LT, cursp(), idx, n)); + } + else if (!noop && name[0] == '<' && strlen(name) == 2 && name[1] == '=') { + genop(s, MKOP_ABC(OP_LE, cursp(), idx, n)); + } + else if (!noop && 
name[0] == '>' && strlen(name) == 1) { + genop(s, MKOP_ABC(OP_GT, cursp(), idx, n)); + } + else if (!noop && name[0] == '>' && strlen(name) == 2 && name[1] == '=') { + genop(s, MKOP_ABC(OP_GE, cursp(), idx, n)); + } + else { + if (sendv) n = CALL_MAXARGS; + genop(s, MKOP_ABC(OP_SEND, cursp(), idx, n)); + } + } + if (val) { + push(); + } +} + +static void +gen_assignment(codegen_scope *s, node *node, int sp, int val) +{ + int idx; + int type = (intptr_t)node->car; + + node = node->cdr; + switch ((intptr_t)type) { + case NODE_GVAR: + idx = new_sym(s, (mrb_sym)node); + genop_peep(s, MKOP_ABx(OP_SETGLOBAL, sp, idx), val); + break; + case NODE_LVAR: + idx = lv_idx(s, (mrb_sym)node); + if (idx > 0) { + if (idx != sp) { + genop_peep(s, MKOP_AB(OP_MOVE, idx, sp), val); + } + break; + } + else { /* upvar */ + int lv = 0; + codegen_scope *up = s->prev; + + while (up) { + idx = lv_idx(up, (mrb_sym)node); + if (idx > 0) { + genop_peep(s, MKOP_ABC(OP_SETUPVAR, sp, idx, lv), val); + break; + } + lv++; + up = up->prev; + } + // assert(up!=0); + } + break; + case NODE_IVAR: + idx = new_sym(s, (mrb_sym)node); + genop_peep(s, MKOP_ABx(OP_SETIV, sp, idx), val); + break; + case NODE_CVAR: + idx = new_sym(s, (mrb_sym)node); + genop_peep(s, MKOP_ABx(OP_SETCV, sp, idx), val); + break; + case NODE_CONST: + idx = new_sym(s, (mrb_sym)node); + genop_peep(s, MKOP_ABx(OP_SETCONST, sp, idx), val); + break; + case NODE_COLON2: + idx = new_sym(s, (mrb_sym)node->cdr); + genop_peep(s, MKOP_AB(OP_MOVE, cursp(), sp), NOVAL); + push(); + codegen(s, node->car, VAL); + pop_n(2); + genop_peep(s, MKOP_ABx(OP_SETMCNST, cursp(), idx), val); + break; + + case NODE_CALL: + push(); + gen_call(s, node, attrsym(s, (mrb_sym)node->cdr->car), sp, val); + val = NOVAL; /* push should have done in gen_call() */ + break; + + default: + printf("unknown lhs %d\n", type); + break; + } + if (val) push(); +} + +static void +gen_vmassignment(codegen_scope *s, node *tree, int rhs, int val) +{ + int n = 0, post = 0; + node *t, 
*p; + + if (tree->car) { /* pre */ + t = tree->car; + n = 0; + while (t) { + genop(s, MKOP_ABC(OP_AREF, cursp(), rhs, n)); + gen_assignment(s, t->car, cursp(), NOVAL); + n++; + t = t->cdr; + } + } + t = tree->cdr; + if (t) { + if (t->cdr) { /* post count */ + p = t->cdr->car; + while (p) { + post++; + p = p->cdr; + } + } + if (val) { + genop(s, MKOP_AB(OP_MOVE, cursp(), rhs)); + push(); + } + pop(); + genop(s, MKOP_ABC(OP_APOST, cursp(), n, post)); + n = 1; + if (t->car) { /* rest */ + gen_assignment(s, t->car, cursp(), NOVAL); + } + if (t->cdr && t->cdr->car) { + t = t->cdr->car; + while (t) { + gen_assignment(s, t->car, cursp()+n, NOVAL); + t = t->cdr; + n++; + } + } + } +} + +static void +raise_error(codegen_scope *s, const char *msg) +{ + int idx = new_lit(s, mrb_str_new_cstr(s->mrb, msg)); + + genop(s, MKOP_ABx(OP_ERR, 0, idx)); +} + +static void +codegen(codegen_scope *s, node *tree, int val) +{ + int nt; + + if (!tree) return; + nt = (intptr_t)tree->car; + tree = tree->cdr; + switch (nt) { + case NODE_BEGIN: + while (tree) { + codegen(s, tree->car, tree->cdr ? 
NOVAL : val); + tree = tree->cdr; + } + break; + + case NODE_RESCUE: + { + int onerr, noexc, exend, pos1, pos2, tmp; + struct loopinfo *lp; + + onerr = new_label(s); + genop(s, MKOP_Bx(OP_ONERR, 0)); + lp = loop_push(s, LOOP_BEGIN); + lp->pc1 = onerr; + if (tree->car) { + codegen(s, tree->car, val); + } + lp->type = LOOP_RESCUE; + noexc = new_label(s); + genop(s, MKOP_Bx(OP_JMP, 0)); + dispatch(s, onerr); + tree = tree->cdr; + exend = 0; + pos1 = 0; + if (tree->car) { + node *n2 = tree->car; + int exc = cursp(); + + genop(s, MKOP_A(OP_RESCUE, exc)); + push(); + while (n2) { + node *n3 = n2->car; + + if (pos1) dispatch(s, pos1); + if (n3->car) { + node *n4 = n3->car; + + pos2 = 0; + while (n4) { + codegen(s, n4->car, VAL); + genop(s, MKOP_AB(OP_MOVE, cursp(), exc)); + push(); + genop(s, MKOP_A(OP_LOADNIL, cursp())); + pop(); pop(); + genop(s, MKOP_ABC(OP_SEND, cursp(), new_msym(s, mrb_intern(s->mrb, "===")), 1)); + tmp = new_label(s); + genop(s, MKOP_AsBx(OP_JMPIF, cursp(), pos2)); + pos2 = tmp; + n4 = n4->cdr; + } + pos1 = new_label(s); + genop(s, MKOP_Bx(OP_JMP, 0)); + dispatch_linked(s, pos2); + } + pop(); + if (n3->cdr->car) { + gen_assignment(s, n3->cdr->car, exc, NOVAL); + } + if (n3->cdr->cdr->car) { + codegen(s, n3->cdr->cdr->car, val); + } + tmp = new_label(s); + genop(s, MKOP_AsBx(OP_JMP, cursp(), exend)); + exend = tmp; + n2 = n2->cdr; + push(); + } + if (pos1) { + dispatch(s, pos1); + genop(s, MKOP_A(OP_RAISE, exc)); + } + } + tree = tree->cdr; + dispatch(s, noexc); + genop(s, MKOP_A(OP_POPERR, 1)); + if (tree->car) { + codegen(s, tree->car, val); + } + dispatch_linked(s, exend); + loop_pop(s, NOVAL); + } + break; + + case NODE_ENSURE: + { + int idx; + int epush = s->pc; + + genop(s, MKOP_Bx(OP_EPUSH, 0)); + s->ensure_level++; + codegen(s, tree->car, val); + idx = scope_body(s, tree->cdr); + s->iseq[epush] = MKOP_Bx(OP_EPUSH, idx); + s->ensure_level--; + genop_peep(s, MKOP_A(OP_EPOP, 1), NOVAL); + } + break; + + case NODE_LAMBDA: + { + int idx = 
lambda_body(s, tree, 1); + + genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx, OP_L_LAMBDA)); + push(); + } + break; + + case NODE_BLOCK: + { + int idx = lambda_body(s, tree, 1); + + genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx, OP_L_BLOCK)); + push(); + } + break; + + case NODE_IF: + { + int pos1, pos2; + node *e = tree->cdr->cdr->car; + + codegen(s, tree->car, VAL); + pop(); + pos1 = new_label(s); + genop(s, MKOP_AsBx(OP_JMPNOT, cursp(), 0)); + + codegen(s, tree->cdr->car, val); + if (e) { + if (val) pop(); + pos2 = new_label(s); + genop(s, MKOP_sBx(OP_JMP, 0)); + dispatch(s, pos1); + codegen(s, e, val); + dispatch(s, pos2); + } + else { + if (val) { + pop(); + genop(s, MKOP_A(OP_LOADNIL, cursp())); + push(); + } + dispatch(s, pos1); + } + } + break; + + case NODE_AND: + { + int pos; + + codegen(s, tree->car, VAL); + pos = new_label(s); + pop(); + genop(s, MKOP_AsBx(OP_JMPNOT, cursp(), 0)); + codegen(s, tree->cdr, val); + dispatch(s, pos); + } + break; + + case NODE_OR: + { + int pos; + + codegen(s, tree->car, VAL); + pos = new_label(s); + pop(); + genop(s, MKOP_AsBx(OP_JMPIF, cursp(), 0)); + codegen(s, tree->cdr, val); + dispatch(s, pos); + } + break; + + case NODE_WHILE: + { + struct loopinfo *lp = loop_push(s, LOOP_NORMAL); + + lp->pc1 = new_label(s); + codegen(s, tree->car, VAL); + pop(); + lp->pc2 = new_label(s); + genop(s, MKOP_AsBx(OP_JMPNOT, cursp(), 0)); + codegen(s, tree->cdr, NOVAL); + genop(s, MKOP_sBx(OP_JMP, lp->pc1 - s->pc)); + dispatch(s, lp->pc2); + loop_pop(s, val); + } + break; + + case NODE_UNTIL: + { + struct loopinfo *lp = loop_push(s, LOOP_NORMAL); + + lp->pc1 = new_label(s); + codegen(s, tree->car, VAL); + pop(); + lp->pc2 = new_label(s); + genop(s, MKOP_AsBx(OP_JMPIF, cursp(), 0)); + codegen(s, tree->cdr, NOVAL); + genop(s, MKOP_sBx(OP_JMP, lp->pc1 - s->pc)); + dispatch(s, lp->pc2); + loop_pop(s, val); + } + break; + + case NODE_FOR: + for_body(s, tree); + if (val) push(); + break; + + case NODE_CASE: + { + int head = 0; + int pos1, pos2, pos3, 
tmp; + node *n; + + pos3 = 0; + if (tree->car) { + head = cursp(); + codegen(s, tree->car, VAL); + } + tree = tree->cdr; + while (tree) { + n = tree->car->car; + pos1 = pos2 = 0; + while (n) { + codegen(s, n->car, VAL); + if (head) { + genop(s, MKOP_AB(OP_MOVE, cursp(), head)); + push(); + genop(s, MKOP_A(OP_LOADNIL, cursp())); + pop(); pop(); + genop(s, MKOP_ABC(OP_SEND, cursp(), new_msym(s, mrb_intern(s->mrb, "===")), 1)); + } + tmp = new_label(s); + genop(s, MKOP_AsBx(OP_JMPIF, cursp(), pos2)); + pos2 = tmp; + n = n->cdr; + } + if (tree->car->car) { + pos1 = new_label(s); + genop(s, MKOP_AsBx(OP_JMP, cursp(), 0)); + dispatch_linked(s, pos2); + } + pop(); pop(); + codegen(s, tree->car->cdr, val); + tmp = new_label(s); + genop(s, MKOP_AsBx(OP_JMP, cursp(), pos3)); + pos3 = tmp; + if (pos1) dispatch(s, pos1); + tree = tree->cdr; + push(); push(); + } + pop(); + if (pos3) dispatch_linked(s, pos3); + if (val) push(); + } + break; + + case NODE_SCOPE: + scope_body(s, tree); + break; + + case NODE_FCALL: + case NODE_CALL: + gen_call(s, tree, 0, 0, val); + break; + + case NODE_DOT2: + codegen(s, tree->car, VAL); + codegen(s, tree->cdr, VAL); + pop(); pop(); + if (val) { + genop(s, MKOP_ABC(OP_RANGE, cursp(), cursp(), 0)); + push(); + } + break; + + case NODE_DOT3: + codegen(s, tree->car, VAL); + codegen(s, tree->cdr, VAL); + pop(); pop(); + if (val) { + genop(s, MKOP_ABC(OP_RANGE, cursp(), cursp(), 1)); + push(); + } + break; + + case NODE_COLON2: + { + int sym = new_sym(s, (mrb_sym)tree->cdr); + + codegen(s, tree->car, VAL); + pop(); + genop(s, MKOP_ABx(OP_GETMCNST, cursp(), sym)); + push(); + } + break; + + case NODE_COLON3: + { + int sym = new_sym(s, (mrb_sym)tree); + + genop(s, MKOP_A(OP_OCLASS, cursp())); + genop(s, MKOP_ABx(OP_GETMCNST, cursp(), sym)); + push(); + } + break; + + case NODE_ARRAY: + { + int n; + + n = gen_values(s, tree); + if (n >= 0) { + pop_n(n); + if (val) { + genop(s, MKOP_ABC(OP_ARRAY, cursp(), cursp(), n)); + push(); + } + } + else if (val) { 
+ push(); + } + } + break; + + case NODE_HASH: + { + int len = 0; + + while (tree) { + codegen(s, tree->car->car, VAL); + codegen(s, tree->car->cdr, VAL); + len++; + tree = tree->cdr; + } + pop_n(len*2); + if (val) { + genop(s, MKOP_ABC(OP_HASH, cursp(), cursp(), len)); + push(); + } + } + break; + + case NODE_SPLAT: + codegen(s, tree, VAL); + break; + + case NODE_ASGN: + codegen(s, tree->cdr, VAL); + pop(); + gen_assignment(s, tree->car, cursp(), val); + break; + + case NODE_MASGN: + { + int len = 0, n = 0, post = 0; + node *t = tree->cdr, *p; + int rhs = cursp(); + + if ((intptr_t)t->car == NODE_ARRAY && nosplat(t->cdr)) { + // fixed rhs + t = t->cdr; + while (t) { + codegen(s, t->car, VAL); + len++; + t = t->cdr; + } + tree = tree->car; + if (tree->car) { /* pre */ + t = tree->car; + n = 0; + while (t) { + gen_assignment(s, t->car, rhs+n, NOVAL); + n++; + t = t->cdr; + } + } + t = tree->cdr; + if (t) { + if (t->cdr) { /* post count */ + p = t->cdr->car; + while (p) { + post++; + p = p->cdr; + } + } + if (t->car) { /* rest (len - pre - post) */ + int rn = len - post - n; + + genop(s, MKOP_ABC(OP_ARRAY, cursp(), rhs+n, rn)); + gen_assignment(s, t->car, cursp(), NOVAL); + n += rn; + } + if (t->cdr && t->cdr->car) { + t = t->cdr->car; + while (ncar, rhs+n, NOVAL); + t = t->cdr; + n++; + } + } + } + pop_n(len); + if (val) { + genop(s, MKOP_ABC(OP_ARRAY, rhs, rhs, len)); + push(); + } + } + else { + // variable rhs + codegen(s, t, VAL); + gen_vmassignment(s, tree->car, rhs, val); + if (!val) pop(); + } + } + break; + + case NODE_OP_ASGN: + codegen(s, tree->car, VAL); + codegen(s, tree->cdr->cdr->car, VAL); + genop(s, MKOP_A(OP_LOADNIL, cursp())); + pop(); pop(); + { + mrb_sym sym = (mrb_sym)tree->cdr->car; + const char *name = mrb_sym2name(s->mrb, sym); + int idx = new_msym(s, sym); + + if (name[0] == '+' && strlen(name) == 1) { + genop(s, MKOP_ABC(OP_ADD, cursp(), idx, 2)); + } + else if (name[0] == '-' && strlen(name) == 1) { + genop(s, MKOP_ABC(OP_SUB, cursp(), 
idx, 2)); + } + else if (name[0] == '<' && strlen(name) == 1) { + genop(s, MKOP_ABC(OP_LT, cursp(), idx, 2)); + } + else if (name[0] == '<' && strlen(name) == 2 && name[1] == '=') { + genop(s, MKOP_ABC(OP_LE, cursp(), idx, 2)); + } + else if (name[0] == '>' && strlen(name) == 1) { + genop(s, MKOP_ABC(OP_GT, cursp(), idx, 2)); + } + else if (name[0] == '>' && strlen(name) == 2 && name[1] == '=') { + genop(s, MKOP_ABC(OP_GE, cursp(), idx, 2)); + } + else { + genop(s, MKOP_ABC(OP_SEND, cursp(), idx, 2)); + } + } + gen_assignment(s, tree->car, cursp(), val); + break; + + case NODE_SUPER: + { + int n = 0; + + push(); + if (tree) { + node *args = tree->car; + while (args) { + codegen(s, args->car, VAL); + n++; + args = args->cdr; + } + } + if (tree && tree->cdr) { + codegen(s, tree->cdr, VAL); + pop(); + } + else { + genop(s, MKOP_A(OP_LOADNIL, cursp())); + } + pop_n(n+1); + genop(s, MKOP_ABC(OP_SUPER, cursp(), 0, n)); + if (val) push(); + } + break; + + case NODE_ZSUPER: + { + codegen_scope *s2 = s; + int lv = 0, ainfo = 0; + + while (s2->ainfo < 0) { + lv++; + s2 = s2->prev; + if (!s2) break; + } + if (s2) ainfo = s2->ainfo; + push(); + genop(s, MKOP_ABx(OP_ARGARY, cursp(), (ainfo<<4)|(lv & 0xf))); + pop(); + genop(s, MKOP_ABC(OP_SUPER, cursp(), 0, CALL_MAXARGS)); + if (val) push(); + } + break; + + case NODE_RETURN: + codegen(s, tree, VAL); + pop(); + if (s->loop && s->loop->type != LOOP_NORMAL) { + genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_RETURN)); + } + else { + genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL)); + } + break; + + case NODE_YIELD: + { + codegen_scope *s2 = s; + int lv = 0, ainfo = 0; + int n = 0, sendv = 0; + + while (s2->ainfo < 0) { + lv++; + s2 = s2->prev; + if (!s2) break; + } + if (s2) ainfo = s2->ainfo; + genop(s, MKOP_ABx(OP_BLKPUSH, cursp(), (ainfo<<4)|(lv & 0xf))); + push(); + if (tree) { + n = gen_values(s, tree); + if (n < 0) { + n = sendv = 1; + push(); + } + } + genop(s, MKOP_A(OP_LOADNIL, cursp())); + pop_n(n+1); + if (sendv) n = 
CALL_MAXARGS; + genop(s, MKOP_ABC(OP_SEND, cursp(), new_msym(s, mrb_intern(s->mrb, "call")), n)); + if (val) push(); + } + break; + + case NODE_BREAK: + loop_break(s, tree); + if (val) push(); + break; + + case NODE_NEXT: + if (!s->loop) { + raise_error(s, "unexpected next"); + } + else if (s->loop->type == LOOP_NORMAL) { + if (s->ensure_level > s->loop->ensure_level) { + genop_peep(s, MKOP_A(OP_EPOP, s->ensure_level - s->loop->ensure_level), NOVAL); + } + codegen(s, tree, NOVAL); + genop(s, MKOP_sBx(OP_JMP, s->loop->pc1 - s->pc)); + } + else { + codegen(s, tree, VAL); + pop(); + genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL)); + } + if (val) push(); + break; + + case NODE_REDO: + if (!s->loop) { + raise_error(s, "unexpected redo"); + } + else { + if (s->ensure_level > s->loop->ensure_level) { + genop_peep(s, MKOP_A(OP_EPOP, s->ensure_level - s->loop->ensure_level), NOVAL); + } + genop(s, MKOP_sBx(OP_JMP, s->loop->pc2 - s->pc)); + } + break; + + case NODE_RETRY: + { + const char *msg = "unexpected retry"; + + if (!s->loop) { + raise_error(s, msg); + } + else { + struct loopinfo *lp = s->loop; + int n = 0; + + while (lp && lp->type != LOOP_RESCUE) { + if (lp->type == LOOP_BEGIN) { + n++; + } + lp = lp->prev; + } + if (!lp) { + raise_error(s, msg); + } + else { + if (n > 0) { + while (n--) { + genop_peep(s, MKOP_A(OP_POPERR, 1), NOVAL); + } + } + if (s->ensure_level > lp->ensure_level) { + genop_peep(s, MKOP_A(OP_EPOP, s->ensure_level - lp->ensure_level), NOVAL); + } + genop(s, MKOP_sBx(OP_JMP, lp->pc1 - s->pc)); + } + } + } + break; + + case NODE_LVAR: + if (val) { + int idx = lv_idx(s, (mrb_sym)tree); + + if (idx > 0) { + genop(s, MKOP_AB(OP_MOVE, cursp(), idx)); + } + else { + int lv = 0; + codegen_scope *up = s->prev; + + while (up) { + idx = lv_idx(up, (mrb_sym)tree); + if (idx > 0) { + genop(s, MKOP_ABC(OP_GETUPVAR, cursp(), idx, lv)); + break; + } + lv++; + up = up->prev; + } + } + push(); + } + break; + + case NODE_GVAR: + { + int sym = new_sym(s, 
(mrb_sym)tree); + + genop(s, MKOP_ABx(OP_GETGLOBAL, cursp(), sym)); + push(); + } + break; + + case NODE_IVAR: + { + int sym = new_sym(s, (mrb_sym)tree); + + genop(s, MKOP_ABx(OP_GETIV, cursp(), sym)); + push(); + } + break; + + case NODE_CVAR: + { + int sym = new_sym(s, (mrb_sym)tree); + + genop(s, MKOP_ABx(OP_GETCV, cursp(), sym)); + push(); + } + break; + + case NODE_CONST: + { + int sym = new_sym(s, (mrb_sym)tree); + + genop(s, MKOP_ABx(OP_GETCONST, cursp(), sym)); + push(); + } + break; + + case NODE_DEFINED: + codegen(s, tree, VAL); + break; + + case NODE_BACK_REF: + codegen(s, tree, VAL); + break; + + case NODE_NTH_REF: + codegen(s, tree, VAL); + break; + + case NODE_ARG: + // should not happen + break; + + case NODE_BLOCK_ARG: + codegen(s, tree, VAL); + break; + + case NODE_INT: + if (val) { + char *p = (char*)tree->car; + int base = (intptr_t)tree->cdr->car; + int i = readint(p, base); + mrb_code co; + + if (i < MAXARG_sBx && i > -MAXARG_sBx) { + co = MKOP_AsBx(OP_LOADI, cursp(), i); + } + else { + int off = new_lit(s, mrb_fixnum_value(i)); + co = MKOP_ABx(OP_LOADL, cursp(), off); + } + genop(s, co); + push(); + } + break; + + case NODE_FLOAT: + if (val) { + char *p = (char*)tree; + mrb_float f = readfloat(p); + int off = new_lit(s, mrb_float_value(f)); + + genop(s, MKOP_ABx(OP_LOADL, cursp(), off)); + push(); + } + break; + + case NODE_NEGATE: + { + nt = (intptr_t)tree->car; + tree = tree->cdr; + switch (nt) { + case NODE_FLOAT: + { + char *p = (char*)tree; + mrb_float f = readfloat(p); + int off = new_lit(s, mrb_float_value(-f)); + + genop(s, MKOP_ABx(OP_LOADL, cursp(), off)); + push(); + } + break; + + case NODE_INT: + { + char *p = (char*)tree->car; + int base = (intptr_t)tree->cdr->car; + int i = readint(p, base); + mrb_code co; + + i = -i; + if (i < MAXARG_sBx && i > -MAXARG_sBx) { + co = MKOP_AsBx(OP_LOADI, cursp(), i); + } + else { + int off = new_lit(s, mrb_fixnum_value(i)); + co = MKOP_ABx(OP_LOADL, cursp(), off); + } + genop(s, co); + push(); + 
} + break; + + default: + { + int sym = new_msym(s, mrb_intern(s->mrb, "-")); + + genop(s, MKOP_ABx(OP_LOADI, cursp(), 0)); + push(); + codegen(s, tree, VAL); + pop(); pop(); + genop(s, MKOP_ABC(OP_SUB, cursp(), sym, 2)); + } + break; + } + } + break; + + case NODE_STR: + if (val) { + char *p = (char*)tree->car; + size_t len = (intptr_t)tree->cdr; + int off = new_lit(s, mrb_str_new(s->mrb, p, len)); + + genop(s, MKOP_ABx(OP_STRING, cursp(), off)); + push(); + } + break; + + case NODE_DSTR: + if (val) { + node *n = tree; + + codegen(s, n->car, VAL); + n = n->cdr; + while (n) { + codegen(s, n->car, VAL); + pop(); pop(); + genop(s, MKOP_AB(OP_STRCAT, cursp(), cursp()+1)); + push(); + n = n->cdr; + } + } + else { + node *n = tree; + + while (n) { + if ((intptr_t)n->car->car != NODE_STR) { + codegen(s, n->car, NOVAL); + } + n = n->cdr; + } + } + break; + + case NODE_SYM: + if (val) { + int sym = new_sym(s, (mrb_sym)tree); + + genop(s, MKOP_ABx(OP_LOADSYM, cursp(), sym)); + push(); + } + break; + + case NODE_SELF: + if (val) { + genop(s, MKOP_A(OP_LOADSELF, cursp())); + push(); + } + break; + + case NODE_NIL: + if (val) { + genop(s, MKOP_A(OP_LOADNIL, cursp())); + push(); + } + break; + + case NODE_TRUE: + if (val) { + genop(s, MKOP_A(OP_LOADT, cursp())); + push(); + } + break; + + case NODE_FALSE: + if (val) { + genop(s, MKOP_A(OP_LOADF, cursp())); + push(); + } + break; + + case NODE_ALIAS: + { + int a = new_msym(s, (mrb_sym)tree->car); + int b = new_msym(s, (mrb_sym)tree->cdr); + int c = new_msym(s, mrb_intern(s->mrb, "alias_method")); + + genop(s, MKOP_A(OP_TCLASS, cursp())); + push(); + genop(s, MKOP_ABx(OP_LOADSYM, cursp(), a)); + push(); + genop(s, MKOP_ABx(OP_LOADSYM, cursp(), b)); + push(); + genop(s, MKOP_A(OP_LOADNIL, cursp())); + pop_n(3); + genop(s, MKOP_ABC(OP_SEND, cursp(), c, 2)); + if (val) { + push(); + } + } + break; + + case NODE_UNDEF: + { + int sym = new_msym(s, (mrb_sym)tree); + int undef = new_msym(s, mrb_intern(s->mrb, "undef_method")); + + 
genop(s, MKOP_A(OP_TCLASS, cursp())); + push(); + genop(s, MKOP_ABx(OP_LOADSYM, cursp(), sym)); + push(); + genop(s, MKOP_A(OP_LOADNIL, cursp())); + pop_n(2); + genop(s, MKOP_ABC(OP_SEND, cursp(), undef, 2)); + if (val) { + push(); + } + } + break; + + case NODE_CLASS: + { + int idx; + + if (tree->car->car == (node*)0) { + genop(s, MKOP_A(OP_LOADNIL, cursp())); + push(); + } + else if (tree->car->car == (node*)1) { + genop(s, MKOP_A(OP_OCLASS, cursp())); + push(); + } + else { + codegen(s, tree->car->car, VAL); + } + if (tree->cdr->car) { + codegen(s, tree->cdr->car, VAL); + } + else { + genop(s, MKOP_A(OP_LOADNIL, cursp())); + push(); + } + pop(); pop(); + idx = new_msym(s, (mrb_sym)tree->car->cdr); + genop(s, MKOP_AB(OP_CLASS, cursp(), idx)); + idx = scope_body(s, tree->cdr->cdr->car); + genop(s, MKOP_ABx(OP_EXEC, cursp(), idx)); + if (val) { + push(); + } + } + break; + + case NODE_MODULE: + { + int idx; + + if (tree->car->car == (node*)0) { + genop(s, MKOP_A(OP_LOADNIL, cursp())); + push(); + } + else if (tree->car->car == (node*)1) { + genop(s, MKOP_A(OP_OCLASS, cursp())); + push(); + } + else { + codegen(s, tree->car->car, VAL); + } + pop(); + idx = new_msym(s, (mrb_sym)tree->car->cdr); + genop(s, MKOP_AB(OP_MODULE, cursp(), idx)); + idx = scope_body(s, tree->cdr->car); + genop(s, MKOP_ABx(OP_EXEC, cursp(), idx)); + if (val) { + push(); + } + } + break; + + case NODE_SCLASS: + { + int idx; + + codegen(s, tree->car, VAL); + pop(); + genop(s, MKOP_AB(OP_SCLASS, cursp(), cursp())); + idx = scope_body(s, tree->cdr->car); + genop(s, MKOP_ABx(OP_EXEC, cursp(), idx)); + if (val) { + push(); + } + } + break; + + case NODE_DEF: + { + int sym = new_msym(s, (mrb_sym)tree->car); + int idx = lambda_body(s, tree->cdr, 0); + + genop(s, MKOP_A(OP_TCLASS, cursp())); + push(); + genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx, OP_L_METHOD)); + pop(); + genop(s, MKOP_AB(OP_METHOD, cursp(), sym)); + if (val) { + genop(s, MKOP_A(OP_LOADNIL, cursp())); + } + } + break; + + case 
NODE_SDEF: + { + node *recv = tree->car; + int sym = new_msym(s, (mrb_sym)tree->cdr->car); + int idx = lambda_body(s, tree->cdr->cdr, 0); + + codegen(s, recv, VAL); + pop(); + genop(s, MKOP_AB(OP_SCLASS, cursp(), cursp())); + push(); + genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx, OP_L_METHOD)); + pop(); + genop(s, MKOP_AB(OP_METHOD, cursp(), sym)); + if (val) { + genop(s, MKOP_A(OP_LOADNIL, cursp())); + } + } + break; + + case NODE_POSTEXE: + codegen(s, tree, NOVAL); + break; + + default: + break; + } +} + +static codegen_scope* +scope_new(mrb_state *mrb, codegen_scope *prev, node *lv) +{ + mrb_pool *pool = mrb_pool_open(mrb); + codegen_scope *p = mrb_pool_alloc(pool, sizeof(codegen_scope)); + if (!p) return 0; + + memset(p, 0, sizeof(codegen_scope)); + p->mrb = mrb; + p->mpool = pool; + if (!prev) return p; + p->prev = prev; + p->ainfo = -1; + + p->mrb = prev->mrb; + p->icapa = 1024; + p->iseq = mrb_malloc(mrb, sizeof(mrb_code)*p->icapa); + + p->pcapa = 32; + p->pool = mrb_malloc(mrb, sizeof(mrb_value)*p->pcapa); + + p->syms = mrb_malloc(mrb, sizeof(mrb_sym)*256); + + p->lv = lv; + p->sp += node_len(lv)+2; + p->nlocals = p->sp; + + p->idx = mrb->irep_len++; + + return p; +} + +static void +scope_finish(codegen_scope *s, int idx) +{ + mrb_state *mrb = s->mrb; + mrb_irep *irep; + + mrb_add_irep(mrb, idx); + irep = mrb->irep[idx] = mrb_malloc(mrb, sizeof(mrb_irep)); + + irep->idx = idx; + irep->flags = 0; + if (s->iseq) { + irep->iseq = codegen_realloc(s, s->iseq, sizeof(mrb_code)*s->pc); + irep->ilen = s->pc; + } + if (s->pool) { + irep->pool = codegen_realloc(s, s->pool, sizeof(mrb_value)*s->plen); + irep->plen = s->plen; + } + if (s->syms) { + irep->syms = codegen_realloc(s, s->syms, sizeof(mrb_sym)*s->slen); + irep->slen = s->slen; + } + + irep->nlocals = s->nlocals; + irep->nregs = s->nregs; + + mrb_pool_close(s->mpool); +} + +static struct loopinfo* +loop_push(codegen_scope *s, enum looptype t) +{ + struct loopinfo *p = codegen_palloc(s, sizeof(struct loopinfo)); 
+ + p->type = t; + p->pc1 = p->pc2 = p->pc3 = 0; + p->prev = s->loop; + p->ensure_level = s->ensure_level; + p->acc = cursp(); + s->loop = p; + + return p; +} + +static void +loop_break(codegen_scope *s, node *tree) +{ + if (!s->loop) { + codegen(s, tree, NOVAL); + raise_error(s, "unexpected break"); + } + else { + struct loopinfo *loop; + + if (tree) { + codegen(s, tree, VAL); + printf("break op %d\n", cursp()); + pop(); + } + + loop = s->loop; + while (loop->type == LOOP_BEGIN) { + genop_peep(s, MKOP_A(OP_POPERR, 1), NOVAL); + loop = loop->prev; + } + if (loop->type == LOOP_NORMAL) { + int tmp; + + if (s->ensure_level > s->loop->ensure_level) { + genop_peep(s, MKOP_A(OP_EPOP, s->ensure_level - s->loop->ensure_level), NOVAL); + } + if (tree) { + genop_peep(s, MKOP_AB(OP_MOVE, loop->acc, cursp()), NOVAL); + } + tmp = new_label(s); + genop(s, MKOP_sBx(OP_JMP, loop->pc3)); + loop->pc3 = tmp; + } + else { + genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_BREAK)); + } + } +} + +static void +loop_pop(codegen_scope *s, int val) +{ + if (val) { + genop(s, MKOP_A(OP_LOADNIL, cursp())); + } + dispatch_linked(s, s->loop->pc3); + s->loop = s->loop->prev; + if (val) push(); +} + +static void +codedump(mrb_state *mrb, int n) +{ + mrb_irep *irep = mrb->irep[n]; + int i; + mrb_code c; + + if (!irep) return; + printf("irep %d nregs=%d nlocals=%d pools=%d syms=%d\n", n, + irep->nregs, irep->nlocals, irep->plen, irep->slen); + for (i=0; i<irep->ilen; i++) { + printf("%03d ", i); + c = irep->iseq[i]; + switch (GET_OPCODE(c)) { + case OP_NOP: + printf("OP_NOP\n"); + break; + case OP_MOVE: + printf("OP_MOVE\tR%d\tR%d\n", GETARG_A(c), GETARG_B(c)); + break; + case OP_LOADL: + printf("OP_LOADL\tR%d\tL(%d)\n", GETARG_A(c), GETARG_Bx(c)); + break; + case OP_LOADI: + printf("OP_LOADI\tR%d\t%d\n", GETARG_A(c), GETARG_sBx(c)); + break; + case OP_LOADSYM: + printf("OP_LOADSYM\tR%d\t'%s'\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)])); + break; + case OP_LOADNIL: +
printf("OP_LOADNIL\tR%d\n", GETARG_A(c)); + break; + case OP_LOADSELF: + printf("OP_LOADSELF\tR%d\n", GETARG_A(c)); + break; + case OP_LOADT: + printf("OP_LOADT\tR%d\n", GETARG_A(c)); + break; + case OP_LOADF: + printf("OP_LOADF\tR%d\n", GETARG_A(c)); + break; + case OP_GETGLOBAL: + printf("OP_GETGLOBAL\tR%d\t'%s'\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)])); + break; + case OP_SETGLOBAL: + printf("OP_SETGLOBAL\t'%s'\tR%d\n", + mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]), + GETARG_A(c)); + break; + case OP_GETCONST: + printf("OP_GETCONST\tR%d\t'%s'\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)])); + break; + case OP_SETCONST: + printf("OP_SETCONST\t'%s'\tR%d\n", + mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]), + GETARG_A(c)); + break; + case OP_GETMCNST: + printf("OP_GETMCNST\tR%d\tR%d::%s\n", GETARG_A(c), GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)])); + break; + case OP_SETMCNST: + printf("OP_SETMCNST\tR%d::%s\tR%d\n", GETARG_A(c)+1, + mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]), + GETARG_A(c)); + break; + case OP_GETIV: + printf("OP_GETIV\tR%d\t%s\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)])); + break; + case OP_SETIV: + printf("OP_SETIV\t%s\tR%d\n", + mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]), + GETARG_A(c)); + break; + case OP_GETUPVAR: + printf("OP_GETUPVAR\tR%d\t%d\t%d\n", + GETARG_A(c), GETARG_B(c), GETARG_C(c)); + break; + case OP_SETUPVAR: + printf("OP_SETUPVAR\tR%d\t%d\t%d\n", + GETARG_A(c), GETARG_B(c), GETARG_C(c)); + break; + case OP_GETCV: + printf("OP_GETCV\tR%d\t%s\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)])); + break; + case OP_SETCV: + printf("OP_SETCV\t%s\tR%d\n", + mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]), + GETARG_A(c)); + break; + case OP_JMP: + printf("OP_JMP\t\t%03d\n", i+GETARG_sBx(c)); + break; + case OP_JMPIF: + printf("OP_JMPIF\tR%d\t%03d\n", GETARG_A(c), i+GETARG_sBx(c)); + break; + case OP_JMPNOT: + printf("OP_JMPNOT\tR%d\t%03d\n", GETARG_A(c), 
i+GETARG_sBx(c)); + break; + case OP_SEND: + printf("OP_SEND\tR%d\t'%s'\t%d\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_B(c)]), + GETARG_C(c)); + break; + case OP_SUPER: + printf("OP_SUPER\tR%d\t%d\n", GETARG_A(c), + GETARG_C(c)); + break; + case OP_ARGARY: + printf("OP_ARGARY\tR%d\t%d:%d:%d:%d\n", GETARG_A(c), + (GETARG_Bx(c)>>10)&0x3f, + (GETARG_Bx(c)>>9)&0x1, + (GETARG_Bx(c)>>4)&0x1f, + (GETARG_Bx(c)>>0)&0xf); + break; + + case OP_ENTER: + printf("OP_ENTER\t%d:%d:%d:%d:%d:%d:%d\n", + (GETARG_Ax(c)>>18)&0x1f, + (GETARG_Ax(c)>>13)&0x1f, + (GETARG_Ax(c)>>12)&0x1, + (GETARG_Ax(c)>>7)&0x1f, + (GETARG_Ax(c)>>2)&0x1f, + (GETARG_Ax(c)>>1)&0x1, + GETARG_Ax(c) & 0x1); + break; + case OP_RETURN: + printf("OP_RETURN\tR%d", GETARG_A(c)); + switch (GETARG_B(c)) { + case OP_R_NORMAL: + printf("\n"); break; + case OP_R_RETURN: + printf("\treturn\n"); break; + case OP_R_BREAK: + printf("\tbreak\n"); break; + default: + printf("\tbroken\n"); break; + break; + } + break; + case OP_BLKPUSH: + printf("OP_BLKPUSH\tR%d\t%d:%d:%d:%d\n", GETARG_A(c), + (GETARG_Bx(c)>>10)&0x3f, + (GETARG_Bx(c)>>9)&0x1, + (GETARG_Bx(c)>>4)&0x1f, + (GETARG_Bx(c)>>0)&0xf); + break; + + case OP_LAMBDA: + printf("OP_LAMBDA\tR%d\tI(%d)\t%d\n", GETARG_A(c), n+GETARG_b(c), GETARG_c(c)); + break; + case OP_RANGE: + printf("OP_RANGE\tR%d\tR%d\t%d\n", GETARG_A(c), GETARG_B(c), GETARG_C(c)); + break; + case OP_METHOD: + printf("OP_METHOD\tR%d\t'%s'\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_B(c)])); + break; + + case OP_ADD: + printf("OP_ADD\tR%d\t'%s'\t%d\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_B(c)]), + GETARG_C(c)); + break; + case OP_SUB: + printf("OP_SUB\tR%d\t'%s'\t%d\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_B(c)]), + GETARG_C(c)); + break; + case OP_LT: + printf("OP_LT\tR%d\t'%s'\t%d\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_B(c)]), + GETARG_C(c)); + break; + case OP_LE: + printf("OP_LE\tR%d\t'%s'\t%d\n", GETARG_A(c), + mrb_sym2name(mrb, 
irep->syms[GETARG_B(c)]), + GETARG_C(c)); + break; + case OP_GT: + printf("OP_GT\tR%d\t'%s'\t%d\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_B(c)]), + GETARG_C(c)); + break; + case OP_GE: + printf("OP_GE\tR%d\t'%s'\t%d\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_B(c)]), + GETARG_C(c)); + break; + + case OP_STOP: + printf("OP_STOP\n"); + break; + + case OP_ARRAY: + printf("OP_ARRAY\tR%d\tR%d\t%d\n", GETARG_A(c), GETARG_B(c), GETARG_C(c)); + break; + case OP_ARYCAT: + printf("OP_ARYCAT\tR%d\tR%d\n", GETARG_A(c), GETARG_B(c)); + break; + case OP_ARYPUSH: + printf("OP_ARYPUSH\tR%d\tR%d\n", GETARG_A(c), GETARG_B(c)); + break; + case OP_AREF: + printf("OP_AREF\tR%d\tR%d\t%d\n", GETARG_A(c), GETARG_B(c), GETARG_C(c)); + break; + case OP_APOST: + printf("OP_APOST\tR%d\t%d\t%d\n", GETARG_A(c), GETARG_B(c), GETARG_C(c)); + break; + case OP_STRING: + printf("OP_STRING\tR%d\t'%s'\n", GETARG_A(c), RSTRING_PTR(irep->pool[GETARG_Bx(c)])); + break; + case OP_STRCAT: + printf("OP_STRCAT\tR%d\tR%d\n", GETARG_A(c), GETARG_B(c)); + break; + case OP_HASH: + printf("OP_HASH\tR%d\tR%d\t%d\n", GETARG_A(c), GETARG_B(c), GETARG_C(c)); + break; + + case OP_OCLASS: + printf("OP_OCLASS\tR%d\n", GETARG_A(c)); + break; + case OP_CLASS: + printf("OP_CLASS\tR%d\t'%s'\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_B(c)])); + break; + case OP_MODULE: + printf("OP_MODULE\tR%d\t'%s'\n", GETARG_A(c), + mrb_sym2name(mrb, irep->syms[GETARG_B(c)])); + break; + case OP_EXEC: + printf("OP_EXEC\tR%d\tI(%d)\n", GETARG_A(c), n+GETARG_Bx(c)); + break; + case OP_SCLASS: + printf("OP_SCLASS\tR%d\tR%d\n", GETARG_A(c), GETARG_B(c)); + break; + case OP_TCLASS: + printf("OP_TCLASS\tR%d\n", GETARG_A(c)); + break; + case OP_ERR: + printf("OP_ERR\t:L(%d)\n", GETARG_Bx(c)); + break; + case OP_EPUSH: + printf("OP_EPUSH\t:I(%d)\n", n+GETARG_Bx(c)); + break; + case OP_ONERR: + printf("OP_ONERR\t%03d\n", i+GETARG_sBx(c)); + break; + case OP_RESCUE: + printf("OP_RESCUE\tR%d\n", GETARG_A(c)); + 
break; + case OP_RAISE: + printf("OP_RAISE\tR%d\n", GETARG_A(c)); + break; + case OP_POPERR: + printf("OP_POPERR\t%d\n", GETARG_A(c)); + break; + case OP_EPOP: + printf("OP_EPOP\t%d\n", GETARG_A(c)); + break; + + default: + printf("OP_unknown %d\t%d\t%d\t%d\n", GET_OPCODE(c), + GETARG_A(c), GETARG_B(c), GETARG_C(c)); + break; + } + } + printf("\n"); +} + +void +codedump_all(mrb_state *mrb, int start) +{ + int i; + + for (i=start; i<mrb->irep_len; i++) { + codedump(mrb, i); + } +} + +static int +codegen_start(mrb_state *mrb, node *tree) +{ + codegen_scope *scope = scope_new(mrb, 0, 0); + + if (!scope) { + return -1; + } + scope->mrb = mrb; + + if (setjmp(scope->jmp) != 0) { + return -1; + } + // prepare irep + codegen(scope, tree, NOVAL); + return 0; +} + +int +mrb_generate_code(mrb_state *mrb, node *tree) +{ + int start = mrb->irep_len; + int n; + + n = codegen_start(mrb, tree); + if (n < 0) return n; + + return start; +} + +#ifdef CODEGEN_TEST +int +main() +{ + mrb_state *mrb = mrb_open(); + int n; + +#if 1 + n = mrb_compile_string(mrb, "p(__FILE__)\np(__LINE__)"); +#else + n = mrb_compile_string(mrb, "\ +def fib(n)\n\ + if n<2\n\ + n\n\ + else\n\ + fib(n-2)+fib(n-1)\n\ + end\n\ +end\n\ +p(fib(30), \"\\n\")\n\ +"); +#endif + printf("ret: %d\n", n); +#ifdef CODEGEN_DUMP + codedump_all(mrb, n); +#endif + mrb_run(mrb, mrb_proc_new(mrb, mrb->irep[0]), mrb_nil_value()); + + return 0; +} +#endif diff --git a/src/compar.c b/src/compar.c new file mode 100644 index 0000000000..d66525c15b --- /dev/null +++ b/src/compar.c @@ -0,0 +1,144 @@ +#include "mruby.h" +#include "mruby/string.h" +#include "mruby/numeric.h" + +void +mrb_cmperr(mrb_state *mrb, mrb_value x, mrb_value y) +{ + const char *classname; + + if (SPECIAL_CONST_P(y)) { + y = mrb_inspect(mrb, y); + //classname = StringValuePtr(y); + classname = mrb_string_value_ptr(mrb, y); + } + else { + classname = mrb_obj_classname(mrb, y); + } + mrb_raise(mrb, E_ARGUMENT_ERROR, "comparison of %s with %s failed", +
mrb_obj_classname(mrb, x), classname); +} + +int +mrb_cmpint(mrb_state *mrb, mrb_value val, mrb_value a, mrb_value b) +{ + if (mrb_nil_p(val)) { + mrb_cmperr(mrb, a, b); + } + if (FIXNUM_P(val)) { + long l = mrb_fixnum(val); + if (l > 0) return 1; + if (l < 0) return -1; + return 0; + } + if (mrb_test(mrb_funcall(mrb, val, ">", 1, mrb_fixnum_value(0)))) return 1; + if (mrb_test(mrb_funcall(mrb, val, "<", 1, mrb_fixnum_value(0)))) return -1; + return 0; +} + +static mrb_value +cmp_equal(mrb_state *mrb, mrb_value x) +{ + mrb_value y, c; + + /* *** TEMPORAL IMPLEMENT *** */ + + mrb_get_args(mrb, "o", &y); + if (mrb_obj_equal(mrb, x, y)) return mrb_true_value(); + + c = mrb_funcall(mrb, x, "<=>", 1, y); + + if (mrb_cmpint(mrb, c, x, y) == 0) return mrb_true_value(); + return mrb_false_value(); +} + +#include +static mrb_value +cmp_gt(mrb_state *mrb, mrb_value x, mrb_value y) +{ + mrb_value c; + + c = mrb_funcall(mrb, x, "<=>", 1, y); + + if (mrb_cmpint(mrb, c, x, y) > 0) return mrb_true_value(); + return mrb_false_value(); +} + +static mrb_value +cmp_gt_m(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + + mrb_get_args(mrb, "o", &y); + return cmp_gt(mrb, x, y); +} + +static mrb_value +cmp_ge_m(mrb_state *mrb, mrb_value x) +{ + mrb_value y, c; + + mrb_get_args(mrb, "o", &y); + c = mrb_funcall(mrb, x, "<=>", 1, y); + + if (mrb_cmpint(mrb, c, x, y) >= 0) return mrb_true_value(); + return mrb_false_value(); +} + +static mrb_value +cmp_lt(mrb_state *mrb, mrb_value x, mrb_value y) +{ + mrb_value c; + + c = mrb_funcall(mrb, x, "<=>", 1, y); + + if (mrb_cmpint(mrb, c, x, y) < 0) return mrb_true_value(); + return mrb_false_value(); +} + +static mrb_value +cmp_lt_m(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + + mrb_get_args(mrb, "o", &y); + return cmp_lt(mrb, x, y); +} + +static mrb_value +cmp_le_m(mrb_state *mrb, mrb_value x) +{ + mrb_value y, c; + + mrb_get_args(mrb, "o", &y); + c = mrb_funcall(mrb, x, "<=>", 1, y); + + if (mrb_cmpint(mrb, c, x, y) <= 0) return 
mrb_true_value(); + return mrb_false_value(); +} + +static mrb_value +cmp_between(mrb_state *mrb, mrb_value x) +{ + mrb_value min, max; + + mrb_get_args(mrb, "oo", &min, &max); + + if (mrb_test(cmp_lt(mrb, x, min))) return mrb_false_value(); + if (mrb_test(cmp_gt(mrb, x, max))) return mrb_false_value(); + return mrb_true_value(); +} + +void +mrb_init_comparable(mrb_state *mrb) +{ + struct RClass *comp; + + comp = mrb_define_module(mrb, "Comparable"); + mrb_define_method(mrb, comp, "<", cmp_lt_m, ARGS_REQ(1)); /* 15.3.3.2.1 */ + mrb_define_method(mrb, comp, "<=", cmp_le_m, ARGS_REQ(1)); /* 15.3.3.2.2 */ + mrb_define_method(mrb, comp, "==", cmp_equal, ARGS_REQ(1)); /* 15.3.3.2.3 */ + mrb_define_method(mrb, comp, ">", cmp_gt_m, ARGS_REQ(1)); /* 15.3.3.2.4 */ + mrb_define_method(mrb, comp, ">=", cmp_ge_m, ARGS_REQ(1)); /* 15.3.3.2.5 */ + mrb_define_method(mrb, comp, "between?", cmp_between, ARGS_REQ(2)); /* 15.3.3.2.6 */ +} diff --git a/src/compile.h b/src/compile.h new file mode 100644 index 0000000000..f0e6b18748 --- /dev/null +++ b/src/compile.h @@ -0,0 +1,73 @@ +#include "mruby.h" +#include +#include + +typedef struct mrb_ast_node { + struct mrb_ast_node *car, *cdr; +} mrb_ast_node; + +#include "node.h" +#include "pool.h" +#include + +enum mrb_lex_state_enum { + EXPR_BEG, /* ignore newline, +/- is a sign. */ + EXPR_END, /* newline significant, +/- is an operator. */ + EXPR_ENDARG, /* ditto, and unbound braces. */ + EXPR_ENDFN, /* ditto, and unbound braces. */ + EXPR_ARG, /* newline significant, +/- is an operator. */ + EXPR_CMDARG, /* newline significant, +/- is an operator. */ + EXPR_MID, /* newline significant, +/- is an operator. */ + EXPR_FNAME, /* ignore newline, no reserved words. */ + EXPR_DOT, /* right after `.' or `::', no reserved words. */ + EXPR_CLASS, /* immediate after `class', no here document. */ + EXPR_VALUE, /* alike EXPR_BEG but label is disallowed. 
*/ + EXPR_MAX_STATE +}; + +struct mrb_parser_state { + mrb_state *mrb; + struct mrb_pool *pool; + mrb_ast_node *cells; + char *s, *send; + FILE *f; + int lineno; + int column; + const char *filename; + + enum mrb_lex_state_enum lstate; + int sterm; + + unsigned int cond_stack; + unsigned int cmdarg_stack; + int paren_nest; + int lpar_beg; + + mrb_ast_node *pb; + char buf[1024]; + int bidx; + + mrb_ast_node *heredoc; + + int in_def, in_single, cmd_start; + mrb_ast_node *locals; + + void *ylval; + + int nerr; + mrb_ast_node *tree, *begin_tree; + + jmp_buf jmp; +}; + +struct mrb_parser_state* mrb_parse_file(mrb_state*,FILE*); +struct mrb_parser_state* mrb_parse_string(mrb_state*,char*); +struct mrb_parser_state* mrb_parse_nstring(mrb_state*,char*,size_t); +int mrb_generate_code(mrb_state*, mrb_ast_node*); + +int mrb_compile_file(mrb_state*,FILE*); +int mrb_compile_string(mrb_state*,char*); +int mrb_compile_nstring(mrb_state*,char*,size_t); + +const char *mrb_parser_filename(struct mrb_parser_state *p, const char *s); +int mrb_parser_lineno(struct mrb_parser_state *p, int n); diff --git a/src/crc.c b/src/crc.c new file mode 100644 index 0000000000..513622a097 --- /dev/null +++ b/src/crc.c @@ -0,0 +1,28 @@ +#include +#include +// Calculate CRC (CRC-16-CCITT) +// +// 0000_0000_0000_0000_0000_0000_0000_0000 +// ^|------- CRC -------|- work --| +// carry +#define CRC_16_CCITT 0x11021ul //x^16+x^12+x^5+1 +#define CRC_XOR_PATTERN (CRC_16_CCITT << 8) +#define CRC_CARRY_BIT (1 << 24) + +uint16_t +calc_crc_16_ccitt(unsigned char *src, int nbytes) +{ + uint32_t crcwk = 0ul; + int ibyte, ibit; + + for (ibyte = 0; ibyte < nbytes; ibyte++) { + crcwk |= *src++; + for (ibit = 0; ibit < CHAR_BIT; ibit++) { + crcwk <<= 1; + if (crcwk & CRC_CARRY_BIT) { + crcwk ^= CRC_XOR_PATTERN; + } + } + } + return (uint16_t)(crcwk >> 8); +} diff --git a/src/dump.c b/src/dump.c new file mode 100644 index 0000000000..ba2a9ab7a0 --- /dev/null +++ b/src/dump.c @@ -0,0 +1,697 @@ +#include +#include 
"dump.h" + +#include "mruby/string.h" +#ifdef INCLUDE_REGEXP +#include "re.h" +#endif +#include "irep.h" + +static const unsigned char def_rite_binary_header[] = + RITE_FILE_IDENFIFIER + RITE_FILE_FORMAT_VER + RITE_VM_VER + RITE_COMPILER_TYPE + RITE_COMPILER_VER + "0000" //Binary data size + "00" //Number of ireps + "00" //Start index + RITE_RESERVED +; + +static const unsigned char def_rite_file_header[] = + RITE_FILE_IDENFIFIER + RITE_FILE_FORMAT_VER + RITE_VM_VER + RITE_COMPILER_TYPE + RITE_COMPILER_VER + "00000000" //Binary data size + "0000" //Number of ireps + "0000" //Start index + RITE_RESERVED + "0000" //CRC +; + +const char bin2hex[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' +}; + +#define DUMP_SIZE(size, type) ((type == DUMP_TYPE_BIN) ? size : size * RITE_FILE_HEX_SIZE) + +enum { + DUMP_IREP_HEADER = 0, + DUMP_ISEQ_BLOCK, + DUMP_POOL_BLOCK, + DUMP_SYMS_BLOCK, + DUMP_SECTION_NUM, +}; + +uint16_t calc_crc_16_ccitt(unsigned char*,int); +static inline int uint8_dump(uint8_t,char*,int); +static inline int uint16_dump(uint16_t,char*,int); +static inline int uint32_dump(uint32_t,char*,int); +static char* str_dump(char*,char*,uint16_t,int); +static uint16_t str_dump_len(char*,uint16_t, int); +static uint32_t get_irep_header_size(mrb_state*,mrb_irep*,int); +static uint32_t get_iseq_block_size(mrb_state*,mrb_irep*,int); +static uint32_t get_pool_block_size(mrb_state*,mrb_irep*,int); +static uint32_t get_syms_block_size(mrb_state*,mrb_irep*,int); +static uint32_t get_irep_record_size(mrb_state*,int,int); +static int write_irep_header(mrb_state*,mrb_irep*,char*,int); +static int write_iseq_block(mrb_state*,mrb_irep*,char*,int); +static int write_pool_block(mrb_state*,mrb_irep*,char*,int); +static int write_syms_block(mrb_state*,mrb_irep*,char*,int); +static int calc_crc_section(mrb_state*,mrb_irep*,uint16_t*,int); +static int write_rite_header(mrb_state*,int,char*,uint32_t); +static int 
dump_rite_header(mrb_state*,int,FILE*,uint32_t); +static int write_irep_record(mrb_state*,int,char*,uint32_t*,int); +static int dump_irep_record(mrb_state*,int,FILE*,uint32_t*); +static int mrb_write_irep(mrb_state*,int,char*); + + +static inline int +uint8_dump(unsigned char bin, char *hex, int type) +{ + if (type == DUMP_TYPE_BIN) { + *hex = bin; + } else { + *hex++ = bin2hex[(bin >> 4) & 0x0f]; + *hex = bin2hex[bin & 0x0f]; + } + return DUMP_SIZE(sizeof(char), type); +} + +static inline int +uint16_dump(uint16_t bin, char *hex, int type) +{ + if (type == DUMP_TYPE_BIN) { + return (uint16_to_bin(bin, hex)); + } else { + *hex++ = bin2hex[(bin >> 12)& 0x0f]; + *hex++ = bin2hex[(bin >> 8) & 0x0f]; + *hex++ = bin2hex[(bin >> 4) & 0x0f]; + *hex = bin2hex[bin & 0x0f]; + return DUMP_SIZE(MRB_DUMP_SIZE_OF_SHORT, type); + } +} + +static inline int +uint32_dump(uint32_t bin, char *hex, int type) +{ + if (type == DUMP_TYPE_BIN) { + return (uint32_to_bin(bin, hex)); + } else { + *hex++ = bin2hex[(bin >> 28) & 0x0f]; + *hex++ = bin2hex[(bin >> 24) & 0x0f]; + *hex++ = bin2hex[(bin >> 20) & 0x0f]; + *hex++ = bin2hex[(bin >> 16) & 0x0f]; + *hex++ = bin2hex[(bin >> 12) & 0x0f]; + *hex++ = bin2hex[(bin >> 8) & 0x0f]; + *hex++ = bin2hex[(bin >> 4) & 0x0f]; + *hex = bin2hex[bin & 0x0f]; + return DUMP_SIZE(MRB_DUMP_SIZE_OF_LONG, type); + } +} + +static char* +str_dump(char *str, char *hex, uint16_t len, int type) +{ + if (type == DUMP_TYPE_BIN) + memcpy(hex, str, len); + else { + char *src, *dst; + + for (src = str, dst = hex; len > 0; src++, dst++, len--) { + switch (*src) { + case 0x07:/* BEL */ *dst++ = '\\'; *dst = 'a'; break; + case 0x08:/* BS */ *dst++ = '\\'; *dst = 'b'; break; + case 0x09:/* HT */ *dst++ = '\\'; *dst = 't'; break; + case 0x0A:/* LF */ *dst++ = '\\'; *dst = 'n'; break; + case 0x0B:/* VT */ *dst++ = '\\'; *dst = 'v'; break; + case 0x0C:/* FF */ *dst++ = '\\'; *dst = 'f'; break; + case 0x0D:/* CR */ *dst++ = '\\'; *dst = 'r'; break; + case 0x22:/* " */ /* fall 
through */ + case 0x27:/* ' */ /* fall through */ + // case 0x3F:/* ? */ /* fall through */ + case 0x5C:/* \ */ /* fall through */ + default: *dst = *src; break; + } + } + } + + return hex; +} + +static uint16_t +str_dump_len(char *str, uint16_t len, int type) +{ + uint16_t dump_len = 0; + + if (type == DUMP_TYPE_BIN) + dump_len = len; + else { + char *src; + + for (src = str; len > 0; src++, len--) { + switch (*src) { + case 0x07:/* BEL */ /* fall through */ + case 0x08:/* BS */ /* fall through */ + case 0x09:/* HT */ /* fall through */ + case 0x0A:/* LF */ /* fall through */ + case 0x0B:/* VT */ /* fall through */ + case 0x0C:/* FF */ /* fall through */ + case 0x0D:/* CR */ /* fall through */ + dump_len += 2; + break; + + case 0x22:/* " */ /* fall through */ + case 0x27:/* ' */ /* fall through */ + // case 0x3F:/* ? */ /* fall through */ + case 0x5C:/* \ */ /* fall through */ + default: + dump_len++; break; + } + } + } + + return dump_len; +} + +static uint32_t +get_irep_header_size(mrb_state *mrb, mrb_irep *irep, int type) +{ + uint32_t size = 0; + + size += sizeof(char) * 2; + size += DUMP_SIZE(MRB_DUMP_SIZE_OF_SHORT, type) * 4; + + return size; +} + +static uint32_t +get_iseq_block_size(mrb_state *mrb, mrb_irep *irep, int type) +{ + uint32_t size = 0; + + size += MRB_DUMP_SIZE_OF_LONG; /* ilen */ + size += irep->ilen * MRB_DUMP_SIZE_OF_LONG; /* iseq(n) */ + size += MRB_DUMP_SIZE_OF_SHORT; /* crc */ + + return DUMP_SIZE(size, type); +} + +static uint32_t +get_pool_block_size(mrb_state *mrb, mrb_irep *irep, int type) +{ + uint32_t size = 0; + int pool_no; + mrb_value str; + char buf[32]; + + size += MRB_DUMP_SIZE_OF_LONG; /* plen */ + size += irep->plen * sizeof(char); /* tt(n) */ + size += irep->plen * MRB_DUMP_SIZE_OF_SHORT; /* len(n) */ + size += MRB_DUMP_SIZE_OF_SHORT; /* crc */ + size = DUMP_SIZE(size, type); + + for (pool_no = 0; pool_no < irep->plen; pool_no++) { + uint16_t nlen =0; + + switch (irep->pool[pool_no].tt) { + case MRB_TT_FIXNUM: + sprintf( 
buf, "%d", irep->pool[pool_no].value.i); + size += strlen(buf); + break; + case MRB_TT_FLOAT: + sprintf( buf, "%.16e", irep->pool[pool_no].value.f); + size += strlen(buf); + break; + case MRB_TT_STRING: + str = mrb_string_value( mrb, &irep->pool[pool_no]); + nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type); + size += nlen; + break; +#ifdef INCLUDE_REGEXP + case MRB_TT_REGEX: + str = mrb_reg_to_s(mrb, irep->pool[pool_no]); + nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type); + size += nlen; + break; +#endif + default: + break; + } + } + + return size; +} + +static uint32_t +get_syms_block_size(mrb_state *mrb, mrb_irep *irep, int type) +{ + uint32_t size = 0; + int sym_no; + + size += MRB_DUMP_SIZE_OF_LONG; /* slen */ + size += MRB_DUMP_SIZE_OF_SHORT; /* crc */ + size = DUMP_SIZE(size, type); + + for (sym_no = 0; sym_no < irep->slen; sym_no++) { + const char * name; + uint16_t nlen =0; + + size += DUMP_SIZE(MRB_DUMP_SIZE_OF_SHORT, type); /* snl(n) */ + if (irep->syms[sym_no] != 0) { + name = mrb_sym2name(mrb, irep->syms[sym_no]); + nlen = str_dump_len((char*)name, strlen(name), type); + size += nlen; /* sn(n) */ + } + } + + return size; +} + +static uint32_t +get_irep_record_size(mrb_state *mrb, int irep_no, int type) +{ + uint32_t size = 0; + mrb_irep *irep = mrb->irep[irep_no]; + + size += DUMP_SIZE(MRB_DUMP_SIZE_OF_LONG, type); /* rlen */ + size += get_irep_header_size(mrb, irep, type); + size += get_iseq_block_size(mrb, irep, type); + size += get_pool_block_size(mrb, irep, type); + size += get_syms_block_size(mrb, irep, type); + + return size; +} + +static int +write_irep_header(mrb_state *mrb, mrb_irep *irep, char *buf, int type) +{ + char *buf_top = buf; + + *buf++ = RITE_IREP_IDENFIFIER; /* record identifier */ + *buf++ = RITE_IREP_TYPE_CLASS; /* class or module */ + buf += uint16_dump((uint16_t)irep->nlocals, buf, type); /* number of local variable */ + buf += uint16_dump((uint16_t)irep->nregs, buf, type); /* number of register 
variable */ + buf += uint16_dump(DUMP_SIZE(MRB_DUMP_SIZE_OF_SHORT, type)/* crc */, buf, type); /* offset of isec block */ + + return (int)(buf - buf_top); +} + +static int +write_iseq_block(mrb_state *mrb, mrb_irep *irep, char *buf, int type) +{ + char *buf_top = buf; + int iseq_no; + + buf += uint32_dump((uint32_t)irep->ilen, buf, type); /* number of opcode */ + + for (iseq_no = 0; iseq_no < irep->ilen; iseq_no++) { + buf += uint32_dump((uint32_t)irep->iseq[iseq_no], buf, type); /* opcode */ + } + + return (int)(buf - buf_top); +} + +static int +write_pool_block(mrb_state *mrb, mrb_irep *irep, char *buf, int type) +{ + int pool_no; + mrb_value str; + char *buf_top = buf; + char *char_buf; + uint16_t buf_size =0; + + buf_size = MRB_DUMP_DEFAULT_STR_LEN; + if ((char_buf = mrb_malloc(mrb, buf_size)) == 0) + goto error_exit; + + buf += uint32_dump((uint32_t)irep->plen, buf, type); /* number of pool */ + + for (pool_no = 0; pool_no < irep->plen; pool_no++) { + uint16_t nlen =0; + + buf += uint8_dump(irep->pool[pool_no].tt, buf, type); /* data type */ + memset(char_buf, 0, buf_size); + + switch (irep->pool[pool_no].tt) { + case MRB_TT_FIXNUM: + sprintf(char_buf, "%d", irep->pool[pool_no].value.i); + break; + + case MRB_TT_FLOAT: + sprintf(char_buf, "%.16e", irep->pool[pool_no].value.f); + break; + + case MRB_TT_STRING: + str = mrb_string_value( mrb, &irep->pool[pool_no]); + nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type); + if ( nlen > buf_size - 1) { + buf_size = nlen + 1; + if ((char_buf = mrb_realloc(mrb, char_buf, buf_size)) == 0) + goto error_exit; + memset(char_buf, 0, buf_size); + } + str_dump(RSTRING_PTR(str), char_buf, RSTRING_LEN(str), type); + break; + +#ifdef INCLUDE_REGEXP + case MRB_TT_REGEX: + str = mrb_reg_to_s(mrb, irep->pool[pool_no]); + nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type); + if ( nlen > buf_size - 1) { + buf_size = nlen + 1; + if ((char_buf = mrb_realloc(mrb, char_buf, buf_size)) == 0) + goto error_exit; + 
memset(char_buf, 0, buf_size); + } + str_dump(RSTRING_PTR(str), char_buf, RSTRING_LEN(str), type); + break; +#endif + + default: + buf += uint16_dump(0, buf, type); /* data length = 0 */ + continue; + } + + buf += uint16_dump((uint16_t)strlen(char_buf), buf, type); /* data length */ + + memcpy(buf, char_buf, strlen(char_buf)); + buf += strlen(char_buf); + } + +error_exit: + if (char_buf) + mrb_free(mrb, char_buf); + return (int)(buf - buf_top); +} + +static int +write_syms_block(mrb_state *mrb, mrb_irep *irep, char *buf, int type) +{ + int sym_no; + char *buf_top = buf; + char *char_buf; + uint16_t buf_size =0; + + buf_size = MRB_DUMP_DEFAULT_STR_LEN; + if ((char_buf = mrb_malloc(mrb, buf_size)) == 0) + goto error_exit; + + buf += uint32_dump((uint32_t)irep->slen, buf, type); /* number of symbol */ + + for (sym_no = 0; sym_no < irep->slen; sym_no++) { + const char * name; + uint16_t nlen =0; + + if (irep->syms[sym_no] != 0) { + name = mrb_sym2name(mrb, irep->syms[sym_no]); + nlen = str_dump_len((char*)name, strlen(name), type); + if ( nlen > buf_size - 1) { + buf_size = nlen + 1; + if ((char_buf = mrb_realloc(mrb, char_buf, buf_size)) == 0) + goto error_exit; + } + memset(char_buf, 0, buf_size); + str_dump((char*)name, char_buf, strlen(name), type); + + buf += uint16_dump(nlen, buf, type); /* length of symbol name */ + memcpy(buf, char_buf, nlen); /* symbol name */ + buf += nlen; + } + else { + buf += uint16_dump(MRB_DUMP_NULL_SYM_LEN, buf, type); /* length of symbol name */ + } + } + +error_exit: + if (char_buf) + mrb_free(mrb, char_buf); + return (int)(buf - buf_top); +} + +static int +calc_crc_section(mrb_state *mrb, mrb_irep *irep, uint16_t *crc, int section) +{ + char *buf, *buf_top; + uint32_t buf_size; + int type = DUMP_TYPE_BIN; + + switch (section) { + case DUMP_IREP_HEADER: buf_size = get_irep_header_size(mrb, irep, type); break; + case DUMP_ISEQ_BLOCK: buf_size = get_iseq_block_size(mrb, irep, type); break; + case DUMP_POOL_BLOCK: buf_size = 
get_pool_block_size(mrb, irep, type); break; + case DUMP_SYMS_BLOCK: buf_size = get_syms_block_size(mrb, irep, type); break; + default: return MRB_DUMP_GENERAL_FAILURE; + } + + if ((buf = mrb_malloc(mrb, buf_size)) == 0) + return MRB_DUMP_GENERAL_FAILURE; + + buf_top = buf; + memset(buf, 0, buf_size); + + switch (section) { + case DUMP_IREP_HEADER: buf += write_irep_header(mrb, irep, buf, type); break; + case DUMP_ISEQ_BLOCK: buf += write_iseq_block(mrb, irep, buf, type); break; + case DUMP_POOL_BLOCK: buf += write_pool_block(mrb, irep, buf, type); break; + case DUMP_SYMS_BLOCK: buf += write_syms_block(mrb, irep, buf, type); break; + default: break; + } + + *crc = calc_crc_16_ccitt((unsigned char *)buf_top, (int)(buf - buf_top)); + + mrb_free(mrb, buf_top); + + return MRB_DUMP_OK; +} + +static int +write_rite_header(mrb_state *mrb, int top, char* bin, uint32_t rbds) +{ + rite_binary_header *binary_header; + uint16_t crc; + int type = DUMP_TYPE_BIN; + + binary_header = (rite_binary_header *)bin; + + memcpy( binary_header, def_rite_binary_header, sizeof(*binary_header)); + + uint32_dump(rbds, (char *)binary_header->rbds, type); + uint16_dump((uint16_t)mrb->irep_len, (char *)binary_header->nirep, type); + uint16_dump((uint16_t)top, (char *)binary_header->sirep, type); + + crc = calc_crc_16_ccitt((unsigned char *)binary_header, sizeof(*binary_header)); + bin += sizeof(*binary_header); + uint16_dump(crc, bin, type); + + return MRB_DUMP_OK; +} + +static int +dump_rite_header(mrb_state *mrb, int top, FILE* fp, uint32_t rbds) +{ + rite_binary_header binary_header; + rite_file_header file_header; + uint16_t crc; + int type; + + if (fseek(fp, 0, SEEK_SET) != 0) + return MRB_DUMP_GENERAL_FAILURE; + + /* calc crc */ + memcpy( &binary_header, def_rite_binary_header, sizeof(binary_header)); + + type = DUMP_TYPE_BIN; + uint32_dump(rbds, (char *)&binary_header.rbds, type); + uint16_dump((uint16_t)mrb->irep_len, (char *)&binary_header.nirep, type); + uint16_dump((uint16_t)top, 
(char *)&binary_header.sirep, type); + + crc = calc_crc_16_ccitt((unsigned char *)&binary_header, sizeof(binary_header)); + + /* dump rbc header */ + memcpy( &file_header, def_rite_file_header, sizeof(file_header)); + + type = DUMP_TYPE_HEX; + uint32_dump(rbds, (char *)&file_header.rbds, type); + uint16_dump((uint16_t)mrb->irep_len, (char *)&file_header.nirep, type); + uint16_dump((uint16_t)top, (char *)&file_header.sirep, type); + uint16_dump(crc, (char *)&file_header.hcrc, type); + + if (fwrite(&file_header, sizeof(file_header), 1, fp) != 1) + return MRB_DUMP_WRITE_FAULT; + + return MRB_DUMP_OK; +} + +static int +write_irep_record(mrb_state *mrb, int irep_no, char* bin, uint32_t *rlen, int type) +{ + uint32_t irep_record_size; + mrb_irep *irep = mrb->irep[irep_no]; + int section; + + if (irep == 0) + return MRB_DUMP_INVALID_IREP; + + /* buf alloc */ + irep_record_size = get_irep_record_size(mrb, irep_no, type); + if (irep_record_size == 0) + return MRB_DUMP_GENERAL_FAILURE; + + memset( bin, 0, irep_record_size); + + /* rlen */ + *rlen = irep_record_size - DUMP_SIZE(MRB_DUMP_SIZE_OF_LONG, type); + + bin += uint32_dump(*rlen, bin, type); + + for (section = 0; section < DUMP_SECTION_NUM; section++) { + int rc; + uint16_t crc; + + switch (section) { + case DUMP_IREP_HEADER: bin += write_irep_header(mrb, irep, bin, type); break; + case DUMP_ISEQ_BLOCK: bin += write_iseq_block(mrb, irep, bin, type); break; + case DUMP_POOL_BLOCK: bin += write_pool_block(mrb, irep, bin, type); break; + case DUMP_SYMS_BLOCK: bin += write_syms_block(mrb, irep, bin, type); break; + default: break; + } + + if ((rc = calc_crc_section(mrb, irep, &crc, section)) != 0) + return rc; + + bin += uint16_dump(crc, bin, type); /* crc */ + } + + return MRB_DUMP_OK; +} + +static int +dump_irep_record(mrb_state *mrb, int irep_no, FILE* fp, uint32_t *rlen) +{ + int rc = MRB_DUMP_OK; + uint32_t irep_record_size; + char *buf; + mrb_irep *irep = mrb->irep[irep_no]; + + if (irep == 0) + return 
MRB_DUMP_INVALID_IREP; + + /* buf alloc */ + irep_record_size = get_irep_record_size(mrb, irep_no, DUMP_TYPE_HEX); + if (irep_record_size == 0) + return MRB_DUMP_GENERAL_FAILURE; + + if ((buf = mrb_malloc(mrb, irep_record_size)) == 0) + return MRB_DUMP_GENERAL_FAILURE; + + memset( buf, 0, irep_record_size); + + if ((rc = write_irep_record(mrb, irep_no, buf, rlen, DUMP_TYPE_HEX)) != MRB_DUMP_OK) + goto error_exit; + + + if (fwrite(buf, irep_record_size, 1, fp) != 1) + rc = MRB_DUMP_WRITE_FAULT; + +error_exit: + mrb_free(mrb, buf); + + return rc; +} + +static int +mrb_write_irep(mrb_state *mrb, int top, char *bin) +{ + int rc; + uint32_t rlen=0; /* size of irep record */ + int irep_no; + char *bin_top; + + if (mrb == 0 || top < 0 || top >= mrb->irep_len || bin == 0) + return MRB_DUMP_INVALID_ARGUMENT; + + bin_top = bin; + bin += sizeof(rite_binary_header) + MRB_DUMP_SIZE_OF_SHORT/* crc */; + + for (irep_no=top; irep_no<mrb->irep_len; irep_no++) { + if ((rc = write_irep_record(mrb, irep_no, bin, &rlen, DUMP_TYPE_BIN)) != 0) + return rc; + + bin += (rlen + DUMP_SIZE(MRB_DUMP_SIZE_OF_LONG, DUMP_TYPE_BIN)); + } + + bin += uint32_dump(0, bin, DUMP_TYPE_BIN); /* end of file */ + + rc = write_rite_header(mrb, top, bin_top, (bin - bin_top)); //TODO: Remove top(SIREP) + + return rc; +} + +int +mrb_dump_irep(mrb_state *mrb, int top, FILE* fp) +{ + int rc; + uint32_t rbds=0; /* size of Rite Binary Data */ + uint32_t rlen=0; /* size of irep record */ + int irep_no; + + if (mrb == 0 || top < 0 || top >= mrb->irep_len || fp == 0) + return MRB_DUMP_INVALID_ARGUMENT; + + if (fwrite(&def_rite_file_header, sizeof(rite_file_header), 1, fp) != 1) /* dummy write */ + return MRB_DUMP_WRITE_FAULT; + + for (irep_no=top; irep_no<mrb->irep_len; irep_no++) { + if ((rc = dump_irep_record(mrb, irep_no, fp, &rlen)) != 0) + return rc; + + rbds += rlen; + } + + if (fwrite("00000000"/* end of file */, 8, 1, fp) != 1) + return MRB_DUMP_WRITE_FAULT; + + rc = dump_rite_header(mrb, top, fp, rbds); //TODO: Remove
top(SIREP) + + return rc; +} + +int +mrb_bdump_irep(mrb_state *mrb, int n, FILE *f,const char *initname) +{ + int rc; + int irep_no; + char *buf; + int buf_size = 0; + int buf_idx = 0; + + if (mrb == 0 || n < 0 || n >= mrb->irep_len || f == 0 || initname == 0) + return -1; + + buf_size = sizeof(rite_binary_header) + MRB_DUMP_SIZE_OF_SHORT/* crc */; + for (irep_no=n; irep_noirep_len; irep_no++) + buf_size += get_irep_record_size(mrb, irep_no, DUMP_TYPE_BIN); + buf_size += MRB_DUMP_SIZE_OF_LONG; /* end of file */ + + if ((buf = mrb_malloc(mrb, buf_size)) == 0) + return MRB_DUMP_GENERAL_FAILURE; + + rc = mrb_write_irep(mrb, n, buf); + + if (rc == MRB_DUMP_OK) { + fprintf(f, "const char %s[] = {", initname); + while (buf_idx < buf_size ) { + if (buf_idx % 16 == 0 ) fputs("\n", f); + fprintf(f, "0x%02x,", (unsigned char)buf[buf_idx++]); + } + fputs("\n};\n", f); + } + + mrb_free(mrb, buf); + + return rc; +} diff --git a/src/dump.h b/src/dump.h new file mode 100644 index 0000000000..410dfcd711 --- /dev/null +++ b/src/dump.h @@ -0,0 +1,118 @@ +#include "mruby.h" +#include +#include + +int mrb_dump_irep(mrb_state*,int,FILE*); +int mrb_load_irep(mrb_state*,FILE*); +int mrb_load_irep_offset(mrb_state*,FILE*,long); +int mrb_read_irep(mrb_state*,char*); + +int mrb_bdump_irep(mrb_state *mrb, int n, FILE *f,const char *initname); + +/* dump type */ +#define DUMP_TYPE_CODE 0 +#define DUMP_TYPE_BIN 1 +#define DUMP_TYPE_HEX 2 + +/* dump/load error code */ +#define MRB_DUMP_OK 0 +#define MRB_DUMP_GENERAL_FAILURE -1 +#define MRB_DUMP_WRITE_FAULT -2 +#define MRB_DUMP_READ_FAULT -3 +#define MRB_DUMP_CRC_ERROR -4 +#define MRB_DUMP_INVALID_FILE_HEADER -5 +#define MRB_DUMP_INVALID_IREP -6 +#define MRB_DUMP_INVALID_ARGUMENT -7 + +/* size of long/int/short value on dump/load */ +#define MRB_DUMP_SIZE_OF_LONG 4 +#define MRB_DUMP_SIZE_OF_INT 4 +#define MRB_DUMP_SIZE_OF_SHORT 2 + +/* null symbol length */ +#define MRB_DUMP_NULL_SYM_LEN 0xFFFF + +/* Use HEX format string */ +#define 
RITE_FILE_IS_HEX + +#ifdef RITE_FILE_IS_HEX +#define RITE_FILE_HEX_SIZE 2 +#else +#define RITE_FILE_HEX_SIZE 1 +#endif + +/* Rite Binary File header */ +#define RITE_FILE_IDENFIFIER "RITE" +#define RITE_FILE_FORMAT_VER "00090000" +#define RITE_VM_VER "00090000" +#define RITE_COMPILER_TYPE "MATZ " +#define RITE_COMPILER_VER "00090000" +#define RITE_RESERVED " " + +/* irep header */ +#define RITE_IREP_IDENFIFIER 'S' +#define RITE_IREP_TYPE_CLASS 'C' +#define RITE_IREP_TYPE_MODULE 'M' + +#define MRB_DUMP_DEFAULT_STR_LEN 128 + +//Rite Binary file_header +typedef struct _rite_binary_header { + unsigned char rbfi[4]; //Rite Binary File Identify + unsigned char rbfv[8]; //Rite Binary File Format Version + unsigned char risv[8]; //Rite Instruction Specification Version + unsigned char rct[8]; //Rite Compiler Type + unsigned char rcv[8]; //Rite Compiler Version + unsigned char rbds[4]; //Rite Binary Data Size + unsigned char nirep[2]; //Number of ireps + unsigned char sirep[2]; //Start index + unsigned char rsv[8]; //Reserved +} rite_binary_header; + +// Rite File file_header +typedef struct _rite_file_header { + unsigned char rbfi[4]; //Rite Binary File Identify + unsigned char rbfv[8]; //Rite Binary File Format Version + unsigned char risv[8]; //Rite Instruction Specification Version + unsigned char rct[8]; //Rite Compiler Type + unsigned char rcv[8]; //Rite Compiler Version + unsigned char rbds[8]; //Rite Binary Data Size + unsigned char nirep[4]; //Number of ireps + unsigned char sirep[4]; //Start index + unsigned char rsv[8]; //Reserved + unsigned char hcrc[4]; //HCRC +} rite_file_header; + +static inline int +uint16_to_bin(uint16_t s, char *bin) +{ + *bin++ = (s >> 8) & 0xff; + *bin = s & 0xff; + return (MRB_DUMP_SIZE_OF_SHORT); +} + +static inline int +uint32_to_bin(uint32_t l, char *bin) +{ + *bin++ = (l >> 24) & 0xff; + *bin++ = (l >> 16) & 0xff; + *bin++ = (l >> 8) & 0xff; + *bin = l & 0xff; + return (MRB_DUMP_SIZE_OF_LONG); +} + +static inline uint32_t 
+bin_to_uint32(unsigned char bin[]) +{ + return (uint32_t)bin[0] << 24 | + (uint32_t)bin[1] << 16 | + (uint32_t)bin[2] << 8 | + (uint32_t)bin[3]; +} + +static inline uint16_t +bin_to_uint16(unsigned char bin[]) +{ + return (uint16_t)bin[0] << 8 | + (uint16_t)bin[1]; +} diff --git a/src/encoding.c b/src/encoding.c new file mode 100644 index 0000000000..db9a364258 --- /dev/null +++ b/src/encoding.c @@ -0,0 +1,1663 @@ +/********************************************************************** + + encoding.c - + + $Author: naruse $ + created at: Thu May 24 17:23:27 JST 2007 + + Copyright (C) 2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "mruby.h" +#ifdef INCLUDE_ENCODING +#include +#ifndef NO_LOCALE_CHARMAP +#ifdef __CYGWIN__ +#include +#endif +#ifdef HAVE_LANGINFO_H +#include +#endif +#endif + +#define USE_UPPER_CASE_TABLE + +#include +#include +#include "regenc.h" +#include "regint.h" +#include "encoding.h" +#include "st.h" +#include +#include "mruby/numeric.h" +#include "mruby/string.h" +#include "mruby/array.h" +#include "variable.h" +#include "mruby/hash.h" + +#define pprintf printf +#define mrb_warning printf +#define mrb_bug printf +#ifndef INT_MAX +#define INT_MAX 2147483647 +#endif +#define mrb_isascii(c) ((unsigned long)(c) < 128) +#define OBJ_FREEZE(a) +static mrb_sym id_encoding; +//mrb_value mrb_cEncoding; +static mrb_value mrb_encoding_list; + +struct mrb_encoding_entry { + const char *name; + mrb_encoding *enc; + mrb_encoding *base; +}; + +static struct { + struct mrb_encoding_entry *list; + int count; + int size; + st_table *names; +} enc_table; + +void mrb_enc_init(mrb_state *mrb); + +enum { + ENCINDEX_ASCII, + ENCINDEX_UTF_8, + ENCINDEX_US_ASCII, + ENCINDEX_BUILTIN_MAX +}; +#define ENCODING_COUNT ENCINDEX_BUILTIN_MAX +#define ENCODING_NAMELEN_MAX 63 +#define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX) +#define STRCASECMP(s1, s2) (st_strcasecmp(s1, s2)) + 
+//#define BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & T_MASK) +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef OTHER +#define OTHER 2 +#endif + +#define mrb_usascii_str_new2 mrb_usascii_str_new_cstr + +static size_t +enc_memsize(mrb_state *mrb, const void *p) +{ + return 0; +} + +static const struct mrb_data_type encoding_data_type = { + "encoding", 0, +}; +#define is_data_encoding(obj) (DATA_TYPE(obj) == &encoding_data_type) + +// RUBY_IMMEDIATE_MASK = 0x03, +//#define IMMEDIATE_MASK RUBY_IMMEDIATE_MASK +//#define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK) +//#define SPECIAL_CONST_P(x) (IMMEDIATE_P(x) || !RTEST(x)) + +static mrb_value +enc_new(mrb_state *mrb, mrb_encoding *encoding) +{ + return mrb_obj_value(Data_Wrap_Struct(mrb, mrb->encode_class, &encoding_data_type, encoding)); +} + +#define enc_autoload_p(enc) (!mrb_enc_mbmaxlen(enc)) + +#define UNSPECIFIED_ENCODING INT_MAX + + +static mrb_value +mrb_enc_from_encoding_index(mrb_state *mrb, int idx) +{ + mrb_value list, enc; + + if (mrb_nil_p(list = mrb_encoding_list)) { + mrb_bug("mrb_enc_from_encoding_index(%d): no mrb_encoding_list", idx); + } + enc = mrb_ary_ref(mrb, list, idx);//mrb_ary_entry(list, idx); + if (mrb_nil_p(enc)) { + mrb_bug("mrb_enc_from_encoding_index(%d): not created yet", idx); + } + return enc; +} + +mrb_value +mrb_enc_from_encoding(mrb_state *mrb, mrb_encoding *encoding) +{ + int idx; + if (!encoding) return mrb_nil_value(); + idx = ENC_TO_ENCINDEX(encoding); + return mrb_enc_from_encoding_index(mrb, idx); +} + +static int enc_autoload(mrb_state *mrb, mrb_encoding *enc); +static int +check_encoding(mrb_state *mrb, mrb_encoding *enc) +{ + int index = mrb_enc_to_index(enc); + if (mrb_enc_from_index(mrb, index) != enc) + return -1; + if (enc_autoload_p(enc)) { + index = enc_autoload(mrb, enc); + } + return index; +} + +static int +enc_check_encoding(mrb_state *mrb, mrb_value obj) +{ + if (SPECIAL_CONST_P(obj) || !is_data_encoding(obj)) { 
+ return -1; + } + return check_encoding(mrb, RDATA(obj)->data); +} + +static int +must_encoding(mrb_state *mrb, mrb_value enc) +{ + int index = enc_check_encoding(mrb, enc); + if (index < 0) { + mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type %s (expected Encoding)", + mrb_obj_classname(mrb, enc)); + } + return index; +} + +int +mrb_to_encoding_index(mrb_state *mrb, mrb_value enc) +{ + int idx; + + idx = enc_check_encoding(mrb, enc); + if (idx >= 0) { + return idx; + } + else if (mrb_nil_p(enc = mrb_check_string_type(mrb, enc))) { + return -1; + } + if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) { + return -1; + } + //return mrb_enc_find_index(StringValueCStr(enc)); + return mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc)); + +} + +static mrb_encoding * +to_encoding(mrb_state *mrb, mrb_value enc) +{ + int idx; + + //StringValue(enc); + mrb_string_value(mrb, &enc); + + if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid name encoding (non ASCII)"); + } + //idx = mrb_enc_find_index(StringValueCStr(enc)); + idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc)); + if (idx < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %s", RSTRING_PTR(enc)); + } + return mrb_enc_from_index(mrb, idx); +} + +mrb_encoding * +mrb_to_encoding(mrb_state *mrb, mrb_value enc) +{ + if (enc_check_encoding(mrb, enc) >= 0) return RDATA(enc)->data; + return to_encoding(mrb, enc); +} + +static int +enc_table_expand(int newsize) +{ + struct mrb_encoding_entry *ent; + int count = newsize; + + if (enc_table.size >= newsize) return newsize; + newsize = (newsize + 7) / 8 * 8; + ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize); + if (!ent) return -1; + memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size)); + enc_table.list = ent; + enc_table.size = newsize; + return count; +} + +static int +enc_register_at(mrb_state *mrb, int index, const char *name, mrb_encoding *encoding) 
+{ + struct mrb_encoding_entry *ent = &enc_table.list[index]; + mrb_value list; + mrb_value ref_ary; + + if (!valid_encoding_name_p(name)) return -1; + if (!ent->name) { + /* the slot keeps its own copy of the name; fail if it cannot be allocated */ + name = strdup(name); + if (!name) return -1; + ent->name = name; + } + else if (STRCASECMP(name, ent->name)) { + /* slot already belongs to an encoding with a different name */ + return -1; + } + if (!ent->enc) { + ent->enc = xmalloc(sizeof(mrb_encoding)); + } + if (encoding) { + *ent->enc = *encoding; + } + else { + memset(ent->enc, 0, sizeof(*ent->enc)); + } + encoding = ent->enc; + encoding->name = name; + encoding->ruby_encoding_index = index; + st_insert(enc_table.names, (st_data_t)name, (st_data_t)index); + list = mrb_encoding_list; + //if (list && mrb_nil_p((mrb_ary_ref(mrb, list, index)))) { + if (list.tt) { + ref_ary = mrb_ary_ref(mrb, list, index); + if (mrb_nil_p(ref_ary)) { + /* initialize encoding data */ + mrb_ary_set(mrb, list, index, enc_new(mrb, encoding));//rb_ary_store(list, index, enc_new(encoding)); + } + } + return index; +} + + +/* + * Append a new entry named name at the end of the table, growing the table + * by one slot first. Returns the new index, or -1 when the table cannot be + * grown or enc_register_at() rejects the entry. + */ +static int +enc_register(mrb_state *mrb, const char *name, mrb_encoding *encoding) +{ + int index = enc_table.count; + + if ((index = enc_table_expand(index + 1)) < 0) return -1; + enc_table.count = index; + return enc_register_at(mrb, index - 1, name, encoding); +} + +static void set_encoding_const(mrb_state *mrb, const char *, mrb_encoding *); +int mrb_enc_registered(const char *name); + +/* Raise E_ARGUMENT_ERROR when name is already present in the name table. */ +static void +enc_check_duplication(mrb_state *mrb, const char *name) +{ + if (mrb_enc_registered(name) >= 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is already registered", name); + } +} +/* + * Record base as the base encoding of table entry index and copy its dummy + * flag onto that entry. index is used as a subscript without any check, so + * callers must pass a valid, non-negative index. + */ +static mrb_encoding* +set_base_encoding(int index, mrb_encoding *base) +{ + mrb_encoding *enc = enc_table.list[index].enc; + + enc_table.list[index].base = base; + if (mrb_enc_dummy_p(base)) ENC_SET_DUMMY(enc); + return enc; +} + +/* + * Register a copy of encoding under name and record encoding as its base. + * Raises E_ARGUMENT_ERROR (via enc_check_duplication) when name is already + * registered. Returns the new index, or a negative value when registration + * fails. + */ +int +mrb_enc_replicate(mrb_state *mrb, const char *name, mrb_encoding *encoding) +{ + int idx; + + enc_check_duplication(mrb, name); + idx = enc_register(mrb, name, encoding); + /* enc_register() reports failure as -1, which must not index enc_table.list */ + if (idx < 0) return idx; + set_base_encoding(idx, encoding); + set_encoding_const(mrb,
name, mrb_enc_from_index(mrb, idx)); + return idx; +} + +/* 15.2.40.2.17 */ +/* + * call-seq: + * enc.replicate(name) -> encoding + * + * Returns a replicated encoding of _enc_ whose name is _name_. + * The new encoding should have the same byte structure of _enc_. + * If _name_ is used by another encoding, raise ArgumentError. + * + */ +static mrb_value +enc_replicate(mrb_state *mrb, mrb_value encoding) +{ + mrb_value name; + mrb_get_args(mrb, "o", &name); + return mrb_enc_from_encoding_index(mrb, + //mrb_enc_replicate(mrb, StringValueCStr(name), + mrb_enc_replicate(mrb, mrb_string_value_cstr(mrb, &name), + mrb_to_encoding(mrb, encoding))); +} +/* + * Register origenc under name, either in a fresh slot (idx < 0) or in the + * existing slot idx, and on success record origenc as the base encoding. + * Returns the index used, or a negative value when registration fails. + */ +static int +enc_replicate_with_index(mrb_state *mrb, const char *name, mrb_encoding *origenc, int idx) +{ + if (idx < 0) { + idx = enc_register(mrb, name, origenc); + } + else { + idx = enc_register_at(mrb, idx, name, origenc); + } + if (idx >= 0) { + set_base_encoding(idx, origenc); + set_encoding_const(mrb, name, mrb_enc_from_index(mrb, idx)); + } + return idx; +} +/* + * Replicate the encoding registered as orig under name, registering orig as + * an empty placeholder entry first when it is not known yet. + */ +int +mrb_encdb_replicate(mrb_state *mrb, const char *name, const char *orig) +{ + int origidx = mrb_enc_registered(orig); + int idx = mrb_enc_registered(name); + + if (origidx < 0) { + origidx = enc_register(mrb, orig, 0); + } + return enc_replicate_with_index(mrb, name, mrb_enc_from_index(mrb, origidx), idx); +} +/* + * Replicate the ASCII-8BIT encoding under name and flag the copy as dummy. + * Returns the new index, or the negative value from mrb_enc_replicate() + * when registration fails (nothing is flagged in that case). + */ +int +mrb_define_dummy_encoding(mrb_state *mrb, const char *name) +{ + int index = mrb_enc_replicate(mrb, name, mrb_ascii8bit_encoding(mrb)); + mrb_encoding *enc; + + /* a failed registration yields a negative index; never subscript with it */ + if (index < 0) return index; + enc = enc_table.list[index].enc; + ENC_SET_DUMMY(enc); + return index; +} + +/* + * Same as mrb_define_dummy_encoding() but reuses the slot already registered + * for name, if any. Returns the index, or a negative value on failure. + */ +int +mrb_encdb_dummy(mrb_state *mrb, const char *name) +{ + int index = enc_replicate_with_index(mrb, name, mrb_ascii8bit_encoding(mrb), + mrb_enc_registered(name)); + mrb_encoding *enc; + + /* enc_replicate_with_index() passes registration failures through as < 0 */ + if (index < 0) return index; + enc = enc_table.list[index].enc; + ENC_SET_DUMMY(enc); + return index; +} + +/* 15.2.40.2.13 */ +/* + * call-seq: + * enc.dummy? -> true or false + * + * Returns true for dummy encodings.
+ * A dummy encoding is an encoding for which character handling is not properly + * implemented. + * It is used for stateful encodings. + * + * Encoding::ISO_2022_JP.dummy? #=> true + * Encoding::UTF_8.dummy? #=> false + * + */ +static mrb_value +enc_dummy_p(mrb_state *mrb, mrb_value enc) +{ + return ENC_DUMMY_P(enc_table.list[must_encoding(mrb, enc)].enc) ? mrb_true_value() : mrb_false_value(); +} + +/* 15.2.40.2.12 */ +/* + * call-seq: + * enc.ascii_compatible? -> true or false + * + * Returns whether ASCII-compatible or not. + * + * Encoding::UTF_8.ascii_compatible? #=> true + * Encoding::UTF_16BE.ascii_compatible? #=> false + * + */ +static mrb_value +enc_ascii_compatible_p(mrb_state *mrb, mrb_value enc) +{ + return mrb_enc_asciicompat(mrb, enc_table.list[must_encoding(mrb, enc)].enc) ? mrb_true_value() : mrb_false_value(); +} + +static const char * +enc_alias_internal(const char *alias, int idx) +{ + alias = strdup(alias); + st_insert(enc_table.names, (st_data_t)alias, (st_data_t)idx); + return alias; +} + +/* + * Returns 1 when the encoding is Unicode series other than UTF-7 else 0. 
+ */ +int +mrb_enc_unicode_p(mrb_encoding *enc) +{ + const char *name = mrb_enc_name(enc); + return name[0] == 'U' && name[1] == 'T' && name[2] == 'F' && name[4] != '7'; +} + +extern mrb_encoding OnigEncodingUTF_8; +extern mrb_encoding OnigEncodingUS_ASCII; + +/* + * Make room for the built-in encodings, create the name table on first use, + * and register ASCII, UTF-8 and US-ASCII at their fixed ENCINDEX_* slots. + */ +void +mrb_enc_init(mrb_state *mrb) +{ + enc_table_expand(ENCODING_COUNT + 1); + if (!enc_table.names) { + enc_table.names = st_init_strcasetable(); + } +#define ENC_REGISTER(enc) enc_register_at(mrb, ENCINDEX_##enc, mrb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc) + ENC_REGISTER(ASCII); + ENC_REGISTER(UTF_8); + ENC_REGISTER(US_ASCII); +#undef ENC_REGISTER + enc_table.count = ENCINDEX_BUILTIN_MAX; +} + +/* + * Return the encoding stored at index, initialising the table on first use. + * Returns 0 when index is negative or not below enc_table.count. + */ +mrb_encoding * +mrb_enc_from_index(mrb_state *mrb, int index) +{ + if (!enc_table.list) { + mrb_enc_init(mrb); + } + if (index < 0 || enc_table.count <= index) { + return 0; + } + return enc_table.list[index].enc; +} + +/* + * Look name up in the name table. Returns the stored index, or -1 when name + * is null, the table has not been initialised, or name is not found. + */ +int +mrb_enc_registered(const char *name) +{ + st_data_t idx = 0; + + if (!name) return -1; + if (!enc_table.list) return -1; + if (st_lookup(enc_table.names, (st_data_t)name, &idx)) { + return (int)idx; + } + return -1; +} + +/* Stub: ignores both arguments and always returns nil. */ +mrb_value +mrb_require_safe(mrb_value fname, int safe) +{ + mrb_value result = mrb_nil_value(); + return result; +} +/* + * Resolve name to a usable encoding index, or -1. + * The original built the library name "enc/<name>.so" and required it; that + * part is not ported, and the leftover code normalised characters through an + * mrb_value that was never initialised. Only the registry is consulted now. + */ +static int +load_encoding(const char *name) +{ + int idx; + + if ((idx = mrb_enc_registered(name)) < 0) return -1; + if (enc_autoload_p(enc_table.list[idx].enc))
return -1; + return idx; +} + +static int +enc_autoload(mrb_state *mrb, mrb_encoding *enc) +{ + int i; + mrb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base; + + if (base) { + i = 0; + do { + if (i >= enc_table.count) return -1; + } while (enc_table.list[i].enc != base && (++i, 1)); + if (enc_autoload_p(base)) { + if (enc_autoload(mrb, base) < 0) return -1; + } + i = ENC_TO_ENCINDEX(enc); + enc_register_at(mrb, i, mrb_enc_name(enc), base); + } + else { + i = load_encoding(mrb_enc_name(enc)); + } + return i; +} + +int +mrb_enc_find_index(mrb_state *mrb, const char *name) +{ + int i = mrb_enc_registered(name); + mrb_encoding *enc; + + if (i < 0) { + i = load_encoding(name); + } + else if (!(enc = mrb_enc_from_index(mrb, i))) { + if (i != UNSPECIFIED_ENCODING) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is not registered", name); + } + } + else if (enc_autoload_p(enc)) { + if (enc_autoload(mrb, enc) < 0) { + //mrb_warn("failed to load encoding (%s); use ASCII-8BIT instead", + printf("failed to load encoding (%s); use ASCII-8BIT instead", + name); + return 0; + } + } + return i; +} + +mrb_encoding * +mrb_enc_find(mrb_state *mrb, const char *name) +{ + int idx = mrb_enc_find_index(mrb, name); + if (idx < 0) idx = 0; + return mrb_enc_from_index(mrb, idx); +} + +static inline int +enc_capable(mrb_value obj) +{ + if (SPECIAL_CONST_P(obj)) return (mrb_type(obj) == MRB_TT_SYMBOL); + switch (mrb_type(obj)/*BUILTIN_TYPE(obj)*/) { + case MRB_TT_STRING: + case MRB_TT_REGEX: + case MRB_TT_FILE: + return TRUE; + case MRB_TT_DATA: + if (is_data_encoding(obj)) return TRUE; + default: + return FALSE; + } +} + +mrb_sym +mrb_id_encoding(mrb_state *mrb) +{ + //CONST_ID(id_encoding, "encoding"); + id_encoding = mrb_intern(mrb, "encoding"); + return id_encoding; +} + +int +mrb_enc_get_index(mrb_state *mrb, mrb_value obj) +{ + int i = -1; + mrb_value tmp; + struct RString *ps; + + if (SPECIAL_CONST_P(obj)) { + if (mrb_type(obj) != MRB_TT_SYMBOL) return -1; + //obj = 
mrb_id2str(SYM2ID(obj)); + obj = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, SYM2ID(obj))); + } + switch (mrb_type(obj)/*BUILTIN_TYPE(obj)*/) { + as_default: + default: + case MRB_TT_STRING: + case MRB_TT_REGEX: + i = (int)ENCODING_GET_INLINED(obj); + ps = mrb_str_ptr(obj); + if (i == ENCODING_INLINE_MAX) { + mrb_value iv; + + //iv = rb_ivar_get(obj, mrb_id_encoding(mrb)); + iv = mrb_iv_get(mrb, obj, mrb_id_encoding(mrb)); + i = mrb_fixnum(iv); + } + break; + + case MRB_TT_FILE: + tmp = mrb_funcall(mrb, obj, "internal_encoding", 0, 0); + if (mrb_nil_p(tmp)) obj = mrb_funcall(mrb, obj, "external_encoding", 0, 0); + else obj = tmp; + if (mrb_nil_p(obj)) break; + case MRB_TT_DATA: + if (is_data_encoding(obj)) { + i = enc_check_encoding(mrb, obj); + } + else { + goto as_default; + } + break; + } + return i; +} + +void +mrb_enc_set_index(mrb_state *mrb, mrb_value obj, int idx) +{ + if (idx < ENCODING_INLINE_MAX) { + ENCODING_SET_INLINED(obj, idx); + return; + } + ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX); + //mrb_ivar_set(obj, mrb_id_encoding(mrb), INT2NUM(idx)); + mrb_iv_set(mrb, obj, mrb_id_encoding(mrb), mrb_fixnum_value(idx)); + return; +} + +mrb_value +mrb_enc_associate_index(mrb_state *mrb, mrb_value obj, int idx) +{ +/* enc_check_capable(obj);*/ + if (mrb_enc_get_index(mrb, obj) == idx) + return obj; + if (SPECIAL_CONST_P(obj)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "cannot set encoding"); + } + if (!ENC_CODERANGE_ASCIIONLY(obj) || + !mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx))) { + ENC_CODERANGE_CLEAR(obj); + } + mrb_enc_set_index(mrb, obj, idx); + return obj; +} + +mrb_value +mrb_enc_associate(mrb_state *mrb, mrb_value obj, mrb_encoding *enc) +{ + return mrb_enc_associate_index(mrb, obj, mrb_enc_to_index(enc)); +} + +mrb_encoding* +mrb_enc_get(mrb_state *mrb, mrb_value obj) +{ + return mrb_enc_from_index(mrb, mrb_enc_get_index(mrb, obj)); +} + +mrb_encoding* +mrb_enc_check(mrb_state *mrb, mrb_value str1, mrb_value str2) +{ + mrb_encoding *enc = 
mrb_enc_compatible(mrb, str1, str2); + if (!enc) + mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s", + mrb_enc_name(mrb_enc_get(mrb, str1)), + mrb_enc_name(mrb_enc_get(mrb, str2))); + return enc; +} + +mrb_encoding* +mrb_enc_compatible(mrb_state *mrb, mrb_value str1, mrb_value str2) +{ + int idx1, idx2; + mrb_encoding *enc1, *enc2; + + idx1 = mrb_enc_get_index(mrb, str1); + idx2 = mrb_enc_get_index(mrb, str2); + + if (idx1 < 0 || idx2 < 0) + return 0; + + if (idx1 == idx2) { + return mrb_enc_from_index(mrb, idx1); + } + enc1 = mrb_enc_from_index(mrb, idx1); + enc2 = mrb_enc_from_index(mrb, idx2); + + if (mrb_type(str2) == MRB_TT_STRING && RSTRING_LEN(str2) == 0) + //return (idx1 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc2)) ? enc2 : enc1; + return enc1; + if (mrb_type(str1) == MRB_TT_STRING && RSTRING_LEN(str1) == 0) + //return (idx2 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc1)) ? enc1 : enc2; + return enc2; + if (!mrb_enc_asciicompat(mrb, enc1) || !mrb_enc_asciicompat(mrb, enc2)) { + return 0; + } + + /* objects whose encoding is the same of contents */ + //if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ != MRB_TT_STRING && idx2 == ENCINDEX_US_ASCII) + //return enc1; + //if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING && idx1 == ENCINDEX_US_ASCII) + //return enc2; + + if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING) { + mrb_value tmp = str1; + int idx0 = idx1; + str1 = str2; + str2 = tmp; + idx1 = idx2; + idx2 = idx0; + } + if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ == MRB_TT_STRING) { + int cr1, cr2; + + cr1 = mrb_enc_str_coderange(mrb, str1); + if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ == MRB_TT_STRING) { + cr2 = mrb_enc_str_coderange(mrb, str2); + if (cr1 != cr2) { + /* may need to handle ENC_CODERANGE_BROKEN */ + if (cr1 == ENC_CODERANGE_7BIT) return enc2; + if (cr2 == ENC_CODERANGE_7BIT) return enc1; + } + if (cr2 == ENC_CODERANGE_7BIT) { + if (idx1 == ENCINDEX_ASCII) return enc2; + return enc1; + } 
+ } + if (cr1 == ENC_CODERANGE_7BIT) + return enc2; + } + return 0; +} + +void +mrb_enc_copy(mrb_state *mrb, mrb_value obj1, mrb_value obj2) +{ + mrb_enc_associate_index(mrb, obj1, mrb_enc_get_index(mrb, obj2)); +} + + +/* + * call-seq: + * obj.encoding -> encoding + * + * Returns the Encoding object that represents the encoding of obj. + */ + +mrb_value +mrb_obj_encoding(mrb_state *mrb, mrb_value obj) +{ + mrb_encoding *enc = mrb_enc_get(mrb, obj); + if (!enc) { + mrb_raise(mrb, E_TYPE_ERROR, "unknown encoding"); + } + return mrb_enc_from_encoding(mrb, enc); +} + +int +mrb_enc_fast_mbclen(const char *p, const char *e, mrb_encoding *enc) +{ + return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); +} + +int +mrb_enc_mbclen(const char *p, const char *e, mrb_encoding *enc) +{ + int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); + if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p) + return MBCLEN_CHARFOUND_LEN(n); + else { + int min = mrb_enc_mbminlen(enc); + return min <= e-p ? 
min : (int)(e-p); + } +} + +int +mrb_enc_precise_mbclen(const char *p, const char *e, mrb_encoding *enc) +{ + int n; + if (e <= p) + return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1); + n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); + if (e-p < n) + return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p)); + return n; +} + +int +mrb_enc_ascget(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc) +{ + unsigned int c, l; + if (e <= p) + return -1; + if (mrb_enc_asciicompat(mrb, enc)) { + c = (unsigned char)*p; + if (!ISASCII(c)) + return -1; + if (len) *len = 1; + return c; + } + l = mrb_enc_precise_mbclen(p, e, enc); + if (!MBCLEN_CHARFOUND_P(l)) + return -1; + c = mrb_enc_mbc_to_codepoint(p, e, enc); + if (!mrb_enc_isascii(c, enc)) + return -1; + if (len) *len = l; + return c; +} + +unsigned int +mrb_enc_codepoint_len(mrb_state *mrb, const char *p, const char *e, int *len_p, mrb_encoding *enc) +{ + int r; + if (e <= p) + mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string"); + r = mrb_enc_precise_mbclen(p, e, enc); + if (MBCLEN_CHARFOUND_P(r)) { + if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r); + return mrb_enc_mbc_to_codepoint(p, e, enc); + } + else + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(enc)); + return 0; +} + +#undef mrb_enc_codepoint +unsigned int +mrb_enc_codepoint(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc) +{ + return mrb_enc_codepoint_len(mrb, p, e, 0, enc); +} + +int +mrb_enc_codelen(mrb_state *mrb, int c, mrb_encoding *enc) +{ + int n = ONIGENC_CODE_TO_MBCLEN(enc,c); + if (n == 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid codepoint 0x%x in %s", c, mrb_enc_name(enc)); + } + return n; +} + +int +mrb_enc_toupper(int c, mrb_encoding *enc) +{ + return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_UPPER_CASE(c):(c)); +} + +int +mrb_enc_tolower(int c, mrb_encoding *enc) +{ + return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c)); +} + +/* 15.2.40.2.14 */ +/* 
+ * call-seq: + * enc.inspect -> string + * + * Returns a string which represents the encoding for programmers. + * + * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>" + * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>" + */ +static mrb_value +enc_inspect(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + //mrb_value str = mrb_sprintf("#<%s:%s%s>", mrb_obj_classname(mrb, self), + // mrb_enc_name((mrb_encoding*)(DATA_PTR(self))), + // (mrb_fixnum(enc_dummy_p(mrb, self)) ? " (dummy)" : "")); + char buf[256]; + /* bounded write: the class name has no length limit, so it may not fit; + * an over-long result is truncated instead of overrunning buf */ + snprintf(buf, sizeof(buf), "#<%s:%s%s>", mrb_obj_classname(mrb, self), + mrb_enc_name((mrb_encoding*)(DATA_PTR(self))), + (mrb_enc_dummy_p((mrb_encoding*)(DATA_PTR(self))) ? " (dummy)" : "")); + str = mrb_str_new(mrb, buf, strlen(buf)); + ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT); + return str; +} + +/* 15.2.40.2.15 */ +/* 15.2.40.2.18 */ +/* + * call-seq: + * enc.name -> string + * + * Returns the name of the encoding. + * + * Encoding::UTF_8.name #=> "UTF-8" + */ +static mrb_value +enc_name(mrb_state *mrb, mrb_value self) +{ + return mrb_usascii_str_new2(mrb, mrb_enc_name((mrb_encoding*)DATA_PTR(self))); +} + +/* + * Name-table iteration callback used by enc_names(). args points at two + * mrb_values: [0] a fixnum encoding index and [1] the result array. Pushes + * name onto the array when the entry's index equals [0]; always continues. + */ +static int +enc_names_i(mrb_state *mrb, st_data_t name, st_data_t idx, st_data_t args) +{ + mrb_value *arg = (mrb_value *)args; + int iargs = mrb_fixnum(arg[0]); + //if ((int)idx == (int)arg[0]) { + if ((int)idx == iargs) { + mrb_value str = mrb_usascii_str_new2(mrb, (char *)name); + //OBJ_FREEZE(str); + mrb_ary_push(mrb, arg[1], str); + } + return ST_CONTINUE; +} + +/* 15.2.40.2.16 */ +/* + * call-seq: + * enc.names -> array + * + * Returns the list of name and aliases of the encoding.
+ * + * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"] + */ +static mrb_value +enc_names(mrb_state *mrb, mrb_value self) +{ + mrb_value args[2]; + + args[0] = mrb_fixnum_value(mrb_to_encoding_index(mrb, self)); + args[1] = mrb_ary_new_capa(mrb, 0);//mrb_ary_new2(0); + st_foreachNew(mrb, enc_table.names, enc_names_i, args); + return args[1]; +} + +/* 15.2.40.2.8 */ +/* + * call-seq: + * Encoding.list -> [enc1, enc2, ...] + * + * Returns the list of loaded encodings. + * + * Encoding.list + * #=> [#, #, + * #] + * + * Encoding.find("US-ASCII") + * #=> # + * + * Encoding.list + * #=> [#, #, + * #, #] + * + */ +static mrb_value +enc_list(mrb_state *mrb, mrb_value klass) +{ + struct RArray *ar = (struct RArray *)mrb_encoding_list.value.p; + mrb_value ary = mrb_ary_new_capa(mrb, 0);//mrb_ary_new2(0); + //mrb_ary_replace_m(mrb, ary/*, mmrb_encoding_list*/); + mrb_ary_replace(mrb, mrb_ary_ptr(ary), ar->buf, enc_table.count); + return ary; +} + +/* 15.2.40.2.7 */ +/* + * call-seq: + * Encoding.find(string) -> enc + * Encoding.find(symbol) -> enc + * + * Search the encoding with specified name. + * name should be a string or symbol. + * + * Encoding.find("US-ASCII") #=> # + * Encoding.find(:Shift_JIS) #=> # + * + * Names which this method accept are encoding names and aliases + * including following special aliases + * + * "external":: default external encoding + * "internal":: default internal encoding + * "locale":: locale encoding + * "filesystem":: filesystem encoding + * + * An ArgumentError is raised when no encoding with name. + * Only Encoding.find("internal") however returns nil + * when no encoding named "internal", in other words, when Ruby has no + * default internal encoding. 
+ */ +static mrb_value +enc_find(mrb_state *mrb, mrb_value klass) +{ + mrb_value enc; + mrb_get_args(mrb, "o", &enc); + + return mrb_enc_from_encoding(mrb, to_encoding(mrb, enc)); +} + +/* 15.2.40.2.2 */ +/* + * call-seq: + * Encoding.compatible?(str1, str2) -> enc or nil + * + * Checks the compatibility of two strings. + * If they are compatible, means concatenatable, + * returns an encoding which the concatenated string will be. + * If they are not compatible, nil is returned. + * + * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b") + * #=> # + * + * Encoding.compatible?( + * "\xa1".force_encoding("iso-8859-1"), + * "\xa1\xa1".force_encoding("euc-jp")) + * #=> nil + * + */ +static mrb_value +enc_compatible_p(mrb_state *mrb, mrb_value klass) +{ + mrb_value str1; + mrb_value str2; + mrb_encoding *enc; + mrb_get_args(mrb, "oo", &str1, &str2); + if (!enc_capable(str1)) return mrb_nil_value(); + if (!enc_capable(str2)) return mrb_nil_value(); + enc = mrb_enc_compatible(mrb, str1, str2); + if (!enc) return mrb_nil_value(); + return mrb_enc_from_encoding(mrb, enc); +} + +/* 15.2.40.2.19 */ +/* :nodoc: */ +static mrb_value +enc_dump(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self) +{ + //mrb_scan_args(argc, argv, "01", 0); + return enc_name(mrb, self); +} + +/* 15.2.40.2.11 */ +/* :nodoc: */ +static mrb_value +enc_load(mrb_state *mrb, mrb_value klass) +{ + mrb_value str; + mrb_get_args(mrb, "o", &str); + return enc_find(mrb, str); +} + +mrb_encoding * +mrb_ascii8bit_encoding(mrb_state *mrb) +{ + if (!enc_table.list) { + mrb_enc_init(mrb); + } + return enc_table.list[ENCINDEX_ASCII].enc; +} + +int +mrb_ascii8bit_encindex(void) +{ + return ENCINDEX_ASCII; +} + +mrb_encoding * +mrb_utf8_encoding(mrb_state *mrb) +{ + if (!enc_table.list) { + mrb_enc_init(mrb); + } + return enc_table.list[ENCINDEX_UTF_8].enc; +} + +int +mrb_utf8_encindex(void) +{ + return ENCINDEX_UTF_8; +} + +mrb_encoding * +mrb_usascii_encoding(mrb_state *mrb) +{ + if 
(!enc_table.list) { + mrb_enc_init(mrb); + } + return enc_table.list[ENCINDEX_US_ASCII].enc; +} + +int +mrb_usascii_encindex(void) +{ + return ENCINDEX_US_ASCII; +} + +int +mrb_locale_encindex(mrb_state *mrb) +{ + mrb_value charmap = mrb_locale_charmap(mrb, mrb_obj_value(mrb->encode_class)); + int idx; + + if (mrb_nil_p(charmap)) + idx = mrb_usascii_encindex(); + //else if ((idx = mrb_enc_find_index(StringValueCStr(charmap))) < 0) + else if ((idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &charmap))) < 0) + idx = mrb_ascii8bit_encindex(); + + if (mrb_enc_registered("locale") < 0) enc_alias_internal("locale", idx); + + return idx; +} + +mrb_encoding * +mrb_locale_encoding(mrb_state *mrb) +{ + return mrb_enc_from_index(mrb, mrb_locale_encindex(mrb)); +} + +static int +enc_set_filesystem_encoding(mrb_state *mrb) +{ + int idx; +#if defined NO_LOCALE_CHARMAP + idx = mrb_enc_to_index(mrb_default_external_encoding(mrb)); +#elif defined _WIN32 || defined __CYGWIN__ + char cp[sizeof(int) * 8 / 3 + 4]; + //snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? 
GetACP() : GetOEMCP()); + idx = mrb_enc_find_index(mrb, cp); + if (idx < 0) idx = mrb_ascii8bit_encindex(); +#else + idx = mrb_enc_to_index(mrb_default_external_encoding(mrb)); +#endif + + enc_alias_internal("filesystem", idx); + return idx; +} + +int +mrb_filesystem_encindex(void) +{ + int idx = mrb_enc_registered("filesystem"); + if (idx < 0) + idx = mrb_ascii8bit_encindex(); + return idx; +} + +mrb_encoding * +mrb_filesystem_encoding(mrb_state *mrb) +{ + return mrb_enc_from_index(mrb, mrb_filesystem_encindex()); +} + +struct default_encoding { + int index; /* -2 => not yet set, -1 => nil */ + mrb_encoding *enc; +}; + +static struct default_encoding default_external = {0}; + +static int +enc_set_default_encoding(mrb_state *mrb, struct default_encoding *def, mrb_value encoding, const char *name) +{ + int overridden = FALSE; + + if (def->index != -2) + /* Already set */ + overridden = TRUE; + + if (mrb_nil_p(encoding)) { + def->index = -1; + def->enc = 0; + st_insert(enc_table.names, (st_data_t)strdup(name), + (st_data_t)UNSPECIFIED_ENCODING); + } + else { + def->index = mrb_enc_to_index(mrb_to_encoding(mrb, encoding)); + def->enc = 0; + enc_alias_internal(name, def->index); + } + + if (def == &default_external) + enc_set_filesystem_encoding(mrb); + + return overridden; +} + +mrb_encoding * +mrb_default_external_encoding(mrb_state *mrb) +{ + if (default_external.enc) return default_external.enc; + + if (default_external.index >= 0) { + default_external.enc = mrb_enc_from_index(mrb, default_external.index); + return default_external.enc; + } + else { + return mrb_locale_encoding(mrb); + } +} + +mrb_value +mrb_enc_default_external(mrb_state *mrb) +{ + return mrb_enc_from_encoding(mrb, mrb_default_external_encoding(mrb)); +} + +/* 15.2.40.2.3 */ +/* + * call-seq: + * Encoding.default_external -> enc + * + * Returns default external encoding. + * + * It is initialized by the locale or -E option. 
+ */
+static mrb_value
+get_default_external(mrb_state *mrb, mrb_value klass)
+{
+  return mrb_enc_default_external(mrb);
+}
+
+/*
+ * Sets the default external encoding.  A nil encoding is rejected with
+ * E_ARGUMENT_ERROR; anything else is recorded through
+ * enc_set_default_encoding() under the name "external".
+ */
+void
+mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding)
+{
+  if (mrb_nil_p(encoding)) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "default external can not be nil");
+  }
+  enc_set_default_encoding(mrb, &default_external, encoding,
+                           "external");
+}
+
+/* 15.2.40.2.4 */
+/*
+ * call-seq:
+ *   Encoding.default_external = enc
+ *
+ * Sets default external encoding.
+ * Emits a warning, then returns the argument it was given.
+ */
+static mrb_value
+set_default_external(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value encoding;
+  mrb_get_args(mrb, "o", &encoding);
+  mrb_warning("setting Encoding.default_external");
+  mrb_enc_set_default_external(mrb, encoding);
+  return encoding;
+}
+
+/* Starts at index -2 with a null enc pointer. */
+static struct default_encoding default_internal = {-2};
+
+/*
+ * Returns the default internal encoding, resolving and caching it from
+ * the stored index on first use.  Returns NULL while the index is
+ * negative and nothing has been cached.
+ */
+mrb_encoding *
+mrb_default_internal_encoding(mrb_state *mrb)
+{
+  if (!default_internal.enc && default_internal.index >= 0) {
+    default_internal.enc = mrb_enc_from_index(mrb, default_internal.index);
+  }
+  return default_internal.enc; /* can be NULL */
+}
+
+mrb_value
+mrb_enc_default_internal(mrb_state *mrb)
+{
+  /* Note: These functions cope with default_internal not being set */
+  return mrb_enc_from_encoding(mrb, mrb_default_internal_encoding(mrb));
+}
+
+/* 15.2.40.2.5 */
+/*
+ * call-seq:
+ *   Encoding.default_internal -> enc
+ *
+ * Returns default internal encoding.
+ *
+ * It is initialized by the source internal_encoding or -E option.
+ */
+static mrb_value
+get_default_internal(mrb_state *mrb, mrb_value klass)
+{
+  return mrb_enc_default_internal(mrb);
+}
+
+/* Records `encoding` as the default internal encoding under the name
+ * "internal"; unlike the external setter there is no nil check here. */
+void
+mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding)
+{
+  enc_set_default_encoding(mrb, &default_internal, encoding,
+                           "internal");
+}
+
+/* 15.2.40.2.6 */
+/*
+ * call-seq:
+ *   Encoding.default_internal = enc or nil
+ *
+ * Sets default internal encoding.
+ * Or removes default internal encoding when passed nil.
+ */ +static mrb_value +set_default_internal(mrb_state *mrb, mrb_value klass) +{ + mrb_value encoding; + mrb_get_args(mrb, "o", &encoding); + mrb_warning("setting Encoding.default_internal"); + mrb_enc_set_default_internal(mrb, encoding); + return encoding; +} + +#define digit(x) ((x) >= '0' && (x) <= '9') +#define strstart(s, n) (strncasecmp(s, n, strlen(n)) == 0) +#define C_CODESET "US-ASCII" /* Return this as the encoding of the + * C/POSIX locale. Could as well one day + * become "UTF-8". */ +#if defined _WIN32 || defined __CYGWIN__ +#define JA_CODESET "Windows-31J" +#else +#define JA_CODESET "EUC-JP" +#endif + +static char buf[16]; + +const char * +nl_langinfo_codeset(void) +{ + const char *l, *p; + int n; + + if (((l = getenv("LC_ALL")) && *l) || + ((l = getenv("LC_CTYPE")) && *l) || + ((l = getenv("LANG")) && *l)) { + /* check standardized locales */ + if (!strcmp(l, "C") || !strcmp(l, "POSIX")) + return C_CODESET; + /* check for encoding name fragment */ + p = strchr(l, '.'); + if (!p++) p = l; + if (strstart(p, "UTF")) + return "UTF-8"; + if ((n = 5, strstart(p, "8859-")) || (n = 9, strstart(p, "ISO-8859-"))) { + if (digit(p[n])) { + p += n; + memcpy(buf, "ISO-8859-\0\0", 12); + buf[9] = *p++; + if (digit(*p)) buf[10] = *p++; + return buf; + } + } + if (strstart(p, "KOI8-R")) return "KOI8-R"; + if (strstart(p, "KOI8-U")) return "KOI8-U"; + if (strstart(p, "620")) return "TIS-620"; + if (strstart(p, "2312")) return "GB2312"; + if (strstart(p, "HKSCS")) return "Big5HKSCS"; /* no MIME charset */ + if (strstart(p, "BIG5")) return "Big5"; + if (strstart(p, "GBK")) return "GBK"; /* no MIME charset */ + if (strstart(p, "18030")) return "GB18030"; /* no MIME charset */ + if (strstart(p, "Shift_JIS") || strstart(p, "SJIS")) return "Windows-31J"; + /* check for conclusive modifier */ + if (strstart(p, "euro")) return "ISO-8859-15"; + /* check for language (and perhaps country) codes */ + if (strstart(l, "zh_TW")) return "Big5"; + if (strstart(l, "zh_HK")) return 
"Big5HKSCS"; /* no MIME charset */ + if (strstart(l, "zh")) return "GB2312"; + if (strstart(l, "ja")) return JA_CODESET; + if (strstart(l, "ko")) return "EUC-KR"; + if (strstart(l, "ru")) return "KOI8-R"; + if (strstart(l, "uk")) return "KOI8-U"; + if (strstart(l, "pl") || strstart(l, "hr") || + strstart(l, "hu") || strstart(l, "cs") || + strstart(l, "sk") || strstart(l, "sl")) return "ISO-8859-2"; + if (strstart(l, "eo") || strstart(l, "mt")) return "ISO-8859-3"; + if (strstart(l, "el")) return "ISO-8859-7"; + if (strstart(l, "he")) return "ISO-8859-8"; + if (strstart(l, "tr")) return "ISO-8859-9"; + if (strstart(l, "th")) return "TIS-620"; /* or ISO-8859-11 */ + if (strstart(l, "lt")) return "ISO-8859-13"; + if (strstart(l, "cy")) return "ISO-8859-14"; + if (strstart(l, "ro")) return "ISO-8859-2"; /* or ISO-8859-16 */ + if (strstart(l, "am") || strstart(l, "vi")) return "UTF-8"; + /* Send me further rules if you like, but don't forget that we are + * *only* interested in locale naming conventions on platforms + * that do not already provide an nl_langinfo(CODESET) implementation. */ + } + return NULL; +} + +/* 15.2.40.2.9 */ +/* + * call-seq: + * Encoding.locale_charmap -> string + * + * Returns the locale charmap name. + * + * Debian GNU/Linux + * LANG=C + * Encoding.locale_charmap #=> "ANSI_X3.4-1968" + * LANG=ja_JP.EUC-JP + * Encoding.locale_charmap #=> "EUC-JP" + * + * SunOS 5 + * LANG=C + * Encoding.locale_charmap #=> "646" + * LANG=ja + * Encoding.locale_charmap #=> "eucJP" + * + * The result is highly platform dependent. + * So Encoding.find(Encoding.locale_charmap) may cause an error. + * If you need some encoding object even for unknown locale, + * Encoding.find("locale") can be used. 
+ * + */ +mrb_value +mrb_locale_charmap(mrb_state *mrb, mrb_value klass) +{ +#if defined NO_LOCALE_CHARMAP + return mrb_usascii_str_new2(mrb, "ASCII-8BIT"); +#elif defined _WIN32 || defined __CYGWIN__ + const char *nl_langinfo_codeset(void); + const char *codeset = nl_langinfo_codeset(); + char cp[sizeof(int) * 3 + 4]; + if (!codeset) { + //snprintf(cp, sizeof(cp), "CP%d", GetConsoleCP()); + codeset = cp; + } + return mrb_usascii_str_new2(mrb, codeset); +#elif defined HAVE_LANGINFO_H + char *codeset; + codeset = nl_langinfo(CODESET); + return mrb_usascii_str_new2(mrb, codeset); +#else + return mrb_nil_value(); +#endif +} +static void +set_encoding_const(mrb_state *mrb, const char *name, mrb_encoding *enc) +{ + mrb_value encoding = mrb_enc_from_encoding(mrb, enc); + char *s = (char *)name; + int haslower = 0, hasupper = 0, valid = 0; + + if (ISDIGIT(*s)) return; + if (ISUPPER(*s)) { + hasupper = 1; + while (*++s && (ISALNUM(*s) || *s == '_')) { + if (ISLOWER(*s)) haslower = 1; + } + } + if (!*s) { + if (s - name > ENCODING_NAMELEN_MAX) return; + valid = 1; + //mrb_define_const(mrb_cEncoding, name, encoding); + mrb_define_const(mrb, mrb->encode_class, name, encoding); + } + if (!valid || haslower) { + size_t len = s - name; + if (len > ENCODING_NAMELEN_MAX) return; + if (!haslower || !hasupper) { + do { + if (ISLOWER(*s)) haslower = 1; + if (ISUPPER(*s)) hasupper = 1; + } while (*++s && (!haslower || !hasupper)); + len = s - name; + } + len += strlen(s); + if (len++ > ENCODING_NAMELEN_MAX) return; + //MEMCPY(s = ALLOCA_N(char, len), name, char, len); + memcpy(s = mrb_malloc(mrb, len), name, len); + name = s; + if (!valid) { + if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s); + for (; *s; ++s) { + if (!ISALNUM(*s)) *s = '_'; + } + if (hasupper) { + mrb_define_const(mrb, mrb->encode_class, name, encoding); + } + } + if (haslower) { + for (s = (char *)name; *s; ++s) { + if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s); + } + 
mrb_define_const(mrb, mrb->encode_class, name, encoding); + } + } +} +static int +mrb_enc_name_list_i(mrb_state *mrb, st_data_t name, st_data_t idx, mrb_value *arg) +{ + mrb_value ary = *arg; + mrb_value str = mrb_usascii_str_new2(mrb, (char *)name); + //OBJ_FREEZE(str); + mrb_ary_push(mrb, ary, str); + return ST_CONTINUE; +} + +/* 15.2.40.2.10 */ +/* + * call-seq: + * Encoding.name_list -> ["enc1", "enc2", ...] + * + * Returns the list of available encoding names. + * + * Encoding.name_list + * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8", + * "ISO-8859-1", "Shift_JIS", "EUC-JP", + * "Windows-31J", + * "BINARY", "CP932", "eucJP"] + * + */ + +static mrb_value +mrb_enc_name_list(mrb_state *mrb, mrb_value klass) +{ + mrb_value ary = mrb_ary_new_capa(mrb, enc_table.names->num_entries);//mrb_ary_new2(enc_table.names->num_entries); + st_foreachNew(mrb, enc_table.names, mrb_enc_name_list_i, &ary); + return ary; +} + +static int +mrb_enc_aliases_enc_i(mrb_state *mrb, st_data_t name, st_data_t orig, st_data_t arg) +{ + mrb_value *p = (mrb_value *)arg; + mrb_value aliases = p[0], ary = p[1]; + int idx = (int)orig; + mrb_value key, str = mrb_ary_ref(mrb, ary, idx);//mrb_ary_entry(ary, idx); + + if (mrb_nil_p(str)) { + mrb_encoding *enc = mrb_enc_from_index(mrb, idx); + + if (!enc) return ST_CONTINUE; + if (STRCASECMP((char*)name, mrb_enc_name(enc)) == 0) { + return ST_CONTINUE; + } + str = mrb_usascii_str_new2(mrb, mrb_enc_name(enc)); + OBJ_FREEZE(str); + mrb_ary_set(mrb, ary, idx, str);//rb_ary_store(ary, idx, str); + } + key = mrb_usascii_str_new2(mrb, (char *)name); + OBJ_FREEZE(key); + mrb_hash_set(mrb, aliases, key, str); + return ST_CONTINUE; +} + +/* 15.2.40.2.1 */ +/* + * call-seq: + * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...} + * + * Returns the hash of available encoding alias and original encoding name. 
+ * + * Encoding.aliases + * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1986"=>"US-ASCII", + * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"} + * + */ + +static mrb_value +mrb_enc_aliases(mrb_state *mrb, mrb_value klass) +{ + mrb_value aliases[2]; + aliases[0] = mrb_hash_new_capa(mrb, 0); + aliases[1] = mrb_ary_new(mrb); + st_foreachNew(mrb, enc_table.names, mrb_enc_aliases_enc_i, aliases); + return aliases[0]; +} + +void +mrb_init_encoding(mrb_state *mrb) +{ +#undef mrb_intern +#define mrb_intern(str) mrb_intern_const(str) + mrb_value list; + int i; + struct RClass *s; + + s = mrb->encode_class = mrb_define_class(mrb, "Encoding", mrb->object_class); + //mrb_undef_alloc_func(mrb_cEncoding); + //mrb_undef_method(CLASS_OF(mrb_cEncoding), "new"); + mrb_define_class_method(mrb, s, "aliases", mrb_enc_aliases, ARGS_NONE()); /* 15.2.40.2.1 */ + mrb_define_class_method(mrb, s, "compatible?", enc_compatible_p, ARGS_REQ(2)); /* 15.2.40.2.2 */ + mrb_define_class_method(mrb, s, "default_external", get_default_external, ARGS_NONE()); /* 15.2.40.2.3 */ + mrb_define_class_method(mrb, s, "default_external=", set_default_external, ARGS_REQ(1)); /* 15.2.40.2.4 */ + mrb_define_class_method(mrb, s, "default_internal", get_default_internal, ARGS_NONE()); /* 15.2.40.2.5 */ + mrb_define_class_method(mrb, s, "default_internal=", set_default_internal, ARGS_REQ(1)); /* 15.2.40.2.6 */ + mrb_define_class_method(mrb, s, "find", enc_find, ARGS_REQ(1)); /* 15.2.40.2.7 */ + mrb_define_class_method(mrb, s, "list", enc_list, ARGS_NONE()); /* 15.2.40.2.8 */ + mrb_define_class_method(mrb, s, "locale_charmap", mrb_locale_charmap, ARGS_NONE()); /* 15.2.40.2.9 */ + mrb_define_class_method(mrb, s, "name_list", mrb_enc_name_list, ARGS_NONE()); /* 15.2.40.2.10 */ + mrb_define_class_method(mrb, s, "_load", enc_load, ARGS_REQ(1)); /* 15.2.40.2.11 */ + mrb_define_method(mrb, s, "ascii_compatible?", enc_ascii_compatible_p, ARGS_NONE()); /* 15.2.40.2.12 */ + 
mrb_define_method(mrb, s, "dummy?", enc_dummy_p, ARGS_NONE()); /* 15.2.40.2.13 */ + mrb_define_method(mrb, s, "inspect", enc_inspect, ARGS_NONE()); /* 15.2.40.2.14 */ + mrb_define_method(mrb, s, "name", enc_name, ARGS_NONE()); /* 15.2.40.2.15 */ + mrb_define_method(mrb, s, "names", enc_names, ARGS_NONE()); /* 15.2.40.2.16 */ + mrb_define_method(mrb, s, "replicate", enc_replicate, ARGS_REQ(1)); /* 15.2.40.2.17 */ + mrb_define_method(mrb, s, "to_s", enc_name, ARGS_NONE()); /* 15.2.40.2.18 */ + mrb_define_method(mrb, s, "_dump", enc_dump, ARGS_ANY()); /* 15.2.40.2.19 */ + +/* add kusuda --> */ + if (!enc_table.list) { + mrb_enc_init(mrb); + } +/* add kusuda --< */ + list = mrb_ary_new_capa(mrb, enc_table.count);//mrb_ary_new2(enc_table.count); + RBASIC(list)->c = 0; + mrb_encoding_list = list; + //mrb_gc_register_mark_object(list); + + for (i = 0; i < enc_table.count; ++i) { + mrb_ary_push(mrb, list, enc_new(mrb, enc_table.list[i].enc)); + } +} + +/* locale insensitive functions */ + +#define ctype_test(c, ctype) \ + (mrb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), ctype)) + +int mrb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); } +int mrb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); } +int mrb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); } +int mrb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); } +int mrb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); } +int mrb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); } +int mrb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); } +int mrb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); } +int mrb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); } +int mrb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); } +int mrb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); } +int mrb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); } + +int +mrb_tolower(int c) +{ + return mrb_isascii(c) 
? ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) : c; +} + +int +mrb_toupper(int c) +{ + return mrb_isascii(c) ? ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) : c; +} +#endif //INCLUDE_ENCODING diff --git a/src/encoding.h b/src/encoding.h new file mode 100644 index 0000000000..784d67f44f --- /dev/null +++ b/src/encoding.h @@ -0,0 +1,360 @@ +/********************************************************************** + + encoding.h - + + $Author: matz $ + created at: Thu May 24 11:49:41 JST 2007 + + Copyright (C) 2007 Yukihiro Matsumoto + +**********************************************************************/ + +#ifndef RUBY_ENCODING_H +#define RUBY_ENCODING_H 1 + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "oniguruma.h" +#include "mdata.h" + +int mrb_tolower(int c); +int mrb_toupper(int c); +#define TOUPPER(c) mrb_toupper((unsigned char)(c)) +#define TOLOWER(c) mrb_tolower((unsigned char)(c)) + +#define FL_USHIFT 12 + +#define FL_USER0 (((int)1)<<(FL_USHIFT+0)) +#define FL_USER1 (((int)1)<<(FL_USHIFT+1)) +#define FL_USER2 (((int)1)<<(FL_USHIFT+2)) +#define FL_USER3 (((int)1)<<(FL_USHIFT+3)) +#define FL_USER4 (((int)1)<<(FL_USHIFT+4)) +#define FL_USER5 (((int)1)<<(FL_USHIFT+5)) +#define FL_USER6 (((int)1)<<(FL_USHIFT+6)) +#define FL_USER7 (((int)1)<<(FL_USHIFT+7)) +#define FL_USER8 (((int)1)<<(FL_USHIFT+8)) +#define FL_USER9 (((int)1)<<(FL_USHIFT+9)) + +#define ENCODING_INLINE_MAX 1023 +/* 1023 = 0x03FF */ +/*#define ENCODING_SHIFT (FL_USHIFT+10)*/ +#define ENCODING_SHIFT (10) +#define ENCODING_MASK (((unsigned int)ENCODING_INLINE_MAX)<flags &= ~ENCODING_MASK;\ + RBASIC(obj)->flags |= (unsigned int)(i) << ENCODING_SHIFT;\ +} while (0) +#define ENCODING_SET(mrb, obj,i) do {\ + mrb_value mrb_encoding_set_obj = (obj); \ + int encoding_set_enc_index = (i); \ + if (encoding_set_enc_index < ENCODING_INLINE_MAX) \ + ENCODING_SET_INLINED(mrb_encoding_set_obj, encoding_set_enc_index); \ + else \ + mrb_enc_set_index(mrb, mrb_encoding_set_obj, encoding_set_enc_index); \ +} 
while (0) + +#define ENCODING_GET_INLINED(obj) (unsigned int)((RSTRING(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT) +#define ENCODING_GET(mrb, obj) \ + (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \ + ENCODING_GET_INLINED(obj) : \ + mrb_enc_get_index(mrb, obj)) + +#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0) + +#define ENCODING_MAXNAMELEN 42 + +#define ENC_CODERANGE_MASK ((int)(FL_USER8|FL_USER9)) +#define ENC_CODERANGE_UNKNOWN 0 +#define ENC_CODERANGE_7BIT ((int)FL_USER8) +#define ENC_CODERANGE_VALID ((int)FL_USER9) +#define ENC_CODERANGE_BROKEN ((int)(FL_USER8|FL_USER9)) +#define ENC_CODERANGE(obj) ((int)(RSTRING(obj)->flags & ENC_CODERANGE_MASK)) +#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_7BIT) +#ifdef INCLUDE_ENCODING +#define ENC_CODERANGE_SET(obj,cr) (RSTRING(obj)->flags = \ + (RSTRING(obj)->flags & ~ENC_CODERANGE_MASK) | (cr)) +#else +#define ENC_CODERANGE_SET(obj,cr) +#endif //INCLUDE_ENCODING +#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0) + +/* assumed ASCII compatibility */ +#define ENC_CODERANGE_AND(a, b) \ + (a == ENC_CODERANGE_7BIT ? b : \ + a == ENC_CODERANGE_VALID ? (b == ENC_CODERANGE_7BIT ? ENC_CODERANGE_VALID : b) : \ + ENC_CODERANGE_UNKNOWN) + +#define ENCODING_CODERANGE_SET(mrb, obj, encindex, cr) \ + do { \ + mrb_value mrb_encoding_coderange_obj = (obj); \ + ENCODING_SET(mrb, mrb_encoding_coderange_obj, (encindex)); \ + ENC_CODERANGE_SET(mrb_encoding_coderange_obj, (cr)); \ + } while (0) + +typedef OnigEncodingType mrb_encoding; + +mrb_encoding* mrb_enc_get(mrb_state *mrb, mrb_value obj); +/* mrb_encoding * -> name */ +#define mrb_enc_name(enc) (enc)->name +int mrb_enc_get_index(mrb_state *mrb, mrb_value obj); + +int mrb_enc_replicate(mrb_state *, const char *, mrb_encoding *); +int mrb_define_dummy_encoding(mrb_state *mrb, const char *); +#define mrb_enc_to_index(enc) ((enc) ? 
ENC_TO_ENCINDEX(enc) : 0) +void mrb_enc_set_index(mrb_state *mrb, mrb_value obj, int encindex); +int mrb_enc_find_index(mrb_state *mrb, const char *name); +int mrb_to_encoding_index(mrb_state *mrb, mrb_value); +mrb_encoding* mrb_to_encoding(mrb_state *mrb, mrb_value); +mrb_encoding* mrb_enc_get(mrb_state *, mrb_value); +mrb_encoding* mrb_enc_compatible(mrb_state *, mrb_value, mrb_value); +mrb_encoding* mrb_enc_check(mrb_state *, mrb_value, mrb_value); +mrb_value mrb_enc_associate_index(mrb_state *mrb, mrb_value, int); +#ifdef INCLUDE_ENCODING +mrb_value mrb_enc_associate(mrb_state *mrb, mrb_value, mrb_encoding*); +#else +#define mrb_enc_associate(mrb,value,enc) +#endif //INCLUDE_ENCODING +void mrb_enc_copy(mrb_state *mrb, mrb_value dst, mrb_value src); + +mrb_value mrb_enc_reg_new(const char*, long, mrb_encoding*, int); +//PRINTF_ARGS(mrb_value rb_enc_sprintf(mrb_encoding *, const char*, ...), 2, 3); +mrb_value mrb_enc_vsprintf(mrb_encoding *, const char*, va_list); +long mrb_enc_strlen(const char*, const char*, mrb_encoding*); +char* mrb_enc_nth(mrb_state *, const char*, const char*, long, mrb_encoding*); +mrb_value mrb_obj_encoding(mrb_state *, mrb_value); +mrb_value mrb_enc_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, mrb_encoding *enc); +mrb_value rb_enc_uint_chr(mrb_state *mrb, unsigned int code, mrb_encoding *enc); + +mrb_value mrb_external_str_new_with_enc(mrb_state *mrb, const char *ptr, long len, mrb_encoding *); +mrb_value mrb_str_export_to_enc(mrb_value, mrb_encoding *); + +/* index -> mrb_encoding */ +mrb_encoding* mrb_enc_from_index(mrb_state *mrb, int idx); + +/* name -> mrb_encoding */ +mrb_encoding * mrb_enc_find(mrb_state *mrb, const char *name); + +/* mrb_encoding * -> name */ +#define mrb_enc_name(enc) (enc)->name + +/* mrb_encoding * -> minlen/maxlen */ +#define mrb_enc_mbminlen(enc) (enc)->min_enc_len +#define mrb_enc_mbmaxlen(enc) (enc)->max_enc_len + +/* -> mbclen (no error notification: 0 < ret <= e-p, no exception) 
*/ +int mrb_enc_mbclen(const char *p, const char *e, mrb_encoding *enc); + +/* -> mbclen (only for valid encoding) */ +int mrb_enc_fast_mbclen(const char *p, const char *e, mrb_encoding *enc); + +/* -> chlen, invalid or needmore */ +int mrb_enc_precise_mbclen(const char *p, const char *e, mrb_encoding *enc); +#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) +#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) +#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) +#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) +#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) + +/* -> 0x00..0x7f, -1 */ +int mrb_enc_ascget(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc); + + +/* -> code (and len) or raise exception */ +unsigned int mrb_enc_codepoint_len(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc); + +/* prototype for obsolete function */ +unsigned int mrb_enc_codepoint(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc); +/* overriding macro */ +#define mrb_enc_codepoint(mrb,p,e,enc) mrb_enc_codepoint_len((mrb),(p),(e),0,(enc)) +#define mrb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e)) + +/* -> codelen>0 or raise exception */ +#ifdef INCLUDE_ENCODING +int mrb_enc_codelen(mrb_state *mrb, int code, mrb_encoding *enc); +#else +#define mrb_enc_codelen(mrb,code,enc) 1 +#endif //INCLUDE_ENCODING + +/* code,ptr,encoding -> write buf */ +#ifdef INCLUDE_ENCODING +#define mrb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)(buf)) +#else +#define mrb_enc_mbcput(c,buf,enc) *(buf) = (char)(c) +#endif //INCLUDE_ENCODING + +/* start, ptr, end, encoding -> prev_char */ +#define mrb_enc_prev_char(s,p,e,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) +/* start, ptr, end, encoding -> next_char */ +#define mrb_enc_left_char_head(s,p,e,enc) (char 
*)onigenc_get_left_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) +#define mrb_enc_right_char_head(s,p,e,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) + +/* ptr, ptr, encoding -> newline_or_not */ +#define mrb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)(p),(UChar*)(end)) + +#define mrb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t) +#define mrb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c) +#define mrb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c) +#define mrb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER(enc,c) +#define mrb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER(enc,c) +#define mrb_enc_ispunct(c,enc) ONIGENC_IS_CODE_PUNCT(enc,c) +#define mrb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM(enc,c) +#define mrb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT(enc,c) +#define mrb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c) +#define mrb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c) + +#define mrb_enc_asciicompat(mrb, enc) (mrb_enc_mbminlen(enc)==1 && !mrb_enc_dummy_p(enc)) + +int mrb_enc_casefold(char *to, const char *p, const char *e, mrb_encoding *enc); +int mrb_enc_toupper(int c, mrb_encoding *enc); +int mrb_enc_tolower(int c, mrb_encoding *enc); +//ID mrb_intern3(const char*, long, mrb_encoding*); +//ID mrb_interned_id_p(const char *, long, mrb_encoding *); +int mrb_enc_symname_p(const char*, mrb_encoding*); +int mrb_enc_symname2_p(const char*, long, mrb_encoding*); +int mrb_enc_str_coderange(mrb_state *mrb, mrb_value); +long mrb_str_coderange_scan_restartable(const char*, const char*, mrb_encoding*, int*); +int mrb_enc_str_asciionly_p(mrb_state *mrb, mrb_value); +#define mrb_enc_str_asciicompat_p(mrb, str) mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, str)) +mrb_value mrb_enc_from_encoding(mrb_state *mrb, mrb_encoding *enc); +int mrb_enc_unicode_p(mrb_encoding *enc); +mrb_encoding *mrb_ascii8bit_encoding(mrb_state *mrb); +mrb_encoding *mrb_utf8_encoding(mrb_state *mrb); +mrb_encoding 
*mrb_usascii_encoding(mrb_state *mrb); +mrb_encoding *mrb_locale_encoding(mrb_state *mrb); +mrb_encoding *mrb_filesystem_encoding(mrb_state *mrb); +mrb_encoding *mrb_default_external_encoding(mrb_state *mrb); +mrb_encoding *mrb_default_internal_encoding(mrb_state *mrb); +int mrb_ascii8bit_encindex(void); +int mrb_utf8_encindex(void); +int mrb_usascii_encindex(void); +int mrb_locale_encindex(mrb_state *mrb); +int mrb_filesystem_encindex(void); +mrb_value mrb_enc_default_external(mrb_state *mrb); +mrb_value mrb_enc_default_internal(mrb_state *mrb); +void mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding); +void mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding); +mrb_value mrb_locale_charmap(mrb_state *mrb, mrb_value klass); +#ifdef INCLUDE_ENCODING +int mrb_memsearch(mrb_state *mrb, const void*,int,const void*,int,mrb_encoding*); +#endif //INCLUDE_ENCODING +mrb_value mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr); +int mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p); + +#define ENC_DUMMY_FLAG (1<<24) +#define ENC_INDEX_MASK (~(~0U<<24)) + +#define ENC_TO_ENCINDEX(enc) (int)((enc)->ruby_encoding_index & ENC_INDEX_MASK) + +#define ENC_DUMMY_P(enc) ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG) +#define ENC_SET_DUMMY(enc) ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG) + +static inline int +mrb_enc_dummy_p(mrb_encoding *enc) +{ + return ENC_DUMMY_P(enc) != 0; +} + +/* econv stuff */ + +typedef enum { + econv_invalid_byte_sequence, + econv_undefined_conversion, + econv_destination_buffer_full, + econv_source_buffer_empty, + econv_finished, + econv_after_output, + econv_incomplete_input +} mrb_econv_result_t; + +typedef struct mrb_econv_t mrb_econv_t; + +mrb_value mrb_str_encode(mrb_state *mrb, mrb_value str, mrb_value to, int ecflags, mrb_value ecopts); +int mrb_econv_has_convpath_p(mrb_state *mrb, const char* from_encoding, const char* to_encoding); + +int 
mrb_econv_prepare_opts(mrb_state *mrb, mrb_value opthash, mrb_value *ecopts); + +mrb_econv_t *mrb_econv_open(mrb_state *mrb, const char *source_encoding, const char *destination_encoding, int ecflags); +mrb_econv_t *mrb_econv_open_opts(mrb_state *mrb, const char *source_encoding, const char *destination_encoding, int ecflags, mrb_value ecopts); + +mrb_econv_result_t mrb_econv_convert(mrb_state *mrb, mrb_econv_t *ec, + const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, + unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, + int flags); +void mrb_econv_close(mrb_econv_t *ec); + +/* result: 0:success -1:failure */ +int mrb_econv_set_replacement(mrb_state *mrb, mrb_econv_t *ec, const unsigned char *str, size_t len, const char *encname); + +/* result: 0:success -1:failure */ +int mrb_econv_decorate_at_first(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name); +int mrb_econv_decorate_at_last(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name); + +mrb_value mrb_econv_open_exc(mrb_state *mrb, const char *senc, const char *denc, int ecflags); + +/* result: 0:success -1:failure */ +int mrb_econv_insert_output(mrb_state *mrb, mrb_econv_t *ec, + const unsigned char *str, size_t len, const char *str_encoding); + +/* encoding that mrb_econv_insert_output doesn't need conversion */ +const char *mrb_econv_encoding_to_insert_output(mrb_econv_t *ec); + +/* raise an error if the last mrb_econv_convert is error */ +void mrb_econv_check_error(mrb_state *mrb, mrb_econv_t *ec); + +/* returns an exception object or nil */ +mrb_value mrb_econv_make_exception(mrb_state *mrb, mrb_econv_t *ec); + +int mrb_econv_putbackable(mrb_econv_t *ec); +void mrb_econv_putback(mrb_econv_t *ec, unsigned char *p, int n); + +/* returns the corresponding ASCII compatible encoding for encname, + * or NULL if encname is not ASCII incompatible encoding. 
*/ +const char *mrb_econv_asciicompat_encoding(const char *encname); + +mrb_value mrb_econv_str_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, int flags); +mrb_value mrb_econv_substr_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long byteoff, long bytesize, int flags); +mrb_value mrb_econv_str_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, mrb_value dst, int flags); +mrb_value mrb_econv_substr_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long byteoff, long bytesize, mrb_value dst, int flags); + +void mrb_econv_binmode(mrb_econv_t *ec); + +/* flags for mrb_econv_open */ + +#define ECONV_ERROR_HANDLER_MASK 0x000000ff + +#define ECONV_INVALID_MASK 0x0000000f +#define ECONV_INVALID_REPLACE 0x00000002 + +#define ECONV_UNDEF_MASK 0x000000f0 +#define ECONV_UNDEF_REPLACE 0x00000020 +#define ECONV_UNDEF_HEX_CHARREF 0x00000030 + +#define ECONV_DECORATOR_MASK 0x0000ff00 + +#define ECONV_UNIVERSAL_NEWLINE_DECORATOR 0x00000100 +#define ECONV_CRLF_NEWLINE_DECORATOR 0x00001000 +#define ECONV_CR_NEWLINE_DECORATOR 0x00002000 +#define ECONV_XML_TEXT_DECORATOR 0x00004000 +#define ECONV_XML_ATTR_CONTENT_DECORATOR 0x00008000 + +#define ECONV_STATEFUL_DECORATOR_MASK 0x00f00000 +#define ECONV_XML_ATTR_QUOTE_DECORATOR 0x00100000 + +/* end of flags for mrb_econv_open */ + +/* flags for mrb_econv_convert */ +#define ECONV_PARTIAL_INPUT 0x00010000 +#define ECONV_AFTER_OUTPUT 0x00020000 +/* end of flags for mrb_econv_convert */ + +int mrb_isspace(int c); + +#if defined(__cplusplus) +} /* extern "C" { */ +#endif + +#endif /* RUBY_ENCODING_H */ diff --git a/src/enum.c b/src/enum.c new file mode 100644 index 0000000000..c9bb431395 --- /dev/null +++ b/src/enum.c @@ -0,0 +1,1077 @@ +/********************************************************************** + + enum.c - + + $Author: yugui $ + created at: Fri Oct 1 15:15:19 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include 
"mruby.h" + +#if 0 + +#include "mruby/struct.h" +#include "mruby/array.h" + +static inline mrb_value +mrb_call0(mrb_state *mrb, mrb_value recv, mrb_sym mid, int argc, const mrb_value *argv, + call_type scope, mrb_value self) +{ + return mrb_funcall(mrb, recv, mrb_sym2name(mrb, mid), argc, argv); +} +static inline mrb_value +mrb_call(mrb_state *mrb, mrb_value recv, mrb_sym mid, int argc, const mrb_value *argv, call_type scope) +{ + return mrb_call0(mrb, recv, mid, argc, argv, scope, mrb_fixnum_value(0)/*Qundef*/); +} + +mrb_value rb_mEnumerable; +static mrb_sym id_each, id_eqq, id_cmp, id_next, id_size; + +struct iter_method_arg { + mrb_value obj; + mrb_sym mid; + int argc; + mrb_value *argv; +}; + +static mrb_value +iterate_method(mrb_state *mrb, void *obj) +{ + const struct iter_method_arg * arg = + (struct iter_method_arg *) obj; + + return mrb_call(mrb, arg->obj, arg->mid, arg->argc, arg->argv, CALL_FCALL); +} + +#ifndef ANYARGS +# ifdef __cplusplus +# define ANYARGS ... +# else +# define ANYARGS +# endif +#endif + +mrb_value +mrb_iterate(mrb_state *mrb, + mrb_value (* it_proc) (mrb_state *, void*), void *data1, + mrb_value (* bl_proc) (ANYARGS), + void *data2) +{ + mrb_value retval = mrb_nil_value(); + retval = (*bl_proc) (data2); + retval = (*it_proc) (mrb, data1); + return retval; +} + +mrb_value +mrb_block_call(mrb_state *mrb, mrb_value obj, mrb_sym mid, int argc, mrb_value * argv, + mrb_value (*bl_proc) (ANYARGS), + void *data2) +{ + struct iter_method_arg arg; + + arg.obj = obj; + arg.mid = mid; + arg.argc = argc; + arg.argv = argv; + return mrb_iterate(mrb, iterate_method, &arg, bl_proc, data2); +} + +static mrb_value +enum_values_pack(mrb_state *mrb, int argc, mrb_value *argv) +{ + if (argc == 0) return mrb_nil_value(); + if (argc == 1) return argv[0]; + return mrb_ary_new4(mrb, argc, argv); +} + +#define ENUM_WANT_SVALUE(mrb) do { \ + i = enum_values_pack(mrb, argc, argv); \ +} while (0) + +#define enum_yield mrb_yield_values2 +mrb_value 
+mrb_yield_values2(int argc, const mrb_value *argv) +{ + //return mrb_yield_0(argc, argv); + return mrb_nil_value(); /* dummy */ +} + +static mrb_value +grep_i(mrb_state *mrb, mrb_value i, mrb_value args, int argc, mrb_value *argv) +{ + mrb_value *arg = &args; + ENUM_WANT_SVALUE(mrb); + + if (RTEST(mrb_funcall(mrb, arg[0], "===", 1, i))) { + mrb_ary_push(mrb, arg[1], i); + } + return mrb_nil_value(); +} + +static mrb_value +grep_iter_i(mrb_state *mrb, mrb_value i, mrb_value args, int argc, mrb_value *argv) +{ + mrb_value *arg = &args; + ENUM_WANT_SVALUE(mrb); + + if (RTEST(mrb_funcall(mrb, arg[0], "===", 1, i))) { + mrb_ary_push(mrb, arg[1], mrb_yield(i)); + } + return mrb_nil_value(); +} + +/* 15.3.2.2.9 */ +/* + * call-seq: + * enum.grep(pattern) -> array + * enum.grep(pattern) {| obj | block } -> array + * + * Returns an array of every element in enum for which + * Pattern === element. If the optional block is + * supplied, each matching element is passed to it, and the block's + * result is stored in the output array. + * + * (1..100).grep 38..44 #=> [38, 39, 40, 41, 42, 43, 44] + * c = IO.constants + * c.grep(/SEEK/) #=> [:SEEK_SET, :SEEK_CUR, :SEEK_END] + * res = c.grep(/SEEK/) {|v| IO.const_get(v) } + * res #=> [0, 1, 2] + * + */ + +static mrb_value +enum_grep(mrb_state *mrb, mrb_value obj) +{ + mrb_value ary = mrb_ary_new(mrb); + mrb_value arg[2]; + mrb_value pat; + + mrb_get_args(mrb, "o", &pat); + + arg[0] = pat; + arg[1] = ary; + + mrb_block_call(mrb, obj, id_each, 0, 0, mrb_block_given_p() ? grep_iter_i : grep_i, arg); + + return ary; +} + +/* + * call-seq: + * enum.count -> int + * enum.count(item) -> int + * enum.count {| obj | block } -> int + * + * Returns the number of items in enum, where #size is called + * if it responds to it, otherwise the items are counted through + * enumeration. If an argument is given, counts the number of items + * in enum, for which equals to item. 
If a block is + * given, counts the number of elements yielding a true value. + * + * ary = [1, 2, 4, 2] + * ary.count #=> 4 + * ary.count(2) #=> 2 + * ary.count{|x|x%2==0} #=> 3 + * + */ + +void +mrb_iter_break(void) +{ + //vm_iter_break(GET_THREAD()); /* dummy */ +} + +static mrb_value +find_i(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv) +{ + ENUM_WANT_SVALUE(mrb); + + if (RTEST(mrb_yield(i))) { + *memo = i; + mrb_iter_break(); + } + return mrb_nil_value(); +} + +/* 15.3.2.2.4 */ +/* 15.3.2.2.7 */ +/* + * call-seq: + * enum.detect(ifnone = nil) {| obj | block } -> obj or nil + * enum.find(ifnone = nil) {| obj | block } -> obj or nil + * enum.detect(ifnone = nil) -> an_enumerator + * enum.find(ifnone = nil) -> an_enumerator + * + * Passes each entry in enum to block. Returns the + * first for which block is not false. If no + * object matches, calls ifnone and returns its result when it + * is specified, or returns nil otherwise. + * + * If no block is given, an enumerator is returned instead. 
+ * + * (1..10).detect {|i| i % 5 == 0 and i % 7 == 0 } #=> nil + * (1..100).detect {|i| i % 5 == 0 and i % 7 == 0 } #=> 35 + * + */ + +static mrb_value +enum_find(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj) +{ + mrb_value memo; + mrb_value if_none; + + memo.tt = MRB_TT_FREE; + //mrb_scan_args(argc, argv, "01", &if_none); + if_none = argv[0]; + //RETURN_ENUMERATOR(obj, argc, argv); + mrb_block_call(mrb, obj, id_each, 0, 0, find_i, &memo); + if (memo.tt != MRB_TT_FREE) { + return memo; + } + if (!mrb_nil_p(if_none)) { + return mrb_funcall(mrb, if_none, "call", 0, 0); + } + return mrb_nil_value(); +} + +static mrb_value +enum_find_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return enum_find(mrb, argc, argv, self); +} + +/* + * call-seq: + * enum.find_index(value) -> int or nil + * enum.find_index {| obj | block } -> int or nil + * enum.find_index -> an_enumerator + * + * Compares each entry in enum with value or passes + * to block. Returns the index for the first for which the + * evaluated value is non-false. If no object matches, returns + * nil + * + * If neither block nor argument is given, an enumerator is returned instead. + * + * (1..10).find_index {|i| i % 5 == 0 and i % 7 == 0 } #=> nil + * (1..100).find_index {|i| i % 5 == 0 and i % 7 == 0 } #=> 34 + * (1..100).find_index(50) #=> 49 + * + */ + +static mrb_value +find_all_i(mrb_state *mrb, mrb_value i, mrb_value ary, int argc, mrb_value *argv) +{ + ENUM_WANT_SVALUE(mrb); + + if (RTEST(mrb_yield(i))) { + mrb_ary_push(mrb, ary, i); + } + return mrb_nil_value(); +} + +/* 15.3.2.2.8 */ +/* 15.3.2.2.18 */ +/* + * call-seq: + * enum.find_all {| obj | block } -> array + * enum.select {| obj | block } -> array + * enum.find_all -> an_enumerator + * enum.select -> an_enumerator + * + * Returns an array containing all elements of enum for which + * block is not false (see also + * Enumerable#reject). 
+ * + * If no block is given, an enumerator is returned instead. + * + * + * (1..10).find_all {|i| i % 3 == 0 } #=> [3, 6, 9] + * + */ + +static mrb_value +enum_find_all(mrb_state *mrb, mrb_value obj) +{ + mrb_value ary; + + //RETURN_ENUMERATOR(obj, 0, 0); + + ary = mrb_ary_new(mrb); + mrb_block_call(mrb, obj, id_each, 0, 0, find_all_i, &ary); + + return ary; +} + +static mrb_value +reject_i(mrb_state *mrb, mrb_value i, mrb_value ary, int argc, mrb_value *argv) +{ + ENUM_WANT_SVALUE(mrb); + + if (!RTEST(mrb_yield(i))) { + mrb_ary_push(mrb, ary, i); + } + return mrb_nil_value(); +} + +/* 15.3.2.2.17 */ +/* + * call-seq: + * enum.reject {| obj | block } -> array + * enum.reject -> an_enumerator + * + * Returns an array for all elements of enum for which + * block is false (see also Enumerable#find_all). + * + * If no block is given, an enumerator is returned instead. + * + * (1..10).reject {|i| i % 3 == 0 } #=> [1, 2, 4, 5, 7, 8, 10] + * + */ + +static mrb_value +enum_reject(mrb_state *mrb, mrb_value obj) +{ + mrb_value ary; + + //RETURN_ENUMERATOR(obj, 0, 0); + + ary = mrb_ary_new(mrb); + mrb_block_call(mrb, obj, id_each, 0, 0, reject_i, &ary); + + return ary; +} + +static mrb_value +collect_i(mrb_state *mrb, mrb_value i, mrb_value ary, int argc, mrb_value *argv) +{ + mrb_ary_push(mrb, ary, enum_yield(argc, argv)); + + return mrb_nil_value(); +} + +static mrb_value +collect_all(mrb_state *mrb, mrb_value i, mrb_value ary, int argc, mrb_value *argv) +{ + //mrb_thread_check_ints(); /* dummy */ + mrb_ary_push(mrb, ary, enum_values_pack(mrb, argc, argv)); + + return mrb_nil_value(); +} + +/* 15.3.2.2.3 */ +/* 15.3.2.2.12 */ +/* + * call-seq: + * enum.collect {| obj | block } -> array + * enum.map {| obj | block } -> array + * enum.collect -> an_enumerator + * enum.map -> an_enumerator + * + * Returns a new array with the results of running block once + * for every element in enum. + * + * If no block is given, an enumerator is returned instead. 
+ * + * (1..4).collect {|i| i*i } #=> [1, 4, 9, 16] + * (1..4).collect { "cat" } #=> ["cat", "cat", "cat", "cat"] + * + */ + +static mrb_value +enum_collect(mrb_state *mrb, mrb_value obj) +{ + mrb_value ary; + + //RETURN_ENUMERATOR(obj, 0, 0); + + ary = mrb_ary_new(mrb); + mrb_block_call(mrb, obj, id_each, 0, 0, collect_i, &ary); + + return ary; +} + +/* 15.3.2.2.6 */ +/* 15.3.2.2.20 */ +/* + * call-seq: + * enum.to_a -> array + * enum.entries -> array + * + * Returns an array containing the items in enum. + * + * (1..7).to_a #=> [1, 2, 3, 4, 5, 6, 7] + * { 'a'=>1, 'b'=>2, 'c'=>3 }.to_a #=> [["a", 1], ["b", 2], ["c", 3]] + */ +static mrb_value +enum_to_a(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj) +{ + mrb_value ary = mrb_ary_new(mrb); + + mrb_block_call(mrb, obj, id_each, argc, argv, collect_all, &ary); + //OBJ_INFECT(ary, obj); + + return ary; +} + +static mrb_value +enum_to_a_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return enum_to_a(mrb, argc, argv, self); +} + +static mrb_value +inject_i(mrb_state *mrb, mrb_value i, mrb_value p, int argc, mrb_value *argv) +{ + mrb_value *memo = &p; + + ENUM_WANT_SVALUE(mrb); + + if (memo[0].tt == MRB_TT_FREE) { + memo[0] = i; + } + else { + memo[0] = mrb_yield_values(2, memo[0], i); + } + return mrb_nil_value(); +} + +static mrb_value +inject_op_i(mrb_state *mrb, mrb_value i, mrb_value p, int argc, mrb_value *argv) +{ + mrb_value *memo = &p; + + ENUM_WANT_SVALUE(mrb); + + if (memo[0].tt == MRB_TT_FREE) { + memo[0] = i; + } + else { + memo[0] = mrb_funcall(mrb, memo[0], mrb_sym2name(mrb, SYM2ID(memo[1])), 1, i); + } + return mrb_nil_value(); +} + +/* 15.3.2.2.11 */ +/* + * call-seq: + * enum.inject(initial, sym) -> obj + * enum.inject(sym) -> obj + * enum.inject(initial) {| memo, obj | block } -> obj + * enum.inject {| memo, obj | block } -> obj + * + * enum.reduce(initial, sym) -> obj + * enum.reduce(sym) -> obj + * enum.reduce(initial) {| 
memo, obj | block } -> obj + * enum.reduce {| memo, obj | block } -> obj + * + * Combines all elements of enum by applying a binary + * operation, specified by a block or a symbol that names a + * method or operator. + * + * If you specify a block, then for each element in enum + * the block is passed an accumulator value (memo) and the element. + * If you specify a symbol instead, then each element in the collection + * will be passed to the named method of memo. + * In either case, the result becomes the new value for memo. + * At the end of the iteration, the final value of memo is the + * return value of the method. + * + * If you do not explicitly specify an initial value for memo, + * then the first element of the collection is used as the initial value + * of memo. + * + * Examples: + * + * # Sum some numbers + * (5..10).reduce(:+) #=> 45 + * # Same using a block and inject + * (5..10).inject {|sum, n| sum + n } #=> 45 + * # Multiply some numbers + * (5..10).reduce(1, :*) #=> 151200 + * # Same using a block + * (5..10).inject(1) {|product, n| product * n } #=> 151200 + * # find the longest word + * longest = %w{ cat sheep bear }.inject do |memo,word| + * memo.length > word.length ?
memo : word + * end + * longest #=> "sheep" + * + */ +static mrb_value +enum_inject(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj) +{ + mrb_value memo[2]; + mrb_value (*iter)(mrb_state *mrb, mrb_value, mrb_value, int, mrb_value*) = inject_i; + + //switch (mrb_scan_args(argc, argv, "02", &memo[0], &memo[1])) { + switch (argc) { + case 0: + memo[0].tt = MRB_TT_FREE; + break; + case 1: + if (mrb_block_given_p()) { + break; + } + memo[1] = mrb_symbol_value(mrb_to_id(mrb, argv[0])); + memo[0].tt = MRB_TT_FREE; + iter = inject_op_i; + break; + case 2: + if (mrb_block_given_p()) { + mrb_warning("given block not used"); + } + memo[0] = argv[0]; + memo[1] = mrb_symbol_value(mrb_to_id(mrb, argv[1])); + iter = inject_op_i; + break; + } + mrb_block_call(mrb, obj, id_each, 0, 0, iter, memo); + if (memo[0].tt == MRB_TT_FREE) return mrb_nil_value(); + return memo[0]; +} + +static mrb_value +enum_inject_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return enum_inject(mrb, argc, argv, self); +} + +static mrb_value +partition_i(mrb_state *mrb, mrb_value i, mrb_value *ary, int argc, mrb_value *argv) +{ + ENUM_WANT_SVALUE(mrb); + + if (RTEST(mrb_yield(i))) { + mrb_ary_push(mrb, ary[0], i); + } + else { + mrb_ary_push(mrb, ary[1], i); + } + return mrb_nil_value(); +} + +/* 15.3.2.2.16 */ +/* + * call-seq: + * enum.partition {| obj | block } -> [ true_array, false_array ] + * enum.partition -> an_enumerator + * + * Returns two arrays, the first containing the elements of + * enum for which the block evaluates to true, the second + * containing the rest. + * + * If no block is given, an enumerator is returned instead. 
+ * + * (1..6).partition {|i| (i&1).zero?} #=> [[2, 4, 6], [1, 3, 5]] + * + */ + +static mrb_value +enum_partition(mrb_state *mrb, mrb_value obj) +{ + mrb_value ary[2]; + + //RETURN_ENUMERATOR(obj, 0, 0); + + ary[0] = mrb_ary_new(mrb); + ary[1] = mrb_ary_new(mrb); + mrb_block_call(mrb, obj, id_each, 0, 0, partition_i, ary); + + return mrb_assoc_new(mrb, ary[0], ary[1]); +} + +/* 15.3.2.2.19 */ +/* + * call-seq: + * enum.sort -> array + * enum.sort {| a, b | block } -> array + * + * Returns an array containing the items in enum sorted, + * either according to their own <=> method, or by using + * the results of the supplied block. The block should return -1, 0, or + * +1 depending on the comparison between a and b. As of + * Ruby 1.8, the method Enumerable#sort_by implements a + * built-in Schwartzian Transform, useful when key computation or + * comparison is expensive. + * + * %w(rhea kea flea).sort #=> ["flea", "kea", "rhea"] + * (1..10).sort {|a,b| b <=> a} #=> [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + */ + +static mrb_value +enum_sort(mrb_state *mrb, mrb_value obj) +{ + return mrb_ary_sort(mrb, enum_to_a(mrb, 0, 0, obj)); +} + +/* + * call-seq: + * enum.sort_by {| obj | block } -> array + * enum.sort_by -> an_enumerator + * + * Sorts enum using a set of keys generated by mapping the + * values in enum through the given block. + * + * If no block is given, an enumerator is returned instead. + * + * %w{ apple pear fig }.sort_by {|word| word.length} + * #=> ["fig", "pear", "apple"] + * + * The current implementation of sort_by generates an + * array of tuples containing the original collection element and the + * mapped value. 
This makes sort_by fairly expensive when + * the keysets are simple + * + * require 'benchmark' + * + * a = (1..100000).map {rand(100000)} + * + * Benchmark.bm(10) do |b| + * b.report("Sort") { a.sort } + * b.report("Sort by") { a.sort_by {|a| a} } + * end + * + * produces: + * + * user system total real + * Sort 0.180000 0.000000 0.180000 ( 0.175469) + * Sort by 1.980000 0.040000 2.020000 ( 2.013586) + * + * However, consider the case where comparing the keys is a non-trivial + * operation. The following code sorts some files on modification time + * using the basic sort method. + * + * files = Dir["*"] + * sorted = files.sort {|a,b| File.new(a).mtime <=> File.new(b).mtime} + * sorted #=> ["mon", "tues", "wed", "thurs"] + * + * This sort is inefficient: it generates two new File + * objects during every comparison. A slightly better technique is to + * use the Kernel#test method to generate the modification + * times directly. + * + * files = Dir["*"] + * sorted = files.sort { |a,b| + * test(?M, a) <=> test(?M, b) + * } + * sorted #=> ["mon", "tues", "wed", "thurs"] + * + * This still generates many unnecessary Time objects. A + * more efficient technique is to cache the sort keys (modification + * times in this case) before the sort. Perl users often call this + * approach a Schwartzian Transform, after Randal Schwartz. We + * construct a temporary array, where each element is an array + * containing our sort key along with the filename. We sort this array, + * and then extract the filename from the result. + * + * sorted = Dir["*"].collect { |f| + * [test(?M, f), f] + * }.sort.collect { |f| f[1] } + * sorted #=> ["mon", "tues", "wed", "thurs"] + * + * This is exactly what sort_by does internally. + * + * sorted = Dir["*"].sort_by {|f| test(?M, f)} + * sorted #=> ["mon", "tues", "wed", "thurs"] + */ + +#define ENUMFUNC(name) mrb_block_given_p() ? 
name##_iter_i : name##_i + +#define DEFINE_ENUMFUNCS(mrb, name) \ +static mrb_value enum_##name##_func(mrb_value result, mrb_value *memo); \ +\ +static mrb_value \ +name##_i(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv) \ +{ \ + return enum_##name##_func(enum_values_pack(mrb, argc, argv), memo); \ +} \ +\ +static mrb_value \ +name##_iter_i(mrb_state *mrb,mrb_value i, mrb_value *memo, int argc, mrb_value *argv) \ +{ \ + return enum_##name##_func(enum_yield(argc, argv), memo); \ +} \ +\ +static mrb_value \ +enum_##name##_func(mrb_value result, mrb_value *memo) + +DEFINE_ENUMFUNCS(mrb, all) +{ + if (!RTEST(result)) { + *memo = mrb_false_value(); + mrb_iter_break(); + } + return mrb_nil_value(); +} + +/* 15.3.2.2.1 */ +/* + * call-seq: + * enum.all? [{|obj| block } ] -> true or false + * + * Passes each element of the collection to the given block. The method + * returns true if the block never returns + * false or nil. If the block is not given, + * Ruby adds an implicit block of {|obj| obj} (that is + * all? will return true only if none of the + * collection members are false or nil.) + * + * %w{ant bear cat}.all? {|word| word.length >= 3} #=> true + * %w{ant bear cat}.all? {|word| word.length >= 4} #=> false + * [ nil, true, 99 ].all? #=> false + * + */ + +static mrb_value +enum_all(mrb_state *mrb, mrb_value obj) +{ + mrb_value result = mrb_true_value(); + + mrb_block_call(mrb, obj, id_each, 0, 0, ENUMFUNC(all), &result); + return result; +} + +DEFINE_ENUMFUNCS(mrb, any) +{ + if (RTEST(result)) { + *memo = mrb_true_value(); + mrb_iter_break(); + } + return mrb_nil_value(); +} + +/* 15.3.2.2.2 */ +/* + * call-seq: + * enum.any? [{|obj| block } ] -> true or false + * + * Passes each element of the collection to the given block. The method + * returns true if the block ever returns a value other + * than false or nil. If the block is not + * given, Ruby adds an implicit block of {|obj| obj} (that + * is any? 
will return true if at least one + * of the collection members is not false or + * nil. + * + * %w{ant bear cat}.any? {|word| word.length >= 3} #=> true + * %w{ant bear cat}.any? {|word| word.length >= 4} #=> true + * [ nil, true, 99 ].any? #=> true + * + */ + +static mrb_value +enum_any(mrb_state *mrb, mrb_value obj) +{ + mrb_value result = mrb_false_value(); + + mrb_block_call(mrb, obj, id_each, 0, 0, ENUMFUNC(any), &result); + return result; +} + +static mrb_value +min_i(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv) +{ + mrb_value cmp; + + ENUM_WANT_SVALUE(mrb); + + if (memo->tt == MRB_TT_FREE) { + *memo = i; + } + else { + cmp = mrb_funcall(mrb, i, "<=>", 1, *memo); + if (mrb_cmpint(mrb, cmp, i, *memo) < 0) { + *memo = i; + } + } + return mrb_nil_value(); +} + +static mrb_value +min_ii(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv) +{ + mrb_value cmp; + + ENUM_WANT_SVALUE(mrb); + + if (memo->tt == MRB_TT_FREE) { + *memo = i; + } + else { + cmp = mrb_yield_values(2, i, *memo); + if (mrb_cmpint(mrb, cmp, i, *memo) < 0) { + *memo = i; + } + } + return mrb_nil_value(); +} + +/* 15.3.2.2.14 */ +/* + * call-seq: + * enum.min -> obj + * enum.min {| a,b | block } -> obj + * + * Returns the object in enum with the minimum value. The + * first form assumes all objects implement Comparable; + * the second uses the block to return a <=> b. 
+ * + * a = %w(albatross dog horse) + * a.min #=> "albatross" + * a.min {|a,b| a.length <=> b.length } #=> "dog" + */ + +static mrb_value +enum_min(mrb_state *mrb, mrb_value obj) +{ + mrb_value result; + result.tt = MRB_TT_FREE; + + if (mrb_block_given_p()) { + mrb_block_call(mrb, obj, id_each, 0, 0, min_ii, &result); + } + else { + mrb_block_call(mrb, obj, id_each, 0, 0, min_i, &result); + } + if (result.tt == MRB_TT_FREE) return mrb_nil_value(); + return result; +} + +static mrb_value +max_i(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv) +{ + mrb_value cmp; + + ENUM_WANT_SVALUE(mrb); + + if (memo->tt == MRB_TT_FREE) { + *memo = i; + } + else { + cmp = mrb_funcall(mrb, i, "<=>", 1, *memo); + if (mrb_cmpint(mrb, cmp, i, *memo) > 0) { + *memo = i; + } + } + return mrb_nil_value(); +} + +static mrb_value +max_ii(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv) +{ + mrb_value cmp; + + ENUM_WANT_SVALUE(mrb); + + if (memo->tt == MRB_TT_FREE) { + *memo = i; + } + else { + cmp = mrb_yield_values(2, i, *memo); + if (mrb_cmpint(mrb, cmp, i, *memo) > 0) { + *memo = i; + } + } + return mrb_nil_value(); +} + +/* 15.3.2.2.13 */ +/* + * call-seq: + * enum.max -> obj + * enum.max {|a,b| block } -> obj + * + * Returns the object in _enum_ with the maximum value. The + * first form assumes all objects implement Comparable; + * the second uses the block to return a <=> b. 
+ * + * a = %w(albatross dog horse) + * a.max #=> "horse" + * a.max {|a,b| a.length <=> b.length } #=> "albatross" + */ + +static mrb_value +enum_max(mrb_state *mrb, mrb_value obj) +{ + mrb_value result; + result.tt = MRB_TT_FREE; + + if (mrb_block_given_p()) { + mrb_block_call(mrb, obj, id_each, 0, 0, max_ii, &result); + } + else { + mrb_block_call(mrb, obj, id_each, 0, 0, max_i, &result); + } + if (result.tt == MRB_TT_FREE) return mrb_nil_value(); + return result; +} + +static mrb_value +member_i(mrb_state *mrb, mrb_value iter, mrb_value *memo, int argc, mrb_value *argv) +{ + if (mrb_equal(mrb, enum_values_pack(mrb, argc, argv), memo[0])) { + memo[1] = mrb_true_value(); + mrb_iter_break(); + } + return mrb_nil_value(); +} + +/* 15.3.2.2.10 */ +/* 15.3.2.2.15 */ +/* + * call-seq: + * enum.include?(obj) -> true or false + * enum.member?(obj) -> true or false + * + * Returns true if any member of enum equals + * obj. Equality is tested using ==. + * + * IO.constants.include? :SEEK_SET #=> true + * IO.constants.include? :SEEK_NO_FURTHER #=> false + * + */ + +static mrb_value +enum_member(mrb_state *mrb, mrb_value obj) +{ + mrb_value memo[2]; + mrb_value val; + + mrb_get_args(mrb, "o", &val); + + memo[0] = val; + memo[1] = mrb_false_value(); + mrb_block_call(mrb, obj, id_each, 0, 0, member_i, memo); + return memo[1]; +} + +static mrb_value +each_with_index_i(mrb_state *mrb, mrb_value i, long *memo, int argc, void *argv) +{ + long n = (*memo)++; + + return mrb_yield_values(2, enum_values_pack(mrb, argc, argv), mrb_fixnum_value(n)); +} + +/* 15.3.2.2.5 */ +/* + * call-seq: + * enum.each_with_index(*args) {|obj, i| block } -> enum + * enum.each_with_index(*args) -> an_enumerator + * + * Calls block with two arguments, the item and its index, + * for each item in enum. Given arguments are passed through + * to #each(). + * + * If no block is given, an enumerator is returned instead. 
+ * + * hash = Hash.new + * %w(cat dog wombat).each_with_index {|item, index| + * hash[item] = index + * } + * hash #=> {"cat"=>0, "dog"=>1, "wombat"=>2} + * + */ + +static mrb_value +enum_each_with_index(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj) +{ + long memo; + + //RETURN_ENUMERATOR(obj, argc, argv); + + memo = 0; + mrb_block_call(mrb, obj, id_each, argc, argv, each_with_index_i, &memo); + return obj; +} + +static mrb_value +enum_each_with_index_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return enum_each_with_index(mrb, argc, argv, self); +} + +/* + * call-seq: + * enum.reverse_each(*args) {|item| block } -> enum + * enum.reverse_each(*args) -> an_enumerator + * + * Builds a temporary array and traverses that array in reverse order. + * + * If no block is given, an enumerator is returned instead. + * + */ + +#endif + +/* + * The Enumerable mixin provides collection classes with + * several traversal and searching methods, and with the ability to + * sort. The class must provide a method each, which + * yields successive members of the collection. If + * Enumerable#max, #min, or + * #sort is used, the objects in the collection must also + * implement a meaningful <=> operator, as these methods + * rely on an ordering between members of the collection. 
+ */ + +void +mrb_init_enumerable(mrb_state *mrb) +{ + struct RClass *cenum; +//#undef mrb_intern +//#define mrb_intern(str) mrb_intern_const(str) + + cenum = mrb_define_module(mrb, "Enumerable"); + +#if 0 + //mrb_define_class_method(mrb, cenum, "all?", enum_all, ARGS_NONE()); /* 15.3.2.2.1 */ + //mrb_define_class_method(mrb, cenum, "any?", enum_any, ARGS_NONE()); /* 15.3.2.2.2 */ + //mrb_define_class_method(mrb, cenum, "collect", enum_collect, ARGS_NONE()); /* 15.3.2.2.3 */ + //mrb_define_class_method(mrb, cenum, "detect", enum_find_m, ARGS_ANY()); /* 15.3.2.2.4 */ + //mrb_define_class_method(mrb, cenum, "each_with_index", enum_each_with_index_m, ARGS_ANY()); /* 15.3.2.2.5 */ + mrb_define_class_method(mrb, cenum, "entries", enum_to_a_m, ARGS_ANY()); /* 15.3.2.2.6 */ + //mrb_define_class_method(mrb, cenum, "find", enum_find_m, ARGS_ANY()); /* 15.3.2.2.7 */ + //mrb_define_class_method(mrb, cenum, "find_all", enum_find_all, ARGS_NONE()); /* 15.3.2.2.8 */ + //mrb_define_class_method(mrb, cenum, "grep", enum_grep, ARGS_REQ(1)); /* 15.3.2.2.9 */ + mrb_define_class_method(mrb, cenum, "include?", enum_member, ARGS_REQ(1)); /* 15.3.2.2.10 */ + //mrb_define_class_method(mrb, cenum, "inject", enum_inject_m, ARGS_ANY()); /* 15.3.2.2.11 */ + //mrb_define_class_method(mrb, cenum, "map", enum_collect, ARGS_NONE()); /* 15.3.2.2.12 */ + //mrb_define_class_method(mrb, cenum, "max", enum_max, ARGS_NONE()); /* 15.3.2.2.13 */ + //mrb_define_class_method(mrb, cenum, "min", enum_min, ARGS_NONE()); /* 15.3.2.2.14 */ + mrb_define_class_method(mrb, cenum, "member?", enum_member, ARGS_REQ(1)); /* 15.3.2.2.15 */ + //mrb_define_class_method(mrb, cenum, "partition", enum_partition, ARGS_NONE()); /* 15.3.2.2.16 */ + //mrb_define_class_method(mrb, cenum, "reject", enum_reject, ARGS_NONE()); /* 15.3.2.2.17 */ + //mrb_define_class_method(mrb, cenum, "select", enum_find_all, ARGS_NONE()); /* 15.3.2.2.18 */ + //mrb_define_class_method(mrb, cenum, "sort", enum_sort, ARGS_NONE()); /* 15.3.2.2.19 */ + 
mrb_define_class_method(mrb, cenum, "to_a", enum_to_a_m, ARGS_ANY()); /* 15.3.2.2.20 */ + id_eqq = mrb_intern(mrb, "==="); + id_each = mrb_intern(mrb, "each"); + id_cmp = mrb_intern(mrb, "<=>"); + id_next = mrb_intern(mrb, "next"); + id_size = mrb_intern(mrb, "size"); +#endif +} + diff --git a/src/error.c b/src/error.c new file mode 100644 index 0000000000..9dbfc972f4 --- /dev/null +++ b/src/error.c @@ -0,0 +1,479 @@ +#include "mruby.h" +#include +#include +#include +#include +#include "error.h" +#include "opcode.h" +#include "irep.h" +#include "mruby/proc.h" +#include "mruby/numeric.h" +#include "variable.h" +#include "mruby/string.h" +#include "eval_intern.h" +#include "mruby/class.h" + +#define warn_printf printf + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +mrb_value +mrb_exc_new(mrb_state *mrb, struct RClass *c, const char *ptr, long len) +{ + return mrb_funcall(mrb, mrb_obj_value(c), "new", 1, mrb_str_new(mrb, ptr, len)); +} + +mrb_value +mrb_exc_new3(mrb_state *mrb, struct RClass* c, mrb_value str) +{ + //StringValue(str); + mrb_string_value(mrb, &str); + return mrb_funcall(mrb, mrb_obj_value(c), "new", 1, str); +} + +//mrb_value make_exception(mrb_state *mrb, int argc, mrb_value *argv, int isstr); +/* + * call-seq: + * Exception.new(msg = nil) -> exception + * + * Construct a new Exception object, optionally passing in + * a message. + */ + +static mrb_value +exc_initialize(mrb_state *mrb, mrb_value exc) +{ + mrb_value mesg; + + mrb_get_args(mrb, "o", &mesg); + mrb_iv_set(mrb, exc, mrb_intern(mrb, "mesg"), mesg); + + return exc; +} + +/* + * Document-method: exception + * + * call-seq: + * exc.exception(string) -> an_exception or exc + * + * With no argument, or if the argument is the same as the receiver, + * return the receiver. Otherwise, create a new + * exception object of the same class as the receiver, but with a + * message equal to string.to_str. 
+ * + */ + +static mrb_value +exc_exception(mrb_state *mrb, mrb_value self) +{ + mrb_value exc; + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + + if (argc == 0) return self; + if (argc == 1 && mrb_obj_equal(mrb, self, argv[0])) return self; + exc = mrb_obj_clone(mrb, self); + exc_initialize(mrb, exc); + + return exc; +} + +/* + * call-seq: + * exception.to_s -> string + * + * Returns exception's message (or the name of the exception if + * no message is set). + */ + +static mrb_value +exc_to_s(mrb_state *mrb, mrb_value exc) +{ + mrb_value mesg = mrb_attr_get(mrb, exc, mrb_intern(mrb, "mesg")); + + if (mrb_nil_p(mesg)) return mrb_str_new2(mrb, mrb_obj_classname(mrb, exc)); + return mesg; +} + +/* + * call-seq: + * exception.message -> string + * + * Returns the result of invoking exception.to_s. + * Normally this returns the exception's message or name. By + * supplying a to_str method, exceptions are agreeing to + * be used where Strings are expected. + */ + +static mrb_value +exc_message(mrb_state *mrb, mrb_value exc) +{ + return mrb_funcall(mrb, exc, "to_s", 0); +} + +/* + * call-seq: + * exception.inspect -> string + * + * Return this exception's class name an message + */ + +static mrb_value +exc_inspect(mrb_state *mrb, mrb_value exc) +{ + mrb_value str, klass; + + klass = mrb_str_new2(mrb, mrb_obj_classname(mrb, exc)); + exc = mrb_obj_as_string(mrb, exc); + if (RSTRING_LEN(exc) == 0) { + return klass; + } + + str = mrb_str_new2(mrb, "#<"); + mrb_str_append(mrb, str, klass); + mrb_str_cat2(mrb, str, ": "); + mrb_str_append(mrb, str, exc); + mrb_str_cat2(mrb, str, ">"); + + return str; +} + + +static mrb_value +exc_equal(mrb_state *mrb, mrb_value exc) +{ + mrb_value obj; + mrb_value mesg; + mrb_sym id_mesg = mrb_intern(mrb, "mesg"); + + mrb_get_args(mrb, "o", &obj); + + if (mrb_obj_equal(mrb, exc, obj)) return mrb_true_value(); + + if (mrb_obj_class(mrb, exc) != mrb_obj_class(mrb, obj)) { + if ( mrb_respond_to(mrb, obj, mrb_intern(mrb, 
"message")) ) { + mesg = mrb_funcall(mrb, obj, "message", 0); + } + else + return mrb_false_value(); + } + else { + mesg = mrb_attr_get(mrb, obj, id_mesg); + } + + if (!mrb_equal(mrb, mrb_attr_get(mrb, exc, id_mesg), mesg)) + return mrb_false_value(); + return mrb_true_value(); +} + +void +mrb_exc_raise(mrb_state *mrb, mrb_value exc) +{ + mrb->exc = mrb_object(exc); + longjmp(*(jmp_buf*)mrb->jmp, 1); +} + +void +mrb_raise_va(mrb_state *mrb, struct RClass *c, const char *fmt, va_list args) +{ + char buf[256]; + + vsnprintf(buf, 256, fmt, args); + mrb_exc_raise(mrb, mrb_exc_new(mrb, c, buf, strlen(buf))); +} + +void +mrb_raise(mrb_state *mrb, struct RClass *c, const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + vsnprintf(buf, 256, fmt, args); + mrb_raise_va(mrb, c, fmt, args); + va_end(args); +} + +void +mrb_name_error(mrb_state *mrb, mrb_sym id, const char *fmt, ...) +{ + mrb_value exc, argv[2]; + va_list args; + char buf[256]; + + va_start(args, fmt); + //argv[0] = mrb_vsprintf(fmt, args); + vsnprintf(buf, 256, fmt, args); + argv[0] = mrb_str_new(mrb, buf, strlen(buf)); + va_end(args); + + argv[1] = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id)); + exc = mrb_class_new_instance(mrb, 2, argv, E_NAME_ERROR); + mrb_exc_raise(mrb, exc); +} +mrb_value +mrb_sprintf(mrb_state *mrb, const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + vsnprintf(buf, 256, fmt, args); + va_end(args); + return mrb_str_new(mrb, buf, strlen(buf)); +} + +void +mrb_warn(const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + snprintf(buf, 256, "warning: %s", fmt); + printf(buf, args); + va_end(args); +} + + +void +mrb_warning(const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + snprintf(buf, 256, "warning: %s", fmt); + printf(buf, args); + va_end(args); +} + +void +mrb_bug(const char *fmt, ...) 
+{ + va_list args; + char buf[256]; + + va_start(args, fmt); + snprintf(buf, 256, "bug: %s", fmt); + printf(buf, args); + va_end(args); +} + +static const char * +mrb_strerrno(int err) +{ +#define defined_error(name, num) if (err == num) return name; +#define undefined_error(name) +//#include "known_errors.inc" +#undef defined_error +#undef undefined_error + return NULL; +} + +void +mrb_bug_errno(const char *mesg, int errno_arg) +{ + if (errno_arg == 0) + mrb_bug("%s: errno == 0 (NOERROR)", mesg); + else { + const char *errno_str = mrb_strerrno(errno_arg); + if (errno_str) + mrb_bug("%s: %s (%s)", mesg, strerror(errno_arg), errno_str); + else + mrb_bug("%s: %s (%d)", mesg, strerror(errno_arg), errno_arg); + } +} + +int +sysexit_status(mrb_state *mrb, mrb_value err) +{ + mrb_value st = mrb_iv_get(mrb, err, mrb_intern(mrb, "status")); + return mrb_fixnum(st); +} + +void +error_pos(void) +{ +#if 0 + const char *sourcefile = mrb_sourcefile(); + int sourceline = mrb_sourceline(); + + if (sourcefile) { + if (sourceline == 0) { + warn_printf("%s", sourcefile); + } + else if (mrb_frame_callee()) { + warn_printf("%s:%d:in `%s'", sourcefile, sourceline, + mrb_sym2name(mrb, mrb_frame_callee())); + } + else { + warn_printf("%s:%d", sourcefile, sourceline); + } + } +#endif +} + +static void +set_backtrace(mrb_state *mrb, mrb_value info, mrb_value bt) +{ + mrb_funcall(mrb, info, "set_backtrace", 1, bt); +} + +mrb_value +make_exception(mrb_state *mrb, int argc, mrb_value *argv, int isstr) +{ + mrb_value mesg; + int n; + + mesg = mrb_nil_value(); + switch (argc) { + case 0: + break; + case 1: + if (mrb_nil_p(argv[0])) + break; + if (isstr) { + mesg = mrb_check_string_type(mrb, argv[0]); + if (!mrb_nil_p(mesg)) { + mesg = mrb_exc_new3(mrb, mrb->eRuntimeError_class, mesg); + break; + } + } + n = 0; + goto exception_call; + + case 2: + case 3: + n = 1; +exception_call: + //if (argv[0] == sysstack_error) return argv[0]; + + //CONST_ID(mrb, exception, "exception"); + //mesg = 
mrb_check_funcall(mrb, argv[0], exception, n, argv+1); + //if (mrb_nil_p(mesg)) { + // /* undef */ + // mrb_raise(mrb, E_TYPE_ERROR, "exception class/object expected"); + //} + if (mrb_respond_to(mrb, argv[0], mrb_intern(mrb, "exception"))) { + mesg = mrb_funcall(mrb, argv[0], "exception", n, argv+1); + } + else { + /* undef */ + mrb_raise(mrb, E_TYPE_ERROR, "exception class/object expected"); + } + + break; + default: + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 0..3)", argc); + break; + } + if (argc > 0) { + if (!mrb_obj_is_kind_of(mrb, mesg, mrb->eException_class)) + mrb_raise(mrb, E_TYPE_ERROR, "exception object expected"); + if (argc > 2) + set_backtrace(mrb, mesg, argv[2]); + } + + return mesg; +} + +mrb_value +mrb_make_exception(mrb_state *mrb, int argc, mrb_value *argv) +{ + return make_exception(mrb, argc, argv, TRUE); +} + +void +mrb_sys_fail(mrb_state *mrb, const char *mesg) +{ + mrb_raise(mrb, mrb->eRuntimeError_class, "%s", mesg); +} + +static mrb_value +mrb_exc_c_exception(mrb_state *mrb, mrb_value exc) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return mrb_make_exception(mrb, argc, argv); +} + +static mrb_value +mrb_exc_exception(mrb_state *mrb, mrb_value exc) +{ + mrb_value *argv; + int argc; + mrb_value exclass; + + mrb_get_args(mrb, "*", &argv, &argc); + if (argc == 0) return exc; + exclass = mrb_obj_value(mrb_class(mrb, exc)); + return mrb_funcall(mrb, exclass, mrb_intern(mrb, "exception"), argc, argv); +} + +void +mrb_init_exception(mrb_state *mrb) +{ + struct RClass *e; + struct RClass *eTypeError_class; + struct RClass *eArgumentError_class; + struct RClass *eIndexError_class; + struct RClass *eRangeError_class; + struct RClass *eNameError_class; + struct RClass *eNoMethodError_class; + struct RClass *eScriptError_class; + struct RClass *eSyntaxError_class; + struct RClass *eLoadError_class; + struct RClass *eSystemCallError_class; + struct RClass *eLocalJumpError_class; + struct 
RClass *eRegexpError_class; + struct RClass *eZeroDivisionError_class; + struct RClass *eEncodingError_class; + struct RClass *eNotImpError_class; + struct RClass *eFloatDomainError_class; + struct RClass *eKeyError_class; + + mrb->eException_class = e = mrb_define_class(mrb, "Exception", mrb->object_class); /* 15.2.22 */ + mrb_define_class_method(mrb, e, "exception", mrb_instance_new, ARGS_ANY()); + mrb_define_method(mrb, e, "exception", exc_exception, ARGS_ANY()); + mrb_define_method(mrb, e, "initialize", exc_initialize, ARGS_ANY()); + mrb_define_method(mrb, e, "==", exc_equal, ARGS_REQ(1)); + mrb_define_method(mrb, e, "to_s", exc_to_s, ARGS_NONE()); + mrb_define_method(mrb, e, "message", exc_message, ARGS_NONE()); + mrb_define_method(mrb, e, "inspect", exc_inspect, ARGS_NONE()); + + mrb->eStandardError_class = mrb_define_class(mrb, "StandardError", mrb->eException_class); /* 15.2.23 */ + mrb->eRuntimeError_class = mrb_define_class(mrb, "RuntimeError", mrb->eStandardError_class); /* 15.2.28 */ + + eTypeError_class = mrb_define_class(mrb, "TypeError", mrb->eStandardError_class); /* 15.2.29 */ + eArgumentError_class = mrb_define_class(mrb, "ArgumentError", mrb->eStandardError_class); /* 15.2.24 */ + eIndexError_class = mrb_define_class(mrb, "IndexError", mrb->eStandardError_class); /* 15.2.33 */ + eRangeError_class = mrb_define_class(mrb, "RangeError", mrb->eStandardError_class); /* 15.2.26 */ + eNameError_class = mrb_define_class(mrb, "NameError", mrb->eStandardError_class); /* 15.2.31 */ + + eNoMethodError_class = mrb_define_class(mrb, "NoMethodError", eNameError_class); /* 15.2.32 */ + eScriptError_class = mrb_define_class(mrb, "ScriptError", mrb->eException_class); /* 15.2.37 */ + eSyntaxError_class = mrb_define_class(mrb, "SyntaxError", eScriptError_class); /* 15.2.38 */ + eLoadError_class = mrb_define_class(mrb, "LoadError", eScriptError_class); /* 15.2.39 */ + eSystemCallError_class = mrb_define_class(mrb, "SystemCallError", mrb->eStandardError_class); /* 
15.2.36 */ + eLocalJumpError_class = mrb_define_class(mrb, "LocalJumpError", mrb->eStandardError_class); /* 15.2.25 */ + eRegexpError_class = mrb_define_class(mrb, "RegexpError", mrb->eStandardError_class); /* 15.2.27 */ + eZeroDivisionError_class = mrb_define_class(mrb, "ZeroDivisionError", mrb->eStandardError_class); /* 15.2.30 */ + + eEncodingError_class = mrb_define_class(mrb, "EncodingError", mrb->eStandardError_class); + eNotImpError_class = mrb_define_class(mrb, "NotImplementedError", eScriptError_class); + + eFloatDomainError_class = mrb_define_class(mrb, "FloatDomainError", eRangeError_class); + eKeyError_class = mrb_define_class(mrb, "KeyError", eIndexError_class); +} diff --git a/src/error.h b/src/error.h new file mode 100644 index 0000000000..5b1873b1db --- /dev/null +++ b/src/error.h @@ -0,0 +1,21 @@ +#ifndef MRUBY_ERROR_H +#define MRUBY_ERROR_H + +struct RException { + MRUBY_OBJECT_HEADER; +}; + +void mrb_sys_fail(mrb_state *mrb, const char *mesg); +void mrb_exc_raise(mrb_state *mrb, mrb_value mesg); +void mrb_bug_errno(const char*, int); +int sysexit_status(mrb_state *mrb, mrb_value err); +void error_pos(void); +mrb_value mrb_exc_new3(mrb_state *mrb, struct RClass* c, mrb_value str); +mrb_value make_exception(mrb_state *mrb, int argc, mrb_value *argv, int isstr); +mrb_value mrb_exc_new(mrb_state *mrb, struct RClass *c, const char *ptr, long len); +mrb_value mrb_make_exception(mrb_state *mrb, int argc, mrb_value *argv); +mrb_value mrb_sprintf(mrb_state *mrb, const char *fmt, ...); +void mrb_name_error(mrb_state *mrb, mrb_sym id, const char *fmt, ...); +void mrb_exc_print(mrb_state *mrb, struct RObject *exc); + +#endif /* MRUBY_ERROR_H */ diff --git a/src/etc.c b/src/etc.c new file mode 100644 index 0000000000..8c98700a30 --- /dev/null +++ b/src/etc.c @@ -0,0 +1,280 @@ +#include "mruby.h" +#include "mdata.h" +#include "mruby/string.h" +#include "error.h" +#include "mruby/numeric.h" + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 
1 +#endif + +void +ruby_xfree(void *x) +{ + //if (x) + // vm_xfree(&mrb_objspace, x); +} + +struct RData* +mrb_data_object_alloc(mrb_state *mrb, struct RClass *klass, void *ptr, const struct mrb_data_type *type) +{ + struct RData *data; + + data = mrb_obj_alloc(mrb, MRB_TT_DATA, klass); + data->data = ptr; + data->type = type; + + return data; +} + +void * +mrb_check_datatype(mrb_state *mrb, mrb_value obj, const struct mrb_data_type *type) +{ + static const char mesg[] = "wrong argument type %s (expected %s)"; + + if (SPECIAL_CONST_P(obj) || (mrb_type(obj) != MRB_TT_DATA)) { + mrb_check_type(mrb, obj, MRB_TT_DATA); + } + if (DATA_TYPE(obj) != type) { + const char *etype = DATA_TYPE(obj)->struct_name; + mrb_raise(mrb, E_TYPE_ERROR, mesg, etype, type->struct_name); + } + return DATA_PTR(obj); +} + +mrb_value +mrb_lastline_get(mrb_state *mrb) +{ + //mrb_value *var = mrb_svar(0); + //if (var) { + // return *var; + //} + //return mrb_nil_value(); + mrb_value *argv; + int argc; + mrb_get_args(mrb, "*", &argv, &argc); + if (argc < 1) { + return mrb_nil_value(); + } + else + { + return argv[0]; + } +} + +mrb_value +mrb_rescue2(mrb_state *mrb, mrb_value (* b_proc) (ANYARGS), mrb_value *data1, + mrb_value (* r_proc) (ANYARGS), mrb_value *data2, ...) 
+{ + mrb_value result = (*b_proc) (mrb, data1); + return result; +} + +mrb_value +mrb_rescue(mrb_state *mrb, mrb_value (* b_proc)(ANYARGS), mrb_value *data1, + mrb_value (* r_proc)(ANYARGS), mrb_value *data2) +{ + return mrb_rescue2(mrb, b_proc, data1, r_proc, data2, mrb->eStandardError_class, + mrb_fixnum_value(0)); +} +/* ------------------------------------------------ */ +/* + * Calls func(obj, arg, recursive), where recursive is non-zero if the + * current method is called recursively on obj + */ + +mrb_value +mrb_exec_recursive(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int), mrb_value obj, void *arg) +{ + // return mrb_exec_recursive(mrb, io_puts_ary, line, &out); + return func(mrb, obj, *(mrb_value*)arg, 0); +} + +/* + * Calls func(obj, arg, recursive), where recursive is non-zero if the + * current method is called recursively on the ordered pair + */ + +mrb_value +mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int), + mrb_value obj, mrb_value paired_obj, void* arg) +{ + // return mrb_exec_recursive_paired(mrb, recursive_eql, hash1, hash2, mrb_fixnum_value((int)&data)); + return func(mrb, obj, paired_obj, 0); +} + +mrb_sym +mrb_to_id(mrb_state *mrb, mrb_value name) +{ + mrb_value tmp; + mrb_sym id; + + switch (mrb_type(name)) { + default: + tmp = mrb_check_string_type(mrb, name); + if (mrb_nil_p(tmp)) { + tmp = mrb_inspect(mrb, name); + mrb_raise(mrb, E_TYPE_ERROR, "%s is not a symbol", + RSTRING_PTR(tmp)); + } + name = tmp; + /* fall through */ + case MRB_TT_STRING: + name = mrb_str_intern(mrb, name); + /* fall through */ + case MRB_TT_SYMBOL: + return SYM2ID(name); + } + return id; +} + +/* + * call-seq: + * proc { |...| block } -> a_proc + * + * Equivalent to Proc.new. 
+ */ + +mrb_value +mrb_block_proc(void) +{ + return mrb_nil_value();//proc_new(mrb_cProc, FALSE); +} + +/* + * Document-method: __id__ + * Document-method: object_id + * + * call-seq: + * obj.__id__ -> fixnum + * obj.object_id -> fixnum + * + * Returns an integer identifier for obj. The same number will + * be returned on all calls to id for a given object, and + * no two active objects will share an id. + * Object#object_id is a different concept from the + * :name notation, which returns the symbol id of + * name. Replaces the deprecated Object#id. + */ + +/* + * call-seq: + * obj.hash -> fixnum + * + * Generates a Fixnum hash value for this object. This + * function must have the property that a.eql?(b) implies + * a.hash == b.hash. The hash value is used by class + * Hash. Any hash value that exceeds the capacity of a + * Fixnum will be truncated before being used. + */ + +int +mrb_obj_id(mrb_value obj) +{ + /* + * 32-bit mrb_value space + * MSB ------------------------ LSB + * false 00000000000000000000000000000000 + * true 00000000000000000000000000000010 + * nil 00000000000000000000000000000100 + * undef 00000000000000000000000000000110 + * symbol ssssssssssssssssssssssss00001110 + * object oooooooooooooooooooooooooooooo00 = 0 (mod sizeof(RVALUE)) + * fixnum fffffffffffffffffffffffffffffff1 + * + * object_id space + * LSB + * false 00000000000000000000000000000000 + * true 00000000000000000000000000000010 + * nil 00000000000000000000000000000100 + * undef 00000000000000000000000000000110 + * symbol 000SSSSSSSSSSSSSSSSSSSSSSSSSSS0 S...S % A = 4 (S...S = s...s * A + 4) + * object oooooooooooooooooooooooooooooo0 o...o % A = 0 + * fixnum fffffffffffffffffffffffffffffff1 bignum if required + * + * where A = sizeof(RVALUE)/4 + * + * sizeof(RVALUE) is + * 20 if 32-bit, double is 4-byte aligned + * 24 if 32-bit, double is 8-byte aligned + * 40 if 64-bit + */ + /* + * 128-bit mrb_value space + * MSB -------- LSB + * x86 [0,1] [2,3] [4,5] [6,7] [8,9] [A,B] [C,D] [E,F] 
+ * 7 6 5 4 3 2 1 0 + * 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF + * FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210 + * false 0000000000000000 0000000000000000 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000001 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx + * true 0000000000000001 0000000000000000 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000010 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx + * nil 0000000000000001 0000000000000000 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000001 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx + * undef 0000000000000000 0000000000000000 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000101 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx + * symbol ssssssssssssssss ssssssssssssssss xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000100 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx + * object oooooooooooooooo oooooooooooooo00 = 0 (mod sizeof(RVALUE)) + (1)fixnum 0000000000000001 0000000000000000 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000011 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx + * float 0000000000000001 0000000000000000 0000000000000000 0000000000000000 xxxxxxxx00000011 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx + * <-- mrb_float --> xxxxxxxx00001101 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx + * + * object_id space + * LSB + * false 0000000000000000 0000000000000000 + * true 0000000000000000 0000000000000010 + * nil 0000000000000000 0000000000000100 + * undef 0000000000000000 0000000000000110 + * symbol 000SSSSSSSSSSSS SSSSSSSSSSSSSSS0 S...S % A = 4 (S...S = s...s * A + 4) + * object ooooooooooooooo ooooooooooooooo0 o...o % A = 0 + * fixnum ffffffffffffffff fffffffffffffff1 bignum if required + * + * where A = sizeof(RVALUE)/4 + * + * sizeof(RVALUE) is + * 20 if 32-bit, double is 4-byte aligned + * 
24 if 32-bit, double is 8-byte aligned + * 40 if 64-bit + */ + /* tt:0_27 */ + switch (mrb_type(obj)) { + case MRB_TT_FREE: + return 0; /* not define */ + case MRB_TT_FALSE: + if (mrb_nil_p(obj)) + return 4; + return 0; + case MRB_TT_TRUE: + return 2; + case MRB_TT_FIXNUM: + return mrb_fixnum(obj)*2+1; /* odd number */ + case MRB_TT_SYMBOL: + return SYM2ID(obj) * 2; + case MRB_TT_UNDEF: + return 0; /* not define */ + case MRB_TT_FLOAT: + return (int)mrb_float(obj)*2; /* even number */ + case MRB_TT_OBJECT: + case MRB_TT_CLASS: + case MRB_TT_MODULE: + case MRB_TT_ICLASS: + case MRB_TT_SCLASS: + case MRB_TT_PROC: + case MRB_TT_ARRAY: + case MRB_TT_HASH: + case MRB_TT_STRING: + case MRB_TT_RANGE: + case MRB_TT_REGEX: + case MRB_TT_STRUCT: + case MRB_TT_EXCEPTION: + case MRB_TT_MATCH: + case MRB_TT_FILE: + case MRB_TT_DATA: + case MRB_TT_THREAD: + case MRB_TT_THREADGRP: + default: + return mrb_fixnum(obj); /* even number */ + } +} + diff --git a/src/eval_intern.h b/src/eval_intern.h new file mode 100644 index 0000000000..c3e8cdf50e --- /dev/null +++ b/src/eval_intern.h @@ -0,0 +1,217 @@ +#ifndef RUBY_EVAL_INTERN_H +#define RUBY_EVAL_INTERN_H + +//#include "ruby/ruby.h" +#include "mruby.h" +#define HAVE_STRING_H +//#include "vm_core.h" +#include "node.h" + +/* other frame flag */ +#define VM_FRAME_FLAG_PASSED 0x0100 +#define PASS_PASSED_BLOCK_TH(th) do { \ + (th)->passed_block = GC_GUARDED_PTR_REF((mrb_block_t *)(th)->cfp->lfp[0]); \ + (th)->cfp->flag |= VM_FRAME_FLAG_PASSED; \ +} while (0) + +#define PASS_PASSED_BLOCK() do { \ + mrb_thread_t * const __th__ = GET_THREAD(); \ + PASS_PASSED_BLOCK_TH(__th__); \ +} while (0) + +#ifdef HAVE_STDLIB_H +#include +#endif +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +#include +#include + +#ifdef __APPLE__ +#include +#endif + +/* Make alloca work the best possible way. 
*/ +#ifdef __GNUC__ +# ifndef atarist +# ifndef alloca +# define alloca __builtin_alloca +# endif +# endif /* atarist */ +#else +# ifdef HAVE_ALLOCA_H +# include +# else +# ifdef _AIX +#pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +void *alloca(); +# endif +# endif /* AIX */ +# endif /* HAVE_ALLOCA_H */ +#endif /* __GNUC__ */ + +#ifndef HAVE_STRING_H +char *strrchr(const char *, const char); +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef HAVE_NET_SOCKET_H +#include +#endif + + + +#include +#include +#include + +#ifdef HAVE_SYS_SELECT_H +#include +#endif + +/* + Solaris sys/select.h switches select to select_large_fdset to support larger + file descriptors if FD_SETSIZE is larger than 1024 on 32bit environment. + But Ruby doesn't change FD_SETSIZE because fd_set is allocated dynamically. + So following definition is required to use select_large_fdset. +*/ +#ifdef HAVE_SELECT_LARGE_FDSET +#define select(n, r, w, e, t) select_large_fdset(n, r, w, e, t) +#endif + +#ifdef HAVE_SYS_PARAM_H +#include +#endif + +#include + +#define SAVE_ROOT_JMPBUF(th, stmt) do \ + if (ruby_setjmp((th)->root_jmpbuf) == 0) { \ + stmt; \ + } while (0) + +#define TH_PUSH_TAG(th) do { \ + mrb_thread_t * const _th = th; \ + struct mrb_vm_tag _tag; \ + _tag.tag = 0; \ + _tag.prev = _th->tag; \ + _th->tag = &_tag; + +#define TH_POP_TAG() \ + _th->tag = _tag.prev; \ +} while (0) + +#define TH_POP_TAG2() \ + _th->tag = _tag.prev + +#define PUSH_TAG() TH_PUSH_TAG(GET_THREAD()) +#define POP_TAG() TH_POP_TAG() + +#define TH_EXEC_TAG() ruby_setjmp(_th->tag->buf) + +#define EXEC_TAG() \ + TH_EXEC_TAG() + +#define TH_JUMP_TAG(th, st) do { \ + ruby_longjmp(th->tag->buf,(st)); \ +} while (0) + +//#define JUMP_TAG(st) TH_JUMP_TAG(GET_THREAD(), st) + +enum ruby_tag_type { + RUBY_TAG_RETURN = 0x1, + RUBY_TAG_BREAK = 0x2, + RUBY_TAG_NEXT = 0x3, + RUBY_TAG_RETRY = 0x4, + RUBY_TAG_REDO = 0x5, + RUBY_TAG_RAISE = 0x6, + RUBY_TAG_THROW = 0x7, + RUBY_TAG_FATAL = 0x8, + 
RUBY_TAG_MASK = 0xf +}; +#define TAG_RETURN RUBY_TAG_RETURN +#define TAG_BREAK RUBY_TAG_BREAK +#define TAG_NEXT RUBY_TAG_NEXT +#define TAG_RETRY RUBY_TAG_RETRY +#define TAG_REDO RUBY_TAG_REDO +#define TAG_RAISE RUBY_TAG_RAISE +#define TAG_THROW RUBY_TAG_THROW +#define TAG_FATAL RUBY_TAG_FATAL +#define TAG_MASK RUBY_TAG_MASK + +#define NEW_THROW_OBJECT(val, pt, st) \ + ((mrb_value)mrb_node_newnode(NODE_LIT, (mrb_value)(val), (mrb_value)(pt), (mrb_value)(st))) +//#define SET_THROWOBJ_CATCH_POINT(obj, val) +// (RNODE((obj))->u2.value = (val)) +//#define SET_THROWOBJ_STATE(obj, val) +// (RNODE((obj))->u3.value = (val)) + +#define GET_THROWOBJ_VAL(obj) ((mrb_value)RNODE((obj))->u1.value) +#define GET_THROWOBJ_CATCH_POINT(obj) ((mrb_value*)RNODE((obj))->u2.value) +#define GET_THROWOBJ_STATE(obj) ((int)RNODE((obj))->u3.value) + +#define SCOPE_TEST(f) (mrb_vm_cref()->nd_visi & (f)) +#define SCOPE_CHECK(f) (mrb_vm_cref()->nd_visi == (f)) +#define SCOPE_SET(f) (mrb_vm_cref()->nd_visi = (f)) + +#define sysstack_error mrb_fixnum_value(0) + +#define CHECK_STACK_OVERFLOW(mrb, cfp, margin) do \ + if ((mrb_value *)((char *)(((mrb_value *)(cfp)->sp) + (margin)) + sizeof(mrb_control_frame_t)) >= ((mrb_value *)cfp)) { \ + mrb_exc_raise(mrb, sysstack_error); \ + } \ +while (0) + +void mrb_thread_cleanup(void); +void mrb_thread_wait_other_threads(void); + +enum { + RAISED_EXCEPTION = 1, + RAISED_STACKOVERFLOW = 2, + RAISED_NOMEMORY = 4 +}; +//int rb_threadptr_set_raised(mrb_thread_t *th); +//int rb_threadptr_reset_raised(mrb_thread_t *th); +#define mrb_thread_raised_set(th, f) ((th)->raised_flag |= (f)) +#define mrb_thread_raised_reset(th, f) ((th)->raised_flag &= ~(f)) +#define mrb_thread_raised_p(th, f) (((th)->raised_flag & (f)) != 0) +#define mrb_thread_raised_clear(th) ((th)->raised_flag = 0) + +//mrb_value mrb_f_eval(int argc, mrb_value *argv, mrb_value self); +//mrb_value mrb_make_exception(int argc, mrb_value *argv); +#ifndef NORETURN +# define NORETURN(x) x +#endif +#ifndef 
DEPRECATED +# define DEPRECATED(x) x +#endif + +NORETURN(void mrb_fiber_start(void)); + +NORETURN(void rb_print_undef(mrb_value, mrb_sym, int)); +NORETURN(void rb_vm_localjump_error(const char *,mrb_value, int)); +NORETURN(void rb_vm_jump_tag_but_local_jump(int, mrb_value)); +//NORETURN(void mrb_raise_method_missing(mrb_thread_t *th, int argc, mrb_value *argv, +// mrb_value obj, int call_status)); + +mrb_value mrb_vm_make_jump_tag_but_local_jump(int state, mrb_value val); +NODE *mrb_vm_cref(void); +//mrb_value rb_vm_call_cfunc(mrb_value recv, mrb_value (*func)(mrb_value), mrb_value arg, const mrb_block_t *blockptr, mrb_value filename, mrb_value filepath); +void mrb_vm_set_progname(mrb_value filename); +void mrb_thread_terminate_all(mrb_state *mrb); +//mrb_value mrb_vm_top_self(); +mrb_value mrb_vm_cbase(void); +//int mrb_vm_get_sourceline(const mrb_control_frame_t *); +void mrb_trap_restore_mask(void); + +#endif /* RUBY_EVAL_INTERN_H */ diff --git a/src/ext/.gitkeep b/src/ext/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/gc.c b/src/gc.c new file mode 100644 index 0000000000..e4b1f82bad --- /dev/null +++ b/src/gc.c @@ -0,0 +1,1146 @@ +#include "mruby.h" +#include "mruby/object.h" +#include "mruby/class.h" +#include "mruby/array.h" +#include "mruby/string.h" +#include "mruby/hash.h" +#include "mruby/range.h" +#include "ritehash.h" +#include +#include +#include "mruby/struct.h" +#include "mruby/proc.h" +#include "mdata.h" +#include "mruby/numeric.h" + +/* + = Tri-color Incremental Garbage Collection + + RiteVM's GC is Tri-color Incremental GC with Mark & Sweep. + Algorithm details are omitted. + Instead, the part about the implementation described below. + + == Object's Color + + Each object to be painted in three colors. + + * White - Unmarked. + * Gray - Marked, But the child objects are unmarked. + * Black - Marked, the child objects are also marked. + + == Two white part + + The white has a different part of A and B. 
+ In sweep phase, the sweep target white is either A or B. + The sweep target white is switched just before sweep phase. + e.g. A -> B -> A -> B ... + + All objects are painted white when allocated. + This white is another the sweep target white. + For example, if the sweep target white is A, it's B. + So objects when allocated in sweep phase will be next sweep phase target. + Therefore, these objects will not be released accidentally in sweep phase. + + == Execution Timing + + GC Execution Time and Each step interval are decided by live objects count. + List of Adjustment API: + + * gc_interval_ratio_set + * gc_step_ratio_set + + For details, see the comments for each function. + + = Write Barrier + + RiteVM implementer, C extension library writer must write a write + barrier when writing a pointer to an object on object's field. + Two different write barrier: + + * mrb_field_write_barrier + * mrb_write_barrier + + For details, see the comments for each function. + +*/ + +#ifdef INCLUDE_REGEXP +#include "re.h" +#endif + +#include "gc.h" + +#ifdef GC_PROFILE +#include + +static double program_invoke_time = 0; +static double gc_time = 0; +static double gc_total_time = 0; + +static double +gettimeofday_time(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + tv.tv_usec * 1e-6; +} + +#define GC_INVOKE_TIME_REPORT do {\ + fprintf(stderr, "gc_invoke: %19.3f\n", gettimeofday_time() - program_invoke_time);\ +} while(0) + +#define GC_TIME_START do {\ + gc_time = gettimeofday_time();\ +} while(0) + +#define GC_TIME_STOP_AND_REPORT do {\ + gc_time = gettimeofday_time() - gc_time;\ + gc_total_time += gc_time;\ + fprintf(stderr, "gc_state: %d\n", mrb->gc_state);\ + fprintf(stderr, "gc_time: %30.20f\n", gc_time);\ + fprintf(stderr, "gc_total_time: %30.20f\n\n", gc_total_time);\ +} while(0) +#else +#define GC_INVOKE_TIME_REPORT +#define GC_TIME_START +#define GC_TIME_STOP_AND_REPORT +#endif + +#ifdef GC_DEBUG +#include +#define gc_assert(expect) 
assert(expect) +#else +#define gc_assert(expect) ((void)0) +#endif + +#define GC_STEP_SIZE 1024 + + +void* +mrb_realloc(mrb_state *mrb, void *p, size_t len) +{ + return (mrb->allocf)(mrb, p, len); +} + +void* +mrb_malloc(mrb_state *mrb, size_t len) +{ + return (mrb->allocf)(mrb, 0, len); +} + +void* +mrb_calloc(mrb_state *mrb, size_t nelem, size_t len) +{ + void *p = (mrb->allocf)(mrb, 0, nelem*len); + + memset(p, 0, nelem*len); + return p; +} + +void* +mrb_free(mrb_state *mrb, void *p) +{ + return (mrb->allocf)(mrb, p, 0); +} + +#define HEAP_PAGE_SIZE 1024 + +struct heap_page { + struct RBasic *freelist; + struct heap_page *prev; + struct heap_page *next; + struct heap_page *free_next; + struct heap_page *free_prev; + RVALUE objects[HEAP_PAGE_SIZE]; +}; + +static void +link_heap_page(mrb_state *mrb, struct heap_page *page) +{ + page->next = mrb->heaps; + if (mrb->heaps) + mrb->heaps->prev = page; + mrb->heaps = page; +} + +static void +unlink_heap_page(mrb_state *mrb, struct heap_page *page) +{ + if (page->prev) + page->prev->next = page->next; + if (page->next) + page->next->prev = page->prev; + if (mrb->heaps == page) + mrb->heaps = page->next; + page->prev = NULL; + page->next = NULL; +} + +static void +link_free_heap_page(mrb_state *mrb, struct heap_page *page) +{ + page->free_next = mrb->free_heaps; + if (mrb->free_heaps) { + mrb->free_heaps->free_prev = page; + } + mrb->free_heaps = page; +} + +static void +unlink_free_heap_page(mrb_state *mrb, struct heap_page *page) +{ + if (page->free_prev) + page->free_prev->free_next = page->free_next; + if (page->free_next) + page->free_next->free_prev = page->free_prev; + if (mrb->free_heaps == page) + mrb->free_heaps = page->free_next; + page->free_prev = NULL; + page->free_next = NULL; +} + +static void +add_heap(mrb_state *mrb) +{ + struct heap_page *page = mrb_malloc(mrb, sizeof(struct heap_page)); + RVALUE *p, *e; + struct RBasic *prev = NULL; + + memset(page, 0, sizeof(struct heap_page)); + + for (p = 
page->objects, e=p+HEAP_PAGE_SIZE; pas.free.tt = MRB_TT_FREE; + p->as.free.next = prev; + prev = &p->as.basic; + } + page->freelist = prev; + + link_heap_page(mrb, page); + link_free_heap_page(mrb, page); +} + +#define DEFAULT_GC_INTERVAL_RATIO 200 +#define DEFAULT_GC_STEP_RATIO 200 + +void +mrb_init_heap(mrb_state *mrb) +{ + mrb->heaps = 0; + mrb->free_heaps = 0; + add_heap(mrb); + mrb->gc_interval_ratio = DEFAULT_GC_INTERVAL_RATIO; + mrb->gc_step_ratio = DEFAULT_GC_STEP_RATIO; + +#ifdef GC_PROFILE + program_invoke_time = gettimeofday_time(); +#endif +} + +void* +mrb_obj_alloc(mrb_state *mrb, enum mrb_vtype ttype, struct RClass *cls) +{ + struct RBasic *p; + + if (mrb->gc_threshold < mrb->live) { + mrb_incremental_gc(mrb); + } + if (mrb->free_heaps == NULL) { + add_heap(mrb); + } + + p = mrb->free_heaps->freelist; + mrb->free_heaps->freelist = ((struct free_obj*)p)->next; + if (mrb->free_heaps->freelist == NULL) { + unlink_free_heap_page(mrb, mrb->free_heaps); + } + + mrb->live++; + mrb->arena[mrb->arena_idx++] = p; + memset(p, 0, sizeof(RVALUE)); + if (mrb->arena_idx >= MRB_ARENA_SIZE) { + /* arena overflow error */ + mrb_raise(mrb, E_TYPE_ERROR, "arena overflow error"); + } + p->tt = ttype; + p->c = cls; + paint_partial_white(mrb, p); + return (void*)p; +} + +static inline void +add_gray_list(mrb_state *mrb, struct RBasic *obj) +{ + paint_gray(obj); + obj->gcnext = mrb->gray_list; + mrb->gray_list = obj; +} + +static void +gc_mark_children(mrb_state *mrb, struct RBasic *obj) +{ + gc_assert(is_gray(obj)); + paint_black(obj); + mrb->gray_list = obj->gcnext; + mrb_gc_mark(mrb, (struct RBasic*)obj->c); + switch (obj->tt) { + case MRB_TT_ICLASS: + mrb_gc_mark(mrb, (struct RBasic*)((struct RClass*)obj)->super); + break; + + case MRB_TT_CLASS: + case MRB_TT_SCLASS: + case MRB_TT_MODULE: + { + struct RClass *c = (struct RClass*)obj; + + mrb_gc_mark_iv(mrb, (struct RObject*)obj); + mrb_gc_mark_mt(mrb, c); + mrb_gc_mark(mrb, (struct RBasic*)c->super); + } + break; + + 
case MRB_TT_OBJECT: + mrb_gc_mark_iv(mrb, (struct RObject*)obj); + break; + + case MRB_TT_PROC: + { + struct RProc *p = (struct RProc*)obj; + + mrb_gc_mark(mrb, (struct RBasic*)p->env); + mrb_gc_mark(mrb, (struct RBasic*)p->target_class); + } + break; + + case MRB_TT_ENV: + { + struct REnv *e = (struct REnv *)obj; + + if (e->cioff < 0) { + int i, len; + + len = (int)e->flags; + for (i=0; istack[i]); + } + } + } + break; + + case MRB_TT_ARRAY: + { + struct RArray *a = (struct RArray*)obj; + size_t i, e; + + for (i=0,e=a->len; ibuf[i]); + } + } + break; + + case MRB_TT_HASH: + mrb_gc_mark_ht(mrb, (struct RClass*)obj); + break; + case MRB_TT_STRING: + { + struct RString *s = (struct RString*)obj; + + if (s->flags & MRB_STR_SHARED) { + mrb_gc_mark_value(mrb, s->aux.shared) + } + } + break; + case MRB_TT_RANGE: + { + struct RRange *r = (struct RRange*)obj; + + mrb_gc_mark_value(mrb, r->edges->beg); + mrb_gc_mark_value(mrb, r->edges->end); + } + break; + case MRB_TT_REGEX: + case MRB_TT_STRUCT: + case MRB_TT_EXCEPTION: + break; + } +} + +void +mrb_gc_mark(mrb_state *mrb, struct RBasic *obj) +{ + if (obj == 0) return; + if (!is_white(obj)) return; + gc_assert(!is_dead(mrb, obj)); + add_gray_list(mrb, obj); +} + +static void +obj_free(mrb_state *mrb, struct RBasic *obj) +{ + DEBUG(printf("obj_free(%p,tt=%d)\n",obj,obj->tt)); + switch (obj->tt) { + /* immediate - no mark */ + case MRB_TT_TRUE: + case MRB_TT_FIXNUM: + case MRB_TT_SYMBOL: + case MRB_TT_FLOAT: + /* cannot happen */ + return; + + case MRB_TT_OBJECT: + mrb_gc_free_iv(mrb, (struct RObject*)obj); + break; + case MRB_TT_CLASS: + case MRB_TT_MODULE: + case MRB_TT_SCLASS: + mrb_gc_free_mt(mrb, (struct RClass*)obj); + mrb_gc_free_iv(mrb, (struct RObject*)obj); + break; + case MRB_TT_ENV: + { + struct REnv *e = (struct REnv *)obj; + + if (e->cioff < 0) { + mrb_free(mrb, mrb->stack); + mrb->stack = 0; + } + } + break; + case MRB_TT_PROC: + case MRB_TT_ICLASS: + break; + case MRB_TT_ARRAY: + mrb_free(mrb, ((struct 
RArray*)obj)->buf); + break; + case MRB_TT_HASH: + mrb_gc_free_ht(mrb, (struct RClass*)obj); + break; + case MRB_TT_STRING: + if (!(obj->flags & MRB_STR_SHARED)) + mrb_free(mrb, ((struct RString*)obj)->buf); + break; + case MRB_TT_RANGE: + mrb_free(mrb, ((struct RRange*)obj)->edges); + break; + case MRB_TT_REGEX: + case MRB_TT_STRUCT: + case MRB_TT_EXCEPTION: + break; + } + obj->tt = MRB_TT_FREE; +} + +static void +root_scan_phase(mrb_state *mrb) +{ + int i, j, e; + mrb_callinfo *ci; + + mrb->gray_list = 0; + mrb->variable_gray_list = 0; + + mrb_gc_mark_gv(mrb); + /* mark arena */ + for (i=0,e=mrb->arena_idx; iarena[i]); + } + mrb_gc_mark(mrb, (struct RBasic*)mrb->object_class); + /* mark stack */ + e = mrb->stack - mrb->stbase; + if (mrb->ci) e += mrb->ci->nregs; + for (i=0; istbase[i]); + } + /* mark ensure stack */ + e = (mrb->ci) ? mrb->ci->eidx : 0; + for (i=0; iensure[i]); + } + /* mark closure */ + for (ci = mrb->cibase; ci <= mrb->ci; ci++) { + if (!ci) continue; + mrb_gc_mark( mrb, (struct RBasic*)ci->env); + } + /* mark irep pool */ + for (i=0; iirep_len; i++) { + mrb_irep *irep = mrb->irep[i]; + if (!irep) continue; + for (j=0; jplen; j++) { + mrb_gc_mark_value(mrb, irep->pool[j]); + } + } +} + +static size_t +gc_gray_mark(mrb_state *mrb, struct RBasic *obj) +{ + size_t children = 0; + + gc_mark_children(mrb, obj); + + switch (obj->tt) { + case MRB_TT_ICLASS: + children++; + break; + + case MRB_TT_CLASS: + case MRB_TT_SCLASS: + case MRB_TT_MODULE: + { + struct RClass *c = (struct RClass*)obj; + + children += mrb_gc_mark_iv_size(mrb, (struct RObject*)obj); + children += mrb_gc_mark_mt_size(mrb, c); + children++; + } + break; + + case MRB_TT_OBJECT: + children += mrb_gc_mark_iv_size(mrb, (struct RObject*)obj); + break; + + case MRB_TT_ENV: + children += (int)obj->flags; + break; + + case MRB_TT_ARRAY: + { + struct RArray *a = (struct RArray*)obj; + children += a->len; + } + break; + + case MRB_TT_HASH: + children += mrb_gc_mark_ht_size(mrb, (struct 
RClass*)obj); + break; + + case MRB_TT_STRING: + break; + case MRB_TT_PROC: + case MRB_TT_RANGE: + children+=2; + break; + + case MRB_TT_REGEX: + case MRB_TT_STRUCT: + case MRB_TT_EXCEPTION: + break; + } + return children; +} + +static size_t +incremental_marking_phase(mrb_state *mrb, size_t limit) +{ + size_t tried_marks = 0; + + while (mrb->gray_list && tried_marks < limit) { + tried_marks += gc_gray_mark(mrb, mrb->gray_list); + } + + return tried_marks; +} + +static void +final_marking_phase(mrb_state *mrb) +{ + while (mrb->gray_list) { + gc_mark_children(mrb, mrb->gray_list); + } + gc_assert(mrb->gray_list == NULL); + mrb->gray_list = mrb->variable_gray_list; + mrb->variable_gray_list = 0; + while (mrb->gray_list) { + gc_mark_children(mrb, mrb->gray_list); + } + gc_assert(mrb->gray_list == NULL); +} + +static void +prepare_incremental_sweep(mrb_state *mrb) +{ + mrb->gc_state = GC_STATE_SWEEP; + mrb->sweeps = mrb->heaps; + mrb->gc_live_after_mark = mrb->live; +} + +static size_t +incremental_sweep_phase(mrb_state *mrb, size_t limit) +{ + struct heap_page *page = mrb->sweeps; + size_t tried_sweep = 0; + + while (page && (tried_sweep < limit)) { + RVALUE *p = page->objects; + RVALUE *e = p + HEAP_PAGE_SIZE; + size_t freed = 0; + int dead_slot = 1; + int full = (page->freelist == NULL); + + while (pas.basic)) { + if (p->as.basic.tt != MRB_TT_FREE) { + obj_free(mrb, &p->as.basic); + p->as.free.next = page->freelist; + page->freelist = (struct RBasic*)p; + freed++; + } + } + else { + paint_partial_white(mrb, &p->as.basic); /* next gc target */ + dead_slot = 0; + } + p++; + } + + /* free dead slot */ + if (dead_slot && freed < HEAP_PAGE_SIZE) { + struct heap_page *next = page->next; + + unlink_heap_page(mrb, page); + unlink_free_heap_page(mrb, page); + mrb_free(mrb, page); + page = next; + } + else { + if (full && freed > 0) { + link_free_heap_page(mrb, page); + } + page = page->next; + } + tried_sweep += HEAP_PAGE_SIZE; + mrb->live -= freed; + mrb->gc_live_after_mark 
-= freed; + } + mrb->sweeps = page; + return tried_sweep; +} +/* + * Performs one collector step chosen by mrb->gc_state. + * NONE: scans the roots, enters GC_STATE_MARK and flips the white part. + * MARK: marks incrementally while mrb->gray_list is non-empty, otherwise + * runs the final marking and prepares the sweep. + * SWEEP: sweeps with the given limit and goes back to GC_STATE_NONE once + * incremental_sweep_phase() reports 0. + * Returns the value reported by the phase that ran, 0 for state changes. + */ +static size_t +incremental_gc(mrb_state *mrb, size_t limit) +{ + switch (mrb->gc_state) { + case GC_STATE_NONE: + root_scan_phase(mrb); + mrb->gc_state = GC_STATE_MARK; + flip_white_part(mrb); + return 0; + case GC_STATE_MARK: + if (mrb->gray_list) { + return incremental_marking_phase(mrb, limit); + } + else { + final_marking_phase(mrb); + prepare_incremental_sweep(mrb); + return 0; + } + case GC_STATE_SWEEP: { + size_t tried_sweep = 0; + tried_sweep = incremental_sweep_phase(mrb, limit); + if (tried_sweep == 0) + mrb->gc_state = GC_STATE_NONE; + return tried_sweep; + } + default: + /* unknown state */ + gc_assert(0); + return 0; + } +} +/* + * Runs incremental_gc() steps until the reported work reaches + * (GC_STEP_SIZE/100) * gc_step_ratio or a cycle completes, then recomputes + * mrb->gc_threshold: after a completed cycle it is + * (gc_live_after_mark/100) * gc_interval_ratio, but at least GC_STEP_SIZE; + * while a cycle is still in progress it is live + GC_STEP_SIZE. + */ +void +mrb_incremental_gc(mrb_state *mrb) +{ + size_t limit = 0, result = 0; + + GC_INVOKE_TIME_REPORT; + GC_TIME_START; + + limit = (GC_STEP_SIZE/100) * mrb->gc_step_ratio; + while (result < limit) { + result += incremental_gc(mrb, limit); + if (mrb->gc_state == GC_STATE_NONE) + break; + } + + if (mrb->gc_state == GC_STATE_NONE) { + gc_assert(mrb->live >= mrb->gc_live_after_mark); + mrb->gc_threshold = (mrb->gc_live_after_mark/100) * mrb->gc_interval_ratio; + if (mrb->gc_threshold < GC_STEP_SIZE) { + mrb->gc_threshold = GC_STEP_SIZE; + } + } + else { + mrb->gc_threshold = mrb->live + GC_STEP_SIZE; + } + + + GC_TIME_STOP_AND_REPORT; +} +/* + * Runs a collection to completion: a sweep that is already in progress is + * finished first, then incremental_gc() is stepped with an unbounded limit + * until the state is GC_STATE_NONE again. + * NOTE(review): unlike mrb_incremental_gc(), the new gc_threshold is not + * raised to GC_STEP_SIZE here - confirm that this is intended. + */ +void +mrb_garbage_collect(mrb_state *mrb) +{ + size_t max_limit = ~0; + + GC_INVOKE_TIME_REPORT; + GC_TIME_START; + + if (mrb->gc_state == GC_STATE_SWEEP) { + /* finish sweep phase */ + while (mrb->gc_state != GC_STATE_NONE) { + incremental_gc(mrb, max_limit); + } + } + + do { + incremental_gc(mrb, max_limit); + } while (mrb->gc_state != GC_STATE_NONE); + + mrb->gc_threshold = (mrb->gc_live_after_mark/100) * mrb->gc_interval_ratio; + + GC_TIME_STOP_AND_REPORT; +} +/* Returns the current arena index, to be handed to mrb_gc_arena_restore(). */ +int +mrb_gc_arena_save(mrb_state *mrb) +{ + return mrb->arena_idx; +} +/* Sets the arena index back to idx, a value from mrb_gc_arena_save(). */ +void +mrb_gc_arena_restore(mrb_state *mrb, int idx) +{ + mrb->arena_idx = idx; +} + +/* + * Field 
write barrier + * Paint obj(Black) -> value(White) to obj(Black) -> value(Black). + */ + +void +mrb_field_write_barrier(mrb_state *mrb, struct RBasic *obj, struct RBasic *value) +{ + if (!is_black(obj)) return; + if (!is_white(value)) return; + + gc_assert(!is_dead(mrb, value) && !is_dead(mrb, obj)); + gc_assert(mrb->gc_state != GC_STATE_NONE); + /* while marking, queue value on the gray list so that it gets traversed */ + if (mrb->gc_state == GC_STATE_MARK) { + add_gray_list(mrb, value); + } + else { + gc_assert(mrb->gc_state == GC_STATE_SWEEP); + paint_partial_white(mrb, obj); /* repaint obj so that this barrier does not fire for it again (it only acts on black objects) */ + } +} + +/* + * Write barrier + * Paint obj(Black) to obj(Gray). + * + * Does nothing unless obj is black. The object that is painted gray is + * pushed onto mrb->variable_gray_list and will be traversed atomically in + * the final mark phase, so use this write barrier for frequently written + * spots. + * e.g. Set element on Array. + */ + +void +mrb_write_barrier(mrb_state *mrb, struct RBasic *obj) +{ + if (!is_black(obj)) return; + + gc_assert(!is_dead(mrb, obj)); + gc_assert(mrb->gc_state != GC_STATE_NONE); + paint_gray(obj); + obj->gcnext = mrb->variable_gray_list; + mrb->variable_gray_list = obj; +} + +/* + * call-seq: + * GC.start -> nil + * + * Initiates a full garbage collection by calling mrb_garbage_collect(). + * + */ + +static mrb_value +gc_start(mrb_state *mrb, mrb_value obj) +{ + mrb_garbage_collect(mrb); + return mrb_nil_value(); +} + +/* + * call-seq: + * GC.interval_ratio -> fixnum + * + * Returns the ratio of the GC interval (mrb->gc_interval_ratio). + * Default value is 200(%). + * + */ + +static mrb_value +gc_interval_ratio_get(mrb_state *mrb, mrb_value obj) +{ + return mrb_fixnum_value(mrb->gc_interval_ratio); +} + +/* + * call-seq: + * GC.interval_ratio = fixnum -> nil + * + * Updates ratio of GC interval. Default value is 200(%). + * GC start as soon as after end all step of GC if you set 100(%). 
+ * + */ + +static mrb_value +gc_interval_ratio_set(mrb_state *mrb, mrb_value obj) +{ + mrb_value ratio; + mrb_get_args(mrb, "o", &ratio); + mrb->gc_interval_ratio = mrb_fixnum(mrb_to_int(mrb, ratio)); + return mrb_nil_value(); +} + +/* + * call-seq: + * GC.step_ratio -> fixnum + * + * Returns step span ratio of Incremental GC. Default value is 200(%). + * + */ + +static mrb_value +gc_step_ratio_get(mrb_state *mrb, mrb_value obj) +{ + return mrb_fixnum_value(mrb->gc_step_ratio); +} + +/* + * call-seq: + * GC.step_ratio = fixnum -> nil + * + * Updates step span ratio of Incremental GC. Default value is 200(%). + * 1 step of incrementalGC becomes long if a rate is big. + * + */ + +static mrb_value +gc_step_ratio_set(mrb_state *mrb, mrb_value obj) +{ + mrb_value ratio; + mrb_get_args(mrb, "o", &ratio); + mrb->gc_step_ratio = mrb_fixnum(mrb_to_int(mrb, ratio)); + return mrb_nil_value(); +} + +void +mrb_init_gc(mrb_state *mrb) +{ + struct RClass *gc; + gc = mrb_define_module(mrb, "GC"); + + mrb_define_class_method(mrb, gc, "start", gc_start, ARGS_NONE()); + mrb_define_class_method(mrb, gc, "interval_ratio", gc_interval_ratio_get, ARGS_NONE()); + mrb_define_class_method(mrb, gc, "interval_ratio=", gc_interval_ratio_set, ARGS_REQ(1)); + mrb_define_class_method(mrb, gc, "step_ratio", gc_step_ratio_get, ARGS_NONE()); + mrb_define_class_method(mrb, gc, "step_ratio=", gc_step_ratio_set, ARGS_REQ(1)); +} + +#ifdef GC_TEST +#ifdef GC_DEBUG +void +test_mrb_field_write_barrier(void) +{ + mrb_state *mrb = mrb_open(); + struct RBasic *obj, *value; + + puts("test_mrb_field_write_barrier"); + obj = RBASIC(mrb_ary_new(mrb)); + value = RBASIC(mrb_str_new_cstr(mrb, "value")); + paint_black(obj); + paint_partial_white(mrb,value); + + + puts(" in GC_STATE_MARK"); + mrb->gc_state = GC_STATE_MARK; + mrb_field_write_barrier(mrb, obj, value); + + gc_assert(is_gray(value)); + + + puts(" in GC_STATE_SWEEP"); + paint_partial_white(mrb,value); + mrb->gc_state = GC_STATE_SWEEP; + 
mrb_field_write_barrier(mrb, obj, value); + + gc_assert(obj->color & mrb->current_white_part); + gc_assert(obj->color & mrb->current_white_part); + + + puts(" fail with black"); + mrb->gc_state = GC_STATE_MARK; + paint_white(obj); + paint_partial_white(mrb,value); + mrb_field_write_barrier(mrb, obj, value); + + gc_assert(obj->color & mrb->current_white_part); + + + puts(" fail with gray"); + mrb->gc_state = GC_STATE_MARK; + paint_black(obj); + paint_gray(value); + mrb_field_write_barrier(mrb, obj, value); + + gc_assert(is_gray(value)); + + + { + puts("test_mrb_field_write_barrier_value"); + obj = RBASIC(mrb_ary_new(mrb)); + mrb_value value = mrb_str_new_cstr(mrb, "value"); + paint_black(obj); + paint_partial_white(mrb, RBASIC(value)); + + mrb->gc_state = GC_STATE_MARK; + mrb_field_write_barrier_value(mrb, obj, value); + + gc_assert(is_gray(RBASIC(value))); + } + + mrb_close(mrb); +} + +void +test_mrb_write_barrier(void) +{ + mrb_state *mrb = mrb_open(); + struct RBasic *obj; + + puts("test_mrb_write_barrier"); + obj = RBASIC(mrb_ary_new(mrb)); + paint_black(obj); + + puts(" in GC_STATE_MARK"); + mrb->gc_state = GC_STATE_MARK; + mrb_write_barrier(mrb, obj); + + gc_assert(is_gray(obj)); + gc_assert(mrb->variable_gray_list == obj); + + + puts(" fail with gray"); + paint_gray(obj); + mrb_write_barrier(mrb, obj); + + gc_assert(is_gray(obj)); + + mrb_close(mrb); +} + +void +test_add_gray_list(void) +{ + mrb_state *mrb = mrb_open(); + struct RBasic *obj1, *obj2; + + puts("test_add_gray_list"); + gc_assert(mrb->gray_list == NULL); + obj1 = RBASIC(mrb_str_new_cstr(mrb, "test")); + add_gray_list(mrb, obj1); + gc_assert(mrb->gray_list == obj1); + gc_assert(is_gray(obj1)); + + obj2 = RBASIC(mrb_str_new_cstr(mrb, "test")); + add_gray_list(mrb, obj2); + gc_assert(mrb->gray_list == obj2); + gc_assert(mrb->gray_list->gcnext == obj1); + gc_assert(is_gray(obj2)); + + mrb_close(mrb); +} + +void +test_gc_gray_mark(void) +{ + mrb_state *mrb = mrb_open(); + mrb_value obj_v, value_v; + 
struct RBasic *obj; + size_t gray_num = 0; + + puts("test_gc_gray_mark"); + + puts(" in MRB_TT_CLASS"); + obj = (struct RBasic *)mrb->object_class; + paint_gray(obj); + gray_num = gc_gray_mark(mrb, obj); + gc_assert(is_black(obj)); + gc_assert(gray_num > 1); + + puts(" in MRB_TT_ARRAY"); + obj_v = mrb_ary_new(mrb); + value_v = mrb_str_new_cstr(mrb, "test"); + paint_gray(RBASIC(obj_v)); + paint_partial_white(mrb, RBASIC(value_v)); + mrb_ary_push(mrb, obj_v, value_v); + gray_num = gc_gray_mark(mrb, RBASIC(obj_v)); + gc_assert(is_black(RBASIC(obj_v))); + gc_assert(is_gray(RBASIC(value_v))); + gc_assert(gray_num == 1); + + mrb_close(mrb); +} + +void +test_incremental_gc(void) +{ + mrb_state *mrb = mrb_open(); + size_t max = ~0, live = 0, total = 0, freed = 0; + RVALUE *free; + struct heap_page *page; + + puts("test_incremental_gc"); + + mrb_garbage_collect(mrb); + + gc_assert(mrb->gc_state == GC_STATE_NONE); + incremental_gc(mrb, max); + gc_assert(mrb->gc_state == GC_STATE_MARK); + + incremental_gc(mrb, max); + gc_assert(mrb->gc_state == GC_STATE_MARK); + + incremental_gc(mrb, max); + gc_assert(mrb->gc_state == GC_STATE_SWEEP); + + page = mrb->heaps; + while (page) { + RVALUE *p = page->objects; + RVALUE *e = p + HEAP_PAGE_SIZE; + while (pas.basic)) { + live++; + } + if (is_gray(&p->as.basic) && !is_dead(mrb, &p->as.basic)) { + printf("%p\n", &p->as.basic); + } + p++; + } + page = page->next; + total += HEAP_PAGE_SIZE; + } + + gc_assert(mrb->gray_list == NULL); + + incremental_gc(mrb, max); + gc_assert(mrb->gc_state == GC_STATE_SWEEP); + + incremental_gc(mrb, max); + gc_assert(mrb->gc_state == GC_STATE_NONE); + + free = (RVALUE *)mrb->heaps->freelist; + while (free) { + freed++; + free = (RVALUE *)free->as.free.next; + } + + gc_assert(mrb->live == live); + gc_assert(mrb->live == total-freed); + + mrb_close(mrb); +} + +void +test_incremental_sweep_phase(void) +{ + mrb_state *mrb = mrb_open(); + + puts("test_incremental_sweep_phase"); + + add_heap(mrb); + mrb->sweeps = 
mrb->heaps; + + gc_assert(mrb->heaps->next->next == NULL); + gc_assert(mrb->free_heaps->next->next == NULL); + incremental_sweep_phase(mrb, HEAP_PAGE_SIZE*3); + + gc_assert(mrb->heaps->next == NULL); + gc_assert(mrb->heaps == mrb->free_heaps); + + mrb_close(mrb); +} + +void +test_gc_api(void) +{ + mrb_state *mrb = mrb_open(); + mrb_value res; + + mrb_value argv[1]; + + puts("test_gc_api"); + + gc_start(mrb, mrb_nil_value()); + + res = gc_interval_ratio_get(mrb, mrb_nil_value()); + gc_assert(mrb_fixnum(res) == 200); + + argv[0] = mrb_fixnum_value(300); + mrb->argv = &argv; + mrb->argc = 1; + + gc_interval_ratio_set(mrb, mrb_nil_value()); + res = gc_interval_ratio_get(mrb, mrb_nil_value()); + gc_assert(mrb_fixnum(res) == 300); + + res = gc_step_ratio_get(mrb, mrb_nil_value()); + gc_assert(mrb_fixnum(res) == 200); + + gc_step_ratio_set(mrb, mrb_nil_value()); + res = gc_step_ratio_get(mrb, mrb_nil_value()); + gc_assert(mrb_fixnum(res) == 300); + + mrb_close(mrb); +} + +static void +test_many_object_benchmark(void) +{ + mrb_state *mrb = mrb_open(); + size_t i = 0, j=0; + mrb_value ary = mrb_ary_new(mrb); + int save_point = mrb_gc_arena_save(mrb); + + puts("test_many_object_benchmark"); + + for (i=0; i<1000; i++) { + mrb_value cary = mrb_ary_new(mrb); + mrb_ary_push(mrb, ary, cary); + for (j=0; j<1000; j++) { + mrb_ary_push(mrb, cary, mrb_str_new_cstr(mrb, "t")); + } + mrb_gc_arena_restore(mrb, save_point); + } + + mrb_close(mrb); +} + +int +main(void) +{ + test_mrb_field_write_barrier(); + test_mrb_write_barrier(); + test_add_gray_list(); + test_gc_gray_mark(); + test_incremental_gc(); + test_incremental_sweep_phase(); + test_gc_api(); + test_many_object_benchmark(); + return 0; +} +#endif +#endif diff --git a/src/gc.h b/src/gc.h new file mode 100644 index 0000000000..1395805339 --- /dev/null +++ b/src/gc.h @@ -0,0 +1,26 @@ +#ifndef MRUBY_GC_H +#define MRUBY_GC_H + +typedef struct { + union { + struct free_obj { + MRUBY_OBJECT_HEADER; + struct RBasic *next; + } free; + 
struct RBasic basic; + struct RObject object; + struct RClass klass; + struct RString string; + struct RArray array; + struct RHash hash; + struct RRange range; + struct RStruct structdata; + struct RProc procdata; +#ifdef INCLUDE_REGEXP + struct RMatch match; + struct RRegexp regexp; +#endif + } as; +} RVALUE; + +#endif /* MRUBY_GC_H */ diff --git a/src/hash.c b/src/hash.c new file mode 100644 index 0000000000..3f336f425d --- /dev/null +++ b/src/hash.c @@ -0,0 +1,1436 @@ +/********************************************************************** + + hash.c - + + $Author: yugui $ + created at: Mon Nov 22 18:51:18 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. + Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "mruby.h" +#include "mruby/hash.h" +#include "ritehash.h" +#include "mruby/class.h" +#include "mruby/array.h" +#include "error.h" +#include "mruby/string.h" +#include "mruby/numeric.h" +#include "mruby/struct.h" +#include "st.h" +#include +#include + + +#ifdef __APPLE__ +#include +#endif + +#include + +static khint_t +mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key) +{ + char type = mrb_type(key); + mrb_value s1 = mrb_str_new(mrb, &type, 1); + mrb_value s2 = mrb_inspect(mrb, key); + s1 = mrb_str_cat(mrb, s1, RSTRING_PTR(s2), RSTRING_LEN(s2)); + return kh_str_hash_func(mrb, RSTRING_PTR(s1)); +} + +static khint_t +mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b) +{ + return mrb_equal(mrb, a, b); +} +KHASH_INIT(ht, mrb_value, mrb_value, 1, mrb_hash_ht_hash_func, mrb_hash_ht_hash_equal); + +mrb_value mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int), + mrb_value obj, mrb_value paired_obj, void* arg); + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +static void 
mrb_hash_modify(mrb_state *mrb, mrb_value hash); + +static inline mrb_value +mrb_hash_ht_key(mrb_state *mrb, mrb_value key) +{ + if (mrb_type(key) == MRB_TT_STRING) + return mrb_str_dup(mrb, key); + else + return key; +} + +#define KEY(key) mrb_hash_ht_key(mrb, key) + +void +mrb_gc_mark_ht(mrb_state *mrb, struct RClass *c) +{ + khiter_t k; + khash_t(ht) *h = ((struct RHash*)c)->ht; + + mrb_gc_mark_value(mrb, ((struct RHash*)c)->ifnone); + if (!h) return; + for (k = kh_begin(h); k != kh_end(h); k++) + if (kh_exist(h, k)) { + mrb_gc_mark_value(mrb, kh_key(h, k)); + mrb_gc_mark_value(mrb, kh_value(h, k)); + } +} + +size_t +mrb_gc_mark_ht_size(mrb_state *mrb, struct RClass *c) +{ + size_t ht_size = 0; + khash_t(ht) *h = ((struct RHash*)c)->ht; + + /* ((struct RHash*)c)->ifnone */ + ht_size++; + + /* ((struct RHash*)c)->ht */ + if (h) ht_size += kh_size(h)*2; + + return ht_size; +} + +void +mrb_gc_free_ht(mrb_state *mrb, struct RClass *c) +{ + khash_t(ht) *h = ((struct RHash*)c)->ht; + + kh_destroy(ht, h); +} + + +mrb_value +mrb_hash_new_capa(mrb_state *mrb, size_t capa) +{ + struct RHash *h; + + h = mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); + h->ht = kh_init(ht, mrb); + kh_resize(ht, h->ht, capa); + h->ifnone = mrb_nil_value(); + return mrb_obj_value(h); +} + +mrb_value +mrb_hash_new(mrb_state *mrb, int capa) +{ + return mrb_hash_new_capa(mrb, capa); +} + +mrb_value +mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) /* mrb_hash_aref */ /* mrb_hash_lookup */ +{ + khash_t(ht) *h = RHASH_H_TBL(hash); + khiter_t k; + + if (h) { + k = kh_get(ht, h, key); + if (k != kh_end(h)) + return kh_value(h, k); + } + + /* not found */ + if (MRB_RHASH_PROCDEFAULT_P(hash)) { + return mrb_funcall(mrb, RHASH_PROCDEFAULT(hash), "call", 2, hash, key); + } + else { + return RHASH_IFNONE(hash); + } +} + +mrb_value +mrb_hash_getWithDef(mrb_state *mrb, mrb_value hash, mrb_value vkey, mrb_value def) /* mrb_hash_lookup2 */ +{ + khash_t(ht) *h = RHASH_H_TBL(hash); + khiter_t k; + 
+ if (h) { + k = kh_get(ht, h, vkey); + if (k != kh_end(h)) + return kh_value(h, k); + } + + /* not found */ + return def; +} + +void +mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) /* mrb_hash_aset */ +{ + khash_t(ht) *h; + khiter_t k; + int r; + + mrb_hash_modify(mrb, hash); + h = RHASH_H_TBL(hash); + + k = kh_get(ht, h, key); + if (k == kh_end(h)) { + /* expand */ + k = kh_put(ht, h, KEY(key), &r); + } + + kh_value(h, k) = val; + mrb_write_barrier(mrb, (struct RBasic*)RHASH(hash)); + return; +} + +mrb_value +mrb_hash_freeze(mrb_value hash) +{ + //return mrb_obj_freeze(hash); + return (hash); +} + +mrb_value +mrb_hash(mrb_state *mrb, mrb_value obj) +{ + mrb_value hval = mrb_funcall(mrb, obj, "Hash", 0); +retry: + switch (mrb_type(hval)) { + case MRB_TT_FIXNUM: + return hval; + + default: + hval = mrb_to_int(mrb, hval); + goto retry; + } +} + +static mrb_value +hash_s_new(mrb_state *mrb, mrb_value klass) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + mrb_value hash = mrb_hash_new_capa(mrb, 0); + mrb_obj_call_init(mrb, hash, argc, argv); + return hash; +} + +mrb_value +mrb_hash_dup(mrb_state *mrb, mrb_value hash) +{ + struct RHash* ret; + khash_t(ht) *h, *ret_h; + khiter_t k, ret_k; + int r; + + ret = mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); + ret->ht = kh_init(ht, mrb); + + if (!RHASH_EMPTY_P(hash)) { + h = RHASH_H_TBL(hash); + ret_h = ret->ht; + + for (k = kh_begin(h); k != kh_end(h); k++) { + if (kh_exist(h,k)) { + ret_k = kh_put(ht, ret_h, KEY(kh_key(h,k)), &r); + kh_val(ret_h, ret_k) = kh_val(h,k); + } + } + } + + return mrb_obj_value(ret); +} + +static void +mrb_hash_modify_check(mrb_state *mrb, mrb_value hash) +{ + //if (OBJ_FROZEN(hash)) mrb_error_frozen("hash"); +} + +khash_t(ht) * +mrb_hash_tbl(mrb_state *mrb, mrb_value hash) +{ + khash_t(ht) *h = RHASH_H_TBL(hash); + + if (!RHASH_H_TBL(hash)) { + RHASH_H_TBL(hash) = kh_init(ht, mrb); + } + return h; +} + +static void 
+mrb_hash_modify(mrb_state *mrb, mrb_value hash) +{ + //mrb_hash_modify_check(mrb, hash); + mrb_hash_tbl(mrb, hash); +} + +/* 15.2.13.4.16 */ +/* + * call-seq: + * Hash.new -> new_hash + * Hash.new(obj) -> new_hash + * Hash.new {|hash, key| block } -> new_hash + * + * Returns a new, empty hash. If this hash is subsequently accessed by + * a key that doesn't correspond to a hash entry, the value returned + * depends on the style of new used to create the hash. In + * the first form, the access returns nil. If + * obj is specified, this single object will be used for + * all default values. If a block is specified, it will be + * called with the hash object and the key, and should return the + * default value. It is the block's responsibility to store the value + * in the hash if required. + * + * h = Hash.new("Go Fish") + * h["a"] = 100 + * h["b"] = 200 + * h["a"] #=> 100 + * h["c"] #=> "Go Fish" + * # The following alters the single default object + * h["c"].upcase! #=> "GO FISH" + * h["d"] #=> "GO FISH" + * h.keys #=> ["a", "b"] + * + * # While this creates a new default object each time + * h = Hash.new { |hash, key| hash[key] = "Go Fish: #{key}" } + * h["c"] #=> "Go Fish: c" + * h["c"].upcase! #=> "GO FISH: C" + * h["d"] #=> "Go Fish: d" + * h.keys #=> ["c", "d"] + * + */ + +static mrb_value +mrb_hash_init_core(mrb_state *mrb, mrb_value hash) +{ + mrb_value block; + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "o*", &block, &argv, &argc); + + mrb_hash_modify(mrb, hash); + + if (mrb_nil_p(block)) { + if (argc > 0) { + if (argc != 1) mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments"); + RHASH_IFNONE(hash) = argv[0]; + } + else { + RHASH_IFNONE(hash) = mrb_nil_value(); + } + } + else { + if (argc > 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments"); + } + RHASH(hash)->flags |= MRB_HASH_PROC_DEFAULT; + RHASH_PROCDEFAULT(hash) = block; + } + + return hash; +} + +/* + * call-seq: + * Hash[ key, value, ... 
] -> new_hash + * Hash[ [ [key, value], ... ] ] -> new_hash + * Hash[ object ] -> new_hash + * + * Creates a new hash populated with the given objects. Equivalent to + * the literal { key => value, ... }. In the first + * form, keys and values occur in pairs, so there must be an even number of arguments. + * The second and third form take a single argument which is either + * an array of key-value pairs or an object convertible to a hash. + * + * Hash["a", 100, "b", 200] #=> {"a"=>100, "b"=>200} + * Hash[ [ ["a", 100], ["b", 200] ] ] #=> {"a"=>100, "b"=>200} + * Hash["a" => 100, "b" => 200] #=> {"a"=>100, "b"=>200} + */ + +static mrb_value +to_hash(mrb_state *mrb, mrb_value hash) +{ + return mrb_convert_type(mrb, hash, MRB_TT_HASH, "Hash", "to_hash"); +} + +/* + * call-seq: + * Hash.try_convert(obj) -> hash or nil + * + * Try to convert obj into a hash, using to_hash method. + * Returns converted hash or nil if obj cannot be converted + * for any reason. + * + * Hash.try_convert({1=>2}) # => {1=>2} + * Hash.try_convert("1=>2") # => nil + */ + +/* 15.2.13.4.2 */ +/* + * call-seq: + * hsh[key] -> value + * + * Element Reference---Retrieves the value object corresponding + * to the key object. If not found, returns the default value (see + * Hash::new for details). + * + * h = { "a" => 100, "b" => 200 } + * h["a"] #=> 100 + * h["c"] #=> nil + * + */ +mrb_value +mrb_hash_aget(mrb_state *mrb, mrb_value self) +{ + mrb_value key; + + mrb_get_args(mrb, "o", &key); + return mrb_hash_get(mrb, self, key); +} + +mrb_value +mrb_hash_lookup2(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def) +{ + return mrb_hash_getWithDef(mrb, hash, key, def); +} + +mrb_value +mrb_hash_lookup(mrb_state *mrb, mrb_value hash, mrb_value key) +{ + return mrb_hash_lookup2(mrb, hash, key, mrb_nil_value()); +} + +/* + * call-seq: + * hsh.fetch(key [, default] ) -> obj + * hsh.fetch(key) {| key | block } -> obj + * + * Returns a value from the hash for the given key. 
If the key can't be + * found, there are several options: With no other arguments, it will + * raise an KeyError exception; if default is + * given, then that will be returned; if the optional code block is + * specified, then that will be run and its result returned. + * + * h = { "a" => 100, "b" => 200 } + * h.fetch("a") #=> 100 + * h.fetch("z", "go fish") #=> "go fish" + * h.fetch("z") { |el| "go fish, #{el}"} #=> "go fish, z" + * + * The following example shows that an exception is raised if the key + * is not found and a default value is not supplied. + * + * h = { "a" => 100, "b" => 200 } + * h.fetch("z") + * + * produces: + * + * prog.rb:2:in `fetch': key not found (KeyError) + * from prog.rb:2 + * + */ + +/* 15.2.13.4.5 */ +/* + * call-seq: + * hsh.default(key=nil) -> obj + * + * Returns the default value, the value that would be returned by + * hsh[key] if key did not exist in hsh. + * See also Hash::new and Hash#default=. + * + * h = Hash.new #=> {} + * h.default #=> nil + * h.default(2) #=> nil + * + * h = Hash.new("cat") #=> {} + * h.default #=> "cat" + * h.default(2) #=> "cat" + * + * h = Hash.new {|h,k| h[k] = k.to_i*10} #=> {} + * h.default #=> nil + * h.default(2) #=> 20 + */ + +static mrb_value +mrb_hash_default(mrb_state *mrb, mrb_value hash) +{ + mrb_value *argv; + int argc; + mrb_value key; + + mrb_get_args(mrb, "*", &argv, &argc); + + if (MRB_RHASH_PROCDEFAULT_P(hash)) { + if (argc == 0) return mrb_nil_value(); + key = argv[0]; + return mrb_funcall(mrb, RHASH_PROCDEFAULT(hash), "call", 2, hash, key); + } + else { + return RHASH_IFNONE(hash); + } +} + +/* 15.2.13.4.6 */ +/* + * call-seq: + * hsh.default = obj -> obj + * + * Sets the default value, the value returned for a key that does not + * exist in the hash. It is not possible to set the default to a + * Proc that will be executed on each key lookup. 
+ * + * h = { "a" => 100, "b" => 200 } + * h.default = "Go fish" + * h["a"] #=> 100 + * h["z"] #=> "Go fish" + * # This doesn't do what you might hope... + * h.default = proc do |hash, key| + * hash[key] = key + key + * end + * h[2] #=> # + * h["cat"] #=> # + */ + +static mrb_value +mrb_hash_set_default(mrb_state *mrb, mrb_value hash) +{ + mrb_value ifnone; + mrb_get_args(mrb, "o", &ifnone); + + mrb_hash_modify(mrb, hash); + RHASH_IFNONE(hash) = ifnone; + RHASH(hash)->flags &= ~(MRB_HASH_PROC_DEFAULT); + + return ifnone; +} + +/* 15.2.13.4.7 */ +/* + * call-seq: + * hsh.default_proc -> anObject + * + * If Hash::new was invoked with a block, return that + * block, otherwise return nil. + * + * h = Hash.new {|h,k| h[k] = k*k } #=> {} + * p = h.default_proc #=> # + * a = [] #=> [] + * p.call(a, 2) + * a #=> [nil, nil, 4] + */ + + +static mrb_value +mrb_hash_default_proc(mrb_state *mrb, mrb_value hash) +{ + if (MRB_RHASH_PROCDEFAULT_P(hash)) { + return RHASH_PROCDEFAULT(hash); + } + return mrb_nil_value(); +} + +/* + * call-seq: + * hsh.default_proc = proc_obj -> proc_obj + * + * Sets the default proc to be executed on each key lookup. + * + * h.default_proc = proc do |hash, key| + * hash[key] = key + key + * end + * h[2] #=> 4 + * h["cat"] #=> "catcat" + */ + +mrb_value +mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key) +{ + khash_t(ht) *h = RHASH_H_TBL(hash); + khiter_t k; + mrb_value delVal; + + if (h) { + k = kh_get(ht, h, key); + if (k != kh_end(h)) { + delVal = kh_value(h, k); + kh_del(ht, h, k); + return delVal; + } + } + + /* not found */ + return mrb_nil_value(); +} + +/* 15.2.13.4.8 */ +/* + * call-seq: + * hsh.delete(key) -> value + * hsh.delete(key) {| key | block } -> value + * + * Deletes and returns a key-value pair from hsh whose key is + * equal to key. If the key is not found, returns the + * default value. If the optional code block is given and the + * key is not found, pass in the key and return the result of + * block. 
+ * + * h = { "a" => 100, "b" => 200 } + * h.delete("a") #=> 100 + * h.delete("z") #=> nil + * h.delete("z") { |el| "#{el} not found" } #=> "z not found" + * + */ +mrb_value +mrb_hash_delete(mrb_state *mrb, mrb_value self) +{ + mrb_value key; + + mrb_get_args(mrb, "o", &key); + return mrb_hash_delete_key(mrb, self, key); +} +struct shift_var { + mrb_value key; + mrb_value val; +}; + + +/* 15.2.13.4.24 */ +/* + * call-seq: + * hsh.shift -> anArray or obj + * + * Removes a key-value pair from hsh and returns it as the + * two-item array [ key, value ], or + * the hash's default value if the hash is empty. + * + * h = { 1 => "a", 2 => "b", 3 => "c" } + * h.shift #=> [1, "a"] + * h #=> {2=>"b", 3=>"c"} + * + * The pair removed is the first occupied slot found when walking the table + * from kh_begin(). For an empty hash the default proc, if any, is called + * with (hash, nil). + */ + +static mrb_value +mrb_hash_shift(mrb_state *mrb, mrb_value hash) +{ + khash_t(ht) *h; + khiter_t k; + mrb_value delKey, delVal; + mrb_value result; + + mrb_hash_modify(mrb, hash); + /* read the table only after mrb_hash_modify() had the chance to create it */ + h = RHASH_H_TBL(hash); + if (h) { + if (kh_size(h) > 0) { + for (k = kh_begin(h); k != kh_end(h); k++) { + if (!kh_exist(h,k)) continue; + + delKey = kh_key(h,k); + delVal = kh_value(h,k); + + /* collect the [key, value] pair first, then remove the entry */ + result = mrb_ary_new_capa(mrb, 2); + mrb_ary_push(mrb, result, delKey); + mrb_ary_push(mrb, result, delVal); + kh_del(ht, h, k); + return result; + } + } + } + + if (MRB_RHASH_PROCDEFAULT_P(hash)) { + return mrb_funcall(mrb, RHASH_PROCDEFAULT(hash), "call", 2, hash, mrb_nil_value()); + } + else { + return RHASH_IFNONE(hash); + } +} + +/* + * call-seq: + * hsh.delete_if {| key, value | block } -> hsh + * hsh.delete_if -> an_enumerator + * + * Deletes every key-value pair from hsh for which block + * evaluates to true. + * + * If no block is given, an enumerator is returned instead. + * + * h = { "a" => 100, "b" => 200, "c" => 300 } + * h.delete_if {|key, value| key >= "b" } #=> {"a"=>100} + * + */ + +/* + * call-seq: + * hsh.reject! {| key, value | block } -> hsh or nil + * hsh.reject! 
-> an_enumerator + * + * Equivalent to Hash#delete_if, but returns + * nil if no changes were made. + */ + +/* + * call-seq: + * hsh.reject {| key, value | block } -> a_hash + * + * Same as Hash#delete_if, but works on (and returns) a + * copy of the hsh. Equivalent to + * hsh.dup.delete_if. + * + */ + +/* + * call-seq: + * hsh.values_at(key, ...) -> array + * + * Return an array containing the values associated with the given keys. + * Also see Hash.select. + * + * h = { "cat" => "feline", "dog" => "canine", "cow" => "bovine" } + * h.values_at("cow", "cat") #=> ["bovine", "feline"] + */ + +mrb_value +mrb_hash_values_at(mrb_state *mrb, int argc, mrb_value *argv, mrb_value hash) +{ + mrb_value result = mrb_ary_new_capa(mrb, argc);//mrb_ary_new2(argc); + long i; + + for (i=0; i a_hash + * hsh.select -> an_enumerator + * + * Returns a new hash consisting of entries for which the block returns true. + * + * If no block is given, an enumerator is returned instead. + * + * h = { "a" => 100, "b" => 200, "c" => 300 } + * h.select {|k,v| k > "a"} #=> {"b" => 200, "c" => 300} + * h.select {|k,v| v < 200} #=> {"a" => 100} + */ + +/* + * call-seq: + * hsh.select! {| key, value | block } -> hsh or nil + * hsh.select! -> an_enumerator + * + * Equivalent to Hash#keep_if, but returns + * nil if no changes were made. + */ + +/* + * call-seq: + * hsh.keep_if {| key, value | block } -> hsh + * hsh.keep_if -> an_enumerator + * + * Deletes every key-value pair from hsh for which block + * evaluates to false. + * + * If no block is given, an enumerator is returned instead. + * + */ + +/* 15.2.13.4.4 */ +/* + * call-seq: + * hsh.clear -> hsh + * + * Removes all key-value pairs from hsh. 
+ * + * h = { "a" => 100, "b" => 200 } #=> {"a"=>100, "b"=>200} + * h.clear #=> {} + * + */ + +static mrb_value +mrb_hash_clear(mrb_state *mrb, mrb_value hash) +{ + khash_t(ht) *h = RHASH_H_TBL(hash); + + /* a hash whose table was never created has nothing to clear */ + if (h) kh_clear(ht, h); + return hash; +} + +/* 15.2.13.4.3 */ +/* 15.2.13.4.26 */ +/* + * call-seq: + * hsh[key] = value -> value + * hsh.store(key, value) -> value + * + * Element Assignment---Associates the value given by + * value with the key given by key. + * key should not have its value changed while it is in + * use as a key (a String passed as a key will be + * duplicated and frozen). + * + * h = { "a" => 100, "b" => 200 } + * h["a"] = 9 + * h["c"] = 4 + * h #=> {"a"=>9, "b"=>200, "c"=>4} + * + */ +mrb_value +mrb_hash_aset(mrb_state *mrb, mrb_value self) +{ + mrb_value key, val; + + mrb_get_args(mrb, "oo", &key, &val); + mrb_hash_set(mrb, self, key, val); + return val; +} + +/* 15.2.13.4.17 */ +/* 15.2.13.4.23 */ +/* + * call-seq: + * hsh.replace(other_hash) -> hsh + * + * Replaces the contents of hsh with the contents of + * other_hash. 
+ * + * h = { "a" => 100, "b" => 200 } + * h.replace({ "c" => 300, "d" => 400 }) #=> {"c"=>300, "d"=>400} + * + * The argument is converted with to_hash(). Replacing a hash with itself + * is a no-op. The default value or default proc of other_hash is taken + * over as well. + * + */ + +static mrb_value +mrb_hash_replace(mrb_state *mrb, mrb_value hash) +{ + mrb_value hash2; + khash_t(ht) *h2; + khiter_t k; + + mrb_get_args(mrb, "o", &hash2); + + mrb_hash_modify_check(mrb, hash); + hash2 = to_hash(mrb, hash2); + if (mrb_obj_equal(mrb, hash, hash2)) return hash; + mrb_hash_clear(mrb, hash); + + h2 = RHASH_H_TBL(hash2); + if (h2) { + for (k = kh_begin(h2); k != kh_end(h2); k++) { + if (kh_exist(h2, k)) + mrb_hash_set(mrb, hash, kh_key(h2, k), kh_value(h2, k)); + } + } + + if (MRB_RHASH_PROCDEFAULT_P(hash2)) { + RHASH(hash)->flags |= MRB_HASH_PROC_DEFAULT; + RHASH_PROCDEFAULT(hash) = RHASH_PROCDEFAULT(hash2); + } + else { + /* drop a proc-default flag left over on the receiver, as mrb_hash_set_default() does */ + RHASH(hash)->flags &= ~(MRB_HASH_PROC_DEFAULT); + RHASH_IFNONE(hash) = RHASH_IFNONE(hash2); + } + return hash; +} + +/* 15.2.13.4.20 */ +/* 15.2.13.4.25 */ +/* + * call-seq: + * hsh.length -> fixnum + * hsh.size -> fixnum + * + * Returns the number of key-value pairs in the hash. + * + * h = { "d" => 100, "a" => 200, "v" => 300, "e" => 400 } + * h.length #=> 4 + * h.delete("a") #=> 200 + * h.length #=> 3 + */ +static mrb_value +mrb_hash_size_m(mrb_state *mrb, mrb_value self) +{ + khash_t(ht) *h = RHASH_H_TBL(self); + + if (!h) return mrb_fixnum_value(0); + return mrb_fixnum_value(kh_size(h)); +} + +/* 15.2.13.4.12 */ +/* + * call-seq: + * hsh.empty? -> true or false + * + * Returns true if hsh contains no key-value pairs. + * + * {}.empty? #=> true + * + */ +static mrb_value +mrb_hash_empty_p(mrb_state *mrb, mrb_value self) +{ + khash_t(ht) *h = RHASH_H_TBL(self); + khiter_t k; + if (h) { + for (k = kh_begin(h); k != kh_end(h); k++) + if (kh_exist(h, k)) + return mrb_false_value(); + } + return mrb_true_value(); +} + +/* 15.2.13.4.11 */ +/* + * call-seq: + * hsh.each_value {| value | block } -> hsh + * hsh.each_value -> an_enumerator + * + * Calls block once for each key in hsh, passing the + * value as a parameter. 
+ * + * If no block is given, an enumerator is returned instead. + * + * h = { "a" => 100, "b" => 200 } + * h.each_value {|value| puts value } + * + * produces: + * + * 100 + * 200 + */ + +/* 15.2.13.4.10 */ +/* + * call-seq: + * hsh.each_key {| key | block } -> hsh + * hsh.each_key -> an_enumerator + * + * Calls block once for each key in hsh, passing the key + * as a parameter. + * + * If no block is given, an enumerator is returned instead. + * + * h = { "a" => 100, "b" => 200 } + * h.each_key {|key| puts key } + * + * produces: + * + * a + * b + */ + +/* 15.2.13.4.9 */ +/* + * call-seq: + * hsh.each {| key, value | block } -> hsh + * hsh.each_pair {| key, value | block } -> hsh + * hsh.each -> an_enumerator + * hsh.each_pair -> an_enumerator + * + * Calls block once for each key in hsh, passing the key-value + * pair as parameters. + * + * If no block is given, an enumerator is returned instead. + * + * h = { "a" => 100, "b" => 200 } + * h.each {|key, value| puts "#{key} is #{value}" } + * + * produces: + * + * a is 100 + * b is 200 + * + */ + +static mrb_value +inspect_hash(mrb_state *mrb, mrb_value hash, int recur) +{ + mrb_value str, str2; + khash_t(ht) *h = RHASH_H_TBL(hash); + khiter_t k; + + if (recur) return mrb_str_new2(mrb, "{...}"); + + str = mrb_str_new2(mrb, "{"); + if (h && kh_size(h) > 0) { + for (k = kh_begin(h); k != kh_end(h); k++) { + int ai; + + if (!kh_exist(h,k)) continue; + + ai = mrb_gc_arena_save(mrb); + + if (RSTRING_LEN(str) > 1) mrb_str_cat2(mrb, str, ", "); + + str2 = mrb_inspect(mrb, kh_key(h,k)); + mrb_str_append(mrb, str, str2); + mrb_str_buf_cat(mrb, str, "=>", strlen("=>")); + str2 = mrb_inspect(mrb, kh_value(h,k)); + mrb_str_append(mrb, str, str2); + + mrb_gc_arena_restore(mrb, ai); + } + } + mrb_str_buf_cat(mrb, str, "}", strlen("}"));//mrb_str_buf_cat2(str, "}"); + + return str; +} + +/* 15.2.13.4.30 (x)*/ +/* + * call-seq: + * hsh.to_s -> string + * hsh.inspect -> string + * + * Return the contents of this hash as a string. 
+ * + * h = { "c" => 300, "a" => 100, "d" => 400, "c" => 300 } + * h.to_s #=> "{\"c\"=>300, \"a\"=>100, \"d\"=>400}" + */ + +static mrb_value +mrb_hash_inspect(mrb_state *mrb, mrb_value hash) +{ + if (RHASH_EMPTY_P(hash)) + return mrb_str_new2(mrb, "{}"); + return inspect_hash(mrb, hash, 0); +} + +/* 15.2.13.4.29 (x)*/ +/* + * call-seq: + * hsh.to_hash => hsh + * + * Returns +self+. + */ + +static mrb_value +mrb_hash_to_hash(mrb_state *mrb, mrb_value hash) +{ + return hash; +} + +/* 15.2.13.4.19 */ +/* + * call-seq: + * hsh.keys -> array + * + * Returns a new array populated with the keys from this hash. See also + * Hash#values. + * + * h = { "a" => 100, "b" => 200, "c" => 300, "d" => 400 } + * h.keys #=> ["a", "b", "c", "d"] + * + */ + +static mrb_value +mrb_hash_keys(mrb_state *mrb, mrb_value hash) +{ + khash_t(ht) *h = RHASH_H_TBL(hash); + khiter_t k; + mrb_value ary = mrb_ary_new(mrb); + + if (!h) return ary; + for (k = kh_begin(h); k != kh_end(h); k++) { + if (kh_exist(h, k)) { + mrb_value v = kh_key(h,k); + if ( !mrb_special_const_p(v) ) + v = mrb_obj_dup(mrb, v); + mrb_ary_push(mrb, ary, v); + } + } + return ary; +} + +/* 15.2.13.4.28 */ +/* + * call-seq: + * hsh.values -> array + * + * Returns a new array populated with the values from hsh. See + * also Hash#keys. 
+ * + * h = { "a" => 100, "b" => 200, "c" => 300 } + * h.values #=> [100, 200, 300] + * + */ + +static mrb_value +mrb_hash_values(mrb_state *mrb, mrb_value hash) +{ + khash_t(ht) *h = RHASH_H_TBL(hash); + khiter_t k; + mrb_value ary = mrb_ary_new(mrb); + + if (!h) return ary; + for (k = kh_begin(h); k != kh_end(h); k++) { + if (kh_exist(h, k)){ + mrb_value v = kh_value(h,k); + if ( !mrb_special_const_p(v) ) + v = mrb_obj_dup(mrb, v); + mrb_ary_push(mrb, ary, v); + } + } + return ary; +} + +static mrb_value +mrb_hash_has_keyWithKey(mrb_state *mrb, mrb_value hash, mrb_value key) +{ + khash_t(ht) *h = RHASH_H_TBL(hash); + khiter_t k; + + if (h) { + k = kh_get(ht, h, key); + if (k != kh_end(h)) + return mrb_true_value(); + } + + return mrb_false_value(); +} + +/* 15.2.13.4.13 */ +/* 15.2.13.4.15 */ +/* 15.2.13.4.18 */ +/* 15.2.13.4.21 */ +/* + * call-seq: + * hsh.has_key?(key) -> true or false + * hsh.include?(key) -> true or false + * hsh.key?(key) -> true or false + * hsh.member?(key) -> true or false + * + * Returns true if the given key is present in hsh. + * + * h = { "a" => 100, "b" => 200 } + * h.has_key?("a") #=> true + * h.has_key?("z") #=> false + * + */ + +static mrb_value +mrb_hash_has_key(mrb_state *mrb, mrb_value hash) +{ + mrb_value key; + + mrb_get_args(mrb, "o", &key); + return mrb_hash_has_keyWithKey(mrb, hash, key); +} + +static mrb_value +mrb_hash_has_valueWithvalue(mrb_state *mrb, mrb_value hash, mrb_value value) +{ + khash_t(ht) *h = RHASH_H_TBL(hash); + khiter_t k; + + if (h) { + for (k = kh_begin(h); k != kh_end(h); k++) { + if (!kh_exist(h, k)) continue; + + if (mrb_equal(mrb, kh_value(h,k), value)) { + return mrb_true_value(); + } + } + } + + return mrb_false_value(); +} + +/* 15.2.13.4.14 */ +/* 15.2.13.4.27 */ +/* + * call-seq: + * hsh.has_value?(value) -> true or false + * hsh.value?(value) -> true or false + * + * Returns true if the given value is present for some key + * in hsh. 
+ * + * h = { "a" => 100, "b" => 200 } + * h.has_value?(100) #=> true + * h.has_value?(999) #=> false + */ + +static mrb_value +mrb_hash_has_value(mrb_state *mrb, mrb_value hash) +{ + mrb_value val; + + mrb_get_args(mrb, "o", &val); + return mrb_hash_has_valueWithvalue(mrb, hash, val); +} + +static mrb_value +recursive_eql(mrb_state *mrb, mrb_value hash, mrb_value dt, int recur) +{ + khash_t(ht) *h1 = RHASH_H_TBL(hash); + khash_t(ht) *h2 = RHASH_H_TBL(dt); + khiter_t k1, k2; + mrb_value key1; + + for (k1 = kh_begin(h1); k1 != kh_end(h1); k1++) { + if (!kh_exist(h1, k1)) continue; + key1 = kh_key(h1,k1); + k2 = kh_get(ht, h2, key1); + if ( k2 != kh_end(h2)) { + if (mrb_equal(mrb, kh_value(h1,k1), kh_value(h2,k2))) { + continue; /* next key */ + } + } + return mrb_false_value(); + } + return mrb_true_value(); +} + +static mrb_value +hash_equal(mrb_state *mrb, mrb_value hash1, mrb_value hash2, int eql) +{ + if (mrb_obj_equal(mrb, hash1, hash2)) return mrb_true_value(); + if (mrb_type(hash2) != MRB_TT_HASH) { + if (!mrb_respond_to(mrb, hash2, mrb_intern(mrb, "to_hash"))) { + return mrb_false_value(); + } + if (eql) + return mrb_fixnum_value(mrb_eql(mrb, hash2, hash1)); + else + return mrb_fixnum_value(mrb_equal(mrb, hash2, hash1)); + } + if (RHASH_SIZE(hash1) != RHASH_SIZE(hash2)) return mrb_false_value(); + if (!RHASH(hash1)->ht || !RHASH(hash2)->ht) return mrb_true_value(); + + return mrb_exec_recursive_paired(mrb, recursive_eql, hash1, hash2, (void*)0); +} + +/* 15.2.13.4.1 */ +/* + * call-seq: + * hsh == other_hash -> true or false + * + * Equality---Two hashes are equal if they each contain the same number + * of keys and if each key-value pair is equal to (according to + * Object#==) the corresponding elements in the other + * hash. 
+ * + * h1 = { "a" => 1, "c" => 2 } + * h2 = { 7 => 35, "c" => 2, "a" => 1 } + * h3 = { "a" => 1, "c" => 2, 7 => 35 } + * h4 = { "a" => 1, "d" => 2, "f" => 35 } + * h1 == h2 #=> false + * h2 == h3 #=> true + * h3 == h4 #=> false + * + */ + +static mrb_value +mrb_hash_equal(mrb_state *mrb, mrb_value hash1) +{ + mrb_value hash2; + mrb_get_args(mrb, "o", &hash2); + return hash_equal(mrb, hash1, hash2, FALSE); +} + +/* 15.2.13.4.32 (x)*/ +/* + * call-seq: + * hash.eql?(other) -> true or false + * + * Returns true if hash and other are + * both hashes with the same content. + */ + +static mrb_value +mrb_hash_eql(mrb_state *mrb, mrb_value hash1) +{ + mrb_value hash2; + mrb_get_args(mrb, "o", &hash2); + return hash_equal(mrb, hash1, hash2, TRUE); +} + +/* + * call-seq: + * hsh.merge!(other_hash) -> hsh + * hsh.update(other_hash) -> hsh + * hsh.merge!(other_hash){|key, oldval, newval| block} -> hsh + * hsh.update(other_hash){|key, oldval, newval| block} -> hsh + * + * Adds the contents of other_hash to hsh. If no + * block is specified, entries with duplicate keys are overwritten + * with the values from other_hash, otherwise the value + * of each duplicate key is determined by calling the block with + * the key, its value in hsh and its value in other_hash. + * + * h1 = { "a" => 100, "b" => 200 } + * h2 = { "b" => 254, "c" => 300 } + * h1.merge!(h2) #=> {"a"=>100, "b"=>254, "c"=>300} + * + * h1 = { "a" => 100, "b" => 200 } + * h2 = { "b" => 254, "c" => 300 } + * h1.merge!(h2) { |key, v1, v2| v1 } + * #=> {"a"=>100, "b"=>200, "c"=>300} + */ + +/* 15.2.13.4.22 */ +/* + * call-seq: + * hsh.merge(other_hash) -> new_hash + * hsh.merge(other_hash){|key, oldval, newval| block} -> new_hash + * + * Returns a new hash containing the contents of other_hash and + * the contents of hsh. If no block is specified, the value for + * entries with duplicate keys will be that of other_hash. 
Otherwise + * the value for each duplicate key is determined by calling the block + * with the key, its value in hsh and its value in other_hash. + * + * h1 = { "a" => 100, "b" => 200 } + * h2 = { "b" => 254, "c" => 300 } + * h1.merge(h2) #=> {"a"=>100, "b"=>254, "c"=>300} + * h1.merge(h2){|key, oldval, newval| newval - oldval} + * #=> {"a"=>100, "b"=>54, "c"=>300} + * h1 #=> {"a"=>100, "b"=>200} + * + */ + +/* + * call-seq: + * hash.assoc(obj) -> an_array or nil + * + * Searches through the hash comparing _obj_ with the key using ==. + * Returns the key-value pair (two elements array) or +nil+ + * if no match is found. See Array#assoc. + * + * h = {"colors" => ["red", "blue", "green"], + * "letters" => ["a", "b", "c" ]} + * h.assoc("letters") #=> ["letters", ["a", "b", "c"]] + * h.assoc("foo") #=> nil + */ + +mrb_value +mrb_hash_assoc(mrb_state *mrb, mrb_value hash) +{ + mrb_value key, value, has_key; + + mrb_get_args(mrb, "o", &key); + + if (mrb_nil_p(key)) + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments"); + + has_key = mrb_hash_has_keyWithKey(mrb, hash, key); + if (mrb_test(has_key)) { + value = mrb_hash_get(mrb, hash, key); + return mrb_assoc_new(mrb, key, value); + } + else { + return mrb_nil_value(); + } +} + +/* + * call-seq: + * hash.rassoc(key) -> an_array or nil + * + * Searches through the hash comparing _obj_ with the value using ==. + * Returns the first key-value pair (two-element array) that matches. See + * also Array#rassoc. 
+ * + * a = {1=> "one", 2 => "two", 3 => "three", "ii" => "two"} + * a.rassoc("two") #=> [2, "two"] + * a.rassoc("four") #=> nil + */ + +mrb_value +mrb_hash_rassoc(mrb_state *mrb, mrb_value hash) +{ + mrb_value key, value, has_key; + + mrb_get_args(mrb, "o", &key); + + if (mrb_nil_p(key)) + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments"); + + has_key = mrb_hash_has_keyWithKey(mrb, hash, key); + if (mrb_test(has_key)) { + value = mrb_hash_get(mrb, hash, key); + return mrb_assoc_new(mrb, value, key); + } + else { + return mrb_nil_value(); + } +} + +/* + * call-seq: + * hash.flatten -> an_array + * hash.flatten(level) -> an_array + * + * Returns a new array that is a one-dimensional flattening of this + * hash. That is, for every key or value that is an array, extract + * its elements into the new array. Unlike Array#flatten, this + * method does not flatten recursively by default. The optional + * level argument determines the level of recursion to flatten. + * + * a = {1=> "one", 2 => [2,"two"], 3 => "three"} + * a.flatten # => [1, "one", 2, [2, "two"], 3, "three"] + * a.flatten(2) # => [1, "one", 2, 2, "two", 3, "three"] + */ + +/* + * A Hash is a collection of key-value pairs. It is + * similar to an Array, except that indexing is done via + * arbitrary keys of any object type, not an integer index. Hashes enumerate + * their values in the order that the corresponding keys were inserted. + * + * Hashes have a default value that is returned when accessing + * keys that do not exist in the hash. By default, that value is + * nil. 
+ * + */ + +void +mrb_init_hash(mrb_state *mrb) +{ + struct RClass *h; + + h = mrb->hash_class = mrb_define_class(mrb, "Hash", mrb->object_class); + MRB_SET_INSTANCE_TT(h, MRB_TT_HASH); + + //mrb_define_class_method(mrb, h, "new", hash_s_new, ARGS_ANY()); + mrb_include_module(mrb, h, mrb_class_get(mrb, "Enumerable")); + mrb_define_method(mrb, h, "==", mrb_hash_equal, ARGS_REQ(1)); /* 15.2.13.4.1 */ + mrb_define_method(mrb, h, "[]", mrb_hash_aget, ARGS_REQ(1)); /* 15.2.13.4.2 */ + mrb_define_method(mrb, h, "[]=", mrb_hash_aset, ARGS_REQ(2)); /* 15.2.13.4.3 */ + mrb_define_method(mrb, h, "clear", mrb_hash_clear, ARGS_NONE()); /* 15.2.13.4.4 */ + mrb_define_method(mrb, h, "default", mrb_hash_default, ARGS_ANY()); /* 15.2.13.4.5 */ + mrb_define_method(mrb, h, "default=", mrb_hash_set_default, ARGS_REQ(1)); /* 15.2.13.4.6 */ + mrb_define_method(mrb, h, "default_proc", mrb_hash_default_proc,ARGS_NONE()); /* 15.2.13.4.7 */ + mrb_define_method(mrb, h, "__delete", mrb_hash_delete, ARGS_REQ(1)); /* core of 15.2.13.4.8 */ +//mrb_define_method(mrb, h, "each", mrb_hash_each_pair, ARGS_NONE()); /* 15.2.13.4.9 */ /* move to mrblib\hash.rb */ +//mrb_define_method(mrb, h, "each_key", mrb_hash_each_key, ARGS_NONE()); /* 15.2.13.4.10 */ /* move to mrblib\hash.rb */ +//mrb_define_method(mrb, h, "each_value", mrb_hash_each_value, ARGS_NONE()); /* 15.2.13.4.11 */ /* move to mrblib\hash.rb */ + mrb_define_method(mrb, h, "empty?", mrb_hash_empty_p, ARGS_NONE()); /* 15.2.13.4.12 */ + mrb_define_method(mrb, h, "has_key?", mrb_hash_has_key, ARGS_REQ(1)); /* 15.2.13.4.13 */ + mrb_define_method(mrb, h, "has_value?", mrb_hash_has_value, ARGS_REQ(1)); /* 15.2.13.4.14 */ + mrb_define_method(mrb, h, "include?", mrb_hash_has_key, ARGS_REQ(1)); /* 15.2.13.4.15 */ + mrb_define_method(mrb, h, "__init_core", mrb_hash_init_core, ARGS_ANY()); /* core of 15.2.13.4.16 */ + mrb_define_method(mrb, h, "initialize_copy", mrb_hash_replace, ARGS_REQ(1)); /* 15.2.13.4.17 */ + mrb_define_method(mrb, h, "key?", 
mrb_hash_has_key, ARGS_REQ(1)); /* 15.2.13.4.18 */ + mrb_define_method(mrb, h, "keys", mrb_hash_keys, ARGS_NONE()); /* 15.2.13.4.19 */ + mrb_define_method(mrb, h, "length", mrb_hash_size_m, ARGS_NONE()); /* 15.2.13.4.20 */ + mrb_define_method(mrb, h, "member?", mrb_hash_has_key, ARGS_REQ(1)); /* 15.2.13.4.21 */ +//mrb_define_method(mrb, h, "merge", mrb_hash_merge, ARGS_REQ(1)); /* 15.2.13.4.22 */ /* move to mrblib\hash.rb */ + mrb_define_method(mrb, h, "replace", mrb_hash_replace, ARGS_REQ(1)); /* 15.2.13.4.23 */ + mrb_define_method(mrb, h, "shift", mrb_hash_shift, ARGS_NONE()); /* 15.2.13.4.24 */ + mrb_define_method(mrb, h, "size", mrb_hash_size_m, ARGS_NONE()); /* 15.2.13.4.25 */ + mrb_define_method(mrb, h, "store", mrb_hash_aset, ARGS_REQ(2)); /* 15.2.13.4.26 */ + mrb_define_method(mrb, h, "value?", mrb_hash_has_value, ARGS_REQ(1)); /* 15.2.13.4.27 */ + mrb_define_method(mrb, h, "values", mrb_hash_values, ARGS_NONE()); /* 15.2.13.4.28 */ + + mrb_define_method(mrb, h, "to_hash", mrb_hash_to_hash, ARGS_NONE()); /* 15.2.13.4.29 (x)*/ + mrb_define_method(mrb, h, "inspect", mrb_hash_inspect, ARGS_NONE()); /* 15.2.13.4.30 (x)*/ + mrb_define_alias(mrb, h, "to_s", "inspect"); /* 15.2.13.4.31 (x)*/ + mrb_define_method(mrb, h, "eql?", mrb_hash_eql, ARGS_REQ(1)); /* 15.2.13.4.32 (x)*/ +} diff --git a/src/init.c b/src/init.c new file mode 100644 index 0000000000..ce039ce30d --- /dev/null +++ b/src/init.c @@ -0,0 +1,105 @@ +#include "mruby.h" + +void mrb_init_class(mrb_state*); +void mrb_init_symtbl(mrb_state*); +void mrb_init_symbols(mrb_state*); +void mrb_init_object(mrb_state*); +void mrb_init_kernel(mrb_state*); +void mrb_init_enumerable(mrb_state*); +void mrb_init_comparable(mrb_state*); +void mrb_init_array(mrb_state*); +void mrb_init_hash(mrb_state*); +void mrb_init_numeric(mrb_state*); +void mrb_init_proc(mrb_state*); +void mrb_init_range(mrb_state*); +void mrb_init_string(mrb_state*); +void mrb_init_regexp(mrb_state*); +void mrb_init_encoding(mrb_state*); +void 
mrb_init_exception(mrb_state*); +void mrb_init_time(mrb_state *); +void mrb_init_io(mrb_state *); +void mrb_init_file(mrb_state *); +void mrb_init_thread(mrb_state *); +void mrb_init_struct(mrb_state *); +void mrb_init_gc(mrb_state *); +void Init_var_tables(mrb_state *mrb); +void Init_version(mrb_state *mrb); +void mrb_init_print(mrb_state *mrb); +void mrb_init_mrblib(mrb_state *mrb); + +#define MANDEL +#ifdef MANDEL +#include +#include +static mrb_value +mpow(mrb_state *mrb, mrb_value obj) +{ + mrb_float x, y; + + mrb_get_args(mrb, "ff", &x, &y); + x = pow(x, y); + + return mrb_float_value(x); +} + +static mrb_value +msqrt(mrb_state *mrb, mrb_value obj) +{ + mrb_float x; + + mrb_get_args(mrb, "f", &x); + x = sqrt(x); + + return mrb_float_value(x); +} + +static mrb_value +mputc(mrb_state *mrb, mrb_value obj) +{ + int x; + + mrb_get_args(mrb, "i", &x); + putc(x, stdout); + + return mrb_nil_value(); +} +#endif + +void +mrb_init_core(mrb_state *mrb) +{ + mrb_init_symtbl(mrb); + + mrb_init_class(mrb); + mrb_init_object(mrb); + mrb_init_kernel(mrb); + mrb_init_comparable(mrb); + mrb_init_enumerable(mrb); + + mrb_init_symbols(mrb); + mrb_init_proc(mrb); + mrb_init_string(mrb); + Init_version(mrb); /* after init_string */ + mrb_init_array(mrb); + mrb_init_hash(mrb); + mrb_init_numeric(mrb); + mrb_init_range(mrb); + mrb_init_struct(mrb); + mrb_init_gc(mrb); +#ifdef INCLUDE_REGEXP + mrb_init_regexp(mrb); + mrb_init_encoding(mrb); +#endif + mrb_init_exception(mrb); + mrb_init_print(mrb); + +#ifdef MANDEL + mrb_define_method(mrb, mrb->kernel_module, "pow", mpow, ARGS_REQ(2)); + mrb_define_method(mrb, mrb->kernel_module, "sqrt", msqrt, ARGS_REQ(1)); + mrb_define_method(mrb, mrb->kernel_module, "putc", mputc, ARGS_REQ(1)); +#endif + + mrb_init_mrblib(mrb); + + mrb_gc_arena_restore(mrb, 0); +} diff --git a/src/init_ext.c b/src/init_ext.c new file mode 100644 index 0000000000..cb094f1717 --- /dev/null +++ b/src/init_ext.c @@ -0,0 +1,10 @@ +#include "mruby.h" + +void 
+mrb_init_ext(mrb_state *mrb) +{ +#ifdef INCLUDE_SOCKET + extern void mrb_init_socket(mrb_state *mrb); + mrb_init_socket(mrb); +#endif +} diff --git a/src/irep.h b/src/irep.h new file mode 100644 index 0000000000..5ec6cc6f1d --- /dev/null +++ b/src/irep.h @@ -0,0 +1,23 @@ +#ifndef MRUBY_IREP_H +#define MRUBY_IREP_H + +typedef struct mrb_irep { + int idx; + + int flags; + int nlocals; + int nregs; + + mrb_code *iseq; + mrb_value *pool; + int *syms; + + int ilen, plen, slen; +} mrb_irep; + +#define MRB_IREP_NOFREE 3 +#define MRB_ISEQ_NOFREE 1 + +void mrb_add_irep(mrb_state *mrb, int n); + +#endif /* MRUBY_IREP_H */ diff --git a/src/kernel.c b/src/kernel.c new file mode 100644 index 0000000000..e5b2cab04f --- /dev/null +++ b/src/kernel.c @@ -0,0 +1,1530 @@ +#include "mruby.h" +#include "mruby/string.h" +#include +#include +#include +#include "mruby/proc.h" + +#include "mruby/range.h" +#include "mruby/array.h" +#include "mruby/hash.h" +#include "mruby/class.h" +#include "mruby/struct.h" +#include "variable.h" +#include "ritehash.h" +#include "error.h" +#include "method.h" +#include "mdata.h" + +#ifdef INCLUDE_REGEXP +#include "re.h" +#include "regint.h" +#endif + +KHASH_MAP_INIT_INT(mt, struct RProc*); +KHASH_MAP_INIT_INT(iv, mrb_value); + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +static mrb_value tst_setconst(mrb_state *mrb, mrb_value obj); +int kiv_lookup(khash_t(iv) *table, mrb_sym key, mrb_value *value); + +struct obj_ivar_tag { + mrb_value obj; + int (*func)(mrb_sym key, mrb_value val, void * arg); + void * arg; +}; + +static int +obj_ivar_i(mrb_sym key, int index, struct obj_ivar_tag *arg) +{ + enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; + struct obj_ivar_tag *data = (struct obj_ivar_tag *)arg; + if ((long)index < ROBJECT_NUMIV(data->obj)) { + mrb_value val = ROBJECT_IVPTR(data->obj)->vals[(long)index]; + if (val.tt != MRB_TT_FREE) { + return (data->func)((mrb_sym)key, val, data->arg); + } + } + return 
ST_CONTINUE; +} + +void +mrb_ivar_foreach(mrb_state *mrb, mrb_value obj, int (*func)(ANYARGS), void* arg) +{ + struct obj_ivar_tag data; + switch (mrb_type(obj)) { + case MRB_TT_OBJECT: + //obj_ivar_each(mrb, obj, func, arg); + if (RCLASS_IV_TBL(obj)) { + data.obj = obj; + data.func = (int (*)(mrb_sym key, mrb_value val, void * arg))func; + data.arg = arg; + st_foreach_safe(mrb, RCLASS_IV_TBL(obj), obj_ivar_i, (void *)&data); + } + break; + case MRB_TT_CLASS: + case MRB_TT_MODULE: + if (RCLASS_IV_TBL(obj)) { + st_foreach_safe(mrb, RCLASS_IV_TBL(obj), func, arg); + } + break; + default: + if (!ROBJECT_IVPTR(obj)/*generic_iv_tbl*/) break; + if (/*FL_TEST(obj, FL_EXIVAR) ||*/ mrb_special_const_p(obj)) { + mrb_value *tbl=0; + if (kiv_lookup(ROBJECT_IVPTR(obj)/*generic_iv_tbl*/, SYM2ID(obj), tbl)) { + st_foreach_safe(mrb, (void *)tbl, func, arg); + } + } + break; + } +} + +static int +inspect_i(mrb_state *mrb, mrb_sym id, mrb_value value, mrb_value str) +{ + enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; + mrb_value str2; + const char *ivname; + /* need not to show internal data */ + if (RSTRING_PTR(str)[0] == '-') { /* first element */ + RSTRING_PTR(str)[0] = '#'; + mrb_str_cat2(mrb, str, " "); + } + else { + mrb_str_cat2(mrb, str, ", "); + } + ivname = mrb_sym2name(mrb, id); + mrb_str_cat2(mrb, str, ivname); + mrb_str_cat2(mrb, str, "="); + str2 = mrb_inspect(mrb, value); + mrb_str_append(mrb, str, str2); + //OBJ_INFECT(str, str2); + + return ST_CONTINUE; +} + +static mrb_value +inspect_obj(mrb_state *mrb, mrb_value obj, mrb_value str, int recur) +{ + if (recur) { + mrb_str_cat2(mrb, str, " ..."); + } + else { + mrb_ivar_foreach(mrb, obj, inspect_i, &str); + } + mrb_str_cat2(mrb, str, ">"); + RSTRING_PTR(str)[0] = '#'; + //OBJ_INFECT(str, obj); + + return str; +} + +int +mrb_obj_basic_to_s_p(mrb_state *mrb, mrb_value obj) +{ + //const mrb_method_entry_t *me = mrb_method_entry(CLASS_OF(obj), mrb_intern("to_s")); + //if (me && me->def && me->def->type == 
VM_METHOD_TYPE_CFUNC && + //me->def->body.cfunc.func == mrb_any_to_s) + struct RProc *me = mrb_method_search(mrb, mrb_class(mrb, obj), mrb_intern(mrb, "to_s")); + if (me && MRB_PROC_CFUNC_P(me) && (me->body.func == mrb_any_to_s)) + return 1; + return 0; +} + +/* 15.3.1.3.17 */ +/* + * call-seq: + * obj.inspect -> string + * + * Returns a string containing a human-readable representation of + * obj. If not overridden and no instance variables, uses the + * to_s method to generate the string. + * obj. If not overridden, uses the to_s method to + * generate the string. + * + * [ 1, 2, 3..4, 'five' ].inspect #=> "[1, 2, 3..4, \"five\"]" + * Time.new.inspect #=> "2008-03-08 19:43:39 +0900" + */ +mrb_value +mrb_obj_inspect(mrb_state *mrb, mrb_value obj) +{ + if ((mrb_type(obj) == MRB_TT_OBJECT) && mrb_obj_basic_to_s_p(mrb, obj)) { + int has_ivar = 0; + mrb_value *ptr = (mrb_value *)ROBJECT_IVPTR(obj); + long len = ROBJECT_NUMIV(obj); + long i; + + for (i = 0; i < len; i++) { + if (ptr[i].tt != MRB_TT_FREE) { + has_ivar = 1; + break; + } + } + + if (has_ivar) { + mrb_value str; + const char *c = mrb_obj_classname(mrb, obj); + + str = mrb_sprintf(mrb, "-<%s:%p", c, (void*)&obj); + return inspect_obj(mrb, obj, str, 0); + } + return mrb_any_to_s(mrb, obj); + } + else if (mrb_nil_p(obj)) { + return mrb_str_new_cstr(mrb, "nil"); + } + return mrb_funcall(mrb, obj, "to_s", 0, 0); +} + +/* 15.3.1.3.1 */ +/* 15.3.1.3.10 */ +/* 15.3.1.3.11 */ +/* + * call-seq: + * obj == other -> true or false + * obj.equal?(other) -> true or false + * obj.eql?(other) -> true or false + * + * Equality---At the Object level, == returns + * true only if obj and other are the + * same object. Typically, this method is overridden in descendant + * classes to provide class-specific meaning. + * + * Unlike ==, the equal? method should never be + * overridden by subclasses: it is used to determine object identity + * (that is, a.equal?(b) iff a is the same + * object as b). + * + * The eql? 
method returns true if + * obj and anObject have the same value. Used by + * Hash to test members for equality. For objects of + * class Object, eql? is synonymous with + * ==. Subclasses normally continue this tradition, but + * there are exceptions. Numeric types, for example, + * perform type conversion across ==, but not across + * eql?, so: + * + * 1 == 1.0 #=> true + * 1.eql? 1.0 #=> false + */ +static mrb_value +mrb_obj_equal_m(mrb_state *mrb, mrb_value self) +{ + mrb_value arg; + + mrb_get_args(mrb, "o", &arg); + if (mrb_obj_equal(mrb, self, arg)) { + return mrb_true_value(); + } + else { + return mrb_false_value(); + } +} + +static mrb_value +mrb_obj_not_equal_m(mrb_state *mrb, mrb_value self) +{ + mrb_value arg; + + mrb_get_args(mrb, "o", &arg); + if (mrb_obj_equal(mrb, self, arg)) { + return mrb_false_value(); + } + else { + return mrb_true_value(); + } +} + +/* 15.3.1.3.2 */ +/* + * call-seq: + * obj === other -> true or false + * + * Case Equality---For class Object, effectively the same + * as calling #==, but typically overridden by descendants + * to provide meaningful semantics in case statements. + */ +static mrb_value +mrb_equal_m(mrb_state *mrb, mrb_value self) +{ + mrb_value arg; + + mrb_get_args(mrb, "o", &arg); + if (mrb_equal(mrb, self, arg)){ + return mrb_true_value(); + } + else { + return mrb_false_value(); + } +} + +/* 15.3.1.3.3 */ +/* 15.3.1.3.33 */ +/* + * call-seq: + * obj.hash -> fixnum + * + * Generates a Fixnum hash value for this object. This + * function must have the property that a.eql?(b) implies + * a.hash == b.hash. The hash value is used by class + * Hash. Any hash value that exceeds the capacity of a + * Fixnum will be truncated before being used. 
+ */ +static mrb_value +mrb_obj_id_m(mrb_state *mrb, mrb_value self) +{ + return mrb_fixnum_value(mrb_obj_id(self)); +} + +mrb_value +send_internal(int argc, mrb_value *argv, mrb_value recv, enum call_type ctype) +{ + return mrb_nil_value(); /* dummy */ +} + +mrb_value +mrb_f_send(int argc, mrb_value *argv, mrb_value recv) +{ + return send_internal(argc, argv, recv, CALL_FCALL); +} + +/* 15.3.1.3.4 */ +/* 15.3.1.3.44 */ +/* + * call-seq: + * obj.send(symbol [, args...]) -> obj + * obj.__send__(symbol [, args...]) -> obj + * + * Invokes the method identified by _symbol_, passing it any + * arguments specified. You can use __send__ if the name + * +send+ clashes with an existing method in _obj_. + * + * class Klass + * def hello(*args) + * "Hello " + args.join(' ') + * end + * end + * k = Klass.new + * k.send :hello, "gentle", "readers" #=> "Hello gentle readers" + */ +static mrb_value +mrb_f_send_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return mrb_f_send(argc, argv, self); +} + +/* 15.3.1.2.1 */ +/* 15.3.1.3.5 */ +/* + * call-seq: + * spawn([env,] command... [,options]) -> pid + * Process.spawn([env,] command... [,options]) -> pid + * + * spawn executes specified command and return its pid. + */ +static mrb_value +mrb_f_spawn_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return (mrb_f_send(argc, argv, self)); +} + +static mrb_value +mrb_f_block_given_p(void) +{ + return mrb_false_value(); /* dummy */ +} + +/* 15.3.1.2.2 */ +/* 15.3.1.2.5 */ +/* 15.3.1.3.6 */ +/* 15.3.1.3.25 */ +/* + * call-seq: + * block_given? -> true or false + * iterator? -> true or false + * + * Returns true if yield would execute a + * block in the current context. The iterator? form + * is mildly deprecated. + * + * def try + * if block_given? 
+ * yield + * else + * "no block" + * end + * end + * try #=> "no block" + * try { "hello" } #=> "hello" + * try do "hello" end #=> "hello" + */ +static mrb_value +mrb_f_block_given_p_m(mrb_state *mrb, mrb_value self) +{ + return mrb_f_block_given_p(); +} + +/* 15.3.1.3.7 */ +/* + * call-seq: + * obj.class -> class + * + * Returns the class of obj. This method must always be + * called with an explicit receiver, as class is also a + * reserved word in Ruby. + * + * 1.class #=> Fixnum + * self.class #=> Object + */ +static mrb_value +mrb_obj_class_m(mrb_state *mrb, mrb_value self) +{ + return mrb_obj_value(mrb_obj_class(mrb, self)); +} + +struct RClass* +mrb_singleton_class_clone(mrb_state *mrb, mrb_value obj) +{ + struct RClass *klass = RBASIC(obj)->c; + + //if (!FL_TEST(klass, FL_SINGLETON)) + //return klass; + if (klass->tt != MRB_TT_SCLASS) + return klass; + else { + //struct clone_method_data data; + /* copy singleton(unnamed) class */ + //VALUE clone = class_alloc(RBASIC(klass)->flags, 0); + struct RClass *clone = mrb_obj_alloc(mrb, klass->tt, mrb->class_class); + //clone->super = objklass->super; + + if ((mrb_type(obj) == MRB_TT_CLASS) || + (mrb_type(obj) == MRB_TT_SCLASS)) { /* BUILTIN_TYPE(obj) == T_CLASS */ + clone->c = clone; + } + else { + clone->c = mrb_singleton_class_clone(mrb, mrb_obj_value(klass)); + } + + clone->super = klass->super; + if (klass->iv) { + //clone->iv = st_copy(klass->iv); + clone->iv = klass->iv; + } + clone->mt = kh_init(mt, mrb); + clone->tt = MRB_TT_SCLASS; + return clone; + } +} + +static void +init_copy(mrb_state *mrb, mrb_value dest, mrb_value obj) +{ + //if (OBJ_FROZEN(dest)) { + // rb_raise(rb_eTypeError, "[bug] frozen object (%s) allocated", rb_obj_classname(dest)); + //} + //RBASIC(dest)->flags &= ~(T_MASK|FL_EXIVAR); + //RBASIC(dest)->flags |= RBASIC(obj)->flags & (T_MASK|FL_EXIVAR|FL_TAINT); + //if (FL_TEST(obj, FL_EXIVAR)) { + // mrb_copy_generic_ivar(dest, obj); + //} + //mrb_gc_copy_finalizer(dest, obj); + switch 
(mrb_type(obj)) { + case MRB_TT_OBJECT: + case MRB_TT_CLASS: + case MRB_TT_MODULE: + if (ROBJECT(dest)->iv) { + //st_free_table(ROBJECT(dest)->iv); + ROBJECT(dest)->iv = 0; + } + if (ROBJECT(obj)->iv) { + //ROBJECT(dest)->iv = st_copy((st_table *)ROBJECT(obj)->iv); + ROBJECT(dest)->iv = ROBJECT(obj)->iv; + } + } + mrb_funcall(mrb, dest, "initialize_copy", 1, obj); +} + +/* 15.3.1.3.8 */ +/* + * call-seq: + * obj.clone -> an_object + * + * Produces a shallow copy of obj---the instance variables of + * obj are copied, but not the objects they reference. Copies + * the frozen and tainted state of obj. See also the discussion + * under Object#dup. + * + * class Klass + * attr_accessor :str + * end + * s1 = Klass.new #=> # + * s1.str = "Hello" #=> "Hello" + * s2 = s1.clone #=> # + * s2.str[1,4] = "i" #=> "i" + * s1.inspect #=> "#" + * s2.inspect #=> "#" + * + * This method may have class-specific behavior. If so, that + * behavior will be documented under the #+initialize_copy+ method of + * the class. + * + * Some Class(True False Nil Symbol Fixnum Float) Object cannot clone. + */ +mrb_value +mrb_obj_clone(mrb_state *mrb, mrb_value self) +{ + struct RObject *clone; + + if (mrb_special_const_p(self)) { + mrb_raise(mrb, E_TYPE_ERROR, "can't clone %s", mrb_obj_classname(mrb, self)); + } + clone = mrb_obj_alloc(mrb, self.tt, mrb_obj_class(mrb, self)); + clone->c = mrb_singleton_class_clone(mrb, self); + //RBASIC(clone)->flags = (RBASIC(obj)->flags | FL_TEST(clone, FL_TAINT) | FL_TEST(clone, FL_UNTRUSTED)) & ~(FL_FREEZE|FL_FINALIZE); + init_copy(mrb, mrb_obj_value(clone), self); + //1-9-2 no bug mrb_funcall(mrb, clone, "initialize_clone", 1, self); + //RBASIC(clone)->flags |= RBASIC(obj)->flags & FL_FREEZE; + + return mrb_obj_value(clone); +} + +/* 15.3.1.3.9 */ +/* + * call-seq: + * obj.dup -> an_object + * + * Produces a shallow copy of obj---the instance variables of + * obj are copied, but not the objects they reference. + * dup copies the tainted state of obj. 
See also + * the discussion under Object#clone. In general, + * clone and dup may have different semantics + * in descendant classes. While clone is used to duplicate + * an object, including its internal state, dup typically + * uses the class of the descendant object to create the new instance. + * + * This method may have class-specific behavior. If so, that + * behavior will be documented under the #+initialize_copy+ method of + * the class. + */ + +mrb_value +mrb_obj_dup(mrb_state *mrb, mrb_value obj) +{ + struct RBasic *p; + mrb_value dup; + + if (mrb_special_const_p(obj)) { + mrb_raise(mrb, E_TYPE_ERROR, "can't dup %s", mrb_obj_classname(mrb, obj)); + } + p = mrb_obj_alloc(mrb, mrb_type(obj), mrb_obj_class(mrb, obj)); + //init_copy(dup, obj); + dup = mrb_obj_value(p); + mrb_funcall(mrb, dup, "initialize_copy", 1, obj); + + return dup; +} + +/* 15.3.1.2.3 */ +/* 15.3.1.3.12 */ +/* + * call-seq: + * eval(string [, binding [, filename [,lineno]]]) -> obj + * + * Evaluates the Ruby expression(s) in string. If + * binding is given, which must be a Binding + * object, the evaluation is performed in its context. If the + * optional filename and lineno parameters are + * present, they will be used when reporting syntax errors. 
+ * + * def getBinding(str) + * return binding + * end + * str = "hello" + * eval "str + ' Fred'" #=> "hello Fred" + * eval "str + ' Fred'", getBinding("bye") #=> "bye Fred" + */ +mrb_value +mrb_f_eval(int argc, mrb_value *argv, mrb_value self) +{ + return mrb_false_value(); /* dummy */ +} +mrb_value +mrb_f_eval_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return mrb_f_eval(argc, argv, self); +} + +static mrb_value +mrb_obj_extend(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj) +{ + int i; + + if (argc == 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (at least 1)"); + } + for (i = 0; i < argc; i++) { + //Check_Type(argv[i], T_MODULE); + mrb_check_type(mrb, argv[i], MRB_TT_MODULE); + } + while (argc--) { + mrb_funcall(mrb, argv[argc], "extend_object", 1, obj); + mrb_funcall(mrb, argv[argc], "extended", 1, obj); + } + return obj; +} + +/* 15.3.1.3.13 */ +/* + * call-seq: + * obj.extend(module, ...) -> obj + * + * Adds to _obj_ the instance methods from each module given as a + * parameter. + * + * module Mod + * def hello + * "Hello from Mod.\n" + * end + * end + * + * class Klass + * def hello + * "Hello from Klass.\n" + * end + * end + * + * k = Klass.new + * k.hello #=> "Hello from Klass.\n" + * k.extend(Mod) #=> # + * k.hello #=> "Hello from Mod.\n" + */ +mrb_value +mrb_obj_extend_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return mrb_obj_extend(mrb, argc, argv, self); +} + +/* 15.3.1.2.4 */ +/* 15.3.1.3.14 */ +/* + * call-seq: + * global_variables -> array + * + * Returns an array of the names of global variables. 
+ * + * global_variables.grep /std/ #=> [:$stdin, :$stdout, :$stderr] + */ +//mrb_value +//mrb_f_global_variables(mrb_state *mrb, mrb_value self) + +/* 15.3.1.3.15 */ +mrb_value +mrb_obj_hash(mrb_state *mrb, mrb_value self) +{ + return mrb_fixnum_value(mrb_obj_id(self)); +} + +/* 15.3.1.3.16 */ +mrb_value +mrb_obj_init_copy(mrb_state *mrb, mrb_value self) +{ + mrb_value orig; + + mrb_get_args(mrb, "o", &orig); + if (mrb_obj_equal(mrb, self, orig)) return self; + if ((mrb_type(self) != mrb_type(orig)) || (mrb_obj_class(mrb, self) != mrb_obj_class(mrb, orig))) { + mrb_raise(mrb, E_TYPE_ERROR, "initialize_copy should take same class object"); + } + return self; +} + +/* 15.3.1.3.18 */ +/* + * call-seq: + * obj.instance_eval(string [, filename [, lineno]] ) -> obj + * obj.instance_eval {| | block } -> obj + * + * Evaluates a string containing Ruby source code, or the given block, + * within the context of the receiver (_obj_). In order to set the + * context, the variable +self+ is set to _obj_ while + * the code is executing, giving the code access to _obj_'s + * instance variables. In the version of instance_eval + * that takes a +String+, the optional second and third + * parameters supply a filename and starting line number that are used + * when reporting compilation errors. + * + * class KlassWithSecret + * def initialize + * @secret = 99 + * end + * end + * k = KlassWithSecret.new + * k.instance_eval { @secret } #=> 99 + */ +mrb_value +mrb_obj_instance_eval(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + mrb_value b, klass; + + mrb_get_args(mrb, "&", &b); + return mrb_yield_with_self(mrb, b, 0, 0, self); +} + +/* 15.3.1.3.19 */ +/* + * call-seq: + * obj.instance_of?(class) -> true or false + * + * Returns true if obj is an instance of the given + * class. See also Object#kind_of?. 
+ */ +mrb_value +rb_obj_is_instance_of(mrb_state *mrb, mrb_value self) +{ + mrb_value arg; + + mrb_get_args(mrb, "o", &arg); + if (mrb_obj_is_instance_of(mrb, self, mrb_class_ptr(arg))){ + return mrb_true_value(); + } + else { + return mrb_false_value(); + } +} + +/* 15.3.1.3.20 */ +/* + * call-seq: + * obj.instance_variable_defined?(symbol) -> true or false + * + * Returns true if the given instance variable is + * defined in obj. + * + * class Fred + * def initialize(p1, p2) + * @a, @b = p1, p2 + * end + * end + * fred = Fred.new('cat', 99) + * fred.instance_variable_defined?(:@a) #=> true + * fred.instance_variable_defined?("@b") #=> true + * fred.instance_variable_defined?("@c") #=> false + */ +mrb_value +mrb_obj_ivar_defined(mrb_state *mrb, mrb_value self) +{ + mrb_value arg; + khiter_t k; + kh_iv_t *h = RCLASS_IV_TBL(self); + + mrb_get_args(mrb, "o", &arg); + mrb_sym mid = mrb_to_id(mrb, arg); + + //if (!mrb_is_instance_id(id)) { + // mrb_name_error(id, "`%s' is not allowed as an instance variable name", mrb_sym2name(mrb, id)); + //} + //return mrb_ivar_defined(self, id); + k = kh_get(iv, h, mid); + if (k != kh_end(h)) { + return mrb_true_value(); + } + else { + return mrb_false_value(); + } +} + +/* 15.3.1.3.21 */ +/* + * call-seq: + * obj.instance_variable_get(symbol) -> obj + * + * Returns the value of the given instance variable, or nil if the + * instance variable is not set. The @ part of the + * variable name should be included for regular instance + * variables. Throws a NameError exception if the + * supplied symbol is not valid as an instance variable name. 
+ * + * class Fred + * def initialize(p1, p2) + * @a, @b = p1, p2 + * end + * end + * fred = Fred.new('cat', 99) + * fred.instance_variable_get(:@a) #=> "cat" + * fred.instance_variable_get("@b") #=> 99 + */ +mrb_value +mrb_obj_ivar_get(mrb_state *mrb, mrb_value self) +{ + mrb_value arg; + + mrb_get_args(mrb, "o", &arg); + mrb_sym id = mrb_to_id(mrb, arg); + + //if (!mrb_is_instance_id(id)) { + // mrb_name_error(mrb, id, "`%s' is not allowed as an instance variable name", mrb_sym2name(mrb, id)); + //} + return mrb_iv_get(mrb, self, id); +} + +/* 15.3.1.3.22 */ +/* + * call-seq: + * obj.instance_variable_set(symbol, obj) -> obj + * + * Sets the instance variable names by symbol to + * object, thereby frustrating the efforts of the class's + * author to attempt to provide proper encapsulation. The variable + * did not have to exist prior to this call. + * + * class Fred + * def initialize(p1, p2) + * @a, @b = p1, p2 + * end + * end + * fred = Fred.new('cat', 99) + * fred.instance_variable_set(:@a, 'dog') #=> "dog" + * fred.instance_variable_set(:@c, 'cat') #=> "cat" + * fred.inspect #=> "#" + */ +mrb_value +mrb_obj_ivar_set(mrb_state *mrb, mrb_value self) +{ + mrb_value key; + mrb_value val; + + mrb_get_args(mrb, "oo", &key, &val); + mrb_sym id = mrb_to_id(mrb, key); + + //if (!mrb_is_instance_id(id)) { + // mrb_name_error(mrb, id, "`%s' is not allowed as an instance variable name", mrb_sym2name(mrb, id)); + //} + mrb_iv_set(mrb, self, id, val); + return val; +} + +/* 15.3.1.3.23 */ +/* + * call-seq: + * obj.instance_variables -> array + * + * Returns an array of instance variable names for the receiver. Note + * that simply defining an accessor does not create the corresponding + * instance variable. 
+ * + * class Fred + * attr_accessor :a1 + * def initialize + * @iv = 3 + * end + * end + * Fred.new.instance_variables #=> [:@iv] + */ +mrb_value +mrb_obj_instance_variables(mrb_state *mrb, mrb_value self) +{ + mrb_value ary; + kh_iv_t *h = RCLASS_IV_TBL(self); + int i; + const char* p; + + ary = mrb_ary_new(mrb); + //if (mrb_is_instance_id(key)) { + // mrb_ary_push(mrb, ary, mrb_sym2name(mrb, key)); + //} + for (i=0;i true or false + * obj.kind_of?(class) -> true or false + * + * Returns true if class is the class of + * obj, or if class is one of the superclasses of + * obj or modules included in obj. + * + * module M; end + * class A + * include M + * end + * class B < A; end + * class C < B; end + * b = B.new + * b.instance_of? A #=> false + * b.instance_of? B #=> true + * b.instance_of? C #=> false + * b.instance_of? M #=> false + * b.kind_of? A #=> true + * b.kind_of? B #=> true + * b.kind_of? C #=> false + * b.kind_of? M #=> true + */ +mrb_value +mrb_obj_is_kind_of_m(mrb_state *mrb, mrb_value self) +{ + mrb_value arg; + + mrb_get_args(mrb, "o", &arg); + if (mrb_obj_is_kind_of(mrb, self, mrb_class_ptr(arg))) { + return mrb_true_value(); + } + else { + return mrb_false_value(); + } +} + +/* 15.3.1.2.6 */ +/* 15.3.1.3.27 */ +/* + * call-seq: + * lambda { |...| block } -> a_proc + * + * Equivalent to Proc.new, except the resulting Proc objects + * check the number of parameters passed when called. + */ +mrb_value +proc_lambda(mrb_state *mrb, mrb_value self) +{ + //return mrb_block_lambda(); + return mrb_nil_value(); /* dummy */ +} + +/* 15.3.1.2.7 */ +/* 15.3.1.3.28 */ +/* + * call-seq: + * local_variables -> array + * + * Returns the names of the current local variables. + * + * fred = 1 + * for i in 1..10 + * # ... 
+ * end + * local_variables #=> [:fred, :i] + */ +mrb_value +mrb_f_local_variables(mrb_state *mrb, mrb_value self) +{ + mrb_value ary; + + ary = mrb_ary_new(mrb); + return ary; /* dummy */ +} + +/* 15.3.1.2.8 */ +/* 15.3.1.3.29 */ +/* + * call-seq: + * loop { block } + * loop -> an_enumerator + * + * Repeatedly executes the block. + * + * If no block is given, an enumerator is returned instead. + * + * loop do + * print "Input: " + * line = gets + * break if !line or line =~ /^qQ/ + * # ... + * end + * + * StopIteration raised in the block breaks the loop. + */ +mrb_value +mrb_f_loop(mrb_state *mrb, mrb_value self) +{ + return mrb_nil_value(); /* dummy */ +} + +static void +method_entry_loop(mrb_state *mrb, struct RClass* klass, mrb_value ary) +{ + int i; + + khash_t(mt) *h = klass->mt; + for (i=0;itt == MRB_TT_ICLASS) || + (klass->tt == MRB_TT_SCLASS)) { + } + else + { + if (!recur) break; + } + oldklass = klass; + klass = klass->super; + } + //st_foreach(list, func, ary); + //st_free_table(list); + + return ary; +} + +mrb_value +mrb_obj_singleton_methods(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj) +{ + mrb_value recur, ary; + //st_table *list; + struct RClass* klass; + + if (argc == 0) { + recur = mrb_true_value(); + } + else { + //mrb_scan_args(argc, argv, "01", &recur); + recur = argv[0]; + } + klass = mrb_class(mrb, obj); + //list = st_init_numtable(); + ary = mrb_ary_new(mrb); + if (klass && (klass->tt == MRB_TT_SCLASS)) { + //st_foreach(RCLASS_M_TBL(klass), method_entry, (st_data_t)list); + method_entry_loop(mrb, klass, ary); + klass = klass->super; + } + if (RTEST(recur)) { + while (klass && ((klass->tt == MRB_TT_SCLASS) || (klass->tt == MRB_TT_ICLASS))) { + //st_foreach(RCLASS_M_TBL(klass), method_entry, (st_data_t)list); + method_entry_loop(mrb, klass, ary); + klass = klass->super; + } + } + //st_foreach(list, ins_methods_i, ary); + //st_free_table(list); + + return ary; +} + +mrb_value +mrb_obj_methods(mrb_state *mrb, int argc, mrb_value 
*argv, mrb_value obj, mrb_method_flag_t flag) +{ +retry: + if (argc == 0) { + mrb_value args[1]; + + args[0] = mrb_true_value(); + return class_instance_method_list(mrb, argc, argv, mrb_class(mrb, obj), 0); + } + else { + mrb_value recur; + + //mrb_scan_args(argc, argv, "1", &recur); + recur = argv[0]; + if (mrb_test(recur)) { + argc = 0; + goto retry; + } + return mrb_obj_singleton_methods(mrb, argc, argv, obj); + } +} +/* 15.3.1.3.31 */ +/* + * call-seq: + * obj.methods -> array + * + * Returns a list of the names of methods publicly accessible in + * obj. This will include all the methods accessible in + * obj's ancestors. + * + * class Klass + * def kMethod() + * end + * end + * k = Klass.new + * k.methods[0..9] #=> [:kMethod, :freeze, :nil?, :is_a?, + * # :class, :instance_variable_set, + * # :methods, :extend, :__send__, :instance_eval] + * k.methods.length #=> 42 + */ +mrb_value +mrb_obj_methods_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return mrb_obj_methods(mrb, argc, argv, self, 0); /* everything but private */ +} + +/* 15.3.1.3.32 */ +/* + * call_seq: + * nil.nil? -> true + * .nil? -> false + * + * Only the object nil responds true to nil?. + */ +mrb_value +mrb_false(mrb_state *mrb, mrb_value self) +{ + return mrb_false_value(); +} + +/* 15.3.1.2.10 */ +/* 15.3.1.3.35 */ +/* + * call-seq: + * print(obj, ...) -> nil + * + * Prints each object in turn to $stdout. If the output + * field separator ($,) is not +nil+, its + * contents will appear between each field. If the output record + * separator ($\\) is not +nil+, it will be + * appended to the output. If no arguments are given, prints + * $_. Objects that aren't strings will be converted by + * calling their to_s method. 
+ * + * print "cat", [1,2,3], 99, "\n" + * $, = ", " + * $\ = "\n" + * print "cat", [1,2,3], 99 + * + * produces: + * + * cat12399 + * cat, 1, 2, 3, 99 + */ + +/* 15.3.1.3.36 */ +/* + * call-seq: + * obj.private_methods(all=true) -> array + * + * Returns the list of private methods accessible to obj. If + * the all parameter is set to false, only those methods + * in the receiver will be listed. + */ +mrb_value +mrb_obj_private_methods(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return mrb_obj_methods(mrb, argc, argv, self, NOEX_PRIVATE); /* private attribute not define */ +} + +/* 15.3.1.3.37 */ +/* + * call-seq: + * obj.protected_methods(all=true) -> array + * + * Returns the list of protected methods accessible to obj. If + * the all parameter is set to false, only those methods + * in the receiver will be listed. + */ +mrb_value +mrb_obj_protected_methods(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return mrb_obj_methods(mrb, argc, argv, self, NOEX_PROTECTED); /* protected attribute not define */ +} + +/* 15.3.1.3.38 */ +/* + * call-seq: + * obj.public_methods(all=true) -> array + * + * Returns the list of public methods accessible to obj. If + * the all parameter is set to false, only those methods + * in the receiver will be listed. + */ +mrb_value +mrb_obj_public_methods(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return mrb_obj_methods(mrb, argc, argv, self, NOEX_PUBLIC); /* public attribute not define */ +} + +/* 15.3.1.2.11 */ +/* 15.3.1.3.39 */ +/* + * call-seq: + * puts(obj, ...) -> nil + * + * Equivalent to + * + * $stdout.puts(obj, ...) 
+ */ + +static mrb_value +get_errinfo(mrb_state *mrb) +{ + //return get_thread_errinfo(GET_THREAD()); + return mrb_str_new_cstr(mrb, "error!!"); /* dummy */ +} + +/* 15.3.1.2.12 */ +/* 15.3.1.3.40 */ +/* + * call-seq: + * raise + * raise(string) + * raise(exception [, string [, array]]) + * fail + * fail(string) + * fail(exception [, string [, array]]) + * + * With no arguments, raises the exception in $! or raises + * a RuntimeError if $! is +nil+. + * With a single +String+ argument, raises a + * +RuntimeError+ with the string as a message. Otherwise, + * the first parameter should be the name of an +Exception+ + * class (or an object that returns an +Exception+ object when sent + * an +exception+ message). The optional second parameter sets the + * message associated with the exception, and the third parameter is an + * array of callback information. Exceptions are caught by the + * +rescue+ clause of begin...end blocks. + * + * raise "Failed to create socket" + * raise ArgumentError, "No parameters", caller + */ +mrb_value +mrb_f_raise(mrb_state *mrb, mrb_value self) +{ + mrb_value err; + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + if (argc == 0) { + err = get_errinfo(mrb); + if (!mrb_nil_p(err)) { + argc = 1; + argv[0] = err; + } + } + mrb_exc_raise(mrb, mrb_make_exception(mrb, argc, argv)); + return mrb_nil_value(); /* not reached */ +} + +/* 15.3.1.3.41 */ +/* + * call-seq: + * obj.remove_instance_variable(symbol) -> obj + * + * Removes the named instance variable from obj, returning that + * variable's value. 
+ * + * class Dummy + * attr_reader :var + * def initialize + * @var = 99 + * end + * def remove + * remove_instance_variable(:@var) + * end + * end + * d = Dummy.new + * d.var #=> 99 + * d.remove #=> 99 + * d.var #=> nil + */ +mrb_value +mrb_obj_remove_instance_variable(mrb_state *mrb, mrb_value self) +{ + mrb_sym sym; + mrb_value name; + khash_t(iv) *h; + khiter_t k; + mrb_value val; + mrb_value Qundef = mrb_undef_value(); + + mrb_get_args(mrb, "o", &name); + sym = mrb_to_id(mrb, name); + //if (OBJ_FROZEN(obj)) mrb_error_frozen("object"); + //if (!mrb_is_instance_id(id)) { + // mrb_name_error(mrb, id, "`%s' is not allowed as an instance variable name", mrb_sym2name(mrb, id)); + //} + switch (mrb_type(self)) { + case MRB_TT_OBJECT: + case MRB_TT_CLASS: + case MRB_TT_MODULE: + if (!mrb_obj_ptr(self)->iv) break; + h = mrb_obj_ptr(self)->iv; + k = kh_get(iv, h, sym); + if (k != kh_end(h)) { + val = kh_value(h, k); + if (!mrb_obj_equal(mrb, val, Qundef)) { + kh_value(h, k) = Qundef; + return val; + } + } + break; + //default: + // if (mrb_special_const_p(obj)) { + // v = val; + // if (generic_ivar_remove(obj, (st_data_t)id, &v)) { + // return (VALUE)v; + // } + // } + // break; + } + mrb_name_error(mrb, sym, "instance variable %s not defined", mrb_sym2name(mrb, sym)); + return mrb_nil_value(); /* not reached */ +} + +/* 15.3.1.2.13 */ +/* 15.3.1.3.42 */ +/* + * call-seq: + * require(string) -> true or false + * + * Ruby tries to load the library named _string_, returning + * +true+ if successful. If the filename does not resolve to + * an absolute path, it will be searched for in the directories listed + * in $:. If the file has the extension ``.rb'', it is + * loaded as a source file; if the extension is ``.so'', ``.o'', or + * ``.dll'', or whatever the default shared library extension is on + * the current platform, Ruby loads the shared library as a Ruby + * extension. Otherwise, Ruby tries adding ``.rb'', ``.so'', and so on + * to the name. 
The name of the loaded feature is added to the array in + * $". A feature will not be loaded if its name already + * appears in $". The file name is converted to an absolute + * path, so ``require 'a'; require './a''' will not load + * a.rb twice. + * + * require "my-library.rb" + * require "db-driver" + */ +mrb_value +mrb_f_require(mrb_state *mrb, mrb_value self) +{ + mrb_value fname; + + mrb_get_args(mrb, "o", &fname); + return mrb_nil_value(); /* dummy */ +} + + +static inline int +basic_obj_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym id, int pub) +{ + return mrb_respond_to(mrb, obj, id); + //return TRUE; +} +/* 15.3.1.3.43 */ +/* + * call-seq: + * obj.respond_to?(symbol, include_private=false) -> true or false + * + * Returns +true+ if _obj_ responds to the given + * method. Private methods are included in the search only if the + * optional second parameter evaluates to +true+. + * + * If the method is not implemented, + * as Process.fork on Windows, File.lchmod on GNU/Linux, etc., + * false is returned. + * + * If the method is not defined, respond_to_missing? + * method is called and the result is returned. + */ +mrb_value +obj_respond_to(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + mrb_value mid, priv; + mrb_sym id; + + mrb_get_args(mrb, "*", &argv, &argc); + //mrb_scan_args(argc, argv, "11", &mid, &priv); + mid = argv[0]; + if (argc > 1) priv = argv[1]; + else priv = mrb_nil_value(); + id = mrb_to_id(mrb, mid); + if (basic_obj_respond_to(mrb, self, id, !RTEST(priv))) + return mrb_true_value(); + return mrb_false_value(); +} + +/* 15.3.1.3.45 */ +/* + * call-seq: + * obj.singleton_methods(all=true) -> array + * + * Returns an array of the names of singleton methods for obj. + * If the optional all parameter is true, the list will include + * methods in modules included in obj. + * Only public and protected singleton methods are returned. 
+ * + * module Other + * def three() end + * end + * + * class Single + * def Single.four() end + * end + * + * a = Single.new + * + * def a.one() + * end + * + * class << a + * include Other + * def two() + * end + * end + * + * Single.singleton_methods #=> [:four] + * a.singleton_methods(false) #=> [:two, :one] + * a.singleton_methods #=> [:two, :one, :three] + */ +mrb_value +mrb_obj_singleton_methods_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return mrb_obj_singleton_methods(mrb, argc, argv, self); +} + +mrb_value mrb_f_sprintf(mrb_state *mrb, mrb_value obj); /* in sprintf.c */ + +void +mrb_init_kernel(mrb_state *mrb) +{ + struct RClass *krn; + + krn = mrb->kernel_module = mrb_define_module(mrb, "Kernel"); + mrb_define_class_method(mrb, krn, "'", mrb_f_spawn_m, ARGS_ANY()); /* 15.3.1.2.1 */ + mrb_define_class_method(mrb, krn, "block_given?", mrb_f_block_given_p_m, ARGS_NONE()); /* 15.3.1.2.2 */ + mrb_define_class_method(mrb, krn, "eval", mrb_f_eval_m, ARGS_ANY()); /* 15.3.1.2.3 */ + mrb_define_class_method(mrb, krn, "global_variables", mrb_f_global_variables, ARGS_NONE()); /* 15.3.1.2.4 */ + mrb_define_class_method(mrb, krn, "iterator?", mrb_f_block_given_p_m, ARGS_NONE()); /* 15.3.1.2.5 */ + mrb_define_class_method(mrb, krn, "lambda", proc_lambda, ARGS_NONE()); /* 15.3.1.2.6 */ + mrb_define_class_method(mrb, krn, "local_variables", mrb_f_local_variables, ARGS_NONE()); /* 15.3.1.2.7 */ + mrb_define_class_method(mrb, krn, "loop", mrb_f_loop, ARGS_NONE()); /* 15.3.1.2.8 */ +; /* 15.3.1.2.11 */ + mrb_define_class_method(mrb, krn, "raise", mrb_f_raise, ARGS_ANY()); /* 15.3.1.2.12 */ + mrb_define_class_method(mrb, krn, "require", mrb_f_require, ARGS_REQ(1)); /* 15.3.1.2.13 */ + + mrb_define_method(mrb, krn, "singleton_class", mrb_singleton_class, ARGS_NONE()); + + mrb_define_method(mrb, krn, "==", mrb_obj_equal_m, ARGS_REQ(1)); /* 15.3.1.3.1 */ + mrb_define_method(mrb, krn, "!=", 
mrb_obj_not_equal_m, ARGS_REQ(1)); + mrb_define_method(mrb, krn, "===", mrb_equal_m, ARGS_REQ(1)); /* 15.3.1.3.2 */ + mrb_define_method(mrb, krn, "__id__", mrb_obj_id_m, ARGS_NONE()); /* 15.3.1.3.3 */ + mrb_define_method(mrb, krn, "__send__", mrb_f_send_m, ARGS_ANY()); /* 15.3.1.3.4 */ + mrb_define_method(mrb, krn, "'", mrb_f_spawn_m, ARGS_ANY()); /* 15.3.1.3.5 *//* "spawn"->"'" */ + mrb_define_method(mrb, krn, "block_given?", mrb_f_block_given_p_m, ARGS_NONE()); /* 15.3.1.3.6 */ + mrb_define_method(mrb, krn, "class", mrb_obj_class_m, ARGS_NONE()); /* 15.3.1.3.7 */ + mrb_define_method(mrb, krn, "clone", mrb_obj_clone, ARGS_NONE()); /* 15.3.1.3.8 */ + mrb_define_method(mrb, krn, "dup", mrb_obj_dup, ARGS_NONE()); /* 15.3.1.3.9 */ + mrb_define_method(mrb, krn, "eql?", mrb_obj_equal_m, ARGS_REQ(1)); /* 15.3.1.3.10 */ + mrb_define_method(mrb, krn, "equal?", mrb_obj_equal_m, ARGS_REQ(1)); /* 15.3.1.3.11 */ + mrb_define_method(mrb, krn, "eval", mrb_f_eval_m, ARGS_ANY()); /* 15.3.1.3.12 */ + mrb_define_method(mrb, krn, "extend", mrb_obj_extend_m, ARGS_ANY()); /* 15.3.1.3.13 */ + mrb_define_method(mrb, krn, "global_variables", mrb_f_global_variables, ARGS_NONE()); /* 15.3.1.3.14 */ + mrb_define_method(mrb, krn, "hash", mrb_obj_hash, ARGS_NONE()); /* 15.3.1.3.15 */ + mrb_define_method(mrb, krn, "initialize_copy", mrb_obj_init_copy, ARGS_REQ(1)); /* 15.3.1.3.16 */ + mrb_define_method(mrb, krn, "inspect", mrb_obj_inspect, ARGS_NONE()); /* 15.3.1.3.17 */ + mrb_define_method(mrb, krn, "instance_eval", mrb_obj_instance_eval, ARGS_ANY()); /* 15.3.1.3.18 */ + mrb_define_method(mrb, krn, "instance_of?", rb_obj_is_instance_of, ARGS_REQ(1)); /* 15.3.1.3.19 */ + mrb_define_method(mrb, krn, "instance_variable_defined?", mrb_obj_ivar_defined, ARGS_REQ(1)); /* 15.3.1.3.20 */ + mrb_define_method(mrb, krn, "instance_variable_get", mrb_obj_ivar_get, ARGS_REQ(1)); /* 15.3.1.3.21 */ + mrb_define_method(mrb, krn, "instance_variable_set", mrb_obj_ivar_set, ARGS_REQ(2)); /* 15.3.1.3.22 */ + 
mrb_define_method(mrb, krn, "instance_variables", mrb_obj_instance_variables, ARGS_NONE()); /* 15.3.1.3.23 */ + mrb_define_method(mrb, krn, "is_a?", mrb_obj_is_kind_of_m, ARGS_REQ(1)); /* 15.3.1.3.24 */ + mrb_define_method(mrb, krn, "iterator?", mrb_f_block_given_p_m, ARGS_NONE()); /* 15.3.1.3.25 */ + mrb_define_method(mrb, krn, "kind_of?", mrb_obj_is_kind_of_m, ARGS_REQ(1)); /* 15.3.1.3.26 */ + mrb_define_method(mrb, krn, "lambda", proc_lambda, ARGS_NONE()); /* 15.3.1.3.27 */ + mrb_define_method(mrb, krn, "local_variables", mrb_f_local_variables, ARGS_NONE()); /* 15.3.1.3.28 */ + mrb_define_method(mrb, krn, "loop", mrb_f_loop, ARGS_NONE()); /* 15.3.1.3.29 */ + mrb_define_method(mrb, krn, "methods", mrb_obj_methods_m, ARGS_ANY()); /* 15.3.1.3.31 */ + mrb_define_method(mrb, krn, "nil?", mrb_false, ARGS_NONE()); /* 15.3.1.3.32 */ + mrb_define_method(mrb, krn, "object_id", mrb_obj_id_m, ARGS_NONE()); /* 15.3.1.3.33 */ + mrb_define_method(mrb, krn, "private_methods", mrb_obj_private_methods, ARGS_ANY()); /* 15.3.1.3.36 */ + mrb_define_method(mrb, krn, "protected_methods", mrb_obj_protected_methods, ARGS_ANY()); /* 15.3.1.3.37 */ + mrb_define_method(mrb, krn, "public_methods", mrb_obj_public_methods, ARGS_ANY()); /* 15.3.1.3.38 */ + mrb_define_method(mrb, krn, "raise", mrb_f_raise, ARGS_ANY()); /* 15.3.1.3.40 */ + mrb_define_method(mrb, krn, "remove_instance_variable", mrb_obj_remove_instance_variable,ARGS_REQ(1)); /* 15.3.1.3.41 */ + mrb_define_method(mrb, krn, "require", mrb_f_require, ARGS_REQ(1)); /* 15.3.1.3.42 */ + mrb_define_method(mrb, krn, "respond_to?", obj_respond_to, ARGS_ANY()); /* 15.3.1.3.43 */ + mrb_define_method(mrb, krn, "send", mrb_f_send_m, ARGS_ANY()); /* 15.3.1.3.44 */ + mrb_define_method(mrb, krn, "singleton_methods", mrb_obj_singleton_methods_m, ARGS_ANY()); /* 15.3.1.3.45 */ + mrb_define_method(mrb, krn, "to_s", mrb_any_to_s, ARGS_NONE()); /* 15.3.1.3.46 */ + + mrb_define_method(mrb, krn, "sprintf", mrb_f_sprintf, ARGS_ANY()); /* in sprintf.c */ 
+ mrb_define_method(mrb, krn, "format", mrb_f_sprintf, ARGS_ANY()); /* in sprintf.c */ + + mrb_include_module(mrb, mrb->object_class, mrb->kernel_module); +} diff --git a/src/keywords b/src/keywords new file mode 100644 index 0000000000..be5324875a --- /dev/null +++ b/src/keywords @@ -0,0 +1,50 @@ +%{ +struct kwtable {const char *name; int id[2]; enum mrb_lex_state_enum state;}; +const struct kwtable *mrb_reserved_word(const char *, unsigned int); +static const struct kwtable *reserved_word(const char *, unsigned int); +#define mrb_reserved_word(str, len) reserved_word(str, len) +%} + +struct kwtable; +%% +__ENCODING__, {keyword__ENCODING__, keyword__ENCODING__}, EXPR_END +__LINE__, {keyword__LINE__, keyword__LINE__}, EXPR_END +__FILE__, {keyword__FILE__, keyword__FILE__}, EXPR_END +BEGIN, {keyword_BEGIN, keyword_BEGIN}, EXPR_END +END, {keyword_END, keyword_END}, EXPR_END +alias, {keyword_alias, keyword_alias}, EXPR_FNAME +and, {keyword_and, keyword_and}, EXPR_VALUE +begin, {keyword_begin, keyword_begin}, EXPR_BEG +break, {keyword_break, keyword_break}, EXPR_MID +case, {keyword_case, keyword_case}, EXPR_VALUE +class, {keyword_class, keyword_class}, EXPR_CLASS +def, {keyword_def, keyword_def}, EXPR_FNAME +do, {keyword_do, keyword_do}, EXPR_BEG +else, {keyword_else, keyword_else}, EXPR_BEG +elsif, {keyword_elsif, keyword_elsif}, EXPR_VALUE +end, {keyword_end, keyword_end}, EXPR_END +ensure, {keyword_ensure, keyword_ensure}, EXPR_BEG +false, {keyword_false, keyword_false}, EXPR_END +for, {keyword_for, keyword_for}, EXPR_VALUE +if, {keyword_if, modifier_if}, EXPR_VALUE +in, {keyword_in, keyword_in}, EXPR_VALUE +module, {keyword_module, keyword_module}, EXPR_VALUE +next, {keyword_next, keyword_next}, EXPR_MID +nil, {keyword_nil, keyword_nil}, EXPR_END +not, {keyword_not, keyword_not}, EXPR_ARG +or, {keyword_or, keyword_or}, EXPR_VALUE +redo, {keyword_redo, keyword_redo}, EXPR_END +rescue, {keyword_rescue, modifier_rescue}, EXPR_MID +retry, {keyword_retry, 
keyword_retry}, EXPR_END +return, {keyword_return, keyword_return}, EXPR_MID +self, {keyword_self, keyword_self}, EXPR_END +super, {keyword_super, keyword_super}, EXPR_ARG +then, {keyword_then, keyword_then}, EXPR_BEG +true, {keyword_true, keyword_true}, EXPR_END +undef, {keyword_undef, keyword_undef}, EXPR_FNAME +unless, {keyword_unless, modifier_unless}, EXPR_VALUE +until, {keyword_until, modifier_until}, EXPR_VALUE +when, {keyword_when, keyword_when}, EXPR_VALUE +while, {keyword_while, modifier_while}, EXPR_VALUE +yield, {keyword_yield, keyword_yield}, EXPR_ARG +%% diff --git a/src/lex.def b/src/lex.def new file mode 100644 index 0000000000..9e3938b6a4 --- /dev/null +++ b/src/lex.def @@ -0,0 +1,216 @@ +/* C code produced by gperf version 3.0.3 */ +/* Command-line: gperf -C -p -j1 -i 1 -g -o -t -N mrb_reserved_word -k'1,3,$' keywords */ + +#ifa' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
+#endif + +#line 1 "keywords" + +struct kwtable {const char *name; int id[2]; enum mrb_lex_state_enum state;}; +const struct kwtable *mrb_reserved_word(const char *, unsigned int); +static const struct kwtable *reserved_word(const char *, unsigned int); +#define mrb_reserved_word(str, len) reserved_word(str, len) +#line 8 "keywords" +struct kwtable; + +#define TOTAL_KEYWORDS 40 +#define MIN_WORD_LENGTH 2 +#define MAX_WORD_LENGTH 12 +#define MIN_HASH_VALUE 8 +#define MAX_HASH_VALUE 50 +/* maximum key range = 43, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (str, len) + register const char *str; + register unsigned int len; +{ + static const unsigned char asso_values[] = + { + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 14, 51, 16, 8, + 11, 13, 51, 51, 51, 51, 10, 51, 13, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 11, 51, 13, 1, 26, + 4, 1, 8, 28, 51, 23, 51, 1, 1, 27, + 5, 19, 21, 51, 8, 3, 3, 11, 51, 21, + 24, 16, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51 + }; + register int hval = len; + + switch (hval) + { + default: + hval += asso_values[(unsigned char)str[2]]; + /*FALLTHROUGH*/ + case 2: + case 1: + hval += 
asso_values[(unsigned char)str[0]]; + break; + } + return hval + asso_values[(unsigned char)str[len - 1]]; +} + +#ifdef __GNUC__ +__inline +#ifdef __GNUC_STDC_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct kwtable * +mrb_reserved_word (str, len) + register const char *str; + register unsigned int len; +{ + static const struct kwtable wordlist[] = + { + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 18 "keywords" + {"break", {keyword_break, keyword_break}, EXPR_MID}, +#line 23 "keywords" + {"else", {keyword_else, keyword_else}, EXPR_BEG}, +#line 33 "keywords" + {"nil", {keyword_nil, keyword_nil}, EXPR_END}, +#line 26 "keywords" + {"ensure", {keyword_ensure, keyword_ensure}, EXPR_BEG}, +#line 25 "keywords" + {"end", {keyword_end, keyword_end}, EXPR_END}, +#line 42 "keywords" + {"then", {keyword_then, keyword_then}, EXPR_BEG}, +#line 34 "keywords" + {"not", {keyword_not, keyword_not}, EXPR_ARG}, +#line 27 "keywords" + {"false", {keyword_false, keyword_false}, EXPR_END}, +#line 40 "keywords" + {"self", {keyword_self, keyword_self}, EXPR_END}, +#line 24 "keywords" + {"elsif", {keyword_elsif, keyword_elsif}, EXPR_VALUE}, +#line 37 "keywords" + {"rescue", {keyword_rescue, modifier_rescue}, EXPR_MID}, +#line 43 "keywords" + {"true", {keyword_true, keyword_true}, EXPR_END}, +#line 46 "keywords" + {"until", {keyword_until, modifier_until}, EXPR_VALUE}, +#line 45 "keywords" + {"unless", {keyword_unless, modifier_unless}, EXPR_VALUE}, +#line 39 "keywords" + {"return", {keyword_return, keyword_return}, EXPR_MID}, +#line 21 "keywords" + {"def", {keyword_def, keyword_def}, EXPR_FNAME}, +#line 16 "keywords" + {"and", {keyword_and, keyword_and}, EXPR_VALUE}, +#line 22 "keywords" + {"do", {keyword_do, keyword_do}, EXPR_BEG}, +#line 49 "keywords" + {"yield", {keyword_yield, keyword_yield}, EXPR_ARG}, +#line 28 "keywords" + {"for", {keyword_for, keyword_for}, EXPR_VALUE}, +#line 44 "keywords" + {"undef", {keyword_undef, keyword_undef}, EXPR_FNAME}, 
+#line 35 "keywords" + {"or", {keyword_or, keyword_or}, EXPR_VALUE}, +#line 30 "keywords" + {"in", {keyword_in, keyword_in}, EXPR_VALUE}, +#line 47 "keywords" + {"when", {keyword_when, keyword_when}, EXPR_VALUE}, +#line 38 "keywords" + {"retry", {keyword_retry, keyword_retry}, EXPR_END}, +#line 29 "keywords" + {"if", {keyword_if, modifier_if}, EXPR_VALUE}, +#line 19 "keywords" + {"case", {keyword_case, keyword_case}, EXPR_VALUE}, +#line 36 "keywords" + {"redo", {keyword_redo, keyword_redo}, EXPR_END}, +#line 32 "keywords" + {"next", {keyword_next, keyword_next}, EXPR_MID}, +#line 41 "keywords" + {"super", {keyword_super, keyword_super}, EXPR_ARG}, +#line 31 "keywords" + {"module", {keyword_module, keyword_module}, EXPR_VALUE}, +#line 17 "keywords" + {"begin", {keyword_begin, keyword_begin}, EXPR_BEG}, +#line 11 "keywords" + {"__LINE__", {keyword__LINE__, keyword__LINE__}, EXPR_END}, +#line 12 "keywords" + {"__FILE__", {keyword__FILE__, keyword__FILE__}, EXPR_END}, +#line 10 "keywords" + {"__ENCODING__", {keyword__ENCODING__, keyword__ENCODING__}, EXPR_END}, +#line 14 "keywords" + {"END", {keyword_END, keyword_END}, EXPR_END}, +#line 15 "keywords" + {"alias", {keyword_alias, keyword_alias}, EXPR_FNAME}, +#line 13 "keywords" + {"BEGIN", {keyword_BEGIN, keyword_BEGIN}, EXPR_END}, + {""}, +#line 20 "keywords" + {"class", {keyword_class, keyword_class}, EXPR_CLASS}, + {""}, {""}, +#line 48 "keywords" + {"while", {keyword_while, modifier_while}, EXPR_VALUE} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &wordlist[key]; + } + } + return 0; +} +#line 50 "keywords" + diff --git a/src/load.c b/src/load.c new file mode 100644 index 0000000000..848cf8f9ac --- /dev/null +++ b/src/load.c @@ -0,0 +1,642 @@ +#include +#include "dump.h" + +#include "mruby/string.h" +#ifdef 
INCLUDE_REGEXP +#include "re.h" +#endif +#include "irep.h" + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +typedef struct _RiteFILE +{ + FILE* fp; + unsigned char buf[256]; + int cnt; + int readlen; +} RiteFILE; + +const char hex2bin[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //00-0f + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //10-1f + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //20-2f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, //30-3f + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, //40-4f + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //50-5f + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0 //60-6f + //70-ff +}; + +static uint16_t hex_to_bin8(unsigned char*,unsigned char*); +static uint16_t hex_to_bin16(unsigned char*,unsigned char*); +static uint16_t hex_to_bin32(unsigned char*,unsigned char*); +static uint8_t hex_to_uint8(unsigned char*); +static uint16_t hex_to_uint16(unsigned char*); +static uint32_t hex_to_uint32(unsigned char*); +static char* hex_to_str(char*,char*,uint16_t*); +uint16_t calc_crc_16_ccitt(unsigned char*,int); +static unsigned char rite_fgetcSub(RiteFILE*); +static unsigned char rite_fgetc(RiteFILE*,int); +static unsigned char* rite_fgets(RiteFILE*,unsigned char*,int,int); +static int load_rite_header(FILE*,rite_binary_header*,unsigned char*); +static int load_rite_irep_record(mrb_state*, RiteFILE*,unsigned char*,uint32_t*); +static int read_rite_header(mrb_state*,unsigned char*,rite_binary_header*); +static int read_rite_irep_record(mrb_state*,unsigned char*,mrb_irep*,uint32_t*); + + +static unsigned char +rite_fgetcSub(RiteFILE* rfp) +{ + //only first call + if (rfp->buf[0] == '\0') { + rfp->readlen = fread(rfp->buf, 1, sizeof(rfp->buf), rfp->fp); + rfp->cnt = 0; + } + + if (rfp->readlen == rfp->cnt) { + rfp->readlen = fread(rfp->buf, 1, sizeof(rfp->buf), rfp->fp); + rfp->cnt = 0; + if (rfp->readlen == 0) { + return '\0'; + } + } + return 
rfp->buf[(rfp->cnt)++]; +} + +static unsigned char +rite_fgetc(RiteFILE* rfp, int ignorecomment) +{ + unsigned char tmp; + + for (;;) { + tmp = rite_fgetcSub(rfp); + if (tmp == '\n' || tmp == '\r') { + continue; + } + else if (ignorecomment && tmp == '#') { + while (tmp != '\n' && tmp != '\r' && tmp != '\0') + tmp = rite_fgetcSub(rfp); + if (tmp == '\0') + return '\0'; + } + else { + return tmp; + } + } +} + +static unsigned char* +rite_fgets(RiteFILE* rfp, unsigned char* dst, int len, int ignorecomment) +{ + int i; + + for (i=0; irbfi, file_header.rbfi, sizeof(file_header.rbfi)); + if (memcmp(bin_header->rbfi, RITE_FILE_IDENFIFIER, sizeof(bin_header->rbfi)) != 0) { + return MRB_DUMP_INVALID_FILE_HEADER; //File identifier error + } + memcpy(bin_header->rbfv, file_header.rbfv, sizeof(file_header.rbfv)); + if (memcmp(bin_header->rbfv, RITE_FILE_FORMAT_VER, sizeof(bin_header->rbfv)) != 0) { + return MRB_DUMP_INVALID_FILE_HEADER; //File format version error + } + memcpy(bin_header->risv, file_header.risv, sizeof(file_header.risv)); + memcpy(bin_header->rct, file_header.rct, sizeof(file_header.rct)); + memcpy(bin_header->rcv, file_header.rcv, sizeof(file_header.rcv)); + hex_to_bin32(bin_header->rbds, file_header.rbds); + hex_to_bin16(bin_header->nirep, file_header.nirep); + hex_to_bin16(bin_header->sirep, file_header.sirep); + memcpy(bin_header->rsv, file_header.rsv, sizeof(file_header.rsv)); + memcpy(hcrc, file_header.hcrc, sizeof(file_header.hcrc)); + + return MRB_DUMP_OK; +} + +static int +load_rite_irep_record(mrb_state *mrb, RiteFILE* rfp, unsigned char* dst, uint32_t* len) +{ + int i; + uint32_t blocklen; + uint16_t offset, tt, pdl, snl, clen; + unsigned char hex2[2], hex4[4], hex8[8], hcrc[4]; + unsigned char *pStart; + char *char_buf; + uint16_t buf_size =0; + + buf_size = MRB_DUMP_DEFAULT_STR_LEN; + if ((char_buf = mrb_malloc(mrb, buf_size)) == 0) + goto error_exit; + + pStart = dst; + + //IREP HEADER BLOCK + *dst = rite_fgetc(rfp, TRUE); //record identifier + 
if (*dst != RITE_IREP_IDENFIFIER) + return MRB_DUMP_INVALID_IREP; + dst += sizeof(unsigned char); + *dst = rite_fgetc(rfp, TRUE); //class or module + dst += sizeof(unsigned char); + rite_fgets(rfp, hex4, sizeof(hex4), TRUE); //number of local variable + dst += hex_to_bin16(dst, hex4); + rite_fgets(rfp, hex4, sizeof(hex4), TRUE); //number of register variable + dst += hex_to_bin16(dst, hex4); + rite_fgets(rfp, hex4, sizeof(hex4), TRUE); //offset of isec block + offset = hex_to_uint16(hex4); + rite_fgets(rfp, hcrc, sizeof(hcrc), TRUE); //header CRC + memset( char_buf, '\0', buf_size); + rite_fgets(rfp, (unsigned char*)char_buf, (offset - (MRB_DUMP_SIZE_OF_SHORT * RITE_FILE_HEX_SIZE)), TRUE); //class or module name + hex_to_str(char_buf, (char*)(dst + MRB_DUMP_SIZE_OF_SHORT + MRB_DUMP_SIZE_OF_SHORT), &clen); //class or module name + dst += uint16_to_bin((MRB_DUMP_SIZE_OF_SHORT/*crc*/ + clen), (char*)dst); //offset of isec block + dst += hex_to_bin16(dst, hcrc); //header CRC + dst += clen; + + //ISEQ BLOCK + rite_fgets(rfp, hex8, sizeof(hex8), TRUE); //iseq length + dst += hex_to_bin32(dst, hex8); + blocklen = hex_to_uint32(hex8); + for (i=0; i buf_size - 1) { + buf_size = pdl + 1; + if ((char_buf = mrb_realloc(mrb, char_buf, buf_size)) == 0) + goto error_exit; + } + memset(char_buf, '\0', buf_size); + rite_fgets(rfp, (unsigned char*)char_buf, pdl, FALSE); //pool + hex_to_str(char_buf, (char*)(dst + MRB_DUMP_SIZE_OF_SHORT), &clen); + dst += uint16_to_bin(clen, (char*)dst); + dst += clen; + } + rite_fgets(rfp, hcrc, sizeof(hcrc), TRUE); //pool CRC + dst += hex_to_bin16(dst, hcrc); + + //SYMS BLOCK + rite_fgets(rfp, hex8, sizeof(hex8), TRUE); //syms length + dst += hex_to_bin32(dst, hex8); + blocklen = hex_to_uint32(hex8); + for (i=0; i buf_size - 1) { + buf_size = snl + 1; + if ((char_buf = mrb_realloc(mrb, char_buf, buf_size)) == 0) + goto error_exit; + } + memset(char_buf, '\0', buf_size); + rite_fgets(rfp, (unsigned char*)char_buf, snl, FALSE); //symbol name + 
hex_to_str(char_buf, (char*)(dst + MRB_DUMP_SIZE_OF_SHORT), &clen); + dst += uint16_to_bin(clen, (char*)dst); + dst += clen; + } + rite_fgets(rfp, hcrc, sizeof(hcrc), TRUE); //syms CRC + dst += hex_to_bin16(dst, hcrc); + + *len = dst - pStart; + +error_exit: + if (char_buf) + mrb_free(mrb, char_buf); + + return MRB_DUMP_OK; +} + +int +mrb_load_irep(mrb_state *mrb, FILE* fp) +{ + int ret, i; + uint32_t len, rlen; + unsigned char hex8[8], hcrc[4]; + unsigned char *dst, *rite_dst = NULL; + rite_binary_header bin_header; + RiteFILE ritefp, *rfp; + + if ((mrb == NULL) || (fp == NULL)) { + return MRB_DUMP_INVALID_ARGUMENT; + } + memset(&ritefp, 0, sizeof(ritefp)); + ritefp.fp = fp; + rfp = &ritefp; + + //Read File Header Section + if ((ret = load_rite_header(fp, &bin_header, hcrc)) != MRB_DUMP_OK) + return ret; + + len = sizeof(rite_binary_header) + bin_to_uint32(bin_header.rbds); + if ((rite_dst = mrb_malloc(mrb, len)) == NULL) + return MRB_DUMP_GENERAL_FAILURE; + + dst = rite_dst; + memset(dst, 0x00, len); + memcpy(dst, &bin_header, sizeof(rite_binary_header)); + dst += sizeof(rite_binary_header); + dst += hex_to_bin16(dst, hcrc); + + //Read Binary Data Section + len = bin_to_uint16(bin_header.nirep); + for (i=0; irbfi, RITE_FILE_IDENFIFIER, sizeof(bin_header->rbfi)) != 0) { + return MRB_DUMP_INVALID_FILE_HEADER; //File identifier error + } + if (memcmp(bin_header->risv, RITE_VM_VER, sizeof(bin_header->risv)) != 0) { + return MRB_DUMP_INVALID_FILE_HEADER; //Instruction set version check + } + + crc = calc_crc_16_ccitt((unsigned char *)bin_header, sizeof(*bin_header)); //Calculate CRC + if (crc != bin_to_uint16(bin)) { + return MRB_DUMP_INVALID_FILE_HEADER; //CRC error + } + + return bin_to_uint16(bin_header->nirep); +} + +static int +read_rite_irep_record(mrb_state *mrb, unsigned char *src, mrb_irep *irep, uint32_t* len) +{ + int i, ret = MRB_DUMP_OK; + char *buf; + unsigned char *recordStart, *pStart; + uint16_t crc, tt, pdl, snl, offset, 
bufsize=MRB_DUMP_DEFAULT_STR_LEN; + mrb_int fix_num; + mrb_float f; + mrb_value str; + + recordStart = src; + buf = mrb_malloc(mrb, bufsize); + if (buf == NULL) { + ret = MRB_DUMP_INVALID_IREP; + goto error_exit; + } + + //Header Section + pStart = src; + if (*src != RITE_IREP_IDENFIFIER) + return MRB_DUMP_INVALID_IREP; + src += (sizeof(unsigned char) * 2); + irep->nlocals = bin_to_uint16(src); //number of local variable + src += MRB_DUMP_SIZE_OF_SHORT; + irep->nregs = bin_to_uint16(src); //number of register variable + src += MRB_DUMP_SIZE_OF_SHORT; + offset = bin_to_uint16(src); //offset of isec block + src += MRB_DUMP_SIZE_OF_SHORT; + crc = calc_crc_16_ccitt(pStart, src - pStart); //Calculate CRC + if (crc != bin_to_uint16(src)) //header CRC + return MRB_DUMP_INVALID_IREP; + src += offset; + + //Binary Data Section + //ISEQ BLOCK + pStart = src; + irep->ilen = bin_to_uint32(src); //iseq length + src += MRB_DUMP_SIZE_OF_LONG; + if (irep->ilen > 0) { + if ((irep->iseq = mrb_malloc(mrb, sizeof(mrb_code) * irep->ilen)) == NULL) { + ret = MRB_DUMP_GENERAL_FAILURE; + goto error_exit; + } + for (i=0; iilen; i++) { + irep->iseq[i] = bin_to_uint32(src); //iseq + src += MRB_DUMP_SIZE_OF_LONG; + } + } + crc = calc_crc_16_ccitt((unsigned char *)pStart, src - pStart); //Calculate CRC + if (crc != bin_to_uint16(src)) { //iseq CRC + ret = MRB_DUMP_INVALID_IREP; + goto error_exit; + } + src += MRB_DUMP_SIZE_OF_SHORT; + + //POOL BLOCK + pStart = src; + irep->plen = bin_to_uint32(src); //pool length + src += MRB_DUMP_SIZE_OF_LONG; + if (irep->plen > 0) { + irep->pool = mrb_malloc(mrb, sizeof(mrb_value) * irep->plen); + if (irep->pool == NULL) { + ret = MRB_DUMP_INVALID_IREP; + goto error_exit; + } + + for (i=0; iplen; i++) { + tt = *src; //pool TT + src += sizeof(unsigned char); + pdl = bin_to_uint16(src); //pool data length + src += MRB_DUMP_SIZE_OF_SHORT; + if (pdl > bufsize - 1) { + mrb_free(mrb, buf); + bufsize = pdl + 1; + if ((buf = mrb_malloc(mrb, bufsize)) == NULL) { + 
ret = MRB_DUMP_GENERAL_FAILURE; + goto error_exit; + } + } + memcpy(buf, src, pdl); + src += pdl; + buf[pdl] = '\0'; + + switch (tt) { //pool data + case MRB_TT_FIXNUM: + sscanf(buf, "%d", &fix_num); + irep->pool[i] = mrb_fixnum_value(fix_num); + break; + + case MRB_TT_FLOAT: + sscanf(buf, "%le", &f); + irep->pool[i] = mrb_float_value(f); + break; + + case MRB_TT_STRING: + irep->pool[i] = mrb_str_new(mrb, buf, pdl); + break; + +#ifdef INCLUDE_REGEXP + case MRB_TT_REGEX: + str = mrb_str_new(mrb, buf, pdl); + irep->pool[i] = mrb_reg_quote(mrb, str); + break; +#endif + + default: + irep->pool[i] = mrb_nil_value(); + break; + } + } + } + crc = calc_crc_16_ccitt((unsigned char *)pStart, src - pStart); //Calculate CRC + if (crc != bin_to_uint16(src)) { //pool CRC + ret = MRB_DUMP_INVALID_IREP; + goto error_exit; + } + src += MRB_DUMP_SIZE_OF_SHORT; + + //SYMS BLOCK + pStart = src; + irep->slen = bin_to_uint32(src); //syms length + src += MRB_DUMP_SIZE_OF_LONG; + if (irep->slen > 0) { + if ((irep->syms = mrb_malloc(mrb, MRB_DUMP_SIZE_OF_INT * irep->slen)) == NULL) { + ret = MRB_DUMP_INVALID_IREP; + goto error_exit; + } + + memset(irep->syms, 0, sizeof(mrb_sym)*(irep->slen)); + for (i=0; islen; i++) { + snl = bin_to_uint16(src); //symbol name length + src += MRB_DUMP_SIZE_OF_SHORT; + + if (snl == MRB_DUMP_NULL_SYM_LEN) { + irep->syms[i] = 0; + continue; + } + + if (snl > bufsize - 1) { + mrb_free(mrb, buf); + bufsize = snl + 1; + if ((buf = mrb_malloc(mrb, bufsize)) == NULL) { + ret = MRB_DUMP_GENERAL_FAILURE; + goto error_exit; + } + } + memcpy(buf, src, snl); //symbol name + src += snl; + buf[snl] = '\0'; + irep->syms[i] = mrb_intern(mrb, buf); + } + } + crc = calc_crc_16_ccitt((unsigned char *)pStart, src - pStart); //Calculate CRC + if (crc != bin_to_uint16(src)) { //syms CRC + ret = MRB_DUMP_INVALID_IREP; + goto error_exit; + } + src += MRB_DUMP_SIZE_OF_SHORT; + + *len = src - recordStart; +error_exit: + if (buf) + mrb_free(mrb, buf); + + return ret; +} + +int 
+mrb_read_irep(mrb_state *mrb, char *bin) +{ + int ret = MRB_DUMP_OK, i, n, nirep, sirep; + uint32_t len; + unsigned char *src; + rite_binary_header bin_header; + + if ((mrb == NULL) || (bin == NULL)) { + return MRB_DUMP_INVALID_ARGUMENT; + } + src = (unsigned char*)bin; + sirep = mrb->irep_len; + + //Read File Header Section + if ((nirep = read_rite_header(mrb, src, &bin_header)) < 0) + return nirep; + + mrb_add_irep(mrb, sirep + nirep); + + for (n=0,i=sirep; nirep[i] = mrb_malloc(mrb, sizeof(mrb_irep))) == NULL) { + ret = MRB_DUMP_GENERAL_FAILURE; + goto error_exit; + } + memset(mrb->irep[i], 0, sizeof(mrb_irep)); + } + src += sizeof(bin_header) + MRB_DUMP_SIZE_OF_SHORT; //header + crc + + //Read Binary Data Section + for (n=0,i=sirep; nirep[i], &len)) != MRB_DUMP_OK) + goto error_exit; + mrb->irep[i]->idx = i; + src += len; + } + if (0 != bin_to_uint32(src)) { //dummy record len + ret = MRB_DUMP_GENERAL_FAILURE; + } + + mrb->irep_len += nirep; + +error_exit: + if (ret != MRB_DUMP_OK) { + for (n=0,i=sirep; nirep[i]) { + if (mrb->irep[i]->iseq) + mrb_free(mrb, mrb->irep[i]->iseq); + + if (mrb->irep[i]->pool) + mrb_free(mrb, mrb->irep[i]->pool); + + if (mrb->irep[i]->syms) + mrb_free(mrb, mrb->irep[i]->syms); + + mrb_free(mrb, mrb->irep[i]); + } + } + return ret; + } + return sirep + hex_to_uint8(bin_header.sirep); +} + +static uint16_t +hex_to_bin8(unsigned char *dst, unsigned char *src) +{ + dst[0] = (hex2bin[src[0]] << 4) | (hex2bin[src[1]]); + return 1; +} + +static uint16_t +hex_to_bin16(unsigned char *dst, unsigned char *src) +{ + dst[0] = (hex2bin[src[0]] << 4) | (hex2bin[src[1]]); + dst[1] = (hex2bin[src[2]] << 4) | (hex2bin[src[3]]); + return 2; +} + +static uint16_t +hex_to_bin32(unsigned char *dst, unsigned char *src) +{ + dst[0] = (hex2bin[src[0]] << 4) | (hex2bin[src[1]]); + dst[1] = (hex2bin[src[2]] << 4) | (hex2bin[src[3]]); + dst[2] = (hex2bin[src[4]] << 4) | (hex2bin[src[5]]); + dst[3] = (hex2bin[src[6]] << 4) | (hex2bin[src[7]]); + return 4; +} + 
+/*
+ * Decode two ASCII hex digits at hex[0..1] into an 8-bit value.
+ * Each digit is mapped through the hex2bin lookup table; hex[0] is the
+ * high nibble.
+ */
+static uint8_t
+hex_to_uint8(unsigned char *hex)
+{
+  return (unsigned char)hex2bin[hex[0]] << 4  |
+         (unsigned char)hex2bin[hex[1]];
+}
+
+/*
+ * Decode four ASCII hex digits at hex[0..3] into a 16-bit value,
+ * most significant digit first.
+ */
+static uint16_t
+hex_to_uint16(unsigned char *hex)
+{
+  return (uint16_t)hex2bin[hex[0]] << 12 |
+         (uint16_t)hex2bin[hex[1]] << 8  |
+         (uint16_t)hex2bin[hex[2]] << 4  |
+         (uint16_t)hex2bin[hex[3]];
+}
+
+/*
+ * Decode eight ASCII hex digits at hex[0..7] into a 32-bit value,
+ * most significant digit first.
+ */
+static uint32_t
+hex_to_uint32(unsigned char *hex)
+{
+  return (uint32_t)hex2bin[hex[0]] << 28  |
+         (uint32_t)hex2bin[hex[1]] << 24  |
+         (uint32_t)hex2bin[hex[2]] << 20  |
+         (uint32_t)hex2bin[hex[3]] << 16  |
+         (uint32_t)hex2bin[hex[4]] << 12  |
+         (uint32_t)hex2bin[hex[5]] << 8   |
+         (uint32_t)hex2bin[hex[6]] << 4   |
+         (uint32_t)hex2bin[hex[7]];
+}
+
+/*
+ * Copy the NUL-terminated text `hex` into `str`, translating the
+ * backslash escapes \a \b \t \n \v \f \r \" \' \? \\ into the single
+ * byte they denote.
+ *
+ *   hex     - NUL-terminated input text
+ *   str     - output buffer; it is NOT NUL-terminated by this function
+ *   str_len - receives the number of bytes stored in `str`
+ *
+ * Returns `str`.
+ *
+ * A backslash followed by any other character is dropped and produces no
+ * output. A backslash that is the very last input character is copied
+ * unchanged.
+ */
+static char*
+hex_to_str(char *hex, char *str, uint16_t *str_len)
+{
+  char *src, *dst;
+  uint16_t hex_len = strlen(hex);
+
+  for (src = hex, dst = str; hex_len > 0; hex_len--) {
+    if (*src == '\\' && hex_len > 1) {
+      src++; hex_len--;     /* consume the backslash itself */
+      switch(*src) {
+      case 'a':  *dst++ = '\a'/* BEL */; break;
+      case 'b':  *dst++ = '\b'/* BS  */; break;
+      case 't':  *dst++ = '\t'/* HT  */; break;
+      case 'n':  *dst++ = '\n'/* LF  */; break;
+      case 'v':  *dst++ = '\v'/* VT  */; break;
+      case 'f':  *dst++ = '\f'/* FF  */; break;
+      case 'r':  *dst++ = '\r'/* CR  */; break;
+      case '\"': /* fall through */
+      case '\'': /* fall through */
+      case '\?': /* fall through */
+      case '\\': *dst++ = *src; break;
+      default:break;        /* unknown escape: nothing is written */
+      }
+      src++;
+    } else {
+      *dst++ = *src++;
+    }
+  }
+
+  /* Report the bytes actually written.  Counting once per loop iteration
+   * would also count unknown escapes, which store nothing, and make the
+   * reported length larger than the data in `str`. */
+  *str_len = (uint16_t)(dst - str);
+
+  return str;
+}
+
diff --git a/src/mdata.h b/src/mdata.h
new file mode 100644
index 0000000000..827f7c1141
--- /dev/null
+++ b/src/mdata.h
@@ -0,0 +1,53 @@
+/**********************************************************************
+
+  mdata.h -
+
+
+  Copyright (C) 2007 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#ifndef RUBY_DATA_H
+#define RUBY_DATA_H 1
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+struct mrb_data_type {
+  const char *struct_name;
+  void 
(*dfree)(mrb_state *mrb, void*); +}; + +struct RData { + MRUBY_OBJECT_HEADER; + struct kh_iv *iv; + struct mrb_data_type *type; + void *data; +}; + +struct RData *mrb_data_object_alloc(mrb_state *mrb, struct RClass* klass, void *datap, const struct mrb_data_type *type); + +#define Data_Wrap_Struct(mrb,klass,type,ptr)\ + mrb_data_object_alloc(mrb,klass,ptr,type) + +#define Data_Make_Struct(mrb,klass,strct,type,sval) (\ + sval = mrb_malloc(mrb, sizeof(strct)),\ + memset(sval, 0, sizeof(strct)),\ + Data_Wrap_Struct(mrb,klass,type,sval)\ +) + +#define RDATA(obj) ((struct RData *)((obj).value.p)) +#define DATA_PTR(d) (RDATA(d)->data) +#define DATA_TYPE(d) (RDATA(d)->type) +void *mrb_check_datatype(mrb_state *mrb, mrb_value, const struct mrb_data_type*); +#define Data_Get_Struct(mrb,obj,type,sval) do {\ + sval = mrb_check_datatype(mrb, obj, type); \ +} while (0) + +#if defined(__cplusplus) +} /* extern "C" { */ +#endif + +#endif /* RUBY_DATA_H */ diff --git a/src/method.h b/src/method.h new file mode 100644 index 0000000000..3591917a26 --- /dev/null +++ b/src/method.h @@ -0,0 +1,103 @@ +/********************************************************************** + + method.h - + + $Author: ko1 $ + created at: Wed Jul 15 20:02:33 2009 + + Copyright (C) 2009 Koichi Sasada + +**********************************************************************/ +#ifndef METHOD_H +#define METHOD_H + +typedef enum { + NOEX_PUBLIC = 0x00, + NOEX_NOSUPER = 0x01, + NOEX_PRIVATE = 0x02, + NOEX_PROTECTED = 0x04, + NOEX_MASK = 0x06, + NOEX_BASIC = 0x08, + NOEX_UNDEF = NOEX_NOSUPER, + NOEX_MODFUNC = 0x12, + NOEX_SUPER = 0x20, + NOEX_VCALL = 0x40, + NOEX_RESPONDS = 0x80 +} mrb_method_flag_t; + +#define NOEX_SAFE(n) ((int)((n) >> 8) & 0x0F) +#define NOEX_WITH(n, s) ((s << 8) | (n) | (ruby_running ? 
0 : NOEX_BASIC)) +#define NOEX_WITH_SAFE(n) NOEX_WITH(n, mrb_safe_level()) + +/* method data type */ + +typedef enum { + VM_METHOD_TYPE_ISEQ, + VM_METHOD_TYPE_CFUNC, + VM_METHOD_TYPE_ATTRSET, + VM_METHOD_TYPE_IVAR, + VM_METHOD_TYPE_BMETHOD, + VM_METHOD_TYPE_ZSUPER, + VM_METHOD_TYPE_UNDEF, + VM_METHOD_TYPE_NOTIMPLEMENTED, + VM_METHOD_TYPE_OPTIMIZED, /* Kernel#send, Proc#call, etc */ + VM_METHOD_TYPE_MISSING /* wrapper for method_missing(id) */ +} mrb_method_type_t; + +typedef struct mrb_method_cfunc_struct { + mrb_value (*func)(ANYARGS); + int argc; +} mrb_method_cfunc_t; + +typedef struct mrb_method_attr_struct { + mrb_sym id; + mrb_value location; +} mrb_method_attr_t; + +typedef struct mrb_iseq_struct mrb_iseq_t; + +typedef struct mrb_method_definition_struct { + mrb_method_type_t type; /* method type */ + mrb_sym original_id; + union { + mrb_iseq_t *iseq; /* should be mark */ + mrb_method_cfunc_t cfunc; + mrb_method_attr_t attr; + mrb_value proc; /* should be mark */ + enum method_optimized_type { + OPTIMIZED_METHOD_TYPE_SEND, + OPTIMIZED_METHOD_TYPE_CALL + } optimize_type; + } body; + int alias_count; +} mrb_method_definition_t; + +typedef struct mrb_method_entry_struct { + mrb_method_flag_t flag; + char mark; + mrb_method_definition_t *def; + mrb_sym called_id; + mrb_value klass; /* should be mark */ +} mrb_method_entry_t; + +struct unlinked_method_entry_list_entry { + struct unlinked_method_entry_list_entry *next; + mrb_method_entry_t *me; +}; + +#define UNDEFINED_METHOD_ENTRY_P(me) (!(me) || !(me)->def || (me)->def->type == VM_METHOD_TYPE_UNDEF) + +void mrb_add_method_cfunc(mrb_value klass, mrb_sym mid, mrb_value (*func)(ANYARGS), int argc, mrb_method_flag_t noex); +mrb_method_entry_t *mrb_add_method(mrb_value klass, mrb_sym mid, mrb_method_type_t type, void *option, mrb_method_flag_t noex); +mrb_method_entry_t *mrb_method_entry(mrb_state *mrb, mrb_value klass, mrb_sym id); + +mrb_method_entry_t *mrb_method_entry_get_without_cache(mrb_value klass, mrb_sym 
id); +mrb_method_entry_t *mrb_method_entry_set(mrb_value klass, mrb_sym mid, const mrb_method_entry_t *, mrb_method_flag_t noex); + +int mrb_method_entry_arity(const mrb_method_entry_t *me); + +void mrb_mark_method_entry(const mrb_method_entry_t *me); +void mrb_free_method_entry(mrb_method_entry_t *me); +void mrb_sweep_method_entry(void *vm); + +#endif /* METHOD_H */ diff --git a/src/minimain.c b/src/minimain.c new file mode 100644 index 0000000000..b1b70c2aff --- /dev/null +++ b/src/minimain.c @@ -0,0 +1,117 @@ +#include "mruby.h" +#include "mruby/proc.h" + +#if 0 +#include "opcode.h" + +mrb_code fib_iseq[256]; + +int fib_syms[4]; + +mrb_irep fib_irep = { + 1, + MRB_IREP_NOFREE, + 2, + 5, + fib_iseq, + NULL, + fib_syms, + + 256, 0, 4, +}; + +mrb_code main_iseq[256]; + +int main_syms[2]; + +mrb_irep main_irep = { + 0, + MRB_IREP_NOFREE, + 1, + 3, + main_iseq, + NULL, + main_syms, + + 256, 0, 2, +}; + +int +main(int argc, char **argv) +{ + mrb_state *mrb = mrb_open(); + int sirep = mrb->irep_len; + int n; + + main_syms[0] = mrb_intern(mrb, "fib"); + main_syms[1] = mrb_intern(mrb, "p"); + n = 0; + + main_iseq[n++] = MKOP_AB(OP_LAMBDA, 1, 1); /* r1 := lambda(1) */ + main_iseq[n++] = MKOP_AB(OP_METHOD, 1, 0); /* defmethod(r1) */ + main_iseq[n++] = MKOP_AB(OP_MOVE, 1, 0); /* r1 := r0 */ + main_iseq[n++] = MKOP_AB(OP_MOVE, 2, 0); /* r2 := r0 */ + main_iseq[n++] = MKOP_AsBx(OP_LOADI, 3, 35); /* r3 := 20 */ + main_iseq[n++] = MKOP_ABC(OP_SEND, 2, 0, 1); /* r2 .fib r3 */ + main_iseq[n++] = MKOP_ABC(OP_SEND, 1, 1, 1); /* r1 .p r2 */ + main_iseq[n++] = MKOP_ABC(OP_STOP, 1, 1, 2); /* stop */ + main_irep.ilen = n; + main_irep.idx = sirep; + + fib_syms[0] = mrb_intern(mrb, "<"); + fib_syms[1] = mrb_intern(mrb, "-"); + fib_syms[2] = mrb_intern(mrb, "+"); + fib_syms[3] = mrb_intern(mrb, "fib"); + n = 0; + + fib_iseq[n++] = MKOP_AB(OP_MOVE, 2, 1); /* r2 := r1 */ + fib_iseq[n++] = MKOP_AsBx(OP_LOADI, 3, 3); /* r3 := 2 */ + fib_iseq[n++] = MKOP_ABC(OP_LT, 2, 0, 2); /* r2 .< r3 */ + 
fib_iseq[n++] = MKOP_AsBx(OP_JMPNOT, 2, 2); /* ifnot r2 :else */ + fib_iseq[n++] = MKOP_AsBx(OP_LOADI, 2, 1); /* r6 := 1 */ + fib_iseq[n++] = MKOP_A(OP_RETURN, 2); /* return r2 */ + fib_iseq[n++] = MKOP_AB(OP_MOVE, 3, 0); /* r3 := r0 :else */ + fib_iseq[n++] = MKOP_AB(OP_MOVE, 4, 1); /* r4 := r1 */ + fib_iseq[n++] = MKOP_ABC(OP_SUBI, 4, 1, 2); /* r4 .- 2 */ + fib_iseq[n++] = MKOP_ABC(OP_SEND, 3, 3, 1); /* r3 .fib r4 */ + fib_iseq[n++] = MKOP_AB(OP_MOVE, 4, 0); /* r4 := r0 */ + fib_iseq[n++] = MKOP_AB(OP_MOVE, 5, 1); /* r5 := r1 */ + fib_iseq[n++] = MKOP_ABC(OP_SUBI, 5, 1, 1); /* r5 .- 1 */ + fib_iseq[n++] = MKOP_ABC(OP_SEND, 4, 3, 1); /* r4 .fib :r5 */ + fib_iseq[n++] = MKOP_ABC(OP_ADD, 3, 2, 1); /* r3 .+ r4 */ + fib_iseq[n++] = MKOP_A(OP_RETURN, 3); /* return r3 */ + fib_irep.ilen = n; + fib_irep.idx = sirep+1; + + mrb_add_irep(mrb, sirep+2); + mrb->irep[sirep ] = &main_irep; + mrb->irep[sirep+1] = &fib_irep; + + mrb_run(mrb, mrb_proc_new(mrb, &main_irep), mrb_nil_value()); +} + +#else +#include "compile.h" + +int +main() +{ + mrb_state *mrb = mrb_open(); + int n; + + n = mrb_compile_string(mrb, "\ +def fib(n)\n\ + if n<2\n\ + n\n\ + else\n\ + fib(n-2)+fib(n-1)\n\ + end\n\ +end\n\ +p(fib(30), \"\\n\")\n\ +"); + mrb_run(mrb, mrb_proc_new(mrb, mrb->irep[n]), mrb_nil_value()); + + return 0; +} + +#endif diff --git a/src/name2ctype.h b/src/name2ctype.h new file mode 100644 index 0000000000..2248b8f7cb --- /dev/null +++ b/src/name2ctype.h @@ -0,0 +1,17985 @@ +/* C code produced by gperf version 3.0.3 */ +/* Command-line: gperf -7 -c -j1 -i1 -t -C -P -T -H uniname2ctype_hash -Q uniname2ctype_pool -N uniname2ctype_p */ +#ifndef USE_UNICODE_PROPERTIES +/* Computed positions: -k'1,3' */ +#else /* USE_UNICODE_PROPERTIES */ +/* Computed positions: -k'1-3,6,12,16,$' */ +#endif /* USE_UNICODE_PROPERTIES */ + +#if !((' ' == 32) && ('!' 
== 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
+#endif + + + +#ifdef USE_UNICODE_PROPERTIES +#ifdef USE_UNICODE_PROPERTIES +/* 'Any': - */ +static const OnigCodePoint CR_Any[] = { + 1, + 0x0000, 0x10ffff, +}; /* CR_Any */ + +/* 'Assigned': - */ +static const OnigCodePoint CR_Assigned[] = { + 485, + 0x0000, 0x0377, + 0x037a, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x0525, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x0606, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x07b1, + 0x07c0, 0x07fa, + 0x0800, 0x082d, + 0x0830, 0x083e, + 0x0900, 0x0939, + 0x093c, 0x094e, + 0x0950, 0x0955, + 0x0958, 0x0972, + 0x0979, 0x097f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fb, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a75, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b63, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 
0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd0, 0x0bd0, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c58, 0x0c59, + 0x0c60, 0x0c63, + 0x0c66, 0x0c6f, + 0x0c78, 0x0c7f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce3, + 0x0ce6, 0x0cef, + 0x0cf1, 0x0cf2, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d44, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d63, + 0x0d66, 0x0d75, + 0x0d79, 0x0d7f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fce, 0x0fd8, + 0x1000, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1400, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 
0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a5e, + 0x1a60, 0x1a7c, + 0x1a7f, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa0, 0x1aad, + 0x1b00, 0x1b4b, + 0x1b50, 0x1b7c, + 0x1b80, 0x1baa, + 0x1bae, 0x1bb9, + 0x1c00, 0x1c37, + 0x1c3b, 0x1c49, + 0x1c4d, 0x1c7f, + 0x1cd0, 0x1cf2, + 0x1d00, 0x1de6, + 0x1dfd, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2064, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b8, + 0x20d0, 0x20f0, + 0x2100, 0x2189, + 0x2190, 0x23e8, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x26cd, + 0x26cf, 0x26e1, + 0x26e3, 0x26e3, + 0x26e8, 0x26ff, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27ca, + 0x27cc, 0x27cc, + 0x27d0, 0x2b4c, + 0x2b50, 0x2b59, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2cf1, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2de0, 0x2e31, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31e3, + 0x31f0, 0x321e, + 0x3220, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 
0xa4d0, 0xa62b, + 0xa640, 0xa65f, + 0xa662, 0xa673, + 0xa67c, 0xa697, + 0xa6a0, 0xa6f7, + 0xa700, 0xa78c, + 0xa7fb, 0xa82b, + 0xa830, 0xa839, + 0xa840, 0xa877, + 0xa880, 0xa8c4, + 0xa8ce, 0xa8d9, + 0xa8e0, 0xa8fb, + 0xa900, 0xa953, + 0xa95f, 0xa97c, + 0xa980, 0xa9cd, + 0xa9cf, 0xa9d9, + 0xa9de, 0xa9df, + 0xaa00, 0xaa36, + 0xaa40, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa5c, 0xaa7b, + 0xaa80, 0xaac2, + 0xaadb, 0xaadf, + 0xabc0, 0xabed, + 0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xd800, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe26, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10190, 0x1019b, + 0x101d0, 0x101fd, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10857, 0x1085f, + 0x10900, 0x1091b, + 0x1091f, 0x10939, + 0x1093f, 0x1093f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x10a60, 0x10a7f, + 0x10b00, 0x10b35, + 0x10b39, 0x10b55, + 0x10b58, 0x10b72, + 0x10b78, 0x10b7f, + 0x10c00, 0x10c48, + 0x10e60, 0x10e7e, 
+ 0x11080, 0x110c1, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x12470, 0x12473, + 0x13000, 0x1342e, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d129, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d360, 0x1d371, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7cb, + 0x1d7ce, 0x1d7ff, + 0x1f000, 0x1f02b, + 0x1f030, 0x1f093, + 0x1f100, 0x1f10a, + 0x1f110, 0x1f12e, + 0x1f131, 0x1f131, + 0x1f13d, 0x1f13d, + 0x1f13f, 0x1f13f, + 0x1f142, 0x1f142, + 0x1f146, 0x1f146, + 0x1f14a, 0x1f14e, + 0x1f157, 0x1f157, + 0x1f15f, 0x1f15f, + 0x1f179, 0x1f179, + 0x1f17b, 0x1f17c, + 0x1f17f, 0x1f17f, + 0x1f18a, 0x1f18d, + 0x1f190, 0x1f190, + 0x1f200, 0x1f200, + 0x1f210, 0x1f231, + 0x1f240, 0x1f248, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd, +}; /* CR_Assigned */ + +/* 'C': Major Category */ +static const OnigCodePoint CR_C[] = { + 20, + 0x0000, 0x001f, + 0x007f, 0x009f, + 0x00ad, 0x00ad, + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2064, + 0x206a, 0x206f, + 0xd800, 0xf8ff, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x110bd, 0x110bd, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10ffff, +}; /* CR_C */ + +/* 'Cc': General Category */ +static const OnigCodePoint CR_Cc[] = { + 2, + 0x0000, 0x001f, + 0x007f, 0x009f, +}; /* CR_Cc */ + +/* 'Cf': General Category */ +static const OnigCodePoint CR_Cf[] = { + 15, + 0x00ad, 0x00ad, + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 
0x200f, + 0x202a, 0x202e, + 0x2060, 0x2064, + 0x206a, 0x206f, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x110bd, 0x110bd, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, +}; /* CR_Cf */ + +/* 'Cn': General Category */ +static const OnigCodePoint CR_Cn[] = { + 485, + 0x0378, 0x0379, + 0x037f, 0x0383, + 0x038b, 0x038b, + 0x038d, 0x038d, + 0x03a2, 0x03a2, + 0x0526, 0x0530, + 0x0557, 0x0558, + 0x0560, 0x0560, + 0x0588, 0x0588, + 0x058b, 0x0590, + 0x05c8, 0x05cf, + 0x05eb, 0x05ef, + 0x05f5, 0x05ff, + 0x0604, 0x0605, + 0x061c, 0x061d, + 0x0620, 0x0620, + 0x065f, 0x065f, + 0x070e, 0x070e, + 0x074b, 0x074c, + 0x07b2, 0x07bf, + 0x07fb, 0x07ff, + 0x082e, 0x082f, + 0x083f, 0x08ff, + 0x093a, 0x093b, + 0x094f, 0x094f, + 0x0956, 0x0957, + 0x0973, 0x0978, + 0x0980, 0x0980, + 0x0984, 0x0984, + 0x098d, 0x098e, + 0x0991, 0x0992, + 0x09a9, 0x09a9, + 0x09b1, 0x09b1, + 0x09b3, 0x09b5, + 0x09ba, 0x09bb, + 0x09c5, 0x09c6, + 0x09c9, 0x09ca, + 0x09cf, 0x09d6, + 0x09d8, 0x09db, + 0x09de, 0x09de, + 0x09e4, 0x09e5, + 0x09fc, 0x0a00, + 0x0a04, 0x0a04, + 0x0a0b, 0x0a0e, + 0x0a11, 0x0a12, + 0x0a29, 0x0a29, + 0x0a31, 0x0a31, + 0x0a34, 0x0a34, + 0x0a37, 0x0a37, + 0x0a3a, 0x0a3b, + 0x0a3d, 0x0a3d, + 0x0a43, 0x0a46, + 0x0a49, 0x0a4a, + 0x0a4e, 0x0a50, + 0x0a52, 0x0a58, + 0x0a5d, 0x0a5d, + 0x0a5f, 0x0a65, + 0x0a76, 0x0a80, + 0x0a84, 0x0a84, + 0x0a8e, 0x0a8e, + 0x0a92, 0x0a92, + 0x0aa9, 0x0aa9, + 0x0ab1, 0x0ab1, + 0x0ab4, 0x0ab4, + 0x0aba, 0x0abb, + 0x0ac6, 0x0ac6, + 0x0aca, 0x0aca, + 0x0ace, 0x0acf, + 0x0ad1, 0x0adf, + 0x0ae4, 0x0ae5, + 0x0af0, 0x0af0, + 0x0af2, 0x0b00, + 0x0b04, 0x0b04, + 0x0b0d, 0x0b0e, + 0x0b11, 0x0b12, + 0x0b29, 0x0b29, + 0x0b31, 0x0b31, + 0x0b34, 0x0b34, + 0x0b3a, 0x0b3b, + 0x0b45, 0x0b46, + 0x0b49, 0x0b4a, + 0x0b4e, 0x0b55, + 0x0b58, 0x0b5b, + 0x0b5e, 0x0b5e, + 0x0b64, 0x0b65, + 0x0b72, 0x0b81, + 0x0b84, 0x0b84, + 0x0b8b, 0x0b8d, + 0x0b91, 0x0b91, + 0x0b96, 0x0b98, + 0x0b9b, 0x0b9b, + 0x0b9d, 0x0b9d, + 0x0ba0, 0x0ba2, + 0x0ba5, 0x0ba7, + 0x0bab, 0x0bad, + 0x0bba, 
0x0bbd, + 0x0bc3, 0x0bc5, + 0x0bc9, 0x0bc9, + 0x0bce, 0x0bcf, + 0x0bd1, 0x0bd6, + 0x0bd8, 0x0be5, + 0x0bfb, 0x0c00, + 0x0c04, 0x0c04, + 0x0c0d, 0x0c0d, + 0x0c11, 0x0c11, + 0x0c29, 0x0c29, + 0x0c34, 0x0c34, + 0x0c3a, 0x0c3c, + 0x0c45, 0x0c45, + 0x0c49, 0x0c49, + 0x0c4e, 0x0c54, + 0x0c57, 0x0c57, + 0x0c5a, 0x0c5f, + 0x0c64, 0x0c65, + 0x0c70, 0x0c77, + 0x0c80, 0x0c81, + 0x0c84, 0x0c84, + 0x0c8d, 0x0c8d, + 0x0c91, 0x0c91, + 0x0ca9, 0x0ca9, + 0x0cb4, 0x0cb4, + 0x0cba, 0x0cbb, + 0x0cc5, 0x0cc5, + 0x0cc9, 0x0cc9, + 0x0cce, 0x0cd4, + 0x0cd7, 0x0cdd, + 0x0cdf, 0x0cdf, + 0x0ce4, 0x0ce5, + 0x0cf0, 0x0cf0, + 0x0cf3, 0x0d01, + 0x0d04, 0x0d04, + 0x0d0d, 0x0d0d, + 0x0d11, 0x0d11, + 0x0d29, 0x0d29, + 0x0d3a, 0x0d3c, + 0x0d45, 0x0d45, + 0x0d49, 0x0d49, + 0x0d4e, 0x0d56, + 0x0d58, 0x0d5f, + 0x0d64, 0x0d65, + 0x0d76, 0x0d78, + 0x0d80, 0x0d81, + 0x0d84, 0x0d84, + 0x0d97, 0x0d99, + 0x0db2, 0x0db2, + 0x0dbc, 0x0dbc, + 0x0dbe, 0x0dbf, + 0x0dc7, 0x0dc9, + 0x0dcb, 0x0dce, + 0x0dd5, 0x0dd5, + 0x0dd7, 0x0dd7, + 0x0de0, 0x0df1, + 0x0df5, 0x0e00, + 0x0e3b, 0x0e3e, + 0x0e5c, 0x0e80, + 0x0e83, 0x0e83, + 0x0e85, 0x0e86, + 0x0e89, 0x0e89, + 0x0e8b, 0x0e8c, + 0x0e8e, 0x0e93, + 0x0e98, 0x0e98, + 0x0ea0, 0x0ea0, + 0x0ea4, 0x0ea4, + 0x0ea6, 0x0ea6, + 0x0ea8, 0x0ea9, + 0x0eac, 0x0eac, + 0x0eba, 0x0eba, + 0x0ebe, 0x0ebf, + 0x0ec5, 0x0ec5, + 0x0ec7, 0x0ec7, + 0x0ece, 0x0ecf, + 0x0eda, 0x0edb, + 0x0ede, 0x0eff, + 0x0f48, 0x0f48, + 0x0f6d, 0x0f70, + 0x0f8c, 0x0f8f, + 0x0f98, 0x0f98, + 0x0fbd, 0x0fbd, + 0x0fcd, 0x0fcd, + 0x0fd9, 0x0fff, + 0x10c6, 0x10cf, + 0x10fd, 0x10ff, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135e, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x13ff, + 0x169d, 0x169f, + 0x16f1, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1737, 0x173f, + 0x1754, 
0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x180f, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18ab, 0x18af, + 0x18f6, 0x18ff, + 0x191d, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19ac, 0x19af, + 0x19ca, 0x19cf, + 0x19db, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a5f, 0x1a5f, + 0x1a7d, 0x1a7e, + 0x1a8a, 0x1a8f, + 0x1a9a, 0x1a9f, + 0x1aae, 0x1aff, + 0x1b4c, 0x1b4f, + 0x1b7d, 0x1b7f, + 0x1bab, 0x1bad, + 0x1bba, 0x1bff, + 0x1c38, 0x1c3a, + 0x1c4a, 0x1c4c, + 0x1c80, 0x1ccf, + 0x1cf3, 0x1cff, + 0x1de7, 0x1dfc, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x1fff, + 0x2065, 0x2069, + 0x2072, 0x2073, + 0x208f, 0x208f, + 0x2095, 0x209f, + 0x20b9, 0x20cf, + 0x20f1, 0x20ff, + 0x218a, 0x218f, + 0x23e9, 0x23ff, + 0x2427, 0x243f, + 0x244b, 0x245f, + 0x26ce, 0x26ce, + 0x26e2, 0x26e2, + 0x26e4, 0x26e7, + 0x2700, 0x2700, + 0x2705, 0x2705, + 0x270a, 0x270b, + 0x2728, 0x2728, + 0x274c, 0x274c, + 0x274e, 0x274e, + 0x2753, 0x2755, + 0x275f, 0x2760, + 0x2795, 0x2797, + 0x27b0, 0x27b0, + 0x27bf, 0x27bf, + 0x27cb, 0x27cb, + 0x27cd, 0x27cf, + 0x2b4d, 0x2b4f, + 0x2b5a, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c5f, + 0x2cf2, 0x2cf8, + 0x2d26, 0x2d2f, + 0x2d66, 0x2d6e, + 0x2d70, 0x2d7f, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 0x2ddf, 0x2ddf, + 0x2e32, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x2fef, + 0x2ffc, 0x2fff, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312e, 0x3130, + 0x318f, 0x318f, + 0x31b8, 0x31bf, + 0x31e4, 0x31ef, + 0x321f, 0x321f, + 0x32ff, 0x32ff, + 0x4db6, 0x4dbf, + 0x9fcc, 0x9fff, + 0xa48d, 
0xa48f, + 0xa4c7, 0xa4cf, + 0xa62c, 0xa63f, + 0xa660, 0xa661, + 0xa674, 0xa67b, + 0xa698, 0xa69f, + 0xa6f8, 0xa6ff, + 0xa78d, 0xa7fa, + 0xa82c, 0xa82f, + 0xa83a, 0xa83f, + 0xa878, 0xa87f, + 0xa8c5, 0xa8cd, + 0xa8da, 0xa8df, + 0xa8fc, 0xa8ff, + 0xa954, 0xa95e, + 0xa97d, 0xa97f, + 0xa9ce, 0xa9ce, + 0xa9da, 0xa9dd, + 0xa9e0, 0xa9ff, + 0xaa37, 0xaa3f, + 0xaa4e, 0xaa4f, + 0xaa5a, 0xaa5b, + 0xaa7c, 0xaa7f, + 0xaac3, 0xaada, + 0xaae0, 0xabbf, + 0xabee, 0xabef, + 0xabfa, 0xabff, + 0xd7a4, 0xd7af, + 0xd7c7, 0xd7ca, + 0xd7fc, 0xd7ff, + 0xfa2e, 0xfa2f, + 0xfa6e, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbb2, 0xfbd2, + 0xfd40, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe1a, 0xfe1f, + 0xfe27, 0xfe2f, + 0xfe53, 0xfe53, + 0xfe67, 0xfe67, + 0xfe6c, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xfefe, + 0xff00, 0xff00, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffdf, + 0xffe7, 0xffe7, + 0xffef, 0xfff8, + 0xfffe, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x100ff, + 0x10103, 0x10106, + 0x10134, 0x10136, + 0x1018b, 0x1018f, + 0x1019c, 0x101cf, + 0x101fe, 0x1027f, + 0x1029d, 0x1029f, + 0x102d1, 0x102ff, + 0x1031f, 0x1031f, + 0x10324, 0x1032f, + 0x1034b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10856, 0x10856, + 0x10860, 0x108ff, + 0x1091c, 0x1091e, + 0x1093a, 0x1093e, + 0x10940, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x10a5f, + 0x10a80, 0x10aff, + 0x10b36, 0x10b38, + 0x10b56, 0x10b57, + 0x10b73, 0x10b77, + 0x10b80, 0x10bff, + 0x10c49, 
0x10e5f, + 0x10e7f, 0x1107f, + 0x110c2, 0x11fff, + 0x1236f, 0x123ff, + 0x12463, 0x1246f, + 0x12474, 0x12fff, + 0x1342f, 0x1cfff, + 0x1d0f6, 0x1d0ff, + 0x1d127, 0x1d128, + 0x1d1de, 0x1d1ff, + 0x1d246, 0x1d2ff, + 0x1d357, 0x1d35f, + 0x1d372, 0x1d3ff, + 0x1d455, 0x1d455, + 0x1d49d, 0x1d49d, + 0x1d4a0, 0x1d4a1, + 0x1d4a3, 0x1d4a4, + 0x1d4a7, 0x1d4a8, + 0x1d4ad, 0x1d4ad, + 0x1d4ba, 0x1d4ba, + 0x1d4bc, 0x1d4bc, + 0x1d4c4, 0x1d4c4, + 0x1d506, 0x1d506, + 0x1d50b, 0x1d50c, + 0x1d515, 0x1d515, + 0x1d51d, 0x1d51d, + 0x1d53a, 0x1d53a, + 0x1d53f, 0x1d53f, + 0x1d545, 0x1d545, + 0x1d547, 0x1d549, + 0x1d551, 0x1d551, + 0x1d6a6, 0x1d6a7, + 0x1d7cc, 0x1d7cd, + 0x1d800, 0x1efff, + 0x1f02c, 0x1f02f, + 0x1f094, 0x1f0ff, + 0x1f10b, 0x1f10f, + 0x1f12f, 0x1f130, + 0x1f132, 0x1f13c, + 0x1f13e, 0x1f13e, + 0x1f140, 0x1f141, + 0x1f143, 0x1f145, + 0x1f147, 0x1f149, + 0x1f14f, 0x1f156, + 0x1f158, 0x1f15e, + 0x1f160, 0x1f178, + 0x1f17a, 0x1f17a, + 0x1f17d, 0x1f17e, + 0x1f180, 0x1f189, + 0x1f18e, 0x1f18f, + 0x1f191, 0x1f1ff, + 0x1f201, 0x1f20f, + 0x1f232, 0x1f23f, + 0x1f249, 0x1ffff, + 0x2a6d7, 0x2a6ff, + 0x2b735, 0x2f7ff, + 0x2fa1e, 0xe0000, + 0xe0002, 0xe001f, + 0xe0080, 0xe00ff, + 0xe01f0, 0xeffff, + 0xffffe, 0xfffff, + 0x10fffe, 0x10ffff, +}; /* CR_Cn */ + +/* 'Co': General Category */ +static const OnigCodePoint CR_Co[] = { + 3, + 0xe000, 0xf8ff, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd, +}; /* CR_Co */ + +/* 'Cs': General Category */ +static const OnigCodePoint CR_Cs[] = { + 1, + 0xd800, 0xdfff, +}; /* CR_Cs */ + +/* 'L': Major Category */ +static const OnigCodePoint CR_L[] = { + 422, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ec, 0x02ec, + 0x02ee, 0x02ee, + 0x0370, 0x0374, + 0x0376, 0x0377, + 0x037a, 0x037d, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x0525, + 0x0531, 0x0556, 
+ 0x0559, 0x0559, + 0x0561, 0x0587, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06e5, 0x06e6, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x07a5, + 0x07b1, 0x07b1, + 0x07ca, 0x07ea, + 0x07f4, 0x07f5, + 0x07fa, 0x07fa, + 0x0800, 0x0815, + 0x081a, 0x081a, + 0x0824, 0x0824, + 0x0828, 0x0828, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x0971, 0x0972, + 0x0979, 0x097f, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bd0, 0x0bd0, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c3d, + 0x0c58, 0x0c59, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d3d, + 0x0d60, 0x0d61, + 0x0d7a, 0x0d7f, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e46, + 
0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f88, 0x0f8b, + 0x1000, 0x102a, + 0x103f, 0x103f, + 0x1050, 0x1055, + 0x105a, 0x105d, + 0x1061, 0x1061, + 0x1065, 0x1066, + 0x106e, 0x1070, + 0x1075, 0x1081, + 0x108e, 0x108e, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x18aa, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1a20, 0x1a54, + 0x1aa7, 0x1aa7, + 0x1b05, 0x1b33, + 0x1b45, 0x1b4b, + 0x1b83, 0x1ba0, + 0x1bae, 0x1baf, + 0x1c00, 0x1c23, + 0x1c4d, 0x1c4f, + 0x1c5a, 0x1c7d, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf1, + 0x1d00, 0x1dbf, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 
0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2183, 0x2184, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cee, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e2f, 0x2e2f, + 0x3005, 0x3006, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa61f, + 0xa62a, 0xa62b, + 0xa640, 0xa65f, + 0xa662, 0xa66e, + 0xa67f, 0xa697, + 0xa6a0, 0xa6e5, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78c, + 0xa7fb, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xa840, 0xa873, + 0xa882, 0xa8b3, + 0xa8f2, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa90a, 0xa925, + 0xa930, 0xa946, + 0xa960, 0xa97c, + 0xa984, 0xa9b2, + 0xa9cf, 0xa9cf, + 0xaa00, 0xaa28, + 0xaa40, 0xaa42, + 0xaa44, 0xaa4b, + 0xaa60, 0xaa76, + 0xaa7a, 0xaa7a, + 0xaa80, 0xaaaf, + 0xaab1, 0xaab1, + 0xaab5, 0xaab6, + 0xaab9, 0xaabd, + 0xaac0, 0xaac0, + 0xaac2, 0xaac2, + 0xaadb, 0xaadd, + 0xabc0, 0xabe2, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 
0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10330, 0x10340, + 0x10342, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a60, 0x10a7c, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10c00, 0x10c48, + 0x11083, 0x110af, + 0x12000, 0x1236e, + 0x13000, 0x1342e, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, +}; /* CR_L */ + +/* 'Ll': General Category */ +static const OnigCodePoint CR_Ll[] = { + 599, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, 
+ 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0239, + 0x023c, 0x023c, + 0x023f, 0x0240, + 0x0242, 0x0242, + 0x0247, 0x0247, + 
0x0249, 0x0249, + 0x024b, 0x024b, + 0x024d, 0x024d, + 0x024f, 0x0293, + 0x0295, 0x02af, + 0x0371, 0x0371, + 0x0373, 0x0373, + 0x0377, 0x0377, + 0x037b, 0x037d, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fc, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04cf, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f7, 0x04f7, + 0x04f9, 0x04f9, + 0x04fb, 0x04fb, + 0x04fd, 0x04fd, + 0x04ff, 0x04ff, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 
0x050f, 0x050f, + 0x0511, 0x0511, + 0x0513, 0x0513, + 0x0515, 0x0515, + 0x0517, 0x0517, + 0x0519, 0x0519, + 0x051b, 0x051b, + 0x051d, 0x051d, + 0x051f, 0x051f, + 0x0521, 0x0521, + 0x0523, 0x0523, + 0x0525, 0x0525, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9d, + 0x1e9f, 0x1e9f, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 
0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1efb, 0x1efb, + 0x1efd, 0x1efd, + 0x1eff, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x214e, 0x214e, + 0x2184, 0x2184, + 0x2c30, 0x2c5e, + 0x2c61, 0x2c61, + 0x2c65, 0x2c66, + 0x2c68, 0x2c68, + 0x2c6a, 0x2c6a, + 0x2c6c, 0x2c6c, + 0x2c71, 0x2c71, + 0x2c73, 0x2c74, + 0x2c76, 0x2c7c, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 
0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce4, + 0x2cec, 0x2cec, + 0x2cee, 0x2cee, + 0x2d00, 0x2d25, + 0xa641, 0xa641, + 0xa643, 0xa643, + 0xa645, 0xa645, + 0xa647, 0xa647, + 0xa649, 0xa649, + 0xa64b, 0xa64b, + 0xa64d, 0xa64d, + 0xa64f, 0xa64f, + 0xa651, 0xa651, + 0xa653, 0xa653, + 0xa655, 0xa655, + 0xa657, 0xa657, + 0xa659, 0xa659, + 0xa65b, 0xa65b, + 0xa65d, 0xa65d, + 0xa65f, 0xa65f, + 0xa663, 0xa663, + 0xa665, 0xa665, + 0xa667, 0xa667, + 0xa669, 0xa669, + 0xa66b, 0xa66b, + 0xa66d, 0xa66d, + 0xa681, 0xa681, + 0xa683, 0xa683, + 0xa685, 0xa685, + 0xa687, 0xa687, + 0xa689, 0xa689, + 0xa68b, 0xa68b, + 0xa68d, 0xa68d, + 0xa68f, 0xa68f, + 0xa691, 0xa691, + 0xa693, 0xa693, + 0xa695, 0xa695, + 0xa697, 0xa697, + 0xa723, 0xa723, + 0xa725, 0xa725, + 0xa727, 0xa727, + 0xa729, 0xa729, + 0xa72b, 0xa72b, + 0xa72d, 0xa72d, + 0xa72f, 0xa731, + 0xa733, 0xa733, + 0xa735, 0xa735, + 0xa737, 0xa737, + 0xa739, 0xa739, + 0xa73b, 0xa73b, + 0xa73d, 0xa73d, + 0xa73f, 0xa73f, + 0xa741, 0xa741, + 0xa743, 0xa743, + 0xa745, 0xa745, + 0xa747, 0xa747, + 0xa749, 0xa749, + 0xa74b, 0xa74b, + 0xa74d, 0xa74d, + 0xa74f, 0xa74f, + 0xa751, 0xa751, + 0xa753, 0xa753, + 0xa755, 0xa755, + 0xa757, 0xa757, + 0xa759, 0xa759, + 0xa75b, 0xa75b, + 0xa75d, 0xa75d, + 0xa75f, 0xa75f, + 0xa761, 0xa761, + 0xa763, 0xa763, + 0xa765, 0xa765, + 0xa767, 0xa767, + 0xa769, 0xa769, + 0xa76b, 0xa76b, + 0xa76d, 0xa76d, + 0xa76f, 0xa76f, + 0xa771, 0xa778, + 0xa77a, 0xa77a, + 0xa77c, 0xa77c, + 0xa77f, 0xa77f, + 0xa781, 0xa781, + 0xa783, 0xa783, + 0xa785, 0xa785, + 0xa787, 0xa787, + 0xa78c, 0xa78c, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 
0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7cb, 0x1d7cb, +}; /* CR_Ll */ + +/* 'Lm': General Category */ +static const OnigCodePoint CR_Lm[] = { + 49, + 0x02b0, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ec, 0x02ec, + 0x02ee, 0x02ee, + 0x0374, 0x0374, + 0x037a, 0x037a, + 0x0559, 0x0559, + 0x0640, 0x0640, + 0x06e5, 0x06e6, + 0x07f4, 0x07f5, + 0x07fa, 0x07fa, + 0x081a, 0x081a, + 0x0824, 0x0824, + 0x0828, 0x0828, + 0x0971, 0x0971, + 0x0e46, 0x0e46, + 0x0ec6, 0x0ec6, + 0x10fc, 0x10fc, + 0x17d7, 0x17d7, + 0x1843, 0x1843, + 0x1aa7, 0x1aa7, + 0x1c78, 0x1c7d, + 0x1d2c, 0x1d61, + 0x1d78, 0x1d78, + 0x1d9b, 0x1dbf, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x2c7d, 0x2c7d, + 0x2d6f, 0x2d6f, + 0x2e2f, 0x2e2f, + 0x3005, 0x3005, + 0x3031, 0x3035, + 0x303b, 0x303b, + 0x309d, 0x309e, + 0x30fc, 0x30fe, + 0xa015, 0xa015, + 0xa4f8, 0xa4fd, + 0xa60c, 0xa60c, + 0xa67f, 0xa67f, + 0xa717, 0xa71f, + 0xa770, 0xa770, + 0xa788, 0xa788, + 0xa9cf, 0xa9cf, + 0xaa70, 0xaa70, + 0xaadd, 0xaadd, + 0xff70, 0xff70, + 0xff9e, 0xff9f, +}; /* CR_Lm */ + +/* 'Lo': General Category */ +static const OnigCodePoint CR_Lo[] = { + 311, + 0x01bb, 0x01bb, + 0x01c0, 0x01c3, + 0x0294, 0x0294, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x063f, + 0x0641, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x07a5, + 0x07b1, 0x07b1, + 0x07ca, 0x07ea, + 0x0800, 0x0815, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x0972, 0x0972, + 0x0979, 0x097f, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 
0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bd0, 0x0bd0, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c3d, + 0x0c58, 0x0c59, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d3d, + 0x0d60, 0x0d61, + 0x0d7a, 0x0d7f, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e45, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f88, 0x0f8b, + 0x1000, 0x102a, + 0x103f, 0x103f, + 0x1050, 0x1055, + 0x105a, 0x105d, + 0x1061, 0x1061, + 0x1065, 0x1066, + 0x106e, 0x1070, + 0x1075, 0x1081, + 0x108e, 0x108e, + 0x10d0, 0x10fa, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 
0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17dc, 0x17dc, + 0x1820, 0x1842, + 0x1844, 0x1877, + 0x1880, 0x18a8, + 0x18aa, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1a20, 0x1a54, + 0x1b05, 0x1b33, + 0x1b45, 0x1b4b, + 0x1b83, 0x1ba0, + 0x1bae, 0x1baf, + 0x1c00, 0x1c23, + 0x1c4d, 0x1c4f, + 0x1c5a, 0x1c77, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf1, + 0x2135, 0x2138, + 0x2d30, 0x2d65, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3006, 0x3006, + 0x303c, 0x303c, + 0x3041, 0x3096, + 0x309f, 0x309f, + 0x30a1, 0x30fa, + 0x30ff, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xa000, 0xa014, + 0xa016, 0xa48c, + 0xa4d0, 0xa4f7, + 0xa500, 0xa60b, + 0xa610, 0xa61f, + 0xa62a, 0xa62b, + 0xa66e, 0xa66e, + 0xa6a0, 0xa6e5, + 0xa7fb, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xa840, 0xa873, + 0xa882, 0xa8b3, + 0xa8f2, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa90a, 0xa925, + 0xa930, 0xa946, + 0xa960, 0xa97c, + 0xa984, 0xa9b2, + 0xaa00, 0xaa28, + 0xaa40, 0xaa42, + 0xaa44, 0xaa4b, + 0xaa60, 0xaa6f, + 0xaa71, 0xaa76, + 0xaa7a, 0xaa7a, + 0xaa80, 0xaaaf, + 0xaab1, 0xaab1, + 0xaab5, 0xaab6, + 0xaab9, 0xaabd, + 0xaac0, 0xaac0, + 0xaac2, 0xaac2, + 0xaadb, 0xaadc, + 0xabc0, 0xabe2, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 
0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff66, 0xff6f, + 0xff71, 0xff9d, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10330, 0x10340, + 0x10342, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10450, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a60, 0x10a7c, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10c00, 0x10c48, + 0x11083, 0x110af, + 0x12000, 0x1236e, + 0x13000, 0x1342e, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, +}; /* CR_Lo */ + +/* 'Lt': General Category */ +static const OnigCodePoint CR_Lt[] = { + 10, + 0x01c5, 0x01c5, + 0x01c8, 0x01c8, + 0x01cb, 0x01cb, + 0x01f2, 0x01f2, + 0x1f88, 0x1f8f, + 0x1f98, 0x1f9f, + 0x1fa8, 0x1faf, + 0x1fbc, 0x1fbc, + 0x1fcc, 0x1fcc, + 0x1ffc, 0x1ffc, +}; /* CR_Lt */ + +/* 'Lu': General Category */ +static const OnigCodePoint CR_Lu[] = { + 594, + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 
0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, + 0x0243, 0x0246, + 0x0248, 0x0248, + 0x024a, 0x024a, + 0x024c, 0x024c, + 0x024e, 0x024e, + 
0x0370, 0x0370, + 0x0372, 0x0372, + 0x0376, 0x0376, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03cf, 0x03cf, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f6, 0x04f6, + 0x04f8, 0x04f8, + 0x04fa, 0x04fa, + 0x04fc, 0x04fc, + 0x04fe, 0x04fe, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0510, 0x0510, + 
0x0512, 0x0512, + 0x0514, 0x0514, + 0x0516, 0x0516, + 0x0518, 0x0518, + 0x051a, 0x051a, + 0x051c, 0x051c, + 0x051e, 0x051e, + 0x0520, 0x0520, + 0x0522, 0x0522, + 0x0524, 0x0524, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1e9e, 0x1e9e, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 
0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1efa, 0x1efa, + 0x1efc, 0x1efc, + 0x1efe, 0x1efe, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0x2183, 0x2183, + 0x2c00, 0x2c2e, + 0x2c60, 0x2c60, + 0x2c62, 0x2c64, + 0x2c67, 0x2c67, + 0x2c69, 0x2c69, + 0x2c6b, 0x2c6b, + 0x2c6d, 0x2c70, + 0x2c72, 0x2c72, + 0x2c75, 0x2c75, + 0x2c7e, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 
0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0x2ceb, 0x2ceb, + 0x2ced, 0x2ced, + 0xa640, 0xa640, + 0xa642, 0xa642, + 0xa644, 0xa644, + 0xa646, 0xa646, + 0xa648, 0xa648, + 0xa64a, 0xa64a, + 0xa64c, 0xa64c, + 0xa64e, 0xa64e, + 0xa650, 0xa650, + 0xa652, 0xa652, + 0xa654, 0xa654, + 0xa656, 0xa656, + 0xa658, 0xa658, + 0xa65a, 0xa65a, + 0xa65c, 0xa65c, + 0xa65e, 0xa65e, + 0xa662, 0xa662, + 0xa664, 0xa664, + 0xa666, 0xa666, + 0xa668, 0xa668, + 0xa66a, 0xa66a, + 0xa66c, 0xa66c, + 0xa680, 0xa680, + 0xa682, 0xa682, + 0xa684, 0xa684, + 0xa686, 0xa686, + 0xa688, 0xa688, + 0xa68a, 0xa68a, + 0xa68c, 0xa68c, + 0xa68e, 0xa68e, + 0xa690, 0xa690, + 0xa692, 0xa692, + 0xa694, 0xa694, + 0xa696, 0xa696, + 0xa722, 0xa722, + 0xa724, 0xa724, + 0xa726, 0xa726, + 0xa728, 0xa728, + 0xa72a, 0xa72a, + 0xa72c, 0xa72c, + 0xa72e, 0xa72e, + 0xa732, 0xa732, + 0xa734, 0xa734, + 0xa736, 0xa736, + 0xa738, 0xa738, + 0xa73a, 0xa73a, + 0xa73c, 0xa73c, + 0xa73e, 0xa73e, + 0xa740, 0xa740, + 0xa742, 0xa742, + 0xa744, 0xa744, + 0xa746, 0xa746, + 0xa748, 0xa748, + 0xa74a, 0xa74a, + 0xa74c, 0xa74c, + 0xa74e, 0xa74e, + 0xa750, 0xa750, + 0xa752, 0xa752, + 0xa754, 0xa754, + 0xa756, 0xa756, + 0xa758, 0xa758, + 0xa75a, 0xa75a, + 0xa75c, 0xa75c, + 0xa75e, 0xa75e, + 0xa760, 0xa760, + 0xa762, 0xa762, + 0xa764, 0xa764, + 0xa766, 0xa766, + 0xa768, 0xa768, + 0xa76a, 0xa76a, + 0xa76c, 0xa76c, + 0xa76e, 0xa76e, + 0xa779, 0xa779, + 0xa77b, 0xa77b, + 0xa77d, 0xa77e, + 0xa780, 0xa780, + 0xa782, 0xa782, + 0xa784, 0xa784, + 0xa786, 0xa786, + 0xa78b, 0xa78b, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 
0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8, + 0x1d7ca, 0x1d7ca, +}; /* CR_Lu */ + +/* 'M': Major Category */ +static const OnigCodePoint CR_M[] = { + 188, + 0x0300, 0x036f, + 0x0483, 0x0489, + 0x0591, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x061a, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06de, 0x06e4, + 0x06e7, 0x06e8, + 0x06ea, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x07eb, 0x07f3, + 0x0816, 0x0819, + 0x081b, 0x0823, + 0x0825, 0x0827, + 0x0829, 0x082d, + 0x0900, 0x0903, + 0x093c, 0x093c, + 0x093e, 0x094e, + 0x0951, 0x0955, + 0x0962, 0x0963, + 0x0981, 0x0983, + 0x09bc, 0x09bc, + 0x09be, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09e2, 0x09e3, + 0x0a01, 0x0a03, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a70, 0x0a71, + 0x0a75, 0x0a75, + 0x0a81, 0x0a83, + 0x0abc, 0x0abc, + 0x0abe, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b3c, 0x0b3c, + 0x0b3e, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b62, 0x0b63, + 0x0b82, 0x0b82, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c62, 0x0c63, + 0x0c82, 0x0c83, + 0x0cbc, 0x0cbc, + 0x0cbe, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0ce2, 0x0ce3, + 0x0d02, 0x0d03, + 0x0d3e, 0x0d44, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d62, 0x0d63, + 0x0d82, 0x0d83, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e47, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 
0x0f3f, + 0x0f71, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102b, 0x103e, + 0x1056, 0x1059, + 0x105e, 0x1060, + 0x1062, 0x1064, + 0x1067, 0x106d, + 0x1071, 0x1074, + 0x1082, 0x108d, + 0x108f, 0x108f, + 0x109a, 0x109d, + 0x135f, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b6, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a17, 0x1a1b, + 0x1a55, 0x1a5e, + 0x1a60, 0x1a7c, + 0x1a7f, 0x1a7f, + 0x1b00, 0x1b04, + 0x1b34, 0x1b44, + 0x1b6b, 0x1b73, + 0x1b80, 0x1b82, + 0x1ba1, 0x1baa, + 0x1c24, 0x1c37, + 0x1cd0, 0x1cd2, + 0x1cd4, 0x1ce8, + 0x1ced, 0x1ced, + 0x1cf2, 0x1cf2, + 0x1dc0, 0x1de6, + 0x1dfd, 0x1dff, + 0x20d0, 0x20f0, + 0x2cef, 0x2cf1, + 0x2de0, 0x2dff, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa66f, 0xa672, + 0xa67c, 0xa67d, + 0xa6f0, 0xa6f1, + 0xa802, 0xa802, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa823, 0xa827, + 0xa880, 0xa881, + 0xa8b4, 0xa8c4, + 0xa8e0, 0xa8f1, + 0xa926, 0xa92d, + 0xa947, 0xa953, + 0xa980, 0xa983, + 0xa9b3, 0xa9c0, + 0xaa29, 0xaa36, + 0xaa43, 0xaa43, + 0xaa4c, 0xaa4d, + 0xaa7b, 0xaa7b, + 0xaab0, 0xaab0, + 0xaab2, 0xaab4, + 0xaab7, 0xaab8, + 0xaabe, 0xaabf, + 0xaac1, 0xaac1, + 0xabe3, 0xabea, + 0xabec, 0xabed, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe26, + 0x101fd, 0x101fd, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x11080, 0x11082, + 0x110b0, 0x110ba, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0100, 0xe01ef, +}; /* CR_M */ + +/* 'Mc': General Category */ +static const OnigCodePoint CR_Mc[] = { + 106, + 0x0903, 0x0903, + 0x093e, 0x0940, + 0x0949, 0x094c, + 0x094e, 0x094e, + 0x0982, 0x0983, + 0x09be, 0x09c0, + 0x09c7, 0x09c8, + 0x09cb, 0x09cc, + 0x09d7, 0x09d7, + 0x0a03, 0x0a03, + 0x0a3e, 0x0a40, + 0x0a83, 0x0a83, + 0x0abe, 
0x0ac0, + 0x0ac9, 0x0ac9, + 0x0acb, 0x0acc, + 0x0b02, 0x0b03, + 0x0b3e, 0x0b3e, + 0x0b40, 0x0b40, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4c, + 0x0b57, 0x0b57, + 0x0bbe, 0x0bbf, + 0x0bc1, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcc, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c41, 0x0c44, + 0x0c82, 0x0c83, + 0x0cbe, 0x0cbe, + 0x0cc0, 0x0cc4, + 0x0cc7, 0x0cc8, + 0x0cca, 0x0ccb, + 0x0cd5, 0x0cd6, + 0x0d02, 0x0d03, + 0x0d3e, 0x0d40, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4c, + 0x0d57, 0x0d57, + 0x0d82, 0x0d83, + 0x0dcf, 0x0dd1, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0f3e, 0x0f3f, + 0x0f7f, 0x0f7f, + 0x102b, 0x102c, + 0x1031, 0x1031, + 0x1038, 0x1038, + 0x103b, 0x103c, + 0x1056, 0x1057, + 0x1062, 0x1064, + 0x1067, 0x106d, + 0x1083, 0x1084, + 0x1087, 0x108c, + 0x108f, 0x108f, + 0x109a, 0x109c, + 0x17b6, 0x17b6, + 0x17be, 0x17c5, + 0x17c7, 0x17c8, + 0x1923, 0x1926, + 0x1929, 0x192b, + 0x1930, 0x1931, + 0x1933, 0x1938, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a19, 0x1a1b, + 0x1a55, 0x1a55, + 0x1a57, 0x1a57, + 0x1a61, 0x1a61, + 0x1a63, 0x1a64, + 0x1a6d, 0x1a72, + 0x1b04, 0x1b04, + 0x1b35, 0x1b35, + 0x1b3b, 0x1b3b, + 0x1b3d, 0x1b41, + 0x1b43, 0x1b44, + 0x1b82, 0x1b82, + 0x1ba1, 0x1ba1, + 0x1ba6, 0x1ba7, + 0x1baa, 0x1baa, + 0x1c24, 0x1c2b, + 0x1c34, 0x1c35, + 0x1ce1, 0x1ce1, + 0x1cf2, 0x1cf2, + 0xa823, 0xa824, + 0xa827, 0xa827, + 0xa880, 0xa881, + 0xa8b4, 0xa8c3, + 0xa952, 0xa953, + 0xa983, 0xa983, + 0xa9b4, 0xa9b5, + 0xa9ba, 0xa9bb, + 0xa9bd, 0xa9c0, + 0xaa2f, 0xaa30, + 0xaa33, 0xaa34, + 0xaa4d, 0xaa4d, + 0xaa7b, 0xaa7b, + 0xabe3, 0xabe4, + 0xabe6, 0xabe7, + 0xabe9, 0xabea, + 0xabec, 0xabec, + 0x11082, 0x11082, + 0x110b0, 0x110b2, + 0x110b7, 0x110b8, + 0x1d165, 0x1d166, + 0x1d16d, 0x1d172, +}; /* CR_Mc */ + +/* 'Me': General Category */ +static const OnigCodePoint CR_Me[] = { + 5, + 0x0488, 0x0489, + 0x06de, 0x06de, + 0x20dd, 0x20e0, + 0x20e2, 0x20e4, + 0xa670, 0xa672, +}; /* CR_Me */ + +/* 'Mn': General Category */ +static const OnigCodePoint CR_Mn[] = { + 194, + 0x0300, 0x036f, + 0x0483, 
0x0487, + 0x0591, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x061a, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06df, 0x06e4, + 0x06e7, 0x06e8, + 0x06ea, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x07eb, 0x07f3, + 0x0816, 0x0819, + 0x081b, 0x0823, + 0x0825, 0x0827, + 0x0829, 0x082d, + 0x0900, 0x0902, + 0x093c, 0x093c, + 0x0941, 0x0948, + 0x094d, 0x094d, + 0x0951, 0x0955, + 0x0962, 0x0963, + 0x0981, 0x0981, + 0x09bc, 0x09bc, + 0x09c1, 0x09c4, + 0x09cd, 0x09cd, + 0x09e2, 0x09e3, + 0x0a01, 0x0a02, + 0x0a3c, 0x0a3c, + 0x0a41, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a70, 0x0a71, + 0x0a75, 0x0a75, + 0x0a81, 0x0a82, + 0x0abc, 0x0abc, + 0x0ac1, 0x0ac5, + 0x0ac7, 0x0ac8, + 0x0acd, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b01, + 0x0b3c, 0x0b3c, + 0x0b3f, 0x0b3f, + 0x0b41, 0x0b44, + 0x0b4d, 0x0b4d, + 0x0b56, 0x0b56, + 0x0b62, 0x0b63, + 0x0b82, 0x0b82, + 0x0bc0, 0x0bc0, + 0x0bcd, 0x0bcd, + 0x0c3e, 0x0c40, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c62, 0x0c63, + 0x0cbc, 0x0cbc, + 0x0cbf, 0x0cbf, + 0x0cc6, 0x0cc6, + 0x0ccc, 0x0ccd, + 0x0ce2, 0x0ce3, + 0x0d41, 0x0d44, + 0x0d4d, 0x0d4d, + 0x0d62, 0x0d63, + 0x0dca, 0x0dca, + 0x0dd2, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e47, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f71, 0x0f7e, + 0x0f80, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102d, 0x1030, + 0x1032, 0x1037, + 0x1039, 0x103a, + 0x103d, 0x103e, + 0x1058, 0x1059, + 0x105e, 0x1060, + 0x1071, 0x1074, + 0x1082, 0x1082, + 0x1085, 0x1086, + 0x108d, 0x108d, + 0x109d, 0x109d, + 0x135f, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b7, 0x17bd, + 0x17c6, 0x17c6, + 0x17c9, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 
0x1922, + 0x1927, 0x1928, + 0x1932, 0x1932, + 0x1939, 0x193b, + 0x1a17, 0x1a18, + 0x1a56, 0x1a56, + 0x1a58, 0x1a5e, + 0x1a60, 0x1a60, + 0x1a62, 0x1a62, + 0x1a65, 0x1a6c, + 0x1a73, 0x1a7c, + 0x1a7f, 0x1a7f, + 0x1b00, 0x1b03, + 0x1b34, 0x1b34, + 0x1b36, 0x1b3a, + 0x1b3c, 0x1b3c, + 0x1b42, 0x1b42, + 0x1b6b, 0x1b73, + 0x1b80, 0x1b81, + 0x1ba2, 0x1ba5, + 0x1ba8, 0x1ba9, + 0x1c2c, 0x1c33, + 0x1c36, 0x1c37, + 0x1cd0, 0x1cd2, + 0x1cd4, 0x1ce0, + 0x1ce2, 0x1ce8, + 0x1ced, 0x1ced, + 0x1dc0, 0x1de6, + 0x1dfd, 0x1dff, + 0x20d0, 0x20dc, + 0x20e1, 0x20e1, + 0x20e5, 0x20f0, + 0x2cef, 0x2cf1, + 0x2de0, 0x2dff, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa66f, 0xa66f, + 0xa67c, 0xa67d, + 0xa6f0, 0xa6f1, + 0xa802, 0xa802, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa825, 0xa826, + 0xa8c4, 0xa8c4, + 0xa8e0, 0xa8f1, + 0xa926, 0xa92d, + 0xa947, 0xa951, + 0xa980, 0xa982, + 0xa9b3, 0xa9b3, + 0xa9b6, 0xa9b9, + 0xa9bc, 0xa9bc, + 0xaa29, 0xaa2e, + 0xaa31, 0xaa32, + 0xaa35, 0xaa36, + 0xaa43, 0xaa43, + 0xaa4c, 0xaa4c, + 0xaab0, 0xaab0, + 0xaab2, 0xaab4, + 0xaab7, 0xaab8, + 0xaabe, 0xaabf, + 0xaac1, 0xaac1, + 0xabe5, 0xabe5, + 0xabe8, 0xabe8, + 0xabed, 0xabed, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe26, + 0x101fd, 0x101fd, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x11080, 0x11081, + 0x110b3, 0x110b6, + 0x110b9, 0x110ba, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0100, 0xe01ef, +}; /* CR_Mn */ + +/* 'N': Major Category */ +static const OnigCodePoint CR_N[] = { + 81, + 0x0030, 0x0039, + 0x00b2, 0x00b3, + 0x00b9, 0x00b9, + 0x00bc, 0x00be, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x07c0, 0x07c9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x09f4, 0x09f9, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bf2, + 0x0c66, 0x0c6f, + 0x0c78, 0x0c7e, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d75, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f33, + 0x1040, 0x1049, + 0x1090, 0x1099, 
+ 0x1369, 0x137c, + 0x16ee, 0x16f0, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19da, + 0x1a80, 0x1a89, + 0x1a90, 0x1a99, + 0x1b50, 0x1b59, + 0x1bb0, 0x1bb9, + 0x1c40, 0x1c49, + 0x1c50, 0x1c59, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2150, 0x2182, + 0x2185, 0x2189, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0xa620, 0xa629, + 0xa6e6, 0xa6ef, + 0xa830, 0xa835, + 0xa8d0, 0xa8d9, + 0xa900, 0xa909, + 0xa9d0, 0xa9d9, + 0xaa50, 0xaa59, + 0xabf0, 0xabf9, + 0xff10, 0xff19, + 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018a, + 0x10320, 0x10323, + 0x10341, 0x10341, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5, + 0x104a0, 0x104a9, + 0x10858, 0x1085f, + 0x10916, 0x1091b, + 0x10a40, 0x10a47, + 0x10a7d, 0x10a7e, + 0x10b58, 0x10b5f, + 0x10b78, 0x10b7f, + 0x10e60, 0x10e7e, + 0x12400, 0x12462, + 0x1d360, 0x1d371, + 0x1d7ce, 0x1d7ff, + 0x1f100, 0x1f10a, +}; /* CR_N */ + +/* 'Nd': General Category */ +static const OnigCodePoint CR_Nd[] = { + 37, + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x07c0, 0x07c9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x1090, 0x1099, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19da, + 0x1a80, 0x1a89, + 0x1a90, 0x1a99, + 0x1b50, 0x1b59, + 0x1bb0, 0x1bb9, + 0x1c40, 0x1c49, + 0x1c50, 0x1c59, + 0xa620, 0xa629, + 0xa8d0, 0xa8d9, + 0xa900, 0xa909, + 0xa9d0, 0xa9d9, + 0xaa50, 0xaa59, + 0xabf0, 0xabf9, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff, +}; /* CR_Nd */ + +/* 'Nl': General Category */ +static const OnigCodePoint CR_Nl[] = { + 12, + 0x16ee, 0x16f0, + 0x2160, 0x2182, + 0x2185, 0x2188, + 
0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0xa6e6, 0xa6ef, + 0x10140, 0x10174, + 0x10341, 0x10341, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5, + 0x12400, 0x12462, +}; /* CR_Nl */ + +/* 'No': General Category */ +static const OnigCodePoint CR_No[] = { + 38, + 0x00b2, 0x00b3, + 0x00b9, 0x00b9, + 0x00bc, 0x00be, + 0x09f4, 0x09f9, + 0x0bf0, 0x0bf2, + 0x0c78, 0x0c7e, + 0x0d70, 0x0d75, + 0x0f2a, 0x0f33, + 0x1369, 0x137c, + 0x17f0, 0x17f9, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2150, 0x215f, + 0x2189, 0x2189, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0xa830, 0xa835, + 0x10107, 0x10133, + 0x10175, 0x10178, + 0x1018a, 0x1018a, + 0x10320, 0x10323, + 0x10858, 0x1085f, + 0x10916, 0x1091b, + 0x10a40, 0x10a47, + 0x10a7d, 0x10a7e, + 0x10b58, 0x10b5f, + 0x10b78, 0x10b7f, + 0x10e60, 0x10e7e, + 0x1d360, 0x1d371, + 0x1f100, 0x1f10a, +}; /* CR_No */ + +/* 'P': Major Category */ +static const OnigCodePoint CR_P[] = { + 129, + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d, + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x0609, 0x060a, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x07f7, 0x07f9, + 0x0830, 0x083e, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd4, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x1400, 0x1400, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 
0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x1aa0, 0x1aa6, + 0x1aa8, 0x1aad, + 0x1b5a, 0x1b60, + 0x1c3b, 0x1c3f, + 0x1c7e, 0x1c7f, + 0x1cd3, 0x1cd3, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x205e, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x2768, 0x2775, + 0x27c5, 0x27c6, + 0x27e6, 0x27ef, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e2e, + 0x2e30, 0x2e31, + 0x3001, 0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xa4fe, 0xa4ff, + 0xa60d, 0xa60f, + 0xa673, 0xa673, + 0xa67e, 0xa67e, + 0xa6f2, 0xa6f7, + 0xa874, 0xa877, + 0xa8ce, 0xa8cf, + 0xa8f8, 0xa8fa, + 0xa92e, 0xa92f, + 0xa95f, 0xa95f, + 0xa9c1, 0xa9cd, + 0xa9de, 0xa9df, + 0xaa5c, 0xaa5f, + 0xaade, 0xaadf, + 0xabeb, 0xabeb, + 0xfd3e, 0xfd3f, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x103d0, 0x103d0, + 0x10857, 0x10857, + 0x1091f, 0x1091f, + 0x1093f, 0x1093f, + 0x10a50, 0x10a58, + 0x10a7f, 0x10a7f, + 0x10b39, 0x10b3f, + 0x110bb, 0x110bc, + 0x110be, 0x110c1, + 0x12470, 0x12473, +}; /* CR_P */ + +/* 'Pc': General Category */ +static const OnigCodePoint CR_Pc[] = { + 6, + 0x005f, 0x005f, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xff3f, 0xff3f, +}; /* CR_Pc */ + +/* 'Pd': General Category */ +static const OnigCodePoint CR_Pd[] = { + 15, + 0x002d, 0x002d, + 0x058a, 0x058a, + 0x05be, 0x05be, + 0x1400, 0x1400, + 0x1806, 0x1806, + 0x2010, 0x2015, + 0x2e17, 0x2e17, + 0x2e1a, 0x2e1a, + 0x301c, 0x301c, + 0x3030, 0x3030, + 0x30a0, 0x30a0, + 0xfe31, 0xfe32, + 0xfe58, 0xfe58, + 0xfe63, 0xfe63, + 0xff0d, 0xff0d, +}; /* CR_Pd */ + 
+/* 'Pe': General Category */ +static const OnigCodePoint CR_Pe[] = { + 70, + 0x0029, 0x0029, + 0x005d, 0x005d, + 0x007d, 0x007d, + 0x0f3b, 0x0f3b, + 0x0f3d, 0x0f3d, + 0x169c, 0x169c, + 0x2046, 0x2046, + 0x207e, 0x207e, + 0x208e, 0x208e, + 0x232a, 0x232a, + 0x2769, 0x2769, + 0x276b, 0x276b, + 0x276d, 0x276d, + 0x276f, 0x276f, + 0x2771, 0x2771, + 0x2773, 0x2773, + 0x2775, 0x2775, + 0x27c6, 0x27c6, + 0x27e7, 0x27e7, + 0x27e9, 0x27e9, + 0x27eb, 0x27eb, + 0x27ed, 0x27ed, + 0x27ef, 0x27ef, + 0x2984, 0x2984, + 0x2986, 0x2986, + 0x2988, 0x2988, + 0x298a, 0x298a, + 0x298c, 0x298c, + 0x298e, 0x298e, + 0x2990, 0x2990, + 0x2992, 0x2992, + 0x2994, 0x2994, + 0x2996, 0x2996, + 0x2998, 0x2998, + 0x29d9, 0x29d9, + 0x29db, 0x29db, + 0x29fd, 0x29fd, + 0x2e23, 0x2e23, + 0x2e25, 0x2e25, + 0x2e27, 0x2e27, + 0x2e29, 0x2e29, + 0x3009, 0x3009, + 0x300b, 0x300b, + 0x300d, 0x300d, + 0x300f, 0x300f, + 0x3011, 0x3011, + 0x3015, 0x3015, + 0x3017, 0x3017, + 0x3019, 0x3019, + 0x301b, 0x301b, + 0x301e, 0x301f, + 0xfd3f, 0xfd3f, + 0xfe18, 0xfe18, + 0xfe36, 0xfe36, + 0xfe38, 0xfe38, + 0xfe3a, 0xfe3a, + 0xfe3c, 0xfe3c, + 0xfe3e, 0xfe3e, + 0xfe40, 0xfe40, + 0xfe42, 0xfe42, + 0xfe44, 0xfe44, + 0xfe48, 0xfe48, + 0xfe5a, 0xfe5a, + 0xfe5c, 0xfe5c, + 0xfe5e, 0xfe5e, + 0xff09, 0xff09, + 0xff3d, 0xff3d, + 0xff5d, 0xff5d, + 0xff60, 0xff60, + 0xff63, 0xff63, +}; /* CR_Pe */ + +/* 'Pf': General Category */ +static const OnigCodePoint CR_Pf[] = { + 10, + 0x00bb, 0x00bb, + 0x2019, 0x2019, + 0x201d, 0x201d, + 0x203a, 0x203a, + 0x2e03, 0x2e03, + 0x2e05, 0x2e05, + 0x2e0a, 0x2e0a, + 0x2e0d, 0x2e0d, + 0x2e1d, 0x2e1d, + 0x2e21, 0x2e21, +}; /* CR_Pf */ + +/* 'Pi': General Category */ +static const OnigCodePoint CR_Pi[] = { + 11, + 0x00ab, 0x00ab, + 0x2018, 0x2018, + 0x201b, 0x201c, + 0x201f, 0x201f, + 0x2039, 0x2039, + 0x2e02, 0x2e02, + 0x2e04, 0x2e04, + 0x2e09, 0x2e09, + 0x2e0c, 0x2e0c, + 0x2e1c, 0x2e1c, + 0x2e20, 0x2e20, +}; /* CR_Pi */ + +/* 'Po': General Category */ +static const OnigCodePoint CR_Po[] = { + 124, + 
0x0021, 0x0023, + 0x0025, 0x0027, + 0x002a, 0x002a, + 0x002c, 0x002c, + 0x002e, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005c, 0x005c, + 0x00a1, 0x00a1, + 0x00b7, 0x00b7, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x0589, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x0609, 0x060a, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x07f7, 0x07f9, + 0x0830, 0x083e, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd4, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x1805, + 0x1807, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x1aa0, 0x1aa6, + 0x1aa8, 0x1aad, + 0x1b5a, 0x1b60, + 0x1c3b, 0x1c3f, + 0x1c7e, 0x1c7f, + 0x1cd3, 0x1cd3, + 0x2016, 0x2017, + 0x2020, 0x2027, + 0x2030, 0x2038, + 0x203b, 0x203e, + 0x2041, 0x2043, + 0x2047, 0x2051, + 0x2053, 0x2053, + 0x2055, 0x205e, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e01, + 0x2e06, 0x2e08, + 0x2e0b, 0x2e0b, + 0x2e0e, 0x2e16, + 0x2e18, 0x2e19, + 0x2e1b, 0x2e1b, + 0x2e1e, 0x2e1f, + 0x2e2a, 0x2e2e, + 0x2e30, 0x2e31, + 0x3001, 0x3003, + 0x303d, 0x303d, + 0x30fb, 0x30fb, + 0xa4fe, 0xa4ff, + 0xa60d, 0xa60f, + 0xa673, 0xa673, + 0xa67e, 0xa67e, + 0xa6f2, 0xa6f7, + 0xa874, 0xa877, + 0xa8ce, 0xa8cf, + 0xa8f8, 0xa8fa, + 0xa92e, 0xa92f, + 0xa95f, 0xa95f, + 0xa9c1, 0xa9cd, + 0xa9de, 0xa9df, + 0xaa5c, 0xaa5f, + 0xaade, 0xaadf, + 0xabeb, 0xabeb, + 0xfe10, 0xfe16, + 0xfe19, 0xfe19, + 0xfe30, 0xfe30, + 0xfe45, 0xfe46, + 0xfe49, 0xfe4c, + 0xfe50, 0xfe52, + 0xfe54, 0xfe57, + 0xfe5f, 0xfe61, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff07, + 0xff0a, 0xff0a, + 0xff0c, 0xff0c, + 0xff0e, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3c, 0xff3c, + 0xff61, 0xff61, + 
0xff64, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x103d0, 0x103d0, + 0x10857, 0x10857, + 0x1091f, 0x1091f, + 0x1093f, 0x1093f, + 0x10a50, 0x10a58, + 0x10a7f, 0x10a7f, + 0x10b39, 0x10b3f, + 0x110bb, 0x110bc, + 0x110be, 0x110c1, + 0x12470, 0x12473, +}; /* CR_Po */ + +/* 'Ps': General Category */ +static const OnigCodePoint CR_Ps[] = { + 72, + 0x0028, 0x0028, + 0x005b, 0x005b, + 0x007b, 0x007b, + 0x0f3a, 0x0f3a, + 0x0f3c, 0x0f3c, + 0x169b, 0x169b, + 0x201a, 0x201a, + 0x201e, 0x201e, + 0x2045, 0x2045, + 0x207d, 0x207d, + 0x208d, 0x208d, + 0x2329, 0x2329, + 0x2768, 0x2768, + 0x276a, 0x276a, + 0x276c, 0x276c, + 0x276e, 0x276e, + 0x2770, 0x2770, + 0x2772, 0x2772, + 0x2774, 0x2774, + 0x27c5, 0x27c5, + 0x27e6, 0x27e6, + 0x27e8, 0x27e8, + 0x27ea, 0x27ea, + 0x27ec, 0x27ec, + 0x27ee, 0x27ee, + 0x2983, 0x2983, + 0x2985, 0x2985, + 0x2987, 0x2987, + 0x2989, 0x2989, + 0x298b, 0x298b, + 0x298d, 0x298d, + 0x298f, 0x298f, + 0x2991, 0x2991, + 0x2993, 0x2993, + 0x2995, 0x2995, + 0x2997, 0x2997, + 0x29d8, 0x29d8, + 0x29da, 0x29da, + 0x29fc, 0x29fc, + 0x2e22, 0x2e22, + 0x2e24, 0x2e24, + 0x2e26, 0x2e26, + 0x2e28, 0x2e28, + 0x3008, 0x3008, + 0x300a, 0x300a, + 0x300c, 0x300c, + 0x300e, 0x300e, + 0x3010, 0x3010, + 0x3014, 0x3014, + 0x3016, 0x3016, + 0x3018, 0x3018, + 0x301a, 0x301a, + 0x301d, 0x301d, + 0xfd3e, 0xfd3e, + 0xfe17, 0xfe17, + 0xfe35, 0xfe35, + 0xfe37, 0xfe37, + 0xfe39, 0xfe39, + 0xfe3b, 0xfe3b, + 0xfe3d, 0xfe3d, + 0xfe3f, 0xfe3f, + 0xfe41, 0xfe41, + 0xfe43, 0xfe43, + 0xfe47, 0xfe47, + 0xfe59, 0xfe59, + 0xfe5b, 0xfe5b, + 0xfe5d, 0xfe5d, + 0xff08, 0xff08, + 0xff3b, 0xff3b, + 0xff5b, 0xff5b, + 0xff5f, 0xff5f, + 0xff62, 0xff62, +}; /* CR_Ps */ + +/* 'S': Major Category */ +static const OnigCodePoint CR_S[] = { + 198, + 0x0024, 0x0024, + 0x002b, 0x002b, + 0x003c, 0x003e, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x007c, 0x007c, + 0x007e, 0x007e, + 0x00a2, 0x00a9, + 0x00ac, 0x00ac, + 0x00ae, 0x00b1, + 0x00b4, 0x00b4, + 0x00b6, 0x00b6, + 0x00b8, 0x00b8, + 0x00d7, 0x00d7, + 0x00f7, 
0x00f7, + 0x02c2, 0x02c5, + 0x02d2, 0x02df, + 0x02e5, 0x02eb, + 0x02ed, 0x02ed, + 0x02ef, 0x02ff, + 0x0375, 0x0375, + 0x0384, 0x0385, + 0x03f6, 0x03f6, + 0x0482, 0x0482, + 0x0606, 0x0608, + 0x060b, 0x060b, + 0x060e, 0x060f, + 0x06e9, 0x06e9, + 0x06fd, 0x06fe, + 0x07f6, 0x07f6, + 0x09f2, 0x09f3, + 0x09fa, 0x09fb, + 0x0af1, 0x0af1, + 0x0b70, 0x0b70, + 0x0bf3, 0x0bfa, + 0x0c7f, 0x0c7f, + 0x0cf1, 0x0cf2, + 0x0d79, 0x0d79, + 0x0e3f, 0x0e3f, + 0x0f01, 0x0f03, + 0x0f13, 0x0f17, + 0x0f1a, 0x0f1f, + 0x0f34, 0x0f34, + 0x0f36, 0x0f36, + 0x0f38, 0x0f38, + 0x0fbe, 0x0fc5, + 0x0fc7, 0x0fcc, + 0x0fce, 0x0fcf, + 0x0fd5, 0x0fd8, + 0x109e, 0x109f, + 0x1360, 0x1360, + 0x1390, 0x1399, + 0x17db, 0x17db, + 0x1940, 0x1940, + 0x19e0, 0x19ff, + 0x1b61, 0x1b6a, + 0x1b74, 0x1b7c, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x20a0, 0x20b8, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2118, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x213a, 0x213b, + 0x2140, 0x2144, + 0x214a, 0x214d, + 0x214f, 0x214f, + 0x2190, 0x2328, + 0x232b, 0x23e8, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x26cd, + 0x26cf, 0x26e1, + 0x26e3, 0x26e3, + 0x26e8, 0x26ff, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x275e, + 0x2761, 0x2767, + 0x2794, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c4, + 0x27c7, 0x27ca, + 0x27cc, 0x27cc, + 0x27d0, 0x27e5, + 0x27f0, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2b4c, + 0x2b50, 0x2b59, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x309b, 0x309c, + 0x3190, 0x3191, + 0x3196, 0x319f, + 0x31c0, 0x31e3, + 0x3200, 
0x321e, + 0x322a, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa720, 0xa721, + 0xa789, 0xa78a, + 0xa828, 0xa82b, + 0xa836, 0xa839, + 0xaa77, 0xaa79, + 0xfb29, 0xfb29, + 0xfdfc, 0xfdfd, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfffc, 0xfffd, + 0x10102, 0x10102, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x10190, 0x1019b, + 0x101d0, 0x101fc, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d129, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3, + 0x1f000, 0x1f02b, + 0x1f030, 0x1f093, + 0x1f110, 0x1f12e, + 0x1f131, 0x1f131, + 0x1f13d, 0x1f13d, + 0x1f13f, 0x1f13f, + 0x1f142, 0x1f142, + 0x1f146, 0x1f146, + 0x1f14a, 0x1f14e, + 0x1f157, 0x1f157, + 0x1f15f, 0x1f15f, + 0x1f179, 0x1f179, + 0x1f17b, 0x1f17c, + 0x1f17f, 0x1f17f, + 0x1f18a, 0x1f18d, + 0x1f190, 0x1f190, + 0x1f200, 0x1f200, + 0x1f210, 0x1f231, + 0x1f240, 0x1f248, +}; /* CR_S */ + +/* 'Sc': General Category */ +static const OnigCodePoint CR_Sc[] = { + 16, + 0x0024, 0x0024, + 0x00a2, 0x00a5, + 0x060b, 0x060b, + 0x09f2, 0x09f3, + 0x09fb, 0x09fb, + 0x0af1, 0x0af1, + 0x0bf9, 0x0bf9, + 0x0e3f, 0x0e3f, + 0x17db, 0x17db, + 0x20a0, 0x20b8, + 0xa838, 0xa838, + 0xfdfc, 0xfdfc, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xffe0, 0xffe1, + 0xffe5, 0xffe6, +}; /* CR_Sc */ + +/* 'Sk': General Category */ +static const OnigCodePoint CR_Sk[] = { + 26, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x00a8, 0x00a8, + 0x00af, 0x00af, + 0x00b4, 0x00b4, + 0x00b8, 0x00b8, + 0x02c2, 0x02c5, + 
0x02d2, 0x02df, + 0x02e5, 0x02eb, + 0x02ed, 0x02ed, + 0x02ef, 0x02ff, + 0x0375, 0x0375, + 0x0384, 0x0385, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x309b, 0x309c, + 0xa700, 0xa716, + 0xa720, 0xa721, + 0xa789, 0xa78a, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xffe3, 0xffe3, +}; /* CR_Sk */ + +/* 'Sm': General Category */ +static const OnigCodePoint CR_Sm[] = { + 65, + 0x002b, 0x002b, + 0x003c, 0x003e, + 0x007c, 0x007c, + 0x007e, 0x007e, + 0x00ac, 0x00ac, + 0x00b1, 0x00b1, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x03f6, 0x03f6, + 0x0606, 0x0608, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x2140, 0x2144, + 0x214b, 0x214b, + 0x2190, 0x2194, + 0x219a, 0x219b, + 0x21a0, 0x21a0, + 0x21a3, 0x21a3, + 0x21a6, 0x21a6, + 0x21ae, 0x21ae, + 0x21ce, 0x21cf, + 0x21d2, 0x21d2, + 0x21d4, 0x21d4, + 0x21f4, 0x22ff, + 0x2308, 0x230b, + 0x2320, 0x2321, + 0x237c, 0x237c, + 0x239b, 0x23b3, + 0x23dc, 0x23e1, + 0x25b7, 0x25b7, + 0x25c1, 0x25c1, + 0x25f8, 0x25ff, + 0x266f, 0x266f, + 0x27c0, 0x27c4, + 0x27c7, 0x27ca, + 0x27cc, 0x27cc, + 0x27d0, 0x27e5, + 0x27f0, 0x27ff, + 0x2900, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2aff, + 0x2b30, 0x2b44, + 0x2b47, 0x2b4c, + 0xfb29, 0xfb29, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe2, 0xffe2, + 0xffe9, 0xffec, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3, +}; /* CR_Sm */ + +/* 'So': General Category */ +static const OnigCodePoint CR_So[] = { + 154, + 0x00a6, 0x00a7, + 0x00a9, 0x00a9, + 0x00ae, 0x00ae, + 0x00b0, 0x00b0, + 0x00b6, 0x00b6, + 0x0482, 0x0482, + 0x060e, 0x060f, + 0x06e9, 0x06e9, + 0x06fd, 0x06fe, + 0x07f6, 0x07f6, + 0x09fa, 0x09fa, + 0x0b70, 0x0b70, + 0x0bf3, 0x0bf8, + 0x0bfa, 0x0bfa, + 0x0c7f, 0x0c7f, + 0x0cf1, 
0x0cf2, + 0x0d79, 0x0d79, + 0x0f01, 0x0f03, + 0x0f13, 0x0f17, + 0x0f1a, 0x0f1f, + 0x0f34, 0x0f34, + 0x0f36, 0x0f36, + 0x0f38, 0x0f38, + 0x0fbe, 0x0fc5, + 0x0fc7, 0x0fcc, + 0x0fce, 0x0fcf, + 0x0fd5, 0x0fd8, + 0x109e, 0x109f, + 0x1360, 0x1360, + 0x1390, 0x1399, + 0x1940, 0x1940, + 0x19e0, 0x19ff, + 0x1b61, 0x1b6a, + 0x1b74, 0x1b7c, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2118, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x213a, 0x213b, + 0x214a, 0x214a, + 0x214c, 0x214d, + 0x214f, 0x214f, + 0x2195, 0x2199, + 0x219c, 0x219f, + 0x21a1, 0x21a2, + 0x21a4, 0x21a5, + 0x21a7, 0x21ad, + 0x21af, 0x21cd, + 0x21d0, 0x21d1, + 0x21d3, 0x21d3, + 0x21d5, 0x21f3, + 0x2300, 0x2307, + 0x230c, 0x231f, + 0x2322, 0x2328, + 0x232b, 0x237b, + 0x237d, 0x239a, + 0x23b4, 0x23db, + 0x23e2, 0x23e8, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x25b6, + 0x25b8, 0x25c0, + 0x25c2, 0x25f7, + 0x2600, 0x266e, + 0x2670, 0x26cd, + 0x26cf, 0x26e1, + 0x26e3, 0x26e3, + 0x26e8, 0x26ff, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x275e, + 0x2761, 0x2767, + 0x2794, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x2800, 0x28ff, + 0x2b00, 0x2b2f, + 0x2b45, 0x2b46, + 0x2b50, 0x2b59, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x3190, 0x3191, + 0x3196, 0x319f, + 0x31c0, 0x31e3, + 0x3200, 0x321e, + 0x322a, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa828, 0xa82b, + 0xa836, 0xa837, + 0xa839, 0xa839, + 0xaa77, 0xaa79, + 0xfdfd, 0xfdfd, + 0xffe4, 0xffe4, + 0xffe8, 0xffe8, + 0xffed, 0xffee, + 0xfffc, 0xfffd, + 0x10102, 0x10102, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x10190, 0x1019b, + 0x101d0, 0x101fc, + 0x1d000, 0x1d0f5, 
+ 0x1d100, 0x1d126, + 0x1d129, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356, + 0x1f000, 0x1f02b, + 0x1f030, 0x1f093, + 0x1f110, 0x1f12e, + 0x1f131, 0x1f131, + 0x1f13d, 0x1f13d, + 0x1f13f, 0x1f13f, + 0x1f142, 0x1f142, + 0x1f146, 0x1f146, + 0x1f14a, 0x1f14e, + 0x1f157, 0x1f157, + 0x1f15f, 0x1f15f, + 0x1f179, 0x1f179, + 0x1f17b, 0x1f17c, + 0x1f17f, 0x1f17f, + 0x1f18a, 0x1f18d, + 0x1f190, 0x1f190, + 0x1f200, 0x1f200, + 0x1f210, 0x1f231, + 0x1f240, 0x1f248, +}; /* CR_So */ + +/* 'Z': Major Category */ +static const OnigCodePoint CR_Z[] = { + 9, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000, +}; /* CR_Z */ + +/* 'Zl': General Category */ +static const OnigCodePoint CR_Zl[] = { + 1, + 0x2028, 0x2028, +}; /* CR_Zl */ + +/* 'Zp': General Category */ +static const OnigCodePoint CR_Zp[] = { + 1, + 0x2029, 0x2029, +}; /* CR_Zp */ + +/* 'Zs': General Category */ +static const OnigCodePoint CR_Zs[] = { + 8, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000, +}; /* CR_Zs */ + +/* 'Math': Derived Property */ +static const OnigCodePoint CR_Math[] = { + 105, + 0x002b, 0x002b, + 0x003c, 0x003e, + 0x005e, 0x005e, + 0x007c, 0x007c, + 0x007e, 0x007e, + 0x00ac, 0x00ac, + 0x00b1, 0x00b1, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x03d0, 0x03d2, + 0x03d5, 0x03d5, + 0x03f0, 0x03f1, + 0x03f4, 0x03f6, + 0x0606, 0x0608, + 0x2016, 0x2016, + 0x2032, 0x2034, + 0x2040, 0x2040, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x2061, 0x2064, + 0x207a, 0x207e, + 0x208a, 0x208e, + 0x20d0, 0x20dc, + 0x20e1, 0x20e1, + 0x20e5, 0x20e6, + 0x20eb, 0x20ef, + 0x2102, 0x2102, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2128, 0x2129, + 0x212c, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2138, + 
0x213c, 0x2149, + 0x214b, 0x214b, + 0x2190, 0x21a7, + 0x21a9, 0x21ae, + 0x21b0, 0x21b1, + 0x21b6, 0x21b7, + 0x21bc, 0x21db, + 0x21dd, 0x21dd, + 0x21e4, 0x21e5, + 0x21f4, 0x22ff, + 0x2308, 0x230b, + 0x2320, 0x2321, + 0x237c, 0x237c, + 0x239b, 0x23b5, + 0x23b7, 0x23b7, + 0x23d0, 0x23d0, + 0x23dc, 0x23e2, + 0x25a0, 0x25a1, + 0x25ae, 0x25b7, + 0x25bc, 0x25c1, + 0x25c6, 0x25c7, + 0x25ca, 0x25cb, + 0x25cf, 0x25d3, + 0x25e2, 0x25e2, + 0x25e4, 0x25e4, + 0x25e7, 0x25ec, + 0x25f8, 0x25ff, + 0x2605, 0x2606, + 0x2640, 0x2640, + 0x2642, 0x2642, + 0x2660, 0x2663, + 0x266d, 0x266f, + 0x27c0, 0x27ca, + 0x27cc, 0x27cc, + 0x27d0, 0x27ff, + 0x2900, 0x2aff, + 0x2b30, 0x2b44, + 0x2b47, 0x2b4c, + 0xfb29, 0xfb29, + 0xfe61, 0xfe66, + 0xfe68, 0xfe68, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff3c, 0xff3c, + 0xff3e, 0xff3e, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe2, 0xffe2, + 0xffe9, 0xffec, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7cb, + 0x1d7ce, 0x1d7ff, +}; /* CR_Math */ + +/* 'Alphabetic': Derived Property */ +static const OnigCodePoint CR_Alphabetic[] = { + 474, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ec, 0x02ec, + 0x02ee, 0x02ee, + 0x0345, 0x0345, + 0x0370, 0x0374, + 0x0376, 0x0377, + 0x037a, 0x037d, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x0525, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x05b0, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 
0x05f2, + 0x0610, 0x061a, + 0x0621, 0x0657, + 0x0659, 0x065e, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06e1, 0x06e8, + 0x06ed, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x073f, + 0x074d, 0x07b1, + 0x07ca, 0x07ea, + 0x07f4, 0x07f5, + 0x07fa, 0x07fa, + 0x0800, 0x0817, + 0x081a, 0x082c, + 0x0900, 0x0939, + 0x093d, 0x094c, + 0x094e, 0x094e, + 0x0950, 0x0950, + 0x0955, 0x0955, + 0x0958, 0x0963, + 0x0971, 0x0972, + 0x0979, 0x097f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cc, + 0x09ce, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09f0, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4c, + 0x0a51, 0x0a51, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a70, 0x0a75, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acc, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4c, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b63, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcc, + 0x0bd0, 0x0bd0, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4c, + 0x0c55, 0x0c56, + 0x0c58, 0x0c59, + 0x0c60, 0x0c63, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 
0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccc, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce3, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d44, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4c, + 0x0d57, 0x0d57, + 0x0d60, 0x0d63, + 0x0d7a, 0x0d7f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e46, + 0x0e4d, 0x0e4d, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ecd, 0x0ecd, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f71, 0x0f81, + 0x0f88, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x1000, 0x1036, + 0x1038, 0x1038, + 0x103b, 0x103f, + 0x1050, 0x1062, + 0x1065, 0x1068, + 0x106e, 0x1086, + 0x108e, 0x108e, + 0x109c, 0x109d, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1713, + 0x1720, 0x1733, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17c8, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x1820, 0x1877, + 0x1880, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x1938, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 
0x19ab, + 0x19b0, 0x19c9, + 0x1a00, 0x1a1b, + 0x1a20, 0x1a5e, + 0x1a61, 0x1a74, + 0x1aa7, 0x1aa7, + 0x1b00, 0x1b33, + 0x1b35, 0x1b43, + 0x1b45, 0x1b4b, + 0x1b80, 0x1ba9, + 0x1bae, 0x1baf, + 0x1c00, 0x1c35, + 0x1c4d, 0x1c4f, + 0x1c5a, 0x1c7d, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf2, + 0x1d00, 0x1dbf, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2160, 0x2188, + 0x24b6, 0x24e9, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cee, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2de0, 0x2dff, + 0x2e2f, 0x2e2f, + 0x3005, 0x3007, + 0x3021, 0x3029, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa61f, + 0xa62a, 0xa62b, + 0xa640, 0xa65f, + 0xa662, 0xa66e, + 0xa67f, 0xa697, + 0xa6a0, 0xa6ef, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78c, + 0xa7fb, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa827, + 0xa840, 0xa873, + 0xa880, 0xa8c3, + 0xa8f2, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa90a, 0xa92a, + 0xa930, 0xa952, + 0xa960, 0xa97c, + 0xa980, 0xa9bf, + 0xa9cf, 0xa9cf, + 0xaa00, 
0xaa36, + 0xaa40, 0xaa4d, + 0xaa60, 0xaa76, + 0xaa7a, 0xaa7a, + 0xaa80, 0xaabe, + 0xaac0, 0xaac0, + 0xaac2, 0xaac2, + 0xaadb, 0xaadd, + 0xabc0, 0xabea, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10140, 0x10174, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a60, 0x10a7c, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10c00, 0x10c48, + 0x11082, 0x110b8, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x13000, 0x1342e, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 
0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, +}; /* CR_Alphabetic */ + +/* 'Lowercase': Derived Property */ +static const OnigCodePoint CR_Lowercase[] = { + 602, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 
0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0239, + 0x023c, 0x023c, + 0x023f, 0x0240, + 0x0242, 0x0242, + 0x0247, 0x0247, + 0x0249, 0x0249, + 0x024b, 0x024b, + 0x024d, 0x024d, + 0x024f, 0x0293, + 0x0295, 0x02b8, + 0x02c0, 0x02c1, + 0x02e0, 0x02e4, + 0x0345, 0x0345, + 0x0371, 0x0371, + 0x0373, 0x0373, + 0x0377, 0x0377, + 0x037a, 0x037d, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fc, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 
0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04cf, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f7, 0x04f7, + 0x04f9, 0x04f9, + 0x04fb, 0x04fb, + 0x04fd, 0x04fd, + 0x04ff, 0x04ff, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0511, 0x0511, + 0x0513, 0x0513, + 0x0515, 0x0515, + 0x0517, 0x0517, + 0x0519, 0x0519, + 0x051b, 0x051b, + 0x051d, 0x051d, + 0x051f, 0x051f, + 0x0521, 0x0521, + 0x0523, 0x0523, + 0x0525, 0x0525, + 0x0561, 0x0587, + 0x1d00, 0x1dbf, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 
0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9d, + 0x1e9f, 0x1e9f, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1efb, 0x1efb, + 0x1efd, 0x1efd, + 0x1eff, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2090, 0x2094, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x214e, 0x214e, + 0x2170, 0x217f, + 0x2184, 0x2184, + 0x24d0, 0x24e9, + 0x2c30, 0x2c5e, + 0x2c61, 0x2c61, + 0x2c65, 0x2c66, + 0x2c68, 0x2c68, + 
0x2c6a, 0x2c6a, + 0x2c6c, 0x2c6c, + 0x2c71, 0x2c71, + 0x2c73, 0x2c74, + 0x2c76, 0x2c7d, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce4, + 0x2cec, 0x2cec, + 0x2cee, 0x2cee, + 0x2d00, 0x2d25, + 0xa641, 0xa641, + 0xa643, 0xa643, + 0xa645, 0xa645, + 0xa647, 0xa647, + 0xa649, 0xa649, + 0xa64b, 0xa64b, + 0xa64d, 0xa64d, + 0xa64f, 0xa64f, + 0xa651, 0xa651, + 0xa653, 0xa653, + 0xa655, 0xa655, + 0xa657, 0xa657, + 0xa659, 0xa659, + 0xa65b, 0xa65b, + 0xa65d, 0xa65d, + 0xa65f, 0xa65f, + 0xa663, 0xa663, + 0xa665, 0xa665, + 0xa667, 0xa667, + 0xa669, 0xa669, + 0xa66b, 0xa66b, + 0xa66d, 0xa66d, + 0xa681, 0xa681, + 0xa683, 0xa683, + 0xa685, 0xa685, + 0xa687, 0xa687, + 0xa689, 0xa689, + 0xa68b, 0xa68b, + 0xa68d, 0xa68d, + 0xa68f, 0xa68f, + 0xa691, 0xa691, + 0xa693, 0xa693, + 0xa695, 0xa695, + 0xa697, 0xa697, + 0xa723, 0xa723, + 0xa725, 0xa725, + 0xa727, 0xa727, + 0xa729, 0xa729, + 0xa72b, 0xa72b, + 0xa72d, 0xa72d, + 0xa72f, 0xa731, + 0xa733, 0xa733, + 0xa735, 0xa735, + 0xa737, 0xa737, + 0xa739, 0xa739, + 0xa73b, 0xa73b, + 0xa73d, 0xa73d, + 0xa73f, 0xa73f, + 0xa741, 0xa741, + 0xa743, 0xa743, + 0xa745, 0xa745, + 0xa747, 0xa747, + 0xa749, 0xa749, + 
0xa74b, 0xa74b, + 0xa74d, 0xa74d, + 0xa74f, 0xa74f, + 0xa751, 0xa751, + 0xa753, 0xa753, + 0xa755, 0xa755, + 0xa757, 0xa757, + 0xa759, 0xa759, + 0xa75b, 0xa75b, + 0xa75d, 0xa75d, + 0xa75f, 0xa75f, + 0xa761, 0xa761, + 0xa763, 0xa763, + 0xa765, 0xa765, + 0xa767, 0xa767, + 0xa769, 0xa769, + 0xa76b, 0xa76b, + 0xa76d, 0xa76d, + 0xa76f, 0xa778, + 0xa77a, 0xa77a, + 0xa77c, 0xa77c, + 0xa77f, 0xa77f, + 0xa781, 0xa781, + 0xa783, 0xa783, + 0xa785, 0xa785, + 0xa787, 0xa787, + 0xa78c, 0xa78c, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7cb, 0x1d7cb, +}; /* CR_Lowercase */ + +/* 'Uppercase': Derived Property */ +static const OnigCodePoint CR_Uppercase[] = { + 596, + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 
0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, + 0x0243, 0x0246, + 0x0248, 0x0248, + 0x024a, 0x024a, + 0x024c, 0x024c, + 0x024e, 0x024e, + 0x0370, 0x0370, + 0x0372, 0x0372, + 0x0376, 0x0376, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03cf, 
0x03cf, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f6, 0x04f6, + 0x04f8, 0x04f8, + 0x04fa, 0x04fa, + 0x04fc, 0x04fc, + 0x04fe, 0x04fe, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0510, 0x0510, + 0x0512, 0x0512, + 0x0514, 0x0514, + 0x0516, 0x0516, + 0x0518, 0x0518, + 0x051a, 0x051a, + 0x051c, 0x051c, + 0x051e, 0x051e, + 0x0520, 0x0520, + 0x0522, 0x0522, + 0x0524, 
0x0524, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1e9e, 0x1e9e, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 
0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1efa, 0x1efa, + 0x1efc, 0x1efc, + 0x1efe, 0x1efe, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0x2160, 0x216f, + 0x2183, 0x2183, + 0x24b6, 0x24cf, + 0x2c00, 0x2c2e, + 0x2c60, 0x2c60, + 0x2c62, 0x2c64, + 0x2c67, 0x2c67, + 0x2c69, 0x2c69, + 0x2c6b, 0x2c6b, + 0x2c6d, 0x2c70, + 0x2c72, 0x2c72, + 0x2c75, 0x2c75, + 0x2c7e, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0x2ceb, 0x2ceb, + 0x2ced, 0x2ced, + 0xa640, 0xa640, + 0xa642, 0xa642, + 0xa644, 0xa644, + 0xa646, 
0xa646, + 0xa648, 0xa648, + 0xa64a, 0xa64a, + 0xa64c, 0xa64c, + 0xa64e, 0xa64e, + 0xa650, 0xa650, + 0xa652, 0xa652, + 0xa654, 0xa654, + 0xa656, 0xa656, + 0xa658, 0xa658, + 0xa65a, 0xa65a, + 0xa65c, 0xa65c, + 0xa65e, 0xa65e, + 0xa662, 0xa662, + 0xa664, 0xa664, + 0xa666, 0xa666, + 0xa668, 0xa668, + 0xa66a, 0xa66a, + 0xa66c, 0xa66c, + 0xa680, 0xa680, + 0xa682, 0xa682, + 0xa684, 0xa684, + 0xa686, 0xa686, + 0xa688, 0xa688, + 0xa68a, 0xa68a, + 0xa68c, 0xa68c, + 0xa68e, 0xa68e, + 0xa690, 0xa690, + 0xa692, 0xa692, + 0xa694, 0xa694, + 0xa696, 0xa696, + 0xa722, 0xa722, + 0xa724, 0xa724, + 0xa726, 0xa726, + 0xa728, 0xa728, + 0xa72a, 0xa72a, + 0xa72c, 0xa72c, + 0xa72e, 0xa72e, + 0xa732, 0xa732, + 0xa734, 0xa734, + 0xa736, 0xa736, + 0xa738, 0xa738, + 0xa73a, 0xa73a, + 0xa73c, 0xa73c, + 0xa73e, 0xa73e, + 0xa740, 0xa740, + 0xa742, 0xa742, + 0xa744, 0xa744, + 0xa746, 0xa746, + 0xa748, 0xa748, + 0xa74a, 0xa74a, + 0xa74c, 0xa74c, + 0xa74e, 0xa74e, + 0xa750, 0xa750, + 0xa752, 0xa752, + 0xa754, 0xa754, + 0xa756, 0xa756, + 0xa758, 0xa758, + 0xa75a, 0xa75a, + 0xa75c, 0xa75c, + 0xa75e, 0xa75e, + 0xa760, 0xa760, + 0xa762, 0xa762, + 0xa764, 0xa764, + 0xa766, 0xa766, + 0xa768, 0xa768, + 0xa76a, 0xa76a, + 0xa76c, 0xa76c, + 0xa76e, 0xa76e, + 0xa779, 0xa779, + 0xa77b, 0xa77b, + 0xa77d, 0xa77e, + 0xa780, 0xa780, + 0xa782, 0xa782, + 0xa784, 0xa784, + 0xa786, 0xa786, + 0xa78b, 0xa78b, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 
0x1d790, 0x1d7a8, + 0x1d7ca, 0x1d7ca, +}; /* CR_Uppercase */ + +/* 'Cased': Derived Property */ +static const OnigCodePoint CR_Cased[] = { + 110, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x01ba, + 0x01bc, 0x01bf, + 0x01c4, 0x0293, + 0x0295, 0x02b8, + 0x02c0, 0x02c1, + 0x02e0, 0x02e4, + 0x0345, 0x0345, + 0x0370, 0x0373, + 0x0376, 0x0377, + 0x037a, 0x037d, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x0525, + 0x0531, 0x0556, + 0x0561, 0x0587, + 0x10a0, 0x10c5, + 0x1d00, 0x1dbf, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2090, 0x2094, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2160, 0x217f, + 0x2183, 0x2184, + 0x24b6, 0x24e9, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cee, + 0x2d00, 0x2d25, + 0xa640, 0xa65f, + 0xa662, 0xa66d, + 0xa680, 0xa697, + 0xa722, 0xa787, + 0xa78b, 0xa78c, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0x10400, 0x1044f, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 
0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, +}; /* CR_Cased */ + +/* 'Case_Ignorable': Derived Property */ +static const OnigCodePoint CR_Case_Ignorable[] = { + 266, + 0x0027, 0x0027, + 0x002e, 0x002e, + 0x003a, 0x003a, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x00a8, 0x00a8, + 0x00ad, 0x00ad, + 0x00af, 0x00af, + 0x00b4, 0x00b4, + 0x00b7, 0x00b8, + 0x02b0, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x0384, 0x0385, + 0x0387, 0x0387, + 0x0483, 0x0489, + 0x0559, 0x0559, + 0x0591, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05f4, 0x05f4, + 0x0600, 0x0603, + 0x0610, 0x061a, + 0x0640, 0x0640, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06e8, + 0x06ea, 0x06ed, + 0x070f, 0x070f, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x07eb, 0x07f5, + 0x07fa, 0x07fa, + 0x0816, 0x082d, + 0x0900, 0x0902, + 0x093c, 0x093c, + 0x0941, 0x0948, + 0x094d, 0x094d, + 0x0951, 0x0955, + 0x0962, 0x0963, + 0x0971, 0x0971, + 0x0981, 0x0981, + 0x09bc, 0x09bc, + 0x09c1, 0x09c4, + 0x09cd, 0x09cd, + 0x09e2, 0x09e3, + 0x0a01, 0x0a02, + 0x0a3c, 0x0a3c, + 0x0a41, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a70, 0x0a71, + 0x0a75, 0x0a75, + 0x0a81, 0x0a82, + 0x0abc, 0x0abc, + 0x0ac1, 0x0ac5, + 0x0ac7, 0x0ac8, + 0x0acd, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b01, + 0x0b3c, 0x0b3c, + 0x0b3f, 0x0b3f, + 0x0b41, 0x0b44, + 0x0b4d, 0x0b4d, + 0x0b56, 0x0b56, + 0x0b62, 0x0b63, + 0x0b82, 0x0b82, + 0x0bc0, 0x0bc0, + 0x0bcd, 0x0bcd, + 0x0c3e, 0x0c40, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c62, 0x0c63, + 0x0cbc, 0x0cbc, + 0x0cbf, 0x0cbf, + 0x0cc6, 0x0cc6, + 0x0ccc, 0x0ccd, + 0x0ce2, 0x0ce3, + 0x0d41, 0x0d44, + 0x0d4d, 0x0d4d, + 0x0d62, 0x0d63, + 0x0dca, 0x0dca, + 0x0dd2, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e46, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 
0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f71, 0x0f7e, + 0x0f80, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102d, 0x1030, + 0x1032, 0x1037, + 0x1039, 0x103a, + 0x103d, 0x103e, + 0x1058, 0x1059, + 0x105e, 0x1060, + 0x1071, 0x1074, + 0x1082, 0x1082, + 0x1085, 0x1086, + 0x108d, 0x108d, + 0x109d, 0x109d, + 0x10fc, 0x10fc, + 0x135f, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b4, 0x17b5, + 0x17b7, 0x17bd, + 0x17c6, 0x17c6, + 0x17c9, 0x17d3, + 0x17d7, 0x17d7, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x1843, 0x1843, + 0x18a9, 0x18a9, + 0x1920, 0x1922, + 0x1927, 0x1928, + 0x1932, 0x1932, + 0x1939, 0x193b, + 0x1a17, 0x1a18, + 0x1a56, 0x1a56, + 0x1a58, 0x1a5e, + 0x1a60, 0x1a60, + 0x1a62, 0x1a62, + 0x1a65, 0x1a6c, + 0x1a73, 0x1a7c, + 0x1a7f, 0x1a7f, + 0x1aa7, 0x1aa7, + 0x1b00, 0x1b03, + 0x1b34, 0x1b34, + 0x1b36, 0x1b3a, + 0x1b3c, 0x1b3c, + 0x1b42, 0x1b42, + 0x1b6b, 0x1b73, + 0x1b80, 0x1b81, + 0x1ba2, 0x1ba5, + 0x1ba8, 0x1ba9, + 0x1c2c, 0x1c33, + 0x1c36, 0x1c37, + 0x1c78, 0x1c7d, + 0x1cd0, 0x1cd2, + 0x1cd4, 0x1ce0, + 0x1ce2, 0x1ce8, + 0x1ced, 0x1ced, + 0x1d2c, 0x1d61, + 0x1d78, 0x1d78, + 0x1d9b, 0x1de6, + 0x1dfd, 0x1dff, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x200b, 0x200f, + 0x2018, 0x2019, + 0x2024, 0x2024, + 0x2027, 0x2027, + 0x202a, 0x202e, + 0x2060, 0x2064, + 0x206a, 0x206f, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x20d0, 0x20f0, + 0x2c7d, 0x2c7d, + 0x2cef, 0x2cf1, + 0x2d6f, 0x2d6f, + 0x2de0, 0x2dff, + 0x2e2f, 0x2e2f, + 0x3005, 0x3005, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303b, + 0x3099, 0x309e, + 0x30fc, 0x30fe, + 0xa015, 0xa015, + 0xa4f8, 0xa4fd, + 0xa60c, 0xa60c, + 0xa66f, 0xa672, + 0xa67c, 0xa67d, + 0xa67f, 0xa67f, + 0xa6f0, 0xa6f1, + 0xa700, 0xa721, + 0xa770, 0xa770, + 0xa788, 0xa78a, + 0xa802, 
0xa802, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa825, 0xa826, + 0xa8c4, 0xa8c4, + 0xa8e0, 0xa8f1, + 0xa926, 0xa92d, + 0xa947, 0xa951, + 0xa980, 0xa982, + 0xa9b3, 0xa9b3, + 0xa9b6, 0xa9b9, + 0xa9bc, 0xa9bc, + 0xa9cf, 0xa9cf, + 0xaa29, 0xaa2e, + 0xaa31, 0xaa32, + 0xaa35, 0xaa36, + 0xaa43, 0xaa43, + 0xaa4c, 0xaa4c, + 0xaa70, 0xaa70, + 0xaab0, 0xaab0, + 0xaab2, 0xaab4, + 0xaab7, 0xaab8, + 0xaabe, 0xaabf, + 0xaac1, 0xaac1, + 0xaadd, 0xaadd, + 0xabe5, 0xabe5, + 0xabe8, 0xabe8, + 0xabed, 0xabed, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe13, 0xfe13, + 0xfe20, 0xfe26, + 0xfe52, 0xfe52, + 0xfe55, 0xfe55, + 0xfeff, 0xfeff, + 0xff07, 0xff07, + 0xff0e, 0xff0e, + 0xff1a, 0xff1a, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xff70, 0xff70, + 0xff9e, 0xff9f, + 0xffe3, 0xffe3, + 0xfff9, 0xfffb, + 0x101fd, 0x101fd, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x11080, 0x11081, + 0x110b3, 0x110b6, + 0x110b9, 0x110ba, + 0x110bd, 0x110bd, + 0x1d167, 0x1d169, + 0x1d173, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, +}; /* CR_Case_Ignorable */ + +/* 'Changes_When_Lowercased': Derived Property */ +static const OnigCodePoint CR_Changes_When_Lowercased[] = { + 557, + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 
0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c5, + 0x01c7, 0x01c8, + 0x01ca, 0x01cb, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f2, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, + 0x0243, 0x0246, + 0x0248, 0x0248, + 0x024a, 0x024a, + 0x024c, 0x024c, + 0x024e, 0x024e, + 0x0370, 0x0370, + 0x0372, 0x0372, + 0x0376, 0x0376, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 
0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03cf, 0x03cf, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f6, 0x04f6, + 0x04f8, 0x04f8, + 0x04fa, 0x04fa, + 0x04fc, 0x04fc, + 0x04fe, 0x04fe, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0510, 0x0510, + 0x0512, 0x0512, + 0x0514, 0x0514, + 0x0516, 0x0516, + 0x0518, 0x0518, + 0x051a, 0x051a, + 0x051c, 0x051c, + 0x051e, 0x051e, + 
0x0520, 0x0520, + 0x0522, 0x0522, + 0x0524, 0x0524, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1e9e, 0x1e9e, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 
0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1efa, 0x1efa, + 0x1efc, 0x1efc, + 0x1efe, 0x1efe, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1f88, 0x1f8f, + 0x1f98, 0x1f9f, + 0x1fa8, 0x1faf, + 0x1fb8, 0x1fbc, + 0x1fc8, 0x1fcc, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffc, + 0x2126, 0x2126, + 0x212a, 0x212b, + 0x2132, 0x2132, + 0x2160, 0x216f, + 0x2183, 0x2183, + 0x24b6, 0x24cf, + 0x2c00, 0x2c2e, + 0x2c60, 0x2c60, + 0x2c62, 0x2c64, + 0x2c67, 0x2c67, + 0x2c69, 0x2c69, + 0x2c6b, 0x2c6b, + 0x2c6d, 0x2c70, + 0x2c72, 0x2c72, + 0x2c75, 0x2c75, + 0x2c7e, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0x2ceb, 0x2ceb, + 0x2ced, 0x2ced, + 0xa640, 0xa640, + 0xa642, 0xa642, + 0xa644, 0xa644, + 0xa646, 0xa646, + 0xa648, 0xa648, + 0xa64a, 0xa64a, + 0xa64c, 0xa64c, + 0xa64e, 0xa64e, + 
0xa650, 0xa650, + 0xa652, 0xa652, + 0xa654, 0xa654, + 0xa656, 0xa656, + 0xa658, 0xa658, + 0xa65a, 0xa65a, + 0xa65c, 0xa65c, + 0xa65e, 0xa65e, + 0xa662, 0xa662, + 0xa664, 0xa664, + 0xa666, 0xa666, + 0xa668, 0xa668, + 0xa66a, 0xa66a, + 0xa66c, 0xa66c, + 0xa680, 0xa680, + 0xa682, 0xa682, + 0xa684, 0xa684, + 0xa686, 0xa686, + 0xa688, 0xa688, + 0xa68a, 0xa68a, + 0xa68c, 0xa68c, + 0xa68e, 0xa68e, + 0xa690, 0xa690, + 0xa692, 0xa692, + 0xa694, 0xa694, + 0xa696, 0xa696, + 0xa722, 0xa722, + 0xa724, 0xa724, + 0xa726, 0xa726, + 0xa728, 0xa728, + 0xa72a, 0xa72a, + 0xa72c, 0xa72c, + 0xa72e, 0xa72e, + 0xa732, 0xa732, + 0xa734, 0xa734, + 0xa736, 0xa736, + 0xa738, 0xa738, + 0xa73a, 0xa73a, + 0xa73c, 0xa73c, + 0xa73e, 0xa73e, + 0xa740, 0xa740, + 0xa742, 0xa742, + 0xa744, 0xa744, + 0xa746, 0xa746, + 0xa748, 0xa748, + 0xa74a, 0xa74a, + 0xa74c, 0xa74c, + 0xa74e, 0xa74e, + 0xa750, 0xa750, + 0xa752, 0xa752, + 0xa754, 0xa754, + 0xa756, 0xa756, + 0xa758, 0xa758, + 0xa75a, 0xa75a, + 0xa75c, 0xa75c, + 0xa75e, 0xa75e, + 0xa760, 0xa760, + 0xa762, 0xa762, + 0xa764, 0xa764, + 0xa766, 0xa766, + 0xa768, 0xa768, + 0xa76a, 0xa76a, + 0xa76c, 0xa76c, + 0xa76e, 0xa76e, + 0xa779, 0xa779, + 0xa77b, 0xa77b, + 0xa77d, 0xa77e, + 0xa780, 0xa780, + 0xa782, 0xa782, + 0xa784, 0xa784, + 0xa786, 0xa786, + 0xa78b, 0xa78b, + 0xff21, 0xff3a, + 0x10400, 0x10427, +}; /* CR_Changes_When_Lowercased */ + +/* 'Changes_When_Uppercased': Derived Property */ +static const OnigCodePoint CR_Changes_When_Uppercased[] = { + 573, + 0x0061, 0x007a, + 0x00b5, 0x00b5, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 
0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0137, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018c, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019a, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01b9, + 0x01bd, 0x01bd, + 0x01bf, 0x01bf, + 0x01c5, 0x01c6, + 0x01c8, 0x01c9, + 0x01cb, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f2, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0233, + 0x023c, 0x023c, + 0x023f, 0x0240, + 0x0242, 0x0242, + 
0x0247, 0x0247, + 0x0249, 0x0249, + 0x024b, 0x024b, + 0x024d, 0x024d, + 0x024f, 0x0254, + 0x0256, 0x0257, + 0x0259, 0x0259, + 0x025b, 0x025b, + 0x0260, 0x0260, + 0x0263, 0x0263, + 0x0268, 0x0269, + 0x026b, 0x026b, + 0x026f, 0x026f, + 0x0271, 0x0272, + 0x0275, 0x0275, + 0x027d, 0x027d, + 0x0280, 0x0280, + 0x0283, 0x0283, + 0x0288, 0x028c, + 0x0292, 0x0292, + 0x0345, 0x0345, + 0x0371, 0x0371, + 0x0373, 0x0373, + 0x0377, 0x0377, + 0x037b, 0x037d, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f2, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fb, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04cf, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 
0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f7, 0x04f7, + 0x04f9, 0x04f9, + 0x04fb, 0x04fb, + 0x04fd, 0x04fd, + 0x04ff, 0x04ff, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0511, 0x0511, + 0x0513, 0x0513, + 0x0515, 0x0515, + 0x0517, 0x0517, + 0x0519, 0x0519, + 0x051b, 0x051b, + 0x051d, 0x051d, + 0x051f, 0x051f, + 0x0521, 0x0521, + 0x0523, 0x0523, + 0x0525, 0x0525, + 0x0561, 0x0587, + 0x1d79, 0x1d79, + 0x1d7d, 0x1d7d, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 
0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1efb, 0x1efb, + 0x1efd, 0x1efd, + 0x1eff, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbc, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fcc, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x1ffc, 0x1ffc, + 0x214e, 0x214e, + 0x2170, 0x217f, + 0x2184, 0x2184, + 0x24d0, 0x24e9, + 0x2c30, 0x2c5e, + 0x2c61, 0x2c61, + 0x2c65, 0x2c66, + 0x2c68, 0x2c68, + 0x2c6a, 0x2c6a, + 0x2c6c, 0x2c6c, + 0x2c73, 0x2c73, + 0x2c76, 0x2c76, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 
0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce3, + 0x2cec, 0x2cec, + 0x2cee, 0x2cee, + 0x2d00, 0x2d25, + 0xa641, 0xa641, + 0xa643, 0xa643, + 0xa645, 0xa645, + 0xa647, 0xa647, + 0xa649, 0xa649, + 0xa64b, 0xa64b, + 0xa64d, 0xa64d, + 0xa64f, 0xa64f, + 0xa651, 0xa651, + 0xa653, 0xa653, + 0xa655, 0xa655, + 0xa657, 0xa657, + 0xa659, 0xa659, + 0xa65b, 0xa65b, + 0xa65d, 0xa65d, + 0xa65f, 0xa65f, + 0xa663, 0xa663, + 0xa665, 0xa665, + 0xa667, 0xa667, + 0xa669, 0xa669, + 0xa66b, 0xa66b, + 0xa66d, 0xa66d, + 0xa681, 0xa681, + 0xa683, 0xa683, + 0xa685, 0xa685, + 0xa687, 0xa687, + 0xa689, 0xa689, + 0xa68b, 0xa68b, + 0xa68d, 0xa68d, + 0xa68f, 0xa68f, + 0xa691, 0xa691, + 0xa693, 0xa693, + 0xa695, 0xa695, + 0xa697, 0xa697, + 0xa723, 0xa723, + 0xa725, 0xa725, + 0xa727, 0xa727, + 0xa729, 0xa729, + 0xa72b, 0xa72b, + 0xa72d, 0xa72d, + 0xa72f, 0xa72f, + 0xa733, 0xa733, + 0xa735, 0xa735, + 0xa737, 0xa737, + 0xa739, 0xa739, + 0xa73b, 0xa73b, + 0xa73d, 0xa73d, + 0xa73f, 0xa73f, + 0xa741, 0xa741, + 0xa743, 0xa743, + 0xa745, 0xa745, + 0xa747, 0xa747, + 0xa749, 0xa749, + 0xa74b, 0xa74b, + 0xa74d, 0xa74d, + 0xa74f, 0xa74f, + 0xa751, 0xa751, + 0xa753, 0xa753, + 0xa755, 0xa755, + 0xa757, 0xa757, + 0xa759, 0xa759, + 0xa75b, 0xa75b, + 0xa75d, 0xa75d, + 0xa75f, 0xa75f, + 0xa761, 0xa761, + 0xa763, 0xa763, + 0xa765, 0xa765, + 0xa767, 0xa767, + 0xa769, 0xa769, + 0xa76b, 0xa76b, + 0xa76d, 0xa76d, + 0xa76f, 0xa76f, + 0xa77a, 0xa77a, + 0xa77c, 0xa77c, + 0xa77f, 0xa77f, + 0xa781, 0xa781, + 0xa783, 0xa783, + 0xa785, 0xa785, + 0xa787, 0xa787, + 0xa78c, 0xa78c, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, +}; /* CR_Changes_When_Uppercased */ + +/* 'Changes_When_Titlecased': Derived Property */ +static const OnigCodePoint CR_Changes_When_Titlecased[] = { + 574, + 0x0061, 0x007a, + 
0x00b5, 0x00b5, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0137, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018c, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019a, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01b9, + 0x01bd, 0x01bd, + 0x01bf, 0x01bf, + 0x01c4, 0x01c4, + 0x01c6, 0x01c7, + 0x01c9, 0x01ca, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f1, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 
0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0233, + 0x023c, 0x023c, + 0x023f, 0x0240, + 0x0242, 0x0242, + 0x0247, 0x0247, + 0x0249, 0x0249, + 0x024b, 0x024b, + 0x024d, 0x024d, + 0x024f, 0x0254, + 0x0256, 0x0257, + 0x0259, 0x0259, + 0x025b, 0x025b, + 0x0260, 0x0260, + 0x0263, 0x0263, + 0x0268, 0x0269, + 0x026b, 0x026b, + 0x026f, 0x026f, + 0x0271, 0x0272, + 0x0275, 0x0275, + 0x027d, 0x027d, + 0x0280, 0x0280, + 0x0283, 0x0283, + 0x0288, 0x028c, + 0x0292, 0x0292, + 0x0345, 0x0345, + 0x0371, 0x0371, + 0x0373, 0x0373, + 0x0377, 0x0377, + 0x037b, 0x037d, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f2, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fb, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 
0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04cf, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f7, 0x04f7, + 0x04f9, 0x04f9, + 0x04fb, 0x04fb, + 0x04fd, 0x04fd, + 0x04ff, 0x04ff, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0511, 0x0511, + 0x0513, 0x0513, + 0x0515, 0x0515, + 0x0517, 0x0517, + 0x0519, 0x0519, + 0x051b, 0x051b, + 0x051d, 0x051d, + 0x051f, 0x051f, + 0x0521, 0x0521, + 0x0523, 0x0523, + 0x0525, 0x0525, + 0x0561, 0x0587, + 0x1d79, 0x1d79, + 0x1d7d, 0x1d7d, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 
0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1efb, 0x1efb, + 0x1efd, 0x1efd, + 0x1eff, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x214e, 0x214e, + 0x2170, 0x217f, + 0x2184, 0x2184, + 0x24d0, 0x24e9, + 0x2c30, 0x2c5e, + 0x2c61, 0x2c61, + 0x2c65, 0x2c66, + 0x2c68, 0x2c68, + 0x2c6a, 0x2c6a, + 0x2c6c, 0x2c6c, + 0x2c73, 0x2c73, + 0x2c76, 0x2c76, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 
0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce3, + 0x2cec, 0x2cec, + 0x2cee, 0x2cee, + 0x2d00, 0x2d25, + 0xa641, 0xa641, + 0xa643, 0xa643, + 0xa645, 0xa645, + 0xa647, 0xa647, + 0xa649, 0xa649, + 0xa64b, 0xa64b, + 0xa64d, 0xa64d, + 0xa64f, 0xa64f, + 0xa651, 0xa651, + 0xa653, 0xa653, + 0xa655, 0xa655, + 0xa657, 0xa657, + 0xa659, 0xa659, + 0xa65b, 0xa65b, + 0xa65d, 0xa65d, + 0xa65f, 0xa65f, + 0xa663, 0xa663, + 0xa665, 0xa665, + 0xa667, 0xa667, + 0xa669, 0xa669, + 0xa66b, 0xa66b, + 0xa66d, 0xa66d, + 0xa681, 0xa681, + 0xa683, 0xa683, + 0xa685, 0xa685, + 0xa687, 0xa687, + 0xa689, 0xa689, + 0xa68b, 0xa68b, + 0xa68d, 0xa68d, + 0xa68f, 0xa68f, + 0xa691, 0xa691, + 0xa693, 0xa693, + 0xa695, 0xa695, + 0xa697, 0xa697, + 0xa723, 0xa723, + 0xa725, 0xa725, + 0xa727, 0xa727, + 0xa729, 0xa729, + 0xa72b, 0xa72b, + 0xa72d, 0xa72d, + 0xa72f, 0xa72f, + 0xa733, 0xa733, + 0xa735, 0xa735, + 0xa737, 0xa737, + 0xa739, 0xa739, + 0xa73b, 0xa73b, + 0xa73d, 0xa73d, + 0xa73f, 0xa73f, + 0xa741, 0xa741, + 0xa743, 0xa743, + 0xa745, 0xa745, + 0xa747, 0xa747, + 0xa749, 0xa749, + 0xa74b, 0xa74b, + 0xa74d, 0xa74d, + 0xa74f, 0xa74f, + 0xa751, 0xa751, + 0xa753, 0xa753, + 0xa755, 0xa755, + 0xa757, 0xa757, + 0xa759, 0xa759, + 0xa75b, 0xa75b, + 0xa75d, 0xa75d, + 0xa75f, 0xa75f, + 0xa761, 0xa761, + 0xa763, 0xa763, + 
0xa765, 0xa765, + 0xa767, 0xa767, + 0xa769, 0xa769, + 0xa76b, 0xa76b, + 0xa76d, 0xa76d, + 0xa76f, 0xa76f, + 0xa77a, 0xa77a, + 0xa77c, 0xa77c, + 0xa77f, 0xa77f, + 0xa781, 0xa781, + 0xa783, 0xa783, + 0xa785, 0xa785, + 0xa787, 0xa787, + 0xa78c, 0xa78c, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, +}; /* CR_Changes_When_Titlecased */ + +/* 'Changes_When_Casefolded': Derived Property */ +static const OnigCodePoint CR_Changes_When_Casefolded[] = { + 568, + 0x0041, 0x005a, + 0x00b5, 0x00b5, + 0x00c0, 0x00d6, + 0x00d8, 0x00df, + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x0149, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x017f, 0x017f, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 
0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c5, + 0x01c7, 0x01c8, + 0x01ca, 0x01cb, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f2, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, + 0x0243, 0x0246, + 0x0248, 0x0248, + 0x024a, 0x024a, + 0x024c, 0x024c, + 0x024e, 0x024e, + 0x0345, 0x0345, + 0x0370, 0x0370, + 0x0372, 0x0372, + 0x0376, 0x0376, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03c2, 0x03c2, + 0x03cf, 0x03d1, + 0x03d5, 0x03d6, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f0, 0x03f1, + 0x03f4, 0x03f5, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 
0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f6, 0x04f6, + 0x04f8, 0x04f8, + 0x04fa, 0x04fa, + 0x04fc, 0x04fc, + 0x04fe, 0x04fe, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0510, 0x0510, + 0x0512, 0x0512, + 0x0514, 0x0514, + 0x0516, 0x0516, + 0x0518, 0x0518, + 0x051a, 0x051a, + 0x051c, 0x051c, + 0x051e, 0x051e, + 0x0520, 0x0520, + 0x0522, 0x0522, + 0x0524, 0x0524, + 0x0531, 0x0556, + 0x0587, 0x0587, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 
0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1e9a, 0x1e9b, + 0x1e9e, 0x1e9e, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1efa, 0x1efa, + 0x1efc, 0x1efc, + 0x1efe, 0x1efe, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1f80, 0x1faf, + 0x1fb2, 0x1fb4, + 0x1fb7, 0x1fbc, + 0x1fc2, 0x1fc4, + 0x1fc7, 0x1fcc, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff7, 0x1ffc, + 
0x2126, 0x2126, + 0x212a, 0x212b, + 0x2132, 0x2132, + 0x2160, 0x216f, + 0x2183, 0x2183, + 0x24b6, 0x24cf, + 0x2c00, 0x2c2e, + 0x2c60, 0x2c60, + 0x2c62, 0x2c64, + 0x2c67, 0x2c67, + 0x2c69, 0x2c69, + 0x2c6b, 0x2c6b, + 0x2c6d, 0x2c70, + 0x2c72, 0x2c72, + 0x2c75, 0x2c75, + 0x2c7e, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0x2ceb, 0x2ceb, + 0x2ced, 0x2ced, + 0xa640, 0xa640, + 0xa642, 0xa642, + 0xa644, 0xa644, + 0xa646, 0xa646, + 0xa648, 0xa648, + 0xa64a, 0xa64a, + 0xa64c, 0xa64c, + 0xa64e, 0xa64e, + 0xa650, 0xa650, + 0xa652, 0xa652, + 0xa654, 0xa654, + 0xa656, 0xa656, + 0xa658, 0xa658, + 0xa65a, 0xa65a, + 0xa65c, 0xa65c, + 0xa65e, 0xa65e, + 0xa662, 0xa662, + 0xa664, 0xa664, + 0xa666, 0xa666, + 0xa668, 0xa668, + 0xa66a, 0xa66a, + 0xa66c, 0xa66c, + 0xa680, 0xa680, + 0xa682, 0xa682, + 0xa684, 0xa684, + 0xa686, 0xa686, + 0xa688, 0xa688, + 0xa68a, 0xa68a, + 0xa68c, 0xa68c, + 0xa68e, 0xa68e, + 0xa690, 0xa690, + 0xa692, 0xa692, + 0xa694, 0xa694, + 0xa696, 0xa696, + 0xa722, 0xa722, + 0xa724, 0xa724, + 0xa726, 0xa726, + 0xa728, 0xa728, + 0xa72a, 0xa72a, + 0xa72c, 0xa72c, + 0xa72e, 0xa72e, + 0xa732, 0xa732, + 0xa734, 0xa734, + 0xa736, 0xa736, + 
0xa738, 0xa738, + 0xa73a, 0xa73a, + 0xa73c, 0xa73c, + 0xa73e, 0xa73e, + 0xa740, 0xa740, + 0xa742, 0xa742, + 0xa744, 0xa744, + 0xa746, 0xa746, + 0xa748, 0xa748, + 0xa74a, 0xa74a, + 0xa74c, 0xa74c, + 0xa74e, 0xa74e, + 0xa750, 0xa750, + 0xa752, 0xa752, + 0xa754, 0xa754, + 0xa756, 0xa756, + 0xa758, 0xa758, + 0xa75a, 0xa75a, + 0xa75c, 0xa75c, + 0xa75e, 0xa75e, + 0xa760, 0xa760, + 0xa762, 0xa762, + 0xa764, 0xa764, + 0xa766, 0xa766, + 0xa768, 0xa768, + 0xa76a, 0xa76a, + 0xa76c, 0xa76c, + 0xa76e, 0xa76e, + 0xa779, 0xa779, + 0xa77b, 0xa77b, + 0xa77d, 0xa77e, + 0xa780, 0xa780, + 0xa782, 0xa782, + 0xa784, 0xa784, + 0xa786, 0xa786, + 0xa78b, 0xa78b, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff21, 0xff3a, + 0x10400, 0x10427, +}; /* CR_Changes_When_Casefolded */ + +/* 'Changes_When_Casemapped': Derived Property */ +static const OnigCodePoint CR_Changes_When_Casemapped[] = { + 97, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00b5, 0x00b5, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0137, + 0x0139, 0x018c, + 0x018e, 0x019a, + 0x019c, 0x01a9, + 0x01ac, 0x01b9, + 0x01bc, 0x01bd, + 0x01bf, 0x01bf, + 0x01c4, 0x0220, + 0x0222, 0x0233, + 0x023a, 0x0254, + 0x0256, 0x0257, + 0x0259, 0x0259, + 0x025b, 0x025b, + 0x0260, 0x0260, + 0x0263, 0x0263, + 0x0268, 0x0269, + 0x026b, 0x026b, + 0x026f, 0x026f, + 0x0271, 0x0272, + 0x0275, 0x0275, + 0x027d, 0x027d, + 0x0280, 0x0280, + 0x0283, 0x0283, + 0x0288, 0x028c, + 0x0292, 0x0292, + 0x0345, 0x0345, + 0x0370, 0x0373, + 0x0376, 0x0377, + 0x037b, 0x037d, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03d1, + 0x03d5, 0x03f2, + 0x03f4, 0x03f5, + 0x03f7, 0x03fb, + 0x03fd, 0x0481, + 0x048a, 0x0525, + 0x0531, 0x0556, + 0x0561, 0x0587, + 0x10a0, 0x10c5, + 0x1d79, 0x1d79, + 0x1d7d, 0x1d7d, + 0x1e00, 0x1e9b, + 0x1e9e, 0x1e9e, + 0x1ea0, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 
0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2126, 0x2126, + 0x212a, 0x212b, + 0x2132, 0x2132, + 0x214e, 0x214e, + 0x2160, 0x217f, + 0x2183, 0x2184, + 0x24b6, 0x24e9, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2c70, + 0x2c72, 0x2c73, + 0x2c75, 0x2c76, + 0x2c7e, 0x2ce3, + 0x2ceb, 0x2cee, + 0x2d00, 0x2d25, + 0xa640, 0xa65f, + 0xa662, 0xa66d, + 0xa680, 0xa697, + 0xa722, 0xa72f, + 0xa732, 0xa76f, + 0xa779, 0xa787, + 0xa78b, 0xa78c, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0x10400, 0x1044f, +}; /* CR_Changes_When_Casemapped */ + +/* 'ID_Start': Derived Property */ +static const OnigCodePoint CR_ID_Start[] = { + 424, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ec, 0x02ec, + 0x02ee, 0x02ee, + 0x0370, 0x0374, + 0x0376, 0x0377, + 0x037a, 0x037d, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x0525, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06e5, 0x06e6, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x07a5, + 0x07b1, 0x07b1, + 0x07ca, 0x07ea, + 0x07f4, 0x07f5, + 0x07fa, 0x07fa, + 0x0800, 0x0815, + 0x081a, 0x081a, + 0x0824, 0x0824, + 0x0828, 0x0828, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x0971, 0x0972, + 0x0979, 0x097f, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, 
+ 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bd0, 0x0bd0, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c3d, + 0x0c58, 0x0c59, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d3d, + 0x0d60, 0x0d61, + 0x0d7a, 0x0d7f, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e46, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f88, 0x0f8b, + 0x1000, 0x102a, + 0x103f, 0x103f, + 0x1050, 0x1055, + 0x105a, 0x105d, + 0x1061, 0x1061, + 0x1065, 0x1066, + 0x106e, 0x1070, + 0x1075, 0x1081, + 0x108e, 0x108e, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 
0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x18aa, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1a20, 0x1a54, + 0x1aa7, 0x1aa7, + 0x1b05, 0x1b33, + 0x1b45, 0x1b4b, + 0x1b83, 0x1ba0, + 0x1bae, 0x1baf, + 0x1c00, 0x1c23, + 0x1c4d, 0x1c4f, + 0x1c5a, 0x1c7d, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf1, + 0x1d00, 0x1dbf, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2118, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2160, 0x2188, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cee, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3007, + 0x3021, 0x3029, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x309b, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa61f, + 0xa62a, 0xa62b, + 
0xa640, 0xa65f, + 0xa662, 0xa66e, + 0xa67f, 0xa697, + 0xa6a0, 0xa6ef, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78c, + 0xa7fb, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xa840, 0xa873, + 0xa882, 0xa8b3, + 0xa8f2, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa90a, 0xa925, + 0xa930, 0xa946, + 0xa960, 0xa97c, + 0xa984, 0xa9b2, + 0xa9cf, 0xa9cf, + 0xaa00, 0xaa28, + 0xaa40, 0xaa42, + 0xaa44, 0xaa4b, + 0xaa60, 0xaa76, + 0xaa7a, 0xaa7a, + 0xaa80, 0xaaaf, + 0xaab1, 0xaab1, + 0xaab5, 0xaab6, + 0xaab9, 0xaabd, + 0xaac0, 0xaac0, + 0xaac2, 0xaac2, + 0xaadb, 0xaadd, + 0xabc0, 0xabe2, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10140, 0x10174, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a60, 0x10a7c, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10c00, 0x10c48, + 0x11083, 0x110af, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x13000, 0x1342e, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 
0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, +}; /* CR_ID_Start */ + +/* 'ID_Continue': Derived Property */ +static const OnigCodePoint CR_ID_Continue[] = { + 506, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x005f, 0x005f, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00b7, 0x00b7, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ec, 0x02ec, + 0x02ee, 0x02ee, + 0x0300, 0x0374, + 0x0376, 0x0377, + 0x037a, 0x037d, + 0x0386, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0487, + 0x048a, 0x0525, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x061a, + 0x0621, 0x065e, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06df, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x07b1, + 0x07c0, 0x07f5, + 0x07fa, 0x07fa, + 0x0800, 0x082d, + 0x0900, 0x0939, + 0x093c, 0x094e, + 0x0950, 0x0955, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0971, 0x0972, + 0x0979, 0x097f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 
0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a75, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b63, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd0, 0x0bd0, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c58, 0x0c59, + 0x0c60, 0x0c63, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce3, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d44, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d63, + 0x0d66, 0x0d6f, + 0x0d7a, 0x0d7f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 
0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f29, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1049, + 0x1050, 0x109d, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1369, 0x1371, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x1a00, 0x1a1b, + 0x1a20, 0x1a5e, + 0x1a60, 0x1a7c, + 0x1a7f, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa7, 0x1aa7, + 0x1b00, 0x1b4b, + 0x1b50, 0x1b59, + 0x1b6b, 0x1b73, + 0x1b80, 0x1baa, + 0x1bae, 0x1bb9, + 0x1c00, 0x1c37, + 0x1c40, 0x1c49, + 0x1c4d, 0x1c7d, + 0x1cd0, 0x1cd2, + 0x1cd4, 0x1cf2, + 0x1d00, 0x1de6, + 0x1dfd, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 
0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x20d0, 0x20dc, + 0x20e1, 0x20e1, + 0x20e5, 0x20f0, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2118, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2160, 0x2188, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cf1, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2de0, 0x2dff, + 0x3005, 0x3007, + 0x3021, 0x302f, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa62b, + 0xa640, 0xa65f, + 0xa662, 0xa66f, + 0xa67c, 0xa67d, + 0xa67f, 0xa697, + 0xa6a0, 0xa6f1, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78c, + 0xa7fb, 0xa827, + 0xa840, 0xa873, + 0xa880, 0xa8c4, + 0xa8d0, 0xa8d9, + 0xa8e0, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa900, 0xa92d, + 0xa930, 0xa953, + 0xa960, 0xa97c, + 0xa980, 0xa9c0, + 0xa9cf, 0xa9d9, + 0xaa00, 0xaa36, + 0xaa40, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa60, 0xaa76, + 0xaa7a, 0xaa7b, + 0xaa80, 0xaac2, + 0xaadb, 0xaadd, + 0xabc0, 0xabea, + 0xabec, 0xabed, + 0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 
0xfe0f, + 0xfe20, 0xfe26, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff3f, 0xff3f, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10140, 0x10174, + 0x101fd, 0x101fd, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x10a60, 0x10a7c, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10c00, 0x10c48, + 0x11080, 0x110ba, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x13000, 0x1342e, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef, +}; /* CR_ID_Continue */ + +/* 'XID_Start': Derived 
Property */ +static const OnigCodePoint CR_XID_Start[] = { + 431, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ec, 0x02ec, + 0x02ee, 0x02ee, + 0x0370, 0x0374, + 0x0376, 0x0377, + 0x037b, 0x037d, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x0525, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06e5, 0x06e6, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x07a5, + 0x07b1, 0x07b1, + 0x07ca, 0x07ea, + 0x07f4, 0x07f5, + 0x07fa, 0x07fa, + 0x0800, 0x0815, + 0x081a, 0x081a, + 0x0824, 0x0824, + 0x0828, 0x0828, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x0971, 0x0972, + 0x0979, 0x097f, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bd0, 0x0bd0, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 
0x0c35, 0x0c39, + 0x0c3d, 0x0c3d, + 0x0c58, 0x0c59, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d3d, + 0x0d60, 0x0d61, + 0x0d7a, 0x0d7f, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e32, + 0x0e40, 0x0e46, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb2, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f88, 0x0f8b, + 0x1000, 0x102a, + 0x103f, 0x103f, + 0x1050, 0x1055, + 0x105a, 0x105d, + 0x1061, 0x1061, + 0x1065, 0x1066, + 0x106e, 0x1070, + 0x1075, 0x1081, + 0x108e, 0x108e, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x18aa, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1a20, 0x1a54, + 0x1aa7, 0x1aa7, + 0x1b05, 0x1b33, + 0x1b45, 0x1b4b, + 0x1b83, 0x1ba0, + 0x1bae, 0x1baf, + 0x1c00, 0x1c23, + 0x1c4d, 0x1c4f, + 0x1c5a, 0x1c7d, + 
0x1ce9, 0x1cec, + 0x1cee, 0x1cf1, + 0x1d00, 0x1dbf, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2118, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2160, 0x2188, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cee, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3007, + 0x3021, 0x3029, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa61f, + 0xa62a, 0xa62b, + 0xa640, 0xa65f, + 0xa662, 0xa66e, + 0xa67f, 0xa697, + 0xa6a0, 0xa6ef, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78c, + 0xa7fb, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xa840, 0xa873, + 0xa882, 0xa8b3, + 0xa8f2, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa90a, 0xa925, + 0xa930, 0xa946, + 0xa960, 0xa97c, + 0xa984, 0xa9b2, + 0xa9cf, 0xa9cf, + 0xaa00, 0xaa28, + 0xaa40, 0xaa42, + 0xaa44, 0xaa4b, + 0xaa60, 0xaa76, + 0xaa7a, 0xaa7a, + 0xaa80, 0xaaaf, + 0xaab1, 0xaab1, + 0xaab5, 0xaab6, + 0xaab9, 0xaabd, + 0xaac0, 0xaac0, + 0xaac2, 0xaac2, + 0xaadb, 0xaadd, + 0xabc0, 0xabe2, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 
0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfc5d, + 0xfc64, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdf9, + 0xfe71, 0xfe71, + 0xfe73, 0xfe73, + 0xfe77, 0xfe77, + 0xfe79, 0xfe79, + 0xfe7b, 0xfe7b, + 0xfe7d, 0xfe7d, + 0xfe7f, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xff9d, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10140, 0x10174, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a60, 0x10a7c, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10c00, 0x10c48, + 0x11083, 0x110af, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x13000, 0x1342e, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, +}; /* CR_XID_Start */ 
+ +/* 'XID_Continue': Derived Property */ +static const OnigCodePoint CR_XID_Continue[] = { + 513, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x005f, 0x005f, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00b7, 0x00b7, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ec, 0x02ec, + 0x02ee, 0x02ee, + 0x0300, 0x0374, + 0x0376, 0x0377, + 0x037b, 0x037d, + 0x0386, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0487, + 0x048a, 0x0525, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x061a, + 0x0621, 0x065e, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06df, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x07b1, + 0x07c0, 0x07f5, + 0x07fa, 0x07fa, + 0x0800, 0x082d, + 0x0900, 0x0939, + 0x093c, 0x094e, + 0x0950, 0x0955, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0971, 0x0972, + 0x0979, 0x097f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a75, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 
0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b63, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd0, 0x0bd0, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c58, 0x0c59, + 0x0c60, 0x0c63, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce3, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d44, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d63, + 0x0d66, 0x0d6f, + 0x0d7a, 0x0d7f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f29, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1049, + 0x1050, 0x109d, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 
0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1369, 0x1371, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x1a00, 0x1a1b, + 0x1a20, 0x1a5e, + 0x1a60, 0x1a7c, + 0x1a7f, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa7, 0x1aa7, + 0x1b00, 0x1b4b, + 0x1b50, 0x1b59, + 0x1b6b, 0x1b73, + 0x1b80, 0x1baa, + 0x1bae, 0x1bb9, + 0x1c00, 0x1c37, + 0x1c40, 0x1c49, + 0x1c4d, 0x1c7d, + 0x1cd0, 0x1cd2, + 0x1cd4, 0x1cf2, + 0x1d00, 0x1de6, + 0x1dfd, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x20d0, 0x20dc, + 0x20e1, 0x20e1, + 0x20e5, 0x20f0, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2118, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2160, 0x2188, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 
0x2cf1, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2de0, 0x2dff, + 0x3005, 0x3007, + 0x3021, 0x302f, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa62b, + 0xa640, 0xa65f, + 0xa662, 0xa66f, + 0xa67c, 0xa67d, + 0xa67f, 0xa697, + 0xa6a0, 0xa6f1, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78c, + 0xa7fb, 0xa827, + 0xa840, 0xa873, + 0xa880, 0xa8c4, + 0xa8d0, 0xa8d9, + 0xa8e0, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa900, 0xa92d, + 0xa930, 0xa953, + 0xa960, 0xa97c, + 0xa980, 0xa9c0, + 0xa9cf, 0xa9d9, + 0xaa00, 0xaa36, + 0xaa40, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa60, 0xaa76, + 0xaa7a, 0xaa7b, + 0xaa80, 0xaac2, + 0xaadb, 0xaadd, + 0xabc0, 0xabea, + 0xabec, 0xabed, + 0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfc5d, + 0xfc64, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdf9, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe26, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xfe71, 0xfe71, + 0xfe73, 0xfe73, + 0xfe77, 0xfe77, + 0xfe79, 0xfe79, + 0xfe7b, 0xfe7b, + 0xfe7d, 0xfe7d, + 0xfe7f, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff3f, 0xff3f, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10140, 0x10174, + 0x101fd, 
0x101fd, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x10a60, 0x10a7c, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10c00, 0x10c48, + 0x11080, 0x110ba, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x13000, 0x1342e, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef, +}; /* CR_XID_Continue */ + +/* 'Default_Ignorable_Code_Point': Derived Property */ +static const OnigCodePoint CR_Default_Ignorable_Code_Point[] = { + 15, + 0x00ad, 0x00ad, + 0x034f, 0x034f, + 0x115f, 0x1160, + 0x17b4, 0x17b5, + 0x180b, 0x180d, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x206f, + 0x3164, 0x3164, + 0xfe00, 0xfe0f, + 0xfeff, 0xfeff, + 0xffa0, 0xffa0, + 0xfff0, 0xfff8, + 0x1d173, 0x1d17a, + 0xe0000, 0xe0fff, +}; /* CR_Default_Ignorable_Code_Point */ + +/* 
'Grapheme_Extend': Derived Property */ +static const OnigCodePoint CR_Grapheme_Extend[] = { + 206, + 0x0300, 0x036f, + 0x0483, 0x0489, + 0x0591, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x061a, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06de, 0x06e4, + 0x06e7, 0x06e8, + 0x06ea, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x07eb, 0x07f3, + 0x0816, 0x0819, + 0x081b, 0x0823, + 0x0825, 0x0827, + 0x0829, 0x082d, + 0x0900, 0x0902, + 0x093c, 0x093c, + 0x0941, 0x0948, + 0x094d, 0x094d, + 0x0951, 0x0955, + 0x0962, 0x0963, + 0x0981, 0x0981, + 0x09bc, 0x09bc, + 0x09be, 0x09be, + 0x09c1, 0x09c4, + 0x09cd, 0x09cd, + 0x09d7, 0x09d7, + 0x09e2, 0x09e3, + 0x0a01, 0x0a02, + 0x0a3c, 0x0a3c, + 0x0a41, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a70, 0x0a71, + 0x0a75, 0x0a75, + 0x0a81, 0x0a82, + 0x0abc, 0x0abc, + 0x0ac1, 0x0ac5, + 0x0ac7, 0x0ac8, + 0x0acd, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b01, + 0x0b3c, 0x0b3c, + 0x0b3e, 0x0b3f, + 0x0b41, 0x0b44, + 0x0b4d, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b62, 0x0b63, + 0x0b82, 0x0b82, + 0x0bbe, 0x0bbe, + 0x0bc0, 0x0bc0, + 0x0bcd, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c3e, 0x0c40, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c62, 0x0c63, + 0x0cbc, 0x0cbc, + 0x0cbf, 0x0cbf, + 0x0cc2, 0x0cc2, + 0x0cc6, 0x0cc6, + 0x0ccc, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0ce2, 0x0ce3, + 0x0d3e, 0x0d3e, + 0x0d41, 0x0d44, + 0x0d4d, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d62, 0x0d63, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dcf, + 0x0dd2, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0ddf, 0x0ddf, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e47, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f71, 0x0f7e, + 0x0f80, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102d, 0x1030, + 0x1032, 0x1037, + 0x1039, 0x103a, + 0x103d, 0x103e, + 0x1058, 0x1059, + 0x105e, 
0x1060, + 0x1071, 0x1074, + 0x1082, 0x1082, + 0x1085, 0x1086, + 0x108d, 0x108d, + 0x109d, 0x109d, + 0x135f, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b7, 0x17bd, + 0x17c6, 0x17c6, + 0x17c9, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x1922, + 0x1927, 0x1928, + 0x1932, 0x1932, + 0x1939, 0x193b, + 0x1a17, 0x1a18, + 0x1a56, 0x1a56, + 0x1a58, 0x1a5e, + 0x1a60, 0x1a60, + 0x1a62, 0x1a62, + 0x1a65, 0x1a6c, + 0x1a73, 0x1a7c, + 0x1a7f, 0x1a7f, + 0x1b00, 0x1b03, + 0x1b34, 0x1b34, + 0x1b36, 0x1b3a, + 0x1b3c, 0x1b3c, + 0x1b42, 0x1b42, + 0x1b6b, 0x1b73, + 0x1b80, 0x1b81, + 0x1ba2, 0x1ba5, + 0x1ba8, 0x1ba9, + 0x1c2c, 0x1c33, + 0x1c36, 0x1c37, + 0x1cd0, 0x1cd2, + 0x1cd4, 0x1ce0, + 0x1ce2, 0x1ce8, + 0x1ced, 0x1ced, + 0x1dc0, 0x1de6, + 0x1dfd, 0x1dff, + 0x200c, 0x200d, + 0x20d0, 0x20f0, + 0x2cef, 0x2cf1, + 0x2de0, 0x2dff, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa66f, 0xa672, + 0xa67c, 0xa67d, + 0xa6f0, 0xa6f1, + 0xa802, 0xa802, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa825, 0xa826, + 0xa8c4, 0xa8c4, + 0xa8e0, 0xa8f1, + 0xa926, 0xa92d, + 0xa947, 0xa951, + 0xa980, 0xa982, + 0xa9b3, 0xa9b3, + 0xa9b6, 0xa9b9, + 0xa9bc, 0xa9bc, + 0xaa29, 0xaa2e, + 0xaa31, 0xaa32, + 0xaa35, 0xaa36, + 0xaa43, 0xaa43, + 0xaa4c, 0xaa4c, + 0xaab0, 0xaab0, + 0xaab2, 0xaab4, + 0xaab7, 0xaab8, + 0xaabe, 0xaabf, + 0xaac1, 0xaac1, + 0xabe5, 0xabe5, + 0xabe8, 0xabe8, + 0xabed, 0xabed, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe26, + 0xff9e, 0xff9f, + 0x101fd, 0x101fd, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x11080, 0x11081, + 0x110b3, 0x110b6, + 0x110b9, 0x110ba, + 0x1d165, 0x1d165, + 0x1d167, 0x1d169, + 0x1d16e, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0100, 0xe01ef, +}; /* CR_Grapheme_Extend */ + +/* 'Grapheme_Base': Derived Property */ +static const OnigCodePoint CR_Grapheme_Base[] = { + 567, + 0x0020, 0x007e, + 0x00a0, 
0x00ac, + 0x00ae, 0x02ff, + 0x0370, 0x0377, + 0x037a, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x0482, + 0x048a, 0x0525, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0606, 0x060f, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x064a, + 0x0660, 0x066f, + 0x0671, 0x06d5, + 0x06e5, 0x06e6, + 0x06e9, 0x06e9, + 0x06ee, 0x070d, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x07a5, + 0x07b1, 0x07b1, + 0x07c0, 0x07ea, + 0x07f4, 0x07fa, + 0x0800, 0x0815, + 0x081a, 0x081a, + 0x0824, 0x0824, + 0x0828, 0x0828, + 0x0830, 0x083e, + 0x0903, 0x0939, + 0x093d, 0x0940, + 0x0949, 0x094c, + 0x094e, 0x094e, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x0964, 0x0972, + 0x0979, 0x097f, + 0x0982, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09bf, 0x09c0, + 0x09c7, 0x09c8, + 0x09cb, 0x09cc, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09e6, 0x09fb, + 0x0a03, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3e, 0x0a40, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a6f, + 0x0a72, 0x0a74, + 0x0a83, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0ac0, + 0x0ac9, 0x0ac9, + 0x0acb, 0x0acc, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b02, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b40, 0x0b40, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4c, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 
0x0baa, + 0x0bae, 0x0bb9, + 0x0bbf, 0x0bbf, + 0x0bc1, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcc, + 0x0bd0, 0x0bd0, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c3d, + 0x0c41, 0x0c44, + 0x0c58, 0x0c59, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c78, 0x0c7f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbe, + 0x0cc0, 0x0cc1, + 0x0cc3, 0x0cc4, + 0x0cc7, 0x0cc8, + 0x0cca, 0x0ccb, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0cf1, 0x0cf2, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d3d, + 0x0d3f, 0x0d40, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4c, + 0x0d60, 0x0d61, + 0x0d66, 0x0d75, + 0x0d79, 0x0d7f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dd0, 0x0dd1, + 0x0dd8, 0x0dde, + 0x0df2, 0x0df4, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e3f, 0x0e46, + 0x0e4f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f17, + 0x0f1a, 0x0f34, + 0x0f36, 0x0f36, + 0x0f38, 0x0f38, + 0x0f3a, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f7f, 0x0f7f, + 0x0f85, 0x0f85, + 0x0f88, 0x0f8b, + 0x0fbe, 0x0fc5, + 0x0fc7, 0x0fcc, + 0x0fce, 0x0fd8, + 0x1000, 0x102c, + 0x1031, 0x1031, + 0x1038, 0x1038, + 0x103b, 0x103c, + 0x103f, 0x1057, + 0x105a, 0x105d, + 0x1061, 0x1070, + 0x1075, 0x1081, + 0x1083, 0x1084, + 0x1087, 0x108c, + 0x108e, 0x109c, + 0x109e, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 
0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1360, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1400, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1735, 0x1736, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17b6, 0x17b6, + 0x17be, 0x17c5, + 0x17c7, 0x17c8, + 0x17d4, 0x17dc, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180a, + 0x180e, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x18aa, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1923, 0x1926, + 0x1929, 0x192b, + 0x1930, 0x1931, + 0x1933, 0x1938, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x19de, 0x1a16, + 0x1a19, 0x1a1b, + 0x1a1e, 0x1a55, + 0x1a57, 0x1a57, + 0x1a61, 0x1a61, + 0x1a63, 0x1a64, + 0x1a6d, 0x1a72, + 0x1a80, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa0, 0x1aad, + 0x1b04, 0x1b33, + 0x1b35, 0x1b35, + 0x1b3b, 0x1b3b, + 0x1b3d, 0x1b41, + 0x1b43, 0x1b4b, + 0x1b50, 0x1b6a, + 0x1b74, 0x1b7c, + 0x1b82, 0x1ba1, + 0x1ba6, 0x1ba7, + 0x1baa, 0x1baa, + 0x1bae, 0x1bb9, + 0x1c00, 0x1c2b, + 0x1c34, 0x1c35, + 0x1c3b, 0x1c49, + 0x1c4d, 0x1c7f, + 0x1cd3, 0x1cd3, + 0x1ce1, 0x1ce1, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf2, + 0x1d00, 0x1dbf, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x200a, + 0x2010, 0x2027, + 0x202f, 0x205f, + 0x2070, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b8, + 0x2100, 0x2189, + 0x2190, 0x23e8, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x26cd, + 0x26cf, 0x26e1, + 0x26e3, 0x26e3, + 0x26e8, 0x26ff, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 
0x2752, + 0x2756, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27ca, + 0x27cc, 0x27cc, + 0x27d0, 0x2b4c, + 0x2b50, 0x2b59, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2cee, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e31, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x3029, + 0x3030, 0x303f, + 0x3041, 0x3096, + 0x309b, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31e3, + 0x31f0, 0x321e, + 0x3220, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa4d0, 0xa62b, + 0xa640, 0xa65f, + 0xa662, 0xa66e, + 0xa673, 0xa673, + 0xa67e, 0xa697, + 0xa6a0, 0xa6ef, + 0xa6f2, 0xa6f7, + 0xa700, 0xa78c, + 0xa7fb, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa824, + 0xa827, 0xa82b, + 0xa830, 0xa839, + 0xa840, 0xa877, + 0xa880, 0xa8c3, + 0xa8ce, 0xa8d9, + 0xa8f2, 0xa8fb, + 0xa900, 0xa925, + 0xa92e, 0xa946, + 0xa952, 0xa953, + 0xa95f, 0xa97c, + 0xa983, 0xa9b2, + 0xa9b4, 0xa9b5, + 0xa9ba, 0xa9bb, + 0xa9bd, 0xa9cd, + 0xa9cf, 0xa9d9, + 0xa9de, 0xa9df, + 0xaa00, 0xaa28, + 0xaa2f, 0xaa30, + 0xaa33, 0xaa34, + 0xaa40, 0xaa42, + 0xaa44, 0xaa4b, + 0xaa4d, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa5c, 0xaa7b, + 0xaa80, 0xaaaf, + 0xaab1, 0xaab1, + 0xaab5, 0xaab6, + 0xaab9, 0xaabd, + 0xaac0, 0xaac0, + 0xaac2, 0xaac2, + 0xaadb, 0xaadf, + 0xabc0, 0xabe4, + 0xabe6, 0xabe7, + 0xabe9, 0xabec, + 0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 
0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff01, 0xff9d, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfffc, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10190, 0x1019b, + 0x101d0, 0x101fc, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10857, 0x1085f, + 0x10900, 0x1091b, + 0x1091f, 0x10939, + 0x1093f, 0x1093f, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a40, 0x10a47, + 0x10a50, 0x10a58, + 0x10a60, 0x10a7f, + 0x10b00, 0x10b35, + 0x10b39, 0x10b55, + 0x10b58, 0x10b72, + 0x10b78, 0x10b7f, + 0x10c00, 0x10c48, + 0x10e60, 0x10e7e, + 0x11082, 0x110b2, + 0x110b7, 0x110b8, + 0x110bb, 0x110bc, + 0x110be, 0x110c1, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x12470, 0x12473, + 0x13000, 0x1342e, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d129, 0x1d164, + 0x1d166, 0x1d166, + 0x1d16a, 0x1d16d, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356, + 0x1d360, 0x1d371, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7cb, + 0x1d7ce, 0x1d7ff, + 0x1f000, 0x1f02b, + 0x1f030, 0x1f093, + 0x1f100, 
0x1f10a, + 0x1f110, 0x1f12e, + 0x1f131, 0x1f131, + 0x1f13d, 0x1f13d, + 0x1f13f, 0x1f13f, + 0x1f142, 0x1f142, + 0x1f146, 0x1f146, + 0x1f14a, 0x1f14e, + 0x1f157, 0x1f157, + 0x1f15f, 0x1f15f, + 0x1f179, 0x1f179, + 0x1f17b, 0x1f17c, + 0x1f17f, 0x1f17f, + 0x1f18a, 0x1f18d, + 0x1f190, 0x1f190, + 0x1f200, 0x1f200, + 0x1f210, 0x1f231, + 0x1f240, 0x1f248, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, +}; /* CR_Grapheme_Base */ + +/* 'Grapheme_Link': Derived Property */ +static const OnigCodePoint CR_Grapheme_Link[] = { + 26, + 0x094d, 0x094d, + 0x09cd, 0x09cd, + 0x0a4d, 0x0a4d, + 0x0acd, 0x0acd, + 0x0b4d, 0x0b4d, + 0x0bcd, 0x0bcd, + 0x0c4d, 0x0c4d, + 0x0ccd, 0x0ccd, + 0x0d4d, 0x0d4d, + 0x0dca, 0x0dca, + 0x0e3a, 0x0e3a, + 0x0f84, 0x0f84, + 0x1039, 0x103a, + 0x1714, 0x1714, + 0x1734, 0x1734, + 0x17d2, 0x17d2, + 0x1a60, 0x1a60, + 0x1b44, 0x1b44, + 0x1baa, 0x1baa, + 0xa806, 0xa806, + 0xa8c4, 0xa8c4, + 0xa953, 0xa953, + 0xa9c0, 0xa9c0, + 0xabed, 0xabed, + 0x10a3f, 0x10a3f, + 0x110b9, 0x110b9, +}; /* CR_Grapheme_Link */ + +/* 'Common': Script */ +static const OnigCodePoint CR_Common[] = { + 159, + 0x0000, 0x0040, + 0x005b, 0x0060, + 0x007b, 0x00a9, + 0x00ab, 0x00b9, + 0x00bb, 0x00bf, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x02b9, 0x02df, + 0x02e5, 0x02ff, + 0x0374, 0x0374, + 0x037e, 0x037e, + 0x0385, 0x0385, + 0x0387, 0x0387, + 0x0589, 0x0589, + 0x0600, 0x0603, + 0x060c, 0x060c, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0640, 0x0640, + 0x0660, 0x0669, + 0x06dd, 0x06dd, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0cf1, 0x0cf2, + 0x0e3f, 0x0e3f, + 0x0fd5, 0x0fd8, + 0x10fb, 0x10fb, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x1802, 0x1803, + 0x1805, 0x1805, + 0x1cd3, 0x1cd3, + 0x1ce1, 0x1ce1, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf2, + 0x2000, 0x200b, + 0x200e, 0x2064, + 0x206a, 0x2070, + 0x2074, 0x207e, + 0x2080, 0x208e, + 0x20a0, 0x20b8, + 0x2100, 0x2125, + 0x2127, 0x2129, + 0x212c, 0x2131, + 0x2133, 0x214d, + 0x214f, 0x215f, + 0x2189, 0x2189, + 0x2190, 0x23e8, + 0x2400, 0x2426, + 
0x2440, 0x244a, + 0x2460, 0x26cd, + 0x26cf, 0x26e1, + 0x26e3, 0x26e3, + 0x26e8, 0x26ff, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27ca, + 0x27cc, 0x27cc, + 0x27d0, 0x27ff, + 0x2900, 0x2b4c, + 0x2b50, 0x2b59, + 0x2e00, 0x2e31, + 0x2ff0, 0x2ffb, + 0x3000, 0x3004, + 0x3006, 0x3006, + 0x3008, 0x3020, + 0x3030, 0x3037, + 0x303c, 0x303f, + 0x309b, 0x309c, + 0x30a0, 0x30a0, + 0x30fb, 0x30fc, + 0x3190, 0x319f, + 0x31c0, 0x31e3, + 0x3220, 0x325f, + 0x327f, 0x32cf, + 0x3358, 0x33ff, + 0x4dc0, 0x4dff, + 0xa700, 0xa721, + 0xa788, 0xa78a, + 0xa830, 0xa839, + 0xfd3e, 0xfd3f, + 0xfdfd, 0xfdfd, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfeff, 0xfeff, + 0xff01, 0xff20, + 0xff3b, 0xff40, + 0xff5b, 0xff65, + 0xff70, 0xff70, + 0xff9e, 0xff9f, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x10190, 0x1019b, + 0x101d0, 0x101fc, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d129, 0x1d166, + 0x1d16a, 0x1d17a, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d360, 0x1d371, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7cb, + 0x1d7ce, 0x1d7ff, + 0x1f000, 0x1f02b, + 0x1f030, 0x1f093, + 0x1f100, 0x1f10a, + 0x1f110, 0x1f12e, + 0x1f131, 0x1f131, + 0x1f13d, 0x1f13d, + 0x1f13f, 0x1f13f, + 0x1f142, 0x1f142, + 0x1f146, 0x1f146, + 0x1f14a, 0x1f14e, + 0x1f157, 0x1f157, + 0x1f15f, 0x1f15f, + 0x1f179, 0x1f179, + 0x1f17b, 0x1f17c, + 0x1f17f, 0x1f17f, + 0x1f18a, 0x1f18d, + 
0x1f190, 0x1f190, + 0x1f210, 0x1f231, + 0x1f240, 0x1f248, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, +}; /* CR_Common */ + +/* 'Latin': Script */ +static const OnigCodePoint CR_Latin[] = { + 28, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x02b8, + 0x02e0, 0x02e4, + 0x1d00, 0x1d25, + 0x1d2c, 0x1d5c, + 0x1d62, 0x1d65, + 0x1d6b, 0x1d77, + 0x1d79, 0x1dbe, + 0x1e00, 0x1eff, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x212a, 0x212b, + 0x2132, 0x2132, + 0x214e, 0x214e, + 0x2160, 0x2188, + 0x2c60, 0x2c7f, + 0xa722, 0xa787, + 0xa78b, 0xa78c, + 0xa7fb, 0xa7ff, + 0xfb00, 0xfb06, + 0xff21, 0xff3a, + 0xff41, 0xff5a, +}; /* CR_Latin */ + +/* 'Greek': Script */ +static const OnigCodePoint CR_Greek[] = { + 33, + 0x0370, 0x0373, + 0x0375, 0x0377, + 0x037a, 0x037d, + 0x0384, 0x0384, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03e1, + 0x03f0, 0x03ff, + 0x1d26, 0x1d2a, + 0x1d5d, 0x1d61, + 0x1d66, 0x1d6a, + 0x1dbf, 0x1dbf, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2126, 0x2126, + 0x10140, 0x1018a, + 0x1d200, 0x1d245, +}; /* CR_Greek */ + +/* 'Cyrillic': Script */ +static const OnigCodePoint CR_Cyrillic[] = { + 8, + 0x0400, 0x0484, + 0x0487, 0x0525, + 0x1d2b, 0x1d2b, + 0x1d78, 0x1d78, + 0x2de0, 0x2dff, + 0xa640, 0xa65f, + 0xa662, 0xa673, + 0xa67c, 0xa697, +}; /* CR_Cyrillic */ + +/* 'Armenian': Script */ +static const OnigCodePoint CR_Armenian[] = { + 5, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x058a, 0x058a, + 0xfb13, 0xfb17, +}; /* CR_Armenian */ + +/* 'Hebrew': Script */ +static const OnigCodePoint CR_Hebrew[] = { + 9, + 0x0591, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0xfb1d, 0xfb36, + 0xfb38, 
0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfb4f, +}; /* CR_Hebrew */ + +/* 'Arabic': Script */ +static const OnigCodePoint CR_Arabic[] = { + 18, + 0x0606, 0x060b, + 0x060d, 0x061a, + 0x061e, 0x061e, + 0x0621, 0x063f, + 0x0641, 0x064a, + 0x0656, 0x065e, + 0x066a, 0x066f, + 0x0671, 0x06dc, + 0x06de, 0x06ff, + 0x0750, 0x077f, + 0xfb50, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfc, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0x10e60, 0x10e7e, +}; /* CR_Arabic */ + +/* 'Syriac': Script */ +static const OnigCodePoint CR_Syriac[] = { + 3, + 0x0700, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f, +}; /* CR_Syriac */ + +/* 'Thaana': Script */ +static const OnigCodePoint CR_Thaana[] = { + 1, + 0x0780, 0x07b1, +}; /* CR_Thaana */ + +/* 'Devanagari': Script */ +static const OnigCodePoint CR_Devanagari[] = { + 9, + 0x0900, 0x0939, + 0x093c, 0x094e, + 0x0950, 0x0950, + 0x0953, 0x0955, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0971, 0x0972, + 0x0979, 0x097f, + 0xa8e0, 0xa8fb, +}; /* CR_Devanagari */ + +/* 'Bengali': Script */ +static const OnigCodePoint CR_Bengali[] = { + 14, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fb, +}; /* CR_Bengali */ + +/* 'Gurmukhi': Script */ +static const OnigCodePoint CR_Gurmukhi[] = { + 16, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a75, +}; /* CR_Gurmukhi */ + +/* 'Gujarati': Script */ +static const OnigCodePoint CR_Gujarati[] = { + 14, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, 
+ 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, +}; /* CR_Gujarati */ + +/* 'Oriya': Script */ +static const OnigCodePoint CR_Oriya[] = { + 14, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b63, + 0x0b66, 0x0b71, +}; /* CR_Oriya */ + +/* 'Tamil': Script */ +static const OnigCodePoint CR_Tamil[] = { + 16, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd0, 0x0bd0, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, +}; /* CR_Tamil */ + +/* 'Telugu': Script */ +static const OnigCodePoint CR_Telugu[] = { + 14, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c58, 0x0c59, + 0x0c60, 0x0c63, + 0x0c66, 0x0c6f, + 0x0c78, 0x0c7f, +}; /* CR_Telugu */ + +/* 'Kannada': Script */ +static const OnigCodePoint CR_Kannada[] = { + 13, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce3, + 0x0ce6, 0x0cef, +}; /* CR_Kannada */ + +/* 'Malayalam': Script */ +static const OnigCodePoint CR_Malayalam[] = { + 12, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d44, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d63, + 0x0d66, 0x0d75, + 0x0d79, 0x0d7f, +}; /* CR_Malayalam */ + +/* 'Sinhala': Script */ +static const OnigCodePoint CR_Sinhala[] = { + 11, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 
0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, +}; /* CR_Sinhala */ + +/* 'Thai': Script */ +static const OnigCodePoint CR_Thai[] = { + 2, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e5b, +}; /* CR_Thai */ + +/* 'Lao': Script */ +static const OnigCodePoint CR_Lao[] = { + 18, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, +}; /* CR_Lao */ + +/* 'Tibetan': Script */ +static const OnigCodePoint CR_Tibetan[] = { + 7, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fce, 0x0fd4, +}; /* CR_Tibetan */ + +/* 'Myanmar': Script */ +static const OnigCodePoint CR_Myanmar[] = { + 2, + 0x1000, 0x109f, + 0xaa60, 0xaa7b, +}; /* CR_Myanmar */ + +/* 'Georgian': Script */ +static const OnigCodePoint CR_Georgian[] = { + 4, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x2d00, 0x2d25, +}; /* CR_Georgian */ + +/* 'Hangul': Script */ +static const OnigCodePoint CR_Hangul[] = { + 13, + 0x1100, 0x11ff, + 0x3131, 0x318e, + 0x3200, 0x321e, + 0x3260, 0x327e, + 0xa960, 0xa97c, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, +}; /* CR_Hangul */ + +/* 'Ethiopic': Script */ +static const OnigCodePoint CR_Ethiopic[] = { + 27, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x2d80, 0x2d96, + 
0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, +}; /* CR_Ethiopic */ + +/* 'Cherokee': Script */ +static const OnigCodePoint CR_Cherokee[] = { + 1, + 0x13a0, 0x13f4, +}; /* CR_Cherokee */ + +/* 'Canadian_Aboriginal': Script */ +static const OnigCodePoint CR_Canadian_Aboriginal[] = { + 2, + 0x1400, 0x167f, + 0x18b0, 0x18f5, +}; /* CR_Canadian_Aboriginal */ + +/* 'Ogham': Script */ +static const OnigCodePoint CR_Ogham[] = { + 1, + 0x1680, 0x169c, +}; /* CR_Ogham */ + +/* 'Runic': Script */ +static const OnigCodePoint CR_Runic[] = { + 2, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, +}; /* CR_Runic */ + +/* 'Khmer': Script */ +static const OnigCodePoint CR_Khmer[] = { + 4, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x19e0, 0x19ff, +}; /* CR_Khmer */ + +/* 'Mongolian': Script */ +static const OnigCodePoint CR_Mongolian[] = { + 6, + 0x1800, 0x1801, + 0x1804, 0x1804, + 0x1806, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18aa, +}; /* CR_Mongolian */ + +/* 'Hiragana': Script */ +static const OnigCodePoint CR_Hiragana[] = { + 3, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x1f200, 0x1f200, +}; /* CR_Hiragana */ + +/* 'Katakana': Script */ +static const OnigCodePoint CR_Katakana[] = { + 7, + 0x30a1, 0x30fa, + 0x30fd, 0x30ff, + 0x31f0, 0x31ff, + 0x32d0, 0x32fe, + 0x3300, 0x3357, + 0xff66, 0xff6f, + 0xff71, 0xff9d, +}; /* CR_Katakana */ + +/* 'Bopomofo': Script */ +static const OnigCodePoint CR_Bopomofo[] = { + 2, + 0x3105, 0x312d, + 0x31a0, 0x31b7, +}; /* CR_Bopomofo */ + +/* 'Han': Script */ +static const OnigCodePoint CR_Han[] = { + 15, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x3005, 0x3005, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303b, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, +}; /* CR_Han */ + +/* 'Yi': Script */ +static const 
OnigCodePoint CR_Yi[] = { + 2, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, +}; /* CR_Yi */ + +/* 'Old_Italic': Script */ +static const OnigCodePoint CR_Old_Italic[] = { + 2, + 0x10300, 0x1031e, + 0x10320, 0x10323, +}; /* CR_Old_Italic */ + +/* 'Gothic': Script */ +static const OnigCodePoint CR_Gothic[] = { + 1, + 0x10330, 0x1034a, +}; /* CR_Gothic */ + +/* 'Deseret': Script */ +static const OnigCodePoint CR_Deseret[] = { + 1, + 0x10400, 0x1044f, +}; /* CR_Deseret */ + +/* 'Inherited': Script */ +static const OnigCodePoint CR_Inherited[] = { + 23, + 0x0300, 0x036f, + 0x0485, 0x0486, + 0x064b, 0x0655, + 0x0670, 0x0670, + 0x0951, 0x0952, + 0x1cd0, 0x1cd2, + 0x1cd4, 0x1ce0, + 0x1ce2, 0x1ce8, + 0x1ced, 0x1ced, + 0x1dc0, 0x1de6, + 0x1dfd, 0x1dff, + 0x200c, 0x200d, + 0x20d0, 0x20f0, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe26, + 0x101fd, 0x101fd, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0xe0100, 0xe01ef, +}; /* CR_Inherited */ + +/* 'Tagalog': Script */ +static const OnigCodePoint CR_Tagalog[] = { + 2, + 0x1700, 0x170c, + 0x170e, 0x1714, +}; /* CR_Tagalog */ + +/* 'Hanunoo': Script */ +static const OnigCodePoint CR_Hanunoo[] = { + 1, + 0x1720, 0x1734, +}; /* CR_Hanunoo */ + +/* 'Buhid': Script */ +static const OnigCodePoint CR_Buhid[] = { + 1, + 0x1740, 0x1753, +}; /* CR_Buhid */ + +/* 'Tagbanwa': Script */ +static const OnigCodePoint CR_Tagbanwa[] = { + 3, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, +}; /* CR_Tagbanwa */ + +/* 'Limbu': Script */ +static const OnigCodePoint CR_Limbu[] = { + 5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x194f, +}; /* CR_Limbu */ + +/* 'Tai_Le': Script */ +static const OnigCodePoint CR_Tai_Le[] = { + 2, + 0x1950, 0x196d, + 0x1970, 0x1974, +}; /* CR_Tai_Le */ + +/* 'Linear_B': Script */ +static const OnigCodePoint CR_Linear_B[] = { + 7, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 
0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, +}; /* CR_Linear_B */ + +/* 'Ugaritic': Script */ +static const OnigCodePoint CR_Ugaritic[] = { + 2, + 0x10380, 0x1039d, + 0x1039f, 0x1039f, +}; /* CR_Ugaritic */ + +/* 'Shavian': Script */ +static const OnigCodePoint CR_Shavian[] = { + 1, + 0x10450, 0x1047f, +}; /* CR_Shavian */ + +/* 'Osmanya': Script */ +static const OnigCodePoint CR_Osmanya[] = { + 2, + 0x10480, 0x1049d, + 0x104a0, 0x104a9, +}; /* CR_Osmanya */ + +/* 'Cypriot': Script */ +static const OnigCodePoint CR_Cypriot[] = { + 6, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, +}; /* CR_Cypriot */ + +/* 'Braille': Script */ +static const OnigCodePoint CR_Braille[] = { + 1, + 0x2800, 0x28ff, +}; /* CR_Braille */ + +/* 'Buginese': Script */ +static const OnigCodePoint CR_Buginese[] = { + 2, + 0x1a00, 0x1a1b, + 0x1a1e, 0x1a1f, +}; /* CR_Buginese */ + +/* 'Coptic': Script */ +static const OnigCodePoint CR_Coptic[] = { + 3, + 0x03e2, 0x03ef, + 0x2c80, 0x2cf1, + 0x2cf9, 0x2cff, +}; /* CR_Coptic */ + +/* 'New_Tai_Lue': Script */ +static const OnigCodePoint CR_New_Tai_Lue[] = { + 4, + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x19de, 0x19df, +}; /* CR_New_Tai_Lue */ + +/* 'Glagolitic': Script */ +static const OnigCodePoint CR_Glagolitic[] = { + 2, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, +}; /* CR_Glagolitic */ + +/* 'Tifinagh': Script */ +static const OnigCodePoint CR_Tifinagh[] = { + 2, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, +}; /* CR_Tifinagh */ + +/* 'Syloti_Nagri': Script */ +static const OnigCodePoint CR_Syloti_Nagri[] = { + 1, + 0xa800, 0xa82b, +}; /* CR_Syloti_Nagri */ + +/* 'Old_Persian': Script */ +static const OnigCodePoint CR_Old_Persian[] = { + 2, + 0x103a0, 0x103c3, + 0x103c8, 0x103d5, +}; /* CR_Old_Persian */ + +/* 'Kharoshthi': Script */ +static const OnigCodePoint CR_Kharoshthi[] = { + 8, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 
0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, +}; /* CR_Kharoshthi */ + +/* 'Balinese': Script */ +static const OnigCodePoint CR_Balinese[] = { + 2, + 0x1b00, 0x1b4b, + 0x1b50, 0x1b7c, +}; /* CR_Balinese */ + +/* 'Cuneiform': Script */ +static const OnigCodePoint CR_Cuneiform[] = { + 3, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x12470, 0x12473, +}; /* CR_Cuneiform */ + +/* 'Phoenician': Script */ +static const OnigCodePoint CR_Phoenician[] = { + 2, + 0x10900, 0x1091b, + 0x1091f, 0x1091f, +}; /* CR_Phoenician */ + +/* 'Phags_Pa': Script */ +static const OnigCodePoint CR_Phags_Pa[] = { + 1, + 0xa840, 0xa877, +}; /* CR_Phags_Pa */ + +/* 'Nko': Script */ +static const OnigCodePoint CR_Nko[] = { + 1, + 0x07c0, 0x07fa, +}; /* CR_Nko */ + +/* 'Sundanese': Script */ +static const OnigCodePoint CR_Sundanese[] = { + 2, + 0x1b80, 0x1baa, + 0x1bae, 0x1bb9, +}; /* CR_Sundanese */ + +/* 'Lepcha': Script */ +static const OnigCodePoint CR_Lepcha[] = { + 3, + 0x1c00, 0x1c37, + 0x1c3b, 0x1c49, + 0x1c4d, 0x1c4f, +}; /* CR_Lepcha */ + +/* 'Ol_Chiki': Script */ +static const OnigCodePoint CR_Ol_Chiki[] = { + 1, + 0x1c50, 0x1c7f, +}; /* CR_Ol_Chiki */ + +/* 'Vai': Script */ +static const OnigCodePoint CR_Vai[] = { + 1, + 0xa500, 0xa62b, +}; /* CR_Vai */ + +/* 'Saurashtra': Script */ +static const OnigCodePoint CR_Saurashtra[] = { + 2, + 0xa880, 0xa8c4, + 0xa8ce, 0xa8d9, +}; /* CR_Saurashtra */ + +/* 'Kayah_Li': Script */ +static const OnigCodePoint CR_Kayah_Li[] = { + 1, + 0xa900, 0xa92f, +}; /* CR_Kayah_Li */ + +/* 'Rejang': Script */ +static const OnigCodePoint CR_Rejang[] = { + 2, + 0xa930, 0xa953, + 0xa95f, 0xa95f, +}; /* CR_Rejang */ + +/* 'Lycian': Script */ +static const OnigCodePoint CR_Lycian[] = { + 1, + 0x10280, 0x1029c, +}; /* CR_Lycian */ + +/* 'Carian': Script */ +static const OnigCodePoint CR_Carian[] = { + 1, + 0x102a0, 0x102d0, +}; /* CR_Carian */ + +/* 'Lydian': Script */ +static const OnigCodePoint CR_Lydian[] = { + 2, + 0x10920, 
0x10939, + 0x1093f, 0x1093f, +}; /* CR_Lydian */ + +/* 'Cham': Script */ +static const OnigCodePoint CR_Cham[] = { + 4, + 0xaa00, 0xaa36, + 0xaa40, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa5c, 0xaa5f, +}; /* CR_Cham */ + +/* 'Tai_Tham': Script */ +static const OnigCodePoint CR_Tai_Tham[] = { + 5, + 0x1a20, 0x1a5e, + 0x1a60, 0x1a7c, + 0x1a7f, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa0, 0x1aad, +}; /* CR_Tai_Tham */ + +/* 'Tai_Viet': Script */ +static const OnigCodePoint CR_Tai_Viet[] = { + 2, + 0xaa80, 0xaac2, + 0xaadb, 0xaadf, +}; /* CR_Tai_Viet */ + +/* 'Avestan': Script */ +static const OnigCodePoint CR_Avestan[] = { + 2, + 0x10b00, 0x10b35, + 0x10b39, 0x10b3f, +}; /* CR_Avestan */ + +/* 'Egyptian_Hieroglyphs': Script */ +static const OnigCodePoint CR_Egyptian_Hieroglyphs[] = { + 1, + 0x13000, 0x1342e, +}; /* CR_Egyptian_Hieroglyphs */ + +/* 'Samaritan': Script */ +static const OnigCodePoint CR_Samaritan[] = { + 2, + 0x0800, 0x082d, + 0x0830, 0x083e, +}; /* CR_Samaritan */ + +/* 'Lisu': Script */ +static const OnigCodePoint CR_Lisu[] = { + 1, + 0xa4d0, 0xa4ff, +}; /* CR_Lisu */ + +/* 'Bamum': Script */ +static const OnigCodePoint CR_Bamum[] = { + 1, + 0xa6a0, 0xa6f7, +}; /* CR_Bamum */ + +/* 'Javanese': Script */ +static const OnigCodePoint CR_Javanese[] = { + 3, + 0xa980, 0xa9cd, + 0xa9cf, 0xa9d9, + 0xa9de, 0xa9df, +}; /* CR_Javanese */ + +/* 'Meetei_Mayek': Script */ +static const OnigCodePoint CR_Meetei_Mayek[] = { + 2, + 0xabc0, 0xabed, + 0xabf0, 0xabf9, +}; /* CR_Meetei_Mayek */ + +/* 'Imperial_Aramaic': Script */ +static const OnigCodePoint CR_Imperial_Aramaic[] = { + 2, + 0x10840, 0x10855, + 0x10857, 0x1085f, +}; /* CR_Imperial_Aramaic */ + +/* 'Old_South_Arabian': Script */ +static const OnigCodePoint CR_Old_South_Arabian[] = { + 1, + 0x10a60, 0x10a7f, +}; /* CR_Old_South_Arabian */ + +/* 'Inscriptional_Parthian': Script */ +static const OnigCodePoint CR_Inscriptional_Parthian[] = { + 2, + 0x10b40, 0x10b55, + 0x10b58, 0x10b5f, +}; /* CR_Inscriptional_Parthian */ + +/* 
'Inscriptional_Pahlavi': Script */ +static const OnigCodePoint CR_Inscriptional_Pahlavi[] = { + 2, + 0x10b60, 0x10b72, + 0x10b78, 0x10b7f, +}; /* CR_Inscriptional_Pahlavi */ + +/* 'Old_Turkic': Script */ +static const OnigCodePoint CR_Old_Turkic[] = { + 1, + 0x10c00, 0x10c48, +}; /* CR_Old_Turkic */ + +/* 'Kaithi': Script */ +static const OnigCodePoint CR_Kaithi[] = { + 1, + 0x11080, 0x110c1, +}; /* CR_Kaithi */ + +/* 'White_Space': Binary Property */ +static const OnigCodePoint CR_White_Space[] = { + 11, + 0x0009, 0x000d, + 0x0020, 0x0020, + 0x0085, 0x0085, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000, +}; /* CR_White_Space */ + +/* 'Bidi_Control': Binary Property */ +static const OnigCodePoint CR_Bidi_Control[] = { + 2, + 0x200e, 0x200f, + 0x202a, 0x202e, +}; /* CR_Bidi_Control */ + +/* 'Join_Control': Binary Property */ +static const OnigCodePoint CR_Join_Control[] = { + 1, + 0x200c, 0x200d, +}; /* CR_Join_Control */ + +/* 'Dash': Binary Property */ +static const OnigCodePoint CR_Dash[] = { + 19, + 0x002d, 0x002d, + 0x058a, 0x058a, + 0x05be, 0x05be, + 0x1400, 0x1400, + 0x1806, 0x1806, + 0x2010, 0x2015, + 0x2053, 0x2053, + 0x207b, 0x207b, + 0x208b, 0x208b, + 0x2212, 0x2212, + 0x2e17, 0x2e17, + 0x2e1a, 0x2e1a, + 0x301c, 0x301c, + 0x3030, 0x3030, + 0x30a0, 0x30a0, + 0xfe31, 0xfe32, + 0xfe58, 0xfe58, + 0xfe63, 0xfe63, + 0xff0d, 0xff0d, +}; /* CR_Dash */ + +/* 'Hyphen': Binary Property */ +static const OnigCodePoint CR_Hyphen[] = { + 10, + 0x002d, 0x002d, + 0x00ad, 0x00ad, + 0x058a, 0x058a, + 0x1806, 0x1806, + 0x2010, 0x2011, + 0x2e17, 0x2e17, + 0x30fb, 0x30fb, + 0xfe63, 0xfe63, + 0xff0d, 0xff0d, + 0xff65, 0xff65, +}; /* CR_Hyphen */ + +/* 'Quotation_Mark': Binary Property */ +static const OnigCodePoint CR_Quotation_Mark[] = { + 12, + 0x0022, 0x0022, + 0x0027, 0x0027, + 0x00ab, 0x00ab, + 0x00bb, 0x00bb, + 0x2018, 0x201f, + 0x2039, 0x203a, + 0x300c, 0x300f, + 0x301d, 
0x301f, + 0xfe41, 0xfe44, + 0xff02, 0xff02, + 0xff07, 0xff07, + 0xff62, 0xff63, +}; /* CR_Quotation_Mark */ + +/* 'Terminal_Punctuation': Binary Property */ +static const OnigCodePoint CR_Terminal_Punctuation[] = { + 65, + 0x0021, 0x0021, + 0x002c, 0x002c, + 0x002e, 0x002e, + 0x003a, 0x003b, + 0x003f, 0x003f, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x0589, 0x0589, + 0x05c3, 0x05c3, + 0x060c, 0x060c, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x06d4, 0x06d4, + 0x0700, 0x070a, + 0x070c, 0x070c, + 0x07f8, 0x07f9, + 0x0830, 0x083e, + 0x0964, 0x0965, + 0x0e5a, 0x0e5b, + 0x0f08, 0x0f08, + 0x0f0d, 0x0f12, + 0x104a, 0x104b, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x16eb, 0x16ed, + 0x17d4, 0x17d6, + 0x17da, 0x17da, + 0x1802, 0x1805, + 0x1808, 0x1809, + 0x1944, 0x1945, + 0x1aa8, 0x1aab, + 0x1b5a, 0x1b5b, + 0x1b5d, 0x1b5f, + 0x1c3b, 0x1c3f, + 0x1c7e, 0x1c7f, + 0x203c, 0x203d, + 0x2047, 0x2049, + 0x2e2e, 0x2e2e, + 0x3001, 0x3002, + 0xa4fe, 0xa4ff, + 0xa60d, 0xa60f, + 0xa6f3, 0xa6f7, + 0xa876, 0xa877, + 0xa8ce, 0xa8cf, + 0xa92f, 0xa92f, + 0xa9c7, 0xa9c9, + 0xaa5d, 0xaa5f, + 0xaadf, 0xaadf, + 0xabeb, 0xabeb, + 0xfe50, 0xfe52, + 0xfe54, 0xfe57, + 0xff01, 0xff01, + 0xff0c, 0xff0c, + 0xff0e, 0xff0e, + 0xff1a, 0xff1b, + 0xff1f, 0xff1f, + 0xff61, 0xff61, + 0xff64, 0xff64, + 0x1039f, 0x1039f, + 0x103d0, 0x103d0, + 0x10857, 0x10857, + 0x1091f, 0x1091f, + 0x10b3a, 0x10b3f, + 0x110be, 0x110c1, + 0x12470, 0x12473, +}; /* CR_Terminal_Punctuation */ + +/* 'Other_Math': Binary Property */ +static const OnigCodePoint CR_Other_Math[] = { + 99, + 0x005e, 0x005e, + 0x03d0, 0x03d2, + 0x03d5, 0x03d5, + 0x03f0, 0x03f1, + 0x03f4, 0x03f5, + 0x2016, 0x2016, + 0x2032, 0x2034, + 0x2040, 0x2040, + 0x2061, 0x2064, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x20d0, 0x20dc, + 0x20e1, 0x20e1, + 0x20e5, 0x20e6, + 0x20eb, 0x20ef, + 0x2102, 0x2102, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2128, 0x2129, + 0x212c, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2138, + 0x213c, 0x213f, + 0x2145, 0x2149, 
+ 0x2195, 0x2199, + 0x219c, 0x219f, + 0x21a1, 0x21a2, + 0x21a4, 0x21a5, + 0x21a7, 0x21a7, + 0x21a9, 0x21ad, + 0x21b0, 0x21b1, + 0x21b6, 0x21b7, + 0x21bc, 0x21cd, + 0x21d0, 0x21d1, + 0x21d3, 0x21d3, + 0x21d5, 0x21db, + 0x21dd, 0x21dd, + 0x21e4, 0x21e5, + 0x23b4, 0x23b5, + 0x23b7, 0x23b7, + 0x23d0, 0x23d0, + 0x23e2, 0x23e2, + 0x25a0, 0x25a1, + 0x25ae, 0x25b6, + 0x25bc, 0x25c0, + 0x25c6, 0x25c7, + 0x25ca, 0x25cb, + 0x25cf, 0x25d3, + 0x25e2, 0x25e2, + 0x25e4, 0x25e4, + 0x25e7, 0x25ec, + 0x2605, 0x2606, + 0x2640, 0x2640, + 0x2642, 0x2642, + 0x2660, 0x2663, + 0x266d, 0x266e, + 0x27c5, 0x27c6, + 0x27e6, 0x27ef, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0xfe61, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xff3c, 0xff3c, + 0xff3e, 0xff3e, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x1d7ce, 0x1d7ff, +}; /* CR_Other_Math */ + +/* 'Hex_Digit': Binary Property */ +static const OnigCodePoint CR_Hex_Digit[] = { + 6, + 0x0030, 0x0039, + 0x0041, 0x0046, + 0x0061, 0x0066, + 0xff10, 0xff19, + 0xff21, 0xff26, + 0xff41, 0xff46, +}; /* CR_Hex_Digit */ + +/* 'ASCII_Hex_Digit': Binary Property */ +static const OnigCodePoint CR_ASCII_Hex_Digit[] = { + 3, + 0x0030, 0x0039, + 0x0041, 0x0046, + 0x0061, 0x0066, +}; /* CR_ASCII_Hex_Digit */ + +/* 'Other_Alphabetic': Binary Property */ +static const OnigCodePoint CR_Other_Alphabetic[] = { + 141, + 0x0345, 0x0345, + 0x05b0, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 
0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x061a, + 0x064b, 0x0657, + 0x0659, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06e1, 0x06e4, + 0x06e7, 0x06e8, + 0x06ed, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x073f, + 0x07a6, 0x07b0, + 0x0816, 0x0817, + 0x081b, 0x0823, + 0x0825, 0x0827, + 0x0829, 0x082c, + 0x0900, 0x0903, + 0x093e, 0x094c, + 0x094e, 0x094e, + 0x0955, 0x0955, + 0x0962, 0x0963, + 0x0981, 0x0983, + 0x09be, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cc, + 0x09d7, 0x09d7, + 0x09e2, 0x09e3, + 0x0a01, 0x0a03, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4c, + 0x0a51, 0x0a51, + 0x0a70, 0x0a71, + 0x0a75, 0x0a75, + 0x0a81, 0x0a83, + 0x0abe, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acc, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b3e, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4c, + 0x0b56, 0x0b57, + 0x0b62, 0x0b63, + 0x0b82, 0x0b82, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcc, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4c, + 0x0c55, 0x0c56, + 0x0c62, 0x0c63, + 0x0c82, 0x0c83, + 0x0cbe, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccc, + 0x0cd5, 0x0cd6, + 0x0ce2, 0x0ce3, + 0x0d02, 0x0d03, + 0x0d3e, 0x0d44, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4c, + 0x0d57, 0x0d57, + 0x0d62, 0x0d63, + 0x0d82, 0x0d83, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e4d, 0x0e4d, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ecd, 0x0ecd, + 0x0f71, 0x0f81, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x102b, 0x1036, + 0x1038, 0x1038, + 0x103b, 0x103e, + 0x1056, 0x1059, + 0x105e, 0x1060, + 0x1062, 0x1062, + 0x1067, 0x1068, + 0x1071, 0x1074, + 0x1082, 0x1086, + 0x109c, 0x109d, + 0x135f, 0x135f, + 0x1712, 0x1713, + 0x1732, 0x1733, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b6, 0x17c8, + 0x18a9, 0x18a9, + 0x1920, 0x192b, + 0x1930, 0x1938, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a17, 0x1a1b, + 0x1a55, 0x1a5e, + 0x1a61, 0x1a74, + 0x1b00, 0x1b04, + 0x1b35, 0x1b43, + 0x1b80, 0x1b82, + 0x1ba1, 
0x1ba9, + 0x1c24, 0x1c35, + 0x1cf2, 0x1cf2, + 0x24b6, 0x24e9, + 0x2de0, 0x2dff, + 0xa823, 0xa827, + 0xa880, 0xa881, + 0xa8b4, 0xa8c3, + 0xa926, 0xa92a, + 0xa947, 0xa952, + 0xa980, 0xa983, + 0xa9b3, 0xa9bf, + 0xaa29, 0xaa36, + 0xaa43, 0xaa43, + 0xaa4c, 0xaa4d, + 0xaab0, 0xaab0, + 0xaab2, 0xaab4, + 0xaab7, 0xaab8, + 0xaabe, 0xaabe, + 0xabe3, 0xabea, + 0xfb1e, 0xfb1e, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x11082, 0x11082, + 0x110b0, 0x110b8, +}; /* CR_Other_Alphabetic */ + +/* 'Ideographic': Binary Property */ +static const OnigCodePoint CR_Ideographic[] = { + 11, + 0x3006, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, +}; /* CR_Ideographic */ + +/* 'Diacritic': Binary Property */ +static const OnigCodePoint CR_Diacritic[] = { + 117, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x00a8, 0x00a8, + 0x00af, 0x00af, + 0x00b4, 0x00b4, + 0x00b7, 0x00b8, + 0x02b0, 0x034e, + 0x0350, 0x0357, + 0x035d, 0x0362, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x0384, 0x0385, + 0x0483, 0x0487, + 0x0559, 0x0559, + 0x0591, 0x05a1, + 0x05a3, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x064b, 0x0652, + 0x0657, 0x0658, + 0x06df, 0x06e0, + 0x06e5, 0x06e6, + 0x06ea, 0x06ec, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x07eb, 0x07f5, + 0x0818, 0x0819, + 0x093c, 0x093c, + 0x094d, 0x094d, + 0x0951, 0x0954, + 0x0971, 0x0971, + 0x09bc, 0x09bc, + 0x09cd, 0x09cd, + 0x0a3c, 0x0a3c, + 0x0a4d, 0x0a4d, + 0x0abc, 0x0abc, + 0x0acd, 0x0acd, + 0x0b3c, 0x0b3c, + 0x0b4d, 0x0b4d, + 0x0bcd, 0x0bcd, + 0x0c4d, 0x0c4d, + 0x0cbc, 0x0cbc, + 0x0ccd, 0x0ccd, + 0x0d4d, 0x0d4d, + 0x0dca, 0x0dca, + 0x0e47, 0x0e4c, + 0x0e4e, 0x0e4e, + 0x0ec8, 0x0ecc, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f3f, + 0x0f82, 0x0f84, + 0x0f86, 0x0f87, + 0x0fc6, 0x0fc6, + 0x1037, 0x1037, + 0x1039, 0x103a, + 0x1087, 0x108d, + 
0x108f, 0x108f, + 0x109a, 0x109b, + 0x17c9, 0x17d3, + 0x17dd, 0x17dd, + 0x1939, 0x193b, + 0x1a75, 0x1a7c, + 0x1a7f, 0x1a7f, + 0x1b34, 0x1b34, + 0x1b44, 0x1b44, + 0x1b6b, 0x1b73, + 0x1baa, 0x1baa, + 0x1c36, 0x1c37, + 0x1c78, 0x1c7d, + 0x1cd0, 0x1ce8, + 0x1ced, 0x1ced, + 0x1d2c, 0x1d6a, + 0x1dc4, 0x1dcf, + 0x1dfd, 0x1dff, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x2cef, 0x2cf1, + 0x2e2f, 0x2e2f, + 0x302a, 0x302f, + 0x3099, 0x309c, + 0x30fc, 0x30fc, + 0xa66f, 0xa66f, + 0xa67c, 0xa67d, + 0xa67f, 0xa67f, + 0xa6f0, 0xa6f1, + 0xa717, 0xa721, + 0xa788, 0xa788, + 0xa8c4, 0xa8c4, + 0xa8e0, 0xa8f1, + 0xa92b, 0xa92e, + 0xa953, 0xa953, + 0xa9b3, 0xa9b3, + 0xa9c0, 0xa9c0, + 0xaa7b, 0xaa7b, + 0xaabf, 0xaac2, + 0xabec, 0xabed, + 0xfb1e, 0xfb1e, + 0xfe20, 0xfe26, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xff70, 0xff70, + 0xff9e, 0xff9f, + 0xffe3, 0xffe3, + 0x110b9, 0x110ba, + 0x1d167, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, +}; /* CR_Diacritic */ + +/* 'Extender': Binary Property */ +static const OnigCodePoint CR_Extender[] = { + 20, + 0x00b7, 0x00b7, + 0x02d0, 0x02d1, + 0x0640, 0x0640, + 0x07fa, 0x07fa, + 0x0e46, 0x0e46, + 0x0ec6, 0x0ec6, + 0x1843, 0x1843, + 0x1aa7, 0x1aa7, + 0x1c36, 0x1c36, + 0x1c7b, 0x1c7b, + 0x3005, 0x3005, + 0x3031, 0x3035, + 0x309d, 0x309e, + 0x30fc, 0x30fe, + 0xa015, 0xa015, + 0xa60c, 0xa60c, + 0xa9cf, 0xa9cf, + 0xaa70, 0xaa70, + 0xaadd, 0xaadd, + 0xff70, 0xff70, +}; /* CR_Extender */ + +/* 'Other_Lowercase': Binary Property */ +static const OnigCodePoint CR_Other_Lowercase[] = { + 13, + 0x02b0, 0x02b8, + 0x02c0, 0x02c1, + 0x02e0, 0x02e4, + 0x0345, 0x0345, + 0x037a, 0x037a, + 0x1d2c, 0x1d61, + 0x1d78, 0x1d78, + 0x1d9b, 0x1dbf, + 0x2090, 0x2094, + 0x2170, 0x217f, + 0x24d0, 0x24e9, + 0x2c7d, 0x2c7d, + 0xa770, 0xa770, +}; /* CR_Other_Lowercase */ + +/* 'Other_Uppercase': Binary Property */ +static const OnigCodePoint CR_Other_Uppercase[] = { + 2, + 
0x2160, 0x216f, + 0x24b6, 0x24cf, +}; /* CR_Other_Uppercase */ + +/* 'Noncharacter_Code_Point': Binary Property */ +static const OnigCodePoint CR_Noncharacter_Code_Point[] = { + 18, + 0xfdd0, 0xfdef, + 0xfffe, 0xffff, + 0x1fffe, 0x1ffff, + 0x2fffe, 0x2ffff, + 0x3fffe, 0x3ffff, + 0x4fffe, 0x4ffff, + 0x5fffe, 0x5ffff, + 0x6fffe, 0x6ffff, + 0x7fffe, 0x7ffff, + 0x8fffe, 0x8ffff, + 0x9fffe, 0x9ffff, + 0xafffe, 0xaffff, + 0xbfffe, 0xbffff, + 0xcfffe, 0xcffff, + 0xdfffe, 0xdffff, + 0xefffe, 0xeffff, + 0xffffe, 0xfffff, + 0x10fffe, 0x10ffff, +}; /* CR_Noncharacter_Code_Point */ + +/* 'Other_Grapheme_Extend': Binary Property */ +static const OnigCodePoint CR_Other_Grapheme_Extend[] = { + 16, + 0x09be, 0x09be, + 0x09d7, 0x09d7, + 0x0b3e, 0x0b3e, + 0x0b57, 0x0b57, + 0x0bbe, 0x0bbe, + 0x0bd7, 0x0bd7, + 0x0cc2, 0x0cc2, + 0x0cd5, 0x0cd6, + 0x0d3e, 0x0d3e, + 0x0d57, 0x0d57, + 0x0dcf, 0x0dcf, + 0x0ddf, 0x0ddf, + 0x200c, 0x200d, + 0xff9e, 0xff9f, + 0x1d165, 0x1d165, + 0x1d16e, 0x1d172, +}; /* CR_Other_Grapheme_Extend */ + +/* 'IDS_Binary_Operator': Binary Property */ +static const OnigCodePoint CR_IDS_Binary_Operator[] = { + 2, + 0x2ff0, 0x2ff1, + 0x2ff4, 0x2ffb, +}; /* CR_IDS_Binary_Operator */ + +/* 'IDS_Trinary_Operator': Binary Property */ +static const OnigCodePoint CR_IDS_Trinary_Operator[] = { + 1, + 0x2ff2, 0x2ff3, +}; /* CR_IDS_Trinary_Operator */ + +/* 'Radical': Binary Property */ +static const OnigCodePoint CR_Radical[] = { + 3, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, +}; /* CR_Radical */ + +/* 'Unified_Ideograph': Binary Property */ +static const OnigCodePoint CR_Unified_Ideograph[] = { + 11, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xfa0e, 0xfa0f, + 0xfa11, 0xfa11, + 0xfa13, 0xfa14, + 0xfa1f, 0xfa1f, + 0xfa21, 0xfa21, + 0xfa23, 0xfa24, + 0xfa27, 0xfa29, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, +}; /* CR_Unified_Ideograph */ + +/* 'Other_Default_Ignorable_Code_Point': Binary Property */ +static const OnigCodePoint CR_Other_Default_Ignorable_Code_Point[] = { + 
10, + 0x034f, 0x034f, + 0x115f, 0x1160, + 0x2065, 0x2069, + 0x3164, 0x3164, + 0xffa0, 0xffa0, + 0xfff0, 0xfff8, + 0xe0000, 0xe0000, + 0xe0002, 0xe001f, + 0xe0080, 0xe00ff, + 0xe01f0, 0xe0fff, +}; /* CR_Other_Default_Ignorable_Code_Point */ + +/* 'Deprecated': Binary Property */ +static const OnigCodePoint CR_Deprecated[] = { + 8, + 0x0149, 0x0149, + 0x0f77, 0x0f77, + 0x0f79, 0x0f79, + 0x17a3, 0x17a4, + 0x206a, 0x206f, + 0x2329, 0x232a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, +}; /* CR_Deprecated */ + +/* 'Soft_Dotted': Binary Property */ +static const OnigCodePoint CR_Soft_Dotted[] = { + 31, + 0x0069, 0x006a, + 0x012f, 0x012f, + 0x0249, 0x0249, + 0x0268, 0x0268, + 0x029d, 0x029d, + 0x02b2, 0x02b2, + 0x03f3, 0x03f3, + 0x0456, 0x0456, + 0x0458, 0x0458, + 0x1d62, 0x1d62, + 0x1d96, 0x1d96, + 0x1da4, 0x1da4, + 0x1da8, 0x1da8, + 0x1e2d, 0x1e2d, + 0x1ecb, 0x1ecb, + 0x2071, 0x2071, + 0x2148, 0x2149, + 0x2c7c, 0x2c7c, + 0x1d422, 0x1d423, + 0x1d456, 0x1d457, + 0x1d48a, 0x1d48b, + 0x1d4be, 0x1d4bf, + 0x1d4f2, 0x1d4f3, + 0x1d526, 0x1d527, + 0x1d55a, 0x1d55b, + 0x1d58e, 0x1d58f, + 0x1d5c2, 0x1d5c3, + 0x1d5f6, 0x1d5f7, + 0x1d62a, 0x1d62b, + 0x1d65e, 0x1d65f, + 0x1d692, 0x1d693, +}; /* CR_Soft_Dotted */ + +/* 'Logical_Order_Exception': Binary Property */ +static const OnigCodePoint CR_Logical_Order_Exception[] = { + 5, + 0x0e40, 0x0e44, + 0x0ec0, 0x0ec4, + 0xaab5, 0xaab6, + 0xaab9, 0xaab9, + 0xaabb, 0xaabc, +}; /* CR_Logical_Order_Exception */ + +/* 'Other_ID_Start': Binary Property */ +static const OnigCodePoint CR_Other_ID_Start[] = { + 3, + 0x2118, 0x2118, + 0x212e, 0x212e, + 0x309b, 0x309c, +}; /* CR_Other_ID_Start */ + +/* 'Other_ID_Continue': Binary Property */ +static const OnigCodePoint CR_Other_ID_Continue[] = { + 3, + 0x00b7, 0x00b7, + 0x0387, 0x0387, + 0x1369, 0x1371, +}; /* CR_Other_ID_Continue */ + +/* 'STerm': Binary Property */ +static const OnigCodePoint CR_STerm[] = { + 43, + 0x0021, 0x0021, + 0x002e, 0x002e, + 0x003f, 0x003f, + 0x055c, 0x055c, + 0x055e, 0x055e, 
+ 0x0589, 0x0589, + 0x061f, 0x061f, + 0x06d4, 0x06d4, + 0x0700, 0x0702, + 0x07f9, 0x07f9, + 0x0964, 0x0965, + 0x104a, 0x104b, + 0x1362, 0x1362, + 0x1367, 0x1368, + 0x166e, 0x166e, + 0x1803, 0x1803, + 0x1809, 0x1809, + 0x1944, 0x1945, + 0x1b5a, 0x1b5b, + 0x1b5e, 0x1b5f, + 0x1c3b, 0x1c3c, + 0x1c7e, 0x1c7f, + 0x203c, 0x203d, + 0x2047, 0x2049, + 0x2e2e, 0x2e2e, + 0x3002, 0x3002, + 0xa4ff, 0xa4ff, + 0xa60e, 0xa60f, + 0xa6f3, 0xa6f3, + 0xa6f7, 0xa6f7, + 0xa876, 0xa877, + 0xa8ce, 0xa8cf, + 0xa92f, 0xa92f, + 0xa9c8, 0xa9c9, + 0xaa5d, 0xaa5f, + 0xabeb, 0xabeb, + 0xfe52, 0xfe52, + 0xfe56, 0xfe57, + 0xff01, 0xff01, + 0xff0e, 0xff0e, + 0xff1f, 0xff1f, + 0xff61, 0xff61, + 0x110be, 0x110c1, +}; /* CR_STerm */ + +/* 'Variation_Selector': Binary Property */ +static const OnigCodePoint CR_Variation_Selector[] = { + 3, + 0x180b, 0x180d, + 0xfe00, 0xfe0f, + 0xe0100, 0xe01ef, +}; /* CR_Variation_Selector */ + +/* 'Pattern_White_Space': Binary Property */ +static const OnigCodePoint CR_Pattern_White_Space[] = { + 5, + 0x0009, 0x000d, + 0x0020, 0x0020, + 0x0085, 0x0085, + 0x200e, 0x200f, + 0x2028, 0x2029, +}; /* CR_Pattern_White_Space */ + +/* 'Pattern_Syntax': Binary Property */ +static const OnigCodePoint CR_Pattern_Syntax[] = { + 28, + 0x0021, 0x002f, + 0x003a, 0x0040, + 0x005b, 0x005e, + 0x0060, 0x0060, + 0x007b, 0x007e, + 0x00a1, 0x00a7, + 0x00a9, 0x00a9, + 0x00ab, 0x00ac, + 0x00ae, 0x00ae, + 0x00b0, 0x00b1, + 0x00b6, 0x00b6, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x2010, 0x2027, + 0x2030, 0x203e, + 0x2041, 0x2053, + 0x2055, 0x205e, + 0x2190, 0x245f, + 0x2500, 0x2775, + 0x2794, 0x2bff, + 0x2e00, 0x2e7f, + 0x3001, 0x3003, + 0x3008, 0x3020, + 0x3030, 0x3030, + 0xfd3e, 0xfd3f, + 0xfe45, 0xfe46, +}; /* CR_Pattern_Syntax */ +#endif /* USE_UNICODE_PROPERTIES */ +#endif /* USE_UNICODE_PROPERTIES */ + +/* 'NEWLINE': [[:NEWLINE:]] */ +static const OnigCodePoint CR_NEWLINE[] = { + 1, + 0x000a, 0x000a, +}; /* CR_NEWLINE */ + +/* 'Alpha': [[:Alpha:]] */ 
+#define CR_Alpha CR_Alphabetic + +/* 'Blank': [[:Blank:]] */ +static const OnigCodePoint CR_Blank[] = { + 9, + 0x0009, 0x0009, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000, +}; /* CR_Blank */ + +/* 'Cntrl': [[:Cntrl:]] */ +#define CR_Cntrl CR_Cc + +/* 'Digit': [[:Digit:]] */ +#define CR_Digit CR_Nd + +/* 'Graph': [[:Graph:]] */ +static const OnigCodePoint CR_Graph[] = { + 490, + 0x0021, 0x007e, + 0x00a1, 0x0377, + 0x037a, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x0525, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x0606, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x07b1, + 0x07c0, 0x07fa, + 0x0800, 0x082d, + 0x0830, 0x083e, + 0x0900, 0x0939, + 0x093c, 0x094e, + 0x0950, 0x0955, + 0x0958, 0x0972, + 0x0979, 0x097f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fb, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a75, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 
0x0b5c, 0x0b5d, + 0x0b5f, 0x0b63, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd0, 0x0bd0, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c58, 0x0c59, + 0x0c60, 0x0c63, + 0x0c66, 0x0c6f, + 0x0c78, 0x0c7f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce3, + 0x0ce6, 0x0cef, + 0x0cf1, 0x0cf2, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d44, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d63, + 0x0d66, 0x0d75, + 0x0d79, 0x0d7f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fce, 0x0fd8, + 0x1000, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 
0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1400, 0x167f, + 0x1681, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a5e, + 0x1a60, 0x1a7c, + 0x1a7f, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa0, 0x1aad, + 0x1b00, 0x1b4b, + 0x1b50, 0x1b7c, + 0x1b80, 0x1baa, + 0x1bae, 0x1bb9, + 0x1c00, 0x1c37, + 0x1c3b, 0x1c49, + 0x1c4d, 0x1c7f, + 0x1cd0, 0x1cf2, + 0x1d00, 0x1de6, + 0x1dfd, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x200b, 0x2027, + 0x202a, 0x202e, + 0x2030, 0x205e, + 0x2060, 0x2064, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b8, + 0x20d0, 0x20f0, + 0x2100, 0x2189, + 0x2190, 0x23e8, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x26cd, + 0x26cf, 0x26e1, + 0x26e3, 0x26e3, + 0x26e8, 0x26ff, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27ca, + 0x27cc, 0x27cc, + 0x27d0, 0x2b4c, + 0x2b50, 0x2b59, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2cf1, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 
0x2de0, 0x2e31, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3001, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31e3, + 0x31f0, 0x321e, + 0x3220, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa4d0, 0xa62b, + 0xa640, 0xa65f, + 0xa662, 0xa673, + 0xa67c, 0xa697, + 0xa6a0, 0xa6f7, + 0xa700, 0xa78c, + 0xa7fb, 0xa82b, + 0xa830, 0xa839, + 0xa840, 0xa877, + 0xa880, 0xa8c4, + 0xa8ce, 0xa8d9, + 0xa8e0, 0xa8fb, + 0xa900, 0xa953, + 0xa95f, 0xa97c, + 0xa980, 0xa9cd, + 0xa9cf, 0xa9d9, + 0xa9de, 0xa9df, + 0xaa00, 0xaa36, + 0xaa40, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa5c, 0xaa7b, + 0xaa80, 0xaac2, + 0xaadb, 0xaadf, + 0xabc0, 0xabed, + 0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe26, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10190, 0x1019b, + 0x101d0, 0x101fd, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10857, 0x1085f, + 0x10900, 0x1091b, + 0x1091f, 
0x10939, + 0x1093f, 0x1093f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x10a60, 0x10a7f, + 0x10b00, 0x10b35, + 0x10b39, 0x10b55, + 0x10b58, 0x10b72, + 0x10b78, 0x10b7f, + 0x10c00, 0x10c48, + 0x10e60, 0x10e7e, + 0x11080, 0x110c1, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x12470, 0x12473, + 0x13000, 0x1342e, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d129, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d360, 0x1d371, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7cb, + 0x1d7ce, 0x1d7ff, + 0x1f000, 0x1f02b, + 0x1f030, 0x1f093, + 0x1f100, 0x1f10a, + 0x1f110, 0x1f12e, + 0x1f131, 0x1f131, + 0x1f13d, 0x1f13d, + 0x1f13f, 0x1f13f, + 0x1f142, 0x1f142, + 0x1f146, 0x1f146, + 0x1f14a, 0x1f14e, + 0x1f157, 0x1f157, + 0x1f15f, 0x1f15f, + 0x1f179, 0x1f179, + 0x1f17b, 0x1f17c, + 0x1f17f, 0x1f17f, + 0x1f18a, 0x1f18d, + 0x1f190, 0x1f190, + 0x1f200, 0x1f200, + 0x1f210, 0x1f231, + 0x1f240, 0x1f248, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd, +}; /* CR_Graph */ + +/* 'Lower': [[:Lower:]] */ +#define CR_Lower CR_Lowercase + +/* 'Print': [[:Print:]] */ +static const OnigCodePoint CR_Print[] = { + 487, + 0x0020, 0x007e, + 0x00a0, 0x0377, + 0x037a, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x0525, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x0606, 0x061b, + 0x061e, 0x061f, + 
0x0621, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x07b1, + 0x07c0, 0x07fa, + 0x0800, 0x082d, + 0x0830, 0x083e, + 0x0900, 0x0939, + 0x093c, 0x094e, + 0x0950, 0x0955, + 0x0958, 0x0972, + 0x0979, 0x097f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fb, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a75, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b63, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd0, 0x0bd0, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c58, 0x0c59, + 0x0c60, 0x0c63, + 0x0c66, 0x0c6f, + 0x0c78, 0x0c7f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 
0x0ce0, 0x0ce3, + 0x0ce6, 0x0cef, + 0x0cf1, 0x0cf2, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d44, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d63, + 0x0d66, 0x0d75, + 0x0d79, 0x0d7f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fce, 0x0fd8, + 0x1000, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1400, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a5e, + 0x1a60, 0x1a7c, + 0x1a7f, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa0, 0x1aad, + 0x1b00, 0x1b4b, + 0x1b50, 0x1b7c, + 0x1b80, 0x1baa, + 0x1bae, 0x1bb9, + 0x1c00, 0x1c37, + 
0x1c3b, 0x1c49, + 0x1c4d, 0x1c7f, + 0x1cd0, 0x1cf2, + 0x1d00, 0x1de6, + 0x1dfd, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2027, + 0x202a, 0x2064, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b8, + 0x20d0, 0x20f0, + 0x2100, 0x2189, + 0x2190, 0x23e8, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x26cd, + 0x26cf, 0x26e1, + 0x26e3, 0x26e3, + 0x26e8, 0x26ff, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27ca, + 0x27cc, 0x27cc, + 0x27d0, 0x2b4c, + 0x2b50, 0x2b59, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2cf1, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2de0, 0x2e31, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31e3, + 0x31f0, 0x321e, + 0x3220, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa4d0, 0xa62b, + 0xa640, 0xa65f, + 0xa662, 0xa673, + 0xa67c, 0xa697, + 0xa6a0, 0xa6f7, + 0xa700, 0xa78c, + 0xa7fb, 0xa82b, + 0xa830, 0xa839, + 0xa840, 0xa877, + 0xa880, 0xa8c4, + 0xa8ce, 0xa8d9, + 0xa8e0, 0xa8fb, + 0xa900, 0xa953, + 0xa95f, 0xa97c, + 0xa980, 0xa9cd, + 0xa9cf, 0xa9d9, + 0xa9de, 0xa9df, + 0xaa00, 0xaa36, + 0xaa40, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa5c, 0xaa7b, + 0xaa80, 0xaac2, + 0xaadb, 0xaadf, + 0xabc0, 0xabed, + 0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xe000, 0xfa2d, + 
0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe26, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10190, 0x1019b, + 0x101d0, 0x101fd, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10857, 0x1085f, + 0x10900, 0x1091b, + 0x1091f, 0x10939, + 0x1093f, 0x1093f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x10a60, 0x10a7f, + 0x10b00, 0x10b35, + 0x10b39, 0x10b55, + 0x10b58, 0x10b72, + 0x10b78, 0x10b7f, + 0x10c00, 0x10c48, + 0x10e60, 0x10e7e, + 0x11080, 0x110c1, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x12470, 0x12473, + 0x13000, 0x1342e, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d129, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d360, 0x1d371, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 
0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7cb, + 0x1d7ce, 0x1d7ff, + 0x1f000, 0x1f02b, + 0x1f030, 0x1f093, + 0x1f100, 0x1f10a, + 0x1f110, 0x1f12e, + 0x1f131, 0x1f131, + 0x1f13d, 0x1f13d, + 0x1f13f, 0x1f13f, + 0x1f142, 0x1f142, + 0x1f146, 0x1f146, + 0x1f14a, 0x1f14e, + 0x1f157, 0x1f157, + 0x1f15f, 0x1f15f, + 0x1f179, 0x1f179, + 0x1f17b, 0x1f17c, + 0x1f17f, 0x1f17f, + 0x1f18a, 0x1f18d, + 0x1f190, 0x1f190, + 0x1f200, 0x1f200, + 0x1f210, 0x1f231, + 0x1f240, 0x1f248, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd, +}; /* CR_Print */ + +/* 'Punct': [[:Punct:]] */ +#define CR_Punct CR_P + +/* 'Space': [[:Space:]] */ +#define CR_Space CR_White_Space + +/* 'Upper': [[:Upper:]] */ +#define CR_Upper CR_Uppercase + +/* 'XDigit': [[:XDigit:]] */ +#define CR_XDigit CR_ASCII_Hex_Digit + +/* 'Word': [[:Word:]] */ +static const OnigCodePoint CR_Word[] = { + 506, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x005f, 0x005f, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ec, 0x02ec, + 0x02ee, 0x02ee, + 0x0300, 0x0374, + 0x0376, 0x0377, + 0x037a, 0x037d, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0525, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x061a, + 0x0621, 0x065e, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x07b1, + 0x07c0, 0x07f5, + 0x07fa, 0x07fa, + 0x0800, 0x082d, + 0x0900, 0x0939, + 0x093c, 0x094e, + 0x0950, 0x0955, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0971, 0x0972, + 0x0979, 0x097f, + 
0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a75, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b63, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd0, 0x0bd0, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c58, 0x0c59, + 0x0c60, 0x0c63, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce3, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d44, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d63, + 0x0d66, 0x0d6f, + 
0x0d7a, 0x0d7f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f29, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1049, + 0x1050, 0x109d, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x1a00, 0x1a1b, + 0x1a20, 0x1a5e, + 0x1a60, 0x1a7c, + 0x1a7f, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa7, 0x1aa7, + 0x1b00, 0x1b4b, + 0x1b50, 0x1b59, + 0x1b6b, 0x1b73, + 0x1b80, 0x1baa, + 0x1bae, 0x1bb9, + 
0x1c00, 0x1c37, + 0x1c40, 0x1c49, + 0x1c4d, 0x1c7d, + 0x1cd0, 0x1cd2, + 0x1cd4, 0x1cf2, + 0x1d00, 0x1de6, + 0x1dfd, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x20d0, 0x20f0, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2160, 0x2188, + 0x24b6, 0x24e9, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cf1, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2de0, 0x2dff, + 0x2e2f, 0x2e2f, + 0x3005, 0x3007, + 0x3021, 0x302f, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa62b, + 0xa640, 0xa65f, + 0xa662, 0xa672, + 0xa67c, 0xa67d, + 0xa67f, 0xa697, + 0xa6a0, 0xa6f1, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78c, + 0xa7fb, 0xa827, + 0xa840, 0xa873, + 0xa880, 0xa8c4, + 0xa8d0, 0xa8d9, + 0xa8e0, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa900, 0xa92d, + 0xa930, 0xa953, + 0xa960, 0xa97c, + 0xa980, 0xa9c0, + 0xa9cf, 0xa9d9, + 0xaa00, 0xaa36, + 0xaa40, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa60, 0xaa76, + 0xaa7a, 0xaa7b, + 0xaa80, 0xaac2, + 0xaadb, 0xaadd, + 0xabc0, 0xabea, + 0xabec, 0xabed, + 
0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe26, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff3f, 0xff3f, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10140, 0x10174, + 0x101fd, 0x101fd, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x10a60, 0x10a7c, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10c00, 0x10c48, + 0x11080, 0x110ba, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x13000, 0x1342e, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 
0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef, +}; /* CR_Word */ + +/* 'Alnum': [[:Alnum:]] */ +static const OnigCodePoint CR_Alnum[] = { + 497, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ec, 0x02ec, + 0x02ee, 0x02ee, + 0x0345, 0x0345, + 0x0370, 0x0374, + 0x0376, 0x0377, + 0x037a, 0x037d, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x0525, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x05b0, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x061a, + 0x0621, 0x0657, + 0x0659, 0x065e, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06e1, 0x06e8, + 0x06ed, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x073f, + 0x074d, 0x07b1, + 0x07c0, 0x07ea, + 0x07f4, 0x07f5, + 0x07fa, 0x07fa, + 0x0800, 0x0817, + 0x081a, 0x082c, + 0x0900, 0x0939, + 0x093d, 0x094c, + 0x094e, 0x094e, + 0x0950, 0x0950, + 0x0955, 0x0955, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0971, 0x0972, + 0x0979, 0x097f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cc, + 0x09ce, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4c, + 0x0a51, 0x0a51, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 
0x0a66, 0x0a75, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acc, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4c, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b63, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcc, + 0x0bd0, 0x0bd0, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3d, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4c, + 0x0c55, 0x0c56, + 0x0c58, 0x0c59, + 0x0c60, 0x0c63, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccc, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce3, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3d, 0x0d44, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4c, + 0x0d57, 0x0d57, + 0x0d60, 0x0d63, + 0x0d66, 0x0d6f, + 0x0d7a, 0x0d7f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e46, + 0x0e4d, 0x0e4d, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 
0x0ec6, 0x0ec6, + 0x0ecd, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f20, 0x0f29, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f71, 0x0f81, + 0x0f88, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x1000, 0x1036, + 0x1038, 0x1038, + 0x103b, 0x1049, + 0x1050, 0x1062, + 0x1065, 0x1068, + 0x106e, 0x1086, + 0x108e, 0x108e, + 0x1090, 0x1099, + 0x109c, 0x109d, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1713, + 0x1720, 0x1733, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17c8, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x1938, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x1a00, 0x1a1b, + 0x1a20, 0x1a5e, + 0x1a61, 0x1a74, + 0x1a80, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa7, 0x1aa7, + 0x1b00, 0x1b33, + 0x1b35, 0x1b43, + 0x1b45, 0x1b4b, + 0x1b50, 0x1b59, + 0x1b80, 0x1ba9, + 0x1bae, 0x1bb9, + 0x1c00, 0x1c35, + 0x1c40, 0x1c49, + 0x1c4d, 0x1c7d, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf2, + 0x1d00, 0x1dbf, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 
0x2090, 0x2094, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2160, 0x2188, + 0x24b6, 0x24e9, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cee, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2de0, 0x2dff, + 0x2e2f, 0x2e2f, + 0x3005, 0x3007, + 0x3021, 0x3029, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa62b, + 0xa640, 0xa65f, + 0xa662, 0xa66e, + 0xa67f, 0xa697, + 0xa6a0, 0xa6ef, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78c, + 0xa7fb, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa827, + 0xa840, 0xa873, + 0xa880, 0xa8c3, + 0xa8d0, 0xa8d9, + 0xa8f2, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa900, 0xa92a, + 0xa930, 0xa952, + 0xa960, 0xa97c, + 0xa980, 0xa9bf, + 0xa9cf, 0xa9d9, + 0xaa00, 0xaa36, + 0xaa40, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa60, 0xaa76, + 0xaa7a, 0xaa7a, + 0xaa80, 0xaabe, + 0xaac0, 0xaac0, + 0xaac2, 0xaac2, + 0xaadb, 0xaadd, + 0xabc0, 0xabea, + 0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 
0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10140, 0x10174, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031e, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a60, 0x10a7c, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10c00, 0x10c48, + 0x11082, 0x110b8, + 0x12000, 0x1236e, + 0x12400, 0x12462, + 0x13000, 0x1342e, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2f800, 0x2fa1d, +}; /* CR_Alnum */ + +/* 'ASCII': [[:ASCII:]] */ +static const OnigCodePoint CR_ASCII[] = { + 1, + 0x0000, 0x007f, +}; /* CR_ASCII */ + +static const OnigCodePoint* const CodeRanges[] = { + CR_NEWLINE, + CR_Alpha, + CR_Blank, + CR_Cntrl, + CR_Digit, + CR_Graph, + CR_Lower, + CR_Print, + CR_Punct, + CR_Space, + CR_Upper, + CR_XDigit, + CR_Word, + CR_Alnum, + CR_ASCII, +#ifdef USE_UNICODE_PROPERTIES +#ifdef USE_UNICODE_PROPERTIES + CR_Any, + CR_Assigned, + CR_C, + CR_Cc, + CR_Cf, + 
CR_Cn, + CR_Co, + CR_Cs, + CR_L, + CR_Ll, + CR_Lm, + CR_Lo, + CR_Lt, + CR_Lu, + CR_M, + CR_Mc, + CR_Me, + CR_Mn, + CR_N, + CR_Nd, + CR_Nl, + CR_No, + CR_P, + CR_Pc, + CR_Pd, + CR_Pe, + CR_Pf, + CR_Pi, + CR_Po, + CR_Ps, + CR_S, + CR_Sc, + CR_Sk, + CR_Sm, + CR_So, + CR_Z, + CR_Zl, + CR_Zp, + CR_Zs, + CR_Math, + CR_Alphabetic, + CR_Lowercase, + CR_Uppercase, + CR_Cased, + CR_Case_Ignorable, + CR_Changes_When_Lowercased, + CR_Changes_When_Uppercased, + CR_Changes_When_Titlecased, + CR_Changes_When_Casefolded, + CR_Changes_When_Casemapped, + CR_ID_Start, + CR_ID_Continue, + CR_XID_Start, + CR_XID_Continue, + CR_Default_Ignorable_Code_Point, + CR_Grapheme_Extend, + CR_Grapheme_Base, + CR_Grapheme_Link, + CR_Common, + CR_Latin, + CR_Greek, + CR_Cyrillic, + CR_Armenian, + CR_Hebrew, + CR_Arabic, + CR_Syriac, + CR_Thaana, + CR_Devanagari, + CR_Bengali, + CR_Gurmukhi, + CR_Gujarati, + CR_Oriya, + CR_Tamil, + CR_Telugu, + CR_Kannada, + CR_Malayalam, + CR_Sinhala, + CR_Thai, + CR_Lao, + CR_Tibetan, + CR_Myanmar, + CR_Georgian, + CR_Hangul, + CR_Ethiopic, + CR_Cherokee, + CR_Canadian_Aboriginal, + CR_Ogham, + CR_Runic, + CR_Khmer, + CR_Mongolian, + CR_Hiragana, + CR_Katakana, + CR_Bopomofo, + CR_Han, + CR_Yi, + CR_Old_Italic, + CR_Gothic, + CR_Deseret, + CR_Inherited, + CR_Tagalog, + CR_Hanunoo, + CR_Buhid, + CR_Tagbanwa, + CR_Limbu, + CR_Tai_Le, + CR_Linear_B, + CR_Ugaritic, + CR_Shavian, + CR_Osmanya, + CR_Cypriot, + CR_Braille, + CR_Buginese, + CR_Coptic, + CR_New_Tai_Lue, + CR_Glagolitic, + CR_Tifinagh, + CR_Syloti_Nagri, + CR_Old_Persian, + CR_Kharoshthi, + CR_Balinese, + CR_Cuneiform, + CR_Phoenician, + CR_Phags_Pa, + CR_Nko, + CR_Sundanese, + CR_Lepcha, + CR_Ol_Chiki, + CR_Vai, + CR_Saurashtra, + CR_Kayah_Li, + CR_Rejang, + CR_Lycian, + CR_Carian, + CR_Lydian, + CR_Cham, + CR_Tai_Tham, + CR_Tai_Viet, + CR_Avestan, + CR_Egyptian_Hieroglyphs, + CR_Samaritan, + CR_Lisu, + CR_Bamum, + CR_Javanese, + CR_Meetei_Mayek, + CR_Imperial_Aramaic, + CR_Old_South_Arabian, + 
CR_Inscriptional_Parthian, + CR_Inscriptional_Pahlavi, + CR_Old_Turkic, + CR_Kaithi, + CR_White_Space, + CR_Bidi_Control, + CR_Join_Control, + CR_Dash, + CR_Hyphen, + CR_Quotation_Mark, + CR_Terminal_Punctuation, + CR_Other_Math, + CR_Hex_Digit, + CR_ASCII_Hex_Digit, + CR_Other_Alphabetic, + CR_Ideographic, + CR_Diacritic, + CR_Extender, + CR_Other_Lowercase, + CR_Other_Uppercase, + CR_Noncharacter_Code_Point, + CR_Other_Grapheme_Extend, + CR_IDS_Binary_Operator, + CR_IDS_Trinary_Operator, + CR_Radical, + CR_Unified_Ideograph, + CR_Other_Default_Ignorable_Code_Point, + CR_Deprecated, + CR_Soft_Dotted, + CR_Logical_Order_Exception, + CR_Other_ID_Start, + CR_Other_ID_Continue, + CR_STerm, + CR_Variation_Selector, + CR_Pattern_White_Space, + CR_Pattern_Syntax, +#endif /* USE_UNICODE_PROPERTIES */ +#endif /* USE_UNICODE_PROPERTIES */ +}; +struct uniname2ctype_struct { + int name, ctype; +}; + +static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned int); + +#ifndef USE_UNICODE_PROPERTIES +#define TOTAL_KEYWORDS 15 +#define MIN_WORD_LENGTH 4 +#define MAX_WORD_LENGTH 7 +#define MIN_HASH_VALUE 7 +#define MAX_HASH_VALUE 21 +/* maximum key range = 15, duplicates = 0 */ +#else /* USE_UNICODE_PROPERTIES */ +#define TOTAL_KEYWORDS 367 +#define MIN_WORD_LENGTH 1 +#define MAX_WORD_LENGTH 30 +#define MIN_HASH_VALUE 3 +#define MAX_HASH_VALUE 1751 +/* maximum key range = 1749, duplicates = 0 */ +#endif /* USE_UNICODE_PROPERTIES */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +uniname2ctype_hash (str, len) + register const char *str; + register unsigned int len; +{ +#ifndef USE_UNICODE_PROPERTIES + static const unsigned char asso_values[] = +#else /* USE_UNICODE_PROPERTIES */ + static const unsigned short asso_values[] = +#endif /* USE_UNICODE_PROPERTIES */ + { +#ifndef USE_UNICODE_PROPERTIES + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 
22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 3, 13, 6, + 4, 22, 22, 11, 22, 1, 22, 22, 10, 22, + 2, 22, 1, 22, 10, 8, 4, 7, 22, 3, + 4, 22, 22, 22, 22, 22, 22, 22 +#else /* USE_UNICODE_PROPERTIES */ + 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, + 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, + 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, + 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, + 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, + 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, + 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, + 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, + 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, + 1752, 1752, 1752, 1752, 1752, 1752, 1752, 7, 419, 1, + 128, 38, 100, 186, 46, 11, 226, 463, 4, 317, + 6, 2, 265, 5, 18, 112, 30, 289, 164, 384, + 368, 579, 10, 1752, 1752, 1752, 1752, 1752 +#endif /* USE_UNICODE_PROPERTIES */ + }; +#ifndef USE_UNICODE_PROPERTIES + return len + asso_values[(unsigned char)str[2]] + asso_values[(unsigned char)str[0]]; +#else /* USE_UNICODE_PROPERTIES */ + register int hval = len; + + switch (hval) + { + default: + hval += asso_values[(unsigned char)str[15]]; + /*FALLTHROUGH*/ + case 15: + case 14: + case 13: + case 12: + hval += asso_values[(unsigned char)str[11]]; + /*FALLTHROUGH*/ + case 11: + case 10: + case 9: + case 8: + case 7: + case 6: + hval += asso_values[(unsigned char)str[5]]; + /*FALLTHROUGH*/ + case 5: + case 4: + case 3: + hval += asso_values[(unsigned char)str[2]]; + /*FALLTHROUGH*/ + case 2: + hval += asso_values[(unsigned char)str[1]]; + /*FALLTHROUGH*/ + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + 
return hval + asso_values[(unsigned char)str[len - 1]]; +#endif /* USE_UNICODE_PROPERTIES */ +} + +struct uniname2ctype_pool_t + { +#ifndef USE_UNICODE_PROPERTIES + char uniname2ctype_pool_str7[sizeof("print")]; + char uniname2ctype_pool_str8[sizeof("punct")]; + char uniname2ctype_pool_str9[sizeof("alpha")]; + char uniname2ctype_pool_str10[sizeof("alnum")]; + char uniname2ctype_pool_str11[sizeof("xdigit")]; + char uniname2ctype_pool_str12[sizeof("newline")]; + char uniname2ctype_pool_str13[sizeof("upper")]; + char uniname2ctype_pool_str14[sizeof("ascii")]; + char uniname2ctype_pool_str15[sizeof("cntrl")]; + char uniname2ctype_pool_str16[sizeof("space")]; + char uniname2ctype_pool_str17[sizeof("word")]; + char uniname2ctype_pool_str18[sizeof("lower")]; + char uniname2ctype_pool_str19[sizeof("graph")]; + char uniname2ctype_pool_str20[sizeof("digit")]; + char uniname2ctype_pool_str21[sizeof("blank")]; +#else /* USE_UNICODE_PROPERTIES */ + char uniname2ctype_pool_str3[sizeof("c")]; + char uniname2ctype_pool_str5[sizeof("cc")]; + char uniname2ctype_pool_str7[sizeof("co")]; + char uniname2ctype_pool_str9[sizeof("l")]; + char uniname2ctype_pool_str10[sizeof("lo")]; + char uniname2ctype_pool_str12[sizeof("no")]; + char uniname2ctype_pool_str13[sizeof("n")]; + char uniname2ctype_pool_str14[sizeof("ll")]; + char uniname2ctype_pool_str15[sizeof("cn")]; + char uniname2ctype_pool_str16[sizeof("nl")]; + char uniname2ctype_pool_str18[sizeof("lao")]; + char uniname2ctype_pool_str19[sizeof("laoo")]; + char uniname2ctype_pool_str20[sizeof("zl")]; + char uniname2ctype_pool_str21[sizeof("z")]; + char uniname2ctype_pool_str22[sizeof("control")]; + char uniname2ctype_pool_str24[sizeof("qaac")]; + char uniname2ctype_pool_str25[sizeof("ci")]; + char uniname2ctype_pool_str28[sizeof("lana")]; + char uniname2ctype_pool_str33[sizeof("oalpha")]; + char uniname2ctype_pool_str34[sizeof("qaai")]; + char uniname2ctype_pool_str40[sizeof("arabic")]; + char uniname2ctype_pool_str41[sizeof("cari")]; + 
char uniname2ctype_pool_str43[sizeof("oriya")]; + char uniname2ctype_pool_str44[sizeof("carian")]; + char uniname2ctype_pool_str46[sizeof("cntrl")]; + char uniname2ctype_pool_str49[sizeof("connectorpunctuation")]; + char uniname2ctype_pool_str50[sizeof("olower")]; + char uniname2ctype_pool_str51[sizeof("latn")]; + char uniname2ctype_pool_str52[sizeof("latin")]; + char uniname2ctype_pool_str56[sizeof("ital")]; + char uniname2ctype_pool_str65[sizeof("hano")]; + char uniname2ctype_pool_str66[sizeof("lt")]; + char uniname2ctype_pool_str68[sizeof("han")]; + char uniname2ctype_pool_str70[sizeof("hanunoo")]; + char uniname2ctype_pool_str71[sizeof("canadianaboriginal")]; + char uniname2ctype_pool_str73[sizeof("hangul")]; + char uniname2ctype_pool_str74[sizeof("hani")]; + char uniname2ctype_pool_str76[sizeof("nchar")]; + char uniname2ctype_pool_str77[sizeof("zinh")]; + char uniname2ctype_pool_str83[sizeof("tale")]; + char uniname2ctype_pool_str85[sizeof("loe")]; + char uniname2ctype_pool_str86[sizeof("hira")]; + char uniname2ctype_pool_str91[sizeof("taile")]; + char uniname2ctype_pool_str92[sizeof("lineseparator")]; + char uniname2ctype_pool_str94[sizeof("thaa")]; + char uniname2ctype_pool_str97[sizeof("hiragana")]; + char uniname2ctype_pool_str98[sizeof("thai")]; + char uniname2ctype_pool_str100[sizeof("initialpunctuation")]; + char uniname2ctype_pool_str101[sizeof("other")]; + char uniname2ctype_pool_str103[sizeof("thaana")]; + char uniname2ctype_pool_str107[sizeof("cher")]; + char uniname2ctype_pool_str111[sizeof("otherletter")]; + char uniname2ctype_pool_str113[sizeof("othernumber")]; + char uniname2ctype_pool_str114[sizeof("letter")]; + char uniname2ctype_pool_str116[sizeof("sc")]; + char uniname2ctype_pool_str118[sizeof("so")]; + char uniname2ctype_pool_str123[sizeof("taiviet")]; + char uniname2ctype_pool_str128[sizeof("noncharactercodepoint")]; + char uniname2ctype_pool_str129[sizeof("ethi")]; + char uniname2ctype_pool_str130[sizeof("cans")]; + char 
uniname2ctype_pool_str135[sizeof("titlecaseletter")]; + char uniname2ctype_pool_str136[sizeof("ascii")]; + char uniname2ctype_pool_str138[sizeof("letternumber")]; + char uniname2ctype_pool_str139[sizeof("otheralphabetic")]; + char uniname2ctype_pool_str141[sizeof("otherlowercase")]; + char uniname2ctype_pool_str144[sizeof("idc")]; + char uniname2ctype_pool_str146[sizeof("oidc")]; + char uniname2ctype_pool_str147[sizeof("sinhala")]; + char uniname2ctype_pool_str148[sizeof("terminalpunctuation")]; + char uniname2ctype_pool_str151[sizeof("olditalic")]; + char uniname2ctype_pool_str152[sizeof("di")]; + char uniname2ctype_pool_str153[sizeof("otheridcontinue")]; + char uniname2ctype_pool_str155[sizeof("odi")]; + char uniname2ctype_pool_str156[sizeof("dia")]; + char uniname2ctype_pool_str161[sizeof("otheridstart")]; + char uniname2ctype_pool_str162[sizeof("oldturkic")]; + char uniname2ctype_pool_str167[sizeof("diacritic")]; + char uniname2ctype_pool_str168[sizeof("oldpersian")]; + char uniname2ctype_pool_str171[sizeof("radical")]; + char uniname2ctype_pool_str179[sizeof("sinh")]; + char uniname2ctype_pool_str183[sizeof("ideo")]; + char uniname2ctype_pool_str185[sizeof("shavian")]; + char uniname2ctype_pool_str186[sizeof("format")]; + char uniname2ctype_pool_str192[sizeof("inscriptionalparthian")]; + char uniname2ctype_pool_str196[sizeof("vai")]; + char uniname2ctype_pool_str197[sizeof("vaii")]; + char uniname2ctype_pool_str202[sizeof("tifinagh")]; + char uniname2ctype_pool_str203[sizeof("cf")]; + char uniname2ctype_pool_str205[sizeof("othersymbol")]; + char uniname2ctype_pool_str207[sizeof("ideographic")]; + char uniname2ctype_pool_str211[sizeof("inherited")]; + char uniname2ctype_pool_str212[sizeof("glagolitic")]; + char uniname2ctype_pool_str218[sizeof("idcontinue")]; + char uniname2ctype_pool_str220[sizeof("asciihexdigit")]; + char uniname2ctype_pool_str224[sizeof("inscriptionalpahlavi")]; + char uniname2ctype_pool_str225[sizeof("s")]; + char 
uniname2ctype_pool_str226[sizeof("gothic")]; + char uniname2ctype_pool_str227[sizeof("cs")]; + char uniname2ctype_pool_str229[sizeof("avestan")]; + char uniname2ctype_pool_str235[sizeof("tavt")]; + char uniname2ctype_pool_str236[sizeof("zs")]; + char uniname2ctype_pool_str243[sizeof("decimalnumber")]; + char uniname2ctype_pool_str244[sizeof("tagbanwa")]; + char uniname2ctype_pool_str245[sizeof("joinc")]; + char uniname2ctype_pool_str248[sizeof("geor")]; + char uniname2ctype_pool_str249[sizeof("hang")]; + char uniname2ctype_pool_str251[sizeof("georgian")]; + char uniname2ctype_pool_str253[sizeof("cased")]; + char uniname2ctype_pool_str256[sizeof("joincontrol")]; + char uniname2ctype_pool_str257[sizeof("oids")]; + char uniname2ctype_pool_str260[sizeof("variationselector")]; + char uniname2ctype_pool_str262[sizeof("graph")]; + char uniname2ctype_pool_str263[sizeof("changeswhenlowercased")]; + char uniname2ctype_pool_str264[sizeof("nd")]; + char uniname2ctype_pool_str268[sizeof("goth")]; + char uniname2ctype_pool_str269[sizeof("pc")]; + char uniname2ctype_pool_str271[sizeof("po")]; + char uniname2ctype_pool_str272[sizeof("ogrext")]; + char uniname2ctype_pool_str276[sizeof("coptic")]; + char uniname2ctype_pool_str277[sizeof("grext")]; + char uniname2ctype_pool_str282[sizeof("logicalorderexception")]; + char uniname2ctype_pool_str285[sizeof("idst")]; + char uniname2ctype_pool_str288[sizeof("alpha")]; + char uniname2ctype_pool_str289[sizeof("pi")]; + char uniname2ctype_pool_str292[sizeof("dsrt")]; + char uniname2ctype_pool_str297[sizeof("dash")]; + char uniname2ctype_pool_str298[sizeof("otherdefaultignorablecodepoint")]; + char uniname2ctype_pool_str302[sizeof("copt")]; + char uniname2ctype_pool_str306[sizeof("idstart")]; + char uniname2ctype_pool_str307[sizeof("closepunctuation")]; + char uniname2ctype_pool_str309[sizeof("changeswhentitlecased")]; + char uniname2ctype_pool_str312[sizeof("lepc")]; + char uniname2ctype_pool_str317[sizeof("avst")]; + char 
uniname2ctype_pool_str318[sizeof("cprt")]; + char uniname2ctype_pool_str319[sizeof("runic")]; + char uniname2ctype_pool_str320[sizeof("patsyn")]; + char uniname2ctype_pool_str321[sizeof("mc")]; + char uniname2ctype_pool_str326[sizeof("tfng")]; + char uniname2ctype_pool_str327[sizeof("lepcha")]; + char uniname2ctype_pool_str328[sizeof("prti")]; + char uniname2ctype_pool_str329[sizeof("print")]; + char uniname2ctype_pool_str330[sizeof("phli")]; + char uniname2ctype_pool_str331[sizeof("mn")]; + char uniname2ctype_pool_str332[sizeof("idsbinaryoperator")]; + char uniname2ctype_pool_str334[sizeof("talu")]; + char uniname2ctype_pool_str335[sizeof("runr")]; + char uniname2ctype_pool_str337[sizeof("graphemebase")]; + char uniname2ctype_pool_str338[sizeof("common")]; + char uniname2ctype_pool_str339[sizeof("alnum")]; + char uniname2ctype_pool_str340[sizeof("phoenician")]; + char uniname2ctype_pool_str341[sizeof("deva")]; + char uniname2ctype_pool_str342[sizeof("changeswhencasefolded")]; + char uniname2ctype_pool_str343[sizeof("pe")]; + char uniname2ctype_pool_str344[sizeof("mongolian")]; + char uniname2ctype_pool_str352[sizeof("armn")]; + char uniname2ctype_pool_str353[sizeof("deseret")]; + char uniname2ctype_pool_str354[sizeof("softdotted")]; + char uniname2ctype_pool_str357[sizeof("armi")]; + char uniname2ctype_pool_str358[sizeof("devanagari")]; + char uniname2ctype_pool_str360[sizeof("digit")]; + char uniname2ctype_pool_str361[sizeof("caseignorable")]; + char uniname2ctype_pool_str362[sizeof("taml")]; + char uniname2ctype_pool_str363[sizeof("tamil")]; + char uniname2ctype_pool_str365[sizeof("telu")]; + char uniname2ctype_pool_str366[sizeof("ids")]; + char uniname2ctype_pool_str367[sizeof("armenian")]; + char uniname2ctype_pool_str370[sizeof("sd")]; + char uniname2ctype_pool_str372[sizeof("privateuse")]; + char uniname2ctype_pool_str373[sizeof("assigned")]; + char uniname2ctype_pool_str375[sizeof("cham")]; + char uniname2ctype_pool_str377[sizeof("omath")]; + char 
uniname2ctype_pool_str378[sizeof("otherpunctuation")]; + char uniname2ctype_pool_str379[sizeof("taitham")]; + char uniname2ctype_pool_str381[sizeof("defaultignorablecodepoint")]; + char uniname2ctype_pool_str387[sizeof("glag")]; + char uniname2ctype_pool_str388[sizeof("ethiopic")]; + char uniname2ctype_pool_str390[sizeof("vs")]; + char uniname2ctype_pool_str395[sizeof("me")]; + char uniname2ctype_pool_str396[sizeof("cwl")]; + char uniname2ctype_pool_str400[sizeof("mtei")]; + char uniname2ctype_pool_str404[sizeof("math")]; + char uniname2ctype_pool_str407[sizeof("term")]; + char uniname2ctype_pool_str408[sizeof("java")]; + char uniname2ctype_pool_str410[sizeof("tglg")]; + char uniname2ctype_pool_str413[sizeof("lower")]; + char uniname2ctype_pool_str414[sizeof("patternwhitespace")]; + char uniname2ctype_pool_str417[sizeof("finalpunctuation")]; + char uniname2ctype_pool_str418[sizeof("tagalog")]; + char uniname2ctype_pool_str419[sizeof("patws")]; + char uniname2ctype_pool_str420[sizeof("lisu")]; + char uniname2ctype_pool_str426[sizeof("otheruppercase")]; + char uniname2ctype_pool_str427[sizeof("space")]; + char uniname2ctype_pool_str429[sizeof("graphemeextend")]; + char uniname2ctype_pool_str430[sizeof("saur")]; + char uniname2ctype_pool_str435[sizeof("uideo")]; + char uniname2ctype_pool_str438[sizeof("lowercase")]; + char uniname2ctype_pool_str440[sizeof("rjng")]; + char uniname2ctype_pool_str442[sizeof("osma")]; + char uniname2ctype_pool_str444[sizeof("linb")]; + char uniname2ctype_pool_str445[sizeof("bali")]; + char uniname2ctype_pool_str448[sizeof("cwt")]; + char uniname2ctype_pool_str449[sizeof("separator")]; + char uniname2ctype_pool_str450[sizeof("othermath")]; + char uniname2ctype_pool_str451[sizeof("unassigned")]; + char uniname2ctype_pool_str454[sizeof("lowercaseletter")]; + char uniname2ctype_pool_str455[sizeof("arab")]; + char uniname2ctype_pool_str458[sizeof("samr")]; + char uniname2ctype_pool_str459[sizeof("brai")]; + char 
uniname2ctype_pool_str460[sizeof("sundanese")]; + char uniname2ctype_pool_str462[sizeof("samaritan")]; + char uniname2ctype_pool_str463[sizeof("ahex")]; + char uniname2ctype_pool_str465[sizeof("linearb")]; + char uniname2ctype_pool_str467[sizeof("pf")]; + char uniname2ctype_pool_str469[sizeof("ext")]; + char uniname2ctype_pool_str474[sizeof("olck")]; + char uniname2ctype_pool_str476[sizeof("nko")]; + char uniname2ctype_pool_str477[sizeof("nkoo")]; + char uniname2ctype_pool_str479[sizeof("newline")]; + char uniname2ctype_pool_str480[sizeof("tibetan")]; + char uniname2ctype_pool_str481[sizeof("javanese")]; + char uniname2ctype_pool_str485[sizeof("bengali")]; + char uniname2ctype_pool_str486[sizeof("newtailue")]; + char uniname2ctype_pool_str487[sizeof("kana")]; + char uniname2ctype_pool_str488[sizeof("olchiki")]; + char uniname2ctype_pool_str489[sizeof("kali")]; + char uniname2ctype_pool_str490[sizeof("cwcf")]; + char uniname2ctype_pool_str491[sizeof("ps")]; + char uniname2ctype_pool_str493[sizeof("braille")]; + char uniname2ctype_pool_str494[sizeof("tibt")]; + char uniname2ctype_pool_str502[sizeof("sterm")]; + char uniname2ctype_pool_str504[sizeof("ugar")]; + char uniname2ctype_pool_str505[sizeof("nonspacingmark")]; + char uniname2ctype_pool_str508[sizeof("phag")]; + char uniname2ctype_pool_str509[sizeof("kaithi")]; + char uniname2ctype_pool_str512[sizeof("xidc")]; + char uniname2ctype_pool_str514[sizeof("balinese")]; + char uniname2ctype_pool_str515[sizeof("mong")]; + char uniname2ctype_pool_str516[sizeof("ogam")]; + char uniname2ctype_pool_str520[sizeof("modifierletter")]; + char uniname2ctype_pool_str521[sizeof("ugaritic")]; + char uniname2ctype_pool_str522[sizeof("katakana")]; + char uniname2ctype_pool_str523[sizeof("pd")]; + char uniname2ctype_pool_str525[sizeof("hebr")]; + char uniname2ctype_pool_str531[sizeof("p")]; + char uniname2ctype_pool_str533[sizeof("orkh")]; + char uniname2ctype_pool_str536[sizeof("word")]; + char 
uniname2ctype_pool_str537[sizeof("saurashtra")]; + char uniname2ctype_pool_str538[sizeof("khar")]; + char uniname2ctype_pool_str539[sizeof("sund")]; + char uniname2ctype_pool_str542[sizeof("zp")]; + char uniname2ctype_pool_str548[sizeof("changeswhenuppercased")]; + char uniname2ctype_pool_str552[sizeof("xidstart")]; + char uniname2ctype_pool_str553[sizeof("shaw")]; + char uniname2ctype_pool_str554[sizeof("kthi")]; + char uniname2ctype_pool_str556[sizeof("ogham")]; + char uniname2ctype_pool_str558[sizeof("spaceseparator")]; + char uniname2ctype_pool_str559[sizeof("changeswhencasemapped")]; + char uniname2ctype_pool_str560[sizeof("sarb")]; + char uniname2ctype_pool_str562[sizeof("xidcontinue")]; + char uniname2ctype_pool_str564[sizeof("bidic")]; + char uniname2ctype_pool_str570[sizeof("deprecated")]; + char uniname2ctype_pool_str573[sizeof("xdigit")]; + char uniname2ctype_pool_str575[sizeof("bidicontrol")]; + char uniname2ctype_pool_str584[sizeof("lu")]; + char uniname2ctype_pool_str587[sizeof("dashpunctuation")]; + char uniname2ctype_pool_str590[sizeof("extender")]; + char uniname2ctype_pool_str593[sizeof("idstrinaryoperator")]; + char uniname2ctype_pool_str594[sizeof("cherokee")]; + char uniname2ctype_pool_str595[sizeof("punct")]; + char uniname2ctype_pool_str597[sizeof("phagspa")]; + char uniname2ctype_pool_str598[sizeof("oupper")]; + char uniname2ctype_pool_str599[sizeof("lyci")]; + char uniname2ctype_pool_str601[sizeof("whitespace")]; + char uniname2ctype_pool_str602[sizeof("lycian")]; + char uniname2ctype_pool_str603[sizeof("yi")]; + char uniname2ctype_pool_str606[sizeof("cyrl")]; + char uniname2ctype_pool_str608[sizeof("knda")]; + char uniname2ctype_pool_str610[sizeof("orya")]; + char uniname2ctype_pool_str611[sizeof("cyrillic")]; + char uniname2ctype_pool_str616[sizeof("yiii")]; + char uniname2ctype_pool_str618[sizeof("kannada")]; + char uniname2ctype_pool_str623[sizeof("xids")]; + char uniname2ctype_pool_str626[sizeof("limbu")]; + char 
uniname2ctype_pool_str635[sizeof("m")]; + char uniname2ctype_pool_str638[sizeof("unifiedideograph")]; + char uniname2ctype_pool_str639[sizeof("paragraphseparator")]; + char uniname2ctype_pool_str640[sizeof("lm")]; + char uniname2ctype_pool_str645[sizeof("openpunctuation")]; + char uniname2ctype_pool_str646[sizeof("tagb")]; + char uniname2ctype_pool_str649[sizeof("kharoshthi")]; + char uniname2ctype_pool_str651[sizeof("enclosingmark")]; + char uniname2ctype_pool_str652[sizeof("surrogate")]; + char uniname2ctype_pool_str653[sizeof("beng")]; + char uniname2ctype_pool_str654[sizeof("number")]; + char uniname2ctype_pool_str656[sizeof("telugu")]; + char uniname2ctype_pool_str660[sizeof("rejang")]; + char uniname2ctype_pool_str661[sizeof("malayalam")]; + char uniname2ctype_pool_str674[sizeof("idsb")]; + char uniname2ctype_pool_str676[sizeof("hexdigit")]; + char uniname2ctype_pool_str677[sizeof("xpeo")]; + char uniname2ctype_pool_str689[sizeof("phnx")]; + char uniname2ctype_pool_str692[sizeof("bopo")]; + char uniname2ctype_pool_str698[sizeof("bopomofo")]; + char uniname2ctype_pool_str699[sizeof("dep")]; + char uniname2ctype_pool_str701[sizeof("sylo")]; + char uniname2ctype_pool_str705[sizeof("grbase")]; + char uniname2ctype_pool_str706[sizeof("alphabetic")]; + char uniname2ctype_pool_str707[sizeof("cwcm")]; + char uniname2ctype_pool_str708[sizeof("patternsyntax")]; + char uniname2ctype_pool_str709[sizeof("grek")]; + char uniname2ctype_pool_str710[sizeof("greek")]; + char uniname2ctype_pool_str714[sizeof("syrc")]; + char uniname2ctype_pool_str717[sizeof("syriac")]; + char uniname2ctype_pool_str722[sizeof("cuneiform")]; + char uniname2ctype_pool_str723[sizeof("gujr")]; + char uniname2ctype_pool_str726[sizeof("lydi")]; + char uniname2ctype_pool_str727[sizeof("gujarati")]; + char uniname2ctype_pool_str728[sizeof("sylotinagri")]; + char uniname2ctype_pool_str729[sizeof("lydian")]; + char uniname2ctype_pool_str748[sizeof("sm")]; + char 
uniname2ctype_pool_str751[sizeof("currencysymbol")]; + char uniname2ctype_pool_str755[sizeof("limb")]; + char uniname2ctype_pool_str758[sizeof("othergraphemeextend")]; + char uniname2ctype_pool_str786[sizeof("guru")]; + char uniname2ctype_pool_str797[sizeof("qmark")]; + char uniname2ctype_pool_str809[sizeof("mark")]; + char uniname2ctype_pool_str820[sizeof("quotationmark")]; + char uniname2ctype_pool_str823[sizeof("hex")]; + char uniname2ctype_pool_str842[sizeof("upper")]; + char uniname2ctype_pool_str843[sizeof("wspace")]; + char uniname2ctype_pool_str848[sizeof("khmr")]; + char uniname2ctype_pool_str849[sizeof("khmer")]; + char uniname2ctype_pool_str863[sizeof("oldsoutharabian")]; + char uniname2ctype_pool_str864[sizeof("spacingmark")]; + char uniname2ctype_pool_str866[sizeof("punctuation")]; + char uniname2ctype_pool_str867[sizeof("uppercase")]; + char uniname2ctype_pool_str878[sizeof("meeteimayek")]; + char uniname2ctype_pool_str883[sizeof("uppercaseletter")]; + char uniname2ctype_pool_str884[sizeof("cypriot")]; + char uniname2ctype_pool_str886[sizeof("buhd")]; + char uniname2ctype_pool_str887[sizeof("buhid")]; + char uniname2ctype_pool_str895[sizeof("modifiersymbol")]; + char uniname2ctype_pool_str898[sizeof("blank")]; + char uniname2ctype_pool_str908[sizeof("hyphen")]; + char uniname2ctype_pool_str909[sizeof("bugi")]; + char uniname2ctype_pool_str935[sizeof("myanmar")]; + char uniname2ctype_pool_str937[sizeof("imperialaramaic")]; + char uniname2ctype_pool_str947[sizeof("mathsymbol")]; + char uniname2ctype_pool_str966[sizeof("cwu")]; + char uniname2ctype_pool_str975[sizeof("gurmukhi")]; + char uniname2ctype_pool_str978[sizeof("buginese")]; + char uniname2ctype_pool_str1022[sizeof("symbol")]; + char uniname2ctype_pool_str1024[sizeof("osmanya")]; + char uniname2ctype_pool_str1036[sizeof("bamu")]; + char uniname2ctype_pool_str1040[sizeof("sk")]; + char uniname2ctype_pool_str1065[sizeof("bamum")]; + char uniname2ctype_pool_str1071[sizeof("kayahli")]; + char 
uniname2ctype_pool_str1072[sizeof("egyp")]; + char uniname2ctype_pool_str1140[sizeof("grlink")]; + char uniname2ctype_pool_str1141[sizeof("xsux")]; + char uniname2ctype_pool_str1174[sizeof("any")]; + char uniname2ctype_pool_str1187[sizeof("graphemelink")]; + char uniname2ctype_pool_str1221[sizeof("mlym")]; + char uniname2ctype_pool_str1235[sizeof("mymr")]; + char uniname2ctype_pool_str1277[sizeof("hebrew")]; + char uniname2ctype_pool_str1542[sizeof("egyptianhieroglyphs")]; + char uniname2ctype_pool_str1751[sizeof("zyyy")]; +#endif /* USE_UNICODE_PROPERTIES */ + }; +static const struct uniname2ctype_pool_t uniname2ctype_pool_contents = + { +#ifndef USE_UNICODE_PROPERTIES + "print", + "punct", +#else /* USE_UNICODE_PROPERTIES */ + "c", + "cc", + "co", + "l", + "lo", + "no", + "n", + "ll", + "cn", + "nl", + "lao", + "laoo", + "zl", + "z", + "control", + "qaac", + "ci", + "lana", + "oalpha", + "qaai", + "arabic", + "cari", + "oriya", + "carian", + "cntrl", + "connectorpunctuation", + "olower", + "latn", + "latin", + "ital", + "hano", + "lt", + "han", + "hanunoo", + "canadianaboriginal", + "hangul", + "hani", + "nchar", + "zinh", + "tale", + "loe", + "hira", + "taile", + "lineseparator", + "thaa", + "hiragana", + "thai", + "initialpunctuation", + "other", + "thaana", + "cher", + "otherletter", + "othernumber", + "letter", + "sc", + "so", + "taiviet", + "noncharactercodepoint", + "ethi", + "cans", + "titlecaseletter", + "ascii", + "letternumber", + "otheralphabetic", + "otherlowercase", + "idc", + "oidc", + "sinhala", + "terminalpunctuation", + "olditalic", + "di", + "otheridcontinue", + "odi", + "dia", + "otheridstart", + "oldturkic", + "diacritic", + "oldpersian", + "radical", + "sinh", + "ideo", + "shavian", + "format", + "inscriptionalparthian", + "vai", + "vaii", + "tifinagh", + "cf", + "othersymbol", + "ideographic", + "inherited", + "glagolitic", + "idcontinue", + "asciihexdigit", + "inscriptionalpahlavi", + "s", + "gothic", + "cs", + "avestan", + "tavt", + "zs", 
+ "decimalnumber", + "tagbanwa", + "joinc", + "geor", + "hang", + "georgian", + "cased", + "joincontrol", + "oids", + "variationselector", + "graph", + "changeswhenlowercased", + "nd", + "goth", + "pc", + "po", + "ogrext", + "coptic", + "grext", + "logicalorderexception", + "idst", +#endif /* USE_UNICODE_PROPERTIES */ + "alpha", +#ifdef USE_UNICODE_PROPERTIES + "pi", + "dsrt", + "dash", + "otherdefaultignorablecodepoint", + "copt", + "idstart", + "closepunctuation", + "changeswhentitlecased", + "lepc", + "avst", + "cprt", + "runic", + "patsyn", + "mc", + "tfng", + "lepcha", + "prti", + "print", + "phli", + "mn", + "idsbinaryoperator", + "talu", + "runr", + "graphemebase", + "common", +#endif /* USE_UNICODE_PROPERTIES */ + "alnum", +#ifndef USE_UNICODE_PROPERTIES + "xdigit", + "newline", + "upper", + "ascii", + "cntrl", +#else /* USE_UNICODE_PROPERTIES */ + "phoenician", + "deva", + "changeswhencasefolded", + "pe", + "mongolian", + "armn", + "deseret", + "softdotted", + "armi", + "devanagari", + "digit", + "caseignorable", + "taml", + "tamil", + "telu", + "ids", + "armenian", + "sd", + "privateuse", + "assigned", + "cham", + "omath", + "otherpunctuation", + "taitham", + "defaultignorablecodepoint", + "glag", + "ethiopic", + "vs", + "me", + "cwl", + "mtei", + "math", + "term", + "java", + "tglg", + "lower", + "patternwhitespace", + "finalpunctuation", + "tagalog", + "patws", + "lisu", + "otheruppercase", +#endif /* USE_UNICODE_PROPERTIES */ + "space", +#ifdef USE_UNICODE_PROPERTIES + "graphemeextend", + "saur", + "uideo", + "lowercase", + "rjng", + "osma", + "linb", + "bali", + "cwt", + "separator", + "othermath", + "unassigned", + "lowercaseletter", + "arab", + "samr", + "brai", + "sundanese", + "samaritan", + "ahex", + "linearb", + "pf", + "ext", + "olck", + "nko", + "nkoo", + "newline", + "tibetan", + "javanese", + "bengali", + "newtailue", + "kana", + "olchiki", + "kali", + "cwcf", + "ps", + "braille", + "tibt", + "sterm", + "ugar", + "nonspacingmark", + "phag", 
+ "kaithi", + "xidc", + "balinese", + "mong", + "ogam", + "modifierletter", + "ugaritic", + "katakana", + "pd", + "hebr", + "p", + "orkh", +#endif /* USE_UNICODE_PROPERTIES */ + "word", +#ifndef USE_UNICODE_PROPERTIES + "lower", + "graph", + "digit", + "blank" +#else /* USE_UNICODE_PROPERTIES */ + "saurashtra", + "khar", + "sund", + "zp", + "changeswhenuppercased", + "xidstart", + "shaw", + "kthi", + "ogham", + "spaceseparator", + "changeswhencasemapped", + "sarb", + "xidcontinue", + "bidic", + "deprecated", + "xdigit", + "bidicontrol", + "lu", + "dashpunctuation", + "extender", + "idstrinaryoperator", + "cherokee", + "punct", + "phagspa", + "oupper", + "lyci", + "whitespace", + "lycian", + "yi", + "cyrl", + "knda", + "orya", + "cyrillic", + "yiii", + "kannada", + "xids", + "limbu", + "m", + "unifiedideograph", + "paragraphseparator", + "lm", + "openpunctuation", + "tagb", + "kharoshthi", + "enclosingmark", + "surrogate", + "beng", + "number", + "telugu", + "rejang", + "malayalam", + "idsb", + "hexdigit", + "xpeo", + "phnx", + "bopo", + "bopomofo", + "dep", + "sylo", + "grbase", + "alphabetic", + "cwcm", + "patternsyntax", + "grek", + "greek", + "syrc", + "syriac", + "cuneiform", + "gujr", + "lydi", + "gujarati", + "sylotinagri", + "lydian", + "sm", + "currencysymbol", + "limb", + "othergraphemeextend", + "guru", + "qmark", + "mark", + "quotationmark", + "hex", + "upper", + "wspace", + "khmr", + "khmer", + "oldsoutharabian", + "spacingmark", + "punctuation", + "uppercase", + "meeteimayek", + "uppercaseletter", + "cypriot", + "buhd", + "buhid", + "modifiersymbol", + "blank", + "hyphen", + "bugi", + "myanmar", + "imperialaramaic", + "mathsymbol", + "cwu", + "gurmukhi", + "buginese", + "symbol", + "osmanya", + "bamu", + "sk", + "bamum", + "kayahli", + "egyp", + "grlink", + "xsux", + "any", + "graphemelink", + "mlym", + "mymr", + "hebrew", + "egyptianhieroglyphs", + "zyyy" +#endif /* USE_UNICODE_PROPERTIES */ + }; +#define uniname2ctype_pool ((const char *) 
&uniname2ctype_pool_contents) +#ifdef __GNUC__ +__inline +#ifdef __GNUC_STDC_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct uniname2ctype_struct * +uniname2ctype_p (str, len) + register const char *str; + register unsigned int len; +{ + static const struct uniname2ctype_struct wordlist[] = + { +#ifdef USE_UNICODE_PROPERTIES + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str3, 17}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str5, 18}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str7, 21}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str9, 23}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str10, 26}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str12, 36}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str13, 33}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str14, 24}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str15, 20}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str16, 35}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str18, 93}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str19, 93}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str20, 51}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str21, 50}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str22, 18}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str24, 127}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str25, 59}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str28, 150}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t 
*)0)->uniname2ctype_pool_str33, 175}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str34, 113}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str40, 79}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str41, 147}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str43, 86}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str44, 147}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str46, 3}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str49, 38}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str50, 179}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str51, 74}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str52, 74}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str56, 110}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str65, 115}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str66, 27}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str68, 108}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str70, 115}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str71, 100}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str73, 97}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str74, 108}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str76, 181}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str77, 113}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str83, 119}, + {-1}, + {(int)(long)&((struct 
uniname2ctype_pool_t *)0)->uniname2ctype_pool_str85, 190}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str86, 105}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str91, 119}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str92, 51}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str94, 81}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str97, 105}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str98, 92}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str100, 42}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str101, 17}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str103, 81}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str107, 99}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str111, 26}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str113, 36}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str114, 23}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str116, 46}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str118, 49}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str123, 151}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str128, 181}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str129, 98}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str130, 100}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str135, 27}, + {(int)(long)&((struct uniname2ctype_pool_t 
*)0)->uniname2ctype_pool_str136, 14}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str138, 35}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str139, 175}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str141, 179}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str144, 66}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str146, 192}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str147, 91}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str148, 171}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str151, 110}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str152, 69}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str153, 192}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str155, 187}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str156, 177}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str161, 191}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str162, 163}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str167, 177}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str168, 132}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str171, 185}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str179, 91}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str183, 176}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str185, 122}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str186, 19}, + 
{-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str192, 161}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str196, 142}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str197, 142}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str202, 130}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str203, 19}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str205, 49}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str207, 176}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str211, 113}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str212, 129}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str218, 66}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str220, 174}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str224, 162}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str225, 45}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str226, 111}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str227, 22}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str229, 152}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str235, 151}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str236, 53}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str243, 34}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str244, 117}, + {(int)(long)&((struct uniname2ctype_pool_t 
*)0)->uniname2ctype_pool_str245, 167}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str248, 96}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str249, 97}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str251, 96}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str253, 58}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str256, 167}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str257, 191}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str260, 194}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str262, 5}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str263, 60}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str264, 34}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str268, 111}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str269, 38}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str271, 43}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str272, 182}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str276, 127}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str277, 70}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str282, 190}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str285, 184}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str288, 1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str289, 42}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str292, 112}, + {-1}, {-1}, {-1}, {-1}, + 
{(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str297, 168}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str298, 187}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str302, 127}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str306, 65}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str307, 40}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str309, 62}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str312, 140}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str317, 152}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str318, 124}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str319, 102}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str320, 196}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str321, 30}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str326, 130}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str327, 140}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str328, 161}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str329, 7}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str330, 162}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str331, 32}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str332, 183}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str334, 128}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str335, 102}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str337, 71}, + {(int)(long)&((struct 
uniname2ctype_pool_t *)0)->uniname2ctype_pool_str338, 73}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str339, 13}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str340, 136}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str341, 82}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str342, 63}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str343, 40}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str344, 104}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str352, 77}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str353, 112}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str354, 189}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str357, 159}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str358, 82}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str360, 4}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str361, 59}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str362, 87}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str363, 87}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str365, 88}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str366, 65}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str367, 77}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str370, 189}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str372, 21}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str373, 16}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str375, 149}, + 
{-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str377, 172}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str378, 43}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str379, 150}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str381, 69}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str387, 129}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str388, 98}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str390, 194}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str395, 31}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str396, 60}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str400, 158}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str404, 54}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str407, 171}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str408, 157}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str410, 114}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str413, 6}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str414, 195}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str417, 41}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str418, 114}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str419, 195}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str420, 155}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str426, 180}, + {(int)(long)&((struct 
uniname2ctype_pool_t *)0)->uniname2ctype_pool_str427, 9}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str429, 70}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str430, 143}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str435, 186}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str438, 56}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str440, 145}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str442, 123}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str444, 120}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str445, 134}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str448, 62}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str449, 50}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str450, 172}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str451, 20}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str454, 24}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str455, 79}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str458, 154}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str459, 125}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str460, 139}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str462, 154}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str463, 174}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str465, 120}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str467, 41}, + {-1}, + {(int)(long)&((struct 
uniname2ctype_pool_t *)0)->uniname2ctype_pool_str469, 178}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str474, 141}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str476, 138}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str477, 138}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str479, 0}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str480, 94}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str481, 157}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str485, 83}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str486, 128}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str487, 106}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str488, 141}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str489, 144}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str490, 63}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str491, 44}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str493, 125}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str494, 94}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str502, 193}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str504, 121}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str505, 32}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str508, 137}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str509, 164}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str512, 68}, + {-1}, + {(int)(long)&((struct 
uniname2ctype_pool_t *)0)->uniname2ctype_pool_str514, 134}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str515, 104}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str516, 101}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str520, 25}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str521, 121}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str522, 106}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str523, 39}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str525, 78}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str531, 37}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str533, 163}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str536, 12}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str537, 143}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str538, 133}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str539, 139}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str542, 52}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str548, 61}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str552, 67}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str553, 122}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str554, 164}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str556, 101}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str558, 53}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str559, 64}, + 
{(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str560, 160}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str562, 68}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str564, 166}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str570, 188}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str573, 11}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str575, 166}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str584, 28}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str587, 39}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str590, 178}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str593, 184}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str594, 99}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str595, 8}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str597, 137}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str598, 180}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str599, 146}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str601, 165}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str602, 146}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str603, 109}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str606, 76}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str608, 89}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str610, 86}, + {(int)(long)&((struct 
uniname2ctype_pool_t *)0)->uniname2ctype_pool_str611, 76}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str616, 109}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str618, 89}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str623, 67}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str626, 118}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str635, 29}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str638, 186}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str639, 52}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str640, 25}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str645, 44}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str646, 117}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str649, 133}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str651, 31}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str652, 22}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str653, 83}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str654, 33}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str656, 88}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str660, 145}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str661, 90}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str674, 183}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t 
*)0)->uniname2ctype_pool_str676, 173}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str677, 132}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str689, 136}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str692, 107}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str698, 107}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str699, 188}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str701, 131}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str705, 71}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str706, 55}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str707, 64}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str708, 196}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str709, 75}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str710, 75}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str714, 80}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str717, 80}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str722, 135}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str723, 85}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str726, 148}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str727, 85}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str728, 131}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str729, 148}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str748, 48}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str751, 46}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str755, 118}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str758, 182}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str786, 84}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str797, 170}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str809, 29}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str820, 170}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str823, 173}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str842, 10}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str843, 165}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str848, 103}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str849, 103}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str863, 160}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str864, 30}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t 
*)0)->uniname2ctype_pool_str866, 37}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str867, 57}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str878, 158}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str883, 28}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str884, 124}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str886, 116}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str887, 116}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str895, 47}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str898, 2}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str908, 169}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str909, 126}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str935, 95}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str937, 159}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str947, 48}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str966, 61}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str975, 84}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str978, 126}, + {-1}, {-1}, 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, +#endif /* USE_UNICODE_PROPERTIES */ + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, +#ifndef USE_UNICODE_PROPERTIES + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str7, 7}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str8, 8}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str9, 1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str10, 13}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str11, 11}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str12, 0}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str13, 10}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str14, 14}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str15, 3}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str16, 9}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str17, 12}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str18, 6}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str19, 5}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str20, 4}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str21, 2} +#else /* USE_UNICODE_PROPERTIES */ + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1022, 45}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1024, 123}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1036, 156}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t 
*)0)->uniname2ctype_pool_str1040, 47}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1065, 156}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1071, 144}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1072, 153}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1140, 72}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1141, 135}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1174, 15}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1187, 72}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1221, 90}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1235, 95}, + {-1}, {-1}, {-1}, {-1}, {-1}, 
{-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1277, 78}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, + {(int)(long)&((struct uniname2ctype_pool_t 
*)0)->uniname2ctype_pool_str1542, 153}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1751, 73} +#endif /* USE_UNICODE_PROPERTIES */ + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = uniname2ctype_hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register int o = wordlist[key].name; + if (o >= 0) + { + register const char *s = o + uniname2ctype_pool; + + if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0') + return &wordlist[key]; + } + } + } + return 0; +} + +static int +uniname2ctype(const UChar *name, unsigned int len) +{ + const struct uniname2ctype_struct *p = uniname2ctype_p((const char 
*)name, len); + if (p) return p->ctype; + return -1; +} diff --git a/src/node.h b/src/node.h new file mode 100644 index 0000000000..a6e3d58fe7 --- /dev/null +++ b/src/node.h @@ -0,0 +1,125 @@ +enum node_type { /* Tags naming each kind of node. No enumerator has an explicit value, so they number consecutively from 0 in the order listed. */ + NODE_METHOD, + NODE_FBODY, + NODE_CFUNC, + NODE_SCOPE, + NODE_BLOCK, + NODE_IF, + NODE_CASE, + NODE_WHEN, + NODE_OPT_N, + NODE_WHILE, + NODE_UNTIL, + NODE_ITER, + NODE_FOR, + NODE_BREAK, + NODE_NEXT, + NODE_REDO, + NODE_RETRY, + NODE_BEGIN, + NODE_RESCUE, + NODE_ENSURE, + NODE_AND, + NODE_OR, + NODE_NOT, + NODE_MASGN, + NODE_ASGN, + NODE_CDECL, + NODE_CVASGN, + NODE_CVDECL, + NODE_OP_ASGN, + NODE_CALL, + NODE_FCALL, + NODE_VCALL, + NODE_SUPER, + NODE_ZSUPER, + NODE_ARRAY, + NODE_ZARRAY, + NODE_HASH, + NODE_RETURN, + NODE_YIELD, + NODE_LVAR, + NODE_DVAR, + NODE_GVAR, + NODE_IVAR, + NODE_CONST, + NODE_CVAR, + NODE_NTH_REF, + NODE_BACK_REF, + NODE_MATCH, + NODE_MATCH2, + NODE_MATCH3, + NODE_INT, + NODE_FLOAT, + NODE_NEGATE, + NODE_LAMBDA, + NODE_SYM, + NODE_STR, + NODE_DSTR, + NODE_DREGX, + NODE_DREGX_ONCE, + NODE_LIST, + NODE_ARG, + NODE_ARGSCAT, + NODE_ARGSPUSH, + NODE_SPLAT, + NODE_TO_ARY, + NODE_SVALUE, + NODE_BLOCK_ARG, + NODE_DEF, + NODE_SDEF, + NODE_ALIAS, + NODE_UNDEF, + NODE_CLASS, + NODE_MODULE, + NODE_SCLASS, + NODE_COLON2, + NODE_COLON3, + NODE_CREF, + NODE_DOT2, + NODE_DOT3, + NODE_FLIP2, + NODE_FLIP3, + NODE_ATTRSET, + NODE_SELF, + NODE_NIL, + NODE_TRUE, + NODE_FALSE, + NODE_DEFINED, + NODE_NEWLINE, + NODE_POSTEXE, + NODE_ALLOCA, + NODE_DMETHOD, + NODE_BMETHOD, + NODE_MEMO, + NODE_IFUNC, + NODE_DSYM, + NODE_ATTRASGN, + NODE_LAST /* final enumerator, so its value equals the number of kinds listed above; presumably used as a sentinel or count -- TODO confirm against users */ +}; + +typedef struct RNode { /* Node record (typedef NODE): a flags word, a file-name pointer and three union slots u1, u2, u3. Nothing visible here records which union member is active; presumably the node kind decides -- TODO confirm. */ + unsigned long flags; /* NOTE(review): likely holds the node_type tag plus other bits -- confirm */ + char *nd_file; /* presumably the source file name for this node -- confirm */ + union { /* slot 1: child node, symbol, value, or pointer to mrb_sym (tbl) */ + struct RNode *node; + mrb_sym id; + mrb_value value; + //mrb_value (*cfunc)((ARGS_ANY())); + mrb_sym *tbl; + } u1; + union { /* slot 2: child node, symbol, argc, or value */ + struct RNode *node; + mrb_sym id; + long argc; + mrb_value value; + } u2; + union { /* slot 3: child node, symbol, state, global_entry pointer, cnt, or value */ + struct RNode *node; + mrb_sym id; + long state; + struct global_entry *entry; + long cnt; + mrb_value value; + } u3; +} NODE; diff 
/*
 * Fallback names for the US-ASCII string constructors.
 *
 * mrb_usascii_str_new2 maps to mrb_usascii_str_new_cstr when INCLUDE_REGEXP
 * is defined and to mrb_str_new_cstr otherwise.  mrb_usascii_str_new maps to
 * mrb_str_new when INCLUDE_REGEXP is not defined.
 *
 * Each fallback is guarded by its OWN name.  The second guard used to test
 * mrb_usascii_str_new2, which is always defined at that point, so
 * mrb_usascii_str_new could never be defined.
 */
#ifndef mrb_usascii_str_new2
  #ifdef INCLUDE_REGEXP
    #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
  #else
    #define mrb_usascii_str_new2 mrb_str_new_cstr
  #endif
#endif
#ifndef mrb_usascii_str_new
  #ifdef INCLUDE_REGEXP
  #else
    #define mrb_usascii_str_new mrb_str_new
  #endif
#endif
__int64 +# undef SIZEOF_LONG_LONG +# define SIZEOF_LONG_LONG SIZEOF___INT64 +#endif + +#if defined HAVE_UINTPTR_T && 0 +typedef uintptr_t VALUE; +typedef uintptr_t ID; +# define SIGNED_VALUE intptr_t +# define SIZEOF_VALUE SIZEOF_UINTPTR_T +#elif SIZEOF_LONG == SIZEOF_VOIDP +//typedef unsigned long VALUE; +//typedef unsigned long ID; +# define SIGNED_VALUE long long +# define SIZEOF_VALUE SIZEOF_LONG +#elif SIZEOF_LONG_LONG == SIZEOF_VOIDP +typedef unsigned LONG_LONG VALUE; +typedef unsigned LONG_LONG ID; +# define SIGNED_VALUE LONG_LONG +# define LONG_LONG_VALUE 1 +# define SIZEOF_VALUE SIZEOF_LONG_LONG +#else +# error ---->> ruby requires sizeof(void*) == sizeof(long) to be compiled. <<---- +#endif + +#ifdef HAVE_INFINITY +#elif BYTE_ORDER == LITTLE_ENDIAN +const unsigned char mrb_infinity[] = "\x00\x00\x80\x7f"; +#else +const unsigned char mrb_infinity[] = "\x7f\x80\x00\x00"; +#endif + +#ifdef HAVE_NAN +#elif BYTE_ORDER == LITTLE_ENDIAN +const unsigned char mrb_nan[] = "\x00\x00\xc0\x7f"; +#else +const unsigned char mrb_nan[] = "\x7f\xc0\x00\x00"; +#endif + +extern double round(double); + +#ifndef HAVE_ROUND +double +round(double x) +{ + double f; + + if (x > 0.0) { + f = floor(x); + x = f + (x - f >= 0.5); + } + else if (x < 0.0) { + f = ceil(x); + x = f - (f - x >= 0.5); + } + return x; +} +#endif + + + + +void mrb_cmperr(mrb_state *mrb, mrb_value x, mrb_value y); + +void +mrb_num_zerodiv(mrb_state *mrb) +{ + mrb_raise(mrb, E_ZERODIVISION_ERROR, "divided by 0"); +} + + +/* + * call-seq: + * num.coerce(numeric) -> array + * + * If aNumeric is the same type as num, returns an array + * containing aNumeric and num. Otherwise, returns an + * array with both aNumeric and num represented as + * Float objects. This coercion mechanism is used by + * Ruby to handle mixed-type numeric operations: it is intended to + * find a compatible common type between the two operands of the operator. 
+ * + * 1.coerce(2.5) #=> [2.5, 1.0] + * 1.2.coerce(3) #=> [3.0, 1.2] + * 1.coerce(2) #=> [2, 1] + */ + +static mrb_value +num_coerce(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + mrb_get_args(mrb, "o", &y); + + //if (CLASS_OF(x) == CLASS_OF(y)) + if (mrb_class(mrb, x) == mrb_class(mrb, y)) + return mrb_assoc_new(mrb, y, x); + x = mrb_Float(mrb, x); + y = mrb_Float(mrb, y); + return mrb_assoc_new(mrb, y, x); +} + +static mrb_value +coerce_body(mrb_state *mrb, mrb_value *x) +{ + return mrb_funcall(mrb, x[1], "coerce", 1, x[0]); +} + +static mrb_value +coerce_rescue(mrb_state *mrb, mrb_value *x) +{ + volatile mrb_value v = mrb_inspect(mrb, x[1]); + + mrb_raise(mrb, E_TYPE_ERROR, "%s can't be coerced into %s", + mrb_special_const_p(x[1])? + RSTRING_PTR(v): + mrb_obj_classname(mrb, x[1]), + mrb_obj_classname(mrb, x[0])); + return mrb_nil_value(); /* dummy */ +} + +static int +do_coerce(mrb_state *mrb, mrb_value *x, mrb_value *y, int err) +{ + mrb_value ary; + mrb_value a[2]; + + a[0] = *x; a[1] = *y; + + ary = coerce_body(mrb, a); + if (mrb_type(ary) != MRB_TT_ARRAY || RARRAY_LEN(ary) != 2) { + if (err) { + mrb_raise(mrb, E_TYPE_ERROR, "coerce must return [x, y]"); + } + return FALSE; + } + + *x = RARRAY_PTR(ary)[0]; + *y = RARRAY_PTR(ary)[1]; + return TRUE; +} + +mrb_value +mrb_num_coerce_bin(mrb_state *mrb, mrb_value x, mrb_value y, char* func) +{ + do_coerce(mrb, &x, &y, TRUE); + return mrb_funcall(mrb, x, func, 1, y); +} + +mrb_value +mrb_num_coerce_cmp(mrb_state *mrb, mrb_value x, mrb_value y, char* func) +{ + if (do_coerce(mrb, &x, &y, FALSE)) + return mrb_funcall(mrb, x, func, 1, y); + return mrb_nil_value(); +} + +mrb_value +mrb_num_coerce_relop(mrb_state *mrb, mrb_value x, mrb_value y, char* func) +{ + mrb_value c, x0 = x, y0 = y; + + if (!do_coerce(mrb, &x, &y, FALSE) || + mrb_nil_p(c = mrb_funcall(mrb, x, func, 1, y))) { + mrb_cmperr(mrb, x0, y0); + return mrb_nil_value(); /* not reached */ + } + return c; +} + +/* + * call-seq: + * +num -> num + * + * 
Unary Plus---Returns the receiver's value. + */ + +static mrb_value +num_uplus(mrb_state *mrb, mrb_value num) +{ + return num; +} + +/* + * call-seq: + * -num -> numeric + * + * Unary Minus---Returns the receiver's value, negated. + */ + +static mrb_value +num_uminus(mrb_state *mrb, mrb_value num) +{ + mrb_value zero; + + zero = mrb_fixnum_value(0); + do_coerce(mrb, &zero, &num, TRUE); + + return mrb_funcall(mrb, zero, "-", 1, num); +} + +/* + * call-seq: + * num.quo(numeric) -> real + * + * Returns most exact division (rational for integers, float for floats). + */ +static mrb_value +num_quo(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + + mrb_get_args(mrb, "o", &y); + return mrb_funcall(mrb, mrb_float_value((double)mrb_fixnum(x)), "/", 1, y); +} + +/* + * call-seq: + * num.abs -> numeric + * num.magnitude -> numeric + * + * Returns the absolute value of num. + * + * 12.abs #=> 12 + * (-34.56).abs #=> 34.56 + * -34.56.abs #=> 34.56 + */ + +static mrb_value +num_abs(mrb_state *mrb, mrb_value num) +{ + if (mrb_test(mrb_funcall(mrb, num, "<", 1, mrb_fixnum_value(0)))) { + return mrb_funcall(mrb, num, "-@", 0); + } + return num; +} + +/******************************************************************** + * + * Document-class: Float + * + * Float objects represent inexact real numbers using + * the native architecture's double-precision floating point + * representation. + */ + +mrb_value +mrb_float_new(double d) +{ + //NEWOBJ(flt, struct RFloat); + //OBJSETUP(flt, mrb_cFloat, MRB_TT_FLOAT); + + //flt->float_value = d; + //return (mrb_value)flt; + return mrb_float_value(d); +} + +/* 15.2.9.3.16(x) */ +/* + * call-seq: + * flt.to_s -> string + * + * Returns a string containing a representation of self. As well as a + * fixed or exponential form of the number, the call may return + * ``NaN'', ``Infinity'', and + * ``-Infinity''. 
+ */ + +static mrb_value +flo_to_s(mrb_state *mrb, mrb_value flt) +{ + char buf[32]; + double value = mrb_float(flt); + char *p, *e; + + if (isinf(value)) + return mrb_str_new2(mrb, value < 0 ? "-Infinity" : "Infinity"); + else if(isnan(value)) + return mrb_str_new2(mrb, "NaN"); + + sprintf(buf, "%#.15g", value); /* ensure to print decimal point */ + if (!(e = strchr(buf, 'e'))) { + e = buf + strlen(buf); + } + if (!ISDIGIT(e[-1])) { /* reformat if ended with decimal point (ex 111111111111111.) */ + sprintf(buf, "%#.14e", value); + if (!(e = strchr(buf, 'e'))) { + e = buf + strlen(buf); + } + } + p = e; + while (p[-1]=='0' && ISDIGIT(p[-2])) + p--; + memmove(p, e, strlen(e)+1); + return mrb_str_new2(mrb, buf); +} + +/* 15.2.9.3.2 */ +/* + * call-seq: + * float - other -> float + * + * Returns a new float which is the difference of float + * and other. + */ + +static mrb_value +flo_minus(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + + mrb_get_args(mrb, "o", &y); + + switch (mrb_type(y)) { + case MRB_TT_FIXNUM: + return mrb_float_value(mrb_float(x) - (double)mrb_fixnum(y)); + case MRB_TT_FLOAT: + return mrb_float_value(mrb_float(x) - mrb_float(y)); + default: + return mrb_num_coerce_bin(mrb, x, y, "-"); + } +} + +/* 15.2.9.3.3 */ +/* + * call-seq: + * float * other -> float + * + * Returns a new float which is the product of float + * and other. + */ + +static mrb_value +flo_mul(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + + mrb_get_args(mrb, "o", &y); + + switch (mrb_type(y)) { + case MRB_TT_FIXNUM: + return mrb_float_value(mrb_float(x) * (double)mrb_fixnum(y)); + case MRB_TT_FLOAT: + return mrb_float_value(mrb_float(x) * mrb_float(y)); + default: + return mrb_num_coerce_bin(mrb, x, y, "*"); + } +} + +/* 15.2.9.3.4 */ +/* + * call-seq: + * float / other -> float + * + * Returns a new float which is the result of dividing + * float by other. 
+ */ + +static mrb_value +flo_div(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + long f_y; + //double d; + + mrb_get_args(mrb, "o", &y); + + switch (mrb_type(y)) { + case MRB_TT_FIXNUM: + f_y = mrb_fixnum(y); + return mrb_float_value(mrb_float(x) / (double)f_y); + case MRB_TT_FLOAT: + return mrb_float_value(mrb_float(x) / mrb_float(y)); + default: + return mrb_num_coerce_bin(mrb, x, y, "/"); + } +} + +/* + * call-seq: + * float.quo(numeric) -> float + * + * Returns float / numeric. + */ +static mrb_value +flo_quo(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + + mrb_get_args(mrb, "o", &y); + return mrb_funcall(mrb, x, "/", 1, y); +} + +static void +flodivmod(mrb_state *mrb, double x, double y, double *divp, double *modp) +{ + double div, mod; + + if (y == 0.0) mrb_num_zerodiv(mrb); +#ifdef HAVE_FMOD + mod = fmod(x, y); +#else + { + double z; + + modf(x/y, &z); + mod = x - z * y; + } +#endif + if (isinf(x) && !isinf(y) && !isnan(y)) + div = x; + else + div = (x - mod) / y; + if (y*mod < 0) { + mod += y; + div -= 1.0; + } + if (modp) *modp = mod; + if (divp) *divp = div; +} + +/* 15.2.9.3.5 */ +/* + * call-seq: + * flt % other -> float + * flt.modulo(other) -> float + * + * Return the modulo after division of flt by other. 
+ * + * 6543.21.modulo(137) #=> 104.21 + * 6543.21.modulo(137.24) #=> 92.9299999999996 + */ + +static mrb_value +flo_mod(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + double fy, mod; + mrb_get_args(mrb, "o", &y); + + switch (mrb_type(y)) { + case MRB_TT_FIXNUM: + fy = (double)mrb_fixnum(y); + break; + case MRB_TT_FLOAT: + fy = mrb_float(y); + break; + default: + return mrb_num_coerce_bin(mrb, x, y, "%"); + } + flodivmod(mrb, mrb_float(x), fy, 0, &mod); + return mrb_float_value(mod); +} + +static mrb_value +dbl2ival(double d) +{ + if (FIXABLE(d)) { + d = round(d); + return mrb_fixnum_value((long)d); + } + return mrb_nil_value(); /* range over */ //mrb_dbl2big(d); +} + + +/* 15.2.8.3.16 */ +/* + * call-seq: + * num.eql?(numeric) -> true or false + * + * Returns true if num and numeric are the + * same type and have equal values. + * + * 1 == 1.0 #=> true + * 1.eql?(1.0) #=> false + * (1.0).eql?(1.0) #=> true + */ +static mrb_value +num_eql(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + mrb_get_args(mrb, "o", &y); + if (mrb_type(x) != mrb_type(y)) return mrb_false_value(); + if (mrb_equal(mrb, x, y)) { + return mrb_true_value(); + } + else { + return mrb_false_value(); + } +} + +static mrb_value +num_equal(mrb_state *mrb, mrb_value x, mrb_value y) +{ + if (mrb_obj_equal(mrb, x, y)) return mrb_true_value(); + return mrb_funcall(mrb, y, "==", 1, x); +} + +/* 15.2.9.3.7 */ +/* + * call-seq: + * flt == obj -> true or false + * + * Returns true only if obj has the same value + * as flt. Contrast this with Float#eql?, which + * requires obj to be a Float. 
+ * + * 1.0 == 1 #=> true + * + */ + +static mrb_value +flo_eq(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + volatile double a, b; + mrb_get_args(mrb, "o", &y); + + switch (mrb_type(y)) { + case MRB_TT_FIXNUM: + b = (double)mrb_fixnum(y); + break; + case MRB_TT_FLOAT: + b = mrb_float(y); +#if defined(_MSC_VER) && _MSC_VER < 1300 + if (isnan(b)) return mrb_false_value(); +#endif + break; + default: + return num_equal(mrb, x, y); + } + a = mrb_float(x); +#if defined(_MSC_VER) && _MSC_VER < 1300 + if (isnan(a)) return mrb_false_value(); +#endif + return (a == b)?mrb_true_value():mrb_false_value(); +} + +/* 15.2.8.3.18 */ +/* + * call-seq: + * flt.hash -> integer + * + * Returns a hash code for this float. + */ +static mrb_value +flo_hash(mrb_state *mrb, mrb_value num) +{ + double d; + char *c; + int i, hash; + + d = (double)mrb_fixnum(num); + if (d == 0) d = fabs(d); + c = (char*)&d; + for (hash=0, i=0; i b) return mrb_fixnum_value(1); + if (a < b) return mrb_fixnum_value(-1); + return mrb_nil_value(); +} + +/* 15.2.9.3.13 */ +/* + * call-seq: + * flt.to_f -> self + * + * As flt is already a float, returns +self+. + */ + +static mrb_value +flo_to_f(mrb_state *mrb, mrb_value num) +{ + return num; +} + +/* 15.2.9.3.11 */ +/* + * call-seq: + * flt.infinite? -> nil, -1, +1 + * + * Returns nil, -1, or +1 depending on whether flt + * is finite, -infinity, or +infinity. + * + * (0.0).infinite? #=> nil + * (-1.0/0.0).infinite? #=> -1 + * (+1.0/0.0).infinite? #=> 1 + */ + +static mrb_value +flo_is_infinite_p(mrb_state *mrb, mrb_value num) +{ + double value = mrb_float(num); + + if (isinf(value)) { + return mrb_fixnum_value( value < 0 ? -1 : 1 ); + } + + return mrb_nil_value(); +} + +/* 15.2.9.3.9 */ +/* + * call-seq: + * flt.finite? -> true or false + * + * Returns true if flt is a valid IEEE floating + * point number (it is not infinite, and nan? is + * false). 
+ * + */ + +static mrb_value +flo_is_finite_p(mrb_state *mrb, mrb_value num) +{ + double value = mrb_float(num); + +#if HAVE_FINITE + if (!finite(value)) + return mrb_false_value(); +#else + if (isinf(value) || isnan(value)) + return mrb_false_value(); +#endif + + return mrb_true_value(); +} + +/* 15.2.9.3.10 */ +/* + * call-seq: + * flt.floor -> integer + * + * Returns the largest integer less than or equal to flt. + * + * 1.2.floor #=> 1 + * 2.0.floor #=> 2 + * (-1.2).floor #=> -2 + * (-2.0).floor #=> -2 + */ + +static mrb_value +flo_floor(mrb_state *mrb, mrb_value num) +{ + double f = floor(mrb_float(num)); + long val; + + if (!FIXABLE(f)) { + return mrb_dbl2big(mrb, f); + } + val = (long)f; + return mrb_fixnum_value(val); +} + +/* 15.2.9.3.8 */ +/* + * call-seq: + * flt.ceil -> integer + * + * Returns the smallest Integer greater than or equal to + * flt. + * + * 1.2.ceil #=> 2 + * 2.0.ceil #=> 2 + * (-1.2).ceil #=> -1 + * (-2.0).ceil #=> -2 + */ + +static mrb_value +flo_ceil(mrb_state *mrb, mrb_value num) +{ + double f = ceil(mrb_float(num)); + long val; + + if (!FIXABLE(f)) { + return mrb_dbl2big(mrb, f); + } + val = (long)f; + return mrb_fixnum_value(val); +} + +/* 15.2.9.3.12 */ +/* + * call-seq: + * flt.round([ndigits]) -> integer or float + * + * Rounds flt to a given precision in decimal digits (default 0 digits). + * Precision may be negative. Returns a floating point number when ndigits + * is more than zero. 
+ * + * 1.4.round #=> 1 + * 1.5.round #=> 2 + * 1.6.round #=> 2 + * (-1.5).round #=> -2 + * + * 1.234567.round(2) #=> 1.23 + * 1.234567.round(3) #=> 1.235 + * 1.234567.round(4) #=> 1.2346 + * 1.234567.round(5) #=> 1.23457 + * + * 34567.89.round(-5) #=> 0 + * 34567.89.round(-4) #=> 30000 + * 34567.89.round(-3) #=> 35000 + * 34567.89.round(-2) #=> 34600 + * 34567.89.round(-1) #=> 34570 + * 34567.89.round(0) #=> 34568 + * 34567.89.round(1) #=> 34567.9 + * 34567.89.round(2) #=> 34567.89 + * 34567.89.round(3) #=> 34567.89 + * + */ + +static mrb_value +flo_round(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value num) +{ + mrb_value nd; + double number, f; + int ndigits = 0, i; + long val; + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + + if (argc /*> 0 && mrb_scan_args(argc, argv, "01", &nd) */== 1) { + nd = argv[0]; + ndigits = mrb_fixnum(nd); + } + number = mrb_float(num); + f = 1.0; + i = abs(ndigits); + while (--i >= 0) + f = f*10.0; + + if (isinf(f)) { + if (ndigits < 0) number = 0; + } + else { + if (ndigits < 0) number /= f; + else number *= f; + number = round(number); + if (ndigits < 0) number *= f; + else number /= f; + } + + if (ndigits > 0) return mrb_float_value(number); + + if (!FIXABLE(number)) { + return mrb_dbl2big(mrb, number); + } + val = (long)number; + return mrb_fixnum_value(val); +} + +/* 15.2.9.3.14 */ +/* 15.2.9.3.15 */ +/* + * call-seq: + * flt.to_i -> integer + * flt.to_int -> integer + * flt.truncate -> integer + * + * Returns flt truncated to an Integer. + */ + +static mrb_value +flo_truncate(mrb_state *mrb, mrb_value num) +{ + double f = mrb_float(num); + long val; + + if (f > 0.0) f = floor(f); + if (f < 0.0) f = ceil(f); + + if (!FIXABLE(f)) { + return mrb_dbl2big(mrb, f); + } + val = (long)f; + return mrb_fixnum_value(val); +} + +/* 15.2.8.3.17 */ +/* + * call-seq: + * num.floor -> integer + * + * Returns the largest integer less than or equal to num. 
+ * Numeric implements this by converting anInteger + * to a Float and invoking Float#floor. + * + * 1.floor #=> 1 + * (-1).floor #=> -1 + */ + +static mrb_value +num_floor(mrb_state *mrb, mrb_value num) +{ + return flo_floor(mrb, mrb_Float(mrb, num)); +} + +/* 15.2.8.3.20 */ +/* + * call-seq: + * num.round([ndigits]) -> integer or float + * + * Rounds num to a given precision in decimal digits (default 0 digits). + * Precision may be negative. Returns a floating point number when ndigits + * is more than zero. Numeric implements this by converting itself + * to a Float and invoking Float#round. + */ + +static mrb_value +num_round(mrb_state *mrb, /*int argc, mrb_value* argv,*/ mrb_value num) +{ + return flo_round(mrb, /*argc, argv,*/ mrb_Float(mrb, num)); +} + +SIGNED_VALUE +mrb_num2long(mrb_state *mrb, mrb_value val) +{ + again: + if (mrb_nil_p(val)) { + mrb_raise(mrb, E_TYPE_ERROR, "no implicit conversion from nil to integer"); + } + + if (FIXNUM_P(val)) return mrb_fixnum(val); + + switch (mrb_type(val)) { + case MRB_TT_FLOAT: + if (mrb_float(val) <= (double)LONG_MAX + && mrb_float(val) >= (double)LONG_MIN) { + return (SIGNED_VALUE)(mrb_float(val)); + } + else { + char buf[24]; + char *s; + + snprintf(buf, sizeof(buf), "%-.10g", mrb_float(val)); + if ((s = strchr(buf, ' ')) != 0) *s = '\0'; + mrb_raise(mrb, E_RANGE_ERROR, "float %s out of range of integer", buf); + } + + default: + val = mrb_to_int(mrb, val); + goto again; + } +} + +mrb_value +mrb_num2ulong(mrb_state *mrb, mrb_value val) +{ + again: + if (mrb_nil_p(val)) { + mrb_raise(mrb, E_TYPE_ERROR, "no implicit conversion from nil to integer"); + } + + if (FIXNUM_P(val)) return val; /* this is FIX2LONG, inteneded */ + + switch (mrb_type(val)) { + case MRB_TT_FLOAT: + if (mrb_float(val) <= (double)LONG_MAX + && mrb_float(val) >= (double)LONG_MIN) { + return mrb_fixnum_value(mrb_float(val)); + } + else { + char buf[24]; + char *s; + + snprintf(buf, sizeof(buf), "%-.10g", mrb_float(val)); + if ((s = strchr(buf, 
' ')) != 0) *s = '\0'; + mrb_raise(mrb, E_RANGE_ERROR, "float %s out of range of integer", buf); + } + + default: + val = mrb_to_int(mrb, val); + goto again; + } +} + +#if SIZEOF_INT < SIZEOF_VALUE +void +mrb_out_of_int(mrb_state *mrb, SIGNED_VALUE num) +{ + mrb_raise(mrb, E_RANGE_ERROR, "integer %"PRIdVALUE " too %s to convert to `int'", + num, num < 0 ? "small" : "big"); +} + +static void +check_int(SIGNED_VALUE num) +{ + if ((SIGNED_VALUE)(int)num != num) { + mrb_out_of_int(num); + } +} + +static void +check_uint(mrb_state *mrb, mrb_value num, mrb_value sign) +{ + static const mrb_value mask = ~(mrb_value)UINT_MAX; + + if (RTEST(sign)) { + /* minus */ + if ((num & mask) != mask || (num & ~mask) <= INT_MAX + 1UL) + mrb_raise(mrb, E_RANGE_ERROR, "integer %"PRIdVALUE " too small to convert to `unsigned int'", num); + } + else { + /* plus */ + if ((num & mask) != 0) + mrb_raise(mrb, E_RANGE_ERROR, "integer %"PRIuVALUE " too big to convert to `unsigned int'", num); + } +} + +long +mrb_num2int(mrb_value val) +{ + long num = mrb_num2long(mrb, val); + + check_int(num); + return num; +} + +long +mrb_fix2int(mrb_state *mrb, mrb_value val) +{ + long num = FIXNUM_P(val)?mrb_fixnum(val):mrb_num2long(mrb, val); + + check_int(num); + return num; +} + +unsigned long +mrb_num2uint(mrb_value val) +{ + unsigned long num = mrb_num2ulong(val); + + check_uint(num, mrb_funcall(mrb, val, "<", 1, mrb_fixnum_value(0))); + return num; +} + +unsigned long +mrb_fix2uint(mrb_state *mrb, mrb_value val) +{ + unsigned long num; + + if (!FIXNUM_P(val)) { + return mrb_num2uint(mrb, val); + } + num = FIX2ULONG(val); + + check_uint(num, mrb_funcall(mrb, val, "<", 1, mrb_fixnum_value(0))); + return num; +} +#else +long +mrb_num2int(mrb_state *mrb, mrb_value val) +{ + return mrb_num2long(mrb, val); +} + +long +mrb_fix2int(mrb_value val) +{ + return mrb_fixnum(val); +} +#endif + +mrb_value +mrb_num2fix(mrb_state *mrb, mrb_value val) +{ + long v; + + if (FIXNUM_P(val)) return val; + + v = 
mrb_num2long(mrb, val); + if (!FIXABLE(v)) + mrb_raise(mrb, E_RANGE_ERROR, "integer %"PRIdVALUE " out of range of fixnum", v); + return mrb_fixnum_value(v); +} + +#if HAVE_LONG_LONG + +LONG_LONG +mrb_num2ll(mrb_state *mrb, mrb_value val) +{ + if (mrb_nil_p(val)) { + mrb_raise(mrb, E_TYPE_ERROR, "no implicit conversion from nil"); + } + + if (FIXNUM_P(val)) return (LONG_LONG)mrb_fixnum(val); + + switch (mrb_type(val)) { + case MRB_TT_FLOAT: + if (mrb_float(val) <= (double)LLONG_MAX + && mrb_float(val) >= (double)LLONG_MIN) { + return (LONG_LONG)(mrb_float(val)); + } + else { + char buf[24]; + char *s; + + snprintf(buf, sizeof(buf), "%-.10g", mrb_float(val)); + if ((s = strchr(buf, ' ')) != 0) *s = '\0'; + mrb_raise(mrb, E_RANGE_ERROR, "float %s out of range of long long", buf); + } + + case MRB_TT_STRING: + mrb_raise(mrb, E_TYPE_ERROR, "no implicit conversion from string"); + return mrb_nil_value(); /* not reached */ + + case MRB_TT_TRUE: + case MRB_TT_FALSE: + mrb_raise(mrb, E_TYPE_ERROR, "no implicit conversion from boolean"); + return mrb_nil_value(); /* not reached */ + + default: + val = mrb_to_int(mrb, val); + return NUM2LL(val); + } +} + +unsigned LONG_LONG +mrb_num2ull(mrb_state *mrb, mrb_value val) +{ + return (unsigned LONG_LONG)mrb_num2ll(mrb, val); +} + +#endif /* HAVE_LONG_LONG */ + +/* + * Document-class: Integer + * + * Integer is the basis for the two concrete classes that + * hold whole numbers, Bignum and Fixnum. + * + */ + + +/* 15.2.8.3.14 */ +/* 15.2.8.3.24 */ +/* 15.2.8.3.26 */ +/* + * call-seq: + * int.to_i -> integer + * int.to_int -> integer + * int.floor -> integer + * int.ceil -> integer + * int.round -> integer + * int.truncate -> integer + * + * As int is already an Integer, all these + * methods simply return the receiver. 
+ */ + +static mrb_value +int_to_i(mrb_state *mrb, mrb_value num) +{ + return num; +} + +/* 15.2.8.3.21 */ +/* + * call-seq: + * fixnum.next -> integer + * fixnum.succ -> integer + * + * Returns the Integer equal to int + 1. + * + * 1.next #=> 2 + * (-1).next #=> 0 + */ + +static mrb_value +fix_succ(mrb_state *mrb, mrb_value num) +{ + long i = mrb_fixnum(num) + 1; + return mrb_fixnum_value(i); +} + +/* 15.2.8.3.19 */ +/* + * call-seq: + * int.next -> integer + * int.succ -> integer + * + * Returns the Integer equal to int + 1. + * + * 1.next #=> 2 + * (-1).next #=> 0 + */ +static mrb_value +int_succ(mrb_state *mrb, mrb_value num) +{ + if (FIXNUM_P(num)) { + long i = mrb_fixnum(num) + 1; + return mrb_fixnum_value(i); + } + return mrb_funcall(mrb, num, "+", 1, mrb_fixnum_value(1)); +} + +mrb_value +rb_fix2str(mrb_state *mrb, mrb_value x, int base) +{ + extern const char ruby_digitmap[]; + char buf[SIZEOF_VALUE*CHAR_BIT + 2], *b = buf + sizeof buf; + long val = mrb_fixnum(x); + int neg = 0; + + if (base < 2 || 36 < base) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid radix %d", base); + } + if (val == 0) { + return mrb_usascii_str_new2(mrb, "0"); + } + if (val < 0) { + val = -val; + neg = 1; + } + *--b = '\0'; + do { + *--b = ruby_digitmap[(int)(val % base)]; + } while (val /= base); + if (neg) { + *--b = '-'; + } + + return mrb_usascii_str_new2(mrb, b); +} + +#define SQRT_LONG_MAX ((SIGNED_VALUE)1<<((SIZEOF_LONG*CHAR_BIT-1)/2)) +/*tests if N*N would overflow*/ +#define FIT_SQRT_LONG(n) (((n)=-SQRT_LONG_MAX)) + +/* 15.2.8.3.3 */ +/* + * call-seq: + * fix * numeric -> numeric_result + * + * Performs multiplication: the class of the resulting object depends on + * the class of numeric and on the magnitude of the + * result. 
+ */ + +static mrb_value +fix_mul(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + mrb_get_args(mrb, "o", &y); + + if (FIXNUM_P(y)) { +#ifdef __HP_cc +/* avoids an optimization bug of HP aC++/ANSI C B3910B A.06.05 [Jul 25 2005] */ + volatile +#endif + long a, b; +#if SIZEOF_LONG * 2 <= SIZEOF_LONG_LONG + LONG_LONG d; +#else + long c; + mrb_value r; +#endif + + a = mrb_fixnum(x); + b = mrb_fixnum(y); + +#if SIZEOF_LONG * 2 <= SIZEOF_LONG_LONG + d = (LONG_LONG)a * b; + if (FIXABLE(d)) return mrb_fixnum_value(d); + return mrb_nil_value();// rb_ll2inum(d); +#else + if (FIT_SQRT_LONG(a) && FIT_SQRT_LONG(b)) + return mrb_fixnum_value(a*b); + c = a * b; + r = mrb_fixnum_value(c); + + if (a == 0) return x; + if (mrb_fixnum(r) != c || c/a != b) { + //r = mrb_big_mul(mrb_int2big(a), mrb_int2big(b)); + r = mrb_fixnum_value(a*b); + } + return r; +#endif + } + switch (mrb_type(y)) { + case MRB_TT_FLOAT: + return mrb_float_value((double)mrb_fixnum(x) * mrb_float(y)); + default: + return mrb_num_coerce_bin(mrb, x, y, "*"); + } +} + +static void +fixdivmod(mrb_state *mrb, long x, long y, long *divp, long *modp) +{ + long div, mod; + + if (y == 0) mrb_num_zerodiv(mrb); + if (y < 0) { + if (x < 0) + div = -x / -y; + else + div = - (x / -y); + } + else { + if (x < 0) + div = - (-x / y); + else + div = x / y; + } + mod = x - div*y; + if ((mod < 0 && y > 0) || (mod > 0 && y < 0)) { + mod += y; + div -= 1; + } + if (divp) *divp = div; + if (modp) *modp = mod; +} + +mrb_value rb_big_fdiv(mrb_value x, mrb_value y); + +//mrb_value mrb_rational_reciprocal(mrb_value x); + +static mrb_value +fix_divide(mrb_state *mrb, mrb_value x, mrb_value y, char* op) +{ + if (FIXNUM_P(y)) { + long div; + + fixdivmod(mrb, mrb_fixnum(x), mrb_fixnum(y), &div, 0); + return mrb_fixnum_value(div); + } + switch (mrb_type(y)) { + case MRB_TT_FLOAT: + { + double div; + + if (*op == '/') { + div = (double)mrb_fixnum(x) / mrb_float(y); + return mrb_float_value(div); + } + else { + if (mrb_float(y) == 0) 
mrb_num_zerodiv(mrb); + div = (double)mrb_fixnum(x) / mrb_float(y); + return mrb_dbl2big(mrb, floor(div)); + } + } + //case MRB_TT_RATIONAL: + // if (op == '/' && mrb_fixnum(x) == 1) + // return mrb_rational_reciprocal(y); + /* fall through */ + default: + return mrb_num_coerce_bin(mrb, x, y, op); + } +} + +/* 15.2.8.3.4 */ +/* + * call-seq: + * fix / numeric -> numeric_result + * + * Performs division: the class of the resulting object depends on + * the class of numeric and on the magnitude of the + * result. + */ + +static mrb_value +fix_div(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + mrb_get_args(mrb, "o", &y); + + return fix_divide(mrb, x, y, "/"); +} + +/* 15.2.8.3.5 */ +/* + * call-seq: + * fix % other -> real + * fix.modulo(other) -> real + * + * Returns fix modulo other. + * See numeric.divmod for more information. + */ + +static mrb_value +fix_mod(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + mrb_get_args(mrb, "o", &y); + + if (FIXNUM_P(y)) { + long mod; + + fixdivmod(mrb, mrb_fixnum(x), mrb_fixnum(y), 0, &mod); + return mrb_fixnum_value(mod); + } + switch (mrb_type(y)) { + case MRB_TT_FLOAT: + { + double mod; + + flodivmod(mrb, (double)mrb_fixnum(x), mrb_float(y), 0, &mod); + return mrb_float_value(mod); + } + default: + return mrb_num_coerce_bin(mrb, x, y, "%"); + } +} + +/* + * call-seq: + * fix.divmod(numeric) -> array + * + * See Numeric#divmod. 
+ */ +static mrb_value +fix_divmod(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + mrb_get_args(mrb, "o", &y); + + if (FIXNUM_P(y)) { + long div, mod; + + fixdivmod(mrb, mrb_fixnum(x), mrb_fixnum(y), &div, &mod); + + return mrb_assoc_new(mrb, mrb_fixnum_value(div), mrb_fixnum_value(mod)); + } + switch (mrb_type(y)) { + case MRB_TT_FLOAT: + { + double div, mod; + volatile mrb_value a, b; + + flodivmod(mrb, (double)mrb_fixnum(x), mrb_float(y), &div, &mod); + a = dbl2ival(div); + b = mrb_float_value(mod); + return mrb_assoc_new(mrb, a, b); + } + default: + return mrb_num_coerce_bin(mrb, x, y, "divmod"); + } +} + +/* 15.2.8.3.7 */ +/* + * call-seq: + * fix == other -> true or false + * + * Return true if fix equals other + * numerically. + * + * 1 == 2 #=> false + * 1 == 1.0 #=> true + */ + +static mrb_value +fix_equal(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + mrb_get_args(mrb, "o", &y); + + if (mrb_obj_equal(mrb, x, y)) return mrb_true_value(); + if (FIXNUM_P(y)) return mrb_false_value(); + switch (mrb_type(y)) { + case MRB_TT_FLOAT: + return (double)mrb_fixnum(x) == mrb_float(y) ? mrb_true_value() : mrb_false_value(); + default: + return num_equal(mrb, x, y); + } +} + +/* 15.2.8.3.8 */ +/* + * call-seq: + * ~fix -> integer + * + * One's complement: returns a number where each bit is flipped. + * ex.0---00001 (1)-> 1---11110 (-2) + * ex.0---00010 (2)-> 1---11101 (-3) + * ex.0---00100 (4)-> 1---11011 (-5) + */ + +static mrb_value +fix_rev(mrb_state *mrb, mrb_value num) +{ + long val = mrb_fixnum(num); + + val = ~val; + return mrb_fixnum_value(val); +} + +static mrb_value +bit_coerce(mrb_state *mrb, mrb_value x) +{ + while (!FIXNUM_P(x)) { + if (mrb_type(x) == MRB_TT_FLOAT) { + mrb_raise(mrb, E_TYPE_ERROR, "can't convert Float into Integer"); + } + x = mrb_to_int(mrb, x); + } + return x; +} + +/* 15.2.8.3.9 */ +/* + * call-seq: + * fix & integer -> integer_result + * + * Bitwise AND. 
+ */ + +static mrb_value +fix_and(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + long val; + mrb_get_args(mrb, "o", &y); + + //if (!FIXNUM_P(y = bit_coerce(mrb, y))) { + // return mrb_big_and(y, x); + //} + if (mrb_type(y) == MRB_TT_FLOAT) { + mrb_raise(mrb, E_TYPE_ERROR, "can't convert Float into Integer"); + } + y = bit_coerce(mrb, y); + val = mrb_fixnum(x) & mrb_fixnum(y); + return mrb_fixnum_value(val); +} + +/* 15.2.8.3.10 */ +/* + * call-seq: + * fix | integer -> integer_result + * + * Bitwise OR. + */ + +static mrb_value +fix_or(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + long val; + mrb_get_args(mrb, "o", &y); + + //if (!FIXNUM_P(y = bit_coerce(mrb, y))) { + // return mrb_big_or(y, x); + //} + if (mrb_type(y) == MRB_TT_FLOAT) { + mrb_raise(mrb, E_TYPE_ERROR, "can't convert Float into Integer"); + } + y = bit_coerce(mrb, y); + val = mrb_fixnum(x) | mrb_fixnum(y); + return mrb_fixnum_value(val); +} + +/* 15.2.8.3.11 */ +/* + * call-seq: + * fix ^ integer -> integer_result + * + * Bitwise EXCLUSIVE OR. + */ + +static mrb_value +fix_xor(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + long val; + mrb_get_args(mrb, "o", &y); + + //if (!FIXNUM_P(y = bit_coerce(mrb, y))) { + // return mrb_big_xor(y, x); + //} + if (mrb_type(y) == MRB_TT_FLOAT) { + mrb_raise(mrb, E_TYPE_ERROR, "can't convert Float into Integer"); + } + y = bit_coerce(mrb, y); + val = mrb_fixnum(x) ^ mrb_fixnum(y); + return mrb_fixnum_value(val); +} + +static mrb_value fix_lshift(mrb_state *mrb, long, unsigned long); +static mrb_value fix_rshift(long, unsigned long); + +/* 15.2.8.3.12 */ +/* + * call-seq: + * fix << count -> integer + * + * Shifts _fix_ left _count_ positions (right if _count_ is negative). 
+ */ + +static mrb_value +mrb_fix_lshift(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + long val, width; + mrb_get_args(mrb, "o", &y); + + val = mrb_fixnum(x); + //if (!FIXNUM_P(y)) + // return mrb_big_lshift(mrb_int2big(val), y); + if (mrb_type(y) == MRB_TT_FLOAT) { + mrb_raise(mrb, E_TYPE_ERROR, "can't convert Float into Integer"); + } + width = mrb_fixnum(y); + if (width < 0) + return fix_rshift(val, (unsigned long)-width); + return fix_lshift(mrb, val, width); +} + +static mrb_value +fix_lshift(mrb_state *mrb, long val, unsigned long width) +{ + if (width > (SIZEOF_LONG*CHAR_BIT-1) + || ((unsigned long)abs(val))>>(SIZEOF_LONG*CHAR_BIT-1-width) > 0) { + mrb_raise(mrb, E_RANGE_ERROR, "width(%d) > (SIZEOF_LONG*CHAR_BIT-1)", width); + } + val = val << width; + return mrb_fixnum_value(val); +} + +/* 15.2.8.3.13 */ +/* + * call-seq: + * fix >> count -> integer + * + * Shifts _fix_ right _count_ positions (left if _count_ is negative). + */ + +static mrb_value +mrb_fix_rshift(mrb_state *mrb, mrb_value x) +{ + mrb_value y; + long i, val; + mrb_get_args(mrb, "o", &y); + + val = mrb_fixnum(x); + //if (!FIXNUM_P(y)) + // return mrb_big_rshift(mrb_int2big(val), y); + i = mrb_fixnum(y); + if (i == 0) return x; + if (i < 0) + return fix_lshift(mrb, val, (unsigned long)-i); + return fix_rshift(val, i); +} + +static mrb_value +fix_rshift(long val, unsigned long i) +{ + if (i >= sizeof(long)*CHAR_BIT-1) { + if (val < 0) return mrb_fixnum_value(-1); + return mrb_fixnum_value(0); + } + val = RSHIFT(val, i); + return mrb_fixnum_value(val); +} + +/* 15.2.8.3.23 */ +/* + * call-seq: + * fix.to_f -> float + * + * Converts fix to a Float. + * + */ + +static mrb_value +fix_to_f(mrb_state *mrb, mrb_value num) +{ + double val; + + val = (double)mrb_fixnum(num); + + return mrb_float_value(val); +} + +/* + * Document-class: ZeroDivisionError + * + * Raised when attempting to divide an integer by 0. 
+ * + * 42 / 0 + * + * raises the exception: + * + * ZeroDivisionError: divided by 0 + * + * Note that only division by an exact 0 will raise that exception: + * + * 42 / 0.0 #=> Float::INFINITY + * 42 / -0.0 #=> -Float::INFINITY + * 0 / 0.0 #=> NaN + */ + +/* + * Document-class: FloatDomainError + * + * Raised when attempting to convert special float values + * (in particular infinite or NaN) + * to numerical classes which don't support them. + * + * Float::INFINITY.to_r + * + * raises the exception: + * + * FloatDomainError: Infinity + */ +/* ------------------------------------------------------------------------*/ +static mrb_int +dbl2big(mrb_state *mrb, float d) +{ + //long i = 0; + //BDIGIT c; + //BDIGIT *digits; + mrb_int z; + //double u = (d < 0)?-d:d; + + if (isinf(d)) { + mrb_raise(mrb, E_FLOATDOMAIN_ERROR, d < 0 ? "-Infinity" : "Infinity"); + } + if (isnan(d)) { + mrb_raise(mrb, E_FLOATDOMAIN_ERROR, "NaN"); + } + z = (mrb_int)d; + return z; +} + +mrb_value +mrb_dbl2big(mrb_state *mrb, float d) +{ + return mrb_fixnum_value(dbl2big(mrb, d));//bignorm(dbl2big(d)); +} + +/* 15.2.8.3.1 */ +/* + * call-seq: + * fix + numeric -> numeric_result + * + * Performs addition: the class of the resulting object depends on + * the class of numeric and on the magnitude of the + * result. + */ +static mrb_value +mrb_fixnum_plus(mrb_state *mrb, mrb_value self) +{ + mrb_int x, y; + + x = mrb_fixnum(self); + mrb_get_args(mrb, "i", &y); + + DEBUG(printf("%d + %d = %d\n", x, y, x+y)); + return mrb_fixnum_value(x + y); +} + +/* 15.2.8.3.2 */ +/* 15.2.8.3.16 */ +/* + * call-seq: + * fix - numeric -> numeric_result + * + * Performs subtraction: the class of the resulting object depends on + * the class of numeric and on the magnitude of the + * result. 
+ */ +static mrb_value +mrb_fixnum_minus(mrb_state *mrb, mrb_value self) +{ + mrb_int x, y; + + x = mrb_fixnum(self); + mrb_get_args(mrb, "i", &y); + + DEBUG(printf("%d - %d = %d\n", x, y, x-y)); + return mrb_fixnum_value(x - y); +} + +/* 15.2.8.3.6 */ +/* + * call-seq: + * self.i <=> other.i => -1, 0, +1 + * < => -1 + * = => 0 + * > => +1 + * Comparison---Returns -1, 0, or +1 depending on whether fix is + * less than, equal to, or greater than numeric. This is the + * basis for the tests in Comparable. + */ +static mrb_value +mrb_fixnum_cmp(mrb_state *mrb, mrb_value self) +{ + mrb_int x, y; + mrb_value vy; + + mrb_get_args(mrb, "o", &vy); + if (FIXNUM_P(vy)) { + x = mrb_fixnum(self); + y = mrb_fixnum(vy); + DEBUG(printf("%d <=> %d\n", x, y)); + if (x > y) + return mrb_fixnum_value(1); + else if (x < y) + return mrb_fixnum_value(-1); + else + return mrb_fixnum_value(0); + } + else { + return mrb_num_coerce_cmp(mrb, self, vy, "<=>"); + } + +} + +/* 15.2.8.3.29 (x) */ +/* + * call-seq: + * fix > other => true or false + * + * Returns true if the value of fix is + * greater than that of other. 
+ */ + +mrb_value +mrb_fix2str(mrb_state *mrb, mrb_value x, int base) +{ + char buf[64], *b = buf + sizeof buf; + long val = mrb_fixnum(x); + int neg = 0; + + if (base < 2 || 36 < base) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid radix %d", base); + } + if (val == 0) { + return mrb_str_new2(mrb, "0"); + } + if (val < 0) { + val = -val; + neg = 1; + } + *--b = '\0'; + do { + *--b = ruby_digitmap[(int)(val % base)]; + } while (val /= base); + if (neg) { + *--b = '-'; + } + + return mrb_str_new2(mrb, b); +} + +mrb_value +mrb_fix_to_s(mrb_state *mrb, mrb_value self, int argc, mrb_value *argv) +{ + int base; + + if (argc == 0) base = 10; + else { + //mrb_value b; + + //mrb_scan_args(argc, argv, "01", &b); + base = mrb_fixnum(argv[0]); + } + + return mrb_fix2str(mrb, self, base); +} + +/* 15.2.8.3.25 */ +/* + * call-seq: + * fix.to_s(base=10) -> string + * + * Returns a string containing the representation of fix radix + * base (between 2 and 36). + * + * 12345.to_s #=> "12345" + * 12345.to_s(2) #=> "11000000111001" + * 12345.to_s(8) #=> "30071" + * 12345.to_s(10) #=> "12345" + * 12345.to_s(16) #=> "3039" + * 12345.to_s(36) #=> "9ix" + * + */ +static mrb_value +mrb_fixnum_to_s(mrb_state *mrb, mrb_value self) /* fix_to_s */ +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return mrb_fix_to_s(mrb, self, argc, argv); +} + +/* 15.2.9.3.6 */ +/* + * call-seq: + * self.f <=> other.f => -1, 0, +1 + * < => -1 + * = => 0 + * > => +1 + * Comparison---Returns -1, 0, or +1 depending on whether fix is + * less than, equal to, or greater than numeric. This is the + * basis for the tests in Comparable. 
+ */ +static mrb_value +mrb_float_cmp(mrb_state *mrb, mrb_value self) +{ + mrb_value vy; + mrb_float x, y; + + x = mrb_float(self); + mrb_get_args(mrb, "o", &vy); + if (FIXNUM_P(vy)) { + y = (mrb_float)mrb_fixnum(vy); + } + else { + y = mrb_float(vy); + } + + DEBUG(printf("%f <=> %f\n", x, y)); + if (x > y) + return mrb_fixnum_value(1); + else { + if (x < y) + return mrb_fixnum_value(-1); + return mrb_fixnum_value(0); + } +} + +/* 15.2.9.3.1 */ +/* + * call-seq: + * float + other -> float + * + * Returns a new float which is the sum of float + * and other. + */ +static mrb_value +mrb_float_plus(mrb_state *mrb, mrb_value self) +{ + mrb_float x, y; + + x = mrb_float(self); + mrb_get_args(mrb, "f", &y); + + return mrb_float_value(x + y); +} +/* ------------------------------------------------------------------------*/ +void +mrb_init_numeric(mrb_state *mrb) +{ + struct RClass *numeric, *integer, *fixnum, *fl; + /* Numeric Class */ + numeric = mrb_define_class(mrb, "Numeric", mrb->object_class); + mrb_include_module(mrb, numeric, mrb_class_get(mrb, "Comparable")); + + mrb_define_method(mrb, numeric, "+@", num_uplus, ARGS_REQ(1)); /* 15.2.7.4.1 */ + mrb_define_method(mrb, numeric, "-@", num_uminus, ARGS_REQ(1)); /* 15.2.7.4.2 */ + mrb_define_method(mrb, numeric, "abs", num_abs, ARGS_NONE()); /* 15.2.7.4.3 */ + mrb_define_method(mrb, numeric, "coerce", num_coerce, ARGS_REQ(1)); /* 15.2.7.4.4 */ + mrb_define_method(mrb, numeric, "quo", num_quo, ARGS_REQ(1)); /* 15.2.7.4.5 (x) */ + + /* Integer Class */ + integer = mrb_define_class(mrb, "Integer", numeric); + fixnum = mrb->fixnum_class = mrb_define_class(mrb, "Fixnum", integer); + + mrb_define_method(mrb, fixnum, "+", mrb_fixnum_plus, ARGS_REQ(1)); /* 15.2.8.3.1 */ + mrb_define_method(mrb, fixnum, "-", mrb_fixnum_minus, ARGS_REQ(1)); /* 15.2.8.3.2 */ + mrb_define_method(mrb, fixnum, "*", fix_mul, ARGS_REQ(1)); /* 15.2.8.3.3 */ + mrb_define_method(mrb, fixnum, "/", fix_div, ARGS_REQ(1)); /* 15.2.8.3.4 */ + 
mrb_define_method(mrb, fixnum, "%", fix_mod, ARGS_REQ(1)); /* 15.2.8.3.5 */ + mrb_define_method(mrb, fixnum, "<=>", mrb_fixnum_cmp, ARGS_REQ(1)); /* 15.2.8.3.6 */ + mrb_define_method(mrb, fixnum, "==", fix_equal, ARGS_REQ(1)); /* 15.2.8.3.7 */ + mrb_define_method(mrb, fixnum, "~", fix_rev, ARGS_NONE()); /* 15.2.8.3.8 */ + mrb_define_method(mrb, fixnum, "&", fix_and, ARGS_REQ(1)); /* 15.2.8.3.9 */ + mrb_define_method(mrb, fixnum, "|", fix_or, ARGS_REQ(1)); /* 15.2.8.3.10 */ + mrb_define_method(mrb, fixnum, "^", fix_xor, ARGS_REQ(1)); /* 15.2.8.3.11 */ + mrb_define_method(mrb, fixnum, "<<", mrb_fix_lshift, ARGS_REQ(1)); /* 15.2.8.3.12 */ + mrb_define_method(mrb, fixnum, ">>", mrb_fix_rshift, ARGS_REQ(1)); /* 15.2.8.3.13 */ + mrb_define_method(mrb, fixnum, "ceil", int_to_i, ARGS_NONE()); /* 15.2.8.3.14 */ + mrb_define_method(mrb, fixnum, "eql?", num_eql, ARGS_REQ(1)); /* 15.2.8.3.16 */ + mrb_define_method(mrb, fixnum, "floor", num_floor, ARGS_NONE()); /* 15.2.8.3.17 */ + mrb_define_method(mrb, fixnum, "hash", flo_hash, ARGS_NONE()); /* 15.2.8.3.18 */ + mrb_define_method(mrb, fixnum, "next", int_succ, ARGS_NONE()); /* 15.2.8.3.19 */ + mrb_define_method(mrb, fixnum, "round", num_round, ARGS_ANY()); /* 15.2.8.3.20 */ + mrb_define_method(mrb, fixnum, "succ", fix_succ, ARGS_NONE()); /* 15.2.8.3.21 */ + mrb_define_method(mrb, fixnum, "to_f", fix_to_f, ARGS_NONE()); /* 15.2.8.3.23 */ + mrb_define_method(mrb, fixnum, "to_i", int_to_i, ARGS_NONE()); /* 15.2.8.3.24 */ + mrb_define_method(mrb, fixnum, "to_s", mrb_fixnum_to_s, ARGS_NONE()); /* 15.2.8.3.25 */ + mrb_define_method(mrb, fixnum, "truncate", int_to_i, ARGS_NONE()); /* 15.2.8.3.26 */ + //mrb_define_method(mrb, fixnum, "<", mrb_fixnum_lt, ARGS_REQ(1)); /* 15.2.8.3.28 (x) */ + //mrb_define_method(mrb, fixnum, ">", mrb_fixnum_gt, ARGS_REQ(1)); /* 15.2.8.3.29 (x) */ + mrb_define_method(mrb, fixnum, "divmod", fix_divmod, ARGS_REQ(1)); /* 15.2.8.3.30 (x) */ + + /* Float Class */ + fl = mrb->float_class = mrb_define_class(mrb, 
"Float", numeric); + mrb_define_method(mrb, fl, "+", mrb_float_plus, ARGS_REQ(1)); /* 15.2.9.3.1 */ + mrb_define_method(mrb, fl, "-", flo_minus, ARGS_REQ(1)); /* 15.2.9.3.2 */ + mrb_define_method(mrb, fl, "*", flo_mul, ARGS_REQ(1)); /* 15.2.9.3.3 */ + mrb_define_method(mrb, fl, "/", flo_div, ARGS_REQ(1)); /* 15.2.9.3.4 */ + mrb_define_method(mrb, fl, "%", flo_mod, ARGS_REQ(1)); /* 15.2.9.3.5 */ + mrb_define_method(mrb, fl, "<=>", mrb_float_cmp, ARGS_REQ(1)); /* 15.2.9.3.6 */ + mrb_define_method(mrb, fl, "==", flo_eq, ARGS_REQ(1)); /* 15.2.9.3.7 */ + mrb_define_method(mrb, fl, "ceil", flo_ceil, ARGS_NONE()); /* 15.2.9.3.8 */ + mrb_define_method(mrb, fl, "finite?", flo_is_finite_p, ARGS_NONE()); /* 15.2.9.3.9 */ + mrb_define_method(mrb, fl, "floor", flo_floor, ARGS_NONE()); /* 15.2.9.3.10 */ + mrb_define_method(mrb, fl, "infinite?", flo_is_infinite_p,ARGS_NONE()); /* 15.2.9.3.11 */ + mrb_define_method(mrb, fl, "round", flo_round, ARGS_ANY()); /* 15.2.9.3.12 */ + mrb_define_method(mrb, fl, "to_f", flo_to_f, ARGS_NONE()); /* 15.2.9.3.13 */ + mrb_define_method(mrb, fl, "to_i", flo_truncate, ARGS_NONE()); /* 15.2.9.3.14 */ + mrb_define_method(mrb, fl, "truncate", flo_truncate, ARGS_NONE()); /* 15.2.9.3.15 */ + + mrb_define_method(mrb, fl, "to_s", flo_to_s, ARGS_NONE()); /* 15.2.9.3.16(x) */ + //mrb_define_method(mrb, fl, "<", flo_lt, ARGS_REQ(1)); /* 15.2.9.3.17(x) */ + //mrb_define_method(mrb, fl, ">", flo_gt, ARGS_REQ(1)); /* 15.2.9.3.18(x) */ + mrb_define_method(mrb, fl, "quo", flo_quo, ARGS_REQ(1)); /* 15.2.9.3.19(x) */ +} diff --git a/src/object.c b/src/object.c new file mode 100644 index 0000000000..c60c2fb7bf --- /dev/null +++ b/src/object.c @@ -0,0 +1,632 @@ +#include "mruby.h" +#include +#include "mruby/string.h" +#include +#include "mruby/class.h" +#include "method.h" +#include "mruby/numeric.h" +#include "mdata.h" + +#ifdef INCLUDE_REGEXP + #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr +#else + #define mrb_usascii_str_new2 mrb_str_new_cstr + #define 
mrb_usascii_str_new mrb_str_new +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +int +mrb_obj_eq(mrb_state *mrb, mrb_value v1, mrb_value v2) +{ + if (v1.tt != v2.tt) return 0; + switch (v1.tt) { + case MRB_TT_TRUE: + return 1; + + case MRB_TT_FALSE: + case MRB_TT_FIXNUM: + return (v1.value.i == v2.value.i); + case MRB_TT_SYMBOL: + return (v1.value.sym == v2.value.sym); + + case MRB_TT_FLOAT: + return (v1.value.f == v2.value.f); + + default: + return (v1.value.p == v2.value.p); + } +} + +int +mrb_obj_equal(mrb_state *mrb, mrb_value v1, mrb_value v2) +{ + /* temporary definition */ + return mrb_obj_eq(mrb, v1, v2); +} + +int +mrb_equal(mrb_state *mrb, mrb_value obj1, mrb_value obj2) +{ + mrb_value result; + + if (mrb_obj_eq(mrb, obj1, obj2)) return TRUE; + result = mrb_funcall(mrb, obj1, "==", 1, obj2); + if (mrb_nil_p(result)) { + return FALSE; + } + else { + if (mrb_type(result) == MRB_TT_TRUE) { + return TRUE; + } + else { + return FALSE; + } + } +} + +/* + * Document-class: NilClass + * + * The class of the singleton object nil. + */ + +/* 15.2.4.3.4 */ +/* + * call_seq: + * nil.nil? -> true + * + * Only the object nil responds true to nil?. + */ + +static mrb_value +mrb_true(mrb_state *mrb, mrb_value obj) +{ + return mrb_true_value(); +} + +/* 15.2.4.3.5 */ +/* + * call-seq: + * nil.to_s -> "" + * + * Always returns the empty string. + */ + +static mrb_value +nil_to_s(mrb_state *mrb, mrb_value obj) +{ + return mrb_usascii_str_new(mrb, 0, 0); +} + +/*********************************************************************** + * Document-class: TrueClass + * + * The global value true is the only instance of class + * TrueClass and represents a logically true value in + * boolean expressions. The class provides operators allowing + * true to be used in logical expressions. + */ + +/* 15.2.5.3.1 */ +/* + * call-seq: + * true & obj -> true or false + * + * And---Returns false if obj is + * nil or false, true otherwise. 
+ */ + +static mrb_value +true_and(mrb_state *mrb, mrb_value obj) +{ + mrb_value obj2; + + mrb_get_args(mrb, "o", &obj2); + return mrb_test(obj2)?mrb_true_value():mrb_false_value(); +} + +/* 15.2.5.3.2 */ +/* + * call-seq: + * true ^ obj -> !obj + * + * Exclusive Or---Returns true if obj is + * nil or false, false + * otherwise. + */ + +static mrb_value +true_xor(mrb_state *mrb, mrb_value obj) +{ + mrb_value obj2; + + mrb_get_args(mrb, "o", &obj2); + return mrb_test(obj2)?mrb_false_value():mrb_true_value(); +} + +/* 15.2.5.3.3 */ +/* + * call-seq: + * true.to_s -> "true" + * + * The string representation of true is "true". + */ + +static mrb_value +true_to_s(mrb_state *mrb, mrb_value obj) +{ + return mrb_usascii_str_new2(mrb, "true"); +} + +/* 15.2.5.3.4 */ +/* + * call-seq: + * true | obj -> true + * + * Or---Returns true. As anObject is an argument to + * a method call, it is always evaluated; there is no short-circuit + * evaluation in this case. + * + * true | puts("or") + * true || puts("logical or") + * + * produces: + * + * or + */ + +static mrb_value +true_or(mrb_state *mrb, mrb_value obj) +{ + mrb_value obj2; + + mrb_get_args(mrb, "o", &obj2); + return mrb_true_value(); +} + +/* + * Document-class: FalseClass + * + * The global value false is the only instance of class + * FalseClass and represents a logically false value in + * boolean expressions. The class provides operators allowing + * false to participate correctly in logical expressions. + * + */ + +/* 15.2.4.3.1 */ +/* 15.2.6.3.1 */ +/* + * call-seq: + * false & obj -> false + * nil & obj -> false + * + * And---Returns false. obj is always + * evaluated as it is the argument to a method call---there is no + * short-circuit evaluation in this case. 
+ */ + +static mrb_value +false_and(mrb_state *mrb, mrb_value obj) +{ + mrb_value obj2; + + mrb_get_args(mrb, "o", &obj2); + return mrb_false_value(); +} + +/* 15.2.4.3.2 */ +/* 15.2.6.3.2 */ +/* + * call-seq: + * false ^ obj -> true or false + * nil ^ obj -> true or false + * + * Exclusive Or---If obj is nil or + * false, returns false; otherwise, returns + * true. + * + */ + +static mrb_value +false_xor(mrb_state *mrb, mrb_value obj) +{ + mrb_value obj2; + + mrb_get_args(mrb, "o", &obj2); + return mrb_test(obj2)?mrb_true_value():mrb_false_value(); +} + +/* 15.2.4.3.3 */ +/* 15.2.6.3.4 */ +/* + * call-seq: + * false | obj -> true or false + * nil | obj -> true or false + * + * Or---Returns false if obj is + * nil or false; true otherwise. + */ + +static mrb_value +false_or(mrb_state *mrb, mrb_value obj) +{ + mrb_value obj2; + + mrb_get_args(mrb, "o", &obj2); + return mrb_test(obj2)?mrb_true_value():mrb_false_value(); +} + +/* 15.2.6.3.3 */ +/* + * call-seq: + * false.to_s -> "false" + * + * 'nuf said... 
+ */ + +static mrb_value +false_to_s(mrb_state *mrb, mrb_value obj) +{ + return mrb_usascii_str_new2(mrb, "false"); +} + +void +mrb_init_object(mrb_state *mrb) +{ + struct RClass *n; + struct RClass *t; + struct RClass *f; + + n = mrb->nil_class = mrb_define_class(mrb, "NilClass", mrb->object_class); + mrb_define_method(mrb, n, "&", false_and, ARGS_REQ(1)); /* 15.2.4.3.1 */ + mrb_define_method(mrb, n, "^", false_xor, ARGS_REQ(1)); /* 15.2.4.3.2 */ + mrb_define_method(mrb, n, "|", false_or, ARGS_REQ(1)); /* 15.2.4.3.3 */ + mrb_define_method(mrb, n, "nil?", mrb_true, ARGS_NONE()); /* 15.2.4.3.4 */ + mrb_define_method(mrb, n, "to_s", nil_to_s, ARGS_NONE()); /* 15.2.4.3.5 */ + + t = mrb->true_class = mrb_define_class(mrb, "TrueClass", mrb->object_class); + mrb_define_method(mrb, t, "&", true_and, ARGS_REQ(1)); /* 15.2.5.3.1 */ + mrb_define_method(mrb, t, "^", true_xor, ARGS_REQ(1)); /* 15.2.5.3.2 */ + mrb_define_method(mrb, t, "to_s", true_to_s, ARGS_NONE()); /* 15.2.5.3.3 */ + mrb_define_method(mrb, t, "|", true_or, ARGS_REQ(1)); /* 15.2.5.3.4 */ + + f = mrb->false_class = mrb_define_class(mrb, "FalseClass", mrb->object_class); + mrb_define_method(mrb, f, "&", false_and, ARGS_REQ(1)); /* 15.2.6.3.1 */ + mrb_define_method(mrb, f, "^", false_xor, ARGS_REQ(1)); /* 15.2.6.3.2 */ + mrb_define_method(mrb, f, "to_s", false_to_s, ARGS_NONE()); /* 15.2.6.3.3 */ + mrb_define_method(mrb, f, "|", false_or, ARGS_REQ(1)); /* 15.2.6.3.4 */ +} + +mrb_value +convert_type(mrb_state *mrb, mrb_value val, const char *tname, const char *method, int raise) +{ + mrb_sym m = 0; + + m = mrb_intern(mrb, method); + if (!mrb_respond_to(mrb, val, m)) { + if (raise) { + mrb_raise(mrb, E_TYPE_ERROR, "can't convert %s into %s", + mrb_nil_p(val) ? "nil" : + (mrb_type(val) == MRB_TT_TRUE) ? "true" : + (mrb_type(val) == MRB_TT_FALSE) ? 
"false" : + mrb_obj_classname(mrb, val), + tname); + return mrb_nil_value(); + } + else { + return mrb_nil_value(); + } + } + return mrb_funcall(mrb, val, method, 0); +} + +mrb_value +mrb_check_to_integer(mrb_state *mrb, mrb_value val, const char *method) +{ + mrb_value v; + + if (mrb_type(val) == MRB_TT_FIXNUM) return val; + v = convert_type(mrb, val, "Integer", method, FALSE); + if (mrb_nil_p(v)) return (v); + if (!mrb_obj_is_kind_of(mrb, v, mrb_obj_class(mrb, v))) { + return mrb_nil_value(); + } + return v; +} + +mrb_value +mrb_convert_type(mrb_state *mrb, mrb_value val, mrb_int type, const char *tname, const char *method) +{ + mrb_value v; + + if (mrb_type(val) == type) return val; + v = convert_type(mrb, val, tname, method, 1/*Qtrue*/); + if (mrb_type(v) != type) { + mrb_raise(mrb, E_TYPE_ERROR, "%s#%s should return %s", + mrb_obj_classname(mrb, val), method, tname); + } + return v; +} + +mrb_value +mrb_check_convert_type(mrb_state *mrb, mrb_value val, mrb_int type, const char *tname, const char *method) +{ + mrb_value v; + + /* always convert T_DATA */ + if (mrb_type(val) == type && type != MRB_TT_DATA) return val; + v = convert_type(mrb, val, tname, method, 0/*Qfalse*/); + if (mrb_nil_p(v)) return mrb_nil_value(); + if (mrb_type(v) != type) { + mrb_raise(mrb, E_TYPE_ERROR, "%s#%s should return %s", + mrb_obj_classname(mrb, val), method, tname); + } + return v; +} + +static const struct types { + unsigned char type; + const char *name; +} builtin_types[] = { +// {MRB_TT_NIL, "nil"}, + {MRB_TT_FALSE, "false"}, + {MRB_TT_TRUE, "true"}, + {MRB_TT_FIXNUM, "Fixnum"}, + {MRB_TT_SYMBOL, "Symbol"}, /* :symbol */ + {MRB_TT_MODULE, "Module"}, + {MRB_TT_OBJECT, "Object"}, + {MRB_TT_CLASS, "Class"}, + {MRB_TT_ICLASS, "iClass"}, /* internal use: mixed-in module holder */ + {MRB_TT_SCLASS, "SClass"}, + {MRB_TT_PROC, "Proc"}, + {MRB_TT_FLOAT, "Float"}, + {MRB_TT_ARRAY, "Array"}, + {MRB_TT_HASH, "Hash"}, + {MRB_TT_STRING, "String"}, + {MRB_TT_RANGE, "Range"}, + 
{MRB_TT_REGEX, "Regexp"}, + {MRB_TT_STRUCT, "Struct"}, +// {MRB_TT_BIGNUM, "Bignum"}, + {MRB_TT_FILE, "File"}, + {MRB_TT_DATA, "Data"}, /* internal use: wrapped C pointers */ + {MRB_TT_MATCH, "MatchData"}, /* data of $~ */ +// {MRB_TT_VARMAP, "Varmap"}, /* internal use: dynamic variables */ +// {MRB_TT_NODE, "Node"}, /* internal use: syntax tree node */ +// {MRB_TT_UNDEF, "undef"}, /* internal use: #undef; should not happen */ + {-1, 0} +}; + +void +mrb_check_type(mrb_state *mrb, mrb_value x, enum mrb_vtype t) +{ + const struct types *type = builtin_types; + struct RString *s; + int xt; + + /*if (x == Qundef) { + //mrb_bug("undef leaked to the Ruby space"); + printf ("undef leaked to the Ruby space\n"); + }*/ + + xt = mrb_type(x); + if ((xt != t) || (xt == MRB_TT_DATA)) { + while (type->type < MRB_TT_MAXDEFINE) { + if (type->type == t) { + const char *etype; + + if (mrb_nil_p(x)) { + etype = "nil"; + } + else if (mrb_type(x) == MRB_TT_FIXNUM) { + etype = "Fixnum"; + } + else if (mrb_type(x) == MRB_TT_SYMBOL) { + etype = "Symbol"; + } + else if (mrb_special_const_p(x)) { + s = mrb_str_ptr(mrb_obj_as_string(mrb, x)); + etype = s->buf; + } + else { + etype = mrb_obj_classname(mrb, x); + } + mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type %s (expected %s)", + etype, type->name); + } + type++; + } + /*mrb_bug("unknown type 0x%x", t);*/ + printf ("unknown type 0x%x (0x%x given)", t, mrb_type(x)); + } +} + +/* 15.3.1.3.46 */ +/* + * call-seq: + * obj.to_s => string + * + * Returns a string representing obj. The default + * to_s prints the object's class and an encoding of the + * object id. 
As a special case, the top-level object that is the + * initial execution context of Ruby programs returns ``main.'' + */ + +mrb_value +mrb_any_to_s(mrb_state *mrb, mrb_value obj) +{ + const char *cname = mrb_obj_classname(mrb, obj); + size_t len; + mrb_value str; + struct RString *s; + + len = strlen(cname)+6+16; + str = mrb_str_new(mrb, 0, len); /* 6:tags 16:addr */ + s = mrb_str_ptr(str); + // snprintf(RSTRING(str)->ptr, len+1, "#<%s:0x%lx>", cname, obj); + sprintf(s->buf, "#<%s:0x%lx>", cname, (unsigned long)(obj.value.p)); + s->len = strlen(s->buf); + /*if (OBJ_TAINTED(obj)) OBJ_TAINT(str);*/ + + return str; +} + +/* + * call-seq: + * obj.is_a?(class) => true or false + * obj.kind_of?(class) => true or false + * + * Returns true if class is the class of + * obj, or if class is one of the superclasses of + * obj or modules included in obj. + * + * module M; end + * class A + * include M + * end + * class B < A; end + * class C < B; end + * b = B.new + * b.instance_of? A #=> false + * b.instance_of? B #=> true + * b.instance_of? C #=> false + * b.instance_of? M #=> false + * b.kind_of? A #=> true + * b.kind_of? B #=> true + * b.kind_of? C #=> false + * b.kind_of? 
M #=> true + */ + +int +mrb_obj_is_kind_of(mrb_state *mrb, mrb_value obj, struct RClass *c) +{ + struct RClass *cl = mrb_class(mrb, obj); + + switch (c->tt) { + case MRB_TT_MODULE: + case MRB_TT_CLASS: + case MRB_TT_ICLASS: + break; + + default: + mrb_raise(mrb, E_TYPE_ERROR, "class or module required"); + } + + while (cl) { + if (cl == c || cl->mt == c->mt) + return 1/* TRUE */; + cl = cl->super; + } + return 0/* FALSE */; +} + +static mrb_value +mrb_to_integer(mrb_state *mrb, mrb_value val, const char *method) +{ + mrb_value v; + + if (FIXNUM_P(val)) return val; + //if (TYPE(val) == T_BIGNUM) return val; + v = convert_type(mrb, val, "Integer", method, TRUE); + if (!mrb_obj_is_kind_of(mrb, v, mrb->fixnum_class)) { + const char *cname = mrb_obj_classname(mrb, val); + mrb_raise(mrb, E_TYPE_ERROR, "can't convert %s to Integer (%s#%s gives %s)", + cname, cname, method, mrb_obj_classname(mrb, v)); + } + return v; +} + +mrb_value +mrb_to_int(mrb_state *mrb, mrb_value val) +{ + return mrb_to_integer(mrb, val, "to_int"); +} + +static mrb_value +mrb_convert_to_integer(mrb_state *mrb, mrb_value val, int base) +{ + mrb_value tmp; + + if (mrb_nil_p(val)) { + if (base != 0) goto arg_error; + mrb_raise(mrb, E_TYPE_ERROR, "can't convert nil into Integer"); + } + switch (mrb_type(val)) { + case MRB_TT_FLOAT: + if (base != 0) goto arg_error; + if (mrb_float(val) <= (double)FIXNUM_MAX + && mrb_float(val) >= (double)FIXNUM_MIN) { + break; + } + return mrb_dbl2big(mrb, mrb_float(val)); + + case MRB_TT_FIXNUM: + if (base != 0) goto arg_error; + return val; + + case MRB_TT_STRING: +string_conv: + return mrb_str_to_inum(mrb, val, base, TRUE); + + default: + break; + } + if (base != 0) { + tmp = mrb_check_string_type(mrb, val); + if (!mrb_nil_p(tmp)) goto string_conv; + arg_error: + mrb_raise(mrb, E_ARGUMENT_ERROR, "base specified for non string value"); + } + tmp = convert_type(mrb, val, "Integer", "to_int", FALSE); + if (mrb_nil_p(tmp)) { + return mrb_to_integer(mrb, val, "to_i"); + } 
+ return tmp; +} + +mrb_value +mrb_Integer(mrb_state *mrb, mrb_value val) +{ + return mrb_convert_to_integer(mrb, val, 0); +} + +mrb_value +mrb_Float(mrb_state *mrb, mrb_value val) +{ + if (mrb_nil_p(val)) { + mrb_raise(mrb, E_TYPE_ERROR, "can't convert nil into Float"); + } + switch (mrb_type(val)) { + case MRB_TT_FIXNUM: + return mrb_float_value((mrb_float)mrb_fixnum(val)); + + case MRB_TT_FLOAT: + return val; + + case MRB_TT_STRING: + return mrb_float_value(mrb_str_to_dbl(mrb, val, TRUE)); + + default: + return mrb_convert_type(mrb, val, MRB_TT_FLOAT, "Float", "to_f"); + } +} + +mrb_value +mrb_inspect(mrb_state *mrb, mrb_value obj) +{ + return mrb_obj_as_string(mrb, mrb_funcall(mrb, obj, "inspect", 0, 0)); +} + +int +mrb_eql(mrb_state *mrb, mrb_value obj1, mrb_value obj2) +{ + return RTEST(mrb_funcall(mrb, obj1, "eql?", 1, obj2)); +} + diff --git a/src/oniguruma.h b/src/oniguruma.h new file mode 100644 index 0000000000..3332df023b --- /dev/null +++ b/src/oniguruma.h @@ -0,0 +1,771 @@ +#ifndef ONIGURUMA_H +#define ONIGURUMA_H +/********************************************************************** + oniguruma.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define ONIGURUMA +#define ONIGURUMA_VERSION_MAJOR 5 +#define ONIGURUMA_VERSION_MINOR 9 +#define ONIGURUMA_VERSION_TEENY 2 + +#ifdef __cplusplus +# ifndef HAVE_PROTOTYPES +# define HAVE_PROTOTYPES 1 +# endif +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ +#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +#ifndef ONIG_EXTERN +#ifdef RUBY_EXTERN +#define ONIG_EXTERN RUBY_EXTERN +#else +#if defined(_WIN32) && !defined(__GNUC__) +#if defined(EXPORT) || defined(RUBY_EXPORT) +#define ONIG_EXTERN extern __declspec(dllexport) +#else +#define ONIG_EXTERN extern __declspec(dllimport) +#endif +#endif +#endif +#endif + +#ifndef ONIG_EXTERN +#define ONIG_EXTERN extern +#endif + +/* PART: character encoding */ + +#ifndef ONIG_ESCAPE_UCHAR_COLLISION +#define UChar OnigUChar +#endif + +typedef unsigned char OnigUChar; +typedef unsigned int OnigCodePoint; +typedef unsigned int OnigCtype; +typedef size_t OnigDistance; + +#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) + 
+typedef unsigned int OnigCaseFoldType; /* case fold flag */ + +ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag; + +/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ +/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ +#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) +#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) + +#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR +#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag + + +#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3 +#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13 +/* 13 => Unicode:0x1ffc */ + +/* code range */ +#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0]) +#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1] +#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2] + +typedef struct { + int byte_len; /* argument(original) character(s) byte length */ + int code_len; /* number of code */ + OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]; +} OnigCaseFoldCodeItem; + +typedef struct { + OnigCodePoint esc; + OnigCodePoint anychar; + OnigCodePoint anytime; + OnigCodePoint zero_or_one_time; + OnigCodePoint one_or_more_time; + OnigCodePoint anychar_anytime; +} OnigMetaCharTableType; + +typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg); + +typedef struct OnigEncodingTypeST { + int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc); + const char* name; + int max_enc_len; + int min_enc_len; + int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); + OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); + int (*code_to_mbclen)(OnigCodePoint code, struct OnigEncodingTypeST* enc); + int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf, struct OnigEncodingTypeST* enc); + int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, 
struct OnigEncodingTypeST* enc); + int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, struct OnigEncodingTypeST* enc); + int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], struct OnigEncodingTypeST* enc); + int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end); + int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, struct OnigEncodingTypeST* enc); + int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc); + OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); + int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); + int ruby_encoding_index; +} OnigEncodingType; + +typedef OnigEncodingType* OnigEncoding; + +ONIG_EXTERN OnigEncodingType OnigEncodingASCII; + +#define ONIG_ENCODING_ASCII (&OnigEncodingASCII) + +#define ONIG_ENCODING_UNDEF ((OnigEncoding )0) + + +/* work size */ +#define ONIGENC_CODE_TO_MBC_MAXLEN 7 +#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18 +/* 18: 6(max-byte) * 3(case-fold chars) */ + +/* character types */ +#define ONIGENC_CTYPE_NEWLINE 0 +#define ONIGENC_CTYPE_ALPHA 1 +#define ONIGENC_CTYPE_BLANK 2 +#define ONIGENC_CTYPE_CNTRL 3 +#define ONIGENC_CTYPE_DIGIT 4 +#define ONIGENC_CTYPE_GRAPH 5 +#define ONIGENC_CTYPE_LOWER 6 +#define ONIGENC_CTYPE_PRINT 7 +#define ONIGENC_CTYPE_PUNCT 8 +#define ONIGENC_CTYPE_SPACE 9 +#define ONIGENC_CTYPE_UPPER 10 +#define ONIGENC_CTYPE_XDIGIT 11 +#define ONIGENC_CTYPE_WORD 12 +#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */ +#define ONIGENC_CTYPE_ASCII 14 +#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII +#define ONIGENC_CTYPE_SPECIAL_MASK 128 +#define ONIGENC_CTYPE_S /* [\t\n\v\f\r\s] */ \ + ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_SPACE +#define 
ONIGENC_CTYPE_D /* [0-9] */ \ + ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_DIGIT +#define ONIGENC_CTYPE_W /* [0-9A-Za-z_] */ \ + ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_WORD +#define ONIGENC_CTYPE_SPECIAL_P(ctype) ((ctype) & ONIGENC_CTYPE_SPECIAL_MASK) + + +#define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e) + +#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) +#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) +#define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1) +#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) +#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) +#define ONIGENC_IS_MBC_WORD(enc,s,end) \ + ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end)) + + +#define ONIGENC_NAME(enc) ((enc)->name) + +#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \ + (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc) +#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ + (enc)->is_allowed_reverse_match(s,end,enc) +#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \ + (enc)->left_adjust_char_head(start, s, end, enc) +#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \ + (enc)->apply_all_case_fold(case_fold_flag,f,arg,enc) +#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \ + (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs,enc) +#define ONIGENC_STEP_BACK(enc,start,s,end,n) \ + onigenc_step_back((enc),(start),(s),(end),(n)) + +#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n) +#define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r)) +#define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r) + +#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1) +#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1) + +#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n)) +#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1) +#define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r)) + +#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc) + +ONIG_EXTERN +int onigenc_mbclen_approximate(const 
OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc); + +#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc) +#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) +#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) +#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) +#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc) +#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc) +#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc) +#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc) +#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \ + (enc)->property_name_to_ctype(enc,p,end) + +#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype,enc) + +#define ONIGENC_IS_CODE_NEWLINE(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE) +#define ONIGENC_IS_CODE_GRAPH(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH) +#define ONIGENC_IS_CODE_PRINT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT) +#define ONIGENC_IS_CODE_ALNUM(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM) +#define ONIGENC_IS_CODE_ALPHA(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA) +#define ONIGENC_IS_CODE_LOWER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER) +#define ONIGENC_IS_CODE_UPPER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER) +#define ONIGENC_IS_CODE_CNTRL(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL) +#define ONIGENC_IS_CODE_PUNCT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT) +#define ONIGENC_IS_CODE_SPACE(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE) +#define ONIGENC_IS_CODE_BLANK(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK) +#define ONIGENC_IS_CODE_DIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT) +#define 
ONIGENC_IS_CODE_XDIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT) +#define ONIGENC_IS_CODE_WORD(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD) + +#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \ + (enc)->get_ctype_code_range(ctype,sbout,ranges,enc) + +ONIG_EXTERN +OnigUChar* onigenc_step_back(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, int n); + + +/* encoding API */ +ONIG_EXTERN +int onigenc_init(void); +ONIG_EXTERN +int onigenc_set_default_encoding(OnigEncoding enc); +ONIG_EXTERN +OnigEncoding onigenc_get_default_encoding(void); +ONIG_EXTERN +void onigenc_set_default_caseconv_table(const OnigUChar* table); +ONIG_EXTERN +OnigUChar* onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev); +ONIG_EXTERN +OnigUChar* onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end); +ONIG_EXTERN +OnigUChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end); +ONIG_EXTERN +OnigUChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end); +ONIG_EXTERN +int onigenc_strlen(OnigEncoding enc, const OnigUChar* p, const OnigUChar* end); +ONIG_EXTERN +int onigenc_strlen_null(OnigEncoding enc, const OnigUChar* p); +ONIG_EXTERN +int onigenc_str_bytelen_null(OnigEncoding enc, const OnigUChar* p); + + + +/* PART: regular expression */ + +/* config parameters */ +#define ONIG_NREGION 10 +#define ONIG_MAX_BACKREF_NUM 1000 +#define ONIG_MAX_REPEAT_NUM 100000 +#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000 +/* constants */ +#define ONIG_MAX_ERROR_MESSAGE_LEN 90 + +typedef unsigned int OnigOptionType; + +#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE + +/* options */ +#define ONIG_OPTION_NONE 0U +#define ONIG_OPTION_IGNORECASE 
1U +#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) +#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) +#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) +#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1) +#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1) +#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1) +#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1) +#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) +/* options (search time) */ +#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) +#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) +#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) +#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */ + +#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) +#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) +#define ONIG_IS_OPTION_ON(options,option) ((options) & (option)) + +/* syntax */ +typedef struct { + unsigned int op; + unsigned int op2; + unsigned int behavior; + OnigOptionType options; /* default option */ + OnigMetaCharTableType meta_char_table; +} OnigSyntaxType; + +ONIG_EXTERN const OnigSyntaxType OnigSyntaxASIS; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixBasic; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixExtended; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxEmacs; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxGrep; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxGnuRegex; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxJava; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl_NG; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxRuby; + +/* predefined syntaxes (see regsyntax.c) */ +#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS) +#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) +#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) +#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) +#define 
ONIG_SYNTAX_GREP (&OnigSyntaxGrep) +#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) +#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) +#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) +#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG) +#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) + +/* default syntax */ +ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax; +#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax + +/* syntax (operators) */ +#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0) +#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */ +#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */ +#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3) +#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */ +#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5) +#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */ +#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7) +#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */ +#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */ +#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */ +#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */ +#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */ +#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */ +#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */ +#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */ +#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */ +#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */ +#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */ +#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */ +#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */ +#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */ +#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */ +#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */ +#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */ +#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */ +#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... 
*/ +#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */ +#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */ +#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */ +#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */ + +#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */ +#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */ +#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */ +#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */ +#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */ +#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */ +#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */ +#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */ +#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */ +#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */ +#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */ +#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */ +#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */ +#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */ +/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */ +#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */ +#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */ + +/* syntax (behavior) */ +#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ +#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */ +#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */ +#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */ +#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? 
*/ +#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */ +#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/ +#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */ +#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */ +#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */ +#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */ + +/* syntax (behavior) in char class [...] */ +#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */ +#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */ +#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22) +#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */ +/* syntax (behavior) warning */ +#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */ +#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */ +#define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */ + +/* meta character specifiers (onig_set_meta_char()) */ +#define ONIG_META_CHAR_ESCAPE 0 +#define ONIG_META_CHAR_ANYCHAR 1 +#define ONIG_META_CHAR_ANYTIME 2 +#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3 +#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4 +#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5 + +#define ONIG_INEFFECTIVE_META_CHAR 0 + +/* error codes */ +#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000) +/* normal return */ +#define ONIG_NORMAL 0 +#define ONIG_MISMATCH -1 +#define ONIG_NO_SUPPORT_CONFIG -2 + +/* internal error */ +#define ONIGERR_MEMORY -5 +#define ONIGERR_TYPE_BUG -6 +#define ONIGERR_PARSER_BUG -11 +#define ONIGERR_STACK_BUG -12 +#define ONIGERR_UNDEFINED_BYTECODE -13 +#define ONIGERR_UNEXPECTED_BYTECODE -14 +#define ONIGERR_MATCH_STACK_LIMIT_OVER -15 +#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 +#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 +/* general error */ +#define ONIGERR_INVALID_ARGUMENT -30 +/* syntax error */ +#define ONIGERR_END_PATTERN_AT_LEFT_BRACE 
-100 +#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101 +#define ONIGERR_EMPTY_CHAR_CLASS -102 +#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103 +#define ONIGERR_END_PATTERN_AT_ESCAPE -104 +#define ONIGERR_END_PATTERN_AT_META -105 +#define ONIGERR_END_PATTERN_AT_CONTROL -106 +#define ONIGERR_META_CODE_SYNTAX -108 +#define ONIGERR_CONTROL_CODE_SYNTAX -109 +#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 +#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 +#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 +#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 +#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 +#define ONIGERR_NESTED_REPEAT_OPERATOR -115 +#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116 +#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 +#define ONIGERR_END_PATTERN_IN_GROUP -118 +#define ONIGERR_UNDEFINED_GROUP_OPTION -119 +#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121 +#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122 +#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123 +/* values error (syntax error) */ +#define ONIGERR_TOO_BIG_NUMBER -200 +#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 +#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 +#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 +#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 +#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205 +#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206 +#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207 +#define ONIGERR_INVALID_BACKREF -208 +#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209 +#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212 +#define ONIGERR_EMPTY_GROUP_NAME -214 +#define ONIGERR_INVALID_GROUP_NAME -215 +#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216 +#define ONIGERR_UNDEFINED_NAME_REFERENCE -217 +#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218 +#define ONIGERR_MULTIPLEX_DEFINED_NAME -219 +#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220 +#define ONIGERR_NEVER_ENDING_RECURSION -221 
+#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 +#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 +#define ONIGERR_INVALID_CODE_POINT_VALUE -400 +#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 +#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 +#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 +#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403 + +/* errors related to thread */ +#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 + + +/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ +#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31 +#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \ + ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i]) + +typedef struct OnigCaptureTreeNodeStruct { + int group; /* group number */ + int beg; + int end; + int allocated; + int num_childs; + struct OnigCaptureTreeNodeStruct** childs; +} OnigCaptureTreeNode; + +/* match result region type */ +struct re_registers { + int allocated; + int num_regs; + int* beg; + int* end; + /* extended */ + OnigCaptureTreeNode* history_root; /* capture history tree root */ +}; + +/* capture tree traverse */ +#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1 +#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2 +#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \ + ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST ) + + +#define ONIG_REGION_NOTPOS -1 + +typedef struct re_registers OnigRegion; + +typedef struct { + OnigEncoding enc; + OnigUChar* par; + OnigUChar* par_end; +} OnigErrorInfo; + +typedef struct { + int lower; + int upper; +} OnigRepeatRange; + +typedef void (*OnigWarnFunc)(const char* s); +extern void onig_null_warn(const char* s); +#define ONIG_NULL_WARN onig_null_warn + +#define ONIG_CHAR_TABLE_SIZE 256 + +/* regex_t state */ +#define ONIG_STATE_NORMAL 0 +#define ONIG_STATE_SEARCHING 1 +#define ONIG_STATE_COMPILING -1 +#define ONIG_STATE_MODIFY -2 + +#define ONIG_STATE(reg) \ + ((reg)->state > 0 ? 
ONIG_STATE_SEARCHING : (reg)->state) + +typedef struct re_pattern_buffer { + /* common members of BBuf(bytes-buffer) */ + unsigned char* p; /* compiled pattern */ + unsigned int used; /* used space for p */ + unsigned int alloc; /* allocated space for p */ + + int state; /* normal, searching, compiling */ + int num_mem; /* used memory(...) num counted from 1 */ + int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ + int num_null_check; /* OP_NULL_CHECK_START/END id counter */ + int num_comb_exp_check; /* combination explosion check */ + int num_call; /* number of subexp call */ + unsigned int capture_history; /* (?@...) flag (1-31) */ + unsigned int bt_mem_start; /* need backtrack flag */ + unsigned int bt_mem_end; /* need backtrack flag */ + int stack_pop_level; + int repeat_range_alloc; + OnigRepeatRange* repeat_range; + + OnigEncoding enc; + OnigOptionType options; + const OnigSyntaxType* syntax; + OnigCaseFoldType case_fold_flag; + void* name_table; + + /* optimization info (string search, char-map and anchors) */ + int optimize; /* optimize flag */ + int threshold_len; /* search str-length for apply optimize */ + int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ + OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ + OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ + int sub_anchor; /* start-anchor for exact or map */ + unsigned char *exact; + unsigned char *exact_end; + unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ + int *int_map; /* BM skip for exact_len > 255 */ + int *int_map_backward; /* BM skip for backward search */ + OnigDistance dmin; /* min-distance of exact or map */ + OnigDistance dmax; /* max-distance of exact or map */ + + /* regex_t link chain */ + struct re_pattern_buffer* chain; /* escape compile-conflict */ +} OnigRegexType; + +typedef OnigRegexType* OnigRegex; + +#ifndef ONIG_ESCAPE_REGEX_T_COLLISION + typedef OnigRegexType regex_t; +#endif + + +typedef struct { + int 
num_of_elements; + OnigEncoding pattern_enc; + OnigEncoding target_enc; + OnigSyntaxType* syntax; + OnigOptionType option; + OnigCaseFoldType case_fold_flag; +} OnigCompileInfo; + +/* Oniguruma Native API */ +ONIG_EXTERN +int onig_init(void); +ONIG_EXTERN +int onig_error_code_to_str(OnigUChar* s, int err_code, ...); +ONIG_EXTERN +void onig_set_warn_func(OnigWarnFunc f); +ONIG_EXTERN +void onig_set_verb_warn_func(OnigWarnFunc f); +ONIG_EXTERN +int onig_new(OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo); +ONIG_EXTERN +int onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax); +ONIG_EXTERN +int onig_new_without_alloc(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo); +ONIG_EXTERN +int onig_new_deluxe(OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo); +ONIG_EXTERN +void onig_free(OnigRegex); +ONIG_EXTERN +void onig_free_body(OnigRegex); +ONIG_EXTERN +int onig_recompile(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo); +ONIG_EXTERN +int onig_recompile_deluxe(OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo); +ONIG_EXTERN +long onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option); +ONIG_EXTERN +long onig_match(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option); +ONIG_EXTERN +OnigRegion* onig_region_new(void); +ONIG_EXTERN +void onig_region_init(OnigRegion* region); +ONIG_EXTERN 
+void onig_region_free(OnigRegion* region, int free_self); +ONIG_EXTERN +void onig_region_copy(OnigRegion* to, OnigRegion* from); +ONIG_EXTERN +void onig_region_clear(OnigRegion* region); +ONIG_EXTERN +int onig_region_resize(OnigRegion* region, int n); +ONIG_EXTERN +int onig_region_set(OnigRegion* region, int at, int beg, int end); +ONIG_EXTERN +int onig_name_to_group_numbers(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums); +ONIG_EXTERN +int onig_name_to_backref_number(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region); +ONIG_EXTERN +int onig_foreach_name(OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg); +ONIG_EXTERN +int onig_number_of_names(OnigRegex reg); +ONIG_EXTERN +int onig_number_of_captures(OnigRegex reg); +ONIG_EXTERN +int onig_number_of_capture_histories(OnigRegex reg); +ONIG_EXTERN +OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region); +ONIG_EXTERN +int onig_capture_tree_traverse(OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg); +ONIG_EXTERN +int onig_noname_group_capture_is_active(OnigRegex reg); +ONIG_EXTERN +OnigEncoding onig_get_encoding(OnigRegex reg); +ONIG_EXTERN +OnigOptionType onig_get_options(OnigRegex reg); +ONIG_EXTERN +OnigCaseFoldType onig_get_case_fold_flag(OnigRegex reg); +ONIG_EXTERN +const OnigSyntaxType* onig_get_syntax(OnigRegex reg); +ONIG_EXTERN +int onig_set_default_syntax(const OnigSyntaxType* syntax); +ONIG_EXTERN +void onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from); +ONIG_EXTERN +unsigned int onig_get_syntax_op(OnigSyntaxType* syntax); +ONIG_EXTERN +unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax); +ONIG_EXTERN +unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax); +ONIG_EXTERN +OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax); +ONIG_EXTERN +void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op); +ONIG_EXTERN 
+void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2); +ONIG_EXTERN +void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior); +ONIG_EXTERN +void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options); +ONIG_EXTERN +int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code); +ONIG_EXTERN +void onig_copy_encoding(OnigEncoding to, OnigEncoding from); +ONIG_EXTERN +OnigCaseFoldType onig_get_default_case_fold_flag(void); +ONIG_EXTERN +int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag); +ONIG_EXTERN +unsigned int onig_get_match_stack_limit_size(void); +ONIG_EXTERN +int onig_set_match_stack_limit_size(unsigned int size); +ONIG_EXTERN +int onig_end(void); +ONIG_EXTERN +const char* onig_version(void); +ONIG_EXTERN +const char* onig_copyright(void); + +#ifdef __cplusplus +} +#endif + +#endif /* ONIGURUMA_H */ diff --git a/src/opcode.h b/src/opcode.h new file mode 100644 index 0000000000..e6ea74f790 --- /dev/null +++ b/src/opcode.h @@ -0,0 +1,148 @@ +#ifndef OPCODE_H +#define OPCODE_H + +#define MAXARG_Bx ((1<<16)-1) +#define MAXARG_sBx (MAXARG_Bx>>1) /* `sBx' is signed */ + +/* instructions OP:A:B:C = 7:9:9:7 (32 bits) */ +/* OP:A:Bx = 7:9:16 */ +/* OP:Ax = 7:25 */ + +#define GET_OPCODE(i) (((mrb_code)(i)) & 0x7f) +#define GETARG_A(i) ((((mrb_code)(i)) >> 23) & 0x1ff) +#define GETARG_B(i) ((((mrb_code)(i)) >> 14) & 0x1ff) +#define GETARG_C(i) ((((mrb_code)(i)) >> 7) & 0x7f) +#define GETARG_Bx(i) ((((mrb_code)(i)) >> 7) & 0xffff) +#define GETARG_sBx(i) (GETARG_Bx(i)-MAXARG_sBx) +#define GETARG_Ax(i) ((((mrb_code)(i)) >> 7) & 0x1ffffff) +#define GETARG_UNPACK_b(i,n1,n2) ((((mrb_code)(i)) >> (7+n2)) & (((1<> 7) & (((1<R(A+1) (mSyms[B]=:>,C=1) */ +OP_GE,/* A B C R(A) := R(A)>=R(A+1) (mSyms[B]=:>=,C=1) */ + +OP_ARRAY,/* A B C R(A) := ary_new(R(B),R(B+1)..R(B+C)) */ +OP_ARYCAT,/* A B ary_cat(R(A),R(B)) */ +OP_ARYPUSH,/* A B ary_push(R(A),R(B)) */ +OP_AREF,/* A B C R(A) := R(B)[C] */ 
+OP_ASET,/* A B C R(B)[C] := R(A) */ +OP_APOST,/* A B C *R(A),R(A+1)..R(A+C) := R(A) */ + +OP_STRING,/* A Bx R(A) := str_dup(Lit(Bx)) */ +OP_STRCAT,/* A B str_cat(R(A),R(B)) */ + +OP_HASH,/* A B C R(A) := hash_new(R(B),R(B+1)..R(B+C)) */ +OP_LAMBDA,/* A Bz Cz R(A) := lambda(SEQ[Bz],Cm) */ +OP_RANGE,/* A B C R(A) := range_new(R(B),R(B+1),C) */ + +OP_OCLASS,/* A R(A) := ::Object */ +OP_CLASS,/* A B R(A) := newclass(R(A),mSym(B),R(A+1)) */ +OP_MODULE,/* A B R(A) := newmodule(R(A),mSym(B)) */ +OP_EXEC,/* A Bx R(A) := blockexec(R(A),SEQ[Bx]) */ +OP_METHOD,/* A B R(A).newmethod(mSym(B),R(A+1)) */ +OP_SCLASS,/* A B R(A) := R(B).singleton_class */ +OP_TCLASS,/* A R(A) := target_class */ + +OP_DEBUG,/* A print R(A) */ +OP_STOP,/* stop VM */ +OP_ERR,/* Bx raise RuntimeError with message Lit(Bx) */ + +OP_RSVD1,/* reserved instruction #1 */ +OP_RSVD2,/* reserved instruction #2 */ +OP_RSVD3,/* reserved instruction #3 */ +OP_RSVD4,/* reserved instruction #4 */ +OP_RSVD5,/* reserved instruction #5 */ +}; + +#define OP_L_STRICT 1 +#define OP_L_CAPTURE 2 +#define OP_L_METHOD OP_L_STRICT +#define OP_L_LAMBDA (OP_L_STRICT|OP_L_CAPTURE) +#define OP_L_BLOCK OP_L_CAPTURE + +#define OP_R_NORMAL 0 +#define OP_R_BREAK 1 +#define OP_R_RETURN 2 + +#endif /* OPCODE_H */ diff --git a/src/parse.y b/src/parse.y new file mode 100644 index 0000000000..5925b9a5e0 --- /dev/null +++ b/src/parse.y @@ -0,0 +1,5435 @@ +%{ +#undef PARSER_TEST +#undef PARSER_DEBUG + +#define YYDEBUG 1 +#define YYERROR_VERBOSE 1 +#define YYSTACK_USE_ALLOCA 0 + +#include "mruby.h" +#include "st.h" +#include "compile.h" +#include +#include +#include +#include + +#define YYLEX_PARAM p + +typedef mrb_ast_node node; +typedef struct mrb_parser_state parser_state; + +static int yylex(void *lval, parser_state *p); +static void yyerror(parser_state *p, const char *s); +static void yywarn(parser_state *p, const char *s); +static void yywarning(parser_state *p, const char *s); +static void backref_error(parser_state *p, node *n); + 
+#define identchar(c) (isalnum(c) || (c) == '_' || !isascii(c)) + +#define TRUE 1 +#define FALSE 0 + +typedef unsigned int stack_type; + +#define BITSTACK_PUSH(stack, n) ((stack) = ((stack)<<1)|((n)&1)) +#define BITSTACK_POP(stack) ((stack) = (stack) >> 1) +#define BITSTACK_LEXPOP(stack) ((stack) = ((stack) >> 1) | ((stack) & 1)) +#define BITSTACK_SET_P(stack) ((stack)&1) + +#define COND_PUSH(n) BITSTACK_PUSH(p->cond_stack, (n)) +#define COND_POP() BITSTACK_POP(p->cond_stack) +#define COND_LEXPOP() BITSTACK_LEXPOP(p->cond_stack) +#define COND_P() BITSTACK_SET_P(p->cond_stack) + +#define CMDARG_PUSH(n) BITSTACK_PUSH(p->cmdarg_stack, (n)) +#define CMDARG_POP() BITSTACK_POP(p->cmdarg_stack) +#define CMDARG_LEXPOP() BITSTACK_LEXPOP(p->cmdarg_stack) +#define CMDARG_P() BITSTACK_SET_P(p->cmdarg_stack) + +static mrb_sym +intern_gen(parser_state *p, const char *s) +{ + return mrb_intern(p->mrb, s); +} +#define intern(s) intern_gen(p,(s)) + +static void +cons_free_gen(parser_state *p, node *cons) +{ + cons->cdr = p->cells; + p->cells = cons; +} +#define cons_free(c) cons_free_gen(p, (c)) + +static void* +parser_palloc(parser_state *p, size_t size) +{ + void *m = mrb_pool_alloc(p->pool, size); + + if (!m) { + longjmp(p->jmp, 1); + } + return m; +} + +static node* +cons_gen(parser_state *p, node *car, node *cdr) +{ + node *c; + + if (p->cells) { + c = p->cells; + p->cells = p->cells->cdr; + } + else { + c = parser_palloc(p, sizeof(mrb_ast_node)); + } + + c->car = car; + c->cdr = cdr; + return c; +} +#define cons(a,b) cons_gen(p,(a),(b)) + +static node* +list1_gen(parser_state *p, node *a) +{ + return cons(a, 0); +} +#define list1(a) list1_gen(p, (a)) + +static node* +list2_gen(parser_state *p, node *a, node *b) +{ + return cons(a, cons(b,0)); +} +#define list2(a,b) list2_gen(p, (a),(b)) + +static node* +list3_gen(parser_state *p, node *a, node *b, node *c) +{ + return cons(a, cons(b, cons(c,0))); +} +#define list3(a,b,c) list3_gen(p, (a),(b),(c)) + +static node* 
+list4_gen(parser_state *p, node *a, node *b, node *c, node *d) +{ + return cons(a, cons(b, cons(c, cons(d, 0)))); +} +#define list4(a,b,c,d) list4_gen(p, (a),(b),(c),(d)) + +static node* +list5_gen(parser_state *p, node *a, node *b, node *c, node *d, node *e) +{ + return cons(a, cons(b, cons(c, cons(d, cons(e, 0))))); +} +#define list5(a,b,c,d,e) list5_gen(p, (a),(b),(c),(d),(e)) + +static node* +list6_gen(parser_state *p, node *a, node *b, node *c, node *d, node *e, node *f) +{ + return cons(a, cons(b, cons(c, cons(d, cons(e, cons(f, 0)))))); +} +#define list6(a,b,c,d,e,f) list6_gen(p, (a),(b),(c),(d),(e),(f)) + +static node* +append_gen(parser_state *p, node *a, node *b) +{ + node *c = a; + + if (!a) return b; + while (c->cdr) { + c = c->cdr; + } + if (b) { + c->cdr = b; + } + return a; +} +#define append(a,b) append_gen(p,(a),(b)) +#define push(a,b) append_gen(p,(a),list1(b)) + +static char* +parser_strndup(parser_state *p, const char *s, size_t len) +{ + char *b = parser_palloc(p, len+1); + + memcpy(b, s, len); + b[len] = '\0'; + return b; +} +#define strndup(s,len) parser_strndup(p, s, len) + +static char* +parser_strdup(parser_state *p, const char *s) +{ + return parser_strndup(p, s, strlen(s)); +} +#undef strdup +#define strdup(s) parser_strdup(p, s) + +// xxx ----------------------------- + +static node* +local_switch(parser_state *p) +{ + node *prev = p->locals; + + p->locals = cons(0, 0); + return prev; +} + +static void +local_resume(parser_state *p, node *prev) +{ + p->locals = prev; +} + +static void +local_nest(parser_state *p) +{ + p->locals = cons(0, p->locals); +} + +static void +local_unnest(parser_state *p) +{ + p->locals = p->locals->cdr; +} + +static int +local_var_p(parser_state *p, mrb_sym sym) +{ + node *l = p->locals; + + while (l) { + node *n = l->car; + while (n) { + if ((mrb_sym)n->car == sym) return 1; + n = n->cdr; + } + l = l->cdr; + } + return 0; +} + +static void +local_add_f(parser_state *p, mrb_sym sym) +{ + p->locals->car = 
push(p->locals->car, (node*)sym);
+}
+
+// Add sym to the innermost scope unless local_var_p() already finds it in
+// some scope reachable from p->locals.
+static void
+local_add(parser_state *p, mrb_sym sym)
+{
+  if (!local_var_p(p, sym)) {
+    local_add_f(p, sym);
+  }
+}
+
+// (:scope vars . body) -- vars is the innermost local-variable list
+static node*
+new_scope(parser_state *p, node *body)
+{
+  return cons((node*)NODE_SCOPE, cons(p->locals->car, body));
+}
+
+// (:begin body) when body is given, otherwise the bare (:begin)
+static node*
+new_begin(parser_state *p, node *body)
+{
+  if (body)
+    return list2((node*)NODE_BEGIN, body);
+  return cons((node*)NODE_BEGIN, 0);
+}
+
+// Currently an identity mapping: the node is returned unchanged.
+#define newline_node(n) (n)
+
+// (:rescue body rescue else)
+static node*
+new_rescue(parser_state *p, node *body, node *resq, node *els)
+{
+  return list4((node*)NODE_RESCUE, body, resq, els);
+}
+
+// (:ensure body nil . ensure) -- note the empty middle slot and that the
+// ensure part is stored as the tail of the list, not as an element
+static node*
+new_ensure(parser_state *p, node *a, node *b)
+{
+  return cons((node*)NODE_ENSURE, cons(a, cons(0, b)));
+}
+
+// (:nil)
+static node*
+new_nil(parser_state *p)
+{
+  return list1((node*)NODE_NIL);
+}
+
+// (:true)
+static node*
+new_true(parser_state *p)
+{
+  return list1((node*)NODE_TRUE);
+}
+
+// (:false)
+static node*
+new_false(parser_state *p)
+{
+  return list1((node*)NODE_FALSE);
+}
+
+// (:alias new . old) -- both symbols are stored directly in the cons cell
+static node*
+new_alias(parser_state *p, mrb_sym a, mrb_sym b)
+{
+  return cons((node*)NODE_ALIAS, cons((node*)a, (node*)b));
+}
+
+// (:if cond then else)
+static node*
+new_if(parser_state *p, node *a, node *b, node *c)
+{
+  return list4((node*)NODE_IF, a, b, c);
+}
+
+// `unless` has no node type of its own: it is emitted as
+// (:if cond else then), i.e. an :if with the two branches swapped.
+static node*
+new_unless(parser_state *p, node *a, node *b, node *c)
+{
+  return list4((node*)NODE_IF, a, c, b);
+}
+
+// (:while cond . body)
+static node*
+new_while(parser_state *p, node *a, node *b)
+{
+  return cons((node*)NODE_WHILE, cons(a, b));
+}
+
+// (:until cond . body)
+static node*
+new_until(parser_state *p, node *a, node *b)
+{
+  return cons((node*)NODE_UNTIL, cons(a, b));
+}
+
+// (:for var obj body)
+static node*
+new_for(parser_state *p, node *v, node *o, node *b)
+{
+  return list4((node*)NODE_FOR, v, o, b);
+}
+
+// (:case a ((when 
...) body) ((when...) body)) +static node* +new_case(parser_state *p, node *a, node *b) +{ + node *n = list2((node*)NODE_CASE, a); + node *n2 = n; + + while (n2->cdr) { + n2 = n2->cdr; + } + n2->cdr = b; + return n; +} + +// (:postexe a) +static node* +new_postexe(parser_state *p, node *a) +{ + return cons((node*)NODE_POSTEXE, a); +} + +// (:self) +static node* +new_self(parser_state *p) +{ + return list1((node*)NODE_SELF); +} + +// (:call a b c) +static node* +new_call(parser_state *p, node *a, mrb_sym b, node *c) +{ + return list4((node*)NODE_CALL, a, (node*)b, c); +} + +// (:fcall self mid args) +static node* +new_fcall(parser_state *p, mrb_sym b, node *c) +{ + return list4((node*)NODE_FCALL, new_self(p), (node*)b, c); +} + +#if 0 +// (:vcall self mid) +static node* +new_vcall(parser_state *p, mrb_sym b) +{ + return list3((node*)NODE_VCALL, new_self(p), (node*)b); +} +#endif + +// (:super . c) +static node* +new_super(parser_state *p, node *c) +{ + return cons((node*)NODE_SUPER, c); +} + +// (:zsuper) +static node* +new_zsuper(parser_state *p) +{ + return list1((node*)NODE_ZSUPER); +} + +// (:yield . c) +static node* +new_yield(parser_state *p, node *c) +{ + if (c) { + if (c->cdr) { + yyerror(p, "both block arg and actual block given"); + } + return cons((node*)NODE_YIELD, c->car); + } + return cons((node*)NODE_YIELD, 0); +} + +// (:return . c) +static node* +new_return(parser_state *p, node *c) +{ + return cons((node*)NODE_RETURN, c); +} + +// (:break . c) +static node* +new_break(parser_state *p, node *c) +{ + return cons((node*)NODE_BREAK, c); +} + +// (:next . 
c) +static node* +new_next(parser_state *p, node *c) +{ + return cons((node*)NODE_NEXT, c); +} + +// (:redo) +static node* +new_redo(parser_state *p) +{ + return list1((node*)NODE_REDO); +} + +// (:retry) +static node* +new_retry(parser_state *p) +{ + return list1((node*)NODE_RETRY); +} + +// (:dot2 a b) +static node* +new_dot2(parser_state *p, node *a, node *b) +{ + return cons((node*)NODE_DOT2, cons(a, b)); +} + +// (:dot3 a b) +static node* +new_dot3(parser_state *p, node *a, node *b) +{ + return cons((node*)NODE_DOT3, cons(a, b)); +} + +// (:colon2 b c) +static node* +new_colon2(parser_state *p, node *b, mrb_sym c) +{ + return cons((node*)NODE_COLON2, cons(b, (node*)c)); +} + +// (:colon3 . c) +static node* +new_colon3(parser_state *p, mrb_sym c) +{ + return cons((node*)NODE_COLON3, (node*)c); +} + +// (:and a b) +static node* +new_and(parser_state *p, node *a, node *b) +{ + return cons((node*)NODE_AND, cons(a, b)); +} + +// (:or a b) +static node* +new_or(parser_state *p, node *a, node *b) +{ + return cons((node*)NODE_OR, cons(a, b)); +} + +// (:array a...) +static node* +new_array(parser_state *p, node *a) +{ + return cons((node*)NODE_ARRAY, a); +} + +// (:splat . a) +static node* +new_splat(parser_state *p, node *a) +{ + return cons((node*)NODE_SPLAT, a); +} + +// (:hash (k . v) (k . v)...) +static node* +new_hash(parser_state *p, node *a) +{ + return cons((node*)NODE_HASH, a); +} + +// (:sym . a) +static node* +new_sym(parser_state *p, mrb_sym sym) +{ + return cons((node*)NODE_SYM, (node*)sym); +} + +// (:lvar . a) +static node* +new_lvar(parser_state *p, mrb_sym sym) +{ + return cons((node*)NODE_LVAR, (node*)sym); +} + +// (:gvar . a) +static node* +new_gvar(parser_state *p, mrb_sym sym) +{ + return cons((node*)NODE_GVAR, (node*)sym); +} + +// (:ivar . a) +static node* +new_ivar(parser_state *p, mrb_sym sym) +{ + return cons((node*)NODE_IVAR, (node*)sym); +} + +// (:cvar . 
a) +static node* +new_cvar(parser_state *p, mrb_sym sym) +{ + return cons((node*)NODE_CVAR, (node*)sym); +} + +// (:const . a) +static node* +new_const(parser_state *p, mrb_sym sym) +{ + return cons((node*)NODE_CONST, (node*)sym); +} + +// (:undef a...) +static node* +new_undef(parser_state *p, mrb_sym sym) +{ + return cons((node*)NODE_UNDEF, (node*)sym); +} + +// (:class class super body) +static node* +new_class(parser_state *p, node *c, node *s, node *b) +{ + return list4((node*)NODE_CLASS, c, s, cons(p->locals->car, b)); +} + +// (:sclass obj body) +static node* +new_sclass(parser_state *p, node *o, node *b) +{ + return list3((node*)NODE_SCLASS, o, cons(p->locals->car, b)); +} + +// (:module module body) +static node* +new_module(parser_state *p, node *m, node *b) +{ + return list3((node*)NODE_MODULE, m, cons(p->locals->car, b)); +} + +// (:def m lv (arg . body)) +static node* +new_def(parser_state *p, mrb_sym m, node *a, node *b) +{ + return list5((node*)NODE_DEF, (node*)m, p->locals->car, a, b); +} + +// (:sdef obj m lv (arg . body)) +static node* +new_sdef(parser_state *p, node *o, mrb_sym m, node *a, node *b) +{ + return list6((node*)NODE_SDEF, o, (node*)m, p->locals->car, a, b); +} + +// (:arg . sym) +static node* +new_arg(parser_state *p, mrb_sym sym) +{ + return cons((node*)NODE_ARG, (node*)sym); +} + +// (m o r m2 b) +// m: (a b c) +// o: ((a . e1) (b . e2)) +// r: a +// m2: (a b c) +// b: a +static node* +new_args(parser_state *p, node *m, node *opt, mrb_sym rest, node *m2, mrb_sym blk) +{ + node *n; + + n = cons(m2, (node*)blk); + n = cons((node*)rest, n); + n = cons(opt, n); + return cons(m, n); +} + +// (:block_arg . 
a)
+static node*
+new_block_arg(parser_state *p, node *a)
+{
+  return cons((node*)NODE_BLOCK_ARG, a);
+}
+
+// (:block lv arg body) -- lv is the innermost local-variable list
+static node*
+new_block(parser_state *p, node *a, node *b)
+{
+  return list4((node*)NODE_BLOCK, p->locals->car, a, b);
+}
+
+// (:lambda lv arg body) -- same layout as :block
+static node*
+new_lambda(parser_state *p, node *a, node *b)
+{
+  return list4((node*)NODE_LAMBDA, p->locals->car, a, b);
+}
+
+// (:asgn lhs . rhs)
+static node*
+new_asgn(parser_state *p, node *a, node *b)
+{
+  return cons((node*)NODE_ASGN, cons(a, b));
+}
+
+// (:masgn mlhs=(pre rest post) . mrhs)
+static node*
+new_masgn(parser_state *p, node *a, node *b)
+{
+  return cons((node*)NODE_MASGN, cons(a, b));
+}
+
+// (:op_asgn lhs op rhs) -- op is the operator symbol stored in a node slot
+static node*
+new_op_asgn(parser_state *p, node *a, mrb_sym op, node *b)
+{
+  return list4((node*)NODE_OP_ASGN, a, (node*)op, b);
+}
+
+// (:int s base) -- s is a parser-pool copy of the digit text; base is the
+// integer radix stored directly in the pointer slot
+static node*
+new_int(parser_state *p, const char *s, int base)
+{
+  return list3((node*)NODE_INT, (node*)strdup(s), (node*)base);
+}
+
+// (:float . s) -- s is a parser-pool copy of the literal text
+static node*
+new_float(parser_state *p, const char *s)
+{
+  return cons((node*)NODE_FLOAT, (node*)strdup(s));
+}
+
+// (:str . (s . len)) -- s is a NUL-terminated copy of the first len bytes
+static node*
+new_str(parser_state *p, const char *s, size_t len)
+{
+  return cons((node*)NODE_STR, cons((node*)strndup(s, len), (node*)len));
+}
+
+// (:dstr . a)
+static node*
+new_dstr(parser_state *p, node *a)
+{
+  return cons((node*)NODE_DSTR, a);
+}
+
+// (:backref . n) -- n is stored directly in the pointer slot
+static node*
+new_back_ref(parser_state *p, int n)
+{
+  return cons((node*)NODE_BACK_REF, (node*)n);
+}
+
+// (:nthref . 
n)
+static node*
+new_nth_ref(parser_state *p, int n)
+{
+  return cons((node*)NODE_NTH_REF, (node*)n);
+}
+
+// Placeholder for block-local variable declarations; intentionally empty.
+static void
+new_bv(parser_state *p, mrb_sym id)
+{
+}
+
+// xxx -----------------------------
+
+// (:call recv op nil) -- unary operator call without arguments
+static node*
+call_uni_op(parser_state *p, node *recv, char *m)
+{
+  return new_call(p, recv, intern(m), 0);
+}
+
+// (:call recv op ((arg1))) -- the argument slot is an (args . block) pair
+// whose args list holds the single operand and whose block part is empty
+static node*
+call_bin_op(parser_state *p, node *recv, char *m, node *arg1)
+{
+  return new_call(p, recv, intern(m), list1(list1(arg1)));
+}
+
+// (:match a . b)
+static node*
+match_op(parser_state *p, node *a, node *b)
+{
+  return cons((node*)NODE_MATCH, cons((node*)a, (node*)b));
+}
+
+
+// Attach block b to the (args . block) pair a.  A block that is already
+// present is reported as an error and then overwritten.
+static void
+args_with_block(parser_state *p, node *a, node *b)
+{
+  if (b) {
+    if (a->cdr) {
+      yyerror(p, "both block arg and actual block given");
+    }
+    a->cdr = b;
+  }
+}
+
+// Attach block b to call node a.
+// NOTE(review): the fourth cell is read unconditionally, which matches the
+// four-element lists built by new_call/new_fcall.  new_super builds
+// (:super . args) instead, so a `super ... do` command presumably walks off
+// the list here -- confirm and handle separately.
+static void
+call_with_block(parser_state *p, node *a, node *b)
+{
+  node *n = a->cdr->cdr->cdr;
+
+  if (!n->car) n->car = cons(0, b);
+  else {
+    args_with_block(p, n->car, b);
+  }
+}
+
+// (:negate . n)
+static node*
+negate_lit(parser_state *p, node *n)
+{
+  return cons((node*)NODE_NEGATE, n);
+}
+
+// Condition hook; currently returns the node unchanged.
+static node*
+cond(node *n)
+{
+  return n;
+}
+
+// Convert the (args . block) pair of return/break/next into a value node:
+// nothing for an empty argument list, the argument itself for exactly one
+// argument, an array node for several.  A block argument is rejected.
+static node*
+ret_args(parser_state *p, node *n)
+{
+  if (n->cdr) {
+    yyerror(p, "block argument should not be given");
+  }
+  // `return &blk` arrives as (nil . block): there is no argument list to
+  // inspect, so do not dereference it after the error has been reported.
+  if (!n->car) return 0;
+  if (!n->car->cdr) return n->car->car;
+  return new_array(p, n->car);
+}
+
+// Declare the target of an assignment: a local-variable node registers its
+// symbol in the current scope, every other node kind is left alone.
+static void
+assignable(parser_state *p, node *lhs)
+{
+  switch ((int)lhs->car) {
+  case NODE_LVAR:
+    local_add(p, (mrb_sym)lhs->cdr);
+    break;
+  default:
+    break;
+  }
+}
+
+// Resolve an identifier used as a value: an :lvar whose symbol is not a
+// known local becomes a receiver-less call and its cell is recycled.
+static node*
+var_reference(parser_state *p, node *lhs)
+{
+  node *n;
+
+  switch ((int)lhs->car) {
+  case NODE_LVAR:
+    if (!local_var_p(p, (mrb_sym)lhs->cdr)) {
+      n = new_fcall(p, (mrb_sym)lhs->cdr, 0);
+      cons_free(lhs);
+      return n;
+    }
+    break;
+  default:
+    break;
+  }
+  return lhs;
+}
+
+// xxx -----------------------------
+
+%}
+
+%pure_parser
+%parse-param {parser_state *p}
+%lex-param {parser_state *p}
+
+%union {
+  node *node;
+  mrb_sym id;
+  int num;
+ const struct vtable *vars; +} + +%token + keyword_class + keyword_module + keyword_def + keyword_undef + keyword_begin + keyword_rescue + keyword_ensure + keyword_end + keyword_if + keyword_unless + keyword_then + keyword_elsif + keyword_else + keyword_case + keyword_when + keyword_while + keyword_until + keyword_for + keyword_break + keyword_next + keyword_redo + keyword_retry + keyword_in + keyword_do + keyword_do_cond + keyword_do_block + keyword_do_LAMBDA + keyword_return + keyword_yield + keyword_super + keyword_self + keyword_nil + keyword_true + keyword_false + keyword_and + keyword_or + keyword_not + modifier_if + modifier_unless + modifier_while + modifier_until + modifier_rescue + keyword_alias + keyword_BEGIN + keyword_END + keyword__LINE__ + keyword__FILE__ + keyword__ENCODING__ + +%token tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL +%token tINTEGER tFLOAT tCHAR tREGEXP +%token tSTRING tSTRING_PART +%token tNTH_REF tBACK_REF +%token tREGEXP_END + +%type singleton string string_interp regexp +%type literal numeric cpath +%type top_compstmt top_stmts top_stmt +%type bodystmt compstmt stmts stmt expr arg primary command command_call method_call +%type expr_value arg_value primary_value +%type if_tail opt_else case_body cases opt_rescue exc_list exc_var opt_ensure +%type args call_args opt_call_args +%type paren_args opt_paren_args variable +%type command_args aref_args opt_block_arg block_arg var_ref var_lhs +%type command_asgn mrhs superclass block_call block_command +%type f_block_optarg f_block_opt +%type f_arglist f_args f_arg f_arg_item f_optarg f_marg f_marg_list f_margs +%type assoc_list assocs assoc undef_list backref for_var +%type block_param opt_block_param block_param_def f_opt +%type bv_decls opt_bv_decl bvar f_larglist lambda_body +%type brace_block cmd_brace_block do_block lhs none fitem f_bad_arg +%type mlhs mlhs_list mlhs_post mlhs_basic mlhs_item mlhs_node mlhs_inner +%type fsym sym symbol operation operation2 operation3 +%type 
cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg + +%token tUPLUS /* unary+ */ +%token tUMINUS /* unary- */ +%token tPOW /* ** */ +%token tCMP /* <=> */ +%token tEQ /* == */ +%token tEQQ /* === */ +%token tNEQ /* != */ +%token tGEQ /* >= */ +%token tLEQ /* <= */ +%token tANDOP tOROP /* && and || */ +%token tMATCH tNMATCH /* =~ and !~ */ +%token tDOT2 tDOT3 /* .. and ... */ +%token tAREF tASET /* [] and []= */ +%token tLSHFT tRSHFT /* << and >> */ +%token tCOLON2 /* :: */ +%token tCOLON3 /* :: at EXPR_BEG */ +%token tOP_ASGN /* +=, -= etc. */ +%token tASSOC /* => */ +%token tLPAREN /* ( */ +%token tLPAREN_ARG /* ( */ +%token tRPAREN /* ) */ +%token tLBRACK /* [ */ +%token tLBRACE /* { */ +%token tLBRACE_ARG /* { */ +%token tSTAR /* * */ +%token tAMPER /* & */ +%token tLAMBDA /* -> */ +%token tSYMBEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG +%token tSTRING_BEG tSTRING_DVAR tLAMBEG + +/* + * precedence table + */ + +%nonassoc tLOWEST +%nonassoc tLBRACE_ARG + +%nonassoc modifier_if modifier_unless modifier_while modifier_until +%left keyword_or keyword_and +%right keyword_not +%right '=' tOP_ASGN +%left modifier_rescue +%right '?' ':' +%nonassoc tDOT2 tDOT3 +%left tOROP +%left tANDOP +%nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH +%left '>' tGEQ '<' tLEQ +%left '|' '^' +%left '&' +%left tLSHFT tRSHFT +%left '+' '-' +%left '*' '/' '%' +%right tUMINUS_NUM tUMINUS +%right tPOW +%right '!' 
'~' tUPLUS
+
+%nonassoc idNULL
+%nonassoc idRespond_to
+%nonassoc idIFUNC
+%nonassoc idCFUNC
+%nonassoc id_core_set_method_alias
+%nonassoc id_core_set_variable_alias
+%nonassoc id_core_undef_method
+%nonassoc id_core_define_method
+%nonassoc id_core_define_singleton_method
+%nonassoc id_core_set_postexe
+
+%token tLAST_TOKEN
+
+%%
+/* Start symbol: opens the top-level scope, parses the program and stores
+   the resulting (:scope ...) tree in p->tree. */
+program : {
+      p->lstate = EXPR_BEG;
+      local_nest(p);
+    }
+    top_compstmt
+    {
+      p->tree = new_scope(p, $2);
+      local_unnest(p);
+    }
+  ;
+
+top_compstmt : top_stmts opt_terms
+    {
+      $$ = $1;
+    }
+  ;
+
+/* Top-level statement list, always a (:begin ...) node so that the
+   list-extending alternative can push onto it. */
+top_stmts : none
+    {
+      $$ = new_begin(p, 0);
+    }
+  | top_stmt
+    {
+      $$ = new_begin(p, $1);
+    }
+  | top_stmts terms top_stmt
+    {
+      $$ = push($1, newline_node($3));
+    }
+  | error top_stmt
+    {
+      /* wrap like `stmts` does, so a later push() extends a statement
+         list instead of the recovered statement's own cells */
+      $$ = new_begin(p, $2);
+    }
+  ;
+
+/* A statement or a BEGIN { ... } block; the block body is collected in
+   p->begin_tree and contributes no node of its own. */
+top_stmt : stmt
+  | keyword_BEGIN
+    {
+      if (p->in_def || p->in_single) {
+        yyerror(p, "BEGIN in method");
+      }
+      $$ = local_switch(p);
+    }
+    '{' top_compstmt '}'
+    {
+      p->begin_tree = push(p->begin_tree, $4);
+      local_resume(p, $2);
+      $$ = 0;
+    }
+  ;
+
+/* Body with optional rescue / else / ensure clauses. */
+bodystmt : compstmt
+    opt_rescue
+    opt_else
+    opt_ensure
+    {
+      if ($2) {
+        $$ = new_rescue(p, $1, $2, $3);
+      }
+      else if ($3) {
+        yywarn(p, "else without rescue is useless");
+        /* Run the else body after the main body.  Name $1 explicitly
+           instead of reading the not-yet-assigned $$, and push the else
+           body as one statement so its :begin tag is not spliced into
+           the statement list. */
+        $$ = push($1, $3);
+      }
+      else {
+        $$ = $1;
+      }
+      if ($4) {
+        if ($$) {
+          $$ = new_ensure(p, $$, $4);
+        }
+        else {
+          $$ = push($4, new_nil(p));
+        }
+      }
+    }
+  ;
+
+compstmt : stmts opt_terms
+    {
+      $$ = $1;
+    }
+  ;
+
+/* Statement list inside a body, always a (:begin ...) node. */
+stmts : none
+    {
+      $$ = new_begin(p, 0);
+    }
+  | stmt
+    {
+      $$ = new_begin(p, $1);
+    }
+  | stmts terms stmt
+    {
+      $$ = push($1, newline_node($3));
+    }
+  | error stmt
+    {
+      $$ = new_begin(p, $2);
+    }
+  ;
+
+stmt : keyword_alias fsym {p->lstate = EXPR_FNAME;} fsym
+    {
+      $$ = new_alias(p, $2, $4);
+    }
+  | keyword_undef undef_list
+    {
+      $$ = $2;
+    }
+  | stmt modifier_if expr_value
+    {
+      $$ = new_if(p, cond($3), $1, 0);
+    }
+  | stmt modifier_unless expr_value
+    {
+      $$ = new_unless(p, cond($3), $1, 0);
+    }
+  | stmt modifier_while expr_value
+    {
+      $$ = new_while(p, cond($3), $1);
+    }
+  | stmt modifier_until 
expr_value + { + $$ = new_until(p, cond($3), $1); + } + | stmt modifier_rescue stmt + { + $$ = new_rescue(p, $1, list1(list3(0, 0, $3)), 0); + } + | keyword_END '{' compstmt '}' + { + if (p->in_def || p->in_single) { + yywarn(p, "END in method; use at_exit"); + } + $$ = new_postexe(p, $3); + } + | command_asgn + | mlhs '=' command_call + { + $$ = new_masgn(p, $1, list1($3)); + } + | var_lhs tOP_ASGN command_call + { + $$ = new_op_asgn(p, $1, $2, $3); + } + | primary_value '[' opt_call_args rbracket tOP_ASGN command_call + { + $$ = new_op_asgn(p, new_call(p, $1, intern("[]"), $3), $5, $6); + } + | primary_value '.' tIDENTIFIER tOP_ASGN command_call + { + $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5); + } + | primary_value '.' tCONSTANT tOP_ASGN command_call + { + $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5); + } + | primary_value tCOLON2 tCONSTANT tOP_ASGN command_call + { + yyerror(p, "constant re-assignment"); + $$ = 0; + } + | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_call + { + $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5); + } + | backref tOP_ASGN command_call + { + backref_error(p, $1); + $$ = new_begin(p, 0); + } + | lhs '=' mrhs + { + $$ = new_asgn(p, $1, new_array(p, $3)); + } + | mlhs '=' arg_value + { + $$ = new_masgn(p, $1, $3); + } + | mlhs '=' mrhs + { + $$ = new_masgn(p, $1, new_array(p, $3)); + } + | expr + ; + +command_asgn : lhs '=' command_call + { + $$ = new_asgn(p, $1, $3); + } + | lhs '=' command_asgn + { + $$ = new_asgn(p, $1, $3); + } + ; + + +expr : command_call + | expr keyword_and expr + { + $$ = new_and(p, $1, $3); + } + | expr keyword_or expr + { + $$ = new_or(p, $1, $3); + } + | keyword_not opt_nl expr + { + $$ = call_uni_op(p, cond($3), "!"); + } + | '!' 
command_call
+    {
+      $$ = call_uni_op(p, cond($2), "!");
+    }
+  | arg
+  ;
+
+/* Expression used as a value: a missing node is replaced by (:nil). */
+expr_value : expr
+    {
+      if (!$1) $$ = new_nil(p);
+      else $$ = $1;
+    }
+  ;
+
+command_call : command
+  | block_command
+  ;
+
+/* A block call, optionally followed by a chained paren-less method call. */
+block_command : block_call
+  | block_call dot_or_colon operation2 command_args
+    {
+      /* Without an action Bison falls back to $$ = $1, which silently
+         dropped the chained call and its arguments.  Build the call the
+         same way the chained block_call alternatives do. */
+      $$ = new_call(p, $1, $3, $4);
+    }
+  ;
+
+/* `{ |params| body }` attached to a paren-less command; the block gets its
+   own nested local-variable scope. */
+cmd_brace_block : tLBRACE_ARG
+    {
+      local_nest(p);
+    }
+    opt_block_param
+    compstmt
+    '}'
+    {
+      $$ = new_block(p, $3, $4);
+      local_unnest(p);
+    }
+  ;
+
+/* Method calls written without parentheses, plus super/yield/return/
+   break/next with arguments. */
+command : operation command_args %prec tLOWEST
+    {
+      $$ = new_fcall(p, $1, $2);
+    }
+  | operation command_args cmd_brace_block
+    {
+      args_with_block(p, $2, $3);
+      $$ = new_fcall(p, $1, $2);
+    }
+  | primary_value '.' operation2 command_args %prec tLOWEST
+    {
+      $$ = new_call(p, $1, $3, $4);
+    }
+  | primary_value '.' operation2 command_args cmd_brace_block
+    {
+      args_with_block(p, $4, $5);
+      $$ = new_call(p, $1, $3, $4);
+    }
+  | primary_value tCOLON2 operation2 command_args %prec tLOWEST
+    {
+      $$ = new_call(p, $1, $3, $4);
+    }
+  | primary_value tCOLON2 operation2 command_args cmd_brace_block
+    {
+      args_with_block(p, $4, $5);
+      $$ = new_call(p, $1, $3, $4);
+    }
+  | keyword_super command_args
+    {
+      $$ = new_super(p, $2);
+    }
+  | keyword_yield command_args
+    {
+      $$ = new_yield(p, $2);
+    }
+  | keyword_return call_args
+    {
+      $$ = new_return(p, ret_args(p, $2));
+    }
+  | keyword_break call_args
+    {
+      $$ = new_break(p, ret_args(p, $2));
+    }
+  | keyword_next call_args
+    {
+      $$ = new_next(p, ret_args(p, $2));
+    }
+  ;
+
+/* Left-hand side of a multiple assignment, optionally parenthesized. */
+mlhs : mlhs_basic
+    {
+      $$ = $1;
+    }
+  | tLPAREN mlhs_inner rparen
+    {
+      $$ = $2;
+    }
+  ;
+
+mlhs_inner : mlhs_basic
+  | tLPAREN mlhs_inner rparen
+    {
+      $$ = list1($2);
+    }
+  ;
+
+/* Builds (pre), (pre rest) or (pre rest post); a bare `*` stores (:nil)
+   in the rest slot. */
+mlhs_basic : mlhs_list
+    {
+      $$ = list1($1);
+    }
+  | mlhs_list mlhs_item
+    {
+      $$ = list1(push($1,$2));
+    }
+  | mlhs_list tSTAR mlhs_node
+    {
+      $$ = list2($1, $3);
+    }
+  | mlhs_list tSTAR mlhs_node ',' mlhs_post
+    {
+      $$ = list3($1, $3, $5);
+    }
+  | mlhs_list tSTAR
+    {
+      $$ = list2($1, new_nil(p));
+    }
+  | mlhs_list tSTAR ',' mlhs_post
+    {
+      $$ = list3($1, 
new_nil(p), $4); + } + | tSTAR mlhs_node + { + $$ = list2(0, $2); + } + | tSTAR mlhs_node ',' mlhs_post + { + $$ = list3(0, $2, $4); + } + | tSTAR + { + $$ = list2(0, new_nil(p)); + } + | tSTAR ',' mlhs_post + { + $$ = list3(0, new_nil(p), $3); + } + ; + +mlhs_item : mlhs_node + | tLPAREN mlhs_inner rparen + { + $$ = $2; + } + ; + +mlhs_list : mlhs_item ',' + { + $$ = list1($1); + } + | mlhs_list mlhs_item ',' + { + $$ = push($1, $2); + } + ; + +mlhs_post : mlhs_item + { + $$ = list1($1); + } + | mlhs_list mlhs_item + { + $$ = push($1, $2); + } + ; + +mlhs_node : variable + { + assignable(p, $1); + } + | primary_value '[' opt_call_args rbracket + { + $$ = new_call(p, $1, intern("[]"), $3); + } + | primary_value '.' tIDENTIFIER + { + $$ = new_call(p, $1, $3, 0); + } + | primary_value tCOLON2 tIDENTIFIER + { + $$ = new_call(p, $1, $3, 0); + } + | primary_value '.' tCONSTANT + { + $$ = new_call(p, $1, $3, 0); + } + | primary_value tCOLON2 tCONSTANT + { + if (p->in_def || p->in_single) + yyerror(p, "dynamic constant assignment"); + $$ = new_colon2(p, $1, $3); + } + | tCOLON3 tCONSTANT + { + if (p->in_def || p->in_single) + yyerror(p, "dynamic constant assignment"); + $$ = new_colon3(p, $2); + } + | backref + { + backref_error(p, $1); + $$ = 0; + } + ; + +lhs : variable + { + assignable(p, $1); + } + | primary_value '[' opt_call_args rbracket + { + $$ = new_call(p, $1, intern("[]"), $3); + } + | primary_value '.' tIDENTIFIER + { + $$ = new_call(p, $1, $3, 0); + } + | primary_value tCOLON2 tIDENTIFIER + { + $$ = new_call(p, $1, $3, 0); + } + | primary_value '.' 
tCONSTANT
+    {
+      $$ = new_call(p, $1, $3, 0);
+    }
+  | primary_value tCOLON2 tCONSTANT
+    {
+      if (p->in_def || p->in_single)
+        yyerror(p, "dynamic constant assignment");
+      $$ = new_colon2(p, $1, $3);
+    }
+  | tCOLON3 tCONSTANT
+    {
+      if (p->in_def || p->in_single)
+        yyerror(p, "dynamic constant assignment");
+      $$ = new_colon3(p, $2);
+    }
+  | backref
+    {
+      backref_error(p, $1);
+      $$ = 0;
+    }
+  ;
+
+/* Class/module name: an identifier is accepted by the grammar only so that
+   a specific error can be reported. */
+cname : tIDENTIFIER
+    {
+      yyerror(p, "class/module name must be CONSTANT");
+    }
+  | tCONSTANT
+  ;
+
+/* Class path as (scope . name): scope is 1 for a leading `::`, 0 for a
+   bare name, or the node of the enclosing expression. */
+cpath : tCOLON3 cname
+    {
+      $$ = cons((node*)1, (node*)$2);
+    }
+  | cname
+    {
+      $$ = cons((node*)0, (node*)$1);
+    }
+  | primary_value tCOLON2 cname
+    {
+      $$ = cons($1, (node*)$3);
+    }
+  ;
+
+/* Method name: identifier, constant, operator or reserved word. */
+fname : tIDENTIFIER
+  | tCONSTANT
+  | tFID
+  | op
+    {
+      p->lstate = EXPR_ENDFN;
+      $$ = $1;
+    }
+  | reswords
+    {
+      p->lstate = EXPR_ENDFN;
+      $$ = $1;
+    }
+  ;
+
+fsym : fname
+  | symbol
+  ;
+
+fitem : fsym
+    {
+      $$ = new_sym(p, $1);
+    }
+  ;
+
+/* NOTE(review): new_undef stores the symbol in the cdr slot, and push()
+   walks the cdr chain of its first argument, so `undef a, b` presumably
+   follows a symbol value as if it were a cell -- confirm. */
+undef_list : fsym
+    {
+      $$ = new_undef(p, $1);
+    }
+  | undef_list ',' {p->lstate = EXPR_FNAME;} fitem
+    {
+      $$ = push($1, (node*)$4);
+    }
+  ;
+
+/* Operator tokens usable as method names; each yields the symbol of the
+   operator's own spelling. */
+op : '|' { $$ = intern("|"); }
+  | '^' { $$ = intern("^"); }
+  | '&' { $$ = intern("&"); }
+  | tCMP { $$ = intern("<=>"); }
+  | tEQ { $$ = intern("=="); }
+  | tEQQ { $$ = intern("==="); }
+  | tMATCH { $$ = intern("=~"); }
+  | tNMATCH { $$ = intern("!~"); }
+  | '>' { $$ = intern(">"); }
+  | tGEQ { $$ = intern(">="); }
+  | '<' { $$ = intern("<"); }
+  | tLEQ { $$ = intern("<="); } /* was ">=", which aliased `<=` to `>=` */
+  | tNEQ { $$ = intern("!="); }
+  | tLSHFT { $$ = intern("<<"); }
+  | tRSHFT { $$ = intern(">>"); }
+  | '+' { $$ = intern("+"); }
+  | '-' { $$ = intern("-"); }
+  | '*' { $$ = intern("*"); }
+  | tSTAR { $$ = intern("*"); }
+  | '/' { $$ = intern("/"); }
+  | '%' { $$ = intern("%"); }
+  | tPOW { $$ = intern("**"); }
+  | '!' 
{ $$ = intern("!"); } + | '~' { $$ = intern("~"); } + | tUPLUS { $$ = intern("+@"); } + | tUMINUS { $$ = intern("-@"); } + | tAREF { $$ = intern("[]"); } + | tASET { $$ = intern("[]="); } + ; + +reswords : keyword__LINE__ | keyword__FILE__ | keyword__ENCODING__ + | keyword_BEGIN | keyword_END + | keyword_alias | keyword_and | keyword_begin + | keyword_break | keyword_case | keyword_class | keyword_def + | keyword_do | keyword_else | keyword_elsif + | keyword_end | keyword_ensure | keyword_false + | keyword_for | keyword_in | keyword_module | keyword_next + | keyword_nil | keyword_not | keyword_or | keyword_redo + | keyword_rescue | keyword_retry | keyword_return | keyword_self + | keyword_super | keyword_then | keyword_true | keyword_undef + | keyword_when | keyword_yield | keyword_if | keyword_unless + | keyword_while | keyword_until + ; + +arg : lhs '=' arg + { + $$ = new_asgn(p, $1, $3); + } + | lhs '=' arg modifier_rescue arg + { + $$ = new_asgn(p, $1, new_rescue(p, $3, list1(list3(0, 0, $5)), 0)); + } + | var_lhs tOP_ASGN arg + { + $$ = new_op_asgn(p, $1, $2, $3); + } + | var_lhs tOP_ASGN arg modifier_rescue arg + { + $$ = new_op_asgn(p, $1, $2, new_rescue(p, $3, list1(list3(0, 0, $5)), 0)); + } + | primary_value '[' opt_call_args rbracket tOP_ASGN arg + { + $$ = new_op_asgn(p, new_call(p, $1, intern("[]"), $3), $5, $6); + } + | primary_value '.' tIDENTIFIER tOP_ASGN arg + { + $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5); + } + | primary_value '.' 
tCONSTANT tOP_ASGN arg + { + $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5); + } + | primary_value tCOLON2 tIDENTIFIER tOP_ASGN arg + { + $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5); + } + | primary_value tCOLON2 tCONSTANT tOP_ASGN arg + { + yyerror(p, "constant re-assignment"); + $$ = new_begin(p, 0); + } + | tCOLON3 tCONSTANT tOP_ASGN arg + { + yyerror(p, "constant re-assignment"); + $$ = new_begin(p, 0); + } + | backref tOP_ASGN arg + { + backref_error(p, $1); + $$ = new_begin(p, 0); + } + | arg tDOT2 arg + { + $$ = new_dot2(p, $1, $3); + } + | arg tDOT3 arg + { + $$ = new_dot3(p, $1, $3); + } + | arg '+' arg + { + $$ = call_bin_op(p, $1, "+", $3); + } + | arg '-' arg + { + $$ = call_bin_op(p, $1, "-", $3); + } + | arg '*' arg + { + $$ = call_bin_op(p, $1, "*", $3); + } + | arg '/' arg + { + $$ = call_bin_op(p, $1, "/", $3); + } + | arg '%' arg + { + $$ = call_bin_op(p, $1, "%", $3); + } + | arg tPOW arg + { + $$ = call_bin_op(p, $1, "**", $3); + } + | tUMINUS_NUM tINTEGER tPOW arg + { + $$ = call_uni_op(p, call_bin_op(p, $2, "**", $4), "-@"); + } + | tUMINUS_NUM tFLOAT tPOW arg + { + $$ = call_uni_op(p, call_bin_op(p, $2, "**", $4), "-@"); + } + | tUPLUS arg + { + $$ = call_uni_op(p, $2, "+@"); + } + | tUMINUS arg + { + $$ = call_uni_op(p, $2, "-@"); + } + | arg '|' arg + { + $$ = call_bin_op(p, $1, "|", $3); + } + | arg '^' arg + { + $$ = call_bin_op(p, $1, "^", $3); + } + | arg '&' arg + { + $$ = call_bin_op(p, $1, "&", $3); + } + | arg tCMP arg + { + $$ = call_bin_op(p, $1, "<=>", $3); + } + | arg '>' arg + { + $$ = call_bin_op(p, $1, ">", $3); + } + | arg tGEQ arg + { + $$ = call_bin_op(p, $1, ">=", $3); + } + | arg '<' arg + { + $$ = call_bin_op(p, $1, "<", $3); + } + | arg tLEQ arg + { + $$ = call_bin_op(p, $1, "<=", $3); + } + | arg tEQ arg + { + $$ = call_bin_op(p, $1, "==", $3); + } + | arg tEQQ arg + { + $$ = call_bin_op(p, $1, "===", $3); + } + | arg tNEQ arg + { + $$ = call_bin_op(p, $1, "!=", $3); + } + | arg tMATCH arg + { + $$ = 
match_op(p, $1, $3); +#if 0 + if (nd_type($1) == NODE_LIT && TYPE($1->nd_lit) == T_REGEXP) { + $$ = reg_named_capture_assign($1->nd_lit, $$); + } +#endif + } + | arg tNMATCH arg + { + $$ = call_bin_op(p, $1, "!~", $3); + } + | '!' arg + { + $$ = call_uni_op(p, cond($2), "!"); + } + | '~' arg + { + $$ = call_uni_op(p, cond($2), "~"); + } + | arg tLSHFT arg + { + $$ = call_bin_op(p, $1, "<<", $3); + } + | arg tRSHFT arg + { + $$ = call_bin_op(p, $1, ">>", $3); + } + | arg tANDOP arg + { + $$ = new_and(p, $1, $3); + } + | arg tOROP arg + { + $$ = new_or(p, $1, $3); + } + | arg '?' arg opt_nl ':' arg + { + $$ = new_if(p, cond($1), $3, $6); + } + | primary + { + $$ = $1; + } + ; + +arg_value : arg + { + $$ = $1; + if (!$$) $$ = new_nil(p); + } + ; + +aref_args : none + | args trailer + { + $$ = $1; + } + | args ',' assocs trailer + { + $$ = push($1, new_hash(p, $3)); + } + | assocs trailer + { + $$ = new_hash(p, $1); + } + ; + +paren_args : '(' opt_call_args rparen + { + $$ = $2; + } + ; + +opt_paren_args : none + | paren_args + ; + +opt_call_args : none + | call_args + | args ',' + { + $$ = cons($1,0); + } + | args ',' assocs ',' + { + $$ = cons(push($1, new_hash(p, $3)), 0); + } + | assocs ',' + { + $$ = cons(list1(new_hash(p, $1)), 0); + } + ; + +call_args : command + { + $$ = cons(list1($1), 0); + } + | args opt_block_arg + { + $$ = cons($1, $2); + } + | assocs opt_block_arg + { + $$ = cons(list1(new_hash(p, $1)), $2); + } + | args ',' assocs opt_block_arg + { + $$ = cons(push($1, new_hash(p, $3)), $4); + } + | block_arg + { + $$ = cons(0, $1); + } + ; + +command_args : { + $$ = p->cmdarg_stack; + CMDARG_PUSH(1); + } + call_args + { + /* CMDARG_POP() */ + p->cmdarg_stack = $1; + $$ = $2; + } + ; + +block_arg : tAMPER arg_value + { + $$ = new_block_arg(p, $2); + } + ; + +opt_block_arg : ',' block_arg + { + $$ = $2; + } + | none + { + $$ = 0; + } + ; + +args : arg_value + { + $$ = cons($1, 0); + } + | tSTAR arg_value + { + $$ = cons(new_splat(p, $2), 0); + } + | args 
',' arg_value + { + $$ = push($1, $3); + } + | args ',' tSTAR arg_value + { + $$ = push($1, new_splat(p, $4)); + } + ; + +mrhs : args ',' arg_value + { + $$ = push($1, $3); + } + | args ',' tSTAR arg_value + { + $$ = push($1, new_splat(p, $4)); + } + | tSTAR arg_value + { + $$ = list1(new_splat(p, $2)); + } + ; + +primary : literal + | string + | regexp + | var_ref + | backref + | tFID + { + $$ = new_fcall(p, $1, 0); + } + | keyword_begin + bodystmt + keyword_end + { + $$ = $2; + } + | tLPAREN_ARG expr {p->lstate = EXPR_ENDARG;} rparen + { + yywarning(p, "(...) interpreted as grouped expression"); + $$ = $2; + } + | tLPAREN compstmt ')' + { + $$ = $2; + } + | primary_value tCOLON2 tCONSTANT + { + $$ = new_colon2(p, $1, $3); + } + | tCOLON3 tCONSTANT + { + $$ = new_colon3(p, $2); + } + | tLBRACK aref_args ']' + { + $$ = new_array(p, $2); + } + | tLBRACE assoc_list '}' + { + $$ = new_hash(p, $2); + } + | keyword_return + { + $$ = new_return(p, 0); + } + | keyword_yield '(' call_args rparen + { + $$ = new_yield(p, $3); + } + | keyword_yield '(' rparen + { + $$ = new_yield(p, 0); + } + | keyword_yield + { + $$ = new_yield(p, 0); + } + | keyword_not '(' expr rparen + { + $$ = call_uni_op(p, cond($3), "!"); + } + | keyword_not '(' rparen + { + $$ = call_uni_op(p, new_nil(p), "!"); + } + | operation brace_block + { + $$ = new_fcall(p, $1, cons(0, $2)); + } + | method_call + | method_call brace_block + { + call_with_block(p, $1, $2); + $$ = $1; + } + | tLAMBDA + { + local_nest(p); + $$ = p->lpar_beg; + p->lpar_beg = ++p->paren_nest; + } + f_larglist + lambda_body + { + p->lpar_beg = $2; + $$ = new_lambda(p, $3, $4); + local_unnest(p); + } + | keyword_if expr_value then + compstmt + if_tail + keyword_end + { + $$ = new_if(p, cond($2), $4, $5); + } + | keyword_unless expr_value then + compstmt + opt_else + keyword_end + { + $$ = new_unless(p, cond($2), $4, $5); + } + | keyword_while {COND_PUSH(1);} expr_value do {COND_POP();} + compstmt + keyword_end + { + $$ = new_while(p, 
cond($3), $6); + } + | keyword_until {COND_PUSH(1);} expr_value do {COND_POP();} + compstmt + keyword_end + { + $$ = new_until(p, cond($3), $6); + } + | keyword_case expr_value opt_terms + case_body + keyword_end + { + $$ = new_case(p, $2, $4); + } + | keyword_case opt_terms case_body keyword_end + { + $$ = new_case(p, 0, $3); + } + | keyword_for for_var keyword_in + {COND_PUSH(1);} + expr_value do + {COND_POP();} + compstmt + keyword_end + { + $$ = new_for(p, $2, $5, $8); + } + | keyword_class cpath superclass + { + if (p->in_def || p->in_single) + yyerror(p, "class definition in method body"); + $$ = local_switch(p); + } + bodystmt + keyword_end + { + $$ = new_class(p, $2, $3, $5); + local_resume(p, $4); + } + | keyword_class tLSHFT expr + { + $$ = p->in_def; + p->in_def = 0; + } + term + { + $$ = cons(local_switch(p), (node*)p->in_single); + p->in_single = 0; + } + bodystmt + keyword_end + { + $$ = new_sclass(p, $3, $7); + local_resume(p, $6->car); + p->in_def = $4; + p->in_single = (int)$6->cdr; + } + | keyword_module cpath + { + if (p->in_def || p->in_single) + yyerror(p, "module definition in method body"); + $$ = local_switch(p); + } + bodystmt + keyword_end + { + $$ = new_module(p, $2, $4); + local_resume(p, $3); + } + | keyword_def fname + { + p->in_def++; + $$ = local_switch(p); + } + f_arglist + bodystmt + keyword_end + { + $$ = new_def(p, $2, $4, $5); + local_resume(p, $3); + p->in_def--; + } + | keyword_def singleton dot_or_colon {p->lstate = EXPR_FNAME;} fname + { + p->in_single++; + p->lstate = EXPR_ENDFN; /* force for args */ + $$ = local_switch(p); + } + f_arglist + bodystmt + keyword_end + { + $$ = new_sdef(p, $2, $5, $7, $8); + local_resume(p, $6); + p->in_single--; + } + | keyword_break + { + $$ = new_break(p, 0); + } + | keyword_next + { + $$ = new_next(p, 0); + } + | keyword_redo + { + $$ = new_redo(p); + } + | keyword_retry + { + $$ = new_retry(p); + } + ; + +primary_value : primary + { + $$ = $1; + if (!$$) $$ = new_nil(p); + } + ; + +then : 
term
+  | keyword_then
+  | term keyword_then
+  ;
+/* do: a `term` or keyword_do_cond. */
+do : term
+  | keyword_do_cond
+  ;
+/* if_tail: either an optional else part, or an elsif clause that becomes a
+ * nested new_if() whose else-part is the remaining if_tail. */
+if_tail : opt_else
+  | keyword_elsif expr_value then
+    compstmt
+    if_tail
+    {
+      $$ = new_if(p, cond($2), $4, $5);
+    }
+  ;
+/* opt_else: the else body when present, otherwise the value of `none`. */
+opt_else : none
+  | keyword_else compstmt
+    {
+      $$ = $2;
+    }
+  ;
+/* for_var: a single lhs is wrapped as list1(list1(lhs)); an mlhs keeps its
+ * own value (no explicit action). */
+for_var : lhs
+    {
+      $$ = list1(list1($1));
+    }
+  | mlhs
+  ;
+/* f_marg: one element of a parenthesised parameter group: a plain name
+ * (new_arg) or a nested group (new_masgn). */
+f_marg : f_norm_arg
+    {
+      $$ = new_arg(p, $1);
+    }
+  | tLPAREN f_margs rparen
+    {
+      $$ = new_masgn(p, $2, 0);
+    }
+  ;
+/* f_marg_list: comma-separated f_marg elements collected into a list. */
+f_marg_list : f_marg
+    {
+      $$ = list1($1);
+    }
+  | f_marg_list ',' f_marg
+    {
+      $$ = push($1, $3);
+    }
+  ;
+/* f_margs: always list3(leading elements, splat, trailing elements).
+ * Absent parts are 0; a bare `*` without a name is encoded as (node*)-1. */
+f_margs : f_marg_list
+    {
+      $$ = list3($1,0,0);
+    }
+  | f_marg_list ',' tSTAR f_norm_arg
+    {
+      $$ = list3($1, new_arg(p, $4), 0);
+    }
+  | f_marg_list ',' tSTAR f_norm_arg ',' f_marg_list
+    {
+      $$ = list3($1, new_arg(p, $4), $6);
+    }
+  | f_marg_list ',' tSTAR
+    {
+      $$ = list3($1, (node*)-1, 0);
+    }
+  | f_marg_list ',' tSTAR ',' f_marg_list
+    {
+      $$ = list3($1, (node*)-1, $5);
+    }
+  | tSTAR f_norm_arg
+    {
+      $$ = list3(0, new_arg(p, $2), 0);
+    }
+  | tSTAR f_norm_arg ',' f_marg_list
+    {
+      $$ = list3(0, new_arg(p, $2), $4);
+    }
+  | tSTAR
+    {
+      $$ = list3(0, (node*)-1, 0);
+    }
+  | tSTAR ',' f_marg_list
+    {
+      $$ = list3(0, (node*)-1, $3);
+    }
+  ;
+/* block_param: the accepted shapes of a block parameter list.  Every
+ * alternative calls new_args(p, a, b, c, d, e) with, in order, the leading
+ * f_arg list, the f_block_optarg list, the f_rest_arg, the trailing f_arg
+ * list and the block argument; 0 stands for a part that is absent. */
+block_param : f_arg ',' f_block_optarg ',' f_rest_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, $3, $5, 0, $6);
+    }
+  | f_arg ',' f_block_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, $3, $5, $7, $8);
+    }
+  | f_arg ',' f_block_optarg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, $3, 0, 0, $4);
+    }
+  | f_arg ',' f_block_optarg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, $3, 0, $5, $6);
+    }
+  | f_arg ',' f_rest_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, 0, $3, 0, $4);
+    }
+  | f_arg ','
+    {
+      $$ = new_args(p, $1, 0, 1, 0, 0); /* NOTE(review): literal 1 in the rest-argument slot, presumably a marker for the trailing comma; confirm against new_args() */
+    }
+  | f_arg ',' f_rest_arg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, 0, $3, $5, $6);
+    }
+  | f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, 0, 0, 0, $2);
+    }
+  | f_block_optarg ','
f_rest_arg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, $1, $3, 0, $4);
+    }
+  | f_block_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, $1, $3, $5, $6);
+    }
+  | f_block_optarg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, $1, 0, 0, $2);
+    }
+  | f_block_optarg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, $1, 0, $3, $4);
+    }
+  | f_rest_arg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, 0, $1, 0, $2);
+    }
+  | f_rest_arg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, 0, $1, $3, $4);
+    }
+  | f_block_arg
+    {
+      $$ = new_args(p, 0, 0, 0, 0, $1);
+    }
+  ;
+/* opt_block_param: optional `|...|` section of a block.  When one is present
+ * p->cmd_start is set to TRUE. */
+opt_block_param : none
+  | block_param_def
+    {
+      p->cmd_start = TRUE;
+      $$ = $1;
+    }
+  ;
+/* block_param_def: the `|...|` delimiters.  Without parameters (including
+ * the single token tOROP) it calls local_add_f(p, 0) and yields 0; otherwise
+ * it yields the block_param node.  The opt_bv_decl value is not used. */
+block_param_def : '|' opt_bv_decl '|'
+    {
+      local_add_f(p, 0);
+      $$ = 0;
+    }
+  | tOROP
+    {
+      local_add_f(p, 0);
+      $$ = 0;
+    }
+  | '|' block_param opt_bv_decl '|'
+    {
+      $$ = $2;
+    }
+  ;
+
+/* opt_bv_decl: optional `; a, b` declarations.  Always yields 0; the names
+ * are handled as a side effect of the bvar actions. */
+opt_bv_decl : opt_nl
+    {
+      $$ = 0;
+    }
+  | opt_nl ';' bv_decls opt_nl
+    {
+      $$ = 0;
+    }
+  ;
+/* bv_decls: one or more bvar separated by ','. */
+bv_decls : bvar
+  | bv_decls ',' bvar
+  ;
+/* bvar: an identifier is passed to local_add_f() and new_bv(); anything
+ * matched by f_bad_arg is reported as an error there. */
+bvar : tIDENTIFIER
+    {
+      local_add_f(p, $1);
+      new_bv(p, $1);
+    }
+  | f_bad_arg
+  ;
+/* f_larglist: f_args with or without surrounding parentheses; yields the
+ * f_args value either way. */
+f_larglist : '(' f_args opt_bv_decl ')'
+    {
+      $$ = $2;
+    }
+  | f_args
+    {
+      $$ = $1;
+    }
+  ;
+/* lambda_body: tLAMBEG ... '}' or keyword_do_LAMBDA ... keyword_end; yields
+ * the enclosed compstmt. */
+lambda_body : tLAMBEG compstmt '}'
+    {
+      $$ = $2;
+    }
+  | keyword_do_LAMBDA compstmt keyword_end
+    {
+      $$ = $2;
+    }
+  ;
+/* do_block: keyword_do_block ... keyword_end.  local_nest() runs before the
+ * parameters are parsed and local_unnest() after new_block() is built. */
+do_block : keyword_do_block
+    {
+      local_nest(p);
+    }
+    opt_block_param
+    compstmt
+    keyword_end
+    {
+      $$ = new_block(p,$3,$4); /* $3 = opt_block_param, $4 = compstmt ($2 is the mid-rule action) */
+      local_unnest(p);
+    }
+  ;
+/* block_call: a command followed by a do_block, plus method calls chained
+ * onto such a call.  A block attached to a NODE_YIELD command is an error. */
+block_call : command do_block
+    {
+      if ($1->car == (node*)NODE_YIELD) {
+        yyerror(p, "block given to yield");
+      }
+      else {
+        call_with_block(p, $1, $2);
+      }
+      $$ = $1;
+    }
+  | block_call dot_or_colon operation2 opt_paren_args
+    {
+      $$ = new_call(p, $1, $3, $4);
+    }
+  | block_call dot_or_colon operation2 opt_paren_args brace_block
+    {
+      $$ = new_call(p, $1, $3, $4);
+      call_with_block(p, $$, $5);
+    }
+  | block_call dot_or_colon operation2 command_args do_block
+    {
+      $$ = new_call(p, $1, $3, $4);
+      call_with_block(p, $$, $5);
+    }
+  ;
+/* method_call: calls written with parentheses, a receiver, or `super`.
+ * `recv.(...)` and `recv::(...)` call the method named "call"; `recv[...]`
+ * calls "[]". */
+method_call : operation paren_args
+    {
+      $$ = new_fcall(p, $1, $2);
+    }
+  | primary_value '.' operation2 opt_paren_args
+    {
+      $$ = new_call(p, $1, $3, $4);
+    }
+  | primary_value tCOLON2 operation2 paren_args
+    {
+      $$ = new_call(p, $1, $3, $4);
+    }
+  | primary_value tCOLON2 operation3
+    {
+      $$ = new_call(p, $1, $3, 0);
+    }
+  | primary_value '.' paren_args
+    {
+      $$ = new_call(p, $1, intern("call"), $3);
+    }
+  | primary_value tCOLON2 paren_args
+    {
+      $$ = new_call(p, $1, intern("call"), $3);
+    }
+  | keyword_super paren_args
+    {
+      $$ = new_super(p, $2);
+    }
+  | keyword_super
+    {
+      $$ = new_zsuper(p);
+    }
+  | primary_value '[' opt_call_args rbracket
+    {
+      $$ = new_call(p, $1, intern("[]"), $3);
+    }
+  ;
+/* brace_block: `{ ... }` or `keyword_do ... keyword_end`.  local_nest() runs
+ * before the parameters are parsed and local_unnest() after new_block(). */
+brace_block : '{'
+    {
+      local_nest(p);
+    }
+    opt_block_param
+    compstmt '}'
+    {
+      $$ = new_block(p,$3,$4);
+      local_unnest(p);
+    }
+  | keyword_do
+    {
+      local_nest(p);
+    }
+    opt_block_param
+    compstmt keyword_end
+    {
+      $$ = new_block(p,$3,$4);
+      local_unnest(p);
+    }
+  ;
+/* case_body: one `when` clause, built as cons(cons(args, body), following
+ * clauses). */
+case_body : keyword_when args then
+    compstmt
+    cases
+    {
+      $$ = cons(cons($2, $4), $5);
+    }
+  ;
+/* cases: what follows a `when` clause.  An else body becomes a last clause
+ * whose condition list is 0; no else yields 0; otherwise another case_body. */
+cases : opt_else
+    {
+      if ($1) {
+        $$ = cons(cons(0, $1), 0);
+      }
+      else {
+        $$ = 0;
+      }
+    }
+  | case_body
+  ;
+/* opt_rescue: zero or more rescue clauses.  Each clause is
+ * list3(exc_list, exc_var, body); later clauses are appended to the list. */
+opt_rescue : keyword_rescue exc_list exc_var then
+    compstmt
+    opt_rescue
+    {
+      $$ = list1(list3($2, $3, $5));
+      if ($6) $$ = append($$, $6);
+    }
+  | none
+  ;
+/* exc_list: a single arg_value wrapped in a list, an mrhs, or nothing. */
+exc_list : arg_value
+    {
+      $$ = list1($1);
+    }
+  | mrhs
+  | none
+  ;
+/* exc_var: optional `tASSOC lhs`; yields the lhs. */
+exc_var : tASSOC lhs
+    {
+      $$ = $2;
+    }
+  | none
+  ;
+/* opt_ensure: optional ensure clause; yields its compstmt. */
+opt_ensure : keyword_ensure compstmt
+    {
+      $$ = $2;
+    }
+  | none
+  ;
+/* literal: a numeric, or a symbol wrapped with new_sym(). */
+literal : numeric
+  | symbol
+    {
+      $$ = new_sym(p, $1);
+    }
+  ;
+/* string: tCHAR, a complete tSTRING, or a tSTRING_BEG-introduced string.
+ * With interpolation the pieces are combined by new_dstr(). */
+string : tCHAR
+  | tSTRING
+  | tSTRING_BEG tSTRING
+    {
+      $$ = $2;
+    }
+  | tSTRING_BEG string_interp tSTRING
+    {
+      $$ = new_dstr(p, push($2, $3));
+    }
+  ;
+/* string_interp: alternating tSTRING_PART pieces and embedded compstmt.
+ * The mid-rule action saves p->sterm as its value and clears it while the
+ * embedded code is parsed; the final action restores it. */
+string_interp : tSTRING_PART
+    {
+      $$ = p->sterm;
+      p->sterm = 0;
+    }
+    compstmt
+    '}'
+    {
+      p->sterm = $2;
+      $$ = list2($1, $3);
+    }
+  | string_interp
+    tSTRING_PART
+    {
+      $$ = p->sterm;
+      p->sterm = 0;
+    }
+
compstmt
+    '}'
+    {
+      p->sterm = $3; /* restore the terminator saved by the mid-rule action */
+      $$ = push(push($1, $2), $4);
+    }
+  ;
+/* regexp: the single token tREGEXP. */
+regexp : tREGEXP
+  ;
+/* symbol: tSYMBEG followed by a name; sets the lexer state to EXPR_END and
+ * yields the name. */
+symbol : tSYMBEG sym
+    {
+      p->lstate = EXPR_END;
+      $$ = $2;
+    }
+  ;
+/* sym: the name forms accepted after tSYMBEG. */
+sym : fname
+  | tIVAR
+  | tGVAR
+  | tCVAR
+  ;
+/* numeric: integer or float token; a tUMINUS_NUM prefix is folded into the
+ * literal by negate_lit(). */
+numeric : tINTEGER
+  | tFLOAT
+  | tUMINUS_NUM tINTEGER %prec tLOWEST
+    {
+      $$ = negate_lit(p, $2);
+    }
+  | tUMINUS_NUM tFLOAT %prec tLOWEST
+    {
+      $$ = negate_lit(p, $2);
+    }
+  ;
+/* variable: maps each variable-like token to its node constructor. */
+variable : tIDENTIFIER
+    {
+      $$ = new_lvar(p, $1);
+    }
+  | tIVAR
+    {
+      $$ = new_ivar(p, $1);
+    }
+  | tGVAR
+    {
+      $$ = new_gvar(p, $1);
+    }
+  | tCVAR
+    {
+      $$ = new_cvar(p, $1);
+    }
+  | tCONSTANT
+    {
+      $$ = new_const(p, $1);
+    }
+  ;
+/* var_lhs: a variable in assignment position; assignable() is called on it
+ * and $$ keeps yacc's default value ($1). */
+var_lhs : variable
+    {
+      assignable(p, $1);
+    }
+  ;
+/* var_ref: a variable in value position (via var_reference()) or one of the
+ * keyword pseudo-variables. */
+var_ref : variable
+    {
+      $$ = var_reference(p, $1);
+    }
+  | keyword_nil
+    {
+      $$ = new_nil(p);
+    }
+  | keyword_self
+    {
+      $$ = new_self(p);
+    }
+  | keyword_true
+    {
+      $$ = new_true(p);
+    }
+  | keyword_false
+    {
+      $$ = new_false(p);
+    }
+  | keyword__FILE__
+    {
+      if (!p->filename) {
+        p->filename = "(null)"; /* the fallback is stored back into the parser state */
+      }
+      $$ = new_str(p, p->filename, strlen(p->filename));
+    }
+  | keyword__LINE__
+    {
+      char buf[16];
+
+      snprintf(buf, 16, "%d", p->lineno); /* the line number is handed to new_int() as decimal text */
+      $$ = new_int(p, buf, 10);
+    }
+  ;
+/* backref: tNTH_REF or tBACK_REF token. */
+backref : tNTH_REF
+  | tBACK_REF
+  ;
+/* superclass: 0 when absent; otherwise the expression after '<' (the lexer
+ * state is forced to EXPR_BEG first).  On a parse error up to the next term
+ * the error is cleared with yyerrok and 0 is used. */
+superclass : term
+    {
+      $$ = 0;
+    }
+  | '<'
+    {
+      p->lstate = EXPR_BEG;
+    }
+    expr_value term
+    {
+      $$ = $3;
+    }
+  | error term
+    {
+      yyerrok;
+      $$ = 0;
+    }
+  ;
+/* f_arglist: method parameters, either parenthesised or ended by a term.
+ * After the parenthesised form the lexer is reset to EXPR_BEG with
+ * cmd_start set. */
+f_arglist : '(' f_args rparen
+    {
+      $$ = $2;
+      p->lstate = EXPR_BEG;
+      p->cmd_start = TRUE;
+    }
+  | f_args term
+    {
+      $$ = $1;
+    }
+  ;
+/* f_args: the accepted shapes of a method parameter list.  Every
+ * alternative calls new_args(p, a, b, c, d, e) with, in order, the leading
+ * f_arg list, the f_optarg list, the f_rest_arg, the trailing f_arg list and
+ * the block argument; 0 stands for a part that is absent. */
+f_args : f_arg ',' f_optarg ',' f_rest_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, $3, $5, 0, $6);
+    }
+  | f_arg ',' f_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, $3, $5, $7, $8);
+    }
+  | f_arg ',' f_optarg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, $3, 0, 0, $4);
+    }
+  | f_arg ',' f_optarg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, $3, 0, $5, $6);
+    }
+  | f_arg ',' f_rest_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, 0, $3, 0,
$4);
+    }
+  | f_arg ',' f_rest_arg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, 0, $3, $5, $6);
+    }
+  | f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, $1, 0, 0, 0, $2);
+    }
+  | f_optarg ',' f_rest_arg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, $1, $3, 0, $4);
+    }
+  | f_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, $1, $3, $5, $6);
+    }
+  | f_optarg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, $1, 0, 0, $2);
+    }
+  | f_optarg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, $1, 0, $3, $4);
+    }
+  | f_rest_arg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, 0, $1, 0, $2);
+    }
+  | f_rest_arg ',' f_arg opt_f_block_arg
+    {
+      $$ = new_args(p, 0, 0, $1, $3, $4);
+    }
+  | f_block_arg
+    {
+      $$ = new_args(p, 0, 0, 0, 0, $1);
+    }
+  | /* none */
+    {
+      local_add_f(p, 0); /* same call opt_f_block_arg makes when no block argument is given */
+      $$ = new_args(p, 0, 0, 0, 0, 0);
+    }
+  ;
+/* f_bad_arg: tokens that cannot name a parameter.  Each alternative reports
+ * its own error through yyerror() and yields 0. */
+f_bad_arg : tCONSTANT
+    {
+      yyerror(p, "formal argument cannot be a constant");
+      $$ = 0;
+    }
+  | tIVAR
+    {
+      yyerror(p, "formal argument cannot be an instance variable");
+      $$ = 0;
+    }
+  | tGVAR
+    {
+      yyerror(p, "formal argument cannot be a global variable");
+      $$ = 0;
+    }
+  | tCVAR
+    {
+      yyerror(p, "formal argument cannot be a class variable");
+      $$ = 0;
+    }
+  ;
+/* f_norm_arg: a parameter name.  An identifier is passed to local_add_f()
+ * and yielded; a bad name yields 0 (the error was reported in f_bad_arg). */
+f_norm_arg : f_bad_arg
+    {
+      $$ = 0;
+    }
+  | tIDENTIFIER
+    {
+      local_add_f(p, $1);
+      $$ = $1;
+    }
+  ;
+/* f_arg_item: one required parameter: a name (new_arg) or a parenthesised
+ * group (new_masgn). */
+f_arg_item : f_norm_arg
+    {
+      $$ = new_arg(p, $1);
+    }
+  | tLPAREN f_margs rparen
+    {
+      $$ = new_masgn(p, $2, 0);
+    }
+  ;
+/* f_arg: comma-separated f_arg_item elements collected into a list. */
+f_arg : f_arg_item
+    {
+      $$ = list1($1);
+    }
+  | f_arg ',' f_arg_item
+    {
+      $$ = push($1, $3);
+    }
+  ;
+/* f_opt: `name = arg_value`; the name is passed to local_add_f() and the
+ * result is the pair (name . default expression). */
+f_opt : tIDENTIFIER '=' arg_value
+    {
+      local_add_f(p, $1);
+      $$ = cons((node*)$1, $3);
+    }
+  ;
+/* f_block_opt: same as f_opt, but the default is a primary_value. */
+f_block_opt : tIDENTIFIER '=' primary_value
+    {
+      local_add_f(p, $1);
+      $$ = cons((node*)$1, $3);
+    }
+  ;
+/* f_block_optarg: comma-separated f_block_opt pairs collected into a list. */
+f_block_optarg : f_block_opt
+    {
+      $$ = list1($1);
+    }
+  | f_block_optarg ',' f_block_opt
+    {
+      $$ = push($1, $3);
+    }
+  ;
+/* f_optarg: comma-separated f_opt pairs collected into a list. */
+f_optarg : f_opt
+    {
+      $$ = list1($1);
+    }
+  | f_optarg ',' f_opt
+    {
+      $$ = push($1, $3);
+    }
+  ;
+/* restarg_mark: the '*' character token or tSTAR. */
+restarg_mark : '*'
+
| tSTAR
+  ;
+/* f_rest_arg: `*name` passes the name to local_add_f() and yields it; a
+ * bare `*` yields 0. */
+f_rest_arg : restarg_mark tIDENTIFIER
+    {
+      local_add_f(p, $2);
+      $$ = $2;
+    }
+  | restarg_mark
+    {
+      $$ = 0;
+    }
+  ;
+/* blkarg_mark: the '&' character token or tAMPER. */
+blkarg_mark : '&'
+  | tAMPER
+  ;
+/* f_block_arg: `&name`; the name is passed to local_add_f() and yielded. */
+f_block_arg : blkarg_mark tIDENTIFIER
+    {
+      local_add_f(p, $2);
+      $$ = $2;
+    }
+  ;
+/* opt_f_block_arg: optional `, &name`.  When it is absent local_add_f(p, 0)
+ * is still called and the value is 0. */
+opt_f_block_arg : ',' f_block_arg
+    {
+      $$ = $2;
+    }
+  | none
+    {
+      local_add_f(p, 0);
+      $$ = 0;
+    }
+  ;
+/* singleton: the receiver in `def receiver.name`.  A null var_ref is
+ * replaced by new_nil(); a parenthesised expression is rejected when empty
+ * or when its node type is one of the literal kinds listed below. */
+singleton : var_ref
+    {
+      $$ = $1;
+      if (!$$) $$ = new_nil(p);
+    }
+  | '(' {p->lstate = EXPR_BEG;} expr rparen
+    {
+      if ($3 == 0) { /* $3 is expr; $2 is the mid-rule action */
+        yyerror(p, "can't define singleton method for ().");
+      }
+      else {
+        switch ((enum node_type)$3->car) {
+        case NODE_STR:
+        case NODE_DSTR:
+        case NODE_DREGX:
+        case NODE_MATCH:
+        case NODE_FLOAT:
+        case NODE_ARRAY:
+          yyerror(p, "can't define singleton method for literals"); /* then runs on into default, which only breaks */
+        default:
+          break;
+        }
+      }
+      $$ = $3;
+    }
+  ;
+/* assoc_list: empty, or assocs followed by a trailer. */
+assoc_list : none
+  | assocs trailer
+    {
+      $$ = $1;
+    }
+  ;
+/* assocs: comma-separated assoc pairs collected into a list. */
+assocs : assoc
+    {
+      $$ = list1($1);
+    }
+  | assocs ',' assoc
+    {
+      $$ = push($1, $3);
+    }
+  ;
+/* assoc: `key tASSOC value`, or `tLABEL value` where the label is turned
+ * into a symbol node; the result is the pair (key . value). */
+assoc : arg_value tASSOC arg_value
+    {
+      $$ = cons($1, $3);
+    }
+  | tLABEL arg_value
+    {
+      $$ = cons(new_sym(p, $1), $2);
+    }
+  ;
+/* operation: tIDENTIFIER, tCONSTANT or tFID. */
+operation : tIDENTIFIER
+  | tCONSTANT
+  | tFID
+  ;
+/* operation2: the tokens of `operation`, plus `op`. */
+operation2 : tIDENTIFIER
+  | tCONSTANT
+  | tFID
+  | op
+  ;
+/* operation3: tIDENTIFIER, tFID or `op` (no tCONSTANT). */
+operation3 : tIDENTIFIER
+  | tFID
+  | op
+  ;
+/* dot_or_colon: '.' or tCOLON2. */
+dot_or_colon : '.'
+ | tCOLON2 + ; + +opt_terms : /* none */ + | terms + ; + +opt_nl : /* none */ + | '\n' + ; + +rparen : opt_nl ')' + ; + +rbracket : opt_nl ']' + ; + +trailer : /* none */ + | '\n' + | ',' + ; + +term : ';' {yyerrok;} + | '\n' + ; + +terms : term + | terms ';' {yyerrok;} + ; + +none : /* none */ + { + $$ = 0; + } + ; +%% +#define yylval (*((YYSTYPE*)(p->ylval))) + +static void +yyerror(parser_state *p, const char *s) +{ + fputs(s, stderr); + fputs("\n", stderr); + p->nerr++; +} + +static void +yyerror_i(parser_state *p, const char *fmt, int i) +{ + char buf[256]; + + snprintf(buf, 256, fmt, i); + yyerror(p, buf); +} + +static void +yywarn(parser_state *p, const char *s) +{ + fputs(s, stderr); + fputs("\n", stderr); +} + +static void +yywarning(parser_state *p, const char *s) +{ + fputs(s, stderr); + fputs("\n", stderr); +} + +static void +yywarning_s(parser_state *p, const char *fmt, const char *s) +{ + char buf[256]; + + snprintf(buf, 256, fmt, s); + yywarning(p, buf); +} + +static void +backref_error(parser_state *p, node *n) +{ + switch ((int)n->car) { + case NODE_NTH_REF: + yyerror_i(p, "can't set variable $%d", (int)n->cdr); + break; + case NODE_BACK_REF: + yyerror_i(p, "can't set variable $%c", (int)n->cdr); + break; + } +} + +static int peeks(parser_state *p, const char *s); +static int skips(parser_state *p, const char *s); + +static inline int +nextc(parser_state *p) +{ + int c; + + if (p->pb) { + node *tmp; + + c = (int)p->pb->car; + tmp = p->pb; + p->pb = p->pb->cdr; + cons_free(tmp); + } + else if (p->f) { + if (feof(p->f)) return -1; + c = fgetc(p->f); + if (c == EOF) return -1; + } + else if (!p->s || p->s >= p->send) { + return -1; + } + else { + c = *p->s++; + } + if (c == '\n') { + p->lineno++; + p->column = 0; + // must understand heredoc + } + else { + p->column++; + } + return c; +} + +static void +pushback(parser_state *p, int c) +{ + if (c < 0) return; + p->column--; + p->pb = cons((node*)c, p->pb); +} + +static void +skip(parser_state *p, 
char term)
+{
+  int c;
+
+  /* Also stop at end of input: nextc() keeps returning -1 there, so waiting
+   * only for `term` never ends when the input stops before it (e.g. a
+   * comment on the last line without a trailing newline). */
+  while ((c = nextc(p)) != term) {
+    if (c < 0) break;
+  }
+}
+
+/*
+ * Look ahead without consuming input: return TRUE when the character `n`
+ * positions ahead (0 = the next one) equals `c`, FALSE otherwise or when the
+ * input ends first.
+ *
+ * The n+1 characters read are put back in front of whatever was already
+ * pending in p->pb, in their original order, and the line/column counters
+ * are restored, so a peek leaves the reader state unchanged.
+ */
+static int
+peek_n(parser_state *p, int c, int n)
+{
+  node *list = 0;
+  int c0 = -1;
+  /* assumes p->lineno and p->column fit in int (lineno is printed with %d
+   * elsewhere in this file) -- TODO confirm against parser_state */
+  int lineno = p->lineno;
+  int column = p->column;
+
+  n++;                          /* must read 1 char */
+  while (n--) {
+    c0 = nextc(p);
+    if (c0 < 0) break;          /* EOF: still restore what was consumed */
+    list = push(list, (node*)c0);
+  }
+  if (list) {
+    /* Splice the characters just read in FRONT of the remaining pushback
+     * list.  Appending `list` as a single element of p->pb would store a
+     * pointer where a character is expected and reorder the input. */
+    node *tail = list;
+
+    while (tail->cdr) tail = tail->cdr;
+    tail->cdr = p->pb;
+    p->pb = list;
+  }
+  p->lineno = lineno;
+  p->column = column;
+  if (c0 < 0) return FALSE;
+  if (c0 == c) return TRUE;
+  return FALSE;
+}
+#define peek(p,c) peek_n((p), (c), 0)
+
+/*
+ * Return TRUE when the upcoming input starts with the string `s`, without
+ * consuming anything.
+ *
+ * Stream input, or any input with characters pending in p->pb, is checked
+ * one character at a time through peek_n().  A purely in-memory source is
+ * compared directly, but only when at least strlen(s) bytes remain.
+ */
+static int
+peeks(parser_state *p, const char *s)
+{
+  size_t len = strlen(s);
+
+  if (p->f || p->pb) {
+    int n = 0;
+
+    while (*s) {
+      if (!peek_n(p, *s++, n++)) return FALSE;
+    }
+    return TRUE;
+  }
+  else if (p->s && p->s <= p->send && (size_t)(p->send - p->s) >= len) {
+    if (memcmp(p->s, s, len) == 0) return TRUE;
+  }
+  return FALSE;
+}
+
+/*
+ * Consume input up to and including the first occurrence of the string `s`.
+ * Returns TRUE when `s` was found and FALSE when the input ended first.
+ */
+static int
+skips(parser_state *p, const char *s)
+{
+  int c;
+
+  for (;;) {
+    /* skip until first char */
+    for (;;) {
+      c = nextc(p);
+      if (c < 0) return FALSE;
+      if (c == *s) break;
+    }
+    /* Compare the rest through s + 1 instead of advancing `s`, so a failed
+     * match starts over with the FIRST character of the pattern. */
+    if (peeks(p, s + 1)) {
+      size_t len = strlen(s + 1);
+
+      while (len--) {
+        nextc(p);
+      }
+      return TRUE;
+    }
+  }
+}
+
+/* Flag bits combined into enum string_type below. */
+#define STR_FUNC_ESCAPE 0x01
+#define STR_FUNC_EXPAND 0x02
+#define STR_FUNC_REGEXP 0x04
+#define STR_FUNC_QWORDS 0x08
+#define STR_FUNC_SYMBOL 0x10
+#define STR_FUNC_INDENT 0x20
+
+enum string_type {
+  str_squote = (0),
+  str_dquote = (STR_FUNC_EXPAND),
+  str_xquote = (STR_FUNC_EXPAND),
+  str_regexp = (STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND),
+  str_sword  = (STR_FUNC_QWORDS),
+  str_dword  = (STR_FUNC_QWORDS|STR_FUNC_EXPAND),
+  str_ssym   = (STR_FUNC_SYMBOL),
+  str_dsym   = (STR_FUNC_SYMBOL|STR_FUNC_EXPAND)
+};
+
+/* Start a new token: reset the write index of the token buffer p->buf. */
+static void
+newtok(parser_state *p)
+{
+  p->bidx = 0;
+}
+
+/* Append `c` to the current token.  Characters beyond 1024 are dropped;
+ * tokfix() reports the truncation. */
+static void
+tokadd(parser_state *p, int c)
+{
+  if (p->bidx < 1024) {
+    p->buf[p->bidx++] = c;
+  }
+}
+
+/* Return the last character of the current token, or 0 when it is empty
+ * (instead of reading before the start of the buffer). */
+static int
+toklast(parser_state *p)
+{
+  if (p->bidx <= 0) return 0;
+  return p->buf[p->bidx-1];
+}
+
+/* NUL-terminate the current token.  A token that filled the buffer is
+ * reported and cut by one character so the terminator stays in bounds.
+ * Assumes p->buf holds 1024 bytes, as the guard in tokadd() implies --
+ * TODO confirm against parser_state. */
+static void
+tokfix(parser_state *p)
+{
+  if (p->bidx >= 1024) {
+    yyerror(p, "string too long (truncated)");
+    p->bidx = 1023;
+  }
+  p->buf[p->bidx] = '\0';
+}
+
+/* Return the text of the current token (valid until the next newtok()). */
+static const char*
+tok(parser_state *p)
+{
+  return p->buf;
+}
+
+/* Return the number of characters stored in the current token. */
+static int
+toklen(parser_state *p)
+{
+  return p->bidx;
+}
+
+/* Lexer-state predicates.  They expand in the lexer body and rely on its
+ * local names `p`, `space_seen` and `cmd_state`. */
+#define IS_ARG() (p->lstate == EXPR_ARG || p->lstate == EXPR_CMDARG)
+#define IS_END() (p->lstate == EXPR_END || p->lstate == EXPR_ENDARG || p->lstate == EXPR_ENDFN)
+#define IS_BEG() (p->lstate == EXPR_BEG || p->lstate == EXPR_MID || p->lstate == EXPR_VALUE || p->lstate == EXPR_CLASS)
+#define IS_SPCARG(c) (IS_ARG() && space_seen && !ISSPACE(c))
+#define IS_LABEL_POSSIBLE() ((p->lstate == EXPR_BEG && !cmd_state) || IS_ARG())
+#define IS_LABEL_SUFFIX(n) (peek_n(p, ':',(n)) && !peek_n(p, ':', (n)+1))
+
+/*
+ * Convert up to `len` leading octal digits of `start` to a number.
+ * *retlen receives the count of digits actually consumed (0 if none).
+ */
+static unsigned long
+scan_oct(const char *start, int len, int *retlen)
+{
+  const char *s = start;
+  unsigned long retval = 0;
+
+  while (len-- && *s >= '0' && *s <= '7') {
+    retval <<= 3;
+    retval |= *s++ - '0';
+  }
+  *retlen = (int)(s - start);
+  return retval;
+}
+
+/*
+ * Convert up to `len` leading hexadecimal digits of `start` (either case)
+ * to a number.  *retlen receives the count of digits actually consumed.
+ */
+static unsigned long
+scan_hex(const char *start, int len, int *retlen)
+{
+  /* Both halves map a digit to its value through (index & 15). */
+  static const char hexdigit[] = "0123456789abcdef0123456789ABCDEF";
+  const char *s = start;
+  unsigned long retval = 0;
+  const char *tmp;
+
+  while (len-- && *s && (tmp = strchr(hexdigit, *s))) {
+    retval <<= 4;
+    retval |= (tmp - hexdigit) & 15;
+    s++;
+  }
+  *retlen = (int)(s - start);
+  return retval;
+}
+
+/*
+ * Decode the escape sequence that follows a backslash (the backslash itself
+ * has already been consumed) and return the resulting character value.
+ *
+ * Malformed sequences and end of input are reported with yyerror() and
+ * yield '\0'.  An unrecognised escape character is returned unchanged.
+ */
+static int
+read_escape(parser_state *p)
+{
+  int c;
+
+  switch (c = nextc(p)) {
+  case '\\':    /* Backslash */
+    return c;
+
+  case 'n':     /* newline */
+    return '\n';
+
+  case 't':     /* horizontal tab */
+    return '\t';
+
+  case 'r':     /* carriage-return */
+    return '\r';
+
+  case 'f':     /* form-feed */
+    return '\f';
+
+  case 'v':     /* vertical tab */
+    return '\13';
+
+  case 'a':     /* alarm(bell) */
+    return '\007';
+
+  case 'e':     /* escape */
+    return 033;
+
+  case '0': case '1': case '2': case '3': /* octal constant */
+  case '4': case '5': case '6': case '7':
+    {
+      char buf[3];
+      int i;
+
+      /* The digit that selected this case is the first of at most three
+       * octal digits; it must be part of the value. */
+      buf[0] = (char)c;
+      for (i=1; i<3; i++) {
+        /* Keep the result in an int so the end-of-input test does not
+         * depend on the signedness of char. */
+        int d = nextc(p);
+
+        if (d < 0) goto eof;
+        if (d < '0' || '7' < d) {
+          pushback(p, d);
+          break;
+        }
+        buf[i] = (char)d;
+      }
+      /* Exactly i digits are stored; never scan past them. */
+      c = scan_oct(buf, i, &i);
+    }
+    return c;
+
+  case 'x':     /* hex constant */
+    {
+      char buf[2];
+      int i;
+
+      for (i=0; i<2; i++) {
+        int d = nextc(p);
+
+        if (d < 0) goto eof;
+        if (!isxdigit(d)) {
+          pushback(p, d);
+          break;
+        }
+        buf[i] = (char)d;
+      }
+      /* Exactly i digits are stored; never scan past them. */
+      c = scan_hex(buf, i, &i);
+      if (i == 0) {
+        yyerror(p, "Invalid escape character syntax");
+        return 0;
+      }
+    }
+    return c;
+
+  case 'b':     /* backspace */
+    return '\010';
+
+  case 's':     /* space */
+    return ' ';
+
+  case 'M':     /* \M-x: x (itself possibly an escape) with bit 0x80 set */
+    if ((c = nextc(p)) != '-') {
+      yyerror(p, "Invalid escape character syntax");
+      pushback(p, c);
+      return '\0';
+    }
+    if ((c = nextc(p)) == '\\') {
+      return read_escape(p) | 0x80;
+    }
+    else if (c == -1) goto eof;
+    else {
+      return ((c & 0xff) | 0x80);
+    }
+
+  case 'C':     /* \C-x: requires the '-', then handled like \cx */
+    if ((c = nextc(p)) != '-') {
+      yyerror(p, "Invalid escape character syntax");
+      pushback(p, c);
+      return '\0';
+    }
+    /* fall through */
+  case 'c':     /* \cx: x masked with 0x9f; \c? is 0177 */
+    if ((c = nextc(p)) == '\\') {
+      c = read_escape(p);
+    }
+    else if (c == '?')
+      return 0177;
+    else if (c == -1) goto eof;
+    return c & 0x9f;
+
+  eof:
+  case -1:
+    yyerror(p, "Invalid escape character syntax");
+    return '\0';
+
+  default:
+    return c;
+  }
+}
+
+/*
+ * Lex the body of a string literal that ends at the character `term` and
+ * allows escapes and `#{` interpolation.
+ *
+ * Returns tSTRING when the terminator is reached, or tSTRING_PART when `#{`
+ * is found; in that case p->sterm is set to `term` so the remainder can be
+ * resumed later.  In both cases yylval.node is a new_str() node holding the
+ * text collected so far.  At end of input an error is reported and 0 is
+ * returned.
+ */
+static int
+parse_string(parser_state *p, int term)
+{
+  int c;
+
+  newtok(p);
+
+  while ((c = nextc(p)) != term) {
+    if (c == -1) {
+      yyerror(p, "unterminated string meets end of file");
+      return 0;
+    }
+    else if (c == '\\') {
+      c = nextc(p);
+      if (c == term) {
+        /* escaped terminator: keep it, without the backslash */
+        tokadd(p, c);
+      }
+      else {
+        pushback(p, c);
+        tokadd(p, read_escape(p));
+      }
+      continue;
+    }
+    if (c == '#') {
+      c = nextc(p);
+      if (c == '{') {
+        tokfix(p);
+        p->lstate = EXPR_END;
+        p->sterm = term;
+        yylval.node = new_str(p, tok(p), toklen(p));
+        return tSTRING_PART;
+      }
+      /* a '#' not followed by '{' is an ordinary character */
+      tokadd(p, '#');
+      pushback(p, c);
+      continue;
+    }
+    tokadd(p, c);
+  }
+
+  tokfix(p);
+  p->lstate = EXPR_END;
+  p->sterm = 0;
+  yylval.node = new_str(p, tok(p), toklen(p));
+  return tSTRING;
+}
+
+/*
+ * Lex a string literal that ends at `term` and performs no interpolation.
+ * Only backslash-backslash, backslash-newline and (when `term` is a single
+ * quote) backslash-quote are treated specially.  Returns tSTRING with
+ * yylval.node set, or 0 after reporting an unterminated string.
+ */
+static int
+parse_qstring(parser_state *p, int term)
+{
+  int c;
+
+  newtok(p);
+  while ((c =
nextc(p)) != term) {
+    if (c == -1) {
+      yyerror(p, "unterminated string meets end of file");
+      return 0;
+    }
+    if (c == '\\') {
+      c = nextc(p);
+      switch (c) {
+      case '\n': /* backslash-newline: both characters are dropped */
+        continue;
+
+      case '\\': /* two backslashes collapse into one */
+        c = '\\';
+        break;
+
+      case '\'': /* \' is unescaped only when the string is delimited by ' */
+        if (term == '\'') {
+          c = '\'';
+          break;
+        }
+        /* fall through */
+      default: /* any other escape keeps its backslash; the character itself is added below */
+        tokadd(p, '\\');
+      }
+    }
+    tokadd(p, c);
+  }
+
+  tokfix(p);
+  yylval.node = new_str(p, tok(p), toklen(p));
+  p->lstate = EXPR_END;
+  return tSTRING;
+}
+/* Emit the "ambiguous first argument" warning.  Always returns 1, so the
+ * call can be used as the right-hand side of an `&&` condition. */
+static int
+arg_ambiguous(parser_state *p)
+{
+  yywarning(p, "ambiguous first argument; put parentheses or even spaces");
+  return 1;
+}
+
+#include "lex.def"
+/* Lexer entry point; the definition continues beyond this part of the file.
+ * While p->sterm is non-zero the next token is taken by parse_string() with
+ * that terminator.  Otherwise p->cmd_start is copied to cmd_state and
+ * cleared, and tokens are selected by switching on the next character. */
+static int
+parser_yylex(parser_state *p)
+{
+  register int c;
+  int space_seen = 0;
+  int cmd_state;
+  enum mrb_lex_state_enum last_state;
+
+  if (p->sterm) {
+    return parse_string(p, p->sterm);
+  }
+  cmd_state = p->cmd_start;
+  p->cmd_start = FALSE;
+ retry:
+  last_state = p->lstate;
+  switch (c = nextc(p)) {
+  case '\0':    /* NUL */
+  case '\004':  /* ^D */
+  case '\032':  /* ^Z */
+  case -1:      /* end of script.
*/ + return 0; + + /* white spaces */ + case ' ': case '\t': case '\f': case '\r': + case '\13': /* '\v' */ + space_seen = 1; + goto retry; + + case '#': /* it's a comment */ + skip(p, '\n'); + /* fall through */ + case '\n': + switch (p->lstate) { + case EXPR_BEG: + case EXPR_FNAME: + case EXPR_DOT: + case EXPR_CLASS: + case EXPR_VALUE: + goto retry; + default: + break; + } + while ((c = nextc(p))) { + switch (c) { + case ' ': case '\t': case '\f': case '\r': + case '\13': /* '\v' */ + space_seen = 1; + break; + case '.': + if ((c = nextc(p)) != '.') { + pushback(p, c); + pushback(p, '.'); + goto retry; + } + case -1: /* EOF */ + goto normal_newline; + default: + pushback(p, c); + goto normal_newline; + } + } + normal_newline: + p->lstate = EXPR_BEG; + return '\n'; + + case '*': + if ((c = nextc(p)) == '*') { + if ((c = nextc(p)) == '=') { + yylval.id = intern("**"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + pushback(p, c); + c = tPOW; + } + else { + if (c == '=') { + yylval.id = intern("*"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + pushback(p, c); + if (IS_SPCARG(c)) { + yywarning(p, "`*' interpreted as argument prefix"); + c = tSTAR; + } + else if (IS_BEG()) { + c = tSTAR; + } + else { + c = '*'; + } + } + switch (p->lstate) { + case EXPR_FNAME: case EXPR_DOT: + p->lstate = EXPR_ARG; break; + default: + p->lstate = EXPR_BEG; break; + } + return c; + + case '!': + c = nextc(p); + if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) { + p->lstate = EXPR_ARG; + if (c == '@') { + return '!'; + } + } + else { + p->lstate = EXPR_BEG; + } + if (c == '=') { + return tNEQ; + } + if (c == '~') { + return tNMATCH; + } + pushback(p, c); + return '!'; + + case '=': + if (p->column == 1) { + if (peeks(p, "begin\n")) { + skips(p, "\n=end\n"); + } + goto retry; + } + switch (p->lstate) { + case EXPR_FNAME: case EXPR_DOT: + p->lstate = EXPR_ARG; break; + default: + p->lstate = EXPR_BEG; break; + } + if ((c = nextc(p)) == '=') { + if ((c = nextc(p)) == '=') { + 
return tEQQ; + } + pushback(p, c); + return tEQ; + } + if (c == '~') { + return tMATCH; + } + else if (c == '>') { + return tASSOC; + } + pushback(p, c); + return '='; + + case '<': + last_state = p->lstate; + c = nextc(p); +#if 0 + // no heredoc supported yet + if (c == '<' && + p->lstate != EXPR_DOT && + p->lstate != EXPR_CLASS && + !IS_END() && + (!IS_ARG() || space_seen)) { + int token = heredoc_identifier(); + if (token) return token; + } +#endif + switch (p->lstate) { + case EXPR_FNAME: case EXPR_DOT: + p->lstate = EXPR_ARG; break; + default: + p->lstate = EXPR_BEG; break; + } + if (c == '=') { + if ((c = nextc(p)) == '>') { + return tCMP; + } + pushback(p, c); + return tLEQ; + } + if (c == '<') { + if ((c = nextc(p)) == '=') { + yylval.id = intern("<<"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + pushback(p, c); + return tLSHFT; + } + pushback(p, c); + return '<'; + + case '>': + switch (p->lstate) { + case EXPR_FNAME: case EXPR_DOT: + p->lstate = EXPR_ARG; break; + default: + p->lstate = EXPR_BEG; break; + } + if ((c = nextc(p)) == '=') { + return tGEQ; + } + if (c == '>') { + if ((c = nextc(p)) == '=') { + yylval.id = intern(">>"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + pushback(p, c); + return tRSHFT; + } + pushback(p, c); + return '>'; + + case '"': + p->sterm = '"'; + return tSTRING_BEG; + + case '\'': + return parse_qstring(p, c); + + case '?': + if (IS_END()) { + p->lstate = EXPR_VALUE; + return '?'; + } + c = nextc(p); + if (c == -1) { + yyerror(p, "incomplete character syntax"); + return 0; + } + if (isspace(c)) { + if (!IS_ARG()) { + int c2 = 0; + switch (c) { + case ' ': + c2 = 's'; + break; + case '\n': + c2 = 'n'; + break; + case '\t': + c2 = 't'; + break; + case '\v': + c2 = 'v'; + break; + case '\r': + c2 = 'r'; + break; + case '\f': + c2 = 'f'; + break; + } + if (c2) { + char buf[256]; + snprintf(buf, 256, "invalid character syntax; use ?\\%c", c2); + yyerror(p, buf); + } + } + ternary: + pushback(p, c); + p->lstate = 
EXPR_VALUE; + return '?'; + } + newtok(p); + // need support UTF-8 if configured + if ((isalnum(c) || c == '_')) { + int c2 = nextc(p); + pushback(p, c2); + if ((isalnum(c2) || c2 == '_')) { + goto ternary; + } + } + if (c == '\\') { + c = nextc(p); + if (c == 'u') { +#if 0 + tokadd_utf8(p); +#endif + } + else { + pushback(p, c); + c = read_escape(p); + tokadd(p, c); + } + } + else { + tokadd(p, c); + } + tokfix(p); + yylval.node = new_str(p, tok(p), toklen(p)); + p->lstate = EXPR_END; + return tCHAR; + + case '&': + if ((c = nextc(p)) == '&') { + p->lstate = EXPR_BEG; + if ((c = nextc(p)) == '=') { + yylval.id = intern("&&"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + pushback(p, c); + return tANDOP; + } + else if (c == '=') { + yylval.id = intern("&"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + pushback(p, c); + if (IS_SPCARG(c)) { + yywarning(p, "`&' interpreted as argument prefix"); + c = tAMPER; + } + else if (IS_BEG()) { + c = tAMPER; + } + else { + c = '&'; + } + switch (p->lstate) { + case EXPR_FNAME: case EXPR_DOT: + p->lstate = EXPR_ARG; break; + default: + p->lstate = EXPR_BEG; + } + return c; + + case '|': + if ((c = nextc(p)) == '|') { + p->lstate = EXPR_BEG; + if ((c = nextc(p)) == '=') { + yylval.id = intern("||"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + pushback(p, c); + return tOROP; + } + if (c == '=') { + yylval.id = intern("|"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) { + p->lstate = EXPR_ARG; + } + else { + p->lstate = EXPR_BEG; + } + pushback(p, c); + return '|'; + + case '+': + c = nextc(p); + if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) { + p->lstate = EXPR_ARG; + if (c == '@') { + return tUPLUS; + } + pushback(p, c); + return '+'; + } + if (c == '=') { + yylval.id = intern("+"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + if (IS_BEG() || (IS_SPCARG(c) && arg_ambiguous(p))) { + p->lstate = EXPR_BEG; + pushback(p, c); + if (c != -1 && ISDIGIT(c)) 
{ + c = '+'; + goto start_num; + } + return tUPLUS; + } + p->lstate = EXPR_BEG; + pushback(p, c); + return '+'; + + case '-': + c = nextc(p); + if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) { + p->lstate = EXPR_ARG; + if (c == '@') { + return tUMINUS; + } + pushback(p, c); + return '-'; + } + if (c == '=') { + yylval.id = intern("-"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + if (c == '>') { + p->lstate = EXPR_ARG; + return tLAMBDA; + } + if (IS_BEG() || (IS_SPCARG(c) && arg_ambiguous(p))) { + p->lstate = EXPR_BEG; + pushback(p, c); + if (c != -1 && ISDIGIT(c)) { + return tUMINUS_NUM; + } + return tUMINUS; + } + p->lstate = EXPR_BEG; + pushback(p, c); + return '-'; + + case '.': + p->lstate = EXPR_BEG; + if ((c = nextc(p)) == '.') { + if ((c = nextc(p)) == '.') { + return tDOT3; + } + pushback(p, c); + return tDOT2; + } + pushback(p, c); + if (c != -1 && ISDIGIT(c)) { + yyerror(p, "no . floating literal anymore; put 0 before dot"); + } + p->lstate = EXPR_DOT; + return '.'; + + start_num: + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + int is_float, seen_point, seen_e, nondigit; + + is_float = seen_point = seen_e = nondigit = 0; + p->lstate = EXPR_END; + newtok(p); + if (c == '-' || c == '+') { + tokadd(p, c); + c = nextc(p); + } + if (c == '0') { +#define no_digits() do {yyerror(p,"numeric literal without digits"); return 0;} while (0) + int start = toklen(p); + c = nextc(p); + if (c == 'x' || c == 'X') { + /* hexadecimal */ + c = nextc(p); + if (c != -1 && ISXDIGIT(c)) { + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (!ISXDIGIT(c)) break; + nondigit = 0; + tokadd(p, c); + } while ((c = nextc(p)) != -1); + } + pushback(p, c); + tokfix(p); + if (toklen(p) == start) { + no_digits(); + } + else if (nondigit) goto trailing_uc; + yylval.node = new_int(p, tok(p), 16); + return tINTEGER; + } + if (c == 'b' || c == 'B') { + /* binary */ + c = nextc(p); + if (c 
== '0' || c == '1') { + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (c != '0' && c != '1') break; + nondigit = 0; + tokadd(p, c); + } while ((c = nextc(p)) != -1); + } + pushback(p, c); + tokfix(p); + if (toklen(p) == start) { + no_digits(); + } + else if (nondigit) goto trailing_uc; + yylval.node = new_int(p, tok(p), 2); + return tINTEGER; + } + if (c == 'd' || c == 'D') { + /* decimal */ + c = nextc(p); + if (c != -1 && ISDIGIT(c)) { + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (!ISDIGIT(c)) break; + nondigit = 0; + tokadd(p, c); + } while ((c = nextc(p)) != -1); + } + pushback(p, c); + tokfix(p); + if (toklen(p) == start) { + no_digits(); + } + else if (nondigit) goto trailing_uc; + yylval.node = new_int(p, tok(p), 10); + return tINTEGER; + } + if (c == '_') { + /* 0_0 */ + goto octal_number; + } + if (c == 'o' || c == 'O') { + /* prefixed octal */ + c = nextc(p); + if (c == -1 || c == '_' || !ISDIGIT(c)) { + no_digits(); + } + } + if (c >= '0' && c <= '7') { + /* octal */ + octal_number: + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (c < '0' || c > '9') break; + if (c > '7') goto invalid_octal; + nondigit = 0; + tokadd(p, c); + } while ((c = nextc(p)) != -1); + + if (toklen(p) > start) { + pushback(p, c); + tokfix(p); + if (nondigit) goto trailing_uc; + yylval.node = new_int(p, tok(p), 8); + return tINTEGER; + } + if (nondigit) { + pushback(p, c); + goto trailing_uc; + } + } + if (c > '7' && c <= '9') { + invalid_octal: + yyerror(p, "Invalid octal digit"); + } + else if (c == '.' 
|| c == 'e' || c == 'E') { + tokadd(p, '0'); + } + else { + pushback(p, c); + yylval.node = new_int(p, "0", 10); + return tINTEGER; + } + } + + for (;;) { + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + nondigit = 0; + tokadd(p, c); + break; + + case '.': + if (nondigit) goto trailing_uc; + if (seen_point || seen_e) { + goto decode_num; + } + else { + int c0 = nextc(p); + if (c0 == -1 || !ISDIGIT(c0)) { + pushback(p, c0); + goto decode_num; + } + c = c0; + } + tokadd(p, '.'); + tokadd(p, c); + is_float++; + seen_point++; + nondigit = 0; + break; + + case 'e': + case 'E': + if (nondigit) { + pushback(p, c); + c = nondigit; + goto decode_num; + } + if (seen_e) { + goto decode_num; + } + tokadd(p, c); + seen_e++; + is_float++; + nondigit = c; + c = nextc(p); + if (c != '-' && c != '+') continue; + tokadd(p, c); + nondigit = c; + break; + + case '_': /* `_' in number just ignored */ + if (nondigit) goto decode_num; + nondigit = c; + break; + + default: + goto decode_num; + } + c = nextc(p); + } + + decode_num: + pushback(p, c); + if (nondigit) { + trailing_uc: + yyerror_i(p, "trailing `%c' in number", nondigit); + } + tokfix(p); + if (is_float) { + strtod(tok(p), 0); + if (errno == ERANGE) { + yywarning_s(p, "float %s out of range", tok(p)); + errno = 0; + } + yylval.node = new_float(p, tok(p)); + return tFLOAT; + } + yylval.node = new_int(p, tok(p), 10); + return tINTEGER; + } + + case ')': + case ']': + p->paren_nest--; + case '}': + COND_LEXPOP(); + CMDARG_LEXPOP(); + if (c == ')') + p->lstate = EXPR_ENDFN; + else + p->lstate = EXPR_ENDARG; + return c; + + case ':': + c = nextc(p); + if (c == ':') { + if (IS_BEG() || p->lstate == EXPR_CLASS || IS_SPCARG(-1)) { + p->lstate = EXPR_BEG; + return tCOLON3; + } + p->lstate = EXPR_DOT; + return tCOLON2; + } + if (IS_END() || ISSPACE(c)) { + pushback(p, c); + p->lstate = EXPR_BEG; + return ':'; + } + switch (c) { + case '\'': +#if 0 + p->lex_strterm = 
new_strterm(p, str_ssym, c, 0); +#endif + break; + case '"': +#if 0 + p->lex_strterm = new_strterm(p, str_dsym, c, 0); +#endif + break; + default: + pushback(p, c); + break; + } + p->lstate = EXPR_FNAME; + return tSYMBEG; + + case '/': + if (IS_BEG()) { +#if 0 + p->lex_strterm = new_strterm(p, str_regexp, '/', 0); +#endif + return tREGEXP_BEG; + } + if ((c = nextc(p)) == '=') { + yylval.id = intern("/"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + pushback(p, c); + if (IS_SPCARG(c)) { + arg_ambiguous(p); +#if 0 + p->lex_strterm = new_strterm(p, str_regexp, '/', 0); +#endif + return tREGEXP_BEG; + } + switch (p->lstate) { + case EXPR_FNAME: case EXPR_DOT: + p->lstate = EXPR_ARG; break; + default: + p->lstate = EXPR_BEG; break; + } + return '/'; + + case '^': + if ((c = nextc(p)) == '=') { + yylval.id = intern("^"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + switch (p->lstate) { + case EXPR_FNAME: case EXPR_DOT: + p->lstate = EXPR_ARG; break; + default: + p->lstate = EXPR_BEG; break; + } + pushback(p, c); + return '^'; + + case ';': + p->lstate = EXPR_BEG; + return ';'; + + case ',': + p->lstate = EXPR_BEG; + return ','; + + case '~': + if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) { + if ((c = nextc(p)) != '@') { + pushback(p, c); + } + p->lstate = EXPR_ARG; + } + else { + p->lstate = EXPR_BEG; + } + return '~'; + + case '(': + if (IS_BEG()) { + c = tLPAREN; + } + else if (IS_SPCARG(-1)) { + c = tLPAREN_ARG; + } + p->paren_nest++; + COND_PUSH(0); + CMDARG_PUSH(0); + p->lstate = EXPR_BEG; + return c; + + case '[': + p->paren_nest++; + if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) { + p->lstate = EXPR_ARG; + if ((c = nextc(p)) == ']') { + if ((c = nextc(p)) == '=') { + return tASET; + } + pushback(p, c); + return tAREF; + } + pushback(p, c); + return '['; + } + else if (IS_BEG()) { + c = tLBRACK; + } + else if (IS_ARG() && space_seen) { + c = tLBRACK; + } + p->lstate = EXPR_BEG; + COND_PUSH(0); + CMDARG_PUSH(0); + return c; + + case '{': + 
if (p->lpar_beg && p->lpar_beg == p->paren_nest) { + p->lstate = EXPR_BEG; + p->lpar_beg = 0; + p->paren_nest--; + COND_PUSH(0); + CMDARG_PUSH(0); + return tLAMBEG; + } + if (IS_ARG() || p->lstate == EXPR_END || p->lstate == EXPR_ENDFN) + c = '{'; /* block (primary) */ + else if (p->lstate == EXPR_ENDARG) + c = tLBRACE_ARG; /* block (expr) */ + else + c = tLBRACE; /* hash */ + COND_PUSH(0); + CMDARG_PUSH(0); + p->lstate = EXPR_BEG; + return c; + + case '\\': + c = nextc(p); + if (c == '\n') { + space_seen = 1; + goto retry; /* skip \\n */ + } + pushback(p, c); + return '\\'; + + case '%': + if (IS_BEG()) { + int term; +#if 0 + int paren; +#endif + + c = nextc(p); + quotation: + if (c == -1 || !ISALNUM(c)) { + term = c; + c = 'Q'; + } + else { + term = nextc(p); + if (isalnum(term)) { + yyerror(p, "unknown type of %string"); + return 0; + } + } + if (c == -1 || term == -1) { + yyerror(p, "unterminated quoted string meets end of file"); + return 0; + } +#if 0 + paren = term; +#endif + if (term == '(') term = ')'; + else if (term == '[') term = ']'; + else if (term == '{') term = '}'; + else if (term == '<') term = '>'; +#if 0 + else paren = 0; +#endif + + switch (c) { + case 'Q': +#if 0 + p->lex_strterm = new_strterm(p, str_dquote, term, paren); +#endif + return tSTRING_BEG; + + case 'q': +#if 0 + p->lex_strterm = new_strterm(p, str_squote, term, paren); +#endif + return tSTRING_BEG; + + case 'W': +#if 0 + p->lex_strterm = new_strterm(p, str_dword, term, paren); +#endif + do {c = nextc(p);} while (isspace(c)); + pushback(p, c); + return tWORDS_BEG; + + case 'w': +#if 0 + p->lex_strterm = new_strterm(p, str_sword, term, paren); +#endif + do {c = nextc(p);} while (isspace(c)); + pushback(p, c); + return tQWORDS_BEG; + + case 'r': +#if 0 + p->lex_strterm = new_strterm(p, str_regexp, term, paren); +#endif + return tREGEXP_BEG; + + case 's': +#if 0 + p->lex_strterm = new_strterm(p, str_ssym, term, paren); +#endif + p->lstate = EXPR_FNAME; + return tSYMBEG; + + default: + 
yyerror(p, "unknown type of %string"); + return 0; + } + } + if ((c = nextc(p)) == '=') { + yylval.id = intern("%"); + p->lstate = EXPR_BEG; + return tOP_ASGN; + } + if (IS_SPCARG(c)) { + goto quotation; + } + switch (p->lstate) { + case EXPR_FNAME: case EXPR_DOT: + p->lstate = EXPR_ARG; break; + default: + p->lstate = EXPR_BEG; break; + } + pushback(p, c); + return '%'; + + case '$': + p->lstate = EXPR_END; + newtok(p); + c = nextc(p); + switch (c) { + case '_': /* $_: last read line string */ + c = nextc(p); + pushback(p, c); + c = '_'; + /* fall through */ + case '~': /* $~: match-data */ + case '*': /* $*: argv */ + case '$': /* $$: pid */ + case '?': /* $?: last status */ + case '!': /* $!: error string */ + case '@': /* $@: error position */ + case '/': /* $/: input record separator */ + case '\\': /* $\: output record separator */ + case ';': /* $;: field separator */ + case ',': /* $,: output field separator */ + case '.': /* $.: last read line number */ + case '=': /* $=: ignorecase */ + case ':': /* $:: load path */ + case '<': /* $<: reading filename */ + case '>': /* $>: default output handle */ + case '\"': /* $": already loaded files */ + tokadd(p, '$'); + tokadd(p, c); + tokfix(p); + yylval.id = intern(tok(p)); + return tGVAR; + + case '-': + tokadd(p, '$'); + tokadd(p, c); + c = nextc(p); + pushback(p, c); + gvar: + tokfix(p); + yylval.id = intern(tok(p)); + return tGVAR; + + case '&': /* $&: last match */ + case '`': /* $`: string before last match */ + case '\'': /* $': string after last match */ + case '+': /* $+: string matches last paren. 
*/ + if (last_state == EXPR_FNAME) { + tokadd(p, '$'); + tokadd(p, c); + goto gvar; + } + yylval.node = new_back_ref(p, c); + return tBACK_REF; + + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + tokadd(p, '$'); + do { + tokadd(p, c); + c = nextc(p); + } while (c != -1 && isdigit(c)); + pushback(p, c); + if (last_state == EXPR_FNAME) goto gvar; + tokfix(p); + yylval.node = new_nth_ref(p, atoi(tok(p)+1)); + return tNTH_REF; + + default: + if (!identchar(c)) { + pushback(p, c); + return '$'; + } + case '0': + tokadd(p, '$'); + } + break; + + case '@': + c = nextc(p); + newtok(p); + tokadd(p, '@'); + if (c == '@') { + tokadd(p, '@'); + c = nextc(p); + } + if (c != -1 && isdigit(c)) { + if (p->bidx == 1) { + yyerror_i(p, "`@%c' is not allowed as an instance variable name", c); + } + else { + yyerror_i(p, "`@@%c' is not allowed as a class variable name", c); + } + return 0; + } + if (!identchar(c)) { + pushback(p, c); + return '@'; + } + break; + + case '_': + newtok(p); + break; + + default: + if (!identchar(c)) { + yyerror_i(p, "Invalid char `\\x%02X' in expression", c); + goto retry; + } + + newtok(p); + break; + } + + do { + tokadd(p, c); + c = nextc(p); + if (c < 0) break; + } while (identchar(c)); + + switch (tok(p)[0]) { + case '@': case '$': + pushback(p, c); + break; + default: + if ((c == '!' || c == '?') && !peek(p, '=')) { + tokadd(p, c); + } + else { + pushback(p, c); + } + } + tokfix(p); + { + int result = 0; + + last_state = p->lstate; + switch (tok(p)[0]) { + case '$': + p->lstate = EXPR_END; + result = tGVAR; + break; + case '@': + p->lstate = EXPR_END; + if (tok(p)[1] == '@') + result = tCVAR; + else + result = tIVAR; + break; + + default: + if (toklast(p) == '!' 
|| toklast(p) == '?') { + result = tFID; + } + else { + if (p->lstate == EXPR_FNAME) { + if ((c = nextc(p)) == '=' && !peek(p, '~') && !peek(p, '>') && + (!peek(p, '=') || (peek_n(p, '>', 1)))) { + result = tIDENTIFIER; + tokadd(p, c); + tokfix(p); + } + else { + pushback(p, c); + } + } + if (result == 0 && isupper(tok(p)[0])) { + result = tCONSTANT; + } + else { + result = tIDENTIFIER; + } + } + + if (IS_LABEL_POSSIBLE()) { + if (IS_LABEL_SUFFIX(0)) { + p->lstate = EXPR_BEG; + nextc(p); + tokfix(p); + yylval.id = intern(tok(p)); + return tLABEL; + } + } + if (p->lstate != EXPR_DOT) { + const struct kwtable *kw; + + /* See if it is a reserved word. */ + kw = mrb_reserved_word(tok(p), toklen(p)); + if (kw) { + enum mrb_lex_state_enum state = p->lstate; + p->lstate = kw->state; + if (state == EXPR_FNAME) { + yylval.id = intern(kw->name); + return kw->id[0]; + } + if (kw->id[0] == keyword_do) { + if (p->lpar_beg && p->lpar_beg == p->paren_nest) { + p->lpar_beg = 0; + p->paren_nest--; + return keyword_do_LAMBDA; + } + if (COND_P()) return keyword_do_cond; + if (CMDARG_P() && state != EXPR_CMDARG) + return keyword_do_block; + if (state == EXPR_ENDARG || state == EXPR_BEG) + return keyword_do_block; + return keyword_do; + } + if (state == EXPR_BEG || state == EXPR_VALUE) + return kw->id[0]; + else { + if (kw->id[0] != kw->id[1]) + p->lstate = EXPR_BEG; + return kw->id[1]; + } + } + } + + if (IS_BEG() || + p->lstate == EXPR_DOT || + IS_ARG()) { + if (cmd_state) { + p->lstate = EXPR_CMDARG; + } + else { + p->lstate = EXPR_ARG; + } + } + else if (p->lstate == EXPR_FNAME) { + p->lstate = EXPR_ENDFN; + } + else { + p->lstate = EXPR_END; + } + } + { + mrb_sym ident = intern(tok(p)); + + yylval.id = ident; +#if 0 + if (last_state != EXPR_DOT && islower(tok(p)[0]) && lvar_defined(ident)) { + p->lstate = EXPR_END; + } +#endif + } + return result; + } +} + +static int +yylex(void *lval, parser_state *p) +{ + int t; + + p->ylval = lval; + t = parser_yylex(p); + + return t; +} + 
+/*
+ * Run the grammar over the input already attached to `p`.
+ *
+ * On normal return p->tree holds the tree to compile: the parsed program,
+ * the program appended to a NODE_BEGIN built from p->begin_tree when that
+ * is set, p->begin_tree alone when nothing else was parsed, or a nil node
+ * when both are empty.  When control comes back through setjmp(p->jmp),
+ * an error is reported, p->nerr is incremented and both trees are cleared.
+ */
+static void
+start_parser(parser_state *p)
+{
+  node *tree;
+
+  if (setjmp(p->jmp) != 0) {
+    yyerror(p, "memory allocation error");
+    p->nerr++;
+    p->tree = p->begin_tree = 0;
+    return;
+  }
+  yyparse(p);
+  tree = p->tree;
+  if (!tree) {
+    if (p->begin_tree) {
+      tree = p->begin_tree;
+    }
+    else {
+      tree = new_nil(p);
+    }
+  }
+  else if (p->begin_tree) {
+    tree = new_begin(p, p->begin_tree);
+    append(tree, p->tree);
+  }
+  /* publish the combined tree; without this store the work above is lost */
+  p->tree = tree;
+}
+
+/*
+ * Open a memory pool and allocate a zero-filled parser_state inside it.
+ * Returns 0 when either the pool or the state cannot be allocated.
+ */
+static parser_state*
+parser_new(mrb_state *mrb)
+{
+  mrb_pool *pool;
+  parser_state *p;
+
+  pool = mrb_pool_open(mrb);
+  if (!pool) return 0;
+  p = mrb_pool_alloc(pool, sizeof(parser_state));
+  if (!p) {
+    /* nobody else holds this pool yet, so release it here */
+    mrb_pool_close(pool);
+    return 0;
+  }
+
+  memset(p, 0, sizeof(parser_state));
+  p->mrb = mrb;
+  p->pool = pool;
+
+  p->cmd_start = TRUE;
+  p->in_def = p->in_single = FALSE;
+
+  p->lineno = 1;
+#if defined(PARSER_TEST) || defined(PARSER_DEBUG)
+  yydebug = 1;
+#endif
+
+  return p;
+}
+
+/*
+ * Create a parser whose input is the stream f (the string input pointers
+ * are cleared), run it, and return the parser state.  Returns 0 when the
+ * state cannot be created.  Callers inspect p->tree and p->nerr for the
+ * outcome; the state itself lives in p->pool.
+ */
+parser_state*
+mrb_parse_file(mrb_state *mrb, FILE *f)
+{
+  parser_state *p;
+
+  p = parser_new(mrb);
+  if (!p) return 0;
+  p->s = p->send = NULL;
+  p->f = f;
+
+  start_parser(p);
+  return p;
+}
+
+/*
+ * Same as mrb_parse_file, but the input is the byte range [s, s+len)
+ * and no stream is attached.
+ */
+parser_state*
+mrb_parse_nstring(mrb_state *mrb, char *s, size_t len)
+{
+  parser_state *p;
+
+  p = parser_new(mrb);
+  if (!p) return 0;
+  p->s = s;
+  p->send = s + len;
+  p->f = NULL;
+
+  start_parser(p);
+  return p;
+}
+
+/* Parse a NUL-terminated string; its length is taken with strlen(). */
+parser_state*
+mrb_parse_string(mrb_state *mrb, char *s)
+{
+  return mrb_parse_nstring(mrb, s, strlen(s));
+}
+
+#define PARSER_DUMP
+
+void parser_dump(mrb_state *mrb, node *tree, int offset);
+int mrb_generate_code(mrb_state*, mrb_ast_node*);
+
+/*
+ * Parse the stream f and hand the resulting tree to mrb_generate_code().
+ * Returns that function's result, or -1 when the parser state could not
+ * be created, no tree was produced, or parse errors were counted.
+ * The parser's pool is closed on every path that obtained a parser.
+ */
+int
+mrb_compile_file(mrb_state * mrb, FILE *f)
+{
+  parser_state *p;
+  int n;
+
+  p = mrb_parse_file(mrb, f);
+  if (!p) return -1;
+  if (!p->tree || p->nerr) {
+    /* p itself lives in the pool; nothing of it is used after this */
+    mrb_pool_close(p->pool);
+    return -1;
+  }
+#ifdef PARSER_DUMP
+  parser_dump(mrb, p->tree, 0);
+#endif
+  n = mrb_generate_code(mrb, p->tree);
+  mrb_pool_close(p->pool);
+
+  return n;
+}
+
+const char*
+mrb_parser_filename(parser_state *p, const char *s)
+{
+  if (s) 
{
+    p->filename = strdup(s);
+  }
+  return p->filename;
+}
+
+/*
+ * Read or set the parser's line number.
+ * A positive n is stored and returned; n <= 0 only returns the current value.
+ */
+int
+mrb_parser_lineno(struct mrb_parser_state *p, int n)
+{
+  if (n <= 0) {
+    return p->lineno;
+  }
+  return p->lineno = n;
+}
+
+/*
+ * Parse the first len bytes of s and hand the resulting tree to
+ * mrb_generate_code().  Returns that function's result, or -1 when the
+ * parser state could not be created, no tree was produced, or parse
+ * errors were counted.  The parser's pool is closed on every path that
+ * obtained a parser.
+ */
+int
+mrb_compile_nstring(mrb_state *mrb, char *s, size_t len)
+{
+  parser_state *p;
+  int n;
+
+  p = mrb_parse_nstring(mrb, s, len);
+  if (!p) return -1;
+  if (!p->tree || p->nerr) {
+    /* p itself lives in the pool; nothing of it is used after this */
+    mrb_pool_close(p->pool);
+    return -1;
+  }
+#ifdef PARSER_DUMP
+  parser_dump(mrb, p->tree, 0);
+#endif
+  n = mrb_generate_code(mrb, p->tree);
+  mrb_pool_close(p->pool);
+
+  return n;
+}
+
+/* Compile a NUL-terminated string; its length is taken with strlen(). */
+int
+mrb_compile_string(mrb_state *mrb, char *s)
+{
+  return mrb_compile_nstring(mrb, s, strlen(s));
+}
+
+/* Write two spaces to stdout per indentation level; nothing for offset <= 0. */
+static void
+dump_prefix(int offset)
+{
+  /* "> 0" keeps a negative offset from counting down through INT_MIN */
+  while (offset-- > 0) {
+    putc(' ', stdout);
+    putc(' ', stdout);
+  }
+}
+
+/* Dump every element (car) of the list `tree` at the same indentation. */
+static void
+dump_recur(mrb_state *mrb, node *tree, int offset)
+{
+  while (tree) {
+    parser_dump(mrb, tree->car, offset);
+    tree = tree->cdr;
+  }
+}
+
+/*
+ * Print a readable dump of a syntax tree to stdout.
+ * The car of `tree` is read as the node type and the cdr as its payload;
+ * `offset` is the indentation level passed to dump_prefix().
+ */
+void
+parser_dump(mrb_state *mrb, node *tree, int offset)
+{
+  int n;
+
+  if (!tree) return;
+ again:
+  dump_prefix(offset);
+  n = (int)tree->car;
+  tree = tree->cdr;
+  switch (n) {
+  case NODE_BEGIN:
+    printf("NODE_BEGIN:\n");
+    dump_recur(mrb, tree, offset+1);
+    break;
+
+  case NODE_RESCUE:
+    /* payload is read as: body, list of rescue clauses, else part */
+    printf("NODE_RESCUE:\n");
+    if (tree->car) {
+      dump_prefix(offset+1);
+      printf("body:\n");
+      parser_dump(mrb, tree->car, offset+2);
+    }
+    tree = tree->cdr;
+    if (tree->car) {
+      node *n2 = tree->car;
+
+      dump_prefix(offset+1);
+      printf("rescue:\n");
+      while (n2) {
+        /* each clause is read as: classes, exception variable, body */
+        node *n3 = n2->car;
+        if (n3->car) {
+          dump_prefix(offset+2);
+          printf("handle classes:\n");
+          dump_recur(mrb, n3->car, offset+3);
+        }
+        if (n3->cdr->car) {
+          dump_prefix(offset+2);
+          printf("exc_var:\n");
+          parser_dump(mrb, n3->cdr->car, offset+3);
+        }
+        if (n3->cdr->cdr->car) {
+          dump_prefix(offset+2);
+          printf("rescue body:\n");
+          parser_dump(mrb, n3->cdr->cdr->car, offset+3);
+        }
+        n2 = n2->cdr;
+      }
+    }
+    tree = tree->cdr;
+    if (tree->car) {
+      dump_prefix(offset+1);
+      printf("else:\n");
+      parser_dump(mrb, 
tree->car, offset+2);
+    }
+    break;
+
+  case NODE_ENSURE:
+    printf("NODE_ENSURE:\n");
+    dump_prefix(offset+1);
+    printf("body:\n");
+    parser_dump(mrb, tree->car, offset+2);
+    dump_prefix(offset+1);
+    printf("ensure:\n");
+    parser_dump(mrb, tree->cdr, offset+2);
+    break;
+
+  case NODE_LAMBDA:
+    /* a lambda shares the block layout below but is labelled as itself */
+    printf("NODE_LAMBDA:\n");
+    goto block;
+
+  case NODE_BLOCK:
+    printf("NODE_BLOCK:\n");
+  block:
+    tree = tree->cdr;
+    if (tree->car) {
+      /* argument list, read in order: mandatory, optional, rest,
+         post-mandatory, block argument */
+      node *n = tree->car;
+
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("mandatory args:\n");
+        dump_recur(mrb, n->car, offset+2);
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("optional args:\n");
+        {
+          node *n2 = n->car;
+
+          while (n2) {
+            /* each entry is printed as name=<default expression> */
+            dump_prefix(offset+2);
+            printf("%s=", mrb_sym2name(mrb, (mrb_sym)n2->car->car));
+            parser_dump(mrb, n2->car->cdr, 0);
+            n2 = n2->cdr;
+          }
+        }
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("rest=*%s\n", mrb_sym2name(mrb, (mrb_sym)n->car));
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("post mandatory args:\n");
+        dump_recur(mrb, n->car, offset+2);
+      }
+      n = n->cdr;
+      if (n) {
+        /* the remaining cdr is itself cast to the block argument's symbol */
+        dump_prefix(offset+1);
+        printf("blk=&%s\n", mrb_sym2name(mrb, (mrb_sym)n));
+      }
+    }
+    dump_prefix(offset+1);
+    printf("body:\n");
+    parser_dump(mrb, tree->cdr->car, offset+2);
+    break;
+
+  case NODE_IF:
+    printf("NODE_IF:\n");
+    dump_prefix(offset+1);
+    printf("cond:\n");
+    parser_dump(mrb, tree->car, offset+2);
+    dump_prefix(offset+1);
+    printf("then:\n");
+    parser_dump(mrb, tree->cdr->car, offset+2);
+    if (tree->cdr->cdr->car) {
+      dump_prefix(offset+1);
+      printf("else:\n");
+      parser_dump(mrb, tree->cdr->cdr->car, offset+2);
+    }
+    break;
+
+  case NODE_AND:
+    printf("NODE_AND:\n");
+    parser_dump(mrb, tree->car, offset+1);
+    parser_dump(mrb, tree->cdr, offset+1);
+    break;
+
+  case NODE_OR:
+    printf("NODE_OR:\n");
+    parser_dump(mrb, tree->car, offset+1);
+    parser_dump(mrb, tree->cdr, offset+1);
+    break;
+
+  case NODE_CASE:
+    printf("NODE_CASE:\n");
+    if (tree->car) { 
+ parser_dump(mrb, tree->car, offset+1); + } + tree = tree->cdr; + while (tree) { + dump_prefix(offset+1); + printf("case:\n"); + dump_recur(mrb, tree->car->car, offset+2); + dump_prefix(offset+1); + printf("body:\n"); + parser_dump(mrb, tree->car->cdr, offset+2); + tree = tree->cdr; + } + break; + + case NODE_WHILE: + printf("NODE_WHILE:\n"); + dump_prefix(offset+1); + printf("cond:\n"); + parser_dump(mrb, tree->car, offset+2); + dump_prefix(offset+1); + printf("body:\n"); + parser_dump(mrb, tree->cdr, offset+2); + break; + + case NODE_UNTIL: + printf("NODE_UNTIL:\n"); + dump_prefix(offset+1); + printf("cond:\n"); + parser_dump(mrb, tree->car, offset+2); + dump_prefix(offset+1); + printf("body:\n"); + parser_dump(mrb, tree->cdr, offset+2); + break; + + case NODE_FOR: + printf("NODE_FOR:\n"); + dump_prefix(offset+1); + printf("var:\n"); + { + node *n2 = tree->car; + + if (n2->car) { + dump_prefix(offset+2); + printf("pre:\n"); + dump_recur(mrb, n2->car, offset+3); + } + n2 = n2->cdr; + if (n2) { + if (n2->car) { + dump_prefix(offset+2); + printf("rest:\n"); + parser_dump(mrb, n2->car, offset+3); + } + n2 = n2->cdr; + if (n2) { + if (n2->car) { + dump_prefix(offset+2); + printf("post:\n"); + dump_recur(mrb, n2->car, offset+3); + } + } + } + } + tree = tree->cdr; + dump_prefix(offset+1); + printf("in:\n"); + parser_dump(mrb, tree->car, offset+2); + tree = tree->cdr; + dump_prefix(offset+1); + printf("do:\n"); + parser_dump(mrb, tree->car, offset+2); + break; + + case NODE_SCOPE: + printf("NODE_SCOPE:\n"); + dump_prefix(offset+1); + printf("local variables:\n"); + { + node *n2 = tree->car; + + while (n2) { + dump_prefix(offset+2); + printf("%s\n", mrb_sym2name(mrb, (mrb_sym)n2->car)); + n2 = n2->cdr; + } + } + tree = tree->cdr; + offset++; + goto again; + + case NODE_FCALL: + case NODE_CALL: + printf("NODE_CALL:\n"); + parser_dump(mrb, tree->car, offset+1); + dump_prefix(offset+1); + printf("method='%s' (%d)\n", + mrb_sym2name(mrb, (mrb_sym)tree->cdr->car), + 
(int)tree->cdr->car); + tree = tree->cdr->cdr->car; + if (tree) { + dump_prefix(offset+1); + printf("args:\n"); + dump_recur(mrb, tree->car, offset+2); + if (tree->cdr) { + dump_prefix(offset+1); + printf("block:\n"); + parser_dump(mrb, tree->cdr, offset+2); + } + } + break; + + case NODE_DOT2: + printf("NODE_DOT2:\n"); + parser_dump(mrb, tree->car, offset+1); + parser_dump(mrb, tree->cdr, offset+1); + break; + + case NODE_DOT3: + printf("NODE_DOT3:\n"); + parser_dump(mrb, tree->car, offset+1); + parser_dump(mrb, tree->cdr, offset+1); + break; + + case NODE_COLON2: + printf("NODE_COLON2:\n"); + parser_dump(mrb, tree->car, offset+1); + dump_prefix(offset+1); + printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree->cdr)); + break; + + case NODE_COLON3: + printf("NODE_COLON3:\n"); + dump_prefix(offset+1); + printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree)); + break; + + case NODE_ARRAY: + printf("NODE_ARRAY:\n"); + dump_recur(mrb, tree, offset+1); + break; + + case NODE_HASH: + printf("NODE_HASH:\n"); + while (tree) { + dump_prefix(offset+1); + printf("key:\n"); + parser_dump(mrb, tree->car->car, offset+2); + dump_prefix(offset+1); + printf("value:\n"); + parser_dump(mrb, tree->car->cdr, offset+2); + tree = tree->cdr; + } + break; + + case NODE_SPLAT: + printf("NODE_SPLAT:\n"); + parser_dump(mrb, tree, offset+1); + break; + + case NODE_ASGN: + printf("NODE_ASGN:\n"); + dump_prefix(offset+1); + printf("lhs:\n"); + parser_dump(mrb, tree->car, offset+2); + dump_prefix(offset+1); + printf("rhs:\n"); + parser_dump(mrb, tree->cdr, offset+2); + break; + + case NODE_MASGN: + printf("NODE_MASGN:\n"); + dump_prefix(offset+1); + printf("mlhs:\n"); + { + node *n2 = tree->car; + + if (n2->car) { + dump_prefix(offset+2); + printf("pre:\n"); + dump_recur(mrb, n2->car, offset+3); + } + n2 = n2->cdr; + if (n2) { + if (n2->car) { + dump_prefix(offset+2); + printf("rest:\n"); + parser_dump(mrb, n2->car, offset+3); + } + n2 = n2->cdr; + if (n2) { + if (n2->car) { + dump_prefix(offset+2); + 
printf("post:\n"); + dump_recur(mrb, n2->car, offset+3); + } + } + } + } + dump_prefix(offset+1); + printf("rhs:\n"); + parser_dump(mrb, tree->cdr, offset+2); + break; + + case NODE_OP_ASGN: + printf("NODE_OP_ASGN:\n"); + dump_prefix(offset+1); + printf("lhs:\n"); + parser_dump(mrb, tree->car, offset+2); + tree = tree->cdr; + dump_prefix(offset+1); + printf("op='%s' (%d)\n", mrb_sym2name(mrb, (mrb_sym)tree->car), (int)tree->car); + tree = tree->cdr; + parser_dump(mrb, tree->car, offset+1); + break; + + case NODE_SUPER: + printf("NODE_SUPER:\n"); + if (tree) { + dump_prefix(offset+1); + printf("args:\n"); + dump_recur(mrb, tree->car, offset+2); + if (tree->cdr) { + dump_prefix(offset+1); + printf("block:\n"); + parser_dump(mrb, tree->cdr, offset+2); + } + } + break; + + case NODE_ZSUPER: + printf("NODE_ZSUPER\n"); + break; + + case NODE_RETURN: + printf("NODE_RETURN:\n"); + parser_dump(mrb, tree, offset+1); + break; + + case NODE_YIELD: + printf("NODE_YIELD:\n"); + dump_recur(mrb, tree, offset+1); + break; + + case NODE_BREAK: + printf("NODE_BREAK:\n"); + parser_dump(mrb, tree, offset+1); + break; + + case NODE_NEXT: + printf("NODE_NEXT:\n"); + parser_dump(mrb, tree, offset+1); + break; + + case NODE_REDO: + printf("NODE_REDO\n"); + break; + + case NODE_RETRY: + printf("NODE_RETRY\n"); + break; + + case NODE_LVAR: + printf("NODE_LVAR %s\n", mrb_sym2name(mrb, (mrb_sym)tree)); + break; + + case NODE_GVAR: + printf("NODE_GVAR %s\n", mrb_sym2name(mrb, (mrb_sym)tree)); + break; + + case NODE_IVAR: + printf("NODE_IVAR %s\n", mrb_sym2name(mrb, (mrb_sym)tree)); + break; + + case NODE_CVAR: + printf("NODE_CVAR %s\n", mrb_sym2name(mrb, (mrb_sym)tree)); + break; + + case NODE_CONST: + printf("NODE_CONST %s\n", mrb_sym2name(mrb, (mrb_sym)tree)); + break; + + case NODE_BACK_REF: + printf("NODE_BACK_REF:\n"); + parser_dump(mrb, tree, offset+1); + break; + + case NODE_NTH_REF: + printf("NODE_NTH_REF:\n"); + parser_dump(mrb, tree, offset+1); + break; + + case NODE_ARG: + 
printf("NODE_ARG %s\n", mrb_sym2name(mrb, (mrb_sym)tree)); + break; + + case NODE_BLOCK_ARG: + printf("NODE_BLOCK_ARG:\n"); + parser_dump(mrb, tree, offset+1); + break; + + case NODE_INT: + printf("NODE_INT %s base %d\n", (char*)tree->car, (int)tree->cdr->car); + break; + + case NODE_FLOAT: + printf("NODE_FLOAT %s\n", (char*)tree); + break; + + case NODE_NEGATE: + printf("NODE_NEGATE\n"); + parser_dump(mrb, tree, offset+1); + break; + + case NODE_STR: + printf("NODE_STR \"%s\" len %d\n", (char*)tree->car, (int)tree->cdr); + break; + + case NODE_DSTR: + printf("NODE_DSTR\n"); + dump_recur(mrb, tree, offset+1); + break; + + case NODE_SYM: + printf("NODE_SYM :%s\n", mrb_sym2name(mrb, (mrb_sym)tree)); + break; + + case NODE_SELF: + printf("NODE_SELF\n"); + break; + + case NODE_NIL: + printf("NODE_NIL\n"); + break; + + case NODE_TRUE: + printf("NODE_TRUE\n"); + break; + + case NODE_FALSE: + printf("NODE_FALSE\n"); + break; + + case NODE_ALIAS: + printf("NODE_ALIAS %s %s:\n", + mrb_sym2name(mrb, (mrb_sym)tree->car), + mrb_sym2name(mrb, (mrb_sym)tree->cdr)); + break; + + case NODE_UNDEF: + printf("NODE_UNDEF %s:\n", + mrb_sym2name(mrb, (mrb_sym)tree)); + break; + + case NODE_CLASS: + printf("NODE_CLASS:\n"); + if (tree->car->car == (node*)0) { + dump_prefix(offset+1); + printf(":%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car->cdr)); + } + else if (tree->car->car == (node*)1) { + dump_prefix(offset+1); + printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car->cdr)); + } + else { + parser_dump(mrb, tree->car->car, offset+1); + dump_prefix(offset+1); + printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car->cdr)); + } + if (tree->cdr->car) { + dump_prefix(offset+1); + printf("super:\n"); + parser_dump(mrb, tree->cdr->car, offset+2); + } + dump_prefix(offset+1); + printf("body:\n"); + parser_dump(mrb, tree->cdr->cdr->car->cdr, offset+2); + break; + + case NODE_MODULE: + printf("NODE_MODULE:\n"); + if (tree->car->car == (node*)0) { + dump_prefix(offset+1); + printf(":%s\n", 
mrb_sym2name(mrb, (mrb_sym)tree->car->cdr)); + } + else if (tree->car->car == (node*)1) { + dump_prefix(offset+1); + printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car->cdr)); + } + else { + parser_dump(mrb, tree->car->car, offset+1); + dump_prefix(offset+1); + printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car->cdr)); + } + dump_prefix(offset+1); + printf("body:\n"); + parser_dump(mrb, tree->cdr->car->cdr, offset+2); + break; + + case NODE_SCLASS: + printf("NODE_SCLASS:\n"); + parser_dump(mrb, tree->car, offset+1); + dump_prefix(offset+1); + printf("body:\n"); + parser_dump(mrb, tree->cdr->car->cdr, offset+2); + break; + + case NODE_DEF: + printf("NODE_DEF:\n"); + dump_prefix(offset+1); + printf("%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car)); + tree = tree->cdr; + dump_prefix(offset+1); + printf("local variables:\n"); + { + node *n2 = tree->car; + + while (n2) { + dump_prefix(offset+2); + if (n2->car) + printf("%s\n", mrb_sym2name(mrb, (mrb_sym)n2->car)); + n2 = n2->cdr; + } + } + tree = tree->cdr; + if (tree->car) { + node *n = tree->car; + + if (n->car) { + dump_prefix(offset+1); + printf("mandatory args:\n"); + dump_recur(mrb, n->car, offset+2); + } + n = n->cdr; + if (n->car) { + dump_prefix(offset+1); + printf("optional args:\n"); + { + node *n2 = n->car; + + while (n2) { + dump_prefix(offset+2); + printf("%s=", mrb_sym2name(mrb, (mrb_sym)n2->car->car)); + parser_dump(mrb, n2->car->cdr, 0); + n2 = n2->cdr; + } + } + } + n = n->cdr; + if (n->car) { + dump_prefix(offset+1); + printf("rest=*%s\n", mrb_sym2name(mrb, (mrb_sym)n->car)); + } + n = n->cdr; + if (n->car) { + dump_prefix(offset+1); + printf("post mandatory args:\n"); + dump_recur(mrb, n->car, offset+2); + } + n = n->cdr; + if (n) { + dump_prefix(offset+1); + printf("blk=&%s\n", mrb_sym2name(mrb, (mrb_sym)n)); + } + } + parser_dump(mrb, tree->cdr->car, offset+1); + break; + + case NODE_SDEF: + printf("NODE_SDEF:\n"); + parser_dump(mrb, tree->car, offset+1); + tree = tree->cdr; + 
dump_prefix(offset+1); + printf(":%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car)); + tree = tree->cdr->cdr; + if (tree->car) { + node *n = tree->car; + + if (n->car) { + dump_prefix(offset+1); + printf("mandatory args:\n"); + dump_recur(mrb, n->car, offset+2); + } + n = n->cdr; + if (n->car) { + dump_prefix(offset+1); + printf("optional args:\n"); + { + node *n2 = n->car; + + while (n2) { + dump_prefix(offset+2); + printf("%s=", mrb_sym2name(mrb, (mrb_sym)n2->car->car)); + parser_dump(mrb, n2->car->cdr, 0); + n2 = n2->cdr; + } + } + } + n = n->cdr; + if (n->car) { + dump_prefix(offset+1); + printf("rest=*%s\n", mrb_sym2name(mrb, (mrb_sym)n->car)); + } + n = n->cdr; + if (n->car) { + dump_prefix(offset+1); + printf("post mandatory args:\n"); + dump_recur(mrb, n->car, offset+2); + } + n = n->cdr; + if (n) { + dump_prefix(offset+1); + printf("blk=&%s\n", mrb_sym2name(mrb, (mrb_sym)n)); + } + } + tree = tree->cdr; + parser_dump(mrb, tree->car, offset+1); + break; + + case NODE_POSTEXE: + printf("NODE_POSTEXE:\n"); + parser_dump(mrb, tree, offset+1); + break; + + default: + printf("node type: %d (0x%x)\n", (int)n, (int)n); + break; + } + return; +} + +#ifdef PARSER_TEST +int +main() +{ + mrb_state *mrb = mrb_open(); + int n; + + n = mrb_compile_string(mrb, "\ +def fib(n)\n\ + if n<2\n\ + n\n\ + else\n\ + fib(n-2)+fib(n-1)\n\ + end\n\ +end\n\ +print(fib(20), \"\\n\")\n\ +"); + printf("ret: %d\n", n); + + return 0; +} +#endif diff --git a/src/pool.c b/src/pool.c new file mode 100644 index 0000000000..62e1a4d17e --- /dev/null +++ b/src/pool.c @@ -0,0 +1,152 @@ +#include "pool.h" +#include + +#undef TEST_POOL +#ifdef TEST_POOL +#include + +#define mrb_malloc(m,s) malloc(s) +#define mrb_free(m,p) free(p) +#endif + +#define POOL_PAGE_SIZE 16000 + +mrb_pool* +mrb_pool_open(mrb_state *mrb) +{ + mrb_pool *pool = mrb_malloc(mrb, sizeof(mrb_pool)); + + if (pool) { + pool->mrb = mrb; + pool->pages = 0; + } + + return pool; +} + +void +mrb_pool_close(mrb_pool *pool) +{ + struct 
mrb_pool_page *page, *tmp;
+
+  if (!pool) return;
+  page = pool->pages;
+  while (page) {
+    tmp = page;
+    page = page->next;        /* read the link before the page is freed */
+    mrb_free(pool->mrb, tmp);
+  }
+  mrb_free(pool->mrb, pool);
+}
+
+/*
+ * Allocate one page with room for at least len payload bytes, and never
+ * less than POOL_PAGE_SIZE.  Returns 0 when mrb_malloc fails.
+ * Only offset and len are initialised; the caller sets next and last.
+ */
+static struct mrb_pool_page*
+page_alloc(mrb_pool *pool, size_t len)
+{
+  struct mrb_pool_page *page;
+
+  if (len < POOL_PAGE_SIZE)
+    len = POOL_PAGE_SIZE;
+  /* -1: the struct already contains the first payload byte (page[1]) */
+  page = mrb_malloc(pool->mrb, sizeof(struct mrb_pool_page)+len-1);
+  if (page) {
+    page->offset = 0;
+    page->len = len;
+  }
+
+  return page;
+}
+
+/*
+ * Hand out len bytes from the pool.
+ * The first page with enough free space is used; otherwise a new page is
+ * allocated and pushed on the front of the page list.  The page remembers
+ * the returned chunk in `last`.  Returns 0 when pool is 0 or a new page
+ * cannot be allocated.
+ */
+void*
+mrb_pool_alloc(mrb_pool *pool, size_t len)
+{
+  struct mrb_pool_page *page;
+  size_t n;
+
+  if (!pool) return 0;
+
+  page = pool->pages;
+  while (page) {
+    /* offset never exceeds len, so the subtraction cannot wrap the way
+       offset + len could for a huge request */
+    if (len <= page->len - page->offset) {
+      n = page->offset;
+      page->offset += len;
+      /* arithmetic on the char array; void* arithmetic is not standard C */
+      page->last = page->page + n;
+      return page->last;
+    }
+    page = page->next;
+  }
+  page = page_alloc(pool, len);
+  if (!page) return 0;
+  page->offset = len;
+  page->next = pool->pages;
+  pool->pages = page;
+
+  page->last = page->page;
+  return page->last;
+}
+
+/*
+ * Return 1 when p is the chunk recorded as `last` on some page and len
+ * bytes starting at p still fit inside that page; 0 otherwise.
+ */
+int
+mrb_pool_can_realloc(mrb_pool *pool, void *p, size_t len)
+{
+  struct mrb_pool_page *page;
+
+  if (!pool) return 0;
+  page = pool->pages;
+  while (page) {
+    if (page->last == p) {
+      size_t beg;
+
+      beg = (char*)p - page->page;
+      if (beg + len > page->len) return 0;
+      return 1;
+    }
+    page = page->next;
+  }
+  return 0;
+}
+
+/*
+ * Resize the chunk p from oldlen to newlen bytes.
+ * When p is the last chunk of its page, ends exactly at the page's
+ * offset and the new size fits, it is resized in place and p is returned.
+ * Otherwise a new chunk is allocated and the old contents are copied.
+ * Returns 0 when pool is 0 or the new chunk cannot be allocated.
+ */
+void*
+mrb_pool_realloc(mrb_pool *pool, void *p, size_t oldlen, size_t newlen)
+{
+  struct mrb_pool_page *page;
+  void *np;
+
+  if (!pool) return 0;
+  page = pool->pages;
+  while (page) {
+    if (page->last == p) {
+      size_t beg;
+
+      beg = (char*)p - page->page;
+      if (beg + oldlen != page->offset) break;
+      if (beg + newlen > page->len) {
+        /* give the old chunk's space back before moving elsewhere */
+        page->offset = beg;
+        break;
+      }
+      page->offset = beg + newlen;
+      return p;
+    }
+    page = page->next;
+  }
+  np = mrb_pool_alloc(pool, newlen);
+  if (!np) return 0;
+  /* never copy more than the new chunk holds (newlen may be < oldlen) */
+  memcpy(np, p, oldlen < newlen ? oldlen : newlen);
+  return np;
+}
+
+#ifdef TEST_POOL
+int
+main()
+{
+  int i, len = 250;
+  mrb_pool *pool;
+  void *p;
+
+  pool = mrb_pool_open(0);
+  p = 
mrb_pool_alloc(pool, len); + for (i=1; i<20; i++) { + printf("%p (len=%d) %d\n", p, len, mrb_pool_can_realloc(pool, p, len*2)); + p = mrb_pool_realloc(pool, p, len, len*2); + len *= 2; + } + mrb_pool_close(pool); + return 0; +} +#endif diff --git a/src/pool.h b/src/pool.h new file mode 100644 index 0000000000..4f0b906a55 --- /dev/null +++ b/src/pool.h @@ -0,0 +1,19 @@ +#include "mruby.h" +#include + +typedef struct mrb_pool { + mrb_state *mrb; + struct mrb_pool_page { + struct mrb_pool_page *next; + size_t offset; + size_t len; + void *last; + char page[1]; + } *pages; +} mrb_pool; + +mrb_pool* mrb_pool_open(mrb_state*); +void mrb_pool_close(mrb_pool*); +void* mrb_pool_alloc(mrb_pool*, size_t); +void* mrb_pool_realloc(mrb_pool*, void*, size_t oldlen, size_t newlen); +int mrb_pool_can_realloc(mrb_pool*, void*, size_t); diff --git a/src/print.c b/src/print.c new file mode 100644 index 0000000000..a3e05b6f09 --- /dev/null +++ b/src/print.c @@ -0,0 +1,69 @@ +#include "mruby.h" +#include "mruby/string.h" +#include + +mrb_value +printstr(mrb_state *mrb, mrb_value obj) +{ + struct RString *str; + char *s; + size_t len; + + if (mrb_type(obj) == MRB_TT_STRING) { + str = mrb_str_ptr(obj); + s = str->buf; + len = str->len; + while (len--) { + putc(*s, stdout); + s++; + } + } + return obj; +} + +mrb_value +mrb_p(mrb_state *mrb, mrb_value obj) +{ + obj = mrb_funcall(mrb, obj, "inspect", 0); + printstr(mrb, obj); + putc('\n', stdout); + return obj; +} + +/* 15.3.1.2.9 */ +/* 15.3.1.3.34 */ +static mrb_value +p_m(mrb_state *mrb, mrb_value self) +{ + int argc, i; + mrb_value *argv; + + mrb_get_args(mrb, "*", &argv, &argc); + for (i=0; ikernel_module; + + mrb_define_method(mrb, krn, "__printstr__", mrb_printstr, ARGS_REQ(1)); + mrb_define_method(mrb, krn, "p", p_m, ARGS_ANY()); /* 15.3.1.3.34 */ +} diff --git a/src/proc.c b/src/proc.c new file mode 100644 index 0000000000..aae9b1932f --- /dev/null +++ b/src/proc.c @@ -0,0 +1,92 @@ +#include "mruby.h" +#include "mruby/proc.h" 
+#include "mruby/array.h"
+#include "mruby/class.h"
+#include "opcode.h"
+
+/*
+ * Allocate a Proc object whose body is the instruction sequence `irep`.
+ * target_class is taken from the current call info when there is one,
+ * otherwise it is 0.  The proc starts without an environment.
+ */
+struct RProc *
+mrb_proc_new(mrb_state *mrb, mrb_irep *irep)
+{
+  struct RProc *p;
+
+  p = mrb_obj_alloc(mrb, MRB_TT_PROC, mrb->proc_class);
+  p->body.irep = irep;
+  p->target_class = (mrb->ci) ? mrb->ci->target_class : 0;
+  p->env = 0;
+
+  return p;
+}
+
+/*
+ * Allocate a Proc for `irep` that shares the environment of the current
+ * call info.  If the call info has no environment yet, one is created from
+ * the current stack, method id and call-info index and attached to it.
+ * NOTE(review): mrb->ci is dereferenced unconditionally here, while
+ * mrb_proc_new tolerates a null mrb->ci -- confirm callers guarantee it.
+ */
+struct RProc *
+mrb_closure_new(mrb_state *mrb, mrb_irep *irep)
+{
+  struct RProc *p = mrb_proc_new(mrb, irep);
+  struct REnv *e;
+
+  if (!mrb->ci->env) {
+    e = mrb_obj_alloc(mrb, MRB_TT_ENV, mrb->ci->proc->env);
+    e->flags= (unsigned int)irep->nlocals;
+    e->mid = mrb->ci->mid;
+    e->cioff = mrb->ci - mrb->cibase;
+    e->stack = mrb->stack;
+    mrb->ci->env = e;
+  }
+  else {
+    e = mrb->ci->env;
+  }
+  p->env = e;
+  return p;
+}
+
+/* Allocate a Proc that wraps the C function `func` (MRB_PROC_CFUNC set). */
+struct RProc *
+mrb_proc_new_cfunc(mrb_state *mrb, mrb_func_t func)
+{
+  struct RProc *p;
+
+  p = mrb_obj_alloc(mrb, MRB_TT_PROC, mrb->proc_class);
+  p->body.func = func;
+  p->flags |= MRB_PROC_CFUNC;
+
+  return p;
+}
+
+/* Function form of the MRB_PROC_CFUNC_P() test. */
+int
+mrb_proc_cfunc_p(struct RProc *p)
+{
+  return MRB_PROC_CFUNC_P(p);
+}
+
+/* Call the C function stored in p with `self` and return its result. */
+mrb_value
+mrb_proc_call_cfunc(mrb_state *mrb, struct RProc *p, mrb_value self)
+{
+  return (p->body.func)(mrb, self);
+}
+
+/*
+ * Return the instruction sequence of p's irep.
+ * Reads body.irep, so it is only meaningful for procs created from an irep.
+ */
+mrb_code*
+mrb_proc_iseq(mrb_state *mrb, struct RProc *p)
+{
+  return p->body.irep->iseq;
+}
+
+/*
+ * Define the Proc class and install "call" and "[]", both backed by one
+ * single-instruction irep containing OP_CALL.
+ * If either allocation fails the function returns without defining
+ * anything, after releasing whichever allocation did succeed.
+ */
+void
+mrb_init_proc(mrb_state *mrb)
+{
+  struct RProc *m;
+  mrb_code *call_iseq = mrb_malloc(mrb, sizeof(mrb_code));
+  mrb_irep *call_irep = mrb_calloc(mrb, sizeof(mrb_irep), 1);
+
+  if ( call_iseq == NULL || call_irep == NULL ) {
+    /* one of the two may have succeeded; do not leak it */
+    if (call_iseq != NULL) mrb_free(mrb, call_iseq);
+    if (call_irep != NULL) mrb_free(mrb, call_irep);
+    return;
+  }
+
+  *call_iseq = MKOP_A(OP_CALL, 0);
+  call_irep->idx = -1;
+  call_irep->flags = MRB_IREP_NOFREE;
+  call_irep->iseq = call_iseq;
+  call_irep->ilen = 1;
+
+  mrb->proc_class = mrb_define_class(mrb, "Proc", mrb->object_class);
+
+  m = mrb_proc_new(mrb, call_irep);
+  mrb_define_method_raw(mrb, mrb->proc_class, mrb_intern(mrb, "call"), m);
+  mrb_define_method_raw(mrb, mrb->proc_class, mrb_intern(mrb, "[]"), m);
+}
diff --git a/src/range.c b/src/range.c new file mode 100644 index 
0000000000..bc85f1f627
--- /dev/null
+++ b/src/range.c
@@ -0,0 +1,499 @@
+#include "mruby.h"
+#include "mruby/class.h"
+#include "mruby/range.h"
+#include "variable.h"
+#include "error.h"
+#include "mruby/numeric.h"
+#include "mruby/string.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef OTHER
+#define OTHER 2
+#endif
+
+mrb_value mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int),
+                                   mrb_value obj, mrb_value paired_obj, void* arg);
+
+int printf (const char*, ...);
+/*--------- <1.8.7>object.c ---------> */
+
+/*
+ *  call-seq:
+ *     obj.instance_of?(class)    => true or false
+ *
+ *  Returns true if obj is an instance of the given
+ *  class. See also Object#kind_of?.
+ */
+
+int
+mrb_obj_is_instance_of(mrb_state *mrb, mrb_value obj, struct RClass* c)
+{
+  if (mrb_obj_class(mrb, obj) == c) return TRUE;
+  return FALSE;
+}
+/*--------- <1.8.7>object.c ---------< */
+
+/*
+ * Creates a Range object running from beg to end.  A non-zero excl
+ * marks the range as excluding its end value.  The endpoints are stored
+ * as given; no comparability check is made here.
+ */
+mrb_value
+mrb_range_new(mrb_state *mrb, mrb_value beg, mrb_value end, int excl)
+{
+  struct RRange *r;
+
+  r = mrb_obj_alloc(mrb, MRB_TT_RANGE, mrb->range_class);
+  r->edges = mrb_malloc(mrb, sizeof(struct mrb_range_edges));
+  r->edges->beg = beg;
+  r->edges->end = end;
+  r->excl = excl;
+  return mrb_range_value(r);
+}
+
+/*
+ *  call-seq:
+ *     rng.first    => obj
+ *     rng.begin    => obj
+ *
+ *  Returns the first object in rng.
+ */
+mrb_value
+mrb_range_beg(mrb_state *mrb, mrb_value range)
+{
+  struct RRange *r = mrb_range_ptr(range);
+
+  return r->edges->beg;
+}
+
+/*
+ *  call-seq:
+ *     rng.end    => obj
+ *     rng.last   => obj
+ *
+ *  Returns the object that defines the end of rng.
+ *
+ *     (1..10).end    #=> 10
+ *     (1...10).end   #=> 10
+ */
+
+mrb_value
+mrb_range_end(mrb_state *mrb, mrb_value range)
+{
+  struct RRange *r = mrb_range_ptr(range);
+
+  return r->edges->end;
+}
+
+/*
+ *  call-seq:
+ *     range.exclude_end?    => true or false
+ *
+ *  Returns true if range excludes its end value.
+ */
+mrb_value
+mrb_range_excl(mrb_state *mrb, mrb_value range)
+{
+  struct RRange *r = mrb_range_ptr(range);
+
+  return r->excl ? mrb_true_value() : mrb_false_value();
+}
+
+/*
+ * Checks that two range endpoints can be compared with each other.
+ *
+ *   args[0]  beginning of the range
+ *   args[1]  end of the range
+ *
+ * Returns FALSE when args[0] <=> args[1] yields nil (the values are not
+ * comparable) and TRUE otherwise.  The ordering itself is not examined:
+ * a descending pair is a valid, empty range.  The result of <=> is only
+ * tested for nil, so it is never read as a Fixnum without knowing its
+ * type.
+ */
+static int
+range_check(mrb_state *mrb, mrb_value *args)
+{
+  mrb_value ans = mrb_funcall(mrb, args[0], "<=>", 1, args[1]);
+
+  if (mrb_nil_p(ans)) return FALSE;
+  return TRUE;
+}
+
+/*
+ * Stores beg, end and the exclude flag in range.
+ *
+ * Unless both endpoints are Fixnums they are first checked with
+ * range_check(); an ArgumentError is raised when they cannot be
+ * compared.  The edge storage is allocated here when the object does
+ * not have one yet, which is the case for objects that were not built
+ * by mrb_range_new() (this relies on a fresh object starting with a
+ * null edges pointer).
+ */
+static void
+range_init(mrb_state *mrb, mrb_value range, mrb_value beg, mrb_value end, mrb_int exclude_end)
+{
+  mrb_value args[2];
+  struct RRange *r = mrb_range_ptr(range);
+
+  if ((mrb_type(beg) != MRB_TT_FIXNUM) || (mrb_type(end) != MRB_TT_FIXNUM)) {
+    args[0] = beg;
+    args[1] = end;
+    if (!range_check(mrb, args)) {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "bad value for range");
+    }
+  }
+  if (!r->edges) {
+    r->edges = mrb_malloc(mrb, sizeof(struct mrb_range_edges));
+  }
+  r->excl = exclude_end;
+  r->edges->beg = beg;
+  r->edges->end = end;
+}
+/*
+ *  call-seq:
+ *     Range.new(start, end, exclusive=false)    => range
+ *
+ *  Constructs a range using the given start and end. If the third
+ *  parameter is omitted or is false, the range will include
+ *  the end object; otherwise, it will be excluded.
+ */
+
+mrb_value
+mrb_range_initialize(mrb_state *mrb, mrb_value range)
+{
+  mrb_value beg, end;
+  mrb_value flags;
+
+  mrb_get_args(mrb, "ooo", &beg, &end, &flags);
+  /* Ranges are immutable, so that they should be initialized only once. */
+  range_init(mrb, range, beg, end, mrb_test(flags));
+  return range;
+}
+
+/* Returns non-zero when a == b holds, as decided by a's own "==" method. */
+static int
+range_edge_equal(mrb_state *mrb, mrb_value a, mrb_value b)
+{
+  return mrb_test(mrb_funcall(mrb, a, "==", 1, b));
+}
+
+/*
+ *  call-seq:
+ *     range == obj    => true or false
+ *
+ *  Returns true only if
+ *  1) obj is a Range,
+ *  2) obj has equivalent beginning and end items (by comparing them with ==),
+ *  3) obj has the same #exclude_end? setting as rng.
+ *
+ *    (0..2) == (0..2)            #=> true
+ *    (0..2) == Range.new(0,2)    #=> true
+ *    (0..2) == (0...2)           #=> false
+ *
+ */
+
+mrb_value
+mrb_range_eq(mrb_state *mrb, mrb_value range)
+{
+  struct RRange *rr;
+  struct RRange *ro;
+  mrb_value obj;
+
+  mrb_get_args(mrb, "o", &obj);
+
+  /* the very same object is trivially equal */
+  if (mrb_obj_equal(mrb, range, obj)) return mrb_true_value();
+
+  /* same class? */
+  if (!mrb_obj_is_instance_of(mrb, obj, mrb_obj_class(mrb, range)))
+    return mrb_false_value();
+
+  rr = mrb_range_ptr(range);
+  ro = mrb_range_ptr(obj);
+  /* endpoints are compared with ==, so equal-but-distinct objects match */
+  if (!range_edge_equal(mrb, rr->edges->beg, ro->edges->beg))
+    return mrb_false_value();
+  if (!range_edge_equal(mrb, rr->edges->end, ro->edges->end))
+    return mrb_false_value();
+  if (rr->excl != ro->excl)
+    return mrb_false_value();
+
+  return mrb_true_value();
+}
+
+/* a <= b, decided by a <=> b.  A nil comparison result counts as false. */
+static int
+r_le(mrb_state *mrb, mrb_value a, mrb_value b)
+{
+  mrb_value r = mrb_funcall(mrb, a, "<=>", 1, b); /* compare result */
+  /* output :a < b => -1, a = b =>  0, a > b => +1 */
+
+  if (mrb_nil_p(r)) return FALSE;
+
+  if (mrb_obj_equal(mrb, r, mrb_fixnum_value(0))) return TRUE;
+  if (mrb_obj_equal(mrb, r, mrb_fixnum_value(-1))) return TRUE;
+  return FALSE;
+}
+
+/* a > b, decided by a <=> b.  A nil comparison result counts as false. */
+static int
+r_gt(mrb_state *mrb, mrb_value a, mrb_value b)
+{
+  mrb_value r = mrb_funcall(mrb, a, "<=>", 1, b);
+  /* output :a < b => -1, a = b =>  0, a > b => +1 */
+
+  if (mrb_nil_p(r)) return FALSE;
+
+  if (mrb_obj_equal(mrb, r, mrb_fixnum_value(1))) return TRUE;
+  return FALSE;
+}
+
+/* a >= b, decided by a <=> b.  A nil comparison result counts as false. */
+static int
+r_ge(mrb_state *mrb, mrb_value a, mrb_value b)
+{
+  mrb_value r = mrb_funcall(mrb, a, "<=>", 1, b); /* compare result */
+  /* output :a < b => -1, a = b =>  0, a > b => +1 */
+
+  if (mrb_nil_p(r)) return FALSE;
+
+  if (mrb_obj_equal(mrb, r, mrb_fixnum_value(0))) return TRUE;
+  if (mrb_obj_equal(mrb, r, mrb_fixnum_value(1))) return TRUE;
+  return FALSE;
+}
+
+/*
+ *  call-seq:
+ *     range === obj       =>  true or false
+ *     range.member?(val)  =>  true or false
+ *     range.include?(val) =>  true or false
+ *
+ */
+mrb_value
+mrb_range_include(mrb_state *mrb, mrb_value range)
+{
+  mrb_value val;
+  struct RRange *r = mrb_range_ptr(range);
+  mrb_value beg, end;
+
+  mrb_get_args(mrb, "o", &val);
+
+  beg = r->edges->beg;
+  end = r->edges->end;
+  if (r_le(mrb, beg, val)) {
+    /* beg <= val */
+    if (r->excl) {
+      if (r_gt(mrb, end, val)) return mrb_true_value(); /* end >  val */
+    }
+    else {
+      if (r_ge(mrb, end, val)) return mrb_true_value(); /* end >= val */
+    }
+  }
+  return mrb_false_value();
+}
+
+/*
+ *  call-seq:
+ *     rng.each {| i | block } => rng
+ *
+ *  Iterates over the elements rng, passing each in turn to the
+ *  block. You can only iterate if the start object of the range
+ *  supports the +succ+ method (which means that you can't iterate over
+ *  ranges of +Float+ objects).
+ *
+ *     (10..15).each do |n|
+ *        print n, ' '
+ *     end
+ *
+ *  produces:
+ *
+ *     10 11 12 13 14 15
+ */
+
+/* NOTE: this C implementation does no iteration; it only returns range. */
+mrb_value
+mrb_range_each(mrb_state *mrb, mrb_value range)
+{
+  return range;
+}
+
+/*
+ * Converts range into a start offset and a length for a sequence of
+ * len elements.
+ *
+ *   begp, lenp  receive the start offset and element count on success
+ *   len         length of the sequence being indexed
+ *   err         0: no exception, clamp the end to len
+ *               1: raise RangeError when out of range, no clamping
+ *               2: raise RangeError when out of range, clamp the end
+ *
+ * Returns FALSE when range is not a Range, TRUE on success and OTHER
+ * when the range is out of bounds and err is 0.  Negative endpoints
+ * count from the end of the sequence.  The endpoints are read as
+ * Fixnums without a type check.
+ */
+mrb_int
+mrb_range_beg_len(mrb_state *mrb, mrb_value range, mrb_int *begp, mrb_int *lenp, mrb_int len, mrb_int err)
+{
+  mrb_int beg, end, b, e;
+  struct RRange *r = mrb_range_ptr(range);
+
+  if (mrb_type(range) != MRB_TT_RANGE) return FALSE;
+
+  beg = b = mrb_fixnum(r->edges->beg);
+  end = e = mrb_fixnum(r->edges->end);
+
+  if (beg < 0) {
+    beg += len;
+    if (beg < 0) goto out_of_range;
+  }
+  if (err == 0 || err == 2) {
+    if (beg > len) goto out_of_range;
+    if (end > len) end = len;
+  }
+  if (end < 0) end += len;
+  if (!r->excl) end++;  /* include end point */
+  len = end - beg;
+  if (len < 0) len = 0;
+
+  *begp = beg;
+  *lenp = len;
+  return TRUE;
+
+out_of_range:
+  if (err) {
+    /* %ld needs a long argument whatever the width of mrb_int is */
+    mrb_raise(mrb, E_RANGE_ERROR, "%ld..%s%ld out of range",
+              (long)b, r->excl? "." : "", (long)e);
+  }
+  return OTHER;
+}
+
+/* 15.2.14.4.12(x) */
+/*
+ * call-seq:
+ *   rng.to_s   -> string
+ *
+ * Convert this range object to a printable form.
+ */
+
+static mrb_value
+range_to_s(mrb_state *mrb, mrb_value range)
+{
+  mrb_value str, str2;
+  struct RRange *r = mrb_range_ptr(range);
+
+  str  = mrb_obj_as_string(mrb, r->edges->beg);
+  str2 = mrb_obj_as_string(mrb, r->edges->end);
+  str  = mrb_str_dup(mrb, str);
+  /* "..." for an exclusive range, its first two bytes ".." otherwise */
+  mrb_str_cat(mrb, str, "...", r->excl ? 3 : 2);
+  mrb_str_append(mrb, str, str2);
+
+  return str;
+}
+
+/*
+ * Builds the inspect string of range.  When recur is non-zero a fixed
+ * placeholder is returned instead of inspecting the endpoints.
+ */
+static mrb_value
+inspect_range(mrb_state *mrb, mrb_value range, mrb_value dummy, int recur)
+{
+  mrb_value str, str2;
+  struct RRange *r = mrb_range_ptr(range);
+
+  if (recur) {
+    return mrb_str_new2(mrb, r->excl ? "(... ... ...)" : "(... .. ...)");
+  }
+  str  = mrb_inspect(mrb, r->edges->beg);
+  str2 = mrb_inspect(mrb, r->edges->end);
+  str  = mrb_str_dup(mrb, str);
+  mrb_str_cat(mrb, str, "...", r->excl ? 3 : 2);
+  mrb_str_append(mrb, str, str2);
+
+  return str;
+}
+
+/* 15.2.14.4.13(x) */
+/*
+ * call-seq:
+ *   rng.inspect  -> string
+ *
+ * Convert this range object to a printable form (using
+ * inspect to convert the start and end
+ * objects).
+ */
+
+static mrb_value
+range_inspect(mrb_state *mrb, mrb_value range)
+{
+  return inspect_range(mrb, range, range, 0);
+}
+
+/*
+ * Worker for range_eql(): compares both endpoints with eql? and then
+ * the exclude flags.  A recursive call (recur != 0) answers true.
+ */
+static mrb_value
+recursive_eql(mrb_state *mrb, mrb_value range, mrb_value obj, int recur)
+{
+  struct RRange *r = mrb_range_ptr(range);
+  struct RRange *o = mrb_range_ptr(obj);
+
+  if (recur) return mrb_true_value(); /* Subtle! */
+  if (!mrb_eql(mrb, r->edges->beg, o->edges->beg))
+    return mrb_false_value();
+  if (!mrb_eql(mrb, r->edges->end, o->edges->end))
+    return mrb_false_value();
+
+  if (r->excl != o->excl)
+    return mrb_false_value();
+  return mrb_true_value();
+}
+
+/* 15.2.14.4.14(x) */
+/*
+ *  call-seq:
+ *     rng.eql?(obj)    -> true or false
+ *
+ *  Returns true only if obj is a Range, has equivalent
+ *  beginning and end items (by comparing them with #eql?), and has the same
+ *  #exclude_end? setting as rng.
+ *
+ *    (0..2).eql?(0..2)            #=> true
+ *    (0..2).eql?(Range.new(0,2))  #=> true
+ *    (0..2).eql?(0...2)           #=> false
+ *
+ */
+
+static mrb_value
+range_eql(mrb_state *mrb, mrb_value range)
+{
+  mrb_value obj;
+  mrb_get_args(mrb, "o", &obj);
+
+  if (mrb_obj_equal(mrb, range, obj))
+    return mrb_true_value();
+  if (!mrb_obj_is_kind_of(mrb, obj, mrb->range_class))
+    return mrb_false_value();
+  return mrb_exec_recursive_paired(mrb, recursive_eql, range, obj, &obj);
+}
+
+/* 15.2.14.4.15(x) */
+/*
+ * Makes copy a duplicate of the range passed as the argument.
+ *
+ * Only the endpoints and the exclude flag are transferred, through
+ * range_init(), so copy keeps its own object header and its own edge
+ * storage instead of sharing the source's edges pointer.  Raises
+ * TypeError when the source is not an instance of copy's class and
+ * ArgumentError when the source has no endpoints yet.
+ */
+mrb_value
+range_initialize_copy(mrb_state *mrb, mrb_value copy)
+{
+  mrb_value src;
+  struct RRange *r;
+
+  mrb_get_args(mrb, "o", &src);
+
+  if (mrb_obj_equal(mrb, copy, src)) return copy;
+  if (!mrb_obj_is_instance_of(mrb, src, mrb_obj_class(mrb, copy))) {
+    mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class");
+  }
+  r = mrb_range_ptr(src);
+  if (!r->edges) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "uninitialized range");
+  }
+  range_init(mrb, copy, r->edges->beg, r->edges->end, r->excl);
+
+  return copy;
+}
+
+/* Defines the Range class, mixes in Enumerable and registers its methods. */
+void
+mrb_init_range(mrb_state *mrb)
+{
+  struct RClass *r;
+  r = mrb->range_class = mrb_define_class(mrb, "Range", mrb->object_class);
+  mrb_include_module(mrb, r, mrb_class_get(mrb, "Enumerable"));
+
+  mrb_define_method(mrb, r, "begin",           mrb_range_beg,         ARGS_NONE());      /* 15.2.14.4.3  */
+  mrb_define_method(mrb, r, "end",             mrb_range_end,         ARGS_NONE());      /* 15.2.14.4.5  */
+  mrb_define_method(mrb, r, "==",              mrb_range_eq,          ARGS_REQ(1));      /* 15.2.14.4.1  */
+  mrb_define_method(mrb, r, "===",             mrb_range_include,     ARGS_REQ(1));      /* 15.2.14.4.2  */
+  mrb_define_method(mrb, r, "each",            mrb_range_each,        ARGS_NONE());      /* 15.2.14.4.4  */
+  mrb_define_method(mrb, r, "exclude_end?",    mrb_range_excl,        ARGS_NONE());      /* 15.2.14.4.6  */
+  mrb_define_method(mrb, r, "first",           mrb_range_beg,         ARGS_NONE());      /* 15.2.14.4.7  */
+  mrb_define_method(mrb, r, "include?",        mrb_range_include,     ARGS_REQ(1));      /* 15.2.14.4.8  */
+  mrb_define_method(mrb, r, "initialize",      mrb_range_initialize,  ARGS_REQ(4));      /* 15.2.14.4.9  */
+  mrb_define_method(mrb, r, "last",            mrb_range_end,         ARGS_NONE());      /* 15.2.14.4.10 */
+  mrb_define_method(mrb, r, "member?",         mrb_range_include,     ARGS_REQ(1));      /* 15.2.14.4.11 */
+
+  mrb_define_method(mrb, r, "to_s",            range_to_s,            ARGS_NONE());      /* 15.2.14.4.12(x) */
+  mrb_define_method(mrb, r, "inspect",         range_inspect,         ARGS_NONE());      /* 15.2.14.4.13(x) */
+  mrb_define_method(mrb, r, "eql?",            range_eql,             ARGS_REQ(1));      /* 15.2.14.4.14(x) */
+  mrb_define_method(mrb, r, "initialize_copy", range_initialize_copy, ARGS_REQ(1));      /* 15.2.14.4.15(x) */
+}
diff --git a/src/re.c b/src/re.c
new file mode 100644
index 0000000000..645af6ba67
--- /dev/null
+++ b/src/re.c
@@ -0,0 +1,3306 @@
+/* re.c for RegExp Class */
+#include "mruby.h"
+#include <string.h>
+#include "mruby/string.h"
+#include "ritehash.h"
+#include "encoding.h"
+#include "re.h"
+#include "mruby/numeric.h"
+#include "mruby/range.h"
+#include "mruby/array.h"
+#include "regint.h"
+#include "mruby/class.h"
+#include "mruby/hash.h"
+#include "variable.h"
+#include "error.h"
+#ifdef INCLUDE_REGEXP
+
+//from opcode.h
+#define GETARG_A(i) ((((mrb_code)(i)) >> 24) & 0xff)
+#define GETARG_B(i) ((((mrb_code)(i)) >> 16) & 0xff)
+#define GETARG_C(i) ((((mrb_code)(i)) >> 8) & 0xff)
+#define MKARG_A(c) (((c) & 0xff) << 24)
+#define MKARG_B(c) (((c) & 0xff) << 16)
+#define MKARG_C(c) (((c) & 0xff) << 8)
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#define ARG_REG_OPTION_MASK \
+  (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
+#define ARG_ENCODING_FIXED 16
+#define ARG_ENCODING_NONE 32
+#define REG_LITERAL FL_USER5
+#define REG_ENCODING_NONE FL_USER6
+typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
+#define mrb_bug printf
+#define KCODE_FIXED FL_USER4
+#define scan_oct(s,l,e) (int)ruby_scan_oct(s,l,e)
+unsigned long ruby_scan_oct(const char *, size_t, size_t *);
+#define scan_hex(s,l,e) (int)ruby_scan_hex(s,l,e)
+unsigned long ruby_scan_hex(const char *, size_t, size_t *);
+
+static mrb_value mrb_match_to_a(mrb_state *mrb, mrb_value match);
+int re_adjust_startpos(struct 
re_pattern_buffer *bufp, const char *string, int size, int startpos, int range); +static mrb_value mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc, + mrb_encoding **fixed_enc, onig_errmsg_buffer err); +static void mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len, + mrb_encoding *enc, mrb_encoding *resenc); +static char * option_to_str(char str[4], int options); +mrb_value match_alloc(mrb_state *mrb); +void mrb_warn(const char *fmt, ...); + +static mrb_value reg_cache; +//static int may_need_recompile; +//static int reg_kcode = DEFAULT_KCODE; +/* ------------------------------------------------------------------------- */ +/* RegExp Class */ +/* ------------------------------------------------------------------------- */ +/* 15.2.15.6.1 */ +/* + * call-seq: + * class.new(args, ...) -> obj + * + * Calls allocate to create a new object of + * class's class, then invokes that object's + * initialize method, passing it args. + * This is the method that ends up getting called whenever + * an object is constructed using .new. 
+ * + */ +mrb_value +mrb_reg_s_new_instance(mrb_state *mrb, /*int argc, mrb_value *argv, */mrb_value self) +{ + //obj = mrb_obj_alloc(klass); + //mrb_obj_call_init(obj, argc, argv);...mrb_funcall2(obj, idInitialize, argc, argv); + mrb_value argv[16]; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + struct RRegexp *re; + re = mrb_obj_alloc(mrb, MRB_TT_REGEX, mrb->regex_class); + re->ptr = 0; + re->src = mrb_nil_value(); + re->usecnt = 0; + return mrb_funcall_argv(mrb, mrb_obj_value(re), "initialize", argc, argv); +} +//#define mrb_enc_mbcput(a,b,c) a +mrb_value +mrb_reg_quote(mrb_state *mrb, mrb_value str) +{ + mrb_encoding *enc = mrb_enc_get(mrb, str); + char *s, *send, *t; + mrb_value tmp; + int c,clen; + int ascii_only = mrb_enc_str_asciionly_p(mrb, str); + + s = RSTRING_PTR(str); + send = s + RSTRING_LEN(str); + while (s < send) { + c = mrb_enc_ascget(mrb, s, send, &clen, enc); + if (c == -1) { + s += mbclen(s, send, enc); + continue; + } + switch (c) { + case '[': case ']': case '{': case '}': + case '(': case ')': case '|': case '-': + case '*': case '.': case '\\': + case '?': case '+': case '^': case '$': + case ' ': case '#': + case '\t': case '\f': case '\n': case '\r': + goto meta_found; + } + s += clen; + } + //tmp = mrb_str_new3(str); + tmp = mrb_str_new(mrb, RSTRING_PTR(str), RSTRING_LEN(str)); + if (ascii_only) { + mrb_enc_associate(mrb, tmp, mrb_usascii_encoding(mrb)); + } + return tmp; + +meta_found: + tmp = mrb_str_new(mrb, 0, RSTRING_LEN(str)*2); + if (ascii_only) { + mrb_enc_associate(mrb, tmp, mrb_usascii_encoding(mrb)); + } + else { + mrb_enc_copy(mrb, tmp, str); + } + t = RSTRING_PTR(tmp); + /* copy upto metacharacter */ + memcpy(t, RSTRING_PTR(str), s - RSTRING_PTR(str)); + t += s - RSTRING_PTR(str); + + while (s < send) { + c = mrb_enc_ascget(mrb, s, send, &clen, enc); + if (c == -1) { + int n = mbclen(s, send, enc); + + while (n--) + *t++ = *s++; + continue; + } + s += clen; + switch (c) { + case '[': case ']': case '{': case '}': + 
case '(': case ')': case '|': case '-': + case '*': case '.': case '\\': + case '?': case '+': case '^': case '$': + case '#': + t += mrb_enc_mbcput('\\', t, enc); + break; + case ' ': + t += mrb_enc_mbcput('\\', t, enc); + t += mrb_enc_mbcput(' ', t, enc); + continue; + case '\t': + t += mrb_enc_mbcput('\\', t, enc); + t += mrb_enc_mbcput('t', t, enc); + continue; + case '\n': + t += mrb_enc_mbcput('\\', t, enc); + t += mrb_enc_mbcput('n', t, enc); + continue; + case '\r': + t += mrb_enc_mbcput('\\', t, enc); + t += mrb_enc_mbcput('r', t, enc); + continue; + case '\f': + t += mrb_enc_mbcput('\\', t, enc); + t += mrb_enc_mbcput('f', t, enc); + continue; + case '\v': + t += mrb_enc_mbcput('\\', t, enc); + t += mrb_enc_mbcput('v', t, enc); + continue; + } + t += mrb_enc_mbcput(c, t, enc); + } + mrb_str_resize(mrb, tmp, t - RSTRING_PTR(tmp)); + /*OBJ_INFECT(tmp, str);*/ + return tmp; +} + +static mrb_value +reg_operand(mrb_state *mrb, mrb_value s, int check) +{ + if (mrb_type(s) == MRB_TT_SYMBOL) { + //return mrb_sym_to_s(s); + return mrb_obj_inspect(mrb, s); + } + else { + mrb_value tmp = mrb_check_string_type(mrb, s); + if (check && mrb_nil_p(tmp)) { + mrb_raise(mrb, E_TYPE_ERROR, "can't convert %s to String", + mrb_obj_classname(mrb, s)); + } + return tmp; + } +} +/* 15.2.15.6.2 */ +/* 15.2.15.6.4 */ +/* + * call-seq: + * Regexp.escape(str) -> string + * Regexp.quote(str) -> string + * + * Escapes any characters that would have special meaning in a regular + * expression. Returns a new escaped string, or self if no characters are + * escaped. For any string, + * Regexp.new(Regexp.escape(str))=~str will be true. + * + * Regexp.escape('\*?{}.') #=> \\\*\?\{\}\. 
+ * + */ + +static mrb_value +mrb_reg_s_quote(mrb_state *mrb, mrb_value c/*, mrb_value str*/) +{ + mrb_value str; + mrb_get_args(mrb, "o", &str); + return mrb_reg_quote(mrb, reg_operand(mrb, str, 1/*TRUE*/)); +} + +static void +match_check(mrb_state *mrb, mrb_value match) +{ + struct RMatch *m = mrb_match_ptr(match); + if (!m->str.tt) { + mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Match"); + } +} + +mrb_value +mrb_reg_nth_match(mrb_state *mrb, mrb_int nth, mrb_value match) +{ + mrb_value str; + long start, end, len; + struct RMatch *m = mrb_match_ptr(match); + + if (mrb_nil_p(match)) return mrb_nil_value(); + match_check(mrb, match); + if (nth >= m->rmatch->regs.num_regs) { + return mrb_nil_value(); + } + if (nth < 0) { + nth += m->rmatch->regs.num_regs; + if (nth <= 0) return mrb_nil_value(); + } + start = m->rmatch->regs.beg[nth]; + if (start == -1) return mrb_nil_value(); + end = m->rmatch->regs.end[nth]; + len = end - start; + str = mrb_str_substr(mrb, m->str, start, len); + /*OBJ_INFECT(str, match);*/ + return str; +} + +mrb_value +mrb_reg_last_match(mrb_state *mrb, mrb_value match) +{ + return mrb_reg_nth_match(mrb, 0, match); +} + + +static int +match_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref) +{ + const char *name; + int num; + + struct re_registers *regs = RMATCH_REGS(match); + mrb_value regexp = RMATCH(match)->regexp; + + match_check(mrb, match); + switch(mrb_type(backref)) { + default: + return mrb_fixnum(backref); + + case MRB_TT_SYMBOL: + name = mrb_sym2name(mrb, SYM2ID(backref)); + break; + + case MRB_TT_STRING: + //name = StringValueCStr(backref); + name = mrb_string_value_cstr(mrb, &backref); + break; + } + num = onig_name_to_backref_number(mrb_regex_ptr(regexp)->ptr, + (const unsigned char*)name, + (const unsigned char*)name + strlen(name), + regs); + if (num < 1) { + mrb_raise(mrb, E_INDEX_ERROR, "undefined group name reference: %s", name); + } + + return num; +} +/* 15.2.15.6.3 */ +/* + * call-seq: + * Regexp.last_match 
-> matchdata + * Regexp.last_match(n) -> str + * + * The first form returns the MatchData object generated by the + * last successful pattern match. Equivalent to reading the global variable + * $~. The second form returns the nth field in this + * MatchData object. + * n can be a string or symbol to reference a named capture. + * + * Note that the last_match is local to the thread and method scope + * of the method that did the pattern match. + * + * /c(.)t/ =~ 'cat' #=> 0 + * Regexp.last_match #=> # + * Regexp.last_match(0) #=> "cat" + * Regexp.last_match(1) #=> "a" + * Regexp.last_match(2) #=> nil + * + * /(?\w+)\s*=\s*(?\w+)/ =~ "var = val" + * Regexp.last_match #=> # + * Regexp.last_match(:lhs) #=> "var" + * Regexp.last_match(:rhs) #=> "val" + */ +static mrb_value +mrb_reg_s_last_match(mrb_state *mrb, mrb_value self/*int argc, mrb_value *argv*/) +{ + //mrb_value nth; + mrb_value argv[16]; + int argc; + mrb_value match = mrb_backref_get(mrb); + + //if (argc > 0 && mrb_scan_args(argc, argv, "01", &nth) == 1) { + mrb_get_args(mrb, "*", &argv, &argc); + if (argc != 0) { + int n; + if (mrb_nil_p(match)) return mrb_nil_value(); + n = match_backref_number(mrb, match, argv[0]); + return mrb_reg_nth_match(mrb, n, match); + } + return match;//match_getter(); +} + +static void +mrb_reg_check(mrb_state *mrb, mrb_value re) +{ + //struct RRegexp *r = mrb_regex_ptr(re); + + //if (!(RREGEXP(re)->ptr) || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) { + if (!(RREGEXP(re)->ptr)) { + mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Regexp"); + } + if (RREGEXP_SRC(re).tt == 0) { + mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Regexp"); + } + if (!RREGEXP_SRC_PTR(re)) { + mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Regexp"); + } +} + +int +mrb_reg_options(mrb_state *mrb, mrb_value re) +{ + int options; + + mrb_reg_check(mrb, re); + options = RREGEXP(re)->ptr->options & ARG_REG_OPTION_MASK; + if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED; + if (RBASIC(re)->flags & 
REG_ENCODING_NONE) options |= ARG_ENCODING_NONE; + return options; +} + +static void +reg_enc_error(mrb_state *mrb, mrb_value re, mrb_value str) +{ + mrb_raise(mrb, E_ENCODING_ERROR, + "incompatible encoding regexp match (%s regexp with %s string)", + mrb_enc_name(mrb_enc_get(mrb, re)), + mrb_enc_name(mrb_enc_get(mrb, str))); +} + +static int +mrb_reg_fixed_encoding_p(mrb_value re) +{ + /*if (FL_TEST(re, KCODE_FIXED)) + return Qtrue; + else */ + return 0/*Qfalse*/; +} + +static mrb_encoding* +mrb_reg_prepare_enc(mrb_state *mrb, mrb_value re, mrb_value str, int warn) +{ + mrb_encoding *enc = 0; + + if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_BROKEN) { + mrb_raise(mrb, E_ARGUMENT_ERROR, + "invalid byte sequence in %s", + mrb_enc_name(mrb_enc_get(mrb, str))); + } + + mrb_reg_check(mrb, re); + enc = mrb_enc_get(mrb, str); + if (!mrb_enc_str_asciicompat_p(mrb, str)) { + if (RREGEXP(re)->ptr->enc != enc) { + reg_enc_error(mrb, re, str); + } + } + else if (mrb_reg_fixed_encoding_p(re)) { + if (RREGEXP(re)->ptr->enc != enc && + (!mrb_enc_asciicompat(mrb, RREGEXP(re)->ptr->enc) || + mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT)) { + reg_enc_error(mrb, re, str); + } + enc = RREGEXP(re)->ptr->enc; + } + if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) && + enc != mrb_ascii8bit_encoding(mrb) && + mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) { + mrb_warn("regexp match /.../n against to %s string", + mrb_enc_name(enc)); + } + return enc; +} + +static mrb_value +mrb_reg_desc(mrb_state *mrb, const char *s, long len, mrb_value re) +{ + mrb_encoding *enc = mrb_enc_get(mrb, re); + mrb_value str = mrb_str_new_cstr(mrb, "/");//mrb_str_buf_new2("/"); + mrb_encoding *resenc = mrb_default_internal_encoding(mrb); + if (resenc == NULL) resenc = mrb_default_external_encoding(mrb); + + if (re.tt && mrb_enc_asciicompat(mrb, enc)) { + mrb_enc_copy(mrb, str, re); + } + else { + mrb_enc_associate(mrb, str, mrb_usascii_encoding(mrb)); + } + mrb_reg_expr_str(mrb, str, 
s, len, enc, resenc); + mrb_str_buf_cat(mrb, str, "/", strlen("/"));//mrb_str_buf_cat2(str, "/"); + if (re.tt) { + char opts[4]; + mrb_reg_check(mrb, re); + if (*option_to_str(opts, RREGEXP(re)->ptr->options)) + mrb_str_buf_cat(mrb, str, opts, strlen(opts));//mrb_str_buf_cat2(str, opts); + if (RBASIC(re)->flags & REG_ENCODING_NONE) + mrb_str_buf_cat(mrb, str, "n", strlen("n"));//mrb_str_buf_cat2(str, "n"); + } + /*OBJ_INFECT(str, re);*/ + return str; +} +static void +mrb_reg_raise(mrb_state *mrb, const char *s, long len, const char *err, mrb_value re) +{ + mrb_value desc = mrb_reg_desc(mrb, s, len, re); + + mrb_raise(mrb, E_REGEXP_ERROR, "%s: %s", err, RSTRING_PTR(desc)); +} + +regex_t * +mrb_reg_prepare_re(mrb_state *mrb, mrb_value re, mrb_value str) +{ + regex_t *reg = RREGEXP(re)->ptr; + onig_errmsg_buffer err = ""; + int r; + OnigErrorInfo einfo; + const char *pattern; + mrb_value unescaped; + mrb_encoding *fixed_enc = 0; + mrb_encoding *enc = mrb_reg_prepare_enc(mrb, re, str, 1); + + if (reg->enc == enc) return reg; + + mrb_reg_check(mrb, re); + reg = RREGEXP(re)->ptr; + pattern = RREGEXP_SRC_PTR(re); + + unescaped = mrb_reg_preprocess(mrb, + pattern, pattern + RREGEXP_SRC_LEN(re), enc, + &fixed_enc, err); + + if (mrb_nil_p(unescaped)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "regexp preprocess failed: %s", err); + } + + r = onig_new(®, (UChar* )RSTRING_PTR(unescaped), + (UChar* )(RSTRING_PTR(unescaped) + RSTRING_LEN(unescaped)), + reg->options, enc, + OnigDefaultSyntax, &einfo); + if (r) { + onig_error_code_to_str((UChar*)err, r, &einfo); + mrb_reg_raise(mrb, pattern, RREGEXP_SRC_LEN(re), err, re); + } + + //RB_GC_GUARD(unescaped); + return reg; +} + + +mrb_int +mrb_reg_search(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse) +{ + long result; + mrb_value match; + struct re_registers regi, *regs = ®i; + char *range = RSTRING_PTR(str); + regex_t *reg; + int tmpreg; + + if (pos > RSTRING_LEN(str) || pos < 0) { + mrb_backref_set(mrb, 
mrb_nil_value()); + return -1; + } + + reg = mrb_reg_prepare_re(mrb, re, str); + tmpreg = reg != RREGEXP(re)->ptr; + if (!tmpreg) RREGEXP(re)->usecnt++; + + match = mrb_backref_get(mrb); + if (!mrb_nil_p(match)) { + /*if (FL_TEST(match, MATCH_BUSY)) { + match = Qnil; + } + else { + regs = RMATCH_REGS(match); + }*/ + regs = RMATCH_REGS(match); + } + if (mrb_nil_p(match)) { + memset(regs, 0, sizeof(struct re_registers)); + } +//--> + if (!reverse) { + range += RSTRING_LEN(str); + } + result = onig_search(reg, + (UChar*)(RSTRING_PTR(str)), + ((UChar*)(RSTRING_PTR(str)) + RSTRING_LEN(str)), + ((UChar*)(RSTRING_PTR(str)) + pos), + ((UChar*)range), + regs, ONIG_OPTION_NONE); + if (!tmpreg) RREGEXP(re)->usecnt--; + if (tmpreg) { + if (RREGEXP(re)->usecnt) { + onig_free(reg); + } + else { + onig_free(RREGEXP(re)->ptr); + RREGEXP(re)->ptr = reg; + } + } + if (result < 0) { + if (regs == ®i) + onig_region_free(regs, 0); + if (result == ONIG_MISMATCH) { + mrb_backref_set(mrb, mrb_nil_value()); + return result; + } + else { + onig_errmsg_buffer err = ""; + onig_error_code_to_str((UChar*)err, (int)result); + mrb_reg_raise(mrb, RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re); + } + } +//--< + if (mrb_nil_p(match) ) { + match = match_alloc(mrb); + onig_region_copy(RMATCH_REGS(match), regs); + onig_region_free(regs, 0); + } + /*else { + if (mrb_safe_level() >= 3) + OBJ_TAINT(match); + else + FL_UNSET(match, FL_TAINT); + }*/ + + RMATCH(match)->str = str_new4(mrb, str.tt, str); + RMATCH(match)->regexp = re; + RMATCH(match)->rmatch->char_offset_updated = 0; + mrb_backref_set(mrb, match); + + //OBJ_INFECT(match, re); + //OBJ_INFECT(match, str); + + return result; +} + +mrb_int +mrb_reg_adjust_startpos(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse) +{ + mrb_int range; + struct RString *s = mrb_str_ptr(str); + struct RRegexp *r = mrb_regex_ptr(re); + + mrb_reg_check(mrb, re); + /*if (may_need_recompile) mrb_reg_prepare_re(re);*/ + + /* if (FL_TEST(re, 
KCODE_FIXED)) + mrb_kcode_set_option(re); + else if (reg_kcode != curr_kcode) + mrb_kcode_reset_option(); */ + + if (reverse) { + range = -pos; + } + else { + range = s->len - pos; + } + return re_adjust_startpos(r->ptr, + s->buf, s->len, + pos, range); +} + +static int +onig_new_with_source(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, + OnigErrorInfo* einfo, const char *sourcefile, int sourceline) +{ + int r; + + *reg = (regex_t* )malloc/*xmalloc*/(sizeof(regex_t)); + if ((void*)(*reg) == (void*)0) return ONIGERR_MEMORY; + + r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); + if (r) goto err; + r = onig_compile(*reg, pattern, pattern_end, einfo, sourcefile, sourceline); + if (r) { + err: + onig_free(*reg); + *reg = 0/*NULL*/; + } + return r; +} + +static Regexp* +make_regexp(const char *s, long len, mrb_encoding *enc, int flags, onig_errmsg_buffer err, + const char *sourcefile, int sourceline) +{ + Regexp *rp; + int r; + OnigErrorInfo einfo; + + /* Handle escaped characters first. */ + + /* Build a copy of the string (in dest) with the + escaped characters translated, and generate the regex + from that. 
+ */ + + r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s + len), flags, + enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline); + if (r) { + onig_error_code_to_str((UChar*)err, r, &einfo); + return 0; + } + return rp; +} + +unsigned long +ruby_scan_hex(const char *start, size_t len, size_t *retlen) +{ + static const char hexdigit[] = "0123456789abcdef0123456789ABCDEF"; + register const char *s = start; + register unsigned long retval = 0; + const char *tmp; + + while (len-- && *s && (tmp = strchr(hexdigit, *s))) { + retval <<= 4; + retval |= (tmp - hexdigit) & 15; + s++; + } + *retlen = (int)(s - start); /* less than len */ + return retval; +} + +static int +check_unicode_range(unsigned long code, onig_errmsg_buffer err) +{ + if ((0xd800 <= code && code <= 0xdfff) || /* Surrogates */ + 0x10ffff < code) { + //errcpy(err, "invalid Unicode range"); + printf("invalid Unicode range"); + return -1; + } + return 0; +} + +#define BYTEWIDTH 8 + +int +mrb_uv_to_utf8(mrb_state *mrb, char buf[6], unsigned long uv) +{ + if (uv <= 0x7f) { + buf[0] = (char)uv; + return 1; + } + if (uv <= 0x7ff) { + buf[0] = (char)((uv>>6)&0xff)|0xc0; + buf[1] = (char)(uv&0x3f)|0x80; + return 2; + } + if (uv <= 0xffff) { + buf[0] = (char)((uv>>12)&0xff)|0xe0; + buf[1] = (char)((uv>>6)&0x3f)|0x80; + buf[2] = (char)(uv&0x3f)|0x80; + return 3; + } + if (uv <= 0x1fffff) { + buf[0] = (char)((uv>>18)&0xff)|0xf0; + buf[1] = (char)((uv>>12)&0x3f)|0x80; + buf[2] = (char)((uv>>6)&0x3f)|0x80; + buf[3] = (char)(uv&0x3f)|0x80; + return 4; + } + if (uv <= 0x3ffffff) { + buf[0] = (char)((uv>>24)&0xff)|0xf8; + buf[1] = (char)((uv>>18)&0x3f)|0x80; + buf[2] = (char)((uv>>12)&0x3f)|0x80; + buf[3] = (char)((uv>>6)&0x3f)|0x80; + buf[4] = (char)(uv&0x3f)|0x80; + return 5; + } + if (uv <= 0x7fffffff) { + buf[0] = (char)((uv>>30)&0xff)|0xfc; + buf[1] = (char)((uv>>24)&0x3f)|0x80; + buf[2] = (char)((uv>>18)&0x3f)|0x80; + buf[3] = (char)((uv>>12)&0x3f)|0x80; + buf[4] = (char)((uv>>6)&0x3f)|0x80; + buf[5] = 
(char)(uv&0x3f)|0x80; + return 6; + } + mrb_raise(mrb, E_RANGE_ERROR, "pack(U): value out of range"); + return 0; +} + +static int +append_utf8(mrb_state *mrb, unsigned long uv, + mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err) +{ + if (check_unicode_range(uv, err) != 0) + return -1; + if (uv < 0x80) { + char escbuf[5]; + snprintf(escbuf, sizeof(escbuf), "\\x%02X", (int)uv); + mrb_str_buf_cat(mrb, buf, escbuf, 4); + } + else { + int len; + char utf8buf[6]; + len = mrb_uv_to_utf8(mrb, utf8buf, uv); + mrb_str_buf_cat(mrb, buf, utf8buf, len); + + if (*encp == 0) + *encp = mrb_utf8_encoding(mrb); + else if (*encp != mrb_utf8_encoding(mrb)) { + //errcpy(err, "UTF-8 character in non UTF-8 regexp"); + printf("UTF-8 character in non UTF-8 regexp"); + return -1; + } + } + return 0; +} + +static int +unescape_unicode_bmp(mrb_state *mrb, const char **pp, const char *end, + mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err) +{ + const char *p = *pp; + size_t len; + unsigned long code; + + if (end < p+4) { + //errcpy(err, "invalid Unicode escape"); + printf("invalid Unicode escape"); + return -1; + } + code = ruby_scan_hex(p, 4, &len); + if (len != 4) { + //errcpy(err, "invalid Unicode escape"); + printf("invalid Unicode escape"); + return -1; + } + if (append_utf8(mrb, code, buf, encp, err) != 0) + return -1; + *pp = p + 4; + return 0; +} + +unsigned long +ruby_scan_oct(const char *start, size_t len, size_t *retlen) +{ + register const char *s = start; + register unsigned long retval = 0; + + while (len-- && *s >= '0' && *s <= '7') { + retval <<= 3; + retval |= *s++ - '0'; + } + *retlen = (int)(s - start); /* less than len */ + return retval; +} + +static int +read_escaped_byte(const char **pp, const char *end, onig_errmsg_buffer err) +{ + const char *p = *pp; + int code; + int meta_prefix = 0, ctrl_prefix = 0; + size_t len; + int retbyte; + + retbyte = -1; + if (p == end || *p++ != '\\') { + //errcpy(err, "too short escaped multibyte character"); + 
printf("too short escaped multibyte character"); + return -1; + } + +again: + if (p == end) { + //errcpy(err, "too short escape sequence"); + printf("too short escape sequence"); + return -1; + } + switch (*p++) { + case '\\': code = '\\'; break; + case 'n': code = '\n'; break; + case 't': code = '\t'; break; + case 'r': code = '\r'; break; + case 'f': code = '\f'; break; + case 'v': code = '\013'; break; + case 'a': code = '\007'; break; + case 'e': code = '\033'; break; + + /* \OOO */ + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + p--; + code = scan_oct(p, end < p+3 ? end-p : 3, &len); + p += len; + break; + + case 'x': /* \xHH */ + code = scan_hex(p, end < p+2 ? end-p : 2, &len); + if (len < 1) { + //errcpy(err, "invalid hex escape"); + printf("invalid hex escape"); + return -1; + } + p += len; + break; + + case 'M': /* \M-X, \M-\C-X, \M-\cX */ + if (meta_prefix) { + //errcpy(err, "duplicate meta escape"); + printf("duplicate meta escape"); + return -1; + } + meta_prefix = 1; + if (p+1 < end && *p++ == '-' && (*p & 0x80) == 0) { + if (*p == '\\') { + p++; + goto again; + } + else { + code = *p++; + break; + } + } + //errcpy(err, "too short meta escape"); + printf("too short meta escape"); + return -1; + + case 'C': /* \C-X, \C-\M-X */ + if (p == end || *p++ != '-') { + //errcpy(err, "too short control escape"); + printf("too short control escape"); + return -1; + } + case 'c': /* \cX, \c\M-X */ + if (ctrl_prefix) { + //errcpy(err, "duplicate control escape"); + printf("duplicate control escape"); + return -1; + } + ctrl_prefix = 1; + if (p < end && (*p & 0x80) == 0) { + if (*p == '\\') { + p++; + goto again; + } + else { + code = *p++; + break; + } + } + //errcpy(err, "too short control escape"); + printf("too short control escape"); + return -1; + + default: + //errcpy(err, "unexpected escape sequence"); + printf("unexpected escape sequence"); + return -1; + } + if (code < 0 || 0xff < code) { + //errcpy(err, "invalid 
escape code"); + printf("invalid escape code"); + return -1; + } + + if (ctrl_prefix) + code &= 0x1f; + if (meta_prefix) + code |= 0x80; + + *pp = p; + return code; +} + +static int +unescape_escaped_nonascii(mrb_state *mrb, const char **pp, const char *end, mrb_encoding *enc, + mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err) +{ + const char *p = *pp; + int chmaxlen = mrb_enc_mbmaxlen(enc); + //char *chbuf = ALLOCA_N(char, chmaxlen); + char *chbuf = mrb_malloc(mrb, chmaxlen); + int chlen = 0; + int byte; + int l; + + memset(chbuf, 0, chmaxlen); + + byte = read_escaped_byte(&p, end, err); + if (byte == -1) { + return -1; + } + + chbuf[chlen++] = byte; + while (chlen < chmaxlen && + MBCLEN_NEEDMORE_P(mrb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) { + byte = read_escaped_byte(&p, end, err); + if (byte == -1) { + return -1; + } + chbuf[chlen++] = byte; + } + + l = mrb_enc_precise_mbclen(chbuf, chbuf+chlen, enc); + if (MBCLEN_INVALID_P(l)) { + //errcpy(err, "invalid multibyte escape"); + printf("invalid multibyte escape"); + return -1; + } + if (1 < chlen || (chbuf[0] & 0x80)) { + mrb_str_buf_cat(mrb, buf, chbuf, chlen); + + if (*encp == 0) + *encp = enc; + else if (*encp != enc) { + //errcpy(err, "escaped non ASCII character in UTF-8 regexp"); + printf("escaped non ASCII character in UTF-8 regexp"); + return -1; + } + } + else { + char escbuf[5]; + snprintf(escbuf, sizeof(escbuf), "\\x%02X", chbuf[0]&0xff); + mrb_str_buf_cat(mrb, buf, escbuf, 4); + } + *pp = p; + return 0; +} + +static int +unescape_unicode_list(mrb_state *mrb, const char **pp, const char *end, + mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err) +{ + const char *p = *pp; + int has_unicode = 0; + unsigned long code; + size_t len; + + while (p < end && ISSPACE(*p)) p++; + + while (1) { + code = ruby_scan_hex(p, end-p, &len); + if (len == 0) + break; + if (6 < len) { /* max 10FFFF */ + //errcpy(err, "invalid Unicode range"); + printf("invalid Unicode range"); + return -1; + } + p 
+= len; + if (append_utf8(mrb, code, buf, encp, err) != 0) + return -1; + has_unicode = 1; + + while (p < end && ISSPACE(*p)) p++; + } + + if (has_unicode == 0) { + //errcpy(err, "invalid Unicode list"); + printf("invalid Unicode list"); + return -1; + } + + *pp = p; + + return 0; +} + +static int +unescape_nonascii(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc, + mrb_value buf, mrb_encoding **encp, int *has_property, + onig_errmsg_buffer err) +{ + char c; + char smallbuf[2]; + + while (p < end) { + int chlen = mrb_enc_precise_mbclen(p, end, enc); + if (!MBCLEN_CHARFOUND_P(chlen)) { + //errcpy(err, "invalid multibyte character"); + printf("invalid multibyte character"); + return -1; + } + chlen = MBCLEN_CHARFOUND_LEN(chlen); + if (1 < chlen || (*p & 0x80)) { + mrb_str_buf_cat(mrb, buf, p, chlen); + p += chlen; + if (*encp == 0) + *encp = enc; + else if (*encp != enc) { + //errcpy(err, "non ASCII character in UTF-8 regexp"); + printf("non ASCII character in UTF-8 regexp"); + return -1; + } + continue; + } + + switch (c = *p++) { + case '\\': + if (p == end) { + //errcpy(err, "too short escape sequence"); + printf("too short escape sequence"); + return -1; + } + switch (c = *p++) { + case '1': case '2': case '3': + case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */ + { + size_t octlen; + if (ruby_scan_oct(p-1, end-(p-1), &octlen) <= 0177) { + /* backref or 7bit octal. + no need to unescape anyway. + re-escaping may break backref */ + goto escape_asis; + } + } + /* xxx: How about more than 199 subexpressions? 
*/ + + case '0': /* \0, \0O, \0OO */ + + case 'x': /* \xHH */ + case 'c': /* \cX, \c\M-X */ + case 'C': /* \C-X, \C-\M-X */ + case 'M': /* \M-X, \M-\C-X, \M-\cX */ + p = p-2; + if (unescape_escaped_nonascii(mrb, &p, end, enc, buf, encp, err) != 0) + return -1; + break; + + case 'u': + if (p == end) { + //errcpy(err, "too short escape sequence"); + printf("too short escape sequence"); + return -1; + } + if (*p == '{') { + /* \u{H HH HHH HHHH HHHHH HHHHHH ...} */ + p++; + if (unescape_unicode_list(mrb, &p, end, buf, encp, err) != 0) + return -1; + if (p == end || *p++ != '}') { + //errcpy(err, "invalid Unicode list"); + printf("invalid Unicode list"); + return -1; + } + break; + } + else { + /* \uHHHH */ + if (unescape_unicode_bmp(mrb, &p, end, buf, encp, err) != 0) + return -1; + break; + } + + case 'p': /* \p{Hiragana} */ + case 'P': + if (!*encp) { + *has_property = 1; + } + goto escape_asis; + + default: /* \n, \\, \d, \9, etc. */ +escape_asis: + smallbuf[0] = '\\'; + smallbuf[1] = c; + mrb_str_buf_cat(mrb, buf, smallbuf, 2); + break; + } + break; + + default: + mrb_str_buf_cat(mrb, buf, &c, 1); + break; + } + } + + return 0; +} + + +static mrb_value +mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc, + mrb_encoding **fixed_enc, onig_errmsg_buffer err) +{ + mrb_value buf; + int has_property = 0; + + //buf = mrb_str_buf_new(0); + buf = mrb_str_buf_new(mrb, 0); + + if (mrb_enc_asciicompat(mrb, enc)) + *fixed_enc = 0; + else { + *fixed_enc = enc; + mrb_enc_associate(mrb, buf, enc); + } + + if (unescape_nonascii(mrb, p, end, enc, buf, fixed_enc, &has_property, err) != 0) + return mrb_nil_value(); + + if (has_property && !*fixed_enc) { + *fixed_enc = enc; + } + + if (*fixed_enc) { + mrb_enc_associate(mrb, buf, *fixed_enc); + } + + return buf; +} + +static int +mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len, mrb_encoding *enc, + int options, onig_errmsg_buffer err, + const char *sourcefile, int sourceline) 
+{ + struct RRegexp *re = RREGEXP(obj); + mrb_value unescaped; + mrb_encoding *fixed_enc = 0; + mrb_encoding *a_enc = mrb_ascii8bit_encoding(mrb); + if (re->ptr) + mrb_raise(mrb, E_TYPE_ERROR, "already initialized regexp"); + re->ptr = 0; + + if (mrb_enc_dummy_p(enc)) { + //errcpy(err, "can't make regexp with dummy encoding"); + printf("can't make regexp with dummy encoding"); + return -1; + } + + unescaped = mrb_reg_preprocess(mrb, s, s+len, enc, &fixed_enc, err); + if (mrb_nil_p(unescaped)) + return -1; + + if (fixed_enc) { + if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) || + (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) { + //errcpy(err, "incompatible character encoding"); + printf("incompatible character encoding"); + return -1; + } + if (fixed_enc != a_enc) { + options |= ARG_ENCODING_FIXED; + enc = fixed_enc; + } + } + else if (!(options & ARG_ENCODING_FIXED)) { + enc = mrb_usascii_encoding(mrb); + } + + mrb_enc_associate(mrb, mrb_obj_value(re), enc); + if ((options & ARG_ENCODING_FIXED) || fixed_enc) { + //re->basic.flags |= KCODE_FIXED; + re->flags|= KCODE_FIXED; + } + if (options & ARG_ENCODING_NONE) { + re->flags |= REG_ENCODING_NONE; + } + + re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc, + options & ARG_REG_OPTION_MASK, err, + sourcefile, sourceline); + if (!re->ptr) return -1; + re->src = mrb_enc_str_new(mrb, s, len, enc); + + /*OBJ_FREEZE(re->src); + RB_GC_GUARD(unescaped);*/ + return 0; +} + +static int +mrb_reg_initialize_str(mrb_state *mrb, mrb_value obj, mrb_value str, int options, onig_errmsg_buffer err, + const char *sourcefile, int sourceline) +{ + int ret; + mrb_encoding *enc = mrb_enc_get(mrb, str); + + if (options & ARG_ENCODING_NONE) { + mrb_encoding *ascii8bit = mrb_ascii8bit_encoding(mrb); + if (enc != ascii8bit) { + if (mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) { + //errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); + printf("/.../n has a non 
escaped non ASCII character in non ASCII-8BIT script"); + return -1; + } + enc = ascii8bit; + } + } + + ret = mrb_reg_initialize(mrb, obj, RSTRING_PTR(str), RSTRING_LEN(str), enc, + options, err, sourcefile, sourceline); + /*OBJ_INFECT(obj, str); + RB_GC_GUARD(str);*/ + return ret; +} + +/* 15.2.15.7.1 */ +/* + * call-seq: + * Regexp.initialize(string, [options [, lang]]) -> regexp + * Regexp.initialize(regexp) -> regexp + * + * Constructs a new regular expression from pattern, which can be either + * a String or a Regexp (in which case that regexp's + * options are propagated, and new options may not be specified (a change as of + * Ruby 1.8). If options is a Fixnum, it should be one or + * more of the constants Regexp::EXTENDED, + * Regexp::IGNORECASE, and Regexp::MULTILINE, + * or-ed together. Otherwise, if options is not + * nil, the regexp will be case insensitive. + * When the lang parameter is `n' or `N' sets the regexp no encoding. + * + * r1 = Regexp.initialize('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/ + * r2 = Regexp.initialize('cat', true) #=> /cat/i + * r3 = Regexp.initialize('dog', Regexp::EXTENDED) #=> /dog/x + * r4 = Regexp.initialize(r2) #=> /cat/i + */ + +static mrb_value +mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self) +{ + mrb_value argv[16]; + int argc; + onig_errmsg_buffer err = ""; + int flags = 0; + mrb_value str; + mrb_encoding *enc; + const char *ptr; + long len; + + mrb_get_args(mrb, "*", &argv, &argc); + if (argc == 0 || argc > 3) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1..3)", argc); + } + if (mrb_type(argv[0]) == MRB_TT_REGEX) { + mrb_value re = argv[0]; + + if (argc > 1) { + /* mrb_warn("flags ignored"); */ + printf("flags ignored"); + } + mrb_reg_check(mrb, re); + flags = mrb_reg_options(mrb, re); + ptr = RREGEXP_SRC_PTR(re); + len = RREGEXP_SRC_LEN(re); + enc = mrb_enc_get(mrb, re); + if (mrb_reg_initialize(mrb, self, ptr, len, enc, flags, err, NULL, 0)) { + /*str = 
mrb_enc_str_new(mrb, ptr, len, enc); + mrb_reg_raise_str(str, flags, err);*/ + printf("mrb_reg_raise_str(str, flags, err);"); + } + } + else { + if (argc >= 2) { + if (mrb_type(argv[1]) == MRB_TT_FIXNUM) flags = mrb_fixnum(argv[1]); + else if (mrb_test(argv[1])) flags = ONIG_OPTION_IGNORECASE; + } + enc = 0; + if (argc == 3 && !mrb_nil_p(argv[2])) { + //char *kcode = StringValuePtr(argv[2]); + char *kcode = mrb_string_value_ptr(mrb, argv[2]); + if (kcode[0] == 'n' || kcode[0] == 'N') { + enc = mrb_ascii8bit_encoding(mrb); + flags |= ARG_ENCODING_NONE; + } + else { + /*mrb_warn("encoding option is ignored - %s", kcode); */ + printf("mrb_warn:encoding option is ignored - %s", kcode); + } + } + str = argv[0]; + //ptr = StringValuePtr(str); + ptr = mrb_string_value_ptr(mrb, str); + if (enc + ? mrb_reg_initialize(mrb, self, ptr, RSTRING_LEN(str), enc, flags, err, NULL, 0) + : mrb_reg_initialize_str(mrb, self, str, flags, err, NULL, 0)) { + //mrb_reg_raise_str(str, flags, err); + } + } + return self; +} + +/* 15.2.15.7.2 */ +/* :nodoc: */ +static mrb_value +mrb_reg_init_copy(mrb_state *mrb, mrb_value re/*, mrb_value copy*/) +{ + mrb_value argv[16]; + int argc; + onig_errmsg_buffer err = ""; + const char *s; + long len; + mrb_value copy; + + mrb_get_args(mrb, "*", &argv, &argc); + copy = argv[0]; + if (mrb_obj_equal(mrb, copy, re)) return copy; + /*mrb_check_frozen(copy);*/ + /* need better argument type check */ + if (!mrb_obj_is_instance_of(mrb, re, mrb_obj_class(mrb, copy))) { + mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type"); + } + mrb_reg_check(mrb, copy); + s = RREGEXP_SRC_PTR(copy); + len = RREGEXP_SRC_LEN(copy); + if (mrb_reg_initialize(mrb, re, s, len, mrb_enc_get(mrb, copy), mrb_reg_options(mrb, copy), + err, 0/*NULL*/, 0) != 0) { + mrb_reg_raise(mrb, s, len, err, re); + } + return re; +} + +static int +mrb_reg_equal(mrb_state *mrb, mrb_value re1, mrb_value re2) +{ + if (mrb_obj_equal(mrb, re1, re2)) return TRUE; + + if (mrb_type(re2) != MRB_TT_REGEX) return 
FALSE; + mrb_reg_check(mrb, re1); + mrb_reg_check(mrb, re2); + /*if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse; */ + if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return FALSE; + if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)) return FALSE; + /*if (ENCODING_GET(re1) != ENCODING_GET(re2)) return mrb_false_value();*/ + if (memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0) { + return TRUE; + } + return FALSE; +} + +/* 15.2.15.7.3 */ +/* + * call-seq: + * rxp == other_rxp -> true or false + * rxp.eql?(other_rxp) -> true or false + * + * Equality---Two regexps are equal if their patterns are identical, they have + * the same character set code, and their casefold? values are the + * same. + * + * /abc/ == /abc/x #=> false + * /abc/ == /abc/i #=> false + * /abc/ == /abc/n #=> false + * /abc/u == /abc/n #=> false + */ + +static mrb_value +mrb_reg_equal_m(mrb_state *mrb, mrb_value re1/*, mrb_value re2*/) +{ + mrb_value re2; + + mrb_get_args(mrb, "o", &re2); + if (mrb_reg_equal(mrb, re1, re2)) + return mrb_true_value(); + return mrb_false_value(); +} + +/* 15.2.15.7.4 */ +/* + * call-seq: + * rxp === str -> true or false + * + * Case Equality---Synonym for Regexp#=~ used in case statements. 
+ * + * a = "HELLO" + * case a + * when /^[a-z]*$/; print "Lower case\n" + * when /^[A-Z]*$/; print "Upper case\n" + * else; print "Mixed case\n" + * end + * + * produces: + * + * Upper case + */ + +mrb_value +mrb_reg_eqq(mrb_state *mrb, mrb_value re/*, mrb_value str*/) +{ + long start; + mrb_value str; + + mrb_get_args(mrb, "o", &str); + str = reg_operand(mrb, str, 0/*FALSE*/); + if (mrb_nil_p(str)) { + mrb_backref_set(mrb, mrb_nil_value()); + return mrb_false_value(); + } + start = mrb_reg_search(mrb, re, str, 0, 0); + if (start < 0) { + return mrb_false_value(); + } + return mrb_true_value(); +} + +static long +reg_match_pos(mrb_state *mrb, mrb_value re, mrb_value *strp, long pos) +{ + mrb_value str = *strp; + + if (mrb_nil_p(str)) { + mrb_backref_set(mrb, mrb_nil_value()); + return -1; + } + *strp = str = reg_operand(mrb, str, 1/*TRUE*/); + if (pos != 0) { + if (pos < 0) { + mrb_value l = mrb_str_size(mrb, str); + pos += mrb_fixnum(l); + if (pos < 0) { + return pos; + } + } + pos = mrb_str_offset(mrb, str, pos); + } + return mrb_reg_search(mrb, re, str, pos, 0); +} + +mrb_value +mrb_reg_match_str(mrb_state *mrb, mrb_value re, mrb_value str) +{ + mrb_int pos = reg_match_pos(mrb, re, &str, 0); + if (pos < 0) return mrb_nil_value(); + pos = mrb_str_sublen(mrb, str, pos); + return mrb_fixnum_value(pos); +} +/* 15.2.15.7.5 */ +/* + * call-seq: + * rxp =~ str -> integer or nil + * + * Match---Matches rxp against str. + * + * /at/ =~ "input data" #=> 7 + * /ax/ =~ "input data" #=> nil + * + * If =~ is used with a regexp literal with named captures, + * captured strings (or nil) is assigned to local variables named by + * the capture names. + * + * /(?\w+)\s*=\s*(?\w+)/ =~ " x = y " + * p lhs #=> "x" + * p rhs #=> "y" + * + * If it is not matched, nil is assigned for the variables. + * + * /(?\w+)\s*=\s*(?\w+)/ =~ " x = " + * p lhs #=> nil + * p rhs #=> nil + * + * This assignment is implemented in the Ruby parser. 
+ * The parser detects 'regexp-literal =~ expression' for the assignment. + * The regexp must be a literal without interpolation and placed at left hand side. + * + * The assignment is not occur if the regexp is not a literal. + * + * re = /(?\w+)\s*=\s*(?\w+)/ + * re =~ " x = y " + * p lhs # undefined local variable + * p rhs # undefined local variable + * + * A regexp interpolation, #{}, also disables + * the assignment. + * + * rhs_pat = /(?\w+)/ + * /(?\w+)\s*=\s*#{rhs_pat}/ =~ "x = y" + * p lhs # undefined local variable + * + * The assignment is not occur if the regexp is placed at right hand side. + * + * " x = y " =~ /(?\w+)\s*=\s*(?\w+)/ + * p lhs, rhs # undefined local variable + * + */ +mrb_value +mrb_reg_match(mrb_state *mrb, mrb_value re/*, mrb_value str*/) +{ + mrb_value str; + mrb_get_args(mrb, "o", &str); + return mrb_reg_match_str(mrb, re, str); +} + +/* 15.2.15.7.6 */ +/* + * call-seq: + * rxp.casefold? -> true or false + * + * Returns the value of the case-insensitive flag. + * + * /a/.casefold? #=> false + * /a/i.casefold? #=> true + * /(?i:a)/.casefold? #=> false + */ + +static mrb_value +mrb_reg_casefold_p(mrb_state *mrb, mrb_value re) +{ + mrb_reg_check(mrb, re); + if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) return mrb_true_value(); + return mrb_false_value(); +} + +/* 15.2.15.7.7 */ +/* + * call-seq: + * rxp.match(str) -> matchdata or nil + * rxp.match(str,pos) -> matchdata or nil + * + * Returns a MatchData object describing the match, or + * nil if there was no match. This is equivalent to retrieving the + * value of the special variable $~ following a normal match. + * If the second parameter is present, it specifies the position in the string + * to begin the search. 
+ * + * /(.)(.)(.)/.match("abc")[2] #=> "b" + * /(.)(.)/.match("abc", 1)[2] #=> "c" + * + * If a block is given, invoke the block with MatchData if match succeed, so + * that you can write + * + * pat.match(str) {|m| ...} + * + * instead of + * + * if m = pat.match(str) + * ... + * end + * + * The return value is a value from block execution in this case. + */ + +static mrb_value +mrb_reg_match_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value re) +{ + mrb_value argv[16]; + int argc; + mrb_value result, str, initpos, b; + long pos; + + //if (mrb_scan_args(argc, argv, "11", &str, &initpos) == 2) { + mrb_get_args(mrb, "&*", &b, &argv, &argc); + if (argc == 2) { + initpos = argv[1]; + pos = mrb_fixnum(initpos); + } + else { + pos = 0; + } + str = argv[0]; + pos = reg_match_pos(mrb, re, &str, pos); + if (pos < 0) { + mrb_backref_set(mrb, mrb_nil_value()); + return mrb_nil_value(); + } + result = mrb_backref_get(mrb); + /*mrb_match_busy(result);*/ + if (!mrb_nil_p(result) && mrb_block_given_p()) { + return mrb_yield(mrb, result, b); + } + return result; +} + +/* 15.2.15.7.8 */ + +/* + * call-seq: + * rxp.source -> str + * + * Returns the original string of the pattern. + * + * /ab+c/ix.source #=> "ab+c" + * + * Note that escape sequences are retained as is. 
+ * + * /\x20\+/.source #=> "\\x20\\+" + * + */ + +static mrb_value +mrb_reg_source(mrb_state *mrb, mrb_value re) +{ + mrb_value str; + + mrb_reg_check(mrb, re); + str = mrb_enc_str_new(mrb, RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re), mrb_enc_get(mrb, re)); + /*if (OBJ_TAINTED(re)) OBJ_TAINT(str);*/ + return str; +} + +static int +name_to_backref_number(mrb_state *mrb, struct re_registers *regs, mrb_value regexp, const char* name, const char* name_end) +{ + int num; + + num = onig_name_to_backref_number(RREGEXP(regexp)->ptr, + (const unsigned char* )name, (const unsigned char* )name_end, regs); + if (num >= 1) { + return num; + } + else { + mrb_value s = mrb_str_new(mrb, name, (long )(name_end - name));//mrb_str_new(name, (long )(name_end - name)); + mrb_raise(mrb, E_INDEX_ERROR, "undefined group name reference: %s", + mrb_string_value_ptr(mrb, s)); + return num; /* not reach */ + } +} + +/* + * Document-class: MatchData + * + * MatchData is the type of the special variable $~, + * and is the type of the object returned by Regexp#match and + * Regexp.last_match. It encapsulates all the results of a pattern + * match, results normally accessed through the special variables + * $&, $', $`, $1, + * $2, and so on. 
+ * + */ + +mrb_value +match_alloc(mrb_state *mrb) +{ + struct RMatch* m; + + m = mrb_obj_alloc(mrb, MRB_TT_MATCH, mrb->match_class); + // NEWOBJ(match, struct RMatch); + // OBJSETUP(match, klass, T_MATCH); + + m->str = mrb_nil_value(); + m->rmatch = 0; + m->regexp = mrb_nil_value(); + m->rmatch = mrb_malloc(mrb, sizeof(struct rmatch));//ALLOC(struct rmatch); + memset(m->rmatch, 0, sizeof(struct rmatch)); + + return mrb_obj_value(m); +} + +/* ------------------------------------------------------------------------- */ +/* MatchData Class */ +/* ------------------------------------------------------------------------- */ +/* 15.2.16.3.1 */ +/* + * call-seq: + * mtch[i] -> str or nil + * mtch[start, length] -> array + * mtch[range] -> array + * mtch[name] -> str or nil + * + * Match Reference---MatchData acts as an array, and may be + * accessed using the normal array indexing techniques. mtch[0] is + * equivalent to the special variable $&, and returns the entire + * matched string. mtch[1], mtch[2], and so on return the values + * of the matched backreferences (portions of the pattern between parentheses). 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m #=> # + * m[0] #=> "HX1138" + * m[1, 2] #=> ["H", "X"] + * m[1..3] #=> ["H", "X", "113"] + * m[-3, 2] #=> ["X", "113"] + * + * m = /(?a+)b/.match("ccaaab") + * m #=> # + * m["foo"] #=> "aaa" + * m[:foo] #=> "aaa" + */ + +static mrb_value +mrb_match_aref(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value match) +{ + mrb_value argv[16]; + int argc; + mrb_value idx, rest; + + match_check(mrb, match); + //mrb_scan_args(argc, argv, "11", &idx, &rest); + mrb_get_args(mrb, "*", &argv, &argc); + idx = argv[0]; + rest = argv[1]; + if (argc<2) { + if (mrb_type(idx) == MRB_TT_FIXNUM) { + if (mrb_fixnum(idx) >= 0) { + return mrb_reg_nth_match(mrb, mrb_fixnum(idx), match); + } + } + else { + const char *p; + int num; + + switch (mrb_type(idx)) { + case MRB_TT_SYMBOL: + //p = mrb_id2name(SYM2ID(idx)); + p = mrb_sym2name(mrb, SYM2ID(idx)); + goto name_to_backref; + break; + case MRB_TT_STRING: + //p = StringValuePtr(idx); + p = mrb_string_value_ptr(mrb, idx); +name_to_backref: + num = name_to_backref_number(mrb, RMATCH_REGS(match), + RMATCH(match)->regexp, p, p + strlen(p)); + return mrb_reg_nth_match(mrb, num, match); + break; + default: + break; + } + } + } + + return mrb_ary_aget(mrb, /*argc, argv,*/ mrb_match_to_a(mrb, match)); +} + +typedef struct { + long byte_pos; + long char_pos; +} pair_t; + +static int +pair_byte_cmp(const void *pair1, const void *pair2) +{ + long diff = ((pair_t*)pair1)->byte_pos - ((pair_t*)pair2)->byte_pos; +#if SIZEOF_LONG > SIZEOF_INT + return diff ? diff > 0 ? 
1 : -1 : 0; +#else + return (int)diff; +#endif +} + +static void +update_char_offset(mrb_state *mrb, mrb_value match) +{ + struct rmatch *rm = RMATCH(match)->rmatch; + struct re_registers *regs; + int i, num_regs, num_pos; + long c; + char *s, *p, *q, *e; + mrb_encoding *enc; + pair_t *pairs; + + if (rm->char_offset_updated) + return; + + regs = &rm->regs; + num_regs = rm->regs.num_regs; + + if (rm->char_offset_num_allocated < num_regs) { + //REALLOC_N(rm->char_offset, struct rmatch_offset, num_regs); + rm->char_offset = mrb_realloc(mrb, rm->char_offset, sizeof(struct rmatch_offset)*num_regs); + rm->char_offset_num_allocated = num_regs; + } + + enc = mrb_enc_get(mrb, RMATCH(match)->str); + if (mrb_enc_mbmaxlen(enc) == 1) { + for (i = 0; i < num_regs; i++) { + rm->char_offset[i].beg = BEG(i); + rm->char_offset[i].end = END(i); + } + rm->char_offset_updated = 1; + return; + } + + //pairs = ALLOCA_N(pair_t, num_regs*2); + pairs = mrb_malloc(mrb, sizeof(pair_t)*num_regs*2); + + num_pos = 0; + for (i = 0; i < num_regs; i++) { + if (BEG(i) < 0) + continue; + pairs[num_pos++].byte_pos = BEG(i); + pairs[num_pos++].byte_pos = END(i); + } + qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp); + + s = p = RSTRING_PTR(RMATCH(match)->str); + e = s + RSTRING_LEN(RMATCH(match)->str); + c = 0; + for (i = 0; i < num_pos; i++) { + q = s + pairs[i].byte_pos; + c += mrb_enc_strlen(p, q, enc); + pairs[i].char_pos = c; + p = q; + } + + for (i = 0; i < num_regs; i++) { + pair_t key, *found; + if (BEG(i) < 0) { + rm->char_offset[i].beg = -1; + rm->char_offset[i].end = -1; + continue; + } + + key.byte_pos = BEG(i); + found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp); + rm->char_offset[i].beg = found->char_pos; + + key.byte_pos = END(i); + found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp); + rm->char_offset[i].end = found->char_pos; + } + + rm->char_offset_updated = 1; +} + +/* 15.2.16.3.2 */ +/* + * call-seq: + * mtch.begin(n) -> integer + * + * 
Returns the offset of the start of the nth element of the match + * array in the string. + * n can be a string or symbol to reference a named capture. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.begin(0) #=> 1 + * m.begin(2) #=> 2 + * + * m = /(?.)(.)(?.)/.match("hoge") + * p m.begin(:foo) #=> 0 + * p m.begin(:bar) #=> 2 + */ + +static mrb_value +mrb_match_begin(mrb_state *mrb, mrb_value match/*, mrb_value n*/) +{ + mrb_value argv[16]; + int argc; + mrb_get_args(mrb, "*", &argv, &argc); + mrb_value n = argv[0]; + int i = match_backref_number(mrb, match, n); + struct re_registers *regs = RMATCH_REGS(match); + + match_check(mrb, match); + if (i < 0 || regs->num_regs <= i) + mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i); + + if (BEG(i) < 0) + return mrb_nil_value(); + + update_char_offset(mrb, match); + return mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].beg); +} + +static mrb_value +match_array(mrb_state *mrb, mrb_value match, int start) +{ + struct re_registers *regs; + mrb_value ary; + mrb_value target; + int i; + /*int taint = OBJ_TAINTED(match);*/ + + match_check(mrb, match); + regs = RMATCH_REGS(match); + ary = mrb_ary_new_capa(mrb, regs->num_regs);//mrb_ary_new2(regs->num_regs); + target = RMATCH(match)->str; + + for (i=start; inum_regs; i++) { + if (regs->beg[i] == -1) { + mrb_ary_push(mrb, ary, mrb_nil_value()); + } + else { + mrb_value str = mrb_str_subseq(mrb, target, regs->beg[i], regs->end[i]-regs->beg[i]); + /*if (taint) OBJ_TAINT(str);*/ + mrb_ary_push(mrb, ary, str); + } + } + return ary; +} + +/* 15.2.16.3.3 */ +/* + * call-seq: + * mtch.captures -> array + * + * Returns the array of captures; equivalent to mtch.to_a[1..-1]. 
+ * + * f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures + * f1 #=> "H" + * f2 #=> "X" + * f3 #=> "113" + * f4 #=> "8" + */ +static mrb_value +mrb_match_captures(mrb_state *mrb, mrb_value match) +{ + return match_array(mrb, match, 1); +} + +/* 15.2.16.3.4 */ +/* + * call-seq: + * mtch.end(n) -> integer + * + * Returns the offset of the character immediately following the end of the + * nth element of the match array in the string. + * n can be a string or symbol to reference a named capture. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.end(0) #=> 7 + * m.end(2) #=> 3 + * + * m = /(?.)(.)(?.)/.match("hoge") + * p m.end(:foo) #=> 1 + * p m.end(:bar) #=> 3 + */ + +static mrb_value +mrb_match_end(mrb_state *mrb, mrb_value match/*, mrb_value n*/) +{ + mrb_value argv[16]; + int argc; + mrb_get_args(mrb, "*", &argv, &argc); + mrb_value n = argv[0]; + int i = match_backref_number(mrb, match, n); + struct re_registers *regs = RMATCH_REGS(match); + + match_check(mrb, match); + if (i < 0 || regs->num_regs <= i) + mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i); + + if (BEG(i) < 0) + return mrb_nil_value(); + + update_char_offset(mrb, match); + return mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].end); +} + +/* 15.2.16.3.5 */ +/* :nodoc: */ +static mrb_value +mrb_match_init_copy(mrb_state *mrb, mrb_value obj/*, mrb_value orig*/) +{ + mrb_value argv[16]; + int argc; + struct rmatch *rm; + + mrb_get_args(mrb, "*", &argv, &argc); + mrb_value orig = argv[0]; + + if (mrb_obj_equal(mrb, obj, orig)) return obj; + + if (!mrb_obj_is_instance_of(mrb, orig, mrb_obj_class(mrb, obj))) { + mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class"); + } + + RMATCH(obj)->str = RMATCH(orig)->str; + RMATCH(obj)->regexp = RMATCH(orig)->regexp; + + if (RMATCH(obj)->rmatch == 0) { + RMATCH(obj)->rmatch = mrb_malloc(mrb, sizeof(struct rmatch));//ALLOC(struct rmatch); + memset(RMATCH(obj)->rmatch, 0, sizeof(struct rmatch)); + } + rm = RMATCH(obj)->rmatch; + 
onig_region_copy(&rm->regs, RMATCH_REGS(orig)); + + if (!RMATCH(orig)->rmatch->char_offset_updated) { + rm->char_offset_updated = 0; + } + else { + if (rm->char_offset_num_allocated < rm->regs.num_regs) { + //REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs); + rm->char_offset = mrb_realloc(mrb, rm->char_offset, sizeof(struct rmatch_offset)* rm->regs.num_regs); + rm->char_offset_num_allocated = rm->regs.num_regs; + } + memcpy(rm->char_offset, RMATCH(orig)->rmatch->char_offset, + sizeof(struct rmatch_offset)* rm->regs.num_regs); + rm->char_offset_updated = 1; + } + + return obj; +} + +/* 15.2.16.3.6 */ +/* 15.2.16.3.10 */ +/* + * call-seq: + * mtch.length -> integer + * mtch.size -> integer + * + * Returns the number of elements in the match array. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.length #=> 5 + * m.size #=> 5 + */ + +static mrb_value +mrb_match_size(mrb_state *mrb, mrb_value match) +{ + match_check(mrb, match); + return mrb_fixnum_value(RMATCH_REGS(match)->num_regs); +} + +/* 15.2.16.3.7 */ +/* + * call-seq: + * mtch.offset(n) -> array + * + * Returns a two-element array containing the beginning and ending offsets of + * the nth match. + * n can be a string or symbol to reference a named capture. 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.offset(0) #=> [1, 7] + * m.offset(4) #=> [6, 7] + * + * m = /(?.)(.)(?.)/.match("hoge") + * p m.offset(:foo) #=> [0, 1] + * p m.offset(:bar) #=> [2, 3] + * + */ + +static mrb_value +mrb_match_offset(mrb_state *mrb, mrb_value match/*, mrb_value n*/) +{ + mrb_value n; + struct re_registers *regs = RMATCH_REGS(match); + + mrb_get_args(mrb, "o", &n); + int i = match_backref_number(mrb, match, n); + + match_check(mrb, match); + if (i < 0 || regs->num_regs <= i) + mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i); + + if (BEG(i) < 0) + return mrb_assoc_new(mrb, mrb_nil_value(), mrb_nil_value()); + + update_char_offset(mrb, match); + return mrb_assoc_new(mrb, mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].beg), + mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].end)); +} + +/* 15.2.16.3.8 */ +/* + * call-seq: + * mtch.post_match -> str + * + * Returns the portion of the original string after the current match. + * Equivalent to the special variable $'. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") + * m.post_match #=> ": The Movie" + */ +mrb_value +mrb_reg_match_post(mrb_state *mrb, mrb_value match) +{ + mrb_value str; + long pos; + struct re_registers *regs; + + if (mrb_nil_p(match)) return mrb_nil_value(); + match_check(mrb, match); + regs = RMATCH_REGS(match); + if (BEG(0) == -1) return mrb_nil_value(); + str = RMATCH(match)->str; + pos = END(0); + str = mrb_str_subseq(mrb, str, pos, RSTRING_LEN(str) - pos); + /*if (OBJ_TAINTED(match)) OBJ_TAINT(str);*/ + return str; +} + +/* 15.2.16.3.9 */ +/* + * call-seq: + * mtch.pre_match -> str + * + * Returns the portion of the original string before the current match. + * Equivalent to the special variable $`. 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.pre_match #=> "T" + */ + +mrb_value +mrb_reg_match_pre(mrb_state *mrb, mrb_value match) +{ + mrb_value str; + struct re_registers *regs; + + if (mrb_nil_p(match)) return mrb_nil_value(); + match_check(mrb, match); + regs = RMATCH_REGS(match); + if (BEG(0) == -1) return mrb_nil_value(); + str = mrb_str_subseq(mrb, RMATCH(match)->str, 0, BEG(0)); + /*if (OBJ_TAINTED(match)) OBJ_TAINT(str);*/ + return str; +} + +/* 15.2.16.3.11 */ +/* + * call-seq: + * mtch.string -> str + * + * Returns a frozen copy of the string passed in to match. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.string #=> "THX1138." + */ + +static mrb_value +mrb_match_string(mrb_state *mrb, mrb_value match) +{ + match_check(mrb, match); + return RMATCH(match)->str; /* str is frozen */ +} + +/* 15.2.16.3.12 */ +/* + * call-seq: + * mtch.to_a -> anArray + * + * Returns the array of matches. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.to_a #=> ["HX1138", "H", "X", "113", "8"] + * + * Because to_a is called when expanding + * *variable, there's a useful assignment + * shortcut for extracting matched fields. This is slightly slower than + * accessing the fields directly (as an intermediate array is + * generated). + * + * all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138.")) + * all #=> "HX1138" + * f1 #=> "H" + * f2 #=> "X" + * f3 #=> "113" + */ + +static mrb_value +mrb_match_to_a(mrb_state *mrb, mrb_value match) +{ + return match_array(mrb, match, 0); +} + +/* 15.2.16.3.13 */ +/* + * call-seq: + * mtch.to_s -> str + * + * Returns the entire matched string. 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.to_s #=> "HX1138" + */ + +static mrb_value +mrb_match_to_s(mrb_state *mrb, mrb_value match) +{ + mrb_value str = mrb_reg_last_match(mrb, match); + + match_check(mrb, match); + if (mrb_nil_p(str)) str = mrb_str_new(mrb, 0, 0);//mrb_str_new(0,0); + /*if (OBJ_TAINTED(match)) OBJ_TAINT(str); */ + /*if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str); */ + return str; +} + +static int +char_to_option(int c) +{ + int val; + + switch (c) { + case 'i': + val = ONIG_OPTION_IGNORECASE; + break; + case 'x': + val = ONIG_OPTION_EXTEND; + break; + case 'm': + val = ONIG_OPTION_MULTILINE; + break; + default: + val = 0; + break; + } + return val; +} + +static char * +option_to_str(char str[4], int options) +{ + char *p = str; + if (options & ONIG_OPTION_MULTILINE) *p++ = 'm'; + if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i'; + if (options & ONIG_OPTION_EXTEND) *p++ = 'x'; + *p = 0; + return str; +} + +#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */ + +static void +mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len, + mrb_encoding *enc, mrb_encoding *resenc) +{ + const char *p, *pend; + int need_escape = 0; + int c, clen; + + p = s; pend = p + len; + if (mrb_enc_asciicompat(mrb, enc)) { + while (p < pend) { + c = mrb_enc_ascget(mrb, p, pend, &clen, enc); + if (c == -1) { + if (enc == resenc) { + p += mbclen(p, pend, enc); + } + else { + need_escape = 1; + break; + } + } + else if (c != '/' && mrb_enc_isprint(c, enc)) { + p += clen; + } + else { + need_escape = 1; + break; + } + } + } + else { + need_escape = 1; + } + + if (!need_escape) { + mrb_str_buf_cat(mrb, str, s, len); + } + else { + int unicode_p = mrb_enc_unicode_p(enc); + p = s; + while (p str + * + * Returns a string containing the regular expression and its options (using the + * (?opts:source) notation. 
This string can be fed back in to + * Regexp::new to a regular expression with the same semantics as + * the original. (However, Regexp#== may not return true when + * comparing the two, as the source of the regular expression itself may + * differ, as the example shows). Regexp#inspect produces a + * generally more readable version of rxp. + * + * r1 = /ab+c/ix #=> /ab+c/ix + * s1 = r1.to_s #=> "(?ix-m:ab+c)" + * r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/ + * r1 == r2 #=> false + * r1.source #=> "ab+c" + * r2.source #=> "(?ix-m:ab+c)" + */ + +mrb_value +mrb_reg_to_s(mrb_state *mrb, mrb_value re) +{ + int options, opt; + const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND; + long len; + const UChar* ptr; + mrb_value str = mrb_str_new_cstr(mrb, "(?"); + char optbuf[5]; + mrb_encoding *enc = mrb_enc_get(mrb, re); + + mrb_reg_check(mrb, re); + memset(optbuf, 0, 5); + mrb_enc_copy(mrb, str, re); + options = RREGEXP(re)->ptr->options; + ptr = (UChar*)RREGEXP_SRC_PTR(re); + len = RREGEXP_SRC_LEN(re); +again: + if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') { + int err = 1; + ptr += 2; + if ((len -= 2) > 0) { + do { + opt = char_to_option((int )*ptr); + if (opt != 0) { + options |= opt; + } + else { + break; + } + ++ptr; + } while (--len > 0); + } + if (len > 1 && *ptr == '-') { + ++ptr; + --len; + do { + opt = char_to_option((int )*ptr); + if (opt != 0) { + options &= ~opt; + } + else { + break; + } + ++ptr; + } while (--len > 0); + } + if (*ptr == ')') { + --len; + ++ptr; + goto again; + } + if (*ptr == ':' && ptr[len-1] == ')') { + Regexp *rp; + + ++ptr; + len -= 2; + err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT, + enc, OnigDefaultSyntax, NULL); + onig_free(rp); + } + if (err) { + options = RREGEXP(re)->ptr->options; + ptr = (UChar*)RREGEXP_SRC_PTR(re); + len = RREGEXP_SRC_LEN(re); + } + } + + if (*option_to_str(optbuf, options)) mrb_str_buf_cat(mrb, str, optbuf, strlen(optbuf)); + + if ((options & embeddable) != embeddable) { 
+ optbuf[0] = '-'; + option_to_str(optbuf + 1, ~options); + mrb_str_buf_cat(mrb, str, optbuf, strlen(optbuf)); + } + + mrb_str_buf_cat(mrb, str, ":", strlen(":")); + mrb_reg_expr_str(mrb, str, (char*)ptr, len, enc, NULL); + mrb_str_buf_cat(mrb, str, ")", strlen(")")); + mrb_enc_copy(mrb, str, re); + + /*OBJ_INFECT(str, re);*/ + return str; +} + +/* 15.2.15.7.10(x) */ +/* + * call-seq: + * rxp.inspect -> string + * + * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly, + * #inspect actually produces the more natural version of + * the string than #to_s. + * + * /ab+c/ix.inspect #=> "/ab+c/ix" + * + */ + +static mrb_value +mrb_reg_inspect(mrb_state *mrb, mrb_value re) +{ + if (!RREGEXP(re)->ptr || mrb_nil_p(RREGEXP_SRC(re)) || !RREGEXP_SRC_PTR(re)) { + return mrb_any_to_s(mrb, re); + } + return mrb_reg_desc(mrb, RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re); +} + +static mrb_value +mrb_reg_s_alloc(mrb_state *mrb, mrb_value dummy) +{ + struct RRegexp* re; + + //NEWOBJ(re, struct RRegexp); + //OBJSETUP(re, klass, T_REGEXP); + re = mrb_obj_alloc(mrb, MRB_TT_REGEX, mrb->regex_class); + + re->ptr = 0; + re->src.tt = 0; + re->usecnt = 0; + + return mrb_obj_value(re); +} + +mrb_value +mrb_reg_match_last(mrb_state *mrb, mrb_value match) +{ + int i; + + if (mrb_nil_p(match)) return mrb_nil_value(); + match_check(mrb, match); + if (RMATCH(match)->rmatch->char_offset[0].beg == -1) return mrb_nil_value(); + + for (i=RMATCH(match)->rmatch->regs.num_regs-1; RMATCH(match)->rmatch->char_offset[i].beg == -1 && i > 0; i--) + ; + if (i == 0) return mrb_nil_value(); + return mrb_reg_nth_match(mrb, i, match); +} + +/* 15.2.16.3.14(x) */ +/* + * call-seq: + * mtch.inspect -> str + * + * Returns a printable version of mtch. 
+ * + * puts /.$/.match("foo").inspect + * #=> # + * + * puts /(.)(.)(.)/.match("foo").inspect + * #=> # + * + * puts /(.)(.)?(.)/.match("fo").inspect + * #=> # + * + * puts /(?.)(?.)(?.)/.match("hoge").inspect + * #=> # + * + */ +struct backref_name_tag { + const UChar *name; + long len; +}; + +static int +match_inspect_name_iter(const OnigUChar *name, const OnigUChar *name_end, + int back_num, int *back_refs, OnigRegex regex, void *arg0) +{ + struct backref_name_tag *arg = (struct backref_name_tag *)arg0; + int i; + + for (i = 0; i < back_num; i++) { + arg[back_refs[i]].name = name; + arg[back_refs[i]].len = name_end - name; + } + return 0; +} + +static mrb_value +mrb_match_inspect(mrb_state *mrb, mrb_value match) +{ + const char *cname = mrb_obj_classname(mrb, match); + mrb_value str; + int i; + struct re_registers *regs = RMATCH_REGS(match); + int num_regs = regs->num_regs; + struct backref_name_tag *names; + mrb_value regexp = RMATCH(match)->regexp; + + if (regexp.value.p == 0) { + return mrb_sprintf(mrb, "#<%s:%p>", cname, (void*)&match); + } + + //names = ALLOCA_N(struct backref_name_tag, num_regs); + //MEMZERO(names, struct backref_name_tag, num_regs); + names = mrb_malloc(mrb, sizeof(struct backref_name_tag)*num_regs); + memset(names, 0, sizeof(struct backref_name_tag)*num_regs); + + onig_foreach_name(RREGEXP(regexp)->ptr, + match_inspect_name_iter, names); + + str = mrb_str_new_cstr(mrb, "#<");//mrb_str_buf_new2("#<"); + mrb_str_buf_cat(mrb, str, cname, strlen(cname));//mrb_str_buf_cat2(str, cname); + + for (i = 0; i < num_regs; i++) { + char buf[sizeof(num_regs)*3+1]; + mrb_value v; + mrb_str_buf_cat(mrb, str, " ", strlen(" "));//mrb_str_buf_cat2(str, " "); + if (0 < i) { + if (names[i].name) + mrb_str_buf_cat(mrb, str, (const char *)names[i].name, names[i].len); + else { + //mrb_str_catf(mrb, str, "%d", i); + sprintf(buf, "%d", i); + mrb_str_buf_cat(mrb, str, (const char *)buf, strlen(buf)); + } + mrb_str_buf_cat(mrb, str, ":", 
strlen(":"));//mrb_str_buf_cat2(str, ":"); + } + v = mrb_reg_nth_match(mrb, i, match); + if (mrb_nil_p(v)) + mrb_str_buf_cat(mrb, str, "nil", strlen("nil"));//mrb_str_buf_cat2(str, "nil"); + else + mrb_str_buf_append(mrb, str, mrb_str_inspect(mrb, v)); + } + mrb_str_buf_cat(mrb, str, ">", strlen(">"));//mrb_str_buf_cat2(str, ">"); + + return str; +} + +/* 15.2.16.3.15(x) */ +/* 15.2.16.3.16(x) */ +/* + * call-seq: + * mtch == mtch2 -> true or false + * + * Equality---Two matchdata are equal if their target strings, + * patterns, and matched positions are identical. + */ + +static mrb_value +mrb_match_equal(mrb_state *mrb, mrb_value match1) +{ + const struct re_registers *regs1, *regs2; + mrb_value match2; + + mrb_get_args(mrb, "o", &match2); + if (mrb_obj_equal(mrb, match1, match2)) return mrb_true_value(); + if (mrb_type(match2) != MRB_TT_MATCH) return mrb_false_value(); + if (!mrb_str_equal(mrb, RMATCH(match1)->str, RMATCH(match2)->str)) return mrb_false_value(); + if (!mrb_reg_equal(mrb, RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return mrb_false_value(); + regs1 = RMATCH_REGS(match1); + regs2 = RMATCH_REGS(match2); + if (regs1->num_regs != regs2->num_regs) return mrb_false_value(); + if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return mrb_false_value(); + if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return mrb_false_value(); + return mrb_true_value(); +} + +/* + * Document-class: RegexpError + * + * Raised when given an invalid regexp expression. + * + * Regexp.new("?") + * + * raises the exception: + * + * RegexpError: target of repeat operator is not specified: /?/ + */ + +/* + * Document-class: Regexp + * + * A Regexp holds a regular expression, used to match a pattern + * against strings. Regexps are created using the /.../ and + * %r{...} literals, and by the Regexp::new + * constructor. 
+ * + * :include: doc/re.rdoc + */ + +void +mrb_init_regexp(mrb_state *mrb) +{ + struct RClass *s; + s = mrb->regex_class = mrb_define_class(mrb, "Regexp", mrb->object_class); + + //mrb->encode_class = mrb_define_class(mrb, "Encoding", mrb->object_class); + //mrb_define_alloc_func(mrb, s, mrb_reg_s_alloc); + mrb_define_class_method(mrb, s, "compile", mrb_reg_s_new_instance, ARGS_ANY()); /* 15.2.15.6.1 */ + mrb_define_class_method(mrb, s, "escape", mrb_reg_s_quote, ARGS_REQ(1)); /* 15.2.15.6.2 */ + mrb_define_class_method(mrb, s, "last_match", mrb_reg_s_last_match, ARGS_ANY()); /* 15.2.15.6.3 */ + mrb_define_class_method(mrb, s, "quote", mrb_reg_s_quote, ARGS_REQ(1)); /* 15.2.15.6.4 */ + //mrb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union_m, -2); + //mrb_define_singleton_method(rb_cRegexp, "try_convert", rb_reg_s_try_convert, 1); + + mrb_define_method(mrb, s, "initialize", mrb_reg_initialize_m, ARGS_ANY()); /* 15.2.15.7.1 */ + mrb_define_method(mrb, s, "initialize_copy", mrb_reg_init_copy, ARGS_REQ(1)); /* 15.2.15.7.2 */ + mrb_define_method(mrb, s, "==", mrb_reg_equal_m, ARGS_REQ(1)); /* 15.2.15.7.3 */ + mrb_define_method(mrb, s, "===", mrb_reg_eqq, ARGS_REQ(1)); /* 15.2.15.7.4 */ + mrb_define_method(mrb, s, "=~", mrb_reg_match, ARGS_REQ(1)); /* 15.2.15.7.5 */ + mrb_define_method(mrb, s, "casefold?", mrb_reg_casefold_p, ARGS_NONE()); /* 15.2.15.7.6 */ + mrb_define_method(mrb, s, "match", mrb_reg_match_m, ARGS_ANY()); /* 15.2.15.7.7 */ + mrb_define_method(mrb, s, "source", mrb_reg_source, ARGS_NONE()); /* 15.2.15.7.8 */ + //mrb_define_method(rb_cRegexp, "hash", rb_reg_hash, 0); + //mrb_define_method(rb_cRegexp, "~", rb_reg_match2, 0); + mrb_define_method(mrb, s, "to_s", mrb_reg_to_s, ARGS_NONE()); /* 15.2.15.7.9 (x) */ + mrb_define_method(mrb, s, "inspect", mrb_reg_inspect, ARGS_NONE()); /* 15.2.15.7.10(x) */ + mrb_define_method(mrb, s, "eql?", mrb_reg_equal_m, ARGS_REQ(1)); /* 15.2.15.7.11(x) */ + //mrb_define_method(rb_cRegexp, "options", 
mrb_reg_options_m, 0); + //mrb_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */ + //mrb_define_method(rb_cRegexp, "fixed_encoding?", mrb_reg_fixed_encoding_p, 0); + //mrb_define_method(rb_cRegexp, "names", rb_reg_names, 0); + //mrb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0); + + //mrb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE)); + //mrb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND)); + //mrb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE)); + //mrb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED)); + mrb_define_const(mrb, s, "IGNORECASE", mrb_fixnum_value(ONIG_OPTION_IGNORECASE)); + mrb_define_const(mrb, s, "EXTENDED", mrb_fixnum_value(ONIG_OPTION_EXTEND)); + mrb_define_const(mrb, s, "MULTILINE", mrb_fixnum_value(ONIG_OPTION_MULTILINE)); + mrb_define_const(mrb, s, "FIXEDENCODING", mrb_fixnum_value(ARG_ENCODING_FIXED)); + + //mrb_global_variable(®_cache); + + s = mrb->match_class = mrb_define_class(mrb, "MatchData", mrb->object_class); + //mrb_undef_method(CLASS_OF(rb_cMatch), "new"); + + mrb_define_method(mrb, s, "[]", mrb_match_aref, ARGS_ANY()); /* 15.2.16.3.1 */ + mrb_define_method(mrb, s, "begin", mrb_match_begin, ARGS_REQ(1)); /* 15.2.16.3.2 */ + mrb_define_method(mrb, s, "captures", mrb_match_captures, ARGS_NONE()); /* 15.2.16.3.3 */ + mrb_define_method(mrb, s, "end", mrb_match_end, ARGS_REQ(1)); /* 15.2.16.3.4 */ + mrb_define_method(mrb, s, "initialize_copy", mrb_match_init_copy, ARGS_REQ(1)); /* 15.2.16.3.5 */ + mrb_define_method(mrb, s, "length", mrb_match_size, ARGS_NONE()); /* 15.2.16.3.6 */ + mrb_define_method(mrb, s, "offset", mrb_match_offset, ARGS_REQ(1)); /* 15.2.16.3.7 */ + mrb_define_method(mrb, s, "post_match", mrb_reg_match_post, ARGS_NONE()); /* 15.2.16.3.8 */ + mrb_define_method(mrb, s, "pre_match", mrb_reg_match_pre, ARGS_NONE()); /* 15.2.16.3.9 */ + mrb_define_method(mrb, s, "size", mrb_match_size, 
ARGS_NONE()); /* 15.2.16.3.10 */ + mrb_define_method(mrb, s, "string", mrb_match_string, ARGS_NONE()); /* 15.2.16.3.11 */ + mrb_define_method(mrb, s, "to_a", mrb_match_to_a, ARGS_NONE()); /* 15.2.16.3.12 */ + mrb_define_method(mrb, s, "to_s", mrb_match_to_s, ARGS_NONE()); /* 15.2.16.3.13 */ + mrb_define_method(mrb, s, "inspect", mrb_match_inspect, ARGS_NONE()); /* 15.2.16.3.14(x) */ + mrb_define_method(mrb, s, "==", mrb_match_equal, ARGS_REQ(1)); /* 15.2.16.3.15(x) */ + mrb_define_method(mrb, s, "eql?", mrb_match_equal, ARGS_REQ(1)); /* 15.2.16.3.16(x) */ + //mrb_define_method(rb_cMatch, "regexp", match_regexp, 0); + //mrb_define_method(rb_cMatch, "names", match_names, 0); + //mrb_define_method(rb_cMatch, "values_at", match_values_at, -1); + //mrb_define_method(rb_cMatch, "hash", match_hash, 0); + //mrb_define_method(rb_cMatch, "==", match_equal, 1); +} +/* ----------------1_8_7---------------------------------------- */ +//`mrb_check_type' +//`mrb_reg_regsub' +//`mrb_backref_get' +//`mrb_memsearch' +//`mrb_reg_mbclen2' +//`mrb_reg_regcomp' +//`mrb_yield' + + +mrb_value +mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers *regs, mrb_value regexp) +{ + mrb_value val; + char *p, *s, *e; + int no, clen; + mrb_encoding *str_enc = mrb_enc_get(mrb, str); + mrb_encoding *src_enc = mrb_enc_get(mrb, src); + int acompat = mrb_enc_asciicompat(mrb, str_enc); +#define ASCGET(mrb,s,e,cl) (acompat ? 
(*cl=1,ISASCII(s[0])?s[0]:-1) : mrb_enc_ascget(mrb, s, e, cl, str_enc)) + struct RString *ps = mrb_str_ptr(str); + + val.tt = 0; + p = s = ps->buf; + e = s + ps->len; + + while (s < e) { + int c = ASCGET(mrb, s, e, &clen); + char *ss; + + if (c == -1) { + s += mbclen(s, e, str_enc); + continue; + } + ss = s; + s += clen; + + if (c != '\\' || s == e) continue; + + //if (!val) { + if (!val.tt) { + val = mrb_str_buf_new(mrb, ss-p); + } + mrb_enc_str_buf_cat(mrb, val, p, ss-p, str_enc); + + c = ASCGET(mrb, s, e, &clen); + if (c == -1) { + s += mbclen(s, e, str_enc); + mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc); + p = s; + continue; + } + s += clen; + + p = s; + switch (c) { + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (onig_noname_group_capture_is_active(RREGEXP(regexp)->ptr)) { + no = c - '0'; + } + else { + continue; + } + break; + + case 'k': + if (s < e && ASCGET(mrb, s, e, &clen) == '<') { + char *name, *name_end; + + name_end = name = s + clen; + while (name_end < e) { + c = ASCGET(mrb, name_end, e, &clen); + if (c == '>') break; + name_end += c == -1 ? 
mbclen(name_end, e, str_enc) : clen; + } + if (name_end < e) { + no = name_to_backref_number(mrb, regs, regexp, name, name_end); + p = s = name_end + clen; + break; + } + else { + mrb_raise(mrb, mrb->eRuntimeError_class, "invalid group name reference format"); + } + } + + mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc); + continue; + + case '0': + case '&': + no = 0; + break; + + case '`': + mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src), BEG(0), src_enc); + continue; + + case '\'': + mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc); + continue; + + case '+': + no = regs->num_regs-1; + while (BEG(no) == -1 && no > 0) no--; + if (no == 0) continue; + break; + + case '\\': + mrb_enc_str_buf_cat(mrb, val, s-clen, clen, str_enc); + continue; + + default: + mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc); + continue; + } + + if (no >= 0) { + if (no >= regs->num_regs) continue; + if (BEG(no) == -1) continue; + mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc); + } + } /* while (s < e) { */ + + + if (!val.tt) return str; + if (p < e) { + mrb_enc_str_buf_cat(mrb, val, p, e-p, str_enc); + } + return val; +} + +//#define NEW_NODE(t,a0,a1,a2) mrb_node_newnode((t),(int)(a0),(int)(a1),(int)(a2)) +//#define NEW_IF(c,t,e) NEW_NODE(NODE_IF,c,t,e) +static inline NODE * +lfp_svar_place(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp) +{ + NODE *svar; + + /*if (lfp && th->local_lfp != lfp) { + svar = &lfp[-1]; + } + else { + svar = mrb->&th->local_svar; + }*/ + svar = mrb->local_svar; + /*if (mrb_nil_p(*svar)) { + *svar = mrb_obj_value(NEW_IF(0, 0, 0)); + }*/ + return svar;//(NODE *)((*svar).value.p); +} + +static mrb_value +lfp_svar_get(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp, mrb_int key) +{ + //mrb_value *regs; + NODE *svar = lfp_svar_place(mrb, /*th,*/ lfp); + //regs = mrb->stack; + + switch (key) { + case 0: + return svar->u1.value; + case 1: + return svar->u2.value; + default: { + return 
svar->u3.value; + /*const mrb_value hash = regs[GETARG_C(*svar)];//svar->u3.value; + + if (mrb_nil_p(hash)) { + return mrb_nil_value(); + } + else { + return mrb_hash_get(mrb, hash, mrb_fixnum_value(key));//mrb_hash_lookup(hash, key); + }*/ + } + } +} + +static void +lfp_svar_set(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp, mrb_int key, mrb_value val) +{ + //mrb_value *regs; + NODE *svar = lfp_svar_place(mrb, /*th,*/ lfp); + //regs = mrb->stack; + + switch (key) { + case 0: + svar->u1.value = val; + return; + case 1: + svar->u2.value = val; + return; + default: { + svar->u3.value = val; + //mrb_value hash = *svar;//svar->u3.value; + + //if (mrb_nil_p(hash)) { + // svar->u3.value = hash = mrb_hash_new(mrb, 0); + //} + //mrb_hash_aset(hash, key, val); + //mrb_hash_set(mrb, hash, mrb_fixnum_value(key), val); + } + } +} + +static mrb_value +vm_cfp_svar_get(mrb_state *mrb, /*mrb_thread_t *th, mrb_control_frame_t *cfp,*/ mrb_int key) +{ + //cfp = vm_normal_frame(th, cfp); + return lfp_svar_get(mrb, /*th, cfp ? cfp->lfp :*/ 0, key); +} + +static void +vm_cfp_svar_set(mrb_state *mrb, /*mrb_thread_t *th, mrb_control_frame_t *cfp,*/ mrb_int key, const mrb_value val) +{ + //cfp = vm_normal_frame(th, cfp); + lfp_svar_set(mrb, /*th, cfp ? 
cfp->lfp : */0, key, val); +} + +static mrb_value +vm_svar_get(mrb_state *mrb, mrb_int key) +{ + //mrb_thread_t *th = GET_THREAD(); + return vm_cfp_svar_get(mrb,/*th, th->cfp,*/ key); +} + +static void +vm_svar_set(mrb_state *mrb, mrb_int key, mrb_value val) +{ + //mrb_thread_t *th = GET_THREAD(); + vm_cfp_svar_set(mrb,/*th, th->cfp,*/ key, val); +} + + +int +mrb_reg_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref) +{ + return match_backref_number(mrb, match, backref); +} + +mrb_value +mrb_backref_get(mrb_state *mrb) +{ + return vm_svar_get(mrb, 1); +} + +void +mrb_backref_set(mrb_state *mrb, mrb_value val) +{ + vm_svar_set(mrb, 1, val); +} +#endif //INCLUDE_REGEXP + +#ifdef INCLUDE_ENCODING +static inline long +mrb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, *xe = xs + m; + const unsigned char *y = ys, *ye = ys + n; +#define SIZEOF_VOIDP 4 +#define SIZEOF_LONG 4 + +#ifndef VALUE_MAX +# if SIZEOF_VALUE == 8 +# define VALUE_MAX 0xFFFFFFFFFFFFFFFFULL +# elif SIZEOF_VALUE == 4 +# define VALUE_MAX 0xFFFFFFFFUL +# elif SIZEOF_LONG == SIZEOF_VOIDP +# define SIZEOF_VALUE 4 +# define VALUE_MAX 0xFFFFFFFFUL +# endif +#endif + int hx, hy, mask = VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT); + + if (m > SIZEOF_VALUE) + mrb_bug("!!too long pattern string!!"); + + /* Prepare hash value */ + for (hx = *x++, hy = *y++; x < xe; ++x, ++y) { + hx <<= CHAR_BIT; + hy <<= CHAR_BIT; + hx |= *x; + hy |= *y; + } + /* Searching */ + while (hx != hy) { + if (y == ye) + return -1; + hy <<= CHAR_BIT; + hy |= *y; + hy &= mask; + y++; + } + return y - ys - m; +} + +static inline long +mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, *xe = xs + m; + const unsigned char *y = ys; + int i, qstable[256]; + + /* Preprocessing */ + for (i = 0; i < 256; ++i) + qstable[i] = m + 1; + for (; x < xe; ++x) + qstable[*x] = xe - x; + /* Searching */ + for 
(; y + m <= ys + n; y += *(qstable + y[m])) { + if (*xs == *y && memcmp(xs, y, m) == 0) + return y - ys; + } + return -1; +} + +static inline unsigned int +mrb_memsearch_qs_utf8_hash(const unsigned char *x) +{ + register const unsigned int mix = 8353; + register unsigned int h = *x; + if (h < 0xC0) { + return h + 256; + } + else if (h < 0xE0) { + h *= mix; + h += x[1]; + } + else if (h < 0xF0) { + h *= mix; + h += x[1]; + h *= mix; + h += x[2]; + } + else if (h < 0xF5) { + h *= mix; + h += x[1]; + h *= mix; + h += x[2]; + h *= mix; + h += x[3]; + } + else { + return h + 256; + } + return (unsigned char)h; +} + +static inline long +mrb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, *xe = xs + m; + const unsigned char *y = ys; + int i, qstable[512]; + + /* Preprocessing */ + for (i = 0; i < 512; ++i) { + qstable[i] = m + 1; + } + for (; x < xe; ++x) { + qstable[mrb_memsearch_qs_utf8_hash(x)] = xe - x; + } + /* Searching */ + for (; y + m <= ys + n; y += qstable[mrb_memsearch_qs_utf8_hash(y+m)]) { + if (*xs == *y && memcmp(xs, y, m) == 0) + return y - ys; + } + return -1; +} + +int +mrb_memsearch(mrb_state *mrb, const void *x0, int m, const void *y0, int n, mrb_encoding *enc) +{ + const unsigned char *x = x0, *y = y0; + + if (m > n) return -1; + else if (m == n) { + return memcmp(x0, y0, m) == 0 ? 
0 : -1; + } + else if (m < 1) { + return 0; + } + else if (m == 1) { + const unsigned char *ys = y, *ye = ys + n; + for (; y < ye; ++y) { + if (*x == *y) + return y - ys; + } + return -1; + } + else if (m <= SIZEOF_VALUE) { + return mrb_memsearch_ss(x0, m, y0, n); + } + else if (enc == mrb_utf8_encoding(mrb)) { + return mrb_memsearch_qs_utf8(x0, m, y0, n); + } + else { + return mrb_memsearch_qs(x0, m, y0, n); + } +} +#endif //INCLUDE_ENCODING + +#ifdef INCLUDE_REGEXP +mrb_value +mrb_reg_init_str(mrb_state *mrb, mrb_value re, mrb_value s, int options) +{ + onig_errmsg_buffer err = ""; + + if (mrb_reg_initialize_str(mrb, re, s, options, err, NULL, 0) != 0) { + //mrb_reg_raise_str(s, options, err); + printf("mrb_reg_raise_str(s, options, err);"); + } + + return re; +} + +mrb_value +mrb_reg_alloc(mrb_state *mrb) +{ + mrb_value dummy = mrb_nil_value(); + return mrb_reg_s_alloc(mrb, dummy); +} + +mrb_value +mrb_reg_new_str(mrb_state *mrb, mrb_value s, int options) +{ + return mrb_reg_init_str(mrb, mrb_reg_alloc(mrb), s, options); +} + +mrb_value +mrb_reg_regcomp(mrb_state *mrb, mrb_value str) +{ + mrb_value save_str = str; + if (reg_cache.tt && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str) + && ENCODING_GET(mrb, reg_cache) == ENCODING_GET(mrb, str) + && memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0) + return reg_cache; + return reg_cache = mrb_reg_new_str(mrb, save_str, 0); +} + +int +re_adjust_startpos(struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range) +{ + /* Update the fastmap now if not correct already. 
*/ + /*if (!bufp->fastmap_accurate) { + int ret = re_compile_fastmap0(bufp); + if (ret) return ret; + }*/ + + /* Adjust startpos for mbc string */ + /*if (current_mbctype && startpos>0 && !(bufp->options&RE_OPTIMIZE_BMATCH)) { + startpos = re_mbc_startpos(string, size, startpos, range); + }*/ + return startpos; +} +#endif //INCLUDE_REGEXP + +#ifdef INCLUDE_ENCODING +static const unsigned char mbctab_ascii[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; +const unsigned char *re_mbctab = mbctab_ascii; + +#define is_identchar(p,e,enc) (mrb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p)) + +static int +is_special_global_name(const char *m, const char *e, mrb_encoding *enc) +{ + int mb = 0; + + if (m >= e) return 0; + switch (*m) { + case '~': case '*': case '$': case '?': case '!': case '@': + case '/': case '\\': case ';': case ',': case '.': case '=': + case ':': case '<': case '>': case '\"': + case '&': case '`': case '\'': case '+': + case '0': + ++m; + break; + case '-': + ++m; + if (m < e && is_identchar(m, e, enc)) { + if (!ISASCII(*m)) mb = 1; + m += mrb_enc_mbclen(m, e, enc); + } + break; + default: + if (!mrb_enc_isdigit(*m, enc)) return 0; + do { + if (!ISASCII(*m)) mb = 1; + ++m; + } while (m < e && 
mrb_enc_isdigit(*m, enc)); + } + return m == e ? mb + 1 : 0; +} + +int +mrb_enc_symname2_p(const char *name, long len, mrb_encoding *enc) +{ + const char *m = name; + const char *e = m + len; + int localid = FALSE; + + if (!m) return FALSE; + switch (*m) { + case '\0': + return FALSE; + + case '$': + if (is_special_global_name(++m, e, enc)) return TRUE; + goto id; + + case '@': + if (*++m == '@') ++m; + goto id; + + case '<': + switch (*++m) { + case '<': ++m; break; + case '=': if (*++m == '>') ++m; break; + default: break; + } + break; + + case '>': + switch (*++m) { + case '>': case '=': ++m; break; + } + break; + + case '=': + switch (*++m) { + case '~': ++m; break; + case '=': if (*++m == '=') ++m; break; + default: return FALSE; + } + break; + + case '*': + if (*++m == '*') ++m; + break; + + case '+': case '-': + if (*++m == '@') ++m; + break; + + case '|': case '^': case '&': case '/': case '%': case '~': case '`': + ++m; + break; + + case '[': + if (*++m != ']') return FALSE; + if (*++m == '=') ++m; + break; + + case '!': + switch (*++m) { + case '\0': return TRUE; + case '=': case '~': ++m; break; + default: return FALSE; + } + break; + + default: + localid = !mrb_enc_isupper(*m, enc); +id: + if (m >= e || (*m != '_' && !mrb_enc_isalpha(*m, enc) && ISASCII(*m))) + return FALSE; + while (m < e && is_identchar(m, e, enc)) m += mrb_enc_mbclen(m, e, enc); + if (localid) { + switch (*m) { + case '!': case '?': case '=': ++m; + } + } + break; + } + return m == e; +} + +int +mrb_enc_symname_p(const char *name, mrb_encoding *enc) +{ + return mrb_enc_symname2_p(name, strlen(name), enc); +} +#endif //INCLUDE_ENCODING diff --git a/src/re.h b/src/re.h new file mode 100644 index 0000000000..e388f602c3 --- /dev/null +++ b/src/re.h @@ -0,0 +1,85 @@ +/********************************************************************** + + re.h - + +**********************************************************************/ + +#ifndef RE_H +#define RE_H + +//#include +#include + +#include 
"node.h" +#include "regex.h" +#include "encoding.h" + +#define BEG(no) regs->beg[no] +#define END(no) regs->end[no] + +struct rmatch_offset { + long beg; + long end; +}; + +struct rmatch { + struct re_registers regs; + + int char_offset_updated; + int char_offset_num_allocated; + struct rmatch_offset *char_offset; +}; + +//struct RMatch { +// MRUBY_OBJECT_HEADER; +// mrb_value str; +// struct re_registers *regs; +//}; +struct RMatch { + MRUBY_OBJECT_HEADER; + mrb_value str; + struct rmatch *rmatch; + mrb_value regexp; /* RRegexp */ +}; + +struct RRegexp { + MRUBY_OBJECT_HEADER; + struct re_pattern_buffer *ptr; + mrb_value src; + unsigned long usecnt; +}; + +#define mrb_regex_ptr(r) ((struct RRegexp*)((r).value.p)) +#define RREGEXP(r) ((struct RRegexp*)((r).value.p)) +#define RREGEXP_SRC(r) (RREGEXP(r)->src) +#define RREGEXP_SRC_PTR(r) (((struct RString*)(RREGEXP_SRC(r).value.p))->buf) +#define RREGEXP_SRC_LEN(r) RSTRING_LEN(RREGEXP(r)->src) +int re_adjust_startpos(struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range); + +typedef struct re_pattern_buffer Regexp; + +//#define RMATCH(obj) (R_CAST(RMatch)(obj)) +#define RMATCH_REGS(v) (&((struct RMatch*)((v).value.p))->rmatch->regs) +#define RMATCH(v) ((struct RMatch*)((v).value.p)) +#define mrb_match_ptr(v) ((struct RMatch*)((v).value.p)) + +int mrb_memcmp(const void *p1, const void *p2, int len); + +mrb_int mrb_reg_search (mrb_state *mrb, mrb_value, mrb_value, mrb_int, mrb_int); +mrb_value mrb_reg_regsub (mrb_state *mrb, mrb_value, mrb_value, struct re_registers *, mrb_value); +//mrb_value mrb_reg_regsub(mrb_value, mrb_value, struct re_registers *, mrb_value); +mrb_int mrb_reg_adjust_startpos(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse); +void mrb_match_busy (mrb_value); + +mrb_value mrb_reg_quote(mrb_state *mrb, mrb_value str); +mrb_value mrb_reg_regcomp(mrb_state *mrb, mrb_value str); +mrb_value mrb_reg_match_str(mrb_state *mrb, mrb_value re, mrb_value 
str); +mrb_value mrb_reg_nth_match(mrb_state *mrb, mrb_int nth, mrb_value match); +mrb_value mrb_backref_get(mrb_state *mrb); +//mrb_int mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n); +mrb_value mrb_reg_to_s(mrb_state *mrb, mrb_value re); +void mrb_backref_set(mrb_state *mrb, mrb_value val); +mrb_value match_alloc(mrb_state *mrb); +int mrb_reg_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref); + +#endif diff --git a/src/regcomp.c b/src/regcomp.c new file mode 100644 index 0000000000..3aaac41944 --- /dev/null +++ b/src/regcomp.c @@ -0,0 +1,6286 @@ +/********************************************************************** + regcomp.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "mruby.h" +#include +#include "regparse.h" +#ifdef INCLUDE_REGEXP + +OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; + +extern OnigCaseFoldType +onig_get_default_case_fold_flag(void) +{ + return OnigDefaultCaseFoldFlag; +} + +extern int +onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) +{ + OnigDefaultCaseFoldFlag = case_fold_flag; + return 0; +} + + +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; +#endif + +static UChar* +str_dup(UChar* s, UChar* end) +{ + ptrdiff_t len = end - s; + + if (len > 0) { + UChar* r = (UChar* )xmalloc(len + 1); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, len); + r[len] = (UChar )0; + return r; + } + else return NULL; +} + +static void +swap_node(Node* a, Node* b) +{ + Node c; + c = *a; *a = *b; *b = c; + + if (NTYPE(a) == NT_STR) { + StrNode* sn = NSTR(a); + if (sn->capa == 0) { + size_t len = sn->end - sn->s; + sn->s = sn->buf; + sn->end = sn->s + len; + } + } + + if (NTYPE(b) == NT_STR) { + StrNode* sn = NSTR(b); + if (sn->capa == 0) { + size_t len = sn->end - sn->s; + sn->s = sn->buf; + sn->end = sn->s + len; + } + } +} + +static OnigDistance +distance_add(OnigDistance d1, OnigDistance d2) +{ + if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) + return ONIG_INFINITE_DISTANCE; + else { + if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; + else return ONIG_INFINITE_DISTANCE; + } +} + +static OnigDistance 
+distance_multiply(OnigDistance d, int m) +{ + if (m == 0) return 0; + + if (d < ONIG_INFINITE_DISTANCE / m) + return d * m; + else + return ONIG_INFINITE_DISTANCE; +} + +static int +bitset_is_empty(BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { + if (bs[i] != 0) return 0; + } + return 1; +} + +#ifdef ONIG_DEBUG +static int +bitset_on_num(BitSetRef bs) +{ + int i, n; + + n = 0; + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(bs, i)) n++; + } + return n; +} +#endif + +extern int +onig_bbuf_init(BBuf* buf, int size) +{ + if (size <= 0) { + size = 0; + buf->p = NULL; + } + else { + buf->p = (UChar* )xmalloc(size); + if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); + } + + buf->alloc = size; + buf->used = 0; + return 0; +} + + +#ifdef USE_SUBEXP_CALL + +static int +unset_addr_list_init(UnsetAddrList* uslist, int size) +{ + UnsetAddr* p; + + p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); + CHECK_NULL_RETURN_MEMERR(p); + uslist->num = 0; + uslist->alloc = size; + uslist->us = p; + return 0; +} + +static void +unset_addr_list_end(UnsetAddrList* uslist) +{ + if (IS_NOT_NULL(uslist->us)) + xfree(uslist->us); +} + +static int +unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) +{ + UnsetAddr* p; + int size; + + if (uslist->num >= uslist->alloc) { + size = uslist->alloc * 2; + p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); + CHECK_NULL_RETURN_MEMERR(p); + uslist->alloc = size; + uslist->us = p; + } + + uslist->us[uslist->num].offset = offset; + uslist->us[uslist->num].target = node; + uslist->num++; + return 0; +} +#endif /* USE_SUBEXP_CALL */ + + +static int +add_opcode(regex_t* reg, int opcode) +{ + BBUF_ADD1(reg, opcode); + return 0; +} + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +static int +add_state_check_num(regex_t* reg, int num) +{ + StateCheckNumType n = (StateCheckNumType )num; + + BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); + return 0; +} +#endif + +static int +add_rel_addr(regex_t* reg, int 
addr) +{ + RelAddrType ra = (RelAddrType )addr; + + BBUF_ADD(reg, &ra, SIZE_RELADDR); + return 0; +} + +static int +add_abs_addr(regex_t* reg, int addr) +{ + AbsAddrType ra = (AbsAddrType )addr; + + BBUF_ADD(reg, &ra, SIZE_ABSADDR); + return 0; +} + +static int +add_length(regex_t* reg, int len) +{ + LengthType l = (LengthType )len; + + BBUF_ADD(reg, &l, SIZE_LENGTH); + return 0; +} + +static int +add_mem_num(regex_t* reg, int num) +{ + MemNumType n = (MemNumType )num; + + BBUF_ADD(reg, &n, SIZE_MEMNUM); + return 0; +} + +static int +add_pointer(regex_t* reg, void* addr) +{ + PointerType ptr = (PointerType )addr; + + BBUF_ADD(reg, &ptr, SIZE_POINTER); + return 0; +} + +static int +add_option(regex_t* reg, OnigOptionType option) +{ + BBUF_ADD(reg, &option, SIZE_OPTION); + return 0; +} + +static int +add_opcode_rel_addr(regex_t* reg, int opcode, int addr) +{ + int r; + + r = add_opcode(reg, opcode); + if (r) return r; + r = add_rel_addr(reg, addr); + return r; +} + +static int +add_bytes(regex_t* reg, UChar* bytes, int len) +{ + BBUF_ADD(reg, bytes, len); + return 0; +} + +static int +add_bitset(regex_t* reg, BitSetRef bs) +{ + BBUF_ADD(reg, bs, SIZE_BITSET); + return 0; +} + +static int +add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) +{ + int r; + + r = add_opcode(reg, opcode); + if (r) return r; + r = add_option(reg, option); + return r; +} + +static int compile_length_tree(Node* node, regex_t* reg); +static int compile_tree(Node* node, regex_t* reg); + + +#define IS_NEED_STR_LEN_OP_EXACT(op) \ + ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ + (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) + +static int +select_str_opcode(int mb_len, int str_len, int ignore_case) +{ + int op; + + if (ignore_case) { + switch (str_len) { + case 1: op = OP_EXACT1_IC; break; + default: op = OP_EXACTN_IC; break; + } + } + else { + switch (mb_len) { + case 1: + switch (str_len) { + case 1: op = OP_EXACT1; break; + case 2: op = OP_EXACT2; break; 
+ case 3: op = OP_EXACT3; break; + case 4: op = OP_EXACT4; break; + case 5: op = OP_EXACT5; break; + default: op = OP_EXACTN; break; + } + break; + + case 2: + switch (str_len) { + case 1: op = OP_EXACTMB2N1; break; + case 2: op = OP_EXACTMB2N2; break; + case 3: op = OP_EXACTMB2N3; break; + default: op = OP_EXACTMB2N; break; + } + break; + + case 3: + op = OP_EXACTMB3N; + break; + + default: + op = OP_EXACTMBN; + break; + } + } + return op; +} + +static int +compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) +{ + int r; + int saved_num_null_check = reg->num_null_check; + + if (empty_info != 0) { + r = add_opcode(reg, OP_NULL_CHECK_START); + if (r) return r; + r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ + if (r) return r; + reg->num_null_check++; + } + + r = compile_tree(node, reg); + if (r) return r; + + if (empty_info != 0) { + if (empty_info == NQ_TARGET_IS_EMPTY) + r = add_opcode(reg, OP_NULL_CHECK_END); + else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) + r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); + else if (empty_info == NQ_TARGET_IS_EMPTY_REC) + r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); + + if (r) return r; + r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ + } + return r; +} + +#ifdef USE_SUBEXP_CALL +static int +compile_call(CallNode* node, regex_t* reg) +{ + int r; + + r = add_opcode(reg, OP_CALL); + if (r) return r; + r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), + node->target); + if (r) return r; + r = add_abs_addr(reg, 0 /*dummy addr.*/); + return r; +} +#endif + +static int +compile_tree_n_times(Node* node, int n, regex_t* reg) +{ + int i, r; + + for (i = 0; i < n; i++) { + r = compile_tree(node, reg); + if (r) return r; + } + return 0; +} + +static int +add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len, + regex_t* reg ARG_UNUSED, int ignore_case) +{ + int len; + int op = select_str_opcode(mb_len, str_len, ignore_case); + + len = 
SIZE_OPCODE; + + if (op == OP_EXACTMBN) len += SIZE_LENGTH; + if (IS_NEED_STR_LEN_OP_EXACT(op)) + len += SIZE_LENGTH; + + len += mb_len * str_len; + return len; +} + +/* + * Emit one run of str_len characters of mb_len bytes each, starting at s: + * the opcode picked by select_str_opcode(), the length operand(s) that + * opcode requires, then the mb_len * str_len raw bytes. + * Returns 0 on success. A non-zero status from any add_*() helper is + * returned immediately, following the "r = add_...(); if (r) return r;" + * convention of the other emitters in this file. + */ +static int +add_compile_string(UChar* s, int mb_len, int str_len, + regex_t* reg, int ignore_case) +{ + int r; + int op = select_str_opcode(mb_len, str_len, ignore_case); + + r = add_opcode(reg, op); + if (r) return r; + + /* OP_EXACTMBN carries the per-character byte width as its first operand. */ + if (op == OP_EXACTMBN) { + r = add_length(reg, mb_len); + if (r) return r; + } + + if (IS_NEED_STR_LEN_OP_EXACT(op)) { + /* OP_EXACTN_IC stores a byte count; the other N-forms store str_len. */ + if (op == OP_EXACTN_IC) + r = add_length(reg, mb_len * str_len); + else + r = add_length(reg, str_len); + if (r) return r; + } + + return add_bytes(reg, s, mb_len * str_len); +} + + +/* + * Sum add_compile_string_length() over the node's runs of consecutive + * characters that share the same encoded byte length. + * Returns 0 for an empty string node. + */ +static int +compile_length_string_node(Node* node, regex_t* reg) +{ + int rlen, r, len, prev_len, slen, ambig; + OnigEncoding enc = reg->enc; + UChar *p, *prev; + StrNode* sn; + + sn = NSTR(node); + if (sn->end <= sn->s) + return 0; + + ambig = NSTRING_IS_AMBIG(node); + + p = prev = sn->s; + prev_len = enclen(enc, p, sn->end); + p += prev_len; + slen = 1; + rlen = 0; + + for (; p < sn->end; ) { + len = enclen(enc, p, sn->end); + if (len == prev_len) { + slen++; + } + else { + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + rlen += r; + prev = p; + slen = 1; + prev_len = len; + } + p += len; + } + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + rlen += r; + return rlen; +} + +/* Size of a raw string node: its bytes are counted as one single-byte run. */ +static int +compile_length_string_raw_node(StrNode* sn, regex_t* reg) +{ + if (sn->end <= sn->s) + return 0; + + return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +static int +compile_string_node(Node* node, regex_t* reg) +{ + int r, len, prev_len, slen, ambig; + OnigEncoding enc = reg->enc; + UChar *p, *prev, *end; + StrNode* sn; + + sn = NSTR(node); + if (sn->end <= sn->s) + return 0; + + end = sn->end; + ambig = NSTRING_IS_AMBIG(node); + + p = prev = sn->s; + prev_len = enclen(enc, p, end); + p += prev_len; + slen = 1; + + for (; p < end; ) { + len = enclen(enc, p, end); + if (len == prev_len) { + slen++; + } + else { + r = 
add_compile_string(prev, prev_len, slen, reg, ambig); + if (r) return r; + + prev = p; + slen = 1; + prev_len = len; + } + + p += len; + } + return add_compile_string(prev, prev_len, slen, reg, ambig); +} + +/* Emit a raw string node as one run of single-byte characters. */ +static int +compile_string_raw_node(StrNode* sn, regex_t* reg) +{ + if (sn->end <= sn->s) + return 0; + + return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +/* + * Append the multi-byte range buffer mbuf to the compiled program as a + * length operand followed by its mbuf->used bytes. + * Without PLATFORM_UNALIGNED_WORD_ACCESS, pad bytes from PadBuf are written + * before and after the data; the two pads always total + * WORD_ALIGNMENT_SIZE - 1 bytes. + * Returns 0 on success; the first non-zero status from add_length() or + * add_bytes() is returned immediately and nothing further is emitted. + */ +static int +add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) +{ + int r; +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + r = add_length(reg, mbuf->used); + if (r) return r; + return add_bytes(reg, mbuf->p, mbuf->used); +#else + int pad_size; + UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; + + GET_ALIGNMENT_PAD_SIZE(p, pad_size); + r = add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); + if (r) return r; + if (pad_size != 0) { + r = add_bytes(reg, PadBuf, pad_size); + if (r) return r; + } + + r = add_bytes(reg, mbuf->p, mbuf->used); + if (r) return r; + + /* Trailing padding keeps the emitted size equal to the fixed estimate returned by compile_length_cclass_node(). */ + pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; + if (pad_size != 0) r = add_bytes(reg, PadBuf, pad_size); + return r; +#endif +} + +/* + * Size of the code emitted for a character class: opcode + pointer for a + * shared class, opcode + bitset when there is no multi-byte buffer, + * otherwise opcode (+ bitset unless it is unused) + length + buffer bytes + * (+ WORD_ALIGNMENT_SIZE - 1 pad bytes on aligned-access platforms). + */ +static int +compile_length_cclass_node(CClassNode* cc, regex_t* reg) +{ + int len; + + if (IS_NCCLASS_SHARE(cc)) { + len = SIZE_OPCODE + SIZE_POINTER; + return len; + } + + if (IS_NULL(cc->mbuf)) { + len = SIZE_OPCODE + SIZE_BITSET; + } + else { + if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { + len = SIZE_OPCODE; + } + else { + len = SIZE_OPCODE + SIZE_BITSET; + } +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + len += SIZE_LENGTH + cc->mbuf->used; +#else + len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); +#endif + } + + return len; +} + +static int +compile_cclass_node(CClassNode* cc, regex_t* reg) +{ + int r; + + if (IS_NCCLASS_SHARE(cc)) { + add_opcode(reg, OP_CCLASS_NODE); + r = add_pointer(reg, cc); + return r; + } + + if (IS_NULL(cc->mbuf)) { + if (IS_NCCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_NOT); + else + add_opcode(reg, OP_CCLASS); + + r = add_bitset(reg, cc->bs); + } + else { + if 
(ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { + if (IS_NCCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MB_NOT); + else + add_opcode(reg, OP_CCLASS_MB); + + r = add_multi_byte_cclass(cc->mbuf, reg); + } + else { + if (IS_NCCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MIX_NOT); + else + add_opcode(reg, OP_CCLASS_MIX); + + r = add_bitset(reg, cc->bs); + if (r) return r; + r = add_multi_byte_cclass(cc->mbuf, reg); + } + } + + return r; +} + +static int +entry_repeat_range(regex_t* reg, int id, int lower, int upper) +{ +#define REPEAT_RANGE_ALLOC 4 + + OnigRepeatRange* p; + + if (reg->repeat_range_alloc == 0) { + p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); + CHECK_NULL_RETURN_MEMERR(p); + reg->repeat_range = p; + reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; + } + else if (reg->repeat_range_alloc <= id) { + int n; + n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; + p = (OnigRepeatRange* )xrealloc(reg->repeat_range, + sizeof(OnigRepeatRange) * n); + CHECK_NULL_RETURN_MEMERR(p); + reg->repeat_range = p; + reg->repeat_range_alloc = n; + } + else { + p = reg->repeat_range; + } + + p[id].lower = lower; + p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); + return 0; +} + +static int +compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, + regex_t* reg) +{ + int r; + int num_repeat = reg->num_repeat; + + r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); + if (r) return r; + r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ + reg->num_repeat++; + if (r) return r; + r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); + if (r) return r; + + r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); + if (r) return r; + + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + + if ( +#ifdef USE_SUBEXP_CALL + reg->num_call > 0 || +#endif + IS_QUANTIFIER_IN_REPEAT(qn)) { + r = add_opcode(reg, qn->greedy ? 
OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); + } + else { + r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); + } + if (r) return r; + r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ + return r; +} + +static int +is_anychar_star_quantifier(QtfrNode* qn) +{ + if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && + NTYPE(qn->target) == NT_CANY) + return 1; + else + return 0; +} + +#define QUANTIFIER_EXPAND_LIMIT_SIZE 50 +#define CKN_ON (ckn > 0) + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +static int +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int len, mod_tlen, cklen; + int ckn; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); + + cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); + + /* anychar repeat */ + if (NTYPE(qn->target) == NT_CANY) { + if (qn->greedy && infinite) { + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; + else + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && qn->lower <= 1) { + if (qn->greedy) { + if (qn->lower == 1) + len = SIZE_OP_JUMP; + else + len = 0; + + len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; + } + else { + if (qn->lower == 0) + len = SIZE_OP_JUMP; + else + len = 0; + + len += mod_tlen + SIZE_OP_PUSH + cklen; + } + } + else if (qn->upper == 0) { + if (qn->is_refered != 0) /* /(?..){0}/ */ + len = SIZE_OP_JUMP + tlen; + else + len = 0; + } + else if (qn->upper == 1 && qn->greedy) { + if (qn->lower == 0) { + if (CKN_ON) { + len = SIZE_OP_STATE_CHECK_PUSH + tlen; + } + else { + len = SIZE_OP_PUSH + tlen; + } + } + else { + len = tlen; + } + } + else if (!qn->greedy && qn->upper == 
1 && qn->lower == 0) { /* '??' */ + len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; + } + else { + len = SIZE_OP_REPEAT_INC + + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + if (CKN_ON) + len += SIZE_OP_STATE_CHECK; + } + + return len; +} + +static int +compile_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int r, mod_tlen; + int ckn; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); + + if (is_anychar_star_quantifier(qn)) { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + else + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + if (r) return r; + if (CKN_ON) { + r = add_state_check_num(reg, ckn); + if (r) return r; + } + + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + } + else { + if (IS_MULTILINE(reg->options)) { + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_ML_STAR + : OP_ANYCHAR_ML_STAR)); + } + else { + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_STAR + : OP_ANYCHAR_STAR)); + } + if (r) return r; + if (CKN_ON) + r = add_state_check_num(reg, ckn); + + return r; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && qn->lower <= 1) { + if (qn->greedy) { + if (qn->lower == 1) { + r = add_opcode_rel_addr(reg, OP_JUMP, + (CKN_ON ? 
SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); + if (r) return r; + } + + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + } + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); + } + else { + if (qn->lower == 0) { + r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); + if (r) return r; + } + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, + -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); + } + else + r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); + } + } + else if (qn->upper == 0) { + if (qn->is_refered != 0) { /* /(?..){0}/ */ + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else + r = 0; + } + else if (qn->upper == 1 && qn->greedy) { + if (qn->lower == 0) { + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, tlen); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, tlen); + } + if (r) return r; + } + + r = compile_tree(qn->target, reg); + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, SIZE_OP_JUMP); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); + } + + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else { + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + if (CKN_ON) { + if (r) return r; + r = add_opcode(reg, OP_STATE_CHECK); + if (r) return r; + r = add_state_check_num(reg, ckn); + } + } + return r; +} + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +static int +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int len, mod_tlen; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + /* anychar repeat */ + if (NTYPE(qn->target) == NT_CANY) { + if (qn->greedy && infinite) { + if (IS_NOT_NULL(qn->next_head_exact)) + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; + else + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { + len = SIZE_OP_JUMP; + } + else { + len = tlen * qn->lower; + } + + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) + len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; + else if (IS_NOT_NULL(qn->next_head_exact)) + len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; + else + len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; + } + else + len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; + } + else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?..){0}/ */ + len = SIZE_OP_JUMP + tlen; + } + else if (!infinite && 
qn->greedy && + (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + len = tlen * qn->lower; + len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ + len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; + } + else { + len = SIZE_OP_REPEAT_INC + + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + } + + return len; +} + +static int +compile_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int i, r, mod_tlen; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + if (is_anychar_star_quantifier(qn)) { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + if (IS_NOT_NULL(qn->next_head_exact)) { + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + else + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + if (r) return r; + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + } + else { + if (IS_MULTILINE(reg->options)) + return add_opcode(reg, OP_ANYCHAR_ML_STAR); + else + return add_opcode(reg, OP_ANYCHAR_STAR); + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); + else if (IS_NOT_NULL(qn->next_head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); + else + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); + } + else { + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); + } + if (r) return r; + } + else { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + } + + if 
(qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) { + r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTR(qn->head_exact)->s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); + } + else if (IS_NOT_NULL(qn->next_head_exact)) { + r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); + } + } + else { + r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); + } + } + else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?..){0}/ */ + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else if (!infinite && qn->greedy && + (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + int n = qn->upper - qn->lower; + + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + + for (i = 0; i < n; i++) { + r = add_opcode_rel_addr(reg, OP_PUSH, + (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); + if (r) return r; + r = compile_tree(qn->target, reg); + if (r) return r; + } + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ + r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else { + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + } + return r; +} +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + +static int +compile_length_option_node(EncloseNode* node, regex_t* reg) +{ + int tlen; + OnigOptionType prev = reg->options; + + reg->options = node->option; + tlen = compile_length_tree(node->target, reg); + reg->options = prev; + + if (tlen < 0) return tlen; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL + + tlen + SIZE_OP_SET_OPTION; + } + else + return tlen; +} + +static int +compile_option_node(EncloseNode* node, regex_t* reg) +{ + int r; + OnigOptionType prev = reg->options; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + if (r) return r; + r = add_opcode(reg, OP_FAIL); + if (r) return r; + } + + reg->options = node->option; + r = compile_tree(node->target, reg); + reg->options = prev; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + } + return r; +} + +static int +compile_length_enclose_node(EncloseNode* node, regex_t* reg) +{ + int len; + int tlen; + + if (node->type == ENCLOSE_OPTION) + return compile_length_option_node(node, reg); + + if (node->target) { + tlen = compile_length_tree(node->target, reg); + if (tlen < 0) return tlen; + } + else + tlen = 0; + + switch (node->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_CALLED(node)) { + len = SIZE_OP_MEMORY_START_PUSH + tlen + + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + len += (IS_ENCLOSE_RECURSION(node) + ? 
SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); + else + len += (IS_ENCLOSE_RECURSION(node) + ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + } + else +#endif + { + if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) + len = SIZE_OP_MEMORY_START_PUSH; + else + len = SIZE_OP_MEMORY_START; + + len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) + ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); + } + break; + + case ENCLOSE_STOP_BACKTRACK: + if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { + QtfrNode* qn = NQTFR(node->target); + tlen = compile_length_tree(qn->target, reg); + if (tlen < 0) return tlen; + + len = tlen * qn->lower + + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; + } + else { + len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return len; +} + +static int get_char_length_tree(Node* node, regex_t* reg, int* len); + +static int +compile_enclose_node(EncloseNode* node, regex_t* reg) +{ + int r, len; + + if (node->type == ENCLOSE_OPTION) + return compile_option_node(node, reg); + + switch (node->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_CALLED(node)) { + r = add_opcode(reg, OP_CALL); + if (r) return r; + node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; + node->state |= NST_ADDR_FIXED; + r = add_abs_addr(reg, (int )node->call_addr); + if (r) return r; + len = compile_length_tree(node->target, reg); + len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + len += (IS_ENCLOSE_RECURSION(node) + ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); + else + len += (IS_ENCLOSE_RECURSION(node) + ? 
SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + + r = add_opcode_rel_addr(reg, OP_JUMP, len); + if (r) return r; + } +#endif + if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) + r = add_opcode(reg, OP_MEMORY_START_PUSH); + else + r = add_opcode(reg, OP_MEMORY_START); + if (r) return r; + r = add_mem_num(reg, node->regnum); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_CALLED(node)) { + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) + ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); + else + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) + ? OP_MEMORY_END_REC : OP_MEMORY_END)); + + if (r) return r; + r = add_mem_num(reg, node->regnum); + if (r) return r; + r = add_opcode(reg, OP_RETURN); + } + else +#endif + { + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + r = add_opcode(reg, OP_MEMORY_END_PUSH); + else + r = add_opcode(reg, OP_MEMORY_END); + if (r) return r; + r = add_mem_num(reg, node->regnum); + } + break; + + case ENCLOSE_STOP_BACKTRACK: + if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { + QtfrNode* qn = NQTFR(node->target); + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + + len = compile_length_tree(qn->target, reg); + if (len < 0) return len; + + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); + if (r) return r; + r = compile_tree(qn->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); + } + else { + r = add_opcode(reg, OP_PUSH_STOP_BT); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP_STOP_BT); + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +compile_length_anchor_node(AnchorNode* node, regex_t* reg) +{ + int len; + int tlen = 0; + 
+ if (node->target) { + tlen = compile_length_tree(node->target, reg); + if (tlen < 0) return tlen; + } + + switch (node->type) { + case ANCHOR_PREC_READ: + len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; + break; + case ANCHOR_PREC_READ_NOT: + len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; + break; + case ANCHOR_LOOK_BEHIND: + len = SIZE_OP_LOOK_BEHIND + tlen; + break; + case ANCHOR_LOOK_BEHIND_NOT: + len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; + break; + + default: + len = SIZE_OPCODE; + break; + } + + return len; +} + +static int +compile_anchor_node(AnchorNode* node, regex_t* reg) +{ + int r, len; + + switch (node->type) { + case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; + case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; + case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; + case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; + case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; + case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; + + case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; + case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; +#ifdef USE_WORD_BEGIN_END + case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; + case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break; +#endif + + case ANCHOR_PREC_READ: + r = add_opcode(reg, OP_PUSH_POS); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP_POS); + break; + + case ANCHOR_PREC_READ_NOT: + len = compile_length_tree(node->target, reg); + if (len < 0) return len; + r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_FAIL_POS); + break; + + case ANCHOR_LOOK_BEHIND: + { + int n; + r = add_opcode(reg, OP_LOOK_BEHIND); + if (r) return r; + if 
(node->char_len < 0) { + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else + n = node->char_len; + r = add_length(reg, n); + if (r) return r; + r = compile_tree(node->target, reg); + } + break; + + case ANCHOR_LOOK_BEHIND_NOT: + { + int n; + len = compile_length_tree(node->target, reg); + r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, + len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); + if (r) return r; + if (node->char_len < 0) { + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else + n = node->char_len; + r = add_length(reg, n); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +compile_length_tree(Node* node, regex_t* reg) +{ + int len, type, r; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + len = 0; + do { + r = compile_length_tree(NCAR(node), reg); + if (r < 0) return r; + len += r; + } while (IS_NOT_NULL(node = NCDR(node))); + r = len; + break; + + case NT_ALT: + { + int n; + + n = r = 0; + do { + r += compile_length_tree(NCAR(node), reg); + n++; + } while (IS_NOT_NULL(node = NCDR(node))); + r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); + } + break; + + case NT_STR: + if (NSTRING_IS_RAW(node)) + r = compile_length_string_raw_node(NSTR(node), reg); + else + r = compile_length_string_node(node, reg); + break; + + case NT_CCLASS: + r = compile_length_cclass_node(NCCLASS(node), reg); + break; + + case NT_CTYPE: + case NT_CANY: + r = SIZE_OPCODE; + break; + + case NT_BREF: + { + BRefNode* br = NBREF(node); + +#ifdef USE_BACKREF_WITH_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + else +#endif + if (br->back_num == 1) { + r = ((!IS_IGNORECASE(reg->options) && 
br->back_static[0] <= 2) + ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); + } + else { + r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + r = SIZE_OP_CALL; + break; +#endif + + case NT_QTFR: + r = compile_length_quantifier_node(NQTFR(node), reg); + break; + + case NT_ENCLOSE: + r = compile_length_enclose_node(NENCLOSE(node), reg); + break; + + case NT_ANCHOR: + r = compile_length_anchor_node(NANCHOR(node), reg); + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +compile_tree(Node* node, regex_t* reg) +{ + int n, type, len, pos, r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + do { + r = compile_tree(NCAR(node), reg); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + { + Node* x = node; + len = 0; + do { + len += compile_length_tree(NCAR(x), reg); + if (NCDR(x) != NULL) { + len += SIZE_OP_PUSH + SIZE_OP_JUMP; + } + } while (IS_NOT_NULL(x = NCDR(x))); + pos = reg->used + len; /* goal position */ + + do { + len = compile_length_tree(NCAR(node), reg); + if (IS_NOT_NULL(NCDR(node))) { + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); + if (r) break; + } + r = compile_tree(NCAR(node), reg); + if (r) break; + if (IS_NOT_NULL(NCDR(node))) { + len = pos - (reg->used + SIZE_OP_JUMP); + r = add_opcode_rel_addr(reg, OP_JUMP, len); + if (r) break; + } + } while (IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_STR: + if (NSTRING_IS_RAW(node)) + r = compile_string_raw_node(NSTR(node), reg); + else + r = compile_string_node(node, reg); + break; + + case NT_CCLASS: + r = compile_cclass_node(NCCLASS(node), reg); + break; + + case NT_CTYPE: + { + int op; + + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; + else op = OP_WORD; + break; + default: + return ONIGERR_TYPE_BUG; + break; + } + r = add_opcode(reg, op); + } + break; + + case NT_CANY: + 
if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML); + else + r = add_opcode(reg, OP_ANYCHAR); + break; + + case NT_BREF: + { + BRefNode* br = NBREF(node); + +#ifdef USE_BACKREF_WITH_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); + if (r) return r; + r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); + if (r) return r; + r = add_length(reg, br->nest_level); + if (r) return r; + + goto add_bacref_mems; + } + else +#endif + if (br->back_num == 1) { + n = br->back_static[0]; + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREFN_IC); + if (r) return r; + r = add_mem_num(reg, n); + } + else { + switch (n) { + case 1: r = add_opcode(reg, OP_BACKREF1); break; + case 2: r = add_opcode(reg, OP_BACKREF2); break; + default: + r = add_opcode(reg, OP_BACKREFN); + if (r) return r; + r = add_mem_num(reg, n); + break; + } + } + } + else { + int i; + int* p; + + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREF_MULTI_IC); + } + else { + r = add_opcode(reg, OP_BACKREF_MULTI); + } + if (r) return r; + +#ifdef USE_BACKREF_WITH_LEVEL + add_bacref_mems: +#endif + r = add_length(reg, br->back_num); + if (r) return r; + p = BACKREFS_P(br); + for (i = br->back_num - 1; i >= 0; i--) { + r = add_mem_num(reg, p[i]); + if (r) return r; + } + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + r = compile_call(NCALL(node), reg); + break; +#endif + + case NT_QTFR: + r = compile_quantifier_node(NQTFR(node), reg); + break; + + case NT_ENCLOSE: + r = compile_enclose_node(NENCLOSE(node), reg); + break; + + case NT_ANCHOR: + r = compile_anchor_node(NANCHOR(node), reg); + break; + + default: +#ifdef ONIG_DEBUG + fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); +#endif + break; + } + + return r; +} + +#ifdef USE_NAMED_GROUP + +static int +noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) +{ + int r = 0; + Node* node = *plink; + + switch (NTYPE(node)) { + 
case NT_LIST: + case NT_ALT: + do { + r = noname_disable_map(&(NCAR(node)), map, counter); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + { + Node** ptarget = &(NQTFR(node)->target); + Node* old = *ptarget; + r = noname_disable_map(ptarget, map, counter); + if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { + onig_reduce_nested_quantifier(node, *ptarget); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { + if (IS_ENCLOSE_NAMED_GROUP(en)) { + (*counter)++; + map[en->regnum].new_val = *counter; + en->regnum = *counter; + r = noname_disable_map(&(en->target), map, counter); + } + else { + *plink = en->target; + en->target = NULL_NODE; + onig_node_free(node); + r = noname_disable_map(plink, map, counter); + } + } + else + r = noname_disable_map(&(en->target), map, counter); + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = noname_disable_map(&(an->target), map, counter); + break; + } + } + break; + + default: + break; + } + + return r; +} + +static int +renumber_node_backref(Node* node, GroupNumRemap* map) +{ + int i, pos, n, old_num; + int *backs; + BRefNode* bn = NBREF(node); + + if (! 
IS_BACKREF_NAME_REF(bn)) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + + old_num = bn->back_num; + if (IS_NULL(bn->back_dynamic)) + backs = bn->back_static; + else + backs = bn->back_dynamic; + + for (i = 0, pos = 0; i < old_num; i++) { + n = map[backs[i]].new_val; + if (n > 0) { + backs[pos] = n; + pos++; + } + } + + bn->back_num = pos; + return 0; +} + +static int +renumber_by_map(Node* node, GroupNumRemap* map) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + do { + r = renumber_by_map(NCAR(node), map); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + case NT_QTFR: + r = renumber_by_map(NQTFR(node)->target, map); + break; + case NT_ENCLOSE: + r = renumber_by_map(NENCLOSE(node)->target, map); + break; + + case NT_BREF: + r = renumber_node_backref(node, map); + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = renumber_by_map(an->target, map); + break; + } + } + break; + + default: + break; + } + + return r; +} + +static int +numbered_ref_check(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + do { + r = numbered_ref_check(NCAR(node)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + case NT_QTFR: + r = numbered_ref_check(NQTFR(node)->target); + break; + case NT_ENCLOSE: + r = numbered_ref_check(NENCLOSE(node)->target); + break; + + case NT_BREF: + if (! 
IS_BACKREF_NAME_REF(NBREF(node))) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + break; + + default: + break; + } + + return r; +} + +static int +disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) +{ + int r, i, pos, counter; + BitStatusType loc; + GroupNumRemap* map; + + map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); + CHECK_NULL_RETURN_MEMERR(map); + for (i = 1; i <= env->num_mem; i++) { + map[i].new_val = 0; + } + counter = 0; + r = noname_disable_map(root, map, &counter); + if (r != 0) return r; + + r = renumber_by_map(*root, map); + if (r != 0) return r; + + for (i = 1, pos = 1; i <= env->num_mem; i++) { + if (map[i].new_val > 0) { + SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; + pos++; + } + } + + loc = env->capture_history; + BIT_STATUS_CLEAR(env->capture_history); + for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(loc, i)) { + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); + } + } + + env->num_mem = env->num_named; + reg->num_mem = env->num_named; + + return onig_renumber_name_table(reg, map); +} +#endif /* USE_NAMED_GROUP */ + +#ifdef USE_SUBEXP_CALL +static int +unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) +{ + int i, offset; + EncloseNode* en; + AbsAddrType addr; + + for (i = 0; i < uslist->num; i++) { + en = NENCLOSE(uslist->us[i].target); + if (! 
IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; + addr = en->call_addr; + offset = uslist->us[i].offset; + + BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); + } + return 0; +} +#endif + +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT +static int +quantifiers_memory_node_info(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + { + int v; + do { + v = quantifiers_memory_node_info(NCAR(node)); + if (v > r) r = v; + } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { + return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ + } + else + r = quantifiers_memory_node_info(NCALL(node)->target); + break; +#endif + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + if (qn->upper != 0) { + r = quantifiers_memory_node_info(qn->target); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: + return NQ_TARGET_IS_EMPTY_MEM; + break; + + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = quantifiers_memory_node_info(en->target); + break; + default: + break; + } + } + break; + + case NT_BREF: + case NT_STR: + case NT_CTYPE: + case NT_CCLASS: + case NT_CANY: + case NT_ANCHOR: + default: + break; + } + + return r; +} +#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ + +static int +get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) +{ + OnigDistance tmin; + int r = 0; + + *min = 0; + switch (NTYPE(node)) { + case NT_BREF: + { + int i; + int* backs; + Node** nodes = SCANENV_MEM_NODES(env); + BRefNode* br = NBREF(node); + if (br->state & NST_RECURSION) break; + + backs = BACKREFS_P(br); + if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = get_min_match_length(nodes[backs[0]], min, env); + if (r != 0) break; + for (i = 1; i < br->back_num; i++) { + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = 
get_min_match_length(nodes[backs[i]], &tmin, env); + if (r != 0) break; + if (*min > tmin) *min = tmin; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { + EncloseNode* en = NENCLOSE(NCALL(node)->target); + if (IS_ENCLOSE_MIN_FIXED(en)) + *min = en->min_len; + } + else + r = get_min_match_length(NCALL(node)->target, min, env); + break; +#endif + + case NT_LIST: + do { + r = get_min_match_length(NCAR(node), &tmin, env); + if (r == 0) *min += tmin; + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + { + Node *x, *y; + y = node; + do { + x = NCAR(y); + r = get_min_match_length(x, &tmin, env); + if (r != 0) break; + if (y == node) *min = tmin; + else if (*min > tmin) *min = tmin; + } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); + } + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + *min = sn->end - sn->s; + } + break; + + case NT_CTYPE: + *min = 1; + break; + + case NT_CCLASS: + case NT_CANY: + *min = 1; + break; + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + + if (qn->lower > 0) { + r = get_min_match_length(qn->target, min, env); + if (r == 0) + *min = distance_multiply(*min, qn->lower); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_MIN_FIXED(en)) + *min = en->min_len; + else { + r = get_min_match_length(en->target, min, env); + if (r == 0) { + en->min_len = *min; + SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); + } + } + break; +#endif + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = get_min_match_length(en->target, min, env); + break; + } + } + break; + + case NT_ANCHOR: + default: + break; + } + + return r; +} + +static int +get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) +{ + OnigDistance tmax; + int r = 0; + + *max = 0; + switch (NTYPE(node)) { + case NT_LIST: + do { + r = get_max_match_length(NCAR(node), &tmax, env); + if (r == 0) 
+ *max = distance_add(*max, tmax); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + do { + r = get_max_match_length(NCAR(node), &tmax, env); + if (r == 0 && *max < tmax) *max = tmax; + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + *max = sn->end - sn->s; + } + break; + + case NT_CTYPE: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + break; + + case NT_CCLASS: + case NT_CANY: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + break; + + case NT_BREF: + { + int i; + int* backs; + Node** nodes = SCANENV_MEM_NODES(env); + BRefNode* br = NBREF(node); + if (br->state & NST_RECURSION) { + *max = ONIG_INFINITE_DISTANCE; + break; + } + backs = BACKREFS_P(br); + for (i = 0; i < br->back_num; i++) { + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = get_max_match_length(nodes[backs[i]], &tmax, env); + if (r != 0) break; + if (*max < tmax) *max = tmax; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_max_match_length(NCALL(node)->target, max, env); + else + *max = ONIG_INFINITE_DISTANCE; + break; +#endif + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + + if (qn->upper != 0) { + r = get_max_match_length(qn->target, max, env); + if (r == 0 && *max != 0) { + if (! 
IS_REPEAT_INFINITE(qn->upper)) + *max = distance_multiply(*max, qn->upper); + else + *max = ONIG_INFINITE_DISTANCE; + } + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_MAX_FIXED(en)) + *max = en->max_len; + else { + r = get_max_match_length(en->target, max, env); + if (r == 0) { + en->max_len = *max; + SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); + } + } + break; +#endif + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = get_max_match_length(en->target, max, env); + break; + } + } + break; + + case NT_ANCHOR: + default: + break; + } + + return r; +} + +#define GET_CHAR_LEN_VARLEN -1 +#define GET_CHAR_LEN_TOP_ALT_VARLEN -2 + +/* fixed size pattern node only */ +static int +get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) +{ + int tlen; + int r = 0; + + level++; + *len = 0; + switch (NTYPE(node)) { + case NT_LIST: + do { + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); + if (r == 0) + *len = distance_add(*len, tlen); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + { + int tlen2; + int varlen = 0; + + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); + while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { + r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); + if (r == 0) { + if (tlen != tlen2) + varlen = 1; + } + } + if (r == 0) { + if (varlen != 0) { + if (level == 1) + r = GET_CHAR_LEN_TOP_ALT_VARLEN; + else + r = GET_CHAR_LEN_VARLEN; + } + else + *len = tlen; + } + } + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + UChar *s = sn->s; + while (s < sn->end) { + s += enclen(reg->enc, s, sn->end); + (*len)++; + } + } + break; + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + if (qn->lower == qn->upper) { + r = get_char_length_tree1(qn->target, reg, &tlen, level); + if (r == 0) + *len = distance_multiply(tlen, qn->lower); + } + else + r = GET_CHAR_LEN_VARLEN; + 
} + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_char_length_tree1(NCALL(node)->target, reg, len, level); + else + r = GET_CHAR_LEN_VARLEN; + break; +#endif + + case NT_CTYPE: + *len = 1; + break; + + case NT_CCLASS: + case NT_CANY: + *len = 1; + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_CLEN_FIXED(en)) + *len = en->char_len; + else { + r = get_char_length_tree1(en->target, reg, len, level); + if (r == 0) { + en->char_len = *len; + SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); + } + } + break; +#endif + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = get_char_length_tree1(en->target, reg, len, level); + break; + default: + break; + } + } + break; + + case NT_ANCHOR: + break; + + default: + r = GET_CHAR_LEN_VARLEN; + break; + } + + return r; +} + +static int +get_char_length_tree(Node* node, regex_t* reg, int* len) +{ + return get_char_length_tree1(node, reg, len, 0); +} + +/* x is not included y ==> 1 : 0 */ +static int +is_not_included(Node* x, Node* y, regex_t* reg) +{ + int i, len; + OnigCodePoint code; + UChar *p, c; + int ytype; + + retry: + ytype = NTYPE(y); + switch (NTYPE(x)) { + case NT_CTYPE: + { + switch (ytype) { + case NT_CTYPE: + if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && + NCTYPE(y)->not != NCTYPE(x)->not) + return 1; + else + return 0; + break; + + case NT_CCLASS: + swap: + { + Node* tmp; + tmp = x; x = y; y = tmp; + goto retry; + } + break; + + case NT_STR: + goto swap; + break; + + default: + break; + } + } + break; + + case NT_CCLASS: + { + CClassNode* xc = NCCLASS(x); + switch (ytype) { + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(y)->not == 0) { + if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(xc->bs, i)) { + if (IS_CODE_SB_WORD(reg->enc, i)) return 0; + } + } + return 1; + 
} + return 0; + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! IS_CODE_SB_WORD(reg->enc, i)) { + if (!IS_NCCLASS_NOT(xc)) { + if (BITSET_AT(xc->bs, i)) + return 0; + } + else { + if (! BITSET_AT(xc->bs, i)) + return 0; + } + } + } + return 1; + } + break; + + default: + break; + } + break; + + case NT_CCLASS: + { + int v; + CClassNode* yc = NCCLASS(y); + + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + v = BITSET_AT(xc->bs, i); + if ((v != 0 && !IS_NCCLASS_NOT(xc)) || + (v == 0 && IS_NCCLASS_NOT(xc))) { + v = BITSET_AT(yc->bs, i); + if ((v != 0 && !IS_NCCLASS_NOT(yc)) || + (v == 0 && IS_NCCLASS_NOT(yc))) + return 0; + } + } + if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || + (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) + return 1; + return 0; + } + break; + + case NT_STR: + goto swap; + break; + + default: + break; + } + } + break; + + case NT_STR: + { + StrNode* xs = NSTR(x); + if (NSTRING_LEN(x) == 0) + break; + + c = *(xs->s); + switch (ytype) { + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) + return NCTYPE(y)->not; + else + return !(NCTYPE(y)->not); + break; + default: + break; + } + break; + + case NT_CCLASS: + { + CClassNode* cc = NCCLASS(y); + + code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, + xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); + return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 
0 : 1); + } + break; + + case NT_STR: + { + UChar *q; + StrNode* ys = NSTR(y); + len = NSTRING_LEN(x); + if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); + if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { + /* tiny version */ + return 0; + } + else { + for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { + if (*p != *q) return 1; + } + } + } + break; + + default: + break; + } + } + break; + + default: + break; + } + + return 0; +} + +static Node* +get_head_value_node(Node* node, int exact, regex_t* reg) +{ + Node* n = NULL_NODE; + + switch (NTYPE(node)) { + case NT_BREF: + case NT_ALT: + case NT_CANY: +#ifdef USE_SUBEXP_CALL + case NT_CALL: +#endif + break; + + case NT_CTYPE: + case NT_CCLASS: + if (exact == 0) { + n = node; + } + break; + + case NT_LIST: + n = get_head_value_node(NCAR(node), exact, reg); + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + + if (sn->end <= sn->s) + break; + + if (exact != 0 && + !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { + } + else { + n = node; + } + } + break; + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + if (qn->lower > 0) { + if (IS_NOT_NULL(qn->head_exact)) + n = qn->head_exact; + else + n = get_head_value_node(qn->target, exact, reg); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_OPTION: + { + OnigOptionType options = reg->options; + + reg->options = NENCLOSE(node)->option; + n = get_head_value_node(NENCLOSE(node)->target, exact, reg); + reg->options = options; + } + break; + + case ENCLOSE_MEMORY: + case ENCLOSE_STOP_BACKTRACK: + n = get_head_value_node(en->target, exact, reg); + break; + } + } + break; + + case NT_ANCHOR: + if (NANCHOR(node)->type == ANCHOR_PREC_READ) + n = get_head_value_node(NANCHOR(node)->target, exact, reg); + break; + + default: + break; + } + + return n; +} + +static int +check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) +{ + int type, r = 0; + + type = NTYPE(node); + if 
((NTYPE2BIT(type) & type_mask) == 0) + return 1; + + switch (type) { + case NT_LIST: + case NT_ALT: + do { + r = check_type_tree(NCAR(node), type_mask, enclose_mask, + anchor_mask); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, + anchor_mask); + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + if ((en->type & enclose_mask) == 0) + return 1; + + r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); + } + break; + + case NT_ANCHOR: + type = NANCHOR(node)->type; + if ((type & anchor_mask) == 0) + return 1; + + if (NANCHOR(node)->target) + r = check_type_tree(NANCHOR(node)->target, + type_mask, enclose_mask, anchor_mask); + break; + + default: + break; + } + return r; +} + +#ifdef USE_SUBEXP_CALL + +#define RECURSION_EXIST 1 +#define RECURSION_INFINITE 2 + +static int +subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + Node *x; + OnigDistance min; + int ret; + + x = node; + do { + ret = subexp_inf_recursive_check(NCAR(x), env, head); + if (ret < 0 || ret == RECURSION_INFINITE) return ret; + r |= ret; + if (head) { + ret = get_min_match_length(NCAR(x), &min, env); + if (ret != 0) return ret; + if (min != 0) head = 0; + } + } while (IS_NOT_NULL(x = NCDR(x))); + } + break; + + case NT_ALT: + { + int ret; + r = RECURSION_EXIST; + do { + ret = subexp_inf_recursive_check(NCAR(node), env, head); + if (ret < 0 || ret == RECURSION_INFINITE) return ret; + r &= ret; + } while (IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_QTFR: + r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); + if (r == RECURSION_EXIST) { + if (NQTFR(node)->lower == 0) r = 0; + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case 
ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_inf_recursive_check(an->target, env, head); + break; + } + } + break; + + case NT_CALL: + r = subexp_inf_recursive_check(NCALL(node)->target, env, head); + break; + + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) + return 0; + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); + else { + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); + } + break; + + default: + break; + } + + return r; +} + +static int +subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + case NT_ALT: + do { + r = subexp_inf_recursive_check_trav(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_inf_recursive_check_trav(an->target, env); + break; + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + if (IS_ENCLOSE_RECURSION(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = subexp_inf_recursive_check(en->target, env, 1); + if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + } + r = subexp_inf_recursive_check_trav(en->target, env); + } + + break; + + default: + break; + } + + return r; +} + +static int +subexp_recursive_check(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + do { + r |= subexp_recursive_check(NCAR(node)); + } while (IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = subexp_recursive_check(NQTFR(node)->target); + break; + + 
case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_recursive_check(an->target); + break; + } + } + break; + + case NT_CALL: + r = subexp_recursive_check(NCALL(node)->target); + if (r != 0) SET_CALL_RECURSION(node); + break; + + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) + return 0; + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + return 1; /* recursion */ + else { + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_recursive_check(NENCLOSE(node)->target); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); + } + break; + + default: + break; + } + + return r; +} + + +static int +subexp_recursive_check_trav(Node* node, ScanEnv* env) +{ +#define FOUND_CALLED_NODE 1 + + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + case NT_ALT: + { + int ret; + do { + ret = subexp_recursive_check_trav(NCAR(node), env); + if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; + else if (ret < 0) return ret; + } while (IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_QTFR: + r = subexp_recursive_check_trav(NQTFR(node)->target, env); + if (NQTFR(node)->upper == 0) { + if (r == FOUND_CALLED_NODE) + NQTFR(node)->is_refered = 1; + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_recursive_check_trav(an->target, env); + break; + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + if (! 
IS_ENCLOSE_RECURSION(en)) { + if (IS_ENCLOSE_CALLED(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = subexp_recursive_check(en->target); + if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + } + } + r = subexp_recursive_check_trav(en->target, env); + if (IS_ENCLOSE_CALLED(en)) + r |= FOUND_CALLED_NODE; + } + break; + + default: + break; + } + + return r; +} + +static int +setup_subexp_call(Node* node, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + do { + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + do { + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = setup_subexp_call(NQTFR(node)->target, env); + break; + case NT_ENCLOSE: + r = setup_subexp_call(NENCLOSE(node)->target, env); + break; + + case NT_CALL: + { + CallNode* cn = NCALL(node); + Node** nodes = SCANENV_MEM_NODES(env); + + if (cn->group_num != 0) { + int gnum = cn->group_num; + +#ifdef USE_NAMED_GROUP + if (env->num_named > 0 && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + } +#endif + if (gnum > env->num_mem) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } + +#ifdef USE_NAMED_GROUP + set_call_attr: +#endif + cn->target = nodes[cn->group_num]; + if (IS_NULL(cn->target)) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + SET_ENCLOSE_STATUS(cn->target, NST_CALLED); + BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); + cn->unset_addr_list = env->unset_addr_list; + } +#ifdef USE_NAMED_GROUP + else { + int *refs; + + int 
n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, + &refs); + if (n <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + else if (n > 1) { + onig_scan_env_set_error_string(env, + ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); + return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; + } + else { + cn->group_num = refs[0]; + goto set_call_attr; + } + } +#endif + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = setup_subexp_call(an->target, env); + break; + } + } + break; + + default: + break; + } + + return r; +} +#endif + +/* divide different length alternatives in look-behind. + (?<=A|B) ==> (?<=A)|(?<=B) + (?<!A|B) ==> (?<!A)(?<!B) +*/ +static int +divide_look_behind_alternatives(Node* node) +{ + Node *head, *np, *insert_node; + AnchorNode* an = NANCHOR(node); + int anc_type = an->type; + + head = an->target; + np = NCAR(head); + swap_node(node, head); + NCAR(node) = head; + NANCHOR(head)->target = np; + + np = node; + while ((np = NCDR(np)) != NULL_NODE) { + insert_node = onig_node_new_anchor(anc_type); + CHECK_NULL_RETURN_MEMERR(insert_node); + NANCHOR(insert_node)->target = NCAR(np); + NCAR(np) = insert_node; + } + + if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { + np = node; + do { + SET_NTYPE(np, NT_LIST); /* alt -> list */ + } while ((np = NCDR(np)) != NULL_NODE); + } + return 0; +} + +static int +setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) +{ + int r, len; + AnchorNode* an = NANCHOR(node); + + r = get_char_length_tree(an->target, reg, &len); + if (r == 0) + an->char_len = len; + else if (r == GET_CHAR_LEN_VARLEN) + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) + r = divide_look_behind_alternatives(node); + else + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + + return r; +} + +static int +next_setup(Node* node, Node* 
next_node, regex_t* reg) +{ + int type; + + retry: + type = NTYPE(node); + if (type == NT_QTFR) { + QtfrNode* qn = NQTFR(node); + if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { +#ifdef USE_QTFR_PEEK_NEXT + Node* n = get_head_value_node(next_node, 1, reg); + /* '\0': for UTF-16BE etc... */ + if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { + qn->next_head_exact = n; + } +#endif + /* automatic posseivation a*b ==> (?>a*)b */ + if (qn->lower <= 1) { + int ttype = NTYPE(qn->target); + if (IS_NODE_TYPE_SIMPLE(ttype)) { + Node *x, *y; + x = get_head_value_node(qn->target, 0, reg); + if (IS_NOT_NULL(x)) { + y = get_head_value_node(next_node, 0, reg); + if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { + Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); + CHECK_NULL_RETURN_MEMERR(en); + SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); + swap_node(node, en); + NENCLOSE(node)->target = en; + } + } + } + } + } + } + else if (type == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { + node = en->target; + goto retry; + } + } + return 0; +} + + +static int +update_string_node_case_fold(regex_t* reg, Node *node) +{ + UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *sbuf, *ebuf, *sp; + int r, i, len, sbuf_size; + StrNode* sn = NSTR(node); + + end = sn->end; + sbuf_size = (end - sn->s) * 2; + sbuf = (UChar* )xmalloc(sbuf_size); + CHECK_NULL_RETURN_MEMERR(sbuf); + ebuf = sbuf + sbuf_size; + + sp = sbuf; + p = sn->s; + while (p < end) { + len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); + q = buf; + for (i = 0; i < len; i++) { + if (sp >= ebuf) { + sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); + CHECK_NULL_RETURN_MEMERR(sbuf); + sp = sbuf + sbuf_size; + sbuf_size *= 2; + ebuf = sbuf + sbuf_size; + } + + *sp++ = buf[i]; + } + } + + r = onig_node_str_set(node, sbuf, sp); + if (r != 0) { + xfree(sbuf); + return r; + } + + xfree(sbuf); + return 0; +} + +static int 
+expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, + regex_t* reg) +{ + int r; + Node *node; + + node = onig_node_new_str(s, end); + if (IS_NULL(node)) return ONIGERR_MEMORY; + + r = update_string_node_case_fold(reg, node); + if (r != 0) { + onig_node_free(node); + return r; + } + + NSTRING_SET_AMBIG(node); + NSTRING_SET_DONT_GET_OPT_INFO(node); + *rnode = node; + return 0; +} + +static int +expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], + UChar *p, int slen, UChar *end, + regex_t* reg, Node **rnode) +{ + int r, i, j, len, varlen; + Node *anode, *var_anode, *snode, *xnode, *an; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + + *rnode = var_anode = NULL_NODE; + + varlen = 0; + for (i = 0; i < item_num; i++) { + if (items[i].byte_len != slen) { + varlen = 1; + break; + } + } + + if (varlen != 0) { + *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(var_anode)) return ONIGERR_MEMORY; + + xnode = onig_node_new_list(NULL, NULL); + if (IS_NULL(xnode)) goto mem_err; + NCAR(var_anode) = xnode; + + anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(anode)) goto mem_err; + NCAR(xnode) = anode; + } + else { + *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(anode)) return ONIGERR_MEMORY; + } + + snode = onig_node_new_str(p, p + slen); + if (IS_NULL(snode)) goto mem_err; + + NCAR(anode) = snode; + + for (i = 0; i < item_num; i++) { + snode = onig_node_new_str(NULL, NULL); + if (IS_NULL(snode)) goto mem_err; + + for (j = 0; j < items[i].code_len; j++) { + len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); + if (len < 0) { + r = len; + goto mem_err2; + } + + r = onig_node_str_cat(snode, buf, buf + len); + if (r != 0) goto mem_err2; + } + + an = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(an)) { + goto mem_err2; + } + + if (items[i].byte_len != slen) { + Node *rem; + UChar *q = p + items[i].byte_len; + + if (q < end) { + r = 
expand_case_fold_make_rem_string(&rem, q, end, reg); + if (r != 0) { + onig_node_free(an); + goto mem_err2; + } + + xnode = onig_node_list_add(NULL_NODE, snode); + if (IS_NULL(xnode)) { + onig_node_free(an); + onig_node_free(rem); + goto mem_err2; + } + if (IS_NULL(onig_node_list_add(xnode, rem))) { + onig_node_free(an); + onig_node_free(xnode); + onig_node_free(rem); + goto mem_err; + } + + NCAR(an) = xnode; + } + else { + NCAR(an) = snode; + } + + NCDR(var_anode) = an; + var_anode = an; + } + else { + NCAR(an) = snode; + NCDR(anode) = an; + anode = an; + } + } + + return varlen; + + mem_err2: + onig_node_free(snode); + + mem_err: + onig_node_free(*rnode); + + return ONIGERR_MEMORY; +} + +static int +expand_case_fold_string(Node* node, regex_t* reg) +{ +#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 + + int r, n, len, alt_num; + UChar *start, *end, *p; + Node *top_root, *root, *snode, *prev_node; + OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + StrNode* sn = NSTR(node); + + if (NSTRING_IS_AMBIG(node)) return 0; + + start = sn->s; + end = sn->end; + if (start >= end) return 0; + + r = 0; + top_root = root = prev_node = snode = NULL_NODE; + alt_num = 1; + p = start; + while (p < end) { + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, + p, end, items); + if (n < 0) { + r = n; + goto err; + } + + len = enclen(reg->enc, p, end); + + if (n == 0) { + if (IS_NULL(snode)) { + if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } + + prev_node = snode = onig_node_new_str(NULL, NULL); + if (IS_NULL(snode)) goto mem_err; + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, snode))) { + onig_node_free(snode); + goto mem_err; + } + } + } + + r = onig_node_str_cat(snode, p, p + len); + if (r != 0) goto err; + } + else { + alt_num *= (n + 1); + if (alt_num > 
THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; + + if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } + + r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); + if (r < 0) goto mem_err; + if (r == 1) { + if (IS_NULL(root)) { + top_root = prev_node; + } + else { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } + + root = NCAR(prev_node); + } + else { /* r == 0 */ + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } + } + + snode = NULL_NODE; + } + + p += len; + } + + if (p < end) { + Node *srem; + + r = expand_case_fold_make_rem_string(&srem, p, end, reg); + if (r != 0) goto mem_err; + + if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(srem); + onig_node_free(prev_node); + goto mem_err; + } + } + + if (IS_NULL(root)) { + prev_node = srem; + } + else { + if (IS_NULL(onig_node_list_add(root, srem))) { + onig_node_free(srem); + goto mem_err; + } + } + } + + /* ending */ + top_root = (IS_NOT_NULL(top_root) ? 
top_root : prev_node); + swap_node(node, top_root); + onig_node_free(top_root); + return 0; + + mem_err: + r = ONIGERR_MEMORY; + + err: + onig_node_free(top_root); + return r; +} + + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +#define CEC_THRES_NUM_BIG_REPEAT 512 +#define CEC_INFINITE_NUM 0x7fffffff + +#define CEC_IN_INFINITE_REPEAT (1<<0) +#define CEC_IN_FINITE_REPEAT (1<<1) +#define CEC_CONT_BIG_REPEAT (1<<2) + +static int +setup_comb_exp_check(Node* node, int state, ScanEnv* env) +{ + int type; + int r = state; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + Node* prev = NULL_NODE; + do { + r = setup_comb_exp_check(NCAR(node), r, env); + prev = NCAR(node); + } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_ALT: + { + int ret; + do { + ret = setup_comb_exp_check(NCAR(node), state, env); + r |= ret; + } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_QTFR: + { + int child_state = state; + int add_state = 0; + QtfrNode* qn = NQTFR(node); + Node* target = qn->target; + int var_num; + + if (! IS_REPEAT_INFINITE(qn->upper)) { + if (qn->upper > 1) { + /* {0,1}, {1,1} are allowed */ + child_state |= CEC_IN_FINITE_REPEAT; + + /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ + if (env->backrefed_mem == 0) { + if (NTYPE(qn->target) == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(qn->target); + if (en->type == ENCLOSE_MEMORY) { + if (NTYPE(en->target) == NT_QTFR) { + QtfrNode* q = NQTFR(en->target); + if (IS_REPEAT_INFINITE(q->upper) + && q->greedy == qn->greedy) { + qn->upper = (qn->lower == 0 ? 
1 : qn->lower); + if (qn->upper == 1) + child_state = state; + } + } + } + } + } + } + } + + if (state & CEC_IN_FINITE_REPEAT) { + qn->comb_exp_check_num = -1; + } + else { + if (IS_REPEAT_INFINITE(qn->upper)) { + var_num = CEC_INFINITE_NUM; + child_state |= CEC_IN_INFINITE_REPEAT; + } + else { + var_num = qn->upper - qn->lower; + } + + if (var_num >= CEC_THRES_NUM_BIG_REPEAT) + add_state |= CEC_CONT_BIG_REPEAT; + + if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || + ((state & CEC_CONT_BIG_REPEAT) != 0 && + var_num >= CEC_THRES_NUM_BIG_REPEAT)) { + if (qn->comb_exp_check_num == 0) { + env->num_comb_exp_check++; + qn->comb_exp_check_num = env->num_comb_exp_check; + if (env->curr_max_regnum > env->comb_exp_max_regnum) + env->comb_exp_max_regnum = env->curr_max_regnum; + } + } + } + + r = setup_comb_exp_check(target, child_state, env); + r |= add_state; + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + switch (en->type) { + case ENCLOSE_MEMORY: + { + if (env->curr_max_regnum < en->regnum) + env->curr_max_regnum = en->regnum; + + r = setup_comb_exp_check(en->target, state, env); + } + break; + + default: + r = setup_comb_exp_check(en->target, state, env); + break; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) + env->has_recursion = 1; + else + r = setup_comb_exp_check(NCALL(node)->target, state, env); + break; +#endif + + default: + break; + } + + return r; +} +#endif + +#define IN_ALT (1<<0) +#define IN_NOT (1<<1) +#define IN_REPEAT (1<<2) +#define IN_VAR_REPEAT (1<<3) + +/* setup_tree does the following work. + 1. check empty loop. (set qn->target_empty_info) + 2. expand ignore-case in char class. + 3. set memory status bit flags. (reg->mem_stats) + 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. + 5. find invalid patterns in look-behind. + 6. expand repeated string. 
+ */ +static int +setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + Node* prev = NULL_NODE; + do { + r = setup_tree(NCAR(node), reg, state, env); + if (IS_NOT_NULL(prev) && r == 0) { + r = next_setup(prev, NCAR(node), reg); + } + prev = NCAR(node); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_ALT: + do { + r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_CCLASS: + break; + + case NT_STR: + if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { + r = expand_case_fold_string(node, reg); + } + break; + + case NT_CTYPE: + case NT_CANY: + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + break; +#endif + + case NT_BREF: + { + int i; + int* p; + Node** nodes = SCANENV_MEM_NODES(env); + BRefNode* br = NBREF(node); + p = BACKREFS_P(br); + for (i = 0; i < br->back_num; i++) { + if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); + BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); +#ifdef USE_BACKREF_WITH_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); + } +#endif + SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); + } + } + break; + + case NT_QTFR: + { + OnigDistance d; + QtfrNode* qn = NQTFR(node); + Node* target = qn->target; + + if ((state & IN_REPEAT) != 0) { + qn->state |= NST_IN_REPEAT; + } + + if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { + r = get_min_match_length(target, &d, env); + if (r) break; + if (d == 0) { + qn->target_empty_info = NQ_TARGET_IS_EMPTY; +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + r = quantifiers_memory_node_info(target); + if (r < 0) break; + if (r > 0) { + qn->target_empty_info = r; + } +#endif + } + } + + state |= IN_REPEAT; + if (qn->lower != qn->upper) + state |= IN_VAR_REPEAT; + r = setup_tree(target, reg, state, 
env); + if (r) break; + + /* expand string */ +#define EXPAND_STRING_MAX_LENGTH 100 + if (NTYPE(target) == NT_STR) { + if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && + qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int len = NSTRING_LEN(target); + StrNode* sn = NSTR(target); + + if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int i, n = qn->lower; + onig_node_conv_to_str_node(node, NSTR(target)->flag); + for (i = 0; i < n; i++) { + r = onig_node_str_cat(node, sn->s, sn->end); + if (r) break; + } + onig_node_free(target); + break; /* break case NT_QTFR: */ + } + } + } + +#ifdef USE_OP_PUSH_OR_JUMP_EXACT + if (qn->greedy && (qn->target_empty_info != 0)) { + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); + if (IS_NOT_NULL(tqn->head_exact)) { + qn->head_exact = tqn->head_exact; + tqn->head_exact = NULL; + } + } + else { + qn->head_exact = get_head_value_node(qn->target, 1, reg); + } + } +#endif + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + switch (en->type) { + case ENCLOSE_OPTION: + { + OnigOptionType options = reg->options; + reg->options = NENCLOSE(node)->option; + r = setup_tree(NENCLOSE(node)->target, reg, state, env); + reg->options = options; + } + break; + + case ENCLOSE_MEMORY: + if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { + BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); + /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ + } + r = setup_tree(en->target, reg, state, env); + break; + + case ENCLOSE_STOP_BACKTRACK: + { + Node* target = en->target; + r = setup_tree(target, reg, state, env); + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); + if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && + tqn->greedy != 0) { /* (?>a*), a*+ etc... 
*/ + int qtype = NTYPE(tqn->target); + if (IS_NODE_TYPE_SIMPLE(qtype)) + SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); + } + } + } + break; + } + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + + switch (an->type) { + case ANCHOR_PREC_READ: + r = setup_tree(an->target, reg, state, env); + break; + case ANCHOR_PREC_READ_NOT: + r = setup_tree(an->target, reg, (state | IN_NOT), env); + break; + +/* allowed node types in look-behind */ +#define ALLOWED_TYPE_IN_LB \ + ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ + BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) + +#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) +#define ALLOWED_ENCLOSE_IN_LB_NOT 0 + +#define ALLOWED_ANCHOR_IN_LB \ +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +#define ALLOWED_ANCHOR_IN_LB_NOT \ +( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) + + case ANCHOR_LOOK_BEHIND: + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_look_behind(node, reg, env); + if (r != 0) return r; + r = setup_tree(an->target, reg, state, env); + } + break; + + case ANCHOR_LOOK_BEHIND_NOT: + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_look_behind(node, reg, env); + if (r != 0) return r; + r = setup_tree(an->target, reg, (state | IN_NOT), env); + } + break; + } + } + break; + + default: + break; + } + + return r; +} + +/* set skip map for Boyer-Moor search */ +static int +set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, + UChar skip[], int** int_skip) +{ + int i, len; + + len = end - s; + if 
(len < ONIG_CHAR_TABLE_SIZE) { + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; + + for (i = 0; i < len - 1; i++) + skip[s[i]] = len - 1 - i; + } + else { + if (IS_NULL(*int_skip)) { + *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; + } + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; + + for (i = 0; i < len - 1; i++) + (*int_skip)[s[i]] = len - 1 - i; + } + return 0; +} + +#define OPT_EXACT_MAXLEN 24 + +typedef struct { + OnigDistance min; /* min byte length */ + OnigDistance max; /* max byte length */ +} MinMaxLen; + +typedef struct { + MinMaxLen mmd; + OnigEncoding enc; + OnigOptionType options; + OnigCaseFoldType case_fold_flag; + ScanEnv* scan_env; +} OptEnv; + +typedef struct { + int left_anchor; + int right_anchor; +} OptAncInfo; + +typedef struct { + MinMaxLen mmd; /* info position */ + OptAncInfo anc; + + int reach_end; + int ignore_case; + int len; + UChar s[OPT_EXACT_MAXLEN]; +} OptExactInfo; + +typedef struct { + MinMaxLen mmd; /* info position */ + OptAncInfo anc; + + int value; /* weighted value */ + UChar map[ONIG_CHAR_TABLE_SIZE]; +} OptMapInfo; + +typedef struct { + MinMaxLen len; + + OptAncInfo anc; + OptExactInfo exb; /* boundary */ + OptExactInfo exm; /* middle */ + OptExactInfo expr; /* prec read (?=...) 
*/ + + OptMapInfo map; /* boundary */ +} NodeOptInfo; + + +static int +map_position_value(OnigEncoding enc, int i) +{ + static const short int ByteValTable[] = { + 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 + }; + + if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { + if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) + return 20; + else + return (int )ByteValTable[i]; + } + else + return 4; /* Take it easy. */ +} + +static int +distance_value(MinMaxLen* mm) +{ + /* 1000 / (min-max-dist + 1) */ + static const short int dist_vals[] = { + 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, + 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, + 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, + 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, + 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, + 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, + 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, + 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 + }; + + int d; + + if (mm->max == ONIG_INFINITE_DISTANCE) return 0; + + d = mm->max - mm->min; + if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0]))) + /* return dist_vals[d] * 16 / (mm->min + 12); */ + return (int )dist_vals[d]; + else + return 1; +} + +static int +comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) +{ + if (v2 <= 0) return -1; + if (v1 <= 0) return 1; + + v1 *= distance_value(d1); + v2 *= distance_value(d2); + + if (v2 > v1) return 1; + if (v2 < v1) return -1; + + if (d2->min < d1->min) return 1; + if (d2->min > d1->min) return -1; + return 0; +} + +static int +is_equal_mml(MinMaxLen* a, MinMaxLen* b) +{ + return (a->min 
== b->min && a->max == b->max) ? 1 : 0; +} + + +static void +set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) +{ + mml->min = min; + mml->max = max; +} + +static void +clear_mml(MinMaxLen* mml) +{ + mml->min = mml->max = 0; +} + +static void +copy_mml(MinMaxLen* to, MinMaxLen* from) +{ + to->min = from->min; + to->max = from->max; +} + +static void +add_mml(MinMaxLen* to, MinMaxLen* from) +{ + to->min = distance_add(to->min, from->min); + to->max = distance_add(to->max, from->max); +} + +static void +alt_merge_mml(MinMaxLen* to, MinMaxLen* from) +{ + if (to->min > from->min) to->min = from->min; + if (to->max < from->max) to->max = from->max; +} + +static void +copy_opt_env(OptEnv* to, OptEnv* from) +{ + *to = *from; +} + +static void +clear_opt_anc_info(OptAncInfo* anc) +{ + anc->left_anchor = 0; + anc->right_anchor = 0; +} + +static void +copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) +{ + *to = *from; +} + +static void +concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, + OnigDistance left_len, OnigDistance right_len) +{ + clear_opt_anc_info(to); + + to->left_anchor = left->left_anchor; + if (left_len == 0) { + to->left_anchor |= right->left_anchor; + } + + to->right_anchor = right->right_anchor; + if (right_len == 0) { + to->right_anchor |= left->right_anchor; + } +} + +static int +is_left_anchor(int anc) +{ + if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || + anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || + anc == ANCHOR_PREC_READ_NOT) + return 0; + + return 1; +} + +static int +is_set_opt_anc_info(OptAncInfo* to, int anc) +{ + if ((to->left_anchor & anc) != 0) return 1; + + return ((to->right_anchor & anc) != 0 ? 
1 : 0); +} + +static void +add_opt_anc_info(OptAncInfo* to, int anc) +{ + if (is_left_anchor(anc)) + to->left_anchor |= anc; + else + to->right_anchor |= anc; +} + +static void +remove_opt_anc_info(OptAncInfo* to, int anc) +{ + if (is_left_anchor(anc)) + to->left_anchor &= ~anc; + else + to->right_anchor &= ~anc; +} + +static void +alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) +{ + to->left_anchor &= add->left_anchor; + to->right_anchor &= add->right_anchor; +} + +static int +is_full_opt_exact_info(OptExactInfo* ex) +{ + return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); +} + +static void +clear_opt_exact_info(OptExactInfo* ex) +{ + clear_mml(&ex->mmd); + clear_opt_anc_info(&ex->anc); + ex->reach_end = 0; + ex->ignore_case = 0; + ex->len = 0; + ex->s[0] = '\0'; +} + +static void +copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) +{ + *to = *from; +} + +static void +concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) +{ + int i, j, len; + UChar *p, *end; + OptAncInfo tanc; + + if (! to->ignore_case && add->ignore_case) { + if (to->len >= add->len) return ; /* avoid */ + + to->ignore_case = 1; + } + + p = add->s; + end = p + add->len; + for (i = to->len; p < end; ) { + len = enclen(enc, p, end); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) + to->s[i++] = *p++; + } + + to->len = i; + to->reach_end = (p == end ? add->reach_end : 0); + + concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); + if (! 
to->reach_end) tanc.right_anchor = 0; + copy_opt_anc_info(&to->anc, &tanc); +} + +static void +concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, + int raw ARG_UNUSED, OnigEncoding enc) +{ + int i, j, len; + UChar *p; + + for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { + len = enclen(enc, p, end); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) + to->s[i++] = *p++; + } + + to->len = i; +} + +static void +alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) +{ + int i, j, len; + + if (add->len == 0 || to->len == 0) { + clear_opt_exact_info(to); + return ; + } + + if (! is_equal_mml(&to->mmd, &add->mmd)) { + clear_opt_exact_info(to); + return ; + } + + for (i = 0; i < to->len && i < add->len; ) { + if (to->s[i] != add->s[i]) break; + len = enclen(env->enc, to->s + i, to->s + to->len); + + for (j = 1; j < len; j++) { + if (to->s[i+j] != add->s[i+j]) break; + } + if (j < len) break; + i += len; + } + + if (! add->reach_end || i < add->len || i < to->len) { + to->reach_end = 0; + } + to->len = i; + to->ignore_case |= add->ignore_case; + + alt_merge_opt_anc_info(&to->anc, &add->anc); + if (! 
to->reach_end) to->anc.right_anchor = 0; +} + +static void +select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) +{ + int v1, v2; + + v1 = now->len; + v2 = alt->len; + + if (v2 == 0) { + return ; + } + else if (v1 == 0) { + copy_opt_exact_info(now, alt); + return ; + } + else if (v1 <= 2 && v2 <= 2) { + /* ByteValTable[x] is big value --> low price */ + v2 = map_position_value(enc, now->s[0]); + v1 = map_position_value(enc, alt->s[0]); + + if (now->len > 1) v1 += 5; + if (alt->len > 1) v2 += 5; + } + + if (now->ignore_case == 0) v1 *= 2; + if (alt->ignore_case == 0) v2 *= 2; + + if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + copy_opt_exact_info(now, alt); +} + +static void +clear_opt_map_info(OptMapInfo* map) +{ + static const OptMapInfo clean_info = { + {0, 0}, {0, 0}, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + } + }; + + xmemcpy(map, &clean_info, sizeof(OptMapInfo)); +} + +static void +copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) +{ + *to = *from; +} + +static void +add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) +{ + if (map->map[c] == 0) { + map->map[c] = 1; + map->value += map_position_value(enc, c); + } +} + +static int 
+add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, + OnigEncoding enc, OnigCaseFoldType case_fold_flag) +{ + OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + int i, n; + + add_char_opt_map_info(map, p[0], enc); + + case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); + if (n < 0) return n; + + for (i = 0; i < n; i++) { + ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); + add_char_opt_map_info(map, buf[0], enc); + } + + return 0; +} + +static void +select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) +{ + const int z = 1<<15; /* 32768: something big value */ + + int v1, v2; + + if (alt->value == 0) return ; + if (now->value == 0) { + copy_opt_map_info(now, alt); + return ; + } + + v1 = z / now->value; + v2 = z / alt->value; + if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + copy_opt_map_info(now, alt); +} + +static int +comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) +{ +#define COMP_EM_BASE 20 + int ve, vm; + + if (m->value <= 0) return -1; + + ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); + vm = COMP_EM_BASE * 5 * 2 / m->value; + return comp_distance_value(&e->mmd, &m->mmd, ve, vm); +} + +static void +alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) +{ + int i, val; + + /* if (! 
is_equal_mml(&to->mmd, &add->mmd)) return ; */ + if (to->value == 0) return ; + if (add->value == 0 || to->mmd.max < add->mmd.min) { + clear_opt_map_info(to); + return ; + } + + alt_merge_mml(&to->mmd, &add->mmd); + + val = 0; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + if (add->map[i]) + to->map[i] = 1; + + if (to->map[i]) + val += map_position_value(enc, i); + } + to->value = val; + + alt_merge_opt_anc_info(&to->anc, &add->anc); +} + +static void +set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) +{ + copy_mml(&(opt->exb.mmd), mmd); + copy_mml(&(opt->expr.mmd), mmd); + copy_mml(&(opt->map.mmd), mmd); +} + +static void +clear_node_opt_info(NodeOptInfo* opt) +{ + clear_mml(&opt->len); + clear_opt_anc_info(&opt->anc); + clear_opt_exact_info(&opt->exb); + clear_opt_exact_info(&opt->exm); + clear_opt_exact_info(&opt->expr); + clear_opt_map_info(&opt->map); +} + +static void +copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) +{ + *to = *from; +} + +static void +concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) +{ + int exb_reach, exm_reach; + OptAncInfo tanc; + + concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); + copy_opt_anc_info(&to->anc, &tanc); + + if (add->exb.len > 0 && to->len.max == 0) { + concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, + to->len.max, add->len.max); + copy_opt_anc_info(&add->exb.anc, &tanc); + } + + if (add->map.value > 0 && to->len.max == 0) { + if (add->map.mmd.max == 0) + add->map.anc.left_anchor |= to->anc.left_anchor; + } + + exb_reach = to->exb.reach_end; + exm_reach = to->exm.reach_end; + + if (add->len.max != 0) + to->exb.reach_end = to->exm.reach_end = 0; + + if (add->exb.len > 0) { + if (exb_reach) { + concat_opt_exact_info(&to->exb, &add->exb, enc); + clear_opt_exact_info(&add->exb); + } + else if (exm_reach) { + concat_opt_exact_info(&to->exm, &add->exb, enc); + clear_opt_exact_info(&add->exb); + } + } + select_opt_exact_info(enc, &to->exm, &add->exb); + 
select_opt_exact_info(enc, &to->exm, &add->exm); + + if (to->expr.len > 0) { + if (add->len.max > 0) { + if (to->expr.len > (int )add->len.max) + to->expr.len = add->len.max; + + if (to->expr.mmd.max == 0) + select_opt_exact_info(enc, &to->exb, &to->expr); + else + select_opt_exact_info(enc, &to->exm, &to->expr); + } + } + else if (add->expr.len > 0) { + copy_opt_exact_info(&to->expr, &add->expr); + } + + select_opt_map_info(&to->map, &add->map); + + add_mml(&to->len, &add->len); +} + +static void +alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) +{ + alt_merge_opt_anc_info (&to->anc, &add->anc); + alt_merge_opt_exact_info(&to->exb, &add->exb, env); + alt_merge_opt_exact_info(&to->exm, &add->exm, env); + alt_merge_opt_exact_info(&to->expr, &add->expr, env); + alt_merge_opt_map_info(env->enc, &to->map, &add->map); + + alt_merge_mml(&to->len, &add->len); +} + + +#define MAX_NODE_OPT_INFO_REF_COUNT 5 + +static int +optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) +{ + int type; + int r = 0; + + clear_node_opt_info(opt); + set_bound_node_opt_info(opt, &env->mmd); + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + OptEnv nenv; + NodeOptInfo nopt; + Node* nd = node; + + copy_opt_env(&nenv, env); + do { + r = optimize_node_left(NCAR(nd), &nopt, &nenv); + if (r == 0) { + add_mml(&nenv.mmd, &nopt.len); + concat_left_node_opt_info(env->enc, opt, &nopt); + } + } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); + } + break; + + case NT_ALT: + { + NodeOptInfo nopt; + Node* nd = node; + + do { + r = optimize_node_left(NCAR(nd), &nopt, env); + if (r == 0) { + if (nd == node) copy_node_opt_info(opt, &nopt); + else alt_merge_node_opt_info(opt, &nopt, env); + } + } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); + } + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + int slen = sn->end - sn->s; + int is_raw = NSTRING_IS_RAW(node); + + if (! 
NSTRING_IS_AMBIG(node)) { + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + NSTRING_IS_RAW(node), env->enc); + if (slen > 0) { + add_char_opt_map_info(&opt->map, *(sn->s), env->enc); + } + set_mml(&opt->len, slen, slen); + } + else { + int max; + + if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { + int n = onigenc_strlen(env->enc, sn->s, sn->end); + max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; + } + else { + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + is_raw, env->enc); + opt->exb.ignore_case = 1; + + if (slen > 0) { + r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, + env->enc, env->case_fold_flag); + if (r != 0) break; + } + + max = slen; + } + + set_mml(&opt->len, slen, max); + } + + if (opt->exb.len == slen) + opt->exb.reach_end = 1; + } + break; + + case NT_CCLASS: + { + int i, z; + CClassNode* cc = NCCLASS(node); + + /* no need to check ignore case. (setted in setup_tree()) */ + + if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { + OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); + OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + + set_mml(&opt->len, min, max); + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + z = BITSET_AT(cc->bs, i); + if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + set_mml(&opt->len, 1, 1); + } + } + break; + + case NT_CTYPE: + { + int i, min, max; + + max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + + if (max == 1) { + min = 1; + + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! 
ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } + break; + } + } + else { + min = ONIGENC_MBC_MINLEN(env->enc); + } + set_mml(&opt->len, min, max); + } + break; + + case NT_CANY: + { + OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); + OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + set_mml(&opt->len, min, max); + } + break; + + case NT_ANCHOR: + switch (NANCHOR(node)->type) { + case ANCHOR_BEGIN_BUF: + case ANCHOR_BEGIN_POSITION: + case ANCHOR_BEGIN_LINE: + case ANCHOR_END_BUF: + case ANCHOR_SEMI_END_BUF: + case ANCHOR_END_LINE: + add_opt_anc_info(&opt->anc, NANCHOR(node)->type); + break; + + case ANCHOR_PREC_READ: + { + NodeOptInfo nopt; + + r = optimize_node_left(NANCHOR(node)->target, &nopt, env); + if (r == 0) { + if (nopt.exb.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exb); + else if (nopt.exm.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exm); + + opt->expr.reach_end = 0; + + if (nopt.map.value > 0) + copy_opt_map_info(&opt->map, &nopt.map); + } + } + break; + + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. 
*/ + case ANCHOR_LOOK_BEHIND_NOT: + break; + } + break; + + case NT_BREF: + { + int i; + int* backs; + OnigDistance min, max, tmin, tmax; + Node** nodes = SCANENV_MEM_NODES(env->scan_env); + BRefNode* br = NBREF(node); + + if (br->state & NST_RECURSION) { + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + break; + } + backs = BACKREFS_P(br); + r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); + if (r != 0) break; + r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); + if (r != 0) break; + for (i = 1; i < br->back_num; i++) { + r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); + if (r != 0) break; + r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); + if (r != 0) break; + if (min > tmin) min = tmin; + if (max < tmax) max = tmax; + } + if (r == 0) set_mml(&opt->len, min, max); + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + else { + OnigOptionType save = env->options; + env->options = NENCLOSE(NCALL(node)->target)->option; + r = optimize_node_left(NCALL(node)->target, opt, env); + env->options = save; + } + break; +#endif + + case NT_QTFR: + { + int i; + OnigDistance min, max; + NodeOptInfo nopt; + QtfrNode* qn = NQTFR(node); + + r = optimize_node_left(qn->target, &nopt, env); + if (r) break; + + if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { + if (env->mmd.max == 0 && + NTYPE(qn->target) == NT_CANY && qn->greedy) { + if (IS_MULTILINE(env->options)) + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); + else + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); + } + } + else { + if (qn->lower > 0) { + copy_node_opt_info(opt, &nopt); + if (nopt.exb.len > 0) { + if (nopt.exb.reach_end) { + for (i = 2; i <= qn->lower && + ! 
is_full_opt_exact_info(&opt->exb); i++) { + concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); + } + if (i < qn->lower) { + opt->exb.reach_end = 0; + } + } + } + + if (qn->lower != qn->upper) { + opt->exb.reach_end = 0; + opt->exm.reach_end = 0; + } + if (qn->lower > 1) + opt->exm.reach_end = 0; + } + } + + min = distance_multiply(nopt.len.min, qn->lower); + if (IS_REPEAT_INFINITE(qn->upper)) + max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); + else + max = distance_multiply(nopt.len.max, qn->upper); + + set_mml(&opt->len, min, max); + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + switch (en->type) { + case ENCLOSE_OPTION: + { + OnigOptionType save = env->options; + + env->options = en->option; + r = optimize_node_left(en->target, opt, env); + env->options = save; + } + break; + + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + en->opt_count++; + if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { + OnigDistance min, max; + + min = 0; + max = ONIG_INFINITE_DISTANCE; + if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; + if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; + set_mml(&opt->len, min, max); + } + else +#endif + { + r = optimize_node_left(en->target, opt, env); + + if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { + if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) + remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); + } + } + break; + + case ENCLOSE_STOP_BACKTRACK: + r = optimize_node_left(en->target, opt, env); + break; + } + } + break; + + default: +#ifdef ONIG_DEBUG + fprintf(stderr, "optimize_node_left: undefined node type %d\n", + NTYPE(node)); +#endif + r = ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +set_optimize_exact_info(regex_t* reg, OptExactInfo* e) +{ + int r; + + if (e->len == 0) return 0; + + if (e->ignore_case) { + reg->exact = (UChar* )xmalloc(e->len); + CHECK_NULL_RETURN_MEMERR(reg->exact); + xmemcpy(reg->exact, e->s, e->len); + reg->exact_end = 
reg->exact + e->len; + reg->optimize = ONIG_OPTIMIZE_EXACT_IC; + } + else { + int allow_reverse; + + reg->exact = str_dup(e->s, e->s + e->len); + CHECK_NULL_RETURN_MEMERR(reg->exact); + reg->exact_end = reg->exact + e->len; + + allow_reverse = + ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); + + if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { + r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, + reg->map, &(reg->int_map)); + if (r) return r; + + reg->optimize = (allow_reverse != 0 + ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); + } + else { + reg->optimize = ONIG_OPTIMIZE_EXACT; + } + } + + reg->dmin = e->mmd.min; + reg->dmax = e->mmd.max; + + if (reg->dmin != ONIG_INFINITE_DISTANCE) { + reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); + } + + return 0; +} + +static void +set_optimize_map_info(regex_t* reg, OptMapInfo* m) +{ + int i; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + reg->map[i] = m->map[i]; + + reg->optimize = ONIG_OPTIMIZE_MAP; + reg->dmin = m->mmd.min; + reg->dmax = m->mmd.max; + + if (reg->dmin != ONIG_INFINITE_DISTANCE) { + reg->threshold_len = reg->dmin + 1; + } +} + +static void +set_sub_anchor(regex_t* reg, OptAncInfo* anc) +{ + reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; + reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; +} + +#ifdef ONIG_DEBUG +static void print_optimize_info(FILE* f, regex_t* reg); +#endif + +static int +set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) +{ + + int r; + NodeOptInfo opt; + OptEnv env; + + env.enc = reg->enc; + env.options = reg->options; + env.case_fold_flag = reg->case_fold_flag; + env.scan_env = scan_env; + clear_mml(&env.mmd); + + r = optimize_node_left(node, &opt, &env); + if (r) return r; + + reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | + ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); + + reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); 
+ + if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { + reg->anchor_dmin = opt.len.min; + reg->anchor_dmax = opt.len.max; + } + + if (opt.exb.len > 0 || opt.exm.len > 0) { + select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); + if (opt.map.value > 0 && + comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { + goto set_map; + } + else { + r = set_optimize_exact_info(reg, &opt.exb); + set_sub_anchor(reg, &opt.exb.anc); + } + } + else if (opt.map.value > 0) { + set_map: + set_optimize_map_info(reg, &opt.map); + set_sub_anchor(reg, &opt.map.anc); + } + else { + reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; + if (opt.len.max == 0) + reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; + } + +#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) + print_optimize_info(stderr, reg); +#endif + return r; +} + +static void +clear_optimize_info(regex_t* reg) +{ + reg->optimize = ONIG_OPTIMIZE_NONE; + reg->anchor = 0; + reg->anchor_dmin = 0; + reg->anchor_dmax = 0; + reg->sub_anchor = 0; + reg->exact_end = (UChar* )NULL; + reg->threshold_len = 0; + if (IS_NOT_NULL(reg->exact)) { + xfree(reg->exact); + reg->exact = (UChar* )NULL; + } +} + +#ifdef ONIG_DEBUG + +static void print_enc_string(FILE* fp, OnigEncoding enc, + const UChar *s, const UChar *end) +{ + fprintf(fp, "\nPATTERN: /"); + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + const UChar *p; + OnigCodePoint code; + + p = s; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + fprintf(fp, " 0x%04x ", (int )code); + } + else { + fputc((int )code, fp); + } + + p += enclen(enc, p, end); + } + } + else { + while (s < end) { + fputc((int )*s, fp); + s++; + } + } + + fprintf(fp, "/\n"); +} + +static void +print_distance_range(FILE* f, OnigDistance a, OnigDistance b) +{ + if (a == ONIG_INFINITE_DISTANCE) + fputs("inf", f); + else + fprintf(f, "(%u)", a); + + fputs("-", f); + + if (b == ONIG_INFINITE_DISTANCE) + fputs("inf", f); + else + fprintf(f, "(%u)", b); +} + 
+static void +print_anchor(FILE* f, int anchor) +{ + int q = 0; + + fprintf(f, "["); + + if (anchor & ANCHOR_BEGIN_BUF) { + fprintf(f, "begin-buf"); + q = 1; + } + if (anchor & ANCHOR_BEGIN_LINE) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "begin-line"); + } + if (anchor & ANCHOR_BEGIN_POSITION) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "begin-pos"); + } + if (anchor & ANCHOR_END_BUF) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "end-buf"); + } + if (anchor & ANCHOR_SEMI_END_BUF) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "semi-end-buf"); + } + if (anchor & ANCHOR_END_LINE) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "end-line"); + } + if (anchor & ANCHOR_ANYCHAR_STAR) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "anychar-star"); + } + if (anchor & ANCHOR_ANYCHAR_STAR_ML) { + if (q) fprintf(f, ", "); + fprintf(f, "anychar-star-pl"); + } + + fprintf(f, "]"); +} + +static void +print_optimize_info(FILE* f, regex_t* reg) +{ + static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", + "EXACT_IC", "MAP" }; + + fprintf(f, "optimize: %s\n", on[reg->optimize]); + fprintf(f, " anchor: "); print_anchor(f, reg->anchor); + if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) + print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); + fprintf(f, "\n"); + + if (reg->optimize) { + fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); + fprintf(f, "\n"); + } + fprintf(f, "\n"); + + if (reg->exact) { + UChar *p; + fprintf(f, "exact: ["); + for (p = reg->exact; p < reg->exact_end; p++) { + fputc(*p, f); + } + fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact)); + } + else if (reg->optimize & ONIG_OPTIMIZE_MAP) { + int c, i, n = 0; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + if (reg->map[i]) n++; + + fprintf(f, "map: n=%d\n", n); + if (n > 0) { + c = 0; + fputc('[', f); + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + if (reg->map[i] != 0) { + if (c > 0) fputs(", ", f); + c++; + if 
(ONIGENC_MBC_MAXLEN(reg->enc) == 1 && + ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) + fputc(i, f); + else + fprintf(f, "%d", i); + } + } + fprintf(f, "]\n"); + } + } +} +#endif /* ONIG_DEBUG */ + + +extern void +onig_free_body(regex_t* reg) +{ + if (IS_NOT_NULL(reg)) { + if (IS_NOT_NULL(reg->p)) xfree(reg->p); + if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); + if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); + if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); + if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); + if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); + +#ifdef USE_NAMED_GROUP + onig_names_free(reg); +#endif + } +} + +extern void +onig_free(regex_t* reg) +{ + if (IS_NOT_NULL(reg)) { + onig_free_body(reg); + xfree(reg); + } +} + +size_t +onig_memsize(regex_t *reg) +{ + size_t size = sizeof(regex_t); + if (IS_NOT_NULL(reg->p)) size += reg->alloc; + if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact; + if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; + if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; + if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange); + if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain); + + return size; +} + +#define REGEX_TRANSFER(to,from) do {\ + (to)->state = ONIG_STATE_MODIFY;\ + onig_free_body(to);\ + xmemcpy(to, from, sizeof(regex_t));\ + xfree(from);\ +} while (0) + +extern void +onig_transfer(regex_t* to, regex_t* from) +{ + THREAD_ATOMIC_START; + REGEX_TRANSFER(to, from); + THREAD_ATOMIC_END; +} + +#define REGEX_CHAIN_HEAD(reg) do {\ + while (IS_NOT_NULL((reg)->chain)) {\ + (reg) = (reg)->chain;\ + }\ +} while (0) + +extern void +onig_chain_link_add(regex_t* to, regex_t* add) +{ + THREAD_ATOMIC_START; + REGEX_CHAIN_HEAD(to); + to->chain = add; + THREAD_ATOMIC_END; +} + +extern void +onig_chain_reduce(regex_t* reg) +{ + regex_t *head, *prev; + + prev = reg; + 
head = prev->chain; + if (IS_NOT_NULL(head)) { + reg->state = ONIG_STATE_MODIFY; + while (IS_NOT_NULL(head->chain)) { + prev = head; + head = head->chain; + } + prev->chain = (regex_t* )NULL; + REGEX_TRANSFER(reg, head); + } +} + +#ifdef ONIG_DEBUG +static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); +#endif +#ifdef ONIG_DEBUG_PARSE_TREE +static void print_tree P_((FILE* f, Node* node)); +#endif + +extern int +onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigErrorInfo* einfo, const char *sourcefile, int sourceline) +{ +#define COMPILE_INIT_SIZE 20 + + int r, init_size; + Node* root; + ScanEnv scan_env = {0}; +#ifdef USE_SUBEXP_CALL + UnsetAddrList uslist; +#endif + + if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + + scan_env.sourcefile = sourcefile; + scan_env.sourceline = sourceline; + reg->state = ONIG_STATE_COMPILING; + +#ifdef ONIG_DEBUG + print_enc_string(stderr, reg->enc, pattern, pattern_end); +#endif + + if (reg->alloc == 0) { + init_size = (pattern_end - pattern) * 2; + if (init_size <= 0) init_size = COMPILE_INIT_SIZE; + r = BBUF_INIT(reg, init_size); + if (r != 0) goto end; + } + else + reg->used = 0; + + reg->num_mem = 0; + reg->num_repeat = 0; + reg->num_null_check = 0; + reg->repeat_range_alloc = 0; + reg->repeat_range = (OnigRepeatRange* )NULL; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + reg->num_comb_exp_check = 0; +#endif + + r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); + if (r != 0) goto err; + +#ifdef USE_NAMED_GROUP + /* mixed use named group and no-named group */ + if (scan_env.num_named > 0 && + IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + if (scan_env.num_named != scan_env.num_mem) + r = disable_noname_group_capture(&root, reg, &scan_env); + else + r = numbered_ref_check(root); + + if (r != 0) goto err; + } +#endif + +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + r 
= unset_addr_list_init(&uslist, scan_env.num_call); + if (r != 0) goto err; + scan_env.unset_addr_list = &uslist; + r = setup_subexp_call(root, &scan_env); + if (r != 0) goto err_unset; + r = subexp_recursive_check_trav(root, &scan_env); + if (r < 0) goto err_unset; + r = subexp_inf_recursive_check_trav(root, &scan_env); + if (r != 0) goto err_unset; + + reg->num_call = scan_env.num_call; + } + else + reg->num_call = 0; +#endif + + r = setup_tree(root, reg, 0, &scan_env); + if (r != 0) goto err_unset; + +#ifdef ONIG_DEBUG_PARSE_TREE + print_tree(stderr, root); +#endif + + reg->capture_history = scan_env.capture_history; + reg->bt_mem_start = scan_env.bt_mem_start; + reg->bt_mem_start |= reg->capture_history; + if (IS_FIND_CONDITION(reg->options)) + BIT_STATUS_ON_ALL(reg->bt_mem_end); + else { + reg->bt_mem_end = scan_env.bt_mem_end; + reg->bt_mem_end |= reg->capture_history; + } + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + if (scan_env.backrefed_mem == 0 +#ifdef USE_SUBEXP_CALL + || scan_env.num_call == 0 +#endif + ) { + setup_comb_exp_check(root, 0, &scan_env); +#ifdef USE_SUBEXP_CALL + if (scan_env.has_recursion != 0) { + scan_env.num_comb_exp_check = 0; + } + else +#endif + if (scan_env.comb_exp_max_regnum > 0) { + int i; + for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { + if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { + scan_env.num_comb_exp_check = 0; + break; + } + } + } + } + + reg->num_comb_exp_check = scan_env.num_comb_exp_check; +#endif + + clear_optimize_info(reg); +#ifndef ONIG_DONT_OPTIMIZE + r = set_optimize_info_from_tree(root, reg, &scan_env); + if (r != 0) goto err_unset; +#endif + + if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { + xfree(scan_env.mem_nodes_dynamic); + scan_env.mem_nodes_dynamic = (Node** )NULL; + } + + r = compile_tree(root, reg); + if (r == 0) { + r = add_opcode(reg, OP_END); +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + r = unset_addr_list_fix(&uslist, reg); + unset_addr_list_end(&uslist); + if (r) goto err; 
+ } +#endif + + if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) + reg->stack_pop_level = STACK_POP_LEVEL_ALL; + else { + if (reg->bt_mem_start != 0) + reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; + else + reg->stack_pop_level = STACK_POP_LEVEL_FREE; + } + } +#ifdef USE_SUBEXP_CALL + else if (scan_env.num_call > 0) { + unset_addr_list_end(&uslist); + } +#endif + onig_node_free(root); + +#ifdef ONIG_DEBUG_COMPILE +#ifdef USE_NAMED_GROUP + onig_print_names(stderr, reg); +#endif + print_compiled_byte_code_list(stderr, reg); +#endif + + end: + reg->state = ONIG_STATE_NORMAL; + return r; + + err_unset: +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + unset_addr_list_end(&uslist); + } +#endif + err: + if (IS_NOT_NULL(scan_env.error)) { + if (IS_NOT_NULL(einfo)) { + einfo->enc = scan_env.enc; + einfo->par = scan_env.error; + einfo->par_end = scan_env.error_end; + } + } + + onig_node_free(root); + if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) + xfree(scan_env.mem_nodes_dynamic); + return r; +} + +#ifdef USE_RECOMPILE_API +extern int +onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) +{ + int r; + regex_t *new_reg; + + r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); + if (r) return r; + if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_transfer(reg, new_reg); + } + else { + onig_chain_link_add(reg, new_reg); + } + return 0; +} +#endif + +static int onig_inited = 0; + +extern int +onig_reg_init(regex_t* reg, OnigOptionType option, + OnigCaseFoldType case_fold_flag, + OnigEncoding enc, const OnigSyntaxType* syntax) +{ + if (! 
onig_inited) + onig_init(); + + if (IS_NULL(reg)) + return ONIGERR_INVALID_ARGUMENT; + + if (ONIGENC_IS_UNDEF(enc)) + return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; + + if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) + == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; + } + + (reg)->state = ONIG_STATE_MODIFY; + + if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { + option |= syntax->options; + option &= ~ONIG_OPTION_SINGLELINE; + } + else + option |= syntax->options; + + (reg)->enc = enc; + (reg)->options = option; + (reg)->syntax = syntax; + (reg)->optimize = 0; + (reg)->exact = (UChar* )NULL; + (reg)->int_map = (int* )NULL; + (reg)->int_map_backward = (int* )NULL; + (reg)->chain = (regex_t* )NULL; + + (reg)->p = (UChar* )NULL; + (reg)->alloc = 0; + (reg)->used = 0; + (reg)->name_table = (void* )NULL; + + (reg)->case_fold_flag = case_fold_flag; + return 0; +} + +extern int +onig_new_without_alloc(regex_t* reg, const UChar* pattern, + const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, + OnigSyntaxType* syntax, OnigErrorInfo* einfo) +{ + int r; + + r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); + if (r) return r; + + r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0); + return r; +} + +extern int +onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, + OnigErrorInfo* einfo) +{ + int r; + + *reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(*reg)) return ONIGERR_MEMORY; + + r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); + if (r) goto err; + + r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0); + if (r) { + err: + onig_free(*reg); + *reg = NULL; + } + return r; +} + + +extern int +onig_init(void) +{ + if (onig_inited != 0) + return 0; + + THREAD_SYSTEM_INIT; + THREAD_ATOMIC_START; + + onig_inited 
= 1; + + onigenc_init(); + /* onigenc_set_default_caseconv_table((UChar* )0); */ + +#ifdef ONIG_DEBUG_STATISTICS + onig_statistics_init(); +#endif + + THREAD_ATOMIC_END; + return 0; +} + + +extern int +onig_end(void) +{ + THREAD_ATOMIC_START; + +#ifdef ONIG_DEBUG_STATISTICS + onig_print_statistics(stderr); +#endif + +#ifdef USE_SHARED_CCLASS_TABLE + onig_free_shared_cclass_table(); +#endif + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + onig_free_node_list(); +#endif + + onig_inited = 0; + + THREAD_ATOMIC_END; + THREAD_SYSTEM_END; + return 0; +} +#endif //INCLUDE_REGEXP + +#ifdef INCLUDE_ENCODING +extern int +onig_is_in_code_range(const UChar* p, OnigCodePoint code) +{ + OnigCodePoint n, *data; + OnigCodePoint low, high, x; + + GET_CODE_POINT(n, p); + data = (OnigCodePoint* )p; + data++; + + for (low = 0, high = n; low < high; ) { + x = (low + high) >> 1; + if (code > data[x * 2 + 1]) + low = x + 1; + else + high = x; + } + + return ((low < n && code >= data[low * 2]) ? 1 : 0); +} +#endif //INCLUDE_ENCODING + +#ifdef INCLUDE_REGEXP +extern int +onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) +{ + int found; + + if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { + if (IS_NULL(cc->mbuf)) { + found = 0; + } + else { + found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + } + } + else { + found = (BITSET_AT(cc->bs, code) == 0 ? 
0 : 1); + } + + if (IS_NCCLASS_NOT(cc)) + return !found; + else + return found; +} + +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ + int len; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + len = 2; + } + else { + len = ONIGENC_CODE_TO_MBCLEN(enc, code); + } + return onig_is_code_in_cc_len(len, code, cc); +} + + +#ifdef ONIG_DEBUG + +/* arguments type */ +#define ARG_SPECIAL -1 +#define ARG_NON 0 +#define ARG_RELADDR 1 +#define ARG_ABSADDR 2 +#define ARG_LENGTH 3 +#define ARG_MEMNUM 4 +#define ARG_OPTION 5 +#define ARG_STATE_CHECK 6 + +OnigOpInfoType OnigOpInfo[] = { + { OP_FINISH, "finish", ARG_NON }, + { OP_END, "end", ARG_NON }, + { OP_EXACT1, "exact1", ARG_SPECIAL }, + { OP_EXACT2, "exact2", ARG_SPECIAL }, + { OP_EXACT3, "exact3", ARG_SPECIAL }, + { OP_EXACT4, "exact4", ARG_SPECIAL }, + { OP_EXACT5, "exact5", ARG_SPECIAL }, + { OP_EXACTN, "exactn", ARG_SPECIAL }, + { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, + { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, + { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, + { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, + { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, + { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, + { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, + { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, + { OP_CCLASS, "cclass", ARG_SPECIAL }, + { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, + { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, + { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, + { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, + { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, + { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, + { OP_ANYCHAR, "anychar", ARG_NON }, + { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, + { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, + { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, + { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, + { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, + { OP_WORD, "word", ARG_NON }, + { 
OP_NOT_WORD, "not-word", ARG_NON }, + { OP_WORD_BOUND, "word-bound", ARG_NON }, + { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, + { OP_WORD_BEGIN, "word-begin", ARG_NON }, + { OP_WORD_END, "word-end", ARG_NON }, + { OP_BEGIN_BUF, "begin-buf", ARG_NON }, + { OP_END_BUF, "end-buf", ARG_NON }, + { OP_BEGIN_LINE, "begin-line", ARG_NON }, + { OP_END_LINE, "end-line", ARG_NON }, + { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, + { OP_BEGIN_POSITION, "begin-position", ARG_NON }, + { OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREFN, "backrefn", ARG_MEMNUM }, + { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, + { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, + { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, + { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, + { OP_SET_OPTION, "set-option", ARG_OPTION }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_POP, "pop", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, + { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, + { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, + { 
OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, + { OP_PUSH_POS, "push-pos", ARG_NON }, + { OP_POP_POS, "pop-pos", ARG_NON }, + { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, + { OP_FAIL_POS, "fail-pos", ARG_NON }, + { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, + { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, + { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, + { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, + { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, + { OP_CALL, "call", ARG_ABSADDR }, + { OP_RETURN, "return", ARG_NON }, + { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, + { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, + { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, + { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, + { OP_STATE_CHECK_ANYCHAR_ML_STAR, + "state-check-anychar-ml*", ARG_STATE_CHECK }, + { -1, "", ARG_NON } +}; + +static char* +op2name(int opcode) +{ + int i; + + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return OnigOpInfo[i].name; + } + return ""; +} + +static int +op2arg_type(int opcode) +{ + int i; + + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return OnigOpInfo[i].arg_type; + } + return ARG_SPECIAL; +} + +static void +Indent(FILE* f, int indent) +{ + int i; + for (i = 0; i < indent; i++) putc(' ', f); +} + +static void +p_string(FILE* f, int len, UChar* s) +{ + fputs(":", f); + while (len-- > 0) { fputc(*s++, f); } +} + +static void +p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) +{ + int x = len * mb_len; + + fprintf(f, ":%d:", len); + while (x-- > 0) { fputc(*s++, f); } +} + +extern void +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, + OnigEncoding enc) +{ + int i, n, arg_type; + RelAddrType addr; + LengthType len; + MemNumType mem; + StateCheckNumType scn; + OnigCodePoint code; + UChar 
*q; + + fprintf(f, "[%s", op2name(*bp)); + arg_type = op2arg_type(*bp); + if (arg_type != ARG_SPECIAL) { + bp++; + switch (arg_type) { + case ARG_NON: + break; + case ARG_RELADDR: + GET_RELADDR_INC(addr, bp); + fprintf(f, ":(%d)", addr); + break; + case ARG_ABSADDR: + GET_ABSADDR_INC(addr, bp); + fprintf(f, ":(%d)", addr); + break; + case ARG_LENGTH: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + case ARG_MEMNUM: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + case ARG_OPTION: + { + OnigOptionType option = *((OnigOptionType* )bp); + bp += SIZE_OPTION; + fprintf(f, ":%d", option); + } + break; + + case ARG_STATE_CHECK: + scn = *((StateCheckNumType* )bp); + bp += SIZE_STATE_CHECK_NUM; + fprintf(f, ":%d", scn); + break; + } + } + else { + switch (*bp++) { + case OP_EXACT1: + case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: + p_string(f, 1, bp++); break; + case OP_EXACT2: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACT3: + p_string(f, 3, bp); bp += 3; break; + case OP_EXACT4: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACT5: + p_string(f, 5, bp); bp += 5; break; + case OP_EXACTN: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_EXACTMB2N1: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACTMB2N2: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACTMB2N3: + p_string(f, 6, bp); bp += 6; break; + case OP_EXACTMB2N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 2, bp); + bp += len * 2; + break; + case OP_EXACTMB3N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 3, bp); + bp += len * 3; + break; + case OP_EXACTMBN: + { + int mb_len; + + GET_LENGTH_INC(mb_len, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:%d:", mb_len, len); + n = len * mb_len; + while (n-- > 0) { fputc(*bp++, f); } + } + break; + + case OP_EXACT1_IC: + len = enclen(enc, bp, bpend); + p_string(f, len, bp); + bp += len; + break; + case OP_EXACTN_IC: + 
GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_CCLASS: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_MB: + case OP_CCLASS_MB_NOT: + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d", (int )code, len); + break; + + case OP_CCLASS_MIX: + case OP_CCLASS_MIX_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d:%d", n, (int )code, len); + break; + + case OP_CCLASS_NODE: + { + CClassNode *cc; + + GET_POINTER_INC(cc, bp); + n = bitset_on_num(cc->bs); + fprintf(f, ":%u:%d", (unsigned int )cc, n); + } + break; + + case OP_BACKREFN_IC: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + + case OP_BACKREF_MULTI_IC: + case OP_BACKREF_MULTI: + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + break; + + case OP_BACKREF_WITH_LEVEL: + { + OnigOptionType option; + LengthType level; + + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + } + break; + + case OP_REPEAT: + case OP_REPEAT_NG: + { + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:%d", mem, addr); + } + break; + + case OP_PUSH_OR_JUMP_EXACT1: + case OP_PUSH_IF_PEEK_NEXT: + addr = 
*((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":(%d)", addr); + p_string(f, 1, bp); + bp += 1; + break; + + case OP_LOOK_BEHIND: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + + case OP_PUSH_LOOK_BEHIND_NOT: + GET_RELADDR_INC(addr, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:(%d)", len, addr); + break; + + case OP_STATE_CHECK_PUSH: + case OP_STATE_CHECK_PUSH_OR_JUMP: + scn = *((StateCheckNumType* )bp); + bp += SIZE_STATE_CHECK_NUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:(%d)", scn, addr); + break; + + default: + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", + *--bp); + } + } + fputs("]", f); + if (nextp) *nextp = bp; +} + +static void +print_compiled_byte_code_list(FILE* f, regex_t* reg) +{ + int ncode; + UChar* bp = reg->p; + UChar* end = reg->p + reg->used; + + fprintf(f, "code length: %d\n", reg->used); + + ncode = 0; + while (bp < end) { + ncode++; + if (bp > reg->p) { + if (ncode % 5 == 0) + fprintf(f, "\n"); + else + fputs(" ", f); + } + onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc); + } + + fprintf(f, "\n"); +} + +static void +print_indent_tree(FILE* f, Node* node, int indent) +{ + int i, type; + int add = 3; + UChar* p; + + Indent(f, indent); + if (IS_NULL(node)) { + fprintf(f, "ERROR: null node!!!\n"); + exit (0); + } + + type = NTYPE(node); + switch (type) { + case NT_LIST: + case NT_ALT: + if (NTYPE(node) == NT_LIST) + fprintf(f, "\n", (int )node); + else + fprintf(f, "\n", (int )node); + + print_indent_tree(f, NCAR(node), indent + add); + while (IS_NOT_NULL(node = NCDR(node))) { + if (NTYPE(node) != type) { + fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); + exit(0); + } + print_indent_tree(f, NCAR(node), indent + add); + } + break; + + case NT_STR: + fprintf(f, "", + (NSTRING_IS_RAW(node) ? 
"-raw" : ""), (int )node); + for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { + if (*p >= 0x20 && *p < 0x7f) + fputc(*p, f); + else { + fprintf(f, " 0x%02x", *p); + } + } + break; + + case NT_CCLASS: + fprintf(f, "", (int )node); + if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); + if (NCCLASS(node)->mbuf) { + BBuf* bbuf = NCCLASS(node)->mbuf; + for (i = 0; i < bbuf->used; i++) { + if (i > 0) fprintf(f, ","); + fprintf(f, "%0x", bbuf->p[i]); + } + } + break; + + case NT_CTYPE: + fprintf(f, " ", (int )node); + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) + fputs("not word", f); + else + fputs("word", f); + break; + + default: + fprintf(f, "ERROR: undefined ctype.\n"); + exit(0); + } + break; + + case NT_CANY: + fprintf(f, "", (int )node); + break; + + case NT_ANCHOR: + fprintf(f, " ", (int )node); + switch (NANCHOR(node)->type) { + case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; + case ANCHOR_END_BUF: fputs("end buf", f); break; + case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; + case ANCHOR_END_LINE: fputs("end line", f); break; + case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; + case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; + + case ANCHOR_WORD_BOUND: fputs("word bound", f); break; + case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; +#ifdef USE_WORD_BEGIN_END + case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; + case ANCHOR_WORD_END: fputs("word end", f); break; +#endif + case ANCHOR_PREC_READ: fputs("prec read", f); break; + case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break; + case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break; + case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break; + + default: + fprintf(f, "ERROR: undefined anchor type.\n"); + break; + } + break; + + case NT_BREF: + { + int* p; + BRefNode* br = NBREF(node); + p = BACKREFS_P(br); + fprintf(f, "", (int )node); + for (i = 0; i < br->back_num; i++) { + if (i > 0) 
fputs(", ", f); + fprintf(f, "%d", p[i]); + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + { + CallNode* cn = NCALL(node); + fprintf(f, "", (int )node); + p_string(f, cn->name_end - cn->name, cn->name); + } + break; +#endif + + case NT_QTFR: + fprintf(f, "{%d,%d}%s\n", (int )node, + NQTFR(node)->lower, NQTFR(node)->upper, + (NQTFR(node)->greedy ? "" : "?")); + print_indent_tree(f, NQTFR(node)->target, indent + add); + break; + + case NT_ENCLOSE: + fprintf(f, " ", (int )node); + switch (NENCLOSE(node)->type) { + case ENCLOSE_OPTION: + fprintf(f, "option:%d\n", NENCLOSE(node)->option); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); + break; + case ENCLOSE_MEMORY: + fprintf(f, "memory:%d", NENCLOSE(node)->regnum); + break; + case ENCLOSE_STOP_BACKTRACK: + fprintf(f, "stop-bt"); + break; + + default: + break; + } + fprintf(f, "\n"); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); + break; + + default: + fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); + break; + } + + if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && + type != NT_ENCLOSE) + fprintf(f, "\n"); + fflush(f); +} +#endif /* ONIG_DEBUG */ + +#ifdef ONIG_DEBUG_PARSE_TREE +static void +print_tree(FILE* f, Node* node) +{ + print_indent_tree(f, node, 0); +} +#endif +#endif //INCLUDE_REGEXP diff --git a/src/regenc.c b/src/regenc.c new file mode 100644 index 0000000000..70978cde88 --- /dev/null +++ b/src/regenc.c @@ -0,0 +1,909 @@ +/********************************************************************** + regenc.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "mruby.h" +#ifdef INCLUDE_ENCODING +#include +#include "regint.h" + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; + +extern int +onigenc_init(void) +{ + return 0; +} + +extern OnigEncoding +onigenc_get_default_encoding(void) +{ + return OnigEncDefaultCharEncoding; +} + +extern int +onigenc_set_default_encoding(OnigEncoding enc) +{ + OnigEncDefaultCharEncoding = enc; + return 0; +} + +extern int +onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc) +{ + int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e); + if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) + return ONIGENC_MBCLEN_CHARFOUND_LEN(ret); + else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) + return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret); + return 1; +} + +extern UChar* +onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); + if (p < s) { + p += enclen(enc, p, end); + } + return p; +} + +extern UChar* +onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, + const UChar* start, const UChar* s, const UChar* end, const UChar** prev) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); + + if (p < s) { + if (prev) *prev = (const UChar* )p; + p += enclen(enc, p, end); + } + else { + if (prev) *prev = (const UChar* )NULL; /* Sorry */ + } + return p; +} + +extern UChar* +onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) +{ + if (s <= start) + return (UChar* )NULL; + + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); +} + +extern UChar* +onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n) +{ + while (ONIG_IS_NOT_NULL(s) && n-- > 0) { + if (s <= start) + return (UChar* )NULL; + + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, 
s - 1, end); + } + return (UChar* )s; +} + +extern UChar* +onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) +{ + UChar* q = (UChar* )p; + while (n-- > 0) { + q += ONIGENC_MBC_ENC_LEN(enc, q, end); + } + return (q <= end ? q : NULL); +} + +extern int +onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) +{ + int n = 0; + UChar* q = (UChar* )p; + + while (q < end) { + q += ONIGENC_MBC_ENC_LEN(enc, q, end); + n++; + } + return n; +} + +extern int +onigenc_strlen_null(OnigEncoding enc, const UChar* s) +{ + int n = 0; + UChar* p = (UChar* )s; + UChar* e; + + while (1) { + if (*p == '\0') { + UChar* q; + int len = ONIGENC_MBC_MINLEN(enc); + + if (len == 1) return n; + q = p + 1; + while (len > 1) { + if (*q != '\0') break; + q++; + len--; + } + if (len == 1) return n; + } + e = p + ONIGENC_MBC_MAXLEN(enc); + p += ONIGENC_MBC_ENC_LEN(enc, p, e); + n++; + } +} + +extern int +onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) +{ + UChar* start = (UChar* )s; + UChar* p = (UChar* )s; + UChar* e; + + while (1) { + if (*p == '\0') { + UChar* q; + int len = ONIGENC_MBC_MINLEN(enc); + + if (len == 1) return (int )(p - start); + q = p + 1; + while (len > 1) { + if (*q != '\0') break; + q++; + len--; + } + if (len == 1) return (int )(p - start); + } + e = p + ONIGENC_MBC_MAXLEN(enc); + p += ONIGENC_MBC_ENC_LEN(enc, p, e); + } +} + +const UChar OnigEncAsciiToLowerCaseTable[] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', 
'\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; + +#ifdef USE_UPPER_CASE_TABLE +const UChar OnigEncAsciiToUpperCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', 
'\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', + '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; +#endif + +const unsigned short OnigEncAsciiCtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 
0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { + '\000', 
'\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', 
'\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +#ifdef USE_UPPER_CASE_TABLE +const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', + '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', 
'\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377', +}; +#endif + +extern void +onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) +{ + /* nothing */ + /* obsoleted. */ +} + +extern UChar* +onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) +{ + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); +} + +const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { + { 0x41, 0x61 }, + { 0x42, 0x62 }, + { 0x43, 0x63 }, + { 0x44, 0x64 }, + { 0x45, 0x65 }, + { 0x46, 0x66 }, + { 0x47, 0x67 }, + { 0x48, 0x68 }, + { 0x49, 0x69 }, + { 0x4a, 0x6a }, + { 0x4b, 0x6b }, + { 0x4c, 0x6c }, + { 0x4d, 0x6d }, + { 0x4e, 0x6e }, + { 0x4f, 0x6f }, + { 0x50, 0x70 }, + { 0x51, 0x71 }, + { 0x52, 0x72 }, + { 0x53, 0x73 }, + { 0x54, 0x74 }, + { 0x55, 0x75 }, + { 0x56, 0x76 }, + { 0x57, 0x77 }, + { 0x58, 0x78 }, + { 0x59, 0x79 }, + { 0x5a, 0x7a } +}; + +extern int +onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, + OnigApplyAllCaseFoldFunc f, void* arg, + OnigEncoding enc ARG_UNUSED) +{ + OnigCodePoint code; + int i, r; + + for (i = 0; + i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); + i++) { + code = OnigAsciiLowerMap[i].to; + r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); + if (r != 0) return r; + + code = OnigAsciiLowerMap[i].from; + r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg); + if (r != 0) return r; + } + + return 0; +} + +extern int +onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) +{ + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + 
items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; + } + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; + } + else + return 0; +} + +static int +ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + OnigCodePoint ss[] = { 0x73, 0x73 }; + + return (*f)((OnigCodePoint )0xdf, ss, 2, arg); +} + +extern int +onigenc_apply_all_case_fold_with_map(int map_size, + const OnigPairCaseFoldCodes map[], + int ess_tsett_flag, OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + OnigCodePoint code; + int i, r; + + r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0); + if (r != 0) return r; + + for (i = 0; i < map_size; i++) { + code = map[i].to; + r = (*f)(map[i].from, &code, 1, arg); + if (r != 0) return r; + + code = map[i].from; + r = (*f)(map[i].to, &code, 1, arg); + if (r != 0) return r; + } + + if (ess_tsett_flag != 0) + return ss_apply_all_case_fold(flag, f, arg); + + return 0; +} + +extern int +onigenc_get_case_fold_codes_by_str_with_map(int map_size, + const OnigPairCaseFoldCodes map[], + int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == 0x53 || *(p+1) == 0x73)) { + /* SS */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == 0x73 || *(p+1) == 0x53)) { + /* ss */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] 
= (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (*p == 0xdf && ess_tsett_flag != 0) { + items[0].byte_len = 1; + items[0].code_len = 2; + items[0].code[0] = (OnigCodePoint )'s'; + items[0].code[1] = (OnigCodePoint )'s'; + + items[1].byte_len = 1; + items[1].code_len = 2; + items[1].code[0] = (OnigCodePoint )'S'; + items[1].code[1] = (OnigCodePoint )'S'; + + items[2].byte_len = 1; + items[2].code_len = 2; + items[2].code[0] = (OnigCodePoint )'s'; + items[2].code[1] = (OnigCodePoint )'S'; + + items[3].byte_len = 1; + items[3].code_len = 2; + items[3].code[0] = (OnigCodePoint )'S'; + items[3].code[1] = (OnigCodePoint )'s'; + + return 4; + } + else { + int i; + + for (i = 0; i < map_size; i++) { + if (*p == map[i].from) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].to; + return 1; + } + else if (*p == map[i].to) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].from; + return 1; + } + } + } + + return 0; +} + + +extern int +onigenc_not_support_get_ctype_code_range(OnigCtype ctype, + OnigCodePoint* sb_out, const OnigCodePoint* ranges[], + OnigEncoding enc) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED) +{ + if (p < end) { + if (*p == 0x0a) return 1; + } + return 0; +} + +/* for single byte encodings */ +extern int +onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, + const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED) +{ + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); + + (*p)++; + return 1; /* return byte length of converted char to lower */ +} + +extern int +onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return 1; +} + +extern OnigCodePoint +onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return (OnigCodePoint 
)(*p); +} + +extern int +onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED) +{ + return 1; +} + +extern int +onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) +{ + *buf = (UChar )(code & 0xff); + return 1; +} + +extern UChar* +onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s, + const UChar* end, + OnigEncoding enc ARG_UNUSED) +{ + return (UChar* )s; +} + +extern int +onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return TRUE; +} + +extern int +onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return FALSE; +} + +extern int +onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, + OnigEncoding enc ARG_UNUSED) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + +extern OnigCodePoint +onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + len = enclen(enc, p, end); + n = (OnigCodePoint )(*p++); + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +extern int +onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, + UChar* lower) +{ + int len; + const UChar *p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + (*pp)++; + return 1; + } + else { + int i; + + len = enclen(enc, p, end); + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + (*pp) += len; + return len; /* return byte length of converted to lower char */ + } +} + +extern int +onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) +{ + if ((code & 0xff00) != 0) return 2; + else 
return 1; +} + +extern int +onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) +{ + if ((code & 0xff000000) != 0) return 4; + else if ((code & 0xff0000) != 0) return 3; + else if ((code & 0xff00) != 0) return 2; + else return 1; +} + +extern int +onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff00) != 0) { + *p++ = (UChar )((code >> 8) & 0xff); + } + *p++ = (UChar )(code & 0xff); + + if (enclen(enc, buf, p) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; + return (int)(p - buf); +} + +extern int +onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff000000) != 0) { + *p++ = (UChar )((code >> 24) & 0xff); + } + if ((code & 0xff0000) != 0 || p != buf) { + *p++ = (UChar )((code >> 16) & 0xff); + } + if ((code & 0xff00) != 0 || p != buf) { + *p++ = (UChar )((code >> 8) & 0xff); + } + *p++ = (UChar )(code & 0xff); + + if (enclen(enc, buf, p) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; + return (int)(p - buf); +} + +extern int +onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) +{ + static const PosixBracketEntryType PBS[] = { + PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM), + PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA), + PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK), + PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL), + PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT), + PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH), + PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER), + PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT), + PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT), + PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE), + PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER), + PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT), + PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII), + PosixBracketEntryInit("Word", 
ONIGENC_CTYPE_WORD), + }; + + const PosixBracketEntryType *pb, *pbe; + int len; + + len = onigenc_strlen(enc, p, end); + for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) { + if (len == pb->len && + onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) + return pb->ctype; + } + + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; +} + +extern int +onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); + } + } + + return FALSE; +} + +extern int +onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); + } + } + + return FALSE; +} + +extern int +onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, + const UChar* sascii /* ascii */, int n) +{ + int x, c; + + while (n-- > 0) { + if (p >= end) return (int )(*sascii); + + c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); + x = *sascii - c; + if (x) return x; + + sascii++; + p += enclen(enc, p, end); + } + return 0; +} + +/* Property management */ +static int +resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize) +{ + size_t size; + const OnigCodePoint **list = *plist; + + size = sizeof(OnigCodePoint*) * new_size; + if (IS_NULL(list)) { + list = (const OnigCodePoint** )xmalloc(size); + } + else { + list = (const OnigCodePoint** )xrealloc((void* )list, size); + } + + if (IS_NULL(list)) return ONIGERR_MEMORY; + + *plist = list; + *psize = new_size; + + return 0; +} + +extern int +onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, + hash_table_type **table, const OnigCodePoint*** plist, int *pnum, + int *psize) 
+{ +#define PROP_INIT_SIZE 16 + + int r; + + if (*psize <= *pnum) { + int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2); + r = resize_property_list(new_size, plist, psize); + if (r != 0) return r; + } + + (*plist)[*pnum] = prop; + + if (ONIG_IS_NULL(*table)) { + *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE); + if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY; + } + + *pnum = *pnum + 1; + onig_st_insert_strend(*table, name, name + strlen((char* )name), + (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); + return 0; +} + +extern int +onigenc_property_list_init(int (*f)(void)) +{ + int r; + + THREAD_ATOMIC_START; + + r = f(); + + THREAD_ATOMIC_END; + return r; +} +#endif //INCLUDE_ENCODING diff --git a/src/regenc.h b/src/regenc.h new file mode 100644 index 0000000000..1d8e752bc7 --- /dev/null +++ b/src/regenc.h @@ -0,0 +1,203 @@ +#ifndef ONIGURUMA_REGENC_H +#define ONIGURUMA_REGENC_H +/********************************************************************** + regenc.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#define RUBY + +#ifndef mrb_compile_warn +#define mrb_compile_warn(a,b,c,d) printf(c,d) +#endif + +#ifndef REGINT_H +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif +#endif +#include "oniguruma.h" + +typedef struct { + OnigCodePoint from; + OnigCodePoint to; +} OnigPairCaseFoldCodes; + + +#ifndef ARG_UNUSED +#if defined(__GNUC__) +# define ARG_UNUSED __attribute__ ((unused)) +#else +# define ARG_UNUSED +#endif +#endif + +#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) +#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) +#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL +#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) + +#define enclen(enc,p,e) ((enc->max_enc_len == enc->min_enc_len) ? 
enc->min_enc_len : ONIGENC_MBC_ENC_LEN(enc,p,e)) + +/* character types bit flag */ +#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE) +#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA) +#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK) +#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL) +#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT) +#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH) +#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER) +#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT) +#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT) +#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE) +#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER) +#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT) +#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD) +#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM) +#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII) + +#define CTYPE_TO_BIT(ctype) (1<<(ctype)) +#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \ + ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\ + (ctype) == ONIGENC_CTYPE_PRINT) + + +typedef struct { + const UChar *name; + int ctype; + short int len; +} PosixBracketEntryType; + +#define PosixBracketEntryInit(name, ctype) {(const UChar *)name, ctype, (short int)(sizeof(name) - 1)} + +/* #define USE_CRNL_AS_LINE_TERMINATOR */ +#define USE_UNICODE_PROPERTIES +/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ +/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ + + +#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII + +/* for encoding system implementation (internal) */ +ONIG_EXTERN int onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc); +ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc); +ONIG_EXTERN int onigenc_apply_all_case_fold_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, 
OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg); +ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]); +ONIG_EXTERN int onigenc_not_support_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc); +ONIG_EXTERN int onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc); + +/* methods for single byte encoding */ +ONIG_EXTERN int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc); +ONIG_EXTERN int onigenc_single_byte_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc); +ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc); +ONIG_EXTERN int onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc); +ONIG_EXTERN int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc); +ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc); +ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc); +ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc); +ONIG_EXTERN int onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc); + +/* methods for multi byte encoding */ +ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end); +ONIG_EXTERN int onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower); +ONIG_EXTERN int onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc); +ONIG_EXTERN int 
onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf); +ONIG_EXTERN int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end); +ONIG_EXTERN int onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end); +ONIG_EXTERN int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype); +ONIG_EXTERN int onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc); +ONIG_EXTERN int onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf); +ONIG_EXTERN int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype); + + +/* in enc/unicode.c */ +ONIG_EXTERN int onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc); +ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc); +ONIG_EXTERN int onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]); +ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]); +ONIG_EXTERN int onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold); +ONIG_EXTERN int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc); + + +#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) +#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) + +#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ + OnigEncISO_8859_1_ToLowerCaseTable[c] +#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \ + OnigEncISO_8859_1_ToUpperCaseTable[c] + +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; + +ONIG_EXTERN int +onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, const 
UChar* sascii /* ascii */, int n); +ONIG_EXTERN UChar* +onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n); + +/* defined in regexec.c, but used in enc/xxx.c */ +extern int onig_is_in_code_range (const UChar* p, OnigCodePoint code); + +ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; +ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]; +ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; +ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; + +#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80) +#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] +#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c] +#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \ + ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) +#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \ + (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\ + ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) + +#ifdef ONIG_ENC_REGISTER +extern int ONIG_ENC_REGISTER(const char *, OnigEncodingType*); +#define OnigEncodingName(n) encoding_##n +#define OnigEncodingDeclare(n) static OnigEncodingType OnigEncodingName(n) +#define OnigEncodingDefine(f,n) \ + OnigEncodingDeclare(n); \ + void Init_##f(void) { \ + ONIG_ENC_REGISTER(OnigEncodingName(n).name, \ + &OnigEncodingName(n)); \ + } \ + OnigEncodingDeclare(n) +#else +#define OnigEncodingName(n) OnigEncoding##n +#define OnigEncodingDeclare(n) OnigEncodingType OnigEncodingName(n) +#define OnigEncodingDefine(f,n) OnigEncodingDeclare(n) +#endif + +/* macros for define replica encoding and encoding alias */ +#define ENC_REPLICATE(name, orig) +#define ENC_ALIAS(name, orig) +#define ENC_DUMMY(name) + +#endif /* ONIGURUMA_REGENC_H */ diff --git a/src/regerror.c b/src/regerror.c new file mode 100644 index 0000000000..72db3a502b --- /dev/null +++ b/src/regerror.c @@ -0,0 +1,375 @@ +/********************************************************************** + regerror.c - 
Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "mruby.h" +#ifdef INCLUDE_REGEXP +#include +#include "regint.h" +#include /* for vsnprintf() */ +#include + +extern UChar* +onig_error_code_to_format(int code) +{ + const char *p; + + if (code >= 0) return (UChar* )0; + + switch (code) { + case ONIG_MISMATCH: + p = "mismatch"; break; + case ONIG_NO_SUPPORT_CONFIG: + p = "no support in this configuration"; break; + case ONIGERR_MEMORY: + p = "failed to allocate memory"; break; + case ONIGERR_MATCH_STACK_LIMIT_OVER: + p = "match-stack limit over"; break; + case ONIGERR_TYPE_BUG: + p = "undefined type (bug)"; break; + case ONIGERR_PARSER_BUG: + p = "internal parser error (bug)"; break; + case ONIGERR_STACK_BUG: + p = "stack error (bug)"; break; + case ONIGERR_UNDEFINED_BYTECODE: + p = "undefined bytecode (bug)"; break; + case ONIGERR_UNEXPECTED_BYTECODE: + p = "unexpected bytecode (bug)"; break; + case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED: + p = "default multibyte-encoding is not setted"; break; + case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: + p = "can't convert to wide-char on specified multibyte-encoding"; break; + case ONIGERR_INVALID_ARGUMENT: + p = "invalid argument"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACE: + p = "end pattern at left brace"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: + p = "end pattern at left bracket"; break; + case ONIGERR_EMPTY_CHAR_CLASS: + p = "empty char-class"; break; + case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: + p = "premature end of char-class"; break; + case ONIGERR_END_PATTERN_AT_ESCAPE: + p = "end pattern at escape"; break; + case ONIGERR_END_PATTERN_AT_META: + p = "end pattern at meta"; break; + case ONIGERR_END_PATTERN_AT_CONTROL: + p = "end pattern at control"; break; + case ONIGERR_META_CODE_SYNTAX: + p = "invalid meta-code syntax"; break; + case ONIGERR_CONTROL_CODE_SYNTAX: + p = "invalid control-code syntax"; break; + case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: + p = "char-class value at end of range"; break; + case 
ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: + p = "char-class value at start of range"; break; + case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: + p = "unmatched range specifier in char-class"; break; + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: + p = "target of repeat operator is not specified"; break; + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: + p = "target of repeat operator is invalid"; break; + case ONIGERR_NESTED_REPEAT_OPERATOR: + p = "nested repeat operator"; break; + case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: + p = "unmatched close parenthesis"; break; + case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: + p = "end pattern with unmatched parenthesis"; break; + case ONIGERR_END_PATTERN_IN_GROUP: + p = "end pattern in group"; break; + case ONIGERR_UNDEFINED_GROUP_OPTION: + p = "undefined group option"; break; + case ONIGERR_INVALID_POSIX_BRACKET_TYPE: + p = "invalid POSIX bracket type"; break; + case ONIGERR_INVALID_LOOK_BEHIND_PATTERN: + p = "invalid pattern in look-behind"; break; + case ONIGERR_INVALID_REPEAT_RANGE_PATTERN: + p = "invalid repeat range {lower,upper}"; break; + case ONIGERR_TOO_BIG_NUMBER: + p = "too big number"; break; + case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: + p = "too big number for repeat range"; break; + case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: + p = "upper is smaller than lower in repeat range"; break; + case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: + p = "empty range in char class"; break; + case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: + p = "mismatch multibyte code length in char-class range"; break; + case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: + p = "too many multibyte code ranges are specified"; break; + case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: + p = "too short multibyte code string"; break; + case ONIGERR_TOO_BIG_BACKREF_NUMBER: + p = "too big backref number"; break; + case ONIGERR_INVALID_BACKREF: +#ifdef USE_NAMED_GROUP + p = "invalid backref number/name"; break; +#else + p = "invalid 
backref number"; break; +#endif + case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED: + p = "numbered backref/call is not allowed. (use name)"; break; + case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE: + p = "too big wide-char value"; break; + case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: + p = "too long wide-char value"; break; + case ONIGERR_INVALID_CODE_POINT_VALUE: + p = "invalid code point value"; break; + case ONIGERR_EMPTY_GROUP_NAME: + p = "group name is empty"; break; + case ONIGERR_INVALID_GROUP_NAME: + p = "invalid group name <%n>"; break; + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: +#ifdef USE_NAMED_GROUP + p = "invalid char in group name <%n>"; break; +#else + p = "invalid char in group number <%n>"; break; +#endif + case ONIGERR_UNDEFINED_NAME_REFERENCE: + p = "undefined name <%n> reference"; break; + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + p = "undefined group <%n> reference"; break; + case ONIGERR_MULTIPLEX_DEFINED_NAME: + p = "multiplex defined name <%n>"; break; + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + p = "multiplex definition name <%n> call"; break; + case ONIGERR_NEVER_ENDING_RECURSION: + p = "never ending recursion"; break; + case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: + p = "group number is too big for capture history"; break; + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + p = "invalid character property name {%n}"; break; + case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: + p = "not supported encoding combination"; break; + case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: + p = "invalid combination of options"; break; + case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: + p = "over thread pass limit count"; break; + + default: + p = "undefined error code"; break; + } + + return (UChar* )p; +} + +static void sprint_byte(char* s, unsigned int v) +{ + sprintf(s, "%02x", (v & 0377)); +} + +static void sprint_byte_with_x(char* s, unsigned int v) +{ + sprintf(s, "\\x%02x", (v & 0377)); +} + +static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, + UChar 
buf[], int buf_size, int *is_over) +{ + int len; + UChar *p; + OnigCodePoint code; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + p = s; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + if (code > 0xffff && len + 10 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); + sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); + len += 10; + } + else if (len + 6 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); + len += 6; + } + else { + break; + } + } + else { + buf[len++] = (UChar )code; + } + + p += enclen(enc, p, end); + if (len >= buf_size) break; + } + + *is_over = ((p < end) ? 1 : 0); + } + else { + len = (int)MIN((end - s), buf_size); + xmemcpy(buf, s, (size_t )len); + *is_over = ((buf_size < (end - s)) ? 1 : 0); + } + + return len; +} + + +/* for ONIG_MAX_ERROR_MESSAGE_LEN */ +#define MAX_ERROR_PAR_LEN 30 + +extern int +onig_error_code_to_str(UChar* s, int code, ...) 
+{ + UChar *p, *q; + OnigErrorInfo* einfo; + size_t len; + int is_over; + UChar parbuf[MAX_ERROR_PAR_LEN]; + va_list vargs; + + va_start(vargs, code); + + switch (code) { + case ONIGERR_UNDEFINED_NAME_REFERENCE: + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + case ONIGERR_MULTIPLEX_DEFINED_NAME: + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + case ONIGERR_INVALID_GROUP_NAME: + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + einfo = va_arg(vargs, OnigErrorInfo*); + len = to_ascii(einfo->enc, einfo->par, einfo->par_end, + parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); + q = onig_error_code_to_format(code); + p = s; + while (*q != '\0') { + if (*q == '%') { + q++; + if (*q == 'n') { /* '%n': name */ + xmemcpy(p, parbuf, len); + p += len; + if (is_over != 0) { + xmemcpy(p, "...", 3); + p += 3; + } + q++; + } + else + goto normal_char; + } + else { + normal_char: + *p++ = *q++; + } + } + *p = '\0'; + len = p - s; + break; + + default: + q = onig_error_code_to_format(code); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q); + xmemcpy(s, q, len); + s[len] = '\0'; + break; + } + + va_end(vargs); + return (int)len; +} + +void +onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, + UChar* pat, UChar* pat_end, const UChar *fmt, va_list args) +{ + size_t need; + int n, len; + UChar *p, *s, *bp; + UChar bs[6]; + + n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args); + + need = (pat_end - pat) * 4 + 4; + + if (n + need < (size_t)bufsize) { + strcat((char* )buf, ": /"); + s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf); + + p = pat; + while (p < pat_end) { + if (*p == '\\') { + *s++ = *p++; + len = enclen(enc, p, pat_end); + while (len-- > 0) *s++ = *p++; + } + else if (*p == '/') { + *s++ = (unsigned char )'\\'; + *s++ = *p++; + } + else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) { + len = enclen(enc, p, pat_end); + if (ONIGENC_MBC_MINLEN(enc) == 1) { + while (len-- > 0) *s++ = *p++; + } + else { /* 
for UTF16 */ + int blen; + + while (len-- > 0) { + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); + blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + bp = bs; + while (blen-- > 0) *s++ = *bp++; + } + } + } + else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && + !ONIGENC_IS_CODE_SPACE(enc, *p)) { + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + bp = bs; + while (len-- > 0) *s++ = *bp++; + } + else { + *s++ = *p++; + } + } + + *s++ = '/'; + *s = '\0'; + } +} + +void +onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, + UChar* pat, UChar* pat_end, const UChar *fmt, ...) +{ + va_list args; + va_start(args, fmt); + onig_vsnprintf_with_pattern(buf, bufsize, enc, + pat, pat_end, fmt, args); + va_end(args); +} +#endif //INCLUDE_REGEXP diff --git a/src/regex.h b/src/regex.h new file mode 100644 index 0000000000..b5472122a3 --- /dev/null +++ b/src/regex.h @@ -0,0 +1,32 @@ +/********************************************************************** + + regex.h - + + $Author: akr $ + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#ifndef ONIGURUMA_REGEX_H +#define ONIGURUMA_REGEX_H 1 + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "oniguruma.h" + +#ifndef ONIG_RUBY_M17N + +ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; + +#define mbclen(p,e,enc) mrb_enc_mbclen((p),(e),(enc)) + +#endif /* ifndef ONIG_RUBY_M17N */ + +#if defined(__cplusplus) +} /* extern "C" { */ +#endif + +#endif /* ONIGURUMA_REGEX_H */ diff --git a/src/regexec.c b/src/regexec.c new file mode 100644 index 0000000000..3ef38bc8ff --- /dev/null +++ b/src/regexec.c @@ -0,0 +1,3757 @@ +/********************************************************************** + regexec.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All 
rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "mruby.h" +#ifdef INCLUDE_REGEXP +#include +#include "regint.h" + +/* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + +#ifdef USE_CRNL_AS_LINE_TERMINATOR +#define ONIGENC_IS_MBC_CRNL(enc,p,end) \ + (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ + ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) +#endif + +#ifdef USE_CAPTURE_HISTORY +static void history_tree_free(OnigCaptureTreeNode* node); + +static void +history_tree_clear(OnigCaptureTreeNode* node) +{ + int i; + + if (IS_NOT_NULL(node)) { + for (i = 0; i < node->num_childs; i++) { + if (IS_NOT_NULL(node->childs[i])) { + history_tree_free(node->childs[i]); + } + } + for (i = 0; i < node->allocated; i++) { + node->childs[i] = (OnigCaptureTreeNode* )0; + } + node->num_childs = 0; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + node->group = -1; + } +} + +static void +history_tree_free(OnigCaptureTreeNode* node) +{ + history_tree_clear(node); + xfree(node); +} + +static void +history_root_free(OnigRegion* r) +{ + if (IS_NOT_NULL(r->history_root)) { + history_tree_free(r->history_root); + r->history_root = (OnigCaptureTreeNode* )0; + } +} + +static OnigCaptureTreeNode* +history_node_new(void) +{ + OnigCaptureTreeNode* node; + + node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); + CHECK_NULL_RETURN(node); + node->childs = (OnigCaptureTreeNode** )0; + node->allocated = 0; + node->num_childs = 0; + node->group = -1; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + + return node; +} + +static int +history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) +{ +#define HISTORY_TREE_INIT_ALLOC_SIZE 8 + + if (parent->num_childs >= parent->allocated) { + int n, i; + + if (IS_NULL(parent->childs)) { + n = HISTORY_TREE_INIT_ALLOC_SIZE; + parent->childs = + (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); + } + else { + n = parent->allocated * 2; + parent->childs = + (OnigCaptureTreeNode** 
)xrealloc(parent->childs, + sizeof(OnigCaptureTreeNode*) * n); + } + CHECK_NULL_RETURN_MEMERR(parent->childs); + for (i = parent->allocated; i < n; i++) { + parent->childs[i] = (OnigCaptureTreeNode* )0; + } + parent->allocated = n; + } + + parent->childs[parent->num_childs] = child; + parent->num_childs++; + return 0; +} + +static OnigCaptureTreeNode* +history_tree_clone(OnigCaptureTreeNode* node) +{ + int i; + OnigCaptureTreeNode *clone, *child; + + clone = history_node_new(); + CHECK_NULL_RETURN(clone); + + clone->beg = node->beg; + clone->end = node->end; + for (i = 0; i < node->num_childs; i++) { + child = history_tree_clone(node->childs[i]); + if (IS_NULL(child)) { + history_tree_free(clone); + return (OnigCaptureTreeNode* )0; + } + history_tree_add_child(clone, child); + } + + return clone; +} + +extern OnigCaptureTreeNode* +onig_get_capture_tree(OnigRegion* region) +{ + return region->history_root; +} +#endif /* USE_CAPTURE_HISTORY */ + +extern void +onig_region_clear(OnigRegion* region) +{ + int i; + + for (i = 0; i < region->num_regs; i++) { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } +#ifdef USE_CAPTURE_HISTORY + history_root_free(region); +#endif +} + +extern int +onig_region_resize(OnigRegion* region, int n) +{ + region->num_regs = n; + + if (n < ONIG_NREGION) + n = ONIG_NREGION; + + if (region->allocated == 0) { + region->beg = (int* )xmalloc(n * sizeof(int)); + if (region->beg == 0) + return ONIGERR_MEMORY; + + region->end = (int* )xmalloc(n * sizeof(int)); + if (region->end == 0) { + xfree(region->beg); + return ONIGERR_MEMORY; + } + + region->allocated = n; + } + else if (region->allocated < n) { + int *tmp; + + region->allocated = 0; + tmp = (int* )xrealloc(region->beg, n * sizeof(int)); + if (tmp == 0) { + xfree(region->beg); + xfree(region->end); + return ONIGERR_MEMORY; + } + region->beg = tmp; + tmp = (int* )xrealloc(region->end, n * sizeof(int)); + if (tmp == 0) { + xfree(region->beg); + return ONIGERR_MEMORY; + } + region->end 
= tmp; + + if (region->beg == 0 || region->end == 0) + return ONIGERR_MEMORY; + + region->allocated = n; + } + + return 0; +} + +static int +onig_region_resize_clear(OnigRegion* region, int n) +{ + int r; + + r = onig_region_resize(region, n); + if (r != 0) return r; + onig_region_clear(region); + return 0; +} + +extern int +onig_region_set(OnigRegion* region, int at, int beg, int end) +{ + if (at < 0) return ONIGERR_INVALID_ARGUMENT; + + if (at >= region->allocated) { + int r = onig_region_resize(region, at + 1); + if (r < 0) return r; + } + + region->beg[at] = beg; + region->end[at] = end; + return 0; +} + +extern void +onig_region_init(OnigRegion* region) +{ + region->num_regs = 0; + region->allocated = 0; + region->beg = (int* )0; + region->end = (int* )0; + region->history_root = (OnigCaptureTreeNode* )0; +} + +extern OnigRegion* +onig_region_new(void) +{ + OnigRegion* r; + + r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); + if (r) + onig_region_init(r); + return r; +} + +extern void +onig_region_free(OnigRegion* r, int free_self) +{ + if (r) { + if (r->allocated > 0) { + if (r->beg) xfree(r->beg); + if (r->end) xfree(r->end); + r->allocated = 0; + } +#ifdef USE_CAPTURE_HISTORY + history_root_free(r); +#endif + if (free_self) xfree(r); + } +} + +extern void +onig_region_copy(OnigRegion* to, OnigRegion* from) +{ +#define RREGC_SIZE (sizeof(int) * from->num_regs) + int i; + + if (to == from) return; + + onig_region_resize(to, from->num_regs); + for (i = 0; i < from->num_regs; i++) { + to->beg[i] = from->beg[i]; + to->end[i] = from->end[i]; + } + to->num_regs = from->num_regs; + +#ifdef USE_CAPTURE_HISTORY + history_root_free(to); + + if (IS_NOT_NULL(from->history_root)) { + to->history_root = history_tree_clone(from->history_root); + } +#endif +} + + +/** stack **/ +#define INVALID_STACK_INDEX -1 + +/* stack type */ +/* used by normal-POP */ +#define STK_ALT 0x0001 +#define STK_LOOK_BEHIND_NOT 0x0002 +#define STK_POS_NOT 0x0003 +/* handled by normal-POP */ 
+#define STK_MEM_START 0x0100 +#define STK_MEM_END 0x8200 +#define STK_REPEAT_INC 0x0300 +#define STK_STATE_CHECK_MARK 0x1000 +/* avoided by normal-POP */ +#define STK_NULL_CHECK_START 0x3000 +#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ +#define STK_MEM_END_MARK 0x8400 +#define STK_POS 0x0500 /* used when POP-POS */ +#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0700 +#define STK_CALL_FRAME 0x0800 +#define STK_RETURN 0x0900 +#define STK_VOID 0x0a00 /* for fill a blank */ + +/* stack type check mask */ +#define STK_MASK_POP_USED 0x00ff +#define STK_MASK_TO_VOID_TARGET 0x10ff +#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).best_len = ONIG_MISMATCH;\ +} while(0) +#else +#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ +} while(0) +#endif + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 + +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ + if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ + unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ + offset = ((offset) * (state_num)) >> 3;\ + if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ + if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\ + (msa).state_check_buff = (void* )xmalloc(size);\ + CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\ + }\ + else \ + (msa).state_check_buff = (void* )xalloca(size);\ + xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ + (size_t )(size - (offset))); \ + (msa).state_check_buff_size = size;\ 
+ }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ + } while(0) + +#define MATCH_ARG_FREE(msa) do {\ + if ((msa).stack_p) xfree((msa).stack_p);\ + if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ + if ((msa).state_check_buff) xfree((msa).state_check_buff);\ + }\ +} while(0) +#else +#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) +#endif + + + +#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ + if (msa->stack_p) {\ + alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ + stk_alloc = (OnigStackType* )(msa->stack_p);\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + msa->stack_n;\ + }\ + else {\ + alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\ + + sizeof(OnigStackType) * (stack_num));\ + stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + (stack_num);\ + }\ +} while(0) + +#define STACK_SAVE do{\ + if (stk_base != stk_alloc) {\ + msa->stack_p = stk_base;\ + msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\ + };\ +} while(0) + +static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; + +extern unsigned int +onig_get_match_stack_limit_size(void) +{ + return MatchStackLimitSize; +} + +extern int +onig_set_match_stack_limit_size(unsigned int size) +{ + MatchStackLimitSize = size; + return 0; +} + +static int +stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, + OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) +{ + size_t n; + OnigStackType *x, *stk_base, *stk_end, *stk; + + stk_base = *arg_stk_base; + stk_end = *arg_stk_end; + stk = *arg_stk; + + n = stk_end - stk_base; + if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { + x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); + if 
(IS_NULL(x)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + xmemcpy(x, stk_base, n * sizeof(OnigStackType)); + n *= 2; + } + else { + unsigned int limit_size = MatchStackLimitSize; + n *= 2; + if (limit_size != 0 && n > limit_size) { + if ((unsigned int )(stk_end - stk_base) == limit_size) + return ONIGERR_MATCH_STACK_LIMIT_OVER; + else + n = limit_size; + } + x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); + if (IS_NULL(x)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + } + *arg_stk = x + (stk - stk_base); + *arg_stk_base = x; + *arg_stk_end = x + n; + return 0; +} + +#define STACK_ENSURE(n) do {\ + if (stk_end - stk < (n)) {\ + int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ + if (r != 0) { STACK_SAVE; return r; } \ + }\ +} while(0) + +#define STACK_AT(index) (stk_base + (index)) +#define GET_STACK_INDEX(stk) ((stk) - stk_base) + +#define STACK_PUSH_TYPE(stack_type) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + STACK_INC;\ +} while(0) + +#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +#define STATE_CHECK_POS(s,snum) \ + (((s) - str) * num_comb_exp_check + ((snum) - 1)) +#define STATE_CHECK_VAL(v,snum) do {\ + if (state_check_buff != NULL) {\ + int x = STATE_CHECK_POS(s,snum);\ + (v) = state_check_buff[x/8] & (1<<(x%8));\ + }\ + else (v) = 0;\ +} while(0) + + +#define ELSE_IF_STATE_CHECK_MARK(stk) \ + else if ((stk)->type == STK_STATE_CHECK_MARK) { \ + int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ + state_check_buff[x/8] |= (1<<(x%8)); \ + } + +#define STACK_PUSH(stack_type,pat,s,sprev) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + 
stk->u.state.state_check = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_ALT;\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_STATE_CHECK(s,snum) do {\ + if (state_check_buff != NULL) {\ + STACK_ENSURE(1);\ + stk->type = STK_STATE_CHECK_MARK;\ + stk->u.state.pstr = (s);\ + stk->u.state.state_check = (snum);\ + STACK_INC;\ + }\ +} while(0) + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +#define ELSE_IF_STATE_CHECK_MARK(stk) + +#define STACK_PUSH(stack_type,pat,s,sprev) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + STACK_INC;\ +} while(0) +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + +#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) +#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) +#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) +#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) +#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ + STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) + +#define STACK_PUSH_REPEAT(id, pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT;\ + stk->u.repeat.num = (id);\ + stk->u.repeat.pcode = (pat);\ + stk->u.repeat.count = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_REPEAT_INC(sindex) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT_INC;\ + stk->u.repeat_inc.si = (sindex);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_START(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_START;\ + stk->u.mem.num = (mnum);\ + stk->u.mem.pstr = (s);\ + 
stk->u.mem.start = mem_start_stk[mnum];\ + stk->u.mem.end = mem_end_stk[mnum];\ + mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum] = INVALID_STACK_INDEX;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END;\ + stk->u.mem.num = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.start = mem_start_stk[mnum];\ + stk->u.mem.end = mem_end_stk[mnum];\ + mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END_MARK(mnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END_MARK;\ + stk->u.mem.num = (mnum);\ + STACK_INC;\ +} while(0) + +#define STACK_GET_MEM_START(mnum, k) do {\ + int level = 0;\ + k = stk;\ + while (k > stk_base) {\ + k--;\ + if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ + && k->u.mem.num == (mnum)) {\ + level++;\ + }\ + else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) break;\ + level--;\ + }\ + }\ +} while(0) + +#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ + int level = 0;\ + while (k < stk) {\ + if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) (start) = k->u.mem.pstr;\ + level++;\ + }\ + else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ + level--;\ + if (level == 0) {\ + (end) = k->u.mem.pstr;\ + break;\ + }\ + }\ + k++;\ + }\ +} while(0) + +#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_START;\ + stk->u.null_check.num = (cnum);\ + stk->u.null_check.pstr = (s);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_END;\ + stk->u.null_check.num = (cnum);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_CALL_FRAME(pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALL_FRAME;\ + stk->u.call_frame.ret_addr = (pat);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_RETURN do {\ + STACK_ENSURE(1);\ + stk->type = STK_RETURN;\ + STACK_INC;\ 
+} while(0) + + +#ifdef ONIG_DEBUG +#define STACK_BASE_CHECK(p, at) \ + if ((p) < stk_base) {\ + fprintf(stderr, "at %s\n", at);\ + goto stack_error;\ + } +#else +#define STACK_BASE_CHECK(p, at) +#endif + +#define STACK_POP_ONE do {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ +} while(0) + +#define STACK_POP do {\ + switch (pop_level) {\ + case STACK_POP_LEVEL_FREE:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + case STACK_POP_LEVEL_MEM_START:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP 2"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + default:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP 3"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + }\ +} while(0) + +#define STACK_POP_TIL_POS_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ + if (stk->type == STK_POS_NOT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + 
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ +} while(0) + +#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ + if (stk->type == STK_LOOK_BEHIND_NOT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ +} while(0) + +#define STACK_POS_END(k) do {\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_POS_END"); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_POS) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define STACK_STOP_BT_END do {\ + OnigStackType *k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_STOP_BT) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK(isnull,id,s) do {\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + else level--;\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + level++;\ + }\ + }\ 
+} while(0) + +#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + else {\ + level--;\ + }\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + if (k->u.null_check.num == (id)) level++;\ + }\ + }\ +} while(0) + +#define STACK_GET_REPEAT(id, k) do {\ + int level = 0;\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, 
"STACK_GET_REPEAT"); \ + if (k->type == STK_REPEAT) {\ + if (level == 0) {\ + if (k->u.repeat.num == (id)) {\ + break;\ + }\ + }\ + }\ + else if (k->type == STK_CALL_FRAME) level--;\ + else if (k->type == STK_RETURN) level++;\ + }\ +} while(0) + +#define STACK_RETURN(addr) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_RETURN"); \ + if (k->type == STK_CALL_FRAME) {\ + if (level == 0) {\ + (addr) = k->u.call_frame.ret_addr;\ + break;\ + }\ + else level--;\ + }\ + else if (k->type == STK_RETURN)\ + level++;\ + }\ +} while(0) + + +#define STRING_CMP(s1,s2,len) do {\ + while (len-- > 0) {\ + if (*s1++ != *s2++) goto fail;\ + }\ +} while(0) + +#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \ + goto fail; \ +} while(0) + +static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, + UChar* s1, UChar** ps2, int mblen, const UChar* text_end) +{ + UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *p1, *p2, *end1, *s2; + int len1, len2; + + s2 = *ps2; + end1 = s1 + mblen; + while (s1 < end1) { + len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1); + len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2); + if (len1 != len2) return 0; + p1 = buf1; + p2 = buf2; + while (len1-- > 0) { + if (*p1 != *p2) return 0; + p1++; + p2++; + } + } + + *ps2 = s2; + return 1; +} + +#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ + is_fail = 0;\ + while (len-- > 0) {\ + if (*s1++ != *s2++) {\ + is_fail = 1; break;\ + }\ + }\ +} while(0) + +#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \ + is_fail = 1; \ + else \ + is_fail = 0; \ +} while(0) + + +#define IS_EMPTY_STR (str == end) +#define ON_STR_BEGIN(s) ((s) == str) +#define ON_STR_END(s) ((s) == end) +#ifdef 
USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define DATA_ENSURE_CHECK1 (s < right_range) +#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) +#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail +#else +#define DATA_ENSURE_CHECK1 (s < end) +#define DATA_ENSURE_CHECK(n) (s + (n) <= end) +#define DATA_ENSURE(n) if (s + (n) > end) goto fail +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + + +#ifdef USE_CAPTURE_HISTORY +static int +make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, + OnigStackType* stk_top, UChar* str, regex_t* reg) +{ + int n, r; + OnigCaptureTreeNode* child; + OnigStackType* k = *kp; + + while (k < stk_top) { + if (k->type == STK_MEM_START) { + n = k->u.mem.num; + if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && + BIT_STATUS_AT(reg->capture_history, n) != 0) { + child = history_node_new(); + CHECK_NULL_RETURN_MEMERR(child); + child->group = n; + child->beg = (int )(k->u.mem.pstr - str); + r = history_tree_add_child(node, child); + if (r != 0) return r; + *kp = (k + 1); + r = make_capture_history_tree(child, kp, stk_top, str, reg); + if (r != 0) return r; + + k = *kp; + child->end = (int )(k->u.mem.pstr - str); + } + } + else if (k->type == STK_MEM_END) { + if (k->u.mem.num == node->group) { + node->end = (int )(k->u.mem.pstr - str); + *kp = k; + return 0; + } + } + k++; + } + + return 1; /* 1: root node ending. 
*/ +} +#endif + +#ifdef USE_BACKREF_WITH_LEVEL +static int mem_is_in_memp(int mem, int num, UChar* memp) +{ + int i; + MemNumType m; + + for (i = 0; i < num; i++) { + GET_MEMNUM_INC(m, memp); + if (mem == (int )m) return 1; + } + return 0; +} + +static int backref_match_at_nested_level(regex_t* reg + , OnigStackType* top, OnigStackType* stk_base + , int ignore_case, int case_fold_flag + , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) +{ + UChar *ss, *p, *pstart, *pend = NULL_UCHARP; + int level; + OnigStackType* k; + + level = 0; + k = top; + k--; + while (k >= stk_base) { + if (k->type == STK_CALL_FRAME) { + level--; + } + else if (k->type == STK_RETURN) { + level++; + } + else if (level == nest) { + if (k->type == STK_MEM_START) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pstart = k->u.mem.pstr; + if (pend != NULL_UCHARP) { + if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ + p = pstart; + ss = *s; + + if (ignore_case != 0) { + if (string_cmp_ic(reg->enc, case_fold_flag, + pstart, &ss, (int )(pend - pstart), send) == 0) + return 0; /* or goto next_mem; */ + } + else { + while (p < pend) { + if (*p++ != *ss++) return 0; /* or goto next_mem; */ + } + } + + *s = ss; + return 1; + } + } + } + else if (k->type == STK_MEM_END) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pend = k->u.mem.pstr; + } + } + } + k--; + } + + return 0; +} +#endif /* USE_BACKREF_WITH_LEVEL */ + + +#ifdef ONIG_DEBUG_STATISTICS + +#define USE_TIMEOFDAY + +#ifdef USE_TIMEOFDAY +#ifdef HAVE_SYS_TIME_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +static struct timeval ts, te; +#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) +#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ + (((te).tv_sec - (ts).tv_sec)*1000000)) +#else +#ifdef HAVE_SYS_TIMES_H +#include +#endif +static struct tms ts, te; +#define GETTIME(t) times(&(t)) +#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) +#endif + +static int 
OpCounter[256]; +static int OpPrevCounter[256]; +static unsigned long OpTime[256]; +static int OpCurr = OP_FINISH; +static int OpPrevTarget = OP_FAIL; +static int MaxStackDepth = 0; + +#define MOP_IN(opcode) do {\ + if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ + OpCurr = opcode;\ + OpCounter[opcode]++;\ + GETTIME(ts);\ +} while(0) + +#define MOP_OUT do {\ + GETTIME(te);\ + OpTime[OpCurr] += TIMEDIFF(te, ts);\ +} while(0) + +extern void +onig_statistics_init(void) +{ + int i; + for (i = 0; i < 256; i++) { + OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; + } + MaxStackDepth = 0; +} + +extern void +onig_print_statistics(FILE* f) +{ + int i; + fprintf(f, " count prev time\n"); + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + fprintf(f, "%8d: %8d: %10ld: %s\n", + OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + } + fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); +} + +#define STACK_INC do {\ + stk++;\ + if (stk - stk_base > MaxStackDepth) \ + MaxStackDepth = stk - stk_base;\ +} while(0) + +#else +#define STACK_INC stk++ + +#define MOP_IN(opcode) +#define MOP_OUT +#endif + + +/* matching region of POSIX API */ +typedef int regoff_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} posix_regmatch_t; + +/* match data(str - end) from position (sstart). */ +/* if sstart == str then set sprev to NULL. 
*/ +static long +match_at(regex_t* reg, const UChar* str, const UChar* end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar* right_range, +#endif + const UChar* sstart, UChar* sprev, OnigMatchArg* msa) +{ + static const UChar FinishCode[] = { OP_FINISH }; + + int i, n, num_mem, best_len, pop_level; + LengthType tlen, tlen2; + MemNumType mem; + RelAddrType addr; + OnigOptionType option = reg->options; + OnigEncoding encode = reg->enc; + OnigCaseFoldType case_fold_flag = reg->case_fold_flag; + UChar *s, *q, *sbegin; + UChar *p = reg->p; + char *alloca_base; + OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; + OnigStackType *stkp; /* used as any purpose. */ + OnigStackIndex si; + OnigStackIndex *repeat_stk; + OnigStackIndex *mem_start_stk, *mem_end_stk; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int scv; + unsigned char* state_check_buff = msa->state_check_buff; + int num_comb_exp_check = reg->num_comb_exp_check; +#endif + n = reg->num_repeat + reg->num_mem * 2; + + STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); + pop_level = reg->stack_pop_level; + num_mem = reg->num_mem; + repeat_stk = (OnigStackIndex* )alloca_base; + + mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); + mem_end_stk = mem_start_stk + num_mem; + mem_start_stk--; /* for index start from 1, + mem_start_stk[1]..mem_start_stk[num_mem] */ + mem_end_stk--; /* for index start from 1, + mem_end_stk[1]..mem_end_stk[num_mem] */ + for (i = 1; i <= num_mem; i++) { + mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; + } + +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", + (int )str, (int )end, (int )sstart, (int )sprev); + fprintf(stderr, "size: %d, start offset: %d\n", + (int )(end - str), (int )(sstart - str)); +#endif + + STACK_PUSH_ENSURED(STK_ALT, (UChar *)FinishCode); /* bottom stack */ + best_len = ONIG_MISMATCH; + s = (UChar* )sstart; + while (1) { +#ifdef ONIG_DEBUG_MATCH + if (s) { + UChar *q, *bp, 
buf[50]; + int len; + fprintf(stderr, "%4d> \"", (int )(s - str)); + bp = buf; + for (i = 0, q = s; i < 7 && q < end; i++) { + len = enclen(encode, q, end); + while (len-- > 0) *bp++ = *q++; + } + if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } + else { xmemcpy(bp, "\"", 1); bp += 1; } + *bp = 0; + fputs((char* )buf, stderr); + for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); + onig_print_compiled_byte_code(stderr, p, NULL, encode); + fprintf(stderr, "\n"); + } +#endif + + sbegin = s; + switch (*p++) { + case OP_END: MOP_IN(OP_END); + n = s - sstart; + if (n > best_len) { + OnigRegion* region; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(option)) { + if (n > msa->best_len) { + msa->best_len = n; + msa->best_s = (UChar* )sstart; + } + else + goto end_best_len; + } +#endif + best_len = n; + region = msa->region; + if (region) { + region->beg[0] = sstart - str; + region->end[0] = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; + + region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? 
STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } + } + +#ifdef USE_CAPTURE_HISTORY + if (reg->capture_history != 0) { + int r; + OnigCaptureTreeNode* node; + + if (IS_NULL(region->history_root)) { + region->history_root = node = history_node_new(); + CHECK_NULL_RETURN_MEMERR(node); + } + else { + node = region->history_root; + history_tree_clear(node); + } + + node->group = 0; + node->beg = sstart - str; + node->end = s - str; + + stkp = stk_base; + r = make_capture_history_tree(region->history_root, &stkp, + stk, (UChar* )str, reg); + if (r < 0) { + best_len = r; /* error code */ + goto finish; + } + } +#endif /* USE_CAPTURE_HISTORY */ + } /* if (region) */ + } /* n > best_len */ + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + end_best_len: +#endif + MOP_OUT; + + if (IS_FIND_CONDITION(option)) { + if (IS_FIND_NOT_EMPTY(option) && s == sstart) { + best_len = ONIG_MISMATCH; + goto fail; /* for retry */ + } + if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { + goto fail; /* for retry */ + } + } + + /* default behavior: return first-matching result. 
*/ + goto finish; + break; + + case OP_EXACT1: MOP_IN(OP_EXACT1); + if (*p != *s++) goto fail; + DATA_ENSURE(0); + p++; + MOP_OUT; + break; + + case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); + { + int len; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) { + goto fail; + } + p++; q++; + } + } + MOP_OUT; + break; + + case OP_EXACT2: MOP_IN(OP_EXACT2); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACT3: MOP_IN(OP_EXACT3); + DATA_ENSURE(3); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACT4: MOP_IN(OP_EXACT4); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACT5: MOP_IN(OP_EXACT5); + DATA_ENSURE(5); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACTN: MOP_IN(OP_EXACTN); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen); + while (tlen-- > 0) { + if (*p++ != *s++) goto fail; + } + sprev = s - 1; + MOP_OUT; + continue; + break; + + case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); + { + int len; + UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + GET_LENGTH_INC(tlen, p); + endp = p + tlen; + + while (p < endp) { + sprev = s; + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + 
case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) goto fail; + p++; q++; + } + } + } + + MOP_OUT; + continue; + break; + + case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + break; + + case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); + DATA_ENSURE(6); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 2); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - 2; + MOP_OUT; + continue; + break; + + case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 3); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - 3; + MOP_OUT; + continue; + break; + + case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); + GET_LENGTH_INC(tlen, p); /* mb-len */ + GET_LENGTH_INC(tlen2, p); /* string len */ + tlen2 *= tlen; + DATA_ENSURE(tlen2); + while (tlen2-- > 0) { + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - tlen; + MOP_OUT; + continue; + break; + + case OP_CCLASS: MOP_IN(OP_CCLASS); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; + p += SIZE_BITSET; + s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. 
\D, \S */ + MOP_OUT; + break; + + case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); + if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail; + + cclass_mb: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len; + + DATA_ENSURE(1); + mb_len = enclen(encode, s, end); + DATA_ENSURE(mb_len); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (! onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (! onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + MOP_OUT; + break; + + case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { + p += SIZE_BITSET; + goto cclass_mb; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) == 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + MOP_OUT; + break; + + case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; + p += SIZE_BITSET; + s += enclen(encode, s, end); + MOP_OUT; + break; + + case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) { + s++; + GET_LENGTH_INC(tlen, p); + p += tlen; + goto cc_mb_not_success; + } + + cclass_mb_not: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len = enclen(encode, s, end); + + if (! 
DATA_ENSURE_CHECK(mb_len)) { + DATA_ENSURE(1); + s = (UChar* )end; + p += tlen; + goto cc_mb_not_success; + } + + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + + cc_mb_not_success: + MOP_OUT; + break; + + case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { + p += SIZE_BITSET; + goto cclass_mb_not; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) != 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + MOP_OUT; + break; + + case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); + { + OnigCodePoint code; + void *node; + int mb_len; + UChar *ss; + + DATA_ENSURE(1); + GET_POINTER_INC(node, p); + mb_len = enclen(encode, s, end); + ss = s; + s += mb_len; + DATA_ENSURE(0); + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; + } + MOP_OUT; + break; + + case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); + DATA_ENSURE(1); + n = enclen(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + s += n; + MOP_OUT; + break; + + case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); + DATA_ENSURE(1); + n = enclen(encode, s, end); + DATA_ENSURE(n); + s += n; + MOP_OUT; + break; + + case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); + while (DATA_ENSURE_CHECK1) { + STACK_PUSH_ALT(p, s, sprev); + n = enclen(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + MOP_OUT; + break; + + case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); + while (DATA_ENSURE_CHECK1) { + STACK_PUSH_ALT(p, s, sprev); + n = enclen(encode, s, end); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + MOP_OUT; + break; + + 
case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enclen(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + p++; + MOP_OUT; + break; + + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enclen(encode, s, end); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + p++; + MOP_OUT; + break; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); + GET_STATE_CHECK_NUM_INC(mem, p); + while (DATA_ENSURE_CHECK1) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enclen(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + MOP_OUT; + break; + + case OP_STATE_CHECK_ANYCHAR_ML_STAR: + MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); + + GET_STATE_CHECK_NUM_INC(mem, p); + while (DATA_ENSURE_CHECK1) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enclen(encode, s, end); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + MOP_OUT; + break; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + + case OP_WORD: MOP_IN(OP_WORD); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enclen(encode, s, end); + MOP_OUT; + break; + + case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enclen(encode, s, end); + MOP_OUT; + break; + + case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + DATA_ENSURE(1); + if (! 
ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + == ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + MOP_OUT; + continue; + break; + + case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + != ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + MOP_OUT; + continue; + break; + +#ifdef USE_WORD_BEGIN_END + case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { + if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + MOP_OUT; + continue; + } + } + goto fail; + break; + + case OP_WORD_END: MOP_IN(OP_WORD_END); + if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { + MOP_OUT; + continue; + } + } + goto fail; + break; +#endif + + case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); + if (! ON_STR_BEGIN(s)) goto fail; + + MOP_OUT; + continue; + break; + + case OP_END_BUF: MOP_IN(OP_END_BUF); + if (! 
ON_STR_END(s)) goto fail; + + MOP_OUT; + continue; + break; + + case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); + if (ON_STR_BEGIN(s)) { + if (IS_NOTBOL(msa->options)) goto fail; + MOP_OUT; + continue; + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { + MOP_OUT; + continue; + } + goto fail; + break; + + case OP_END_LINE: MOP_IN(OP_END_LINE); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + continue; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { + MOP_OUT; + continue; + } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + MOP_OUT; + continue; + } +#endif + goto fail; + break; + + case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + continue; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && + ON_STR_END(s + enclen(encode, s, end))) { + MOP_OUT; + continue; + } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + UChar* ss = s + enclen(encode, s); + ss += enclen(encode, ss); + if (ON_STR_END(ss)) { + MOP_OUT; + continue; + } + } +#endif + goto fail; + break; + + case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); + if (s != msa->start) + goto fail; + + MOP_OUT; + continue; + break; + + case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_START(mem, s); + MOP_OUT; + continue; + break; + + case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); + GET_MEMNUM_INC(mem, p); + mem_start_stk[mem] = (OnigStackIndex )((void* )s); + MOP_OUT; + 
continue; + break; + + case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_END(mem, s); + MOP_OUT; + continue; + break; + + case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (OnigStackIndex )((void* )s); + MOP_OUT; + continue; + break; + +#ifdef USE_SUBEXP_CALL + case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); + GET_MEMNUM_INC(mem, p); + STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ + STACK_PUSH_MEM_END(mem, s); + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + MOP_OUT; + continue; + break; + + case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (OnigStackIndex )((void* )s); + STACK_GET_MEM_START(mem, stkp); + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + else + mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); + + STACK_PUSH_MEM_END_MARK(mem); + MOP_OUT; + continue; + break; +#endif + + case OP_BACKREF1: MOP_IN(OP_BACKREF1); + mem = 1; + goto backref; + break; + + case OP_BACKREF2: MOP_IN(OP_BACKREF2); + mem = 2; + goto backref; + break; + + case OP_BACKREFN: MOP_IN(OP_BACKREFN); + GET_MEMNUM_INC(mem, p); + backref: + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP(pstart, s, n); + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + MOP_OUT; + continue; + } + break; + + case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); + GET_MEMNUM_INC(mem, p); + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP_IC(case_fold_flag, pstart, &s, n, end); + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + MOP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE(pstart, swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + continue; + } + break; + +#ifdef USE_BACKREF_WITH_LEVEL + case OP_BACKREF_WITH_LEVEL: + { + int len; + OnigOptionType ic; + LengthType level; + + GET_OPTION_INC(ic, p); + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); + + sprev = s; + if (backref_match_at_nested_level(reg, stk, stk_base, ic + , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; + + MOP_OUT; + continue; + } + + break; +#endif + + case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); + 
GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_PUSH_NULL_CHECK_START(mem, s); + MOP_OUT; + continue; + break; + + case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK(isnull, mem, s); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + null_check_found: + /* empty loop founded, skip next instruction */ + switch (*p++) { + case OP_JUMP: + case OP_PUSH: + p += SIZE_RELADDR; + break; + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: + p += SIZE_MEMNUM; + break; + default: + goto unexpected_bytecode_error; + break; + } + } + } + MOP_OUT; + continue; + break; + +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + } + MOP_OUT; + continue; + break; +#endif + +#ifdef USE_SUBEXP_CALL + case OP_NULL_CHECK_END_MEMST_PUSH: + MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); +#else + STACK_NULL_CHECK_REC(isnull, mem, s); +#endif + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + else { + STACK_PUSH_NULL_CHECK_END(mem); + } + } + MOP_OUT; + continue; + break; +#endif + + case OP_JUMP: MOP_IN(OP_JUMP); + GET_RELADDR_INC(addr, p); + p += addr; + MOP_OUT; + 
CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_PUSH: MOP_IN(OP_PUSH); + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT(p + addr, s, sprev); + MOP_OUT; + continue; + break; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); + GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); + MOP_OUT; + continue; + break; + + case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); + GET_STATE_CHECK_NUM_INC(mem, p); + GET_RELADDR_INC(addr, p); + STATE_CHECK_VAL(scv, mem); + if (scv) { + p += addr; + } + else { + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); + } + MOP_OUT; + continue; + break; + + case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); + GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_STATE_CHECK(s, mem); + MOP_OUT; + continue; + break; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + + case OP_POP: MOP_IN(OP_POP); + STACK_POP_ONE; + MOP_OUT; + continue; + break; + + case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); + GET_RELADDR_INC(addr, p); + if (*p == *s && DATA_ENSURE_CHECK1) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + MOP_OUT; + continue; + } + p += (addr + 1); + MOP_OUT; + continue; + break; + + case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); + GET_RELADDR_INC(addr, p); + if (*p == *s) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + MOP_OUT; + continue; + } + p++; + MOP_OUT; + continue; + break; + + case OP_REPEAT: MOP_IN(OP_REPEAT); + { + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p + addr, s, sprev); + } + } + MOP_OUT; + continue; + break; + + case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); + { + 
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p, s, sprev); + p += addr; + } + } + MOP_OUT; + continue; + break; + + case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc: + stkp->u.repeat.count++; + if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { + /* end of repeat. Nothing to do. */ + } + else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + STACK_PUSH_ALT(p, s, sprev); + p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ + } + else { + p = stkp->u.repeat.pcode; + } + STACK_PUSH_REPEAT_INC(si); + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc; + break; + + case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc_ng: + stkp->u.repeat.count++; + if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { + if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + UChar* pcode = stkp->u.repeat.pcode; + + STACK_PUSH_REPEAT_INC(si); + STACK_PUSH_ALT(pcode, s, sprev); + } + else { + p = stkp->u.repeat.pcode; + STACK_PUSH_REPEAT_INC(si); + } + } + else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { + STACK_PUSH_REPEAT_INC(si); + } + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc_ng; + break; + + case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); + 
STACK_PUSH_POS(s, sprev); + MOP_OUT; + continue; + break; + + case OP_POP_POS: MOP_IN(OP_POP_POS); + { + STACK_POS_END(stkp); + s = stkp->u.state.pstr; + sprev = stkp->u.state.pstr_prev; + } + MOP_OUT; + continue; + break; + + case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); + GET_RELADDR_INC(addr, p); + STACK_PUSH_POS_NOT(p + addr, s, sprev); + MOP_OUT; + continue; + break; + + case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); + STACK_POP_TIL_POS_NOT; + goto fail; + break; + + case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); + STACK_PUSH_STOP_BT; + MOP_OUT; + continue; + break; + + case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); + STACK_STOP_BT_END; + MOP_OUT; + continue; + break; + + case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); + GET_LENGTH_INC(tlen, p); + s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); + if (IS_NULL(s)) goto fail; + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); + MOP_OUT; + continue; + break; + + case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); + GET_RELADDR_INC(addr, p); + GET_LENGTH_INC(tlen, p); + q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); + if (IS_NULL(q)) { + /* too short case -> success. ex. 
/(?p + addr; + MOP_OUT; + continue; + break; + + case OP_RETURN: MOP_IN(OP_RETURN); + STACK_RETURN(p); + STACK_PUSH_RETURN; + MOP_OUT; + continue; + break; +#endif + + case OP_FINISH: + goto finish; + break; + + fail: + MOP_OUT; + /* fall */ + case OP_FAIL: MOP_IN(OP_FAIL); + STACK_POP; + p = stk->u.state.pcode; + s = stk->u.state.pstr; + sprev = stk->u.state.pstr_prev; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + if (stk->u.state.state_check != 0) { + stk->type = STK_STATE_CHECK_MARK; + stk++; + } +#endif + + MOP_OUT; + continue; + break; + + default: + goto bytecode_error; + + } /* end of switch */ + sprev = sbegin; + } /* end of while(1) */ + + finish: + STACK_SAVE; + return best_len; + +#ifdef ONIG_DEBUG + stack_error: + STACK_SAVE; + return ONIGERR_STACK_BUG; +#endif + + bytecode_error: + STACK_SAVE; + return ONIGERR_UNDEFINED_BYTECODE; + + unexpected_bytecode_error: + STACK_SAVE; + return ONIGERR_UNEXPECTED_BYTECODE; +} + + +static UChar* +slow_search(OnigEncoding enc, UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *t, *p, *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + if (enc->max_enc_len == enc->min_enc_len) { + int n = enc->max_enc_len; + + while (s < end) { + if (*s == *target) { + p = s + 1; + t = target + 1; + if (target_end == t || memcmp(t, p, target_end - t) == 0) + return s; + } + s += n; + } + return (UChar*)NULL; + } + while (s < end) { + if (*s == *target) { + p = s + 1; + t = target + 1; + if (target_end == t || memcmp(t, p, target_end - t) == 0) + return s; + } + s += enclen(enc, s, text_end); + } + + return (UChar* )NULL; +} + +static int +str_lower_case_match(OnigEncoding enc, int case_fold_flag, + const UChar* t, const UChar* tend, + const UChar* p, const UChar* end) +{ + int lowlen; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + while (t < tend) { + lowlen = 
ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); + q = lowbuf; + while (lowlen > 0) { + if (*t++ != *q++) return 0; + lowlen--; + } + } + + return 1; +} + +static UChar* +slow_search_ic(OnigEncoding enc, int case_fold_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + while (s < end) { + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, text_end)) + return s; + + s += enclen(enc, s, text_end); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *t, *p, *s; + + s = (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); + + while (s >= text) { + if (*s == *target) { + p = s + 1; + t = target + 1; + while (t < target_end) { + if (*t != *p++) + break; + t++; + } + if (t == target_end) + return s; + } + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *s; + + s = (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); + + while (s >= text) { + if (str_lower_case_match(enc, case_fold_flag, + target, target_end, s, text_end)) + return s; + + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); + } + + return (UChar* )NULL; +} + 
+static UChar* +bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) +{ + const UChar *s, *se, *t, *p, *end; + const UChar *tail; + int skip, tlen1; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", + (int )text, (int )text_end, (int )text_range); +#endif + + tail = target_end - 1; + tlen1 = tail - target; + end = text_range; + if (end + tlen1 > text_end) + end = text_end - tlen1; + + s = text; + + if (IS_NULL(reg->int_map)) { + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; + } + skip = reg->map[*se]; + t = s; + do { + s += enclen(reg->enc, s, end); + } while ((s - t) < skip && s < end); + } + } + else { + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; + } + skip = reg->int_map[*se]; + t = s; + do { + s += enclen(reg->enc, s, end); + } while ((s - t) < skip && s < end); + } + } + + return (UChar* )NULL; +} + +static UChar* +bm_search(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, const UChar* text_range) +{ + const UChar *s, *t, *p, *end; + const UChar *tail; + + end = text_range + (target_end - target) - 1; + if (end > text_end) + end = text_end; + + tail = target_end - 1; + s = text + (target_end - target) - 1; + if (IS_NULL(reg->int_map)) { + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; + } + s += reg->map[*s]; + } + } + else { /* see int_map[] */ + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; + } + s += reg->int_map[*s]; + } + } + return (UChar* )NULL; +} + +static int +set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, + int** skip) +{ + int i, len; + + if 
(IS_NULL(*skip)) { + *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*skip)) return ONIGERR_MEMORY; + } + + len = end - s; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + (*skip)[i] = len; + + for (i = len - 1; i > 0; i--) + (*skip)[s[i]] = i; + + return 0; +} + +static UChar* +bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + const UChar *s, *t, *p; + + s = text_end - (target_end - target); + if (text_start < s) + s = text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); + + while (s >= text) { + p = s; + t = target; + while (t < target_end && *p == *t) { + p++; t++; + } + if (t == target_end) + return (UChar* )s; + + s -= reg->int_map_backward[*s]; + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); + } + + return (UChar* )NULL; +} + +static UChar* +map_search(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* text_range, const UChar* text_end) +{ + const UChar *s = text; + + while (s < text_range) { + if (map[*s]) return (UChar* )s; + + s += enclen(enc, s, text_end); + } + return (UChar* )NULL; +} + +static UChar* +map_search_backward(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* adjust_text, + const UChar* text_start, const UChar* text_end) +{ + const UChar *s = text_start; + + while (s >= text) { + if (map[*s]) return (UChar* )s; + + s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end); + } + return (UChar* )NULL; +} + +extern long +onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, + OnigOptionType option) +{ + long r; + UChar *prev; + OnigMatchArg msa; + +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && 
ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(reg); + ONIG_STATE_INC(reg); + } + } + else { + int n; + + THREAD_ATOMIC_END; + n = 0; + while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + goto start; + } + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ + + MATCH_ARG_INIT(msa, option, region, at); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = at - str; + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif + + if (region + ) { + r = onig_region_resize_clear(region, reg->num_mem + 1); + } + else + r = 0; + + if (r == 0) { + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end); + r = match_at(reg, str, end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + end, +#endif + at, prev, &msa); + } + + MATCH_ARG_FREE(msa); + ONIG_STATE_DEC_THREAD(reg); + return r; +} + +static int +forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, + UChar* range, UChar** low, UChar** high, UChar** low_prev) +{ + UChar *p, *pprev = (UChar* )NULL; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", + (int )str, (int )end, (int )s, (int )range); +#endif + + p = s; + if (reg->dmin > 0) { + if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { + p += reg->dmin; + } + else { + UChar *q = p + reg->dmin; + while (p < q) p += enclen(reg->enc, p, end); + } + } + + retry: + switch (reg->optimize) { + case ONIG_OPTIMIZE_EXACT: + p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); + break; + case ONIG_OPTIMIZE_EXACT_IC: + p = slow_search_ic(reg->enc, reg->case_fold_flag, + reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM: + p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + p = bm_search_notrev(reg, reg->exact, 
reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_MAP: + p = map_search(reg->enc, reg->map, p, range, end); + break; + } + + if (p && p < range) { + if (p - reg->dmin < s) { + retry_gate: + pprev = p; + p += enclen(reg->enc, p, end); + goto retry; + } + + if (reg->sub_anchor) { + UChar* prev; + + switch (reg->sub_anchor) { + case ANCHOR_BEGIN_LINE: + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p, end); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; + } + break; + + case ANCHOR_END_LINE: + if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; +#endif + } + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) +#endif + ) + goto retry_gate; + break; + } + } + + if (reg->dmax == 0) { + *low = p; + if (low_prev) { + if (*low > s) + *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end); + else + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p, end); + } + } + else { + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + *low = p - reg->dmax; + if (*low > s) { + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + *low, end, (const UChar** )low_prev); + if (low_prev && IS_NULL(*low_prev)) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : s), *low, end); + } + else { + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? 
pprev : str), *low, end); + } + } + } + /* no needs to adjust *high, *high is used as range check only */ + *high = p - reg->dmin; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", + (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); +#endif + return 1; /* success */ + } + + return 0; /* fail */ +} +/* While the backward BM skip table is not built yet, a span (s - range) shorter than this is searched with the plain backward search instead. */ +#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 +/* Backward pre-scan used by onig_search(): look from s toward range (shifted by reg->dmin) for the optimizer's exact string or char map. Returns 1 and, when reg->dmax is finite, sets *low and *high; returns 0 when nothing is found, or the non-zero result of set_bm_backward_skip(). */ +static long +backward_search_range(regex_t* reg, const UChar* str, const UChar* end, + UChar* s, const UChar* range, UChar* adjrange, + UChar** low, UChar** high) +{ + int r; + UChar *p; + + range += reg->dmin; + p = s; + + retry: + switch (reg->optimize) { + case ONIG_OPTIMIZE_EXACT: + exact_method: + p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, + range, adjrange, end, p); + break; + + case ONIG_OPTIMIZE_EXACT_IC: + p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, + reg->exact, reg->exact_end, + range, adjrange, end, p); + break; + + case ONIG_OPTIMIZE_EXACT_BM: + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + if (IS_NULL(reg->int_map_backward)) { + if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) + goto exact_method; + + r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, + &(reg->int_map_backward)); + if (r) return r; + } + p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, + end, p); + break; + + case ONIG_OPTIMIZE_MAP: + p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end); + break; + } + + if (p) { + if (reg->sub_anchor) { + UChar* prev; + + switch (reg->sub_anchor) { + case ANCHOR_BEGIN_LINE: + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, str, p, end); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } + } + break; + + case ANCHOR_END_LINE: + if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + prev = onigenc_get_prev_char_head(reg->enc, adjrange, p, end); + if (IS_NULL(prev)) goto
fail; + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } +#endif + } + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) +#endif + ) { + p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end); + if (IS_NULL(p)) goto fail; + goto retry; + } + break; + } + } + + /* *low and *high are only reported when dmax is finite; *high is used as a range check only */ + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + *low = p - reg->dmax; + *high = p - reg->dmin; + *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end); + } + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "backward_search_range: low: %d, high: %d\n", + (int )(*low - str), (int )(*high - str)); +#endif + return 1; /* success */ + } + + fail: +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "backward_search_range: fail.\n"); +#endif + return 0; /* fail */ +} + +/* Search str..end for reg starting at start: forward when range > start, otherwise backward down to range. Returns the match offset (s - str), ONIG_MISMATCH when nothing matches, or the error code from onig_region_resize_clear()/match_at(). */ +extern long +onig_search(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) +{ + int r; + UChar *s, *prev; + OnigMatchArg msa; + const UChar *orig_start = start; +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar *orig_range = range; +#endif + +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(reg); + ONIG_STATE_INC(reg); + } + } + else { + int n; + + THREAD_ATOMIC_END; + n = 0; + while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + goto start; + } + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", + (int
)str, (int )(end - str), (int )(start - str), (int )(range - str)); +#endif + + if (region + ) { + r = onig_region_resize_clear(region, reg->num_mem + 1); + if (r) goto finish_no_msa; + } + + if (start > end || start < str) goto mismatch_no_msa; + + +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + if (! IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ + } +#else +#define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ + } +#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#else +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_AND_RETURN_CHECK(none) \ + r = match_at(reg, str, end, s, prev, &msa);\ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + if (! 
IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ + } +#else +#define MATCH_AND_RETURN_CHECK(none) \ + r = match_at(reg, str, end, s, prev, &msa);\ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ + } +#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + + + /* anchor optimize: resume search range */ + if (reg->anchor != 0 && str < end) { + UChar *min_semi_end, *max_semi_end; + + if (reg->anchor & ANCHOR_BEGIN_POSITION) { + /* search start-position only */ + begin_position: + if (range > start) + range = start + 1; + else + range = start; + } + else if (reg->anchor & ANCHOR_BEGIN_BUF) { + /* search str-position only */ + if (range > start) { + if (start != str) goto mismatch_no_msa; + range = str + 1; + } + else { + if (range <= str) { + start = str; + range = str; + } + else + goto mismatch_no_msa; + } + } + else if (reg->anchor & ANCHOR_END_BUF) { + min_semi_end = max_semi_end = (UChar* )end; + + end_buf: + if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) + goto mismatch_no_msa; + + if (range > start) { + if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { + start = min_semi_end - reg->anchor_dmax; + if (start < end) + start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end); + else { /* match with empty at end */ + start = onigenc_get_prev_char_head(reg->enc, str, end, end); + } + } + if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { + range = max_semi_end - reg->anchor_dmin + 1; + } + + if (start >= range) goto mismatch_no_msa; + } + else { + if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { + range = min_semi_end - reg->anchor_dmax; + } + if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { + start = max_semi_end - reg->anchor_dmin; + start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end); + } + if (range > start) 
goto mismatch_no_msa; + } + } + else if (reg->anchor & ANCHOR_SEMI_END_BUF) { + UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1); + + max_semi_end = (UChar* )end; + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + +#ifdef USE_CRNL_AS_LINE_TERMINATOR + pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1); + if (IS_NOT_NULL(pre_end) && + ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + } +#endif + if (min_semi_end > str && start <= min_semi_end) { + goto end_buf; + } + } + else { + min_semi_end = (UChar* )end; + goto end_buf; + } + } + else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { + goto begin_position; + } + } + else if (str == end) { /* empty string */ + static const UChar address_for_empty_string[] = ""; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search: empty string.\n"); +#endif + + if (reg->threshold_len == 0) { + start = end = str = address_for_empty_string; + s = (UChar* )start; + prev = (UChar* )NULL; + + MATCH_ARG_INIT(msa, option, region, start); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + msa.state_check_buff = (void* )0; + msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ +#endif + MATCH_AND_RETURN_CHECK(end); + goto mismatch; + } + goto mismatch_no_msa; + } + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", + (int )(end - str), (int )(start - str), (int )(range - str)); +#endif + + MATCH_ARG_INIT(msa, option, region, orig_start); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = (MIN(start, range) - str); + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif + + s = (UChar* )start; + if (range > start) { /* forward search */ + if (s > str) + prev = onigenc_get_prev_char_head(reg->enc, str, s, end); + else + prev = (UChar* )NULL; + + if (reg->optimize != ONIG_OPTIMIZE_NONE) { + UChar *sch_range, *low, *high, *low_prev; + + sch_range = (UChar* )range; + if 
(reg->dmax != 0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_range = (UChar* )end; + else { + sch_range += reg->dmax; + if (sch_range > end) sch_range = (UChar* )end; + } + } + + if ((end - start) < reg->threshold_len) + goto mismatch; + + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + do { + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, &low_prev)) goto mismatch; + if (s < low) { + s = low; + prev = low_prev; + } + while (s <= high) { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); + } + } while (s < range); + goto mismatch; + } + else { /* check only. */ + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, (UChar** )NULL)) goto mismatch; + + if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { + do { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); + } while (s < range); + goto mismatch; + } + } + } + + do { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); + } while (s < range); + + if (s == range) { /* because empty match with /$/. 
*/ + MATCH_AND_RETURN_CHECK(orig_range); + } + } + else { /* backward search */ +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + if (orig_start < end) + orig_start += enclen(reg->enc, orig_start, end); /* is upper range */ +#endif + + if (reg->optimize != ONIG_OPTIMIZE_NONE) { + UChar *low, *high, *adjrange, *sch_start; + + if (range < end) + adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end); + else + adjrange = (UChar* )end; + + if (reg->dmax != ONIG_INFINITE_DISTANCE && + (end - range) >= reg->threshold_len) { + do { + sch_start = s + reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) + goto mismatch; + + if (s > high) + s = high; + + while (s >= low) { + prev = onigenc_get_prev_char_head(reg->enc, str, s, end); + MATCH_AND_RETURN_CHECK(orig_start); + s = prev; + } + } while (s >= range); + goto mismatch; + } + else { /* check only. */ + if ((end - range) < reg->threshold_len) goto mismatch; + + sch_start = s; + if (reg->dmax != 0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_start = (UChar* )end; + else { + sch_start += reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + else + sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, + start, sch_start, end); + } + } + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) goto mismatch; + } + } + + do { + prev = onigenc_get_prev_char_head(reg->enc, str, s, end); + MATCH_AND_RETURN_CHECK(orig_start); + s = prev; + } while (s >= range); + } + + mismatch: +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(reg->options)) { + if (msa.best_len >= 0) { + s = msa.best_s; + goto match; + } + } +#endif + r = ONIG_MISMATCH; + + finish: + MATCH_ARG_FREE(msa); + ONIG_STATE_DEC_THREAD(reg); + + /* If result is mismatch and no FIND_NOT_EMPTY option, + then the region is not set in match_at(). 
*/ + if (IS_FIND_NOT_EMPTY(reg->options) && region + ) { + onig_region_clear(region); + } + +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %d\n", r); +#endif + return r; + + mismatch_no_msa: + r = ONIG_MISMATCH; + finish_no_msa: + ONIG_STATE_DEC_THREAD(reg); +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %d\n", r); +#endif + return r; + + match: + ONIG_STATE_DEC_THREAD(reg); + MATCH_ARG_FREE(msa); + return s - str; +} + +extern OnigEncoding +onig_get_encoding(regex_t* reg) +{ + return reg->enc; +} + +extern OnigOptionType +onig_get_options(regex_t* reg) +{ + return reg->options; +} + +extern OnigCaseFoldType +onig_get_case_fold_flag(regex_t* reg) +{ + return reg->case_fold_flag; +} + +extern const OnigSyntaxType* +onig_get_syntax(regex_t* reg) +{ + return reg->syntax; +} + +extern int +onig_number_of_captures(regex_t* reg) +{ + return reg->num_mem; +} + +extern int +onig_number_of_capture_histories(regex_t* reg) +{ +#ifdef USE_CAPTURE_HISTORY + int i, n; + + n = 0; + for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(reg->capture_history, i) != 0) + n++; + } + return n; +#else + return 0; +#endif +} + +extern void +onig_copy_encoding(OnigEncoding to, OnigEncoding from) +{ + *to = *from; +} +#endif //INCLUDE_REGEXP diff --git a/src/regint.h b/src/regint.h new file mode 100644 index 0000000000..e86a95f27a --- /dev/null +++ b/src/regint.h @@ -0,0 +1,833 @@ +#ifndef ONIGURUMA_REGINT_H +#define ONIGURUMA_REGINT_H +/********************************************************************** + regint.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* for debug */ +/* #define ONIG_DEBUG_PARSE_TREE */ +/* #define ONIG_DEBUG_COMPILE */ +/* #define ONIG_DEBUG_SEARCH */ +/* #define ONIG_DEBUG_MATCH */ +/* #define ONIG_DONT_OPTIMIZE */ + +/* for byte-code statistical data. */ +/* #define ONIG_DEBUG_STATISTICS */ + +#ifndef RUBY +#define RUBY +#endif + +#include <stddef.h> //typedef unsigned int ptrdiff_t; + +#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ + defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ + defined(ONIG_DEBUG_STATISTICS) +#ifndef ONIG_DEBUG +#define ONIG_DEBUG +#endif +#endif + +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ + (defined(__ppc__) && defined(__APPLE__)) || \ + defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ + defined(__mc68020__) +#define PLATFORM_UNALIGNED_WORD_ACCESS +#endif + +/* config */ +/* spec. 
config */ +#define USE_NAMED_GROUP +#define USE_SUBEXP_CALL +#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ +#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ +#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ +#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +/* #define USE_RECOMPILE_API */ +/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */ + +/* internal config */ +#define USE_PARSE_TREE_NODE_RECYCLE +#define USE_OP_PUSH_OR_JUMP_EXACT +#define USE_QTFR_PEEK_NEXT +#define USE_ST_LIBRARY +#define USE_SHARED_CCLASS_TABLE + +#define INIT_MATCH_STACK_SIZE 160 +#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ + +#if defined(__GNUC__) +# define ARG_UNUSED __attribute__ ((unused)) +#else +# define ARG_UNUSED +#endif + +/* */ +/* escape other system UChar definition */ +#ifndef RUBY_DEFINES_H +#include "mruby.h" +#endif +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif +#undef USE_MATCH_RANGE_IS_COMPLETE_RANGE +#undef USE_CAPTURE_HISTORY +#define USE_VARIABLE_META_CHARS +#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ +#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ +#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ +/* #define USE_MULTI_THREAD_SYSTEM */ +#define THREAD_SYSTEM_INIT /* depend on thread system */ +#define THREAD_SYSTEM_END /* depend on thread system */ +#define THREAD_ATOMIC_START /* depend on thread system */ +#define THREAD_ATOMIC_END /* depend on thread system */ +#define THREAD_PASS /* depend on thread system */ + +#ifdef RUBY + +//#define CHECK_INTERRUPT_IN_MATCH_AT mrb_thread_check_ints() +#define CHECK_INTERRUPT_IN_MATCH_AT +#define onig_st_init_table st_init_table +#define onig_st_init_table_with_size st_init_table_with_size +#define onig_st_init_numtable st_init_numtable +#define onig_st_init_numtable_with_size st_init_numtable_with_size +#define 
onig_st_init_strtable st_init_strtable +#define onig_st_init_strtable_with_size st_init_strtable_with_size +#define onig_st_delete st_delete +#define onig_st_delete_safe st_delete_safe +#define onig_st_insert st_insert +#define onig_st_lookup st_lookup +#define onig_st_foreach st_foreach +#define onig_st_add_direct st_add_direct +#define onig_st_free_table st_free_table +#define onig_st_cleanup_safe st_cleanup_safe +#define onig_st_copy st_copy +#define onig_st_nothing_key_clone st_nothing_key_clone +#define onig_st_nothing_key_free st_nothing_key_free +#define onig_st_is_member st_is_member + +#define USE_UPPER_CASE_TABLE +#else + +#define st_init_table onig_st_init_table +#define st_init_table_with_size onig_st_init_table_with_size +#define st_init_numtable onig_st_init_numtable +#define st_init_numtable_with_size onig_st_init_numtable_with_size +#define st_init_strtable onig_st_init_strtable +#define st_init_strtable_with_size onig_st_init_strtable_with_size +#define st_delete onig_st_delete +#define st_delete_safe onig_st_delete_safe +#define st_insert onig_st_insert +#define st_lookup onig_st_lookup +#define st_foreach onig_st_foreach +#define st_add_direct onig_st_add_direct +#define st_free_table onig_st_free_table +#define st_cleanup_safe onig_st_cleanup_safe +#define st_copy onig_st_copy +#define st_nothing_key_clone onig_st_nothing_key_clone +#define st_nothing_key_free onig_st_nothing_key_free +/* */ +#define onig_st_is_member st_is_member + +#define CHECK_INTERRUPT_IN_MATCH_AT + +#endif + +#define STATE_CHECK_STRING_THRESHOLD_LEN 7 +#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 + +#define THREAD_PASS_LIMIT_COUNT 8 +#define xmemset memset +#define xmemcpy memcpy +#define xmemmove memmove + +#if defined(_WIN32) && !defined(__GNUC__) +#define xalloca _alloca +#define xvsnprintf _vsnprintf +#else +#define xalloca malloc +#define xvsnprintf vsnprintf +#endif + + +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) +#define ONIG_STATE_INC(reg) 
(reg)->state++ +#define ONIG_STATE_DEC(reg) (reg)->state-- + +#define ONIG_STATE_INC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state++;\ + THREAD_ATOMIC_END;\ +} while(0) +#define ONIG_STATE_DEC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state--;\ + THREAD_ATOMIC_END;\ +} while(0) +#else +#define ONIG_STATE_INC(reg) /* Nothing */ +#define ONIG_STATE_DEC(reg) /* Nothing */ +#define ONIG_STATE_INC_THREAD(reg) /* Nothing */ +#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__)) +#include <alloca.h> +#endif + +#ifdef HAVE_STRING_H +# include <string.h> +#else +# include <strings.h> +#endif + +#include <ctype.h> +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif + +#ifdef ONIG_DEBUG +# include <stdio.h> +#endif + +#include "regenc.h" + +#define MIN(a,b) (((a)>(b))?(b):(a)) +#define MAX(a,b) (((a)<(b))?(b):(a)) + +#define IS_NULL(p) (((void*)(p)) == (void*)0) +#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) +#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL +#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY +#define NULL_UCHARP ((UChar* )0) + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + +#define PLATFORM_GET_INC(val,p,type) do{\ + (val) = *(type* )(p);\ + (p) += sizeof(type);\ +} while(0) + +#else + +#define PLATFORM_GET_INC(val,p,type) do{\ + xmemcpy(&(val), (p), sizeof(type));\ + (p) += sizeof(type);\ +} while(0) + +/* sizeof(OnigCodePoint) */ +#define WORD_ALIGNMENT_SIZE SIZEOF_LONG + +#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ + (pad_size) = WORD_ALIGNMENT_SIZE \ + - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ + if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ +} while (0) + +#define ALIGNMENT_RIGHT(addr) do {\ + (addr) += (WORD_ALIGNMENT_SIZE - 1);\ + (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ +} while (0) + +#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ + +/* stack pop level */ +#define 
STACK_POP_LEVEL_FREE 0 +#define STACK_POP_LEVEL_MEM_START 1 +#define STACK_POP_LEVEL_ALL 2 + +/* optimize flags */ +#define ONIG_OPTIMIZE_NONE 0 +#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ +#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ +#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */ +#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ +#define ONIG_OPTIMIZE_MAP 5 /* char map */ + +/* bit status */ +typedef unsigned int BitStatusType; + +#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8) +#define BIT_STATUS_CLEAR(stats) (stats) = 0 +#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) +#define BIT_STATUS_AT(stats,n) \ + ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & (1 << (n))) : ((stats) & 1)) + +#define BIT_STATUS_ON_AT(stats,n) do {\ + if ((n) < (int )BIT_STATUS_BITS_NUM) \ + (stats) |= (1 << (n));\ + else\ + (stats) |= 1;\ +} while (0) + +#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ + if ((n) < (int )BIT_STATUS_BITS_NUM)\ + (stats) |= (1 << (n));\ +} while (0) + + +#define INT_MAX_LIMIT ((1UL << (sizeof(int) * 8 - 1)) - 1) + +#define DIGITVAL(code) ((code) - '0') +#define ODIGITVAL(code) DIGITVAL(code) +#define XDIGITVAL(enc,code) \ + (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \ + : (ONIGENC_IS_CODE_UPPER(enc,code) ? 
(code) - 'A' + 10 : (code) - 'a' + 10)) + +#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) +#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE) +#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE) +#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) +#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) +#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) +#define IS_FIND_CONDITION(option) ((option) & \ + (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) +#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) +#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) +#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) + +/* OP_SET_OPTION is required for these options. +#define IS_DYNAMIC_OPTION(option) \ + (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0) +*/ +/* ignore-case and multibyte status are included in compiled code. */ +#define IS_DYNAMIC_OPTION(option) 0 + +#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \ + ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) + +#define REPEAT_INFINITE -1 +#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE) + +/* bitset */ +#define BITS_PER_BYTE 8 +#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) +#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE) +#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS +typedef unsigned int Bits; +#else +typedef unsigned char Bits; +#endif +typedef Bits BitSet[BITSET_SIZE]; +typedef Bits* BitSetRef; + +#define SIZE_BITSET (int)sizeof(BitSet) + +#define BITSET_CLEAR(bs) do {\ + int i;\ + for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ +} while (0) + +#define BS_ROOM(bs,pos) (bs)[(pos) / BITS_IN_ROOM] +#define BS_BIT(pos) (1 << ((pos) % BITS_IN_ROOM)) + +#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) +#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) +#define 
BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos)) +#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos) + +/* bytes buffer */ +typedef struct _BBuf { + UChar* p; + unsigned int used; + unsigned int alloc; +} BBuf; + +#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size)) + +#define BBUF_SIZE_INC(buf,inc) do{\ + (buf)->alloc += (inc);\ + (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ + if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ +} while (0) + +#define BBUF_EXPAND(buf,low) do{\ + do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )(low));\ + (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ + if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ +} while (0) + +#define BBUF_ENSURE_SIZE(buf,size) do{\ + unsigned int new_alloc = (buf)->alloc;\ + while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\ + if ((buf)->alloc != new_alloc) {\ + (buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\ + if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ + (buf)->alloc = new_alloc;\ + }\ +} while (0) + +#define BBUF_WRITE(buf,pos,bytes,n) do{\ + int used = (pos) + (n);\ + if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ + xmemcpy((buf)->p + (pos), (bytes), (n));\ + if ((buf)->used < (unsigned int )used) (buf)->used = used;\ +} while (0) + +#define BBUF_WRITE1(buf,pos,byte) do{\ + int used = (pos) + 1;\ + if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ + (buf)->p[(pos)] = (byte);\ + if ((buf)->used < (unsigned int )used) (buf)->used = used;\ +} while (0) + +#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n)) +#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte)) +#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used) +#define BBUF_GET_OFFSET_POS(buf) ((buf)->used) + +/* from < to */ +#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\ + if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\ + xmemmove((buf)->p + (to), (buf)->p + (from), 
(n));\ + if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\ +} while (0) + +/* from > to */ +#define BBUF_MOVE_LEFT(buf,from,to,n) do {\ + xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ +} while (0) + +/* from > to */ +#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\ + xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\ + (buf)->used -= ((from) - (to));\ +} while (0) + +#define BBUF_INSERT(buf,pos,bytes,n) do {\ + if ((pos) >= (buf)->used) {\ + BBUF_WRITE(buf,pos,bytes,n);\ + }\ + else {\ + BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\ + xmemcpy((buf)->p + (pos), (bytes), (n));\ + }\ +} while (0) + +#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)] + + +#define ANCHOR_BEGIN_BUF (1<<0) +#define ANCHOR_BEGIN_LINE (1<<1) +#define ANCHOR_BEGIN_POSITION (1<<2) +#define ANCHOR_END_BUF (1<<3) +#define ANCHOR_SEMI_END_BUF (1<<4) +#define ANCHOR_END_LINE (1<<5) + +#define ANCHOR_WORD_BOUND (1<<6) +#define ANCHOR_NOT_WORD_BOUND (1<<7) +#define ANCHOR_WORD_BEGIN (1<<8) +#define ANCHOR_WORD_END (1<<9) +#define ANCHOR_PREC_READ (1<<10) +#define ANCHOR_PREC_READ_NOT (1<<11) +#define ANCHOR_LOOK_BEHIND (1<<12) +#define ANCHOR_LOOK_BEHIND_NOT (1<<13) + +#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ +#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ + +/* operation code */ +enum OpCode { + OP_FINISH = 0, /* matching process terminator (no more alternative) */ + OP_END = 1, /* pattern code terminator (success end) */ + + OP_EXACT1 = 2, /* single byte, N = 1 */ + OP_EXACT2, /* single byte, N = 2 */ + OP_EXACT3, /* single byte, N = 3 */ + OP_EXACT4, /* single byte, N = 4 */ + OP_EXACT5, /* single byte, N = 5 */ + OP_EXACTN, /* single byte */ + OP_EXACTMB2N1, /* mb-length = 2 N = 1 */ + OP_EXACTMB2N2, /* mb-length = 2 N = 2 */ + OP_EXACTMB2N3, /* mb-length = 2 N = 3 */ + OP_EXACTMB2N, /* mb-length = 2 */ + OP_EXACTMB3N, /* mb-length = 3 */ + OP_EXACTMBN, /* other length */ + + OP_EXACT1_IC, 
/* single byte, N = 1, ignore case */ + OP_EXACTN_IC, /* single byte, ignore case */ + + OP_CCLASS, + OP_CCLASS_MB, + OP_CCLASS_MIX, + OP_CCLASS_NOT, + OP_CCLASS_MB_NOT, + OP_CCLASS_MIX_NOT, + OP_CCLASS_NODE, /* pointer to CClassNode node */ + + OP_ANYCHAR, /* "." */ + OP_ANYCHAR_ML, /* "." multi-line */ + OP_ANYCHAR_STAR, /* ".*" */ + OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ + OP_ANYCHAR_STAR_PEEK_NEXT, + OP_ANYCHAR_ML_STAR_PEEK_NEXT, + + OP_WORD, + OP_NOT_WORD, + OP_WORD_BOUND, + OP_NOT_WORD_BOUND, + OP_WORD_BEGIN, + OP_WORD_END, + + OP_BEGIN_BUF, + OP_END_BUF, + OP_BEGIN_LINE, + OP_END_LINE, + OP_SEMI_END_BUF, + OP_BEGIN_POSITION, + + OP_BACKREF1, + OP_BACKREF2, + OP_BACKREFN, + OP_BACKREFN_IC, + OP_BACKREF_MULTI, + OP_BACKREF_MULTI_IC, + OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ + + OP_MEMORY_START, + OP_MEMORY_START_PUSH, /* push back-tracker to stack */ + OP_MEMORY_END_PUSH, /* push back-tracker to stack */ + OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ + OP_MEMORY_END, + OP_MEMORY_END_REC, /* push marker to stack */ + + OP_FAIL, /* pop stack and move */ + OP_JUMP, + OP_PUSH, + OP_POP, + OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ + OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ + OP_REPEAT, /* {n,m} */ + OP_REPEAT_NG, /* {n,m}? (non greedy) */ + OP_REPEAT_INC, + OP_REPEAT_INC_NG, /* non greedy */ + OP_REPEAT_INC_SG, /* search and get in stack */ + OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ + OP_NULL_CHECK_START, /* null loop checker start */ + OP_NULL_CHECK_END, /* null loop checker end */ + OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ + OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ + + OP_PUSH_POS, /* (?=...) start */ + OP_POP_POS, /* (?=...) end */ + OP_PUSH_POS_NOT, /* (?!...) start */ + OP_FAIL_POS, /* (?!...) end */ + OP_PUSH_STOP_BT, /* (?>...) start */ + OP_POP_STOP_BT, /* (?>...) end */ + OP_LOOK_BEHIND, /* (?<=...) 
start (no needs end opcode) */ + OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */ + OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */ + + OP_CALL, /* \g<name> */ + OP_RETURN, + + OP_STATE_CHECK_PUSH, /* combination explosion check and push */ + OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ + OP_STATE_CHECK, /* check only */ + OP_STATE_CHECK_ANYCHAR_STAR, + OP_STATE_CHECK_ANYCHAR_ML_STAR, + + /* no need: IS_DYNAMIC_OPTION() == 0 */ + OP_SET_OPTION_PUSH, /* set option and push recover option */ + OP_SET_OPTION /* set option */ +}; + +typedef int RelAddrType; +typedef int AbsAddrType; +typedef int LengthType; +typedef int RepeatNumType; +typedef short int MemNumType; +typedef short int StateCheckNumType; +typedef void* PointerType; + +#define SIZE_OPCODE 1 +#define SIZE_RELADDR (int)sizeof(RelAddrType) +#define SIZE_ABSADDR (int)sizeof(AbsAddrType) +#define SIZE_LENGTH (int)sizeof(LengthType) +#define SIZE_MEMNUM (int)sizeof(MemNumType) +#define SIZE_STATE_CHECK_NUM (int)sizeof(StateCheckNumType) +#define SIZE_REPEATNUM (int)sizeof(RepeatNumType) +#define SIZE_OPTION (int)sizeof(OnigOptionType) +#define SIZE_CODE_POINT (int)sizeof(OnigCodePoint) +#define SIZE_POINTER (int)sizeof(PointerType) + + +#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) +#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) +#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType) +#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType) +#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) +#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) +#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) +#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) + +/* code point's address must be aligned address. 
*/ +#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) +#define GET_BYTE_INC(byte,p) do{\ + byte = *(p);\ + (p)++;\ +} while(0) + + +/* op-code + arg size */ +#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE +#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1) +#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_POP SIZE_OPCODE +#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1) +#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) +#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_PUSH_POS SIZE_OPCODE +#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_POP_POS SIZE_OPCODE +#define SIZE_OP_FAIL_POS SIZE_OPCODE +#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION) +#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION) +#define SIZE_OP_FAIL SIZE_OPCODE +#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE +#define SIZE_OP_POP_STOP_BT SIZE_OPCODE +#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH) +#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) +#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE +#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) +#define SIZE_OP_RETURN SIZE_OPCODE + +#ifdef USE_COMBINATION_EXPLOSION_CHECK /* state-check opcode sizes exist only when this check is compiled in */ +#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + 
SIZE_STATE_CHECK_NUM + SIZE_RELADDR) +#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) +#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#endif + +#define MC_ESC(syn) (syn)->meta_char_table.esc +#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar +#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime +#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time +#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time +#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime + +#define IS_MC_ESC_CODE(code, syn) \ + ((code) == MC_ESC(syn) && \ + !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) + + +#define SYN_POSIX_COMMON_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ + ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ + ONIG_SYN_OP_LINE_ANCHOR | \ + ONIG_SYN_OP_ESC_CONTROL_CHARS ) + +#define SYN_GNU_REGEX_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ + ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ + ONIG_SYN_OP_VBAR_ALT | \ + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ + ONIG_SYN_OP_QMARK_ZERO_ONE | \ + ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ + ONIG_SYN_OP_ESC_W_WORD | \ + ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ + ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ + ONIG_SYN_OP_LINE_ANCHOR ) + +#define SYN_GNU_REGEX_BV \ + ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ + ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ + ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + + +#define NCCLASS_FLAGS(cc) ((cc)->flags) +#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) +#define NCCLASS_FLAG_CLEAR(cc,flag) 
(NCCLASS_FLAGS(cc) &= ~(flag)) +#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) + +/* cclass node */ +#define FLAG_NCCLASS_NOT (1<<0) +#define FLAG_NCCLASS_SHARE (1<<1) + +#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) +#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE) +#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) +#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) +#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE) + +typedef struct { + int type; + /* struct _Node* next; */ + /* unsigned int flags; */ +} NodeBase; + +typedef struct { + NodeBase base; + unsigned int flags; + BitSet bs; + BBuf* mbuf; /* multi-byte info or NULL */ +} CClassNode; + +typedef intptr_t OnigStackIndex; + +typedef struct _OnigStackType { + unsigned int type; + union { + struct { + UChar *pcode; /* byte code position */ + UChar *pstr; /* string position */ + UChar *pstr_prev; /* previous char position of pstr */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK + unsigned int state_check; +#endif + } state; + struct { + int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ + UChar *pcode; /* byte code position (head of repeated target) */ + int num; /* repeat id */ + } repeat; + struct { + OnigStackIndex si; /* index of stack */ + } repeat_inc; + struct { + int num; /* memory num */ + UChar *pstr; /* start/end position */ + /* The following information is set only when this stack type is MEM-START */ + OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ + OnigStackIndex end; /* prev. 
info (for backtrack "(...)*" ) */ + } mem; + struct { + int num; /* null check id */ + UChar *pstr; /* start position */ + } null_check; +#ifdef USE_SUBEXP_CALL + struct { + UChar *ret_addr; /* byte code position */ + int num; /* null check id (NOTE(review): same wording as null_check.num above -- confirm it is accurate for call frames) */ + UChar *pstr; /* string position */ + } call_frame; +#endif + } u; +} OnigStackType; + +typedef struct { + void* stack_p; + size_t stack_n; + OnigOptionType options; + OnigRegion* region; + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + int best_len; /* for ONIG_OPTION_FIND_LONGEST */ + UChar* best_s; +#endif +#ifdef USE_COMBINATION_EXPLOSION_CHECK + void* state_check_buff; + int state_check_buff_size; +#endif +} OnigMatchArg; + + +#define IS_CODE_SB_WORD(enc,code) \ + (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) + +#ifdef ONIG_DEBUG + +typedef struct { + short int opcode; + char* name; + short int arg_type; +} OnigOpInfoType; + +extern OnigOpInfoType OnigOpInfo[]; + +/* extern void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc); */ + +#ifdef ONIG_DEBUG_STATISTICS +extern void onig_statistics_init(void); +extern void onig_print_statistics(FILE* f); +#endif +#endif + +extern UChar* onig_error_code_to_format(int code); +extern void onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...); +extern int onig_bbuf_init(BBuf* buf, int size); +extern int onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo, const char *sourcefile, int sourceline); +extern void onig_chain_reduce(regex_t* reg); +extern void onig_chain_link_add(regex_t* to, regex_t* add); +extern void onig_transfer(regex_t* to, regex_t* from); +extern int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc); +extern int onig_is_code_in_cc_len(int enclen, OnigCodePoint code, CClassNode* cc); + +/* 
strend hash */ +typedef void hash_table_type; +#ifdef RUBY +#include "st.h" + +typedef st_data_t hash_data_type; +#else +typedef unsigned long hash_data_type; +#endif + +extern hash_table_type* onig_st_init_strend_table_with_size(st_index_t size); +extern int onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value); +extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value); + +/* encoding property management */ +#define PROPERTY_LIST_ADD_PROP(Name, CR) \ + r = onigenc_property_list_add_property((UChar* )Name, CR,\ + &PropertyNameTable, &PropertyList, &PropertyListNum,\ + &PropertyListSize);\ + if (r != 0) goto end + +#define PROPERTY_LIST_INIT_CHECK \ + if (PropertyInited == 0) {\ + int r = onigenc_property_list_init(init_property_list);\ + if (r != 0) return r;\ + } + +extern int onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize); + +typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); + +extern int onigenc_property_list_init(ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE); + +#endif /* ONIGURUMA_REGINT_H */ diff --git a/src/regparse.c b/src/regparse.c new file mode 100644 index 0000000000..c07d5c35e8 --- /dev/null +++ b/src/regparse.c @@ -0,0 +1,5600 @@ +/* -*- mode:c; c-file-style:"gnu" -*- */ +/********************************************************************** + regparse.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "mruby.h" +#include +#include "regparse.h" +#include +#ifdef INCLUDE_REGEXP + +#define WARN_BUFSIZE 256 + +#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + + +const OnigSyntaxType OnigSyntaxRuby = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | + ONIG_SYN_OP2_OPTION_RUBY | + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | + ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | + ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | + ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | + ONIG_SYN_OP2_ESC_H_XDIGIT ) + , ( SYN_GNU_REGEX_BV | + ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | + ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | + ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | + ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | + ONIG_SYN_WARN_CC_DUP | + ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +const OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; + +extern void onig_null_warn(const char* s ARG_UNUSED) { } + +#ifdef DEFAULT_WARN_FUNCTION +static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; +#else +static OnigWarnFunc onig_warn = onig_null_warn; +#endif + +#ifdef DEFAULT_VERB_WARN_FUNCTION +static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION; +#else +static OnigWarnFunc onig_verb_warn = onig_null_warn; +#endif + +extern void onig_set_warn_func(OnigWarnFunc f) +{ + onig_warn = f; +} + +extern void onig_set_verb_warn_func(OnigWarnFunc f) +{ + onig_verb_warn = f; +} + +static void CC_DUP_WARN(ScanEnv *env); + +static void +bbuf_free(BBuf* bbuf) +{ + if (IS_NOT_NULL(bbuf)) { + if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p); + xfree(bbuf); + } +} + +static int +bbuf_clone(BBuf** rto, BBuf* from) +{ + int r; + BBuf *to; + + *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_MEMERR(to); + r = BBUF_INIT(to, from->alloc); + if (r != 0) return r; + to->used = from->used; + xmemcpy(to->p, from->p, from->used); + return 0; +} + +#define BACKREF_REL_TO_ABS(rel_no, env) \ + ((env)->num_mem + 1 + (rel_no)) + +#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) + +#define MBCODE_START_POS(enc) \ + (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80) + +#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ + add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) + +#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ + if (! 
ONIGENC_IS_SINGLEBYTE(enc)) {\ + r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\ + if (r) return r;\ + }\ +} while (0) + + +#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \ + if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \ + BS_ROOM(bs, pos) |= BS_BIT(pos); \ +} while (0) + +#define BITSET_IS_EMPTY(bs,empty) do {\ + int i;\ + empty = 1;\ + for (i = 0; i < (int )BITSET_SIZE; i++) {\ + if ((bs)[i] != 0) {\ + empty = 0; break;\ + }\ + }\ +} while (0) + +static void +bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to) +{ + int i; + for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { + BITSET_SET_BIT_CHKDUP(bs, i); + } +} + +static void +bitset_invert(BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } +} + +static void +bitset_invert_to(BitSetRef from, BitSetRef to) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } +} + +static void +bitset_and(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } +} + +static void +bitset_or(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } +} + +static void +bitset_copy(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } +} + +extern int +onig_strncmp(const UChar* s1, const UChar* s2, int n) +{ + int x; + + while (n-- > 0) { + x = *s2++ - *s1++; + if (x) return x; + } + return 0; +} + +extern void +onig_strcpy(UChar* dest, const UChar* src, const UChar* end) +{ + ptrdiff_t len = end - src; + if (len > 0) { + xmemcpy(dest, src, len); + dest[len] = (UChar )0; + } +} + +#ifdef USE_NAMED_GROUP +static UChar* +strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) +{ + ptrdiff_t slen; + int term_len, i; + UChar *r; + + slen = end - s; + term_len = ONIGENC_MBC_MINLEN(enc); + + r = (UChar* )xmalloc(slen + term_len); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, slen); + + for (i = 0; i < term_len; i++) + r[slen + i] = 
(UChar )0; + + return r; +} +#endif + +/* scan pattern methods */ +#define PEND_VALUE 0 + +#define PFETCH_READY UChar* pfetch_prev +#define PEND (p < end ? 0 : 1) +#define PUNFETCH p = pfetch_prev +#define PINC do { \ + pfetch_prev = p; \ + p += enclen(enc, p, end); \ +} while (0) +#define PFETCH(c) do { \ + c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \ + pfetch_prev = p; \ + p += enclen(enc, p, end); \ +} while (0) + +#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE) +#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c) + +static UChar* +strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, + int capa) +{ + UChar* r; + + if (dest) + r = (UChar* )xrealloc(dest, capa + 1); + else + r = (UChar* )xmalloc(capa + 1); + + CHECK_NULL_RETURN(r); + onig_strcpy(r + (dest_end - dest), src, src_end); + return r; +} + +/* dest on static area */ +static UChar* +strcat_capa_from_static(UChar* dest, UChar* dest_end, + const UChar* src, const UChar* src_end, int capa) +{ + UChar* r; + + r = (UChar* )xmalloc(capa + 1); + CHECK_NULL_RETURN(r); + onig_strcpy(r, dest, dest_end); + onig_strcpy(r + (dest_end - dest), src, src_end); + return r; +} +#endif //INCLUDE_REGEXP + +#ifdef INCLUDE_ENCODING +#ifdef USE_ST_LIBRARY + +//#include "st.h" + +typedef struct { + const UChar* s; + const UChar* end; +} st_str_end_key; + +static int +str_end_cmp(st_data_t xp, st_data_t yp) +{ + const st_str_end_key *x, *y; + const UChar *p, *q; + int c; + + x = (const st_str_end_key *)xp; + y = (const st_str_end_key *)yp; + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static st_index_t +str_end_hash(st_data_t xp) +{ + const st_str_end_key *x = (const st_str_end_key *)xp; + const UChar *p; + st_index_t val = 0; + + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + return 
val + (val >> 5); +} + +extern hash_table_type* +onig_st_init_strend_table_with_size(st_index_t size) +{ + static const struct st_hash_type hashType = { + str_end_cmp, + str_end_hash, + }; + + return (hash_table_type* ) + onig_st_init_table_with_size(&hashType, size); +} + +extern int +onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, + const UChar* end_key, hash_data_type *value) +{ + st_str_end_key key; + + key.s = (UChar* )str_key; + key.end = (UChar* )end_key; + + return onig_st_lookup(table, (st_data_t )(&key), value); +} + +extern int +onig_st_insert_strend(hash_table_type* table, const UChar* str_key, + const UChar* end_key, hash_data_type value) +{ + st_str_end_key* key; + int result; + + key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key)); + key->s = (UChar* )str_key; + key->end = (UChar* )end_key; + result = onig_st_insert(table, (st_data_t )key, value); + if (result) { + xfree(key); + } + return result; +} + +#endif /* USE_ST_LIBRARY */ +#endif //INCLUDE_ENCODING + +#ifdef INCLUDE_REGEXP +#ifdef USE_NAMED_GROUP + +#define INIT_NAME_BACKREFS_ALLOC_NUM 8 + +typedef struct { + UChar* name; + size_t name_len; /* byte length */ + int back_num; /* number of backrefs */ + int back_alloc; + int back_ref1; + int* back_refs; +} NameEntry; + +#ifdef USE_ST_LIBRARY + +typedef st_table NameTable; +typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ + +#define NAMEBUF_SIZE 24 +#define NAMEBUF_SIZE_1 25 + +#ifdef ONIG_DEBUG +static int +i_print_name_entry(UChar* key, NameEntry* e, void* arg) +{ + int i; + FILE* fp = (FILE* )arg; + + fprintf(fp, "%s: ", e->name); + if (e->back_num == 0) + fputs("-", fp); + else if (e->back_num == 1) + fprintf(fp, "%d", e->back_ref1); + else { + for (i = 0; i < e->back_num; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "%d", e->back_refs[i]); + } + } + fputs("\n", fp); + return ST_CONTINUE; +} + +extern int +onig_print_names(FILE* fp, regex_t* reg) +{ + NameTable* t = (NameTable* 
)reg->name_table; + + if (IS_NOT_NULL(t)) { + fprintf(fp, "name table\n"); + onig_st_foreach(t, i_print_name_entry, (HashDataType )fp); + fputs("\n", fp); + } + return 0; +} +#endif /* ONIG_DEBUG */ + +static int +i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED) +{ + xfree(e->name); + if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + xfree(key); + xfree(e); + return ST_DELETE; +} + +static int +names_clear(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_free_name_entry, 0); + } + return 0; +} + +extern int +onig_names_free(regex_t* reg) +{ + int r; + NameTable* t; + + r = names_clear(reg); + if (r) return r; + + t = (NameTable* )reg->name_table; + if (IS_NOT_NULL(t)) onig_st_free_table(t); + reg->name_table = (void* )NULL; + return 0; +} + +static NameEntry* +name_find(regex_t* reg, const UChar* name, const UChar* name_end) +{ + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + e = (NameEntry* )NULL; + if (IS_NOT_NULL(t)) { + onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); + } + return e; +} + +typedef struct { + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); + regex_t* reg; + void* arg; + int ret; + OnigEncoding enc; +} INamesArg; + +static int +i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg) +{ + int r = (*(arg->func))(e->name, + e->name + e->name_len, + e->back_num, + (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), + arg->reg, arg->arg); + if (r != 0) { + arg->ret = r; + return ST_STOP; + } + return ST_CONTINUE; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) +{ + INamesArg narg; + NameTable* t = (NameTable* )reg->name_table; + + narg.ret = 0; + if (IS_NOT_NULL(t)) { + narg.func = func; + narg.reg = reg; + narg.arg = arg; + narg.enc = reg->enc; /* should be pattern encoding. 
*/ + onig_st_foreach(t, i_names, (HashDataType )&narg); + } + return narg.ret; +} + +static int +i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map) +{ + int i; + + if (e->back_num > 1) { + for (i = 0; i < e->back_num; i++) { + e->back_refs[i] = map[e->back_refs[i]].new_val; + } + } + else if (e->back_num == 1) { + e->back_ref1 = map[e->back_ref1].new_val; + } + + return ST_CONTINUE; +} + +extern int +onig_renumber_name_table(regex_t* reg, GroupNumRemap* map) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_renumber_name, (HashDataType )map); + } + return 0; +} + + +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num_entries; + else + return 0; +} + +#else /* USE_ST_LIBRARY */ + +#define INIT_NAMES_ALLOC_NUM 8 + +typedef struct { + NameEntry* e; + int num; + int alloc; +} NameTable; + +#ifdef ONIG_DEBUG +extern int +onig_print_names(FILE* fp, regex_t* reg) +{ + int i, j; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t) && t->num > 0) { + fprintf(fp, "name table\n"); + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + fprintf(fp, "%s: ", e->name); + if (e->back_num == 0) { + fputs("-", fp); + } + else if (e->back_num == 1) { + fprintf(fp, "%d", e->back_ref1); + } + else { + for (j = 0; j < e->back_num; j++) { + if (j > 0) fprintf(fp, ", "); + fprintf(fp, "%d", e->back_refs[j]); + } + } + fputs("\n", fp); + } + fputs("\n", fp); + } + return 0; +} +#endif + +static int +names_clear(regex_t* reg) +{ + int i; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (IS_NOT_NULL(e->name)) { + xfree(e->name); + e->name = NULL; + e->name_len = 0; + e->back_num = 0; + e->back_alloc = 0; + if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + e->back_refs = (int* )NULL; + } + } + if 
(IS_NOT_NULL(t->e)) { + xfree(t->e); + t->e = NULL; + } + t->num = 0; + } + return 0; +} + +extern int +onig_names_free(regex_t* reg) +{ + int r; + NameTable* t; + + r = names_clear(reg); + if (r) return r; + + t = (NameTable* )reg->name_table; + if (IS_NOT_NULL(t)) xfree(t); + reg->name_table = NULL; + return 0; +} + +static NameEntry* +name_find(regex_t* reg, UChar* name, UChar* name_end) +{ + int i, len; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + len = name_end - name; + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) + return e; + } + } + return (NameEntry* )NULL; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) +{ + int i, r; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + r = (*func)(e->name, e->name + e->name_len, e->back_num, + (e->back_num > 1 ? 
e->back_refs : &(e->back_ref1)), + reg, arg); + if (r != 0) return r; + } + } + return 0; +} + +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num; + else + return 0; +} + +#endif /* else USE_ST_LIBRARY */ + +static int +name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) +{ + int alloc; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (name_end - name <= 0) + return ONIGERR_EMPTY_GROUP_NAME; + + e = name_find(reg, name, name_end); + if (IS_NULL(e)) { +#ifdef USE_ST_LIBRARY + if (IS_NULL(t)) { + t = onig_st_init_strend_table_with_size(5); + reg->name_table = (void* )t; + } + e = (NameEntry* )xmalloc(sizeof(NameEntry)); + CHECK_NULL_RETURN_MEMERR(e); + + e->name = strdup_with_null(reg->enc, name, name_end); + if (IS_NULL(e->name)) { + xfree(e); + return ONIGERR_MEMORY; + } + onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), + (HashDataType )e); + + e->name_len = name_end - name; + e->back_num = 0; + e->back_alloc = 0; + e->back_refs = (int* )NULL; + +#else + + if (IS_NULL(t)) { + alloc = INIT_NAMES_ALLOC_NUM; + t = (NameTable* )xmalloc(sizeof(NameTable)); + CHECK_NULL_RETURN_MEMERR(t); + t->e = NULL; + t->alloc = 0; + t->num = 0; + + t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc); + if (IS_NULL(t->e)) { + xfree(t); + return ONIGERR_MEMORY; + } + t->alloc = alloc; + reg->name_table = t; + goto clear; + } + else if (t->num == t->alloc) { + int i; + + alloc = t->alloc * 2; + t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); + CHECK_NULL_RETURN_MEMERR(t->e); + t->alloc = alloc; + + clear: + for (i = t->num; i < t->alloc; i++) { + t->e[i].name = NULL; + t->e[i].name_len = 0; + t->e[i].back_num = 0; + t->e[i].back_alloc = 0; + t->e[i].back_refs = (int* )NULL; + } + } + e = &(t->e[t->num]); + t->num++; + e->name = strdup_with_null(reg->enc, name, name_end); + if (IS_NULL(e->name)) return ONIGERR_MEMORY; + 
e->name_len = name_end - name; +#endif + } + + if (e->back_num >= 1 && + ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) { + onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME, + name, name_end); + return ONIGERR_MULTIPLEX_DEFINED_NAME; + } + + e->back_num++; + if (e->back_num == 1) { + e->back_ref1 = backref; + } + else { + if (e->back_num == 2) { + alloc = INIT_NAME_BACKREFS_ALLOC_NUM; + e->back_refs = (int* )xmalloc(sizeof(int) * alloc); + CHECK_NULL_RETURN_MEMERR(e->back_refs); + e->back_alloc = alloc; + e->back_refs[0] = e->back_ref1; + e->back_refs[1] = backref; + } + else { + if (e->back_num > e->back_alloc) { + alloc = e->back_alloc * 2; + e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); + CHECK_NULL_RETURN_MEMERR(e->back_refs); + e->back_alloc = alloc; + } + e->back_refs[e->back_num - 1] = backref; + } + } + + return 0; +} + +extern int +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) +{ + NameEntry* e = name_find(reg, name, name_end); + + if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE; + + switch (e->back_num) { + case 0: + *nums = 0; + break; + case 1: + *nums = &(e->back_ref1); + break; + default: + *nums = e->back_refs; + break; + } + return e->back_num; +} + +extern int +onig_name_to_backref_number(regex_t* reg, const UChar* name, + const UChar* name_end, OnigRegion *region) +{ + int i, n, *nums; + + n = onig_name_to_group_numbers(reg, name, name_end, &nums); + if (n < 0) + return n; + else if (n == 0) + return ONIGERR_PARSER_BUG; + else if (n == 1) + return nums[0]; + else { + if (IS_NOT_NULL(region)) { + for (i = n - 1; i >= 0; i--) { + if (region->beg[nums[i]] != ONIG_REGION_NOTPOS) + return nums[i]; + } + } + return nums[n - 1]; + } +} + +#else /* USE_NAMED_GROUP */ + +extern int +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int 
+onig_name_to_backref_number(regex_t* reg, const UChar* name, + const UChar* name_end, OnigRegion* region) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_number_of_names(regex_t* reg) +{ + return 0; +} +#endif /* else USE_NAMED_GROUP */ + +extern int +onig_noname_group_capture_is_active(regex_t* reg) +{ + if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) + return 0; + +#ifdef USE_NAMED_GROUP + if (onig_number_of_names(reg) > 0 && + IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + return 0; + } +#endif + + return 1; +} + + +#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 + +static void +scan_env_clear(ScanEnv* env) +{ + int i; + + BIT_STATUS_CLEAR(env->capture_history); + BIT_STATUS_CLEAR(env->bt_mem_start); + BIT_STATUS_CLEAR(env->bt_mem_end); + BIT_STATUS_CLEAR(env->backrefed_mem); + env->error = (UChar* )NULL; + env->error_end = (UChar* )NULL; + env->num_call = 0; + env->num_mem = 0; +#ifdef USE_NAMED_GROUP + env->num_named = 0; +#endif + env->mem_alloc = 0; + env->mem_nodes_dynamic = (Node** )NULL; + + for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) + env->mem_nodes_static[i] = NULL_NODE; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + env->num_comb_exp_check = 0; + env->comb_exp_max_regnum = 0; + env->curr_max_regnum = 0; + env->has_recursion = 0; +#endif + env->warnings_flag = 0; +} + +static int +scan_env_add_mem_entry(ScanEnv* env) +{ + int i, need, alloc; + Node** p; + + need = env->num_mem + 1; + if (need >= SCANENV_MEMNODES_SIZE) { + if (env->mem_alloc <= need) { + if (IS_NULL(env->mem_nodes_dynamic)) { + alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; + p = (Node** )xmalloc(sizeof(Node*) * alloc); + xmemcpy(p, env->mem_nodes_static, + sizeof(Node*) * SCANENV_MEMNODES_SIZE); + } + else { + alloc = 
env->mem_alloc * 2; + p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); + } + CHECK_NULL_RETURN_MEMERR(p); + + for (i = env->num_mem + 1; i < alloc; i++) + p[i] = NULL_NODE; + + env->mem_nodes_dynamic = p; + env->mem_alloc = alloc; + } + } + + env->num_mem++; + return env->num_mem; +} + +static int +scan_env_set_mem_node(ScanEnv* env, int num, Node* node) +{ + if (env->num_mem >= num) + SCANENV_MEM_NODES(env)[num] = node; + else + return ONIGERR_PARSER_BUG; + return 0; +} + + +#ifdef USE_PARSE_TREE_NODE_RECYCLE +typedef struct _FreeNode { + struct _FreeNode* next; +} FreeNode; + +static FreeNode* FreeNodeList = (FreeNode* )NULL; +#endif + +extern void +onig_node_free(Node* node) +{ + start: + if (IS_NULL(node)) return ; + + switch (NTYPE(node)) { + case NT_STR: + if (NSTR(node)->capa != 0 && + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + xfree(NSTR(node)->s); + } + break; + + case NT_LIST: + case NT_ALT: + onig_node_free(NCAR(node)); + { + Node* next_node = NCDR(node); + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + { + FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; + n->next = FreeNodeList; + FreeNodeList = n; + THREAD_ATOMIC_END; + } +#else + xfree(node); +#endif + node = next_node; + goto start; + } + break; + + case NT_CCLASS: + { + CClassNode* cc = NCCLASS(node); + + if (IS_NCCLASS_SHARE(cc)) return ; + if (cc->mbuf) + bbuf_free(cc->mbuf); + } + break; + + case NT_QTFR: + if (NQTFR(node)->target) + onig_node_free(NQTFR(node)->target); + break; + + case NT_ENCLOSE: + if (NENCLOSE(node)->target) + onig_node_free(NENCLOSE(node)->target); + break; + + case NT_BREF: + if (IS_NOT_NULL(NBREF(node)->back_dynamic)) + xfree(NBREF(node)->back_dynamic); + break; + + case NT_ANCHOR: + if (NANCHOR(node)->target) + onig_node_free(NANCHOR(node)->target); + break; + } + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + { + FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; + n->next = FreeNodeList; + FreeNodeList = n; + THREAD_ATOMIC_END; 
+ } +#else + xfree(node); +#endif +} + +#ifdef USE_PARSE_TREE_NODE_RECYCLE +extern int +onig_free_node_list(void) +{ + FreeNode* n; + + /* THREAD_ATOMIC_START; */ + while (IS_NOT_NULL(FreeNodeList)) { + n = FreeNodeList; + FreeNodeList = FreeNodeList->next; + xfree(n); + } + /* THREAD_ATOMIC_END; */ + return 0; +} +#endif + +static Node* +node_new(void) +{ + Node* node; + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + THREAD_ATOMIC_START; + if (IS_NOT_NULL(FreeNodeList)) { + node = (Node* )FreeNodeList; + FreeNodeList = FreeNodeList->next; + THREAD_ATOMIC_END; + return node; + } + THREAD_ATOMIC_END; +#endif + + node = (Node* )xmalloc(sizeof(Node)); + /* xmemset(node, 0, sizeof(Node)); */ + return node; +} + + +static void +initialize_cclass(CClassNode* cc) +{ + BITSET_CLEAR(cc->bs); + /* cc->base.flags = 0; */ + cc->flags = 0; + cc->mbuf = NULL; +} + +static Node* +node_new_cclass(void) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CCLASS); + initialize_cclass(NCCLASS(node)); + return node; +} + +static Node* +node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out, + const OnigCodePoint ranges[]) +{ + int n, i; + CClassNode* cc; + OnigCodePoint j; + + Node* node = node_new_cclass(); + CHECK_NULL_RETURN(node); + + cc = NCCLASS(node); + if (not != 0) NCCLASS_SET_NOT(cc); + + BITSET_CLEAR(cc->bs); + if (sb_out > 0 && IS_NOT_NULL(ranges)) { + n = ONIGENC_CODE_RANGE_NUM(ranges); + for (i = 0; i < n; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(ranges, i); + j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) { + if (j >= sb_out) goto sb_end; + + BITSET_SET_BIT(cc->bs, j); + } + } + } + + sb_end: + if (IS_NULL(ranges)) { + is_null: + cc->mbuf = NULL; + } + else { + BBuf* bbuf; + + n = ONIGENC_CODE_RANGE_NUM(ranges); + if (n == 0) goto is_null; + + bbuf = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN(bbuf); + bbuf->alloc = n + 1; + bbuf->used = n + 1; + bbuf->p = (UChar* )((void* )ranges); + + cc->mbuf = bbuf; + } + + return 
node; +} + +static Node* +node_new_ctype(int type, int not) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CTYPE); + NCTYPE(node)->ctype = type; + NCTYPE(node)->not = not; + return node; +} + +static Node* +node_new_anychar(void) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CANY); + return node; +} + +static Node* +node_new_list(Node* left, Node* right) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_LIST); + NCAR(node) = left; + NCDR(node) = right; + return node; +} + +extern Node* +onig_node_new_list(Node* left, Node* right) +{ + return node_new_list(left, right); +} + +extern Node* +onig_node_list_add(Node* list, Node* x) +{ + Node *n; + + n = onig_node_new_list(x, NULL); + if (IS_NULL(n)) return NULL_NODE; + + if (IS_NOT_NULL(list)) { + while (IS_NOT_NULL(NCDR(list))) + list = NCDR(list); + + NCDR(list) = n; + } + + return n; +} + +extern Node* +onig_node_new_alt(Node* left, Node* right) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_ALT); + NCAR(node) = left; + NCDR(node) = right; + return node; +} + +extern Node* +onig_node_new_anchor(int type) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_ANCHOR); + NANCHOR(node)->type = type; + NANCHOR(node)->target = NULL; + NANCHOR(node)->char_len = -1; + return node; +} + +static Node* +node_new_backref(int back_num, int* backrefs, int by_name, +#ifdef USE_BACKREF_WITH_LEVEL + int exist_level, int nest_level, +#endif + ScanEnv* env) +{ + int i; + Node* node = node_new(); + + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_BREF); + NBREF(node)->state = 0; + NBREF(node)->back_num = back_num; + NBREF(node)->back_dynamic = (int* )NULL; + if (by_name != 0) + NBREF(node)->state |= NST_NAME_REF; + +#ifdef USE_BACKREF_WITH_LEVEL + if (exist_level != 0) { + NBREF(node)->state |= NST_NEST_LEVEL; + NBREF(node)->nest_level = nest_level; + } +#endif + + for (i = 0; i < 
back_num; i++) { + if (backrefs[i] <= env->num_mem && + IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { + NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */ + break; + } + } + + if (back_num <= NODE_BACKREFS_SIZE) { + for (i = 0; i < back_num; i++) + NBREF(node)->back_static[i] = backrefs[i]; + } + else { + int* p = (int* )xmalloc(sizeof(int) * back_num); + if (IS_NULL(p)) { + onig_node_free(node); + return NULL; + } + NBREF(node)->back_dynamic = p; + for (i = 0; i < back_num; i++) + p[i] = backrefs[i]; + } + return node; +} + +#ifdef USE_SUBEXP_CALL +static Node* +node_new_call(UChar* name, UChar* name_end, int gnum) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CALL); + NCALL(node)->state = 0; + NCALL(node)->target = NULL_NODE; + NCALL(node)->name = name; + NCALL(node)->name_end = name_end; + NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */ + return node; +} +#endif + +static Node* +node_new_quantifier(int lower, int upper, int by_number) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_QTFR); + NQTFR(node)->state = 0; + NQTFR(node)->target = NULL; + NQTFR(node)->lower = lower; + NQTFR(node)->upper = upper; + NQTFR(node)->greedy = 1; + NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY; + NQTFR(node)->head_exact = NULL_NODE; + NQTFR(node)->next_head_exact = NULL_NODE; + NQTFR(node)->is_refered = 0; + if (by_number != 0) + NQTFR(node)->state |= NST_BY_NUMBER; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + NQTFR(node)->comb_exp_check_num = 0; +#endif + + return node; +} + +static Node* +node_new_enclose(int type) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_ENCLOSE); + NENCLOSE(node)->type = type; + NENCLOSE(node)->state = 0; + NENCLOSE(node)->regnum = 0; + NENCLOSE(node)->option = 0; + NENCLOSE(node)->target = NULL; + NENCLOSE(node)->call_addr = -1; + NENCLOSE(node)->opt_count = 0; + return node; +} + +extern Node* +onig_node_new_enclose(int 
type) +{ + return node_new_enclose(type); +} + +static Node* +node_new_enclose_memory(OnigOptionType option, int is_named) +{ + Node* node = node_new_enclose(ENCLOSE_MEMORY); + CHECK_NULL_RETURN(node); + if (is_named != 0) + SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP); + +#ifdef USE_SUBEXP_CALL + NENCLOSE(node)->option = option; +#endif + return node; +} + +static Node* +node_new_option(OnigOptionType option) +{ + Node* node = node_new_enclose(ENCLOSE_OPTION); + CHECK_NULL_RETURN(node); + NENCLOSE(node)->option = option; + return node; +} + +extern int +onig_node_str_cat(Node* node, const UChar* s, const UChar* end) +{ + ptrdiff_t addlen = end - s; + + if (addlen > 0) { + ptrdiff_t len = NSTR(node)->end - NSTR(node)->s; + + if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { + UChar* p; + ptrdiff_t capa = len + addlen + NODE_STR_MARGIN; + + if (capa <= NSTR(node)->capa) { + onig_strcpy(NSTR(node)->s + len, s, end); + } + else { + if (NSTR(node)->s == NSTR(node)->buf) + p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, + s, end, capa); + else + p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa); + + CHECK_NULL_RETURN_MEMERR(p); + NSTR(node)->s = p; + NSTR(node)->capa = capa; + } + } + else { + onig_strcpy(NSTR(node)->s + len, s, end); + } + NSTR(node)->end = NSTR(node)->s + len + addlen; + } + + return 0; +} + +extern int +onig_node_str_set(Node* node, const UChar* s, const UChar* end) +{ + onig_node_str_clear(node); + return onig_node_str_cat(node, s, end); +} + +static int +node_str_cat_char(Node* node, UChar c) +{ + UChar s[1]; + + s[0] = c; + return onig_node_str_cat(node, s, s + 1); +} + +extern void +onig_node_conv_to_str_node(Node* node, int flag) +{ + SET_NTYPE(node, NT_STR); + NSTR(node)->flag = flag; + NSTR(node)->capa = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; +} + +extern void +onig_node_str_clear(Node* node) +{ + if (NSTR(node)->capa != 0 && + IS_NOT_NULL(NSTR(node)->s) && 
NSTR(node)->s != NSTR(node)->buf) { + xfree(NSTR(node)->s); + } + + NSTR(node)->capa = 0; + NSTR(node)->flag = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; +} + +static Node* +node_new_str(const UChar* s, const UChar* end) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_STR); + NSTR(node)->capa = 0; + NSTR(node)->flag = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; + if (onig_node_str_cat(node, s, end)) { + onig_node_free(node); + return NULL; + } + return node; +} + +extern Node* +onig_node_new_str(const UChar* s, const UChar* end) +{ + return node_new_str(s, end); +} + +static Node* +node_new_str_raw(UChar* s, UChar* end) +{ + Node* node = node_new_str(s, end); + NSTRING_SET_RAW(node); + return node; +} + +static Node* +node_new_empty(void) +{ + return node_new_str(NULL, NULL); +} + +static Node* +node_new_str_raw_char(UChar c) +{ + UChar p[1]; + + p[0] = c; + return node_new_str_raw(p, p + 1); +} + +static Node* +str_node_split_last_char(StrNode* sn, OnigEncoding enc) +{ + const UChar *p; + Node* n = NULL_NODE; + + if (sn->end > sn->s) { + p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end); + if (p && p > sn->s) { /* can be splitted. */ + n = node_new_str(p, sn->end); + if ((sn->flag & NSTR_RAW) != 0) + NSTRING_SET_RAW(n); + sn->end = (UChar* )p; + } + } + return n; +} + +static int +str_node_can_be_split(StrNode* sn, OnigEncoding enc) +{ + if (sn->end > sn->s) { + return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 
1 : 0); + } + return 0; +} + +extern int +onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) +{ + unsigned int num, val; + OnigCodePoint c; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND) { + PFETCH(c); + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + val = (unsigned int )DIGITVAL(c); + if ((INT_MAX_LIMIT - val) / 10UL < num) + return -1; /* overflow */ + + num = num * 10 + val; + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + +static int +scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, + OnigEncoding enc) +{ + OnigCodePoint c; + unsigned int num, val; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND && maxlen-- != 0) { + PFETCH(c); + if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { + val = (unsigned int )XDIGITVAL(enc,c); + if ((INT_MAX_LIMIT - val) / 16UL < num) + return -1; /* overflow */ + + num = (num << 4) + XDIGITVAL(enc,c); + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + +static int +scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, + OnigEncoding enc) +{ + OnigCodePoint c; + unsigned int num, val; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND && maxlen-- != 0) { + PFETCH(c); + if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { + val = ODIGITVAL(c); + if ((INT_MAX_LIMIT - val) / 8UL < num) + return -1; /* overflow */ + + num = (num << 3) + val; + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + + +#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \ + BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) + +/* data format: + [n][from-1][to-1][from-2][to-2] ... 
[from-n][to-n] + (all data size is OnigCodePoint) + */ +static int +new_code_range(BBuf** pbuf) +{ +#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5) + int r; + OnigCodePoint n; + BBuf* bbuf; + + bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_MEMERR(*pbuf); + r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); + if (r) return r; + + n = 0; + BBUF_WRITE_CODE_POINT(bbuf, 0, n); + return 0; +} + +static int +add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, + int checkdup) +{ + int r, inc_n, pos; + int low, high, bound, x; + OnigCodePoint n, *data; + BBuf* bbuf; + + if (from > to) { + n = from; from = to; to = n; + } + + if (IS_NULL(*pbuf)) { + r = new_code_range(pbuf); + if (r) return r; + bbuf = *pbuf; + n = 0; + } + else { + bbuf = *pbuf; + GET_CODE_POINT(n, bbuf->p); + } + data = (OnigCodePoint* )(bbuf->p); + data++; + + for (low = 0, bound = n; low < bound; ) { + x = (low + bound) >> 1; + if (from > data[x*2 + 1]) + low = x + 1; + else + bound = x; + } + + for (high = low, bound = n; high < bound; ) { + x = (high + bound) >> 1; + if (to >= data[x*2] - 1) + high = x + 1; + else + bound = x; + } + + inc_n = low + 1 - high; + if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM) + return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES; + + if (inc_n != 1) { + if (checkdup && to >= data[low*2]) CC_DUP_WARN(env); + if (from > data[low*2]) + from = data[low*2]; + if (to < data[(high - 1)*2 + 1]) + to = data[(high - 1)*2 + 1]; + } + + if (inc_n != 0 && (OnigCodePoint )high < n) { + int from_pos = SIZE_CODE_POINT * (1 + high * 2); + int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2); + int size = (n - high) * 2 * SIZE_CODE_POINT; + + if (inc_n > 0) { + BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); + } + else { + BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos); + } + } + + pos = SIZE_CODE_POINT * (1 + low * 2); + BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2); + BBUF_WRITE_CODE_POINT(bbuf, pos, from); + 
BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to); + n += inc_n; + BBUF_WRITE_CODE_POINT(bbuf, 0, n); + + return 0; +} + +static int +add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) +{ + return add_code_range_to_buf0(pbuf, env, from, to, 1); +} + +static int +add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup) +{ + if (from > to) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + return 0; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + + return add_code_range_to_buf0(pbuf, env, from, to, checkdup); +} + +static int +add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) +{ + return add_code_range0(pbuf, env, from, to, 1); +} + +static int +not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env) +{ + int r, i, n; + OnigCodePoint pre, from, *data, to = 0; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf)) { + set_all: + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + } + + data = (OnigCodePoint* )(bbuf->p); + GET_CODE_POINT(n, data); + data++; + if (n <= 0) goto set_all; + + r = 0; + pre = MBCODE_START_POS(enc); + for (i = 0; i < n; i++) { + from = data[i*2]; + to = data[i*2+1]; + if (pre <= from - 1) { + r = add_code_range_to_buf(pbuf, env, pre, from - 1); + if (r != 0) return r; + } + if (to == ~((OnigCodePoint )0)) break; + pre = to + 1; + } + if (to < ~((OnigCodePoint )0)) { + r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0)); + } + return r; +} + +#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\ + BBuf *tbuf; \ + int tnot; \ + tnot = not1; not1 = not2; not2 = tnot; \ + tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \ +} while (0) + +static int +or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, + BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env) +{ + int r; + OnigCodePoint i, n1, *data1; + OnigCodePoint from, to; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) { + 
if (not1 != 0 || not2 != 0) + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + return 0; + } + + r = 0; + if (IS_NULL(bbuf2)) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + + if (IS_NULL(bbuf1)) { + if (not1 != 0) { + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + } + else { + if (not2 == 0) { + return bbuf_clone(pbuf, bbuf2); + } + else { + return not_code_range_buf(enc, bbuf2, pbuf, env); + } + } + } + + if (not1 != 0) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + + data1 = (OnigCodePoint* )(bbuf1->p); + GET_CODE_POINT(n1, data1); + data1++; + + if (not2 == 0 && not1 == 0) { /* 1 OR 2 */ + r = bbuf_clone(pbuf, bbuf2); + } + else if (not1 == 0) { /* 1 OR (not 2) */ + r = not_code_range_buf(enc, bbuf2, pbuf, env); + } + if (r != 0) return r; + + for (i = 0; i < n1; i++) { + from = data1[i*2]; + to = data1[i*2+1]; + r = add_code_range_to_buf(pbuf, env, from, to); + if (r != 0) return r; + } + return 0; +} + +static int +and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1, + OnigCodePoint* data, int n) +{ + int i, r; + OnigCodePoint from2, to2; + + for (i = 0; i < n; i++) { + from2 = data[i*2]; + to2 = data[i*2+1]; + if (from2 < from1) { + if (to2 < from1) continue; + else { + from1 = to2 + 1; + } + } + else if (from2 <= to1) { + if (to2 < to1) { + if (from1 <= from2 - 1) { + r = add_code_range_to_buf(pbuf, env, from1, from2-1); + if (r != 0) return r; + } + from1 = to2 + 1; + } + else { + to1 = from2 - 1; + } + } + else { + from1 = from2; + } + if (from1 > to1) break; + } + if (from1 <= to1) { + r = add_code_range_to_buf(pbuf, env, from1, to1); + if (r != 0) return r; + } + return 0; +} + +static int +and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env) +{ + int r; + OnigCodePoint i, j, n1, n2, *data1, *data2; + OnigCodePoint from, to, from1, to1, from2, to2; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf1)) { + if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */ + return bbuf_clone(pbuf, 
bbuf2); + return 0; + } + else if (IS_NULL(bbuf2)) { + if (not2 != 0) + return bbuf_clone(pbuf, bbuf1); + return 0; + } + + if (not1 != 0) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + + data1 = (OnigCodePoint* )(bbuf1->p); + data2 = (OnigCodePoint* )(bbuf2->p); + GET_CODE_POINT(n1, data1); + GET_CODE_POINT(n2, data2); + data1++; + data2++; + + if (not2 == 0 && not1 == 0) { /* 1 AND 2 */ + for (i = 0; i < n1; i++) { + from1 = data1[i*2]; + to1 = data1[i*2+1]; + for (j = 0; j < n2; j++) { + from2 = data2[j*2]; + to2 = data2[j*2+1]; + if (from2 > to1) break; + if (to2 < from1) continue; + from = MAX(from1, from2); + to = MIN(to1, to2); + r = add_code_range_to_buf(pbuf, env, from, to); + if (r != 0) return r; + } + } + } + else if (not1 == 0) { /* 1 AND (not 2) */ + for (i = 0; i < n1; i++) { + from1 = data1[i*2]; + to1 = data1[i*2+1]; + r = and_code_range1(pbuf, env, from1, to1, data2, n2); + if (r != 0) return r; + } + } + + return 0; +} + +static int +and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env) +{ + OnigEncoding enc = env->enc; + int r, not1, not2; + BBuf *buf1, *buf2, *pbuf = 0; + BitSetRef bsr1, bsr2; + BitSet bs1, bs2; + + not1 = IS_NCCLASS_NOT(dest); + bsr1 = dest->bs; + buf1 = dest->mbuf; + not2 = IS_NCCLASS_NOT(cc); + bsr2 = cc->bs; + buf2 = cc->mbuf; + + if (not1 != 0) { + bitset_invert_to(bsr1, bs1); + bsr1 = bs1; + } + if (not2 != 0) { + bitset_invert_to(bsr2, bs2); + bsr2 = bs2; + } + bitset_and(bsr1, bsr2); + if (bsr1 != dest->bs) { + bitset_copy(dest->bs, bsr1); + bsr1 = dest->bs; + } + if (not1 != 0) { + bitset_invert(dest->bs); + } + + if (! 
ONIGENC_IS_SINGLEBYTE(enc)) { + if (not1 != 0 && not2 != 0) { + r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env); + } + else { + r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env); + if (r == 0 && not1 != 0) { + BBuf *tbuf = 0; + r = not_code_range_buf(enc, pbuf, &tbuf, env); + bbuf_free(pbuf); + pbuf = tbuf; + } + } + if (r != 0) { + bbuf_free(pbuf); + return r; + } + + dest->mbuf = pbuf; + bbuf_free(buf1); + return r; + } + return 0; +} + +static int +or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env) +{ + OnigEncoding enc = env->enc; + int r, not1, not2; + BBuf *buf1, *buf2, *pbuf = 0; + BitSetRef bsr1, bsr2; + BitSet bs1, bs2; + + not1 = IS_NCCLASS_NOT(dest); + bsr1 = dest->bs; + buf1 = dest->mbuf; + not2 = IS_NCCLASS_NOT(cc); + bsr2 = cc->bs; + buf2 = cc->mbuf; + + if (not1 != 0) { + bitset_invert_to(bsr1, bs1); + bsr1 = bs1; + } + if (not2 != 0) { + bitset_invert_to(bsr2, bs2); + bsr2 = bs2; + } + bitset_or(bsr1, bsr2); + if (bsr1 != dest->bs) { + bitset_copy(dest->bs, bsr1); + bsr1 = dest->bs; + } + if (not1 != 0) { + bitset_invert(dest->bs); + } + + if (! 
ONIGENC_IS_SINGLEBYTE(enc)) { + if (not1 != 0 && not2 != 0) { + r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env); + } + else { + r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env); + if (r == 0 && not1 != 0) { + BBuf *tbuf = 0; + r = not_code_range_buf(enc, pbuf, &tbuf, env); + bbuf_free(pbuf); + pbuf = tbuf; + } + } + if (r != 0) { + bbuf_free(pbuf); + return r; + } + + dest->mbuf = pbuf; + bbuf_free(buf1); + return r; + } + else + return 0; +} + +static void UNKNOWN_ESC_WARN(ScanEnv *env, int c); + +static int +conv_backslash_value(int c, ScanEnv* env) +{ + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { + switch (c) { + case 'n': return '\n'; + case 't': return '\t'; + case 'r': return '\r'; + case 'f': return '\f'; + case 'a': return '\007'; + case 'b': return '\010'; + case 'e': return '\033'; + case 'v': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) + return '\v'; + break; + + default: + if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) + UNKNOWN_ESC_WARN(env, c); + break; + } + } + return c; +} + +#define is_invalid_quantifier_target(node) 0 + +/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ +static int +popular_quantifier_num(QtfrNode* q) +{ + if (q->greedy) { + if (q->lower == 0) { + if (q->upper == 1) return 0; + else if (IS_REPEAT_INFINITE(q->upper)) return 1; + } + else if (q->lower == 1) { + if (IS_REPEAT_INFINITE(q->upper)) return 2; + } + } + else { + if (q->lower == 0) { + if (q->upper == 1) return 3; + else if (IS_REPEAT_INFINITE(q->upper)) return 4; + } + else if (q->lower == 1) { + if (IS_REPEAT_INFINITE(q->upper)) return 5; + } + } + return -1; +} + + +enum ReduceType { + RQ_ASIS = 0, /* as is */ + RQ_DEL = 1, /* delete parent */ + RQ_A, /* to '*' */ + RQ_AQ, /* to '*?' */ + RQ_QQ, /* to '??' */ + RQ_P_QQ, /* to '+)??' */ + RQ_PQ_Q /* to '+?)?' */ +}; + +static enum ReduceType const ReduceTypeTable[6][6] = { + {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' 
*/ + {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */ + {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */ + {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */ + {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */ + {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */ +}; + +extern void +onig_reduce_nested_quantifier(Node* pnode, Node* cnode) +{ + int pnum, cnum; + QtfrNode *p, *c; + + p = NQTFR(pnode); + c = NQTFR(cnode); + pnum = popular_quantifier_num(p); + cnum = popular_quantifier_num(c); + if (pnum < 0 || cnum < 0) return ; + + switch(ReduceTypeTable[cnum][pnum]) { + case RQ_DEL: + *pnode = *cnode; + break; + case RQ_A: + p->target = c->target; + p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1; + break; + case RQ_AQ: + p->target = c->target; + p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0; + break; + case RQ_QQ: + p->target = c->target; + p->lower = 0; p->upper = 1; p->greedy = 0; + break; + case RQ_P_QQ: + p->target = cnode; + p->lower = 0; p->upper = 1; p->greedy = 0; + c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1; + return ; + break; + case RQ_PQ_Q: + p->target = cnode; + p->lower = 0; p->upper = 1; p->greedy = 1; + c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0; + return ; + break; + case RQ_ASIS: + p->target = cnode; + return ; + break; + } + + c->target = NULL_NODE; + onig_node_free(cnode); +} + + +enum TokenSyms { + TK_EOT = 0, /* end of token */ + TK_RAW_BYTE = 1, + TK_CHAR, + TK_STRING, + TK_CODE_POINT, + TK_ANYCHAR, + TK_CHAR_TYPE, + TK_BACKREF, + TK_CALL, + TK_ANCHOR, + TK_OP_REPEAT, + TK_INTERVAL, + TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */ + TK_ALT, + TK_SUBEXP_OPEN, + TK_SUBEXP_CLOSE, + TK_CC_OPEN, + TK_QUOTE_OPEN, + TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ + /* in cc */ + TK_CC_CLOSE, + TK_CC_RANGE, + TK_POSIX_BRACKET_OPEN, + TK_CC_AND, /* && */ + TK_CC_CC_OPEN /* [ */ +}; + +typedef struct { + enum TokenSyms type; + int escaped; + int base; /* is number: 8, 16 (used 
in [....]) */ + UChar* backp; + union { + UChar* s; + int c; + OnigCodePoint code; + int anchor; + int subtype; + struct { + int lower; + int upper; + int greedy; + int possessive; + } repeat; + struct { + int num; + int ref1; + int* refs; + int by_name; +#ifdef USE_BACKREF_WITH_LEVEL + int exist_level; + int level; /* \k */ +#endif + } backref; + struct { + UChar* name; + UChar* name_end; + int gnum; + } call; + struct { + int ctype; + int not; + } prop; + } u; +} OnigToken; + + +static int +fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) +{ + int low, up, syn_allow, non_low = 0; + int r = 0; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar* p = *src; + PFETCH_READY; + + syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL); + + if (PEND) { + if (syn_allow) + return 1; /* "....{" : OK! */ + else + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */ + } + + if (! syn_allow) { + c = PPEEK; + if (c == ')' || c == '(' || c == '|') { + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; + } + } + + low = onig_scan_unsigned_number(&p, end, env->enc); + if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (low > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + + if (p == *src) { /* can't read low */ + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) { + /* allow {,n} as {0,n} */ + low = 0; + non_low = 1; + } + else + goto invalid; + } + + if (PEND) goto invalid; + PFETCH(c); + if (c == ',') { + UChar* prev = p; + up = onig_scan_unsigned_number(&p, end, env->enc); + if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (up > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + + if (p == prev) { + if (non_low != 0) + goto invalid; + up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ + } + } + else { + if (non_low != 0) + goto invalid; + + PUNFETCH; + up = low; /* {n} : exact n times */ + r = 2; /* fixed */ + } + + if (PEND) goto 
invalid; + PFETCH(c); + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { + if (c != MC_ESC(env->syntax)) goto invalid; + PFETCH(c); + } + if (c != '}') goto invalid; + + if (!IS_REPEAT_INFINITE(up) && low > up) { + return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; + } + + tok->type = TK_INTERVAL; + tok->u.repeat.lower = low; + tok->u.repeat.upper = up; + *src = p; + return r; /* 0: normal {n,m}, 2: fixed {n} */ + + invalid: + if (syn_allow) + return 1; /* OK */ + else + return ONIGERR_INVALID_REPEAT_RANGE_PATTERN; +} + +/* \M-, \C-, \c, or \... */ +static int +fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) +{ + int v; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar* p = *src; + PFETCH_READY; + + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + PFETCH(c); + switch (c) { + case 'M': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) { + if (PEND) return ONIGERR_END_PATTERN_AT_META; + PFETCH(c); + if (c != '-') return ONIGERR_META_CODE_SYNTAX; + if (PEND) return ONIGERR_END_PATTERN_AT_META; + PFETCH(c); + if (c == MC_ESC(env->syntax)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } + c = ((c & 0xff) | 0x80); + } + else + goto backslash; + break; + + case 'C': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; + PFETCH(c); + if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX; + goto control; + } + else + goto backslash; + + case 'c': + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) { + control: + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; + PFETCH(c); + if (c == '?') { + c = 0177; + } + else { + if (c == MC_ESC(env->syntax)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } + c &= 0x9f; + } + break; + } + /* fall through */ + + default: + { + backslash: + c = conv_backslash_value(c, env); + } + break; + } + + *src = p; + 
return c; +} + +static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); + +static OnigCodePoint +get_name_end_code_point(OnigCodePoint start) +{ + switch (start) { + case '<': return (OnigCodePoint )'>'; break; + case '\'': return (OnigCodePoint )'\''; break; + default: + break; + } + + return (OnigCodePoint )0; +} + +#ifdef USE_NAMED_GROUP +#ifdef USE_BACKREF_WITH_LEVEL +/* + \k, \k + \k, \k + \k<-num+n>, \k<-num-n> +*/ +static int +fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, + int* rback_num, int* rlevel) +{ + int r, sign, is_num, exist_level; + OnigCodePoint end_code; + OnigCodePoint c = 0; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *pnum_head; + UChar *p = *src; + PFETCH_READY; + + *rback_num = 0; + is_num = exist_level = 0; + sign = 1; + pnum_head = *src; + + end_code = get_name_end_code_point(start_code); + + name_end = end; + r = 0; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + if (c == end_code) + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else if (c == '-') { + is_num = 2; + sign = -1; + pnum_head = p; + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')' || c == '+' || c == '-') { + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + break; + } + + if (is_num != 0) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (r == 0 && c != end_code) { + if (c == '+' || c == '-') { + int level; + int flag = (c == '-' ? -1 : 1); + + PFETCH(c); + if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; + PUNFETCH; + level = onig_scan_unsigned_number(&p, end, enc); + if (level < 0) return ONIGERR_TOO_BIG_NUMBER; + *rlevel = (level * flag); + exist_level = 1; + + PFETCH(c); + if (c == end_code) + goto end; + } + + err: + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + end: + if (r == 0) { + if (is_num != 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) goto err; + + *rback_num *= sign; + } + + *rname_end = name_end; + *src = p; + return (exist_level ? 1 : 0); + } + else { + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif /* USE_BACKREF_WITH_LEVEL */ + +/* + def: 0 -> define name (don't allow number name) + 1 -> reference name (allow number name) +*/ +static int +fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) +{ + int r, is_num, sign; + OnigCodePoint end_code; + OnigCodePoint c = 0; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *pnum_head; + UChar *p = *src; + PFETCH_READY; + + *rback_num = 0; + + end_code = get_name_end_code_point(start_code); + + name_end = end; + pnum_head = *src; + r = 0; + is_num = 0; + sign = 1; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + if (c == end_code) + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (ref == 1) + is_num = 1; + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (c == '-') { + if (ref == 1) { + is_num = 2; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (r == 0) { + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')') { + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + break; + } + + if 
(is_num != 0) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + else + r = ONIGERR_INVALID_GROUP_NAME; + + is_num = 0; + } + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + } + + if (c != end_code) { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + if (is_num != 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } + + *rback_num *= sign; + } + + *rname_end = name_end; + *src = p; + return 0; + } + else { + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')') + break; + } + if (PEND) + name_end = end; + + err: + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#else +static int +fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) +{ + int r, is_num, sign; + OnigCodePoint end_code; + OnigCodePoint c = 0; + UChar *name_end; + OnigEncoding enc = env->enc; + UChar *pnum_head; + UChar *p = *src; + PFETCH_READY; + + *rback_num = 0; + + end_code = get_name_end_code_point(start_code); + + *rname_end = name_end = end; + r = 0; + pnum_head = *src; + is_num = 0; + sign = 1; + + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + if (c == end_code) + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else if (c == '-') { + is_num = 2; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + + PFETCH(c); + if (c == end_code || c == ')') break; + if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + if (r == 0 && c != end_code) { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + if (r == 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } + *rback_num *= sign; + + *rname_end = name_end; + *src = p; + return 0; + } + else { + err: + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif /* USE_NAMED_GROUP */ + +void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, + UChar* pat, UChar* pat_end, const UChar *fmt, va_list args); + +static void +onig_syntax_warn(ScanEnv *env, const char *fmt, ...) +{ + va_list args; + UChar buf[WARN_BUFSIZE]; + va_start(args, fmt); + onig_vsnprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + (const UChar *)fmt, args); + va_end(args); + if (env->sourcefile == NULL) + mrb_warn("%s", (char *)buf); + else + mrb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf); +} + +static void +CC_ESC_WARN(ScanEnv *env, UChar *c) +{ + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { + onig_syntax_warn(env, "character class has '%s' without escape", c); + } +} + +static void +CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c) +{ + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { + onig_syntax_warn(env, "regular expression has '%s' without escape", c); + } +} + +static void +CC_DUP_WARN(ScanEnv *env) +{ + if (onig_warn == onig_null_warn /*|| !RTEST(ruby_verbose)*/) return ; + + if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_DUP) && + !((env)->warnings_flag & ONIG_SYN_WARN_CC_DUP)) { + (env)->warnings_flag |= 
ONIG_SYN_WARN_CC_DUP; + onig_syntax_warn(env, "character class has duplicated range"); + } +} + +static void +UNKNOWN_ESC_WARN(ScanEnv *env, int c) +{ + if (onig_warn == onig_null_warn /*|| !RTEST(ruby_verbose)*/) return ; + onig_syntax_warn(env, "Unknown escape \\%c is ignored", c); +} + +static UChar* +find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, + UChar **next, OnigEncoding enc) +{ + int i; + OnigCodePoint x; + UChar *q; + UChar *p = from; + + while (p < to) { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + q = p + enclen(enc, p, to); + if (x == s[0]) { + for (i = 1; i < n && q < to; i++) { + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enclen(enc, q, to); + } + if (i >= n) { + if (IS_NOT_NULL(next)) + *next = q; + return p; + } + } + p = q; + } + return NULL_UCHARP; +} + +static int +str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, + OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn) +{ + int i, in_esc; + OnigCodePoint x; + UChar *q; + UChar *p = from; + + in_esc = 0; + while (p < to) { + if (in_esc) { + in_esc = 0; + p += enclen(enc, p, to); + } + else { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + q = p + enclen(enc, p, to); + if (x == s[0]) { + for (i = 1; i < n && q < to; i++) { + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enclen(enc, q, to); + } + if (i >= n) return 1; + p += enclen(enc, p, to); + } + else { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + if (x == bad) return 0; + else if (x == MC_ESC(syn)) in_esc = 1; + p = q; + } + } + } + return 0; +} + +static int +fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) +{ + int num; + OnigCodePoint c, c2; + const OnigSyntaxType* syn = env->syntax; + OnigEncoding enc = env->enc; + UChar* prev; + UChar* p = *src; + PFETCH_READY; + + if (PEND) { + tok->type = TK_EOT; + return tok->type; + } + + PFETCH(c); + tok->type = TK_CHAR; + tok->base = 0; + tok->u.c = c; + tok->escaped = 0; + + if (c == ']') 
{ + tok->type = TK_CC_CLOSE; + } + else if (c == '-') { + tok->type = TK_CC_RANGE; + } + else if (c == MC_ESC(syn)) { + if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) + goto end; + + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + PFETCH(c); + tok->escaped = 1; + tok->u.c = c; + switch (c) { + case 'w': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.not = 0; + break; + case 'W': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.not = 1; + break; + case 'd': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_D; + tok->u.prop.not = 0; + break; + case 'D': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_D; + tok->u.prop.not = 1; + break; + case 's': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.not = 0; + break; + case 'S': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.not = 1; + break; + case 'h': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 0; + break; + case 'H': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 1; + break; + + case 'p': + case 'P': + c2 = PPEEK; + if (c2 == '{' && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c2); + if (c2 == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 
1 : 0); + } + else + PUNFETCH; + } + } + else { + onig_syntax_warn(env, "invalid Unicode Property \\%c", c); + } + break; + + case 'x': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + c2 = PPEEK; + if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) { + PINC; + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { + num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; + } + break; + + case 'u': + if (PEND) break; + + prev = p; + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { + num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + break; + + case '0': + case '1': case '2': case '3': case '4': case '5': case '6': case '7': + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { + PUNFETCH; + prev = p; + num = scan_unsigned_octal_number(&p, end, 3, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; + } + break; + + default: + PUNFETCH; + num = fetch_escaped_value(&p, end, env); + if (num < 0) return num; + if (tok->u.c != num) { + tok->u.code = (OnigCodePoint )num; + tok->type = TK_CODE_POINT; + } + break; + } + } + else if (c == '[') { + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) { + OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; + tok->backp = p; /* point at '[' is readed */ + PINC; + if (str_exist_check_with_esc(send, 2, p, end, + (OnigCodePoint )']', enc, syn)) { + tok->type = TK_POSIX_BRACKET_OPEN; + } + else { + PUNFETCH; + goto cc_in_cc; + } + } + else { + cc_in_cc: + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { + tok->type = TK_CC_CC_OPEN; + } + else { + CC_ESC_WARN(env, (UChar* )"["); + } + } + } + else if (c == '&') { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) && + !PEND && (PPEEK_IS('&'))) { + PINC; + tok->type = TK_CC_AND; + } + } + + end: + *src = p; + return tok->type; +} + +static int +fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) +{ + int r, num; + OnigCodePoint c; + OnigEncoding enc = env->enc; + const OnigSyntaxType* syn = env->syntax; + UChar* prev; + UChar* p = *src; + PFETCH_READY; + + start: + if (PEND) { + tok->type = TK_EOT; + return tok->type; + } + + tok->type = TK_STRING; + tok->base = 0; + tok->backp = p; + + PFETCH(c); + if (IS_MC_ESC_CODE(c, syn)) { + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + tok->backp = p; + PFETCH(c); + + tok->u.c = c; + tok->escaped = 1; + switch (c) { + case '*': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '+': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 1; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '?': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = 1; + greedy_check: + if (!PEND && PPEEK_IS('?') && + IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { + PFETCH(c); + tok->u.repeat.greedy = 0; + tok->u.repeat.possessive = 0; + } + else { + possessive_check: + if (!PEND && PPEEK_IS('+') && + ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && + tok->type != TK_INTERVAL) || + (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && + tok->type == TK_INTERVAL))) { + PFETCH(c); + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 1; + } + else { + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 0; + } + } + break; + + case '{': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; + r = fetch_range_quantifier(&p, end, tok, env); + if (r < 0) return r; /* error */ + if (r == 0) goto greedy_check; + else if (r == 2) { /* {n} */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; + + goto greedy_check; + } + /* r == 1 : normal char */ + break; + + case '|': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break; + tok->type = TK_ALT; + break; + + case '(': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_OPEN; + break; + + case ')': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_CLOSE; + break; + + case 'w': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.not = 0; + break; + + case 'W': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.not = 1; + break; + + case 'b': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_BOUND; + break; + + case 'B': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_NOT_WORD_BOUND; + break; + +#ifdef USE_WORD_BEGIN_END + case '<': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_BEGIN; + break; + + case '>': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_END; + break; +#endif + + case 's': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.not = 0; + break; + + case 'S': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.not = 1; + break; + + case 'd': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_D; + tok->u.prop.not = 0; + break; + + case 'D': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_D; + tok->u.prop.not = 1; + break; + + case 'h': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 0; + break; + + case 'H': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 1; + break; + + case 'A': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + begin_buf: + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_BEGIN_BUF; + break; + + case 'Z': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_SEMI_END_BUF; + break; + + case 'z': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + end_buf: + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_END_BUF; + break; + + case 'G': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_BEGIN_POSITION; + break; + + case '`': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; + goto begin_buf; + break; + + case '\'': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; + goto end_buf; + break; + + case 'x': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { + PINC; + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { + num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; + } + break; + + case 'u': + if (PEND) break; + + prev = p; + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { + num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + break; + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + PUNFETCH; + prev = p; + num = onig_scan_unsigned_number(&p, end, enc); + if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { + goto skip_backref; + } + + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && + (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) + return ONIGERR_INVALID_BACKREF; + } + + tok->type = TK_BACKREF; + tok->u.backref.num = 1; + tok->u.backref.ref1 = num; + tok->u.backref.by_name = 0; +#ifdef USE_BACKREF_WITH_LEVEL + tok->u.backref.exist_level = 0; +#endif + break; + } + + skip_backref: + if (c == '8' || c == '9') { + /* normal char */ + p = prev; PINC; + break; + } + + p = prev; + /* fall through */ + case '0': + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { + prev = p; + num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; + } + else if (c != '0') { + PINC; + } + break; + +#ifdef USE_NAMED_GROUP + case 'k': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { + PFETCH(c); + if (c == '<' || c == '\'') { + UChar* name_end; + int* backs; + int back_num; + + prev = p; + +#ifdef USE_BACKREF_WITH_LEVEL + name_end = NULL_UCHARP; /* no need. escape gcc warning. 
*/ + r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end, + env, &back_num, &tok->u.backref.level); + if (r == 1) tok->u.backref.exist_level = 1; + else tok->u.backref.exist_level = 0; +#else + r = fetch_name(&p, end, &name_end, env, &back_num, 1); +#endif + if (r < 0) return r; + + if (back_num != 0) { + if (back_num < 0) { + back_num = BACKREF_REL_TO_ABS(back_num, env); + if (back_num <= 0) + return ONIGERR_INVALID_BACKREF; + } + + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (back_num > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[back_num])) + return ONIGERR_INVALID_BACKREF; + } + tok->type = TK_BACKREF; + tok->u.backref.by_name = 0; + tok->u.backref.num = 1; + tok->u.backref.ref1 = back_num; + } + else { + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); + if (num <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + int i; + for (i = 0; i < num; i++) { + if (backs[i] > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + return ONIGERR_INVALID_BACKREF; + } + } + + tok->type = TK_BACKREF; + tok->u.backref.by_name = 1; + if (num == 1) { + tok->u.backref.num = 1; + tok->u.backref.ref1 = backs[0]; + } + else { + tok->u.backref.num = num; + tok->u.backref.refs = backs; + } + } + } + else { + PUNFETCH; + onig_syntax_warn(env, "invalid back reference"); + } + } + break; +#endif + +#ifdef USE_SUBEXP_CALL + case 'g': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { + PFETCH(c); + if (c == '<' || c == '\'') { + int gnum; + UChar* name_end; + + prev = p; + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); + if (r < 0) return r; + + tok->type = TK_CALL; + tok->u.call.name = prev; + tok->u.call.name_end = name_end; + tok->u.call.gnum = gnum; + } + else { + onig_syntax_warn(env, "invalid subexp call"); + PUNFETCH; + } + } + break; 
+#endif + + case 'Q': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) { + tok->type = TK_QUOTE_OPEN; + } + break; + + case 'p': + case 'P': + if (PPEEK_IS('{') && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c); + if (c == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + } + else + PUNFETCH; + } + } + else { + onig_syntax_warn(env, "invalid Unicode Property \\%c", c); + } + break; + + default: + PUNFETCH; + num = fetch_escaped_value(&p, end, env); + if (num < 0) return num; + /* set_raw: */ + if (tok->u.c != num) { + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { /* string */ + p = tok->backp + enclen(enc, tok->backp, end); + } + break; + } + } + else { + tok->u.c = c; + tok->escaped = 0; + +#ifdef USE_VARIABLE_META_CHARS + if ((c != ONIG_INEFFECTIVE_META_CHAR) && + IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { + if (c == MC_ANYCHAR(syn)) + goto any_char; + else if (c == MC_ANYTIME(syn)) + goto anytime; + else if (c == MC_ZERO_OR_ONE_TIME(syn)) + goto zero_or_one_time; + else if (c == MC_ONE_OR_MORE_TIME(syn)) + goto one_or_more_time; + else if (c == MC_ANYCHAR_ANYTIME(syn)) { + tok->type = TK_ANYCHAR_ANYTIME; + goto out; + } + } +#endif + + switch (c) { + case '.': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; +#ifdef USE_VARIABLE_META_CHARS + any_char: +#endif + tok->type = TK_ANYCHAR; + break; + + case '*': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; +#ifdef USE_VARIABLE_META_CHARS + anytime: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '+': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; +#ifdef USE_VARIABLE_META_CHARS + one_or_more_time: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 1; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '?': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; +#ifdef USE_VARIABLE_META_CHARS + zero_or_one_time: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = 1; + goto greedy_check; + break; + + case '{': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; + r = fetch_range_quantifier(&p, end, tok, env); + if (r < 0) return r; /* error */ + if (r == 0) goto greedy_check; + else if (r == 2) { /* {n} */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; + + goto greedy_check; + } + /* r == 1 : normal char */ + break; + + case '|': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break; + tok->type = TK_ALT; + break; + + case '(': + if (PPEEK_IS('?') && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + PINC; + if (PPEEK_IS('#')) { + PFETCH(c); + while (1) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + if (c == MC_ESC(syn)) { + if (!PEND) PFETCH(c); + } + else { + if (c == ')') break; + } + } + goto start; + } + PUNFETCH; + } + + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_OPEN; + break; + + case ')': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_CLOSE; + break; + + case '^': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = (IS_SINGLELINE(env->option) + ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); + break; + + case '$': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = (IS_SINGLELINE(env->option) + ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); + break; + + case '[': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break; + tok->type = TK_CC_OPEN; + break; + + case ']': + if (*src > env->pattern) /* /].../ is allowed. */ + CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); + break; + + case '#': + if (IS_EXTEND(env->option)) { + while (!PEND) { + PFETCH(c); + if (ONIGENC_IS_CODE_NEWLINE(enc, c)) + break; + } + goto start; + break; + } + break; + + case ' ': case '\t': case '\n': case '\r': case '\f': + if (IS_EXTEND(env->option)) + goto start; + break; + + default: + /* string */ + break; + } + } + +#ifdef USE_VARIABLE_META_CHARS + out: +#endif + *src = p; + return tok->type; +} + +static int +add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, + ScanEnv* env, + OnigCodePoint sb_out, const OnigCodePoint mbr[]) +{ + int i, r; + OnigCodePoint j; + + int n = ONIGENC_CODE_RANGE_NUM(mbr); + + if (not == 0) { + for (i = 0; i < n; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(mbr, i); + j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { + if (j >= sb_out) { + if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { + r = add_code_range_to_buf(&(cc->mbuf), env, j, + ONIGENC_CODE_RANGE_TO(mbr, i)); + if (r != 0) return r; + i++; + } + + goto sb_end; + } + BITSET_SET_BIT_CHKDUP(cc->bs, j); + } + } + + sb_end: + for ( ; i < n; i++) { + r = add_code_range_to_buf(&(cc->mbuf), env, + ONIGENC_CODE_RANGE_FROM(mbr, i), + ONIGENC_CODE_RANGE_TO(mbr, i)); + if (r != 0) return r; + } + } + else { + OnigCodePoint prev = 0; + + for (i = 0; i < n; i++) { + for (j = prev; + j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { + if (j >= sb_out) { + goto sb_end2; + } + BITSET_SET_BIT_CHKDUP(cc->bs, j); + } + prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; + } + for (j = prev; j < sb_out; j++) { + BITSET_SET_BIT_CHKDUP(cc->bs, j); + } + + sb_end2: + prev = sb_out; + + for (i = 0; i < n; i++) { + if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { + r = add_code_range_to_buf(&(cc->mbuf), env, prev, + ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); + if (r != 0) return r; + } + prev = 
ONIGENC_CODE_RANGE_TO(mbr, i) + 1; + } + if (prev < 0x7fffffff) { + r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff); + if (r != 0) return r; + } + } + + return 0; +} + +static int +add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) +{ + int c, r; + const OnigCodePoint *ranges; + OnigCodePoint sb_out; + OnigEncoding enc = env->enc; + + switch (ctype) { + case ONIGENC_CTYPE_D: + case ONIGENC_CTYPE_S: + case ONIGENC_CTYPE_W: + ctype ^= ONIGENC_CTYPE_SPECIAL_MASK; + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype)) + BITSET_SET_BIT_CHKDUP(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype)) + BITSET_SET_BIT_CHKDUP(cc->bs, c); + } + } + return 0; + break; + } + + r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); + if (r == 0) { + return add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges); + } + else if (r != ONIG_NO_SUPPORT_CONFIG) { + return r; + } + + r = 0; + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + case ONIGENC_CTYPE_BLANK: + case ONIGENC_CTYPE_CNTRL: + case ONIGENC_CTYPE_DIGIT: + case ONIGENC_CTYPE_LOWER: + case ONIGENC_CTYPE_PUNCT: + case ONIGENC_CTYPE_SPACE: + case ONIGENC_CTYPE_UPPER: + case ONIGENC_CTYPE_XDIGIT: + case ONIGENC_CTYPE_ASCII: + case ONIGENC_CTYPE_ALNUM: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT_CHKDUP(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT_CHKDUP(cc->bs, c); + } + } + break; + + case ONIGENC_CTYPE_GRAPH: + case ONIGENC_CTYPE_PRINT: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! 
ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT_CHKDUP(cc->bs, c); + } + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT_CHKDUP(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + break; + + case ONIGENC_CTYPE_WORD: + if (not == 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ + && ! ONIGENC_IS_CODE_WORD(enc, c)) + BITSET_SET_BIT_CHKDUP(cc->bs, c); + } + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + + return r; +} + +static int +parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) +{ +#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 +#define POSIX_BRACKET_NAME_MIN_LEN 4 + + static const PosixBracketEntryType PBS[] = { + { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 }, + { (UChar* )NULL, -1, 0 } + }; + + const PosixBracketEntryType *pb; + int not, i, r; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar *p = *src; + PFETCH_READY; + + if (PPEEK_IS('^')) { + PINC; + not = 1; + } + else + not = 0; + + if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN 
+ 3) + goto not_posix_bracket; + + for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { + p = (UChar* )onigenc_step(enc, p, end, pb->len); + if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + + r = add_ctype_to_cc(cc, pb->ctype, not, env); + if (r != 0) return r; + + PINC; PINC; + *src = p; + return 0; + } + } + + not_posix_bracket: + c = 0; + i = 0; + while (!PEND && ((c = PPEEK) != ':') && c != ']') { + PINC; + if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break; + } + if (c == ':' && ! PEND) { + PINC; + if (! PEND) { + PFETCH(c); + if (c == ']') + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + } + } + + return 1; /* 1: is not POSIX bracket, but no error. */ +} + +static int +fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) +{ + int r; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar *prev, *start, *p = *src; + PFETCH_READY; + + r = 0; + start = prev = p; + + while (!PEND) { + prev = p; + PFETCH(c); + if (c == '}') { + r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev); + if (r < 0) break; + + *src = p; + return r; + } + else if (c == '(' || c == ')' || c == '{' || c == '|') { + r = ONIGERR_INVALID_CHAR_PROPERTY_NAME; + break; + } + } + + onig_scan_env_set_error_string(env, r, *src, prev); + return r; +} + +static int +parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, ctype; + CClassNode* cc; + + ctype = fetch_char_property_to_ctype(src, end, env); + if (ctype < 0) return ctype; + + *np = node_new_cclass(); + CHECK_NULL_RETURN_MEMERR(*np); + cc = NCCLASS(*np); + r = add_ctype_to_cc(cc, ctype, 0, env); + if (r != 0) return r; + if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); + + return 0; +} + + +enum CCSTATE { + CCS_VALUE, + CCS_RANGE, + CCS_COMPLETE, + CCS_START +}; + +enum CCVALTYPE { + CCV_SB, + CCV_CODE_POINT, + CCV_CLASS +}; + +static int 
+next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) +{ + int r; + + if (*state == CCS_RANGE) + return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; + + if (*state == CCS_VALUE && *type != CCV_CLASS) { + if (*type == CCV_SB) + BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); + if (r < 0) return r; + } + } + + *state = CCS_VALUE; + *type = CCV_CLASS; + return 0; +} + +static int +next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, + int* vs_israw, int v_israw, + enum CCVALTYPE intype, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) +{ + int r; + + switch (*state) { + case CCS_VALUE: + if (*type == CCV_SB) + BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); + if (r < 0) return r; + } + break; + + case CCS_RANGE: + if (intype == *type) { + if (intype == CCV_SB) { + if (*vs > 0xff || v > 0xff) + return ONIGERR_INVALID_CODE_POINT_VALUE; + + if (*vs > v) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + bitset_set_range(env, cc->bs, (int )*vs, (int )v); + } + else { + r = add_code_range(&(cc->mbuf), env, *vs, v); + if (r < 0) return r; + } + } + else { + if (*vs > v) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? 
v : 0xff)); + r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); + if (r < 0) return r; + } + ccs_range_end: + *state = CCS_COMPLETE; + break; + + case CCS_COMPLETE: + case CCS_START: + *state = CCS_VALUE; + break; + + default: + break; + } + + *vs_israw = v_israw; + *vs = v; + *type = intype; + return 0; +} + +static int +code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, + ScanEnv* env) +{ + int in_esc; + OnigCodePoint code; + OnigEncoding enc = env->enc; + UChar* p = from; + PFETCH_READY; + + in_esc = 0; + while (! PEND) { + if (ignore_escaped && in_esc) { + in_esc = 0; + } + else { + PFETCH(code); + if (code == c) return 1; + if (code == MC_ESC(env->syntax)) in_esc = 1; + } + } + return 0; +} + +static int +parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, neg, len, fetched, and_start; + OnigCodePoint v, vs; + UChar *p; + Node* node; + CClassNode *cc, *prev_cc; + CClassNode work_cc; + + enum CCSTATE state; + enum CCVALTYPE val_type, in_type; + int val_israw, in_israw; + + prev_cc = (CClassNode* )NULL; + *np = NULL_NODE; + r = fetch_token_in_cc(tok, src, end, env); + if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { + neg = 1; + r = fetch_token_in_cc(tok, src, end, env); + } + else { + neg = 0; + } + + if (r < 0) return r; + if (r == TK_CC_CLOSE) { + if (! code_exist_check((OnigCodePoint )']', + *src, env->pattern_end, 1, env)) + return ONIGERR_EMPTY_CHAR_CLASS; + + CC_ESC_WARN(env, (UChar* )"]"); + r = tok->type = TK_CHAR; /* allow []...] 
*/ + } + + *np = node = node_new_cclass(); + CHECK_NULL_RETURN_MEMERR(node); + cc = NCCLASS(node); + + and_start = 0; + state = CCS_START; + p = *src; + while (r != TK_CC_CLOSE) { + fetched = 0; + switch (r) { + case TK_CHAR: + if ((tok->u.code >= SINGLE_BYTE_SIZE) || + (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) { + in_type = CCV_CODE_POINT; + } + else if (len < 0) { + r = len; + goto err; + } + else { + sb_char: + in_type = CCV_SB; + } + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + goto val_entry2; + break; + + case TK_RAW_BYTE: + /* tok->base != 0 : octal or hexadec. */ + if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; + UChar* psave = p; + int i, base = tok->base; + + buf[0] = tok->u.c; + for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + if (r != TK_RAW_BYTE || tok->base != base) { + fetched = 1; + break; + } + buf[i] = tok->u.c; + } + + if (i < ONIGENC_MBC_MINLEN(env->enc)) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + + len = enclen(env->enc, buf, buf+i); + if (i < len) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + else if (i > len) { /* fetch back */ + p = psave; + for (i = 1; i < len; i++) { + r = fetch_token_in_cc(tok, &p, end, env); + } + fetched = 0; + } + + if (i == 1) { + v = (OnigCodePoint )buf[0]; + goto raw_single; + } + else { + v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); + in_type = CCV_CODE_POINT; + } + } + else { + v = (OnigCodePoint )tok->u.c; + raw_single: + in_type = CCV_SB; + } + in_israw = 1; + goto val_entry2; + break; + + case TK_CODE_POINT: + v = tok->u.code; + in_israw = 1; + val_entry: + len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); + if (len < 0) { + r = len; + goto err; + } + in_type = (len == 1 ? 
CCV_SB : CCV_CODE_POINT); + val_entry2: + r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, + &state, env); + if (r != 0) goto err; + break; + + case TK_POSIX_BRACKET_OPEN: + r = parse_posix_bracket(cc, &p, end, env); + if (r < 0) goto err; + if (r == 1) { /* is not POSIX bracket */ + CC_ESC_WARN(env, (UChar* )"["); + p = tok->backp; + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + goto val_entry; + } + goto next_class; + break; + + case TK_CHAR_TYPE: + r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env); + if (r != 0) return r; + + next_class: + r = next_state_class(cc, &vs, &val_type, &state, env); + if (r != 0) goto err; + break; + + case TK_CHAR_PROPERTY: + { + int ctype; + + ctype = fetch_char_property_to_ctype(&p, end, env); + if (ctype < 0) return ctype; + r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env); + if (r != 0) return r; + goto next_class; + } + break; + + case TK_CC_RANGE: + if (state == CCS_VALUE) { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) { /* allow [x-] */ + range_end_val: + v = (OnigCodePoint )'-'; + in_israw = 0; + goto val_entry; + } + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, (UChar* )"-"); + goto range_end_val; + } + state = CCS_RANGE; + } + else if (state == CCS_START) { + /* [-xa] is allowed */ + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + /* [--x] or [a&&-x] is warned. 
*/ + if (r == TK_CC_RANGE || and_start != 0) + CC_ESC_WARN(env, (UChar* )"-"); + + goto val_entry; + } + else if (state == CCS_RANGE) { + CC_ESC_WARN(env, (UChar* )"-"); + goto sb_char; /* [!--x] is allowed */ + } + else { /* CCS_COMPLETE */ + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, (UChar* )"-"); + goto range_end_val; + } + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { + CC_ESC_WARN(env, (UChar* )"-"); + goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ + } + r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; + goto err; + } + break; + + case TK_CC_CC_OPEN: /* [ */ + { + Node *anode; + CClassNode* acc; + + r = parse_char_class(&anode, tok, &p, end, env); + if (r == 0) { + acc = NCCLASS(anode); + r = or_cclass(cc, acc, env); + } + onig_node_free(anode); + if (r != 0) goto err; + } + break; + + case TK_CC_AND: /* && */ + { + if (state == CCS_VALUE) { + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + &val_type, &state, env); + if (r != 0) goto err; + } + /* initialize local variables */ + and_start = 1; + state = CCS_START; + + if (IS_NOT_NULL(prev_cc)) { + r = and_cclass(prev_cc, cc, env); + if (r != 0) goto err; + bbuf_free(cc->mbuf); + } + else { + prev_cc = cc; + cc = &work_cc; + } + initialize_cclass(cc); + } + break; + + case TK_EOT: + r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS; + goto err; + break; + default: + r = ONIGERR_PARSER_BUG; + goto err; + break; + } + + if (fetched) + r = tok->type; + else { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + } + } + + if (state == CCS_VALUE) { + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + &val_type, &state, env); + if (r != 0) goto err; + } + + if (IS_NOT_NULL(prev_cc)) { + r = and_cclass(prev_cc, cc, env); + if (r != 0) goto err; + bbuf_free(cc->mbuf); + cc = prev_cc; + } + + if (neg != 0) + 
NCCLASS_SET_NOT(cc);
+  else
+    NCCLASS_CLEAR_NOT(cc);
+  if (IS_NCCLASS_NOT(cc) &&
+      IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
+    int is_empty;
+
+    is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
+    if (is_empty != 0)
+      BITSET_IS_EMPTY(cc->bs, is_empty);
+
+    if (is_empty == 0) {
+#define NEWLINE_CODE    0x0a
+
+      if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
+        if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
+          BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
+        else
+          add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
+      }
+    }
+  }
+  *src = p;
+  return 0;
+
+ err:
+  if (cc != NCCLASS(*np))
+    bbuf_free(cc->mbuf);
+  return r;
+}
+
+static int parse_subexp(Node** top, OnigToken* tok, int term,
+                        UChar** src, UChar* end, ScanEnv* env);
+
+/*
+ * Parse the construct that follows an opening parenthesis.
+ *
+ * On entry *src points just past the '('.  When the next character is '?'
+ * and ONIG_SYN_OP2_QMARK_GROUP_EFFECT is enabled, the character after it
+ * selects the group kind (':' '=' '!' '>' '<' '\'' '@' or an option
+ * letter); otherwise a plain capturing group is built, or a non-capturing
+ * one when ONIG_OPTION_DONT_CAPTURE_GROUP is on.
+ *
+ * Return value (negative values are ONIGERR_* codes):
+ *   0  *np is an anchor/enclose/option node whose target is the parsed body
+ *   1  grouping only: *np is the parsed body itself
+ *   2  option-only group such as "(?i)": *np is an option node without a
+ *      target; the caller parses what follows and attaches it
+ *
+ * *src is advanced only on the success paths.  On failure *np is either
+ * NULL or a node that the caller still owns and releases (parse_branch()
+ * frees the node handed back through parse_exp()), so this function must
+ * never leave a dangling pointer in *np.
+ */
+static int
+parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
+              ScanEnv* env)
+{
+  int r, num;
+  Node *target;
+  OnigOptionType option;
+  OnigCodePoint c;
+  OnigEncoding enc = env->enc;
+
+#ifdef USE_NAMED_GROUP
+  int list_capture;
+#endif
+
+  UChar* p = *src;
+  PFETCH_READY;
+
+  *np = NULL;
+  if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+
+  option = env->option;
+  if (PPEEK_IS('?') &&
+      IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
+    PINC;
+    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+
+    PFETCH(c);
+    switch (c) {
+    case ':':   /* (?:...) grouping only */
+    group:
+      r = fetch_token(tok, &p, end, env);
+      if (r < 0) return r;
+      r = parse_subexp(np, tok, term, &p, end, env);
+      if (r < 0) return r;
+      *src = p;
+      return 1; /* group */
+      break;
+
+    case '=':
+      *np = onig_node_new_anchor(ANCHOR_PREC_READ);
+      break;
+    case '!':  /*         preceding read */
+      *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
+      break;
+    case '>':            /* (?>...) stop backtrack */
+      *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+      break;
+
+#ifdef USE_NAMED_GROUP
+    case '\'':
+      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+        goto named_group1;
+      }
+      else
+        return ONIGERR_UNDEFINED_GROUP_OPTION;
+      break;
+#endif
+
+    case '<':   /* look behind (?<=...), (?<!...) */
+      /* NOTE(review): everything from the end of the comment above down to
+         the IS_SYNTAX_OP2() test was missing from this copy (a "<...>" span
+         had been stripped) and is restored here to match upstream
+         Oniguruma; confirm against the original commit. */
+      PFETCH(c);
+      if (c == '=')
+        *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
+      else if (c == '!')
+        *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
+#ifdef USE_NAMED_GROUP
+      else {
+        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+          UChar *name;
+          UChar *name_end;
+
+          PUNFETCH;
+          c = '<';
+
+        named_group1:
+          list_capture = 0;
+
+        named_group2:
+          name = p;
+          r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
+          if (r < 0) return r;
+
+          num = scan_env_add_mem_entry(env);
+          if (num < 0) return num;
+          if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
+            return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+
+          r = name_add(env->reg, name, name_end, num, env);
+          if (r != 0) return r;
+          *np = node_new_enclose_memory(env->option, 1);
+          CHECK_NULL_RETURN_MEMERR(*np);
+          NENCLOSE(*np)->regnum = num;
+          if (list_capture != 0)
+            BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+          env->num_named++;
+        }
+        else {
+          return ONIGERR_UNDEFINED_GROUP_OPTION;
+        }
+      }
+#else
+      else {
+        return ONIGERR_UNDEFINED_GROUP_OPTION;
+      }
+#endif
+      break;
+
+    case '@':
+      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
+#ifdef USE_NAMED_GROUP
+        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+          PFETCH(c);
+          if (c == '<' || c == '\'') {
+            list_capture = 1;
+            goto named_group2; /* (?@<name>...) */
+          }
+          PUNFETCH;
+        }
+#endif
+        *np = node_new_enclose_memory(env->option, 0);
+        CHECK_NULL_RETURN_MEMERR(*np);
+        num = scan_env_add_mem_entry(env);
+        if (num < 0) {
+          /* Clear *np after freeing: the caller frees whatever is left in
+             *np on error, so a stale pointer would be freed twice. */
+          onig_node_free(*np);
+          *np = NULL;
+          return num;
+        }
+        else if (num >= (int )BIT_STATUS_BITS_NUM) {
+          onig_node_free(*np);
+          *np = NULL;
+          return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+        }
+        NENCLOSE(*np)->regnum = num;
+        BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+      }
+      else {
+        return ONIGERR_UNDEFINED_GROUP_OPTION;
+      }
+      break;
+
+    case '-': case 'i': case 'm': case 's': case 'x':
+      {
+        int neg = 0;
+
+        /* Consume option letters until ')' (option only) or ':' (options
+           scoped to the group body). */
+        while (1) {
+          switch (c) {
+          case ':':
+          case ')':
+          break;
+
+          case '-':  neg = 1; break;
+          case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break;
+          case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
+          case 's':
+            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
+              ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
+            }
+            else
+              return ONIGERR_UNDEFINED_GROUP_OPTION;
+            break;
+
+          case 'm':
+            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
+              ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
+            }
+            else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
+              ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
+            }
+            else
+              return ONIGERR_UNDEFINED_GROUP_OPTION;
+            break;
+          default:
+            return ONIGERR_UNDEFINED_GROUP_OPTION;
+          }
+
+          if (c == ')') {
+            *np = node_new_option(option);
+            CHECK_NULL_RETURN_MEMERR(*np);
+            *src = p;
+            return 2; /* option only */
+          }
+          else if (c == ':') {
+            OnigOptionType prev = env->option;
+
+            env->option = option;
+            r = fetch_token(tok, &p, end, env);
+            if (r < 0) return r;
+            r = parse_subexp(&target, tok, term, &p, end, env);
+            env->option = prev;
+            if (r < 0) {
+              /* target may hold a partially built tree; nobody else can
+                 reach it, so release it here. */
+              onig_node_free(target);
+              return r;
+            }
+            *np = node_new_option(option);
+            if (IS_NULL(*np)) {
+              onig_node_free(target);
+              return ONIGERR_MEMORY;
+            }
+            NENCLOSE(*np)->target = target;
+            *src = p;
+            return 0;
+          }
+
+          if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+          PFETCH(c);
+        }
+      }
+      break;
+
+    default:
+      return ONIGERR_UNDEFINED_GROUP_OPTION;
+    }
+  }
+  else {
+    if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
+      goto group;
+
+    *np = node_new_enclose_memory(env->option, 0);
+    CHECK_NULL_RETURN_MEMERR(*np);
+    num = scan_env_add_mem_entry(env);
+    if (num < 0) return num;
+    NENCLOSE(*np)->regnum = num;
+  }
+
+  /* Covers the anchor/enclose allocations made in the switch above. */
+  CHECK_NULL_RETURN_MEMERR(*np);
+  r = fetch_token(tok, &p, end, env);
+  if (r < 0) return r;
+  r = parse_subexp(&target, tok, term, &p, end, env);
+  if (r < 0) {
+    onig_node_free(target);
+    return r;
+  }
+
+  if (NTYPE(*np) == NT_ANCHOR)
+    NANCHOR(*np)->target = target;
+  else {
+    NENCLOSE(*np)->target = target;
+    if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
+      /* Don't move this to previous of parse_subexp() */
+      r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
+      if (r != 0) return r;
+    }
+  }
+
+  *src = p;
+  return 0;
+}
+
+static const char* const PopularQStr[] = {
+  "?", "*", "+", "??", "*?", "+?"
+};
+
+static const char* const ReduceQStr[] = {
+  "", "", "*", "*?", "??", "+ and ??", "+? and ?"
+};
+
+/*
+ * Attach `target` to the quantifier node `qnode`.
+ *
+ * `group` is non-zero when the target came from an explicit (?:...) group,
+ * in which case a string target is never split.
+ *
+ * Returns:
+ *   1  the quantifier is {1,1} and has no effect; qnode is left without a
+ *      target
+ *   2  the target string was split and only its last character became the
+ *      quantifier's target
+ *   0  otherwise: either qn->target was set to `target`, or the nested
+ *      quantifiers were merged by onig_reduce_nested_quantifier()
+ */
+static int
+set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
+{
+  QtfrNode* qn;
+
+  qn = NQTFR(qnode);
+  if (qn->lower == 1 && qn->upper == 1) {
+    return 1;
+  }
+
+  switch (NTYPE(target)) {
+  case NT_STR:
+    if (! group) {
+      StrNode* sn = NSTR(target);
+      if (str_node_can_be_split(sn, env->enc)) {
+        Node* n = str_node_split_last_char(sn, env->enc);
+        if (IS_NOT_NULL(n)) {
+          qn->target = n;
+          return 2;
+        }
+      }
+    }
+    break;
+
+  case NT_QTFR:
+    { /* check redundant double repeat. */
+      /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
+      QtfrNode* qnt   = NQTFR(target);
+      int nestq_num   = popular_quantifier_num(qn);
+      int targetq_num = popular_quantifier_num(qnt);
+
+#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+      if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
+          IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
+        UChar buf[WARN_BUFSIZE];
+
+        switch(ReduceTypeTable[targetq_num][nestq_num]) {
+        case RQ_ASIS:
+          break;
+
+        case RQ_DEL:
+          if (onig_verb_warn != onig_null_warn) {
+            onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+                                  env->pattern, env->pattern_end,
+                                  (UChar* )"redundant nested repeat operator");
+            (*onig_verb_warn)((char* )buf);
+          }
+          goto warn_exit;
+          break;
+
+        default:
+          if (onig_verb_warn != onig_null_warn) {
+            onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+                                       env->pattern, env->pattern_end,
+            (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
+            PopularQStr[targetq_num], PopularQStr[nestq_num],
+            ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
+            (*onig_verb_warn)((char* )buf);
+          }
+          goto warn_exit;
+          break;
+        }
+      }
+
+    warn_exit:
+#endif
+      if (targetq_num >= 0) {
+        if (nestq_num >= 0) {
+          onig_reduce_nested_quantifier(qnode, target);
+          goto q_exit;
+        }
+        else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
+          /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
+          if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
+            qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+          }
+        }
+      }
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  qn->target = target;
+ q_exit:
+  return 0;
+}
+
+
+#ifdef USE_SHARED_CCLASS_TABLE
+
+#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS     8
+
+/* for ctype node hash table */
+
+/* Key of the shared character-class table: encoding, negation flag and
+   ctype. */
+typedef struct {
+  OnigEncoding enc;
+  int not;
+  int type;
+} type_cclass_key;
+
+/* st_hash_type comparison callback: 0 when all three fields match,
+   1 otherwise. */
+static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
+{
+  if (x->type != y->type) return 1;
+  if (x->enc  != y->enc)  return 1;
+  if (x->not  != y->not)  return 1;
+  return 0;
+}
+
+/*
+ * st_hash_type hash callback: mixes the bytes of key->enc and key->type,
+ * then adds key->not.
+ *
+ * The accumulator is unsigned on purpose: the repeated "* 997" exceeds the
+ * range of int after a few bytes, and signed overflow is undefined
+ * behaviour, whereas unsigned arithmetic wraps.
+ */
+static st_index_t type_cclass_hash(type_cclass_key* key)
+{
+  int i;
+  unsigned int val;
+  UChar *p;
+
+  val = 0;
+
+  p = (UChar* )&(key->enc);
+  for (i = 0; i < (int )sizeof(key->enc); i++) {
+    val = val * 997 + (unsigned int )*p++;
+  }
+
+  p = (UChar* )(&key->type);
+  for (i = 0; i < (int )sizeof(key->type); i++) {
+    val = val * 997 + (unsigned int )*p++;
+  }
+
+  val += (unsigned int )key->not;
+  return val + (val >> 5);
+}
+
+static const struct st_hash_type type_type_cclass_hash = {
+  type_cclass_cmp,
+  type_cclass_hash,
+};
+
+/* Table of shared character-class nodes; NULL until first needed. */
+static st_table* OnigTypeCClassTable;
+
+
+/* onig_st_foreach() callback: frees one table entry (the node, its mbuf
+   header and the key) and asks the table to drop it. */
+static int
+i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
+{
+  if (IS_NOT_NULL(node)) {
+    CClassNode* cc = NCCLASS(node);
+    if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
+    xfree(node);
+  }
+
+  if (IS_NOT_NULL(key)) xfree(key);
+  return ST_DELETE;
+}
+
+/* Free every shared character-class node and the table itself, inside
+   THREAD_ATOMIC_START/END.  Always returns 0. */
+extern int
+onig_free_shared_cclass_table(void)
+{
+  THREAD_ATOMIC_START;
+  if (IS_NOT_NULL(OnigTypeCClassTable)) {
+    onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
+    onig_st_free_table(OnigTypeCClassTable);
+    OnigTypeCClassTable = NULL;
+  }
+  THREAD_ATOMIC_END;
+
+  return 0;
+}
+
+#endif /* USE_SHARED_CCLASS_TABLE */
+
+
+#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+/* Rewrite a negated class as the equivalent positive one: invert the
+   bitset, complement the multibyte range buffer when the encoding is not
+   single-byte, then clear the NOT flag.  Returns 0 or the error from
+   not_code_range_buf(). */
+static int
+clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
+{
+  BBuf *tbuf;
+  int r;
+
+  if (IS_NCCLASS_NOT(cc)) {
+    bitset_invert(cc->bs);
+
+    if (!
ONIGENC_IS_SINGLEBYTE(enc)) {
+      r = not_code_range_buf(enc, cc->mbuf, &tbuf);
+      if (r != 0) return r;
+
+      bbuf_free(cc->mbuf);
+      cc->mbuf = tbuf;
+    }
+
+    NCCLASS_CLEAR_NOT(cc);
+  }
+
+  return 0;
+}
+#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
+
+/* State shared with i_apply_case_fold(): the class being extended and the
+   alternation list (root and tail slot) collecting multi-character folds. */
+typedef struct {
+  ScanEnv*    env;
+  CClassNode* cc;
+  Node*       alt_root;
+  Node**      ptail;
+} IApplyCaseFoldArg;
+
+/*
+ * Callback handed to ONIGENC_APPLY_ALL_CASE_FOLD() by parse_exp().
+ *
+ * from    code point that has a case-fold mapping
+ * to      the code point(s) it folds to
+ * to_len  number of entries in `to`
+ * arg     IApplyCaseFoldArg*
+ *
+ * For a one-to-one fold whose source is in the class, the target code point
+ * is added to the class.  For a one-to-many fold whose source is in the
+ * class, the target sequence is appended as a string node to the
+ * alternation list at iarg->ptail.
+ *
+ * Returns 0 on success or a negative ONIGERR_* code.  The list built so far
+ * stays reachable through iarg->alt_root, so the caller can free it.
+ */
+static int
+i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
+                  int to_len, void* arg)
+{
+  IApplyCaseFoldArg* iarg;
+  ScanEnv* env;
+  CClassNode* cc;
+  BitSetRef bs;
+
+  iarg = (IApplyCaseFoldArg* )arg;
+  env = iarg->env;
+  cc  = iarg->cc;
+  bs = cc->bs;
+
+  if (to_len == 1) {
+    int is_in = onig_is_code_in_cc(env->enc, from, cc);
+#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+    if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
+        (is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
+      if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+        add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+      }
+      else {
+        BITSET_SET_BIT(bs, *to);
+      }
+    }
+#else
+    if (is_in != 0) {
+      if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+        if (IS_NCCLASS_NOT(cc)) {
+          /* clear_not_flag_cclass() can fail; do not go on adding ranges
+             to a class that was only half converted. */
+          int r = clear_not_flag_cclass(cc, env->enc);
+          if (r != 0) return r;
+        }
+        add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+      }
+      else {
+        if (IS_NCCLASS_NOT(cc)) {
+          BITSET_CLEAR_BIT(bs, *to);
+        }
+        else
+          BITSET_SET_BIT(bs, *to);
+      }
+    }
+#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
+  }
+  else {
+    int r, i, len;
+    UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+    Node *snode = NULL_NODE;
+
+    if (onig_is_code_in_cc(env->enc, from, cc)
+#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+        && !IS_NCCLASS_NOT(cc)
+#endif
+        ) {
+      for (i = 0; i < to_len; i++) {
+        len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
+        if (len < 0) {
+          /* Same check parse_exp() applies to ONIGENC_CODE_TO_MBC();
+             a negative length must not be used as a buffer offset. */
+          onig_node_free(snode);
+          return len;
+        }
+        if (i == 0) {
+          snode = onig_node_new_str(buf, buf + len);
+          CHECK_NULL_RETURN_MEMERR(snode);
+
+          /* char-class expanded multi-char only
+             compare with string folded at match time. */
+          NSTRING_SET_AMBIG(snode);
+        }
+        else {
+          r = onig_node_str_cat(snode, buf, buf + len);
+          if (r < 0) {
+            onig_node_free(snode);
+            return r;
+          }
+        }
+      }
+
+      *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
+      if (IS_NULL(*(iarg->ptail))) {
+        /* snode is not linked into the list yet, so free it here. */
+        onig_node_free(snode);
+        return ONIGERR_MEMORY;
+      }
+      iarg->ptail = &(NCDR((*(iarg->ptail))));
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Parse one expression: a single atom selected by the current token,
+ * followed by any quantifiers applied to it.
+ *
+ * `term` is the token type that ends the enclosing sub-expression.
+ * Returns the type of the token left in `tok` after the expression, or a
+ * negative ONIGERR_* code; on error *np may still hold a node that the
+ * caller releases.
+ */
+static int
+parse_exp(Node** np, OnigToken* tok, int term,
+          UChar** src, UChar* end, ScanEnv* env)
+{
+  int r, len, group = 0;
+  Node* qn;
+  Node** targetp;
+
+  *np = NULL;
+  if (tok->type == (enum TokenSyms )term)
+    goto end_of_token;
+
+  switch (tok->type) {
+  case TK_ALT:
+  case TK_EOT:
+  end_of_token:
+  *np = node_new_empty();
+  return tok->type;
+
+  case TK_SUBEXP_OPEN:
+    r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
+    if (r < 0) return r;
+    if (r == 1) group = 1;
+    else if (r == 2) { /* option only */
+      Node* target;
+      OnigOptionType prev = env->option;
+
+      env->option = NENCLOSE(*np)->option;
+      r = fetch_token(tok, src, end, env);
+      if (r < 0) return r;
+      r = parse_subexp(&target, tok, term, src, end, env);
+      env->option = prev;
+      if (r < 0) {
+        onig_node_free(target);
+        return r;
+      }
+      NENCLOSE(*np)->target = target;
+      return tok->type;
+    }
+    break;
+
+  case TK_SUBEXP_CLOSE:
+    if (!
IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
+      return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
+
+    if (tok->escaped) goto tk_raw_byte;
+    else goto tk_byte;
+    break;
+
+  case TK_STRING:
+  tk_byte:
+    {
+      *np = node_new_str(tok->backp, *src);
+      CHECK_NULL_RETURN_MEMERR(*np);
+
+      /* Merge consecutive string tokens into one string node. */
+      while (1) {
+        r = fetch_token(tok, src, end, env);
+        if (r < 0) return r;
+        if (r != TK_STRING) break;
+
+        r = onig_node_str_cat(*np, tok->backp, *src);
+        if (r < 0) return r;
+      }
+
+    string_end:
+      targetp = np;
+      goto repeat;
+    }
+    break;
+
+  case TK_RAW_BYTE:
+  tk_raw_byte:
+    {
+      *np = node_new_str_raw_char((UChar )tok->u.c);
+      CHECK_NULL_RETURN_MEMERR(*np);
+      len = 1;
+      /* Collect raw bytes until they form one complete character. */
+      while (1) {
+        if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
+          if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
+            r = fetch_token(tok, src, end, env);
+            NSTRING_CLEAR_RAW(*np);
+            goto string_end;
+          }
+        }
+
+        r = fetch_token(tok, src, end, env);
+        if (r < 0) return r;
+        if (r != TK_RAW_BYTE) {
+          /* Don't use this, it is wrong for little endian encodings. */
+          return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
+        }
+
+        r = node_str_cat_char(*np, (UChar )tok->u.c);
+        if (r < 0) return r;
+
+        len++;
+      }
+    }
+    break;
+
+  case TK_CODE_POINT:
+    {
+      UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+      int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
+      if (num < 0) return num;
+#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
+      *np = node_new_str_raw(buf, buf + num);
+#else
+      *np = node_new_str(buf, buf + num);
+#endif
+      CHECK_NULL_RETURN_MEMERR(*np);
+    }
+    break;
+
+  case TK_QUOTE_OPEN:
+    {
+      OnigCodePoint end_op[2];
+      UChar *qstart, *qend, *nextp;
+
+      /* The quoted text runs up to <escape char>E, or to the end of the
+         pattern when that terminator is absent. */
+      end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
+      end_op[1] = (OnigCodePoint )'E';
+      qstart = *src;
+      qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
+      if (IS_NULL(qend)) {
+        nextp = qend = end;
+      }
+      *np = node_new_str(qstart, qend);
+      CHECK_NULL_RETURN_MEMERR(*np);
+      *src = nextp;
+    }
+    break;
+
+  case TK_CHAR_TYPE:
+    {
+      switch (tok->u.prop.ctype) {
+      case ONIGENC_CTYPE_D:
+      case ONIGENC_CTYPE_S:
+      case ONIGENC_CTYPE_W:
+        {
+          CClassNode* cc;
+          *np = node_new_cclass();
+          CHECK_NULL_RETURN_MEMERR(*np);
+          cc = NCCLASS(*np);
+          /* Propagate failures, as parse_char_class() does. */
+          r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
+          if (r != 0) return r;
+          if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
+        }
+        break;
+
+      case ONIGENC_CTYPE_WORD:
+        *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
+        CHECK_NULL_RETURN_MEMERR(*np);
+        break;
+
+      case ONIGENC_CTYPE_SPACE:
+      case ONIGENC_CTYPE_DIGIT:
+      case ONIGENC_CTYPE_XDIGIT:
+        {
+          CClassNode* cc;
+
+#ifdef USE_SHARED_CCLASS_TABLE
+          const OnigCodePoint *mbr;
+          OnigCodePoint sb_out;
+
+          r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
+                                           &sb_out, &mbr);
+          if (r == 0 &&
+              ONIGENC_CODE_RANGE_NUM(mbr)
+              >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
+            type_cclass_key  key;
+            type_cclass_key* new_key;
+
+            key.enc  = env->enc;
+            key.not  = tok->u.prop.not;
+            key.type = tok->u.prop.ctype;
+
+            THREAD_ATOMIC_START;
+
+            if (IS_NULL(OnigTypeCClassTable)) {
+              OnigTypeCClassTable
+                = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
+              if (IS_NULL(OnigTypeCClassTable)) {
+                THREAD_ATOMIC_END;
+                return ONIGERR_MEMORY;
+              }
+            }
+            else {
+              if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
+                                 (st_data_t* )np)) {
+                THREAD_ATOMIC_END;
+                break;
+              }
+            }
+
+            /* Allocate the table key before the node so that a failed
+               allocation can be reported without copying into NULL and
+               without a share-flagged node left outside the table. */
+            new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
+            if (IS_NULL(new_key)) {
+              THREAD_ATOMIC_END;
+              return ONIGERR_MEMORY;
+            }
+
+            *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
+                                                     sb_out, mbr);
+            if (IS_NULL(*np)) {
+              xfree(new_key);
+              THREAD_ATOMIC_END;
+              return ONIGERR_MEMORY;
+            }
+
+            cc = NCCLASS(*np);
+            NCCLASS_SET_SHARE(cc);
+            xmemcpy(new_key, &key, sizeof(type_cclass_key));
+            onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
+                               (st_data_t )*np);
+
+            THREAD_ATOMIC_END;
+          }
+          else {
+#endif
+            *np = node_new_cclass();
+            CHECK_NULL_RETURN_MEMERR(*np);
+            cc = NCCLASS(*np);
+            r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
+            if (r != 0) return r;
+            if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
+#ifdef USE_SHARED_CCLASS_TABLE
+          }
+#endif
+        }
+        break;
+
+      default:
+        return ONIGERR_PARSER_BUG;
+        break;
+      }
+    }
+    break;
+
+  case TK_CHAR_PROPERTY:
+    r = parse_char_property(np, tok, src, end, env);
+    if (r != 0) return r;
+    break;
+
+  case TK_CC_OPEN:
+    {
+      CClassNode* cc;
+
+      r = parse_char_class(np, tok, src, end, env);
+      if (r != 0) return r;
+
+      cc = NCCLASS(*np);
+      if (IS_IGNORECASE(env->option)) {
+        IApplyCaseFoldArg iarg;
+
+        iarg.env      = env;
+        iarg.cc       = cc;
+        iarg.alt_root = NULL_NODE;
+        iarg.ptail    = &(iarg.alt_root);
+
+        r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
+                                        i_apply_case_fold, &iarg);
+        if (r != 0) {
+          onig_node_free(iarg.alt_root);
+          return r;
+        }
+        /* Multi-character folds become alternatives next to the class. */
+        if (IS_NOT_NULL(iarg.alt_root)) {
+          Node* work = onig_node_new_alt(*np, iarg.alt_root);
+          if (IS_NULL(work)) {
+            onig_node_free(iarg.alt_root);
+            return ONIGERR_MEMORY;
+          }
+          *np = work;
+        }
+      }
+    }
+    break;
+
+  case TK_ANYCHAR:
+    *np = node_new_anychar();
+    CHECK_NULL_RETURN_MEMERR(*np);
+    break;
+
+  case TK_ANYCHAR_ANYTIME:
+    *np = node_new_anychar();
+    CHECK_NULL_RETURN_MEMERR(*np);
+    qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
+    CHECK_NULL_RETURN_MEMERR(qn);
+    NQTFR(qn)->target = *np;
+    *np = qn;
+    break;
+
+  case TK_BACKREF:
+    len = tok->u.backref.num;
+    *np = node_new_backref(len,
+                   (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
+                           tok->u.backref.by_name,
+#ifdef USE_BACKREF_WITH_LEVEL
+                           tok->u.backref.exist_level,
+                           tok->u.backref.level,
+#endif
+                           env);
+    CHECK_NULL_RETURN_MEMERR(*np);
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case TK_CALL:
+    {
+      int gnum = tok->u.call.gnum;
+
+      if (gnum < 0) {
+        gnum = BACKREF_REL_TO_ABS(gnum, env);
+        if (gnum <= 0)
+          return ONIGERR_INVALID_BACKREF;
+      }
+      *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
+      CHECK_NULL_RETURN_MEMERR(*np);
+      env->num_call++;
+    }
+    break;
+#endif
+
+  case TK_ANCHOR:
+    *np = onig_node_new_anchor(tok->u.anchor);
+    /* The quantifier handling below dereferences *np. */
+    CHECK_NULL_RETURN_MEMERR(*np);
+    break;
+
+  case TK_OP_REPEAT:
+  case TK_INTERVAL:
+    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
+      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
+        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
+      else {
+        *np = node_new_empty();
+        CHECK_NULL_RETURN_MEMERR(*np);
+      }
+    }
+    else {
+      goto tk_byte;
+    }
+    break;
+
+  default:
+    return ONIGERR_PARSER_BUG;
+    break;
+  }
+
+  {
+    targetp = np;
+
+  re_entry:
+    r = fetch_token(tok, src, end, env);
+    if (r < 0) return r;
+
+  repeat:
+    if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
+      if (is_invalid_quantifier_target(*targetp))
+        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
+
+      qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
+                               (r == TK_INTERVAL ? 1 : 0));
+      CHECK_NULL_RETURN_MEMERR(qn);
+      NQTFR(qn)->greedy = tok->u.repeat.greedy;
+      r = set_quantifier(qn, *targetp, group, env);
+      if (r < 0) {
+        onig_node_free(qn);
+        return r;
+      }
+
+      if (tok->u.repeat.possessive != 0) {
+        Node* en;
+        en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+        if (IS_NULL(en)) {
+          onig_node_free(qn);
+          return ONIGERR_MEMORY;
+        }
+        NENCLOSE(en)->target = qn;
+        qn = en;
+      }
+
+      if (r == 0) {
+        *targetp = qn;
+      }
+      else if (r == 1) {
+        onig_node_free(qn);
+      }
+      else if (r == 2) { /* split case: /abc+/ */
+        Node *tmp;
+        Node *lst;
+
+        /* Build the list cell in a temporary: storing a NULL result
+           straight into *targetp would lose (and leak) the node that
+           *targetp still owns. */
+        lst = node_new_list(*targetp, NULL);
+        if (IS_NULL(lst)) {
+          onig_node_free(qn);
+          return ONIGERR_MEMORY;
+        }
+        *targetp = lst;
+        tmp = NCDR(*targetp) = node_new_list(qn, NULL);
+        if (IS_NULL(tmp)) {
+          onig_node_free(qn);
+          return ONIGERR_MEMORY;
+        }
+        targetp = &(NCAR(tmp));
+      }
+      goto re_entry;
+    }
+  }
+
+  return r;
+}
+
+/*
+ * Parse a branch: a sequence of expressions up to '|', `term` or the end
+ * of the pattern.
+ *
+ * A single expression is returned as is; several are chained into a list
+ * stored in *top.  Returns the type of the token that ended the branch
+ * (TK_EOT, TK_ALT or `term`) or a negative ONIGERR_* code.  On error *top
+ * holds whatever part of the list was already built, for the caller to
+ * free.
+ */
+static int
+parse_branch(Node** top, OnigToken* tok, int term,
+             UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  Node *node, **headp;
+
+  *top = NULL;
+  r = parse_exp(&node, tok, term, src, end, env);
+  if (r < 0) {
+    onig_node_free(node);
+    return r;
+  }
+
+  if (r == TK_EOT || r == term || r == TK_ALT) {
+    *top = node;
+  }
+  else {
+    *top  = node_new_list(node, NULL);
+    if (IS_NULL(*top)) {
+      /* node is not linked anywhere yet, so free it here. */
+      onig_node_free(node);
+      return ONIGERR_MEMORY;
+    }
+    headp = &(NCDR(*top));
+    while (r != TK_EOT && r != term && r != TK_ALT) {
+      r = parse_exp(&node, tok, term, src, end, env);
+      if (r < 0) {
+        onig_node_free(node);
+        return r;
+      }
+
+      if (NTYPE(node) == NT_LIST) {
+        /* Splice an already-built list in and move to its last cell. */
+        *headp = node;
+        while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
+        headp = &(NCDR(node));
+      }
+      else {
+        *headp = node_new_list(node, NULL);
+        if (IS_NULL(*headp)) {
+          onig_node_free(node);
+          return ONIGERR_MEMORY;
+        }
+        headp = &(NCDR(*headp));
+      }
+    }
+  }
+
+  return r;
+}
+
+/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
+static int
+parse_subexp(Node** top, OnigToken* tok, int term,
+             UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  Node *node, **headp;
+
+  *top = NULL;
+  r = parse_branch(&node, tok, term, src, end, env);
+  if (r < 0) {
+    onig_node_free(node);
+    return r;
+  }
+
+  if (r == term) {
+
*top = node;
+  }
+  else if (r == TK_ALT) {
+    *top  = onig_node_new_alt(node, NULL);
+    if (IS_NULL(*top)) {
+      /* node is not linked anywhere yet, so free it here. */
+      onig_node_free(node);
+      return ONIGERR_MEMORY;
+    }
+    headp = &(NCDR(*top));
+    while (r == TK_ALT) {
+      r = fetch_token(tok, src, end, env);
+      if (r < 0) return r;
+      r = parse_branch(&node, tok, term, src, end, env);
+      if (r < 0) {
+        onig_node_free(node);
+        return r;
+      }
+
+      *headp = onig_node_new_alt(node, NULL);
+      if (IS_NULL(*headp)) {
+        /* Free only the unlinked branch; the list in *top stays with the
+           caller, which frees it on error. */
+        onig_node_free(node);
+        return ONIGERR_MEMORY;
+      }
+      headp = &(NCDR(*headp));
+    }
+
+    if (tok->type != (enum TokenSyms )term)
+      goto err;
+  }
+  else {
+    onig_node_free(node);
+  err:
+    if (term == TK_SUBEXP_CLOSE)
+      return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+    else
+      return ONIGERR_PARSER_BUG;
+  }
+
+  return r;
+}
+
+/* Fetch the first token and parse the whole pattern up to TK_EOT into
+   *top.  Returns 0 on success or a negative ONIGERR_* code. */
+static int
+parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  OnigToken tok;
+
+  r = fetch_token(&tok, src, end, env);
+  if (r < 0) return r;
+  r = parse_subexp(top, &tok, TK_EOT, src, end, env);
+  if (r < 0) return r;
+  return 0;
+}
+
+/*
+ * Parse pattern..end into a node tree stored in *root.
+ *
+ * `env` is cleared and then seeded from `reg` (options, case-fold flag,
+ * encoding, syntax).  reg->num_mem is updated from env->num_mem whether or
+ * not parsing succeeded.  Returns the result of parse_regexp().
+ */
+extern int
+onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
+                     regex_t* reg, ScanEnv* env)
+{
+  int r;
+  UChar* p;
+
+#ifdef USE_NAMED_GROUP
+  names_clear(reg);
+#endif
+
+  scan_env_clear(env);
+  env->option         = reg->options;
+  env->case_fold_flag = reg->case_fold_flag;
+  env->enc            = reg->enc;
+  env->syntax         = reg->syntax;
+  env->pattern        = (UChar* )pattern;
+  env->pattern_end    = (UChar* )end;
+  env->reg            = reg;
+
+  *root = NULL;
+  p = (UChar* )pattern;
+  r = parse_regexp(root, &p, (UChar* )end, env);
+  reg->num_mem = env->num_mem;
+  return r;
+}
+
+/* Record the span [arg, arg_end) in env->error / env->error_end.  The
+   pointers are stored, not copied; `ecode` is unused. */
+extern void
+onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
+                               UChar* arg, UChar* arg_end)
+{
+  env->error     = arg;
+  env->error_end = arg_end;
+}
+#endif //INCLUDE_REGEXP
diff --git a/src/regparse.h b/src/regparse.h
new file mode 100644
index 0000000000..ac8758bd14
--- /dev/null
+++ b/src/regparse.h
@@ -0,0 +1,354 @@
+#ifndef ONIGURUMA_REGPARSE_H
+#define ONIGURUMA_REGPARSE_H
+/**********************************************************************
+  regparse.h -  Oniguruma (regular expression library)
+**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +/* node type */ +#define NT_STR 0 +#define NT_CCLASS 1 +#define NT_CTYPE 2 +#define NT_CANY 3 +#define NT_BREF 4 +#define NT_QTFR 5 +#define NT_ENCLOSE 6 +#define NT_ANCHOR 7 +#define NT_LIST 8 +#define NT_ALT 9 +#define NT_CALL 10 + +/* node type bit */ +#define NTYPE2BIT(type) (1<<(type)) + +#define BIT_NT_STR NTYPE2BIT(NT_STR) +#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS) +#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE) +#define BIT_NT_CANY NTYPE2BIT(NT_CANY) +#define BIT_NT_BREF NTYPE2BIT(NT_BREF) +#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR) +#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE) +#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR) +#define BIT_NT_LIST NTYPE2BIT(NT_LIST) +#define BIT_NT_ALT NTYPE2BIT(NT_ALT) +#define BIT_NT_CALL NTYPE2BIT(NT_CALL) + +#define IS_NODE_TYPE_SIMPLE(type) \ + ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\ + BIT_NT_CANY | BIT_NT_BREF)) != 0) + +#define NTYPE(node) ((node)->u.base.type) +#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype) + +#define NSTR(node) (&((node)->u.str)) +#define NCCLASS(node) (&((node)->u.cclass)) +#define NCTYPE(node) (&((node)->u.ctype)) +#define NBREF(node) (&((node)->u.bref)) +#define NQTFR(node) (&((node)->u.qtfr)) +#define NENCLOSE(node) (&((node)->u.enclose)) +#define NANCHOR(node) (&((node)->u.anchor)) +#define NCONS(node) (&((node)->u.cons)) +#define NCALL(node) (&((node)->u.call)) + +#define NCAR(node) (NCONS(node)->car) +#define NCDR(node) (NCONS(node)->cdr) + + + +#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) +#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) + +#define ENCLOSE_MEMORY (1<<0) +#define ENCLOSE_OPTION (1<<1) +#define ENCLOSE_STOP_BACKTRACK (1<<2) + +#define NODE_STR_MARGIN 16 +#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_BACKREFS_SIZE 6 + +#define NSTR_RAW (1<<0) /* by backslashed number */ +#define NSTR_AMBIG (1<<1) +#define NSTR_DONT_GET_OPT_INFO 
(1<<2) + +#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) +#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW +#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW +#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG +#define NSTRING_SET_DONT_GET_OPT_INFO(node) \ + (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO +#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0) +#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0) +#define NSTRING_IS_DONT_GET_OPT_INFO(node) \ + (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0) + +#define BACKREFS_P(br) \ + (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static); + +#define NQ_TARGET_ISNOT_EMPTY 0 +#define NQ_TARGET_IS_EMPTY 1 +#define NQ_TARGET_IS_EMPTY_MEM 2 +#define NQ_TARGET_IS_EMPTY_REC 3 + +/* status bits */ +#define NST_MIN_FIXED (1<<0) +#define NST_MAX_FIXED (1<<1) +#define NST_CLEN_FIXED (1<<2) +#define NST_MARK1 (1<<3) +#define NST_MARK2 (1<<4) +#define NST_MEM_BACKREFED (1<<5) +#define NST_STOP_BT_SIMPLE_REPEAT (1<<6) +#define NST_RECURSION (1<<7) +#define NST_CALLED (1<<8) +#define NST_ADDR_FIXED (1<<9) +#define NST_NAMED_GROUP (1<<10) +#define NST_NAME_REF (1<<11) +#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. 
*/ +#define NST_NEST_LEVEL (1<<13) +#define NST_BY_NUMBER (1<<14) /* {n,m} */ + +#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f) +#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f) + +#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0) +#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) +#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0) +#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0) +#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0) +#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) +#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) +#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) +#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \ + (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0) +#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) + +#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION +#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) +#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) +#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) +#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) +#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) +#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) + +#define CALLNODE_REFNUM_UNDEF -1 + +typedef struct { + NodeBase base; + UChar* s; + UChar* end; + unsigned int flag; + int capa; /* (allocated size - 1) or 0: use buf[] */ + UChar buf[NODE_STR_BUF_SIZE]; +} StrNode; + +typedef struct { + NodeBase base; + int state; + struct _Node* target; + int lower; + int upper; + int greedy; + int target_empty_info; + struct _Node* head_exact; + struct _Node* next_head_exact; + int is_refered; /* include called node. 
don't eliminate even if {0} */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ +#endif +} QtfrNode; + +typedef struct { + NodeBase base; + int state; + int type; + int regnum; + OnigOptionType option; + struct _Node* target; + AbsAddrType call_addr; + /* for multiple call reference */ + OnigDistance min_len; /* min length (byte) */ + OnigDistance max_len; /* max length (byte) */ + int char_len; /* character length */ + int opt_count; /* referenced count in optimize_node_left() */ +} EncloseNode; + +#ifdef USE_SUBEXP_CALL + +typedef struct { + int offset; + struct _Node* target; +} UnsetAddr; + +typedef struct { + int num; + int alloc; + UnsetAddr* us; +} UnsetAddrList; + +typedef struct { + NodeBase base; + int state; + int group_num; + UChar* name; + UChar* name_end; + struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */ + UnsetAddrList* unset_addr_list; +} CallNode; + +#endif + +typedef struct { + NodeBase base; + int state; + int back_num; + int back_static[NODE_BACKREFS_SIZE]; + int* back_dynamic; + int nest_level; +} BRefNode; + +typedef struct { + NodeBase base; + int type; + struct _Node* target; + int char_len; +} AnchorNode; + +typedef struct { + NodeBase base; + struct _Node* car; + struct _Node* cdr; +} ConsAltNode; + +typedef struct { + NodeBase base; + int ctype; + int not; +} CtypeNode; + +typedef struct _Node { + union { + NodeBase base; + StrNode str; + CClassNode cclass; + QtfrNode qtfr; + EncloseNode enclose; + BRefNode bref; + AnchorNode anchor; + ConsAltNode cons; + CtypeNode ctype; +#ifdef USE_SUBEXP_CALL + CallNode call; +#endif + } u; +} Node; + + +#define NULL_NODE ((Node* )0) + +#define SCANENV_MEMNODES_SIZE 8 +#define SCANENV_MEM_NODES(senv) \ + (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? 
\ + (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static) + +typedef struct { + OnigOptionType option; + OnigCaseFoldType case_fold_flag; + OnigEncoding enc; + const OnigSyntaxType* syntax; + BitStatusType capture_history; + BitStatusType bt_mem_start; + BitStatusType bt_mem_end; + BitStatusType backrefed_mem; + UChar* pattern; + UChar* pattern_end; + UChar* error; + UChar* error_end; + regex_t* reg; /* for reg->names only */ + int num_call; +#ifdef USE_SUBEXP_CALL + UnsetAddrList* unset_addr_list; +#endif + int num_mem; +#ifdef USE_NAMED_GROUP + int num_named; +#endif + int mem_alloc; + Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; + Node** mem_nodes_dynamic; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int num_comb_exp_check; + int comb_exp_max_regnum; + int curr_max_regnum; + int has_recursion; +#endif + int warnings_flag; + const char* sourcefile; + int sourceline; +} ScanEnv; + + +#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0) +#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) +#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) + +#ifdef USE_NAMED_GROUP +typedef struct { + int new_val; +} GroupNumRemap; + +extern int onig_renumber_name_table(regex_t* reg, GroupNumRemap* map); +#endif + +extern int onig_strncmp(const UChar* s1, const UChar* s2, int n); +extern void onig_strcpy(UChar* dest, const UChar* src, const UChar* end); +extern void onig_scan_env_set_error_string(ScanEnv* env, int ecode, UChar* arg, UChar* arg_end); +extern int onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc); +extern void onig_reduce_nested_quantifier(Node* pnode, Node* cnode); +extern void onig_node_conv_to_str_node(Node* node, int raw); +extern int onig_node_str_cat(Node* node, const UChar* s, const UChar* end); +extern int onig_node_str_set(Node* node, const UChar* s, const UChar* end); +extern void onig_node_free(Node* node); +extern Node* onig_node_new_enclose(int type); +extern Node* onig_node_new_anchor(int type); +extern 
Node* onig_node_new_str(const UChar* s, const UChar* end); +extern Node* onig_node_new_list(Node* left, Node* right); +extern Node* onig_node_list_add(Node* list, Node* x); +extern Node* onig_node_new_alt(Node* left, Node* right); +extern void onig_node_str_clear(Node* node); +extern int onig_free_node_list(void); +extern int onig_names_free(regex_t* reg); +extern int onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env); +extern int onig_free_shared_cclass_table(void); + +#ifdef ONIG_DEBUG +#ifdef USE_NAMED_GROUP +extern int onig_print_names(FILE*, regex_t*); +#endif +#endif + +#endif /* ONIGURUMA_REGPARSE_H */ diff --git a/src/ritehash.h b/src/ritehash.h new file mode 100644 index 0000000000..48feb9656f --- /dev/null +++ b/src/ritehash.h @@ -0,0 +1,203 @@ +/* + * Rite Hash + * + * + */ +#include +#include +#include + +typedef uint32_t khint_t; +typedef khint_t khiter_t; + +#define INITIAL_HASH_SIZE 32 +#define UPPER_BOUND(x) ((x)>>2|(x>>1)) + +//extern uint8_t __m[]; + +/* mask for flags */ +static uint8_t __m[8] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80}; + + +#define __ac_isempty(e_flag, d_flag, i) (e_flag[(i)/8]&__m[(i)%8]) +#define __ac_isdel(e_flag, d_flag, i) (d_flag[(i)/8]&__m[(i)%8]) +#define __ac_iseither(e_flag, d_flag, i) (__ac_isempty(e_flag,d_flag,i)||__ac_isdel(e_flag,d_flag,i)) + + +/* struct kh_xxx + + name: ash name + khkey_t: key data type + khval_t: value data type + kh_is_map: (not implemented / not used in RiteVM ) + __hash_func: hash function + __hash_equal: hash comparation function +*/ +#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + typedef struct kh_##name { \ + khint_t n_buckets; \ + khint_t size; \ + khint_t n_occupied; \ + khint_t upper_bound; \ + uint8_t *e_flags; \ + uint8_t *d_flags; \ + khkey_t *keys; \ + khval_t *vals; \ + khint_t mask; \ + khint_t inc; \ + mrb_state *mrb; \ + } kh_##name##_t; \ + static void kh_alloc_##name(kh_##name##_t 
*h) \ + { \ + khint_t sz = h->n_buckets; \ + h->size = h->n_occupied = 0; \ + h->upper_bound = UPPER_BOUND(sz); \ + h->e_flags = (uint8_t *)mrb_malloc(h->mrb, sizeof(uint8_t)*sz/4); \ + h->d_flags = h->e_flags + sz/8; \ + memset(h->e_flags, 0xff, sz/8*sizeof(uint8_t)); \ + memset(h->d_flags, 0x00, sz/8*sizeof(uint8_t)); \ + h->keys = (khkey_t *)mrb_malloc(h->mrb, sizeof(khkey_t)*sz); \ + h->vals = (khval_t *)mrb_malloc(h->mrb, sizeof(khval_t)*sz); \ + h->mask = sz-1; \ + h->inc = sz/2-1; \ + } \ + static inline kh_##name##_t *kh_init_##name(mrb_state *mrb){ \ + kh_##name##_t *h = (kh_##name##_t*)mrb_calloc(mrb, 1, sizeof(kh_##name##_t)); \ + h->n_buckets = INITIAL_HASH_SIZE; \ + h->mrb = mrb; \ + kh_alloc_##name(h); \ + return h; \ + } \ + static inline void kh_destroy_##name(kh_##name##_t *h) \ + { \ + if( h ){ \ + mrb_free(h->mrb, h->keys); \ + mrb_free(h->mrb, h->vals); \ + mrb_free(h->mrb, h->e_flags); \ + mrb_free(h->mrb, h); \ + } \ + } \ + static inline void kh_clear_##name(kh_##name##_t *h) \ + { \ + if( h && h->e_flags ){ \ + memset(h->e_flags, 0xff, h->n_buckets/8*sizeof(uint8_t)); \ + memset(h->d_flags, 0x00, h->n_buckets/8*sizeof(uint8_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + static inline khint_t kh_get_##name(kh_##name##_t *h, khkey_t key) \ + { \ + khint_t k = __hash_func(h->mrb,key) & (h->mask); \ + while( !__ac_isempty(h->e_flags, h->d_flags, k) ){ \ + if( !__ac_isdel(h->e_flags, h->d_flags, k) ){ \ + if( __hash_equal(h->mrb,h->keys[k], key) ) return k; \ + } \ + k = (k+h->inc) & (h->mask); \ + } \ + return h->n_buckets; \ + } \ + static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key); \ + static void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + { \ + if( new_n_bucketse_flags; \ + uint8_t *old_d_flags = h->d_flags; \ + khkey_t *old_keys = h->keys; \ + khval_t *old_vals = h->vals; \ + khint_t old_n_buckets = h->n_buckets; \ + h->n_buckets = new_n_buckets; \ + kh_alloc_##name(h); \ + /* relocate */ \ + 
khint_t i; \ + for( i=0 ; in_occupied >= h->upper_bound ){ \ + kh_resize_##name(h, h->n_buckets*2); \ + } \ + k = __hash_func(h->mrb,key) & (h->mask); \ + while( !__ac_iseither(h->e_flags, h->d_flags, k) ){ \ + if( __hash_equal(h->mrb,h->keys[k], key) ) break; \ + k = (k+h->inc) & (h->mask); \ + } \ + if( __ac_isempty(h->e_flags, h->d_flags, k) ) { \ + /* put at empty */ \ + h->keys[k] = key; \ + h->e_flags[k/8] &= ~__m[k%8]; \ + h->size++; \ + h->n_occupied++; \ + } else if( __ac_isdel(h->e_flags, h->d_flags, k) ) { \ + /* put at del */ \ + h->keys[k] = key; \ + h->d_flags[k/8] &= ~__m[k%8]; \ + h->size++; \ + } \ + return k; \ + } \ + static inline void kh_del_##name(kh_##name##_t *h, khint_t x) \ + { \ + h->d_flags[x/8] |= __m[x%8]; \ + h->size--; \ + } \ + static inline void kh_debug_##name(kh_##name##_t *h) \ + { \ + khint_t i; \ + printf("idx:e_flag:d_flag\n"); \ + for( i=0 ; in_buckets/8 ; i++ ){ \ + printf("%4d:%02X:%02X\n", i, h->e_flags[i], h->d_flags[i]); \ + } \ + } \ + +#define khash_t(name) kh_##name##_t + +#define kh_init(name,mrb) kh_init_##name(mrb) +#define kh_destroy(name, h) kh_destroy_##name(h) +#define kh_clear(name, h) kh_clear_##name(h) +#define kh_resize(name, h, s) kh_resize_##name(h, s) +#define kh_put(name, h, k, r) kh_put_##name(h, k) +#define kh_get(name, h, k) kh_get_##name(h, k) +#define kh_del(name, h, k) kh_del_##name(h, k) +#define kh_debug(name, h) kh_debug_##name(h) + +#define kh_exist(h, x) (!__ac_iseither((h)->e_flags, (h)->d_flags, (x))) +#define kh_key(h, x) ((h)->keys[x]) +#define kh_val(h, x) ((h)->vals[x]) +#define kh_value(h, x) ((h)->vals[x]) +#define kh_begin(h) (khint_t)(0) +#define kh_end(h) ((h)->n_buckets) +#define kh_size(h) ((h)->size) +#define kh_n_buckets(h) ((h)->n_buckets) + +//#define kh_int_hash_func(mrb,key) (uint32_t)(key) +#define kh_int_hash_func(mrb,key) (uint32_t)((key)^((key)<<2)^((key)>>2)) +#define kh_int_hash_equal(mrb,a, b) (a == b) +#define kh_int64_hash_func(mrb,key) 
(uint32_t)((key)>>33^(key)^(key)<<11) +#define kh_int64_hash_equal(mrb,a, b) (a == b) +static inline khint_t __ac_X31_hash_string(const char *s) +{ + khint_t h = *s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; + return h; +} +#define kh_str_hash_func(mrb,key) __ac_X31_hash_string(key) +#define kh_str_hash_equal(mrb,a, b) (strcmp(a, b) == 0) + +#define KHASH_MAP_INIT_INT(name, khval_t) \ + KHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) +typedef const char *kh_cstr_t; +#define KHASH_MAP_INIT_STR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) + diff --git a/src/sprintf.c b/src/sprintf.c new file mode 100644 index 0000000000..06e6b33d74 --- /dev/null +++ b/src/sprintf.c @@ -0,0 +1,1112 @@ +/********************************************************************** + + sprintf.c - + + $Author: yugui $ + created at: Fri Oct 15 10:39:26 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "mruby.h" +#include +#include +#include "encoding.h" +#include "mruby/string.h" +#include "mruby/hash.h" +#include "mruby/numeric.h" +#include +#include +#include + +#ifdef HAVE_IEEEFP_H +#include +#endif + +#ifndef MRB_TAINT_P + #define MRB_TAINTED_P(p) FALSE +#endif + +#define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log2(10) =~ 146/485 */ +#define BITSPERDIG (sizeof(mrb_int)*CHAR_BIT) +#define EXTENDSIGN(n, l) (((~0 << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0 << (n))) + +static void fmt_setup(char*,size_t,int,int,int,int); + +static char* +remove_sign_bits(char *str, int base) +{ + char *s, *t; + + s = t = str; + + if (base == 16) { + while (*t == 'f') { + t++; + } + } + else if (base == 8) { + *t |= EXTENDSIGN(3, strlen(t)); + while (*t == '7') { + t++; + } + } + else if (base == 2) { + while (*t == '1') { + t++; + } + } + + return t; +} + +static char +sign_bits(int base, const char *p) +{ + char c = '.'; + + switch (base) { + case 16: + if (*p == 'X') c = 'F'; + else c = 'f'; + break; + case 8: + c = '7'; break; + case 2: + c = '1'; break; + } + return c; +} + +static mrb_value +mrb_fix2binstr(mrb_state *mrb, mrb_value x, int base) +{ + char buf[64], *b = buf + sizeof buf; + unsigned long val = mrb_fixnum(x); + char d = 0; + + if (base != 2) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid radix %d", base); + } + + if (val >= (1 << 10)) + val &= 0x3ff; + + if (val == 0) { + return mrb_str_new2(mrb, "0"); + } + *--b = '\0'; + do { + *--b = ruby_digitmap[(int)(val % base)]; + } while (val /= base); + + if (mrb_fixnum(x) < 0) { + b = remove_sign_bits(b, base); + switch (base) { + case 16: d = 'f'; break; + case 8: d = '7'; break; + case 2: d = '1'; break; + } + + if (d && *b != d) { + *--b = d; + } + } + + return mrb_str_new2(mrb, b); +} + +#define FNONE 0 +#define FSHARP 1 +#define FMINUS 2 +#define FPLUS 4 +#define 
FZERO 8 +#define FSPACE 16 +#define FWIDTH 32 +#define FPREC 64 +#define FPREC0 128 + +#define CHECK(l) do {\ +/* int cr = ENC_CODERANGE(result);*/\ + while (blen + (l) >= bsiz) {\ + bsiz*=2;\ + }\ + mrb_str_resize(mrb, result, bsiz);\ +/* ENC_CODERANGE_SET(result, cr);*/\ + buf = RSTRING_PTR(result);\ +} while (0) + +#define PUSH(s, l) do { \ + CHECK(l);\ + memcpy(&buf[blen], s, l);\ + blen += (l);\ +} while (0) + +#define FILL(c, l) do { \ + CHECK(l);\ + memset(&buf[blen], c, l);\ + blen += (l);\ +} while (0) + +#define GETARG() (!UNDEF_P(nextvalue) ? nextvalue : \ + posarg == -1 ? \ + (mrb_raise(mrb, E_ARGUMENT_ERROR, "unnumbered(%d) mixed with numbered", nextarg), mrb_undef_value()) : \ + posarg == -2 ? \ + (mrb_raise(mrb, E_ARGUMENT_ERROR, "unnumbered(%d) mixed with named", nextarg), mrb_undef_value()) : \ + (posarg = nextarg++, GETNTHARG(posarg))) + +#define GETPOSARG(n) (posarg > 0 ? \ + (mrb_raise(mrb, E_ARGUMENT_ERROR, "numbered(%d) after unnumbered(%d)", n, posarg), mrb_undef_value()) : \ + posarg == -2 ? \ + (mrb_raise(mrb, E_ARGUMENT_ERROR, "numbered(%d) after named", n), mrb_undef_value()) : \ + ((n < 1) ? \ + (mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid index - %d$", n), mrb_undef_value()) : \ + (posarg = -1, GETNTHARG(n)))) + +#define GETNTHARG(nth) \ + ((nth >= argc) ? (mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments"), mrb_undef_value()) : argv[nth]) + +#define GETNAMEARG(id, name, len) ( \ + posarg > 0 ? \ + (mrb_raise(mrb, E_ARGUMENT_ERROR, "named%.*s after unnumbered(%d)", (len), (name), posarg), mrb_undef_value()) : \ + posarg == -1 ? 
\ + (mrb_raise(mrb, E_ARGUMENT_ERROR, "named%.*s after numbered", (len), (name)), mrb_undef_value()) : \ + (posarg = -2, mrb_hash_getWithDef(mrb, get_hash(mrb, &hash, argc, argv), id, mrb_undef_value()))) + +#define GETNUM(n, val) \ + for (; p < end && ISDIGIT(*p); p++) {\ + int next_n = 10 * n + (*p - '0'); \ + if (next_n / 10 != n) {\ + mrb_raise(mrb, E_ARGUMENT_ERROR, #val " too big"); \ + } \ + n = next_n; \ + } \ + if (p >= end) { \ + mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed format string - %%*[0-9]"); \ + } + +#define GETASTER(num) do { \ + t = p++; \ + n = 0; \ + GETNUM(n, val); \ + if (*p == '$') { \ + tmp = GETPOSARG(n); \ + } \ + else { \ + tmp = GETARG(); \ + p = t; \ + } \ + num = mrb_fixnum(tmp); \ +} while (0) + +static mrb_value +get_hash(mrb_state *mrb, volatile mrb_value *hash, int argc, const mrb_value *argv) +{ + mrb_value tmp; + + if (!UNDEF_P(*hash)) return *hash; + if (argc != 2) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required"); + } + tmp = mrb_check_convert_type(mrb, argv[1], MRB_TT_HASH, "Hash", "to_hash"); + if (mrb_nil_p(tmp)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required"); + } + return (*hash = tmp); +} + +/* + * call-seq: + * format(format_string [, arguments...] ) -> string + * sprintf(format_string [, arguments...] ) -> string + * + * Returns the string resulting from applying format_string to + * any additional arguments. Within the format string, any characters + * other than format sequences are copied to the result. + * + * The syntax of a format sequence is follows. + * + * %[flags][width][.precision]type + * + * A format + * sequence consists of a percent sign, followed by optional flags, + * width, and precision indicators, then terminated with a field type + * character. The field type controls how the corresponding + * sprintf argument is to be interpreted, while the flags + * modify that interpretation. 
+ * + * The field type characters are: + * + * Field | Integer Format + * ------+-------------------------------------------------------------- + * b | Convert argument as a binary number. + * | Negative numbers will be displayed as a two's complement + * | prefixed with `..1'. + * B | Equivalent to `b', but uses an uppercase 0B for prefix + * | in the alternative format by #. + * d | Convert argument as a decimal number. + * i | Identical to `d'. + * o | Convert argument as an octal number. + * | Negative numbers will be displayed as a two's complement + * | prefixed with `..7'. + * u | Identical to `d'. + * x | Convert argument as a hexadecimal number. + * | Negative numbers will be displayed as a two's complement + * | prefixed with `..f' (representing an infinite string of + * | leading 'ff's). + * X | Equivalent to `x', but uses uppercase letters. + * + * Field | Float Format + * ------+-------------------------------------------------------------- + * e | Convert floating point argument into exponential notation + * | with one digit before the decimal point as [-]d.dddddde[+-]dd. + * | The precision specifies the number of digits after the decimal + * | point (defaulting to six). + * E | Equivalent to `e', but uses an uppercase E to indicate + * | the exponent. + * f | Convert floating point argument as [-]ddd.dddddd, + * | where the precision specifies the number of digits after + * | the decimal point. + * g | Convert a floating point number using exponential form + * | if the exponent is less than -4 or greater than or + * | equal to the precision, or in dd.dddd form otherwise. + * | The precision specifies the number of significant digits. + * G | Equivalent to `g', but uses an uppercase `E' in exponent form. + * a | Convert floating point argument as [-]0xh.hhhhp[+-]dd, + * | which consists of an optional sign, "0x", fraction part + * | as hexadecimal, "p", and exponential part as decimal. + * A | Equivalent to `a', but uses uppercase `X' and `P'.
+ * + * Field | Other Format + * ------+-------------------------------------------------------------- + * c | Argument is the numeric code for a single character or + * | a single character string itself. + * p | The value of argument.inspect. + * s | Argument is a string to be substituted. If the format + * | sequence contains a precision, at most that many characters + * | will be copied. + * % | A percent sign itself will be displayed. No argument taken. + * + * The flags modify the behavior of the formats. + * The flag characters are: + * + * Flag | Applies to | Meaning + * ---------+---------------+----------------------------------------- + * space | bBdiouxX | Leave a space at the start of + * | aAeEfgG | non-negative numbers. + * | (numeric fmt) | For `o', `x', `X', `b' and `B', use + * | | a minus sign with absolute value for + * | | negative values. + * ---------+---------------+----------------------------------------- + * (digit)$ | all | Specifies the absolute argument number + * | | for this field. Absolute and relative + * | | argument numbers cannot be mixed in a + * | | sprintf string. + * ---------+---------------+----------------------------------------- + * # | bBoxX | Use an alternative format. + * | aAeEfgG | For the conversion `o', increase the precision + * | | until the first digit will be `0' if + * | | it is not formatted as complements. + * | | For the conversions `x', `X', `b' and `B' + * | | on non-zero, prefix the result with ``0x'', + * | | ``0X'', ``0b'' and ``0B'', respectively. + * | | For `a', `A', `e', `E', `f', `g', and 'G', + * | | force a decimal point to be added, + * | | even if no digits follow. + * | | For `g' and 'G', do not remove trailing zeros. + * ---------+---------------+----------------------------------------- + * + | bBdiouxX | Add a leading plus sign to non-negative + * | aAeEfgG | numbers.
+ * | (numeric fmt) | For `o', `x', `X', `b' and `B', use + * | | a minus sign with absolute value for + * | | negative values. + * ---------+---------------+----------------------------------------- + * - | all | Left-justify the result of this conversion. + * ---------+---------------+----------------------------------------- + * 0 (zero) | bBdiouxX | Pad with zeros, not spaces. + * | aAeEfgG | For `o', `x', `X', `b' and `B', radix-1 + * | (numeric fmt) | is used for negative numbers formatted as + * | | complements. + * ---------+---------------+----------------------------------------- + * * | all | Use the next argument as the field width. + * | | If negative, left-justify the result. If the + * | | asterisk is followed by a number and a dollar + * | | sign, use the indicated argument as the width. + * + * Examples of flags: + * + * # `+' and space flag specifies the sign of non-negative numbers. + * sprintf("%d", 123) #=> "123" + * sprintf("%+d", 123) #=> "+123" + * sprintf("% d", 123) #=> " 123" + * + * # `#' flag for `o' increases number of digits to show `0'. + * # `+' and space flag changes format of negative numbers. + * sprintf("%o", 123) #=> "173" + * sprintf("%#o", 123) #=> "0173" + * sprintf("%+o", -123) #=> "-173" + * sprintf("%o", -123) #=> "..7605" + * sprintf("%#o", -123) #=> "..7605" + * + * # `#' flag for `x' add a prefix `0x' for non-zero numbers. + * # `+' and space flag disables complements for negative numbers. + * sprintf("%x", 123) #=> "7b" + * sprintf("%#x", 123) #=> "0x7b" + * sprintf("%+x", -123) #=> "-7b" + * sprintf("%x", -123) #=> "..f85" + * sprintf("%#x", -123) #=> "0x..f85" + * sprintf("%#x", 0) #=> "0" + * + * # `#' for `X' uses the prefix `0X'. + * sprintf("%X", 123) #=> "7B" + * sprintf("%#X", 123) #=> "0X7B" + * + * # `#' flag for `b' add a prefix `0b' for non-zero numbers. + * # `+' and space flag disables complements for negative numbers. 
+ * sprintf("%b", 123) #=> "1111011" + * sprintf("%#b", 123) #=> "0b1111011" + * sprintf("%+b", -123) #=> "-1111011" + * sprintf("%b", -123) #=> "..10000101" + * sprintf("%#b", -123) #=> "0b..10000101" + * sprintf("%#b", 0) #=> "0" + * + * # `#' for `B' uses the prefix `0B'. + * sprintf("%B", 123) #=> "1111011" + * sprintf("%#B", 123) #=> "0B1111011" + * + * # `#' for `e' forces to show the decimal point. + * sprintf("%.0e", 1) #=> "1e+00" + * sprintf("%#.0e", 1) #=> "1.e+00" + * + * # `#' for `f' forces to show the decimal point. + * sprintf("%.0f", 1234) #=> "1234" + * sprintf("%#.0f", 1234) #=> "1234." + * + * # `#' for `g' forces to show the decimal point. + * # It also disables stripping lowest zeros. + * sprintf("%g", 123.4) #=> "123.4" + * sprintf("%#g", 123.4) #=> "123.400" + * sprintf("%g", 123456) #=> "123456" + * sprintf("%#g", 123456) #=> "123456." + * + * The field width is an optional integer, followed optionally by a + * period and a precision. The width specifies the minimum number of + * characters that will be written to the result for this field. + * + * Examples of width: + * + * # padding is done by spaces, width=20 + * # 0 or radix-1. <------------------> + * sprintf("%20d", 123) #=> " 123" + * sprintf("%+20d", 123) #=> " +123" + * sprintf("%020d", 123) #=> "00000000000000000123" + * sprintf("%+020d", 123) #=> "+0000000000000000123" + * sprintf("% 020d", 123) #=> " 0000000000000000123" + * sprintf("%-20d", 123) #=> "123 " + * sprintf("%-+20d", 123) #=> "+123 " + * sprintf("%- 20d", 123) #=> " 123 " + * sprintf("%020x", -123) #=> "..ffffffffffffffff85" + * + * For + * numeric fields, the precision controls the number of decimal places + * displayed. For string fields, the precision determines the maximum + * number of characters to be copied from the string. (Thus, the format + * sequence %10.10s will always contribute exactly ten + * characters to the result.) 
+ * + * Examples of precisions: + * + * # precision for `d', 'o', 'x' and 'b' is + * # minimum number of digits <------> + * sprintf("%20.8d", 123) #=> " 00000123" + * sprintf("%20.8o", 123) #=> " 00000173" + * sprintf("%20.8x", 123) #=> " 0000007b" + * sprintf("%20.8b", 123) #=> " 01111011" + * sprintf("%20.8d", -123) #=> " -00000123" + * sprintf("%20.8o", -123) #=> " ..777605" + * sprintf("%20.8x", -123) #=> " ..ffff85" + * sprintf("%20.8b", -11) #=> " ..110101" + * + * # "0x" and "0b" for `#x' and `#b' is not counted for + * # precision but "0" for `#o' is counted. <------> + * sprintf("%#20.8d", 123) #=> " 00000123" + * sprintf("%#20.8o", 123) #=> " 00000173" + * sprintf("%#20.8x", 123) #=> " 0x0000007b" + * sprintf("%#20.8b", 123) #=> " 0b01111011" + * sprintf("%#20.8d", -123) #=> " -00000123" + * sprintf("%#20.8o", -123) #=> " ..777605" + * sprintf("%#20.8x", -123) #=> " 0x..ffff85" + * sprintf("%#20.8b", -11) #=> " 0b..110101" + * + * # precision for `e' is number of + * # digits after the decimal point <------> + * sprintf("%20.8e", 1234.56789) #=> " 1.23456789e+03" + * + * # precision for `f' is number of + * # digits after the decimal point <------> + * sprintf("%20.8f", 1234.56789) #=> " 1234.56789000" + * + * # precision for `g' is number of + * # significant digits <-------> + * sprintf("%20.8g", 1234.56789) #=> " 1234.5679" + * + * # <-------> + * sprintf("%20.8g", 123456789) #=> " 1.2345679e+08" + * + * # precision for `s' is + * # maximum number of characters <------> + * sprintf("%20.8s", "string test") #=> " string t" + * + * Examples: + * + * sprintf("%d %04x", 123, 123) #=> "123 007b" + * sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'" + * sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> " hello 8 hello" + * sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello -8" + * sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23" + * sprintf("%u", -123) #=> "-123" + * + * For more complex formatting, Ruby supports a reference by name. 
+ * %<name>s style uses format style, but %{name} style doesn't. + * + * Examples: + * sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 }) + * #=> 1 : 2.000000 + * sprintf("%{foo}f", { :foo => 1 }) + * # => "1f" + */ + +mrb_value +mrb_f_sprintf(mrb_state *mrb, mrb_value obj) +{ + int argc; + mrb_value *argv; + + mrb_get_args(mrb, "*", &argv, &argc); + + if (argc <= 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments"); + return mrb_nil_value(); + } else { + return mrb_str_format(mrb, argc - 1, argv + 1, argv[0]); + } +} + +mrb_value +mrb_str_format(mrb_state *mrb, int argc, const mrb_value *argv, mrb_value fmt) +{ + const char *p, *end; + char *buf; + long blen, bsiz; + mrb_value result; + + int width, prec, flags = FNONE; + int nextarg = 1; + int posarg = 0; + int tainted = 0; + mrb_value nextvalue; + mrb_value tmp; + mrb_value str; + volatile mrb_value hash = mrb_undef_value(); + +#define CHECK_FOR_WIDTH(f) \ + if ((f) & FWIDTH) { \ + mrb_raise(mrb, E_ARGUMENT_ERROR, "width given twice"); \ + } \ + if ((f) & FPREC0) { \ + mrb_raise(mrb, E_ARGUMENT_ERROR, "width after precision"); \ + } +#define CHECK_FOR_FLAGS(f) \ + if ((f) & FWIDTH) { \ + mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after width"); \ + } \ + if ((f) & FPREC0) { \ + mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision"); \ + } + + ++argc; + --argv; + if (MRB_TAINTED_P(fmt)) tainted = 1; + mrb_string_value(mrb, &fmt); + fmt = mrb_str_new4(mrb, fmt); + p = RSTRING_PTR(fmt); + end = p + RSTRING_LEN(fmt); + blen = 0; + bsiz = 120; + result = mrb_str_buf_new(mrb, bsiz); + buf = RSTRING_PTR(result); + memset(buf, 0, bsiz); + + for (; p < end; p++) { + const char *t; + int n; + mrb_sym id = 0; + + for (t = p; t < end && *t != '%'; t++) ; + PUSH(p, t - p); + if (t >= end) + goto sprint_exit; /* end of fmt string */ + + p = t + 1; /* skip `%' */ + + width = prec = -1; + nextvalue = mrb_undef_value(); + +retry: + switch (*p) { + default: + mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed format string - %%%c", *p); +
break; + + case ' ': + CHECK_FOR_FLAGS(flags); + flags |= FSPACE; + p++; + goto retry; + + case '#': + CHECK_FOR_FLAGS(flags); + flags |= FSHARP; + p++; + goto retry; + + case '+': + CHECK_FOR_FLAGS(flags); + flags |= FPLUS; + p++; + goto retry; + + case '-': + CHECK_FOR_FLAGS(flags); + flags |= FMINUS; + p++; + goto retry; + + case '0': + CHECK_FOR_FLAGS(flags); + flags |= FZERO; + p++; + goto retry; + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + n = 0; + GETNUM(n, width); + if (*p == '$') { + if (!UNDEF_P(nextvalue)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "value given twice - %d$", n); + } + nextvalue = GETPOSARG(n); + p++; + goto retry; + } + CHECK_FOR_WIDTH(flags); + width = n; + flags |= FWIDTH; + goto retry; + + case '<': + case '{': + { + const char *start = p; + char term = (*p == '<') ? '>' : '}'; + + for (; p < end && *p != term; ) + p++; + if (id) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "name%.*s after <%s>", + (int)(p - start + 1), start, mrb_sym2name(mrb, id)); + } + mrb_value symname = mrb_str_new(mrb, start + 1, p - start - 1); + id = mrb_intern(mrb, RSTRING_PTR(symname)); + nextvalue = GETNAMEARG(mrb_symbol_value(id), start, (int)(p - start + 1)); + if (UNDEF_P(nextvalue)) { + mrb_raise(mrb, E_KEY_ERROR, "key%.*s not found", (int)(p - start + 1), start); + } + if (term == '}') goto format_s; + p++; + goto retry; + } + + case '*': + CHECK_FOR_WIDTH(flags); + flags |= FWIDTH; + GETASTER(width); + if (width < 0) { + flags |= FMINUS; + width = -width; + } + p++; + goto retry; + + case '.': + if (flags & FPREC0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice"); + } + flags |= FPREC|FPREC0; + + prec = 0; + p++; + if (*p == '*') { + GETASTER(prec); + if (prec < 0) { /* ignore negative precision */ + flags &= ~FPREC; + } + p++; + goto retry; + } + + GETNUM(prec, precision); + goto retry; + + case '\n': + case '\0': + p--; + case '%': + if (flags != FNONE) { + mrb_raise(mrb, E_ARGUMENT_ERROR, 
"invalid format character - %%"); + } + PUSH("%", 1); + break; + + case 'c': + { + mrb_value val = GETARG(); + mrb_value tmp; + unsigned int c; + int n; +#ifdef INCLUDE_ENCODING + mrb_encoding *enc = mrb_enc_get(mrb, fmt); +#endif //INCLUDE_ENCODING + + tmp = mrb_check_string_type(mrb, val); + if (!mrb_nil_p(tmp)) { + if (RSTRING_LEN(tmp) != 1 ) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "%%c requires a character"); + } +#ifdef INCLUDE_ENCODING + c = mrb_enc_codepoint_len(mrb, RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); +#else + c = RSTRING_PTR(tmp)[0]; + n = 1; +#endif //INCLUDE_ENCODING + } + else { + c = mrb_fixnum(val); + n = mrb_enc_codelen(mrb, c, enc); + } + if (n <= 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character"); + } + if (!(flags & FWIDTH)) { + CHECK(n); + mrb_enc_mbcput(c, &buf[blen], enc); + blen += n; + } + else if ((flags & FMINUS)) { + CHECK(n); + mrb_enc_mbcput(c, &buf[blen], enc); + blen += n; + FILL(' ', width-1); + } + else { + FILL(' ', width-1); + CHECK(n); + mrb_enc_mbcput(c, &buf[blen], enc); + blen += n; + } + } + break; + + case 's': + case 'p': +format_s: + { + mrb_value arg = GETARG(); + long len, slen; +#ifdef INCLUDE_ENCODING + mrb_encoding *enc = mrb_enc_get(mrb, fmt); +#endif //INCLUDE_ENCODING + + if (*p == 'p') arg = mrb_inspect(mrb, arg); + str = mrb_obj_as_string(mrb, arg); + if (MRB_TAINTED_P(str)) tainted = 1; + len = RSTRING_LEN(str); + mrb_str_set_len(mrb, result, blen); + if (flags&(FPREC|FWIDTH)) { + slen = RSTRING_LEN(str); + if (slen < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence"); + } + if ((flags&FPREC) && (prec < slen)) { +#ifdef INCLUDE_ENCODING + char *p = mrb_enc_nth(mrb, RSTRING_PTR(str), RSTRING_END(str),prec, enc); +#else + char *p = RSTRING_PTR(str) + prec; +#endif //INCLUDE_ENCODING + slen = prec; + len = p - RSTRING_PTR(str); + } + /* need to adjust multi-byte string pos */ + if ((flags&FWIDTH) && (width > slen)) { + width -= (int)slen; + if (!(flags&FMINUS)) { + CHECK(width); + 
while (width--) { + buf[blen++] = ' '; + } + } + CHECK(len); + memcpy(&buf[blen], RSTRING_PTR(str), len); + blen += len; + if (flags&FMINUS) { + CHECK(width); + while (width--) { + buf[blen++] = ' '; + } + } + mrb_enc_associate(mrb, result, enc); + break; + } + } + PUSH(RSTRING_PTR(str), len); + mrb_enc_associate(mrb, result, enc); + } + break; + + case 'd': + case 'i': + case 'o': + case 'x': + case 'X': + case 'b': + case 'B': + case 'u': + { + volatile mrb_value val = GETARG(); + char fbuf[32], nbuf[64], *s; + const char *prefix = 0; + int sign = 0, dots = 0; + char sc = 0; + long v = 0, org_v = 0; + int base; + int len, pos; + + switch (*p) { + case 'd': + case 'i': + case 'u': + sign = 1; break; + case 'o': + case 'x': + case 'X': + case 'b': + case 'B': + if (flags&(FPLUS|FSPACE)) sign = 1; + break; + } + if (flags & FSHARP) { + switch (*p) { + case 'o': prefix = "0"; break; + case 'x': prefix = "0x"; break; + case 'X': prefix = "0X"; break; + case 'b': prefix = "0b"; break; + case 'B': prefix = "0B"; break; + } + } + +bin_retry: + switch (mrb_type(val)) { + case MRB_TT_FLOAT: + if (FIXABLE(mrb_float(val))) { + val = mrb_fixnum_value((mrb_int)mrb_float(val)); + goto bin_retry; + } + val = mrb_dbl2big(mrb, mrb_float(val)); + if (FIXNUM_P(val)) goto bin_retry; + break; + case MRB_TT_STRING: + val = mrb_str_to_inum(mrb, val, 0, TRUE); + goto bin_retry; + case MRB_TT_FIXNUM: + v = (long)mrb_fixnum(val); + break; + default: + val = mrb_Integer(mrb, val); + goto bin_retry; + } + + switch (*p) { + case 'o': + base = 8; break; + case 'x': + case 'X': + base = 16; break; + case 'b': + case 'B': + base = 2; break; + case 'u': + case 'd': + case 'i': + default: + base = 10; break; + } + + if (base == 2) { + org_v = v; + if ( v < 0 && !sign ) { + val = mrb_fix2binstr(mrb, mrb_fixnum_value(v), base); + dots = 1; + } + else { + val = mrb_fix2str(mrb, mrb_fixnum_value(v), base); + } + v = mrb_fixnum(mrb_str_to_inum(mrb, val, 10, 0/*Qfalse*/)); + } + if (sign) { + char c = 
*p; + if (c == 'i') c = 'd'; /* %d and %i are identical */ + if (base == 2) c = 'd'; + if (v < 0) { + v = -v; + sc = '-'; + width--; + } + else if (flags & FPLUS) { + sc = '+'; + width--; + } + else if (flags & FSPACE) { + sc = ' '; + width--; + } + snprintf(fbuf, sizeof(fbuf), "%%l%c", c); + snprintf(nbuf, sizeof(nbuf), fbuf, v); + s = nbuf; + } + else { + char c = *p; + if (c == 'X') c = 'x'; + if (base == 2) c = 'd'; + s = nbuf; + if (v < 0) { + dots = 1; + } + snprintf(fbuf, sizeof(fbuf), "%%l%c", c); + snprintf(++s, sizeof(nbuf) - 1, fbuf, v); + if (v < 0) { + char d = 0; + + s = remove_sign_bits(s, base); + switch (base) { + case 16: d = 'f'; break; + case 8: d = '7'; break; + case 2: d = '1'; break; + } + + if (d && *s != d) { + *--s = d; + } + } + } + len = (int)strlen(s); + + pos = -1; + if (dots) { + prec -= 2; + width -= 2; + } + + if (*p == 'X') { + char *pp = s; + int c; +#ifdef INCLUDE_ENCODING + mrb_encoding *enc = mrb_enc_get(mrb, fmt); +#endif //INCLUDE_ENCODING + while ((c = (int)(unsigned char)*pp) != 0) { +#ifdef INCLUDE_ENCODING + *pp = mrb_enc_toupper(c, enc); +#else + *pp = toupper(c); +#endif //INCLUDE_ENCODING + pp++; + } + } + if (prefix && !prefix[1]) { /* octal */ + if (dots) { + prefix = 0; + } + else if (len == 1 && *s == '0') { + len = 0; + if (flags & FPREC) prec--; + } + else if ((flags & FPREC) && (prec > len)) { + prefix = 0; + } + } + else if (len == 1 && *s == '0') { + prefix = 0; + } + if (prefix) { + width -= (int)strlen(prefix); + } + if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { + prec = width; + width = 0; + } + else { + if (prec < len) { + if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0; + prec = len; + } + width -= prec; + } + if (!(flags&FMINUS)) { + CHECK(width); + while (width-- > 0) { + buf[blen++] = ' '; + } + } + if (sc) PUSH(&sc, 1); + if (prefix) { + int plen = (int)strlen(prefix); + PUSH(prefix, plen); + } + CHECK(prec - len); + if (dots) PUSH("..", 2); + if (v < 0 || (base == 2 && org_v < 0)) { + 
char c = sign_bits(base, p); + while (len < prec--) { + buf[blen++] = c; + } + } + else if ((flags & (FMINUS|FPREC)) != FMINUS) { + char c = '0'; + while (len < prec--) { + buf[blen++] = c; + } + } + PUSH(s, len); + CHECK(width); + while (width-- > 0) { + buf[blen++] = ' '; + } + } + break; + + case 'f': + case 'g': + case 'G': + case 'e': + case 'E': + case 'a': + case 'A': + { + mrb_value val = GETARG(); + double fval; + int i, need = 6; + char fbuf[32]; + + fval = mrb_float(mrb_Float(mrb, val)); + if (isnan(fval) || isinf(fval)) { + const char *expr; + + if (isnan(fval)) { + expr = "NaN"; + } + else { + expr = "Inf"; + } + need = (int)strlen(expr); + if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS)) + need++; + if ((flags & FWIDTH) && need < width) + need = width; + + CHECK(need + 1); + snprintf(&buf[blen], need + 1, "%*s", need, ""); + if (flags & FMINUS) { + if (!isnan(fval) && fval < 0.0) + buf[blen++] = '-'; + else if (flags & FPLUS) + buf[blen++] = '+'; + else if (flags & FSPACE) + blen++; + memcpy(&buf[blen], expr, strlen(expr)); + } + else { + if (!isnan(fval) && fval < 0.0) + buf[blen + need - strlen(expr) - 1] = '-'; + else if (flags & FPLUS) + buf[blen + need - strlen(expr) - 1] = '+'; + else if ((flags & FSPACE) && need > width) + blen++; + memcpy(&buf[blen + need - strlen(expr)], expr, + strlen(expr)); + } + blen += strlen(&buf[blen]); + break; + } + + fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec); + need = 0; + if (*p != 'e' && *p != 'E') { + i = INT_MIN; + frexp(fval, &i); + if (i > 0) + need = BIT_DIGITS(i); + } + need += (flags&FPREC) ? prec : 6; + if ((flags&FWIDTH) && need < width) + need = width; + need += 20; + + CHECK(need); + snprintf(&buf[blen], need, fbuf, fval); + blen += strlen(&buf[blen]); + } + break; + } + flags = FNONE; + } + + sprint_exit: + /* XXX - We cannot validate the number of arguments if (digit)$ style used. 
+   */
+  if (posarg >= 0 && nextarg < argc) { /* positional-arg state is non-negative and arguments remain unconsumed */
+    const char *mesg = "too many arguments for format string";
+    if (RTEST(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, "%s", mesg); /* raise when ruby_debug is set */
+    if (RTEST(ruby_verbose)) mrb_warn("%s", mesg); /* otherwise only warn when ruby_verbose is set */
+  }
+  mrb_str_resize(mrb, result, blen); /* resize result to blen, the running write offset used above */
+
+  return result;
+}
+/*
+ * fmt_setup - assemble a C printf conversion specification in buf.
+ *
+ * Writes '%', then one character for each flag bit set in `flags`
+ * (in the fixed order '#', '+', '-', '0', ' '), then the decimal
+ * `width` when FWIDTH is set, then ".<prec>" when FPREC is set, then
+ * the conversion character `c`, and finally a terminating NUL.
+ *
+ *   buf   - destination buffer
+ *   size  - capacity of buf in bytes
+ *   c     - conversion character appended last
+ *   flags - bitwise OR of the F* flag constants tested below
+ *   width - field width, used only when FWIDTH is set
+ *   prec  - precision, used only when FPREC is set
+ *
+ * NOTE(review): only the two snprintf() calls are bounded by `end`;
+ * the '%', flag, conversion-character and NUL stores are unchecked,
+ * so the caller must supply a buffer large enough for the longest
+ * specification.
+ */
+static void
+fmt_setup(char *buf, size_t size, int c, int flags, int width, int prec)
+{
+  char *end = buf + size; /* one past the last byte of the buffer */
+  *buf++ = '%';
+  if (flags & FSHARP) *buf++ = '#';
+  if (flags & FPLUS)  *buf++ = '+';
+  if (flags & FMINUS) *buf++ = '-';
+  if (flags & FZERO)  *buf++ = '0';
+  if (flags & FSPACE) *buf++ = ' ';
+
+  if (flags & FWIDTH) {
+    snprintf(buf, end - buf, "%d", width);
+    buf += strlen(buf); /* advance past the digits just written */
+  }
+
+  if (flags & FPREC) {
+    snprintf(buf, end - buf, ".%d", prec);
+    buf += strlen(buf); /* advance past ".<digits>" */
+  }
+
+  *buf++ = c;
+  *buf = '\0';
+}
diff --git a/src/st.c b/src/st.c
new file mode 100644
index 0000000000..c4bbc702c6
--- /dev/null
+++ b/src/st.c
@@ -0,0 +1,1283 @@
+/* This is a public domain general purpose hash table package written by Peter Moore @ UCB.
*/ + +/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ +#define NOT_RUBY +#ifdef NOT_RUBY +#include "regint.h" +#include "st.h" +#else +#include "ruby/ruby.h" +#endif + +#include +#ifdef HAVE_STDLIB_H +#include +#endif +#include + +#define ST_DEFAULT_MAX_DENSITY 5 +#define ST_DEFAULT_INIT_TABLE_SIZE 11 + + /* + * DEFAULT_MAX_DENSITY is the default for the largest we allow the + * average number of items per bin before increasing the number of + * bins + * + * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins + * allocated initially + * + */ + +static const struct st_hash_type type_numhash = { + st_numcmp, + st_numhash, +}; + +/* extern int strcmp(const char *, const char *); */ +static st_index_t strhash(st_data_t); +static const struct st_hash_type type_strhash = { + strcmp, + strhash, +}; + +static st_index_t strcasehash(st_data_t); +static const struct st_hash_type type_strcasehash = { + st_strcasecmp, + strcasehash, +}; + +static void rehash(st_table *); + +#ifdef RUBY +#define malloc xmalloc +#define calloc xcalloc +#define free(x) xfree(x) +#endif + +#define numberof(array) (int)(sizeof(array) / sizeof((array)[0])) + +#define alloc(type) (type*)malloc((size_t)sizeof(type)) +#define Calloc(n,s) (char*)calloc((n),(s)) + +#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0) + +/* remove cast to unsigned int in the future */ +#define do_hash(key,table) (unsigned int)(st_index_t)(*(table)->type->hash)((key)) +#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) + +/* + * MINSIZE is the minimum size of a dictionary. + */ + +#define MINSIZE 8 + +/* +Table of prime numbers 2^n+a, 2<=n<=30. 
+*/ +static const unsigned int primes[] = { + 8 + 3, + 16 + 3, + 32 + 5, + 64 + 3, + 128 + 3, + 256 + 27, + 512 + 9, + 1024 + 9, + 2048 + 5, + 4096 + 3, + 8192 + 27, + 16384 + 43, + 32768 + 3, + 65536 + 45, + 131072 + 29, + 262144 + 3, + 524288 + 21, + 1048576 + 7, + 2097152 + 17, + 4194304 + 15, + 8388608 + 9, + 16777216 + 43, + 33554432 + 35, + 67108864 + 15, + 134217728 + 29, + 268435456 + 3, + 536870912 + 11, + 1073741824 + 85, + 0 +}; + +static st_index_t +new_size(st_index_t size) +{ + int i; + + st_index_t newsize; + + for (i = 0, newsize = MINSIZE; i < numberof(primes); i++, newsize <<= 1) { + if (newsize > size) return primes[i]; + } + /* Ran out of polynomials */ +#ifndef NOT_RUBY + rb_raise(rb_eRuntimeError, "st_table too big"); +#endif + return -1; /* should raise exception */ +} + +#define MAX_PACKED_NUMHASH (ST_DEFAULT_INIT_TABLE_SIZE/2) + +st_table* +st_init_table_with_size(const struct st_hash_type *type, st_index_t size) +{ + st_table *tbl; + + size = new_size(size); /* round up to prime number */ + + tbl = alloc(st_table); + tbl->type = type; + tbl->num_entries = 0; + tbl->entries_packed = type == &type_numhash && size/2 <= MAX_PACKED_NUMHASH; + tbl->num_bins = size; + tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*)); + tbl->head = 0; + tbl->tail = 0; + + return tbl; +} + +st_table* +st_init_table(const struct st_hash_type *type) +{ + return st_init_table_with_size(type, 0); +} + +st_table* +st_init_numtable(void) +{ + return st_init_table(&type_numhash); +} + +st_table* +st_init_numtable_with_size(st_index_t size) +{ + return st_init_table_with_size(&type_numhash, size); +} + +st_table* +st_init_strtable(void) +{ + return st_init_table(&type_strhash); +} + +st_table* +st_init_strtable_with_size(st_index_t size) +{ + return st_init_table_with_size(&type_strhash, size); +} + +st_table* +st_init_strcasetable(void) +{ + return st_init_table(&type_strcasehash); +} + +st_table* +st_init_strcasetable_with_size(st_index_t size) +{ + 
return st_init_table_with_size(&type_strcasehash, size); +} + +void +st_clear(st_table *table) +{ + register st_table_entry *ptr, *next; + st_index_t i; + + if (table->entries_packed) { + table->num_entries = 0; + return; + } + + for(i = 0; i < table->num_bins; i++) { + ptr = table->bins[i]; + table->bins[i] = 0; + while (ptr != 0) { + next = ptr->next; + free(ptr); + ptr = next; + } + } + table->num_entries = 0; + table->head = 0; + table->tail = 0; +} + +void +st_free_table(st_table *table) +{ + st_clear(table); + free(table->bins); + free(table); +} + +size_t +st_memsize(const st_table *table) +{ + if (table->entries_packed) { + return table->num_bins * sizeof (void *) + sizeof(st_table); + } + else { + return table->num_entries * sizeof(struct st_table_entry) + table->num_bins * sizeof (void *) + sizeof(st_table); + } +} + +#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ +((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key))) + +#define COLLISION +#define FOUND_ENTRY + +#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\ + bin_pos = hash_val%(table)->num_bins;\ + ptr = (table)->bins[bin_pos];\ + FOUND_ENTRY;\ + if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\ + COLLISION;\ + while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\ + ptr = ptr->next;\ + }\ + ptr = ptr->next;\ + }\ +} while (0) + +#define collision_check 0 + +int +st_lookup(st_table *table, register st_data_t key, st_data_t *value) +{ + st_index_t hash_val, bin_pos; + register st_table_entry *ptr; + + if (table->entries_packed) { + st_index_t i; + for (i = 0; i < table->num_entries; i++) { + if ((st_data_t)table->bins[i*2] == key) { + if (value !=0) *value = (st_data_t)table->bins[i*2+1]; + return 1; + } + } + return 0; + } + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + return 0; + } + else { + if (value != 0) *value = ptr->record; + return 1; + } +} + +int +st_get_key(st_table *table, register st_data_t key, 
st_data_t *result) +{ + st_index_t hash_val, bin_pos; + register st_table_entry *ptr; + + if (table->entries_packed) { + st_index_t i; + for (i = 0; i < table->num_entries; i++) { + if ((st_data_t)table->bins[i*2] == key) { + if (result !=0) *result = (st_data_t)table->bins[i*2]; + return 1; + } + } + return 0; + } + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + return 0; + } + else { + if (result != 0) *result = ptr->key; + return 1; + } +} + +#undef collision_check +#define collision_check 1 + +#define MORE_PACKABLE_P(table) \ + ((st_index_t)((table)->num_entries+1) * 2 <= (table)->num_bins && \ + (table)->num_entries+1 <= MAX_PACKED_NUMHASH) + +#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ +do {\ + st_table_entry *entry;\ + if (table->num_entries > ST_DEFAULT_MAX_DENSITY * table->num_bins) {\ + rehash(table);\ + bin_pos = hash_val % table->num_bins;\ + }\ + \ + entry = alloc(st_table_entry);\ + \ + entry->hash = hash_val;\ + entry->key = key;\ + entry->record = value;\ + entry->next = table->bins[bin_pos];\ + if (table->head != 0) {\ + entry->fore = 0;\ + (entry->back = table->tail)->fore = entry;\ + table->tail = entry;\ + }\ + else {\ + table->head = table->tail = entry;\ + entry->fore = entry->back = 0;\ + }\ + table->bins[bin_pos] = entry;\ + table->num_entries++;\ +} while (0) + +static void +unpack_entries(register st_table *table) +{ + st_index_t i; + struct st_table_entry *packed_bins[MAX_PACKED_NUMHASH*2]; + st_table tmp_table = *table; + + memcpy(packed_bins, table->bins, sizeof(struct st_table_entry *) * table->num_entries*2); + table->bins = packed_bins; + tmp_table.entries_packed = 0; + tmp_table.num_entries = 0; + memset(tmp_table.bins, 0, sizeof(struct st_table_entry *) * tmp_table.num_bins); + for (i = 0; i < table->num_entries; i++) { + st_insert(&tmp_table, (st_data_t)packed_bins[i*2], (st_data_t)packed_bins[i*2+1]); + } + *table = tmp_table; +} + +int +st_insert(register 
st_table *table, register st_data_t key, st_data_t value) +{ + st_index_t hash_val, bin_pos; + register st_table_entry *ptr; + + if (table->entries_packed) { + st_index_t i; + for (i = 0; i < table->num_entries; i++) { + if ((st_data_t)table->bins[i*2] == key) { + table->bins[i*2+1] = (struct st_table_entry*)value; + return 1; + } + } + if (MORE_PACKABLE_P(table)) { + i = table->num_entries++; + table->bins[i*2] = (struct st_table_entry*)key; + table->bins[i*2+1] = (struct st_table_entry*)value; + return 0; + } + else { + unpack_entries(table); + } + } + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + ADD_DIRECT(table, key, value, hash_val, bin_pos); + return 0; + } + else { + ptr->record = value; + return 1; + } +} + +int +st_insert2(register st_table *table, register st_data_t key, st_data_t value, + st_data_t (*func)(st_data_t)) +{ + st_index_t hash_val, bin_pos; + register st_table_entry *ptr; + + if (table->entries_packed) { + st_index_t i; + for (i = 0; i < table->num_entries; i++) { + if ((st_data_t)table->bins[i*2] == key) { + table->bins[i*2+1] = (struct st_table_entry*)value; + return 1; + } + } + if (MORE_PACKABLE_P(table)) { + i = table->num_entries++; + table->bins[i*2] = (struct st_table_entry*)key; + table->bins[i*2+1] = (struct st_table_entry*)value; + return 0; + } + else { + unpack_entries(table); + } + } + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + key = (*func)(key); + ADD_DIRECT(table, key, value, hash_val, bin_pos); + return 0; + } + else { + ptr->record = value; + return 1; + } +} + +void +st_add_direct(st_table *table, st_data_t key, st_data_t value) +{ + st_index_t hash_val, bin_pos; + + if (table->entries_packed) { + int i; + if (MORE_PACKABLE_P(table)) { + i = table->num_entries++; + table->bins[i*2] = (struct st_table_entry*)key; + table->bins[i*2+1] = (struct st_table_entry*)value; + return; + } + else { + 
unpack_entries(table); + } + } + + hash_val = do_hash(key, table); + bin_pos = hash_val % table->num_bins; + ADD_DIRECT(table, key, value, hash_val, bin_pos); +} + +static void +rehash(register st_table *table) +{ + register st_table_entry *ptr, **new_bins; + st_index_t i, new_num_bins, hash_val; + + new_num_bins = new_size(table->num_bins+1); + new_bins = (st_table_entry**) + xrealloc(table->bins, new_num_bins * sizeof(st_table_entry*)); + for (i = 0; i < new_num_bins; ++i) new_bins[i] = 0; + table->num_bins = new_num_bins; + table->bins = new_bins; + + if ((ptr = table->head) != 0) { + do { + hash_val = ptr->hash % new_num_bins; + ptr->next = new_bins[hash_val]; + new_bins[hash_val] = ptr; + } while ((ptr = ptr->fore) != 0); + } +} + +st_table* +st_copy(st_table *old_table) +{ + st_table *new_table; + st_table_entry *ptr, *entry, *prev, **tail; + st_index_t num_bins = old_table->num_bins; + st_index_t hash_val; + + new_table = alloc(st_table); + if (new_table == 0) { + return 0; + } + + *new_table = *old_table; + new_table->bins = (st_table_entry**) + Calloc((unsigned)num_bins, sizeof(st_table_entry*)); + + if (new_table->bins == 0) { + free(new_table); + return 0; + } + + if (old_table->entries_packed) { + memcpy(new_table->bins, old_table->bins, sizeof(struct st_table_entry *) * old_table->num_bins); + return new_table; + } + + if ((ptr = old_table->head) != 0) { + prev = 0; + tail = &new_table->head; + do { + entry = alloc(st_table_entry); + if (entry == 0) { + st_free_table(new_table); + return 0; + } + *entry = *ptr; + hash_val = entry->hash % num_bins; + entry->next = new_table->bins[hash_val]; + new_table->bins[hash_val] = entry; + entry->back = prev; + *tail = prev = entry; + tail = &entry->fore; + } while ((ptr = ptr->fore) != 0); + new_table->tail = prev; + } + + return new_table; +} + +#define REMOVE_ENTRY(table, ptr) do \ + { \ + if (ptr->fore == 0 && ptr->back == 0) { \ + table->head = 0; \ + table->tail = 0; \ + } \ + else { \ + st_table_entry *fore 
= ptr->fore, *back = ptr->back; \ + if (fore) fore->back = back; \ + if (back) back->fore = fore; \ + if (ptr == table->head) table->head = fore; \ + if (ptr == table->tail) table->tail = back; \ + } \ + table->num_entries--; \ + } while (0) + +int +st_delete(register st_table *table, register st_data_t *key, st_data_t *value) +{ + st_index_t hash_val; + st_table_entry **prev; + register st_table_entry *ptr; + + if (table->entries_packed) { + st_index_t i; + for (i = 0; i < table->num_entries; i++) { + if ((st_data_t)table->bins[i*2] == *key) { + if (value != 0) *value = (st_data_t)table->bins[i*2+1]; + table->num_entries--; + memmove(&table->bins[i*2], &table->bins[(i+1)*2], + sizeof(struct st_table_entry*) * 2*(table->num_entries-i)); + return 1; + } + } + if (value != 0) *value = 0; + return 0; + } + + hash_val = do_hash_bin(*key, table); + + for (prev = &table->bins[hash_val]; (ptr = *prev) != 0; prev = &ptr->next) { + if (EQUAL(table, *key, ptr->key)) { + *prev = ptr->next; + REMOVE_ENTRY(table, ptr); + if (value != 0) *value = ptr->record; + *key = ptr->key; + free(ptr); + return 1; + } + } + + if (value != 0) *value = 0; + return 0; +} + +int +st_delete_safe(register st_table *table, register st_data_t *key, st_data_t *value, st_data_t never) +{ + st_index_t hash_val; + register st_table_entry *ptr; + + if (table->entries_packed) { + st_index_t i; + for (i = 0; i < table->num_entries; i++) { + if ((st_data_t)table->bins[i*2] == *key) { + if (value != 0) *value = (st_data_t)table->bins[i*2+1]; + table->bins[i*2] = (void *)never; + return 1; + } + } + if (value != 0) *value = 0; + return 0; + } + + hash_val = do_hash_bin(*key, table); + ptr = table->bins[hash_val]; + + for (; ptr != 0; ptr = ptr->next) { + if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { + REMOVE_ENTRY(table, ptr); + *key = ptr->key; + if (value != 0) *value = ptr->record; + ptr->key = ptr->record = never; + return 1; + } + } + + if (value != 0) *value = 0; + return 0; +} + +void 
+st_cleanup_safe(st_table *table, st_data_t never) +{ + st_table_entry *ptr, **last, *tmp; + st_index_t i; + + if (table->entries_packed) { + st_index_t i = 0, j = 0; + while ((st_data_t)table->bins[i*2] != never) { + if (i++ == table->num_entries) return; + } + for (j = i; ++i < table->num_entries;) { + if ((st_data_t)table->bins[i*2] == never) continue; + table->bins[j*2] = table->bins[i*2]; + table->bins[j*2+1] = table->bins[i*2+1]; + j++; + } + table->num_entries = j; + return; + } + + for (i = 0; i < table->num_bins; i++) { + ptr = *(last = &table->bins[i]); + while (ptr != 0) { + if (ptr->key == never) { + tmp = ptr; + *last = ptr = ptr->next; + free(tmp); + } + else { + ptr = *(last = &ptr->next); + } + } + } +} + +int +st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg) +{ + st_table_entry *ptr, **last, *tmp; + enum st_retval retval; + st_index_t i; + + if (table->entries_packed) { + for (i = 0; i < table->num_entries; i++) { + st_index_t j; + st_data_t key, val; + key = (st_data_t)table->bins[i*2]; + val = (st_data_t)table->bins[i*2+1]; + retval = (*func)(key, val, arg); + switch (retval) { + case ST_CHECK: /* check if hash is modified during iteration */ + for (j = 0; j < table->num_entries; j++) { + if ((st_data_t)table->bins[j*2] == key) + break; + } + if (j == table->num_entries) { + /* call func with error notice */ + retval = (*func)(0, 0, arg, 1); + return 1; + } + /* fall through */ + case ST_CONTINUE: + break; + case ST_STOP: + return 0; + case ST_DELETE: + table->num_entries--; + memmove(&table->bins[i*2], &table->bins[(i+1)*2], + sizeof(struct st_table_entry*) * 2*(table->num_entries-i)); + i--; + break; + } + } + return 0; + } + + if ((ptr = table->head) != 0) { + do { + i = ptr->hash % table->num_bins; + retval = (*func)(ptr->key, ptr->record, (void*)arg); + switch (retval) { + case ST_CHECK: /* check if hash is modified during iteration */ + for (tmp = table->bins[i]; tmp != ptr; tmp = tmp->next) { + if (!tmp) { + /* call func 
with error notice */ + retval = (*func)(0, 0, arg, 1); + return 1; + } + } + /* fall through */ + case ST_CONTINUE: + ptr = ptr->fore; + break; + case ST_STOP: + return 0; + case ST_DELETE: + last = &table->bins[ptr->hash % table->num_bins]; + for (; (tmp = *last) != 0; last = &tmp->next) { + if (ptr == tmp) { + tmp = ptr->fore; + *last = ptr->next; + REMOVE_ENTRY(table, ptr); + free(ptr); + if (ptr == tmp) return 0; + ptr = tmp; + break; + } + } + } + } while (ptr && table->head); + } + return 0; +} + +typedef int st_foreach_func(mrb_sym, void*, void *); + +struct foreach_safe_arg { + st_table *tbl; + st_foreach_func *func; + void *arg; +}; + +static int +foreach_safe_i(mrb_state *mrb, mrb_sym key, void* value, struct foreach_safe_arg *arg) +{ + int status; + + if (key == 0xffffffff/*key == Qundef*/) return ST_CONTINUE; + status = (*arg->func)(key, value, arg->arg); + if (status == ST_CONTINUE) { + return ST_CHECK; + } + return status; +} + +void +st_foreach_safe(mrb_state *mrb, void *table, int (*func)(ANYARGS), void* a) +{ + struct foreach_safe_arg arg; + + arg.tbl = table; + arg.func = (st_foreach_func *)func; + arg.arg = a; + if (st_foreach(table, foreach_safe_i, (st_data_t)&arg)) { + mrb_raise(mrb, mrb->eRuntimeError_class, "hash modified during iteration"); + } +} + +int +st_foreachNew(mrb_state *mrb, st_table *table, int (*func)(ANYARGS), void* arg) +{ + st_table_entry *ptr, **last, *tmp; + enum st_retval retval; + st_index_t i; + + if (table->entries_packed) { + for (i = 0; i < table->num_entries; i++) { + st_index_t j; + st_data_t key, val; + key = (st_data_t)table->bins[i*2]; + val = (st_data_t)table->bins[i*2+1]; + retval = (*func)(mrb, key, val, arg); + switch (retval) { + case ST_CHECK: /* check if hash is modified during iteration */ + for (j = 0; j < table->num_entries; j++) { + if ((st_data_t)table->bins[j*2] == key) + break; + } + if (j == table->num_entries) { + /* call func with error notice */ + retval = (*func)(0, 0, arg, 1); + return 1; + } + 
/* fall through */ + case ST_CONTINUE: + break; + case ST_STOP: + return 0; + case ST_DELETE: + table->num_entries--; + memmove(&table->bins[i*2], &table->bins[(i+1)*2], + sizeof(struct st_table_entry*) * 2*(table->num_entries-i)); + i--; + break; + } + } + return 0; + } + + if ((ptr = table->head) != 0) { + do { + i = ptr->hash % table->num_bins; + retval = (*func)(mrb, ptr->key, ptr->record, arg); + switch (retval) { + case ST_CHECK: /* check if hash is modified during iteration */ + for (tmp = table->bins[i]; tmp != ptr; tmp = tmp->next) { + if (!tmp) { + /* call func with error notice */ + retval = (*func)(0, 0, arg, 1); + return 1; + } + } + /* fall through */ + case ST_CONTINUE: + ptr = ptr->fore; + break; + case ST_STOP: + return 0; + case ST_DELETE: + last = &table->bins[ptr->hash % table->num_bins]; + for (; (tmp = *last) != 0; last = &tmp->next) { + if (ptr == tmp) { + tmp = ptr->fore; + *last = ptr->next; + REMOVE_ENTRY(table, ptr); + free(ptr); + if (ptr == tmp) return 0; + ptr = tmp; + break; + } + } + } + } while (ptr && table->head); + } + return 0; +} + +/* + * hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code + * + * @(#) $Hash32: Revision: 1.1 $ + * @(#) $Hash32: Id: hash_32a.c,v 1.1 2003/10/03 20:38:53 chongo Exp $ + * @(#) $Hash32: Source: /usr/local/src/cmd/fnv/RCS/hash_32a.c,v $ + * + *** + * + * Fowler/Noll/Vo hash + * + * The basis of this hash algorithm was taken from an idea sent + * as reviewer comments to the IEEE POSIX P1003.2 committee by: + * + * Phong Vo (http://www.research.att.com/info/kpv/) + * Glenn Fowler (http://www.research.att.com/~gsf/) + * + * In a subsequent ballot round: + * + * Landon Curt Noll (http://www.isthe.com/chongo/) + * + * improved on their algorithm. Some people tried this hash + * and found that it worked rather well. In an EMail message + * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash. + * + * FNV hashes are designed to be fast while maintaining a low + * collision rate. 
The FNV speed allows one to quickly hash lots + * of data while maintaining a reasonable collision rate. See: + * + * http://www.isthe.com/chongo/tech/comp/fnv/index.html + * + * for more details as well as other forms of the FNV hash. + *** + * + * To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the + * Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str(). + * + *** + * + * Please do not copyright this code. This code is in the public domain. + * + * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO + * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + * + * By: + * chongo /\oo/\ + * http://www.isthe.com/chongo/ + * + * Share and Enjoy! :-) + */ + +/* + * 32 bit FNV-1 and FNV-1a non-zero initial basis + * + * The FNV-1 initial basis is the FNV-0 hash of the following 32 octets: + * + * chongo /\../\ + * + * NOTE: The \'s above are not back-slashing escape characters. + * They are literal ASCII backslash 0x5c characters. + * + * NOTE: The FNV-1a initial basis is the same value as FNV-1 by definition. 
+ */ +#define FNV1_32A_INIT 0x811c9dc5 + +/* + * 32 bit magic FNV-1a prime + */ +#define FNV_32_PRIME 0x01000193 + +#ifdef ST_USE_FNV1 +static st_index_t +strhash(st_data_t arg) +{ + register const char *string = (const char *)arg; + register st_index_t hval = FNV1_32A_INIT; + + /* + * FNV-1a hash each octet in the buffer + */ + while (*string) { + /* xor the bottom with the current octet */ + hval ^= (unsigned int)*string++; + + /* multiply by the 32 bit FNV magic prime mod 2^32 */ + hval *= FNV_32_PRIME; + } + return hval; +} +#else + +#ifndef UNALIGNED_WORD_ACCESS +# if defined __i386__ || defined _M_IX86 +# define UNALIGNED_WORD_ACCESS 1 +# endif +#endif +#ifndef UNALIGNED_WORD_ACCESS +# define UNALIGNED_WORD_ACCESS 0 +#endif + +/* MurmurHash described in http://murmurhash.googlepages.com/ */ +#ifndef MURMUR +#define MURMUR 2 +#endif + +#if MURMUR == 1 +#define MurmurMagic 0xc6a4a793 +#elif MURMUR == 2 +#if SIZEOF_ST_INDEX_T > 4 +#define MurmurMagic 0xc6a4a7935bd1e995 +#else +#define MurmurMagic 0x5bd1e995 +#endif +#endif + +static inline st_index_t +murmur(st_index_t h, st_index_t k, int r) +{ + const st_index_t m = MurmurMagic; +#if MURMUR == 1 + h += k; + h *= m; + h ^= h >> r; +#elif MURMUR == 2 + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; +#endif + return h; +} + +static inline st_index_t +murmur_finish(st_index_t h) +{ +#if MURMUR == 1 + h = murmur(h, 0, 10); + h = murmur(h, 0, 17); +#elif MURMUR == 2 + h ^= h >> 13; + h *= MurmurMagic; + h ^= h >> 15; +#endif + return h; +} + +#define murmur_step(h, k) murmur(h, k, 16) + +#if MURMUR == 1 +#define murmur1(h) murmur_step(h, 16) +#else +#define murmur1(h) murmur_step(h, 24) +#endif + +st_index_t +st_hash(const void *ptr, size_t len, st_index_t h) +{ + const char *data = ptr; + st_index_t t = 0; + + h += 0xdeadbeef; + +#define data_at(n) (st_index_t)((unsigned char)data[n]) +#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0) +#if SIZEOF_ST_INDEX_T > 4 +#define 
UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4 +#if SIZEOF_ST_INDEX_T > 8 +#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \ + UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8 +#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16 +#endif +#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8 +#else +#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4 +#endif + if (len >= sizeof(st_index_t)) { +#if !UNALIGNED_WORD_ACCESS + int align = (int)((st_data_t)data % sizeof(st_index_t)); + if (align) { + st_index_t d = 0; + int sl, sr, pack; + + switch (align) { +#ifdef WORDS_BIGENDIAN +# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \ + t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2) +#else +# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \ + t |= data_at(n) << CHAR_BIT*(n) +#endif + UNALIGNED_ADD_ALL; +#undef UNALIGNED_ADD + } + +#ifdef WORDS_BIGENDIAN + t >>= (CHAR_BIT * align) - CHAR_BIT; +#else + t <<= (CHAR_BIT * align); +#endif + + data += sizeof(st_index_t)-align; + len -= sizeof(st_index_t)-align; + + sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align); + sr = CHAR_BIT * align; + + while (len >= sizeof(st_index_t)) { + d = *(st_index_t *)data; +#ifdef WORDS_BIGENDIAN + t = (t << sr) | (d >> sl); +#else + t = (t >> sr) | (d << sl); +#endif + h = murmur_step(h, t); + t = d; + data += sizeof(st_index_t); + len -= sizeof(st_index_t); + } + + pack = len < (size_t)align ? 
(int)len : align; + d = 0; + switch (pack) { +#ifdef WORDS_BIGENDIAN +# define UNALIGNED_ADD(n) case (n) + 1: \ + d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1) +#else +# define UNALIGNED_ADD(n) case (n) + 1: \ + d |= data_at(n) << CHAR_BIT*(n) +#endif + UNALIGNED_ADD_ALL; +#undef UNALIGNED_ADD + } +#ifdef WORDS_BIGENDIAN + t = (t << sr) | (d >> sl); +#else + t = (t >> sr) | (d << sl); +#endif + +#if MURMUR == 2 + if (len < (size_t)align) goto skip_tail; +#endif + h = murmur_step(h, t); + data += pack; + len -= pack; + } + else +#endif + { + do { + h = murmur_step(h, *(st_index_t *)data); + data += sizeof(st_index_t); + len -= sizeof(st_index_t); + } while (len >= sizeof(st_index_t)); + } + } + + t = 0; + switch (len) { +#ifdef WORDS_BIGENDIAN +# define UNALIGNED_ADD(n) case (n) + 1: \ + t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1) +#else +# define UNALIGNED_ADD(n) case (n) + 1: \ + t |= data_at(n) << CHAR_BIT*(n) +#endif + UNALIGNED_ADD_ALL; +#undef UNALIGNED_ADD +#if MURMUR == 1 + h = murmur_step(h, t); +#elif MURMUR == 2 +# if !UNALIGNED_WORD_ACCESS + skip_tail: +# endif + h ^= t; + h *= MurmurMagic; +#endif + } + + return murmur_finish(h); +} + +st_index_t +st_hash_uint32(st_index_t h, uint32_t i) +{ + return murmur_step(h + i, 16); +} + +st_index_t +st_hash_uint(st_index_t h, st_index_t i) +{ + st_index_t v = 0; + h += i; +#ifdef WORDS_BIGENDIAN +#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8 + v = murmur1(v + (h >> 12*8)); +#endif +#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8 + v = murmur1(v + (h >> 8*8)); +#endif +#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8 + v = murmur1(v + (h >> 4*8)); +#endif +#endif + v = murmur1(v + h); +#ifndef WORDS_BIGENDIAN +#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8 + v = murmur1(v + (h >> 4*8)); +#endif +#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8 + v = murmur1(v + (h >> 8*8)); +#endif +#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8 + v = murmur1(v + (h >> 12*8)); +#endif +#endif + return v; +} + +st_index_t +st_hash_end(st_index_t h) +{ + h = 
murmur_step(h, 10); + h = murmur_step(h, 17); + return h; +} + +#undef st_hash_start +st_index_t +st_hash_start(st_index_t h) +{ + return h; +} + +static st_index_t +strhash(st_data_t arg) +{ + register const char *string = (const char *)arg; + return st_hash(string, strlen(string), FNV1_32A_INIT); +} +#endif + +int +st_strcasecmp(const char *s1, const char *s2) +{ + unsigned int c1, c2; + + while (1) { + c1 = (unsigned char)*s1++; + c2 = (unsigned char)*s2++; + if (c1 == '\0' || c2 == '\0') { + if (c1 != '\0') return 1; + if (c2 != '\0') return -1; + return 0; + } + if ((unsigned int)(c1 - 'A') <= ('Z' - 'A')) c1 += 'a' - 'A'; + if ((unsigned int)(c2 - 'A') <= ('Z' - 'A')) c2 += 'a' - 'A'; + if (c1 != c2) { + if (c1 > c2) + return 1; + else + return -1; + } + } +} + +int +st_strncasecmp(const char *s1, const char *s2, size_t n) +{ + unsigned int c1, c2; + + while (n--) { + c1 = (unsigned char)*s1++; + c2 = (unsigned char)*s2++; + if (c1 == '\0' || c2 == '\0') { + if (c1 != '\0') return 1; + if (c2 != '\0') return -1; + return 0; + } + if ((unsigned int)(c1 - 'A') <= ('Z' - 'A')) c1 += 'a' - 'A'; + if ((unsigned int)(c2 - 'A') <= ('Z' - 'A')) c2 += 'a' - 'A'; + if (c1 != c2) { + if (c1 > c2) + return 1; + else + return -1; + } + } + return 0; +} + +static st_index_t +strcasehash(st_data_t arg) +{ + register const char *string = (const char *)arg; + register st_index_t hval = FNV1_32A_INIT; + + /* + * FNV-1a hash each octet in the buffer + */ + while (*string) { + unsigned int c = (unsigned char)*string++; + if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A'; + hval ^= c; + + /* multiply by the 32 bit FNV magic prime mod 2^32 */ + hval *= FNV_32_PRIME; + } + return hval; +} + +int +st_numcmp(st_data_t x, st_data_t y) +{ + return x != y; +} + +st_index_t +st_numhash(st_data_t n) +{ + return (st_index_t)n; +} diff --git a/src/st.h b/src/st.h new file mode 100644 index 0000000000..7324e8da71 --- /dev/null +++ b/src/st.h @@ -0,0 +1,139 @@ +/* This is a public 
domain general purpose hash table package written by Peter Moore @ UCB. */ + +/* @(#) st.h 5.1 89/12/14 */ + +#ifndef RUBY_ST_H +#define RUBY_ST_H 1 + +#if defined(__cplusplus) +extern "C" { +#endif + +#ifndef RUBY_LIB_PREFIX + +#ifdef RUBY_EXTCONF_H +#include RUBY_EXTCONF_H +#endif +#endif + +#if defined STDC_HEADERS +#include +#elif defined HAVE_STDLIB_H +#include +#endif + +#ifdef HAVE_STDINT_H +# include +#endif +#include + +#ifndef CHAR_BIT +# ifdef HAVE_LIMITS_H +# include +# else +# define CHAR_BIT 8 +# endif +#endif + +#ifndef _ +# define _(args) args +#endif + +#ifndef ANYARGS +# ifdef __cplusplus +# define ANYARGS ... +# else +# define ANYARGS +# endif +#endif + +typedef uintptr_t st_data_t; +typedef struct st_table st_table; + +typedef st_data_t st_index_t; +typedef int st_compare_func(st_data_t, st_data_t); +typedef st_index_t st_hash_func(st_data_t); + +typedef struct st_table_entry st_table_entry; + +struct st_table_entry { + st_index_t hash; + st_data_t key; + st_data_t record; + st_table_entry *next; + st_table_entry *fore, *back; +}; + +#ifndef SIZEOF_VOIDP +#define SIZEOF_VOIDP 4 +#endif + +#define SIZEOF_ST_INDEX_T SIZEOF_VOIDP + +struct st_hash_type { + int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */ + st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */ +}; + +#define ST_INDEX_BITS (sizeof(st_index_t) * CHAR_BIT) + +struct st_table { + const struct st_hash_type *type; + st_index_t num_bins; + unsigned int entries_packed : 1; +#ifdef __GNUC__ + __extension__ +#endif + st_index_t num_entries : ST_INDEX_BITS - 1; + struct st_table_entry **bins; + struct st_table_entry *head, *tail; +}; + +#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0) + +enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; + +st_table *st_init_table(const struct st_hash_type *); +st_table *st_init_table_with_size(const struct st_hash_type *, st_index_t); +st_table *st_init_numtable(void); +st_table 
*st_init_numtable_with_size(st_index_t); +st_table *st_init_strtable(void); +st_table *st_init_strtable_with_size(st_index_t); +st_table *st_init_strcasetable(void); +st_table *st_init_strcasetable_with_size(st_index_t); +int st_delete(st_table *, st_data_t *, st_data_t *); /* returns 0:notfound 1:deleted */ +int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t); +int st_insert(st_table *, st_data_t, st_data_t); +int st_insert2(st_table *, st_data_t, st_data_t, st_data_t (*)(st_data_t)); +int st_lookup(st_table *, st_data_t, st_data_t *); +int st_get_key(st_table *, st_data_t, st_data_t *); +int st_foreach(st_table *, int (*)(ANYARGS), st_data_t); +int st_foreachNew(mrb_state *mrb, st_table *, int (*)(ANYARGS), void*); +int st_reverse_foreach(st_table *, int (*)(ANYARGS), st_data_t); +void st_add_direct(st_table *, st_data_t, st_data_t); +void st_free_table(st_table *); +void st_cleanup_safe(st_table *, st_data_t); +void st_clear(st_table *); +st_table *st_copy(st_table *); +int st_numcmp(st_data_t, st_data_t); +st_index_t st_numhash(st_data_t); +int st_strcasecmp(const char *s1, const char *s2); +int st_strncasecmp(const char *s1, const char *s2, size_t n); +size_t st_memsize(const st_table *); +st_index_t st_hash(const void *ptr, size_t len, st_index_t h); +st_index_t st_hash_uint32(st_index_t h, uint32_t i); +st_index_t st_hash_uint(st_index_t h, st_index_t i); +st_index_t st_hash_end(st_index_t h); +st_index_t st_hash_start(st_index_t h); +#define st_hash_start(h) ((st_index_t)(h)) + +int st_strcasecmp(const char *s1, const char *s2); +int st_strncasecmp(const char *s1, const char *s2, size_t n); +#define STRCASECMP(s1, s2) (st_strcasecmp(s1, s2)) +#define STRNCASECMP(s1, s2, n) (st_strncasecmp(s1, s2, n)) + +#if defined(__cplusplus) +} /* extern "C" { */ +#endif + +#endif /* RUBY_ST_H */ diff --git a/src/state.c b/src/state.c new file mode 100644 index 0000000000..14efed986c --- /dev/null +++ b/src/state.c @@ -0,0 +1,88 @@ +#include "mruby.h" 
+#include "irep.h"
+#include <string.h>
+
+void mrb_init_heap(mrb_state*);
+void mrb_init_core(mrb_state*);
+void mrb_init_ext(mrb_state*);
+
+/*
+ * Creates an interpreter state whose memory is obtained through f.
+ *
+ * The state is zero-filled, f is recorded as its allocator, the current GC
+ * white part is set, and the heap, core and ext initializers are run in
+ * that order.
+ *
+ * Returns the new state, or NULL when f cannot provide the memory.
+ */
+mrb_state*
+mrb_open_allocf(mrb_allocf f)
+{
+  mrb_state *mrb = (f)(NULL, NULL, sizeof(mrb_state));
+
+  /* nothing to initialize when the allocator failed */
+  if (mrb == NULL) return NULL;
+
+  memset(mrb, 0, sizeof(mrb_state));
+  mrb->allocf = f;
+  mrb->current_white_part = MRB_GC_WHITE_A;
+
+  mrb_init_heap(mrb);
+  mrb_init_core(mrb);
+  mrb_init_ext(mrb);
+  return mrb;
+}
+
+/*
+ * Default allocator used by mrb_open().
+ *
+ * A size of 0 releases p and yields NULL; any other size resizes p with
+ * realloc().  The mrb argument is not used.
+ */
+static void*
+allocf(mrb_state *mrb, void *p, size_t size)
+{
+  if (size == 0) {
+    free(p);
+    return NULL;
+  }
+  else {
+    return realloc(p, size);
+  }
+}
+
+/*
+ * Creates an interpreter state backed by the default allocator above.
+ * Returns whatever mrb_open_allocf() returns.
+ */
+mrb_state*
+mrb_open()
+{
+  mrb_state *mrb = mrb_open_allocf(allocf);
+
+  return mrb;
+}
+
+/*
+ * Releases the memory owned by mrb: stbase, cibase, every registered irep,
+ * the irep table and finally the state itself.
+ *
+ * An irep flagged MRB_IREP_NOFREE is skipped entirely; the iseq of an irep
+ * flagged MRB_ISEQ_NOFREE is left alone while its pool, syms and the irep
+ * itself are still freed.
+ */
+void
+mrb_close(mrb_state *mrb)
+{
+  int i;
+
+  /* free */
+  mrb_free(mrb, mrb->stbase);
+  mrb_free(mrb, mrb->cibase);
+  for (i=0; i<mrb->irep_len; i++) {
+    if (mrb->irep[i]->flags & MRB_IREP_NOFREE) continue;
+    if ((mrb->irep[i]->flags & MRB_ISEQ_NOFREE) == 0) {
+      mrb_free(mrb, mrb->irep[i]->iseq);
+    }
+    mrb_free(mrb, mrb->irep[i]->pool);
+    mrb_free(mrb, mrb->irep[i]->syms);
+    mrb_free(mrb, mrb->irep[i]);
+  }
+  mrb_free(mrb, mrb->irep);
+  /* the state goes last: every mrb_free above still needs it */
+  mrb_free(mrb, mrb);
+}
+
+/*
+ * Makes sure the irep table has room for slot idx.
+ *
+ * The first call allocates the table with at least 256 pointer slots (idx+1
+ * when idx does not fit in 256).  Later calls double irep_capa until slot
+ * idx fits and resize the table accordingly.
+ */
+void
+mrb_add_irep(mrb_state *mrb, int idx)
+{
+  if (!mrb->irep) {
+    int max = 256;
+
+    /* ">=" so that idx == 256 also gets a valid slot */
+    if (idx >= max) max = idx+1;
+    mrb->irep = mrb_malloc(mrb, sizeof(mrb_irep*)*max);
+    mrb->irep_capa = max;
+  }
+  else if (mrb->irep_capa <= idx) {
+    /* slot idx is valid only while idx < irep_capa */
+    while (mrb->irep_capa <= idx) {
+      mrb->irep_capa *= 2;
+    }
+    /* the table stores pointers, so size it by sizeof(mrb_irep*) */
+    mrb->irep = mrb_realloc(mrb, mrb->irep, sizeof(mrb_irep*)*mrb->irep_capa);
+  }
+}
+
+/*
+ * Returns the top-level self object; currently a placeholder that always
+ * yields nil.
+ */
+mrb_value
+mrb_top_self(mrb_state *mrb)
+{
+  // for now
+  return mrb_nil_value();
+}
diff --git a/src/string.c b/src/string.c
new file mode 100644
index 0000000000..da52172f7d
--- /dev/null
+++ b/src/string.c
@@ -0,0 +1,5234 @@
+#include "mruby.h"
+
+#include
+#include
+#include "mruby/string.h"
+#include "mruby/numeric.h"
+#include "mruby/range.h"
+#include
+#include "mruby/array.h"
+#include "mruby/class.h"
+#include "variable.h"
+#include "mruby/hash.h"
+#include
+#include
"variable.h" +#include "re.h" +#ifdef INCLUDE_REGEXP +#include "regex.h" +#include "st.h" +#endif //INCLUDE_REGEXP + +#define mrb_usascii_str_new2 mrb_usascii_str_new_cstr + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + +#ifdef INCLUDE_REGEXP +static mrb_value get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote); +#endif //INCLUDE_REGEXP +#ifdef INCLUDE_ENCODING +static void mrb_enc_cr_str_copy_for_substr(mrb_state *mrb, mrb_value dest, mrb_value src); +#else +#define mrb_enc_cr_str_copy_for_substr(mrb, dest, src) +#endif //INCLUDE_ENCODING +static mrb_value str_replace(mrb_state *mrb, mrb_value str, mrb_value str2); +#ifdef INCLUDE_ENCODING +static long str_strlen(mrb_state *mrb, mrb_value str, mrb_encoding *enc); +#endif //INCLUDE_ENCODING +int mrb_block_given_p(); +#ifdef INCLUDE_ENCODING +#define is_ascii_string(mrb, str) (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) +#define is_broken_string(mrb, str) (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_BROKEN) +#define STR_ENC_GET(mrb, str) mrb_enc_from_index(mrb, ENCODING_GET(mrb, str)) +#endif //INCLUDE_ENCODING + +void +mrb_str_set_len(mrb_state *mrb, mrb_value str, long len) +{ + mrb_str_modify(mrb, str); + RSTRING_LEN(str) = len; + RSTRING_PTR(str)[len] = '\0'; +} + +#define RESIZE_CAPA(str,capacity) do {\ + RSTRING(str)->buf = mrb_realloc(mrb, RSTRING(str)->buf, (capacity)+1);\ + if (!MRB_STR_NOCAPA_P(str))\ + RSTRING_CAPA(str) = capacity;\ +} while (0) + +#define STR_SET_LEN(str, n) do { \ + RSTRING(str)->len = (n);\ +} while (0) + +#define STR_DEC_LEN(str) do {\ + RSTRING(str)->len--;\ +} while (0) + +#ifdef INCLUDE_ENCODING +static mrb_value mrb_enc_cr_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, + int ptr_encindex, int ptr_cr, int *ptr_cr_ret); +#endif //INCLUDE_ENCODING +mrb_value +mrb_tainted_str_new(mrb_state *mrb, const char *ptr, long len) +{ + mrb_value 
str = mrb_str_new(mrb, ptr, len); + + //OBJ_TAINT(str); + return str; +} + +#ifdef INCLUDE_ENCODING +mrb_value +mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr) +{ + mrb_value str = mrb_str_new_cstr(mrb, ptr);//mrb_str_new2(ptr); + ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT); + return str; +} + +mrb_value +mrb_external_str_new_with_enc(mrb_state *mrb, const char *ptr, long len, mrb_encoding *eenc) +{ + mrb_value str; + + str = mrb_tainted_str_new(mrb, ptr, len); + if (eenc == mrb_usascii_encoding(mrb) && + mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) { + mrb_enc_associate(mrb, str, mrb_ascii8bit_encoding(mrb)); + return str; + } + mrb_enc_associate(mrb, str, eenc); + return mrb_str_conv_enc(mrb, str, eenc, mrb_default_internal_encoding(mrb)); +} + +mrb_value +mrb_locale_str_new(mrb_state *mrb, const char *ptr, long len) +{ + return mrb_external_str_new_with_enc(mrb, ptr, len, mrb_locale_encoding(mrb)); +} + +mrb_value +mrb_str_buf_cat_ascii(mrb_state *mrb, mrb_value str, const char *ptr) +{ + /* ptr must reference NUL terminated ASCII string. 
*/ + int encindex = ENCODING_GET(mrb, str); + mrb_encoding *enc = mrb_enc_from_index(mrb, encindex); + if (mrb_enc_asciicompat(mrb, enc)) { + return mrb_enc_cr_str_buf_cat(mrb, str, ptr, strlen(ptr), + encindex, ENC_CODERANGE_7BIT, 0); + } + else { + //char *buf = ALLOCA_N(char, mrb_enc_mbmaxlen(enc)); + char *buf = mrb_malloc(mrb, mrb_enc_mbmaxlen(enc)); + while (*ptr) { + unsigned int c = (unsigned char)*ptr; + int len = mrb_enc_codelen(mrb, c, enc); + mrb_enc_mbcput(c, buf, enc); + mrb_enc_cr_str_buf_cat(mrb, str, buf, len, + encindex, ENC_CODERANGE_VALID, 0); + ptr++; + } + return str; + } +} + +mrb_value +mrb_filesystem_str_new_cstr(mrb_state *mrb, const char *ptr) +{ + return mrb_external_str_new_with_enc(mrb, ptr, strlen(ptr), mrb_filesystem_encoding(mrb)); +} +#endif //INCLUDE_ENCODING + +mrb_value +mrb_str_resize(mrb_state *mrb, mrb_value str, size_t len) +{ + size_t slen; + + if (len < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)"); + } + + mrb_str_modify(mrb, str); + slen = RSTRING_LEN(str); + if (len != slen) { + if (slen < len || slen -len > 1024) { + RSTRING_PTR(str) = mrb_realloc(mrb, RSTRING_PTR(str), len+1); + } + if (!MRB_STR_NOCAPA_P(str)) { + RSTRING(str)->aux.capa = len; + } + RSTRING(str)->len = len; + RSTRING(str)->buf[len] = '\0'; /* sentinel */ + } + return str; +} + +#ifdef INCLUDE_ENCODING +mrb_value +mrb_usascii_str_new(mrb_state *mrb, const char *ptr, long len) +{ + mrb_value str = mrb_str_new(mrb, ptr, len); + ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT); + return str; +} +#endif //INCLUDE_ENCODING + +static inline void +str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len) +{ + struct RString *s = mrb_str_ptr(str); + + if (s->buf != p || s->len != len) { + mrb_raise(mrb, mrb->eRuntimeError_class, "string modified"); + } +} + +#ifdef INCLUDE_ENCODING +static inline int +single_byte_optimizable(mrb_state *mrb, mrb_value str) +{ + mrb_encoding *enc; + /* 
Conservative. It may be ENC_CODERANGE_UNKNOWN. */ + if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) + return 1; + + enc = STR_ENC_GET(mrb, str); + if (mrb_enc_mbmaxlen(enc) == 1) + return 1; + + /* Conservative. Possibly single byte. + * "\xa1" in Shift_JIS for example. */ + return 0; +} + +static inline const char * +search_nonascii(const char *p, const char *e) +{ +#if SIZEOF_VALUE == 8 +# define NONASCII_MASK 0x8080808080808080ULL +#elif SIZEOF_VALUE == 4 +# define NONASCII_MASK 0x80808080UL +#endif +#ifdef NONASCII_MASK + if ((int)sizeof(intptr_t) * 2 < e - p) { + const intptr_t *s, *t; + const intptr_t lowbits = sizeof(intptr_t) - 1; + s = (const intptr_t*)(~lowbits & ((intptr_t)p + lowbits)); + while (p < (const char *)s) { + if (!ISASCII(*p)) + return p; + p++; + } + t = (const intptr_t*)(~lowbits & (intptr_t)e); + while (s < t) { + if (*s & (intptr_t)NONASCII_MASK) { + t = s; + break; + } + s++; + } + p = (const char *)t; + } +#endif + while (p < e) { + if (!ISASCII(*p)) + return p; + p++; + } + return NULL; +} +#endif //INCLUDE_ENCODING + +static inline void +str_modifiable(mrb_value str) +{ + ; +} + +static inline int +str_independent(mrb_value str) +{ + str_modifiable(str); + if (!MRB_STR_SHARED_P(str)) return 1; + return 0; +} + +#ifdef INCLUDE_ENCODING +static inline void +str_enc_copy(mrb_state *mrb, mrb_value str1, mrb_value str2) +{ + unsigned int tmp; + tmp = ENCODING_GET_INLINED(str2); + mrb_enc_set_index(mrb, str1, ENCODING_GET(mrb, str2)); +} + +static inline long +enc_strlen(const char *p, const char *e, mrb_encoding *enc, int cr) +{ + long c; + const char *q; + + if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) { + return (e - p + mrb_enc_mbminlen(enc) - 1) / mrb_enc_mbminlen(enc); + } + else if (mrb_enc_asciicompat(mrb, enc)) { + c = 0; + if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) { + while (p < e) { + if (ISASCII(*p)) { + q = search_nonascii(p, e); + if (!q) + return c + (e - p); + c += q - p; + p = q; + } + p += 
mrb_enc_fast_mbclen(p, e, enc); + c++; + } + } + else { + while (p < e) { + if (ISASCII(*p)) { + q = search_nonascii(p, e); + if (!q) + return c + (e - p); + c += q - p; + p = q; + } + p += mrb_enc_mbclen(p, e, enc); + c++; + } + } + return c; + } + + for (c=0; pstring_class); + //NEWOBJ(str, struct RString); + //OBJSETUP(str, klass, T_STRING); + + s->buf = 0; + s->len = 0; + s->aux.capa = 0; + + return mrb_obj_value(s); +} + +#ifdef INCLUDE_ENCODING +long +mrb_enc_strlen(const char *p, const char *e, mrb_encoding *enc) +{ + return enc_strlen(p, e, enc, ENC_CODERANGE_UNKNOWN); +} +#endif //INCLUDE_ENCODING + +static void +str_make_independent(mrb_state *mrb, mrb_value str) +{ + char *ptr; + long len = RSTRING_LEN(str); + + ptr = mrb_malloc(mrb, sizeof(char)*(len+1)); + if (RSTRING_PTR(str)) { + memcpy(ptr, RSTRING_PTR(str), len); + } + ptr[len] = 0; + RSTRING(str)->buf = ptr; + RSTRING(str)->len = len; + RSTRING(str)->aux.capa = len; + MRB_STR_UNSET_NOCAPA(str); +} + +#ifdef INCLUDE_ENCODING +static int +coderange_scan(const char *p, long len, mrb_encoding *enc) +{ + const char *e = p + len; + + if (mrb_enc_to_index(enc) == 0) { + /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */ + p = search_nonascii(p, e); + return p ? 
ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT; + } + + if (mrb_enc_asciicompat(mrb, enc)) { + p = search_nonascii(p, e); + if (!p) { + return ENC_CODERANGE_7BIT; + } + while (p < e) { + int ret = mrb_enc_precise_mbclen(p, e, enc); + if (!MBCLEN_CHARFOUND_P(ret)) { + return ENC_CODERANGE_BROKEN; + } + p += MBCLEN_CHARFOUND_LEN(ret); + if (p < e) { + p = search_nonascii(p, e); + if (!p) { + return ENC_CODERANGE_VALID; + } + } + } + if (e < p) { + return ENC_CODERANGE_BROKEN; + } + return ENC_CODERANGE_VALID; + } + + while (p < e) { + int ret = mrb_enc_precise_mbclen(p, e, enc); + + if (!MBCLEN_CHARFOUND_P(ret)) { + return ENC_CODERANGE_BROKEN; + } + p += MBCLEN_CHARFOUND_LEN(ret); + } + if (e < p) { + return ENC_CODERANGE_BROKEN; + } + return ENC_CODERANGE_VALID; +} + +int +mrb_enc_str_coderange(mrb_state *mrb, mrb_value str) +{ + int cr = ENC_CODERANGE(str); + + if (cr == ENC_CODERANGE_UNKNOWN) { + mrb_encoding *enc = STR_ENC_GET(mrb, str); + cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc); + ENC_CODERANGE_SET(str, cr); + } + return cr; +} + +char* +mrb_enc_nth(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc) +{ + if (mrb_enc_mbmaxlen(enc) == 1) { + p += nth; + } + else if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) { + p += nth * mrb_enc_mbmaxlen(enc); + } + else if (mrb_enc_asciicompat(mrb, enc)) { + const char *p2, *e2; + int n; + + while (p < e && 0 < nth) { + e2 = p + nth; + if (e < e2) + return (char *)e; + if (ISASCII(*p)) { + p2 = search_nonascii(p, e2); + if (!p2) + return (char *)e2; + nth -= p2 - p; + p = p2; + } + n = mrb_enc_mbclen(p, e, enc); + p += n; + nth--; + } + if (nth != 0) + return (char *)e; + return (char *)p; + } + else { + while (p e) p = e; + return (char*)p; +} + +static char* +str_nth(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc, int singlebyte) +{ + if (singlebyte) + p += nth; + else { + p = mrb_enc_nth(mrb, p, e, nth, enc); + } + if (!p) return 0; + if (p > e) p = e; + 
return (char *)p; +} + +/* char offset to byte offset */ +static long +str_offset(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc, int singlebyte) +{ + const char *pp = str_nth(mrb, p, e, nth, enc, singlebyte); + if (!pp) return e - p; + return pp - p; +} + +long +mrb_str_offset(mrb_state *mrb, mrb_value str, long pos) +{ + return str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos, + STR_ENC_GET(mrb, str), single_byte_optimizable(mrb, str)); +} + +static void +mrb_enc_cr_str_exact_copy(mrb_state *mrb, mrb_value dest, mrb_value src) +{ + str_enc_copy(mrb, dest, src); + ENC_CODERANGE_SET(dest, ENC_CODERANGE(src)); +} +#else +#define mrb_enc_cr_str_exact_copy(mrb, dest, src) +#endif //INCLUDE_ENCODING + +mrb_value +str_new4(mrb_state *mrb, enum mrb_vtype ttype, mrb_value str) +{ + mrb_value str2; + + str2 = mrb_obj_value(mrb_obj_alloc(mrb, ttype, mrb->string_class));//str_alloc(klass); + RSTRING(str2)->len = RSTRING_LEN(str); + RSTRING(str2)->buf = RSTRING_PTR(str); + + if (MRB_STR_SHARED_P(str)) { + mrb_value shared = RSTRING_SHARED(str); + FL_SET(str2, MRB_STR_SHARED); + RSTRING_SHARED(str2) = shared; + } + else { + FL_SET(str, MRB_STR_SHARED); + RSTRING_SHARED(str) = str2; + } + mrb_enc_cr_str_exact_copy(mrb, str2, str); + return str2; +} + +static mrb_value +str_new(mrb_state *mrb, enum mrb_vtype ttype, const char *p, size_t len) +{ + mrb_value str; + + if (len < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)"); + } + + //str = str_alloc(mrb); + str = mrb_str_buf_new(mrb, len); +#ifdef INCLUDE_ENCODING + if (len == 0) { + ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); + } +#endif //INCLUDE_ENCODING + if (p) { + memcpy(RSTRING_PTR(str), p, len); + } + STR_SET_LEN(str, len); + RSTRING_PTR(str)[len] = '\0'; + return str; +} + +mrb_value +mrb_str_new_with_class(mrb_state *mrb, mrb_value obj, const char *ptr, long len) +{ + return str_new(mrb, mrb_type(obj), ptr, len); +} + +#define mrb_str_new5 
mrb_str_new_with_class + +static mrb_value +str_new_empty(mrb_state *mrb, mrb_value str) +{ + mrb_value v = mrb_str_new5(mrb, str, 0, 0); + return v; +} + +mrb_value +mrb_str_buf_new(mrb_state *mrb, size_t capa) +{ + struct RString *s; + + s = mrb_obj_alloc(mrb, MRB_TT_STRING, mrb->string_class); + + if (capa < STR_BUF_MIN_SIZE) { + capa = STR_BUF_MIN_SIZE; + } + s->len = 0; + s->aux.capa = capa; + s->buf = mrb_malloc(mrb, capa+1); + s->buf[0] = '\0'; + + return mrb_obj_value(s); +} + +mrb_value +str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) +{ + long capa, total, off = -1; + + if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) { + off = ptr - RSTRING_PTR(str); + } + mrb_str_modify(mrb, str); + if (len == 0) return mrb_fixnum_value(0); + capa = RSTRING_CAPA(str); + if (RSTRING_LEN(str) >= LONG_MAX - len) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "string sizes too big"); + } + total = RSTRING_LEN(str)+len; + if (capa <= total) { + while (total > capa) { + if (capa + 1 >= LONG_MAX / 2) { + capa = (total + 4095) / 4096; + break; + } + capa = (capa + 1) * 2; + } + RESIZE_CAPA(str, capa); + } + if (off != -1) { + ptr = RSTRING_PTR(str) + off; + } + memcpy(RSTRING_PTR(str) + RSTRING_LEN(str), ptr, len); + STR_SET_LEN(str, total); + RSTRING_PTR(str)[total] = '\0'; /* sentinel */ + + return str; +} + +mrb_value +mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) +{ + if (len == 0) return str; + if (len < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)"); + } + return str_buf_cat(mrb, str, ptr, len); +} + +/* + * call-seq: + * String.new(str="") => new_str + * + * Returns a new string object containing a copy of str. 
+ */ + +mrb_value +mrb_str_new(mrb_state *mrb, const char *p, size_t len) +{ + struct RString *s; + + if (len == 0) { + return mrb_str_buf_new(mrb, len); + } + s = mrb_obj_alloc(mrb, MRB_TT_STRING, mrb->string_class); + s->buf = mrb_malloc(mrb, len+1); + if (p) { + memcpy(s->buf, p, len); + } + s->len = len; + s->aux.capa = len; + s->buf[len] ='\0'; + return mrb_obj_value(s); +} + +/* ptr==0 is error */ +mrb_value +mrb_str_new2(mrb_state *mrb, const char *ptr) +{ + if (!ptr) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "NULL pointer given"); + } +#ifdef INCLUDE_ENCODING + return mrb_usascii_str_new2(mrb, ptr); +#else + return mrb_str_new(mrb, ptr, strlen(ptr)); +#endif //INCLUDE_ENCODING +} + +#ifdef INCLUDE_ENCODING +mrb_value +mrb_enc_str_new(mrb_state *mrb, const char *ptr, long len, mrb_encoding *enc) +{ + mrb_value str = mrb_str_new(mrb, ptr, len); + mrb_enc_associate(mrb, str, enc); + return str; +} +#endif //INCLUDE_ENCODING + +/* + * call-seq: (Caution! NULL string) + * String.new(str="") => new_str + * + * Returns a new string object containing a copy of str. + */ + +mrb_value +mrb_str_new_cstr(mrb_state *mrb, const char *p) +{ + struct RString *s; + size_t len = strlen(p); + + s = mrb_obj_alloc(mrb, MRB_TT_STRING, mrb->string_class); + s->buf = mrb_malloc(mrb, len+1); + memcpy(s->buf, p, len); + s->buf[len] = 0; + s->len = len; + s->aux.capa = len; + + return mrb_obj_value(s); +} + +/* + * call-seq: (Caution! string literal) + * String.new(str="") => new_str + * + * Returns a new string object containing a copy of str. + */ + +mrb_value +mrb_str_literal(mrb_state *mrb, mrb_value lit) +{ + struct RString *s = mrb_str_ptr(lit); + + return mrb_str_new(mrb, s->buf, s->len); +} + +/* + * call-seq: + * char* str = String("abcd"), len=strlen("abcd") + * + * Returns a new string object containing a copy of str. 
+ */ +const char* +mrb_str_body(mrb_value str, int *len_p) +{ + struct RString *s = mrb_str_ptr(str); + + *len_p = s->len; + return s->buf; +} + +/* + * call-seq: (Caution! String("abcd") change) + * String("abcdefg") = String("abcd") + String("efg") + * + * Returns a new string object containing a copy of str. + */ +void +mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) +{ + struct RString *s1 = mrb_str_ptr(self), *s2; + size_t len; + + if (mrb_type(other) != MRB_TT_STRING) { + other = mrb_str_to_str(mrb, other); + } + s2 = mrb_str_ptr(other); + len = s1->len + s2->len; + + if (s1->aux.capa < len) { + s1->aux.capa = len; + s1->buf = mrb_realloc(mrb, s1->buf, len+1); + } + memcpy(s1->buf+s1->len, s2->buf, s2->len); + s1->len = len; + s1->buf[len] = 0; +} + +/* + * call-seq: (Caution! String("abcd") remain) + * String("abcdefg") = String("abcd") + String("efg") + * + * Returns a new string object containing a copy of str. + */ +mrb_value +mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) +{ + struct RString *s = mrb_str_ptr(a); + struct RString *s2 = mrb_str_ptr(b); + struct RString *t; + mrb_value r; + + r = mrb_str_new(mrb, 0, s->len + s2->len); + t = mrb_str_ptr(r); + memcpy(t->buf, s->buf, s->len); + memcpy(t->buf + s->len, s2->buf, s2->len); + + return r; +} + +/* 15.2.10.5.2 */ + +/* + * call-seq: (Caution! String("abcd") remain) for stack_argument + * String("abcdefg") = String("abcd") + String("efg") + * + * Returns a new string object containing a copy of str. 
+ */ +static mrb_value +mrb_str_plus_m(mrb_state *mrb, mrb_value self) +{ + mrb_value str3; + mrb_value str2; +#ifdef INCLUDE_ENCODING + mrb_encoding *enc; +#endif //INCLUDE_ENCODING + + //mrb_get_args(mrb, "s", &p, &len); + mrb_get_args(mrb, "o", &str2); + + mrb_string_value(mrb, &str2); +#ifdef INCLUDE_ENCODING + enc = mrb_enc_check(mrb, self, str2); +#endif //INCLUDE_ENCODING + str3 = mrb_str_new(mrb, 0, RSTRING_LEN(self)+RSTRING_LEN(str2)); + memcpy(RSTRING_PTR(str3), RSTRING_PTR(self), RSTRING_LEN(self)); + memcpy(RSTRING_PTR(str3) + RSTRING_LEN(self), + RSTRING_PTR(str2), RSTRING_LEN(str2)); + RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0'; +#ifdef INCLUDE_ENCODING + //if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2)) + // OBJ_TAINT(str3); + ENCODING_CODERANGE_SET(mrb, str3, mrb_enc_to_index(enc), + ENC_CODERANGE_AND(ENC_CODERANGE(self), ENC_CODERANGE(str2))); +#endif //INCLUDE_ENCODING + + return str3; +} + +/* + * call-seq: + * len = strlen(String("abcd")) + * + * Returns a new string object containing a copy of str. + */ +static mrb_value +mrb_str_bytesize(mrb_state *mrb, mrb_value self) +{ + struct RString *s = mrb_str_ptr(self); + + return mrb_fixnum_value(s->len); +} + +/* 15.2.10.5.26 */ +/* 15.2.10.5.33 */ +/* + * call-seq: + * len = strlen(String("abcd")) + * + * Returns a new string object containing a copy of str. + */ +mrb_value +mrb_str_size(mrb_state *mrb, mrb_value self) +{ +#ifdef INCLUDE_ENCODING + long len; + + len = str_strlen(mrb, self, STR_ENC_GET(mrb, self)); + return mrb_fixnum_value(len); +#else + return mrb_str_bytesize(mrb, self); +#endif //INCLUDE_ENCODING +} + +void +mrb_str_modify(mrb_state *mrb, mrb_value str) +{ + if (!str_independent(str)) + str_make_independent(mrb, str); +} + + +/* 15.2.10.5.1 */ + +/* + * call-seq: + * str * integer => new_str + * + * Copy---Returns a new String containing integer copies of + * the receiver. + * + * "Ho! " * 3 #=> "Ho! Ho! Ho! 
" + */ +static mrb_value +mrb_str_times(mrb_state *mrb, mrb_value self) +{ + mrb_value str2; + mrb_int n,len,times; + char *ptr2; + + mrb_get_args(mrb, "i", ×); + + if (times < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument"); + } + if (times && INT32_MAX/times < RSTRING_LEN(self)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big"); + } + + str2 = mrb_str_new5(mrb, self, 0, len = RSTRING_LEN(self)*times); + ptr2 = RSTRING_PTR(str2); + if (len > 0) { + n = RSTRING_LEN(self); + memcpy(ptr2, RSTRING_PTR(self), n); + while (n <= len/2) { + memcpy(ptr2 + n, ptr2, n); + n *= 2; + } + memcpy(ptr2 + n, ptr2, len-n); + } + ptr2[RSTRING_LEN(str2)] = '\0'; + + //OBJ_INFECT(str2, str); + mrb_enc_cr_str_copy_for_substr(mrb, str2, self); + + return str2; +} +/* -------------------------------------------------------------- */ + +#define lesser(a,b) (((a)>(b))?(b):(a)) + +/* ---------------------------*/ +/* + * call-seq: + * mrb_value str1 <=> mrb_value str2 => int + * > 1 + * = 0 + * < -1 + */ +int +mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) +{ + mrb_int len; + mrb_int retval; + struct RString *s1 = mrb_str_ptr(str1); + struct RString *s2 = mrb_str_ptr(str2); + + len = lesser(s1->len, s2->len); + retval = memcmp(s1->buf, s2->buf, len); + if (retval == 0) { + if (s1->len == s2->len) return 0; + if (s1->len > s2->len) return 1; + return -1; + } + if (retval > 0) return 1; + return -1; +} + +/* 15.2.10.5.3 */ + +/* + * call-seq: + * str <=> other_str => -1, 0, +1 + * + * Comparison---Returns -1 if other_str is less than, 0 if + * other_str is equal to, and +1 if other_str is greater than + * str. If the strings are of different lengths, and the strings are + * equal when compared up to the shortest length, then the longer string is + * considered greater than the shorter one. If the variable $= is + * false, the comparison is based on comparing the binary values + * of each character in the string. 
In older versions of Ruby, setting + * $= allowed case-insensitive comparisons; this is now deprecated + * in favor of using String#casecmp. + * + * <=> is the basis for the methods <, + * <=, >, >=, and between?, + * included from module Comparable. The method + * String#== does not use Comparable#==. + * + * "abcdef" <=> "abcde" #=> 1 + * "abcdef" <=> "abcdef" #=> 0 + * "abcdef" <=> "abcdefg" #=> -1 + * "abcdef" <=> "ABCDEF" #=> 1 + */ +static mrb_value +mrb_str_cmp_m(mrb_state *mrb, mrb_value str1) +{ + mrb_value str2; + mrb_int result; + + mrb_get_args(mrb, "o", &str2); + if (mrb_type(str2) != MRB_TT_STRING) { + if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_s"))) { + return mrb_nil_value(); + } + else if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "<=>"))) { + return mrb_nil_value(); + } + else + { + mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1); + + if (mrb_nil_p(tmp)) return mrb_nil_value(); + if (!mrb_fixnum(tmp)) { + return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp); + } + result = -mrb_fixnum(tmp); + } + } + else { + result = mrb_str_cmp(mrb, str1, str2); + } + return mrb_fixnum_value(result); +} + +#ifdef INCLUDE_ENCODING +int +mrb_str_comparable(mrb_state *mrb, mrb_value str1, mrb_value str2) +{ + int idx1, idx2; + int rc1, rc2; + + if (RSTRING_LEN(str1) == 0) return TRUE; + if (RSTRING_LEN(str2) == 0) return TRUE; + idx1 = ENCODING_GET(mrb, str1); + idx2 = ENCODING_GET(mrb, str2); + if (idx1 == idx2) return TRUE; + rc1 = mrb_enc_str_coderange(mrb, str1); + rc2 = mrb_enc_str_coderange(mrb, str2); + if (rc1 == ENC_CODERANGE_7BIT) { + if (rc2 == ENC_CODERANGE_7BIT) return TRUE; + if (mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx2))) + return TRUE; + } + if (rc2 == ENC_CODERANGE_7BIT) { + if (mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx1))) + return TRUE; + } + return FALSE; +} + +int +mrb_str_hash_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) +{ + long len; + + if (!mrb_str_comparable(mrb, str1, str2)) return 1; + 
if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) && + memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) { + return 0; + } + return 1; +} +#endif //INCLUDE_ENCODING + +static int +str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) +{ + const long len = RSTRING_LEN(str1); + + if (len != RSTRING_LEN(str2)) return FALSE; +#ifdef INCLUDE_ENCODING + if (!mrb_str_comparable(mrb, str1, str2)) return FALSE; +#endif //INCLUDE_ENCODING + if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) + return TRUE; + return FALSE; +} + +int +mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) +{ + if (mrb_obj_equal(mrb, str1, str2)) return TRUE; + if (mrb_type(str2) != MRB_TT_STRING) { + if (mrb_nil_p(str2)) return FALSE; + if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_str"))) { + return FALSE; + } + str2 = mrb_funcall(mrb, str2, "to_str", 0); + return mrb_equal(mrb, str2, str1); + } + return str_eql(mrb, str1, str2); +} + +/* 15.2.10.5.4 */ +/* + * call-seq: + * str == obj => true or false + * + * Equality--- + * If obj is not a String, returns false. + * Otherwise, returns false or true + * + * caution:if str <=> obj returns zero. 
+ */ +static mrb_value +mrb_str_equal_m(mrb_state *mrb, mrb_value str1) +{ + mrb_value str2; + + mrb_get_args(mrb, "o", &str2); + if (mrb_str_equal(mrb, str1, str2)) + return mrb_true_value(); + return mrb_false_value(); +} +/* ---------------------------------- */ +mrb_value +mrb_str_to_str(mrb_state *mrb, mrb_value str) +{ + mrb_value s; + + if (mrb_type(str) != MRB_TT_STRING) { + s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); + if (mrb_nil_p(s)) { + s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); + } + return s; + } + return str; +} + +mrb_value +mrb_string_value(mrb_state *mrb, mrb_value *ptr) +{ + struct RString *ps; + mrb_value s = *ptr; + if (mrb_type(s) != MRB_TT_STRING) { + s = mrb_str_to_str(mrb, s); + *ptr = s; + } + ps = mrb_str_ptr(s); + return s; +} + +char * +mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr) +{ + mrb_value str = mrb_string_value(mrb, &ptr); + return RSTRING_PTR(str); +} +/* 15.2.10.5.5 */ + +/* + * call-seq: + * str =~ obj -> fixnum or nil + * + * Match---If obj is a Regexp, use it as a pattern to match + * against str,and returns the position the match starts, or + * nil if there is no match. Otherwise, invokes + * obj.=~, passing str as an argument. The default + * =~ in Object returns nil. 
+ * + * "cat o' 9 tails" =~ /\d/ #=> 7 + * "cat o' 9 tails" =~ 9 #=> nil + */ + +static mrb_value +mrb_str_match(mrb_state *mrb, mrb_value self/* x */) +{ + mrb_value y; + mrb_get_args(mrb, "o", &y); + switch (mrb_type(y)) { + case MRB_TT_STRING: + mrb_raise(mrb, E_TYPE_ERROR, "type mismatch: String given"); + case MRB_TT_REGEX: +#ifdef INCLUDE_REGEXP + return mrb_reg_match_str(mrb, y, self); +#else + mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); +#endif //INCLUDE_REGEXP + default: + if (mrb_respond_to(mrb, y, mrb_intern(mrb, "=~"))) { + return mrb_funcall(mrb, y, "=~", 1, self); + } + else { + return mrb_nil_value(); + } + } +} +/* ---------------------------------- */ +#ifdef INCLUDE_ENCODING +#ifdef NONASCII_MASK +#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80) +static inline int +count_utf8_lead_bytes_with_word(const intptr_t *s) +{ + int d = *s; + d |= ~(d>>1); + d >>= 6; + d &= NONASCII_MASK >> 7; + d += (d>>8); + d += (d>>16); +#if SIZEOF_VALUE == 8 + d += (d>>32); +#endif + return (d&0xF); +} +#endif + +#ifdef NONASCII_MASK +static char * +str_utf8_nth(const char *p, const char *e, long nth) +{ + if ((int)SIZEOF_VALUE < e - p && (int)SIZEOF_VALUE * 2 < nth) { + const intptr_t *s, *t; + const intptr_t lowbits = sizeof(int) - 1; + s = (const intptr_t*)(~lowbits & ((intptr_t)p + lowbits)); + t = (const intptr_t*)(~lowbits & (intptr_t)e); + while (p < (const char *)s) { + if (is_utf8_lead_byte(*p)) nth--; + p++; + } + do { + nth -= count_utf8_lead_bytes_with_word(s); + s++; + } while (s < t && (int)sizeof(intptr_t) <= nth); + p = (char *)s; + } + while (p < e) { + if (is_utf8_lead_byte(*p)) { + if (nth == 0) break; + nth--; + } + p++; + } + return (char *)p; +} + +static long +str_utf8_offset(const char *p, const char *e, long nth) +{ + const char *pp = str_utf8_nth(p, e, nth); + return pp - p; +} +#endif +#endif //INCLUDE_ENCODING + +mrb_value +mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len) +{ +#ifdef INCLUDE_ENCODING + 
mrb_encoding *enc = STR_ENC_GET(mrb, str); +#endif //INCLUDE_ENCODING + mrb_value str2; +#ifdef INCLUDE_ENCODING + char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str); +#else + char *p, *s = RSTRING_PTR(str); +#endif //INCLUDE_ENCODING + + if (len < 0) return mrb_nil_value(); + if (!RSTRING_LEN(str)) { + len = 0; + } +#ifdef INCLUDE_ENCODING + if (single_byte_optimizable(mrb, str)) { +#endif //INCLUDE_ENCODING + if (beg > RSTRING_LEN(str)) return mrb_nil_value(); + if (beg < 0) { + beg += RSTRING_LEN(str); + if (beg < 0) return mrb_nil_value(); + } + if (beg + len > RSTRING_LEN(str)) + len = RSTRING_LEN(str) - beg; + if (len <= 0) { + len = 0; + p = 0; + } + else + p = s + beg; +#ifdef INCLUDE_ENCODING + goto sub; + } + if (beg < 0) { + if (len > -beg) len = -beg; + if (-beg * mrb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) { + beg = -beg; + while (beg-- > len && (e = mrb_enc_prev_char(s, e, e, enc)) != 0); + p = e; + if (!p) return mrb_nil_value(); + while (len-- > 0 && (p = mrb_enc_prev_char(s, p, e, enc)) != 0); + if (!p) return mrb_nil_value(); + len = e - p; + goto sub; + } + else { + beg += str_strlen(mrb, str, enc); + if (beg < 0) return mrb_nil_value(); + } + } + else if (beg > 0 && beg > str_strlen(mrb, str, enc)) { + return mrb_nil_value(); + } + if (len == 0) { + p = 0; + } +#ifdef NONASCII_MASK + else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID && + enc == mrb_utf8_encoding(mrb)) { + p = str_utf8_nth(s, e, beg); + len = str_utf8_offset(p, e, len); + } +#endif + else if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) { + int char_sz = mrb_enc_mbmaxlen(enc); + + p = s + beg * char_sz; + if (p > e) { + p = e; + len = 0; + } + else if (len * char_sz > e - p) + len = e - p; + else + len *= char_sz; + } + else if ((p = str_nth(mrb, s, e, beg, enc, 0)) == e) { + len = 0; + } + else { + len = str_offset(mrb, p, e, len, enc, 0); + } +sub: +#endif //INCLUDE_ENCODING + if (len > STR_BUF_MIN_SIZE && beg + len == RSTRING_LEN(str)) { +#ifdef INCLUDE_ENCODING + 
str2 = mrb_str_new4(mrb, str); + str2 = str_new3(mrb, mrb_obj_class(mrb, str2), str2); +#else + str2 = mrb_str_new(mrb, s, RSTRING_LEN(str)); +#endif //INCLUDE_ENCODING + RSTRING(str2)->buf += RSTRING(str2)->len - len; + RSTRING(str2)->len = len; + } + else { + str2 = mrb_str_new5(mrb, str, p, len); + mrb_enc_cr_str_copy_for_substr(mrb, str2, str); + } + + return str2; +} + +#ifdef INCLUDE_REGEXP +static mrb_value +mrb_str_subpat(mrb_state *mrb, mrb_value str, mrb_value re, mrb_int backref) +{ + if (mrb_reg_search(mrb, re, str, 0, 0) >= 0) { + mrb_value match = mrb_backref_get(mrb); + int nth = mrb_reg_backref_number(mrb, match, mrb_fixnum_value(backref)); + return mrb_reg_nth_match(mrb, nth, mrb_backref_get(mrb)); + } + return mrb_nil_value(); +} +#endif //INCLUDE_REGEXP + +/* --- 1-8-7parse.c --> */ + +#ifdef INCLUDE_ENCODING +long +mrb_enc_strlen_cr(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc, int *cr) +{ + long c; + const char *q; + int ret; + + *cr = 0; + if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) { + return (e - p + mrb_enc_mbminlen(enc) - 1) / mrb_enc_mbminlen(enc); + } + else if (mrb_enc_asciicompat(mrb, enc)) { + c = 0; + while (p < e) { + if (ISASCII(*p)) { + q = search_nonascii(p, e); + if (!q) { + if (!*cr) *cr = ENC_CODERANGE_7BIT; + return c + (e - p); + } + c += q - p; + p = q; + } + ret = mrb_enc_precise_mbclen(p, e, enc); + if (MBCLEN_CHARFOUND_P(ret)) { + *cr |= ENC_CODERANGE_VALID; + p += MBCLEN_CHARFOUND_LEN(ret); + } + else { + *cr = ENC_CODERANGE_BROKEN; + p++; + } + c++; + } + if (!*cr) *cr = ENC_CODERANGE_7BIT; + return c; + } + + for (c=0; p> ((SIZEOF_VALUE - m) * CHAR_BIT); + + if (m > SIZEOF_VALUE) + mrb_bug("!!too long pattern string!!"); + + /* Prepare hash value */ + for (hx = *x++, hy = *y++; x < xe; ++x, ++y) { + hx <<= CHAR_BIT; + hy <<= CHAR_BIT; + hx |= *x; + hy |= *y; + } + /* Searching */ + while (hx != hy) { + if (y == ye) + return -1; + hy <<= CHAR_BIT; + hy |= *y; + hy &= mask; + y++; + } + return 
y - ys - m; +} + +static inline long +mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, *xe = xs + m; + const unsigned char *y = ys; + int i, qstable[256]; + + /* Preprocessing */ + for (i = 0; i < 256; ++i) + qstable[i] = m + 1; + for (; x < xe; ++x) + qstable[*x] = xe - x; + /* Searching */ + for (; y + m <= ys + n; y += *(qstable + y[m])) { + if (*xs == *y && memcmp(xs, y, m) == 0) + return y - ys; + } + return -1; +} +int +mrb_memsearch(const void *x0, int m, const void *y0, int n) +{ + const unsigned char *x = x0, *y = y0; + + if (m > n) return -1; + else if (m == n) { + return memcmp(x0, y0, m) == 0 ? 0 : -1; + } + else if (m < 1) { + return 0; + } + else if (m == 1) { + const unsigned char *ys = y, *ye = ys + n; + for (; y < ye; ++y) { + if (*x == *y) + return y - ys; + } + return -1; + } + else if (m <= SIZEOF_VALUE) { + return mrb_memsearch_ss(x0, m, y0, n); + } + else { + return mrb_memsearch_qs(x0, m, y0, n); + } +} +#endif //INCLUDE_ENCODING + +/* --- 1-8-7parse.c --< */ +#ifdef INCLUDE_ENCODING +static long +str_strlen(mrb_state *mrb, mrb_value str, mrb_encoding *enc) +{ + const char *p, *e; + long n; + int cr; + + if (single_byte_optimizable(mrb, str)) return RSTRING_LEN(str); + if (!enc) enc = STR_ENC_GET(mrb, str); + p = RSTRING_PTR(str); + e = RSTRING_END(str); + cr = ENC_CODERANGE(str); +#ifdef NONASCII_MASK + if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID && + enc == mrb_utf8_encoding(mrb)) { + + int len = 0; + if ((int)sizeof(intptr_t) * 2 < e - p) { + const intptr_t *s, *t; + const intptr_t lowbits = sizeof(int) - 1; + s = (const intptr_t*)(~lowbits & ((intptr_t)p + lowbits)); + t = (const intptr_t*)(~lowbits & (intptr_t)e); + while (p < (const char *)s) { + if (is_utf8_lead_byte(*p)) len++; + p++; + } + while (s < t) { + len += count_utf8_lead_bytes_with_word(s); + s++; + } + p = (const char *)s; + } + while (p < e) { + if (is_utf8_lead_byte(*p)) len++; + p++; + } + return 
(long)len; + } +#endif + n = mrb_enc_strlen_cr(mrb, p, e, enc, &cr); + if (cr) { + ENC_CODERANGE_SET(str, cr); + } + return n; +} +#endif //INCLUDE_ENCODING + +/* + * Searches str for the first occurrence of sub at or after offset. + * A negative offset is counted back from the end of str. + * Returns the position of the match, or -1 when there is no match or + * the part of str from offset on is shorter than sub. + */ +static mrb_int +mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset) +{ + mrb_int pos; + char *s, *sptr, *e; + int len, slen; +#ifdef INCLUDE_ENCODING + mrb_encoding *enc; + + enc = mrb_enc_check(mrb, str, sub); + if (is_broken_string(mrb, sub)) { + return -1; + } + len = str_strlen(mrb, str, enc); + slen = str_strlen(mrb, sub, enc); +#else + len = RSTRING_LEN(str); + slen = RSTRING_LEN(sub); +#endif //INCLUDE_ENCODING + if (offset < 0) { + offset += len; + if (offset < 0) return -1; + } + if (len - offset < slen) return -1; + s = RSTRING_PTR(str); + e = s + RSTRING_LEN(str); + if (offset) { +#ifdef INCLUDE_ENCODING + offset = str_offset(mrb, s, RSTRING_END(str), offset, enc, single_byte_optimizable(mrb, str)); +#endif //INCLUDE_ENCODING + s += offset; + } + if (slen == 0) return offset; + /* need proceed one character at a time */ + sptr = RSTRING_PTR(sub); + slen = RSTRING_LEN(sub); + len = RSTRING_LEN(str) - offset; +#ifdef INCLUDE_ENCODING + for (;;) { + char *t; + pos = mrb_memsearch(mrb, sptr, slen, s, len, enc); + if (pos < 0) return pos; + t = mrb_enc_right_char_head(s, s+pos, e, enc); + if (t == s + pos) break; + if ((len -= t - s) <= 0) return -1; + offset += t - s; + s = t; + } +#else + /* s and len were already advanced/shortened by offset above, so search + * exactly that window; applying offset a second time skipped valid + * matches and made the returned position wrong. */ + pos = mrb_memsearch(sptr, slen, s, len); + if (pos < 0) return pos; +#endif //INCLUDE_ENCODING + return pos + offset; +} + +/* + * Returns a newly allocated copy of str that owns its own buffer. + */ +mrb_value +mrb_str_dup(mrb_state *mrb, mrb_value str) +{ + struct RString *s = mrb_str_ptr(str); + struct RString *dup; + + dup = mrb_obj_alloc(mrb, MRB_TT_STRING, mrb->string_class); + dup->buf = mrb_malloc(mrb, s->len+1); + if (s->buf) { + memcpy(dup->buf, s->buf, s->len); + } + /* terminate unconditionally so the copy's buffer is never left + * uninitialized when the source has no buffer */ + dup->buf[s->len] = 0; + dup->len = s->len; + dup->aux.capa = s->len; + return mrb_obj_value(dup); +} + +static mrb_value +mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value 
indx) +{ + long idx; + + switch (mrb_type(indx)) { + case MRB_TT_FIXNUM: + idx = mrb_fixnum(indx); + +num_index: + str = mrb_str_substr(mrb, str, idx, 1); + if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value(); + return str; + + case MRB_TT_REGEX: +#ifdef INCLUDE_REGEXP + return mrb_str_subpat(mrb, str, indx, 0); //mrb_str_subpat(str, indx, INT2FIX(0)); +#else + mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); + return mrb_nil_value(); +#endif //INCLUDE_REGEXP + + case MRB_TT_STRING: + if (mrb_str_index(mrb, str, indx, 0) != -1) + return mrb_str_dup(mrb, indx); + return mrb_nil_value(); + + default: + /* check if indx is Range */ + { + mrb_int beg, len; + mrb_value tmp; + +#ifdef INCLUDE_ENCODING + len = str_strlen(mrb, str, STR_ENC_GET(mrb, str)); +#else + len = RSTRING_LEN(str); +#endif //INCLUDE_ENCODING + switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, 0)) { + case 0/*FLASE*/: + break; + case 2/*OTHER*/: + return mrb_nil_value(); + default: + tmp = mrb_str_substr(mrb, str, beg, len); + /*OBJ_INFECT(tmp, indx);*/ + return tmp; + } + } + idx = mrb_fixnum(indx); + goto num_index; + } + return mrb_nil_value(); /* not reached */ +} + +/* 15.2.10.5.6 */ +/* 15.2.10.5.34 */ +/* + * call-seq: + * str[fixnum] => fixnum or nil + * str[fixnum, fixnum] => new_str or nil + * str[range] => new_str or nil + * str[regexp] => new_str or nil + * str[regexp, fixnum] => new_str or nil + * str[other_str] => new_str or nil + * str.slice(fixnum) => fixnum or nil + * str.slice(fixnum, fixnum) => new_str or nil + * str.slice(range) => new_str or nil + * str.slice(regexp) => new_str or nil + * str.slice(regexp, fixnum) => new_str or nil + * str.slice(other_str) => new_str or nil + * + * Element Reference---If passed a single Fixnum, returns the code + * of the character at that position. If passed two Fixnum + * objects, returns a substring starting at the offset given by the first, and + * a length given by the second. 
If given a range, a substring containing + * characters at offsets given by the range is returned. In all three cases, if + * an offset is negative, it is counted from the end of str. Returns + * nil if the initial offset falls outside the string, the length + * is negative, or the beginning of the range is greater than the end. + * + * If a Regexp is supplied, the matching portion of str is + * returned. If a numeric parameter follows the regular expression, that + * component of the MatchData is returned instead. If a + * String is given, that string is returned if it occurs in + * str. In both cases, nil is returned if there is no + * match. + * + * a = "hello there" + * a[1] #=> 101(1.8.7) "e"(1.9.2) + * a[1,3] #=> "ell" + * a[1..3] #=> "ell" + * a[-3,2] #=> "er" + * a[-4..-2] #=> "her" + * a[12..-1] #=> nil + * a[-2..-4] #=> "" + * a[/[aeiou](.)\1/] #=> "ell" + * a[/[aeiou](.)\1/, 0] #=> "ell" + * a[/[aeiou](.)\1/, 1] #=> "l" + * a[/[aeiou](.)\1/, 2] #=> nil + * a["lo"] #=> "lo" + * a["bye"] #=> nil + */ +static mrb_value +mrb_str_aref_m(mrb_state *mrb, mrb_value str) +{ + int argc; + mrb_value *argv; + + mrb_get_args(mrb, "*", &argv, &argc); + if (argc == 2) { + if (mrb_type(argv[0]) == MRB_TT_REGEX) { +#ifdef INCLUDE_REGEXP + return mrb_str_subpat(mrb, str, argv[0], mrb_fixnum(argv[1])); +#else + mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); + return mrb_nil_value(); +#endif //INCLUDE_REGEXP + } + return mrb_str_substr(mrb, str, mrb_fixnum(argv[0]), mrb_fixnum(argv[1])); + } + if (argc != 1) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1)", argc); + } + return mrb_str_aref(mrb, str, argv[0]); +} + +#ifdef INCLUDE_ENCODING +/* As mrb_str_modify(), but don't clear coderange */ +static void +str_modify_keep_cr(mrb_state *mrb, mrb_value str) +{ + if (!str_independent(str)) + str_make_independent(mrb, str); + if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN) + /* Force re-scan later */ + ENC_CODERANGE_CLEAR(str); +} + 
+static void +mrb_str_check_dummy_enc(mrb_state *mrb, mrb_encoding *enc) +{ + if (mrb_enc_dummy_p(enc)) { + mrb_raise(mrb, E_ENCODING_ERROR, "incompatible encoding with this operation: %s", + mrb_enc_name(enc)); + } +} +#else +#define str_modify_keep_cr(mrb, str) mrb_str_modify((mrb), (str)) +#endif //INCLUDE_ENCODING + +/* 15.2.10.5.8 */ +/* + * call-seq: + * str.capitalize! => str or nil + * + * Modifies str by converting the first character to uppercase and the + * remainder to lowercase. Returns nil if no changes are made. + * + * a = "hello" + * a.capitalize! #=> "Hello" + * a #=> "Hello" + * a.capitalize! #=> nil + */ +static mrb_value +mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str) +{ +#ifdef INCLUDE_ENCODING + mrb_encoding *enc; +#endif //INCLUDE_ENCODING + char *s, *send; + int modify = 0; +#ifdef INCLUDE_ENCODING + unsigned int c; + int n; +#endif //INCLUDE_ENCODING + + str_modify_keep_cr(mrb, str); +#ifdef INCLUDE_ENCODING + enc = STR_ENC_GET(mrb, str); + mrb_str_check_dummy_enc(mrb, enc); +#endif //INCLUDE_ENCODING + if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return mrb_nil_value(); + s = RSTRING_PTR(str); send = RSTRING_END(str); +#ifdef INCLUDE_ENCODING + c = mrb_enc_codepoint_len(mrb, s, send, &n, enc); + if (mrb_enc_islower(c, enc)) { + mrb_enc_mbcput(mrb_enc_toupper(c, enc), s, enc); + modify = 1; + } + s += n; + while (s < send) { + c = mrb_enc_codepoint_len(mrb, s, send, &n, enc); + if (mrb_enc_isupper(c, enc)) { + mrb_enc_mbcput(mrb_enc_tolower(c, enc), s, enc); + modify = 1; + } + s += n; + } +#else + if (ISLOWER(*s)) { + *s = toupper(*s); + modify = 1; + } + while (++s < send) { + if (ISUPPER(*s)) { + *s = tolower(*s); + modify = 1; + } + } +#endif //INCLUDE_ENCODING + if (modify) return str; + return mrb_nil_value(); +} + +/* 15.2.10.5.7 */ +/* + * call-seq: + * str.capitalize => new_str + * + * Returns a copy of str with the first character converted to uppercase + * and the remainder to lowercase. 
+ * + * "hello".capitalize #=> "Hello" + * "HELLO".capitalize #=> "Hello" + * "123ABC".capitalize #=> "123abc" + */ +static mrb_value +mrb_str_capitalize(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + + str = mrb_str_dup(mrb, self); + mrb_str_capitalize_bang(mrb, str); + return str; +} + +/* 15.2.10.5.10 */ +/* + * call-seq: + * str.chomp!(separator=$/) => str or nil + * + * Modifies str in place as described for String#chomp, + * returning str, or nil if no modifications were made. + */ +static mrb_value +mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value *argv; + int argc; +#ifdef INCLUDE_ENCODING + mrb_encoding *enc; +#endif //INCLUDE_ENCODING + mrb_value rs; + mrb_int newline; + char *p, *pp, *e; + long len, rslen; + + str_modify_keep_cr(mrb, str); + len = RSTRING_LEN(str); + if (len == 0) return mrb_nil_value(); + p = RSTRING_PTR(str); + e = p + len; + //if (mrb_scan_args(argc, argv, "01", &rs) == 0) { + mrb_get_args(mrb, "*", &argv, &argc); + if (argc == 0) { + rs = mrb_str_new2(mrb, "\n"); +smart_chomp: +#ifdef INCLUDE_ENCODING + enc = mrb_enc_get(mrb, str); + if (mrb_enc_mbminlen(enc) > 1) { + pp = mrb_enc_left_char_head(p, e-mrb_enc_mbminlen(enc), e, enc); + if (mrb_enc_is_newline(pp, e, enc)) { + e = pp; + } + pp = e - mrb_enc_mbminlen(enc); + if (pp >= p) { + pp = mrb_enc_left_char_head(p, pp, e, enc); + if (mrb_enc_ascget(mrb, pp, e, 0, enc) == '\r') { + e = pp; + } + } + if (e == RSTRING_END(str)) { + return mrb_nil_value(); + } + len = e - RSTRING_PTR(str); + STR_SET_LEN(str, len); + } + else { +#endif //INCLUDE_ENCODING + if (RSTRING_PTR(str)[len-1] == '\n') { + STR_DEC_LEN(str); + if (RSTRING_LEN(str) > 0 && + RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') { + STR_DEC_LEN(str); + } + } + else if (RSTRING_PTR(str)[len-1] == '\r') { + STR_DEC_LEN(str); + } + else { + return mrb_nil_value(); + } +#ifdef INCLUDE_ENCODING + } +#endif //INCLUDE_ENCODING + RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0'; + return str; + } + rs = argv[0]; + if 
(mrb_nil_p(rs)) return mrb_nil_value(); + //StringValue(rs); + mrb_string_value(mrb, &rs); + rslen = RSTRING_LEN(rs); + if (rslen == 0) { + while (len>0 && p[len-1] == '\n') { + len--; + if (len>0 && p[len-1] == '\r') + len--; + } + if (len < RSTRING_LEN(str)) { + STR_SET_LEN(str, len); + RSTRING_PTR(str)[len] = '\0'; + return str; + } + return mrb_nil_value(); + } + if (rslen > len) return mrb_nil_value(); + newline = RSTRING_PTR(rs)[rslen-1]; + if (rslen == 1 && newline == '\n') + goto smart_chomp; + +#ifdef INCLUDE_ENCODING + enc = mrb_enc_check(mrb, str, rs); + if (is_broken_string(mrb, rs)) { + return mrb_nil_value(); + } + pp = e - rslen; +#else + pp = p + len - rslen; +#endif //INCLUDE_ENCODING + if (p[len-1] == newline && + (rslen <= 1 || + memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) { +#ifdef INCLUDE_ENCODING + if (mrb_enc_left_char_head(p, pp, e, enc) != pp) + return mrb_nil_value(); + if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) { + ENC_CODERANGE_CLEAR(str); + } +#endif //INCLUDE_ENCODING + STR_SET_LEN(str, RSTRING_LEN(str) - rslen); + RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0'; + return str; + } + return mrb_nil_value(); +} + +/* 15.2.10.5.9 */ +/* + * call-seq: + * str.chomp(separator=$/) => new_str + * + * Returns a new String with the given record separator removed + * from the end of str (if present). If $/ has not been + * changed from the default Ruby record separator, then chomp also + * removes carriage return characters (that is it will remove \n, + * \r, and \r\n). 
+ * + * "hello".chomp #=> "hello" + * "hello\n".chomp #=> "hello" + * "hello\r\n".chomp #=> "hello" + * "hello\n\r".chomp #=> "hello\n" + * "hello\r".chomp #=> "hello" + * "hello \n there".chomp #=> "hello \n there" + * "hello".chomp("llo") #=> "he" + */ +static mrb_value +mrb_str_chomp(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + + str = mrb_str_dup(mrb, self); + mrb_str_chomp_bang(mrb, str); + return str; +} + +#ifdef INCLUDE_ENCODING +static long +chopped_length(mrb_state *mrb, mrb_value str) +{ + mrb_encoding *enc = STR_ENC_GET(mrb, str); + const char *p, *p2, *beg, *end; + + beg = RSTRING_PTR(str); + end = beg + RSTRING_LEN(str); + if (beg > end) return 0; + p = mrb_enc_prev_char(beg, end, end, enc); + if (!p) return 0; + if (p > beg && mrb_enc_ascget(mrb, p, end, 0, enc) == '\n') { + p2 = mrb_enc_prev_char(beg, p, end, enc); + if (p2 && mrb_enc_ascget(mrb, p2, end, 0, enc) == '\r') p = p2; + } + return p - beg; +} +#endif //INCLUDE_ENCODING + +/* 15.2.10.5.12 */ +/* + * call-seq: + * str.chop! => str or nil + * + * Processes str as for String#chop, returning str, + * or nil if str is the empty string. See also + * String#chomp!. + */ +static mrb_value +mrb_str_chop_bang(mrb_state *mrb, mrb_value str) +{ + str_modify_keep_cr(mrb, str); + if (RSTRING_LEN(str) > 0) { +#ifdef INCLUDE_ENCODING + long len; + len = chopped_length(mrb, str); +#else + size_t len; + len = RSTRING_LEN(str) - 1; + if (RSTRING_PTR(str)[len] == '\n') { + if (len > 0 && + RSTRING_PTR(str)[len-1] == '\r') { + len--; + } + } +#endif //INCLUDE_ENCODING + STR_SET_LEN(str, len); + RSTRING_PTR(str)[len] = '\0'; +#ifdef INCLUDE_ENCODING + if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) { + ENC_CODERANGE_CLEAR(str); + } +#endif //INCLUDE_ENCODING + return str; + } + return mrb_nil_value(); +} + +/* 15.2.10.5.11 */ +/* + * call-seq: + * str.chop => new_str + * + * Returns a new String with the last character removed. If the + * string ends with \r\n, both characters are removed. 
Applying + * chop to an empty string returns an empty + * string. String#chomp is often a safer alternative, as it leaves + * the string unchanged if it doesn't end in a record separator. + * + * "string\r\n".chop #=> "string" + * "string\n\r".chop #=> "string\n" + * "string\n".chop #=> "string" + * "string".chop #=> "strin" + * "x".chop #=> "" + */ +static mrb_value +mrb_str_chop(mrb_state *mrb, mrb_value self) +{ + mrb_value str; +#ifdef INCLUDE_ENCODING + str = mrb_str_new5(mrb, self, RSTRING_PTR(self), chopped_length(mrb, self)); + mrb_enc_cr_str_copy_for_substr(mrb, str, self); +#else + str = mrb_str_dup(mrb, self); + mrb_str_chop_bang(mrb, str); +#endif //INCLUDE_ENCODING + return str; +} + +/* 15.2.10.5.14 */ +/* + * call-seq: + * str.downcase! => str or nil + * + * Downcases the contents of str, returning nil if no + * changes were made. + */ +static mrb_value +mrb_str_downcase_bang(mrb_state *mrb, mrb_value str) +{ +#ifdef INCLUDE_ENCODING + mrb_encoding *enc; +#endif //INCLUDE_ENCODING + char *s, *send; + int modify = 0; + + str_modify_keep_cr(mrb, str); +#ifdef INCLUDE_ENCODING + enc = STR_ENC_GET(mrb, str); + mrb_str_check_dummy_enc(mrb, enc); +#endif //INCLUDE_ENCODING + s = RSTRING_PTR(str); send = RSTRING_END(str); +#ifdef INCLUDE_ENCODING + if (single_byte_optimizable(mrb, str)) { +#endif //INCLUDE_ENCODING + while (s < send) { + unsigned int c = *(unsigned char*)s; + +#ifdef INCLUDE_ENCODING + if (mrb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') { +#else + if ('A' <= c && c <= 'Z') { +#endif //INCLUDE_ENCODING + *s = 'a' + (c - 'A'); + modify = 1; + } + s++; + } +#ifdef INCLUDE_ENCODING + } + else { + int ascompat = mrb_enc_asciicompat(mrb, enc); + + while (s < send) { + unsigned int c; + int n; + + if (ascompat && (c = *(unsigned char*)s) < 0x80) { + if (mrb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') { + *s = 'a' + (c - 'A'); + modify = 1; + } + s++; + } + else { + c = mrb_enc_codepoint_len(mrb, s, send, &n, enc); + if (mrb_enc_isupper(c, enc)) 
{ + /* assuming toupper returns codepoint with same size */ + mrb_enc_mbcput(mrb_enc_tolower(c, enc), s, enc); + modify = 1; + } + s += n; + } + } + } +#endif //INCLUDE_ENCODING + if (modify) return str; + return mrb_nil_value(); +} + +/* 15.2.10.5.13 */ +/* + * call-seq: + * str.downcase => new_str + * + * Returns a copy of str with all uppercase letters replaced with their + * lowercase counterparts. The operation is locale insensitive---only + * characters ``A'' to ``Z'' are affected. + * + * "hEllO".downcase #=> "hello" + */ +static mrb_value +mrb_str_downcase(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + + str = mrb_str_dup(mrb, self); + mrb_str_downcase_bang(mrb, str); + return str; +} + +/* 15.2.10.5.15 */ +/* + * call-seq: + * str.each(separator=$/) {|substr| block } => str + * str.each_line(separator=$/) {|substr| block } => str + * + * Splits str using the supplied parameter as the record separator + * ($/ by default), passing each substring in turn to the supplied + * block. If a zero-length record separator is supplied, the string is split + * into paragraphs delimited by multiple successive newlines. 
+ * + * print "Example one\n" + * "hello\nworld".each {|s| p s} + * print "Example two\n" + * "hello\nworld".each('l') {|s| p s} + * print "Example three\n" + * "hello\n\n\nworld".each('') {|s| p s} + * + * produces: + * + * Example one + * "hello\n" + * "world" + * Example two + * "hel" + * "l" + * "o\nworl" + * "d" + * Example three + * "hello\n\n\n" + * "world" + */ +static mrb_value +mrb_str_each_line(mrb_state *mrb, mrb_value str) +{ + mrb_value rs; + int newline; + struct RString *ps = mrb_str_ptr(str); + char *p = ps->buf, *pend = p + ps->len, *s; + char *ptr = p; + long len = ps->len, rslen; + mrb_value line; + struct RString *prs; + mrb_value *argv, b; + int argc; + + //if (mrb_scan_args(argc, argv, "01", &rs) == 0) { + mrb_get_args(mrb, "&*", &b, &argv, &argc); + if (argc > 0) { + rs = argv[0]; + } else { + rs = mrb_str_new2(mrb, "\n"); + } + /*RETURN_ENUMERATOR(str, argc, argv);*/ + if (mrb_nil_p(rs)) { + mrb_yield(mrb, b, str); + return str; + } + //StringValue(rs); + mrb_string_value(mrb, &rs); + prs = mrb_str_ptr(rs); + rslen = prs->len; + if (rslen == 0) { + newline = '\n'; + } + else { + newline = prs->buf[rslen-1]; + } + + for (s = p, p += rslen; p < pend; p++) { + if (rslen == 0 && *p == '\n') { + if (*++p != '\n') continue; + while (*p == '\n') p++; + } + if (ps->buf < p && p[-1] == newline && + (rslen <= 1 || + memcmp(prs->buf, p-rslen, rslen) == 0)) { + line = mrb_str_new5(mrb, str, s, p - s); + /*OBJ_INFECT(line, str);*/ + mrb_yield(mrb, b, line); + str_mod_check(mrb, str, ptr, len); + s = p; + } + } + + if (s != pend) { + if (p > pend) p = pend; + line = mrb_str_new5(mrb, str, s, p - s); + /*OBJ_INFECT(line, str);*/ + mrb_yield(mrb, b, line); + } + + return str; +} + +/* 15.2.10.5.16 */ +/* + * call-seq: + * str.empty? => true or false + * + * Returns true if str has a length of zero. + * + * "hello".empty? #=> false + * "".empty? 
#=> true + */ +static mrb_value +mrb_str_empty(mrb_state *mrb, mrb_value self) +{ + struct RString *s = mrb_str_ptr(self); + + if (s->len == 0) + return mrb_true_value(); + return mrb_false_value(); +} + +/* 15.2.10.5.17 */ +/* + * call-seq: + * str.eql?(other) => true or false + * + * Two strings are equal if the have the same length and content. + */ +static mrb_value +mrb_str_eql(mrb_state *mrb, mrb_value self) +{ + mrb_value str2; + + mrb_get_args(mrb, "o", &str2); + if (mrb_type(str2) != MRB_TT_STRING) + return mrb_false_value(); + if (str_eql(mrb, self, str2)) + return mrb_true_value(); + return mrb_false_value(); +} + +#ifdef INCLUDE_ENCODING +static void +mrb_enc_cr_str_copy_for_substr(mrb_state *mrb, mrb_value dest, mrb_value src) +{ + /* this function is designed for copying encoding and coderange + * from src to new string "dest" which is made from the part of src. + */ + str_enc_copy(mrb, dest, src); + switch (ENC_CODERANGE(src)) { + case ENC_CODERANGE_7BIT: + ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT); + break; + case ENC_CODERANGE_VALID: + if (!mrb_enc_asciicompat(mrb, STR_ENC_GET(mrb, src)) || + search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest))) + ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID); + else + ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT); + break; + default: + if (RSTRING_LEN(dest) == 0) { + if (!mrb_enc_asciicompat(mrb, STR_ENC_GET(mrb, src))) + ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID); + else + ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT); + } + break; + } +} +#endif //INCLUDE_ENCODING + +static mrb_value +str_replace_shared(mrb_state *mrb, mrb_value str2, mrb_value str) +{ + str = mrb_str_new_frozen(mrb, str); + RSTRING(str2)->len = RSTRING_LEN(str); + RSTRING(str2)->buf = RSTRING_PTR(str); + RSTRING_SHARED(str2) = str; + FL_SET(str2, MRB_STR_SHARED); + mrb_enc_cr_str_exact_copy(mrb, str2, str); + + return str2; +} + +static mrb_value +str_new_shared(mrb_state *mrb, struct RClass* klass, mrb_value str) +{ + return 
str_replace_shared(mrb, str_alloc(mrb), str); +} + +mrb_value +str_new3(mrb_state *mrb, struct RClass* klass, mrb_value str) +{ + return str_new_shared(mrb, klass, str); +} + +mrb_value +mrb_str_new_shared(mrb_state *mrb, mrb_value str) +{ + mrb_value str2 = str_new3(mrb, mrb_obj_class(mrb, str), str); + + //OBJ_INFECT(str2, str); + return str2; +} + +/* + * Returns the string that backs orig for sharing purposes. + * + * If orig already shares a buffer, its shared parent is used. When the + * parent is longer than orig, or its class (with INCLUDE_ENCODING: or its + * encoding) differs, a new shared string over the parent is made instead + * and advanced by the length difference, so that it spans the trailing + * RSTRING_LEN(orig) bytes of the parent. If orig is not shared, the + * result comes from str_new4(). + */ +mrb_value +mrb_str_new_frozen(mrb_state *mrb, mrb_value orig) +{ + struct RClass* klass; + mrb_value str; + + klass = mrb_obj_class(mrb, orig); + + if (MRB_STR_SHARED_P(orig) && !mrb_nil_p(RSTRING_SHARED(orig))) { + long ofs; + + /* Start from the shared parent: str used to be read below before + * anything had been assigned to it. */ + str = RSTRING_SHARED(orig); + ofs = RSTRING_LEN(str) - RSTRING_LEN(orig); +#ifdef INCLUDE_ENCODING + if ((ofs > 0) || (klass != RBASIC(str)->c) || + ENCODING_GET(mrb, str) != ENCODING_GET(mrb, orig)) { +#else + if ((ofs > 0) || (klass != RBASIC(str)->c)) { +#endif //INCLUDE_ENCODING + str = str_new3(mrb, klass, str); + RSTRING_PTR(str) += ofs; + RSTRING_LEN(str) -= ofs; + mrb_enc_cr_str_exact_copy(mrb, str, orig); + } + } + else { + /* NOTE(review): orig.tt (a type tag) is passed where the sibling + * str_new3() takes a class - confirm against str_new4()'s signature. */ + str = str_new4(mrb, orig.tt, orig); + } + return str; +} + +mrb_value +mrb_str_drop_bytes(mrb_state *mrb, mrb_value str, long len) +{ + char *ptr = RSTRING_PTR(str); + long olen = RSTRING_LEN(str), nlen; + + str_modifiable(str); + if (len > olen) len = olen; + nlen = olen - len; + if (!MRB_STR_SHARED_P(str)) mrb_str_new4(mrb, str); + ptr = RSTRING(str)->buf += len; + RSTRING(str)->len = nlen; + ptr[nlen] = 0; + //ENC_CODERANGE_CLEAR(str); + return str; +} + +mrb_value +mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len) +{ + mrb_value str2; + if (RSTRING_LEN(str) == beg + len && + STR_BUF_MIN_SIZE < len) { + str2 = mrb_str_new_shared(mrb, mrb_str_new_frozen(mrb, str)); + mrb_str_drop_bytes(mrb, str2, beg); + } + else { + str2 = mrb_str_new5(mrb, str, RSTRING_PTR(str)+beg, len); + } + mrb_enc_cr_str_copy_for_substr(mrb, str2, str); + + return str2; +} + +#ifdef INCLUDE_ENCODING +int +mrb_enc_str_asciionly_p(mrb_state *mrb, mrb_value str) +{ + mrb_encoding *enc = STR_ENC_GET(mrb, 
str); + + if (!mrb_enc_asciicompat(mrb, enc)) + return 0/*FALSE*/; + else if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) + return 1/*TRUE*/; + return 0/*FALSE*/; +} + +static mrb_value +mrb_enc_cr_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, + int ptr_encindex, int ptr_cr, int *ptr_cr_ret) +{ + int str_encindex = ENCODING_GET(mrb, str); + int res_encindex; + int str_cr, res_cr; + int str_a8 = ENCODING_IS_ASCII8BIT(str); + int ptr_a8 = ptr_encindex == 0; + + str_cr = ENC_CODERANGE(str); + + if (str_encindex == ptr_encindex) { + if (str_cr == ENC_CODERANGE_UNKNOWN || + (ptr_a8 && str_cr != ENC_CODERANGE_7BIT)) { + ptr_cr = ENC_CODERANGE_UNKNOWN; + } + else if (ptr_cr == ENC_CODERANGE_UNKNOWN) { + ptr_cr = coderange_scan(ptr, len, mrb_enc_from_index(mrb, ptr_encindex)); + } + } + else { + mrb_encoding *str_enc = mrb_enc_from_index(mrb, str_encindex); + mrb_encoding *ptr_enc = mrb_enc_from_index(mrb, ptr_encindex); + if (!mrb_enc_asciicompat(mrb, str_enc) || !mrb_enc_asciicompat(mrb, ptr_enc)) { + if (len == 0) + return str; + if (RSTRING_LEN(str) == 0) { + mrb_str_buf_cat(mrb, str, ptr, len); + ENCODING_CODERANGE_SET(mrb, str, ptr_encindex, ptr_cr); + return str; + } + goto incompatible; + } + if (ptr_cr == ENC_CODERANGE_UNKNOWN) { + ptr_cr = coderange_scan(ptr, len, ptr_enc); + } + if (str_cr == ENC_CODERANGE_UNKNOWN) { + if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) { + str_cr = mrb_enc_str_coderange(mrb, str); + } + } + } + if (ptr_cr_ret) + *ptr_cr_ret = ptr_cr; + + if (str_encindex != ptr_encindex && + str_cr != ENC_CODERANGE_7BIT && + ptr_cr != ENC_CODERANGE_7BIT) { +incompatible: + mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s", + mrb_enc_name(mrb_enc_from_index(mrb, str_encindex)), + mrb_enc_name(mrb_enc_from_index(mrb, ptr_encindex))); + } + + if (str_cr == ENC_CODERANGE_UNKNOWN) { + res_encindex = str_encindex; + res_cr = ENC_CODERANGE_UNKNOWN; + } + else if (str_cr == ENC_CODERANGE_7BIT) { + 
if (ptr_cr == ENC_CODERANGE_7BIT) { + res_encindex = !str_a8 ? str_encindex : ptr_encindex; + res_cr = ENC_CODERANGE_7BIT; + } + else { + res_encindex = ptr_encindex; + res_cr = ptr_cr; + } + } + else if (str_cr == ENC_CODERANGE_VALID) { + res_encindex = str_encindex; + if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID) + res_cr = str_cr; + else + res_cr = ptr_cr; + } + else { /* str_cr == ENC_CODERANGE_BROKEN */ + res_encindex = str_encindex; + res_cr = str_cr; + if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN; + } + + if (len < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)"); + } + str_buf_cat(mrb, str, ptr, len); + ENCODING_CODERANGE_SET(mrb, str, res_encindex, res_cr); + return str; +} + +mrb_value +mrb_enc_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, mrb_encoding *ptr_enc) +{ + return mrb_enc_cr_str_buf_cat(mrb, str, ptr, len, + mrb_enc_to_index(ptr_enc), ENC_CODERANGE_UNKNOWN, NULL); +} + +mrb_value +mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2) +{ + int str2_cr; + + str2_cr = ENC_CODERANGE(str2); + + mrb_enc_cr_str_buf_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2), + ENCODING_GET(mrb, str2), str2_cr, &str2_cr); + + //OBJ_INFECT(str, str2); + ENC_CODERANGE_SET(str2, str2_cr); + + return str; +} +#endif //INCLUDE_ENCODING + +static inline void +str_discard(mrb_state *mrb, mrb_value str) +{ + str_modifiable(str); + if (!MRB_STR_SHARED_P(str)) { + mrb_free(mrb, RSTRING_PTR(str)); + RSTRING(str)->buf = 0; + RSTRING(str)->len = 0; + } +} + +void +mrb_str_shared_replace(mrb_state *mrb, mrb_value str, mrb_value str2) +{ +#ifdef INCLUDE_ENCODING + mrb_encoding *enc; + int cr; +#endif //INCLUDE_ENCODING + + if (mrb_obj_equal(mrb, str, str2)) return; +#ifdef INCLUDE_ENCODING + enc = STR_ENC_GET(mrb, str2); + cr = ENC_CODERANGE(str2); +#endif //INCLUDE_ENCODING + str_discard(mrb, str); + MRB_STR_UNSET_NOCAPA(str); + RSTRING_PTR(str) = RSTRING_PTR(str2); + RSTRING_LEN(str) = 
RSTRING_LEN(str2); + if (MRB_STR_NOCAPA_P(str2)) { + FL_SET(str, RBASIC(str2)->flags & MRB_STR_NOCAPA); + RSTRING_SHARED(str) = RSTRING_SHARED(str2); + } + else { + RSTRING_CAPA(str) = RSTRING_CAPA(str2); + } + + MRB_STR_UNSET_NOCAPA(str2); /* abandon str2 */ + RSTRING_PTR(str2)[0] = 0; + RSTRING_LEN(str2) = 0; + mrb_enc_associate(mrb, str, enc); + ENC_CODERANGE_SET(str, cr); +} + +#ifdef INCLUDE_REGEXP +static mrb_value +str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang) +{ + mrb_value *argv; + int argc; + mrb_value pat, val, repl, match, dest = mrb_nil_value(); + struct re_registers *regs; + mrb_int beg, n; + mrb_int beg0, end0; + mrb_int offset, blen, slen, len, last; + int iter = 0; + char *sp, *cp; + //int tainted = 0; + mrb_encoding *str_enc; + + mrb_get_args(mrb, "*", &argv, &argc); + switch (argc) { + case 1: + /*RETURN_ENUMERATOR(str, argc, argv);*/ + iter = 1; + break; + case 2: + repl = argv[1]; + mrb_string_value(mrb, &repl); + /*if (OBJ_TAINTED(repl)) tainted = 1;*/ + break; + default: + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2)", argc); + } + + pat = get_pat(mrb, argv[0], 1); + beg = mrb_reg_search(mrb, pat, str, 0, 0); + if (beg < 0) { + if (bang) return mrb_nil_value(); /* no match, no substitution */ + return mrb_str_dup(mrb, str); + } + + offset = 0; + n = 0; + blen = RSTRING_LEN(str) + 30; + dest = mrb_str_buf_new(mrb, blen); + sp = RSTRING_PTR(str); + slen = RSTRING_LEN(str); + cp = sp; + str_enc = STR_ENC_GET(mrb, str); + + do { + n++; + match = mrb_backref_get(mrb); + regs = RMATCH_REGS(match); + beg0 = BEG(0); + end0 = END(0); + val = mrb_reg_regsub(mrb, repl, str, regs, pat); + + len = beg - offset; /* copy pre-match substr */ + if (len) { + mrb_enc_str_buf_cat(mrb, dest, cp, len, str_enc); + } + + mrb_str_buf_append(mrb, dest, val); + + last = offset; + offset = end0; + if (beg0 == end0) { + /* + * Always consume at least one character of the input string + * in order to prevent infinite loops. 
+ */ + if (RSTRING_LEN(str) <= end0) break; + len = mrb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc); + mrb_enc_str_buf_cat(mrb, dest, RSTRING_PTR(str)+end0, len, str_enc); + offset = end0 + len; + } + cp = RSTRING_PTR(str) + offset; + if (offset > RSTRING_LEN(str)) break; + beg = mrb_reg_search(mrb, pat, str, offset, 0); + } while (beg >= 0); + if (RSTRING_LEN(str) > offset) { + mrb_enc_str_buf_cat(mrb, dest, cp, RSTRING_LEN(str) - offset, str_enc); + } + mrb_reg_search(mrb, pat, str, last, 0); + if (bang) { + mrb_str_shared_replace(mrb, str, dest); + } + else { + RBASIC(dest)->c = mrb_obj_class(mrb, str); + str = dest; + } + + return str; +} + +/* 15.2.10.5.18 */ +/* + * call-seq: + * str.gsub(pattern, replacement) => new_str + * str.gsub(pattern) {|match| block } => new_str + * + * Returns a copy of str with all occurrences of pattern + * replaced with either replacement or the value of the block. The + * pattern will typically be a Regexp; if it is a + * String then no regular expression metacharacters will be + * interpreted (that is /\d/ will match a digit, but + * '\d' will match a backslash followed by a 'd'). + * + * If a string is used as the replacement, special variables from the match + * (such as $& and $1) cannot be substituted into it, + * as substitution into the string occurs before the pattern match + * starts. However, the sequences \1, \2, and so on + * may be used to interpolate successive groups in the match. + * + * In the block form, the current match string is passed in as a parameter, and + * variables such as $1, $2, $`, + * $&, and $' will be set appropriately. The value + * returned by the block will be substituted for the match on each call. + * + * The result inherits any tainting in the original string or any supplied + * replacement string. + * + * When neither a block nor a second argument is supplied, an + * Enumerator is returned. 
+ * + * "hello".gsub(/[aeiou]/, '*') #=> "h*ll*" + * "hello".gsub(/([aeiou])/, '<\1>') #=> "hll" + * "hello".gsub(/./) {|s| s.ord.to_s + ' '} #=> "104 101 108 108 111 " + * "hello".gsub(/(?[aeiou])/, '{\k}') #=> "h{e}ll{o}" + * 'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*') #=> "h3ll*" + */ +static mrb_value +mrb_str_gsub(mrb_state *mrb, mrb_value self) +{ + //return str_gsub(argc, argv, self, 0); + return str_gsub(mrb, self, 0); +} + +/* 15.2.10.5.19 */ +/* + * call-seq: + * str.gsub!(pattern, replacement) => str or nil + * str.gsub!(pattern) {|match| block } => str or nil + * + * Performs the substitutions of String#gsub in place, returning + * str, or nil if no substitutions were performed. + */ +static mrb_value +mrb_str_gsub_bang(mrb_state *mrb, mrb_value self) +{ + str_modify_keep_cr(mrb, self); + //return str_gsub(argc, argv, self, 1); + return str_gsub(mrb, self, 1); +} +#endif //INCLUDE_REGEXP + +mrb_int +mrb_str_hash(mrb_state *mrb, mrb_value str) +{ + /* 1-8-7 */ + struct RString *s = mrb_str_ptr(str); + long len = s->len; + char *p = s->buf; + mrb_int key = 0; + + while (len--) { + key = key*65599 + *p; + p++; + } + key = key + (key>>5); + return key; +} + +/* 15.2.10.5.20 */ +/* + * call-seq: + * str.hash => fixnum + * + * Return a hash based on the string's length and content. + */ +static mrb_value +mrb_str_hash_m(mrb_state *mrb, mrb_value self) +{ + mrb_int key = mrb_str_hash(mrb, self); + return mrb_fixnum_value(key); +} + +/* 15.2.10.5.21 */ +/* + * call-seq: + * str.include? other_str => true or false + * str.include? fixnum => true or false + * + * Returns true if str contains the given string or + * character. + * + * "hello".include? "lo" #=> true + * "hello".include? "ol" #=> false + * "hello".include? 
?h #=> true + */ +static mrb_value +mrb_str_include(mrb_state *mrb, mrb_value self) +{ + mrb_int i; + mrb_value str2; + mrb_get_args(mrb, "o", &str2); + + if (mrb_type(str2) == MRB_TT_FIXNUM) { + if (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self))) + return mrb_true_value(); + return mrb_false_value(); + } + //StringValue(arg); + mrb_string_value(mrb, &str2); + i = mrb_str_index(mrb, self, str2, 0); + + if (i == -1) return mrb_false_value(); + return mrb_true_value(); +} + +/* 15.2.10.5.22 */ +/* + * call-seq: + * str.index(substring [, offset]) => fixnum or nil + * str.index(fixnum [, offset]) => fixnum or nil + * str.index(regexp [, offset]) => fixnum or nil + * + * Returns the index of the first occurrence of the given + * substring, + * character (fixnum), or pattern (regexp) in str. + * Returns + * nil if not found. + * If the second parameter is present, it + * specifies the position in the string to begin the search. + * + * "hello".index('e') #=> 1 + * "hello".index('lo') #=> 3 + * "hello".index('a') #=> nil + * "hello".index(101) #=> 1(101=0x65='e') + * "hello".index(/[aeiou]/, -3) #=> 4 + */ +static mrb_value +mrb_str_index_m(mrb_state *mrb, mrb_value str) +{ + mrb_value *argv; + int argc; + + mrb_value sub; + //mrb_value initpos; + mrb_int pos; + + //if (mrb_scan_args(argc, argv, "11", &sub, &initpos) == 2) { + mrb_get_args(mrb, "*", &argv, &argc); + if (argc == 2) { + pos = mrb_fixnum(argv[1]); + sub = argv[0]; + } + else { + pos = 0; + if (argc > 0) + sub = argv[0]; + else + sub = mrb_nil_value(); + + } + if (pos < 0) { +#ifdef INCLUDE_ENCODING + pos += str_strlen(mrb, str, STR_ENC_GET(mrb, str)); +#else + pos += RSTRING_LEN(str); +#endif //INCLUDE_ENCODING + if (pos < 0) { + if (mrb_type(sub) == MRB_TT_REGEX) { +#ifdef INCLUDE_REGEXP + mrb_backref_set(mrb, mrb_nil_value()); +#else + mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); +#endif //INCLUDE_REGEXP + } + return mrb_nil_value(); + } + } + + switch (mrb_type(sub)) { + 
case MRB_TT_REGEX: +#ifdef INCLUDE_REGEXP + if (pos > str_strlen(mrb, str, STR_ENC_GET(mrb, str))) + return mrb_nil_value(); + pos = str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos, + mrb_enc_check(mrb, str, sub), single_byte_optimizable(mrb, str)); + + pos = mrb_reg_search(mrb, sub, str, pos, 0); + pos = mrb_str_sublen(mrb, str, pos); +#else + mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); +#endif //INCLUDE_REGEXP + break; + + case MRB_TT_FIXNUM: { + int c = mrb_fixnum(sub); + long len = RSTRING_LEN(str); + unsigned char *p = (unsigned char*)RSTRING_PTR(str); + + for (;pos str + * + * Replaces the contents and taintedness of str with the corresponding + * values in other_str. + * + * s = "hello" #=> "hello" + * s.replace "world" #=> "world" + */ +static mrb_value +mrb_str_replace(mrb_state *mrb, mrb_value str) +{ + mrb_value str2; + + mrb_get_args(mrb, "o", &str2); + str_modifiable(str); + if (mrb_obj_equal(mrb, str, str2)) return str; + + //StringValue(str2); + mrb_string_value(mrb, &str2); + //str_discard(str); + return str_replace(mrb, str, str2); +} + +/* 15.2.10.5.23 */ +/* + * call-seq: + * String.new(str="") => new_str + * + * Returns a new string object containing a copy of str. 
+ */ +static mrb_value +mrb_str_init(mrb_state *mrb, mrb_value self) +{ + //mrb_value orig; + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + if (argc == 1) + mrb_str_replace(mrb, self); + return self; +} + +#ifdef INCLUDE_ENCODING +mrb_sym +mrb_intern3(mrb_state *mrb, const char *name, long len, mrb_encoding *enc) +{ + return mrb_intern(mrb, name); +} +#endif //INCLUDE_ENCODING + +mrb_sym +mrb_intern_str(mrb_state *mrb, mrb_value str) +{ + mrb_sym id; +#ifdef INCLUDE_ENCODING + mrb_encoding *enc; + + if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) { + enc = mrb_usascii_encoding(mrb); + } + else { + enc = mrb_enc_get(mrb, str); + } + id = mrb_intern3(mrb, RSTRING_PTR(str), RSTRING_LEN(str), enc); +#else + id = mrb_intern(mrb, RSTRING_PTR(str)); +#endif //INCLUDE_ENCODING + str = RB_GC_GUARD(str); + return id; +} + +/* 15.2.10.5.25 */ +/* 15.2.10.5.41 */ +/* + * call-seq: + * str.intern => symbol + * str.to_sym => symbol + * + * Returns the Symbol corresponding to str, creating the + * symbol if it did not previously exist. See Symbol#id2name. + * + * "Koala".intern #=> :Koala + * s = 'cat'.to_sym #=> :cat + * s == :cat #=> true + * s = '@cat'.to_sym #=> :@cat + * s == :@cat #=> true + * + * This can also be used to create symbols that cannot be represented using the + * :xxx notation. 
+ * + * 'cat and dog'.to_sym #=> :"cat and dog" + */ +mrb_value +mrb_str_intern(mrb_state *mrb, mrb_value self) +{ + mrb_sym id; + mrb_value str = RB_GC_GUARD(self); + + id = mrb_intern_str(mrb, str); + return mrb_symbol_value(id); + +} +/* ---------------------------------- */ +mrb_value +mrb_obj_as_string(mrb_state *mrb, mrb_value obj) +{ + mrb_value str; + + if (mrb_type(obj) == MRB_TT_STRING) { + return obj; + } + str = mrb_funcall(mrb, obj, "to_s", 0); + if (mrb_type(str) != MRB_TT_STRING) + return mrb_any_to_s(mrb, obj); + return str; +} + +mrb_value +mrb_check_string_type(mrb_state *mrb, mrb_value str) +{ + return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); +} + +#ifdef INCLUDE_REGEXP +static mrb_value +get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote) +{ + mrb_value val; + + switch (mrb_type(pat)) { + case MRB_TT_REGEX: + return pat; + + case MRB_TT_STRING: + break; + + default: + val = mrb_check_string_type(mrb, pat); + if (mrb_nil_p(val)) { + //Check_Type(pat, T_REGEXP); + mrb_check_type(mrb, pat, MRB_TT_REGEX); + } + pat = val; + } + + if (quote) { + pat = mrb_reg_quote(mrb, pat); + } + + return mrb_reg_regcomp(mrb, pat); +} +#endif //INCLUDE_REGEXP + +/* 15.2.10.5.27 */ +/* + * call-seq: + * str.match(pattern) => matchdata or nil + * + * Converts pattern to a Regexp (if it isn't already one), + * then invokes its match method on str. 
+ * + * 'hello'.match('(.)\1') #=> # + * 'hello'.match('(.)\1')[0] #=> "ll" + * 'hello'.match(/(.)\1/)[0] #=> "ll" + * 'hello'.match('xx') #=> nil + */ +#ifdef INCLUDE_REGEXP +static mrb_value +mrb_str_match_m(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + mrb_value re, result, b; + mrb_get_args(mrb, "&*", &b, &argv, &argc); + if (argc < 1) + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1..2)", argc); + re = argv[0]; + argv[0] = self; + // result = mrb_funcall2(get_pat(re, 0), mrb_intern("match"), argc, argv); + result = mrb_funcall(mrb, get_pat(mrb, re, 0), "match", 1, self); + if (!mrb_nil_p(result) && mrb_block_given_p()) { + return mrb_yield(mrb, b, result); + } + return result; +} +#endif //INCLUDE_REGEXP + +/* ---------------------------------- */ +/* 15.2.10.5.29 */ +/* + * call-seq: + * str.reverse => new_str + * + * Returns a new string with the characters from str in reverse order. + * + * "stressed".reverse #=> "desserts" + */ +static mrb_value +mrb_str_reverse(mrb_state *mrb, mrb_value str) +{ +#ifdef INCLUDE_ENCODING + mrb_encoding *enc; +#endif //INCLUDE_ENCODING + mrb_value rev; + char *s, *e, *p; +#ifdef INCLUDE_ENCODING + int single = 1; +#endif //INCLUDE_ENCODING + + if (RSTRING_LEN(str) <= 1) return mrb_str_dup(mrb, str); +#ifdef INCLUDE_ENCODING + enc = STR_ENC_GET(mrb, str); +#endif //INCLUDE_ENCODING + rev = mrb_str_new5(mrb, str, 0, RSTRING_LEN(str)); + s = RSTRING_PTR(str); e = RSTRING_END(str); + p = RSTRING_END(rev); + + if (RSTRING_LEN(str) > 1) { +#ifdef INCLUDE_ENCODING + if (single_byte_optimizable(mrb, str)) { +#endif //INCLUDE_ENCODING + while (s < e) { + *--p = *s++; + } +#ifdef INCLUDE_ENCODING + } + else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID) { + while (s < e) { + int clen = mrb_enc_fast_mbclen(s, e, enc); + + if (clen > 1 || (*s & 0x80)) single = 0; + p -= clen; + memcpy(p, s, clen); + s += clen; + } + } + else { + while (s < e) { + int clen = mrb_enc_mbclen(s, e, enc); + + if 
(clen > 1 || (*s & 0x80)) single = 0; + p -= clen; + memcpy(p, s, clen); + s += clen; + } + } + } + STR_SET_LEN(rev, RSTRING_LEN(str)); + if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) { + if (single) { + ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); + } + else { + ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID); + } +#endif //INCLUDE_ENCODING + } + mrb_enc_cr_str_copy_for_substr(mrb, rev, str); + + return rev; +} + +/* 15.2.10.5.30 */ +/* + * call-seq: + * str.reverse! => str + * + * Reverses str in place. + */ +static mrb_value +mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) +{ +#ifdef INCLUDE_ENCODING + if (RSTRING_LEN(str) > 1) { + if (single_byte_optimizable(mrb, str)) { +#endif //INCLUDE_ENCODING + char *s, *e, c; + str_modify_keep_cr(mrb, str); + s = RSTRING_PTR(str); + e = RSTRING_END(str) - 1; + while (s < e) { + c = *s; + *s++ = *e; + *e-- = c; + } +#ifdef INCLUDE_ENCODING + } + else { + mrb_str_shared_replace(mrb, str, mrb_str_reverse(mrb, str)); + } + } + else { + str_modify_keep_cr(mrb, str); + } +#endif //INCLUDE_ENCODING + return str; +} + +/* + * call-seq: + * str.rindex(substring [, fixnum]) => fixnum or nil + * str.rindex(fixnum [, fixnum]) => fixnum or nil + * str.rindex(regexp [, fixnum]) => fixnum or nil + * + * Returns the index of the last occurrence of the given substring, + * character (fixnum), or pattern (regexp) in str. Returns + * nil if not found. If the second parameter is present, it + * specifies the position in the string to end the search---characters beyond + * this point will not be considered. 
+ * + * "hello".rindex('e') #=> 1 + * "hello".rindex('l') #=> 3 + * "hello".rindex('a') #=> nil + * "hello".rindex(101) #=> 1 + * "hello".rindex(/[aeiou]/, -2) #=> 1 + */ +static mrb_int +mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) +{ + char *s, *sbeg, *t; + struct RString *ps = mrb_str_ptr(str); + struct RString *psub = mrb_str_ptr(sub); + long len = psub->len; + + /* substring longer than string */ + if (ps->len < len) return -1; + if (ps->len - pos < len) { + pos = ps->len - len; + } + sbeg = ps->buf; + s = ps->buf + pos; + t = psub->buf; + if (len) { + while (sbeg <= s) { + if (memcmp(s, t, len) == 0) { + return s - ps->buf; + } + s--; + } + return -1; + } + else { + return pos; + } +} + +#ifdef INCLUDE_ENCODING +/* byte offset to char offset */ +size_t +mrb_str_sublen(mrb_state *mrb, mrb_value str, long pos) +{ + if (single_byte_optimizable(mrb, str) || pos < 0) + return pos; + else { + char *p = RSTRING_PTR(str); + return enc_strlen(p, p + pos, STR_ENC_GET(mrb, str), ENC_CODERANGE(str)); + } +} +#endif //INCLUDE_ENCODING + +/* 15.2.10.5.31 */ +/* + * call-seq: + * str.rindex(substring [, fixnum]) => fixnum or nil + * str.rindex(fixnum [, fixnum]) => fixnum or nil + * str.rindex(regexp [, fixnum]) => fixnum or nil + * + * Returns the index of the last occurrence of the given substring, + * character (fixnum), or pattern (regexp) in str. Returns + * nil if not found. If the second parameter is present, it + * specifies the position in the string to end the search---characters beyond + * this point will not be considered. 
+ * + * "hello".rindex('e') #=> 1 + * "hello".rindex('l') #=> 3 + * "hello".rindex('a') #=> nil + * "hello".rindex(101) #=> 1 + * "hello".rindex(/[aeiou]/, -2) #=> 1 + */ +static mrb_value +mrb_str_rindex_m(mrb_state *mrb, mrb_value str) +{ + mrb_value *argv; + int argc; + mrb_value sub; + mrb_value vpos; +#ifdef INCLUDE_ENCODING + mrb_encoding *enc = STR_ENC_GET(mrb, str); + int pos, len = str_strlen(mrb, str, enc); +#else + int pos, len = RSTRING_LEN(str); +#endif //INCLUDE_ENCODING + + //if (mrb_scan_args(argc, argv, "11", &sub, &vpos) == 2) { + mrb_get_args(mrb, "*", &argv, &argc); + if (argc == 2) { + sub = argv[0]; + vpos = argv[1]; + pos = mrb_fixnum(vpos); + if (pos < 0) { + pos += len; + if (pos < 0) { + if (mrb_type(sub) == MRB_TT_REGEX) { +#ifdef INCLUDE_REGEXP + mrb_backref_set(mrb, mrb_nil_value()); +#else + mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); +#endif //INCLUDE_REGEXP + } + return mrb_nil_value(); + } + } + if (pos > len) pos = len; + } + else { + pos = len; + if (argc > 0) + sub = argv[0]; + else + sub = mrb_nil_value(); + } + + switch (mrb_type(sub)) { + case MRB_TT_REGEX: +#ifdef INCLUDE_REGEXP + pos = str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos, + STR_ENC_GET(mrb, str), single_byte_optimizable(mrb, str)); + + if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) { + pos = mrb_reg_search(mrb, sub, str, pos, 1); + pos = mrb_str_sublen(mrb, str, pos); + } + if (pos >= 0) return mrb_fixnum_value(pos); +#else + mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); +#endif //INCLUDE_REGEXP + break; + + case MRB_TT_FIXNUM: { + int c = mrb_fixnum(sub); + long len = RSTRING_LEN(str); + unsigned char *p = (unsigned char*)RSTRING_PTR(str); + + for (pos=len;pos>=0;pos--) { + if (p[pos] == c) return mrb_fixnum_value(pos); + } + return mrb_nil_value(); + } + + default: { + mrb_value tmp; + + tmp = mrb_check_string_type(mrb, sub); + if (mrb_nil_p(tmp)) { + mrb_raise(mrb, E_TYPE_ERROR, "type mismatch: %s given", + 
mrb_obj_classname(mrb, sub)); + } + sub = tmp; + } + /* fall through */ + case MRB_TT_STRING: + pos = mrb_str_rindex(mrb, str, sub, pos); + if (pos >= 0) return mrb_fixnum_value(pos); + break; + + } /* end of switch (TYPE(sub)) */ + return mrb_nil_value(); +} + +#ifdef INCLUDE_REGEXP +static mrb_value +scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start) +{ + mrb_value result, match; + struct re_registers *regs; + long i; + struct RString *ps = mrb_str_ptr(str); + struct RMatch *pmatch; + + if (mrb_reg_search(mrb, pat, str, *start, 0) >= 0) { + match = mrb_backref_get(mrb); + //regs = RMATCH(match)->regs; + pmatch = mrb_match_ptr(match); + regs = &pmatch->rmatch->regs; + if (regs->beg[0] == regs->end[0]) { + mrb_encoding *enc = STR_ENC_GET(mrb, str); + /* + * Always consume at least one character of the input string + */ + if (ps->len > regs->end[0]) + *start = regs->end[0] + mrb_enc_fast_mbclen(RSTRING_PTR(str)+regs->end[0],RSTRING_END(str), enc); + else + *start = regs->end[0] + 1; + } + else { + *start = regs->end[0]; + } + if (regs->num_regs == 1) { + return mrb_reg_nth_match(mrb, 0, match); + } + result = mrb_ary_new_capa(mrb, regs->num_regs);//mrb_ary_new2(regs->num_regs); + for (i=1; i < regs->num_regs; i++) { + mrb_ary_push(mrb, result, mrb_reg_nth_match(mrb, i, match)); + } + + return result; + } + return mrb_nil_value(); +} +#endif //INCLUDE_REGEXP + +/* 15.2.10.5.32 */ +/* + * call-seq: + * str.scan(pattern) => array + * str.scan(pattern) {|match, ...| block } => str + * + * Both forms iterate through str, matching the pattern (which may be a + * Regexp or a String). For each match, a result is + * generated and either added to the result array or passed to the block. If + * the pattern contains no groups, each individual result consists of the + * matched string, $&. If the pattern contains groups, each + * individual result is itself an array containing one entry per group. 
+ * + * a = "cruel world" + * a.scan(/\w+/) #=> ["cruel", "world"] + * a.scan(/.../) #=> ["cru", "el ", "wor"] + * a.scan(/(...)/) #=> [["cru"], ["el "], ["wor"]] + * a.scan(/(..)(..)/) #=> [["cr", "ue"], ["l ", "wo"]] + * + * And the block form: + * + * a.scan(/\w+/) {|w| print "<<#{w}>> " } + * print "\n" + * a.scan(/(.)(.)/) {|x,y| print y, x } + * print "\n" + * + * produces: + * + * <> <> + * rceu lowlr + */ +#ifdef INCLUDE_REGEXP +static mrb_value +mrb_str_scan(mrb_state *mrb, mrb_value str) +{ + mrb_value result; + mrb_value pat, b; + mrb_int start = 0; + mrb_value match = mrb_nil_value(); + struct RString *ps = mrb_str_ptr(str); + char *p = ps->buf; + long len = ps->len; + + mrb_get_args(mrb, "&o", &b, &pat); + pat = get_pat(mrb, pat, 1); + if (!mrb_block_given_p()) { + mrb_value ary = mrb_ary_new(mrb); + + while (!mrb_nil_p(result = scan_once(mrb, str, pat, &start))) { + match = mrb_backref_get(mrb); + mrb_ary_push(mrb, ary, result); + } + mrb_backref_set(mrb, match); + return ary; + } + + while (!mrb_nil_p(result = scan_once(mrb, str, pat, &start))) { + match = mrb_backref_get(mrb); + mrb_yield(mrb, b, result); + str_mod_check(mrb, str, p, len); + mrb_backref_set(mrb, match); /* restore $~ value */ + } + mrb_backref_set(mrb, match); + return str; +} +#endif //INCLUDE_REGEXP + +static const char isspacetable[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +#define ascii_isspace(c) isspacetable[(unsigned char)(c)] + +/* 15.2.10.5.35 */ + +/* + * call-seq: + * str.split(pattern=$;, [limit]) => anArray + * + * Divides str into substrings based on a delimiter, returning an array + * of these substrings. + * + * If pattern is a String, then its contents are used as + * the delimiter when splitting str. If pattern is a single + * space, str is split on whitespace, with leading whitespace and runs + * of contiguous whitespace characters ignored. + * + * If pattern is a Regexp, str is divided where the + * pattern matches. Whenever the pattern matches a zero-length string, + * str is split into individual characters. + * + * If pattern is omitted, the value of $; is used. If + * $; is nil (which is the default), str is + * split on whitespace as if ` ' were specified. + * + * If the limit parameter is omitted, trailing null fields are + * suppressed. If limit is a positive number, at most that number of + * fields will be returned (if limit is 1, the entire + * string is returned as the only entry in an array). If negative, there is no + * limit to the number of fields returned, and trailing null fields are not + * suppressed. 
+ * + * " now's the time".split #=> ["now's", "the", "time"] + * " now's the time".split(' ') #=> ["now's", "the", "time"] + * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"] + * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"] + * "hello".split(//) #=> ["h", "e", "l", "l", "o"] + * "hello".split(//, 3) #=> ["h", "e", "llo"] + * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"] + * + * "mellow yellow".split("ello") #=> ["m", "w y", "w"] + * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"] + * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"] + * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""] + */ + +//static mrb_value +//mrb_str_split_m(int argc, mrb_value *argv, mrb_value str) +static mrb_value +mrb_str_split_m(mrb_state *mrb, mrb_value str) +{ + mrb_value *argv; + int argc; +#ifdef INCLUDE_ENCODING + mrb_encoding *enc; +#endif //INCLUDE_ENCODING + mrb_value spat; + mrb_value limit; + enum {awk, string, regexp} split_type; + long beg, end, i = 0; + int lim = 0; + mrb_value result, tmp; + + mrb_get_args(mrb, "*", &argv, &argc); + if (argc > 0) + spat = argv[0]; + if (argc > 1) + limit = argv[1]; + else + limit = mrb_nil_value(); + + if (argc == 2) { + lim = mrb_fixnum(limit); + if (lim <= 0) limit = mrb_nil_value(); + else if (lim == 1) { + if (RSTRING_LEN(str) == 0) + return mrb_ary_new_capa(mrb, 0); + return mrb_ary_new_from_values(mrb, &str, 1); + } + i = 1; + } + +#ifdef INCLUDE_ENCODING + enc = STR_ENC_GET(mrb, str); +#endif //INCLUDE_ENCODING + //if (mrb_nil_p(spat)) { + if (argc == 0) { +// spat = mrb_nil_value(); +// goto fs_set; + split_type = awk; + } + else { +//fs_set: + if (mrb_type(spat) == MRB_TT_STRING) { +#ifdef INCLUDE_REGEXP + mrb_encoding *enc2 = STR_ENC_GET(mrb, spat); +#endif //INCLUDE_REGEXP + split_type = string; +#ifdef INCLUDE_REGEXP + if (RSTRING_LEN(spat) == 0) { + /* Special case - split into chars */ + spat = mrb_reg_regcomp(mrb, spat); + split_type = regexp; + } + else if 
(mrb_enc_asciicompat(mrb, enc2) == 1) { +#endif //INCLUDE_REGEXP + if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){ + split_type = awk; + } +#ifdef INCLUDE_REGEXP + } + else { + int l; + if (mrb_enc_ascget(mrb, RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' && + RSTRING_LEN(spat) == l) { + split_type = awk; + } + } +#endif //INCLUDE_REGEXP + } + else { +#ifdef INCLUDE_REGEXP + spat = get_pat(mrb, spat, 1); + split_type = regexp; +#else + mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); +#endif //INCLUDE_REGEXP + } + } + + result = mrb_ary_new(mrb); + beg = 0; + if (split_type == awk) { + char *ptr = RSTRING_PTR(str); + char *eptr = RSTRING_END(str); + char *bptr = ptr; + int skip = 1; + unsigned int c; + + end = beg; +#ifdef INCLUDE_ENCODING + if (is_ascii_string(mrb, str)) { +#endif //INCLUDE_ENCODING + while (ptr < eptr) { + c = (unsigned char)*ptr++; + if (skip) { + if (ascii_isspace(c)) { + beg = ptr - bptr; + } + else { + end = ptr - bptr; + skip = 0; + if (!mrb_nil_p(limit) && lim <= i) break; + } + } + else if (ascii_isspace(c)) { + mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg)); + skip = 1; + beg = ptr - bptr; + if (!mrb_nil_p(limit)) ++i; + } + else { + end = ptr - bptr; + } + } +#ifdef INCLUDE_ENCODING + } + else { + while (ptr < eptr) { + int n; + + c = mrb_enc_codepoint_len(mrb, ptr, eptr, &n, enc); + ptr += n; + if (skip) { + if (mrb_isspace(c)) { + beg = ptr - bptr; + } + else { + end = ptr - bptr; + skip = 0; + if (!mrb_nil_p(limit) && lim <= i) break; + } + } + else if (mrb_isspace(c)) { + mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg)); + skip = 1; + beg = ptr - bptr; + if (!mrb_nil_p(limit)) ++i; + } + else { + end = ptr - bptr; + } + } + } + } + else if (split_type == string) { + char *ptr = RSTRING_PTR(str); + char *temp = ptr; + char *eptr = RSTRING_END(str); + char *sptr = RSTRING_PTR(spat); + long slen = RSTRING_LEN(spat); + + if (is_broken_string(mrb, str)) { + mrb_raise(mrb, 
E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(STR_ENC_GET(mrb, str))); + } + if (is_broken_string(mrb, spat)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(STR_ENC_GET(mrb, spat))); + } + enc = mrb_enc_check(mrb, str, spat); + while (ptr < eptr && + (end = mrb_memsearch(mrb, sptr, slen, ptr, eptr - ptr, enc)) >= 0) { + /* Check we are at the start of a char */ + char *t = mrb_enc_right_char_head(ptr, ptr + end, eptr, enc); + if (t != ptr + end) { + ptr = t; + continue; + } + mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end)); + ptr += end + slen; + if (!mrb_nil_p(limit) && lim <= ++i) break; + } + beg = ptr - temp; +#endif //INCLUDE_ENCODING + } + else { +#ifdef INCLUDE_REGEXP + char *ptr = RSTRING_PTR(str); + long len = RSTRING_LEN(str); + long start = beg; + long idx; + int last_null = 0; + struct re_registers *regs; + + while ((end = mrb_reg_search(mrb, spat, str, start, 0)) >= 0) { + regs = RMATCH_REGS(mrb_backref_get(mrb)); + if (start == end && BEG(0) == END(0)) { + if (!ptr) { + mrb_ary_push(mrb, result, str_new_empty(mrb, str)); + break; + } + else if (last_null == 1) { + mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, + mrb_enc_fast_mbclen(ptr+beg, + ptr+len, + enc))); + beg = start; + } + else { + if (ptr+start == ptr+len) + start++; + else + start += mrb_enc_fast_mbclen(ptr+start,ptr+len,enc); + last_null = 1; + continue; + } + } + else { + mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg)); + beg = start = END(0); + } + last_null = 0; + + for (idx=1; idx < regs->num_regs; idx++) { + if (BEG(idx) == -1) continue; + if (BEG(idx) == END(idx)) + tmp = str_new_empty(mrb, str); + else + tmp = mrb_str_subseq(mrb, str, BEG(idx), END(idx)-BEG(idx)); + mrb_ary_push(mrb, result, tmp); + } + if (!mrb_nil_p(limit) && lim <= ++i) break; + } +#else + mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); +#endif //INCLUDE_REGEXP + } + if (RSTRING_LEN(str) > 0 && 
(!mrb_nil_p(limit) || RSTRING_LEN(str) > beg || lim < 0)) { + if (RSTRING_LEN(str) == beg) + tmp = str_new_empty(mrb, str); + else + tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); + mrb_ary_push(mrb, result, tmp); + } + if (mrb_nil_p(limit) && lim == 0) { + long len; + while ((len = RARRAY_LEN(result)) > 0 && + (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0)) + mrb_ary_pop(mrb, result); + } + + return result; +} + + +int +mrb_block_given_p() +{ + /*if (ruby_frame->iter == ITER_CUR && ruby_block) + return 1;*//*Qtrue*/ + return 0/*Qfalse*/; +} + +/* 15.2.10.5.37 */ +/* + * call-seq: + * str.sub!(pattern, replacement) => str or nil + * str.sub!(pattern) {|match| block } => str or nil + * + * Performs the substitutions of String#sub in place, + * returning str, or nil if no substitutions were + * performed. + */ +#ifdef INCLUDE_REGEXP +static mrb_value +mrb_str_sub_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value *argv; + int argc; + mrb_value pat, repl; + int iter = 0; + long plen; + + mrb_get_args(mrb, "*", &argv, &argc); + if (argc == 1 && mrb_block_given_p()) { + iter = 1; + } + else if (argc == 2) { + repl = argv[1]; + //StringValue(repl); + mrb_string_value(mrb, &repl); + } + else { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2)", argc); + } + + pat = get_pat(mrb, argv[0], 1); + str_modifiable(str); + if (mrb_reg_search(mrb, pat, str, 0, 0) >= 0) { + mrb_encoding *enc; + int cr = ENC_CODERANGE(str); + mrb_value match = mrb_backref_get(mrb); + struct re_registers *regs = RMATCH_REGS(match); + long beg0 = BEG(0); + long end0 = END(0); + char *p, *rp; + long len, rlen; + + repl = mrb_reg_regsub(mrb, repl, str, regs, pat); + enc = mrb_enc_compatible(mrb, str, repl); + if (!enc) { + mrb_encoding *str_enc = STR_ENC_GET(mrb, str); + p = RSTRING_PTR(str); len = RSTRING_LEN(str); + if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT || + coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) { + 
mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s", + mrb_enc_name(str_enc), + mrb_enc_name(STR_ENC_GET(mrb, repl))); + } + enc = STR_ENC_GET(mrb, repl); + } + mrb_str_modify(mrb, str); + mrb_enc_associate(mrb, str, enc); + //if (OBJ_TAINTED(repl)) tainted = 1; + //if (OBJ_UNTRUSTED(repl)) untrusted = 1; + if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) { + int cr2 = ENC_CODERANGE(repl); + if (cr2 == ENC_CODERANGE_BROKEN || + (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT)) + cr = ENC_CODERANGE_UNKNOWN; + else + cr = cr2; + } + plen = end0 - beg0; + rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl); + len = RSTRING_LEN(str); + if (rlen > plen) { + RESIZE_CAPA(str, len + rlen - plen); + } + p = RSTRING_PTR(str); + if (rlen != plen) { + memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen); + } + memcpy(p + beg0, rp, rlen); + len += rlen - plen; + STR_SET_LEN(str, len); + RSTRING_PTR(str)[len] = '\0'; + ENC_CODERANGE_SET(str, cr); + + return str; + } + return mrb_nil_value(); +} +#endif //INCLUDE_REGEXP + +/* 15.2.10.5.36 */ + +/* + * call-seq: + * str.sub(pattern, replacement) -> new_str + * str.sub(pattern, hash) -> new_str + * str.sub(pattern) {|match| block } -> new_str + * + * Returns a copy of str with the first occurrence of + * pattern substituted for the second argument. The pattern is + * typically a Regexp; if given as a String, any + * regular expression metacharacters it contains will be interpreted + * literally, e.g. '\\\d' will match a backlash followed by 'd', + * instead of a digit. + * + * If replacement is a String it will be substituted for + * the matched text. It may contain back-references to the pattern's capture + * groups of the form \\\d, where d is a group number, or + * \\\k, where n is a group name. If it is a + * double-quoted string, both back-references must be preceded by an + * additional backslash. 
However, within replacement the special match + * variables, such as &$, will not refer to the current match. + * + * If the second argument is a Hash, and the matched text is one + * of its keys, the corresponding value is the replacement string. + * + * In the block form, the current match string is passed in as a parameter, + * and variables such as $1, $2, $`, + * $&, and $' will be set appropriately. The value + * returned by the block will be substituted for the match on each call. + * + * The result inherits any tainting in the original string or any supplied + * replacement string. + * + * "hello".sub(/[aeiou]/, '*') #=> "h*llo" + * "hello".sub(/([aeiou])/, '<\1>') #=> "hllo" + * "hello".sub(/./) {|s| s.ord.to_s + ' ' } #=> "104 ello" + * "hello".sub(/(?[aeiou])/, '*\k*') #=> "h*e*llo" + * 'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV) + * #=> "Is /bin/bash your preferred shell?" + */ + +#ifdef INCLUDE_REGEXP +static mrb_value +mrb_str_sub(mrb_state *mrb, mrb_value self) +{ + mrb_value str = mrb_str_dup(mrb, self); + + mrb_str_sub_bang(mrb, str); + return str; +} +#endif //INCLUDE_REGEXP + +mrb_value +mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck) +{ + #define BDIGIT unsigned int + #define BDIGIT_DBL unsigned long + +// const char *s = str; + char *end; + char sign = 1; +// char nondigit = 0; + int c; +// BDIGIT_DBL num; + long len; +// long blen = 1; +// long i; +// mrb_value z; +// BDIGIT *zds; + +#undef ISDIGIT +#define ISDIGIT(c) ('0' <= (c) && (c) <= '9') +#define conv_digit(c) \ + (!ISASCII(c) ? -1 : \ + isdigit(c) ? ((c) - '0') : \ + islower(c) ? ((c) - 'a' + 10) : \ + isupper(c) ? 
((c) - 'A' + 10) : \ + -1) + + if (!str) { + if (badcheck) goto bad; + return mrb_fixnum_value(0); + } + while (ISSPACE(*str)) str++; + + if (str[0] == '+') { + str++; + } + else if (str[0] == '-') { + str++; + sign = 0; + } + if (str[0] == '+' || str[0] == '-') { + if (badcheck) goto bad; + return mrb_fixnum_value(0); + } + if (base <= 0) { + if (str[0] == '0') { + switch (str[1]) { + case 'x': case 'X': + base = 16; + break; + case 'b': case 'B': + base = 2; + break; + case 'o': case 'O': + base = 8; + break; + case 'd': case 'D': + base = 10; + break; + default: + base = 8; + } + } + else if (base < -1) { + base = -base; + } + else { + base = 10; + } + } + switch (base) { + case 2: + len = 1; + if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) { + str += 2; + } + break; + case 3: + len = 2; + break; + case 8: + if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) { + str += 2; + } + case 4: case 5: case 6: case 7: + len = 3; + break; + case 10: + if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) { + str += 2; + } + case 9: case 11: case 12: case 13: case 14: case 15: + len = 4; + break; + case 16: + len = 4; + if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) { + str += 2; + } + break; + default: + if (base < 2 || 36 < base) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "illegal radix %d", base); + } + if (base <= 32) { + len = 5; + } + else { + len = 6; + } + break; + } /* end of switch (base) { */ + if (*str == '0') { /* squeeze preceeding 0s */ + int us = 0; + while ((c = *++str) == '0' || c == '_') { + if (c == '_') { + if (++us >= 2) + break; + } + else + us = 0; + } + if (!(c = *str) || ISSPACE(c)) --str; + } + c = *str; + c = conv_digit(c); + if (c < 0 || c >= base) { + if (badcheck) goto bad; + return mrb_fixnum_value(0); + } + len *= strlen(str)*sizeof(char); + + unsigned long val = strtoul((char*)str, &end, base); + + if (badcheck) { + if (end == str) goto bad; /* no number */ + while (*end && ISSPACE(*end)) end++; + if (*end) goto bad; /* trailing 
garbage */ + } + + if (sign) return mrb_fixnum_value(val); + else { + long result = -(long)val; + return mrb_fixnum_value(result); + } +bad: + printf("Integer"); + return mrb_fixnum_value(0); +} +char * +mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr) +{ + struct RString *ps = mrb_str_ptr(*ptr); + char *s = ps->buf; + + if (!s || ps->len != strlen(s)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); + } + return s; +} + +mrb_value +mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck) +{ + char *s; + size_t len; + + //StringValue(str); + mrb_string_value(mrb, &str); + if (badcheck) { + //s = StringValueCStr(str); + s = mrb_string_value_cstr(mrb, &str); + } + else { + s = RSTRING_PTR(str); + } + if (s) { + len = RSTRING_LEN(str); + if (s[len]) { /* no sentinel somehow */ + //char *p = ALLOCA_N(char, len+1); + char *p = mrb_malloc(mrb, len+1); + + //MEMCPY(p, s, char, len); + memcpy(p, s, sizeof(char)*len); + p[len] = '\0'; + s = p; + } + } + return mrb_cstr_to_inum(mrb, s, base, badcheck); +} + +/* 15.2.10.5.38 */ +/* + * call-seq: + * str.to_i(base=10) => integer + * + * Returns the result of interpreting leading characters in str as an + * integer base base (between 2 and 36). Extraneous characters past the + * end of a valid number are ignored. If there is not a valid number at the + * start of str, 0 is returned. This method never raises an + * exception. 
+ * + * "12345".to_i #=> 12345 + * "99 red balloons".to_i #=> 99 + * "0a".to_i #=> 0 + * "0a".to_i(16) #=> 10 + * "hello".to_i #=> 0 + * "1100101".to_i(2) #=> 101 + * "1100101".to_i(8) #=> 294977 + * "1100101".to_i(10) #=> 1100101 + * "1100101".to_i(16) #=> 17826049 + */ +static mrb_value +mrb_str_to_i(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + int argc; + //mrb_value b; + int base; + + //mrb_scan_args(argc, *argv, "01", &b); + mrb_get_args(mrb, "*", &argv, &argc); + if (argc == 0) + base = 10; + else + base = mrb_fixnum(argv[0]); + + if (base < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "illegal radix %d", base); + } + return mrb_str_to_inum(mrb, self, base, 0/*Qfalse*/); +} + +double +mrb_cstr_to_dbl(mrb_state *mrb, const char * p, int badcheck) +{ + const char *q; + char *end; + double d; +// const char *ellipsis = ""; +// int w; +#define DBL_DIG 16 + enum {max_width = 20}; +#define OutOfRange() (((w = end - p) > max_width) ? \ + (w = max_width, ellipsis = "...") : \ + (w = (int)(end - p), ellipsis = "")) + + if (!p) return 0.0; + q = p; + while (ISSPACE(*p)) p++; + + if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { + return 0.0; + } + d = strtod(p, &end); + if (p == end) { + if (badcheck) { +bad: + //mrb_invalid_str(q, "Float()"); + printf("Float()\n"); + } + return d; + } + if (*end) { + char buf[DBL_DIG * 4 + 10]; + char *n = buf; + char *e = buf + sizeof(buf) - 1; + char prev = 0; + + while (p < end && n < e) prev = *n++ = *p++; + while (*p) { + if (*p == '_') { + /* remove underscores between digits */ + if (badcheck) { + if (n == buf || !ISDIGIT(prev)) goto bad; + ++p; + if (!ISDIGIT(*p)) goto bad; + } + else { + while (*++p == '_'); + continue; + } + } + prev = *p++; + if (n < e) *n++ = prev; + } + *n = '\0'; + p = buf; + + if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { + return 0.0; + } + + d = strtod(p, &end); + if (badcheck) { + if (!end || p == end) goto bad; + while (*end && ISSPACE(*end)) end++; + if (*end) 
goto bad; + } + } + return d; +} + +double +mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck) +{ + char *s; + size_t len; + + //StringValue(str); + mrb_string_value(mrb, &str); + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + if (s) { + if (badcheck && memchr(s, '\0', len)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte"); + } + if (s[len]) { /* no sentinel somehow */ + char *p = mrb_malloc(mrb, len+1); + + memcpy(p, s, sizeof(char)*len); + p[len] = '\0'; + s = p; + } + } + return mrb_cstr_to_dbl(mrb, s, badcheck); +} + +/* 15.2.10.5.39 */ +/* + * call-seq: + * str.to_f => float + * + * Returns the result of interpreting leading characters in str as a + * floating point number. Extraneous characters past the end of a valid number + * are ignored. If there is not a valid number at the start of str, + * 0.0 is returned. This method never raises an exception. + * + * "123.45e1".to_f #=> 1234.5 + * "45.67 degrees".to_f #=> 45.67 + * "thx1138".to_f #=> 0.0 + */ +static mrb_value +mrb_str_to_f(mrb_state *mrb, mrb_value self) +{ + //return mrb_float_new(mrb_str_to_dbl(self, 0/*Qfalse*/)); + return mrb_float_value(mrb_str_to_dbl(mrb, self, 0/*Qfalse*/)); +} + +/* 15.2.10.5.40 */ +/* + * call-seq: + * str.to_s => str + * str.to_str => str + * + * Returns the receiver. + */ +static mrb_value +mrb_str_to_s(mrb_state *mrb, mrb_value self) +{ + if (mrb_obj_class(mrb, self) != mrb->string_class) { + return mrb_str_dup(mrb, self); + } + return self; +} + +/* 15.2.10.5.43 */ +/* + * call-seq: + * str.upcase! => str or nil + * + * Upcases the contents of str, returning nil if no changes + * were made. 
+ */ +static mrb_value +mrb_str_upcase_bang(mrb_state *mrb, mrb_value str) +{ +#ifdef INCLUDE_ENCODING + mrb_encoding *enc; +#endif //INCLUDE_ENCODING + char *s, *send; + int modify = 0; +#ifdef INCLUDE_ENCODING + int n; + + str_modify_keep_cr(mrb, str); + enc = STR_ENC_GET(mrb, str); + mrb_str_check_dummy_enc(mrb, enc); + s = RSTRING_PTR(str); send = RSTRING_END(str); + if (single_byte_optimizable(mrb, str)) { + while (s < send) { + unsigned int c = *(unsigned char*)s; + + if (mrb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') { + *s = 'A' + (c - 'a'); + modify = 1; + } + s++; + } + } + else { + int ascompat = mrb_enc_asciicompat(mrb, enc); + + while (s < send) { + unsigned int c; + + if (ascompat && (c = *(unsigned char*)s) < 0x80) { + if (mrb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') { + *s = 'A' + (c - 'a'); + modify = 1; + } + s++; + } + else { + c = mrb_enc_codepoint_len(mrb, s, send, &n, enc); + if (mrb_enc_islower(c, enc)) { + /* assuming toupper returns codepoint with same size */ + mrb_enc_mbcput(mrb_enc_toupper(c, enc), s, enc); + modify = 1; + } + s += n; + } + } + } +#else + mrb_str_modify(mrb, str); + s = RSTRING_PTR(str); send = RSTRING_END(str); + while (s < send) { + unsigned int c = *(unsigned char*)s; + + if ('a' <= c && c <= 'z') { + *s = 'A' + (c - 'a'); + modify = 1; + } + s++; + } +#endif //INCLUDE_ENCODING + if (modify) return str; + return mrb_nil_value(); +} + +/* 15.2.10.5.42 */ +/* + * call-seq: + * str.upcase => new_str + * + * Returns a copy of str with all lowercase letters replaced with their + * uppercase counterparts. The operation is locale insensitive---only + * characters ``a'' to ``z'' are affected. 
+ * + * "hEllO".upcase #=> "HELLO" + */ +static mrb_value +mrb_str_upcase(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + + str = mrb_str_dup(mrb, self); + mrb_str_upcase_bang(mrb, str); + return str; +} + +/* 15.2.10.5.xx */ +/* + * call-seq: + * str.force_encoding(encoding) -> str + * + * Changes the encoding to +encoding+ and returns self. + */ +#ifdef INCLUDE_ENCODING +static mrb_value +mrb_str_force_encoding(mrb_state *mrb, mrb_value self) +{ + mrb_value enc; + mrb_get_args(mrb, "o", &enc); + str_modifiable(self); + mrb_enc_associate(mrb, self, mrb_to_encoding(mrb, enc)); + ENC_CODERANGE_CLEAR(self); + return self; +} + +long +mrb_str_coderange_scan_restartable(const char *s, const char *e, mrb_encoding *enc, int *cr) +{ + const char *p = s; + + if (*cr == ENC_CODERANGE_BROKEN) + return e - s; + + if (mrb_enc_to_index(enc) == 0) { + /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */ + p = search_nonascii(p, e); + *cr = (!p && *cr != ENC_CODERANGE_VALID) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; + return e - s; + } + else if (mrb_enc_asciicompat(mrb, enc)) { + p = search_nonascii(p, e); + if (!p) { + if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT; + return e - s; + } + while (p < e) { + int ret = mrb_enc_precise_mbclen(p, e, enc); + if (!MBCLEN_CHARFOUND_P(ret)) { + *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN; + return p - s; + } + p += MBCLEN_CHARFOUND_LEN(ret); + if (p < e) { + p = search_nonascii(p, e); + if (!p) { + *cr = ENC_CODERANGE_VALID; + return e - s; + } + } + } + *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID; + return p - s; + } + else { + while (p < e) { + int ret = mrb_enc_precise_mbclen(p, e, enc); + if (!MBCLEN_CHARFOUND_P(ret)) { + *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN; + return p - s; + } + p += MBCLEN_CHARFOUND_LEN(ret); + } + *cr = e < p ? 
ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID; + return p - s; + } +} + +mrb_value +mrb_str_conv_enc_opts(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to, int ecflags, mrb_value ecopts) +{ + mrb_econv_t *ec; + mrb_econv_result_t ret; + long len; + mrb_value newstr; + const unsigned char *sp; + unsigned char *dp; + + if (!to) return str; + if (from == to) return str; + if ((mrb_enc_asciicompat(mrb, to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) || + to == mrb_ascii8bit_encoding(mrb)) { + if (STR_ENC_GET(mrb, str) != to) { + str = mrb_str_dup(mrb, str); + mrb_enc_associate(mrb, str, to); + } + return str; + } + + len = RSTRING_LEN(str); + newstr = mrb_str_new(mrb, 0, len); + + retry: + ec = mrb_econv_open_opts(mrb, from->name, to->name, ecflags, ecopts); + if (!ec) return str; + + sp = (unsigned char*)RSTRING_PTR(str); + dp = (unsigned char*)RSTRING_PTR(newstr); + ret = mrb_econv_convert(mrb, ec, &sp, (unsigned char*)RSTRING_END(str), + &dp, (unsigned char*)RSTRING_END(newstr), 0); + mrb_econv_close(ec); + switch (ret) { + case econv_destination_buffer_full: + /* destination buffer short */ + len = len < 2 ? 2 : len * 2; + mrb_str_resize(mrb, newstr, len); + goto retry; + + case econv_finished: + len = dp - (unsigned char*)RSTRING_PTR(newstr); + mrb_str_set_len(mrb, newstr, len); + mrb_enc_associate(mrb, newstr, to); + return newstr; + + default: + /* some error, return original */ + return str; + } +} + +mrb_value +mrb_str_conv_enc(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to) +{ + return mrb_str_conv_enc_opts(mrb, str, from, to, 0, mrb_nil_value()); +} +#endif //INCLUDE_ENCODING + +#ifndef INCLUDE_ENCODING +#undef SIGN_EXTEND_CHAR +#if __STDC__ +# define SIGN_EXTEND_CHAR(c) ((signed char)(c)) +#else /* not __STDC__ */ +/* As in Harbison and Steele. 
*/ +# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) +#endif +#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_')) + +static int +is_special_global_name(m) + const char *m; +{ + switch (*m) { + case '~': case '*': case '$': case '?': case '!': case '@': + case '/': case '\\': case ';': case ',': case '.': case '=': + case ':': case '<': case '>': case '\"': + case '&': case '`': case '\'': case '+': + case '0': + ++m; + break; + case '-': + ++m; + if (is_identchar(*m)) m += 1; + break; + default: + if (!ISDIGIT(*m)) return 0; + do ++m; while (ISDIGIT(*m)); + } + return !*m; +} + +int +mrb_symname_p(const char *name) +{ + const char *m = name; + int localid = FALSE; + + if (!m) return FALSE; + switch (*m) { + case '\0': + return FALSE; + + case '$': + if (is_special_global_name(++m)) return TRUE; + goto id; + + case '@': + if (*++m == '@') ++m; + goto id; + + case '<': + switch (*++m) { + case '<': ++m; break; + case '=': if (*++m == '>') ++m; break; + default: break; + } + break; + + case '>': + switch (*++m) { + case '>': case '=': ++m; break; + } + break; + + case '=': + switch (*++m) { + case '~': ++m; break; + case '=': if (*++m == '=') ++m; break; + default: return FALSE; + } + break; + + case '*': + if (*++m == '*') ++m; + break; + + case '+': case '-': + if (*++m == '@') ++m; + break; + + case '|': case '^': case '&': case '/': case '%': case '~': case '`': + ++m; + break; + + case '[': + if (*++m != ']') return FALSE; + if (*++m == '=') ++m; + break; + + default: + localid = !ISUPPER(*m); +id: + if (*m != '_' && !ISALPHA(*m)) return FALSE; + while (is_identchar(*m)) m += 1; + if (localid) { + switch (*m) { + case '!': case '?': case '=': ++m; + } + } + break; + } + return *m ? FALSE : TRUE; +} +#endif //INCLUDE_ENCODING + +/* + * call-seq: + * str.dump -> new_str + * + * Produces a version of str with all nonprinting characters replaced by + * \nnn notation and all special characters escaped. 
+ */ +mrb_value +mrb_str_dump(mrb_state *mrb, mrb_value str) +{ +#ifdef INCLUDE_ENCODING + mrb_encoding *enc = mrb_enc_get(mrb, str); +#endif //INCLUDE_ENCODING + long len; + const char *p, *pend; + char *q, *qend; + mrb_value result; +#ifdef INCLUDE_ENCODING + int u8 = (enc == mrb_utf8_encoding(mrb)); +#endif //INCLUDE_ENCODING + + len = 2; /* "" */ + p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); + while (p < pend) { + unsigned char c = *p++; + switch (c) { + case '"': case '\\': + case '\n': case '\r': + case '\t': case '\f': + case '\013': case '\010': case '\007': case '\033': + len += 2; + break; + + case '#': + len += IS_EVSTR(p, pend) ? 2 : 1; + break; + + default: + if (ISPRINT(c)) { + len++; + } + else { +#ifdef INCLUDE_ENCODING + if (u8) { /* \u{NN} */ + int n = mrb_enc_precise_mbclen(p-1, pend, enc); + if (MBCLEN_CHARFOUND_P(n-1)) { + unsigned int cc = mrb_enc_mbc_to_codepoint(p-1, pend, enc); + while (cc >>= 4) len++; + len += 5; + p += MBCLEN_CHARFOUND_LEN(n)-1; + break; + } + } +#endif //INCLUDE_ENCODING + len += 4; /* \xNN */ + } + break; + } + } +#ifdef INCLUDE_ENCODING + if (!mrb_enc_asciicompat(mrb, enc)) { + len += 19; /* ".force_encoding('')" */ + len += strlen(enc->name); + } +#endif //INCLUDE_ENCODING + + result = mrb_str_new5(mrb, str, 0, len); + p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); + q = RSTRING_PTR(result); qend = q + len + 1; + + *q++ = '"'; + while (p < pend) { + unsigned char c = *p++; + + if (c == '"' || c == '\\') { + *q++ = '\\'; + *q++ = c; + } + else if (c == '#') { + if (IS_EVSTR(p, pend)) *q++ = '\\'; + *q++ = '#'; + } + else if (c == '\n') { + *q++ = '\\'; + *q++ = 'n'; + } + else if (c == '\r') { + *q++ = '\\'; + *q++ = 'r'; + } + else if (c == '\t') { + *q++ = '\\'; + *q++ = 't'; + } + else if (c == '\f') { + *q++ = '\\'; + *q++ = 'f'; + } + else if (c == '\013') { + *q++ = '\\'; + *q++ = 'v'; + } + else if (c == '\010') { + *q++ = '\\'; + *q++ = 'b'; + } + else if (c == '\007') { + *q++ = '\\'; + *q++ = 'a'; 
+ } + else if (c == '\033') { + *q++ = '\\'; + *q++ = 'e'; + } + else if (ISPRINT(c)) { + *q++ = c; + } + else { + *q++ = '\\'; +#ifdef INCLUDE_ENCODING + if (u8) { + int n = mrb_enc_precise_mbclen(p-1, pend, enc) - 1; + if (MBCLEN_CHARFOUND_P(n)) { + int cc = mrb_enc_mbc_to_codepoint(p-1, pend, enc); + p += n; + snprintf(q, qend-q, "u{%x}", cc); + q += strlen(q); + continue; + } + } + snprintf(q, qend-q, "x%02X", c); +#else + sprintf(q, "%03o", c&0xff); +#endif //INCLUDE_ENCODING + q += 3; + } + } + *q++ = '"'; +#ifdef INCLUDE_ENCODING + *q = '\0'; + if (!mrb_enc_asciicompat(mrb, enc)) { + snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name); + enc = mrb_ascii8bit_encoding(mrb); + } + //OBJ_INFECT(result, str); + /* result from dump is ASCII */ + mrb_enc_associate(mrb, result, enc); + ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT); +#endif //INCLUDE_ENCODING + return result; +} + +mrb_value +mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len) +{ + if (len < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)"); + } + if (0/*STR_ASSOC_P(str)*/) { + mrb_str_modify(mrb, str); + //if (STR_EMBED_P(str)) str_make_independent(mrb, str); + mrb_realloc(mrb, RSTRING(str)->buf, RSTRING(str)->len+len+1); + memcpy(RSTRING(str)->buf + RSTRING(str)->len, ptr, len); + RSTRING(str)->len += len; + RSTRING(str)->buf[RSTRING(str)->len] = '\0'; /* sentinel */ + return str; + } + + return str_buf_cat(mrb, str, ptr, len); +} + +mrb_value +mrb_str_cat2(mrb_state *mrb, mrb_value str, const char *ptr) +{ + return mrb_str_cat(mrb, str, ptr, strlen(ptr)); +} + +mrb_value +mrb_str_vcatf(mrb_state *mrb, mrb_value str, const char *fmt, va_list ap) +{ + //mrb_printf_buffer f; + //mrb_value klass; + + //StringValue(str); + mrb_string_value(mrb, &str); + mrb_str_modify(mrb, str); + mrb_str_resize(mrb, str, (char *)RSTRING_END(str) - RSTRING_PTR(str)); + + return str; +} + +mrb_value +mrb_str_catf(mrb_state *mrb, mrb_value str, const char *format, 
...) +{ + va_list ap; + + va_start(ap, format); + str = mrb_str_vcatf(mrb, str, format, ap); + va_end(ap); + + return str; +} + +void +mrb_lastline_set(mrb_value val) +{ + //vm_svar_set(0, val); +} + +mrb_value +mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2) +{ +#ifdef INCLUDE_ENCODING + mrb_encoding *enc; + int cr, cr2; +#endif //INCLUDE_ENCODING + + //StringValue(str2); + mrb_string_value(mrb, &str2); + if (RSTRING_LEN(str2) > 0 /*&& STR_ASSOC_P(str)*/) { + long len = RSTRING_LEN(str)+RSTRING_LEN(str2); +#ifdef INCLUDE_ENCODING + enc = mrb_enc_check(mrb, str, str2); + cr = ENC_CODERANGE(str); + if ((cr2 = ENC_CODERANGE(str2)) > cr) cr = cr2; +#endif //INCLUDE_ENCODING + mrb_str_modify(mrb, str); + REALLOC_N(mrb, RSTRING(str)->buf, char, len+1); + memcpy(RSTRING(str)->buf + RSTRING(str)->len, + RSTRING_PTR(str2), RSTRING_LEN(str2)+1); + RSTRING(str)->len = len; + mrb_enc_associate(mrb, str, enc); + ENC_CODERANGE_SET(str, cr); + //OBJ_INFECT(str, str2); + return str; + } +#ifdef INCLUDE_ENCODING + return mrb_str_buf_append(mrb, str, str2); +#else + return str; +#endif //INCLUDE_ENCODING +} + +void +mrb_str_setter(mrb_state *mrb, mrb_value val, mrb_sym id, mrb_value *var) +{ + if (!mrb_nil_p(val) && (mrb_type(val) != MRB_TT_STRING)) { + mrb_raise(mrb, E_TYPE_ERROR, "value of %s must be String", mrb_sym2name(mrb, id)); + } + *var = val; +} + +#ifdef INCLUDE_ENCODING +/* + * call-seq: + * str.ascii_only? -> true or false + * + * Returns true for a string which has only ASCII characters. + * + * "abc".force_encoding("UTF-8").ascii_only? #=> true + * "abc\u{6666}".force_encoding("UTF-8").ascii_only? #=> false + */ + +int +mrb_str_is_ascii_only_p(mrb_state *mrb, mrb_value str) +{ + int cr = mrb_enc_str_coderange(mrb, str); + + return cr == ENC_CODERANGE_7BIT ? 
TRUE : FALSE; +} + +#endif //INCLUDE_ENCODING + +#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */ +int +mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p) +{ + char buf[CHAR_ESC_LEN + 1]; + int l; + +#if SIZEOF_INT > 4 + c &= 0xffffffff; +#endif + if (unicode_p) { + if (c < 0x7F && ISPRINT(c)) { + snprintf(buf, CHAR_ESC_LEN, "%c", c); + } + else if (c < 0x10000) { + snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c); + } + else { + snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c); + } + } + else { + if (c < 0x100) { + snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c); + } + else { + snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c); + } + } + l = (int)strlen(buf); /* CHAR_ESC_LEN cannot exceed INT_MAX */ + mrb_str_buf_cat(mrb, result, buf, l); + return l; +} + +/* + * call-seq: + * str.inspect -> string + * + * Returns a printable version of _str_, surrounded by quote marks, + * with special characters escaped. + * + * str = "hello" + * str[3] = "\b" + * str.inspect #=> "\"hel\\bo\"" + */ +mrb_value +mrb_str_inspect(mrb_state *mrb, mrb_value str) +{ +#ifdef INCLUDE_ENCODING + mrb_encoding *enc = STR_ENC_GET(mrb, str); +#endif //INCLUDE_ENCODING + const char *p, *pend, *prev; + char buf[CHAR_ESC_LEN + 1]; +#ifdef INCLUDE_ENCODING + mrb_value result = mrb_str_buf_new(mrb, 0); + mrb_encoding *resenc = mrb_default_internal_encoding(mrb); + int unicode_p = mrb_enc_unicode_p(enc); + int asciicompat = mrb_enc_asciicompat(mrb, enc); + + if (resenc == NULL) resenc = mrb_default_external_encoding(mrb); + if (!mrb_enc_asciicompat(mrb, resenc)) resenc = mrb_usascii_encoding(mrb); + mrb_enc_associate(mrb, result, resenc); + mrb_str_buf_cat(mrb, result, "\"", strlen("\"")); //str_buf_cat2(result, "\""); +#else + mrb_value result = mrb_str_new_cstr(mrb, "\"");//mrb_str_buf_new2("\""); +#endif //INCLUDE_ENCODING + + p = RSTRING_PTR(str); pend = RSTRING_END(str); + prev = p; + while (p < pend) { + unsigned int c, cc; + int n; + +#ifdef 
INCLUDE_ENCODING + n = mrb_enc_precise_mbclen(p, pend, enc); + if (!MBCLEN_CHARFOUND_P(n)) { + if (p > prev) mrb_str_buf_cat(mrb, result, prev, p - prev); + n = mrb_enc_mbminlen(enc); + if (pend < p + n) + n = (int)(pend - p); + while (n--) { + snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377); + mrb_str_buf_cat(mrb, result, buf, strlen(buf)); + prev = ++p; + } + continue; + } + n = MBCLEN_CHARFOUND_LEN(n); + c = mrb_enc_mbc_to_codepoint(p, pend, enc); + p += n; + if (c == '"'|| c == '\\' || + (c == '#' && + p < pend && + MBCLEN_CHARFOUND_P(mrb_enc_precise_mbclen(p,pend,enc)) && + (cc = mrb_enc_codepoint(mrb, p, pend, enc), + (cc == '$' || cc == '@' || cc == '{')))) { + if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev); + mrb_str_buf_cat(mrb, result, "\\", strlen("\\")); //str_buf_cat2(result, "\\"); + if (asciicompat || enc == resenc) { + prev = p - n; + continue; + } + } +#else + c = *p++; + n = 1; + if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) { + buf[0] = '\\'; buf[1] = c; + mrb_str_buf_cat(mrb, result, buf, 2); + continue; + } + if (ISPRINT(c)) { + buf[0] = c; + mrb_str_buf_cat(mrb, result, buf, 1); + continue; + } +#endif //INCLUDE_ENCODING + switch (c) { + case '\n': cc = 'n'; break; + case '\r': cc = 'r'; break; + case '\t': cc = 't'; break; + case '\f': cc = 'f'; break; + case '\013': cc = 'v'; break; + case '\010': cc = 'b'; break; + case '\007': cc = 'a'; break; + case 033: cc = 'e'; break; + default: cc = 0; break; + } + if (cc) { + if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev); + buf[0] = '\\'; + buf[1] = (char)cc; + mrb_str_buf_cat(mrb, result, buf, 2); + prev = p; + continue; + } +#ifdef INCLUDE_ENCODING + if ((enc == resenc && mrb_enc_isprint(c, enc)) || + (asciicompat && mrb_enc_isascii(c, enc) && ISPRINT(c))) { + continue; + } +#endif //INCLUDE_ENCODING + else { + if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev); +#ifdef INCLUDE_ENCODING + mrb_str_buf_cat_escaped_char(mrb, 
result, c, unicode_p); +#else + sprintf(buf, "\\%03o", c & 0377); + mrb_str_buf_cat(mrb, result, buf, strlen(buf)); +#endif //INCLUDE_ENCODING + prev = p; + continue; + } + } + if (p > prev) mrb_str_buf_cat(mrb, result, prev, p - prev); + mrb_str_buf_cat(mrb, result, "\"", strlen("\"")); //str_buf_cat2(result, "\""); + + //OBJ_INFECT(result, str); + return result; +} + +#ifdef INCLUDE_ENCODING +int +sym_printable(mrb_state *mrb, const char *s, const char *send, mrb_encoding *enc) +{ + while (s < send) { + int n; + int c = mrb_enc_codepoint_len(mrb, s, send, &n, enc); + + if (!mrb_enc_isprint(c, enc)) return FALSE; + s += n; + } + return TRUE; +} +#endif //INCLUDE_ENCODING + +/* ---------------------------*/ +void +mrb_init_string(mrb_state *mrb) +{ + struct RClass *s; + + s = mrb->string_class = mrb_define_class(mrb, "String", mrb->object_class); + MRB_SET_INSTANCE_TT(s, MRB_TT_STRING); + mrb_include_module(mrb, s, mrb_class_get(mrb, "Comparable")); + + mrb_define_method(mrb, s, "+", mrb_str_plus_m, ARGS_REQ(1)); /* 15.2.10.5.2 */ + mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, ARGS_NONE()); + mrb_define_method(mrb, s, "size", mrb_str_size, ARGS_NONE()); /* 15.2.10.5.33 */ + mrb_define_method(mrb, s, "length", mrb_str_size, ARGS_NONE()); /* 15.2.10.5.26 */ + mrb_define_method(mrb, s, "*", mrb_str_times, ARGS_REQ(1)); /* 15.2.10.5.1 */ + mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, ARGS_REQ(1)); /* 15.2.10.5.3 */ + mrb_define_method(mrb, s, "==", mrb_str_equal_m, ARGS_REQ(1)); /* 15.2.10.5.4 */ + mrb_define_method(mrb, s, "=~", mrb_str_match, ARGS_REQ(1)); /* 15.2.10.5.5 */ + mrb_define_method(mrb, s, "[]", mrb_str_aref_m, ARGS_ANY()); /* 15.2.10.5.6 */ + mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, ARGS_NONE()); /* 15.2.10.5.7 */ + mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, ARGS_REQ(1)); /* 15.2.10.5.8 */ + mrb_define_method(mrb, s, "chomp", mrb_str_chomp, ARGS_ANY()); /* 15.2.10.5.9 */ + mrb_define_method(mrb, s, 
"chomp!", mrb_str_chomp_bang, ARGS_ANY()); /* 15.2.10.5.10 */ + mrb_define_method(mrb, s, "chop", mrb_str_chop, ARGS_REQ(1)); /* 15.2.10.5.11 */ + mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, ARGS_REQ(1)); /* 15.2.10.5.12 */ + mrb_define_method(mrb, s, "downcase", mrb_str_downcase, ARGS_NONE()); /* 15.2.10.5.13 */ + mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, ARGS_NONE()); /* 15.2.10.5.14 */ + mrb_define_method(mrb, s, "each_line", mrb_str_each_line, ARGS_REQ(1)); /* 15.2.10.5.15 */ + mrb_define_method(mrb, s, "empty?", mrb_str_empty, ARGS_NONE()); /* 15.2.10.5.16 */ + mrb_define_method(mrb, s, "eql?", mrb_str_eql, ARGS_REQ(1)); /* 15.2.10.5.17 */ +#ifdef INCLUDE_REGEXP + mrb_define_method(mrb, s, "gsub", mrb_str_gsub, ARGS_REQ(1)); /* 15.2.10.5.18 */ + mrb_define_method(mrb, s, "gsub!", mrb_str_gsub_bang, ARGS_REQ(1)); /* 15.2.10.5.19 */ +#endif + mrb_define_method(mrb, s, "hash", mrb_str_hash_m, ARGS_REQ(1)); /* 15.2.10.5.20 */ + mrb_define_method(mrb, s, "include?", mrb_str_include, ARGS_REQ(1)); /* 15.2.10.5.21 */ + mrb_define_method(mrb, s, "index", mrb_str_index_m, ARGS_ANY()); /* 15.2.10.5.22 */ + mrb_define_method(mrb, s, "initialize", mrb_str_init, ARGS_REQ(1)); /* 15.2.10.5.23 */ + mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.24 */ + mrb_define_method(mrb, s, "intern", mrb_str_intern, ARGS_NONE()); /* 15.2.10.5.25 */ +#ifdef INCLUDE_REGEXP + mrb_define_method(mrb, s, "match", mrb_str_match_m, ARGS_REQ(1)); /* 15.2.10.5.27 */ +#endif + mrb_define_method(mrb, s, "replace", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.28 */ + mrb_define_method(mrb, s, "reverse", mrb_str_reverse, ARGS_NONE()); /* 15.2.10.5.29 */ + mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, ARGS_NONE()); /* 15.2.10.5.30 */ + mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, ARGS_ANY()); /* 15.2.10.5.31 */ +#ifdef INCLUDE_REGEXP + mrb_define_method(mrb, s, "scan", mrb_str_scan, ARGS_REQ(1)); /* 
15.2.10.5.32 */ +#endif + mrb_define_method(mrb, s, "slice", mrb_str_aref_m, ARGS_ANY()); /* 15.2.10.5.34 */ + mrb_define_method(mrb, s, "split", mrb_str_split_m, ARGS_ANY()); /* 15.2.10.5.35 */ +#ifdef INCLUDE_REGEXP + mrb_define_method(mrb, s, "sub", mrb_str_sub, ARGS_REQ(1)); /* 15.2.10.5.36 */ + mrb_define_method(mrb, s, "sub!", mrb_str_sub_bang, ARGS_REQ(1)); /* 15.2.10.5.37 */ +#endif + mrb_define_method(mrb, s, "to_i", mrb_str_to_i, ARGS_ANY()); /* 15.2.10.5.38 */ + mrb_define_method(mrb, s, "to_f", mrb_str_to_f, ARGS_NONE()); /* 15.2.10.5.39 */ + mrb_define_method(mrb, s, "to_s", mrb_str_to_s, ARGS_NONE()); /* 15.2.10.5.40 */ + mrb_define_method(mrb, s, "to_str", mrb_str_to_s, ARGS_NONE()); /* 15.2.10.5.40 */ + mrb_define_method(mrb, s, "to_sym", mrb_str_intern, ARGS_NONE()); /* 15.2.10.5.41 */ + mrb_define_method(mrb, s, "upcase", mrb_str_upcase, ARGS_REQ(1)); /* 15.2.10.5.42 */ + mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, ARGS_REQ(1)); /* 15.2.10.5.43 */ +#ifdef INCLUDE_ENCODING + mrb_define_method(mrb, s, "encoding", mrb_obj_encoding, ARGS_NONE()); /* 15.2.10.5.44(x) */ + mrb_define_method(mrb, s, "force_encoding", mrb_str_force_encoding, ARGS_REQ(1)); /* 15.2.10.5.45(x) */ +#endif + mrb_define_method(mrb, s, "inspect", mrb_str_inspect, ARGS_NONE()); /* 15.2.10.5.46(x) */ +} diff --git a/src/struct.c b/src/struct.c new file mode 100644 index 0000000000..9af6e2ee46 --- /dev/null +++ b/src/struct.c @@ -0,0 +1,824 @@ +/********************************************************************** + + struct.c - + + $Author: marcandre $ + created at: Tue Mar 22 18:44:30 JST 1995 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "mruby.h" +#include +#include "error.h" +#include "mruby/struct.h" +#include "mruby/array.h" +#include +//#include "defines.h" + +#ifdef INCLUDE_REGEXP +#include "encoding.h" +#endif +mrb_sym rb_frame_this_func(mrb_state *mrb); +mrb_sym 
mrb_frame_callee(mrb_state *mrb); +mrb_value mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int), + mrb_value obj, mrb_value paired_obj, void* arg); + +#include "mruby/numeric.h" +#include "mruby/hash.h" +#include "mruby/string.h" +#include "mruby/class.h" +#include "variable.h" +#include "mruby/range.h" +#include "error.h" +//#include "defines.h" +#define mrb_long2int(n) ((int)(n)) + + +static mrb_value struct_alloc(mrb_state *mrb, mrb_value); + +static struct RClass * +struct_class(mrb_state *mrb) +{ + return mrb_class_get(mrb, "Struct"); +} + +static inline mrb_value +struct_ivar_get(mrb_state *mrb, mrb_value c, mrb_sym id) +{ + struct RClass* kclass; + struct RClass* sclass = struct_class(mrb); + + mrb_value ans; + for (;;) { + //if (mrb_ivar_defined(c, id)) + // return mrb_iv_get(mrb, c, id); + ans = mrb_iv_get(mrb, c, id); + if (!mrb_nil_p(ans)) return ans; + kclass = RCLASS_SUPER(c); + if (kclass == 0 || kclass == sclass) + return mrb_nil_value(); + c = mrb_obj_value(kclass); + } +} + +mrb_value +mrb_struct_iv_get(mrb_state *mrb, mrb_value c, const char *name) +{ + return struct_ivar_get(mrb, c, mrb_intern(mrb, name)); +} + +mrb_value +mrb_struct_s_members(mrb_state *mrb, mrb_value klass) +{ + mrb_value members = struct_ivar_get(mrb, klass, mrb_intern(mrb, "__members__")); + + if (mrb_nil_p(members)) { + mrb_raise(mrb, E_TYPE_ERROR, "uninitialized struct"); + } + if (mrb_type(members) != MRB_TT_ARRAY) { + mrb_raise(mrb, E_TYPE_ERROR, "corrupted struct"); + } + return members; +} + +mrb_value +mrb_struct_members(mrb_state *mrb, mrb_value s) +{ + mrb_value members = mrb_struct_s_members(mrb, mrb_obj_value(mrb_obj_class(mrb, s))); + if (mrb_type(s) == MRB_TT_STRUCT) { + if (RSTRUCT_LEN(s) != RARRAY_LEN(members)) { + mrb_raise(mrb, E_TYPE_ERROR, "struct size differs (%ld required %ld given)", + RARRAY_LEN(members), RSTRUCT_LEN(s)); + } + } + return members; +} + +static mrb_value +mrb_struct_s_members_m(mrb_state 
*mrb, mrb_value klass) +{ + mrb_value members, ary; + mrb_value *p, *pend; + + members = mrb_struct_s_members(mrb, klass); + ary = mrb_ary_new_capa(mrb, RARRAY_LEN(members));//mrb_ary_new2(RARRAY_LEN(members)); + p = RARRAY_PTR(members); pend = p + RARRAY_LEN(members); + while (p < pend) { + mrb_ary_push(mrb, ary, *p); + p++; + } + + return ary; +} + +/* 15.2.18.4.6 */ +/* + * call-seq: + * struct.members -> array + * + * Returns an array of strings representing the names of the instance + * variables. + * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.members #=> [:name, :address, :zip] + */ + +static mrb_value +mrb_struct_members_m(mrb_state *mrb, mrb_value obj) +{ + return mrb_struct_s_members_m(mrb, mrb_obj_value(mrb_obj_class(mrb, obj))); +} + +mrb_value +mrb_struct_getmember(mrb_state *mrb, mrb_value obj, mrb_sym id) +{ + mrb_value members, slot, *ptr, *ptr_members; + long i, len; + + ptr = RSTRUCT_PTR(obj); + members = mrb_struct_members(mrb, obj); + ptr_members = RARRAY_PTR(members); + slot = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id)); + len = RARRAY_LEN(members); + for (i=0; itLAST_TOKEN) +#define is_local_id(id) (is_notop_id(id))//&&((id)&ID_SCOPE_MASK)==ID_LOCAL) +int +mrb_is_local_id(mrb_sym id) +{ + return is_local_id(id); +} + +#define is_const_id(id) (is_notop_id(id))//&&((id)&ID_SCOPE_MASK)==ID_CONST) +int +mrb_is_const_id(mrb_sym id) +{ + return is_const_id(id); +} + +static mrb_value +make_struct(mrb_state *mrb, mrb_value name, mrb_value members, struct RClass * klass) +{ + mrb_value nstr, *ptr_members; + mrb_sym id; + long i, len; + struct RClass *c; + + //OBJ_FREEZE(members); + if (mrb_nil_p(name)) { + c = mrb_class_new(mrb, klass); + //mrb_make_metaclass(nstr, RBASIC(klass)->c); + //mrb_class_inherited(klass, nstr); + } + else { + /* old style: should we warn? 
*/ + name = mrb_str_to_str(mrb, name); + id = mrb_to_id(mrb, name); + if (!mrb_is_const_id(id)) { + //mrb_name_error(id, "identifier %s needs to be constant", StringValuePtr(name)); + mrb_name_error(mrb, id, "identifier %s needs to be constant", mrb_string_value_ptr(mrb, name)); + } + if (mrb_const_defined_at(mrb, klass, id)) { + //mrb_warn("redefining constant Struct::%s", StringValuePtr(name)); + mrb_warn("redefining constant Struct::%s", mrb_string_value_ptr(mrb, name)); + //?rb_mod_remove_const(klass, mrb_sym2name(mrb, id)); + } + c = mrb_define_class_under(mrb, klass, RSTRING_PTR(name), klass); + } + MRB_SET_INSTANCE_TT(c, MRB_TT_STRUCT); + nstr = mrb_obj_value(c); + mrb_iv_set(mrb, nstr, mrb_intern(mrb, "__members__"), members); + + mrb_define_class_method(mrb, c, "new", mrb_class_new_instance_m, ARGS_ANY()); + mrb_define_class_method(mrb, c, "[]", mrb_class_new_instance_m, ARGS_ANY()); + mrb_define_class_method(mrb, c, "members", mrb_struct_s_members_m, ARGS_NONE()); + //RSTRUCT(nstr)->basic.c->super = c->c; + ptr_members = RARRAY_PTR(members); + len = RARRAY_LEN(members); + for (i=0; i< len; i++) { + mrb_sym id = SYM2ID(ptr_members[i]); + if (mrb_is_local_id(id) || mrb_is_const_id(id)) { + if (i < N_REF_FUNC) { + mrb_define_method_id(mrb, c, id, (mrb_func_t)ref_func[i], 0); + } + else { + mrb_define_method_id(mrb, c, id, mrb_struct_ref, 0); + } + mrb_define_method_id(mrb, c, mrb_id_attrset(id), (mrb_func_t)mrb_struct_set, 1); + } + } + + return nstr; +} + +mrb_value +mrb_struct_define(mrb_state *mrb, const char *name, ...) 
+{ + va_list ar; + mrb_value nm, ary; + char *mem; + + if (!name) nm = mrb_nil_value(); + else nm = mrb_str_new2(mrb, name); + ary = mrb_ary_new(mrb); + + va_start(ar, name); + while ((mem = va_arg(ar, char*)) != 0) { + mrb_sym slot = mrb_intern(mrb, mem); + mrb_ary_push(mrb, ary, mrb_str_new_cstr(mrb, mrb_sym2name(mrb, slot))); + } + va_end(ar); + + return make_struct(mrb, nm, ary, struct_class(mrb)); +} + +/* 15.2.18.3.1 */ +/* + * call-seq: + * Struct.new( [aString] [, aSym]+> ) -> StructClass + * StructClass.new(arg, ...) -> obj + * StructClass[arg, ...] -> obj + * + * Creates a new class, named by aString, containing accessor + * methods for the given symbols. If the name aString is + * omitted, an anonymous structure class will be created. Otherwise, + * the name of this struct will appear as a constant in class + * Struct, so it must be unique for all + * Structs in the system and should start with a capital + * letter. Assigning a structure class to a constant effectively gives + * the class the name of the constant. + * + * Struct::new returns a new Class object, + * which can then be used to create specific instances of the new + * structure. The number of actual parameters must be + * less than or equal to the number of attributes defined for this + * class; unset parameters default to nil. Passing too many + * parameters will raise an ArgumentError. + * + * The remaining methods listed in this section (class and instance) + * are defined for this generated class. 
+ * + * # Create a structure with a name in Struct + * Struct.new("Customer", :name, :address) #=> Struct::Customer + * Struct::Customer.new("Dave", "123 Main") #=> # + * + * # Create a structure named by its constant + * Customer = Struct.new(:name, :address) #=> Customer + * Customer.new("Dave", "123 Main") #=> # + */ +static mrb_value +mrb_struct_s_def(mrb_state *mrb, mrb_value klass) +{ + mrb_value name, rest; + mrb_value *pargv; + int argcnt; + long i; + mrb_value b, st; + mrb_sym id; + mrb_value *argv; + int argc; + + name = mrb_nil_value(); + rest = mrb_nil_value(); + mrb_get_args(mrb, "&*", &b, &argv, &argc); + if (argc > 0) name = argv[0]; + if (argc > 1) rest = argv[1]; + //mrb_scan_args(argc, argv, "1*", &name, &rest); + if (mrb_type(rest) == MRB_TT_ARRAY) { + if (!mrb_nil_p(name) && SYMBOL_P(name)) { + /* 1stArgument:symbol -> name=nil rest=argv[0]-[n] */ + mrb_ary_unshift(mrb, rest, name); + name = mrb_nil_value(); + } + for (i=0; i name=nil rest=argv[0]-[n] */ + //mrb_ary_unshift(mrb, rest, name); + name = mrb_nil_value(); + pargv = &argv[0]; + argcnt++; + } + rest = mrb_ary_new_from_values(mrb, pargv, argcnt); + } + st = make_struct(mrb, name, rest, struct_class(mrb)); + if (!mrb_nil_p(b)) { + mrb_funcall(mrb, b, "call", 1, &st); + } + + return st; +} + +static long +num_members(mrb_state *mrb, struct RClass *klass) +{ + mrb_value members; + members = struct_ivar_get(mrb, mrb_obj_value(klass), mrb_intern(mrb, "__members__")); + if (mrb_type(members) != MRB_TT_ARRAY) { + mrb_raise(mrb, E_TYPE_ERROR, "broken members"); + } + return RARRAY_LEN(members); +} + +/* 15.2.18.4.8 */ +/* + * Initialize the member slots of self from argv[0..argc-1]. + * + * Raises ArgumentError ("struct size differs") when more values are given + * than the struct class declares members. The slot array always holds one + * entry per declared member; members without a supplied value are set to + * nil. Returns self. + */ +static mrb_value +mrb_struct_initialize_withArg(mrb_state *mrb, int argc, mrb_value *argv, mrb_value self) +{ + struct RClass *klass = mrb_obj_class(mrb, self); + long n, i; + struct RStruct *st; + + mrb_struct_modify(self); + n = num_members(mrb, klass); + if (n < argc) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "struct size differs"); + } + st = RSTRUCT(self); + /* size the buffer by member count: st->len is n and readers index ptr[0..len-1] */ + st->ptr = 
malloc(sizeof(mrb_value)*n); + st->len = n; + memcpy(st->ptr, argv, sizeof(mrb_value)*argc); + /* unset members default to nil */ + for (i = argc; i < n; i++) { + st->ptr[i] = mrb_nil_value(); + } + return self; +} + +static mrb_value +mrb_struct_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self) +{ + mrb_value *argv; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return mrb_struct_initialize_withArg(mrb, argc, argv, self); +} + +mrb_value +mrb_struct_initialize(mrb_state *mrb, mrb_value self, mrb_value values) +{ + return mrb_struct_initialize_withArg(mrb, RARRAY_LEN/*INT*/(values), RARRAY_PTR(values), self); +} + +mrb_value +mrb_struct_alloc(mrb_state *mrb, mrb_value klass, mrb_value values) +{ + return mrb_class_new_instance(mrb, RARRAY_LEN(values), RARRAY_PTR(values), mrb_class(mrb, klass)); +} + +mrb_value +mrb_struct_new(mrb_state *mrb, struct RClass *klass, ...) +{ + mrb_value tmpargs[N_REF_FUNC], *mem = tmpargs; + int size, i; + va_list args; + + size = mrb_long2int(num_members(mrb, klass)); + if (size > numberof(tmpargs)) { + tmpargs[0] = mrb_ary_tmp_new(mrb, size); + mem = RARRAY_PTR(tmpargs[0]); + } + va_start(args, klass); + for (i=0; i"); + } + + members = mrb_struct_members(mrb, s); + ptr_members = RARRAY_PTR(members); + ptr = RSTRUCT_PTR(s); + len = RSTRUCT_LEN(s); + for (i=0; i 0) { + mrb_str_cat2(mrb, str, ", "); + } + else if (cn) { + mrb_str_cat2(mrb, str, " "); + } + slot = ptr_members[i]; + id = SYM2ID(slot); + if (mrb_is_local_id(id) || mrb_is_const_id(id)) { + //mrb_str_append(str, mrb_id2str(id)); + mrb_str_append(mrb, str, mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id))); + } + else { + mrb_str_append(mrb, str, mrb_inspect(mrb, slot)); + } + mrb_str_cat2(mrb, str, "="); + mrb_str_append(mrb, str, mrb_inspect(mrb, ptr[i])); + } + mrb_str_cat2(mrb, str, ">"); + //OBJ_INFECT(str, s); + + return str; +} + +/* + * call-seq: + * struct.to_s -> string + * struct.inspect -> string + * + * Describe the contents of this struct in a string.
+ */ +static mrb_value +mrb_struct_inspect(mrb_state *mrb, mrb_value s) +{ + return inspect_struct(mrb, s, s, 0); +} + +/* 15.2.18.4.9 */ +/* :nodoc: */ +mrb_value +mrb_struct_init_copy(mrb_state *mrb, mrb_value copy) +{ + mrb_value s; + mrb_get_args(mrb, "o", &s); + + if (mrb_obj_equal(mrb, copy, s)) return copy; + //mrb_check_frozen(copy); + if (!mrb_obj_is_instance_of(mrb, s, mrb_obj_class(mrb, copy))) { + mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class"); + } + if (RSTRUCT_LEN(copy) != RSTRUCT_LEN(s)) { + mrb_raise(mrb, E_TYPE_ERROR, "struct size mismatch"); + } + memcpy(RSTRUCT_PTR(copy), RSTRUCT_PTR(s), sizeof(mrb_value)*RSTRUCT_LEN(copy)); + + return copy; +} + +static mrb_value +mrb_struct_aref_id(mrb_state *mrb, mrb_value s, mrb_sym id) +{ + mrb_value *ptr, members, *ptr_members; + long i, len; + + ptr = RSTRUCT_PTR(s); + members = mrb_struct_members(mrb, s); + ptr_members = RARRAY_PTR(members); + len = RARRAY_LEN(members); + for (i=0; i anObject + * struct[fixnum] -> anObject + * + * Attribute Reference---Returns the value of the instance variable + * named by symbol, or indexed (0..length-1) by + * fixnum. Will raise NameError if the named + * variable does not exist, or IndexError if the index is + * out of range. 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * + * joe["name"] #=> "Joe Smith" + * joe[:name] #=> "Joe Smith" + * joe[0] #=> "Joe Smith" + */ +mrb_value +mrb_struct_aref_n(mrb_state *mrb, mrb_value s, mrb_value idx) +{ + long i; + + if (mrb_type(idx) == MRB_TT_STRING || mrb_type(idx) == MRB_TT_SYMBOL) { + return mrb_struct_aref_id(mrb, s, mrb_to_id(mrb, idx)); + } + + i = mrb_fixnum(idx); + if (i < 0) i = RSTRUCT_LEN(s) + i; + if (i < 0) + mrb_raise(mrb, E_INDEX_ERROR, "offset %ld too small for struct(size:%ld)", + i, RSTRUCT_LEN(s)); + if (RSTRUCT_LEN(s) <= i) + mrb_raise(mrb, E_INDEX_ERROR, "offset %ld too large for struct(size:%ld)", + i, RSTRUCT_LEN(s)); + return RSTRUCT_PTR(s)[i]; +} + +mrb_value +mrb_struct_aref(mrb_state *mrb, mrb_value s) +{ + mrb_value idx; + + mrb_get_args(mrb, "o", &idx); + return mrb_struct_aref_n(mrb, s, idx); +} + +static mrb_value +mrb_struct_aset_id(mrb_state *mrb, mrb_value s, mrb_sym id, mrb_value val) +{ + mrb_value members, *ptr, *ptr_members; + long i, len; + + members = mrb_struct_members(mrb, s); + len = RARRAY_LEN(members); + mrb_struct_modify(s); + if (RSTRUCT_LEN(s) != len) { + mrb_raise(mrb, E_TYPE_ERROR, "struct size differs (%ld required %ld given)", + len, RSTRUCT_LEN(s)); + } + ptr = RSTRUCT_PTR(s); + ptr_members = RARRAY_PTR(members); + for (i=0; i obj + * struct[fixnum] = obj -> obj + * + * Attribute Assignment---Assigns to the instance variable named by + * symbol or fixnum the value obj and + * returns it. Will raise a NameError if the named + * variable does not exist, or an IndexError if the index + * is out of range. 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * + * joe["name"] = "Luke" + * joe[:zip] = "90210" + * + * joe.name #=> "Luke" + * joe.zip #=> "90210" + */ + +mrb_value +mrb_struct_aset(mrb_state *mrb, mrb_value s) +{ + long i; + mrb_value idx; + mrb_value val; + mrb_get_args(mrb, "oo", &idx, &val); + + if (mrb_type(idx) == MRB_TT_STRING || mrb_type(idx) == MRB_TT_SYMBOL) { + return mrb_struct_aset_id(mrb, s, mrb_to_id(mrb, idx), val); + } + + i = mrb_fixnum(idx); + if (i < 0) i = RSTRUCT_LEN(s) + i; + if (i < 0) { + mrb_raise(mrb, E_INDEX_ERROR, "offset %ld too small for struct(size:%ld)", + i, RSTRUCT_LEN(s)); + } + if (RSTRUCT_LEN(s) <= i) { + mrb_raise(mrb, E_INDEX_ERROR, "offset %ld too large for struct(size:%ld)", + i, RSTRUCT_LEN(s)); + } + mrb_struct_modify(s); + return RSTRUCT_PTR(s)[i] = val; +} + +static mrb_value +recursive_equal(mrb_state *mrb, mrb_value s, mrb_value s2, int recur) +{ + mrb_value *ptr, *ptr2; + long i, len; + + if (recur) return mrb_true_value(); /* Subtle! */ + ptr = RSTRUCT_PTR(s); + ptr2 = RSTRUCT_PTR(s2); + len = RSTRUCT_LEN(s); + for (i=0; i true or false + * + * Equality---Returns true if other_struct is + * equal to this one: they must be of the same class as generated by + * Struct::new, and the values of all instance variables + * must be equal (according to Object#==). 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joejr = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * jane = Customer.new("Jane Doe", "456 Elm, Anytown NC", 12345) + * joe == joejr #=> true + * joe == jane #=> false + */ + +static mrb_value +mrb_struct_equal(mrb_state *mrb, mrb_value s) +{ + mrb_value s2; + + mrb_get_args(mrb, "o", &s2); + if (mrb_obj_equal(mrb, s, s2)) return mrb_true_value(); + if (mrb_type(s2) != MRB_TT_STRUCT) return mrb_false_value(); + if (mrb_obj_class(mrb, s) != mrb_obj_class(mrb, s2)) return mrb_false_value(); + if (RSTRUCT_LEN(s) != RSTRUCT_LEN(s2)) { + mrb_bug("inconsistent struct"); /* should never happen */ + } + + return mrb_exec_recursive_paired(mrb, recursive_equal, s, s2, (void*)0); +} + +static mrb_value +recursive_eql(mrb_state *mrb, mrb_value s, mrb_value s2, int recur) +{ + mrb_value *ptr, *ptr2; + long i, len; + + if (recur) return mrb_true_value(); /* Subtle! */ + ptr = RSTRUCT_PTR(s); + ptr2 = RSTRUCT_PTR(s2); + len = RSTRUCT_LEN(s); + for (i=0; i true or false + * + * Two structures are equal if they are the same object, or if all their + * fields are equal (using eql?). + */ +static mrb_value +mrb_struct_eql(mrb_state *mrb, mrb_value s) +{ + mrb_value s2; + + mrb_get_args(mrb, "o", &s2); + if (mrb_obj_equal(mrb, s, s2)) return mrb_true_value(); + if (mrb_type(s2) != MRB_TT_STRUCT) return mrb_false_value(); + if (mrb_obj_class(mrb, s) != mrb_obj_class(mrb, s2)) return mrb_false_value(); + if (RSTRUCT_LEN(s) != RSTRUCT_LEN(s2)) { + mrb_bug("inconsistent struct"); /* should never happen */ + } + + return mrb_exec_recursive_paired(mrb, recursive_eql, s, s2, (void*)0); +} + +/* + * A Struct is a convenient way to bundle a number of + * attributes together, using accessor methods, without having to write + * an explicit class. 
+ * + * The Struct class is a generator of specific classes, + * each one of which is defined to hold a set of variables and their + * accessors. In these examples, we'll call the generated class + * ``CustomerClass,'' and we'll show an example instance of that + * class as ``CustomerInst.'' + * + * In the descriptions that follow, the parameter symbol refers + * to a symbol, which is either a quoted string or a + * Symbol (such as :name). + */ +void +mrb_init_struct(mrb_state *mrb) +{ + struct RClass *st; + st = mrb_define_class(mrb, "Struct", mrb->object_class); + //mrb_include_module(mrb_cStruct, rb_mEnumerable); + + //mrb_undef_alloc_func(mrb_cStruct); + mrb_define_class_method(mrb, st, "new", mrb_struct_s_def, ARGS_ANY()); /* 15.2.18.3.1 */ + + mrb_define_method(mrb, st, "==", mrb_struct_equal, ARGS_REQ(1)); /* 15.2.18.4.1 */ + mrb_define_method(mrb, st, "[]", mrb_struct_aref, ARGS_REQ(1)); /* 15.2.18.4.2 */ + mrb_define_method(mrb, st, "[]=", mrb_struct_aset, ARGS_REQ(2)); /* 15.2.18.4.3 */ + mrb_define_method(mrb, st, "members", mrb_struct_members_m, ARGS_NONE()); /* 15.2.18.4.6 */ + mrb_define_method(mrb, st, "initialize", mrb_struct_initialize_m,ARGS_ANY()); /* 15.2.18.4.8 */ + mrb_define_method(mrb, st, "initialize_copy", mrb_struct_init_copy, ARGS_REQ(1)); /* 15.2.18.4.9 */ + mrb_define_method(mrb, st, "inspect", mrb_struct_inspect, ARGS_NONE()); /* 15.2.18.4.10(x) */ + mrb_define_alias(mrb, st, "to_s", "inspect"); /* 15.2.18.4.11(x) */ + mrb_define_method(mrb, st, "eql?", mrb_struct_eql, ARGS_REQ(1)); /* 15.2.18.4.12(x) */ + +} diff --git a/src/symbol.c b/src/symbol.c new file mode 100644 index 0000000000..ad3400834f --- /dev/null +++ b/src/symbol.c @@ -0,0 +1,255 @@ +#include "mruby.h" +#include "ritehash.h" +#include + +#include +#include +#include "mruby/string.h" +#include +#include "mruby/class.h" +#include "variable.h" +#include + +#ifdef INCLUDE_REGEXP +#include "re.h" +#include "regex.h" +#include "st.h" +#endif + +/* 
------------------------------------------------------ */ +KHASH_MAP_INIT_INT(s2n, const char*); +KHASH_MAP_INIT_STR(n2s, mrb_sym); +/* ------------------------------------------------------ */ +mrb_sym +mrb_intern(mrb_state *mrb, const char *name) +{ + khash_t(n2s) *h = mrb->name2sym; + khash_t(s2n) *rh = mrb->sym2name; + khiter_t k; + int r; + size_t len; + char *p; + mrb_sym sym; + + k = kh_get(n2s, h, name); + if (k != kh_end(h)) + return kh_value(h, k); + + sym = ++mrb->symidx; + len = strlen(name); + p = mrb_malloc(mrb, len+1); + memcpy(p, name, len); + p[len] = 0; + k = kh_put(n2s, h, p, &r); + kh_value(h, k) = sym; + + k = kh_put(s2n, rh, sym, &r); + kh_value(rh, k) = p; + + return sym; +} + +const char* +mrb_sym2name(mrb_state *mrb, mrb_sym sym) +{ + khash_t(s2n) *h = mrb->sym2name; + khiter_t k; + + k = kh_get(s2n, h, sym); + if (k == kh_end(h)) { + return NULL; /* missing */ + } + return kh_value(h, k); +} + +void +mrb_free_symtbls(mrb_state *mrb) +{ + khash_t(s2n) *h = mrb->sym2name; + khiter_t k; + + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) mrb_free(mrb, (char*)kh_value(h, k)); + kh_destroy(s2n,mrb->sym2name); + kh_destroy(n2s,mrb->name2sym); +} + +void +mrb_init_symtbl(mrb_state *mrb) +{ + mrb->name2sym = kh_init(n2s, mrb); + mrb->sym2name = kh_init(s2n, mrb); +} + +/********************************************************************** + * Document-class: Symbol + * + * Symbol objects represent names and some strings + * inside the Ruby + * interpreter. They are generated using the :name and + * :"string" literals + * syntax, and by the various to_sym methods. The same + * Symbol object will be created for a given name or string + * for the duration of a program's execution, regardless of the context + * or meaning of that name. Thus if Fred is a constant in + * one context, a method in another, and a class in a third, the + * Symbol :Fred will be the same object in + * all three contexts. 
+ * + * module One + * class Fred + * end + * $f1 = :Fred + * end + * module Two + * Fred = 1 + * $f2 = :Fred + * end + * def Fred() + * end + * $f3 = :Fred + * $f1.object_id #=> 2514190 + * $f2.object_id #=> 2514190 + * $f3.object_id #=> 2514190 + * + */ + + +/* 15.2.11.3.1 */ +/* + * call-seq: + * sym == obj -> true or false + * + * Equality---If sym and obj are exactly the same + * symbol, returns true. + */ + +static mrb_value +sym_equal(mrb_state *mrb, mrb_value sym1) +{ + mrb_value sym2; + + mrb_get_args(mrb, "o", &sym2); + if (mrb_obj_equal(mrb, sym1, sym2)) return mrb_true_value(); + return mrb_false_value(); +} + +/* 15.2.11.3.2 */ +/* 15.2.11.3.3 */ +/* + * call-seq: + * sym.id2name -> string + * sym.to_s -> string + * + * Returns the name or string corresponding to sym. + * + * :fred.id2name #=> "fred" + */ +mrb_value +mrb_sym_to_s(mrb_state *mrb, mrb_value sym) +{ + mrb_sym id = SYM2ID(sym); + +#ifdef INCLUDE_REGEXP + //return str_new3(mrb_cString, mrb_id2str(id)); + return str_new3(mrb, mrb_obj_class(mrb, sym), mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id))); +#else + return mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id)); //mrb_str_new2(mrb_id2name(SYM2ID(sym))); +#endif + +} + +/* 15.2.11.3.4 */ +/* + * call-seq: + * sym.to_sym -> sym + * sym.intern -> sym + * + * In general, to_sym returns the Symbol corresponding + * to an object. As sym is already a symbol, self is returned + * in this case. + */ + +static mrb_value +sym_to_sym(mrb_state *mrb, mrb_value sym) +{ + return sym; +} + +/* 15.2.11.3.5(x) */ +/* + * call-seq: + * sym.inspect -> string + * + * Returns the representation of sym as a symbol literal. 
+ * + * :fred.inspect #=> ":fred" + */ + +static mrb_value +sym_inspect(mrb_state *mrb, mrb_value sym) +{ +#ifdef INCLUDE_ENCODING + #define STR_ENC_GET(mrb, str) mrb_enc_from_index(mrb, ENCODING_GET(mrb, str)) + mrb_value str; + mrb_sym id = SYM2ID(sym); + mrb_encoding *enc; + const char *ptr; + long len; + char *dest; + mrb_encoding *resenc = mrb_default_internal_encoding(mrb); + + if (resenc == NULL) resenc = mrb_default_external_encoding(mrb); + sym = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id));//mrb_id2str(id); + enc = STR_ENC_GET(mrb, sym); + ptr = RSTRING_PTR(sym); + len = RSTRING_LEN(sym); + if ((resenc != enc && !mrb_str_is_ascii_only_p(mrb, sym)) || len != (long)strlen(ptr) || + !mrb_enc_symname_p(ptr, enc) || !sym_printable(mrb, ptr, ptr + len, enc)) { + str = mrb_str_inspect(mrb, sym); + len = RSTRING_LEN(str); + mrb_str_resize(mrb, str, len + 1); + dest = RSTRING_PTR(str); + memmove(dest + 1, dest, len); + dest[0] = ':'; + } + else { + char *dest; + str = mrb_enc_str_new(mrb, 0, len + 1, enc); + dest = RSTRING_PTR(str); + dest[0] = ':'; + memcpy(dest + 1, ptr, len); + } + return str; +#else + mrb_value str; + const char *name; + mrb_sym id = SYM2ID(sym); + + name = mrb_sym2name(mrb, id); //mrb_id2name(id); + str = mrb_str_new(mrb, 0, strlen(name)+1); + RSTRING(str)->buf[0] = ':'; + strcpy(RSTRING(str)->buf+1, name); + if (!mrb_symname_p(name)) { + str = mrb_str_dump(mrb, str); + strncpy(RSTRING(str)->buf, ":\"", 2); + } + return str; +#endif +} + + +void +mrb_init_symbols(mrb_state *mrb) +{ + struct RClass *sym; + + sym = mrb->symbol_class = mrb_define_class(mrb, "Symbol", mrb->object_class); + + mrb_define_method(mrb, sym, "===", sym_equal, ARGS_REQ(1)); /* 15.2.11.3.1 */ + mrb_define_method(mrb, sym, "id2name", mrb_sym_to_s, ARGS_NONE()); /* 15.2.11.3.2 */ + mrb_define_method(mrb, sym, "to_s", mrb_sym_to_s, ARGS_NONE()); /* 15.2.11.3.3 */ + mrb_define_method(mrb, sym, "to_sym", sym_to_sym, ARGS_NONE()); /* 15.2.11.3.4 */ + + mrb_define_method(mrb, 
sym, "inspect", sym_inspect, ARGS_NONE()); /* 15.2.11.3.5(x) */ +} diff --git a/src/transcode.c b/src/transcode.c new file mode 100644 index 0000000000..cb2f867318 --- /dev/null +++ b/src/transcode.c @@ -0,0 +1,4366 @@ +/********************************************************************** + + transcode.c - + + $Author: usa $ + created at: Tue Oct 30 16:10:22 JST 2007 + + Copyright (C) 2007 Martin Duerst + +**********************************************************************/ + +#include "mruby.h" +#ifdef INCLUDE_ENCODING +#include "encoding.h" +#include /* for ssize_t */ +#include "transcode_data.h" +#include +#include "st.h" +#include "variable.h" +#include +#include "mruby/string.h" +#include "mruby/array.h" +#include "mruby/hash.h" +#include "error.h" +#include "mruby/numeric.h" +//#include "mio.h" +#include + + +#define TYPE(o) (o).tt//mrb_type(o) + +#define E_CONVERTERNOTFOUND_ERROR (mrb_class_obj_get(mrb, "ConverterNotFoundError")) +#define E_INVALIDBYTESEQUENCE_ERROR (mrb_class_obj_get(mrb, "InvalidByteSequenceError")) +#define E_UNDEFINEDCONVERSION_ERROR (mrb_class_obj_get(mrb, "UndefinedConversionError")) + +/* mrb_value mrb_cEncoding = rb_define_class("Encoding", rb_cObject); */ +mrb_value rb_eUndefinedConversionError; +mrb_value mrb_eInvalidByteSequenceError; +mrb_value rb_eConverterNotFoundError; + +mrb_value mrb_cEncodingConverter; + +static mrb_value sym_invalid, sym_undef, sym_replace, sym_fallback; +static mrb_value sym_xml, sym_text, sym_attr; +static mrb_value sym_universal_newline; +static mrb_value sym_crlf_newline; +static mrb_value sym_cr_newline; +static mrb_value sym_partial_input; + +static mrb_value sym_invalid_byte_sequence; +static mrb_value sym_undefined_conversion; +static mrb_value sym_destination_buffer_full; +static mrb_value sym_source_buffer_empty; +static mrb_value sym_finished; +static mrb_value sym_after_output; +static mrb_value sym_incomplete_input; + +static unsigned char * +allocate_converted_string(mrb_state *mrb, + 
const char *sname, const char *dname, + const unsigned char *str, size_t len, + unsigned char *caller_dst_buf, size_t caller_dst_bufsize, + size_t *dst_len_ptr); + +/* dynamic structure, one per conversion (similar to iconv_t) */ +/* may carry conversion state (e.g. for iso-2022-jp) */ +typedef struct mrb_transcoding { + const mrb_transcoder *transcoder; + + int flags; + + int resume_position; + unsigned int next_table; + mrb_value next_info; + unsigned char next_byte; + unsigned int output_index; + + ssize_t recognized_len; /* already interpreted */ + ssize_t readagain_len; /* not yet interpreted */ + union { + unsigned char ary[8]; /* max_input <= sizeof(ary) */ + unsigned char *ptr; /* length: max_input */ + } readbuf; /* recognized_len + readagain_len used */ + + ssize_t writebuf_off; + ssize_t writebuf_len; + union { + unsigned char ary[8]; /* max_output <= sizeof(ary) */ + unsigned char *ptr; /* length: max_output */ + } writebuf; + + union mrb_transcoding_state_t { /* opaque data for stateful encoding */ + void *ptr; + char ary[sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*)]; + double dummy_for_alignment; + } state; +} mrb_transcoding; +#define TRANSCODING_READBUF(tc) \ + ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \ + (tc)->readbuf.ary : \ + (tc)->readbuf.ptr) +#define TRANSCODING_WRITEBUF(tc) \ + ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \ + (tc)->writebuf.ary : \ + (tc)->writebuf.ptr) +#define TRANSCODING_WRITEBUF_SIZE(tc) \ + ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \ + sizeof((tc)->writebuf.ary) : \ + (size_t)(tc)->transcoder->max_output) +#define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union mrb_transcoding_state_t)) +#define TRANSCODING_STATE(tc) \ + ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? 
\ + (tc)->state.ary : \ + (tc)->state.ptr) + +typedef struct { + struct mrb_transcoding *tc; + unsigned char *out_buf_start; + unsigned char *out_data_start; + unsigned char *out_data_end; + unsigned char *out_buf_end; + mrb_econv_result_t last_result; +} mrb_econv_elem_t; + +struct mrb_econv_t { + int flags; + const char *source_encoding_name; + const char *destination_encoding_name; + + int started; + + const unsigned char *replacement_str; + size_t replacement_len; + const char *replacement_enc; + int replacement_allocated; + + unsigned char *in_buf_start; + unsigned char *in_data_start; + unsigned char *in_data_end; + unsigned char *in_buf_end; + mrb_econv_elem_t *elems; + int num_allocated; + int num_trans; + int num_finished; + struct mrb_transcoding *last_tc; + + /* last error */ + struct { + mrb_econv_result_t result; + struct mrb_transcoding *error_tc; + const char *source_encoding; + const char *destination_encoding; + const unsigned char *error_bytes_start; + size_t error_bytes_len; + size_t readagain_len; + } last_error; + + /* The following fields are only for Encoding::Converter. + * mrb_econv_open set them NULL. 
*/ + mrb_encoding *source_encoding; + mrb_encoding *destination_encoding; +}; + +/* + * Dispatch data and logic + */ + +#define DECORATOR_P(sname, dname) (*(sname) == '\0') + +typedef struct { + const char *sname; + const char *dname; + const char *lib; /* NULL means no library needs to be loaded */ + const mrb_transcoder *transcoder; +} transcoder_entry_t; + +static st_table *transcoder_table; +/* + * Return the entry for the (sname, dname) pair, creating it if absent. + * transcoder_table maps sname to a second-level table keyed by dname; + * either level is created on first use. A new entry starts with lib and + * transcoder set to NULL and stores the sname/dname pointers as given + * (the strings are not copied). + * NOTE(review): the malloc result is not checked before use. + */ +static transcoder_entry_t * +make_transcoder_entry(const char *sname, const char *dname) +{ + st_data_t val; + st_table *table2; + + if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) { + val = (st_data_t)st_init_strcasetable(); + st_add_direct(transcoder_table, (st_data_t)sname, val); + } + table2 = (st_table *)val; + if (!st_lookup(table2, (st_data_t)dname, &val)) { + transcoder_entry_t *entry = malloc(sizeof(transcoder_entry_t)); + entry->sname = sname; + entry->dname = dname; + entry->lib = NULL; + entry->transcoder = NULL; + val = (st_data_t)entry; + st_add_direct(table2, (st_data_t)dname, val); + } + return (transcoder_entry_t *)val; +} +/* + * Look up the entry for (sname, dname) without creating anything. + * Returns NULL when either the source or the destination is unknown. + */ +static transcoder_entry_t * +get_transcoder_entry(const char *sname, const char *dname) +{ + st_data_t val; + st_table *table2; + + if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) { + return NULL; + } + table2 = (st_table *)val; + if (!st_lookup(table2, (st_data_t)dname, &val)) { + return NULL; + } + return (transcoder_entry_t *)val; +} +/* + * Attach transcoder tr to the entry for its src_encoding/dst_encoding + * pair, creating the entry if needed. Raises ArgumentError when that + * pair already has a transcoder registered. + */ +void +mrb_register_transcoder(mrb_state *mrb, const mrb_transcoder *tr) +{ + const char *const sname = tr->src_encoding; + const char *const dname = tr->dst_encoding; + + transcoder_entry_t *entry; + + entry = make_transcoder_entry(sname, dname); + if (entry->transcoder) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "transcoder from %s to %s has been already registered", + sname, dname); + } + + entry->transcoder = tr; +} + +static void +declare_transcoder(const char *sname, const char *dname, const char *lib) +{ + transcoder_entry_t *entry; + + entry = make_transcoder_entry(sname, 
dname); + entry->lib = lib; +} + +#define MAX_TRANSCODER_LIBNAME_LEN 64 +static const char transcoder_lib_prefix[] = "enc/trans/"; + +void +mrb_declare_transcoder(mrb_state *mrb, const char *enc1, const char *enc2, const char *lib) +{ + if (!lib || strlen(lib) > MAX_TRANSCODER_LIBNAME_LEN) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid library name - %s", + lib ? lib : "(null)"); + } + declare_transcoder(enc1, enc2, lib); +} + +#define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0) + +typedef struct search_path_queue_tag { + struct search_path_queue_tag *next; + const char *enc; +} search_path_queue_t; + +typedef struct { + st_table *visited; + search_path_queue_t *queue; + search_path_queue_t **queue_last_ptr; + const char *base_enc; +} search_path_bfs_t; + +static int +transcode_search_path_i(st_data_t key, st_data_t val, st_data_t arg) +{ + const char *dname = (const char *)key; + search_path_bfs_t *bfs = (search_path_bfs_t *)arg; + search_path_queue_t *q; + + if (st_lookup(bfs->visited, (st_data_t)dname, &val)) { + return ST_CONTINUE; + } + + q = malloc(sizeof(search_path_queue_t)); + q->enc = dname; + q->next = NULL; + *bfs->queue_last_ptr = q; + bfs->queue_last_ptr = &q->next; + + st_add_direct(bfs->visited, (st_data_t)dname, (st_data_t)bfs->base_enc); + return ST_CONTINUE; +} + +static int +transcode_search_path(mrb_state *mrb, const char *sname, const char *dname, + void (*callback)(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg), + void *arg) +{ + search_path_bfs_t bfs; + search_path_queue_t *q; + st_data_t val; + st_table *table2; + int found; + int pathlen = -1; + + if (encoding_equal(sname, dname)) + return -1; + + q = malloc(sizeof(search_path_queue_t));//ALLOC(search_path_queue_t); + q->enc = sname; + q->next = NULL; + bfs.queue_last_ptr = &q->next; + bfs.queue = q; + + bfs.visited = st_init_strcasetable(); + st_add_direct(bfs.visited, (st_data_t)sname, (st_data_t)NULL); + + while (bfs.queue) { + q = bfs.queue; + 
bfs.queue = q->next; + if (!bfs.queue) + bfs.queue_last_ptr = &bfs.queue; + + if (!st_lookup(transcoder_table, (st_data_t)q->enc, &val)) { + xfree(q); + continue; + } + table2 = (st_table *)val; + + if (st_lookup(table2, (st_data_t)dname, &val)) { + st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc); + xfree(q); + found = 1; + goto cleanup; + } + + bfs.base_enc = q->enc; + st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs); + bfs.base_enc = NULL; + + xfree(q); + } + found = 0; + + cleanup: + while (bfs.queue) { + q = bfs.queue; + bfs.queue = q->next; + xfree(q); + } + + if (found) { + const char *enc = dname; + int depth; + pathlen = 0; + while (1) { + st_lookup(bfs.visited, (st_data_t)enc, &val); + if (!val) + break; + pathlen++; + enc = (const char *)val; + } + depth = pathlen; + enc = dname; + while (1) { + st_lookup(bfs.visited, (st_data_t)enc, &val); + if (!val) + break; + callback(mrb, (const char *)val, enc, --depth, arg); + enc = (const char *)val; + } + } + + st_free_table(bfs.visited); + + return pathlen; /* is -1 if not found */ +} + +int +mrb_require(mrb_state *mrb, const char *fname) +{ + //mrb_value fn = mrb_str_new2(mrb, fname); + //OBJ_FREEZE(fn); + //return mrb_require_safe(fn, mrb_safe_level()); + mrb_str_new2(mrb, fname); + return 1/* OK */; +} + +static const mrb_transcoder * +load_transcoder_entry(mrb_state *mrb, transcoder_entry_t *entry) +{ + if (entry->transcoder) + return entry->transcoder; + + if (entry->lib) { + const char *lib = entry->lib; + size_t len = strlen(lib); + char path[sizeof(transcoder_lib_prefix) + MAX_TRANSCODER_LIBNAME_LEN]; + + entry->lib = NULL; + + if (len > MAX_TRANSCODER_LIBNAME_LEN) + return NULL; + memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1); + memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len + 1); + if (!mrb_require(mrb, path)) + return NULL; + } + + if (entry->transcoder) + return entry->transcoder; + + return NULL; +} + +static const char* 
+get_replacement_character(const char *encname, size_t *len_ret, const char **repl_encname_ptr) +{ + if (encoding_equal(encname, "UTF-8")) { + *len_ret = 3; + *repl_encname_ptr = "UTF-8"; + return "\xEF\xBF\xBD"; + } + else { + *len_ret = 1; + *repl_encname_ptr = "US-ASCII"; + return "?"; + } +} + +/* + * Transcoding engine logic + */ + +static const unsigned char * +transcode_char_start(mrb_transcoding *tc, + const unsigned char *in_start, + const unsigned char *inchar_start, + const unsigned char *in_p, + size_t *char_len_ptr) +{ + const unsigned char *ptr; + if (inchar_start - in_start < tc->recognized_len) { + memcpy(TRANSCODING_READBUF(tc) + tc->recognized_len, + inchar_start, in_p - inchar_start); + ptr = TRANSCODING_READBUF(tc); + } + else { + ptr = inchar_start - tc->recognized_len; + } + *char_len_ptr = tc->recognized_len + (in_p - inchar_start); + return ptr; +} + +static mrb_econv_result_t +transcode_restartable0(mrb_state *mrb, + const unsigned char **in_pos, unsigned char **out_pos, + const unsigned char *in_stop, unsigned char *out_stop, + mrb_transcoding *tc, + const int opt) +{ + const mrb_transcoder *tr = tc->transcoder; + int unitlen = tr->input_unit_length; + ssize_t readagain_len = 0; + + const unsigned char *inchar_start; + const unsigned char *in_p; + + unsigned char *out_p; + + in_p = inchar_start = *in_pos; + + out_p = *out_pos; + +#define SUSPEND(ret, num) \ + do { \ + tc->resume_position = (num); \ + if (0 < in_p - inchar_start) \ + memmove(TRANSCODING_READBUF(tc)+tc->recognized_len, \ + inchar_start, in_p - inchar_start); \ + *in_pos = in_p; \ + *out_pos = out_p; \ + tc->recognized_len += in_p - inchar_start; \ + if (readagain_len) { \ + tc->recognized_len -= readagain_len; \ + tc->readagain_len = readagain_len; \ + } \ + return ret; \ + resume_label ## num:; \ + } while (0) +#define SUSPEND_OBUF(num) \ + do { \ + while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \ + } while (0) + +#define 
SUSPEND_AFTER_OUTPUT(num) \ + if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \ + SUSPEND(econv_after_output, num); \ + } + +#define next_table (tc->next_table) +#define next_info (tc->next_info) +#define next_byte (tc->next_byte) +#define writebuf_len (tc->writebuf_len) +#define writebuf_off (tc->writebuf_off) + + switch (tc->resume_position) { + case 0: break; + case 1: goto resume_label1; + case 2: goto resume_label2; + case 3: goto resume_label3; + case 4: goto resume_label4; + case 5: goto resume_label5; + case 6: goto resume_label6; + case 7: goto resume_label7; + case 8: goto resume_label8; + case 9: goto resume_label9; + case 10: goto resume_label10; + case 11: goto resume_label11; + case 12: goto resume_label12; + case 13: goto resume_label13; + case 14: goto resume_label14; + case 15: goto resume_label15; + case 16: goto resume_label16; + case 17: goto resume_label17; + case 18: goto resume_label18; + case 19: goto resume_label19; + case 20: goto resume_label20; + case 21: goto resume_label21; + case 22: goto resume_label22; + case 23: goto resume_label23; + case 24: goto resume_label24; + case 25: goto resume_label25; + case 26: goto resume_label26; + case 27: goto resume_label27; + case 28: goto resume_label28; + case 29: goto resume_label29; + case 30: goto resume_label30; + case 31: goto resume_label31; + case 32: goto resume_label32; + case 33: goto resume_label33; + case 34: goto resume_label34; + } + + while (1) { + inchar_start = in_p; + tc->recognized_len = 0; + next_table = tr->conv_tree_start; + + SUSPEND_AFTER_OUTPUT(24); + + if (in_stop <= in_p) { + if (!(opt & ECONV_PARTIAL_INPUT)) + break; + SUSPEND(econv_source_buffer_empty, 7); + continue; + } + +#define BYTE_ADDR(index) (tr->byte_array + (index)) +#define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index)) +#define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table))) +#define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table))) +#define BL_MIN_BYTE (BL_BASE[0]) 
+#define BL_MAX_BYTE (BL_BASE[1]) +#define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE]) +#define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))]) + + next_byte = (unsigned char)*in_p++; + follow_byte: + if (next_byte < BL_MIN_BYTE || BL_MAX_BYTE < next_byte) + next_info = mrb_fixnum_value(INVALID); + else { + next_info = mrb_fixnum_value(BL_ACTION(next_byte)); + } + follow_info: + switch (mrb_fixnum(next_info) & 0x1F) { + case NOMAP: + { + const unsigned char *p = inchar_start; + writebuf_off = 0; + while (p < in_p) { + TRANSCODING_WRITEBUF(tc)[writebuf_off++] = (unsigned char)*p++; + } + writebuf_len = writebuf_off; + writebuf_off = 0; + while (writebuf_off < writebuf_len) { + SUSPEND_OBUF(3); + *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++]; + } + } + continue; + case 0x00: case 0x04: case 0x08: case 0x0C: + case 0x10: case 0x14: case 0x18: case 0x1C: + SUSPEND_AFTER_OUTPUT(25); + while (in_p >= in_stop) { + if (!(opt & ECONV_PARTIAL_INPUT)) + goto incomplete; + SUSPEND(econv_source_buffer_empty, 5); + } + next_byte = (unsigned char)*in_p++; + next_table = (unsigned int)mrb_fixnum(next_info); + goto follow_byte; + case ZERObt: /* drop input */ + continue; + case ONEbt: + SUSPEND_OBUF(9); *out_p++ = getBT1(mrb_fixnum(next_info)); + continue; + case TWObt: + SUSPEND_OBUF(10); *out_p++ = getBT1(mrb_fixnum(next_info)); + SUSPEND_OBUF(21); *out_p++ = getBT2(mrb_fixnum(next_info)); + continue; + case THREEbt: + SUSPEND_OBUF(11); *out_p++ = getBT1(mrb_fixnum(next_info)); + SUSPEND_OBUF(15); *out_p++ = getBT2(mrb_fixnum(next_info)); + SUSPEND_OBUF(16); *out_p++ = getBT3(mrb_fixnum(next_info)); + continue; + case FOURbt: + SUSPEND_OBUF(12); *out_p++ = getBT0(mrb_fixnum(next_info)); + SUSPEND_OBUF(17); *out_p++ = getBT1(mrb_fixnum(next_info)); + SUSPEND_OBUF(18); *out_p++ = getBT2(mrb_fixnum(next_info)); + SUSPEND_OBUF(19); *out_p++ = getBT3(mrb_fixnum(next_info)); + continue; + case GB4bt: + SUSPEND_OBUF(29); *out_p++ = getGB4bt0((unsigned char)mrb_fixnum(next_info)); 
+ SUSPEND_OBUF(30); *out_p++ = getGB4bt1((mrb_fixnum(next_info))); + SUSPEND_OBUF(31); *out_p++ = getGB4bt2((unsigned char)mrb_fixnum(next_info)); + SUSPEND_OBUF(32); *out_p++ = getGB4bt3(mrb_fixnum(next_info)); + continue; + case STR1: + tc->output_index = 0; + while (tc->output_index < STR1_LENGTH(BYTE_ADDR(STR1_BYTEINDEX(mrb_fixnum(next_info))))) { + SUSPEND_OBUF(28); *out_p++ = BYTE_ADDR(STR1_BYTEINDEX(mrb_fixnum(next_info)))[1+tc->output_index]; + tc->output_index++; + } + continue; + case FUNii: + next_info = (mrb_value)(*tr->func_ii)(TRANSCODING_STATE(tc), next_info); + goto follow_info; + case FUNsi: + { + const unsigned char *char_start; + size_t char_len; + char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len); + next_info = (mrb_value)(*tr->func_si)(TRANSCODING_STATE(tc), char_start, (size_t)char_len); + goto follow_info; + } + case FUNio: + SUSPEND_OBUF(13); + if (tr->max_output <= out_stop - out_p) + out_p += tr->func_io(TRANSCODING_STATE(tc), + next_info, out_p, out_stop - out_p); + else { + writebuf_len = tr->func_io(TRANSCODING_STATE(tc), + next_info, + TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc)); + writebuf_off = 0; + while (writebuf_off < writebuf_len) { + SUSPEND_OBUF(20); + *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++]; + } + } + break; + case FUNso: + { + const unsigned char *char_start; + size_t char_len; + SUSPEND_OBUF(14); + if (tr->max_output <= out_stop - out_p) { + char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len); + out_p += tr->func_so(TRANSCODING_STATE(tc), + char_start, (size_t)char_len, + out_p, out_stop - out_p); + } + else { + char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len); + writebuf_len = tr->func_so(TRANSCODING_STATE(tc), + char_start, (size_t)char_len, + TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc)); + writebuf_off = 0; + while (writebuf_off < writebuf_len) { + SUSPEND_OBUF(22); + *out_p++ = 
TRANSCODING_WRITEBUF(tc)[writebuf_off++]; + } + } + break; + } + case FUNsio: + { + const unsigned char *char_start; + size_t char_len; + SUSPEND_OBUF(33); + if (tr->max_output <= out_stop - out_p) { + char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len); + out_p += tr->func_sio(TRANSCODING_STATE(tc), + char_start, (size_t)char_len, next_info, + out_p, out_stop - out_p); + } + else { + char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len); + writebuf_len = tr->func_sio(TRANSCODING_STATE(tc), + char_start, (size_t)char_len, next_info, + TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc)); + writebuf_off = 0; + while (writebuf_off < writebuf_len) { + SUSPEND_OBUF(34); + *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++]; + } + } + break; + } + case INVALID: + if (tc->recognized_len + (in_p - inchar_start) <= unitlen) { + if (tc->recognized_len + (in_p - inchar_start) < unitlen) + SUSPEND_AFTER_OUTPUT(26); + while ((opt & ECONV_PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) { + in_p = in_stop; + SUSPEND(econv_source_buffer_empty, 8); + } + if (tc->recognized_len + (in_stop - inchar_start) <= unitlen) { + in_p = in_stop; + } + else { + in_p = inchar_start + (unitlen - tc->recognized_len); + } + } + else { + ssize_t invalid_len; /* including the last byte which causes invalid */ + ssize_t discard_len; + invalid_len = tc->recognized_len + (in_p - inchar_start); + discard_len = ((invalid_len - 1) / unitlen) * unitlen; + readagain_len = invalid_len - discard_len; + } + goto invalid; + case UNDEF: + goto undef; + default: + mrb_raise(mrb, mrb->eRuntimeError_class, "unknown transcoding instruction"); + } + continue; + + invalid: + SUSPEND(econv_invalid_byte_sequence, 1); + continue; + + incomplete: + SUSPEND(econv_incomplete_input, 27); + continue; + + undef: + SUSPEND(econv_undefined_conversion, 2); + continue; + } + + /* cleanup */ + if (tr->finish_func) { + SUSPEND_OBUF(4); + if 
(tr->max_output <= out_stop - out_p) { + out_p += tr->finish_func(TRANSCODING_STATE(tc), + out_p, out_stop - out_p); + } + else { + writebuf_len = tr->finish_func(TRANSCODING_STATE(tc), + TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc)); + writebuf_off = 0; + while (writebuf_off < writebuf_len) { + SUSPEND_OBUF(23); + *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++]; + } + } + } + while (1) + SUSPEND(econv_finished, 6); +#undef SUSPEND +#undef next_table +#undef next_info +#undef next_byte +#undef writebuf_len +#undef writebuf_off +} + +static mrb_econv_result_t +transcode_restartable(mrb_state *mrb, + const unsigned char **in_pos, unsigned char **out_pos, + const unsigned char *in_stop, unsigned char *out_stop, + mrb_transcoding *tc, + const int opt) +{ + if (tc->readagain_len) { + unsigned char *readagain_buf = malloc(tc->readagain_len);//ALLOCA_N(unsigned char, tc->readagain_len); + const unsigned char *readagain_pos = readagain_buf; + const unsigned char *readagain_stop = readagain_buf + tc->readagain_len; + mrb_econv_result_t res; + + memcpy(readagain_buf, TRANSCODING_READBUF(tc) + tc->recognized_len, + tc->readagain_len); + tc->readagain_len = 0; + res = transcode_restartable0(mrb, &readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|ECONV_PARTIAL_INPUT); + if (res != econv_source_buffer_empty) { + memcpy(TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len, + readagain_pos, readagain_stop - readagain_pos); + tc->readagain_len += readagain_stop - readagain_pos; + return res; + } + } + return transcode_restartable0(mrb, in_pos, out_pos, in_stop, out_stop, tc, opt); +} + +static mrb_transcoding * +mrb_transcoding_open_by_transcoder(const mrb_transcoder *tr, int flags) +{ + mrb_transcoding *tc; + + tc = malloc(sizeof(mrb_transcoding)); + tc->transcoder = tr; + tc->flags = flags; + if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) + tc->state.ptr = xmalloc(tr->state_size); + if (tr->state_init_func) { + 
(tr->state_init_func)(TRANSCODING_STATE(tc)); /* xxx: check return value */ + } + tc->resume_position = 0; + tc->recognized_len = 0; + tc->readagain_len = 0; + tc->writebuf_len = 0; + tc->writebuf_off = 0; + if ((int)sizeof(tc->readbuf.ary) < tr->max_input) { + tc->readbuf.ptr = xmalloc(tr->max_input); + } + if ((int)sizeof(tc->writebuf.ary) < tr->max_output) { + tc->writebuf.ptr = xmalloc(tr->max_output); + } + return tc; +} + +static mrb_econv_result_t +mrb_transcoding_convert(mrb_state *mrb, mrb_transcoding *tc, + const unsigned char **input_ptr, const unsigned char *input_stop, + unsigned char **output_ptr, unsigned char *output_stop, + int flags) +{ + return transcode_restartable(mrb, + input_ptr, output_ptr, + input_stop, output_stop, + tc, flags); +} + +static void +mrb_transcoding_close(mrb_transcoding *tc) +{ + const mrb_transcoder *tr = tc->transcoder; + if (tr->state_fini_func) { + (tr->state_fini_func)(TRANSCODING_STATE(tc)); /* check return value? */ + } + if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) + xfree(tc->state.ptr); + if ((int)sizeof(tc->readbuf.ary) < tr->max_input) + xfree(tc->readbuf.ptr); + if ((int)sizeof(tc->writebuf.ary) < tr->max_output) + xfree(tc->writebuf.ptr); + xfree(tc); +} + +static size_t +mrb_transcoding_memsize(mrb_transcoding *tc) +{ + size_t size = sizeof(mrb_transcoding); + const mrb_transcoder *tr = tc->transcoder; + + if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) { + size += tr->state_size; + } + if ((int)sizeof(tc->readbuf.ary) < tr->max_input) { + size += tr->max_input; + } + if ((int)sizeof(tc->writebuf.ary) < tr->max_output) { + size += tr->max_output; + } + return size; +} + +static mrb_econv_t * +mrb_econv_alloc(int n_hint) +{ + mrb_econv_t *ec; + + if (n_hint <= 0) + n_hint = 1; + + ec = malloc(sizeof(mrb_econv_t));//ALLOC(mrb_econv_t); + ec->flags = 0; + ec->source_encoding_name = NULL; + ec->destination_encoding_name = NULL; + ec->started = 0; + ec->replacement_str = NULL; + ec->replacement_len = 0; + 
ec->replacement_enc = NULL; + ec->replacement_allocated = 0; + ec->in_buf_start = NULL; + ec->in_data_start = NULL; + ec->in_data_end = NULL; + ec->in_buf_end = NULL; + ec->num_allocated = n_hint; + ec->num_trans = 0; + ec->elems = malloc(sizeof(mrb_econv_elem_t)*ec->num_allocated);//ALLOC_N(mrb_econv_elem_t, ec->num_allocated); + ec->num_finished = 0; + ec->last_tc = NULL; + ec->last_error.result = econv_source_buffer_empty; + ec->last_error.error_tc = NULL; + ec->last_error.source_encoding = NULL; + ec->last_error.destination_encoding = NULL; + ec->last_error.error_bytes_start = NULL; + ec->last_error.error_bytes_len = 0; + ec->last_error.readagain_len = 0; + ec->source_encoding = NULL; + ec->destination_encoding = NULL; + return ec; +} + +static int +mrb_econv_add_transcoder_at(mrb_state *mrb, mrb_econv_t *ec, const mrb_transcoder *tr, int i) +{ + int n, j; + int bufsize = 4096; + unsigned char *p; + + if (ec->num_trans == ec->num_allocated) { + n = ec->num_allocated * 2; + mrb_realloc(mrb, ec->elems, sizeof(mrb_econv_elem_t)*n);//REALLOC_N(ec->elems, mrb_econv_elem_t, n); + ec->num_allocated = n; + } + + p = xmalloc(bufsize); + + memmove(ec->elems+i+1, ec->elems+i, sizeof(mrb_econv_elem_t)*(ec->num_trans-i)); + + ec->elems[i].tc = mrb_transcoding_open_by_transcoder(tr, 0); + ec->elems[i].out_buf_start = p; + ec->elems[i].out_buf_end = p + bufsize; + ec->elems[i].out_data_start = p; + ec->elems[i].out_data_end = p; + ec->elems[i].last_result = econv_source_buffer_empty; + + ec->num_trans++; + + if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding)) + for (j = ec->num_trans-1; i <= j; j--) { + mrb_transcoding *tc = ec->elems[j].tc; + const mrb_transcoder *tr2 = tc->transcoder; + if (!DECORATOR_P(tr2->src_encoding, tr2->dst_encoding)) { + ec->last_tc = tc; + break; + } + } + + return 0; +} + +static mrb_econv_t * +mrb_econv_open_by_transcoder_entries(mrb_state *mrb, int n, transcoder_entry_t **entries) +{ + mrb_econv_t *ec; + int i, ret; + + for (i = 0; i < n; i++) 
{ + const mrb_transcoder *tr; + tr = load_transcoder_entry(mrb, entries[i]); + if (!tr) + return NULL; + } + + ec = mrb_econv_alloc(n); + + for (i = 0; i < n; i++) { + const mrb_transcoder *tr = load_transcoder_entry(mrb, entries[i]); + ret = mrb_econv_add_transcoder_at(mrb, ec, tr, ec->num_trans); + if (ret == -1) { + mrb_econv_close(ec); + return NULL; + } + } + + return ec; +} + +struct trans_open_t { + transcoder_entry_t **entries; + int num_additional; +}; + +static void +trans_open_i(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg) +{ + struct trans_open_t *toarg = arg; + + if (!toarg->entries) { + toarg->entries = malloc(sizeof(transcoder_entry_t*)*depth+1+toarg->num_additional);//ALLOC_N(transcoder_entry_t *, depth+1+toarg->num_additional); + } + toarg->entries[depth] = get_transcoder_entry(sname, dname); +} + +static mrb_econv_t * +mrb_econv_open0(mrb_state *mrb, const char *sname, const char *dname, int ecflags) +{ + transcoder_entry_t **entries = NULL; + int num_trans; + mrb_econv_t *ec; + + mrb_encoding *senc, *denc; + int sidx, didx; + + senc = NULL; + if (*sname) { + sidx = mrb_enc_find_index(mrb, sname); + if (0 <= sidx) { + senc = mrb_enc_from_index(mrb, sidx); + } + } + + denc = NULL; + if (*dname) { + didx = mrb_enc_find_index(mrb, dname); + if (0 <= didx) { + denc = mrb_enc_from_index(mrb, didx); + } + } + + if (*sname == '\0' && *dname == '\0') { + num_trans = 0; + entries = NULL; + } + else { + struct trans_open_t toarg; + toarg.entries = NULL; + toarg.num_additional = 0; + num_trans = transcode_search_path(mrb, sname, dname, trans_open_i, (void *)&toarg); + entries = toarg.entries; + if (num_trans < 0) { + xfree(entries); + return NULL; + } + } + + ec = mrb_econv_open_by_transcoder_entries(mrb, num_trans, entries); + xfree(entries); + if (!ec) + return NULL; + + ec->flags = ecflags; + ec->source_encoding_name = sname; + ec->destination_encoding_name = dname; + + return ec; +} + +#define MAX_ECFLAGS_DECORATORS 32 + 
+static int +decorator_names(int ecflags, const char **decorators_ret) +{ + int num_decorators; + + if ((ecflags & ECONV_CRLF_NEWLINE_DECORATOR) && + (ecflags & ECONV_CR_NEWLINE_DECORATOR)) + return -1; + + if ((ecflags & (ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR)) && + (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)) + return -1; + + if ((ecflags & ECONV_XML_TEXT_DECORATOR) && + (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)) + return -1; + + num_decorators = 0; + + if (ecflags & ECONV_XML_TEXT_DECORATOR) + decorators_ret[num_decorators++] = "xml_text_escape"; + if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR) + decorators_ret[num_decorators++] = "xml_attr_content_escape"; + if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR) + decorators_ret[num_decorators++] = "xml_attr_quote"; + + if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR) + decorators_ret[num_decorators++] = "crlf_newline"; + if (ecflags & ECONV_CR_NEWLINE_DECORATOR) + decorators_ret[num_decorators++] = "cr_newline"; + if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) + decorators_ret[num_decorators++] = "universal_newline"; + + return num_decorators; +} + +mrb_econv_t * +mrb_econv_open(mrb_state *mrb, const char *sname, const char *dname, int ecflags) +{ + mrb_econv_t *ec; + int num_decorators; + const char *decorators[MAX_ECFLAGS_DECORATORS]; + int i; + + num_decorators = decorator_names(ecflags, decorators); + if (num_decorators == -1) + return NULL; + + ec = mrb_econv_open0(mrb, sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK); + if (!ec) + return NULL; + + for (i = 0; i < num_decorators; i++) + if (mrb_econv_decorate_at_last(mrb, ec, decorators[i]) == -1) { + mrb_econv_close(ec); + return NULL; + } + + ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK; + + return ec; +} + +static int +trans_sweep(mrb_state *mrb, mrb_econv_t *ec, + const unsigned char **input_ptr, const unsigned char *input_stop, + unsigned char **output_ptr, unsigned char *output_stop, + int flags, + int start) +{ + int try; + int i, 
f; + + const unsigned char **ipp, *is, *iold; + unsigned char **opp, *os, *oold; + mrb_econv_result_t res; + + try = 1; + while (try) { + try = 0; + for (i = start; i < ec->num_trans; i++) { + mrb_econv_elem_t *te = &ec->elems[i]; + + if (i == 0) { + ipp = input_ptr; + is = input_stop; + } + else { + mrb_econv_elem_t *prev_te = &ec->elems[i-1]; + ipp = (const unsigned char **)&prev_te->out_data_start; + is = prev_te->out_data_end; + } + + if (i == ec->num_trans-1) { + opp = output_ptr; + os = output_stop; + } + else { + if (te->out_buf_start != te->out_data_start) { + ssize_t len = te->out_data_end - te->out_data_start; + ssize_t off = te->out_data_start - te->out_buf_start; + memmove(te->out_buf_start, te->out_data_start, len); + te->out_data_start = te->out_buf_start; + te->out_data_end -= off; + } + opp = &te->out_data_end; + os = te->out_buf_end; + } + + f = flags; + if (ec->num_finished != i) + f |= ECONV_PARTIAL_INPUT; + if (i == 0 && (flags & ECONV_AFTER_OUTPUT)) { + start = 1; + flags &= ~ECONV_AFTER_OUTPUT; + } + if (i != 0) + f &= ~ECONV_AFTER_OUTPUT; + iold = *ipp; + oold = *opp; + te->last_result = res = mrb_transcoding_convert(mrb, te->tc, ipp, is, opp, os, f); + if (iold != *ipp || oold != *opp) + try = 1; + + switch (res) { + case econv_invalid_byte_sequence: + case econv_incomplete_input: + case econv_undefined_conversion: + case econv_after_output: + return i; + + case econv_destination_buffer_full: + case econv_source_buffer_empty: + break; + + case econv_finished: + ec->num_finished = i+1; + break; + } + } + } + return -1; +} + +static mrb_econv_result_t +mrb_trans_conv(mrb_state *mrb, mrb_econv_t *ec, + const unsigned char **input_ptr, const unsigned char *input_stop, + unsigned char **output_ptr, unsigned char *output_stop, + int flags, + int *result_position_ptr) +{ + int i; + int needreport_index; + int sweep_start; + + unsigned char empty_buf; + unsigned char *empty_ptr = &empty_buf; + + if (!input_ptr) { + input_ptr = (const unsigned char 
**)&empty_ptr; + input_stop = empty_ptr; + } + + if (!output_ptr) { + output_ptr = &empty_ptr; + output_stop = empty_ptr; + } + + if (ec->elems[0].last_result == econv_after_output) + ec->elems[0].last_result = econv_source_buffer_empty; + + needreport_index = -1; + for (i = ec->num_trans-1; 0 <= i; i--) { + switch (ec->elems[i].last_result) { + case econv_invalid_byte_sequence: + case econv_incomplete_input: + case econv_undefined_conversion: + case econv_after_output: + case econv_finished: + sweep_start = i+1; + needreport_index = i; + goto found_needreport; + + case econv_destination_buffer_full: + case econv_source_buffer_empty: + break; + + default: + mrb_bug("unexpected transcode last result"); + } + } + + /* /^[sd]+$/ is confirmed. but actually /^s*d*$/. */ + + if (ec->elems[ec->num_trans-1].last_result == econv_destination_buffer_full && + (flags & ECONV_AFTER_OUTPUT)) { + mrb_econv_result_t res; + + res = mrb_trans_conv(mrb, ec, NULL, NULL, output_ptr, output_stop, + (flags & ~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT, + result_position_ptr); + + if (res == econv_source_buffer_empty) + return econv_after_output; + return res; + } + + sweep_start = 0; + + found_needreport: + + do { + needreport_index = trans_sweep(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start); + sweep_start = needreport_index + 1; + } while (needreport_index != -1 && needreport_index != ec->num_trans-1); + + for (i = ec->num_trans-1; 0 <= i; i--) { + if (ec->elems[i].last_result != econv_source_buffer_empty) { + mrb_econv_result_t res = ec->elems[i].last_result; + if (res == econv_invalid_byte_sequence || + res == econv_incomplete_input || + res == econv_undefined_conversion || + res == econv_after_output) { + ec->elems[i].last_result = econv_source_buffer_empty; + } + if (result_position_ptr) + *result_position_ptr = i; + return res; + } + } + if (result_position_ptr) + *result_position_ptr = -1; + return econv_source_buffer_empty; +} + +static 
mrb_econv_result_t +mrb_econv_convert0(mrb_state *mrb, mrb_econv_t *ec, + const unsigned char **input_ptr, const unsigned char *input_stop, + unsigned char **output_ptr, unsigned char *output_stop, + int flags) +{ + mrb_econv_result_t res; + int result_position; + int has_output = 0; + + memset(&ec->last_error, 0, sizeof(ec->last_error)); + + if (ec->num_trans == 0) { + size_t len; + if (ec->in_buf_start && ec->in_data_start != ec->in_data_end) { + if (output_stop - *output_ptr < ec->in_data_end - ec->in_data_start) { + len = output_stop - *output_ptr; + memcpy(*output_ptr, ec->in_data_start, len); + *output_ptr = output_stop; + ec->in_data_start += len; + res = econv_destination_buffer_full; + goto gotresult; + } + len = ec->in_data_end - ec->in_data_start; + memcpy(*output_ptr, ec->in_data_start, len); + *output_ptr += len; + ec->in_data_start = ec->in_data_end = ec->in_buf_start; + if (flags & ECONV_AFTER_OUTPUT) { + res = econv_after_output; + goto gotresult; + } + } + if (output_stop - *output_ptr < input_stop - *input_ptr) { + len = output_stop - *output_ptr; + } + else { + len = input_stop - *input_ptr; + } + if (0 < len && (flags & ECONV_AFTER_OUTPUT)) { + *(*output_ptr)++ = *(*input_ptr)++; + res = econv_after_output; + goto gotresult; + } + memcpy(*output_ptr, *input_ptr, len); + *output_ptr += len; + *input_ptr += len; + if (*input_ptr != input_stop) + res = econv_destination_buffer_full; + else if (flags & ECONV_PARTIAL_INPUT) + res = econv_source_buffer_empty; + else + res = econv_finished; + goto gotresult; + } + + if (ec->elems[ec->num_trans-1].out_data_start) { + unsigned char *data_start = ec->elems[ec->num_trans-1].out_data_start; + unsigned char *data_end = ec->elems[ec->num_trans-1].out_data_end; + if (data_start != data_end) { + size_t len; + if (output_stop - *output_ptr < data_end - data_start) { + len = output_stop - *output_ptr; + memcpy(*output_ptr, data_start, len); + *output_ptr = output_stop; + ec->elems[ec->num_trans-1].out_data_start 
+= len; + res = econv_destination_buffer_full; + goto gotresult; + } + len = data_end - data_start; + memcpy(*output_ptr, data_start, len); + *output_ptr += len; + ec->elems[ec->num_trans-1].out_data_start = + ec->elems[ec->num_trans-1].out_data_end = + ec->elems[ec->num_trans-1].out_buf_start; + has_output = 1; + } + } + + if (ec->in_buf_start && + ec->in_data_start != ec->in_data_end) { + res = mrb_trans_conv(mrb, ec, (const unsigned char **)&ec->in_data_start, ec->in_data_end, output_ptr, output_stop, + (flags&~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT, &result_position); + if (res != econv_source_buffer_empty) + goto gotresult; + } + + if (has_output && + (flags & ECONV_AFTER_OUTPUT) && + *input_ptr != input_stop) { + input_stop = *input_ptr; + res = mrb_trans_conv(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position); + if (res == econv_source_buffer_empty) + res = econv_after_output; + } + else if ((flags & ECONV_AFTER_OUTPUT) || + ec->num_trans == 1) { + res = mrb_trans_conv(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position); + } + else { + flags |= ECONV_AFTER_OUTPUT; + do { + res = mrb_trans_conv(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position); + } while (res == econv_after_output); + } + + gotresult: + ec->last_error.result = res; + if (res == econv_invalid_byte_sequence || + res == econv_incomplete_input || + res == econv_undefined_conversion) { + mrb_transcoding *error_tc = ec->elems[result_position].tc; + ec->last_error.error_tc = error_tc; + ec->last_error.source_encoding = error_tc->transcoder->src_encoding; + ec->last_error.destination_encoding = error_tc->transcoder->dst_encoding; + ec->last_error.error_bytes_start = TRANSCODING_READBUF(error_tc); + ec->last_error.error_bytes_len = error_tc->recognized_len; + ec->last_error.readagain_len = error_tc->readagain_len; + } + + return res; +} + +static int output_replacement_character(mrb_state *mrb, mrb_econv_t 
*ec); + +static int +output_hex_charref(mrb_state *mrb, mrb_econv_t *ec) +{ + int ret; + unsigned char utfbuf[1024]; + const unsigned char *utf; + size_t utf_len; + int utf_allocated = 0; + char charef_buf[16]; + const unsigned char *p; + + if (encoding_equal(ec->last_error.source_encoding, "UTF-32BE")) { + utf = ec->last_error.error_bytes_start; + utf_len = ec->last_error.error_bytes_len; + } + else { + utf = allocate_converted_string(mrb, + ec->last_error.source_encoding, "UTF-32BE", + ec->last_error.error_bytes_start, ec->last_error.error_bytes_len, + utfbuf, sizeof(utfbuf), + &utf_len); + if (!utf) + return -1; + if (utf != utfbuf && utf != ec->last_error.error_bytes_start) + utf_allocated = 1; + } + + if (utf_len % 4 != 0) + goto fail; + + p = utf; + while (4 <= utf_len) { + unsigned int u = 0; + u += p[0] << 24; + u += p[1] << 16; + u += p[2] << 8; + u += p[3]; + snprintf(charef_buf, sizeof(charef_buf), "&#x%X;", u); + + ret = mrb_econv_insert_output(mrb, ec, (unsigned char *)charef_buf, strlen(charef_buf), "US-ASCII"); + if (ret == -1) + goto fail; + + p += 4; + utf_len -= 4; + } + + if (utf_allocated) + xfree((void *)utf); + return 0; + + fail: + if (utf_allocated) + xfree((void *)utf); + return -1; +} + +mrb_econv_result_t +mrb_econv_convert(mrb_state *mrb, mrb_econv_t *ec, + const unsigned char **input_ptr, const unsigned char *input_stop, + unsigned char **output_ptr, unsigned char *output_stop, + int flags) +{ + mrb_econv_result_t ret; + + unsigned char empty_buf; + unsigned char *empty_ptr = &empty_buf; + + ec->started = 1; + + if (!input_ptr) { + input_ptr = (const unsigned char **)&empty_ptr; + input_stop = empty_ptr; + } + + if (!output_ptr) { + output_ptr = &empty_ptr; + output_stop = empty_ptr; + } + + resume: + ret = mrb_econv_convert0(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags); + + if (ret == econv_invalid_byte_sequence || + ret == econv_incomplete_input) { + /* deal with invalid byte sequence */ + /* todo: add more 
alternative behaviors */ + switch (ec->flags & ECONV_INVALID_MASK) { + case ECONV_INVALID_REPLACE: + if (output_replacement_character(mrb, ec) == 0) + goto resume; + } + } + + if (ret == econv_undefined_conversion) { + /* valid character in source encoding + * but no related character(s) in destination encoding */ + /* todo: add more alternative behaviors */ + switch (ec->flags & ECONV_UNDEF_MASK) { + case ECONV_UNDEF_REPLACE: + if (output_replacement_character(mrb, ec) == 0) + goto resume; + break; + + case ECONV_UNDEF_HEX_CHARREF: + if (output_hex_charref(mrb, ec) == 0) + goto resume; + break; + } + } + + return ret; +} + +const char * +mrb_econv_encoding_to_insert_output(mrb_econv_t *ec) +{ + mrb_transcoding *tc = ec->last_tc; + const mrb_transcoder *tr; + + if (tc == NULL) + return ""; + + tr = tc->transcoder; + + if (tr->asciicompat_type == asciicompat_encoder) + return tr->src_encoding; + return tr->dst_encoding; +} + +static unsigned char * +allocate_converted_string(mrb_state *mrb, + const char *sname, const char *dname, + const unsigned char *str, size_t len, + unsigned char *caller_dst_buf, size_t caller_dst_bufsize, + size_t *dst_len_ptr) +{ + unsigned char *dst_str; + size_t dst_len; + size_t dst_bufsize; + + mrb_econv_t *ec; + mrb_econv_result_t res; + + const unsigned char *sp; + unsigned char *dp; + + if (caller_dst_buf) + dst_bufsize = caller_dst_bufsize; + else if (len == 0) + dst_bufsize = 1; + else + dst_bufsize = len; + + ec = mrb_econv_open(mrb, sname, dname, 0); + if (ec == NULL) + return NULL; + if (caller_dst_buf) + dst_str = caller_dst_buf; + else + dst_str = xmalloc(dst_bufsize); + dst_len = 0; + sp = str; + dp = dst_str+dst_len; + res = mrb_econv_convert(mrb, ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0); + dst_len = dp - dst_str; + while (res == econv_destination_buffer_full) { + if (SIZE_MAX/2 < dst_bufsize) { + goto fail; + } + dst_bufsize *= 2; + if (dst_str == caller_dst_buf) { + unsigned char *tmp; + tmp = xmalloc(dst_bufsize); + 
memcpy(tmp, dst_str, dst_bufsize/2); + dst_str = tmp; + } + else { + dst_str = xrealloc(dst_str, dst_bufsize); + } + dp = dst_str+dst_len; + res = mrb_econv_convert(mrb, ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0); + dst_len = dp - dst_str; + } + if (res != econv_finished) { + goto fail; + } + mrb_econv_close(ec); + *dst_len_ptr = dst_len; + return dst_str; + + fail: + if (dst_str != caller_dst_buf) + xfree(dst_str); + mrb_econv_close(ec); + return NULL; +} + +/* result: 0:success -1:failure */ +int +mrb_econv_insert_output(mrb_state *mrb, mrb_econv_t *ec, + const unsigned char *str, size_t len, const char *str_encoding) +{ + const char *insert_encoding = mrb_econv_encoding_to_insert_output(ec); + unsigned char insert_buf[4096]; + const unsigned char *insert_str = NULL; + size_t insert_len; + + int last_trans_index; + mrb_transcoding *tc; + + unsigned char **buf_start_p; + unsigned char **data_start_p; + unsigned char **data_end_p; + unsigned char **buf_end_p; + + size_t need; + + ec->started = 1; + + if (len == 0) + return 0; + + if (encoding_equal(insert_encoding, str_encoding)) { + insert_str = str; + insert_len = len; + } + else { + insert_str = allocate_converted_string(mrb, str_encoding, insert_encoding, + str, len, insert_buf, sizeof(insert_buf), &insert_len); + if (insert_str == NULL) + return -1; + } + + need = insert_len; + + last_trans_index = ec->num_trans-1; + if (ec->num_trans == 0) { + tc = NULL; + buf_start_p = &ec->in_buf_start; + data_start_p = &ec->in_data_start; + data_end_p = &ec->in_data_end; + buf_end_p = &ec->in_buf_end; + } + else if (ec->elems[last_trans_index].tc->transcoder->asciicompat_type == asciicompat_encoder) { + tc = ec->elems[last_trans_index].tc; + need += tc->readagain_len; + if (need < insert_len) + goto fail; + if (last_trans_index == 0) { + buf_start_p = &ec->in_buf_start; + data_start_p = &ec->in_data_start; + data_end_p = &ec->in_data_end; + buf_end_p = &ec->in_buf_end; + } + else { + mrb_econv_elem_t *ee = 
&ec->elems[last_trans_index-1]; + buf_start_p = &ee->out_buf_start; + data_start_p = &ee->out_data_start; + data_end_p = &ee->out_data_end; + buf_end_p = &ee->out_buf_end; + } + } + else { + mrb_econv_elem_t *ee = &ec->elems[last_trans_index]; + buf_start_p = &ee->out_buf_start; + data_start_p = &ee->out_data_start; + data_end_p = &ee->out_data_end; + buf_end_p = &ee->out_buf_end; + tc = ec->elems[last_trans_index].tc; + } + + if (*buf_start_p == NULL) { + unsigned char *buf = xmalloc(need); + *buf_start_p = buf; + *data_start_p = buf; + *data_end_p = buf; + *buf_end_p = buf+need; + } + else if ((size_t)(*buf_end_p - *data_end_p) < need) { + memmove(*buf_start_p, *data_start_p, *data_end_p - *data_start_p); + *data_end_p = *buf_start_p + (*data_end_p - *data_start_p); + *data_start_p = *buf_start_p; + if ((size_t)(*buf_end_p - *data_end_p) < need) { + unsigned char *buf; + size_t s = (*data_end_p - *buf_start_p) + need; + if (s < need) + goto fail; + buf = xrealloc(*buf_start_p, s); + *data_start_p = buf; + *data_end_p = buf + (*data_end_p - *buf_start_p); + *buf_start_p = buf; + *buf_end_p = buf + s; + } + } + + memcpy(*data_end_p, insert_str, insert_len); + *data_end_p += insert_len; + if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) { + memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len); + *data_end_p += tc->readagain_len; + tc->readagain_len = 0; + } + + if (insert_str != str && insert_str != insert_buf) + xfree((void*)insert_str); + return 0; + + fail: + if (insert_str != str && insert_str != insert_buf) + xfree((void*)insert_str); + return -1; +} + +void +mrb_econv_close(mrb_econv_t *ec) +{ + int i; + + if (ec->replacement_allocated) { + xfree((void *)ec->replacement_str); + } + for (i = 0; i < ec->num_trans; i++) { + mrb_transcoding_close(ec->elems[i].tc); + if (ec->elems[i].out_buf_start) + xfree(ec->elems[i].out_buf_start); + } + xfree(ec->in_buf_start); + xfree(ec->elems); + xfree(ec); +} + +size_t 
+mrb_econv_memsize(mrb_econv_t *ec) +{ + size_t size = sizeof(mrb_econv_t); + int i; + + if (ec->replacement_allocated) { + size += ec->replacement_len; + } + for (i = 0; i < ec->num_trans; i++) { + size += mrb_transcoding_memsize(ec->elems[i].tc); + + if (ec->elems[i].out_buf_start) { + size += ec->elems[i].out_buf_end - ec->elems[i].out_buf_start; + } + } + size += ec->in_buf_end - ec->in_buf_start; + size += sizeof(mrb_econv_elem_t) * ec->num_allocated; + + return size; +} + +int +mrb_econv_putbackable(mrb_econv_t *ec) +{ + if (ec->num_trans == 0) + return 0; +#if SIZEOF_SIZE_T > SIZEOF_INT + if (ec->elems[0].tc->readagain_len > INT_MAX) return INT_MAX; +#endif + return (int)ec->elems[0].tc->readagain_len; +} + +void +mrb_econv_putback(mrb_econv_t *ec, unsigned char *p, int n) +{ + mrb_transcoding *tc; + if (ec->num_trans == 0 || n == 0) + return; + tc = ec->elems[0].tc; + memcpy(p, TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len - n, n); + tc->readagain_len -= n; +} + +struct asciicompat_encoding_t { + const char *ascii_compat_name; + const char *ascii_incompat_name; +}; + +static int +asciicompat_encoding_i(mrb_state *mrb, st_data_t key, st_data_t val, st_data_t arg) +{ + struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t *)arg; + transcoder_entry_t *entry = (transcoder_entry_t *)val; + const mrb_transcoder *tr; + + if (DECORATOR_P(entry->sname, entry->dname)) + return ST_CONTINUE; + tr = load_transcoder_entry(mrb, entry); + if (tr && tr->asciicompat_type == asciicompat_decoder) { + data->ascii_compat_name = tr->dst_encoding; + return ST_STOP; + } + return ST_CONTINUE; +} + +const char * +mrb_econv_asciicompat_encoding(const char *ascii_incompat_name) +{ + st_data_t v; + st_table *table2; + struct asciicompat_encoding_t data; + + if (!st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v)) + return NULL; + table2 = (st_table *)v; + + /* + * Assumption: + * There is at most one transcoder for + * converting from ASCII 
incompatible encoding. + * + * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others. + */ + if (table2->num_entries != 1) + return NULL; + + data.ascii_incompat_name = ascii_incompat_name; + data.ascii_compat_name = NULL; + st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data); + return data.ascii_compat_name; +} + +mrb_value +mrb_econv_substr_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long off, long len, mrb_value dst, int flags) +{ + unsigned const char *ss, *sp, *se; + unsigned char *ds, *dp, *de; + mrb_econv_result_t res; + int max_output; + + if (mrb_nil_p(dst)) { + dst = mrb_str_buf_new(mrb, len); + if (ec->destination_encoding) + mrb_enc_associate(mrb, dst, ec->destination_encoding); + } + + if (ec->last_tc) + max_output = ec->last_tc->transcoder->max_output; + else + max_output = 1; + + res = econv_destination_buffer_full; + while (res == econv_destination_buffer_full) { + long dlen = RSTRING_LEN(dst); + if (mrb_str_capacity(dst) - dlen < (size_t)len + max_output) { + unsigned long new_capa = (unsigned long)dlen + len + max_output; + if (LONG_MAX < new_capa) + mrb_raise(mrb, E_ARGUMENT_ERROR, "too long string"); + mrb_str_resize(mrb, dst, new_capa); + mrb_str_set_len(mrb, dst, dlen); + } + ss = sp = (const unsigned char *)RSTRING_PTR(src) + off; + se = ss + len; + ds = (unsigned char *)RSTRING_PTR(dst); + de = ds + mrb_str_capacity(dst); + dp = ds += dlen; + res = mrb_econv_convert(mrb, ec, &sp, se, &dp, de, flags); + off += sp - ss; + len -= sp - ss; + mrb_str_set_len(mrb, dst, dlen + (dp - ds)); + mrb_econv_check_error(mrb, ec); + } + + return dst; +} + +mrb_value +mrb_econv_str_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, mrb_value dst, int flags) +{ + return mrb_econv_substr_append(mrb, ec, src, 0, RSTRING_LEN(src), dst, flags); +} + +mrb_value +mrb_econv_substr_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long byteoff, long bytesize, int flags) +{ + return mrb_econv_substr_append(mrb, ec, 
src, byteoff, bytesize, mrb_nil_value(), flags); +} + +mrb_value +mrb_econv_str_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, int flags) +{ + return mrb_econv_substr_append(mrb, ec, src, 0, RSTRING_LEN(src), mrb_nil_value(), flags); +} + +static int +mrb_econv_add_converter(mrb_state *mrb, mrb_econv_t *ec, const char *sname, const char *dname, int n) +{ + transcoder_entry_t *entry; + const mrb_transcoder *tr; + + if (ec->started != 0) + return -1; + + entry = get_transcoder_entry(sname, dname); + if (!entry) + return -1; + + tr = load_transcoder_entry(mrb, entry); + + return mrb_econv_add_transcoder_at(mrb, ec, tr, n); +} + +static int +mrb_econv_decorate_at(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name, int n) +{ + return mrb_econv_add_converter(mrb, ec, "", decorator_name, n); +} + +int +mrb_econv_decorate_at_first(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name) +{ + const mrb_transcoder *tr; + + if (ec->num_trans == 0) + return mrb_econv_decorate_at(mrb, ec, decorator_name, 0); + + tr = ec->elems[0].tc->transcoder; + + if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) && + tr->asciicompat_type == asciicompat_decoder) + return mrb_econv_decorate_at(mrb, ec, decorator_name, 1); + + return mrb_econv_decorate_at(mrb, ec, decorator_name, 0); +} + +int +mrb_econv_decorate_at_last(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name) +{ + const mrb_transcoder *tr; + + if (ec->num_trans == 0) + return mrb_econv_decorate_at(mrb, ec, decorator_name, 0); + + tr = ec->elems[ec->num_trans-1].tc->transcoder; + + if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) && + tr->asciicompat_type == asciicompat_encoder) + return mrb_econv_decorate_at(mrb, ec, decorator_name, ec->num_trans-1); + + return mrb_econv_decorate_at(mrb, ec, decorator_name, ec->num_trans); +} + +void +mrb_econv_binmode(mrb_econv_t *ec) +{ + const mrb_transcoder *trs[3]; + int n, i, j; + transcoder_entry_t *entry; + int num_trans; + + n = 0; + if (ec->flags & 
ECONV_UNIVERSAL_NEWLINE_DECORATOR) { + entry = get_transcoder_entry("", "universal_newline"); + if (entry->transcoder) + trs[n++] = entry->transcoder; + } + if (ec->flags & ECONV_CRLF_NEWLINE_DECORATOR) { + entry = get_transcoder_entry("", "crlf_newline"); + if (entry->transcoder) + trs[n++] = entry->transcoder; + } + if (ec->flags & ECONV_CR_NEWLINE_DECORATOR) { + entry = get_transcoder_entry("", "cr_newline"); + if (entry->transcoder) + trs[n++] = entry->transcoder; + } + + num_trans = ec->num_trans; + j = 0; + for (i = 0; i < num_trans; i++) { + int k; + for (k = 0; k < n; k++) + if (trs[k] == ec->elems[i].tc->transcoder) + break; + if (k == n) { + ec->elems[j] = ec->elems[i]; + j++; + } + else { + mrb_transcoding_close(ec->elems[i].tc); + xfree(ec->elems[i].out_buf_start); + ec->num_trans--; + } + } + + ec->flags &= ~(ECONV_UNIVERSAL_NEWLINE_DECORATOR|ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR); + +} + +static mrb_value +econv_description(mrb_state *mrb, const char *sname, const char *dname, int ecflags, mrb_value mesg) +{ + int has_description = 0; + + if (mrb_nil_p(mesg)) + mesg = mrb_str_new(mrb, NULL, 0); + + if (*sname != '\0' || *dname != '\0') { + if (*sname == '\0') + mrb_str_cat2(mrb, mesg, dname); + else if (*dname == '\0') + mrb_str_cat2(mrb, mesg, sname); + else + mrb_str_catf(mrb, mesg, "%s to %s", sname, dname); + has_description = 1; + } + + if (ecflags & (ECONV_UNIVERSAL_NEWLINE_DECORATOR| + ECONV_CRLF_NEWLINE_DECORATOR| + ECONV_CR_NEWLINE_DECORATOR| + ECONV_XML_TEXT_DECORATOR| + ECONV_XML_ATTR_CONTENT_DECORATOR| + ECONV_XML_ATTR_QUOTE_DECORATOR)) { + const char *pre = ""; + if (has_description) + mrb_str_cat2(mrb, mesg, " with "); + if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) { + mrb_str_cat2(mrb, mesg, pre); pre = ","; + mrb_str_cat2(mrb, mesg, "universal_newline"); + } + if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR) { + mrb_str_cat2(mrb, mesg, pre); pre = ","; + mrb_str_cat2(mrb, mesg, "crlf_newline"); + } + if (ecflags & 
ECONV_CR_NEWLINE_DECORATOR) { + mrb_str_cat2(mrb, mesg, pre); pre = ","; + mrb_str_cat2(mrb, mesg, "cr_newline"); + } + if (ecflags & ECONV_XML_TEXT_DECORATOR) { + mrb_str_cat2(mrb, mesg, pre); pre = ","; + mrb_str_cat2(mrb, mesg, "xml_text"); + } + if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR) { + mrb_str_cat2(mrb, mesg, pre); pre = ","; + mrb_str_cat2(mrb, mesg, "xml_attr_content"); + } + if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR) { + mrb_str_cat2(mrb, mesg, pre); pre = ","; + mrb_str_cat2(mrb, mesg, "xml_attr_quote"); + } + has_description = 1; + } + if (!has_description) { + mrb_str_cat2(mrb, mesg, "no-conversion"); + } + + return mesg; +} + +mrb_value +mrb_econv_open_exc(mrb_state *mrb, const char *sname, const char *dname, int ecflags) +{ + mrb_value mesg, exc; + mesg = mrb_str_new_cstr(mrb, "code converter not found ("); + econv_description(mrb, sname, dname, ecflags, mesg); + mrb_str_cat2(mrb, mesg, ")"); + exc = mrb_exc_new3(mrb, E_CONVERTERNOTFOUND_ERROR, mesg); + return exc; +} + +static mrb_value +make_econv_exception(mrb_state *mrb, mrb_econv_t *ec) +{ + mrb_value mesg, exc; + if (ec->last_error.result == econv_invalid_byte_sequence || + ec->last_error.result == econv_incomplete_input) { + const char *err = (const char *)ec->last_error.error_bytes_start; + size_t error_len = ec->last_error.error_bytes_len; + mrb_value bytes = mrb_str_new(mrb, err, error_len); + mrb_value dumped = mrb_str_dump(mrb, bytes); + size_t readagain_len = ec->last_error.readagain_len; + mrb_value bytes2 = mrb_nil_value(); + mrb_value dumped2; + int idx; + if (ec->last_error.result == econv_incomplete_input) { + mesg = mrb_sprintf(mrb, "incomplete %s on %s", + //StringValueCStr(dumped), + mrb_string_value_cstr(mrb, &dumped), + ec->last_error.source_encoding); + } + else if (readagain_len) { + bytes2 = mrb_str_new(mrb, err+error_len, readagain_len); + dumped2 = mrb_str_dump(mrb, bytes2); + mesg = mrb_sprintf(mrb, "%s followed by %s on %s", + //StringValueCStr(dumped), + 
mrb_string_value_cstr(mrb, &dumped), + //StringValueCStr(dumped2), + mrb_string_value_cstr(mrb, &dumped2), + ec->last_error.source_encoding); + } + else { + mesg = mrb_sprintf(mrb, "%s on %s", + //StringValueCStr(dumped), + mrb_string_value_cstr(mrb, &dumped), + ec->last_error.source_encoding); + } + + exc = mrb_exc_new3(mrb, E_INVALIDBYTESEQUENCE_ERROR, mesg); + mrb_iv_set(mrb, exc, mrb_intern(mrb, "error_bytes"), bytes); + mrb_iv_set(mrb, exc, mrb_intern(mrb, "readagain_bytes"), bytes2); + mrb_iv_set(mrb, exc, mrb_intern(mrb, "incomplete_input"), ec->last_error.result == econv_incomplete_input ? mrb_true_value() : mrb_false_value()); + +set_encs: + mrb_iv_set(mrb, exc, mrb_intern(mrb, "source_encoding_name"), mrb_str_new2(mrb, ec->last_error.source_encoding)); + mrb_iv_set(mrb, exc, mrb_intern(mrb, "destination_encoding_name"), mrb_str_new2(mrb, ec->last_error.destination_encoding)); + idx = mrb_enc_find_index(mrb, ec->last_error.source_encoding); + if (0 <= idx) + mrb_iv_set(mrb, exc, mrb_intern(mrb, "source_encoding"), mrb_enc_from_encoding(mrb, mrb_enc_from_index(mrb, idx))); + idx = mrb_enc_find_index(mrb, ec->last_error.destination_encoding); + if (0 <= idx) + mrb_iv_set(mrb, exc, mrb_intern(mrb, "destination_encoding"), mrb_enc_from_encoding(mrb, mrb_enc_from_index(mrb, idx))); + return exc; + } + if (ec->last_error.result == econv_undefined_conversion) { + mrb_value bytes = mrb_str_new(mrb, (const char *)ec->last_error.error_bytes_start, + ec->last_error.error_bytes_len); + mrb_value dumped = mrb_nil_value(); + int idx; + if (strcmp(ec->last_error.source_encoding, "UTF-8") == 0) { + mrb_encoding *utf8 = mrb_utf8_encoding(mrb); + const char *start, *end; + int n; + start = (const char *)ec->last_error.error_bytes_start; + end = start + ec->last_error.error_bytes_len; + n = mrb_enc_precise_mbclen(start, end, utf8); + if (MBCLEN_CHARFOUND_P(n) && + (size_t)MBCLEN_CHARFOUND_LEN(n) == ec->last_error.error_bytes_len) { + unsigned int cc = 
mrb_enc_mbc_to_codepoint(start, end, utf8); + dumped = mrb_sprintf(mrb, "U+%04X", cc); + } + } + if (mrb_obj_equal(mrb, dumped, mrb_nil_value())) + dumped = mrb_str_dump(mrb, bytes); + if (strcmp(ec->last_error.source_encoding, + ec->source_encoding_name) == 0 && + strcmp(ec->last_error.destination_encoding, + ec->destination_encoding_name) == 0) { + mesg = mrb_sprintf(mrb, "%s from %s to %s", + //StringValueCStr(dumped), + mrb_string_value_cstr(mrb, &dumped), + ec->last_error.source_encoding, + ec->last_error.destination_encoding); + } + else { + int i; + mesg = mrb_sprintf(mrb, "%s to %s in conversion from %s", + //StringValueCStr(dumped), + mrb_string_value_cstr(mrb, &dumped), + ec->last_error.destination_encoding, + ec->source_encoding_name); + for (i = 0; i < ec->num_trans; i++) { + const mrb_transcoder *tr = ec->elems[i].tc->transcoder; + if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding)) + mrb_str_catf(mrb, mesg, " to %s", + ec->elems[i].tc->transcoder->dst_encoding); + } + } + exc = mrb_exc_new3(mrb, E_UNDEFINEDCONVERSION_ERROR, mesg); + idx = mrb_enc_find_index(mrb, ec->last_error.source_encoding); + if (0 <= idx) + mrb_enc_associate_index(mrb, bytes, idx); + mrb_iv_set(mrb, exc, mrb_intern(mrb, "error_char"), bytes); + goto set_encs; + } + return mrb_nil_value(); +} + +static void +more_output_buffer(mrb_state *mrb, + mrb_value destination, + unsigned char *(*resize_destination)(mrb_state *, mrb_value, size_t, size_t), + int max_output, + unsigned char **out_start_ptr, + unsigned char **out_pos, + unsigned char **out_stop_ptr) +{ + size_t len = (*out_pos - *out_start_ptr); + size_t new_len = (len + max_output) * 2; + *out_start_ptr = resize_destination(mrb, destination, len, new_len); + *out_pos = *out_start_ptr + len; + *out_stop_ptr = *out_start_ptr + new_len; +} + +static int +make_replacement(mrb_state *mrb, mrb_econv_t *ec) +{ + mrb_transcoding *tc; + const mrb_transcoder *tr; + mrb_encoding *enc; + const unsigned char *replacement; + const char 
*repl_enc; + const char *ins_enc; + size_t len; + + if (ec->replacement_str) + return 0; + + ins_enc = mrb_econv_encoding_to_insert_output(ec); + + tc = ec->last_tc; + if (*ins_enc) { + tr = tc->transcoder; + enc = mrb_enc_find(mrb, tr->dst_encoding); + replacement = (const unsigned char *)get_replacement_character(ins_enc, &len, &repl_enc); + } + else { + replacement = (unsigned char *)"?"; + len = 1; + repl_enc = ""; + } + + ec->replacement_str = replacement; + ec->replacement_len = len; + ec->replacement_enc = repl_enc; + ec->replacement_allocated = 0; + return 0; +} + +int +mrb_econv_set_replacement(mrb_state *mrb, mrb_econv_t *ec, + const unsigned char *str, size_t len, const char *encname) +{ + unsigned char *str2; + size_t len2; + const char *encname2; + + encname2 = mrb_econv_encoding_to_insert_output(ec); + + if (encoding_equal(encname, encname2)) { + str2 = xmalloc(len); + memcpy(str2, str, len); /* xxx: str may be invalid */ + len2 = len; + encname2 = encname; + } + else { + str2 = allocate_converted_string(mrb, encname, encname2, str, len, NULL, 0, &len2); + if (!str2) + return -1; + } + + if (ec->replacement_allocated) { + xfree((void *)ec->replacement_str); + } + ec->replacement_allocated = 1; + ec->replacement_str = str2; + ec->replacement_len = len2; + ec->replacement_enc = encname2; + return 0; +} + +static int +output_replacement_character(mrb_state *mrb, mrb_econv_t *ec) +{ + int ret; + + if (make_replacement(mrb, ec) == -1) + return -1; + + ret = mrb_econv_insert_output(mrb, ec, ec->replacement_str, ec->replacement_len, ec->replacement_enc); + if (ret == -1) + return -1; + + return 0; +} + +static void +transcode_loop(mrb_state *mrb, + const unsigned char **in_pos, unsigned char **out_pos, + const unsigned char *in_stop, unsigned char *out_stop, + mrb_value destination, + unsigned char *(*resize_destination)(mrb_state *, mrb_value, size_t, size_t), + const char *src_encoding, + const char *dst_encoding, + int ecflags, + mrb_value ecopts) +{ + 
mrb_econv_t *ec; + mrb_transcoding *last_tc; + mrb_econv_result_t ret; + unsigned char *out_start = *out_pos; + int max_output; + mrb_value exc; + mrb_value fallback = mrb_nil_value(); + mrb_value Qundef; + Qundef.tt = 0; + + ec = mrb_econv_open_opts(mrb, src_encoding, dst_encoding, ecflags, ecopts); + if (!ec) + mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, src_encoding, dst_encoding, ecflags)); + + if (!mrb_nil_p(ecopts) && TYPE(ecopts) == MRB_TT_HASH) + fallback = mrb_hash_get(mrb, ecopts, sym_fallback); + last_tc = ec->last_tc; + max_output = last_tc ? last_tc->transcoder->max_output : 1; + + resume: + ret = mrb_econv_convert(mrb, ec, in_pos, in_stop, out_pos, out_stop, 0); + + if (!mrb_nil_p(fallback) && ret == econv_undefined_conversion) { + mrb_value rep = mrb_enc_str_new(mrb, + (const char *)ec->last_error.error_bytes_start, + ec->last_error.error_bytes_len, + mrb_enc_find(mrb, ec->last_error.source_encoding)); + rep = mrb_hash_getWithDef(mrb, fallback, rep, Qundef);//mrb_hash_lookup2(fallback, rep, Qundef); + if (!mrb_obj_equal(mrb, rep, Qundef)) { + //StringValue(rep); + mrb_string_value(mrb, &rep); + ret = mrb_econv_insert_output(mrb, ec, (const unsigned char *)RSTRING_PTR(rep), + RSTRING_LEN(rep), mrb_enc_name(mrb_enc_get(mrb, rep))); + if ((int)ret == -1) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "too big fallback string"); + } + goto resume; + } + } + + if (ret == econv_invalid_byte_sequence || + ret == econv_incomplete_input || + ret == econv_undefined_conversion) { + exc = make_econv_exception(mrb, ec); + mrb_econv_close(ec); + mrb_exc_raise(mrb, exc); + } + + if (ret == econv_destination_buffer_full) { + more_output_buffer(mrb, destination, resize_destination, max_output, &out_start, out_pos, &out_stop); + goto resume; + } + + mrb_econv_close(ec); + return; +} + +/* + * String-specific code + */ + +static unsigned char * +str_transcoding_resize(mrb_state *mrb, mrb_value destination, size_t len, size_t new_len) +{ + mrb_str_resize(mrb, destination, 
new_len); + return (unsigned char *)RSTRING_PTR(destination); +} + +static int +econv_opts(mrb_state *mrb, mrb_value opt) +{ + mrb_value v; + int ecflags = 0; + + v = mrb_hash_get(mrb, opt, sym_invalid); + if (mrb_nil_p(v)) { + } + else if (mrb_obj_equal(mrb, v, sym_replace)) { + ecflags |= ECONV_INVALID_REPLACE; + } + else { + mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown value for invalid character option"); + } + + v = mrb_hash_get(mrb, opt, sym_undef); + if (mrb_nil_p(v)) { + } + else if (mrb_obj_equal(mrb, v, sym_replace)) { + ecflags |= ECONV_UNDEF_REPLACE; + } + else { + mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown value for undefined character option"); + } + + v = mrb_hash_get(mrb, opt, sym_replace); + if (!mrb_nil_p(v) && !(ecflags & ECONV_INVALID_REPLACE)) { + ecflags |= ECONV_UNDEF_REPLACE; + } + + v = mrb_hash_get(mrb, opt, sym_xml); + if (!mrb_nil_p(v)) { + if (mrb_obj_equal(mrb, v, sym_text)) { + ecflags |= ECONV_XML_TEXT_DECORATOR|ECONV_UNDEF_HEX_CHARREF; + } + else if (mrb_obj_equal(mrb, v, sym_attr)) { + ecflags |= ECONV_XML_ATTR_CONTENT_DECORATOR|ECONV_XML_ATTR_QUOTE_DECORATOR|ECONV_UNDEF_HEX_CHARREF; + } + else if (TYPE(v) == MRB_TT_SYMBOL) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "unexpected value for xml option: %s", mrb_sym2name(mrb, SYM2ID(v))); + } + else { + mrb_raise(mrb, E_ARGUMENT_ERROR, "unexpected value for xml option"); + } + } + + v = mrb_hash_get(mrb, opt, sym_universal_newline); + if (RTEST(v)) + ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + + v = mrb_hash_get(mrb, opt, sym_crlf_newline); + if (RTEST(v)) + ecflags |= ECONV_CRLF_NEWLINE_DECORATOR; + + v = mrb_hash_get(mrb, opt, sym_cr_newline); + if (RTEST(v)) + ecflags |= ECONV_CR_NEWLINE_DECORATOR; + + return ecflags; +} + +int +mrb_econv_prepare_opts(mrb_state *mrb, mrb_value opthash, mrb_value *opts) +{ + int ecflags; + mrb_value newhash = mrb_nil_value(); + mrb_value v; + + if (mrb_nil_p(opthash)) { + *opts = mrb_nil_value(); + return 0; + } + ecflags = econv_opts(mrb, opthash); + + v 
= mrb_hash_get(mrb, opthash, sym_replace); + if (!mrb_nil_p(v)) { + //StringValue(v); + mrb_string_value(mrb, &v); + if (mrb_enc_str_coderange(mrb, v) == ENC_CODERANGE_BROKEN) { + mrb_value dumped = mrb_str_dump(mrb, v); + mrb_raise(mrb, E_ARGUMENT_ERROR, "replacement string is broken: %s as %s", + //StringValueCStr(dumped), + mrb_string_value_cstr(mrb, &dumped), + mrb_enc_name(mrb_enc_get(mrb, v))); + } + v = mrb_str_new_frozen(mrb, v); + newhash = mrb_hash_new_capa(mrb, 0); + mrb_hash_set(mrb, newhash, sym_replace, v); + } + + v = mrb_hash_get(mrb, opthash, sym_fallback); + if (!mrb_nil_p(v)) { + v = mrb_convert_type(mrb, v, MRB_TT_HASH, "Hash", "to_hash"); + if (!mrb_nil_p(v)) { + if (mrb_nil_p(newhash)) + newhash = mrb_hash_new_capa(mrb, 0); + mrb_hash_set(mrb, newhash, sym_fallback, v); + } + } + + //if (!mrb_nil_p(newhash)) + // mrb_hash_freeze(newhash); + *opts = newhash; + + return ecflags; +} + +mrb_econv_t * +mrb_econv_open_opts(mrb_state *mrb, const char *source_encoding, const char *destination_encoding, int ecflags, mrb_value opthash) +{ + mrb_econv_t *ec; + mrb_value replacement; + + if (mrb_nil_p(opthash)) { + replacement = mrb_nil_value(); + } + else { + if (TYPE(opthash) != MRB_TT_HASH /*|| !OBJ_FROZEN(opthash)*/) + mrb_bug("mrb_econv_open_opts called with invalid opthash"); + replacement = mrb_hash_get(mrb, opthash, sym_replace); + } + + ec = mrb_econv_open(mrb, source_encoding, destination_encoding, ecflags); + if (!ec) + return ec; + + if (!mrb_nil_p(replacement)) { + int ret; + mrb_encoding *enc = mrb_enc_get(mrb, replacement); + + ret = mrb_econv_set_replacement(mrb, ec, + (const unsigned char *)RSTRING_PTR(replacement), + RSTRING_LEN(replacement), + mrb_enc_name(enc)); + if (ret == -1) { + mrb_econv_close(ec); + return NULL; + } + } + return ec; +} + +static int +enc_arg(mrb_state *mrb, mrb_value *arg, const char **name_p, mrb_encoding **enc_p) +{ + mrb_encoding *enc; + const char *n; + int encidx; + mrb_value encval; + + if (((encidx = 
mrb_to_encoding_index(mrb, encval = *arg)) < 0) || + !(enc = mrb_enc_from_index(mrb, encidx))) { + enc = NULL; + encidx = 0; + //n = StringValueCStr(*arg); + n = mrb_string_value_cstr(mrb, arg); + } + else { + n = mrb_enc_name(enc); + } + + *name_p = n; + *enc_p = enc; + + return encidx; +} + +static int +str_transcode_enc_args(mrb_state *mrb, + mrb_value str, mrb_value *arg1, mrb_value *arg2, + const char **sname_p, mrb_encoding **senc_p, + const char **dname_p, mrb_encoding **denc_p) +{ + mrb_encoding *senc, *denc; + const char *sname, *dname; + int sencidx, dencidx; + + dencidx = enc_arg(mrb, arg1, &dname, &denc); + + if (mrb_nil_p(*arg2)) { + sencidx = mrb_enc_get_index(mrb, str); + senc = mrb_enc_from_index(mrb, sencidx); + sname = mrb_enc_name(senc); + } + else { + sencidx = enc_arg(mrb, arg2, &sname, &senc); + } + + *sname_p = sname; + *senc_p = senc; + *dname_p = dname; + *denc_p = denc; + return dencidx; +} + +mrb_value +mrb_str_tmp_new(mrb_state *mrb, long len) +{ + return mrb_str_new(mrb, 0, len); +} + +static int +str_transcode0(mrb_state *mrb, int argc, mrb_value *argv, mrb_value *self, int ecflags, mrb_value ecopts) +{ + + mrb_value dest; + mrb_value str = *self; + mrb_value arg1, arg2; + long blen, slen; + unsigned char *buf, *bp, *sp; + const unsigned char *fromp; + mrb_encoding *senc, *denc; + const char *sname, *dname; + int dencidx; + + if (argc <0 || argc > 2) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 0..2)", argc); + } + + if (argc == 0) { + arg1 = mrb_enc_default_internal(mrb); + if (mrb_nil_p(arg1)) { + if (!ecflags) return -1; + arg1 = mrb_obj_encoding(mrb, str); + } + ecflags |= ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE; + } + else { + arg1 = argv[0]; + } + arg2 = argc<=1 ? 
mrb_nil_value() : argv[1]; + dencidx = str_transcode_enc_args(mrb, str, &arg1, &arg2, &sname, &senc, &dname, &denc); + + if ((ecflags & (ECONV_UNIVERSAL_NEWLINE_DECORATOR| + ECONV_CRLF_NEWLINE_DECORATOR| + ECONV_CR_NEWLINE_DECORATOR| + ECONV_XML_TEXT_DECORATOR| + ECONV_XML_ATTR_CONTENT_DECORATOR| + ECONV_XML_ATTR_QUOTE_DECORATOR)) == 0) { + if (senc && senc == denc) { + return mrb_nil_p(arg2) ? -1 : dencidx; + } + if (senc && denc && mrb_enc_asciicompat(mrb, senc) && mrb_enc_asciicompat(mrb, denc)) { + if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) { + return dencidx; + } + } + if (encoding_equal(sname, dname)) { + return mrb_nil_p(arg2) ? -1 : dencidx; + } + } + else { + if (encoding_equal(sname, dname)) { + sname = ""; + dname = ""; + } + } + + fromp = sp = (unsigned char *)RSTRING_PTR(str); + slen = RSTRING_LEN(str); + blen = slen + 30; /* len + margin */ + dest = mrb_str_tmp_new(mrb, blen); + bp = (unsigned char *)RSTRING_PTR(dest); + + transcode_loop(mrb, &fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts); + if (fromp != sp+slen) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "not fully converted, %"PRIdPTRDIFF" bytes left", sp+slen-fromp); + } + buf = (unsigned char *)RSTRING_PTR(dest); + *bp = '\0'; + mrb_str_set_len(mrb, dest, bp - buf); + + /* set encoding */ + if (!denc) { + dencidx = mrb_define_dummy_encoding(mrb, dname); + } + *self = dest; + + return dencidx; +} + +static int +str_transcode(mrb_state *mrb, int argc, mrb_value *argv, mrb_value *self) +{ + mrb_value opt; + int ecflags = 0; + mrb_value ecopts = mrb_nil_value(); + + if (0 < argc) { + opt = mrb_check_convert_type(mrb, argv[argc-1], MRB_TT_HASH, "Hash", "to_hash"); + if (!mrb_nil_p(opt)) { + argc--; + ecflags = mrb_econv_prepare_opts(mrb, opt, &ecopts); + } + } + return str_transcode0(mrb, argc, argv, self, ecflags, ecopts); +} + +static inline mrb_value +str_encode_associate(mrb_state *mrb, mrb_value str, int encidx) +{ + int cr = 0; + + 
mrb_enc_associate_index(mrb, str, encidx); + + /* a transcoded string is never broken. */ + if (mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, encidx))) { + mrb_str_coderange_scan_restartable(RSTRING_PTR(str), RSTRING_END(str), 0, &cr); + } + else { + cr = ENC_CODERANGE_VALID; + } + ENC_CODERANGE_SET(str, cr); + return str; +} + +/* + * call-seq: + * str.encode!(encoding [, options] ) -> str + * str.encode!(dst_encoding, src_encoding [, options] ) -> str + * + * The first form transcodes the contents of str from + * str.encoding to +encoding+. + * The second form transcodes the contents of str from + * src_encoding to dst_encoding. + * The options Hash gives details for conversion. See String#encode + * for details. + * Returns the string even if no changes were made. + */ + +static mrb_value +str_encode_bang(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value str) +{ + /* the "*" specifier of mrb_get_args stores a pointer to the rest arguments, so argv must be a pointer rather than a local array */ + mrb_value *argv = NULL; + int argc = 0; + + mrb_get_args(mrb, "*", &argv, &argc); + mrb_value newstr; + int encidx; + + //if (OBJ_FROZEN(str)) { /* in future, may use str_frozen_check from string.c, but that's currently static */ + // mrb_raise(mrb, mrb->eRuntimeError_class, "string frozen"); + //} + + newstr = str; + encidx = str_transcode(mrb, argc, argv, &newstr); + + if (encidx < 0) return str; + mrb_str_shared_replace(mrb, str, newstr); + return str_encode_associate(mrb, str, encidx); +} + +/* + * call-seq: + * str.encode(encoding [, options] ) -> str + * str.encode(dst_encoding, src_encoding [, options] ) -> str + * str.encode([options]) -> str + * + * The first form returns a copy of str transcoded + * to encoding +encoding+. + * The second form returns a copy of str transcoded + * from src_encoding to dst_encoding. + * The last form returns a copy of str transcoded to + * Encoding.default_internal. 
+ * By default, the first and second form raise + * Encoding::UndefinedConversionError for characters that are + * undefined in the destination encoding, and + * Encoding::InvalidByteSequenceError for invalid byte sequences + * in the source encoding. The last form by default does not raise + * exceptions but uses replacement strings. + * The options Hash gives details for conversion. + * + * === options + * The hash options can have the following keys: + * :invalid :: + * If the value is :replace, #encode replaces + * invalid byte sequences in str with the replacement character. + * The default is to raise the exception. + * :undef :: + * If the value is :replace, #encode replaces + * characters which are undefined in the destination encoding with + * the replacement character. + * :replace :: + * Sets the replacement string to the value. The default replacement + * string is "\uFFFD" for Unicode encoding forms, and "?" otherwise. + * :fallback :: + * Sets the replacement string by the hash for undefined character. + * Its key is such an undefined character encoded in the source encoding + * of the current transcoder. Its value can be in any encoding as long as it + * can be converted into the destination encoding of the transcoder. + * :xml :: + * The value must be :text or :attr. + * If the value is :text, #encode replaces + * undefined characters with their (upper-case hexadecimal) numeric + * character references. '&', '<', and '>' are converted to "&amp;", + * "&lt;", and "&gt;", respectively. + * If the value is :attr, #encode also quotes + * the replacement result (using '"'), and replaces '"' with "&quot;". + * :cr_newline :: + * Replaces LF ("\n") with CR ("\r") if value is true. + * :crlf_newline :: + * Replaces LF ("\n") with CRLF ("\r\n") if value is true. + * :universal_newline :: + * Replaces CRLF ("\r\n") and CR ("\r") with LF ("\n") if value is true. 
+ */ + +static mrb_value +str_encode(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value str) +{ + /* the "*" specifier of mrb_get_args stores a pointer to the rest arguments, so argv must be a pointer rather than a local array */ + mrb_value *argv = NULL; + int argc = 0; + + mrb_get_args(mrb, "*", &argv, &argc); + mrb_value newstr = str; + int encidx = str_transcode(mrb, argc, argv, &newstr); + + if (encidx < 0) return mrb_str_dup(mrb, str); + if (mrb_obj_equal(mrb, newstr, str)) { + newstr = mrb_str_dup(mrb, str); + } + else { + RBASIC(newstr)->c = mrb_obj_class(mrb, str); + } + return str_encode_associate(mrb, newstr, encidx); +} + +mrb_value +mrb_str_encode(mrb_state *mrb, mrb_value str, mrb_value to, int ecflags, mrb_value ecopts) +{ + int argc = 1; + mrb_value *argv = &to; + mrb_value newstr = str; + int encidx = str_transcode0(mrb, argc, argv, &newstr, ecflags, ecopts); + + if (encidx < 0) return mrb_str_dup(mrb, str); + if (mrb_obj_equal(mrb, newstr, str)) { + newstr = mrb_str_dup(mrb, str); + } + else { + RBASIC(newstr)->c = mrb_obj_class(mrb, str); + } + return str_encode_associate(mrb, newstr, encidx); +} + +static void +econv_free(mrb_state *mrb, void *ptr) +{ + mrb_econv_t *ec = ptr; + mrb_econv_close(ec); +} + +static const struct mrb_data_type econv_data_type = { + "econv", econv_free, +}; + +static mrb_encoding * +make_dummy_encoding(mrb_state *mrb, const char *name) +{ + mrb_encoding *enc; + int idx; + idx = mrb_define_dummy_encoding(mrb, name); + enc = mrb_enc_from_index(mrb, idx); + return enc; +} + +static mrb_encoding * +make_encoding(mrb_state *mrb, const char *name) +{ + mrb_encoding *enc; + enc = mrb_enc_find(mrb, name); + if (!enc) + enc = make_dummy_encoding(mrb, name); + return enc; +} + +static mrb_value +make_encobj(mrb_state *mrb, const char *name) +{ + return mrb_enc_from_encoding(mrb, make_encoding(mrb, name)); +} + +/* + * call-seq: + * Encoding::Converter.asciicompat_encoding(string) -> encoding or nil + * Encoding::Converter.asciicompat_encoding(encoding) -> encoding or nil + * + * Returns the corresponding ASCII compatible encoding. 
+ * + * Returns nil if the argument is an ASCII compatible encoding. + * + * "corresponding ASCII compatible encoding" is a ASCII compatible encoding which + * can represents exactly the same characters as the given ASCII incompatible encoding. + * So, no conversion undefined error occurs when converting between the two encodings. + * + * Encoding::Converter.asciicompat_encoding("ISO-2022-JP") #=> # + * Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> # + * Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil + * + */ +static mrb_value +econv_s_asciicompat_encoding(mrb_state *mrb, mrb_value klass) +{ + mrb_value arg; + const char *arg_name, *result_name; + mrb_encoding *arg_enc, *result_enc; + + mrb_get_args(mrb, "o", &arg); + enc_arg(mrb, &arg, &arg_name, &arg_enc); + + result_name = mrb_econv_asciicompat_encoding(arg_name); + + if (result_name == NULL) + return mrb_nil_value(); + + result_enc = make_encoding(mrb, result_name); + + return mrb_enc_from_encoding(mrb, result_enc); +} + +static void +econv_args(mrb_state *mrb, + int argc, mrb_value *argv, + mrb_value *snamev_p, mrb_value *dnamev_p, + const char **sname_p, const char **dname_p, + mrb_encoding **senc_p, mrb_encoding **denc_p, + int *ecflags_p, + mrb_value *ecopts_p) +{ + mrb_value opt, opthash, flags_v, ecopts; + int sidx, didx; + const char *sname, *dname; + mrb_encoding *senc, *denc; + int ecflags; + + //mrb_scan_args(argc, argv, "21", snamev_p, dnamev_p, &opt); + *snamev_p = argv[0]; + *dnamev_p = argv[1]; + opt = argv[2]; + + if (argc < 3) {//mrb_nil_p(opt)) { + ecflags = 0; + ecopts = mrb_nil_value(); + } + else if (!mrb_nil_p(flags_v = mrb_check_to_integer(mrb, opt, "to_int"))) { + ecflags = mrb_fixnum(flags_v); + ecopts = mrb_nil_value(); + } + else { + opthash = mrb_convert_type(mrb, opt, MRB_TT_HASH, "Hash", "to_hash"); + ecflags = mrb_econv_prepare_opts(mrb, opthash, &ecopts); + } + + senc = NULL; + sidx = mrb_to_encoding_index(mrb, *snamev_p); + if (0 <= sidx) { + senc = 
mrb_enc_from_index(mrb, sidx); + } + else { + //StringValue(*snamev_p); + mrb_string_value(mrb, snamev_p); + } + + denc = NULL; + didx = mrb_to_encoding_index(mrb, *dnamev_p); + if (0 <= didx) { + denc = mrb_enc_from_index(mrb, didx); + } + else { + //StringValue(*dnamev_p); + mrb_string_value(mrb, dnamev_p); + } + + //sname = senc ? mrb_enc_name(senc) : StringValueCStr(*snamev_p); + sname = senc ? mrb_enc_name(senc) : mrb_string_value_cstr(mrb, snamev_p); + //dname = denc ? mrb_enc_name(denc) : StringValueCStr(*dnamev_p); + dname = denc ? mrb_enc_name(denc) : mrb_string_value_cstr(mrb, dnamev_p); + + *sname_p = sname; + *dname_p = dname; + *senc_p = senc; + *denc_p = denc; + *ecflags_p = ecflags; + *ecopts_p = ecopts; +} + +static int +decorate_convpath(mrb_state *mrb, mrb_value convpath, int ecflags) +{ + int num_decorators; + const char *decorators[MAX_ECFLAGS_DECORATORS]; + int i; + int n, len; + + num_decorators = decorator_names(ecflags, decorators); + if (num_decorators == -1) + return -1; + + len = n = RARRAY_LEN(convpath);//RARRAY_LENINT(convpath); + if (n != 0) { + mrb_value pair = RARRAY_PTR(convpath)[n-1]; + if (TYPE(pair) == MRB_TT_ARRAY) { + const char *sname = mrb_enc_name(mrb_to_encoding(mrb, RARRAY_PTR(pair)[0])); + const char *dname = mrb_enc_name(mrb_to_encoding(mrb, RARRAY_PTR(pair)[1])); + transcoder_entry_t *entry = get_transcoder_entry(sname, dname); + const mrb_transcoder *tr = load_transcoder_entry(mrb, entry); + if (!tr) + return -1; + if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) && + tr->asciicompat_type == asciicompat_encoder) { + n--; + mrb_ary_set(mrb, convpath, len + num_decorators - 1, pair); + } + } + else { + mrb_ary_set(mrb, convpath, len + num_decorators - 1, pair); + } + } + + for (i = 0; i < num_decorators; i++) + mrb_ary_set(mrb, convpath, n + i, mrb_str_new_cstr(mrb, decorators[i])); + + return 0; +} + +static void +search_convpath_i(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg) +{ + 
mrb_value *ary_p = arg; + mrb_value v; + + if (mrb_obj_equal(mrb, *ary_p, mrb_nil_value())) { + *ary_p = mrb_ary_new(mrb); + } + + if (DECORATOR_P(sname, dname)) { + v = mrb_str_new_cstr(mrb, dname); + } + else { + v = mrb_assoc_new(mrb, make_encobj(mrb, sname), make_encobj(mrb, dname)); + } + mrb_ary_set(mrb, *ary_p, depth, v); +} + +/* + * call-seq: + * Encoding::Converter.search_convpath(source_encoding, destination_encoding) -> ary + * Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt) -> ary + * + * Returns a conversion path. + * + * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP") + * #=> [[#, #], + * # [#, #]] + * + * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true) + * #=> [[#, #], + * # [#, #], + * # "universal_newline"] + * + * p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true) + * #=> [[#, #], + * # "universal_newline", + * # [#, #]] + */ +static mrb_value +econv_s_search_convpath(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value klass) +{ + mrb_value snamev, dnamev; + const char *sname, *dname; + mrb_encoding *senc, *denc; + int ecflags; + mrb_value ecopts; + mrb_value convpath; + + mrb_value argv[16]; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + econv_args(mrb, argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts); + convpath = mrb_nil_value(); + transcode_search_path(mrb, sname, dname, search_convpath_i, &convpath); + + if (mrb_nil_p(convpath)) + mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, sname, dname, ecflags)); + + if (decorate_convpath(mrb, convpath, ecflags) == -1) + mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, sname, dname, ecflags)); + + return convpath; +} + +/* + * Check the existence of a conversion path. + * Returns the number of converters in the conversion path. 
+ * result: >=0:success -1:failure + */ +int +mrb_econv_has_convpath_p(mrb_state *mrb, const char* from_encoding, const char* to_encoding) +{ + mrb_value convpath = mrb_nil_value(); + transcode_search_path(mrb, from_encoding, to_encoding, search_convpath_i, + &convpath); + return RTEST(convpath); +} + +struct mrb_econv_init_by_convpath_t { + mrb_econv_t *ec; + int index; + int ret; +}; + +static void +mrb_econv_init_by_convpath_i(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg) +{ + struct mrb_econv_init_by_convpath_t *a = (struct mrb_econv_init_by_convpath_t *)arg; + int ret; + + if (a->ret == -1) + return; + + ret = mrb_econv_add_converter(mrb, a->ec, sname, dname, a->index); + + a->ret = ret; + return; +} + +static mrb_econv_t * +mrb_econv_init_by_convpath(mrb_state *mrb, mrb_value self, mrb_value convpath, + const char **sname_p, const char **dname_p, + mrb_encoding **senc_p, mrb_encoding**denc_p) +{ + mrb_econv_t *ec; + long i; + int ret, first=1; + mrb_value elt; + mrb_encoding *senc = 0, *denc = 0; + const char *sname, *dname; + + ec = mrb_econv_alloc(RARRAY_LEN/*INT*/(convpath)); + DATA_PTR(self) = ec; + + for (i = 0; i < RARRAY_LEN(convpath); i++) { + mrb_value snamev, dnamev; + mrb_value pair; + elt = mrb_ary_ref(mrb, convpath, i); + if (!mrb_nil_p(pair = mrb_check_array_type(mrb, elt))) { + if (RARRAY_LEN(pair) != 2) + mrb_raise(mrb, E_ARGUMENT_ERROR, "not a 2-element array in convpath"); + snamev = mrb_ary_ref(mrb, pair, 0); + enc_arg(mrb, &snamev, &sname, &senc); + dnamev = mrb_ary_ref(mrb, pair, 1); + enc_arg(mrb, &dnamev, &dname, &denc); + } + else { + sname = ""; + //dname = StringValueCStr(elt); + dname = mrb_string_value_cstr(mrb, &elt); + } + if (DECORATOR_P(sname, dname)) { + ret = mrb_econv_add_converter(mrb, ec, sname, dname, ec->num_trans); + if (ret == -1) + mrb_raise(mrb, E_ARGUMENT_ERROR, "decoration failed: %s", dname); + } + else { + int j = ec->num_trans; + struct mrb_econv_init_by_convpath_t arg; + arg.ec = 
ec; + arg.index = ec->num_trans; + arg.ret = 0; + ret = transcode_search_path(mrb, sname, dname, mrb_econv_init_by_convpath_i, &arg); + if (ret == -1 || arg.ret == -1) + mrb_raise(mrb, E_ARGUMENT_ERROR, "adding conversion failed: %s to %s", sname, dname); + if (first) { + first = 0; + *senc_p = senc; + *sname_p = ec->elems[j].tc->transcoder->src_encoding; + } + *denc_p = denc; + *dname_p = ec->elems[ec->num_trans-1].tc->transcoder->dst_encoding; + } + } + + if (first) { + *senc_p = NULL; + *denc_p = NULL; + *sname_p = ""; + *dname_p = ""; + } + + ec->source_encoding_name = *sname_p; + ec->destination_encoding_name = *dname_p; + + return ec; +} + +/* + * call-seq: + * Encoding::Converter.new(source_encoding, destination_encoding) + * Encoding::Converter.new(source_encoding, destination_encoding, opt) + * Encoding::Converter.new(convpath) + * + * possible options elements: + * hash form: + * :invalid => nil # raise error on invalid byte sequence (default) + * :invalid => :replace # replace invalid byte sequence + * :undef => nil # raise error on undefined conversion (default) + * :undef => :replace # replace undefined conversion + * :replace => string # replacement string ("?" or "\uFFFD" if not specified) + * :universal_newline => true # decorator for converting CRLF and CR to LF + * :crlf_newline => true # decorator for converting LF to CRLF + * :cr_newline => true # decorator for converting LF to CR + * :xml => :text # escape as XML CharData. 
+ * :xml => :attr # escape as XML AttValue + * integer form: + * Encoding::Converter::INVALID_REPLACE + * Encoding::Converter::UNDEF_REPLACE + * Encoding::Converter::UNDEF_HEX_CHARREF + * Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR + * Encoding::Converter::CRLF_NEWLINE_DECORATOR + * Encoding::Converter::CR_NEWLINE_DECORATOR + * Encoding::Converter::XML_TEXT_DECORATOR + * Encoding::Converter::XML_ATTR_CONTENT_DECORATOR + * Encoding::Converter::XML_ATTR_QUOTE_DECORATOR + * + * Encoding::Converter.new creates an instance of Encoding::Converter. + * + * Source_encoding and destination_encoding should be a string or + * Encoding object. + * + * opt should be nil, a hash or an integer. + * + * convpath should be an array. + * convpath may contain + * - two-element arrays which contain encodings or encoding names, or + * - strings representing decorator names. + * + * Encoding::Converter.new optionally takes an option. + * The option should be a hash or an integer. + * The option hash can contain :invalid => nil, etc. + * The option integer should be logical-or of constants such as + * Encoding::Converter::INVALID_REPLACE, etc. + * + * [:invalid => nil] + * Raise error on invalid byte sequence. This is a default behavior. + * [:invalid => :replace] + * Replace invalid byte sequence by replacement string. + * [:undef => nil] + * Raise an error if a character in source_encoding is not defined in destination_encoding. + * This is a default behavior. + * [:undef => :replace] + * Replace undefined character in destination_encoding with replacement string. + * [:replace => string] + * Specify the replacement string. + * If not specified, "\uFFFD" is used for Unicode encodings and "?" for others. + * [:universal_newline => true] + * Convert CRLF and CR to LF. + * [:crlf_newline => true] + * Convert LF to CRLF. + * [:cr_newline => true] + * Convert LF to CR. + * [:xml => :text] + * Escape as XML CharData. + * This form can be used as a HTML 4.0 #PCDATA. 
 *   - '&' -> '&amp;'
 *   - '<' -> '&lt;'
 *   - '>' -> '&gt;'
 *   - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
 * [:xml => :attr]
 *   Escape as XML AttValue.
 *   The converted result is quoted as "...".
 *   This form can be used as a HTML 4.0 attribute value.
 *   - '&' -> '&amp;'
 *   - '<' -> '&lt;'
 *   - '>' -> '&gt;'
 *   - '"' -> '&quot;'
 *   - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
 *
 * Examples:
 *   # UTF-16BE to UTF-8
 *   ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
 *
 *   # Usually, decorators such as newline conversion are inserted last.
 *   ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline => true)
 *   p ec.convpath #=> [[#<Encoding:UTF-16BE>, #<Encoding:UTF-8>],
 *                 #    "universal_newline"]
 *
 *   # But, if the last encoding is ASCII incompatible,
 *   # decorators are inserted before the last conversion.
 *   ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline => true)
 *   p ec.convpath #=> ["crlf_newline",
 *                 #    [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
 *
 *   # Conversion path can be specified directly.
+ * ec = Encoding::Converter.new(["universal_newline", ["EUC-JP", "UTF-8"], ["UTF-8", "UTF-16BE"]]) + * p ec.convpath #=> ["universal_newline", + * # [#, #], + * # [#, #]] + */ +static mrb_value +econv_init(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self) +{ + mrb_value ecopts; + mrb_value snamev, dnamev; + const char *sname, *dname; + mrb_encoding *senc, *denc; + mrb_econv_t *ec; + int ecflags; + mrb_value convpath; + mrb_value argv[16]; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + if (mrb_check_datatype(mrb, self, &econv_data_type)) { + mrb_raise(mrb, E_TYPE_ERROR, "already initialized"); + } + + if (argc == 1 && !mrb_nil_p(convpath = mrb_check_array_type(mrb, argv[0]))) { + ec = mrb_econv_init_by_convpath(mrb, self, convpath, &sname, &dname, &senc, &denc); + ecflags = 0; + ecopts = mrb_nil_value(); + } + else { + econv_args(mrb, argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts); + ec = mrb_econv_open_opts(mrb, sname, dname, ecflags, ecopts); + } + + if (!ec) { + mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, sname, dname, ecflags)); + } + + if (!DECORATOR_P(sname, dname)) { + if (!senc) + senc = make_dummy_encoding(mrb, sname); + if (!denc) + denc = make_dummy_encoding(mrb, dname); + } + + ec->source_encoding = senc; + ec->destination_encoding = denc; + + DATA_PTR(self) = ec; + + return self; +} + +/* + * call-seq: + * ec.inspect -> string + * + * Returns a printable version of ec + * + * ec = Encoding::Converter.new("iso-8859-1", "utf-8") + * puts ec.inspect #=> # + * + */ +static mrb_value +econv_inspect(mrb_state *mrb, mrb_value self) +{ + const char *cname = mrb_obj_classname(mrb, self); + mrb_econv_t *ec; + + Data_Get_Struct(mrb, self, &econv_data_type, ec); + if (!ec) + return mrb_sprintf(mrb, "#<%s: uninitialized>", cname); + else { + const char *sname = ec->source_encoding_name; + const char *dname = ec->destination_encoding_name; + mrb_value str; + str = mrb_sprintf(mrb, "#<%s: ", cname); + 
econv_description(mrb, sname, dname, ec->flags, str); + mrb_str_cat2(mrb, str, ">"); + return str; + } +} + +static mrb_econv_t * +check_econv(mrb_state *mrb, mrb_value self) +{ + mrb_econv_t *ec; + + Data_Get_Struct(mrb, self, &econv_data_type, ec); + if (!ec) { + mrb_raise(mrb, E_TYPE_ERROR, "uninitialized encoding converter"); + } + return ec; +} + +/* + * call-seq: + * ec.source_encoding -> encoding + * + * Returns the source encoding as an Encoding object. + */ +static mrb_value +econv_source_encoding(mrb_state *mrb, mrb_value self) +{ + mrb_econv_t *ec = check_econv(mrb, self); + if (!ec->source_encoding) + return mrb_nil_value(); + return mrb_enc_from_encoding(mrb, ec->source_encoding); +} + +/* + * call-seq: + * ec.destination_encoding -> encoding + * + * Returns the destination encoding as an Encoding object. + */ +static mrb_value +econv_destination_encoding(mrb_state *mrb, mrb_value self) +{ + mrb_econv_t *ec = check_econv(mrb, self); + if (!ec->destination_encoding) + return mrb_nil_value(); + return mrb_enc_from_encoding(mrb, ec->destination_encoding); +} + +/* + * call-seq: + * ec.convpath -> ary + * + * Returns the conversion path of ec. + * + * The result is an array of conversions. + * + * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP", crlf_newline: true) + * p ec.convpath + * #=> [[#, #], + * # [#, #], + * # "crlf_newline"] + * + * Each element of the array is a pair of encodings or a string. + * A pair means an encoding conversion. + * A string means a decorator. + * + * In the above example, [#, #] means + * a converter from ISO-8859-1 to UTF-8. + * "crlf_newline" means newline converter from LF to CRLF. 
+ */ +static mrb_value +econv_convpath(mrb_state *mrb, mrb_value self) +{ + mrb_econv_t *ec = check_econv(mrb, self); + mrb_value result; + int i; + + result = mrb_ary_new(mrb); + for (i = 0; i < ec->num_trans; i++) { + const mrb_transcoder *tr = ec->elems[i].tc->transcoder; + mrb_value v; + if (DECORATOR_P(tr->src_encoding, tr->dst_encoding)) + v = mrb_str_new_cstr(mrb, tr->dst_encoding); + else + v = mrb_assoc_new(mrb, make_encobj(mrb, tr->src_encoding), make_encobj(mrb, tr->dst_encoding)); + mrb_ary_push(mrb, result, v); + } + return result; +} + +static mrb_value +econv_result_to_symbol(mrb_econv_result_t res) +{ + switch (res) { + case econv_invalid_byte_sequence: return sym_invalid_byte_sequence; + case econv_incomplete_input: return sym_incomplete_input; + case econv_undefined_conversion: return sym_undefined_conversion; + case econv_destination_buffer_full: return sym_destination_buffer_full; + case econv_source_buffer_empty: return sym_source_buffer_empty; + case econv_finished: return sym_finished; + case econv_after_output: return sym_after_output; + default: return mrb_fixnum_value(res); /* should not be reached */ + } +} + +mrb_value econv_primitive_cnvproc(mrb_state *mrb, int argc, mrb_value *argv, mrb_value self) +{ + mrb_value input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v; + mrb_econv_t *ec = check_econv(mrb, self); + mrb_econv_result_t res; + const unsigned char *ip, *is; + unsigned char *op, *os; + long output_byteoffset, output_bytesize; + unsigned long output_byteend; + int flags; + + //mrb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &opt); + input = argv[0]; + output = argv[1]; + output_byteoffset_v = argv[2]; + output_bytesize_v = argv[3]; + opt = argv[4]; + + if (argc < 3)//mrb_nil_p(output_byteoffset_v)) + output_byteoffset = 0; /* dummy */ + else + output_byteoffset = mrb_fixnum(output_byteoffset_v); + + if (argc < 4)//mrb_nil_p(output_bytesize_v)) + output_bytesize = 0; /* 
dummy */ + else + output_bytesize = mrb_fixnum(output_bytesize_v); + + if (argc < 5) {//mrb_nil_p(opt)) { + flags = 0; + } + else if (!mrb_nil_p(flags_v = mrb_check_to_integer(mrb, opt, "to_int"))) { + flags = mrb_fixnum(flags_v); + } + else { + mrb_value v; + opt = mrb_convert_type(mrb, opt, MRB_TT_HASH, "Hash", "to_hash"); + flags = 0; + v = mrb_hash_get(mrb, opt, sym_partial_input); + if (RTEST(v)) + flags |= ECONV_PARTIAL_INPUT; + v = mrb_hash_get(mrb, opt, sym_after_output); + if (RTEST(v)) + flags |= ECONV_AFTER_OUTPUT; + } + + //StringValue(output); + mrb_string_value(mrb, &output); + if (!mrb_nil_p(input)) + //StringValue(input); + mrb_string_value(mrb, &input); + mrb_str_modify(mrb, output); + + if (mrb_nil_p(output_bytesize_v)) { + output_bytesize = STR_BUF_MIN_SIZE; + if (!mrb_nil_p(input) && output_bytesize < RSTRING_LEN(input)) + output_bytesize = RSTRING_LEN(input); + } + + retry: + + if (mrb_nil_p(output_byteoffset_v)) + output_byteoffset = RSTRING_LEN(output); + + if (output_byteoffset < 0) + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative output_byteoffset"); + + if (RSTRING_LEN(output) < output_byteoffset) + mrb_raise(mrb, E_ARGUMENT_ERROR, "output_byteoffset too big"); + + if (output_bytesize < 0) + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative output_bytesize"); + + output_byteend = (unsigned long)output_byteoffset + + (unsigned long)output_bytesize; + + if (output_byteend < (unsigned long)output_byteoffset || + LONG_MAX < output_byteend) + mrb_raise(mrb, E_ARGUMENT_ERROR, "output_byteoffset+output_bytesize too big"); + + if (mrb_str_capacity(output) < output_byteend) + mrb_str_resize(mrb, output, output_byteend); + + if (mrb_nil_p(input)) { + ip = is = NULL; + } + else { + ip = (const unsigned char *)RSTRING_PTR(input); + is = ip + RSTRING_LEN(input); + } + + op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset; + os = op + output_bytesize; + + res = mrb_econv_convert(mrb, ec, &ip, is, &op, os, flags); + mrb_str_set_len(mrb, output, 
op-(unsigned char *)RSTRING_PTR(output)); + if (!mrb_nil_p(input)) + mrb_str_drop_bytes(mrb, input, ip - (unsigned char *)RSTRING_PTR(input)); + + if (mrb_nil_p(output_bytesize_v) && res == econv_destination_buffer_full) { + if (LONG_MAX / 2 < output_bytesize) + mrb_raise(mrb, E_ARGUMENT_ERROR, "too long conversion result"); + output_bytesize *= 2; + output_byteoffset_v = mrb_nil_value(); + goto retry; + } + + if (ec->destination_encoding) { + mrb_enc_associate(mrb, output, ec->destination_encoding); + } + + return econv_result_to_symbol(res); +} + +/* + * call-seq: + * ec.primitive_convert(source_buffer, destination_buffer) -> symbol + * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol + * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol + * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol + * + * possible opt elements: + * hash form: + * :partial_input => true # source buffer may be part of larger source + * :after_output => true # stop conversion after output before input + * integer form: + * Encoding::Converter::PARTIAL_INPUT + * Encoding::Converter::AFTER_OUTPUT + * + * possible results: + * :invalid_byte_sequence + * :incomplete_input + * :undefined_conversion + * :after_output + * :destination_buffer_full + * :source_buffer_empty + * :finished + * + * primitive_convert converts source_buffer into destination_buffer. + * + * source_buffer should be a string or nil. + * nil means a empty string. + * + * destination_buffer should be a string. + * + * destination_byteoffset should be an integer or nil. + * nil means the end of destination_buffer. + * If it is omitted, nil is assumed. + * + * destination_bytesize should be an integer or nil. + * nil means unlimited. + * If it is omitted, nil is assumed. + * + * opt should be nil, a hash or an integer. + * nil means no flags. 
+ * If it is omitted, nil is assumed. + * + * primitive_convert converts the content of source_buffer from beginning + * and store the result into destination_buffer. + * + * destination_byteoffset and destination_bytesize specify the region which + * the converted result is stored. + * destination_byteoffset specifies the start position in destination_buffer in bytes. + * If destination_byteoffset is nil, + * destination_buffer.bytesize is used for appending the result. + * destination_bytesize specifies maximum number of bytes. + * If destination_bytesize is nil, + * destination size is unlimited. + * After conversion, destination_buffer is resized to + * destination_byteoffset + actually produced number of bytes. + * Also destination_buffer's encoding is set to destination_encoding. + * + * primitive_convert drops the converted part of source_buffer. + * the dropped part is converted in destination_buffer or + * buffered in Encoding::Converter object. + * + * primitive_convert stops conversion when one of following condition met. + * - invalid byte sequence found in source buffer (:invalid_byte_sequence) + * - unexpected end of source buffer (:incomplete_input) + * this occur only when :partial_input is not specified. + * - character not representable in output encoding (:undefined_conversion) + * - after some output is generated, before input is done (:after_output) + * this occur only when :after_output is specified. + * - destination buffer is full (:destination_buffer_full) + * this occur only when destination_bytesize is non-nil. + * - source buffer is empty (:source_buffer_empty) + * this occur only when :partial_input is specified. 
+ * - conversion is finished (:finished) + * + * example: + * ec = Encoding::Converter.new("UTF-8", "UTF-16BE") + * ret = ec.primitive_convert(src="pi", dst="", nil, 100) + * p [ret, src, dst] #=> [:finished, "", "\x00p\x00i"] + * + * ec = Encoding::Converter.new("UTF-8", "UTF-16BE") + * ret = ec.primitive_convert(src="pi", dst="", nil, 1) + * p [ret, src, dst] #=> [:destination_buffer_full, "i", "\x00"] + * ret = ec.primitive_convert(src, dst="", nil, 1) + * p [ret, src, dst] #=> [:destination_buffer_full, "", "p"] + * ret = ec.primitive_convert(src, dst="", nil, 1) + * p [ret, src, dst] #=> [:destination_buffer_full, "", "\x00"] + * ret = ec.primitive_convert(src, dst="", nil, 1) + * p [ret, src, dst] #=> [:finished, "", "i"] + * + */ +static mrb_value +econv_primitive_convert(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self) +{ + mrb_value argv[16]; + int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + return econv_primitive_cnvproc(mrb, argc, argv, self); +} + +/* + * call-seq: + * ec.convert(source_string) -> destination_string + * + * Convert source_string and return destination_string. + * + * source_string is assumed as a part of source. + * i.e. :partial_input=>true is specified internally. + * finish method should be used last. 
+ * + * ec = Encoding::Converter.new("utf-8", "euc-jp") + * puts ec.convert("\u3042").dump #=> "\xA4\xA2" + * puts ec.finish.dump #=> "" + * + * ec = Encoding::Converter.new("euc-jp", "utf-8") + * puts ec.convert("\xA4").dump #=> "" + * puts ec.convert("\xA2").dump #=> "\xE3\x81\x82" + * puts ec.finish.dump #=> "" + * + * ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + * puts ec.convert("\xE3").dump #=> "".force_encoding("ISO-2022-JP") + * puts ec.convert("\x81").dump #=> "".force_encoding("ISO-2022-JP") + * puts ec.convert("\x82").dump #=> "\e$B$\"".force_encoding("ISO-2022-JP") + * puts ec.finish.dump #=> "\e(B".force_encoding("ISO-2022-JP") + * + * If a conversion error occur, + * Encoding::UndefinedConversionError or + * Encoding::InvalidByteSequenceError is raised. + * Encoding::Converter#convert doesn't supply methods to recover or restart + * from these exceptions. + * When you want to handle these conversion errors, + * use Encoding::Converter#primitive_convert. + * + */ +static mrb_value +econv_convert(mrb_state *mrb, mrb_value self) +{ + mrb_value source_string; + mrb_value ret, dst; + mrb_value av[5]; + int ac; + mrb_econv_t *ec = check_econv(mrb, self); + + mrb_get_args(mrb, "o", &source_string); + //StringValue(source_string); + mrb_string_value(mrb, &source_string); + + dst = mrb_str_new(mrb, NULL, 0); + + av[0] = mrb_str_dup(mrb, source_string); + av[1] = dst; + av[2] = mrb_nil_value(); + av[3] = mrb_nil_value(); + av[4] = mrb_fixnum_value(ECONV_PARTIAL_INPUT); + ac = 5; + + ret = econv_primitive_cnvproc(mrb, ac, av, self); + + if (mrb_obj_equal(mrb, ret, sym_invalid_byte_sequence) || + mrb_obj_equal(mrb, ret, sym_undefined_conversion) || + mrb_obj_equal(mrb, ret, sym_incomplete_input)) { + mrb_value exc = make_econv_exception(mrb, ec); + mrb_exc_raise(mrb, exc); + } + + if (mrb_obj_equal(mrb, ret, sym_finished)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "converter already finished"); + } + + if (!mrb_obj_equal(mrb, ret, sym_source_buffer_empty)) { 
+ mrb_bug("unexpected result of econv_primitive_convert"); + } + + return dst; +} + +/* + * call-seq: + * ec.finish -> string + * + * Finishes the converter. + * It returns the last part of the converted string. + * + * ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + * p ec.convert("\u3042") #=> "\e$B$\"" + * p ec.finish #=> "\e(B" + */ +static mrb_value +econv_finish(mrb_state *mrb, mrb_value self) +{ + mrb_value ret, dst; + mrb_value av[5]; + int ac; + mrb_econv_t *ec = check_econv(mrb, self); + + dst = mrb_str_new(mrb, NULL, 0); + + av[0] = mrb_nil_value(); + av[1] = dst; + av[2] = mrb_nil_value(); + av[3] = mrb_nil_value(); + av[4] = mrb_fixnum_value(0); + ac = 5; + + ret = econv_primitive_cnvproc(mrb, ac, av, self); + + if (mrb_obj_equal(mrb, ret, sym_invalid_byte_sequence) || + mrb_obj_equal(mrb, ret, sym_undefined_conversion) || + mrb_obj_equal(mrb, ret, sym_incomplete_input)) { + mrb_value exc = make_econv_exception(mrb, ec); + mrb_exc_raise(mrb, exc); + } + + if (!mrb_obj_equal(mrb, ret, sym_finished)) { + mrb_bug("unexpected result of econv_primitive_convert"); + } + + return dst; +} + +/* + * call-seq: + * ec.primitive_errinfo -> array + * + * primitive_errinfo returns important information regarding the last error + * as a 5-element array: + * + * [result, enc1, enc2, error_bytes, readagain_bytes] + * + * result is the last result of primitive_convert. + * + * Other elements are only meaningful when result is + * :invalid_byte_sequence, :incomplete_input or :undefined_conversion. + * + * enc1 and enc2 indicate a conversion step as a pair of strings. + * For example, a converter from EUC-JP to ISO-8859-1 converts + * a string as follows: EUC-JP -> UTF-8 -> ISO-8859-1. + * So [enc1, enc2] is either ["EUC-JP", "UTF-8"] or ["UTF-8", "ISO-8859-1"]. + * + * error_bytes and readagain_bytes indicate the byte sequences which caused the error. + * error_bytes is discarded portion. 
+ * readagain_bytes is buffered portion which is read again on next conversion. + * + * Example: + * + * # \xff is invalid as EUC-JP. + * ec = Encoding::Converter.new("EUC-JP", "Shift_JIS") + * ec.primitive_convert(src="\xff", dst="", nil, 10) + * p ec.primitive_errinfo + * #=> [:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", ""] + * + * # HIRAGANA LETTER A (\xa4\xa2 in EUC-JP) is not representable in ISO-8859-1. + * # Since this error is occur in UTF-8 to ISO-8859-1 conversion, + * # error_bytes is HIRAGANA LETTER A in UTF-8 (\xE3\x81\x82). + * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + * ec.primitive_convert(src="\xa4\xa2", dst="", nil, 10) + * p ec.primitive_errinfo + * #=> [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", ""] + * + * # partial character is invalid + * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + * ec.primitive_convert(src="\xa4", dst="", nil, 10) + * p ec.primitive_errinfo + * #=> [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""] + * + * # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by + * # partial characters. + * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + * ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT) + * p ec.primitive_errinfo + * #=> [:source_buffer_empty, nil, nil, nil, nil] + * + * # \xd8\x00\x00@ is invalid as UTF-16BE because + * # no low surrogate after high surrogate (\xd8\x00). + * # It is detected by 3rd byte (\00) which is part of next character. + * # So the high surrogate (\xd8\x00) is discarded and + * # the 3rd byte is read again later. + * # Since the byte is buffered in ec, it is dropped from src. + * ec = Encoding::Converter.new("UTF-16BE", "UTF-8") + * ec.primitive_convert(src="\xd8\x00\x00@", dst="", nil, 10) + * p ec.primitive_errinfo + * #=> [:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00"] + * p src + * #=> "@" + * + * # Similar to UTF-16BE, \x00\xd8@\x00 is invalid as UTF-16LE. 
+ * # The problem is detected by 4th byte. + * ec = Encoding::Converter.new("UTF-16LE", "UTF-8") + * ec.primitive_convert(src="\x00\xd8@\x00", dst="", nil, 10) + * p ec.primitive_errinfo + * #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00"] + * p src + * #=> "" + * + */ +static mrb_value +econv_primitive_errinfo(mrb_state *mrb, mrb_value self) +{ + mrb_econv_t *ec = check_econv(mrb, self); + + mrb_value ary; + + ary = mrb_ary_new_capa(mrb, 5);//mrb_ary_new2(5); + + mrb_ary_set(mrb, ary, 0, econv_result_to_symbol(ec->last_error.result));//rb_ary_store(ary, 0, econv_result_to_symbol(ec->last_error.result)); + mrb_ary_set(mrb, ary, 4, mrb_nil_value());//rb_ary_store(ary, 4, mrb_nil_value()); + + if (ec->last_error.source_encoding) + mrb_ary_set(mrb, ary, 1, mrb_str_new2(mrb, ec->last_error.source_encoding));//rb_ary_store(ary, 1, mrb_str_new2(mrb, ec->last_error.source_encoding)); + + if (ec->last_error.destination_encoding) + mrb_ary_set(mrb, ary, 2, mrb_str_new2(mrb, ec->last_error.destination_encoding));//rb_ary_store(ary, 2, mrb_str_new2(mrb, ec->last_error.destination_encoding)); + + if (ec->last_error.error_bytes_start) { + //rb_ary_store(ary, 3, mrb_str_new(mrb, (const char *)ec->last_error.error_bytes_start, ec->last_error.error_bytes_len)); + mrb_ary_set(mrb, ary, 3, mrb_str_new(mrb, (const char *)ec->last_error.error_bytes_start, ec->last_error.error_bytes_len)); + //rb_ary_store(ary, 4, mrb_str_new(mrb, (const char *)ec->last_error.error_bytes_start + ec->last_error.error_bytes_len, ec->last_error.readagain_len)); + mrb_ary_set(mrb, ary, 4, mrb_str_new(mrb, (const char *)ec->last_error.error_bytes_start + ec->last_error.error_bytes_len, ec->last_error.readagain_len)); + } + + return ary; +} + +/* + * call-seq: + * ec.insert_output(string) -> nil + * + * Inserts string into the encoding converter. + * The string will be converted to the destination encoding and + * output on later conversions. 
+ *
+ * If the destination encoding is stateful,
+ * string is converted according to the state and the state is updated.
+ *
+ * This method should be used only when a conversion error occurs.
+ *
+ * ec = Encoding::Converter.new("utf-8", "iso-8859-1")
+ * src = "HIRAGANA LETTER A is \u{3042}."
+ * dst = ""
+ * p ec.primitive_convert(src, dst) #=> :undefined_conversion
+ * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is ", "."]
+ * ec.insert_output("")
+ * p ec.primitive_convert(src, dst) #=> :finished
+ * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is .", ""]
+ *
+ * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
+ * src = "\u{306F 3041 3068 2661 3002}" # U+2661 is not representable in iso-2022-jp
+ * dst = ""
+ * p ec.primitive_convert(src, dst) #=> :undefined_conversion
+ * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H".force_encoding("ISO-2022-JP"), "\xE3\x80\x82"]
+ * ec.insert_output "?" # state change required to output "?".
+ * p ec.primitive_convert(src, dst) #=> :finished
+ * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H\e(B?\e$B!#\e(B".force_encoding("ISO-2022-JP"), ""]
+ *
+ */
+static mrb_value
+econv_insert_output(mrb_state *mrb, mrb_value self)
+{
+  mrb_value string;
+  const char *insert_enc;
+  mrb_encoding *target;
+  mrb_econv_t *ec;
+
+  mrb_get_args(mrb, "o", &string);
+  ec = check_econv(mrb, self);
+
+  /* mrb_string_value takes the place of CRuby's StringValue(string) here.
+   * The argument is then re-encoded into the encoding the converter
+   * reports for inserted output before it is handed over. */
+  mrb_string_value(mrb, &string);
+  insert_enc = mrb_econv_encoding_to_insert_output(ec);
+  target = mrb_enc_find(mrb, insert_enc);
+  string = mrb_str_encode(mrb, string, mrb_enc_from_encoding(mrb, target), 0, mrb_nil_value());
+
+  /* A return value of -1 from the insert is reported as an ArgumentError. */
+  if (mrb_econv_insert_output(mrb, ec, (const unsigned char *)RSTRING_PTR(string), RSTRING_LEN(string), insert_enc) == -1) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "too big string");
+  }
+
+  return mrb_nil_value();
+}
+
+/*
+ * call-seq:
+ * ec.putback -> string
+ * ec.putback(max_numbytes) -> string
+ *
+ * Put back the bytes which will be converted.
+ *
+ * The bytes are caused by invalid_byte_sequence error.
+ * When invalid_byte_sequence error, some bytes are discarded and
+ * some bytes are buffered to be converted later.
+ * The latter bytes can be put back.
+ * It can be observed by
+ * Encoding::InvalidByteSequenceError#readagain_bytes and
+ * Encoding::Converter#primitive_errinfo.
+ *
+ * ec = Encoding::Converter.new("utf-16le", "iso-8859-1")
+ * src = "\x00\xd8\x61\x00"
+ * dst = ""
+ * p ec.primitive_convert(src, dst) #=> :invalid_byte_sequence
+ * p ec.primitive_errinfo #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"]
+ * p ec.putback #=> "a\x00"
+ * p ec.putback #=> "" # no more bytes to put back
+ *
+ */
+static mrb_value
+econv_putback(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
+{
+  mrb_econv_t *ec = check_econv(mrb, self);
+  int n;
+  int putbackable;
+  mrb_value str, max;
+
+  /* NOTE(review): argv is a fixed 16-element array and its address is
+   * handed to mrb_get_args for the "*" format.  Whether "*" copies the
+   * arguments into this storage or stores a pointer to them cannot be
+   * told from this file -- confirm against mrb_get_args before relying
+   * on argv[0] below. */
+  mrb_value argv[16];
+  int argc;
+
+  //mrb_scan_args(argc, argv, "01", &max);
+  mrb_get_args(mrb, "*", &argv, &argc);
+
+  /* Without an argument everything that can be put back is requested;
+   * otherwise the requested count is capped at what is available. */
+  if (argc == 0)//mrb_nil_p(max))
+    n = mrb_econv_putbackable(ec);
+  else {
+    /* NOTE(review): max is passed to mrb_fixnum with no visible type or
+     * sign check, so a negative count would reach mrb_str_new unchanged
+     * -- confirm this is acceptable. */
+    max = argv[0];
+    n = mrb_fixnum(max);
+    putbackable = mrb_econv_putbackable(ec);
+    if (putbackable < n)
+      n = putbackable;
+  }
+
+  /* Create an n-byte string and pass its buffer to mrb_econv_putback. */
+  str = mrb_str_new(mrb, NULL, n);
+  mrb_econv_putback(ec, (unsigned char *)RSTRING_PTR(str), n);
+
+  /* Tag the result with the converter's source encoding when one is set. */
+  if (ec->source_encoding) {
+    mrb_enc_associate(mrb, str, ec->source_encoding);
+  }
+
+  return str;
+}
+
+/*
+ * call-seq:
+ * ec.last_error -> exception or nil
+ *
+ * Returns an exception object for the last conversion.
+ * Returns nil if the last conversion did not produce an error.
+ *
+ * "error" means that
+ * Encoding::InvalidByteSequenceError and Encoding::UndefinedConversionError for
+ * Encoding::Converter#convert and
+ * :invalid_byte_sequence, :incomplete_input and :undefined_conversion for
+ * Encoding::Converter#primitive_convert.
+ *
+ * ec = Encoding::Converter.new("utf-8", "iso-8859-1")
+ * p ec.primitive_convert(src="\xf1abcd", dst="") #=> :invalid_byte_sequence
+ * p ec.last_error #=> #
+ * p ec.primitive_convert(src, dst, nil, 1) #=> :destination_buffer_full
+ * p ec.last_error #=> nil
+ *
+ */
+static mrb_value
+econv_last_error(mrb_state *mrb, mrb_value self)
+{
+  mrb_econv_t *ec = check_econv(mrb, self);
+  mrb_value exc;
+
+  exc = make_econv_exception(mrb, ec);
+  if (mrb_nil_p(exc))
+    return mrb_nil_value();
+  return exc;
+}
+
+/*
+ * call-seq:
+ * ec.replacement -> string
+ *
+ * Returns the replacement string.
+ *
+ * ec = Encoding::Converter.new("euc-jp", "us-ascii")
+ * p ec.replacement #=> "?"
+ *
+ * ec = Encoding::Converter.new("euc-jp", "utf-8")
+ * p ec.replacement #=> "\uFFFD"
+ */
+static mrb_value
+econv_get_replacement(mrb_state *mrb, mrb_value self)
+{
+  mrb_econv_t *ec = check_econv(mrb, self);
+  int ret;
+  mrb_encoding *enc;
+
+  /* make_replacement is called before the replacement fields are read;
+   * a return value of -1 is reported as an UndefinedConversionError. */
+  ret = make_replacement(mrb, ec);
+  if (ret == -1) {
+    mrb_raise(mrb, E_UNDEFINEDCONVERSION_ERROR, "replacement character setup failed");
+  }
+
+  enc = mrb_enc_find(mrb, ec->replacement_enc);
+  return mrb_enc_str_new(mrb, (const char *)ec->replacement_str, (long)ec->replacement_len, enc);
+}
+
+/*
+ * call-seq:
+ * ec.replacement = string
+ *
+ * Sets the replacement string.
+ *
+ * ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
+ * ec.replacement = ""
+ * p ec.convert("a \u3042 b") #=> "a b"
+ */
+static mrb_value
+econv_set_replacement(mrb_state *mrb, mrb_value self)
+{
+  mrb_value arg;
+  mrb_value string;
+  mrb_econv_t *ec = check_econv(mrb, self);
+  int ret;
+  mrb_encoding *enc;
+
+  /* Fetch the argument first.  The working copy must be taken only after
+   * mrb_get_args has filled in arg; copying it beforehand would read an
+   * uninitialized value. */
+  mrb_get_args(mrb, "o", &arg);
+  string = arg;
+
+  //StringValue(string);
+  mrb_string_value(mrb, &string);
+  enc = mrb_enc_get(mrb, string);
+
+  ret = mrb_econv_set_replacement(mrb, ec,
+          (const unsigned char *)RSTRING_PTR(string),
+          RSTRING_LEN(string),
+          mrb_enc_name(enc));
+
+  if (ret == -1) {
+    /* xxx: mrb_eInvalidByteSequenceError? */
+    mrb_raise(mrb, E_UNDEFINEDCONVERSION_ERROR, "replacement character setup failed");
+  }
+
+  /* The caller's original argument is returned, not the working copy. */
+  return arg;
+}
+
+/* Public wrapper around the file-local make_econv_exception. */
+mrb_value
+mrb_econv_make_exception(mrb_state *mrb, mrb_econv_t *ec)
+{
+  return make_econv_exception(mrb, ec);
+}
+
+/* Raises the exception built by make_econv_exception for ec; returns
+ * normally when that result is nil. */
+void
+mrb_econv_check_error(mrb_state *mrb, mrb_econv_t *ec)
+{
+  mrb_value exc;
+
+  exc = make_econv_exception(mrb, ec);
+  if (mrb_nil_p(exc))
+    return;
+  mrb_exc_raise(mrb, exc);
+}
+
+/*
+ * call-seq:
+ * ecerr.source_encoding_name -> string
+ *
+ * Returns the source encoding name as a string.
+ */
+static mrb_value
+ecerr_source_encoding_name(mrb_state *mrb, mrb_value self)
+{
+  return mrb_attr_get(mrb, self, mrb_intern(mrb, "source_encoding_name"));
+}
+
+/*
+ * call-seq:
+ * ecerr.source_encoding -> encoding
+ *
+ * Returns the source encoding as an encoding object.
+ *
+ * Note that the result may not be equal to the source encoding of
+ * the encoding converter if the conversion has multiple steps.
+ *
+ * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") # ISO-8859-1 -> UTF-8 -> EUC-JP
+ * begin
+ * ec.convert("\xa0") # NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP.
+ * rescue Encoding::UndefinedConversionError
+ * p $!.source_encoding #=> #
+ * p $!.destination_encoding #=> #
+ * p $!.source_encoding_name #=> "UTF-8"
+ * p $!.destination_encoding_name #=> "EUC-JP"
+ * end
+ *
+ */
+static mrb_value
+ecerr_source_encoding(mrb_state *mrb, mrb_value self)
+{
+  return mrb_attr_get(mrb, self, mrb_intern(mrb, "source_encoding"));
+}
+
+/*
+ * call-seq:
+ * ecerr.destination_encoding_name -> string
+ *
+ * Returns the destination encoding name as a string.
+ */
+static mrb_value
+ecerr_destination_encoding_name(mrb_state *mrb, mrb_value self)
+{
+  return mrb_attr_get(mrb, self, mrb_intern(mrb, "destination_encoding_name"));
+}
+
+/*
+ * call-seq:
+ * ecerr.destination_encoding -> encoding
+ *
+ * Returns the destination encoding as an encoding object.
+ */
+static mrb_value
+ecerr_destination_encoding(mrb_state *mrb, mrb_value self)
+{
+  /* Plain read of the "destination_encoding" attribute on the exception. */
+  mrb_value attr = mrb_attr_get(mrb, self, mrb_intern(mrb, "destination_encoding"));
+
+  return attr;
+}
+
+/*
+ * call-seq:
+ * ecerr.error_char -> string
+ *
+ * Returns the one-character string which cause Encoding::UndefinedConversionError.
+ *
+ * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP")
+ * begin
+ * ec.convert("\xa0")
+ * rescue Encoding::UndefinedConversionError
+ * puts $!.error_char.dump #=> "\xC2\xA0"
+ * p $!.error_char.encoding #=> #
+ * end
+ *
+ */
+static mrb_value
+ecerr_error_char(mrb_state *mrb, mrb_value self)
+{
+  /* Plain read of the "error_char" attribute on the exception. */
+  mrb_value attr = mrb_attr_get(mrb, self, mrb_intern(mrb, "error_char"));
+
+  return attr;
+}
+
+/*
+ * call-seq:
+ * ecerr.error_bytes -> string
+ *
+ * Returns the discarded bytes when Encoding::InvalidByteSequenceError occurs.
+ *
+ * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
+ * begin
+ * ec.convert("abc\xA1\xFFdef")
+ * rescue Encoding::InvalidByteSequenceError
+ * p $! #=> #
+ * puts $!.error_bytes.dump #=> "\xA1"
+ * puts $!.readagain_bytes.dump #=> "\xFF"
+ * end
+ */
+static mrb_value
+ecerr_error_bytes(mrb_state *mrb, mrb_value self)
+{
+  /* Plain read of the "error_bytes" attribute on the exception. */
+  mrb_value attr = mrb_attr_get(mrb, self, mrb_intern(mrb, "error_bytes"));
+
+  return attr;
+}
+
+/*
+ * call-seq:
+ * ecerr.readagain_bytes -> string
+ *
+ * Returns the bytes to be read again when Encoding::InvalidByteSequenceError occurs.
+ */
+static mrb_value
+ecerr_readagain_bytes(mrb_state *mrb, mrb_value self)
+{
+  /* Plain read of the "readagain_bytes" attribute on the exception. */
+  mrb_value attr = mrb_attr_get(mrb, self, mrb_intern(mrb, "readagain_bytes"));
+
+  return attr;
+}
+
+/*
+ * call-seq:
+ * ecerr.incomplete_input? -> true or false
+ *
+ * Returns true if the invalid byte sequence error is caused by
+ * premature end of string.
+ *
+ * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
+ *
+ * begin
+ * ec.convert("abc\xA1z")
+ * rescue Encoding::InvalidByteSequenceError
+ * p $! #=> #
+ * p $!.incomplete_input? #=> false
+ * end
+ *
+ * begin
+ * ec.convert("abc\xA1")
+ * ec.finish
+ * rescue Encoding::InvalidByteSequenceError
+ * p $!
#=> #
+ * p $!.incomplete_input? #=> true
+ * end
+ */
+static mrb_value
+ecerr_incomplete_input(mrb_state *mrb, mrb_value self)
+{
+  return mrb_attr_get(mrb, self, mrb_intern(mrb, "incomplete_input"));
+}
+
+extern void Init_newline(void);
+
+/*
+ * Document-class: Encoding::UndefinedConversionError
+ *
+ * Raised by Encoding and String methods when a transcoding operation
+ * fails.
+ */
+
+/*
+ * Document-class: Encoding::InvalidByteSequenceError
+ *
+ * Raised by Encoding and String methods when the string being
+ * transcoded contains a byte invalid for either the source or
+ * target encoding.
+ */
+
+/*
+ * Document-class: Encoding::ConverterNotFoundError
+ *
+ * Raised by transcoding methods when a named encoding does not
+ * correspond with a known converter.
+ */
+
+/*
+ * Registers the transcoding API with the interpreter:
+ * - defines the three conversion error classes under E_ENCODING_ERROR,
+ * - creates the transcoder lookup table,
+ * - adds String#encode and String#encode!,
+ * - defines the Converter class with its class methods, instance
+ *   methods and option constants,
+ * - adds the attribute readers to the two conversion error classes.
+ */
+void
+mrb_init_transcode(mrb_state *mrb)
+{
+  struct RClass *e;
+  struct RClass *s;
+  struct RClass *c;
+  struct RClass *u;
+  struct RClass *i;
+  struct RClass *eConverterNotFoundError_class;
+  struct RClass *eInvalidByteSequenceError_class;
+  struct RClass *eUndefinedConversionError_class;
+  e = mrb->encode_class;
+  eUndefinedConversionError_class = mrb_define_class(mrb, "UndefinedConversionError", E_ENCODING_ERROR);
+  eInvalidByteSequenceError_class = mrb_define_class(mrb, "InvalidByteSequenceError", E_ENCODING_ERROR);
+  eConverterNotFoundError_class = mrb_define_class(mrb, "ConverterNotFoundError", E_ENCODING_ERROR);
+
+  transcoder_table = st_init_strcasetable();
+
+  //sym_invalid = ID2SYM(mrb_intern("invalid"));
+  //sym_undef = ID2SYM(mrb_intern("undef"));
+  //sym_replace = ID2SYM(mrb_intern("replace"));
+  //sym_fallback = ID2SYM(mrb_intern("fallback"));
+  //sym_xml = ID2SYM(mrb_intern("xml"));
+  //sym_text = ID2SYM(mrb_intern("text"));
+  //sym_attr = ID2SYM(mrb_intern("attr"));
+
+  //sym_invalid_byte_sequence = ID2SYM(mrb_intern("invalid_byte_sequence"));
+  //sym_undefined_conversion = ID2SYM(mrb_intern("undefined_conversion"));
+  //sym_destination_buffer_full = ID2SYM(mrb_intern("destination_buffer_full"));
+  //sym_source_buffer_empty = ID2SYM(mrb_intern("source_buffer_empty"));
+  //sym_finished = ID2SYM(mrb_intern("finished"));
+  //sym_after_output = ID2SYM(mrb_intern("after_output"));
+  //sym_incomplete_input = ID2SYM(mrb_intern("incomplete_input"));
+  //sym_universal_newline = ID2SYM(mrb_intern("universal_newline"));
+  //sym_crlf_newline = ID2SYM(mrb_intern("crlf_newline"));
+  //sym_cr_newline = ID2SYM(mrb_intern("cr_newline"));
+  //sym_partial_input = ID2SYM(mrb_intern("partial_input"));
+
+  /* String only gains the two encode entry points. */
+  s = mrb->string_class;
+  mrb_define_method(mrb, s, "encode", str_encode, ARGS_ANY());
+  mrb_define_method(mrb, s, "encode!", str_encode_bang, ARGS_ANY());
+
+  c = mrb->converter_class = mrb_define_class(mrb, "Converter", mrb->encode_class);
+  //mrb_cEncodingConverter = rb_define_class_under(mrb_cEncoding, "Converter", rb_cData);
+  //mrb_define_alloc_func(mrb_cEncodingConverter, econv_s_allocate);
+  mrb_define_class_method(mrb, c, "asciicompat_encoding", econv_s_asciicompat_encoding, ARGS_REQ(1)); /* 1 */
+  mrb_define_class_method(mrb, c, "search_convpath", econv_s_search_convpath, ARGS_ANY()); /* 2 */
+
+  /* Converter instance methods and constants belong on the Converter
+   * class (c).  Registering them on the String class (s) would replace
+   * String#initialize and String#inspect with converter functions. */
+  mrb_define_method(mrb, c, "initialize", econv_init, ARGS_ANY());
+  mrb_define_method(mrb, c, "inspect", econv_inspect, ARGS_NONE());
+  mrb_define_method(mrb, c, "convpath", econv_convpath, ARGS_NONE());
+  mrb_define_method(mrb, c, "source_encoding", econv_source_encoding, ARGS_NONE());
+  mrb_define_method(mrb, c, "destination_encoding", econv_destination_encoding, ARGS_NONE());
+  mrb_define_method(mrb, c, "primitive_convert", econv_primitive_convert, ARGS_ANY());
+  mrb_define_method(mrb, c, "convert", econv_convert, ARGS_REQ(1));
+  mrb_define_method(mrb, c, "finish", econv_finish, ARGS_NONE());
+  mrb_define_method(mrb, c, "primitive_errinfo", econv_primitive_errinfo, ARGS_NONE());
+  mrb_define_method(mrb, c, "insert_output", econv_insert_output, ARGS_REQ(1));
+  mrb_define_method(mrb, c, "putback", econv_putback, ARGS_ANY());
+  mrb_define_method(mrb, c, "last_error", econv_last_error, ARGS_NONE());
+  mrb_define_method(mrb, c, "replacement", econv_get_replacement, ARGS_NONE());
+  mrb_define_method(mrb, c, "replacement=", econv_set_replacement, ARGS_REQ(1));
+
+  mrb_define_const(mrb, c, "INVALID_MASK", mrb_fixnum_value(ECONV_INVALID_MASK));
+  mrb_define_const(mrb, c, "INVALID_REPLACE", mrb_fixnum_value(ECONV_INVALID_REPLACE));
+  mrb_define_const(mrb, c, "UNDEF_MASK", mrb_fixnum_value(ECONV_UNDEF_MASK));
+  mrb_define_const(mrb, c, "UNDEF_REPLACE", mrb_fixnum_value(ECONV_UNDEF_REPLACE));
+  mrb_define_const(mrb, c, "UNDEF_HEX_CHARREF", mrb_fixnum_value(ECONV_UNDEF_HEX_CHARREF));
+  mrb_define_const(mrb, c, "PARTIAL_INPUT", mrb_fixnum_value(ECONV_PARTIAL_INPUT));
+  mrb_define_const(mrb, c, "AFTER_OUTPUT", mrb_fixnum_value(ECONV_AFTER_OUTPUT));
+  mrb_define_const(mrb, c, "UNIVERSAL_NEWLINE_DECORATOR", mrb_fixnum_value(ECONV_UNIVERSAL_NEWLINE_DECORATOR));
+  mrb_define_const(mrb, c, "CRLF_NEWLINE_DECORATOR", mrb_fixnum_value(ECONV_CRLF_NEWLINE_DECORATOR));
+  mrb_define_const(mrb, c, "CR_NEWLINE_DECORATOR", mrb_fixnum_value(ECONV_CR_NEWLINE_DECORATOR));
+  mrb_define_const(mrb, c, "XML_TEXT_DECORATOR", mrb_fixnum_value(ECONV_XML_TEXT_DECORATOR));
+  mrb_define_const(mrb, c, "XML_ATTR_CONTENT_DECORATOR", mrb_fixnum_value(ECONV_XML_ATTR_CONTENT_DECORATOR));
+  mrb_define_const(mrb, c, "XML_ATTR_QUOTE_DECORATOR", mrb_fixnum_value(ECONV_XML_ATTR_QUOTE_DECORATOR));
+
+  /* Attribute readers on UndefinedConversionError. */
+  u = E_UNDEFINEDCONVERSION_ERROR;
+  mrb_define_method(mrb, u, "source_encoding_name", ecerr_source_encoding_name, ARGS_NONE());
+  mrb_define_method(mrb, u, "destination_encoding_name", ecerr_destination_encoding_name, ARGS_NONE());
+  mrb_define_method(mrb, u, "source_encoding", ecerr_source_encoding, ARGS_NONE());
+  mrb_define_method(mrb, u, "destination_encoding", ecerr_destination_encoding, ARGS_NONE());
+  mrb_define_method(mrb, u, "error_char", ecerr_error_char, ARGS_NONE());
+
+  /* Attribute readers on InvalidByteSequenceError. */
+  i = E_INVALIDBYTESEQUENCE_ERROR;
+  mrb_define_method(mrb, i, "source_encoding_name", ecerr_source_encoding_name, ARGS_NONE());
+  mrb_define_method(mrb, i, "destination_encoding_name", ecerr_destination_encoding_name, ARGS_NONE());
+  mrb_define_method(mrb, i, "source_encoding", ecerr_source_encoding, ARGS_NONE());
+  mrb_define_method(mrb, i, "destination_encoding", ecerr_destination_encoding, ARGS_NONE());
+  mrb_define_method(mrb, i, "error_bytes", ecerr_error_bytes, ARGS_NONE());
+  mrb_define_method(mrb, i, "readagain_bytes", ecerr_readagain_bytes, ARGS_NONE());
+  mrb_define_method(mrb, i, "incomplete_input?", ecerr_incomplete_input, ARGS_NONE());
+
+  //Init_newline();
+}
+#endif //INCLUDE_ENCODING
diff --git a/src/transcode_data.h b/src/transcode_data.h
new file mode 100644
index 0000000000..7ff540120c
--- /dev/null
+++ b/src/transcode_data.h
@@ -0,0 +1,109 @@
+/**********************************************************************
+
+ transcode_data.h -
+
+ $Author: duerst $
+ created at: Mon 10 Dec 2007 14:01:47 JST 2007
+
+ Copyright (C) 2007 Martin Duerst
+
+**********************************************************************/
+
+//#include "ruby/ruby.h"
+
+#ifndef RUBY_TRANSCODE_DATA_H
+#define RUBY_TRANSCODE_DATA_H 1
+
+#define WORDINDEX_SHIFT_BITS 2
+#define WORDINDEX2INFO(widx) ((widx) << WORDINDEX_SHIFT_BITS)
+#define INFO2WORDINDEX(info) ((info) >> WORDINDEX_SHIFT_BITS)
+#define BYTE_LOOKUP_BASE(bl) ((bl)[0])
+#define BYTE_LOOKUP_INFO(bl) ((bl)[1])
+
+#define PType (unsigned int)
+
+#define NOMAP (PType 0x01) /* direct map */
+#define ONEbt (0x02) /* one byte payload */
+#define TWObt (0x03) /* two bytes payload */
+#define THREEbt (0x05) /* three bytes payload */
+#define FOURbt (0x06) /* four bytes payload, UTF-8 only, macros start at getBT0 */
+#define INVALID (PType 0x07) /* invalid byte sequence */
+#define UNDEF (PType 0x09) /* legal but undefined */
+#define ZERObt (PType 0x0A) /* zero bytes of payload, i.e.
remove */
+#define FUNii (PType 0x0B) /* function from info to info */
+#define FUNsi (PType 0x0D) /* function from start to info */
+#define FUNio (PType 0x0E) /* function from info to output */
+#define FUNso (PType 0x0F) /* function from start to output */
+#define STR1 (PType 0x11) /* string 4 <= len <= 259 bytes: 1byte length + content */
+#define GB4bt (PType 0x12) /* GB18030 four bytes payload */
+#define FUNsio (PType 0x13) /* function from start and info to output */
+
+#define STR1_LENGTH(byte_addr) (unsigned int)(*(byte_addr) + 4)
+#define STR1_BYTEINDEX(w) ((w) >> 6)
+#define makeSTR1(bi) (((bi) << 6) | STR1)
+#define makeSTR1LEN(len) ((len)-4)
+
+/*
+ * oN(...) pack N output bytes plus a payload tag into one 32-bit word.
+ * A byte shifted into bits 24..31 is widened to unsigned int first:
+ * shifting an int-promoted unsigned char >= 0x80 left by 24 overflows
+ * signed int, which is undefined behaviour.
+ */
+#define o1(b1) (PType((((unsigned char)(b1))<<8)|ONEbt))
+#define o2(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt))
+#define o3(b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned int)(unsigned char)(b3))<<24)|THREEbt)&0xffffffffU))
+#define o4(b0,b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned int)(unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt)&0xffffffffU))
+#define g4(b0,b1,b2,b3) (PType(((((unsigned char)(b0))<<8)|(((unsigned char)(b2))<<16)|((((unsigned char)(b1))&0x0f)<<24)|((((unsigned int)(unsigned char)(b3))&0x0f)<<28)|GB4bt)&0xffffffffU))
+#define funsio(diff) (PType((((unsigned int)(diff))<<8)|FUNsio))
+
+/* getBTn(a) extract the bytes packed by the oN macros above. */
+#define getBT1(a) ((unsigned char)((a)>> 8))
+#define getBT2(a) ((unsigned char)((a)>>16))
+#define getBT3(a) ((unsigned char)((a)>>24))
+#define getBT0(a) (((unsigned char)((a)>> 5)&0x07)|0xF0) /* for UTF-8 only!!! */
+
+/* getGB4btN(a) extract the bytes packed by g4 above. */
+#define getGB4bt0(a) ((unsigned char)((a)>> 8))
+#define getGB4bt1(a) ((((unsigned char)((a)>>24))&0x0F)|0x30)
+#define getGB4bt2(a) ((unsigned char)((a)>>16))
+#define getGB4bt3(a) ((((unsigned char)((a)>>28))&0x0F)|0x30)
+
+#define o2FUNii(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|FUNii))
+
+/* do we need these??? maybe not, can be done with simple tables */
+#define ONETRAIL /* legal but undefined if one more trailing UTF-8 */
+#define TWOTRAIL /* legal but undefined if two more trailing UTF-8 */
+#define THREETRAIL /* legal but undefined if three more trailing UTF-8 */
+
+typedef enum {
+  asciicompat_converter, /* ASCII-compatible -> ASCII-compatible */
+  asciicompat_decoder, /* ASCII-incompatible -> ASCII-compatible */
+  asciicompat_encoder /* ASCII-compatible -> ASCII-incompatible */
+  /* ASCII-incompatible -> ASCII-incompatible is intentionally omitted. */
+} mrb_transcoder_asciicompat_type_t;
+
+typedef struct mrb_transcoder mrb_transcoder;
+
+/* static structure, one per supported encoding pair */
+struct mrb_transcoder {
+  const char *src_encoding;
+  const char *dst_encoding;
+  unsigned int conv_tree_start;
+  const unsigned char *byte_array;
+  unsigned int byte_array_length;
+  const unsigned int *word_array;
+  unsigned int word_array_length;
+  int word_size;
+  int input_unit_length;
+  int max_input;
+  int max_output;
+  mrb_transcoder_asciicompat_type_t asciicompat_type;
+  size_t state_size;
+  int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */
+  int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */
+  mrb_value (*func_ii)(void*, mrb_value); /* info -> info */
+  mrb_value (*func_si)(void*, const unsigned char*, size_t); /* start -> info */
+  ssize_t (*func_io)(void*, mrb_value, const unsigned char*, size_t); /* info -> output */
+  ssize_t (*func_so)(void*, const unsigned char*, size_t, unsigned char*, size_t); /* start -> output */
+  ssize_t (*finish_func)(void*, unsigned char*, size_t); /* -> output */
+  ssize_t (*resetsize_func)(void*); /* -> len */
+  ssize_t (*resetstate_func)(void*, unsigned char*, size_t); /* -> output */
+  ssize_t (*func_sio)(void*, const unsigned char*, size_t, mrb_value, unsigned char*, size_t); /* start and info -> output */
+};
+
+void mrb_declare_transcoder(mrb_state *mrb, const char *enc1, const char *enc2, const char *lib);
+void mrb_register_transcoder(mrb_state *mrb, const mrb_transcoder *);
+
+#endif /* RUBY_TRANSCODE_DATA_H */
diff --git a/src/unicode.c b/src/unicode.c
new file mode 100644
index 0000000000..0753fe62a8
--- /dev/null
+++ b/src/unicode.c
@@ -0,0 +1,2607 @@
+/**********************************************************************
+ unicode.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2008 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */ + +#include "mruby.h" +#ifdef INCLUDE_ENCODING +#include +#include "regint.h" + +#include "encoding.h" //#define TOLOWER(c) + +#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ + ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +#include "name2ctype.h" + +typedef struct { + int n; + OnigCodePoint code[3]; +} CodePointList3; + +typedef struct { + OnigCodePoint from; + CodePointList3 to; +} CaseFold_11_Type; + +typedef struct { + OnigCodePoint from; + CodePointList3 to; +} CaseUnfold_11_Type; + +typedef struct { + int n; + OnigCodePoint code[2]; +} CodePointList2; + +typedef struct { + OnigCodePoint from[2]; + CodePointList2 to; +} CaseUnfold_12_Type; + +typedef struct { + OnigCodePoint from[3]; + CodePointList2 to; +} CaseUnfold_13_Type; + +static const CaseFold_11_Type CaseFold[] = { + { 0x0041, {1, {0x0061}}}, + { 0x0042, {1, {0x0062}}}, + { 0x0043, {1, {0x0063}}}, + { 0x0044, {1, {0x0064}}}, + { 0x0045, {1, {0x0065}}}, + { 0x0046, {1, {0x0066}}}, + { 0x0047, {1, {0x0067}}}, + { 0x0048, {1, {0x0068}}}, + { 0x004a, {1, {0x006a}}}, + { 0x004b, {1, {0x006b}}}, + { 0x004c, {1, {0x006c}}}, + { 0x004d, {1, {0x006d}}}, + { 0x004e, {1, {0x006e}}}, + { 0x004f, {1, {0x006f}}}, + { 0x0050, {1, {0x0070}}}, + { 0x0051, {1, {0x0071}}}, + { 0x0052, {1, {0x0072}}}, + { 0x0053, {1, {0x0073}}}, + { 0x0054, {1, {0x0074}}}, + { 0x0055, {1, {0x0075}}}, + { 0x0056, {1, {0x0076}}}, + { 0x0057, {1, {0x0077}}}, + { 0x0058, {1, {0x0078}}}, + { 0x0059, {1, {0x0079}}}, + { 0x005a, {1, {0x007a}}}, + { 0x00b5, {1, {0x03bc}}}, + { 0x00c0, {1, {0x00e0}}}, + { 0x00c1, {1, {0x00e1}}}, + { 0x00c2, {1, {0x00e2}}}, + { 0x00c3, {1, {0x00e3}}}, + { 0x00c4, {1, {0x00e4}}}, + { 0x00c5, {1, {0x00e5}}}, + { 0x00c6, {1, {0x00e6}}}, + { 0x00c7, {1, {0x00e7}}}, + { 0x00c8, {1, 
{0x00e8}}}, + { 0x00c9, {1, {0x00e9}}}, + { 0x00ca, {1, {0x00ea}}}, + { 0x00cb, {1, {0x00eb}}}, + { 0x00cc, {1, {0x00ec}}}, + { 0x00cd, {1, {0x00ed}}}, + { 0x00ce, {1, {0x00ee}}}, + { 0x00cf, {1, {0x00ef}}}, + { 0x00d0, {1, {0x00f0}}}, + { 0x00d1, {1, {0x00f1}}}, + { 0x00d2, {1, {0x00f2}}}, + { 0x00d3, {1, {0x00f3}}}, + { 0x00d4, {1, {0x00f4}}}, + { 0x00d5, {1, {0x00f5}}}, + { 0x00d6, {1, {0x00f6}}}, + { 0x00d8, {1, {0x00f8}}}, + { 0x00d9, {1, {0x00f9}}}, + { 0x00da, {1, {0x00fa}}}, + { 0x00db, {1, {0x00fb}}}, + { 0x00dc, {1, {0x00fc}}}, + { 0x00dd, {1, {0x00fd}}}, + { 0x00de, {1, {0x00fe}}}, + { 0x00df, {2, {0x0073, 0x0073}}}, + { 0x0100, {1, {0x0101}}}, + { 0x0102, {1, {0x0103}}}, + { 0x0104, {1, {0x0105}}}, + { 0x0106, {1, {0x0107}}}, + { 0x0108, {1, {0x0109}}}, + { 0x010a, {1, {0x010b}}}, + { 0x010c, {1, {0x010d}}}, + { 0x010e, {1, {0x010f}}}, + { 0x0110, {1, {0x0111}}}, + { 0x0112, {1, {0x0113}}}, + { 0x0114, {1, {0x0115}}}, + { 0x0116, {1, {0x0117}}}, + { 0x0118, {1, {0x0119}}}, + { 0x011a, {1, {0x011b}}}, + { 0x011c, {1, {0x011d}}}, + { 0x011e, {1, {0x011f}}}, + { 0x0120, {1, {0x0121}}}, + { 0x0122, {1, {0x0123}}}, + { 0x0124, {1, {0x0125}}}, + { 0x0126, {1, {0x0127}}}, + { 0x0128, {1, {0x0129}}}, + { 0x012a, {1, {0x012b}}}, + { 0x012c, {1, {0x012d}}}, + { 0x012e, {1, {0x012f}}}, + { 0x0132, {1, {0x0133}}}, + { 0x0134, {1, {0x0135}}}, + { 0x0136, {1, {0x0137}}}, + { 0x0139, {1, {0x013a}}}, + { 0x013b, {1, {0x013c}}}, + { 0x013d, {1, {0x013e}}}, + { 0x013f, {1, {0x0140}}}, + { 0x0141, {1, {0x0142}}}, + { 0x0143, {1, {0x0144}}}, + { 0x0145, {1, {0x0146}}}, + { 0x0147, {1, {0x0148}}}, + { 0x0149, {2, {0x02bc, 0x006e}}}, + { 0x014a, {1, {0x014b}}}, + { 0x014c, {1, {0x014d}}}, + { 0x014e, {1, {0x014f}}}, + { 0x0150, {1, {0x0151}}}, + { 0x0152, {1, {0x0153}}}, + { 0x0154, {1, {0x0155}}}, + { 0x0156, {1, {0x0157}}}, + { 0x0158, {1, {0x0159}}}, + { 0x015a, {1, {0x015b}}}, + { 0x015c, {1, {0x015d}}}, + { 0x015e, {1, {0x015f}}}, + { 0x0160, {1, {0x0161}}}, + { 0x0162, 
{1, {0x0163}}}, + { 0x0164, {1, {0x0165}}}, + { 0x0166, {1, {0x0167}}}, + { 0x0168, {1, {0x0169}}}, + { 0x016a, {1, {0x016b}}}, + { 0x016c, {1, {0x016d}}}, + { 0x016e, {1, {0x016f}}}, + { 0x0170, {1, {0x0171}}}, + { 0x0172, {1, {0x0173}}}, + { 0x0174, {1, {0x0175}}}, + { 0x0176, {1, {0x0177}}}, + { 0x0178, {1, {0x00ff}}}, + { 0x0179, {1, {0x017a}}}, + { 0x017b, {1, {0x017c}}}, + { 0x017d, {1, {0x017e}}}, + { 0x017f, {1, {0x0073}}}, + { 0x0181, {1, {0x0253}}}, + { 0x0182, {1, {0x0183}}}, + { 0x0184, {1, {0x0185}}}, + { 0x0186, {1, {0x0254}}}, + { 0x0187, {1, {0x0188}}}, + { 0x0189, {1, {0x0256}}}, + { 0x018a, {1, {0x0257}}}, + { 0x018b, {1, {0x018c}}}, + { 0x018e, {1, {0x01dd}}}, + { 0x018f, {1, {0x0259}}}, + { 0x0190, {1, {0x025b}}}, + { 0x0191, {1, {0x0192}}}, + { 0x0193, {1, {0x0260}}}, + { 0x0194, {1, {0x0263}}}, + { 0x0196, {1, {0x0269}}}, + { 0x0197, {1, {0x0268}}}, + { 0x0198, {1, {0x0199}}}, + { 0x019c, {1, {0x026f}}}, + { 0x019d, {1, {0x0272}}}, + { 0x019f, {1, {0x0275}}}, + { 0x01a0, {1, {0x01a1}}}, + { 0x01a2, {1, {0x01a3}}}, + { 0x01a4, {1, {0x01a5}}}, + { 0x01a6, {1, {0x0280}}}, + { 0x01a7, {1, {0x01a8}}}, + { 0x01a9, {1, {0x0283}}}, + { 0x01ac, {1, {0x01ad}}}, + { 0x01ae, {1, {0x0288}}}, + { 0x01af, {1, {0x01b0}}}, + { 0x01b1, {1, {0x028a}}}, + { 0x01b2, {1, {0x028b}}}, + { 0x01b3, {1, {0x01b4}}}, + { 0x01b5, {1, {0x01b6}}}, + { 0x01b7, {1, {0x0292}}}, + { 0x01b8, {1, {0x01b9}}}, + { 0x01bc, {1, {0x01bd}}}, + { 0x01c4, {1, {0x01c6}}}, + { 0x01c5, {1, {0x01c6}}}, + { 0x01c7, {1, {0x01c9}}}, + { 0x01c8, {1, {0x01c9}}}, + { 0x01ca, {1, {0x01cc}}}, + { 0x01cb, {1, {0x01cc}}}, + { 0x01cd, {1, {0x01ce}}}, + { 0x01cf, {1, {0x01d0}}}, + { 0x01d1, {1, {0x01d2}}}, + { 0x01d3, {1, {0x01d4}}}, + { 0x01d5, {1, {0x01d6}}}, + { 0x01d7, {1, {0x01d8}}}, + { 0x01d9, {1, {0x01da}}}, + { 0x01db, {1, {0x01dc}}}, + { 0x01de, {1, {0x01df}}}, + { 0x01e0, {1, {0x01e1}}}, + { 0x01e2, {1, {0x01e3}}}, + { 0x01e4, {1, {0x01e5}}}, + { 0x01e6, {1, {0x01e7}}}, + { 0x01e8, {1, 
{0x01e9}}}, + { 0x01ea, {1, {0x01eb}}}, + { 0x01ec, {1, {0x01ed}}}, + { 0x01ee, {1, {0x01ef}}}, + { 0x01f0, {2, {0x006a, 0x030c}}}, + { 0x01f1, {1, {0x01f3}}}, + { 0x01f2, {1, {0x01f3}}}, + { 0x01f4, {1, {0x01f5}}}, + { 0x01f6, {1, {0x0195}}}, + { 0x01f7, {1, {0x01bf}}}, + { 0x01f8, {1, {0x01f9}}}, + { 0x01fa, {1, {0x01fb}}}, + { 0x01fc, {1, {0x01fd}}}, + { 0x01fe, {1, {0x01ff}}}, + { 0x0200, {1, {0x0201}}}, + { 0x0202, {1, {0x0203}}}, + { 0x0204, {1, {0x0205}}}, + { 0x0206, {1, {0x0207}}}, + { 0x0208, {1, {0x0209}}}, + { 0x020a, {1, {0x020b}}}, + { 0x020c, {1, {0x020d}}}, + { 0x020e, {1, {0x020f}}}, + { 0x0210, {1, {0x0211}}}, + { 0x0212, {1, {0x0213}}}, + { 0x0214, {1, {0x0215}}}, + { 0x0216, {1, {0x0217}}}, + { 0x0218, {1, {0x0219}}}, + { 0x021a, {1, {0x021b}}}, + { 0x021c, {1, {0x021d}}}, + { 0x021e, {1, {0x021f}}}, + { 0x0220, {1, {0x019e}}}, + { 0x0222, {1, {0x0223}}}, + { 0x0224, {1, {0x0225}}}, + { 0x0226, {1, {0x0227}}}, + { 0x0228, {1, {0x0229}}}, + { 0x022a, {1, {0x022b}}}, + { 0x022c, {1, {0x022d}}}, + { 0x022e, {1, {0x022f}}}, + { 0x0230, {1, {0x0231}}}, + { 0x0232, {1, {0x0233}}}, + { 0x023b, {1, {0x023c}}}, + { 0x023d, {1, {0x019a}}}, + { 0x0241, {1, {0x0294}}}, + { 0x0345, {1, {0x03b9}}}, + { 0x0386, {1, {0x03ac}}}, + { 0x0388, {1, {0x03ad}}}, + { 0x0389, {1, {0x03ae}}}, + { 0x038a, {1, {0x03af}}}, + { 0x038c, {1, {0x03cc}}}, + { 0x038e, {1, {0x03cd}}}, + { 0x038f, {1, {0x03ce}}}, + { 0x0390, {3, {0x03b9, 0x0308, 0x0301}}}, + { 0x0391, {1, {0x03b1}}}, + { 0x0392, {1, {0x03b2}}}, + { 0x0393, {1, {0x03b3}}}, + { 0x0394, {1, {0x03b4}}}, + { 0x0395, {1, {0x03b5}}}, + { 0x0396, {1, {0x03b6}}}, + { 0x0397, {1, {0x03b7}}}, + { 0x0398, {1, {0x03b8}}}, + { 0x0399, {1, {0x03b9}}}, + { 0x039a, {1, {0x03ba}}}, + { 0x039b, {1, {0x03bb}}}, + { 0x039c, {1, {0x03bc}}}, + { 0x039d, {1, {0x03bd}}}, + { 0x039e, {1, {0x03be}}}, + { 0x039f, {1, {0x03bf}}}, + { 0x03a0, {1, {0x03c0}}}, + { 0x03a1, {1, {0x03c1}}}, + { 0x03a3, {1, {0x03c3}}}, + { 0x03a4, {1, {0x03c4}}}, + { 
0x03a5, {1, {0x03c5}}}, + { 0x03a6, {1, {0x03c6}}}, + { 0x03a7, {1, {0x03c7}}}, + { 0x03a8, {1, {0x03c8}}}, + { 0x03a9, {1, {0x03c9}}}, + { 0x03aa, {1, {0x03ca}}}, + { 0x03ab, {1, {0x03cb}}}, + { 0x03b0, {3, {0x03c5, 0x0308, 0x0301}}}, + { 0x03c2, {1, {0x03c3}}}, + { 0x03d0, {1, {0x03b2}}}, + { 0x03d1, {1, {0x03b8}}}, + { 0x03d5, {1, {0x03c6}}}, + { 0x03d6, {1, {0x03c0}}}, + { 0x03d8, {1, {0x03d9}}}, + { 0x03da, {1, {0x03db}}}, + { 0x03dc, {1, {0x03dd}}}, + { 0x03de, {1, {0x03df}}}, + { 0x03e0, {1, {0x03e1}}}, + { 0x03e2, {1, {0x03e3}}}, + { 0x03e4, {1, {0x03e5}}}, + { 0x03e6, {1, {0x03e7}}}, + { 0x03e8, {1, {0x03e9}}}, + { 0x03ea, {1, {0x03eb}}}, + { 0x03ec, {1, {0x03ed}}}, + { 0x03ee, {1, {0x03ef}}}, + { 0x03f0, {1, {0x03ba}}}, + { 0x03f1, {1, {0x03c1}}}, + { 0x03f4, {1, {0x03b8}}}, + { 0x03f5, {1, {0x03b5}}}, + { 0x03f7, {1, {0x03f8}}}, + { 0x03f9, {1, {0x03f2}}}, + { 0x03fa, {1, {0x03fb}}}, + { 0x0400, {1, {0x0450}}}, + { 0x0401, {1, {0x0451}}}, + { 0x0402, {1, {0x0452}}}, + { 0x0403, {1, {0x0453}}}, + { 0x0404, {1, {0x0454}}}, + { 0x0405, {1, {0x0455}}}, + { 0x0406, {1, {0x0456}}}, + { 0x0407, {1, {0x0457}}}, + { 0x0408, {1, {0x0458}}}, + { 0x0409, {1, {0x0459}}}, + { 0x040a, {1, {0x045a}}}, + { 0x040b, {1, {0x045b}}}, + { 0x040c, {1, {0x045c}}}, + { 0x040d, {1, {0x045d}}}, + { 0x040e, {1, {0x045e}}}, + { 0x040f, {1, {0x045f}}}, + { 0x0410, {1, {0x0430}}}, + { 0x0411, {1, {0x0431}}}, + { 0x0412, {1, {0x0432}}}, + { 0x0413, {1, {0x0433}}}, + { 0x0414, {1, {0x0434}}}, + { 0x0415, {1, {0x0435}}}, + { 0x0416, {1, {0x0436}}}, + { 0x0417, {1, {0x0437}}}, + { 0x0418, {1, {0x0438}}}, + { 0x0419, {1, {0x0439}}}, + { 0x041a, {1, {0x043a}}}, + { 0x041b, {1, {0x043b}}}, + { 0x041c, {1, {0x043c}}}, + { 0x041d, {1, {0x043d}}}, + { 0x041e, {1, {0x043e}}}, + { 0x041f, {1, {0x043f}}}, + { 0x0420, {1, {0x0440}}}, + { 0x0421, {1, {0x0441}}}, + { 0x0422, {1, {0x0442}}}, + { 0x0423, {1, {0x0443}}}, + { 0x0424, {1, {0x0444}}}, + { 0x0425, {1, {0x0445}}}, + { 0x0426, {1, {0x0446}}}, 
+ { 0x0427, {1, {0x0447}}}, + { 0x0428, {1, {0x0448}}}, + { 0x0429, {1, {0x0449}}}, + { 0x042a, {1, {0x044a}}}, + { 0x042b, {1, {0x044b}}}, + { 0x042c, {1, {0x044c}}}, + { 0x042d, {1, {0x044d}}}, + { 0x042e, {1, {0x044e}}}, + { 0x042f, {1, {0x044f}}}, + { 0x0460, {1, {0x0461}}}, + { 0x0462, {1, {0x0463}}}, + { 0x0464, {1, {0x0465}}}, + { 0x0466, {1, {0x0467}}}, + { 0x0468, {1, {0x0469}}}, + { 0x046a, {1, {0x046b}}}, + { 0x046c, {1, {0x046d}}}, + { 0x046e, {1, {0x046f}}}, + { 0x0470, {1, {0x0471}}}, + { 0x0472, {1, {0x0473}}}, + { 0x0474, {1, {0x0475}}}, + { 0x0476, {1, {0x0477}}}, + { 0x0478, {1, {0x0479}}}, + { 0x047a, {1, {0x047b}}}, + { 0x047c, {1, {0x047d}}}, + { 0x047e, {1, {0x047f}}}, + { 0x0480, {1, {0x0481}}}, + { 0x048a, {1, {0x048b}}}, + { 0x048c, {1, {0x048d}}}, + { 0x048e, {1, {0x048f}}}, + { 0x0490, {1, {0x0491}}}, + { 0x0492, {1, {0x0493}}}, + { 0x0494, {1, {0x0495}}}, + { 0x0496, {1, {0x0497}}}, + { 0x0498, {1, {0x0499}}}, + { 0x049a, {1, {0x049b}}}, + { 0x049c, {1, {0x049d}}}, + { 0x049e, {1, {0x049f}}}, + { 0x04a0, {1, {0x04a1}}}, + { 0x04a2, {1, {0x04a3}}}, + { 0x04a4, {1, {0x04a5}}}, + { 0x04a6, {1, {0x04a7}}}, + { 0x04a8, {1, {0x04a9}}}, + { 0x04aa, {1, {0x04ab}}}, + { 0x04ac, {1, {0x04ad}}}, + { 0x04ae, {1, {0x04af}}}, + { 0x04b0, {1, {0x04b1}}}, + { 0x04b2, {1, {0x04b3}}}, + { 0x04b4, {1, {0x04b5}}}, + { 0x04b6, {1, {0x04b7}}}, + { 0x04b8, {1, {0x04b9}}}, + { 0x04ba, {1, {0x04bb}}}, + { 0x04bc, {1, {0x04bd}}}, + { 0x04be, {1, {0x04bf}}}, + { 0x04c1, {1, {0x04c2}}}, + { 0x04c3, {1, {0x04c4}}}, + { 0x04c5, {1, {0x04c6}}}, + { 0x04c7, {1, {0x04c8}}}, + { 0x04c9, {1, {0x04ca}}}, + { 0x04cb, {1, {0x04cc}}}, + { 0x04cd, {1, {0x04ce}}}, + { 0x04d0, {1, {0x04d1}}}, + { 0x04d2, {1, {0x04d3}}}, + { 0x04d4, {1, {0x04d5}}}, + { 0x04d6, {1, {0x04d7}}}, + { 0x04d8, {1, {0x04d9}}}, + { 0x04da, {1, {0x04db}}}, + { 0x04dc, {1, {0x04dd}}}, + { 0x04de, {1, {0x04df}}}, + { 0x04e0, {1, {0x04e1}}}, + { 0x04e2, {1, {0x04e3}}}, + { 0x04e4, {1, {0x04e5}}}, + { 0x04e6, 
{1, {0x04e7}}}, + { 0x04e8, {1, {0x04e9}}}, + { 0x04ea, {1, {0x04eb}}}, + { 0x04ec, {1, {0x04ed}}}, + { 0x04ee, {1, {0x04ef}}}, + { 0x04f0, {1, {0x04f1}}}, + { 0x04f2, {1, {0x04f3}}}, + { 0x04f4, {1, {0x04f5}}}, + { 0x04f6, {1, {0x04f7}}}, + { 0x04f8, {1, {0x04f9}}}, + { 0x0500, {1, {0x0501}}}, + { 0x0502, {1, {0x0503}}}, + { 0x0504, {1, {0x0505}}}, + { 0x0506, {1, {0x0507}}}, + { 0x0508, {1, {0x0509}}}, + { 0x050a, {1, {0x050b}}}, + { 0x050c, {1, {0x050d}}}, + { 0x050e, {1, {0x050f}}}, + { 0x0531, {1, {0x0561}}}, + { 0x0532, {1, {0x0562}}}, + { 0x0533, {1, {0x0563}}}, + { 0x0534, {1, {0x0564}}}, + { 0x0535, {1, {0x0565}}}, + { 0x0536, {1, {0x0566}}}, + { 0x0537, {1, {0x0567}}}, + { 0x0538, {1, {0x0568}}}, + { 0x0539, {1, {0x0569}}}, + { 0x053a, {1, {0x056a}}}, + { 0x053b, {1, {0x056b}}}, + { 0x053c, {1, {0x056c}}}, + { 0x053d, {1, {0x056d}}}, + { 0x053e, {1, {0x056e}}}, + { 0x053f, {1, {0x056f}}}, + { 0x0540, {1, {0x0570}}}, + { 0x0541, {1, {0x0571}}}, + { 0x0542, {1, {0x0572}}}, + { 0x0543, {1, {0x0573}}}, + { 0x0544, {1, {0x0574}}}, + { 0x0545, {1, {0x0575}}}, + { 0x0546, {1, {0x0576}}}, + { 0x0547, {1, {0x0577}}}, + { 0x0548, {1, {0x0578}}}, + { 0x0549, {1, {0x0579}}}, + { 0x054a, {1, {0x057a}}}, + { 0x054b, {1, {0x057b}}}, + { 0x054c, {1, {0x057c}}}, + { 0x054d, {1, {0x057d}}}, + { 0x054e, {1, {0x057e}}}, + { 0x054f, {1, {0x057f}}}, + { 0x0550, {1, {0x0580}}}, + { 0x0551, {1, {0x0581}}}, + { 0x0552, {1, {0x0582}}}, + { 0x0553, {1, {0x0583}}}, + { 0x0554, {1, {0x0584}}}, + { 0x0555, {1, {0x0585}}}, + { 0x0556, {1, {0x0586}}}, + { 0x0587, {2, {0x0565, 0x0582}}}, + { 0x10a0, {1, {0x2d00}}}, + { 0x10a1, {1, {0x2d01}}}, + { 0x10a2, {1, {0x2d02}}}, + { 0x10a3, {1, {0x2d03}}}, + { 0x10a4, {1, {0x2d04}}}, + { 0x10a5, {1, {0x2d05}}}, + { 0x10a6, {1, {0x2d06}}}, + { 0x10a7, {1, {0x2d07}}}, + { 0x10a8, {1, {0x2d08}}}, + { 0x10a9, {1, {0x2d09}}}, + { 0x10aa, {1, {0x2d0a}}}, + { 0x10ab, {1, {0x2d0b}}}, + { 0x10ac, {1, {0x2d0c}}}, + { 0x10ad, {1, {0x2d0d}}}, + { 0x10ae, {1, 
{0x2d0e}}}, + { 0x10af, {1, {0x2d0f}}}, + { 0x10b0, {1, {0x2d10}}}, + { 0x10b1, {1, {0x2d11}}}, + { 0x10b2, {1, {0x2d12}}}, + { 0x10b3, {1, {0x2d13}}}, + { 0x10b4, {1, {0x2d14}}}, + { 0x10b5, {1, {0x2d15}}}, + { 0x10b6, {1, {0x2d16}}}, + { 0x10b7, {1, {0x2d17}}}, + { 0x10b8, {1, {0x2d18}}}, + { 0x10b9, {1, {0x2d19}}}, + { 0x10ba, {1, {0x2d1a}}}, + { 0x10bb, {1, {0x2d1b}}}, + { 0x10bc, {1, {0x2d1c}}}, + { 0x10bd, {1, {0x2d1d}}}, + { 0x10be, {1, {0x2d1e}}}, + { 0x10bf, {1, {0x2d1f}}}, + { 0x10c0, {1, {0x2d20}}}, + { 0x10c1, {1, {0x2d21}}}, + { 0x10c2, {1, {0x2d22}}}, + { 0x10c3, {1, {0x2d23}}}, + { 0x10c4, {1, {0x2d24}}}, + { 0x10c5, {1, {0x2d25}}}, + { 0x1e00, {1, {0x1e01}}}, + { 0x1e02, {1, {0x1e03}}}, + { 0x1e04, {1, {0x1e05}}}, + { 0x1e06, {1, {0x1e07}}}, + { 0x1e08, {1, {0x1e09}}}, + { 0x1e0a, {1, {0x1e0b}}}, + { 0x1e0c, {1, {0x1e0d}}}, + { 0x1e0e, {1, {0x1e0f}}}, + { 0x1e10, {1, {0x1e11}}}, + { 0x1e12, {1, {0x1e13}}}, + { 0x1e14, {1, {0x1e15}}}, + { 0x1e16, {1, {0x1e17}}}, + { 0x1e18, {1, {0x1e19}}}, + { 0x1e1a, {1, {0x1e1b}}}, + { 0x1e1c, {1, {0x1e1d}}}, + { 0x1e1e, {1, {0x1e1f}}}, + { 0x1e20, {1, {0x1e21}}}, + { 0x1e22, {1, {0x1e23}}}, + { 0x1e24, {1, {0x1e25}}}, + { 0x1e26, {1, {0x1e27}}}, + { 0x1e28, {1, {0x1e29}}}, + { 0x1e2a, {1, {0x1e2b}}}, + { 0x1e2c, {1, {0x1e2d}}}, + { 0x1e2e, {1, {0x1e2f}}}, + { 0x1e30, {1, {0x1e31}}}, + { 0x1e32, {1, {0x1e33}}}, + { 0x1e34, {1, {0x1e35}}}, + { 0x1e36, {1, {0x1e37}}}, + { 0x1e38, {1, {0x1e39}}}, + { 0x1e3a, {1, {0x1e3b}}}, + { 0x1e3c, {1, {0x1e3d}}}, + { 0x1e3e, {1, {0x1e3f}}}, + { 0x1e40, {1, {0x1e41}}}, + { 0x1e42, {1, {0x1e43}}}, + { 0x1e44, {1, {0x1e45}}}, + { 0x1e46, {1, {0x1e47}}}, + { 0x1e48, {1, {0x1e49}}}, + { 0x1e4a, {1, {0x1e4b}}}, + { 0x1e4c, {1, {0x1e4d}}}, + { 0x1e4e, {1, {0x1e4f}}}, + { 0x1e50, {1, {0x1e51}}}, + { 0x1e52, {1, {0x1e53}}}, + { 0x1e54, {1, {0x1e55}}}, + { 0x1e56, {1, {0x1e57}}}, + { 0x1e58, {1, {0x1e59}}}, + { 0x1e5a, {1, {0x1e5b}}}, + { 0x1e5c, {1, {0x1e5d}}}, + { 0x1e5e, {1, {0x1e5f}}}, 
+ { 0x1e60, {1, {0x1e61}}}, + { 0x1e62, {1, {0x1e63}}}, + { 0x1e64, {1, {0x1e65}}}, + { 0x1e66, {1, {0x1e67}}}, + { 0x1e68, {1, {0x1e69}}}, + { 0x1e6a, {1, {0x1e6b}}}, + { 0x1e6c, {1, {0x1e6d}}}, + { 0x1e6e, {1, {0x1e6f}}}, + { 0x1e70, {1, {0x1e71}}}, + { 0x1e72, {1, {0x1e73}}}, + { 0x1e74, {1, {0x1e75}}}, + { 0x1e76, {1, {0x1e77}}}, + { 0x1e78, {1, {0x1e79}}}, + { 0x1e7a, {1, {0x1e7b}}}, + { 0x1e7c, {1, {0x1e7d}}}, + { 0x1e7e, {1, {0x1e7f}}}, + { 0x1e80, {1, {0x1e81}}}, + { 0x1e82, {1, {0x1e83}}}, + { 0x1e84, {1, {0x1e85}}}, + { 0x1e86, {1, {0x1e87}}}, + { 0x1e88, {1, {0x1e89}}}, + { 0x1e8a, {1, {0x1e8b}}}, + { 0x1e8c, {1, {0x1e8d}}}, + { 0x1e8e, {1, {0x1e8f}}}, + { 0x1e90, {1, {0x1e91}}}, + { 0x1e92, {1, {0x1e93}}}, + { 0x1e94, {1, {0x1e95}}}, + { 0x1e96, {2, {0x0068, 0x0331}}}, + { 0x1e97, {2, {0x0074, 0x0308}}}, + { 0x1e98, {2, {0x0077, 0x030a}}}, + { 0x1e99, {2, {0x0079, 0x030a}}}, + { 0x1e9a, {2, {0x0061, 0x02be}}}, + { 0x1e9b, {1, {0x1e61}}}, + { 0x1ea0, {1, {0x1ea1}}}, + { 0x1ea2, {1, {0x1ea3}}}, + { 0x1ea4, {1, {0x1ea5}}}, + { 0x1ea6, {1, {0x1ea7}}}, + { 0x1ea8, {1, {0x1ea9}}}, + { 0x1eaa, {1, {0x1eab}}}, + { 0x1eac, {1, {0x1ead}}}, + { 0x1eae, {1, {0x1eaf}}}, + { 0x1eb0, {1, {0x1eb1}}}, + { 0x1eb2, {1, {0x1eb3}}}, + { 0x1eb4, {1, {0x1eb5}}}, + { 0x1eb6, {1, {0x1eb7}}}, + { 0x1eb8, {1, {0x1eb9}}}, + { 0x1eba, {1, {0x1ebb}}}, + { 0x1ebc, {1, {0x1ebd}}}, + { 0x1ebe, {1, {0x1ebf}}}, + { 0x1ec0, {1, {0x1ec1}}}, + { 0x1ec2, {1, {0x1ec3}}}, + { 0x1ec4, {1, {0x1ec5}}}, + { 0x1ec6, {1, {0x1ec7}}}, + { 0x1ec8, {1, {0x1ec9}}}, + { 0x1eca, {1, {0x1ecb}}}, + { 0x1ecc, {1, {0x1ecd}}}, + { 0x1ece, {1, {0x1ecf}}}, + { 0x1ed0, {1, {0x1ed1}}}, + { 0x1ed2, {1, {0x1ed3}}}, + { 0x1ed4, {1, {0x1ed5}}}, + { 0x1ed6, {1, {0x1ed7}}}, + { 0x1ed8, {1, {0x1ed9}}}, + { 0x1eda, {1, {0x1edb}}}, + { 0x1edc, {1, {0x1edd}}}, + { 0x1ede, {1, {0x1edf}}}, + { 0x1ee0, {1, {0x1ee1}}}, + { 0x1ee2, {1, {0x1ee3}}}, + { 0x1ee4, {1, {0x1ee5}}}, + { 0x1ee6, {1, {0x1ee7}}}, + { 0x1ee8, {1, {0x1ee9}}}, 
+ { 0x1eea, {1, {0x1eeb}}}, + { 0x1eec, {1, {0x1eed}}}, + { 0x1eee, {1, {0x1eef}}}, + { 0x1ef0, {1, {0x1ef1}}}, + { 0x1ef2, {1, {0x1ef3}}}, + { 0x1ef4, {1, {0x1ef5}}}, + { 0x1ef6, {1, {0x1ef7}}}, + { 0x1ef8, {1, {0x1ef9}}}, + { 0x1f08, {1, {0x1f00}}}, + { 0x1f09, {1, {0x1f01}}}, + { 0x1f0a, {1, {0x1f02}}}, + { 0x1f0b, {1, {0x1f03}}}, + { 0x1f0c, {1, {0x1f04}}}, + { 0x1f0d, {1, {0x1f05}}}, + { 0x1f0e, {1, {0x1f06}}}, + { 0x1f0f, {1, {0x1f07}}}, + { 0x1f18, {1, {0x1f10}}}, + { 0x1f19, {1, {0x1f11}}}, + { 0x1f1a, {1, {0x1f12}}}, + { 0x1f1b, {1, {0x1f13}}}, + { 0x1f1c, {1, {0x1f14}}}, + { 0x1f1d, {1, {0x1f15}}}, + { 0x1f28, {1, {0x1f20}}}, + { 0x1f29, {1, {0x1f21}}}, + { 0x1f2a, {1, {0x1f22}}}, + { 0x1f2b, {1, {0x1f23}}}, + { 0x1f2c, {1, {0x1f24}}}, + { 0x1f2d, {1, {0x1f25}}}, + { 0x1f2e, {1, {0x1f26}}}, + { 0x1f2f, {1, {0x1f27}}}, + { 0x1f38, {1, {0x1f30}}}, + { 0x1f39, {1, {0x1f31}}}, + { 0x1f3a, {1, {0x1f32}}}, + { 0x1f3b, {1, {0x1f33}}}, + { 0x1f3c, {1, {0x1f34}}}, + { 0x1f3d, {1, {0x1f35}}}, + { 0x1f3e, {1, {0x1f36}}}, + { 0x1f3f, {1, {0x1f37}}}, + { 0x1f48, {1, {0x1f40}}}, + { 0x1f49, {1, {0x1f41}}}, + { 0x1f4a, {1, {0x1f42}}}, + { 0x1f4b, {1, {0x1f43}}}, + { 0x1f4c, {1, {0x1f44}}}, + { 0x1f4d, {1, {0x1f45}}}, + { 0x1f50, {2, {0x03c5, 0x0313}}}, + { 0x1f52, {3, {0x03c5, 0x0313, 0x0300}}}, + { 0x1f54, {3, {0x03c5, 0x0313, 0x0301}}}, + { 0x1f56, {3, {0x03c5, 0x0313, 0x0342}}}, + { 0x1f59, {1, {0x1f51}}}, + { 0x1f5b, {1, {0x1f53}}}, + { 0x1f5d, {1, {0x1f55}}}, + { 0x1f5f, {1, {0x1f57}}}, + { 0x1f68, {1, {0x1f60}}}, + { 0x1f69, {1, {0x1f61}}}, + { 0x1f6a, {1, {0x1f62}}}, + { 0x1f6b, {1, {0x1f63}}}, + { 0x1f6c, {1, {0x1f64}}}, + { 0x1f6d, {1, {0x1f65}}}, + { 0x1f6e, {1, {0x1f66}}}, + { 0x1f6f, {1, {0x1f67}}}, + { 0x1f80, {2, {0x1f00, 0x03b9}}}, + { 0x1f81, {2, {0x1f01, 0x03b9}}}, + { 0x1f82, {2, {0x1f02, 0x03b9}}}, + { 0x1f83, {2, {0x1f03, 0x03b9}}}, + { 0x1f84, {2, {0x1f04, 0x03b9}}}, + { 0x1f85, {2, {0x1f05, 0x03b9}}}, + { 0x1f86, {2, {0x1f06, 0x03b9}}}, + { 0x1f87, 
{2, {0x1f07, 0x03b9}}}, + { 0x1f88, {2, {0x1f00, 0x03b9}}}, + { 0x1f89, {2, {0x1f01, 0x03b9}}}, + { 0x1f8a, {2, {0x1f02, 0x03b9}}}, + { 0x1f8b, {2, {0x1f03, 0x03b9}}}, + { 0x1f8c, {2, {0x1f04, 0x03b9}}}, + { 0x1f8d, {2, {0x1f05, 0x03b9}}}, + { 0x1f8e, {2, {0x1f06, 0x03b9}}}, + { 0x1f8f, {2, {0x1f07, 0x03b9}}}, + { 0x1f90, {2, {0x1f20, 0x03b9}}}, + { 0x1f91, {2, {0x1f21, 0x03b9}}}, + { 0x1f92, {2, {0x1f22, 0x03b9}}}, + { 0x1f93, {2, {0x1f23, 0x03b9}}}, + { 0x1f94, {2, {0x1f24, 0x03b9}}}, + { 0x1f95, {2, {0x1f25, 0x03b9}}}, + { 0x1f96, {2, {0x1f26, 0x03b9}}}, + { 0x1f97, {2, {0x1f27, 0x03b9}}}, + { 0x1f98, {2, {0x1f20, 0x03b9}}}, + { 0x1f99, {2, {0x1f21, 0x03b9}}}, + { 0x1f9a, {2, {0x1f22, 0x03b9}}}, + { 0x1f9b, {2, {0x1f23, 0x03b9}}}, + { 0x1f9c, {2, {0x1f24, 0x03b9}}}, + { 0x1f9d, {2, {0x1f25, 0x03b9}}}, + { 0x1f9e, {2, {0x1f26, 0x03b9}}}, + { 0x1f9f, {2, {0x1f27, 0x03b9}}}, + { 0x1fa0, {2, {0x1f60, 0x03b9}}}, + { 0x1fa1, {2, {0x1f61, 0x03b9}}}, + { 0x1fa2, {2, {0x1f62, 0x03b9}}}, + { 0x1fa3, {2, {0x1f63, 0x03b9}}}, + { 0x1fa4, {2, {0x1f64, 0x03b9}}}, + { 0x1fa5, {2, {0x1f65, 0x03b9}}}, + { 0x1fa6, {2, {0x1f66, 0x03b9}}}, + { 0x1fa7, {2, {0x1f67, 0x03b9}}}, + { 0x1fa8, {2, {0x1f60, 0x03b9}}}, + { 0x1fa9, {2, {0x1f61, 0x03b9}}}, + { 0x1faa, {2, {0x1f62, 0x03b9}}}, + { 0x1fab, {2, {0x1f63, 0x03b9}}}, + { 0x1fac, {2, {0x1f64, 0x03b9}}}, + { 0x1fad, {2, {0x1f65, 0x03b9}}}, + { 0x1fae, {2, {0x1f66, 0x03b9}}}, + { 0x1faf, {2, {0x1f67, 0x03b9}}}, + { 0x1fb2, {2, {0x1f70, 0x03b9}}}, + { 0x1fb3, {2, {0x03b1, 0x03b9}}}, + { 0x1fb4, {2, {0x03ac, 0x03b9}}}, + { 0x1fb6, {2, {0x03b1, 0x0342}}}, + { 0x1fb7, {3, {0x03b1, 0x0342, 0x03b9}}}, + { 0x1fb8, {1, {0x1fb0}}}, + { 0x1fb9, {1, {0x1fb1}}}, + { 0x1fba, {1, {0x1f70}}}, + { 0x1fbb, {1, {0x1f71}}}, + { 0x1fbc, {2, {0x03b1, 0x03b9}}}, + { 0x1fbe, {1, {0x03b9}}}, + { 0x1fc2, {2, {0x1f74, 0x03b9}}}, + { 0x1fc3, {2, {0x03b7, 0x03b9}}}, + { 0x1fc4, {2, {0x03ae, 0x03b9}}}, + { 0x1fc6, {2, {0x03b7, 0x0342}}}, + { 0x1fc7, {3, {0x03b7, 
0x0342, 0x03b9}}}, + { 0x1fc8, {1, {0x1f72}}}, + { 0x1fc9, {1, {0x1f73}}}, + { 0x1fca, {1, {0x1f74}}}, + { 0x1fcb, {1, {0x1f75}}}, + { 0x1fcc, {2, {0x03b7, 0x03b9}}}, + { 0x1fd2, {3, {0x03b9, 0x0308, 0x0300}}}, + { 0x1fd3, {3, {0x03b9, 0x0308, 0x0301}}}, + { 0x1fd6, {2, {0x03b9, 0x0342}}}, + { 0x1fd7, {3, {0x03b9, 0x0308, 0x0342}}}, + { 0x1fd8, {1, {0x1fd0}}}, + { 0x1fd9, {1, {0x1fd1}}}, + { 0x1fda, {1, {0x1f76}}}, + { 0x1fdb, {1, {0x1f77}}}, + { 0x1fe2, {3, {0x03c5, 0x0308, 0x0300}}}, + { 0x1fe3, {3, {0x03c5, 0x0308, 0x0301}}}, + { 0x1fe4, {2, {0x03c1, 0x0313}}}, + { 0x1fe6, {2, {0x03c5, 0x0342}}}, + { 0x1fe7, {3, {0x03c5, 0x0308, 0x0342}}}, + { 0x1fe8, {1, {0x1fe0}}}, + { 0x1fe9, {1, {0x1fe1}}}, + { 0x1fea, {1, {0x1f7a}}}, + { 0x1feb, {1, {0x1f7b}}}, + { 0x1fec, {1, {0x1fe5}}}, + { 0x1ff2, {2, {0x1f7c, 0x03b9}}}, + { 0x1ff3, {2, {0x03c9, 0x03b9}}}, + { 0x1ff4, {2, {0x03ce, 0x03b9}}}, + { 0x1ff6, {2, {0x03c9, 0x0342}}}, + { 0x1ff7, {3, {0x03c9, 0x0342, 0x03b9}}}, + { 0x1ff8, {1, {0x1f78}}}, + { 0x1ff9, {1, {0x1f79}}}, + { 0x1ffa, {1, {0x1f7c}}}, + { 0x1ffb, {1, {0x1f7d}}}, + { 0x1ffc, {2, {0x03c9, 0x03b9}}}, + { 0x2126, {1, {0x03c9}}}, + { 0x212a, {1, {0x006b}}}, + { 0x212b, {1, {0x00e5}}}, + { 0x2160, {1, {0x2170}}}, + { 0x2161, {1, {0x2171}}}, + { 0x2162, {1, {0x2172}}}, + { 0x2163, {1, {0x2173}}}, + { 0x2164, {1, {0x2174}}}, + { 0x2165, {1, {0x2175}}}, + { 0x2166, {1, {0x2176}}}, + { 0x2167, {1, {0x2177}}}, + { 0x2168, {1, {0x2178}}}, + { 0x2169, {1, {0x2179}}}, + { 0x216a, {1, {0x217a}}}, + { 0x216b, {1, {0x217b}}}, + { 0x216c, {1, {0x217c}}}, + { 0x216d, {1, {0x217d}}}, + { 0x216e, {1, {0x217e}}}, + { 0x216f, {1, {0x217f}}}, + { 0x24b6, {1, {0x24d0}}}, + { 0x24b7, {1, {0x24d1}}}, + { 0x24b8, {1, {0x24d2}}}, + { 0x24b9, {1, {0x24d3}}}, + { 0x24ba, {1, {0x24d4}}}, + { 0x24bb, {1, {0x24d5}}}, + { 0x24bc, {1, {0x24d6}}}, + { 0x24bd, {1, {0x24d7}}}, + { 0x24be, {1, {0x24d8}}}, + { 0x24bf, {1, {0x24d9}}}, + { 0x24c0, {1, {0x24da}}}, + { 0x24c1, {1, {0x24db}}}, + { 
0x24c2, {1, {0x24dc}}}, + { 0x24c3, {1, {0x24dd}}}, + { 0x24c4, {1, {0x24de}}}, + { 0x24c5, {1, {0x24df}}}, + { 0x24c6, {1, {0x24e0}}}, + { 0x24c7, {1, {0x24e1}}}, + { 0x24c8, {1, {0x24e2}}}, + { 0x24c9, {1, {0x24e3}}}, + { 0x24ca, {1, {0x24e4}}}, + { 0x24cb, {1, {0x24e5}}}, + { 0x24cc, {1, {0x24e6}}}, + { 0x24cd, {1, {0x24e7}}}, + { 0x24ce, {1, {0x24e8}}}, + { 0x24cf, {1, {0x24e9}}}, + { 0x2c00, {1, {0x2c30}}}, + { 0x2c01, {1, {0x2c31}}}, + { 0x2c02, {1, {0x2c32}}}, + { 0x2c03, {1, {0x2c33}}}, + { 0x2c04, {1, {0x2c34}}}, + { 0x2c05, {1, {0x2c35}}}, + { 0x2c06, {1, {0x2c36}}}, + { 0x2c07, {1, {0x2c37}}}, + { 0x2c08, {1, {0x2c38}}}, + { 0x2c09, {1, {0x2c39}}}, + { 0x2c0a, {1, {0x2c3a}}}, + { 0x2c0b, {1, {0x2c3b}}}, + { 0x2c0c, {1, {0x2c3c}}}, + { 0x2c0d, {1, {0x2c3d}}}, + { 0x2c0e, {1, {0x2c3e}}}, + { 0x2c0f, {1, {0x2c3f}}}, + { 0x2c10, {1, {0x2c40}}}, + { 0x2c11, {1, {0x2c41}}}, + { 0x2c12, {1, {0x2c42}}}, + { 0x2c13, {1, {0x2c43}}}, + { 0x2c14, {1, {0x2c44}}}, + { 0x2c15, {1, {0x2c45}}}, + { 0x2c16, {1, {0x2c46}}}, + { 0x2c17, {1, {0x2c47}}}, + { 0x2c18, {1, {0x2c48}}}, + { 0x2c19, {1, {0x2c49}}}, + { 0x2c1a, {1, {0x2c4a}}}, + { 0x2c1b, {1, {0x2c4b}}}, + { 0x2c1c, {1, {0x2c4c}}}, + { 0x2c1d, {1, {0x2c4d}}}, + { 0x2c1e, {1, {0x2c4e}}}, + { 0x2c1f, {1, {0x2c4f}}}, + { 0x2c20, {1, {0x2c50}}}, + { 0x2c21, {1, {0x2c51}}}, + { 0x2c22, {1, {0x2c52}}}, + { 0x2c23, {1, {0x2c53}}}, + { 0x2c24, {1, {0x2c54}}}, + { 0x2c25, {1, {0x2c55}}}, + { 0x2c26, {1, {0x2c56}}}, + { 0x2c27, {1, {0x2c57}}}, + { 0x2c28, {1, {0x2c58}}}, + { 0x2c29, {1, {0x2c59}}}, + { 0x2c2a, {1, {0x2c5a}}}, + { 0x2c2b, {1, {0x2c5b}}}, + { 0x2c2c, {1, {0x2c5c}}}, + { 0x2c2d, {1, {0x2c5d}}}, + { 0x2c2e, {1, {0x2c5e}}}, + { 0x2c80, {1, {0x2c81}}}, + { 0x2c82, {1, {0x2c83}}}, + { 0x2c84, {1, {0x2c85}}}, + { 0x2c86, {1, {0x2c87}}}, + { 0x2c88, {1, {0x2c89}}}, + { 0x2c8a, {1, {0x2c8b}}}, + { 0x2c8c, {1, {0x2c8d}}}, + { 0x2c8e, {1, {0x2c8f}}}, + { 0x2c90, {1, {0x2c91}}}, + { 0x2c92, {1, {0x2c93}}}, + { 0x2c94, {1, 
{0x2c95}}}, + { 0x2c96, {1, {0x2c97}}}, + { 0x2c98, {1, {0x2c99}}}, + { 0x2c9a, {1, {0x2c9b}}}, + { 0x2c9c, {1, {0x2c9d}}}, + { 0x2c9e, {1, {0x2c9f}}}, + { 0x2ca0, {1, {0x2ca1}}}, + { 0x2ca2, {1, {0x2ca3}}}, + { 0x2ca4, {1, {0x2ca5}}}, + { 0x2ca6, {1, {0x2ca7}}}, + { 0x2ca8, {1, {0x2ca9}}}, + { 0x2caa, {1, {0x2cab}}}, + { 0x2cac, {1, {0x2cad}}}, + { 0x2cae, {1, {0x2caf}}}, + { 0x2cb0, {1, {0x2cb1}}}, + { 0x2cb2, {1, {0x2cb3}}}, + { 0x2cb4, {1, {0x2cb5}}}, + { 0x2cb6, {1, {0x2cb7}}}, + { 0x2cb8, {1, {0x2cb9}}}, + { 0x2cba, {1, {0x2cbb}}}, + { 0x2cbc, {1, {0x2cbd}}}, + { 0x2cbe, {1, {0x2cbf}}}, + { 0x2cc0, {1, {0x2cc1}}}, + { 0x2cc2, {1, {0x2cc3}}}, + { 0x2cc4, {1, {0x2cc5}}}, + { 0x2cc6, {1, {0x2cc7}}}, + { 0x2cc8, {1, {0x2cc9}}}, + { 0x2cca, {1, {0x2ccb}}}, + { 0x2ccc, {1, {0x2ccd}}}, + { 0x2cce, {1, {0x2ccf}}}, + { 0x2cd0, {1, {0x2cd1}}}, + { 0x2cd2, {1, {0x2cd3}}}, + { 0x2cd4, {1, {0x2cd5}}}, + { 0x2cd6, {1, {0x2cd7}}}, + { 0x2cd8, {1, {0x2cd9}}}, + { 0x2cda, {1, {0x2cdb}}}, + { 0x2cdc, {1, {0x2cdd}}}, + { 0x2cde, {1, {0x2cdf}}}, + { 0x2ce0, {1, {0x2ce1}}}, + { 0x2ce2, {1, {0x2ce3}}}, + { 0xfb00, {2, {0x0066, 0x0066}}}, + { 0xfb01, {2, {0x0066, 0x0069}}}, + { 0xfb02, {2, {0x0066, 0x006c}}}, + { 0xfb03, {3, {0x0066, 0x0066, 0x0069}}}, + { 0xfb04, {3, {0x0066, 0x0066, 0x006c}}}, + { 0xfb05, {2, {0x0073, 0x0074}}}, + { 0xfb06, {2, {0x0073, 0x0074}}}, + { 0xfb13, {2, {0x0574, 0x0576}}}, + { 0xfb14, {2, {0x0574, 0x0565}}}, + { 0xfb15, {2, {0x0574, 0x056b}}}, + { 0xfb16, {2, {0x057e, 0x0576}}}, + { 0xfb17, {2, {0x0574, 0x056d}}}, + { 0xff21, {1, {0xff41}}}, + { 0xff22, {1, {0xff42}}}, + { 0xff23, {1, {0xff43}}}, + { 0xff24, {1, {0xff44}}}, + { 0xff25, {1, {0xff45}}}, + { 0xff26, {1, {0xff46}}}, + { 0xff27, {1, {0xff47}}}, + { 0xff28, {1, {0xff48}}}, + { 0xff29, {1, {0xff49}}}, + { 0xff2a, {1, {0xff4a}}}, + { 0xff2b, {1, {0xff4b}}}, + { 0xff2c, {1, {0xff4c}}}, + { 0xff2d, {1, {0xff4d}}}, + { 0xff2e, {1, {0xff4e}}}, + { 0xff2f, {1, {0xff4f}}}, + { 0xff30, {1, {0xff50}}}, 
+ { 0xff31, {1, {0xff51}}}, + { 0xff32, {1, {0xff52}}}, + { 0xff33, {1, {0xff53}}}, + { 0xff34, {1, {0xff54}}}, + { 0xff35, {1, {0xff55}}}, + { 0xff36, {1, {0xff56}}}, + { 0xff37, {1, {0xff57}}}, + { 0xff38, {1, {0xff58}}}, + { 0xff39, {1, {0xff59}}}, + { 0xff3a, {1, {0xff5a}}}, + { 0x10400, {1, {0x10428}}}, + { 0x10401, {1, {0x10429}}}, + { 0x10402, {1, {0x1042a}}}, + { 0x10403, {1, {0x1042b}}}, + { 0x10404, {1, {0x1042c}}}, + { 0x10405, {1, {0x1042d}}}, + { 0x10406, {1, {0x1042e}}}, + { 0x10407, {1, {0x1042f}}}, + { 0x10408, {1, {0x10430}}}, + { 0x10409, {1, {0x10431}}}, + { 0x1040a, {1, {0x10432}}}, + { 0x1040b, {1, {0x10433}}}, + { 0x1040c, {1, {0x10434}}}, + { 0x1040d, {1, {0x10435}}}, + { 0x1040e, {1, {0x10436}}}, + { 0x1040f, {1, {0x10437}}}, + { 0x10410, {1, {0x10438}}}, + { 0x10411, {1, {0x10439}}}, + { 0x10412, {1, {0x1043a}}}, + { 0x10413, {1, {0x1043b}}}, + { 0x10414, {1, {0x1043c}}}, + { 0x10415, {1, {0x1043d}}}, + { 0x10416, {1, {0x1043e}}}, + { 0x10417, {1, {0x1043f}}}, + { 0x10418, {1, {0x10440}}}, + { 0x10419, {1, {0x10441}}}, + { 0x1041a, {1, {0x10442}}}, + { 0x1041b, {1, {0x10443}}}, + { 0x1041c, {1, {0x10444}}}, + { 0x1041d, {1, {0x10445}}}, + { 0x1041e, {1, {0x10446}}}, + { 0x1041f, {1, {0x10447}}}, + { 0x10420, {1, {0x10448}}}, + { 0x10421, {1, {0x10449}}}, + { 0x10422, {1, {0x1044a}}}, + { 0x10423, {1, {0x1044b}}}, + { 0x10424, {1, {0x1044c}}}, + { 0x10425, {1, {0x1044d}}}, + { 0x10426, {1, {0x1044e}}}, + { 0x10427, {1, {0x1044f}}} +}; + +static const CaseFold_11_Type CaseFold_Locale[] = { + { 0x0049, {1, {0x0069}}}, + { 0x0130, {2, {0x0069, 0x0307}}} +}; + +static const CaseUnfold_11_Type CaseUnfold_11[] = { + { 0x0061, {1, {0x0041 }}}, + { 0x0062, {1, {0x0042 }}}, + { 0x0063, {1, {0x0043 }}}, + { 0x0064, {1, {0x0044 }}}, + { 0x0065, {1, {0x0045 }}}, + { 0x0066, {1, {0x0046 }}}, + { 0x0067, {1, {0x0047 }}}, + { 0x0068, {1, {0x0048 }}}, + { 0x006a, {1, {0x004a }}}, + { 0x006b, {2, {0x212a, 0x004b }}}, + { 0x006c, {1, {0x004c }}}, + { 0x006d, 
{1, {0x004d }}}, + { 0x006e, {1, {0x004e }}}, + { 0x006f, {1, {0x004f }}}, + { 0x0070, {1, {0x0050 }}}, + { 0x0071, {1, {0x0051 }}}, + { 0x0072, {1, {0x0052 }}}, + { 0x0073, {2, {0x0053, 0x017f }}}, + { 0x0074, {1, {0x0054 }}}, + { 0x0075, {1, {0x0055 }}}, + { 0x0076, {1, {0x0056 }}}, + { 0x0077, {1, {0x0057 }}}, + { 0x0078, {1, {0x0058 }}}, + { 0x0079, {1, {0x0059 }}}, + { 0x007a, {1, {0x005a }}}, + { 0x00e0, {1, {0x00c0 }}}, + { 0x00e1, {1, {0x00c1 }}}, + { 0x00e2, {1, {0x00c2 }}}, + { 0x00e3, {1, {0x00c3 }}}, + { 0x00e4, {1, {0x00c4 }}}, + { 0x00e5, {2, {0x212b, 0x00c5 }}}, + { 0x00e6, {1, {0x00c6 }}}, + { 0x00e7, {1, {0x00c7 }}}, + { 0x00e8, {1, {0x00c8 }}}, + { 0x00e9, {1, {0x00c9 }}}, + { 0x00ea, {1, {0x00ca }}}, + { 0x00eb, {1, {0x00cb }}}, + { 0x00ec, {1, {0x00cc }}}, + { 0x00ed, {1, {0x00cd }}}, + { 0x00ee, {1, {0x00ce }}}, + { 0x00ef, {1, {0x00cf }}}, + { 0x00f0, {1, {0x00d0 }}}, + { 0x00f1, {1, {0x00d1 }}}, + { 0x00f2, {1, {0x00d2 }}}, + { 0x00f3, {1, {0x00d3 }}}, + { 0x00f4, {1, {0x00d4 }}}, + { 0x00f5, {1, {0x00d5 }}}, + { 0x00f6, {1, {0x00d6 }}}, + { 0x00f8, {1, {0x00d8 }}}, + { 0x00f9, {1, {0x00d9 }}}, + { 0x00fa, {1, {0x00da }}}, + { 0x00fb, {1, {0x00db }}}, + { 0x00fc, {1, {0x00dc }}}, + { 0x00fd, {1, {0x00dd }}}, + { 0x00fe, {1, {0x00de }}}, + { 0x00ff, {1, {0x0178 }}}, + { 0x0101, {1, {0x0100 }}}, + { 0x0103, {1, {0x0102 }}}, + { 0x0105, {1, {0x0104 }}}, + { 0x0107, {1, {0x0106 }}}, + { 0x0109, {1, {0x0108 }}}, + { 0x010b, {1, {0x010a }}}, + { 0x010d, {1, {0x010c }}}, + { 0x010f, {1, {0x010e }}}, + { 0x0111, {1, {0x0110 }}}, + { 0x0113, {1, {0x0112 }}}, + { 0x0115, {1, {0x0114 }}}, + { 0x0117, {1, {0x0116 }}}, + { 0x0119, {1, {0x0118 }}}, + { 0x011b, {1, {0x011a }}}, + { 0x011d, {1, {0x011c }}}, + { 0x011f, {1, {0x011e }}}, + { 0x0121, {1, {0x0120 }}}, + { 0x0123, {1, {0x0122 }}}, + { 0x0125, {1, {0x0124 }}}, + { 0x0127, {1, {0x0126 }}}, + { 0x0129, {1, {0x0128 }}}, + { 0x012b, {1, {0x012a }}}, + { 0x012d, {1, {0x012c }}}, + { 0x012f, {1, {0x012e 
}}}, + { 0x0133, {1, {0x0132 }}}, + { 0x0135, {1, {0x0134 }}}, + { 0x0137, {1, {0x0136 }}}, + { 0x013a, {1, {0x0139 }}}, + { 0x013c, {1, {0x013b }}}, + { 0x013e, {1, {0x013d }}}, + { 0x0140, {1, {0x013f }}}, + { 0x0142, {1, {0x0141 }}}, + { 0x0144, {1, {0x0143 }}}, + { 0x0146, {1, {0x0145 }}}, + { 0x0148, {1, {0x0147 }}}, + { 0x014b, {1, {0x014a }}}, + { 0x014d, {1, {0x014c }}}, + { 0x014f, {1, {0x014e }}}, + { 0x0151, {1, {0x0150 }}}, + { 0x0153, {1, {0x0152 }}}, + { 0x0155, {1, {0x0154 }}}, + { 0x0157, {1, {0x0156 }}}, + { 0x0159, {1, {0x0158 }}}, + { 0x015b, {1, {0x015a }}}, + { 0x015d, {1, {0x015c }}}, + { 0x015f, {1, {0x015e }}}, + { 0x0161, {1, {0x0160 }}}, + { 0x0163, {1, {0x0162 }}}, + { 0x0165, {1, {0x0164 }}}, + { 0x0167, {1, {0x0166 }}}, + { 0x0169, {1, {0x0168 }}}, + { 0x016b, {1, {0x016a }}}, + { 0x016d, {1, {0x016c }}}, + { 0x016f, {1, {0x016e }}}, + { 0x0171, {1, {0x0170 }}}, + { 0x0173, {1, {0x0172 }}}, + { 0x0175, {1, {0x0174 }}}, + { 0x0177, {1, {0x0176 }}}, + { 0x017a, {1, {0x0179 }}}, + { 0x017c, {1, {0x017b }}}, + { 0x017e, {1, {0x017d }}}, + { 0x0183, {1, {0x0182 }}}, + { 0x0185, {1, {0x0184 }}}, + { 0x0188, {1, {0x0187 }}}, + { 0x018c, {1, {0x018b }}}, + { 0x0192, {1, {0x0191 }}}, + { 0x0195, {1, {0x01f6 }}}, + { 0x0199, {1, {0x0198 }}}, + { 0x019a, {1, {0x023d }}}, + { 0x019e, {1, {0x0220 }}}, + { 0x01a1, {1, {0x01a0 }}}, + { 0x01a3, {1, {0x01a2 }}}, + { 0x01a5, {1, {0x01a4 }}}, + { 0x01a8, {1, {0x01a7 }}}, + { 0x01ad, {1, {0x01ac }}}, + { 0x01b0, {1, {0x01af }}}, + { 0x01b4, {1, {0x01b3 }}}, + { 0x01b6, {1, {0x01b5 }}}, + { 0x01b9, {1, {0x01b8 }}}, + { 0x01bd, {1, {0x01bc }}}, + { 0x01bf, {1, {0x01f7 }}}, + { 0x01c6, {2, {0x01c4, 0x01c5 }}}, + { 0x01c9, {2, {0x01c7, 0x01c8 }}}, + { 0x01cc, {2, {0x01ca, 0x01cb }}}, + { 0x01ce, {1, {0x01cd }}}, + { 0x01d0, {1, {0x01cf }}}, + { 0x01d2, {1, {0x01d1 }}}, + { 0x01d4, {1, {0x01d3 }}}, + { 0x01d6, {1, {0x01d5 }}}, + { 0x01d8, {1, {0x01d7 }}}, + { 0x01da, {1, {0x01d9 }}}, + { 0x01dc, {1, {0x01db 
}}}, + { 0x01dd, {1, {0x018e }}}, + { 0x01df, {1, {0x01de }}}, + { 0x01e1, {1, {0x01e0 }}}, + { 0x01e3, {1, {0x01e2 }}}, + { 0x01e5, {1, {0x01e4 }}}, + { 0x01e7, {1, {0x01e6 }}}, + { 0x01e9, {1, {0x01e8 }}}, + { 0x01eb, {1, {0x01ea }}}, + { 0x01ed, {1, {0x01ec }}}, + { 0x01ef, {1, {0x01ee }}}, + { 0x01f3, {2, {0x01f1, 0x01f2 }}}, + { 0x01f5, {1, {0x01f4 }}}, + { 0x01f9, {1, {0x01f8 }}}, + { 0x01fb, {1, {0x01fa }}}, + { 0x01fd, {1, {0x01fc }}}, + { 0x01ff, {1, {0x01fe }}}, + { 0x0201, {1, {0x0200 }}}, + { 0x0203, {1, {0x0202 }}}, + { 0x0205, {1, {0x0204 }}}, + { 0x0207, {1, {0x0206 }}}, + { 0x0209, {1, {0x0208 }}}, + { 0x020b, {1, {0x020a }}}, + { 0x020d, {1, {0x020c }}}, + { 0x020f, {1, {0x020e }}}, + { 0x0211, {1, {0x0210 }}}, + { 0x0213, {1, {0x0212 }}}, + { 0x0215, {1, {0x0214 }}}, + { 0x0217, {1, {0x0216 }}}, + { 0x0219, {1, {0x0218 }}}, + { 0x021b, {1, {0x021a }}}, + { 0x021d, {1, {0x021c }}}, + { 0x021f, {1, {0x021e }}}, + { 0x0223, {1, {0x0222 }}}, + { 0x0225, {1, {0x0224 }}}, + { 0x0227, {1, {0x0226 }}}, + { 0x0229, {1, {0x0228 }}}, + { 0x022b, {1, {0x022a }}}, + { 0x022d, {1, {0x022c }}}, + { 0x022f, {1, {0x022e }}}, + { 0x0231, {1, {0x0230 }}}, + { 0x0233, {1, {0x0232 }}}, + { 0x023c, {1, {0x023b }}}, + { 0x0253, {1, {0x0181 }}}, + { 0x0254, {1, {0x0186 }}}, + { 0x0256, {1, {0x0189 }}}, + { 0x0257, {1, {0x018a }}}, + { 0x0259, {1, {0x018f }}}, + { 0x025b, {1, {0x0190 }}}, + { 0x0260, {1, {0x0193 }}}, + { 0x0263, {1, {0x0194 }}}, + { 0x0268, {1, {0x0197 }}}, + { 0x0269, {1, {0x0196 }}}, + { 0x026f, {1, {0x019c }}}, + { 0x0272, {1, {0x019d }}}, + { 0x0275, {1, {0x019f }}}, + { 0x0280, {1, {0x01a6 }}}, + { 0x0283, {1, {0x01a9 }}}, + { 0x0288, {1, {0x01ae }}}, + { 0x028a, {1, {0x01b1 }}}, + { 0x028b, {1, {0x01b2 }}}, + { 0x0292, {1, {0x01b7 }}}, + { 0x0294, {1, {0x0241 }}}, + { 0x03ac, {1, {0x0386 }}}, + { 0x03ad, {1, {0x0388 }}}, + { 0x03ae, {1, {0x0389 }}}, + { 0x03af, {1, {0x038a }}}, + { 0x03b1, {1, {0x0391 }}}, + { 0x03b2, {2, {0x0392, 0x03d0 }}}, + { 
0x03b3, {1, {0x0393 }}}, + { 0x03b4, {1, {0x0394 }}}, + { 0x03b5, {2, {0x03f5, 0x0395 }}}, + { 0x03b6, {1, {0x0396 }}}, + { 0x03b7, {1, {0x0397 }}}, + { 0x03b8, {3, {0x03f4, 0x0398, 0x03d1 }}}, + { 0x03b9, {3, {0x1fbe, 0x0399, 0x0345 }}}, + { 0x03ba, {2, {0x03f0, 0x039a }}}, + { 0x03bb, {1, {0x039b }}}, + { 0x03bc, {2, {0x00b5, 0x039c }}}, + { 0x03bd, {1, {0x039d }}}, + { 0x03be, {1, {0x039e }}}, + { 0x03bf, {1, {0x039f }}}, + { 0x03c0, {2, {0x03a0, 0x03d6 }}}, + { 0x03c1, {2, {0x03f1, 0x03a1 }}}, + { 0x03c3, {2, {0x03a3, 0x03c2 }}}, + { 0x03c4, {1, {0x03a4 }}}, + { 0x03c5, {1, {0x03a5 }}}, + { 0x03c6, {2, {0x03a6, 0x03d5 }}}, + { 0x03c7, {1, {0x03a7 }}}, + { 0x03c8, {1, {0x03a8 }}}, + { 0x03c9, {2, {0x03a9, 0x2126 }}}, + { 0x03ca, {1, {0x03aa }}}, + { 0x03cb, {1, {0x03ab }}}, + { 0x03cc, {1, {0x038c }}}, + { 0x03cd, {1, {0x038e }}}, + { 0x03ce, {1, {0x038f }}}, + { 0x03d9, {1, {0x03d8 }}}, + { 0x03db, {1, {0x03da }}}, + { 0x03dd, {1, {0x03dc }}}, + { 0x03df, {1, {0x03de }}}, + { 0x03e1, {1, {0x03e0 }}}, + { 0x03e3, {1, {0x03e2 }}}, + { 0x03e5, {1, {0x03e4 }}}, + { 0x03e7, {1, {0x03e6 }}}, + { 0x03e9, {1, {0x03e8 }}}, + { 0x03eb, {1, {0x03ea }}}, + { 0x03ed, {1, {0x03ec }}}, + { 0x03ef, {1, {0x03ee }}}, + { 0x03f2, {1, {0x03f9 }}}, + { 0x03f8, {1, {0x03f7 }}}, + { 0x03fb, {1, {0x03fa }}}, + { 0x0430, {1, {0x0410 }}}, + { 0x0431, {1, {0x0411 }}}, + { 0x0432, {1, {0x0412 }}}, + { 0x0433, {1, {0x0413 }}}, + { 0x0434, {1, {0x0414 }}}, + { 0x0435, {1, {0x0415 }}}, + { 0x0436, {1, {0x0416 }}}, + { 0x0437, {1, {0x0417 }}}, + { 0x0438, {1, {0x0418 }}}, + { 0x0439, {1, {0x0419 }}}, + { 0x043a, {1, {0x041a }}}, + { 0x043b, {1, {0x041b }}}, + { 0x043c, {1, {0x041c }}}, + { 0x043d, {1, {0x041d }}}, + { 0x043e, {1, {0x041e }}}, + { 0x043f, {1, {0x041f }}}, + { 0x0440, {1, {0x0420 }}}, + { 0x0441, {1, {0x0421 }}}, + { 0x0442, {1, {0x0422 }}}, + { 0x0443, {1, {0x0423 }}}, + { 0x0444, {1, {0x0424 }}}, + { 0x0445, {1, {0x0425 }}}, + { 0x0446, {1, {0x0426 }}}, + { 0x0447, {1, 
{0x0427 }}}, + { 0x0448, {1, {0x0428 }}}, + { 0x0449, {1, {0x0429 }}}, + { 0x044a, {1, {0x042a }}}, + { 0x044b, {1, {0x042b }}}, + { 0x044c, {1, {0x042c }}}, + { 0x044d, {1, {0x042d }}}, + { 0x044e, {1, {0x042e }}}, + { 0x044f, {1, {0x042f }}}, + { 0x0450, {1, {0x0400 }}}, + { 0x0451, {1, {0x0401 }}}, + { 0x0452, {1, {0x0402 }}}, + { 0x0453, {1, {0x0403 }}}, + { 0x0454, {1, {0x0404 }}}, + { 0x0455, {1, {0x0405 }}}, + { 0x0456, {1, {0x0406 }}}, + { 0x0457, {1, {0x0407 }}}, + { 0x0458, {1, {0x0408 }}}, + { 0x0459, {1, {0x0409 }}}, + { 0x045a, {1, {0x040a }}}, + { 0x045b, {1, {0x040b }}}, + { 0x045c, {1, {0x040c }}}, + { 0x045d, {1, {0x040d }}}, + { 0x045e, {1, {0x040e }}}, + { 0x045f, {1, {0x040f }}}, + { 0x0461, {1, {0x0460 }}}, + { 0x0463, {1, {0x0462 }}}, + { 0x0465, {1, {0x0464 }}}, + { 0x0467, {1, {0x0466 }}}, + { 0x0469, {1, {0x0468 }}}, + { 0x046b, {1, {0x046a }}}, + { 0x046d, {1, {0x046c }}}, + { 0x046f, {1, {0x046e }}}, + { 0x0471, {1, {0x0470 }}}, + { 0x0473, {1, {0x0472 }}}, + { 0x0475, {1, {0x0474 }}}, + { 0x0477, {1, {0x0476 }}}, + { 0x0479, {1, {0x0478 }}}, + { 0x047b, {1, {0x047a }}}, + { 0x047d, {1, {0x047c }}}, + { 0x047f, {1, {0x047e }}}, + { 0x0481, {1, {0x0480 }}}, + { 0x048b, {1, {0x048a }}}, + { 0x048d, {1, {0x048c }}}, + { 0x048f, {1, {0x048e }}}, + { 0x0491, {1, {0x0490 }}}, + { 0x0493, {1, {0x0492 }}}, + { 0x0495, {1, {0x0494 }}}, + { 0x0497, {1, {0x0496 }}}, + { 0x0499, {1, {0x0498 }}}, + { 0x049b, {1, {0x049a }}}, + { 0x049d, {1, {0x049c }}}, + { 0x049f, {1, {0x049e }}}, + { 0x04a1, {1, {0x04a0 }}}, + { 0x04a3, {1, {0x04a2 }}}, + { 0x04a5, {1, {0x04a4 }}}, + { 0x04a7, {1, {0x04a6 }}}, + { 0x04a9, {1, {0x04a8 }}}, + { 0x04ab, {1, {0x04aa }}}, + { 0x04ad, {1, {0x04ac }}}, + { 0x04af, {1, {0x04ae }}}, + { 0x04b1, {1, {0x04b0 }}}, + { 0x04b3, {1, {0x04b2 }}}, + { 0x04b5, {1, {0x04b4 }}}, + { 0x04b7, {1, {0x04b6 }}}, + { 0x04b9, {1, {0x04b8 }}}, + { 0x04bb, {1, {0x04ba }}}, + { 0x04bd, {1, {0x04bc }}}, + { 0x04bf, {1, {0x04be }}}, + { 0x04c2, 
{1, {0x04c1 }}}, + { 0x04c4, {1, {0x04c3 }}}, + { 0x04c6, {1, {0x04c5 }}}, + { 0x04c8, {1, {0x04c7 }}}, + { 0x04ca, {1, {0x04c9 }}}, + { 0x04cc, {1, {0x04cb }}}, + { 0x04ce, {1, {0x04cd }}}, + { 0x04d1, {1, {0x04d0 }}}, + { 0x04d3, {1, {0x04d2 }}}, + { 0x04d5, {1, {0x04d4 }}}, + { 0x04d7, {1, {0x04d6 }}}, + { 0x04d9, {1, {0x04d8 }}}, + { 0x04db, {1, {0x04da }}}, + { 0x04dd, {1, {0x04dc }}}, + { 0x04df, {1, {0x04de }}}, + { 0x04e1, {1, {0x04e0 }}}, + { 0x04e3, {1, {0x04e2 }}}, + { 0x04e5, {1, {0x04e4 }}}, + { 0x04e7, {1, {0x04e6 }}}, + { 0x04e9, {1, {0x04e8 }}}, + { 0x04eb, {1, {0x04ea }}}, + { 0x04ed, {1, {0x04ec }}}, + { 0x04ef, {1, {0x04ee }}}, + { 0x04f1, {1, {0x04f0 }}}, + { 0x04f3, {1, {0x04f2 }}}, + { 0x04f5, {1, {0x04f4 }}}, + { 0x04f7, {1, {0x04f6 }}}, + { 0x04f9, {1, {0x04f8 }}}, + { 0x0501, {1, {0x0500 }}}, + { 0x0503, {1, {0x0502 }}}, + { 0x0505, {1, {0x0504 }}}, + { 0x0507, {1, {0x0506 }}}, + { 0x0509, {1, {0x0508 }}}, + { 0x050b, {1, {0x050a }}}, + { 0x050d, {1, {0x050c }}}, + { 0x050f, {1, {0x050e }}}, + { 0x0561, {1, {0x0531 }}}, + { 0x0562, {1, {0x0532 }}}, + { 0x0563, {1, {0x0533 }}}, + { 0x0564, {1, {0x0534 }}}, + { 0x0565, {1, {0x0535 }}}, + { 0x0566, {1, {0x0536 }}}, + { 0x0567, {1, {0x0537 }}}, + { 0x0568, {1, {0x0538 }}}, + { 0x0569, {1, {0x0539 }}}, + { 0x056a, {1, {0x053a }}}, + { 0x056b, {1, {0x053b }}}, + { 0x056c, {1, {0x053c }}}, + { 0x056d, {1, {0x053d }}}, + { 0x056e, {1, {0x053e }}}, + { 0x056f, {1, {0x053f }}}, + { 0x0570, {1, {0x0540 }}}, + { 0x0571, {1, {0x0541 }}}, + { 0x0572, {1, {0x0542 }}}, + { 0x0573, {1, {0x0543 }}}, + { 0x0574, {1, {0x0544 }}}, + { 0x0575, {1, {0x0545 }}}, + { 0x0576, {1, {0x0546 }}}, + { 0x0577, {1, {0x0547 }}}, + { 0x0578, {1, {0x0548 }}}, + { 0x0579, {1, {0x0549 }}}, + { 0x057a, {1, {0x054a }}}, + { 0x057b, {1, {0x054b }}}, + { 0x057c, {1, {0x054c }}}, + { 0x057d, {1, {0x054d }}}, + { 0x057e, {1, {0x054e }}}, + { 0x057f, {1, {0x054f }}}, + { 0x0580, {1, {0x0550 }}}, + { 0x0581, {1, {0x0551 }}}, + { 
0x0582, {1, {0x0552 }}}, + { 0x0583, {1, {0x0553 }}}, + { 0x0584, {1, {0x0554 }}}, + { 0x0585, {1, {0x0555 }}}, + { 0x0586, {1, {0x0556 }}}, + { 0x1e01, {1, {0x1e00 }}}, + { 0x1e03, {1, {0x1e02 }}}, + { 0x1e05, {1, {0x1e04 }}}, + { 0x1e07, {1, {0x1e06 }}}, + { 0x1e09, {1, {0x1e08 }}}, + { 0x1e0b, {1, {0x1e0a }}}, + { 0x1e0d, {1, {0x1e0c }}}, + { 0x1e0f, {1, {0x1e0e }}}, + { 0x1e11, {1, {0x1e10 }}}, + { 0x1e13, {1, {0x1e12 }}}, + { 0x1e15, {1, {0x1e14 }}}, + { 0x1e17, {1, {0x1e16 }}}, + { 0x1e19, {1, {0x1e18 }}}, + { 0x1e1b, {1, {0x1e1a }}}, + { 0x1e1d, {1, {0x1e1c }}}, + { 0x1e1f, {1, {0x1e1e }}}, + { 0x1e21, {1, {0x1e20 }}}, + { 0x1e23, {1, {0x1e22 }}}, + { 0x1e25, {1, {0x1e24 }}}, + { 0x1e27, {1, {0x1e26 }}}, + { 0x1e29, {1, {0x1e28 }}}, + { 0x1e2b, {1, {0x1e2a }}}, + { 0x1e2d, {1, {0x1e2c }}}, + { 0x1e2f, {1, {0x1e2e }}}, + { 0x1e31, {1, {0x1e30 }}}, + { 0x1e33, {1, {0x1e32 }}}, + { 0x1e35, {1, {0x1e34 }}}, + { 0x1e37, {1, {0x1e36 }}}, + { 0x1e39, {1, {0x1e38 }}}, + { 0x1e3b, {1, {0x1e3a }}}, + { 0x1e3d, {1, {0x1e3c }}}, + { 0x1e3f, {1, {0x1e3e }}}, + { 0x1e41, {1, {0x1e40 }}}, + { 0x1e43, {1, {0x1e42 }}}, + { 0x1e45, {1, {0x1e44 }}}, + { 0x1e47, {1, {0x1e46 }}}, + { 0x1e49, {1, {0x1e48 }}}, + { 0x1e4b, {1, {0x1e4a }}}, + { 0x1e4d, {1, {0x1e4c }}}, + { 0x1e4f, {1, {0x1e4e }}}, + { 0x1e51, {1, {0x1e50 }}}, + { 0x1e53, {1, {0x1e52 }}}, + { 0x1e55, {1, {0x1e54 }}}, + { 0x1e57, {1, {0x1e56 }}}, + { 0x1e59, {1, {0x1e58 }}}, + { 0x1e5b, {1, {0x1e5a }}}, + { 0x1e5d, {1, {0x1e5c }}}, + { 0x1e5f, {1, {0x1e5e }}}, + { 0x1e61, {2, {0x1e9b, 0x1e60 }}}, + { 0x1e63, {1, {0x1e62 }}}, + { 0x1e65, {1, {0x1e64 }}}, + { 0x1e67, {1, {0x1e66 }}}, + { 0x1e69, {1, {0x1e68 }}}, + { 0x1e6b, {1, {0x1e6a }}}, + { 0x1e6d, {1, {0x1e6c }}}, + { 0x1e6f, {1, {0x1e6e }}}, + { 0x1e71, {1, {0x1e70 }}}, + { 0x1e73, {1, {0x1e72 }}}, + { 0x1e75, {1, {0x1e74 }}}, + { 0x1e77, {1, {0x1e76 }}}, + { 0x1e79, {1, {0x1e78 }}}, + { 0x1e7b, {1, {0x1e7a }}}, + { 0x1e7d, {1, {0x1e7c }}}, + { 0x1e7f, {1, {0x1e7e 
}}}, + { 0x1e81, {1, {0x1e80 }}}, + { 0x1e83, {1, {0x1e82 }}}, + { 0x1e85, {1, {0x1e84 }}}, + { 0x1e87, {1, {0x1e86 }}}, + { 0x1e89, {1, {0x1e88 }}}, + { 0x1e8b, {1, {0x1e8a }}}, + { 0x1e8d, {1, {0x1e8c }}}, + { 0x1e8f, {1, {0x1e8e }}}, + { 0x1e91, {1, {0x1e90 }}}, + { 0x1e93, {1, {0x1e92 }}}, + { 0x1e95, {1, {0x1e94 }}}, + { 0x1ea1, {1, {0x1ea0 }}}, + { 0x1ea3, {1, {0x1ea2 }}}, + { 0x1ea5, {1, {0x1ea4 }}}, + { 0x1ea7, {1, {0x1ea6 }}}, + { 0x1ea9, {1, {0x1ea8 }}}, + { 0x1eab, {1, {0x1eaa }}}, + { 0x1ead, {1, {0x1eac }}}, + { 0x1eaf, {1, {0x1eae }}}, + { 0x1eb1, {1, {0x1eb0 }}}, + { 0x1eb3, {1, {0x1eb2 }}}, + { 0x1eb5, {1, {0x1eb4 }}}, + { 0x1eb7, {1, {0x1eb6 }}}, + { 0x1eb9, {1, {0x1eb8 }}}, + { 0x1ebb, {1, {0x1eba }}}, + { 0x1ebd, {1, {0x1ebc }}}, + { 0x1ebf, {1, {0x1ebe }}}, + { 0x1ec1, {1, {0x1ec0 }}}, + { 0x1ec3, {1, {0x1ec2 }}}, + { 0x1ec5, {1, {0x1ec4 }}}, + { 0x1ec7, {1, {0x1ec6 }}}, + { 0x1ec9, {1, {0x1ec8 }}}, + { 0x1ecb, {1, {0x1eca }}}, + { 0x1ecd, {1, {0x1ecc }}}, + { 0x1ecf, {1, {0x1ece }}}, + { 0x1ed1, {1, {0x1ed0 }}}, + { 0x1ed3, {1, {0x1ed2 }}}, + { 0x1ed5, {1, {0x1ed4 }}}, + { 0x1ed7, {1, {0x1ed6 }}}, + { 0x1ed9, {1, {0x1ed8 }}}, + { 0x1edb, {1, {0x1eda }}}, + { 0x1edd, {1, {0x1edc }}}, + { 0x1edf, {1, {0x1ede }}}, + { 0x1ee1, {1, {0x1ee0 }}}, + { 0x1ee3, {1, {0x1ee2 }}}, + { 0x1ee5, {1, {0x1ee4 }}}, + { 0x1ee7, {1, {0x1ee6 }}}, + { 0x1ee9, {1, {0x1ee8 }}}, + { 0x1eeb, {1, {0x1eea }}}, + { 0x1eed, {1, {0x1eec }}}, + { 0x1eef, {1, {0x1eee }}}, + { 0x1ef1, {1, {0x1ef0 }}}, + { 0x1ef3, {1, {0x1ef2 }}}, + { 0x1ef5, {1, {0x1ef4 }}}, + { 0x1ef7, {1, {0x1ef6 }}}, + { 0x1ef9, {1, {0x1ef8 }}}, + { 0x1f00, {1, {0x1f08 }}}, + { 0x1f01, {1, {0x1f09 }}}, + { 0x1f02, {1, {0x1f0a }}}, + { 0x1f03, {1, {0x1f0b }}}, + { 0x1f04, {1, {0x1f0c }}}, + { 0x1f05, {1, {0x1f0d }}}, + { 0x1f06, {1, {0x1f0e }}}, + { 0x1f07, {1, {0x1f0f }}}, + { 0x1f10, {1, {0x1f18 }}}, + { 0x1f11, {1, {0x1f19 }}}, + { 0x1f12, {1, {0x1f1a }}}, + { 0x1f13, {1, {0x1f1b }}}, + { 0x1f14, {1, 
{0x1f1c }}}, + { 0x1f15, {1, {0x1f1d }}}, + { 0x1f20, {1, {0x1f28 }}}, + { 0x1f21, {1, {0x1f29 }}}, + { 0x1f22, {1, {0x1f2a }}}, + { 0x1f23, {1, {0x1f2b }}}, + { 0x1f24, {1, {0x1f2c }}}, + { 0x1f25, {1, {0x1f2d }}}, + { 0x1f26, {1, {0x1f2e }}}, + { 0x1f27, {1, {0x1f2f }}}, + { 0x1f30, {1, {0x1f38 }}}, + { 0x1f31, {1, {0x1f39 }}}, + { 0x1f32, {1, {0x1f3a }}}, + { 0x1f33, {1, {0x1f3b }}}, + { 0x1f34, {1, {0x1f3c }}}, + { 0x1f35, {1, {0x1f3d }}}, + { 0x1f36, {1, {0x1f3e }}}, + { 0x1f37, {1, {0x1f3f }}}, + { 0x1f40, {1, {0x1f48 }}}, + { 0x1f41, {1, {0x1f49 }}}, + { 0x1f42, {1, {0x1f4a }}}, + { 0x1f43, {1, {0x1f4b }}}, + { 0x1f44, {1, {0x1f4c }}}, + { 0x1f45, {1, {0x1f4d }}}, + { 0x1f51, {1, {0x1f59 }}}, + { 0x1f53, {1, {0x1f5b }}}, + { 0x1f55, {1, {0x1f5d }}}, + { 0x1f57, {1, {0x1f5f }}}, + { 0x1f60, {1, {0x1f68 }}}, + { 0x1f61, {1, {0x1f69 }}}, + { 0x1f62, {1, {0x1f6a }}}, + { 0x1f63, {1, {0x1f6b }}}, + { 0x1f64, {1, {0x1f6c }}}, + { 0x1f65, {1, {0x1f6d }}}, + { 0x1f66, {1, {0x1f6e }}}, + { 0x1f67, {1, {0x1f6f }}}, + { 0x1f70, {1, {0x1fba }}}, + { 0x1f71, {1, {0x1fbb }}}, + { 0x1f72, {1, {0x1fc8 }}}, + { 0x1f73, {1, {0x1fc9 }}}, + { 0x1f74, {1, {0x1fca }}}, + { 0x1f75, {1, {0x1fcb }}}, + { 0x1f76, {1, {0x1fda }}}, + { 0x1f77, {1, {0x1fdb }}}, + { 0x1f78, {1, {0x1ff8 }}}, + { 0x1f79, {1, {0x1ff9 }}}, + { 0x1f7a, {1, {0x1fea }}}, + { 0x1f7b, {1, {0x1feb }}}, + { 0x1f7c, {1, {0x1ffa }}}, + { 0x1f7d, {1, {0x1ffb }}}, + { 0x1fb0, {1, {0x1fb8 }}}, + { 0x1fb1, {1, {0x1fb9 }}}, + { 0x1fd0, {1, {0x1fd8 }}}, + { 0x1fd1, {1, {0x1fd9 }}}, + { 0x1fe0, {1, {0x1fe8 }}}, + { 0x1fe1, {1, {0x1fe9 }}}, + { 0x1fe5, {1, {0x1fec }}}, + { 0x2170, {1, {0x2160 }}}, + { 0x2171, {1, {0x2161 }}}, + { 0x2172, {1, {0x2162 }}}, + { 0x2173, {1, {0x2163 }}}, + { 0x2174, {1, {0x2164 }}}, + { 0x2175, {1, {0x2165 }}}, + { 0x2176, {1, {0x2166 }}}, + { 0x2177, {1, {0x2167 }}}, + { 0x2178, {1, {0x2168 }}}, + { 0x2179, {1, {0x2169 }}}, + { 0x217a, {1, {0x216a }}}, + { 0x217b, {1, {0x216b }}}, + { 0x217c, 
{1, {0x216c }}}, + { 0x217d, {1, {0x216d }}}, + { 0x217e, {1, {0x216e }}}, + { 0x217f, {1, {0x216f }}}, + { 0x24d0, {1, {0x24b6 }}}, + { 0x24d1, {1, {0x24b7 }}}, + { 0x24d2, {1, {0x24b8 }}}, + { 0x24d3, {1, {0x24b9 }}}, + { 0x24d4, {1, {0x24ba }}}, + { 0x24d5, {1, {0x24bb }}}, + { 0x24d6, {1, {0x24bc }}}, + { 0x24d7, {1, {0x24bd }}}, + { 0x24d8, {1, {0x24be }}}, + { 0x24d9, {1, {0x24bf }}}, + { 0x24da, {1, {0x24c0 }}}, + { 0x24db, {1, {0x24c1 }}}, + { 0x24dc, {1, {0x24c2 }}}, + { 0x24dd, {1, {0x24c3 }}}, + { 0x24de, {1, {0x24c4 }}}, + { 0x24df, {1, {0x24c5 }}}, + { 0x24e0, {1, {0x24c6 }}}, + { 0x24e1, {1, {0x24c7 }}}, + { 0x24e2, {1, {0x24c8 }}}, + { 0x24e3, {1, {0x24c9 }}}, + { 0x24e4, {1, {0x24ca }}}, + { 0x24e5, {1, {0x24cb }}}, + { 0x24e6, {1, {0x24cc }}}, + { 0x24e7, {1, {0x24cd }}}, + { 0x24e8, {1, {0x24ce }}}, + { 0x24e9, {1, {0x24cf }}}, + { 0x2c30, {1, {0x2c00 }}}, + { 0x2c31, {1, {0x2c01 }}}, + { 0x2c32, {1, {0x2c02 }}}, + { 0x2c33, {1, {0x2c03 }}}, + { 0x2c34, {1, {0x2c04 }}}, + { 0x2c35, {1, {0x2c05 }}}, + { 0x2c36, {1, {0x2c06 }}}, + { 0x2c37, {1, {0x2c07 }}}, + { 0x2c38, {1, {0x2c08 }}}, + { 0x2c39, {1, {0x2c09 }}}, + { 0x2c3a, {1, {0x2c0a }}}, + { 0x2c3b, {1, {0x2c0b }}}, + { 0x2c3c, {1, {0x2c0c }}}, + { 0x2c3d, {1, {0x2c0d }}}, + { 0x2c3e, {1, {0x2c0e }}}, + { 0x2c3f, {1, {0x2c0f }}}, + { 0x2c40, {1, {0x2c10 }}}, + { 0x2c41, {1, {0x2c11 }}}, + { 0x2c42, {1, {0x2c12 }}}, + { 0x2c43, {1, {0x2c13 }}}, + { 0x2c44, {1, {0x2c14 }}}, + { 0x2c45, {1, {0x2c15 }}}, + { 0x2c46, {1, {0x2c16 }}}, + { 0x2c47, {1, {0x2c17 }}}, + { 0x2c48, {1, {0x2c18 }}}, + { 0x2c49, {1, {0x2c19 }}}, + { 0x2c4a, {1, {0x2c1a }}}, + { 0x2c4b, {1, {0x2c1b }}}, + { 0x2c4c, {1, {0x2c1c }}}, + { 0x2c4d, {1, {0x2c1d }}}, + { 0x2c4e, {1, {0x2c1e }}}, + { 0x2c4f, {1, {0x2c1f }}}, + { 0x2c50, {1, {0x2c20 }}}, + { 0x2c51, {1, {0x2c21 }}}, + { 0x2c52, {1, {0x2c22 }}}, + { 0x2c53, {1, {0x2c23 }}}, + { 0x2c54, {1, {0x2c24 }}}, + { 0x2c55, {1, {0x2c25 }}}, + { 0x2c56, {1, {0x2c26 }}}, + { 
0x2c57, {1, {0x2c27 }}}, + { 0x2c58, {1, {0x2c28 }}}, + { 0x2c59, {1, {0x2c29 }}}, + { 0x2c5a, {1, {0x2c2a }}}, + { 0x2c5b, {1, {0x2c2b }}}, + { 0x2c5c, {1, {0x2c2c }}}, + { 0x2c5d, {1, {0x2c2d }}}, + { 0x2c5e, {1, {0x2c2e }}}, + { 0x2c81, {1, {0x2c80 }}}, + { 0x2c83, {1, {0x2c82 }}}, + { 0x2c85, {1, {0x2c84 }}}, + { 0x2c87, {1, {0x2c86 }}}, + { 0x2c89, {1, {0x2c88 }}}, + { 0x2c8b, {1, {0x2c8a }}}, + { 0x2c8d, {1, {0x2c8c }}}, + { 0x2c8f, {1, {0x2c8e }}}, + { 0x2c91, {1, {0x2c90 }}}, + { 0x2c93, {1, {0x2c92 }}}, + { 0x2c95, {1, {0x2c94 }}}, + { 0x2c97, {1, {0x2c96 }}}, + { 0x2c99, {1, {0x2c98 }}}, + { 0x2c9b, {1, {0x2c9a }}}, + { 0x2c9d, {1, {0x2c9c }}}, + { 0x2c9f, {1, {0x2c9e }}}, + { 0x2ca1, {1, {0x2ca0 }}}, + { 0x2ca3, {1, {0x2ca2 }}}, + { 0x2ca5, {1, {0x2ca4 }}}, + { 0x2ca7, {1, {0x2ca6 }}}, + { 0x2ca9, {1, {0x2ca8 }}}, + { 0x2cab, {1, {0x2caa }}}, + { 0x2cad, {1, {0x2cac }}}, + { 0x2caf, {1, {0x2cae }}}, + { 0x2cb1, {1, {0x2cb0 }}}, + { 0x2cb3, {1, {0x2cb2 }}}, + { 0x2cb5, {1, {0x2cb4 }}}, + { 0x2cb7, {1, {0x2cb6 }}}, + { 0x2cb9, {1, {0x2cb8 }}}, + { 0x2cbb, {1, {0x2cba }}}, + { 0x2cbd, {1, {0x2cbc }}}, + { 0x2cbf, {1, {0x2cbe }}}, + { 0x2cc1, {1, {0x2cc0 }}}, + { 0x2cc3, {1, {0x2cc2 }}}, + { 0x2cc5, {1, {0x2cc4 }}}, + { 0x2cc7, {1, {0x2cc6 }}}, + { 0x2cc9, {1, {0x2cc8 }}}, + { 0x2ccb, {1, {0x2cca }}}, + { 0x2ccd, {1, {0x2ccc }}}, + { 0x2ccf, {1, {0x2cce }}}, + { 0x2cd1, {1, {0x2cd0 }}}, + { 0x2cd3, {1, {0x2cd2 }}}, + { 0x2cd5, {1, {0x2cd4 }}}, + { 0x2cd7, {1, {0x2cd6 }}}, + { 0x2cd9, {1, {0x2cd8 }}}, + { 0x2cdb, {1, {0x2cda }}}, + { 0x2cdd, {1, {0x2cdc }}}, + { 0x2cdf, {1, {0x2cde }}}, + { 0x2ce1, {1, {0x2ce0 }}}, + { 0x2ce3, {1, {0x2ce2 }}}, + { 0x2d00, {1, {0x10a0 }}}, + { 0x2d01, {1, {0x10a1 }}}, + { 0x2d02, {1, {0x10a2 }}}, + { 0x2d03, {1, {0x10a3 }}}, + { 0x2d04, {1, {0x10a4 }}}, + { 0x2d05, {1, {0x10a5 }}}, + { 0x2d06, {1, {0x10a6 }}}, + { 0x2d07, {1, {0x10a7 }}}, + { 0x2d08, {1, {0x10a8 }}}, + { 0x2d09, {1, {0x10a9 }}}, + { 0x2d0a, {1, {0x10aa }}}, + 
{ 0x2d0b, {1, {0x10ab }}}, + { 0x2d0c, {1, {0x10ac }}}, + { 0x2d0d, {1, {0x10ad }}}, + { 0x2d0e, {1, {0x10ae }}}, + { 0x2d0f, {1, {0x10af }}}, + { 0x2d10, {1, {0x10b0 }}}, + { 0x2d11, {1, {0x10b1 }}}, + { 0x2d12, {1, {0x10b2 }}}, + { 0x2d13, {1, {0x10b3 }}}, + { 0x2d14, {1, {0x10b4 }}}, + { 0x2d15, {1, {0x10b5 }}}, + { 0x2d16, {1, {0x10b6 }}}, + { 0x2d17, {1, {0x10b7 }}}, + { 0x2d18, {1, {0x10b8 }}}, + { 0x2d19, {1, {0x10b9 }}}, + { 0x2d1a, {1, {0x10ba }}}, + { 0x2d1b, {1, {0x10bb }}}, + { 0x2d1c, {1, {0x10bc }}}, + { 0x2d1d, {1, {0x10bd }}}, + { 0x2d1e, {1, {0x10be }}}, + { 0x2d1f, {1, {0x10bf }}}, + { 0x2d20, {1, {0x10c0 }}}, + { 0x2d21, {1, {0x10c1 }}}, + { 0x2d22, {1, {0x10c2 }}}, + { 0x2d23, {1, {0x10c3 }}}, + { 0x2d24, {1, {0x10c4 }}}, + { 0x2d25, {1, {0x10c5 }}}, + { 0xff41, {1, {0xff21 }}}, + { 0xff42, {1, {0xff22 }}}, + { 0xff43, {1, {0xff23 }}}, + { 0xff44, {1, {0xff24 }}}, + { 0xff45, {1, {0xff25 }}}, + { 0xff46, {1, {0xff26 }}}, + { 0xff47, {1, {0xff27 }}}, + { 0xff48, {1, {0xff28 }}}, + { 0xff49, {1, {0xff29 }}}, + { 0xff4a, {1, {0xff2a }}}, + { 0xff4b, {1, {0xff2b }}}, + { 0xff4c, {1, {0xff2c }}}, + { 0xff4d, {1, {0xff2d }}}, + { 0xff4e, {1, {0xff2e }}}, + { 0xff4f, {1, {0xff2f }}}, + { 0xff50, {1, {0xff30 }}}, + { 0xff51, {1, {0xff31 }}}, + { 0xff52, {1, {0xff32 }}}, + { 0xff53, {1, {0xff33 }}}, + { 0xff54, {1, {0xff34 }}}, + { 0xff55, {1, {0xff35 }}}, + { 0xff56, {1, {0xff36 }}}, + { 0xff57, {1, {0xff37 }}}, + { 0xff58, {1, {0xff38 }}}, + { 0xff59, {1, {0xff39 }}}, + { 0xff5a, {1, {0xff3a }}}, + { 0x10428, {1, {0x10400 }}}, + { 0x10429, {1, {0x10401 }}}, + { 0x1042a, {1, {0x10402 }}}, + { 0x1042b, {1, {0x10403 }}}, + { 0x1042c, {1, {0x10404 }}}, + { 0x1042d, {1, {0x10405 }}}, + { 0x1042e, {1, {0x10406 }}}, + { 0x1042f, {1, {0x10407 }}}, + { 0x10430, {1, {0x10408 }}}, + { 0x10431, {1, {0x10409 }}}, + { 0x10432, {1, {0x1040a }}}, + { 0x10433, {1, {0x1040b }}}, + { 0x10434, {1, {0x1040c }}}, + { 0x10435, {1, {0x1040d }}}, + { 0x10436, {1, {0x1040e }}}, 
+ { 0x10437, {1, {0x1040f }}}, + { 0x10438, {1, {0x10410 }}}, + { 0x10439, {1, {0x10411 }}}, + { 0x1043a, {1, {0x10412 }}}, + { 0x1043b, {1, {0x10413 }}}, + { 0x1043c, {1, {0x10414 }}}, + { 0x1043d, {1, {0x10415 }}}, + { 0x1043e, {1, {0x10416 }}}, + { 0x1043f, {1, {0x10417 }}}, + { 0x10440, {1, {0x10418 }}}, + { 0x10441, {1, {0x10419 }}}, + { 0x10442, {1, {0x1041a }}}, + { 0x10443, {1, {0x1041b }}}, + { 0x10444, {1, {0x1041c }}}, + { 0x10445, {1, {0x1041d }}}, + { 0x10446, {1, {0x1041e }}}, + { 0x10447, {1, {0x1041f }}}, + { 0x10448, {1, {0x10420 }}}, + { 0x10449, {1, {0x10421 }}}, + { 0x1044a, {1, {0x10422 }}}, + { 0x1044b, {1, {0x10423 }}}, + { 0x1044c, {1, {0x10424 }}}, + { 0x1044d, {1, {0x10425 }}}, + { 0x1044e, {1, {0x10426 }}}, + { 0x1044f, {1, {0x10427 }}} +}; + +static const CaseUnfold_11_Type CaseUnfold_11_Locale[] = { + { 0x0069, {1, {0x0049 }}} +}; + +static const CaseUnfold_12_Type CaseUnfold_12[] = { + { {0x0061, 0x02be}, {1, {0x1e9a }}}, + { {0x0066, 0x0066}, {1, {0xfb00 }}}, + { {0x0066, 0x0069}, {1, {0xfb01 }}}, + { {0x0066, 0x006c}, {1, {0xfb02 }}}, + { {0x0068, 0x0331}, {1, {0x1e96 }}}, + { {0x006a, 0x030c}, {1, {0x01f0 }}}, + { {0x0073, 0x0073}, {1, {0x00df }}}, + { {0x0073, 0x0074}, {2, {0xfb05, 0xfb06 }}}, + { {0x0074, 0x0308}, {1, {0x1e97 }}}, + { {0x0077, 0x030a}, {1, {0x1e98 }}}, + { {0x0079, 0x030a}, {1, {0x1e99 }}}, + { {0x02bc, 0x006e}, {1, {0x0149 }}}, + { {0x03ac, 0x03b9}, {1, {0x1fb4 }}}, + { {0x03ae, 0x03b9}, {1, {0x1fc4 }}}, + { {0x03b1, 0x0342}, {1, {0x1fb6 }}}, + { {0x03b1, 0x03b9}, {2, {0x1fb3, 0x1fbc }}}, + { {0x03b7, 0x0342}, {1, {0x1fc6 }}}, + { {0x03b7, 0x03b9}, {2, {0x1fc3, 0x1fcc }}}, + { {0x03b9, 0x0342}, {1, {0x1fd6 }}}, + { {0x03c1, 0x0313}, {1, {0x1fe4 }}}, + { {0x03c5, 0x0313}, {1, {0x1f50 }}}, + { {0x03c5, 0x0342}, {1, {0x1fe6 }}}, + { {0x03c9, 0x0342}, {1, {0x1ff6 }}}, + { {0x03c9, 0x03b9}, {2, {0x1ff3, 0x1ffc }}}, + { {0x03ce, 0x03b9}, {1, {0x1ff4 }}}, + { {0x0565, 0x0582}, {1, {0x0587 }}}, + { {0x0574, 0x0565}, {1, 
{0xfb14 }}}, + { {0x0574, 0x056b}, {1, {0xfb15 }}}, + { {0x0574, 0x056d}, {1, {0xfb17 }}}, + { {0x0574, 0x0576}, {1, {0xfb13 }}}, + { {0x057e, 0x0576}, {1, {0xfb16 }}}, + { {0x1f00, 0x03b9}, {2, {0x1f88, 0x1f80 }}}, + { {0x1f01, 0x03b9}, {2, {0x1f81, 0x1f89 }}}, + { {0x1f02, 0x03b9}, {2, {0x1f82, 0x1f8a }}}, + { {0x1f03, 0x03b9}, {2, {0x1f83, 0x1f8b }}}, + { {0x1f04, 0x03b9}, {2, {0x1f84, 0x1f8c }}}, + { {0x1f05, 0x03b9}, {2, {0x1f85, 0x1f8d }}}, + { {0x1f06, 0x03b9}, {2, {0x1f86, 0x1f8e }}}, + { {0x1f07, 0x03b9}, {2, {0x1f87, 0x1f8f }}}, + { {0x1f20, 0x03b9}, {2, {0x1f90, 0x1f98 }}}, + { {0x1f21, 0x03b9}, {2, {0x1f91, 0x1f99 }}}, + { {0x1f22, 0x03b9}, {2, {0x1f92, 0x1f9a }}}, + { {0x1f23, 0x03b9}, {2, {0x1f93, 0x1f9b }}}, + { {0x1f24, 0x03b9}, {2, {0x1f94, 0x1f9c }}}, + { {0x1f25, 0x03b9}, {2, {0x1f95, 0x1f9d }}}, + { {0x1f26, 0x03b9}, {2, {0x1f96, 0x1f9e }}}, + { {0x1f27, 0x03b9}, {2, {0x1f97, 0x1f9f }}}, + { {0x1f60, 0x03b9}, {2, {0x1fa0, 0x1fa8 }}}, + { {0x1f61, 0x03b9}, {2, {0x1fa1, 0x1fa9 }}}, + { {0x1f62, 0x03b9}, {2, {0x1fa2, 0x1faa }}}, + { {0x1f63, 0x03b9}, {2, {0x1fa3, 0x1fab }}}, + { {0x1f64, 0x03b9}, {2, {0x1fa4, 0x1fac }}}, + { {0x1f65, 0x03b9}, {2, {0x1fa5, 0x1fad }}}, + { {0x1f66, 0x03b9}, {2, {0x1fa6, 0x1fae }}}, + { {0x1f67, 0x03b9}, {2, {0x1fa7, 0x1faf }}}, + { {0x1f70, 0x03b9}, {1, {0x1fb2 }}}, + { {0x1f74, 0x03b9}, {1, {0x1fc2 }}}, + { {0x1f7c, 0x03b9}, {1, {0x1ff2 }}} +}; + +static const CaseUnfold_12_Type CaseUnfold_12_Locale[] = { + { {0x0069, 0x0307}, {1, {0x0130 }}} +}; + +static const CaseUnfold_13_Type CaseUnfold_13[] = { + { {0x0066, 0x0066, 0x0069}, {1, {0xfb03 }}}, + { {0x0066, 0x0066, 0x006c}, {1, {0xfb04 }}}, + { {0x03b1, 0x0342, 0x03b9}, {1, {0x1fb7 }}}, + { {0x03b7, 0x0342, 0x03b9}, {1, {0x1fc7 }}}, + { {0x03b9, 0x0308, 0x0300}, {1, {0x1fd2 }}}, + { {0x03b9, 0x0308, 0x0301}, {2, {0x0390, 0x1fd3 }}}, + { {0x03b9, 0x0308, 0x0342}, {1, {0x1fd7 }}}, + { {0x03c5, 0x0308, 0x0300}, {1, {0x1fe2 }}}, + { {0x03c5, 0x0308, 0x0301}, {2, 
{0x03b0, 0x1fe3 }}}, + { {0x03c5, 0x0308, 0x0342}, {1, {0x1fe7 }}}, + { {0x03c5, 0x0313, 0x0300}, {1, {0x1f52 }}}, + { {0x03c5, 0x0313, 0x0301}, {1, {0x1f54 }}}, + { {0x03c5, 0x0313, 0x0342}, {1, {0x1f56 }}}, + { {0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7 }}} +}; + +#define numberof(array) (int)(sizeof(array) / sizeof((array)[0])) +#define CODE_RANGES_NUM numberof(CodeRanges) +/* Test whether code belongs to character type ctype. Code points below 256 (and, under USE_UNICODE_PROPERTIES, only ctypes up to ONIGENC_MAX_STD_CTYPE) are answered by ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE; everything else is looked up in CodeRanges[ctype]. Returns ONIGERR_TYPE_BUG when ctype is not below CODE_RANGES_NUM. */ +extern int +onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED) +{ + if ( +#ifdef USE_UNICODE_PROPERTIES + ctype <= ONIGENC_MAX_STD_CTYPE && +#endif + code < 256) { + return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); + } + + if (ctype >= CODE_RANGES_NUM) { + return ONIGERR_TYPE_BUG; + } + + return onig_is_in_code_range((UChar* )CodeRanges[ctype], code); +} + +/* Hand back the code-range table for ctype through *ranges. Returns 0 on success, or ONIGERR_TYPE_BUG when ctype is negative or not below CODE_RANGES_NUM; the negative check matters because ctype is a signed int used directly as the CodeRanges index. */ +extern int +onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]) +{ + if (ctype < 0 || ctype >= CODE_RANGES_NUM) { + return ONIGERR_TYPE_BUG; + } + + *ranges = CodeRanges[ctype]; + + return 0; +} +/* UTF-16/32 variant: sets *sb_out to 0x00 and forwards to onigenc_unicode_ctype_code_range. */ +extern int +onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, + const OnigCodePoint* ranges[], + struct OnigEncodingTypeST* enc ARG_UNUSED) +{ + *sb_out = 0x00; + return onigenc_unicode_ctype_code_range(ctype, ranges); +} + +#include "st.h" + +#define PROPERTY_NAME_MAX_SIZE MAX_WORD_LENGTH + +extern int +onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end) +{ + int len; + int ctype; + UChar buf[PROPERTY_NAME_MAX_SIZE]; + UChar *p; + OnigCodePoint code; + + p = name; + len = 0; + for (p = name; p < end; p += enclen(enc, p, end)) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code == ' ' || code == '-' || code == '_') + continue; + if (code >= 0x80) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + + buf[len++] = (UChar )TOLOWER((unsigned char)code); + if (len >= PROPERTY_NAME_MAX_SIZE) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + } + + buf[len] = 0; + + if ((ctype = uniname2ctype(buf, len)) < 0) { + return 
ONIGERR_INVALID_CHAR_PROPERTY_NAME; + } + + return ctype; +} + +/* st hash-table callbacks for keys made of two code points: the comparator returns 0 only when both elements match, and the hash is the sum of the two. */ +static int +code2_cmp(OnigCodePoint* x, OnigCodePoint* y) +{ + if (x[0] == y[0] && x[1] == y[1]) return 0; + return 1; +} + +static st_index_t +code2_hash(OnigCodePoint* x) +{ + return (st_index_t )(x[0] + x[1]); +} + +static const struct st_hash_type type_code2_hash = { + code2_cmp, + code2_hash, +}; +/* The same pair of callbacks for keys made of three code points. */ +static int +code3_cmp(OnigCodePoint* x, OnigCodePoint* y) +{ + if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0; + return 1; +} + +static st_index_t +code3_hash(OnigCodePoint* x) +{ + return (st_index_t )(x[0] + x[1] + x[2]); +} + +static const struct st_hash_type type_code3_hash = { + code3_cmp, + code3_hash, +}; + + +static st_table* FoldTable; /* fold-1, fold-2, fold-3 */ +static st_table* Unfold1Table; +static st_table* Unfold2Table; +static st_table* Unfold3Table; +static int CaseFoldInited = 0; +/* Fill FoldTable and Unfold1Table..Unfold3Table from the static CaseFold / CaseUnfold arrays inside a THREAD_ATOMIC_START/THREAD_ATOMIC_END section. Returns 0 and sets CaseFoldInited on success. When a table cannot be allocated the atomic section is left before returning ONIGERR_MEMORY, CaseFoldInited stays 0, and tables created earlier in the same call are not released. */ +static int init_case_fold_table(void) +{ + const CaseFold_11_Type *p; + const CaseUnfold_11_Type *p1; + const CaseUnfold_12_Type *p2; + const CaseUnfold_13_Type *p3; + int i; + + THREAD_ATOMIC_START; + + FoldTable = st_init_numtable_with_size(1200); + if (ONIG_IS_NULL(FoldTable)) { THREAD_ATOMIC_END; return ONIGERR_MEMORY; } + for (i = 0; i < numberof(CaseFold); i++) { + p = &CaseFold[i]; + st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); + } + for (i = 0; i < numberof(CaseFold_Locale); i++) { + p = &CaseFold_Locale[i]; + st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); + } + + Unfold1Table = st_init_numtable_with_size(1000); + if (ONIG_IS_NULL(Unfold1Table)) { THREAD_ATOMIC_END; return ONIGERR_MEMORY; } + + for (i = 0; i < numberof(CaseUnfold_11); i++) { + p1 = &CaseUnfold_11[i]; + st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); + } + for (i = 0; i < numberof(CaseUnfold_11_Locale); i++) { + p1 = &CaseUnfold_11_Locale[i]; + st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); + } + + Unfold2Table = st_init_table_with_size(&type_code2_hash, 200); + if 
(ONIG_IS_NULL(Unfold2Table)) { THREAD_ATOMIC_END; return ONIGERR_MEMORY; } + + for (i = 0; i < numberof(CaseUnfold_12); i++) { + p2 = &CaseUnfold_12[i]; + st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); + } + for (i = 0; i < numberof(CaseUnfold_12_Locale); i++) { + p2 = &CaseUnfold_12_Locale[i]; + st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); + } + + Unfold3Table = st_init_table_with_size(&type_code3_hash, 30); + if (ONIG_IS_NULL(Unfold3Table)) { THREAD_ATOMIC_END; return ONIGERR_MEMORY; } + + for (i = 0; i < numberof(CaseUnfold_13); i++) { + p3 = &CaseUnfold_13[i]; + st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to)); + } + + CaseFoldInited = 1; + THREAD_ATOMIC_END; + return 0; +} + +extern int +onigenc_unicode_mbc_case_fold(OnigEncoding enc, + OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end, + UChar* fold) +{ + CodePointList3 *to; + OnigCodePoint code; + int i, len, rlen; + const UChar *p = *pp; + + if (CaseFoldInited == 0) init_case_fold_table(); + + code = ONIGENC_MBC_TO_CODE(enc, p, end); + len = enclen(enc, p, end); + *pp += len; + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (code == 0x0049) { + return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold); + } + else if (code == 0x0130) { + return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold); + } + } +#endif + + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { + if (to->n == 1) { + return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold); + } + else + { + rlen = 0; + for (i = 0; i < to->n; i++) { + len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold); + fold += len; + rlen += len; + } + return rlen; + } + } + + for (i = 0; i < len; i++) { + *fold++ = *p++; + } + return len; +} + +extern int +onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg, + OnigEncoding enc ARG_UNUSED) +{ + const CaseUnfold_11_Type* p11; + OnigCodePoint code; + int i, j, k, r; + + /* if 
(CaseFoldInited == 0) init_case_fold_table(); */ + + for (i = 0; i < numberof(CaseUnfold_11); i++) { + p11 = &CaseUnfold_11[i]; + for (j = 0; j < p11->to.n; j++) { + code = p11->from; + r = (*f)(p11->to.code[j], &code, 1, arg); + if (r != 0) return r; + + code = p11->to.code[j]; + r = (*f)(p11->from, &code, 1, arg); + if (r != 0) return r; + + for (k = 0; k < j; k++) { + r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg); + if (r != 0) return r; + + r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg); + if (r != 0) return r; + } + } + } + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + code = 0x0131; + r = (*f)(0x0049, &code, 1, arg); + if (r != 0) return r; + code = 0x0049; + r = (*f)(0x0131, &code, 1, arg); + if (r != 0) return r; + + code = 0x0130; + r = (*f)(0x0069, &code, 1, arg); + if (r != 0) return r; + code = 0x0069; + r = (*f)(0x0130, &code, 1, arg); + if (r != 0) return r; + } + else { +#endif + for (i = 0; i < numberof(CaseUnfold_11_Locale); i++) { + p11 = &CaseUnfold_11_Locale[i]; + for (j = 0; j < p11->to.n; j++) { + code = p11->from; + r = (*f)(p11->to.code[j], &code, 1, arg); + if (r != 0) return r; + + code = p11->to.code[j]; + r = (*f)(p11->from, &code, 1, arg); + if (r != 0) return r; + + for (k = 0; k < j; k++) { + r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), + 1, arg); + if (r != 0) return r; + + r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), + 1, arg); + if (r != 0) return r; + } + } + } +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + + if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + for (i = 0; i < numberof(CaseUnfold_12); i++) { + for (j = 0; j < CaseUnfold_12[i].to.n; j++) { + r = (*f)(CaseUnfold_12[i].to.code[j], + (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_12[i].to.n; k++) { + if (k == j) continue; + + r = 
(*f)(CaseUnfold_12[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg); + if (r != 0) return r; + } + } + } + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) { +#endif + for (i = 0; i < numberof(CaseUnfold_12_Locale); i++) { + for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) { + r = (*f)(CaseUnfold_12_Locale[i].to.code[j], + (OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_12_Locale[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]), + 1, arg); + if (r != 0) return r; + } + } + } +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + + for (i = 0; i < numberof(CaseUnfold_13); i++) { + for (j = 0; j < CaseUnfold_13[i].to.n; j++) { + r = (*f)(CaseUnfold_13[i].to.code[j], + (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_13[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_13[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg); + if (r != 0) return r; + } + } + } + } + + return 0; +} + +extern int +onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, + OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[]) +{ + int n, i, j, k, len; + OnigCodePoint code, codes[3]; + CodePointList3 *to, *z3; + CodePointList2 *z2; + + if (CaseFoldInited == 0) init_case_fold_table(); + + n = 0; + + code = ONIGENC_MBC_TO_CODE(enc, p, end); + len = enclen(enc, p, end); + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (code == 0x0049) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0131; + return 1; + } + else if (code == 0x0130) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0069; + return 1; + } + else if 
(code == 0x0131) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0049; + return 1; + } + else if (code == 0x0069) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0130; + return 1; + } + } +#endif + + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { + if (to->n == 1) { + OnigCodePoint orig_code = code; + + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = to->code[0]; + n++; + + code = to->code[0]; + if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { + for (i = 0; i < to->n; i++) { + if (to->code[i] != orig_code) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = to->code[i]; + n++; + } + } + } + } + else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + OnigCodePoint cs[3][4]; + int fn, ncs[3]; + + for (fn = 0; fn < to->n; fn++) { + cs[fn][0] = to->code[fn]; + if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0], + (void* )&z3) != 0) { + for (i = 0; i < z3->n; i++) { + cs[fn][i+1] = z3->code[i]; + } + ncs[fn] = z3->n + 1; + } + else + ncs[fn] = 1; + } + + if (fn == 2) { + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + items[n].byte_len = len; + items[n].code_len = 2; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + n++; + } + } + + if (onig_st_lookup(Unfold2Table, (st_data_t )to->code, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + if (z2->code[i] == code) continue; + + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + else { + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + for (k = 0; k < ncs[2]; k++) { + items[n].byte_len = len; + items[n].code_len = 3; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + items[n].code[2] = cs[2][k]; + n++; + } + } + } + + if (onig_st_lookup(Unfold3Table, (st_data_t )to->code, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + if (z2->code[i] 
== code) continue; + + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + + /* multi char folded code is not head of another folded multi char */ + flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */ + } + } + else { + if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { + for (i = 0; i < to->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = to->code[i]; + n++; + } + } + } + + + if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + p += len; + if (p < end) { + int clen; + + codes[0] = code; + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 + && to->n == 1) { + codes[1] = to->code[0]; + } + else + codes[1] = code; + + clen = enclen(enc, p, end); + len += clen; + if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + + p += clen; + if (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 + && to->n == 1) { + codes[2] = to->code[0]; + } + else + codes[2] = code; + + clen = enclen(enc, p, end); + len += clen; + if (onig_st_lookup(Unfold3Table, (st_data_t )codes, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + } + } + + return n; +} +#endif //INCLUDE_ENCODING diff --git a/src/us_ascii.c b/src/us_ascii.c new file mode 100644 index 0000000000..b6e3f50cf2 --- /dev/null +++ b/src/us_ascii.c @@ -0,0 +1,34 @@ +#include "mruby.h" +#ifdef INCLUDE_ENCODING +#include "regenc.h" + +static int +us_ascii_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc) +{ + if (*p & 0x80) + return ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1); +} + 
+OnigEncodingDefine(us_ascii, US_ASCII) = { + us_ascii_mbc_enc_len, + "US-ASCII",/* name */ + 1, /* max byte length */ + 1, /* min byte length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + onigenc_ascii_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ASCII", "US-ASCII") +ENC_ALIAS("ANSI_X3.4-1968", "US-ASCII") +ENC_ALIAS("646", "US-ASCII") +#endif //INCLUDE_ENCODING diff --git a/src/utf_8.c b/src/utf_8.c new file mode 100644 index 0000000000..9af010d4af --- /dev/null +++ b/src/utf_8.c @@ -0,0 +1,460 @@ +/********************************************************************** + utf_8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "mruby.h" +#ifdef INCLUDE_ENCODING +#include "regenc.h" + +#define USE_INVALID_CODE_SCHEME + +#ifdef USE_INVALID_CODE_SCHEME +/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */ +#define INVALID_CODE_FE 0xfffffffe +#define INVALID_CODE_FF 0xffffffff +#define VALID_CODE_LIMIT 0x7fffffff +#endif + +#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) + +static const int EncLen_UTF8[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +typedef enum { + FAILURE = -2, + ACCEPT, + S0, S1, S2, S3, + S4, S5, S6, S7 +} state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, 
A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* c */ F, F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, + /* f */ 5, 6, 6, 6, 7, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 
*/ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S3 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S4 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, 
F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S5 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S6 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, 
F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S7 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, +}; +#undef A +#undef F + +static int +mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) +{ + int firstbyte = *p++; + state_t s; + s = trans[0][firstbyte]; + if (s < 0) return s == ACCEPT ? 
ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-1); + s = trans[s][*p++]; + if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-2); + s = trans[s][*p++]; + if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-3); + s = trans[s][*p++]; + return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); +} + +static int +is_mbc_newline(const UChar* p, const UChar* end, OnigEncoding enc) +{ + if (p < end) { + if (*p == 0x0a) return 1; + +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS +#ifndef USE_CRNL_AS_LINE_TERMINATOR + if (*p == 0x0d) return 1; +#endif + if (p + 1 < end) { + if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ + return 1; + if (p + 2 < end) { + if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) + && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ + return 1; + } + } +#endif + } + + return 0; +} + +static OnigCodePoint +mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) +{ + int c, len; + OnigCodePoint n; + + len = enclen(enc, p, end); + c = *p++; + if (len > 1) { + len--; + n = c & ((1 << (6 - len)) - 1); + while (len--) { + c = *p++; + n = (n << 6) | (c & ((1 << 6) - 1)); + } + return n; + } + else { +#ifdef USE_INVALID_CODE_SCHEME + if (c > 0xfd) { + return ((c == 0xfe) ? 
INVALID_CODE_FE : INVALID_CODE_FF); + } +#endif + return (OnigCodePoint )c; + } +} + +static int +code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) +{ + if ((code & 0xffffff80) == 0) return 1; + else if ((code & 0xfffff800) == 0) return 2; + else if ((code & 0xffff0000) == 0) return 3; + else if ((code & 0xffe00000) == 0) return 4; + else if ((code & 0xfc000000) == 0) return 5; + else if ((code & 0x80000000) == 0) return 6; +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) return 1; + else if (code == INVALID_CODE_FF) return 1; +#endif + else + return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; +} + +static int +code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) +{ +#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80) +#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80) + + if ((code & 0xffffff80) == 0) { + *buf = (UChar )code; + return 1; + } + else { + UChar *p = buf; + + if ((code & 0xfffff800) == 0) { + *p++ = (UChar )(((code>>6)& 0x1f) | 0xc0); + } + else if ((code & 0xffff0000) == 0) { + *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xffe00000) == 0) { + *p++ = (UChar )(((code>>18) & 0x07) | 0xf0); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xfc000000) == 0) { + *p++ = (UChar )(((code>>24) & 0x03) | 0xf8); + *p++ = UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0x80000000) == 0) { + *p++ = (UChar )(((code>>30) & 0x01) | 0xfc); + *p++ = UTF8_TRAILS(code, 24); + *p++ = UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) { + *p = 0xfe; + return 1; + } + else if (code == INVALID_CODE_FF) { + *p = 0xff; + return 1; + } +#endif + else { + return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + } + + *p++ = UTF8_TRAIL0(code); + return (int)(p - 
buf); + } +} + +static int +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, + const UChar* end, UChar* fold, OnigEncoding enc) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0xc4; + *fold = 0xb1; + (*pp)++; + return 2; + } + } +#endif + + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ + } + else { + return onigenc_unicode_mbc_case_fold(enc, flag, pp, end, fold); + } +} + + +static int +get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, + const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED) +{ + *sb_out = 0x80; + return onigenc_unicode_ctype_code_range(ctype, ranges); +} + + +static UChar* +left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc ARG_UNUSED) +{ + const UChar *p; + + if (s <= start) return (UChar* )s; + p = s; + + while (!utf8_islead(*p) && p > start) p--; + return (UChar* )p; +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], + OnigEncoding enc) +{ + return onigenc_unicode_get_case_fold_codes_by_str(enc, flag, p, end, items); +} + +OnigEncodingDefine(utf_8, UTF_8) = { + mbc_enc_len, + "UTF-8", /* name */ + 6, /* max byte length */ + 1, /* min byte length */ + is_mbc_newline, + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("CP65001", "UTF-8") + +/* + * Name: UTF8-MAC + * Link: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/BPFileSystem.html + * Link: 
http://developer.apple.com/qa/qa2001/qa1235.html + * Link: http://developer.apple.com/jp/qa/qa2001/qa1235.html + * Link: http://www.gnu.org/software/emacs/NEWS.23.2 + */ +ENC_REPLICATE("UTF8-MAC", "UTF-8") +ENC_ALIAS("UTF-8-MAC", "UTF8-MAC") +ENC_ALIAS("UTF-8-HFS", "UTF8-MAC") /* Emacs 23.2 */ + +#endif //INCLUDE_ENCODING diff --git a/src/variable.c b/src/variable.c new file mode 100644 index 0000000000..0bc1f0de1a --- /dev/null +++ b/src/variable.c @@ -0,0 +1,453 @@ +#include "mruby.h" +#include "mruby/class.h" +#include "ritehash.h" +#include "variable.h" +#include "mruby/string.h" +#include "mruby/range.h" +#include "error.h" +#include "mruby/array.h" + +#ifdef INCLUDE_REGEXP +#include "re.h" +#include "st.h" +#endif + +KHASH_MAP_INIT_INT(iv, mrb_value); + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +static void +mark_tbl(mrb_state *mrb, struct kh_iv *h) +{ + khiter_t k; + + if (!h) return; + for (k = kh_begin(h); k != kh_end(h); k++) + if (kh_exist(h, k)) + mrb_gc_mark_value(mrb, kh_value(h, k)); +} + +void +mrb_gc_mark_gv(mrb_state *mrb) +{ + mark_tbl(mrb, mrb->globals); +} + +void +mrb_gc_free_gv(mrb_state *mrb) +{ + kh_destroy(iv, mrb->globals); +} + +void +mrb_gc_mark_iv(mrb_state *mrb, struct RObject *obj) +{ + mark_tbl(mrb, obj->iv); +} + +size_t +mrb_gc_mark_iv_size(mrb_state *mrb, struct RObject *obj) +{ + khiter_t k; + struct kh_iv *h = obj->iv; + + if (!h) return 0; + return kh_size(h); +} + +void +mrb_gc_free_iv(mrb_state *mrb, struct RObject *obj) +{ + kh_destroy(iv, obj->iv); +} + +mrb_value +mrb_vm_special_get(mrb_state *mrb, mrb_sym i) +{ + return mrb_fixnum_value(0); +} + +void +mrb_vm_special_set(mrb_state *mrb, mrb_sym i, mrb_value v) +{ +} + +static mrb_value +ivget(mrb_state *mrb, struct kh_iv *h, mrb_sym sym) +{ + khiter_t k; + + k = kh_get(iv, h, sym); + if (k != kh_end(h)) + return kh_value(h, k); + return mrb_nil_value(); +} + +mrb_value +mrb_obj_iv_get(mrb_state *mrb, struct RObject *obj, mrb_sym 
sym) +{ + if (!obj->iv) { + return mrb_nil_value(); + } + return ivget(mrb, obj->iv, sym); +} + +mrb_value +mrb_iv_get(mrb_state *mrb, mrb_value obj, mrb_sym sym) +{ + return mrb_obj_iv_get(mrb, mrb_obj_ptr(obj), sym); +} + +static void +ivset(mrb_state *mrb, struct kh_iv *h, mrb_sym sym, mrb_value v) +{ + khiter_t k; + int r; + + k = kh_put(iv, h, sym, &r); + kh_value(h, k) = v; +} + +void +mrb_obj_iv_set(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v) +{ + khash_t(iv) *h; + + if (!obj->iv) { + h = obj->iv = kh_init(iv, mrb); + } + else { + h = obj->iv; + } + mrb_write_barrier(mrb, (struct RBasic*)obj); + ivset(mrb, h, sym, v); +} + +void +mrb_iv_set(mrb_state *mrb, mrb_value obj, mrb_sym sym, mrb_value v) /* mrb_ivar_set */ +{ + mrb_obj_iv_set(mrb, mrb_obj_ptr(obj), sym, v); +} + +mrb_value +mrb_vm_iv_get(mrb_state *mrb, mrb_sym sym) +{ + /* get self */ + return mrb_iv_get(mrb, mrb->stack[0], sym); +} + +void +mrb_vm_iv_set(mrb_state *mrb, mrb_sym sym, mrb_value v) +{ + /* get self */ + mrb_iv_set(mrb, mrb->stack[0], sym, v); +} + +mrb_value +mrb_vm_cv_get(mrb_state *mrb, mrb_sym sym) +{ + struct RClass *c = mrb->ci->target_class; + + while (c) { + if (c->iv) { + khash_t(iv) *h = c->iv; + khiter_t k = kh_get(iv, h, sym); + + if (k != kh_end(h)) + return kh_value(h, k); + } + c = c->super; + } + return mrb_nil_value(); +} + +/* + * Assign the class variable `sym` to `v`. + * + * The superclass chain is walked starting at mrb->ci->target_class; the + * first class whose table already holds `sym` is updated in place and the + * function returns. If no class in the chain defines `sym`, the variable + * is created on target_class, reusing the table that class already has. + */ +void +mrb_vm_cv_set(mrb_state *mrb, mrb_sym sym, mrb_value v) +{ + struct RClass *c = mrb->ci->target_class; + + while (c) { + if (c->iv) { + khash_t(iv) *h = c->iv; + khiter_t k = kh_get(iv, h, sym); + + if (k != kh_end(h)) { + /* found: overwrite and stop, so the fallback below cannot shadow it */ + mrb_obj_iv_set(mrb, (struct RObject*)c, sym, v); + return; + } + } + c = c->super; + } + /* not defined anywhere: mrb_obj_iv_set allocates a table only when one is missing */ + mrb_obj_iv_set(mrb, (struct RObject*)mrb->ci->target_class, sym, v); +} + +int +mrb_const_defined(mrb_state *mrb, mrb_value mod, mrb_sym sym) +{ + khiter_t k; + struct RClass *m = mrb_class_ptr(mod); + struct kh_iv *h = m->iv; + + if (!h) return 0; + k = kh_get(iv, h,
sym); + if (k != kh_end(h)) + return 1; + return 0; +} + +static void +mod_const_check(mrb_state *mrb, mrb_value mod) +{ + switch (mod.tt) { + case MRB_TT_CLASS: + case MRB_TT_MODULE: + break; + default: + mrb_raise(mrb, E_TYPE_ERROR, "constant look-up for non class/module"); + break; + } +} + +static mrb_value +const_get(mrb_state *mrb, struct RClass *base, mrb_sym sym) +{ + struct RClass *c = base; + khash_t(iv) *h; + khiter_t k; + + if (c->iv) { + h = c->iv; + k = kh_get(iv, h, sym); + if (k != kh_end(h)) { + return kh_value(h, k); + } + } + for (;;) { + c = mrb_class_outer_module(mrb, c); + if (!c) break; + if (c->iv) { + h = c->iv; + k = kh_get(iv, h, sym); + if (k != kh_end(h)) { + return kh_value(h, k); + } + } + } + c = base->super; + while (c) { + if (c->iv) { + h = c->iv; + k = kh_get(iv, h, sym); + if (k != kh_end(h)) { + return kh_value(h, k); + } + } + c = c->super; + } + mrb_raise(mrb, E_NAME_ERROR, "uninitialized constant %s", + mrb_sym2name(mrb, sym)); + /* not reached */ + return mrb_nil_value(); +} + +mrb_value +mrb_const_get(mrb_state *mrb, mrb_value mod, mrb_sym sym) +{ + mod_const_check(mrb, mod); + return const_get(mrb, mrb_class_ptr(mod), sym); +} + +mrb_value +mrb_vm_const_get(mrb_state *mrb, mrb_sym sym) +{ + return const_get(mrb, mrb->ci->target_class, sym); +} + +void +mrb_const_set(mrb_state *mrb, mrb_value mod, mrb_sym sym, mrb_value v) +{ + mod_const_check(mrb, mod); + mrb_iv_set(mrb, mod, sym, v); +} + +void +mrb_vm_const_set(mrb_state *mrb, mrb_sym sym, mrb_value v) +{ + mrb_obj_iv_set(mrb, (struct RObject*)mrb->ci->target_class, sym, v); +} + +void +mrb_define_const(mrb_state *mrb, struct RClass *mod, const char *name, mrb_value v) +{ + mrb_obj_iv_set(mrb, (struct RObject*)mod, mrb_intern(mrb, name), v); +} + +void +mrb_define_global_const(mrb_state *mrb, const char *name, mrb_value val) +{ + mrb_define_const(mrb, mrb->object_class, name, val); +} + +mrb_value +mrb_gv_get(mrb_state *mrb, mrb_sym sym) +{ + if (!mrb->globals) { + 
return mrb_nil_value(); + } + return ivget(mrb, mrb->globals, sym); +} + +void +mrb_gv_set(mrb_state *mrb, mrb_sym sym, mrb_value v) +{ + khash_t(iv) *h; + + if (!mrb->globals) { + h = mrb->globals = kh_init(iv, mrb); + } + else { + h = mrb->globals; + } + ivset(mrb, h, sym, v); +} + +/* 15.3.1.2.4 */ +/* 15.3.1.3.14 */ +/* + * call-seq: + * global_variables -> array + * + * Returns an array of the names of global variables. + * + * global_variables.grep /std/ #=> [:$stdin, :$stdout, :$stderr] + * + * The symbols stored in mrb->globals are pushed first (none when the + * table has not been created yet), followed by $1 through $9. + */ +mrb_value +mrb_f_global_variables(mrb_state *mrb, mrb_value self) +{ + char buf[3]; + int i; + struct kh_iv *h = mrb->globals; + mrb_value ary = mrb_ary_new(mrb); + + /* the globals table is created lazily by mrb_gv_set and may still be NULL */ + if (h) { + khiter_t k; + + for (k = kh_begin(h); k != kh_end(h); k++) { + if (kh_exist(h, k)) { + mrb_ary_push(mrb, ary, mrb_symbol_value(kh_key(h, k))); + } + } + } + /* the numbered globals $1..$9 are always reported */ + buf[0] = '$'; + buf[2] = 0; + for (i = 1; i <= 9; ++i) { + buf[1] = (char)(i + '0'); + mrb_ary_push(mrb, ary, mrb_symbol_value(mrb_intern(mrb, buf))); + } + return ary; +} + +int +mrb_st_lookup(struct kh_iv *table, mrb_sym id, khiter_t *value) +{ + khash_t(iv) *h; + khiter_t k; + + if (table) { + h = (khash_t(iv) *)table; + k = kh_get(iv, h, id); + if (k != kh_end(h)) { + if (value != 0) *value = k;//kh_value(h, k); + return 1;/* TRUE */ + } + return 0;/* FALSE */ + } + else { + return 0;/* FALSE */ + } +} + +int +kiv_lookup(khash_t(iv)* table, mrb_sym key, mrb_value *value) +{ + khash_t(iv) *h=table; + khiter_t k; + + // you must check(iv==0), before you call this function.
+ //if (!obj->iv) { + // return 0; + //} + k = kh_get(iv, h, key); + if (k != kh_end(h)) { + *value = kh_value(h, k); + return 1; + } + return 0; +} + +static int +mrb_const_defined_0(mrb_state *mrb, struct RClass *klass, mrb_sym id, int exclude, int recurse) +{ + mrb_value value; + struct RClass * tmp; + int mod_retry = 0; + + tmp = klass; +retry: + while (tmp) { + if (tmp->iv && kiv_lookup(tmp->iv, id, &value)) { + return (int)1/*Qtrue*/; + } + if (!recurse && (klass != mrb->object_class)) break; + tmp = tmp->super; + } + if (!exclude && !mod_retry && (klass->tt == MRB_TT_MODULE)) { + mod_retry = 1; + tmp = mrb->object_class; + goto retry; + } + return (int)0/*Qfalse*/; +} + +int +mrb_const_defined_at(mrb_state *mrb, struct RClass *klass, mrb_sym id) +{ + return mrb_const_defined_0(mrb, klass, id, TRUE, FALSE); +} + +struct RClass * +mrb_class_from_sym(mrb_state *mrb, struct RClass *klass, mrb_sym id) +{ + mrb_value c = const_get(mrb, klass, id); + return mrb_class_ptr(c); +} + +struct RClass * +mrb_class_get(mrb_state *mrb, char *name) +{ + return mrb_class_from_sym(mrb, mrb->object_class, mrb_intern(mrb, name)); +} + +mrb_value +mrb_attr_get(mrb_state *mrb, mrb_value obj, mrb_sym id) +{ + //return ivar_get(obj, id, FALSE); + return mrb_iv_get(mrb, obj, id); +} + +struct RClass * +mrb_class_obj_get(mrb_state *mrb, char *name) +{ + mrb_value mod = mrb_obj_value(mrb->object_class); + mrb_sym sym = mrb_intern(mrb, name); + + return mrb_class_ptr(mrb_const_get(mrb, mod, sym)); +} + diff --git a/src/variable.h b/src/variable.h new file mode 100644 index 0000000000..494099c2c5 --- /dev/null +++ b/src/variable.h @@ -0,0 +1,42 @@ +#ifndef MRUBY_VARIABLE_H +#define MRUBY_VARIABLE_H + +typedef struct global_variable { + int counter; + mrb_value *data; + mrb_value (*getter)(); + void (*setter)(); + //void (*marker)(); + //int block_trace; + //struct trace_var *trace; +} global_variable; +struct global_entry { + global_variable *var; + mrb_sym id; +}; + +mrb_value 
mrb_vm_special_get(mrb_state*, mrb_sym); +void mrb_vm_special_set(mrb_state*, mrb_sym, mrb_value); +mrb_value mrb_vm_iv_get(mrb_state*, mrb_sym); +void mrb_vm_iv_set(mrb_state*, mrb_sym, mrb_value); +mrb_value mrb_vm_cv_get(mrb_state*, mrb_sym); +void mrb_vm_cv_set(mrb_state*, mrb_sym, mrb_value); +mrb_value mrb_vm_const_get(mrb_state*, mrb_sym); +void mrb_vm_const_set(mrb_state*, mrb_sym, mrb_value); +mrb_value mrb_const_get(mrb_state*, mrb_value, mrb_sym); +void mrb_const_set(mrb_state*, mrb_value, mrb_sym, mrb_value); +int mrb_const_defined(mrb_state*, mrb_value, mrb_sym); + +mrb_value mrb_obj_iv_get(mrb_state*, struct RObject*, mrb_sym); +void mrb_obj_iv_set(mrb_state*, struct RObject*, mrb_sym, mrb_value); +const char * mrb_class2name(mrb_state *mrb, struct RClass* klass); +void mrb_define_variable(mrb_state *mrb, const char *name, mrb_value *var); +mrb_value mrb_iv_get(mrb_state *mrb, mrb_value obj, mrb_sym sym); +void mrb_iv_set(mrb_state *mrb, mrb_value obj, mrb_sym sym, mrb_value v); /* mrb_iv_set */ +void mrb_copy_generic_ivar(mrb_value clone, mrb_value obj); +int mrb_const_defined_at(mrb_state *mrb, struct RClass *klass, mrb_sym id); +mrb_value mrb_f_global_variables(mrb_state *mrb, mrb_value self); +mrb_value mrb_gv_get(mrb_state *mrb, mrb_sym sym); +void mrb_gv_set(mrb_state *mrb, mrb_sym sym, mrb_value val); + +#endif /* MRUBY_VARIABLE_H */ diff --git a/src/version.c b/src/version.c new file mode 100644 index 0000000000..d69c6941c1 --- /dev/null +++ b/src/version.c @@ -0,0 +1,87 @@ +/********************************************************************** + + version.c - + + $Author: knu $ + $Date: 2008-05-31 22:37:06 +0900 (Sat, 31 May 2008) $ + created at: Thu Sep 30 20:08:01 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "mruby.h" +#include "version.h" +#include +#include "mruby/string.h" +#include "variable.h" + +#define PRINT(type) puts(ruby_##type) 
+//#define MKSTR(type) mrb_obj_freeze(mrb_str_new(ruby_##type, sizeof(ruby_##type)-1)) +#define MKSTR(type) mrb_str_new(mrb, ruby_##type, sizeof(ruby_##type)-1) + +const char ruby_version[] = RUBY_VERSION; +const char ruby_release_date[] = RUBY_RELEASE_DATE; +const char ruby_platform[] = RUBY_PLATFORM; +const int ruby_patchlevel = RUBY_PATCHLEVEL; +const char ruby_engine[] = RUBY_ENGINE; + +void +Init_version(mrb_state *mrb) +{ + char description[128]; + char copyright[128]; + mrb_value v = MKSTR(version); + mrb_value d = MKSTR(release_date); + mrb_value p = MKSTR(platform); + mrb_value e = MKSTR(engine); + mrb_value tmp; + + mrb_define_global_const(mrb, "RUBY_VERSION", v); + mrb_define_global_const(mrb, "RUBY_RELEASE_DATE", d); + mrb_define_global_const(mrb, "RUBY_PLATFORM", p); + mrb_define_global_const(mrb, "RUBY_PATCHLEVEL", mrb_fixnum_value(RUBY_PATCHLEVEL)); + mrb_define_global_const(mrb, "RUBY_ENGINE", e); + + snprintf(description, sizeof(description), "ruby %s (%s %s %d) [%s]", + RUBY_VERSION, RUBY_RELEASE_DATE, RUBY_RELEASE_STR, + RUBY_RELEASE_NUM, RUBY_PLATFORM); + //tmp = mrb_obj_freeze(mrb_str_new2(description)); + tmp = mrb_str_new2(mrb, description); + mrb_define_global_const(mrb, "RUBY_DESCRIPTION", tmp); + + snprintf(copyright, sizeof(copyright), "ruby - Copyright (C) %d-%d %s", + RUBY_BIRTH_YEAR, RUBY_RELEASE_YEAR, RUBY_AUTHOR); + //tmp = mrb_obj_freeze(mrb_str_new2(copyright)); + tmp = mrb_str_new2(mrb, copyright); + mrb_define_global_const(mrb, "RUBY_COPYRIGHT", tmp); + + /* obsolete constants */ + mrb_define_global_const(mrb, "VERSION", v); + mrb_define_global_const(mrb, "RELEASE_DATE", d); + mrb_define_global_const(mrb, "PLATFORM", p); +} + +void +ruby_show_version(mrb_state *mrb) +{ + mrb_value v = mrb_const_get(mrb, mrb_obj_value(mrb->object_class), mrb_intern(mrb, "RUBY_DESCRIPTION")); + + if (mrb_type(v) != MRB_TT_STRING) + return; + + puts(RSTRING_PTR(v)); + fflush(stdout); +} + +void +ruby_show_copyright(mrb_state *mrb) +{ + mrb_value v = 
mrb_const_get(mrb, mrb_obj_value(mrb->object_class), mrb_intern(mrb, "RUBY_COPYRIGHT")); + + if (mrb_type(v) != MRB_TT_STRING) + return; + + puts(RSTRING_PTR(v)); + exit(0); +} diff --git a/src/version.h b/src/version.h new file mode 100644 index 0000000000..e132efdf4e --- /dev/null +++ b/src/version.h @@ -0,0 +1,32 @@ +#define RUBY_VERSION "1.8.7" +#define RUBY_RELEASE_DATE "2010-08-16" +#define RUBY_VERSION_CODE 187 +#define RUBY_RELEASE_CODE 20100816 +#define RUBY_PATCHLEVEL 302 + +#define RUBY_VERSION_MAJOR 1 +#define RUBY_VERSION_MINOR 8 +#define RUBY_VERSION_TEENY 7 +#define RUBY_RELEASE_YEAR 2010 +#define RUBY_RELEASE_MONTH 8 +#define RUBY_RELEASE_DAY 16 + +#ifdef RUBY_EXTERN +RUBY_EXTERN const char ruby_version[]; +RUBY_EXTERN const char ruby_release_date[]; +RUBY_EXTERN const char ruby_platform[]; +RUBY_EXTERN const int ruby_patchlevel; +RUBY_EXTERN const char *ruby_description; +RUBY_EXTERN const char *ruby_copyright; +#endif + +#define RUBY_AUTHOR "Yukihiro Matsumoto" +#define RUBY_BIRTH_YEAR 1993 +#define RUBY_BIRTH_MONTH 2 +#define RUBY_BIRTH_DAY 24 + +#define RUBY_RELEASE_STR "patchlevel" +#define RUBY_RELEASE_NUM RUBY_PATCHLEVEL + +#define RUBY_PLATFORM "i386-mingw32" +#define RUBY_ENGINE "ruby" diff --git a/src/vm.c b/src/vm.c new file mode 100644 index 0000000000..6983fd86f1 --- /dev/null +++ b/src/vm.c @@ -0,0 +1,1544 @@ +#include "mruby.h" +#include "opcode.h" +#include "irep.h" +#include "variable.h" +#include "mruby/proc.h" +#include "mruby/array.h" +#include "mruby/string.h" +#include "mruby/hash.h" +#include "mruby/range.h" +#include "mruby/class.h" +#include "mruby/numeric.h" +#include "error.h" + +#include +#include +#include + +#define STACK_INIT_SIZE 128 +#define CALLINFO_INIT_SIZE 32 + +static void +stack_init(mrb_state *mrb) +{ + /* assert(mrb->stack == NULL); */ + mrb->stbase = mrb_malloc(mrb, sizeof(mrb_value) * STACK_INIT_SIZE); + memset(mrb->stbase, 0, sizeof(mrb_value) * STACK_INIT_SIZE); + mrb->stend = mrb->stbase + 
STACK_INIT_SIZE; + mrb->stack = mrb->stbase; + + /* assert(mrb->ci == NULL); */ + mrb->cibase = mrb_malloc(mrb, sizeof(mrb_callinfo)*CALLINFO_INIT_SIZE); + mrb->ciend = mrb->cibase + CALLINFO_INIT_SIZE; + mrb->ci = mrb->cibase; + memset(mrb->ci, 0, sizeof(mrb_callinfo)); + mrb->ci->target_class = mrb->object_class; +} + +static void +stack_extend(mrb_state *mrb, int room, int keep) +{ + size_t size, off; + + if (mrb->stack + room > mrb->stend) { + size = mrb->stend - mrb->stbase; + off = mrb->stack - mrb->stbase; + + if (room <= size) /* double size is enough? */ + size *= 2; + else + size += room; + mrb->stbase = mrb_realloc(mrb, mrb->stbase, sizeof(mrb_value) * size); + mrb->stack = mrb->stbase + off; + mrb->stend = mrb->stbase + size; + } + if (room > keep) { + memset(mrb->stack+keep, 0, sizeof(mrb_value) * (room-keep)); + } +} + +int +mrb_checkstack(mrb_state *mrb, int size) +{ + stack_extend(mrb, size+1, 1); + return 0; +} + +struct REnv* +uvenv(mrb_state *mrb, int up) +{ + struct REnv *e = mrb->ci->proc->env; + + if (!e) return 0; + while (up--) { + e = (struct REnv*)e->c; + } + return e; +} + +static mrb_value +uvget(mrb_state *mrb, int up, int idx) +{ + struct REnv *e = uvenv(mrb, up); + + if (!e) return mrb_nil_value(); + return e->stack[idx]; +} + +static void +uvset(mrb_state *mrb, int up, int idx, mrb_value v) +{ + struct REnv *e = uvenv(mrb, up); + + if (!e) return; + e->stack[idx] = v; + mrb_write_barrier(mrb, (struct RBasic*)e); +} + +static mrb_callinfo* +cipush(mrb_state *mrb) +{ + size_t nregs = mrb->ci->nregs; + int eidx = mrb->ci->eidx; + int ridx = mrb->ci->ridx; + + if (mrb->ci + 1 == mrb->ciend) { + size_t size = mrb->ci - mrb->cibase; + + mrb->cibase = mrb_realloc(mrb, mrb->cibase, sizeof(mrb_callinfo)*size*2); + mrb->ci = mrb->cibase + size; + mrb->ciend = mrb->cibase + size * 2; + } + mrb->ci++; + mrb->ci->nregs = nregs; + mrb->ci->eidx = eidx; + mrb->ci->ridx = ridx; + mrb->ci->env = 0; + return mrb->ci; +} + +static void +cipop(mrb_state 
*mrb) +{ + mrb->ci--; +} + +static void +ecall(mrb_state *mrb, int i) +{ + struct RProc *p; + mrb_callinfo *ci; + mrb_value *self = mrb->stack; + + p = mrb->ensure[i]; + ci = cipush(mrb); + ci->stackidx = mrb->stack - mrb->stbase; + ci->mid = ci[-1].mid; + ci->acc = -1; + ci->argc = 0; + ci->proc = p; + ci->nregs = p->body.irep->nregs; + ci->target_class = p->target_class; + mrb->stack = mrb->stack + ci[-1].nregs; + mrb_run(mrb, p, *self); +} + +mrb_value +mrb_funcall_with_block(mrb_state *mrb, mrb_value self, const char *name, int argc, mrb_value *argv, struct RProc *blk) +{ + struct RProc *p; + struct RClass *c; + mrb_sym mid = mrb_intern(mrb, name); + mrb_sym undef = 0; + mrb_callinfo *ci; + int n = mrb->ci->nregs; + mrb_value val; + + c = mrb_class(mrb, self); + p = mrb_method_search_vm(mrb, &c, mid); + if (!p) { + undef = mid; + mid = mrb_intern(mrb, "method_missing"); + p = mrb_method_search_vm(mrb, &c, mid); + n++; argc++; + } + ci = cipush(mrb); + ci->mid = mid; + ci->proc = p; + ci->stackidx = mrb->stack - mrb->stbase; + ci->argc = argc; + ci->target_class = p->target_class; + ci->nregs = argc + 2; + ci->acc = -1; + mrb->stack = mrb->stack + n; + + stack_extend(mrb, ci->nregs, 0); + mrb->stack[0] = self; + if (undef) { + mrb->stack[1] = mrb_symbol_value(undef); + memcpy(mrb->stack+2, argv, sizeof(mrb_value)*(argc-1)); + } + else if (argc > 0) { + memcpy(mrb->stack+1, argv, sizeof(mrb_value)*argc); + } + if (!blk) { + mrb->stack[argc+1] = mrb_nil_value(); + } + else { + mrb->stack[argc+1] = mrb_obj_value(blk); + } + + if (MRB_PROC_CFUNC_P(p)) { + val = p->body.func(mrb, self); + mrb->stack = mrb->stbase + ci->stackidx; + cipop(mrb); + } + else { + val = mrb_run(mrb, p, self); + } + return val; +} + +mrb_value +mrb_funcall_argv(mrb_state *mrb, mrb_value self, const char *name, int argc, mrb_value *argv) +{ + return mrb_funcall_with_block(mrb, self, name, argc, argv, 0); +} + +mrb_value +mrb_yield_with_self(mrb_state *mrb, mrb_value b, int argc, mrb_value 
*argv, mrb_value self) +{ + struct RProc *p; + mrb_sym mid = mrb->ci->mid; + mrb_callinfo *ci; + int n = mrb->ci->nregs; + mrb_value val; + + p = mrb_proc_ptr(b); + ci = cipush(mrb); + ci->mid = mid; + ci->proc = p; + ci->stackidx = mrb->stack - mrb->stbase; + ci->argc = argc; + ci->target_class = p->target_class; + ci->nregs = argc + 2; + ci->acc = -1; + mrb->stack = mrb->stack + n; + + stack_extend(mrb, ci->nregs, 0); + mrb->stack[0] = self; + if (argc > 0) { + memcpy(mrb->stack+1, argv, sizeof(mrb_value)*argc); + } + mrb->stack[argc+1] = mrb_nil_value(); + + if (MRB_PROC_CFUNC_P(p)) { + val = p->body.func(mrb, self); + mrb->stack = mrb->stbase + ci->stackidx; + cipop(mrb); + } + else { + val = mrb_run(mrb, p, self); + } + return val; +} + +mrb_value +mrb_yield_argv(mrb_state *mrb, mrb_value b, int argc, mrb_value *argv) +{ + return mrb_yield_with_self(mrb, b, argc, argv, mrb->stack[0]); +} + +mrb_value +mrb_yield(mrb_state *mrb, mrb_value b, mrb_value v) +{ + return mrb_yield_with_self(mrb, b, 1, &v, mrb->stack[0]); +} + +void +localjump_error(mrb_state *mrb, const char *kind) +{ + char buf[256]; + mrb_value exc; + + snprintf(buf, 256, "unexpected %s", kind); + exc = mrb_exc_new(mrb, E_LOCALJUMP_ERROR, buf, sizeof(buf)); + mrb->exc = mrb_object(exc); +} + +#define SET_TRUE_VALUE(r) {\ + (r).tt = MRB_TT_TRUE;\ + (r).value.i = 1;\ +} + +#define SET_FALSE_VALUE(r) {\ + (r).tt = MRB_TT_FALSE;\ + (r).value.i = 1;\ +} + +#define SET_NIL_VALUE(r) { \ + (r).tt = MRB_TT_FALSE;\ + (r).value.p = 0;\ +} + +#define SET_INT_VALUE(r,n) {\ + (r).tt = MRB_TT_FIXNUM;\ + (r).value.i = (n);\ +} + +#define SET_FLOAT_VALUE(r,v) {\ + (r).tt = MRB_TT_FLOAT;\ + (r).value.f = (v);\ +} + +#define SET_SYM_VALUE(r,v) {\ + (r).tt = MRB_TT_SYMBOL;\ + (r).value.i = (v);\ +} + +#define SET_OBJ_VALUE(r,v) {\ + (r).tt = (((struct RObject*)(v))->tt);\ + (r).value.p = (void*)(v);\ +} + +#define DIRECT_THREADED +#ifndef DIRECT_THREADED + +#define INIT_DISPACTH for (;;) { i = *pc; switch 
(GET_OPCODE(i)) { +#define CASE(op) case op: +#define NEXT mrb->arena_idx = ai; pc++; break +#define JUMP break +#define END_DISPACTH }} + +#else + +#define INIT_DISPACTH JUMP; return mrb_nil_value(); +#define CASE(op) L_ ## op: +#define NEXT mrb->arena_idx = ai; i=*++pc; goto *optable[GET_OPCODE(i)] +#define JUMP i=*pc; goto *optable[GET_OPCODE(i)] + +#define END_DISPACTH + +#endif + +mrb_value mrb_gv_val_get(mrb_state *mrb, mrb_sym sym); +void mrb_gv_val_set(mrb_state *mrb, mrb_sym sym, mrb_value val); + +#define CALL_MAXARGS 127 + +mrb_value +mrb_run(mrb_state *mrb, struct RProc *proc, mrb_value self) +{ + /* assert(mrb_proc_cfunc_p(proc)) */ + mrb_irep *irep = proc->body.irep; + mrb_code *pc = irep->iseq; + mrb_value *pool = irep->pool; + mrb_sym *syms = irep->syms; + mrb_value *regs; + mrb_code i; + int ai = mrb->arena_idx; + jmp_buf c_jmp; + jmp_buf *prev_jmp; + +#ifdef DIRECT_THREADED + static void *optable[] = { + &&L_OP_NOP, &&L_OP_MOVE, + &&L_OP_LOADL, &&L_OP_LOADI, &&L_OP_LOADSYM, &&L_OP_LOADNIL, + &&L_OP_LOADSELF, &&L_OP_LOADT, &&L_OP_LOADF, + &&L_OP_GETGLOBAL, &&L_OP_SETGLOBAL, &&L_OP_GETSPECIAL, &&L_OP_SETSPECIAL, + &&L_OP_GETIV, &&L_OP_SETIV, &&L_OP_GETCV, &&L_OP_SETCV, + &&L_OP_GETCONST, &&L_OP_SETCONST, &&L_OP_GETMCNST, &&L_OP_SETMCNST, + &&L_OP_GETUPVAR, &&L_OP_SETUPVAR, + &&L_OP_JMP, &&L_OP_JMPIF, &&L_OP_JMPNOT, + &&L_OP_ONERR, &&L_OP_RESCUE, &&L_OP_POPERR, &&L_OP_RAISE, &&L_OP_EPUSH, &&L_OP_EPOP, + &&L_OP_SEND, &&L_OP_FSEND, &&L_OP_VSEND, + &&L_OP_CALL, &&L_OP_SUPER, &&L_OP_ARGARY, &&L_OP_ENTER, + &&L_OP_KARG, &&L_OP_KDICT, &&L_OP_RETURN, &&L_OP_TAILCALL, &&L_OP_BLKPUSH, + &&L_OP_ADD, &&L_OP_ADDI, &&L_OP_SUB, &&L_OP_SUBI, &&L_OP_MUL, &&L_OP_DIV, + &&L_OP_EQ, &&L_OP_LT, &&L_OP_LE, &&L_OP_GT, &&L_OP_GE, + &&L_OP_ARRAY, &&L_OP_ARYCAT, &&L_OP_ARYPUSH, &&L_OP_AREF, &&L_OP_ASET, &&L_OP_APOST, + &&L_OP_STRING, &&L_OP_STRCAT, &&L_OP_HASH, + &&L_OP_LAMBDA, &&L_OP_RANGE, &&L_OP_OCLASS, + &&L_OP_CLASS, &&L_OP_MODULE, &&L_OP_EXEC, + &&L_OP_METHOD, 
&&L_OP_SCLASS, &&L_OP_TCLASS, + &&L_OP_DEBUG, &&L_OP_STOP, &&L_OP_ERR, + }; +#endif + + + if (setjmp(c_jmp) == 0) { + prev_jmp = mrb->jmp; + mrb->jmp = &c_jmp; + } + else { + goto L_RAISE; + } + if (!mrb->stack) { + stack_init(mrb); + } + mrb->ci->proc = proc; + mrb->ci->nregs = irep->nregs + 2; + regs = mrb->stack; + + INIT_DISPACTH { + CASE(OP_NOP) { + /* do nothing */ + NEXT; + } + + CASE(OP_MOVE) { + /* A B R(A) := R(B) */ +#if 0 + regs[GETARG_A(i)] = regs[GETARG_B(i)]; +#elif 1 + int a = GETARG_A(i); + int b = GETARG_B(i); + + regs[a].tt = regs[b].tt; + regs[a].value = regs[b].value; +#else + memcpy(regs+GETARG_A(i), regs+GETARG_B(i), sizeof(mrb_value)); +#endif + NEXT; + } + + CASE(OP_LOADL) { + /* A Bx R(A) := Pool(Bx) */ + regs[GETARG_A(i)] = pool[GETARG_Bx(i)]; + NEXT; + } + + CASE(OP_LOADI) { + /* A Bx R(A) := sBx */ + SET_INT_VALUE(regs[GETARG_A(i)], GETARG_sBx(i)); + NEXT; + } + + CASE(OP_LOADSYM) { + /* A B R(A) := Sym(B) */ + SET_SYM_VALUE(regs[GETARG_A(i)], syms[GETARG_Bx(i)]); + NEXT; + } + + CASE(OP_LOADNIL) { + /* A B R(A) := nil */ + int a = GETARG_A(i); + + SET_NIL_VALUE(regs[a]); + NEXT; + } + + CASE(OP_LOADSELF) { + /* A R(A) := self */ + regs[GETARG_A(i)] = mrb->stack[0]; + NEXT; + } + + CASE(OP_LOADT) { + /* A R(A) := true */ + regs[GETARG_A(i)] = mrb_true_value(); + NEXT; + } + + CASE(OP_LOADF) { + /* A R(A) := false */ + regs[GETARG_A(i)] = mrb_false_value(); + NEXT; + } + + CASE(OP_GETGLOBAL) { + /* A B R(A) := getglobal(Sym(B)) */ + regs[GETARG_A(i)] = mrb_gv_get(mrb, syms[GETARG_Bx(i)]); + NEXT; + } + + CASE(OP_SETGLOBAL) { + /* setglobal(Sym(b), R(A)) */ + mrb_gv_set(mrb, syms[GETARG_Bx(i)], regs[GETARG_A(i)]); + NEXT; + } + + CASE(OP_GETSPECIAL) { + /* A Bx R(A) := Special[Bx] */ + regs[GETARG_A(i)] = mrb_vm_special_get(mrb, GETARG_Bx(i)); + NEXT; + } + + CASE(OP_SETSPECIAL) { + /* A Bx Special[Bx] := R(A) */ + mrb_vm_special_set(mrb, GETARG_Bx(i), regs[GETARG_A(i)]); + NEXT; + } + + CASE(OP_GETIV) { + /* A Bx R(A) := ivget(Bx) */ + 
regs[GETARG_A(i)] = mrb_vm_iv_get(mrb, syms[GETARG_Bx(i)]); + NEXT; + } + + CASE(OP_SETIV) { + /* ivset(Sym(B),R(A)) */ + mrb_vm_iv_set(mrb, syms[GETARG_Bx(i)], regs[GETARG_A(i)]); + NEXT; + } + + CASE(OP_GETCV) { + /* A B R(A) := ivget(Sym(B)) */ + regs[GETARG_A(i)] = mrb_vm_cv_get(mrb, syms[GETARG_Bx(i)]); + NEXT; + } + + CASE(OP_SETCV) { + /* ivset(Sym(B),R(A)) */ + mrb_vm_cv_set(mrb, syms[GETARG_Bx(i)], regs[GETARG_A(i)]); + NEXT; + } + + CASE(OP_GETCONST) { + /* A B R(A) := constget(Sym(B)) */ + regs[GETARG_A(i)] = mrb_vm_const_get(mrb, syms[GETARG_Bx(i)]); + NEXT; + } + + CASE(OP_SETCONST) { + /* A B constset(Sym(B),R(A)) */ + mrb_vm_const_set(mrb, syms[GETARG_Bx(i)], regs[GETARG_A(i)]); + NEXT; + } + + CASE(OP_GETMCNST) { + /* A B C R(A) := R(C)::Sym(B) */ + int a = GETARG_A(i); + + regs[a] = mrb_const_get(mrb, regs[a], syms[GETARG_Bx(i)]); + NEXT; + } + + CASE(OP_SETMCNST) { + /* A B C R(A+1)::Sym(B) := R(A) */ + int a = GETARG_A(i); + + mrb_const_set(mrb, regs[a+1], syms[GETARG_Bx(i)], regs[a]); + NEXT; + } + + CASE(OP_GETUPVAR) { + /* A B C R(A) := uvget(B,C) */ + regs[GETARG_A(i)] = uvget(mrb, GETARG_C(i), GETARG_B(i)); + NEXT; + } + + CASE(OP_SETUPVAR) { + /* A B C uvset(B,C,R(A)) */ + uvset(mrb, GETARG_C(i), GETARG_B(i), regs[GETARG_A(i)]); + NEXT; + } + + CASE(OP_JMP) { + /* sBx pc+=sBx */ + pc += GETARG_sBx(i); + JUMP; + } + + CASE(OP_JMPIF) { + /* A sBx if R(A) pc+=sBx */ + if (mrb_test(regs[GETARG_A(i)])) { + pc += GETARG_sBx(i); + JUMP; + } + NEXT; + } + + CASE(OP_JMPNOT) { + /* A sBx if R(A) pc+=sBx */ + if (!mrb_test(regs[GETARG_A(i)])) { + pc += GETARG_sBx(i); + JUMP; + } + NEXT; + } + + CASE(OP_ONERR) { + /* sBx pc+=sBx on exception */ + if (mrb->rsize <= mrb->ci->ridx) { + if (mrb->rsize == 0) mrb->rsize = 16; + else mrb->rsize *= 2; + mrb->rescue = mrb_realloc(mrb, mrb->rescue, sizeof(mrb_code*) * mrb->rsize); + } + mrb->rescue[mrb->ci->ridx++] = pc + GETARG_sBx(i); + NEXT; + } + + CASE(OP_RESCUE) { + /* A R(A) := exc; clear(exc) */ + 
SET_OBJ_VALUE(regs[GETARG_A(i)],mrb->exc); + mrb->exc = 0; + NEXT; + } + + CASE(OP_POPERR) { + int a = GETARG_A(i); + + while (a--) { + mrb->ci->ridx--; + } + NEXT; + } + + CASE(OP_RAISE) { + /* A raise(R(A)) */ + mrb->exc = mrb_object(regs[GETARG_A(i)]); + goto L_RAISE; + } + + CASE(OP_EPUSH) { + /* Bx ensure_push(SEQ[Bx]) */ + struct RProc *p; + + p = mrb_closure_new(mrb, mrb->irep[irep->idx+GETARG_Bx(i)]); + /* push ensure_stack */ + if (mrb->esize <= mrb->ci->eidx) { + if (mrb->esize == 0) mrb->esize = 16; + else mrb->esize *= 2; + mrb->ensure = mrb_realloc(mrb, mrb->ensure, sizeof(struct RProc*) * mrb->esize); + } + mrb->ensure[mrb->ci->eidx++] = p; + NEXT; + } + + CASE(OP_EPOP) { + /* A A.times{ensure_pop().call} */ + int n; + int a = GETARG_A(i); + + for (n=0; nci->eidx); + } + NEXT; + } + + L_SEND: + CASE(OP_SEND) { + /* A B C R(A) := call(R(A),Sym(B),R(A+1),... ,R(A+C-1)) */ + int a = GETARG_A(i); + int n = GETARG_C(i); + struct RProc *m; + struct RClass *c; + mrb_callinfo *ci; + mrb_value recv; + mrb_sym mid = syms[GETARG_B(i)]; + + recv = regs[a]; + c = mrb_class(mrb, recv); + m = mrb_method_search_vm(mrb, &c, mid); + if (!m) { + mrb_value sym = mrb_symbol_value(mid); + + mid = mrb_intern(mrb, "method_missing"); + m = mrb_method_search_vm(mrb, &c, mid); + if (n == CALL_MAXARGS) { + mrb_ary_unshift(mrb, regs[a+1], sym); + } + else { + memmove(regs+a+2, regs+a+1, sizeof(mrb_value)*(n+1)); + regs[a+1] = sym; + n++; + } + } + + /* push callinfo */ + ci = cipush(mrb); + ci->mid = mid; + ci->proc = m; + ci->stackidx = mrb->stack - mrb->stbase; + ci->argc = n; + if (ci->argc == CALL_MAXARGS) ci->argc = -1; + ci->target_class = m->target_class; + ci->pc = pc + 1; + + /* prepare stack */ + mrb->stack += a; + + if (MRB_PROC_CFUNC_P(m)) { + mrb->stack[0] = m->body.func(mrb, recv); + mrb->arena_idx = ai; + if (mrb->exc) goto L_RAISE; + /* pop stackpos */ + mrb->stack = mrb->stbase + ci->stackidx; + cipop(mrb); + NEXT; + } + else { + /* fill callinfo */ + ci->acc = 
a; + + /* setup environment for calling method */ + proc = mrb->ci->proc = m; + irep = m->body.irep; + pool = irep->pool; + syms = irep->syms; + ci->nregs = irep->nregs; + if (ci->argc < 0) { + stack_extend(mrb, (irep->nregs < 3) ? 3 : irep->nregs, 3); + } + else { + stack_extend(mrb, irep->nregs, ci->argc+2); + } + regs = mrb->stack; + pc = irep->iseq; + JUMP; + } + } + + CASE(OP_FSEND) { + /* A B C R(A) := fcall(R(A),Sym(B),R(A+1),... ,R(A+C)) */ + NEXT; + } + + CASE(OP_VSEND) { + /* A B R(A) := vcall(R(A),Sym(B)) */ + NEXT; + } + + CASE(OP_CALL) { + /* A R(A) := self.call(frame.argc, frame.argv) */ + mrb_callinfo *ci; + mrb_value recv = mrb->stack[0]; + struct RProc *m = mrb_proc_ptr(recv); + + /* replace callinfo */ + ci = mrb->ci; + ci->target_class = m->target_class; + ci->proc = m; + if (m->env) { + ci->mid = m->env->mid; + if (!m->env->stack) { + m->env->stack = mrb->stack; + } + } + + /* prepare stack */ + if (MRB_PROC_CFUNC_P(m)) { + mrb->stack[0] = m->body.func(mrb, recv); + mrb->arena_idx = ai; + if (mrb->exc) goto L_RAISE; + /* pop stackpos */ + regs = mrb->stack = mrb->stbase + ci->stackidx; + cipop(mrb); + NEXT; + } + else { + /* setup environment for calling method */ + proc = m; + irep = m->body.irep; + pool = irep->pool; + syms = irep->syms; + ci->nregs = irep->nregs; + if (ci->argc < 0) { + stack_extend(mrb, (irep->nregs < 3) ? 3 : irep->nregs, 3); + } + else { + stack_extend(mrb, irep->nregs, ci->argc+2); + } + regs = mrb->stack; + regs[0] = m->env->stack[0]; + pc = m->body.irep->iseq; + JUMP; + } + } + + CASE(OP_SUPER) { + /* A B C R(A) := super(R(A+1),... 
,R(A+C-1)) */ + mrb_value recv; + mrb_callinfo *ci = mrb->ci; + struct RProc *m; + struct RClass *c; + mrb_sym mid = ci->mid; + int a = GETARG_A(i); + int n = GETARG_C(i); + + recv = regs[0]; + c = mrb->ci->proc->target_class->super; + m = mrb_method_search_vm(mrb, &c, mid); + if (!m) { + c = mrb->ci->proc->target_class; + mid = mrb_intern(mrb, "method_missing"); + m = mrb_method_search_vm(mrb, &c, mid); + if (n == CALL_MAXARGS) { + mrb_ary_unshift(mrb, regs[a+1], mrb_symbol_value(ci->mid)); + } + else { + memmove(regs+a+2, regs+a+1, sizeof(mrb_value)*(n+1)); + regs[a+1] = mrb_symbol_value(ci->mid); + n++; + } + } + + /* push callinfo */ + ci = cipush(mrb); + ci->mid = mid; + ci->proc = m; + ci->stackidx = mrb->stack - mrb->stbase; + ci->argc = n; + if (ci->argc == CALL_MAXARGS) ci->argc = -1; + ci->target_class = m->target_class; + ci->pc = pc + 1; + + /* prepare stack */ + mrb->stack += a; + mrb->stack[0] = recv; + + if (MRB_PROC_CFUNC_P(m)) { + mrb->stack[0] = m->body.func(mrb, recv); + mrb->arena_idx = ai; + if (mrb->exc) goto L_RAISE; + /* pop stackpos */ + mrb->stack = mrb->stbase + ci->stackidx; + cipop(mrb); + NEXT; + } + else { + /* fill callinfo */ + ci->acc = a; + + /* setup environment for calling method */ + ci->proc = m; + irep = m->body.irep; + pool = irep->pool; + syms = irep->syms; + ci->nregs = irep->nregs; + if (ci->argc < 0) { + stack_extend(mrb, (irep->nregs < 3) ? 
3 : irep->nregs, 3); + } + else { + stack_extend(mrb, irep->nregs, ci->argc+2); + } + regs = mrb->stack; + pc = irep->iseq; + JUMP; + } + } + + CASE(OP_ARGARY) { + /* A Bx R(A) := argument array (16=6:1:5:4) */ + int a = GETARG_A(i); + int bx = GETARG_Bx(i); + int m1 = (bx>>10)&0x3f; + int r = (bx>>9)&0x1; + int m2 = (bx>>4)&0x1f; + int lv = (bx>>0)&0xf; + mrb_value *stack; + + if (lv == 0) stack = regs + 1; + else { + struct REnv *e = uvenv(mrb, lv-1); + stack = e->stack + 1; + } + if (r == 0) { + regs[a] = mrb_ary_new_elts(mrb, m1+m2, stack); + } + else { + mrb_value *pp; + struct RArray *rest; + int len = 0; + + if (stack[m1].tt == MRB_TT_ARRAY) { + struct RArray *ary = mrb_ary_ptr(stack[m1]); + + pp = ary->buf; + len = ary->len; + } + regs[a] = mrb_ary_new_capa(mrb, m1+len+m2); + rest = mrb_ary_ptr(regs[a]); + memcpy(rest->buf, stack, sizeof(mrb_value)*m1); + if (len > 0) { + memcpy(rest->buf+m1, pp, sizeof(mrb_value)*len); + } + if (m2 > 0) { + memcpy(rest->buf+m1+len, stack+m1+1, sizeof(mrb_value)*m2); + } + rest->len = m1+len+m2; + } + regs[a+1] = stack[m1+r+m2]; + NEXT; + } + + CASE(OP_ENTER) { + /* Ax arg setup according to flags (24=5:5:1:5:5:1:1) */ + /* number of optional arguments times OP_JMP should follow */ + int ax = GETARG_Ax(i); + int m1 = (ax>>18)&0x1f; + int o = (ax>>13)&0x1f; + int r = (ax>>12)&0x1; + int m2 = (ax>>7)&0x1f; + int k = (ax>>2)&0x1f; + int kd = (ax>>1)&0x1; + int b = (ax>>0)& 0x1; + int argc = mrb->ci->argc; + mrb_value *argv = regs+1; + int len = m1 + o + r + m2; + + if (argc < 0) { + struct RArray *ary = mrb_ary_ptr(regs[1]); + argv = ary->buf; + argc = ary->len; + regs[len+2] = regs[1]; /* save argary in register */ + } + if (mrb->ci->proc && MRB_PROC_STRICT_P(mrb->ci->proc)) { + if (argc >= 0) { + if (argc < m1 + m2 || (r == 0 && argc > len)) { + fprintf(stderr, "'%s': wrong number of arguments (%d for %d)\n", + mrb_sym2name(mrb, mrb->ci->mid), + mrb->ci->argc, m1+m2); + exit(1); + } + } + } + else if (len > 1 && argc == 1 && 
argv[0].tt == MRB_TT_ARRAY) { + argc = mrb_ary_ptr(argv[0])->len; + argv = mrb_ary_ptr(argv[0])->buf; + } + mrb->ci->argc = len; + if (argc < len) { + regs[len+1] = argv[argc]; /* move block */ + memmove(®s[1], argv, sizeof(mrb_value)*(argc-m2)); /* m1 + o */ + memmove(®s[len-m2+1], &argv[argc-m2], sizeof(mrb_value)*m2); /* m2 */ + if (r) { /* r */ + regs[m1+o+1] = mrb_ary_new_capa(mrb, 0); + } + pc += argc - m1 - m2 + 1; + } + else { + memmove(®s[1], argv, sizeof(mrb_value)*(m1+o)); /* m1 + o */ + if (r) { /* r */ + regs[m1+o+1] = mrb_ary_new_elts(mrb, argc-m1-o-m2, argv+m1+o); + } + memmove(®s[m1+o+r+1], &argv[argc-m2], sizeof(mrb_value)*m2); + regs[len+1] = argv[argc]; /* move block */ + pc += o + 1; + } + JUMP; + } + + CASE(OP_KARG) { + /* A B C R(A) := kdict[Sym(B)]; if C kdict.rm(Sym(B)) */ + /* if C == 2; raise unless kdict.empty? */ + /* OP_JMP should follow to skip init code */ + NEXT; + } + + CASE(OP_KDICT) { + /* A C R(A) := kdict */ + NEXT; + } + + CASE(OP_RETURN) { + /* A return R(A) */ + L_RETURN: + if (mrb->ci->env) { + struct REnv *e = mrb->ci->env; + int len = (int)e->flags; + mrb_value *p = mrb_malloc(mrb, sizeof(mrb_value)*len); + + e->cioff = -1; + memcpy(p, e->stack, sizeof(mrb_value)*len); + e->stack = p; + } + + if (mrb->exc) { + mrb_callinfo *ci; + int ridx; + + L_RAISE: + ci = mrb->ci; + if (ci == mrb->cibase) goto L_STOP; + while (ci[0].ridx == ci[-1].ridx) { + cipop(mrb); + ci = mrb->ci; + if (ci == mrb->cibase) { + if (ci->ridx == 0) goto L_STOP; + break; + } + } + irep = ci->proc->body.irep; + pool = irep->pool; + syms = irep->syms; + regs = mrb->stack = mrb->stbase + ci->stackidx; + pc = mrb->rescue[--ci->ridx]; + } + else { + mrb_callinfo *ci = mrb->ci; + int acc, eidx = mrb->ci->eidx; + mrb_value v = regs[GETARG_A(i)]; + + switch (GETARG_B(i)) { + case OP_R_NORMAL: + ci = mrb->ci; + break; + case OP_R_BREAK: + if (proc->env->cioff < 0) { + localjump_error(mrb, "break"); + goto L_RAISE; + } + ci = mrb->ci = mrb->cibase + 
proc->env->cioff + 1; + break; + case OP_R_RETURN: + if (proc->env->cioff < 0) { + localjump_error(mrb, "return"); + } + ci = mrb->ci = mrb->cibase + proc->env->cioff; + break; + default: + /* cannot happen */ + break; + } + cipop(mrb); + acc = ci->acc; + pc = ci->pc; + regs = mrb->stack = mrb->stbase + ci->stackidx; + while (eidx > mrb->ci->eidx) { + ecall(mrb, --eidx); + } + if (acc < 0) { + mrb->jmp = prev_jmp; + return v; + } + DEBUG(printf("from :%s\n", mrb_sym2name(mrb, ci->mid))); + proc = mrb->ci->proc; + irep = proc->body.irep; + pool = irep->pool; + syms = irep->syms; + + regs[acc] = v; + } + JUMP; + } + + CASE(OP_TAILCALL) { + /* A B C return call(R(A),Sym(B),R(A+1),... ,R(A+C-1)) */ + int a = GETARG_A(i); + int n = GETARG_C(i); + struct RProc *m; + struct RClass *c; + mrb_callinfo *ci; + mrb_value recv; + mrb_sym mid = syms[GETARG_B(i)]; + + recv = regs[a]; + c = mrb_class(mrb, recv); + m = mrb_method_search_vm(mrb, &c, mid); + if (!m) { + mrb_value sym = mrb_symbol_value(mid); + + mid = mrb_intern(mrb, "method_missing"); + m = mrb_method_search_vm(mrb, &c, mid); + if (n == CALL_MAXARGS) { + mrb_ary_unshift(mrb, regs[a+1], sym); + } + else { + memmove(regs+a+2, regs+a+1, sizeof(mrb_value)*(n+1)); + regs[a+1] = sym; + n++; + } + } + + + /* replace callinfo */ + mrb->ci = ci = &mrb->ci[-1]; + ci->mid = mid; + ci->target_class = m->target_class; + ci->argc = n; + if (ci->argc == CALL_MAXARGS) ci->argc = -1; + + /* move stack */ + memmove(mrb->stack, ®s[a], (ci->argc+1)*sizeof(mrb_value)); + + if (MRB_PROC_CFUNC_P(m)) { + mrb->stack[0] = m->body.func(mrb, recv); + mrb->arena_idx = ai; + goto L_RETURN; + } + else { + /* setup environment for calling method */ + irep = m->body.irep; + pool = irep->pool; + syms = irep->syms; + if (ci->argc < 0) { + stack_extend(mrb, (irep->nregs < 3) ? 
3 : irep->nregs, 3); + } + else { + stack_extend(mrb, irep->nregs, ci->argc+2); + } + regs = mrb->stack; + pc = irep->iseq; + } + JUMP; + } + + CASE(OP_BLKPUSH) { + /* A Bx R(A) := block (16=6:1:5:4) */ + int a = GETARG_A(i); + int bx = GETARG_Bx(i); + int m1 = (bx>>10)&0x3f; + int r = (bx>>9)&0x1; + int m2 = (bx>>4)&0x1f; + int lv = (bx>>0)&0xf; + mrb_value *stack; + + if (lv == 0) stack = regs + 1; + else { + struct REnv *e = uvenv(mrb, lv-1); + stack = e->stack + 1; + } + regs[a] = stack[m1+r+m2]; + NEXT; + } + +#define TYPES2(a,b) (((((int)(a))<<8)|((int)(b)))&0xffff) +#define OP_MATH_BODY(op,v1,v2) do {\ + regs[a].value.v1 = regs[a].value.v1 op regs[a+1].value.v2;\ +} while(0) + +#define OP_MATH(op) do {\ + int a = GETARG_A(i);\ + /* need to check if - is overridden */\ + switch (TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]))) {\ + case TYPES2(MRB_TT_FIXNUM,MRB_TT_FIXNUM):\ + OP_MATH_BODY(op,i,i); \ + break;\ + case TYPES2(MRB_TT_FIXNUM,MRB_TT_FLOAT):\ + {\ + mrb_int x = regs[a].value.i;\ + mrb_float y = regs[a+1].value.f;\ + SET_FLOAT_VALUE(regs[a], (mrb_float)x op y);\ + }\ + break;\ + case TYPES2(MRB_TT_FLOAT,MRB_TT_FIXNUM):\ + OP_MATH_BODY(op,f,i);\ + break;\ + case TYPES2(MRB_TT_FLOAT,MRB_TT_FLOAT):\ + OP_MATH_BODY(op,f,f);\ + break;\ + default:\ + i = MKOP_ABC(OP_SEND, a, GETARG_B(i), GETARG_C(i));\ + goto L_SEND;\ + }\ +} while (0) + + CASE(OP_ADD) { + /* A B C R(A) := R(A)+R(A+1) (Syms[B]=:+,C=1)*/ + int a = GETARG_A(i); + + switch (TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]))) { + case TYPES2(MRB_TT_FIXNUM,MRB_TT_FIXNUM): + OP_MATH_BODY(+,i,i); + break; + case TYPES2(MRB_TT_FIXNUM,MRB_TT_FLOAT): + { + mrb_int x = regs[a].value.i; + mrb_float y = regs[a+1].value.f; + SET_FLOAT_VALUE(regs[a], (mrb_float)x + y); + } + break; + case TYPES2(MRB_TT_FLOAT,MRB_TT_FIXNUM): + OP_MATH_BODY(+,f,i); + break; + case TYPES2(MRB_TT_FLOAT,MRB_TT_FLOAT): + OP_MATH_BODY(+,f,f); + break; + case TYPES2(MRB_TT_STRING,MRB_TT_STRING): + regs[a] = mrb_str_plus(mrb, regs[a], 
regs[a+1]); + break; + default: + i = MKOP_ABC(OP_SEND, a, GETARG_B(i), GETARG_C(i)); + goto L_SEND; + } + NEXT; + } + + CASE(OP_SUB) { + /* A B C R(A) := R(A)-R(A+1) (Syms[B]=:-,C=1)*/ + OP_MATH(-); + NEXT; + } + + CASE(OP_MUL) { + /* A B C R(A) := R(A)*R(A+1) (Syms[B]=:*,C=1)*/ + OP_MATH(*); + NEXT; + } + + CASE(OP_DIV) { + /* A B C R(A) := R(A)/R(A+1) (Syms[B]=:/,C=1)*/ + OP_MATH(/); + NEXT; + } + + CASE(OP_ADDI) { + /* A B C R(A) := R(A)+C (Syms[B]=:+)*/ + int a = GETARG_A(i); + + /* need to check if + is overridden */ + switch (mrb_type(regs[a])) { + case MRB_TT_FIXNUM: + regs[a].value.i += GETARG_C(i); + break; + case MRB_TT_FLOAT: + regs[a].value.f += GETARG_C(i); + break; + default: + SET_INT_VALUE(regs[a+1], GETARG_C(i)); + i = MKOP_ABC(OP_SEND, a, GETARG_B(i), 1); + goto L_SEND; + } + NEXT; + } + + CASE(OP_SUBI) { + /* A B C R(A) := R(A)-C (Syms[B]=:+)*/ + int a = GETARG_A(i); + + /* need to check if + is overridden */ + switch (mrb_type(regs[a])) { + case MRB_TT_FIXNUM: + regs[a].value.i -= GETARG_C(i); + break; + case MRB_TT_FLOAT: + regs[a].value.f -= GETARG_C(i); + break; + default: + SET_INT_VALUE(regs[a+1], GETARG_C(i)); + i = MKOP_ABC(OP_SEND, a, GETARG_B(i), 1); + goto L_SEND; + } + NEXT; + } + +#define OP_CMP_BODY(op,v1,v2) do {\ + if (regs[a].value.v1 op regs[a+1].value.v2) {\ + SET_TRUE_VALUE(regs[a]);\ + }\ + else {\ + SET_FALSE_VALUE(regs[a]);\ + }\ +} while(0) + +#define OP_CMP(op) do {\ + int a = GETARG_A(i);\ + /* need to check if - is overridden */\ + switch (TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]))) {\ + case TYPES2(MRB_TT_FIXNUM,MRB_TT_FIXNUM):\ + OP_CMP_BODY(op,i,i); \ + break;\ + case TYPES2(MRB_TT_FIXNUM,MRB_TT_FLOAT):\ + OP_CMP_BODY(op,i,f);\ + break;\ + case TYPES2(MRB_TT_FLOAT,MRB_TT_FIXNUM):\ + OP_CMP_BODY(op,f,i);\ + break;\ + case TYPES2(MRB_TT_FLOAT,MRB_TT_FLOAT):\ + OP_CMP_BODY(op,f,f);\ + break;\ + default:\ + i = MKOP_ABC(OP_SEND, a, GETARG_B(i), GETARG_C(i));\ + goto L_SEND;\ + }\ +} while (0) + + CASE(OP_EQ) { + /* A 
B C R(A) := R(A)); + NEXT; + } + + CASE(OP_GE) { + /* A B C R(A) := R(A)=); + NEXT; + } + + CASE(OP_ARRAY) { + /* A B C R(A) := ary_new(R(B),R(B+1)..R(B+C)) */ + int b = GETARG_B(i); + int lim = b+GETARG_C(i); + mrb_value ary = mrb_ary_new_capa(mrb, GETARG_C(i)); + + while (b < lim) { + mrb_ary_push(mrb, ary, regs[b++]); + } + regs[GETARG_A(i)] = ary; + NEXT; + } + + CASE(OP_ARYCAT) { + /* A B mrb_ary_concat(R(A),R(B)) */ + mrb_ary_concat(mrb, regs[GETARG_A(i)], + mrb_ary_splat(mrb, regs[GETARG_B(i)])); + NEXT; + } + + CASE(OP_ARYPUSH) { + /* A B R(A).push(R(B)) */ + mrb_ary_push(mrb, regs[GETARG_A(i)], regs[GETARG_B(i)]); + NEXT; + } + + CASE(OP_AREF) { + /* A B C R(A) := R(B)[C] */ + int a = GETARG_A(i); + int c = GETARG_C(i); + mrb_value v = regs[GETARG_B(i)]; + + if (v.tt != MRB_TT_ARRAY) { + if (c == 0) { + regs[GETARG_A(i)] = v; + } + else { + SET_NIL_VALUE(regs[a]); + } + } + else { + regs[GETARG_A(i)] = mrb_ary_ref(mrb, v, c); + } + NEXT; + } + + CASE(OP_ASET) { + /* A B C R(B)[C] := R(A) */ + mrb_ary_set(mrb, regs[GETARG_B(i)], GETARG_C(i), regs[GETARG_A(i)]); + NEXT; + } + + CASE(OP_APOST) { + /* A B C *R(A),R(A+1)..R(A+C) := R(A) */ + int a = GETARG_A(i); + mrb_value v = regs[a]; + int pre = GETARG_B(i); + int post = GETARG_C(i); + + if (v.tt != MRB_TT_ARRAY) { + regs[a++] = mrb_ary_new_capa(mrb, 0); + while (post--) { + SET_NIL_VALUE(regs[a]); + a++; + } + } + else { + struct RArray *ary = mrb_ary_ptr(v); + size_t len = ary->len; + int i; + + if (len > pre + post) { + regs[a++] = mrb_ary_new_elts(mrb, len - pre - post, ary->buf+pre); + while (post--) { + regs[a++] = ary->buf[len-post-1]; + } + } + else { + regs[a++] = mrb_ary_new_capa(mrb, 0); + for (i=0; i+prebuf[pre+i]; + } + while (i < post) { + SET_NIL_VALUE(regs[a+i]); + i++; + } + } + } + NEXT; + } + + CASE(OP_STRING) { + /* A Bx R(A) := str_new(Lit(Bx)) */ + regs[GETARG_A(i)] = mrb_str_literal(mrb, pool[GETARG_Bx(i)]); + NEXT; + } + + CASE(OP_STRCAT) { + /* A B R(A).concat(R(B)) */ + 
mrb_str_concat(mrb, regs[GETARG_A(i)], regs[GETARG_B(i)]); + NEXT; + } + + CASE(OP_HASH) { + /* A B C R(A) := hash_new(R(B),R(B+1)..R(B+C)) */ + int b = GETARG_B(i); + int c = GETARG_C(i); + int lim = b+c*2; + mrb_value hash = mrb_hash_new_capa(mrb, c); + + while (b < lim) { + mrb_hash_set(mrb, hash, regs[b], regs[b+1]); + b+=2; + } + regs[GETARG_A(i)] = hash; + NEXT; + } + + CASE(OP_LAMBDA) { + /* A b c R(A) := lambda(SEQ[b],c) (b:c = 14:2) */ + struct RProc *p; + int c = GETARG_c(i); + + if (c & OP_L_CAPTURE) { + p = mrb_closure_new(mrb, mrb->irep[irep->idx+GETARG_b(i)]); + } + else { + p = mrb_proc_new(mrb, mrb->irep[irep->idx+GETARG_b(i)]); + } + if (c & OP_L_STRICT) p->flags |= MRB_PROC_STRICT; + regs[GETARG_A(i)] = mrb_obj_value(p); + NEXT; + } + + CASE(OP_OCLASS) { + /* A R(A) := ::Object */ + regs[GETARG_A(i)] = mrb_obj_value(mrb->object_class); + NEXT; + } + + CASE(OP_CLASS) { + /* A B R(A) := newclass(R(A),Sym(B),R(A+1)) */ + struct RClass *c = 0; + int a = GETARG_A(i); + mrb_value base, super; + mrb_sym id = syms[GETARG_B(i)]; + + base = regs[a]; + super = regs[a+1]; + if (mrb_nil_p(base)) { + base = mrb_obj_value(mrb->ci->target_class); + } + c = mrb_vm_define_class(mrb, base, super, id); + regs[a] = mrb_obj_value(c); + NEXT; + } + + CASE(OP_MODULE) { + /* A B R(A) := newmodule(R(A),Sym(B)) */ + struct RClass *c = 0; + int a = GETARG_A(i); + mrb_value base; + mrb_sym id = syms[GETARG_B(i)]; + + base = regs[a]; + if (mrb_nil_p(base)) { + base = mrb_obj_value(mrb->ci->target_class); + } + c = mrb_vm_define_module(mrb, base, id); + regs[a] = mrb_obj_value(c); + NEXT; + } + + CASE(OP_EXEC) { + /* A Bx R(A) := blockexec(R(A),SEQ[Bx]) */ + int a = GETARG_A(i); + mrb_callinfo *ci; + mrb_value recv = regs[a]; + struct RProc *p; + + /* prepare stack */ + ci = cipush(mrb); + ci->pc = pc + 1; + ci->acc = a; + ci->mid = 0; + ci->stackidx = mrb->stack - mrb->stbase; + ci->argc = 0; + ci->target_class = mrb_class_ptr(regs[GETARG_A(i)]); + + p = mrb_proc_new(mrb, 
mrb->irep[irep->idx+GETARG_Bx(i)]); + p->target_class = ci->target_class; + ci->proc = p; + + if (MRB_PROC_CFUNC_P(p)) { + mrb->stack[0] = p->body.func(mrb, recv); + mrb->arena_idx = ai; + if (mrb->exc) goto L_RAISE; + /* pop stackpos */ + regs = mrb->stack = mrb->stbase + ci->stackidx; + cipop(mrb); + NEXT; + } + else { + /* setup environment for calling method */ + irep = p->body.irep; + pool = irep->pool; + syms = irep->syms; + mrb->stack += a; + stack_extend(mrb, irep->nregs, 1); + regs = mrb->stack; + pc = irep->iseq; + JUMP; + } + } + + CASE(OP_METHOD) { + /* A B R(A).newmethod(Sym(B),R(A+1)) */ + int a = GETARG_A(i); + struct RClass *c = mrb_class_ptr(regs[a]); + + mrb_define_method_vm(mrb, c, syms[GETARG_B(i)], regs[a+1]); + NEXT; + } + + CASE(OP_SCLASS) { + /* A B R(A) := R(B).singleton_class */ + regs[GETARG_A(i)] = mrb_singleton_class(mrb, regs[GETARG_B(i)]); + NEXT; + } + + CASE(OP_TCLASS) { + /* A B R(A) := target_class */ + regs[GETARG_A(i)] = mrb_obj_value(mrb->ci->target_class); + NEXT; + } + + CASE(OP_RANGE) { + /* A B C R(A) := range_new(R(B),R(B+1),C) */ + int b = GETARG_B(i); + regs[GETARG_A(i)] = mrb_range_new(mrb, regs[b], regs[b+1], GETARG_C(i)); + NEXT; + } + + CASE(OP_DEBUG) { + /* A debug print R(A),R(B),R(C) */ + printf("OP_DEBUG %d %d %d\n", GETARG_A(i), GETARG_B(i), GETARG_C(i)); + NEXT; + } + + CASE(OP_STOP) { + /* stop VM */ + L_STOP: + mrb->jmp = prev_jmp; + return mrb_nil_value(); + } + + CASE(OP_ERR) { + /* Bx raise RuntimeError with message Lit(Bx) */ + mrb_value msg = pool[GETARG_Bx(i)]; + mrb_value exc = mrb_exc_new3(mrb, mrb->eRuntimeError_class, msg); + + mrb->exc = mrb_object(exc); + goto L_RAISE; + } + } + END_DISPACTH; +} diff --git a/src/vm_core.h b/src/vm_core.h new file mode 100644 index 0000000000..98da043a23 --- /dev/null +++ b/src/vm_core.h @@ -0,0 +1,414 @@ +/********************************************************************** + + vm_core.h - + + $Author: yugui $ + created at: 04/01/01 19:41:38 JST + + Copyright 
(C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#ifndef RUBY_VM_CORE_H +#define RUBY_VM_CORE_H + +#define RUBY_VM_THREAD_MODEL 2 + +//#include "ruby/ruby.h" +#include "st.h" /* define ANYARGS */ + +//#include "node.h" +//#include "debug.h" +//#include "vm_opts.h" +//#include "id.h" +#include "method.h" + +#if defined(_WIN32) +#include "thread_win32.h" +#elif defined(HAVE_PTHREAD_H) +#include "thread_pthread.h" +#else +#error "unsupported thread type" +#endif + +#ifndef ENABLE_VM_OBJSPACE +#ifdef _WIN32 +/* + * TODO: object space indenpendent st_table. + * socklist needs st_table in mrb_w32_sysinit(), before object space + * initialization. + * It is too early now to change st_hash_type, since it breaks binary + * compatibility. + */ +#define ENABLE_VM_OBJSPACE 0 +#else +#define ENABLE_VM_OBJSPACE 1 +#endif +#endif + +#include +#include + +//#ifndef NSIG +//# define NSIG (_SIGMAX + 1) /* For QNX */ +//#endif + +//#define RUBY_NSIG NSIG + +#ifdef HAVE_STDARG_PROTOTYPES +#include +#define va_init_list(a,b) va_start(a,b) +#else +#include +#define va_init_list(a,b) va_start(a) +#endif + +#if defined(SIGSEGV) && defined(HAVE_SIGALTSTACK) && defined(SA_SIGINFO) && !defined(__NetBSD__) +#define USE_SIGALTSTACK +#endif + +/*****************/ +/* configuration */ +/*****************/ + +/* gcc ver. 
check */ +#if defined(__GNUC__) && __GNUC__ >= 2 + +#if OPT_TOKEN_THREADED_CODE +#if OPT_DIRECT_THREADED_CODE +#undef OPT_DIRECT_THREADED_CODE +#endif +#endif + +#else /* defined(__GNUC__) && __GNUC__ >= 2 */ + +/* disable threaded code options */ +#if OPT_DIRECT_THREADED_CODE +#undef OPT_DIRECT_THREADED_CODE +#endif +#if OPT_TOKEN_THREADED_CODE +#undef OPT_TOKEN_THREADED_CODE +#endif +#endif + +/* call threaded code */ +#if OPT_CALL_THREADED_CODE +#if OPT_DIRECT_THREADED_CODE +#undef OPT_DIRECT_THREADED_CODE +#endif /* OPT_DIRECT_THREADED_CODE */ +#if OPT_STACK_CACHING +#undef OPT_STACK_CACHING +#endif /* OPT_STACK_CACHING */ +#endif /* OPT_CALL_THREADED_CODE */ + +/* likely */ +#if __GNUC__ >= 3 +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else /* __GNUC__ >= 3 */ +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif /* __GNUC__ >= 3 */ + +#if __GNUC__ >= 3 +#define UNINITIALIZED_VAR(x) x = x +#else +#define UNINITIALIZED_VAR(x) x +#endif + +typedef unsigned long mrb_num_t; + +/* iseq data type */ + +struct iseq_compile_data_ensure_node_stack; + +typedef struct mrb_compile_option_struct { + int inline_const_cache; + int peephole_optimization; + int tailcall_optimization; + int specialized_instruction; + int operands_unification; + int instructions_unification; + int stack_caching; + int trace_instruction; + int debug_level; +} mrb_compile_option_t; + +struct iseq_inline_cache_entry { + mrb_value ic_vmstat; + mrb_value ic_class; + union { + mrb_value value; + mrb_method_entry_t *method; + long index; + } ic_value; +}; + +#if 1 +#define GetCoreDataFromValue(obj, type, ptr) do { \ + ptr = (type*)DATA_PTR(obj); \ +} while (0) +#else +#define GetCoreDataFromValue(obj, type, ptr) Data_Get_Struct(obj, type, ptr) +#endif + +#define GetISeqPtr(obj, ptr) \ + GetCoreDataFromValue(obj, mrb_iseq_t, ptr) + +struct mrb_iseq_struct; + +//enum ruby_special_exceptions { +// ruby_error_reenter, +// ruby_error_nomemory, +// 
/*
 * Block descriptor: bundles the receiver (self), two frame pointers
 * (lfp, dfp), the instruction sequence and the proc value of a block.
 *
 * NOTE(review): RUBY_VM_GET_BLOCK_PTR_IN_CFP() in this header casts
 * &(cfp)->self to mrb_block_t *, and RUBY_VM_GET_CFP_FROM_BLOCK_PTR()
 * steps back a fixed number of mrb_value slots.  The member order here
 * presumably has to mirror the matching members of the control frame --
 * confirm against the control-frame definition before reordering or
 * inserting fields.
 */
typedef struct mrb_block_struct {
    mrb_value self; /* share with method frame if it's only block */
    mrb_value *lfp; /* share with method frame if it's only block */
    mrb_value *dfp; /* share with method frame if it's only block */
    mrb_iseq_t *iseq;
    mrb_value proc;
} mrb_block_t;
/*
 * Frame type tags.  The tag lives in the low VM_FRAME_MAGIC_MASK_BITS bits
 * of a control frame's `flag` member; VM_FRAME_TYPE() extracts it.
 *
 * NOTE(review): the definitions of VM_FRAME_MAGIC_MASK and VM_FRAME_TYPE
 * were reconstructed from a garbled span (text between '<' and '>' had been
 * stripped) -- confirm against the upstream header.
 */
#define VM_FRAME_MAGIC_METHOD 0x11
#define VM_FRAME_MAGIC_BLOCK  0x21
#define VM_FRAME_MAGIC_CLASS  0x31
#define VM_FRAME_MAGIC_TOP    0x41
#define VM_FRAME_MAGIC_FINISH 0x51
#define VM_FRAME_MAGIC_CFUNC  0x61
#define VM_FRAME_MAGIC_PROC   0x71
#define VM_FRAME_MAGIC_IFUNC  0x81
#define VM_FRAME_MAGIC_EVAL   0x91
#define VM_FRAME_MAGIC_LAMBDA 0xa1
#define VM_FRAME_MAGIC_MASK_BITS   8
/*
 * Low-bit mask (0xff for 8 bits).  Written as (1 << bits) - 1 rather than
 * ~(~0 << bits): left-shifting a negative int is undefined behaviour.
 */
#define VM_FRAME_MAGIC_MASK   ((1 << VM_FRAME_MAGIC_MASK_BITS) - 1)

/* Type tag of the control frame pointed to by cfp. */
#define VM_FRAME_TYPE(cfp) ((cfp)->flag & VM_FRAME_MAGIC_MASK)

/* other frame flag */
#define VM_FRAME_FLAG_PASSED 0x0100

/* True when cfp is tagged as a C-function frame. */
#define RUBYVM_CFUNC_FRAME_P(cfp) \
  (VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_CFUNC)
/*
 * Pointer tagging helpers.
 *
 *   GC_GUARDED_PTR(p)     - p converted to mrb_value with bit 0 set
 *   GC_GUARDED_PTR_REF(p) - p with the two low bits cleared, as void *
 *   GC_GUARDED_PTR_P(p)   - non-zero when bit 0 of p is set
 *
 * The argument is parenthesized before the cast so that an expression such
 * as `base + 1` is evaluated with its own type first; without the
 * parentheses the cast would bind to `base` alone and the addition would be
 * done on the integer value.
 *
 * NOTE(review): these macros assume mrb_value is an integer type wide
 * enough to hold a pointer, and that tagged pointers are at least 4-byte
 * aligned -- neither is established in this part of the header; confirm.
 */
#define GC_GUARDED_PTR(p)     ((mrb_value)((mrb_value)(p) | 0x01))
#define GC_GUARDED_PTR_REF(p) ((void *)(((mrb_value)(p)) & ~0x03))
#define GC_GUARDED_PTR_P(p)   (((mrb_value)(p)) & 0x01)
/*
 * Interrupt request bits kept in (th)->interrupt_flag:
 *   bit 0 - timer interrupt
 *   bit 1 - ordinary interrupt (the only one RUBY_VM_INTERRUPTED() tests)
 *   bit 2 - finalizer interrupt
 * Each setter ORs its bit into the flag and yields the updated flag value.
 */
#define RUBY_VM_SET_TIMER_INTERRUPT(th)     ((th)->interrupt_flag |= (0x01 << 0))
#define RUBY_VM_SET_INTERRUPT(th)           ((th)->interrupt_flag |= (0x01 << 1))
#define RUBY_VM_SET_FINALIZER_INTERRUPT(th) ((th)->interrupt_flag |= (0x01 << 2))
#define RUBY_VM_INTERRUPTED(th)             ((th)->interrupt_flag & (0x01 << 1))
# makefile description.
# basic build file for Rite-Compiler
# 11.Apr.2011 coded by Kenji Yoshimoto.
# 31.Aug.2011 coded by Hiroshi Mimaki.

# project-specific macros
# extension of the executable-file is modifiable(.exe .out ...)
BASEDIR := ../../src
TARGET := ../../bin/mrbc
ifeq ($(OS),Windows_NT)
EXE := $(TARGET).exe
else
EXE := $(TARGET)
endif
YSRC := $(BASEDIR)/parse.y
YC := $(BASEDIR)/y.tab.c
# Sources from $(BASEDIR) that are NOT compiled via the generic rule:
# the generated parser (it has its own rule below), the sample main, and
# load.c / init_ext.c (mrbc.c defines its own empty mrb_init_ext() and
# mrb_init_mrblib()).
EXCEPT1 := $(YC) $(BASEDIR)/minimain.c $(BASEDIR)/load.c $(BASEDIR)/init_ext.c
OBJY := $(patsubst %.c,%.o,$(YC))
# OBJ0: the tool's own sources, OBJ1: the core sources minus EXCEPT1
OBJ0 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/../tools/mrbc/*.c))
OBJ1 := $(patsubst %.c,%.o,$(filter-out $(EXCEPT1),$(wildcard $(BASEDIR)/*.c)))
#OBJ2 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/regex/*.c))
#OBJ3 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/enc/*.c))
# OBJ2/OBJ3 are commented out above, so they expand to nothing here
OBJS := $(OBJ0) $(OBJ1) $(OBJ2) $(OBJ3)

# libraries, includes
LIBS = -lm
INCLUDES = -I$(BASEDIR) -I$(BASEDIR)/../include

# compiler, linker (gcc)
CC = gcc
LL = gcc
YACC = bison
# DEBUG_MODE = 1 selects -g, anything else selects -O3
DEBUG_MODE = 1
ifeq ($(DEBUG_MODE),1)
CFLAGS = -g
else
CFLAGS = -O3
endif
ALL_CFLAGS = -Wall -Werror-implicit-function-declaration $(CFLAGS)
MAKE_FLAGS = --no-print-directory CC="$(CC)" LL="$(LL)"

##############################
# generic build targets, rules

.PHONY : all
all : $(EXE)
	@echo "make: built targets of `pwd`"

# executable constructed using linker from object files
$(EXE) : $(OBJS) $(OBJY)
	$(LL) -o $@ $(OBJS) $(OBJY) $(LIBS)

# pull in the .d dependency files written by -MMD; the leading '-' keeps
# make quiet when they do not exist yet
-include $(OBJS:.o=.d) $(OBJY:.o=.d)

# objects compiled from source
$(OBJS) : %.o : %.c
	$(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $< -o $@

# parser compile
$(OBJY) : $(YC)
	$(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $(YC) -o $(OBJY)

# yacc compile
$(YC) : $(YSRC)
	$(YACC) -o $(YC) $(YSRC)

# clean up
.PHONY : clean
clean :
	-rm -f $(EXE) $(OBJS) $(OBJY) $(YC)
	-rm -f $(OBJS:.o=.d) $(OBJY:.o=.d)
	@echo "make: removing targets, objects and depend files of `pwd`"
/*
 * Build the output file name for `infile`.
 *
 * When `ext` is empty the result is a plain copy of `infile`.  Otherwise the
 * extension of the last path component is replaced by `ext`, or `ext` is
 * appended when that component has no '.'.  Only the last path component is
 * searched for the '.', so directory parts such as "./" , "../" or "lib.d/"
 * are never truncated.
 *
 * infile - input path (not modified)
 * ext    - new extension including the leading '.', or "" for a plain copy
 *
 * Returns a malloc()ed string owned by the caller.  The process exits with
 * EXIT_FAILURE if the allocation fails, so the result is never NULL.
 */
static char *
get_outfilename(char *infile, char *ext)
{
  size_t inlen = strlen(infile);
  char *outfile;
  char *base;
  char *sep;
  char *dot;

  /* replacing an extension never yields more than infile + ext characters */
  outfile = malloc(inlen + strlen(ext) + 1);
  if (outfile == NULL) {
    fprintf(stderr, "mrbc: out of memory\n");
    exit(EXIT_FAILURE);
  }
  memcpy(outfile, infile, inlen + 1);
  if (*ext == '\0')
    return outfile;

  /* locate the start of the last path component */
  base = outfile;
  if ((sep = strrchr(base, '/')) != NULL)
    base = sep + 1;
#ifdef _WIN32
  if ((sep = strrchr(base, '\\')) != NULL)
    base = sep + 1;
#endif

  dot = strrchr(base, '.');
  if (dot == NULL)
    dot = outfile + inlen;      /* no extension: append */
  strcpy(dot, ext);

  return outfile;
}
DUMP_TYPE_BIN : DUMP_TYPE_CODE; + break; + case 'c': + args->check_syntax = 1; + break; + case 'v': + ruby_show_version(mrb); + args->verbose = 1; + break; + case '-': + if (strcmp((*argv) + 2, "version") == 0) { + ruby_show_version(mrb); + } + else if (strcmp((*argv) + 2, "verbose") == 0) { + args->verbose = 1; + break; + } + else if (strcmp((*argv) + 2, "copyright") == 0) { + ruby_show_copyright(mrb); + } + else return -3; + return 0; + } + } + else if (args->rfp == NULL) { + infile = *argv; + if ((args->rfp = fopen(infile, "r")) == NULL) { + printf("%s: Cannot open program file. (%s)\n", *origargv, infile); + return 0; + } + } + } + + if (infile == NULL) + return -4; + if (args->check_syntax) + return 0; + + if (outfile == NULL) + outfile = get_outfilename(infile, args->ext); + + if ((args->wfp = fopen(outfile, "wb")) == NULL) { + printf("%s: Cannot open output file. (%s)\n", *origargv, outfile); + return 0; + } + + return 0; +} + +static void +cleanup(struct _args *args) +{ + if (args->rfp) + fclose(args->rfp); + if (args->wfp) + fclose(args->wfp); +} + +int +main(int argc, char **argv) +{ + mrb_state *mrb = mrb_open(); + int n = -1; + struct _args args; + struct mrb_parser_state *p; + + n = parse_args(mrb, argc, argv, &args); + + if (n < 0 || args.rfp == NULL) { + cleanup(&args); + usage(argv[0]); + return n; + } + + p = mrb_parse_file(mrb, args.rfp); + if (!p || !p->tree || p->nerr) { + cleanup(&args); + return -1; + } + + if (args.verbose) + parser_dump(mrb, p->tree, 0); + + n = mrb_generate_code(mrb, p->tree); + mrb_pool_close(p->pool); + + if (args.verbose) + codedump_all(mrb, n); + + if (n < 0 || args.check_syntax) { + cleanup(&args); + return n; + } + if (args.initname) { + if (args.dump_type == DUMP_TYPE_BIN) + n = mrb_bdump_irep(mrb, n, args.wfp, args.initname); + else + n = mrb_cdump_irep(mrb, n, args.wfp, args.initname); + } + else { + n = mrb_dump_irep(mrb, n, args.wfp); + } + + cleanup(&args); + + return n; +} + +void +mrb_init_ext(mrb_state *mrb) 
+{ +} + +void +mrb_init_mrblib(mrb_state *mrb) +{ +} + diff --git a/tools/mruby/Makefile b/tools/mruby/Makefile new file mode 100644 index 0000000000..18882e4ee6 --- /dev/null +++ b/tools/mruby/Makefile @@ -0,0 +1,89 @@ +# makefile discription. +# basic build file for Rite-Interpreter +# 11.Apr.2011 coded by Kenji Yoshimoto. +# 31.Aug.2011 coded by Hiroshi Mimaki. + +# project-specific macros +# extension of the executable-file is modifiable(.exe .out ...) +BASEDIR = ../../src +TARGET := ../../bin/mruby +ifeq ($(OS),Windows_NT) +EXE := $(TARGET).exe +else +EXE := $(TARGET) +endif +YSRC := $(BASEDIR)/parse.y +YC := $(BASEDIR)/y.tab.c +EXCEPT1 := $(YC) $(BASEDIR)/minimain.c $(BASEDIR)/dump.c $(BASEDIR)/cdump.c +OBJY := $(patsubst %.c,%.o,$(YC)) +OBJ0 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/../tools/mruby/*.c)) +OBJ1 := $(patsubst %.c,%.o,$(filter-out $(EXCEPT1),$(wildcard $(BASEDIR)/*.c))) +#OBJ2 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/regex/*.c)) +#OBJ3 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/enc/*.c)) +OBJS := $(OBJ0) $(OBJ1) $(OBJ2) $(OBJ3) +# mruby libraries +EXTC := $(BASEDIR)/../mrblib/mrblib.c +EXTRB := $(wildcard $(BASEDIR)/../mrblib/*.rb) +EXTM := $(patsubst %.c,%.o,$(EXTC)) +# ext libraries +#EXT1 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/../ext/socket/*.c)) +EXTS := $(EXT1) + +# libraries, includes +LIBS = -lm +INCLUDES = -I$(BASEDIR) -I$(BASEDIR)/../include +#INCLUDES = -I$(RITEVM_ROOT) + +# compiler, linker (gcc) +CC = gcc +LL = gcc +YACC = bison +DEBUG_MODE = 1 +ifeq ($(DEBUG_MODE),1) +CFLAGS = -g +else +CFLAGS = -O3 +endif +ALL_CFLAGS = -Wall -Werror-implicit-function-declaration $(CFLAGS) +MAKE_FLAGS = --no-print-directory CC="$(CC)" LL="$(LL)" + +############################## +# generic build targets, rules + +.PHONY : all +all : $(EXTM) $(EXE) + @echo "make: built targets of `pwd`" + +# executable constructed using linker from object files +$(EXE) : $(OBJS) $(OBJY) $(EXTM) $(EXTS) + $(LL) -o $@ $(OBJS) $(OBJY) $(EXTM) $(EXTS) 
/*
 * Print the command line synopsis followed by one line per supported
 * switch to standard output.
 *
 * name - program name shown in the "Usage:" line (callers pass argv[0])
 */
static void
usage(const char *name)
{
  static const char *const usage_msg[] = {
  "switches:",
  "-b load and execute RiteBinary(mrb) file",
  "-c check syntax only",
  "-v print version number, then turn on verbose mode",
  "--verbose run at verbose mode",
  "--version print the version",
  "--copyright print the copyright",
  NULL
  };
  const char *const *p;

  printf("Usage: %s [switches] programfile\n", name);
  /* the table is NULL-terminated */
  for (p = usage_msg; *p != NULL; p++)
    printf(" %s\n", *p);
}
0; argc--,argv++) { + if (**argv == '-') { + if (strlen(*argv) <= 1) + return -1; + + switch ((*argv)[1]) { + case 'b': + args->mrbfile = 1; + break; + case 'c': + args->check_syntax = 1; + break; + case 'v': + ruby_show_version(mrb); + args->verbose = 1; + break; + case '-': + if (strcmp((*argv) + 2, "version") == 0) { + ruby_show_version(mrb); + } + else if (strcmp((*argv) + 2, "verbose") == 0) { + args->verbose = 1; + break; + } + else if (strcmp((*argv) + 2, "copyright") == 0) { + ruby_show_copyright(mrb); + } + else return -3; + return 0; + } + } + else if (args->rfp == NULL) { + if ((args->rfp = fopen(*argv, args->mrbfile ? "rb" : "r")) == NULL) { + printf("%s: Cannot open program file. (%s)\n", *origargv, *argv); + return 0; + } + } + } + + return 0; +} + +static void +cleanup(struct _args *args) +{ + if (args->rfp) + fclose(args->rfp); +} + +int +main(int argc, char **argv) +{ + mrb_state *mrb = mrb_open(); + int n = -1; + struct _args args; + struct mrb_parser_state *p; + + n = parse_args(mrb, argc, argv, &args); + if (n < 0 || args.rfp == NULL) { + cleanup(&args); + usage(argv[0]); + return n; + } + + if (args.mrbfile) { + n = mrb_load_irep(mrb, args.rfp); + } + else { + p = mrb_parse_file(mrb, args.rfp); + if (!p || !p->tree || p->nerr) { + cleanup(&args); + return -1; + } + + if (args.verbose) + parser_dump(mrb, p->tree, 0); + + n = mrb_generate_code(mrb, p->tree); + mrb_pool_close(p->pool); + } + + if (n >= 0) { + if (args.verbose) + codedump_all(mrb, n); + + if (!args.check_syntax) { + mrb_run(mrb, mrb_proc_new(mrb, mrb->irep[n]), mrb_nil_value()); + if (mrb->exc) { + mrb_funcall(mrb, mrb_nil_value(), "p", 1, mrb_obj_value(mrb->exc)); + } + } + } + + cleanup(&args); + + return n; +}