charsetハンドリング実装の断片

m_ctype.hより

/*
  Table of function pointers implementing the character-set specific
  string operations. Callers reach an instance through a charset's `cset`
  pointer (e.g. field_charset->cset->well_formed_len(...) in
  Field_string::store()).

  NOTE: handler tables such as the sjis one are filled in with positional
  initializers, so the order of the members below must not be changed.
*/
typedef struct my_charset_handler_st
{
  /* Initialization hook; the sjis handler table sets this slot to NULL. */
  my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
  /* Multibyte routines */
  int     (*ismbchar)(struct charset_info_st *, const char *, const char *);
  int     (*mbcharlen)(struct charset_info_st *, uint);
  uint    (*numchars)(struct charset_info_st *, const char *b, const char *e);
  uint    (*charpos)(struct charset_info_st *, const char *b, const char *e, uint pos);
  /*
    Field_string::store() calls this with the range [b, e), the maximum
    number of characters that fit in the field and the address of an int
    error flag; it uses the return value as the number of bytes to copy.
  */
  uint    (*well_formed_len)(struct charset_info_st *,
                             const char *b,const char *e,
                             uint nchars, int *error);
  uint    (*lengthsp)(struct charset_info_st *, const char *ptr, uint length);
  uint    (*numcells)(struct charset_info_st *, const char *b, const char *e);
  
  /*
    Unicode conversion.
    Presumably mb_wc decodes one character from [s, e) into *wc and wc_mb
    encodes wc into [s, e) -- TODO confirm against the implementations.
  */
  int (*mb_wc)(struct charset_info_st *cs,my_wc_t *wc,
           const unsigned char *s,const unsigned char *e);
  int (*wc_mb)(struct charset_info_st *cs,my_wc_t wc,
           unsigned char *s,unsigned char *e);
  
  /*
    Functions for case and sort conversion.
    The *_str variants take only a string pointer; the others also take an
    explicit length.
  */
  void    (*caseup_str)(struct charset_info_st *, char *);
  void    (*casedn_str)(struct charset_info_st *, char *);
  void    (*caseup)(struct charset_info_st *, char *, uint);
  void    (*casedn)(struct charset_info_st *, char *, uint);
  
  /* Charset-dependent snprintf() and integer-to-string routines */
  int  (*snprintf)(struct charset_info_st *, char *to, uint n, const char *fmt,
           ...);
  int  (*long10_to_str)(struct charset_info_st *, char *to, uint n, int radix,
            long int val);
  int (*longlong10_to_str)(struct charset_info_st *, char *to, uint n,
               int radix, longlong val);
  
  /*
    Field_string::store() uses this to pad the unused tail of a field:
    it passes the first free byte, the number of remaining bytes and ' '.
  */
  void (*fill)(struct charset_info_st *, char *to, uint len, int fill);
  
  /* String-to-number conversion routines */
  long        (*strntol)(struct charset_info_st *, const char *s, uint l,
             int base, char **e, int *err);
  ulong      (*strntoul)(struct charset_info_st *, const char *s, uint l,
             int base, char **e, int *err);
  longlong   (*strntoll)(struct charset_info_st *, const char *s, uint l,
             int base, char **e, int *err);
  ulonglong (*strntoull)(struct charset_info_st *, const char *s, uint l,
             int base, char **e, int *err);
  /* Note: unlike the integer routines above, `s` is not const-qualified. */
  double      (*strntod)(struct charset_info_st *, char *s, uint l, char **e,
             int *err);
  longlong (*my_strtoll10)(struct charset_info_st *cs,
                           const char *nptr, char **endptr, int *error);
  /*
    Field_string::store() calls this with MY_SEQ_SPACES and adds the return
    value to the start pointer, i.e. treats it as a count of bytes to skip.
  */
  ulong        (*scan)(struct charset_info_st *, const char *b, const char *e,
               int sq);
} MY_CHARSET_HANDLER;

ctype-sjis.cより

/*
  Handler table for the sjis character set.
  The initializer is positional: each entry fills the MY_CHARSET_HANDLER
  member named in the comment next to it, so entries must stay in the same
  order as the struct members. Routines with an _sjis suffix are specific to
  this charset; the _mb and _8bit ones are shared implementations defined
  elsewhere.
*/
static MY_CHARSET_HANDLER my_charset_handler=
{
  NULL,			/* init */
  ismbchar_sjis,	/* ismbchar */
  mbcharlen_sjis,	/* mbcharlen */
  my_numchars_mb,	/* numchars */
  my_charpos_mb,	/* charpos */
  my_well_formed_len_sjis,	/* well_formed_len */
  my_lengthsp_8bit,	/* lengthsp */
  my_numcells_sjis,	/* numcells */
  my_mb_wc_sjis,	/* mb_wc */
  my_wc_mb_sjis,	/* wc_mb */
  my_caseup_str_8bit,	/* caseup_str */
  my_casedn_str_8bit,	/* casedn_str */
  my_caseup_8bit,	/* caseup */
  my_casedn_8bit,	/* casedn */
  my_snprintf_8bit,	/* snprintf */
  my_long10_to_str_8bit,	/* long10_to_str */
  my_longlong10_to_str_8bit,	/* longlong10_to_str */
  my_fill_8bit,		/* fill */
  my_strntol_8bit,	/* strntol */
  my_strntoul_8bit,	/* strntoul */
  my_strntoll_8bit,	/* strntoll */
  my_strntoull_8bit,	/* strntoull */
  my_strntod_8bit,	/* strntod */
  my_strtoll10_8bit,	/* my_strtoll10 */
  my_scan_8bit		/* scan */
};

m_ctype.hより(上の構造体からwell_formed_lenの宣言を再掲)

  /*
    Member of MY_CHARSET_HANDLER. In Field_string::store() it is called
    with the byte range [b, e), the maximum number of characters the field
    can hold and the address of an int error flag; the returned value is
    used there as the number of bytes that get copied into the field.
  */
  uint    (*well_formed_len)(struct charset_info_st *,
                             const char *b,const char *e,
                             uint nchars, int *error);

field.ccより

int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
{
  int error= 0, well_formed_error;
  uint32 not_used;
  char buff[80];
  String tmpstr(buff,sizeof(buff), &my_charset_bin);
  uint copy_length;
  
  /* See the comment for Field_long::store(long long) */
  DBUG_ASSERT(table->in_use == current_thd);
  
  /* Convert character set if nesessary */
  if (String::needs_conversion(length, cs, field_charset, &not_used))
  { 
    uint conv_errors;
    tmpstr.copy(from, length, cs, field_charset, &conv_errors);
    from= tmpstr.ptr();
    length=  tmpstr.length();
    if (conv_errors)
      error= 2;
  }

  /* 
    Make sure we don't break a multibyte sequence
    as well as don't copy a malformed data.
  */
  copy_length= field_charset->cset->well_formed_len(field_charset,
                                                    from,from+length,
                                                    field_length/
                                                    field_charset->mbmaxlen,
                                                    &well_formed_error);
  memcpy(ptr,from,copy_length);
  if (copy_length < field_length)	// Append spaces if shorter
    field_charset->cset->fill(field_charset,ptr+copy_length,
			      field_length-copy_length,' ');
  
  if ((copy_length < length) && table->in_use->count_cuted_fields)
  {					// Check if we loosed some info
    const char *end=from+length;
    from+= copy_length;
    from+= field_charset->cset->scan(field_charset, from, end,
				     MY_SEQ_SPACES);
    if (from != end)
      error= 2;
  }
  if (error)
    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_TRUNCATED, 1);
  return error;
}