Ruby 2.0.0p353 (2013-11-22 revision 43784)
pack.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  pack.c -
4 
5  $Author: eregon $
6  created at: Thu Feb 10 15:17:05 JST 1994
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/ruby.h"
13 #include "ruby/encoding.h"
14 #include <sys/types.h>
15 #include <ctype.h>
16 #include <errno.h>
17 
18 #define GCC_VERSION_SINCE(major, minor, patchlevel) \
19  (defined(__GNUC__) && !defined(__INTEL_COMPILER) && \
20  ((__GNUC__ > (major)) || \
21  (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \
22  (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel))))
23 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4
24 # define NATINT_PACK
25 #endif
26 
27 #ifdef DYNAMIC_ENDIAN
28  /* for universal binary of NEXTSTEP and MacOS X */
29  /* useless since autoconf 2.63? */
30  static int
31  is_bigendian(void)
32  {
33  static int init = 0;
34  static int endian_value;
35  char *p;
36 
37  if (init) return endian_value;
38  init = 1;
39  p = (char*)&init;
40  return endian_value = p[0]?0:1;
41  }
42 # define BIGENDIAN_P() (is_bigendian())
43 #elif defined(WORDS_BIGENDIAN)
44 # define BIGENDIAN_P() 1
45 #else
46 # define BIGENDIAN_P() 0
47 #endif
48 
49 #ifdef NATINT_PACK
50 # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
51 #else
52 # define NATINT_LEN(type,len) ((int)sizeof(type))
53 #endif
54 
55 #if SIZEOF_LONG == 8
56 # define INT64toNUM(x) LONG2NUM(x)
57 # define UINT64toNUM(x) ULONG2NUM(x)
58 #elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8
59 # define INT64toNUM(x) LL2NUM(x)
60 # define UINT64toNUM(x) ULL2NUM(x)
61 #endif
62 
63 #define define_swapx(x, xtype) \
64 static xtype \
65 TOKEN_PASTE(swap,x)(xtype z) \
66 { \
67  xtype r; \
68  xtype *zp; \
69  unsigned char *s, *t; \
70  int i; \
71  \
72  zp = xmalloc(sizeof(xtype)); \
73  *zp = z; \
74  s = (unsigned char*)zp; \
75  t = xmalloc(sizeof(xtype)); \
76  for (i=0; i<sizeof(xtype); i++) { \
77  t[sizeof(xtype)-i-1] = s[i]; \
78  } \
79  r = *(xtype *)t; \
80  xfree(t); \
81  xfree(zp); \
82  return r; \
83 }
84 
85 #if GCC_VERSION_SINCE(4,3,0)
86 # define swap32(x) __builtin_bswap32(x)
87 # define swap64(x) __builtin_bswap64(x)
88 #endif
89 
90 #ifndef swap16
91 # define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
92 #endif
93 
94 #ifndef swap32
95 # define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \
96  |(((x)>>24)&0xFF) \
97  |(((x)&0x0000FF00)<<8) \
98  |(((x)&0x00FF0000)>>8) ))
99 #endif
100 
101 #ifndef swap64
102 # ifdef HAVE_INT64_T
103 # define byte_in_64bit(n) ((uint64_t)0xff << (n))
104 # define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \
105  |(((x)>>56)&0xFF) \
106  |(((x)&byte_in_64bit(8))<<40) \
107  |(((x)&byte_in_64bit(48))>>40) \
108  |(((x)&byte_in_64bit(16))<<24) \
109  |(((x)&byte_in_64bit(40))>>24) \
110  |(((x)&byte_in_64bit(24))<<8) \
111  |(((x)&byte_in_64bit(32))>>8)))
112 # endif
113 #endif
114 
115 #if SIZEOF_SHORT == 2
116 # define swaps(x) swap16(x)
117 #elif SIZEOF_SHORT == 4
118 # define swaps(x) swap32(x)
119 #else
120  define_swapx(s,short)
121 #endif
122 
123 #if SIZEOF_INT == 2
124 # define swapi(x) swap16(x)
125 #elif SIZEOF_INT == 4
126 # define swapi(x) swap32(x)
127 #else
128  define_swapx(i,int)
129 #endif
130 
131 #if SIZEOF_LONG == 4
132 # define swapl(x) swap32(x)
133 #elif SIZEOF_LONG == 8
134 # define swapl(x) swap64(x)
135 #else
136  define_swapx(l,long)
137 #endif
138 
139 #ifdef HAVE_LONG_LONG
140 # if SIZEOF_LONG_LONG == 8
141 # define swapll(x) swap64(x)
142 # else
143  define_swapx(ll,LONG_LONG)
144 # endif
145 #endif
146 
147 #if SIZEOF_FLOAT == 4 && defined(HAVE_INT32_T)
148 # define swapf(x) swap32(x)
149 # define FLOAT_SWAPPER uint32_t
150 #else
151  define_swapx(f,float)
152 #endif
153 
154 #if SIZEOF_DOUBLE == 8 && defined(HAVE_INT64_T)
155 # define swapd(x) swap64(x)
156 # define DOUBLE_SWAPPER uint64_t
157 #elif SIZEOF_DOUBLE == 8 && defined(HAVE_INT32_T)
158  static double
159  swapd(const double d)
160  {
161  double dtmp = d;
162  uint32_t utmp[2];
163  uint32_t utmp0;
164 
165  utmp[0] = 0; utmp[1] = 0;
166  memcpy(utmp,&dtmp,sizeof(double));
167  utmp0 = utmp[0];
168  utmp[0] = swap32(utmp[1]);
169  utmp[1] = swap32(utmp0);
170  memcpy(&dtmp,utmp,sizeof(double));
171  return dtmp;
172  }
173 #else
174  define_swapx(d, double)
175 #endif
176 
177 #undef define_swapx
178 
179 #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
180 #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
181 #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
182 #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
183 #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
184 #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
185 #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
186 #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
187 
188 #ifdef FLOAT_SWAPPER
189 # define FLOAT_CONVWITH(y) FLOAT_SWAPPER y;
190 # define HTONF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
191  (y) = rb_htonf((FLOAT_SWAPPER)(y)), \
192  memcpy(&(x),&(y),sizeof(float)), \
193  (x))
194 # define HTOVF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
195  (y) = rb_htovf((FLOAT_SWAPPER)(y)), \
196  memcpy(&(x),&(y),sizeof(float)), \
197  (x))
198 # define NTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
199  (y) = rb_ntohf((FLOAT_SWAPPER)(y)), \
200  memcpy(&(x),&(y),sizeof(float)), \
201  (x))
202 # define VTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
203  (y) = rb_vtohf((FLOAT_SWAPPER)(y)), \
204  memcpy(&(x),&(y),sizeof(float)), \
205  (x))
206 #else
207 # define FLOAT_CONVWITH(y)
208 # define HTONF(x,y) rb_htonf(x)
209 # define HTOVF(x,y) rb_htovf(x)
210 # define NTOHF(x,y) rb_ntohf(x)
211 # define VTOHF(x,y) rb_vtohf(x)
212 #endif
213 
214 #ifdef DOUBLE_SWAPPER
215 # define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y;
216 # define HTOND(x,y) (memcpy(&(y),&(x),sizeof(double)), \
217  (y) = rb_htond((DOUBLE_SWAPPER)(y)), \
218  memcpy(&(x),&(y),sizeof(double)), \
219  (x))
220 # define HTOVD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
221  (y) = rb_htovd((DOUBLE_SWAPPER)(y)), \
222  memcpy(&(x),&(y),sizeof(double)), \
223  (x))
224 # define NTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
225  (y) = rb_ntohd((DOUBLE_SWAPPER)(y)), \
226  memcpy(&(x),&(y),sizeof(double)), \
227  (x))
228 # define VTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
229  (y) = rb_vtohd((DOUBLE_SWAPPER)(y)), \
230  memcpy(&(x),&(y),sizeof(double)), \
231  (x))
232 #else
233 # define DOUBLE_CONVWITH(y)
234 # define HTOND(x,y) rb_htond(x)
235 # define HTOVD(x,y) rb_htovd(x)
236 # define NTOHD(x,y) rb_ntohd(x)
237 # define VTOHD(x,y) rb_vtohd(x)
238 #endif
239 
240 static unsigned long
241 num2i32(VALUE x)
242 {
243  x = rb_to_int(x); /* is nil OK? (should not) */
244 
245  if (FIXNUM_P(x)) return FIX2LONG(x);
246  if (RB_TYPE_P(x, T_BIGNUM)) {
247  return rb_big2ulong_pack(x);
248  }
249  rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x));
250 
251  UNREACHABLE;
252 }
253 
254 #define MAX_INTEGER_PACK_SIZE 8
255 /* #define FORCE_BIG_PACK */
256 
257 static const char toofew[] = "too few arguments";
258 
259 static void encodes(VALUE,const char*,long,int,int);
260 static void qpencode(VALUE,VALUE,long);
261 
262 static unsigned long utf8_to_uv(const char*,long*);
263 
264 /*
265  * call-seq:
266  * arr.pack ( aTemplateString ) -> aBinaryString
267  *
268  * Packs the contents of <i>arr</i> into a binary sequence according to
269  * the directives in <i>aTemplateString</i> (see the table below)
270  * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
271  * which gives the width of the resulting field. The remaining
272  * directives also may take a count, indicating the number of array
273  * elements to convert. If the count is an asterisk
274  * (``<code>*</code>''), all remaining array elements will be
275  * converted. Any of the directives ``<code>sSiIlL</code>'' may be
276  * followed by an underscore (``<code>_</code>'') or
277  * exclamation mark (``<code>!</code>'') to use the underlying
278  * platform's native size for the specified type; otherwise, they use a
279  * platform-independent size. Spaces are ignored in the template
280  * string. See also <code>String#unpack</code>.
281  *
282  * a = [ "a", "b", "c" ]
283  * n = [ 65, 66, 67 ]
284  * a.pack("A3A3A3") #=> "a b c "
285  * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
286  * n.pack("ccc") #=> "ABC"
287  *
288  * Directives for +pack+.
289  *
290  * Integer | Array |
291  * Directive | Element | Meaning
292  * ---------------------------------------------------------------------------
293  * C | Integer | 8-bit unsigned (unsigned char)
294  * S | Integer | 16-bit unsigned, native endian (uint16_t)
295  * L | Integer | 32-bit unsigned, native endian (uint32_t)
296  * Q | Integer | 64-bit unsigned, native endian (uint64_t)
297  * | |
298  * c | Integer | 8-bit signed (signed char)
299  * s | Integer | 16-bit signed, native endian (int16_t)
300  * l | Integer | 32-bit signed, native endian (int32_t)
301  * q | Integer | 64-bit signed, native endian (int64_t)
302  * | |
303  * S_, S! | Integer | unsigned short, native endian
304  * I, I_, I! | Integer | unsigned int, native endian
305  * L_, L! | Integer | unsigned long, native endian
306  * | |
307  * s_, s! | Integer | signed short, native endian
308  * i, i_, i! | Integer | signed int, native endian
309  * l_, l! | Integer | signed long, native endian
310  * | |
311  * S> L> Q> | Integer | same as the directives without ">" except
312  * s> l> q> | | big endian
313  * S!> I!> | | (available since Ruby 1.9.3)
314  * L!> | | "S>" is same as "n"
315  * s!> i!> | | "L>" is same as "N"
316  * l!> | |
317  * | |
318  * S< L< Q< | Integer | same as the directives without "<" except
319  * s< l< q< | | little endian
320  * S!< I!< | | (available since Ruby 1.9.3)
321  * L!< | | "S<" is same as "v"
322  * s!< i!< | | "L<" is same as "V"
323  * l!< | |
324  * | |
325  * n | Integer | 16-bit unsigned, network (big-endian) byte order
326  * N | Integer | 32-bit unsigned, network (big-endian) byte order
327  * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
328  * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
329  * | |
330  * U | Integer | UTF-8 character
331  * w | Integer | BER-compressed integer
332  *
333  * Float | |
334  * Directive | | Meaning
335  * ---------------------------------------------------------------------------
336  * D, d | Float | double-precision, native format
337  * F, f | Float | single-precision, native format
338  * E | Float | double-precision, little-endian byte order
339  * e | Float | single-precision, little-endian byte order
340  * G | Float | double-precision, network (big-endian) byte order
341  * g | Float | single-precision, network (big-endian) byte order
342  *
343  * String | |
344  * Directive | | Meaning
345  * ---------------------------------------------------------------------------
346  * A | String | arbitrary binary string (space padded, count is width)
347  * a | String | arbitrary binary string (null padded, count is width)
348  * Z | String | same as ``a'', except that null is added with *
349  * B | String | bit string (MSB first)
350  * b | String | bit string (LSB first)
351  * H | String | hex string (high nibble first)
352  * h | String | hex string (low nibble first)
353  * u | String | UU-encoded string
354  * M | String | quoted printable, MIME encoding (see RFC2045)
355  * m | String | base64 encoded string (see RFC 2045, count is width)
356  * | | (if count is 0, no line feeds are added, see RFC 4648)
357  * P | String | pointer to a structure (fixed-length string)
358  * p | String | pointer to a null-terminated string
359  *
360  * Misc. | |
361  * Directive | | Meaning
362  * ---------------------------------------------------------------------------
363  * @ | --- | moves to absolute position
364  * X | --- | back up a byte
365  * x | --- | null byte
366  */
367 
368 static VALUE
370 {
371  static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
372  static const char spc10[] = " ";
373  const char *p, *pend;
374  VALUE res, from, associates = 0;
375  char type;
376  long items, len, idx, plen;
377  const char *ptr;
378  int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
379 #ifdef NATINT_PACK
380  int natint; /* native integer */
381 #endif
382  int integer_size, bigendian_p;
383 
384  StringValue(fmt);
385  p = RSTRING_PTR(fmt);
386  pend = p + RSTRING_LEN(fmt);
387  res = rb_str_buf_new(0);
388 
389  items = RARRAY_LEN(ary);
390  idx = 0;
391 
392 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
393 #define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW)
394 #define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW)
395 
396  while (p < pend) {
397  int explicit_endian = 0;
398  if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
399  rb_raise(rb_eRuntimeError, "format string modified");
400  }
401  type = *p++; /* get data type */
402 #ifdef NATINT_PACK
403  natint = 0;
404 #endif
405 
406  if (ISSPACE(type)) continue;
407  if (type == '#') {
408  while ((p < pend) && (*p != '\n')) {
409  p++;
410  }
411  continue;
412  }
413 
414  {
415  static const char natstr[] = "sSiIlL";
416  static const char endstr[] = "sSiIlLqQ";
417 
418  modifiers:
419  switch (*p) {
420  case '_':
421  case '!':
422  if (strchr(natstr, type)) {
423 #ifdef NATINT_PACK
424  natint = 1;
425 #endif
426  p++;
427  }
428  else {
429  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
430  }
431  goto modifiers;
432 
433  case '<':
434  case '>':
435  if (!strchr(endstr, type)) {
436  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
437  }
438  if (explicit_endian) {
439  rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
440  }
441  explicit_endian = *p++;
442  goto modifiers;
443  }
444  }
445 
446  if (*p == '*') { /* set data length */
447  len = strchr("@Xxu", type) ? 0
448  : strchr("PMm", type) ? 1
449  : items;
450  p++;
451  }
452  else if (ISDIGIT(*p)) {
453  errno = 0;
454  len = STRTOUL(p, (char**)&p, 10);
455  if (errno) {
456  rb_raise(rb_eRangeError, "pack length too big");
457  }
458  }
459  else {
460  len = 1;
461  }
462 
463  switch (type) {
464  case 'U':
465  /* if encoding is US-ASCII, upgrade to UTF-8 */
466  if (enc_info == 1) enc_info = 2;
467  break;
468  case 'm': case 'M': case 'u':
469  /* keep US-ASCII (do nothing) */
470  break;
471  default:
472  /* fall back to BINARY */
473  enc_info = 0;
474  break;
475  }
476  switch (type) {
477  case 'A': case 'a': case 'Z':
478  case 'B': case 'b':
479  case 'H': case 'h':
480  from = NEXTFROM;
481  if (NIL_P(from)) {
482  ptr = "";
483  plen = 0;
484  }
485  else {
486  StringValue(from);
487  ptr = RSTRING_PTR(from);
488  plen = RSTRING_LEN(from);
489  OBJ_INFECT(res, from);
490  }
491 
492  if (p[-1] == '*')
493  len = plen;
494 
495  switch (type) {
496  case 'a': /* arbitrary binary string (null padded) */
497  case 'A': /* arbitrary binary string (ASCII space padded) */
498  case 'Z': /* null terminated string */
499  if (plen >= len) {
500  rb_str_buf_cat(res, ptr, len);
501  if (p[-1] == '*' && type == 'Z')
502  rb_str_buf_cat(res, nul10, 1);
503  }
504  else {
505  rb_str_buf_cat(res, ptr, plen);
506  len -= plen;
507  while (len >= 10) {
508  rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
509  len -= 10;
510  }
511  rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
512  }
513  break;
514 
515 #define castchar(from) (char)((from) & 0xff)
516 
517  case 'b': /* bit string (ascending) */
518  {
519  int byte = 0;
520  long i, j = 0;
521 
522  if (len > plen) {
523  j = (len - plen + 1)/2;
524  len = plen;
525  }
526  for (i=0; i++ < len; ptr++) {
527  if (*ptr & 1)
528  byte |= 128;
529  if (i & 7)
530  byte >>= 1;
531  else {
532  char c = castchar(byte);
533  rb_str_buf_cat(res, &c, 1);
534  byte = 0;
535  }
536  }
537  if (len & 7) {
538  char c;
539  byte >>= 7 - (len & 7);
540  c = castchar(byte);
541  rb_str_buf_cat(res, &c, 1);
542  }
543  len = j;
544  goto grow;
545  }
546  break;
547 
548  case 'B': /* bit string (descending) */
549  {
550  int byte = 0;
551  long i, j = 0;
552 
553  if (len > plen) {
554  j = (len - plen + 1)/2;
555  len = plen;
556  }
557  for (i=0; i++ < len; ptr++) {
558  byte |= *ptr & 1;
559  if (i & 7)
560  byte <<= 1;
561  else {
562  char c = castchar(byte);
563  rb_str_buf_cat(res, &c, 1);
564  byte = 0;
565  }
566  }
567  if (len & 7) {
568  char c;
569  byte <<= 7 - (len & 7);
570  c = castchar(byte);
571  rb_str_buf_cat(res, &c, 1);
572  }
573  len = j;
574  goto grow;
575  }
576  break;
577 
578  case 'h': /* hex string (low nibble first) */
579  {
580  int byte = 0;
581  long i, j = 0;
582 
583  if (len > plen) {
584  j = (len + 1) / 2 - (plen + 1) / 2;
585  len = plen;
586  }
587  for (i=0; i++ < len; ptr++) {
588  if (ISALPHA(*ptr))
589  byte |= (((*ptr & 15) + 9) & 15) << 4;
590  else
591  byte |= (*ptr & 15) << 4;
592  if (i & 1)
593  byte >>= 4;
594  else {
595  char c = castchar(byte);
596  rb_str_buf_cat(res, &c, 1);
597  byte = 0;
598  }
599  }
600  if (len & 1) {
601  char c = castchar(byte);
602  rb_str_buf_cat(res, &c, 1);
603  }
604  len = j;
605  goto grow;
606  }
607  break;
608 
609  case 'H': /* hex string (high nibble first) */
610  {
611  int byte = 0;
612  long i, j = 0;
613 
614  if (len > plen) {
615  j = (len + 1) / 2 - (plen + 1) / 2;
616  len = plen;
617  }
618  for (i=0; i++ < len; ptr++) {
619  if (ISALPHA(*ptr))
620  byte |= ((*ptr & 15) + 9) & 15;
621  else
622  byte |= *ptr & 15;
623  if (i & 1)
624  byte <<= 4;
625  else {
626  char c = castchar(byte);
627  rb_str_buf_cat(res, &c, 1);
628  byte = 0;
629  }
630  }
631  if (len & 1) {
632  char c = castchar(byte);
633  rb_str_buf_cat(res, &c, 1);
634  }
635  len = j;
636  goto grow;
637  }
638  break;
639  }
640  break;
641 
642  case 'c': /* signed char */
643  case 'C': /* unsigned char */
644  while (len-- > 0) {
645  char c;
646 
647  from = NEXTFROM;
648  c = (char)num2i32(from);
649  rb_str_buf_cat(res, &c, sizeof(char));
650  }
651  break;
652 
653  case 's': /* signed short */
654  integer_size = NATINT_LEN(short, 2);
655  bigendian_p = BIGENDIAN_P();
656  goto pack_integer;
657 
658  case 'S': /* unsigned short */
659  integer_size = NATINT_LEN(short, 2);
660  bigendian_p = BIGENDIAN_P();
661  goto pack_integer;
662 
663  case 'i': /* signed int */
664  integer_size = (int)sizeof(int);
665  bigendian_p = BIGENDIAN_P();
666  goto pack_integer;
667 
668  case 'I': /* unsigned int */
669  integer_size = (int)sizeof(int);
670  bigendian_p = BIGENDIAN_P();
671  goto pack_integer;
672 
673  case 'l': /* signed long */
674  integer_size = NATINT_LEN(long, 4);
675  bigendian_p = BIGENDIAN_P();
676  goto pack_integer;
677 
678  case 'L': /* unsigned long */
679  integer_size = NATINT_LEN(long, 4);
680  bigendian_p = BIGENDIAN_P();
681  goto pack_integer;
682 
683  case 'q': /* signed quad (64bit) int */
684  integer_size = 8;
685  bigendian_p = BIGENDIAN_P();
686  goto pack_integer;
687 
688  case 'Q': /* unsigned quad (64bit) int */
689  integer_size = 8;
690  bigendian_p = BIGENDIAN_P();
691  goto pack_integer;
692 
693  case 'n': /* unsigned short (network byte-order) */
694  integer_size = 2;
695  bigendian_p = 1;
696  goto pack_integer;
697 
698  case 'N': /* unsigned long (network byte-order) */
699  integer_size = 4;
700  bigendian_p = 1;
701  goto pack_integer;
702 
703  case 'v': /* unsigned short (VAX byte-order) */
704  integer_size = 2;
705  bigendian_p = 0;
706  goto pack_integer;
707 
708  case 'V': /* unsigned long (VAX byte-order) */
709  integer_size = 4;
710  bigendian_p = 0;
711  goto pack_integer;
712 
713  pack_integer:
714  if (explicit_endian) {
715  bigendian_p = explicit_endian == '>';
716  }
717 
718  switch (integer_size) {
719 #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
720  case SIZEOF_INT16_T:
721  while (len-- > 0) {
722  union {
723  int16_t i;
724  char a[sizeof(int16_t)];
725  } v;
726 
727  from = NEXTFROM;
728  v.i = (int16_t)num2i32(from);
729  if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
730  rb_str_buf_cat(res, v.a, sizeof(int16_t));
731  }
732  break;
733 #endif
734 
735 #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
736  case SIZEOF_INT32_T:
737  while (len-- > 0) {
738  union {
739  int32_t i;
740  char a[sizeof(int32_t)];
741  } v;
742 
743  from = NEXTFROM;
744  v.i = (int32_t)num2i32(from);
745  if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
746  rb_str_buf_cat(res, v.a, sizeof(int32_t));
747  }
748  break;
749 #endif
750 
751 #if defined(HAVE_INT64_T) && SIZEOF_LONG == SIZEOF_INT64_T && !defined(FORCE_BIG_PACK)
752  case SIZEOF_INT64_T:
753  while (len-- > 0) {
754  union {
755  int64_t i;
756  char a[sizeof(int64_t)];
757  } v;
758 
759  from = NEXTFROM;
760  v.i = num2i32(from); /* can return 64bit value if SIZEOF_LONG == SIZEOF_INT64_T */
761  if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
762  rb_str_buf_cat(res, v.a, sizeof(int64_t));
763  }
764  break;
765 #endif
766 
767  default:
768  if (integer_size > MAX_INTEGER_PACK_SIZE)
769  rb_bug("unexpected intger size for pack: %d", integer_size);
770  while (len-- > 0) {
771  union {
772  unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG];
773  char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG*SIZEOF_LONG];
774  } v;
775  int num_longs = (integer_size+SIZEOF_LONG-1)/SIZEOF_LONG;
776  int i;
777 
778  from = NEXTFROM;
779  rb_big_pack(from, v.i, num_longs);
780  if (bigendian_p) {
781  for (i = 0; i < num_longs/2; i++) {
782  unsigned long t = v.i[i];
783  v.i[i] = v.i[num_longs-1-i];
784  v.i[num_longs-1-i] = t;
785  }
786  }
787  if (bigendian_p != BIGENDIAN_P()) {
788  for (i = 0; i < num_longs; i++)
789  v.i[i] = swapl(v.i[i]);
790  }
791  rb_str_buf_cat(res,
792  bigendian_p ?
793  v.a + sizeof(long)*num_longs - integer_size :
794  v.a,
795  integer_size);
796  }
797  break;
798  }
799  break;
800 
801  case 'f': /* single precision float in native format */
802  case 'F': /* ditto */
803  while (len-- > 0) {
804  float f;
805 
806  from = NEXTFROM;
807  f = (float)RFLOAT_VALUE(rb_to_float(from));
808  rb_str_buf_cat(res, (char*)&f, sizeof(float));
809  }
810  break;
811 
812  case 'e': /* single precision float in VAX byte-order */
813  while (len-- > 0) {
814  float f;
815  FLOAT_CONVWITH(ftmp);
816 
817  from = NEXTFROM;
818  f = (float)RFLOAT_VALUE(rb_to_float(from));
819  f = HTOVF(f,ftmp);
820  rb_str_buf_cat(res, (char*)&f, sizeof(float));
821  }
822  break;
823 
824  case 'E': /* double precision float in VAX byte-order */
825  while (len-- > 0) {
826  double d;
827  DOUBLE_CONVWITH(dtmp);
828 
829  from = NEXTFROM;
830  d = RFLOAT_VALUE(rb_to_float(from));
831  d = HTOVD(d,dtmp);
832  rb_str_buf_cat(res, (char*)&d, sizeof(double));
833  }
834  break;
835 
836  case 'd': /* double precision float in native format */
837  case 'D': /* ditto */
838  while (len-- > 0) {
839  double d;
840 
841  from = NEXTFROM;
842  d = RFLOAT_VALUE(rb_to_float(from));
843  rb_str_buf_cat(res, (char*)&d, sizeof(double));
844  }
845  break;
846 
847  case 'g': /* single precision float in network byte-order */
848  while (len-- > 0) {
849  float f;
850  FLOAT_CONVWITH(ftmp);
851 
852  from = NEXTFROM;
853  f = (float)RFLOAT_VALUE(rb_to_float(from));
854  f = HTONF(f,ftmp);
855  rb_str_buf_cat(res, (char*)&f, sizeof(float));
856  }
857  break;
858 
859  case 'G': /* double precision float in network byte-order */
860  while (len-- > 0) {
861  double d;
862  DOUBLE_CONVWITH(dtmp);
863 
864  from = NEXTFROM;
865  d = RFLOAT_VALUE(rb_to_float(from));
866  d = HTOND(d,dtmp);
867  rb_str_buf_cat(res, (char*)&d, sizeof(double));
868  }
869  break;
870 
871  case 'x': /* null byte */
872  grow:
873  while (len >= 10) {
874  rb_str_buf_cat(res, nul10, 10);
875  len -= 10;
876  }
877  rb_str_buf_cat(res, nul10, len);
878  break;
879 
880  case 'X': /* back up byte */
881  shrink:
882  plen = RSTRING_LEN(res);
883  if (plen < len)
884  rb_raise(rb_eArgError, "X outside of string");
885  rb_str_set_len(res, plen - len);
886  break;
887 
888  case '@': /* null fill to absolute position */
889  len -= RSTRING_LEN(res);
890  if (len > 0) goto grow;
891  len = -len;
892  if (len > 0) goto shrink;
893  break;
894 
895  case '%':
896  rb_raise(rb_eArgError, "%% is not supported");
897  break;
898 
899  case 'U': /* Unicode character */
900  while (len-- > 0) {
901  SIGNED_VALUE l;
902  char buf[8];
903  int le;
904 
905  from = NEXTFROM;
906  from = rb_to_int(from);
907  l = NUM2LONG(from);
908  if (l < 0) {
909  rb_raise(rb_eRangeError, "pack(U): value out of range");
910  }
911  le = rb_uv_to_utf8(buf, l);
912  rb_str_buf_cat(res, (char*)buf, le);
913  }
914  break;
915 
916  case 'u': /* uuencoded string */
917  case 'm': /* base64 encoded string */
918  from = NEXTFROM;
919  StringValue(from);
920  ptr = RSTRING_PTR(from);
921  plen = RSTRING_LEN(from);
922 
923  if (len == 0 && type == 'm') {
924  encodes(res, ptr, plen, type, 0);
925  ptr += plen;
926  break;
927  }
928  if (len <= 2)
929  len = 45;
930  else if (len > 63 && type == 'u')
931  len = 63;
932  else
933  len = len / 3 * 3;
934  while (plen > 0) {
935  long todo;
936 
937  if (plen > len)
938  todo = len;
939  else
940  todo = plen;
941  encodes(res, ptr, todo, type, 1);
942  plen -= todo;
943  ptr += todo;
944  }
945  break;
946 
947  case 'M': /* quoted-printable encoded string */
948  from = rb_obj_as_string(NEXTFROM);
949  if (len <= 1)
950  len = 72;
951  qpencode(res, from, len);
952  break;
953 
954  case 'P': /* pointer to packed byte string */
955  from = THISFROM;
956  if (!NIL_P(from)) {
957  StringValue(from);
958  if (RSTRING_LEN(from) < len) {
959  rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
960  RSTRING_LEN(from), len);
961  }
962  }
963  len = 1;
964  /* FALL THROUGH */
965  case 'p': /* pointer to string */
966  while (len-- > 0) {
967  char *t;
968  from = NEXTFROM;
969  if (NIL_P(from)) {
970  t = 0;
971  }
972  else {
973  t = StringValuePtr(from);
974  }
975  if (!associates) {
976  associates = rb_ary_new();
977  }
978  rb_ary_push(associates, from);
979  rb_obj_taint(from);
980  rb_str_buf_cat(res, (char*)&t, sizeof(char*));
981  }
982  break;
983 
984  case 'w': /* BER compressed integer */
985  while (len-- > 0) {
986  unsigned long ul;
987  VALUE buf = rb_str_new(0, 0);
988  char c, *bufs, *bufe;
989 
990  from = NEXTFROM;
991  if (RB_TYPE_P(from, T_BIGNUM)) {
992  VALUE big128 = rb_uint2big(128);
993  while (RB_TYPE_P(from, T_BIGNUM)) {
994  from = rb_big_divmod(from, big128);
995  c = castchar(NUM2INT(RARRAY_PTR(from)[1]) | 0x80); /* mod */
996  rb_str_buf_cat(buf, &c, sizeof(char));
997  from = RARRAY_PTR(from)[0]; /* div */
998  }
999  }
1000 
1001  {
1002  long l = NUM2LONG(from);
1003  if (l < 0) {
1004  rb_raise(rb_eArgError, "can't compress negative numbers");
1005  }
1006  ul = l;
1007  }
1008 
1009  while (ul) {
1010  c = castchar((ul & 0x7f) | 0x80);
1011  rb_str_buf_cat(buf, &c, sizeof(char));
1012  ul >>= 7;
1013  }
1014 
1015  if (RSTRING_LEN(buf)) {
1016  bufs = RSTRING_PTR(buf);
1017  bufe = bufs + RSTRING_LEN(buf) - 1;
1018  *bufs &= 0x7f; /* clear continue bit */
1019  while (bufs < bufe) { /* reverse */
1020  c = *bufs;
1021  *bufs++ = *bufe;
1022  *bufe-- = c;
1023  }
1024  rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
1025  }
1026  else {
1027  c = 0;
1028  rb_str_buf_cat(res, &c, sizeof(char));
1029  }
1030  }
1031  break;
1032 
1033  default:
1034  rb_warning("unknown pack directive '%c' in '%s'",
1035  type, RSTRING_PTR(fmt));
1036  break;
1037  }
1038  }
1039 
1040  if (associates) {
1041  rb_str_associate(res, associates);
1042  }
1043  OBJ_INFECT(res, fmt);
1044  switch (enc_info) {
1045  case 1:
1047  break;
1048  case 2:
1050  break;
1051  default:
1052  /* do nothing, keep ASCII-8BIT */
1053  break;
1054  }
1055  return res;
1056 }
1057 
1058 static const char uu_table[] =
1059 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1060 static const char b64_table[] =
1061 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1062 
1063 static void
1064 encodes(VALUE str, const char *s, long len, int type, int tail_lf)
1065 {
1066  char buff[4096];
1067  long i = 0;
1068  const char *trans = type == 'u' ? uu_table : b64_table;
1069  char padding;
1070 
1071  if (type == 'u') {
1072  buff[i++] = (char)len + ' ';
1073  padding = '`';
1074  }
1075  else {
1076  padding = '=';
1077  }
1078  while (len >= 3) {
1079  while (len >= 3 && sizeof(buff)-i >= 4) {
1080  buff[i++] = trans[077 & (*s >> 2)];
1081  buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1082  buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
1083  buff[i++] = trans[077 & s[2]];
1084  s += 3;
1085  len -= 3;
1086  }
1087  if (sizeof(buff)-i < 4) {
1088  rb_str_buf_cat(str, buff, i);
1089  i = 0;
1090  }
1091  }
1092 
1093  if (len == 2) {
1094  buff[i++] = trans[077 & (*s >> 2)];
1095  buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1096  buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
1097  buff[i++] = padding;
1098  }
1099  else if (len == 1) {
1100  buff[i++] = trans[077 & (*s >> 2)];
1101  buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
1102  buff[i++] = padding;
1103  buff[i++] = padding;
1104  }
1105  if (tail_lf) buff[i++] = '\n';
1106  rb_str_buf_cat(str, buff, i);
1107 }
1108 
1109 static const char hex_table[] = "0123456789ABCDEF";
1110 
1111 static void
1112 qpencode(VALUE str, VALUE from, long len)
1113 {
1114  char buff[1024];
1115  long i = 0, n = 0, prev = EOF;
1116  unsigned char *s = (unsigned char*)RSTRING_PTR(from);
1117  unsigned char *send = s + RSTRING_LEN(from);
1118 
1119  while (s < send) {
1120  if ((*s > 126) ||
1121  (*s < 32 && *s != '\n' && *s != '\t') ||
1122  (*s == '=')) {
1123  buff[i++] = '=';
1124  buff[i++] = hex_table[*s >> 4];
1125  buff[i++] = hex_table[*s & 0x0f];
1126  n += 3;
1127  prev = EOF;
1128  }
1129  else if (*s == '\n') {
1130  if (prev == ' ' || prev == '\t') {
1131  buff[i++] = '=';
1132  buff[i++] = *s;
1133  }
1134  buff[i++] = *s;
1135  n = 0;
1136  prev = *s;
1137  }
1138  else {
1139  buff[i++] = *s;
1140  n++;
1141  prev = *s;
1142  }
1143  if (n > len) {
1144  buff[i++] = '=';
1145  buff[i++] = '\n';
1146  n = 0;
1147  prev = '\n';
1148  }
1149  if (i > 1024 - 5) {
1150  rb_str_buf_cat(str, buff, i);
1151  i = 0;
1152  }
1153  s++;
1154  }
1155  if (n > 0) {
1156  buff[i++] = '=';
1157  buff[i++] = '\n';
1158  }
1159  if (i > 0) {
1160  rb_str_buf_cat(str, buff, i);
1161  }
1162 }
1163 
1164 static inline int
1165 hex2num(char c)
1166 {
1167  switch (c) {
1168  case '0': case '1': case '2': case '3': case '4':
1169  case '5': case '6': case '7': case '8': case '9':
1170  return c - '0';
1171  case 'a': case 'b': case 'c':
1172  case 'd': case 'e': case 'f':
1173  return c - 'a' + 10;
1174  case 'A': case 'B': case 'C':
1175  case 'D': case 'E': case 'F':
1176  return c - 'A' + 10;
1177  default:
1178  return -1;
1179  }
1180 }
1181 
1182 #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
1183  tmp_len = 0; \
1184  if (len > (long)((send-s)/(sz))) { \
1185  if (!star) { \
1186  tmp_len = len-(send-s)/(sz); \
1187  } \
1188  len = (send-s)/(sz); \
1189  } \
1190 } while (0)
1191 
1192 #define PACK_ITEM_ADJUST() do { \
1193  if (tmp_len > 0 && !block_p) \
1194  rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
1195 } while (0)
1196 
1197 static VALUE
1198 infected_str_new(const char *ptr, long len, VALUE str)
1199 {
1200  VALUE s = rb_str_new(ptr, len);
1201 
1202  OBJ_INFECT(s, str);
1203  return s;
1204 }
1205 
1206 /*
1207  * call-seq:
1208  * str.unpack(format) -> anArray
1209  *
1210  * Decodes <i>str</i> (which may contain binary data) according to the
1211  * format string, returning an array of each value extracted. The
1212  * format string consists of a sequence of single-character directives,
1213  * summarized in the table at the end of this entry.
1214  * Each directive may be followed
1215  * by a number, indicating the number of times to repeat with this
1216  * directive. An asterisk (``<code>*</code>'') will use up all
1217  * remaining elements. The directives <code>sSiIlL</code> may each be
1218  * followed by an underscore (``<code>_</code>'') or
1219  * exclamation mark (``<code>!</code>'') to use the underlying
1220  * platform's native size for the specified type; otherwise, it uses a
1221  * platform-independent consistent size. Spaces are ignored in the
1222  * format string. See also <code>Array#pack</code>.
1223  *
1224  * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
1225  * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
1226  * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
1227  * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
1228  * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
1229  * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
1230  * "now=20is".unpack('M*') #=> ["now is"]
1231  * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
1232  *
1233  * This table summarizes the various formats and the Ruby classes
1234  * returned by each.
1235  *
1236  * Integer | |
1237  * Directive | Returns | Meaning
1238  * -----------------------------------------------------------------
1239  * C | Integer | 8-bit unsigned (unsigned char)
1240  * S | Integer | 16-bit unsigned, native endian (uint16_t)
1241  * L | Integer | 32-bit unsigned, native endian (uint32_t)
1242  * Q | Integer | 64-bit unsigned, native endian (uint64_t)
1243  * | |
1244  * c | Integer | 8-bit signed (signed char)
1245  * s | Integer | 16-bit signed, native endian (int16_t)
1246  * l | Integer | 32-bit signed, native endian (int32_t)
1247  * q | Integer | 64-bit signed, native endian (int64_t)
1248  * | |
1249  * S_, S! | Integer | unsigned short, native endian
1250  * I, I_, I! | Integer | unsigned int, native endian
1251  * L_, L! | Integer | unsigned long, native endian
1252  * | |
1253  * s_, s! | Integer | signed short, native endian
1254  * i, i_, i! | Integer | signed int, native endian
1255  * l_, l! | Integer | signed long, native endian
1256  * | |
1257  * S> L> Q> | Integer | same as the directives without ">" except
1258  * s> l> q> | | big endian
1259  * S!> I!> | | (available since Ruby 1.9.3)
1260  * L!> Q!> | | "S>" is same as "n"
1261  * s!> i!> | | "L>" is same as "N"
1262  * l!> q!> | |
1263  * | |
1264  * S< L< Q< | Integer | same as the directives without "<" except
1265  * s< l< q< | | little endian
1266  * S!< I!< | | (available since Ruby 1.9.3)
1267  * L!< Q!< | | "S<" is same as "v"
1268  * s!< i!< | | "L<" is same as "V"
1269  * l!< q!< | |
1270  * | |
1271  * n | Integer | 16-bit unsigned, network (big-endian) byte order
1272  * N | Integer | 32-bit unsigned, network (big-endian) byte order
1273  * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
1274  * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
1275  * | |
1276  * U | Integer | UTF-8 character
1277  * w | Integer | BER-compressed integer (see Array#pack)
1278  *
1279  * Float | |
1280  * Directive | Returns | Meaning
1281  * -----------------------------------------------------------------
1282  * D, d | Float | double-precision, native format
1283  * F, f | Float | single-precision, native format
1284  * E | Float | double-precision, little-endian byte order
1285  * e | Float | single-precision, little-endian byte order
1286  * G | Float | double-precision, network (big-endian) byte order
1287  * g | Float | single-precision, network (big-endian) byte order
1288  *
1289  * String | |
1290  * Directive | Returns | Meaning
1291  * -----------------------------------------------------------------
1292  * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
1293  * a | String | arbitrary binary string
1294  * Z | String | null-terminated string
1295  * B | String | bit string (MSB first)
1296  * b | String | bit string (LSB first)
1297  * H | String | hex string (high nibble first)
1298  * h | String | hex string (low nibble first)
1299  * u | String | UU-encoded string
1300  * M | String | quoted-printable, MIME encoding (see RFC2045)
1301  * m | String | base64 encoded string (RFC 2045) (default)
1302  * | | base64 encoded string (RFC 4648) if followed by 0
1303  * P | String | pointer to a structure (fixed-length string)
1304  * p | String | pointer to a null-terminated string
1305  *
1306  * Misc. | |
1307  * Directive | Returns | Meaning
1308  * -----------------------------------------------------------------
1309  * @ | --- | skip to the offset given by the length argument
1310  * X | --- | skip backward one byte
1311  * x | --- | skip forward one byte
1312  */
1313 
1314 static VALUE
1316 {
1317  static const char hexdigits[] = "0123456789abcdef";
1318  char *s, *send;
1319  char *p, *pend;
1320  VALUE ary;
1321  char type;
1322  long len, tmp_len;
1323  int star;
1324 #ifdef NATINT_PACK
1325  int natint; /* native integer */
1326 #endif
1327  int block_p = rb_block_given_p();
1328  int signed_p, integer_size, bigendian_p;
1329 #define UNPACK_PUSH(item) do {\
1330  VALUE item_val = (item);\
1331  if (block_p) {\
1332  rb_yield(item_val);\
1333  }\
1334  else {\
1335  rb_ary_push(ary, item_val);\
1336  }\
1337  } while (0)
1338 
1339  StringValue(str);
1340  StringValue(fmt);
1341  s = RSTRING_PTR(str);
1342  send = s + RSTRING_LEN(str);
1343  p = RSTRING_PTR(fmt);
1344  pend = p + RSTRING_LEN(fmt);
1345 
1346  ary = block_p ? Qnil : rb_ary_new();
1347  while (p < pend) {
1348  int explicit_endian = 0;
1349  type = *p++;
1350 #ifdef NATINT_PACK
1351  natint = 0;
1352 #endif
1353 
1354  if (ISSPACE(type)) continue;
1355  if (type == '#') {
1356  while ((p < pend) && (*p != '\n')) {
1357  p++;
1358  }
1359  continue;
1360  }
1361 
1362  star = 0;
1363  {
1364  static const char natstr[] = "sSiIlL";
1365  static const char endstr[] = "sSiIlLqQ";
1366 
1367  modifiers:
1368  switch (*p) {
1369  case '_':
1370  case '!':
1371 
1372  if (strchr(natstr, type)) {
1373 #ifdef NATINT_PACK
1374  natint = 1;
1375 #endif
1376  p++;
1377  }
1378  else {
1379  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1380  }
1381  goto modifiers;
1382 
1383  case '<':
1384  case '>':
1385  if (!strchr(endstr, type)) {
1386  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
1387  }
1388  if (explicit_endian) {
1389  rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1390  }
1391  explicit_endian = *p++;
1392  goto modifiers;
1393  }
1394  }
1395 
1396  if (p >= pend)
1397  len = 1;
1398  else if (*p == '*') {
1399  star = 1;
1400  len = send - s;
1401  p++;
1402  }
1403  else if (ISDIGIT(*p)) {
1404  errno = 0;
1405  len = STRTOUL(p, (char**)&p, 10);
1406  if (errno) {
1407  rb_raise(rb_eRangeError, "pack length too big");
1408  }
1409  }
1410  else {
1411  len = (type != '@');
1412  }
1413 
1414  switch (type) {
1415  case '%':
1416  rb_raise(rb_eArgError, "%% is not supported");
1417  break;
1418 
1419  case 'A':
1420  if (len > send - s) len = send - s;
1421  {
1422  long end = len;
1423  char *t = s + len - 1;
1424 
1425  while (t >= s) {
1426  if (*t != ' ' && *t != '\0') break;
1427  t--; len--;
1428  }
1429  UNPACK_PUSH(infected_str_new(s, len, str));
1430  s += end;
1431  }
1432  break;
1433 
1434  case 'Z':
1435  {
1436  char *t = s;
1437 
1438  if (len > send-s) len = send-s;
1439  while (t < s+len && *t) t++;
1440  UNPACK_PUSH(infected_str_new(s, t-s, str));
1441  if (t < send) t++;
1442  s = star ? t : s+len;
1443  }
1444  break;
1445 
1446  case 'a':
1447  if (len > send - s) len = send - s;
1448  UNPACK_PUSH(infected_str_new(s, len, str));
1449  s += len;
1450  break;
1451 
1452  case 'b':
1453  {
1454  VALUE bitstr;
1455  char *t;
1456  int bits;
1457  long i;
1458 
1459  if (p[-1] == '*' || len > (send - s) * 8)
1460  len = (send - s) * 8;
1461  bits = 0;
1462  UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1463  t = RSTRING_PTR(bitstr);
1464  for (i=0; i<len; i++) {
1465  if (i & 7) bits >>= 1;
1466  else bits = *s++;
1467  *t++ = (bits & 1) ? '1' : '0';
1468  }
1469  }
1470  break;
1471 
1472  case 'B':
1473  {
1474  VALUE bitstr;
1475  char *t;
1476  int bits;
1477  long i;
1478 
1479  if (p[-1] == '*' || len > (send - s) * 8)
1480  len = (send - s) * 8;
1481  bits = 0;
1482  UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1483  t = RSTRING_PTR(bitstr);
1484  for (i=0; i<len; i++) {
1485  if (i & 7) bits <<= 1;
1486  else bits = *s++;
1487  *t++ = (bits & 128) ? '1' : '0';
1488  }
1489  }
1490  break;
1491 
1492  case 'h':
1493  {
1494  VALUE bitstr;
1495  char *t;
1496  int bits;
1497  long i;
1498 
1499  if (p[-1] == '*' || len > (send - s) * 2)
1500  len = (send - s) * 2;
1501  bits = 0;
1502  UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1503  t = RSTRING_PTR(bitstr);
1504  for (i=0; i<len; i++) {
1505  if (i & 1)
1506  bits >>= 4;
1507  else
1508  bits = *s++;
1509  *t++ = hexdigits[bits & 15];
1510  }
1511  }
1512  break;
1513 
1514  case 'H':
1515  {
1516  VALUE bitstr;
1517  char *t;
1518  int bits;
1519  long i;
1520 
1521  if (p[-1] == '*' || len > (send - s) * 2)
1522  len = (send - s) * 2;
1523  bits = 0;
1524  UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1525  t = RSTRING_PTR(bitstr);
1526  for (i=0; i<len; i++) {
1527  if (i & 1)
1528  bits <<= 4;
1529  else
1530  bits = *s++;
1531  *t++ = hexdigits[(bits >> 4) & 15];
1532  }
1533  }
1534  break;
1535 
1536  case 'c':
1537  PACK_LENGTH_ADJUST_SIZE(sizeof(char));
1538  while (len-- > 0) {
1539  int c = *s++;
1540  if (c > (char)127) c-=256;
1541  UNPACK_PUSH(INT2FIX(c));
1542  }
1543  PACK_ITEM_ADJUST();
1544  break;
1545 
1546  case 'C':
1547  PACK_LENGTH_ADJUST_SIZE(sizeof(unsigned char));
1548  while (len-- > 0) {
1549  unsigned char c = *s++;
1550  UNPACK_PUSH(INT2FIX(c));
1551  }
1552  PACK_ITEM_ADJUST();
1553  break;
1554 
1555  case 's':
1556  signed_p = 1;
1557  integer_size = NATINT_LEN(short, 2);
1558  bigendian_p = BIGENDIAN_P();
1559  goto unpack_integer;
1560 
1561  case 'S':
1562  signed_p = 0;
1563  integer_size = NATINT_LEN(short, 2);
1564  bigendian_p = BIGENDIAN_P();
1565  goto unpack_integer;
1566 
1567  case 'i':
1568  signed_p = 1;
1569  integer_size = (int)sizeof(int);
1570  bigendian_p = BIGENDIAN_P();
1571  goto unpack_integer;
1572 
1573  case 'I':
1574  signed_p = 0;
1575  integer_size = (int)sizeof(int);
1576  bigendian_p = BIGENDIAN_P();
1577  goto unpack_integer;
1578 
1579  case 'l':
1580  signed_p = 1;
1581  integer_size = NATINT_LEN(long, 4);
1582  bigendian_p = BIGENDIAN_P();
1583  goto unpack_integer;
1584 
1585  case 'L':
1586  signed_p = 0;
1587  integer_size = NATINT_LEN(long, 4);
1588  bigendian_p = BIGENDIAN_P();
1589  goto unpack_integer;
1590 
1591  case 'q':
1592  signed_p = 1;
1593  integer_size = 8;
1594  bigendian_p = BIGENDIAN_P();
1595  goto unpack_integer;
1596 
1597  case 'Q':
1598  signed_p = 0;
1599  integer_size = 8;
1600  bigendian_p = BIGENDIAN_P();
1601  goto unpack_integer;
1602 
1603  case 'n':
1604  signed_p = 0;
1605  integer_size = 2;
1606  bigendian_p = 1;
1607  goto unpack_integer;
1608 
1609  case 'N':
1610  signed_p = 0;
1611  integer_size = 4;
1612  bigendian_p = 1;
1613  goto unpack_integer;
1614 
1615  case 'v':
1616  signed_p = 0;
1617  integer_size = 2;
1618  bigendian_p = 0;
1619  goto unpack_integer;
1620 
1621  case 'V':
1622  signed_p = 0;
1623  integer_size = 4;
1624  bigendian_p = 0;
1625  goto unpack_integer;
1626 
1627  unpack_integer:
1628  if (explicit_endian) {
1629  bigendian_p = explicit_endian == '>';
1630  }
1631 
1632  switch (integer_size) {
1633 #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
1634  case SIZEOF_INT16_T:
1635  if (signed_p) {
1636  PACK_LENGTH_ADJUST_SIZE(sizeof(int16_t));
1637  while (len-- > 0) {
1638  union {
1639  int16_t i;
1640  char a[sizeof(int16_t)];
1641  } v;
1642  memcpy(v.a, s, sizeof(int16_t));
1643  if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1644  s += sizeof(int16_t);
1645  UNPACK_PUSH(INT2FIX(v.i));
1646  }
1647  PACK_ITEM_ADJUST();
1648  }
1649  else {
1650  PACK_LENGTH_ADJUST_SIZE(sizeof(uint16_t));
1651  while (len-- > 0) {
1652  union {
1653  uint16_t i;
1654  char a[sizeof(uint16_t)];
1655  } v;
1656  memcpy(v.a, s, sizeof(uint16_t));
1657  if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1658  s += sizeof(uint16_t);
1659  UNPACK_PUSH(INT2FIX(v.i));
1660  }
1661  PACK_ITEM_ADJUST();
1662  }
1663  break;
1664 #endif
1665 
1666 #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
1667  case SIZEOF_INT32_T:
1668  if (signed_p) {
1669  PACK_LENGTH_ADJUST_SIZE(sizeof(int32_t));
1670  while (len-- > 0) {
1671  union {
1672  int32_t i;
1673  char a[sizeof(int32_t)];
1674  } v;
1675  memcpy(v.a, s, sizeof(int32_t));
1676  if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1677  s += sizeof(int32_t);
1678  UNPACK_PUSH(INT2NUM(v.i));
1679  }
1680  PACK_ITEM_ADJUST();
1681  }
1682  else {
1684  while (len-- > 0) {
1685  union {
1686  uint32_t i;
1687  char a[sizeof(uint32_t)];
1688  } v;
1689  memcpy(v.a, s, sizeof(uint32_t));
1690  if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1691  s += sizeof(uint32_t);
1692  UNPACK_PUSH(UINT2NUM(v.i));
1693  }
1694  PACK_ITEM_ADJUST();
1695  }
1696  break;
1697 #endif
1698 
1699 #if defined(HAVE_INT64_T) && !defined(FORCE_BIG_PACK)
1700  case SIZEOF_INT64_T:
1701  if (signed_p) {
1702  PACK_LENGTH_ADJUST_SIZE(sizeof(int64_t));
1703  while (len-- > 0) {
1704  union {
1705  int64_t i;
1706  char a[sizeof(int64_t)];
1707  } v;
1708  memcpy(v.a, s, sizeof(int64_t));
1709  if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1710  s += sizeof(int64_t);
1711  UNPACK_PUSH(INT64toNUM(v.i));
1712  }
1713  PACK_ITEM_ADJUST();
1714  }
1715  else {
1717  while (len-- > 0) {
1718  union {
1719  uint64_t i;
1720  char a[sizeof(uint64_t)];
1721  } v;
1722  memcpy(v.a, s, sizeof(uint64_t));
1723  if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1724  s += sizeof(uint64_t);
1725  UNPACK_PUSH(UINT64toNUM(v.i));
1726  }
1727  PACK_ITEM_ADJUST();
1728  }
1729  break;
1730 #endif
1731 
1732  default:
1733  if (integer_size > MAX_INTEGER_PACK_SIZE)
1734  rb_bug("unexpected integer size for pack: %d", integer_size);
1735  PACK_LENGTH_ADJUST_SIZE(integer_size);
1736  while (len-- > 0) {
1737  union {
1738  unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG];
1739  char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG*SIZEOF_LONG];
1740  } v;
1741  int num_longs = (integer_size+SIZEOF_LONG)/SIZEOF_LONG;
1742  int i;
1743 
1744  if (signed_p && (signed char)s[bigendian_p ? 0 : (integer_size-1)] < 0)
1745  memset(v.a, 0xff, sizeof(long)*num_longs);
1746  else
1747  memset(v.a, 0, sizeof(long)*num_longs);
1748  if (bigendian_p)
1749  memcpy(v.a + sizeof(long)*num_longs - integer_size, s, integer_size);
1750  else
1751  memcpy(v.a, s, integer_size);
1752  if (bigendian_p) {
1753  for (i = 0; i < num_longs/2; i++) {
1754  unsigned long t = v.i[i];
1755  v.i[i] = v.i[num_longs-1-i];
1756  v.i[num_longs-1-i] = t;
1757  }
1758  }
1759  if (bigendian_p != BIGENDIAN_P()) {
1760  for (i = 0; i < num_longs; i++)
1761  v.i[i] = swapl(v.i[i]);
1762  }
1763  s += integer_size;
1764  UNPACK_PUSH(rb_big_unpack(v.i, num_longs));
1765  }
1766  PACK_ITEM_ADJUST();
1767  break;
1768  }
1769  break;
1770 
1771  case 'f':
1772  case 'F':
1773  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1774  while (len-- > 0) {
1775  float tmp;
1776  memcpy(&tmp, s, sizeof(float));
1777  s += sizeof(float);
1778  UNPACK_PUSH(DBL2NUM((double)tmp));
1779  }
1780  PACK_ITEM_ADJUST();
1781  break;
1782 
1783  case 'e':
1784  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1785  while (len-- > 0) {
1786  float tmp;
1787  FLOAT_CONVWITH(ftmp);
1788 
1789  memcpy(&tmp, s, sizeof(float));
1790  s += sizeof(float);
1791  tmp = VTOHF(tmp,ftmp);
1792  UNPACK_PUSH(DBL2NUM((double)tmp));
1793  }
1794  PACK_ITEM_ADJUST();
1795  break;
1796 
1797  case 'E':
1798  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1799  while (len-- > 0) {
1800  double tmp;
1801  DOUBLE_CONVWITH(dtmp);
1802 
1803  memcpy(&tmp, s, sizeof(double));
1804  s += sizeof(double);
1805  tmp = VTOHD(tmp,dtmp);
1806  UNPACK_PUSH(DBL2NUM(tmp));
1807  }
1808  PACK_ITEM_ADJUST();
1809  break;
1810 
1811  case 'D':
1812  case 'd':
1813  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1814  while (len-- > 0) {
1815  double tmp;
1816  memcpy(&tmp, s, sizeof(double));
1817  s += sizeof(double);
1818  UNPACK_PUSH(DBL2NUM(tmp));
1819  }
1820  PACK_ITEM_ADJUST();
1821  break;
1822 
1823  case 'g':
1824  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1825  while (len-- > 0) {
1826  float tmp;
1827  FLOAT_CONVWITH(ftmp);
1828 
1829  memcpy(&tmp, s, sizeof(float));
1830  s += sizeof(float);
1831  tmp = NTOHF(tmp,ftmp);
1832  UNPACK_PUSH(DBL2NUM((double)tmp));
1833  }
1834  PACK_ITEM_ADJUST();
1835  break;
1836 
1837  case 'G':
1838  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1839  while (len-- > 0) {
1840  double tmp;
1841  DOUBLE_CONVWITH(dtmp);
1842 
1843  memcpy(&tmp, s, sizeof(double));
1844  s += sizeof(double);
1845  tmp = NTOHD(tmp,dtmp);
1846  UNPACK_PUSH(DBL2NUM(tmp));
1847  }
1848  PACK_ITEM_ADJUST();
1849  break;
1850 
1851  case 'U':
1852  if (len > send - s) len = send - s;
1853  while (len > 0 && s < send) {
1854  long alen = send - s;
1855  unsigned long l;
1856 
1857  l = utf8_to_uv(s, &alen);
1858  s += alen; len--;
1859  UNPACK_PUSH(ULONG2NUM(l));
1860  }
1861  break;
1862 
1863  case 'u':
1864  {
1865  VALUE buf = infected_str_new(0, (send - s)*3/4, str);
1866  char *ptr = RSTRING_PTR(buf);
1867  long total = 0;
1868 
1869  while (s < send && *s > ' ' && *s < 'a') {
1870  long a,b,c,d;
1871  char hunk[4];
1872 
1873  hunk[3] = '\0';
1874  len = (*s++ - ' ') & 077;
1875  total += len;
1876  if (total > RSTRING_LEN(buf)) {
1877  len -= total - RSTRING_LEN(buf);
1878  total = RSTRING_LEN(buf);
1879  }
1880 
1881  while (len > 0) {
1882  long mlen = len > 3 ? 3 : len;
1883 
1884  if (s < send && *s >= ' ')
1885  a = (*s++ - ' ') & 077;
1886  else
1887  a = 0;
1888  if (s < send && *s >= ' ')
1889  b = (*s++ - ' ') & 077;
1890  else
1891  b = 0;
1892  if (s < send && *s >= ' ')
1893  c = (*s++ - ' ') & 077;
1894  else
1895  c = 0;
1896  if (s < send && *s >= ' ')
1897  d = (*s++ - ' ') & 077;
1898  else
1899  d = 0;
1900  hunk[0] = (char)(a << 2 | b >> 4);
1901  hunk[1] = (char)(b << 4 | c >> 2);
1902  hunk[2] = (char)(c << 6 | d);
1903  memcpy(ptr, hunk, mlen);
1904  ptr += mlen;
1905  len -= mlen;
1906  }
1907  if (*s == '\r') s++;
1908  if (*s == '\n') s++;
1909  else if (s < send && (s+1 == send || s[1] == '\n'))
1910  s += 2; /* possible checksum byte */
1911  }
1912 
1913  rb_str_set_len(buf, total);
1914  UNPACK_PUSH(buf);
1915  }
1916  break;
1917 
1918  case 'm':
1919  {
1920  VALUE buf = infected_str_new(0, (send - s)*3/4, str);
1921  char *ptr = RSTRING_PTR(buf);
1922  int a = -1,b = -1,c = 0,d = 0;
1923  static signed char b64_xtable[256];
1924 
1925  if (b64_xtable['/'] <= 0) {
1926  int i;
1927 
1928  for (i = 0; i < 256; i++) {
1929  b64_xtable[i] = -1;
1930  }
1931  for (i = 0; i < 64; i++) {
1932  b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1933  }
1934  }
1935  if (len == 0) {
1936  while (s < send) {
1937  a = b = c = d = -1;
1938  a = b64_xtable[(unsigned char)*s++];
1939  if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1940  b = b64_xtable[(unsigned char)*s++];
1941  if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1942  if (*s == '=') {
1943  if (s + 2 == send && *(s + 1) == '=') break;
1944  rb_raise(rb_eArgError, "invalid base64");
1945  }
1946  c = b64_xtable[(unsigned char)*s++];
1947  if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1948  if (s + 1 == send && *s == '=') break;
1949  d = b64_xtable[(unsigned char)*s++];
1950  if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1951  *ptr++ = castchar(a << 2 | b >> 4);
1952  *ptr++ = castchar(b << 4 | c >> 2);
1953  *ptr++ = castchar(c << 6 | d);
1954  }
1955  if (c == -1) {
1956  *ptr++ = castchar(a << 2 | b >> 4);
1957  if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1958  }
1959  else if (d == -1) {
1960  *ptr++ = castchar(a << 2 | b >> 4);
1961  *ptr++ = castchar(b << 4 | c >> 2);
1962  if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1963  }
1964  }
1965  else {
1966  while (s < send) {
1967  a = b = c = d = -1;
1968  while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1969  if (s >= send) break;
1970  s++;
1971  while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1972  if (s >= send) break;
1973  s++;
1974  while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1975  if (*s == '=' || s >= send) break;
1976  s++;
1977  while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1978  if (*s == '=' || s >= send) break;
1979  s++;
1980  *ptr++ = castchar(a << 2 | b >> 4);
1981  *ptr++ = castchar(b << 4 | c >> 2);
1982  *ptr++ = castchar(c << 6 | d);
1983  }
1984  if (a != -1 && b != -1) {
1985  if (c == -1 && *s == '=')
1986  *ptr++ = castchar(a << 2 | b >> 4);
1987  else if (c != -1 && *s == '=') {
1988  *ptr++ = castchar(a << 2 | b >> 4);
1989  *ptr++ = castchar(b << 4 | c >> 2);
1990  }
1991  }
1992  }
1993  rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1994  UNPACK_PUSH(buf);
1995  }
1996  break;
1997 
1998  case 'M':
1999  {
2000  VALUE buf = infected_str_new(0, send - s, str);
2001  char *ptr = RSTRING_PTR(buf), *ss = s;
2002  int c1, c2;
2003 
2004  while (s < send) {
2005  if (*s == '=') {
2006  if (++s == send) break;
2007  if (s+1 < send && *s == '\r' && *(s+1) == '\n')
2008  s++;
2009  if (*s != '\n') {
2010  if ((c1 = hex2num(*s)) == -1) break;
2011  if (++s == send) break;
2012  if ((c2 = hex2num(*s)) == -1) break;
2013  *ptr++ = castchar(c1 << 4 | c2);
2014  }
2015  }
2016  else {
2017  *ptr++ = *s;
2018  }
2019  s++;
2020  ss = s;
2021  }
2022  rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
2023  rb_str_buf_cat(buf, ss, send-ss);
2025  UNPACK_PUSH(buf);
2026  }
2027  break;
2028 
2029  case '@':
2030  if (len > RSTRING_LEN(str))
2031  rb_raise(rb_eArgError, "@ outside of string");
2032  s = RSTRING_PTR(str) + len;
2033  break;
2034 
2035  case 'X':
2036  if (len > s - RSTRING_PTR(str))
2037  rb_raise(rb_eArgError, "X outside of string");
2038  s -= len;
2039  break;
2040 
2041  case 'x':
2042  if (len > send - s)
2043  rb_raise(rb_eArgError, "x outside of string");
2044  s += len;
2045  break;
2046 
2047  case 'P':
2048  if (sizeof(char *) <= (size_t)(send - s)) {
2049  VALUE tmp = Qnil;
2050  char *t;
2051 
2052  memcpy(&t, s, sizeof(char *));
2053  s += sizeof(char *);
2054 
2055  if (t) {
2056  VALUE a, *p, *pend;
2057 
2058  if (!(a = rb_str_associated(str))) {
2059  rb_raise(rb_eArgError, "no associated pointer");
2060  }
2061  p = RARRAY_PTR(a);
2062  pend = p + RARRAY_LEN(a);
2063  while (p < pend) {
2064  if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
2065  if (len < RSTRING_LEN(*p)) {
2066  tmp = rb_tainted_str_new(t, len);
2067  rb_str_associate(tmp, a);
2068  }
2069  else {
2070  tmp = *p;
2071  }
2072  break;
2073  }
2074  p++;
2075  }
2076  if (p == pend) {
2077  rb_raise(rb_eArgError, "non associated pointer");
2078  }
2079  }
2080  UNPACK_PUSH(tmp);
2081  }
2082  break;
2083 
2084  case 'p':
2085  if (len > (long)((send - s) / sizeof(char *)))
2086  len = (send - s) / sizeof(char *);
2087  while (len-- > 0) {
2088  if ((size_t)(send - s) < sizeof(char *))
2089  break;
2090  else {
2091  VALUE tmp = Qnil;
2092  char *t;
2093 
2094  memcpy(&t, s, sizeof(char *));
2095  s += sizeof(char *);
2096 
2097  if (t) {
2098  VALUE a, *p, *pend;
2099 
2100  if (!(a = rb_str_associated(str))) {
2101  rb_raise(rb_eArgError, "no associated pointer");
2102  }
2103  p = RARRAY_PTR(a);
2104  pend = p + RARRAY_LEN(a);
2105  while (p < pend) {
2106  if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
2107  tmp = *p;
2108  break;
2109  }
2110  p++;
2111  }
2112  if (p == pend) {
2113  rb_raise(rb_eArgError, "non associated pointer");
2114  }
2115  }
2116  UNPACK_PUSH(tmp);
2117  }
2118  }
2119  break;
2120 
2121  case 'w':
2122  {
2123  unsigned long ul = 0;
2124  unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8);
2125 
2126  while (len > 0 && s < send) {
2127  ul <<= 7;
2128  ul |= (*s & 0x7f);
2129  if (!(*s++ & 0x80)) {
2130  UNPACK_PUSH(ULONG2NUM(ul));
2131  len--;
2132  ul = 0;
2133  }
2134  else if (ul & ulmask) {
2135  VALUE big = rb_uint2big(ul);
2136  VALUE big128 = rb_uint2big(128);
2137  while (s < send) {
2138  big = rb_big_mul(big, big128);
2139  big = rb_big_plus(big, rb_uint2big(*s & 0x7f));
2140  if (!(*s++ & 0x80)) {
2141  UNPACK_PUSH(big);
2142  len--;
2143  ul = 0;
2144  break;
2145  }
2146  }
2147  }
2148  }
2149  }
2150  break;
2151 
2152  default:
2153  rb_warning("unknown unpack directive '%c' in '%s'",
2154  type, RSTRING_PTR(fmt));
2155  break;
2156  }
2157  }
2158 
2159  return ary;
2160 }
2161 
2162 #define BYTEWIDTH 8
2163 
2164 int
2165 rb_uv_to_utf8(char buf[6], unsigned long uv)
2166 {
2167  if (uv <= 0x7f) {
2168  buf[0] = (char)uv;
2169  return 1;
2170  }
2171  if (uv <= 0x7ff) {
2172  buf[0] = castchar(((uv>>6)&0xff)|0xc0);
2173  buf[1] = castchar((uv&0x3f)|0x80);
2174  return 2;
2175  }
2176  if (uv <= 0xffff) {
2177  buf[0] = castchar(((uv>>12)&0xff)|0xe0);
2178  buf[1] = castchar(((uv>>6)&0x3f)|0x80);
2179  buf[2] = castchar((uv&0x3f)|0x80);
2180  return 3;
2181  }
2182  if (uv <= 0x1fffff) {
2183  buf[0] = castchar(((uv>>18)&0xff)|0xf0);
2184  buf[1] = castchar(((uv>>12)&0x3f)|0x80);
2185  buf[2] = castchar(((uv>>6)&0x3f)|0x80);
2186  buf[3] = castchar((uv&0x3f)|0x80);
2187  return 4;
2188  }
2189  if (uv <= 0x3ffffff) {
2190  buf[0] = castchar(((uv>>24)&0xff)|0xf8);
2191  buf[1] = castchar(((uv>>18)&0x3f)|0x80);
2192  buf[2] = castchar(((uv>>12)&0x3f)|0x80);
2193  buf[3] = castchar(((uv>>6)&0x3f)|0x80);
2194  buf[4] = castchar((uv&0x3f)|0x80);
2195  return 5;
2196  }
2197  if (uv <= 0x7fffffff) {
2198  buf[0] = castchar(((uv>>30)&0xff)|0xfc);
2199  buf[1] = castchar(((uv>>24)&0x3f)|0x80);
2200  buf[2] = castchar(((uv>>18)&0x3f)|0x80);
2201  buf[3] = castchar(((uv>>12)&0x3f)|0x80);
2202  buf[4] = castchar(((uv>>6)&0x3f)|0x80);
2203  buf[5] = castchar((uv&0x3f)|0x80);
2204  return 6;
2205  }
2206  rb_raise(rb_eRangeError, "pack(U): value out of range");
2207 
2208  UNREACHABLE;
2209 }
2210 
/*
 * Lower bounds used to detect over-long ("redundant") UTF-8 encodings.
 *
 * utf8_to_uv() indexes this table with (sequence length - 1) and rejects
 * a decoded value that is smaller than the entry.  The number in the
 * comment on each row is the sequence length in bytes that the row
 * belongs to.
 */
2211 static const unsigned long utf8_limits[] = {
2212  0x0, /* 1 */
2213  0x80, /* 2 */
2214  0x800, /* 3 */
2215  0x10000, /* 4 */
2216  0x200000, /* 5 */
2217  0x4000000, /* 6 */
2218  0x80000000, /* 7 */
2219 };
2220 
2221 static unsigned long
2222 utf8_to_uv(const char *p, long *lenp)
2223 {
2224  int c = *p++ & 0xff;
2225  unsigned long uv = c;
2226  long n;
2227 
2228  if (!(uv & 0x80)) {
2229  *lenp = 1;
2230  return uv;
2231  }
2232  if (!(uv & 0x40)) {
2233  *lenp = 1;
2234  rb_raise(rb_eArgError, "malformed UTF-8 character");
2235  }
2236 
2237  if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
2238  else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
2239  else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
2240  else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
2241  else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
2242  else {
2243  *lenp = 1;
2244  rb_raise(rb_eArgError, "malformed UTF-8 character");
2245  }
2246  if (n > *lenp) {
2247  rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
2248  n, *lenp);
2249  }
2250  *lenp = n--;
2251  if (n != 0) {
2252  while (n--) {
2253  c = *p++ & 0xff;
2254  if ((c & 0xc0) != 0x80) {
2255  *lenp -= n + 1;
2256  rb_raise(rb_eArgError, "malformed UTF-8 character");
2257  }
2258  else {
2259  c &= 0x3f;
2260  uv = uv << 6 | c;
2261  }
2262  }
2263  }
2264  n = *lenp - 1;
2265  if (uv < utf8_limits[n]) {
2266  rb_raise(rb_eArgError, "redundant UTF-8 sequence");
2267  }
2268  return uv;
2269 }
2270 
2271 void
2273 {
2274  rb_define_method(rb_cArray, "pack", pack_pack, 1);
2275  rb_define_method(rb_cString, "unpack", pack_unpack, 1);
2276 }
RUBY_EXTERN VALUE rb_cString
Definition: ruby.h:1456
#define MAX_INTEGER_PACK_SIZE
Definition: pack.c:254
#define RARRAY_LEN(a)
Definition: ruby.h:899
void rb_bug(const char *fmt,...)
Definition: error.c:290
VALUE rb_uint2big(VALUE n)
Definition: bignum.c:288
#define swap32(x)
Definition: pack.c:95
#define INT2NUM(x)
Definition: ruby.h:1178
#define BIGENDIAN_P()
Definition: pack.c:46
int i
Definition: win32ole.c:784
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Definition: encoding.h:73
#define NATINT_LEN(type, len)
Definition: pack.c:50
#define NUM2INT(x)
Definition: ruby.h:622
#define HTONF(x, y)
#define THISFROM
void rb_big_pack(VALUE val, unsigned long *buf, long num_longs)
Definition: bignum.c:369
#define PACK_ITEM_ADJUST()
Definition: pack.c:1192
VALUE rb_big_plus(VALUE x, VALUE y)
Definition: bignum.c:2031
#define HTOND(x, y)
VALUE rb_eTypeError
Definition: error.c:511
#define UNREACHABLE
Definition: ruby.h:40
#define castchar(from)
#define ULONG2NUM(x)
Definition: ruby.h:1209
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:822
int rb_usascii_encindex(void)
Definition: encoding.c:1192
void rb_str_set_len(VALUE, long)
Definition: string.c:1837
VALUE rb_to_int(VALUE)
Definition: object.c:2431
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1780
VALUE rb_str_associated(VALUE)
Definition: string.c:1453
#define VTOHD(x, y)
static const char uu_table[]
Definition: pack.c:1058
static const char toofew[]
Definition: pack.c:257
#define DOUBLE_CONVWITH(y)
#define NEXTFROM
#define ISDIGIT(c)
VALUE rb_big_unpack(unsigned long *buf, long num_longs)
Definition: bignum.c:411
#define FIXNUM_P(f)
Definition: ruby.h:355
#define ENC_CODERANGE_7BIT
Definition: encoding.h:58
VALUE rb_eRangeError
Definition: error.c:515
const char * rb_obj_classname(VALUE)
Definition: variable.c:396
VALUE rb_str_buf_cat(VALUE, const char *, long)
Definition: string.c:1947
Win32OLEIDispatch * p
Definition: win32ole.c:786
#define HTOVD(x, y)
#define ISALPHA(c)
Definition: ruby.h:1636
#define NTOHF(x, y)
static VALUE pack_unpack(VALUE str, VALUE fmt)
Definition: pack.c:1315
#define RB_TYPE_P(obj, type)
Definition: ruby.h:1537
VALUE rb_big2ulong_pack(VALUE x)
Definition: bignum.c:1215
VALUE rb_big_divmod(VALUE x, VALUE y)
Definition: bignum.c:3010
#define le(x, y)
Definition: time.c:69
unsigned long long uint64_t
Definition: sha2.h:102
static void encodes(VALUE, const char *, long, int, int)
Definition: pack.c:1064
int rb_block_given_p(void)
Definition: eval.c:672
VALUE rb_obj_taint(VALUE)
Definition: object.c:878
VALUE rb_eRuntimeError
Definition: error.c:510
VALUE rb_obj_as_string(VALUE)
Definition: string.c:895
static VALUE infected_str_new(const char *ptr, long len, VALUE str)
Definition: pack.c:1198
VALUE rb_ary_new(void)
Definition: array.c:424
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1162
#define UINT2NUM(x)
Definition: ruby.h:1188
#define STRTOUL(str, endptr, base)
Definition: ruby.h:1649
#define NIL_P(v)
Definition: ruby.h:446
#define define_swapx(x, xtype)
Definition: pack.c:63
static void qpencode(VALUE, VALUE, long)
Definition: pack.c:1112
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:741
void rb_str_associate(VALUE, VALUE)
Definition: string.c:1422
#define T_BIGNUM
Definition: ruby.h:495
#define HTOVF(x, y)
#define ENC_CODERANGE_VALID
Definition: encoding.h:59
static const char hex_table[]
Definition: pack.c:1109
#define EOF
Definition: vsnprintf.c:207
#define NTOHD(x, y)
#define RSTRING_LEN(str)
Definition: ruby.h:862
int errno
#define VTOHF(x, y)
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
#define Qnil
Definition: ruby.h:435
int type
Definition: tcltklib.c:111
static int hex2num(char c)
Definition: pack.c:1165
unsigned long VALUE
Definition: ruby.h:104
VALUE rb_big_mul(VALUE x, VALUE y)
Definition: bignum.c:2660
char * strchr(char *, char)
int rb_utf8_encindex(void)
Definition: encoding.c:1177
#define FLOAT_CONVWITH(y)
unsigned int uint32_t
Definition: sha2.h:101
static VALUE pack_pack(VALUE ary, VALUE fmt)
Definition: pack.c:369
#define RSTRING_PTR(str)
Definition: ruby.h:866
static unsigned long utf8_to_uv(const char *, long *)
Definition: pack.c:2222
static const unsigned long utf8_limits[]
Definition: pack.c:2211
#define RFLOAT_VALUE(v)
Definition: ruby.h:836
#define f
#define INT2FIX(i)
Definition: ruby.h:241
#define RARRAY_PTR(a)
Definition: ruby.h:904
#define T_STRING
Definition: ruby.h:490
#define OBJ_INFECT(x, s)
Definition: ruby.h:1157
v
Definition: win32ole.c:798
VALUE rb_cArray
Definition: array.c:29
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Definition: pack.c:2165
#define PACK_LENGTH_ADJUST_SIZE(sz)
Definition: pack.c:1182
#define swap16(x)
Definition: pack.c:91
#define StringValuePtr(v)
Definition: ruby.h:547
static const char b64_table[]
Definition: pack.c:1060
void Init_pack(void)
Definition: pack.c:2272
void rb_warning(const char *fmt,...)
Definition: error.c:229
VALUE rb_tainted_str_new(const char *, long)
VALUE rb_str_buf_new(long)
Definition: string.c:777
VALUE rb_usascii_str_new(const char *, long)
Definition: string.c:431
#define FIX2LONG(x)
Definition: ruby.h:353
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1344
#define UNPACK_PUSH(item)
VALUE rb_eArgError
Definition: error.c:512
#define NUM2LONG(x)
Definition: ruby.h:592
#define DBL2NUM(dbl)
Definition: ruby.h:837
#define ISSPACE(c)
Definition: ruby.h:1632
#define StringValue(v)
Definition: ruby.h:546
VALUE rb_to_float(VALUE)
Definition: object.c:2692
VALUE rb_str_new(const char *, long)
Definition: string.c:425
#define SIGNED_VALUE
Definition: ruby.h:106