Ruby 2.0.0p648 (2015-12-16 revision 53162)
nkf.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3  * Copyright (c) 1996-2010, The nkf Project.
4  *
5  * This software is provided 'as-is', without any express or implied
6  * warranty. In no event will the authors be held liable for any damages
7  * arising from the use of this software.
8  *
9  * Permission is granted to anyone to use this software for any purpose,
10  * including commercial applications, and to alter it and redistribute it
11  * freely, subject to the following restrictions:
12  *
13  * 1. The origin of this software must not be misrepresented; you must not
14  * claim that you wrote the original software. If you use this software
15  * in a product, an acknowledgment in the product documentation would be
16  * appreciated but is not required.
17  *
18  * 2. Altered source versions must be plainly marked as such, and must not be
19  * misrepresented as being the original software.
20  *
21  * 3. This notice may not be removed or altered from any source distribution.
22  */
23 #define NKF_VERSION "2.1.3"
24 #define NKF_RELEASE_DATE "2012-11-22"
25 #define COPY_RIGHT \
26  "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27  "Copyright (C) 1996-2012, The nkf Project."
28 
29 #include "config.h"
30 #include "nkf.h"
31 #include "utf8tbl.h"
32 #ifdef __WIN32__
33 #include <windows.h>
34 #include <locale.h>
35 #endif
36 #if defined(__OS2__)
37 # define INCL_DOS
38 # define INCL_DOSERRORS
39 # include <os2.h>
40 #endif
41 #include <assert.h>
42 
43 
44 /* state of output_mode and input_mode
45 
46  c2 0 means ASCII
47  JIS_X_0201_1976_K
48  ISO_8859_1
49  JIS_X_0208
50  EOF all termination
51  c1 32bit data
52 
53  */
54 
55 /* MIME ENCODE */
56 
57 #define FIXED_MIME 7
58 #define STRICT_MIME 8
59 
60 /* byte order */
61 enum byte_order {
66 };
67 
68 /* ASCII CODE */
69 
70 #define BS 0x08
71 #define TAB 0x09
72 #define LF 0x0a
73 #define CR 0x0d
74 #define ESC 0x1b
75 #define SP 0x20
76 #define DEL 0x7f
77 #define SI 0x0f
78 #define SO 0x0e
79 #define SS2 0x8e
80 #define SS3 0x8f
81 #define CRLF 0x0D0A
82 
83 
84 /* encodings */
85 
124  JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125  /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126  /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127  /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128  JIS_X_0208 = 0x1168, /* @B */
129  JIS_X_0212 = 0x1159, /* D */
130  /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131  JIS_X_0213_2 = 0x1229, /* P */
132  JIS_X_0213_1 = 0x1233 /* Q */
133 };
134 
135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140 static void j_oconv(nkf_char c2, nkf_char c1);
141 static void s_oconv(nkf_char c2, nkf_char c1);
142 static void e_oconv(nkf_char c2, nkf_char c1);
143 static void w_oconv(nkf_char c2, nkf_char c1);
144 static void w_oconv16(nkf_char c2, nkf_char c1);
145 static void w_oconv32(nkf_char c2, nkf_char c1);
146 
147 typedef struct {
148  const char *name;
150  void (*oconv)(nkf_char c2, nkf_char c1);
152 
160 
161 typedef struct {
162  const int id;
163  const char *name;
165 } nkf_encoding;
166 
168  {ASCII, "US-ASCII", &NkfEncodingASCII},
169  {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
170  {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
171  {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
172  {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
173  {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
174  {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
175  {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
176  {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
177  {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
178  {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
179  {CP10001, "CP10001", &NkfEncodingShift_JIS},
180  {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
181  {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
182  {CP51932, "CP51932", &NkfEncodingEUC_JP},
183  {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
184  {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
185  {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
186  {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
187  {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
188  {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
189  {UTF_8, "UTF-8", &NkfEncodingUTF_8},
190  {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
191  {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
192  {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
193  {UTF_16, "UTF-16", &NkfEncodingUTF_16},
194  {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
195  {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
196  {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
197  {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
198  {UTF_32, "UTF-32", &NkfEncodingUTF_32},
199  {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
200  {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
201  {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
202  {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
203  {BINARY, "BINARY", &NkfEncodingASCII},
204  {-1, NULL, NULL}
205 };
206 
207 struct {
208  const char *name;
209  const int id;
211  {"US-ASCII", ASCII},
212  {"ASCII", ASCII},
213  {"646", ASCII},
214  {"ROMAN8", ASCII},
215  {"ISO-2022-JP", ISO_2022_JP},
216  {"ISO2022JP-CP932", CP50220},
217  {"CP50220", CP50220},
218  {"CP50221", CP50221},
219  {"CSISO2022JP", CP50221},
220  {"CP50222", CP50222},
221  {"ISO-2022-JP-1", ISO_2022_JP_1},
222  {"ISO-2022-JP-3", ISO_2022_JP_3},
223  {"ISO-2022-JP-2004", ISO_2022_JP_2004},
224  {"SHIFT_JIS", SHIFT_JIS},
225  {"SJIS", SHIFT_JIS},
226  {"MS_Kanji", SHIFT_JIS},
227  {"PCK", SHIFT_JIS},
228  {"WINDOWS-31J", WINDOWS_31J},
229  {"CSWINDOWS31J", WINDOWS_31J},
230  {"CP932", WINDOWS_31J},
231  {"MS932", WINDOWS_31J},
232  {"CP10001", CP10001},
233  {"EUCJP", EUC_JP},
234  {"EUC-JP", EUC_JP},
235  {"EUCJP-NKF", EUCJP_NKF},
236  {"CP51932", CP51932},
237  {"EUC-JP-MS", EUCJP_MS},
238  {"EUCJP-MS", EUCJP_MS},
239  {"EUCJPMS", EUCJP_MS},
240  {"EUC-JP-ASCII", EUCJP_ASCII},
241  {"EUCJP-ASCII", EUCJP_ASCII},
242  {"SHIFT_JISX0213", SHIFT_JISX0213},
243  {"SHIFT_JIS-2004", SHIFT_JIS_2004},
244  {"EUC-JISX0213", EUC_JISX0213},
245  {"EUC-JIS-2004", EUC_JIS_2004},
246  {"UTF-8", UTF_8},
247  {"UTF-8N", UTF_8N},
248  {"UTF-8-BOM", UTF_8_BOM},
249  {"UTF8-MAC", UTF8_MAC},
250  {"UTF-8-MAC", UTF8_MAC},
251  {"UTF-16", UTF_16},
252  {"UTF-16BE", UTF_16BE},
253  {"UTF-16BE-BOM", UTF_16BE_BOM},
254  {"UTF-16LE", UTF_16LE},
255  {"UTF-16LE-BOM", UTF_16LE_BOM},
256  {"UTF-32", UTF_32},
257  {"UTF-32BE", UTF_32BE},
258  {"UTF-32BE-BOM", UTF_32BE_BOM},
259  {"UTF-32LE", UTF_32LE},
260  {"UTF-32LE-BOM", UTF_32LE_BOM},
261  {"BINARY", BINARY},
262  {NULL, -1}
263 };
264 
265 #if defined(DEFAULT_CODE_JIS)
266 #define DEFAULT_ENCIDX ISO_2022_JP
267 #elif defined(DEFAULT_CODE_SJIS)
268 #define DEFAULT_ENCIDX SHIFT_JIS
269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
270 #define DEFAULT_ENCIDX WINDOWS_31J
271 #elif defined(DEFAULT_CODE_EUC)
272 #define DEFAULT_ENCIDX EUC_JP
273 #elif defined(DEFAULT_CODE_UTF8)
274 #define DEFAULT_ENCIDX UTF_8
275 #endif
276 
277 
/*
 * Locale-independent ASCII character classification helpers.
 *
 * Every macro parameter is parenthesized in the expansion so that compound
 * arguments (e.g. `x | y`, `cond ? a : b`) are classified as a whole.
 * NOTE: each macro may evaluate its argument more than once; do not pass
 * expressions with side effects.
 */
#define is_alnum(c)  \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c)  ((c) == SP || (c) == TAB)
#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c)  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c)  (SP<=(c) && (c)<='~')
#define nkf_isgraph(c)  ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; yields 0 for any non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
		    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
		    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 (after truncation to unsigned short) equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and characters strictly between SP and DEL other than ? = _ ( ) . and '"'. */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
303 
304 #define HOLD_SIZE 1024
305 #if defined(INT_IS_SHORT)
306 #define IOBUF_SIZE 2048
307 #else
308 #define IOBUF_SIZE 16384
309 #endif
310 
311 #define DEFAULT_J 'B'
312 #define DEFAULT_R 'B'
313 
314 
315 #define GETA1 0x22
316 #define GETA2 0x2e
317 
318 
319 /* MIME preprocessor */
320 
321 #ifdef EASYWIN /*Easy Win */
322 extern POINT _BufferSize;
323 #endif
324 
325 struct input_code{
326  const char *name;
331  void (*status_func)(struct input_code *, nkf_char);
334 };
335 
336 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
339 
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
341 /* UCS Mapping
342  * 0: Shift_JIS, eucJP-ascii
343  * 1: eucJP-ms
344  * 2: CP932, CP51932
345  * 3: CP10001
346  */
347 #define UCS_MAP_ASCII 0
348 #define UCS_MAP_MS 1
349 #define UCS_MAP_CP932 2
350 #define UCS_MAP_CP10001 3
352 #endif
353 #ifdef UTF8_INPUT_ENABLE
354 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
355 static int no_cp932ext_f = FALSE;
356 /* ignore ZERO WIDTH NO-BREAK SPACE */
359 static int input_bom_f = FALSE;
360 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
361 static void (*encode_fallback)(nkf_char c) = NULL;
362 static void w_status(struct input_code *, nkf_char);
363 #endif
364 #ifdef UTF8_OUTPUT_ENABLE
365 static int output_bom_f = FALSE;
367 #endif
368 
369 static void std_putc(nkf_char c);
370 static nkf_char std_getc(FILE *f);
371 static nkf_char std_ungetc(nkf_char c,FILE *f);
372 
373 static nkf_char broken_getc(FILE *f);
375 
376 static nkf_char mime_getc(FILE *f);
377 
378 static void mime_putc(nkf_char c);
379 
380 /* buffers */
381 
382 #if !defined(PERL_XS) && !defined(WIN32DLL)
383 static unsigned char stdibuf[IOBUF_SIZE];
384 static unsigned char stdobuf[IOBUF_SIZE];
385 #endif
386 
387 #define NKF_UNSPECIFIED (-TRUE)
388 
389 /* flags */
390 static int unbuf_f = FALSE;
391 static int estab_f = FALSE;
392 static int nop_f = FALSE;
393 static int binmode_f = TRUE; /* binary mode */
394 static int rot_f = FALSE; /* rot14/43 mode */
395 static int hira_f = FALSE; /* hira/kata henkan */
396 static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
397 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
398 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
399 static int mimebuf_f = FALSE; /* MIME buffered input */
400 static int broken_f = FALSE; /* convert ESC-less broken JIS */
401 static int iso8859_f = FALSE; /* ISO8859 through */
402 static int mimeout_f = FALSE; /* base64 mode */
403 static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201 */
404 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
405 
406 #ifdef UNICODE_NORMALIZATION
407 static int nfc_f = FALSE;
408 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
410 #endif
411 
412 #ifdef INPUT_OPTION
413 static int cap_f = FALSE;
414 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
416 
417 static int url_f = FALSE;
418 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
420 #endif
421 
422 #define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
423 #define CLASS_MASK NKF_INT32_C(0xFF000000)
424 #define CLASS_UNICODE NKF_INT32_C(0x01000000)
425 #define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
426 #define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
427 #define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
428 #define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
429 #define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
430 #define nkf_char_unicode_p(c) ((c & CLASS_MASK) == CLASS_UNICODE)
431 #define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= UNICODE_BMP_MAX)
432 #define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= UNICODE_MAX)
433 
434 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
435 
436 #ifdef NUMCHAR_OPTION
437 static int numchar_f = FALSE;
438 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
440 #endif
441 
442 #ifdef CHECK_OPTION
443 static int noout_f = FALSE;
444 static void no_putc(nkf_char c);
445 static int debug_f = FALSE;
446 static void debug(const char *str);
448 #endif
449 
450 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
451 static void set_input_codename(const char *codename);
452 
453 #ifdef EXEC_IO
454 static int exec_f = 0;
455 #endif
456 
457 #ifdef SHIFTJIS_CP932
458 /* invert IBM extended characters to others */
459 static int cp51932_f = FALSE;
460 
461 /* invert NEC-selected IBM extended characters to IBM extended characters */
462 static int cp932inv_f = TRUE;
463 
464 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
465 #endif /* SHIFTJIS_CP932 */
466 
467 static int x0212_f = FALSE;
468 static int x0213_f = FALSE;
469 
470 static unsigned char prefix_table[256];
471 
472 static void e_status(struct input_code *, nkf_char);
473 static void s_status(struct input_code *, nkf_char);
474 
476  {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
477  {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
478 #ifdef UTF8_INPUT_ENABLE
479  {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
480  {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
481  {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
482 #endif
483  {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
484 };
485 
486 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
487 static int base64_count = 0;
488 
489 /* X0208 -> ASCII converter */
490 
491 /* fold parameter */
492 static int f_line = 0; /* chars in line */
493 static int f_prev = 0;
494 static int fold_preserve_f = FALSE; /* preserve new lines */
495 static int fold_f = FALSE;
496 static int fold_len = 0;
497 
498 /* options */
499 static unsigned char kanji_intro = DEFAULT_J;
500 static unsigned char ascii_intro = DEFAULT_R;
501 
502 /* Folding */
503 
504 #define FOLD_MARGIN 10
505 #define DEFAULT_FOLD 60
506 
508 
509 /* process default */
510 
511 static nkf_char
513 {
514  fprintf(stderr,"nkf internal module connection failure.\n");
515  exit(EXIT_FAILURE);
516  return 0; /* LINT */
517 }
518 
519 static void
521 {
522  no_connection2(c2,c1,0);
523 }
524 
526 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
527 
528 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
531 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
532 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
533 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
535 
536 /* static redirections */
537 
538 static void (*o_putc)(nkf_char c) = std_putc;
539 
540 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
542 
543 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
545 
546 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
547 
548 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
550 
551 /* for strict mime */
552 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
554 
555 /* Global states */
556 static int output_mode = ASCII; /* output kanji mode */
557 static int input_mode = ASCII; /* input kanji mode */
558 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
559 
560 /* X0201 / X0208 conversion tables */
561 
562 /* X0201 kana conversion table */
563 /* 90-9F A0-DF */
564 static const unsigned char cv[]= {
565  0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566  0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567  0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568  0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569  0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570  0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571  0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572  0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573  0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574  0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575  0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576  0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577  0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578  0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579  0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580  0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
581  0x00,0x00};
582 
583 
584 /* X0201 kana conversion table for dakuten (voiced sound mark) */
585 /* 90-9F A0-DF */
586 static const unsigned char dv[]= {
587  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591  0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592  0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593  0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594  0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595  0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596  0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597  0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598  0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603  0x00,0x00};
604 
605 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
606 /* 90-9F A0-DF */
607 static const unsigned char ev[]= {
608  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618  0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619  0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624  0x00,0x00};
625 
626 /* X0201 kana to X0213 conversion table for han-dakuten (semi-voiced sound mark) */
627 /* 90-9F A0-DF */
628 static const unsigned char ev_x0213[]= {
629  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634  0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
635  0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
636  0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
637  0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
638  0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
639  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645  0x00,0x00};
646 
647 
648 /* X0208 kigou conversion table */
649 /* 0x8140 - 0x819e */
650 static const unsigned char fv[] = {
651 
652  0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
653  0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
654  0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
655  0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
656  0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
657  0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
658  0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
659  0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
660  0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
661  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662  0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
663  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
664 } ;
665 
666 
667 
668 static int option_mode = 0;
669 static int file_out_f = FALSE;
670 #ifdef OVERWRITE
671 static int overwrite_f = FALSE;
672 static int preserve_time_f = FALSE;
673 static int backup_f = FALSE;
674 static char *backup_suffix = "";
675 #endif
676 
677 static int eolmode_f = 0; /* CR, LF, CRLF */
678 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
679 static nkf_char prev_cr = 0; /* CR or 0 */
680 #ifdef EASYWIN /*Easy Win */
681 static int end_check;
682 #endif /*Easy Win */
683 
684 static void *
686 {
687  void *ptr;
688 
689  if (size == 0) size = 1;
690 
691  ptr = malloc(size);
692  if (ptr == NULL) {
693  perror("can't malloc");
694  exit(EXIT_FAILURE);
695  }
696 
697  return ptr;
698 }
699 
/*
 * realloc() wrapper that never returns NULL.
 *
 * A request for zero bytes is bumped to one byte.  If the underlying
 * realloc() fails, a diagnostic is written with perror() and the process
 * terminates with EXIT_FAILURE.
 */
static void *
nkf_xrealloc(void *ptr, size_t size)
{
    void *resized = realloc(ptr, size ? size : 1);

    if (!resized) {
	perror("can't realloc");
	exit(EXIT_FAILURE);
    }
    return resized;
}
713 
714 #define nkf_xfree(ptr) free(ptr)
715 
716 static int
717 nkf_str_caseeql(const char *src, const char *target)
718 {
719  int i;
720  for (i = 0; src[i] && target[i]; i++) {
721  if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
722  }
723  if (src[i] || target[i]) return FALSE;
724  else return TRUE;
725 }
726 
727 static nkf_encoding*
729 {
730  if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
731  return 0;
732  }
733  return &nkf_encoding_table[idx];
734 }
735 
736 static int
738 {
739  int i;
740  if (name[0] == 'X' && *(name+1) == '-') name += 2;
741  for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
742  if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
743  return encoding_name_to_id_table[i].id;
744  }
745  }
746  return -1;
747 }
748 
749 static nkf_encoding*
750 nkf_enc_find(const char *name)
751 {
752  int idx = -1;
753  idx = nkf_enc_find_index(name);
754  if (idx < 0) return 0;
755  return nkf_enc_from_index(idx);
756 }
757 
758 #define nkf_enc_name(enc) (enc)->name
759 #define nkf_enc_to_index(enc) (enc)->id
760 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
761 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
762 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
763 #define nkf_enc_asciicompat(enc) (\
764  nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
765  nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
766 #define nkf_enc_unicode_p(enc) (\
767  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
768  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
769  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
770 #define nkf_enc_cp5022x_p(enc) (\
771  nkf_enc_to_index(enc) == CP50220 ||\
772  nkf_enc_to_index(enc) == CP50221 ||\
773  nkf_enc_to_index(enc) == CP50222)
774 
775 #ifdef DEFAULT_CODE_LOCALE
776 static const char*
778 {
779 #ifdef HAVE_LANGINFO_H
780  return nl_langinfo(CODESET);
781 #elif defined(__WIN32__)
782  static char buf[16];
783  sprintf(buf, "CP%d", GetACP());
784  return buf;
785 #elif defined(__OS2__)
786 # if defined(INT_IS_SHORT)
787  /* OS/2 1.x */
788  return NULL;
789 # else
790  /* OS/2 32bit */
791  static char buf[16];
792  ULONG ulCP[1], ulncp;
793  DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
794  if (ulCP[0] == 932 || ulCP[0] == 943)
795  strcpy(buf, "Shift_JIS");
796  else
797  sprintf(buf, "CP%lu", ulCP[0]);
798  return buf;
799 # endif
800 #endif
801  return NULL;
802 }
803 
804 static nkf_encoding*
806 {
807  nkf_encoding *enc = 0;
808  const char *encname = nkf_locale_charmap();
809  if (encname)
810  enc = nkf_enc_find(encname);
811  return enc;
812 }
813 #endif /* DEFAULT_CODE_LOCALE */
814 
815 static nkf_encoding*
817 {
818  return &nkf_encoding_table[UTF_8];
819 }
820 
821 static nkf_encoding*
823 {
824  nkf_encoding *enc = 0;
825 #ifdef DEFAULT_CODE_LOCALE
826  enc = nkf_locale_encoding();
827 #elif defined(DEFAULT_ENCIDX)
828  enc = nkf_enc_from_index(DEFAULT_ENCIDX);
829 #endif
830  if (!enc) enc = nkf_utf8_encoding();
831  return enc;
832 }
833 
834 typedef struct {
835  long capa;
836  long len;
838 } nkf_buf_t;
839 
840 static nkf_buf_t *
841 nkf_buf_new(int length)
842 {
843  nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
844  buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
845  buf->capa = length;
846  buf->len = 0;
847  return buf;
848 }
849 
850 #if 0
851 static void
852 nkf_buf_dispose(nkf_buf_t *buf)
853 {
854  nkf_xfree(buf->ptr);
855  nkf_xfree(buf);
856 }
857 #endif
858 
859 #define nkf_buf_length(buf) ((buf)->len)
860 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
861 
862 static nkf_char
864 {
865  assert(index <= buf->len);
866  return buf->ptr[index];
867 }
868 
869 static void
871 {
872  buf->len = 0;
873 }
874 
875 static void
877 {
878  if (buf->capa <= buf->len) {
879  exit(EXIT_FAILURE);
880  }
881  buf->ptr[buf->len++] = c;
882 }
883 
884 static nkf_char
886 {
887  assert(!nkf_buf_empty_p(buf));
888  return buf->ptr[--buf->len];
889 }
890 
891 /* Normalization Form C */
892 #ifndef PERL_XS
893 #ifdef WIN32DLL
894 #define fprintf dllprintf
895 #endif
896 
897 static void
898 version(void)
899 {
900  fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
901 }
902 
903 static void
904 usage(void)
905 {
906  fprintf(HELP_OUTPUT,
907  "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
908 #ifdef UTF8_OUTPUT_ENABLE
909  " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
910  " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
911 #else
912 #endif
913 #ifdef UTF8_INPUT_ENABLE
914  " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
915  " UTF option is -W[8,[16,32][B,L]]\n"
916 #else
917  " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
918 #endif
919  );
920  fprintf(HELP_OUTPUT,
921  " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
922  " M[BQ] MIME encode [B:base64 Q:quoted]\n"
923  " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
924  );
925  fprintf(HELP_OUTPUT,
926  " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
927  " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
928  " 4: JISX0208 Katakana to JISX0201 Katakana\n"
929  " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
930  );
931  fprintf(HELP_OUTPUT,
932  " O Output to File (DEFAULT 'nkf.out')\n"
933  " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
934  );
935  fprintf(HELP_OUTPUT,
936  " --ic=<encoding> Specify the input encoding\n"
937  " --oc=<encoding> Specify the output encoding\n"
938  " --hiragana --katakana Hiragana/Katakana Conversion\n"
939  " --katakana-hiragana Converts each other\n"
940  );
941  fprintf(HELP_OUTPUT,
942 #ifdef INPUT_OPTION
943  " --{cap, url}-input Convert hex after ':' or '%%'\n"
944 #endif
945 #ifdef NUMCHAR_OPTION
946  " --numchar-input Convert Unicode Character Reference\n"
947 #endif
948 #ifdef UTF8_INPUT_ENABLE
949  " --fb-{skip, html, xml, perl, java, subchar}\n"
950  " Specify unassigned character's replacement\n"
951 #endif
952  );
953  fprintf(HELP_OUTPUT,
954 #ifdef OVERWRITE
955  " --in-place[=SUF] Overwrite original files\n"
956  " --overwrite[=SUF] Preserve timestamp of original files\n"
957 #endif
958  " -g --guess Guess the input code\n"
959  " -v --version Print the version\n"
960  " --help/-V Print this help / configuration\n"
961  );
962  version();
963 }
964 
965 static void
967 {
968  fprintf(HELP_OUTPUT,
969  "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
970  " Compile-time options:\n"
971  " Compiled at: " __DATE__ " " __TIME__ "\n"
972  );
973  fprintf(HELP_OUTPUT,
974  " Default output encoding: "
975 #ifdef DEFAULT_CODE_LOCALE
976  "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
977 #elif defined(DEFAULT_ENCIDX)
978  "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
979 #else
980  "NONE\n"
981 #endif
982  );
983  fprintf(HELP_OUTPUT,
984  " Default output end of line: "
985 #if DEFAULT_NEWLINE == CR
986  "CR"
987 #elif DEFAULT_NEWLINE == CRLF
988  "CRLF"
989 #else
990  "LF"
991 #endif
992  "\n"
993  " Decode MIME encoded string: "
995  "ON"
996 #else
997  "OFF"
998 #endif
999  "\n"
1000  " Convert JIS X 0201 Katakana: "
1001 #if X0201_DEFAULT
1002  "ON"
1003 #else
1004  "OFF"
1005 #endif
1006  "\n"
1007  " --help, --version output: "
1008 #if HELP_OUTPUT_HELP_OUTPUT
1009  "HELP_OUTPUT"
1010 #else
1011  "STDOUT"
1012 #endif
1013  "\n");
1014 }
1015 #endif /*PERL_XS*/
1016 
1017 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename` from the pattern `suffix`.
 *
 * If `suffix` contains one or more '*' characters, each '*' is replaced by
 * `filename` and all other characters are copied verbatim.  Otherwise the
 * result is `filename` immediately followed by `suffix`.
 *
 * Returns a NUL-terminated string allocated with nkf_xmalloc().
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t asterisk_count = 0;
    size_t i, j;
    /* Keep lengths in size_t: storing strlen() in int can truncate and
     * mixes signed and unsigned arithmetic in the allocation size. */
    const size_t filename_length = strlen(filename);
    const size_t suffix_length = strlen(suffix);

    for (i = 0; i < suffix_length; i++) {
	if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
	/* Every '*' (1 byte) expands to filename_length bytes. */
	backup_filename = nkf_xmalloc((suffix_length - asterisk_count)
				      + asterisk_count * filename_length + 1);
	for (i = 0, j = 0; i < suffix_length; i++) {
	    if (suffix[i] == '*') {
		memcpy(backup_filename + j, filename, filename_length);
		j += filename_length;
	    } else {
		backup_filename[j++] = suffix[i];
	    }
	}
	backup_filename[j] = '\0';
    } else {
	backup_filename = nkf_xmalloc(filename_length + suffix_length + 1);
	memcpy(backup_filename, filename, filename_length);
	/* +1 copies the suffix's terminating NUL as well. */
	memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1052 #endif
1053 
1054 #ifdef UTF8_INPUT_ENABLE
1055 static void
1057 {
1058  int shift = 20;
1059  c &= VALUE_MASK;
1060  while(shift >= 0){
1061  if(c >= NKF_INT32_C(1)<<shift){
1062  while(shift >= 0){
1063  (*f)(0, bin2hex(c>>shift));
1064  shift -= 4;
1065  }
1066  }else{
1067  shift -= 4;
1068  }
1069  }
1070  return;
1071 }
1072 
1073 static void
1075 {
1076  (*oconv)(0, '&');
1077  (*oconv)(0, '#');
1078  c &= VALUE_MASK;
1079  if(c >= NKF_INT32_C(1000000))
1080  (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1081  if(c >= NKF_INT32_C(100000))
1082  (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1083  if(c >= 10000)
1084  (*oconv)(0, 0x30+(c/10000 )%10);
1085  if(c >= 1000)
1086  (*oconv)(0, 0x30+(c/1000 )%10);
1087  if(c >= 100)
1088  (*oconv)(0, 0x30+(c/100 )%10);
1089  if(c >= 10)
1090  (*oconv)(0, 0x30+(c/10 )%10);
1091  if(c >= 0)
1092  (*oconv)(0, 0x30+ c %10);
1093  (*oconv)(0, ';');
1094  return;
1095 }
1096 
1097 static void
1099 {
1100  (*oconv)(0, '&');
1101  (*oconv)(0, '#');
1102  (*oconv)(0, 'x');
1103  nkf_each_char_to_hex(oconv, c);
1104  (*oconv)(0, ';');
1105  return;
1106 }
1107 
1108 static void
1110 {
1111  (*oconv)(0, '\\');
1112  c &= VALUE_MASK;
1113  if(!nkf_char_unicode_bmp_p(c)){
1114  (*oconv)(0, 'U');
1115  (*oconv)(0, '0');
1116  (*oconv)(0, '0');
1117  (*oconv)(0, bin2hex(c>>20));
1118  (*oconv)(0, bin2hex(c>>16));
1119  }else{
1120  (*oconv)(0, 'u');
1121  }
1122  (*oconv)(0, bin2hex(c>>12));
1123  (*oconv)(0, bin2hex(c>> 8));
1124  (*oconv)(0, bin2hex(c>> 4));
1125  (*oconv)(0, bin2hex(c ));
1126  return;
1127 }
1128 
1129 static void
1131 {
1132  (*oconv)(0, '\\');
1133  (*oconv)(0, 'x');
1134  (*oconv)(0, '{');
1135  nkf_each_char_to_hex(oconv, c);
1136  (*oconv)(0, '}');
1137  return;
1138 }
1139 
1140 static void
1142 {
1143  c = unicode_subchar;
1144  (*oconv)((c>>8)&0xFF, c&0xFF);
1145  return;
1146 }
1147 #endif
1148 
/*
 * Table of long option names.
 *
 * Each entry pairs a long option name with an alias string.  Many entries
 * have an empty alias; presumably those options are handled directly by the
 * option parser rather than by alias expansion, and names ending in '='
 * take an argument -- confirm against the parser, which is outside this
 * table.  Groups of entries are only compiled in when the corresponding
 * feature macro is defined.
 */
1149 static const struct {
1150  const char *name;
1151  const char *alias;
1152 } long_option[] = {
1153  {"ic=", ""},
1154  {"oc=", ""},
1155  {"base64","jMB"},
1156  {"euc","e"},
1157  {"euc-input","E"},
1158  {"fj","jm"},
1159  {"help",""},
1160  {"jis","j"},
1161  {"jis-input","J"},
1162  {"mac","sLm"},
1163  {"mime","jM"},
1164  {"mime-input","m"},
1165  {"msdos","sLw"},
1166  {"sjis","s"},
1167  {"sjis-input","S"},
1168  {"unix","eLu"},
1169  {"version","v"},
1170  {"windows","sLw"},
1171  {"hiragana","h1"},
1172  {"katakana","h2"},
1173  {"katakana-hiragana","h3"},
1174  {"guess=", ""},
1175  {"guess", "g2"},
1176  {"cp932", ""},
1177  {"no-cp932", ""},
1178 #ifdef X0212_ENABLE
1179  {"x0212", ""},
1180 #endif
1181 #ifdef UTF8_OUTPUT_ENABLE
1182  {"utf8", "w"},
1183  {"utf16", "w16"},
1184  {"ms-ucs-map", ""},
1185  {"fb-skip", ""},
1186  {"fb-html", ""},
1187  {"fb-xml", ""},
1188  {"fb-perl", ""},
1189  {"fb-java", ""},
1190  {"fb-subchar", ""},
1191  {"fb-subchar=", ""},
1192 #endif
1193 #ifdef UTF8_INPUT_ENABLE
1194  {"utf8-input", "W"},
1195  {"utf16-input", "W16"},
1196  {"no-cp932ext", ""},
1197  {"no-best-fit-chars",""},
1198 #endif
1199 #ifdef UNICODE_NORMALIZATION
1200  {"utf8mac-input", ""},
1201 #endif
1202 #ifdef OVERWRITE
1203  {"overwrite", ""},
1204  {"overwrite=", ""},
1205  {"in-place", ""},
1206  {"in-place=", ""},
1207 #endif
1208 #ifdef INPUT_OPTION
1209  {"cap-input", ""},
1210  {"url-input", ""},
1211 #endif
1212 #ifdef NUMCHAR_OPTION
1213  {"numchar-input", ""},
1214 #endif
1215 #ifdef CHECK_OPTION
1216  {"no-output", ""},
1217  {"debug", ""},
1218 #endif
1219 #ifdef SHIFTJIS_CP932
1220  {"cp932inv", ""},
1221 #endif
1222 #ifdef EXEC_IO
1223  {"exec-in", ""},
1224  {"exec-out", ""},
1225 #endif
1226  {"prefix=", ""},
1227 };
1228 
1229 static void
1231 {
1232  switch (nkf_enc_to_index(enc)) {
1233  case ISO_8859_1:
1234  iso8859_f = TRUE;
1235  break;
1236  case CP50221:
1237  case CP50222:
1238  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1239  case CP50220:
1240 #ifdef SHIFTJIS_CP932
1241  cp51932_f = TRUE;
1242 #endif
1243 #ifdef UTF8_OUTPUT_ENABLE
1245 #endif
1246  break;
1247  case ISO_2022_JP_1:
1248  x0212_f = TRUE;
1249  break;
1250  case ISO_2022_JP_3:
1251  x0212_f = TRUE;
1252  x0213_f = TRUE;
1253  break;
1254  case ISO_2022_JP_2004:
1255  x0212_f = TRUE;
1256  x0213_f = TRUE;
1257  break;
1258  case SHIFT_JIS:
1259  break;
1260  case WINDOWS_31J:
1261  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1262 #ifdef SHIFTJIS_CP932
1263  cp51932_f = TRUE;
1264 #endif
1265 #ifdef UTF8_OUTPUT_ENABLE
1267 #endif
1268  break;
1269  break;
1270  case CP10001:
1271 #ifdef SHIFTJIS_CP932
1272  cp51932_f = TRUE;
1273 #endif
1274 #ifdef UTF8_OUTPUT_ENABLE
1276 #endif
1277  break;
1278  case EUC_JP:
1279  break;
1280  case EUCJP_NKF:
1281  break;
1282  case CP51932:
1283  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1284 #ifdef SHIFTJIS_CP932
1285  cp51932_f = TRUE;
1286 #endif
1287 #ifdef UTF8_OUTPUT_ENABLE
1289 #endif
1290  break;
1291  case EUCJP_MS:
1292  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1293 #ifdef SHIFTJIS_CP932
1294  cp51932_f = FALSE;
1295 #endif
1296 #ifdef UTF8_OUTPUT_ENABLE
1298 #endif
1299  break;
1300  case EUCJP_ASCII:
1301  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1302 #ifdef SHIFTJIS_CP932
1303  cp51932_f = FALSE;
1304 #endif
1305 #ifdef UTF8_OUTPUT_ENABLE
1307 #endif
1308  break;
1309  case SHIFT_JISX0213:
1310  case SHIFT_JIS_2004:
1311  x0213_f = TRUE;
1312 #ifdef SHIFTJIS_CP932
1313  cp51932_f = FALSE;
1314  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1315 #endif
1316  break;
1317  case EUC_JISX0213:
1318  case EUC_JIS_2004:
1319  x0213_f = TRUE;
1320 #ifdef SHIFTJIS_CP932
1321  cp51932_f = FALSE;
1322 #endif
1323  break;
1324 #ifdef UTF8_INPUT_ENABLE
1325 #ifdef UNICODE_NORMALIZATION
1326  case UTF8_MAC:
1327  nfc_f = TRUE;
1328  break;
1329 #endif
1330  case UTF_16:
1331  case UTF_16BE:
1332  case UTF_16BE_BOM:
1334  break;
1335  case UTF_16LE:
1336  case UTF_16LE_BOM:
1338  break;
1339  case UTF_32:
1340  case UTF_32BE:
1341  case UTF_32BE_BOM:
1343  break;
1344  case UTF_32LE:
1345  case UTF_32LE_BOM:
1347  break;
1348 #endif
1349  }
1350 }
1351 
1352 static void
1354 {
1355  switch (nkf_enc_to_index(enc)) {
1356  case CP50220:
1357 #ifdef SHIFTJIS_CP932
1358  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1359 #endif
1360 #ifdef UTF8_OUTPUT_ENABLE
1362 #endif
1363  break;
1364  case CP50221:
1365  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1366 #ifdef SHIFTJIS_CP932
1367  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1368 #endif
1369 #ifdef UTF8_OUTPUT_ENABLE
1371 #endif
1372  break;
1373  case ISO_2022_JP:
1374 #ifdef SHIFTJIS_CP932
1375  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1376 #endif
1377  break;
1378  case ISO_2022_JP_1:
1379  x0212_f = TRUE;
1380 #ifdef SHIFTJIS_CP932
1381  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1382 #endif
1383  break;
1384  case ISO_2022_JP_3:
1385  case ISO_2022_JP_2004:
1386  x0212_f = TRUE;
1387  x0213_f = TRUE;
1388 #ifdef SHIFTJIS_CP932
1389  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1390 #endif
1391  break;
1392  case SHIFT_JIS:
1393  break;
1394  case WINDOWS_31J:
1395  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1396 #ifdef UTF8_OUTPUT_ENABLE
1398 #endif
1399  break;
1400  case CP10001:
1401 #ifdef UTF8_OUTPUT_ENABLE
1403 #endif
1404  break;
1405  case EUC_JP:
1406  x0212_f = TRUE;
1407 #ifdef SHIFTJIS_CP932
1408  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1409 #endif
1410 #ifdef UTF8_OUTPUT_ENABLE
1412 #endif
1413  break;
1414  case EUCJP_NKF:
1415  x0212_f = FALSE;
1416 #ifdef SHIFTJIS_CP932
1417  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1418 #endif
1419 #ifdef UTF8_OUTPUT_ENABLE
1421 #endif
1422  break;
1423  case CP51932:
1424  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1425 #ifdef SHIFTJIS_CP932
1426  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1427 #endif
1428 #ifdef UTF8_OUTPUT_ENABLE
1430 #endif
1431  break;
1432  case EUCJP_MS:
1433  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1434  x0212_f = TRUE;
1435 #ifdef UTF8_OUTPUT_ENABLE
1437 #endif
1438  break;
1439  case EUCJP_ASCII:
1440  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1441  x0212_f = TRUE;
1442 #ifdef UTF8_OUTPUT_ENABLE
1444 #endif
1445  break;
1446  case SHIFT_JISX0213:
1447  case SHIFT_JIS_2004:
1448  x0213_f = TRUE;
1449 #ifdef SHIFTJIS_CP932
1450  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1451 #endif
1452  break;
1453  case EUC_JISX0213:
1454  case EUC_JIS_2004:
1455  x0212_f = TRUE;
1456  x0213_f = TRUE;
1457 #ifdef SHIFTJIS_CP932
1458  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1459 #endif
1460  break;
1461 #ifdef UTF8_OUTPUT_ENABLE
1462  case UTF_8_BOM:
1463  output_bom_f = TRUE;
1464  break;
1465  case UTF_16:
1466  case UTF_16BE_BOM:
1467  output_bom_f = TRUE;
1468  break;
1469  case UTF_16LE:
1471  output_bom_f = FALSE;
1472  break;
1473  case UTF_16LE_BOM:
1475  output_bom_f = TRUE;
1476  break;
1477  case UTF_32:
1478  case UTF_32BE_BOM:
1479  output_bom_f = TRUE;
1480  break;
1481  case UTF_32LE:
1483  output_bom_f = FALSE;
1484  break;
1485  case UTF_32LE_BOM:
1487  output_bom_f = TRUE;
1488  break;
1489 #endif
1490  }
1491 }
1492 
/*
 * Look up the input_code_list entry whose iconv_func member equals the
 * given iconv_func.
 *
 * The list is walked until an entry with a null name is reached.
 * Returns a pointer to the first matching entry, or 0 when iconv_func is
 * null or no entry matches.
 */
1493 static struct input_code*
1495 {
1496  if (iconv_func){
1497  struct input_code *p = input_code_list;
1498  while (p->name){
1499  if (iconv_func == p->iconv_func){
1500  return p;
1501  }
1502  p++;
1503  }
1504  }
1505  return 0;
1506 }
1507 
1508 static void
1510 {
1511 #ifdef INPUT_CODE_FIX
1512  if (f || !input_encoding)
1513 #endif
1514  if (estab_f != f){
1515  estab_f = f;
1516  }
1517 
1518  if (iconv_func
1519 #ifdef INPUT_CODE_FIX
1520  && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1521 #endif
1522  ){
1523  iconv = iconv_func;
1524  }
1525 #ifdef CHECK_OPTION
1526  if (estab_f && iconv_for_check != iconv){
1528  if (p){
1530  debug(p->name);
1531  }
1533  }
1534 #endif
1535 }
1536 
1537 #ifdef X0212_ENABLE
/*
 * Remap a row value whose low seven bits lie in 0x75..0x7f.
 *
 * The test uses (c & 0x7f); the is_eucg3() check uses the unmasked input.
 *   - is_eucg3(input) true:  result is (c & 0x7f) + (0x109 - 0x75)
 *   - otherwise:             result is (c & 0x7f) + (0x113 - 0x75)
 * Any value outside 0x75..0x7f is returned unchanged (unmasked).
 */
1538 static nkf_char
1540 {
1541  nkf_char ret = c;
1542  c &= 0x7f;
1543  if (is_eucg3(ret)){
1544  if (0x75 <= c && c <= 0x7f){
1545  ret = c + (0x109 - 0x75);
1546  }
1547  }else{
1548  if (0x75 <= c && c <= 0x7f){
1549  ret = c + (0x113 - 0x75);
1550  }
1551  }
1552  return ret;
1553 }
1554 
1555 
/*
 * Map a shifted row value back into the 0x75.. range.
 *
 *   - 0x7f..0x88 becomes 0x75..0x7e.
 *   - 0x89..0x92 becomes PREFIX_EUCG3 | 0x80 | (0x75..0x7e).
 * Any other value is returned unchanged.
 */
1556 static nkf_char
1558 {
1559  nkf_char ret = c;
1560  if (0x7f <= c && c <= 0x88){
1561  ret = c + (0x75 - 0x7f);
1562  }else if (0x89 <= c && c <= 0x92){
1563  ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1564  }
1565  return ret;
1566 }
1567 #endif /* X0212_ENABLE */
1568 
/*
 * Report whether the row byte c1 belongs to one of the listed rows
 * ("ku", computed as c1 - 0x20): rows 1, 3-5, 8, 12-15 (via the lookup
 * table) or rows 78-94.
 *
 * Returns 1 for a listed row and 0 otherwise.  A c1 below 0x20 yields a
 * negative row number; it is rejected explicitly so the 16-entry table is
 * never indexed out of bounds.
 */
1569 static int
1571 {
1572  static const char x0213_2_table[] =
1573  {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
1574  int ku = c1 - 0x20;
      /* lower bound added: the caller passes (c2 & 0x7f), which can be < 0x20 */
1575  if (0 <= ku && ku <= 15)
1576  return x0213_2_table[ku]; /* 1, 3-5, 8, 12-15 */
1577  if (78 <= ku && ku <= 94)
1578  return 1;
1579  return 0;
1580 }
1581 
/*
 * Convert a two-byte code (c2 = row byte, c1 = cell byte) into another
 * two-byte form and store the result through p2 and p1.  Judging by the
 * table name x0212_shiftjis the target is presumably Shift_JIS -- confirm.
 *
 * Either output pointer may be null, in which case that byte is not stored.
 * Returns 0 when a result was produced and 1 when the input cannot be
 * converted.
 *
 * Flow:
 *   - is_eucg3(c2) with x0213_f set and a row accepted by
 *     is_x0213_2_in_x0212(): rows 0x21..0x2F and 0x6E..0x7E are converted
 *     arithmetically; any other accepted row fails with 1.
 *   - is_eucg3(c2) otherwise (X0212_ENABLE only, printable row): looked up
 *     in x0212_shiftjis; on a miss c2 is passed through x0212_shift() and
 *     falls into the generic path below.
 *   - Generic path: rejected when c2 > 0x7F, otherwise converted
 *     arithmetically.
 */
1582 static nkf_char
1584 {
1585  nkf_char ndx;
1586  if (is_eucg3(c2)){
1587  ndx = c2 & 0x7f;
1588  if (x0213_f && is_x0213_2_in_x0212(ndx)){
1589  if((0x21 <= ndx && ndx <= 0x2F)){
1590  if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1591  if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1592  return 0;
1593  }else if(0x6E <= ndx && ndx <= 0x7E){
1594  if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1595  if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1596  return 0;
1597  }
1598  return 1;
1599  }
1600 #ifdef X0212_ENABLE
1601  else if(nkf_isgraph(ndx)){
1602  nkf_char val = 0;
1603  const unsigned short *ptr;
1604  ptr = x0212_shiftjis[ndx - 0x21];
1605  if (ptr){
1606  val = ptr[(c1 & 0x7f) - 0x21];
1607  }
1608  if (val){
      /* table hit: high byte is the first output byte, low byte the second */
1609  c2 = val >> 8;
1610  c1 = val & 0xff;
1611  if (p2) *p2 = c2;
1612  if (p1) *p1 = c1;
1613  return 0;
1614  }
1615  c2 = x0212_shift(c2);
1616  }
1617 #endif /* X0212_ENABLE */
1618  }
      /* anything still above 0x7F (including shifted rows) is unconvertible */
1619  if(0x7F < c2) return 1;
1620  if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1621  if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1622  return 0;
1623 }
1624 
/*
 * Convert a two-byte code (c2 = lead byte, c1 = trail byte) and store the
 * result through p2 and p1.  Judging by the SJ0162/SJ6394 offsets and the
 * shiftjis_* table names the input is presumably Shift_JIS -- confirm.
 *
 * Either output pointer may be null.  Returns 1 when c1 > 0xFC, otherwise 0.
 *
 * Flow:
 *   1. SHIFTJIS_CP932: remap IBM-extension lead bytes through
 *      shiftjis_cp932 (when neither cp932inv_f nor x0213_f is set), then
 *      remap through cp932inv when cp932inv_f is set and c2 is inside the
 *      CP932INV table range.  A zero table entry leaves the pair unchanged.
 *   2. X0212_ENABLE: when x0213_f is clear and c2 is still an IBM-extension
 *      lead byte, look the pair up in shiftjis_x0212; a hit is stored and
 *      returned immediately.  Entries above 0x7FFF get PREFIX_EUCG3.
 *   3. For c2 >= 0x80 the pair is converted arithmetically; with x0213_f
 *      set, lead bytes 0xF0 and above get PREFIX_EUCG3.
 *   4. X0212_ENABLE: c2 is passed through x0212_unshift().
 *
 * NOTE(review): the table lookups index with c1 - 0x40; no lower bound on
 * c1 is checked in this function -- presumably callers guarantee
 * c1 >= 0x40, confirm.
 */
1625 static nkf_char
1627 {
1628 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1629  nkf_char val;
1630 #endif
1631  static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1632  if (0xFC < c1) return 1;
1633 #ifdef SHIFTJIS_CP932
1634  if (!cp932inv_f && !x0213_f && is_ibmext_in_sjis(c2)){
1635  val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1636  if (val){
1637  c2 = val >> 8;
1638  c1 = val & 0xff;
1639  }
1640  }
1641  if (cp932inv_f
1642  && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1643  val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1644  if (val){
1645  c2 = val >> 8;
1646  c1 = val & 0xff;
1647  }
1648  }
1649 #endif /* SHIFTJIS_CP932 */
1650 #ifdef X0212_ENABLE
1651  if (!x0213_f && is_ibmext_in_sjis(c2)){
1652  val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1653  if (val){
1654  if (val > 0x7FFF){
1655  c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1656  c1 = val & 0xff;
1657  }else{
1658  c2 = val >> 8;
1659  c1 = val & 0xff;
1660  }
1661  if (p2) *p2 = c2;
1662  if (p1) *p1 = c1;
1663  return 0;
1664  }
1665  }
1666 #endif
1667  if(c2 >= 0x80){
1668  if(x0213_f && c2 >= 0xF0){
1669  if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1670  c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1671  }else{ /* 78<=k<=94 */
1672  c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1673  if (0x9E < c1) c2++;
1674  }
1675  }else{
1676 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1677 #define SJ6394 0x0161 /* 63 - 94 ku offset */
1678  c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1679  if (0x9E < c1) c2++;
1680  }
      /* trail bytes below 0x9F and from 0x9F upwards use different offsets */
1681  if (c1 < 0x9F)
1682  c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1683  else {
1684  c1 = c1 - 0x7E;
1685  }
1686  }
1687 
1688 #ifdef X0212_ENABLE
1689  c2 = x0212_unshift(c2);
1690 #endif
1691  if (p2) *p2 = c2;
1692  if (p1) *p1 = c1;
1693  return 0;
1694 }
1695 
1696 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/*
 * Encode the code point val (masked with VALUE_MASK) as UTF-8 and store the
 * bytes through p1..p4.
 *
 * All four outputs are always written; positions beyond the encoded length
 * are set to 0:
 *   - val < 0x80:                     1 byte
 *   - val < 0x800:                    2 bytes
 *   - nkf_char_unicode_bmp_p(val):    3 bytes
 *   - nkf_char_unicode_value_p(val):  4 bytes
 *   - otherwise:                      all four outputs are 0
 * The pointers are dereferenced unconditionally, so none may be null.
 */
1697 static void
1699 {
1700  val &= VALUE_MASK;
1701  if (val < 0x80){
1702  *p1 = val;
1703  *p2 = 0;
1704  *p3 = 0;
1705  *p4 = 0;
1706  }else if (val < 0x800){
1707  *p1 = 0xc0 | (val >> 6);
1708  *p2 = 0x80 | (val & 0x3f);
1709  *p3 = 0;
1710  *p4 = 0;
1711  } else if (nkf_char_unicode_bmp_p(val)) {
1712  *p1 = 0xe0 | (val >> 12);
1713  *p2 = 0x80 | ((val >> 6) & 0x3f);
1714  *p3 = 0x80 | ( val & 0x3f);
1715  *p4 = 0;
1716  } else if (nkf_char_unicode_value_p(val)) {
1717  *p1 = 0xf0 | (val >> 18);
1718  *p2 = 0x80 | ((val >> 12) & 0x3f);
1719  *p3 = 0x80 | ((val >> 6) & 0x3f);
1720  *p4 = 0x80 | ( val & 0x3f);
1721  } else {
1722  *p1 = 0;
1723  *p2 = 0;
1724  *p3 = 0;
1725  *p4 = 0;
1726  }
1727 }
1728 
/*
 * Decode one UTF-8 sequence given as up to four bytes c1..c4 (c1 is the
 * lead byte) into a code point.
 *
 * The sequence length is chosen from the lead byte alone; trail bytes are
 * only masked with 0x3F, not validated here.  Unused trailing arguments are
 * ignored.
 *
 * Returns the decoded value, or -1 when c1 is a trail byte, an overlong
 * two-byte lead (0x80..0xC1), or a lead byte above 0xF4.
 */
1729 static nkf_char
1731 {
1732  nkf_char wc;
1733  if (c1 <= 0x7F) {
1734  /* single byte */
1735  wc = c1;
1736  }
1737  else if (c1 <= 0xC1) {
1738  /* trail byte or invalid */
1739  return -1;
1740  }
1741  else if (c1 <= 0xDF) {
1742  /* 2 bytes */
1743  wc = (c1 & 0x1F) << 6;
1744  wc |= (c2 & 0x3F);
1745  }
1746  else if (c1 <= 0xEF) {
1747  /* 3 bytes */
1748  wc = (c1 & 0x0F) << 12;
1749  wc |= (c2 & 0x3F) << 6;
1750  wc |= (c3 & 0x3F);
1751  }
      /* the lead byte, not c2, decides the length: 0xF0..0xF4 start 4 bytes */
1752  else if (c1 <= 0xF4) {
1753  /* 4 bytes */
1754  wc = (c1 & 0x0F) << 18;
1755  wc |= (c2 & 0x3F) << 12;
1756  wc |= (c3 & 0x3F) << 6;
1757  wc |= (c4 & 0x3F);
1758  }
1759  else {
      /* lead bytes 0xF5..0xFF never occur in UTF-8 */
1760  return -1;
1761  }
1762  return wc;
1763 }
1764 #endif
1765 
1766 #ifdef UTF8_INPUT_ENABLE
/*
 * Second-stage table lookup for a pair of UTF-8 trail bytes.
 *
 * c1 selects a row in the pointer table pp (which has psize entries) and c0
 * selects the entry inside that row; both are used after subtracting 0x80.
 * The 16-bit entry is split into a high byte (stored through p2) and a low
 * byte (stored through p1); either pointer may be null.
 *
 * Returns 0 on success and 1 when pp or the selected row is null, an index
 * is out of range, the entry is 0, or no_cp932ext_f is set and the entry is
 * one of the excluded extension characters.
 *
 * NOTE(review): the inner index is bounded by the fixed constant
 * sizeof_utf8_to_euc_C2 for every row -- presumably all rows share that
 * length, confirm.
 */
1767 static int
1769  const unsigned short *const *pp, nkf_char psize,
1770  nkf_char *p2, nkf_char *p1)
1771 {
1772  nkf_char c2;
1773  const unsigned short *p;
1774  unsigned short val;
1775 
1776  if (pp == 0) return 1;
1777 
1778  c1 -= 0x80;
1779  if (c1 < 0 || psize <= c1) return 1;
1780  p = pp[c1];
1781  if (p == 0) return 1;
1782 
1783  c0 -= 0x80;
1784  if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1785  val = p[c0];
1786  if (val == 0) return 1;
1787  if (no_cp932ext_f && (
1788  (val>>8) == 0x2D || /* NEC special characters */
1789  val > NKF_INT32_C(0xF300) /* IBM extended characters */
1790  )) return 1;
1791 
1792  c2 = val >> 8;
      /* entries with the top bit set are re-tagged with PREFIX_EUCG3 */
1793  if (val > 0x7FFF){
1794  c2 &= 0x7f;
1795  c2 |= PREFIX_EUCG3;
1796  }
1797  if (c2 == SO) c2 = JIS_X_0201_1976_K;
1798  c1 = val & 0xFF;
1799  if (p2) *p2 = c2;
1800  if (p1) *p1 = c1;
1801  return 0;
1802 }
1803 
1804 static int
1806 {
1807  const unsigned short *const *pp;
1808  const unsigned short *const *const *ppp;
1809  static const char no_best_fit_chars_table_C2[] =
1810  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1811  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1812  1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1813  0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1814  static const char no_best_fit_chars_table_C2_ms[] =
1815  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1816  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1817  1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1818  0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1819  static const char no_best_fit_chars_table_932_C2[] =
1820  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1821  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1822  1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1823  0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1824  static const char no_best_fit_chars_table_932_C3[] =
1825  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1826  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1827  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1828  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1829  nkf_char ret = 0;
1830 
1831  if(c2 < 0x80){
1832  *p2 = 0;
1833  *p1 = c2;
1834  }else if(c2 < 0xe0){
1835  if(no_best_fit_chars_f){
1836  if(ms_ucs_map_f == UCS_MAP_CP932){
1837  switch(c2){
1838  case 0xC2:
1839  if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1840  break;
1841  case 0xC3:
1842  if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1843  break;
1844  }
1845  }else if(!cp932inv_f){
1846  switch(c2){
1847  case 0xC2:
1848  if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1849  break;
1850  case 0xC3:
1851  if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1852  break;
1853  }
1854  }else if(ms_ucs_map_f == UCS_MAP_MS){
1855  if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1856  }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1857  switch(c2){
1858  case 0xC2:
1859  switch(c1){
1860  case 0xA2:
1861  case 0xA3:
1862  case 0xA5:
1863  case 0xA6:
1864  case 0xAC:
1865  case 0xAF:
1866  case 0xB8:
1867  return 1;
1868  }
1869  break;
1870  }
1871  }
1872  }
1873  pp =
1879  ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1880  }else if(c0 < 0xF0){
1881  if(no_best_fit_chars_f){
1882  if(ms_ucs_map_f == UCS_MAP_CP932){
1883  if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1884  }else if(ms_ucs_map_f == UCS_MAP_MS){
1885  switch(c2){
1886  case 0xE2:
1887  switch(c1){
1888  case 0x80:
1889  if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1890  break;
1891  case 0x88:
1892  if(c0 == 0x92) return 1;
1893  break;
1894  }
1895  break;
1896  case 0xE3:
1897  if(c1 == 0x80 || c0 == 0x9C) return 1;
1898  break;
1899  }
1900  }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1901  switch(c2){
1902  case 0xE3:
1903  switch(c1){
1904  case 0x82:
1905  if(c0 == 0x94) return 1;
1906  break;
1907  case 0x83:
1908  if(c0 == 0xBB) return 1;
1909  break;
1910  }
1911  break;
1912  }
1913  }else{
1914  switch(c2){
1915  case 0xE2:
1916  switch(c1){
1917  case 0x80:
1918  if(c0 == 0x95) return 1;
1919  break;
1920  case 0x88:
1921  if(c0 == 0xA5) return 1;
1922  break;
1923  }
1924  break;
1925  case 0xEF:
1926  switch(c1){
1927  case 0xBC:
1928  if(c0 == 0x8D) return 1;
1929  break;
1930  case 0xBD:
1931  if(c0 == 0x9E && !cp932inv_f) return 1;
1932  break;
1933  case 0xBF:
1934  if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1935  break;
1936  }
1937  break;
1938  }
1939  }
1940  }
1941  ppp =
1947  ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1948  }else return -1;
1949 #ifdef SHIFTJIS_CP932
1950  if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1951  nkf_char s2, s1;
1952  if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1953  s2e_conv(s2, s1, p2, p1);
1954  }else{
1955  ret = 1;
1956  }
1957  }
1958 #endif
1959  return ret;
1960 }
1961 
1962 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Linear search of tbl (size rows) for the row whose element [0] equals
 * euc; on a match, element [2] of that row is assigned to a variable named
 * `low`, which must already exist in the scope where the macro is expanded.
 * When nothing matches, `low` is left untouched.
 */
1963 #define X0213_SURROGATE_FIND(tbl, size, euc) do { \
1964  int i; \
1965  for (i = 0; i < size; i++) \
1966  if (tbl[i][0] == euc) { \
1967  low = tbl[i][2]; \
1968  break; \
1969  } \
1970  } while (0)
1971 
1972 static nkf_char
1974 {
1975  const unsigned short *p;
1976 
1977  if (c2 == JIS_X_0201_1976_K) {
1978  if (ms_ucs_map_f == UCS_MAP_CP10001) {
1979  switch (c1) {
1980  case 0x20:
1981  return 0xA0;
1982  case 0x7D:
1983  return 0xA9;
1984  }
1985  }
1986  p = euc_to_utf8_1byte;
1987 #ifdef X0212_ENABLE
1988  } else if (is_eucg3(c2)){
1989  if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
1990  return 0xA6;
1991  }
1992  c2 = (c2&0x7f) - 0x21;
1993  if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1994  p =
1997  else
1998  return 0;
1999 #endif
2000  } else {
2001  c2 &= 0x7f;
2002  c2 = (c2&0x7f) - 0x21;
2003  if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2004  p =
2009  else
2010  return 0;
2011  }
2012  if (!p) return 0;
2013  c1 = (c1 & 0x7f) - 0x21;
2014  if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
2015  nkf_char val = p[c1];
2016  if (x0213_f && 0xD800<=val && val<=0xDBFF) {
2017  nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
2018  nkf_char low = 0;
2019  if (p==x0212_to_utf8_2bytes_x0213[c2]) {
2020  X0213_SURROGATE_FIND(x0213_2_surrogate_table, sizeof_x0213_2_surrogate_table, euc);
2021  } else {
2022  X0213_SURROGATE_FIND(x0213_1_surrogate_table, sizeof_x0213_1_surrogate_table, euc);
2023  }
2024  if (!low) return 0;
2025  return UTF16_TO_UTF32(val, low);
2026  } else {
2027  return val;
2028  }
2029  }
2030  return 0;
2031 }
2032 
/*
 * Look up the base value recorded for a combining pair.
 *
 * comb must be listed in x0213_combining_chars; otherwise 0 is returned.
 * The two-byte code ((c2 & 0x7f) << 8 | (c1 & 0x7f)) is then searched in
 * column [0] of x0213_combining_table, and column [1] of the first matching
 * row is returned.  Returns 0 when no row matches.
 */
2033 static nkf_char
2035 {
2036  nkf_char euc;
2037  int i;
2038  for (i = 0; i < sizeof_x0213_combining_chars; i++)
2039  if (x0213_combining_chars[i] == comb)
2040  break;
      /* loop ran to completion: comb is not a known combining character */
2041  if (i >= sizeof_x0213_combining_chars)
2042  return 0;
2043  euc = (c2&0x7f)<<8 | (c1&0x7f);
2044  for (i = 0; i < sizeof_x0213_combining_table; i++)
2045  if (x0213_combining_table[i][0] == euc)
2046  return x0213_combining_table[i][1];
2047  return 0;
2048 }
2049 #endif
2050 
/*
 * Convert a UTF-8 sequence of up to three bytes (c2 = lead byte, then c1,
 * c0) and store the result through p2 and p1.
 *
 *   - c1 == 0: single byte; *p2 = 0 and *p1 = c2.
 *   - 0xc0 <= c2 <= 0xef: delegated to unicode_to_jis_common().  With
 *     NUMCHAR_OPTION, a positive (failure) result is replaced by storing
 *     the decoded code point wrapped by nkf_char_unicode_new() in *p1 with
 *     *p2 = 0, and 0 is returned.
 *   - any other c2 with c1 != 0: nothing is stored and 0 is returned.
 *
 * Returns the result of unicode_to_jis_common(), or 0 as described above.
 */
2051 static nkf_char
2053 {
2054  nkf_char ret = 0;
2055 
2056  if (!c1){
2057  *p2 = 0;
2058  *p1 = c2;
2059  }else if (0xc0 <= c2 && c2 <= 0xef) {
2060  ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2061 #ifdef NUMCHAR_OPTION
2062  if (ret > 0){
2063  if (p2) *p2 = 0;
2064  if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
2065  ret = 0;
2066  }
2067 #endif
2068  }
2069  return ret;
2070 }
2071 
2072 #ifdef UTF8_INPUT_ENABLE
/*
 * Convert the code point val (masked with VALUE_MASK) and store the result
 * through p2 and p1, which are dereferenced unconditionally.
 *
 *   - val < 0x80: *p2 = 0, *p1 = val.
 *   - BMP value: encoded to UTF-8 and passed to unicode_to_jis_common();
 *     a positive (failure) result is replaced by *p2 = 0,
 *     *p1 = nkf_char_unicode_new(val) and a return value of 0.
 *   - otherwise: with x0213_f set, val is split into a surrogate pair and
 *     searched in the two x0213 surrogate tables (a hit in the second table
 *     gets PREFIX_EUCG3).  Without a hit, *p2 = 0 and
 *     *p1 = nkf_char_unicode_new(val).
 *
 * Returns 0, or a non-positive result from unicode_to_jis_common().
 */
2073 static nkf_char
2075 {
2076  nkf_char c1, c2, c3, c4;
2077  nkf_char ret = 0;
2078  val &= VALUE_MASK;
2079  if (val < 0x80) {
2080  *p2 = 0;
2081  *p1 = val;
2082  }
2083  else if (nkf_char_unicode_bmp_p(val)){
2084  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2085  ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2086  if (ret > 0){
2087  *p2 = 0;
2088  *p1 = nkf_char_unicode_new(val);
2089  ret = 0;
2090  }
2091  }
2092  else {
2093  int i;
2094  if (x0213_f) {
2095  c1 = (val >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2096  c2 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2097  for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
2098  if (x0213_1_surrogate_table[i][1] == c1 && x0213_1_surrogate_table[i][2] == c2) {
2099  val = x0213_1_surrogate_table[i][0];
2100  *p2 = val >> 8;
2101  *p1 = val & 0xFF;
2102  return 0;
2103  }
2104  for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
2105  if (x0213_2_surrogate_table[i][1] == c1 && x0213_2_surrogate_table[i][2] == c2) {
2106  val = x0213_2_surrogate_table[i][0];
2107  *p2 = PREFIX_EUCG3 | (val >> 8);
2108  *p1 = val & 0xFF;
2109  return 0;
2110  }
2111  }
      /* no table entry: hand the raw code point on */
2112  *p2 = 0;
2113  *p1 = nkf_char_unicode_new(val);
2114  }
2115  return ret;
2116 }
2117 #endif
2118 
/*
 * Input-side converter: normalise one character given as (c2, c1, c0) and
 * forward it to oconv as a (c2, c1) pair.  The in-body comments refer to
 * eucJP-ms, so this is presumably the EUC-JP input path -- confirm.
 *
 *   - c2 is JIS_X_0201_1976_K or SS2: replaced by GETA1/GETA2 when
 *     iso2022jp_f is set and x0201_f is clear; otherwise c2 becomes
 *     JIS_X_0201_1976_K and c1 is reduced to 7 bits.
 *   - c2 == 0x8f (X0212_ENABLE): needs the third byte c0; returns -1 when
 *     c0 is 0.  A range of (c1, c0) is mapped into the Unicode private-use
 *     area starting at 0xE3AC; everything else is packed as
 *     (0x8f << 8 | c1 & 0x7f, c0 & 0x7f) and, with cp51932_f, round-tripped
 *     through e2s_conv()/s2e_conv().
 *   - c2 is EOF, 0, below SP, or ISO_8859_1: passed through unchanged.
 *   - otherwise: a range of (c2, c1) is mapped into the private-use area
 *     starting at 0xE000, or both bytes are reduced to 7 bits and, with
 *     cp51932_f and c2 in 0x79..0x7c, round-tripped as above.
 *
 * Returns 0 after calling oconv, or -1 as described.
 */
2119 static nkf_char
2121 {
2122  if (c2 == JIS_X_0201_1976_K || c2 == SS2){
2123  if (iso2022jp_f && !x0201_f) {
2124  c2 = GETA1; c1 = GETA2;
2125  } else {
2126  c2 = JIS_X_0201_1976_K;
2127  c1 &= 0x7f;
2128  }
2129 #ifdef X0212_ENABLE
2130  }else if (c2 == 0x8f){
2131  if (c0 == 0){
2132  return -1;
2133  }
2134  if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2135  /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2136  c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
2137  c2 = 0;
2138  } else {
2139  c2 = (c2 << 8) | (c1 & 0x7f);
2140  c1 = c0 & 0x7f;
2141 #ifdef SHIFTJIS_CP932
2142  if (cp51932_f){
2143  nkf_char s2, s1;
2144  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2145  s2e_conv(s2, s1, &c2, &c1);
2146  if (c2 < 0x100){
2147  c1 &= 0x7f;
2148  c2 &= 0x7f;
2149  }
2150  }
2151  }
2152 #endif /* SHIFTJIS_CP932 */
2153  }
2154 #endif /* X0212_ENABLE */
2155  } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2156  /* NOP */
2157  } else {
2158  if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2159  /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2160  c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2161  c2 = 0;
2162  } else {
2163  c1 &= 0x7f;
2164  c2 &= 0x7f;
2165 #ifdef SHIFTJIS_CP932
2166  if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2167  nkf_char s2, s1;
2168  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2169  s2e_conv(s2, s1, &c2, &c1);
2170  if (c2 < 0x100){
2171  c1 &= 0x7f;
2172  c2 &= 0x7f;
2173  }
2174  }
2175  }
2176 #endif /* SHIFTJIS_CP932 */
2177  }
2178  }
2179  (*oconv)(c2, c1);
2180  return 0;
2181 }
2182 
/*
 * Input-side converter: normalise one character given as (c2, c1) and
 * forward it to oconv.  The use of s2e_conv() and the "CP932 UDC" comment
 * suggest this is the Shift_JIS input path -- confirm.
 *
 *   - c2 is JIS_X_0201_1976_K or in 0xA1..0xDF: replaced by GETA1/GETA2
 *     when iso2022jp_f is set and x0201_f is clear, otherwise c1 is reduced
 *     to 7 bits.
 *   - c2 is EOF, 0, or below SP: passed through unchanged.
 *   - x0213_f clear, c2 in 0xF0..0xF9 and c1 in 0x40..0xFC: mapped into the
 *     Unicode private-use area starting at 0xE000; c1 == 0x7F is dropped
 *     (returns 0 without calling oconv).
 *   - otherwise: converted by s2e_conv(); a non-zero result is returned
 *     without calling oconv.
 *
 * Returns 0 after calling oconv, or as described above.
 */
2183 static nkf_char
2185 {
2186  if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2187  if (iso2022jp_f && !x0201_f) {
2188  c2 = GETA1; c1 = GETA2;
2189  } else {
2190  c1 &= 0x7f;
2191  }
2192  } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2193  /* NOP */
2194  } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2195  /* CP932 UDC */
2196  if(c1 == 0x7F) return 0;
      /* 188 cells per lead byte; (0x7E < c1) closes the gap left by 0x7F */
2197  c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2198  c2 = 0;
2199  } else {
2200  nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2201  if (ret) return ret;
2202  }
2203  (*oconv)(c2, c1);
2204  return 0;
2205 }
2206 
/*
 * Return TRUE when wc appears in column [1] of x0213_combining_table,
 * FALSE otherwise (linear search over sizeof_x0213_combining_table rows).
 */
2207 static int
2209 {
2210  int i;
2211  for (i = 0; i < sizeof_x0213_combining_table; i++) {
2212  if (x0213_combining_table[i][1] == wc) {
2213  return TRUE;
2214  }
2215  }
2216  return FALSE;
2217 }
2218 
/*
 * Return TRUE when wc is one of the entries of x0213_combining_chars,
 * FALSE otherwise (linear search over sizeof_x0213_combining_chars entries).
 */
2219 static int
2221 {
2222  int i;
2223  for (i = 0; i < sizeof_x0213_combining_chars; i++) {
2224  if (x0213_combining_chars[i] == wc) {
2225  return TRUE;
2226  }
2227  }
2228  return FALSE;
2229 }
2230 
/*
 * Input-side converter for a UTF-8 sequence: validate it, convert it and
 * forward the result to oconv.
 *
 * c1 is the lead byte and c2 the second byte.  c3 carries the third byte,
 * or the third and fourth bytes packed as (third << 8 | fourth) when it is
 * larger than 0xFF.
 *
 * The lead byte (0xC0..0xFF) is classified through w_iconv_utf8_1st_byte
 * and the trail bytes are range-checked per class.
 *
 * Return values:
 *    0  sequence handled (or silently rejected as malformed)
 *   -1  a three-byte lead was given but c3 is 0
 *   -2  a four-byte lead was given but c3 is 0
 *   -3  x0213_f is set and the decoded character may start a combining
 *       pair (x0213_wait_combining_p)
 *   otherwise the non-zero result of w2e_conv(), in which case oconv is not
 *   called.
 *
 * NOTE(review): a lead byte in 0x01..0x7F reaches the table lookup with a
 * negative index (c1 - 0xC0) -- presumably callers never pass such a value
 * as c1, confirm.
 */
2231 static nkf_char
2233 {
2234  nkf_char ret = 0, c4 = 0;
2235  static const char w_iconv_utf8_1st_byte[] =
2236  { /* 0xC0 - 0xFF */
2237  20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2238  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2239  30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2240  40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2241 
      /* unpack a combined third/fourth byte */
2242  if (c3 > 0xFF) {
2243  c4 = c3 & 0xFF;
2244  c3 >>= 8;
2245  }
2246 
2247  if (c1 < 0 || 0xff < c1) {
2248  }else if (c1 == 0) { /* 0 : 1 byte*/
2249  c3 = 0;
2250  } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2251  return 0;
2252  } else{
2253  switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2254  case 21:
2255  if (c2 < 0x80 || 0xBF < c2) return 0;
2256  break;
2257  case 30:
2258  if (c3 == 0) return -1;
2259  if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2260  return 0;
2261  break;
2262  case 31:
2263  case 33:
2264  if (c3 == 0) return -1;
2265  if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2266  return 0;
2267  break;
2268  case 32:
2269  if (c3 == 0) return -1;
2270  if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2271  return 0;
2272  break;
2273  case 40:
2274  if (c3 == 0) return -2;
2275  if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2276  return 0;
2277  break;
2278  case 41:
2279  if (c3 == 0) return -2;
2280  if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2281  return 0;
2282  break;
2283  case 42:
2284  if (c3 == 0) return -2;
2285  if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2286  return 0;
2287  break;
2288  default:
      /* classes 20, 43, 50, 60 and 70 are never valid lead bytes */
2289  return 0;
2290  break;
2291  }
2292  }
2293  if (c1 == 0 || c1 == EOF){
2294  } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2295  c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2296  c1 = 0;
2297  } else {
2298  if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4)))
2299  return -3;
2300  ret = w2e_conv(c1, c2, c3, &c1, &c2);
2301  }
2302  if (ret == 0){
2303  (*oconv)(c1, c2);
2304  }
2305  return ret;
2306 }
2307 
/*
 * Convert (c1, c2, c3) with w2e_conv() and, when that returns 0, forward
 * the converted pair to oconv.  No validation or combining-character check
 * is performed here.  Returns the result of w2e_conv().
 */
2308 static nkf_char
2310 {
2311  /* continue from the line below 'return -3;' in w_iconv() */
2312  nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2);
2313  if (ret == 0){
2314  (*oconv)(c1, c2);
2315  }
2316  return ret;
2317 }
2318 
2319 #define NKF_ICONV_INVALID_CODE_RANGE -13
2320 #define NKF_ICONV_WAIT_COMBINING_CHAR -14
2321 #define NKF_ICONV_NOT_COMBINED -15
2322 static size_t
2323 unicode_iconv(nkf_char wc, int nocombine)
2324 {
2325  nkf_char c1, c2;
2326  int ret = 0;
2327 
2328  if (wc < 0x80) {
2329  c2 = 0;
2330  c1 = wc;
2331  }else if ((wc>>11) == 27) {
2332  /* unpaired surrogate */
2334  }else if (wc < 0xFFFF) {
2335  if (!nocombine && x0213_f && x0213_wait_combining_p(wc))
2337  ret = w16e_conv(wc, &c2, &c1);
2338  if (ret) return ret;
2339  }else if (wc < 0x10FFFF) {
2340  c2 = 0;
2341  c1 = nkf_char_unicode_new(wc);
2342  } else {
2344  }
2345  (*oconv)(c2, c1);
2346  return 0;
2347 }
2348 
2349 static nkf_char
2351 {
2352  nkf_char c1, c2;
2353  int i;
2354 
2355  if (wc2 < 0x80) {
2356  return NKF_ICONV_NOT_COMBINED;
2357  }else if ((wc2>>11) == 27) {
2358  /* unpaired surrogate */
2360  }else if (wc2 < 0xFFFF) {
2361  if (!x0213_combining_p(wc2))
2362  return NKF_ICONV_NOT_COMBINED;
2363  for (i = 0; i < sizeof_x0213_combining_table; i++) {
2364  if (x0213_combining_table[i][1] == wc &&
2365  x0213_combining_table[i][2] == wc2) {
2366  c2 = x0213_combining_table[i][0] >> 8;
2367  c1 = x0213_combining_table[i][0] & 0x7f;
2368  (*oconv)(c2, c1);
2369  return 0;
2370  }
2371  }
2372  }else if (wc2 < 0x10FFFF) {
2373  return NKF_ICONV_NOT_COMBINED;
2374  } else {
2376  }
2377  return NKF_ICONV_NOT_COMBINED;
2378 }
2379 
/*
 * Decode two UTF-8 sequences of up to three bytes each, (c1, c2, c3) and
 * (c4, c5, c6), and try to combine them with unicode_iconv_combine().
 *
 * A negative decode result for the second sequence is returned as is; the
 * first sequence's decode result is not checked here.  Otherwise returns
 * the result of unicode_iconv_combine().
 */
2380 static nkf_char
2382 {
2383  nkf_char wc, wc2;
2384  wc = nkf_utf8_to_unicode(c1, c2, c3, 0);
2385  wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0);
2386  if (wc2 < 0)
2387  return wc2;
2388  return unicode_iconv_combine(wc, wc2);
2389 }
2390 
2391 #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1
2392 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2
/*
 * Input-side converter for UTF-16: assemble a code point from the bytes
 * c1..c4 according to input_endian and pass it to unicode_iconv with
 * nocombine = FALSE.
 *
 *   - c1 == EOF: forwards (EOF, 0) to oconv and returns 0.
 *   - The first 16-bit unit is a high surrogate (high byte 0xD8..0xDB):
 *     the second unit must be a low surrogate (high byte 0xDC..0xDF);
 *     otherwise NKF_ICONV_NEED_TWO_MORE_BYTES is returned.
 *   - Otherwise only the first unit is used.
 *
 * Any input_endian value other than ENDIAN_BIG is treated as little endian.
 * Returns the result of unicode_iconv in the normal case.
 */
2393 static size_t
2395 {
2396  nkf_char wc;
2397 
2398  if (c1 == EOF) {
2399  (*oconv)(EOF, 0);
2400  return 0;
2401  }
2402 
2403  if (input_endian == ENDIAN_BIG) {
2404  if (0xD8 <= c1 && c1 <= 0xDB) {
2405  if (0xDC <= c3 && c3 <= 0xDF) {
2406  wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2407  } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2408  } else {
2409  wc = c1 << 8 | c2;
2410  }
2411  } else {
2412  if (0xD8 <= c2 && c2 <= 0xDB) {
2413  if (0xDC <= c4 && c4 <= 0xDF) {
2414  wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2415  } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2416  } else {
2417  wc = c2 << 8 | c1;
2418  }
2419  }
2420 
2421  return (*unicode_iconv)(wc, FALSE);
2422 }
2423 
/*
 * Assemble two UTF-16 units from the bytes c1..c4 according to
 * input_endian and try to combine them with unicode_iconv_combine().
 *
 * Returns NKF_ICONV_NOT_COMBINED when the surrogate check below fires,
 * otherwise the result of unicode_iconv_combine().
 *
 * NOTE(review): the big-endian branch tests c3 (high byte of the second
 * unit) while the little-endian branch tests c2 (high byte of the first
 * unit) -- the asymmetry may be unintended, confirm.
 */
2424 static size_t
2426 {
2427  nkf_char wc, wc2;
2428 
2429  if (input_endian == ENDIAN_BIG) {
2430  if (0xD8 <= c3 && c3 <= 0xDB) {
2431  return NKF_ICONV_NOT_COMBINED;
2432  } else {
2433  wc = c1 << 8 | c2;
2434  wc2 = c3 << 8 | c4;
2435  }
2436  } else {
2437  if (0xD8 <= c2 && c2 <= 0xDB) {
2438  return NKF_ICONV_NOT_COMBINED;
2439  } else {
2440  wc = c2 << 8 | c1;
2441  wc2 = c4 << 8 | c3;
2442  }
2443  }
2444 
2445  return unicode_iconv_combine(wc, wc2);
2446 }
2447 
/*
 * Assemble a single UTF-16 unit from c1 and c2 according to input_endian
 * and pass it to unicode_iconv with nocombine = TRUE.  Surrogates are not
 * examined here.  Returns the result of unicode_iconv.
 */
2448 static size_t
2450 {
2451  nkf_char wc;
2452  if (input_endian == ENDIAN_BIG)
2453  wc = c1 << 8 | c2;
2454  else
2455  wc = c2 << 8 | c1;
2456  return (*unicode_iconv)(wc, TRUE);
2457 }
2458 
/*
 * Forward (c2, c1) to oconv unchanged and return the constant 16.  The
 * distinct return value keeps this function different from its 32-bit
 * sibling, as the in-body comment notes.
 */
2459 static nkf_char
2461 {
2462  (*oconv)(c2, c1);
2463  return 16; /* different from w_iconv32 */
2464 }
2465 
/*
 * Forward (c2, c1) to oconv unchanged and return the constant 32.  The
 * distinct return value keeps this function different from its 16-bit
 * sibling, as the in-body comment notes.
 */
2466 static nkf_char
2468 {
2469  (*oconv)(c2, c1);
2470  return 32; /* different from w_iconv16 */
2471 }
2472 
2473 static nkf_char
2475 {
2476  nkf_char wc;
2477 
2478  switch(input_endian){
2479  case ENDIAN_BIG:
2480  wc = c2 << 16 | c3 << 8 | c4;
2481  break;
2482  case ENDIAN_LITTLE:
2483  wc = c3 << 16 | c2 << 8 | c1;
2484  break;
2485  case ENDIAN_2143:
2486  wc = c1 << 16 | c4 << 8 | c3;
2487  break;
2488  case ENDIAN_3412:
2489  wc = c4 << 16 | c1 << 8 | c2;
2490  break;
2491  default:
2493  }
2494  return wc;
2495 }
2496 
/*
 * Input-side converter for UTF-32: assemble a code point from c1..c4 with
 * utf32_to_nkf_char() and pass it to unicode_iconv with nocombine = FALSE.
 *
 *   - c1 == EOF: forwards (EOF, 0) to oconv and returns 0.
 *   - A negative value from utf32_to_nkf_char() is returned as is; since
 *     this function is declared size_t, the value is converted to unsigned
 *     on return.
 *
 * Otherwise returns the result of unicode_iconv.
 */
2497 static size_t
2499 {
2500  nkf_char wc;
2501 
2502  if (c1 == EOF) {
2503  (*oconv)(EOF, 0);
2504  return 0;
2505  }
2506 
2507  wc = utf32_to_nkf_char(c1, c2, c3, c4);
2508  if (wc < 0)
2509  return wc;
2510 
2511  return (*unicode_iconv)(wc, FALSE);
2512 }
2513 
/*
 * Assemble two UTF-32 code points from (c1..c4) and (c5..c8) with
 * utf32_to_nkf_char() and try to combine them with
 * unicode_iconv_combine().
 *
 * A negative assembly result for either code point is returned
 * immediately; otherwise returns the result of unicode_iconv_combine().
 */
2514 static nkf_char
2516 {
2517  nkf_char wc, wc2;
2518 
2519  wc = utf32_to_nkf_char(c1, c2, c3, c4);
2520  if (wc < 0)
2521  return wc;
2522  wc2 = utf32_to_nkf_char(c5, c6, c7, c8);
2523  if (wc2 < 0)
2524  return wc2;
2525 
2526  return unicode_iconv_combine(wc, wc2);
2527 }
2528 
/*
 * Assemble a code point from c1..c4 with utf32_to_nkf_char() and pass it to
 * unicode_iconv with nocombine = TRUE.  The assembled value is not checked
 * for being negative before the call.  Returns the result of unicode_iconv.
 */
2529 static size_t
2531 {
2532  nkf_char wc;
2533 
2534  wc = utf32_to_nkf_char(c1, c2, c3, c4);
2535  return (*unicode_iconv)(wc, TRUE);
2536 }
2537 #endif
2538 
/*
 * If the current output_mode is neither ASCII nor ISO_8859_1, emit the
 * three bytes ESC '(' ascii_intro through o_putc and set output_mode to
 * `mode`.  Otherwise do nothing (output_mode is left untouched).
 */
2539 #define output_ascii_escape_sequence(mode) do { \
2540  if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2541  (*o_putc)(ESC); \
2542  (*o_putc)('('); \
2543  (*o_putc)(ascii_intro); \
2544  output_mode = mode; \
2545  } \
2546  } while (0)
2547 
/*
 * Switch the output stream to character set `mode`.
 *
 * Does nothing when output_mode already equals `mode`.  Otherwise emits
 * the escape sequence listed for that mode through o_putc and records the
 * new mode in output_mode.  A mode with no case below emits nothing but is
 * still stored in output_mode.
 * NOTE(review): the signature line is missing from this listing.
 */
2548 static void
2550 {
2551  if (output_mode == mode)
2552  return;
2553  switch(mode) {
2554  case ISO_8859_1:
2555  (*o_putc)(ESC);
2556  (*o_putc)('.');
2557  (*o_putc)('A');
2558  break;
2559  case JIS_X_0201_1976_K:
2560  (*o_putc)(ESC);
2561  (*o_putc)('(');
2562  (*o_putc)('I');
2563  break;
2564  case JIS_X_0208:
2565  (*o_putc)(ESC);
2566  (*o_putc)('$');
2567  (*o_putc)(kanji_intro);
2568  break;
2569  case JIS_X_0212:
2570  (*o_putc)(ESC);
2571  (*o_putc)('$');
2572  (*o_putc)('(');
2573  (*o_putc)('D');
2574  break;
2575  case JIS_X_0213_1:
2576  (*o_putc)(ESC);
2577  (*o_putc)('$');
2578  (*o_putc)('(');
2579  (*o_putc)('Q');
2580  break;
2581  case JIS_X_0213_2:
2582  (*o_putc)(ESC);
2583  (*o_putc)('$');
2584  (*o_putc)('(');
2585  (*o_putc)('P');
2586  break;
2587  }
2588  output_mode = mode;
2589 }
2590 
/*
 * Output converter taking a character as the pair (c2, c1) and writing
 * bytes through o_putc.  The escape sequences emitted above suggest this is
 * the 7-bit JIS (ISO-2022-JP style) writer.
 *
 * With NUMCHAR_OPTION, a Unicode-tagged c1 (c2 == 0) is first mapped with
 * w16e_conv(); if it is still unmapped, code points 0xE000..0xE757 are
 * turned into a two-byte user-defined code when ms_ucs_map_f is set, and
 * anything else goes to encode_fallback (if set) and is dropped.
 *
 * NOTE(review): the signature line and the first statement of most
 * branches below are missing from this listing (presumably the calls that
 * select the escape sequence) -- consult the real file before editing.
 */
2591 static void
2593 {
2594 #ifdef NUMCHAR_OPTION
2595  if (c2 == 0 && nkf_char_unicode_p(c1)){
2596  w16e_conv(c1, &c2, &c1);
2597  if (c2 == 0 && nkf_char_unicode_p(c1)){
2598  c2 = c1 & VALUE_MASK;
2599  if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2600  /* CP5022x UDC */
2601  c1 &= 0xFFF;
2602  c2 = 0x7F + c1 / 94;
2603  c1 = 0x21 + c1 % 94;
2604  } else {
2605  if (encode_fallback) (*encode_fallback)(c1);
2606  return;
2607  }
2608  }
2609  }
2610 #endif
2611  if (c2 == 0) {
2613  (*o_putc)(c1);
2614  }
2615  else if (c2 == EOF) {
2617  (*o_putc)(EOF);
2618  }
2619  else if (c2 == ISO_8859_1) {
2621  (*o_putc)(c1|0x80);
2622  }
2623  else if (c2 == JIS_X_0201_1976_K) {
2625  (*o_putc)(c1);
2626 #ifdef X0212_ENABLE
2627  } else if (is_eucg3(c2)){
2629  (*o_putc)(c2 & 0x7f);
2630  (*o_putc)(c1);
2631 #endif
2632  } else {
      /* silently drop pairs outside the accepted byte ranges; the upper
         bound for c2 is wider (0x92) when ms_ucs_map_f is set */
2633  if(ms_ucs_map_f
2634  ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2635  : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2637  (*o_putc)(c2);
2638  (*o_putc)(c1);
2639  }
2640 }
2641 
/*
 * Output converter taking a character as the pair (c2, c1) and writing
 * 8-bit bytes through o_putc; output_mode is set to EUC_JP or ASCII in the
 * branches below, so this is the EUC-JP writer.
 *
 * A Unicode-tagged c1 (c2 == 0) is first mapped with w16e_conv(); if still
 * unmapped, code points 0xE000..0xE757 are written as user-defined
 * characters when x0212_f is set, otherwise the character is handed to
 * encode_fallback (if set) and dropped.
 *
 * Invalid two-byte input (either byte fails nkf_isgraph) calls
 * set_iconv(FALSE, 0) and drops the character.
 *
 * NOTE(review): the signature line and one statement in the ISO_8859_1
 * branch are missing from this listing.
 */
2642 static void
2644 {
2645  if (c2 == 0 && nkf_char_unicode_p(c1)){
2646  w16e_conv(c1, &c2, &c1);
2647  if (c2 == 0 && nkf_char_unicode_p(c1)){
2648  c2 = c1 & VALUE_MASK;
2649  if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2650  /* eucJP-ms UDC */
2651  c1 &= 0xFFF;
2652  c2 = c1 / 94;
2653  c2 += c2 < 10 ? 0x75 : 0x8FEB;
2654  c1 = 0x21 + c1 % 94;
2655  if (is_eucg3(c2)){
2656  (*o_putc)(0x8f);
2657  (*o_putc)((c2 & 0x7f) | 0x080);
2658  (*o_putc)(c1 | 0x080);
2659  }else{
2660  (*o_putc)((c2 & 0x7f) | 0x080);
2661  (*o_putc)(c1 | 0x080);
2662  }
2663  return;
2664  } else {
2665  if (encode_fallback) (*encode_fallback)(c1);
2666  return;
2667  }
2668  }
2669  }
2670 
2671  if (c2 == EOF) {
2672  (*o_putc)(EOF);
2673  } else if (c2 == 0) {
2674  output_mode = ASCII;
2675  (*o_putc)(c1);
2676  } else if (c2 == JIS_X_0201_1976_K) {
2677  output_mode = EUC_JP;
2678  (*o_putc)(SS2); (*o_putc)(c1|0x80);
2679  } else if (c2 == ISO_8859_1) {
2681  (*o_putc)(c1 | 0x080);
2682 #ifdef X0212_ENABLE
2683  } else if (is_eucg3(c2)){
2684  output_mode = EUC_JP;
2685 #ifdef SHIFTJIS_CP932
      /* round-trip through e2s_conv/s2e_conv; this may rewrite (c2, c1),
         which is why c2 is re-examined below */
2686  if (!cp932inv_f){
2687  nkf_char s2, s1;
2688  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2689  s2e_conv(s2, s1, &c2, &c1);
2690  }
2691  }
2692 #endif
2693  if (c2 == 0) {
2694  output_mode = ASCII;
2695  (*o_putc)(c1);
2696  }else if (is_eucg3(c2)){
      /* three-byte form (0x8f prefix) is written only when x0212_f is
         set; otherwise nothing is output for this character */
2697  if (x0212_f){
2698  (*o_putc)(0x8f);
2699  (*o_putc)((c2 & 0x7f) | 0x080);
2700  (*o_putc)(c1 | 0x080);
2701  }
2702  }else{
2703  (*o_putc)((c2 & 0x7f) | 0x080);
2704  (*o_putc)(c1 | 0x080);
2705  }
2706 #endif
2707  } else {
2708  if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2709  set_iconv(FALSE, 0);
2710  return; /* too late to rescue this char */
2711  }
2712  output_mode = EUC_JP;
2713  (*o_putc)(c2 | 0x080);
2714  (*o_putc)(c1 | 0x080);
2715  }
2716 }
2717 
/*
 * Output converter taking a character as the pair (c2, c1), converting
 * two-byte characters with e2s_conv() and writing the result through
 * o_putc (the Shift_JIS-side writer, judging by e2s_conv and the CP932
 * handling).
 *
 * With NUMCHAR_OPTION, a Unicode-tagged c1 (c2 == 0) is first mapped with
 * w16e_conv(); if still unmapped, code points 0xE000..0xE757 are written as
 * two-byte user-defined characters unless x0213_f is set, otherwise the
 * character goes to encode_fallback (if set) and is dropped.
 *
 * In the general two-byte branch, a non-zero prefix_table entry for the
 * trail byte is written between the lead and trail bytes.
 *
 * NOTE(review): the signature line and one statement in several branches
 * are missing from this listing.
 */
2718 static void
2720 {
2721 #ifdef NUMCHAR_OPTION
2722  if (c2 == 0 && nkf_char_unicode_p(c1)){
2723  w16e_conv(c1, &c2, &c1);
2724  if (c2 == 0 && nkf_char_unicode_p(c1)){
2725  c2 = c1 & VALUE_MASK;
2726  if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2727  /* CP932 UDC */
2728  c1 &= 0xFFF;
2729  c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2730  c1 = c1 % 188;
      /* trail bytes start at 0x40 and skip one value (0x7f) */
2731  c1 += 0x40 + (c1 > 0x3e);
2732  (*o_putc)(c2);
2733  (*o_putc)(c1);
2734  return;
2735  } else {
2736  if(encode_fallback)(*encode_fallback)(c1);
2737  return;
2738  }
2739  }
2740  }
2741 #endif
2742  if (c2 == EOF) {
2743  (*o_putc)(EOF);
2744  return;
2745  } else if (c2 == 0) {
2746  output_mode = ASCII;
2747  (*o_putc)(c1);
2748  } else if (c2 == JIS_X_0201_1976_K) {
2750  (*o_putc)(c1|0x80);
2751  } else if (c2 == ISO_8859_1) {
2753  (*o_putc)(c1 | 0x080);
2754 #ifdef X0212_ENABLE
2755  } else if (is_eucg3(c2)){
2757  if (e2s_conv(c2, c1, &c2, &c1) == 0){
2758  (*o_putc)(c2);
2759  (*o_putc)(c1);
2760  }
2761 #endif
2762  } else {
2763  if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2764  set_iconv(FALSE, 0);
2765  return; /* too late to rescue this char */
2766  }
2768  e2s_conv(c2, c1, &c2, &c1);
2769 
2770 #ifdef SHIFTJIS_CP932
      /* optional remapping through the cp932inv table; a zero entry
         leaves the character unchanged */
2771  if (cp932inv_f
2772  && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2773  nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2774  if (c){
2775  c2 = c >> 8;
2776  c1 = c & 0xff;
2777  }
2778  }
2779 #endif /* SHIFTJIS_CP932 */
2780 
2781  (*o_putc)(c2);
2782  if (prefix_table[(unsigned char)c1]){
2783  (*o_putc)(prefix_table[(unsigned char)c1]);
2784  }
2785  (*o_putc)(c1);
2786  }
2787 }
2788 
2789 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Encode `val` with nkf_unicode_to_utf8() and write the resulting bytes
 * through o_putc.  The first byte is always written; the 2nd..4th bytes are
 * written only when non-zero.
 * Requires assignable variables named c1, c2, c3 and c4 in the enclosing
 * scope; they are overwritten.
 */
2790 #define OUTPUT_UTF8(val) do { \
2791  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \
2792  (*o_putc)(c1); \
2793  if (c2) (*o_putc)(c2); \
2794  if (c3) (*o_putc)(c3); \
2795  if (c4) (*o_putc)(c4); \
2796  } while (0)
2797 
/*
 * UTF-8 output converter for a character given as the pair (c2, c1).
 *
 * - On the first call with output_bom_f set, writes the bytes EF BB BF
 *   (octal \357 \273 \277) and clears the flag.
 * - c2 == EOF: forwards EOF to o_putc.
 * - Unicode-tagged c1 (c2 == 0): masks with VALUE_MASK and writes UTF-8.
 * - c2 == 0 otherwise: writes c1 as a single byte.
 * - otherwise: converts with e2w_conv(); a zero result writes nothing.  If
 *   e2w_combining() yields a non-zero value it is written BEFORE val.
 * NOTE(review): the signature line is missing from this listing.
 */
2798 static void
2800 {
2801  nkf_char c3, c4;
2802  nkf_char val, val2;
2803 
2804  if (output_bom_f) {
2805  output_bom_f = FALSE;
2806  (*o_putc)('\357');
2807  (*o_putc)('\273');
2808  (*o_putc)('\277');
2809  }
2810 
2811  if (c2 == EOF) {
2812  (*o_putc)(EOF);
2813  return;
2814  }
2815 
2816  if (c2 == 0 && nkf_char_unicode_p(c1)){
2817  val = c1 & VALUE_MASK;
2818  OUTPUT_UTF8(val);
2819  return;
2820  }
2821 
2822  if (c2 == 0) {
2823  (*o_putc)(c1);
2824  } else {
2825  val = e2w_conv(c2, c1);
2826  if (val){
2827  val2 = e2w_combining(val, c2, c1);
2828  if (val2)
2829  OUTPUT_UTF8(val2);
2830  OUTPUT_UTF8(val);
2831  }
2832  }
2833 }
2834 
/*
 * Write one 16-bit unit as two bytes through o_putc.  The first argument
 * is the low byte and the second the high byte: little-endian output
 * writes c1 then c2, any other output_endian writes c2 then c1.
 * Each argument is evaluated exactly once.
 */
2835 #define OUTPUT_UTF16_BYTES(c1, c2) do { \
2836  if (output_endian == ENDIAN_LITTLE){ \
2837  (*o_putc)(c1); \
2838  (*o_putc)(c2); \
2839  }else{ \
2840  (*o_putc)(c2); \
2841  (*o_putc)(c1); \
2842  } \
2843  } while (0)
2844 
/*
 * Write `val` as UTF-16 using OUTPUT_UTF16_BYTES.
 *
 * - If nkf_char_unicode_bmp_p(val): one unit, split into high/low bytes.
 * - Otherwise val is masked with VALUE_MASK and, if <= UNICODE_MAX, written
 *   as a surrogate pair.  0xD7C0 equals 0xD800 - (0x10000 >> 10), so adding
 *   it to (val >> 10) yields the high surrogate directly.  Values above
 *   UNICODE_MAX are silently dropped.
 *
 * Side effects: assigns to variables c1 and c2 in the enclosing scope and
 * modifies `val` in place, so `val` must be a modifiable lvalue.
 */
2845 #define OUTPUT_UTF16(val) do { \
2846  if (nkf_char_unicode_bmp_p(val)) { \
2847  c2 = (val >> 8) & 0xff; \
2848  c1 = val & 0xff; \
2849  OUTPUT_UTF16_BYTES(c1, c2); \
2850  } else { \
2851  val &= VALUE_MASK; \
2852  if (val <= UNICODE_MAX) { \
2853  c2 = (val >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */ \
2854  c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */ \
2855  OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \
2856  OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \
2857  } \
2858  } \
2859  } while (0)
2860 
/*
 * UTF-16 output converter for a character given as the pair (c2, c1).
 *
 * - On the first call with output_bom_f set, writes a BOM (FF FE for
 *   little-endian output, FE FF otherwise) and clears the flag.
 * - c2 == EOF: forwards EOF to o_putc.
 * - Unicode-tagged c1 (c2 == 0): written with OUTPUT_UTF16.
 * - c2 != 0: converted with e2w_conv(); a zero result writes nothing.  A
 *   non-zero e2w_combining() result is written BEFORE val.
 * - otherwise (c2 == 0, plain byte): c1 is written as one unit whose high
 *   byte is c2, i.e. 0.
 * NOTE(review): the signature line is missing from this listing.
 */
2861 static void
2863 {
2864  if (output_bom_f) {
2865  output_bom_f = FALSE;
2866  OUTPUT_UTF16_BYTES(0xFF, 0xFE);
2867  }
2868 
2869  if (c2 == EOF) {
2870  (*o_putc)(EOF);
2871  return;
2872  }
2873 
2874  if (c2 == 0 && nkf_char_unicode_p(c1)) {
2875  OUTPUT_UTF16(c1);
2876  } else if (c2) {
2877  nkf_char val, val2;
2878  val = e2w_conv(c2, c1);
2879  if (!val) return;
2880  val2 = e2w_combining(val, c2, c1);
2881  if (val2)
2882  OUTPUT_UTF16(val2);
2883  OUTPUT_UTF16(val);
2884  } else {
2885  OUTPUT_UTF16_BYTES(c1, c2);
2886  }
2887 }
2888 
/*
 * Write `c` as one 4-byte UTF-32 unit through o_putc.  Only the low 24
 * bits of `c` are emitted; the most significant byte is always written as
 * 0.  Byte order is little-endian when output_endian == ENDIAN_LITTLE and
 * big-endian otherwise.  Note that `c` is evaluated three times.
 */
2889 #define OUTPUT_UTF32(c) do { \
2890  if (output_endian == ENDIAN_LITTLE){ \
2891  (*o_putc)( (c) & 0xFF); \
2892  (*o_putc)(((c) >> 8) & 0xFF); \
2893  (*o_putc)(((c) >> 16) & 0xFF); \
2894  (*o_putc)(0); \
2895  }else{ \
2896  (*o_putc)(0); \
2897  (*o_putc)(((c) >> 16) & 0xFF); \
2898  (*o_putc)(((c) >> 8) & 0xFF); \
2899  (*o_putc)( (c) & 0xFF); \
2900  } \
2901  } while (0)
2902 
/*
 * UTF-32 output converter for a character given as the pair (c2, c1).
 *
 * - On the first call with output_bom_f set, writes a 4-byte BOM
 *   (FF FE 00 00 for little-endian output, 00 00 FE FF otherwise) and
 *   clears the flag.
 * - c2 == EOF: forwards EOF to o_putc.
 * - c2 == ISO_8859_1: sets the high bit of c1.
 * - Unicode-tagged c1 (c2 == 0): masks c1 with VALUE_MASK.
 * - c2 != 0: converts with e2w_conv(); a zero result writes nothing.  A
 *   non-zero e2w_combining() result is written BEFORE the converted value.
 * In all non-returning cases the final value of c1 is written as one unit.
 * NOTE(review): the signature line is missing from this listing.
 */
2903 static void
2905 {
2906  if (output_bom_f) {
2907  output_bom_f = FALSE;
2908  if (output_endian == ENDIAN_LITTLE){
2909  (*o_putc)(0xFF);
2910  (*o_putc)(0xFE);
2911  (*o_putc)(0);
2912  (*o_putc)(0);
2913  }else{
2914  (*o_putc)(0);
2915  (*o_putc)(0);
2916  (*o_putc)(0xFE);
2917  (*o_putc)(0xFF);
2918  }
2919  }
2920 
2921  if (c2 == EOF) {
2922  (*o_putc)(EOF);
2923  return;
2924  }
2925 
2926  if (c2 == ISO_8859_1) {
2927  c1 |= 0x80;
2928  } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2929  c1 &= VALUE_MASK;
2930  } else if (c2) {
2931  nkf_char val, val2;
2932  val = e2w_conv(c2, c1);
2933  if (!val) return;
2934  val2 = e2w_combining(val, c2, c1);
2935  if (val2)
2936  OUTPUT_UTF32(val2);
2937  c1 = val;
2938  }
2939  OUTPUT_UTF32(c1);
2940 }
2941 #endif
2942 
/*
 * Score bit flags accumulated per candidate input encoding.  Each flag is
 * the previous one shifted left by one, so they occupy bits 0..8 in the
 * order listed.  The guessing code in this file prefers the candidate with
 * the numerically smallest score, so later flags weigh more heavily.
 */
2943 #define SCORE_L2 (1) /* Kanji Level 2 */
2944 #define SCORE_KANA (SCORE_L2 << 1) /* Halfwidth Katakana */
2945 #define SCORE_DEPEND (SCORE_KANA << 1) /* MD Characters */
2946 #define SCORE_CP932 (SCORE_DEPEND << 1) /* IBM extended characters */
2947 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2948 #define SCORE_X0213 (SCORE_X0212 << 1) /* JIS X 0213 */
2949 #define SCORE_NO_EXIST (SCORE_X0213 << 1) /* Undefined Characters */
2950 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME selected */
2951 #define SCORE_ERROR (SCORE_iMIME << 1) /* Error */
2952 
/* Score every candidate starts with (see where SCORE_INIT is assigned). */
2953 #define SCORE_INIT (SCORE_iMIME)
2954 
/*
 * Score lookup tables.  The scoring code in this file indexes each table
 * with the low nibble of a byte (value & 0x0f), so each presumably holds
 * 16 entries.
 * NOTE(review): most initializer rows are missing from this listing; do
 * not treat the visible rows as the complete table contents.
 */
2955 static const nkf_char score_table_A0[] = {
2956  0, 0, 0, 0,
2957  0, 0, 0, 0,
2960 };
2961 
2962 static const nkf_char score_table_F0[] = {
2967 };
2968 
2969 static const nkf_char score_table_8FA0[] = {
2974 };
2975 
2976 static const nkf_char score_table_8FE0[] = {
2981 };
2982 
2983 static const nkf_char score_table_8FF0[] = {
2988 };
2989 
/*
 * OR the bits in `score` into ptr->score.  A null ptr is ignored.
 * NOTE(review): the signature line is missing from this listing.
 */
2990 static void
2992 {
2993  if (ptr){
2994  ptr->score |= score;
2995  }
2996 }
2997 
/*
 * Clear the bits in `score` from ptr->score.  A null ptr is ignored.
 * NOTE(review): the signature line is missing from this listing.
 */
2998 static void
3000 {
3001  if (ptr){
3002  ptr->score &= ~score;
3003  }
3004 }
3005 
/*
 * Update ptr->score from the character currently held in ptr->buf[0]
 * (c2) and ptr->buf[1] (c1).  Classification is by lead byte: negative,
 * SS2, 0x8f (three-byte form, scored by c1's high nibble via the 8Fxx
 * tables), unmappable by e2w_conv() (when UTF8_OUTPUT_ENABLE), or by c2's
 * high nibble via the A0/F0 tables and SCORE_L2.
 *
 * NOTE(review): the signature line and the statements of the `c2 < 0`,
 * final 0x8f `else`, and `!e2w_conv` branches are missing from this
 * listing -- consult the real file for the flags they set.
 */
3006 static void
3008 {
3009  nkf_char c2 = ptr->buf[0];
3010  nkf_char c1 = ptr->buf[1];
3011  if (c2 < 0){
3013  }else if (c2 == SS2){
3014  set_code_score(ptr, SCORE_KANA);
3015  }else if (c2 == 0x8f){
3016  if ((c1 & 0x70) == 0x20){
3017  set_code_score(ptr, score_table_8FA0[c1 & 0x0f]);
3018  }else if ((c1 & 0x70) == 0x60){
3019  set_code_score(ptr, score_table_8FE0[c1 & 0x0f]);
3020  }else if ((c1 & 0x70) == 0x70){
3021  set_code_score(ptr, score_table_8FF0[c1 & 0x0f]);
3022  }else{
3024  }
3025 #ifdef UTF8_OUTPUT_ENABLE
3026  }else if (!e2w_conv(c2, c1)){
3028 #endif
3029  }else if ((c2 & 0x70) == 0x20){
3030  set_code_score(ptr, score_table_A0[c2 & 0x0f]);
3031  }else if ((c2 & 0x70) == 0x70){
3032  set_code_score(ptr, score_table_F0[c2 & 0x0f]);
3033  }else if ((c2 & 0x70) >= 0x50){
3034  set_code_score(ptr, SCORE_L2);
3035  }
3036 }
3037 
/*
 * Mark the candidate encoding `ptr` as rejected: stat becomes -1, buf[0]
 * becomes -1 (so code_score() takes its `c2 < 0` branch), and if this
 * candidate's converter is the currently selected iconv, the selection is
 * reset with set_iconv(FALSE, 0).
 * NOTE(review): the signature line is missing from this listing.
 */
3038 static void
3040 {
3041  ptr->stat = -1;
3042  ptr->buf[0] = -1;
3043  code_score(ptr);
3044  if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
3045 }
3046 
/*
 * Append `c` to ptr->buf and advance ptr->index.  No bounds check is
 * performed here; callers are responsible for not overrunning buf.
 * NOTE(review): the signature line is missing from this listing.
 */
3047 static void
3049 {
3050  ptr->buf[ptr->index++] = c;
3051 }
3052 
/*
 * Return the candidate to its idle state (stat 0, empty buffer index)
 * while keeping its accumulated score.
 * NOTE(review): the signature line is missing from this listing.
 */
3053 static void
3055 {
3056  ptr->stat = 0;
3057  ptr->index = 0;
3058 }
3059 
/*
 * Clear the candidate's state and restore its score to SCORE_INIT.
 * NOTE(review): the signature line is missing from this listing.
 */
3060 static void
3062 {
3063  status_clear(ptr);
3064  ptr->score = SCORE_INIT;
3065 }
3066 
/*
 * Full re-initialisation: status_reset() plus zeroing ptr->_file_stat.
 * NOTE(review): the signature line is missing from this listing.
 */
3067 static void
3069 {
3070  status_reset(ptr);
3071  ptr->_file_stat = 0;
3072 }
3073 
/*
 * Re-enable a candidate: when `c` is at most DEL and estab_f is set, the
 * candidate is reset.  The status functions in this file call this for
 * candidates whose stat is -1.
 * NOTE(review): the signature line is missing from this listing.
 */
3074 static void
3076 {
3077  if (c <= DEL && estab_f){
3078  status_reset(ptr);
3079  }
3080 }
3081 
/*
 * Per-byte state machine for one candidate encoding whose byte ranges and
 * use of s2e_conv() match Shift_JIS.  Feeds byte `c` into `ptr`.
 *
 * ptr->stat values used here:
 *   -1  rejected; only status_check() is consulted
 *    0  idle, expecting a lead byte or a single-byte character
 *    1  lead byte stored, expecting a trail byte
 *    2  lead byte accepted by is_ibmext_in_sjis() stored (SHIFTJIS_CP932)
 *    3  lead byte 0xed..0xee stored
 * A byte outside the accepted ranges rejects the candidate through
 * status_disable().
 *
 * NOTE(review): the signature line and one statement each in case 2 and
 * case 3 are missing from this listing (numbering gaps after the
 * s2e_conv() calls).
 */
3082 static void
3084 {
3085  switch(ptr->stat){
3086  case -1:
3087  status_check(ptr, c);
3088  break;
3089  case 0:
3090  if (c <= DEL){
3091  break;
3092  }else if (nkf_char_unicode_p(c)){
3093  break;
3094  }else if (0xa1 <= c && c <= 0xdf){
      /* single-byte range: scored as an SS2-prefixed character */
3095  status_push_ch(ptr, SS2);
3096  status_push_ch(ptr, c);
3097  code_score(ptr);
3098  status_clear(ptr);
3099  }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
3100  ptr->stat = 1;
3101  status_push_ch(ptr, c);
3102  }else if (0xed <= c && c <= 0xee){
3103  ptr->stat = 3;
3104  status_push_ch(ptr, c);
3105 #ifdef SHIFTJIS_CP932
3106  }else if (is_ibmext_in_sjis(c)){
3107  ptr->stat = 2;
3108  status_push_ch(ptr, c);
3109 #endif /* SHIFTJIS_CP932 */
3110 #ifdef X0212_ENABLE
3111  }else if (0xf0 <= c && c <= 0xfc){
3112  ptr->stat = 1;
3113  status_push_ch(ptr, c);
3114 #endif /* X0212_ENABLE */
3115  }else{
3116  status_disable(ptr);
3117  }
3118  break;
3119  case 1:
3120  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3121  status_push_ch(ptr, c);
      /* convert in place so code_score() sees the converted pair */
3122  s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
3123  code_score(ptr);
3124  status_clear(ptr);
3125  }else{
3126  status_disable(ptr);
3127  }
3128  break;
3129  case 2:
3130 #ifdef SHIFTJIS_CP932
3131  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
3132  status_push_ch(ptr, c);
3133  if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
3135  status_clear(ptr);
3136  break;
3137  }
3138  }
3139 #endif /* SHIFTJIS_CP932 */
      /* reached when the trail byte is invalid, conversion failed, or
         SHIFTJIS_CP932 is not compiled in */
3140  status_disable(ptr);
3141  break;
3142  case 3:
3143  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3144  status_push_ch(ptr, c);
3145  s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
3147  status_clear(ptr);
3148  }else{
3149  status_disable(ptr);
3150  }
3151  break;
3152  }
3153 }
3154 
/*
 * Per-byte state machine for one candidate encoding whose byte ranges
 * (SS2, 0x8f, 0xa1..0xfe) match EUC-JP.  Feeds byte `c` into `ptr`.
 *
 * ptr->stat values used here:
 *   -1  rejected; only status_check() is consulted
 *    0  idle, expecting a lead byte or a single-byte character
 *    1  expecting the final byte 0xa1..0xfe
 *    2  0x8f prefix stored, expecting the middle byte (X0212_ENABLE)
 * A byte outside the accepted ranges rejects the candidate through
 * status_disable().
 * NOTE(review): the signature line is missing from this listing.
 */
3155 static void
3157 {
3158  switch (ptr->stat){
3159  case -1:
3160  status_check(ptr, c);
3161  break;
3162  case 0:
3163  if (c <= DEL){
3164  break;
3165  }else if (nkf_char_unicode_p(c)){
3166  break;
3167  }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
3168  ptr->stat = 1;
3169  status_push_ch(ptr, c);
3170 #ifdef X0212_ENABLE
3171  }else if (0x8f == c){
3172  ptr->stat = 2;
3173  status_push_ch(ptr, c);
3174 #endif /* X0212_ENABLE */
3175  }else{
3176  status_disable(ptr);
3177  }
3178  break;
3179  case 1:
3180  if (0xa1 <= c && c <= 0xfe){
3181  status_push_ch(ptr, c);
3182  code_score(ptr);
3183  status_clear(ptr);
3184  }else{
3185  status_disable(ptr);
3186  }
3187  break;
3188 #ifdef X0212_ENABLE
3189  case 2:
3190  if (0xa1 <= c && c <= 0xfe){
3191  ptr->stat = 1;
3192  status_push_ch(ptr, c);
3193  }else{
3194  status_disable(ptr);
3195  }
      /* no break needed: this is the last case of the switch */
3196 #endif /* X0212_ENABLE */
3197  }
3198 }
3199 
3200 #ifdef UTF8_INPUT_ENABLE
/*
 * Per-byte state machine for one candidate encoding whose lead-byte
 * ranges (0xc0..0xdf, 0xe0..0xef, 0xf0..0xf4) and trail-byte range
 * (0x80..0xbf) match UTF-8.  Feeds byte `c` into `ptr`.
 *
 * In states 1..3 ptr->stat equals the number of trail bytes expected.
 * For 2- and 3-byte sequences the completed sequence is converted with
 * w2e_conv() and scored, except that the sequence EF BB BF (a BOM) is not
 * scored.  4-byte sequences are only consumed, never scored.
 * A byte outside the accepted ranges rejects the candidate through
 * status_disable().
 * NOTE(review): the signature line is missing from this listing.
 */
3201 static void
3203 {
3204  switch (ptr->stat){
3205  case -1:
3206  status_check(ptr, c);
3207  break;
3208  case 0:
3209  if (c <= DEL){
3210  break;
3211  }else if (nkf_char_unicode_p(c)){
3212  break;
3213  }else if (0xc0 <= c && c <= 0xdf){
3214  ptr->stat = 1;
3215  status_push_ch(ptr, c);
3216  }else if (0xe0 <= c && c <= 0xef){
3217  ptr->stat = 2;
3218  status_push_ch(ptr, c);
3219  }else if (0xf0 <= c && c <= 0xf4){
3220  ptr->stat = 3;
3221  status_push_ch(ptr, c);
3222  }else{
3223  status_disable(ptr);
3224  }
3225  break;
3226  case 1:
3227  case 2:
3228  if (0x80 <= c && c <= 0xbf){
3229  status_push_ch(ptr, c);
      /* index > stat means lead byte plus all trail bytes are stored */
3230  if (ptr->index > ptr->stat){
3231  int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
3232  && ptr->buf[2] == 0xbf);
3233  w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
3234  &ptr->buf[0], &ptr->buf[1]);
3235  if (!bom){
3236  code_score(ptr);
3237  }
3238  status_clear(ptr);
3239  }
3240  }else{
3241  status_disable(ptr);
3242  }
3243  break;
3244  case 3:
3245  if (0x80 <= c && c <= 0xbf){
      /* the last trail byte is not stored; it just ends the sequence */
3246  if (ptr->index < ptr->stat){
3247  status_push_ch(ptr, c);
3248  } else {
3249  status_clear(ptr);
3250  }
3251  }else{
3252  status_disable(ptr);
3253  }
3254  break;
3255  }
3256 }
3257 #endif
3258 
/*
 * Feed input byte `c` to the status function of every entry in
 * input_code_list and, when the result is unambiguous, act on it.
 *
 * "Unambiguous" (action_flag stays 1) means no candidate is in the middle
 * of a multi-byte sequence (stat > 0) and at most one candidate is idle
 * (stat == 0).  In that case:
 *   - if exactly one candidate is idle and no encoding is established yet
 *     (!estab_f), that candidate's converter is selected via set_iconv();
 *   - otherwise, if `c` is at most DEL, every candidate is reset.
 *
 * Entries without a status_func are skipped.  An unreachable second
 * `if (!p->status_func) continue;` used to follow the skip; it could never
 * run, and would have looped forever if it had because it did not advance
 * p, so it has been removed.
 * NOTE(review): the signature line is missing from this listing.
 */
3259 static void
3261 {
3262  int action_flag = 1;
3263  struct input_code *result = 0;
3264  struct input_code *p = input_code_list;
3265  while (p->name){
3266  if (!p->status_func) {
3267  ++p;
3268  continue;
3269  }
3272  (p->status_func)(p, c);
3273  if (p->stat > 0){
3274  action_flag = 0;
3275  }else if(p->stat == 0){
3276  if (result){
3277  action_flag = 0;
3278  }else{
3279  result = p;
3280  }
3281  }
3282  ++p;
3283  }
3284 
3285  if (action_flag){
3286  if (result && !estab_f){
3287  set_iconv(TRUE, result->iconv_func);
3288  }else if (c <= DEL){
3289  struct input_code *ptr = input_code_list;
3290  while (ptr->name){
3291  status_reset(ptr);
3292  ++ptr;
3293  }
3294  }
3295  }
3296 }
3297 
/*
 * Converter-wide mutable state.
 * NOTE(review): the member declarations are missing from this listing.
 * Code in this file accesses std_gc_buf, broken_buf, nfc_buf,
 * broken_state and mimeout_state through a pointer named nkf_state.
 */
3298 typedef struct {
3304 } nkf_state_t;
3305 
3307 
/* Capacity passed to nkf_buf_new() for the std_gc_buf push-back buffer. */
3308 #define STD_GC_BUFSIZE (256)
3309 
/*
 * Create nkf_state on first use, or clear its three buffers on later
 * calls; in both cases broken_state and mimeout_state are reset to 0.
 * Buffer capacities on creation: std_gc_buf STD_GC_BUFSIZE, broken_buf 3,
 * nfc_buf 9.
 * NOTE(review): the signature line is missing from this listing.
 */
3310 static void
3312 {
3313  if (nkf_state) {
3314  nkf_buf_clear(nkf_state->std_gc_buf);
3315  nkf_buf_clear(nkf_state->broken_buf);
3316  nkf_buf_clear(nkf_state->nfc_buf);
3317  }
3318  else {
3319  nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3320  nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
3321  nkf_state->broken_buf = nkf_buf_new(3);
3322  nkf_state->nfc_buf = nkf_buf_new(9);
3323  }
3324  nkf_state->broken_state = 0;
3325  nkf_state->mimeout_state = 0;
3326 }
3327 
3328 #ifndef WIN32DLL
/*
 * Read one character: a character previously pushed back into
 * nkf_state->std_gc_buf takes priority; otherwise read from `f` with
 * getc().
 * NOTE(review): the signature line is missing from this listing.
 */
3329 static nkf_char
3331 {
3332  if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
3333  return nkf_buf_pop(nkf_state->std_gc_buf);
3334  }
3335  return getc(f);
3336 }
3337 #endif /*WIN32DLL*/
3338 
/*
 * Push `c` back by storing it in nkf_state->std_gc_buf (no stream is
 * touched) and return `c`.
 * NOTE(review): the signature line is missing from this listing.
 */
3339 static nkf_char
3341 {
3342  nkf_buf_push(nkf_state->std_gc_buf, c);
3343  return c;
3344 }
3345 
3346 #ifndef WIN32DLL
/*
 * Write `c` to standard output with putchar(); EOF is swallowed rather
 * than written.
 * NOTE(review): the signature line is missing from this listing.
 */
3347 static void
3349 {
3350  if(c!=EOF)
3351  putchar(c);
3352 }
3353 #endif /*WIN32DLL*/
3354 
/* Number of characters currently stored in hold_buf. */
3356 static int hold_count = 0;
/*
 * Append `c2` to hold_buf.
 * Returns EOF when the buffer was already full (nothing stored) or became
 * full with this push; otherwise returns the new element count.  The limit
 * used is HOLD_SIZE*2.
 * NOTE(review): the hold_buf declaration and the signature line are
 * missing from this listing.
 */
3357 static nkf_char
3359 {
3360  if (hold_count >= HOLD_SIZE*2)
3361  return (EOF);
3362  hold_buf[hold_count++] = c2;
3363  return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3364 }
3365 
/*
 * Buffer input while the input encoding is still undecided, choose an
 * encoding, then replay the buffered bytes through the chosen converter.
 *
 * Phase 1: c1 and c2 are stored in the hold buffer, then bytes are read
 *   with i_getc and fed to code_status() until EOF, an ESC (which is
 *   pushed back), the hold buffer filling up, or estab_f becoming set.
 * Phase 2: if no encoding was established, the input_code_list entry with
 *   a status_func and the lowest score is selected via set_iconv().
 * Phase 3: the held bytes are replayed through (*iconv).  When iconv
 *   returns -1, -2 or -3 it needs more bytes; these are taken from the
 *   hold buffer first and from i_getc afterwards.  fromhold_count tracks
 *   how many bytes of the current character came from the hold buffer so
 *   that unused bytes can be returned to the right place (hold_index-- or
 *   i_ungetc).
 *
 * Returns the last byte read in phase 1, or EOF on some end-of-input paths
 * in phase 3.
 *
 * NOTE(review): the signature line and two runs of lines (after the local
 * declarations and before `ret = c2;`) are missing from this listing.
 */
3366 static int
3368 {
3369  int ret;
3370  int hold_index;
3371  int fromhold_count;
3372  nkf_char c3, c4;
3373 
3378  hold_count = 0;
3379  push_hold_buf(c1);
3380  push_hold_buf(c2);
3381 
3382  while ((c2 = (*i_getc)(f)) != EOF) {
3383  if (c2 == ESC){
3384  (*i_ungetc)(c2,f);
3385  break;
3386  }
3387  code_status(c2);
3388  if (push_hold_buf(c2) == EOF || estab_f) {
3389  break;
3390  }
3391  }
3392 
3393  if (!estab_f) {
3394  struct input_code *p = input_code_list;
3395  struct input_code *result = p;
3396  if (c2 == EOF) {
3397  code_status(c2);
3398  }
      /* pick the candidate with the smallest score */
3399  while (p->name) {
3400  if (p->status_func && p->score < result->score) {
3401  result = p;
3402  }
3403  p++;
3404  }
3405  set_iconv(TRUE, result->iconv_func);
3406  }
3407 
3408 
3418  ret = c2;
3419  hold_index = 0;
3420  while (hold_index < hold_count){
3421  c1 = hold_buf[hold_index++];
3422  if (nkf_char_unicode_p(c1)) {
3423  (*oconv)(0, c1);
3424  continue;
3425  }
3426  else if (c1 <= DEL){
3427  (*iconv)(0, c1, 0);
3428  continue;
3429  }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3430  (*iconv)(JIS_X_0201_1976_K, c1, 0);
3431  continue;
3432  }
3433  fromhold_count = 1;
3434  if (hold_index < hold_count){
3435  c2 = hold_buf[hold_index++];
3436  fromhold_count++;
3437  }else{
3438  c2 = (*i_getc)(f);
3439  if (c2 == EOF){
3440  c4 = EOF;
3441  break;
3442  }
3443  code_status(c2);
3444  }
3445  c3 = 0;
3446  switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3447  case -2:
3448  /* 4 bytes UTF-8 */
3449  if (hold_index < hold_count){
3450  c3 = hold_buf[hold_index++];
3451  } else if ((c3 = (*i_getc)(f)) == EOF) {
3452  ret = EOF;
3453  break;
3454  }
3455  code_status(c3);
3456  if (hold_index < hold_count){
3457  c4 = hold_buf[hold_index++];
3458  } else if ((c4 = (*i_getc)(f)) == EOF) {
3459  c3 = ret = EOF;
3460  break;
3461  }
3462  code_status(c4);
      /* the 3rd and 4th bytes are packed into the third argument */
3463  (*iconv)(c1, c2, (c3<<8)|c4);
3464  break;
3465  case -3:
3466  /* 4 bytes UTF-8 (check combining character) */
3467  if (hold_index < hold_count){
3468  c3 = hold_buf[hold_index++];
3469  fromhold_count++;
3470  } else if ((c3 = (*i_getc)(f)) == EOF) {
3471  w_iconv_nocombine(c1, c2, 0);
3472  break;
3473  }
3474  if (hold_index < hold_count){
3475  c4 = hold_buf[hold_index++];
3476  fromhold_count++;
3477  } else if ((c4 = (*i_getc)(f)) == EOF) {
3478  w_iconv_nocombine(c1, c2, 0);
3479  if (fromhold_count <= 2)
3480  (*i_ungetc)(c3,f);
3481  else
3482  hold_index--;
3483  continue;
3484  }
      /* non-zero means "not combined": emit the first character alone
         and give the look-ahead bytes back */
3485  if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) {
3486  w_iconv_nocombine(c1, c2, 0);
3487  if (fromhold_count <= 2) {
3488  (*i_ungetc)(c4,f);
3489  (*i_ungetc)(c3,f);
3490  } else if (fromhold_count == 3) {
3491  (*i_ungetc)(c4,f);
3492  hold_index--;
3493  } else {
3494  hold_index -= 2;
3495  }
3496  }
3497  break;
3498  case -1:
3499  /* 3 bytes EUC or UTF-8 */
3500  if (hold_index < hold_count){
3501  c3 = hold_buf[hold_index++];
3502  fromhold_count++;
3503  } else if ((c3 = (*i_getc)(f)) == EOF) {
3504  ret = EOF;
3505  break;
3506  } else {
3507  code_status(c3);
3508  }
3509  if ((*iconv)(c1, c2, c3) == -3) {
3510  /* 6 bytes UTF-8 (check combining character) */
3511  nkf_char c5, c6;
3512  if (hold_index < hold_count){
3513  c4 = hold_buf[hold_index++];
3514  fromhold_count++;
3515  } else if ((c4 = (*i_getc)(f)) == EOF) {
3516  w_iconv_nocombine(c1, c2, c3);
3517  continue;
3518  }
3519  if (hold_index < hold_count){
3520  c5 = hold_buf[hold_index++];
3521  fromhold_count++;
3522  } else if ((c5 = (*i_getc)(f)) == EOF) {
3523  w_iconv_nocombine(c1, c2, c3);
3524  if (fromhold_count == 4)
3525  hold_index--;
3526  else
3527  (*i_ungetc)(c4,f);
3528  continue;
3529  }
3530  if (hold_index < hold_count){
3531  c6 = hold_buf[hold_index++];
3532  fromhold_count++;
3533  } else if ((c6 = (*i_getc)(f)) == EOF) {
3534  w_iconv_nocombine(c1, c2, c3);
3535  if (fromhold_count == 5) {
3536  hold_index -= 2;
3537  } else if (fromhold_count == 4) {
3538  hold_index--;
3539  (*i_ungetc)(c5,f);
3540  } else {
3541  (*i_ungetc)(c5,f);
3542  (*i_ungetc)(c4,f);
3543  }
3544  continue;
3545  }
3546  if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) {
3547  w_iconv_nocombine(c1, c2, c3);
3548  if (fromhold_count == 6) {
3549  hold_index -= 3;
3550  } else if (fromhold_count == 5) {
3551  hold_index -= 2;
3552  (*i_ungetc)(c6,f);
3553  } else if (fromhold_count == 4) {
3554  hold_index--;
3555  (*i_ungetc)(c6,f);
3556  (*i_ungetc)(c5,f);
3557  } else {
3558  (*i_ungetc)(c6,f);
3559  (*i_ungetc)(c5,f);
3560  (*i_ungetc)(c4,f);
3561  }
3562  }
3563  }
3564  break;
3565  }
3566  if (c3 == EOF) break;
3567  }
3568  return ret;
3569 }
3570 
3571 /*
3572  * Check and Ignore BOM
3573  *
3573  * Reads up to four bytes from `f` with i_getc and compares them with
3573  * these byte-order-mark patterns:
3573  *   00 00 FE FF, 00 00 FF FE, FE FF 00 00, FF FE 00 00  (4-byte forms)
3573  *   EF BB BF                                             (3-byte form)
3573  *   FE FF, FF FE                                         (2-byte forms)
3573  * When a pattern matches and the active converter (iconv) is the one
3573  * tested in that branch, the function returns with the BOM consumed.
3573  * Otherwise every byte read is pushed back with i_ungetc in reverse
3573  * order so the stream is left unchanged.
3573  *
3573  * NOTE(review): the signature line and many statements are missing from
3573  * this listing (the bodies of each `if(!input_encoding)` and a statement
3573  * in most `if (iconv == ...)` blocks) -- consult the real file before
3573  * changing anything here.
3573  */
3574 static void
3576 {
3577  int c2;
3578  switch(c2 = (*i_getc)(f)){
3579  case 0x00:
3580  if((c2 = (*i_getc)(f)) == 0x00){
3581  if((c2 = (*i_getc)(f)) == 0xFE){
3582  if((c2 = (*i_getc)(f)) == 0xFF){
3583  if(!input_encoding){
3585  }
3586  if (iconv == w_iconv32) {
3587  input_bom_f = TRUE;
3589  return;
3590  }
3591  (*i_ungetc)(0xFF,f);
3592  }else (*i_ungetc)(c2,f);
3593  (*i_ungetc)(0xFE,f);
3594  }else if(c2 == 0xFF){
3595  if((c2 = (*i_getc)(f)) == 0xFE){
3596  if(!input_encoding){
3598  }
3599  if (iconv == w_iconv32) {
3601  return;
3602  }
3603  (*i_ungetc)(0xFF,f);
3604  }else (*i_ungetc)(c2,f);
3605  (*i_ungetc)(0xFF,f);
3606  }else (*i_ungetc)(c2,f);
3607  (*i_ungetc)(0x00,f);
3608  }else (*i_ungetc)(c2,f);
3609  (*i_ungetc)(0x00,f);
3610  break;
3611  case 0xEF:
3612  if((c2 = (*i_getc)(f)) == 0xBB){
3613  if((c2 = (*i_getc)(f)) == 0xBF){
3614  if(!input_encoding){
3616  }
3617  if (iconv == w_iconv) {
3618  input_bom_f = TRUE;
3619  return;
3620  }
3621  (*i_ungetc)(0xBF,f);
3622  }else (*i_ungetc)(c2,f);
3623  (*i_ungetc)(0xBB,f);
3624  }else (*i_ungetc)(c2,f);
3625  (*i_ungetc)(0xEF,f);
3626  break;
3627  case 0xFE:
3628  if((c2 = (*i_getc)(f)) == 0xFF){
3629  if((c2 = (*i_getc)(f)) == 0x00){
3630  if((c2 = (*i_getc)(f)) == 0x00){
3631  if(!input_encoding){
3633  }
3634  if (iconv == w_iconv32) {
3636  return;
3637  }
3638  (*i_ungetc)(0x00,f);
3639  }else (*i_ungetc)(c2,f);
3640  (*i_ungetc)(0x00,f);
3641  }else (*i_ungetc)(c2,f);
3642  if(!input_encoding){
3644  }
3645  if (iconv == w_iconv16) {
3647  input_bom_f = TRUE;
3648  return;
3649  }
3650  (*i_ungetc)(0xFF,f);
3651  }else (*i_ungetc)(c2,f);
3652  (*i_ungetc)(0xFE,f);
3653  break;
3654  case 0xFF:
3655  if((c2 = (*i_getc)(f)) == 0xFE){
3656  if((c2 = (*i_getc)(f)) == 0x00){
3657  if((c2 = (*i_getc)(f)) == 0x00){
3658  if(!input_encoding){
3660  }
3661  if (iconv == w_iconv32) {
3663  input_bom_f = TRUE;
3664  return;
3665  }
3666  (*i_ungetc)(0x00,f);
3667  }else (*i_ungetc)(c2,f);
3668  (*i_ungetc)(0x00,f);
3669  }else (*i_ungetc)(c2,f);
3670  if(!input_encoding){
3672  }
3673  if (iconv == w_iconv16) {
3675  input_bom_f = TRUE;
3676  return;
3677  }
3678  (*i_ungetc)(0xFE,f);
3679  }else (*i_ungetc)(c2,f);
3680  (*i_ungetc)(0xFF,f);
3681  break;
3682  default:
3683  (*i_ungetc)(c2,f);
3684  break;
3685  }
3686 }
3687 
/*
 * Input filter that re-inserts an ESC that was stripped from an escape
 * sequence.  Characters queued in nkf_state->broken_buf are returned
 * first.  Otherwise one character is read with i_bgetc:
 *
 * - '$' followed by '@' or 'B', or '(' followed by 'J' or 'B', when the
 *   previous character was not ESC and input_mode satisfies the condition
 *   shown: both characters are queued in broken_buf and ESC is returned in
 *   their place.
 * - '$' or '(' followed by anything else: the look-ahead character is
 *   pushed back with i_bungetc and the original character is returned.
 * - any other character: remembered in broken_state and returned.
 *
 * NOTE(review): the signature line and the second line of the `c=='('`
 * condition (presumably the input_mode test) are missing from this
 * listing.
 */
3688 static nkf_char
3690 {
3691  nkf_char c, c1;
3692 
3693  if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
3694  return nkf_buf_pop(nkf_state->broken_buf);
3695  }
3696  c = (*i_bgetc)(f);
3697  if (c=='$' && nkf_state->broken_state != ESC
3698  && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3699  c1= (*i_bgetc)(f);
3700  nkf_state->broken_state = 0;
3701  if (c1=='@'|| c1=='B') {
3702  nkf_buf_push(nkf_state->broken_buf, c1);
3703  nkf_buf_push(nkf_state->broken_buf, c);
3704  return ESC;
3705  } else {
3706  (*i_bungetc)(c1,f);
3707  return c;
3708  }
3709  } else if (c=='(' && nkf_state->broken_state != ESC
3711  c1= (*i_bgetc)(f);
3712  nkf_state->broken_state = 0;
3713  if (c1=='J'|| c1=='B') {
3714  nkf_buf_push(nkf_state->broken_buf, c1);
3715  nkf_buf_push(nkf_state->broken_buf, c);
3716  return ESC;
3717  } else {
3718  (*i_bungetc)(c1,f);
3719  return c;
3720  }
3721  } else {
3722  nkf_state->broken_state = c;
3723  return c;
3724  }
3725 }
3726 
/*
 * Push `c` back into nkf_state->broken_buf, but only while the buffer
 * holds fewer than two characters; beyond that the character is silently
 * discarded.  Always returns `c`.
 * NOTE(review): the signature line is missing from this listing.
 */
3727 static nkf_char
3729 {
3730  if (nkf_buf_length(nkf_state->broken_buf) < 2)
3731  nkf_buf_push(nkf_state->broken_buf, c);
3732  return c;
3733 }
3734 
/*
 * End-of-line normalising stage placed in front of o_eol_conv.
 *
 * Guessing (only when guess_f is set): input_eol records the line ending
 * seen so far as LF, CRLF or CR, and becomes EOF once two different styles
 * have been seen (EOF is then sticky).
 *
 * Conversion: a CR is held back in prev_cr rather than emitted.  When the
 * next character arrives, or when an LF arrives, one newline is emitted in
 * the style chosen by eolmode_f: CR unless eolmode_f is LF, and LF unless
 * eolmode_f is CR.  Characters other than a bare CR or LF are passed
 * through unchanged.
 * NOTE(review): the signature line is missing from this listing.
 */
3735 static void
3737 {
3738  if (guess_f && input_eol != EOF) {
3739  if (c2 == 0 && c1 == LF) {
3740  if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3741  else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3742  } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3743  else if (!prev_cr);
3744  else if (!input_eol) input_eol = CR;
3745  else if (input_eol != CR) input_eol = EOF;
3746  }
3747  if (prev_cr || (c2 == 0 && c1 == LF)) {
3748  prev_cr = 0;
3749  if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3750  if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3751  }
3752  if (c2 == 0 && c1 == CR) prev_cr = CR;
3753  else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3754 }
3755 
/*
 * Emit one newline through the single-argument callback `func`, using the
 * style in eolmode_f, or DEFAULT_NEWLINE when eolmode_f is 0:
 * CRLF -> 0x0D 0x0A, CR -> 0x0D, LF -> 0x0A.  Any other value emits
 * nothing.
 * NOTE(review): the signature line is missing from this listing.
 */
3756 static void
3758 {
3759  switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3760  case CRLF:
3761  (*func)(0x0D);
3762  (*func)(0x0A);
3763  break;
3764  case CR:
3765  (*func)(0x0D);
3766  break;
3767  case LF:
3768  (*func)(0x0A);
3769  break;
3770  }
3771 }
3772 
/*
 * Emit one newline through the two-argument converter callback `func`
 * (called as func(0, byte)), using the style in eolmode_f, or
 * DEFAULT_NEWLINE when eolmode_f is 0: CRLF -> 0x0D 0x0A, CR -> 0x0D,
 * LF -> 0x0A.  Any other value emits nothing.
 * NOTE(review): the signature line is missing from this listing.
 */
3773 static void
3775 {
3776  switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3777  case CRLF:
3778  (*func)(0, 0x0D);
3779  (*func)(0, 0x0A);
3780  break;
3781  case CR:
3782  (*func)(0, 0x0D);
3783  break;
3784  case LF:
3785  (*func)(0, 0x0A);
3786  break;
3787  }
3788 }
3789 
3790 /*
3791  Return value of fold_conv()
3792 
3793  LF add newline and output char
3794  CR add newline and output nothing
3795  SP space
3796  0 skip
3797  1 (or else) normal output
3798 
3799  fold state in prev (previous character)
3800 
3801  >0x80 Japanese (X0208/X0201)
3802  <0x80 ASCII
3803  LF new line
3804  SP space
3805 
3806  This fold algorithm does not preserve leading spaces in a line.
3807  This is the main difference from fmt.
3808  */
3809 
/* Column width of a character: 2 when c2 is non-zero, else 1 (c1 is unused). */
3810 #define char_size(c2,c1) (c2?2:1)
3811 
/*
 * Line-folding stage placed in front of o_fconv.
 *
 * For each character (c2, c1) it updates the running column f_line and
 * the previous-character state f_prev, decides a fold_state, and then
 * emits according to that state in the switch at the end.  Lines are
 * folded once f_line exceeds fold_len; up to fold_margin extra columns are
 * tolerated so that characters which must not start a line (the "kinsoku"
 * lists below) can stay on the current one.
 *
 * The comments of the form "$B..(B" below are ISO-2022-JP sequences whose
 * ESC bytes were lost; they name the Japanese punctuation being tested.
 *
 * NOTE(review): the signature line and one statement each in the `LF` and
 * `CR` cases of the final switch (presumably the newline output) are
 * missing from this listing.
 */
3812 static void
3814 {
3815  nkf_char prev0;
3816  nkf_char fold_state;
3817 
3818  if (c1== CR && !fold_preserve_f) {
3819  fold_state=0; /* ignore cr */
3820  }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3821  f_prev = LF;
3822  fold_state=0; /* ignore cr */
3823  } else if (c1== BS) {
3824  if (f_line>0) f_line--;
3825  fold_state = 1;
3826  } else if (c2==EOF && f_line != 0) { /* close open last line */
3827  fold_state = LF;
3828  } else if ((c1==LF && !fold_preserve_f)
3829  || ((c1==CR||(c1==LF&&f_prev!=CR))
3830  && fold_preserve_f)) {
3831  /* new line */
3832  if (fold_preserve_f) {
3833  f_prev = c1;
3834  f_line = 0;
3835  fold_state = CR;
3836  } else if ((f_prev == c1 && !fold_preserve_f)
3837  || (f_prev == LF && fold_preserve_f)
3838  ) { /* duplicate newline */
3839  if (f_line) {
3840  f_line = 0;
3841  fold_state = LF; /* output two newline */
3842  } else {
3843  f_line = 0;
3844  fold_state = 1;
3845  }
3846  } else {
3847  if (f_prev&0x80) { /* Japanese? */
3848  f_prev = c1;
3849  fold_state = 0; /* ignore given single newline */
3850  } else if (f_prev==SP) {
3851  fold_state = 0;
3852  } else {
3853  f_prev = c1;
3854  if (++f_line<=fold_len)
3855  fold_state = SP;
3856  else {
3857  f_line = 0;
3858  fold_state = CR; /* fold and output nothing */
3859  }
3860  }
3861  }
3862  } else if (c1=='\f') {
3863  f_prev = LF;
3864  f_line = 0;
3865  fold_state = LF; /* output newline and clear */
3866  } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
3867  /* X0208 kankaku or ascii space */
3868  if (f_prev == SP) {
3869  fold_state = 0; /* remove duplicate spaces */
3870  } else {
3871  f_prev = SP;
3872  if (++f_line<=fold_len)
3873  fold_state = SP; /* output ASCII space only */
3874  else {
3875  f_prev = SP; f_line = 0;
3876  fold_state = CR; /* fold and output nothing */
3877  }
3878  }
3879  } else {
3880  prev0 = f_prev; /* we still need this one... , but almost done */
3881  f_prev = c1;
3882  if (c2 || c2 == JIS_X_0201_1976_K)
3883  f_prev |= 0x80; /* this is Japanese */
3884  f_line += c2 == JIS_X_0201_1976_K ? 1: char_size(c2,c1);
3885  if (f_line<=fold_len) { /* normal case */
3886  fold_state = 1;
3887  } else {
3888  if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3889  f_line = char_size(c2,c1);
3890  fold_state = LF; /* We can't wait, do fold now */
3891  } else if (c2 == JIS_X_0201_1976_K) {
3892  /* simple kinsoku rules return 1 means no folding */
3893  if (c1==(0xde&0x7f)) fold_state = 1; /* $B!+(B*/
3894  else if (c1==(0xdf&0x7f)) fold_state = 1; /* $B!,(B*/
3895  else if (c1==(0xa4&0x7f)) fold_state = 1; /* $B!#(B*/
3896  else if (c1==(0xa3&0x7f)) fold_state = 1; /* $B!$(B*/
3897  else if (c1==(0xa1&0x7f)) fold_state = 1; /* $B!W(B*/
3898  else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3899  else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3900  f_line = 1;
3901  fold_state = LF;/* add one new f_line before this character */
3902  } else {
3903  f_line = 1;
3904  fold_state = LF;/* add one new f_line before this character */
3905  }
3906  } else if (c2==0) {
3907  /* kinsoku point in ASCII */
3908  if ( c1==')'|| /* { [ ( */
3909  c1==']'||
3910  c1=='}'||
3911  c1=='.'||
3912  c1==','||
3913  c1=='!'||
3914  c1=='?'||
3915  c1=='/'||
3916  c1==':'||
3917  c1==';') {
3918  fold_state = 1;
3919  /* just after special */
3920  } else if (!is_alnum(prev0)) {
3921  f_line = char_size(c2,c1);
3922  fold_state = LF;
3923  } else if ((prev0==SP) || /* ignored new f_line */
3924  (prev0==LF)|| /* ignored new f_line */
3925  (prev0&0x80)) { /* X0208 - ASCII */
3926  f_line = char_size(c2,c1);
3927  fold_state = LF;/* add one new f_line before this character */
3928  } else {
3929  fold_state = 1; /* default no fold in ASCII */
3930  }
3931  } else {
3932  if (c2=='!') {
3933  if (c1=='"') fold_state = 1; /* $B!"(B */
3934  else if (c1=='#') fold_state = 1; /* $B!#(B */
3935  else if (c1=='W') fold_state = 1; /* $B!W(B */
3936  else if (c1=='K') fold_state = 1; /* $B!K(B */
3937  else if (c1=='$') fold_state = 1; /* $B!$(B */
3938  else if (c1=='%') fold_state = 1; /* $B!%(B */
3939  else if (c1=='\'') fold_state = 1; /* $B!\(B */
3940  else if (c1=='(') fold_state = 1; /* $B!((B */
3941  else if (c1==')') fold_state = 1; /* $B!)(B */
3942  else if (c1=='*') fold_state = 1; /* $B!*(B */
3943  else if (c1=='+') fold_state = 1; /* $B!+(B */
3944  else if (c1==',') fold_state = 1; /* $B!,(B */
3945  /* default no fold in kinsoku */
3946  else {
3947  fold_state = LF;
3948  f_line = char_size(c2,c1);
3949  /* add one new f_line before this character */
3950  }
3951  } else {
3952  f_line = char_size(c2,c1);
3953  fold_state = LF;
3954  /* add one new f_line before this character */
3955  }
3956  }
3957  }
3958  }
3959  /* terminator process */
3960  switch(fold_state) {
3961  case LF:
3963  (*o_fconv)(c2,c1);
3964  break;
3965  case 0:
3966  return;
3967  case CR:
3969  break;
3970  case TAB:
3971  case SP:
3972  (*o_fconv)(0,SP);
3973  break;
3974  default:
3975  (*o_fconv)(c2,c1);
3976  }
3977 }
3978 
3980 
3981 static void
3983 {
3984 
3985  /* if (c2) c1 &= 0x7f; assertion */
3986 
3987  if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3988  (*o_zconv)(c2,c1);
3989  return;
3990  }
3991 
3992  if (x0201_f) {
3993  if (z_prev2 == JIS_X_0201_1976_K) {
3994  if (c2 == JIS_X_0201_1976_K) {
3995  if (c1 == (0xde&0x7f)) { /* $BByE@(B */
3996  z_prev2 = 0;
3997  (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3998  return;
3999  } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* $BH>ByE@(B */
4000  z_prev2 = 0;
4001  (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
4002  return;
4003  } else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-SP)*2]) { /* $BH>ByE@(B */
4004  z_prev2 = 0;
4005  (*o_zconv)(ev_x0213[(z_prev1-SP)*2], ev_x0213[(z_prev1-SP)*2+1]);
4006  return;
4007  }
4008  }
4009  z_prev2 = 0;
4010  (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
4011  }
4012  if (c2 == JIS_X_0201_1976_K) {
4013  if (dv[(c1-SP)*2] || ev[(c1-SP)*2] || (x0213_f && ev_x0213[(c1-SP)*2])) {
4014  /* wait for a dakuten (voiced sound mark) or handakuten (semi-voiced sound mark) */
4015  z_prev1 = c1;
4016  z_prev2 = c2;
4017  return;
4018  } else {
4019  (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
4020  return;
4021  }
4022  }
4023  }
4024 
4025  if (c2 == EOF) {
4026  (*o_zconv)(c2, c1);
4027  return;
4028  }
4029 
4030  if (alpha_f&1 && c2 == 0x23) {
4031  /* JISX0208 Alphabet */
4032  c2 = 0;
4033  } else if (c2 == 0x21) {
4034  /* JISX0208 Kigou */
4035  if (0x21==c1) {
4036  if (alpha_f&2) {
4037  c2 = 0;
4038  c1 = SP;
4039  } else if (alpha_f&4) {
4040  (*o_zconv)(0, SP);
4041  (*o_zconv)(0, SP);
4042  return;
4043  }
4044  } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4045  c2 = 0;
4046  c1 = fv[c1-0x20];
4047  }
4048  }
4049 
4050  if (alpha_f&8 && c2 == 0) {
4051  /* HTML Entity */
4052  const char *entity = 0;
4053  switch (c1){
4054  case '>': entity = "&gt;"; break;
4055  case '<': entity = "&lt;"; break;
4056  case '\"': entity = "&quot;"; break;
4057  case '&': entity = "&amp;"; break;
4058  }
4059  if (entity){
4060  while (*entity) (*o_zconv)(0, *entity++);
4061  return;
4062  }
4063  }
4064 
4065  if (alpha_f & 16) {
4066  /* JIS X 0208 Katakana to JIS X 0201 Katakana */
4067  if (c2 == 0x21) {
4068  nkf_char c = 0;
4069  switch (c1) {
4070  case 0x23:
4071  /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
4072  c = 0xA1;
4073  break;
4074  case 0x56:
4075  /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
4076  c = 0xA2;
4077  break;
4078  case 0x57:
4079  /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
4080  c = 0xA3;
4081  break;
4082  case 0x22:
4083  /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
4084  c = 0xA4;
4085  break;
4086  case 0x26:
4087  /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
4088  c = 0xA5;
4089  break;
4090  case 0x3C:
4091  /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
4092  c = 0xB0;
4093  break;
4094  case 0x2B:
4095  /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
4096  c = 0xDE;
4097  break;
4098  case 0x2C:
4099  /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
4100  c = 0xDF;
4101  break;
4102  }
4103  if (c) {
4104  (*o_zconv)(JIS_X_0201_1976_K, c);
4105  return;
4106  }
4107  } else if (c2 == 0x25) {
4108  /* JISX0208 Katakana */
4109  static const int fullwidth_to_halfwidth[] =
4110  {
4111  0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
4112  0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
4113  0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
4114  0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
4115  0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
4116  0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
4117  0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
4118  0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
4119  0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
4120  0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
4121  0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
4122  0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
4123  };
4124  if (fullwidth_to_halfwidth[c1-0x20]){
4125  c2 = fullwidth_to_halfwidth[c1-0x20];
4126  (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
4127  if (c2 & 0xFF) {
4128  (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
4129  }
4130  return;
4131  }
4132  } else if (c2 == 0 && nkf_char_unicode_p(c1) &&
4133  ((c1&VALUE_MASK) == 0x3099 || (c1&VALUE_MASK) == 0x309A)) { /* $B9g@.MQByE@!&H>ByE@(B */
4134  (*o_zconv)(JIS_X_0201_1976_K, 0x5E + (c1&VALUE_MASK) - 0x3099);
4135  return;
4136  }
4137  }
4138  (*o_zconv)(c2,c1);
4139 }
4140 
4141 
/*
 * rot13(c): ROT13 substitution for a single character code.
 *
 * 'A'..'M' and 'a'..'m' are shifted up by 13, 'N'..'Z' and 'n'..'z' are
 * shifted down by 13; every other value is returned unchanged.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * such as `x & 0x7f` expands with the intended precedence.  The argument is
 * still evaluated more than once, so do not pass expressions with side
 * effects.
 */
#define rot13(c) ( \
	((c) <  'A') ? (c) : \
	((c) <= 'M') ? ((c) + 13) : \
	((c) <= 'Z') ? ((c) - 13) : \
	((c) <  'a') ? (c) : \
	((c) <= 'm') ? ((c) + 13) : \
	((c) <= 'z') ? ((c) - 13) : \
	(c) \
    )
4151 
/*
 * rot47(c): ROT47 substitution for a single character code.
 *
 * '!'..'O' are shifted up by 47 and 'P'..'~' are shifted down by 47; values
 * below '!' or above '~' are returned unchanged.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * expands with the intended precedence.  The argument is still evaluated
 * more than once, so do not pass expressions with side effects.
 */
#define rot47(c) ( \
	((c) <  '!') ? (c) : \
	((c) <= 'O') ? ((c) + 47) : \
	((c) <= '~') ? ((c) - 47) : \
	(c) \
    )
4158 
4159 static void
4161 {
4162  if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
4163  c1 = rot13(c1);
4164  } else if (c2) {
4165  c1 = rot47(c1);
4166  c2 = rot47(c2);
4167  }
4168  (*o_rot_conv)(c2,c1);
4169 }
4170 
4171 static void
4173 {
4174  if (hira_f & 1) {
4175  if (c2 == 0x25) {
4176  if (0x20 < c1 && c1 < 0x74) {
4177  c2 = 0x24;
4178  (*o_hira_conv)(c2,c1);
4179  return;
4180  } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
4181  c2 = 0;
4182  c1 = nkf_char_unicode_new(0x3094);
4183  (*o_hira_conv)(c2,c1);
4184  return;
4185  }
4186  } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
4187  c1 += 2;
4188  (*o_hira_conv)(c2,c1);
4189  return;
4190  }
4191  }
4192  if (hira_f & 2) {
4193  if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
4194  c2 = 0x25;
4195  c1 = 0x74;
4196  } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
4197  c2 = 0x25;
4198  } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
4199  c1 -= 2;
4200  }
4201  }
4202  (*o_hira_conv)(c2,c1);
4203 }
4204 
4205 
4206 static void
4208 {
4209 #define RANGE_NUM_MAX 18
4210  static const nkf_char range[RANGE_NUM_MAX][2] = {
4211  {0x222f, 0x2239,},
4212  {0x2242, 0x2249,},
4213  {0x2251, 0x225b,},
4214  {0x226b, 0x2271,},
4215  {0x227a, 0x227d,},
4216  {0x2321, 0x232f,},
4217  {0x233a, 0x2340,},
4218  {0x235b, 0x2360,},
4219  {0x237b, 0x237e,},
4220  {0x2474, 0x247e,},
4221  {0x2577, 0x257e,},
4222  {0x2639, 0x2640,},
4223  {0x2659, 0x267e,},
4224  {0x2742, 0x2750,},
4225  {0x2772, 0x277e,},
4226  {0x2841, 0x287e,},
4227  {0x4f54, 0x4f7e,},
4228  {0x7425, 0x747e},
4229  };
4230  nkf_char i;
4231  nkf_char start, end, c;
4232 
4233  if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4234  c2 = GETA1;
4235  c1 = GETA2;
4236  }
4237  if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4238  c2 = GETA1;
4239  c1 = GETA2;
4240  }
4241 
4242  for (i = 0; i < RANGE_NUM_MAX; i++) {
4243  start = range[i][0];
4244  end = range[i][1];
4245  c = (c2 << 8) + c1;
4246  if (c >= start && c <= end) {
4247  c2 = GETA1;
4248  c1 = GETA2;
4249  }
4250  }
4251  (*o_iso2022jp_check_conv)(c2,c1);
4252 }
4253 
4254 
4255 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
4256 
/*
 * MIME encoded-word prefixes ("=?charset?encoding?"), terminated by NULL.
 * '\075' is the octal escape for '='.
 *
 * The strict-mode header scanner compares upper-cased input against these
 * strings, and the MIME output code writes the selected string verbatim as
 * the opening of an encoded word.  Entry i belongs together with entry i of
 * mime_encode[] and mime_encode_method[], so the three tables must keep the
 * same order and the same UTF8_INPUT_ENABLE conditional entries.
 *
 * NOTE(review): "=?ISO-2022-JP?B?" is listed twice; presumably the two slots
 * pair with different mime_encode[] values - confirm against that table.
 */
4257 static const unsigned char *mime_pattern[] = {
4258  (const unsigned char *)"\075?EUC-JP?B?",
4259  (const unsigned char *)"\075?SHIFT_JIS?B?",
4260  (const unsigned char *)"\075?ISO-8859-1?Q?",
4261  (const unsigned char *)"\075?ISO-8859-1?B?",
4262  (const unsigned char *)"\075?ISO-2022-JP?B?",
4263  (const unsigned char *)"\075?ISO-2022-JP?B?",
4264  (const unsigned char *)"\075?ISO-2022-JP?Q?",
4265 #if defined(UTF8_INPUT_ENABLE)
4266  (const unsigned char *)"\075?UTF-8?B?",
4267  (const unsigned char *)"\075?UTF-8?Q?",
4268 #endif
4269  (const unsigned char *)"\075?US-ASCII?Q?",
4270  NULL
4271 };
4272 
4273 
4274 /* Markers used to raise the priority of the corresponding encoding */
4276  e_iconv, s_iconv, 0, 0, 0, 0, 0,
4277 #if defined(UTF8_INPUT_ENABLE)
4278  w_iconv, w_iconv,
4279 #endif
4280  0,
4281 };
4282 
4283 static const nkf_char mime_encode[] = {
4285 #if defined(UTF8_INPUT_ENABLE)
4286  UTF_8, UTF_8,
4287 #endif
4288  ASCII,
4289  0
4290 };
4291 
/*
 * Encoding letter for each mime_pattern[] entry, in the same order: 'B' or
 * 'Q', matching the "?B?" / "?Q?" that ends the corresponding pattern.
 * The UTF8_INPUT_ENABLE entries are conditional exactly as in
 * mime_pattern[], and the trailing 0 lines up with the NULL terminator of
 * that table.
 */
4292 static const nkf_char mime_encode_method[] = {
4293  'B', 'B','Q', 'B', 'B', 'B', 'Q',
4294 #if defined(UTF8_INPUT_ENABLE)
4295  'B', 'Q',
4296 #endif
4297  'Q',
4298  0
4299 };
4300 
4301 
4302 /* MIME preprocessor fifo */
4303 
4304 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
4305 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
4306 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
4307 static struct {
4308  unsigned char buf[MIME_BUF_SIZE];
4309  unsigned int top;
4310  unsigned int last; /* decoded */
4311  unsigned int input; /* undecoded */
4314 
4315 #define MAXRECOVER 20
4316 
4317 static void
4319 {
4320  mime_input_buf(--mime_input_state.top) = (unsigned char)c;
4321 }
4322 
4323 static nkf_char
4325 {
4327  return c;
4328 }
4329 
4330 static nkf_char
4332 {
4333  if (mimebuf_f)
4334  (*i_mungetc_buf)(c,f);
4335  else
4336  mime_input_buf(--mime_input_state.input) = (unsigned char)c;
4337  return c;
4338 }
4339 
4340 static nkf_char
4342 {
4343  /* We don't keep EOF in mime_input_buf, because it contains ?= as
4344  a terminator. It was checked in mime_integrity. */
4345  return ((mimebuf_f)?
4347 }
4348 
4349 static void
4351 {
4352  if (i_getc!=mime_getc) {
4355  if(mime_f==STRICT_MIME) {
4358  }
4359  }
4360 }
4361 
4362 static void
4364 {
4365  if(mime_f==STRICT_MIME) {
4366  i_mgetc = i_mgetc_buf;
4368  }
4369  i_getc = i_mgetc;
4370  i_ungetc = i_mungetc;
4373 }
4374 
4375 static nkf_char
4376 mime_integrity(FILE *f, const unsigned char *p)
4377 {
4378  nkf_char c,d;
4379  unsigned int q;
4380  /* In buffered mode, read until =? or NL or buffer full
4381  */
4382  mime_input_state.input = mime_input_state.top;
4384 
4385  while(*p) mime_input_buf(mime_input_state.input++) = *p++;
4386  d = 0;
4387  q = mime_input_state.input;
4388  while((c=(*i_getc)(f))!=EOF) {
4389  if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
4390  break; /* buffer full */
4391  }
4392  if (c=='=' && d=='?') {
4393  /* checked. skip header, start decode */
4394  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4395  /* mime_last_input = mime_input_state.input; */
4396  mime_input_state.input = q;
4397  switch_mime_getc();
4398  return 1;
4399  }
4400  if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4401  break;
4402  /* Should we check length mod 4? */
4403  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4404  d=c;
4405  }
4406  /* In case of Incomplete MIME, no MIME decode */
4407  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4408  mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
4409  mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
4410  switch_mime_getc(); /* anyway we need buffered getc */
4411  return 1;
4412 }
4413 
4414 static nkf_char
4416 {
4417  nkf_char c1 = 0;
4418  int i,j,k;
4419  const unsigned char *p,*q;
4420  nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
4421 
4423  /* =? has been checked */
4424  j = 0;
4425  p = mime_pattern[j];
4426  r[0]='='; r[1]='?';
4427 
4428  for(i=2;p[i]>SP;i++) { /* start at =? */
4429  if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
4430  /* pattern fails, try next one */
4431  q = p;
4432  while (mime_pattern[++j]) {
4433  p = mime_pattern[j];
4434  for(k=2;k<i;k++) /* assume length(p) > i */
4435  if (p[k]!=q[k]) break;
4436  if (k==i && nkf_toupper(c1)==p[k]) break;
4437  }
4438  p = mime_pattern[j];
4439  if (p) continue; /* found next one, continue */
4440  /* all fails, output from recovery buffer */
4441  (*i_ungetc)(c1,f);
4442  for(j=0;j<i;j++) {
4443  (*oconv)(0,r[j]);
4444  }
4445  return c1;
4446  }
4447  }
4448  mime_decode_mode = p[i-2];
4449 
4453 
4454  if (mime_decode_mode=='B') {
4455  mimebuf_f = unbuf_f;
4456  if (!unbuf_f) {
4457  /* do MIME integrity check */
4458  return mime_integrity(f,mime_pattern[j]);
4459  }
4460  }
4461  switch_mime_getc();
4462  mimebuf_f = TRUE;
4463  return c1;
4464 }
4465 
4466 static nkf_char
4468 {
4469  nkf_char c1 = 0;
4470  int i,k;
4471 
4472  /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4473  /* re-read and convert again from mime_buffer. */
4474 
4475  /* =? has been checked */
4476  k = mime_input_state.last;
4478  for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4479  /* We accept any character type even if it is broken by newlines */
4480  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4481  if (c1==LF||c1==SP||c1==CR||
4482  c1=='-'||c1=='_'||is_alnum(c1)) continue;
4483  if (c1=='=') {
4484  /* Failed. But this could be another MIME preamble */
4485  (*i_ungetc)(c1,f);
4486  mime_input_state.last--;
4487  break;
4488  }
4489  if (c1!='?') break;
4490  else {
4491  /* c1=='?' */
4492  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4493  if (!(++i<MAXRECOVER) || c1==EOF) break;
4494  if (c1=='b'||c1=='B') {
4495  mime_decode_mode = 'B';
4496  } else if (c1=='q'||c1=='Q') {
4497  mime_decode_mode = 'Q';
4498  } else {
4499  break;
4500  }
4501  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4502  if (!(++i<MAXRECOVER) || c1==EOF) break;
4503  if (c1!='?') {
4505  }
4506  break;
4507  }
4508  }
4509  switch_mime_getc();
4510  if (!mime_decode_mode) {
4511  /* False MIME preamble; restart from mime_buffer */
4512  mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4513  /* Since we are in MIME mode until buffer becomes empty, */
4514  /* we never go into mime_begin again for a while. */
4515  return c1;
4516  }
4517  /* Discard the MIME preamble and go to MIME mode */
4518  mime_input_state.last = k;
4519  /* do no MIME integrity check */
4520  return c1; /* used only for checking EOF */
4521 }
4522 
4523 #ifdef CHECK_OPTION
4524 static void
4526 {
4527  ;
4528 }
4529 
4530 static void
4531 debug(const char *str)
4532 {
4533  if (debug_f){
4534  fprintf(stderr, "%s\n", str ? str : "NULL");
4535  }
4536 }
4537 #endif
4538 
4539 static void
4540 set_input_codename(const char *codename)
4541 {
4542  if (!input_codename) {
4543  input_codename = codename;
4544  } else if (strcmp(codename, input_codename) != 0) {
4545  input_codename = "";
4546  }
4547 }
4548 
4549 static const char*
4551 {
4552  if (input_codename && !*input_codename) {
4553  input_codename = "BINARY";
4554  } else {
4556  if (!input_codename) {
4557  input_codename = "ASCII";
4558  } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4559  if (p->score & (SCORE_DEPEND|SCORE_CP932))
4560  input_codename = "CP932";
4561  } else if (strcmp(input_codename, "EUC-JP") == 0) {
4562  if (p->score & SCORE_X0213)
4563  input_codename = "EUC-JIS-2004";
4564  else if (p->score & (SCORE_X0212))
4565  input_codename = "EUCJP-MS";
4566  else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4567  input_codename = "CP51932";
4568  } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4569  if (p->score & (SCORE_KANA))
4570  input_codename = "CP50221";
4571  else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4572  input_codename = "CP50220";
4573  }
4574  }
4575  return input_codename;
4576 }
4577 
4578 #if !defined(PERL_XS) && !defined(WIN32DLL)
4579 static void
4580 print_guessed_code(char *filename)
4581 {
4582  if (filename != NULL) printf("%s: ", filename);
4583  if (input_codename && !*input_codename) {
4584  printf("BINARY\n");
4585  } else {
4587  if (guess_f == 1) {
4588  printf("%s\n", input_codename);
4589  } else {
4590  printf("%s%s%s%s\n",
4592  iconv != w_iconv16 && iconv != w_iconv32 ? "" :
4593  input_endian == ENDIAN_LITTLE ? " LE" :
4594  input_endian == ENDIAN_BIG ? " BE" :
4595  "[BUG]",
4596  input_bom_f ? " (BOM)" : "",
4597  input_eol == CR ? " (CR)" :
4598  input_eol == LF ? " (LF)" :
4599  input_eol == CRLF ? " (CRLF)" :
4600  input_eol == EOF ? " (MIXED NL)" :
4601  "");
4602  }
4603  }
4604 }
4605 #endif /*WIN32DLL*/
4606 
4607 #ifdef INPUT_OPTION
4608 
4609 static nkf_char
4610 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4611 {
4612  nkf_char c1, c2, c3;
4613  c1 = (*g)(f);
4614  if (c1 != ch){
4615  return c1;
4616  }
4617  c2 = (*g)(f);
4618  if (!nkf_isxdigit(c2)){
4619  (*u)(c2, f);
4620  return c1;
4621  }
4622  c3 = (*g)(f);
4623  if (!nkf_isxdigit(c3)){
4624  (*u)(c2, f);
4625  (*u)(c3, f);
4626  return c1;
4627  }
4628  return (hex2bin(c2) << 4) | hex2bin(c3);
4629 }
4630 
4631 static nkf_char
4633 {
4634  return hex_getc(':', f, i_cgetc, i_cungetc);
4635 }
4636 
4637 static nkf_char
4639 {
4640  return (*i_cungetc)(c, f);
4641 }
4642 
4643 static nkf_char
4645 {
4646  return hex_getc('%', f, i_ugetc, i_uungetc);
4647 }
4648 
4649 static nkf_char
4651 {
4652  return (*i_uungetc)(c, f);
4653 }
4654 #endif
4655 
4656 #ifdef NUMCHAR_OPTION
4657 static nkf_char
4659 {
4660  nkf_char (*g)(FILE *) = i_ngetc;
4661  nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4662  int i = 0, j;
4663  nkf_char buf[12];
4664  nkf_char c = -1;
4665 
4666  buf[i] = (*g)(f);
4667  if (buf[i] == '&'){
4668  buf[++i] = (*g)(f);
4669  if (buf[i] == '#'){
4670  c = 0;
4671  buf[++i] = (*g)(f);
4672  if (buf[i] == 'x' || buf[i] == 'X'){
4673  for (j = 0; j < 7; j++){
4674  buf[++i] = (*g)(f);
4675  if (!nkf_isxdigit(buf[i])){
4676  if (buf[i] != ';'){
4677  c = -1;
4678  }
4679  break;
4680  }
4681  c <<= 4;
4682  c |= hex2bin(buf[i]);
4683  }
4684  }else{
4685  for (j = 0; j < 8; j++){
4686  if (j){
4687  buf[++i] = (*g)(f);
4688  }
4689  if (!nkf_isdigit(buf[i])){
4690  if (buf[i] != ';'){
4691  c = -1;
4692  }
4693  break;
4694  }
4695  c *= 10;
4696  c += hex2bin(buf[i]);
4697  }
4698  }
4699  }
4700  }
4701  if (c != -1){
4702  return nkf_char_unicode_new(c);
4703  }
4704  while (i > 0){
4705  (*u)(buf[i], f);
4706  --i;
4707  }
4708  return buf[0];
4709 }
4710 
4711 static nkf_char
4713 {
4714  return (*i_nungetc)(c, f);
4715 }
4716 #endif
4717 
4718 #ifdef UNICODE_NORMALIZATION
4719 
4720 static nkf_char
4722 {
4723  nkf_char (*g)(FILE *f) = i_nfc_getc;
4724  nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4725  nkf_buf_t *buf = nkf_state->nfc_buf;
4726  const unsigned char *array;
4727  int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4728  nkf_char c = (*g)(f);
4729 
4730  if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4731 
4732  nkf_buf_push(buf, c);
4733  do {
4734  while (lower <= upper) {
4735  int mid = (lower+upper) / 2;
4736  int len;
4737  array = normalization_table[mid].nfd;
4738  for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4739  if (len >= nkf_buf_length(buf)) {
4740  c = (*g)(f);
4741  if (c == EOF) {
4742  len = 0;
4743  lower = 1, upper = 0;
4744  break;
4745  }
4746  nkf_buf_push(buf, c);
4747  }
4748  if (array[len] != nkf_buf_at(buf, len)) {
4749  if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
4750  else upper = mid - 1;
4751  len = 0;
4752  break;
4753  }
4754  }
4755  if (len > 0) {
4756  int i;
4757  array = normalization_table[mid].nfc;
4758  nkf_buf_clear(buf);
4759  for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4760  nkf_buf_push(buf, array[i]);
4761  break;
4762  }
4763  }
4764  } while (lower <= upper);
4765 
4766  while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
4767  c = nkf_buf_pop(buf);
4768 
4769  return c;
4770 }
4771 
4772 static nkf_char
4774 {
4775  return (*i_nfc_ungetc)(c, f);
4776 }
4777 #endif /* UNICODE_NORMALIZATION */
4778 
4779 
4780 static nkf_char
4782 {
4783  int i;
4784  if (c > '@') {
4785  if (c < '[') {
4786  i = c - 'A'; /* A..Z 0-25 */
4787  } else if (c == '_') {
4788  i = '?' /* 63 */ ; /* _ 63 */
4789  } else {
4790  i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4791  }
4792  } else if (c > '/') {
4793  i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4794  } else if (c == '+' || c == '-') {
4795  i = '>' /* 62 */ ; /* + and - 62 */
4796  } else {
4797  i = '?' /* 63 */ ; /* / 63 */
4798  }
4799  return (i);
4800 }
4801 
4802 static nkf_char
4804 {
4805  nkf_char c1, c2, c3, c4, cc;
4806  nkf_char t1, t2, t3, t4, mode, exit_mode;
4807  nkf_char lwsp_count;
4808  char *lwsp_buf;
4809  char *lwsp_buf_new;
4810  nkf_char lwsp_size = 128;
4811 
4812  if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4813  return mime_input_buf(mime_input_state.top++);
4814  }
4818  return (*i_getc)(f);
4819  }
4820 
4821  if (mimebuf_f == FIXED_MIME)
4822  exit_mode = mime_decode_mode;
4823  else
4824  exit_mode = FALSE;
4825  if (mime_decode_mode == 'Q') {
4826  if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4827  restart_mime_q:
4828  if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4829  if (c1<=SP || DEL<=c1) {
4830  mime_decode_mode = exit_mode; /* prepare for quit */
4831  return c1;
4832  }
4833  if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4834  return c1;
4835  }
4836 
4837  mime_decode_mode = exit_mode; /* prepare for quit */
4838  if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4839  if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4840  /* end Q encoding */
4841  input_mode = exit_mode;
4842  lwsp_count = 0;
4843  lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4844  while ((c1=(*i_getc)(f))!=EOF) {
4845  switch (c1) {
4846  case LF:
4847  case CR:
4848  if (c1==LF) {
4849  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4850  i_ungetc(SP,f);
4851  continue;
4852  } else {
4853  i_ungetc(c1,f);
4854  }
4855  c1 = LF;
4856  } else {
4857  if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4858  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4859  i_ungetc(SP,f);
4860  continue;
4861  } else {
4862  i_ungetc(c1,f);
4863  }
4864  i_ungetc(LF,f);
4865  } else {
4866  i_ungetc(c1,f);
4867  }
4868  c1 = CR;
4869  }
4870  break;
4871  case SP:
4872  case TAB:
4873  lwsp_buf[lwsp_count] = (unsigned char)c1;
4874  if (lwsp_count++>lwsp_size){
4875  lwsp_size <<= 1;
4876  lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4877  lwsp_buf = lwsp_buf_new;
4878  }
4879  continue;
4880  }
4881  break;
4882  }
4883  if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4884  i_ungetc(c1,f);
4885  for(lwsp_count--;lwsp_count>0;lwsp_count--)
4886  i_ungetc(lwsp_buf[lwsp_count],f);
4887  c1 = lwsp_buf[0];
4888  }
4889  nkf_xfree(lwsp_buf);
4890  return c1;
4891  }
4892  if (c1=='='&&c2<SP) { /* this is soft wrap */
4893  while((c1 = (*i_mgetc)(f)) <=SP) {
4894  if (c1 == EOF) return (EOF);
4895  }
4896  mime_decode_mode = 'Q'; /* still in MIME */
4897  goto restart_mime_q;
4898  }
4899  if (c1=='?') {
4900  mime_decode_mode = 'Q'; /* still in MIME */
4901  (*i_mungetc)(c2,f);
4902  return c1;
4903  }
4904  if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4905  if (c2<=SP) return c2;
4906  mime_decode_mode = 'Q'; /* still in MIME */
4907  return ((hex2bin(c2)<<4) + hex2bin(c3));
4908  }
4909 
4910  if (mime_decode_mode != 'B') {
4912  return (*i_mgetc)(f);
4913  }
4914 
4915 
4916  /* Base64 encoding */
4917  /*
4918  MIME allows line breaks in the middle of
4919  Base64, but we are very pessimistic when decoding
4920  in unbuf mode because MIME-encoded text may be broken by
4921  control sequences from less or an editor (such as ESC-[-K) in unbuffered
4922  mode. Ignore incomplete MIME.
4923  */
4924  mode = mime_decode_mode;
4925  mime_decode_mode = exit_mode; /* prepare for quit */
4926 
4927  while ((c1 = (*i_mgetc)(f))<=SP) {
4928  if (c1==EOF)
4929  return (EOF);
4930  }
4931  mime_c2_retry:
4932  if ((c2 = (*i_mgetc)(f))<=SP) {
4933  if (c2==EOF)
4934  return (EOF);
4935  if (mime_f != STRICT_MIME) goto mime_c2_retry;
4937  return c2;
4938  }
4939  if ((c1 == '?') && (c2 == '=')) {
4940  input_mode = ASCII;
4941  lwsp_count = 0;
4942  lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4943  while ((c1=(*i_getc)(f))!=EOF) {
4944  switch (c1) {
4945  case LF:
4946  case CR:
4947  if (c1==LF) {
4948  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4949  i_ungetc(SP,f);
4950  continue;
4951  } else {
4952  i_ungetc(c1,f);
4953  }
4954  c1 = LF;
4955  } else {
4956  if ((c1=(*i_getc)(f))!=EOF) {
4957  if (c1==SP) {
4958  i_ungetc(SP,f);
4959  continue;
4960  } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4961  i_ungetc(SP,f);
4962  continue;
4963  } else {
4964  i_ungetc(c1,f);
4965  }
4966  i_ungetc(LF,f);
4967  } else {
4968  i_ungetc(c1,f);
4969  }
4970  c1 = CR;
4971  }
4972  break;
4973  case SP:
4974  case TAB:
4975  lwsp_buf[lwsp_count] = (unsigned char)c1;
4976  if (lwsp_count++>lwsp_size){
4977  lwsp_size <<= 1;
4978  lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4979  lwsp_buf = lwsp_buf_new;
4980  }
4981  continue;
4982  }
4983  break;
4984  }
4985  if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4986  i_ungetc(c1,f);
4987  for(lwsp_count--;lwsp_count>0;lwsp_count--)
4988  i_ungetc(lwsp_buf[lwsp_count],f);
4989  c1 = lwsp_buf[0];
4990  }
4991  nkf_xfree(lwsp_buf);
4992  return c1;
4993  }
4994  mime_c3_retry:
4995  if ((c3 = (*i_mgetc)(f))<=SP) {
4996  if (c3==EOF)
4997  return (EOF);
4998  if (mime_f != STRICT_MIME) goto mime_c3_retry;
5000  return c3;
5001  }
5002  mime_c4_retry:
5003  if ((c4 = (*i_mgetc)(f))<=SP) {
5004  if (c4==EOF)
5005  return (EOF);
5006  if (mime_f != STRICT_MIME) goto mime_c4_retry;
5008  return c4;
5009  }
5010 
5011  mime_decode_mode = mode; /* still in MIME sigh... */
5012 
5013  /* BASE 64 decoding */
5014 
5015  t1 = 0x3f & base64decode(c1);
5016  t2 = 0x3f & base64decode(c2);
5017  t3 = 0x3f & base64decode(c3);
5018  t4 = 0x3f & base64decode(c4);
5019  cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
5020  if (c2 != '=') {
5021  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
5022  cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
5023  if (c3 != '=') {
5024  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
5025  cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
5026  if (c4 != '=')
5027  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
5028  }
5029  } else {
5030  return c1;
5031  }
5032  return mime_input_buf(mime_input_state.top++);
5033 }
5034 
/* Base64 alphabet: basis_64[v] is the output character for the 6-bit value v (0..63). */
5035 static const char basis_64[] =
5036  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
5037 
/*
 * Pending-output buffer of the MIME encoder.  buf holds bytes that have been
 * accepted but not yet written, and count is the number of valid bytes; the
 * output code walks buf[i] for i < count and then resets count to 0.
 *
 * This file-scope object is distinct from the nkf_state->mimeout_state
 * field, which the base64 output code uses to remember the previous byte.
 */
5038 #define MIMEOUT_BUF_LENGTH 74
5039 static struct {
5040  unsigned char buf[MIMEOUT_BUF_LENGTH+1];
5041  int count;
5042 } mimeout_state;
5043 
5044 /*nkf_char mime_lastchar2, mime_lastchar1;*/
5045 
5046 static void
5048 {
5049  const unsigned char *p;
5050  int i;
5051  int j;
5052  p = mime_pattern[0];
5053  for(i=0;mime_pattern[i];i++) {
5054  if (mode == mime_encode[i]) {
5055  p = mime_pattern[i];
5056  break;
5057  }
5058  }
5059  mimeout_mode = mime_encode_method[i];
5060  i = 0;
5061  if (base64_count>45) {
5062  if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
5063  (*o_mputc)(mimeout_state.buf[i]);
5064  i++;
5065  }
5067  (*o_mputc)(SP);
5068  base64_count = 1;
5069  if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
5070  i++;
5071  }
5072  }
5073  for (;i<mimeout_state.count;i++) {
5074  if (nkf_isspace(mimeout_state.buf[i])) {
5075  (*o_mputc)(mimeout_state.buf[i]);
5076  base64_count ++;
5077  } else {
5078  break;
5079  }
5080  }
5081  while(*p) {
5082  (*o_mputc)(*p++);
5083  base64_count ++;
5084  }
5085  j = mimeout_state.count;
5086  mimeout_state.count = 0;
5087  for (;i<j;i++) {
5088  mime_putc(mimeout_state.buf[i]);
5089  }
5090 }
5091 
5092 static void
5094 {
5095  if (mimeout_mode > 0){
5096  if (c2 == EOF){
5097  if (base64_count + mimeout_state.count/3*4> 73){
5098  (*o_base64conv)(EOF,0);
5100  (*o_base64conv)(0,SP);
5101  base64_count = 1;
5102  }
5103  } else {
5104  if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
5105  (*o_base64conv)(EOF,0);
5107  (*o_base64conv)(0,SP);
5108  base64_count = 1;
5109  mimeout_mode = -1;
5110  }
5111  }
5112  } else if (c2) {
5113  if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
5114  mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
5116  (*o_base64conv)(EOF,0);
5118  (*o_base64conv)(0,SP);
5119  base64_count = 1;
5120  mimeout_mode = -1;
5121  }
5122  }
5123 }
5124 
5125 static void
5127 {
5128  (*o_mputc)('?');
5129  (*o_mputc)('=');
5130  base64_count += 2;
5131  mimeout_mode = 0;
5132 }
5133 
5134 static void
5136 {
5137  switch(mimeout_mode) {
5138  case 'Q':
5139  case 'B':
5140  break;
5141  case 2:
5142  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
5143  (*o_mputc)('=');
5144  (*o_mputc)('=');
5145  base64_count += 3;
5146  break;
5147  case 1:
5148  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
5149  (*o_mputc)('=');
5150  base64_count += 2;
5151  break;
5152  }
5153  if (mimeout_mode > 0) {
5154  if (mimeout_f!=FIXED_MIME) {
5155  close_mime();
5156  } else if (mimeout_mode != 'Q')
5157  mimeout_mode = 'B';
5158  }
5159 }
5160 
5161 static void
5163 {
5164  switch(mimeout_mode) {
5165  case 'Q':
5166  if (c==CR||c==LF) {
5167  (*o_mputc)(c);
5168  base64_count = 0;
5169  } else if(!nkf_isalnum(c)) {
5170  (*o_mputc)('=');
5171  (*o_mputc)(bin2hex(((c>>4)&0xf)));
5172  (*o_mputc)(bin2hex((c&0xf)));
5173  base64_count += 3;
5174  } else {
5175  (*o_mputc)(c);
5176  base64_count++;
5177  }
5178  break;
5179  case 'B':
5180  nkf_state->mimeout_state=c;
5181  (*o_mputc)(basis_64[c>>2]);
5182  mimeout_mode=2;
5183  base64_count ++;
5184  break;
5185  case 2:
5186  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5187  nkf_state->mimeout_state=c;
5188  mimeout_mode=1;
5189  base64_count ++;
5190  break;
5191  case 1:
5192  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
5193  (*o_mputc)(basis_64[c & 0x3F]);
5194  mimeout_mode='B';
5195  base64_count += 2;
5196  break;
5197  default:
5198  (*o_mputc)(c);
5199  base64_count++;
5200  break;
5201  }
5202 }
5203 
5204 static void
5206 {
5207  int i, j;
5208  nkf_char lastchar;
5209 
5210  if (mimeout_f == FIXED_MIME){
5211  if (mimeout_mode == 'Q'){
5212  if (base64_count > 71){
5213  if (c!=CR && c!=LF) {
5214  (*o_mputc)('=');
5216  }
5217  base64_count = 0;
5218  }
5219  }else{
5220  if (base64_count > 71){
5221  eof_mime();
5223  base64_count = 0;
5224  }
5225  if (c == EOF) { /* c==EOF */
5226  eof_mime();
5227  }
5228  }
5229  if (c != EOF) { /* c==EOF */
5230  mimeout_addchar(c);
5231  }
5232  return;
5233  }
5234 
5235  /* mimeout_f != FIXED_MIME */
5236 
5237  if (c == EOF) { /* c==EOF */
5238  if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
5239  j = mimeout_state.count;
5240  mimeout_state.count = 0;
5241  i = 0;
5242  if (mimeout_mode > 0) {
5243  if (!nkf_isblank(mimeout_state.buf[j-1])) {
5244  for (;i<j;i++) {
5245  if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
5246  break;
5247  }
5249  }
5250  eof_mime();
5251  for (;i<j;i++) {
5253  }
5254  } else {
5255  for (;i<j;i++) {
5257  }
5258  eof_mime();
5259  }
5260  } else {
5261  for (;i<j;i++) {
5263  }
5264  }
5265  return;
5266  }
5267 
5268  if (mimeout_state.count > 0){
5269  lastchar = mimeout_state.buf[mimeout_state.count - 1];
5270  }else{
5271  lastchar = -1;
5272  }
5273 
5274  if (mimeout_mode=='Q') {
5275  if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
5276  if (c == CR || c == LF) {
5277  close_mime();
5278  (*o_mputc)(c);
5279  base64_count = 0;
5280  return;
5281  } else if (c <= SP) {
5282  close_mime();
5283  if (base64_count > 70) {
5285  base64_count = 0;
5286  }
5287  if (!nkf_isblank(c)) {
5288  (*o_mputc)(SP);
5289  base64_count++;
5290  }
5291  } else {
5292  if (base64_count > 70) {
5293  close_mime();
5295  (*o_mputc)(SP);
5296  base64_count = 1;
5298  }
5299  if (!nkf_noescape_mime(c)) {
5300  mimeout_addchar(c);
5301  return;
5302  }
5303  }
5304  if (c != 0x1B) {
5305  (*o_mputc)(c);
5306  base64_count++;
5307  return;
5308  }
5309  }
5310  }
5311 
5312  if (mimeout_mode <= 0) {
5313  if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5314  output_mode == UTF_8)) {
5315  if (nkf_isspace(c)) {
5316  int flag = 0;
5317  if (mimeout_mode == -1) {
5318  flag = 1;
5319  }
5320  if (c==CR || c==LF) {
5321  if (flag) {
5323  output_mode = 0;
5324  } else {
5325  base64_count = 0;
5326  }
5327  }
5328  for (i=0;i<mimeout_state.count;i++) {
5329  (*o_mputc)(mimeout_state.buf[i]);
5330  if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
5331  base64_count = 0;
5332  }else{
5333  base64_count++;
5334  }
5335  }
5336  if (flag) {
5337  eof_mime();
5338  base64_count = 0;
5339  mimeout_mode = 0;
5340  }
5341  mimeout_state.buf[0] = (char)c;
5342  mimeout_state.count = 1;
5343  }else{
5344  if (base64_count > 1
5345  && base64_count + mimeout_state.count > 76
5346  && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
5347  static const char *str = "boundary=\"";
5348  static int len = 10;
5349  i = 0;
5350 
5351  for (; i < mimeout_state.count - len; ++i) {
5352  if (!strncmp((char *)(mimeout_state.buf+i), str, len)) {
5353  i += len - 2;
5354  break;
5355  }
5356  }
5357 
5358  if (i == 0 || i == mimeout_state.count - len) {
5360  base64_count = 0;
5361  if (!nkf_isspace(mimeout_state.buf[0])){
5362  (*o_mputc)(SP);
5363  base64_count++;
5364  }
5365  }
5366  else {
5367  int j;
5368  for (j = 0; j <= i; ++j) {
5369  (*o_mputc)(mimeout_state.buf[j]);
5370  }
5372  base64_count = 1;
5373  for (; j <= mimeout_state.count; ++j) {
5374  mimeout_state.buf[j - i] = mimeout_state.buf[j];
5375  }
5376  mimeout_state.count -= i;
5377  }
5378  }
5379  mimeout_state.buf[mimeout_state.count++] = (char)c;
5380  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5382  }
5383  }
5384  return;
5385  }else{
5386  if (lastchar==CR || lastchar == LF){
5387  for (i=0;i<mimeout_state.count;i++) {
5388  (*o_mputc)(mimeout_state.buf[i]);
5389  }
5390  base64_count = 0;
5391  mimeout_state.count = 0;
5392  }
5393  if (lastchar==SP) {
5394  for (i=0;i<mimeout_state.count-1;i++) {
5395  (*o_mputc)(mimeout_state.buf[i]);
5396  base64_count++;
5397  }
5398  mimeout_state.buf[0] = SP;
5399  mimeout_state.count = 1;
5400  }
5402  }
5403  }else{
5404  /* mimeout_mode == 'B', 1, 2 */
5405  if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5406  output_mode == UTF_8)) {
5407  if (lastchar == CR || lastchar == LF){
5408  if (nkf_isblank(c)) {
5409  for (i=0;i<mimeout_state.count;i++) {
5411  }
5412  mimeout_state.count = 0;
5413  } else {
5414  eof_mime();
5415  for (i=0;i<mimeout_state.count;i++) {
5416  (*o_mputc)(mimeout_state.buf[i]);
5417  }
5418  base64_count = 0;
5419  mimeout_state.count = 0;
5420  }
5421  mimeout_state.buf[mimeout_state.count++] = (char)c;
5422  return;
5423  }
5424  if (nkf_isspace(c)) {
5425  for (i=0;i<mimeout_state.count;i++) {
5426  if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
5427  eof_mime();
5428  for (i=0;i<mimeout_state.count;i++) {
5429  (*o_mputc)(mimeout_state.buf[i]);
5430  base64_count++;
5431  }
5432  mimeout_state.count = 0;
5433  }
5434  }
5435  mimeout_state.buf[mimeout_state.count++] = (char)c;
5436  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5437  eof_mime();
5438  for (i=0;i<mimeout_state.count;i++) {
5439  (*o_mputc)(mimeout_state.buf[i]);
5440  base64_count++;
5441  }
5442  mimeout_state.count = 0;
5443  }
5444  return;
5445  }
5446  if (mimeout_state.count>0 && SP<c && c!='=') {
5447  mimeout_state.buf[mimeout_state.count++] = (char)c;
5448  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5449  j = mimeout_state.count;
5450  mimeout_state.count = 0;
5451  for (i=0;i<j;i++) {
5453  }
5454  }
5455  return;
5456  }
5457  }
5458  }
5459  if (mimeout_state.count>0) {
5460  j = mimeout_state.count;
5461  mimeout_state.count = 0;
5462  for (i=0;i<j;i++) {
5463  if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5464  break;
5466  }
5467  if (i<j) {
5468  eof_mime();
5469  base64_count=0;
5470  for (;i<j;i++) {
5471  (*o_mputc)(mimeout_state.buf[i]);
5472  }
5474  }
5475  }
5476  mimeout_addchar(c);
5477 }
5478 
5479 static void
5481 {
5482  mime_prechar(c2, c1);
5483  (*o_base64conv)(c2,c1);
5484 }
5485 
5486 #ifdef HAVE_ICONV_H
/*
 * State for one iconv(3)-based conversion: the conversion descriptor
 * plus the staging buffers used to feed it and to collect its output.
 */
typedef struct nkf_iconv_t {
    iconv_t cd;                 /* descriptor returned by iconv_open() */
    char *input_buffer;         /* bytes read but not yet converted */
    size_t input_buffer_size;   /* capacity of input_buffer, in bytes */
    char *output_buffer;        /* converted bytes waiting to be written */
    size_t output_buffer_size;  /* capacity of output_buffer, in bytes */
} nkf_iconv_t;
5494 
5495 static nkf_iconv_t
5496 nkf_iconv_new(char *tocode, char *fromcode)
5497 {
5498  nkf_iconv_t converter;
5499 
5500  converter->input_buffer_size = IOBUF_SIZE;
5501  converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5502  converter->output_buffer_size = IOBUF_SIZE * 2;
5503  converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5504  converter->cd = iconv_open(tocode, fromcode);
5505  if (converter->cd == (iconv_t)-1)
5506  {
5507  switch (errno) {
5508  case EINVAL:
5509  perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5510  return -1;
5511  default:
5512  perror("can't iconv_open");
5513  }
5514  }
5515 }
5516 
5517 static size_t
5518 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5519 {
5520  size_t invalid = (size_t)0;
5521  char *input_buffer = converter->input_buffer;
5522  size_t input_length = (size_t)0;
5523  char *output_buffer = converter->output_buffer;
5524  size_t output_length = converter->output_buffer_size;
5525  int c;
5526 
5527  do {
5528  if (c != EOF) {
5529  while ((c = (*i_getc)(f)) != EOF) {
5530  input_buffer[input_length++] = c;
5531  if (input_length < converter->input_buffer_size) break;
5532  }
5533  }
5534 
5535  size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5536  while (output_length-- > 0) {
5537  (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5538  }
5539  if (ret == (size_t) - 1) {
5540  switch (errno) {
5541  case EINVAL:
5542  if (input_buffer != converter->input_buffer)
5543  memmove(converter->input_buffer, input_buffer, input_length);
5544  break;
5545  case E2BIG:
5546  converter->output_buffer_size *= 2;
5547  output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5548  if (output_buffer == NULL) {
5549  perror("can't realloc");
5550  return -1;
5551  }
5552  converter->output_buffer = output_buffer;
5553  break;
5554  default:
5555  perror("can't iconv");
5556  return -1;
5557  }
5558  } else {
5559  invalid += ret;
5560  }
5561  } while (1);
5562 
5563  return invalid;
5564 }
5565 
5566 
5567 static void
5568 nkf_iconv_close(nkf_iconv_t *convert)
5569 {
5570  nkf_xfree(converter->inbuf);
5571  nkf_xfree(converter->outbuf);
5572  iconv_close(converter->cd);
5573 }
5574 #endif
5575 
5576 
5577 static void
5578 reinit(void)
5579 {
5580  {
5581  struct input_code *p = input_code_list;
5582  while (p->name){
5583  status_reinit(p++);
5584  }
5585  }
5586  unbuf_f = FALSE;
5587  estab_f = FALSE;
5588  nop_f = FALSE;
5589  binmode_f = TRUE;
5590  rot_f = FALSE;
5591  hira_f = FALSE;
5592  alpha_f = FALSE;
5594  mime_decode_f = FALSE;
5595  mimebuf_f = FALSE;
5596  broken_f = FALSE;
5597  iso8859_f = FALSE;
5598  mimeout_f = FALSE;
5600  iso2022jp_f = FALSE;
5601 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5603 #endif
5604 #ifdef UTF8_INPUT_ENABLE
5605  no_cp932ext_f = FALSE;
5608  unicode_subchar = '?';
5610 #endif
5611 #ifdef UTF8_OUTPUT_ENABLE
5612  output_bom_f = FALSE;
5614 #endif
5615 #ifdef UNICODE_NORMALIZATION
5616  nfc_f = FALSE;
5617 #endif
5618 #ifdef INPUT_OPTION
5619  cap_f = FALSE;
5620  url_f = FALSE;
5621  numchar_f = FALSE;
5622 #endif
5623 #ifdef CHECK_OPTION
5624  noout_f = FALSE;
5625  debug_f = FALSE;
5626 #endif
5627  guess_f = 0;
5628 #ifdef EXEC_IO
5629  exec_f = 0;
5630 #endif
5631 #ifdef SHIFTJIS_CP932
5632  cp51932_f = TRUE;
5633  cp932inv_f = TRUE;
5634 #endif
5635 #ifdef X0212_ENABLE
5636  x0212_f = FALSE;
5637  x0213_f = FALSE;
5638 #endif
5639  {
5640  int i;
5641  for (i = 0; i < 256; i++){
5642  prefix_table[i] = 0;
5643  }
5644  }
5645  hold_count = 0;
5646  mimeout_state.count = 0;
5647  mimeout_mode = 0;
5648  base64_count = 0;
5649  f_line = 0;
5650  f_prev = 0;
5652  fold_f = FALSE;
5653  fold_len = 0;
5664  o_putc = std_putc;
5665  i_getc = std_getc;
5666  i_ungetc = std_ungetc;
5667  i_bgetc = std_getc;
5669  o_mputc = std_putc;
5670  i_mgetc = std_getc;
5674  output_mode = ASCII;
5675  input_mode = ASCII;
5677  file_out_f = FALSE;
5678  eolmode_f = 0;
5679  input_eol = 0;
5680  prev_cr = 0;
5681  option_mode = 0;
5682  z_prev2=0,z_prev1=0;
5683 #ifdef CHECK_OPTION
5684  iconv_for_check = 0;
5685 #endif
5686  input_codename = NULL;
5687  input_encoding = NULL;
5688  output_encoding = NULL;
5689  nkf_state_init();
5690 #ifdef WIN32DLL
5691  reinitdll();
5692 #endif /*WIN32DLL*/
5693 }
5694 
5695 static int
5697 {
5698  if (input_encoding) set_input_encoding(input_encoding);
5699  if (!output_encoding) {
5700  output_encoding = nkf_default_encoding();
5701  }
5702  if (!output_encoding) {
5703  if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5704  else return -1;
5705  }
5706  set_output_encoding(output_encoding);
5707  oconv = nkf_enc_to_oconv(output_encoding);
5708  o_putc = std_putc;
5709  if (nkf_enc_unicode_p(output_encoding))
5710  output_mode = UTF_8;
5711 
5712  if (x0201_f == NKF_UNSPECIFIED) {
5714  }
5715 
5716  /* replace continucation module, from output side */
5717 
5718  /* output redicrection */
5719 #ifdef CHECK_OPTION
5720  if (noout_f || guess_f){
5721  o_putc = no_putc;
5722  }
5723 #endif
5724  if (mimeout_f) {
5725  o_mputc = o_putc;
5726  o_putc = mime_putc;
5727  if (mimeout_f == TRUE) {
5729  }
5730  /* base64_count = 0; */
5731  }
5732 
5733  if (eolmode_f || guess_f) {
5735  }
5736  if (rot_f) {
5738  }
5739  if (iso2022jp_f) {
5741  }
5742  if (hira_f) {
5744  }
5745  if (fold_f) {
5747  f_line = 0;
5748  }
5749  if (alpha_f || x0201_f) {
5750  o_zconv = oconv; oconv = z_conv;
5751  }
5752 
5753  i_getc = std_getc;
5754  i_ungetc = std_ungetc;
5755  /* input redicrection */
5756 #ifdef INPUT_OPTION
5757  if (cap_f){
5760  }
5761  if (url_f){
5764  }
5765 #endif
5766 #ifdef NUMCHAR_OPTION
5767  if (numchar_f){
5770  }
5771 #endif
5772 #ifdef UNICODE_NORMALIZATION
5773  if (nfc_f){
5776  }
5777 #endif
5778  if (mime_f && mimebuf_f==FIXED_MIME) {
5781  }
5782  if (broken_f & 1) {
5785  }
5786  if (input_encoding) {
5787  set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5788  } else {
5790  }
5791 
5792  {
5793  struct input_code *p = input_code_list;
5794  while (p->name){
5795  status_reinit(p++);
5796  }
5797  }
5798  return 0;
5799 }
5800 
5801 /*
5802  Conversion main loop. Code detection only.
5803  */
5804 
5805 #if !defined(PERL_XS) && !defined(WIN32DLL)
5806 static nkf_char
5808 {
5809  nkf_char c;
5810 
5811  if (nop_f == 2)
5813  while ((c = (*i_getc)(f)) != EOF)
5814  (*o_putc)(c);
5815  (*o_putc)(EOF);
5816  return 1;
5817 }
5818 #endif
5819 
5820 #define NEXT continue /* no output, get next */
5821 #define SKIP c2=0;continue /* no output, get next */
5822 #define MORE c2=c1;continue /* need one more byte */
5823 #define SEND (void)0 /* output c1 and c2, get next */
5824 #define LAST break /* end of loop, go closing */
5825 #define set_input_mode(mode) do { \
5826  input_mode = mode; \
5827  shift_mode = 0; \
5828  set_input_codename("ISO-2022-JP"); \
5829  debug("ISO-2022-JP"); \
5830 } while (0)
5831 
5832 static int
5834 {
5835  nkf_char c1=0, c2=0, c3=0, c4=0;
5836  int shift_mode = 0; /* 0, 1, 2, 3 */
5837  int g2 = 0;
5838  int is_8bit = FALSE;
5839 
5840  if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5841  is_8bit = TRUE;
5842  }
5843 
5844  input_mode = ASCII;
5845  output_mode = ASCII;
5846 
5847  if (module_connection() < 0) {
5848 #if !defined(PERL_XS) && !defined(WIN32DLL)
5849  fprintf(stderr, "no output encoding given\n");
5850 #endif
5851  return -1;
5852  }
5853  check_bom(f);
5854 
5855 #ifdef UTF8_INPUT_ENABLE
5856  if(iconv == w_iconv32){
5857  while ((c1 = (*i_getc)(f)) != EOF &&
5858  (c2 = (*i_getc)(f)) != EOF &&
5859  (c3 = (*i_getc)(f)) != EOF &&
5860  (c4 = (*i_getc)(f)) != EOF) {
5861  nkf_char c5, c6, c7, c8;
5862  if (nkf_iconv_utf_32(c1, c2, c3, c4) == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
5863  if ((c5 = (*i_getc)(f)) != EOF &&
5864  (c6 = (*i_getc)(f)) != EOF &&
5865  (c7 = (*i_getc)(f)) != EOF &&
5866  (c8 = (*i_getc)(f)) != EOF) {
5867  if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) {
5868  (*i_ungetc)(c8, f);
5869  (*i_ungetc)(c7, f);
5870  (*i_ungetc)(c6, f);
5871  (*i_ungetc)(c5, f);
5872  nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5873  }
5874  } else {
5875  nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5876  }
5877  }
5878  }
5879  goto finished;
5880  }
5881  else if (iconv == w_iconv16) {
5882  while ((c1 = (*i_getc)(f)) != EOF &&
5883  (c2 = (*i_getc)(f)) != EOF) {
5884  size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0);
5885  if (ret == NKF_ICONV_NEED_TWO_MORE_BYTES &&
5886  (c3 = (*i_getc)(f)) != EOF &&
5887  (c4 = (*i_getc)(f)) != EOF) {
5888  nkf_iconv_utf_16(c1, c2, c3, c4);
5889  } else if (ret == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
5890  if ((c3 = (*i_getc)(f)) != EOF &&
5891  (c4 = (*i_getc)(f)) != EOF) {
5892  if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) {
5893  (*i_ungetc)(c4, f);
5894  (*i_ungetc)(c3, f);
5896  }
5897  } else {
5899  }
5900  }
5901  }
5902  goto finished;
5903  }
5904 #endif
5905 
5906  while ((c1 = (*i_getc)(f)) != EOF) {
5907 #ifdef INPUT_CODE_FIX
5908  if (!input_encoding)
5909 #endif
5910  code_status(c1);
5911  if (c2) {
5912  /* second byte */
5913  if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
5914  /* in case of 8th bit is on */
5915  if (!estab_f&&!mime_decode_mode) {
5916  /* in case of not established yet */
5917  /* It is still ambiguious */
5918  if (h_conv(f, c2, c1)==EOF) {
5919  LAST;
5920  }
5921  else {
5922  SKIP;
5923  }
5924  }
5925  else {
5926  /* in case of already established */
5927  if (c1 < 0x40) {
5928  /* ignore bogus code */
5929  SKIP;
5930  } else {
5931  SEND;
5932  }
5933  }
5934  }
5935  else {
5936  /* 2nd byte of 7 bit code or SJIS */
5937  SEND;
5938  }
5939  }
5940  else if (nkf_char_unicode_p(c1)) {
5941  (*oconv)(0, c1);
5942  NEXT;
5943  }
5944  else {
5945  /* first byte */
5946  if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5947  /* CP5022x */
5948  MORE;
5949  }else if (input_codename && input_codename[0] == 'I' &&
5950  0xA1 <= c1 && c1 <= 0xDF) {
5951  /* JIS X 0201 Katakana in 8bit JIS */
5952  c2 = JIS_X_0201_1976_K;
5953  c1 &= 0x7f;
5954  SEND;
5955  } else if (c1 > DEL) {
5956  /* 8 bit code */
5957  if (!estab_f && !iso8859_f) {
5958  /* not established yet */
5959  MORE;
5960  } else { /* estab_f==TRUE */
5961  if (iso8859_f) {
5962  c2 = ISO_8859_1;
5963  c1 &= 0x7f;
5964  SEND;
5965  }
5966  else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5967  (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5968  /* JIS X 0201 */
5969  c2 = JIS_X_0201_1976_K;
5970  c1 &= 0x7f;
5971  SEND;
5972  }
5973  else {
5974  /* already established */
5975  MORE;
5976  }
5977  }
5978  } else if (SP < c1 && c1 < DEL) {
5979  /* in case of Roman characters */
5980  if (shift_mode) {
5981  /* output 1 shifted byte */
5982  if (iso8859_f) {
5983  c2 = ISO_8859_1;
5984  SEND;
5985  } else if (nkf_byte_jisx0201_katakana_p(c1)){
5986  /* output 1 shifted byte */
5987  c2 = JIS_X_0201_1976_K;
5988  SEND;
5989  } else {
5990  /* look like bogus code */
5991  SKIP;
5992  }
5993  } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5995  /* in case of Kanji shifted */
5996  MORE;
5997  } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5998  /* Check MIME code */
5999  if ((c1 = (*i_getc)(f)) == EOF) {
6000  (*oconv)(0, '=');
6001  LAST;
6002  } else if (c1 == '?') {
6003  /* =? is mime conversion start sequence */
6004  if(mime_f == STRICT_MIME) {
6005  /* check in real detail */
6006  if (mime_begin_strict(f) == EOF)
6007  LAST;
6008  SKIP;
6009  } else if (mime_begin(f) == EOF)
6010  LAST;
6011  SKIP;
6012  } else {
6013  (*oconv)(0, '=');
6014  (*i_ungetc)(c1,f);
6015  SKIP;
6016  }
6017  } else {
6018  /* normal ASCII code */
6019  SEND;
6020  }
6021  } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
6022  shift_mode = 0;
6023  SKIP;
6024  } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
6025  shift_mode = 1;
6026  SKIP;
6027  } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
6028  if ((c1 = (*i_getc)(f)) == EOF) {
6029  (*oconv)(0, ESC);
6030  LAST;
6031  }
6032  else if (c1 == '&') {
6033  /* IRR */
6034  if ((c1 = (*i_getc)(f)) == EOF) {
6035  LAST;
6036  } else {
6037  SKIP;
6038  }
6039  }
6040  else if (c1 == '$') {
6041  /* GZDMx */
6042  if ((c1 = (*i_getc)(f)) == EOF) {
6043  /* don't send bogus code
6044  (*oconv)(0, ESC);
6045  (*oconv)(0, '$'); */
6046  LAST;
6047  } else if (c1 == '@' || c1 == 'B') {
6048  /* JIS X 0208 */
6050  SKIP;
6051  } else if (c1 == '(') {
6052  /* GZDM4 */
6053  if ((c1 = (*i_getc)(f)) == EOF) {
6054  /* don't send bogus code
6055  (*oconv)(0, ESC);
6056  (*oconv)(0, '$');
6057  (*oconv)(0, '(');
6058  */
6059  LAST;
6060  } else if (c1 == '@'|| c1 == 'B') {
6061  /* JIS X 0208 */
6063  SKIP;
6064 #ifdef X0212_ENABLE
6065  } else if (c1 == 'D'){
6067  SKIP;
6068 #endif /* X0212_ENABLE */
6069  } else if (c1 == 'O' || c1 == 'Q'){
6071  SKIP;
6072  } else if (c1 == 'P'){
6074  SKIP;
6075  } else {
6076  /* could be some special code */
6077  (*oconv)(0, ESC);
6078  (*oconv)(0, '$');
6079  (*oconv)(0, '(');
6080  (*oconv)(0, c1);
6081  SKIP;
6082  }
6083  } else if (broken_f&0x2) {
6084  /* accept any ESC-(-x as broken code ... */
6086  shift_mode = 0;
6087  SKIP;
6088  } else {
6089  (*oconv)(0, ESC);
6090  (*oconv)(0, '$');
6091  (*oconv)(0, c1);
6092  SKIP;
6093  }
6094  } else if (c1 == '(') {
6095  /* GZD4 */
6096  if ((c1 = (*i_getc)(f)) == EOF) {
6097  /* don't send bogus code
6098  (*oconv)(0, ESC);
6099  (*oconv)(0, '('); */
6100  LAST;
6101  }
6102  else if (c1 == 'I') {
6103  /* JIS X 0201 Katakana */
6105  shift_mode = 1;
6106  SKIP;
6107  }
6108  else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
6109  /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
6111  SKIP;
6112  }
6113  else if (broken_f&0x2) {
6115  SKIP;
6116  }
6117  else {
6118  (*oconv)(0, ESC);
6119  (*oconv)(0, '(');
6120  SEND;
6121  }
6122  }
6123  else if (c1 == '.') {
6124  /* G2D6 */
6125  if ((c1 = (*i_getc)(f)) == EOF) {
6126  LAST;
6127  }
6128  else if (c1 == 'A') {
6129  /* ISO-8859-1 */
6130  g2 = ISO_8859_1;
6131  SKIP;
6132  }
6133  else {
6134  (*oconv)(0, ESC);
6135  (*oconv)(0, '.');
6136  SEND;
6137  }
6138  }
6139  else if (c1 == 'N') {
6140  /* SS2 */
6141  c1 = (*i_getc)(f);
6142  if (g2 == ISO_8859_1) {
6143  c2 = ISO_8859_1;
6144  SEND;
6145  }else{
6146  (*i_ungetc)(c1, f);
6147  /* lonely ESC */
6148  (*oconv)(0, ESC);
6149  SEND;
6150  }
6151  }
6152  else {
6153  /* lonely ESC */
6154  (*oconv)(0, ESC);
6155  SEND;
6156  }
6157  } else if (c1 == ESC && iconv == s_iconv) {
6158  /* ESC in Shift_JIS */
6159  if ((c1 = (*i_getc)(f)) == EOF) {
6160  (*oconv)(0, ESC);
6161  LAST;
6162  } else if (c1 == '$') {
6163  /* J-PHONE emoji */
6164  if ((c1 = (*i_getc)(f)) == EOF) {
6165  LAST;
6166  } else if (('E' <= c1 && c1 <= 'G') ||
6167  ('O' <= c1 && c1 <= 'Q')) {
6168  /*
6169  NUM : 0 1 2 3 4 5
6170  BYTE: G E F O P Q
6171  C%7 : 1 6 0 2 3 4
6172  C%7 : 0 1 2 3 4 5 6
6173  NUM : 2 0 3 4 5 X 1
6174  */
6175  static const nkf_char jphone_emoji_first_table[7] =
6176  {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
6177  c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
6178  if ((c1 = (*i_getc)(f)) == EOF) LAST;
6179  while (SP <= c1 && c1 <= 'z') {
6180  (*oconv)(0, c1 + c3);
6181  if ((c1 = (*i_getc)(f)) == EOF) LAST;
6182  }
6183  SKIP;
6184  }
6185  else {
6186  (*oconv)(0, ESC);
6187  (*oconv)(0, '$');
6188  SEND;
6189  }
6190  }
6191  else {
6192  /* lonely ESC */
6193  (*oconv)(0, ESC);
6194  SEND;
6195  }
6196  } else if (c1 == LF || c1 == CR) {
6197  if (broken_f&4) {
6199  SEND;
6200  } else if (mime_decode_f && !mime_decode_mode){
6201  if (c1 == LF) {
6202  if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
6203  i_ungetc(SP,f);
6204  continue;
6205  } else {
6206  i_ungetc(c1,f);
6207  }
6208  c1 = LF;
6209  SEND;
6210  } else { /* if (c1 == CR)*/
6211  if ((c1=(*i_getc)(f))!=EOF) {
6212  if (c1==SP) {
6213  i_ungetc(SP,f);
6214  continue;
6215  } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
6216  i_ungetc(SP,f);
6217  continue;
6218  } else {
6219  i_ungetc(c1,f);
6220  }
6221  i_ungetc(LF,f);
6222  } else {
6223  i_ungetc(c1,f);
6224  }
6225  c1 = CR;
6226  SEND;
6227  }
6228  }
6229  } else
6230  SEND;
6231  }
6232  /* send: */
6233  switch(input_mode){
6234  case ASCII:
6235  switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
6236  case -2:
6237  /* 4 bytes UTF-8 */
6238  if ((c3 = (*i_getc)(f)) != EOF) {
6239  code_status(c3);
6240  c3 <<= 8;
6241  if ((c4 = (*i_getc)(f)) != EOF) {
6242  code_status(c4);
6243  (*iconv)(c2, c1, c3|c4);
6244  }
6245  }
6246  break;
6247  case -3:
6248  /* 4 bytes UTF-8 (check combining character) */
6249  if ((c3 = (*i_getc)(f)) != EOF) {
6250  if ((c4 = (*i_getc)(f)) != EOF) {
6251  if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) {
6252  (*i_ungetc)(c4, f);
6253  (*i_ungetc)(c3, f);
6254  w_iconv_nocombine(c2, c1, 0);
6255  }
6256  } else {
6257  (*i_ungetc)(c3, f);
6258  w_iconv_nocombine(c2, c1, 0);
6259  }
6260  } else {
6261  w_iconv_nocombine(c2, c1, 0);
6262  }
6263  break;
6264  case -1:
6265  /* 3 bytes EUC or UTF-8 */
6266  if ((c3 = (*i_getc)(f)) != EOF) {
6267  code_status(c3);
6268  if ((*iconv)(c2, c1, c3) == -3) {
6269  /* 6 bytes UTF-8 (check combining character) */
6270  nkf_char c5, c6;
6271  if ((c4 = (*i_getc)(f)) != EOF) {
6272  if ((c5 = (*i_getc)(f)) != EOF) {
6273  if ((c6 = (*i_getc)(f)) != EOF) {
6274  if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) {
6275  (*i_ungetc)(c6, f);
6276  (*i_ungetc)(c5, f);
6277  (*i_ungetc)(c4, f);
6278  w_iconv_nocombine(c2, c1, c3);
6279  }
6280  } else {
6281  (*i_ungetc)(c5, f);
6282  (*i_ungetc)(c4, f);
6283  w_iconv_nocombine(c2, c1, c3);
6284  }
6285  } else {
6286  (*i_ungetc)(c4, f);
6287  w_iconv_nocombine(c2, c1, c3);
6288  }
6289  } else {
6290  w_iconv_nocombine(c2, c1, c3);
6291  }
6292  }
6293  }
6294  break;
6295  }
6296  break;
6297  case JIS_X_0208:
6298  case JIS_X_0213_1:
6299  if (ms_ucs_map_f &&
6300  0x7F <= c2 && c2 <= 0x92 &&
6301  0x21 <= c1 && c1 <= 0x7E) {
6302  /* CP932 UDC */
6303  c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
6304  c2 = 0;
6305  }
6306  (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
6307  break;
6308 #ifdef X0212_ENABLE
6309  case JIS_X_0212:
6310  (*oconv)(PREFIX_EUCG3 | c2, c1);
6311  break;
6312 #endif /* X0212_ENABLE */
6313  case JIS_X_0213_2:
6314  (*oconv)(PREFIX_EUCG3 | c2, c1);
6315  break;
6316  default:
6317  (*oconv)(input_mode, c1); /* other special case */
6318  }
6319 
6320  c2 = 0;
6321  c3 = 0;
6322  continue;
6323  /* goto next_word */
6324  }
6325 
6326 finished:
6327  /* epilogue */
6328  (*iconv)(EOF, 0, 0);
6329  if (!input_codename)
6330  {
6331  if (is_8bit) {
6332  struct input_code *p = input_code_list;
6333  struct input_code *result = p;
6334  while (p->name){
6335  if (p->score < result->score) result = p;
6336  ++p;
6337  }
6338  set_input_codename(result->name);
6339 #ifdef CHECK_OPTION
6340  debug(result->name);
6341 #endif
6342  }
6343  }
6344  return 0;
6345 }
6346 
6347 /*
6348  * int options(unsigned char *cp)
6349  *
6350  * return values:
6351  * 0: success
6352  * -1: ArgumentError
6353  */
6354 static int
6355 options(unsigned char *cp)
6356 {
6357  nkf_char i, j;
6358  unsigned char *p;
6359  unsigned char *cp_back = NULL;
6360  nkf_encoding *enc;
6361 
6362  if (option_mode==1)
6363  return 0;
6364  while(*cp && *cp++!='-');
6365  while (*cp || cp_back) {
6366  if(!*cp){
6367  cp = cp_back;
6368  cp_back = NULL;
6369  continue;
6370  }
6371  p = 0;
6372  switch (*cp++) {
6373  case '-': /* literal options */
6374  if (!*cp || *cp == SP) { /* ignore the rest of arguments */
6375  option_mode = 1;
6376  return 0;
6377  }
6378  for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
6379  p = (unsigned char *)long_option[i].name;
6380  for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
6381  if (*p == cp[j] || cp[j] == SP){
6382  p = &cp[j] + 1;
6383  break;
6384  }
6385  p = 0;
6386  }
6387  if (p == 0) {
6388 #if !defined(PERL_XS) && !defined(WIN32DLL)
6389  fprintf(stderr, "unknown long option: --%s\n", cp);
6390 #endif
6391  return -1;
6392  }
6393  while(*cp && *cp != SP && cp++);
6394  if (long_option[i].alias[0]){
6395  cp_back = cp;
6396  cp = (unsigned char *)long_option[i].alias;
6397  }else{
6398 #ifndef PERL_XS
6399  if (strcmp(long_option[i].name, "help") == 0){
6400  usage();
6401  exit(EXIT_SUCCESS);
6402  }
6403 #endif
6404  if (strcmp(long_option[i].name, "ic=") == 0){
6405  enc = nkf_enc_find((char *)p);
6406  if (!enc) continue;
6407  input_encoding = enc;
6408  continue;
6409  }
6410  if (strcmp(long_option[i].name, "oc=") == 0){
6411  enc = nkf_enc_find((char *)p);
6412  /* if (enc <= 0) continue; */
6413  if (!enc) continue;
6414  output_encoding = enc;
6415  continue;
6416  }
6417  if (strcmp(long_option[i].name, "guess=") == 0){
6418  if (p[0] == '0' || p[0] == '1') {
6419  guess_f = 1;
6420  } else {
6421  guess_f = 2;
6422  }
6423  continue;
6424  }
6425 #ifdef OVERWRITE
6426  if (strcmp(long_option[i].name, "overwrite") == 0){
6427  file_out_f = TRUE;
6428  overwrite_f = TRUE;
6430  continue;
6431  }
6432  if (strcmp(long_option[i].name, "overwrite=") == 0){
6433  file_out_f = TRUE;
6434  overwrite_f = TRUE;
6436  backup_f = TRUE;
6437  backup_suffix = (char *)p;
6438  continue;
6439  }
6440  if (strcmp(long_option[i].name, "in-place") == 0){
6441  file_out_f = TRUE;
6442  overwrite_f = TRUE;
6444  continue;
6445  }
6446  if (strcmp(long_option[i].name, "in-place=") == 0){
6447  file_out_f = TRUE;
6448  overwrite_f = TRUE;
6450  backup_f = TRUE;
6451  backup_suffix = (char *)p;
6452  continue;
6453  }
6454 #endif
6455 #ifdef INPUT_OPTION
6456  if (strcmp(long_option[i].name, "cap-input") == 0){
6457  cap_f = TRUE;
6458  continue;
6459  }
6460  if (strcmp(long_option[i].name, "url-input") == 0){
6461  url_f = TRUE;
6462  continue;
6463  }
6464 #endif
6465 #ifdef NUMCHAR_OPTION
6466  if (strcmp(long_option[i].name, "numchar-input") == 0){
6467  numchar_f = TRUE;
6468  continue;
6469  }
6470 #endif
6471 #ifdef CHECK_OPTION
6472  if (strcmp(long_option[i].name, "no-output") == 0){
6473  noout_f = TRUE;
6474  continue;
6475  }
6476  if (strcmp(long_option[i].name, "debug") == 0){
6477  debug_f = TRUE;
6478  continue;
6479  }
6480 #endif
6481  if (strcmp(long_option[i].name, "cp932") == 0){
6482 #ifdef SHIFTJIS_CP932
6483  cp51932_f = TRUE;
6484  cp932inv_f = -TRUE;
6485 #endif
6486 #ifdef UTF8_OUTPUT_ENABLE
6488 #endif
6489  continue;
6490  }
6491  if (strcmp(long_option[i].name, "no-cp932") == 0){
6492 #ifdef SHIFTJIS_CP932
6493  cp51932_f = FALSE;
6494  cp932inv_f = FALSE;
6495 #endif
6496 #ifdef UTF8_OUTPUT_ENABLE
6498 #endif
6499  continue;
6500  }
6501 #ifdef SHIFTJIS_CP932
6502  if (strcmp(long_option[i].name, "cp932inv") == 0){
6503  cp932inv_f = -TRUE;
6504  continue;
6505  }
6506 #endif
6507 
6508 #ifdef X0212_ENABLE
6509  if (strcmp(long_option[i].name, "x0212") == 0){
6510  x0212_f = TRUE;
6511  continue;
6512  }
6513 #endif
6514 
6515 #ifdef EXEC_IO
6516  if (strcmp(long_option[i].name, "exec-in") == 0){
6517  exec_f = 1;
6518  return 0;
6519  }
6520  if (strcmp(long_option[i].name, "exec-out") == 0){
6521  exec_f = -1;
6522  return 0;
6523  }
6524 #endif
6525 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6526  if (strcmp(long_option[i].name, "no-cp932ext") == 0){
6527  no_cp932ext_f = TRUE;
6528  continue;
6529  }
6530  if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6532  continue;
6533  }
6534  if (strcmp(long_option[i].name, "fb-skip") == 0){
6536  continue;
6537  }
6538  if (strcmp(long_option[i].name, "fb-html") == 0){
6540  continue;
6541  }
6542  if (strcmp(long_option[i].name, "fb-xml") == 0){
6544  continue;
6545  }
6546  if (strcmp(long_option[i].name, "fb-java") == 0){
6548  continue;
6549  }
6550  if (strcmp(long_option[i].name, "fb-perl") == 0){
6552  continue;
6553  }
6554  if (strcmp(long_option[i].name, "fb-subchar") == 0){
6556  continue;
6557  }
6558  if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6560  unicode_subchar = 0;
6561  if (p[0] != '0'){
6562  /* decimal number */
6563  for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6564  unicode_subchar *= 10;
6565  unicode_subchar += hex2bin(p[i]);
6566  }
6567  }else if(p[1] == 'x' || p[1] == 'X'){
6568  /* hexadecimal number */
6569  for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6570  unicode_subchar <<= 4;
6571  unicode_subchar |= hex2bin(p[i]);
6572  }
6573  }else{
6574  /* octal number */
6575  for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6576  unicode_subchar *= 8;
6577  unicode_subchar += hex2bin(p[i]);
6578  }
6579  }
6580  w16e_conv(unicode_subchar, &i, &j);
6581  unicode_subchar = i<<8 | j;
6582  continue;
6583  }
6584 #endif
6585 #ifdef UTF8_OUTPUT_ENABLE
6586  if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6588  continue;
6589  }
6590 #endif
6591 #ifdef UNICODE_NORMALIZATION
6592  if (strcmp(long_option[i].name, "utf8mac-input") == 0){
6593  nfc_f = TRUE;
6594  continue;
6595  }
6596 #endif
6597  if (strcmp(long_option[i].name, "prefix=") == 0){
6598  if (nkf_isgraph(p[0])){
6599  for (i = 1; nkf_isgraph(p[i]); i++){
6600  prefix_table[p[i]] = p[0];
6601  }
6602  }
6603  continue;
6604  }
6605 #if !defined(PERL_XS) && !defined(WIN32DLL)
6606  fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6607 #endif
6608  return -1;
6609  }
6610  continue;
6611  case 'b': /* buffered mode */
6612  unbuf_f = FALSE;
6613  continue;
6614  case 'u': /* non bufferd mode */
6615  unbuf_f = TRUE;
6616  continue;
6617  case 't': /* transparent mode */
6618  if (*cp=='1') {
6619  /* alias of -t */
6620  cp++;
6621  nop_f = TRUE;
6622  } else if (*cp=='2') {
6623  /*
6624  * -t with put/get
6625  *
6626  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6627  *
6628  */
6629  cp++;
6630  nop_f = 2;
6631  } else
6632  nop_f = TRUE;
6633  continue;
6634  case 'j': /* JIS output */
6635  case 'n':
6636  output_encoding = nkf_enc_from_index(ISO_2022_JP);
6637  continue;
6638  case 'e': /* AT&T EUC output */
6639  output_encoding = nkf_enc_from_index(EUCJP_NKF);
6640  continue;
6641  case 's': /* SJIS output */
6642  output_encoding = nkf_enc_from_index(SHIFT_JIS);
6643  continue;
6644  case 'l': /* ISO8859 Latin-1 support, no conversion */
6645  iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6646  input_encoding = nkf_enc_from_index(ISO_8859_1);
6647  continue;
6648  case 'i': /* Kanji IN ESC-$-@/B */
6649  if (*cp=='@'||*cp=='B')
6650  kanji_intro = *cp++;
6651  continue;
6652  case 'o': /* ASCII IN ESC-(-J/B/H */
6653  /* ESC ( H was used in initial JUNET messages */
6654  if (*cp=='J'||*cp=='B'||*cp=='H')
6655  ascii_intro = *cp++;
6656  continue;
6657  case 'h':
6658  /*
6659  bit:1 katakana->hiragana
6660  bit:2 hiragana->katakana
6661  */
6662  if ('9'>= *cp && *cp>='0')
6663  hira_f |= (*cp++ -'0');
6664  else
6665  hira_f |= 1;
6666  continue;
6667  case 'r':
6668  rot_f = TRUE;
6669  continue;
6670 #if defined(MSDOS) || defined(__OS2__)
6671  case 'T':
6672  binmode_f = FALSE;
6673  continue;
6674 #endif
6675 #ifndef PERL_XS
6676  case 'V':
6678  exit(EXIT_SUCCESS);
6679  break;
6680  case 'v':
6681  version();
6682  exit(EXIT_SUCCESS);
6683  break;
6684 #endif
6685 #ifdef UTF8_OUTPUT_ENABLE
6686  case 'w': /* UTF-{8,16,32} output */
6687  if (cp[0] == '8') {
6688  cp++;
6689  if (cp[0] == '0'){
6690  cp++;
6691  output_encoding = nkf_enc_from_index(UTF_8N);
6692  } else {
6693  output_bom_f = TRUE;
6694  output_encoding = nkf_enc_from_index(UTF_8_BOM);
6695  }
6696  } else {
6697  int enc_idx;
6698  if ('1'== cp[0] && '6'==cp[1]) {
6699  cp += 2;
6700  enc_idx = UTF_16;
6701  } else if ('3'== cp[0] && '2'==cp[1]) {
6702  cp += 2;
6703  enc_idx = UTF_32;
6704  } else {
6705  output_encoding = nkf_enc_from_index(UTF_8);
6706  continue;
6707  }
6708  if (cp[0]=='L') {
6709  cp++;
6711  output_bom_f = TRUE;
6712  } else if (cp[0] == 'B') {
6713  cp++;
6714  output_bom_f = TRUE;
6715  }
6716  if (cp[0] == '0'){
6717  output_bom_f = FALSE;
6718  cp++;
6719  enc_idx = enc_idx == UTF_16
6721  : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6722  } else {
6723  enc_idx = enc_idx == UTF_16
6725  : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6726  }
6727  output_encoding = nkf_enc_from_index(enc_idx);
6728  }
6729  continue;
6730 #endif
6731 #ifdef UTF8_INPUT_ENABLE
6732  case 'W': /* UTF input */
6733  if (cp[0] == '8') {
6734  cp++;
6735  input_encoding = nkf_enc_from_index(UTF_8);
6736  }else{
6737  int enc_idx;
6738  if ('1'== cp[0] && '6'==cp[1]) {
6739  cp += 2;
6741  enc_idx = UTF_16;
6742  } else if ('3'== cp[0] && '2'==cp[1]) {
6743  cp += 2;
6745  enc_idx = UTF_32;
6746  } else {
6747  input_encoding = nkf_enc_from_index(UTF_8);
6748  continue;
6749  }
6750  if (cp[0]=='L') {
6751  cp++;
6753  } else if (cp[0] == 'B') {
6754  cp++;
6756  }
6757  enc_idx = (enc_idx == UTF_16
6759  : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6760  input_encoding = nkf_enc_from_index(enc_idx);
6761  }
6762  continue;
6763 #endif
6764  /* Input code assumption */
6765  case 'J': /* ISO-2022-JP input */
6766  input_encoding = nkf_enc_from_index(ISO_2022_JP);
6767  continue;
6768  case 'E': /* EUC-JP input */
6769  input_encoding = nkf_enc_from_index(EUCJP_NKF);
6770  continue;
6771  case 'S': /* Shift_JIS input */
6772  input_encoding = nkf_enc_from_index(SHIFT_JIS);
6773  continue;
6774  case 'Z': /* Convert X0208 alphabet to asii */
6775  /* alpha_f
6776  bit:0 Convert JIS X 0208 Alphabet to ASCII
6777  bit:1 Convert Kankaku to one space
6778  bit:2 Convert Kankaku to two spaces
6779  bit:3 Convert HTML Entity
6780  bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
6781  */
6782  while ('0'<= *cp && *cp <='4') {
6783  alpha_f |= 1 << (*cp++ - '0');
6784  }
6785  alpha_f |= 1;
6786  continue;
6787  case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6788  x0201_f = FALSE; /* No X0201->X0208 conversion */
6789  /* accept X0201
6790  ESC-(-I in JIS, EUC, MS Kanji
6791  SI/SO in JIS, EUC, MS Kanji
6792  SS2 in EUC, JIS, not in MS Kanji
6793  MS Kanji (0xa0-0xdf)
6794  output X0201
6795  ESC-(-I in JIS (0x20-0x5f)
6796  SS2 in EUC (0xa0-0xdf)
6797  0xa0-0xd in MS Kanji (0xa0-0xdf)
6798  */
6799  continue;
6800  case 'X': /* Convert X0201 kana to X0208 */
6801  x0201_f = TRUE;
6802  continue;
6803  case 'F': /* prserve new lines */
6805  case 'f': /* folding -f60 or -f */
6806  fold_f = TRUE;
6807  fold_len = 0;
6808  while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6809  fold_len *= 10;
6810  fold_len += *cp++ - '0';
6811  }
6812  if (!(0<fold_len && fold_len<BUFSIZ))
6814  if (*cp=='-') {
6815  fold_margin = 0;
6816  cp++;
6817  while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6818  fold_margin *= 10;
6819  fold_margin += *cp++ - '0';
6820  }
6821  }
6822  continue;
6823  case 'm': /* MIME support */
6824  /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6825  if (*cp=='B'||*cp=='Q') {
6826  mime_decode_mode = *cp++;
6828  } else if (*cp=='N') {
6829  mime_f = TRUE; cp++;
6830  } else if (*cp=='S') {
6831  mime_f = STRICT_MIME; cp++;
6832  } else if (*cp=='0') {
6833  mime_decode_f = FALSE;
6834  mime_f = FALSE; cp++;
6835  } else {
6836  mime_f = STRICT_MIME;
6837  }
6838  continue;
6839  case 'M': /* MIME output */
6840  if (*cp=='B') {
6841  mimeout_mode = 'B';
6842  mimeout_f = FIXED_MIME; cp++;
6843  } else if (*cp=='Q') {
6844  mimeout_mode = 'Q';
6845  mimeout_f = FIXED_MIME; cp++;
6846  } else {
6847  mimeout_f = TRUE;
6848  }
6849  continue;
6850  case 'B': /* Broken JIS support */
6851  /* bit:0 no ESC JIS
6852  bit:1 allow any x on ESC-(-x or ESC-$-x
6853  bit:2 reset to ascii on NL
6854  */
6855  if ('9'>= *cp && *cp>='0')
6856  broken_f |= 1<<(*cp++ -'0');
6857  else
6858  broken_f |= TRUE;
6859  continue;
6860 #ifndef PERL_XS
6861  case 'O':/* for Output file */
6862  file_out_f = TRUE;
6863  continue;
6864 #endif
6865  case 'c':/* add cr code */
6866  eolmode_f = CRLF;
6867  continue;
6868  case 'd':/* delete cr code */
6869  eolmode_f = LF;
6870  continue;
6871  case 'I': /* ISO-2022-JP output */
6872  iso2022jp_f = TRUE;
6873  continue;
6874  case 'L': /* line mode */
6875  if (*cp=='u') { /* unix */
6876  eolmode_f = LF; cp++;
6877  } else if (*cp=='m') { /* mac */
6878  eolmode_f = CR; cp++;
6879  } else if (*cp=='w') { /* windows */
6880  eolmode_f = CRLF; cp++;
6881  } else if (*cp=='0') { /* no conversion */
6882  eolmode_f = 0; cp++;
6883  }
6884  continue;
6885 #ifndef PERL_XS
6886  case 'g':
6887  if ('2' <= *cp && *cp <= '9') {
6888  guess_f = 2;
6889  cp++;
6890  } else if (*cp == '0' || *cp == '1') {
6891  guess_f = 1;
6892  cp++;
6893  } else {
6894  guess_f = 1;
6895  }
6896  continue;
6897 #endif
6898  case SP:
6899  /* module muliple options in a string are allowed for Perl moudle */
6900  while(*cp && *cp++!='-');
6901  continue;
6902  default:
6903 #if !defined(PERL_XS) && !defined(WIN32DLL)
6904  fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6905 #endif
6906  /* bogus option but ignored */
6907  return -1;
6908  }
6909  }
6910  return 0;
6911 }
6912 
6913 #ifdef WIN32DLL
6914 #include "nkf32dll.c"
6915 #elif defined(PERL_XS)
6916 #else /* WIN32DLL */
6917 int
6918 main(int argc, char **argv)
6919 {
6920  FILE *fin;
6921  unsigned char *cp;
6922 
6923  char *outfname = NULL;
6924  char *origfname;
6925 
6926 #ifdef EASYWIN /*Easy Win */
6927  _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6928 #endif
6929 #ifdef DEFAULT_CODE_LOCALE
6930  setlocale(LC_CTYPE, "");
6931 #endif
6932  nkf_state_init();
6933 
6934  for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6935  cp = (unsigned char *)*argv;
6936  options(cp);
6937 #ifdef EXEC_IO
6938  if (exec_f){
6939  int fds[2], pid;
6940  if (pipe(fds) < 0 || (pid = fork()) < 0){
6941  abort();
6942  }
6943  if (pid == 0){
6944  if (exec_f > 0){
6945  close(fds[0]);
6946  dup2(fds[1], 1);
6947  }else{
6948  close(fds[1]);
6949  dup2(fds[0], 0);
6950  }
6951  execvp(argv[1], &argv[1]);
6952  }
6953  if (exec_f > 0){
6954  close(fds[1]);
6955  dup2(fds[0], 0);
6956  }else{
6957  close(fds[0]);
6958  dup2(fds[1], 1);
6959  }
6960  argc = 0;
6961  break;
6962  }
6963 #endif
6964  }
6965 
6966  if (guess_f) {
6967 #ifdef CHECK_OPTION
6968  int debug_f_back = debug_f;
6969 #endif
6970 #ifdef EXEC_IO
6971  int exec_f_back = exec_f;
6972 #endif
6973 #ifdef X0212_ENABLE
6974  int x0212_f_back = x0212_f;
6975 #endif
6976  int x0213_f_back = x0213_f;
6977  int guess_f_back = guess_f;
6978  reinit();
6979  guess_f = guess_f_back;
6980  mime_f = FALSE;
6981 #ifdef CHECK_OPTION
6982  debug_f = debug_f_back;
6983 #endif
6984 #ifdef EXEC_IO
6985  exec_f = exec_f_back;
6986 #endif
6987  x0212_f = x0212_f_back;
6988  x0213_f = x0213_f_back;
6989  }
6990 
6991  if (binmode_f == TRUE)
6992 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6993  if (freopen("","wb",stdout) == NULL)
6994  return (-1);
6995 #else
6996  setbinmode(stdout);
6997 #endif
6998 
6999  if (unbuf_f)
7000  setbuf(stdout, (char *) NULL);
7001  else
7002  setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
7003 
7004  if (argc == 0) {
7005  if (binmode_f == TRUE)
7006 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7007  if (freopen("","rb",stdin) == NULL) return (-1);
7008 #else
7009  setbinmode(stdin);
7010 #endif
7011  setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
7012  if (nop_f)
7013  noconvert(stdin);
7014  else {
7015  kanji_convert(stdin);
7017  }
7018  } else {
7019  int nfiles = argc;
7020  int is_argument_error = FALSE;
7021  while (argc--) {
7022  input_codename = NULL;
7023  input_eol = 0;
7024 #ifdef CHECK_OPTION
7025  iconv_for_check = 0;
7026 #endif
7027  if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
7028  perror(*(argv-1));
7029  is_argument_error = TRUE;
7030  continue;
7031  } else {
7032 #ifdef OVERWRITE
7033  int fd = 0;
7034  int fd_backup = 0;
7035 #endif
7036 
7037  /* reopen file for stdout */
7038  if (file_out_f == TRUE) {
7039 #ifdef OVERWRITE
7040  if (overwrite_f){
7041  outfname = nkf_xmalloc(strlen(origfname)
7042  + strlen(".nkftmpXXXXXX")
7043  + 1);
7044  strcpy(outfname, origfname);
7045 #ifdef MSDOS
7046  {
7047  int i;
7048  for (i = strlen(outfname); i; --i){
7049  if (outfname[i - 1] == '/'
7050  || outfname[i - 1] == '\\'){
7051  break;
7052  }
7053  }
7054  outfname[i] = '\0';
7055  }
7056  strcat(outfname, "ntXXXXXX");
7057  mktemp(outfname);
7058  fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
7059  S_IREAD | S_IWRITE);
7060 #else
7061  strcat(outfname, ".nkftmpXXXXXX");
7062  fd = mkstemp(outfname);
7063 #endif
7064  if (fd < 0
7065  || (fd_backup = dup(fileno(stdout))) < 0
7066  || dup2(fd, fileno(stdout)) < 0
7067  ){
7068  perror(origfname);
7069  return -1;
7070  }
7071  }else
7072 #endif
7073  if(argc == 1) {
7074  outfname = *argv++;
7075  argc--;
7076  } else {
7077  outfname = "nkf.out";
7078  }
7079 
7080  if(freopen(outfname, "w", stdout) == NULL) {
7081  perror (outfname);
7082  return (-1);
7083  }
7084  if (binmode_f == TRUE) {
7085 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7086  if (freopen("","wb",stdout) == NULL)
7087  return (-1);
7088 #else
7089  setbinmode(stdout);
7090 #endif
7091  }
7092  }
7093  if (binmode_f == TRUE)
7094 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7095  if (freopen("","rb",fin) == NULL)
7096  return (-1);
7097 #else
7098  setbinmode(fin);
7099 #endif
7100  setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
7101  if (nop_f)
7102  noconvert(fin);
7103  else {
7104  char *filename = NULL;
7105  kanji_convert(fin);
7106  if (nfiles > 1) filename = origfname;
7107  if (guess_f) print_guessed_code(filename);
7108  }
7109  fclose(fin);
7110 #ifdef OVERWRITE
7111  if (overwrite_f) {
7112  struct stat sb;
7113 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7114  time_t tb[2];
7115 #else
7116  struct utimbuf tb;
7117 #endif
7118 
7119  fflush(stdout);
7120  close(fd);
7121  if (dup2(fd_backup, fileno(stdout)) < 0){
7122  perror("dup2");
7123  }
7124  if (stat(origfname, &sb)) {
7125  fprintf(stderr, "Can't stat %s\n", origfname);
7126  }
7127  /* $B%Q!<%_%C%7%g%s$rI|85(B */
7128  if (chmod(outfname, sb.st_mode)) {
7129  fprintf(stderr, "Can't set permission %s\n", outfname);
7130  }
7131 
7132  /* $B%?%$%`%9%?%s%W$rI|85(B */
7133  if(preserve_time_f){
7134 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7135  tb[0] = tb[1] = sb.st_mtime;
7136  if (utime(outfname, tb)) {
7137  fprintf(stderr, "Can't set timestamp %s\n", outfname);
7138  }
7139 #else
7140  tb.actime = sb.st_atime;
7141  tb.modtime = sb.st_mtime;
7142  if (utime(outfname, &tb)) {
7143  fprintf(stderr, "Can't set timestamp %s\n", outfname);
7144  }
7145 #endif
7146  }
7147  if(backup_f){
7148  char *backup_filename = get_backup_filename(backup_suffix, origfname);
7149 #ifdef MSDOS
7150  unlink(backup_filename);
7151 #endif
7152  if (rename(origfname, backup_filename)) {
7153  perror(backup_filename);
7154  fprintf(stderr, "Can't rename %s to %s\n",
7155  origfname, backup_filename);
7156  }
7157  nkf_xfree(backup_filename);
7158  }else{
7159 #ifdef MSDOS
7160  if (unlink(origfname)){
7161  perror(origfname);
7162  }
7163 #endif
7164  }
7165  if (rename(outfname, origfname)) {
7166  perror(origfname);
7167  fprintf(stderr, "Can't rename %s to %s\n",
7168  outfname, origfname);
7169  }
7170  nkf_xfree(outfname);
7171  }
7172 #endif
7173  }
7174  }
7175  if (is_argument_error)
7176  return(-1);
7177  }
7178 #ifdef EASYWIN /*Easy Win */
7179  if (file_out_f == FALSE)
7180  scanf("%d",&end_check);
7181  else
7182  fclose(stdout);
7183 #else /* for Other OS */
7184  if (file_out_f == TRUE)
7185  fclose(stdout);
7186 #endif /*Easy Win */
7187  return (0);
7188 }
7189 #endif /* WIN32DLL */
#define nkf_char_unicode_new(c)
Definition: nkf.c:429
#define SP
Definition: nkf.c:75
Definition: nkf.c:98
static int x0213_combining_p(nkf_char wc)
Definition: nkf.c:2220
const unsigned short x0213_combining_chars[sizeof_x0213_combining_chars]
Definition: utf8tbl.c:3221
#define OUTPUT_UTF16(val)
Definition: nkf.c:2845
#define output_ascii_escape_sequence(mode)
Definition: nkf.c:2539
static nkf_char nkf_buf_at(nkf_buf_t *buf, int index)
Definition: nkf.c:863
static void status_check(struct input_code *ptr, nkf_char c)
Definition: nkf.c:3075
static nkf_char hold_buf[HOLD_SIZE *2]
Definition: nkf.c:3355
static void output_escape_sequence(int mode)
Definition: nkf.c:2549
#define OUTPUT_UTF16_BYTES(c1, c2)
Definition: nkf.c:2835
#define NUMCHAR_OPTION
Definition: config.h:22
nkf_native_encoding NkfEncodingUTF_32
Definition: nkf.c:159
static nkf_char mime_begin(FILE *f)
Definition: nkf.c:4467
static void set_iconv(nkf_char f, nkf_char(*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0))
Definition: nkf.c:1509
static void encode_fallback_xml(nkf_char c)
Definition: nkf.c:1098
static int noout_f
Definition: nkf.c:443
#define NKF_ICONV_INVALID_CODE_RANGE
Definition: nkf.c:2319
const unsigned short *const x0212_shiftjis[]
Definition: utf8tbl.c:14603
#define SCORE_iMIME
Definition: nkf.c:2950
static void * nkf_xmalloc(size_t size)
Definition: nkf.c:685
#define INPUT_CODE_FIX
Definition: config.h:12
#define FALSE
Definition: nkf.h:174
#define BS
Definition: nkf.c:70
static nkf_char url_ungetc(nkf_char c, FILE *f)
Definition: nkf.c:4650
static int iso8859_f
Definition: nkf.c:401
size_t strlen(const char *)
static size_t unicode_iconv(nkf_char wc, int nocombine)
Definition: nkf.c:2323
nkf_char(* iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0)
Definition: nkf.c:332
int i
Definition: win32ole.c:784
static size_t nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
Definition: nkf.c:2425
static nkf_encoding * nkf_utf8_encoding()
Definition: nkf.c:816
static nkf_char nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
Definition: nkf.c:1730
static const nkf_char score_table_8FF0[]
Definition: nkf.c:2983
#define NKF_ICONV_WAIT_COMBINING_CHAR
Definition: nkf.c:2320
Definition: nkf.c:96
static nkf_char std_getc(FILE *f)
Definition: nkf.c:3330
const unsigned short *const *const utf8_to_euc_3bytes_932[]
Definition: utf8tbl.c:12573
static int mimebuf_f
Definition: nkf.c:399
static int cap_f
Definition: nkf.c:413
static nkf_char mime_ungetc_buf(nkf_char c, FILE *f)
Definition: nkf.c:4331
static nkf_char(* i_cungetc)(nkf_char c, FILE *f)
Definition: nkf.c:415
nkf_char stat
Definition: nkf.c:327
#define NKF_RELEASE_DATE
Definition: nkf.c:24
static void(* o_mputc)(nkf_char c)
Definition: nkf.c:546
#define SCORE_INIT
Definition: nkf.c:2953
static void nkf_buf_push(nkf_buf_t *buf, nkf_char c)
Definition: nkf.c:876
static size_t nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
Definition: nkf.c:2394
#define nkf_enc_asciicompat(enc)
Definition: nkf.c:763
static void usage(void)
Definition: nkf.c:904
#define NKF_UNSPECIFIED
Definition: nkf.c:387
#define SCORE_CP932
Definition: nkf.c:2946
static int url_f
Definition: nkf.c:417
#define nkf_enc_name(enc)
Definition: nkf.c:758
static nkf_char z_prev1
Definition: nkf.c:3979
nkf_native_encoding NkfEncodingASCII
Definition: nkf.c:153
static void eof_mime(void)
Definition: nkf.c:5135
static nkf_char(* i_mungetc)(nkf_char c, FILE *f)
Definition: nkf.c:549
static nkf_char e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1)
Definition: nkf.c:2034
const int id
Definition: nkf.c:209
static void s_status(struct input_code *, nkf_char)
Definition: nkf.c:3083
static int overwrite_f
Definition: nkf.c:671
static int input_bom_f
Definition: nkf.c:359
#define MIME_BUF_MASK
Definition: nkf.c:4305
static int output_endian
Definition: nkf.c:366
#define nkf_isalnum(c)
Definition: nkf.c:289
static int fold_len
Definition: nkf.c:496
static int h_conv(FILE *f, nkf_char c1, nkf_char c2)
Definition: nkf.c:3367
static void e_oconv(nkf_char c2, nkf_char c1)
Definition: nkf.c:2643
static nkf_char base64decode(nkf_char c)
Definition: nkf.c:4781
Definition: nkf.c:62
static void(* o_iso2022jp_check_conv)(nkf_char c2, nkf_char c1)
Definition: nkf.c:534
static void(* o_eol_conv)(nkf_char c2, nkf_char c1)
Definition: nkf.c:530
static int broken_f
Definition: nkf.c:400
nkf_buf_t * broken_buf
Definition: nkf.c:3301
static nkf_char(* iconv_for_check)(nkf_char c2, nkf_char c1, nkf_char c0)=0
Definition: nkf.c:447
static void j_oconv(nkf_char c2, nkf_char c1)
Definition: nkf.c:2592
const unsigned short *const utf8_to_euc_2bytes_932[]
Definition: utf8tbl.c:12471
static void base64_conv(nkf_char c2, nkf_char c1)
Definition: nkf.c:5480
SSL_METHOD *(* func)(void)
Definition: ossl_ssl.c:108
#define LF
Definition: nkf.c:72
#define PREFIX_EUCG3
Definition: nkf.c:422
#define STD_GC_BUFSIZE
Definition: nkf.c:3308
#define nkf_enc_to_iconv(enc)
Definition: nkf.c:761
#define TAB
Definition: nkf.c:71
#define SCORE_L2
Definition: nkf.c:2943
static int base64_count
Definition: nkf.c:487
nkf_encoding nkf_encoding_table[]
Definition: nkf.c:167
Definition: nkf.c:115
const unsigned short cp932inv[2][189]
Definition: utf8tbl.c:13625
Definition: nkf.c:90
static char * backup_suffix
Definition: nkf.c:674
#define nkf_char_unicode_p(c)
Definition: nkf.c:430
static const struct @8 long_option[]
static size_t nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2)
Definition: nkf.c:2449
static const unsigned char ev_x0213[]
Definition: nkf.c:628
static void nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
Definition: nkf.c:1698
static void status_push_ch(struct input_code *ptr, nkf_char c)
Definition: nkf.c:3048
static void status_reinit(struct input_code *ptr)
Definition: nkf.c:3068
#define setbinmode(fp)
Definition: nkf.h:86
#define HOLD_SIZE
Definition: nkf.c:304
#define UTF16_TO_UTF32(lead, trail)
Definition: nkf.c:434
static nkf_char numchar_getc(FILE *f)
Definition: nkf.c:4658
#define nkf_char_unicode_value_p(c)
Definition: nkf.c:432
static void w_oconv(nkf_char c2, nkf_char c1)
Definition: nkf.c:2799
const char * alias
Definition: nkf.c:1151
static const char * input_codename
Definition: nkf.c:336
static nkf_char mime_ungetc(nkf_char c, ARG_UNUSED FILE *f)
Definition: nkf.c:4324
static int is_x0213_2_in_x0212(nkf_char c1)
Definition: nkf.c:1570
byte_order
Definition: nkf.c:61
static struct @9 mime_input_state
static int f_line
Definition: nkf.c:492
#define COPY_RIGHT
Definition: nkf.c:25
static int nkf_enc_find_index(const char *name)
Definition: nkf.c:737
#define nkf_buf_length(buf)
Definition: nkf.c:859
static int iso2022jp_f
Definition: nkf.c:404
static nkf_char unicode_iconv_combine(nkf_char wc, nkf_char wc2)
Definition: nkf.c:2350
unsigned int last
Definition: nkf.c:4310
static const char * nkf_locale_charmap()
Definition: nkf.c:777
static void reinit(void)
Definition: nkf.c:5578
static int kanji_convert(FILE *f)
Definition: nkf.c:5833
static nkf_encoding * nkf_locale_encoding()
Definition: nkf.c:805
static int binmode_f
Definition: nkf.c:393
static void switch_mime_getc(void)
Definition: nkf.c:4350
const unsigned short *const euc_to_utf8_2bytes[]
Definition: utf8tbl.c:3060
static int no_cp932ext_f
Definition: nkf.c:355
const int id
Definition: nkf.c:162
#define nkf_noescape_mime(c)
Definition: nkf.c:297
static nkf_char(* i_ugetc)(FILE *)
Definition: nkf.c:418
#define nkf_char_unicode_bmp_p(c)
Definition: nkf.c:431
Definition: nkf.c:111
const nkf_native_encoding * base_encoding
Definition: nkf.c:164
#define VALUE_MASK
Definition: nkf.c:425
static nkf_char mime_getc_buf(FILE *f)
Definition: nkf.c:4341
static char * get_backup_filename(const char *suffix, const char *filename)
Definition: nkf.c:1019
#define DEFAULT_CODE_LOCALE
Definition: nkf.h:138
static void no_connection(nkf_char c2, nkf_char c1)
Definition: nkf.c:520
static void status_clear(struct input_code *ptr)
Definition: nkf.c:3054
static const unsigned char dv[]
Definition: nkf.c:586
#define MIME_DECODE_DEFAULT
Definition: nkf.h:14
static const nkf_char mime_encode_method[]
Definition: nkf.c:4292
static void print_guessed_code(char *filename)
Definition: nkf.c:4580
static int mimeout_f
Definition: nkf.c:402
#define MORE
Definition: nkf.c:5822
static unsigned char ascii_intro
Definition: nkf.c:500
#define NKF_ICONV_NOT_COMBINED
Definition: nkf.c:2321
RUBY_EXTERN void * memmove(void *, const void *, size_t)
Definition: memmove.c:7
static int mime_decode_f
Definition: nkf.c:398
static void encode_fallback_subchar(nkf_char c)
Definition: nkf.c:1141
static int rot_f
Definition: nkf.c:394
static nkf_char(* iconv)(nkf_char c2, nkf_char c1, nkf_char c0)
Definition: nkf.c:525
static nkf_char x0212_shift(nkf_char c)
Definition: nkf.c:1539
static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
static nkf_encoding * input_encoding
Definition: nkf.c:337
static int alpha_f
Definition: nkf.c:396
Win32OLEIDispatch * p
Definition: win32ole.c:786
static unsigned char prefix_table[256]
Definition: nkf.c:470
static nkf_char(* mime_iconv_back)(nkf_char c2, nkf_char c1, nkf_char c0)
Definition: nkf.c:4313
Definition: nkf.c:87
#define nkf_isxdigit(c)
Definition: nkf.c:285
static void s_oconv(nkf_char c2, nkf_char c1)
Definition: nkf.c:2719
static int option_mode
Definition: nkf.c:668
static nkf_char(* i_getc)(FILE *f)
Definition: nkf.c:540
#define SCORE_NO_EXIST
Definition: nkf.c:2949
static nkf_char z_prev2
Definition: nkf.c:3979
static nkf_char cap_getc(FILE *f)
Definition: nkf.c:4632
#define rot47(c)
Definition: nkf.c:4152
Definition: file.c:2361
static int mime_decode_mode
Definition: nkf.c:558
#define NKF_VERSION
Definition: nkf.c:23
nkf_buf_t * std_gc_buf
Definition: nkf.c:3299
Definition: nkf.c:91
#define STRICT_MIME
Definition: nkf.c:58
nkf_char buf[3]
Definition: nkf.c:330
static nkf_char e2w_conv(nkf_char c2, nkf_char c1)
Definition: nkf.c:1973
#define getc(f)
Definition: nkf.c:21
static nkf_char(* i_mgetc)(FILE *)
Definition: nkf.c:548
#define SO
Definition: nkf.c:78
#define UCS_MAP_CP932
Definition: nkf.c:349
#define SJ6394
unsigned int input
Definition: nkf.c:4311
static void * nkf_xrealloc(void *ptr, size_t size)
Definition: nkf.c:701
static nkf_char w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6)
Definition: nkf.c:2381
static unsigned char kanji_intro
Definition: nkf.c:499
const unsigned short x0213_2_surrogate_table[sizeof_x0213_2_surrogate_table][3]
Definition: utf8tbl.c:3279
const unsigned short euc_to_utf8_1byte[]
Definition: utf8tbl.c:3046
#define val
static void w_oconv32(nkf_char c2, nkf_char c1)
Definition: nkf.c:2904
static nkf_char(* i_uungetc)(nkf_char c, FILE *f)
Definition: nkf.c:419
struct @7 encoding_name_to_id_table[]
static void set_code_score(struct input_code *ptr, nkf_char score)
Definition: nkf.c:2991
nkf_buf_t * nfc_buf
Definition: nkf.c:3303
static int fold_f
Definition: nkf.c:495
#define SI
Definition: nkf.c:77
Definition: nkf.c:99
static void set_input_encoding(nkf_encoding *enc)
Definition: nkf.c:1230
static int fold_preserve_f
Definition: nkf.c:494
#define ARG_UNUSED
Definition: nkf.h:181
static void nkf_state_init(void)
Definition: nkf.c:3311
static int f_prev
Definition: nkf.c:493
static const nkf_char mime_encode[]
Definition: nkf.c:4283
#define nkf_isoctal(c)
Definition: nkf.c:283
struct input_code input_code_list[]
Definition: nkf.c:475
#define NKF_ICONV_NEED_TWO_MORE_BYTES
Definition: nkf.c:2392
static void oconv_newline(void(*func)(nkf_char, nkf_char))
Definition: nkf.c:3774
static nkf_char mime_integrity(FILE *f, const unsigned char *p)
Definition: nkf.c:4376
#define nkf_enc_unicode_p(enc)
Definition: nkf.c:766
static size_t nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
Definition: nkf.c:2530
#define CR
Definition: nkf.c:73
static const nkf_char score_table_F0[]
Definition: nkf.c:2962
static void encode_fallback_java(nkf_char c)
Definition: nkf.c:1109
#define nkf_buf_empty_p(buf)
Definition: nkf.c:860
static int mimeout_mode
Definition: nkf.c:486
nkf_native_encoding NkfEncodingISO_2022_JP
Definition: nkf.c:154
static int output_mode
Definition: nkf.c:556
static int output_bom_f
Definition: nkf.c:365
#define SCORE_ERROR
Definition: nkf.c:2951
#define DEFAULT_J
Definition: nkf.c:311
static int unicode_to_jis_common2(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
Definition: nkf.c:1768
static nkf_char mime_getc(FILE *f)
Definition: nkf.c:4803
static void mimeout_addchar(nkf_char c)
Definition: nkf.c:5162
static nkf_char push_hold_buf(nkf_char c2)
Definition: nkf.c:3358
#define MAXRECOVER
Definition: nkf.c:4315
#define SJ0162
#define rot13(c)
Definition: nkf.c:4142
static void iso2022jp_check_conv(nkf_char c2, nkf_char c1)
Definition: nkf.c:4207
static void put_newline(void(*func)(nkf_char))
Definition: nkf.c:3757
int argc
Definition: ruby.c:130
static int x0213_wait_combining_p(nkf_char wc)
Definition: nkf.c:2208
static int x0212_f
Definition: nkf.c:467
#define set_input_mode(mode)
Definition: nkf.c:5825
#define realloc
Definition: ripper.c:99
static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
long modtime
Definition: file.c:2363
const unsigned short *const euc_to_utf8_2bytes_ms[]
Definition: utf8tbl.c:3087
#define range(low, item, hi)
Definition: date_strftime.c:21
static const char * get_guessed_code(void)
Definition: nkf.c:4550
static unsigned char stdobuf[IOBUF_SIZE]
Definition: nkf.c:384
#define GETA2
Definition: nkf.c:316
static nkf_char(* i_nungetc)(nkf_char c, FILE *f)
Definition: nkf.c:439
static nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
Definition: nkf.c:2074
static nkf_char(* i_bgetc)(FILE *)
Definition: nkf.c:543
static const nkf_char score_table_8FA0[]
Definition: nkf.c:2969
#define MIME_BUF_SIZE
Definition: nkf.c:4304
#define EXIT_FAILURE
Definition: eval_intern.h:24
static nkf_char(* i_mungetc_buf)(nkf_char c, FILE *f)
Definition: nkf.c:553
static nkf_char cap_ungetc(nkf_char c, FILE *f)
Definition: nkf.c:4638
#define is_ibmext_in_sjis(c2)
Definition: nkf.c:301
const unsigned short x0213_1_surrogate_table[sizeof_x0213_1_surrogate_table][3]
Definition: utf8tbl.c:3251
static nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
Definition: nkf.c:1583
#define nkf_isblank(c)
Definition: nkf.c:286
static void code_score(struct input_code *ptr)
Definition: nkf.c:3007
#define hex2bin(c)
Definition: nkf.c:292
static int cp51932_f
Definition: nkf.c:459
#define EOF
Definition: vsnprintf.c:207
nkf_char * ptr
Definition: nkf.c:837
const char * name
Definition: nkf.c:163
static nkf_char std_ungetc(nkf_char c, FILE *f)
static struct input_code * find_inputcode_byfunc(nkf_char(*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0))
Definition: nkf.c:1494
static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
#define nkf_isprint(c)
Definition: nkf.c:290
static void set_input_codename(const char *codename)
Definition: nkf.c:4540
static int preserve_time_f
Definition: nkf.c:672
static void check_bom(FILE *f)
Definition: nkf.c:3575
static void encode_fallback_html(nkf_char c)
Definition: nkf.c:1074
static int eolmode_f
Definition: nkf.c:677
#define SCORE_KANA
Definition: nkf.c:2944
int errno
#define TRUE
Definition: nkf.h:175
static nkf_char nfc_getc(FILE *f)
Definition: nkf.c:4721
Definition: nkf.c:101
static nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
Definition: nkf.c:2052
#define EXIT_SUCCESS
Definition: error.c:31
static int x0213_f
Definition: nkf.c:468
Definition: nkf.c:92
static nkf_char(* i_mgetc_buf)(FILE *)
Definition: nkf.c:552
static nkf_char broken_ungetc(nkf_char c, FILE *f)
#define malloc
Definition: ripper.c:98
const unsigned short *const *const utf8_to_euc_3bytes_x0213[]
Definition: utf8tbl.c:12585
static int hold_count
Definition: nkf.c:3356
const char * name
Definition: nkf.c:326
static int cp932inv_f
Definition: nkf.c:462
static int backup_f
Definition: nkf.c:673
#define UTF8_INPUT_ENABLE
Definition: config.h:5
static const char basis_64[]
Definition: nkf.c:5035
Definition: nkf.c:102
static void hira_conv(nkf_char c2, nkf_char c1)
Definition: nkf.c:4172
#define is_eucg3(c2)
Definition: nkf.c:296
const unsigned short x0213_combining_table[sizeof_x0213_combining_table][3]
Definition: utf8tbl.c:3224
static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
Definition: nkf.c:2232
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
#define mime_input_buf(n)
Definition: nkf.c:4306
static int file_out_f
Definition: nkf.c:669
static int nkf_str_caseeql(const char *src, const char *target)
Definition: nkf.c:717
static nkf_char url_getc(FILE *f)
Definition: nkf.c:4644
#define nkf_enc_to_index(enc)
Definition: nkf.c:759
Definition: nkf.c:834
static const nkf_char score_table_8FE0[]
Definition: nkf.c:2976
static int options(unsigned char *cp)
Definition: nkf.c:6355
#define UCS_MAP_CP10001
Definition: nkf.c:350
static nkf_char noconvert(FILE *f)
Definition: nkf.c:5807
#define CRLF
Definition: nkf.c:81
static void(* oconv)(nkf_char c2, nkf_char c1)
Definition: nkf.c:526
static int unbuf_f
Definition: nkf.c:390
int _file_stat
Definition: nkf.c:333
#define SEND
Definition: nkf.c:5823
static VALUE result
Definition: nkf.c:40
static nkf_char nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8)
Definition: nkf.c:2515
static int nfc_f
Definition: nkf.c:407
static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
Definition: nkf.c:2120
static void mime_input_buf_unshift(nkf_char c)
Definition: nkf.c:4318
#define DEFAULT_NEWLINE
Definition: nkf.h:23
static nkf_char prev_cr
Definition: nkf.c:679
static void show_configuration(void)
Definition: nkf.c:966
#define LAST
Definition: nkf.c:5824
static int x0201_f
Definition: nkf.c:403
static void set_output_encoding(nkf_encoding *enc)
Definition: nkf.c:1353
nkf_native_encoding NkfEncodingUTF_16
Definition: nkf.c:158
#define bin2hex(c)
Definition: nkf.c:295
#define UCS_MAP_MS
Definition: nkf.c:348
const unsigned short *const utf8_to_euc_2bytes_ms[]
Definition: utf8tbl.c:12441
static void shift(struct cparse_params *v, long act, VALUE tok, VALUE val)
Definition: cparse.c:662
static void(* o_hira_conv)(nkf_char c2, nkf_char c1)
Definition: nkf.c:532
static nkf_char(* i_bungetc)(nkf_char c, FILE *f)
Definition: nkf.c:544
static int nop_f
Definition: nkf.c:392
static struct @10 mimeout_state
#define nkf_byte_jisx0201_katakana_p(c)
Definition: nkf.c:302
const unsigned short *const *const utf8_to_euc_3bytes_mac[]
Definition: utf8tbl.c:12579
long len
Definition: nkf.c:836
const unsigned short *const x0212_to_utf8_2bytes[]
Definition: utf8tbl.c:3168
const unsigned short *const euc_to_utf8_2bytes_x0213[]
Definition: utf8tbl.c:3140
static void z_conv(nkf_char c2, nkf_char c1)
Definition: nkf.c:3982
static nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
Definition: nkf.c:1626
#define X0213_SURROGATE_FIND(tbl, size, euc)
Definition: nkf.c:1963
static int mime_f
Definition: nkf.c:397
int count
Definition: nkf.c:5041
unsigned int top
Definition: nkf.c:4309
const unsigned short shiftjis_x0212[3][189]
Definition: utf8tbl.c:13682
static void w_status(struct input_code *, nkf_char)
Definition: nkf.c:3202
static void(* o_rot_conv)(nkf_char c2, nkf_char c1)
Definition: nkf.c:531
static int input_endian
Definition: nkf.c:358
#define SCORE_DEPEND
Definition: nkf.c:2945
static int hira_f
Definition: nkf.c:395
#define setvbuffer(fp, buf, size)
Definition: nkf.h:92
static void eol_conv(nkf_char c2, nkf_char c1)
Definition: nkf.c:3736
int size
Definition: encoding.c:52
static void mime_putc(nkf_char c)
Definition: nkf.c:5205
nkf_char mimeout_state
Definition: nkf.c:3302
static nkf_char(* i_ngetc)(FILE *)
Definition: nkf.c:438
#define f
static const nkf_char score_table_A0[]
Definition: nkf.c:2955
int utime(const char *filename, const struct utimbuf *times)
const struct normalization_pair normalization_table[]
Definition: utf8tbl.c:12597
static void(* o_base64conv)(nkf_char c2, nkf_char c1)
Definition: nkf.c:533
static nkf_encoding * nkf_enc_find(const char *name)
Definition: nkf.c:750
const unsigned short *const euc_to_utf8_2bytes_mac[]
Definition: utf8tbl.c:3114
const unsigned short *const utf8_to_euc_2bytes[]
Definition: utf8tbl.c:12411
static const unsigned char cv[]
Definition: nkf.c:564
#define OVERWRITE
Definition: config.h:16
#define SCORE_X0213
Definition: nkf.c:2948
#define UCS_MAP_ASCII
Definition: nkf.c:347
#define putchar(c)
Definition: nkf.c:28
#define nkf_toupper(c)
Definition: nkf.c:282
static nkf_char utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
Definition: nkf.c:2474
static const unsigned char fv[]
Definition: nkf.c:650
static nkf_char unicode_subchar
Definition: nkf.c:360
static int module_connection(void)
Definition: nkf.c:5696
static void rot_conv(nkf_char c2, nkf_char c1)
Definition: nkf.c:4160
#define FIXED_MIME
Definition: nkf.c:57
static void debug(const char *str)
Definition: nkf.c:4531
static void(* o_zconv)(nkf_char c2, nkf_char c1)
Definition: nkf.c:528
static size_t nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
Definition: nkf.c:2498
#define GETA1
Definition: nkf.c:315
static nkf_char(* i_nfc_getc)(FILE *)
Definition: nkf.c:408
const unsigned short shiftjis_cp932[3][189]
Definition: utf8tbl.c:13545
static const unsigned char ev[]
Definition: nkf.c:607
#define HELP_OUTPUT
Definition: nkf.h:28
#define char_size(c2, c1)
Definition: nkf.c:3810
#define FOLD_MARGIN
Definition: nkf.c:504
#define SCORE_X0212
Definition: nkf.c:2947
static void(* o_putc)(nkf_char c)
Definition: nkf.c:538
static nkf_char nfc_ungetc(nkf_char c, FILE *f)
Definition: nkf.c:4773
#define SS2
Definition: nkf.c:79
#define nkf_isspace(c)
Definition: nkf.c:287
#define nkf_isgraph(c)
Definition: nkf.c:291
nkf_native_encoding NkfEncodingShift_JIS
Definition: nkf.c:155
long capa
Definition: nkf.c:835
Definition: nkf.c:112
#define nkf_isdigit(c)
Definition: nkf.c:284
static nkf_char(* i_nfc_ungetc)(nkf_char c, FILE *f)
Definition: nkf.c:409
static void(* encode_fallback)(nkf_char c)
Definition: nkf.c:361
#define RANGE_NUM_MAX
static nkf_buf_t * nkf_buf_new(int length)
Definition: nkf.c:841
#define UTF8_OUTPUT_ENABLE
Definition: config.h:6
const char * name
Definition: nkf.c:148
Definition: nkf.c:110
#define nkf_enc_cp5022x_p(enc)
Definition: nkf.c:770
static nkf_char(* i_ungetc)(nkf_char c, FILE *f)
Definition: nkf.c:541
Definition: nkf.c:113
static unsigned char stdibuf[IOBUF_SIZE]
Definition: nkf.c:383
static nkf_char x0212_unshift(nkf_char c)
Definition: nkf.c:1557
int main(int argc, char **argv)
Definition: nkf.c:6918
static void w_oconv16(nkf_char c2, nkf_char c1)
Definition: nkf.c:2862
#define assert(condition)
Definition: ossl.h:45
static void e_status(struct input_code *, nkf_char)
Definition: nkf.c:3156
const char * name
Definition: nkf.c:208
#define nkf_xfree(ptr)
Definition: nkf.c:714
static void unswitch_mime_getc(void)
Definition: nkf.c:4363
static nkf_char w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3)
Definition: nkf.c:2309
static int numchar_f
Definition: nkf.c:437
RUBY_EXTERN int dup2(int, int)
Definition: dup2.c:27
static int estab_f
Definition: nkf.c:391
#define ESC
Definition: nkf.c:74
const unsigned short *const *const utf8_to_euc_3bytes[]
Definition: utf8tbl.c:12561
static void mime_prechar(nkf_char c2, nkf_char c1)
Definition: nkf.c:5093
const unsigned short *const x0212_to_utf8_2bytes_x0213[]
Definition: utf8tbl.c:3194
static int input_eol
Definition: nkf.c:678
#define OUTPUT_UTF8(val)
Definition: nkf.c:2790
Definition: nkf.c:122
static nkf_char hex_getc(nkf_char ch, FILE *f, nkf_char(*g)(FILE *f), nkf_char(*u)(nkf_char c, FILE *f))
Definition: nkf.c:4610
#define IOBUF_SIZE
Definition: nkf.c:308
static int unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
Definition: nkf.c:1805
#define fileno(p)
Definition: vsnprintf.c:223
#define is_alnum(c)
Definition: nkf.c:278
static int input_mode
Definition: nkf.c:557
static void clr_code_score(struct input_code *ptr, nkf_char score)
Definition: nkf.c:2999
const unsigned short *const utf8_to_euc_2bytes_mac[]
Definition: utf8tbl.c:12501
static void version(void)
Definition: nkf.c:898
#define NEXT
Definition: nkf.c:5820
#define nkf_enc_to_oconv(enc)
Definition: nkf.c:762
#define MIMEOUT_BUF_LENGTH
Definition: nkf.c:5038
static int ms_ucs_map_f
Definition: nkf.c:351
static int fold_margin
Definition: nkf.c:507
Definition: nkf.c:108
#define DEFAULT_FOLD
Definition: nkf.c:505
#define DEL
Definition: nkf.c:76
const unsigned short *const *const utf8_to_euc_3bytes_ms[]
Definition: utf8tbl.c:12567
nkf_native_encoding NkfEncodingUTF_8
Definition: nkf.c:157
static int debug_f
Definition: nkf.c:445
static nkf_encoding * output_encoding
Definition: nkf.c:338
static nkf_char(* i_cgetc)(FILE *)
Definition: nkf.c:414
static nkf_char numchar_ungetc(nkf_char c, FILE *f)
Definition: nkf.c:4712
#define DEFAULT_R
Definition: nkf.c:312
static void(* o_fconv)(nkf_char c2, nkf_char c1)
Definition: nkf.c:529
static nkf_encoding * nkf_default_encoding()
Definition: nkf.c:822
#define stat(path, st)
Definition: win32.h:193
static nkf_char no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0)
Definition: nkf.c:512
static const unsigned char * mime_pattern[]
Definition: nkf.c:4257
Definition: nkf.c:100
nkf_char broken_state
Definition: nkf.c:3300
static void nkf_buf_clear(nkf_buf_t *buf)
Definition: nkf.c:870
#define NULL
Definition: _sdbm.c:102
static nkf_state_t * nkf_state
Definition: nkf.c:3306
static void open_mime(nkf_char mode)
Definition: nkf.c:5047
int nkf_char
Definition: nkf.h:39
nkf_char index
Definition: nkf.c:329
static int no_best_fit_chars_f
Definition: nkf.c:357
static nkf_char nkf_buf_pop(nkf_buf_t *buf)
Definition: nkf.c:885
nkf_char(* mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0)
Definition: nkf.c:4275
nkf_encodings
Definition: nkf.c:86
nkf_char score
Definition: nkf.c:328
Definition: nkf.c:118
#define INPUT_OPTION
Definition: config.h:19
static ULONG(STDMETHODCALLTYPE AddRef)(IDispatch __RPC_FAR *This)
Definition: win32ole.c:614
#define OUTPUT_UTF32(c)
Definition: nkf.c:2889
static void fold_conv(nkf_char c2, nkf_char c1)
Definition: nkf.c:3813
static void encode_fallback_perl(nkf_char c)
Definition: nkf.c:1130
#define NKF_INT32_C(n)
Definition: nkf.h:40
static nkf_char mime_begin_strict(FILE *f)
Definition: nkf.c:4415
const unsigned short *const utf8_to_euc_2bytes_x0213[]
Definition: utf8tbl.c:12531
static int guess_f
Definition: nkf.c:450
Definition: nkf.c:109
static void no_putc(nkf_char c)
void(* status_func)(struct input_code *, nkf_char)
Definition: nkf.c:331
static void status_reset(struct input_code *ptr)
Definition: nkf.c:3061
static void close_mime(void)
Definition: nkf.c:5126
long actime
Definition: file.c:2362
static nkf_char broken_getc(FILE *f)
Definition: nkf.c:3689
static void code_status(nkf_char c)
Definition: nkf.c:3260
static void std_putc(nkf_char c)
Definition: nkf.c:3348
char ** argv
Definition: ruby.c:131
static void nkf_each_char_to_hex(void(*f)(nkf_char c2, nkf_char c1), nkf_char c)
Definition: nkf.c:1056
#define X0201_DEFAULT
Definition: nkf.h:17
static nkf_encoding * nkf_enc_from_index(int idx)
Definition: nkf.c:728
Definition: nkf.c:88
static void status_disable(struct input_code *ptr)
Definition: nkf.c:3039
nkf_native_encoding NkfEncodingEUC_JP
Definition: nkf.c:156
Definition: nkf.c:117
Definition: nkf.c:120
#define SKIP
Definition: nkf.c:5821