23 #define NKF_VERSION "2.1.3"
24 #define NKF_RELEASE_DATE "2012-11-22"
26 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27 "Copyright (C) 1996-2012, The nkf Project."
38 # define INCL_DOSERRORS
168 {
ASCII,
"US-ASCII", &NkfEncodingASCII},
169 {
ISO_8859_1,
"ISO-8859-1", &NkfEncodingASCII},
170 {
ISO_2022_JP,
"ISO-2022-JP", &NkfEncodingISO_2022_JP},
171 {
CP50220,
"CP50220", &NkfEncodingISO_2022_JP},
172 {
CP50221,
"CP50221", &NkfEncodingISO_2022_JP},
173 {
CP50222,
"CP50222", &NkfEncodingISO_2022_JP},
177 {
SHIFT_JIS,
"Shift_JIS", &NkfEncodingShift_JIS},
178 {
WINDOWS_31J,
"Windows-31J", &NkfEncodingShift_JIS},
179 {
CP10001,
"CP10001", &NkfEncodingShift_JIS},
180 {
EUC_JP,
"EUC-JP", &NkfEncodingEUC_JP},
181 {
EUCJP_NKF,
"eucJP-nkf", &NkfEncodingEUC_JP},
182 {
CP51932,
"CP51932", &NkfEncodingEUC_JP},
183 {
EUCJP_MS,
"eucJP-MS", &NkfEncodingEUC_JP},
189 {
UTF_8,
"UTF-8", &NkfEncodingUTF_8},
190 {
UTF_8N,
"UTF-8N", &NkfEncodingUTF_8},
191 {
UTF_8_BOM,
"UTF-8-BOM", &NkfEncodingUTF_8},
192 {
UTF8_MAC,
"UTF8-MAC", &NkfEncodingUTF_8},
193 {
UTF_16,
"UTF-16", &NkfEncodingUTF_16},
194 {
UTF_16BE,
"UTF-16BE", &NkfEncodingUTF_16},
196 {
UTF_16LE,
"UTF-16LE", &NkfEncodingUTF_16},
198 {
UTF_32,
"UTF-32", &NkfEncodingUTF_32},
199 {
UTF_32BE,
"UTF-32BE", &NkfEncodingUTF_32},
201 {
UTF_32LE,
"UTF-32LE", &NkfEncodingUTF_32},
203 {
BINARY,
"BINARY", &NkfEncodingASCII},
265 #if defined(DEFAULT_CODE_JIS)
266 #define DEFAULT_ENCIDX ISO_2022_JP
267 #elif defined(DEFAULT_CODE_SJIS)
268 #define DEFAULT_ENCIDX SHIFT_JIS
269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
270 #define DEFAULT_ENCIDX WINDOWS_31J
271 #elif defined(DEFAULT_CODE_EUC)
272 #define DEFAULT_ENCIDX EUC_JP
273 #elif defined(DEFAULT_CODE_UTF8)
274 #define DEFAULT_ENCIDX UTF_8
/*
 * is_alnum(c): non-zero when c is an ASCII letter ('a'-'z', 'A'-'Z') or a
 * decimal digit ('0'-'9').
 *
 * Every use of the argument is parenthesized so that an expression argument
 * (for example `x | 1`) is classified as a single value.  The argument is
 * still evaluated several times, so do not pass expressions with side effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
/*
 * ASCII-only character classification and case helpers.
 *
 * All of them work on plain integer character values and are independent of
 * the C locale.  Each occurrence of the argument is parenthesized so that
 * expression arguments (e.g. `x | 1`, `cond ? a : b`) are treated as one
 * value.  Arguments are evaluated more than once: no side effects, please.
 *
 * nkf_isblank / nkf_isspace / nkf_isprint rely on the SP, TAB, CR and LF
 * constants defined elsewhere in the project.
 */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c)  ((c) == SP || (c) == TAB)
#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c)  (('a'<=(c) && (c)<='z') || ('A'<=(c) && (c)<='Z'))
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c)  (SP<=(c) && (c)<='~')
#define nkf_isgraph(c)  ('!'<=(c) && (c)<='~')
/*
 * hex2bin(c): numeric value (0-15) of the hexadecimal digit character c;
 *             any character that is not a hex digit yields 0.
 * bin2hex(c): upper-case hexadecimal digit character for the low four bits
 *             of c.
 *
 * Arguments are parenthesized: without that, bin2hex(a + b) expanded to
 * "..."[a + b&15], i.e. a + (b & 15), which can index past the digit string.
 * Arguments are evaluated more than once in hex2bin.
 */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/*
 * is_eucg3(c2): true when the high byte of c2, taken as a 16-bit value,
 *               equals SS3.
 * nkf_noescape_mime(c): true for CR, LF, and for characters strictly between
 *               SP and DEL other than  ? = _ ( ) .  and the double quote
 *               (0x22).
 * is_ibmext_in_sjis(c2): true when c2 lies in
 *               [CP932_TABLE_BEGIN, CP932_TABLE_END].
 * nkf_byte_jisx0201_katakana_p(c): true when c lies in [SP, 0x5F].
 *
 * SS3, CR, LF, SP, DEL and the CP932_TABLE_* bounds are defined elsewhere in
 * the project.  Arguments are parenthesized so that expression arguments are
 * handled as a whole (previously the cast in is_eucg3 applied only to the
 * first operand of such an expression).  Arguments may be evaluated more
 * than once.
 */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
304 #define HOLD_SIZE 1024
305 #if defined(INT_IS_SHORT)
306 #define IOBUF_SIZE 2048
308 #define IOBUF_SIZE 16384
311 #define DEFAULT_J 'B'
312 #define DEFAULT_R 'B'
322 extern POINT _BufferSize;
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
347 #define UCS_MAP_ASCII 0
349 #define UCS_MAP_CP932 2
350 #define UCS_MAP_CP10001 3
353 #ifdef UTF8_INPUT_ENABLE
364 #ifdef UTF8_OUTPUT_ENABLE
382 #if !defined(PERL_XS) && !defined(WIN32DLL)
387 #define NKF_UNSPECIFIED (-TRUE)
406 #ifdef UNICODE_NORMALIZATION
/*
 * Tagging scheme for internal character values.
 *
 *   PREFIX_EUCG3     bits OR-ed in by nkf_char_euc3_new()
 *   CLASS_MASK       top byte of the value: the class tag
 *   CLASS_UNICODE    class tag set by nkf_char_unicode_new()
 *   VALUE_MASK       low 24 bits: the payload
 *   UNICODE_BMP_MAX  largest payload accepted by nkf_char_unicode_bmp_p()
 *   UNICODE_MAX      largest payload accepted by nkf_char_unicode_value_p()
 *
 * NKF_INT32_C is provided elsewhere in the project.
 *
 * The predicates now parenthesize their argument; previously
 * nkf_char_unicode_bmp_p(a | b) expanded to (a | b & VALUE_MASK) and masked
 * only b.
 */
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c)        ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c)     ((c) | CLASS_UNICODE)
#define nkf_char_unicode_p(c)       (((c) & CLASS_MASK) == CLASS_UNICODE)
#define nkf_char_unicode_bmp_p(c)   (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)

/*
 * Combine a UTF-16 surrogate pair into a code point.  The constant equals
 * (0xD800 << 10) + 0xDC00 - 0x10000, so (0xD800, 0xDC00) maps to U+10000.
 */
#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
436 #ifdef NUMCHAR_OPTION
446 static void debug(
const char *str);
454 static int exec_f = 0;
457 #ifdef SHIFTJIS_CP932
476 {
"EUC-JP", 0, 0, 0, {0, 0, 0},
e_status,
e_iconv, 0},
477 {
"Shift_JIS", 0, 0, 0, {0, 0, 0},
s_status,
s_iconv, 0},
478 #ifdef UTF8_INPUT_ENABLE
479 {
"UTF-8", 0, 0, 0, {0, 0, 0},
w_status,
w_iconv, 0},
480 {
"UTF-16", 0, 0, 0, {0, 0, 0},
NULL,
w_iconv16, 0},
481 {
"UTF-32", 0, 0, 0, {0, 0, 0},
NULL,
w_iconv32, 0},
504 #define FOLD_MARGIN 10
505 #define DEFAULT_FOLD 60
514 fprintf(stderr,
"nkf internal module connection failure.\n");
564 static const unsigned char cv[]= {
565 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
586 static const unsigned char dv[]= {
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 static const unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
629 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634 0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
635 0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
636 0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
637 0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
638 0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
650 static const unsigned char fv[] = {
652 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
653 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
654 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
655 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
656 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
657 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
658 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
659 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
660 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
661 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
663 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
681 static int end_check;
689 if (size == 0) size = 1;
693 perror(
"can't malloc");
703 if (size == 0) size = 1;
707 perror(
"can't realloc");
714 #define nkf_xfree(ptr) free(ptr)
720 for (i = 0; src[
i] && target[
i]; i++) {
723 if (src[i] || target[i])
return FALSE;
733 return &nkf_encoding_table[idx];
740 if (name[0] ==
'X' && *(name+1) ==
'-') name += 2;
754 if (idx < 0)
return 0;
758 #define nkf_enc_name(enc) (enc)->name
759 #define nkf_enc_to_index(enc) (enc)->id
760 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
761 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
762 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
763 #define nkf_enc_asciicompat(enc) (\
764 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
765 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
766 #define nkf_enc_unicode_p(enc) (\
767 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
768 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
769 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
770 #define nkf_enc_cp5022x_p(enc) (\
771 nkf_enc_to_index(enc) == CP50220 ||\
772 nkf_enc_to_index(enc) == CP50221 ||\
773 nkf_enc_to_index(enc) == CP50222)
775 #ifdef DEFAULT_CODE_LOCALE
779 #ifdef HAVE_LANGINFO_H
780 return nl_langinfo(CODESET);
781 #elif defined(__WIN32__)
783 sprintf(buf,
"CP%d", GetACP());
785 #elif defined(__OS2__)
786 # if defined(INT_IS_SHORT)
792 ULONG ulCP[1], ulncp;
793 DosQueryCp(
sizeof(ulCP), ulCP, &ulncp);
794 if (ulCP[0] == 932 || ulCP[0] == 943)
795 strcpy(buf,
"Shift_JIS");
797 sprintf(buf,
"CP%lu", ulCP[0]);
818 return &nkf_encoding_table[
UTF_8];
825 #ifdef DEFAULT_CODE_LOCALE
827 #elif defined(DEFAULT_ENCIDX)
859 #define nkf_buf_length(buf) ((buf)->len)
860 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
865 assert(index <= buf->len);
866 return buf->
ptr[index];
888 return buf->
ptr[--buf->
len];
894 #define fprintf dllprintf
907 "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
909 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
910 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
914 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
915 " UTF option is -W[8,[16,32][B,L]]\n"
917 " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
921 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
922 " M[BQ] MIME encode [B:base64 Q:quoted]\n"
923 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
926 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
927 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
928 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
929 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
932 " O Output to File (DEFAULT 'nkf.out')\n"
933 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
936 " --ic=<encoding> Specify the input encoding\n"
937 " --oc=<encoding> Specify the output encoding\n"
938 " --hiragana --katakana Hiragana/Katakana Conversion\n"
939 " --katakana-hiragana Converts each other\n"
943 " --{cap, url}-input Convert hex after ':' or '%%'\n"
946 " --numchar-input Convert Unicode Character Reference\n"
949 " --fb-{skip, html, xml, perl, java, subchar}\n"
950 " Specify unassigned character's replacement\n"
955 " --in-place[=SUF] Overwrite original files\n"
956 " --overwrite[=SUF] Preserve timestamp of original files\n"
958 " -g --guess Guess the input code\n"
959 " -v --version Print the version\n"
960 " --help/-V Print this help / configuration\n"
970 " Compile-time options:\n"
971 " Compiled at: " __DATE__
" " __TIME__
"\n"
974 " Default output encoding: "
977 #elif defined(DEFAULT_ENCIDX)
984 " Default output end of line: "
993 " Decode MIME encoded string: "
1000 " Convert JIS X 0201 Katakana: "
1007 " --help, --version output: "
1008 #
if HELP_OUTPUT_HELP_OUTPUT
1021 char *backup_filename;
1022 int asterisk_count = 0;
1024 int filename_length =
strlen(filename);
1026 for(i = 0; suffix[
i]; i++){
1027 if(suffix[i] ==
'*') asterisk_count++;
1031 backup_filename =
nkf_xmalloc(
strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1032 for(i = 0, j = 0; suffix[
i];){
1033 if(suffix[i] ==
'*'){
1034 backup_filename[j] =
'\0';
1035 strncat(backup_filename, filename, filename_length);
1037 j += filename_length;
1039 backup_filename[j++] = suffix[i++];
1042 backup_filename[j] =
'\0';
1044 j = filename_length +
strlen(suffix);
1046 strcpy(backup_filename, filename);
1047 strcat(backup_filename, suffix);
1048 backup_filename[j] =
'\0';
1050 return backup_filename;
1054 #ifdef UTF8_INPUT_ENABLE
1084 (*oconv)(0, 0x30+(c/10000 )%10);
1086 (*oconv)(0, 0x30+(c/1000 )%10);
1088 (*oconv)(0, 0x30+(c/100 )%10);
1090 (*oconv)(0, 0x30+(c/10 )%10);
1092 (*oconv)(0, 0x30+ c %10);
1144 (*oconv)((c>>8)&0xFF, c&0xFF);
1149 static const struct {
1173 {
"katakana-hiragana",
"h3"},
1181 #ifdef UTF8_OUTPUT_ENABLE
1191 {
"fb-subchar=",
""},
1193 #ifdef UTF8_INPUT_ENABLE
1194 {
"utf8-input",
"W"},
1195 {
"utf16-input",
"W16"},
1196 {
"no-cp932ext",
""},
1197 {
"no-best-fit-chars",
""},
1199 #ifdef UNICODE_NORMALIZATION
1200 {
"utf8mac-input",
""},
1212 #ifdef NUMCHAR_OPTION
1213 {
"numchar-input",
""},
1219 #ifdef SHIFTJIS_CP932
1240 #ifdef SHIFTJIS_CP932
1243 #ifdef UTF8_OUTPUT_ENABLE
1262 #ifdef SHIFTJIS_CP932
1265 #ifdef UTF8_OUTPUT_ENABLE
1271 #ifdef SHIFTJIS_CP932
1274 #ifdef UTF8_OUTPUT_ENABLE
1284 #ifdef SHIFTJIS_CP932
1287 #ifdef UTF8_OUTPUT_ENABLE
1293 #ifdef SHIFTJIS_CP932
1296 #ifdef UTF8_OUTPUT_ENABLE
1302 #ifdef SHIFTJIS_CP932
1305 #ifdef UTF8_OUTPUT_ENABLE
1312 #ifdef SHIFTJIS_CP932
1320 #ifdef SHIFTJIS_CP932
1324 #ifdef UTF8_INPUT_ENABLE
1325 #ifdef UNICODE_NORMALIZATION
1357 #ifdef SHIFTJIS_CP932
1360 #ifdef UTF8_OUTPUT_ENABLE
1366 #ifdef SHIFTJIS_CP932
1369 #ifdef UTF8_OUTPUT_ENABLE
1374 #ifdef SHIFTJIS_CP932
1380 #ifdef SHIFTJIS_CP932
1388 #ifdef SHIFTJIS_CP932
1396 #ifdef UTF8_OUTPUT_ENABLE
1401 #ifdef UTF8_OUTPUT_ENABLE
1407 #ifdef SHIFTJIS_CP932
1410 #ifdef UTF8_OUTPUT_ENABLE
1416 #ifdef SHIFTJIS_CP932
1419 #ifdef UTF8_OUTPUT_ENABLE
1425 #ifdef SHIFTJIS_CP932
1428 #ifdef UTF8_OUTPUT_ENABLE
1435 #ifdef UTF8_OUTPUT_ENABLE
1442 #ifdef UTF8_OUTPUT_ENABLE
1449 #ifdef SHIFTJIS_CP932
1457 #ifdef SHIFTJIS_CP932
1461 #ifdef UTF8_OUTPUT_ENABLE
1511 #ifdef INPUT_CODE_FIX
1512 if (f || !input_encoding)
1520 && (f == -
TRUE || !input_encoding)
1544 if (0x75 <= c && c <= 0x7f){
1545 ret = c + (0x109 - 0x75);
1548 if (0x75 <= c && c <= 0x7f){
1549 ret = c + (0x113 - 0x75);
1560 if (0x7f <= c && c <= 0x88){
1561 ret = c + (0x75 - 0x7f);
1562 }
else if (0x89 <= c && c <= 0x92){
1572 static const char x0213_2_table[] =
1573 {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
1576 return x0213_2_table[ku];
1577 if (78 <= ku && ku <= 94)
1589 if((0x21 <= ndx && ndx <= 0x2F)){
1590 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1591 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1593 }
else if(0x6E <= ndx && ndx <= 0x7E){
1594 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1595 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1603 const unsigned short *ptr;
1606 val = ptr[(c1 & 0x7f) - 0x21];
1619 if(0x7F < c2)
return 1;
1620 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1621 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1628 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1631 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1632 if (0xFC < c1)
return 1;
1633 #ifdef SHIFTJIS_CP932
1642 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1643 val =
cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1669 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){
1670 c2 =
PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1673 if (0x9E < c1) c2++;
1676 #define SJ0162 0x00e1
1677 #define SJ6394 0x0161
1679 if (0x9E < c1) c2++;
1682 c1 = c1 - ((c1 >
DEL) ?
SP : 0x1F);
1696 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1706 }
else if (val < 0x800){
1707 *p1 = 0xc0 | (val >> 6);
1708 *p2 = 0x80 | (val & 0x3f);
1712 *p1 = 0xe0 | (val >> 12);
1713 *p2 = 0x80 | ((val >> 6) & 0x3f);
1714 *p3 = 0x80 | ( val & 0x3f);
1717 *p1 = 0xf0 | (val >> 18);
1718 *p2 = 0x80 | ((val >> 12) & 0x3f);
1719 *p3 = 0x80 | ((val >> 6) & 0x3f);
1720 *p4 = 0x80 | ( val & 0x3f);
1737 else if (c1 <= 0xC1) {
1741 else if (c1 <= 0xDF) {
1743 wc = (c1 & 0x1F) << 6;
1746 else if (c1 <= 0xEF) {
1748 wc = (c1 & 0x0F) << 12;
1749 wc |= (c2 & 0x3F) << 6;
1752 else if (c2 <= 0xF4) {
1754 wc = (c1 & 0x0F) << 18;
1755 wc |= (c2 & 0x3F) << 12;
1756 wc |= (c3 & 0x3F) << 6;
1766 #ifdef UTF8_INPUT_ENABLE
1769 const unsigned short *
const *pp,
nkf_char psize,
1773 const unsigned short *
p;
1776 if (pp == 0)
return 1;
1779 if (c1 < 0 || psize <= c1)
return 1;
1781 if (p == 0)
return 1;
1784 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0)
return 1;
1786 if (val == 0)
return 1;
1807 const unsigned short *
const *pp;
1808 const unsigned short *
const *
const *ppp;
1809 static const char no_best_fit_chars_table_C2[] =
1810 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1811 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1812 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1813 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1814 static const char no_best_fit_chars_table_C2_ms[] =
1815 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1816 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1817 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1818 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1819 static const char no_best_fit_chars_table_932_C2[] =
1820 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1821 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1822 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1823 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1824 static const char no_best_fit_chars_table_932_C3[] =
1825 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1826 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1827 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1828 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1834 }
else if(c2 < 0xe0){
1839 if(no_best_fit_chars_table_932_C2[c1&0x3F])
return 1;
1842 if(no_best_fit_chars_table_932_C3[c1&0x3F])
return 1;
1848 if(no_best_fit_chars_table_C2[c1&0x3F])
return 1;
1851 if(no_best_fit_chars_table_932_C3[c1&0x3F])
return 1;
1855 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F])
return 1;
1880 }
else if(c0 < 0xF0){
1883 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94)
return 1;
1889 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE)
return 1;
1892 if(c0 == 0x92)
return 1;
1897 if(c1 == 0x80 || c0 == 0x9C)
return 1;
1905 if(c0 == 0x94)
return 1;
1908 if(c0 == 0xBB)
return 1;
1918 if(c0 == 0x95)
return 1;
1921 if(c0 == 0xA5)
return 1;
1928 if(c0 == 0x8D)
return 1;
1934 if(0xA0 <= c0 && c0 <= 0xA5)
return 1;
1949 #ifdef SHIFTJIS_CP932
1952 if (
e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1962 #ifdef UTF8_OUTPUT_ENABLE
1963 #define X0213_SURROGATE_FIND(tbl, size, euc) do { \
1965 for (i = 0; i < size; i++) \
1966 if (tbl[i][0] == euc) { \
1975 const unsigned short *
p;
1992 c2 = (c2&0x7f) - 0x21;
1993 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2002 c2 = (c2&0x7f) - 0x21;
2003 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2013 c1 = (c1 & 0x7f) - 0x21;
2014 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
2016 if (
x0213_f && 0xD800<=val && val<=0xDBFF) {
2017 nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
2038 for (i = 0; i < sizeof_x0213_combining_chars; i++)
2041 if (i >= sizeof_x0213_combining_chars)
2043 euc = (c2&0x7f)<<8 | (c1&0x7f);
2044 for (i = 0; i < sizeof_x0213_combining_table; i++)
2059 }
else if (0xc0 <= c2 && c2 <= 0xef) {
2061 #ifdef NUMCHAR_OPTION
2072 #ifdef UTF8_INPUT_ENABLE
2097 for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
2104 for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
2130 }
else if (c2 == 0x8f){
2134 if (!
cp51932_f && !
x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2139 c2 = (c2 << 8) | (c1 & 0x7f);
2141 #ifdef SHIFTJIS_CP932
2144 if (
e2s_conv(c2, c1, &s2, &s1) == 0){
2165 #ifdef SHIFTJIS_CP932
2166 if (
cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2168 if (
e2s_conv(c2, c1, &s2, &s1) == 0){
2192 }
else if ((c2 ==
EOF) || (c2 == 0) || c2 <
SP) {
2194 }
else if (!
x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2196 if(c1 == 0x7F)
return 0;
2201 if (ret)
return ret;
2211 for (i = 0; i < sizeof_x0213_combining_table; i++) {
2223 for (i = 0; i < sizeof_x0213_combining_chars; i++) {
2235 static const char w_iconv_utf8_1st_byte[] =
2237 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2238 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2239 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2240 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2247 if (c1 < 0 || 0xff < c1) {
2248 }
else if (c1 == 0) {
2250 }
else if ((c1 & 0xC0) == 0x80) {
2253 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2255 if (c2 < 0x80 || 0xBF < c2)
return 0;
2258 if (c3 == 0)
return -1;
2259 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2264 if (c3 == 0)
return -1;
2265 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2269 if (c3 == 0)
return -1;
2270 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2274 if (c3 == 0)
return -2;
2275 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2279 if (c3 == 0)
return -2;
2280 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2284 if (c3 == 0)
return -2;
2285 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2293 if (c1 == 0 || c1 ==
EOF){
2294 }
else if ((c1 & 0xf8) == 0xf0) {
2300 ret =
w2e_conv(c1, c2, c3, &c1, &c2);
/*
 * Negative status codes.  The replacement lists are parenthesized so that
 * each name always expands to a single primary expression (a stray
 * `x NKF_ICONV_...` can no longer silently parse as a subtraction).
 * NOTE(review): the names suggest these are results of the Unicode input
 * converters; confirm against the call sites.
 */
#define NKF_ICONV_INVALID_CODE_RANGE  (-13)
#define NKF_ICONV_WAIT_COMBINING_CHAR (-14)
#define NKF_ICONV_NOT_COMBINED        (-15)
2331 }
else if ((wc>>11) == 27) {
2334 }
else if (wc < 0xFFFF) {
2338 if (ret)
return ret;
2339 }
else if (wc < 0x10FFFF) {
2357 }
else if ((wc2>>11) == 27) {
2360 }
else if (wc2 < 0xFFFF) {
2363 for (i = 0; i < sizeof_x0213_combining_table; i++) {
2372 }
else if (wc2 < 0x10FFFF) {
/*
 * size_t sentinel values: (size_t)-1 and (size_t)-2, i.e. the two largest
 * size_t values.
 *
 * The whole cast expression is parenthesized.  Left bare, the replacement
 * list `(size_t)-1` changes meaning after certain operators: for example
 * `sizeof NKF_ICONV_NEED_ONE_MORE_BYTE` parsed as `sizeof(size_t) - 1`.
 */
#define NKF_ICONV_NEED_ONE_MORE_BYTE  ((size_t)-1)
#define NKF_ICONV_NEED_TWO_MORE_BYTES ((size_t)-2)
2404 if (0xD8 <= c1 && c1 <= 0xDB) {
2405 if (0xDC <= c3 && c3 <= 0xDF) {
2412 if (0xD8 <= c2 && c2 <= 0xDB) {
2413 if (0xDC <= c4 && c4 <= 0xDF) {
2430 if (0xD8 <= c3 && c3 <= 0xDB) {
2437 if (0xD8 <= c2 && c2 <= 0xDB) {
2480 wc = c2 << 16 | c3 << 8 | c4;
2483 wc = c3 << 16 | c2 << 8 | c1;
2486 wc = c1 << 16 | c4 << 8 | c3;
2489 wc = c4 << 16 | c1 << 8 | c2;
2539 #define output_ascii_escape_sequence(mode) do { \
2540 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2543 (*o_putc)(ascii_intro); \
2544 output_mode = mode; \
2594 #ifdef NUMCHAR_OPTION
2602 c2 = 0x7F + c1 / 94;
2603 c1 = 0x21 + c1 % 94;
2615 else if (c2 ==
EOF) {
2629 (*o_putc)(c2 & 0x7f);
2634 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2635 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1)
return;
2649 if (
x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2653 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2654 c1 = 0x21 + c1 % 94;
2657 (*o_putc)((c2 & 0x7f) | 0x080);
2658 (*o_putc)(c1 | 0x080);
2660 (*o_putc)((c2 & 0x7f) | 0x080);
2661 (*o_putc)(c1 | 0x080);
2673 }
else if (c2 == 0) {
2678 (*o_putc)(
SS2); (*o_putc)(c1|0x80);
2681 (*o_putc)(c1 | 0x080);
2685 #ifdef SHIFTJIS_CP932
2688 if (
e2s_conv(c2, c1, &s2, &s1) == 0){
2699 (*o_putc)((c2 & 0x7f) | 0x080);
2700 (*o_putc)(c1 | 0x080);
2703 (*o_putc)((c2 & 0x7f) | 0x080);
2704 (*o_putc)(c1 | 0x080);
2713 (*o_putc)(c2 | 0x080);
2714 (*o_putc)(c1 | 0x080);
2721 #ifdef NUMCHAR_OPTION
2726 if (!
x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2731 c1 += 0x40 + (c1 > 0x3e);
2745 }
else if (c2 == 0) {
2753 (*o_putc)(c1 | 0x080);
2757 if (
e2s_conv(c2, c1, &c2, &c1) == 0){
2770 #ifdef SHIFTJIS_CP932
2772 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2789 #ifdef UTF8_OUTPUT_ENABLE
2790 #define OUTPUT_UTF8(val) do { \
2791 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \
2793 if (c2) (*o_putc)(c2); \
2794 if (c3) (*o_putc)(c3); \
2795 if (c4) (*o_putc)(c4); \
2835 #define OUTPUT_UTF16_BYTES(c1, c2) do { \
2836 if (output_endian == ENDIAN_LITTLE){ \
2845 #define OUTPUT_UTF16(val) do { \
2846 if (nkf_char_unicode_bmp_p(val)) { \
2847 c2 = (val >> 8) & 0xff; \
2849 OUTPUT_UTF16_BYTES(c1, c2); \
2851 val &= VALUE_MASK; \
2852 if (val <= UNICODE_MAX) { \
2853 c2 = (val >> 10) + NKF_INT32_C(0xD7C0); \
2854 c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); \
2855 OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \
2856 OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \
2889 #define OUTPUT_UTF32(c) do { \
2890 if (output_endian == ENDIAN_LITTLE){ \
2891 (*o_putc)( (c) & 0xFF); \
2892 (*o_putc)(((c) >> 8) & 0xFF); \
2893 (*o_putc)(((c) >> 16) & 0xFF); \
2897 (*o_putc)(((c) >> 16) & 0xFF); \
2898 (*o_putc)(((c) >> 8) & 0xFF); \
2899 (*o_putc)( (c) & 0xFF); \
2943 #define SCORE_L2 (1)
2944 #define SCORE_KANA (SCORE_L2 << 1)
2945 #define SCORE_DEPEND (SCORE_KANA << 1)
2946 #define SCORE_CP932 (SCORE_DEPEND << 1)
2947 #define SCORE_X0212 (SCORE_CP932 << 1)
2948 #define SCORE_X0213 (SCORE_X0212 << 1)
2949 #define SCORE_NO_EXIST (SCORE_X0213 << 1)
2950 #define SCORE_iMIME (SCORE_NO_EXIST << 1)
2951 #define SCORE_ERROR (SCORE_iMIME << 1)
2953 #define SCORE_INIT (SCORE_iMIME)
3002 ptr->
score &= ~score;
3013 }
else if (c2 ==
SS2){
3015 }
else if (c2 == 0x8f){
3016 if ((c1 & 0x70) == 0x20){
3018 }
else if ((c1 & 0x70) == 0x60){
3020 }
else if ((c1 & 0x70) == 0x70){
3025 #ifdef UTF8_OUTPUT_ENABLE
3029 }
else if ((c2 & 0x70) == 0x20){
3031 }
else if ((c2 & 0x70) == 0x70){
3033 }
else if ((c2 & 0x70) >= 0x50){
3094 }
else if (0xa1 <= c && c <= 0xdf){
3099 }
else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
3102 }
else if (0xed <= c && c <= 0xee){
3105 #ifdef SHIFTJIS_CP932
3111 }
else if (0xf0 <= c && c <= 0xfc){
3120 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3130 #ifdef SHIFTJIS_CP932
3131 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
3143 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3167 }
else if (
SS2 == c || (0xa1 <= c && c <= 0xfe)){
3171 }
else if (0x8f == c){
3180 if (0xa1 <= c && c <= 0xfe){
3190 if (0xa1 <= c && c <= 0xfe){
3200 #ifdef UTF8_INPUT_ENABLE
3213 }
else if (0xc0 <= c && c <= 0xdf){
3216 }
else if (0xe0 <= c && c <= 0xef){
3219 }
else if (0xf0 <= c && c <= 0xf4){
3228 if (0x80 <= c && c <= 0xbf){
3231 int bom = (ptr->
buf[0] == 0xef && ptr->
buf[1] == 0xbb
3232 && ptr->
buf[2] == 0xbf);
3234 &ptr->
buf[0], &ptr->
buf[1]);
3245 if (0x80 <= c && c <= 0xbf){
3262 int action_flag = 1;
3275 }
else if(p->
stat == 0){
3288 }
else if (c <=
DEL){
3308 #define STD_GC_BUFSIZE (256)
3362 hold_buf[hold_count++] = c2;
3363 return ((hold_count >=
HOLD_SIZE*2) ?
EOF : hold_count);
3420 while (hold_index < hold_count){
3421 c1 = hold_buf[hold_index++];
3426 else if (c1 <=
DEL){
3429 }
else if (
iconv ==
s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3434 if (hold_index < hold_count){
3435 c2 = hold_buf[hold_index++];
3446 switch ((*
iconv)(c1, c2, 0)) {
3449 if (hold_index < hold_count){
3450 c3 = hold_buf[hold_index++];
3451 }
else if ((c3 = (*
i_getc)(f)) ==
EOF) {
3456 if (hold_index < hold_count){
3457 c4 = hold_buf[hold_index++];
3458 }
else if ((c4 = (*
i_getc)(f)) ==
EOF) {
3463 (*iconv)(c1, c2, (c3<<8)|c4);
3467 if (hold_index < hold_count){
3468 c3 = hold_buf[hold_index++];
3470 }
else if ((c3 = (*
i_getc)(f)) ==
EOF) {
3474 if (hold_index < hold_count){
3475 c4 = hold_buf[hold_index++];
3477 }
else if ((c4 = (*
i_getc)(f)) ==
EOF) {
3479 if (fromhold_count <= 2)
3487 if (fromhold_count <= 2) {
3490 }
else if (fromhold_count == 3) {
3500 if (hold_index < hold_count){
3501 c3 = hold_buf[hold_index++];
3509 if ((*
iconv)(c1, c2, c3) == -3) {
3512 if (hold_index < hold_count){
3513 c4 = hold_buf[hold_index++];
3515 }
else if ((c4 = (*
i_getc)(f)) ==
EOF) {
3519 if (hold_index < hold_count){
3520 c5 = hold_buf[hold_index++];
3522 }
else if ((c5 = (*
i_getc)(f)) ==
EOF) {
3524 if (fromhold_count == 4)
3530 if (hold_index < hold_count){
3531 c6 = hold_buf[hold_index++];
3533 }
else if ((c6 = (*
i_getc)(f)) ==
EOF) {
3535 if (fromhold_count == 5) {
3537 }
else if (fromhold_count == 4) {
3548 if (fromhold_count == 6) {
3550 }
else if (fromhold_count == 5) {
3553 }
else if (fromhold_count == 4) {
3566 if (c3 ==
EOF)
break;
3578 switch(c2 = (*
i_getc)(f)){
3580 if((c2 = (*
i_getc)(f)) == 0x00){
3581 if((c2 = (*
i_getc)(f)) == 0xFE){
3582 if((c2 = (*
i_getc)(f)) == 0xFF){
3583 if(!input_encoding){
3591 (*i_ungetc)(0xFF,
f);
3593 (*i_ungetc)(0xFE,
f);
3594 }
else if(c2 == 0xFF){
3595 if((c2 = (*
i_getc)(
f)) == 0xFE){
3596 if(!input_encoding){
3603 (*i_ungetc)(0xFF,
f);
3605 (*i_ungetc)(0xFF,
f);
3607 (*i_ungetc)(0x00,
f);
3609 (*i_ungetc)(0x00,
f);
3612 if((c2 = (*
i_getc)(
f)) == 0xBB){
3613 if((c2 = (*
i_getc)(
f)) == 0xBF){
3614 if(!input_encoding){
3621 (*i_ungetc)(0xBF,
f);
3623 (*i_ungetc)(0xBB,
f);
3625 (*i_ungetc)(0xEF,
f);
3628 if((c2 = (*
i_getc)(
f)) == 0xFF){
3629 if((c2 = (*
i_getc)(
f)) == 0x00){
3630 if((c2 = (*
i_getc)(
f)) == 0x00){
3631 if(!input_encoding){
3638 (*i_ungetc)(0x00,
f);
3640 (*i_ungetc)(0x00,
f);
3642 if(!input_encoding){
3650 (*i_ungetc)(0xFF,
f);
3652 (*i_ungetc)(0xFE,
f);
3655 if((c2 = (*
i_getc)(
f)) == 0xFE){
3656 if((c2 = (*
i_getc)(
f)) == 0x00){
3657 if((c2 = (*
i_getc)(
f)) == 0x00){
3658 if(!input_encoding){
3666 (*i_ungetc)(0x00,
f);
3668 (*i_ungetc)(0x00,
f);
3670 if(!input_encoding){
3678 (*i_ungetc)(0xFE,
f);
3680 (*i_ungetc)(0xFF,
f);
3701 if (c1==
'@'|| c1==
'B') {
3713 if (c1==
'J'|| c1==
'B') {
3739 if (c2 == 0 && c1 ==
LF) {
3753 else if (c2 != 0 || c1 !=
LF) (*o_eol_conv)(c2, c1);
/*
 * char_size(c2, c1): 2 when c2 is non-zero, otherwise 1.  c1 is accepted
 * but not used by the expansion.
 *
 * c2 is parenthesized: with a conditional-expression argument the old
 * expansion (c2?2:1) re-associated, e.g. char_size(f ? 0 : 1, x) became
 * (f ? 0 : (1 ? 2 : 1)).
 */
#define char_size(c2,c1) ((c2)?2:1)
3823 }
else if (c1==
BS) {
3862 }
else if (c1==
'\f') {
3866 }
else if ((c2==0 &&
nkf_isblank(c1)) || (c2 ==
'!' && c1 ==
'!')) {
3893 if (c1==(0xde&0x7f)) fold_state = 1;
3894 else if (c1==(0xdf&0x7f)) fold_state = 1;
3895 else if (c1==(0xa4&0x7f)) fold_state = 1;
3896 else if (c1==(0xa3&0x7f)) fold_state = 1;
3897 else if (c1==(0xa1&0x7f)) fold_state = 1;
3898 else if (c1==(0xb0&0x7f)) fold_state = 1;
3899 else if (
SP<=c1 && c1<=(0xdf&0x7f)) {
3923 }
else if ((prev0==
SP) ||
3933 if (c1==
'"') fold_state = 1;
3934 else if (c1==
'#') fold_state = 1;
3935 else if (c1==
'W') fold_state = 1;
3936 else if (c1==
'K') fold_state = 1;
3937 else if (c1==
'$') fold_state = 1;
3938 else if (c1==
'%') fold_state = 1;
3939 else if (c1==
'\'') fold_state = 1;
3940 else if (c1==
'(') fold_state = 1;
3941 else if (c1==
')') fold_state = 1;
3942 else if (c1==
'*') fold_state = 1;
3943 else if (c1==
'+') fold_state = 1;
3944 else if (c1==
',') fold_state = 1;
3960 switch(fold_state) {
3995 if (c1 == (0xde&0x7f)) {
3999 }
else if (c1 == (0xdf&0x7f) &&
ev[(
z_prev1-
SP)*2]) {
4019 (*o_zconv)(
cv[(c1-
SP)*2],
cv[(c1-
SP)*2+1]);
4030 if (
alpha_f&1 && c2 == 0x23) {
4033 }
else if (c2 == 0x21) {
4044 }
else if (
alpha_f&1 && 0x20<c1 && c1<0x7f &&
fv[c1-0x20]) {
4052 const char *entity = 0;
4054 case '>': entity =
">";
break;
4055 case '<': entity =
"<";
break;
4056 case '\"': entity =
""";
break;
4057 case '&': entity =
"&";
break;
4060 while (*entity) (*o_zconv)(0, *entity++);
4107 }
else if (c2 == 0x25) {
4109 static const int fullwidth_to_halfwidth[] =
4111 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
4112 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
4113 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
4114 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
4115 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
4116 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
4117 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
4118 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
4119 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
4120 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
4121 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
4122 0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
4124 if (fullwidth_to_halfwidth[c1-0x20]){
4125 c2 = fullwidth_to_halfwidth[c1-0x20];
4142 #define rot13(c) ( \
4144 (c <= 'M') ? (c + 13): \
4145 (c <= 'Z') ? (c - 13): \
4147 (c <= 'm') ? (c + 13): \
4148 (c <= 'z') ? (c - 13): \
4152 #define rot47(c) ( \
4154 ( c <= 'O') ? (c + 47) : \
4155 ( c <= '~') ? (c - 47) : \
4168 (*o_rot_conv)(c2,c1);
4176 if (0x20 < c1 && c1 < 0x74) {
4178 (*o_hira_conv)(c2,c1);
4183 (*o_hira_conv)(c2,c1);
4186 }
else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
4188 (*o_hira_conv)(c2,c1);
4196 }
else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
4198 }
else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
4202 (*o_hira_conv)(c2,c1);
4209 #define RANGE_NUM_MAX 18
4233 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4237 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4243 start = range[
i][0];
4246 if (c >= start && c <= end) {
4251 (*o_iso2022jp_check_conv)(c2,c1);
4258 (
const unsigned char *)
"\075?EUC-JP?B?",
4259 (
const unsigned char *)
"\075?SHIFT_JIS?B?",
4260 (
const unsigned char *)
"\075?ISO-8859-1?Q?",
4261 (
const unsigned char *)
"\075?ISO-8859-1?B?",
4262 (
const unsigned char *)
"\075?ISO-2022-JP?B?",
4263 (
const unsigned char *)
"\075?ISO-2022-JP?B?",
4264 (
const unsigned char *)
"\075?ISO-2022-JP?Q?",
4266 (
const unsigned char *)
"\075?UTF-8?B?",
4267 (
const unsigned char *)
"\075?UTF-8?Q?",
4269 (
const unsigned char *)
"\075?US-ASCII?Q?",
4277 #if defined(UTF8_INPUT_ENABLE)
4285 #if defined(UTF8_INPUT_ENABLE)
4293 'B',
'B',
'Q',
'B',
'B',
'B',
'Q',
4294 #if defined(UTF8_INPUT_ENABLE)
/*
 * Geometry of the MIME input buffer reached through mime_input_state.buf.
 * MIME_BUF_MASK is MIME_BUF_SIZE-1, so `index & MIME_BUF_MASK` equals
 * `index % MIME_BUF_SIZE` only while the size is a power of two (1024 is).
 * Keep that property if the size is ever changed.
 * NOTE(review): the buffer itself is presumably declared as
 * `unsigned char buf[MIME_BUF_SIZE]` -- confirm at the struct definition.
 */
4304 #define MIME_BUF_SIZE (1024)
4305 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
/* Slot n of the buffer; n is masked first, so any index wraps into range. */
4306 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
/*
 * Upper bound for the pre-incremented counter `i` in the character-reading
 * loops that follow: they stop with `if (!(++i<MAXRECOVER) || c1==EOF) break;`.
 * NOTE(review): presumably this limits how many characters are examined (and
 * may have to be pushed back) while probing for a MIME header -- confirm
 * against the enclosing function.
 */
4315 #define MAXRECOVER 20
4334 (*i_mungetc_buf)(c,
f);
4392 if (c==
'=' && d==
'?') {
4400 if (!( (c==
'+'||c==
'/'|| c==
'=' || c==
'?' ||
is_alnum(c))))
4419 const unsigned char *
p,*q;
4425 p = mime_pattern[j];
4428 for(i=2;p[
i]>
SP;i++) {
4432 while (mime_pattern[++j]) {
4433 p = mime_pattern[j];
4435 if (p[k]!=q[k])
break;
4438 p = mime_pattern[j];
4481 if (c1==
LF||c1==
SP||c1==
CR||
4482 c1==
'-'||c1==
'_'||
is_alnum(c1))
continue;
4493 if (!(++i<MAXRECOVER) || c1==
EOF)
break;
4494 if (c1==
'b'||c1==
'B') {
4496 }
else if (c1==
'q'||c1==
'Q') {
4502 if (!(++i<MAXRECOVER) || c1==
EOF)
break;
4534 fprintf(stderr,
"%s\n", str ? str :
"NULL");
4578 #if !defined(PERL_XS) && !defined(WIN32DLL)
4582 if (filename !=
NULL) printf(
"%s: ", filename);
4590 printf(
"%s%s%s%s\n",
4656 #ifdef NUMCHAR_OPTION
4672 if (buf[i] ==
'x' || buf[i] ==
'X'){
4673 for (j = 0; j < 7; j++){
4685 for (j = 0; j < 8; j++){
4718 #ifdef UNICODE_NORMALIZATION
4726 const unsigned char *array;
4727 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4730 if (c ==
EOF || c > 0xFF || (c & 0xc0) == 0x80)
return c;
4734 while (lower <= upper) {
4735 int mid = (lower+upper) / 2;
4738 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4743 lower = 1, upper = 0;
4749 if (array[len] <
nkf_buf_at(buf, len)) lower = mid + 1;
4750 else upper = mid - 1;
4759 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[
i]; i++)
4764 }
while (lower <= upper);
4787 }
else if (c ==
'_') {
4792 }
else if (c >
'/') {
4794 }
else if (c ==
'+' || c ==
'-') {
4806 nkf_char t1, t2, t3, t4, mode, exit_mode;
4829 if (c1<=
SP ||
DEL<=c1) {
4843 lwsp_buf =
nkf_xmalloc((lwsp_size+5)*
sizeof(
char));
4873 lwsp_buf[lwsp_count] = (
unsigned char)c1;
4874 if (lwsp_count++>lwsp_size){
4876 lwsp_buf_new =
nkf_xrealloc(lwsp_buf, (lwsp_size+5)*
sizeof(
char));
4877 lwsp_buf = lwsp_buf_new;
4883 if (lwsp_count > 0 && (c1 !=
'=' || (lwsp_buf[lwsp_count-1] !=
SP && lwsp_buf[lwsp_count-1] !=
TAB))) {
4885 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4892 if (c1==
'='&&c2<
SP) {
4894 if (c1 ==
EOF)
return (
EOF);
4897 goto restart_mime_q;
4905 if (c2<=
SP)
return c2;
4939 if ((c1 ==
'?') && (c2 ==
'=')) {
4942 lwsp_buf =
nkf_xmalloc((lwsp_size+5)*
sizeof(
char));
4975 lwsp_buf[lwsp_count] = (
unsigned char)c1;
4976 if (lwsp_count++>lwsp_size){
4978 lwsp_buf_new =
nkf_xrealloc(lwsp_buf, (lwsp_size+5)*
sizeof(
char));
4979 lwsp_buf = lwsp_buf_new;
4985 if (lwsp_count > 0 && (c1 !=
'=' || (lwsp_buf[lwsp_count-1] !=
SP && lwsp_buf[lwsp_count-1] !=
TAB))) {
4987 for(lwsp_count--;lwsp_count>0;lwsp_count--)
5019 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
5022 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
5025 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
5036 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
5038 #define MIMEOUT_BUF_LENGTH 74
5049 const unsigned char *
p;
5052 p = mime_pattern[0];
5053 for(i=0;mime_pattern[
i];i++) {
5054 if (mode == mime_encode[i]) {
5055 p = mime_pattern[
i];
5098 (*o_base64conv)(
EOF,0);
5100 (*o_base64conv)(0,
SP);
5105 (*o_base64conv)(
EOF,0);
5107 (*o_base64conv)(0,
SP);
5116 (*o_base64conv)(
EOF,0);
5118 (*o_base64conv)(0,
SP);
5142 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0x3)<< 4)]);
5148 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0xF) << 2)]);
5171 (*o_mputc)(
bin2hex(((c>>4)&0xf)));
5181 (*o_mputc)(basis_64[c>>2]);
5186 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5192 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
5193 (*o_mputc)(basis_64[c & 0x3F]);
5213 if (c!=
CR && c!=
LF) {
5276 if (c ==
CR || c ==
LF) {
5281 }
else if (c <=
SP) {
5320 if (c==
CR || c==
LF) {
5347 static const char *str =
"boundary=\"";
5348 static int len = 10;
5368 for (j = 0; j <=
i; ++j) {
5386 if (lastchar==
CR || lastchar ==
LF){
5407 if (lastchar ==
CR || lastchar ==
LF){
5483 (*o_base64conv)(c2,c1);
5487 typedef struct nkf_iconv_t {
5490 size_t input_buffer_size;
5491 char *output_buffer;
5492 size_t output_buffer_size;
5496 nkf_iconv_new(
char *tocode,
char *fromcode)
5498 nkf_iconv_t converter;
5501 converter->input_buffer =
nkf_xmalloc(converter->input_buffer_size);
5502 converter->output_buffer_size =
IOBUF_SIZE * 2;
5503 converter->output_buffer =
nkf_xmalloc(converter->output_buffer_size);
5504 converter->cd = iconv_open(tocode, fromcode);
5505 if (converter->cd == (iconv_t)-1)
5509 perror(fprintf(
"iconv doesn't support %s to %s conversion.", fromcode, tocode));
5512 perror(
"can't iconv_open");
5518 nkf_iconv_convert(nkf_iconv_t *converter,
FILE *
input)
5520 size_t invalid = (size_t)0;
5521 char *input_buffer = converter->input_buffer;
5522 size_t input_length = (size_t)0;
5523 char *output_buffer = converter->output_buffer;
5524 size_t output_length = converter->output_buffer_size;
5530 input_buffer[input_length++] = c;
5531 if (input_length < converter->input_buffer_size)
break;
5535 size_t ret =
iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5536 while (output_length-- > 0) {
5537 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5539 if (ret == (
size_t) - 1) {
5542 if (input_buffer != converter->input_buffer)
5543 memmove(converter->input_buffer, input_buffer, input_length);
5546 converter->output_buffer_size *= 2;
5547 output_buffer =
realloc(converter->outbuf, converter->output_buffer_size);
5548 if (output_buffer ==
NULL) {
5549 perror(
"can't realloc");
5552 converter->output_buffer = output_buffer;
5555 perror(
"can't iconv");
5568 nkf_iconv_close(nkf_iconv_t *convert)
5572 iconv_close(converter->cd);
5601 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5604 #ifdef UTF8_INPUT_ENABLE
5611 #ifdef UTF8_OUTPUT_ENABLE
5615 #ifdef UNICODE_NORMALIZATION
5631 #ifdef SHIFTJIS_CP932
5641 for (i = 0; i < 256; i++){
5687 input_encoding =
NULL;
5688 output_encoding =
NULL;
5699 if (!output_encoding) {
5702 if (!output_encoding) {
5766 #ifdef NUMCHAR_OPTION
5772 #ifdef UNICODE_NORMALIZATION
5786 if (input_encoding) {
5805 #if !defined(PERL_XS) && !defined(WIN32DLL)
/*
 * Loop-control shorthands; each is meant to be written as `NAME;` inside a
 * loop whose body has variables named c1 and c2 in scope.
 *
 *   NEXT  - `continue` the enclosing loop.
 *   SKIP  - set c2 to 0, then `continue` the enclosing loop.
 *   MORE  - copy c1 into c2, then `continue` the enclosing loop.
 *   SEND  - no-op expression statement; execution falls through.
 *
 * SKIP and MORE are wrapped in `if (1) { ... } else (void)0` so that each use
 * is exactly one statement.  In the bare `c2=0;continue` form, an unbraced
 * `if (cond) SKIP;` guarded only the assignment and executed `continue`
 * unconditionally.  The usual `do { ... } while (0)` wrapper cannot be used
 * here: `continue` would bind to that inner do-loop instead of the caller's
 * loop.  The trailing `else (void)0` swallows the caller's semicolon and keeps
 * a following `else` attached to the caller's own `if`.
 */
#define NEXT continue
#define SKIP if (1) { c2 = 0; continue; } else (void)0
#define MORE if (1) { c2 = c1; continue; } else (void)0
#define SEND (void)0
5825 #define set_input_mode(mode) do { \
5826 input_mode = mode; \
5828 set_input_codename("ISO-2022-JP"); \
5829 debug("ISO-2022-JP"); \
5838 int is_8bit =
FALSE;
5848 #if !defined(PERL_XS) && !defined(WIN32DLL)
5849 fprintf(stderr,
"no output encoding given\n");
5855 #ifdef UTF8_INPUT_ENABLE
5907 #ifdef INPUT_CODE_FIX
5908 if (!input_encoding)
5950 0xA1 <= c1 && c1 <= 0xDF) {
5955 }
else if (c1 >
DEL) {
5966 else if ((
iconv ==
s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5978 }
else if (
SP < c1 && c1 <
DEL) {
6002 }
else if (c1 ==
'?') {
6032 else if (c1 ==
'&') {
6040 else if (c1 ==
'$') {
6047 }
else if (c1 ==
'@' || c1 ==
'B') {
6051 }
else if (c1 ==
'(') {
6060 }
else if (c1 ==
'@'|| c1 ==
'B') {
6065 }
else if (c1 ==
'D'){
6069 }
else if (c1 ==
'O' || c1 ==
'Q'){
6072 }
else if (c1 ==
'P'){
6094 }
else if (c1 ==
'(') {
6102 else if (c1 ==
'I') {
6108 else if (c1 ==
'B' || c1 ==
'J' || c1 ==
'H') {
6123 else if (c1 ==
'.') {
6128 else if (c1 ==
'A') {
6139 else if (c1 ==
'N') {
6162 }
else if (c1 ==
'$') {
6166 }
else if ((
'E' <= c1 && c1 <=
'G') ||
6167 (
'O' <= c1 && c1 <=
'Q')) {
6175 static const nkf_char jphone_emoji_first_table[7] =
6176 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
6179 while (
SP <= c1 && c1 <=
'z') {
6180 (*oconv)(0, c1 + c3);
6196 }
else if (c1 ==
LF || c1 ==
CR) {
6215 }
else if (c1 ==
LF && (c1=(*
i_getc)(f))!=
EOF && c1 ==
SP) {
6235 switch ((*
iconv)(c2, c1, 0)) {
6243 (*iconv)(c2, c1, c3|c4);
6268 if ((*
iconv)(c2, c1, c3) == -3) {
6300 0x7F <= c2 && c2 <= 0x92 &&
6301 0x21 <= c1 && c1 <= 0x7E) {
6328 (*iconv)(
EOF, 0, 0);
6359 unsigned char *cp_back =
NULL;
6364 while(*cp && *cp++!=
'-');
6365 while (*cp || cp_back) {
6374 if (!*cp || *cp ==
SP) {
6380 for (j=0;*p && *p !=
'=' && *p == cp[j];p++, j++);
6381 if (*p == cp[j] || cp[j] ==
SP){
6388 #if !defined(PERL_XS) && !defined(WIN32DLL)
6389 fprintf(stderr,
"unknown long option: --%s\n", cp);
6393 while(*cp && *cp !=
SP && cp++);
6407 input_encoding = enc;
6414 output_encoding = enc;
6418 if (p[0] ==
'0' || p[0] ==
'1') {
6426 if (strcmp(
long_option[i].name,
"overwrite") == 0){
6432 if (strcmp(
long_option[i].name,
"overwrite=") == 0){
6440 if (strcmp(
long_option[i].name,
"in-place") == 0){
6446 if (strcmp(
long_option[i].name,
"in-place=") == 0){
6456 if (strcmp(
long_option[i].name,
"cap-input") == 0){
6460 if (strcmp(
long_option[i].name,
"url-input") == 0){
6465 #ifdef NUMCHAR_OPTION
6466 if (strcmp(
long_option[i].name,
"numchar-input") == 0){
6472 if (strcmp(
long_option[i].name,
"no-output") == 0){
6482 #ifdef SHIFTJIS_CP932
6486 #ifdef UTF8_OUTPUT_ENABLE
6491 if (strcmp(
long_option[i].name,
"no-cp932") == 0){
6492 #ifdef SHIFTJIS_CP932
6496 #ifdef UTF8_OUTPUT_ENABLE
6501 #ifdef SHIFTJIS_CP932
6502 if (strcmp(
long_option[i].name,
"cp932inv") == 0){
6520 if (strcmp(
long_option[i].name,
"exec-out") == 0){
6525 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6526 if (strcmp(
long_option[i].name,
"no-cp932ext") == 0){
6530 if (strcmp(
long_option[i].name,
"no-best-fit-chars") == 0){
6554 if (strcmp(
long_option[i].name,
"fb-subchar") == 0){
6558 if (strcmp(
long_option[i].name,
"fb-subchar=") == 0){
6567 }
else if(p[1] ==
'x' || p[1] ==
'X'){
6585 #ifdef UTF8_OUTPUT_ENABLE
6586 if (strcmp(
long_option[i].name,
"ms-ucs-map") == 0){
6591 #ifdef UNICODE_NORMALIZATION
6592 if (strcmp(
long_option[i].name,
"utf8mac-input") == 0){
6605 #if !defined(PERL_XS) && !defined(WIN32DLL)
6606 fprintf(stderr,
"unsupported long option: --%s\n",
long_option[i].name);
6622 }
else if (*cp==
'2') {
6649 if (*cp==
'@'||*cp==
'B')
6654 if (*cp==
'J'||*cp==
'B'||*cp==
'H')
6662 if (
'9'>= *cp && *cp>=
'0')
6670 #if defined(MSDOS) || defined(__OS2__)
6685 #ifdef UTF8_OUTPUT_ENABLE
6698 if (
'1'== cp[0] &&
'6'==cp[1]) {
6701 }
else if (
'3'== cp[0] &&
'2'==cp[1]) {
6712 }
else if (cp[0] ==
'B') {
6719 enc_idx = enc_idx ==
UTF_16
6723 enc_idx = enc_idx ==
UTF_16
6731 #ifdef UTF8_INPUT_ENABLE
6738 if (
'1'== cp[0] &&
'6'==cp[1]) {
6742 }
else if (
'3'== cp[0] &&
'2'==cp[1]) {
6753 }
else if (cp[0] ==
'B') {
6757 enc_idx = (enc_idx ==
UTF_16
6782 while (
'0'<= *cp && *cp <=
'4') {
6783 alpha_f |= 1 << (*cp++ -
'0');
6808 while(
'0'<= *cp && *cp <=
'9') {
6817 while(
'0'<= *cp && *cp <=
'9') {
6825 if (*cp==
'B'||*cp==
'Q') {
6828 }
else if (*cp==
'N') {
6830 }
else if (*cp==
'S') {
6832 }
else if (*cp==
'0') {
6843 }
else if (*cp==
'Q') {
6855 if (
'9'>= *cp && *cp>=
'0')
6877 }
else if (*cp==
'm') {
6879 }
else if (*cp==
'w') {
6881 }
else if (*cp==
'0') {
6887 if (
'2' <= *cp && *cp <=
'9') {
6890 }
else if (*cp ==
'0' || *cp ==
'1') {
6900 while(*cp && *cp++!=
'-');
6903 #if !defined(PERL_XS) && !defined(WIN32DLL)
6904 fprintf(stderr,
"unknown option: -%c\n", *(cp-1));
6914 #include "nkf32dll.c"
6915 #elif defined(PERL_XS)
6923 char *outfname =
NULL;
6927 _BufferSize.y = 400;
6929 #ifdef DEFAULT_CODE_LOCALE
6930 setlocale(LC_CTYPE,
"");
6934 for (argc--,argv++; (argc > 0) && **argv ==
'-'; argc--, argv++) {
6935 cp = (
unsigned char *)*argv;
6940 if (pipe(fds) < 0 || (pid = fork()) < 0){
6951 execvp(argv[1], &argv[1]);
6971 int exec_f_back = exec_f;
6985 exec_f = exec_f_back;
6992 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6993 if (freopen(
"",
"wb",stdout) ==
NULL)
7000 setbuf(stdout, (
char *)
NULL);
7006 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7007 if (freopen(
"",
"rb",stdin) ==
NULL)
return (-1);
7020 int is_argument_error =
FALSE;
7027 if ((fin = fopen((origfname = *argv++),
"r")) ==
NULL) {
7029 is_argument_error =
TRUE;
7042 +
strlen(
".nkftmpXXXXXX")
7044 strcpy(outfname, origfname);
7048 for (i =
strlen(outfname);
i; --
i){
7049 if (outfname[i - 1] ==
'/'
7050 || outfname[i - 1] ==
'\\'){
7056 strcat(outfname,
"ntXXXXXX");
7058 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
7059 S_IREAD | S_IWRITE);
7061 strcat(outfname,
".nkftmpXXXXXX");
7062 fd = mkstemp(outfname);
7065 || (fd_backup = dup(
fileno(stdout))) < 0
7077 outfname =
"nkf.out";
7080 if(freopen(outfname,
"w", stdout) ==
NULL) {
7085 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7086 if (freopen(
"",
"wb",stdout) ==
NULL)
7094 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7095 if (freopen(
"",
"rb",fin) ==
NULL)
7104 char *filename =
NULL;
7106 if (nfiles > 1) filename = origfname;
7113 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7124 if (
stat(origfname, &sb)) {
7125 fprintf(stderr,
"Can't stat %s\n", origfname);
7128 if (chmod(outfname, sb.st_mode)) {
7129 fprintf(stderr,
"Can't set permission %s\n", outfname);
7134 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7135 tb[0] = tb[1] = sb.st_mtime;
7136 if (
utime(outfname, tb)) {
7137 fprintf(stderr,
"Can't set timestamp %s\n", outfname);
7142 if (
utime(outfname, &tb)) {
7143 fprintf(stderr,
"Can't set timestamp %s\n", outfname);
7150 unlink(backup_filename);
7152 if (rename(origfname, backup_filename)) {
7153 perror(backup_filename);
7154 fprintf(stderr,
"Can't rename %s to %s\n",
7155 origfname, backup_filename);
7160 if (unlink(origfname)){
7165 if (rename(outfname, origfname)) {
7167 fprintf(stderr,
"Can't rename %s to %s\n",
7168 outfname, origfname);
7175 if (is_argument_error)
7180 scanf(
"%d",&end_check);
#define nkf_char_unicode_new(c)
static int x0213_combining_p(nkf_char wc)
const unsigned short x0213_combining_chars[sizeof_x0213_combining_chars]
#define OUTPUT_UTF16(val)
#define output_ascii_escape_sequence(mode)
static nkf_char nkf_buf_at(nkf_buf_t *buf, int index)
static void status_check(struct input_code *ptr, nkf_char c)
static nkf_char hold_buf[HOLD_SIZE *2]
static void output_escape_sequence(int mode)
#define OUTPUT_UTF16_BYTES(c1, c2)
nkf_native_encoding NkfEncodingUTF_32
static nkf_char mime_begin(FILE *f)
static void set_iconv(nkf_char f, nkf_char(*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0))
static void encode_fallback_xml(nkf_char c)
#define NKF_ICONV_INVALID_CODE_RANGE
const unsigned short *const x0212_shiftjis[]
static void * nkf_xmalloc(size_t size)
static nkf_char url_ungetc(nkf_char c, FILE *f)
size_t strlen(const char *)
static size_t unicode_iconv(nkf_char wc, int nocombine)
static size_t nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static nkf_encoding * nkf_utf8_encoding()
static nkf_char nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static const nkf_char score_table_8FF0[]
#define NKF_ICONV_WAIT_COMBINING_CHAR
static nkf_char std_getc(FILE *f)
const unsigned short *const *const utf8_to_euc_3bytes_932[]
static nkf_char mime_ungetc_buf(nkf_char c, FILE *f)
static nkf_char(* i_cungetc)(nkf_char c, FILE *f)
static void(* o_mputc)(nkf_char c)
static void nkf_buf_push(nkf_buf_t *buf, nkf_char c)
static size_t nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
#define nkf_enc_asciicompat(enc)
#define nkf_enc_name(enc)
nkf_native_encoding NkfEncodingASCII
static void eof_mime(void)
static nkf_char(* i_mungetc)(nkf_char c, FILE *f)
static nkf_char e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1)
static void s_status(struct input_code *, nkf_char)
static int h_conv(FILE *f, nkf_char c1, nkf_char c2)
static void e_oconv(nkf_char c2, nkf_char c1)
static nkf_char base64decode(nkf_char c)
static void(* o_iso2022jp_check_conv)(nkf_char c2, nkf_char c1)
static void(* o_eol_conv)(nkf_char c2, nkf_char c1)
static nkf_char(* iconv_for_check)(nkf_char c2, nkf_char c1, nkf_char c0)=0
static void j_oconv(nkf_char c2, nkf_char c1)
const unsigned short *const utf8_to_euc_2bytes_932[]
static void base64_conv(nkf_char c2, nkf_char c1)
SSL_METHOD *(* func)(void)
#define nkf_enc_to_iconv(enc)
nkf_encoding nkf_encoding_table[]
const unsigned short cp932inv[2][189]
static char * backup_suffix
#define nkf_char_unicode_p(c)
static const struct @8 long_option[]
static size_t nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2)
static const unsigned char ev_x0213[]
static void nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
static void status_push_ch(struct input_code *ptr, nkf_char c)
static void status_reinit(struct input_code *ptr)
#define UTF16_TO_UTF32(lead, trail)
static nkf_char numchar_getc(FILE *f)
#define nkf_char_unicode_value_p(c)
static void w_oconv(nkf_char c2, nkf_char c1)
static const char * input_codename
static nkf_char mime_ungetc(nkf_char c, ARG_UNUSED FILE *f)
static int is_x0213_2_in_x0212(nkf_char c1)
static struct @9 mime_input_state
static int nkf_enc_find_index(const char *name)
#define nkf_buf_length(buf)
static nkf_char unicode_iconv_combine(nkf_char wc, nkf_char wc2)
static const char * nkf_locale_charmap()
static int kanji_convert(FILE *f)
static nkf_encoding * nkf_locale_encoding()
static void switch_mime_getc(void)
const unsigned short *const euc_to_utf8_2bytes[]
#define nkf_noescape_mime(c)
static nkf_char(* i_ugetc)(FILE *)
#define nkf_char_unicode_bmp_p(c)
const nkf_native_encoding * base_encoding
static nkf_char mime_getc_buf(FILE *f)
static char * get_backup_filename(const char *suffix, const char *filename)
#define DEFAULT_CODE_LOCALE
static void no_connection(nkf_char c2, nkf_char c1)
static void status_clear(struct input_code *ptr)
static const unsigned char dv[]
#define MIME_DECODE_DEFAULT
static const nkf_char mime_encode_method[]
static void print_guessed_code(char *filename)
static unsigned char ascii_intro
#define NKF_ICONV_NOT_COMBINED
RUBY_EXTERN void * memmove(void *, const void *, size_t)
static void encode_fallback_subchar(nkf_char c)
static nkf_char(* iconv)(nkf_char c2, nkf_char c1, nkf_char c0)
static nkf_char x0212_shift(nkf_char c)
static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
static nkf_encoding * input_encoding
static unsigned char prefix_table[256]
static nkf_char(* mime_iconv_back)(nkf_char c2, nkf_char c1, nkf_char c0)
static void s_oconv(nkf_char c2, nkf_char c1)
static nkf_char(* i_getc)(FILE *f)
static nkf_char cap_getc(FILE *f)
static int mime_decode_mode
static nkf_char e2w_conv(nkf_char c2, nkf_char c1)
static nkf_char(* i_mgetc)(FILE *)
static void * nkf_xrealloc(void *ptr, size_t size)
static nkf_char w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6)
static unsigned char kanji_intro
const unsigned short x0213_2_surrogate_table[sizeof_x0213_2_surrogate_table][3]
const unsigned short euc_to_utf8_1byte[]
static void w_oconv32(nkf_char c2, nkf_char c1)
static nkf_char(* i_uungetc)(nkf_char c, FILE *f)
struct @7 encoding_name_to_id_table[]
static void set_code_score(struct input_code *ptr, nkf_char score)
static void set_input_encoding(nkf_encoding *enc)
static int fold_preserve_f
static void nkf_state_init(void)
static const nkf_char mime_encode[]
struct input_code input_code_list[]
#define NKF_ICONV_NEED_TWO_MORE_BYTES
static void oconv_newline(void(*func)(nkf_char, nkf_char))
static nkf_char mime_integrity(FILE *f, const unsigned char *p)
#define nkf_enc_unicode_p(enc)
static size_t nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static const nkf_char score_table_F0[]
static void encode_fallback_java(nkf_char c)
#define nkf_buf_empty_p(buf)
nkf_native_encoding NkfEncodingISO_2022_JP
static int unicode_to_jis_common2(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
static nkf_char mime_getc(FILE *f)
static void mimeout_addchar(nkf_char c)
static nkf_char push_hold_buf(nkf_char c2)
static void iso2022jp_check_conv(nkf_char c2, nkf_char c1)
static void put_newline(void(*func)(nkf_char))
static int x0213_wait_combining_p(nkf_char wc)
#define set_input_mode(mode)
static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
const unsigned short *const euc_to_utf8_2bytes_ms[]
#define range(low, item, hi)
static const char * get_guessed_code(void)
static unsigned char stdobuf[IOBUF_SIZE]
static nkf_char(* i_nungetc)(nkf_char c, FILE *f)
static nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
static nkf_char(* i_bgetc)(FILE *)
static const nkf_char score_table_8FA0[]
static nkf_char(* i_mungetc_buf)(nkf_char c, FILE *f)
static nkf_char cap_ungetc(nkf_char c, FILE *f)
#define is_ibmext_in_sjis(c2)
const unsigned short x0213_1_surrogate_table[sizeof_x0213_1_surrogate_table][3]
static nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
static void code_score(struct input_code *ptr)
static nkf_char std_ungetc(nkf_char c, FILE *f)
static struct input_code * find_inputcode_byfunc(nkf_char(*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0))
static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
static void set_input_codename(const char *codename)
static int preserve_time_f
static void check_bom(FILE *f)
static void encode_fallback_html(nkf_char c)
static nkf_char nfc_getc(FILE *f)
static nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
static nkf_char(* i_mgetc_buf)(FILE *)
static nkf_char broken_ungetc(nkf_char c, FILE *f)
const unsigned short *const *const utf8_to_euc_3bytes_x0213[]
#define UTF8_INPUT_ENABLE
static const char basis_64[]
static void hira_conv(nkf_char c2, nkf_char c1)
const unsigned short x0213_combining_table[sizeof_x0213_combining_table][3]
static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
unsigned char buf[MIME_BUF_SIZE]
#define mime_input_buf(n)
static int nkf_str_caseeql(const char *src, const char *target)
static nkf_char url_getc(FILE *f)
#define nkf_enc_to_index(enc)
static const nkf_char score_table_8FE0[]
static int options(unsigned char *cp)
static nkf_char noconvert(FILE *f)
static void(* oconv)(nkf_char c2, nkf_char c1)
static nkf_char nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8)
static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
static void mime_input_buf_unshift(nkf_char c)
static void show_configuration(void)
static void set_output_encoding(nkf_encoding *enc)
nkf_native_encoding NkfEncodingUTF_16
const unsigned short *const utf8_to_euc_2bytes_ms[]
static void shift(struct cparse_params *v, long act, VALUE tok, VALUE val)
static void(* o_hira_conv)(nkf_char c2, nkf_char c1)
static nkf_char(* i_bungetc)(nkf_char c, FILE *f)
static struct @10 mimeout_state
#define nkf_byte_jisx0201_katakana_p(c)
const unsigned short *const *const utf8_to_euc_3bytes_mac[]
const unsigned short *const x0212_to_utf8_2bytes[]
const unsigned short *const euc_to_utf8_2bytes_x0213[]
static void z_conv(nkf_char c2, nkf_char c1)
static nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
#define X0213_SURROGATE_FIND(tbl, size, euc)
const unsigned short shiftjis_x0212[3][189]
static void w_status(struct input_code *, nkf_char)
static void(* o_rot_conv)(nkf_char c2, nkf_char c1)
#define setvbuffer(fp, buf, size)
static void eol_conv(nkf_char c2, nkf_char c1)
static void mime_putc(nkf_char c)
static nkf_char(* i_ngetc)(FILE *)
static const nkf_char score_table_A0[]
int utime(const char *filename, const struct utimbuf *times)
const struct normalization_pair normalization_table[]
static void(* o_base64conv)(nkf_char c2, nkf_char c1)
static nkf_encoding * nkf_enc_find(const char *name)
const unsigned short *const euc_to_utf8_2bytes_mac[]
const unsigned short *const utf8_to_euc_2bytes[]
static const unsigned char cv[]
static nkf_char utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static const unsigned char fv[]
static nkf_char unicode_subchar
static int module_connection(void)
static void rot_conv(nkf_char c2, nkf_char c1)
static void debug(const char *str)
static void(* o_zconv)(nkf_char c2, nkf_char c1)
static size_t nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static nkf_char(* i_nfc_getc)(FILE *)
const unsigned short shiftjis_cp932[3][189]
static const unsigned char ev[]
#define char_size(c2, c1)
static void(* o_putc)(nkf_char c)
static nkf_char nfc_ungetc(nkf_char c, FILE *f)
nkf_native_encoding NkfEncodingShift_JIS
static nkf_char(* i_nfc_ungetc)(nkf_char c, FILE *f)
static void(* encode_fallback)(nkf_char c)
static nkf_buf_t * nkf_buf_new(int length)
#define UTF8_OUTPUT_ENABLE
#define nkf_enc_cp5022x_p(enc)
static nkf_char(* i_ungetc)(nkf_char c, FILE *f)
static unsigned char stdibuf[IOBUF_SIZE]
static nkf_char x0212_unshift(nkf_char c)
int main(int argc, char **argv)
static void w_oconv16(nkf_char c2, nkf_char c1)
#define assert(condition)
static void e_status(struct input_code *, nkf_char)
static void unswitch_mime_getc(void)
static nkf_char w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3)
RUBY_EXTERN int dup2(int, int)
const unsigned short *const *const utf8_to_euc_3bytes[]
static void mime_prechar(nkf_char c2, nkf_char c1)
const unsigned short *const x0212_to_utf8_2bytes_x0213[]
static nkf_char hex_getc(nkf_char ch, FILE *f, nkf_char(*g)(FILE *f), nkf_char(*u)(nkf_char c, FILE *f))
static int unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
static void clr_code_score(struct input_code *ptr, nkf_char score)
const unsigned short *const utf8_to_euc_2bytes_mac[]
static void version(void)
#define nkf_enc_to_oconv(enc)
#define MIMEOUT_BUF_LENGTH
const unsigned short *const *const utf8_to_euc_3bytes_ms[]
nkf_native_encoding NkfEncodingUTF_8
static nkf_encoding * output_encoding
static nkf_char(* i_cgetc)(FILE *)
static nkf_char numchar_ungetc(nkf_char c, FILE *f)
static void(* o_fconv)(nkf_char c2, nkf_char c1)
static nkf_encoding * nkf_default_encoding()
static nkf_char no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0)
static const unsigned char * mime_pattern[]
static void nkf_buf_clear(nkf_buf_t *buf)
static nkf_state_t * nkf_state
static void open_mime(nkf_char mode)
static int no_best_fit_chars_f
static nkf_char nkf_buf_pop(nkf_buf_t *buf)
nkf_char(* mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0)
static ULONG(STDMETHODCALLTYPE AddRef)(IDispatch __RPC_FAR *This)
static void fold_conv(nkf_char c2, nkf_char c1)
static void encode_fallback_perl(nkf_char c)
static nkf_char mime_begin_strict(FILE *f)
const unsigned short *const utf8_to_euc_2bytes_x0213[]
static void no_putc(nkf_char c)
static void status_reset(struct input_code *ptr)
static void close_mime(void)
static nkf_char broken_getc(FILE *f)
static void code_status(nkf_char c)
static void std_putc(nkf_char c)
static void nkf_each_char_to_hex(void(*f)(nkf_char c2, nkf_char c1), nkf_char c)
static nkf_encoding * nkf_enc_from_index(int idx)
static void status_disable(struct input_code *ptr)
nkf_native_encoding NkfEncodingEUC_JP