View Raw SPL
/*****************************************************************************
*                                                                            *
*   JIS2UTF8.SPL  Copyright (C) 2024 DSP Development Corporation             *
*                               All Rights Reserved                          *
*                                                                            *
*   Author:       Randy Race                                                 *
*                                                                            *
*   Synopsis:     Converts Japanese Shift-JIS to UTF8                        *
*                                                                            *
*   Revisions:    21 Jul 2024  RRR  Creation                                 *
*                                                                            *
*****************************************************************************/


#if @HELP_JIS2UTF8

    JIS2UTF8

    Purpose: Converts Japanese Shift-JIS file to UTF-8 file.

    Syntax:  JIS2UTF8(infile, outfile, overwrite)

                infile - A string. The Shift-JIS file to convert.

               outfile - Optional. A string, the name of the output
                         file. Defaults to infile.

              overwrite - Optional. An integer, the overwrite flag if
                          outfile already exists.

                            0: prompt before overwriting file (default)

                            1: overwrite file if it exists without prompting

           

    Returns: An integer, the number of UTF-8 bytes written to outfile. Each
             Shift-JIS character in the source file is converted to UTF-8
             encoding.

    Example:
             jis2utf8("myfile.txt")

             converts "myfile.txt" from Shift-JIS to UTF-8 in place.

    Remarks:
             Shift-JIS (also written as Shift_JIS or SJIS) is a character
             encoding system designed to represent Japanese text on computers.
             It was developed in the 1980s by ASCII Corporation and Microsoft,
             based on the Japanese Industrial Standards JIS X 0201 and JIS X 0208.

             UTF-8 is the newer, dominant character encoding used across the
             internet and modern computing.      

    See Also:
             charstrsutf16
             filestrrep
             strchars
             strrep
#endif


/*
 * jis2utf8 - convert a Shift-JIS encoded file to a UTF-8 encoded file
 *
 *   fname     - a string, the Shift-JIS file to read (required)
 *   outname   - a string, the file to write; defaults to fname, in
 *               which case the source file itself is the write target
 *   overwrite - an integer, the overwrite flag handed to writeb
 *
 * Returns the number of UTF-8 bytes written to outname.
 * Raises an error if fname is not a non-empty string.
 */
jis2utf8(fname = "", outname = fname, overwrite = 0)
{
        local jisbytes, utf16, s, utf8bytes;

        if (not(isstring(fname)) || strlen(fname) == 0)
        {
                error(sprintf("%s - filename required", __FUNC__));
        }

        /* read the source file as raw unsigned bytes */
        jisbytes = readb(fname, UBYTE);

        /* convert to utf16 using the 932 (Shift-JIS) codepage */
        utf16 = charstrsutf16(strchars(jisbytes), 932);

        /* convert the utf16 values to a utf8 string */
        s = strcharsutf16(utf16);

        /* back to a byte series so the file is written verbatim */
        utf8bytes = charstrs(s);

        writeb(outname, UBYTE, overwrite, utf8bytes);

        return(length(utf8bytes));
}