Character Set Conversion

Name

Character Set Conversion -- convert strings between different character sets using iconv().

Synopsis


#include <glib.h>


gchar*      g_convert                       (const gchar *str,
                                             gssize len,
                                             const gchar *to_codeset,
                                             const gchar *from_codeset,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);
gchar*      g_convert_with_fallback         (const gchar *str,
                                             gssize len,
                                             const gchar *to_codeset,
                                             const gchar *from_codeset,
                                             gchar *fallback,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);
gchar*      g_convert_with_iconv            (const gchar *str,
                                             gssize len,
                                             GIConv converter,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);
#define     G_CONVERT_ERROR
GIConv      g_iconv_open                    (const gchar *to_codeset,
                                             const gchar *from_codeset);
size_t      g_iconv                         (GIConv converter,
                                             gchar **inbuf,
                                             gsize *inbytes_left,
                                             gchar **outbuf,
                                             gsize *outbytes_left);
gint        g_iconv_close                   (GIConv converter);
gchar*      g_locale_to_utf8                (const gchar *opsysstring,
                                             gssize len,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);
gchar*      g_filename_to_utf8              (const gchar *opsysstring,
                                             gssize len,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);
gchar*      g_filename_from_utf8            (const gchar *utf8string,
                                             gssize len,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);
gchar*      g_filename_from_uri             (const char *uri,
                                             char **hostname,
                                             GError **error);
gchar*      g_filename_to_uri               (const char *filename,
                                             char *hostname,
                                             GError **error);
gchar*      g_locale_from_utf8              (const gchar *utf8string,
                                             gssize len,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);
enum        GConvertError;

Description

Details

g_convert ()

gchar*      g_convert                       (const gchar *str,
                                             gssize len,
                                             const gchar *to_codeset,
                                             const gchar *from_codeset,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);

Convert a string from one character set to another.

str : the string to convert
len : the length of the string
to_codeset : name of character set into which to convert str
from_codeset : character set of str.
bytes_read : location to store the number of bytes in the input string that were successfully converted, or NULL. Even if the conversion was successful, this may be less than len if there were partial characters at the end of the input. If the error G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value stored will be the byte offset after the last valid input sequence.
bytes_written : the number of bytes stored in the output buffer (not including the terminating nul).
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError may occur.
Returns : If the conversion was successful, a newly allocated NUL-terminated string, which must be freed with g_free. Otherwise NULL and error will be set.


g_convert_with_fallback ()

gchar*      g_convert_with_fallback         (const gchar *str,
                                             gssize len,
                                             const gchar *to_codeset,
                                             const gchar *from_codeset,
                                             gchar *fallback,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);

Convert a string from one character set to another, possibly including fallback sequences for characters not representable in the output. Note that it is not guaranteed that the specification for the fallback sequences in fallback will be honored. Some systems may do an approximate conversion from from_codeset to to_codeset in their iconv() functions, in which case GLib will simply return that approximate conversion.

str : the string to convert
len : the length of the string
to_codeset : name of character set into which to convert str
from_codeset : character set of str.
fallback : UTF-8 string to use in place of characters not present in the target encoding. (The string must be representable in the target encoding.) If NULL, characters not in the target encoding will be represented as Unicode escapes \x{XXXX} or \x{XXXXXX}.
bytes_read : location to store the number of bytes in the input string that were successfully converted, or NULL. Even if the conversion was successful, this may be less than len if there were partial characters at the end of the input.
bytes_written : the number of bytes stored in the output buffer (not including the terminating nul).
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError may occur.
Returns : If the conversion was successful, a newly allocated NUL-terminated string, which must be freed with g_free. Otherwise NULL and error will be set.


g_convert_with_iconv ()

gchar*      g_convert_with_iconv            (const gchar *str,
                                             gssize len,
                                             GIConv converter,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);

Convert a string from one character set to another.

str : the string to convert
len : the length of the string
converter : conversion descriptor from g_iconv_open()
bytes_read : location to store the number of bytes in the input string that were successfully converted, or NULL. Even if the conversion was successful, this may be less than len if there were partial characters at the end of the input. If the error G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value stored will be the byte offset after the last valid input sequence.
bytes_written : the number of bytes stored in the output buffer (not including the terminating nul).
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError may occur.
Returns : If the conversion was successful, a newly allocated NUL-terminated string, which must be freed with g_free. Otherwise NULL and error will be set.


G_CONVERT_ERROR

#define G_CONVERT_ERROR g_convert_error_quark()

Error domain for character set conversions. Errors in this domain will be from the GConvertError enumeration. See GError for information on error domains.


g_iconv_open ()

GIConv      g_iconv_open                    (const gchar *to_codeset,
                                             const gchar *from_codeset);

Same as the standard UNIX routine iconv_open(), but may be implemented via libiconv on UNIX flavors that lack a native implementation.

GLib provides g_convert() and g_locale_to_utf8() which are likely more convenient than the raw iconv wrappers.

to_codeset : destination codeset
from_codeset : source codeset
Returns : a "conversion descriptor"


g_iconv ()

size_t      g_iconv                         (GIConv converter,
                                             gchar **inbuf,
                                             gsize *inbytes_left,
                                             gchar **outbuf,
                                             gsize *outbytes_left);

Same as the standard UNIX routine iconv(), but may be implemented via libiconv on UNIX flavors that lack a native implementation.

GLib provides g_convert() and g_locale_to_utf8() which are likely more convenient than the raw iconv wrappers.

converter : conversion descriptor from g_iconv_open()
inbuf : bytes to convert
inbytes_left : inout parameter, bytes remaining to convert in inbuf
outbuf : converted output bytes
outbytes_left : inout parameter, bytes available to fill in outbuf
Returns : count of non-reversible conversions, or -1 on error


g_iconv_close ()

gint        g_iconv_close                   (GIConv converter);

Same as the standard UNIX routine iconv_close(), but may be implemented via libiconv on UNIX flavors that lack a native implementation. Should be called to clean up the conversion descriptor from g_iconv_open() when you are done converting things.

GLib provides g_convert() and g_locale_to_utf8() which are likely more convenient than the raw iconv wrappers.

converter : a conversion descriptor from g_iconv_open()
Returns : -1 on error, 0 on success


g_locale_to_utf8 ()

gchar*      g_locale_to_utf8                (const gchar *opsysstring,
                                             gssize len,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);

Converts a string which is in the encoding used for strings by the C runtime (usually the same as that used by the operating system) in the current locale into a UTF-8 string.

opsysstring : a string in the encoding of the current locale
len : the length of the string, or -1 if the string is nul-terminated.
bytes_read : location to store the number of bytes in the input string that were successfully converted, or NULL. Even if the conversion was successful, this may be less than len if there were partial characters at the end of the input. If the error G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value stored will be the byte offset after the last valid input sequence.
bytes_written : the number of bytes stored in the output buffer (not including the terminating nul).
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError may occur.
Returns : The converted string, or NULL on an error.


g_filename_to_utf8 ()

gchar*      g_filename_to_utf8              (const gchar *opsysstring,
                                             gssize len,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);

Converts a string which is in the encoding used for filenames into a UTF-8 string.

opsysstring : a string in the encoding for filenames
len : the length of the string, or -1 if the string is nul-terminated.
bytes_read : location to store the number of bytes in the input string that were successfully converted, or NULL. Even if the conversion was successful, this may be less than len if there were partial characters at the end of the input. If the error G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value stored will be the byte offset after the last valid input sequence.
bytes_written : the number of bytes stored in the output buffer (not including the terminating nul).
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError may occur.
Returns : The converted string, or NULL on an error.


g_filename_from_utf8 ()

gchar*      g_filename_from_utf8            (const gchar *utf8string,
                                             gssize len,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);

Converts a string from UTF-8 to the encoding used for filenames.

utf8string : a UTF-8 encoded string
len : the length of the string, or -1 if the string is nul-terminated.
bytes_read : location to store the number of bytes in the input string that were successfully converted, or NULL. Even if the conversion was successful, this may be less than len if there were partial characters at the end of the input. If the error G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value stored will be the byte offset after the last valid input sequence.
bytes_written : the number of bytes stored in the output buffer (not including the terminating nul).
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError may occur.
Returns : The converted string, or NULL on an error.


g_filename_from_uri ()

gchar*      g_filename_from_uri             (const char *uri,
                                             char **hostname,
                                             GError **error);

Converts an escaped UTF-8 encoded URI to a local filename in the encoding used for filenames.

uri : a uri describing a filename (escaped, encoded in UTF-8)
hostname : Location to store hostname for the URI, or NULL. If there is no hostname in the URI, NULL will be stored in this location.
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError may occur.
Returns : a newly allocated string holding the resulting filename, or NULL on an error.


g_filename_to_uri ()

gchar*      g_filename_to_uri               (const char *filename,
                                             char *hostname,
                                             GError **error);

Converts an absolute filename to an escaped UTF-8 encoded URI.

filename : an absolute filename specified in the encoding used for filenames by the operating system.
hostname : A UTF-8 encoded hostname, or NULL for none.
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError may occur.
Returns : a newly allocated string holding the resulting URI, or NULL on an error.


g_locale_from_utf8 ()

gchar*      g_locale_from_utf8              (const gchar *utf8string,
                                             gssize len,
                                             gsize *bytes_read,
                                             gsize *bytes_written,
                                             GError **error);

Converts a string from UTF-8 to the encoding used for strings by the C runtime (usually the same as that used by the operating system) in the current locale.

utf8string : a UTF-8 encoded string
len : the length of the string, or -1 if the string is nul-terminated.
bytes_read : location to store the number of bytes in the input string that were successfully converted, or NULL. Even if the conversion was successful, this may be less than len if there were partial characters at the end of the input. If the error G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value stored will be the byte offset after the last valid input sequence.
bytes_written : the number of bytes stored in the output buffer (not including the terminating nul).
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError may occur.
Returns : The converted string, or NULL on an error.


enum GConvertError

typedef enum 
{
  G_CONVERT_ERROR_NO_CONVERSION,
  G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
  G_CONVERT_ERROR_FAILED,
  G_CONVERT_ERROR_PARTIAL_INPUT,
  G_CONVERT_ERROR_NOT_ABSOLUTE_FILE_URI,
  G_CONVERT_ERROR_INVALID_URI,
  G_CONVERT_ERROR_NOT_ABSOLUTE_PATH
} GConvertError;