/*
-Copyright (C) 2001-2006, William Joseph.
-All Rights Reserved.
+ Copyright (C) 2001-2006, William Joseph.
+ All Rights Reserved.
-This file is part of GtkRadiant.
+ This file is part of GtkRadiant.
-GtkRadiant is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
+ GtkRadiant is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
-GtkRadiant is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
+ GtkRadiant is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with GtkRadiant; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-*/
+ You should have received a copy of the GNU General Public License
+ along with GtkRadiant; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
-#if !defined(INCLUDED_CONVERT_H)
+#if !defined( INCLUDED_CONVERT_H )
#define INCLUDED_CONVERT_H
/// \file
#include "debugging/debugging.h"
#include <algorithm>
-#include <glib/gunicode.h>
-#include <glib/gconvert.h>
+#include <glib.h>
#include "character.h"
/// \brief Returns the number of bytes required to represent \p character in UTF-8 encoding.
-inline std::size_t utf8_character_length(const char* character)
-{
- if((*character & 0xE0) == 0xC0) // 110xxxxx
- {
- return 2;
- }
- else if((*character & 0xF0) == 0xE0) // 1110xxxx
- {
- return 3;
- }
- else if((*character & 0xF8) == 0xF0) // 11110xxx
- {
- return 4;
- }
- else if((*character & 0xFC) == 0xF8) // 111110xx
- {
- return 5;
- }
- else if((*character & 0xFE) == 0xFC) // 1111110x
- {
- return 6;
- }
- ERROR_MESSAGE("");
- return 0;
+inline std::size_t utf8_character_length( const char* character ){
+ if ( ( *character & 0xE0 ) == 0xC0 ) { // 110xxxxx
+ return 2;
+ }
+ else if ( ( *character & 0xF0 ) == 0xE0 ) { // 1110xxxx
+ return 3;
+ }
+ else if ( ( *character & 0xF8 ) == 0xF0 ) { // 11110xxx
+ return 4;
+ }
+ else if ( ( *character & 0xFC ) == 0xF8 ) { // 111110xx
+ return 5;
+ }
+ else if ( ( *character & 0xFE ) == 0xFC ) { // 1111110x
+ return 6;
+ }
+ ERROR_MESSAGE( "" );
+ return 0;
}
struct UTF8Character
{
- const char* buffer;
- std::size_t length;
- UTF8Character() : buffer(0), length(0)
- {
- }
- UTF8Character(const char* bytes) : buffer(bytes), length(utf8_character_length(bytes))
- {
- }
+ const char* buffer;
+ std::size_t length;
+ UTF8Character() : buffer( 0 ), length( 0 ){
+ }
+ UTF8Character( const char* bytes ) : buffer( bytes ), length( utf8_character_length( bytes ) ){
+ }
};
-inline bool operator<(const UTF8Character& self, const UTF8Character& other)
-{
- return std::lexicographical_compare(self.buffer, self.buffer + self.length, other.buffer, other.buffer + other.length);
+inline bool operator<( const UTF8Character& self, const UTF8Character& other ){
+ return std::lexicographical_compare( self.buffer, self.buffer + self.length, other.buffer, other.buffer + other.length );
}
/// \brief Writes \p c to \p ostream in Hex form. Useful for debugging.
template<typename TextOutputStreamType>
-inline TextOutputStreamType& ostream_write(TextOutputStreamType& ostream, const UTF8Character& c)
-{
- for(const char* p = c.buffer; p != c.buffer + c.length; ++p)
- {
- ostream << HexChar(*p);
- }
- return ostream;
+inline TextOutputStreamType& ostream_write( TextOutputStreamType& ostream, const UTF8Character& c ){
+ for ( const char* p = c.buffer; p != c.buffer + c.length; ++p )
+ {
+ ostream << HexChar( *p );
+ }
+ return ostream;
}
/// Obtain the global instance with globalCharacterSet().
class CharacterSet
{
- const char* m_charSet;
+const char* m_charSet;
public:
- CharacterSet()
- {
- if(g_get_charset(&m_charSet) != FALSE)
- {
- m_charSet = 0;
- }
- }
- bool isUTF8() const
- {
- return m_charSet == 0;
- }
- const char* get() const
- {
- return m_charSet;
- }
+CharacterSet(){
+ if ( g_get_charset( &m_charSet ) != FALSE ) {
+ m_charSet = 0;
+ }
+}
+bool isUTF8() const {
+ return m_charSet == 0;
+}
+const char* get() const {
+ return m_charSet;
+}
};
typedef LazyStatic<CharacterSet> GlobalCharacterSet;
/// \brief Returns the global instance of CharacterSet.
-inline CharacterSet& globalCharacterSet()
-{
- return GlobalCharacterSet::instance();
+inline CharacterSet& globalCharacterSet(){
+ return GlobalCharacterSet::instance();
}
class UTF8CharacterToExtendedASCII
{
public:
- UTF8Character m_utf8;
- char m_c;
- UTF8CharacterToExtendedASCII() : m_c('\0')
- {
- }
- UTF8CharacterToExtendedASCII(const UTF8Character& utf8, char c) : m_utf8(utf8), m_c(c)
- {
- }
+UTF8Character m_utf8;
+char m_c;
+UTF8CharacterToExtendedASCII() : m_c( '\0' ){
+}
+UTF8CharacterToExtendedASCII( const UTF8Character& utf8, char c ) : m_utf8( utf8 ), m_c( c ){
+}
};
-inline bool operator<(const UTF8CharacterToExtendedASCII& self, const UTF8CharacterToExtendedASCII& other)
-{
- return self.m_utf8 < other.m_utf8;
+inline bool operator<( const UTF8CharacterToExtendedASCII& self, const UTF8CharacterToExtendedASCII& other ){
+ return self.m_utf8 < other.m_utf8;
}
/// \brief Returns the table index for \p c: its low seven bits, so the result is in [0, 127].
inline std::size_t extended_ascii_to_index( char c ){
	const int lowSevenBits = c & 0x7F;
	return static_cast<std::size_t>( lowSevenBits );
}
/// \brief Returns the extended-ascii character for table index \p i: \p i with the high bit (0x80) set.
inline char extended_ascii_for_index( std::size_t i ){
	const std::size_t withHighBit = i | 0x80;
	return static_cast<char>( withHighBit );
}
/// \brief The active extended-ascii character set encoding.
/// Obtain the global instance with globalExtendedASCIICharacterSet().
class ExtendedASCIICharacterSet
{
- typedef char UTF8CharBuffer[6];
- UTF8CharBuffer m_converted[128];
- UTF8Character m_decodeMap[128];
- UTF8CharacterToExtendedASCII m_encodeMap[128];
+typedef char UTF8CharBuffer[6];
+UTF8CharBuffer m_converted[128];
+UTF8Character m_decodeMap[128];
+UTF8CharacterToExtendedASCII m_encodeMap[128];
public:
- ExtendedASCIICharacterSet()
- {
- if(!globalCharacterSet().isUTF8())
- {
- GIConv descriptor = g_iconv_open("UTF-8", globalCharacterSet().get());
- for(std::size_t i = 1; i < 128; ++i)
- {
- char c = extended_ascii_for_index(i);
- char* inbuf = &c;
- std::size_t inbytesleft = 1;
- char* outbuf = m_converted[i];
- std::size_t outbytesleft = 6;
- if(g_iconv(descriptor, &inbuf, &inbytesleft, &outbuf, &outbytesleft) != (size_t)(-1))
- {
- UTF8Character utf8(m_converted[i]);
- m_decodeMap[i] = utf8;
- m_encodeMap[i] = UTF8CharacterToExtendedASCII(utf8, c);
- }
- }
- g_iconv_close(descriptor);
- std::sort(m_encodeMap, m_encodeMap + 128);
- }
- }
- /// \brief Prints the (up to) 128 characters in the current extended-ascii character set.
- /// Useful for debugging.
- void print() const
- {
- globalOutputStream() << "UTF-8 conversion required from charset: " << globalCharacterSet().get() << "\n";
- for(std::size_t i = 1; i < 128; ++i)
- {
- if(m_decodeMap[i].buffer != 0)
- {
- globalOutputStream() << extended_ascii_for_index(i) << " = " << m_decodeMap[i] << "\n";
- }
- }
- }
- /// \brief Returns \p c decoded from extended-ascii to UTF-8.
- /// \p c must be an extended-ascii character.
- const UTF8Character& decode(char c) const
- {
- ASSERT_MESSAGE(!globalCharacterSet().isUTF8(), "locale is utf8, no conversion required");
- ASSERT_MESSAGE(!char_is_ascii(c), "decode: ascii character");
- ASSERT_MESSAGE(m_decodeMap[extended_ascii_to_index(c)].buffer != 0, "decode: invalid character: " << HexChar(c));
- return m_decodeMap[extended_ascii_to_index(c)];
- }
- /// \brief Returns \p c encoded to extended-ascii from UTF-8.
- /// \p c must map to an extended-ascii character.
- char encode(const UTF8Character& c) const
- {
- ASSERT_MESSAGE(!globalCharacterSet().isUTF8(), "locale is utf8, no conversion required");
- ASSERT_MESSAGE(!char_is_ascii(*c.buffer), "encode: ascii character");
- std::pair<const UTF8CharacterToExtendedASCII*, const UTF8CharacterToExtendedASCII*> range
- = std::equal_range(m_encodeMap, m_encodeMap + 128, UTF8CharacterToExtendedASCII(c, 0));
- ASSERT_MESSAGE(range.first != range.second, "encode: invalid character: " << c);
- return (*range.first).m_c;
- }
+ExtendedASCIICharacterSet(){
+ if ( !globalCharacterSet().isUTF8() ) {
+ GIConv descriptor = g_iconv_open( "UTF-8", globalCharacterSet().get() );
+ for ( std::size_t i = 1; i < 128; ++i )
+ {
+ char c = extended_ascii_for_index( i );
+ char* inbuf = &c;
+ std::size_t inbytesleft = 1;
+ char* outbuf = m_converted[i];
+ std::size_t outbytesleft = 6;
+ if ( g_iconv( descriptor, &inbuf, &inbytesleft, &outbuf, &outbytesleft ) != (size_t)( -1 ) ) {
+ UTF8Character utf8( m_converted[i] );
+ m_decodeMap[i] = utf8;
+ m_encodeMap[i] = UTF8CharacterToExtendedASCII( utf8, c );
+ }
+ }
+ g_iconv_close( descriptor );
+ std::sort( m_encodeMap, m_encodeMap + 128 );
+ }
+}
+/// \brief Prints the (up to) 128 characters in the current extended-ascii character set.
+/// Useful for debugging.
+void print() const {
+ globalOutputStream() << "UTF-8 conversion required from charset: " << globalCharacterSet().get() << "\n";
+ for ( std::size_t i = 1; i < 128; ++i )
+ {
+ if ( m_decodeMap[i].buffer != 0 ) {
+ globalOutputStream() << extended_ascii_for_index( i ) << " = " << m_decodeMap[i] << "\n";
+ }
+ }
+}
+/// \brief Returns \p c decoded from extended-ascii to UTF-8.
+/// \p c must be an extended-ascii character.
+const UTF8Character& decode( char c ) const {
+ ASSERT_MESSAGE( !globalCharacterSet().isUTF8(), "locale is utf8, no conversion required" );
+ ASSERT_MESSAGE( !char_is_ascii( c ), "decode: ascii character" );
+ ASSERT_MESSAGE( m_decodeMap[extended_ascii_to_index( c )].buffer != 0, "decode: invalid character: " << HexChar( c ) );
+ return m_decodeMap[extended_ascii_to_index( c )];
+}
+/// \brief Returns \p c encoded to extended-ascii from UTF-8.
+/// \p c must map to an extended-ascii character.
+char encode( const UTF8Character& c ) const {
+ ASSERT_MESSAGE( !globalCharacterSet().isUTF8(), "locale is utf8, no conversion required" );
+ ASSERT_MESSAGE( !char_is_ascii( *c.buffer ), "encode: ascii character" );
+ std::pair<const UTF8CharacterToExtendedASCII*, const UTF8CharacterToExtendedASCII*> range
+ = std::equal_range( m_encodeMap, m_encodeMap + 128, UTF8CharacterToExtendedASCII( c, 0 ) );
+ ASSERT_MESSAGE( range.first != range.second, "encode: invalid character: " << c );
+ return ( *range.first ).m_c;
+}
};
typedef LazyStatic<ExtendedASCIICharacterSet> GlobalExtendedASCIICharacterSet;
/// \brief Returns the global instance of ExtendedASCIICharacterSet.
-inline ExtendedASCIICharacterSet& globalExtendedASCIICharacterSet()
-{
- return GlobalExtendedASCIICharacterSet::instance();
+inline ExtendedASCIICharacterSet& globalExtendedASCIICharacterSet(){
+ return GlobalExtendedASCIICharacterSet::instance();
}
class ConvertUTF8ToLocale
{
public:
- StringRange m_range;
- ConvertUTF8ToLocale(const char* string) : m_range(StringRange(string, string + strlen(string)))
- {
- }
- ConvertUTF8ToLocale(const StringRange& range) : m_range(range)
- {
- }
+StringRange m_range;
+ConvertUTF8ToLocale( const char* string ) : m_range( StringRange( string, string + strlen( string ) ) ){
+}
+ConvertUTF8ToLocale( const StringRange& range ) : m_range( range ){
+}
};
/// \brief Writes \p convert to \p ostream after encoding each character to extended-ascii from UTF-8.
template<typename TextOutputStreamType>
-inline TextOutputStreamType& ostream_write(TextOutputStreamType& ostream, const ConvertUTF8ToLocale& convert)
-{
- if(globalCharacterSet().isUTF8())
- {
- return ostream << convert.m_range;
- }
-
- for(const char* p = convert.m_range.first; p != convert.m_range.last;)
- {
- if(!char_is_ascii(*p))
- {
- UTF8Character c(p);
- ostream << globalExtendedASCIICharacterSet().encode(c);
- p += c.length;
- }
- else
- {
- ostream << *p++;
- }
- }
- return ostream;
+inline TextOutputStreamType& ostream_write( TextOutputStreamType& ostream, const ConvertUTF8ToLocale& convert ){
+ if ( globalCharacterSet().isUTF8() ) {
+ return ostream << convert.m_range;
+ }
+
+ for ( const char* p = convert.m_range.first; p != convert.m_range.last; )
+ {
+ if ( !char_is_ascii( *p ) ) {
+ UTF8Character c( p );
+ ostream << globalExtendedASCIICharacterSet().encode( c );
+ p += c.length;
+ }
+ else
+ {
+ ostream << *p++;
+ }
+ }
+ return ostream;
}
class ConvertLocaleToUTF8
{
public:
- StringRange m_range;
- ConvertLocaleToUTF8(const char* string) : m_range(StringRange(string, string + strlen(string)))
- {
- }
- ConvertLocaleToUTF8(const StringRange& range) : m_range(range)
- {
- }
+StringRange m_range;
+ConvertLocaleToUTF8( const char* string ) : m_range( StringRange( string, string + strlen( string ) ) ){
+}
+ConvertLocaleToUTF8( const StringRange& range ) : m_range( range ){
+}
};
/// \brief Writes \p convert to \p ostream after decoding each character from extended-ascii to UTF-8.
template<typename TextOutputStreamType>
-inline TextOutputStreamType& ostream_write(TextOutputStreamType& ostream, const ConvertLocaleToUTF8& convert)
-{
- if(globalCharacterSet().isUTF8())
- {
- return ostream << convert.m_range;
- }
-
- for(const char* p = convert.m_range.first; p != convert.m_range.last; ++p)
- {
- if(!char_is_ascii(*p))
- {
- UTF8Character c(globalExtendedASCIICharacterSet().decode(*p));
- ostream.write(c.buffer, c.length);
- }
- else
- {
- ostream << *p;
- }
- }
- return ostream;
+inline TextOutputStreamType& ostream_write( TextOutputStreamType& ostream, const ConvertLocaleToUTF8& convert ){
+ if ( globalCharacterSet().isUTF8() ) {
+ return ostream << convert.m_range;
+ }
+
+ for ( const char* p = convert.m_range.first; p != convert.m_range.last; ++p )
+ {
+ if ( !char_is_ascii( *p ) ) {
+ UTF8Character c( globalExtendedASCIICharacterSet().decode( *p ) );
+ ostream.write( c.buffer, c.length );
+ }
+ else
+ {
+ ostream << *p;
+ }
+ }
+ return ostream;
}