X-Git-Url: http://de.git.xonotic.org/?a=blobdiff_plain;f=libs%2Fconvert.h;h=a734e6feb91af2f265cc5675ecdf9eade780e7b6;hb=1f0271a98874f2a2bae502aba1dc16782e8073f6;hp=ea7a71f16de0e01c5e2b952224d3b26fb5727e13;hpb=12b372f89ce109a4db9d510884fbe7d05af79870;p=xonotic%2Fnetradiant.git diff --git a/libs/convert.h b/libs/convert.h index ea7a71f1..a734e6fe 100644 --- a/libs/convert.h +++ b/libs/convert.h @@ -1,25 +1,25 @@ /* -Copyright (C) 2001-2006, William Joseph. -All Rights Reserved. + Copyright (C) 2001-2006, William Joseph. + All Rights Reserved. -This file is part of GtkRadiant. + This file is part of GtkRadiant. -GtkRadiant is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. + GtkRadiant is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. -GtkRadiant is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + GtkRadiant is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with GtkRadiant; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ + You should have received a copy of the GNU General Public License + along with GtkRadiant; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ -#if !defined(INCLUDED_CONVERT_H) +#if !defined( INCLUDED_CONVERT_H ) #define INCLUDED_CONVERT_H /// \file @@ -27,64 +27,53 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "debugging/debugging.h" #include -#include -#include +#include #include "character.h" /// \brief Returns the number of bytes required to represent \p character in UTF-8 encoding. -inline std::size_t utf8_character_length(const char* character) -{ - if((*character & 0xE0) == 0xC0) // 110xxxxx - { - return 2; - } - else if((*character & 0xF0) == 0xE0) // 1110xxxx - { - return 3; - } - else if((*character & 0xF8) == 0xF0) // 11110xxx - { - return 4; - } - else if((*character & 0xFC) == 0xF8) // 111110xx - { - return 5; - } - else if((*character & 0xFE) == 0xFC) // 1111110x - { - return 6; - } - ERROR_MESSAGE(""); - return 0; +inline std::size_t utf8_character_length( const char* character ){ + if ( ( *character & 0xE0 ) == 0xC0 ) { // 110xxxxx + return 2; + } + else if ( ( *character & 0xF0 ) == 0xE0 ) { // 1110xxxx + return 3; + } + else if ( ( *character & 0xF8 ) == 0xF0 ) { // 11110xxx + return 4; + } + else if ( ( *character & 0xFC ) == 0xF8 ) { // 111110xx + return 5; + } + else if ( ( *character & 0xFE ) == 0xFC ) { // 1111110x + return 6; + } + ERROR_MESSAGE( "" ); + return 0; } struct UTF8Character { - const char* buffer; - std::size_t length; - UTF8Character() : buffer(0), length(0) - { - } - UTF8Character(const char* bytes) : buffer(bytes), length(utf8_character_length(bytes)) - { - } + const char* buffer; + std::size_t length; + UTF8Character() : buffer( 0 ), length( 0 ){ + } + UTF8Character( const char* bytes ) : buffer( bytes ), length( utf8_character_length( bytes ) ){ + } }; -inline bool operator<(const UTF8Character& self, const UTF8Character& other) -{ - return std::lexicographical_compare(self.buffer, self.buffer + self.length, other.buffer, other.buffer + other.length); +inline bool operator<( const UTF8Character& self, const UTF8Character& other ){ + return std::lexicographical_compare( self.buffer, self.buffer + self.length, other.buffer, other.buffer + other.length ); } /// \brief Writes \p c to \p ostream in Hex form. Useful for debugging. template -inline TextOutputStreamType& ostream_write(TextOutputStreamType& ostream, const UTF8Character& c) -{ - for(const char* p = c.buffer; p != c.buffer + c.length; ++p) - { - ostream << HexChar(*p); - } - return ostream; +inline TextOutputStreamType& ostream_write( TextOutputStreamType& ostream, const UTF8Character& c ){ + for ( const char* p = c.buffer; p != c.buffer + c.length; ++p ) + { + ostream << HexChar( *p ); + } + return ostream; } @@ -94,60 +83,50 @@ inline TextOutputStreamType& ostream_write(TextOutputStreamType& ostream, const /// Obtain the global instance with globalCharacterSet(). class CharacterSet { - const char* m_charSet; +const char* m_charSet; public: - CharacterSet() - { - if(g_get_charset(&m_charSet) != FALSE) - { - m_charSet = 0; - } - } - bool isUTF8() const - { - return m_charSet == 0; - } - const char* get() const - { - return m_charSet; - } +CharacterSet(){ + if ( g_get_charset( &m_charSet ) != FALSE ) { + m_charSet = 0; + } +} +bool isUTF8() const { + return m_charSet == 0; +} +const char* get() const { + return m_charSet; +} }; typedef LazyStatic GlobalCharacterSet; /// \brief Returns the global instance of CharacterSet. -inline CharacterSet& globalCharacterSet() -{ - return GlobalCharacterSet::instance(); +inline CharacterSet& globalCharacterSet(){ + return GlobalCharacterSet::instance(); } class UTF8CharacterToExtendedASCII { public: - UTF8Character m_utf8; - char m_c; - UTF8CharacterToExtendedASCII() : m_c('\0') - { - } - UTF8CharacterToExtendedASCII(const UTF8Character& utf8, char c) : m_utf8(utf8), m_c(c) - { - } +UTF8Character m_utf8; +char m_c; +UTF8CharacterToExtendedASCII() : m_c( '\0' ){ +} +UTF8CharacterToExtendedASCII( const UTF8Character& utf8, char c ) : m_utf8( utf8 ), m_c( c ){ +} }; -inline bool operator<(const UTF8CharacterToExtendedASCII& self, const UTF8CharacterToExtendedASCII& other) -{ - return self.m_utf8 < other.m_utf8; +inline bool operator<( const UTF8CharacterToExtendedASCII& self, const UTF8CharacterToExtendedASCII& other ){ + return self.m_utf8 < other.m_utf8; } -inline std::size_t extended_ascii_to_index(char c) -{ - return static_cast(c & 0x7F); +inline std::size_t extended_ascii_to_index( char c ){ + return static_cast( c & 0x7F ); } -inline char extended_ascii_for_index(std::size_t i) -{ - return static_cast(i | 0x80); +inline char extended_ascii_for_index( std::size_t i ){ + return static_cast( i | 0x80 ); } /// \brief The active extended-ascii character set encoding. @@ -156,149 +135,131 @@ inline char extended_ascii_for_index(std::size_t i) /// Obtain the global instance with globalExtendedASCIICharacterSet(). class ExtendedASCIICharacterSet { - typedef char UTF8CharBuffer[6]; - UTF8CharBuffer m_converted[128]; - UTF8Character m_decodeMap[128]; - UTF8CharacterToExtendedASCII m_encodeMap[128]; +typedef char UTF8CharBuffer[6]; +UTF8CharBuffer m_converted[128]; +UTF8Character m_decodeMap[128]; +UTF8CharacterToExtendedASCII m_encodeMap[128]; public: - ExtendedASCIICharacterSet() - { - if(!globalCharacterSet().isUTF8()) - { - GIConv descriptor = g_iconv_open("UTF-8", globalCharacterSet().get()); - for(std::size_t i = 1; i < 128; ++i) - { - char c = extended_ascii_for_index(i); - char* inbuf = &c; - std::size_t inbytesleft = 1; - char* outbuf = m_converted[i]; - std::size_t outbytesleft = 6; - if(g_iconv(descriptor, &inbuf, &inbytesleft, &outbuf, &outbytesleft) != (size_t)(-1)) - { - UTF8Character utf8(m_converted[i]); - m_decodeMap[i] = utf8; - m_encodeMap[i] = UTF8CharacterToExtendedASCII(utf8, c); - } - } - g_iconv_close(descriptor); - std::sort(m_encodeMap, m_encodeMap + 128); - } - } - /// \brief Prints the (up to) 128 characters in the current extended-ascii character set. - /// Useful for debugging. - void print() const - { - globalOutputStream() << "UTF-8 conversion required from charset: " << globalCharacterSet().get() << "\n"; - for(std::size_t i = 1; i < 128; ++i) - { - if(m_decodeMap[i].buffer != 0) - { - globalOutputStream() << extended_ascii_for_index(i) << " = " << m_decodeMap[i] << "\n"; - } - } - } - /// \brief Returns \p c decoded from extended-ascii to UTF-8. - /// \p c must be an extended-ascii character. - const UTF8Character& decode(char c) const - { - ASSERT_MESSAGE(!globalCharacterSet().isUTF8(), "locale is utf8, no conversion required"); - ASSERT_MESSAGE(!char_is_ascii(c), "decode: ascii character"); - ASSERT_MESSAGE(m_decodeMap[extended_ascii_to_index(c)].buffer != 0, "decode: invalid character: " << HexChar(c)); - return m_decodeMap[extended_ascii_to_index(c)]; - } - /// \brief Returns \p c encoded to extended-ascii from UTF-8. - /// \p c must map to an extended-ascii character. - char encode(const UTF8Character& c) const - { - ASSERT_MESSAGE(!globalCharacterSet().isUTF8(), "locale is utf8, no conversion required"); - ASSERT_MESSAGE(!char_is_ascii(*c.buffer), "encode: ascii character"); - std::pair range - = std::equal_range(m_encodeMap, m_encodeMap + 128, UTF8CharacterToExtendedASCII(c, 0)); - ASSERT_MESSAGE(range.first != range.second, "encode: invalid character: " << c); - return (*range.first).m_c; - } +ExtendedASCIICharacterSet(){ + if ( !globalCharacterSet().isUTF8() ) { + GIConv descriptor = g_iconv_open( "UTF-8", globalCharacterSet().get() ); + for ( std::size_t i = 1; i < 128; ++i ) + { + char c = extended_ascii_for_index( i ); + char* inbuf = &c; + std::size_t inbytesleft = 1; + char* outbuf = m_converted[i]; + std::size_t outbytesleft = 6; + if ( g_iconv( descriptor, &inbuf, &inbytesleft, &outbuf, &outbytesleft ) != (size_t)( -1 ) ) { + UTF8Character utf8( m_converted[i] ); + m_decodeMap[i] = utf8; + m_encodeMap[i] = UTF8CharacterToExtendedASCII( utf8, c ); + } + } + g_iconv_close( descriptor ); + std::sort( m_encodeMap, m_encodeMap + 128 ); + } +} +/// \brief Prints the (up to) 128 characters in the current extended-ascii character set. +/// Useful for debugging. +void print() const { + globalOutputStream() << "UTF-8 conversion required from charset: " << globalCharacterSet().get() << "\n"; + for ( std::size_t i = 1; i < 128; ++i ) + { + if ( m_decodeMap[i].buffer != 0 ) { + globalOutputStream() << extended_ascii_for_index( i ) << " = " << m_decodeMap[i] << "\n"; + } + } +} +/// \brief Returns \p c decoded from extended-ascii to UTF-8. +/// \p c must be an extended-ascii character. +const UTF8Character& decode( char c ) const { + ASSERT_MESSAGE( !globalCharacterSet().isUTF8(), "locale is utf8, no conversion required" ); + ASSERT_MESSAGE( !char_is_ascii( c ), "decode: ascii character" ); + ASSERT_MESSAGE( m_decodeMap[extended_ascii_to_index( c )].buffer != 0, "decode: invalid character: " << HexChar( c ) ); + return m_decodeMap[extended_ascii_to_index( c )]; +} +/// \brief Returns \p c encoded to extended-ascii from UTF-8. +/// \p c must map to an extended-ascii character. +char encode( const UTF8Character& c ) const { + ASSERT_MESSAGE( !globalCharacterSet().isUTF8(), "locale is utf8, no conversion required" ); + ASSERT_MESSAGE( !char_is_ascii( *c.buffer ), "encode: ascii character" ); + std::pair range + = std::equal_range( m_encodeMap, m_encodeMap + 128, UTF8CharacterToExtendedASCII( c, 0 ) ); + ASSERT_MESSAGE( range.first != range.second, "encode: invalid character: " << c ); + return ( *range.first ).m_c; +} }; typedef LazyStatic GlobalExtendedASCIICharacterSet; /// \brief Returns the global instance of ExtendedASCIICharacterSet. -inline ExtendedASCIICharacterSet& globalExtendedASCIICharacterSet() -{ - return GlobalExtendedASCIICharacterSet::instance(); +inline ExtendedASCIICharacterSet& globalExtendedASCIICharacterSet(){ + return GlobalExtendedASCIICharacterSet::instance(); } class ConvertUTF8ToLocale { public: - StringRange m_range; - ConvertUTF8ToLocale(const char* string) : m_range(StringRange(string, string + strlen(string))) - { - } - ConvertUTF8ToLocale(const StringRange& range) : m_range(range) - { - } +StringRange m_range; +ConvertUTF8ToLocale( const char* string ) : m_range( StringRange( string, string + strlen( string ) ) ){ +} +ConvertUTF8ToLocale( const StringRange& range ) : m_range( range ){ +} }; /// \brief Writes \p convert to \p ostream after encoding each character to extended-ascii from UTF-8. template -inline TextOutputStreamType& ostream_write(TextOutputStreamType& ostream, const ConvertUTF8ToLocale& convert) -{ - if(globalCharacterSet().isUTF8()) - { - return ostream << convert.m_range; - } - - for(const char* p = convert.m_range.begin; p != convert.m_range.end;) - { - if(!char_is_ascii(*p)) - { - UTF8Character c(p); - ostream << globalExtendedASCIICharacterSet().encode(c); - p += c.length; - } - else - { - ostream << *p++; - } - } - return ostream; +inline TextOutputStreamType& ostream_write( TextOutputStreamType& ostream, const ConvertUTF8ToLocale& convert ){ + if ( globalCharacterSet().isUTF8() ) { + return ostream << convert.m_range; + } + + for ( const char* p = convert.m_range.first; p != convert.m_range.last; ) + { + if ( !char_is_ascii( *p ) ) { + UTF8Character c( p ); + ostream << globalExtendedASCIICharacterSet().encode( c ); + p += c.length; + } + else + { + ostream << *p++; + } + } + return ostream; } class ConvertLocaleToUTF8 { public: - StringRange m_range; - ConvertLocaleToUTF8(const char* string) : m_range(StringRange(string, string + strlen(string))) - { - } - ConvertLocaleToUTF8(const StringRange& range) : m_range(range) - { - } +StringRange m_range; +ConvertLocaleToUTF8( const char* string ) : m_range( StringRange( string, string + strlen( string ) ) ){ +} +ConvertLocaleToUTF8( const StringRange& range ) : m_range( range ){ +} }; /// \brief Writes \p convert to \p ostream after decoding each character from extended-ascii to UTF-8. template -inline TextOutputStreamType& ostream_write(TextOutputStreamType& ostream, const ConvertLocaleToUTF8& convert) -{ - if(globalCharacterSet().isUTF8()) - { - return ostream << convert.m_range; - } - - for(const char* p = convert.m_range.begin; p != convert.m_range.end; ++p) - { - if(!char_is_ascii(*p)) - { - UTF8Character c(globalExtendedASCIICharacterSet().decode(*p)); - ostream.write(c.buffer, c.length); - } - else - { - ostream << *p; - } - } - return ostream; +inline TextOutputStreamType& ostream_write( TextOutputStreamType& ostream, const ConvertLocaleToUTF8& convert ){ + if ( globalCharacterSet().isUTF8() ) { + return ostream << convert.m_range; + } + + for ( const char* p = convert.m_range.first; p != convert.m_range.last; ++p ) + { + if ( !char_is_ascii( *p ) ) { + UTF8Character c( globalExtendedASCIICharacterSet().decode( *p ) ); + ostream.write( c.buffer, c.length ); + } + else + { + ostream << *p; + } + } + return ostream; }