c++-gtk-utils
reassembler.h
Go to the documentation of this file.
1 /* Copyright (C) 2005 to 2010 Chris Vine
2 
3 The library comprised in this file or of which this file is part is
4 distributed by Chris Vine under the GNU Lesser General Public
5 License as follows:
6 
7  This library is free software; you can redistribute it and/or
8  modify it under the terms of the GNU Lesser General Public License
9  as published by the Free Software Foundation; either version 2.1 of
10  the License, or (at your option) any later version.
11 
12  This library is distributed in the hope that it will be useful, but
13  WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  Lesser General Public License, version 2.1, for more details.
16 
17  You should have received a copy of the GNU Lesser General Public
18  License, version 2.1, along with this library (see the file LGPL.TXT
19  which came with this source code package in the c++-gtk-utils
20  sub-directory); if not, write to the Free Software Foundation, Inc.,
21  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 
23 */
24 
25 #ifndef CGU_REASSEMBLER_H
26 #define CGU_REASSEMBLER_H
27 
30 
31 namespace Cgu {
32 
33 namespace Utf8 {
34 
35 
36 /**
37  * @class Reassembler reassembler.h c++-gtk-utils/reassembler.h
38  * @brief A class for reassembling UTF-8 strings sent over pipes and
39  * sockets so they form complete valid UTF-8 characters.
40  *
41  * Utf8::Reassembler is a functor class which takes in a partially
42  * formed UTF-8 string and returns a nul-terminated string comprising
43  * such of the input string (after inserting, at the beginning, any
44  * partially formed UTF-8 character which was at the end of the input
45  * string passed in previous calls to the functor) as forms complete
46  * UTF-8 characters (storing any partial character at the end for the
47  * next call to the functor). If the input string contains invalid
48  * UTF-8 after adding any stored previous part character (apart from
49  * any partially formed character at the end of the input string) then
50  * operator() will return a null Cgu::SharedHandle<char*> object (that
51  * is, Cgu::SharedHandle<char*>::get() will return 0). Such input
52  * will not be treated as invalid if it consists only of a single
53  * partly formed UTF-8 character which could be valid if further bytes
54  * were received and added to it. In that case the returned
55  * SharedHandle<char*> object will contain an allocated string of zero
56  * length, comprising only a terminating \0 character, rather than a
57  * NULL pointer.
58  *
59  * This enables UTF-8 strings to be sent over pipes, sockets, etc and
60  * displayed in a GTK+ object at the receiving end.
61  *
62  * Note that for efficiency reasons the memory held in the returned
63  * Cgu::SharedHandle<char*> object may be greater than the length of
64  * the nul-terminated string that is contained in that memory: just
65  * let the Cgu::SharedHandle<char*> object manage the memory, and use
66  * the contents like any other nul-terminated string.
67  *
68  * This class is not needed if std::getline(), with its default '\\n'
69  * delimiter, is used to read UTF-8 characters using, say,
70  * Cgu::fdistream, because a whole '\\n' delimited line of UTF-8
71  * characters will always be complete.
72  *
73  * This is an example of its use, reading from a pipe until it is
74  * closed by the writer and putting the received text in a
75  * GtkTextBuffer object:
76  * @code
77  * using namespace Cgu;
78  *
79  * GtkTextIter end;
80  * GtkTextBuffer* text_buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_view));
81  * gtk_text_buffer_get_end_iter(text_buffer, &end);
82  *
83  * Utf8::Reassembler reassembler;
84  * const int BSIZE = 1024;
85  * char read_buffer[BSIZE];
86  * ssize_t res;
87  * do {
88  *   res = ::read(fd, read_buffer, BSIZE);
89  *   if (res > 0) {
90  *     SharedHandle<char*> utf8(reassembler(read_buffer, res));
91  *     if (utf8.get()) {
92  *       gtk_text_buffer_insert(text_buffer, &end,
93  *                              utf8.get(), std::strlen(utf8));
94  *     }
95  *     else std::cerr << "Invalid utf8 text sent over pipe\n";
96  *   }
97  * } while (res && (res != -1 || errno == EINTR));
98  * @endcode
99  */
100 
101 class Reassembler {
102  size_t stored; // number of bytes of a partially formed character held over for the next call; this is what get_stored() reports and what reset() zeroes
103  const static size_t buff_size = 6; // capacity of 'buffer' in bytes; presumably sized for the longest UTF-8 sequence form - TODO confirm
104  char buffer[buff_size]; // presumably holds the held-over bytes counted by 'stored'; it is only used by code outside this header - confirm
105  char* join_buffer(const char*, size_t); // private helper defined outside this header; presumably joins the held-over bytes with the new input - confirm
106 public:
107 /**
108  * Takes a byte array of wholly or partly formed UTF-8 characters to
109  * be converted (after taking account of previous calls to the method)
110  * to a valid string of wholly formed characters.
111  * @param input The input array.
112  * @param size The number of bytes in the input (not the number of
113  * UTF-8 characters).
114  * @return A Cgu::SharedHandle<char*> object holding a nul-terminated
115  * string comprising such of the input (after inserting, at the
116  * beginning, any partially formed UTF-8 character which was at the
117  * end of the input passed in previous calls to the functor) as forms
118  * complete UTF-8 characters (storing any partial character at the end
119  * for the next call to the functor). If the input is invalid after
120  * such recombination, then a null Cgu::SharedHandle<char*> object is
121  * returned (that is, Cgu::SharedHandle<char*>::get() will return 0).
122  * Such input will not be treated as invalid if it consists only of a
123  * single partly formed UTF-8 character which could be valid if
124  * further bytes were received and added to it. In that case the
125  * returned Cgu::SharedHandle<char*> object will contain an allocated
126  * string of zero length, comprising only a terminating \0 character,
127  * rather than a NULL pointer.
128  * @exception std::bad_alloc The method might throw std::bad_alloc if
129  * memory is exhausted and the system throws in that case. It will
130  * not throw any other exception.
131  */
132  Cgu::SharedHandle<char*> operator()(const char* input, size_t size);
133 
134 /**
135  * Gets the number of bytes of a partially formed UTF-8 character
136  * stored for the next call to operator()(). It will not throw.
137  * @return The number of bytes (0 after construction or after reset()).
138  */
139  size_t get_stored() const {return stored;}
140 
141 /**
142  * Resets the Reassembler, by discarding any partially formed UTF-8
143  * character from previous calls to operator()(). Only the stored byte
144  * count is set to 0; nothing else is modified. It will not throw.
145  */
146  void reset() {stored = 0;}
147 
148 /**
149  * Constructs a Reassembler with a stored byte count of 0 and a
150  * value-initialized (zero-filled) buffer. The constructor will not throw.
151  */
152  Reassembler(): stored(0), buffer() {}
153 
152 /* Only has effect if --with-glib-memory-slices-compat or
153  * --with-glib-memory-slices-no-compat option picked. NOTE(review): the declaration this comment describes (listing line 154) is not visible here; presumably CGU_GLIB_MEMORY_SLICES_FUNCS - confirm */
155 };
156 
157 } // namespace Utf8
158 
159 } // namespace Cgu
160 
161 #endif
Cgu
Definition: application.h:44
Cgu::Utf8::Reassembler
A class for reassembling UTF-8 strings sent over pipes and sockets so they form complete valid UTF-8 ...
Definition: reassembler.h:101
Cgu::SharedHandle
This is a generic class for managing the lifetime of objects allocated on freestore.
Definition: shared_handle.h:451
Cgu::Utf8::Reassembler::get_stored
size_t get_stored() const
Definition: reassembler.h:139
CGU_GLIB_MEMORY_SLICES_FUNCS
#define CGU_GLIB_MEMORY_SLICES_FUNCS
Definition: cgu_config.h:84
shared_handle.h
Cgu::Utf8::Reassembler::reset
void reset()
Definition: reassembler.h:145
Cgu::Utf8::Reassembler::Reassembler
Reassembler()
Definition: reassembler.h:150
cgu_config.h
Cgu::Utf8::Reassembler::operator()
Cgu::SharedHandle< char * > operator()(const char *input, size_t size)