libabigail
abg-libxml-utils.cc
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2// -*- mode: C++ -*-
3//
4// Copyright (C) 2013-2025 Red Hat, Inc.
5
6/// @file
7
8#include <string>
9#include <iostream>
10#include <fstream>
11#include "abg-tools-utils.h"
12
13#include "abg-internal.h"
14// <headers defining libabigail's API go under here>
15ABG_BEGIN_EXPORT_DECLARATIONS
16
17#include "abg-libxml-utils.h"
18
19ABG_END_EXPORT_DECLARATIONS
20// </headers defining libabigail's API>
21
22namespace abigail
23{
24
25namespace sptr_utils
26{
27/// Build and return a shared_ptr for a pointer to xmlTextReader
28template<>
29shared_ptr<xmlTextReader>
30build_sptr<xmlTextReader>(::xmlTextReader *p)
31{
32 return shared_ptr<xmlTextReader>(p, abigail::xml::textReaderDeleter());
33}
34
35/// Build and return a shared_ptr for a pointer to xmlChar
36template<>
37shared_ptr<xmlChar>
38build_sptr<xmlChar>(xmlChar *p)
39{
40 return shared_ptr<xmlChar>(p, abigail::xml::charDeleter());
41}
42
43}//end namespace sptr_utils
44
45namespace xml
46{
47using std::istream;
48using std::ifstream;
52
53// <xmlIO callbacks for xz reading support>
54
55
56/// This is an xmlIO callback function used in the libxml2 I/O input
57/// API to detect if the current handler can provider input
58/// functionality for a file designed by a path.
59///
60/// This function should return 1 iff the file contains XZ-compressed
61/// data.
62///
63/// @param filepath the path to file to consider.
64///
65/// @return 1 iff the file designated by @p filepath is XZ-compressed.
66static int
67xz_io_match_cb(const char*filepath)
68{
69 bool does_match = false;
70 file_type t = guess_file_type(filepath, /*look_through_compression=*/false);
72 does_match = true;
73
74 return does_match;
75}
76
77/// This is the context used by the xmlIO handler that provides input
78/// functionality to the libxml2 I/O input API for XZ-compressed XML
79/// files.
80struct xz_ctxt_type
81{
82 // The input XZ-compressed file stream.
83 std::unique_ptr<std::ifstream> input_fstream;
84 // The custom XZ-decompressor streambuf provided by tools-utils.
85 std::unique_ptr<xz_decompressor_type> decompressor_streambuf;
86 // The decompressed input stream that we can read from.
87 std::unique_ptr<std::istream> decompressed_input_stream;
88
89 xz_ctxt_type() = delete;
90
91 /// Constructor.
92 ///
93 /// @param is the XZ-compressed input file stream to consider.
94 xz_ctxt_type(std::ifstream* is)
95 : input_fstream(is),
96 decompressor_streambuf(new xz_decompressor_type(*is)),
97 decompressed_input_stream(new istream(decompressor_streambuf.get()))
98 {}
99}; // end struct xz_ctxt_type.
100
101/// Callback used in the I/O input API of libxml2 to open a file
102/// designated by a path and containing XZ-compressed content.
103///
104/// @param filepath the path to the file to open. The file should
105/// contain XZ-compressed data, as detected by @ref xz_io_match_cb.
106///
107/// @return a pointer to an instance of @ref xz_ctxt_type if the
108/// function could successfully open the file denoted by @p filepath.
109/// Please note that this instance of @ref xz_ctxt_type has to be
110/// deleted by @ref xz_io_close_cb.
111static void*
112xz_io_open_cb(const char* filepath)
113{
114 std::ifstream* s = new std::ifstream(filepath, ifstream::binary);
115 if (s->bad())
116 {
117 delete s;
118 return nullptr;
119 }
120
121 xz_ctxt_type *ctxt = new xz_ctxt_type(s);
122 return ctxt;
123}
124
125/// Callback used in the I/O input API of libxml2 to read and
126/// decompress data from an XZ-compressed file previously opened by
127/// @ref xz_io_open_cb.
128///
129/// @param context a pointer to the instance of @ref xz_ctxt_type
130/// returned by @ref xz_io_open_cb. That context is used to read and
131/// decompress the XZ-compressed data coming from input file.
132///
133/// @param buffer the buffer where to copy the XZ-decompressed data.
134///
135/// @param len the length of @p buffer.
136///
137/// @return the actual number of bytes decompressed and copied into @p
138/// buffer.
139static int
140xz_io_read_cb(void* context, char *buffer, int len)
141{
142 xz_ctxt_type *ctxt = static_cast<xz_ctxt_type *>(context);
143 ctxt->decompressed_input_stream->read(buffer, len);
144 int nb_bytes_read = ctxt->decompressed_input_stream->gcount();
145 return nb_bytes_read;
146}
147
148/// Callback used in the I/O input API of libxml2 to delete the
149/// instance of @ref xz_ctxt_type created by @ref xz_io_open_cb and
150/// free its associated resources.
151///
152/// @param context the pointer to the instance of @ref xz_ctxt_type to
153/// delete.
154///
155/// @return 0 iff the operation was successful.
156static int
157xz_io_close_cb(void* context)
158{
159 xz_ctxt_type *ctxt = static_cast<xz_ctxt_type*>(context);
160 ctxt->decompressed_input_stream.reset();
161 ctxt->input_fstream->close();
162 ctxt->input_fstream.reset();
163 delete ctxt;
164 return 0;
165}
166
167// </xmlIO callbacks for xz reading support>
168
169/// The initialization function of libxml2 abstraction layer. This
170/// function must be called prior to using any of the libxml2 capabilities.
171void
173{
174 LIBXML_TEST_VERSION;
175 xmlInitParser();
176 xmlRegisterInputCallbacks(xz_io_match_cb, xz_io_open_cb,
177 xz_io_read_cb, xz_io_close_cb);
178}
179
180/// Instantiate an xmlTextReader that parses the content of an on-disk
181/// file, wrap it into a smart pointer and return it.
182///
183/// @param path the path to the file to be parsed by the returned
184/// instance of xmlTextReader.
186new_reader_from_file(const std::string& path)
187{
188 reader_sptr p =
189 build_sptr(xmlNewTextReaderFilename (path.c_str()));
190
191 return p;
192}
193
194/// Instanciate an xmlTextReader that parses the content of an
195/// in-memory buffer, wrap it into a smart pointer and return it.
196///
197/// @param buffer the in-memory buffer to be parsed by the returned
198/// instance of xmlTextReader.
200new_reader_from_buffer(const std::string& buffer)
201{
202 reader_sptr p =
203 build_sptr(xmlReaderForMemory(buffer.c_str(),
204 buffer.length(),
205 "", 0, 0));
206 return p;
207}
208
/// This is an xmlInputReadCallback, meant to be passed to
/// xmlReaderForIO.  It reads a number of bytes from an istream.
///
/// @param context an std::istream* cast into a void*.  This is the
/// istream that the xmlTextReader is to read data from.
///
/// @param buffer the buffer where to copy the data read from the
/// input stream.
///
/// @param len the number of bytes to read from the input stream and
/// to copy into @p buffer.
///
/// @return the number of bytes read (0 at end of input), or -1 in
/// case of error, that is, when an argument is invalid or when the
/// stream is in the "bad" state and no byte could be read.
static int
xml_istream_input_read(void* context,
		       char* buffer,
		       int len)
{
  std::istream* in = static_cast<std::istream*>(context);
  if (in == nullptr || buffer == nullptr || len < 0)
    return -1;

  in->read(buffer, len);
  const std::streamsize nb_bytes_read = in->gcount();

  // Reaching the end of the input only sets eofbit/failbit, which is
  // reported as a short (possibly zero) read.  badbit with nothing
  // read is reported as an error, as documented.
  if (nb_bytes_read == 0 && in->bad())
    return -1;

  return static_cast<int>(nb_bytes_read);
}
231
/// This is an xmlInputCloseCallback, meant to be passed to
/// xmlReaderForIO.  Such a callback is supposed to close the input
/// stream that the xmlTextReader reads from.  This implementation
/// deliberately does nothing.
///
/// @return 0.
static int
xml_istream_input_close(void*)
{
  return 0;
}
241
242/// Instanciate an xmlTextReader that parses a content coming from an
243/// input stream.
244///
245/// @param in the input stream to consider.
246///
247/// @return reader_sptr a pointer to the newly instantiated xml
248/// reader.
250new_reader_from_istream(std::istream* in)
251{
252 reader_sptr p =
253 build_sptr(xmlReaderForIO(&xml_istream_input_read,
254 &xml_istream_input_close,
255 in, "", 0, 0));
256 return p;
257}
258
259/// Convert a shared pointer to xmlChar into an std::string.
260///
261/// If the xmlChar is NULL, set "" to the string.
262///
263/// @param ssptr the shared point to xmlChar to convert.
264///
265/// @param s the output string.
266///
267/// @return true if the shared pointer to xmlChar contained a non NULL
268/// string, false otherwise.
269bool
271{
272 bool non_nil = false;
273 if (CHAR_STR(ssptr))
274 {
275 s = CHAR_STR(ssptr);
276 non_nil = true;
277 }
278 else
279 {
280 s = "";
281 non_nil = false;
282 }
283
284 return non_nil;
285}
286
287/// Return the depth of an xml element node.
288///
289/// Note that the node must be attached to an XML document.
290///
291/// @param n the xml to consider.
292///
293/// @return a positive or zero number for an XML node properly
294/// attached to an xml document, -1 otherwise. Note that the function
295/// returns -1 if passed an xml document as well.
296int
298{
299 if (n->type == XML_DOCUMENT_NODE || n->parent == NULL)
300 return -1;
301
302 if (n->parent->type == XML_DOCUMENT_NODE)
303 return 0;
304
305 return 1 + get_xml_node_depth(n->parent);
306}
307
/// Escape the 5 characters that have a predefined XML entity.
///
/// The characters and the entities they are replaced with are:
///
/// '<' becomes &lt;, '>' becomes &gt;, '&' becomes &amp;, ''' becomes
/// &apos; and '"' becomes &quot;.  Any other character is copied
/// unchanged.
///
/// @param str the input string in which to look for the characters
/// to escape.
///
/// @param escaped the output string.  The escaped form of @p str is
/// appended to it; its previous content is kept.
void
escape_xml_string(const std::string& str,
		  std::string& escaped)
{
  for (const char c : str)
    {
      if (c == '<')
	escaped += "&lt;";
      else if (c == '>')
	escaped += "&gt;";
      else if (c == '&')
	escaped += "&amp;";
      else if (c == '\'')
	escaped += "&apos;";
      else if (c == '"')
	escaped += "&quot;";
      else
	escaped += c;
    }
}
348
349/// Escape the 5 characters representing the predefined XML entities.
350///
351/// The resulting entities and their matching characters are:
352///
353/// &lt; for the character '<', &gt; for the character '>', &apos; for
354/// the character ''', &quot; for the character '"', and &amp; for the
355/// character '&'.
356///
357//// @param str the input string to read to search for the characters
358//// to escape.
359////
360//// @return the resulting string that contains the pre-defined
361//// characters escaped as predefined entitites.
362std::string
363escape_xml_string(const std::string& str)
364{
365 std::string result;
366 escape_xml_string(str, result);
367 return result;
368}
369
/// Escape the '-' character, to avoid having a '--' in a comment.
///
/// Each '-' is replaced with the entity '&#45;'.  Any other character
/// is copied unchanged.
///
/// @param str the input string in which to look for the character to
/// escape.
///
/// @param escaped the output string.  The escaped form of @p str is
/// appended to it; its previous content is kept.
void
escape_xml_comment(const std::string& str,
		   std::string& escaped)
{
  for (const char c : str)
    {
      if (c == '-')
	escaped += "&#45;";
      else
	escaped += c;
    }
}
394
395/// Escape the '-' character, to avoid having a '--' in a comment.
396///
397/// The resulting entity for '-' is '&#45;'.
398///
399//// @param str the input string to read to search for the characters
400//// to escape.
401////
402//// @return the resulting string that contains the pre-defined
403//// characters escaped as predefined entitites.
404std::string
405escape_xml_comment(const std::string& str)
406{
407 std::string result;
408 escape_xml_comment(str, result);
409 return result;
410}
411
/// Read a string, detect the 5 predefined XML entities it may contain
/// and un-escape them by writing their corresponding characters back
/// in.  The predefined entities are:
///
/// &lt; for the character '<', &gt; for the character '>', &apos; for
/// the character ''', &quot; for the character '"', and &amp; for the
/// character '&'.
///
/// A '&' that does not start one of these entities is copied
/// unchanged, like any other character.
///
/// @param str the input XML string to consider.
///
/// @param escaped the output string.  The un-escaped form of @p str
/// is appended to it; its previous content is kept.
void
unescape_xml_string(const std::string& str,
		    std::string& escaped)
{
  // One entry per predefined entity: its text, the length of that
  // text and the character it stands for.
  struct entity_mapping
  {
    const char*		text;
    std::string::size_type	length;
    char			character;
  };
  static const entity_mapping entities[] =
    {
      {"&lt;", 4, '<'},
      {"&gt;", 4, '>'},
      {"&amp;", 5, '&'},
      {"&apos;", 6, '\''},
      {"&quot;", 6, '"'},
    };

  std::string::size_type pos = 0;
  while (pos < str.size())
    {
      // Look for an entity starting at the current position.
      const entity_mapping* found = nullptr;
      if (str[pos] == '&')
	for (const entity_mapping& e : entities)
	  if (str.compare(pos, e.length, e.text) == 0)
	    {
	      found = &e;
	      break;
	    }

      if (found)
	{
	  escaped += found->character;
	  pos += found->length;
	}
      else
	{
	  escaped += str[pos];
	  ++pos;
	}
    }
}
485
486/// Read a string, detect the 5 predefined XML entities it may contain
487/// and un-escape them, by writting their corresponding characters
488/// back in. The pre-defined entities are:
489///
490/// &lt; for the character '<', &gt; for the character '>', &apos; for
491/// the character ''', &quot; for the character '"', and &amp; for the
492/// character '&'.
493///
494/// @param str the input XML string to consider.
495///
496/// @return escaped where to write the resulting un-escaped string.
497std::string
498unescape_xml_string(const std::string& str)
499{
500 std::string result;
501 unescape_xml_string(str, result);
502 return result;
503}
504
/// Read a string, detect the '&#45;' entity and un-escape it into the
/// '-' character.
///
/// Any character that is not part of a complete '&#45;' entity is
/// copied unchanged.
///
/// @param str the input XML string to consider.
///
/// @param escaped the output string.  The un-escaped form of @p str
/// is appended to it; its previous content is kept.
void
unescape_xml_comment(const std::string& str,
		     std::string& escaped)
{
  static const char dash_entity[] = "&#45;";
  // Do not count the terminating null character.
  const std::string::size_type dash_entity_length = sizeof(dash_entity) - 1;

  for (std::string::size_type pos = 0; pos < str.size();)
    {
      if (str.compare(pos, dash_entity_length, dash_entity) == 0)
	{
	  escaped += '-';
	  pos += dash_entity_length;
	}
      else
	{
	  escaped += str[pos];
	  ++pos;
	}
    }
}
534
535/// Read a string, detect the '#&45;' entity and un-escape it into
536/// the '-' character.
537///
538/// @param str the input XML string to consider.
539///
540/// @return escaped where to write the resulting un-escaped string.
541std::string
542unescape_xml_comment(const std::string& str)
543{
544 std::string result;
545 unescape_xml_comment(str, result);
546 return result;
547}
548
549}//end namespace xml
550}//end namespace abigail
This is a custom std::streambuf that knows how to decompress an input stream that was compressed usin...
shared_ptr< xmlChar > build_sptr< xmlChar >(xmlChar *p)
Build and return a shared_ptr for a pointer to xmlChar.
shared_ptr< T > build_sptr(T *p)
This is to be specialized for the diverse C types that needs wrapping in shared_ptr.
shared_ptr< xmlTextReader > build_sptr< xmlTextReader >(::xmlTextReader *p)
Build and return a shared_ptr for a pointer to xmlTextReader.
file_type
The different types of files understood by the bi* suite of tools.
@ FILE_TYPE_XZ
The XZ (lzma) compression scheme.
file_type guess_file_type(istream &in)
Guess the type of the content of an input stream.
void unescape_xml_comment(const std::string &str, std::string &escaped)
Read a string, detect the '&#45;' entity and un-escape it into the '-' character.
reader_sptr new_reader_from_file(const std::string &path)
Instantiate an xmlTextReader that parses the content of an on-disk file, wrap it into a smart pointer...
void initialize()
The initialization function of libxml2 abstraction layer. This function must be called prior to using...
int get_xml_node_depth(xmlNodePtr n)
Return the depth of an xml element node.
bool xml_char_sptr_to_string(xml_char_sptr ssptr, std::string &s)
Convert a shared pointer to xmlChar into an std::string.
void escape_xml_comment(const std::string &str, std::string &escaped)
Escape the '-' character, to avoid having a '--' in a comment.
reader_sptr new_reader_from_buffer(const std::string &buffer)
Instantiate an xmlTextReader that parses the content of an in-memory buffer, wrap it into a smart poi...
shared_ptr< xmlChar > xml_char_sptr
A convenience typedef for a shared pointer of xmlChar.
void unescape_xml_string(const std::string &str, std::string &escaped)
Read a string, detect the 5 predefined XML entities it may contain and un-escape them,...
reader_sptr new_reader_from_istream(std::istream *in)
Instantiate an xmlTextReader that parses a content coming from an input stream.
shared_ptr< xmlTextReader > reader_sptr
A convenience typedef for a shared pointer of xmlTextReader.
void escape_xml_string(const std::string &str, std::string &escaped)
Escape the 5 characters representing the predefined XML entities.
Toplevel namespace for libabigail.
This functor is used to instantiate a shared_ptr for xmlChar.
This functor is used to instantiate a shared_ptr for the xmlTextReader.