App: Fix output string to XML

Not all Unicode characters are allowed in XML output. If disallowed characters are written, the SAX parser throws an exception
when the project file is loaded, which results in a broken document and thus in a possible loss of data.

This PR replaces all disallowed characters with an underscore and prints a warning.

This fixes https://github.com/FreeCAD/FreeCAD/issues/22123
Note: It does not fix an already corrupted project file.
This commit is contained in:
wmayer
2025-08-15 20:45:11 +02:00
committed by Max Wilfinger
parent 9d806694ff
commit 696c18b6da
4 changed files with 54 additions and 0 deletions

View File

@@ -24,7 +24,11 @@
#include "PreCompiled.h"
#ifndef _PreComp_
#include <algorithm>
#include <array>
#include <cassert>
#include <codecvt>
#include <locale>
#endif
#include <zipios++/zipinputstream.h>
@@ -112,6 +116,41 @@ std::string Persistence::encodeAttribute(const std::string& str)
return tmp;
}
// clang-format off
// Inclusive [first, last] code point ranges accepted by the XML 1.0 "Char"
// production, see https://www.w3.org/TR/xml/#charsets
static constexpr std::array<std::pair<char32_t, char32_t>, 6> validRanges {{
    {0x000009, 0x000009},  // horizontal tab
    {0x00000A, 0x00000A},  // line feed
    {0x00000D, 0x00000D},  // carriage return
    {0x000020, 0x00D7FF},  // from space up to just below the surrogate block
    {0x00E000, 0x00FFFD},  // after the surrogate block, excluding U+FFFE and U+FFFF
    {0x010000, 0x10FFFF},  // supplementary planes
}};
// clang-format on
/*!
 * In XML not all valid Unicode characters are allowed. Replace all
 * disallowed characters with '_'.
 *
 * \a str is interpreted as UTF-8. Every code point that is not covered by
 * \c validRanges is replaced with a single '_'. Bytes that do not form a
 * well-formed UTF-8 sequence (stray continuation bytes, truncated or overlong
 * sequences, invalid lead bytes) are replaced with one '_' per offending byte
 * instead of raising an exception.
 *
 * \param str the UTF-8 encoded input string
 * \return a copy of \a str where every disallowed or undecodable part has
 *         been replaced with '_'; allowed characters are copied unchanged
 */
std::string Persistence::validateXMLString(const std::string& str)
{
    const auto isAllowed = [](char32_t cp) {
        return std::any_of(validRanges.begin(), validRanges.end(), [cp](const auto& range) {
            return cp >= range.first && cp <= range.second;
        });
    };

    const std::size_t size = str.size();
    std::string result;
    result.reserve(size);

    std::size_t pos = 0;
    while (pos < size) {
        const auto lead = static_cast<unsigned char>(str[pos]);

        // Derive the sequence length, the payload bits of the lead byte and the
        // smallest code point that legitimately needs that many bytes.
        std::size_t length = 0;
        char32_t cp = 0;
        char32_t minimum = 0;
        if (lead < 0x80) {
            length = 1;
            cp = lead;
        }
        else if ((lead & 0xE0) == 0xC0) {
            length = 2;
            cp = static_cast<char32_t>(lead & 0x1F);
            minimum = 0x80;
        }
        else if ((lead & 0xF0) == 0xE0) {
            length = 3;
            cp = static_cast<char32_t>(lead & 0x0F);
            minimum = 0x800;
        }
        else if ((lead & 0xF8) == 0xF0) {
            length = 4;
            cp = static_cast<char32_t>(lead & 0x07);
            minimum = 0x10000;
        }

        // length == 0 means the byte cannot start a sequence at all
        bool wellFormed = (length != 0) && (length <= size - pos);
        for (std::size_t i = 1; wellFormed && i < length; ++i) {
            const auto trail = static_cast<unsigned char>(str[pos + i]);
            if ((trail & 0xC0) == 0x80) {
                cp = (cp << 6) | static_cast<char32_t>(trail & 0x3F);
            }
            else {
                wellFormed = false;
            }
        }
        // Reject overlong encodings of smaller code points
        if (wellFormed && cp < minimum) {
            wellFormed = false;
        }

        if (!wellFormed) {
            // Replace only the offending byte and resynchronize on the next one
            result += '_';
            ++pos;
            continue;
        }

        if (isAllowed(cp)) {
            // The sequence is already canonical UTF-8, so copy it verbatim
            result.append(str, pos, length);
        }
        else {
            result += '_';
        }
        pos += length;
    }

    return result;
}
void Persistence::dumpToStream(std::ostream& stream, int compression)
{
// we need to close the zipstream to get a good result, the only way to do this is to delete the