From 379fd2cf9b8a03624ac2b6a534b6d78dacf6c684 Mon Sep 17 00:00:00 2001 From: wmayer Date: Fri, 15 Aug 2025 20:45:11 +0200 Subject: [PATCH] App: Fix output string to XML Not all unicode characters are allowed as XML output. When writing disallowed characters the SAX parser throws an exception when loading a project file that results into a broken document and thus to a possible loss of data. This PR replaces all disallowed characters with an underscore and prints a warning. This fixes https://github.com/FreeCAD/FreeCAD/issues/22123 Note: It does not fix an already corrupted project file. --- src/App/PropertyStandard.cpp | 10 +++++++++ src/Base/Persistence.cpp | 39 ++++++++++++++++++++++++++++++++++++ src/Base/Persistence.h | 2 ++ src/Base/PreCompiled.h | 3 +++ 4 files changed, 54 insertions(+) diff --git a/src/App/PropertyStandard.cpp b/src/App/PropertyStandard.cpp index 130bcd2016..e93470a8e5 100644 --- a/src/App/PropertyStandard.cpp +++ b/src/App/PropertyStandard.cpp @@ -1509,6 +1509,14 @@ void PropertyString::setPyObject(PyObject* value) void PropertyString::Save(Base::Writer& writer) const { + auto verifyXMLString = [this](std::string& input) { + const std::string output = this->validateXMLString(input); + if (output != input) { + Base::Console().warning("XML output: Validate invalid string:\n'%s'\n'%s'\n", + input, output); + } + return output; + }; std::string val; auto obj = freecad_cast(getContainer()); writer.Stream() << writer.ind() << "getNameInDocument()) { writer.Stream() << "restore=\"0\" "; val = encodeAttribute(obj->getExportName()); + val = verifyXMLString(val); exported = true; } } if (!exported) { val = encodeAttribute(_cValue); + val = verifyXMLString(val); } writer.Stream() << "value=\"" << val << "\"/>" << std::endl; } diff --git a/src/Base/Persistence.cpp b/src/Base/Persistence.cpp index 3c4aa62f7a..6110555700 100644 --- a/src/Base/Persistence.cpp +++ b/src/Base/Persistence.cpp @@ -24,7 +24,11 @@ #include "PreCompiled.h" #ifndef _PreComp_ +#include +#include #include +#include +#include #endif #include @@ -112,6 +116,41 @@ std::string Persistence::encodeAttribute(const std::string& str) return tmp; } +// clang-format off +// https://www.w3.org/TR/xml/#charsets +static constexpr std::array, 6> validRanges {{ + {0x9, 0x9}, + {0xA, 0xA}, + {0xD, 0xD}, + {0x20, 0xD7FF}, + {0xE000, 0xFFFD}, + {0x10000, 0x10FFFF}, +}}; +// clang-format on + +/*! + * In XML not all valid Unicode characters are allowed. Replace all + * disallowed characters with '_' + */ +std::string Persistence::validateXMLString(const std::string& str) +{ + std::wstring_convert, char32_t> cvt; + std::u32string cp_in = cvt.from_bytes(str); + std::u32string cp_out; + cp_out.reserve(cp_in.size()); + for (auto cp : cp_in) { + if (std::any_of(validRanges.begin(), validRanges.end(), [cp](const auto& range){ + return cp >= range.first && cp <= range.second; + })) { + cp_out += cp; + } + else { + cp_out += '_'; + } + } + return cvt.to_bytes(cp_out); +} + void Persistence::dumpToStream(std::ostream& stream, int compression) { // we need to close the zipstream to get a good result, the only way to do this is to delete the diff --git a/src/Base/Persistence.h b/src/Base/Persistence.h index 8355482a63..6da07335ba 100644 --- a/src/Base/Persistence.h +++ b/src/Base/Persistence.h @@ -147,6 +147,8 @@ public: virtual void RestoreDocFile(Reader& /*reader*/); /// Encodes an attribute upon saving. static std::string encodeAttribute(const std::string&); + /// Replaces all characters with '_' that are not allowed in XML + static std::string validateXMLString(const std::string& str); // dump the binary persistence data into into the stream void dumpToStream(std::ostream& stream, int compression); diff --git a/src/Base/PreCompiled.h b/src/Base/PreCompiled.h index dd7d0a2182..301a4b37f7 100644 --- a/src/Base/PreCompiled.h +++ b/src/Base/PreCompiled.h @@ -32,6 +32,8 @@ #include // standard +#include +#include #include #include #include @@ -39,6 +41,7 @@ #include #include #include +#include #ifdef FC_OS_WIN32 #include