From 379fd2cf9b8a03624ac2b6a534b6d78dacf6c684 Mon Sep 17 00:00:00 2001 From: wmayer Date: Fri, 15 Aug 2025 20:45:11 +0200 Subject: [PATCH 1/3] App: Fix output string to XML Not all unicode characters are allowed as XML output. When writing disallowed characters the SAX parser throws an exception when loading a project file that results into a broken document and thus to a possible loss of data. This PR replaces all disallowed characters with an underscore and prints a warning. This fixes https://github.com/FreeCAD/FreeCAD/issues/22123 Note: It does not fix an already corrupted project file. --- src/App/PropertyStandard.cpp | 10 +++++++++ src/Base/Persistence.cpp | 39 ++++++++++++++++++++++++++++++++++++ src/Base/Persistence.h | 2 ++ src/Base/PreCompiled.h | 3 +++ 4 files changed, 54 insertions(+) diff --git a/src/App/PropertyStandard.cpp b/src/App/PropertyStandard.cpp index 130bcd2016..e93470a8e5 100644 --- a/src/App/PropertyStandard.cpp +++ b/src/App/PropertyStandard.cpp @@ -1509,6 +1509,14 @@ void PropertyString::setPyObject(PyObject* value) void PropertyString::Save(Base::Writer& writer) const { + auto verifyXMLString = [this](std::string& input) { + const std::string output = this->validateXMLString(input); + if (output != input) { + Base::Console().warning("XML output: Validate invalid string:\n'%s'\n'%s'\n", + input, output); + } + return output; + }; std::string val; auto obj = freecad_cast(getContainer()); writer.Stream() << writer.ind() << "getNameInDocument()) { writer.Stream() << "restore=\"0\" "; val = encodeAttribute(obj->getExportName()); + val = verifyXMLString(val); exported = true; } } if (!exported) { val = encodeAttribute(_cValue); + val = verifyXMLString(val); } writer.Stream() << "value=\"" << val << "\"/>" << std::endl; } diff --git a/src/Base/Persistence.cpp b/src/Base/Persistence.cpp index 3c4aa62f7a..6110555700 100644 --- a/src/Base/Persistence.cpp +++ b/src/Base/Persistence.cpp @@ -24,7 +24,11 @@ #include "PreCompiled.h" #ifndef _PreComp_ +#include +#include #include +#include +#include #endif #include @@ -112,6 +116,41 @@ std::string Persistence::encodeAttribute(const std::string& str) return tmp; } +// clang-format off +// https://www.w3.org/TR/xml/#charsets +static constexpr std::array, 6> validRanges {{ + {0x9, 0x9}, + {0xA, 0xA}, + {0xD, 0xD}, + {0x20, 0xD7FF}, + {0xE000, 0xFFFD}, + {0x10000, 0x10FFFF}, +}}; +// clang-format on + +/*! + * In XML not all valid Unicode characters are allowed. Replace all + * disallowed characters with '_' + */ +std::string Persistence::validateXMLString(const std::string& str) +{ + std::wstring_convert, char32_t> cvt; + std::u32string cp_in = cvt.from_bytes(str); + std::u32string cp_out; + cp_out.reserve(cp_in.size()); + for (auto cp : cp_in) { + if (std::any_of(validRanges.begin(), validRanges.end(), [cp](const auto& range){ + return cp >= range.first && cp <= range.second; + })) { + cp_out += cp; + } + else { + cp_out += '_'; + } + } + return cvt.to_bytes(cp_out); +} + void Persistence::dumpToStream(std::ostream& stream, int compression) { // we need to close the zipstream to get a good result, the only way to do this is to delete the diff --git a/src/Base/Persistence.h b/src/Base/Persistence.h index 8355482a63..6da07335ba 100644 --- a/src/Base/Persistence.h +++ b/src/Base/Persistence.h @@ -147,6 +147,8 @@ public: virtual void RestoreDocFile(Reader& /*reader*/); /// Encodes an attribute upon saving. static std::string encodeAttribute(const std::string&); + /// Replaces all characters with '_' that are not allowed in XML + static std::string validateXMLString(const std::string& str); // dump the binary persistence data into into the stream void dumpToStream(std::ostream& stream, int compression); diff --git a/src/Base/PreCompiled.h b/src/Base/PreCompiled.h index dd7d0a2182..301a4b37f7 100644 --- a/src/Base/PreCompiled.h +++ b/src/Base/PreCompiled.h @@ -32,6 +32,8 @@ #include // standard +#include +#include #include #include #include @@ -39,6 +41,7 @@ #include #include #include +#include #ifdef FC_OS_WIN32 #include From a8c41df1714014af6ad94ddd4f45de739ebb8d27 Mon Sep 17 00:00:00 2001 From: wmayer Date: Fri, 15 Aug 2025 20:46:11 +0200 Subject: [PATCH 2/3] Test: Add test cases for Persistence::validateXMLString --- src/Mod/Test/Document.py | 11 +++++++++++ tests/src/Base/Reader.cpp | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/Mod/Test/Document.py b/src/Mod/Test/Document.py index 75d207dcbb..09cf409878 100644 --- a/src/Mod/Test/Document.py +++ b/src/Mod/Test/Document.py @@ -666,6 +666,17 @@ class DocumentBasicCases(unittest.TestCase): root = ET.fromstring(test.Content) self.assertEqual(root.tag, "Properties") + def testValidateXml(self): + self.Doc.openTransaction("Add") + obj = self.Doc.addObject("App::FeatureTest","Label") + obj.Label = "abc\x01ef" + TempPath = tempfile.gettempdir() + SaveName = TempPath + os.sep + "CreateTest.FCStd" + self.Doc.saveAs(SaveName) + FreeCAD.closeDocument(self.Doc.Name) + self.Doc = FreeCAD.open(SaveName) + self.assertEqual(self.Doc.ActiveObject.Label, "abc_ef") + def tearDown(self): # closing doc FreeCAD.closeDocument("CreateTest") diff --git a/tests/src/Base/Reader.cpp b/tests/src/Base/Reader.cpp index 371863baaa..27cee37d6e 100644 --- a/tests/src/Base/Reader.cpp +++ b/tests/src/Base/Reader.cpp @@ -7,6 +7,7 @@ #endif #include "Base/Exception.h" +#include "Base/Persistence.h" #include "Base/Reader.h" #include #include @@ -459,3 +460,13 @@ TEST_F(ReaderTest, validDefaults) EXPECT_THROW({ xml.Reader()->getAttribute("missing"); }, Base::XMLBaseException); EXPECT_EQ(value20, TimesIGoToBed::Late); } + +TEST_F(ReaderTest, validateXmlString) +{ + std::string input = "abcde"; + std::string output = input; + input.push_back(char(15)); + output.push_back('_'); + std::string result = Base::Persistence::validateXMLString(input); + EXPECT_EQ(output, result); +} From 117b4821c235dc715d8e96f4f943c3fb2bdf57cc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 31 Aug 2025 12:41:57 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/Base/Persistence.cpp | 2 +- src/Mod/Test/Document.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Base/Persistence.cpp b/src/Base/Persistence.cpp index 6110555700..8a90f858c7 100644 --- a/src/Base/Persistence.cpp +++ b/src/Base/Persistence.cpp @@ -139,7 +139,7 @@ std::string Persistence::validateXMLString(const std::string& str) std::u32string cp_out; cp_out.reserve(cp_in.size()); for (auto cp : cp_in) { - if (std::any_of(validRanges.begin(), validRanges.end(), [cp](const auto& range){ + if (std::any_of(validRanges.begin(), validRanges.end(), [cp](const auto& range) { return cp >= range.first && cp <= range.second; })) { cp_out += cp; diff --git a/src/Mod/Test/Document.py b/src/Mod/Test/Document.py index 09cf409878..8c32849489 100644 --- a/src/Mod/Test/Document.py +++ b/src/Mod/Test/Document.py @@ -668,7 +668,7 @@ class DocumentBasicCases(unittest.TestCase): def testValidateXml(self): self.Doc.openTransaction("Add") - obj = self.Doc.addObject("App::FeatureTest","Label") + obj = self.Doc.addObject("App::FeatureTest", "Label") obj.Label = "abc\x01ef" TempPath = tempfile.gettempdir() SaveName = TempPath + os.sep + "CreateTest.FCStd"