App: Fix output string to XML
Not all Unicode characters are allowed in XML output. When disallowed characters are written, the SAX parser throws an exception when loading the project file, which results in a broken document and thus in a possible loss of data. This PR replaces all disallowed characters with an underscore and prints a warning. This fixes https://github.com/FreeCAD/FreeCAD/issues/22123 Note: It does not fix an already corrupted project file.
This commit is contained in:
@@ -1509,6 +1509,14 @@ void PropertyString::setPyObject(PyObject* value)
|
||||
|
||||
void PropertyString::Save(Base::Writer& writer) const
|
||||
{
|
||||
auto verifyXMLString = [this](std::string& input) {
|
||||
const std::string output = this->validateXMLString(input);
|
||||
if (output != input) {
|
||||
Base::Console().warning("XML output: Validate invalid string:\n'%s'\n'%s'\n",
|
||||
input, output);
|
||||
}
|
||||
return output;
|
||||
};
|
||||
std::string val;
|
||||
auto obj = freecad_cast<DocumentObject*>(getContainer());
|
||||
writer.Stream() << writer.ind() << "<String ";
|
||||
@@ -1520,11 +1528,13 @@ void PropertyString::Save(Base::Writer& writer) const
|
||||
else if (_cValue == obj->getNameInDocument()) {
|
||||
writer.Stream() << "restore=\"0\" ";
|
||||
val = encodeAttribute(obj->getExportName());
|
||||
val = verifyXMLString(val);
|
||||
exported = true;
|
||||
}
|
||||
}
|
||||
if (!exported) {
|
||||
val = encodeAttribute(_cValue);
|
||||
val = verifyXMLString(val);
|
||||
}
|
||||
writer.Stream() << "value=\"" << val << "\"/>" << std::endl;
|
||||
}
|
||||
|
||||
@@ -24,7 +24,11 @@
|
||||
#include "PreCompiled.h"
|
||||
|
||||
#ifndef _PreComp_
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <codecvt>
|
||||
#include <locale>
|
||||
#endif
|
||||
|
||||
#include <zipios++/zipinputstream.h>
|
||||
@@ -112,6 +116,41 @@ std::string Persistence::encodeAttribute(const std::string& str)
|
||||
return tmp;
|
||||
}
|
||||
|
||||
// clang-format off
// Inclusive code point ranges that may appear in an XML document,
// taken from https://www.w3.org/TR/xml/#charsets
static constexpr std::array<std::pair<char32_t, char32_t>, 6> validRanges {{
    {0x000009, 0x000009},  // horizontal tab
    {0x00000A, 0x00000A},  // line feed
    {0x00000D, 0x00000D},  // carriage return
    {0x000020, 0x00D7FF},  // space up to the last code point before the surrogates
    {0x00E000, 0x00FFFD},  // after the surrogates, without U+FFFE and U+FFFF
    {0x010000, 0x10FFFF},  // supplementary planes
}};
// clang-format on
|
||||
|
||||
/*!
|
||||
* In XML not all valid Unicode characters are allowed. Replace all
|
||||
* disallowed characters with '_'
|
||||
*/
|
||||
std::string Persistence::validateXMLString(const std::string& str)
|
||||
{
|
||||
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> cvt;
|
||||
std::u32string cp_in = cvt.from_bytes(str);
|
||||
std::u32string cp_out;
|
||||
cp_out.reserve(cp_in.size());
|
||||
for (auto cp : cp_in) {
|
||||
if (std::any_of(validRanges.begin(), validRanges.end(), [cp](const auto& range){
|
||||
return cp >= range.first && cp <= range.second;
|
||||
})) {
|
||||
cp_out += cp;
|
||||
}
|
||||
else {
|
||||
cp_out += '_';
|
||||
}
|
||||
}
|
||||
return cvt.to_bytes(cp_out);
|
||||
}
|
||||
|
||||
void Persistence::dumpToStream(std::ostream& stream, int compression)
|
||||
{
|
||||
// we need to close the zipstream to get a good result, the only way to do this is to delete the
|
||||
|
||||
@@ -147,6 +147,8 @@ public:
|
||||
virtual void RestoreDocFile(Reader& /*reader*/);
|
||||
/// Encodes an attribute upon saving.
|
||||
static std::string encodeAttribute(const std::string&);
|
||||
/// Replaces all characters with '_' that are not allowed in XML
|
||||
static std::string validateXMLString(const std::string& str);
|
||||
|
||||
// dump the binary persistence data into the stream
|
||||
void dumpToStream(std::ostream& stream, int compression);
|
||||
|
||||
@@ -32,6 +32,8 @@
|
||||
#include <Python.h>
|
||||
|
||||
// standard
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <fcntl.h>
|
||||
#include <cstdio>
|
||||
#include <cassert>
|
||||
@@ -39,6 +41,7 @@
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <codecvt>
|
||||
#include <locale>
|
||||
|
||||
#ifdef FC_OS_WIN32
|
||||
#include <direct.h>
|
||||
|
||||
Reference in New Issue
Block a user