diff --git a/src/Mod/Import/App/dxf/dxf.cpp b/src/Mod/Import/App/dxf/dxf.cpp
index b20d39b837..812fe1a6d3 100644
--- a/src/Mod/Import/App/dxf/dxf.cpp
+++ b/src/Mod/Import/App/dxf/dxf.cpp
@@ -18,7 +18,7 @@
#include
#include
#include
-
+#include
#include "dxf.h"
@@ -1766,11 +1766,16 @@ CDxfRead::CDxfRead(const char* filepath)
}
m_ifs->imbue(std::locale("C"));
+ m_version = RUnknown;
+ m_CodePage = NULL;
+ m_encoding = NULL;
}
CDxfRead::~CDxfRead()
{
delete m_ifs;
+ delete m_CodePage;
+ delete m_encoding;
}
double CDxfRead::mm( double value ) const
@@ -2418,8 +2423,12 @@ bool CDxfRead::ReadText()
// that ReadText() and all the other Read... methods return having already read a code 0.
get_line();
DerefACI();
- textPrefix.append(m_str);
- OnReadText(c, height * 25.4 / 72.0, textPrefix.c_str());
+ {
+ const char* utfStr = (this->*stringToUTF8)(m_str);
+ OnReadText(c, height * 25.4 / 72.0, utfStr);
+ if (utfStr != m_str)
+ delete utfStr;
+ }
return(true);
case 62:
@@ -3271,7 +3280,118 @@ bool CDxfRead::ReadLayer()
return false;
}
-void CDxfRead::DoRead(const bool ignore_errors /* = false */ )
+bool CDxfRead::ReadVersion()
+{
+ static const std::vector VersionNames = {
+ // This table is indexed by eDXFVersion_t - (ROlder+1)
+ "AC1006",
+ "AC1009",
+ "AC1012",
+ "AC1014",
+ "AC1015",
+ "AC1018",
+ "AC1021",
+ "AC1024",
+ "AC1027",
+ "AC1032"};
+
+ assert(VersionNames.size() == RNewer - ROlder - 1);
+ get_line();
+ get_line();
+ std::vector::const_iterator first = VersionNames.cbegin();
+ std::vector::const_iterator last = VersionNames.cend();
+ std::vector::const_iterator found = std::lower_bound(first, last, m_str);
+ if (found == last)
+ m_version = RNewer;
+ else if (*found == m_str)
+ m_version = (eAutoCADVersion_t)(std::distance(first, found) + (ROlder + 1));
+ else if (found == first)
+ m_version = ROlder;
+ else
+ m_version = RUnknown;
+
+ return ResolveEncoding();
+}
+
+bool CDxfRead::ReadDWGCodePage()
+{
+ get_line();
+ get_line();
+ assert(m_CodePage == NULL); // If not, we have found two DWGCODEPAGE variables or DoRead was called twice on the same CDxfRead object.
+ m_CodePage = new std::string(m_str);
+
+ return ResolveEncoding();
+}
+
+bool CDxfRead::ResolveEncoding()
+{
+ if (m_encoding != NULL) {
+ delete m_encoding;
+ m_encoding = NULL;
+ }
+ if (m_version >= R2007) { // Note this does not include RUnknown, but does include RLater
+ m_encoding = new std::string("utf_8");
+ stringToUTF8 = &CDxfRead::UTF8ToUTF8;
+ }
+ else if (m_CodePage == NULL) {
+ // cp1252
+ m_encoding = new std::string("cp1252");
+ stringToUTF8 = &CDxfRead::GeneralToUTF8;
+ }
+ else {
+ // Names may be of the form "ansi_1252" which we map to "cp1252" but we don't map "ansi_x3xxxx" which means "ascii"
+ if (strncmp(m_CodePage->c_str(), "ansi_", 5) == 0
+ && strncmp(m_CodePage->c_str(), "ansi_x3", 7) != 0) {
+ std::string* p = new std::string(*m_CodePage);
+ p->replace(0, 5, "cp");
+ m_encoding = p;
+ }
+ else
+ m_encoding = new std::string(*m_CodePage);
+ // At this point we want to recognize synonyms for "utf_8" and use the custom decoder function.
+ // This is because this is one of the common cases and our decoder function is a fast no-op.
+ // We don't actually use the decoder function we get from PyCodec_Decoder because to call it we have to convert the (char *) text into
+ // a 'bytes' object first so we can pass it to the function using PyObject_Callxxx(), getting the PYObject containing the
+ // Python string, which we then decode back to UTF-8. It is simpler to call PyUnicode_DecodeXxxx which takes a (const char *)
+ // and is just a direct c++ callable.
+ PyObject* pyDecoder = PyCodec_Decoder(m_encoding->c_str());
+ if (pyDecoder == NULL)
+ return false; // A key error exception will have been placed.
+ PyObject* pyUTF8Decoder = PyCodec_Decoder("utf_8");
+ assert(pyUTF8Decoder != NULL);
+ if (pyDecoder == pyUTF8Decoder)
+ stringToUTF8 = &CDxfRead::UTF8ToUTF8;
+ else
+ stringToUTF8 = &CDxfRead::GeneralToUTF8;
+ Py_DECREF(pyDecoder);
+ Py_DECREF(pyUTF8Decoder);
+ }
+ return m_encoding != NULL;
+}
+const char* CDxfRead::UTF8ToUTF8(const char* encoded) const
+{
+ return encoded;
+}
+const char* CDxfRead::GeneralToUTF8(const char* encoded) const
+{
+ PyObject* decoded = PyUnicode_Decode(encoded, strlen(encoded), m_encoding->c_str(), "strict");
+ if (decoded == NULL)
+ return NULL;
+ Py_ssize_t len;
+ const char* converted = PyUnicode_AsUTF8AndSize(decoded, &len);
+ char* result = NULL;
+ if (converted != NULL) {
+ // converted only has lifetime of decoded so we must save a copy.
+ result = (char *)malloc(len + 1);
+ if (result == NULL)
+ PyErr_SetString(PyExc_MemoryError, "Out of memory");
+ else
+ memcpy(result, converted, len + 1);
+ }
+ Py_DECREF(decoded);
+ return result;
+}
+void CDxfRead::DoRead(const bool ignore_errors /* = false */)
{
m_ignore_errors = ignore_errors;
if(m_fail)
@@ -3298,7 +3418,19 @@ void CDxfRead::DoRead(const bool ignore_errors /* = false */ )
continue;
} // End if - then
- else if(!strcmp(m_str, "0"))
+ if (!strcmp(m_str, "$ACADVER")) {
+ if (!ReadVersion())
+ return;
+ continue;
+ }// End if - then
+
+ if (!strcmp(m_str, "$DWGCODEPAGE")) {
+ if (!ReadDWGCodePage())
+ return;
+ continue;
+ }// End if - then
+
+ if (!strcmp(m_str, "0"))
{
get_line();
if (!strcmp( m_str, "SECTION" )){
diff --git a/src/Mod/Import/App/dxf/dxf.h b/src/Mod/Import/App/dxf/dxf.h
index 401ecb908b..6980bbd166 100644
--- a/src/Mod/Import/App/dxf/dxf.h
+++ b/src/Mod/Import/App/dxf/dxf.h
@@ -118,6 +118,22 @@ struct LWPolyDataOut
std::vector Bulge;
point3D Extr;
};
+typedef enum
+{
+ RUnknown,
+ ROlder,
+ R10,
+ R11_12,
+ R13,
+ R14,
+ R2000,
+ R2004,
+ R2007,
+ R2010,
+ R2013,
+ R2018,
+ RNewer,
+} eAutoCADVersion_t;
//********************
class CDxfWrite{
@@ -277,6 +293,9 @@ private:
bool ReadInsert();
bool ReadDimension();
bool ReadBlockInfo();
+ bool ReadVersion();
+ bool ReadDWGCodePage();
+ bool ResolveEncoding();
void get_line();
void put_line(const char *value);
@@ -284,6 +303,15 @@ private:
protected:
Aci_t m_aci; // manifest color name or 256 for layer color
+ eAutoCADVersion_t m_version;// Version from $ACADVER variable in DXF
+ const char* (CDxfRead::*stringToUTF8)(const char*) const;
+
+private:
+ const std::string* m_CodePage; // Code Page name from $DWGCODEPAGE or null if none/not read yet
+ // The following was going to be python's canonical name for the encoding, but this is (a) not easily found and (b) does not speed up finding the encoding object.
+ const std::string* m_encoding;// A name for the encoding implied by m_version and m_CodePage
+ const char* UTF8ToUTF8(const char* encoded) const;
+ const char* GeneralToUTF8(const char* encoded) const;
public:
ImportExport CDxfRead(const char* filepath); // this opens the file