App/Toponaming: Add Comparator for mapped elements

This is the original code from the Toponaming branch, modified slightly to update the
method name and correct some grammatical errors in the descriptive comment.

Co-authored-by: Chris Hennes <chennes@pioneerlibrarysystem.org>
This commit is contained in:
Zheng, Lei
2024-01-15 21:22:57 -06:00
committed by Chris Hennes
parent 7bc3317455
commit ef2ef6d7aa
2 changed files with 118 additions and 0 deletions

View File

@@ -22,3 +22,105 @@
#include "PreCompiled.h"
#include "MappedElement.h"
using namespace Data;
bool ElementNameComparator::operator()(const MappedName &a, const MappedName &b) const {
size_t size = std::min(a.size(),b.size());
if(!size)
return a.size()<b.size();
size_t i=0;
if(b[0] == '#') {
if(a[0]!='#')
return true;
// If both string starts with '#', compare the following hex digits by
// its integer value.
int res = 0;
for(i=1;i<size;++i) {
unsigned char ac = (unsigned char)a[i];
unsigned char bc = (unsigned char)b[i];
if(std::isxdigit(bc)) {
if(!std::isxdigit(ac))
return true;
if(res==0) {
if(ac<bc)
res = -1;
else if(ac>bc)
res = 1;
}
}else if(std::isxdigit(ac))
return false;
else
break;
}
if(res < 0)
return true;
else if(res > 0)
return false;
for (; i<size; ++i) {
char ac = a[i];
char bc = b[i];
if (ac < bc)
return true;
if (ac > bc)
return false;
}
return a.size()<b.size();
}
else if (a[0] == '#')
return false;
// If the string does not start with '#', compare the non-digits prefix
// using lexical order.
for(i=0;i<size;++i) {
unsigned char ac = (unsigned char)a[i];
unsigned char bc = (unsigned char)b[i];
if(!std::isdigit(bc)) {
if(std::isdigit(ac))
return true;
if(ac<bc)
return true;
if(ac>bc)
return false;
} else if(!std::isdigit(ac)) {
return false;
} else
break;
}
// Then compare the following digits part by integer value
int res = 0;
for(;i<size;++i) {
unsigned char ac = (unsigned char)a[i];
unsigned char bc = (unsigned char)b[i];
if(std::isdigit(bc)) {
if(!std::isdigit(ac))
return true;
if(res==0) {
if(ac<bc)
res = -1;
else if(ac>bc)
res = 1;
}
}else if(std::isdigit(ac))
return false;
else
break;
}
if(res < 0)
return true;
else if(res > 0)
return false;
// Finally, compare the remaining tail using lexical order
for (; i<size; ++i) {
char ac = a[i];
char bc = b[i];
if (ac < bc)
return true;
if (ac > bc)
return false;
}
return a.size()<b.size();
}

View File

@@ -98,6 +98,22 @@ struct AppExport MappedElement
}
};
struct AppExport ElementNameComparator {
/** Comparison function to make topo name more stable
*
* The sorting decomposes the name into either of the following two forms
* '#' + hex_digits + tail
* non_digits + digits + tail
*
* The non-digits part is compared lexically, while the digits part is
* compared by its integer value.
*
* The reason for this is to prevent names with bigger digits (which usually means
* they come later in history) from coming earlier when sorting.
*/
bool operator()(const MappedName &a, const MappedName &b) const;
};
}// namespace Data