[Spread] remove unused file

- not used, not commented and no copyright
This commit is contained in:
Uwe
2022-10-09 20:18:12 +02:00
parent a4f5518497
commit 32ead51360

View File

@@ -1,234 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include <string>
#include <map>
#include <algorithm>
int genUtf8(int c, unsigned char * b)
{
if (c<0x80) { *b++=c, *b++ = '\0';
return 1;
}
else if (c<0x800) { *b++=192+c/64, *b++=128+c%64, *b++ = '\0'; return 2; }
else if (c-0xd800u < 0x800) goto error;
else if (c<0x10000) { *b++=224+c/4096, *b++=128+c/64%64, *b++=128+c%64, *b++ = '\0'; return 3; }
else if (c<0x110000) { *b++=240+c/262144, *b++=128+c/4096%64, *b++=128+c/64%64, *b++=128+c%64, *b++ = '\0'; return 4; }
else goto error;
error:
// printf("Error! %x\n", c);
// exit(1);
return -1;
}
typedef std::basic_string<unsigned char> String;
typedef std::vector<String > StringList;
StringList list;
unsigned char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
static String escape(unsigned char c)
{
switch (c) {
case '*':
case '+':
case '-':
case '(':
case ')':
case '\\':
case '.':
case '[':
case ']':
case '?':
case '{':
case '}':
case '#':
case '^':
case '|':
case ':':
case '$':
case '/':
case '\'':
case '"':
return String((const unsigned char*)"\\") + c;
default:
return String((const unsigned char*)"") + c;
}
}
static String encode(unsigned char c)
{
if (c <= 32 || c > 126)
return String((const unsigned char*)"\\x") + hex[c >> 4] + hex[c & 0xf];
else {
return String((const unsigned char*)"") + c;
}
}
static String encodeString(String c)
{
int i;
String result;
for (i = 0; i < c.size(); ++i)
result += encode(c[i]);
return result;
}
static String encodeRange(String r)
{
String result;
int i, j;
for (i = 0; i < r.size(); ++i) {
int n = 0;
for (j = i; j < r.size() && r[i] + n == r[j]; ++j, ++n);
if (n > 1) {
result += escape(r[i]);
result += (const unsigned char*)"-";
result += escape(r[j - 1]);
i = j - 1;
}
else
result += escape(r[i]);
}
return result;
}
static String commonPrefix(int indent, StringList::const_iterator S, StringList::const_iterator E, int k)
{
StringList::const_iterator i = S;
String leafs;
String branches;
int nBranches = 0;
bool first = true;
if (S->size() <= k)
return String((const unsigned char*)"");
while (i != E) {
StringList::const_iterator start = i;
StringList::const_iterator end = i;
int n = 0;
if (i->size() == k + 1) {
leafs += i->at(k);
++i;
}
else {
/* Common path */
while (end != E &&
end->size() >= start->size() &&
end->at(k) == start->at(k)) {
++n;
++end;
}
//if (leafs.size() > 0)
if (!first)
branches += (const unsigned char*)"|";
branches += escape(start->at(k)) + commonPrefix(indent + 1, start, end, k + 1);
first = false;
nBranches++;
i = end;
}
}
if (leafs.size() > 1)
leafs = (const unsigned char*)"[" + encodeRange(leafs) + (const unsigned char*)"]";
if (nBranches == 0)
return leafs;
else {
if (leafs.size() > 0)
leafs += (const unsigned char*)"|";
return (const unsigned char*)"(" + leafs + branches + (const unsigned char*)")";
}
}
static void readFile(FILE * f)
{
while (!feof(f)) {
char line[2048];
int start;
int end;
char cat[128];
unsigned char out[8];
int i;
if (fgets(line, sizeof(line), f) == NULL)
break;
if (sscanf(line, "%4X..%4X ; %s", &start, &end, cat) == 3) {
for (i = start; i <= end; ++i) {
int n = genUtf8(i, out);
if (n > 0) {
list.push_back(String(out, n));
fprintf(stderr, "%6X %d %s\n", i, n, out);
}
}
}
else if (sscanf(line, "%X ; %s", &start, cat) == 2) {
int n = genUtf8(start, out);
if (n > 0) {
list.push_back(String(out, n));
fprintf(stderr, "%6X %d %s\n", start, n, out);
}
}
}
}
static String subdivide(String prefix, StringList::const_iterator S, StringList::const_iterator E, StringList & result)
{
String regexp = commonPrefix(0, S, E, 0);
regexp = encodeString(regexp);
if (regexp.size() < 2000) {
return regexp;
}
else {
int n = E - S;
StringList::const_iterator M = S + n / 2;
result.push_back( prefix + (const unsigned char*)"1\t" + subdivide(prefix + (const unsigned char*)"1", S, M, result) + (const unsigned char*)"\n");
result.push_back( prefix + (const unsigned char*)"2\t" + subdivide(prefix + (const unsigned char*)"2", M, E, result) + (const unsigned char*)"\n");
return (const unsigned char*)"({" + prefix + (const unsigned char*)"1}|" +
(const unsigned char*)"{" + prefix + (const unsigned char*)"2})";
}
}
int main(int argc, char * argv[])
{
for (int i = 2; i < argc; ++i) {
FILE * f = fopen(argv[i], "r");
if (f == NULL) {
perror("fopen");
return 1;
}
readFile(f);
fclose(f);
}
sort(list.begin(), list.end());
StringList result;
String regexp = subdivide((const unsigned char*)argv[1], list.begin(), list.end(), result);
for (StringList::const_iterator i = result.begin(); i != result.end(); ++i)
printf("%s", i->c_str());
printf("%s\t%s\n", argv[1], regexp.c_str());
}