Create blz78suf.cpp
This commit is contained in:
parent
5127b7f18a
commit
a5bfb4e192
578
blz78suf.cpp
Normal file
578
blz78suf.cpp
Normal file
|
@ -0,0 +1,578 @@
|
|||
// ---------------------------------------------------------------------------
|
||||
// Suffix trie-based LZ78 optimal parsing and Brainfuck code
|
||||
// generation. Written on Wednesday, 29th of January 2025
|
||||
// by Kamila Szewczyk.
|
||||
//
|
||||
// Makes no assumptions on cell size, cell wrapping behaviours or tape
|
||||
// wrapping behaviours of the interpreter.
|
||||
//
|
||||
// See also:
|
||||
// [1] - https://copy.sh/brainfuck/text.html & improvements
|
||||
//
|
||||
// TO-DO:
|
||||
// - Performance optimisations in the suffix trie search:
|
||||
// stop heuristics, better memory management, etc.
|
||||
// - Linked list-based strings for improved decomposition performance.
|
||||
// - RLE coding for uncoded phrasal sections.
|
||||
// - ...
|
||||
// ---------------------------------------------------------------------------
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Abstract Brainfuck machine. Interprets the subset of the language
|
||||
// without I/O. Tracks the high-water mark of memory usage.
|
||||
// Generates tape annihilators for arbitrary terminating programs.
|
||||
// ---------------------------------------------------------------------------
|
||||
// Interpreter for the I/O-free subset of Brainfuck ('>', '<', '+', '-',
// '[', ']'); every other character is ignored. Records the highest cell
// index the pointer reached and can emit code that zeroes the tape again.
class BFInt {
public:
    std::vector<int> memory;  // Tape contents.
    int ptr;                  // Current cell index.
    int mptr;                 // Highest value `ptr` has held after any instruction.

    // Creates a zero-filled tape of `cells` cells with the pointer on cell 0.
    BFInt(int cells) : memory(cells, 0), ptr(0), mptr(0) {}

    // Executes `program` on the current tape state.
    // Throws std::runtime_error on unbalanced brackets and std::out_of_range
    // when a cell left of cell 0 is accessed. Accessing a cell past the end
    // of the tape grows the tape with zeroes.
    void simulate(const std::string & program) {
        const int length = static_cast<int>(program.size());
        for (int i = 0; i < length; i++) {
            switch (program[i]) {
                case '>': ptr++; break;
                case '<': ptr--; break;
                case '+': cell()++; break;
                case '-': cell()--; break;
                case '[':
                    // Skip the loop body: continue after the matching ']'.
                    if (!cell()) i = matching(program, i, +1);
                    break;
                case ']':
                    // Repeat the loop body: continue after the matching '['.
                    if (cell()) i = matching(program, i, -1);
                    break;
            }
            if (ptr > mptr) mptr = ptr;
        }
    }

    // Returns Brainfuck code that zeroes every non-zero cell and leaves the
    // pointer on cell 0; the simulated tape and `ptr` are updated to match.
    // Cells holding 1 or 2 are cleared with '-' / '--', any other non-zero
    // value with "[-]".
    std::string craftAnnihilator() {
        std::string annihilator;
        annihilator.reserve(32);
        while (true) {
            int & current = cell();
            switch (current) {
                case 0: break;
                case 1: annihilator += "-"; break;
                case 2: annihilator += "--"; break;
                default: annihilator += "[-]"; break;
            }
            current = 0;
            const int cells = static_cast<int>(memory.size());
            // Look for the next non-zero cell to the right first. The bounds
            // check must come before the read to stay inside the tape.
            int i = ptr;
            while (i < cells && memory[i] == 0) i++;
            if (i != cells) {
                annihilator.append(i - ptr, '>');
                ptr = i; continue;
            }
            // Nothing to the right; look to the left.
            i = ptr;
            while (i >= 0 && memory[i] == 0) i--;
            if (i != -1) {
                annihilator.append(ptr - i, '<');
                ptr = i; continue;
            }
            // Whole tape is zero: walk back to cell 0.
            annihilator.append(ptr, '<');
            ptr = 0; return annihilator;
        }
    }

private:
    // Bounds-checked access to the cell under the pointer.
    int & cell() {
        if (ptr < 0)
            throw std::out_of_range("BFInt: tape pointer is left of cell 0");
        if (static_cast<std::size_t>(ptr) >= memory.size())
            memory.resize(static_cast<std::size_t>(ptr) + 1, 0);
        return memory[ptr];
    }

    // Returns the index of the bracket matching the one at `from`, scanning
    // forwards (step = +1, from a '[') or backwards (step = -1, from a ']').
    static int matching(const std::string & program, int from, int step) {
        const int length = static_cast<int>(program.size());
        const char same = step > 0 ? '[' : ']';
        const char other = step > 0 ? ']' : '[';
        int depth = 1;
        for (int i = from + step; i >= 0 && i < length; i += step) {
            if (program[i] == same) depth++;
            else if (program[i] == other && --depth == 0) return i;
        }
        throw std::runtime_error("BFInt: unbalanced brackets");
    }
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Non-quadratic approximator for uncoded phrase generation.
|
||||
// ---------------------------------------------------------------------------
|
||||
// Table-driven length estimate for emitting a phrase. G[x][y] is a cost for
// turning byte value x into byte value y, with all arithmetic taken mod 256.
class BFGenApprox {
private:
    int G[256][256];

    // Lowers G[from][to] to `cost` when that is an improvement.
    void lower(int from, int to, int cost) {
        if (cost < G[from][to]) G[from][to] = cost;
    }

    // Number of times `stride` must be added to `start` (mod 256) before the
    // value becomes 0, or -1 if that does not happen within 255 additions.
    static int steps_to_zero(int start, int stride) {
        int value = start;
        for (int steps = 0; steps < 256; steps++) {
            if (value == 0) return steps;
            value = (value + stride) % 256;
        }
        return -1;
    }

public:
    // Builds the cost table.
    BFGenApprox() {
        // Direct cost: the shorter way around the 256-value circle.
        for (int x = 0; x < 256; x++) {
            for (int y = 0; y < 256; y++) {
                int delta = y - x;
                if (delta > 128) delta -= 256;
                if (delta < -128) delta += 256;
                G[x][y] = delta >= 0 ? delta : -delta;
            }
        }
        // Loop-based candidates, cost 5 + d + n: the source value is stepped
        // by d until it reaches 0 while the target is stepped by n in the
        // opposite direction. The iteration count depends only on (x, d), so
        // it is computed once and reused for every n. These candidates do not
        // depend on the closure below, so seeding them once is sufficient.
        for (int x = 0; x < 256; x++) {
            for (int d = 1; d < 40; d++) {
                const int down = steps_to_zero(x, 256 - d);  // x decremented by d
                const int up = steps_to_zero(x, d);          // x incremented by d
                for (int n = 1; n < 40; n++) {
                    const int s = 5 + d + n;
                    if (down >= 0) lower(x, (down * n) % 256, s);
                    if (up >= 0) lower(x, (256 - (up * n) % 256) % 256, s);
                }
            }
        }
        // Close the table under composition (x -> z -> y) until stable.
        bool changed = true;
        while (changed) {
            changed = false;
            for (int x = 0; x < 256; x++) {
                for (int y = 0; y < 256; y++) {
                    for (int z = 0; z < 256; z++) {
                        const int via = G[x][z] + G[z][y];
                        if (via < G[x][y]) {
                            G[x][y] = via;
                            changed = true;
                        }
                    }
                }
            }
        }
    }

    // Estimated code length for the bytes of `s`. Each byte adds the cheaper
    // of G[previous][byte] + 1 and G[0][byte] + 4 (the former is chosen only
    // when it is at least 3 smaller); a constant 3 is added at the end.
    size_t phrase_len(const std::string & s) const {
        int lastc = 0, gen = 0;
        // Iterate as unsigned char: a plain char may be negative for bytes
        // >= 0x80 and would index outside the table.
        for (unsigned char c : s) {
            const int a = G[lastc][c], b = G[0][c];
            if (a + 3 <= b) gen += a + 1;
            else gen += b + 4;
            lastc = c;
        }
        gen += 3; return gen;
    }
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Low-level, precise text generation for uncoded phrases.
|
||||
// ---------------------------------------------------------------------------
|
||||
// Side on which a code fragment starts or ends.
enum class Direction { LEFT, RIGHT };

// Returns the other direction.
Direction opposite(Direction d) {
    return d == Direction::LEFT ? Direction::RIGHT : Direction::LEFT;
}

// A Brainfuck code fragment together with the direction it starts from and
// the direction it ends on. Copy and move operations are compiler-generated.
class Transition {
public:
    std::string code; Direction startD, endD;

    // Empty fragment, LEFT to LEFT.
    Transition() : startD(Direction::LEFT), endD(Direction::LEFT) {}

    Transition(std::string code, Direction startD, Direction endD)
        : code(std::move(code)), startD(startD), endD(endD) {}

    // Returns this fragment followed by `t`. If `t` does not start where this
    // fragment ends, `t` is mirrored first. The result starts where this
    // fragment starts and ends where the (possibly mirrored) `t` ends.
    Transition plus(Transition t) const {
        if (endD != t.startD) t.reverse_in_place();
        return Transition(code + t.code, startD, t.endD);
    }

    // Mirrors the fragment: swaps '<' with '>' and flips both directions.
    void reverse_in_place() {
        for (char & ch : code) {
            if (ch == '>') ch = '<';
            else if (ch == '<') ch = '>';
        }
        startD = opposite(startD); endD = opposite(endD);
    }

    // Length of the code in characters.
    int size() const { return static_cast<int>(code.size()); }
};
|
||||
class BFGen {
|
||||
private:
|
||||
Transition list[256][256];
|
||||
std::string generateFromCache(const std::string & s,
|
||||
const std::vector<bool> & caches) {
|
||||
std::vector<Transition> trans;
|
||||
trans.reserve(s.size());
|
||||
Direction d = Direction::LEFT;
|
||||
char last = 0;
|
||||
std::vector<char> cache;
|
||||
for (int index = 0; index < s.size(); index++) {
|
||||
char c = s[index]; Transition t = list[last][c];
|
||||
if (t.startD != d) t.reverse_in_place();
|
||||
int ci = std::find(cache.begin(), cache.end(), c) - cache.begin();
|
||||
int ims = (cache.size() - ci) * 2 + (d == Direction::RIGHT ? 2 : 0);
|
||||
if (ci != cache.size() && ims <= t.size())
|
||||
t = Transition(std::to_string(cache.size() - ci), t.startD, t.startD);
|
||||
else {
|
||||
t = Transition(t.code + ".", t.startD, t.endD); d = t.endD; last = c;
|
||||
}
|
||||
if (caches[index]) {
|
||||
cache.push_back(c); d = Direction::LEFT; last = 0;
|
||||
}
|
||||
trans.emplace_back(std::move(t));
|
||||
}
|
||||
bool reverse = false;
|
||||
for (int i = trans.size() - 1; i >= 0; i--) {
|
||||
if (caches[i]) reverse = trans[i].endD == Direction::RIGHT;
|
||||
if (reverse) trans[i].reverse_in_place();
|
||||
}
|
||||
if (trans.empty()) return "";
|
||||
std::string code = "";
|
||||
for (int i = 0; i < trans.size(); i++) {
|
||||
Transition t = trans[i];
|
||||
if ((i == 0 && t.startD != Direction::LEFT)
|
||||
|| (i != 0 && trans[i - 1].endD != t.startD))
|
||||
code += ">";
|
||||
if (std::isdigit(t.code[0])) {
|
||||
int num = std::stoi(t.code) + (t.startD == Direction::RIGHT);
|
||||
code += std::string(num, '<') + "." + std::string(num, '>');
|
||||
} else
|
||||
code += t.code;
|
||||
if (caches[i]) code += ">";
|
||||
}
|
||||
while (true) {
|
||||
std::string newCode = code; size_t pos = 0;
|
||||
while ((pos = newCode.find("><", pos)) != std::string::npos)
|
||||
newCode.replace(pos, 2, "");
|
||||
if (code == newCode) break; code = newCode;
|
||||
}
|
||||
return code;
|
||||
}
|
||||
std::string generate_internal(const std::string & s, int cells) {
|
||||
std::vector<bool> caches(s.size(), false);
|
||||
std::string currentCost = generateFromCache(s, caches);
|
||||
std::map<char, int> ch;
|
||||
if (cells > 2) {
|
||||
for (int i = 0; i < s.size(); i++) {
|
||||
char c = s[i]; std::vector<bool> nc = caches;
|
||||
for (int j = 0; j < s.size(); j++) {
|
||||
if (s[j] == c) nc[j] = false;
|
||||
}
|
||||
char lowest = 0;
|
||||
if (std::count(nc.begin(), nc.end(), true) >= cells - 2) {
|
||||
lowest = std::min_element(ch.begin(), ch.end(),
|
||||
[](auto & a, auto & b) { return a.second < b.second; }
|
||||
)->first;
|
||||
auto iter = std::find(nc.begin(), nc.end(), true);
|
||||
nc[std::distance(nc.begin(), iter)] = false;
|
||||
}
|
||||
nc[i] = true;
|
||||
std::string newCost = generateFromCache(s, nc);
|
||||
if (newCost.size() < currentCost.size()) {
|
||||
ch[c] = currentCost.size() - newCost.size();
|
||||
currentCost = newCost; caches = nc;
|
||||
if (lowest != 0) ch.erase(lowest);
|
||||
}
|
||||
}
|
||||
}
|
||||
return currentCost;
|
||||
}
|
||||
int grade(int n, int base) {
|
||||
int sp = 0, norm = 0;
|
||||
while (n > 0) {
|
||||
sp++; norm += n % base; n = n / base;
|
||||
}
|
||||
return norm + (6 + base) * sp + ((sp % 2 == 1) ? 4 : 0);
|
||||
}
|
||||
int best_base(int n) {
|
||||
int v = 0, b = 0;
|
||||
for (int i = 2; i <= 60; i++) {
|
||||
int cv = grade(n, i);
|
||||
if (v == 0 || v > cv) {
|
||||
v = cv; b = i;
|
||||
}
|
||||
}
|
||||
return b;
|
||||
}
|
||||
public:
|
||||
BFGen() {
|
||||
for (int x = 0; x < 256; x++) {
|
||||
for (int y = 0; y < 256; y++) {
|
||||
int delta = y - x;
|
||||
std::string code = "";
|
||||
if (delta > 0)
|
||||
code = std::string(delta, '+');
|
||||
else if (delta < 0)
|
||||
code = std::string(-delta, '-');
|
||||
list[x][y] = Transition(code, Direction::LEFT, Direction::LEFT);
|
||||
}
|
||||
}
|
||||
for (int x = 0; x < 256; x++) {
|
||||
for (int n = 1; n <= 39; n++) {
|
||||
for (int d = 1; d <= 39; d++) {
|
||||
int j = x; int y = 0;
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (j == 0 || j - d < 0 || y + n > 255) break;
|
||||
j = (j - d + 256) & 255;
|
||||
y = (y + n) & 255;
|
||||
}
|
||||
if (j == 0) {
|
||||
std::string s =
|
||||
"[" + std::string(d, '-') + ">" + std::string(n, '+') + "<]>";
|
||||
if (s.size() < list[x][y].size())
|
||||
list[x][y] = Transition(s, Direction::LEFT, Direction::RIGHT);
|
||||
}
|
||||
j = x; y = 0;
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (j == 0 || y - n < 0 || j + d > 255) break;
|
||||
j = (j + d) & 255;
|
||||
y = (y - n + 256) & 255;
|
||||
}
|
||||
if (j == 0) {
|
||||
std::string s =
|
||||
"[" + std::string(d, '+') + ">" + std::string(n, '-') + "<]>";
|
||||
if (s.size() < list[x][y].size())
|
||||
list[x][y] = Transition(s, Direction::LEFT, Direction::RIGHT);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 256; i++)
|
||||
if (list[i][0].size() > 3)
|
||||
list[i][0] = Transition("[-]", Direction::LEFT, Direction::LEFT);
|
||||
bool change = true;
|
||||
while (change) {
|
||||
change = false;
|
||||
for (int i = 0; i < 256; i++) {
|
||||
for (int j = 0; j < 256; j++) {
|
||||
for (int k = 0; k < 256; k++) {
|
||||
if (list[i][j].size() + list[j][k].size() < list[i][k].size()) {
|
||||
list[i][k] = list[i][j].plus(list[j][k]); change = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
std::string generate(const std::string & s, int cells_max = 16) {
|
||||
std::string code = generate_internal(s, cells_max);
|
||||
BFInt bf(cells_max); bf.simulate(code);
|
||||
cells_max = bf.mptr + 1;
|
||||
std::string shortest_code = code + bf.craftAnnihilator();
|
||||
while (cells_max > 2) {
|
||||
std::string new_code = generate_internal(s, cells_max - 1);
|
||||
BFInt bf(cells_max - 1); bf.simulate(new_code);
|
||||
new_code += bf.craftAnnihilator();
|
||||
if (new_code.size() < shortest_code.size())
|
||||
shortest_code = new_code;
|
||||
cells_max--;
|
||||
}
|
||||
return shortest_code;
|
||||
}
|
||||
std::string gen_constant(int n) {
|
||||
std::vector<int> stack;
|
||||
std::string out = ">";
|
||||
int flip = 1;
|
||||
if (n < 12)
|
||||
return std::string(n, '+');
|
||||
int base = best_base(n);
|
||||
while (n > 0) {
|
||||
stack.push_back(n % base);
|
||||
n = n / base;
|
||||
}
|
||||
while (!stack.empty()) {
|
||||
int top = stack.back();
|
||||
stack.pop_back();
|
||||
int bc = base;
|
||||
out += std::string(top, '+');
|
||||
if (!stack.empty()) {
|
||||
if (!flip)
|
||||
out += "[>" + std::string(bc, '+') + "<-]>";
|
||||
else
|
||||
out += "[<" + std::string(bc, '+') + ">-]<";
|
||||
}
|
||||
flip = !flip;
|
||||
}
|
||||
if (!flip) out += "[-<+>]<";
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Suffix trie for ranked LZ78 parsing.
|
||||
// ---------------------------------------------------------------------------
|
||||
// Trie containing every suffix of a text, used to rank candidate phrases.
// Each node stores how many suffixes pass through it (`counter`) and the
// number of leaves below it (`cache`).
class SuffixTrie {
private:
    class SuffixTrieNode {
    public:
        std::unordered_map<char, SuffixTrieNode *> children;
        int counter, cache;
        SuffixTrieNode() : counter(0), cache(0) {}
    };

    SuffixTrieNode * root;

    // Inserts every suffix of `text`, bumping `counter` on each node visited.
    void buildTrie(const std::string & text) {
        const std::size_t n = text.length();
        for (std::size_t i = 0; i < n; i++) {
            SuffixTrieNode * cn = root;
            for (std::size_t j = i; j < n; j++) {
                const char ch = text[j];
                auto it = cn->children.find(ch);
                if (it == cn->children.end())
                    it = cn->children.emplace(ch, new SuffixTrieNode()).first;
                cn = it->second; cn->counter++;
            }
        }
    }

    // Sets `cache` to 1 on leaves and to the sum of the children's `cache`
    // on inner nodes.
    void annotateCounters(SuffixTrieNode * node) {
        if (node->children.empty()) {
            node->cache = 1; return;
        }
        for (auto & pair : node->children) {
            annotateCounters(pair.second);
            node->cache += pair.second->cache;
        }
    }

    // Frees `node` and everything below it.
    void deleteTrie(SuffixTrieNode * node) {
        for (auto & pair : node->children)
            deleteTrie(pair.second);
        delete node;
    }

public:
    // Builds and annotates the trie for `text`.
    SuffixTrie(const std::string & text) {
        this->root = new SuffixTrieNode();
        buildTrie(text); annotateCounters(root);
    }

    ~SuffixTrie() { deleteTrie(root); }

    // The trie owns its nodes through raw pointers; a copy would free them
    // twice, so copying is disabled.
    SuffixTrie(const SuffixTrie &) = delete;
    SuffixTrie & operator=(const SuffixTrie &) = delete;

    // Returns the substring with the highest value, and that value, among
    // substrings that are non-empty, have `counter` > 1 and do not contain
    // '\x01'. The value of a substring s at a node is
    //     cache * rate(s) - cache * constant2 - constant.
    // If no substring qualifies, returns an empty string and -infinity.
    template <typename F> std::pair<std::string, double> findMaxString(F rate,
            double constant, double constant2) {
        double maxValue = -std::numeric_limits<double>::infinity();
        std::string bestString;
        // Pre-order walk; the lambda receives itself so that it can recurse.
        auto dfs = [&](const auto & self, SuffixTrieNode * node,
                       const std::string & currentString) -> void {
            if (!currentString.empty() && node->counter > 1
                && currentString.find('\x01') == std::string::npos) {
                const int countX = node->cache;
                const double rateX = rate(currentString);
                const double value = countX * rateX - countX * constant2 - constant;
                if (value > maxValue) {
                    maxValue = value; bestString = currentString;
                }
            }
            for (auto & pair : node->children)
                self(self, pair.second, currentString + pair.first);
        };
        dfs(dfs, root, std::string());
        return { bestString, maxValue };
    }
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Code generation and procedural system.
|
||||
// ---------------------------------------------------------------------------
|
||||
class CodeGen {
|
||||
private:
|
||||
std::string replacement_program(BFGen & gen, const std::string & msg) {
|
||||
return "-[-<+>>>>+<<<]<[->+<]>>>>>[-]<[>+<[-]]+>[<->-]<[-" +
|
||||
gen.generate(msg) + "<<[->+<]>>]<<<";
|
||||
}
|
||||
public:
|
||||
std::string gen(BFGen & gen, const std::vector<std::string> & replacements,
|
||||
const std::vector<std::string> & chunks) {
|
||||
int ret = 0; std::string new_code;
|
||||
new_code = ">" + gen.gen_constant(replacements.size() + 1) + "[";
|
||||
for (const auto & el : replacements)
|
||||
new_code += replacement_program(gen, el);
|
||||
new_code += "-[-<+>>>>+<<<]<[->+<]>>>>>[-]<[>+<[-]]+>[<->-]<[-";
|
||||
ret++;
|
||||
for (const auto & el : chunks) {
|
||||
auto iter = std::find(replacements.begin(), replacements.end(), el);
|
||||
if (iter != replacements.end()) {
|
||||
int c1 = (ret++) + replacements.size() + 1;
|
||||
int c2 = std::distance(replacements.begin(), iter) + 1;
|
||||
new_code += "<<" + gen.gen_constant(c1) + ">" + gen.gen_constant(c2) +
|
||||
">]<<<-[-<+>>>>+<<<]<[->+<]>>>>>[-]<[>+<[-]]+>[<->-]<[-";
|
||||
} else new_code += gen.generate(el);
|
||||
}
|
||||
return new_code + "]<<<[-]>>[-<<+>>]<<]";
|
||||
}
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constrained, optimal phrasal LZ78 parsing of text.
|
||||
// ---------------------------------------------------------------------------
|
||||
template <typename F>
|
||||
std::vector<std::string> parse(std::string text, F score, int max,
|
||||
int c1, int c2) {
|
||||
if (text.contains("\x01"))
|
||||
throw std::runtime_error("Text contains forbidden characters.");
|
||||
std::vector<std::string> replacements;
|
||||
double maxValue = std::numeric_limits<double>::infinity();
|
||||
while (maxValue > 0 && replacements.size() < max) {
|
||||
SuffixTrie suffixTrie(text); std::string bestString;
|
||||
std::tie(bestString, maxValue) = suffixTrie.findMaxString(score, c1, c2);
|
||||
if (maxValue > 0) replacements.push_back(bestString);
|
||||
for (size_t pos = 0;
|
||||
(pos = text.find(bestString, pos)) != std::string::npos;
|
||||
pos++) text.replace(pos, bestString.length(), "\x01");
|
||||
}
|
||||
return replacements;
|
||||
}
|
||||
// Splits `text` into tokens appended to `acc`. At each position the longest
// entry of `words` that matches there becomes its own token (the first one
// wins among equally long entries). Characters not covered by any word are
// collected into literal tokens: a character is appended to the previous
// token unless `acc` is empty or the previous token is itself a word.
// Iterative, so long inputs need neither deep recursion nor repeated copies
// of the remaining text.
void splitText(std::vector<std::string> & acc, const std::string & text,
               const std::vector<std::string> & words) { // TO-DO: `words' as a map.
    std::size_t pos = 0;
    while (pos < text.size()) {
        const std::string * longest = nullptr;
        for (const auto & word : words) {
            if (word.empty()) continue;
            if (longest != nullptr && word.length() <= longest->length()) continue;
            if (text.compare(pos, word.length(), word) == 0)
                longest = &word;
        }
        if (longest != nullptr) {
            acc.push_back(*longest);
            pos += longest->length();
            continue;
        }
        if (acc.empty()
            || std::find(words.begin(), words.end(), acc.back()) != words.end())
            acc.push_back(std::string(1, text[pos]));
        else acc.back() += text[pos];
        pos++;
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Command-line stub.
|
||||
// ---------------------------------------------------------------------------
|
||||
int main(int argc, char * argv[]) {
|
||||
if (argc != 5) {
|
||||
std::cerr << "Usage: blz78suf <text_file> <c1> <c2> <max>" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::ifstream file(argv[1]);
|
||||
if (!file.is_open()) {
|
||||
std::cerr << "Error: could not open file " << argv[1] << std::endl;
|
||||
return 1;
|
||||
}
|
||||
int c1 = std::stoi(argv[2]), c2 = std::stoi(argv[3]),
|
||||
max = std::stoi(argv[4]);
|
||||
std::string text = std::string(std::istreambuf_iterator<char>(file),
|
||||
std::istreambuf_iterator<char>());
|
||||
BFGen bfgen; std::string naive_gen = bfgen.generate(text);
|
||||
std::cout << "Naive (" << naive_gen.length() << " bytes): "
|
||||
<< naive_gen << std::endl;
|
||||
BFGenApprox approx;
|
||||
std::vector<std::string> replacements =
|
||||
parse(text, [&](const std::string & s) { return approx.phrase_len(s); },
|
||||
c1, c2, max);
|
||||
std::vector<std::string> acc;
|
||||
splitText(acc, text, replacements);
|
||||
std::cout << "LZ78 parsing produced " << replacements.size()
|
||||
<< " phrases and " << acc.size() << " tokens." << std::endl;
|
||||
CodeGen cg; std::string clever_gen = cg.gen(bfgen, replacements, acc);
|
||||
std::cout << "Clever (" << clever_gen.length() << " bytes): "
|
||||
<< clever_gen << std::endl;
|
||||
}
|
Loading…
Reference in New Issue
Block a user