codebreakparty/test.cpp

215 lines
5.3 KiB
C++

#include <algorithm>
#include <array>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <queue>
#include <random>
#include <stdio.h>
#include <stdlib.h>
#include <tuple>
#include "markov.hpp"
using Corpus = Markov<char, 6>;
/* Manage a lookup table for monoalphabetic substitution */
struct Mapping {
char map[26]{};
Mapping() = default;
char &operator[](size_t i) { return map[i]; }
constexpr auto begin() { return &map[0]; }
constexpr auto cbegin() const { return &map[0]; }
constexpr auto cend() const { return &map[26]; }
/* Create the LUT. Maps all characters except a-z to themselves, uses map for
the rest */
constexpr void create_map(char *output) const {
for (size_t i = 0; i < 256; i++) {
output[i] = i;
}
for (size_t i = 0; i < 26; i++) {
output['a' + i] = map[i];
output['A' + i] = map[i];
}
}
constexpr auto end() { return &map[26]; }
/* Swap two output characters */
bool swap(char a, char b) {
auto left = std::find(begin(), end(), a);
auto right = std::find(begin(), end(), b);
if (left != end() && right != end()) {
std::swap(left, right);
return true;
}
return false;
}
};
class Decrypt {
/* Markov model of the plaintext */
Corpus corpus;
/* Cipher text */
char const *cipher_text;
/* Temporary buffer for decryption attempts */
char *plain_text;
/* Length of the cipher text */
size_t len;
public:
Decrypt(char const *corpus, char const *data)
: corpus(create_corpus(corpus)), cipher_text(data),
plain_text(static_cast<char *>(malloc(strlen(data) + 1))),
len(strlen(cipher_text)) {
plain_text[strlen(data)] = 0;
}
~Decrypt() { free(plain_text); }
Mapping find_map(size_t tries, size_t iterations) {
Mapping map;
double best_score = INFINITY;
while (tries--) {
Mapping m;
double s;
std::tie(m, s) = random_map(iterations);
if (s < best_score) {
map = m;
best_score = s;
printf("New best try, character score is %g:\n",
best_score / double(len));
printf(" ");
for (uint32_t i = 0; i < 26; i++) {
putchar(toupper(map[i]));
}
printf("\n");
printf(" %s\n", apply_map(m));
}
}
return map;
}
char const *apply_map(Mapping const &m) {
char map[256];
m.create_map(&map[0]);
std::transform(&cipher_text[0], &cipher_text[len], &plain_text[0],
[map](unsigned char a) { return map[a]; });
return plain_text;
}
protected:
Corpus create_corpus(char const *file) {
Corpus m;
std::ifstream corpus{file};
char buf[128];
for (auto i = 0u; i < m.order(); i++) {
buf[i] = ' ';
}
while (corpus.good()) {
corpus.read(&buf[m.order()], sizeof(buf) - m.order());
auto buflen = m.order() + corpus.gcount();
for (size_t i = 0; i < (buflen - m.order() - 1u); i++) {
m.add(&buf[i], m.order() + 1);
}
}
return m;
}
double rate(Mapping const &m, int max_order = -1) const {
const auto order = 1 + ((-1 == max_order) ? corpus.order()
: static_cast<size_t>(max_order));
double score = 0.;
char map[256];
m.create_map(&map[0]);
std::transform(&cipher_text[0], &cipher_text[len], &plain_text[0],
[map](unsigned char a) { return map[a]; });
for (size_t i = 0, valid = 0; i < len; i++) {
if (plain_text[i]) {
valid++;
if (valid == order) {
score += log(corpus.probability(&plain_text[i - order], order));
} else if (valid > order) {
score += log(corpus.final_probability(&plain_text[i - order], order));
}
} else {
if (valid && (valid < order)) {
score += log(corpus.probability(&plain_text[i - valid], valid));
}
valid = 0;
}
}
return -score;
}
std::pair<Mapping, double> random_map(size_t iterations) {
Mapping m;
double current_score{INFINITY};
for (uint32_t i = 0; i < 26; i++) {
m[i] = 'a' + i;
}
std::random_device rd;
std::mt19937 g(rd());
do {
std::shuffle(&m.map[0], &m.map[26], g);
current_score = rate(m);
} while (std::isinf(current_score));
std::uniform_int_distribution<size_t> char_index{0, 25};
std::uniform_real_distribution random_float;
int order = 0;
for (uint32_t i = 0; i < iterations; i++) {
Mapping m2 = m;
std::swap(m2[char_index(g)], m2[char_index(g)]);
int new_order = (corpus.order() * (1 + i) + iterations - 1) / iterations;
if (new_order != order) {
current_score = rate(m);
order = new_order;
}
auto new_score = rate(m2, order);
double diff = current_score - new_score;
if ((diff > 0) || (random_float(g) <
std::exp(diff * double(1 + i) / double(iterations)))) {
current_score = new_score;
m = m2;
}
}
return std::pair(m, rate(m));
}
};
int main(int argc, char const **argv) {
if (argc < 3) {
printf("Usage: %s corpus ciphertext\n", argv[0]);
exit(1);
}
printf("Cipher text: %s\n", argv[2]);
Decrypt d(argv[1], argv[2]);
Mapping map = d.find_map(50, 1000);
printf("%s\n", d.apply_map(map));
return 0;
}