216 lines
5.3 KiB
C++
216 lines
5.3 KiB
C++
|
#include <algorithm>
|
||
|
#include <array>
|
||
|
#include <cmath>
|
||
|
#include <cstring>
|
||
|
#include <fstream>
|
||
|
#include <iostream>
|
||
|
#include <queue>
|
||
|
#include <random>
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <tuple>
|
||
|
|
||
|
#include "markov.hpp"
|
||
|
|
||
|
using Corpus = Markov<char, 6>;
|
||
|
|
||
|
/* Manage a lookup table for monoalphabetic substitution */
|
||
|
struct Mapping {
|
||
|
char map[26]{};
|
||
|
Mapping() = default;
|
||
|
|
||
|
char &operator[](size_t i) { return map[i]; }
|
||
|
|
||
|
constexpr auto begin() { return &map[0]; }
|
||
|
|
||
|
constexpr auto cbegin() const { return &map[0]; }
|
||
|
|
||
|
constexpr auto cend() const { return &map[26]; }
|
||
|
|
||
|
/* Create the LUT. Maps all characters except a-z to themselves, uses map for
|
||
|
the rest */
|
||
|
constexpr void create_map(char *output) const {
|
||
|
for (size_t i = 0; i < 256; i++) {
|
||
|
output[i] = i;
|
||
|
}
|
||
|
for (size_t i = 0; i < 26; i++) {
|
||
|
output['a' + i] = map[i];
|
||
|
output['A' + i] = map[i];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
constexpr auto end() { return &map[26]; }
|
||
|
|
||
|
/* Swap two output characters */
|
||
|
bool swap(char a, char b) {
|
||
|
auto left = std::find(begin(), end(), a);
|
||
|
auto right = std::find(begin(), end(), b);
|
||
|
if (left != end() && right != end()) {
|
||
|
std::swap(left, right);
|
||
|
return true;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
};
|
||
|
class Decrypt {
|
||
|
/* Markov model of the plaintext */
|
||
|
Corpus corpus;
|
||
|
|
||
|
/* Cipher text */
|
||
|
char const *cipher_text;
|
||
|
|
||
|
/* Temporary buffer for decryption attempts */
|
||
|
char *plain_text;
|
||
|
|
||
|
/* Length of the cipher text */
|
||
|
size_t len;
|
||
|
|
||
|
public:
|
||
|
Decrypt(char const *corpus, char const *data)
|
||
|
: corpus(create_corpus(corpus)), cipher_text(data),
|
||
|
plain_text(static_cast<char *>(malloc(strlen(data) + 1))),
|
||
|
len(strlen(cipher_text)) {
|
||
|
plain_text[strlen(data)] = 0;
|
||
|
}
|
||
|
|
||
|
~Decrypt() { free(plain_text); }
|
||
|
|
||
|
Mapping find_map(size_t tries, size_t iterations) {
|
||
|
Mapping map;
|
||
|
double best_score = INFINITY;
|
||
|
|
||
|
while (tries--) {
|
||
|
Mapping m;
|
||
|
double s;
|
||
|
std::tie(m, s) = random_map(iterations);
|
||
|
|
||
|
if (s < best_score) {
|
||
|
map = m;
|
||
|
best_score = s;
|
||
|
printf("New best try, character score is %g:\n",
|
||
|
best_score / double(len));
|
||
|
printf(" ");
|
||
|
for (uint32_t i = 0; i < 26; i++) {
|
||
|
putchar(toupper(map[i]));
|
||
|
}
|
||
|
printf("\n");
|
||
|
printf(" %s\n", apply_map(m));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return map;
|
||
|
}
|
||
|
|
||
|
char const *apply_map(Mapping const &m) {
|
||
|
char map[256];
|
||
|
m.create_map(&map[0]);
|
||
|
std::transform(&cipher_text[0], &cipher_text[len], &plain_text[0],
|
||
|
[map](unsigned char a) { return map[a]; });
|
||
|
|
||
|
return plain_text;
|
||
|
}
|
||
|
|
||
|
protected:
|
||
|
Corpus create_corpus(char const *file) {
|
||
|
Corpus m;
|
||
|
std::ifstream corpus{file};
|
||
|
|
||
|
char buf[128];
|
||
|
for (auto i = 0u; i < m.order(); i++) {
|
||
|
buf[i] = ' ';
|
||
|
}
|
||
|
|
||
|
while (corpus.good()) {
|
||
|
corpus.read(&buf[m.order()], sizeof(buf) - m.order());
|
||
|
auto buflen = m.order() + corpus.gcount();
|
||
|
|
||
|
for (size_t i = 0; i < (buflen - m.order() - 1u); i++) {
|
||
|
m.add(&buf[i], m.order() + 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return m;
|
||
|
}
|
||
|
|
||
|
double rate(Mapping const &m, int max_order = -1) const {
|
||
|
const auto order =
|
||
|
(-1 == max_order) ? corpus.order() : static_cast<size_t>(max_order);
|
||
|
double score = 0.;
|
||
|
char map[256];
|
||
|
m.create_map(&map[0]);
|
||
|
std::transform(&cipher_text[0], &cipher_text[len], &plain_text[0],
|
||
|
[map](unsigned char a) { return map[a]; });
|
||
|
for (size_t i = 0, valid = 0; i < len; i++) {
|
||
|
if (plain_text[i]) {
|
||
|
valid++;
|
||
|
if (valid == (order + 1)) {
|
||
|
score += log(corpus.probability(&plain_text[i - valid], valid));
|
||
|
} else if (valid > (order + 1)) {
|
||
|
score += log(
|
||
|
corpus.final_probability(&plain_text[i - order - 1], order + 1));
|
||
|
}
|
||
|
} else {
|
||
|
if (valid && (valid <= order)) {
|
||
|
score += log(corpus.probability(&plain_text[i - valid], valid));
|
||
|
}
|
||
|
valid = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return -score;
|
||
|
}
|
||
|
|
||
|
std::pair<Mapping, double> random_map(size_t iterations) {
|
||
|
Mapping m;
|
||
|
double current_score{INFINITY};
|
||
|
for (uint32_t i = 0; i < 26; i++) {
|
||
|
m[i] = 'a' + i;
|
||
|
}
|
||
|
|
||
|
std::random_device rd;
|
||
|
std::mt19937 g(rd());
|
||
|
do {
|
||
|
std::shuffle(&m.map[0], &m.map[26], g);
|
||
|
current_score = rate(m);
|
||
|
} while (std::isinf(current_score));
|
||
|
|
||
|
std::uniform_int_distribution<size_t> char_index{0, 25};
|
||
|
std::uniform_real_distribution random_float;
|
||
|
|
||
|
int order = 0;
|
||
|
for (uint32_t i = 0; i < iterations; i++) {
|
||
|
Mapping m2 = m;
|
||
|
std::swap(m2[char_index(g)], m2[char_index(g)]);
|
||
|
|
||
|
int new_order = (corpus.order() * (1 + i) + iterations - 1) / iterations;
|
||
|
if (new_order != order) {
|
||
|
current_score = rate(m);
|
||
|
order = new_order;
|
||
|
}
|
||
|
|
||
|
auto new_score = rate(m2, order);
|
||
|
double diff = current_score - new_score;
|
||
|
if ((diff > 0) || (random_float(g) <
|
||
|
std::exp(diff * double(1 + i) / double(iterations)))) {
|
||
|
current_score = new_score;
|
||
|
m = m2;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return std::pair(m, rate(m));
|
||
|
}
|
||
|
};
|
||
|
|
||
|
int main(int argc, char const **argv) {
|
||
|
if (argc < 3) {
|
||
|
printf("Usage: %s corpus ciphertext\n", argv[0]);
|
||
|
exit(1);
|
||
|
}
|
||
|
printf("Cipher text: %s\n", argv[2]);
|
||
|
Decrypt d(argv[1], argv[2]);
|
||
|
Mapping map = d.find_map(50, 1000);
|
||
|
printf("%s\n", d.apply_map(map));
|
||
|
|
||
|
return 0;
|
||
|
}
|