#include #include #include #include #include #include #include #include #include #include #include #include "markov.hpp" using Corpus = Markov; /* Manage a lookup table for monoalphabetic substitution */ struct Mapping { char map[26]{}; Mapping() = default; char &operator[](size_t i) { return map[i]; } constexpr auto begin() { return &map[0]; } constexpr auto cbegin() const { return &map[0]; } constexpr auto cend() const { return &map[26]; } /* Create the LUT. Maps all characters except a-z to themselves, uses map for the rest */ constexpr void create_map(char *output) const { for (size_t i = 0; i < 256; i++) { output[i] = i; } for (size_t i = 0; i < 26; i++) { output['a' + i] = map[i]; output['A' + i] = map[i]; } } constexpr auto end() { return &map[26]; } /* Swap two output characters */ bool swap(char a, char b) { auto left = std::find(begin(), end(), a); auto right = std::find(begin(), end(), b); if (left != end() && right != end()) { std::swap(left, right); return true; } return false; } }; class Decrypt { /* Markov model of the plaintext */ Corpus corpus; /* Cipher text */ char const *cipher_text; /* Temporary buffer for decryption attempts */ char *plain_text; /* Length of the cipher text */ size_t len; public: Decrypt(char const *corpus, char const *data) : corpus(create_corpus(corpus)), cipher_text(data), plain_text(static_cast(malloc(strlen(data) + 1))), len(strlen(cipher_text)) { plain_text[strlen(data)] = 0; } ~Decrypt() { free(plain_text); } Mapping find_map(size_t tries, size_t iterations) { Mapping map; double best_score = INFINITY; while (tries--) { Mapping m; double s; std::tie(m, s) = random_map(iterations); if (s < best_score) { map = m; best_score = s; printf("New best try, character score is %g:\n", best_score / double(len)); printf(" "); for (uint32_t i = 0; i < 26; i++) { putchar(toupper(map[i])); } printf("\n"); printf(" %s\n", apply_map(m)); } } return map; } char const *apply_map(Mapping const &m) { char map[256]; m.create_map(&map[0]); std::transform(&cipher_text[0], &cipher_text[len], &plain_text[0], [map](unsigned char a) { return map[a]; }); return plain_text; } protected: Corpus create_corpus(char const *file) { Corpus m; std::ifstream corpus{file}; char buf[128]; for (auto i = 0u; i < m.order(); i++) { buf[i] = ' '; } while (corpus.good()) { corpus.read(&buf[m.order()], sizeof(buf) - m.order()); auto buflen = m.order() + corpus.gcount(); for (size_t i = 0; i < (buflen - m.order() - 1u); i++) { m.add(&buf[i], m.order() + 1); } } return m; } double rate(Mapping const &m, int max_order = -1) const { const auto order = 1 + ((-1 == max_order) ? corpus.order() : static_cast(max_order)); double score = 0.; char map[256]; m.create_map(&map[0]); std::transform(&cipher_text[0], &cipher_text[len], &plain_text[0], [map](unsigned char a) { return map[a]; }); for (size_t i = 0, valid = 0; i < len; i++) { if (plain_text[i]) { valid++; if (valid == order) { score += log(corpus.probability(&plain_text[i - order], order)); } else if (valid > order) { score += log(corpus.final_probability(&plain_text[i - order], order)); } } else { if (valid && (valid < order)) { score += log(corpus.probability(&plain_text[i - valid], valid)); } valid = 0; } } return -score; } std::pair random_map(size_t iterations) { Mapping m; double current_score{INFINITY}; for (uint32_t i = 0; i < 26; i++) { m[i] = 'a' + i; } std::random_device rd; std::mt19937 g(rd()); do { std::shuffle(&m.map[0], &m.map[26], g); current_score = rate(m); } while (std::isinf(current_score)); std::uniform_int_distribution char_index{0, 25}; std::uniform_real_distribution random_float; int order = 0; for (uint32_t i = 0; i < iterations; i++) { Mapping m2 = m; std::swap(m2[char_index(g)], m2[char_index(g)]); int new_order = (corpus.order() * (1 + i) + iterations - 1) / iterations; if (new_order != order) { current_score = rate(m); order = new_order; } auto new_score = rate(m2, order); double diff = current_score - new_score; if ((diff > 0) || (random_float(g) < std::exp(diff * double(1 + i) / double(iterations)))) { current_score = new_score; m = m2; } } return std::pair(m, rate(m)); } }; int main(int argc, char const **argv) { if (argc < 3) { printf("Usage: %s corpus ciphertext\n", argv[0]); exit(1); } printf("Cipher text: %s\n", argv[2]); Decrypt d(argv[1], argv[2]); Mapping map = d.find_map(50, 1000); printf("%s\n", d.apply_map(map)); return 0; }