Initial commit

This commit is contained in:
Lynn Ochs 2020-02-02 20:23:00 +01:00
commit e0c76dbcfb
3 changed files with 373 additions and 0 deletions

29
Makefile Normal file
View File

@ -0,0 +1,29 @@
# Builds the test program and a clang compilation database for it.
CXXFLAGS += -std=c++17 -pedantic -Wall
# Libraries live in LDLIBS so they follow the object files on the link line;
# linkers that resolve symbols left to right can otherwise discard them.
LDLIBS += -lm -lstdc++
SRC=test.cpp
OBJ=$(SRC:%=%.o)
JSON=$(OBJ:%=%.json)
HDR=markov.hpp
BIN=test

all: $(BIN) compile_commands.json

$(BIN): $(OBJ)
	$(CXX) $(CXXFLAGS) $(LDFLAGS) $(OBJ) $(LDLIBS) -o $@

# Every object depends on the header because the model is header-only.
$(OBJ): %.o: % $(HDR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

# clang -MJ emits one compilation database entry (ending in a comma) per source.
$(JSON): %.o.json: % Makefile
	clang $(CXXFLAGS) -c $< -o /dev/null -MJ $@

# Wrap the concatenated entries in [ ] and drop the comma of the LAST line only
# ($$ is a literal $ for sed), so the result stays valid JSON with many sources.
compile_commands.json: $(JSON)
	sed -e '1s/^/[\n/' -e '$$s/,$$/\n]/' $^ > $@

clean:
	rm -f $(BIN) $(OBJ) $(JSON) compile_commands.json

.PHONY: all clean

129
markov.hpp Normal file
View File

@ -0,0 +1,129 @@
#pragma once
#include <limits>
#include <map>
#include <stdlib.h>
#include <vector>
/* Markov class.
   One node of a counting prefix tree: each node stores how many sequences
   passed through it and one child node per element that followed.
   T: Element type
   ORDER: Prefix length. A markov chain of order n uses n elements to give a
   probability for the next element, so every stored sequence has ORDER + 1
   elements.
*/
template <typename T, size_t ORDER> class Markov {
public:
  template <typename, size_t> friend class Markov;
  /* ORDER - 1 wraps to the maximum size_t for ORDER == 0, which selects the
     leaf specialization and ends the recursion. */
  using Child = Markov<T, ORDER - 1>;
  using Children = std::map<T, Child>;
  /* Total number of entries for this node
     If not a leaf, also the sum of the totals of its children.
  */
  size_t total = 0;
  /* A map of a key to child nodes */
  Children children;

  Markov() = default;

  constexpr size_t order() const { return ORDER; }

  /* Add the start of iter to the model
     Must have sufficient size (at least order + 1); shorter input is ignored.
  */
  template <typename IT> void add(IT iter, size_t size) {
    if (size <= ORDER) {
      return;
    }
    total++;
    find(*iter)->second.add(iter + 1, size - 1);
  }

  /* Decrements the occurrences of the start of iter by 1.
     Must have sufficient size (at least order + 1)
     Returns true if the sequence could be found in the model. When it is not
     found the model is left untouched.
  */
  template <typename IT> bool dec(IT iter, size_t size) {
    if (size <= ORDER) {
      return false;
    }
    auto child = children.find(*iter);
    if (child == children.end()) {
      return false;
    }
    /* Recurse first: counts are only touched once the whole sequence is known
       to exist, so a miss cannot corrupt the totals. */
    if (!child->second.dec(iter + 1, size - 1)) {
      return false;
    }
    total--;
    if (child->second.total == 0) {
      children.erase(child);
    }
    return true;
  }

protected:
  /* Finds the key or adds it */
  typename Children::iterator find(const T &key) {
    return children.try_emplace(key).first;
  }

public:
  /* Joint probability of the first size items of the sequence (the recursion
     stops at the leaf, so at most order + 1 items are used).
     An item never seen at this node is rated as half an occurrence. Dividing
     by total means an empty model yields infinity, not a probability.
  */
  template <typename IT> double probability(IT iter, size_t size) const {
    if (size == 0) {
      return 1.;
    }
    auto child = children.find(*iter);
    if (child == children.cend()) {
      return 0.5 / double(total);
    }
    auto &val = child->second;
    return (double(val.total) / double(total)) *
           val.probability(iter + 1, size - 1);
  }

  /* Probability of finding the last item (but at most the order+1st) in the
     model, given the previous sequence.
     Uses the same half-occurrence fallback as probability() as soon as any
     item on the path is unknown.
  */
  template <typename IT> double final_probability(IT iter, size_t size) const {
    if (size == 0) {
      return 1.;
    }
    auto child = children.find(*iter);
    if (child == children.cend()) {
      return 0.5 / double(total);
    }
    auto &val = child->second;
    if (size == 1) {
      return double(val.total) / double(total);
    }
    return val.final_probability(iter + 1, size - 1);
  }
};

/* Leaf of the recursion with just a count */
template <typename T> class Markov<T, std::numeric_limits<size_t>::max()> {
public:
  size_t total = 0;

  Markov() = default;

  constexpr size_t order() const { return 0; }

  template <typename IT> void add(IT, size_t) { total++; }

  /* Removes one occurrence; refuses instead of wrapping around when the
     count is already zero. */
  template <typename IT> bool dec(IT, size_t) {
    if (total == 0) {
      return false;
    }
    total--;
    return true;
  }

  template <typename IT> double probability(IT, size_t) const { return 1.; }

  template <typename IT> double final_probability(IT, size_t) const {
    return 1.;
  }
};

215
test.cpp Normal file
View File

@ -0,0 +1,215 @@
#include <algorithm>
#include <array>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <queue>
#include <random>
#include <stdio.h>
#include <stdlib.h>
#include <tuple>
#include "markov.hpp"
/* Character-level Markov model of order 6 (6 characters of context per
   predicted character) */
using Corpus = Markov<char, 6>;
/* Manage a lookup table for monoalphabetic substitution.
   map[i] is the output character for the i-th letter of the alphabet. */
struct Mapping {
  char map[26]{};

  Mapping() = default;

  char &operator[](size_t i) { return map[i]; }
  /* Read-only access so a const Mapping can be inspected as well */
  const char &operator[](size_t i) const { return map[i]; }

  constexpr auto begin() { return &map[0]; }
  constexpr auto cbegin() const { return &map[0]; }
  constexpr auto cend() const { return map + 26; }

  /* Create the LUT. output must have room for 256 entries.
     Every character maps to itself, except that both 'a'-'z' and 'A'-'Z' are
     replaced by the entry of map for that letter (so case is folded to
     whatever case map holds). */
  constexpr void create_map(char *output) const {
    for (size_t i = 0; i < 256; i++) {
      output[i] = static_cast<char>(i);
    }
    for (size_t i = 0; i < 26; i++) {
      output['a' + i] = map[i];
      output['A' + i] = map[i];
    }
  }

  constexpr auto end() { return map + 26; }

  /* Swap two output characters.
     Returns false, leaving the table unchanged, unless both a and b occur in
     the table. */
  bool swap(char a, char b) {
    auto left = std::find(begin(), end(), a);
    auto right = std::find(begin(), end(), b);
    if (left == end() || right == end()) {
      return false;
    }
    /* Exchange the table entries themselves, not the local iterators */
    std::iter_swap(left, right);
    return true;
  }
};
class Decrypt {
/* Markov model of the plaintext */
Corpus corpus;
/* Cipher text */
char const *cipher_text;
/* Temporary buffer for decryption attempts */
char *plain_text;
/* Length of the cipher text */
size_t len;
public:
Decrypt(char const *corpus, char const *data)
: corpus(create_corpus(corpus)), cipher_text(data),
plain_text(static_cast<char *>(malloc(strlen(data) + 1))),
len(strlen(cipher_text)) {
plain_text[strlen(data)] = 0;
}
~Decrypt() { free(plain_text); }
Mapping find_map(size_t tries, size_t iterations) {
Mapping map;
double best_score = INFINITY;
while (tries--) {
Mapping m;
double s;
std::tie(m, s) = random_map(iterations);
if (s < best_score) {
map = m;
best_score = s;
printf("New best try, character score is %g:\n",
best_score / double(len));
printf(" ");
for (uint32_t i = 0; i < 26; i++) {
putchar(toupper(map[i]));
}
printf("\n");
printf(" %s\n", apply_map(m));
}
}
return map;
}
char const *apply_map(Mapping const &m) {
char map[256];
m.create_map(&map[0]);
std::transform(&cipher_text[0], &cipher_text[len], &plain_text[0],
[map](unsigned char a) { return map[a]; });
return plain_text;
}
protected:
Corpus create_corpus(char const *file) {
Corpus m;
std::ifstream corpus{file};
char buf[128];
for (auto i = 0u; i < m.order(); i++) {
buf[i] = ' ';
}
while (corpus.good()) {
corpus.read(&buf[m.order()], sizeof(buf) - m.order());
auto buflen = m.order() + corpus.gcount();
for (size_t i = 0; i < (buflen - m.order() - 1u); i++) {
m.add(&buf[i], m.order() + 1);
}
}
return m;
}
double rate(Mapping const &m, int max_order = -1) const {
const auto order =
(-1 == max_order) ? corpus.order() : static_cast<size_t>(max_order);
double score = 0.;
char map[256];
m.create_map(&map[0]);
std::transform(&cipher_text[0], &cipher_text[len], &plain_text[0],
[map](unsigned char a) { return map[a]; });
for (size_t i = 0, valid = 0; i < len; i++) {
if (plain_text[i]) {
valid++;
if (valid == (order + 1)) {
score += log(corpus.probability(&plain_text[i - valid], valid));
} else if (valid > (order + 1)) {
score += log(
corpus.final_probability(&plain_text[i - order - 1], order + 1));
}
} else {
if (valid && (valid <= order)) {
score += log(corpus.probability(&plain_text[i - valid], valid));
}
valid = 0;
}
}
return -score;
}
std::pair<Mapping, double> random_map(size_t iterations) {
Mapping m;
double current_score{INFINITY};
for (uint32_t i = 0; i < 26; i++) {
m[i] = 'a' + i;
}
std::random_device rd;
std::mt19937 g(rd());
do {
std::shuffle(&m.map[0], &m.map[26], g);
current_score = rate(m);
} while (std::isinf(current_score));
std::uniform_int_distribution<size_t> char_index{0, 25};
std::uniform_real_distribution random_float;
int order = 0;
for (uint32_t i = 0; i < iterations; i++) {
Mapping m2 = m;
std::swap(m2[char_index(g)], m2[char_index(g)]);
int new_order = (corpus.order() * (1 + i) + iterations - 1) / iterations;
if (new_order != order) {
current_score = rate(m);
order = new_order;
}
auto new_score = rate(m2, order);
double diff = current_score - new_score;
if ((diff > 0) || (random_float(g) <
std::exp(diff * double(1 + i) / double(iterations)))) {
current_score = new_score;
m = m2;
}
}
return std::pair(m, rate(m));
}
};
int main(int argc, char const **argv) {
if (argc < 3) {
printf("Usage: %s corpus ciphertext\n", argv[0]);
exit(1);
}
printf("Cipher text: %s\n", argv[2]);
Decrypt d(argv[1], argv[2]);
Mapping map = d.find_map(50, 1000);
printf("%s\n", d.apply_map(map));
return 0;
}