Initial commit
This commit is contained in:
commit
e0c76dbcfb
29
Makefile
Normal file
29
Makefile
Normal file
|
@ -0,0 +1,29 @@
|
|||
# Build the cipher-breaking test program and a clang compilation database.
CXXFLAGS += -std=c++17 -pedantic -Wall
# Libraries belong in LDLIBS so they are placed AFTER the objects on the link
# line; linkers resolve symbols left to right, so libraries listed before the
# objects that need them may be dropped.
LDLIBS += -lm -lstdc++

SRC=test.cpp
OBJ=$(SRC:%=%.o)
JSON=$(OBJ:%=%.json)
HDR=markov.hpp
BIN=test

all: $(BIN) compile_commands.json

$(BIN): $(OBJ)
	$(CXX) $(CXXFLAGS) $(LDFLAGS) $(OBJ) $(LDLIBS) -o $@

# Every object is rebuilt when the header changes
$(OBJ): %.o: % $(HDR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

# One compilation database fragment per source file (clang -MJ)
$(JSON): %.o.json: % Makefile
	clang $(CXXFLAGS) -c $< -o /dev/null -MJ $@

# Join the fragments into a JSON array: open the bracket on the first line and
# replace only the trailing comma of the LAST line with the closing bracket, so
# this keeps working when SRC lists more than one file.
compile_commands.json: $(JSON)
	sed -e '1s/^/[\n/' -e '$$s/,$$/\n]/' $^ > $@

clean:
	rm -f $(BIN) $(OBJ) $(JSON) compile_commands.json

.PHONY: all clean
|
129
markov.hpp
Normal file
129
markov.hpp
Normal file
|
@ -0,0 +1,129 @@
|
|||
#pragma once
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <stdlib.h>
|
||||
#include <vector>
|
||||
|
||||
/* Markov class.
   T: Element type
   ORDER: Prefix length. A markov chain of order n uses n elements to give a
          probability for the next element

   Every node maps one element to a child node of order ORDER - 1. Below
   order 0 the index wraps around to the largest size_t, which selects the
   leaf specialization defined after this class, so each stored sequence is
   ORDER + 1 elements long.

   Iterator arguments (IT) must support *iter and iter + 1.
*/
template <typename T, size_t ORDER> class Markov {
public:
  template <typename, size_t> friend class Markov;

  using Child = Markov<T, ORDER - 1>;
  using Children = std::map<T, Child>;

  /* Total number of entries for this node
     If not a leaf, also the sum of the totals of its children.
  */
  size_t total = 0;
  /* A map of a key to child nodes */
  Children children;

  Markov() = default;

  constexpr size_t order() const { return ORDER; }

  /* Add the start of iter to the model
     Must have sufficient size (at least order + 1); shorter input is ignored.
  */
  template <typename IT> void add(IT iter, size_t size) {
    if (size <= ORDER) {
      return;
    }
    total++;
    find(*iter)->second.add(iter + 1, size - 1);
  }

  /* Decrements the occurrences of the start of iter by 1.
     Must have sufficient size (at least order + 1)
     Returns true if the sequence could be found in the model. When it
     returns false the model is left unchanged.
  */
  template <typename IT> bool dec(IT iter, size_t size) {
    /* Same size requirement as add() */
    if (size <= ORDER) {
      return false;
    }

    auto child = children.find(*iter);
    if (child == children.end()) {
      return false;
    }
    /* Only touch the counters once the whole sequence is known to exist */
    if (!child->second.dec(iter + 1, size - 1)) {
      return false;
    }

    total--;
    /* Drop branches that no longer hold any entry */
    if (child->second.total == 0) {
      children.erase(child);
    }
    return true;
  }

protected:
  /* Finds the key or adds it */
  typename Children::iterator find(const T &key) {
    return children.try_emplace(key).first;
  }

public:
  /* Probability of finding the first order() + 1 items (but at most size) in
     the sequence.
     An element that is unknown at some level ends the walk with the fallback
     value 0.5 / total of that node (not finite when the node is empty). */
  template <typename IT> double probability(IT iter, size_t size) const {
    if (size == 0) {
      return 1.;
    }

    auto child = children.find(*iter);
    if (child == children.cend()) {
      return 0.5 / double(total);
    }

    auto &val = child->second;
    return (double(val.total) / double(total)) *
           val.probability(iter + 1, size - 1);
  }

  /* Probability of finding the last item (but at most the order+1st) in the
     model, given the previous sequence.
     Uses the same 0.5 / total fallback as probability() for unknown
     elements. */
  template <typename IT> double final_probability(IT iter, size_t size) const {
    if (size == 0) {
      return 1.;
    }

    auto child = children.find(*iter);
    if (child == children.cend()) {
      return 0.5 / double(total);
    }

    auto &val = child->second;
    if (size == 1) {
      return double(val.total) / double(total);
    }
    return val.final_probability(iter + 1, size - 1);
  }
};

/* Leaf of the recursion with just a count */
template <typename T> class Markov<T, std::numeric_limits<size_t>::max()> {
public:
  size_t total = 0;
  Markov() = default;

  constexpr size_t order() const { return 0; }

  template <typename IT> void add(IT, size_t) { total++; }

  /* Returns false instead of wrapping around when nothing is left to remove */
  template <typename IT> bool dec(IT, size_t) {
    if (total == 0) {
      return false;
    }
    total--;
    return true;
  }

  template <typename IT> double probability(IT, size_t) const { return 1.; }

  template <typename IT> double final_probability(IT, size_t) const {
    return 1.;
  }
};
|
215
test.cpp
Normal file
215
test.cpp
Normal file
|
@ -0,0 +1,215 @@
|
|||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <queue>
|
||||
#include <random>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <tuple>
|
||||
|
||||
#include "markov.hpp"
|
||||
|
||||
using Corpus = Markov<char, 6>;
|
||||
|
||||
/* Manage a lookup table for monoalphabetic substitution.
   map[i] is the output character for the i-th letter of the alphabet. */
struct Mapping {
  char map[26]{};
  Mapping() = default;

  char &operator[](size_t i) { return map[i]; }

  constexpr auto begin() { return map; }
  constexpr auto end() { return map + 26; }

  constexpr auto cbegin() const { return map; }
  constexpr auto cend() const { return map + 26; }

  /* Create the LUT. Maps all characters except a-z to themselves, uses map for
     the rest. Upper case letters get the same output as their lower case
     counterpart. output must have room for 256 entries. */
  constexpr void create_map(char *output) const {
    for (size_t i = 0; i < 256; i++) {
      output[i] = static_cast<char>(i);
    }
    for (size_t i = 0; i < 26; i++) {
      output['a' + i] = map[i];
      output['A' + i] = map[i];
    }
  }

  /* Swap two output characters.
     Returns false (and changes nothing) unless both a and b are present in
     the table. */
  bool swap(char a, char b) {
    const auto left = std::find(begin(), end(), a);
    const auto right = std::find(begin(), end(), b);
    if (left == end() || right == end()) {
      return false;
    }
    /* Exchange the table entries themselves, not the local pointers */
    std::swap(*left, *right);
    return true;
  }
};
|
||||
class Decrypt {
|
||||
/* Markov model of the plaintext */
|
||||
Corpus corpus;
|
||||
|
||||
/* Cipher text */
|
||||
char const *cipher_text;
|
||||
|
||||
/* Temporary buffer for decryption attempts */
|
||||
char *plain_text;
|
||||
|
||||
/* Length of the cipher text */
|
||||
size_t len;
|
||||
|
||||
public:
|
||||
Decrypt(char const *corpus, char const *data)
|
||||
: corpus(create_corpus(corpus)), cipher_text(data),
|
||||
plain_text(static_cast<char *>(malloc(strlen(data) + 1))),
|
||||
len(strlen(cipher_text)) {
|
||||
plain_text[strlen(data)] = 0;
|
||||
}
|
||||
|
||||
~Decrypt() { free(plain_text); }
|
||||
|
||||
Mapping find_map(size_t tries, size_t iterations) {
|
||||
Mapping map;
|
||||
double best_score = INFINITY;
|
||||
|
||||
while (tries--) {
|
||||
Mapping m;
|
||||
double s;
|
||||
std::tie(m, s) = random_map(iterations);
|
||||
|
||||
if (s < best_score) {
|
||||
map = m;
|
||||
best_score = s;
|
||||
printf("New best try, character score is %g:\n",
|
||||
best_score / double(len));
|
||||
printf(" ");
|
||||
for (uint32_t i = 0; i < 26; i++) {
|
||||
putchar(toupper(map[i]));
|
||||
}
|
||||
printf("\n");
|
||||
printf(" %s\n", apply_map(m));
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
char const *apply_map(Mapping const &m) {
|
||||
char map[256];
|
||||
m.create_map(&map[0]);
|
||||
std::transform(&cipher_text[0], &cipher_text[len], &plain_text[0],
|
||||
[map](unsigned char a) { return map[a]; });
|
||||
|
||||
return plain_text;
|
||||
}
|
||||
|
||||
protected:
|
||||
Corpus create_corpus(char const *file) {
|
||||
Corpus m;
|
||||
std::ifstream corpus{file};
|
||||
|
||||
char buf[128];
|
||||
for (auto i = 0u; i < m.order(); i++) {
|
||||
buf[i] = ' ';
|
||||
}
|
||||
|
||||
while (corpus.good()) {
|
||||
corpus.read(&buf[m.order()], sizeof(buf) - m.order());
|
||||
auto buflen = m.order() + corpus.gcount();
|
||||
|
||||
for (size_t i = 0; i < (buflen - m.order() - 1u); i++) {
|
||||
m.add(&buf[i], m.order() + 1);
|
||||
}
|
||||
}
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
double rate(Mapping const &m, int max_order = -1) const {
|
||||
const auto order =
|
||||
(-1 == max_order) ? corpus.order() : static_cast<size_t>(max_order);
|
||||
double score = 0.;
|
||||
char map[256];
|
||||
m.create_map(&map[0]);
|
||||
std::transform(&cipher_text[0], &cipher_text[len], &plain_text[0],
|
||||
[map](unsigned char a) { return map[a]; });
|
||||
for (size_t i = 0, valid = 0; i < len; i++) {
|
||||
if (plain_text[i]) {
|
||||
valid++;
|
||||
if (valid == (order + 1)) {
|
||||
score += log(corpus.probability(&plain_text[i - valid], valid));
|
||||
} else if (valid > (order + 1)) {
|
||||
score += log(
|
||||
corpus.final_probability(&plain_text[i - order - 1], order + 1));
|
||||
}
|
||||
} else {
|
||||
if (valid && (valid <= order)) {
|
||||
score += log(corpus.probability(&plain_text[i - valid], valid));
|
||||
}
|
||||
valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -score;
|
||||
}
|
||||
|
||||
std::pair<Mapping, double> random_map(size_t iterations) {
|
||||
Mapping m;
|
||||
double current_score{INFINITY};
|
||||
for (uint32_t i = 0; i < 26; i++) {
|
||||
m[i] = 'a' + i;
|
||||
}
|
||||
|
||||
std::random_device rd;
|
||||
std::mt19937 g(rd());
|
||||
do {
|
||||
std::shuffle(&m.map[0], &m.map[26], g);
|
||||
current_score = rate(m);
|
||||
} while (std::isinf(current_score));
|
||||
|
||||
std::uniform_int_distribution<size_t> char_index{0, 25};
|
||||
std::uniform_real_distribution random_float;
|
||||
|
||||
int order = 0;
|
||||
for (uint32_t i = 0; i < iterations; i++) {
|
||||
Mapping m2 = m;
|
||||
std::swap(m2[char_index(g)], m2[char_index(g)]);
|
||||
|
||||
int new_order = (corpus.order() * (1 + i) + iterations - 1) / iterations;
|
||||
if (new_order != order) {
|
||||
current_score = rate(m);
|
||||
order = new_order;
|
||||
}
|
||||
|
||||
auto new_score = rate(m2, order);
|
||||
double diff = current_score - new_score;
|
||||
if ((diff > 0) || (random_float(g) <
|
||||
std::exp(diff * double(1 + i) / double(iterations)))) {
|
||||
current_score = new_score;
|
||||
m = m2;
|
||||
}
|
||||
}
|
||||
|
||||
return std::pair(m, rate(m));
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, char const **argv) {
|
||||
if (argc < 3) {
|
||||
printf("Usage: %s corpus ciphertext\n", argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
printf("Cipher text: %s\n", argv[2]);
|
||||
Decrypt d(argv[1], argv[2]);
|
||||
Mapping map = d.find_map(50, 1000);
|
||||
printf("%s\n", d.apply_map(map));
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user