Machine Learning for Hackers读书笔记(七)优化：密码破译

Machine Learning for Hackers读书笔记(七)优化：密码破译

# Caesar cipher: every letter is replaced by the next letter of the alphabet
# (for example 'a' becomes 'b'); the last letter wraps around to the first.
english.letters <- c(
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
)

# Encryption list: plain letter -> letter one position later.
caesar.cipher <- as.list(c(english.letters[-1], english.letters[1]))
names(caesar.cipher) <- english.letters

# Decryption list: encrypted letter -> plain letter. Its keys are stored in
# the order 'b', 'c', ..., 'z', 'a'.
inverse.caesar.cipher <- as.list(english.letters)
names(inverse.caesar.cipher) <- c(english.letters[-1], english.letters[1])

print(caesar.cipher)

# Translate a single string with a substitution cipher.
#
# Args:
#   string: a single character string.
#   cipher: a named list that maps each input character to its replacement.
#
# Returns:
#   The translated string. Characters with no entry in `cipher` are dropped,
#   because looking them up yields NULL.
apply.cipher.to.string <- function(string, cipher) {
  # seq_len() gives an empty index for '' where 1:nchar() would give c(1, 0).
  positions <- seq_len(nchar(string))
  characters <- substring(string, positions, positions)
  translated <- unlist(lapply(characters, function(letter) cipher[[letter]]))
  # Join once at the end instead of re-pasting the growing result per letter.
  paste(translated, collapse = '')
}

# Translate every string of a character vector with a substitution cipher.
#
# Args:
#   text:   a vector of strings.
#   cipher: a named list that maps each input character to its replacement.
#
# Returns:
#   An unnamed character vector with one translated string per element of
#   `text` (a zero-length character vector when `text` is empty).
apply.cipher.to.text <- function(text, cipher) {
  # vapply() builds the result in one pass and guarantees a character vector,
  # instead of growing it with c() on every iteration.
  vapply(
    text,
    function(string) apply.cipher.to.string(string, cipher),
    character(1),
    USE.NAMES = FALSE
  )
}

# Demo: encrypt two words with the Caesar cipher.
apply.cipher.to.text(c('sample', 'text'), caesar.cipher)

# Greedy optimization: accept a new decryption rule only when the decrypted
# text it produces has a higher probability.
#
# Approach:
#
# 1. If decryption rule B yields a more probable decrypted text than rule A,
#    replace A with B.
#
# 2. If decryption rule B yields a less probable decrypted text than rule A,
#    we may still replace A with B, but not every time.
#
# If the decrypted text under rule B has probability p1 and the decrypted text
# under rule A has probability p2, move from A to B with probability p1/p2.
# Accepting B with some probability keeps the search from getting stuck in the
# greedy-optimization trap.

# Generate a random substitution cipher.
#
# Returns:
#   A named list keyed by the letters of `english.letters` (in order); the
#   values are a random permutation of those same letters.
generate.random.cipher <- function() {
  shuffled.positions <- sample(seq_along(english.letters), length(english.letters))
  cipher <- as.list(english.letters[shuffled.positions])
  names(cipher) <- english.letters
  cipher
}

# Return a copy of `cipher` in which `input` maps to `output`.
#
# To keep the mapping one-to-one, the key that previously mapped to `output`
# (the "collateral" key) is given the value `input` used to map to, i.e. the
# two outputs are swapped.
#
# Args:
#   cipher: a named list mapping input letters to output letters.
#   input:  the key whose mapping is changed.
#   output: the new value for `input`.
#
# Returns:
#   The modified cipher; `cipher` itself is not changed.
modify.cipher <- function(cipher, input, output) {
  old.output <- cipher[[input]]

  # vapply() always yields a logical vector, even for an empty cipher, unlike
  # sapply() whose return type depends on its input.
  maps.to.output <- vapply(
    cipher,
    function(value) isTRUE(value == output),
    logical(1)
  )
  collateral.input <- names(cipher)[maps.to.output]

  new.cipher <- cipher
  new.cipher[[input]] <- output
  # When no key currently maps to `output` there is nothing to swap; looping
  # over the (possibly empty) matches avoids indexing with a zero-length name.
  for (key in collateral.input) {
    new.cipher[[key]] <- old.output
  }
  new.cipher
}

# Propose a small random change to a cipher.
#
# Picks one key of `cipher` at random, picks one letter of `english.letters`
# at random, and returns the result of modify.cipher() for that pair.
#
# Args:
#   cipher: a named list mapping input letters to output letters.
#
# Returns:
#   Whatever modify.cipher() returns for the chosen input/output pair.
propose.modified.cipher <- function(cipher) {
  # The two draws stay separate statements so they happen in this order.
  chosen.input <- sample(names(cipher), 1)
  chosen.output <- sample(english.letters, 1)
  modify.cipher(cipher, chosen.input, chosen.output)
}

# Load the lexical database (expected to define `lexical.database`, which the
# lines below index by word).
#
# The path is assembled with file.path() because a Windows path written with
# single backslashes is not a valid R string literal: sequences such as '\d',
# '\M' and '\l' are unrecognized escapes, so the script would fail to parse.
load(file.path('G:', 'dataguru', 'ML_for_Hackers', 'ML_for_Hackers-master',
               '07-Optimization', 'data', 'lexical_database.Rdata'))

# Look at a few entries of the database.
lexical.database[['a']]
lexical.database[['the']]
lexical.database[['he']]
lexical.database[['she']]
lexical.database[['data']]

# Look up the probability of a single word.
#
# Args:
#   one.gram:         the word to look up.
#   lexical.database: a list indexed by word; defaults to an empty list.
#
# Returns:
#   The stored value for `one.gram`, or `.Machine$double.eps` when the word is
#   absent from the database or its stored value is NA.
one.gram.probability <- function(one.gram, lexical.database = list()) {
  stored.value <- lexical.database[[one.gram]]
  entry.missing <- is.null(stored.value) || is.na(stored.value)
  if (!entry.missing) {
    return(stored.value)
  }
  # A tiny positive fallback keeps the later log() finite for unknown words.
  .Machine$double.eps
}

# Log probability of a vector of strings under a cipher.
#
# Each string is translated with `cipher`, looked up with
# one.gram.probability(), and the logs of those probabilities are added up
# (a sum of logs is used rather than a product of probabilities).
#
# Args:
#   text:             a vector of strings.
#   cipher:           the cipher passed to apply.cipher.to.string().
#   lexical.database: the database passed to one.gram.probability().
#
# Returns:
#   The summed log probability; 0 when `text` is empty.
log.probability.of.text <- function(text, cipher, lexical.database = list()) {
  word.log.probability <- function(word) {
    translated.word <- apply.cipher.to.string(word, cipher)
    log(one.gram.probability(translated.word, lexical.database))
  }
  # Left fold starting at 0.0, so terms are added in the order of `text`.
  Reduce(
    function(running.total, word) running.total + word.log.probability(word),
    text,
    0.0
  )
}

#

# Perform one Metropolis step on a cipher.
#
# A modified cipher is proposed and the log probability of `text` is computed
# under both the current and the proposed cipher. A proposal with a higher log
# probability is always accepted; otherwise it is accepted with probability
# exp(proposed - current).
#
# Args:
#   text:             the vector of strings being scored.
#   cipher:           the current cipher.
#   lexical.database: the database passed to log.probability.of.text().
#
# Returns:
#   The proposed cipher if it is accepted, otherwise `cipher`.
metropolis.step <- function(text, cipher, lexical.database = list()) {
  candidate.cipher <- propose.modified.cipher(cipher)

  current.score <- log.probability.of.text(text, cipher, lexical.database)
  candidate.score <- log.probability.of.text(text, candidate.cipher, lexical.database)

  # A strictly better candidate is taken without drawing a random number.
  if (candidate.score > current.score) {
    return(candidate.cipher)
  }

  # Otherwise accept with probability equal to the ratio of the probabilities,
  # compared against a uniform draw between 0 and 1.
  acceptance.probability <- exp(candidate.score - current.score)
  uniform.draw <- runif(1)
  if (uniform.draw < acceptance.probability) {
    return(candidate.cipher)
  }
  cipher
}

# The plaintext: a vector of five words.
decrypted.text <- c('here', 'is', 'some', 'sample', 'text')

# Encrypt it with the Caesar cipher.
encrypted.text <- apply.cipher.to.text(decrypted.text, caesar.cipher)

set.seed(1)

# Start the search from a random cipher.
cipher <- generate.random.cipher()

# Run 50000 Metropolis iterations.
number.of.iterations <- 50000

# The target sentence never changes, so build it once outside the loop.
target.text <- paste(decrypted.text, collapse = ' ')

# Preallocate one vector per result column. Calling rbind() on a data frame in
# every iteration copies all earlier rows each time and is quadratic overall.
log.probabilities <- numeric(number.of.iterations)
decrypted.texts <- character(number.of.iterations)
correct.flags <- numeric(number.of.iterations)

for (iteration in seq_len(number.of.iterations)) {
  # Log probability of the text decrypted with the current cipher.
  log.probability <- log.probability.of.text(encrypted.text, cipher, lexical.database)

  # The current decryption, joined into one sentence.
  current.decrypted.text <- paste(apply.cipher.to.text(encrypted.text, cipher),
                                  collapse = ' ')

  # 1 when the decryption matches the plaintext, 0 otherwise.
  correct.text <- as.numeric(current.decrypted.text == target.text)

  log.probabilities[iteration] <- log.probability
  decrypted.texts[iteration] <- current.decrypted.text
  correct.flags[iteration] <- correct.text

  cipher <- metropolis.step(encrypted.text, cipher, lexical.database)
}

# One row per iteration: iteration number, log probability, decrypted text and
# whether the decryption was correct.
results <- data.frame(Iteration = seq_len(number.of.iterations),
                      LogProbability = log.probabilities,
                      CurrentDecryptedText = decrypted.texts,
                      CorrectText = correct.flags)
相关阅读:
.netCore读取配置文件
 初识.netCore以及如何vs2019创建项目和发布
 深度解析.NetFrameWork/CLR/C# 以及C#6/C#7新语法
 Asp.Net六大内置对象
 MVC的View本质和扩展
 Asp.net管道模型之（HttpModules 和 HttpHandler）
Serf：Gossip Protocol
Consul:ANTI-ENTROPY
Consul:网络坐标
 Consul：Gossip协议
原文地址：https://www.cnblogs.com/MarsMercury/p/4947603.html