Code: Select all
import Data.List
import Data.Char
-- Frequency table for the english language.
-- The first element is the the expected percentage of As used
-- in an english text. The most common letter is e, at 12.7 percent.
table :: [Float]
table = [8.2, 1.5, 2.8, 4.3, 12.7, 2.2, 2.0, 6.1, 7.0, 0.2, 0.8, 4.0, 2.4, 6.7, 7.5, 2.0, 0.1, 6.0, 6.3, 9.1, 2.8, 1.0, 2.4, 0.2, 2.0, 0.1]
-- Shifts a character to the right if positive, left if negative. Wraps around.
shift :: Int -> Char -> Char -- Modulus handles the wraparound(shift 1 'z' = 'a')
shift n c | isUpper c = chr $ ord 'A' + ((ord c + n - ord 'A') `mod` 26)
| isLower c = chr $ ord 'a' + ((ord c + n - ord 'a') `mod` 26)
| otherwise = c
-- Counts the number of times an element occurs in a list
count :: (Eq a) => a -> [a] -> Int
count e [] = 0
count e (x:xs) | e == x = 1 + count e xs
| otherwise = count e xs
-- Creates a table of frequencies for every letter in a particular string
freqtable :: String -> [Float]
freqtable [] = []
freqtable l = [(fromIntegral . count c $ map toLower l) / (fromIntegral . length . filter (not . isSpace) $ l) * 100 | c <- ['a' .. 'z']]
-- Encodes the message, shifting it n to the right(or n to the left is n is negative)
encode :: Int -> String -> String
encode _ [] = []
encode n xs = map (shift n) xs
{- Chi Squaring function - calculates the chi square on a list of floats
-- It is defined as the sum of the squared differences of two 'adjacent' elements
-- over the the second element.
-- It is used to determine which distribution of characters is closest to the
-- distribution in the table above.-}
chisqr :: [Float] -> [Float] -> Float
chisqr [] _ = 0
chisqr _ [] = 0
chisqr (o:os) (e:es) = ((o-e)^2)/e + chisqr os es
-- Rotates a list to the left by i places.
-- rotate 1 "123" becomes "231"
rotate :: Int -> [a] -> [a]
rotate _ [] = []
rotate i xs = drop i xs ++ take i xs
-- Finds the first position of an element in a list.
position :: (Eq a) => a -> [a] -> Int
position e (x:xs) | x == e = 0
| otherwise = 1 + position e xs
{- Attempts to determine the shift factor used to encrypt
-- the text passed to it.
-- To do this, it first gets the character frequencies of the encrypted
-- text. It then gets the chi square value of it, and all subsequent rotations
-- from 0 - 25. The rotation that has the smallest chi square value is most
-- likely to be the shift factor for the plaintext.
-- The longer the plaintext, the smaller the minimum chi square value will be.
-- This als implies the opposite - the smaller the plaintext, the lower the chance
-- of being able to crack the ciphertext is.
-}
crack [] = []
crack xs = encode (-minIndex) xs
where frequencies = freqtable xs
step a (b:bs) = rotate a frequencies : b : bs
step a [] = [rotate a frequencies]
chiVals = map (\x -> chisqr x table) $ foldr step [] [0..25]
minIndex = position (minimum chiVals) $ chiVals