Data Structure And Algorithms Assignment for Huffman

Here is the Data Structure and Algorithms Assignment for Huffman Coding

Question:

Huffman Coding

Huffman encoding is based on building the so called ‑ Huffman Encoding Tree.

  • It is a tree that determines the optimal encoding for a set of symbols with given probabilities of occurrence.
  • It also allows for efficient decoding of a stream of bits — when receiving a 0, we take the left child, with a 1, we take the right child, and when we reach a leaf node, we know that a character is complete (and the leaf node stores the corresponding symbol).
  • The idea is quite simple — each symbol is a node storing the symbol and its probability (initially all disconnected).

If each character is to be represented by 8 bits, the given string would require 120 bits. The objective is to compress the string to a lower size using Huffman coding technique.

Following steps are being performed in Huffman coding process:

  • Determine probability/frequency of occurrence of each character in the string
  • Sort the characters in increasing order of frequency and store these frequencies in a priority queue Q.
  • Create a leaf node for each unique character
  • Create an empty node $. Make the minimum frequency the left child whereas the next minimum as right child of the empty node. Set the value of the empty node as the sum of left and right children frequencies.
  • Remove these two minimum frequencies from queue Q and add the sum into the list of frequencies (* represents the internal nodes in the aforementioned figure).
  • Insert node $ into the tree.
  • Repeat steps 3-5 for all the characters.
  • For each non-leaf node, assign 0 to the left edge and 1 to the right edge.

For sending the data over the network, the original string is encoded with Huffman coding and the resulting code is transmitted to the receiver.

Decoding the Received Code

For decoding the code, we can take the code and traverse through the tree to find the character.

Let 101 is to be decoded, we can traverse from the root as in the figure below.

Task – Write C++ code to demonstrate the Huffman encoding and decoding process while making use of appropriate data-structures.

Code:

The below is the code for Data Structure And Algorithms Assignment for Huffman.

#include<iostream>

using namespace std;

struct node{ // For creating a huffman tree.

    double frequency;
    node* rightchild;
    node* leftchild;
    string value;
    string huffman_code;

};

class encode_class{

public:


    encode();


node minHeap(vector<node> &heap_vector)
{  // This function finds the minimum node in the heap and return it.

double temp_number = (double) INT_MAX;

vector<node>::iterator iter1,temp_it;
for(iter1  = heap_vector.begin();iter1!=heap_vector.end();iter1++){
    if(temp_number>(*iter1).frequency){

        temp_it = iter1;
        temp_number = (*iter1).frequency;

    }
} 

node temporary_node = (*temp_it);
heap_vector.erase(temp_it);


return temporary_node;

}

node Huffman_tree(vector &heap_vector)
{ // This function is for building the binary tree.

while(!heap_vector.empty()){
    node* temp = new node;
    node* child_1 = new node;
    node* child_2 = new node;
    *child_1 = minHeap(heap_vector); 
    *child_2 = minHeap(heap_vector);


    temp->leftchild = child_1;
    temp->rightchild=child_2;
    temp->frequency = child_1->frequency+child_2->frequency;
    heap_vector.push_back(*temp);
    if(heap_vector.size()==1){
        break;
    }
}

return heap_vector[0];

}

void tree_search(node* root_node,string s)
{// In this function we traverse across the tree and assign '1'
// to the right node and assign '0' to the left node.*/

node* temp_node = root_node;  

temp_node->huffman_code = s;

if(temp_node==NULL){
    return ;
}

else if(temp_node->leftchild==NULL && temp_node->rightchild==NULL){

// cout<<"The value is : "<value<huffman_code<<endl;
return ;
}

else{
    temp_node->leftchild->huffman_code = s.append("0");
    s.erase(s.end()-1);

    temp_node->rightchild->huffman_code = s.append("1");
    s.erase(s.end()-1);

    tree_search(temp_node->leftchild,s.append("0"));
    s.erase(s.end()-1);
    tree_search(temp_node->rightchild,s.append("1"));
    s.erase(s.end()-1);

}

}

node Huffman_code(vector &heap_vector,string str)
{ // For getting the frequency of the words.

vector<int> freq;   // Vector to store the frequencies of the values 
vector<char> number;  // Vector to store the corresponding numbers to frequencies

freq = gettingfrequency(str,number);  // This function will find the frequency of each word in the string and return it back.

// cout<<"The number vector is : "<<endl;
// for(int i=0;i<number.size();i++){
// cout<<number[i]<<" "<<freq[i]<<endl;
// }

for(int i=0;i<number.size();i++){

    node tempnode;

    tempnode.value = number[i];
    tempnode.frequency = freq[i];
    tempnode.leftchild = NULL;
    tempnode.rightchild = NULL;
    heap_vector.push_back(tempnode);


}

node root = Huffman_tree(heap_vector);

//node root;
return root;

}

bool in_vector(vector check_vector,char s )
{ /* To check if a number is present in the vector or not. */

int flag=0;
for(int i=0;i<check_vector.size();i++){
    if(check_vector[i]==s){
        return 1;
    }
}

return 0;

}

int countingfrequency_words(char word_check,string b)
{//This function looks at a word and counts its frequency.

int count=0;
for(int i=0;i<b.size();i++){

    char temp = b[i];

    if(word_check==temp){
        count++;
    }
}

return count;

}

vector gettingfrequency(string check,vector &values)
{ // This function returns the frequency of the words typed.

vector<int>  frequency_vector;

for(int i=0;i<check.size();i++){

    char temp = check[i];

    if(!in_vector(values,temp))
    {
        int frequency_numbers=countingfrequency_words(temp,check);// To look at a specific word and counting its frequency.
        cout<<"\nthe frequency of  "<<temp<<"  is  : "<<frequency_numbers<<endl;
        values.push_back(temp);
        frequency_vector.push_back(frequency_numbers);
    }
}

return frequency_vector;

}

void dfs(node *root,string s,string &encode)
{ // To look at a particular character in the string and convert it into huffman code.

if(root->leftchild==NULL && root->rightchild==NULL){
    if(s == root->value){

        encode= root->huffman_code;
    }
}

else {
    dfs(root->leftchild,s,encode);
    dfs(root->rightchild,s,encode);
}

}

string encode_values(node* root,string s)
{ // This funnction gets a string and the look up table, to encode that string.

string encoded_binary;

for(int i=0;i<s.size();i++){
    string temp(1,s[i]);
    string encoded_string;  // To assign the value to the string and return it using (by reference argument).

    dfs(root,temp,encoded_string);

    encoded_binary +=encoded_string; // Binary values to be returned.

}



return encoded_binary;

}

};

string decode_to_original(node* root,string s){ // This function gets the encoded values and decodes them to their original values.

if(root->leftchild==NULL && root->rightchild==NULL){
    cout<<"Tell me this is a joke..."<<endl;
}
string decoded_string;
int i=0;
node* temp=root;
while(s[i]!='\0'){  // Running the while loop until the end of the string.

     temp=root;
    while(temp->leftchild!=NULL || temp->rightchild!=NULL){  // Running the while to reach the leaf nodes.

        if(s[i]=='0'){  
            temp=temp->leftchild;
        }


        else if(s[i]=='1'){
            temp=temp->rightchild;
        }

         i++;  // going to the next value of the string as first value has already been decoded.
    }

    if(temp->rightchild==NULL &&temp->leftchild==NULL){ /* If both children are null, meaning it is a leaf node
    so storing it in a string.*/

            decoded_string+=temp->value;
        }


}

return decoded_string;

}

class decodeing{
public:

    encode_class e1;
    node root_node;

    string encoding_txt_file(string to_be_encoded){  // This function encodes the string values.


        vector<node> node_vector;

      root_node = e1.Huffman_code(node_vector,to_be_encoded);
      e1.tree_search(&root_node,""); // This function assigns the binary values to the tree nodes.

    string encoded = e1.encode_values(&root_node,to_be_encoded);

    cout<<endl<<"The encoded string is : "<<encoded<<endl;

    return encoded;

    }

    string get_file_for_encoding(string final_string){              


        string encode_text = encoding_txt_file(final_string);

        return encode_text;
    }

    string decode(string encoded_string){  // Function for decoding the file.

            string decoded = decode_to_original(&root_node,encoded_string);  // Calling the function to decode the file.
             cout<<endl<<endl<<endl;

        return decoded;
    }

};

int main(){

    decodeing f1;


    string paragraph;
    cout<<"Enter the paragraph : "<<endl;

    getline(cin,paragraph);

    string encoded_string = f1.get_file_for_encoding(paragraph);

    string final_string = f1.decode(encoded_string);
    cout<<"The original decoded string is : "<<endl;
    cout<<final_string<<endl;

return 0;

}

For more details about Huffman coding click here

For other assignments and quizzes click here 

Output:

Leave a Reply

Your email address will not be published. Required fields are marked *