Wednesday, March 3, 2021

Usage of pattern matching algorithm - Naive pattern matching / improved Naive pattern matching / KMP algorithm

 

This is a menu driven program on pattern matching. structure is used here for the pattern matching. 

#include<stdio.h>
#include<string.h>
struct tag
{
char pattern[100];
char text[20];
};
void improved(struct tag obj)  
{  
    char *pat=obj.pattern;
char *txt=obj.text;
int len1 = strlen(pat); 
        int len2 = strlen(txt); 
        int i = 0,j;  
    printf("The text is found in the pattern at the following indices : ");
    while (i<=len1-len2)  
    {  
         for (j = 0; j < len2; j++)  
            if (pat[i + j] != txt[j])  
                break;  
        if (j == len2) 
        {  
            printf(" %d ",i);
            i = i + len2;  
        } 
        else if (j == 0)  
            i = i + 1;  
        else
            i = i + j; 
    }  
}  
void naive(struct tag obj) 
{  
   int array[20];
   int index=-1; 
    char *mainString=obj.pattern;
char *text=obj.text;
   int patLen = strlen(text);
   int strLen = strlen(mainString);
   int i,j;
   for(i = 0; i<=(strLen - patLen); i++)
    {  
      for(j = 0; j<patLen; j++) 
  {  
         if(mainString[i+j] != text[j])
            break;
      }
      if(j == patLen) 
       {     //the text is found     
         array[++index] = i;
      }
   }
   printf("The text is found in the pattern at the following indices : ");
   for(i=0;i<=index;i++)
   printf(" %d",array[i]);
}
void arrange(char* pat, int M, int* pps) 
{
   int length = 0;
   pps[0] = 0;
   int i = 1;
   while (i < M) 
  {
      if (pat[i] == pat[length])
      {
         length++;
         pps[i] = length;
         i++;
      } 
      else 
       {
       if (length != 0)
         length = pps[length - 1];
         else 
        {
            pps[i] = 0;
            i++;
         }
      }
   }
}
void KMP(struct tag obj) 
{
   char* pattern=obj.pattern;
   char* text=obj.text;
   int M = strlen(text);
   int N = strlen(pattern);
   int pps[M];
   arrange(obj.text, M, pps);
   int i = 0;
   int j = 0;
     printf("The text is found in the pattern at the following indices : ");
   while (i < N) 
   {
      if (obj.text[j] == obj.pattern[i]) 
      {
         j++;
         i++;
      }
      if (j == M) 
     {
         printf(" %d", i - j);
         j = pps[j - 1];
      }
      else if (i < N && obj.text[j] != obj.pattern[i])
   {
         if (j != 0)
         j = pps[j - 1];
         else
         i = i + 1;
      }
   }
}
void main() 
{
  struct tag obj;
  int ch,f;
  printf("\nEnter the pattern string (LIKE 'This is a test'): ");
  gets(obj.pattern);
  printf("\nEnter the text string (LIKE 'is'): ");
  gets(obj.text);  
  while(1)
  {
    printf("\nEnter 1 for Naive pattern matching, 2 for improved Naive pattern matching and 3 KMP algorithm: ");
    scanf("%d",&ch);
    f=0;
    switch(ch)
    {
    case 1:
          naive(obj);
          break;
    case 2:
        improved(obj);
        break;
    case 3:
      KMP(obj);
      break;
    default:
    f=1;
     }
    if(f==0)
break; 
 } 
 getch();
}


 Pattern matching is the process of finding a text from a pattern sentence. Different algorithms are used, namely naive pattern checking, improved pattern checking and KMP pattern checking.


Improved naive pattern checking is done by void improved(struct tag obj)  function where char *pat holds the address of the pattern sentence and char *txt holds the address of the text to be searched.

Length of pattern sentence is stored in len1 and that of the text in len2. Inside the loop, the characters of each text are being checked from starting indexes of the both the texts and it is continued till the end of 

the length of the text (which is being searched); If any mismatch found, the search is contined with the starting index of second string with the index of the pattern string where mismatch was found and again the process is continued. 

Whenever no mismatch is found, it means the text is present in the pattern sentence and the staring index is displayed. Next the process is continued with start index of text with the index of the pattern sentence upto which matching was done.


Naive pattern checking is done by void naive(struct tag obj)  function where char *mainstring holds the address of the pattern sentence and char *pattern holds the address of the text to be searched.

Length of pattern sentence is stored in strLen and that of the text in patLen. This technique is also same as improved naive pattern checking with only difference that an array is used to store the starting indexes where the match is found.

Finally the array is displayed.

KMP pattern checking is done by using two functions - void KMP(struct tag obj)  and void arrange(char* pat, int M, int* pps). Inside the arrange function, it is checked whether the characters of the to be searched text are duplicate and if so their indexes are stored in array pps[].  This numeric array is used inside KMP function to search the text in pattern text.

No comments:

Post a Comment

Subscribe via email

Enter your email address:

Delivered by FeedBurner